summaryrefslogtreecommitdiff
path: root/storage
diff options
context:
space:
mode:
Diffstat (limited to 'storage')
-rw-r--r--[-rwxr-xr-x]storage/archive/CMakeLists.txt3
-rw-r--r--storage/archive/Makefile.am21
-rw-r--r--storage/archive/ha_archive.cc31
-rw-r--r--storage/blackhole/Makefile.am21
-rw-r--r--storage/blackhole/ha_blackhole.cc64
-rw-r--r--storage/csv/Makefile.am22
-rw-r--r--storage/csv/ha_tina.cc27
-rw-r--r--storage/example/Makefile.am22
-rw-r--r--storage/example/ha_example.cc45
-rw-r--r--[-rwxr-xr-x]storage/federated/CMakeLists.txt9
-rw-r--r--storage/federated/Makefile.am21
-rw-r--r--storage/federated/ha_federated.cc38
-rw-r--r--storage/federated/ha_federated.h1
-rw-r--r--storage/heap/Makefile.am21
-rw-r--r--storage/heap/ha_heap.cc21
-rw-r--r--[-rwxr-xr-x]storage/innobase/CMakeLists.txt131
-rw-r--r--storage/innobase/COPYING (renamed from storage/innodb_plugin/COPYING)0
-rw-r--r--storage/innobase/COPYING.Google (renamed from storage/innodb_plugin/COPYING.Google)0
-rw-r--r--storage/innobase/COPYING.Percona (renamed from storage/innodb_plugin/COPYING.Percona)0
-rw-r--r--storage/innobase/COPYING.Sun_Microsystems (renamed from storage/innodb_plugin/COPYING.Sun_Microsystems)0
-rw-r--r--storage/innobase/ChangeLog (renamed from storage/innodb_plugin/ChangeLog)0
-rw-r--r--storage/innobase/Doxyfile (renamed from storage/innodb_plugin/Doxyfile)0
-rw-r--r--storage/innobase/Makefile.am417
-rw-r--r--storage/innobase/btr/btr0btr.c2753
-rw-r--r--storage/innobase/btr/btr0cur.c3442
-rw-r--r--storage/innobase/btr/btr0pcur.c249
-rw-r--r--storage/innobase/btr/btr0sea.c617
-rw-r--r--storage/innobase/buf/buf0buddy.c (renamed from storage/innodb_plugin/buf/buf0buddy.c)0
-rw-r--r--storage/innobase/buf/buf0buf.c3736
-rw-r--r--storage/innobase/buf/buf0flu.c1301
-rw-r--r--storage/innobase/buf/buf0lru.c1871
-rw-r--r--storage/innobase/buf/buf0rea.c522
-rwxr-xr-xstorage/innobase/compile-innodb (renamed from storage/innodb_plugin/compile-innodb)0
-rwxr-xr-xstorage/innobase/compile-innodb-debug (renamed from storage/innodb_plugin/compile-innodb-debug)0
-rw-r--r--storage/innobase/data/data0data.c677
-rw-r--r--storage/innobase/data/data0type.c152
-rw-r--r--storage/innobase/dict/dict0boot.c155
-rw-r--r--storage/innobase/dict/dict0crea.c447
-rw-r--r--storage/innobase/dict/dict0dict.c2196
-rw-r--r--storage/innobase/dict/dict0load.c408
-rw-r--r--storage/innobase/dict/dict0mem.c205
-rw-r--r--storage/innobase/dyn/dyn0dyn.c33
-rw-r--r--storage/innobase/eval/eval0eval.c120
-rw-r--r--storage/innobase/eval/eval0proc.c83
-rw-r--r--storage/innobase/fil/fil0fil.c2250
-rw-r--r--storage/innobase/fsp/fsp0fsp.c2248
-rw-r--r--storage/innobase/fut/fut0fut.c23
-rw-r--r--storage/innobase/fut/fut0lst.c270
-rw-r--r--storage/innobase/ha/ha0ha.c275
-rw-r--r--storage/innobase/ha/ha0storage.c (renamed from storage/innodb_plugin/ha/ha0storage.c)0
-rw-r--r--storage/innobase/ha/hash0hash.c101
-rw-r--r--storage/innobase/ha_innodb.def (renamed from storage/innodb_plugin/ha_innodb.def)0
-rw-r--r--storage/innobase/handler/ha_innodb.cc4411
-rw-r--r--storage/innobase/handler/ha_innodb.h155
-rw-r--r--storage/innobase/handler/handler0alter.cc (renamed from storage/innodb_plugin/handler/handler0alter.cc)0
-rw-r--r--storage/innobase/handler/i_s.cc (renamed from storage/innodb_plugin/handler/i_s.cc)0
-rw-r--r--storage/innobase/handler/i_s.h (renamed from storage/innodb_plugin/handler/i_s.h)0
-rw-r--r--storage/innobase/handler/mysql_addons.cc (renamed from storage/innodb_plugin/handler/mysql_addons.cc)0
-rw-r--r--storage/innobase/ibuf/ibuf0ibuf.c2286
-rw-r--r--storage/innobase/include/btr0btr.h544
-rw-r--r--storage/innobase/include/btr0btr.ic254
-rw-r--r--storage/innobase/include/btr0cur.h773
-rw-r--r--storage/innobase/include/btr0cur.ic134
-rw-r--r--storage/innobase/include/btr0pcur.h389
-rw-r--r--storage/innobase/include/btr0pcur.ic295
-rw-r--r--storage/innobase/include/btr0sea.h245
-rw-r--r--storage/innobase/include/btr0sea.ic45
-rw-r--r--storage/innobase/include/btr0types.h36
-rw-r--r--storage/innobase/include/buf0buddy.h (renamed from storage/innodb_plugin/include/buf0buddy.h)0
-rw-r--r--storage/innobase/include/buf0buddy.ic (renamed from storage/innodb_plugin/include/buf0buddy.ic)0
-rw-r--r--storage/innobase/include/buf0buf.h1853
-rw-r--r--storage/innobase/include/buf0buf.ic1207
-rw-r--r--storage/innobase/include/buf0flu.h171
-rw-r--r--storage/innobase/include/buf0flu.ic93
-rw-r--r--storage/innobase/include/buf0lru.h273
-rw-r--r--storage/innobase/include/buf0lru.ic23
-rw-r--r--storage/innobase/include/buf0rea.h123
-rw-r--r--storage/innobase/include/buf0types.h70
-rw-r--r--storage/innobase/include/data0data.h509
-rw-r--r--storage/innobase/include/data0data.ic408
-rw-r--r--storage/innobase/include/data0type.h316
-rw-r--r--storage/innobase/include/data0type.ic267
-rw-r--r--storage/innobase/include/data0types.h23
-rw-r--r--storage/innobase/include/db0err.h110
-rw-r--r--storage/innobase/include/dict0boot.h72
-rw-r--r--storage/innobase/include/dict0boot.ic47
-rw-r--r--storage/innobase/include/dict0crea.h140
-rw-r--r--storage/innobase/include/dict0crea.ic23
-rw-r--r--storage/innobase/include/dict0dict.h1267
-rw-r--r--storage/innobase/include/dict0dict.ic574
-rw-r--r--storage/innobase/include/dict0load.h91
-rw-r--r--storage/innobase/include/dict0load.ic23
-rw-r--r--storage/innobase/include/dict0mem.h490
-rw-r--r--storage/innobase/include/dict0mem.ic23
-rw-r--r--storage/innobase/include/dict0types.h27
-rw-r--r--storage/innobase/include/dyn0dyn.h148
-rw-r--r--storage/innobase/include/dyn0dyn.ic133
-rw-r--r--storage/innobase/include/eval0eval.h83
-rw-r--r--storage/innobase/include/eval0eval.ic101
-rw-r--r--storage/innobase/include/eval0proc.h99
-rw-r--r--storage/innobase/include/eval0proc.ic39
-rw-r--r--storage/innobase/include/fil0fil.h727
-rw-r--r--storage/innobase/include/fsp0fsp.h374
-rw-r--r--storage/innobase/include/fsp0fsp.ic41
-rw-r--r--storage/innobase/include/fsp0types.h79
-rw-r--r--storage/innobase/include/fut0fut.h41
-rw-r--r--storage/innobase/include/fut0fut.ic52
-rw-r--r--storage/innobase/include/fut0lst.h199
-rw-r--r--storage/innobase/include/fut0lst.ic108
-rw-r--r--storage/innobase/include/ha0ha.h255
-rw-r--r--storage/innobase/include/ha0ha.ic149
-rw-r--r--storage/innobase/include/ha0storage.h (renamed from storage/innodb_plugin/include/ha0storage.h)0
-rw-r--r--storage/innobase/include/ha0storage.ic (renamed from storage/innodb_plugin/include/ha0storage.ic)0
-rw-r--r--storage/innobase/include/ha_prototypes.h272
-rw-r--r--storage/innobase/include/handler0alter.h (renamed from storage/innodb_plugin/include/handler0alter.h)0
-rw-r--r--storage/innobase/include/hash0hash.h269
-rw-r--r--storage/innobase/include/hash0hash.ic116
-rw-r--r--storage/innobase/include/ibuf0ibuf.h420
-rw-r--r--storage/innobase/include/ibuf0ibuf.ic291
-rw-r--r--storage/innobase/include/ibuf0types.h24
-rw-r--r--storage/innobase/include/lock0iter.h47
-rw-r--r--storage/innobase/include/lock0lock.h962
-rw-r--r--storage/innobase/include/lock0lock.ic84
-rw-r--r--storage/innobase/include/lock0priv.h95
-rw-r--r--storage/innobase/include/lock0priv.ic35
-rw-r--r--storage/innobase/include/lock0types.h35
-rw-r--r--storage/innobase/include/log0log.h781
-rw-r--r--storage/innobase/include/log0log.ic296
-rw-r--r--storage/innobase/include/log0recv.h476
-rw-r--r--storage/innobase/include/log0recv.ic48
-rw-r--r--storage/innobase/include/mach0data.h357
-rw-r--r--storage/innobase/include/mach0data.ic328
-rw-r--r--storage/innobase/include/mem0dbg.h99
-rw-r--r--storage/innobase/include/mem0dbg.ic79
-rw-r--r--storage/innobase/include/mem0mem.h343
-rw-r--r--storage/innobase/include/mem0mem.ic291
-rw-r--r--storage/innobase/include/mem0pool.h112
-rw-r--r--storage/innobase/include/mem0pool.ic23
-rw-r--r--storage/innobase/include/mtr0log.h261
-rw-r--r--storage/innobase/include/mtr0log.ic117
-rw-r--r--storage/innobase/include/mtr0mtr.h394
-rw-r--r--storage/innobase/include/mtr0mtr.ic139
-rw-r--r--storage/innobase/include/mtr0types.h23
-rw-r--r--storage/innobase/include/mysql_addons.h (renamed from storage/innodb_plugin/include/mysql_addons.h)0
-rw-r--r--storage/innobase/include/os0file.h678
-rw-r--r--storage/innobase/include/os0proc.h141
-rw-r--r--storage/innobase/include/os0proc.ic23
-rw-r--r--storage/innobase/include/os0sync.h369
-rw-r--r--storage/innobase/include/os0sync.ic41
-rw-r--r--storage/innobase/include/os0thread.h123
-rw-r--r--storage/innobase/include/os0thread.ic23
-rw-r--r--storage/innobase/include/page0cur.h356
-rw-r--r--storage/innobase/include/page0cur.ic231
-rw-r--r--storage/innobase/include/page0page.h932
-rw-r--r--storage/innobase/include/page0page.ic740
-rw-r--r--storage/innobase/include/page0types.h140
-rw-r--r--storage/innobase/include/page0zip.h (renamed from storage/innodb_plugin/include/page0zip.h)0
-rw-r--r--storage/innobase/include/page0zip.ic (renamed from storage/innodb_plugin/include/page0zip.ic)0
-rw-r--r--storage/innobase/include/pars0grm.h40
-rw-r--r--storage/innobase/include/pars0opt.h49
-rw-r--r--storage/innobase/include/pars0opt.ic23
-rw-r--r--storage/innobase/include/pars0pars.h761
-rw-r--r--storage/innobase/include/pars0pars.ic23
-rw-r--r--storage/innobase/include/pars0sym.h203
-rw-r--r--storage/innobase/include/pars0sym.ic23
-rw-r--r--storage/innobase/include/pars0types.h23
-rw-r--r--storage/innobase/include/que0que.h349
-rw-r--r--storage/innobase/include/que0que.ic140
-rw-r--r--storage/innobase/include/que0types.h29
-rw-r--r--storage/innobase/include/read0read.h159
-rw-r--r--storage/innobase/include/read0read.ic53
-rw-r--r--storage/innobase/include/read0types.h23
-rw-r--r--storage/innobase/include/rem0cmp.h203
-rw-r--r--storage/innobase/include/rem0cmp.ic77
-rw-r--r--storage/innobase/include/rem0rec.h872
-rw-r--r--storage/innobase/include/rem0rec.ic1066
-rw-r--r--storage/innobase/include/rem0types.h32
-rw-r--r--storage/innobase/include/row0ext.h (renamed from storage/innodb_plugin/include/row0ext.h)0
-rw-r--r--storage/innobase/include/row0ext.ic (renamed from storage/innodb_plugin/include/row0ext.ic)0
-rw-r--r--storage/innobase/include/row0ins.h155
-rw-r--r--storage/innobase/include/row0ins.ic23
-rw-r--r--storage/innobase/include/row0merge.h (renamed from storage/innodb_plugin/include/row0merge.h)0
-rw-r--r--storage/innobase/include/row0mysql.h641
-rw-r--r--storage/innobase/include/row0mysql.ic23
-rw-r--r--storage/innobase/include/row0purge.h65
-rw-r--r--storage/innobase/include/row0purge.ic23
-rw-r--r--storage/innobase/include/row0row.h372
-rw-r--r--storage/innobase/include/row0row.ic162
-rw-r--r--storage/innobase/include/row0sel.h379
-rw-r--r--storage/innobase/include/row0sel.ic47
-rw-r--r--storage/innobase/include/row0types.h28
-rw-r--r--storage/innobase/include/row0uins.h36
-rw-r--r--storage/innobase/include/row0uins.ic23
-rw-r--r--storage/innobase/include/row0umod.h35
-rw-r--r--storage/innobase/include/row0umod.ic23
-rw-r--r--storage/innobase/include/row0undo.h125
-rw-r--r--storage/innobase/include/row0undo.ic23
-rw-r--r--storage/innobase/include/row0upd.h447
-rw-r--r--storage/innobase/include/row0upd.ic156
-rw-r--r--storage/innobase/include/row0vers.h122
-rw-r--r--storage/innobase/include/row0vers.ic23
-rw-r--r--storage/innobase/include/srv0que.h61
-rw-r--r--storage/innobase/include/srv0srv.h505
-rw-r--r--storage/innobase/include/srv0srv.ic23
-rw-r--r--storage/innobase/include/srv0start.h138
-rw-r--r--storage/innobase/include/sync0arr.h114
-rw-r--r--storage/innobase/include/sync0arr.ic23
-rw-r--r--storage/innobase/include/sync0rw.h483
-rw-r--r--storage/innobase/include/sync0rw.ic604
-rw-r--r--storage/innobase/include/sync0sync.h332
-rw-r--r--storage/innobase/include/sync0sync.ic188
-rw-r--r--storage/innobase/include/sync0types.h26
-rw-r--r--storage/innobase/include/thr0loc.h69
-rw-r--r--storage/innobase/include/thr0loc.ic23
-rw-r--r--storage/innobase/include/trx0i_s.h (renamed from storage/innodb_plugin/include/trx0i_s.h)0
-rw-r--r--storage/innobase/include/trx0purge.h144
-rw-r--r--storage/innobase/include/trx0purge.ic31
-rw-r--r--storage/innobase/include/trx0rec.h345
-rw-r--r--storage/innobase/include/trx0rec.ic94
-rw-r--r--storage/innobase/include/trx0roll.h348
-rw-r--r--storage/innobase/include/trx0roll.ic33
-rw-r--r--storage/innobase/include/trx0rseg.h165
-rw-r--r--storage/innobase/include/trx0rseg.ic102
-rw-r--r--storage/innobase/include/trx0sys.h581
-rw-r--r--storage/innobase/include/trx0sys.ic200
-rw-r--r--storage/innobase/include/trx0trx.h647
-rw-r--r--storage/innobase/include/trx0trx.ic138
-rw-r--r--storage/innobase/include/trx0types.h81
-rw-r--r--storage/innobase/include/trx0undo.h514
-rw-r--r--storage/innobase/include/trx0undo.ic221
-rw-r--r--storage/innobase/include/trx0xa.h205
-rw-r--r--storage/innobase/include/univ.i207
-rw-r--r--storage/innobase/include/usr0sess.h53
-rw-r--r--storage/innobase/include/usr0sess.ic23
-rw-r--r--storage/innobase/include/usr0types.h23
-rw-r--r--storage/innobase/include/ut0auxconf.h (renamed from storage/innodb_plugin/include/ut0auxconf.h)0
-rw-r--r--storage/innobase/include/ut0byte.h282
-rw-r--r--storage/innobase/include/ut0byte.ic268
-rw-r--r--storage/innobase/include/ut0dbg.h112
-rw-r--r--storage/innobase/include/ut0list.h140
-rw-r--r--storage/innobase/include/ut0list.ic41
-rw-r--r--storage/innobase/include/ut0lst.h210
-rw-r--r--storage/innobase/include/ut0mem.h266
-rw-r--r--storage/innobase/include/ut0mem.ic288
-rw-r--r--storage/innobase/include/ut0rnd.h120
-rw-r--r--storage/innobase/include/ut0rnd.ic115
-rw-r--r--storage/innobase/include/ut0sort.h33
-rw-r--r--storage/innobase/include/ut0ut.h416
-rw-r--r--storage/innobase/include/ut0ut.ic126
-rw-r--r--storage/innobase/include/ut0vec.h100
-rw-r--r--storage/innobase/include/ut0vec.ic88
-rw-r--r--storage/innobase/include/ut0wqueue.h69
-rw-r--r--storage/innobase/lock/lock0iter.c58
-rw-r--r--storage/innobase/lock/lock0lock.c3187
-rw-r--r--storage/innobase/log/log0log.c1179
-rw-r--r--storage/innobase/log/log0recv.c1742
-rw-r--r--storage/innobase/mach/mach0data.c53
-rw-r--r--storage/innobase/mem/mem0dbg.c235
-rw-r--r--storage/innobase/mem/mem0mem.c298
-rw-r--r--storage/innobase/mem/mem0pool.c209
-rw-r--r--storage/innobase/mtr/mtr0log.c337
-rw-r--r--storage/innobase/mtr/mtr0mtr.c176
-rw-r--r--storage/innobase/mysql-test/ctype_innodb_like.inc (renamed from storage/innodb_plugin/mysql-test/ctype_innodb_like.inc)0
-rw-r--r--storage/innobase/mysql-test/have_innodb.inc (renamed from storage/innodb_plugin/mysql-test/have_innodb.inc)0
-rw-r--r--storage/innobase/mysql-test/innodb-analyze.result (renamed from storage/innodb_plugin/mysql-test/innodb-analyze.result)0
-rw-r--r--storage/innobase/mysql-test/innodb-analyze.test (renamed from storage/innodb_plugin/mysql-test/innodb-analyze.test)0
-rw-r--r--storage/innobase/mysql-test/innodb-autoinc.result (renamed from storage/innodb_plugin/mysql-test/innodb-autoinc.result)0
-rw-r--r--storage/innobase/mysql-test/innodb-autoinc.test (renamed from storage/innodb_plugin/mysql-test/innodb-autoinc.test)0
-rw-r--r--storage/innobase/mysql-test/innodb-consistent-master.opt (renamed from storage/innodb_plugin/mysql-test/innodb-consistent-master.opt)0
-rw-r--r--storage/innobase/mysql-test/innodb-consistent.result (renamed from storage/innodb_plugin/mysql-test/innodb-consistent.result)0
-rw-r--r--storage/innobase/mysql-test/innodb-consistent.test (renamed from storage/innodb_plugin/mysql-test/innodb-consistent.test)0
-rw-r--r--storage/innobase/mysql-test/innodb-index.inc (renamed from storage/innodb_plugin/mysql-test/innodb-index.inc)0
-rw-r--r--storage/innobase/mysql-test/innodb-index.result (renamed from storage/innodb_plugin/mysql-test/innodb-index.result)0
-rw-r--r--storage/innobase/mysql-test/innodb-index.test (renamed from storage/innodb_plugin/mysql-test/innodb-index.test)0
-rw-r--r--storage/innobase/mysql-test/innodb-index_ucs2.result (renamed from storage/innodb_plugin/mysql-test/innodb-index_ucs2.result)0
-rw-r--r--storage/innobase/mysql-test/innodb-index_ucs2.test (renamed from storage/innodb_plugin/mysql-test/innodb-index_ucs2.test)0
-rw-r--r--storage/innobase/mysql-test/innodb-lock.result (renamed from storage/innodb_plugin/mysql-test/innodb-lock.result)0
-rw-r--r--storage/innobase/mysql-test/innodb-lock.test (renamed from storage/innodb_plugin/mysql-test/innodb-lock.test)0
-rw-r--r--storage/innobase/mysql-test/innodb-master.opt (renamed from storage/innodb_plugin/mysql-test/innodb-master.opt)0
-rw-r--r--storage/innobase/mysql-test/innodb-replace.result (renamed from storage/innodb_plugin/mysql-test/innodb-replace.result)0
-rw-r--r--storage/innobase/mysql-test/innodb-replace.test (renamed from storage/innodb_plugin/mysql-test/innodb-replace.test)0
-rw-r--r--storage/innobase/mysql-test/innodb-semi-consistent-master.opt (renamed from storage/innodb_plugin/mysql-test/innodb-semi-consistent-master.opt)0
-rw-r--r--storage/innobase/mysql-test/innodb-semi-consistent.result (renamed from storage/innodb_plugin/mysql-test/innodb-semi-consistent.result)0
-rw-r--r--storage/innobase/mysql-test/innodb-semi-consistent.test (renamed from storage/innodb_plugin/mysql-test/innodb-semi-consistent.test)0
-rw-r--r--storage/innobase/mysql-test/innodb-timeout.result (renamed from storage/innodb_plugin/mysql-test/innodb-timeout.result)0
-rw-r--r--storage/innobase/mysql-test/innodb-timeout.test (renamed from storage/innodb_plugin/mysql-test/innodb-timeout.test)0
-rw-r--r--storage/innobase/mysql-test/innodb-use-sys-malloc-master.opt (renamed from storage/innodb_plugin/mysql-test/innodb-use-sys-malloc-master.opt)0
-rw-r--r--storage/innobase/mysql-test/innodb-use-sys-malloc.result (renamed from storage/innodb_plugin/mysql-test/innodb-use-sys-malloc.result)0
-rw-r--r--storage/innobase/mysql-test/innodb-use-sys-malloc.test (renamed from storage/innodb_plugin/mysql-test/innodb-use-sys-malloc.test)0
-rw-r--r--storage/innobase/mysql-test/innodb-zip.result (renamed from storage/innodb_plugin/mysql-test/innodb-zip.result)0
-rw-r--r--storage/innobase/mysql-test/innodb-zip.test (renamed from storage/innodb_plugin/mysql-test/innodb-zip.test)0
-rw-r--r--storage/innobase/mysql-test/innodb.result (renamed from storage/innodb_plugin/mysql-test/innodb.result)0
-rw-r--r--storage/innobase/mysql-test/innodb.test (renamed from storage/innodb_plugin/mysql-test/innodb.test)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug21704.result (renamed from storage/innodb_plugin/mysql-test/innodb_bug21704.result)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug21704.test (renamed from storage/innodb_plugin/mysql-test/innodb_bug21704.test)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug34053.result (renamed from storage/innodb_plugin/mysql-test/innodb_bug34053.result)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug34053.test (renamed from storage/innodb_plugin/mysql-test/innodb_bug34053.test)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug34300.result (renamed from storage/innodb_plugin/mysql-test/innodb_bug34300.result)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug34300.test (renamed from storage/innodb_plugin/mysql-test/innodb_bug34300.test)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug35220.result (renamed from storage/innodb_plugin/mysql-test/innodb_bug35220.result)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug35220.test (renamed from storage/innodb_plugin/mysql-test/innodb_bug35220.test)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug36169.result (renamed from storage/innodb_plugin/mysql-test/innodb_bug36169.result)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug36169.test (renamed from storage/innodb_plugin/mysql-test/innodb_bug36169.test)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug36172.result (renamed from storage/innodb_plugin/mysql-test/innodb_bug36172.result)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug36172.test (renamed from storage/innodb_plugin/mysql-test/innodb_bug36172.test)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug40360.result (renamed from storage/innodb_plugin/mysql-test/innodb_bug40360.result)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug40360.test (renamed from storage/innodb_plugin/mysql-test/innodb_bug40360.test)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug40565.result (renamed from storage/innodb_plugin/mysql-test/innodb_bug40565.result)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug40565.test (renamed from storage/innodb_plugin/mysql-test/innodb_bug40565.test)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug41904.result (renamed from storage/innodb_plugin/mysql-test/innodb_bug41904.result)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug41904.test (renamed from storage/innodb_plugin/mysql-test/innodb_bug41904.test)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug42101-nonzero-master.opt (renamed from storage/innodb_plugin/mysql-test/innodb_bug42101-nonzero-master.opt)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug42101-nonzero.result (renamed from storage/innodb_plugin/mysql-test/innodb_bug42101-nonzero.result)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug42101-nonzero.test (renamed from storage/innodb_plugin/mysql-test/innodb_bug42101-nonzero.test)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug42101.result (renamed from storage/innodb_plugin/mysql-test/innodb_bug42101.result)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug42101.test (renamed from storage/innodb_plugin/mysql-test/innodb_bug42101.test)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug44032.result (renamed from storage/innodb_plugin/mysql-test/innodb_bug44032.result)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug44032.test (renamed from storage/innodb_plugin/mysql-test/innodb_bug44032.test)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug44369.result (renamed from storage/innodb_plugin/mysql-test/innodb_bug44369.result)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug44369.test (renamed from storage/innodb_plugin/mysql-test/innodb_bug44369.test)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug44571.result (renamed from storage/innodb_plugin/mysql-test/innodb_bug44571.result)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug44571.test (renamed from storage/innodb_plugin/mysql-test/innodb_bug44571.test)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug45357.result (renamed from storage/innodb_plugin/mysql-test/innodb_bug45357.result)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug45357.test (renamed from storage/innodb_plugin/mysql-test/innodb_bug45357.test)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug46000.result (renamed from storage/innodb_plugin/mysql-test/innodb_bug46000.result)0
-rw-r--r--storage/innobase/mysql-test/innodb_bug46000.test (renamed from storage/innodb_plugin/mysql-test/innodb_bug46000.test)0
-rw-r--r--storage/innobase/mysql-test/innodb_file_format.result (renamed from storage/innodb_plugin/mysql-test/innodb_file_format.result)0
-rw-r--r--storage/innobase/mysql-test/innodb_file_format.test (renamed from storage/innodb_plugin/mysql-test/innodb_file_format.test)0
-rw-r--r--storage/innobase/mysql-test/innodb_information_schema.result (renamed from storage/innodb_plugin/mysql-test/innodb_information_schema.result)0
-rw-r--r--storage/innobase/mysql-test/innodb_information_schema.test (renamed from storage/innodb_plugin/mysql-test/innodb_information_schema.test)0
-rw-r--r--storage/innobase/mysql-test/innodb_trx_weight.inc (renamed from storage/innodb_plugin/mysql-test/innodb_trx_weight.inc)0
-rw-r--r--storage/innobase/mysql-test/innodb_trx_weight.result (renamed from storage/innodb_plugin/mysql-test/innodb_trx_weight.result)0
-rw-r--r--storage/innobase/mysql-test/innodb_trx_weight.test (renamed from storage/innodb_plugin/mysql-test/innodb_trx_weight.test)0
-rw-r--r--storage/innobase/mysql-test/patches/README (renamed from storage/innodb_plugin/mysql-test/patches/README)0
-rw-r--r--storage/innobase/mysql-test/patches/index_merge_innodb-explain.diff (renamed from storage/innodb_plugin/mysql-test/patches/index_merge_innodb-explain.diff)0
-rw-r--r--storage/innobase/mysql-test/patches/information_schema.diff (renamed from storage/innodb_plugin/mysql-test/patches/information_schema.diff)0
-rw-r--r--storage/innobase/mysql-test/patches/innodb-index.diff (renamed from storage/innodb_plugin/mysql-test/patches/innodb-index.diff)0
-rw-r--r--storage/innobase/mysql-test/patches/innodb_file_per_table.diff (renamed from storage/innodb_plugin/mysql-test/patches/innodb_file_per_table.diff)0
-rw-r--r--storage/innobase/mysql-test/patches/innodb_lock_wait_timeout.diff (renamed from storage/innodb_plugin/mysql-test/patches/innodb_lock_wait_timeout.diff)0
-rw-r--r--storage/innobase/mysql-test/patches/innodb_thread_concurrency_basic.diff (renamed from storage/innodb_plugin/mysql-test/patches/innodb_thread_concurrency_basic.diff)0
-rw-r--r--storage/innobase/mysql-test/patches/partition_innodb.diff (renamed from storage/innodb_plugin/mysql-test/patches/partition_innodb.diff)0
-rw-r--r--storage/innobase/os/os0file.c1494
-rw-r--r--storage/innobase/os/os0proc.c734
-rw-r--r--storage/innobase/os/os0sync.c272
-rw-r--r--storage/innobase/os/os0thread.c133
-rw-r--r--storage/innobase/page/page0cur.c1345
-rw-r--r--storage/innobase/page/page0page.c1864
-rw-r--r--storage/innobase/page/page0zip.c (renamed from storage/innodb_plugin/page/page0zip.c)0
-rw-r--r--storage/innobase/pars/lexyy.c135
-rwxr-xr-xstorage/innobase/pars/make_bison.sh28
-rwxr-xr-xstorage/innobase/pars/make_flex.sh32
-rw-r--r--storage/innobase/pars/pars0grm.c504
-rw-r--r--storage/innobase/pars/pars0grm.h234
-rw-r--r--storage/innobase/pars/pars0grm.y25
-rw-r--r--storage/innobase/pars/pars0lex.l44
-rw-r--r--storage/innobase/pars/pars0opt.c236
-rw-r--r--storage/innobase/pars/pars0pars.c928
-rw-r--r--storage/innobase/pars/pars0sym.c131
-rw-r--r--storage/innobase/plug.in193
-rw-r--r--storage/innobase/que/que0que.c305
-rw-r--r--storage/innobase/read/read0read.c127
-rw-r--r--storage/innobase/rem/rem0cmp.c478
-rw-r--r--storage/innobase/rem/rem0rec.c1181
-rwxr-xr-xstorage/innobase/revert_gen.sh (renamed from storage/innodb_plugin/revert_gen.sh)0
-rw-r--r--storage/innobase/row/row0ext.c (renamed from storage/innodb_plugin/row/row0ext.c)0
-rw-r--r--storage/innobase/row/row0ins.c774
-rw-r--r--storage/innobase/row/row0merge.c (renamed from storage/innodb_plugin/row/row0merge.c)0
-rw-r--r--storage/innobase/row/row0mysql.c1260
-rw-r--r--storage/innobase/row/row0purge.c252
-rw-r--r--storage/innobase/row/row0row.c1060
-rw-r--r--storage/innobase/row/row0sel.c1127
-rw-r--r--storage/innobase/row/row0uins.c164
-rw-r--r--storage/innobase/row/row0umod.c289
-rw-r--r--storage/innobase/row/row0undo.c93
-rw-r--r--storage/innobase/row/row0upd.c1224
-rw-r--r--storage/innobase/row/row0vers.c296
-rwxr-xr-xstorage/innobase/scripts/export.sh (renamed from storage/innodb_plugin/scripts/export.sh)0
-rw-r--r--storage/innobase/scripts/install_innodb_plugins.sql (renamed from storage/innodb_plugin/scripts/install_innodb_plugins.sql)0
-rw-r--r--storage/innobase/scripts/install_innodb_plugins_win.sql (renamed from storage/innodb_plugin/scripts/install_innodb_plugins_win.sql)0
-rwxr-xr-xstorage/innobase/setup.sh (renamed from storage/innodb_plugin/setup.sh)0
-rw-r--r--storage/innobase/srv/srv0que.c107
-rw-r--r--storage/innobase/srv/srv0srv.c1175
-rw-r--r--storage/innobase/srv/srv0start.c731
-rw-r--r--storage/innobase/sync/sync0arr.c379
-rw-r--r--storage/innobase/sync/sync0rw.c722
-rw-r--r--storage/innobase/sync/sync0sync.c609
-rw-r--r--storage/innobase/thr/thr0loc.c135
-rw-r--r--storage/innobase/trx/trx0i_s.c (renamed from storage/innodb_plugin/trx/trx0i_s.c)0
-rw-r--r--storage/innobase/trx/trx0purge.c266
-rw-r--r--storage/innobase/trx/trx0rec.c1058
-rw-r--r--storage/innobase/trx/trx0roll.c565
-rw-r--r--storage/innobase/trx/trx0rseg.c172
-rw-r--r--storage/innobase/trx/trx0sys.c974
-rw-r--r--storage/innobase/trx/trx0trx.c527
-rw-r--r--storage/innobase/trx/trx0undo.c802
-rw-r--r--storage/innobase/usr/usr0sess.c72
-rw-r--r--storage/innobase/ut/ut0auxconf_atomic_pthread_t_gcc.c (renamed from storage/innodb_plugin/ut/ut0auxconf_atomic_pthread_t_gcc.c)0
-rw-r--r--storage/innobase/ut/ut0auxconf_atomic_pthread_t_solaris.c (renamed from storage/innodb_plugin/ut/ut0auxconf_atomic_pthread_t_solaris.c)0
-rw-r--r--storage/innobase/ut/ut0auxconf_have_gcc_atomics.c (renamed from storage/innodb_plugin/ut/ut0auxconf_have_gcc_atomics.c)0
-rw-r--r--storage/innobase/ut/ut0auxconf_have_solaris_atomics.c (renamed from storage/innodb_plugin/ut/ut0auxconf_have_solaris_atomics.c)0
-rw-r--r--storage/innobase/ut/ut0auxconf_pause.c (renamed from storage/innodb_plugin/ut/ut0auxconf_pause.c)0
-rw-r--r--storage/innobase/ut/ut0auxconf_sizeof_pthread_t.c (renamed from storage/innodb_plugin/ut/ut0auxconf_sizeof_pthread_t.c)0
-rw-r--r--storage/innobase/ut/ut0byte.c46
-rw-r--r--storage/innobase/ut/ut0dbg.c141
-rw-r--r--storage/innobase/ut/ut0list.c101
-rw-r--r--storage/innobase/ut/ut0mem.c360
-rw-r--r--storage/innobase/ut/ut0rnd.c41
-rw-r--r--storage/innobase/ut/ut0ut.c301
-rw-r--r--storage/innobase/ut/ut0vec.c45
-rw-r--r--storage/innobase/ut/ut0wqueue.c58
-rw-r--r--storage/innodb_plugin/CMakeLists.txt94
-rw-r--r--storage/innodb_plugin/Makefile.am342
-rw-r--r--storage/innodb_plugin/btr/btr0btr.c3719
-rw-r--r--storage/innodb_plugin/btr/btr0cur.c4847
-rw-r--r--storage/innodb_plugin/btr/btr0pcur.c582
-rw-r--r--storage/innodb_plugin/btr/btr0sea.c1889
-rw-r--r--storage/innodb_plugin/buf/buf0buf.c4052
-rw-r--r--storage/innodb_plugin/buf/buf0flu.c1410
-rw-r--r--storage/innodb_plugin/buf/buf0lru.c2092
-rw-r--r--storage/innodb_plugin/buf/buf0rea.c656
-rw-r--r--storage/innodb_plugin/data/data0data.c764
-rw-r--r--storage/innodb_plugin/data/data0type.c297
-rw-r--r--storage/innodb_plugin/dict/dict0boot.c462
-rw-r--r--storage/innodb_plugin/dict/dict0crea.c1499
-rw-r--r--storage/innodb_plugin/dict/dict0dict.c4851
-rw-r--r--storage/innodb_plugin/dict/dict0load.c1450
-rw-r--r--storage/innodb_plugin/dict/dict0mem.c319
-rw-r--r--storage/innodb_plugin/dyn/dyn0dyn.c65
-rw-r--r--storage/innodb_plugin/eval/eval0eval.c852
-rw-r--r--storage/innodb_plugin/eval/eval0proc.c295
-rw-r--r--storage/innodb_plugin/fil/fil0fil.c4798
-rw-r--r--storage/innodb_plugin/fsp/fsp0fsp.c4310
-rw-r--r--storage/innodb_plugin/fut/fut0fut.c31
-rw-r--r--storage/innodb_plugin/fut/fut0lst.c530
-rw-r--r--storage/innodb_plugin/ha/ha0ha.c441
-rw-r--r--storage/innodb_plugin/ha/hash0hash.c174
-rw-r--r--storage/innodb_plugin/handler/ha_innodb.cc10420
-rw-r--r--storage/innodb_plugin/handler/ha_innodb.h311
-rw-r--r--storage/innodb_plugin/ibuf/ibuf0ibuf.c3624
-rw-r--r--storage/innodb_plugin/include/btr0btr.h509
-rw-r--r--storage/innodb_plugin/include/btr0btr.ic310
-rw-r--r--storage/innodb_plugin/include/btr0cur.h753
-rw-r--r--storage/innodb_plugin/include/btr0cur.ic200
-rw-r--r--storage/innodb_plugin/include/btr0pcur.h537
-rw-r--r--storage/innodb_plugin/include/btr0pcur.ic651
-rw-r--r--storage/innodb_plugin/include/btr0sea.h310
-rw-r--r--storage/innodb_plugin/include/btr0sea.ic84
-rw-r--r--storage/innodb_plugin/include/btr0types.h51
-rw-r--r--storage/innodb_plugin/include/buf0buf.h1521
-rw-r--r--storage/innodb_plugin/include/buf0buf.ic1068
-rw-r--r--storage/innodb_plugin/include/buf0flu.h191
-rw-r--r--storage/innodb_plugin/include/buf0flu.ic123
-rw-r--r--storage/innodb_plugin/include/buf0lru.h295
-rw-r--r--storage/innodb_plugin/include/buf0lru.ic25
-rw-r--r--storage/innodb_plugin/include/buf0rea.h137
-rw-r--r--storage/innodb_plugin/include/buf0types.h82
-rw-r--r--storage/innodb_plugin/include/data0data.h483
-rw-r--r--storage/innodb_plugin/include/data0data.ic612
-rw-r--r--storage/innodb_plugin/include/data0type.h486
-rw-r--r--storage/innodb_plugin/include/data0type.ic599
-rw-r--r--storage/innodb_plugin/include/data0types.h36
-rw-r--r--storage/innodb_plugin/include/db0err.h106
-rw-r--r--storage/innodb_plugin/include/dict0boot.h150
-rw-r--r--storage/innodb_plugin/include/dict0boot.ic93
-rw-r--r--storage/innodb_plugin/include/dict0crea.h197
-rw-r--r--storage/innodb_plugin/include/dict0crea.ic25
-rw-r--r--storage/innodb_plugin/include/dict0dict.h1165
-rw-r--r--storage/innodb_plugin/include/dict0dict.ic806
-rw-r--r--storage/innodb_plugin/include/dict0load.h115
-rw-r--r--storage/innodb_plugin/include/dict0load.ic26
-rw-r--r--storage/innodb_plugin/include/dict0mem.h537
-rw-r--r--storage/innodb_plugin/include/dict0mem.ic26
-rw-r--r--storage/innodb_plugin/include/dict0types.h48
-rw-r--r--storage/innodb_plugin/include/dyn0dyn.h188
-rw-r--r--storage/innodb_plugin/include/dyn0dyn.ic365
-rw-r--r--storage/innodb_plugin/include/eval0eval.h114
-rw-r--r--storage/innodb_plugin/include/eval0eval.ic251
-rw-r--r--storage/innodb_plugin/include/eval0proc.h104
-rw-r--r--storage/innodb_plugin/include/eval0proc.ic88
-rw-r--r--storage/innodb_plugin/include/fil0fil.h723
-rw-r--r--storage/innodb_plugin/include/fsp0fsp.h359
-rw-r--r--storage/innodb_plugin/include/fsp0fsp.ic45
-rw-r--r--storage/innodb_plugin/include/fsp0types.h110
-rw-r--r--storage/innodb_plugin/include/fut0fut.h55
-rw-r--r--storage/innodb_plugin/include/fut0fut.ic56
-rw-r--r--storage/innodb_plugin/include/fut0lst.h217
-rw-r--r--storage/innodb_plugin/include/fut0lst.ic167
-rw-r--r--storage/innodb_plugin/include/ha0ha.h241
-rw-r--r--storage/innodb_plugin/include/ha0ha.ic220
-rw-r--r--storage/innodb_plugin/include/ha_prototypes.h283
-rw-r--r--storage/innodb_plugin/include/hash0hash.h446
-rw-r--r--storage/innodb_plugin/include/hash0hash.ic163
-rw-r--r--storage/innodb_plugin/include/ibuf0ibuf.h383
-rw-r--r--storage/innodb_plugin/include/ibuf0ibuf.ic327
-rw-r--r--storage/innodb_plugin/include/ibuf0types.h31
-rw-r--r--storage/innodb_plugin/include/lock0iter.h69
-rw-r--r--storage/innodb_plugin/include/lock0lock.h823
-rw-r--r--storage/innodb_plugin/include/lock0lock.ic121
-rw-r--r--storage/innodb_plugin/include/lock0priv.h108
-rw-r--r--storage/innodb_plugin/include/lock0priv.ic49
-rw-r--r--storage/innodb_plugin/include/lock0types.h45
-rw-r--r--storage/innodb_plugin/include/log0log.h976
-rw-r--r--storage/innodb_plugin/include/log0log.ic440
-rw-r--r--storage/innodb_plugin/include/log0recv.h489
-rw-r--r--storage/innodb_plugin/include/log0recv.ic53
-rw-r--r--storage/innodb_plugin/include/mach0data.h400
-rw-r--r--storage/innodb_plugin/include/mach0data.ic786
-rw-r--r--storage/innodb_plugin/include/mem0dbg.h143
-rw-r--r--storage/innodb_plugin/include/mem0dbg.ic112
-rw-r--r--storage/innodb_plugin/include/mem0mem.h399
-rw-r--r--storage/innodb_plugin/include/mem0mem.ic646
-rw-r--r--storage/innodb_plugin/include/mem0pool.h136
-rw-r--r--storage/innodb_plugin/include/mem0pool.ic24
-rw-r--r--storage/innodb_plugin/include/mtr0log.h250
-rw-r--r--storage/innodb_plugin/include/mtr0log.ic274
-rw-r--r--storage/innodb_plugin/include/mtr0mtr.h419
-rw-r--r--storage/innodb_plugin/include/mtr0mtr.ic272
-rw-r--r--storage/innodb_plugin/include/mtr0types.h31
-rw-r--r--storage/innodb_plugin/include/os0file.h805
-rw-r--r--storage/innodb_plugin/include/os0proc.h77
-rw-r--r--storage/innodb_plugin/include/os0proc.ic27
-rw-r--r--storage/innodb_plugin/include/os0sync.h445
-rw-r--r--storage/innodb_plugin/include/os0sync.ic53
-rw-r--r--storage/innodb_plugin/include/os0thread.h162
-rw-r--r--storage/innodb_plugin/include/os0thread.ic25
-rw-r--r--storage/innodb_plugin/include/page0cur.h346
-rw-r--r--storage/innodb_plugin/include/page0cur.ic299
-rw-r--r--storage/innodb_plugin/include/page0page.h1015
-rw-r--r--storage/innodb_plugin/include/page0page.ic1073
-rw-r--r--storage/innodb_plugin/include/page0types.h150
-rw-r--r--storage/innodb_plugin/include/pars0grm.h236
-rw-r--r--storage/innodb_plugin/include/pars0opt.h75
-rw-r--r--storage/innodb_plugin/include/pars0opt.ic24
-rw-r--r--storage/innodb_plugin/include/pars0pars.h748
-rw-r--r--storage/innodb_plugin/include/pars0pars.ic24
-rw-r--r--storage/innodb_plugin/include/pars0sym.h244
-rw-r--r--storage/innodb_plugin/include/pars0sym.ic24
-rw-r--r--storage/innodb_plugin/include/pars0types.h50
-rw-r--r--storage/innodb_plugin/include/que0que.h513
-rw-r--r--storage/innodb_plugin/include/que0que.ic273
-rw-r--r--storage/innodb_plugin/include/que0types.h60
-rw-r--r--storage/innodb_plugin/include/read0read.h194
-rw-r--r--storage/innodb_plugin/include/read0read.ic98
-rw-r--r--storage/innodb_plugin/include/read0types.h32
-rw-r--r--storage/innodb_plugin/include/rem0cmp.h194
-rw-r--r--storage/innodb_plugin/include/rem0cmp.ic91
-rw-r--r--storage/innodb_plugin/include/rem0rec.h824
-rw-r--r--storage/innodb_plugin/include/rem0rec.ic1647
-rw-r--r--storage/innodb_plugin/include/rem0types.h46
-rw-r--r--storage/innodb_plugin/include/row0ins.h156
-rw-r--r--storage/innodb_plugin/include/row0ins.ic26
-rw-r--r--storage/innodb_plugin/include/row0mysql.h784
-rw-r--r--storage/innodb_plugin/include/row0mysql.ic24
-rw-r--r--storage/innodb_plugin/include/row0purge.h96
-rw-r--r--storage/innodb_plugin/include/row0purge.ic25
-rw-r--r--storage/innodb_plugin/include/row0row.h310
-rw-r--r--storage/innodb_plugin/include/row0row.ic120
-rw-r--r--storage/innodb_plugin/include/row0sel.h413
-rw-r--r--storage/innodb_plugin/include/row0sel.ic105
-rw-r--r--storage/innodb_plugin/include/row0types.h59
-rw-r--r--storage/innodb_plugin/include/row0uins.h54
-rw-r--r--storage/innodb_plugin/include/row0uins.ic25
-rw-r--r--storage/innodb_plugin/include/row0umod.h52
-rw-r--r--storage/innodb_plugin/include/row0umod.ic24
-rw-r--r--storage/innodb_plugin/include/row0undo.h142
-rw-r--r--storage/innodb_plugin/include/row0undo.ic24
-rw-r--r--storage/innodb_plugin/include/row0upd.h483
-rw-r--r--storage/innodb_plugin/include/row0upd.ic184
-rw-r--r--storage/innodb_plugin/include/row0vers.h142
-rw-r--r--storage/innodb_plugin/include/row0vers.ic30
-rw-r--r--storage/innodb_plugin/include/srv0que.h42
-rw-r--r--storage/innodb_plugin/include/srv0srv.h660
-rw-r--r--storage/innodb_plugin/include/srv0srv.ic24
-rw-r--r--storage/innodb_plugin/include/srv0start.h134
-rw-r--r--storage/innodb_plugin/include/sync0arr.h142
-rw-r--r--storage/innodb_plugin/include/sync0arr.ic27
-rw-r--r--storage/innodb_plugin/include/sync0rw.h585
-rw-r--r--storage/innodb_plugin/include/sync0rw.ic624
-rw-r--r--storage/innodb_plugin/include/sync0sync.h578
-rw-r--r--storage/innodb_plugin/include/sync0sync.ic222
-rw-r--r--storage/innodb_plugin/include/sync0types.h34
-rw-r--r--storage/innodb_plugin/include/thr0loc.h90
-rw-r--r--storage/innodb_plugin/include/thr0loc.ic24
-rw-r--r--storage/innodb_plugin/include/trx0purge.h189
-rw-r--r--storage/innodb_plugin/include/trx0purge.ic43
-rw-r--r--storage/innodb_plugin/include/trx0rec.h338
-rw-r--r--storage/innodb_plugin/include/trx0rec.ic112
-rw-r--r--storage/innodb_plugin/include/trx0roll.h352
-rw-r--r--storage/innodb_plugin/include/trx0roll.ic40
-rw-r--r--storage/innodb_plugin/include/trx0rseg.h220
-rw-r--r--storage/innodb_plugin/include/trx0rseg.ic145
-rw-r--r--storage/innodb_plugin/include/trx0sys.h624
-rw-r--r--storage/innodb_plugin/include/trx0sys.ic387
-rw-r--r--storage/innodb_plugin/include/trx0trx.h814
-rw-r--r--storage/innodb_plugin/include/trx0trx.ic164
-rw-r--r--storage/innodb_plugin/include/trx0types.h108
-rw-r--r--storage/innodb_plugin/include/trx0undo.h551
-rw-r--r--storage/innodb_plugin/include/trx0undo.ic351
-rw-r--r--storage/innodb_plugin/include/trx0xa.h70
-rw-r--r--storage/innodb_plugin/include/univ.i486
-rw-r--r--storage/innodb_plugin/include/usr0sess.h76
-rw-r--r--storage/innodb_plugin/include/usr0sess.ic24
-rw-r--r--storage/innodb_plugin/include/usr0types.h31
-rw-r--r--storage/innodb_plugin/include/ut0byte.h270
-rw-r--r--storage/innodb_plugin/include/ut0byte.ic411
-rw-r--r--storage/innodb_plugin/include/ut0dbg.h175
-rw-r--r--storage/innodb_plugin/include/ut0list.h172
-rw-r--r--storage/innodb_plugin/include/ut0list.ic48
-rw-r--r--storage/innodb_plugin/include/ut0lst.h261
-rw-r--r--storage/innodb_plugin/include/ut0mem.h306
-rw-r--r--storage/innodb_plugin/include/ut0mem.ic338
-rw-r--r--storage/innodb_plugin/include/ut0rnd.h143
-rw-r--r--storage/innodb_plugin/include/ut0rnd.ic230
-rw-r--r--storage/innodb_plugin/include/ut0sort.h106
-rw-r--r--storage/innodb_plugin/include/ut0ut.h403
-rw-r--r--storage/innodb_plugin/include/ut0ut.ic162
-rw-r--r--storage/innodb_plugin/include/ut0vec.h125
-rw-r--r--storage/innodb_plugin/include/ut0vec.ic96
-rw-r--r--storage/innodb_plugin/include/ut0wqueue.h85
-rw-r--r--storage/innodb_plugin/lock/lock0iter.c114
-rw-r--r--storage/innodb_plugin/lock/lock0lock.c5640
-rw-r--r--storage/innodb_plugin/log/log0log.c3467
-rw-r--r--storage/innodb_plugin/log/log0recv.c3764
-rw-r--r--storage/innodb_plugin/mach/mach0data.c134
-rw-r--r--storage/innodb_plugin/mem/mem0dbg.c1037
-rw-r--r--storage/innodb_plugin/mem/mem0mem.c555
-rw-r--r--storage/innodb_plugin/mem/mem0pool.c717
-rw-r--r--storage/innodb_plugin/mtr/mtr0log.c612
-rw-r--r--storage/innodb_plugin/mtr/mtr0mtr.c358
-rw-r--r--storage/innodb_plugin/os/os0file.c4476
-rw-r--r--storage/innodb_plugin/os/os0proc.c231
-rw-r--r--storage/innodb_plugin/os/os0sync.c725
-rw-r--r--storage/innodb_plugin/os/os0thread.c375
-rw-r--r--storage/innodb_plugin/page/page0cur.c1987
-rw-r--r--storage/innodb_plugin/page/page0page.c2608
-rw-r--r--storage/innodb_plugin/pars/lexyy.c2793
-rwxr-xr-xstorage/innodb_plugin/pars/make_bison.sh32
-rwxr-xr-xstorage/innodb_plugin/pars/make_flex.sh48
-rw-r--r--storage/innodb_plugin/pars/pars0grm.c2601
-rw-r--r--storage/innodb_plugin/pars/pars0grm.y635
-rw-r--r--storage/innodb_plugin/pars/pars0lex.l676
-rw-r--r--storage/innodb_plugin/pars/pars0opt.c1216
-rw-r--r--storage/innodb_plugin/pars/pars0pars.c2196
-rw-r--r--storage/innodb_plugin/pars/pars0sym.c371
-rw-r--r--storage/innodb_plugin/plug.in224
-rw-r--r--storage/innodb_plugin/que/que0que.c1436
-rw-r--r--storage/innodb_plugin/read/read0read.c540
-rw-r--r--storage/innodb_plugin/rem/rem0cmp.c1194
-rw-r--r--storage/innodb_plugin/rem/rem0rec.c1720
-rw-r--r--storage/innodb_plugin/row/row0ins.c2508
-rw-r--r--storage/innodb_plugin/row/row0mysql.c4213
-rw-r--r--storage/innodb_plugin/row/row0purge.c689
-rw-r--r--storage/innodb_plugin/row/row0row.c1168
-rw-r--r--storage/innodb_plugin/row/row0sel.c4736
-rw-r--r--storage/innodb_plugin/row/row0uins.c350
-rw-r--r--storage/innodb_plugin/row/row0umod.c815
-rw-r--r--storage/innodb_plugin/row/row0undo.c377
-rw-r--r--storage/innodb_plugin/row/row0upd.c2177
-rw-r--r--storage/innodb_plugin/row/row0vers.c741
-rw-r--r--storage/innodb_plugin/srv/srv0que.c49
-rw-r--r--storage/innodb_plugin/srv/srv0srv.c2758
-rw-r--r--storage/innodb_plugin/srv/srv0start.c2071
-rw-r--r--storage/innodb_plugin/sync/sync0arr.c1022
-rw-r--r--storage/innodb_plugin/sync/sync0rw.c1042
-rw-r--r--storage/innodb_plugin/sync/sync0sync.c1441
-rw-r--r--storage/innodb_plugin/thr/thr0loc.c279
-rw-r--r--storage/innodb_plugin/trx/trx0purge.c1211
-rw-r--r--storage/innodb_plugin/trx/trx0rec.c1601
-rw-r--r--storage/innodb_plugin/trx/trx0roll.c1366
-rw-r--r--storage/innodb_plugin/trx/trx0rseg.c324
-rw-r--r--storage/innodb_plugin/trx/trx0sys.c1613
-rw-r--r--storage/innodb_plugin/trx/trx0trx.c2063
-rw-r--r--storage/innodb_plugin/trx/trx0undo.c1993
-rw-r--r--storage/innodb_plugin/usr/usr0sess.c71
-rw-r--r--storage/innodb_plugin/ut/ut0byte.c55
-rw-r--r--storage/innodb_plugin/ut/ut0dbg.c187
-rw-r--r--storage/innodb_plugin/ut/ut0list.c194
-rw-r--r--storage/innodb_plugin/ut/ut0mem.c708
-rw-r--r--storage/innodb_plugin/ut/ut0rnd.c97
-rw-r--r--storage/innodb_plugin/ut/ut0ut.c625
-rw-r--r--storage/innodb_plugin/ut/ut0vec.c79
-rw-r--r--storage/innodb_plugin/ut/ut0wqueue.c118
-rw-r--r--storage/myisam/Makefile.am21
-rw-r--r--storage/myisam/ha_myisam.cc23
-rw-r--r--storage/myisammrg/Makefile.am21
-rw-r--r--storage/myisammrg/ha_myisammrg.cc23
-rw-r--r--storage/ndb/include/portlib/prefetch.h4
-rw-r--r--storage/ndb/src/common/util/version.c2
688 files changed, 63727 insertions, 228319 deletions
diff --git a/storage/archive/CMakeLists.txt b/storage/archive/CMakeLists.txt
index f4492c3ce77..ce4d92d3f99 100755..100644
--- a/storage/archive/CMakeLists.txt
+++ b/storage/archive/CMakeLists.txt
@@ -13,9 +13,6 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
-SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
-
INCLUDE("${PROJECT_SOURCE_DIR}/storage/mysql_storage_engine.cmake")
SET(ARCHIVE_SOURCES azio.c ha_archive.cc ha_archive.h)
MYSQL_STORAGE_ENGINE(ARCHIVE)
diff --git a/storage/archive/Makefile.am b/storage/archive/Makefile.am
index d092f091798..319ed467966 100644
--- a/storage/archive/Makefile.am
+++ b/storage/archive/Makefile.am
@@ -66,5 +66,26 @@ archive_reader_LDFLAGS = @NOINST_LDFLAGS@
EXTRA_DIST = CMakeLists.txt plug.in
+
+if HAVE_DTRACE_DASH_G
+libarchive_a_LIBADD = probes_mysql.o
+libarchive_a_DEPENDENCIES = probes_mysql.o dtrace_files dtrace_providers
+CLEANFILES = probes_mysql.o dtrace_files dtrace_providers
+DTRACEFILES = libarchive_a-ha_archive.o
+DTRACEPROVIDER = probes_mysql.d
+
+dtrace_files:
+ echo $(DTRACEFILES) > $@
+dtrace_providers: probes_mysql.d
+ echo $(DTRACEPROVIDER) > $@
+probes_mysql.d:
+ -$(RM) -f probes_mysql.d
+ $(CP) $(top_srcdir)/include/probes_mysql.d.base probes_mysql.d
+ echo timestamp > dtrace_sources
+
+probes_mysql.o: $(DTRACEPROVIDER) $(DTRACEFILES)
+ $(DTRACE) $(DTRACEFLAGS) -G -s $(DTRACEPROVIDER) $(DTRACEFILES) -o $@
+endif
+
# Don't update the files from bitkeeper
%::SCCS/s.%
diff --git a/storage/archive/ha_archive.cc b/storage/archive/ha_archive.cc
index 4648ca798da..2e812ffa6a5 100644
--- a/storage/archive/ha_archive.cc
+++ b/storage/archive/ha_archive.cc
@@ -18,6 +18,7 @@
#endif
#include "mysql_priv.h"
+#include "probes_mysql.h"
#include <myisam.h>
#include "ha_archive.h"
@@ -931,7 +932,9 @@ int ha_archive::index_read(uchar *buf, const uchar *key,
{
int rc;
DBUG_ENTER("ha_archive::index_read");
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
rc= index_read_idx(buf, active_index, key, key_len, find_flag);
+ MYSQL_INDEX_READ_ROW_DONE(rc);
DBUG_RETURN(rc);
}
@@ -974,8 +977,10 @@ error:
int ha_archive::index_next(uchar * buf)
{
bool found= 0;
+ int rc;
DBUG_ENTER("ha_archive::index_next");
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
while (!(get_row(&archive, buf)))
{
@@ -986,7 +991,9 @@ int ha_archive::index_next(uchar * buf)
}
}
- DBUG_RETURN(found ? 0 : HA_ERR_END_OF_FILE);
+ rc= found ? 0 : HA_ERR_END_OF_FILE;
+ MYSQL_INDEX_READ_ROW_DONE(rc);
+ DBUG_RETURN(rc);
}
/*
@@ -1213,12 +1220,17 @@ int ha_archive::rnd_next(uchar *buf)
{
int rc;
DBUG_ENTER("ha_archive::rnd_next");
+ MYSQL_READ_ROW_START(table_share->db.str,
+ table_share->table_name.str, TRUE);
if (share->crashed)
DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
if (!scan_rows)
- DBUG_RETURN(HA_ERR_END_OF_FILE);
+ {
+ rc= HA_ERR_END_OF_FILE;
+ goto end;
+ }
scan_rows--;
ha_statistic_increment(&SSV::ha_read_rnd_next_count);
@@ -1227,6 +1239,8 @@ int ha_archive::rnd_next(uchar *buf)
table->status=rc ? STATUS_NOT_FOUND: 0;
+end:
+ MYSQL_READ_ROW_DONE(rc);
DBUG_RETURN(rc);
}
@@ -1254,12 +1268,21 @@ void ha_archive::position(const uchar *record)
int ha_archive::rnd_pos(uchar * buf, uchar *pos)
{
+ int rc;
DBUG_ENTER("ha_archive::rnd_pos");
+ MYSQL_READ_ROW_START(table_share->db.str,
+ table_share->table_name.str, FALSE);
ha_statistic_increment(&SSV::ha_read_rnd_next_count);
current_position= (my_off_t)my_get_ptr(pos, ref_length);
if (azseek(&archive, current_position, SEEK_SET) == (my_off_t)(-1L))
- DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
- DBUG_RETURN(get_row(&archive, buf));
+ {
+ rc= HA_ERR_CRASHED_ON_USAGE;
+ goto end;
+ }
+ rc= get_row(&archive, buf);
+end:
+ MYSQL_READ_ROW_DONE(rc);
+ DBUG_RETURN(rc);
}
/*
diff --git a/storage/blackhole/Makefile.am b/storage/blackhole/Makefile.am
index db4f67cf847..538dc5999f0 100644
--- a/storage/blackhole/Makefile.am
+++ b/storage/blackhole/Makefile.am
@@ -48,5 +48,26 @@ libblackhole_a_SOURCES= ha_blackhole.cc
EXTRA_DIST = CMakeLists.txt plug.in
+
+if HAVE_DTRACE_DASH_G
+libblackhole_a_LIBADD = probes_mysql.o
+libblackhole_a_DEPENDENCIES = probes_mysql.o dtrace_files dtrace_providers
+CLEANFILES = probes_mysql.o dtrace_files dtrace_providers
+DTRACEFILES = libblackhole_a-ha_blackhole.o
+DTRACEPROVIDER = probes_mysql.d
+
+dtrace_files:
+ echo $(DTRACEFILES) > $@
+dtrace_providers: probes_mysql.d
+ echo $(DTRACEPROVIDER) > $@
+probes_mysql.d:
+ -$(RM) -f probes_mysql.d
+ $(CP) $(top_srcdir)/include/probes_mysql.d.base probes_mysql.d
+ echo timestamp > dtrace_sources
+
+probes_mysql.o: $(DTRACEPROVIDER) $(DTRACEFILES)
+ $(DTRACE) $(DTRACEFLAGS) -G -s $(DTRACEPROVIDER) $(DTRACEFILES) -o $@
+endif
+
# Don't update the files from bitkeeper
%::SCCS/s.%
diff --git a/storage/blackhole/ha_blackhole.cc b/storage/blackhole/ha_blackhole.cc
index e3ba111043b..eaa17a41ac5 100644
--- a/storage/blackhole/ha_blackhole.cc
+++ b/storage/blackhole/ha_blackhole.cc
@@ -20,6 +20,7 @@
#define MYSQL_SERVER 1
#include "mysql_priv.h"
+#include "probes_mysql.h"
#include "ha_blackhole.h"
/* Static declarations for handlerton */
@@ -128,18 +129,27 @@ int ha_blackhole::rnd_init(bool scan)
int ha_blackhole::rnd_next(uchar *buf)
{
+ int rc;
DBUG_ENTER("ha_blackhole::rnd_next");
+ MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str,
+ TRUE);
THD *thd= ha_thd();
if (thd->system_thread == SYSTEM_THREAD_SLAVE_SQL && thd->query() == NULL)
- DBUG_RETURN(0);
- DBUG_RETURN(HA_ERR_END_OF_FILE);
+ rc= 0;
+ else
+ rc= HA_ERR_END_OF_FILE;
+ MYSQL_READ_ROW_DONE(rc);
+ DBUG_RETURN(rc);
}
int ha_blackhole::rnd_pos(uchar * buf, uchar *pos)
{
DBUG_ENTER("ha_blackhole::rnd_pos");
+ MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str,
+ FALSE);
DBUG_ASSERT(0);
+ MYSQL_READ_ROW_DONE(0);
DBUG_RETURN(0);
}
@@ -210,11 +220,16 @@ int ha_blackhole::index_read_map(uchar * buf, const uchar * key,
key_part_map keypart_map,
enum ha_rkey_function find_flag)
{
+ int rc;
DBUG_ENTER("ha_blackhole::index_read");
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
THD *thd= ha_thd();
if (thd->system_thread == SYSTEM_THREAD_SLAVE_SQL && thd->query() == NULL)
- DBUG_RETURN(0);
- DBUG_RETURN(HA_ERR_END_OF_FILE);
+ rc= 0;
+ else
+ rc= HA_ERR_END_OF_FILE;
+ MYSQL_INDEX_READ_ROW_DONE(rc);
+ DBUG_RETURN(rc);
}
@@ -222,50 +237,77 @@ int ha_blackhole::index_read_idx_map(uchar * buf, uint idx, const uchar * key,
key_part_map keypart_map,
enum ha_rkey_function find_flag)
{
+ int rc;
DBUG_ENTER("ha_blackhole::index_read_idx");
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
THD *thd= ha_thd();
if (thd->system_thread == SYSTEM_THREAD_SLAVE_SQL && thd->query() == NULL)
- DBUG_RETURN(0);
- DBUG_RETURN(HA_ERR_END_OF_FILE);
+ rc= 0;
+ else
+ rc= HA_ERR_END_OF_FILE;
+ MYSQL_INDEX_READ_ROW_DONE(rc);
+ DBUG_RETURN(rc);
}
int ha_blackhole::index_read_last_map(uchar * buf, const uchar * key,
key_part_map keypart_map)
{
+ int rc;
DBUG_ENTER("ha_blackhole::index_read_last");
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
THD *thd= ha_thd();
if (thd->system_thread == SYSTEM_THREAD_SLAVE_SQL && thd->query() == NULL)
- DBUG_RETURN(0);
- DBUG_RETURN(HA_ERR_END_OF_FILE);
+ rc= 0;
+ else
+ rc= HA_ERR_END_OF_FILE;
+ MYSQL_INDEX_READ_ROW_DONE(rc);
+ DBUG_RETURN(rc);
}
int ha_blackhole::index_next(uchar * buf)
{
+ int rc;
DBUG_ENTER("ha_blackhole::index_next");
- DBUG_RETURN(HA_ERR_END_OF_FILE);
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+ rc= HA_ERR_END_OF_FILE;
+ MYSQL_INDEX_READ_ROW_DONE(rc);
+ DBUG_RETURN(rc);
}
int ha_blackhole::index_prev(uchar * buf)
{
+ int rc;
DBUG_ENTER("ha_blackhole::index_prev");
- DBUG_RETURN(HA_ERR_END_OF_FILE);
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+ rc= HA_ERR_END_OF_FILE;
+ MYSQL_INDEX_READ_ROW_DONE(rc);
+ DBUG_RETURN(rc);
}
int ha_blackhole::index_first(uchar * buf)
{
+ int rc;
DBUG_ENTER("ha_blackhole::index_first");
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+ rc= HA_ERR_END_OF_FILE;
+ MYSQL_INDEX_READ_ROW_DONE(rc);
+ DBUG_RETURN(rc);
DBUG_RETURN(HA_ERR_END_OF_FILE);
}
int ha_blackhole::index_last(uchar * buf)
{
+ int rc;
DBUG_ENTER("ha_blackhole::index_last");
- DBUG_RETURN(HA_ERR_END_OF_FILE);
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+ rc= HA_ERR_END_OF_FILE;
+ MYSQL_INDEX_READ_ROW_DONE(rc);
+ DBUG_RETURN(rc);
}
diff --git a/storage/csv/Makefile.am b/storage/csv/Makefile.am
index 07ffac88a96..96fc26bed07 100644
--- a/storage/csv/Makefile.am
+++ b/storage/csv/Makefile.am
@@ -41,5 +41,27 @@ libcsv_a_CXXFLAGS = $(AM_CFLAGS)
libcsv_a_SOURCES = transparent_file.cc ha_tina.cc
EXTRA_DIST = CMakeLists.txt plug.in
+
+if HAVE_DTRACE_DASH_G
+libcsv_a_LIBADD = probes_mysql.o
+libcsv_a_DEPENDENCIES = probes_mysql.o dtrace_files dtrace_providers
+CLEANFILES = probes_mysql.o dtrace_files dtrace_providers
+DTRACEFILES = libcsv_a-ha_tina.o
+DTRACEPROVIDER = probes_mysql.d
+CLEANFILES += $(DTRACEPROVIDER) dtrace_sources
+
+dtrace_files:
+ echo $(DTRACEFILES) > $@
+dtrace_providers: probes_mysql.d
+ echo $(DTRACEPROVIDER) > $@
+probes_mysql.d:
+ -$(RM) -f probes_mysql.d
+ $(CP) $(top_srcdir)/include/probes_mysql.d.base probes_mysql.d
+ echo timestamp > dtrace_sources
+
+probes_mysql.o: $(DTRACEPROVIDER) $(DTRACEFILES)
+ $(DTRACE) $(DTRACEFLAGS) -G -s $(DTRACEPROVIDER) $(DTRACEFILES) -o $@
+endif
+
# Don't update the files from bitkeeper
%::SCCS/s.%
diff --git a/storage/csv/ha_tina.cc b/storage/csv/ha_tina.cc
index ca9d5215310..07c4785ab90 100644
--- a/storage/csv/ha_tina.cc
+++ b/storage/csv/ha_tina.cc
@@ -48,6 +48,7 @@ TODO:
#include "mysql_priv.h"
#include <mysql/plugin.h>
#include "ha_tina.h"
+#include "probes_mysql.h"
/*
@@ -1095,9 +1096,14 @@ int ha_tina::rnd_next(uchar *buf)
{
int rc;
DBUG_ENTER("ha_tina::rnd_next");
+ MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str,
+ TRUE);
if (share->crashed)
- DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
+ {
+ rc= HA_ERR_CRASHED_ON_USAGE;
+ goto end;
+ }
ha_statistic_increment(&SSV::ha_read_rnd_next_count);
@@ -1105,13 +1111,19 @@ int ha_tina::rnd_next(uchar *buf)
/* don't scan an empty file */
if (!local_saved_data_file_length)
- DBUG_RETURN(HA_ERR_END_OF_FILE);
+ {
+ rc= HA_ERR_END_OF_FILE;
+ goto end;
+ }
if ((rc= find_current_row(buf)))
- DBUG_RETURN(rc);
+ goto end;
stats.records++;
- DBUG_RETURN(0);
+ rc= 0;
+end:
+ MYSQL_READ_ROW_DONE(rc);
+ DBUG_RETURN(rc);
}
/*
@@ -1138,10 +1150,15 @@ void ha_tina::position(const uchar *record)
int ha_tina::rnd_pos(uchar * buf, uchar *pos)
{
+ int rc;
DBUG_ENTER("ha_tina::rnd_pos");
+ MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str,
+ FALSE);
ha_statistic_increment(&SSV::ha_read_rnd_count);
current_position= my_get_ptr(pos,ref_length);
- DBUG_RETURN(find_current_row(buf));
+ rc= find_current_row(buf);
+ MYSQL_READ_ROW_DONE(rc);
+ DBUG_RETURN(rc);
}
/*
diff --git a/storage/example/Makefile.am b/storage/example/Makefile.am
index 4b2f165377c..ce269aee59b 100644
--- a/storage/example/Makefile.am
+++ b/storage/example/Makefile.am
@@ -48,5 +48,27 @@ libexample_a_SOURCES= ha_example.cc
EXTRA_DIST = CMakeLists.txt plug.in
+
+if HAVE_DTRACE_DASH_G
+libexample_a_LIBADD = probes_mysql.o
+libexample_a_DEPENDENCIES = probes_mysql.o
+CLEANFILES =
+BUILT_SOURCES =
+DTRACEFILES = libexample_a-ha_example.o
+DTRACEPROVIDER = probes_mysql.d
+
+dtrace_files:
+ echo $(DTRACEFILES) > $@
+dtrace_providers: probes_mysql.d
+ echo $(DTRACEPROVIDER) > $@
+probes_mysql.d:
+ -$(RM) -f probes_mysql.d
+ $(CP) $(top_srcdir)/include/probes_mysql.d.base probes_mysql.d
+ echo timestamp > dtrace_sources
+
+probes_mysql.o: $(DTRACEPROVIDER) $(DTRACEFILES)
+ $(DTRACE) $(DTRACEFLAGS) -G -s $(DTRACEPROVIDER) $(DTRACEFILES) -o $@
+endif
+
# Don't update the files from bitkeeper
%::SCCS/s.%
diff --git a/storage/example/ha_example.cc b/storage/example/ha_example.cc
index 604722c3c8c..30fc82c82d2 100644
--- a/storage/example/ha_example.cc
+++ b/storage/example/ha_example.cc
@@ -94,6 +94,7 @@
#define MYSQL_SERVER 1
#include "mysql_priv.h"
#include "ha_example.h"
+#include "probes_mysql.h"
#include <mysql/plugin.h>
static handler *example_create_handler(handlerton *hton,
@@ -428,8 +429,12 @@ int ha_example::index_read_map(uchar *buf, const uchar *key,
enum ha_rkey_function find_flag
__attribute__((unused)))
{
+ int rc;
DBUG_ENTER("ha_example::index_read");
- DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+ rc= HA_ERR_WRONG_COMMAND;
+ MYSQL_INDEX_READ_ROW_DONE(rc);
+ DBUG_RETURN(rc);
}
@@ -440,8 +445,12 @@ int ha_example::index_read_map(uchar *buf, const uchar *key,
int ha_example::index_next(uchar *buf)
{
+ int rc;
DBUG_ENTER("ha_example::index_next");
- DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+ rc= HA_ERR_WRONG_COMMAND;
+ MYSQL_INDEX_READ_ROW_DONE(rc);
+ DBUG_RETURN(rc);
}
@@ -452,8 +461,12 @@ int ha_example::index_next(uchar *buf)
int ha_example::index_prev(uchar *buf)
{
+ int rc;
DBUG_ENTER("ha_example::index_prev");
- DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+ rc= HA_ERR_WRONG_COMMAND;
+ MYSQL_INDEX_READ_ROW_DONE(rc);
+ DBUG_RETURN(rc);
}
@@ -469,8 +482,12 @@ int ha_example::index_prev(uchar *buf)
*/
int ha_example::index_first(uchar *buf)
{
+ int rc;
DBUG_ENTER("ha_example::index_first");
- DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+ rc= HA_ERR_WRONG_COMMAND;
+ MYSQL_INDEX_READ_ROW_DONE(rc);
+ DBUG_RETURN(rc);
}
@@ -486,8 +503,12 @@ int ha_example::index_first(uchar *buf)
*/
int ha_example::index_last(uchar *buf)
{
+ int rc;
DBUG_ENTER("ha_example::index_last");
- DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+ rc= HA_ERR_WRONG_COMMAND;
+ MYSQL_INDEX_READ_ROW_DONE(rc);
+ DBUG_RETURN(rc);
}
@@ -533,8 +554,13 @@ int ha_example::rnd_end()
*/
int ha_example::rnd_next(uchar *buf)
{
+ int rc;
DBUG_ENTER("ha_example::rnd_next");
- DBUG_RETURN(HA_ERR_END_OF_FILE);
+ MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str,
+ TRUE);
+ rc= HA_ERR_END_OF_FILE;
+ MYSQL_READ_ROW_DONE(rc);
+ DBUG_RETURN(rc);
}
@@ -581,8 +607,13 @@ void ha_example::position(const uchar *record)
*/
int ha_example::rnd_pos(uchar *buf, uchar *pos)
{
+ int rc;
DBUG_ENTER("ha_example::rnd_pos");
- DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+ MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str,
+ TRUE);
+ rc= HA_ERR_WRONG_COMMAND;
+ MYSQL_READ_ROW_DONE(rc);
+ DBUG_RETURN(rc);
}
diff --git a/storage/federated/CMakeLists.txt b/storage/federated/CMakeLists.txt
index d371c0fc02b..fa54d36481a 100755..100644
--- a/storage/federated/CMakeLists.txt
+++ b/storage/federated/CMakeLists.txt
@@ -1,21 +1,18 @@
# Copyright (C) 2006 MySQL AB
-#
+#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
-#
+#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
-#
+#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
-SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
-
INCLUDE("${PROJECT_SOURCE_DIR}/storage/mysql_storage_engine.cmake")
SET(FEDERATED_SOURCES ha_federated.cc)
MYSQL_STORAGE_ENGINE(FEDERATED)
diff --git a/storage/federated/Makefile.am b/storage/federated/Makefile.am
index 64ea0207017..58e08b1462a 100644
--- a/storage/federated/Makefile.am
+++ b/storage/federated/Makefile.am
@@ -48,5 +48,26 @@ libfederated_a_SOURCES= ha_federated.cc
EXTRA_DIST = CMakeLists.txt plug.in
+
+if HAVE_DTRACE_DASH_G
+libfederated_a_LIBADD = probes_mysql.o
+libfederated_a_DEPENDENCIES = probes_mysql.o dtrace_files dtrace_providers
+CLEANFILES = probes_mysql.o dtrace_files dtrace_providers
+DTRACEFILES = libfederated_a-ha_federated.o
+DTRACEPROVIDER = probes_mysql.d
+
+dtrace_files:
+ echo $(DTRACEFILES) > $@
+dtrace_providers: probes_mysql.d
+ echo $(DTRACEPROVIDER) > $@
+probes_mysql.d:
+ -$(RM) -f probes_mysql.d
+ $(CP) $(top_srcdir)/include/probes_mysql.d.base probes_mysql.d
+ echo timestamp > dtrace_sources
+
+probes_mysql.o: $(DTRACEPROVIDER) $(DTRACEFILES)
+ $(DTRACE) $(DTRACEFLAGS) -G -s $(DTRACEPROVIDER) $(DTRACEFILES) -o $@
+endif
+
# Don't update the files from bitkeeper
%::SCCS/s.%
diff --git a/storage/federated/ha_federated.cc b/storage/federated/ha_federated.cc
index 8deb8693960..a3fa42d7d05 100644
--- a/storage/federated/ha_federated.cc
+++ b/storage/federated/ha_federated.cc
@@ -380,6 +380,7 @@
#endif
#include "ha_federated.h"
+#include "probes_mysql.h"
#include "m_string.h"
@@ -2324,13 +2325,17 @@ int ha_federated::delete_row(const uchar *buf)
int ha_federated::index_read(uchar *buf, const uchar *key,
uint key_len, ha_rkey_function find_flag)
{
+ int rc;
DBUG_ENTER("ha_federated::index_read");
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
if (stored_result)
mysql_free_result(stored_result);
- DBUG_RETURN(index_read_idx_with_result_set(buf, active_index, key,
- key_len, find_flag,
- &stored_result));
+ rc= index_read_idx_with_result_set(buf, active_index, key,
+ key_len, find_flag,
+ &stored_result);
+ MYSQL_INDEX_READ_ROW_DONE(rc);
+ DBUG_RETURN(rc);
}
@@ -2478,6 +2483,7 @@ int ha_federated::read_range_first(const key_range *start_key,
sizeof(sql_query_buffer),
&my_charset_bin);
DBUG_ENTER("ha_federated::read_range_first");
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
DBUG_ASSERT(!(start_key == NULL && end_key == NULL));
@@ -2506,10 +2512,12 @@ int ha_federated::read_range_first(const key_range *start_key,
}
retval= read_next(table->record[0], stored_result);
+ MYSQL_INDEX_READ_ROW_DONE(retval);
DBUG_RETURN(retval);
error:
table->status= STATUS_NOT_FOUND;
+ MYSQL_INDEX_READ_ROW_DONE(retval);
DBUG_RETURN(retval);
}
@@ -2518,7 +2526,9 @@ int ha_federated::read_range_next()
{
int retval;
DBUG_ENTER("ha_federated::read_range_next");
- retval= rnd_next(table->record[0]);
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+ retval= rnd_next_int(table->record[0]);
+ MYSQL_INDEX_READ_ROW_DONE(retval);
DBUG_RETURN(retval);
}
@@ -2526,9 +2536,13 @@ int ha_federated::read_range_next()
/* Used to read forward through the index. */
int ha_federated::index_next(uchar *buf)
{
+ int retval;
DBUG_ENTER("ha_federated::index_next");
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
ha_statistic_increment(&SSV::ha_read_next_count);
- DBUG_RETURN(read_next(buf, stored_result));
+ retval= read_next(buf, stored_result);
+ MYSQL_INDEX_READ_ROW_DONE(retval);
+ DBUG_RETURN(retval);
}
@@ -2637,7 +2651,18 @@ int ha_federated::index_end(void)
int ha_federated::rnd_next(uchar *buf)
{
+ int rc;
DBUG_ENTER("ha_federated::rnd_next");
+ MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str,
+ TRUE);
+ rc= rnd_next_int(buf);
+ MYSQL_READ_ROW_DONE(rc);
+ DBUG_RETURN(rc);
+}
+
+int ha_federated::rnd_next_int(uchar *buf)
+{
+ DBUG_ENTER("ha_federated::rnd_next_int");
if (stored_result == 0)
{
@@ -2726,6 +2751,8 @@ int ha_federated::rnd_pos(uchar *buf, uchar *pos)
{
int result;
DBUG_ENTER("ha_federated::rnd_pos");
+ MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str,
+ FALSE);
ha_statistic_increment(&SSV::ha_read_rnd_count);
if (table->s->primary_key != MAX_KEY)
{
@@ -2740,6 +2767,7 @@ int ha_federated::rnd_pos(uchar *buf, uchar *pos)
result= 0;
}
table->status= result ? STATUS_NOT_FOUND : 0;
+ MYSQL_READ_ROW_DONE(result);
DBUG_RETURN(result);
}
diff --git a/storage/federated/ha_federated.h b/storage/federated/ha_federated.h
index 2f1c62decca..ea0a3d2f591 100644
--- a/storage/federated/ha_federated.h
+++ b/storage/federated/ha_federated.h
@@ -232,6 +232,7 @@ public:
int rnd_init(bool scan); //required
int rnd_end();
int rnd_next(uchar *buf); //required
+ int rnd_next_int(uchar *buf);
int rnd_pos(uchar *buf, uchar *pos); //required
void position(const uchar *record); //required
int info(uint); //required
diff --git a/storage/heap/Makefile.am b/storage/heap/Makefile.am
index ec1445dea67..c30fd1590f0 100644
--- a/storage/heap/Makefile.am
+++ b/storage/heap/Makefile.am
@@ -51,5 +51,26 @@ libheap_a_SOURCES = hp_open.c hp_extra.c hp_close.c hp_panic.c hp_info.c \
EXTRA_DIST = CMakeLists.txt plug.in
+if HAVE_DTRACE_DASH_G
+libheap_a_LIBADD = probes_mysql.o
+libheap_a_DEPENDENCIES = probes_mysql.o dtrace_files dtrace_providers
+CLEANFILES = probes_mysql.o dtrace_files dtrace_providers
+DTRACEFILES = ha_heap.o
+DTRACEPROVIDER = probes_mysql.d
+CLEANFILES += $(DTRACEPROVIDER) dtrace_sources
+
+dtrace_files:
+ echo $(DTRACEFILES) > $@
+dtrace_providers: probes_mysql.d
+ echo $(DTRACEPROVIDER) > $@
+probes_mysql.d:
+ -$(RM) -f probes_mysql.d
+ $(CP) $(top_srcdir)/include/probes_mysql.d.base probes_mysql.d
+ echo timestamp > dtrace_sources
+
+probes_mysql.o: $(DTRACEPROVIDER) $(DTRACEFILES)
+ $(DTRACE) $(DTRACEFLAGS) -G -s $(DTRACEPROVIDER) $(DTRACEFILES) -o $@
+endif
+
# Don't update the files from bitkeeper
%::SCCS/s.%
diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc
index fb7c13e4e41..99c1ca5d2e9 100644
--- a/storage/heap/ha_heap.cc
+++ b/storage/heap/ha_heap.cc
@@ -20,6 +20,7 @@
#define MYSQL_SERVER 1
#include "mysql_priv.h"
+#include "probes_mysql.h"
#include <mysql/plugin.h>
#include "ha_heap.h"
#include "heapdef.h"
@@ -274,21 +275,25 @@ int ha_heap::index_read_map(uchar *buf, const uchar *key,
key_part_map keypart_map,
enum ha_rkey_function find_flag)
{
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
DBUG_ASSERT(inited==INDEX);
ha_statistic_increment(&SSV::ha_read_key_count);
int error = heap_rkey(file,buf,active_index, key, keypart_map, find_flag);
table->status = error ? STATUS_NOT_FOUND : 0;
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
int ha_heap::index_read_last_map(uchar *buf, const uchar *key,
key_part_map keypart_map)
{
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
DBUG_ASSERT(inited==INDEX);
ha_statistic_increment(&SSV::ha_read_key_count);
int error= heap_rkey(file, buf, active_index, key, keypart_map,
HA_READ_PREFIX_LAST);
table->status= error ? STATUS_NOT_FOUND : 0;
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
@@ -296,45 +301,55 @@ int ha_heap::index_read_idx_map(uchar *buf, uint index, const uchar *key,
key_part_map keypart_map,
enum ha_rkey_function find_flag)
{
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
ha_statistic_increment(&SSV::ha_read_key_count);
int error = heap_rkey(file, buf, index, key, keypart_map, find_flag);
table->status = error ? STATUS_NOT_FOUND : 0;
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
int ha_heap::index_next(uchar * buf)
{
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
DBUG_ASSERT(inited==INDEX);
ha_statistic_increment(&SSV::ha_read_next_count);
int error=heap_rnext(file,buf);
table->status=error ? STATUS_NOT_FOUND: 0;
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
int ha_heap::index_prev(uchar * buf)
{
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
DBUG_ASSERT(inited==INDEX);
ha_statistic_increment(&SSV::ha_read_prev_count);
int error=heap_rprev(file,buf);
table->status=error ? STATUS_NOT_FOUND: 0;
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
int ha_heap::index_first(uchar * buf)
{
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
DBUG_ASSERT(inited==INDEX);
ha_statistic_increment(&SSV::ha_read_first_count);
int error=heap_rfirst(file, buf, active_index);
table->status=error ? STATUS_NOT_FOUND: 0;
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
int ha_heap::index_last(uchar * buf)
{
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
DBUG_ASSERT(inited==INDEX);
ha_statistic_increment(&SSV::ha_read_last_count);
int error=heap_rlast(file, buf, active_index);
table->status=error ? STATUS_NOT_FOUND: 0;
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
@@ -345,9 +360,12 @@ int ha_heap::rnd_init(bool scan)
int ha_heap::rnd_next(uchar *buf)
{
+ MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str,
+ TRUE);
ha_statistic_increment(&SSV::ha_read_rnd_next_count);
int error=heap_scan(file, buf);
table->status=error ? STATUS_NOT_FOUND: 0;
+ MYSQL_READ_ROW_DONE(error);
return error;
}
@@ -355,10 +373,13 @@ int ha_heap::rnd_pos(uchar * buf, uchar *pos)
{
int error;
HEAP_PTR heap_position;
+ MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str,
+ FALSE);
ha_statistic_increment(&SSV::ha_read_rnd_count);
memcpy_fixed((char*) &heap_position, pos, sizeof(HEAP_PTR));
error=heap_rrnd(file, buf, heap_position);
table->status=error ? STATUS_NOT_FOUND: 0;
+ MYSQL_READ_ROW_DONE(error);
return error;
}
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt
index 5918db7ab11..d67b518642c 100755..100644
--- a/storage/innobase/CMakeLists.txt
+++ b/storage/innobase/CMakeLists.txt
@@ -1,5 +1,5 @@
-# Copyright (C) 2006 MySQL AB
-#
+# Copyright (C) 2009 Oracle/Innobase Oy
+#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
@@ -11,57 +11,92 @@
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+# This is the CMakeLists for InnoDB Plugin
+
+# TODO: remove the two FLAGS_DEBUG settings when merging into
+# 6.0-based trees, as has already been done for the other engines in
+# those trees.
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
-INCLUDE("${PROJECT_SOURCE_DIR}/storage/mysql_storage_engine.cmake")
-ADD_DEFINITIONS(-DMYSQL_SERVER -D_WIN32 -D_LIB)
-# Bug 19424 - InnoDB: Possibly a memory overrun of the buffer being freed (64-bit Visual C)
-# Removing Win64 compiler optimizations for all innodb/mem/* files.
-IF(CMAKE_GENERATOR MATCHES "Visual Studio" AND CMAKE_SIZEOF_VOID_P MATCHES 8)
- SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/storage/innobase/mem/mem0mem.c
- ${CMAKE_SOURCE_DIR}/storage/innobase/mem/mem0pool.c
- PROPERTIES COMPILE_FLAGS -Od)
-ENDIF(CMAKE_GENERATOR MATCHES "Visual Studio" AND CMAKE_SIZEOF_VOID_P MATCHES 8)
+# Starting at 5.1.38, MySQL CMake files are simplified. But the plugin
+# CMakeLists.txt still needs to work with previous versions of MySQL.
+IF (MYSQL_VERSION_ID GREATER "50137")
+ INCLUDE("${PROJECT_SOURCE_DIR}/storage/mysql_storage_engine.cmake")
+ENDIF (MYSQL_VERSION_ID GREATER "50137")
+
+IF (CMAKE_SIZEOF_VOID_P MATCHES 8)
+ SET(WIN64 TRUE)
+ENDIF (CMAKE_SIZEOF_VOID_P MATCHES 8)
+
+# Include directories under innobase
+INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/innobase/include
+ ${CMAKE_SOURCE_DIR}/storage/innobase/handler)
-INCLUDE_DIRECTORIES(
- ${CMAKE_SOURCE_DIR}/storage/innobase/include
- ${CMAKE_SOURCE_DIR}/storage/innobase/handler
- )
+# Include directories under mysql
+INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include
+ ${CMAKE_SOURCE_DIR}/sql
+ ${CMAKE_SOURCE_DIR}/regex
+ ${CMAKE_SOURCE_DIR}/zlib
+ ${CMAKE_SOURCE_DIR}/extra/yassl/include)
-SET(INNOBASE_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c
- buf/buf0buf.c buf/buf0flu.c buf/buf0lru.c buf/buf0rea.c
- data/data0data.c data/data0type.c
- dict/dict0boot.c dict/dict0crea.c dict/dict0dict.c dict/dict0load.c dict/dict0mem.c
- dyn/dyn0dyn.c
- eval/eval0eval.c eval/eval0proc.c
- fil/fil0fil.c
- fsp/fsp0fsp.c
- fut/fut0fut.c fut/fut0lst.c
- ha/ha0ha.c ha/hash0hash.c
- ibuf/ibuf0ibuf.c
- pars/lexyy.c pars/pars0grm.c pars/pars0opt.c pars/pars0pars.c pars/pars0sym.c
- lock/lock0lock.c
- log/log0log.c log/log0recv.c
- mach/mach0data.c
- mem/mem0mem.c mem/mem0pool.c
- mtr/mtr0log.c mtr/mtr0mtr.c
- os/os0file.c os/os0proc.c os/os0sync.c os/os0thread.c
- page/page0cur.c page/page0page.c
- que/que0que.c
- handler/ha_innodb.cc
- read/read0read.c
- rem/rem0cmp.c rem/rem0rec.c
- row/row0ins.c row/row0mysql.c row/row0purge.c row/row0row.c row/row0sel.c row/row0uins.c
- row/row0umod.c row/row0undo.c row/row0upd.c row/row0vers.c
- srv/srv0que.c srv/srv0srv.c srv/srv0start.c
- sync/sync0arr.c sync/sync0rw.c sync/sync0sync.c
- thr/thr0loc.c
- trx/trx0purge.c trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c
- usr/usr0sess.c
- ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c ut/ut0list.c ut/ut0wqueue.c)
+# Disable compiler optimizations for innodb/mem/* files on 64-bit Windows
+# (WIN64 is set above) due to a 64-bit compiler error; see MySQL Bug #19424, #36366, #34297
+IF (MSVC AND WIN64)
+ SET_SOURCE_FILES_PROPERTIES(mem/mem0mem.c mem/mem0pool.c
+ PROPERTIES COMPILE_FLAGS -Od)
+ENDIF (MSVC AND WIN64)
-MYSQL_STORAGE_ENGINE(INNOBASE)
+SET(INNOBASE_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c
+ buf/buf0buddy.c buf/buf0buf.c buf/buf0flu.c buf/buf0lru.c buf/buf0rea.c
+ data/data0data.c data/data0type.c
+ dict/dict0boot.c dict/dict0crea.c dict/dict0dict.c dict/dict0load.c dict/dict0mem.c
+ dyn/dyn0dyn.c
+ eval/eval0eval.c eval/eval0proc.c
+ fil/fil0fil.c
+ fsp/fsp0fsp.c
+ fut/fut0fut.c fut/fut0lst.c
+ ha/ha0ha.c ha/hash0hash.c ha/ha0storage.c
+ ibuf/ibuf0ibuf.c
+ pars/lexyy.c pars/pars0grm.c pars/pars0opt.c pars/pars0pars.c pars/pars0sym.c
+ lock/lock0lock.c lock/lock0iter.c
+ log/log0log.c log/log0recv.c
+ mach/mach0data.c
+ mem/mem0mem.c mem/mem0pool.c
+ mtr/mtr0log.c mtr/mtr0mtr.c
+ os/os0file.c os/os0proc.c os/os0sync.c os/os0thread.c
+ page/page0cur.c page/page0page.c page/page0zip.c
+ que/que0que.c
+ handler/ha_innodb.cc handler/handler0alter.cc handler/i_s.cc handler/mysql_addons.cc
+ read/read0read.c
+ rem/rem0cmp.c rem/rem0rec.c
+ row/row0ext.c row/row0ins.c row/row0merge.c row/row0mysql.c row/row0purge.c row/row0row.c
+ row/row0sel.c row/row0uins.c row/row0umod.c row/row0undo.c row/row0upd.c row/row0vers.c
+ srv/srv0que.c srv/srv0srv.c srv/srv0start.c
+ sync/sync0arr.c sync/sync0rw.c sync/sync0sync.c
+ thr/thr0loc.c
+ trx/trx0i_s.c trx/trx0purge.c trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c
+ trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c
+ usr/usr0sess.c
+ ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c
+ ut/ut0list.c ut/ut0wqueue.c)
+ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS -DIB_HAVE_PAUSE_INSTRUCTION)
+IF (MYSQL_VERSION_ID GREATER "50137")
+ MYSQL_STORAGE_ENGINE(INNOBASE)
+ # Use ha_innodb for plugin name, if plugin is built
+ GET_TARGET_PROPERTY(LIB_LOCATION ha_innobase LOCATION)
+ IF(LIB_LOCATION)
+ SET_TARGET_PROPERTIES(ha_innobase PROPERTIES OUTPUT_NAME ha_innodb)
+ ENDIF(LIB_LOCATION)
+ELSE (MYSQL_VERSION_ID GREATER "50137")
+ IF (NOT SOURCE_SUBLIBS)
+ ADD_DEFINITIONS(-D_WIN32 -DMYSQL_SERVER)
+ ADD_LIBRARY(innobase STATIC ${INNOBASE_SOURCES})
+ # Require mysqld_error.h, which is built as part of the GenError
+ ADD_DEPENDENCIES(innobase GenError)
+ ENDIF (NOT SOURCE_SUBLIBS)
+ENDIF (MYSQL_VERSION_ID GREATER "50137")
diff --git a/storage/innodb_plugin/COPYING b/storage/innobase/COPYING
index 6b106e18fdb..6b106e18fdb 100644
--- a/storage/innodb_plugin/COPYING
+++ b/storage/innobase/COPYING
diff --git a/storage/innodb_plugin/COPYING.Google b/storage/innobase/COPYING.Google
index 5ade2b0e381..5ade2b0e381 100644
--- a/storage/innodb_plugin/COPYING.Google
+++ b/storage/innobase/COPYING.Google
diff --git a/storage/innodb_plugin/COPYING.Percona b/storage/innobase/COPYING.Percona
index 8c786811719..8c786811719 100644
--- a/storage/innodb_plugin/COPYING.Percona
+++ b/storage/innobase/COPYING.Percona
diff --git a/storage/innodb_plugin/COPYING.Sun_Microsystems b/storage/innobase/COPYING.Sun_Microsystems
index 5a77ef3ab73..5a77ef3ab73 100644
--- a/storage/innodb_plugin/COPYING.Sun_Microsystems
+++ b/storage/innobase/COPYING.Sun_Microsystems
diff --git a/storage/innodb_plugin/ChangeLog b/storage/innobase/ChangeLog
index 1a6e07fd147..1a6e07fd147 100644
--- a/storage/innodb_plugin/ChangeLog
+++ b/storage/innobase/ChangeLog
diff --git a/storage/innodb_plugin/Doxyfile b/storage/innobase/Doxyfile
index 62aa7dd8abc..62aa7dd8abc 100644
--- a/storage/innodb_plugin/Doxyfile
+++ b/storage/innobase/Doxyfile
diff --git a/storage/innobase/Makefile.am b/storage/innobase/Makefile.am
index a597e3c24e4..6051fe9ef96 100644
--- a/storage/innobase/Makefile.am
+++ b/storage/innobase/Makefile.am
@@ -22,139 +22,306 @@ MYSQLLIBdir= $(pkglibdir)
pkgplugindir= $(pkglibdir)/plugin
INCLUDES= -I$(top_srcdir)/include -I$(top_builddir)/include \
-I$(top_srcdir)/regex \
- -I$(top_srcdir)/storage/innobase/include \
+ -I$(srcdir)/include \
-I$(top_srcdir)/sql \
- -I$(srcdir)
+ -I$(srcdir) @ZLIB_INCLUDES@
DEFS= @DEFS@
-noinst_HEADERS= include/btr0btr.h include/btr0btr.ic \
- include/btr0cur.h include/btr0cur.ic \
- include/btr0pcur.h include/btr0pcur.ic \
- include/btr0sea.h include/btr0sea.ic \
- include/btr0types.h include/buf0buf.h \
- include/buf0buf.ic include/buf0flu.h \
- include/buf0flu.ic include/buf0lru.h \
- include/buf0lru.ic include/buf0rea.h \
- include/buf0types.h include/data0data.h \
- include/data0data.ic include/data0type.h \
- include/data0type.ic include/data0types.h \
- include/db0err.h include/dict0boot.h \
- include/dict0boot.ic include/dict0crea.h \
- include/dict0crea.ic include/dict0dict.h \
- include/dict0dict.ic include/dict0load.h \
- include/dict0load.ic include/dict0mem.h \
- include/dict0mem.ic include/dict0types.h \
- include/dyn0dyn.h include/dyn0dyn.ic \
- include/eval0eval.h include/eval0eval.ic \
- include/eval0proc.h include/eval0proc.ic \
- include/fil0fil.h include/fsp0fsp.h \
- include/fsp0fsp.ic include/fsp0types.h \
- include/fut0fut.h \
- include/fut0fut.ic include/fut0lst.h \
- include/fut0lst.ic include/ha0ha.h \
- include/ha0ha.ic include/hash0hash.h \
- include/hash0hash.ic include/ibuf0ibuf.h \
- include/ibuf0ibuf.ic include/ibuf0types.h \
- include/lock0iter.h \
- include/lock0lock.h include/lock0lock.ic \
- include/lock0priv.h include/lock0priv.ic \
- include/lock0types.h include/log0log.h \
- include/log0log.ic include/log0recv.h \
- include/log0recv.ic include/mach0data.h \
- include/mach0data.ic include/mem0dbg.h \
- include/mem0dbg.ic mem/mem0dbg.c \
- include/mem0mem.h include/mem0mem.ic \
- include/mem0pool.h include/mem0pool.ic \
- include/mtr0log.h include/mtr0log.ic \
- include/mtr0mtr.h include/mtr0mtr.ic \
- include/mtr0types.h include/os0file.h \
- include/os0proc.h include/os0proc.ic \
- include/os0sync.h include/os0sync.ic \
- include/os0thread.h include/os0thread.ic \
- include/page0cur.h include/page0cur.ic \
- include/page0page.h include/page0page.ic \
- include/page0types.h include/pars0grm.h \
- include/pars0opt.h include/pars0opt.ic \
- include/pars0pars.h include/pars0pars.ic \
- include/pars0sym.h include/pars0sym.ic \
- include/pars0types.h include/que0que.h \
- include/que0que.ic include/que0types.h \
- include/read0read.h include/read0read.ic \
- include/read0types.h include/rem0cmp.h \
- include/rem0cmp.ic include/rem0rec.h \
- include/rem0rec.ic include/rem0types.h \
- include/row0ins.h include/row0ins.ic \
- include/row0mysql.h include/row0mysql.ic \
- include/row0purge.h include/row0purge.ic \
- include/row0row.h include/row0row.ic \
- include/row0sel.h include/row0sel.ic \
- include/row0types.h include/row0uins.h \
- include/row0uins.ic include/row0umod.h \
- include/row0umod.ic include/row0undo.h \
- include/row0undo.ic include/row0upd.h \
- include/row0upd.ic include/row0vers.h \
- include/row0vers.ic include/srv0que.h \
- include/srv0srv.h include/srv0srv.ic \
- include/srv0start.h include/sync0arr.h \
- include/sync0arr.ic include/sync0rw.h \
- include/sync0rw.ic include/sync0sync.h \
- include/sync0sync.ic include/sync0types.h \
- include/thr0loc.h include/thr0loc.ic \
- include/trx0purge.h include/trx0purge.ic \
- include/trx0rec.h include/trx0rec.ic \
- include/trx0roll.h include/trx0roll.ic \
- include/trx0rseg.h include/trx0rseg.ic \
- include/trx0sys.h include/trx0sys.ic \
- include/trx0trx.h include/trx0trx.ic \
- include/trx0types.h include/trx0undo.h \
- include/trx0undo.ic include/trx0xa.h \
- include/univ.i include/usr0sess.h \
- include/usr0sess.ic include/usr0types.h \
- include/ut0byte.h include/ut0byte.ic \
- include/ut0dbg.h include/ut0lst.h \
- include/ut0mem.h include/ut0mem.ic \
- include/ut0rnd.h include/ut0rnd.ic \
- include/ut0sort.h include/ut0ut.h \
- include/ut0ut.ic include/ut0vec.h \
- include/ut0vec.ic include/ut0list.h \
- include/ut0list.ic include/ut0wqueue.h \
- include/ha_prototypes.h handler/ha_innodb.h
+noinst_HEADERS= \
+ handler/ha_innodb.h \
+ handler/i_s.h \
+ include/btr0btr.h \
+ include/btr0btr.ic \
+ include/btr0cur.h \
+ include/btr0cur.ic \
+ include/btr0pcur.h \
+ include/btr0pcur.ic \
+ include/btr0sea.h \
+ include/btr0sea.ic \
+ include/btr0types.h \
+ include/buf0buddy.h \
+ include/buf0buddy.ic \
+ include/buf0buf.h \
+ include/buf0buf.ic \
+ include/buf0flu.h \
+ include/buf0flu.ic \
+ include/buf0lru.h \
+ include/buf0lru.ic \
+ include/buf0rea.h \
+ include/buf0types.h \
+ include/data0data.h \
+ include/data0data.ic \
+ include/data0type.h \
+ include/data0type.ic \
+ include/data0types.h \
+ include/db0err.h \
+ include/dict0boot.h \
+ include/dict0boot.ic \
+ include/dict0crea.h \
+ include/dict0crea.ic \
+ include/dict0dict.h \
+ include/dict0dict.ic \
+ include/dict0load.h \
+ include/dict0load.ic \
+ include/dict0mem.h \
+ include/dict0mem.ic \
+ include/dict0types.h \
+ include/dyn0dyn.h \
+ include/dyn0dyn.ic \
+ include/eval0eval.h \
+ include/eval0eval.ic \
+ include/eval0proc.h \
+ include/eval0proc.ic \
+ include/fil0fil.h \
+ include/fsp0fsp.h \
+ include/fsp0fsp.ic \
+ include/fsp0types.h \
+ include/fut0fut.h \
+ include/fut0fut.ic \
+ include/fut0lst.h \
+ include/fut0lst.ic \
+ include/ha0ha.h \
+ include/ha0ha.ic \
+ include/ha0storage.h \
+ include/ha0storage.ic \
+ include/ha_prototypes.h \
+ include/handler0alter.h \
+ include/hash0hash.h \
+ include/hash0hash.ic \
+ include/ibuf0ibuf.h \
+ include/ibuf0ibuf.ic \
+ include/ibuf0types.h \
+ include/lock0iter.h \
+ include/lock0lock.h \
+ include/lock0lock.ic \
+ include/lock0priv.h \
+ include/lock0priv.ic \
+ include/lock0types.h \
+ include/log0log.h \
+ include/log0log.ic \
+ include/log0recv.h \
+ include/log0recv.ic \
+ include/mach0data.h \
+ include/mach0data.ic \
+ include/mem0dbg.h \
+ include/mem0dbg.ic \
+ include/mem0mem.h \
+ include/mem0mem.ic \
+ include/mem0pool.h \
+ include/mem0pool.ic \
+ include/mtr0log.h \
+ include/mtr0log.ic \
+ include/mtr0mtr.h \
+ include/mtr0mtr.ic \
+ include/mtr0types.h \
+ include/mysql_addons.h \
+ include/os0file.h \
+ include/os0proc.h \
+ include/os0proc.ic \
+ include/os0sync.h \
+ include/os0sync.ic \
+ include/os0thread.h \
+ include/os0thread.ic \
+ include/page0cur.h \
+ include/page0cur.ic \
+ include/page0page.h \
+ include/page0page.ic \
+ include/page0types.h \
+ include/page0zip.h \
+ include/page0zip.ic \
+ include/pars0grm.h \
+ include/pars0opt.h \
+ include/pars0opt.ic \
+ include/pars0pars.h \
+ include/pars0pars.ic \
+ include/pars0sym.h \
+ include/pars0sym.ic \
+ include/pars0types.h \
+ include/que0que.h \
+ include/que0que.ic \
+ include/que0types.h \
+ include/read0read.h \
+ include/read0read.ic \
+ include/read0types.h \
+ include/rem0cmp.h \
+ include/rem0cmp.ic \
+ include/rem0rec.h \
+ include/rem0rec.ic \
+ include/rem0types.h \
+ include/row0ext.h \
+ include/row0ext.ic \
+ include/row0ins.h \
+ include/row0ins.ic \
+ include/row0merge.h \
+ include/row0mysql.h \
+ include/row0mysql.ic \
+ include/row0purge.h \
+ include/row0purge.ic \
+ include/row0row.h \
+ include/row0row.ic \
+ include/row0sel.h \
+ include/row0sel.ic \
+ include/row0types.h \
+ include/row0uins.h \
+ include/row0uins.ic \
+ include/row0umod.h \
+ include/row0umod.ic \
+ include/row0undo.h \
+ include/row0undo.ic \
+ include/row0upd.h \
+ include/row0upd.ic \
+ include/row0vers.h \
+ include/row0vers.ic \
+ include/srv0que.h \
+ include/srv0srv.h \
+ include/srv0srv.ic \
+ include/srv0start.h \
+ include/sync0arr.h \
+ include/sync0arr.ic \
+ include/sync0rw.h \
+ include/sync0rw.ic \
+ include/sync0sync.h \
+ include/sync0sync.ic \
+ include/sync0types.h \
+ include/thr0loc.h \
+ include/thr0loc.ic \
+ include/trx0i_s.h \
+ include/trx0purge.h \
+ include/trx0purge.ic \
+ include/trx0rec.h \
+ include/trx0rec.ic \
+ include/trx0roll.h \
+ include/trx0roll.ic \
+ include/trx0rseg.h \
+ include/trx0rseg.ic \
+ include/trx0sys.h \
+ include/trx0sys.ic \
+ include/trx0trx.h \
+ include/trx0trx.ic \
+ include/trx0types.h \
+ include/trx0undo.h \
+ include/trx0undo.ic \
+ include/trx0xa.h \
+ include/univ.i \
+ include/usr0sess.h \
+ include/usr0sess.ic \
+ include/usr0types.h \
+ include/ut0auxconf.h \
+ include/ut0byte.h \
+ include/ut0byte.ic \
+ include/ut0dbg.h \
+ include/ut0list.h \
+ include/ut0list.ic \
+ include/ut0lst.h \
+ include/ut0mem.h \
+ include/ut0mem.ic \
+ include/ut0rnd.h \
+ include/ut0rnd.ic \
+ include/ut0sort.h \
+ include/ut0ut.h \
+ include/ut0ut.ic \
+ include/ut0vec.h \
+ include/ut0vec.ic \
+ include/ut0wqueue.h \
+ mem/mem0dbg.c
EXTRA_LIBRARIES= libinnobase.a
noinst_LIBRARIES= @plugin_innobase_static_target@
-libinnobase_a_SOURCES= btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c \
- btr/btr0sea.c buf/buf0buf.c buf/buf0flu.c \
- buf/buf0lru.c buf/buf0rea.c data/data0data.c \
- data/data0type.c dict/dict0boot.c \
- dict/dict0crea.c dict/dict0dict.c \
- dict/dict0load.c dict/dict0mem.c dyn/dyn0dyn.c \
- eval/eval0eval.c eval/eval0proc.c \
- fil/fil0fil.c fsp/fsp0fsp.c fut/fut0fut.c \
- fut/fut0lst.c ha/ha0ha.c ha/hash0hash.c \
- ibuf/ibuf0ibuf.c lock/lock0iter.c \
- lock/lock0lock.c \
- log/log0log.c log/log0recv.c mach/mach0data.c \
- mem/mem0mem.c mem/mem0pool.c mtr/mtr0log.c \
- mtr/mtr0mtr.c os/os0file.c os/os0proc.c \
- os/os0sync.c os/os0thread.c page/page0cur.c \
- page/page0page.c pars/lexyy.c pars/pars0grm.c \
- pars/pars0opt.c pars/pars0pars.c \
- pars/pars0sym.c que/que0que.c read/read0read.c \
- rem/rem0cmp.c rem/rem0rec.c row/row0ins.c \
- row/row0mysql.c row/row0purge.c row/row0row.c \
- row/row0sel.c row/row0uins.c row/row0umod.c \
- row/row0undo.c row/row0upd.c row/row0vers.c \
- srv/srv0que.c srv/srv0srv.c srv/srv0start.c \
- sync/sync0arr.c sync/sync0rw.c \
- sync/sync0sync.c thr/thr0loc.c trx/trx0purge.c \
- trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c \
- trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c \
- usr/usr0sess.c ut/ut0byte.c ut/ut0dbg.c \
- ut/ut0list.c ut/ut0mem.c ut/ut0rnd.c \
- ut/ut0ut.c ut/ut0vec.c ut/ut0wqueue.c \
- handler/ha_innodb.cc
+libinnobase_a_SOURCES= \
+ btr/btr0btr.c \
+ btr/btr0cur.c \
+ btr/btr0pcur.c \
+ btr/btr0sea.c \
+ buf/buf0buddy.c \
+ buf/buf0buf.c \
+ buf/buf0flu.c \
+ buf/buf0lru.c \
+ buf/buf0rea.c \
+ data/data0data.c \
+ data/data0type.c \
+ dict/dict0boot.c \
+ dict/dict0crea.c \
+ dict/dict0dict.c \
+ dict/dict0load.c \
+ dict/dict0mem.c \
+ dyn/dyn0dyn.c \
+ eval/eval0eval.c \
+ eval/eval0proc.c \
+ fil/fil0fil.c \
+ fsp/fsp0fsp.c \
+ fut/fut0fut.c \
+ fut/fut0lst.c \
+ ha/ha0ha.c \
+ ha/ha0storage.c \
+ ha/hash0hash.c \
+ handler/ha_innodb.cc \
+ handler/handler0alter.cc \
+ handler/i_s.cc \
+ handler/mysql_addons.cc \
+ ibuf/ibuf0ibuf.c \
+ lock/lock0iter.c \
+ lock/lock0lock.c \
+ log/log0log.c \
+ log/log0recv.c \
+ mach/mach0data.c \
+ mem/mem0mem.c \
+ mem/mem0pool.c \
+ mtr/mtr0log.c \
+ mtr/mtr0mtr.c \
+ os/os0file.c \
+ os/os0proc.c \
+ os/os0sync.c \
+ os/os0thread.c \
+ page/page0cur.c \
+ page/page0page.c \
+ page/page0zip.c \
+ pars/lexyy.c \
+ pars/pars0grm.c \
+ pars/pars0opt.c \
+ pars/pars0pars.c \
+ pars/pars0sym.c \
+ que/que0que.c \
+ read/read0read.c \
+ rem/rem0cmp.c \
+ rem/rem0rec.c \
+ row/row0ext.c \
+ row/row0ins.c \
+ row/row0merge.c \
+ row/row0mysql.c \
+ row/row0purge.c \
+ row/row0row.c \
+ row/row0sel.c \
+ row/row0uins.c \
+ row/row0umod.c \
+ row/row0undo.c \
+ row/row0upd.c \
+ row/row0vers.c \
+ srv/srv0que.c \
+ srv/srv0srv.c \
+ srv/srv0start.c \
+ sync/sync0arr.c \
+ sync/sync0rw.c \
+ sync/sync0sync.c \
+ thr/thr0loc.c \
+ trx/trx0i_s.c \
+ trx/trx0purge.c \
+ trx/trx0rec.c \
+ trx/trx0roll.c \
+ trx/trx0rseg.c \
+ trx/trx0sys.c \
+ trx/trx0trx.c \
+ trx/trx0undo.c \
+ usr/usr0sess.c \
+ ut/ut0byte.c \
+ ut/ut0dbg.c \
+ ut/ut0list.c \
+ ut/ut0mem.c \
+ ut/ut0rnd.c \
+ ut/ut0ut.c \
+ ut/ut0vec.c \
+ ut/ut0wqueue.c
libinnobase_a_CXXFLAGS= $(AM_CFLAGS)
libinnobase_a_CFLAGS= $(AM_CFLAGS)
diff --git a/storage/innobase/btr/btr0btr.c b/storage/innobase/btr/btr0btr.c
index 5e8831b5d5e..086b3a0a599 100644
--- a/storage/innobase/btr/btr0btr.c
+++ b/storage/innobase/btr/btr0btr.c
@@ -1,7 +1,24 @@
-/******************************************************
-The B-tree
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1994-1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file btr/btr0btr.c
+The B-tree
Created 6/2/1994 Heikki Tuuri
*******************************************************/
@@ -14,6 +31,9 @@ Created 6/2/1994 Heikki Tuuri
#include "fsp0fsp.h"
#include "page0page.h"
+#include "page0zip.h"
+
+#ifndef UNIV_HOTBACKUP
#include "btr0cur.h"
#include "btr0sea.h"
#include "btr0pcur.h"
@@ -77,83 +97,90 @@ make them consecutive on disk if possible. From the other file segment
we allocate pages for the non-leaf levels of the tree.
*/
-/****************************************************************
-Returns the upper level node pointer to a page. It is assumed that
-mtr holds an x-latch on the tree. */
-static
-rec_t*
-btr_page_get_father_node_ptr(
-/*=========================*/
- /* out: pointer to node pointer record */
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: page: must contain at least one
- user record */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
-Empties an index page. */
-static
-void
-btr_page_empty(
-/*===========*/
- page_t* page, /* in: page to be emptied */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
-Returns TRUE if the insert fits on the appropriate half-page
-with the chosen split_rec. */
+#ifdef UNIV_BTR_DEBUG
+/**************************************************************//**
+Checks a file segment header within a B-tree root page.
+@return TRUE if valid */
static
ibool
-btr_page_insert_fits(
-/*=================*/
- /* out: TRUE if fits */
- btr_cur_t* cursor, /* in: cursor at which insert
- should be made */
- rec_t* split_rec, /* in: suggestion for first record
- on upper half-page, or NULL if
- tuple should be first */
- const ulint* offsets, /* in: rec_get_offsets(
- split_rec, cursor->index) */
- dtuple_t* tuple, /* in: tuple to insert */
- mem_heap_t* heap); /* in: temporary memory heap */
-
-/******************************************************************
-Gets the root node of a tree and x-latches it. */
+btr_root_fseg_validate(
+/*===================*/
+ const fseg_header_t* seg_header, /*!< in: segment header */
+ ulint space) /*!< in: tablespace identifier */
+{
+ ulint offset = mach_read_from_2(seg_header + FSEG_HDR_OFFSET);
-page_t*
-btr_root_get(
-/*=========*/
- /* out: root page, x-latched */
- dict_index_t* index, /* in: index tree */
- mtr_t* mtr) /* in: mtr */
+ ut_a(mach_read_from_4(seg_header + FSEG_HDR_SPACE) == space);
+ ut_a(offset >= FIL_PAGE_DATA);
+ ut_a(offset <= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END);
+ return(TRUE);
+}
+#endif /* UNIV_BTR_DEBUG */
+
+/**************************************************************//**
+Gets the buffer block of the root node of a tree and x-latches it.
+@return root page block, x-latched */
+static
+buf_block_t*
+btr_root_block_get(
+/*===============*/
+ dict_index_t* index, /*!< in: index tree */
+ mtr_t* mtr) /*!< in: mtr */
{
- ulint space;
- ulint root_page_no;
- page_t* root;
+ ulint space;
+ ulint zip_size;
+ ulint root_page_no;
+ buf_block_t* block;
space = dict_index_get_space(index);
+ zip_size = dict_table_zip_size(index->table);
root_page_no = dict_index_get_page(index);
- root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr);
- ut_a((ibool)!!page_is_comp(root) == dict_table_is_comp(index->table));
+ block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, mtr);
+ ut_a((ibool)!!page_is_comp(buf_block_get_frame(block))
+ == dict_table_is_comp(index->table));
+#ifdef UNIV_BTR_DEBUG
+ if (!dict_index_is_ibuf(index)) {
+ const page_t* root = buf_block_get_frame(block);
+
+ ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
+ + root, space));
+ ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
+ + root, space));
+ }
+#endif /* UNIV_BTR_DEBUG */
- return(root);
+ return(block);
}
-/*****************************************************************
-Gets pointer to the previous user record in the tree. It is assumed that
-the caller has appropriate latches on the page and its neighbor. */
+/**************************************************************//**
+Gets the root node of a tree and x-latches it.
+@return root page, x-latched */
+UNIV_INTERN
+page_t*
+btr_root_get(
+/*=========*/
+ dict_index_t* index, /*!< in: index tree */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ return(buf_block_get_frame(btr_root_block_get(index, mtr)));
+}
+/*************************************************************//**
+Gets pointer to the previous user record in the tree. It is assumed that
+the caller has appropriate latches on the page and its neighbor.
+@return previous user record, NULL if there is none */
+UNIV_INTERN
rec_t*
btr_get_prev_user_rec(
/*==================*/
- /* out: previous user record, NULL if there is none */
- rec_t* rec, /* in: record on leaf level */
- mtr_t* mtr) /* in: mtr holding a latch on the page, and if
+ rec_t* rec, /*!< in: record on leaf level */
+ mtr_t* mtr) /*!< in: mtr holding a latch on the page, and if
needed, also to the previous page */
{
page_t* page;
page_t* prev_page;
ulint prev_page_no;
- ulint space;
if (!page_rec_is_infimum(rec)) {
@@ -165,23 +192,30 @@ btr_get_prev_user_rec(
}
}
- page = buf_frame_align(rec);
+ page = page_align(rec);
prev_page_no = btr_page_get_prev(page, mtr);
- space = buf_frame_get_space_id(page);
if (prev_page_no != FIL_NULL) {
- prev_page = buf_page_get_with_no_latch(space, prev_page_no,
- mtr);
+ ulint space;
+ ulint zip_size;
+ buf_block_t* prev_block;
+
+ space = page_get_space_id(page);
+ zip_size = fil_space_get_zip_size(space);
+
+ prev_block = buf_page_get_with_no_latch(space, zip_size,
+ prev_page_no, mtr);
+ prev_page = buf_block_get_frame(prev_block);
/* The caller must already have a latch to the brother */
- ut_ad((mtr_memo_contains(mtr, buf_block_align(prev_page),
- MTR_MEMO_PAGE_S_FIX))
- || (mtr_memo_contains(mtr, buf_block_align(prev_page),
- MTR_MEMO_PAGE_X_FIX)));
- ut_a(page_is_comp(prev_page) == page_is_comp(page));
+ ut_ad(mtr_memo_contains(mtr, prev_block,
+ MTR_MEMO_PAGE_S_FIX)
+ || mtr_memo_contains(mtr, prev_block,
+ MTR_MEMO_PAGE_X_FIX));
#ifdef UNIV_BTR_DEBUG
+ ut_a(page_is_comp(prev_page) == page_is_comp(page));
ut_a(btr_page_get_next(prev_page, mtr)
- == buf_frame_get_page_no(page));
+ == page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */
return(page_rec_get_prev(page_get_supremum_rec(prev_page)));
@@ -190,22 +224,21 @@ btr_get_prev_user_rec(
return(NULL);
}
-/*****************************************************************
+/*************************************************************//**
Gets pointer to the next user record in the tree. It is assumed that the
-caller has appropriate latches on the page and its neighbor. */
-
+caller has appropriate latches on the page and its neighbor.
+@return next user record, NULL if there is none */
+UNIV_INTERN
rec_t*
btr_get_next_user_rec(
/*==================*/
- /* out: next user record, NULL if there is none */
- rec_t* rec, /* in: record on leaf level */
- mtr_t* mtr) /* in: mtr holding a latch on the page, and if
+ rec_t* rec, /*!< in: record on leaf level */
+ mtr_t* mtr) /*!< in: mtr holding a latch on the page, and if
needed, also to the next page */
{
page_t* page;
page_t* next_page;
ulint next_page_no;
- ulint space;
if (!page_rec_is_supremum(rec)) {
@@ -217,64 +250,81 @@ btr_get_next_user_rec(
}
}
- page = buf_frame_align(rec);
+ page = page_align(rec);
next_page_no = btr_page_get_next(page, mtr);
- space = buf_frame_get_space_id(page);
if (next_page_no != FIL_NULL) {
+ ulint space;
+ ulint zip_size;
+ buf_block_t* next_block;
+
+ space = page_get_space_id(page);
+ zip_size = fil_space_get_zip_size(space);
- next_page = buf_page_get_with_no_latch(space, next_page_no,
- mtr);
+ next_block = buf_page_get_with_no_latch(space, zip_size,
+ next_page_no, mtr);
+ next_page = buf_block_get_frame(next_block);
/* The caller must already have a latch to the brother */
- ut_ad((mtr_memo_contains(mtr, buf_block_align(next_page),
- MTR_MEMO_PAGE_S_FIX))
- || (mtr_memo_contains(mtr, buf_block_align(next_page),
- MTR_MEMO_PAGE_X_FIX)));
+ ut_ad(mtr_memo_contains(mtr, next_block, MTR_MEMO_PAGE_S_FIX)
+ || mtr_memo_contains(mtr, next_block,
+ MTR_MEMO_PAGE_X_FIX));
#ifdef UNIV_BTR_DEBUG
+ ut_a(page_is_comp(next_page) == page_is_comp(page));
ut_a(btr_page_get_prev(next_page, mtr)
- == buf_frame_get_page_no(page));
+ == page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */
- ut_a(page_is_comp(next_page) == page_is_comp(page));
return(page_rec_get_next(page_get_infimum_rec(next_page)));
}
return(NULL);
}
-/******************************************************************
+/**************************************************************//**
Creates a new index page (not the root, and also not
-used in page reorganization). */
+used in page reorganization). The page is built with page_create_zip() or page_create() and then stamped with the index id. @see btr_page_empty(). */
static
void
btr_page_create(
/*============*/
- page_t* page, /* in: page to be created */
- dict_index_t* index, /* in: index */
- mtr_t* mtr) /* in: mtr */
+ buf_block_t* block, /*!< in/out: page to be created; must be x-fixed in mtr (checked with ut_ad) */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL; non-NULL selects page_create_zip() */
+ dict_index_t* index, /*!< in: index; supplies index->id and the table's row format */
+ ulint level, /*!< in: the B-tree level of the page */
+ mtr_t* mtr) /*!< in: mtr */
{
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- page_create(page, mtr, dict_table_is_comp(index->table));
- buf_block_align(page)->check_index_page_at_flush = TRUE;
+ page_t* page = buf_block_get_frame(block);
+
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- btr_page_set_index_id(page, index->id, mtr);
+ if (UNIV_LIKELY_NULL(page_zip)) { /* compressed page: the level is passed to page_create_zip() */
+ page_create_zip(block, index, level, mtr);
+ } else {
+ page_create(block, mtr, dict_table_is_comp(index->table));
+ /* Set the level of the new index page (needed on this uncompressed path only, as page_create() is not given the level) */
+ btr_page_set_level(page, NULL, level, mtr);
+ }
+
+ block->check_index_page_at_flush = TRUE; /* NOTE(review): presumably asks for an index page check at flush time; confirm in the buffer pool code */
+
+ btr_page_set_index_id(page, page_zip, index->id, mtr); /* done for both page formats */
}
-/******************************************************************
+/**************************************************************//**
Allocates a new file page to be used in an ibuf tree. Takes the page from
-the free list of the tree, which must contain pages! */
+the free list of the tree, which must contain pages!
+@return new allocated block, x-latched */
static
-page_t*
+buf_block_t*
btr_page_alloc_for_ibuf(
/*====================*/
- /* out: new allocated page, x-latched */
- dict_index_t* index, /* in: index tree */
- mtr_t* mtr) /* in: mtr */
+ dict_index_t* index, /*!< in: index tree */
+ mtr_t* mtr) /*!< in: mtr */
{
fil_addr_t node_addr;
page_t* root;
page_t* new_page;
+ buf_block_t* new_block;
root = btr_root_get(index, mtr);
@@ -282,11 +332,11 @@ btr_page_alloc_for_ibuf(
+ PAGE_BTR_IBUF_FREE_LIST, mtr);
ut_a(node_addr.page != FIL_NULL);
- new_page = buf_page_get(dict_index_get_space(index), node_addr.page,
- RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW);
-#endif /* UNIV_SYNC_DEBUG */
+ new_block = buf_page_get(dict_index_get_space(index),
+ dict_table_zip_size(index->table),
+ node_addr.page, RW_X_LATCH, mtr);
+ new_page = buf_block_get_frame(new_block);
+ buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW);
flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
new_page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE,
@@ -294,32 +344,31 @@ btr_page_alloc_for_ibuf(
ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
mtr));
- return(new_page);
+ return(new_block);
}
-/******************************************************************
+/**************************************************************//**
Allocates a new file page to be used in an index tree. NOTE: we assume
-that the caller has made the reservation for free extents! */
-
-page_t*
+that the caller has made the reservation for free extents!
+@return new allocated block, x-latched; NULL if out of space */
+UNIV_INTERN
+buf_block_t*
btr_page_alloc(
/*===========*/
- /* out: new allocated page, x-latched;
- NULL if out of space */
- dict_index_t* index, /* in: index */
- ulint hint_page_no, /* in: hint of a good page */
- byte file_direction, /* in: direction where a possible
+ dict_index_t* index, /*!< in: index */
+ ulint hint_page_no, /*!< in: hint of a good page */
+ byte file_direction, /*!< in: direction where a possible
page split is made */
- ulint level, /* in: level where the page is placed
+ ulint level, /*!< in: level where the page is placed
in the tree */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
fseg_header_t* seg_header;
page_t* root;
- page_t* new_page;
+ buf_block_t* new_block;
ulint new_page_no;
- if (index->type & DICT_IBUF) {
+ if (dict_index_is_ibuf(index)) {
return(btr_page_alloc_for_ibuf(index, mtr));
}
@@ -343,24 +392,23 @@ btr_page_alloc(
return(NULL);
}
- new_page = buf_page_get(dict_index_get_space(index), new_page_no,
- RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW);
-#endif /* UNIV_SYNC_DEBUG */
+ new_block = buf_page_get(dict_index_get_space(index),
+ dict_table_zip_size(index->table),
+ new_page_no, RW_X_LATCH, mtr);
+ buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW);
- return(new_page);
+ return(new_block);
}
-/******************************************************************
-Gets the number of pages in a B-tree. */
-
+/**************************************************************//**
+Gets the number of pages in a B-tree.
+@return number of pages */
+UNIV_INTERN
ulint
btr_get_size(
/*=========*/
- /* out: number of pages */
- dict_index_t* index, /* in: index */
- ulint flag) /* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
+ dict_index_t* index, /*!< in: index */
+ ulint flag) /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
{
fseg_header_t* seg_header;
page_t* root;
@@ -396,58 +444,55 @@ btr_get_size(
return(n);
}
-/******************************************************************
+/**************************************************************//**
Frees a page used in an ibuf tree. Puts the page to the free list of the
ibuf tree. */
static
void
btr_page_free_for_ibuf(
/*===================*/
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: page to be freed, x-latched */
- mtr_t* mtr) /* in: mtr */
+ dict_index_t* index, /*!< in: index tree; its root page holds the free list base */
+ buf_block_t* block, /*!< in: block to be freed, x-latched in mtr (checked with ut_ad) */
+ mtr_t* mtr) /*!< in: mtr */
{
page_t* root;
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); /* the caller must hold the x-latch in this mtr */
root = btr_root_get(index, mtr);
flst_add_first(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);
+ buf_block_get_frame(block) /* the list node is stored inside the freed page itself */
+ + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);
ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
mtr));
}
-/******************************************************************
+/**************************************************************//**
Frees a file page used in an index tree. Can be used also to (BLOB)
external storage pages, because the page level 0 can be given as an
argument. */
-
+UNIV_INTERN
void
btr_page_free_low(
/*==============*/
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: page to be freed, x-latched */
- ulint level, /* in: page level */
- mtr_t* mtr) /* in: mtr */
+ dict_index_t* index, /*!< in: index tree */
+ buf_block_t* block, /*!< in: block to be freed, x-latched */
+ ulint level, /*!< in: page level */
+ mtr_t* mtr) /*!< in: mtr */
{
fseg_header_t* seg_header;
page_t* root;
- ulint space;
- ulint page_no;
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
/* The page gets invalid for optimistic searches: increment the frame
modify clock */
- buf_frame_modify_clock_inc(page);
+ buf_block_modify_clock_inc(block);
- if (index->type & DICT_IBUF) {
+ if (dict_index_is_ibuf(index)) {
- btr_page_free_for_ibuf(index, page, mtr);
+ btr_page_free_for_ibuf(index, block, mtr);
return;
}
@@ -460,133 +505,138 @@ btr_page_free_low(
seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
}
- space = buf_frame_get_space_id(page);
- page_no = buf_frame_get_page_no(page);
-
- fseg_free_page(seg_header, space, page_no, mtr);
+ fseg_free_page(seg_header,
+ buf_block_get_space(block),
+ buf_block_get_page_no(block), mtr);
}
-/******************************************************************
+/**************************************************************//**
Frees a file page used in an index tree. NOTE: cannot free field external
storage pages because the page must contain info on its level. */
-
+UNIV_INTERN
void
btr_page_free(
/*==========*/
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: page to be freed, x-latched */
- mtr_t* mtr) /* in: mtr */
+ dict_index_t* index, /*!< in: index tree */
+ buf_block_t* block, /*!< in: block to be freed, x-latched; its frame must carry a valid page level */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint level;
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- level = btr_page_get_level(page, mtr);
+ level = btr_page_get_level(buf_block_get_frame(block), mtr); /* the level is read from the page being freed */
- btr_page_free_low(index, page, level, mtr);
+ btr_page_free_low(index, block, level, mtr); /* the x-latch assertion now lives in btr_page_free_low() */
}
-/******************************************************************
+/**************************************************************//**
Sets the child node file address in a node pointer. */
UNIV_INLINE
void
btr_node_ptr_set_child_page_no(
/*===========================*/
- rec_t* rec, /* in: node pointer record */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint page_no,/* in: child node address */
- mtr_t* mtr) /* in: mtr */
+ rec_t* rec, /*!< in: node pointer record; must reside on a non-leaf page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed
+ part will be updated, or NULL (then the field is written with mlog_write_ulint()) */
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint page_no,/*!< in: child node address (page number stored in the last field) */
+ mtr_t* mtr) /*!< in: mtr */
{
byte* field;
ulint len;
ut_ad(rec_offs_validate(rec, NULL, offsets));
- ut_ad(0 < btr_page_get_level(buf_frame_align(rec), mtr));
+ ut_ad(!page_is_leaf(page_align(rec))); /* a node pointer is expected on a non-leaf page only */
ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));
/* The child address is in the last field */
field = rec_get_nth_field(rec, offsets,
rec_offs_n_fields(offsets) - 1, &len);
- ut_ad(len == 4);
+ ut_ad(len == REC_NODE_PTR_SIZE); /* the child page number field has a fixed length */
- mlog_write_ulint(field, page_no, MLOG_4BYTES, mtr);
+ if (UNIV_LIKELY_NULL(page_zip)) { /* compressed page: delegate the write to page_zip_write_node_ptr() */
+ page_zip_write_node_ptr(page_zip, rec,
+ rec_offs_data_size(offsets),
+ page_no, mtr);
+ } else {
+ mlog_write_ulint(field, page_no, MLOG_4BYTES, mtr); /* uncompressed page: write the 4-byte page number in place */
+ }
}
-/****************************************************************
-Returns the child page of a node pointer and x-latches it. */
+/************************************************************//**
+Returns the buffer block of the child page that a node pointer refers to, and x-latches it.
+@return block of the child page, x-latched */
static
-page_t*
+buf_block_t*
btr_node_ptr_get_child(
/*===================*/
- /* out: child page, x-latched */
- rec_t* node_ptr,/* in: node pointer */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- mtr_t* mtr) /* in: mtr */
+ const rec_t* node_ptr,/*!< in: node pointer */
+ dict_index_t* index, /*!< in: index; used to validate offsets and to look up the table's compressed page size */
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint page_no;
ulint space;
- page_t* page;
- ut_ad(rec_offs_validate(node_ptr, NULL, offsets));
- space = buf_frame_get_space_id(node_ptr);
+ ut_ad(rec_offs_validate(node_ptr, index, offsets));
+ space = page_get_space_id(page_align(node_ptr)); /* the child is fetched from the same tablespace as the node pointer's page */
page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
- page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
-
- return(page);
+ return(btr_block_get(space, dict_table_zip_size(index->table),
+ page_no, RW_X_LATCH, mtr)); /* RW_X_LATCH: the child is requested with an x-latch */
}
-/****************************************************************
+/************************************************************//**
Returns the upper level node pointer to a page. It is assumed that mtr holds
-an x-latch on the tree. */
+an x-latch on the tree.
+@return rec_get_offsets() of the node pointer record */
static
-rec_t*
-btr_page_get_father_for_rec(
-/*========================*/
- /* out: pointer to node pointer record,
+ulint*
+btr_page_get_father_node_ptr(
+/*=========================*/
+ ulint* offsets,/*!< in: work area for the return value */
+ mem_heap_t* heap, /*!< in: memory heap to use */
+ btr_cur_t* cursor, /*!< in: cursor pointing to user record,
+ out: cursor on node pointer record,
its page x-latched */
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: page: must contain at least one
- user record */
- rec_t* user_rec,/* in: user_record on page */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
- mem_heap_t* heap;
dtuple_t* tuple;
- btr_cur_t cursor;
+ rec_t* user_rec;
rec_t* node_ptr;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ ulint level;
+ ulint page_no;
+ dict_index_t* index;
+
+ page_no = buf_block_get_page_no(btr_cur_get_block(cursor));
+ index = btr_cur_get_index(cursor);
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK));
- ut_a(page_rec_is_user_rec(user_rec));
- ut_ad(dict_index_get_page(index) != buf_frame_get_page_no(page));
+ ut_ad(dict_index_get_page(index) != page_no);
- heap = mem_heap_create(100);
-
- tuple = dict_index_build_node_ptr(index, user_rec, 0, heap,
- btr_page_get_level(page, mtr));
+ level = btr_page_get_level(btr_cur_get_page(cursor), mtr);
+ user_rec = btr_cur_get_rec(cursor);
+ ut_a(page_rec_is_user_rec(user_rec));
+ tuple = dict_index_build_node_ptr(index, user_rec, 0, heap, level);
- btr_cur_search_to_nth_level(index,
- btr_page_get_level(page, mtr) + 1,
- tuple, PAGE_CUR_LE,
- BTR_CONT_MODIFY_TREE, &cursor, 0, mtr);
+ btr_cur_search_to_nth_level(index, level + 1, tuple, PAGE_CUR_LE,
+ BTR_CONT_MODIFY_TREE, cursor, 0, mtr);
- node_ptr = btr_cur_get_rec(&cursor);
+ node_ptr = btr_cur_get_rec(cursor);
+ ut_ad(!page_rec_is_comp(node_ptr)
+ || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR);
offsets = rec_get_offsets(node_ptr, index, offsets,
ULINT_UNDEFINED, &heap);
if (UNIV_UNLIKELY(btr_node_ptr_get_child_page_no(node_ptr, offsets)
- != buf_frame_get_page_no(page))) {
+ != page_no)) {
rec_t* print_rec;
fputs("InnoDB: Dump of the child page:\n", stderr);
- buf_page_print(buf_frame_align(page));
+ buf_page_print(page_align(user_rec), 0);
fputs("InnoDB: Dump of the parent page:\n", stderr);
- buf_page_print(buf_frame_align(node_ptr));
+ buf_page_print(page_align(node_ptr), 0);
fputs("InnoDB: Corruption of an index tree: table ", stderr);
ut_print_name(stderr, NULL, TRUE, index->table_name);
@@ -596,8 +646,9 @@ btr_page_get_father_for_rec(
"InnoDB: father ptr page no %lu, child page no %lu\n",
(ulong)
btr_node_ptr_get_child_page_no(node_ptr, offsets),
- (ulong) buf_frame_get_page_no(page));
- print_rec = page_rec_get_next(page_get_infimum_rec(page));
+ (ulong) page_no);
+ print_rec = page_rec_get_next(
+ page_get_infimum_rec(page_align(user_rec)));
offsets = rec_get_offsets(print_rec, index,
offsets, ULINT_UNDEFINED, &heap);
page_rec_print(print_rec, offsets);
@@ -609,55 +660,83 @@ btr_page_get_father_for_rec(
" to fix the\n"
"InnoDB: corruption. If the crash happens at "
"the database startup, see\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "forcing-recovery.html about\n"
+ "InnoDB: " REFMAN "forcing-recovery.html about\n"
"InnoDB: forcing recovery. "
"Then dump + drop + reimport.\n", stderr);
- }
- ut_a(btr_node_ptr_get_child_page_no(node_ptr, offsets)
- == buf_frame_get_page_no(page));
- mem_heap_free(heap);
+ ut_error;
+ }
- return(node_ptr);
+ return(offsets);
}
-/****************************************************************
-Returns the upper level node pointer to a page. It is assumed that
-mtr holds an x-latch on the tree. */
+/************************************************************//**
+Returns the upper level node pointer to a page. It is assumed that mtr holds
+an x-latch on the tree. The cursor is first positioned on the record after the infimum of block; btr_page_get_father_node_ptr() then does the lookup.
+@return rec_get_offsets() of the node pointer record */
static
-rec_t*
-btr_page_get_father_node_ptr(
-/*=========================*/
- /* out: pointer to node pointer record */
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: page: must contain at least one
- user record */
- mtr_t* mtr) /* in: mtr */
+ulint*
+btr_page_get_father_block(
+/*======================*/
+ ulint* offsets,/*!< in: work area for the return value */
+ mem_heap_t* heap, /*!< in: memory heap to use */
+ dict_index_t* index, /*!< in: b-tree index */
+ buf_block_t* block, /*!< in: child page in the index; must contain at least one user record (asserted in btr_page_get_father_node_ptr()) */
+ mtr_t* mtr, /*!< in: mtr */
+ btr_cur_t* cursor) /*!< out: cursor on node pointer record,
+ its page x-latched */
{
- return(btr_page_get_father_for_rec(
- index, page,
- page_rec_get_next(page_get_infimum_rec(page)), mtr));
+ rec_t* rec
+ = page_rec_get_next(page_get_infimum_rec(buf_block_get_frame(
+ block))); /* the record that follows the infimum of the child page */
+ btr_cur_position(index, rec, block, cursor); /* the cursor is the input of the lookup below */
+ return(btr_page_get_father_node_ptr(offsets, heap, cursor, mtr));
}
-/****************************************************************
-Creates the root node for a new index tree. */
+/************************************************************//**
+Seeks to the upper level node pointer to a page. Only the cursor position is reported; the offsets array is not returned.
+It is assumed that mtr holds an x-latch on the tree. */
+static
+void
+btr_page_get_father(
+/*================*/
+ dict_index_t* index, /*!< in: b-tree index */
+ buf_block_t* block, /*!< in: child page in the index; must contain at least one user record (asserted in btr_page_get_father_node_ptr()) */
+ mtr_t* mtr, /*!< in: mtr */
+ btr_cur_t* cursor) /*!< out: cursor on node pointer record,
+ its page x-latched */
+{
+ mem_heap_t* heap;
+ rec_t* rec
+ = page_rec_get_next(page_get_infimum_rec(buf_block_get_frame(
+ block))); /* the record that follows the infimum of the child page */
+ btr_cur_position(index, rec, block, cursor);
+
+ heap = mem_heap_create(100); /* scratch heap for the node pointer tuple and the offsets built during the lookup */
+ btr_page_get_father_node_ptr(NULL, heap, cursor, mtr); /* the returned offsets are discarded; the cursor carries the result */
+ mem_heap_free(heap);
+}
+/************************************************************//**
+Creates the root node for a new index tree.
+@return page number of the created root, FIL_NULL if did not succeed */
+UNIV_INTERN
ulint
btr_create(
/*=======*/
- /* out: page number of the created root, FIL_NULL if
- did not succeed */
- ulint type, /* in: type of the index */
- ulint space, /* in: space where created */
- dulint index_id,/* in: index id */
- ulint comp, /* in: nonzero=compact page format */
- mtr_t* mtr) /* in: mini-transaction handle */
+ ulint type, /*!< in: type of the index */
+ ulint space, /*!< in: space where created */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ dulint index_id,/*!< in: index id */
+ dict_index_t* index, /*!< in: index */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
ulint page_no;
- buf_frame_t* ibuf_hdr_frame;
+ buf_block_t* block;
buf_frame_t* frame;
page_t* page;
+ page_zip_des_t* page_zip;
/* Create the two new segments (one, in the case of an ibuf tree) for
the index tree; the segment headers are put on the allocated root page
@@ -666,39 +745,41 @@ btr_create(
if (type & DICT_IBUF) {
/* Allocate first the ibuf header page */
- ibuf_hdr_frame = fseg_create(
- space, 0, IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr);
+ buf_block_t* ibuf_hdr_block = fseg_create(
+ space, 0,
+ IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(ibuf_hdr_frame, SYNC_TREE_NODE_NEW);
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(buf_frame_get_page_no(ibuf_hdr_frame)
+ buf_block_dbg_add_level(ibuf_hdr_block, SYNC_TREE_NODE_NEW);
+
+ ut_ad(buf_block_get_page_no(ibuf_hdr_block)
== IBUF_HEADER_PAGE_NO);
/* Allocate then the next page to the segment: it will be the
tree root page */
- page_no = fseg_alloc_free_page(ibuf_hdr_frame + IBUF_HEADER
+ page_no = fseg_alloc_free_page(buf_block_get_frame(
+ ibuf_hdr_block)
+ + IBUF_HEADER
+ IBUF_TREE_SEG_HEADER,
IBUF_TREE_ROOT_PAGE_NO,
FSP_UP, mtr);
ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO);
- frame = buf_page_get(space, page_no, RW_X_LATCH, mtr);
+ block = buf_page_get(space, zip_size, page_no,
+ RW_X_LATCH, mtr);
} else {
- frame = fseg_create(space, 0, PAGE_HEADER + PAGE_BTR_SEG_TOP,
- mtr);
+ block = fseg_create(space, 0,
+ PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr);
}
- if (frame == NULL) {
+ if (block == NULL) {
return(FIL_NULL);
}
- page_no = buf_frame_get_page_no(frame);
+ page_no = buf_block_get_page_no(block);
+ frame = buf_block_get_frame(block);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(frame, SYNC_TREE_NODE_NEW);
-#endif /* UNIV_SYNC_DEBUG */
+ buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
if (type & DICT_IBUF) {
/* It is an insert buffer tree: initialize the free list */
@@ -713,37 +794,44 @@ btr_create(
PAGE_HEADER + PAGE_BTR_SEG_LEAF, mtr)) {
/* Not enough space for new segment, free root
segment before return. */
- btr_free_root(space, page_no, mtr);
+ btr_free_root(space, zip_size, page_no, mtr);
return(FIL_NULL);
}
/* The fseg create acquires a second latch on the page,
therefore we must declare it: */
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(frame, SYNC_TREE_NODE_NEW);
-#endif /* UNIV_SYNC_DEBUG */
+ buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
}
- /* Create a new index page on the the allocated segment page */
- page = page_create(frame, mtr, comp);
- buf_block_align(page)->check_index_page_at_flush = TRUE;
+ /* Create a new index page on the allocated segment page */
+ page_zip = buf_block_get_page_zip(block);
- /* Set the index id of the page */
- btr_page_set_index_id(page, index_id, mtr);
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ page = page_create_zip(block, index, 0, mtr);
+ } else {
+ page = page_create(block, mtr,
+ dict_table_is_comp(index->table));
+ /* Set the level of the new index page */
+ btr_page_set_level(page, NULL, 0, mtr);
+ }
- /* Set the level of the new index page */
- btr_page_set_level(page, 0, mtr);
+ block->check_index_page_at_flush = TRUE;
+
+ /* Set the index id of the page */
+ btr_page_set_index_id(page, page_zip, index_id, mtr);
/* Set the next node and previous node fields */
- btr_page_set_next(page, FIL_NULL, mtr);
- btr_page_set_prev(page, FIL_NULL, mtr);
+ btr_page_set_next(page, page_zip, FIL_NULL, mtr);
+ btr_page_set_prev(page, page_zip, FIL_NULL, mtr);
/* We reset the free bits for the page to allow creation of several
trees in the same mtr, otherwise the latch on a bitmap page would
prevent it because of the latching order */
- ibuf_reset_free_bits_with_type(type, page);
+ if (!(type & DICT_CLUSTERED)) {
+ ibuf_reset_free_bits(block);
+ }
/* In the following assertion we test that two records of maximum
allowed size fit on the root page: this fact is needed to ensure
@@ -754,15 +842,17 @@ btr_create(
return(page_no);
}
-/****************************************************************
+/************************************************************//**
Frees a B-tree except the root page, which MUST be freed after this
by calling btr_free_root. */
-
+UNIV_INTERN
void
btr_free_but_not_root(
/*==================*/
- ulint space, /* in: space where created */
- ulint root_page_no) /* in: root page number */
+ ulint space, /*!< in: space where created */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint root_page_no) /*!< in: root page number */
{
ibool finished;
page_t* root;
@@ -771,7 +861,13 @@ btr_free_but_not_root(
leaf_loop:
mtr_start(&mtr);
- root = btr_page_get(space, root_page_no, RW_X_LATCH, &mtr);
+ root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, &mtr);
+#ifdef UNIV_BTR_DEBUG
+ ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
+ + root, space));
+ ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
+ + root, space));
+#endif /* UNIV_BTR_DEBUG */
/* NOTE: page hash indexes are dropped when a page is freed inside
fsp0fsp. */
@@ -787,7 +883,11 @@ leaf_loop:
top_loop:
mtr_start(&mtr);
- root = btr_page_get(space, root_page_no, RW_X_LATCH, &mtr);
+ root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, &mtr);
+#ifdef UNIV_BTR_DEBUG
+ ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
+ + root, space));
+#endif /* UNIV_BTR_DEBUG */
finished = fseg_free_step_not_header(
root + PAGE_HEADER + PAGE_BTR_SEG_TOP, &mtr);
@@ -799,102 +899,163 @@ top_loop:
}
}
-/****************************************************************
+/************************************************************//**
Frees the B-tree root page. Other tree MUST already have been freed. */
-
+UNIV_INTERN
void
btr_free_root(
/*==========*/
- ulint space, /* in: space where created */
- ulint root_page_no, /* in: root page number */
- mtr_t* mtr) /* in: a mini-transaction which has already
+ ulint space, /*!< in: space where created */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint root_page_no, /*!< in: root page number */
+ mtr_t* mtr) /*!< in: a mini-transaction which has already
been started */
{
- ibool finished;
- page_t* root;
+ buf_block_t* block; /* the root page, x-latched below */
+ fseg_header_t* header; /* PAGE_BTR_SEG_TOP segment header on the root page */
- root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr);
+ block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, mtr);
- btr_search_drop_page_hash_index(root);
-top_loop:
- finished = fseg_free_step(root + PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr);
- if (!finished) {
+ btr_search_drop_page_hash_index(block); /* done before the segment is freed */
- goto top_loop;
- }
+ header = buf_block_get_frame(block) + PAGE_HEADER + PAGE_BTR_SEG_TOP;
+#ifdef UNIV_BTR_DEBUG
+ ut_a(btr_root_fseg_validate(header, space));
+#endif /* UNIV_BTR_DEBUG */
+
+ while (!fseg_free_step(header, mtr)); /* intentionally empty body: repeat the step until fseg_free_step() reports that it has finished */
}
+#endif /* !UNIV_HOTBACKUP */
-/*****************************************************************
+/*************************************************************//**
Reorganizes an index page. */
static
-void
+ibool
btr_page_reorganize_low(
/*====================*/
- ibool recovery,/* in: TRUE if called in recovery:
+ ibool recovery,/*!< in: TRUE if called in recovery:
locks should not be updated, i.e.,
there cannot exist locks on the
page, and a hash index should not be
dropped: it cannot exist */
- page_t* page, /* in: page to be reorganized */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr) /* in: mtr */
+ buf_block_t* block, /*!< in: page to be reorganized */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
{
- page_t* new_page;
- ulint log_mode;
- ulint data_size1;
- ulint data_size2;
- ulint max_ins_size1;
- ulint max_ins_size2;
-
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
+ page_t* page = buf_block_get_frame(block);
+ page_zip_des_t* page_zip = buf_block_get_page_zip(block);
+ buf_block_t* temp_block;
+ page_t* temp_page;
+ ulint log_mode;
+ ulint data_size1;
+ ulint data_size2;
+ ulint max_ins_size1;
+ ulint max_ins_size2;
+ ibool success = FALSE;
+
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
data_size1 = page_get_data_size(page);
max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);
+#ifndef UNIV_HOTBACKUP
/* Write the log record */
mlog_open_and_write_index(mtr, page, index, page_is_comp(page)
? MLOG_COMP_PAGE_REORGANIZE
: MLOG_PAGE_REORGANIZE, 0);
+#endif /* !UNIV_HOTBACKUP */
/* Turn logging off */
log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
- new_page = buf_frame_alloc();
+#ifndef UNIV_HOTBACKUP
+ temp_block = buf_block_alloc(0);
+#else /* !UNIV_HOTBACKUP */
+ ut_ad(block == back_block1);
+ temp_block = back_block2;
+#endif /* !UNIV_HOTBACKUP */
+ temp_page = temp_block->frame;
/* Copy the old page to temporary space */
- buf_frame_copy(new_page, page);
+ buf_frame_copy(temp_page, page);
- if (!recovery) {
- btr_search_drop_page_hash_index(page);
+#ifndef UNIV_HOTBACKUP
+ if (UNIV_LIKELY(!recovery)) {
+ btr_search_drop_page_hash_index(block);
}
+ block->check_index_page_at_flush = TRUE;
+#endif /* !UNIV_HOTBACKUP */
+
/* Recreate the page: note that global data on page (possible
segment headers, next page-field, etc.) is preserved intact */
- page_create(page, mtr, page_is_comp(page));
- buf_block_align(page)->check_index_page_at_flush = TRUE;
+ page_create(block, mtr, dict_table_is_comp(index->table));
/* Copy the records from the temporary space to the recreated page;
do not copy the lock bits yet */
- page_copy_rec_list_end_no_locks(page, new_page,
- page_get_infimum_rec(new_page),
+ page_copy_rec_list_end_no_locks(block, temp_block,
+ page_get_infimum_rec(temp_page),
index, mtr);
- /* Copy max trx id to recreated page */
- page_set_max_trx_id(page, page_get_max_trx_id(new_page));
- if (!recovery) {
+ if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
+ /* Copy max trx id to recreated page */
+ trx_id_t max_trx_id = page_get_max_trx_id(temp_page);
+ page_set_max_trx_id(block, NULL, max_trx_id, mtr);
+ /* In crash recovery, dict_index_is_sec_or_ibuf() always
+ returns TRUE, even for clustered indexes. max_trx_id is
+ unused in clustered index pages. */
+ ut_ad(!ut_dulint_is_zero(max_trx_id) || recovery);
+ }
+
+ if (UNIV_LIKELY_NULL(page_zip)
+ && UNIV_UNLIKELY
+ (!page_zip_compress(page_zip, page, index, NULL))) {
+
+ /* Restore the old page and exit. */
+
+#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
+ /* Check that the bytes that we skip are identical. */
+ ut_a(!memcmp(page, temp_page, PAGE_HEADER));
+ ut_a(!memcmp(PAGE_HEADER + PAGE_N_RECS + page,
+ PAGE_HEADER + PAGE_N_RECS + temp_page,
+ PAGE_DATA - (PAGE_HEADER + PAGE_N_RECS)));
+ ut_a(!memcmp(UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + page,
+ UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + temp_page,
+ FIL_PAGE_DATA_END));
+#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
+
+ memcpy(PAGE_HEADER + page, PAGE_HEADER + temp_page,
+ PAGE_N_RECS - PAGE_N_DIR_SLOTS);
+ memcpy(PAGE_DATA + page, PAGE_DATA + temp_page,
+ UNIV_PAGE_SIZE - PAGE_DATA - FIL_PAGE_DATA_END);
+
+#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
+ ut_a(!memcmp(page, temp_page, UNIV_PAGE_SIZE));
+#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
+
+ goto func_exit;
+ }
+
+#ifndef UNIV_HOTBACKUP
+ if (UNIV_LIKELY(!recovery)) {
/* Update the record lock bitmaps */
- lock_move_reorganize_page(page, new_page);
+ lock_move_reorganize_page(block, temp_block);
}
+#endif /* !UNIV_HOTBACKUP */
data_size2 = page_get_data_size(page);
max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1);
- if (data_size1 != data_size2 || max_ins_size1 != max_ins_size2) {
- buf_page_print(page);
- buf_page_print(new_page);
+ if (UNIV_UNLIKELY(data_size1 != data_size2)
+ || UNIV_UNLIKELY(max_ins_size1 != max_ins_size2)) {
+ buf_page_print(page, 0);
+ buf_page_print(temp_page, 0);
fprintf(stderr,
"InnoDB: Error: page old data size %lu"
" new data size %lu\n"
@@ -905,89 +1066,123 @@ btr_page_reorganize_low(
(unsigned long) data_size1, (unsigned long) data_size2,
(unsigned long) max_ins_size1,
(unsigned long) max_ins_size2);
+ } else {
+ success = TRUE;
}
- buf_frame_free(new_page);
+func_exit:
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+#ifndef UNIV_HOTBACKUP
+ buf_block_free(temp_block);
+#endif /* !UNIV_HOTBACKUP */
/* Restore logging mode */
mtr_set_log_mode(mtr, log_mode);
-}
-/*****************************************************************
-Reorganizes an index page. */
+ return(success);
+}
-void
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
+Reorganizes an index page.
+IMPORTANT: if btr_page_reorganize() is invoked on a compressed leaf
+page of a non-clustered index, the caller must update the insert
+buffer free bits in the same mini-transaction in such a way that the
+modification will be redo-logged.
+@return TRUE on success, FALSE on failure */
+UNIV_INTERN
+ibool
btr_page_reorganize(
/*================*/
- page_t* page, /* in: page to be reorganized */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr) /* in: mtr */
+ buf_block_t* block, /*!< in: page to be reorganized */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
{
- btr_page_reorganize_low(FALSE, page, index, mtr);
+ return(btr_page_reorganize_low(FALSE, block, index, mtr));
}
+#endif /* !UNIV_HOTBACKUP */
-/***************************************************************
-Parses a redo log record of reorganizing a page. */
-
+/***********************************************************//**
+Parses a redo log record of reorganizing a page.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
btr_parse_page_reorganize(
/*======================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
+ byte* ptr, /*!< in: buffer */
byte* end_ptr __attribute__((unused)),
- /* in: buffer end */
- dict_index_t* index, /* in: record descriptor */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr) /* in: mtr or NULL */
+ /*!< in: buffer end */
+ dict_index_t* index, /*!< in: record descriptor */
+ buf_block_t* block, /*!< in: page to be reorganized, or NULL */
+ mtr_t* mtr) /*!< in: mtr or NULL */
{
ut_ad(ptr && end_ptr);
/* The record is empty, except for the record initial part */
- if (page) {
- btr_page_reorganize_low(TRUE, page, index, mtr);
+ if (UNIV_LIKELY(block != NULL)) {
+ btr_page_reorganize_low(TRUE, block, index, mtr);
}
return(ptr);
}
-/*****************************************************************
-Empties an index page. */
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
+Empties an index page. @see btr_page_create(). */
static
void
btr_page_empty(
/*===========*/
- page_t* page, /* in: page to be emptied */
- mtr_t* mtr) /* in: mtr */
+ buf_block_t* block, /*!< in: page to be emptied */
+ page_zip_des_t* page_zip,/*!< out: compressed page, or NULL */
+ dict_index_t* index, /*!< in: index of the page */
+ ulint level, /*!< in: the B-tree level of the page */
+ mtr_t* mtr) /*!< in: mtr */
{
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- btr_search_drop_page_hash_index(page);
+ page_t* page = buf_block_get_frame(block);
+
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(page_zip == buf_block_get_page_zip(block));
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+
+ btr_search_drop_page_hash_index(block);
/* Recreate the page: note that global data on page (possible
segment headers, next page-field, etc.) is preserved intact */
- page_create(page, mtr, page_is_comp(page));
- buf_block_align(page)->check_index_page_at_flush = TRUE;
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ page_create_zip(block, index, level, mtr);
+ } else {
+ page_create(block, mtr, dict_table_is_comp(index->table));
+ btr_page_set_level(page, NULL, level, mtr);
+ }
+
+ block->check_index_page_at_flush = TRUE;
}
-/*****************************************************************
+/*************************************************************//**
Makes tree one level higher by splitting the root, and inserts
the tuple. It is assumed that mtr contains an x-latch on the tree.
NOTE that the operation of this function must always succeed,
we cannot reverse it: therefore enough free disk space must be
-guaranteed to be available before this function is called. */
-
+guaranteed to be available before this function is called.
+@return inserted record */
+UNIV_INTERN
rec_t*
btr_root_raise_and_insert(
/*======================*/
- /* out: inserted record */
- btr_cur_t* cursor, /* in: cursor at which to insert: must be
+ btr_cur_t* cursor, /*!< in: cursor at which to insert: must be
on the root page; when the function returns,
the cursor is positioned on the predecessor
of the inserted record */
- dtuple_t* tuple, /* in: tuple to insert */
- mtr_t* mtr) /* in: mtr */
+ const dtuple_t* tuple, /*!< in: tuple to insert */
+ ulint n_ext, /*!< in: number of externally stored columns */
+ mtr_t* mtr) /*!< in: mtr */
{
dict_index_t* index;
page_t* root;
@@ -999,77 +1194,127 @@ btr_root_raise_and_insert(
ulint level;
rec_t* node_ptr_rec;
page_cur_t* page_cursor;
+ page_zip_des_t* root_page_zip;
+ page_zip_des_t* new_page_zip;
+ buf_block_t* root_block;
+ buf_block_t* new_block;
root = btr_cur_get_page(cursor);
+ root_block = btr_cur_get_block(cursor);
+ root_page_zip = buf_block_get_page_zip(root_block);
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!root_page_zip || page_zip_validate(root_page_zip, root));
+#endif /* UNIV_ZIP_DEBUG */
index = btr_cur_get_index(cursor);
+#ifdef UNIV_BTR_DEBUG
+ if (!dict_index_is_ibuf(index)) {
+ ulint space = dict_index_get_space(index);
+
+ ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
+ + root, space));
+ ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
+ + root, space));
+ }
- ut_ad(dict_index_get_page(index) == buf_frame_get_page_no(root));
+ ut_a(dict_index_get_page(index) == page_get_page_no(root));
+#endif /* UNIV_BTR_DEBUG */
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(root),
- MTR_MEMO_PAGE_X_FIX));
- btr_search_drop_page_hash_index(root);
+ ut_ad(mtr_memo_contains(mtr, root_block, MTR_MEMO_PAGE_X_FIX));
/* Allocate a new page to the tree. Root splitting is done by first
moving the root records to the new page, emptying the root, putting
a node pointer to the new page, and then splitting the new page. */
- new_page = btr_page_alloc(index, 0, FSP_NO_DIR,
- btr_page_get_level(root, mtr), mtr);
-
- btr_page_create(new_page, index, mtr);
-
level = btr_page_get_level(root, mtr);
- /* Set the levels of the new index page and root page */
- btr_page_set_level(new_page, level, mtr);
- btr_page_set_level(root, level + 1, mtr);
+ new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr);
+ new_page = buf_block_get_frame(new_block);
+ new_page_zip = buf_block_get_page_zip(new_block);
+ ut_a(!new_page_zip == !root_page_zip);
+ ut_a(!new_page_zip
+ || page_zip_get_size(new_page_zip)
+ == page_zip_get_size(root_page_zip));
+
+ btr_page_create(new_block, new_page_zip, index, level, mtr);
/* Set the next node and previous node fields of new page */
- btr_page_set_next(new_page, FIL_NULL, mtr);
- btr_page_set_prev(new_page, FIL_NULL, mtr);
+ btr_page_set_next(new_page, new_page_zip, FIL_NULL, mtr);
+ btr_page_set_prev(new_page, new_page_zip, FIL_NULL, mtr);
+
+ /* Copy the records from root to the new page one by one. */
+
+ if (0
+#ifdef UNIV_ZIP_COPY
+ || new_page_zip
+#endif /* UNIV_ZIP_COPY */
+ || UNIV_UNLIKELY
+ (!page_copy_rec_list_end(new_block, root_block,
+ page_get_infimum_rec(root),
+ index, mtr))) {
+ ut_a(new_page_zip);
- /* Move the records from root to the new page */
+ /* Copy the page byte for byte. */
+ page_zip_copy_recs(new_page_zip, new_page,
+ root_page_zip, root, index, mtr);
+
+ /* Update the lock table and possible hash index. */
+
+ lock_move_rec_list_end(new_block, root_block,
+ page_get_infimum_rec(root));
+
+ btr_search_move_or_delete_hash_entries(new_block, root_block,
+ index);
+ }
- page_move_rec_list_end(new_page, root, page_get_infimum_rec(root),
- index, mtr);
/* If this is a pessimistic insert which is actually done to
perform a pessimistic update then we have stored the lock
information of the record to be inserted on the infimum of the
root page: we cannot discard the lock structs on the root page */
- lock_update_root_raise(new_page, root);
+ lock_update_root_raise(new_block, root_block);
/* Create a memory heap where the node pointer is stored */
heap = mem_heap_create(100);
rec = page_rec_get_next(page_get_infimum_rec(new_page));
- new_page_no = buf_frame_get_page_no(new_page);
+ new_page_no = buf_block_get_page_no(new_block);
/* Build the node pointer (= node key and page address) for the
child */
node_ptr = dict_index_build_node_ptr(index, rec, new_page_no, heap,
level);
- /* Reorganize the root to get free space */
- btr_page_reorganize(root, index, mtr);
+ /* The node pointer must be marked as the predefined minimum record,
+ as there is no lower alphabetical limit to records in the leftmost
+ node of a level: */
+ dtuple_set_info_bits(node_ptr,
+ dtuple_get_info_bits(node_ptr)
+ | REC_INFO_MIN_REC_FLAG);
+
+ /* Rebuild the root page to get free space */
+ btr_page_empty(root_block, root_page_zip, index, level + 1, mtr);
+
+ /* Set the next node and previous node fields, although
+ they should already have been set. The previous node field
+ must be FIL_NULL if root_page_zip != NULL, because the
+ REC_INFO_MIN_REC_FLAG (of the first user record) will be
+ set if and only if btr_page_get_prev() == FIL_NULL. */
+ btr_page_set_next(root, root_page_zip, FIL_NULL, mtr);
+ btr_page_set_prev(root, root_page_zip, FIL_NULL, mtr);
page_cursor = btr_cur_get_page_cur(cursor);
/* Insert node pointer to the root */
- page_cur_set_before_first(root, page_cursor);
+ page_cur_set_before_first(root_block, page_cursor);
node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr,
- index, mtr);
-
- ut_ad(node_ptr_rec);
-
- /* The node pointer must be marked as the predefined minimum record,
- as there is no lower alphabetical limit to records in the leftmost
- node of a level: */
+ index, 0, mtr);
- btr_set_min_rec_mark(node_ptr_rec, page_is_comp(root), mtr);
+ /* The root page should only contain the node pointer
+ to new_page at this point. Thus, the data should fit. */
+ ut_a(node_ptr_rec);
/* Free the memory heap */
mem_heap_free(heap);
@@ -1077,29 +1322,31 @@ btr_root_raise_and_insert(
/* We play safe and reset the free bits for the new page */
#if 0
- fprintf(stderr, "Root raise new page no %lu\n",
- buf_frame_get_page_no(new_page));
+ fprintf(stderr, "Root raise new page no %lu\n", new_page_no);
#endif
- ibuf_reset_free_bits(index, new_page);
+ if (!dict_index_is_clust(index)) {
+ ibuf_reset_free_bits(new_block);
+ }
+
/* Reposition the cursor to the child node */
- page_cur_search(new_page, index, tuple,
+ page_cur_search(new_block, index, tuple,
PAGE_CUR_LE, page_cursor);
/* Split the child and insert tuple */
- return(btr_page_split_and_insert(cursor, tuple, mtr));
+ return(btr_page_split_and_insert(cursor, tuple, n_ext, mtr));
}
-/*****************************************************************
+/*************************************************************//**
Decides if the page should be split at the convergence point of inserts
-converging to the left. */
-
+converging to the left.
+@return TRUE if split recommended */
+UNIV_INTERN
ibool
btr_page_get_split_rec_to_left(
/*===========================*/
- /* out: TRUE if split recommended */
- btr_cur_t* cursor, /* in: cursor at which to insert */
- rec_t** split_rec) /* out: if split recommended,
+ btr_cur_t* cursor, /*!< in: cursor at which to insert */
+ rec_t** split_rec) /*!< out: if split recommended,
the first record on upper half page,
or NULL if tuple to be inserted should
be first */
@@ -1135,16 +1382,16 @@ btr_page_get_split_rec_to_left(
return(FALSE);
}
-/*****************************************************************
+/*************************************************************//**
Decides if the page should be split at the convergence point of inserts
-converging to the right. */
-
+converging to the right.
+@return TRUE if split recommended */
+UNIV_INTERN
ibool
btr_page_get_split_rec_to_right(
/*============================*/
- /* out: TRUE if split recommended */
- btr_cur_t* cursor, /* in: cursor at which to insert */
- rec_t** split_rec) /* out: if split recommended,
+ btr_cur_t* cursor, /*!< in: cursor at which to insert */
+ rec_t** split_rec) /*!< out: if split recommended,
the first record on upper half page,
or NULL if tuple to be inserted should
be first */
@@ -1193,40 +1440,52 @@ split_at_new:
return(FALSE);
}
-/*****************************************************************
+/*************************************************************//**
Calculates a split record such that the tuple will certainly fit on
its half-page when the split is performed. We assume in this function
-only that the cursor page has at least one user record. */
+only that the cursor page has at least one user record.
+@return split record, or NULL if tuple will be the first record on
+upper half-page */
static
rec_t*
btr_page_get_sure_split_rec(
/*========================*/
- /* out: split record, or NULL if
- tuple will be the first record on
- upper half-page */
- btr_cur_t* cursor, /* in: cursor at which insert
- should be made */
- dtuple_t* tuple) /* in: tuple to insert */
+ btr_cur_t* cursor, /*!< in: cursor at which insert should be made */
+ const dtuple_t* tuple, /*!< in: tuple to insert */
+ ulint n_ext) /*!< in: number of externally stored columns */
{
- page_t* page;
- ulint insert_size;
- ulint free_space;
- ulint total_data;
- ulint total_n_recs;
- ulint total_space;
- ulint incl_data;
- rec_t* ins_rec;
- rec_t* rec;
- rec_t* next_rec;
- ulint n;
- mem_heap_t* heap;
- ulint* offsets;
+ page_t* page;
+ page_zip_des_t* page_zip;
+ ulint insert_size;
+ ulint free_space;
+ ulint total_data;
+ ulint total_n_recs;
+ ulint total_space;
+ ulint incl_data;
+ rec_t* ins_rec;
+ rec_t* rec;
+ rec_t* next_rec;
+ ulint n;
+ mem_heap_t* heap;
+ ulint* offsets;
page = btr_cur_get_page(cursor);
- insert_size = rec_get_converted_size(cursor->index, tuple);
+ insert_size = rec_get_converted_size(cursor->index, tuple, n_ext);
free_space = page_get_free_space_of_empty(page_is_comp(page));
+ page_zip = btr_cur_get_page_zip(cursor);
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ /* Estimate the free space of an empty compressed page. */
+ ulint free_space_zip = page_zip_empty_size(
+ cursor->index->n_fields,
+ page_zip_get_size(page_zip));
+
+ if (UNIV_LIKELY(free_space > (ulint) free_space_zip)) {
+ free_space = (ulint) free_space_zip;
+ }
+ }
+
/* free_space is now the free space of a created new page */
total_data = page_get_data_size(page) + insert_size;
@@ -1249,7 +1508,7 @@ btr_page_get_sure_split_rec(
otherwise the last included record will be the first on the right
half page */
- for (;;) {
+ do {
/* Decide the next record to include */
if (rec == ins_rec) {
rec = NULL; /* NULL denotes that tuple is
@@ -1271,66 +1530,63 @@ btr_page_get_sure_split_rec(
}
n++;
+ } while (incl_data + page_dir_calc_reserved_space(n)
+ < total_space / 2);
- if (incl_data + page_dir_calc_reserved_space(n)
- >= total_space / 2) {
-
- if (incl_data + page_dir_calc_reserved_space(n)
- <= free_space) {
- /* The next record will be the first on
- the right half page if it is not the
- supremum record of page */
+ if (incl_data + page_dir_calc_reserved_space(n) <= free_space) {
+ /* The next record will be the first on
+ the right half page if it is not the
+ supremum record of page */
- if (rec == ins_rec) {
- rec = NULL;
+ if (rec == ins_rec) {
+ rec = NULL;
- goto func_exit;
- } else if (rec == NULL) {
- next_rec = page_rec_get_next(ins_rec);
- } else {
- next_rec = page_rec_get_next(rec);
- }
- ut_ad(next_rec);
- if (!page_rec_is_supremum(next_rec)) {
- rec = next_rec;
- }
- }
+ goto func_exit;
+ } else if (rec == NULL) {
+ next_rec = page_rec_get_next(ins_rec);
+ } else {
+ next_rec = page_rec_get_next(rec);
+ }
+ ut_ad(next_rec);
+ if (!page_rec_is_supremum(next_rec)) {
+ rec = next_rec;
+ }
+ }
func_exit:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(rec);
- }
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
}
+ return(rec);
}
-/*****************************************************************
+/*************************************************************//**
Returns TRUE if the insert fits on the appropriate half-page with the
-chosen split_rec. */
+chosen split_rec.
+@return TRUE if fits */
static
ibool
btr_page_insert_fits(
/*=================*/
- /* out: TRUE if fits */
- btr_cur_t* cursor, /* in: cursor at which insert
- should be made */
- rec_t* split_rec, /* in: suggestion for first record
- on upper half-page, or NULL if
- tuple to be inserted should be first */
- const ulint* offsets, /* in: rec_get_offsets(
- split_rec, cursor->index) */
- dtuple_t* tuple, /* in: tuple to insert */
- mem_heap_t* heap) /* in: temporary memory heap */
+ btr_cur_t* cursor, /*!< in: cursor at which insert
+ should be made */
+ const rec_t* split_rec,/*!< in: suggestion for first record
+ on upper half-page, or NULL if
+ tuple to be inserted should be first */
+ const ulint* offsets,/*!< in: rec_get_offsets(
+ split_rec, cursor->index) */
+ const dtuple_t* tuple, /*!< in: tuple to insert */
+ ulint n_ext, /*!< in: number of externally stored columns */
+ mem_heap_t* heap) /*!< in: temporary memory heap */
{
- page_t* page;
- ulint insert_size;
- ulint free_space;
- ulint total_data;
- ulint total_n_recs;
- rec_t* rec;
- rec_t* end_rec;
- ulint* offs;
+ page_t* page;
+ ulint insert_size;
+ ulint free_space;
+ ulint total_data;
+ ulint total_n_recs;
+ const rec_t* rec;
+ const rec_t* end_rec;
+ ulint* offs;
page = btr_cur_get_page(cursor);
@@ -1340,7 +1596,7 @@ btr_page_insert_fits(
ut_ad(!offsets
|| rec_offs_validate(split_rec, cursor->index, offsets));
- insert_size = rec_get_converted_size(cursor->index, tuple);
+ insert_size = rec_get_converted_size(cursor->index, tuple, n_ext);
free_space = page_get_free_space_of_empty(page_is_comp(page));
/* free_space is now the free space of a created new page */
@@ -1395,23 +1651,23 @@ btr_page_insert_fits(
return(TRUE);
}
- rec = page_rec_get_next(rec);
+ rec = page_rec_get_next_const(rec);
}
return(FALSE);
}
-/***********************************************************
+/*******************************************************//**
Inserts a data tuple to a tree on a non-leaf level. It is assumed
that mtr holds an x-latch on the tree. */
-
+UNIV_INTERN
void
btr_insert_on_non_leaf_level(
/*=========================*/
- dict_index_t* index, /* in: index */
- ulint level, /* in: level, must be > 0 */
- dtuple_t* tuple, /* in: the record to be inserted */
- mtr_t* mtr) /* in: mtr */
+ dict_index_t* index, /*!< in: index */
+ ulint level, /*!< in: level, must be > 0 */
+ dtuple_t* tuple, /*!< in: the record to be inserted */
+ mtr_t* mtr) /*!< in: mtr */
{
big_rec_t* dummy_big_rec;
btr_cur_t cursor;
@@ -1428,44 +1684,42 @@ btr_insert_on_non_leaf_level(
| BTR_KEEP_SYS_FLAG
| BTR_NO_UNDO_LOG_FLAG,
&cursor, tuple, &rec,
- &dummy_big_rec, NULL, mtr);
+ &dummy_big_rec, 0, NULL, mtr);
ut_a(err == DB_SUCCESS);
}
-/******************************************************************
+/**************************************************************//**
Attaches the halves of an index page on the appropriate level in an
index tree. */
static
void
btr_attach_half_pages(
/*==================*/
- dict_index_t* index, /* in: the index tree */
- page_t* page, /* in: page to be split */
- rec_t* split_rec, /* in: first record on upper
+ dict_index_t* index, /*!< in: the index tree */
+ buf_block_t* block, /*!< in/out: page to be split */
+ rec_t* split_rec, /*!< in: first record on upper
half page */
- page_t* new_page, /* in: the new half page */
- ulint direction, /* in: FSP_UP or FSP_DOWN */
- mtr_t* mtr) /* in: mtr */
+ buf_block_t* new_block, /*!< in/out: the new half page */
+ ulint direction, /*!< in: FSP_UP or FSP_DOWN */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint space;
- rec_t* node_ptr;
- page_t* prev_page;
- page_t* next_page;
+ ulint zip_size;
ulint prev_page_no;
ulint next_page_no;
ulint level;
+ page_t* page = buf_block_get_frame(block);
page_t* lower_page;
page_t* upper_page;
ulint lower_page_no;
ulint upper_page_no;
+ page_zip_des_t* lower_page_zip;
+ page_zip_des_t* upper_page_zip;
dtuple_t* node_ptr_upper;
mem_heap_t* heap;
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(new_page),
- MTR_MEMO_PAGE_X_FIX));
- ut_a(page_is_comp(page) == page_is_comp(new_page));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains(mtr, new_block, MTR_MEMO_PAGE_X_FIX));
/* Create a memory heap where the data tuple is stored */
heap = mem_heap_create(1024);
@@ -1473,33 +1727,41 @@ btr_attach_half_pages(
/* Based on split direction, decide upper and lower pages */
if (direction == FSP_DOWN) {
- lower_page_no = buf_frame_get_page_no(new_page);
- upper_page_no = buf_frame_get_page_no(page);
- lower_page = new_page;
- upper_page = page;
+ btr_cur_t cursor;
+ ulint* offsets;
+
+ lower_page = buf_block_get_frame(new_block);
+ lower_page_no = buf_block_get_page_no(new_block);
+ lower_page_zip = buf_block_get_page_zip(new_block);
+ upper_page = buf_block_get_frame(block);
+ upper_page_no = buf_block_get_page_no(block);
+ upper_page_zip = buf_block_get_page_zip(block);
/* Look up the index for the node pointer to page */
- node_ptr = btr_page_get_father_node_ptr(index, page, mtr);
+ offsets = btr_page_get_father_block(NULL, heap, index,
+ block, mtr, &cursor);
/* Replace the address of the old child node (= page) with the
address of the new lower half */
- btr_node_ptr_set_child_page_no(node_ptr,
- rec_get_offsets(
- node_ptr, index,
- NULL, ULINT_UNDEFINED,
- &heap),
- lower_page_no, mtr);
+ btr_node_ptr_set_child_page_no(
+ btr_cur_get_rec(&cursor),
+ btr_cur_get_page_zip(&cursor),
+ offsets, lower_page_no, mtr);
mem_heap_empty(heap);
} else {
- lower_page_no = buf_frame_get_page_no(page);
- upper_page_no = buf_frame_get_page_no(new_page);
- lower_page = page;
- upper_page = new_page;
+ lower_page = buf_block_get_frame(block);
+ lower_page_no = buf_block_get_page_no(block);
+ lower_page_zip = buf_block_get_page_zip(block);
+ upper_page = buf_block_get_frame(new_block);
+ upper_page_no = buf_block_get_page_no(new_block);
+ upper_page_zip = buf_block_get_page_zip(new_block);
}
/* Get the level of the split pages */
- level = btr_page_get_level(page, mtr);
+ level = btr_page_get_level(buf_block_get_frame(block), mtr);
+ ut_ad(level
+ == btr_page_get_level(buf_block_get_frame(new_block), mtr));
/* Build the node pointer (= node key and page address) for the upper
half */
@@ -1519,73 +1781,88 @@ btr_attach_half_pages(
prev_page_no = btr_page_get_prev(page, mtr);
next_page_no = btr_page_get_next(page, mtr);
- space = buf_frame_get_space_id(page);
+ space = buf_block_get_space(block);
+ zip_size = buf_block_get_zip_size(block);
/* Update page links of the level */
if (prev_page_no != FIL_NULL) {
-
- prev_page = btr_page_get(space, prev_page_no, RW_X_LATCH, mtr);
- ut_a(page_is_comp(prev_page) == page_is_comp(page));
+ buf_block_t* prev_block = btr_block_get(space, zip_size,
+ prev_page_no,
+ RW_X_LATCH, mtr);
#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_next(prev_page, mtr)
- == buf_frame_get_page_no(page));
+ ut_a(page_is_comp(prev_block->frame) == page_is_comp(page));
+ ut_a(btr_page_get_next(prev_block->frame, mtr)
+ == buf_block_get_page_no(block));
#endif /* UNIV_BTR_DEBUG */
- btr_page_set_next(prev_page, lower_page_no, mtr);
+ btr_page_set_next(buf_block_get_frame(prev_block),
+ buf_block_get_page_zip(prev_block),
+ lower_page_no, mtr);
}
if (next_page_no != FIL_NULL) {
+ buf_block_t* next_block = btr_block_get(space, zip_size,
+ next_page_no,
+ RW_X_LATCH, mtr);
+#ifdef UNIV_BTR_DEBUG
+ ut_a(page_is_comp(next_block->frame) == page_is_comp(page));
+ ut_a(btr_page_get_prev(next_block->frame, mtr)
+ == page_get_page_no(page));
+#endif /* UNIV_BTR_DEBUG */
- next_page = btr_page_get(space, next_page_no, RW_X_LATCH, mtr);
- ut_a(page_is_comp(next_page) == page_is_comp(page));
-
- btr_page_set_prev(next_page, upper_page_no, mtr);
+ btr_page_set_prev(buf_block_get_frame(next_block),
+ buf_block_get_page_zip(next_block),
+ upper_page_no, mtr);
}
- btr_page_set_prev(lower_page, prev_page_no, mtr);
- btr_page_set_next(lower_page, upper_page_no, mtr);
- btr_page_set_level(lower_page, level, mtr);
+ btr_page_set_prev(lower_page, lower_page_zip, prev_page_no, mtr);
+ btr_page_set_next(lower_page, lower_page_zip, upper_page_no, mtr);
- btr_page_set_prev(upper_page, lower_page_no, mtr);
- btr_page_set_next(upper_page, next_page_no, mtr);
- btr_page_set_level(upper_page, level, mtr);
+ btr_page_set_prev(upper_page, upper_page_zip, lower_page_no, mtr);
+ btr_page_set_next(upper_page, upper_page_zip, next_page_no, mtr);
}
-/*****************************************************************
+/*************************************************************//**
Splits an index page to halves and inserts the tuple. It is assumed
-that mtr holds an x-latch to the index tree. NOTE: the tree x-latch
-is released within this function! NOTE that the operation of this
-function must always succeed, we cannot reverse it: therefore
-enough free disk space must be guaranteed to be available before
-this function is called. */
-
+that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is
+released within this function! NOTE that the operation of this
+function must always succeed, we cannot reverse it: therefore enough
+free disk space (2 pages) must be guaranteed to be available before
+this function is called.
+
+@return inserted record */
+UNIV_INTERN
rec_t*
btr_page_split_and_insert(
/*======================*/
- /* out: inserted record; NOTE: the tree
- x-latch is released! NOTE: 2 free disk
- pages must be available! */
- btr_cur_t* cursor, /* in: cursor at which to insert; when the
+ btr_cur_t* cursor, /*!< in: cursor at which to insert; when the
function returns, the cursor is positioned
on the predecessor of the inserted record */
- dtuple_t* tuple, /* in: tuple to insert */
- mtr_t* mtr) /* in: mtr */
+ const dtuple_t* tuple, /*!< in: tuple to insert */
+ ulint n_ext, /*!< in: number of externally stored columns */
+ mtr_t* mtr) /*!< in: mtr */
{
+ buf_block_t* block;
page_t* page;
+ page_zip_des_t* page_zip;
ulint page_no;
byte direction;
ulint hint_page_no;
+ buf_block_t* new_block;
page_t* new_page;
+ page_zip_des_t* new_page_zip;
rec_t* split_rec;
- page_t* left_page;
- page_t* right_page;
+ buf_block_t* left_block;
+ buf_block_t* right_block;
+ buf_block_t* insert_block;
page_t* insert_page;
page_cur_t* page_cursor;
rec_t* first_rec;
byte* buf = 0; /* remove warning */
rec_t* move_limit;
ibool insert_will_fit;
+ ibool insert_left;
ulint n_iterations = 0;
rec_t* rec;
mem_heap_t* heap;
@@ -1604,13 +1881,14 @@ func_start:
ut_ad(rw_lock_own(dict_index_get_lock(cursor->index), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
- page = btr_cur_get_page(cursor);
+ block = btr_cur_get_block(cursor);
+ page = buf_block_get_frame(block);
+ page_zip = buf_block_get_page_zip(block);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(page_get_n_recs(page) >= 2);
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(page_get_n_recs(page) >= 1);
- page_no = buf_frame_get_page_no(page);
+ page_no = buf_block_get_page_no(block);
/* 1. Decide the split record; split_rec == NULL means that the
tuple to be inserted should be the first record on the upper
@@ -1619,7 +1897,7 @@ func_start:
if (n_iterations > 0) {
direction = FSP_UP;
hint_page_no = page_no + 1;
- split_rec = btr_page_get_sure_split_rec(cursor, tuple);
+ split_rec = btr_page_get_sure_split_rec(cursor, tuple, n_ext);
} else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
direction = FSP_UP;
@@ -1631,37 +1909,80 @@ func_start:
} else {
direction = FSP_UP;
hint_page_no = page_no + 1;
- split_rec = page_get_middle_rec(page);
+
+ if (page_get_n_recs(page) == 1) {
+ page_cur_t pcur;
+
+ /* There is only one record in the index page;
+ therefore we can't split the node in the middle
+ by default. We need to determine whether the
+ new record will be inserted to the left or right. */
+
+ /* Read the first (and only) record in the page. */
+ page_cur_set_before_first(block, &pcur);
+ page_cur_move_to_next(&pcur);
+ first_rec = page_cur_get_rec(&pcur);
+
+ offsets = rec_get_offsets(
+ first_rec, cursor->index, offsets,
+ n_uniq, &heap);
+
+ /* If the new record is less than the existing record,
+ the split in the middle will copy the existing
+ record to the new node. */
+ if (cmp_dtuple_rec(tuple, first_rec, offsets) < 0) {
+ split_rec = page_get_middle_rec(page);
+ } else {
+ split_rec = NULL;
+ }
+ } else {
+ split_rec = page_get_middle_rec(page);
+ }
}
/* 2. Allocate a new page to the index */
- new_page = btr_page_alloc(cursor->index, hint_page_no, direction,
- btr_page_get_level(page, mtr), mtr);
- btr_page_create(new_page, cursor->index, mtr);
+ new_block = btr_page_alloc(cursor->index, hint_page_no, direction,
+ btr_page_get_level(page, mtr), mtr);
+ new_page = buf_block_get_frame(new_block);
+ new_page_zip = buf_block_get_page_zip(new_block);
+ btr_page_create(new_block, new_page_zip, cursor->index,
+ btr_page_get_level(page, mtr), mtr);
/* 3. Calculate the first record on the upper half-page, and the
first record (move_limit) on original page which ends up on the
upper half */
- if (split_rec != NULL) {
- first_rec = split_rec;
- move_limit = split_rec;
+ if (split_rec) {
+ first_rec = move_limit = split_rec;
+
+ offsets = rec_get_offsets(split_rec, cursor->index, offsets,
+ n_uniq, &heap);
+
+ insert_left = cmp_dtuple_rec(tuple, split_rec, offsets) < 0;
+
+ if (UNIV_UNLIKELY(!insert_left && new_page_zip
+ && n_iterations > 0)) {
+ /* If a compressed page has already been split,
+ avoid further splits by inserting the record
+ to an empty page. */
+ split_rec = NULL;
+ goto insert_right;
+ }
} else {
- buf = mem_alloc(rec_get_converted_size(cursor->index, tuple));
+insert_right:
+ insert_left = FALSE;
+ buf = mem_alloc(rec_get_converted_size(cursor->index,
+ tuple, n_ext));
- first_rec = rec_convert_dtuple_to_rec(buf,
- cursor->index, tuple);
+ first_rec = rec_convert_dtuple_to_rec(buf, cursor->index,
+ tuple, n_ext);
move_limit = page_rec_get_next(btr_cur_get_rec(cursor));
}
/* 4. Do first the modifications in the tree structure */
- btr_attach_half_pages(cursor->index, page, first_rec,
- new_page, direction, mtr);
-
- if (split_rec == NULL) {
- mem_free(buf);
- }
+ btr_attach_half_pages(cursor->index, block,
+ first_rec, new_block, direction, mtr);
/* If the split is made on the leaf level and the insert will fit
on the appropriate half-page, we may release the tree x-latch.
@@ -1669,19 +1990,17 @@ func_start:
thus reducing the tree latch contention. */
if (split_rec) {
- offsets = rec_get_offsets(split_rec, cursor->index, offsets,
- n_uniq, &heap);
-
- insert_will_fit = btr_page_insert_fits(cursor,
- split_rec, offsets,
- tuple, heap);
+ insert_will_fit = !new_page_zip
+ && btr_page_insert_fits(cursor, split_rec,
+ offsets, tuple, n_ext, heap);
} else {
- insert_will_fit = btr_page_insert_fits(cursor,
- NULL, NULL,
- tuple, heap);
+ mem_free(buf);
+ insert_will_fit = !new_page_zip
+ && btr_page_insert_fits(cursor, NULL,
+ NULL, tuple, n_ext, heap);
}
- if (insert_will_fit && (btr_page_get_level(page, mtr) == 0)) {
+ if (insert_will_fit && page_is_leaf(page)) {
mtr_memo_release(mtr, dict_index_get_lock(cursor->index),
MTR_MEMO_X_LOCK);
@@ -1691,187 +2010,282 @@ func_start:
if (direction == FSP_DOWN) {
/* fputs("Split left\n", stderr); */
- page_move_rec_list_start(new_page, page, move_limit,
- cursor->index, mtr);
- left_page = new_page;
- right_page = page;
+ if (0
+#ifdef UNIV_ZIP_COPY
+ || page_zip
+#endif /* UNIV_ZIP_COPY */
+ || UNIV_UNLIKELY
+ (!page_move_rec_list_start(new_block, block, move_limit,
+ cursor->index, mtr))) {
+ /* For some reason, compressing new_page failed,
+ even though it should contain fewer records than
+ the original page. Copy the page byte for byte
+ and then delete the records from both pages
+ as appropriate. Deleting will always succeed. */
+ ut_a(new_page_zip);
+
+ page_zip_copy_recs(new_page_zip, new_page,
+ page_zip, page, cursor->index, mtr);
+ page_delete_rec_list_end(move_limit - page + new_page,
+ new_block, cursor->index,
+ ULINT_UNDEFINED,
+ ULINT_UNDEFINED, mtr);
+
+ /* Update the lock table and possible hash index. */
+
+ lock_move_rec_list_start(
+ new_block, block, move_limit,
+ new_page + PAGE_NEW_INFIMUM);
+
+ btr_search_move_or_delete_hash_entries(
+ new_block, block, cursor->index);
+
+ /* Delete the records from the source page. */
+
+ page_delete_rec_list_start(move_limit, block,
+ cursor->index, mtr);
+ }
+
+ left_block = new_block;
+ right_block = block;
- lock_update_split_left(right_page, left_page);
+ lock_update_split_left(right_block, left_block);
} else {
/* fputs("Split right\n", stderr); */
- page_move_rec_list_end(new_page, page, move_limit,
- cursor->index, mtr);
- left_page = page;
- right_page = new_page;
+ if (0
+#ifdef UNIV_ZIP_COPY
+ || page_zip
+#endif /* UNIV_ZIP_COPY */
+ || UNIV_UNLIKELY
+ (!page_move_rec_list_end(new_block, block, move_limit,
+ cursor->index, mtr))) {
+ /* For some reason, compressing new_page failed,
+ even though it should contain fewer records than
+ the original page. Copy the page byte for byte
+ and then delete the records from both pages
+ as appropriate. Deleting will always succeed. */
+ ut_a(new_page_zip);
+
+ page_zip_copy_recs(new_page_zip, new_page,
+ page_zip, page, cursor->index, mtr);
+ page_delete_rec_list_start(move_limit - page
+ + new_page, new_block,
+ cursor->index, mtr);
+
+ /* Update the lock table and possible hash index. */
+
+ lock_move_rec_list_end(new_block, block, move_limit);
+
+ btr_search_move_or_delete_hash_entries(
+ new_block, block, cursor->index);
+
+ /* Delete the records from the source page. */
+
+ page_delete_rec_list_end(move_limit, block,
+ cursor->index,
+ ULINT_UNDEFINED,
+ ULINT_UNDEFINED, mtr);
+ }
+
+ left_block = block;
+ right_block = new_block;
- lock_update_split_right(right_page, left_page);
+ lock_update_split_right(right_block, left_block);
}
+#ifdef UNIV_ZIP_DEBUG
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ ut_a(page_zip_validate(page_zip, page));
+ ut_a(page_zip_validate(new_page_zip, new_page));
+ }
+#endif /* UNIV_ZIP_DEBUG */
+
+ /* At this point, split_rec, move_limit and first_rec may point
+ to garbage on the old page. */
+
/* 6. The split and the tree modification is now completed. Decide the
page where the tuple should be inserted */
- if (split_rec == NULL) {
- insert_page = right_page;
-
+ if (insert_left) {
+ insert_block = left_block;
} else {
- offsets = rec_get_offsets(first_rec, cursor->index,
- offsets, n_uniq, &heap);
-
- if (cmp_dtuple_rec(tuple, first_rec, offsets) >= 0) {
-
- insert_page = right_page;
- } else {
- insert_page = left_page;
- }
+ insert_block = right_block;
}
+ insert_page = buf_block_get_frame(insert_block);
+
/* 7. Reposition the cursor for insert and try insertion */
page_cursor = btr_cur_get_page_cur(cursor);
- page_cur_search(insert_page, cursor->index, tuple,
+ page_cur_search(insert_block, cursor->index, tuple,
PAGE_CUR_LE, page_cursor);
- rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr);
+ rec = page_cur_tuple_insert(page_cursor, tuple,
+ cursor->index, n_ext, mtr);
+
+#ifdef UNIV_ZIP_DEBUG
+ {
+ page_zip_des_t* insert_page_zip
+ = buf_block_get_page_zip(insert_block);
+ ut_a(!insert_page_zip
+ || page_zip_validate(insert_page_zip, insert_page));
+ }
+#endif /* UNIV_ZIP_DEBUG */
- if (rec != NULL) {
- /* Insert fit on the page: update the free bits for the
- left and right pages in the same mtr */
+ if (UNIV_LIKELY(rec != NULL)) {
- ibuf_update_free_bits_for_two_pages_low(cursor->index,
- left_page,
- right_page, mtr);
- /* fprintf(stderr, "Split and insert done %lu %lu\n",
- buf_frame_get_page_no(left_page),
- buf_frame_get_page_no(right_page)); */
- mem_heap_free(heap);
- return(rec);
+ goto func_exit;
}
/* 8. If insert did not fit, try page reorganization */
- btr_page_reorganize(insert_page, cursor->index, mtr);
+ if (UNIV_UNLIKELY
+ (!btr_page_reorganize(insert_block, cursor->index, mtr))) {
+
+ goto insert_failed;
+ }
- page_cur_search(insert_page, cursor->index, tuple,
+ page_cur_search(insert_block, cursor->index, tuple,
PAGE_CUR_LE, page_cursor);
- rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr);
+ rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
+ n_ext, mtr);
- if (rec == NULL) {
+ if (UNIV_UNLIKELY(rec == NULL)) {
/* The insert did not fit on the page: loop back to the
start of the function for a new split */
-
+insert_failed:
/* We play safe and reset the free bits for new_page */
- ibuf_reset_free_bits(cursor->index, new_page);
+ if (!dict_index_is_clust(cursor->index)) {
+ ibuf_reset_free_bits(new_block);
+ }
/* fprintf(stderr, "Split second round %lu\n",
- buf_frame_get_page_no(page)); */
+ page_get_page_no(page)); */
n_iterations++;
- ut_ad(n_iterations < 2);
+ ut_ad(n_iterations < 2
+ || buf_block_get_page_zip(insert_block));
ut_ad(!insert_will_fit);
goto func_start;
}
+func_exit:
/* Insert fit on the page: update the free bits for the
left and right pages in the same mtr */
- ibuf_update_free_bits_for_two_pages_low(cursor->index, left_page,
- right_page, mtr);
+ if (!dict_index_is_clust(cursor->index) && page_is_leaf(page)) {
+ ibuf_update_free_bits_for_two_pages_low(
+ buf_block_get_zip_size(left_block),
+ left_block, right_block, mtr);
+ }
+
#if 0
fprintf(stderr, "Split and insert done %lu %lu\n",
- buf_frame_get_page_no(left_page),
- buf_frame_get_page_no(right_page));
+ buf_block_get_page_no(left_block),
+ buf_block_get_page_no(right_block));
#endif
- ut_ad(page_validate(left_page, cursor->index));
- ut_ad(page_validate(right_page, cursor->index));
+ ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index));
+ ut_ad(page_validate(buf_block_get_frame(right_block), cursor->index));
mem_heap_free(heap);
return(rec);
}
-/*****************************************************************
+/*************************************************************//**
Removes a page from the level list of pages. */
static
void
btr_level_list_remove(
/*==================*/
- page_t* page, /* in: page to remove */
- mtr_t* mtr) /* in: mtr */
+ ulint space, /*!< in: space id of the page to remove; asserted
+ below to equal page_get_space_id(page) */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ page_t* page, /*!< in: page to remove; only its prev/next
+ page numbers are read here */
+ mtr_t* mtr) /*!< in: mtr */
{
- ulint space;
ulint prev_page_no;
- page_t* prev_page;
ulint next_page_no;
- page_t* next_page;
ut_ad(page && mtr);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(space == page_get_space_id(page));
/* Get the previous and next page numbers of page */
prev_page_no = btr_page_get_prev(page, mtr);
next_page_no = btr_page_get_next(page, mtr);
- space = buf_frame_get_space_id(page);
/* Update page links of the level: each existing neighbour is made to
point past page to the other neighbour (or to FIL_NULL if there is
none). The link fields of page itself are not written here. */
if (prev_page_no != FIL_NULL) {
-
- prev_page = btr_page_get(space, prev_page_no, RW_X_LATCH, mtr);
- ut_a(page_is_comp(prev_page) == page_is_comp(page));
+ buf_block_t* prev_block
+ = btr_block_get(space, zip_size, prev_page_no,
+ RW_X_LATCH, mtr); /* requested with RW_X_LATCH */
+ page_t* prev_page
+ = buf_block_get_frame(prev_block);
#ifdef UNIV_BTR_DEBUG
+ ut_a(page_is_comp(prev_page) == page_is_comp(page));
ut_a(btr_page_get_next(prev_page, mtr)
- == buf_frame_get_page_no(page));
+ == page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */
- btr_page_set_next(prev_page, next_page_no, mtr);
+ btr_page_set_next(prev_page,
+ buf_block_get_page_zip(prev_block),
+ next_page_no, mtr); /* prev now links to next */
}
if (next_page_no != FIL_NULL) {
-
- next_page = btr_page_get(space, next_page_no, RW_X_LATCH, mtr);
- ut_a(page_is_comp(next_page) == page_is_comp(page));
+ buf_block_t* next_block
+ = btr_block_get(space, zip_size, next_page_no,
+ RW_X_LATCH, mtr); /* requested with RW_X_LATCH */
+ page_t* next_page
+ = buf_block_get_frame(next_block);
#ifdef UNIV_BTR_DEBUG
+ ut_a(page_is_comp(next_page) == page_is_comp(page));
ut_a(btr_page_get_prev(next_page, mtr)
- == buf_frame_get_page_no(page));
+ == page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */
- btr_page_set_prev(next_page, prev_page_no, mtr);
+ btr_page_set_prev(next_page,
+ buf_block_get_page_zip(next_block),
+ prev_page_no, mtr); /* next now links to prev */
}
}
-/********************************************************************
+/****************************************************************//**
Writes the redo log record for setting an index record as the predefined
minimum record. The record consists of the initial log record of the given
type followed by the page offset of rec in 2 bytes. */
UNIV_INLINE
void
btr_set_min_rec_mark_log(
/*=====================*/
- rec_t* rec, /* in: record */
- ulint comp, /* nonzero=compact record format */
- mtr_t* mtr) /* in: mtr */
+ rec_t* rec, /*!< in: record */
+ byte type, /*!< in: MLOG_COMP_REC_MIN_MARK or MLOG_REC_MIN_MARK */
+ mtr_t* mtr) /*!< in: mtr */
{
- mlog_write_initial_log_record(
- rec, comp ? MLOG_COMP_REC_MIN_MARK : MLOG_REC_MIN_MARK, mtr);
+ mlog_write_initial_log_record(rec, type, mtr);
/* Write rec offset as a 2-byte ulint */
mlog_catenate_ulint(mtr, page_offset(rec), MLOG_2BYTES);
}
+#else /* !UNIV_HOTBACKUP */
+/* No-op stub; the parameter list mirrors the function above
+(second argument is the log record type, no longer a "comp" flag). */
+# define btr_set_min_rec_mark_log(rec,type,mtr) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
-/********************************************************************
+/****************************************************************//**
Parses the redo log record for setting an index record as the predefined
-minimum record. */
-
+minimum record.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
btr_parse_set_min_rec_mark(
/*=======================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- ulint comp, /* in: nonzero=compact page format */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr) /* in: mtr or NULL */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ ulint comp, /*!< in: nonzero=compact page format */
+ page_t* page, /*!< in: page or NULL */
+ mtr_t* mtr) /*!< in: mtr or NULL */
{
rec_t* rec;
@@ -1885,54 +2299,59 @@ btr_parse_set_min_rec_mark(
rec = page + mach_read_from_2(ptr);
- btr_set_min_rec_mark(rec, comp, mtr);
+ btr_set_min_rec_mark(rec, mtr);
}
return(ptr + 2);
}
-/********************************************************************
+/****************************************************************//**
Sets a record as the predefined minimum record. */
-
+UNIV_INTERN
void
btr_set_min_rec_mark(
/*=================*/
- rec_t* rec, /* in: record */
- ulint comp, /* in: nonzero=compact page format */
- mtr_t* mtr) /* in: mtr */
+ rec_t* rec, /*!< in: record; its format is no longer passed
+ in but determined with page_rec_is_comp(rec) */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint info_bits;
- info_bits = rec_get_info_bits(rec, comp);
+ if (UNIV_LIKELY(page_rec_is_comp(rec))) {
+ info_bits = rec_get_info_bits(rec, TRUE);
- rec_set_info_bits(rec, comp, info_bits | REC_INFO_MIN_REC_FLAG);
+ rec_set_info_bits_new(rec, info_bits | REC_INFO_MIN_REC_FLAG); /* keep the other bits, add the flag */
- btr_set_min_rec_mark_log(rec, comp, mtr);
+ btr_set_min_rec_mark_log(rec, MLOG_COMP_REC_MIN_MARK, mtr);
+ } else {
+ info_bits = rec_get_info_bits(rec, FALSE);
+
+ rec_set_info_bits_old(rec, info_bits | REC_INFO_MIN_REC_FLAG); /* same update via the _old setter */
+
+ btr_set_min_rec_mark_log(rec, MLOG_REC_MIN_MARK, mtr);
+ }
}
-/*****************************************************************
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
Deletes on the upper level the node pointer to a page. */
-
+UNIV_INTERN
void
btr_node_ptr_delete(
/*================*/
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: page whose node pointer is deleted */
- mtr_t* mtr) /* in: mtr */
+ dict_index_t* index, /*!< in: index tree */
+ buf_block_t* block, /*!< in: page whose node pointer is deleted */
+ mtr_t* mtr) /*!< in: mtr */
{
- rec_t* node_ptr;
btr_cur_t cursor;
ibool compressed;
ulint err;
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- /* Delete node pointer on father page */
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- node_ptr = btr_page_get_father_node_ptr(index, page, mtr);
+ /* Delete node pointer on father page */
+ btr_page_get_father(index, block, mtr, &cursor);
- btr_cur_position(index, node_ptr, &cursor);
- compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, FALSE,
+ compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, RB_NONE,
mtr);
ut_a(err == DB_SUCCESS);
@@ -1941,91 +2360,129 @@ btr_node_ptr_delete(
}
}
-/*****************************************************************
+/*************************************************************//**
If page is the only on its level, this function moves its records to the
father page, thus reducing the tree height. The father is emptied and takes
over the level of page, every higher ancestor up to the root has its level
decremented by one, and the file page of block is freed. */
static
void
btr_lift_page_up(
/*=============*/
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: page which is the only on its level;
+ dict_index_t* index, /*!< in: index tree */
+ buf_block_t* block, /*!< in: page which is the only on its level;
must not be empty: use
btr_discard_only_page_on_level if the last
record from the page should be removed */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
+ buf_block_t* father_block;
page_t* father_page;
- page_t* iter_page;
- page_t* pages[BTR_MAX_LEVELS];
ulint page_level;
+ page_zip_des_t* father_page_zip;
+ page_t* page = buf_block_get_frame(block);
ulint root_page_no;
- ulint ancestors;
+ buf_block_t* blocks[BTR_MAX_LEVELS];
+ ulint n_blocks; /*!< number of entries used in blocks[] */
ulint i;
ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- father_page = buf_frame_align(
- btr_page_get_father_node_ptr(index, page, mtr));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
page_level = btr_page_get_level(page, mtr);
root_page_no = dict_index_get_page(index);
- ancestors = 1;
- pages[0] = father_page;
+ {
+ btr_cur_t cursor;
+ mem_heap_t* heap = mem_heap_create(100);
+ ulint* offsets;
+ buf_block_t* b;
+
+ offsets = btr_page_get_father_block(NULL, heap, index,
+ block, mtr, &cursor);
+ father_block = btr_cur_get_block(&cursor);
+ father_page_zip = buf_block_get_page_zip(father_block);
+ father_page = buf_block_get_frame(father_block);
+
+ n_blocks = 0;
+
+ /* Store all ancestor pages so we can reset their
+ levels later on. We have to do all the searches on
+ the tree now because later on, after we've replaced
+ the first level, the tree is in an inconsistent state
+ and can not be searched.
+ Note that the father itself is NOT stored: blocks[0]
+ is the father of father_block (if the father is not
+ the root), and the last entry is the root. */
+ for (b = father_block;
+ buf_block_get_page_no(b) != root_page_no; ) {
+ ut_a(n_blocks < BTR_MAX_LEVELS);
+
+ offsets = btr_page_get_father_block(offsets, heap,
+ index, b,
+ mtr, &cursor);
+
+ blocks[n_blocks++] = b = btr_cur_get_block(&cursor);
+ }
- /* Store all ancestor pages so we can reset their levels later on.
- We have to do all the searches on the tree now because later on,
- after we've replaced the first level, the tree is in an inconsistent
- state and can not be searched. */
- iter_page = father_page;
- for (;;) {
- if (buf_block_get_page_no(buf_block_align(iter_page))
- == root_page_no) {
+ mem_heap_free(heap);
+ }
- break;
- }
+ btr_search_drop_page_hash_index(block);
- ut_a(ancestors < BTR_MAX_LEVELS);
+ /* Make the father empty */
+ btr_page_empty(father_block, father_page_zip, index, page_level, mtr);
+ /* The father has just been given level page_level. The first
+ block in blocks[] is one level above the father, so its new
+ level is page_level + 1: advance before the loop below, which
+ would otherwise write every ancestor level one too low (and
+ fail its own debug assertion on the first iteration). */
+ page_level++;
- iter_page = buf_frame_align(
- btr_page_get_father_node_ptr(index, iter_page, mtr));
+ /* Copy the records to the father page one by one. */
+ if (0
+#ifdef UNIV_ZIP_COPY
+ || father_page_zip
+#endif /* UNIV_ZIP_COPY */
+ || UNIV_UNLIKELY
+ (!page_copy_rec_list_end(father_block, block,
+ page_get_infimum_rec(page),
+ index, mtr))) {
+ const page_zip_des_t* page_zip
+ = buf_block_get_page_zip(block);
+ ut_a(father_page_zip);
+ ut_a(page_zip);
- pages[ancestors++] = iter_page;
- }
+ /* Copy the page byte for byte. */
+ page_zip_copy_recs(father_page_zip, father_page,
+ page_zip, page, index, mtr);
- btr_search_drop_page_hash_index(page);
+ /* Update the lock table and possible hash index. */
- /* Make the father empty */
- btr_page_empty(father_page, mtr);
+ lock_move_rec_list_end(father_block, block,
+ page_get_infimum_rec(page));
- /* Move records to the father */
- page_copy_rec_list_end(father_page, page, page_get_infimum_rec(page),
- index, mtr);
- lock_update_copy_and_discard(father_page, page);
+ btr_search_move_or_delete_hash_entries(father_block, block,
+ index);
+ }
- /* Go upward to root page, decreasing levels by one. */
- for (i = 0; i < ancestors; i++) {
- iter_page = pages[i];
+ lock_update_copy_and_discard(father_block, block);
- ut_ad(btr_page_get_level(iter_page, mtr) == (page_level + 1));
+ /* Go upward to root page, decrementing levels by one.
+ On entry page_level is the new level of blocks[0]. */
+ for (i = 0; i < n_blocks; i++, page_level++) {
+ page_t* page = buf_block_get_frame(blocks[i]);
+ page_zip_des_t* page_zip= buf_block_get_page_zip(blocks[i]);
- btr_page_set_level(iter_page, page_level, mtr);
- page_level++;
+ ut_ad(btr_page_get_level(page, mtr) == page_level + 1);
+
+ btr_page_set_level(page, page_zip, page_level, mtr);
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
}
/* Free the file page */
- btr_page_free(index, page, mtr);
+ btr_page_free(index, block, mtr);
- /* We play safe and reset the free bits for the father */
- ibuf_reset_free_bits(index, father_page);
+ /* We play it safe and reset the free bits for the father */
+ if (!dict_index_is_clust(index)) {
+ ibuf_reset_free_bits(father_block);
+ }
ut_ad(page_validate(father_page, index));
- ut_ad(btr_check_node_ptr(index, father_page, mtr));
+ ut_ad(btr_check_node_ptr(index, father_block, mtr));
}
-/*****************************************************************
+/*************************************************************//**
Tries to merge the page first to the left immediate brother if such a
brother exists, and the node pointers to the current page and to the brother
reside on the same page. If the left brother does not satisfy these
@@ -2033,47 +2490,49 @@ conditions, looks at the right brother. If the page is the only one on that
level lifts the records of the page to the father page, thus reducing the
tree height. It is assumed that mtr holds an x-latch on the tree and on the
page. If cursor is on the leaf level, mtr must also hold x-latches to the
-brothers, if they exist. NOTE: it is assumed that the caller has reserved
-enough free extents so that the compression will always succeed if done! */
-
-void
+brothers, if they exist.
+@return TRUE on success */
+UNIV_INTERN
+ibool
btr_compress(
/*=========*/
- btr_cur_t* cursor, /* in: cursor on the page to merge or lift;
+ btr_cur_t* cursor, /*!< in: cursor on the page to merge or lift;
the page must not be empty: in record delete
use btr_discard_page if the page would become
empty */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
dict_index_t* index;
ulint space;
+ ulint zip_size;
ulint left_page_no;
ulint right_page_no;
+ buf_block_t* merge_block;
page_t* merge_page;
- page_t* father_page;
+ page_zip_des_t* merge_page_zip;
ibool is_left;
+ buf_block_t* block;
page_t* page;
- rec_t* orig_pred;
- rec_t* orig_succ;
- rec_t* node_ptr;
+ btr_cur_t father_cursor;
+ mem_heap_t* heap;
+ ulint* offsets;
ulint data_size;
ulint n_recs;
ulint max_ins_size;
ulint max_ins_size_reorg;
ulint level;
- ulint comp;
+ block = btr_cur_get_block(cursor);
page = btr_cur_get_page(cursor);
index = btr_cur_get_index(cursor);
- comp = page_is_comp(page);
- ut_a((ibool)!!comp == dict_table_is_comp(index->table));
+ ut_a((ibool) !!page_is_comp(page) == dict_table_is_comp(index->table));
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
level = btr_page_get_level(page, mtr);
space = dict_index_get_space(index);
+ zip_size = dict_table_zip_size(index->table);
left_page_no = btr_page_get_prev(page, mtr);
right_page_no = btr_page_get_next(page, mtr);
@@ -2083,10 +2542,9 @@ btr_compress(
left_page_no, right_page_no);
#endif
- node_ptr = btr_page_get_father_node_ptr(index, page, mtr);
- ut_ad(!comp || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR);
- father_page = buf_frame_align(node_ptr);
- ut_a(comp == page_is_comp(father_page));
+ heap = mem_heap_create(100);
+ offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
+ &father_cursor);
/* Decide the page to which we try to merge and which will inherit
the locks */
@@ -2095,272 +2553,422 @@ btr_compress(
if (is_left) {
- merge_page = btr_page_get(space, left_page_no, RW_X_LATCH,
- mtr);
+ merge_block = btr_block_get(space, zip_size, left_page_no,
+ RW_X_LATCH, mtr);
+ merge_page = buf_block_get_frame(merge_block);
#ifdef UNIV_BTR_DEBUG
ut_a(btr_page_get_next(merge_page, mtr)
- == buf_frame_get_page_no(page));
+ == buf_block_get_page_no(block));
#endif /* UNIV_BTR_DEBUG */
} else if (right_page_no != FIL_NULL) {
- merge_page = btr_page_get(space, right_page_no, RW_X_LATCH,
- mtr);
+ merge_block = btr_block_get(space, zip_size, right_page_no,
+ RW_X_LATCH, mtr);
+ merge_page = buf_block_get_frame(merge_block);
#ifdef UNIV_BTR_DEBUG
ut_a(btr_page_get_prev(merge_page, mtr)
- == buf_frame_get_page_no(page));
+ == buf_block_get_page_no(block));
#endif /* UNIV_BTR_DEBUG */
} else {
/* The page is the only one on the level, lift the records
to the father */
- btr_lift_page_up(index, page, mtr);
-
- return;
+ btr_lift_page_up(index, block, mtr);
+ mem_heap_free(heap);
+ return(TRUE);
}
n_recs = page_get_n_recs(page);
data_size = page_get_data_size(page);
- ut_a(page_is_comp(merge_page) == comp);
+#ifdef UNIV_BTR_DEBUG
+ ut_a(page_is_comp(merge_page) == page_is_comp(page));
+#endif /* UNIV_BTR_DEBUG */
max_ins_size_reorg = page_get_max_insert_size_after_reorganize(
merge_page, n_recs);
if (data_size > max_ins_size_reorg) {
/* No space for merge */
+err_exit:
+ /* We play it safe and reset the free bits. */
+ if (zip_size
+ && page_is_leaf(merge_page)
+ && !dict_index_is_clust(index)) {
+ ibuf_reset_free_bits(merge_block);
+ }
- return;
+ mem_heap_free(heap);
+ return(FALSE);
}
ut_ad(page_validate(merge_page, index));
max_ins_size = page_get_max_insert_size(merge_page, n_recs);
- if (data_size > max_ins_size) {
+ if (UNIV_UNLIKELY(data_size > max_ins_size)) {
/* We have to reorganize merge_page */
- btr_page_reorganize(merge_page, index, mtr);
+ if (UNIV_UNLIKELY(!btr_page_reorganize(merge_block,
+ index, mtr))) {
+
+ goto err_exit;
+ }
max_ins_size = page_get_max_insert_size(merge_page, n_recs);
ut_ad(page_validate(merge_page, index));
- ut_ad(page_get_max_insert_size(merge_page, n_recs)
- == max_ins_size_reorg);
- }
+ ut_ad(max_ins_size == max_ins_size_reorg);
- if (data_size > max_ins_size) {
+ if (UNIV_UNLIKELY(data_size > max_ins_size)) {
- /* Add fault tolerance, though this should never happen */
+ /* Add fault tolerance, though this should
+ never happen */
- return;
+ goto err_exit;
+ }
}
- btr_search_drop_page_hash_index(page);
-
- /* Remove the page from the level list */
- btr_level_list_remove(page, mtr);
+ merge_page_zip = buf_block_get_page_zip(merge_block);
+#ifdef UNIV_ZIP_DEBUG
+ if (UNIV_LIKELY_NULL(merge_page_zip)) {
+ const page_zip_des_t* page_zip
+ = buf_block_get_page_zip(block);
+ ut_a(page_zip);
+ ut_a(page_zip_validate(merge_page_zip, merge_page));
+ ut_a(page_zip_validate(page_zip, page));
+ }
+#endif /* UNIV_ZIP_DEBUG */
+ /* Move records to the merge page */
if (is_left) {
- btr_node_ptr_delete(index, page, mtr);
- } else {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
- /* Replace the address of the old child node (= page) with the
- address of the merge page to the right */
+ rec_t* orig_pred = page_copy_rec_list_start(
+ merge_block, block, page_get_supremum_rec(page),
+ index, mtr);
- btr_node_ptr_set_child_page_no(node_ptr,
- rec_get_offsets(
- node_ptr, index,
- offsets_,
- ULINT_UNDEFINED,
- &heap),
- right_page_no, mtr);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
+ if (UNIV_UNLIKELY(!orig_pred)) {
+ goto err_exit;
}
- btr_node_ptr_delete(index, merge_page, mtr);
- }
- /* Move records to the merge page */
- if (is_left) {
- orig_pred = page_rec_get_prev(
- page_get_supremum_rec(merge_page));
- page_copy_rec_list_start(merge_page, page,
- page_get_supremum_rec(page),
- index, mtr);
+ btr_search_drop_page_hash_index(block);
- lock_update_merge_left(merge_page, orig_pred, page);
+ /* Remove the page from the level list */
+ btr_level_list_remove(space, zip_size, page, mtr);
+
+ btr_node_ptr_delete(index, block, mtr);
+ lock_update_merge_left(merge_block, orig_pred, block);
} else {
- orig_succ = page_rec_get_next(
- page_get_infimum_rec(merge_page));
- page_copy_rec_list_end(merge_page, page,
- page_get_infimum_rec(page),
- index, mtr);
+ rec_t* orig_succ;
+#ifdef UNIV_BTR_DEBUG
+ byte fil_page_prev[4];
+#endif /* UNIV_BTR_DEBUG */
+
+ if (UNIV_LIKELY_NULL(merge_page_zip)) {
+ /* The function page_zip_compress(), which will be
+ invoked by page_copy_rec_list_end() below,
+ requires that FIL_PAGE_PREV be FIL_NULL.
+ Clear the field, but prepare to restore it. */
+#ifdef UNIV_BTR_DEBUG
+ memcpy(fil_page_prev, merge_page + FIL_PAGE_PREV, 4);
+#endif /* UNIV_BTR_DEBUG */
+#if FIL_NULL != 0xffffffff
+# error "FIL_NULL != 0xffffffff"
+#endif
+ memset(merge_page + FIL_PAGE_PREV, 0xff, 4);
+ }
+
+ orig_succ = page_copy_rec_list_end(merge_block, block,
+ page_get_infimum_rec(page),
+ cursor->index, mtr);
+
+ if (UNIV_UNLIKELY(!orig_succ)) {
+ ut_a(merge_page_zip);
+#ifdef UNIV_BTR_DEBUG
+ /* FIL_PAGE_PREV was restored from merge_page_zip. */
+ ut_a(!memcmp(fil_page_prev,
+ merge_page + FIL_PAGE_PREV, 4));
+#endif /* UNIV_BTR_DEBUG */
+ goto err_exit;
+ }
+
+ btr_search_drop_page_hash_index(block);
- lock_update_merge_right(orig_succ, page);
+#ifdef UNIV_BTR_DEBUG
+ if (UNIV_LIKELY_NULL(merge_page_zip)) {
+ /* Restore FIL_PAGE_PREV in order to avoid an assertion
+ failure in btr_level_list_remove(), which will set
+ the field again to FIL_NULL. Even though this makes
+ merge_page and merge_page_zip inconsistent for a
+ split second, it is harmless, because the pages
+ are X-latched. */
+ memcpy(merge_page + FIL_PAGE_PREV, fil_page_prev, 4);
+ }
+#endif /* UNIV_BTR_DEBUG */
+
+ /* Remove the page from the level list */
+ btr_level_list_remove(space, zip_size, page, mtr);
+
+ /* Replace the address of the old child node (= page) with the
+ address of the merge page to the right */
+
+ btr_node_ptr_set_child_page_no(
+ btr_cur_get_rec(&father_cursor),
+ btr_cur_get_page_zip(&father_cursor),
+ offsets, right_page_no, mtr);
+ btr_node_ptr_delete(index, merge_block, mtr);
+
+ lock_update_merge_right(merge_block, orig_succ, block);
}
- /* We have added new records to merge_page: update its free bits */
- ibuf_update_free_bits_if_full(index, merge_page,
- UNIV_PAGE_SIZE, ULINT_UNDEFINED);
+ mem_heap_free(heap);
+
+ if (!dict_index_is_clust(index) && page_is_leaf(merge_page)) {
+ /* Update the free bits of the B-tree page in the
+ insert buffer bitmap. This has to be done in a
+ separate mini-transaction that is committed before the
+ main mini-transaction. We cannot update the insert
+ buffer bitmap in this mini-transaction, because
+ btr_compress() can be invoked recursively without
+ committing the mini-transaction in between. Since
+ insert buffer bitmap pages have a lower rank than
+ B-tree pages, we must not access other pages in the
+ same mini-transaction after accessing an insert buffer
+ bitmap page. */
+
+ /* The free bits in the insert buffer bitmap must
+ never exceed the free space on a page. It is safe to
+ decrement or reset the bits in the bitmap in a
+ mini-transaction that is committed before the
+ mini-transaction that affects the free space. */
+
+ /* It is unsafe to increment the bits in a separately
+ committed mini-transaction, because in crash recovery,
+ the free bits could momentarily be set too high. */
+
+ if (zip_size) {
+ /* Because the free bits may be incremented
+ and we cannot update the insert buffer bitmap
+ in the same mini-transaction, the only safe
+ thing we can do here is the pessimistic
+ approach: reset the free bits. */
+ ibuf_reset_free_bits(merge_block);
+ } else {
+ /* On uncompressed pages, the free bits will
+ never increase here. Thus, it is safe to
+ write the bits accurately in a separate
+ mini-transaction. */
+ ibuf_update_free_bits_if_full(merge_block,
+ UNIV_PAGE_SIZE,
+ ULINT_UNDEFINED);
+ }
+ }
ut_ad(page_validate(merge_page, index));
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!merge_page_zip || page_zip_validate(merge_page_zip, merge_page));
+#endif /* UNIV_ZIP_DEBUG */
/* Free the file page */
- btr_page_free(index, page, mtr);
+ btr_page_free(index, block, mtr);
- ut_ad(btr_check_node_ptr(index, merge_page, mtr));
+ ut_ad(btr_check_node_ptr(index, merge_block, mtr));
+ return(TRUE);
}
-/*****************************************************************
-Discards a page that is the only page on its level. */
+/*************************************************************//**
+Discards a page that is the only page on its level. This will empty
+the whole B-tree, leaving just an empty root page. This function
+should never be reached, because btr_compress(), which is invoked in
+delete operations, calls btr_lift_page_up() to flatten the B-tree. */
static
void
btr_discard_only_page_on_level(
/*===========================*/
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: page which is the only on its level */
- mtr_t* mtr) /* in: mtr */
+ dict_index_t* index, /*!< in: index tree */
+ buf_block_t* block, /*!< in: page which is the only on its level;
+ unless it is the root, it is asserted below
+ to be at level 0 and to hold one record */
+ mtr_t* mtr) /*!< in: mtr */
{
- rec_t* node_ptr;
- page_t* father_page;
- ulint page_level;
+ ulint page_level = 0; /* level expected for block; starts at 0 */
+ trx_id_t max_trx_id;
- ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
- ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- btr_search_drop_page_hash_index(page);
+ /* Save the PAGE_MAX_TRX_ID from the leaf page. It is written
+ to the emptied root at the end for non-clustered indexes. */
+ max_trx_id = page_get_max_trx_id(buf_block_get_frame(block));
- node_ptr = btr_page_get_father_node_ptr(index, page, mtr);
- father_page = buf_frame_align(node_ptr);
+ while (buf_block_get_page_no(block) != dict_index_get_page(index)) { /* walk up until block is the root */
+ btr_cur_t cursor;
+ buf_block_t* father;
+ const page_t* page = buf_block_get_frame(block);
- page_level = btr_page_get_level(page, mtr);
+ ut_a(page_get_n_recs(page) == 1);
+ ut_a(page_level == btr_page_get_level(page, mtr));
+ ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
+ ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
- lock_update_discard(page_get_supremum_rec(father_page), page);
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ btr_search_drop_page_hash_index(block);
- btr_page_set_level(father_page, page_level, mtr);
+ btr_page_get_father(index, block, mtr, &cursor); /* look the father up before block is freed */
+ father = btr_cur_get_block(&cursor);
- /* Free the file page */
- btr_page_free(index, page, mtr);
+ lock_update_discard(father, PAGE_HEAP_NO_SUPREMUM, block);
+
+ /* Free the file page */
+ btr_page_free(index, block, mtr);
- if (buf_frame_get_page_no(father_page) == dict_index_get_page(index)) {
- /* The father is the root page */
+ block = father; /* continue one level up */
+ page_level++;
+ }
- btr_page_empty(father_page, mtr);
+ /* block is the root page, which must be empty, except
+ for the node pointer to the (now discarded) block(s). */
- /* We play safe and reset the free bits for the father */
- ibuf_reset_free_bits(index, father_page);
- } else {
- ut_ad(page_get_n_recs(father_page) == 1);
+#ifdef UNIV_BTR_DEBUG
+ if (!dict_index_is_ibuf(index)) {
+ const page_t* root = buf_block_get_frame(block);
+ const ulint space = dict_index_get_space(index);
+ ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
+ + root, space));
+ ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
+ + root, space));
+ }
+#endif /* UNIV_BTR_DEBUG */
+
+ btr_page_empty(block, buf_block_get_page_zip(block), index, 0, mtr); /* empty the root, passing 0 as its level */
- btr_discard_only_page_on_level(index, father_page, mtr);
+ if (!dict_index_is_clust(index)) {
+ /* We play it safe and reset the free bits for the root */
+ ibuf_reset_free_bits(block);
+
+ if (page_is_leaf(buf_block_get_frame(block))) {
+ /* Write back the PAGE_MAX_TRX_ID saved at the top. */
+ ut_a(!ut_dulint_is_zero(max_trx_id));
+ page_set_max_trx_id(block,
+ buf_block_get_page_zip(block),
+ max_trx_id, mtr);
+ }
}
}
-/*****************************************************************
+/*************************************************************//**
Discards a page from a B-tree. This is used to remove the last record from
a B-tree page: the whole page must be removed at the same time. This cannot
be used for the root page, which is allowed to be empty. */
-
+UNIV_INTERN
void
btr_discard_page(
/*=============*/
- btr_cur_t* cursor, /* in: cursor on the page to discard: not on
+ btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on
the root page */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
dict_index_t* index;
ulint space;
+ ulint zip_size;
ulint left_page_no;
ulint right_page_no;
+ buf_block_t* merge_block;
page_t* merge_page;
+ buf_block_t* block;
page_t* page;
rec_t* node_ptr;
- page = btr_cur_get_page(cursor);
+ block = btr_cur_get_block(cursor);
index = btr_cur_get_index(cursor);
- ut_ad(dict_index_get_page(index) != buf_frame_get_page_no(page));
+ ut_ad(dict_index_get_page(index) != buf_block_get_page_no(block));
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
space = dict_index_get_space(index);
+ zip_size = dict_table_zip_size(index->table);
/* Decide the page which will inherit the locks */
- left_page_no = btr_page_get_prev(page, mtr);
- right_page_no = btr_page_get_next(page, mtr);
+ left_page_no = btr_page_get_prev(buf_block_get_frame(block), mtr);
+ right_page_no = btr_page_get_next(buf_block_get_frame(block), mtr);
if (left_page_no != FIL_NULL) {
- merge_page = btr_page_get(space, left_page_no, RW_X_LATCH,
- mtr);
+ merge_block = btr_block_get(space, zip_size, left_page_no,
+ RW_X_LATCH, mtr);
+ merge_page = buf_block_get_frame(merge_block);
#ifdef UNIV_BTR_DEBUG
ut_a(btr_page_get_next(merge_page, mtr)
- == buf_frame_get_page_no(page));
+ == buf_block_get_page_no(block));
#endif /* UNIV_BTR_DEBUG */
} else if (right_page_no != FIL_NULL) {
- merge_page = btr_page_get(space, right_page_no, RW_X_LATCH,
- mtr);
+ merge_block = btr_block_get(space, zip_size, right_page_no,
+ RW_X_LATCH, mtr);
+ merge_page = buf_block_get_frame(merge_block);
#ifdef UNIV_BTR_DEBUG
ut_a(btr_page_get_prev(merge_page, mtr)
- == buf_frame_get_page_no(page));
+ == buf_block_get_page_no(block));
#endif /* UNIV_BTR_DEBUG */
} else {
- btr_discard_only_page_on_level(index, page, mtr);
+ btr_discard_only_page_on_level(index, block, mtr);
return;
}
+ page = buf_block_get_frame(block);
ut_a(page_is_comp(merge_page) == page_is_comp(page));
- btr_search_drop_page_hash_index(page);
+ btr_search_drop_page_hash_index(block);
- if (left_page_no == FIL_NULL && btr_page_get_level(page, mtr) > 0) {
+ if (left_page_no == FIL_NULL && !page_is_leaf(page)) {
/* We have to mark the leftmost node pointer on the right
side page as the predefined minimum record */
-
node_ptr = page_rec_get_next(page_get_infimum_rec(merge_page));
ut_ad(page_rec_is_user_rec(node_ptr));
- btr_set_min_rec_mark(node_ptr, page_is_comp(merge_page), mtr);
+ /* This will make page_zip_validate() fail on merge_page
+ until btr_level_list_remove() completes. This is harmless,
+ because everything will take place within a single
+ mini-transaction and because writing to the redo log
+ is an atomic operation (performed by mtr_commit()). */
+ btr_set_min_rec_mark(node_ptr, mtr);
}
- btr_node_ptr_delete(index, page, mtr);
+ btr_node_ptr_delete(index, block, mtr);
/* Remove the page from the level list */
- btr_level_list_remove(page, mtr);
+ btr_level_list_remove(space, zip_size, page, mtr);
+#ifdef UNIV_ZIP_DEBUG
+ {
+ page_zip_des_t* merge_page_zip
+ = buf_block_get_page_zip(merge_block);
+ ut_a(!merge_page_zip
+ || page_zip_validate(merge_page_zip, merge_page));
+ }
+#endif /* UNIV_ZIP_DEBUG */
if (left_page_no != FIL_NULL) {
- lock_update_discard(page_get_supremum_rec(merge_page), page);
+ lock_update_discard(merge_block, PAGE_HEAP_NO_SUPREMUM,
+ block);
} else {
- lock_update_discard(page_rec_get_next(
- page_get_infimum_rec(merge_page)),
- page);
+ lock_update_discard(merge_block,
+ lock_get_min_heap_no(merge_block),
+ block);
}
/* Free the file page */
- btr_page_free(index, page, mtr);
+ btr_page_free(index, block, mtr);
- ut_ad(btr_check_node_ptr(index, merge_page, mtr));
+ ut_ad(btr_check_node_ptr(index, merge_block, mtr));
}
#ifdef UNIV_BTR_PRINT
-/*****************************************************************
+/*************************************************************//**
Prints size info of a B-tree. */
-
+UNIV_INTERN
void
btr_print_size(
/*===========*/
- dict_index_t* index) /* in: index tree */
+ dict_index_t* index) /*!< in: index tree */
{
page_t* root;
fseg_header_t* seg;
mtr_t mtr;
- if (index->type & DICT_IBUF) {
+ if (dict_index_is_ibuf(index)) {
fputs("Sorry, cannot print info of an ibuf tree:"
" use ibuf functions\n", stderr);
@@ -2387,58 +2995,60 @@ btr_print_size(
mtr_commit(&mtr);
}
-/****************************************************************
+/************************************************************//**
Prints recursively index tree pages. */
static
void
btr_print_recursive(
/*================*/
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: index page */
- ulint width, /* in: print this many entries from start
+ dict_index_t* index, /*!< in: index tree */
+ buf_block_t* block, /*!< in: index page */
+ ulint width, /*!< in: print this many entries from start
and end */
- mem_heap_t** heap, /* in/out: heap for rec_get_offsets() */
- ulint** offsets,/* in/out: buffer for rec_get_offsets() */
- mtr_t* mtr) /* in: mtr */
+ mem_heap_t** heap, /*!< in/out: heap for rec_get_offsets() */
+ ulint** offsets,/*!< in/out: buffer for rec_get_offsets() */
+ mtr_t* mtr) /*!< in: mtr */
{
+ const page_t* page = buf_block_get_frame(block);
page_cur_t cursor;
ulint n_recs;
ulint i = 0;
mtr_t mtr2;
- rec_t* node_ptr;
- page_t* child;
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
fprintf(stderr, "NODE ON LEVEL %lu page number %lu\n",
(ulong) btr_page_get_level(page, mtr),
- (ulong) buf_frame_get_page_no(page));
+ (ulong) buf_block_get_page_no(block));
- page_print(page, index, width, width);
+ page_print(block, index, width, width);
n_recs = page_get_n_recs(page);
- page_cur_set_before_first(page, &cursor);
+ page_cur_set_before_first(block, &cursor);
page_cur_move_to_next(&cursor);
while (!page_cur_is_after_last(&cursor)) {
- if (0 == btr_page_get_level(page, mtr)) {
+ if (page_is_leaf(page)) {
/* If this is the leaf level, do nothing */
} else if ((i <= width) || (i >= n_recs - width)) {
+ const rec_t* node_ptr;
+
mtr_start(&mtr2);
node_ptr = page_cur_get_rec(&cursor);
*offsets = rec_get_offsets(node_ptr, index, *offsets,
ULINT_UNDEFINED, heap);
- child = btr_node_ptr_get_child(node_ptr,
- *offsets, &mtr2);
- btr_print_recursive(index, child, width,
- heap, offsets, &mtr2);
+ btr_print_recursive(index,
+ btr_node_ptr_get_child(node_ptr,
+ index,
+ *offsets,
+ &mtr2),
+ width, heap, offsets, &mtr2);
mtr_commit(&mtr2);
}
@@ -2447,29 +3057,29 @@ btr_print_recursive(
}
}
-/******************************************************************
+/**************************************************************//**
Prints directories and other info of all nodes in the tree. */
-
+UNIV_INTERN
void
btr_print_index(
/*============*/
- dict_index_t* index, /* in: index */
- ulint width) /* in: print this many entries from start
+ dict_index_t* index, /*!< in: index */
+ ulint width) /*!< in: print this many entries from start
and end */
{
mtr_t mtr;
- page_t* root;
+ buf_block_t* root;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
fputs("--------------------------\n"
"INDEX TREE PRINT\n", stderr);
mtr_start(&mtr);
- root = btr_root_get(index, &mtr);
+ root = btr_root_block_get(index, &mtr);
btr_print_recursive(index, root, width, &heap, &offsets, &mtr);
if (UNIV_LIKELY_NULL(heap)) {
@@ -2483,91 +3093,90 @@ btr_print_index(
#endif /* UNIV_BTR_PRINT */
#ifdef UNIV_DEBUG
-/****************************************************************
-Checks that the node pointer to a page is appropriate. */
-
+/************************************************************//**
+Checks that the node pointer to a page is appropriate.
+@return TRUE */
+UNIV_INTERN
ibool
btr_check_node_ptr(
/*===============*/
- /* out: TRUE */
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: index page */
- mtr_t* mtr) /* in: mtr */
+ dict_index_t* index, /*!< in: index tree */
+ buf_block_t* block, /*!< in: index page */
+ mtr_t* mtr) /*!< in: mtr */
{
mem_heap_t* heap;
- rec_t* node_ptr;
- dtuple_t* node_ptr_tuple;
+ dtuple_t* tuple;
+ ulint* offsets;
+ btr_cur_t cursor;
+ page_t* page = buf_block_get_frame(block);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- if (dict_index_get_page(index) == buf_frame_get_page_no(page)) {
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ if (dict_index_get_page(index) == buf_block_get_page_no(block)) {
return(TRUE);
}
- node_ptr = btr_page_get_father_node_ptr(index, page, mtr);
+ heap = mem_heap_create(256);
+ offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
+ &cursor);
- if (btr_page_get_level(page, mtr) == 0) {
+ if (page_is_leaf(page)) {
- return(TRUE);
+ goto func_exit;
}
- heap = mem_heap_create(256);
-
- node_ptr_tuple = dict_index_build_node_ptr(
+ tuple = dict_index_build_node_ptr(
index, page_rec_get_next(page_get_infimum_rec(page)), 0, heap,
btr_page_get_level(page, mtr));
- ut_a(!cmp_dtuple_rec(node_ptr_tuple, node_ptr,
- rec_get_offsets(node_ptr, index,
- NULL, ULINT_UNDEFINED, &heap)));
-
+ ut_a(!cmp_dtuple_rec(tuple, btr_cur_get_rec(&cursor), offsets));
+func_exit:
mem_heap_free(heap);
return(TRUE);
}
#endif /* UNIV_DEBUG */
-/****************************************************************
+/************************************************************//**
Display identification information for a record. */
static
void
btr_index_rec_validate_report(
/*==========================*/
- page_t* page, /* in: index page */
- rec_t* rec, /* in: index record */
- dict_index_t* index) /* in: index */
+ const page_t* page, /*!< in: index page */
+ const rec_t* rec, /*!< in: index record */
+ const dict_index_t* index) /*!< in: index */
{
fputs("InnoDB: Record in ", stderr);
dict_index_name_print(stderr, NULL, index);
fprintf(stderr, ", page %lu, at offset %lu\n",
- buf_frame_get_page_no(page), (ulint)(rec - page));
+ page_get_page_no(page), (ulint) page_offset(rec));
}
-/****************************************************************
+/************************************************************//**
Checks the size and number of fields in a record based on the definition of
-the index. */
-
+the index.
+@return TRUE if ok */
+UNIV_INTERN
ibool
btr_index_rec_validate(
/*===================*/
- /* out: TRUE if ok */
- rec_t* rec, /* in: index record */
- dict_index_t* index, /* in: index */
- ibool dump_on_error) /* in: TRUE if the function
- should print hex dump of record
- and page on error */
+ const rec_t* rec, /*!< in: index record */
+ const dict_index_t* index, /*!< in: index */
+ ibool dump_on_error) /*!< in: TRUE if the function
+ should print hex dump of record
+ and page on error */
{
ulint len;
ulint n;
ulint i;
- page_t* page;
+ const page_t* page;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
- page = buf_frame_align(rec);
+ page = page_align(rec);
if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
/* The insert buffer index tree can contain records from any
@@ -2596,7 +3205,7 @@ btr_index_rec_validate(
(ulong) rec_get_n_fields_old(rec), (ulong) n);
if (dump_on_error) {
- buf_page_print(page);
+ buf_page_print(page, 0);
fputs("InnoDB: corrupt record ", stderr);
rec_print_old(stderr, rec);
@@ -2609,9 +3218,9 @@ btr_index_rec_validate(
for (i = 0; i < n; i++) {
ulint fixed_size = dict_col_get_fixed_size(
- dict_index_get_nth_col(index, i));
+ dict_index_get_nth_col(index, i), page_is_comp(page));
- rec_get_nth_field(rec, offsets, i, &len);
+ rec_get_nth_field_offs(offsets, i, &len);
/* Note that if fixed_size != 0, it equals the
length of a fixed-size column in the clustered index.
@@ -2634,7 +3243,7 @@ btr_index_rec_validate(
(ulong) i, (ulong) len, (ulong) fixed_size);
if (dump_on_error) {
- buf_page_print(page);
+ buf_page_print(page, 0);
fputs("InnoDB: corrupt record ", stderr);
rec_print_new(stderr, rec, offsets);
@@ -2653,21 +3262,21 @@ btr_index_rec_validate(
return(TRUE);
}
-/****************************************************************
+/************************************************************//**
Checks the size and number of fields in records based on the definition of
-the index. */
+the index.
+@return TRUE if ok */
static
ibool
btr_index_page_validate(
/*====================*/
- /* out: TRUE if ok */
- page_t* page, /* in: index page */
- dict_index_t* index) /* in: index */
+ buf_block_t* block, /*!< in: index page */
+ dict_index_t* index) /*!< in: index */
{
page_cur_t cur;
ibool ret = TRUE;
- page_cur_set_before_first(page, &cur);
+ page_cur_set_before_first(block, &cur);
page_cur_move_to_next(&cur);
for (;;) {
@@ -2687,19 +3296,18 @@ btr_index_page_validate(
return(ret);
}
-/****************************************************************
+/************************************************************//**
Report an error on one page of an index tree. */
static
void
btr_validate_report1(
/*=================*/
- /* out: TRUE if ok */
- dict_index_t* index, /* in: index */
- ulint level, /* in: B-tree level */
- page_t* page) /* in: index page */
+ dict_index_t* index, /*!< in: index */
+ ulint level, /*!< in: B-tree level */
+ const buf_block_t* block) /*!< in: index page */
{
fprintf(stderr, "InnoDB: Error in page %lu of ",
- buf_frame_get_page_no(page));
+ buf_block_get_page_no(block));
dict_index_name_print(stderr, NULL, index);
if (level) {
fprintf(stderr, ", index tree level %lu", level);
@@ -2707,21 +3315,20 @@ btr_validate_report1(
putc('\n', stderr);
}
-/****************************************************************
+/************************************************************//**
Report an error on two pages of an index tree. */
static
void
btr_validate_report2(
/*=================*/
- /* out: TRUE if ok */
- dict_index_t* index, /* in: index */
- ulint level, /* in: B-tree level */
- page_t* page1, /* in: first index page */
- page_t* page2) /* in: second index page */
+ const dict_index_t* index, /*!< in: index */
+ ulint level, /*!< in: B-tree level */
+ const buf_block_t* block1, /*!< in: first index page */
+ const buf_block_t* block2) /*!< in: second index page */
{
fprintf(stderr, "InnoDB: Error in pages %lu and %lu of ",
- buf_frame_get_page_no(page1),
- buf_frame_get_page_no(page2));
+ buf_block_get_page_no(block1),
+ buf_block_get_page_no(block2));
dict_index_name_print(stderr, NULL, index);
if (level) {
fprintf(stderr, ", index tree level %lu", level);
@@ -2729,24 +3336,26 @@ btr_validate_report2(
putc('\n', stderr);
}
-/****************************************************************
-Validates index tree level. */
+/************************************************************//**
+Validates index tree level.
+@return TRUE if ok */
static
ibool
btr_validate_level(
/*===============*/
- /* out: TRUE if ok */
- dict_index_t* index, /* in: index tree */
- trx_t* trx, /* in: transaction or NULL */
- ulint level) /* in: level number */
+ dict_index_t* index, /*!< in: index tree */
+ trx_t* trx, /*!< in: transaction or NULL */
+ ulint level) /*!< in: level number */
{
ulint space;
+ ulint zip_size;
+ buf_block_t* block;
page_t* page;
+ buf_block_t* right_block = 0; /* remove warning */
page_t* right_page = 0; /* remove warning */
page_t* father_page;
- page_t* right_father_page;
- rec_t* node_ptr;
- rec_t* right_node_ptr;
+ btr_cur_t node_cur;
+ btr_cur_t right_node_cur;
rec_t* rec;
ulint right_page_no;
ulint left_page_no;
@@ -2757,26 +3366,39 @@ btr_validate_level(
mem_heap_t* heap = mem_heap_create(256);
ulint* offsets = NULL;
ulint* offsets2= NULL;
+#ifdef UNIV_ZIP_DEBUG
+ page_zip_des_t* page_zip;
+#endif /* UNIV_ZIP_DEBUG */
mtr_start(&mtr);
mtr_x_lock(dict_index_get_lock(index), &mtr);
- page = btr_root_get(index, &mtr);
+ block = btr_root_block_get(index, &mtr);
+ page = buf_block_get_frame(block);
- space = buf_frame_get_space_id(page);
+ space = dict_index_get_space(index);
+ zip_size = dict_table_zip_size(index->table);
while (level != btr_page_get_level(page, &mtr)) {
+ const rec_t* node_ptr;
- ut_a(btr_page_get_level(page, &mtr) > 0);
+ ut_a(space == buf_block_get_space(block));
+ ut_a(space == page_get_space_id(page));
+#ifdef UNIV_ZIP_DEBUG
+ page_zip = buf_block_get_page_zip(block);
+ ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+ ut_a(!page_is_leaf(page));
- page_cur_set_before_first(page, &cursor);
+ page_cur_set_before_first(block, &cursor);
page_cur_move_to_next(&cursor);
node_ptr = page_cur_get_rec(&cursor);
offsets = rec_get_offsets(node_ptr, index, offsets,
ULINT_UNDEFINED, &heap);
- page = btr_node_ptr_get_child(node_ptr, offsets, &mtr);
+ block = btr_node_ptr_get_child(node_ptr, index, offsets, &mtr);
+ page = buf_block_get_frame(block);
}
/* Now we are on the desired level. Loop through the pages on that
@@ -2791,17 +3413,22 @@ loop:
offsets = offsets2 = NULL;
mtr_x_lock(dict_index_get_lock(index), &mtr);
+#ifdef UNIV_ZIP_DEBUG
+ page_zip = buf_block_get_page_zip(block);
+ ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+
/* Check ordering etc. of records */
if (!page_validate(page, index)) {
- btr_validate_report1(index, level, page);
+ btr_validate_report1(index, level, block);
ret = FALSE;
} else if (level == 0) {
/* We are on level 0. Check that the records have the right
number of fields, and field lengths are right. */
- if (!btr_index_page_validate(page, index)) {
+ if (!btr_index_page_validate(block, index)) {
ret = FALSE;
}
@@ -2812,32 +3439,32 @@ loop:
right_page_no = btr_page_get_next(page, &mtr);
left_page_no = btr_page_get_prev(page, &mtr);
- ut_a((page_get_n_recs(page) > 0)
- || ((level == 0)
- && (buf_frame_get_page_no(page)
- == dict_index_get_page(index))));
+ ut_a(page_get_n_recs(page) > 0 || (level == 0
+ && page_get_page_no(page)
+ == dict_index_get_page(index)));
if (right_page_no != FIL_NULL) {
- rec_t* right_rec;
- right_page = btr_page_get(space, right_page_no, RW_X_LATCH,
- &mtr);
+ const rec_t* right_rec;
+ right_block = btr_block_get(space, zip_size, right_page_no,
+ RW_X_LATCH, &mtr);
+ right_page = buf_block_get_frame(right_block);
if (UNIV_UNLIKELY(btr_page_get_prev(right_page, &mtr)
- != buf_frame_get_page_no(page))) {
- btr_validate_report2(index, level, page, right_page);
+ != page_get_page_no(page))) {
+ btr_validate_report2(index, level, block, right_block);
fputs("InnoDB: broken FIL_PAGE_NEXT"
" or FIL_PAGE_PREV links\n", stderr);
- buf_page_print(page);
- buf_page_print(right_page);
+ buf_page_print(page, 0);
+ buf_page_print(right_page, 0);
ret = FALSE;
}
if (UNIV_UNLIKELY(page_is_comp(right_page)
!= page_is_comp(page))) {
- btr_validate_report2(index, level, page, right_page);
+ btr_validate_report2(index, level, block, right_block);
fputs("InnoDB: 'compact' flag mismatch\n", stderr);
- buf_page_print(page);
- buf_page_print(right_page);
+ buf_page_print(page, 0);
+ buf_page_print(right_page, 0);
ret = FALSE;
@@ -2855,13 +3482,13 @@ loop:
offsets, offsets2,
index) >= 0)) {
- btr_validate_report2(index, level, page, right_page);
+ btr_validate_report2(index, level, block, right_block);
fputs("InnoDB: records in wrong order"
" on adjacent pages\n", stderr);
- buf_page_print(page);
- buf_page_print(right_page);
+ buf_page_print(page, 0);
+ buf_page_print(right_page, 0);
fputs("InnoDB: record ", stderr);
rec = page_rec_get_prev(page_get_supremum_rec(page));
@@ -2883,54 +3510,54 @@ loop:
page_is_comp(page)));
}
- if (buf_frame_get_page_no(page) != dict_index_get_page(index)) {
+ if (buf_block_get_page_no(block) != dict_index_get_page(index)) {
/* Check father node pointers */
- node_ptr = btr_page_get_father_node_ptr(index, page, &mtr);
- father_page = buf_frame_align(node_ptr);
- offsets = rec_get_offsets(node_ptr, index,
- offsets, ULINT_UNDEFINED, &heap);
+ rec_t* node_ptr;
+
+ offsets = btr_page_get_father_block(offsets, heap, index,
+ block, &mtr, &node_cur);
+ father_page = btr_cur_get_page(&node_cur);
+ node_ptr = btr_cur_get_rec(&node_cur);
+
+ btr_cur_position(
+ index, page_rec_get_prev(page_get_supremum_rec(page)),
+ block, &node_cur);
+ offsets = btr_page_get_father_node_ptr(offsets, heap,
+ &node_cur, &mtr);
+
+ if (UNIV_UNLIKELY(node_ptr != btr_cur_get_rec(&node_cur))
+ || UNIV_UNLIKELY(btr_node_ptr_get_child_page_no(node_ptr,
+ offsets)
+ != buf_block_get_page_no(block))) {
- if (btr_node_ptr_get_child_page_no(node_ptr, offsets)
- != buf_frame_get_page_no(page)
- || node_ptr != btr_page_get_father_for_rec(
- index, page,
- page_rec_get_prev(page_get_supremum_rec(page)),
- &mtr)) {
- btr_validate_report1(index, level, page);
+ btr_validate_report1(index, level, block);
fputs("InnoDB: node pointer to the page is wrong\n",
stderr);
- buf_page_print(father_page);
- buf_page_print(page);
+ buf_page_print(father_page, 0);
+ buf_page_print(page, 0);
fputs("InnoDB: node ptr ", stderr);
- rec_print_new(stderr, node_ptr, offsets);
+ rec_print(stderr, node_ptr, index);
+ rec = btr_cur_get_rec(&node_cur);
fprintf(stderr, "\n"
"InnoDB: node ptr child page n:o %lu\n",
- (unsigned long) btr_node_ptr_get_child_page_no
- (node_ptr, offsets));
+ (ulong) btr_node_ptr_get_child_page_no(
+ rec, offsets));
fputs("InnoDB: record on page ", stderr);
- rec = btr_page_get_father_for_rec(
- index, page,
- page_rec_get_prev(page_get_supremum_rec(page)),
- &mtr);
- rec_print(stderr, rec, index);
+ rec_print_new(stderr, rec, offsets);
putc('\n', stderr);
ret = FALSE;
goto node_ptr_fails;
}
- if (btr_page_get_level(page, &mtr) > 0) {
- offsets = rec_get_offsets(node_ptr, index,
- offsets, ULINT_UNDEFINED,
- &heap);
-
+ if (!page_is_leaf(page)) {
node_ptr_tuple = dict_index_build_node_ptr(
index,
page_rec_get_next(page_get_infimum_rec(page)),
@@ -2938,13 +3565,13 @@ loop:
if (cmp_dtuple_rec(node_ptr_tuple, node_ptr,
offsets)) {
- rec_t* first_rec = page_rec_get_next(
+ const rec_t* first_rec = page_rec_get_next(
page_get_infimum_rec(page));
- btr_validate_report1(index, level, page);
+ btr_validate_report1(index, level, block);
- buf_page_print(father_page);
- buf_page_print(page);
+ buf_page_print(father_page, 0);
+ buf_page_print(page, 0);
fputs("InnoDB: Error: node ptrs differ"
" on levels > 0\n"
@@ -2970,30 +3597,35 @@ loop:
page_get_supremum_rec(father_page)));
ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL);
} else {
- right_node_ptr = btr_page_get_father_node_ptr(
- index, right_page, &mtr);
- if (page_rec_get_next(node_ptr)
+ const rec_t* right_node_ptr
+ = page_rec_get_next(node_ptr);
+
+ offsets = btr_page_get_father_block(
+ offsets, heap, index, right_block,
+ &mtr, &right_node_cur);
+ if (right_node_ptr
!= page_get_supremum_rec(father_page)) {
- if (right_node_ptr
- != page_rec_get_next(node_ptr)) {
+ if (btr_cur_get_rec(&right_node_cur)
+ != right_node_ptr) {
ret = FALSE;
fputs("InnoDB: node pointer to"
" the right page is wrong\n",
stderr);
btr_validate_report1(index, level,
- page);
+ block);
- buf_page_print(father_page);
- buf_page_print(page);
- buf_page_print(right_page);
+ buf_page_print(father_page, 0);
+ buf_page_print(page, 0);
+ buf_page_print(right_page, 0);
}
} else {
- right_father_page = buf_frame_align(
- right_node_ptr);
+ page_t* right_father_page
+ = btr_cur_get_page(&right_node_cur);
- if (right_node_ptr != page_rec_get_next(
+ if (btr_cur_get_rec(&right_node_cur)
+ != page_rec_get_next(
page_get_infimum_rec(
right_father_page))) {
ret = FALSE;
@@ -3002,15 +3634,15 @@ loop:
stderr);
btr_validate_report1(index, level,
- page);
+ block);
- buf_page_print(father_page);
- buf_page_print(right_father_page);
- buf_page_print(page);
- buf_page_print(right_page);
+ buf_page_print(father_page, 0);
+ buf_page_print(right_father_page, 0);
+ buf_page_print(page, 0);
+ buf_page_print(right_page, 0);
}
- if (buf_frame_get_page_no(right_father_page)
+ if (page_get_page_no(right_father_page)
!= btr_page_get_next(father_page, &mtr)) {
ret = FALSE;
@@ -3019,12 +3651,12 @@ loop:
stderr);
btr_validate_report1(index, level,
- page);
+ block);
- buf_page_print(father_page);
- buf_page_print(right_father_page);
- buf_page_print(page);
- buf_page_print(right_page);
+ buf_page_print(father_page, 0);
+ buf_page_print(right_father_page, 0);
+ buf_page_print(page, 0);
+ buf_page_print(right_page, 0);
}
}
}
@@ -3039,7 +3671,9 @@ node_ptr_fails:
if (right_page_no != FIL_NULL) {
mtr_start(&mtr);
- page = btr_page_get(space, right_page_no, RW_X_LATCH, &mtr);
+ block = btr_block_get(space, zip_size, right_page_no,
+ RW_X_LATCH, &mtr);
+ page = buf_block_get_frame(block);
goto loop;
}
@@ -3048,15 +3682,15 @@ node_ptr_fails:
return(ret);
}
-/******************************************************************
-Checks the consistency of an index tree. */
-
+/**************************************************************//**
+Checks the consistency of an index tree.
+@return TRUE if ok */
+UNIV_INTERN
ibool
btr_validate_index(
/*===============*/
- /* out: TRUE if ok */
- dict_index_t* index, /* in: index */
- trx_t* trx) /* in: transaction or NULL */
+ dict_index_t* index, /*!< in: index */
+ trx_t* trx) /*!< in: transaction or NULL */
{
mtr_t mtr;
page_t* root;
@@ -3082,3 +3716,4 @@ btr_validate_index(
return(TRUE);
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
index d2a2e4d2157..46dfb5d1a46 100644
--- a/storage/innobase/btr/btr0cur.c
+++ b/storage/innobase/btr/btr0cur.c
@@ -1,4 +1,30 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file btr/btr0cur.c
The index tree cursor
All changes that row operations make to a B-tree or the records
@@ -12,8 +38,6 @@ many pages in the tablespace before we start the operation, because
if leaf splitting has been started, it is difficult to undo, except
by crashing the database and doing a roll-forward.
-(c) 1994-2001 Innobase Oy
-
Created 10/16/1994 Heikki Tuuri
*******************************************************/
@@ -23,50 +47,69 @@ Created 10/16/1994 Heikki Tuuri
#include "btr0cur.ic"
#endif
+#include "row0upd.h"
+#ifndef UNIV_HOTBACKUP
#include "mtr0log.h"
#include "page0page.h"
+#include "page0zip.h"
#include "rem0rec.h"
#include "rem0cmp.h"
+#include "buf0lru.h"
#include "btr0btr.h"
#include "btr0sea.h"
-#include "row0upd.h"
#include "trx0rec.h"
+#include "trx0roll.h" /* trx_is_recv() */
#include "que0que.h"
#include "row0row.h"
#include "srv0srv.h"
#include "ibuf0ibuf.h"
#include "lock0lock.h"
+#include "zlib.h"
#ifdef UNIV_DEBUG
-/* If the following is set to TRUE, this module prints a lot of
+/** If the following is set to TRUE, this module prints a lot of
trace information of individual record operations */
-ibool btr_cur_print_record_ops = FALSE;
+UNIV_INTERN ibool btr_cur_print_record_ops = FALSE;
#endif /* UNIV_DEBUG */
-ulint btr_cur_n_non_sea = 0;
-ulint btr_cur_n_sea = 0;
-ulint btr_cur_n_non_sea_old = 0;
-ulint btr_cur_n_sea_old = 0;
-
-/* In the optimistic insert, if the insert does not fit, but this much space
+/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */
+UNIV_INTERN ulint btr_cur_n_non_sea = 0;
+/** Number of successful adaptive hash index lookups in
+btr_cur_search_to_nth_level(). */
+UNIV_INTERN ulint btr_cur_n_sea = 0;
+/** Old value of btr_cur_n_non_sea. Copied by
+srv_refresh_innodb_monitor_stats(). Referenced by
+srv_printf_innodb_monitor(). */
+UNIV_INTERN ulint btr_cur_n_non_sea_old = 0;
+/** Old value of btr_cur_n_sea. Copied by
+srv_refresh_innodb_monitor_stats(). Referenced by
+srv_printf_innodb_monitor(). */
+UNIV_INTERN ulint btr_cur_n_sea_old = 0;
+
+/** In the optimistic insert, if the insert does not fit, but this much space
can be released by page reorganize, then it is reorganized */
-
#define BTR_CUR_PAGE_REORGANIZE_LIMIT (UNIV_PAGE_SIZE / 32)
-/* When estimating number of different key values in an index, sample
-this many index pages */
-#define BTR_KEY_VAL_ESTIMATE_N_PAGES 8
-
-/* The structure of a BLOB part header */
+/** The structure of a BLOB part header */
+/* @{ */
/*--------------------------------------*/
-#define BTR_BLOB_HDR_PART_LEN 0 /* BLOB part len on this
+#define BTR_BLOB_HDR_PART_LEN 0 /*!< BLOB part len on this
page */
-#define BTR_BLOB_HDR_NEXT_PAGE_NO 4 /* next BLOB part page no,
+#define BTR_BLOB_HDR_NEXT_PAGE_NO 4 /*!< next BLOB part page no,
FIL_NULL if none */
/*--------------------------------------*/
-#define BTR_BLOB_HDR_SIZE 8
-
-/***********************************************************************
+#define BTR_BLOB_HDR_SIZE 8 /*!< Size of a BLOB
+ part header, in bytes */
+/* @} */
+#endif /* !UNIV_HOTBACKUP */
+
+/** A BLOB field reference full of zero, for use in assertions and tests.
+Initially, BLOB field references are set to zero, in
+dtuple_convert_big_rec(). */
+UNIV_INTERN const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE];
+
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
Marks all extern fields in a record as owned by the record. This function
should be called if the delete mark of a record is removed: a not delete
marked record always owns all its extern fields. */
@@ -74,165 +117,193 @@ static
void
btr_cur_unmark_extern_fields(
/*=========================*/
- rec_t* rec, /* in: record in a clustered index */
- mtr_t* mtr, /* in: mtr */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/***********************************************************************
+ page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed
+ part will be updated, or NULL */
+ rec_t* rec, /*!< in/out: record in a clustered index */
+ dict_index_t* index, /*!< in: index of the page */
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ mtr_t* mtr); /*!< in: mtr, or NULL if not logged */
+/*******************************************************************//**
Adds path information to the cursor for the current page, for which
the binary search has been performed. */
static
void
btr_cur_add_path_info(
/*==================*/
- btr_cur_t* cursor, /* in: cursor positioned on a page */
- ulint height, /* in: height of the page in tree;
+ btr_cur_t* cursor, /*!< in: cursor positioned on a page */
+ ulint height, /*!< in: height of the page in tree;
0 means leaf node */
- ulint root_height); /* in: root node height in tree */
-/***************************************************************
+ ulint root_height); /*!< in: root node height in tree */
+/***********************************************************//**
Frees the externally stored fields for a record, if the field is mentioned
in the update vector. */
static
void
btr_rec_free_updated_extern_fields(
/*===============================*/
- dict_index_t* index, /* in: index of rec; the index tree MUST be
+ dict_index_t* index, /*!< in: index of rec; the index tree MUST be
X-latched */
- rec_t* rec, /* in: record */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- upd_t* update, /* in: update vector */
- ibool do_not_free_inherited,/* in: TRUE if called in a
- rollback and we do not want to free
- inherited fields */
- mtr_t* mtr); /* in: mini-transaction handle which contains
+ rec_t* rec, /*!< in: record */
+ page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
+ part will be updated, or NULL */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ const upd_t* update, /*!< in: update vector */
+ enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
+ mtr_t* mtr); /*!< in: mini-transaction handle which contains
an X-latch to record page and to the tree */
-/***************************************************************
-Gets the externally stored size of a record, in units of a database page. */
+/***********************************************************//**
+Frees the externally stored fields for a record. */
+static
+void
+btr_rec_free_externally_stored_fields(
+/*==================================*/
+ dict_index_t* index, /*!< in: index of the data, the index
+ tree MUST be X-latched */
+ rec_t* rec, /*!< in: record */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
+ part will be updated, or NULL */
+ enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
+ mtr_t* mtr); /*!< in: mini-transaction handle which contains
+ an X-latch to record page and to the index
+ tree */
+/***********************************************************//**
+Gets the externally stored size of a record, in units of a database page.
+@return externally stored part, in units of a database page */
static
ulint
btr_rec_get_externally_stored_len(
/*==============================*/
- /* out: externally stored part,
- in units of a database page */
- rec_t* rec, /* in: record */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
+ rec_t* rec, /*!< in: record */
+ const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+#endif /* !UNIV_HOTBACKUP */
+/******************************************************//**
+The following function is used to set the deleted bit of a record. */
+UNIV_INLINE
+void
+btr_rec_set_deleted_flag(
+/*=====================*/
+ rec_t* rec, /*!< in/out: physical record */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page (or NULL) */
+ ulint flag) /*!< in: nonzero if delete marked */
+{
+ if (page_rec_is_comp(rec)) {
+ rec_set_deleted_flag_new(rec, page_zip, flag);
+ } else {
+ ut_ad(!page_zip);
+ rec_set_deleted_flag_old(rec, flag);
+ }
+}
+
+#ifndef UNIV_HOTBACKUP
/*==================== B-TREE SEARCH =========================*/
-/************************************************************************
+/********************************************************************//**
Latches the leaf page or pages requested. */
static
void
btr_cur_latch_leaves(
/*=================*/
- page_t* page, /* in: leaf page where the search
+ page_t* page, /*!< in: leaf page where the search
converged */
- ulint space, /* in: space id */
- ulint page_no, /* in: page number of the leaf */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
- btr_cur_t* cursor, /* in: cursor */
- mtr_t* mtr) /* in: mtr */
+ ulint space, /*!< in: space id */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page_no, /*!< in: page number of the leaf */
+ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
+ btr_cur_t* cursor, /*!< in: cursor */
+ mtr_t* mtr) /*!< in: mtr */
{
- ulint left_page_no;
- ulint right_page_no;
- page_t* get_page;
+ ulint mode;
+ ulint left_page_no;
+ ulint right_page_no;
+ buf_block_t* get_block;
ut_ad(page && mtr);
- if (latch_mode == BTR_SEARCH_LEAF) {
-
- get_page = btr_page_get(space, page_no, RW_S_LATCH, mtr);
- ut_a(page_is_comp(get_page) == page_is_comp(page));
- buf_block_align(get_page)->check_index_page_at_flush = TRUE;
-
- } else if (latch_mode == BTR_MODIFY_LEAF) {
-
- get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
- ut_a(page_is_comp(get_page) == page_is_comp(page));
- buf_block_align(get_page)->check_index_page_at_flush = TRUE;
-
- } else if (latch_mode == BTR_MODIFY_TREE) {
-
+ switch (latch_mode) {
+ case BTR_SEARCH_LEAF:
+ case BTR_MODIFY_LEAF:
+ mode = latch_mode == BTR_SEARCH_LEAF ? RW_S_LATCH : RW_X_LATCH;
+ get_block = btr_block_get(space, zip_size, page_no, mode, mtr);
+#ifdef UNIV_BTR_DEBUG
+ ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
+#endif /* UNIV_BTR_DEBUG */
+ get_block->check_index_page_at_flush = TRUE;
+ return;
+ case BTR_MODIFY_TREE:
/* x-latch also brothers from left to right */
left_page_no = btr_page_get_prev(page, mtr);
if (left_page_no != FIL_NULL) {
- get_page = btr_page_get(space, left_page_no,
- RW_X_LATCH, mtr);
+ get_block = btr_block_get(space, zip_size,
+ left_page_no,
+ RW_X_LATCH, mtr);
#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_next(get_page, mtr)
- == buf_frame_get_page_no(page));
+ ut_a(page_is_comp(get_block->frame)
+ == page_is_comp(page));
+ ut_a(btr_page_get_next(get_block->frame, mtr)
+ == page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */
- ut_a(page_is_comp(get_page) == page_is_comp(page));
- buf_block_align(get_page)->check_index_page_at_flush
- = TRUE;
+ get_block->check_index_page_at_flush = TRUE;
}
- get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
- ut_a(page_is_comp(get_page) == page_is_comp(page));
- buf_block_align(get_page)->check_index_page_at_flush = TRUE;
+ get_block = btr_block_get(space, zip_size, page_no,
+ RW_X_LATCH, mtr);
+#ifdef UNIV_BTR_DEBUG
+ ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
+#endif /* UNIV_BTR_DEBUG */
+ get_block->check_index_page_at_flush = TRUE;
right_page_no = btr_page_get_next(page, mtr);
if (right_page_no != FIL_NULL) {
- get_page = btr_page_get(space, right_page_no,
- RW_X_LATCH, mtr);
+ get_block = btr_block_get(space, zip_size,
+ right_page_no,
+ RW_X_LATCH, mtr);
#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_prev(get_page, mtr)
- == buf_frame_get_page_no(page));
+ ut_a(page_is_comp(get_block->frame)
+ == page_is_comp(page));
+ ut_a(btr_page_get_prev(get_block->frame, mtr)
+ == page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */
- buf_block_align(get_page)->check_index_page_at_flush
- = TRUE;
+ get_block->check_index_page_at_flush = TRUE;
}
- } else if (latch_mode == BTR_SEARCH_PREV) {
+ return;
- /* s-latch also left brother */
+ case BTR_SEARCH_PREV:
+ case BTR_MODIFY_PREV:
+ mode = latch_mode == BTR_SEARCH_PREV ? RW_S_LATCH : RW_X_LATCH;
+ /* latch also left brother */
left_page_no = btr_page_get_prev(page, mtr);
if (left_page_no != FIL_NULL) {
- cursor->left_page = btr_page_get(space, left_page_no,
- RW_S_LATCH, mtr);
+ get_block = btr_block_get(space, zip_size,
+ left_page_no, mode, mtr);
+ cursor->left_block = get_block;
#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_next(cursor->left_page, mtr)
- == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
- ut_a(page_is_comp(cursor->left_page)
+ ut_a(page_is_comp(get_block->frame)
== page_is_comp(page));
- buf_block_align(cursor->left_page)
- ->check_index_page_at_flush = TRUE;
+ ut_a(btr_page_get_next(get_block->frame, mtr)
+ == page_get_page_no(page));
+#endif /* UNIV_BTR_DEBUG */
+ get_block->check_index_page_at_flush = TRUE;
}
- get_page = btr_page_get(space, page_no, RW_S_LATCH, mtr);
- ut_a(page_is_comp(get_page) == page_is_comp(page));
- buf_block_align(get_page)->check_index_page_at_flush = TRUE;
-
- } else if (latch_mode == BTR_MODIFY_PREV) {
-
- /* x-latch also left brother */
- left_page_no = btr_page_get_prev(page, mtr);
-
- if (left_page_no != FIL_NULL) {
- cursor->left_page = btr_page_get(space, left_page_no,
- RW_X_LATCH, mtr);
+ get_block = btr_block_get(space, zip_size, page_no, mode, mtr);
#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_next(cursor->left_page, mtr)
- == buf_frame_get_page_no(page));
+ ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
#endif /* UNIV_BTR_DEBUG */
- ut_a(page_is_comp(cursor->left_page)
- == page_is_comp(page));
- buf_block_align(cursor->left_page)
- ->check_index_page_at_flush = TRUE;
- }
-
- get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
- ut_a(page_is_comp(get_page) == page_is_comp(page));
- buf_block_align(get_page)->check_index_page_at_flush = TRUE;
- } else {
- ut_error;
+ get_block->check_index_page_at_flush = TRUE;
+ return;
}
+
+ ut_error;
}
-/************************************************************************
+/********************************************************************//**
Searches an index tree and positions a tree cursor on a given level.
NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
to node pointer page number fields on the upper levels of the tree!
@@ -244,21 +315,21 @@ If mode is PAGE_CUR_LE , cursor is left at the place where an insert of the
search tuple should be performed in the B-tree. InnoDB does an insert
immediately after the cursor. Thus, the cursor may end up on a user record,
or on a page infimum record. */
-
+UNIV_INTERN
void
btr_cur_search_to_nth_level(
/*========================*/
- dict_index_t* index, /* in: index */
- ulint level, /* in: the tree level of search */
- dtuple_t* tuple, /* in: data tuple; NOTE: n_fields_cmp in
+ dict_index_t* index, /*!< in: index */
+ ulint level, /*!< in: the tree level of search */
+ const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in
tuple must be set so that it cannot get
compared to the node ptr page number field! */
- ulint mode, /* in: PAGE_CUR_L, ...;
+ ulint mode, /*!< in: PAGE_CUR_L, ...;
Inserts should always be made using
PAGE_CUR_LE to search the position! */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF, ..., ORed with
+ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with
BTR_INSERT and BTR_ESTIMATE;
- cursor->left_page is used to store a pointer
+ cursor->left_block is used to store a pointer
to the left neighbor page, in the cases
BTR_SEARCH_PREV and BTR_MODIFY_PREV;
NOTE that if has_search_latch
@@ -266,16 +337,16 @@ btr_cur_search_to_nth_level(
on the cursor page, we assume
the caller uses his search latch
to protect the record! */
- btr_cur_t* cursor, /* in/out: tree cursor; the cursor page is
+ btr_cur_t* cursor, /*!< in/out: tree cursor; the cursor page is
s- or x-latched, but see also above! */
- ulint has_search_latch,/* in: info on the latch mode the
+ ulint has_search_latch,/*!< in: info on the latch mode the
caller currently has on btr_search_latch:
RW_S_LATCH, or 0 */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
page_cur_t* page_cursor;
page_t* page;
- page_t* guess;
+ buf_block_t* guess;
rec_t* node_ptr;
ulint page_no;
ulint space;
@@ -285,10 +356,8 @@ btr_cur_search_to_nth_level(
ulint low_bytes;
ulint height;
ulint savepoint;
- ulint rw_latch;
ulint page_mode;
ulint insert_planned;
- ulint buf_mode;
ulint estimate;
ulint ignore_sec_unique;
ulint root_height = 0; /* remove warning */
@@ -298,13 +367,13 @@ btr_cur_search_to_nth_level(
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
/* Currently, PAGE_CUR_LE is the only search mode used for searches
ending to upper levels */
ut_ad(level == 0 || mode == PAGE_CUR_LE);
ut_ad(dict_index_check_search_tuple(index, tuple));
- ut_ad(!(index->type & DICT_IBUF) || ibuf_inside());
+ ut_ad(!dict_index_is_ibuf(index) || ibuf_inside());
ut_ad(dtuple_check_typed(tuple));
#ifdef UNIV_DEBUG
@@ -334,13 +403,16 @@ btr_cur_search_to_nth_level(
#ifdef UNIV_SEARCH_PERF_STAT
info->n_searches++;
#endif
- if (btr_search_latch.writer == RW_LOCK_NOT_LOCKED
+ if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_NOT_LOCKED
&& latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ
&& !estimate
#ifdef PAGE_CUR_LE_OR_EXTENDS
&& mode != PAGE_CUR_LE_OR_EXTENDS
#endif /* PAGE_CUR_LE_OR_EXTENDS */
- && srv_use_adaptive_hash_indexes
+ /* If !has_search_latch, we do a dirty read of
+ btr_search_enabled below, and btr_search_guess_on_hash()
+ will have to check it again. */
+ && UNIV_LIKELY(btr_search_enabled)
&& btr_search_guess_on_hash(index, info, tuple, mode,
latch_mode, cursor,
has_search_latch, mtr)) {
@@ -357,8 +429,8 @@ btr_cur_search_to_nth_level(
return;
}
-#endif
-#endif
+#endif /* BTR_CUR_HASH_ADAPT */
+#endif /* BTR_CUR_ADAPT */
btr_cur_n_non_sea++;
/* If the hash search did not succeed, do binary search down the
@@ -396,8 +468,6 @@ btr_cur_search_to_nth_level(
low_bytes = 0;
height = ULINT_UNDEFINED;
- rw_latch = RW_NO_LATCH;
- buf_mode = BUF_GET;
/* We use these modified search modes on non-leaf levels of the
B-tree. These let us end up in the right B-tree leaf. In that leaf
@@ -424,7 +494,16 @@ btr_cur_search_to_nth_level(
/* Loop and search until we arrive at the desired level */
for (;;) {
- if ((height == 0) && (latch_mode <= BTR_MODIFY_LEAF)) {
+ ulint zip_size;
+ buf_block_t* block;
+ ulint rw_latch;
+ ulint buf_mode;
+
+ zip_size = dict_table_zip_size(index->table);
+ rw_latch = RW_NO_LATCH;
+ buf_mode = BUF_GET;
+
+ if (height == 0 && latch_mode <= BTR_MODIFY_LEAF) {
rw_latch = latch_mode;
@@ -437,12 +516,12 @@ btr_cur_search_to_nth_level(
buf_mode = BUF_GET_IF_IN_POOL;
}
}
+
retry_page_get:
- page = buf_page_get_gen(space, page_no, rw_latch, guess,
- buf_mode,
- __FILE__, __LINE__,
- mtr);
- if (page == NULL) {
+ block = buf_page_get_gen(space, zip_size, page_no,
+ rw_latch, guess, buf_mode,
+ __FILE__, __LINE__, mtr);
+ if (block == NULL) {
/* This must be a search to perform an insert;
try insert to the insert buffer */
@@ -450,9 +529,8 @@ retry_page_get:
ut_ad(insert_planned);
ut_ad(cursor->thr);
- if (ibuf_should_try(index, ignore_sec_unique)
- && ibuf_insert(tuple, index, space, page_no,
- cursor->thr)) {
+ if (ibuf_insert(tuple, index, space, zip_size,
+ page_no, cursor->thr)) {
/* Insertion to the insert buffer succeeded */
cursor->flag = BTR_CUR_INSERT_TO_IBUF;
if (UNIV_LIKELY_NULL(heap)) {
@@ -469,25 +547,32 @@ retry_page_get:
goto retry_page_get;
}
- buf_block_align(page)->check_index_page_at_flush = TRUE;
+ page = buf_block_get_frame(block);
+
+ block->check_index_page_at_flush = TRUE;
-#ifdef UNIV_SYNC_DEBUG
if (rw_latch != RW_NO_LATCH) {
- buf_page_dbg_add_level(page, SYNC_TREE_NODE);
+#ifdef UNIV_ZIP_DEBUG
+ const page_zip_des_t* page_zip
+ = buf_block_get_page_zip(block);
+ ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+
+ buf_block_dbg_add_level(block, SYNC_TREE_NODE);
}
-#endif
+
ut_ad(0 == ut_dulint_cmp(index->id,
btr_page_get_index_id(page)));
- if (height == ULINT_UNDEFINED) {
+ if (UNIV_UNLIKELY(height == ULINT_UNDEFINED)) {
/* We are in the root node */
height = btr_page_get_level(page, mtr);
root_height = height;
cursor->tree_height = root_height + 1;
#ifdef BTR_CUR_ADAPT
- if (page != guess) {
- info->root_guess = page;
+ if (block != guess) {
+ info->root_guess = block;
}
#endif
}
@@ -495,7 +580,7 @@ retry_page_get:
if (height == 0) {
if (rw_latch == RW_NO_LATCH) {
- btr_cur_latch_leaves(page, space,
+ btr_cur_latch_leaves(page, space, zip_size,
page_no, latch_mode,
cursor, mtr);
}
@@ -513,10 +598,11 @@ retry_page_get:
page_mode = mode;
}
- page_cur_search_with_match(page, index, tuple, page_mode,
+ page_cur_search_with_match(block, index, tuple, page_mode,
&up_match, &up_bytes,
&low_match, &low_bytes,
page_cursor);
+
if (estimate) {
btr_cur_add_path_info(cursor, height, root_height);
}
@@ -530,7 +616,7 @@ retry_page_get:
if (level > 0) {
/* x-latch the page */
- page = btr_page_get(space,
+ page = btr_page_get(space, zip_size,
page_no, RW_X_LATCH, mtr);
ut_a((ibool)!!page_is_comp(page)
== dict_table_is_comp(index->table));
@@ -542,6 +628,7 @@ retry_page_get:
ut_ad(height > 0);
height--;
+
guess = NULL;
node_ptr = page_cur_get_rec(page_cursor);
@@ -562,7 +649,11 @@ retry_page_get:
cursor->up_bytes = up_bytes;
#ifdef BTR_CUR_ADAPT
- if (srv_use_adaptive_hash_indexes) {
+ /* We do a dirty read of btr_search_enabled here. We
+ will properly check btr_search_enabled again in
+ btr_search_build_page_hash_index() before building a
+ page hash index, while holding btr_search_latch. */
+ if (UNIV_LIKELY(btr_search_enabled)) {
btr_search_info_update(index, cursor);
}
@@ -582,23 +673,23 @@ func_exit:
}
}
-/*********************************************************************
+/*****************************************************************//**
Opens a cursor at either end of an index. */
-
+UNIV_INTERN
void
btr_cur_open_at_index_side(
/*=======================*/
- ibool from_left, /* in: TRUE if open to the low end,
+ ibool from_left, /*!< in: TRUE if open to the low end,
FALSE if to the high end */
- dict_index_t* index, /* in: index */
- ulint latch_mode, /* in: latch mode */
- btr_cur_t* cursor, /* in: cursor */
- mtr_t* mtr) /* in: mtr */
+ dict_index_t* index, /*!< in: index */
+ ulint latch_mode, /*!< in: latch mode */
+ btr_cur_t* cursor, /*!< in: cursor */
+ mtr_t* mtr) /*!< in: mtr */
{
page_cur_t* page_cursor;
- page_t* page;
ulint page_no;
ulint space;
+ ulint zip_size;
ulint height;
ulint root_height = 0; /* remove warning */
rec_t* node_ptr;
@@ -607,7 +698,7 @@ btr_cur_open_at_index_side(
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
estimate = latch_mode & BTR_ESTIMATE;
latch_mode = latch_mode & ~BTR_ESTIMATE;
@@ -627,19 +718,22 @@ btr_cur_open_at_index_side(
cursor->index = index;
space = dict_index_get_space(index);
+ zip_size = dict_table_zip_size(index->table);
page_no = dict_index_get_page(index);
height = ULINT_UNDEFINED;
for (;;) {
- page = buf_page_get_gen(space, page_no, RW_NO_LATCH, NULL,
- BUF_GET,
- __FILE__, __LINE__,
- mtr);
+ buf_block_t* block;
+ page_t* page;
+ block = buf_page_get_gen(space, zip_size, page_no,
+ RW_NO_LATCH, NULL, BUF_GET,
+ __FILE__, __LINE__, mtr);
+ page = buf_block_get_frame(block);
ut_ad(0 == ut_dulint_cmp(index->id,
btr_page_get_index_id(page)));
- buf_block_align(page)->check_index_page_at_flush = TRUE;
+ block->check_index_page_at_flush = TRUE;
if (height == ULINT_UNDEFINED) {
/* We are in the root node */
@@ -649,7 +743,7 @@ btr_cur_open_at_index_side(
}
if (height == 0) {
- btr_cur_latch_leaves(page, space, page_no,
+ btr_cur_latch_leaves(page, space, zip_size, page_no,
latch_mode, cursor, mtr);
/* In versions <= 3.23.52 we had forgotten to
@@ -670,9 +764,9 @@ btr_cur_open_at_index_side(
}
if (from_left) {
- page_cur_set_before_first(page, page_cursor);
+ page_cur_set_before_first(block, page_cursor);
} else {
- page_cur_set_after_last(page, page_cursor);
+ page_cur_set_after_last(block, page_cursor);
}
if (height == 0) {
@@ -710,27 +804,27 @@ btr_cur_open_at_index_side(
}
}
-/**************************************************************************
+/**********************************************************************//**
Positions a cursor at a randomly chosen position within a B-tree. */
-
+UNIV_INTERN
void
btr_cur_open_at_rnd_pos(
/*====================*/
- dict_index_t* index, /* in: index */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
- btr_cur_t* cursor, /* in/out: B-tree cursor */
- mtr_t* mtr) /* in: mtr */
+ dict_index_t* index, /*!< in: index */
+ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
+ btr_cur_t* cursor, /*!< in/out: B-tree cursor */
+ mtr_t* mtr) /*!< in: mtr */
{
page_cur_t* page_cursor;
- page_t* page;
ulint page_no;
ulint space;
+ ulint zip_size;
ulint height;
rec_t* node_ptr;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
if (latch_mode == BTR_MODIFY_TREE) {
mtr_x_lock(dict_index_get_lock(index), mtr);
@@ -742,15 +836,19 @@ btr_cur_open_at_rnd_pos(
cursor->index = index;
space = dict_index_get_space(index);
+ zip_size = dict_table_zip_size(index->table);
page_no = dict_index_get_page(index);
height = ULINT_UNDEFINED;
for (;;) {
- page = buf_page_get_gen(space, page_no, RW_NO_LATCH, NULL,
- BUF_GET,
- __FILE__, __LINE__,
- mtr);
+ buf_block_t* block;
+ page_t* page;
+
+ block = buf_page_get_gen(space, zip_size, page_no,
+ RW_NO_LATCH, NULL, BUF_GET,
+ __FILE__, __LINE__, mtr);
+ page = buf_block_get_frame(block);
ut_ad(0 == ut_dulint_cmp(index->id,
btr_page_get_index_id(page)));
@@ -761,11 +859,11 @@ btr_cur_open_at_rnd_pos(
}
if (height == 0) {
- btr_cur_latch_leaves(page, space, page_no,
+ btr_cur_latch_leaves(page, space, zip_size, page_no,
latch_mode, cursor, mtr);
}
- page_cur_open_on_rnd_user_rec(page, page_cursor);
+ page_cur_open_on_rnd_user_rec(block, page_cursor);
if (height == 0) {
@@ -790,80 +888,76 @@ btr_cur_open_at_rnd_pos(
/*==================== B-TREE INSERT =========================*/
-/*****************************************************************
+/*************************************************************//**
Inserts a record if there is enough space, or if enough space can
-be freed by reorganizing. Differs from _optimistic_insert because
+be freed by reorganizing. Differs from btr_cur_optimistic_insert because
no heuristics is applied to whether it pays to use CPU time for
-reorganizing the page or not. */
+reorganizing the page or not.
+@return pointer to inserted record if succeed, else NULL */
static
rec_t*
btr_cur_insert_if_possible(
/*=======================*/
- /* out: pointer to inserted record if succeed,
- else NULL */
- btr_cur_t* cursor, /* in: cursor on page after which to insert;
+ btr_cur_t* cursor, /*!< in: cursor on page after which to insert;
cursor stays valid */
- dtuple_t* tuple, /* in: tuple to insert; the size info need not
+ const dtuple_t* tuple, /*!< in: tuple to insert; the size info need not
have been stored to tuple */
- ibool* reorg, /* out: TRUE if reorganization occurred */
- mtr_t* mtr) /* in: mtr */
+ ulint n_ext, /*!< in: number of externally stored columns */
+ mtr_t* mtr) /*!< in: mtr */
{
page_cur_t* page_cursor;
- page_t* page;
+ buf_block_t* block;
rec_t* rec;
ut_ad(dtuple_check_typed(tuple));
- *reorg = FALSE;
-
- page = btr_cur_get_page(cursor);
+ block = btr_cur_get_block(cursor);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
page_cursor = btr_cur_get_page_cur(cursor);
/* Now, try the insert */
- rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr);
+ rec = page_cur_tuple_insert(page_cursor, tuple,
+ cursor->index, n_ext, mtr);
- if (!rec) {
+ if (UNIV_UNLIKELY(!rec)) {
/* If record did not fit, reorganize */
- btr_page_reorganize(page, cursor->index, mtr);
-
- *reorg = TRUE;
+ if (btr_page_reorganize(block, cursor->index, mtr)) {
- page_cur_search(page, cursor->index, tuple,
- PAGE_CUR_LE, page_cursor);
+ page_cur_search(block, cursor->index, tuple,
+ PAGE_CUR_LE, page_cursor);
- rec = page_cur_tuple_insert(page_cursor, tuple,
- cursor->index, mtr);
+ rec = page_cur_tuple_insert(page_cursor, tuple,
+ cursor->index, n_ext, mtr);
+ }
}
return(rec);
}
-/*****************************************************************
-For an insert, checks the locks and does the undo logging if desired. */
+/*************************************************************//**
+For an insert, checks the locks and does the undo logging if desired.
+@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
UNIV_INLINE
ulint
btr_cur_ins_lock_and_undo(
/*======================*/
- /* out: DB_SUCCESS, DB_WAIT_LOCK,
- DB_FAIL, or error number */
- ulint flags, /* in: undo logging and locking flags: if
+ ulint flags, /*!< in: undo logging and locking flags: if
not zero, the parameters index and thr
should be specified */
- btr_cur_t* cursor, /* in: cursor on page after which to insert */
- dtuple_t* entry, /* in: entry to insert */
- que_thr_t* thr, /* in: query thread or NULL */
- ibool* inherit)/* out: TRUE if the inserted new record maybe
+ btr_cur_t* cursor, /*!< in: cursor on page after which to insert */
+ const dtuple_t* entry, /*!< in: entry to insert */
+ que_thr_t* thr, /*!< in: query thread or NULL */
+ mtr_t* mtr, /*!< in/out: mini-transaction */
+ ibool* inherit)/*!< out: TRUE if the inserted new record maybe
should inherit LOCK_GAP type locks from the
successor record */
{
dict_index_t* index;
ulint err;
rec_t* rec;
- dulint roll_ptr;
+ roll_ptr_t roll_ptr;
/* Check if we have to wait for a lock: enqueue an explicit lock
request if yes */
@@ -871,14 +965,16 @@ btr_cur_ins_lock_and_undo(
rec = btr_cur_get_rec(cursor);
index = cursor->index;
- err = lock_rec_insert_check_and_lock(flags, rec, index, thr, inherit);
+ err = lock_rec_insert_check_and_lock(flags, rec,
+ btr_cur_get_block(cursor),
+ index, thr, mtr, inherit);
if (err != DB_SUCCESS) {
return(err);
}
- if ((index->type & DICT_CLUSTERED) && !(index->type & DICT_IBUF)) {
+ if (dict_index_is_clust(index) && !dict_index_is_ibuf(index)) {
err = trx_undo_report_row_operation(flags, TRX_UNDO_INSERT_OP,
thr, index, entry,
@@ -902,68 +998,81 @@ btr_cur_ins_lock_and_undo(
}
#ifdef UNIV_DEBUG
-/*****************************************************************
+/*************************************************************//**
Report information about a transaction. */
static
void
btr_cur_trx_report(
/*===============*/
- trx_t* trx, /* in: transaction */
- const dict_index_t* index, /* in: index */
- const char* op) /* in: operation */
+ trx_t* trx, /*!< in: transaction */
+ const dict_index_t* index, /*!< in: index */
+ const char* op) /*!< in: operation */
{
- fprintf(stderr, "Trx with id %lu %lu going to ",
- ut_dulint_get_high(trx->id),
- ut_dulint_get_low(trx->id));
+ fprintf(stderr, "Trx with id " TRX_ID_FMT " going to ",
+ TRX_ID_PREP_PRINTF(trx->id));
fputs(op, stderr);
dict_index_name_print(stderr, trx, index);
putc('\n', stderr);
}
#endif /* UNIV_DEBUG */
-/*****************************************************************
+/*************************************************************//**
Tries to perform an insert to a page in an index tree, next to cursor.
It is assumed that mtr holds an x-latch on the page. The operation does
not succeed if there is too little space on the page. If there is just
one record on the page, the insert will always succeed; this is to
-prevent trying to split a page with just one record. */
-
+prevent trying to split a page with just one record.
+@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
+UNIV_INTERN
ulint
btr_cur_optimistic_insert(
/*======================*/
- /* out: DB_SUCCESS, DB_WAIT_LOCK,
- DB_FAIL, or error number */
- ulint flags, /* in: undo logging and locking flags: if not
+ ulint flags, /*!< in: undo logging and locking flags: if not
zero, the parameters index and thr should be
specified */
- btr_cur_t* cursor, /* in: cursor on page after which to insert;
+ btr_cur_t* cursor, /*!< in: cursor on page after which to insert;
cursor stays valid */
- dtuple_t* entry, /* in: entry to insert */
- rec_t** rec, /* out: pointer to inserted record if
+ dtuple_t* entry, /*!< in/out: entry to insert */
+ rec_t** rec, /*!< out: pointer to inserted record if
succeed */
- big_rec_t** big_rec,/* out: big rec vector whose fields have to
+ big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
be stored externally by the caller, or
NULL */
- que_thr_t* thr, /* in: query thread or NULL */
- mtr_t* mtr) /* in: mtr */
+ ulint n_ext, /*!< in: number of externally stored columns */
+ que_thr_t* thr, /*!< in: query thread or NULL */
+ mtr_t* mtr) /*!< in: mtr; if this function returns
+ DB_SUCCESS on a leaf page of a secondary
+ index in a compressed tablespace, the
+ mtr must be committed before latching
+ any further pages */
{
big_rec_t* big_rec_vec = NULL;
dict_index_t* index;
page_cur_t* page_cursor;
+ buf_block_t* block;
page_t* page;
ulint max_size;
rec_t* dummy_rec;
- ulint level;
+ ibool leaf;
ibool reorg;
ibool inherit;
+ ulint zip_size;
ulint rec_size;
- ulint type;
+ mem_heap_t* heap = NULL;
ulint err;
*big_rec = NULL;
- page = btr_cur_get_page(cursor);
+ block = btr_cur_get_block(cursor);
+ page = buf_block_get_frame(block);
index = cursor->index;
+ zip_size = buf_block_get_zip_size(block);
+#ifdef UNIV_DEBUG_VALGRIND
+ if (zip_size) {
+ UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
+ UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
+ }
+#endif /* UNIV_DEBUG_VALGRIND */
if (!dtuple_check_typed_no_assert(entry)) {
fputs("InnoDB: Error in a tuple to insert into ", stderr);
@@ -976,95 +1085,149 @@ btr_cur_optimistic_insert(
}
#endif /* UNIV_DEBUG */
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
max_size = page_get_max_insert_size_after_reorganize(page, 1);
- level = btr_page_get_level(page, mtr);
+ leaf = page_is_leaf(page);
-calculate_sizes_again:
/* Calculate the record size when entry is converted to a record */
- rec_size = rec_get_converted_size(index, entry);
+ rec_size = rec_get_converted_size(index, entry, n_ext);
- if (rec_size
- >= ut_min(page_get_free_space_of_empty(page_is_comp(page)) / 2,
- REC_MAX_DATA_SIZE)) {
+ if (page_zip_rec_needs_ext(rec_size, page_is_comp(page),
+ dtuple_get_n_fields(entry), zip_size)) {
/* The record is so big that we have to store some fields
externally on separate database pages */
+ big_rec_vec = dtuple_convert_big_rec(index, entry, &n_ext);
- big_rec_vec = dtuple_convert_big_rec(index, entry, NULL, 0);
-
- if (big_rec_vec == NULL) {
+ if (UNIV_UNLIKELY(big_rec_vec == NULL)) {
return(DB_TOO_BIG_RECORD);
}
- goto calculate_sizes_again;
+ rec_size = rec_get_converted_size(index, entry, n_ext);
+ }
+
+ if (UNIV_UNLIKELY(zip_size)) {
+ /* Estimate the free space of an empty compressed page.
+ Subtract one byte for the encoded heap_no in the
+ modification log. */
+ ulint free_space_zip = page_zip_empty_size(
+ cursor->index->n_fields, zip_size) - 1;
+ ulint n_uniq = dict_index_get_n_unique_in_tree(index);
+
+ ut_ad(dict_table_is_comp(index->table));
+
+ /* There should be enough room for two node pointer
+ records on an empty non-leaf page. This prevents
+ infinite page splits. */
+
+ if (UNIV_LIKELY(entry->n_fields >= n_uniq)
+ && UNIV_UNLIKELY(REC_NODE_PTR_SIZE
+ + rec_get_converted_size_comp_prefix(
+ index, entry->fields, n_uniq,
+ NULL)
+ /* On a compressed page, there is
+ a two-byte entry in the dense
+ page directory for every record.
+ But there is no record header. */
+ - (REC_N_NEW_EXTRA_BYTES - 2)
+ > free_space_zip / 2)) {
+
+ if (big_rec_vec) {
+ dtuple_convert_back_big_rec(
+ index, entry, big_rec_vec);
+ }
+
+ if (heap) {
+ mem_heap_free(heap);
+ }
+
+ return(DB_TOO_BIG_RECORD);
+ }
}
/* If there have been many consecutive inserts, and we are on the leaf
level, check if we have to split the page to reserve enough free space
for future updates of records. */
- type = index->type;
-
- if ((type & DICT_CLUSTERED)
- && (dict_index_get_space_reserve() + rec_size > max_size)
+ if (dict_index_is_clust(index)
&& (page_get_n_recs(page) >= 2)
- && (0 == level)
+ && UNIV_LIKELY(leaf)
+ && (dict_index_get_space_reserve() + rec_size > max_size)
&& (btr_page_get_split_rec_to_right(cursor, &dummy_rec)
|| btr_page_get_split_rec_to_left(cursor, &dummy_rec))) {
+fail:
+ err = DB_FAIL;
+fail_err:
if (big_rec_vec) {
dtuple_convert_back_big_rec(index, entry, big_rec_vec);
}
- return(DB_FAIL);
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+
+ return(err);
}
- if (!(((max_size >= rec_size)
- && (max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT))
- || (page_get_max_insert_size(page, 1) >= rec_size)
- || (page_get_n_recs(page) <= 1))) {
+ if (UNIV_UNLIKELY(max_size < BTR_CUR_PAGE_REORGANIZE_LIMIT
+ || max_size < rec_size)
+ && UNIV_LIKELY(page_get_n_recs(page) > 1)
+ && page_get_max_insert_size(page, 1) < rec_size) {
- if (big_rec_vec) {
- dtuple_convert_back_big_rec(index, entry, big_rec_vec);
- }
- return(DB_FAIL);
+ goto fail;
}
/* Check locks and write to the undo log, if specified */
- err = btr_cur_ins_lock_and_undo(flags, cursor, entry, thr, &inherit);
+ err = btr_cur_ins_lock_and_undo(flags, cursor, entry,
+ thr, mtr, &inherit);
- if (err != DB_SUCCESS) {
+ if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
- if (big_rec_vec) {
- dtuple_convert_back_big_rec(index, entry, big_rec_vec);
- }
- return(err);
+ goto fail_err;
}
page_cursor = btr_cur_get_page_cur(cursor);
- reorg = FALSE;
-
/* Now, try the insert */
- *rec = page_cur_insert_rec_low(page_cursor, entry, index,
- NULL, NULL, mtr);
- if (UNIV_UNLIKELY(!(*rec))) {
+ {
+ const rec_t* page_cursor_rec = page_cur_get_rec(page_cursor);
+ *rec = page_cur_tuple_insert(page_cursor, entry, index,
+ n_ext, mtr);
+ reorg = page_cursor_rec != page_cur_get_rec(page_cursor);
+
+ if (UNIV_UNLIKELY(reorg)) {
+ ut_a(zip_size);
+ ut_a(*rec);
+ }
+ }
+
+ if (UNIV_UNLIKELY(!*rec) && UNIV_LIKELY(!reorg)) {
/* If the record did not fit, reorganize */
- btr_page_reorganize(page, index, mtr);
+ if (UNIV_UNLIKELY(!btr_page_reorganize(block, index, mtr))) {
+ ut_a(zip_size);
- ut_ad(page_get_max_insert_size(page, 1) == max_size);
+ goto fail;
+ }
+
+ ut_ad(zip_size
+ || page_get_max_insert_size(page, 1) == max_size);
reorg = TRUE;
- page_cur_search(page, index, entry, PAGE_CUR_LE, page_cursor);
+ page_cur_search(block, index, entry, PAGE_CUR_LE, page_cursor);
- *rec = page_cur_tuple_insert(page_cursor, entry, index, mtr);
+ *rec = page_cur_tuple_insert(page_cursor, entry, index,
+ n_ext, mtr);
if (UNIV_UNLIKELY(!*rec)) {
+ if (UNIV_LIKELY(zip_size != 0)) {
+
+ goto fail;
+ }
+
fputs("InnoDB: Error: cannot insert tuple ", stderr);
dtuple_print(stderr, entry);
fputs(" into ", stderr);
@@ -1075,8 +1238,12 @@ calculate_sizes_again:
}
}
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+
#ifdef BTR_CUR_HASH_ADAPT
- if (!reorg && (0 == level) && (cursor->flag == BTR_CUR_HASH)) {
+ if (!reorg && leaf && (cursor->flag == BTR_CUR_HASH)) {
btr_search_update_hash_node_on_insert(cursor);
} else {
btr_search_update_hash_on_insert(cursor);
@@ -1085,19 +1252,39 @@ calculate_sizes_again:
if (!(flags & BTR_NO_LOCKING_FLAG) && inherit) {
- lock_update_insert(*rec);
+ lock_update_insert(block, *rec);
}
#if 0
fprintf(stderr, "Insert into page %lu, max ins size %lu,"
" rec %lu ind type %lu\n",
- buf_frame_get_page_no(page), max_size,
- rec_size + PAGE_DIR_SLOT_SIZE, type);
+ buf_block_get_page_no(block), max_size,
+ rec_size + PAGE_DIR_SLOT_SIZE, index->type);
#endif
- if (!(type & DICT_CLUSTERED)) {
- /* We have added a record to page: update its free bits */
- ibuf_update_free_bits_if_full(cursor->index, page, max_size,
- rec_size + PAGE_DIR_SLOT_SIZE);
+ if (leaf && !dict_index_is_clust(index)) {
+ /* Update the free bits of the B-tree page in the
+ insert buffer bitmap. */
+
+ /* The free bits in the insert buffer bitmap must
+ never exceed the free space on a page. It is safe to
+ decrement or reset the bits in the bitmap in a
+ mini-transaction that is committed before the
+ mini-transaction that affects the free space. */
+
+ /* It is unsafe to increment the bits in a separately
+ committed mini-transaction, because in crash recovery,
+ the free bits could momentarily be set too high. */
+
+ if (zip_size) {
+ /* Update the bits in the same mini-transaction. */
+ ibuf_update_free_bits_zip(block, mtr);
+ } else {
+ /* Decrement the bits in a separate
+ mini-transaction. */
+ ibuf_update_free_bits_if_full(
+ block, max_size,
+ rec_size + PAGE_DIR_SLOT_SIZE);
+ }
}
*big_rec = big_rec_vec;
@@ -1105,36 +1292,38 @@ calculate_sizes_again:
return(DB_SUCCESS);
}
-/*****************************************************************
+/*************************************************************//**
Performs an insert on a page of an index tree. It is assumed that mtr
holds an x-latch on the tree and on the cursor page. If the insert is
made on the leaf level, to avoid deadlocks, mtr must also own x-latches
-to brothers of page, if those brothers exist. */
-
+to brothers of page, if those brothers exist.
+@return DB_SUCCESS or error number */
+UNIV_INTERN
ulint
btr_cur_pessimistic_insert(
/*=======================*/
- /* out: DB_SUCCESS or error number */
- ulint flags, /* in: undo logging and locking flags: if not
+ ulint flags, /*!< in: undo logging and locking flags: if not
zero, the parameter thr should be
specified; if no undo logging is specified,
then the caller must have reserved enough
free extents in the file space so that the
insertion will certainly succeed */
- btr_cur_t* cursor, /* in: cursor after which to insert;
+ btr_cur_t* cursor, /*!< in: cursor after which to insert;
cursor stays valid */
- dtuple_t* entry, /* in: entry to insert */
- rec_t** rec, /* out: pointer to inserted record if
+ dtuple_t* entry, /*!< in/out: entry to insert */
+ rec_t** rec, /*!< out: pointer to inserted record if
succeed */
- big_rec_t** big_rec,/* out: big rec vector whose fields have to
+ big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
be stored externally by the caller, or
NULL */
- que_thr_t* thr, /* in: query thread or NULL */
- mtr_t* mtr) /* in: mtr */
+ ulint n_ext, /*!< in: number of externally stored columns */
+ que_thr_t* thr, /*!< in: query thread or NULL */
+ mtr_t* mtr) /*!< in: mtr */
{
dict_index_t* index = cursor->index;
+ ulint zip_size = dict_table_zip_size(index->table);
big_rec_t* big_rec_vec = NULL;
- page_t* page;
+ mem_heap_t* heap = NULL;
ulint err;
ibool dummy_inh;
ibool success;
@@ -1145,12 +1334,10 @@ btr_cur_pessimistic_insert(
*big_rec = NULL;
- page = btr_cur_get_page(cursor);
-
ut_ad(mtr_memo_contains(mtr,
dict_index_get_lock(btr_cur_get_index(cursor)),
MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
MTR_MEMO_PAGE_X_FIX));
/* Try first an optimistic insert; reset the cursor flag: we do not
@@ -1158,8 +1345,8 @@ btr_cur_pessimistic_insert(
cursor->flag = BTR_CUR_BINARY;
- err = btr_cur_optimistic_insert(flags, cursor, entry, rec, big_rec,
- thr, mtr);
+ err = btr_cur_optimistic_insert(flags, cursor, entry, rec,
+ big_rec, n_ext, thr, mtr);
if (err != DB_FAIL) {
return(err);
@@ -1168,7 +1355,8 @@ btr_cur_pessimistic_insert(
/* Retry with a pessimistic insert. Check locks and write to undo log,
if specified */
- err = btr_cur_ins_lock_and_undo(flags, cursor, entry, thr, &dummy_inh);
+ err = btr_cur_ins_lock_and_undo(flags, cursor, entry,
+ thr, mtr, &dummy_inh);
if (err != DB_SUCCESS) {
@@ -1185,20 +1373,25 @@ btr_cur_pessimistic_insert(
success = fsp_reserve_free_extents(&n_reserved, index->space,
n_extents, FSP_NORMAL, mtr);
if (!success) {
- err = DB_OUT_OF_FILE_SPACE;
-
- return(err);
+ return(DB_OUT_OF_FILE_SPACE);
}
}
- if (rec_get_converted_size(index, entry)
- >= ut_min(page_get_free_space_of_empty(page_is_comp(page)) / 2,
- REC_MAX_DATA_SIZE)) {
-
+ if (page_zip_rec_needs_ext(rec_get_converted_size(index, entry, n_ext),
+ dict_table_is_comp(index->table),
+ dict_index_get_n_fields(index),
+ zip_size)) {
/* The record is so big that we have to store some fields
externally on separate database pages */
- big_rec_vec = dtuple_convert_big_rec(index, entry, NULL, 0);
+ if (UNIV_LIKELY_NULL(big_rec_vec)) {
+ /* This should never happen, but we handle
+ the situation in a robust manner. */
+ ut_ad(0);
+ dtuple_convert_back_big_rec(index, entry, big_rec_vec);
+ }
+
+ big_rec_vec = dtuple_convert_big_rec(index, entry, &n_ext);
if (big_rec_vec == NULL) {
@@ -1210,52 +1403,55 @@ btr_cur_pessimistic_insert(
}
}
- if (dict_index_get_page(index) == buf_frame_get_page_no(page)) {
+ if (dict_index_get_page(index)
+ == buf_block_get_page_no(btr_cur_get_block(cursor))) {
/* The page is the root page */
- *rec = btr_root_raise_and_insert(cursor, entry, mtr);
+ *rec = btr_root_raise_and_insert(cursor, entry, n_ext, mtr);
} else {
- *rec = btr_page_split_and_insert(cursor, entry, mtr);
+ *rec = btr_page_split_and_insert(cursor, entry, n_ext, mtr);
+ }
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
}
- btr_cur_position(index, page_rec_get_prev(*rec), cursor);
+ ut_ad(page_rec_get_next(btr_cur_get_rec(cursor)) == *rec);
#ifdef BTR_CUR_ADAPT
btr_search_update_hash_on_insert(cursor);
#endif
if (!(flags & BTR_NO_LOCKING_FLAG)) {
- lock_update_insert(*rec);
+ lock_update_insert(btr_cur_get_block(cursor), *rec);
}
- err = DB_SUCCESS;
-
if (n_extents > 0) {
fil_space_release_free_extents(index->space, n_reserved);
}
*big_rec = big_rec_vec;
- return(err);
+ return(DB_SUCCESS);
}
/*==================== B-TREE UPDATE =========================*/
-/*****************************************************************
-For an update, checks the locks and does the undo logging. */
+/*************************************************************//**
+For an update, checks the locks and does the undo logging.
+@return DB_SUCCESS, DB_WAIT_LOCK, or error number */
UNIV_INLINE
ulint
btr_cur_upd_lock_and_undo(
/*======================*/
- /* out: DB_SUCCESS, DB_WAIT_LOCK, or error
- number */
- ulint flags, /* in: undo logging and locking flags */
- btr_cur_t* cursor, /* in: cursor on record to update */
- upd_t* update, /* in: update vector */
- ulint cmpl_info,/* in: compiler info on secondary index
+ ulint flags, /*!< in: undo logging and locking flags */
+ btr_cur_t* cursor, /*!< in: cursor on record to update */
+ const upd_t* update, /*!< in: update vector */
+ ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /* in: query thread */
- dulint* roll_ptr)/* out: roll pointer */
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr, /*!< in/out: mini-transaction */
+ roll_ptr_t* roll_ptr)/*!< out: roll pointer */
{
dict_index_t* index;
rec_t* rec;
@@ -1266,11 +1462,12 @@ btr_cur_upd_lock_and_undo(
rec = btr_cur_get_rec(cursor);
index = cursor->index;
- if (!(index->type & DICT_CLUSTERED)) {
+ if (!dict_index_is_clust(index)) {
/* We do undo logging only when we update a clustered index
record */
- return(lock_sec_rec_modify_check_and_lock(flags, rec, index,
- thr));
+ return(lock_sec_rec_modify_check_and_lock(
+ flags, btr_cur_get_block(cursor), rec,
+ index, thr, mtr));
}
/* Check if we have to wait for a lock: enqueue an explicit lock
@@ -1281,10 +1478,10 @@ btr_cur_upd_lock_and_undo(
if (!(flags & BTR_NO_LOCKING_FLAG)) {
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
err = lock_clust_rec_modify_check_and_lock(
- flags, rec, index,
+ flags, btr_cur_get_block(cursor), rec, index,
rec_get_offsets(rec, index, offsets_,
ULINT_UNDEFINED, &heap), thr);
if (UNIV_LIKELY_NULL(heap)) {
@@ -1304,19 +1501,19 @@ btr_cur_upd_lock_and_undo(
return(err);
}
-/***************************************************************
+/***********************************************************//**
Writes a redo log record of updating a record in-place. */
UNIV_INLINE
void
btr_cur_update_in_place_log(
/*========================*/
- ulint flags, /* in: flags */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: index where cursor positioned */
- upd_t* update, /* in: update vector */
- trx_t* trx, /* in: transaction */
- dulint roll_ptr, /* in: roll ptr */
- mtr_t* mtr) /* in: mtr */
+ ulint flags, /*!< in: flags */
+ rec_t* rec, /*!< in: record */
+ dict_index_t* index, /*!< in: index where cursor positioned */
+ const upd_t* update, /*!< in: update vector */
+ trx_t* trx, /*!< in: transaction */
+ roll_ptr_t roll_ptr, /*!< in: roll ptr */
+ mtr_t* mtr) /*!< in: mtr */
{
byte* log_ptr;
page_t* page = page_align(rec);
@@ -1351,28 +1548,30 @@ btr_cur_update_in_place_log(
row_upd_index_write_log(update, log_ptr, mtr);
}
+#endif /* UNIV_HOTBACKUP */
-/***************************************************************
-Parses a redo log record of updating a record in-place. */
-
+/***********************************************************//**
+Parses a redo log record of updating a record in-place.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
btr_cur_parse_update_in_place(
/*==========================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- dict_index_t* index) /* in: index corresponding to page */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ page_t* page, /*!< in/out: page or NULL */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ dict_index_t* index) /*!< in: index corresponding to page */
{
- ulint flags;
- rec_t* rec;
- upd_t* update;
- ulint pos;
- dulint trx_id;
- dulint roll_ptr;
- ulint rec_offset;
- mem_heap_t* heap;
- ulint* offsets;
+ ulint flags;
+ rec_t* rec;
+ upd_t* update;
+ ulint pos;
+ trx_id_t trx_id;
+ roll_ptr_t roll_ptr;
+ ulint rec_offset;
+ mem_heap_t* heap;
+ ulint* offsets;
if (end_ptr < ptr + 1) {
@@ -1417,11 +1616,11 @@ btr_cur_parse_update_in_place(
offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
if (!(flags & BTR_KEEP_SYS_FLAG)) {
- row_upd_rec_sys_fields_in_recovery(rec, offsets,
+ row_upd_rec_sys_fields_in_recovery(rec, page_zip, offsets,
pos, trx_id, roll_ptr);
}
- row_upd_rec_in_place(rec, offsets, update);
+ row_upd_rec_in_place(rec, index, offsets, update, page_zip);
func_exit:
mem_heap_free(heap);
@@ -1429,39 +1628,105 @@ func_exit:
return(ptr);
}
-/*****************************************************************
-Updates a record when the update causes no size changes in its fields.
-We assume here that the ordering fields of the record do not change. */
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
+See if there is enough place in the page modification log to log
+an update-in-place.
+@return TRUE if enough place */
+static
+ibool
+btr_cur_update_alloc_zip(
+/*=====================*/
+ page_zip_des_t* page_zip,/*!< in/out: compressed page */
+ buf_block_t* block, /*!< in/out: buffer page */
+ dict_index_t* index, /*!< in: the index corresponding to the block */
+ ulint length, /*!< in: size needed */
+ ibool create, /*!< in: TRUE=delete-and-insert,
+ FALSE=update-in-place */
+ mtr_t* mtr) /*!< in: mini-transaction */
+{
+ ut_a(page_zip == buf_block_get_page_zip(block));
+ ut_ad(page_zip);
+ ut_ad(!dict_index_is_ibuf(index));
+
+ if (page_zip_available(page_zip, dict_index_is_clust(index),
+ length, create)) {
+ return(TRUE);
+ }
+ if (!page_zip->m_nonempty) {
+ /* The page has been freshly compressed, so
+ recompressing it will not help. */
+ return(FALSE);
+ }
+
+ if (!page_zip_compress(page_zip, buf_block_get_frame(block),
+ index, mtr)) {
+ /* Unable to compress the page */
+ return(FALSE);
+ }
+
+ /* After recompressing a page, we must make sure that the free
+ bits in the insert buffer bitmap will not exceed the free
+ space on the page. Because this function will not attempt
+ recompression unless page_zip_available() fails above, it is
+ safe to reset the free bits if page_zip_available() fails
+ again, below. The free bits can safely be reset in a separate
+ mini-transaction. If page_zip_available() succeeds below, we
+ can be sure that the page_zip_compress() above did not reduce
+ the free space available on the page. */
+
+ if (!page_zip_available(page_zip, dict_index_is_clust(index),
+ length, create)) {
+ /* Out of space: reset the free bits. */
+ if (!dict_index_is_clust(index)
+ && page_is_leaf(buf_block_get_frame(block))) {
+ ibuf_reset_free_bits(block);
+ }
+ return(FALSE);
+ }
+
+ return(TRUE);
+}
+
+/*************************************************************//**
+Updates a record when the update causes no size changes in its fields.
+We assume here that the ordering fields of the record do not change.
+@return DB_SUCCESS or error number */
+UNIV_INTERN
ulint
btr_cur_update_in_place(
/*====================*/
- /* out: DB_SUCCESS or error number */
- ulint flags, /* in: undo logging and locking flags */
- btr_cur_t* cursor, /* in: cursor on the record to update;
+ ulint flags, /*!< in: undo logging and locking flags */
+ btr_cur_t* cursor, /*!< in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
- upd_t* update, /* in: update vector */
- ulint cmpl_info,/* in: compiler info on secondary index
+ const upd_t* update, /*!< in: update vector */
+ ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr) /* in: mtr */
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr) /*!< in: mtr; must be committed before
+ latching any further pages */
{
dict_index_t* index;
buf_block_t* block;
+ page_zip_des_t* page_zip;
ulint err;
rec_t* rec;
- dulint roll_ptr = ut_dulint_zero;
+ roll_ptr_t roll_ptr = ut_dulint_zero;
trx_t* trx;
ulint was_delete_marked;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
rec = btr_cur_get_rec(cursor);
index = cursor->index;
ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
+ /* The insert buffer tree should never be updated in place. */
+ ut_ad(!dict_index_is_ibuf(index));
+
trx = thr_get_trx(thr);
offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
#ifdef UNIV_DEBUG
@@ -1471,9 +1736,19 @@ btr_cur_update_in_place(
}
#endif /* UNIV_DEBUG */
+ block = btr_cur_get_block(cursor);
+ page_zip = buf_block_get_page_zip(block);
+
+ /* Check that enough space is available on the compressed page. */
+ if (UNIV_LIKELY_NULL(page_zip)
+ && !btr_cur_update_alloc_zip(page_zip, block, index,
+ rec_offs_size(offsets), FALSE, mtr)) {
+ return(DB_ZIP_OVERFLOW);
+ }
+
/* Do lock checking and undo logging */
err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
- thr, &roll_ptr);
+ thr, mtr, &roll_ptr);
if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
if (UNIV_LIKELY_NULL(heap)) {
@@ -1482,16 +1757,12 @@ btr_cur_update_in_place(
return(err);
}
- block = buf_block_align(rec);
- ut_ad(!!page_is_comp(buf_block_get_frame(block))
- == dict_table_is_comp(index->table));
-
if (block->is_hashed) {
/* The function row_upd_changes_ord_field_binary works only
if the update vector was built for a clustered index, we must
NOT call it if index is secondary */
- if (!(index->type & DICT_CLUSTERED)
+ if (!dict_index_is_clust(index)
|| row_upd_changes_ord_field_binary(NULL, index, update)) {
/* Remove possible hash index pointer to this record */
@@ -1502,27 +1773,36 @@ btr_cur_update_in_place(
}
if (!(flags & BTR_KEEP_SYS_FLAG)) {
- row_upd_rec_sys_fields(rec, index, offsets, trx, roll_ptr);
+ row_upd_rec_sys_fields(rec, NULL,
+ index, offsets, trx, roll_ptr);
}
was_delete_marked = rec_get_deleted_flag(
rec, page_is_comp(buf_block_get_frame(block)));
- row_upd_rec_in_place(rec, offsets, update);
+ row_upd_rec_in_place(rec, index, offsets, update, page_zip);
if (block->is_hashed) {
rw_lock_x_unlock(&btr_search_latch);
}
- btr_cur_update_in_place_log(flags, rec, index, update, trx, roll_ptr,
- mtr);
+ if (page_zip && !dict_index_is_clust(index)
+ && page_is_leaf(buf_block_get_frame(block))) {
+ /* Update the free bits in the insert buffer. */
+ ibuf_update_free_bits_zip(block, mtr);
+ }
+
+ btr_cur_update_in_place_log(flags, rec, index, update,
+ trx, roll_ptr, mtr);
+
if (was_delete_marked
&& !rec_get_deleted_flag(rec, page_is_comp(
buf_block_get_frame(block)))) {
/* The new updated record owns its possible externally
stored fields */
- btr_cur_unmark_extern_fields(rec, mtr, offsets);
+ btr_cur_unmark_extern_fields(page_zip,
+ rec, index, offsets, mtr);
}
if (UNIV_LIKELY_NULL(heap)) {
@@ -1531,50 +1811,58 @@ btr_cur_update_in_place(
return(DB_SUCCESS);
}
-/*****************************************************************
+/*************************************************************//**
Tries to update a record on a page in an index tree. It is assumed that mtr
holds an x-latch on the page. The operation does not succeed if there is too
little space on the page or if the update would result in too empty a page,
so that tree compression is recommended. We assume here that the ordering
-fields of the record do not change. */
-
+fields of the record do not change.
+@return DB_SUCCESS, or DB_OVERFLOW if the updated record does not fit,
+DB_UNDERFLOW if the page would become too empty, or DB_ZIP_OVERFLOW if
+there is not enough space left on the compressed page */
+UNIV_INTERN
ulint
btr_cur_optimistic_update(
/*======================*/
- /* out: DB_SUCCESS, or DB_OVERFLOW if the
- updated record does not fit, DB_UNDERFLOW
- if the page would become too empty */
- ulint flags, /* in: undo logging and locking flags */
- btr_cur_t* cursor, /* in: cursor on the record to update;
+ ulint flags, /*!< in: undo logging and locking flags */
+ btr_cur_t* cursor, /*!< in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
- upd_t* update, /* in: update vector; this must also
+ const upd_t* update, /*!< in: update vector; this must also
contain trx id and roll ptr fields */
- ulint cmpl_info,/* in: compiler info on secondary index
+ ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr) /* in: mtr */
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr) /*!< in: mtr; must be committed before
+ latching any further pages */
{
dict_index_t* index;
page_cur_t* page_cursor;
ulint err;
+ buf_block_t* block;
page_t* page;
+ page_zip_des_t* page_zip;
rec_t* rec;
+ rec_t* orig_rec;
ulint max_size;
ulint new_rec_size;
ulint old_rec_size;
dtuple_t* new_entry;
- dulint roll_ptr;
+ roll_ptr_t roll_ptr;
trx_t* trx;
mem_heap_t* heap;
- ibool reorganized = FALSE;
ulint i;
+ ulint n_ext;
ulint* offsets;
- page = btr_cur_get_page(cursor);
- rec = btr_cur_get_rec(cursor);
+ block = btr_cur_get_block(cursor);
+ page = buf_block_get_frame(block);
+ orig_rec = rec = btr_cur_get_rec(cursor);
index = cursor->index;
ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ /* The insert buffer tree should never be updated in place. */
+ ut_ad(!dict_index_is_ibuf(index));
heap = mem_heap_create(1024);
offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
@@ -1586,30 +1874,20 @@ btr_cur_optimistic_update(
}
#endif /* UNIV_DEBUG */
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
if (!row_upd_changes_field_size_or_external(index, offsets, update)) {
/* The simplest and the most common case: the update does not
change the size of any field and none of the updated fields is
- externally stored in rec or update */
+ externally stored in rec or update, and there is enough space
+ on the compressed page to log the update. */
+
mem_heap_free(heap);
return(btr_cur_update_in_place(flags, cursor, update,
cmpl_info, thr, mtr));
}
- for (i = 0; i < upd_get_n_fields(update); i++) {
- if (upd_get_nth_field(update, i)->extern_storage) {
-
- /* Externally stored fields are treated in pessimistic
- update */
-
- mem_heap_free(heap);
- return(DB_OVERFLOW);
- }
- }
-
if (rec_offs_any_extern(offsets)) {
+any_extern:
/* Externally stored fields are treated in pessimistic
update */
@@ -1617,38 +1895,61 @@ btr_cur_optimistic_update(
return(DB_OVERFLOW);
}
+ for (i = 0; i < upd_get_n_fields(update); i++) {
+ if (dfield_is_ext(&upd_get_nth_field(update, i)->new_val)) {
+
+ goto any_extern;
+ }
+ }
+
page_cursor = btr_cur_get_page_cur(cursor);
- new_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);
+ new_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets,
+ &n_ext, heap);
+ /* We checked above that there are no externally stored fields. */
+ ut_a(!n_ext);
+ /* The page containing the clustered index record
+ corresponding to new_entry is latched in mtr.
+ Thus the following call is safe. */
row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
- FALSE, NULL);
+ FALSE, heap);
old_rec_size = rec_offs_size(offsets);
- new_rec_size = rec_get_converted_size(index, new_entry);
+ new_rec_size = rec_get_converted_size(index, new_entry, 0);
+
+ page_zip = buf_block_get_page_zip(block);
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+
+ if (UNIV_LIKELY_NULL(page_zip)
+ && !btr_cur_update_alloc_zip(page_zip, block, index,
+ new_rec_size, TRUE, mtr)) {
+ err = DB_ZIP_OVERFLOW;
+ goto err_exit;
+ }
if (UNIV_UNLIKELY(new_rec_size
>= (page_get_free_space_of_empty(page_is_comp(page))
/ 2))) {
- mem_heap_free(heap);
-
- return(DB_OVERFLOW);
+ err = DB_OVERFLOW;
+ goto err_exit;
}
- max_size = old_rec_size
- + page_get_max_insert_size_after_reorganize(page, 1);
-
if (UNIV_UNLIKELY(page_get_data_size(page)
- old_rec_size + new_rec_size
< BTR_CUR_PAGE_COMPRESS_LIMIT)) {
/* The page would become too empty */
- mem_heap_free(heap);
-
- return(DB_UNDERFLOW);
+ err = DB_UNDERFLOW;
+ goto err_exit;
}
+ max_size = old_rec_size
+ + page_get_max_insert_size_after_reorganize(page, 1);
+
if (!(((max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT)
&& (max_size >= new_rec_size))
|| (page_get_n_recs(page) <= 1))) {
@@ -1657,29 +1958,33 @@ btr_cur_optimistic_update(
reorganize: for simplicity, we decide what to do assuming a
reorganization is needed, though it might not be necessary */
- mem_heap_free(heap);
-
- return(DB_OVERFLOW);
+ err = DB_OVERFLOW;
+ goto err_exit;
}
/* Do lock checking and undo logging */
- err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info, thr,
- &roll_ptr);
+ err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
+ thr, mtr, &roll_ptr);
if (err != DB_SUCCESS) {
-
+err_exit:
mem_heap_free(heap);
-
return(err);
}
/* Ok, we may do the replacement. Store on the page infimum the
explicit locks on rec, before deleting rec (see the comment in
- .._pessimistic_update). */
+ btr_cur_pessimistic_update). */
- lock_rec_store_on_page_infimum(page, rec);
+ lock_rec_store_on_page_infimum(block, rec);
btr_search_update_hash_on_delete(cursor);
+ /* The call to row_rec_to_index_entry(ROW_COPY_DATA, ...) above
+ invokes rec_offs_make_valid() to point to the copied record that
+ the fields of new_entry point to. We have to undo it here. */
+ ut_ad(rec_offs_validate(NULL, index, offsets));
+ rec_offs_make_valid(page_cur_get_rec(page_cursor), index, offsets);
+
page_cur_delete_rec(page_cursor, index, offsets, mtr);
page_cur_move_to_prev(page_cursor);
@@ -1693,22 +1998,19 @@ btr_cur_optimistic_update(
trx->id);
}
- rec = btr_cur_insert_if_possible(cursor, new_entry, &reorganized, mtr);
-
+ /* There are no externally stored columns in new_entry */
+ rec = btr_cur_insert_if_possible(cursor, new_entry, 0/*n_ext*/, mtr);
ut_a(rec); /* <- We calculated above the insert would fit */
- if (!rec_get_deleted_flag(rec, page_is_comp(page))) {
- /* The new inserted record owns its possible externally
- stored fields */
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- btr_cur_unmark_extern_fields(rec, mtr, offsets);
+ if (page_zip && !dict_index_is_clust(index)
+ && page_is_leaf(page)) {
+ /* Update the free bits in the insert buffer. */
+ ibuf_update_free_bits_zip(block, mtr);
}
/* Restore the old explicit lock state on the record */
- lock_rec_restore_from_page_infimum(rec, page);
+ lock_rec_restore_from_page_infimum(block, rec, block);
page_cur_move_to_next(page_cursor);
@@ -1717,7 +2019,7 @@ btr_cur_optimistic_update(
return(DB_SUCCESS);
}
-/*****************************************************************
+/*************************************************************//**
If, in a split, a new supremum record was created as the predecessor of the
updated record, the supremum record must inherit exactly the locks on the
updated record. In the split it may have inherited locks from the successor
@@ -1727,15 +2029,17 @@ static
void
btr_cur_pess_upd_restore_supremum(
/*==============================*/
- rec_t* rec, /* in: updated record */
- mtr_t* mtr) /* in: mtr */
+ buf_block_t* block, /*!< in: buffer block of rec */
+ const rec_t* rec, /*!< in: updated record */
+ mtr_t* mtr) /*!< in: mtr */
{
- page_t* page;
- page_t* prev_page;
- ulint space;
- ulint prev_page_no;
+ page_t* page;
+ buf_block_t* prev_block;
+ ulint space;
+ ulint zip_size;
+ ulint prev_page_no;
- page = buf_frame_align(rec);
+ page = buf_block_get_frame(block);
if (page_rec_get_next(page_get_infimum_rec(page)) != rec) {
/* Updated record is not the first user record on its page */
@@ -1743,98 +2047,111 @@ btr_cur_pess_upd_restore_supremum(
return;
}
- space = buf_frame_get_space_id(page);
+ space = buf_block_get_space(block);
+ zip_size = buf_block_get_zip_size(block);
prev_page_no = btr_page_get_prev(page, mtr);
ut_ad(prev_page_no != FIL_NULL);
- prev_page = buf_page_get_with_no_latch(space, prev_page_no, mtr);
+ prev_block = buf_page_get_with_no_latch(space, zip_size,
+ prev_page_no, mtr);
#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_next(prev_page, mtr)
- == buf_frame_get_page_no(page));
+ ut_a(btr_page_get_next(prev_block->frame, mtr)
+ == page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */
- /* We must already have an x-latch to prev_page! */
- ut_ad(mtr_memo_contains(mtr, buf_block_align(prev_page),
- MTR_MEMO_PAGE_X_FIX));
+ /* We must already have an x-latch on prev_block! */
+ ut_ad(mtr_memo_contains(mtr, prev_block, MTR_MEMO_PAGE_X_FIX));
- lock_rec_reset_and_inherit_gap_locks(page_get_supremum_rec(prev_page),
- rec);
+ lock_rec_reset_and_inherit_gap_locks(prev_block, block,
+ PAGE_HEAP_NO_SUPREMUM,
+ page_rec_get_heap_no(rec));
}
-/*****************************************************************
+/*************************************************************//**
Performs an update of a record on a page of a tree. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. If the
update is made on the leaf level, to avoid deadlocks, mtr must also
own x-latches to brothers of page, if those brothers exist. We assume
-here that the ordering fields of the record do not change. */
-
+here that the ordering fields of the record do not change.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
ulint
btr_cur_pessimistic_update(
/*=======================*/
- /* out: DB_SUCCESS or error code */
- ulint flags, /* in: undo logging, locking, and rollback
+ ulint flags, /*!< in: undo logging, locking, and rollback
flags */
- btr_cur_t* cursor, /* in: cursor on the record to update */
- big_rec_t** big_rec,/* out: big rec vector whose fields have to
+ btr_cur_t* cursor, /*!< in: cursor on the record to update */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
+ big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
be stored externally by the caller, or NULL */
- upd_t* update, /* in: update vector; this is allowed also
+ const upd_t* update, /*!< in: update vector; this is allowed also
contain trx id and roll ptr fields, but
the values in update vector have no effect */
- ulint cmpl_info,/* in: compiler info on secondary index
+ ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr) /* in: mtr */
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr) /*!< in: mtr; must be committed before
+ latching any further pages */
{
big_rec_t* big_rec_vec = NULL;
big_rec_t* dummy_big_rec;
dict_index_t* index;
+ buf_block_t* block;
page_t* page;
+ page_zip_des_t* page_zip;
rec_t* rec;
page_cur_t* page_cursor;
dtuple_t* new_entry;
- mem_heap_t* heap;
ulint err;
ulint optim_err;
- ibool dummy_reorganized;
- dulint roll_ptr;
+ roll_ptr_t roll_ptr;
trx_t* trx;
ibool was_first;
- ibool success;
ulint n_extents = 0;
ulint n_reserved;
- ulint* ext_vect;
- ulint n_ext_vect;
- ulint reserve_flag;
+ ulint n_ext;
ulint* offsets = NULL;
*big_rec = NULL;
- page = btr_cur_get_page(cursor);
+ block = btr_cur_get_block(cursor);
+ page = buf_block_get_frame(block);
+ page_zip = buf_block_get_page_zip(block);
rec = btr_cur_get_rec(cursor);
index = cursor->index;
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+ /* The insert buffer tree should never be updated in place. */
+ ut_ad(!dict_index_is_ibuf(index));
optim_err = btr_cur_optimistic_update(flags, cursor, update,
cmpl_info, thr, mtr);
- if (optim_err != DB_UNDERFLOW && optim_err != DB_OVERFLOW) {
-
+ switch (optim_err) {
+ case DB_UNDERFLOW:
+ case DB_OVERFLOW:
+ case DB_ZIP_OVERFLOW:
+ break;
+ default:
return(optim_err);
}
/* Do lock checking and undo logging */
err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
- thr, &roll_ptr);
+ thr, mtr, &roll_ptr);
if (err != DB_SUCCESS) {
return(err);
}
if (optim_err == DB_OVERFLOW) {
+ ulint reserve_flag;
+
/* First reserve enough free space for the file segments
of the index tree, so that the update will not fail because
of lack of space */
@@ -1847,25 +2164,35 @@ btr_cur_pessimistic_update(
reserve_flag = FSP_NORMAL;
}
- success = fsp_reserve_free_extents(&n_reserved, index->space,
- n_extents,
- reserve_flag, mtr);
- if (!success) {
- err = DB_OUT_OF_FILE_SPACE;
-
- return(err);
+ if (!fsp_reserve_free_extents(&n_reserved, index->space,
+ n_extents, reserve_flag, mtr)) {
+ return(DB_OUT_OF_FILE_SPACE);
}
}
- heap = mem_heap_create(1024);
- offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
+ if (!*heap) {
+ *heap = mem_heap_create(1024);
+ }
+ offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, heap);
trx = thr_get_trx(thr);
- new_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);
-
+ new_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets,
+ &n_ext, *heap);
+ /* The call to row_rec_to_index_entry(ROW_COPY_DATA, ...) above
+ invokes rec_offs_make_valid() to point to the copied record that
+ the fields of new_entry point to. We have to undo it here. */
+ ut_ad(rec_offs_validate(NULL, index, offsets));
+ rec_offs_make_valid(rec, index, offsets);
+
+ /* The page containing the clustered index record
+ corresponding to new_entry is latched in mtr. If the
+ clustered index record is delete-marked, then its externally
+ stored fields cannot have been purged yet, because then the
+ purge would also have removed the clustered index record
+ itself. Thus the following call is safe. */
row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
- FALSE, heap);
+ FALSE, *heap);
if (!(flags & BTR_KEEP_SYS_FLAG)) {
row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
roll_ptr);
@@ -1873,7 +2200,7 @@ btr_cur_pessimistic_update(
trx->id);
}
- if (flags & BTR_NO_UNDO_LOG_FLAG) {
+ if ((flags & BTR_NO_UNDO_LOG_FLAG) && rec_offs_any_extern(offsets)) {
/* We are in a transaction rollback undoing a row
update: we must free possible externally stored fields
which got new values in the update, if they are not
@@ -1881,38 +2208,42 @@ btr_cur_pessimistic_update(
updated the primary key to another value, and then
update it back again. */
- ut_a(big_rec_vec == NULL);
+ ut_ad(big_rec_vec == NULL);
- btr_rec_free_updated_extern_fields(index, rec, offsets,
- update, TRUE, mtr);
+ btr_rec_free_updated_extern_fields(
+ index, rec, page_zip, offsets, update,
+ trx_is_recv(trx) ? RB_RECOVERY : RB_NORMAL, mtr);
}
/* We have to set appropriate extern storage bits in the new
record to be inserted: we have to remember which fields were such */
- ext_vect = mem_heap_alloc(heap, sizeof(ulint)
- * dict_index_get_n_fields(index));
ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec));
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- n_ext_vect = btr_push_update_extern_fields(ext_vect, offsets, update);
-
- if (UNIV_UNLIKELY(rec_get_converted_size(index, new_entry)
- >= ut_min(page_get_free_space_of_empty(
- page_is_comp(page)) / 2,
- REC_MAX_DATA_SIZE))) {
-
- big_rec_vec = dtuple_convert_big_rec(index, new_entry,
- ext_vect, n_ext_vect);
- if (big_rec_vec == NULL) {
+ offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, heap);
+ n_ext += btr_push_update_extern_fields(new_entry, update, *heap);
+
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ ut_ad(page_is_comp(page));
+ if (page_zip_rec_needs_ext(
+ rec_get_converted_size(index, new_entry, n_ext),
+ TRUE,
+ dict_index_get_n_fields(index),
+ page_zip_get_size(page_zip))) {
+
+ goto make_external;
+ }
+ } else if (page_zip_rec_needs_ext(
+ rec_get_converted_size(index, new_entry, n_ext),
+ page_is_comp(page), 0, 0)) {
+make_external:
+ big_rec_vec = dtuple_convert_big_rec(index, new_entry, &n_ext);
+ if (UNIV_UNLIKELY(big_rec_vec == NULL)) {
err = DB_TOO_BIG_RECORD;
goto return_after_reservations;
}
}
- page_cursor = btr_cur_get_page_cur(cursor);
-
/* Store state of explicit locks on rec on the page infimum record,
before deleting rec. The page infimum acts as a dummy carrier of the
locks, taking care also of lock releases, before we can move the locks
@@ -1922,47 +2253,59 @@ btr_cur_pessimistic_update(
delete the lock structs set on the root page even if the root
page carries just node pointers. */
- lock_rec_store_on_page_infimum(buf_frame_align(rec), rec);
+ lock_rec_store_on_page_infimum(block, rec);
btr_search_update_hash_on_delete(cursor);
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+ page_cursor = btr_cur_get_page_cur(cursor);
+
page_cur_delete_rec(page_cursor, index, offsets, mtr);
page_cur_move_to_prev(page_cursor);
- rec = btr_cur_insert_if_possible(cursor, new_entry,
- &dummy_reorganized, mtr);
- ut_a(rec || optim_err != DB_UNDERFLOW);
+ rec = btr_cur_insert_if_possible(cursor, new_entry, n_ext, mtr);
if (rec) {
- lock_rec_restore_from_page_infimum(rec, page);
- rec_set_field_extern_bits(rec, index,
- ext_vect, n_ext_vect, mtr);
+ lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
+ rec, block);
offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
+ ULINT_UNDEFINED, heap);
if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
/* The new inserted record owns its possible externally
stored fields */
- btr_cur_unmark_extern_fields(rec, mtr, offsets);
+ btr_cur_unmark_extern_fields(page_zip,
+ rec, index, offsets, mtr);
}
btr_cur_compress_if_useful(cursor, mtr);
+ if (page_zip && !dict_index_is_clust(index)
+ && page_is_leaf(page)) {
+ /* Update the free bits in the insert buffer. */
+ ibuf_update_free_bits_zip(block, mtr);
+ }
+
err = DB_SUCCESS;
goto return_after_reservations;
- }
-
- if (page_cur_is_before_first(page_cursor)) {
- /* The record to be updated was positioned as the first user
- record on its page */
-
- was_first = TRUE;
} else {
- was_first = FALSE;
+ ut_a(optim_err != DB_UNDERFLOW);
+
+ /* Out of space: reset the free bits. */
+ if (!dict_index_is_clust(index)
+ && page_is_leaf(page)) {
+ ibuf_reset_free_bits(block);
+ }
}
+ /* Was the record to be updated positioned as the first user
+ record on its page? */
+ was_first = page_cur_is_before_first(page_cursor);
+
/* The first parameter means that no lock checking and undo logging
is made in the insert */
@@ -1970,22 +2313,43 @@ btr_cur_pessimistic_update(
| BTR_NO_LOCKING_FLAG
| BTR_KEEP_SYS_FLAG,
cursor, new_entry, &rec,
- &dummy_big_rec, NULL, mtr);
+ &dummy_big_rec, n_ext, NULL, mtr);
ut_a(rec);
ut_a(err == DB_SUCCESS);
ut_a(dummy_big_rec == NULL);
- rec_set_field_extern_bits(rec, index, ext_vect, n_ext_vect, mtr);
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
+ if (dict_index_is_sec_or_ibuf(index)) {
+ /* Update PAGE_MAX_TRX_ID in the index page header.
+ It was not updated by btr_cur_pessimistic_insert()
+ because of BTR_NO_LOCKING_FLAG. */
+ buf_block_t* rec_block;
+
+ rec_block = btr_cur_get_block(cursor);
+
+ page_update_max_trx_id(rec_block,
+ buf_block_get_page_zip(rec_block),
+ trx->id, mtr);
+ }
if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
/* The new inserted record owns its possible externally
stored fields */
+ buf_block_t* rec_block = btr_cur_get_block(cursor);
- btr_cur_unmark_extern_fields(rec, mtr, offsets);
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page));
+ page = buf_block_get_frame(rec_block);
+#endif /* UNIV_ZIP_DEBUG */
+ page_zip = buf_block_get_page_zip(rec_block);
+
+ offsets = rec_get_offsets(rec, index, offsets,
+ ULINT_UNDEFINED, heap);
+ btr_cur_unmark_extern_fields(page_zip,
+ rec, index, offsets, mtr);
}
- lock_rec_restore_from_page_infimum(rec, page);
+ lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
+ rec, block);
/* If necessary, restore also the correct lock state for a new,
preceding supremum record created in a page split. While the old
@@ -1993,11 +2357,14 @@ btr_cur_pessimistic_update(
from a wrong record. */
if (!was_first) {
- btr_cur_pess_upd_restore_supremum(rec, mtr);
+ btr_cur_pess_upd_restore_supremum(btr_cur_get_block(cursor),
+ rec, mtr);
}
return_after_reservations:
- mem_heap_free(heap);
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
if (n_extents > 0) {
fil_space_release_free_extents(index->space, n_reserved);
@@ -2010,20 +2377,20 @@ return_after_reservations:
/*==================== B-TREE DELETE MARK AND UNMARK ===============*/
-/********************************************************************
+/****************************************************************//**
Writes the redo log record for delete marking or unmarking of an index
record. */
UNIV_INLINE
void
btr_cur_del_mark_set_clust_rec_log(
/*===============================*/
- ulint flags, /* in: flags */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: index of the record */
- ibool val, /* in: value to set */
- trx_t* trx, /* in: deleting transaction */
- dulint roll_ptr,/* in: roll ptr to the undo log record */
- mtr_t* mtr) /* in: mtr */
+ ulint flags, /*!< in: flags */
+ rec_t* rec, /*!< in: record */
+ dict_index_t* index, /*!< in: index of the record */
+ ibool val, /*!< in: value to set */
+ trx_t* trx, /*!< in: deleting transaction */
+ roll_ptr_t roll_ptr,/*!< in: roll ptr to the undo log record */
+ mtr_t* mtr) /*!< in: mtr */
{
byte* log_ptr;
ut_ad(flags < 256);
@@ -2055,27 +2422,29 @@ btr_cur_del_mark_set_clust_rec_log(
mlog_close(mtr, log_ptr);
}
+#endif /* !UNIV_HOTBACKUP */
-/********************************************************************
+/****************************************************************//**
Parses the redo log record for delete marking or unmarking of a clustered
-index record. */
-
+index record.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
btr_cur_parse_del_mark_set_clust_rec(
/*=================================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- dict_index_t* index, /* in: index corresponding to page */
- page_t* page) /* in: page or NULL */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ page_t* page, /*!< in/out: page or NULL */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ dict_index_t* index) /*!< in: index corresponding to page */
{
- ulint flags;
- ulint val;
- ulint pos;
- dulint trx_id;
- dulint roll_ptr;
- ulint offset;
- rec_t* rec;
+ ulint flags;
+ ulint val;
+ ulint pos;
+ trx_id_t trx_id;
+ roll_ptr_t roll_ptr;
+ ulint offset;
+ rec_t* rec;
ut_ad(!page
|| !!page_is_comp(page) == dict_table_is_comp(index->table));
@@ -2110,57 +2479,59 @@ btr_cur_parse_del_mark_set_clust_rec(
if (page) {
rec = page + offset;
+ /* We do not need to reserve btr_search_latch, as the page
+ is only being recovered, and there cannot be a hash index to
+ it. */
+
+ btr_rec_set_deleted_flag(rec, page_zip, val);
+
if (!(flags & BTR_KEEP_SYS_FLAG)) {
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
row_upd_rec_sys_fields_in_recovery(
- rec, rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap),
+ rec, page_zip,
+ rec_get_offsets(rec, index, offsets_,
+ ULINT_UNDEFINED, &heap),
pos, trx_id, roll_ptr);
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
}
-
- /* We do not need to reserve btr_search_latch, as the page
- is only being recovered, and there cannot be a hash index to
- it. */
-
- rec_set_deleted_flag(rec, page_is_comp(page), val);
}
return(ptr);
}
-/***************************************************************
+#ifndef UNIV_HOTBACKUP
+/***********************************************************//**
Marks a clustered index record deleted. Writes an undo log record to
undo log on this delete marking. Writes in the trx id field the id
of the deleting transaction, and in the roll ptr field pointer to the
-undo log record created. */
-
+undo log record created.
+@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
+UNIV_INTERN
ulint
btr_cur_del_mark_set_clust_rec(
/*===========================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT, or error
- number */
- ulint flags, /* in: undo logging and locking flags */
- btr_cur_t* cursor, /* in: cursor */
- ibool val, /* in: value to set */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr) /* in: mtr */
+ ulint flags, /*!< in: undo logging and locking flags */
+ btr_cur_t* cursor, /*!< in: cursor */
+ ibool val, /*!< in: value to set */
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr) /*!< in: mtr */
{
dict_index_t* index;
buf_block_t* block;
- dulint roll_ptr;
+ roll_ptr_t roll_ptr;
ulint err;
rec_t* rec;
+ page_zip_des_t* page_zip;
trx_t* trx;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
rec = btr_cur_get_rec(cursor);
index = cursor->index;
@@ -2174,18 +2545,16 @@ btr_cur_del_mark_set_clust_rec(
}
#endif /* UNIV_DEBUG */
- ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(dict_index_is_clust(index));
ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
err = lock_clust_rec_modify_check_and_lock(flags,
+ btr_cur_get_block(cursor),
rec, index, offsets, thr);
if (err != DB_SUCCESS) {
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(err);
+ goto func_exit;
}
err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
@@ -2193,24 +2562,24 @@ btr_cur_del_mark_set_clust_rec(
&roll_ptr);
if (err != DB_SUCCESS) {
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(err);
+ goto func_exit;
}
- block = buf_block_align(rec);
+ block = btr_cur_get_block(cursor);
if (block->is_hashed) {
rw_lock_x_lock(&btr_search_latch);
}
- rec_set_deleted_flag(rec, rec_offs_comp(offsets), val);
+ page_zip = buf_block_get_page_zip(block);
+
+ btr_rec_set_deleted_flag(rec, page_zip, val);
trx = thr_get_trx(thr);
if (!(flags & BTR_KEEP_SYS_FLAG)) {
- row_upd_rec_sys_fields(rec, index, offsets, trx, roll_ptr);
+ row_upd_rec_sys_fields(rec, page_zip,
+ index, offsets, trx, roll_ptr);
}
if (block->is_hashed) {
@@ -2219,22 +2588,24 @@ btr_cur_del_mark_set_clust_rec(
btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx,
roll_ptr, mtr);
+
+func_exit:
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
- return(DB_SUCCESS);
+ return(err);
}
-/********************************************************************
+/****************************************************************//**
Writes the redo log record for a delete mark setting of a secondary
index record. */
UNIV_INLINE
void
btr_cur_del_mark_set_sec_rec_log(
/*=============================*/
- rec_t* rec, /* in: record */
- ibool val, /* in: value to set */
- mtr_t* mtr) /* in: mtr */
+ rec_t* rec, /*!< in: record */
+ ibool val, /*!< in: value to set */
+ mtr_t* mtr) /*!< in: mtr */
{
byte* log_ptr;
ut_ad(val <= 1);
@@ -2257,18 +2628,20 @@ btr_cur_del_mark_set_sec_rec_log(
mlog_close(mtr, log_ptr);
}
+#endif /* !UNIV_HOTBACKUP */
-/********************************************************************
+/****************************************************************//**
Parses the redo log record for delete marking or unmarking of a secondary
-index record. */
-
+index record.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
btr_cur_parse_del_mark_set_sec_rec(
/*===============================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page) /* in: page or NULL */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ page_t* page, /*!< in/out: page or NULL */
+ page_zip_des_t* page_zip)/*!< in/out: compressed page, or NULL */
{
ulint val;
ulint offset;
@@ -2294,30 +2667,31 @@ btr_cur_parse_del_mark_set_sec_rec(
is only being recovered, and there cannot be a hash index to
it. */
- rec_set_deleted_flag(rec, page_is_comp(page), val);
+ btr_rec_set_deleted_flag(rec, page_zip, val);
}
return(ptr);
}
-/***************************************************************
-Sets a secondary index record delete mark to TRUE or FALSE. */
-
+#ifndef UNIV_HOTBACKUP
+/***********************************************************//**
+Sets a secondary index record delete mark to TRUE or FALSE.
+@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
+UNIV_INTERN
ulint
btr_cur_del_mark_set_sec_rec(
/*=========================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT, or error
- number */
- ulint flags, /* in: locking flag */
- btr_cur_t* cursor, /* in: cursor */
- ibool val, /* in: value to set */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr) /* in: mtr */
+ ulint flags, /*!< in: locking flag */
+ btr_cur_t* cursor, /*!< in: cursor */
+ ibool val, /*!< in: value to set */
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr) /*!< in: mtr */
{
buf_block_t* block;
rec_t* rec;
ulint err;
+ block = btr_cur_get_block(cursor);
rec = btr_cur_get_rec(cursor);
#ifdef UNIV_DEBUG
@@ -2328,23 +2702,22 @@ btr_cur_del_mark_set_sec_rec(
}
#endif /* UNIV_DEBUG */
- err = lock_sec_rec_modify_check_and_lock(flags, rec, cursor->index,
- thr);
+ err = lock_sec_rec_modify_check_and_lock(flags,
+ btr_cur_get_block(cursor),
+ rec, cursor->index, thr, mtr);
if (err != DB_SUCCESS) {
return(err);
}
- block = buf_block_align(rec);
- ut_ad(!!page_is_comp(buf_block_get_frame(block))
+ ut_ad(!!page_rec_is_comp(rec)
== dict_table_is_comp(cursor->index->table));
if (block->is_hashed) {
rw_lock_x_lock(&btr_search_latch);
}
- rec_set_deleted_flag(rec, page_is_comp(buf_block_get_frame(block)),
- val);
+ btr_rec_set_deleted_flag(rec, buf_block_get_page_zip(block), val);
if (block->is_hashed) {
rw_lock_x_unlock(&btr_search_latch);
@@ -2355,114 +2728,89 @@ btr_cur_del_mark_set_sec_rec(
return(DB_SUCCESS);
}
-/***************************************************************
-Sets a secondary index record delete mark to FALSE. This function is only
+/***********************************************************//**
+Clear a secondary index record's delete mark. This function is only
used by the insert buffer insert merge mechanism. */
-
+UNIV_INTERN
void
btr_cur_del_unmark_for_ibuf(
/*========================*/
- rec_t* rec, /* in: record to delete unmark */
- mtr_t* mtr) /* in: mtr */
+ rec_t* rec, /*!< in/out: record to delete unmark */
+ page_zip_des_t* page_zip, /*!< in/out: compressed page
+ corresponding to rec, or NULL
+ when the tablespace is
+ uncompressed */
+ mtr_t* mtr) /*!< in: mtr */
{
/* We do not need to reserve btr_search_latch, as the page has just
been read to the buffer pool and there cannot be a hash index to it. */
- rec_set_deleted_flag(rec, page_is_comp(buf_frame_align(rec)), FALSE);
+ btr_rec_set_deleted_flag(rec, page_zip, FALSE);
btr_cur_del_mark_set_sec_rec_log(rec, FALSE, mtr);
}
/*==================== B-TREE RECORD REMOVE =========================*/
-/*****************************************************************
-Tries to compress a page of the tree on the leaf level. It is assumed
-that mtr holds an x-latch on the tree and on the cursor page. To avoid
-deadlocks, mtr must also own x-latches to brothers of page, if those
-brothers exist. NOTE: it is assumed that the caller has reserved enough
-free extents so that the compression will always succeed if done! */
-
-void
-btr_cur_compress(
-/*=============*/
- btr_cur_t* cursor, /* in: cursor on the page to compress;
- cursor does not stay valid */
- mtr_t* mtr) /* in: mtr */
-{
- ut_ad(mtr_memo_contains(mtr,
- dict_index_get_lock(btr_cur_get_index(cursor)),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(btr_page_get_level(btr_cur_get_page(cursor), mtr) == 0);
-
- btr_compress(cursor, mtr);
-}
-
-/*****************************************************************
+/*************************************************************//**
Tries to compress a page of the tree if it seems useful. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. To avoid
deadlocks, mtr must also own x-latches to brothers of page, if those
brothers exist. NOTE: it is assumed that the caller has reserved enough
-free extents so that the compression will always succeed if done! */
-
+free extents so that the compression will always succeed if done!
+@return TRUE if compression occurred */
+UNIV_INTERN
ibool
btr_cur_compress_if_useful(
/*=======================*/
- /* out: TRUE if compression occurred */
- btr_cur_t* cursor, /* in: cursor on the page to compress;
+ btr_cur_t* cursor, /*!< in: cursor on the page to compress;
cursor does not stay valid if compression
occurs */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
ut_ad(mtr_memo_contains(mtr,
dict_index_get_lock(btr_cur_get_index(cursor)),
MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)),
+ ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
MTR_MEMO_PAGE_X_FIX));
- if (btr_cur_compress_recommendation(cursor, mtr)) {
-
- btr_compress(cursor, mtr);
-
- return(TRUE);
- }
-
- return(FALSE);
+ return(btr_cur_compress_recommendation(cursor, mtr)
+ && btr_compress(cursor, mtr));
}
-/***********************************************************
+/*******************************************************//**
Removes the record on which the tree cursor is positioned on a leaf page.
It is assumed that the mtr has an x-latch on the page where the cursor is
-positioned, but no latch on the whole tree. */
-
+positioned, but no latch on the whole tree.
+@return TRUE if success, i.e., the page did not become too empty */
+UNIV_INTERN
ibool
btr_cur_optimistic_delete(
/*======================*/
- /* out: TRUE if success, i.e., the page
- did not become too empty */
- btr_cur_t* cursor, /* in: cursor on leaf page, on the record to
+ btr_cur_t* cursor, /*!< in: cursor on leaf page, on the record to
delete; cursor stays valid: if deletion
succeeds, on function exit it points to the
successor of the deleted record */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr; if this function returns
+ TRUE on a leaf page of a secondary
+ index, the mtr must be committed
+ before latching any further pages */
{
- page_t* page;
- ulint max_ins_size;
+ buf_block_t* block;
rec_t* rec;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
ibool no_compress_needed;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)),
+ ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
MTR_MEMO_PAGE_X_FIX));
/* This is intended only for leaf page deletions */
- page = btr_cur_get_page(cursor);
+ block = btr_cur_get_block(cursor);
- ut_ad(btr_page_get_level(page, mtr) == 0);
+ ut_ad(page_is_leaf(buf_block_get_frame(block)));
rec = btr_cur_get_rec(cursor);
offsets = rec_get_offsets(rec, cursor->index, offsets,
@@ -2474,17 +2822,39 @@ btr_cur_optimistic_delete(
if (no_compress_needed) {
- lock_update_delete(rec);
+ page_t* page = buf_block_get_frame(block);
+ page_zip_des_t* page_zip= buf_block_get_page_zip(block);
+ ulint max_ins = 0;
+
+ lock_update_delete(block, rec);
btr_search_update_hash_on_delete(cursor);
- max_ins_size = page_get_max_insert_size_after_reorganize(
- page, 1);
+ if (!page_zip) {
+ max_ins = page_get_max_insert_size_after_reorganize(
+ page, 1);
+ }
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
page_cur_delete_rec(btr_cur_get_page_cur(cursor),
cursor->index, offsets, mtr);
-
- ibuf_update_free_bits_low(cursor->index, page, max_ins_size,
- mtr);
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+
+ if (dict_index_is_clust(cursor->index)
+ || dict_index_is_ibuf(cursor->index)
+ || !page_is_leaf(page)) {
+ /* The insert buffer does not handle
+ inserts to clustered indexes, to
+ non-leaf pages of secondary index B-trees,
+ or to the insert buffer. */
+ } else if (page_zip) {
+ ibuf_update_free_bits_zip(block, mtr);
+ } else {
+ ibuf_update_free_bits_low(block, max_ins, mtr);
+ }
}
if (UNIV_LIKELY_NULL(heap)) {
@@ -2494,35 +2864,37 @@ btr_cur_optimistic_delete(
return(no_compress_needed);
}
-/*****************************************************************
+/*************************************************************//**
Removes the record on which the tree cursor is positioned. Tries
to compress the page if its fillfactor drops below a threshold
or if it is the only page on the level. It is assumed that mtr holds
an x-latch on the tree and on the cursor page. To avoid deadlocks,
mtr must also own x-latches to brothers of page, if those brothers
-exist. */
-
+exist.
+@return TRUE if compression occurred */
+UNIV_INTERN
ibool
btr_cur_pessimistic_delete(
/*=======================*/
- /* out: TRUE if compression occurred */
- ulint* err, /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
+ ulint* err, /*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
the latter may occur because we may have
to update node pointers on upper levels,
and in the case of variable length keys
these may actually grow in size */
- ibool has_reserved_extents, /* in: TRUE if the
+ ibool has_reserved_extents, /*!< in: TRUE if the
caller has already reserved enough free
extents so that he knows that the operation
will succeed */
- btr_cur_t* cursor, /* in: cursor on the record to delete;
+ btr_cur_t* cursor, /*!< in: cursor on the record to delete;
if compression does not occur, the cursor
stays valid: it points to successor of
deleted record on function exit */
- ibool in_rollback,/* in: TRUE if called in rollback */
- mtr_t* mtr) /* in: mtr */
+ enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
+ mtr_t* mtr) /*!< in: mtr */
{
+ buf_block_t* block;
page_t* page;
+ page_zip_des_t* page_zip;
dict_index_t* index;
rec_t* rec;
dtuple_t* node_ptr;
@@ -2534,13 +2906,13 @@ btr_cur_pessimistic_delete(
mem_heap_t* heap;
ulint* offsets;
- page = btr_cur_get_page(cursor);
+ block = btr_cur_get_block(cursor);
+ page = buf_block_get_frame(block);
index = btr_cur_get_index(cursor);
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
if (!has_reserved_extents) {
/* First reserve enough free space for the file segments
of the index tree, so that the node pointer updates will
@@ -2561,23 +2933,25 @@ btr_cur_pessimistic_delete(
heap = mem_heap_create(1024);
rec = btr_cur_get_rec(cursor);
+ page_zip = buf_block_get_page_zip(block);
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
- /* Free externally stored fields if the record is neither
- a node pointer nor in two-byte format.
- This avoids an unnecessary loop. */
- if (page_is_comp(page)
- ? !rec_get_node_ptr_flag(rec)
- : !rec_get_1byte_offs_flag(rec)) {
+ if (rec_offs_any_extern(offsets)) {
btr_rec_free_externally_stored_fields(index,
- rec, offsets,
- in_rollback, mtr);
+ rec, offsets, page_zip,
+ rb_ctx, mtr);
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
}
if (UNIV_UNLIKELY(page_get_n_recs(page) < 2)
- && UNIV_UNLIKELY(dict_index_get_page(btr_cur_get_index(cursor))
- != buf_frame_get_page_no(page))) {
+ && UNIV_UNLIKELY(dict_index_get_page(index)
+ != buf_block_get_page_no(block))) {
/* If there is only one record, drop the whole page in
btr_discard_page, if this is not the root page */
@@ -2590,7 +2964,7 @@ btr_cur_pessimistic_delete(
goto return_after_reservations;
}
- lock_update_delete(rec);
+ lock_update_delete(block, rec);
level = btr_page_get_level(page, mtr);
if (level > 0
@@ -2605,18 +2979,22 @@ btr_cur_pessimistic_delete(
non-leaf level, we must mark the new leftmost node
pointer as the predefined minimum record */
- btr_set_min_rec_mark(next_rec, page_is_comp(page),
- mtr);
+ /* This will make page_zip_validate() fail until
+ page_cur_delete_rec() completes. This is harmless,
+ because everything will take place within a single
+ mini-transaction and because writing to the redo log
+ is an atomic operation (performed by mtr_commit()). */
+ btr_set_min_rec_mark(next_rec, mtr);
} else {
/* Otherwise, if we delete the leftmost node pointer
on a page, we have to change the father node pointer
so that it is equal to the new leftmost node pointer
on the page */
- btr_node_ptr_delete(index, page, mtr);
+ btr_node_ptr_delete(index, block, mtr);
node_ptr = dict_index_build_node_ptr(
- index, next_rec, buf_frame_get_page_no(page),
+ index, next_rec, buf_block_get_page_no(block),
heap, level);
btr_insert_on_non_leaf_level(index,
@@ -2627,8 +3005,11 @@ btr_cur_pessimistic_delete(
btr_search_update_hash_on_delete(cursor);
page_cur_delete_rec(btr_cur_get_page_cur(cursor), index, offsets, mtr);
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
- ut_ad(btr_check_node_ptr(index, page, mtr));
+ ut_ad(btr_check_node_ptr(index, block, mtr));
*err = DB_SUCCESS;
@@ -2646,17 +3027,17 @@ return_after_reservations:
return(ret);
}
-/***********************************************************************
+/*******************************************************************//**
Adds path information to the cursor for the current page, for which
the binary search has been performed. */
static
void
btr_cur_add_path_info(
/*==================*/
- btr_cur_t* cursor, /* in: cursor positioned on a page */
- ulint height, /* in: height of the page in tree;
+ btr_cur_t* cursor, /*!< in: cursor positioned on a page */
+ ulint height, /*!< in: height of the page in tree;
0 means leaf node */
- ulint root_height) /* in: root node height in tree */
+ ulint root_height) /*!< in: root node height in tree */
{
btr_path_t* slot;
rec_t* rec;
@@ -2683,21 +3064,21 @@ btr_cur_add_path_info(
slot = cursor->path_arr + (root_height - height);
slot->nth_rec = page_rec_get_n_recs_before(rec);
- slot->n_recs = page_get_n_recs(buf_frame_align(rec));
+ slot->n_recs = page_get_n_recs(page_align(rec));
}
-/***********************************************************************
-Estimates the number of rows in a given index range. */
-
-ib_longlong
+/*******************************************************************//**
+Estimates the number of rows in a given index range.
+@return estimated number of rows */
+UNIV_INTERN
+ib_int64_t
btr_estimate_n_rows_in_range(
/*=========================*/
- /* out: estimated number of rows */
- dict_index_t* index, /* in: index */
- dtuple_t* tuple1, /* in: range start, may also be empty tuple */
- ulint mode1, /* in: search mode for range start */
- dtuple_t* tuple2, /* in: range end, may also be empty tuple */
- ulint mode2) /* in: search mode for range end */
+ dict_index_t* index, /*!< in: index */
+ const dtuple_t* tuple1, /*!< in: range start, may also be empty tuple */
+ ulint mode1, /*!< in: search mode for range start */
+ const dtuple_t* tuple2, /*!< in: range end, may also be empty tuple */
+ ulint mode2) /*!< in: search mode for range end */
{
btr_path_t path1[BTR_PATH_ARRAY_N_SLOTS];
btr_path_t path2[BTR_PATH_ARRAY_N_SLOTS];
@@ -2707,7 +3088,7 @@ btr_estimate_n_rows_in_range(
ibool diverged;
ibool diverged_lot;
ulint divergence_level;
- ib_longlong n_rows;
+ ib_int64_t n_rows;
ulint i;
mtr_t mtr;
@@ -2834,15 +3215,15 @@ btr_estimate_n_rows_in_range(
}
}
-/***********************************************************************
+/*******************************************************************//**
Estimates the number of different key values in a given index, for
each n-column prefix of the index where n <= dict_index_get_n_unique(index).
The estimates are stored in the array index->stat_n_diff_key_vals. */
-
+UNIV_INTERN
void
btr_estimate_number_of_different_key_vals(
/*======================================*/
- dict_index_t* index) /* in: index */
+ dict_index_t* index) /*!< in: index */
{
btr_cur_t cursor;
page_t* page;
@@ -2850,31 +3231,41 @@ btr_estimate_number_of_different_key_vals(
ulint n_cols;
ulint matched_fields;
ulint matched_bytes;
- ib_longlong* n_diff;
+ ib_int64_t* n_diff;
+ ullint n_sample_pages; /* number of pages to sample */
ulint not_empty_flag = 0;
ulint total_external_size = 0;
ulint i;
ulint j;
- ulint add_on;
+ ullint add_on;
mtr_t mtr;
mem_heap_t* heap = NULL;
ulint offsets_rec_[REC_OFFS_NORMAL_SIZE];
ulint offsets_next_rec_[REC_OFFS_NORMAL_SIZE];
ulint* offsets_rec = offsets_rec_;
ulint* offsets_next_rec= offsets_next_rec_;
- *offsets_rec_ = (sizeof offsets_rec_) / sizeof *offsets_rec_;
- *offsets_next_rec_
- = (sizeof offsets_next_rec_) / sizeof *offsets_next_rec_;
+ rec_offs_init(offsets_rec_);
+ rec_offs_init(offsets_next_rec_);
n_cols = dict_index_get_n_unique(index);
- n_diff = mem_alloc((n_cols + 1) * sizeof(ib_longlong));
+ n_diff = mem_zalloc((n_cols + 1) * sizeof(ib_int64_t));
- memset(n_diff, 0, (n_cols + 1) * sizeof(ib_longlong));
+ /* It makes no sense to test more pages than are contained
+ in the index, thus we lower the number if it is too high */
+ if (srv_stats_sample_pages > index->stat_index_size) {
+ if (index->stat_index_size > 0) {
+ n_sample_pages = index->stat_index_size;
+ } else {
+ n_sample_pages = 1;
+ }
+ } else {
+ n_sample_pages = srv_stats_sample_pages;
+ }
/* We sample some pages in the index to get an estimate */
- for (i = 0; i < BTR_KEY_VAL_ESTIMATE_N_PAGES; i++) {
+ for (i = 0; i < n_sample_pages; i++) {
rec_t* supremum;
mtr_start(&mtr);
@@ -2882,7 +3273,7 @@ btr_estimate_number_of_different_key_vals(
/* Count the number of different key values for each prefix of
the key on this index page. If the prefix does not determine
- the index record uniquely in te B-tree, then we subtract one
+ the index record uniquely in the B-tree, then we subtract one
because otherwise our algorithm would give a wrong estimate
for an index where there is just one key value. */
@@ -2963,7 +3354,7 @@ btr_estimate_number_of_different_key_vals(
}
/* If we saw k borders between different key values on
- BTR_KEY_VAL_ESTIMATE_N_PAGES leaf pages, we can estimate how many
+ n_sample_pages leaf pages, we can estimate how many
there will be in index->stat_n_leaf_pages */
/* We must take into account that our sample actually represents
@@ -2973,27 +3364,27 @@ btr_estimate_number_of_different_key_vals(
for (j = 0; j <= n_cols; j++) {
index->stat_n_diff_key_vals[j]
= ((n_diff[j]
- * (ib_longlong)index->stat_n_leaf_pages
- + BTR_KEY_VAL_ESTIMATE_N_PAGES - 1
+ * (ib_int64_t)index->stat_n_leaf_pages
+ + n_sample_pages - 1
+ total_external_size
+ not_empty_flag)
- / (BTR_KEY_VAL_ESTIMATE_N_PAGES
+ / (n_sample_pages
+ total_external_size));
/* If the tree is small, smaller than
- 10 * BTR_KEY_VAL_ESTIMATE_N_PAGES + total_external_size, then
+ 10 * n_sample_pages + total_external_size, then
the above estimate is ok. For bigger trees it is common that we
do not see any borders between key values in the few pages
- we pick. But still there may be BTR_KEY_VAL_ESTIMATE_N_PAGES
+ we pick. But still there may be n_sample_pages
different key values, or even more. Let us try to approximate
that: */
add_on = index->stat_n_leaf_pages
- / (10 * (BTR_KEY_VAL_ESTIMATE_N_PAGES
+ / (10 * (n_sample_pages
+ total_external_size));
- if (add_on > BTR_KEY_VAL_ESTIMATE_N_PAGES) {
- add_on = BTR_KEY_VAL_ESTIMATE_N_PAGES;
+ if (add_on > n_sample_pages) {
+ add_on = n_sample_pages;
}
index->stat_n_diff_key_vals[j] += add_on;
@@ -3007,16 +3398,15 @@ btr_estimate_number_of_different_key_vals(
/*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/
-/***************************************************************
-Gets the externally stored size of a record, in units of a database page. */
+/***********************************************************//**
+Gets the externally stored size of a record, in units of a database page.
+@return externally stored part, in units of a database page */
static
ulint
btr_rec_get_externally_stored_len(
/*==============================*/
- /* out: externally stored part,
- in units of a database page */
- rec_t* rec, /* in: record */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
+ rec_t* rec, /*!< in: record */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
ulint n_fields;
byte* data;
@@ -3046,17 +3436,20 @@ btr_rec_get_externally_stored_len(
return(total_extern_len / UNIV_PAGE_SIZE);
}
-/***********************************************************************
+/*******************************************************************//**
Sets the ownership bit of an externally stored field in a record. */
static
void
btr_cur_set_ownership_of_extern_field(
/*==================================*/
- rec_t* rec, /* in: clustered index record */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint i, /* in: field number */
- ibool val, /* in: value to set */
- mtr_t* mtr) /* in: mtr */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed
+ part will be updated, or NULL */
+ rec_t* rec, /*!< in/out: clustered index record */
+ dict_index_t* index, /*!< in: index of the page */
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint i, /*!< in: field number */
+ ibool val, /*!< in: value to set */
+ mtr_t* mtr) /*!< in: mtr, or NULL if not logged */
{
byte* data;
ulint local_len;
@@ -3076,116 +3469,118 @@ btr_cur_set_ownership_of_extern_field(
byte_val = byte_val | BTR_EXTERN_OWNER_FLAG;
}
- mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, byte_val,
- MLOG_1BYTE, mtr);
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val);
+ page_zip_write_blob_ptr(page_zip, rec, index, offsets, i, mtr);
+ } else if (UNIV_LIKELY(mtr != NULL)) {
+
+ mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, byte_val,
+ MLOG_1BYTE, mtr);
+ } else {
+ mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val);
+ }
}
-/***********************************************************************
+/*******************************************************************//**
Marks not updated extern fields as not-owned by this record. The ownership
is transferred to the updated record which is inserted elsewhere in the
index tree. In purge only the owner of externally stored field is allowed
to free the field. */
-
+UNIV_INTERN
void
btr_cur_mark_extern_inherited_fields(
/*=================================*/
- rec_t* rec, /* in: record in a clustered index */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- upd_t* update, /* in: update vector */
- mtr_t* mtr) /* in: mtr */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed
+ part will be updated, or NULL */
+ rec_t* rec, /*!< in/out: record in a clustered index */
+ dict_index_t* index, /*!< in: index of the page */
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const upd_t* update, /*!< in: update vector */
+ mtr_t* mtr) /*!< in: mtr, or NULL if not logged */
{
- ibool is_updated;
ulint n;
ulint j;
ulint i;
ut_ad(rec_offs_validate(rec, NULL, offsets));
ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
+
+ if (!rec_offs_any_extern(offsets)) {
+
+ return;
+ }
+
n = rec_offs_n_fields(offsets);
for (i = 0; i < n; i++) {
if (rec_offs_nth_extern(offsets, i)) {
/* Check it is not in updated fields */
- is_updated = FALSE;
if (update) {
for (j = 0; j < upd_get_n_fields(update);
j++) {
if (upd_get_nth_field(update, j)
->field_no == i) {
- is_updated = TRUE;
+
+ goto updated;
}
}
}
- if (!is_updated) {
- btr_cur_set_ownership_of_extern_field(
- rec, offsets, i, FALSE, mtr);
- }
+ btr_cur_set_ownership_of_extern_field(
+ page_zip, rec, index, offsets, i, FALSE, mtr);
+updated:
+ ;
}
}
}
-/***********************************************************************
+/*******************************************************************//**
The complement of the previous function: in an update entry may inherit
some externally stored fields from a record. We must mark them as inherited
in entry, so that they are not freed in a rollback. */
-
+UNIV_INTERN
void
btr_cur_mark_dtuple_inherited_extern(
/*=================================*/
- dtuple_t* entry, /* in: updated entry to be inserted to
- clustered index */
- ulint* ext_vec, /* in: array of extern fields in the
- original record */
- ulint n_ext_vec, /* in: number of elements in ext_vec */
- upd_t* update) /* in: update vector */
+ dtuple_t* entry, /*!< in/out: updated entry to be
+ inserted to clustered index */
+ const upd_t* update) /*!< in: update vector */
{
- dfield_t* dfield;
- ulint byte_val;
- byte* data;
- ulint len;
- ibool is_updated;
- ulint j;
- ulint i;
+ ulint i;
- if (ext_vec == NULL) {
+ for (i = 0; i < dtuple_get_n_fields(entry); i++) {
- return;
- }
+ dfield_t* dfield = dtuple_get_nth_field(entry, i);
+ byte* data;
+ ulint len;
+ ulint j;
- for (i = 0; i < n_ext_vec; i++) {
+ if (!dfield_is_ext(dfield)) {
+ continue;
+ }
- /* Check ext_vec[i] is in updated fields */
- is_updated = FALSE;
+ /* Check if it is in updated fields */
for (j = 0; j < upd_get_n_fields(update); j++) {
- if (upd_get_nth_field(update, j)->field_no
- == ext_vec[i]) {
- is_updated = TRUE;
+ if (upd_get_nth_field(update, j)->field_no == i) {
+
+ goto is_updated;
}
}
- if (!is_updated) {
- dfield = dtuple_get_nth_field(entry, ext_vec[i]);
-
- data = (byte*) dfield_get_data(dfield);
- len = dfield_get_len(dfield);
-
- len -= BTR_EXTERN_FIELD_REF_SIZE;
-
- byte_val = mach_read_from_1(data + len
- + BTR_EXTERN_LEN);
-
- byte_val = byte_val | BTR_EXTERN_INHERITED_FLAG;
+ data = dfield_get_data(dfield);
+ len = dfield_get_len(dfield);
+ data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN]
+ |= BTR_EXTERN_INHERITED_FLAG;
- mach_write_to_1(data + len + BTR_EXTERN_LEN, byte_val);
- }
+is_updated:
+ ;
}
}
-/***********************************************************************
+/*******************************************************************//**
Marks all extern fields in a record as owned by the record. This function
should be called if the delete mark of a record is removed: a not delete
marked record always owns all its extern fields. */
@@ -3193,9 +3588,12 @@ static
void
btr_cur_unmark_extern_fields(
/*=========================*/
- rec_t* rec, /* in: record in a clustered index */
- mtr_t* mtr, /* in: mtr */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed
+ part will be updated, or NULL */
+ rec_t* rec, /*!< in/out: record in a clustered index */
+ dict_index_t* index, /*!< in: index of the page */
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ mtr_t* mtr) /*!< in: mtr, or NULL if not logged */
{
ulint n;
ulint i;
@@ -3203,106 +3601,116 @@ btr_cur_unmark_extern_fields(
ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
n = rec_offs_n_fields(offsets);
+ if (!rec_offs_any_extern(offsets)) {
+
+ return;
+ }
+
for (i = 0; i < n; i++) {
if (rec_offs_nth_extern(offsets, i)) {
- btr_cur_set_ownership_of_extern_field(rec, offsets, i,
- TRUE, mtr);
+ btr_cur_set_ownership_of_extern_field(
+ page_zip, rec, index, offsets, i, TRUE, mtr);
}
}
}
-/***********************************************************************
+/*******************************************************************//**
Marks all extern fields in a dtuple as owned by the record. */
-
+UNIV_INTERN
void
btr_cur_unmark_dtuple_extern_fields(
/*================================*/
- dtuple_t* entry, /* in: clustered index entry */
- ulint* ext_vec, /* in: array of numbers of fields
- which have been stored externally */
- ulint n_ext_vec) /* in: number of elements in ext_vec */
+ dtuple_t* entry) /*!< in/out: clustered index entry */
{
- dfield_t* dfield;
- ulint byte_val;
- byte* data;
- ulint len;
ulint i;
- for (i = 0; i < n_ext_vec; i++) {
- dfield = dtuple_get_nth_field(entry, ext_vec[i]);
+ for (i = 0; i < dtuple_get_n_fields(entry); i++) {
+ dfield_t* dfield = dtuple_get_nth_field(entry, i);
- data = (byte*) dfield_get_data(dfield);
- len = dfield_get_len(dfield);
-
- len -= BTR_EXTERN_FIELD_REF_SIZE;
-
- byte_val = mach_read_from_1(data + len + BTR_EXTERN_LEN);
+ if (dfield_is_ext(dfield)) {
+ byte* data = dfield_get_data(dfield);
+ ulint len = dfield_get_len(dfield);
- byte_val = byte_val & (~BTR_EXTERN_OWNER_FLAG);
-
- mach_write_to_1(data + len + BTR_EXTERN_LEN, byte_val);
+ data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN]
+ &= ~BTR_EXTERN_OWNER_FLAG;
+ }
}
}
-/***********************************************************************
-Stores the positions of the fields marked as extern storage in the update
-vector, and also those fields who are marked as extern storage in rec
-and not mentioned in updated fields. We use this function to remember
-which fields we must mark as extern storage in a record inserted for an
-update. */
-
+/*******************************************************************//**
+Flags the data tuple fields that are marked as extern storage in the
+update vector. We use this function to remember which fields we must
+mark as extern storage in a record inserted for an update.
+@return number of flagged external columns */
+UNIV_INTERN
ulint
btr_push_update_extern_fields(
/*==========================*/
- /* out: number of values stored in ext_vect */
- ulint* ext_vect,/* in: array of ulints, must be preallocated
- to have space for all fields in rec */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- upd_t* update) /* in: update vector or NULL */
+ dtuple_t* tuple, /*!< in/out: data tuple */
+ const upd_t* update, /*!< in: update vector */
+ mem_heap_t* heap) /*!< in: memory heap */
{
- ulint n_pushed = 0;
- ibool is_updated;
- ulint n;
- ulint j;
- ulint i;
-
- if (update) {
- n = upd_get_n_fields(update);
+ ulint n_pushed = 0;
+ ulint n;
+ const upd_field_t* uf;
- for (i = 0; i < n; i++) {
+ ut_ad(tuple);
+ ut_ad(update);
- if (upd_get_nth_field(update, i)->extern_storage) {
+ uf = update->fields;
+ n = upd_get_n_fields(update);
- ext_vect[n_pushed] = upd_get_nth_field(
- update, i)->field_no;
+ for (; n--; uf++) {
+ if (dfield_is_ext(&uf->new_val)) {
+ dfield_t* field
+ = dtuple_get_nth_field(tuple, uf->field_no);
+ if (!dfield_is_ext(field)) {
+ dfield_set_ext(field);
n_pushed++;
}
- }
- }
-
- n = rec_offs_n_fields(offsets);
-
- for (i = 0; i < n; i++) {
- if (rec_offs_nth_extern(offsets, i)) {
-
- /* Check it is not in updated fields */
- is_updated = FALSE;
-
- if (update) {
- for (j = 0; j < upd_get_n_fields(update);
- j++) {
- if (upd_get_nth_field(update, j)
- ->field_no == i) {
- is_updated = TRUE;
- }
- }
- }
- if (!is_updated) {
- ext_vect[n_pushed] = i;
- n_pushed++;
+ switch (uf->orig_len) {
+ byte* data;
+ ulint len;
+ byte* buf;
+ case 0:
+ break;
+ case BTR_EXTERN_FIELD_REF_SIZE:
+ /* Restore the original locally stored
+ part of the column. In the undo log,
+ InnoDB writes a longer prefix of externally
+ stored columns, so that column prefixes
+ in secondary indexes can be reconstructed. */
+ dfield_set_data(field, (byte*) dfield_get_data(field)
+ + dfield_get_len(field)
+ - BTR_EXTERN_FIELD_REF_SIZE,
+ BTR_EXTERN_FIELD_REF_SIZE);
+ dfield_set_ext(field);
+ break;
+ default:
+ /* Reconstruct the original locally
+ stored part of the column. The data
+ will have to be copied. */
+ ut_a(uf->orig_len > BTR_EXTERN_FIELD_REF_SIZE);
+
+ data = dfield_get_data(field);
+ len = dfield_get_len(field);
+
+ buf = mem_heap_alloc(heap, uf->orig_len);
+ /* Copy the locally stored prefix. */
+ memcpy(buf, data,
+ uf->orig_len
+ - BTR_EXTERN_FIELD_REF_SIZE);
+ /* Copy the BLOB pointer. */
+ memcpy(buf + uf->orig_len
+ - BTR_EXTERN_FIELD_REF_SIZE,
+ data + len - BTR_EXTERN_FIELD_REF_SIZE,
+ BTR_EXTERN_FIELD_REF_SIZE);
+
+ dfield_set_data(field, buf, uf->orig_len);
+ dfield_set_ext(field);
}
}
}
@@ -3310,213 +3718,489 @@ btr_push_update_extern_fields(
return(n_pushed);
}
-/***********************************************************************
-Returns the length of a BLOB part stored on the header page. */
+/*******************************************************************//**
+Returns the length of a BLOB part stored on the header page.
+@return part length */
static
ulint
btr_blob_get_part_len(
/*==================*/
- /* out: part length */
- byte* blob_header) /* in: blob header */
+ const byte* blob_header) /*!< in: blob header */
{
return(mach_read_from_4(blob_header + BTR_BLOB_HDR_PART_LEN));
}
-/***********************************************************************
-Returns the page number where the next BLOB part is stored. */
+/*******************************************************************//**
+Returns the page number where the next BLOB part is stored.
+@return page number or FIL_NULL if no more pages */
static
ulint
btr_blob_get_next_page_no(
/*======================*/
- /* out: page number or FIL_NULL if
- no more pages */
- byte* blob_header) /* in: blob header */
+ const byte* blob_header) /*!< in: blob header */
{
return(mach_read_from_4(blob_header + BTR_BLOB_HDR_NEXT_PAGE_NO));
}
-/***********************************************************************
-Stores the fields in big_rec_vec to the tablespace and puts pointers to
-them in rec. The fields are stored on pages allocated from leaf node
-file segment of the index tree. */
+/*******************************************************************//**
+Deallocate a buffer block that was reserved for a BLOB part. */
+static
+void
+btr_blob_free(
+/*==========*/
+ buf_block_t* block, /*!< in: buffer block */
+ ibool all, /*!< in: TRUE=remove also the compressed page
+ if there is one */
+ mtr_t* mtr) /*!< in: mini-transaction to commit */
+{
+ ulint space = buf_block_get_space(block);
+ ulint page_no = buf_block_get_page_no(block);
+
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+
+ mtr_commit(mtr);
+
+ buf_pool_mutex_enter();
+ mutex_enter(&block->mutex);
+ /* Only free the block if it is still allocated to
+ the same file page. */
+
+ if (buf_block_get_state(block)
+ == BUF_BLOCK_FILE_PAGE
+ && buf_block_get_space(block) == space
+ && buf_block_get_page_no(block) == page_no) {
+
+ if (buf_LRU_free_block(&block->page, all, NULL)
+ != BUF_LRU_FREED
+ && all && block->page.zip.data) {
+ /* Attempt to deallocate the uncompressed page
+ if the whole block cannot be deallocated. */
+
+ buf_LRU_free_block(&block->page, FALSE, NULL);
+ }
+ }
+
+ buf_pool_mutex_exit();
+ mutex_exit(&block->mutex);
+}
+
+/*******************************************************************//**
+Stores the fields in big_rec_vec to the tablespace and puts pointers to
+them in rec. The extern flags in rec will have to be set beforehand.
+The fields are stored on pages allocated from leaf node
+file segment of the index tree.
+@return DB_SUCCESS or error */
+UNIV_INTERN
ulint
btr_store_big_rec_extern_fields(
/*============================*/
- /* out: DB_SUCCESS or error */
- dict_index_t* index, /* in: index of rec; the index tree
+ dict_index_t* index, /*!< in: index of rec; the index tree
MUST be X-latched */
- rec_t* rec, /* in: record */
- const ulint* offsets, /* in: rec_get_offsets(rec, index);
+ buf_block_t* rec_block, /*!< in/out: block containing rec */
+ rec_t* rec, /*!< in/out: record */
+ const ulint* offsets, /*!< in: rec_get_offsets(rec, index);
the "external storage" flags in offsets
will not correspond to rec when
this function returns */
- big_rec_t* big_rec_vec, /* in: vector containing fields
+ big_rec_t* big_rec_vec, /*!< in: vector containing fields
to be stored externally */
- mtr_t* local_mtr __attribute__((unused))) /* in: mtr
+ mtr_t* local_mtr __attribute__((unused))) /*!< in: mtr
containing the latch to rec and to the
tree */
{
- byte* data;
- ulint local_len;
+ ulint rec_page_no;
+ byte* field_ref;
ulint extern_len;
ulint store_len;
ulint page_no;
- page_t* page;
ulint space_id;
- page_t* prev_page;
- page_t* rec_page;
+ ulint zip_size;
ulint prev_page_no;
ulint hint_page_no;
ulint i;
mtr_t mtr;
+ mem_heap_t* heap = NULL;
+ page_zip_des_t* page_zip;
+ z_stream c_stream;
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(local_mtr, buf_block_align(rec),
- MTR_MEMO_PAGE_X_FIX));
- ut_a(index->type & DICT_CLUSTERED);
-
- space_id = buf_frame_get_space_id(rec);
+ ut_ad(mtr_memo_contains(local_mtr, rec_block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(buf_block_get_frame(rec_block) == page_align(rec));
+ ut_a(dict_index_is_clust(index));
+
+ page_zip = buf_block_get_page_zip(rec_block);
+ ut_a(dict_table_zip_size(index->table)
+ == buf_block_get_zip_size(rec_block));
+
+ space_id = buf_block_get_space(rec_block);
+ zip_size = buf_block_get_zip_size(rec_block);
+ rec_page_no = buf_block_get_page_no(rec_block);
+ ut_a(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX);
+
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ int err;
+
+ /* Zlib deflate needs 128 kilobytes for the default
+ window size, plus 512 << memLevel, plus a few
+ kilobytes for small objects. We use reduced memLevel
+ to limit the memory consumption, and preallocate the
+ heap, hoping to avoid memory fragmentation. */
+ heap = mem_heap_create(250000);
+ page_zip_set_alloc(&c_stream, heap);
+
+ err = deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION,
+ Z_DEFLATED, 15, 7, Z_DEFAULT_STRATEGY);
+ ut_a(err == Z_OK);
+ }
/* We have to create a file segment to the tablespace
for each field and put the pointer to the field in rec */
for (i = 0; i < big_rec_vec->n_fields; i++) {
-
- data = rec_get_nth_field(rec, offsets,
- big_rec_vec->fields[i].field_no,
- &local_len);
- ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
- local_len -= BTR_EXTERN_FIELD_REF_SIZE;
+ ut_ad(rec_offs_nth_extern(offsets,
+ big_rec_vec->fields[i].field_no));
+ {
+ ulint local_len;
+ field_ref = rec_get_nth_field(
+ rec, offsets, big_rec_vec->fields[i].field_no,
+ &local_len);
+ ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
+ local_len -= BTR_EXTERN_FIELD_REF_SIZE;
+ field_ref += local_len;
+ }
extern_len = big_rec_vec->fields[i].len;
ut_a(extern_len > 0);
prev_page_no = FIL_NULL;
- while (extern_len > 0) {
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ int err = deflateReset(&c_stream);
+ ut_a(err == Z_OK);
+
+ c_stream.next_in = (void*) big_rec_vec->fields[i].data;
+ c_stream.avail_in = extern_len;
+ }
+
+ for (;;) {
+ buf_block_t* block;
+ page_t* page;
+
mtr_start(&mtr);
if (prev_page_no == FIL_NULL) {
- hint_page_no = buf_frame_get_page_no(rec) + 1;
+ hint_page_no = 1 + rec_page_no;
} else {
hint_page_no = prev_page_no + 1;
}
- page = btr_page_alloc(index, hint_page_no,
- FSP_NO_DIR, 0, &mtr);
- if (page == NULL) {
+ block = btr_page_alloc(index, hint_page_no,
+ FSP_NO_DIR, 0, &mtr);
+ if (UNIV_UNLIKELY(block == NULL)) {
mtr_commit(&mtr);
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ deflateEnd(&c_stream);
+ mem_heap_free(heap);
+ }
+
return(DB_OUT_OF_FILE_SPACE);
}
- mlog_write_ulint(page + FIL_PAGE_TYPE,
- FIL_PAGE_TYPE_BLOB,
- MLOG_2BYTES, &mtr);
-
- page_no = buf_frame_get_page_no(page);
+ page_no = buf_block_get_page_no(block);
+ page = buf_block_get_frame(block);
if (prev_page_no != FIL_NULL) {
- prev_page = buf_page_get(space_id,
- prev_page_no,
+ buf_block_t* prev_block;
+ page_t* prev_page;
+
+ prev_block = buf_page_get(space_id, zip_size,
+ prev_page_no,
+ RW_X_LATCH, &mtr);
+ buf_block_dbg_add_level(prev_block,
+ SYNC_EXTERN_STORAGE);
+ prev_page = buf_block_get_frame(prev_block);
+
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ mlog_write_ulint(
+ prev_page + FIL_PAGE_NEXT,
+ page_no, MLOG_4BYTES, &mtr);
+ memcpy(buf_block_get_page_zip(
+ prev_block)
+ ->data + FIL_PAGE_NEXT,
+ prev_page + FIL_PAGE_NEXT, 4);
+ } else {
+ mlog_write_ulint(
+ prev_page + FIL_PAGE_DATA
+ + BTR_BLOB_HDR_NEXT_PAGE_NO,
+ page_no, MLOG_4BYTES, &mtr);
+ }
+
+ }
+
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ int err;
+ page_zip_des_t* blob_page_zip;
+
+ /* Write FIL_PAGE_TYPE to the redo log
+ separately, before logging any other
+ changes to the page, so that the debug
+ assertions in
+ recv_parse_or_apply_log_rec_body() can
+ be made simpler. Before InnoDB Plugin
+ 1.0.4, the initialization of
+ FIL_PAGE_TYPE was logged as part of
+ the mlog_log_string() below. */
+
+ mlog_write_ulint(page + FIL_PAGE_TYPE,
+ prev_page_no == FIL_NULL
+ ? FIL_PAGE_TYPE_ZBLOB
+ : FIL_PAGE_TYPE_ZBLOB2,
+ MLOG_2BYTES, &mtr);
+
+ c_stream.next_out = page
+ + FIL_PAGE_DATA;
+ c_stream.avail_out
+ = page_zip_get_size(page_zip)
+ - FIL_PAGE_DATA;
+
+ err = deflate(&c_stream, Z_FINISH);
+ ut_a(err == Z_OK || err == Z_STREAM_END);
+ ut_a(err == Z_STREAM_END
+ || c_stream.avail_out == 0);
+
+ /* Write the "next BLOB page" pointer */
+ mlog_write_ulint(page + FIL_PAGE_NEXT,
+ FIL_NULL, MLOG_4BYTES, &mtr);
+ /* Initialize the unused "prev page" pointer */
+ mlog_write_ulint(page + FIL_PAGE_PREV,
+ FIL_NULL, MLOG_4BYTES, &mtr);
+ /* Write a back pointer to the record
+ into the otherwise unused area. This
+ information could be useful in
+ debugging. Later, we might want to
+ implement the possibility to relocate
+ BLOB pages. Then, we would need to be
+ able to adjust the BLOB pointer in the
+ record. We do not store the heap
+ number of the record, because it can
+ change in page_zip_reorganize() or
+ btr_page_reorganize(). However, also
+ the page number of the record may
+ change when B-tree nodes are split or
+ merged. */
+ mlog_write_ulint(page
+ + FIL_PAGE_FILE_FLUSH_LSN,
+ space_id,
+ MLOG_4BYTES, &mtr);
+ mlog_write_ulint(page
+ + FIL_PAGE_FILE_FLUSH_LSN + 4,
+ rec_page_no,
+ MLOG_4BYTES, &mtr);
+
+ /* Zero out the unused part of the page. */
+ memset(page + page_zip_get_size(page_zip)
+ - c_stream.avail_out,
+ 0, c_stream.avail_out);
+ mlog_log_string(page + FIL_PAGE_FILE_FLUSH_LSN,
+ page_zip_get_size(page_zip)
+ - FIL_PAGE_FILE_FLUSH_LSN,
+ &mtr);
+ /* Copy the page to compressed storage,
+ because it will be flushed to disk
+ from there. */
+ blob_page_zip = buf_block_get_page_zip(block);
+ ut_ad(blob_page_zip);
+ ut_ad(page_zip_get_size(blob_page_zip)
+ == page_zip_get_size(page_zip));
+ memcpy(blob_page_zip->data, page,
+ page_zip_get_size(page_zip));
+
+ if (err == Z_OK && prev_page_no != FIL_NULL) {
+
+ goto next_zip_page;
+ }
+
+ rec_block = buf_page_get(space_id, zip_size,
+ rec_page_no,
RW_X_LATCH, &mtr);
+ buf_block_dbg_add_level(rec_block,
+ SYNC_NO_ORDER_CHECK);
+
+ if (err == Z_STREAM_END) {
+ mach_write_to_4(field_ref
+ + BTR_EXTERN_LEN, 0);
+ mach_write_to_4(field_ref
+ + BTR_EXTERN_LEN + 4,
+ c_stream.total_in);
+ } else {
+ memset(field_ref + BTR_EXTERN_LEN,
+ 0, 8);
+ }
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(prev_page,
- SYNC_EXTERN_STORAGE);
-#endif /* UNIV_SYNC_DEBUG */
+ if (prev_page_no == FIL_NULL) {
+ mach_write_to_4(field_ref
+ + BTR_EXTERN_SPACE_ID,
+ space_id);
- mlog_write_ulint(prev_page + FIL_PAGE_DATA
- + BTR_BLOB_HDR_NEXT_PAGE_NO,
- page_no, MLOG_4BYTES, &mtr);
- }
+ mach_write_to_4(field_ref
+ + BTR_EXTERN_PAGE_NO,
+ page_no);
+
+ mach_write_to_4(field_ref
+ + BTR_EXTERN_OFFSET,
+ FIL_PAGE_NEXT);
+ }
+
+ page_zip_write_blob_ptr(
+ page_zip, rec, index, offsets,
+ big_rec_vec->fields[i].field_no, &mtr);
- if (extern_len > (UNIV_PAGE_SIZE - FIL_PAGE_DATA
- - BTR_BLOB_HDR_SIZE
- - FIL_PAGE_DATA_END)) {
- store_len = UNIV_PAGE_SIZE - FIL_PAGE_DATA
- - BTR_BLOB_HDR_SIZE
- - FIL_PAGE_DATA_END;
+next_zip_page:
+ prev_page_no = page_no;
+
+ /* Commit mtr and release the
+ uncompressed page frame to save memory. */
+ btr_blob_free(block, FALSE, &mtr);
+
+ if (err == Z_STREAM_END) {
+ break;
+ }
} else {
- store_len = extern_len;
- }
+ mlog_write_ulint(page + FIL_PAGE_TYPE,
+ FIL_PAGE_TYPE_BLOB,
+ MLOG_2BYTES, &mtr);
+
+ if (extern_len > (UNIV_PAGE_SIZE
+ - FIL_PAGE_DATA
+ - BTR_BLOB_HDR_SIZE
+ - FIL_PAGE_DATA_END)) {
+ store_len = UNIV_PAGE_SIZE
+ - FIL_PAGE_DATA
+ - BTR_BLOB_HDR_SIZE
+ - FIL_PAGE_DATA_END;
+ } else {
+ store_len = extern_len;
+ }
- mlog_write_string(page + FIL_PAGE_DATA
- + BTR_BLOB_HDR_SIZE,
- big_rec_vec->fields[i].data
- + big_rec_vec->fields[i].len
- - extern_len,
- store_len, &mtr);
- mlog_write_ulint(page + FIL_PAGE_DATA
- + BTR_BLOB_HDR_PART_LEN,
- store_len, MLOG_4BYTES, &mtr);
- mlog_write_ulint(page + FIL_PAGE_DATA
- + BTR_BLOB_HDR_NEXT_PAGE_NO,
- FIL_NULL, MLOG_4BYTES, &mtr);
-
- extern_len -= store_len;
-
- rec_page = buf_page_get(space_id,
- buf_frame_get_page_no(data),
- RW_X_LATCH, &mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
- mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, 0,
- MLOG_4BYTES, &mtr);
- mlog_write_ulint(data + local_len + BTR_EXTERN_LEN + 4,
- big_rec_vec->fields[i].len
- - extern_len,
- MLOG_4BYTES, &mtr);
+ mlog_write_string(page + FIL_PAGE_DATA
+ + BTR_BLOB_HDR_SIZE,
+ (const byte*)
+ big_rec_vec->fields[i].data
+ + big_rec_vec->fields[i].len
+ - extern_len,
+ store_len, &mtr);
+ mlog_write_ulint(page + FIL_PAGE_DATA
+ + BTR_BLOB_HDR_PART_LEN,
+ store_len, MLOG_4BYTES, &mtr);
+ mlog_write_ulint(page + FIL_PAGE_DATA
+ + BTR_BLOB_HDR_NEXT_PAGE_NO,
+ FIL_NULL, MLOG_4BYTES, &mtr);
- if (prev_page_no == FIL_NULL) {
- mlog_write_ulint(data + local_len
- + BTR_EXTERN_SPACE_ID,
- space_id,
- MLOG_4BYTES, &mtr);
+ extern_len -= store_len;
- mlog_write_ulint(data + local_len
- + BTR_EXTERN_PAGE_NO,
- page_no,
- MLOG_4BYTES, &mtr);
+ rec_block = buf_page_get(space_id, zip_size,
+ rec_page_no,
+ RW_X_LATCH, &mtr);
+ buf_block_dbg_add_level(rec_block,
+ SYNC_NO_ORDER_CHECK);
- mlog_write_ulint(data + local_len
- + BTR_EXTERN_OFFSET,
- FIL_PAGE_DATA,
+ mlog_write_ulint(field_ref + BTR_EXTERN_LEN, 0,
+ MLOG_4BYTES, &mtr);
+ mlog_write_ulint(field_ref
+ + BTR_EXTERN_LEN + 4,
+ big_rec_vec->fields[i].len
+ - extern_len,
MLOG_4BYTES, &mtr);
- /* Set the bit denoting that this field
- in rec is stored externally */
+ if (prev_page_no == FIL_NULL) {
+ mlog_write_ulint(field_ref
+ + BTR_EXTERN_SPACE_ID,
+ space_id,
+ MLOG_4BYTES, &mtr);
+
+ mlog_write_ulint(field_ref
+ + BTR_EXTERN_PAGE_NO,
+ page_no,
+ MLOG_4BYTES, &mtr);
+
+ mlog_write_ulint(field_ref
+ + BTR_EXTERN_OFFSET,
+ FIL_PAGE_DATA,
+ MLOG_4BYTES, &mtr);
+ }
- rec_set_nth_field_extern_bit(
- rec, index,
- big_rec_vec->fields[i].field_no,
- TRUE, &mtr);
- }
+ prev_page_no = page_no;
- prev_page_no = page_no;
+ mtr_commit(&mtr);
- mtr_commit(&mtr);
+ if (extern_len == 0) {
+ break;
+ }
+ }
}
}
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ deflateEnd(&c_stream);
+ mem_heap_free(heap);
+ }
+
return(DB_SUCCESS);
}
-/***********************************************************************
+/*******************************************************************//**
+Check the FIL_PAGE_TYPE on an uncompressed BLOB page. */
+static
+void
+btr_check_blob_fil_page_type(
+/*=========================*/
+ ulint space_id, /*!< in: space id */
+ ulint page_no, /*!< in: page number */
+ const page_t* page, /*!< in: page */
+ ibool read) /*!< in: TRUE=read, FALSE=purge */
+{
+ ulint type = fil_page_get_type(page);
+
+ ut_a(space_id == page_get_space_id(page));
+ ut_a(page_no == page_get_page_no(page));
+
+ if (UNIV_UNLIKELY(type != FIL_PAGE_TYPE_BLOB)) {
+ ulint flags = fil_space_get_flags(space_id);
+
+ if (UNIV_LIKELY
+ ((flags & DICT_TF_FORMAT_MASK) == DICT_TF_FORMAT_51)) {
+ /* Old versions of InnoDB did not initialize
+ FIL_PAGE_TYPE on BLOB pages. Do not print
+ anything about the type mismatch when accessing
+ (reading or purging) a BLOB page that is in Antelope format. */
+ return;
+ }
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: FIL_PAGE_TYPE=%lu"
+ " on BLOB %s space %lu page %lu flags %lx\n",
+ (ulong) type, read ? "read" : "purge",
+ (ulong) space_id, (ulong) page_no, (ulong) flags);
+ ut_error;
+ }
+}
+
+/*******************************************************************//**
Frees the space in an externally stored field to the file space
-management if the field in data is owned the externally stored field,
+management if the field in data is owned by the externally stored field,
in a rollback we may have the additional condition that the field must
not be inherited. */
-
+UNIV_INTERN
void
btr_free_externally_stored_field(
/*=============================*/
- dict_index_t* index, /* in: index of the data, the index
+ dict_index_t* index, /*!< in: index of the data, the index
tree MUST be X-latched; if the tree
height is 1, then also the root page
must be X-latched! (this is relevant
@@ -3524,145 +4208,196 @@ btr_free_externally_stored_field(
from purge where 'data' is located on
an undo log page, not an index
page) */
- byte* data, /* in: internally stored data
- + reference to the externally
- stored part */
- ulint local_len, /* in: length of data */
- ibool do_not_free_inherited,/* in: TRUE if called in a
- rollback and we do not want to free
- inherited fields */
- mtr_t* local_mtr __attribute__((unused))) /* in: mtr
+ byte* field_ref, /*!< in/out: field reference */
+ const rec_t* rec, /*!< in: record containing field_ref, for
+ page_zip_write_blob_ptr(), or NULL */
+ const ulint* offsets, /*!< in: rec_get_offsets(rec, index),
+ or NULL */
+ page_zip_des_t* page_zip, /*!< in: compressed page corresponding
+ to rec, or NULL if rec == NULL */
+ ulint i, /*!< in: field number of field_ref;
+ ignored if rec == NULL */
+ enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
+ mtr_t* local_mtr __attribute__((unused))) /*!< in: mtr
containing the latch to data an an
X-latch to the index tree */
{
- page_t* page;
- page_t* rec_page;
- ulint space_id;
- ulint page_no;
- ulint offset;
- ulint extern_len;
- ulint next_page_no;
- ulint part_len;
- mtr_t mtr;
-
- ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
+ page_t* page;
+ ulint space_id;
+ ulint rec_zip_size = dict_table_zip_size(index->table);
+ ulint ext_zip_size;
+ ulint page_no;
+ ulint next_page_no;
+ mtr_t mtr;
+#ifdef UNIV_DEBUG
ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(local_mtr, buf_block_align(data),
- MTR_MEMO_PAGE_X_FIX));
- ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
- local_len -= BTR_EXTERN_FIELD_REF_SIZE;
-
- for (;;) {
- mtr_start(&mtr);
-
- rec_page = buf_page_get(buf_frame_get_space_id(data),
- buf_frame_get_page_no(data),
- RW_X_LATCH, &mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
- space_id = mach_read_from_4(data + local_len
- + BTR_EXTERN_SPACE_ID);
-
- page_no = mach_read_from_4(data + local_len
- + BTR_EXTERN_PAGE_NO);
+ ut_ad(mtr_memo_contains_page(local_mtr, field_ref,
+ MTR_MEMO_PAGE_X_FIX));
+ ut_ad(!rec || rec_offs_validate(rec, index, offsets));
- offset = mach_read_from_4(data + local_len
- + BTR_EXTERN_OFFSET);
- extern_len = mach_read_from_4(data + local_len
- + BTR_EXTERN_LEN + 4);
-
- /* If extern len is 0, then there is no external storage data
- at all */
-
- if (extern_len == 0) {
-
- mtr_commit(&mtr);
+ if (rec) {
+ ulint local_len;
+ const byte* f = rec_get_nth_field(rec, offsets,
+ i, &local_len);
+ ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
+ local_len -= BTR_EXTERN_FIELD_REF_SIZE;
+ f += local_len;
+ ut_ad(f == field_ref);
+ }
+#endif /* UNIV_DEBUG */
- return;
- }
+ if (UNIV_UNLIKELY(!memcmp(field_ref, field_ref_zero,
+ BTR_EXTERN_FIELD_REF_SIZE))) {
+ /* In the rollback of uncommitted transactions, we may
+ encounter a clustered index record whose BLOBs have
+ not been written. There is nothing to free then. */
+ ut_a(rb_ctx == RB_RECOVERY);
+ return;
+ }
- if (mach_read_from_1(data + local_len + BTR_EXTERN_LEN)
- & BTR_EXTERN_OWNER_FLAG) {
- /* This field does not own the externally
- stored field: do not free! */
+ space_id = mach_read_from_4(field_ref + BTR_EXTERN_SPACE_ID);
+
+ if (UNIV_UNLIKELY(space_id != dict_index_get_space(index))) {
+ ext_zip_size = fil_space_get_zip_size(space_id);
+ /* This must be an undo log record in the system tablespace,
+ that is, in row_purge_upd_exist_or_extern().
+ Currently, externally stored records are stored in the
+ same tablespace as the referring records. */
+ ut_ad(!page_get_space_id(page_align(field_ref)));
+ ut_ad(!rec);
+ ut_ad(!page_zip);
+ } else {
+ ext_zip_size = rec_zip_size;
+ }
- mtr_commit(&mtr);
+ if (!rec) {
+ /* This is a call from row_purge_upd_exist_or_extern(). */
+ ut_ad(!page_zip);
+ rec_zip_size = 0;
+ }
- return;
- }
+ for (;;) {
+ buf_block_t* rec_block;
+ buf_block_t* ext_block;
- if (do_not_free_inherited
- && mach_read_from_1(data + local_len + BTR_EXTERN_LEN)
- & BTR_EXTERN_INHERITED_FLAG) {
- /* Rollback and inherited field: do not free! */
+ mtr_start(&mtr);
+ rec_block = buf_page_get(page_get_space_id(
+ page_align(field_ref)),
+ rec_zip_size,
+ page_get_page_no(
+ page_align(field_ref)),
+ RW_X_LATCH, &mtr);
+ buf_block_dbg_add_level(rec_block, SYNC_NO_ORDER_CHECK);
+ page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO);
+
+ if (/* There is no external storage data */
+ page_no == FIL_NULL
+ /* This field does not own the externally stored field */
+ || (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
+ & BTR_EXTERN_OWNER_FLAG)
+ /* Rollback and inherited field */
+ || (rb_ctx != RB_NONE
+ && (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
+ & BTR_EXTERN_INHERITED_FLAG))) {
+
+ /* Do not free */
mtr_commit(&mtr);
return;
}
- page = buf_page_get(space_id, page_no, RW_X_LATCH, &mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_EXTERN_STORAGE);
-#endif /* UNIV_SYNC_DEBUG */
- next_page_no = mach_read_from_4(page + FIL_PAGE_DATA
- + BTR_BLOB_HDR_NEXT_PAGE_NO);
+ ext_block = buf_page_get(space_id, ext_zip_size, page_no,
+ RW_X_LATCH, &mtr);
+ buf_block_dbg_add_level(ext_block, SYNC_EXTERN_STORAGE);
+ page = buf_block_get_frame(ext_block);
+
+ if (ext_zip_size) {
+ /* Note that page_zip will be NULL
+ in row_purge_upd_exist_or_extern(). */
+ switch (fil_page_get_type(page)) {
+ case FIL_PAGE_TYPE_ZBLOB:
+ case FIL_PAGE_TYPE_ZBLOB2:
+ break;
+ default:
+ ut_error;
+ }
+ next_page_no = mach_read_from_4(page + FIL_PAGE_NEXT);
- part_len = btr_blob_get_part_len(page + FIL_PAGE_DATA);
+ btr_page_free_low(index, ext_block, 0, &mtr);
- ut_a(extern_len >= part_len);
+ if (UNIV_LIKELY(page_zip != NULL)) {
+ mach_write_to_4(field_ref + BTR_EXTERN_PAGE_NO,
+ next_page_no);
+ mach_write_to_4(field_ref + BTR_EXTERN_LEN + 4,
+ 0);
+ page_zip_write_blob_ptr(page_zip, rec, index,
+ offsets, i, &mtr);
+ } else {
+ mlog_write_ulint(field_ref
+ + BTR_EXTERN_PAGE_NO,
+ next_page_no,
+ MLOG_4BYTES, &mtr);
+ mlog_write_ulint(field_ref
+ + BTR_EXTERN_LEN + 4, 0,
+ MLOG_4BYTES, &mtr);
+ }
+ } else {
+ ut_a(!page_zip);
+ btr_check_blob_fil_page_type(space_id, page_no, page,
+ FALSE);
- /* We must supply the page level (= 0) as an argument
- because we did not store it on the page (we save the space
- overhead from an index page header. */
+ next_page_no = mach_read_from_4(
+ page + FIL_PAGE_DATA
+ + BTR_BLOB_HDR_NEXT_PAGE_NO);
- btr_page_free_low(index, page, 0, &mtr);
+ /* We must supply the page level (= 0) as an argument
+ because we did not store it on the page (we save the
+ space overhead from an index page header). */
- mlog_write_ulint(data + local_len + BTR_EXTERN_PAGE_NO,
- next_page_no,
- MLOG_4BYTES, &mtr);
- mlog_write_ulint(data + local_len + BTR_EXTERN_LEN + 4,
- extern_len - part_len,
- MLOG_4BYTES, &mtr);
- if (next_page_no == FIL_NULL) {
- ut_a(extern_len - part_len == 0);
- }
+ btr_page_free_low(index, ext_block, 0, &mtr);
- if (extern_len - part_len == 0) {
- ut_a(next_page_no == FIL_NULL);
+ mlog_write_ulint(field_ref + BTR_EXTERN_PAGE_NO,
+ next_page_no,
+ MLOG_4BYTES, &mtr);
+ /* Zero out the BLOB length. If the server
+ crashes during the execution of this function,
+ trx_rollback_or_clean_all_recovered() could
+ dereference the half-deleted BLOB, fetching a
+ wrong prefix for the BLOB. */
+ mlog_write_ulint(field_ref + BTR_EXTERN_LEN + 4,
+ 0,
+ MLOG_4BYTES, &mtr);
}
- mtr_commit(&mtr);
+ /* Commit mtr and release the BLOB block to save memory. */
+ btr_blob_free(ext_block, TRUE, &mtr);
}
}
-/***************************************************************
+/***********************************************************//**
Frees the externally stored fields for a record. */
-
+static
void
btr_rec_free_externally_stored_fields(
/*==================================*/
- dict_index_t* index, /* in: index of the data, the index
+ dict_index_t* index, /*!< in: index of the data, the index
tree MUST be X-latched */
- rec_t* rec, /* in: record */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- ibool do_not_free_inherited,/* in: TRUE if called in a
- rollback and we do not want to free
- inherited fields */
- mtr_t* mtr) /* in: mini-transaction handle which contains
+ rec_t* rec, /*!< in/out: record */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
+ part will be updated, or NULL */
+ enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
+ mtr_t* mtr) /*!< in: mini-transaction handle which contains
an X-latch to record page and to the index
tree */
{
ulint n_fields;
- byte* data;
- ulint len;
ulint i;
ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(rec),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
/* Free possible externally stored fields in the record */
ut_ad(dict_table_is_comp(index->table) == !!rec_offs_comp(offsets));
@@ -3670,168 +4405,429 @@ btr_rec_free_externally_stored_fields(
for (i = 0; i < n_fields; i++) {
if (rec_offs_nth_extern(offsets, i)) {
-
- data = rec_get_nth_field(rec, offsets, i, &len);
- btr_free_externally_stored_field(index, data, len,
- do_not_free_inherited,
- mtr);
+ ulint len;
+ byte* data
+ = rec_get_nth_field(rec, offsets, i, &len);
+ ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+ btr_free_externally_stored_field(
+ index, data + len - BTR_EXTERN_FIELD_REF_SIZE,
+ rec, offsets, page_zip, i, rb_ctx, mtr);
}
}
}
-/***************************************************************
+/***********************************************************//**
Frees the externally stored fields for a record, if the field is mentioned
in the update vector. */
static
void
btr_rec_free_updated_extern_fields(
/*===============================*/
- dict_index_t* index, /* in: index of rec; the index tree MUST be
+ dict_index_t* index, /*!< in: index of rec; the index tree MUST be
X-latched */
- rec_t* rec, /* in: record */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- upd_t* update, /* in: update vector */
- ibool do_not_free_inherited,/* in: TRUE if called in a
- rollback and we do not want to free
- inherited fields */
- mtr_t* mtr) /* in: mini-transaction handle which contains
+ rec_t* rec, /*!< in/out: record */
+ page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
+ part will be updated, or NULL */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ const upd_t* update, /*!< in: update vector */
+ enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
+ mtr_t* mtr) /*!< in: mini-transaction handle which contains
an X-latch to record page and to the tree */
{
- upd_field_t* ufield;
- ulint n_fields;
- byte* data;
- ulint len;
- ulint i;
+ ulint n_fields;
+ ulint i;
ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(rec),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
/* Free possible externally stored fields in the record */
n_fields = upd_get_n_fields(update);
for (i = 0; i < n_fields; i++) {
- ufield = upd_get_nth_field(update, i);
+ const upd_field_t* ufield = upd_get_nth_field(update, i);
if (rec_offs_nth_extern(offsets, ufield->field_no)) {
+ ulint len;
+ byte* data = rec_get_nth_field(
+ rec, offsets, ufield->field_no, &len);
+ ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+ btr_free_externally_stored_field(
+ index, data + len - BTR_EXTERN_FIELD_REF_SIZE,
+ rec, offsets, page_zip,
+ ufield->field_no, rb_ctx, mtr);
+ }
+ }
+}
+
+/*******************************************************************//**
+Copies the prefix of an uncompressed BLOB. The clustered index record
+that points to this BLOB must be protected by a lock or a page latch.
+@return number of bytes written to buf */
+static
+ulint
+btr_copy_blob_prefix(
+/*=================*/
+ byte* buf, /*!< out: the externally stored part of
+ the field, or a prefix of it */
+ ulint len, /*!< in: length of buf, in bytes */
+ ulint space_id,/*!< in: space id of the BLOB pages */
+ ulint page_no,/*!< in: page number of the first BLOB page */
+ ulint offset) /*!< in: offset on the first BLOB page */
+{
+ ulint copied_len = 0;
+
+ for (;;) {
+ mtr_t mtr;
+ buf_block_t* block;
+ const page_t* page;
+ const byte* blob_header;
+ ulint part_len;
+ ulint copy_len;
- data = rec_get_nth_field(rec, offsets,
- ufield->field_no, &len);
- btr_free_externally_stored_field(index, data, len,
- do_not_free_inherited,
- mtr);
+ mtr_start(&mtr);
+
+ block = buf_page_get(space_id, 0, page_no, RW_S_LATCH, &mtr);
+ buf_block_dbg_add_level(block, SYNC_EXTERN_STORAGE);
+ page = buf_block_get_frame(block);
+
+ btr_check_blob_fil_page_type(space_id, page_no, page, TRUE);
+
+ blob_header = page + offset;
+ part_len = btr_blob_get_part_len(blob_header);
+ copy_len = ut_min(part_len, len - copied_len);
+
+ memcpy(buf + copied_len,
+ blob_header + BTR_BLOB_HDR_SIZE, copy_len);
+ copied_len += copy_len;
+
+ page_no = btr_blob_get_next_page_no(blob_header);
+
+ mtr_commit(&mtr);
+
+ if (page_no == FIL_NULL || copy_len != part_len) {
+ return(copied_len);
}
+
+ /* On every BLOB page except the first, the BLOB header
+ is always at the start of the page data: */
+
+ offset = FIL_PAGE_DATA;
+
+ ut_ad(copied_len <= len);
}
}
-/***********************************************************************
-Copies an externally stored field of a record to mem heap. Parameter
-data contains a pointer to 'internally' stored part of the field:
-possibly some data, and the reference to the externally stored part in
-the last 20 bytes of data. */
+/*******************************************************************//**
+Copies the prefix of a compressed BLOB. The clustered index record
+that points to this BLOB must be protected by a lock or a page latch. */
+static
+void
+btr_copy_zblob_prefix(
+/*==================*/
+ z_stream* d_stream,/*!< in/out: the decompressing stream */
+ ulint zip_size,/*!< in: compressed BLOB page size */
+ ulint space_id,/*!< in: space id of the BLOB pages */
+ ulint page_no,/*!< in: page number of the first BLOB page */
+ ulint offset) /*!< in: offset on the first BLOB page */
+{
+ ulint page_type = FIL_PAGE_TYPE_ZBLOB;
-byte*
-btr_copy_externally_stored_field(
-/*=============================*/
- /* out: the whole field copied to heap */
- ulint* len, /* out: length of the whole field */
- byte* data, /* in: 'internally' stored part of the
+ ut_ad(ut_is_2pow(zip_size));
+ ut_ad(zip_size >= PAGE_ZIP_MIN_SIZE);
+ ut_ad(zip_size <= UNIV_PAGE_SIZE);
+ ut_ad(space_id);
+
+ for (;;) {
+ buf_page_t* bpage;
+ int err;
+ ulint next_page_no;
+
+ /* There is no latch on bpage directly. Instead,
+ bpage is protected by the B-tree page latch that
+ is being held on the clustered index record, or,
+ in row_merge_copy_blobs(), by an exclusive table lock. */
+ bpage = buf_page_get_zip(space_id, zip_size, page_no);
+
+ if (UNIV_UNLIKELY(!bpage)) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Cannot load"
+ " compressed BLOB"
+ " page %lu space %lu\n",
+ (ulong) page_no, (ulong) space_id);
+ return;
+ }
+
+ if (UNIV_UNLIKELY
+ (fil_page_get_type(bpage->zip.data) != page_type)) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Unexpected type %lu of"
+ " compressed BLOB"
+ " page %lu space %lu\n",
+ (ulong) fil_page_get_type(bpage->zip.data),
+ (ulong) page_no, (ulong) space_id);
+ goto end_of_blob;
+ }
+
+ next_page_no = mach_read_from_4(bpage->zip.data + offset);
+
+ if (UNIV_LIKELY(offset == FIL_PAGE_NEXT)) {
+ /* When the BLOB begins at page header,
+ the compressed data payload does not
+ immediately follow the next page pointer. */
+ offset = FIL_PAGE_DATA;
+ } else {
+ offset += 4;
+ }
+
+ d_stream->next_in = bpage->zip.data + offset;
+ d_stream->avail_in = zip_size - offset;
+
+ err = inflate(d_stream, Z_NO_FLUSH);
+ switch (err) {
+ case Z_OK:
+ if (!d_stream->avail_out) {
+ goto end_of_blob;
+ }
+ break;
+ case Z_STREAM_END:
+ if (next_page_no == FIL_NULL) {
+ goto end_of_blob;
+ }
+ /* fall through */
+ default:
+inflate_error:
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: inflate() of"
+ " compressed BLOB"
+ " page %lu space %lu returned %d (%s)\n",
+ (ulong) page_no, (ulong) space_id,
+ err, d_stream->msg);
+ case Z_BUF_ERROR:
+ goto end_of_blob;
+ }
+
+ if (next_page_no == FIL_NULL) {
+ if (!d_stream->avail_in) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: unexpected end of"
+ " compressed BLOB"
+ " page %lu space %lu\n",
+ (ulong) page_no,
+ (ulong) space_id);
+ } else {
+ err = inflate(d_stream, Z_FINISH);
+ switch (err) {
+ case Z_STREAM_END:
+ case Z_BUF_ERROR:
+ break;
+ default:
+ goto inflate_error;
+ }
+ }
+
+end_of_blob:
+ buf_page_release_zip(bpage);
+ return;
+ }
+
+ buf_page_release_zip(bpage);
+
+ /* On every BLOB page except the first,
+ the BLOB header is always at the page header: */
+
+ page_no = next_page_no;
+ offset = FIL_PAGE_NEXT;
+ page_type = FIL_PAGE_TYPE_ZBLOB2;
+ }
+}
+
+/*******************************************************************//**
+Copies the prefix of an externally stored field of a record. The
+clustered index record that points to this BLOB must be protected by a
+lock or a page latch.
+@return number of bytes written to buf */
+static
+ulint
+btr_copy_externally_stored_field_prefix_low(
+/*========================================*/
+ byte* buf, /*!< out: the externally stored part of
+ the field, or a prefix of it */
+ ulint len, /*!< in: length of buf, in bytes */
+ ulint zip_size,/*!< in: nonzero=compressed BLOB page size,
+ zero for uncompressed BLOBs */
+ ulint space_id,/*!< in: space id of the first BLOB page */
+ ulint page_no,/*!< in: page number of the first BLOB page */
+ ulint offset) /*!< in: offset on the first BLOB page */
+{
+ if (UNIV_UNLIKELY(len == 0)) {
+ return(0);
+ }
+
+ if (UNIV_UNLIKELY(zip_size)) {
+ int err;
+ z_stream d_stream;
+ mem_heap_t* heap;
+
+ /* Zlib inflate needs 32 kilobytes for the default
+ window size, plus a few kilobytes for small objects. */
+ heap = mem_heap_create(40000);
+ page_zip_set_alloc(&d_stream, heap);
+
+ err = inflateInit(&d_stream);
+ ut_a(err == Z_OK);
+
+ d_stream.next_out = buf;
+ d_stream.avail_out = len;
+ d_stream.avail_in = 0;
+
+ btr_copy_zblob_prefix(&d_stream, zip_size,
+ space_id, page_no, offset);
+ inflateEnd(&d_stream);
+ mem_heap_free(heap);
+ return(d_stream.total_out);
+ } else {
+ return(btr_copy_blob_prefix(buf, len, space_id,
+ page_no, offset));
+ }
+}
+
+/*******************************************************************//**
+Copies the prefix of an externally stored field of a record. The
+clustered index record must be protected by a lock or a page latch.
+@return the length of the copied field, or 0 if the column was being
+or has been deleted */
+UNIV_INTERN
+ulint
+btr_copy_externally_stored_field_prefix(
+/*====================================*/
+ byte* buf, /*!< out: the field, or a prefix of it */
+ ulint len, /*!< in: length of buf, in bytes */
+ ulint zip_size,/*!< in: nonzero=compressed BLOB page size,
+ zero for uncompressed BLOBs */
+ const byte* data, /*!< in: 'internally' stored part of the
field containing also the reference to
- the external part */
- ulint local_len,/* in: length of data */
- mem_heap_t* heap) /* in: mem heap */
+ the external part; must be protected by
+ a lock or a page latch */
+ ulint local_len)/*!< in: length of data, in bytes */
{
- page_t* page;
ulint space_id;
ulint page_no;
ulint offset;
- ulint extern_len;
- byte* blob_header;
- ulint part_len;
- byte* buf;
- ulint copied_len;
- mtr_t mtr;
ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
local_len -= BTR_EXTERN_FIELD_REF_SIZE;
- space_id = mach_read_from_4(data + local_len + BTR_EXTERN_SPACE_ID);
-
- page_no = mach_read_from_4(data + local_len + BTR_EXTERN_PAGE_NO);
+ if (UNIV_UNLIKELY(local_len >= len)) {
+ memcpy(buf, data, len);
+ return(len);
+ }
- offset = mach_read_from_4(data + local_len + BTR_EXTERN_OFFSET);
+ memcpy(buf, data, local_len);
+ data += local_len;
- /* Currently a BLOB cannot be bigger that 4 GB; we
- leave the 4 upper bytes in the length field unused */
+ ut_a(memcmp(data, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE));
- extern_len = mach_read_from_4(data + local_len + BTR_EXTERN_LEN + 4);
+ if (!mach_read_from_4(data + BTR_EXTERN_LEN + 4)) {
+ /* The externally stored part of the column has been
+ (partially) deleted. Signal the half-deleted BLOB
+ to the caller. */
- buf = mem_heap_alloc(heap, local_len + extern_len);
+ return(0);
+ }
- ut_memcpy(buf, data, local_len);
- copied_len = local_len;
+ space_id = mach_read_from_4(data + BTR_EXTERN_SPACE_ID);
- if (extern_len == 0) {
- *len = copied_len;
+ page_no = mach_read_from_4(data + BTR_EXTERN_PAGE_NO);
- return(buf);
- }
+ offset = mach_read_from_4(data + BTR_EXTERN_OFFSET);
- for (;;) {
- mtr_start(&mtr);
+ return(local_len
+ + btr_copy_externally_stored_field_prefix_low(buf + local_len,
+ len - local_len,
+ zip_size,
+ space_id, page_no,
+ offset));
+}
- page = buf_page_get(space_id, page_no, RW_S_LATCH, &mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_EXTERN_STORAGE);
-#endif /* UNIV_SYNC_DEBUG */
- blob_header = page + offset;
+/*******************************************************************//**
+Copies an externally stored field of a record to mem heap. The
+clustered index record must be protected by a lock or a page latch.
+@return the whole field copied to heap */
+static
+byte*
+btr_copy_externally_stored_field(
+/*=============================*/
+ ulint* len, /*!< out: length of the whole field */
+ const byte* data, /*!< in: 'internally' stored part of the
+ field containing also the reference to
+ the external part; must be protected by
+ a lock or a page latch */
+ ulint zip_size,/*!< in: nonzero=compressed BLOB page size,
+ zero for uncompressed BLOBs */
+ ulint local_len,/*!< in: length of data */
+ mem_heap_t* heap) /*!< in: mem heap */
+{
+ ulint space_id;
+ ulint page_no;
+ ulint offset;
+ ulint extern_len;
+ byte* buf;
- part_len = btr_blob_get_part_len(blob_header);
+ ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
- ut_memcpy(buf + copied_len, blob_header + BTR_BLOB_HDR_SIZE,
- part_len);
- copied_len += part_len;
+ local_len -= BTR_EXTERN_FIELD_REF_SIZE;
- page_no = btr_blob_get_next_page_no(blob_header);
+ space_id = mach_read_from_4(data + local_len + BTR_EXTERN_SPACE_ID);
- mtr_commit(&mtr);
+ page_no = mach_read_from_4(data + local_len + BTR_EXTERN_PAGE_NO);
- if (page_no == FIL_NULL) {
- ut_a(copied_len == local_len + extern_len);
+ offset = mach_read_from_4(data + local_len + BTR_EXTERN_OFFSET);
- *len = copied_len;
+ /* Currently a BLOB cannot be bigger than 4 GB; we
+ leave the 4 upper bytes in the length field unused */
- return(buf);
- }
+ extern_len = mach_read_from_4(data + local_len + BTR_EXTERN_LEN + 4);
- /* On other BLOB pages except the first the BLOB header
- always is at the page data start: */
+ buf = mem_heap_alloc(heap, local_len + extern_len);
- offset = FIL_PAGE_DATA;
+ memcpy(buf, data, local_len);
+ *len = local_len
+ + btr_copy_externally_stored_field_prefix_low(buf + local_len,
+ extern_len,
+ zip_size,
+ space_id,
+ page_no, offset);
- ut_a(copied_len < local_len + extern_len);
- }
+ return(buf);
}
-/***********************************************************************
-Copies an externally stored field of a record to mem heap. */
-
+/*******************************************************************//**
+Copies an externally stored field of a record to mem heap.
+@return the field copied to heap */
+UNIV_INTERN
byte*
btr_rec_copy_externally_stored_field(
/*=================================*/
- /* out: the field copied to heap */
- rec_t* rec, /* in: record */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint no, /* in: field number */
- ulint* len, /* out: length of the field */
- mem_heap_t* heap) /* in: mem heap */
+ const rec_t* rec, /*!< in: record in a clustered index;
+ must be protected by a lock or a page latch */
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint zip_size,/*!< in: nonzero=compressed BLOB page size,
+ zero for uncompressed BLOBs */
+ ulint no, /*!< in: field number */
+ ulint* len, /*!< out: length of the field */
+ mem_heap_t* heap) /*!< in: mem heap */
{
- ulint local_len;
- byte* data;
+ ulint local_len;
+ const byte* data;
- ut_ad(rec_offs_validate(rec, NULL, offsets));
ut_a(rec_offs_nth_extern(offsets, no));
/* An externally stored field can contain some initial
@@ -3845,5 +4841,7 @@ btr_rec_copy_externally_stored_field(
data = rec_get_nth_field(rec, offsets, no, &local_len);
- return(btr_copy_externally_stored_field(len, data, local_len, heap));
+ return(btr_copy_externally_stored_field(len, data,
+ zip_size, local_len, heap));
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/btr/btr0pcur.c b/storage/innobase/btr/btr0pcur.c
index 65b3c90c809..ec98692c35b 100644
--- a/storage/innobase/btr/btr0pcur.c
+++ b/storage/innobase/btr/btr0pcur.c
@@ -1,7 +1,24 @@
-/******************************************************
-The index tree persistent cursor
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file btr/btr0pcur.c
+The index tree persistent cursor
Created 2/23/1996 Heikki Tuuri
*******************************************************/
@@ -16,13 +33,13 @@ Created 2/23/1996 Heikki Tuuri
#include "rem0cmp.h"
#include "trx0trx.h"
-/******************************************************************
-Allocates memory for a persistent cursor object and initializes the cursor. */
-
+/**************************************************************//**
+Allocates memory for a persistent cursor object and initializes the cursor.
+@return own: persistent cursor */
+UNIV_INTERN
btr_pcur_t*
btr_pcur_create_for_mysql(void)
/*============================*/
- /* out, own: persistent cursor */
{
btr_pcur_t* pcur;
@@ -34,13 +51,13 @@ btr_pcur_create_for_mysql(void)
return(pcur);
}
-/******************************************************************
+/**************************************************************//**
Frees the memory for a persistent cursor object. */
-
+UNIV_INTERN
void
btr_pcur_free_for_mysql(
/*====================*/
- btr_pcur_t* cursor) /* in, own: persistent cursor */
+ btr_pcur_t* cursor) /*!< in, own: persistent cursor */
{
if (cursor->old_rec_buf != NULL) {
@@ -60,21 +77,22 @@ btr_pcur_free_for_mysql(
mem_free(cursor);
}
-/******************************************************************
+/**************************************************************//**
The position of the cursor is stored by taking an initial segment of the
record the cursor is positioned on, before, or after, and copying it to the
cursor data structure, or just setting a flag if the cursor is before the
first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the
page where the cursor is positioned must not be empty if the index tree is
not totally empty! */
-
+UNIV_INTERN
void
btr_pcur_store_position(
/*====================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr) /* in: mtr */
+ btr_pcur_t* cursor, /*!< in: persistent cursor */
+ mtr_t* mtr) /*!< in: mtr */
{
page_cur_t* page_cursor;
+ buf_block_t* block;
rec_t* rec;
dict_index_t* index;
page_t* page;
@@ -83,6 +101,7 @@ btr_pcur_store_position(
ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+ block = btr_pcur_get_block(cursor);
index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor));
page_cursor = btr_pcur_get_page_cur(cursor);
@@ -91,10 +110,8 @@ btr_pcur_store_position(
page = page_align(rec);
offs = page_offset(rec);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_S_FIX)
- || mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_S_FIX)
+ || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
ut_a(cursor->latch_mode != BTR_NO_LATCHES);
if (UNIV_UNLIKELY(page_get_n_recs(page) == 0)) {
@@ -137,20 +154,19 @@ btr_pcur_store_position(
index, rec, &cursor->old_n_fields,
&cursor->old_rec_buf, &cursor->buf_size);
- cursor->block_when_stored = buf_block_align(page);
- cursor->modify_clock = buf_block_get_modify_clock(
- cursor->block_when_stored);
+ cursor->block_when_stored = block;
+ cursor->modify_clock = buf_block_get_modify_clock(block);
}
-/******************************************************************
+/**************************************************************//**
Copies the stored position of a pcur to another pcur. */
-
+UNIV_INTERN
void
btr_pcur_copy_stored_position(
/*==========================*/
- btr_pcur_t* pcur_receive, /* in: pcur which will receive the
+ btr_pcur_t* pcur_receive, /*!< in: pcur which will receive the
position info */
- btr_pcur_t* pcur_donate) /* in: pcur from which the info is
+ btr_pcur_t* pcur_donate) /*!< in: pcur from which the info is
copied */
{
if (pcur_receive->old_rec_buf) {
@@ -172,7 +188,7 @@ btr_pcur_copy_stored_position(
pcur_receive->old_n_fields = pcur_donate->old_n_fields;
}
-/******************************************************************
+/**************************************************************//**
Restores the stored position of a persistent cursor bufferfixing the page and
obtaining the specified latches. If the cursor position was saved when the
(1) cursor was positioned on a user record: this function restores the position
@@ -183,22 +199,19 @@ infimum;
(3) cursor was positioned on the page supremum: restores to the first record
GREATER than the user record which was the predecessor of the supremum.
(4) cursor was positioned before the first or after the last in an empty tree:
-restores to before first or after the last in the tree. */
-
+restores to before first or after the last in the tree.
+@return TRUE if the cursor position was stored when it was on a user
+record and it can be restored on a user record whose ordering fields
+are identical to the ones of the original user record */
+UNIV_INTERN
ibool
btr_pcur_restore_position(
/*======================*/
- /* out: TRUE if the cursor position
- was stored when it was on a user record
- and it can be restored on a user record
- whose ordering fields are identical to
- the ones of the original user record */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /* in: detached persistent cursor */
- mtr_t* mtr) /* in: mtr */
+ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
+ btr_pcur_t* cursor, /*!< in: detached persistent cursor */
+ mtr_t* mtr) /*!< in: mtr */
{
dict_index_t* index;
- page_t* page;
dtuple_t* tuple;
ulint mode;
ulint old_mode;
@@ -210,6 +223,7 @@ btr_pcur_restore_position(
|| UNIV_UNLIKELY(cursor->pos_state != BTR_PCUR_WAS_POSITIONED
&& cursor->pos_state != BTR_PCUR_IS_POSITIONED)) {
ut_print_buf(stderr, cursor, sizeof(btr_pcur_t));
+ putc('\n', stderr);
if (cursor->trx_if_known) {
trx_print(stderr, cursor->trx_if_known, 0);
}
@@ -217,9 +231,9 @@ btr_pcur_restore_position(
ut_error;
}
- if (UNIV_UNLIKELY(
- cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE
- || cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE)) {
+ if (UNIV_UNLIKELY
+ (cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE
+ || cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE)) {
/* In these cases we do not try an optimistic restoration,
but always do a search */
@@ -228,8 +242,7 @@ btr_pcur_restore_position(
cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE,
index, latch_mode, btr_pcur_get_btr_cur(cursor), mtr);
- cursor->block_when_stored
- = buf_block_align(btr_pcur_get_page(cursor));
+ cursor->block_when_stored = btr_pcur_get_block(cursor);
return(FALSE);
}
@@ -237,25 +250,24 @@ btr_pcur_restore_position(
ut_a(cursor->old_rec);
ut_a(cursor->old_n_fields);
- page = btr_cur_get_page(btr_pcur_get_btr_cur(cursor));
-
if (UNIV_LIKELY(latch_mode == BTR_SEARCH_LEAF)
|| UNIV_LIKELY(latch_mode == BTR_MODIFY_LEAF)) {
/* Try optimistic restoration */
if (UNIV_LIKELY(buf_page_optimistic_get(
latch_mode,
- cursor->block_when_stored, page,
+ cursor->block_when_stored,
cursor->modify_clock, mtr))) {
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_TREE_NODE);
-#endif /* UNIV_SYNC_DEBUG */
+
+ buf_block_dbg_add_level(btr_pcur_get_block(cursor),
+ SYNC_TREE_NODE);
+
if (cursor->rel_pos == BTR_PCUR_ON) {
#ifdef UNIV_DEBUG
- rec_t* rec;
- ulint* offsets1;
- ulint* offsets2;
+ const rec_t* rec;
+ const ulint* offsets1;
+ const ulint* offsets2;
#endif /* UNIV_DEBUG */
cursor->latch_mode = latch_mode;
#ifdef UNIV_DEBUG
@@ -307,7 +319,7 @@ btr_pcur_restore_position(
cursor->search_mode = old_mode;
if (cursor->rel_pos == BTR_PCUR_ON
- && btr_pcur_is_on_user_rec(cursor, mtr)
+ && btr_pcur_is_on_user_rec(cursor)
&& 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor),
rec_get_offsets(
btr_pcur_get_rec(cursor), index,
@@ -317,8 +329,7 @@ btr_pcur_restore_position(
the cursor can now be on a different page! But we can retain
the value of old_rec */
- cursor->block_when_stored = buf_block_align(
- btr_pcur_get_page(cursor));
+ cursor->block_when_stored = btr_pcur_get_block(cursor);
cursor->modify_clock = buf_block_get_modify_clock(
cursor->block_when_stored);
cursor->old_stored = BTR_PCUR_OLD_STORED;
@@ -339,79 +350,85 @@ btr_pcur_restore_position(
return(FALSE);
}
-/******************************************************************
+/**************************************************************//**
If the latch mode of the cursor is BTR_SEARCH_LEAF or BTR_MODIFY_LEAF,
releases the page latch and bufferfix reserved by the cursor.
NOTE! In the case of BTR_MODIFY_LEAF, there should not exist changes
made by the current mini-transaction to the data protected by the
cursor latch, as then the latch must not be released until mtr_commit. */
-
+UNIV_INTERN
void
btr_pcur_release_leaf(
/*==================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr) /* in: mtr */
+ btr_pcur_t* cursor, /*!< in: persistent cursor */
+ mtr_t* mtr) /*!< in: mtr */
{
- page_t* page;
+ buf_block_t* block;
ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
- page = btr_cur_get_page(btr_pcur_get_btr_cur(cursor));
+ block = btr_pcur_get_block(cursor);
- btr_leaf_page_release(page, cursor->latch_mode, mtr);
+ btr_leaf_page_release(block, cursor->latch_mode, mtr);
cursor->latch_mode = BTR_NO_LATCHES;
cursor->pos_state = BTR_PCUR_WAS_POSITIONED;
}
-/*************************************************************
+/*********************************************************//**
Moves the persistent cursor to the first record on the next page. Releases the
latch on the current page, and bufferunfixes it. Note that there must not be
modifications on the current page, as then the x-latch can be released only in
mtr_commit. */
-
+UNIV_INTERN
void
btr_pcur_move_to_next_page(
/*=======================*/
- btr_pcur_t* cursor, /* in: persistent cursor; must be on the
+ btr_pcur_t* cursor, /*!< in: persistent cursor; must be on the
last record of the current page */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
- ulint next_page_no;
- ulint space;
- page_t* page;
- page_t* next_page;
+ ulint next_page_no;
+ ulint space;
+ ulint zip_size;
+ page_t* page;
+ buf_block_t* next_block;
+ page_t* next_page;
ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
- ut_ad(btr_pcur_is_after_last_on_page(cursor, mtr));
+ ut_ad(btr_pcur_is_after_last_on_page(cursor));
cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
page = btr_pcur_get_page(cursor);
-
next_page_no = btr_page_get_next(page, mtr);
- space = buf_frame_get_space_id(page);
+ space = buf_block_get_space(btr_pcur_get_block(cursor));
+ zip_size = buf_block_get_zip_size(btr_pcur_get_block(cursor));
ut_ad(next_page_no != FIL_NULL);
- next_page = btr_page_get(space, next_page_no, cursor->latch_mode, mtr);
+ next_block = btr_block_get(space, zip_size, next_page_no,
+ cursor->latch_mode, mtr);
+ next_page = buf_block_get_frame(next_block);
#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_prev(next_page, mtr) == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
ut_a(page_is_comp(next_page) == page_is_comp(page));
- buf_block_align(next_page)->check_index_page_at_flush = TRUE;
+ ut_a(btr_page_get_prev(next_page, mtr)
+ == buf_block_get_page_no(btr_pcur_get_block(cursor)));
+#endif /* UNIV_BTR_DEBUG */
+ next_block->check_index_page_at_flush = TRUE;
- btr_leaf_page_release(page, cursor->latch_mode, mtr);
+ btr_leaf_page_release(btr_pcur_get_block(cursor),
+ cursor->latch_mode, mtr);
- page_cur_set_before_first(next_page, btr_pcur_get_page_cur(cursor));
+ page_cur_set_before_first(next_block, btr_pcur_get_page_cur(cursor));
page_check_dir(next_page);
}
-/*************************************************************
+/*********************************************************//**
Moves the persistent cursor backward if it is on the first record of the page.
Commits mtr. Note that to prevent a possible deadlock, the operation
first stores the position of the cursor, commits mtr, acquires the necessary
@@ -420,24 +437,24 @@ alphabetical position of the cursor is guaranteed to be sensible on
return, but it may happen that the cursor is not positioned on the last
record of any page, because the structure of the tree may have changed
during the time when the cursor had no latches. */
-
+UNIV_INTERN
void
btr_pcur_move_backward_from_page(
/*=============================*/
- btr_pcur_t* cursor, /* in: persistent cursor, must be on the first
+ btr_pcur_t* cursor, /*!< in: persistent cursor, must be on the first
record of the current page */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
- ulint prev_page_no;
- ulint space;
- page_t* page;
- page_t* prev_page;
- ulint latch_mode;
- ulint latch_mode2;
+ ulint prev_page_no;
+ ulint space;
+ page_t* page;
+ buf_block_t* prev_block;
+ ulint latch_mode;
+ ulint latch_mode2;
ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
- ut_ad(btr_pcur_is_before_first_on_page(cursor, mtr));
+ ut_ad(btr_pcur_is_before_first_on_page(cursor));
ut_ad(!btr_pcur_is_before_first_in_tree(cursor, mtr));
latch_mode = cursor->latch_mode;
@@ -465,26 +482,27 @@ btr_pcur_move_backward_from_page(
page = btr_pcur_get_page(cursor);
prev_page_no = btr_page_get_prev(page, mtr);
- space = buf_frame_get_space_id(page);
+ space = buf_block_get_space(btr_pcur_get_block(cursor));
- if (btr_pcur_is_before_first_on_page(cursor, mtr)
- && (prev_page_no != FIL_NULL)) {
+ if (prev_page_no == FIL_NULL) {
+ } else if (btr_pcur_is_before_first_on_page(cursor)) {
- prev_page = btr_pcur_get_btr_cur(cursor)->left_page;
+ prev_block = btr_pcur_get_btr_cur(cursor)->left_block;
- btr_leaf_page_release(page, latch_mode, mtr);
+ btr_leaf_page_release(btr_pcur_get_block(cursor),
+ latch_mode, mtr);
- page_cur_set_after_last(prev_page,
+ page_cur_set_after_last(prev_block,
btr_pcur_get_page_cur(cursor));
- } else if (prev_page_no != FIL_NULL) {
+ } else {
/* The repositioned cursor did not end on an infimum record on
a page. Cursor repositioning acquired a latch also on the
previous page, but we do not need the latch: release it. */
- prev_page = btr_pcur_get_btr_cur(cursor)->left_page;
+ prev_block = btr_pcur_get_btr_cur(cursor)->left_block;
- btr_leaf_page_release(prev_page, latch_mode, mtr);
+ btr_leaf_page_release(prev_block, latch_mode, mtr);
}
cursor->latch_mode = latch_mode;
@@ -492,25 +510,24 @@ btr_pcur_move_backward_from_page(
cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
}
-/*************************************************************
+/*********************************************************//**
Moves the persistent cursor to the previous record in the tree. If no records
-are left, the cursor stays 'before first in tree'. */
-
+are left, the cursor stays 'before first in tree'.
+@return TRUE if the cursor was not before first in tree */
+UNIV_INTERN
ibool
btr_pcur_move_to_prev(
/*==================*/
- /* out: TRUE if the cursor was not before first
- in tree */
- btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
+ btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the
function may release the page latch */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
- if (btr_pcur_is_before_first_on_page(cursor, mtr)) {
+ if (btr_pcur_is_before_first_on_page(cursor)) {
if (btr_pcur_is_before_first_in_tree(cursor, mtr)) {
@@ -522,36 +539,36 @@ btr_pcur_move_to_prev(
return(TRUE);
}
- btr_pcur_move_to_prev_on_page(cursor, mtr);
+ btr_pcur_move_to_prev_on_page(cursor);
return(TRUE);
}
-/******************************************************************
+/**************************************************************//**
If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first
user record satisfying the search condition, in the case PAGE_CUR_L or
PAGE_CUR_LE, on the last user record. If no such user record exists, then
in the first case sets the cursor after last in tree, and in the latter case
before first in tree. The latching mode must be BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF. */
-
+UNIV_INTERN
void
btr_pcur_open_on_user_rec(
/*======================*/
- dict_index_t* index, /* in: index */
- dtuple_t* tuple, /* in: tuple on which search done */
- ulint mode, /* in: PAGE_CUR_L, ... */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF or
+ dict_index_t* index, /*!< in: index */
+ const dtuple_t* tuple, /*!< in: tuple on which search done */
+ ulint mode, /*!< in: PAGE_CUR_L, ... */
+ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF */
- btr_pcur_t* cursor, /* in: memory buffer for persistent
+ btr_pcur_t* cursor, /*!< in: memory buffer for persistent
cursor */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
btr_pcur_open(index, tuple, mode, latch_mode, cursor, mtr);
if ((mode == PAGE_CUR_GE) || (mode == PAGE_CUR_G)) {
- if (btr_pcur_is_after_last_on_page(cursor, mtr)) {
+ if (btr_pcur_is_after_last_on_page(cursor)) {
btr_pcur_move_to_next_user_rec(cursor, mtr);
}
diff --git a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c
index 84ad0e27110..ef7afeb1039 100644
--- a/storage/innobase/btr/btr0sea.c
+++ b/storage/innobase/btr/btr0sea.c
@@ -1,7 +1,31 @@
-/************************************************************************
-The index tree adaptive search
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
-(c) 1996 Innobase Oy
+/********************************************************************//**
+@file btr/btr0sea.c
+The index tree adaptive search
Created 2/17/1996 Heikki Tuuri
*************************************************************************/
@@ -19,47 +43,55 @@ Created 2/17/1996 Heikki Tuuri
#include "btr0btr.h"
#include "ha0ha.h"
-ulint btr_search_this_is_zero = 0; /* A dummy variable to fool the
- compiler */
+/** Flag: has the search system been enabled?
+Protected by btr_search_latch and btr_search_enabled_mutex. */
+UNIV_INTERN char btr_search_enabled = TRUE;
+
+/** Mutex protecting btr_search_enabled */
+static mutex_t btr_search_enabled_mutex;
+
+/** A dummy variable to fool the compiler */
+UNIV_INTERN ulint btr_search_this_is_zero = 0;
#ifdef UNIV_SEARCH_PERF_STAT
-ulint btr_search_n_succ = 0;
-ulint btr_search_n_hash_fail = 0;
+/** Number of successful adaptive hash index lookups */
+UNIV_INTERN ulint btr_search_n_succ = 0;
+/** Number of failed adaptive hash index lookups */
+UNIV_INTERN ulint btr_search_n_hash_fail = 0;
#endif /* UNIV_SEARCH_PERF_STAT */
-byte btr_sea_pad1[64]; /* padding to prevent other memory update
- hotspots from residing on the same memory
- cache line as btr_search_latch */
+/** padding to prevent other memory update
+hotspots from residing on the same memory
+cache line as btr_search_latch */
+UNIV_INTERN byte btr_sea_pad1[64];
-/* The latch protecting the adaptive search system: this latch protects the
+/** The latch protecting the adaptive search system: this latch protects the
(1) positions of records on those pages where a hash index has been built.
NOTE: It does not protect values of non-ordering fields within a record from
being updated in-place! We can use fact (1) to perform unique searches to
indexes. */
-rw_lock_t* btr_search_latch_temp; /* We will allocate the latch from
- dynamic memory to get it to the
- same DRAM page as other hotspot
- semaphores */
+/* We will allocate the latch from dynamic memory to get it to the
+same DRAM page as other hotspot semaphores */
+UNIV_INTERN rw_lock_t* btr_search_latch_temp;
-byte btr_sea_pad2[64]; /* padding to prevent other memory update
- hotspots from residing on the same memory
- cache line */
+/** padding to prevent other memory update hotspots from residing on
+the same memory cache line */
+UNIV_INTERN byte btr_sea_pad2[64];
-btr_search_sys_t* btr_search_sys;
+/** The adaptive hash index */
+UNIV_INTERN btr_search_sys_t* btr_search_sys;
-/* If the number of records on the page divided by this parameter
+/** If the number of records on the page divided by this parameter
would have been successfully accessed using a hash index, the index
is then built on the page, assuming the global limit has been reached */
-
#define BTR_SEARCH_PAGE_BUILD_LIMIT 16
-/* The global limit for consecutive potentially successful hash searches,
+/** The global limit for consecutive potentially successful hash searches,
before hash index building is started */
-
#define BTR_SEARCH_BUILD_LIMIT 100
-/************************************************************************
+/********************************************************************//**
Builds a hash index on a page with the given parameters. If the page already
has a hash index with different parameters, the old hash index is removed.
If index is non-NULL, this function checks if n_fields and n_bytes are
@@ -68,15 +100,15 @@ static
void
btr_search_build_page_hash_index(
/*=============================*/
- dict_index_t* index, /* in: index for which to build, or NULL if
+ dict_index_t* index, /*!< in: index for which to build, or NULL if
not known */
- page_t* page, /* in: index page, s- or x-latched */
- ulint n_fields,/* in: hash this many full fields */
- ulint n_bytes,/* in: hash this many bytes from the next
+ buf_block_t* block, /*!< in: index page, s- or x-latched */
+ ulint n_fields,/*!< in: hash this many full fields */
+ ulint n_bytes,/*!< in: hash this many bytes from the next
field */
- ibool left_side);/* in: hash for searches from left side? */
+ ibool left_side);/*!< in: hash for searches from left side? */
-/*********************************************************************
+/*****************************************************************//**
This function should be called before reserving any btr search mutex, if
the intended operation might add nodes to the search system hash table.
Because of the latching order, once we have reserved the btr search system
@@ -91,7 +123,6 @@ void
btr_search_check_free_space_in_heap(void)
/*=====================================*/
{
- buf_frame_t* frame;
hash_table_t* table;
mem_heap_t* heap;
@@ -109,27 +140,27 @@ btr_search_check_free_space_in_heap(void)
be enough free space in the hash table. */
if (heap->free_block == NULL) {
- frame = buf_frame_alloc();
+ buf_block_t* block = buf_block_alloc(0);
rw_lock_x_lock(&btr_search_latch);
if (heap->free_block == NULL) {
- heap->free_block = frame;
+ heap->free_block = block;
} else {
- buf_frame_free(frame);
+ buf_block_free(block);
}
rw_lock_x_unlock(&btr_search_latch);
}
}
-/*********************************************************************
+/*****************************************************************//**
Creates and initializes the adaptive search system at a database start. */
-
+UNIV_INTERN
void
btr_search_sys_create(
/*==================*/
- ulint hash_size) /* in: hash index hash table size */
+ ulint hash_size) /*!< in: hash index hash table size */
{
/* We allocate the search latch from dynamic memory:
see above at the global variable definition */
@@ -137,21 +168,75 @@ btr_search_sys_create(
btr_search_latch_temp = mem_alloc(sizeof(rw_lock_t));
rw_lock_create(&btr_search_latch, SYNC_SEARCH_SYS);
+ mutex_create(&btr_search_enabled_mutex, SYNC_SEARCH_SYS_CONF);
btr_search_sys = mem_alloc(sizeof(btr_search_sys_t));
- btr_search_sys->hash_index = ha_create(TRUE, hash_size, 0, 0);
+ btr_search_sys->hash_index = ha_create(hash_size, 0, 0);
+}
+/*****************************************************************//**
+Frees the adaptive search system at a database shutdown. */
+UNIV_INTERN
+void
+btr_search_sys_free(void)
+/*=====================*/
+{
+ mem_free(btr_search_latch_temp);
+ btr_search_latch_temp = NULL;
+ mem_heap_free(btr_search_sys->hash_index->heap);
+ hash_table_free(btr_search_sys->hash_index);
+ mem_free(btr_search_sys);
+ btr_search_sys = NULL;
}
-/*********************************************************************
-Creates and initializes a search info struct. */
+/********************************************************************//**
+Disable the adaptive hash search system and empty the index. */
+UNIV_INTERN
+void
+btr_search_disable(void)
+/*====================*/
+{
+ mutex_enter(&btr_search_enabled_mutex);
+ rw_lock_x_lock(&btr_search_latch);
+
+ btr_search_enabled = FALSE;
+ /* Clear all block->is_hashed flags and remove all entries
+ from btr_search_sys->hash_index. */
+ buf_pool_drop_hash_index();
+
+ /* btr_search_enabled_mutex should guarantee this. */
+ ut_ad(!btr_search_enabled);
+
+ rw_lock_x_unlock(&btr_search_latch);
+ mutex_exit(&btr_search_enabled_mutex);
+}
+
+/********************************************************************//**
+Enable the adaptive hash search system. */
+UNIV_INTERN
+void
+btr_search_enable(void)
+/*====================*/
+{
+ mutex_enter(&btr_search_enabled_mutex);
+ rw_lock_x_lock(&btr_search_latch);
+
+ btr_search_enabled = TRUE;
+
+ rw_lock_x_unlock(&btr_search_latch);
+ mutex_exit(&btr_search_enabled_mutex);
+}
+
+/*****************************************************************//**
+Creates and initializes a search info struct.
+@return own: search info struct */
+UNIV_INTERN
btr_search_t*
btr_search_info_create(
/*===================*/
- /* out, own: search info struct */
- mem_heap_t* heap) /* in: heap where created */
+ mem_heap_t* heap) /*!< in: heap where created */
{
btr_search_t* info;
@@ -185,14 +270,15 @@ btr_search_info_create(
return(info);
}
-/*********************************************************************
+/*****************************************************************//**
Returns the value of ref_count. The value is protected by
-btr_search_latch. */
+btr_search_latch.
+@return ref_count value. */
+UNIV_INTERN
ulint
btr_search_info_get_ref_count(
/*==========================*/
- /* out: ref_count value. */
- btr_search_t* info) /* in: search info. */
+ btr_search_t* info) /*!< in: search info. */
{
ulint ret;
@@ -210,7 +296,7 @@ btr_search_info_get_ref_count(
return(ret);
}
-/*************************************************************************
+/*********************************************************************//**
Updates the search info of an index about hash successes. NOTE that info
is NOT protected by any semaphore, to save CPU time! Do not assume its fields
are consistent. */
@@ -218,8 +304,8 @@ static
void
btr_search_info_update_hash(
/*========================*/
- btr_search_t* info, /* in/out: search info */
- btr_cur_t* cursor) /* in: cursor which was just positioned */
+ btr_search_t* info, /*!< in/out: search info */
+ btr_cur_t* cursor) /*!< in: cursor which was just positioned */
{
dict_index_t* index;
ulint n_unique;
@@ -232,7 +318,7 @@ btr_search_info_update_hash(
index = cursor->index;
- if (index->type & DICT_IBUF) {
+ if (dict_index_is_ibuf(index)) {
/* So many deletes are performed on an insert buffer tree
that we do not consider a hash index useful on it: */
@@ -330,32 +416,31 @@ set_new_recomm:
}
}
-/*************************************************************************
+/*********************************************************************//**
Updates the block search info on hash successes. NOTE that info and
block->n_hash_helps, n_fields, n_bytes, side are NOT protected by any
-semaphore, to save CPU time! Do not assume the fields are consistent. */
+semaphore, to save CPU time! Do not assume the fields are consistent.
+@return TRUE if building a (new) hash index on the block is recommended */
static
ibool
btr_search_update_block_hash_info(
/*==============================*/
- /* out: TRUE if building a (new) hash index on
- the block is recommended */
- btr_search_t* info, /* in: search info */
- buf_block_t* block, /* in: buffer block */
+ btr_search_t* info, /*!< in: search info */
+ buf_block_t* block, /*!< in: buffer block */
btr_cur_t* cursor __attribute__((unused)))
- /* in: cursor */
+ /*!< in: cursor */
{
#ifdef UNIV_SYNC_DEBUG
ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
- ut_ad(rw_lock_own(&((buf_block_t*) block)->lock, RW_LOCK_SHARED)
- || rw_lock_own(&((buf_block_t*) block)->lock, RW_LOCK_EX));
+ ut_ad(rw_lock_own(&block->lock, RW_LOCK_SHARED)
+ || rw_lock_own(&block->lock, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
ut_ad(cursor);
info->last_hash_succ = FALSE;
- ut_a(block->magic_n == BUF_BLOCK_MAGIC_N);
+ ut_a(buf_block_state_valid(block));
ut_ad(info->magic_n == BTR_SEARCH_MAGIC_N);
if ((block->n_hash_helps > 0)
@@ -409,7 +494,7 @@ btr_search_update_block_hash_info(
return(FALSE);
}
-/*************************************************************************
+/*********************************************************************//**
Updates a hash node reference when it has been unsuccessfully used in a
search which could have succeeded with the used hash parameters. This can
happen because when building a hash index for a page, we do not check
@@ -421,9 +506,9 @@ static
void
btr_search_update_hash_ref(
/*=======================*/
- btr_search_t* info, /* in: search info */
- buf_block_t* block, /* in: buffer block where cursor positioned */
- btr_cur_t* cursor) /* in: cursor */
+ btr_search_t* info, /*!< in: search info */
+ buf_block_t* block, /*!< in: buffer block where cursor positioned */
+ btr_cur_t* cursor) /*!< in: cursor */
{
ulint fold;
rec_t* rec;
@@ -435,17 +520,24 @@ btr_search_update_hash_ref(
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
|| rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
- ut_ad(buf_block_align(btr_cur_get_rec(cursor)) == block);
- ut_a(!block->is_hashed || block->index == cursor->index);
+ ut_ad(page_align(btr_cur_get_rec(cursor))
+ == buf_block_get_frame(block));
- if (block->is_hashed
- && (info->n_hash_potential > 0)
+ if (!block->is_hashed) {
+
+ return;
+ }
+
+ ut_a(block->index == cursor->index);
+ ut_a(!dict_index_is_ibuf(cursor->index));
+
+ if ((info->n_hash_potential > 0)
&& (block->curr_n_fields == info->n_fields)
&& (block->curr_n_bytes == info->n_bytes)
&& (block->curr_left_side == info->left_side)) {
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
rec = btr_cur_get_rec(cursor);
@@ -467,18 +559,19 @@ btr_search_update_hash_ref(
ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
- ha_insert_for_fold(btr_search_sys->hash_index, fold, rec);
+ ha_insert_for_fold(btr_search_sys->hash_index, fold,
+ block, rec);
}
}
-/*************************************************************************
+/*********************************************************************//**
Updates the search info. */
-
+UNIV_INTERN
void
btr_search_info_update_slow(
/*========================*/
- btr_search_t* info, /* in/out: search info */
- btr_cur_t* cursor) /* in: cursor which was just positioned */
+ btr_search_t* info, /*!< in/out: search info */
+ btr_cur_t* cursor) /*!< in: cursor which was just positioned */
{
buf_block_t* block;
ibool build_index;
@@ -490,7 +583,7 @@ btr_search_info_update_slow(
ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
- block = buf_block_align(btr_cur_get_rec(cursor));
+ block = btr_cur_get_block(cursor);
/* NOTE that the following two function calls do NOT protect
info or block->n_fields etc. with any semaphore, to save CPU time!
@@ -540,7 +633,7 @@ btr_search_info_update_slow(
params2 = params + btr_search_this_is_zero;
btr_search_build_page_hash_index(cursor->index,
- block->frame,
+ block,
params2[0],
params2[1],
params2[2]);
@@ -548,28 +641,28 @@ btr_search_info_update_slow(
}
}
-/**********************************************************************
+/******************************************************************//**
Checks if a guessed position for a tree cursor is right. Note that if
mode is PAGE_CUR_LE, which is used in inserts, and the function returns
-TRUE, then cursor->up_match and cursor->low_match both have sensible values. */
+TRUE, then cursor->up_match and cursor->low_match both have sensible values.
+@return TRUE if success */
static
ibool
btr_search_check_guess(
/*===================*/
- /* out: TRUE if success */
- btr_cur_t* cursor, /* in: guessed cursor position */
+ btr_cur_t* cursor, /*!< in: guessed cursor position */
ibool can_only_compare_to_cursor_rec,
- /* in: if we do not have a latch on the page
+ /*!< in: if we do not have a latch on the page
of cursor, but only a latch on
btr_search_latch, then ONLY the columns
of the record UNDER the cursor are
protected, not the next or previous record
in the chain: we cannot look at the next or
previous record to check our guess! */
- dtuple_t* tuple, /* in: data tuple */
- ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
+ const dtuple_t* tuple, /*!< in: data tuple */
+ ulint mode, /*!< in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
or PAGE_CUR_GE */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
rec_t* rec;
ulint n_unique;
@@ -580,7 +673,7 @@ btr_search_check_guess(
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
ibool success = FALSE;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
n_unique = dict_index_get_n_unique_in_tree(cursor->index);
@@ -641,8 +734,8 @@ btr_search_check_guess(
prev_rec = page_rec_get_prev(rec);
if (page_rec_is_infimum(prev_rec)) {
- success = btr_page_get_prev(
- buf_frame_align(prev_rec), mtr) == FIL_NULL;
+ success = btr_page_get_prev(page_align(prev_rec), mtr)
+ == FIL_NULL;
goto exit_func;
}
@@ -666,8 +759,7 @@ btr_search_check_guess(
next_rec = page_rec_get_next(rec);
if (page_rec_is_supremum(next_rec)) {
- if (btr_page_get_next(
- buf_frame_align(next_rec), mtr)
+ if (btr_page_get_next(page_align(next_rec), mtr)
== FIL_NULL) {
cursor->up_match = 0;
@@ -695,39 +787,36 @@ exit_func:
return(success);
}
-/**********************************************************************
+/******************************************************************//**
Tries to guess the right search position based on the hash search info
of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
and the function returns TRUE, then cursor->up_match and cursor->low_match
-both have sensible values. */
-
+both have sensible values.
+@return TRUE if succeeded */
+UNIV_INTERN
ibool
btr_search_guess_on_hash(
/*=====================*/
- /* out: TRUE if succeeded */
- dict_index_t* index, /* in: index */
- btr_search_t* info, /* in: index search info */
- dtuple_t* tuple, /* in: logical record */
- ulint mode, /* in: PAGE_CUR_L, ... */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF, ...;
+ dict_index_t* index, /*!< in: index */
+ btr_search_t* info, /*!< in: index search info */
+ const dtuple_t* tuple, /*!< in: logical record */
+ ulint mode, /*!< in: PAGE_CUR_L, ... */
+ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ...;
NOTE that only if has_search_latch
is 0, we will have a latch set on
the cursor page, otherwise we assume
the caller uses his search latch
to protect the record! */
- btr_cur_t* cursor, /* out: tree cursor */
- ulint has_search_latch,/* in: latch mode the caller
+ btr_cur_t* cursor, /*!< out: tree cursor */
+ ulint has_search_latch,/*!< in: latch mode the caller
currently has on btr_search_latch:
RW_S_LATCH, RW_X_LATCH, or 0 */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
buf_block_t* block;
rec_t* rec;
- page_t* page;
ulint fold;
- ulint tuple_n_fields;
dulint index_id;
- ibool can_only_compare_to_cursor_rec = TRUE;
#ifdef notdefined
btr_cur_t cursor2;
btr_pcur_t pcur;
@@ -747,15 +836,8 @@ btr_search_guess_on_hash(
cursor->n_fields = info->n_fields;
cursor->n_bytes = info->n_bytes;
- tuple_n_fields = dtuple_get_n_fields(tuple);
-
- if (UNIV_UNLIKELY(tuple_n_fields < cursor->n_fields)) {
-
- return(FALSE);
- }
-
- if (UNIV_UNLIKELY(tuple_n_fields == cursor->n_fields)
- && (cursor->n_bytes > 0)) {
+ if (UNIV_UNLIKELY(dtuple_get_n_fields(tuple)
+ < cursor->n_fields + (cursor->n_bytes > 0))) {
return(FALSE);
}
@@ -772,10 +854,14 @@ btr_search_guess_on_hash(
if (UNIV_LIKELY(!has_search_latch)) {
rw_lock_s_lock(&btr_search_latch);
+
+ if (UNIV_UNLIKELY(!btr_search_enabled)) {
+ goto failure_unlock;
+ }
}
- ut_ad(btr_search_latch.writer != RW_LOCK_EX);
- ut_ad(btr_search_latch.reader_count > 0);
+ ut_ad(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_EX);
+ ut_ad(rw_lock_get_reader_count(&btr_search_latch) > 0);
rec = ha_search_and_get_data(btr_search_sys->hash_index, fold);
@@ -783,12 +869,12 @@ btr_search_guess_on_hash(
goto failure_unlock;
}
- page = buf_frame_align(rec);
+ block = buf_block_align(rec);
if (UNIV_LIKELY(!has_search_latch)) {
if (UNIV_UNLIKELY(
- !buf_page_get_known_nowait(latch_mode, page,
+ !buf_page_get_known_nowait(latch_mode, block,
BUF_MAKE_YOUNG,
__FILE__, __LINE__,
mtr))) {
@@ -796,28 +882,24 @@ btr_search_guess_on_hash(
}
rw_lock_s_unlock(&btr_search_latch);
- can_only_compare_to_cursor_rec = FALSE;
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_TREE_NODE_FROM_HASH);
-#endif /* UNIV_SYNC_DEBUG */
+ buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
}
- block = buf_block_align(page);
+ if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {
+ ut_ad(buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH);
- if (UNIV_UNLIKELY(block->state == BUF_BLOCK_REMOVE_HASH)) {
if (UNIV_LIKELY(!has_search_latch)) {
- btr_leaf_page_release(page, latch_mode, mtr);
+ btr_leaf_page_release(block, latch_mode, mtr);
}
goto failure;
}
- ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
ut_ad(page_rec_is_user_rec(rec));
- btr_cur_position(index, rec, cursor);
+ btr_cur_position(index, rec, block, cursor);
/* Check the validity of the guess within the page */
@@ -826,13 +908,13 @@ btr_search_guess_on_hash(
is positioned on. We cannot look at the next of the previous
record to determine if our guess for the cursor position is
right. */
- if (UNIV_EXPECT(
- ut_dulint_cmp(index_id, btr_page_get_index_id(page)), 0)
+ if (UNIV_EXPECT
+ (ut_dulint_cmp(index_id, btr_page_get_index_id(block->frame)), 0)
|| !btr_search_check_guess(cursor,
- can_only_compare_to_cursor_rec,
+ has_search_latch,
tuple, mode, mtr)) {
if (UNIV_LIKELY(!has_search_latch)) {
- btr_leaf_page_release(page, latch_mode, mtr);
+ btr_leaf_page_release(block, latch_mode, mtr);
}
goto failure;
@@ -852,7 +934,7 @@ btr_search_guess_on_hash(
/* Currently, does not work if the following fails: */
ut_ad(!has_search_latch);
- btr_leaf_page_release(page, latch_mode, mtr);
+ btr_leaf_page_release(block, latch_mode, mtr);
btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
&cursor2, 0, mtr);
@@ -882,15 +964,15 @@ btr_search_guess_on_hash(
btr_search_n_succ++;
#endif
if (UNIV_LIKELY(!has_search_latch)
- && buf_block_peek_if_too_old(block)) {
+ && buf_page_peek_if_too_old(&block->page)) {
- buf_page_make_young(page);
+ buf_page_make_young(&block->page);
}
/* Increment the page get statistics though we did not really
fix the page: for user info only */
- buf_pool->n_page_gets++;
+ buf_pool->stat.n_page_gets++;
return(TRUE);
@@ -914,39 +996,41 @@ failure:
return(FALSE);
}
-/************************************************************************
+/********************************************************************//**
Drops a page hash index. */
-
+UNIV_INTERN
void
btr_search_drop_page_hash_index(
/*============================*/
- page_t* page) /* in: index page, s- or x-latched, or an index page
- for which we know that block->buf_fix_count == 0 */
+ buf_block_t* block) /*!< in: block containing index page,
+ s- or x-latched, or an index page
+ for which we know that
+ block->buf_fix_count == 0 */
{
- hash_table_t* table;
- buf_block_t* block;
- ulint n_fields;
- ulint n_bytes;
- rec_t* rec;
- ulint fold;
- ulint prev_fold;
- dulint index_id;
- ulint n_cached;
- ulint n_recs;
- ulint* folds;
- ulint i;
- mem_heap_t* heap;
- dict_index_t* index;
- ulint* offsets;
+ hash_table_t* table;
+ ulint n_fields;
+ ulint n_bytes;
+ const page_t* page;
+ const rec_t* rec;
+ ulint fold;
+ ulint prev_fold;
+ dulint index_id;
+ ulint n_cached;
+ ulint n_recs;
+ ulint* folds;
+ ulint i;
+ mem_heap_t* heap;
+ const dict_index_t* index;
+ ulint* offsets;
#ifdef UNIV_SYNC_DEBUG
ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
+
retry:
rw_lock_s_lock(&btr_search_latch);
-
- block = buf_block_align(page);
+ page = block->frame;
if (UNIV_LIKELY(!block->is_hashed)) {
@@ -960,12 +1044,13 @@ retry:
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
|| rw_lock_own(&(block->lock), RW_LOCK_EX)
- || (block->buf_fix_count == 0));
+ || (block->page.buf_fix_count == 0));
#endif /* UNIV_SYNC_DEBUG */
n_fields = block->curr_n_fields;
n_bytes = block->curr_n_bytes;
index = block->index;
+ ut_a(!dict_index_is_ibuf(index));
/* NOTE: The fields of block must not be accessed after
releasing btr_search_latch, as the index page might only
@@ -985,7 +1070,7 @@ retry:
n_cached = 0;
rec = page_get_infimum_rec(page);
- rec = page_rec_get_next(rec);
+ rec = page_rec_get_next_low(rec, page_is_comp(page));
index_id = btr_page_get_index_id(page);
@@ -1013,7 +1098,7 @@ retry:
folds[n_cached] = fold;
n_cached++;
next_rec:
- rec = page_rec_get_next(rec);
+ rec = page_rec_get_next_low(rec, page_rec_is_comp(rec));
prev_fold = fold;
}
@@ -1055,6 +1140,7 @@ next_rec:
block->index = NULL;
cleanup:
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
if (UNIV_UNLIKELY(block->n_pointers)) {
/* Corruption */
ut_print_timestamp(stderr);
@@ -1070,27 +1156,29 @@ cleanup:
} else {
rw_lock_x_unlock(&btr_search_latch);
}
+#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+ rw_lock_x_unlock(&btr_search_latch);
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
mem_free(folds);
}
-/************************************************************************
+/********************************************************************//**
Drops a page hash index when a page is freed from a fseg to the file system.
Drops possible hash index if the page happens to be in the buffer pool. */
-
+UNIV_INTERN
void
btr_search_drop_page_hash_when_freed(
/*=================================*/
- ulint space, /* in: space id */
- ulint page_no) /* in: page number */
+ ulint space, /*!< in: space id */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page_no) /*!< in: page number */
{
- ibool is_hashed;
- page_t* page;
- mtr_t mtr;
-
- is_hashed = buf_page_peek_if_search_hashed(space, page_no);
+ buf_block_t* block;
+ mtr_t mtr;
- if (!is_hashed) {
+ if (!buf_page_peek_if_search_hashed(space, page_no)) {
return;
}
@@ -1102,7 +1190,7 @@ btr_search_drop_page_hash_when_freed(
get here. Therefore we can acquire the s-latch to the page without
having to fear a deadlock. */
- page = buf_page_get_gen(space, page_no, RW_S_LATCH, NULL,
+ block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH, NULL,
BUF_GET_IF_IN_POOL, __FILE__, __LINE__,
&mtr);
/* Because the buffer pool mutex was released by
@@ -1111,19 +1199,17 @@ btr_search_drop_page_hash_when_freed(
before buf_page_get_gen() got a chance to acquire the buffer
pool mutex again. Thus, we must check for a NULL return. */
- if (UNIV_LIKELY(page != NULL)) {
+ if (UNIV_LIKELY(block != NULL)) {
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_TREE_NODE_FROM_HASH);
-#endif /* UNIV_SYNC_DEBUG */
+ buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
- btr_search_drop_page_hash_index(page);
+ btr_search_drop_page_hash_index(block);
}
mtr_commit(&mtr);
}
-/************************************************************************
+/********************************************************************//**
Builds a hash index on a page with the given parameters. If the page already
has a hash index with different parameters, the old hash index is removed.
If index is non-NULL, this function checks if n_fields and n_bytes are
@@ -1132,15 +1218,15 @@ static
void
btr_search_build_page_hash_index(
/*=============================*/
- dict_index_t* index, /* in: index for which to build */
- page_t* page, /* in: index page, s- or x-latched */
- ulint n_fields,/* in: hash this many full fields */
- ulint n_bytes,/* in: hash this many bytes from the next
+ dict_index_t* index, /*!< in: index for which to build */
+ buf_block_t* block, /*!< in: block containing index page, s- or x-latched */
+ ulint n_fields,/*!< in: hash this many full fields */
+ ulint n_bytes,/*!< in: hash this many bytes from the next
field */
- ibool left_side)/* in: hash for searches from left side? */
+ ibool left_side)/*!< in: hash for searches from left side? */
{
hash_table_t* table;
- buf_block_t* block;
+ page_t* page;
rec_t* rec;
rec_t* next_rec;
ulint fold;
@@ -1154,12 +1240,13 @@ btr_search_build_page_hash_index(
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
ut_ad(index);
+ ut_a(!dict_index_is_ibuf(index));
- block = buf_block_align(page);
table = btr_search_sys->hash_index;
+ page = buf_block_get_frame(block);
#ifdef UNIV_SYNC_DEBUG
ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
@@ -1175,7 +1262,7 @@ btr_search_build_page_hash_index(
rw_lock_s_unlock(&btr_search_latch);
- btr_search_drop_page_hash_index(page);
+ btr_search_drop_page_hash_index(block);
} else {
rw_lock_s_unlock(&btr_search_latch);
}
@@ -1210,8 +1297,7 @@ btr_search_build_page_hash_index(
index_id = btr_page_get_index_id(page);
- rec = page_get_infimum_rec(page);
- rec = page_rec_get_next(rec);
+ rec = page_rec_get_next(page_get_infimum_rec(page));
offsets = rec_get_offsets(rec, index, offsets,
n_fields + (n_bytes > 0), &heap);
@@ -1276,6 +1362,10 @@ btr_search_build_page_hash_index(
rw_lock_x_lock(&btr_search_latch);
+ if (UNIV_UNLIKELY(!btr_search_enabled)) {
+ goto exit_func;
+ }
+
if (block->is_hashed && ((block->curr_n_fields != n_fields)
|| (block->curr_n_bytes != n_bytes)
|| (block->curr_left_side != left_side))) {
@@ -1301,7 +1391,7 @@ btr_search_build_page_hash_index(
for (i = 0; i < n_cached; i++) {
- ha_insert_for_fold(table, folds[i], recs[i]);
+ ha_insert_for_fold(table, folds[i], block, recs[i]);
}
exit_func:
@@ -1314,32 +1404,26 @@ exit_func:
}
}
-/************************************************************************
+/********************************************************************//**
Moves or deletes hash entries for moved records. If new_page is already hashed,
then the hash index for page, if any, is dropped. If new_page is not hashed,
and page is hashed, then a new hash index is built to new_page with the same
parameters as page (this often happens when a page is split). */
-
+UNIV_INTERN
void
btr_search_move_or_delete_hash_entries(
/*===================================*/
- page_t* new_page, /* in: records are copied
+ buf_block_t* new_block, /*!< in: records are copied
to this page */
- page_t* page, /* in: index page from which
+ buf_block_t* block, /*!< in: index page from which
records were copied, and the
copied records will be deleted
from this page */
- dict_index_t* index) /* in: record descriptor */
+ dict_index_t* index) /*!< in: record descriptor */
{
- buf_block_t* block;
- buf_block_t* new_block;
- ulint n_fields;
- ulint n_bytes;
- ibool left_side;
-
- block = buf_block_align(page);
- new_block = buf_block_align(new_page);
- ut_a(page_is_comp(page) == page_is_comp(new_page));
+ ulint n_fields;
+ ulint n_bytes;
+ ibool left_side;
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
@@ -1347,6 +1431,8 @@ btr_search_move_or_delete_hash_entries(
#endif /* UNIV_SYNC_DEBUG */
ut_a(!new_block->is_hashed || new_block->index == index);
ut_a(!block->is_hashed || block->index == index);
+ ut_a(!(new_block->is_hashed || block->is_hashed)
+ || !dict_index_is_ibuf(index));
rw_lock_s_lock(&btr_search_latch);
@@ -1354,7 +1440,7 @@ btr_search_move_or_delete_hash_entries(
rw_lock_s_unlock(&btr_search_latch);
- btr_search_drop_page_hash_index(page);
+ btr_search_drop_page_hash_index(block);
return;
}
@@ -1373,26 +1459,24 @@ btr_search_move_or_delete_hash_entries(
ut_a(n_fields + n_bytes > 0);
- btr_search_build_page_hash_index(index, new_page, n_fields,
+ btr_search_build_page_hash_index(index, new_block, n_fields,
n_bytes, left_side);
-#if 1 /* TODO: safe to remove? */
- ut_a(n_fields == block->curr_n_fields);
- ut_a(n_bytes == block->curr_n_bytes);
- ut_a(left_side == block->curr_left_side);
-#endif
+ ut_ad(n_fields == block->curr_n_fields);
+ ut_ad(n_bytes == block->curr_n_bytes);
+ ut_ad(left_side == block->curr_left_side);
return;
}
rw_lock_s_unlock(&btr_search_latch);
}
-/************************************************************************
+/********************************************************************//**
Updates the page hash index when a single record is deleted from a page. */
-
+UNIV_INTERN
void
btr_search_update_hash_on_delete(
/*=============================*/
- btr_cur_t* cursor) /* in: cursor which was positioned on the
+ btr_cur_t* cursor) /*!< in: cursor which was positioned on the
record to delete using btr_cur_search_...,
the record is not yet deleted */
{
@@ -1404,11 +1488,11 @@ btr_search_update_hash_on_delete(
ibool found;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
mem_heap_t* heap = NULL;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
rec = btr_cur_get_rec(cursor);
- block = buf_block_align(rec);
+ block = btr_cur_get_block(cursor);
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
@@ -1421,6 +1505,7 @@ btr_search_update_hash_on_delete(
ut_a(block->index == cursor->index);
ut_a(block->curr_n_fields + block->curr_n_bytes > 0);
+ ut_a(!dict_index_is_ibuf(cursor->index));
table = btr_search_sys->hash_index;
@@ -1438,13 +1523,13 @@ btr_search_update_hash_on_delete(
rw_lock_x_unlock(&btr_search_latch);
}
-/************************************************************************
+/********************************************************************//**
Updates the page hash index when a single record is inserted on a page. */
-
+UNIV_INTERN
void
btr_search_update_hash_node_on_insert(
/*==================================*/
- btr_cur_t* cursor) /* in: cursor which was positioned to the
+ btr_cur_t* cursor) /*!< in: cursor which was positioned to the
place to insert using btr_cur_search_...,
and the new record has been inserted next
to the cursor */
@@ -1455,7 +1540,7 @@ btr_search_update_hash_node_on_insert(
rec = btr_cur_get_rec(cursor);
- block = buf_block_align(rec);
+ block = btr_cur_get_block(cursor);
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
@@ -1467,6 +1552,7 @@ btr_search_update_hash_node_on_insert(
}
ut_a(block->index == cursor->index);
+ ut_a(!dict_index_is_ibuf(cursor->index));
rw_lock_x_lock(&btr_search_latch);
@@ -1478,7 +1564,7 @@ btr_search_update_hash_node_on_insert(
table = btr_search_sys->hash_index;
ha_search_and_update_if_found(table, cursor->fold, rec,
- page_rec_get_next(rec));
+ block, page_rec_get_next(rec));
rw_lock_x_unlock(&btr_search_latch);
} else {
@@ -1488,13 +1574,13 @@ btr_search_update_hash_node_on_insert(
}
}
-/************************************************************************
+/********************************************************************//**
Updates the page hash index when a single record is inserted on a page. */
-
+UNIV_INTERN
void
btr_search_update_hash_on_insert(
/*=============================*/
- btr_cur_t* cursor) /* in: cursor which was positioned to the
+ btr_cur_t* cursor) /*!< in: cursor which was positioned to the
place to insert using btr_cur_search_...,
and the new record has been inserted next
to the cursor */
@@ -1515,7 +1601,7 @@ btr_search_update_hash_on_insert(
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
table = btr_search_sys->hash_index;
@@ -1523,7 +1609,7 @@ btr_search_update_hash_on_insert(
rec = btr_cur_get_rec(cursor);
- block = buf_block_align(rec);
+ block = btr_cur_get_block(cursor);
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
@@ -1535,6 +1621,7 @@ btr_search_update_hash_on_insert(
}
ut_a(block->index == cursor->index);
+ ut_a(!dict_index_is_ibuf(cursor->index));
index_id = cursor->index->id;
@@ -1567,7 +1654,7 @@ btr_search_update_hash_on_insert(
locked = TRUE;
- ha_insert_for_fold(table, ins_fold, ins_rec);
+ ha_insert_for_fold(table, ins_fold, block, ins_rec);
}
goto check_next_rec;
@@ -1583,9 +1670,9 @@ btr_search_update_hash_on_insert(
}
if (!left_side) {
- ha_insert_for_fold(table, fold, rec);
+ ha_insert_for_fold(table, fold, block, rec);
} else {
- ha_insert_for_fold(table, ins_fold, ins_rec);
+ ha_insert_for_fold(table, ins_fold, block, ins_rec);
}
}
@@ -1600,7 +1687,7 @@ check_next_rec:
locked = TRUE;
}
- ha_insert_for_fold(table, ins_fold, ins_rec);
+ ha_insert_for_fold(table, ins_fold, block, ins_rec);
}
goto function_exit;
@@ -1617,14 +1704,14 @@ check_next_rec:
if (!left_side) {
- ha_insert_for_fold(table, ins_fold, ins_rec);
+ ha_insert_for_fold(table, ins_fold, block, ins_rec);
/*
fputs("Hash insert for ", stderr);
dict_index_name_print(stderr, cursor->index);
fprintf(stderr, " fold %lu\n", ins_fold);
*/
} else {
- ha_insert_for_fold(table, next_fold, next_rec);
+ ha_insert_for_fold(table, next_fold, block, next_rec);
}
}
@@ -1637,16 +1724,14 @@ function_exit:
}
}
-/************************************************************************
-Validates the search system. */
-
+/********************************************************************//**
+Validates the search system.
+@return TRUE if ok */
+UNIV_INTERN
ibool
btr_search_validate(void)
/*=====================*/
- /* out: TRUE if ok */
{
- buf_block_t* block;
- page_t* page;
ha_node_t* node;
ulint n_page_dumps = 0;
ibool ok = TRUE;
@@ -1660,9 +1745,10 @@ btr_search_validate(void)
btr_search_latch. */
ulint chunk_size = 10000;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
rw_lock_x_lock(&btr_search_latch);
+ buf_pool_mutex_enter();
cell_count = hash_get_n_cells(btr_search_sys->hash_index);
@@ -1670,17 +1756,55 @@ btr_search_validate(void)
/* We release btr_search_latch every once in a while to
give other queries a chance to run. */
if ((i != 0) && ((i % chunk_size) == 0)) {
+ buf_pool_mutex_exit();
rw_lock_x_unlock(&btr_search_latch);
os_thread_yield();
rw_lock_x_lock(&btr_search_latch);
+ buf_pool_mutex_enter();
}
node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
- while (node != NULL) {
- block = buf_block_align(node->data);
- page = buf_frame_align(node->data);
- offsets = rec_get_offsets((rec_t*) node->data,
+ for (; node != NULL; node = node->next) {
+ const buf_block_t* block
+ = buf_block_align(node->data);
+ const buf_block_t* hash_block;
+
+ if (UNIV_LIKELY(buf_block_get_state(block)
+ == BUF_BLOCK_FILE_PAGE)) {
+
+ /* The space and offset are only valid
+ for file blocks. It is possible that
+ the block is being freed
+ (BUF_BLOCK_REMOVE_HASH, see the
+ assertion and the comment below) */
+ hash_block = buf_block_hash_get(
+ buf_block_get_space(block),
+ buf_block_get_page_no(block));
+ } else {
+ hash_block = NULL;
+ }
+
+ if (hash_block) {
+ ut_a(hash_block == block);
+ } else {
+ /* When a block is being freed,
+ buf_LRU_search_and_free_block() first
+ removes the block from
+ buf_pool->page_hash by calling
+ buf_LRU_block_remove_hashed_page().
+ After that, it invokes
+ btr_search_drop_page_hash_index() to
+ remove the block from
+ btr_search_sys->hash_index. */
+
+ ut_a(buf_block_get_state(block)
+ == BUF_BLOCK_REMOVE_HASH);
+ }
+
+ ut_a(!dict_index_is_ibuf(block->index));
+
+ offsets = rec_get_offsets((const rec_t*) node->data,
block->index, offsets,
block->curr_n_fields
+ (block->curr_n_bytes > 0),
@@ -1691,7 +1815,9 @@ btr_search_validate(void)
offsets,
block->curr_n_fields,
block->curr_n_bytes,
- btr_page_get_index_id(page))) {
+ btr_page_get_index_id(block->frame))) {
+ const page_t* page = block->frame;
+
ok = FALSE;
ut_print_timestamp(stderr);
@@ -1701,7 +1827,7 @@ btr_search_validate(void)
"InnoDB: ptr mem address %p"
" index id %lu %lu,"
" node fold %lu, rec fold %lu\n",
- (ulong) buf_frame_get_page_no(page),
+ (ulong) page_get_page_no(page),
node->data,
(ulong) ut_dulint_get_high(
btr_page_get_index_id(page)),
@@ -1728,12 +1854,10 @@ btr_search_validate(void)
(ulong) block->curr_left_side);
if (n_page_dumps < 20) {
- buf_page_print(page);
+ buf_page_print(page, 0);
n_page_dumps++;
}
}
-
- node = node->next;
}
}
@@ -1743,9 +1867,11 @@ btr_search_validate(void)
/* We release btr_search_latch every once in a while to
give other queries a chance to run. */
if (i != 0) {
+ buf_pool_mutex_exit();
rw_lock_x_unlock(&btr_search_latch);
os_thread_yield();
rw_lock_x_lock(&btr_search_latch);
+ buf_pool_mutex_enter();
}
if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
@@ -1753,6 +1879,7 @@ btr_search_validate(void)
}
}
+ buf_pool_mutex_exit();
rw_lock_x_unlock(&btr_search_latch);
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
diff --git a/storage/innodb_plugin/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c
index f0e1395c307..f0e1395c307 100644
--- a/storage/innodb_plugin/buf/buf0buddy.c
+++ b/storage/innobase/buf/buf0buddy.c
diff --git a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
index 901ce8e0fef..111d396fbc5 100644
--- a/storage/innobase/buf/buf0buf.c
+++ b/storage/innobase/buf/buf0buf.c
@@ -1,21 +1,31 @@
-/* Innobase relational database engine; Copyright (C) 2001 Innobase Oy
+/*****************************************************************************
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License 2
- as published by the Free Software Foundation in June 1991.
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
- You should have received a copy of the GNU General Public License 2
- along with this program (in file COPYING); if not, write to the Free
- Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
-/******************************************************
-The database buffer buf_pool
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file buf/buf0buf.c
+The database buffer buf_pool
Created 11/5/1995 Heikki Tuuri
*******************************************************/
@@ -29,14 +39,18 @@ Created 11/5/1995 Heikki Tuuri
#include "mem0mem.h"
#include "btr0btr.h"
#include "fil0fil.h"
+#ifndef UNIV_HOTBACKUP
+#include "buf0buddy.h"
#include "lock0lock.h"
#include "btr0sea.h"
#include "ibuf0ibuf.h"
-#include "dict0dict.h"
-#include "log0recv.h"
-#include "log0log.h"
#include "trx0undo.h"
+#include "log0log.h"
+#endif /* !UNIV_HOTBACKUP */
#include "srv0srv.h"
+#include "dict0dict.h"
+#include "log0recv.h"
+#include "page0zip.h"
/*
IMPLEMENTATION OF THE BUFFER POOL
@@ -126,10 +140,12 @@ in the database, using tables whose size is a power of 2.
Lists of blocks
---------------
-There are several lists of control blocks. The free list contains
-blocks which are currently not used.
+There are several lists of control blocks.
+
+The free list (buf_pool->free) contains blocks which are currently not
+used.
-The LRU-list contains all the blocks holding a file page
+The common LRU list contains all the blocks holding a file page
except those for which the bufferfix count is non-zero.
The pages are in the LRU list roughly in the order of the last
access to the page, so that the oldest pages are at the end of the
@@ -144,11 +160,34 @@ table which cannot fit in the memory. Putting the pages near the
of the LRU list, we make sure that most of the buf_pool stays in the
main memory, undisturbed.
-The chain of modified blocks contains the blocks
+The unzip_LRU list contains a subset of the common LRU list. The
+blocks on the unzip_LRU list hold a compressed file page and the
+corresponding uncompressed page frame. A block is in unzip_LRU if and
+only if the predicate buf_page_belongs_to_unzip_LRU(&block->page)
+holds. The blocks in unzip_LRU will be in the same order as they are in
+the common LRU list. That is, each manipulation of the common LRU
+list will result in the same manipulation of the unzip_LRU list.
+
+The chain of modified blocks (buf_pool->flush_list) contains the blocks
holding file pages that have been modified in the memory
but not written to disk yet. The block with the oldest modification
which has not yet been written to disk is at the end of the chain.
+The chain of unmodified compressed blocks (buf_pool->zip_clean)
+contains the control blocks (buf_page_t) of those compressed pages
+that are not in buf_pool->flush_list and for which no uncompressed
+page has been allocated in the buffer pool. The control blocks for
+uncompressed pages are accessible via buf_block_t objects that are
+reachable via buf_pool->chunks[].
+
+The chains of free memory blocks (buf_pool->zip_free[]) are used by
+the buddy allocator (buf0buddy.c) to keep track of currently unused
+memory blocks of size sizeof(buf_page_t)..UNIV_PAGE_SIZE / 2. These
+blocks are inside the UNIV_PAGE_SIZE-sized memory blocks of type
+BUF_BLOCK_MEMORY that the buddy allocator requests from the buffer
+pool. The buddy allocator is solely used for allocating control
+blocks for compressed pages (buf_page_t) and compressed page frames.
+
Loading a file page
-------------------
@@ -198,52 +237,56 @@ of its random access area (for instance, 32 consecutive pages
in a tablespace) have recently been referenced, we may predict
that the whole area may be needed in the near future, and issue
the read requests for the whole area.
-
- AWE implementation
- ------------------
-
-By a 'block' we mean the buffer header of type buf_block_t. By a 'page'
-we mean the physical 16 kB memory area allocated from RAM for that block.
-By a 'frame' we mean a 16 kB area in the virtual address space of the
-process, in the frame_mem of buf_pool.
-
-We can map pages to the frames of the buffer pool.
-
-1) A buffer block allocated to use as a non-data page, e.g., to the lock
-table, is always mapped to a frame.
-2) A bufferfixed or io-fixed data page is always mapped to a frame.
-3) When we need to map a block to frame, we look from the list
-awe_LRU_free_mapped and try to unmap its last block, but note that
-bufferfixed or io-fixed pages cannot be unmapped.
-4) For every frame in the buffer pool there is always a block whose page is
-mapped to it. When we create the buffer pool, we map the first elements
-in the free list to the frames.
-5) When we have AWE enabled, we disable adaptive hash indexes.
*/
-/* Value in microseconds */
-static const int WAIT_FOR_READ = 20000;
+#ifndef UNIV_HOTBACKUP
+/** Value in microseconds */
+static const int WAIT_FOR_READ = 5000;
-buf_pool_t* buf_pool = NULL; /* The buffer buf_pool of the database */
+/** The buffer pool of the database */
+UNIV_INTERN buf_pool_t* buf_pool = NULL;
-#ifdef UNIV_DEBUG
-ulint buf_dbg_counter = 0; /* This is used to insert validation
+/** mutex protecting the buffer pool struct and control blocks, except the
+read-write lock in them */
+UNIV_INTERN mutex_t buf_pool_mutex;
+/** mutex protecting the control blocks of compressed-only pages
+(of type buf_page_t, not buf_block_t) */
+UNIV_INTERN mutex_t buf_pool_zip_mutex;
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+static ulint buf_dbg_counter = 0; /*!< This is used to insert validation
operations in execution in the
debug version */
-ibool buf_debug_prints = FALSE; /* If this is set TRUE,
- the program prints info whenever
- read-ahead or flush occurs */
+/** Flag to forbid the release of the buffer pool mutex.
+Protected by buf_pool_mutex. */
+UNIV_INTERN ulint buf_pool_mutex_exit_forbidden = 0;
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+#ifdef UNIV_DEBUG
+/** If this is set TRUE, the program prints info whenever
+read-ahead or flush occurs */
+UNIV_INTERN ibool buf_debug_prints = FALSE;
#endif /* UNIV_DEBUG */
-/************************************************************************
+
+/** A chunk of buffers. The buffer pool is allocated in chunks. */
+struct buf_chunk_struct{
+ ulint mem_size; /*!< allocated size of the chunk */
+ ulint size; /*!< size of frames[] and blocks[] */
+ void* mem; /*!< pointer to the memory area which
+ was allocated for the frames */
+ buf_block_t* blocks; /*!< array of buffer control blocks */
+};
+#endif /* !UNIV_HOTBACKUP */
+
+/********************************************************************//**
Calculates a page checksum which is stored to the page when it is written
to a file. Note that we must be careful to calculate the same value on
-32-bit and 64-bit architectures. */
-
+32-bit and 64-bit architectures.
+@return checksum */
+UNIV_INTERN
ulint
buf_calc_page_new_checksum(
/*=======================*/
- /* out: checksum */
- byte* page) /* in: buffer page */
+ const byte* page) /*!< in: buffer page */
{
ulint checksum;
@@ -265,19 +308,19 @@ buf_calc_page_new_checksum(
return(checksum);
}
-/************************************************************************
+/********************************************************************//**
In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
looked at the first few bytes of the page. This calculates that old
checksum.
NOTE: we must first store the new formula checksum to
FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
-because this takes that field as an input! */
-
+because this takes that field as an input!
+@return checksum */
+UNIV_INTERN
ulint
buf_calc_page_old_checksum(
/*=======================*/
- /* out: checksum */
- byte* page) /* in: buffer page */
+ const byte* page) /*!< in: buffer page */
{
ulint checksum;
@@ -288,25 +331,24 @@ buf_calc_page_old_checksum(
return(checksum);
}
-/************************************************************************
-Checks if a page is corrupt. */
-
+/********************************************************************//**
+Checks if a page is corrupt.
+@return TRUE if corrupted */
+UNIV_INTERN
ibool
buf_page_is_corrupted(
/*==================*/
- /* out: TRUE if corrupted */
- byte* read_buf) /* in: a database page */
+ const byte* read_buf, /*!< in: a database page */
+ ulint zip_size) /*!< in: size of compressed page;
+ 0 for uncompressed pages */
{
- ulint checksum;
- ulint old_checksum;
- ulint checksum_field;
- ulint old_checksum_field;
-#ifndef UNIV_HOTBACKUP
- dulint current_lsn;
-#endif
- if (mach_read_from_4(read_buf + FIL_PAGE_LSN + 4)
- != mach_read_from_4(read_buf + UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
+ ulint checksum_field;
+ ulint old_checksum_field;
+
+ if (UNIV_LIKELY(!zip_size)
+ && memcmp(read_buf + FIL_PAGE_LSN + 4,
+ read_buf + UNIV_PAGE_SIZE
+ - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
/* Stored log sequence numbers at the start and the end
of page do not match */
@@ -315,32 +357,28 @@ buf_page_is_corrupted(
}
#ifndef UNIV_HOTBACKUP
- if (recv_lsn_checks_on && log_peek_lsn(&current_lsn)) {
- if (ut_dulint_cmp(current_lsn,
- mach_read_from_8(read_buf + FIL_PAGE_LSN))
- < 0) {
+ if (recv_lsn_checks_on) {
+ ib_uint64_t current_lsn;
+
+ if (log_peek_lsn(&current_lsn)
+ && current_lsn < mach_read_ull(read_buf + FIL_PAGE_LSN)) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: page %lu log sequence number"
- " %lu %lu\n"
+ " %llu\n"
"InnoDB: is in the future! Current system "
- "log sequence number %lu %lu.\n"
+ "log sequence number %llu.\n"
"InnoDB: Your database may be corrupt or "
"you may have copied the InnoDB\n"
"InnoDB: tablespace but not the InnoDB "
"log files. See\n"
- "InnoDB: http://dev.mysql.com/doc/refman/"
- "5.1/en/forcing-recovery.html\n"
+ "InnoDB: " REFMAN "forcing-recovery.html\n"
"InnoDB: for more information.\n",
(ulong) mach_read_from_4(read_buf
+ FIL_PAGE_OFFSET),
- (ulong) ut_dulint_get_high
- (mach_read_from_8(read_buf + FIL_PAGE_LSN)),
- (ulong) ut_dulint_get_low
- (mach_read_from_8(read_buf + FIL_PAGE_LSN)),
- (ulong) ut_dulint_get_high(current_lsn),
- (ulong) ut_dulint_get_low(current_lsn));
+ mach_read_ull(read_buf + FIL_PAGE_LSN),
+ current_lsn);
}
}
#endif
@@ -350,8 +388,15 @@ buf_page_is_corrupted(
BUF_NO_CHECKSUM_MAGIC which might be stored by InnoDB with checksums
disabled. Otherwise, skip checksum calculation and return FALSE */
- if (srv_use_checksums) {
- old_checksum = buf_calc_page_old_checksum(read_buf);
+ if (UNIV_LIKELY(srv_use_checksums)) {
+ checksum_field = mach_read_from_4(read_buf
+ + FIL_PAGE_SPACE_OR_CHKSUM);
+
+ if (UNIV_UNLIKELY(zip_size)) {
+ return(checksum_field != BUF_NO_CHECKSUM_MAGIC
+ && checksum_field
+ != page_zip_calc_checksum(read_buf, zip_size));
+ }
old_checksum_field = mach_read_from_4(
read_buf + UNIV_PAGE_SIZE
@@ -367,21 +412,20 @@ buf_page_is_corrupted(
if (old_checksum_field != mach_read_from_4(read_buf
+ FIL_PAGE_LSN)
- && old_checksum_field != old_checksum
- && old_checksum_field != BUF_NO_CHECKSUM_MAGIC) {
+ && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
+ && old_checksum_field
+ != buf_calc_page_old_checksum(read_buf)) {
return(TRUE);
}
- checksum = buf_calc_page_new_checksum(read_buf);
- checksum_field = mach_read_from_4(read_buf
- + FIL_PAGE_SPACE_OR_CHKSUM);
-
/* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
- (always equal to 0), to FIL_PAGE_SPACE_SPACE_OR_CHKSUM */
+ (always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */
- if (checksum_field != 0 && checksum_field != checksum
- && checksum_field != BUF_NO_CHECKSUM_MAGIC) {
+ if (checksum_field != 0
+ && checksum_field != BUF_NO_CHECKSUM_MAGIC
+ && checksum_field
+ != buf_calc_page_new_checksum(read_buf)) {
return(TRUE);
}
@@ -390,23 +434,103 @@ buf_page_is_corrupted(
return(FALSE);
}
-/************************************************************************
+/********************************************************************//**
Prints a page to stderr. */
-
+UNIV_INTERN
void
buf_page_print(
/*===========*/
- byte* read_buf) /* in: a database page */
+ const byte* read_buf, /*!< in: a database page */
+ ulint zip_size) /*!< in: compressed page size, or
+ 0 for uncompressed pages */
{
+#ifndef UNIV_HOTBACKUP
dict_index_t* index;
+#endif /* !UNIV_HOTBACKUP */
ulint checksum;
ulint old_checksum;
+ ulint size = zip_size;
+
+ if (!size) {
+ size = UNIV_PAGE_SIZE;
+ }
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB: Page dump in ascii and hex (%lu bytes):\n",
- (ulint)UNIV_PAGE_SIZE);
- ut_print_buf(stderr, read_buf, UNIV_PAGE_SIZE);
- fputs("InnoDB: End of page dump\n", stderr);
+ (ulong) size);
+ ut_print_buf(stderr, read_buf, size);
+ fputs("\nInnoDB: End of page dump\n", stderr);
+
+ if (zip_size) {
+ /* Print compressed page. */
+
+ switch (fil_page_get_type(read_buf)) {
+ case FIL_PAGE_TYPE_ZBLOB:
+ case FIL_PAGE_TYPE_ZBLOB2:
+ checksum = srv_use_checksums
+ ? page_zip_calc_checksum(read_buf, zip_size)
+ : BUF_NO_CHECKSUM_MAGIC;
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Compressed BLOB page"
+ " checksum %lu, stored %lu\n"
+ "InnoDB: Page lsn %lu %lu\n"
+ "InnoDB: Page number (if stored"
+ " to page already) %lu,\n"
+ "InnoDB: space id (if stored"
+ " to page already) %lu\n",
+ (ulong) checksum,
+ (ulong) mach_read_from_4(
+ read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
+ (ulong) mach_read_from_4(
+ read_buf + FIL_PAGE_LSN),
+ (ulong) mach_read_from_4(
+ read_buf + (FIL_PAGE_LSN + 4)),
+ (ulong) mach_read_from_4(
+ read_buf + FIL_PAGE_OFFSET),
+ (ulong) mach_read_from_4(
+ read_buf
+ + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
+ return;
+ default:
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: unknown page type %lu,"
+ " assuming FIL_PAGE_INDEX\n",
+ fil_page_get_type(read_buf));
+ /* fall through */
+ case FIL_PAGE_INDEX:
+ checksum = srv_use_checksums
+ ? page_zip_calc_checksum(read_buf, zip_size)
+ : BUF_NO_CHECKSUM_MAGIC;
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Compressed page checksum %lu,"
+ " stored %lu\n"
+ "InnoDB: Page lsn %lu %lu\n"
+ "InnoDB: Page number (if stored"
+ " to page already) %lu,\n"
+ "InnoDB: space id (if stored"
+ " to page already) %lu\n",
+ (ulong) checksum,
+ (ulong) mach_read_from_4(
+ read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
+ (ulong) mach_read_from_4(
+ read_buf + FIL_PAGE_LSN),
+ (ulong) mach_read_from_4(
+ read_buf + (FIL_PAGE_LSN + 4)),
+ (ulong) mach_read_from_4(
+ read_buf + FIL_PAGE_OFFSET),
+ (ulong) mach_read_from_4(
+ read_buf
+ + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
+ return;
+ case FIL_PAGE_TYPE_XDES:
+ /* This is an uncompressed page. */
+ break;
+ }
+ }
checksum = srv_use_checksums
? buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
@@ -436,6 +560,7 @@ buf_page_print(
(ulong) mach_read_from_4(read_buf
+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
+#ifndef UNIV_HOTBACKUP
if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE)
== TRX_UNDO_INSERT) {
fprintf(stderr,
@@ -446,30 +571,26 @@ buf_page_print(
fprintf(stderr,
"InnoDB: Page may be an update undo log page\n");
}
+#endif /* !UNIV_HOTBACKUP */
switch (fil_page_get_type(read_buf)) {
case FIL_PAGE_INDEX:
fprintf(stderr,
"InnoDB: Page may be an index page where"
" index id is %lu %lu\n",
- (ulong) ut_dulint_get_high
- (btr_page_get_index_id(read_buf)),
- (ulong) ut_dulint_get_low
- (btr_page_get_index_id(read_buf)));
-
- /* If the code is in ibbackup, dict_sys may be uninitialized,
- i.e., NULL */
-
- if (dict_sys != NULL) {
-
- index = dict_index_find_on_id_low(
- btr_page_get_index_id(read_buf));
- if (index) {
- fputs("InnoDB: (", stderr);
- dict_index_name_print(stderr, NULL, index);
- fputs(")\n", stderr);
- }
+ (ulong) ut_dulint_get_high(
+ btr_page_get_index_id(read_buf)),
+ (ulong) ut_dulint_get_low(
+ btr_page_get_index_id(read_buf)));
+#ifndef UNIV_HOTBACKUP
+ index = dict_index_find_on_id_low(
+ btr_page_get_index_id(read_buf));
+ if (index) {
+ fputs("InnoDB: (", stderr);
+ dict_index_name_print(stderr, NULL, index);
+ fputs(")\n", stderr);
}
+#endif /* !UNIV_HOTBACKUP */
break;
case FIL_PAGE_INODE:
fputs("InnoDB: Page may be an 'inode' page\n", stderr);
@@ -506,41 +627,53 @@ buf_page_print(
fputs("InnoDB: Page may be a BLOB page\n",
stderr);
break;
+ case FIL_PAGE_TYPE_ZBLOB:
+ case FIL_PAGE_TYPE_ZBLOB2:
+ fputs("InnoDB: Page may be a compressed BLOB page\n",
+ stderr);
+ break;
}
}
-/************************************************************************
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
Initializes a buffer control block when the buf_pool is created. */
static
void
buf_block_init(
/*===========*/
- buf_block_t* block, /* in: pointer to control block */
- byte* frame) /* in: pointer to buffer frame, or NULL if in
- the case of AWE there is no frame */
+ buf_block_t* block, /*!< in: pointer to control block */
+ byte* frame) /*!< in: pointer to buffer frame */
{
- block->magic_n = 0;
-
- block->state = BUF_BLOCK_NOT_USED;
+ UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE, block);
block->frame = frame;
- block->awe_info = NULL;
-
- block->buf_fix_count = 0;
- block->io_fix = 0;
+ block->page.state = BUF_BLOCK_NOT_USED;
+ block->page.buf_fix_count = 0;
+ block->page.io_fix = BUF_IO_NONE;
- block->modify_clock = ut_dulint_zero;
+ block->modify_clock = 0;
- block->file_page_was_freed = FALSE;
+#ifdef UNIV_DEBUG_FILE_ACCESSES
+ block->page.file_page_was_freed = FALSE;
+#endif /* UNIV_DEBUG_FILE_ACCESSES */
block->check_index_page_at_flush = FALSE;
block->index = NULL;
- block->in_free_list = FALSE;
- block->in_LRU_list = FALSE;
-
+#ifdef UNIV_DEBUG
+ block->page.in_page_hash = FALSE;
+ block->page.in_zip_hash = FALSE;
+ block->page.in_flush_list = FALSE;
+ block->page.in_free_list = FALSE;
+ block->page.in_LRU_list = FALSE;
+ block->in_unzip_LRU_list = FALSE;
+#endif /* UNIV_DEBUG */
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
block->n_pointers = 0;
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+ page_zip_des_init(&block->page.zip);
mutex_create(&block->mutex, SYNC_BUF_BLOCK);
@@ -552,613 +685,1354 @@ buf_block_init(
#endif /* UNIV_SYNC_DEBUG */
}
-/************************************************************************
-Creates the buffer pool. */
-
-buf_pool_t*
-buf_pool_init(
-/*==========*/
- /* out, own: buf_pool object, NULL if not
- enough memory or error */
- ulint max_size, /* in: maximum size of the buf_pool in
- blocks */
- ulint curr_size, /* in: current size to use, must be <=
- max_size, currently must be equal to
- max_size */
- ulint n_frames) /* in: number of frames; if AWE is used,
- this is the size of the address space window
- where physical memory pages are mapped; if
- AWE is not used then this must be the same
- as max_size */
+/********************************************************************//**
+Allocates a chunk of buffer frames.
+@return chunk, or NULL on failure */
+static
+buf_chunk_t*
+buf_chunk_init(
+/*===========*/
+ buf_chunk_t* chunk, /*!< out: chunk of buffers */
+ ulint mem_size) /*!< in: requested size in bytes */
{
+ buf_block_t* block;
byte* frame;
ulint i;
- buf_block_t* block;
- ut_a(max_size == curr_size);
- ut_a(srv_use_awe || n_frames == max_size);
+ /* Round down to a multiple of page size,
+ although it already should be. */
+ mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
+ /* Reserve space for the block descriptors. */
+ mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
+ + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
- if (n_frames > curr_size) {
- fprintf(stderr,
- "InnoDB: AWE: Error: you must specify in my.cnf"
- " .._awe_mem_mb larger\n"
- "InnoDB: than .._buffer_pool_size. Now the former"
- " is %lu pages,\n"
- "InnoDB: the latter %lu pages.\n",
- (ulong) curr_size, (ulong) n_frames);
+ chunk->mem_size = mem_size;
+ chunk->mem = os_mem_alloc_large(&chunk->mem_size);
+
+ if (UNIV_UNLIKELY(chunk->mem == NULL)) {
return(NULL);
}
- buf_pool = mem_alloc(sizeof(buf_pool_t));
+ /* Allocate the block descriptors from
+ the start of the memory block. */
+ chunk->blocks = chunk->mem;
- /* 1. Initialize general fields
- ---------------------------- */
- mutex_create(&buf_pool->mutex, SYNC_BUF_POOL);
+ /* Align a pointer to the first frame. Note that when
+ os_large_page_size is smaller than UNIV_PAGE_SIZE,
+ we may allocate one fewer block than requested. When
+ it is bigger, we may allocate more blocks than requested. */
- mutex_enter(&(buf_pool->mutex));
+ frame = ut_align(chunk->mem, UNIV_PAGE_SIZE);
+ chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
+ - (frame != chunk->mem);
- if (srv_use_awe) {
- /*----------------------------------------*/
- /* Allocate the virtual address space window, i.e., the
- buffer pool frames */
+ /* Subtract the space needed for block descriptors. */
+ {
+ ulint size = chunk->size;
- buf_pool->frame_mem = os_awe_allocate_virtual_mem_window(
- UNIV_PAGE_SIZE * (n_frames + 1));
+ while (frame < (byte*) (chunk->blocks + size)) {
+ frame += UNIV_PAGE_SIZE;
+ size--;
+ }
- /* Allocate the physical memory for AWE and the AWE info array
- for buf_pool */
+ chunk->size = size;
+ }
- if ((curr_size % ((1024 * 1024) / UNIV_PAGE_SIZE)) != 0) {
+ /* Initialize the block descriptors and assign a frame to each
+ of them. The frames were carved out of chunk->mem above, after
+ the space reserved for the descriptors. */
- fprintf(stderr,
- "InnoDB: AWE: Error: physical memory must be"
- " allocated in full megabytes.\n"
- "InnoDB: Trying to allocate %lu"
- " database pages.\n",
- (ulong) curr_size);
+ block = chunk->blocks;
- return(NULL);
- }
+ for (i = chunk->size; i--; ) {
- if (!os_awe_allocate_physical_mem(&(buf_pool->awe_info),
- curr_size
- / ((1024 * 1024)
- / UNIV_PAGE_SIZE))) {
+ buf_block_init(block, frame);
- return(NULL);
- }
- /*----------------------------------------*/
- } else {
- buf_pool->frame_mem = os_mem_alloc_large(
- UNIV_PAGE_SIZE * (n_frames + 1), TRUE, FALSE);
+#ifdef HAVE_purify
+ /* Wipe contents of frame to eliminate a Purify warning */
+ memset(block->frame, '\0', UNIV_PAGE_SIZE);
+#endif
+ /* Add the block to the free list */
+ UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
+ ut_d(block->page.in_free_list = TRUE);
+
+ block++;
+ frame += UNIV_PAGE_SIZE;
}
- if (buf_pool->frame_mem == NULL) {
+ return(chunk);
+}
- return(NULL);
- }
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Finds a block in the given buffer chunk that points to a
+given compressed page.
+@return buffer block pointing to the compressed page, or NULL */
+static
+buf_block_t*
+buf_chunk_contains_zip(
+/*===================*/
+ buf_chunk_t* chunk, /*!< in: chunk being checked */
+ const void* data) /*!< in: pointer to compressed page */
+{
+ buf_block_t* block;
+ ulint i;
- buf_pool->blocks = ut_malloc(sizeof(buf_block_t) * max_size);
+ ut_ad(buf_pool);
+ ut_ad(buf_pool_mutex_own());
- if (buf_pool->blocks == NULL) {
+ block = chunk->blocks;
- return(NULL);
+ for (i = chunk->size; i--; block++) {
+ if (block->page.zip.data == data) {
+
+ return(block);
+ }
}
- buf_pool->max_size = max_size;
- buf_pool->curr_size = curr_size;
+ return(NULL);
+}
- buf_pool->n_frames = n_frames;
+/*********************************************************************//**
+Finds a block in the buffer pool that points to a
+given compressed page.
+@return buffer block pointing to the compressed page, or NULL */
+UNIV_INTERN
+buf_block_t*
+buf_pool_contains_zip(
+/*==================*/
+ const void* data) /*!< in: pointer to compressed page */
+{
+ ulint n;
+ buf_chunk_t* chunk = buf_pool->chunks;
- /* Align pointer to the first frame */
+ for (n = buf_pool->n_chunks; n--; chunk++) {
+ buf_block_t* block = buf_chunk_contains_zip(chunk, data);
- frame = ut_align(buf_pool->frame_mem, UNIV_PAGE_SIZE);
+ if (block) {
+ return(block);
+ }
+ }
- buf_pool->frame_zero = frame;
- buf_pool->high_end = frame + UNIV_PAGE_SIZE * n_frames;
+ return(NULL);
+}
+#endif /* UNIV_DEBUG */
- if (srv_use_awe) {
- /*----------------------------------------*/
- /* Map an initial part of the allocated physical memory to
- the window */
+/*********************************************************************//**
+Checks that all file pages in the buffer chunk are in a replaceable state.
+@return address of a non-free block, or NULL if all freed */
+static
+const buf_block_t*
+buf_chunk_not_freed(
+/*================*/
+ buf_chunk_t* chunk) /*!< in: chunk being checked */
+{
+ buf_block_t* block;
+ ulint i;
- os_awe_map_physical_mem_to_window(buf_pool->frame_zero,
- n_frames
- * (UNIV_PAGE_SIZE
- / OS_AWE_X86_PAGE_SIZE),
- buf_pool->awe_info);
- /*----------------------------------------*/
- }
+ ut_ad(buf_pool);
+ ut_ad(buf_pool_mutex_own());
+
+ block = chunk->blocks;
+
+ for (i = chunk->size; i--; block++) {
+ ibool ready;
+
+ switch (buf_block_get_state(block)) {
+ case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_ZIP_DIRTY:
+ /* The uncompressed buffer pool should never
+ contain compressed block descriptors. */
+ ut_error;
+ break;
+ case BUF_BLOCK_NOT_USED:
+ case BUF_BLOCK_READY_FOR_USE:
+ case BUF_BLOCK_MEMORY:
+ case BUF_BLOCK_REMOVE_HASH:
+ /* Skip blocks that are not being used for
+ file pages. */
+ break;
+ case BUF_BLOCK_FILE_PAGE:
+ mutex_enter(&block->mutex);
+ ready = buf_flush_ready_for_replace(&block->page);
+ mutex_exit(&block->mutex);
- buf_pool->blocks_of_frames = ut_malloc(sizeof(void*) * n_frames);
+ if (!ready) {
- if (buf_pool->blocks_of_frames == NULL) {
+ return(block);
+ }
- return(NULL);
+ break;
+ }
}
- /* Init block structs and assign frames for them; in the case of
- AWE there are less frames than blocks. Then we assign the frames
- to the first blocks (we already mapped the memory above). We also
- init the awe_info for every block. */
+ return(NULL);
+}
- for (i = 0; i < max_size; i++) {
+/*********************************************************************//**
+Checks that all blocks in the buffer chunk are in BUF_BLOCK_NOT_USED state.
+@return TRUE if all freed */
+static
+ibool
+buf_chunk_all_free(
+/*===============*/
+ const buf_chunk_t* chunk) /*!< in: chunk being checked */
+{
+ const buf_block_t* block;
+ ulint i;
- block = buf_pool_get_nth_block(buf_pool, i);
+ ut_ad(buf_pool);
+ ut_ad(buf_pool_mutex_own());
- if (i < n_frames) {
- frame = buf_pool->frame_zero + i * UNIV_PAGE_SIZE;
- *(buf_pool->blocks_of_frames + i) = block;
- } else {
- frame = NULL;
- }
+ block = chunk->blocks;
- buf_block_init(block, frame);
+ for (i = chunk->size; i--; block++) {
+
+ if (buf_block_get_state(block) != BUF_BLOCK_NOT_USED) {
- if (srv_use_awe) {
- /*----------------------------------------*/
- block->awe_info = buf_pool->awe_info
- + i * (UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE);
- /*----------------------------------------*/
+ return(FALSE);
}
}
- buf_pool->page_hash = hash_create(2 * max_size);
+ return(TRUE);
+}
- buf_pool->n_pend_reads = 0;
+/********************************************************************//**
+Frees a chunk of buffer frames. */
+static
+void
+buf_chunk_free(
+/*===========*/
+ buf_chunk_t* chunk) /*!< out: chunk of buffers */
+{
+ buf_block_t* block;
+ const buf_block_t* block_end;
- buf_pool->last_printout_time = time(NULL);
+ ut_ad(buf_pool_mutex_own());
- buf_pool->n_pages_read = 0;
- buf_pool->n_pages_written = 0;
- buf_pool->n_pages_created = 0;
- buf_pool->n_pages_awe_remapped = 0;
+ block_end = chunk->blocks + chunk->size;
- buf_pool->n_page_gets = 0;
- buf_pool->n_page_gets_old = 0;
- buf_pool->n_pages_read_old = 0;
- buf_pool->n_pages_written_old = 0;
- buf_pool->n_pages_created_old = 0;
- buf_pool->n_pages_awe_remapped_old = 0;
+ for (block = chunk->blocks; block < block_end; block++) {
+ ut_a(buf_block_get_state(block) == BUF_BLOCK_NOT_USED);
+ ut_a(!block->page.zip.data);
- /* 2. Initialize flushing fields
- ---------------------------- */
- UT_LIST_INIT(buf_pool->flush_list);
+ ut_ad(!block->page.in_LRU_list);
+ ut_ad(!block->in_unzip_LRU_list);
+ ut_ad(!block->page.in_flush_list);
+ /* Remove the block from the free list. */
+ ut_ad(block->page.in_free_list);
+ UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
- for (i = BUF_FLUSH_LRU; i <= BUF_FLUSH_LIST; i++) {
- buf_pool->n_flush[i] = 0;
- buf_pool->init_flush[i] = FALSE;
- buf_pool->no_flush[i] = os_event_create(NULL);
+ /* Free the latches. */
+ mutex_free(&block->mutex);
+ rw_lock_free(&block->lock);
+#ifdef UNIV_SYNC_DEBUG
+ rw_lock_free(&block->debug_latch);
+#endif /* UNIV_SYNC_DEBUG */
+ UNIV_MEM_UNDESC(block);
}
- buf_pool->LRU_flush_ended = 0;
+ os_mem_free_large(chunk->mem, chunk->mem_size);
+}
- buf_pool->ulint_clock = 1;
- buf_pool->freed_page_clock = 0;
+/********************************************************************//**
+Creates the buffer pool.
+@return own: buf_pool object, NULL if not enough memory or error */
+UNIV_INTERN
+buf_pool_t*
+buf_pool_init(void)
+/*===============*/
+{
+ buf_chunk_t* chunk;
+ ulint i;
- /* 3. Initialize LRU fields
- ---------------------------- */
- UT_LIST_INIT(buf_pool->LRU);
+ buf_pool = mem_zalloc(sizeof(buf_pool_t));
- buf_pool->LRU_old = NULL;
+ /* 1. Initialize general fields
+ ------------------------------- */
+ mutex_create(&buf_pool_mutex, SYNC_BUF_POOL);
+ mutex_create(&buf_pool_zip_mutex, SYNC_BUF_BLOCK);
- UT_LIST_INIT(buf_pool->awe_LRU_free_mapped);
+ buf_pool_mutex_enter();
+
+ buf_pool->n_chunks = 1;
+ buf_pool->chunks = chunk = mem_alloc(sizeof *chunk);
- /* Add control blocks to the free list */
UT_LIST_INIT(buf_pool->free);
- for (i = 0; i < curr_size; i++) {
+ if (!buf_chunk_init(chunk, srv_buf_pool_size)) {
+ mem_free(chunk);
+ mem_free(buf_pool);
+ buf_pool = NULL;
+ return(NULL);
+ }
- block = buf_pool_get_nth_block(buf_pool, i);
+ srv_buf_pool_old_size = srv_buf_pool_size;
+ buf_pool->curr_size = chunk->size;
+ srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
- if (block->frame) {
- /* Wipe contents of frame to eliminate a Purify
- warning */
+ buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
+ buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
-#ifdef HAVE_purify
- memset(block->frame, '\0', UNIV_PAGE_SIZE);
-#endif
- if (srv_use_awe) {
- /* Add to the list of blocks mapped to
- frames */
+ buf_pool->last_printout_time = time(NULL);
- UT_LIST_ADD_LAST(awe_LRU_free_mapped,
- buf_pool->awe_LRU_free_mapped,
- block);
- }
- }
+ /* 2. Initialize flushing fields
+ -------------------------------- */
- UT_LIST_ADD_LAST(free, buf_pool->free, block);
- block->in_free_list = TRUE;
+ for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
+ buf_pool->no_flush[i] = os_event_create(NULL);
}
- mutex_exit(&(buf_pool->mutex));
+ /* 3. Initialize LRU fields
+ --------------------------- */
+ /* All fields are initialized by mem_zalloc(). */
- if (srv_use_adaptive_hash_indexes) {
- btr_search_sys_create(curr_size * UNIV_PAGE_SIZE
- / sizeof(void*) / 64);
- } else {
- /* Create only a small dummy system */
- btr_search_sys_create(1000);
- }
+ buf_pool_mutex_exit();
+
+ btr_search_sys_create(buf_pool->curr_size
+ * UNIV_PAGE_SIZE / sizeof(void*) / 64);
+
+ /* 4. Initialize the buddy allocator fields */
+ /* All fields are initialized by mem_zalloc(). */
return(buf_pool);
}
-/************************************************************************
-Maps the page of block to a frame, if not mapped yet. Unmaps some page
-from the end of the awe_LRU_free_mapped. */
+/********************************************************************//**
+Frees the buffer pool at shutdown. This must not be invoked before
+freeing all mutexes. */
+UNIV_INTERN
+void
+buf_pool_free(void)
+/*===============*/
+{
+ buf_chunk_t* chunk;
+ buf_chunk_t* chunks;
+
+ chunks = buf_pool->chunks;
+ chunk = chunks + buf_pool->n_chunks;
+ while (--chunk >= chunks) {
+ /* Bypass the checks of buf_chunk_free(), since they
+ would fail at shutdown. */
+ os_mem_free_large(chunk->mem, chunk->mem_size);
+ }
+
+ mem_free(buf_pool->chunks);
+ hash_table_free(buf_pool->page_hash);
+ hash_table_free(buf_pool->zip_hash);
+ mem_free(buf_pool);
+ buf_pool = NULL;
+}
+
+/********************************************************************//**
+Drops the adaptive hash index. To prevent a livelock, this function
+is only to be called while holding btr_search_latch and while
+btr_search_enabled == FALSE. */
+UNIV_INTERN
void
-buf_awe_map_page_to_frame(
-/*======================*/
- buf_block_t* block, /* in: block whose page should be
- mapped to a frame */
- ibool add_to_mapped_list) /* in: TRUE if we in the case
- we need to map the page should also
- add the block to the
- awe_LRU_free_mapped list */
+buf_pool_drop_hash_index(void)
+/*==========================*/
{
- buf_block_t* bck;
+ ibool released_search_latch;
- ut_ad(mutex_own(&(buf_pool->mutex)));
- ut_ad(block);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!btr_search_enabled);
- if (block->frame) {
+ do {
+ buf_chunk_t* chunks = buf_pool->chunks;
+ buf_chunk_t* chunk = chunks + buf_pool->n_chunks;
- return;
+ released_search_latch = FALSE;
+
+ while (--chunk >= chunks) {
+ buf_block_t* block = chunk->blocks;
+ ulint i = chunk->size;
+
+ for (; i--; block++) {
+ /* block->is_hashed cannot be modified
+ when we have an x-latch on btr_search_latch;
+ see the comment in buf0buf.h */
+
+ if (!block->is_hashed) {
+ continue;
+ }
+
+ /* To follow the latching order, we
+ have to release btr_search_latch
+ before acquiring block->lock. */
+ rw_lock_x_unlock(&btr_search_latch);
+ /* When we release the search latch,
+ we must rescan all blocks, because
+ some may become hashed again. */
+ released_search_latch = TRUE;
+
+ rw_lock_x_lock(&block->lock);
+
+ /* This should be guaranteed by the
+ callers, which will be holding
+ btr_search_enabled_mutex. */
+ ut_ad(!btr_search_enabled);
+
+ /* Because we did not buffer-fix the
+ block by calling buf_block_get_gen(),
+ it is possible that the block has been
+ allocated for some other use after
+ btr_search_latch was released above.
+ We do not care which file page the
+ block is mapped to. All we want to do
+ is to drop any hash entries referring
+ to the page. */
+
+ /* It is possible that
+ block->page.state != BUF_BLOCK_FILE_PAGE.
+ Even that does not matter, because
+ btr_search_drop_page_hash_index() will
+ check block->is_hashed before doing
+ anything. block->is_hashed can only
+ be set on uncompressed file pages. */
+
+ btr_search_drop_page_hash_index(block);
+
+ rw_lock_x_unlock(&block->lock);
+
+ rw_lock_x_lock(&btr_search_latch);
+
+ ut_ad(!btr_search_enabled);
+ }
+ }
+ } while (released_search_latch);
+}
+
+/********************************************************************//**
+Relocate a buffer control block. Relocates the block on the LRU list
+and in buf_pool->page_hash. Does not relocate bpage->list.
+The caller must take care of relocating bpage->list.
+The descriptor is copied from bpage to dpage with memcpy(); dpage then
+takes over the position of bpage in buf_pool->LRU and its entry in
+buf_pool->page_hash. The function asserts that the buffer pool mutex
+and the mutex of bpage are owned, and that bpage is neither I/O-fixed
+nor buffer-fixed. */
+UNIV_INTERN
+void
+buf_relocate(
+/*=========*/
+ buf_page_t* bpage, /*!< in/out: control block being relocated;
+ buf_page_get_state(bpage) must be
+ BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
+ buf_page_t* dpage) /*!< in/out: destination control block */
+{
+ buf_page_t* b;
+ ulint fold;
+
+ ut_ad(buf_pool_mutex_own());
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
+ ut_a(bpage->buf_fix_count == 0);
+ ut_ad(bpage->in_LRU_list);
+ ut_ad(!bpage->in_zip_hash);
+ ut_ad(bpage->in_page_hash);
+ ut_ad(bpage == buf_page_hash_get(bpage->space, bpage->offset));
+#ifdef UNIV_DEBUG
+ switch (buf_page_get_state(bpage)) {
+ case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_NOT_USED:
+ case BUF_BLOCK_READY_FOR_USE:
+ case BUF_BLOCK_FILE_PAGE:
+ case BUF_BLOCK_MEMORY:
+ case BUF_BLOCK_REMOVE_HASH:
+ ut_error;
+ case BUF_BLOCK_ZIP_DIRTY:
+ case BUF_BLOCK_ZIP_PAGE:
+ break;
}
+#endif /* UNIV_DEBUG */
- /* Scan awe_LRU_free_mapped from the end and try to find a block
- which is not bufferfixed or io-fixed */
+ memcpy(dpage, bpage, sizeof *dpage); /* copy the whole descriptor */
- bck = UT_LIST_GET_LAST(buf_pool->awe_LRU_free_mapped);
+ ut_d(bpage->in_LRU_list = FALSE);
+ ut_d(bpage->in_page_hash = FALSE);
+
+ /* relocate buf_pool->LRU: dpage is inserted right after the
+ former predecessor of bpage, or first if there was none */
+ b = UT_LIST_GET_PREV(LRU, bpage);
+ UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
+
+ if (b) {
+ UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, b, dpage);
+ } else {
+ UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, dpage);
+ }
+
+ if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) {
+ buf_pool->LRU_old = dpage;
+#ifdef UNIV_LRU_DEBUG
+ /* buf_pool->LRU_old must be the first item in the LRU list
+ whose "old" flag is set. */
+ ut_a(buf_pool->LRU_old->old);
+ ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
+ || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
+ ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
+ || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
+ } else {
+ /* Check that the "old" flag is consistent in
+ the block and its neighbours. */
+ buf_page_set_old(dpage, buf_page_is_old(dpage));
+#endif /* UNIV_LRU_DEBUG */
+ }
- while (bck) {
- ibool skip;
+ ut_d(UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU,
+ ut_ad(ut_list_node_313->in_LRU_list)));
- mutex_enter(&bck->mutex);
+ /* relocate buf_pool->page_hash: bpage is deleted and dpage is
+ inserted under the same fold value */
+ fold = buf_page_address_fold(bpage->space, bpage->offset);
- skip = (bck->state == BUF_BLOCK_FILE_PAGE
- && (bck->buf_fix_count != 0 || bck->io_fix != 0));
+ HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
+ HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
- if (skip) {
- mutex_exit(&bck->mutex);
+ UNIV_MEM_INVALID(bpage, sizeof *bpage); /* NOTE(review): presumably flags the old descriptor as invalid for memory checkers - confirm */
+}
- /* We have to skip this */
- bck = UT_LIST_GET_PREV(awe_LRU_free_mapped, bck);
- } else {
- /* We can map block to the frame of bck */
+/********************************************************************//**
+Shrinks the buffer pool.
+Repeatedly picks the largest chunk that is not larger than chunk_size
+and whose blocks are all free (buf_chunk_all_free()), frees it and
+rewrites buf_pool->chunks without it. If no candidate chunk is entirely
+free, the blocks of the largest candidate are made old, freed or
+flushed, and the whole procedure is retried. Nothing is done if the
+pool has only one chunk. The adaptive hash index is disabled on entry
+and enabled again on exit. The caller must not hold the buffer pool
+mutex. */
+static
+void
+buf_pool_shrink(
+/*============*/
+ ulint chunk_size) /*!< in: number of pages to remove */
+{
+ buf_chunk_t* chunks;
+ buf_chunk_t* chunk;
+ ulint max_size;
+ ulint max_free_size;
+ buf_chunk_t* max_chunk;
+ buf_chunk_t* max_free_chunk;
+
+ ut_ad(!buf_pool_mutex_own());
+
+try_again:
+ btr_search_disable(); /* Empty the adaptive hash index again */
+ buf_pool_mutex_enter();
+
+shrink_again:
+ if (buf_pool->n_chunks <= 1) {
+
+ /* Cannot shrink if there is only one chunk */
+ goto func_done;
+ }
+
+ /* Search for the largest free chunk
+ not larger than the size difference */
+ chunks = buf_pool->chunks;
+ chunk = chunks + buf_pool->n_chunks;
+ max_size = max_free_size = 0;
+ max_chunk = max_free_chunk = NULL;
+
+ while (--chunk >= chunks) {
+ if (chunk->size <= chunk_size
+ && chunk->size > max_free_size) {
+ if (chunk->size > max_size) {
+ max_size = chunk->size;
+ max_chunk = chunk;
+ }
- os_awe_map_physical_mem_to_window(
- bck->frame,
- UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE,
- block->awe_info);
+ if (buf_chunk_all_free(chunk)) {
+ max_free_size = chunk->size;
+ max_free_chunk = chunk;
+ }
+ }
+ }
- block->frame = bck->frame;
+ if (!max_free_size) {
- *(buf_pool->blocks_of_frames
- + (((ulint)(block->frame
- - buf_pool->frame_zero))
- >> UNIV_PAGE_SIZE_SHIFT))
- = block;
+ ulint dirty = 0;
+ ulint nonfree = 0;
+ buf_block_t* block;
+ buf_block_t* bend;
- bck->frame = NULL;
- UT_LIST_REMOVE(awe_LRU_free_mapped,
- buf_pool->awe_LRU_free_mapped,
- bck);
+ /* Cannot shrink: try again later
+ (do not assign srv_buf_pool_old_size) */
+ if (!max_chunk) {
- if (add_to_mapped_list) {
- UT_LIST_ADD_FIRST(
- awe_LRU_free_mapped,
- buf_pool->awe_LRU_free_mapped,
- block);
+ goto func_exit;
+ }
+
+ block = max_chunk->blocks;
+ bend = block + max_chunk->size;
+
+ /* Move the blocks of chunk to the end of the
+ LRU list and try to flush them. */
+ for (; block < bend; block++) {
+ switch (buf_block_get_state(block)) {
+ case BUF_BLOCK_NOT_USED:
+ continue;
+ case BUF_BLOCK_FILE_PAGE:
+ break;
+ default:
+ nonfree++;
+ continue;
}
- buf_pool->n_pages_awe_remapped++;
+ mutex_enter(&block->mutex);
+ /* The following calls will temporarily
+ release block->mutex and buf_pool_mutex.
+ Therefore, we have to always retry,
+ even if !dirty && !nonfree. */
+
+ if (!buf_flush_ready_for_replace(&block->page)) {
+
+ buf_LRU_make_block_old(&block->page);
+ dirty++;
+ } else if (buf_LRU_free_block(&block->page, TRUE, NULL)
+ != BUF_LRU_FREED) {
+ nonfree++;
+ }
- mutex_exit(&bck->mutex);
+ mutex_exit(&block->mutex);
+ }
- return;
+ buf_pool_mutex_exit();
+
+ /* Request for a flush of the chunk if it helps.
+ Do not flush if there are non-free blocks, since
+ flushing will not make the chunk freeable. */
+ if (nonfree) {
+ /* Avoid busy-waiting. */
+ os_thread_sleep(100000);
+ } else if (dirty
+ && buf_flush_batch(BUF_FLUSH_LRU, dirty, 0)
+ == ULINT_UNDEFINED) {
+
+ buf_flush_wait_batch_end(BUF_FLUSH_LRU);
}
+
+ goto try_again;
}
- fprintf(stderr,
- "InnoDB: AWE: Fatal error: cannot find a page to unmap\n"
- "InnoDB: awe_LRU_free_mapped list length %lu\n",
- (ulong) UT_LIST_GET_LEN(buf_pool->awe_LRU_free_mapped));
+ max_size = max_free_size;
+ max_chunk = max_free_chunk;
- ut_a(0);
-}
+ srv_buf_pool_old_size = srv_buf_pool_size;
-/************************************************************************
-Allocates a buffer block. */
-UNIV_INLINE
-buf_block_t*
-buf_block_alloc(void)
-/*=================*/
- /* out, own: the allocated block; also if AWE
- is used it is guaranteed that the page is
- mapped to a frame */
-{
- buf_block_t* block;
+ /* Rewrite buf_pool->chunks. Copy everything but max_chunk.
+ Both copies are sized in bytes: the number of chunk
+ descriptors (a pointer difference) times sizeof *chunks. */
+ chunks = mem_alloc((buf_pool->n_chunks - 1) * sizeof *chunks);
+ memcpy(chunks, buf_pool->chunks,
+ (max_chunk - buf_pool->chunks) * sizeof *chunks);
+ memcpy(chunks + (max_chunk - buf_pool->chunks),
+ max_chunk + 1,
+ (buf_pool->chunks + buf_pool->n_chunks
+ - (max_chunk + 1)) * sizeof *chunks);
+ ut_a(buf_pool->curr_size > max_chunk->size);
+ buf_pool->curr_size -= max_chunk->size;
+ srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
+ chunk_size -= max_chunk->size;
+ buf_chunk_free(max_chunk);
+ mem_free(buf_pool->chunks);
+ buf_pool->chunks = chunks;
+ buf_pool->n_chunks--;
- block = buf_LRU_get_free_block();
+ /* Allow a slack of one megabyte. */
+ if (chunk_size > 1048576 / UNIV_PAGE_SIZE) {
- return(block);
+ goto shrink_again;
+ }
+
+func_done:
+ srv_buf_pool_old_size = srv_buf_pool_size;
+func_exit:
+ buf_pool_mutex_exit();
+ btr_search_enable();
}
-/************************************************************************
-Moves to the block to the start of the LRU list if there is a danger
-that the block would drift out of the buffer pool. */
-UNIV_INLINE
+/********************************************************************//**
+Rebuild buf_pool->page_hash.
+The old page hash is freed and a new one is created with
+hash_create(2 * buf_pool->curr_size). A new buf_pool->zip_hash of the
+same size is created as well and filled from the old one with
+HASH_MIGRATE() before the old one is freed. The page hash is then
+repopulated from the BUF_BLOCK_FILE_PAGE blocks of all chunks, from
+buf_pool->zip_clean and from the BUF_BLOCK_ZIP_DIRTY blocks of
+buf_pool->flush_list. All of this is done while holding the buffer
+pool mutex, which the function acquires itself. */
+static
void
-buf_block_make_young(
-/*=================*/
- buf_block_t* block) /* in: block to make younger */
+buf_pool_page_hash_rebuild(void)
+/*============================*/
{
- ut_ad(!mutex_own(&(buf_pool->mutex)));
+ ulint i;
+ ulint n_chunks;
+ buf_chunk_t* chunk;
+ hash_table_t* page_hash;
+ hash_table_t* zip_hash;
+ buf_page_t* b;
- /* Note that we read freed_page_clock's without holding any mutex:
- this is allowed since the result is used only in heuristics */
+ buf_pool_mutex_enter();
- if (buf_block_peek_if_too_old(block)) {
+ /* Free, create, and populate the hash table. */
+ hash_table_free(buf_pool->page_hash);
+ buf_pool->page_hash = page_hash = hash_create(2 * buf_pool->curr_size);
+ zip_hash = hash_create(2 * buf_pool->curr_size);
- mutex_enter(&buf_pool->mutex);
- /* There has been freeing activity in the LRU list:
- best to move to the head of the LRU list */
+ HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
+ BUF_POOL_ZIP_FOLD_BPAGE);
- buf_LRU_make_block_young(block);
- mutex_exit(&buf_pool->mutex);
- }
-}
+ hash_table_free(buf_pool->zip_hash);
+ buf_pool->zip_hash = zip_hash;
-/************************************************************************
-Moves a page to the start of the buffer pool LRU list. This high-level
-function can be used to prevent an important page from from slipping out of
-the buffer pool. */
+ /* Insert the uncompressed file pages to buf_pool->page_hash. */
-void
-buf_page_make_young(
-/*================*/
- buf_frame_t* frame) /* in: buffer frame of a file page */
-{
- buf_block_t* block;
+ chunk = buf_pool->chunks;
+ n_chunks = buf_pool->n_chunks;
- mutex_enter(&(buf_pool->mutex));
+ for (i = 0; i < n_chunks; i++, chunk++) {
+ ulint j;
+ buf_block_t* block = chunk->blocks;
- block = buf_block_align(frame);
+ for (j = 0; j < chunk->size; j++, block++) {
+ if (buf_block_get_state(block)
+ == BUF_BLOCK_FILE_PAGE) {
+ ut_ad(!block->page.in_zip_hash);
+ ut_ad(block->page.in_page_hash);
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+ HASH_INSERT(buf_page_t, hash, page_hash,
+ buf_page_address_fold(
+ block->page.space,
+ block->page.offset),
+ &block->page);
+ }
+ }
+ }
- buf_LRU_make_block_young(block);
+ /* Insert the compressed-only pages to buf_pool->page_hash.
+ All such blocks are either in buf_pool->zip_clean or
+ in buf_pool->flush_list. */
+
+ for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
+ b = UT_LIST_GET_NEXT(list, b)) {
+ ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
+ ut_ad(!b->in_flush_list);
+ ut_ad(b->in_LRU_list);
+ ut_ad(b->in_page_hash);
+ ut_ad(!b->in_zip_hash);
+
+ HASH_INSERT(buf_page_t, hash, page_hash,
+ buf_page_address_fold(b->space, b->offset), b);
+ }
+
+ for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
+ b = UT_LIST_GET_NEXT(list, b)) {
+ ut_ad(b->in_flush_list);
+ ut_ad(b->in_LRU_list);
+ ut_ad(b->in_page_hash);
+ ut_ad(!b->in_zip_hash);
+
+ switch (buf_page_get_state(b)) {
+ case BUF_BLOCK_ZIP_DIRTY:
+ HASH_INSERT(buf_page_t, hash, page_hash,
+ buf_page_address_fold(b->space,
+ b->offset), b);
+ break;
+ case BUF_BLOCK_FILE_PAGE:
+ /* uncompressed page: already inserted by the
+ scan of the chunks above */
+ break;
+ case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_NOT_USED:
+ case BUF_BLOCK_READY_FOR_USE:
+ case BUF_BLOCK_MEMORY:
+ case BUF_BLOCK_REMOVE_HASH:
+ ut_error;
+ break;
+ }
+ }
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
}
-/************************************************************************
-Frees a buffer block which does not contain a file page. */
-UNIV_INLINE
+/********************************************************************//**
+Resizes the buffer pool.
+Compares srv_buf_pool_size with srv_buf_pool_curr_size and either
+calls buf_pool_shrink() or appends one new chunk covering the
+difference. Returns at once if srv_buf_pool_old_size already equals
+srv_buf_pool_size. In all other cases buf_pool->page_hash is rebuilt
+at the end. The buffer pool mutex is acquired by this function and is
+released on every path before buf_pool_shrink() or
+buf_pool_page_hash_rebuild() is called, because both acquire it
+themselves. */
+UNIV_INTERN
void
-buf_block_free(
-/*===========*/
- buf_block_t* block) /* in, own: block to be freed */
+buf_pool_resize(void)
+/*=================*/
{
- mutex_enter(&(buf_pool->mutex));
+ buf_pool_mutex_enter();
- mutex_enter(&block->mutex);
+ if (srv_buf_pool_old_size == srv_buf_pool_size) {
- ut_a(block->state != BUF_BLOCK_FILE_PAGE);
+ buf_pool_mutex_exit();
+ return;
+ }
- buf_LRU_block_free_non_file_page(block);
+ /* NOTE(review): this test is also true when srv_buf_pool_size
+ is less than one megabyte above srv_buf_pool_curr_size; the
+ subtraction below would then wrap around if both variables
+ are unsigned. Confirm the intended condition. */
+ if (srv_buf_pool_curr_size + 1048576 > srv_buf_pool_size) {
- mutex_exit(&block->mutex);
+ buf_pool_mutex_exit();
- mutex_exit(&(buf_pool->mutex));
-}
+ /* Disable adaptive hash indexes and empty the index
+ in order to free up memory in the buffer pool chunks. */
+ buf_pool_shrink((srv_buf_pool_curr_size - srv_buf_pool_size)
+ / UNIV_PAGE_SIZE);
+ } else if (srv_buf_pool_curr_size + 1048576 < srv_buf_pool_size) {
-/*************************************************************************
-Allocates a buffer frame. */
+ /* Enlarge the buffer pool by at least one megabyte */
-buf_frame_t*
-buf_frame_alloc(void)
-/*=================*/
- /* out: buffer frame */
-{
- return(buf_block_alloc()->frame);
-}
+ ulint mem_size
+ = srv_buf_pool_size - srv_buf_pool_curr_size;
+ buf_chunk_t* chunks;
+ buf_chunk_t* chunk;
-/*************************************************************************
-Frees a buffer frame which does not contain a file page. */
+ chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
-void
-buf_frame_free(
-/*===========*/
- buf_frame_t* frame) /* in: buffer frame */
-{
- buf_block_free(buf_block_align(frame));
-}
+ memcpy(chunks, buf_pool->chunks, buf_pool->n_chunks
+ * sizeof *chunks);
-/************************************************************************
-Returns the buffer control block if the page can be found in the buffer
-pool. NOTE that it is possible that the page is not yet read
-from disk, though. This is a very low-level function: use with care! */
+ chunk = &chunks[buf_pool->n_chunks];
-buf_block_t*
-buf_page_peek_block(
+ if (!buf_chunk_init(chunk, mem_size)) {
+ mem_free(chunks);
+ } else {
+ buf_pool->curr_size += chunk->size;
+ srv_buf_pool_curr_size = buf_pool->curr_size
+ * UNIV_PAGE_SIZE;
+ mem_free(buf_pool->chunks);
+ buf_pool->chunks = chunks;
+ buf_pool->n_chunks++;
+ }
+
+ srv_buf_pool_old_size = srv_buf_pool_size;
+ buf_pool_mutex_exit();
+ } else {
+ /* srv_buf_pool_curr_size + 1048576 == srv_buf_pool_size:
+ nothing is resized, but the mutex acquired above must
+ still be released before the page hash is rebuilt. */
+ buf_pool_mutex_exit();
+ }
+
+ buf_pool_page_hash_rebuild();
+}
+
+/********************************************************************//**
+Moves a page to the start of the buffer pool LRU list. This high-level
+function can be used to prevent an important page from slipping out of
+the buffer pool.
+The buffer pool mutex is acquired around the call to
+buf_LRU_make_block_young(); the function asserts that
+buf_page_in_file(bpage) holds. */
+UNIV_INTERN
+void
+buf_page_make_young(
/*================*/
- /* out: control block if found from page hash table,
- otherwise NULL; NOTE that the page is not necessarily
- yet read from disk! */
- ulint space, /* in: space id */
- ulint offset) /* in: page number */
+ buf_page_t* bpage) /*!< in: buffer block of a file page */
{
- buf_block_t* block;
+ buf_pool_mutex_enter();
- mutex_enter_fast(&(buf_pool->mutex));
+ ut_a(buf_page_in_file(bpage));
- block = buf_page_hash_get(space, offset);
+ buf_LRU_make_block_young(bpage);
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
+}
- return(block);
+/********************************************************************//**
+Sets the time of the first access of a page and moves a page to the
+start of the buffer pool LRU list if it is too old. This high-level
+function can be used to prevent an important page from slipping
+out of the buffer pool.
+Exactly one of the two actions is taken per call: if
+buf_page_peek_if_too_old(bpage) holds, the page is made young;
+otherwise, and only when access_time is 0, the current ut_time_ms()
+value is stored with buf_page_set_accessed(). Each action is done
+under the buffer pool mutex, which the caller must not hold. */
+static
+void
+buf_page_set_accessed_make_young(
+/*=============================*/
+ buf_page_t* bpage, /*!< in/out: buffer block of a
+ file page */
+ unsigned access_time) /*!< in: bpage->access_time
+ read under mutex protection,
+ or 0 if unknown */
+{
+ ut_ad(!buf_pool_mutex_own());
+ ut_a(buf_page_in_file(bpage));
+
+ if (buf_page_peek_if_too_old(bpage)) {
+ buf_pool_mutex_enter();
+ buf_LRU_make_block_young(bpage);
+ buf_pool_mutex_exit();
+ } else if (!access_time) {
+ ulint time_ms = ut_time_ms(); /* read the clock before taking the mutex */
+ buf_pool_mutex_enter();
+ buf_page_set_accessed(bpage, time_ms);
+ buf_pool_mutex_exit();
+ }
}
-/************************************************************************
+/********************************************************************//**
Resets the check_index_page_at_flush field of a page if found in the buffer
pool. */
-
+UNIV_INTERN
void
buf_reset_check_index_page_at_flush(
/*================================*/
- ulint space, /* in: space id */
- ulint offset) /* in: page number */
+ ulint space, /*!< in: space id */
+ ulint offset) /*!< in: page number */
{
buf_block_t* block;
- mutex_enter_fast(&(buf_pool->mutex));
+ buf_pool_mutex_enter(); /* held for the lookup and the update */
- block = buf_page_hash_get(space, offset);
+ block = (buf_block_t*) buf_page_hash_get(space, offset); /* the cast is only relied upon after the state check below */
- if (block) {
+ if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) { /* pages in any other state are left untouched */
block->check_index_page_at_flush = FALSE;
}
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
}
-/************************************************************************
+/********************************************************************//**
Returns the current state of is_hashed of a page. FALSE if the page is
not in the pool. NOTE that this operation does not fix the page in the
-pool if it is found there. */
-
+pool if it is found there. /* new side: the page is looked up and block->is_hashed is read under the buffer pool mutex; a page whose state is not BUF_BLOCK_FILE_PAGE is reported as FALSE */
+@return TRUE if page hash index is built in search system */
+UNIV_INTERN
ibool
buf_page_peek_if_search_hashed(
/*===========================*/
- /* out: TRUE if page hash index is built in search
- system */
- ulint space, /* in: space id */
- ulint offset) /* in: page number */
+ ulint space, /*!< in: space id */
+ ulint offset) /*!< in: page number */
{
buf_block_t* block;
ibool is_hashed;
- mutex_enter_fast(&(buf_pool->mutex));
+ buf_pool_mutex_enter();
- block = buf_page_hash_get(space, offset);
+ block = (buf_block_t*) buf_page_hash_get(space, offset);
- if (!block) {
+ if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
is_hashed = FALSE;
} else {
is_hashed = block->is_hashed;
}
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
return(is_hashed);
}
-/************************************************************************
-Returns TRUE if the page can be found in the buffer pool hash table. NOTE
-that it is possible that the page is not yet read from disk, though. */
-
-ibool
-buf_page_peek(
-/*==========*/
- /* out: TRUE if found from page hash table,
- NOTE that the page is not necessarily yet read
- from disk! */
- ulint space, /* in: space id */
- ulint offset) /* in: page number */
+#ifdef UNIV_DEBUG_FILE_ACCESSES
+/********************************************************************//**
+Sets file_page_was_freed TRUE if the page is found in the buffer pool.
+This function should be called when we free a file page and want the
+debug version to check that it is not accessed any more unless
+reallocated.
+The lookup in the page hash and the update of the flag are done under
+the buffer pool mutex; the pointer is returned after the mutex has
+been released.
+@return control block if found in page hash table, otherwise NULL */
+UNIV_INTERN
+buf_page_t*
+buf_page_set_file_page_was_freed(
+/*=============================*/
+ ulint space, /*!< in: space id */
+ ulint offset) /*!< in: page number */
{
- if (buf_page_peek_block(space, offset)) {
+ buf_page_t* bpage;
- return(TRUE);
+ buf_pool_mutex_enter();
+
+ bpage = buf_page_hash_get(space, offset);
+
+ if (bpage) {
+ bpage->file_page_was_freed = TRUE;
}
- return(FALSE);
+ buf_pool_mutex_exit();
+
+ return(bpage);
}
-/************************************************************************
-Sets file_page_was_freed TRUE if the page is found in the buffer pool.
+/********************************************************************//**
+Sets file_page_was_freed FALSE if the page is found in the buffer pool.
This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless
-reallocated. */
+reallocated.
+@return control block if found in page hash table, otherwise NULL */
+UNIV_INTERN
+buf_page_t*
+buf_page_reset_file_page_was_freed(
+/*===============================*/
+ ulint space, /*!< in: space id */
+ ulint offset) /*!< in: page number */
+{
+ buf_page_t* bpage;
-buf_block_t*
-buf_page_set_file_page_was_freed(
-/*=============================*/
- /* out: control block if found from page hash table,
- otherwise NULL */
- ulint space, /* in: space id */
- ulint offset) /* in: page number */
+ buf_pool_mutex_enter(); /* held for the lookup and the update of the flag */
+
+ bpage = buf_page_hash_get(space, offset);
+
+ if (bpage) {
+ bpage->file_page_was_freed = FALSE; /* counterpart of buf_page_set_file_page_was_freed(), which stores TRUE */
+ }
+
+ buf_pool_mutex_exit();
+
+ return(bpage); /* returned after the mutex has been released */
+}
+#endif /* UNIV_DEBUG_FILE_ACCESSES */
+
+/********************************************************************//**
+Get read access to a compressed page (usually of type
+FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
+The page must be released with buf_page_release_zip().
+NOTE: the page is not protected by any latch. Mutual exclusion has to
+be implemented at a higher level. In other words, all possible
+accesses to a given page through this function must be protected by
+the same set of mutexes or latches.
+If the page is not found in the page hash, buf_read_page() is called
+and the lookup is repeated. A BUF_BLOCK_FILE_PAGE block is first
+offered to buf_LRU_free_block(bpage, FALSE, NULL); if that frees
+something, the lookup is repeated, otherwise the block is buffer-fixed
+as it is. If the block is being read in (BUF_IO_READ), the function
+polls in os_thread_sleep(WAIT_FOR_READ) steps until the I/O fix changes.
+@return pointer to the block, or NULL if the block that was found has
+no compressed page (bpage->zip.data is NULL) */
+UNIV_INTERN
+buf_page_t*
+buf_page_get_zip(
+/*=============*/
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size */
+ ulint offset) /*!< in: page number */
{
- buf_block_t* block;
+ buf_page_t* bpage;
+ mutex_t* block_mutex;
+ ibool must_read;
+ unsigned access_time;
+
+#ifndef UNIV_LOG_DEBUG
+ ut_ad(!ibuf_inside());
+#endif
+ buf_pool->stat.n_page_gets++;
+
+ for (;;) {
+ buf_pool_mutex_enter();
+lookup:
+ bpage = buf_page_hash_get(space, offset);
+ if (bpage) {
+ break;
+ }
- mutex_enter_fast(&(buf_pool->mutex));
+ /* Page not in buf_pool: needs to be read from file */
- block = buf_page_hash_get(space, offset);
+ buf_pool_mutex_exit();
- if (block) {
- block->file_page_was_freed = TRUE;
+ buf_read_page(space, zip_size, offset);
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ ut_a(++buf_dbg_counter % 37 || buf_validate());
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
}
- mutex_exit(&(buf_pool->mutex));
+ if (UNIV_UNLIKELY(!bpage->zip.data)) {
+ /* There is no compressed page. */
+err_exit:
+ buf_pool_mutex_exit();
+ return(NULL);
+ }
- return(block);
+ switch (buf_page_get_state(bpage)) {
+ case BUF_BLOCK_NOT_USED:
+ case BUF_BLOCK_READY_FOR_USE:
+ case BUF_BLOCK_MEMORY:
+ case BUF_BLOCK_REMOVE_HASH:
+ case BUF_BLOCK_ZIP_FREE:
+ break;
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_ZIP_DIRTY:
+ block_mutex = &buf_pool_zip_mutex;
+ mutex_enter(block_mutex);
+ bpage->buf_fix_count++;
+ goto got_block;
+ case BUF_BLOCK_FILE_PAGE:
+ block_mutex = &((buf_block_t*) bpage)->mutex;
+ mutex_enter(block_mutex);
+
+ /* Discard the uncompressed page frame if possible. */
+ if (buf_LRU_free_block(bpage, FALSE, NULL)
+ == BUF_LRU_FREED) {
+
+ mutex_exit(block_mutex);
+ goto lookup;
+ }
+
+ buf_block_buf_fix_inc((buf_block_t*) bpage,
+ __FILE__, __LINE__);
+ goto got_block;
+ }
+
+ ut_error;
+ goto err_exit;
+
+got_block:
+ must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
+ access_time = buf_page_is_accessed(bpage);
+
+ buf_pool_mutex_exit();
+
+ mutex_exit(block_mutex);
+
+ buf_page_set_accessed_make_young(bpage, access_time);
+
+#ifdef UNIV_DEBUG_FILE_ACCESSES
+ ut_a(!bpage->file_page_was_freed);
+#endif
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ ut_a(++buf_dbg_counter % 5771 || buf_validate());
+ ut_a(bpage->buf_fix_count > 0);
+ ut_a(buf_page_in_file(bpage));
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+ if (must_read) {
+ /* Let us wait until the read operation
+ completes */
+
+ for (;;) {
+ enum buf_io_fix io_fix;
+
+ mutex_enter(block_mutex);
+ io_fix = buf_page_get_io_fix(bpage);
+ mutex_exit(block_mutex);
+
+ if (io_fix == BUF_IO_READ) {
+
+ os_thread_sleep(WAIT_FOR_READ);
+ } else {
+ break;
+ }
+ }
+ }
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
+ ut_a(ibuf_count_get(buf_page_get_space(bpage),
+ buf_page_get_page_no(bpage)) == 0);
+#endif
+ return(bpage);
}
-/************************************************************************
-Sets file_page_was_freed FALSE if the page is found in the buffer pool.
-This function should be called when we free a file page and want the
-debug version to check that it is not accessed any more unless
-reallocated. */
+/********************************************************************//**
+Initialize some fields of a control block.
+Only check_index_page_at_flush, index, n_hash_helps, is_hashed,
+n_fields, n_bytes and left_side are assigned here; no other field of
+the block is written by this function. */
+UNIV_INLINE
+void
+buf_block_init_low(
+/*===============*/
+ buf_block_t* block) /*!< in: block to init */
+{
+ block->check_index_page_at_flush = FALSE;
+ block->index = NULL;
+ block->n_hash_helps = 0;
+ block->is_hashed = FALSE;
+ block->n_fields = 1;
+ block->n_bytes = 0;
+ block->left_side = TRUE;
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/********************************************************************//**
+Decompress a block.
+The source is the compressed frame block->page.zip.data and the
+destination is block->frame. When check is TRUE, the checksum stored
+at FIL_PAGE_SPACE_OR_CHKSUM is first compared with the value computed
+by page_zip_calc_checksum(); a mismatch is reported on stderr and
+FALSE is returned. FIL_PAGE_INDEX pages are passed to
+page_zip_decompress(); the other page types listed in the switch are
+copied verbatim, buf_block_get_zip_size(block) bytes. Any other page
+type is reported on stderr and FALSE is returned. The function asserts
+that the space id of the block is not 0.
+@return TRUE if successful */
+UNIV_INTERN
+ibool
+buf_zip_decompress(
+/*===============*/
+ buf_block_t* block, /*!< in/out: block */
+ ibool check) /*!< in: TRUE=verify the page checksum */
+{
+ const byte* frame = block->page.zip.data;
+
+ ut_ad(buf_block_get_zip_size(block));
+ ut_a(buf_block_get_space(block) != 0);
+
+ if (UNIV_LIKELY(check)) {
+ ulint stamp_checksum = mach_read_from_4(
+ frame + FIL_PAGE_SPACE_OR_CHKSUM);
+ ulint calc_checksum = page_zip_calc_checksum(
+ frame, page_zip_get_size(&block->page.zip));
+
+ if (UNIV_UNLIKELY(stamp_checksum != calc_checksum)) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: compressed page checksum mismatch"
+ " (space %u page %u): %lu != %lu\n",
+ block->page.space, block->page.offset,
+ stamp_checksum, calc_checksum);
+ return(FALSE);
+ }
+ }
+
+ switch (fil_page_get_type(frame)) {
+ case FIL_PAGE_INDEX:
+ if (page_zip_decompress(&block->page.zip,
+ block->frame, TRUE)) {
+ return(TRUE);
+ }
+
+ fprintf(stderr,
+ "InnoDB: unable to decompress space %lu page %lu\n",
+ (ulong) block->page.space,
+ (ulong) block->page.offset);
+ return(FALSE);
+
+ case FIL_PAGE_TYPE_ALLOCATED:
+ case FIL_PAGE_INODE:
+ case FIL_PAGE_IBUF_BITMAP:
+ case FIL_PAGE_TYPE_FSP_HDR:
+ case FIL_PAGE_TYPE_XDES:
+ case FIL_PAGE_TYPE_ZBLOB:
+ case FIL_PAGE_TYPE_ZBLOB2:
+ /* Copy to uncompressed storage. */
+ memcpy(block->frame, frame,
+ buf_block_get_zip_size(block));
+ return(TRUE);
+ }
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: unknown compressed page"
+ " type %lu\n",
+ fil_page_get_type(frame));
+ return(FALSE);
+}
+
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
+Gets the block to whose frame the pointer is pointing to.
+The chunks in buf_pool->chunks are scanned in order. For each chunk the
+byte offset of ptr from the frame of the first block of the chunk is
+shifted right by UNIV_PAGE_SIZE_SHIFT and used as the block index if it
+is smaller than chunk->size. If no chunk matches, ut_error is hit.
+@return pointer to block, never NULL */
+UNIV_INTERN
buf_block_t*
-buf_page_reset_file_page_was_freed(
-/*===============================*/
- /* out: control block if found from page hash table,
- otherwise NULL */
- ulint space, /* in: space id */
- ulint offset) /* in: page number */
+buf_block_align(
+/*============*/
+ const byte* ptr) /*!< in: pointer to a frame */
{
- buf_block_t* block;
+ buf_chunk_t* chunk;
+ ulint i;
- mutex_enter_fast(&(buf_pool->mutex));
+ /* TODO: protect buf_pool->chunks with a mutex (it will
+ currently remain constant after buf_pool_init()) */
+ for (chunk = buf_pool->chunks, i = buf_pool->n_chunks; i--; chunk++) {
+ lint offs = ptr - chunk->blocks->frame;
- block = buf_page_hash_get(space, offset);
+ if (UNIV_UNLIKELY(offs < 0)) {
- if (block) {
- block->file_page_was_freed = FALSE;
+ continue;
+ }
+
+ offs >>= UNIV_PAGE_SIZE_SHIFT;
+
+ if (UNIV_LIKELY((ulint) offs < chunk->size)) {
+ buf_block_t* block = &chunk->blocks[offs];
+
+ /* The function buf_chunk_init() invokes
+ buf_block_init() so that block[n].frame ==
+ block->frame + n * UNIV_PAGE_SIZE. Check it. */
+ ut_ad(block->frame == page_align(ptr));
+#ifdef UNIV_DEBUG
+ /* A thread that updates these fields must
+ hold buf_pool_mutex and block->mutex. Acquire
+ only the latter. */
+ mutex_enter(&block->mutex);
+
+ switch (buf_block_get_state(block)) {
+ case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_ZIP_DIRTY:
+ /* These types should only be used in
+ the compressed buffer pool, whose
+ memory is allocated from
+ buf_pool->chunks, in UNIV_PAGE_SIZE
+ blocks flagged as BUF_BLOCK_MEMORY. */
+ ut_error;
+ break;
+ case BUF_BLOCK_NOT_USED:
+ case BUF_BLOCK_READY_FOR_USE:
+ case BUF_BLOCK_MEMORY:
+ /* Some data structures contain
+ "guess" pointers to file pages. The
+ file pages may have been freed and
+ reused. Do not complain. */
+ break;
+ case BUF_BLOCK_REMOVE_HASH:
+ /* buf_LRU_block_remove_hashed_page()
+ will overwrite the FIL_PAGE_OFFSET and
+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID with
+ 0xff and set the state to
+ BUF_BLOCK_REMOVE_HASH. */
+ ut_ad(page_get_space_id(page_align(ptr))
+ == 0xffffffff);
+ ut_ad(page_get_page_no(page_align(ptr))
+ == 0xffffffff);
+ break;
+ case BUF_BLOCK_FILE_PAGE:
+ ut_ad(block->page.space
+ == page_get_space_id(page_align(ptr)));
+ ut_ad(block->page.offset
+ == page_get_page_no(page_align(ptr)));
+ break;
+ }
+
+ mutex_exit(&block->mutex);
+#endif /* UNIV_DEBUG */
+
+ return(block);
+ }
}
- mutex_exit(&(buf_pool->mutex));
+ /* The block should always be found. */
+ ut_error;
+ return(NULL);
+}
+
+/********************************************************************//**
+Find out if a pointer belongs to a buf_block_t. It can be a pointer to
+the buf_block_t itself or a member of it
+The test is a pure address range check of ptr against the descriptor
+array chunk->blocks[0 .. chunk->size) of every chunk in
+buf_pool->chunks; ptr itself is never dereferenced.
+@return TRUE if ptr belongs to a buf_block_t struct */
+UNIV_INTERN
+ibool
+buf_pointer_is_block_field(
+/*=======================*/
+ const void* ptr) /*!< in: pointer not
+ dereferenced */
+{
+ const buf_chunk_t* chunk = buf_pool->chunks;
+ const buf_chunk_t* const echunk = chunk + buf_pool->n_chunks;
- return(block);
+ /* TODO: protect buf_pool->chunks with a mutex (it will
+ currently remain constant after buf_pool_init()) */
+ while (chunk < echunk) {
+ if (ptr >= (void *)chunk->blocks
+ && ptr < (void *)(chunk->blocks + chunk->size)) {
+
+ return(TRUE);
+ }
+
+ chunk++;
+ }
+
+ return(FALSE);
}
-/************************************************************************
-This is the general function used to get access to a database page. */
+/********************************************************************//**
+Find out if a buffer block was created by buf_chunk_init().
+A pointer whose numeric value is not a multiple of sizeof(buf_block_t)
+is rejected first; everything else is decided by
+buf_pointer_is_block_field(). The function asserts that the buffer
+pool mutex is owned.
+NOTE(review): the alignment test presumes that every chunk->blocks
+array starts at an address that is a multiple of sizeof(buf_block_t);
+confirm against the allocation in buf_chunk_init().
+@return TRUE if "block" has been added to buf_pool->free by buf_chunk_init() */
+static
+ibool
+buf_block_is_uncompressed(
+/*======================*/
+ const buf_block_t* block) /*!< in: pointer to block,
+ not dereferenced */
+{
+ ut_ad(buf_pool_mutex_own());
+
+ if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
+ /* The pointer should be aligned. */
+ return(FALSE);
+ }
+
+ return(buf_pointer_is_block_field((void *)block));
+}
-buf_frame_t*
+/********************************************************************//**
+This is the general function used to get access to a database page.
+@return pointer to the block or NULL */
+UNIV_INTERN
+buf_block_t*
buf_page_get_gen(
/*=============*/
- /* out: pointer to the frame or NULL */
- ulint space, /* in: space id */
- ulint offset, /* in: page number */
- ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
- buf_frame_t* guess, /* in: guessed frame or NULL */
- ulint mode, /* in: BUF_GET, BUF_GET_IF_IN_POOL,
- BUF_GET_NO_LATCH, BUF_GET_NOWAIT */
- const char* file, /* in: file name */
- ulint line, /* in: line where called */
- mtr_t* mtr) /* in: mini-transaction */
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint offset, /*!< in: page number */
+ ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
+ buf_block_t* guess, /*!< in: guessed block or NULL */
+ ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
+ BUF_GET_NO_LATCH */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
+ mtr_t* mtr) /*!< in: mini-transaction */
{
buf_block_t* block;
- ibool accessed;
+ unsigned access_time;
ulint fix_type;
- ibool success;
ibool must_read;
ut_ad(mtr);
@@ -1167,244 +2041,367 @@ buf_page_get_gen(
|| (rw_latch == RW_NO_LATCH));
ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
ut_ad((mode == BUF_GET) || (mode == BUF_GET_IF_IN_POOL)
- || (mode == BUF_GET_NO_LATCH) || (mode == BUF_GET_NOWAIT));
+ || (mode == BUF_GET_NO_LATCH));
+ ut_ad(zip_size == fil_space_get_zip_size(space));
+ ut_ad(ut_is_2pow(zip_size));
#ifndef UNIV_LOG_DEBUG
- ut_ad(!ibuf_inside() || ibuf_page(space, offset));
+ ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL));
#endif
- buf_pool->n_page_gets++;
+ buf_pool->stat.n_page_gets++;
loop:
- block = NULL;
- mutex_enter_fast(&(buf_pool->mutex));
-
- if (guess) {
- block = buf_block_align(guess);
+ block = guess;
+ buf_pool_mutex_enter();
- if ((offset != block->offset) || (space != block->space)
- || (block->state != BUF_BLOCK_FILE_PAGE)) {
-
- block = NULL;
+ if (block) {
+ /* If the guess is a compressed page descriptor that
+ has been allocated by buf_buddy_alloc(), it may have
+ been invalidated by buf_buddy_relocate(). In that
+ case, block could point to something that happens to
+ contain the expected bits in block->page. Similarly,
+ the guess may be pointing to a buffer pool chunk that
+ has been released when resizing the buffer pool. */
+
+ if (!buf_block_is_uncompressed(block)
+ || offset != block->page.offset
+ || space != block->page.space
+ || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
+
+ block = guess = NULL;
+ } else {
+ ut_ad(!block->page.in_zip_hash);
+ ut_ad(block->page.in_page_hash);
}
}
if (block == NULL) {
- block = buf_page_hash_get(space, offset);
+ block = (buf_block_t*) buf_page_hash_get(space, offset);
}
+loop2:
if (block == NULL) {
/* Page not in buf_pool: needs to be read from file */
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
if (mode == BUF_GET_IF_IN_POOL) {
return(NULL);
}
- buf_read_page(space, offset);
+ buf_read_page(space, zip_size, offset);
-#ifdef UNIV_DEBUG
- buf_dbg_counter++;
-
- if (buf_dbg_counter % 37 == 0) {
- ut_ad(buf_validate());
- }
-#endif
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ ut_a(++buf_dbg_counter % 37 || buf_validate());
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
goto loop;
}
- mutex_enter(&block->mutex);
+ ut_ad(page_zip_get_size(&block->page.zip) == zip_size);
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+ must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
- must_read = FALSE;
+ if (must_read && mode == BUF_GET_IF_IN_POOL) {
+ /* The page is only being read to buffer */
+ buf_pool_mutex_exit();
- if (block->io_fix == BUF_IO_READ) {
+ return(NULL);
+ }
- must_read = TRUE;
+ switch (buf_block_get_state(block)) {
+ buf_page_t* bpage;
+ ibool success;
- if (mode == BUF_GET_IF_IN_POOL) {
- /* The page is only being read to buffer */
- mutex_exit(&buf_pool->mutex);
- mutex_exit(&block->mutex);
+ case BUF_BLOCK_FILE_PAGE:
+ break;
- return(NULL);
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_ZIP_DIRTY:
+ bpage = &block->page;
+ /* Protect bpage->buf_fix_count. */
+ mutex_enter(&buf_pool_zip_mutex);
+
+ if (bpage->buf_fix_count
+ || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
+ /* This condition often occurs when the buffer
+ is not buffer-fixed, but I/O-fixed by
+ buf_page_init_for_read(). */
+ mutex_exit(&buf_pool_zip_mutex);
+wait_until_unfixed:
+ /* The block is buffer-fixed or I/O-fixed.
+ Try again later. */
+ buf_pool_mutex_exit();
+ os_thread_sleep(WAIT_FOR_READ);
+
+ goto loop;
}
- }
- /* If AWE is enabled and the page is not mapped to a frame, then
- map it */
+ /* Allocate an uncompressed page. */
+ buf_pool_mutex_exit();
+ mutex_exit(&buf_pool_zip_mutex);
- if (block->frame == NULL) {
- ut_a(srv_use_awe);
+ block = buf_LRU_get_free_block(0);
+ ut_a(block);
- /* We set second parameter TRUE because the block is in the
- LRU list and we must put it to awe_LRU_free_mapped list once
- mapped to a frame */
+ buf_pool_mutex_enter();
+ mutex_enter(&block->mutex);
- buf_awe_map_page_to_frame(block, TRUE);
- }
+ {
+ buf_page_t* hash_bpage
+ = buf_page_hash_get(space, offset);
-#ifdef UNIV_SYNC_DEBUG
- buf_block_buf_fix_inc_debug(block, file, line);
-#else
- buf_block_buf_fix_inc(block);
-#endif
- mutex_exit(&buf_pool->mutex);
+ if (UNIV_UNLIKELY(bpage != hash_bpage)) {
+ /* The buf_pool->page_hash was modified
+ while buf_pool_mutex was released.
+ Free the block that was allocated. */
- /* Check if this is the first access to the page */
+ buf_LRU_block_free_non_file_page(block);
+ mutex_exit(&block->mutex);
- accessed = block->accessed;
+ block = (buf_block_t*) hash_bpage;
+ goto loop2;
+ }
+ }
- block->accessed = TRUE;
+ if (UNIV_UNLIKELY
+ (bpage->buf_fix_count
+ || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
- mutex_exit(&block->mutex);
+ /* The block was buffer-fixed or I/O-fixed
+ while buf_pool_mutex was not held by this thread.
+ Free the block that was allocated and try again.
+ This should be extremely unlikely. */
- buf_block_make_young(block);
+ buf_LRU_block_free_non_file_page(block);
+ mutex_exit(&block->mutex);
-#ifdef UNIV_DEBUG_FILE_ACCESSES
- ut_a(block->file_page_was_freed == FALSE);
-#endif
+ goto wait_until_unfixed;
+ }
-#ifdef UNIV_DEBUG
- buf_dbg_counter++;
+ /* Move the compressed page from bpage to block,
+ and uncompress it. */
- if (buf_dbg_counter % 5771 == 0) {
- ut_ad(buf_validate());
- }
-#endif
- ut_ad(block->buf_fix_count > 0);
- ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+ mutex_enter(&buf_pool_zip_mutex);
- if (mode == BUF_GET_NOWAIT) {
- if (rw_latch == RW_S_LATCH) {
- success = rw_lock_s_lock_func_nowait(&(block->lock),
- file, line);
- fix_type = MTR_MEMO_PAGE_S_FIX;
+ buf_relocate(bpage, &block->page);
+ buf_block_init_low(block);
+ block->lock_hash_val = lock_rec_hash(space, offset);
+
+ UNIV_MEM_DESC(&block->page.zip.data,
+ page_zip_get_size(&block->page.zip), block);
+
+ if (buf_page_get_state(&block->page)
+ == BUF_BLOCK_ZIP_PAGE) {
+ UT_LIST_REMOVE(list, buf_pool->zip_clean,
+ &block->page);
+ ut_ad(!block->page.in_flush_list);
} else {
- ut_ad(rw_latch == RW_X_LATCH);
- success = rw_lock_x_lock_func_nowait(&(block->lock),
- file, line);
- fix_type = MTR_MEMO_PAGE_X_FIX;
+ /* Relocate buf_pool->flush_list. */
+ buf_page_t* b;
+
+ b = UT_LIST_GET_PREV(list, &block->page);
+ ut_ad(block->page.in_flush_list);
+ UT_LIST_REMOVE(list, buf_pool->flush_list,
+ &block->page);
+
+ if (b) {
+ UT_LIST_INSERT_AFTER(
+ list, buf_pool->flush_list, b,
+ &block->page);
+ } else {
+ UT_LIST_ADD_FIRST(
+ list, buf_pool->flush_list,
+ &block->page);
+ }
}
- if (!success) {
- mutex_enter(&block->mutex);
+ /* Buffer-fix, I/O-fix, and X-latch the block
+ for the duration of the decompression.
+ Also add the block to the unzip_LRU list. */
+ block->page.state = BUF_BLOCK_FILE_PAGE;
- block->buf_fix_count--;
+ /* Insert at the front of unzip_LRU list */
+ buf_unzip_LRU_add_block(block, FALSE);
- mutex_exit(&block->mutex);
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_s_unlock(&(block->debug_latch));
-#endif
+ block->page.buf_fix_count = 1;
+ buf_block_set_io_fix(block, BUF_IO_READ);
+ rw_lock_x_lock(&block->lock);
+ mutex_exit(&block->mutex);
+ mutex_exit(&buf_pool_zip_mutex);
+ buf_pool->n_pend_unzip++;
+
+ buf_buddy_free(bpage, sizeof *bpage);
+
+ buf_pool_mutex_exit();
+
+ /* Decompress the page and apply buffered operations
+ while not holding buf_pool_mutex or block->mutex. */
+ success = buf_zip_decompress(block, srv_use_checksums);
+ if (UNIV_LIKELY(success)) {
+ ibuf_merge_or_delete_for_page(block, space, offset,
+ zip_size, TRUE);
+ }
+
+ /* Unfix and unlatch the block. */
+ buf_pool_mutex_enter();
+ mutex_enter(&block->mutex);
+ block->page.buf_fix_count--;
+ buf_block_set_io_fix(block, BUF_IO_NONE);
+ mutex_exit(&block->mutex);
+ buf_pool->n_pend_unzip--;
+ rw_lock_x_unlock(&block->lock);
+
+ if (UNIV_UNLIKELY(!success)) {
+
+ buf_pool_mutex_exit();
return(NULL);
}
- } else if (rw_latch == RW_NO_LATCH) {
+ break;
+
+ case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_NOT_USED:
+ case BUF_BLOCK_READY_FOR_USE:
+ case BUF_BLOCK_MEMORY:
+ case BUF_BLOCK_REMOVE_HASH:
+ ut_error;
+ break;
+ }
+
+ ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+
+ mutex_enter(&block->mutex);
+ UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
+
+ buf_block_buf_fix_inc(block, file, line);
+
+ mutex_exit(&block->mutex);
+
+ /* Check if this is the first access to the page */
+
+ access_time = buf_page_is_accessed(&block->page);
+
+ buf_pool_mutex_exit();
+
+ buf_page_set_accessed_make_young(&block->page, access_time);
+
+#ifdef UNIV_DEBUG_FILE_ACCESSES
+ ut_a(!block->page.file_page_was_freed);
+#endif
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ ut_a(++buf_dbg_counter % 5771 || buf_validate());
+ ut_a(block->page.buf_fix_count > 0);
+ ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+ switch (rw_latch) {
+ case RW_NO_LATCH:
if (must_read) {
/* Let us wait until the read operation
completes */
for (;;) {
- mutex_enter(&block->mutex);
+ enum buf_io_fix io_fix;
- if (block->io_fix == BUF_IO_READ) {
+ mutex_enter(&block->mutex);
+ io_fix = buf_block_get_io_fix(block);
+ mutex_exit(&block->mutex);
- mutex_exit(&block->mutex);
+ if (io_fix == BUF_IO_READ) {
os_thread_sleep(WAIT_FOR_READ);
} else {
-
- mutex_exit(&block->mutex);
-
break;
}
}
}
fix_type = MTR_MEMO_BUF_FIX;
- } else if (rw_latch == RW_S_LATCH) {
+ break;
+ case RW_S_LATCH:
rw_lock_s_lock_func(&(block->lock), 0, file, line);
fix_type = MTR_MEMO_PAGE_S_FIX;
- } else {
+ break;
+
+ default:
+ ut_ad(rw_latch == RW_X_LATCH);
rw_lock_x_lock_func(&(block->lock), 0, file, line);
fix_type = MTR_MEMO_PAGE_X_FIX;
+ break;
}
mtr_memo_push(mtr, block, fix_type);
- if (!accessed) {
+ if (!access_time) {
/* In the case of a first access, try to apply linear
read-ahead */
- buf_read_ahead_linear(space, offset);
+ buf_read_ahead_linear(space, zip_size, offset);
}
-#ifdef UNIV_IBUF_DEBUG
- ut_a(ibuf_count_get(block->space, block->offset) == 0);
+#ifdef UNIV_IBUF_COUNT_DEBUG
+ ut_a(ibuf_count_get(buf_block_get_space(block),
+ buf_block_get_page_no(block)) == 0);
#endif
- return(block->frame);
+ return(block);
}
-/************************************************************************
+/********************************************************************//**
This is the general function used to get optimistic access to a database
-page. */
-
+page.
+@return TRUE if success */
+UNIV_INTERN
ibool
buf_page_optimistic_get_func(
/*=========================*/
- /* out: TRUE if success */
- ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
- buf_block_t* block, /* in: guessed buffer block */
- buf_frame_t* guess, /* in: guessed frame; note that AWE may move
- frames */
- dulint modify_clock,/* in: modify clock value if mode is
+ ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
+ buf_block_t* block, /*!< in: guessed buffer block */
+ ib_uint64_t modify_clock,/*!< in: modify clock value if mode is
..._GUESS_ON_CLOCK */
- const char* file, /* in: file name */
- ulint line, /* in: line where called */
- mtr_t* mtr) /* in: mini-transaction */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
+ mtr_t* mtr) /*!< in: mini-transaction */
{
- ibool accessed;
+ unsigned access_time;
ibool success;
ulint fix_type;
ut_ad(mtr && block);
ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
- /* If AWE is used, block may have a different frame now, e.g., NULL */
-
mutex_enter(&block->mutex);
- if (UNIV_UNLIKELY(block->state != BUF_BLOCK_FILE_PAGE)
- || UNIV_UNLIKELY(block->frame != guess)) {
+ if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {
mutex_exit(&block->mutex);
return(FALSE);
}
-#ifdef UNIV_SYNC_DEBUG
- buf_block_buf_fix_inc_debug(block, file, line);
-#else
- buf_block_buf_fix_inc(block);
-#endif
- accessed = block->accessed;
- block->accessed = TRUE;
+ buf_block_buf_fix_inc(block, file, line);
mutex_exit(&block->mutex);
- buf_block_make_young(block);
+ /* Check if this is the first access to the page.
+ We do a dirty read on purpose, to avoid mutex contention.
+ This field is only used for heuristic purposes; it does not
+ affect correctness. */
- /* Check if this is the first access to the page */
+ access_time = buf_page_is_accessed(&block->page);
+ buf_page_set_accessed_make_young(&block->page, access_time);
- ut_ad(!ibuf_inside() || ibuf_page(block->space, block->offset));
+ ut_ad(!ibuf_inside()
+ || ibuf_page(buf_block_get_space(block),
+ buf_block_get_zip_size(block),
+ buf_block_get_page_no(block), NULL));
if (rw_latch == RW_S_LATCH) {
- success = rw_lock_s_lock_func_nowait(&(block->lock),
- file, line);
+ success = rw_lock_s_lock_nowait(&(block->lock),
+ file, line);
fix_type = MTR_MEMO_PAGE_S_FIX;
} else {
success = rw_lock_x_lock_func_nowait(&(block->lock),
@@ -1414,21 +2411,15 @@ buf_page_optimistic_get_func(
if (UNIV_UNLIKELY(!success)) {
mutex_enter(&block->mutex);
-
- block->buf_fix_count--;
-
+ buf_block_buf_fix_dec(block);
mutex_exit(&block->mutex);
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_s_unlock(&(block->debug_latch));
-#endif
return(FALSE);
}
- if (UNIV_UNLIKELY(!UT_DULINT_EQ(modify_clock, block->modify_clock))) {
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(block->frame, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
+ if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) {
+ buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
+
if (rw_latch == RW_S_LATCH) {
rw_lock_s_unlock(&(block->lock));
} else {
@@ -1436,76 +2427,66 @@ buf_page_optimistic_get_func(
}
mutex_enter(&block->mutex);
-
- block->buf_fix_count--;
-
+ buf_block_buf_fix_dec(block);
mutex_exit(&block->mutex);
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_s_unlock(&(block->debug_latch));
-#endif
return(FALSE);
}
mtr_memo_push(mtr, block, fix_type);
-#ifdef UNIV_DEBUG
- buf_dbg_counter++;
-
- if (buf_dbg_counter % 5771 == 0) {
- ut_ad(buf_validate());
- }
-#endif
- ut_ad(block->buf_fix_count > 0);
- ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ ut_a(++buf_dbg_counter % 5771 || buf_validate());
+ ut_a(block->page.buf_fix_count > 0);
+ ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG_FILE_ACCESSES
- ut_a(block->file_page_was_freed == FALSE);
+ ut_a(block->page.file_page_was_freed == FALSE);
#endif
- if (UNIV_UNLIKELY(!accessed)) {
+ if (UNIV_UNLIKELY(!access_time)) {
/* In the case of a first access, try to apply linear
read-ahead */
- buf_read_ahead_linear(buf_frame_get_space_id(guess),
- buf_frame_get_page_no(guess));
+ buf_read_ahead_linear(buf_block_get_space(block),
+ buf_block_get_zip_size(block),
+ buf_block_get_page_no(block));
}
-#ifdef UNIV_IBUF_DEBUG
- ut_a(ibuf_count_get(block->space, block->offset) == 0);
+#ifdef UNIV_IBUF_COUNT_DEBUG
+ ut_a(ibuf_count_get(buf_block_get_space(block),
+ buf_block_get_page_no(block)) == 0);
#endif
- buf_pool->n_page_gets++;
+ buf_pool->stat.n_page_gets++;
return(TRUE);
}
-/************************************************************************
+/********************************************************************//**
This is used to get access to a known database page, when no waiting can be
done. For example, if a search in an adaptive hash index leads us to this
-frame. */
-
+frame.
+@return TRUE if success */
+UNIV_INTERN
ibool
buf_page_get_known_nowait(
/*======================*/
- /* out: TRUE if success */
- ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
- buf_frame_t* guess, /* in: the known page frame */
- ulint mode, /* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
- const char* file, /* in: file name */
- ulint line, /* in: line where called */
- mtr_t* mtr) /* in: mini-transaction */
+ ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
+ buf_block_t* block, /*!< in: the known page */
+ ulint mode, /*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
+ mtr_t* mtr) /*!< in: mini-transaction */
{
- buf_block_t* block;
ibool success;
ulint fix_type;
ut_ad(mtr);
ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
- block = buf_block_align(guess);
-
mutex_enter(&block->mutex);
- if (block->state == BUF_BLOCK_REMOVE_HASH) {
+ if (buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH) {
/* Another thread is just freeing the block from the LRU list
of the buffer pool: do not try to access this page; this
attempt to access the page can only come through the hash
@@ -1518,24 +2499,33 @@ buf_page_get_known_nowait(
return(FALSE);
}
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+ ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+
+ buf_block_buf_fix_inc(block, file, line);
-#ifdef UNIV_SYNC_DEBUG
- buf_block_buf_fix_inc_debug(block, file, line);
-#else
- buf_block_buf_fix_inc(block);
-#endif
mutex_exit(&block->mutex);
- if (mode == BUF_MAKE_YOUNG) {
- buf_block_make_young(block);
+ if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
+ buf_pool_mutex_enter();
+ buf_LRU_make_block_young(&block->page);
+ buf_pool_mutex_exit();
+ } else if (!buf_page_is_accessed(&block->page)) {
+ /* Above, we do a dirty read on purpose, to avoid
+ mutex contention. The field buf_page_t::access_time
+ is only used for heuristic purposes. Writes to the
+ field must be protected by mutex, however. */
+ ulint time_ms = ut_time_ms();
+
+ buf_pool_mutex_enter();
+ buf_page_set_accessed(&block->page, time_ms);
+ buf_pool_mutex_exit();
}
ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
if (rw_latch == RW_S_LATCH) {
- success = rw_lock_s_lock_func_nowait(&(block->lock),
- file, line);
+ success = rw_lock_s_lock_nowait(&(block->lock),
+ file, line);
fix_type = MTR_MEMO_PAGE_S_FIX;
} else {
success = rw_lock_x_lock_func_nowait(&(block->lock),
@@ -1545,107 +2535,154 @@ buf_page_get_known_nowait(
if (!success) {
mutex_enter(&block->mutex);
-
- block->buf_fix_count--;
-
+ buf_block_buf_fix_dec(block);
mutex_exit(&block->mutex);
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_s_unlock(&(block->debug_latch));
-#endif
-
return(FALSE);
}
mtr_memo_push(mtr, block, fix_type);
-#ifdef UNIV_DEBUG
- buf_dbg_counter++;
-
- if (buf_dbg_counter % 5771 == 0) {
- ut_ad(buf_validate());
- }
-#endif
- ut_ad(block->buf_fix_count > 0);
- ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ ut_a(++buf_dbg_counter % 5771 || buf_validate());
+ ut_a(block->page.buf_fix_count > 0);
+ ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG_FILE_ACCESSES
- ut_a(block->file_page_was_freed == FALSE);
+ ut_a(block->page.file_page_was_freed == FALSE);
#endif
-#ifdef UNIV_IBUF_DEBUG
+#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a((mode == BUF_KEEP_OLD)
- || (ibuf_count_get(block->space, block->offset) == 0));
+ || (ibuf_count_get(buf_block_get_space(block),
+ buf_block_get_page_no(block)) == 0));
#endif
- buf_pool->n_page_gets++;
+ buf_pool->stat.n_page_gets++;
return(TRUE);
}
-/************************************************************************
-Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
-
-void
-buf_page_init_for_backup_restore(
-/*=============================*/
- ulint space, /* in: space id */
- ulint offset, /* in: offset of the page within space
- in units of a page */
- buf_block_t* block) /* in: block to init */
+/*******************************************************************//**
+Given a tablespace id and page number tries to get that page. If the
+page is not in the buffer pool it is not loaded and NULL is returned.
+Suitable for using when holding the kernel mutex.
+@return pointer to a page or NULL */
+UNIV_INTERN
+const buf_block_t*
+buf_page_try_get_func(
+/*==================*/
+ ulint space_id,/*!< in: tablespace id */
+ ulint page_no,/*!< in: page number */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
+ mtr_t* mtr) /*!< in: mini-transaction */
{
- /* Set the state of the block */
- block->magic_n = BUF_BLOCK_MAGIC_N;
+ buf_block_t* block;
+ ibool success;
+ ulint fix_type;
- block->state = BUF_BLOCK_FILE_PAGE;
- block->space = space;
- block->offset = offset;
+ buf_pool_mutex_enter();
+ block = buf_block_hash_get(space_id, page_no);
- block->lock_hash_val = 0;
+ if (!block) {
+ buf_pool_mutex_exit();
+ return(NULL);
+ }
- block->freed_page_clock = 0;
+ mutex_enter(&block->mutex);
+ buf_pool_mutex_exit();
- block->newest_modification = ut_dulint_zero;
- block->oldest_modification = ut_dulint_zero;
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+ ut_a(buf_block_get_space(block) == space_id);
+ ut_a(buf_block_get_page_no(block) == page_no);
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
- block->accessed = FALSE;
- block->buf_fix_count = 0;
- block->io_fix = 0;
+ buf_block_buf_fix_inc(block, file, line);
+ mutex_exit(&block->mutex);
- block->n_hash_helps = 0;
- block->is_hashed = FALSE;
- block->n_fields = 1;
- block->n_bytes = 0;
- block->left_side = TRUE;
+ fix_type = MTR_MEMO_PAGE_S_FIX;
+ success = rw_lock_s_lock_nowait(&block->lock, file, line);
+
+ if (!success) {
+ /* Let us try to get an X-latch. If the current thread
+ is holding an X-latch on the page, we cannot get an
+ S-latch. */
+
+ fix_type = MTR_MEMO_PAGE_X_FIX;
+ success = rw_lock_x_lock_func_nowait(&block->lock,
+ file, line);
+ }
+
+ if (!success) {
+ mutex_enter(&block->mutex);
+ buf_block_buf_fix_dec(block);
+ mutex_exit(&block->mutex);
- block->file_page_was_freed = FALSE;
+ return(NULL);
+ }
+
+ mtr_memo_push(mtr, block, fix_type);
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ ut_a(++buf_dbg_counter % 5771 || buf_validate());
+ ut_a(block->page.buf_fix_count > 0);
+ ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+#ifdef UNIV_DEBUG_FILE_ACCESSES
+ ut_a(block->page.file_page_was_freed == FALSE);
+#endif /* UNIV_DEBUG_FILE_ACCESSES */
+ buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
+
+ buf_pool->stat.n_page_gets++;
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
+ ut_a(ibuf_count_get(buf_block_get_space(block),
+ buf_block_get_page_no(block)) == 0);
+#endif
+
+ return(block);
}
-/************************************************************************
+/********************************************************************//**
+Initialize some fields of a control block. */
+UNIV_INLINE
+void
+buf_page_init_low(
+/*==============*/
+ buf_page_t* bpage) /*!< in: block to init */
+{
+ bpage->flush_type = BUF_FLUSH_LRU;
+ bpage->io_fix = BUF_IO_NONE;
+ bpage->buf_fix_count = 0;
+ bpage->freed_page_clock = 0;
+ bpage->access_time = 0;
+ bpage->newest_modification = 0;
+ bpage->oldest_modification = 0;
+ HASH_INVALIDATE(bpage, hash);
+#ifdef UNIV_DEBUG_FILE_ACCESSES
+ bpage->file_page_was_freed = FALSE;
+#endif /* UNIV_DEBUG_FILE_ACCESSES */
+}
+
+/********************************************************************//**
Inits a page to the buffer buf_pool. */
static
void
buf_page_init(
/*==========*/
- ulint space, /* in: space id */
- ulint offset, /* in: offset of the page within space
+ ulint space, /*!< in: space id */
+ ulint offset, /*!< in: offset of the page within space
in units of a page */
- buf_block_t* block) /* in: block to init */
+ buf_block_t* block) /*!< in: block to init */
{
+ buf_page_t* hash_page;
- ut_ad(mutex_own(&(buf_pool->mutex)));
+ ut_ad(buf_pool_mutex_own());
ut_ad(mutex_own(&(block->mutex)));
- ut_a(block->state != BUF_BLOCK_FILE_PAGE);
+ ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
/* Set the state of the block */
- block->magic_n = BUF_BLOCK_MAGIC_N;
-
- block->state = BUF_BLOCK_FILE_PAGE;
- block->space = space;
- block->offset = offset;
-
- block->check_index_page_at_flush = FALSE;
- block->index = NULL;
-
- block->lock_hash_val = lock_rec_hash(space, offset);
+ buf_block_set_file_page(block, space, offset);
#ifdef UNIV_DEBUG_VALGRIND
if (!space) {
@@ -1656,45 +2693,42 @@ buf_page_init(
}
#endif /* UNIV_DEBUG_VALGRIND */
+ buf_block_init_low(block);
+
+ block->lock_hash_val = lock_rec_hash(space, offset);
+
/* Insert into the hash table of file pages */
- if (buf_page_hash_get(space, offset)) {
+ hash_page = buf_page_hash_get(space, offset);
+
+ if (UNIV_LIKELY_NULL(hash_page)) {
fprintf(stderr,
"InnoDB: Error: page %lu %lu already found"
- " in the hash table\n",
+ " in the hash table: %p, %p\n",
(ulong) space,
- (ulong) offset);
-#ifdef UNIV_DEBUG
+ (ulong) offset,
+ (const void*) hash_page, (const void*) block);
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ mutex_exit(&block->mutex);
+ buf_pool_mutex_exit();
buf_print();
buf_LRU_print();
buf_validate();
buf_LRU_validate();
-#endif /* UNIV_DEBUG */
- ut_a(0);
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+ ut_error;
}
- HASH_INSERT(buf_block_t, hash, buf_pool->page_hash,
- buf_page_address_fold(space, offset), block);
-
- block->freed_page_clock = 0;
+ buf_page_init_low(&block->page);
- block->newest_modification = ut_dulint_zero;
- block->oldest_modification = ut_dulint_zero;
-
- block->accessed = FALSE;
- block->buf_fix_count = 0;
- block->io_fix = 0;
-
- block->n_hash_helps = 0;
- block->is_hashed = FALSE;
- block->n_fields = 1;
- block->n_bytes = 0;
- block->left_side = TRUE;
-
- block->file_page_was_freed = FALSE;
+ ut_ad(!block->page.in_zip_hash);
+ ut_ad(!block->page.in_page_hash);
+ ut_d(block->page.in_page_hash = TRUE);
+ HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
+ buf_page_address_fold(space, offset), &block->page);
}
-/************************************************************************
+/********************************************************************//**
Function which inits a page for read to the buffer buf_pool. If the page is
(1) already in buf_pool, or
(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
@@ -1702,24 +2736,27 @@ Function which inits a page for read to the buffer buf_pool. If the page is
then this function does nothing.
Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
on the buffer frame. The io-handler must take care that the flag is cleared
-and the lock released later. This is one of the functions which perform the
-state transition NOT_USED => FILE_PAGE to a block (the other is
-buf_page_create). */
-
-buf_block_t*
+and the lock released later.
+@return pointer to the block or NULL */
+UNIV_INTERN
+buf_page_t*
buf_page_init_for_read(
/*===================*/
- /* out: pointer to the block or NULL */
- ulint* err, /* out: DB_SUCCESS or DB_TABLESPACE_DELETED */
- ulint mode, /* in: BUF_READ_IBUF_PAGES_ONLY, ... */
- ulint space, /* in: space id */
- ib_longlong tablespace_version,/* in: prevents reading from a wrong
+ ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
+ ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size, or 0 */
+ ibool unzip, /*!< in: TRUE=request uncompressed page */
+ ib_int64_t tablespace_version,/*!< in: prevents reading from a wrong
version of the tablespace in case we have done
DISCARD + IMPORT */
- ulint offset) /* in: page number */
+ ulint offset) /*!< in: page number */
{
buf_block_t* block;
+ buf_page_t* bpage;
mtr_t mtr;
+ ibool lru = FALSE;
+ void* data;
ut_ad(buf_pool);
@@ -1728,12 +2765,13 @@ buf_page_init_for_read(
if (mode == BUF_READ_IBUF_PAGES_ONLY) {
/* It is a read-ahead within an ibuf routine */
- ut_ad(!ibuf_bitmap_page(offset));
+ ut_ad(!ibuf_bitmap_page(zip_size, offset));
ut_ad(ibuf_inside());
mtr_start(&mtr);
- if (!ibuf_page_low(space, offset, &mtr)) {
+ if (!recv_no_ibuf_operations
+ && !ibuf_page(space, zip_size, offset, &mtr)) {
mtr_commit(&mtr);
@@ -1743,111 +2781,202 @@ buf_page_init_for_read(
ut_ad(mode == BUF_READ_ANY_PAGE);
}
- block = buf_block_alloc();
+ if (zip_size && UNIV_LIKELY(!unzip)
+ && UNIV_LIKELY(!recv_recovery_is_on())) {
+ block = NULL;
+ } else {
+ block = buf_LRU_get_free_block(0);
+ ut_ad(block);
+ }
- ut_a(block);
+ buf_pool_mutex_enter();
- mutex_enter(&(buf_pool->mutex));
- mutex_enter(&block->mutex);
+ if (buf_page_hash_get(space, offset)) {
+ /* The page is already in the buffer pool. */
+err_exit:
+ if (block) {
+ mutex_enter(&block->mutex);
+ buf_LRU_block_free_non_file_page(block);
+ mutex_exit(&block->mutex);
+ }
+
+ bpage = NULL;
+ goto func_exit;
+ }
if (fil_tablespace_deleted_or_being_deleted_in_mem(
space, tablespace_version)) {
+ /* The page belongs to a space which has been
+ deleted or is being deleted. */
*err = DB_TABLESPACE_DELETED;
- }
- if (*err == DB_TABLESPACE_DELETED
- || NULL != buf_page_hash_get(space, offset)) {
+ goto err_exit;
+ }
- /* The page belongs to a space which has been
- deleted or is being deleted, or the page is
- already in buf_pool, return */
+ if (block) {
+ bpage = &block->page;
+ mutex_enter(&block->mutex);
+ buf_page_init(space, offset, block);
+
+ /* The block must be put to the LRU list, to the old blocks */
+ buf_LRU_add_block(bpage, TRUE/* to old blocks */);
+
+ /* We set a pass-type x-lock on the frame because then
+ the same thread which called for the read operation
+ (and is running now at this point of code) can wait
+ for the read to complete by waiting for the x-lock on
+ the frame; if the x-lock were recursive, the same
+ thread would illegally get the x-lock before the page
+ read is completed. The x-lock is cleared by the
+ io-handler thread. */
+
+ rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
+ buf_page_set_io_fix(bpage, BUF_IO_READ);
+
+ if (UNIV_UNLIKELY(zip_size)) {
+ page_zip_set_size(&block->page.zip, zip_size);
+
+ /* buf_pool_mutex may be released and
+ reacquired by buf_buddy_alloc(). Thus, we
+ must release block->mutex in order not to
+ break the latching order in the reacquisition
+ of buf_pool_mutex. We also must defer this
+ operation until after the block descriptor has
+ been added to buf_pool->LRU and
+ buf_pool->page_hash. */
+ mutex_exit(&block->mutex);
+ data = buf_buddy_alloc(zip_size, &lru);
+ mutex_enter(&block->mutex);
+ block->page.zip.data = data;
+
+ /* To maintain the invariant
+ block->in_unzip_LRU_list
+ == buf_page_belongs_to_unzip_LRU(&block->page)
+ we have to add this block to unzip_LRU
+ after block->page.zip.data is set. */
+ ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
+ buf_unzip_LRU_add_block(block, TRUE);
+ }
mutex_exit(&block->mutex);
- mutex_exit(&(buf_pool->mutex));
-
- buf_block_free(block);
-
- if (mode == BUF_READ_IBUF_PAGES_ONLY) {
-
- mtr_commit(&mtr);
+ } else {
+ /* Defer buf_buddy_alloc() until after the block has
+ been found not to exist. The buf_buddy_alloc() and
+ buf_buddy_free() calls may be expensive because of
+ buf_buddy_relocate(). */
+
+ /* The compressed page must be allocated before the
+ control block (bpage), in order to avoid the
+ invocation of buf_buddy_relocate_block() on
+ uninitialized data. */
+ data = buf_buddy_alloc(zip_size, &lru);
+ bpage = buf_buddy_alloc(sizeof *bpage, &lru);
+
+ /* If buf_buddy_alloc() allocated storage from the LRU list,
+ it released and reacquired buf_pool_mutex. Thus, we must
+ check the page_hash again, as it may have been modified. */
+ if (UNIV_UNLIKELY(lru)
+ && UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {
+
+ /* The block was added by some other thread. */
+ buf_buddy_free(bpage, sizeof *bpage);
+ buf_buddy_free(data, zip_size);
+
+ bpage = NULL;
+ goto func_exit;
}
- return(NULL);
- }
+ page_zip_des_init(&bpage->zip);
+ page_zip_set_size(&bpage->zip, zip_size);
+ bpage->zip.data = data;
- ut_ad(block);
-
- buf_page_init(space, offset, block);
+ mutex_enter(&buf_pool_zip_mutex);
+ UNIV_MEM_DESC(bpage->zip.data,
+ page_zip_get_size(&bpage->zip), bpage);
+ buf_page_init_low(bpage);
+ bpage->state = BUF_BLOCK_ZIP_PAGE;
+ bpage->space = space;
+ bpage->offset = offset;
- /* The block must be put to the LRU list, to the old blocks */
+#ifdef UNIV_DEBUG
+ bpage->in_page_hash = FALSE;
+ bpage->in_zip_hash = FALSE;
+ bpage->in_flush_list = FALSE;
+ bpage->in_free_list = FALSE;
+ bpage->in_LRU_list = FALSE;
+#endif /* UNIV_DEBUG */
- buf_LRU_add_block(block, TRUE); /* TRUE == to old blocks */
+ ut_d(bpage->in_page_hash = TRUE);
+ HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
+ buf_page_address_fold(space, offset), bpage);
- block->io_fix = BUF_IO_READ;
+ /* The block must be put to the LRU list, to the old blocks */
+ buf_LRU_add_block(bpage, TRUE/* to old blocks */);
+ buf_LRU_insert_zip_clean(bpage);
- buf_pool->n_pend_reads++;
+ buf_page_set_io_fix(bpage, BUF_IO_READ);
- /* We set a pass-type x-lock on the frame because then the same
- thread which called for the read operation (and is running now at
- this point of code) can wait for the read to complete by waiting
- for the x-lock on the frame; if the x-lock were recursive, the
- same thread would illegally get the x-lock before the page read
- is completed. The x-lock is cleared by the io-handler thread. */
-
- rw_lock_x_lock_gen(&(block->lock), BUF_IO_READ);
+ mutex_exit(&buf_pool_zip_mutex);
+ }
- mutex_exit(&block->mutex);
- mutex_exit(&(buf_pool->mutex));
+ buf_pool->n_pend_reads++;
+func_exit:
+ buf_pool_mutex_exit();
if (mode == BUF_READ_IBUF_PAGES_ONLY) {
mtr_commit(&mtr);
}
- return(block);
+ ut_ad(!bpage || buf_page_in_file(bpage));
+ return(bpage);
}
-/************************************************************************
+/********************************************************************//**
Initializes a page to the buffer buf_pool. The page is usually not read
from a file even if it cannot be found in the buffer buf_pool. This is one
of the functions which perform to a block a state transition NOT_USED =>
-FILE_PAGE (the other is buf_page_init_for_read above). */
-
-buf_frame_t*
+FILE_PAGE (the other is buf_page_get_gen).
+@return pointer to the block, page bufferfixed */
+UNIV_INTERN
+buf_block_t*
buf_page_create(
/*============*/
- /* out: pointer to the frame, page bufferfixed */
- ulint space, /* in: space id */
- ulint offset, /* in: offset of the page within space in units of
+ ulint space, /*!< in: space id */
+ ulint offset, /*!< in: offset of the page within space in units of
a page */
- mtr_t* mtr) /* in: mini-transaction handle */
+ ulint zip_size,/*!< in: compressed page size, or 0 */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
buf_frame_t* frame;
buf_block_t* block;
buf_block_t* free_block = NULL;
+ ulint time_ms = ut_time_ms();
ut_ad(mtr);
+ ut_ad(space || !zip_size);
- free_block = buf_LRU_get_free_block();
+ free_block = buf_LRU_get_free_block(0);
- mutex_enter(&(buf_pool->mutex));
+ buf_pool_mutex_enter();
- block = buf_page_hash_get(space, offset);
+ block = (buf_block_t*) buf_page_hash_get(space, offset);
- if (block != NULL) {
-#ifdef UNIV_IBUF_DEBUG
- ut_a(ibuf_count_get(block->space, block->offset) == 0);
+ if (block && buf_page_in_file(&block->page)) {
+#ifdef UNIV_IBUF_COUNT_DEBUG
+ ut_a(ibuf_count_get(space, offset) == 0);
#endif
- block->file_page_was_freed = FALSE;
+#ifdef UNIV_DEBUG_FILE_ACCESSES
+ block->page.file_page_was_freed = FALSE;
+#endif /* UNIV_DEBUG_FILE_ACCESSES */
/* Page can be found in buf_pool */
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
buf_block_free(free_block);
- frame = buf_page_get_with_no_latch(space, offset, mtr);
-
- return(frame);
+ return(buf_page_get_with_no_latch(space, zip_size,
+ offset, mtr));
}
/* If we get here, the page was not in buf_pool: init it there */
@@ -1866,27 +2995,58 @@ buf_page_create(
buf_page_init(space, offset, block);
/* The block must be put to the LRU list */
- buf_LRU_add_block(block, FALSE);
+ buf_LRU_add_block(&block->page, FALSE);
-#ifdef UNIV_SYNC_DEBUG
- buf_block_buf_fix_inc_debug(block, __FILE__, __LINE__);
-#else
- buf_block_buf_fix_inc(block);
-#endif
- buf_pool->n_pages_created++;
+ buf_block_buf_fix_inc(block, __FILE__, __LINE__);
+ buf_pool->stat.n_pages_created++;
- mutex_exit(&(buf_pool->mutex));
+ if (zip_size) {
+ void* data;
+ ibool lru;
- mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
+ /* Prevent race conditions during buf_buddy_alloc(),
+ which may release and reacquire buf_pool_mutex,
+ by IO-fixing and X-latching the block. */
+
+ buf_page_set_io_fix(&block->page, BUF_IO_READ);
+ rw_lock_x_lock(&block->lock);
+
+ page_zip_set_size(&block->page.zip, zip_size);
+ mutex_exit(&block->mutex);
+ /* buf_pool_mutex may be released and reacquired by
+ buf_buddy_alloc(). Thus, we must release block->mutex
+ in order not to break the latching order in
+ the reacquisition of buf_pool_mutex. We also must
+ defer this operation until after the block descriptor
+ has been added to buf_pool->LRU and buf_pool->page_hash. */
+ data = buf_buddy_alloc(zip_size, &lru);
+ mutex_enter(&block->mutex);
+ block->page.zip.data = data;
+
+ /* To maintain the invariant
+ block->in_unzip_LRU_list
+ == buf_page_belongs_to_unzip_LRU(&block->page)
+ we have to add this block to unzip_LRU after
+ block->page.zip.data is set. */
+ ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
+ buf_unzip_LRU_add_block(block, FALSE);
+
+ buf_page_set_io_fix(&block->page, BUF_IO_NONE);
+ rw_lock_x_unlock(&block->lock);
+ }
- block->accessed = TRUE;
+ buf_page_set_accessed(&block->page, time_ms);
+
+ buf_pool_mutex_exit();
+
+ mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
mutex_exit(&block->mutex);
/* Delete possible entries for the page from the insert buffer:
such can exist if the page belonged to an index which was dropped */
- ibuf_merge_or_delete_for_page(NULL, space, offset, TRUE);
+ ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);
/* Flush pages from the end of the LRU list if necessary */
buf_flush_free_margin();
@@ -1905,68 +3065,86 @@ buf_page_create(
memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
-#ifdef UNIV_DEBUG
- buf_dbg_counter++;
-
- if (buf_dbg_counter % 357 == 0) {
- ut_ad(buf_validate());
- }
-#endif
-#ifdef UNIV_IBUF_DEBUG
- ut_a(ibuf_count_get(block->space, block->offset) == 0);
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ ut_a(++buf_dbg_counter % 357 || buf_validate());
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+#ifdef UNIV_IBUF_COUNT_DEBUG
+ ut_a(ibuf_count_get(buf_block_get_space(block),
+ buf_block_get_page_no(block)) == 0);
#endif
- return(frame);
+ return(block);
}
-/************************************************************************
+/********************************************************************//**
Completes an asynchronous read or write request of a file page to or from
the buffer pool. */
-
+UNIV_INTERN
void
buf_page_io_complete(
/*=================*/
- buf_block_t* block) /* in: pointer to the block in question */
+ buf_page_t* bpage) /*!< in: pointer to the block in question */
{
- ulint io_type;
+ enum buf_io_fix io_type;
+ const ibool uncompressed = (buf_page_get_state(bpage)
+ == BUF_BLOCK_FILE_PAGE);
- ut_ad(block);
+ ut_a(buf_page_in_file(bpage));
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
- /* We do not need protect block->io_fix here by block->mutex to read
+ /* We do not need to protect io_fix here by a mutex to read
it because this is the only function where we can change the value
from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code
ensures that this is the only thread that handles the i/o for this
block. */
- io_type = block->io_fix;
+ io_type = buf_page_get_io_fix(bpage);
+ ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);
if (io_type == BUF_IO_READ) {
+ ulint read_page_no;
+ ulint read_space_id;
+ byte* frame;
+
+ if (buf_page_get_zip_size(bpage)) {
+ frame = bpage->zip.data;
+ buf_pool->n_pend_unzip++;
+ if (uncompressed
+ && !buf_zip_decompress((buf_block_t*) bpage,
+ FALSE)) {
+
+ buf_pool->n_pend_unzip--;
+ goto corrupt;
+ }
+ buf_pool->n_pend_unzip--;
+ } else {
+ ut_a(uncompressed);
+ frame = ((buf_block_t*) bpage)->frame;
+ }
+
/* If this page is not uninitialized and not in the
doublewrite buffer, then the page number and space id
should be the same as in block. */
- ulint read_page_no = mach_read_from_4(
- block->frame + FIL_PAGE_OFFSET);
- ulint read_space_id = mach_read_from_4(
- block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+ read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
+ read_space_id = mach_read_from_4(
+ frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- if (!block->space
- && trx_doublewrite_page_inside(block->offset)) {
+ if (bpage->space == TRX_SYS_SPACE
+ && trx_doublewrite_page_inside(bpage->offset)) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: reading page %lu\n"
"InnoDB: which is in the"
" doublewrite buffer!\n",
- (ulong) block->offset);
+ (ulong) bpage->offset);
} else if (!read_space_id && !read_page_no) {
/* This is likely an uninitialized page. */
- } else if ((block->space && block->space != read_space_id)
- || block->offset != read_page_no) {
+ } else if ((bpage->space
+ && bpage->space != read_space_id)
+ || bpage->offset != read_page_no) {
/* We did not compare space_id to read_space_id
- if block->space == 0, because the field on the
+ if bpage->space == 0, because the field on the
page may contain garbage in MySQL < 4.1.1,
- which only supported block->space == 0. */
+ which only supported bpage->space == 0. */
ut_print_timestamp(stderr);
fprintf(stderr,
@@ -1975,30 +3153,31 @@ buf_page_io_complete(
"InnoDB: read in are %lu:%lu,"
" should be %lu:%lu!\n",
(ulong) read_space_id, (ulong) read_page_no,
- (ulong) block->space, (ulong) block->offset);
+ (ulong) bpage->space,
+ (ulong) bpage->offset);
}
+
/* From version 3.23.38 up we store the page checksum
to the 4 first bytes of the page end lsn field */
- if (buf_page_is_corrupted(block->frame)) {
+ if (buf_page_is_corrupted(frame,
+ buf_page_get_zip_size(bpage))) {
+corrupt:
fprintf(stderr,
"InnoDB: Database page corruption on disk"
" or a failed\n"
- "InnoDB: file read of page %lu.\n",
- (ulong) block->offset);
-
- fputs("InnoDB: You may have to recover"
- " from a backup.\n", stderr);
-
- buf_page_print(block->frame);
-
+ "InnoDB: file read of page %lu.\n"
+ "InnoDB: You may have to recover"
+ " from a backup.\n",
+ (ulong) bpage->offset);
+ buf_page_print(frame, buf_page_get_zip_size(bpage));
fprintf(stderr,
"InnoDB: Database page corruption on disk"
" or a failed\n"
- "InnoDB: file read of page %lu.\n",
- (ulong) block->offset);
- fputs("InnoDB: You may have to recover"
- " from a backup.\n", stderr);
+ "InnoDB: file read of page %lu.\n"
+ "InnoDB: You may have to recover"
+ " from a backup.\n",
+ (ulong) bpage->offset);
fputs("InnoDB: It is also possible that"
" your operating\n"
"InnoDB: system has corrupted its"
@@ -2015,9 +3194,8 @@ buf_page_io_complete(
" You can use CHECK\n"
"InnoDB: TABLE to scan your"
" table for corruption.\n"
- "InnoDB: See also"
- " http://dev.mysql.com/doc/refman/5.1/en/"
- "forcing-recovery.html\n"
+ "InnoDB: See also "
+ REFMAN "forcing-recovery.html\n"
"InnoDB: about forcing recovery.\n", stderr);
if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
@@ -2029,86 +3207,121 @@ buf_page_io_complete(
}
if (recv_recovery_is_on()) {
- recv_recover_page(FALSE, TRUE, block->frame,
- block->space, block->offset);
+ /* Pages must be uncompressed for crash recovery. */
+ ut_a(uncompressed);
+ recv_recover_page(TRUE, (buf_block_t*) bpage);
}
- if (!recv_no_ibuf_operations) {
+ if (uncompressed && !recv_no_ibuf_operations) {
ibuf_merge_or_delete_for_page(
- block->frame, block->space, block->offset,
+ (buf_block_t*) bpage, bpage->space,
+ bpage->offset, buf_page_get_zip_size(bpage),
TRUE);
}
}
- mutex_enter(&(buf_pool->mutex));
- mutex_enter(&block->mutex);
+ buf_pool_mutex_enter();
+ mutex_enter(buf_page_get_mutex(bpage));
-#ifdef UNIV_IBUF_DEBUG
- ut_a(ibuf_count_get(block->space, block->offset) == 0);
+#ifdef UNIV_IBUF_COUNT_DEBUG
+ if (io_type == BUF_IO_WRITE || uncompressed) {
+ /* For BUF_IO_READ of compressed-only blocks, the
+ buffered operations will be merged by buf_page_get_gen()
+ after the block has been uncompressed. */
+ ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
+ }
#endif
/* Because this thread which does the unlocking is not the same that
did the locking, we use a pass value != 0 in unlock, which simply
removes the newest lock debug record, without checking the thread
id. */
- block->io_fix = 0;
+ buf_page_set_io_fix(bpage, BUF_IO_NONE);
- if (io_type == BUF_IO_READ) {
+ switch (io_type) {
+ case BUF_IO_READ:
/* NOTE that the call to ibuf may have moved the ownership of
the x-latch to this OS thread: do not let this confuse you in
debugging! */
ut_ad(buf_pool->n_pend_reads > 0);
buf_pool->n_pend_reads--;
- buf_pool->n_pages_read++;
+ buf_pool->stat.n_pages_read++;
- rw_lock_x_unlock_gen(&(block->lock), BUF_IO_READ);
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fputs("Has read ", stderr);
+ if (uncompressed) {
+ rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,
+ BUF_IO_READ);
}
-#endif /* UNIV_DEBUG */
- } else {
- ut_ad(io_type == BUF_IO_WRITE);
+ break;
+
+ case BUF_IO_WRITE:
/* Write means a flush operation: call the completion
routine in the flush system */
- buf_flush_write_complete(block);
+ buf_flush_write_complete(bpage);
- rw_lock_s_unlock_gen(&(block->lock), BUF_IO_WRITE);
+ if (uncompressed) {
+ rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
+ BUF_IO_WRITE);
+ }
- buf_pool->n_pages_written++;
+ buf_pool->stat.n_pages_written++;
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fputs("Has written ", stderr);
- }
-#endif /* UNIV_DEBUG */
- }
+ break;
- mutex_exit(&block->mutex);
- mutex_exit(&(buf_pool->mutex));
+ default:
+ ut_error;
+ }
#ifdef UNIV_DEBUG
if (buf_debug_prints) {
- fprintf(stderr, "page space %lu page no %lu\n",
- (ulong) block->space, (ulong) block->offset);
+ fprintf(stderr, "Has %s page space %lu page no %lu\n",
+ io_type == BUF_IO_READ ? "read" : "written",
+ (ulong) buf_page_get_space(bpage),
+ (ulong) buf_page_get_page_no(bpage));
}
#endif /* UNIV_DEBUG */
+
+ mutex_exit(buf_page_get_mutex(bpage));
+ buf_pool_mutex_exit();
}
-/*************************************************************************
+/*********************************************************************//**
Invalidates the file pages in the buffer pool when an archive recovery is
completed. All the file pages buffered must be in a replaceable state when
this function is called: not latched and not modified. */
-
+UNIV_INTERN
void
buf_pool_invalidate(void)
/*=====================*/
{
- ibool freed;
+ ibool freed;
+ enum buf_flush i;
+
+ buf_pool_mutex_enter();
+
+ for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
+
+ /* As this function is called during startup and
+ during redo application phase during recovery, InnoDB
+ is single threaded (apart from IO helper threads) at
+ this stage. No new write batch can be in initialization
+ stage at this point. */
+ ut_ad(buf_pool->init_flush[i] == FALSE);
+
+ /* However, it is possible that a write batch that has
+ been posted earlier is still not complete. For buffer
+ pool invalidation to proceed we must ensure there is NO
+ write activity happening. */
+ if (buf_pool->n_flush[i] > 0) {
+ buf_pool_mutex_exit();
+ buf_flush_wait_batch_end(i);
+ buf_pool_mutex_enter();
+ }
+ }
+
+ buf_pool_mutex_exit();
ut_ad(buf_all_freed());
@@ -2118,22 +3331,33 @@ buf_pool_invalidate(void)
freed = buf_LRU_search_and_free_block(100);
}
- mutex_enter(&(buf_pool->mutex));
+ buf_pool_mutex_enter();
ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
+ ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
- mutex_exit(&(buf_pool->mutex));
-}
+ buf_pool->freed_page_clock = 0;
+ buf_pool->LRU_old = NULL;
+ buf_pool->LRU_old_len = 0;
+ buf_pool->LRU_flush_ended = 0;
-#ifdef UNIV_DEBUG
-/*************************************************************************
-Validates the buffer buf_pool data structure. */
+ memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
+ buf_refresh_io_stats();
+ buf_pool_mutex_exit();
+}
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/*********************************************************************//**
+Validates the buffer buf_pool data structure.
+@return TRUE */
+UNIV_INTERN
ibool
buf_validate(void)
/*==============*/
{
- buf_block_t* block;
+ buf_page_t* b;
+ buf_chunk_t* chunk;
ulint i;
ulint n_single_flush = 0;
ulint n_lru_flush = 0;
@@ -2141,69 +3365,187 @@ buf_validate(void)
ulint n_lru = 0;
ulint n_flush = 0;
ulint n_free = 0;
- ulint n_page = 0;
+ ulint n_zip = 0;
ut_ad(buf_pool);
- mutex_enter(&(buf_pool->mutex));
+ buf_pool_mutex_enter();
- for (i = 0; i < buf_pool->curr_size; i++) {
+ chunk = buf_pool->chunks;
- block = buf_pool_get_nth_block(buf_pool, i);
+ /* Check the uncompressed blocks. */
- mutex_enter(&block->mutex);
+ for (i = buf_pool->n_chunks; i--; chunk++) {
- if (block->state == BUF_BLOCK_FILE_PAGE) {
+ ulint j;
+ buf_block_t* block = chunk->blocks;
- ut_a(buf_page_hash_get(block->space,
- block->offset) == block);
- n_page++;
+ for (j = chunk->size; j--; block++) {
-#ifdef UNIV_IBUF_DEBUG
- ut_a((block->io_fix == BUF_IO_READ)
- || ibuf_count_get(block->space, block->offset)
- == 0);
+ mutex_enter(&block->mutex);
+
+ switch (buf_block_get_state(block)) {
+ case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_ZIP_DIRTY:
+ /* These should only occur on
+ zip_clean, zip_free[], or flush_list. */
+ ut_error;
+ break;
+
+ case BUF_BLOCK_FILE_PAGE:
+ ut_a(buf_page_hash_get(buf_block_get_space(
+ block),
+ buf_block_get_page_no(
+ block))
+ == &block->page);
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
+ ut_a(buf_page_get_io_fix(&block->page)
+ == BUF_IO_READ
+ || !ibuf_count_get(buf_block_get_space(
+ block),
+ buf_block_get_page_no(
+ block)));
#endif
- if (block->io_fix == BUF_IO_WRITE) {
+ switch (buf_page_get_io_fix(&block->page)) {
+ case BUF_IO_NONE:
+ break;
- if (block->flush_type == BUF_FLUSH_LRU) {
- n_lru_flush++;
- ut_a(rw_lock_is_locked(
- &block->lock,
- RW_LOCK_SHARED));
- } else if (block->flush_type
- == BUF_FLUSH_LIST) {
- n_list_flush++;
- } else if (block->flush_type
- == BUF_FLUSH_SINGLE_PAGE) {
- n_single_flush++;
- } else {
- ut_error;
+ case BUF_IO_WRITE:
+ switch (buf_page_get_flush_type(
+ &block->page)) {
+ case BUF_FLUSH_LRU:
+ n_lru_flush++;
+ ut_a(rw_lock_is_locked(
+ &block->lock,
+ RW_LOCK_SHARED));
+ break;
+ case BUF_FLUSH_LIST:
+ n_list_flush++;
+ break;
+ case BUF_FLUSH_SINGLE_PAGE:
+ n_single_flush++;
+ break;
+ default:
+ ut_error;
+ }
+
+ break;
+
+ case BUF_IO_READ:
+
+ ut_a(rw_lock_is_locked(&block->lock,
+ RW_LOCK_EX));
+ break;
}
- } else if (block->io_fix == BUF_IO_READ) {
+ n_lru++;
- ut_a(rw_lock_is_locked(&(block->lock),
- RW_LOCK_EX));
- }
+ if (block->page.oldest_modification > 0) {
+ n_flush++;
+ }
- n_lru++;
+ break;
- if (ut_dulint_cmp(block->oldest_modification,
- ut_dulint_zero) > 0) {
- n_flush++;
+ case BUF_BLOCK_NOT_USED:
+ n_free++;
+ break;
+
+ case BUF_BLOCK_READY_FOR_USE:
+ case BUF_BLOCK_MEMORY:
+ case BUF_BLOCK_REMOVE_HASH:
+ /* do nothing */
+ break;
}
- } else if (block->state == BUF_BLOCK_NOT_USED) {
- n_free++;
+ mutex_exit(&block->mutex);
}
+ }
- mutex_exit(&block->mutex);
+ mutex_enter(&buf_pool_zip_mutex);
+
+ /* Check clean compressed-only blocks. */
+
+ for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
+ b = UT_LIST_GET_NEXT(list, b)) {
+ ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
+ switch (buf_page_get_io_fix(b)) {
+ case BUF_IO_NONE:
+ /* All clean blocks should be I/O-unfixed. */
+ break;
+ case BUF_IO_READ:
+ /* In buf_LRU_free_block(), we temporarily set
+ b->io_fix = BUF_IO_READ for a newly allocated
+ control block in order to prevent
+ buf_page_get_gen() from decompressing the block. */
+ break;
+ default:
+ ut_error;
+ break;
+ }
+ ut_a(!b->oldest_modification);
+ ut_a(buf_page_hash_get(b->space, b->offset) == b);
+
+ n_lru++;
+ n_zip++;
}
- if (n_lru + n_free > buf_pool->curr_size) {
- fprintf(stderr, "n LRU %lu, n free %lu\n",
- (ulong) n_lru, (ulong) n_free);
+ /* Check dirty compressed-only blocks. */
+
+ for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
+ b = UT_LIST_GET_NEXT(list, b)) {
+ ut_ad(b->in_flush_list);
+
+ switch (buf_page_get_state(b)) {
+ case BUF_BLOCK_ZIP_DIRTY:
+ ut_a(b->oldest_modification);
+ n_lru++;
+ n_flush++;
+ n_zip++;
+ switch (buf_page_get_io_fix(b)) {
+ case BUF_IO_NONE:
+ case BUF_IO_READ:
+ break;
+
+ case BUF_IO_WRITE:
+ switch (buf_page_get_flush_type(b)) {
+ case BUF_FLUSH_LRU:
+ n_lru_flush++;
+ break;
+ case BUF_FLUSH_LIST:
+ n_list_flush++;
+ break;
+ case BUF_FLUSH_SINGLE_PAGE:
+ n_single_flush++;
+ break;
+ default:
+ ut_error;
+ }
+ break;
+ }
+ break;
+ case BUF_BLOCK_FILE_PAGE:
+ /* uncompressed page */
+ break;
+ case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_NOT_USED:
+ case BUF_BLOCK_READY_FOR_USE:
+ case BUF_BLOCK_MEMORY:
+ case BUF_BLOCK_REMOVE_HASH:
+ ut_error;
+ break;
+ }
+ ut_a(buf_page_hash_get(b->space, b->offset) == b);
+ }
+
+ mutex_exit(&buf_pool_zip_mutex);
+
+ if (n_lru + n_free > buf_pool->curr_size + n_zip) {
+ fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n",
+ (ulong) n_lru, (ulong) n_free,
+ (ulong) buf_pool->curr_size, (ulong) n_zip);
ut_error;
}
@@ -2220,17 +3562,19 @@ buf_validate(void)
ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
ut_a(buf_LRU_validate());
ut_a(buf_flush_validate());
return(TRUE);
}
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-/*************************************************************************
+#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/*********************************************************************//**
Prints info of the buffer buf_pool data structure. */
-
+UNIV_INTERN
void
buf_print(void)
/*===========*/
@@ -2242,7 +3586,7 @@ buf_print(void)
ulint j;
dulint id;
ulint n_found;
- buf_frame_t* frame;
+ buf_chunk_t* chunk;
dict_index_t* index;
ut_ad(buf_pool);
@@ -2252,60 +3596,74 @@ buf_print(void)
index_ids = mem_alloc(sizeof(dulint) * size);
counts = mem_alloc(sizeof(ulint) * size);
- mutex_enter(&(buf_pool->mutex));
+ buf_pool_mutex_enter();
fprintf(stderr,
"buf_pool size %lu\n"
"database pages %lu\n"
"free pages %lu\n"
"modified database pages %lu\n"
+ "n pending decompressions %lu\n"
"n pending reads %lu\n"
"n pending flush LRU %lu list %lu single page %lu\n"
+ "pages made young %lu, not young %lu\n"
"pages read %lu, created %lu, written %lu\n",
(ulong) size,
(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
(ulong) UT_LIST_GET_LEN(buf_pool->free),
(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
+ (ulong) buf_pool->n_pend_unzip,
(ulong) buf_pool->n_pend_reads,
(ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
(ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
- (ulong) buf_pool->n_pages_read, buf_pool->n_pages_created,
- (ulong) buf_pool->n_pages_written);
+ (ulong) buf_pool->stat.n_pages_made_young,
+ (ulong) buf_pool->stat.n_pages_not_made_young,
+ (ulong) buf_pool->stat.n_pages_read,
+ (ulong) buf_pool->stat.n_pages_created,
+ (ulong) buf_pool->stat.n_pages_written);
/* Count the number of blocks belonging to each index in the buffer */
n_found = 0;
- for (i = 0; i < size; i++) {
- frame = buf_pool_get_nth_block(buf_pool, i)->frame;
+ chunk = buf_pool->chunks;
- if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {
+ for (i = buf_pool->n_chunks; i--; chunk++) {
+ buf_block_t* block = chunk->blocks;
+ ulint n_blocks = chunk->size;
- id = btr_page_get_index_id(frame);
+ for (; n_blocks--; block++) {
+ const buf_frame_t* frame = block->frame;
- /* Look for the id in the index_ids array */
- j = 0;
+ if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {
- while (j < n_found) {
+ id = btr_page_get_index_id(frame);
- if (ut_dulint_cmp(index_ids[j], id) == 0) {
- (counts[j])++;
+ /* Look for the id in the index_ids array */
+ j = 0;
- break;
+ while (j < n_found) {
+
+ if (ut_dulint_cmp(index_ids[j],
+ id) == 0) {
+ counts[j]++;
+
+ break;
+ }
+ j++;
}
- j++;
- }
- if (j == n_found) {
- n_found++;
- index_ids[j] = id;
- counts[j] = 1;
+ if (j == n_found) {
+ n_found++;
+ index_ids[j] = id;
+ counts[j] = 1;
+ }
}
}
}
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
for (i = 0; i < n_found; i++) {
index = dict_index_get_if_in_cache(index_ids[i]);
@@ -2328,27 +3686,44 @@ buf_print(void)
ut_a(buf_validate());
}
+#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
-/*************************************************************************
-Returns the number of latched pages in the buffer pool. */
-
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Returns the number of latched pages in the buffer pool.
+@return number of latched pages */
+UNIV_INTERN
ulint
buf_get_latched_pages_number(void)
+/*==============================*/
{
- buf_block_t* block;
+ buf_chunk_t* chunk;
+ buf_page_t* b;
ulint i;
ulint fixed_pages_number = 0;
- mutex_enter(&(buf_pool->mutex));
+ buf_pool_mutex_enter();
- for (i = 0; i < buf_pool->curr_size; i++) {
+ chunk = buf_pool->chunks;
- block = buf_pool_get_nth_block(buf_pool, i);
+ for (i = buf_pool->n_chunks; i--; chunk++) {
+ buf_block_t* block;
+ ulint j;
+
+ block = chunk->blocks;
+
+ for (j = chunk->size; j--; block++) {
+ if (buf_block_get_state(block)
+ != BUF_BLOCK_FILE_PAGE) {
+
+ continue;
+ }
- if (block->magic_n == BUF_BLOCK_MAGIC_N) {
mutex_enter(&block->mutex);
- if (block->buf_fix_count != 0 || block->io_fix != 0) {
+ if (block->page.buf_fix_count != 0
+ || buf_page_get_io_fix(&block->page)
+ != BUF_IO_NONE) {
fixed_pages_number++;
}
@@ -2356,15 +3731,57 @@ buf_get_latched_pages_number(void)
}
}
- mutex_exit(&(buf_pool->mutex));
+ mutex_enter(&buf_pool_zip_mutex);
+
+ /* Traverse the lists of clean and dirty compressed-only blocks. */
+
+ for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
+ b = UT_LIST_GET_NEXT(list, b)) {
+ ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
+ ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
+
+ if (b->buf_fix_count != 0
+ || buf_page_get_io_fix(b) != BUF_IO_NONE) {
+ fixed_pages_number++;
+ }
+ }
+
+ for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
+ b = UT_LIST_GET_NEXT(list, b)) {
+ ut_ad(b->in_flush_list);
+
+ switch (buf_page_get_state(b)) {
+ case BUF_BLOCK_ZIP_DIRTY:
+ if (b->buf_fix_count != 0
+ || buf_page_get_io_fix(b) != BUF_IO_NONE) {
+ fixed_pages_number++;
+ }
+ break;
+ case BUF_BLOCK_FILE_PAGE:
+ /* uncompressed page */
+ break;
+ case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_NOT_USED:
+ case BUF_BLOCK_READY_FOR_USE:
+ case BUF_BLOCK_MEMORY:
+ case BUF_BLOCK_REMOVE_HASH:
+ ut_error;
+ break;
+ }
+ }
+
+ mutex_exit(&buf_pool_zip_mutex);
+ buf_pool_mutex_exit();
return(fixed_pages_number);
}
#endif /* UNIV_DEBUG */
-/*************************************************************************
-Returns the number of pending buf pool ios. */
-
+/*********************************************************************//**
+Returns the number of pending buf pool ios.
+@return number of pending I/O operations */
+UNIV_INTERN
ulint
buf_get_n_pending_ios(void)
/*=======================*/
@@ -2375,17 +3792,18 @@ buf_get_n_pending_ios(void)
+ buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
}
-/*************************************************************************
+/*********************************************************************//**
Returns the ratio in percents of modified pages in the buffer pool /
-database pages in the buffer pool. */
-
+database pages in the buffer pool.
+@return modified page percentage ratio */
+UNIV_INTERN
ulint
buf_get_modified_ratio_pct(void)
/*============================*/
{
ulint ratio;
- mutex_enter(&(buf_pool->mutex));
+ buf_pool_mutex_enter();
ratio = (100 * UT_LIST_GET_LEN(buf_pool->flush_list))
/ (1 + UT_LIST_GET_LEN(buf_pool->LRU)
@@ -2393,49 +3811,39 @@ buf_get_modified_ratio_pct(void)
/* 1 + is there to avoid division by zero */
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
return(ratio);
}
-/*************************************************************************
+/*********************************************************************//**
Prints info of the buffer i/o. */
-
+UNIV_INTERN
void
buf_print_io(
/*=========*/
- FILE* file) /* in/out: buffer where to print */
+ FILE* file) /*!< in/out: buffer where to print */
{
time_t current_time;
double time_elapsed;
- ulint size;
+ ulint n_gets_diff;
ut_ad(buf_pool);
- size = buf_pool->curr_size;
-
- mutex_enter(&(buf_pool->mutex));
- if (srv_use_awe) {
- fprintf(stderr,
- "AWE: Buffer pool memory frames %lu\n",
- (ulong) buf_pool->n_frames);
+ buf_pool_mutex_enter();
- fprintf(stderr,
- "AWE: Database pages and free buffers"
- " mapped in frames %lu\n",
- (ulong)
- UT_LIST_GET_LEN(buf_pool->awe_LRU_free_mapped));
- }
fprintf(file,
"Buffer pool size %lu\n"
"Free buffers %lu\n"
"Database pages %lu\n"
+ "Old database pages %lu\n"
"Modified db pages %lu\n"
"Pending reads %lu\n"
"Pending writes: LRU %lu, flush list %lu, single page %lu\n",
- (ulong) size,
+ (ulong) buf_pool->curr_size,
(ulong) UT_LIST_GET_LEN(buf_pool->free),
(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
+ (ulong) buf_pool->LRU_old_len,
(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
(ulong) buf_pool->n_pend_reads,
(ulong) buf_pool->n_flush[BUF_FLUSH_LRU]
@@ -2447,116 +3855,139 @@ buf_print_io(
current_time = time(NULL);
time_elapsed = 0.001 + difftime(current_time,
buf_pool->last_printout_time);
- buf_pool->last_printout_time = current_time;
fprintf(file,
+ "Pages made young %lu, not young %lu\n"
+ "%.2f youngs/s, %.2f non-youngs/s\n"
"Pages read %lu, created %lu, written %lu\n"
"%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
- (ulong) buf_pool->n_pages_read,
- (ulong) buf_pool->n_pages_created,
- (ulong) buf_pool->n_pages_written,
- (buf_pool->n_pages_read - buf_pool->n_pages_read_old)
+ (ulong) buf_pool->stat.n_pages_made_young,
+ (ulong) buf_pool->stat.n_pages_not_made_young,
+ (buf_pool->stat.n_pages_made_young
+ - buf_pool->old_stat.n_pages_made_young)
/ time_elapsed,
- (buf_pool->n_pages_created - buf_pool->n_pages_created_old)
+ (buf_pool->stat.n_pages_not_made_young
+ - buf_pool->old_stat.n_pages_not_made_young)
/ time_elapsed,
- (buf_pool->n_pages_written - buf_pool->n_pages_written_old)
+ (ulong) buf_pool->stat.n_pages_read,
+ (ulong) buf_pool->stat.n_pages_created,
+ (ulong) buf_pool->stat.n_pages_written,
+ (buf_pool->stat.n_pages_read
+ - buf_pool->old_stat.n_pages_read)
+ / time_elapsed,
+ (buf_pool->stat.n_pages_created
+ - buf_pool->old_stat.n_pages_created)
+ / time_elapsed,
+ (buf_pool->stat.n_pages_written
+ - buf_pool->old_stat.n_pages_written)
/ time_elapsed);
- if (srv_use_awe) {
- fprintf(file, "AWE: %.2f page remaps/s\n",
- (buf_pool->n_pages_awe_remapped
- - buf_pool->n_pages_awe_remapped_old)
- / time_elapsed);
- }
+ n_gets_diff = buf_pool->stat.n_page_gets - buf_pool->old_stat.n_page_gets;
- if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) {
- fprintf(file, "Buffer pool hit rate %lu / 1000\n",
+ if (n_gets_diff) {
+ fprintf(file,
+ "Buffer pool hit rate %lu / 1000,"
+ " young-making rate %lu / 1000 not %lu / 1000\n",
(ulong)
- (1000 - ((1000 * (buf_pool->n_pages_read
- - buf_pool->n_pages_read_old))
- / (buf_pool->n_page_gets
- - buf_pool->n_page_gets_old))));
+ (1000 - ((1000 * (buf_pool->stat.n_pages_read
+ - buf_pool->old_stat.n_pages_read))
+ / (buf_pool->stat.n_page_gets
+ - buf_pool->old_stat.n_page_gets))),
+ (ulong)
+ (1000 * (buf_pool->stat.n_pages_made_young
+ - buf_pool->old_stat.n_pages_made_young)
+ / n_gets_diff),
+ (ulong)
+ (1000 * (buf_pool->stat.n_pages_not_made_young
+ - buf_pool->old_stat.n_pages_not_made_young)
+ / n_gets_diff));
} else {
fputs("No buffer pool page gets since the last printout\n",
file);
}
- buf_pool->n_page_gets_old = buf_pool->n_page_gets;
- buf_pool->n_pages_read_old = buf_pool->n_pages_read;
- buf_pool->n_pages_created_old = buf_pool->n_pages_created;
- buf_pool->n_pages_written_old = buf_pool->n_pages_written;
- buf_pool->n_pages_awe_remapped_old = buf_pool->n_pages_awe_remapped;
+ /* Statistics about read ahead algorithm */
+ fprintf(file, "Pages read ahead %.2f/s,"
+ " evicted without access %.2f/s\n",
+ (buf_pool->stat.n_ra_pages_read
+ - buf_pool->old_stat.n_ra_pages_read)
+ / time_elapsed,
+ (buf_pool->stat.n_ra_pages_evicted
+ - buf_pool->old_stat.n_ra_pages_evicted)
+ / time_elapsed);
- mutex_exit(&(buf_pool->mutex));
+ /* Print some values to help us with visualizing what is
+ happening with LRU eviction. */
+ fprintf(file,
+ "LRU len: %lu, unzip_LRU len: %lu\n"
+ "I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
+ UT_LIST_GET_LEN(buf_pool->LRU),
+ UT_LIST_GET_LEN(buf_pool->unzip_LRU),
+ buf_LRU_stat_sum.io, buf_LRU_stat_cur.io,
+ buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
+
+ buf_refresh_io_stats();
+ buf_pool_mutex_exit();
}
-/**************************************************************************
+/**********************************************************************//**
Refreshes the statistics used to print per-second averages. */
-
+UNIV_INTERN
void
buf_refresh_io_stats(void)
/*======================*/
{
buf_pool->last_printout_time = time(NULL);
- buf_pool->n_page_gets_old = buf_pool->n_page_gets;
- buf_pool->n_pages_read_old = buf_pool->n_pages_read;
- buf_pool->n_pages_created_old = buf_pool->n_pages_created;
- buf_pool->n_pages_written_old = buf_pool->n_pages_written;
- buf_pool->n_pages_awe_remapped_old = buf_pool->n_pages_awe_remapped;
+ buf_pool->old_stat = buf_pool->stat;
}
-/*************************************************************************
-Checks that all file pages in the buffer are in a replaceable state. */
-
+/*********************************************************************//**
+Asserts that all file pages in the buffer are in a replaceable state.
+@return TRUE */
+UNIV_INTERN
ibool
buf_all_freed(void)
/*===============*/
{
- buf_block_t* block;
+ buf_chunk_t* chunk;
ulint i;
ut_ad(buf_pool);
- mutex_enter(&(buf_pool->mutex));
-
- for (i = 0; i < buf_pool->curr_size; i++) {
+ buf_pool_mutex_enter();
- block = buf_pool_get_nth_block(buf_pool, i);
+ chunk = buf_pool->chunks;
- mutex_enter(&block->mutex);
+ for (i = buf_pool->n_chunks; i--; chunk++) {
- if (block->state == BUF_BLOCK_FILE_PAGE) {
+ const buf_block_t* block = buf_chunk_not_freed(chunk);
- if (!buf_flush_ready_for_replace(block)) {
-
- fprintf(stderr,
- "Page %lu %lu still fixed or dirty\n",
- (ulong) block->space,
- (ulong) block->offset);
- ut_error;
- }
+ if (UNIV_LIKELY_NULL(block)) {
+ fprintf(stderr,
+ "Page %lu %lu still fixed or dirty\n",
+ (ulong) block->page.space,
+ (ulong) block->page.offset);
+ ut_error;
}
-
- mutex_exit(&block->mutex);
}
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
return(TRUE);
}
-/*************************************************************************
+/*********************************************************************//**
Checks that there currently are no pending i/o-operations for the buffer
-pool. */
-
+pool.
+@return TRUE if there is no pending i/o */
+UNIV_INTERN
ibool
buf_pool_check_no_pending_io(void)
/*==============================*/
- /* out: TRUE if there is no pending i/o */
{
ibool ret;
- mutex_enter(&(buf_pool->mutex));
+ buf_pool_mutex_enter();
if (buf_pool->n_pend_reads + buf_pool->n_flush[BUF_FLUSH_LRU]
+ buf_pool->n_flush[BUF_FLUSH_LIST]
@@ -2566,25 +3997,56 @@ buf_pool_check_no_pending_io(void)
ret = TRUE;
}
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
return(ret);
}
-/*************************************************************************
-Gets the current length of the free list of buffer blocks. */
-
+/*********************************************************************//**
+Gets the current length of the free list of buffer blocks.
+@return length of the free list */
+UNIV_INTERN
ulint
buf_get_free_list_len(void)
/*=======================*/
{
ulint len;
- mutex_enter(&(buf_pool->mutex));
+ buf_pool_mutex_enter();
len = UT_LIST_GET_LEN(buf_pool->free);
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
return(len);
}
+#else /* !UNIV_HOTBACKUP */
+/********************************************************************//**
+Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
+UNIV_INTERN
+void
+buf_page_init_for_backup_restore(
+/*=============================*/
+ ulint space, /*!< in: space id */
+ ulint offset, /*!< in: offset of the page within space
+ in units of a page */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ buf_block_t* block) /*!< in: block to init */
+{
+ block->page.state = BUF_BLOCK_FILE_PAGE;
+ block->page.space = space;
+ block->page.offset = offset;
+
+ page_zip_des_init(&block->page.zip);
+
+ /* We assume that block->page.data has been allocated
+ with zip_size == UNIV_PAGE_SIZE. */
+ ut_ad(zip_size <= UNIV_PAGE_SIZE);
+ ut_ad(ut_is_2pow(zip_size));
+ page_zip_set_size(&block->page.zip, zip_size);
+ if (zip_size) {
+ block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
+ }
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
index 423c08c0569..8b614ce90e5 100644
--- a/storage/innobase/buf/buf0flu.c
+++ b/storage/innobase/buf/buf0flu.c
@@ -1,7 +1,24 @@
-/******************************************************
-The database buffer buf_pool flush algorithm
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1995-2001 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file buf/buf0flu.c
+The database buffer buf_pool flush algorithm
Created 11/11/1995 Heikki Tuuri
*******************************************************/
@@ -10,147 +27,193 @@ Created 11/11/1995 Heikki Tuuri
#ifdef UNIV_NONINL
#include "buf0flu.ic"
-#include "trx0sys.h"
#endif
+#include "buf0buf.h"
+#include "srv0srv.h"
+#include "page0zip.h"
+#ifndef UNIV_HOTBACKUP
#include "ut0byte.h"
#include "ut0lst.h"
#include "page0page.h"
#include "fil0fil.h"
-#include "buf0buf.h"
#include "buf0lru.h"
#include "buf0rea.h"
#include "ibuf0ibuf.h"
#include "log0log.h"
#include "os0file.h"
#include "trx0sys.h"
-#include "srv0srv.h"
-
-/* When flushed, dirty blocks are searched in neighborhoods of this size, and
-flushed along with the original page. */
-
-#define BUF_FLUSH_AREA ut_min(BUF_READ_AHEAD_AREA,\
- buf_pool->curr_size / 16)
/**********************************************************************
-Validates the flush list. */
+These statistics are generated for heuristics used in estimating the
+rate at which we should flush the dirty blocks to avoid bursty IO
+activity. Note that the rate of flushing not only depends on how many
+dirty pages we have in the buffer pool but it is also a function of
+how much redo the workload is generating and at what rate. */
+/* @{ */
+
+/** Number of intervals for which we keep the history of these stats.
+Each interval is 1 second, defined by the rate at which
+srv_error_monitor_thread() calls buf_flush_stat_update(). */
+#define BUF_FLUSH_STAT_N_INTERVAL 20
+
+/** Sampled values of buf_flush_stat_cur.
+Not protected by any mutex. Updated by buf_flush_stat_update(). */
+static buf_flush_stat_t buf_flush_stat_arr[BUF_FLUSH_STAT_N_INTERVAL];
+
+/** Cursor to buf_flush_stat_arr[]. Updated in a round-robin fashion. */
+static ulint buf_flush_stat_arr_ind;
+
+/** Values at start of the current interval. Reset by
+buf_flush_stat_update(). */
+static buf_flush_stat_t buf_flush_stat_cur;
+
+/** Running sum of past values of buf_flush_stat_cur.
+Updated by buf_flush_stat_update(). Not protected by any mutex. */
+static buf_flush_stat_t buf_flush_stat_sum;
+
+/** Number of pages flushed through non flush_list flushes. */
+static ulint buf_lru_flush_page_count = 0;
+
+/* @} */
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/******************************************************************//**
+Validates the flush list.
+@return TRUE if ok */
static
ibool
buf_flush_validate_low(void);
/*========================*/
- /* out: TRUE if ok */
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-/************************************************************************
+/********************************************************************//**
Inserts a modified block into the flush list. */
-
+UNIV_INTERN
void
buf_flush_insert_into_flush_list(
/*=============================*/
- buf_block_t* block) /* in: block which is modified */
+ buf_block_t* block) /*!< in/out: block which is modified */
{
- ut_ad(mutex_own(&(buf_pool->mutex)));
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
+ ut_ad(buf_pool_mutex_own());
ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
- || (ut_dulint_cmp((UT_LIST_GET_FIRST(buf_pool->flush_list))
- ->oldest_modification,
- block->oldest_modification) <= 0));
-
- UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, block);
-
- ut_ad(buf_flush_validate_low());
+ || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
+ <= block->page.oldest_modification));
+
+ ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+ ut_ad(block->page.in_LRU_list);
+ ut_ad(block->page.in_page_hash);
+ ut_ad(!block->page.in_zip_hash);
+ ut_ad(!block->page.in_flush_list);
+ ut_d(block->page.in_flush_list = TRUE);
+ UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ ut_a(buf_flush_validate_low());
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
}
-/************************************************************************
+/********************************************************************//**
Inserts a modified block into the flush list in the right sorted position.
This function is used by recovery, because there the modifications do not
necessarily come in the order of lsn's. */
-
+UNIV_INTERN
void
buf_flush_insert_sorted_into_flush_list(
/*====================================*/
- buf_block_t* block) /* in: block which is modified */
+ buf_block_t* block) /*!< in/out: block which is modified */
{
- buf_block_t* prev_b;
- buf_block_t* b;
+ buf_page_t* prev_b;
+ buf_page_t* b;
- ut_ad(mutex_own(&(buf_pool->mutex)));
+ ut_ad(buf_pool_mutex_own());
+ ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+
+ ut_ad(block->page.in_LRU_list);
+ ut_ad(block->page.in_page_hash);
+ ut_ad(!block->page.in_zip_hash);
+ ut_ad(!block->page.in_flush_list);
+ ut_d(block->page.in_flush_list = TRUE);
prev_b = NULL;
b = UT_LIST_GET_FIRST(buf_pool->flush_list);
- while (b && (ut_dulint_cmp(b->oldest_modification,
- block->oldest_modification) > 0)) {
+ while (b && b->oldest_modification > block->page.oldest_modification) {
+ ut_ad(b->in_flush_list);
prev_b = b;
- b = UT_LIST_GET_NEXT(flush_list, b);
+ b = UT_LIST_GET_NEXT(list, b);
}
if (prev_b == NULL) {
- UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, block);
+ UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
} else {
- UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list, prev_b,
- block);
+ UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
+ prev_b, &block->page);
}
- ut_ad(buf_flush_validate_low());
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ ut_a(buf_flush_validate_low());
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
}
-/************************************************************************
+/********************************************************************//**
Returns TRUE if the file page block is immediately suitable for replacement,
-i.e., the transition FILE_PAGE => NOT_USED allowed. */
-
+i.e., the transition FILE_PAGE => NOT_USED allowed.
+@return TRUE if can replace immediately */
+UNIV_INTERN
ibool
buf_flush_ready_for_replace(
/*========================*/
- /* out: TRUE if can replace immediately */
- buf_block_t* block) /* in: buffer control block, must be in state
- BUF_BLOCK_FILE_PAGE and in the LRU list */
+ buf_page_t* bpage) /*!< in: buffer control block, must be
+ buf_page_in_file(bpage) and in the LRU list */
{
- ut_ad(mutex_own(&(buf_pool->mutex)));
- ut_ad(mutex_own(&block->mutex));
- if (block->state != BUF_BLOCK_FILE_PAGE) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: buffer block state %lu"
- " in the LRU list!\n",
- (ulong)block->state);
- ut_print_buf(stderr, block, sizeof(buf_block_t));
-
- return(FALSE);
- }
+ ut_ad(buf_pool_mutex_own());
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ ut_ad(bpage->in_LRU_list);
- if ((ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0)
- || (block->buf_fix_count != 0)
- || (block->io_fix != 0)) {
+ if (UNIV_LIKELY(buf_page_in_file(bpage))) {
- return(FALSE);
+ return(bpage->oldest_modification == 0
+ && buf_page_get_io_fix(bpage) == BUF_IO_NONE
+ && bpage->buf_fix_count == 0);
}
- return(TRUE);
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Error: buffer block state %lu"
+ " in the LRU list!\n",
+ (ulong) buf_page_get_state(bpage));
+ ut_print_buf(stderr, bpage, sizeof(buf_page_t));
+ putc('\n', stderr);
+
+ return(FALSE);
}
-/************************************************************************
-Returns TRUE if the block is modified and ready for flushing. */
+/********************************************************************//**
+Returns TRUE if the block is modified and ready for flushing.
+@return TRUE if can flush immediately */
UNIV_INLINE
ibool
buf_flush_ready_for_flush(
/*======================*/
- /* out: TRUE if can flush immediately */
- buf_block_t* block, /* in: buffer control block, must be in state
- BUF_BLOCK_FILE_PAGE */
- ulint flush_type)/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
+ buf_page_t* bpage, /*!< in: buffer control block, must be
+ buf_page_in_file(bpage) */
+ enum buf_flush flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
{
- ut_ad(mutex_own(&(buf_pool->mutex)));
- ut_ad(mutex_own(&(block->mutex)));
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+ ut_a(buf_page_in_file(bpage));
+ ut_ad(buf_pool_mutex_own());
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
+
+ if (bpage->oldest_modification != 0
+ && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
+ ut_ad(bpage->in_flush_list);
- if ((ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0)
- && (block->io_fix == 0)) {
if (flush_type != BUF_FLUSH_LRU) {
return(TRUE);
- } else if (block->buf_fix_count == 0) {
+ } else if (bpage->buf_fix_count == 0) {
/* If we are flushing the LRU list, to avoid deadlocks
we require the block not to be bufferfixed, and hence
@@ -163,50 +226,106 @@ buf_flush_ready_for_flush(
return(FALSE);
}
-/************************************************************************
-Updates the flush system data structures when a write is completed. */
+/********************************************************************//**
+Remove a block from the flush list of modified blocks. */
+UNIV_INTERN
+void
+buf_flush_remove(
+/*=============*/
+ buf_page_t* bpage) /*!< in: pointer to the block in question */
+{
+ ut_ad(buf_pool_mutex_own());
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ ut_ad(bpage->in_flush_list);
+ ut_d(bpage->in_flush_list = FALSE);
+
+ switch (buf_page_get_state(bpage)) {
+ case BUF_BLOCK_ZIP_PAGE:
+ /* clean compressed pages should not be on the flush list */
+ case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_NOT_USED:
+ case BUF_BLOCK_READY_FOR_USE:
+ case BUF_BLOCK_MEMORY:
+ case BUF_BLOCK_REMOVE_HASH:
+ ut_error;
+ return;
+ case BUF_BLOCK_ZIP_DIRTY:
+ buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
+ UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
+ buf_LRU_insert_zip_clean(bpage);
+ break;
+ case BUF_BLOCK_FILE_PAGE:
+ UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
+ break;
+ }
+
+ bpage->oldest_modification = 0;
+
+ ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
+ ut_ad(ut_list_node_313->in_flush_list)));
+}
+/********************************************************************//**
+Updates the flush system data structures when a write is completed. */
+UNIV_INTERN
void
buf_flush_write_complete(
/*=====================*/
- buf_block_t* block) /* in: pointer to the block in question */
+ buf_page_t* bpage) /*!< in: pointer to the block in question */
{
- ut_ad(block);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(mutex_own(&(buf_pool->mutex)));
-#endif /* UNIV_SYNC_DEBUG */
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
- block->oldest_modification = ut_dulint_zero;
+ enum buf_flush flush_type;
- UT_LIST_REMOVE(flush_list, buf_pool->flush_list, block);
+ ut_ad(bpage);
- ut_d(UT_LIST_VALIDATE(flush_list, buf_block_t, buf_pool->flush_list));
+ buf_flush_remove(bpage);
- (buf_pool->n_flush[block->flush_type])--;
+ flush_type = buf_page_get_flush_type(bpage);
+ buf_pool->n_flush[flush_type]--;
- if (block->flush_type == BUF_FLUSH_LRU) {
+ if (flush_type == BUF_FLUSH_LRU) {
/* Put the block to the end of the LRU list to wait to be
moved to the free list */
- buf_LRU_make_block_old(block);
+ buf_LRU_make_block_old(bpage);
buf_pool->LRU_flush_ended++;
}
/* fprintf(stderr, "n pending flush %lu\n",
- buf_pool->n_flush[block->flush_type]); */
+ buf_pool->n_flush[flush_type]); */
- if ((buf_pool->n_flush[block->flush_type] == 0)
- && (buf_pool->init_flush[block->flush_type] == FALSE)) {
+ if ((buf_pool->n_flush[flush_type] == 0)
+ && (buf_pool->init_flush[flush_type] == FALSE)) {
/* The running flush batch has ended */
- os_event_set(buf_pool->no_flush[block->flush_type]);
+ os_event_set(buf_pool->no_flush[flush_type]);
}
}
-/************************************************************************
+/********************************************************************//**
+Flush a batch of writes to the datafiles that have already been
+written by the OS. */
+static
+void
+buf_flush_sync_datafiles(void)
+/*==========================*/
+{
+ /* Wake possible simulated aio thread to actually post the
+ writes to the operating system */
+ os_aio_simulated_wake_handler_threads();
+
+ /* Wait until all async writes to tablespaces have been posted to
+ the OS */
+ os_aio_wait_until_no_pending_writes();
+
+ /* Now we flush the data to disk (for example, with fsync) */
+ fil_flush_file_spaces(FIL_TABLESPACE);
+
+ return;
+}
+
+/********************************************************************//**
Flushes possible buffered writes from the doublewrite memory buffer to disk,
and also wakes up the aio thread if simulated aio is used. It is very
important to call this function after a batch of writes has been posted,
@@ -217,15 +336,14 @@ void
buf_flush_buffered_writes(void)
/*===========================*/
{
- buf_block_t* block;
byte* write_buf;
ulint len;
ulint len2;
ulint i;
if (!srv_use_doublewrite_buf || trx_doublewrite == NULL) {
- os_aio_simulated_wake_handler_threads();
-
+ /* Sync the writes to the disk. */
+ buf_flush_sync_datafiles();
return;
}
@@ -244,12 +362,21 @@ buf_flush_buffered_writes(void)
for (i = 0; i < trx_doublewrite->first_free; i++) {
- block = trx_doublewrite->buf_block_arr[i];
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+ const buf_block_t* block;
+
+ block = (buf_block_t*) trx_doublewrite->buf_block_arr[i];
- if (mach_read_from_4(block->frame + FIL_PAGE_LSN + 4)
- != mach_read_from_4(block->frame + UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
+ if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE
+ || block->page.zip.data) {
+ /* No simple validate for compressed pages exists. */
+ continue;
+ }
+
+ if (UNIV_UNLIKELY
+ (memcmp(block->frame + (FIL_PAGE_LSN + 4),
+ block->frame + (UNIV_PAGE_SIZE
+ - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
+ 4))) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: ERROR: The page to be written"
@@ -260,23 +387,31 @@ buf_flush_buffered_writes(void)
" doublewrite buffer.\n");
}
- if (block->check_index_page_at_flush
- && !page_simple_validate(block->frame)) {
+ if (!block->check_index_page_at_flush) {
+ } else if (page_is_comp(block->frame)) {
+ if (UNIV_UNLIKELY
+ (!page_simple_validate_new(block->frame))) {
+corrupted_page:
+ buf_page_print(block->frame, 0);
- buf_page_print(block->frame);
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Apparent corruption of an"
+ " index page n:o %lu in space %lu\n"
+ "InnoDB: to be written to data file."
+ " We intentionally crash server\n"
+ "InnoDB: to prevent corrupt data"
+ " from ending up in data\n"
+ "InnoDB: files.\n",
+ (ulong) buf_block_get_page_no(block),
+ (ulong) buf_block_get_space(block));
+
+ ut_error;
+ }
+ } else if (UNIV_UNLIKELY
+ (!page_simple_validate_old(block->frame))) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Apparent corruption of an"
- " index page n:o %lu in space %lu\n"
- "InnoDB: to be written to data file."
- " We intentionally crash server\n"
- "InnoDB: to prevent corrupt data"
- " from ending up in data\n"
- "InnoDB: files.\n",
- (ulong) block->offset, (ulong) block->space);
-
- ut_error;
+ goto corrupted_page;
}
}
@@ -284,23 +419,29 @@ buf_flush_buffered_writes(void)
srv_dblwr_pages_written+= trx_doublewrite->first_free;
srv_dblwr_writes++;
- if (trx_doublewrite->first_free > TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
- len = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
- } else {
- len = trx_doublewrite->first_free * UNIV_PAGE_SIZE;
- }
-
- fil_io(OS_FILE_WRITE,
- TRUE, TRX_SYS_SPACE,
- trx_doublewrite->block1, 0, len,
- (void*)trx_doublewrite->write_buf, NULL);
+ len = ut_min(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE,
+ trx_doublewrite->first_free) * UNIV_PAGE_SIZE;
write_buf = trx_doublewrite->write_buf;
+ i = 0;
- for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len; len2 += UNIV_PAGE_SIZE) {
- if (mach_read_from_4(write_buf + len2 + FIL_PAGE_LSN + 4)
- != mach_read_from_4(write_buf + len2 + UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
+ fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
+ trx_doublewrite->block1, 0, len,
+ (void*) write_buf, NULL);
+
+ for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
+ len2 += UNIV_PAGE_SIZE, i++) {
+ const buf_block_t* block = (buf_block_t*)
+ trx_doublewrite->buf_block_arr[i];
+
+ if (UNIV_LIKELY(!block->page.zip.data)
+ && UNIV_LIKELY(buf_block_get_state(block)
+ == BUF_BLOCK_FILE_PAGE)
+ && UNIV_UNLIKELY
+ (memcmp(write_buf + len2 + (FIL_PAGE_LSN + 4),
+ write_buf + len2
+ + (UNIV_PAGE_SIZE
+ - FIL_PAGE_END_LSN_OLD_CHKSUM + 4), 4))) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: ERROR: The page to be written"
@@ -310,39 +451,45 @@ buf_flush_buffered_writes(void)
}
}
- if (trx_doublewrite->first_free > TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
- len = (trx_doublewrite->first_free
- - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) * UNIV_PAGE_SIZE;
-
- fil_io(OS_FILE_WRITE,
- TRUE, TRX_SYS_SPACE,
- trx_doublewrite->block2, 0, len,
- (void*)(trx_doublewrite->write_buf
- + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
- * UNIV_PAGE_SIZE),
- NULL);
-
- write_buf = trx_doublewrite->write_buf
- + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
- for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
- len2 += UNIV_PAGE_SIZE) {
- if (mach_read_from_4(write_buf + len2
- + FIL_PAGE_LSN + 4)
- != mach_read_from_4(write_buf + len2
- + UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM
- + 4)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ERROR: The page to be"
- " written seems corrupt!\n"
- "InnoDB: The lsn fields do not match!"
- " Noticed in"
- " the doublewrite block2.\n");
- }
+ if (trx_doublewrite->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
+ goto flush;
+ }
+
+ len = (trx_doublewrite->first_free - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
+ * UNIV_PAGE_SIZE;
+
+ write_buf = trx_doublewrite->write_buf
+ + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
+ ut_ad(i == TRX_SYS_DOUBLEWRITE_BLOCK_SIZE);
+
+ fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
+ trx_doublewrite->block2, 0, len,
+ (void*) write_buf, NULL);
+
+ for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
+ len2 += UNIV_PAGE_SIZE, i++) {
+ const buf_block_t* block = (buf_block_t*)
+ trx_doublewrite->buf_block_arr[i];
+
+ if (UNIV_LIKELY(!block->page.zip.data)
+ && UNIV_LIKELY(buf_block_get_state(block)
+ == BUF_BLOCK_FILE_PAGE)
+ && UNIV_UNLIKELY
+ (memcmp(write_buf + len2 + (FIL_PAGE_LSN + 4),
+ write_buf + len2
+ + (UNIV_PAGE_SIZE
+ - FIL_PAGE_END_LSN_OLD_CHKSUM + 4), 4))) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: ERROR: The page to be"
+ " written seems corrupt!\n"
+ "InnoDB: The lsn fields do not match!"
+ " Noticed in"
+ " the doublewrite block2.\n");
}
}
+flush:
/* Now flush the doublewrite buffer data to disk */
fil_flush(TRX_SYS_SPACE);
@@ -352,11 +499,33 @@ buf_flush_buffered_writes(void)
blocks. Next do the writes to the intended positions. */
for (i = 0; i < trx_doublewrite->first_free; i++) {
- block = trx_doublewrite->buf_block_arr[i];
+ const buf_block_t* block = (buf_block_t*)
+ trx_doublewrite->buf_block_arr[i];
+
+ ut_a(buf_page_in_file(&block->page));
+ if (UNIV_LIKELY_NULL(block->page.zip.data)) {
+ fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
+ FALSE, buf_page_get_space(&block->page),
+ buf_page_get_zip_size(&block->page),
+ buf_page_get_page_no(&block->page), 0,
+ buf_page_get_zip_size(&block->page),
+ (void*)block->page.zip.data,
+ (void*)block);
+
+ /* Increment the counter of I/O operations used
+ for selecting LRU policy. */
+ buf_LRU_stat_inc_io();
+
+ continue;
+ }
+
+ ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- if (mach_read_from_4(block->frame + FIL_PAGE_LSN + 4)
- != mach_read_from_4(block->frame + UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
+ if (UNIV_UNLIKELY(memcmp(block->frame + (FIL_PAGE_LSN + 4),
+ block->frame
+ + (UNIV_PAGE_SIZE
+ - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
+ 4))) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: ERROR: The page to be written"
@@ -367,39 +536,31 @@ buf_flush_buffered_writes(void)
" the doublewrite buffer.\n"
"InnoDB: Page buf fix count %lu,"
" io fix %lu, state %lu\n",
- (ulong)block->buf_fix_count,
- (ulong)block->io_fix,
- (ulong)block->state);
+ (ulong)block->page.buf_fix_count,
+ (ulong)buf_block_get_io_fix(block),
+ (ulong)buf_block_get_state(block));
}
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
- FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE,
+ FALSE, buf_block_get_space(block), 0,
+ buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE,
(void*)block->frame, (void*)block);
- }
-
- /* Wake possible simulated aio thread to actually post the
- writes to the operating system */
-
- os_aio_simulated_wake_handler_threads();
-
- /* Wait that all async writes to tablespaces have been posted to
- the OS */
-
- os_aio_wait_until_no_pending_writes();
- /* Now we flush the data to disk (for example, with fsync) */
+ /* Increment the counter of I/O operations used
+ for selecting LRU policy. */
+ buf_LRU_stat_inc_io();
+ }
- fil_flush_file_spaces(FIL_TABLESPACE);
+ /* Sync the writes to the disk. */
+ buf_flush_sync_datafiles();
/* We can now reuse the doublewrite memory buffer: */
-
trx_doublewrite->first_free = 0;
mutex_exit(&(trx_doublewrite->mutex));
}
-/************************************************************************
+/********************************************************************//**
Posts a buffer page for writing. If the doublewrite memory buffer is
full, calls buf_flush_buffered_writes and waits for free space to
appear. */
@@ -407,12 +568,13 @@ static
void
buf_flush_post_to_doublewrite_buf(
/*==============================*/
- buf_block_t* block) /* in: buffer block to write */
+ buf_page_t* bpage) /*!< in: buffer block to write */
{
+ ulint zip_size;
try_again:
mutex_enter(&(trx_doublewrite->mutex));
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+ ut_a(buf_page_in_file(bpage));
if (trx_doublewrite->first_free
>= 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
@@ -423,11 +585,25 @@ try_again:
goto try_again;
}
- ut_memcpy(trx_doublewrite->write_buf
- + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
- block->frame, UNIV_PAGE_SIZE);
+ zip_size = buf_page_get_zip_size(bpage);
+
+ if (UNIV_UNLIKELY(zip_size)) {
+ /* Copy the compressed page and clear the rest. */
+ memcpy(trx_doublewrite->write_buf
+ + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
+ bpage->zip.data, zip_size);
+ memset(trx_doublewrite->write_buf
+ + UNIV_PAGE_SIZE * trx_doublewrite->first_free
+ + zip_size, 0, UNIV_PAGE_SIZE - zip_size);
+ } else {
+ ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
- trx_doublewrite->buf_block_arr[trx_doublewrite->first_free] = block;
+ memcpy(trx_doublewrite->write_buf
+ + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
+ ((buf_block_t*) bpage)->frame, UNIV_PAGE_SIZE);
+ }
+
+ trx_doublewrite->buf_block_arr[trx_doublewrite->first_free] = bpage;
trx_doublewrite->first_free++;
@@ -442,27 +618,67 @@ try_again:
mutex_exit(&(trx_doublewrite->mutex));
}
+#endif /* !UNIV_HOTBACKUP */
-/************************************************************************
+/********************************************************************//**
Initializes a page for writing to the tablespace. */
-
+UNIV_INTERN
void
buf_flush_init_for_writing(
/*=======================*/
- byte* page, /* in: page */
- dulint newest_lsn, /* in: newest modification lsn to the page */
- ulint space, /* in: space id */
- ulint page_no) /* in: page number */
+ byte* page, /*!< in/out: page */
+ void* page_zip_, /*!< in/out: compressed page, or NULL */
+ ib_uint64_t newest_lsn) /*!< in: newest modification lsn
+ to the page */
{
- /* Write the newest modification lsn to the page header and trailer */
- mach_write_to_8(page + FIL_PAGE_LSN, newest_lsn);
+ ut_ad(page);
+
+ if (page_zip_) {
+ page_zip_des_t* page_zip = page_zip_;
+ ulint zip_size = page_zip_get_size(page_zip);
+ ut_ad(zip_size);
+ ut_ad(ut_is_2pow(zip_size));
+ ut_ad(zip_size <= UNIV_PAGE_SIZE);
+
+ switch (UNIV_EXPECT(fil_page_get_type(page), FIL_PAGE_INDEX)) {
+ case FIL_PAGE_TYPE_ALLOCATED:
+ case FIL_PAGE_INODE:
+ case FIL_PAGE_IBUF_BITMAP:
+ case FIL_PAGE_TYPE_FSP_HDR:
+ case FIL_PAGE_TYPE_XDES:
+ /* These are essentially uncompressed pages. */
+ memcpy(page_zip->data, page, zip_size);
+ /* fall through */
+ case FIL_PAGE_TYPE_ZBLOB:
+ case FIL_PAGE_TYPE_ZBLOB2:
+ case FIL_PAGE_INDEX:
+ mach_write_ull(page_zip->data
+ + FIL_PAGE_LSN, newest_lsn);
+ memset(page_zip->data + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
+ mach_write_to_4(page_zip->data
+ + FIL_PAGE_SPACE_OR_CHKSUM,
+ srv_use_checksums
+ ? page_zip_calc_checksum(
+ page_zip->data, zip_size)
+ : BUF_NO_CHECKSUM_MAGIC);
+ return;
+ }
- mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
- newest_lsn);
- /* Write the page number and the space id */
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: ERROR: The compressed page to be written"
+ " seems corrupt:", stderr);
+ ut_print_buf(stderr, page, zip_size);
+ fputs("\nInnoDB: Possibly older version of the page:", stderr);
+ ut_print_buf(stderr, page_zip->data, zip_size);
+ putc('\n', stderr);
+ ut_error;
+ }
- mach_write_to_4(page + FIL_PAGE_OFFSET, page_no);
- mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space);
+ /* Write the newest modification lsn to the page header and trailer */
+ mach_write_ull(page + FIL_PAGE_LSN, newest_lsn);
+
+ mach_write_ull(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
+ newest_lsn);
/* Store the new formula checksum */
@@ -482,7 +698,8 @@ buf_flush_init_for_writing(
: BUF_NO_CHECKSUM_MAGIC);
}
-/************************************************************************
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
Does an asynchronous write of a buffer page. NOTE: in simulated aio and
also when the doublewrite buffer is used, we must call
buf_flush_buffered_writes after we have posted a batch of writes! */
@@ -490,17 +707,30 @@ static
void
buf_flush_write_block_low(
/*======================*/
- buf_block_t* block) /* in: buffer block to write */
+ buf_page_t* bpage) /*!< in: buffer block to write */
{
+ ulint zip_size = buf_page_get_zip_size(bpage);
+ page_t* frame = NULL;
#ifdef UNIV_LOG_DEBUG
static ibool univ_log_debug_warned;
#endif /* UNIV_LOG_DEBUG */
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-#ifdef UNIV_IBUF_DEBUG
- ut_a(ibuf_count_get(block->space, block->offset) == 0);
+ ut_ad(buf_page_in_file(bpage));
+
+ /* We are not holding buf_pool_mutex or block_mutex here.
+ Nevertheless, it is safe to access bpage, because it is
+ io_fixed and oldest_modification != 0. Thus, it cannot be
+ relocated in the buffer pool or removed from flush_list or
+ LRU_list. */
+ ut_ad(!buf_pool_mutex_own());
+ ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
+ ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
+ ut_ad(bpage->oldest_modification != 0);
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
+ ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
#endif
- ut_ad(!ut_dulint_is_zero(block->newest_modification));
+ ut_ad(bpage->newest_modification != 0);
#ifdef UNIV_LOG_DEBUG
if (!univ_log_debug_warned) {
@@ -512,241 +742,199 @@ buf_flush_write_block_low(
}
#else
/* Force the log to the disk before writing the modified block */
- log_write_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE);
+ log_write_up_to(bpage->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE);
#endif
- buf_flush_init_for_writing(block->frame, block->newest_modification,
- block->space, block->offset);
+ switch (buf_page_get_state(bpage)) {
+ case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_ZIP_PAGE: /* The page should be dirty. */
+ case BUF_BLOCK_NOT_USED:
+ case BUF_BLOCK_READY_FOR_USE:
+ case BUF_BLOCK_MEMORY:
+ case BUF_BLOCK_REMOVE_HASH:
+ ut_error;
+ break;
+ case BUF_BLOCK_ZIP_DIRTY:
+ frame = bpage->zip.data;
+ if (UNIV_LIKELY(srv_use_checksums)) {
+ ut_a(mach_read_from_4(frame + FIL_PAGE_SPACE_OR_CHKSUM)
+ == page_zip_calc_checksum(frame, zip_size));
+ }
+ mach_write_ull(frame + FIL_PAGE_LSN,
+ bpage->newest_modification);
+ memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
+ break;
+ case BUF_BLOCK_FILE_PAGE:
+ frame = bpage->zip.data;
+ if (!frame) {
+ frame = ((buf_block_t*) bpage)->frame;
+ }
+
+ buf_flush_init_for_writing(((buf_block_t*) bpage)->frame,
+ bpage->zip.data
+ ? &bpage->zip : NULL,
+ bpage->newest_modification);
+ break;
+ }
+
if (!srv_use_doublewrite_buf || !trx_doublewrite) {
fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
- FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE,
- (void*)block->frame, (void*)block);
+ FALSE, buf_page_get_space(bpage), zip_size,
+ buf_page_get_page_no(bpage), 0,
+ zip_size ? zip_size : UNIV_PAGE_SIZE,
+ frame, bpage);
} else {
- buf_flush_post_to_doublewrite_buf(block);
+ buf_flush_post_to_doublewrite_buf(bpage);
}
}
-/************************************************************************
-Writes a page asynchronously from the buffer buf_pool to a file, if it can be
-found in the buf_pool and it is in a flushable state. NOTE: in simulated aio
-we must call os_aio_simulated_wake_handler_threads after we have posted a batch
-of writes! */
+/********************************************************************//**
+Writes a flushable page asynchronously from the buffer pool to a file.
+NOTE: in simulated aio we must call
+os_aio_simulated_wake_handler_threads after we have posted a batch of
+writes! NOTE: buf_pool_mutex and buf_page_get_mutex(bpage) must be
+held upon entering this function, and they will be released by this
+function. */
static
-ulint
-buf_flush_try_page(
-/*===============*/
- /* out: 1 if a page was flushed, 0 otherwise */
- ulint space, /* in: space id */
- ulint offset, /* in: page offset */
- ulint flush_type) /* in: BUF_FLUSH_LRU, BUF_FLUSH_LIST, or
- BUF_FLUSH_SINGLE_PAGE */
+void
+buf_flush_page(
+/*===========*/
+ buf_page_t* bpage, /*!< in: buffer control block */
+ enum buf_flush flush_type) /*!< in: BUF_FLUSH_LRU
+ or BUF_FLUSH_LIST */
{
- buf_block_t* block;
- ibool locked;
-
- ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST
- || flush_type == BUF_FLUSH_SINGLE_PAGE);
-
- mutex_enter(&(buf_pool->mutex));
-
- block = buf_page_hash_get(space, offset);
-
- ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE);
-
- if (!block) {
- mutex_exit(&(buf_pool->mutex));
- return(0);
- }
+ mutex_t* block_mutex;
+ ibool is_uncompressed;
- mutex_enter(&block->mutex);
-
- if (flush_type == BUF_FLUSH_LIST
- && buf_flush_ready_for_flush(block, flush_type)) {
-
- block->io_fix = BUF_IO_WRITE;
+ ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
+ ut_ad(buf_pool_mutex_own());
+ ut_ad(buf_page_in_file(bpage));
- /* If AWE is enabled and the page is not mapped to a frame,
- then map it */
+ block_mutex = buf_page_get_mutex(bpage);
+ ut_ad(mutex_own(block_mutex));
- if (block->frame == NULL) {
- ut_a(srv_use_awe);
+ ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
- /* We set second parameter TRUE because the block is
- in the LRU list and we must put it to
- awe_LRU_free_mapped list once mapped to a frame */
+ buf_page_set_io_fix(bpage, BUF_IO_WRITE);
- buf_awe_map_page_to_frame(block, TRUE);
- }
+ buf_page_set_flush_type(bpage, flush_type);
- block->flush_type = flush_type;
+ if (buf_pool->n_flush[flush_type] == 0) {
- if (buf_pool->n_flush[flush_type] == 0) {
-
- os_event_reset(buf_pool->no_flush[flush_type]);
- }
+ os_event_reset(buf_pool->no_flush[flush_type]);
+ }
- (buf_pool->n_flush[flush_type])++;
+ buf_pool->n_flush[flush_type]++;
- locked = FALSE;
+ is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
+ ut_ad(is_uncompressed == (block_mutex != &buf_pool_zip_mutex));
+ switch (flush_type) {
+ ibool is_s_latched;
+ case BUF_FLUSH_LIST:
/* If the simulated aio thread is not running, we must
not wait for any latch, as we may end up in a deadlock:
if buf_fix_count == 0, then we know we need not wait */
- if (block->buf_fix_count == 0) {
- rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
-
- locked = TRUE;
+ is_s_latched = (bpage->buf_fix_count == 0);
+ if (is_s_latched && is_uncompressed) {
+ rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
+ BUF_IO_WRITE);
}
- mutex_exit(&block->mutex);
- mutex_exit(&(buf_pool->mutex));
+ mutex_exit(block_mutex);
+ buf_pool_mutex_exit();
+
+ /* Even though bpage is not protected by any mutex at
+ this point, it is safe to access bpage, because it is
+ io_fixed and oldest_modification != 0. Thus, it
+ cannot be relocated in the buffer pool or removed from
+ flush_list or LRU_list. */
- if (!locked) {
+ if (!is_s_latched) {
buf_flush_buffered_writes();
- rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
+ if (is_uncompressed) {
+ rw_lock_s_lock_gen(&((buf_block_t*) bpage)
+ ->lock, BUF_IO_WRITE);
+ }
}
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr,
- "Flushing page space %lu, page no %lu \n",
- (ulong) block->space, (ulong) block->offset);
- }
-#endif /* UNIV_DEBUG */
-
- buf_flush_write_block_low(block);
-
- return(1);
-
- } else if (flush_type == BUF_FLUSH_LRU
- && buf_flush_ready_for_flush(block, flush_type)) {
+ break;
+ case BUF_FLUSH_LRU:
/* VERY IMPORTANT:
Because any thread may call the LRU flush, even when owning
locks on pages, to avoid deadlocks, we must make sure that the
s-lock is acquired on the page without waiting: this is
- accomplished because in the if-condition above we require
- the page not to be bufferfixed (in function
- ..._ready_for_flush). */
-
- block->io_fix = BUF_IO_WRITE;
-
- /* If AWE is enabled and the page is not mapped to a frame,
- then map it */
-
- if (block->frame == NULL) {
- ut_a(srv_use_awe);
-
- /* We set second parameter TRUE because the block is
- in the LRU list and we must put it to
- awe_LRU_free_mapped list once mapped to a frame */
+ accomplished because buf_flush_ready_for_flush() must hold,
+ and that requires the page not to be bufferfixed. */
- buf_awe_map_page_to_frame(block, TRUE);
+ if (is_uncompressed) {
+ rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
+ BUF_IO_WRITE);
}
- block->flush_type = flush_type;
-
- if (buf_pool->n_flush[flush_type] == 0) {
-
- os_event_reset(buf_pool->no_flush[flush_type]);
- }
-
- (buf_pool->n_flush[flush_type])++;
-
- rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
-
/* Note that the s-latch is acquired before releasing the
buf_pool mutex: this ensures that the latch is acquired
immediately. */
- mutex_exit(&block->mutex);
- mutex_exit(&(buf_pool->mutex));
-
- buf_flush_write_block_low(block);
+ mutex_exit(block_mutex);
+ buf_pool_mutex_exit();
+ break;
- return(1);
-
- } else if (flush_type == BUF_FLUSH_SINGLE_PAGE
- && buf_flush_ready_for_flush(block, flush_type)) {
-
- block->io_fix = BUF_IO_WRITE;
-
- /* If AWE is enabled and the page is not mapped to a frame,
- then map it */
-
- if (block->frame == NULL) {
- ut_a(srv_use_awe);
-
- /* We set second parameter TRUE because the block is
- in the LRU list and we must put it to
- awe_LRU_free_mapped list once mapped to a frame */
-
- buf_awe_map_page_to_frame(block, TRUE);
- }
-
- block->flush_type = flush_type;
-
- if (buf_pool->n_flush[block->flush_type] == 0) {
-
- os_event_reset(buf_pool->no_flush[block->flush_type]);
- }
-
- (buf_pool->n_flush[flush_type])++;
-
- mutex_exit(&block->mutex);
- mutex_exit(&(buf_pool->mutex));
+ default:
+ ut_error;
+ }
- rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
+ /* Even though bpage is not protected by any mutex at this
+ point, it is safe to access bpage, because it is io_fixed and
+ oldest_modification != 0. Thus, it cannot be relocated in the
+ buffer pool or removed from flush_list or LRU_list. */
#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr,
- "Flushing single page space %lu,"
- " page no %lu \n",
- (ulong) block->space,
- (ulong) block->offset);
- }
-#endif /* UNIV_DEBUG */
-
- buf_flush_write_block_low(block);
-
- return(1);
+ if (buf_debug_prints) {
+ fprintf(stderr,
+ "Flushing %u space %u page %u\n",
+ flush_type, bpage->space, bpage->offset);
}
-
- mutex_exit(&block->mutex);
- mutex_exit(&(buf_pool->mutex));
-
- return(0);
+#endif /* UNIV_DEBUG */
+ buf_flush_write_block_low(bpage);
}
-/***************************************************************
-Flushes to disk all flushable pages within the flush area. */
+/***********************************************************//**
+Flushes to disk all flushable pages within the flush area.
+@return number of pages flushed */
static
ulint
buf_flush_try_neighbors(
/*====================*/
- /* out: number of pages flushed */
- ulint space, /* in: space id */
- ulint offset, /* in: page offset */
- ulint flush_type) /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
+ ulint space, /*!< in: space id */
+ ulint offset, /*!< in: page offset */
+ enum buf_flush flush_type) /*!< in: BUF_FLUSH_LRU or
+ BUF_FLUSH_LIST */
{
- buf_block_t* block;
+ buf_page_t* bpage;
ulint low, high;
ulint count = 0;
ulint i;
ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
- low = (offset / BUF_FLUSH_AREA) * BUF_FLUSH_AREA;
- high = (offset / BUF_FLUSH_AREA + 1) * BUF_FLUSH_AREA;
-
if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
/* If there is little space, it is better not to flush any
block except from the end of the LRU list */
low = offset;
high = offset + 1;
+ } else {
+ /* When flushed, dirty blocks are searched in neighborhoods of
+ this size, and flushed along with the original page. */
+
+ ulint buf_flush_area = ut_min(BUF_READ_AHEAD_AREA,
+ buf_pool->curr_size / 16);
+
+ low = (offset / buf_flush_area) * buf_flush_area;
+ high = (offset / buf_flush_area + 1) * buf_flush_area;
}
/* fprintf(stderr, "Flush area: low %lu high %lu\n", low, high); */
@@ -755,30 +943,31 @@ buf_flush_try_neighbors(
high = fil_space_get_size(space);
}
- mutex_enter(&(buf_pool->mutex));
+ buf_pool_mutex_enter();
for (i = low; i < high; i++) {
- block = buf_page_hash_get(space, i);
- ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE);
+ bpage = buf_page_hash_get(space, i);
- if (!block) {
+ if (!bpage) {
continue;
+ }
- } else if (flush_type == BUF_FLUSH_LRU && i != offset
- && !block->old) {
+ ut_a(buf_page_in_file(bpage));
- /* We avoid flushing 'non-old' blocks in an LRU flush,
- because the flushed blocks are soon freed */
+ /* We avoid flushing 'non-old' blocks in an LRU flush,
+ because the flushed blocks are soon freed */
- continue;
- } else {
+ if (flush_type != BUF_FLUSH_LRU
+ || i == offset
+ || buf_page_is_old(bpage)) {
+ mutex_t* block_mutex = buf_page_get_mutex(bpage);
- mutex_enter(&block->mutex);
+ mutex_enter(block_mutex);
- if (buf_flush_ready_for_flush(block, flush_type)
- && (i == offset || block->buf_fix_count == 0)) {
+ if (buf_flush_ready_for_flush(bpage, flush_type)
+ && (i == offset || !bpage->buf_fix_count)) {
/* We only try to flush those
neighbors != offset where the buf fix count is
zero, as we then know that we probably can
@@ -787,61 +976,52 @@ buf_flush_try_neighbors(
flush the doublewrite buffer before we start
waiting. */
- mutex_exit(&block->mutex);
-
- mutex_exit(&(buf_pool->mutex));
-
- /* Note: as we release the buf_pool mutex
- above, in buf_flush_try_page we cannot be sure
- the page is still in a flushable state:
- therefore we check it again inside that
- function. */
-
- count += buf_flush_try_page(space, i,
- flush_type);
+ buf_flush_page(bpage, flush_type);
+ ut_ad(!mutex_own(block_mutex));
+ count++;
- mutex_enter(&(buf_pool->mutex));
+ buf_pool_mutex_enter();
} else {
- mutex_exit(&block->mutex);
+ mutex_exit(block_mutex);
}
}
}
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
return(count);
}
-/***********************************************************************
+/*******************************************************************//**
This utility flushes dirty blocks from the end of the LRU list or flush_list.
NOTE 1: in the case of an LRU flush the calling thread may own latches to
pages: to avoid deadlocks, this function must be written so that it cannot
end up waiting for these latches! NOTE 2: in the case of a flush list flush,
-the calling thread is not allowed to own any latches on pages! */
-
+the calling thread is not allowed to own any latches on pages!
+@return number of blocks for which the write request was queued;
+ULINT_UNDEFINED if there was a flush of the same type already running */
+UNIV_INTERN
ulint
buf_flush_batch(
/*============*/
- /* out: number of blocks for which the write
- request was queued; ULINT_UNDEFINED if there
- was a flush of the same type already running */
- ulint flush_type, /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST; if
- BUF_FLUSH_LIST, then the caller must not own
- any latches on pages */
- ulint min_n, /* in: wished minimum mumber of blocks flushed
- (it is not guaranteed that the actual number
- is that big, though) */
- dulint lsn_limit) /* in the case BUF_FLUSH_LIST all blocks whose
- oldest_modification is smaller than this
- should be flushed (if their number does not
- exceed min_n), otherwise ignored */
+ enum buf_flush flush_type, /*!< in: BUF_FLUSH_LRU or
+ BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
+ then the caller must not own any
+ latches on pages */
+ ulint min_n, /*!< in: wished minimum number of blocks
+ flushed (it is not guaranteed that the
+ actual number is that big, though) */
+ ib_uint64_t lsn_limit) /*!< in: in the case BUF_FLUSH_LIST all
+ blocks whose oldest_modification is
+ smaller than this should be flushed
+ (if their number does not exceed
+ min_n), otherwise ignored */
{
- buf_block_t* block;
+ buf_page_t* bpage;
ulint page_count = 0;
ulint old_page_count;
ulint space;
ulint offset;
- ibool found;
ut_ad((flush_type == BUF_FLUSH_LRU)
|| (flush_type == BUF_FLUSH_LIST));
@@ -849,21 +1029,22 @@ buf_flush_batch(
ut_ad((flush_type != BUF_FLUSH_LIST)
|| sync_thread_levels_empty_gen(TRUE));
#endif /* UNIV_SYNC_DEBUG */
- mutex_enter(&(buf_pool->mutex));
+ buf_pool_mutex_enter();
if ((buf_pool->n_flush[flush_type] > 0)
|| (buf_pool->init_flush[flush_type] == TRUE)) {
/* There is already a flush batch of the same type running */
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
return(ULINT_UNDEFINED);
}
- (buf_pool->init_flush)[flush_type] = TRUE;
+ buf_pool->init_flush[flush_type] = TRUE;
for (;;) {
+flush_next:
/* If we have flushed enough, leave the loop */
if (page_count >= min_n) {
@@ -874,41 +1055,41 @@ buf_flush_batch(
block to be flushed. */
if (flush_type == BUF_FLUSH_LRU) {
- block = UT_LIST_GET_LAST(buf_pool->LRU);
+ bpage = UT_LIST_GET_LAST(buf_pool->LRU);
} else {
ut_ad(flush_type == BUF_FLUSH_LIST);
- block = UT_LIST_GET_LAST(buf_pool->flush_list);
- if (!block
- || (ut_dulint_cmp(block->oldest_modification,
- lsn_limit) >= 0)) {
+ bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
+ if (!bpage
+ || bpage->oldest_modification >= lsn_limit) {
/* We have flushed enough */
break;
}
+ ut_ad(bpage->in_flush_list);
}
- found = FALSE;
-
/* Note that after finding a single flushable page, we try to
flush also all its neighbors, and after that start from the
END of the LRU list or flush list again: the list may change
during the flushing and we cannot safely preserve within this
function a pointer to a block in the list! */
- while ((block != NULL) && !found) {
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+ do {
+ mutex_t*block_mutex = buf_page_get_mutex(bpage);
+ ibool ready;
- mutex_enter(&block->mutex);
+ ut_a(buf_page_in_file(bpage));
- if (buf_flush_ready_for_flush(block, flush_type)) {
+ mutex_enter(block_mutex);
+ ready = buf_flush_ready_for_flush(bpage, flush_type);
+ mutex_exit(block_mutex);
- found = TRUE;
- space = block->space;
- offset = block->offset;
+ if (ready) {
+ space = buf_page_get_space(bpage);
+ offset = buf_page_get_page_no(bpage);
- mutex_exit(&block->mutex);
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
old_page_count = page_count;
@@ -920,40 +1101,34 @@ buf_flush_batch(
flush_type, offset,
page_count - old_page_count); */
- mutex_enter(&(buf_pool->mutex));
+ buf_pool_mutex_enter();
+ goto flush_next;
} else if (flush_type == BUF_FLUSH_LRU) {
-
- mutex_exit(&block->mutex);
-
- block = UT_LIST_GET_PREV(LRU, block);
+ bpage = UT_LIST_GET_PREV(LRU, bpage);
} else {
ut_ad(flush_type == BUF_FLUSH_LIST);
- mutex_exit(&block->mutex);
-
- block = UT_LIST_GET_PREV(flush_list, block);
+ bpage = UT_LIST_GET_PREV(list, bpage);
+ ut_ad(!bpage || bpage->in_flush_list);
}
- }
+ } while (bpage != NULL);
/* If we could not find anything to flush, leave the loop */
- if (!found) {
- break;
- }
+ break;
}
- (buf_pool->init_flush)[flush_type] = FALSE;
+ buf_pool->init_flush[flush_type] = FALSE;
- if ((buf_pool->n_flush[flush_type] == 0)
- && (buf_pool->init_flush[flush_type] == FALSE)) {
+ if (buf_pool->n_flush[flush_type] == 0) {
/* The running flush batch has ended */
os_event_set(buf_pool->no_flush[flush_type]);
}
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
buf_flush_buffered_writes();
@@ -970,62 +1145,71 @@ buf_flush_batch(
srv_buf_pool_flushed += page_count;
+ /* We keep track of all flushes happening as part of LRU
+ flush. When estimating the desired rate at which flush_list
+ should be flushed we factor in this value. */
+ if (flush_type == BUF_FLUSH_LRU) {
+ buf_lru_flush_page_count += page_count;
+ }
+
return(page_count);
}
-/**********************************************************************
+/******************************************************************//**
Waits until a flush batch of the given type ends */
-
+UNIV_INTERN
void
buf_flush_wait_batch_end(
/*=====================*/
- ulint type) /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
+ enum buf_flush type) /*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
{
ut_ad((type == BUF_FLUSH_LRU) || (type == BUF_FLUSH_LIST));
os_event_wait(buf_pool->no_flush[type]);
}
-/**********************************************************************
+/******************************************************************//**
Gives a recommendation of how many blocks should be flushed to establish
a big enough margin of replaceable blocks near the end of the LRU list
-and in the free list. */
+and in the free list.
+@return number of blocks which should be flushed from the end of the
+LRU list */
static
ulint
buf_flush_LRU_recommendation(void)
/*==============================*/
- /* out: number of blocks which should be flushed
- from the end of the LRU list */
{
- buf_block_t* block;
+ buf_page_t* bpage;
ulint n_replaceable;
ulint distance = 0;
- mutex_enter(&(buf_pool->mutex));
+ buf_pool_mutex_enter();
n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
- block = UT_LIST_GET_LAST(buf_pool->LRU);
+ bpage = UT_LIST_GET_LAST(buf_pool->LRU);
- while ((block != NULL)
+ while ((bpage != NULL)
&& (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN
+ BUF_FLUSH_EXTRA_MARGIN)
&& (distance < BUF_LRU_FREE_SEARCH_LEN)) {
- mutex_enter(&block->mutex);
+ mutex_t* block_mutex = buf_page_get_mutex(bpage);
+
+ mutex_enter(block_mutex);
- if (buf_flush_ready_for_replace(block)) {
+ if (buf_flush_ready_for_replace(bpage)) {
n_replaceable++;
}
- mutex_exit(&block->mutex);
+ mutex_exit(block_mutex);
distance++;
- block = UT_LIST_GET_PREV(LRU, block);
+ bpage = UT_LIST_GET_PREV(LRU, bpage);
}
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {
@@ -1036,13 +1220,13 @@ buf_flush_LRU_recommendation(void)
- n_replaceable);
}
-/*************************************************************************
+/*********************************************************************//**
Flushes pages from the end of the LRU list if there is too small a margin
of replaceable pages there or in the free list. VERY IMPORTANT: this function
is called also by threads which have locks on pages. To avoid deadlocks, we
flush only pages such that the s-lock required for flushing can be acquired
immediately, without waiting. */
-
+UNIV_INTERN
void
buf_flush_free_margin(void)
/*=======================*/
@@ -1053,8 +1237,7 @@ buf_flush_free_margin(void)
n_to_flush = buf_flush_LRU_recommendation();
if (n_to_flush > 0) {
- n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush,
- ut_dulint_zero);
+ n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush, 0);
if (n_flushed == ULINT_UNDEFINED) {
/* There was an LRU type flush batch already running;
let us wait for it to end */
@@ -1064,52 +1247,164 @@ buf_flush_free_margin(void)
}
}
-/**********************************************************************
-Validates the flush list. */
+/*********************************************************************//**
+Update the historical stats that we are collecting for flush rate
+heuristics at the end of each interval.
+Flush rate heuristic depends on (a) rate of redo log generation and
+(b) the rate at which LRU flush is happening. */
+UNIV_INTERN
+void
+buf_flush_stat_update(void)
+/*=======================*/
+{
+ buf_flush_stat_t* item;
+ ib_uint64_t lsn_diff;
+ ib_uint64_t lsn;
+ ulint n_flushed;
+
+ lsn = log_get_lsn();
+ if (buf_flush_stat_cur.redo == 0) {
+ /* First time around. Just update the current LSN
+ and return. */
+ buf_flush_stat_cur.redo = lsn;
+ return;
+ }
+
+ item = &buf_flush_stat_arr[buf_flush_stat_arr_ind];
+
+ /* values for this interval */
+ lsn_diff = lsn - buf_flush_stat_cur.redo;
+ n_flushed = buf_lru_flush_page_count
+ - buf_flush_stat_cur.n_flushed;
+
+ /* add the current value and subtract the obsolete entry. */
+ buf_flush_stat_sum.redo += lsn_diff - item->redo;
+ buf_flush_stat_sum.n_flushed += n_flushed - item->n_flushed;
+
+ /* put current entry in the array. */
+ item->redo = lsn_diff;
+ item->n_flushed = n_flushed;
+
+ /* update the index */
+ buf_flush_stat_arr_ind++;
+ buf_flush_stat_arr_ind %= BUF_FLUSH_STAT_N_INTERVAL;
+
+ /* reset the current entry. */
+ buf_flush_stat_cur.redo = lsn;
+ buf_flush_stat_cur.n_flushed = buf_lru_flush_page_count;
+}
+
+/*********************************************************************//**
+Determines the fraction of dirty pages that need to be flushed based
+on the speed at which we generate redo log. Note that if redo log
+is generated at a significant rate without corresponding increase
+in the number of dirty pages (for example, an in-memory workload)
+it can cause IO bursts of flushing. This function implements heuristics
+to avoid this burstiness.
+@return number of dirty pages to be flushed / second */
+UNIV_INTERN
+ulint
+buf_flush_get_desired_flush_rate(void)
+/*==================================*/
+{
+ ulint redo_avg;
+ ulint lru_flush_avg;
+ ulint n_dirty;
+ ulint n_flush_req;
+ lint rate;
+ ib_uint64_t lsn = log_get_lsn();
+ ulint log_capacity = log_get_capacity();
+
+ /* log_capacity should never be zero after the initialization
+ of log subsystem. */
+ ut_ad(log_capacity != 0);
+
+ /* Get total number of dirty pages. It is OK to access
+ flush_list without holding any mutex as we are using this
+ only for heuristics. */
+ n_dirty = UT_LIST_GET_LEN(buf_pool->flush_list);
+
+ /* An overflow can happen if we generate more than 2^32 bytes
+ of redo in this interval i.e.: 4G of redo in 1 second. We can
+ safely consider this as infinity because if we ever come close
+ to 4G we'll start a synchronous flush of dirty pages. */
+ /* redo_avg below is the average rate at which redo was generated
+ over the past BUF_FLUSH_STAT_N_INTERVAL intervals, plus the redo
+ generated in the current interval. */
+ redo_avg = (ulint) (buf_flush_stat_sum.redo
+ / BUF_FLUSH_STAT_N_INTERVAL
+ + (lsn - buf_flush_stat_cur.redo));
+
+ /* An overflow can happen possibly if we flush more than 2^32
+ pages in BUF_FLUSH_STAT_N_INTERVAL. This is a very very
+ unlikely scenario. Even when this happens it means that our
+ flush rate will be off the mark. It won't affect correctness
+ of any subsystem. */
+ /* lru_flush_avg below is rate at which pages are flushed as
+ part of LRU flush in past BUF_FLUSH_STAT_N_INTERVAL + the
+ number of pages flushed in the current interval. */
+ lru_flush_avg = buf_flush_stat_sum.n_flushed
+ / BUF_FLUSH_STAT_N_INTERVAL
+ + (buf_lru_flush_page_count
+ - buf_flush_stat_cur.n_flushed);
+
+ n_flush_req = (n_dirty * redo_avg) / log_capacity;
+
+ /* The number of pages that we want to flush from the flush
+ list is the difference between the required rate and the
+ number of pages that we are historically flushing from the
+ LRU list */
+ rate = n_flush_req - lru_flush_avg;
+ return(rate > 0 ? (ulint) rate : 0);
+}
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/******************************************************************//**
+Validates the flush list.
+@return TRUE if ok */
static
ibool
buf_flush_validate_low(void)
/*========================*/
- /* out: TRUE if ok */
{
- buf_block_t* block;
- dulint om;
+ buf_page_t* bpage;
- UT_LIST_VALIDATE(flush_list, buf_block_t, buf_pool->flush_list);
+ UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
+ ut_ad(ut_list_node_313->in_flush_list));
- block = UT_LIST_GET_FIRST(buf_pool->flush_list);
+ bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
- while (block != NULL) {
- om = block->oldest_modification;
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
- ut_a(ut_dulint_cmp(om, ut_dulint_zero) > 0);
+ while (bpage != NULL) {
+ const ib_uint64_t om = bpage->oldest_modification;
+ ut_ad(bpage->in_flush_list);
+ ut_a(buf_page_in_file(bpage));
+ ut_a(om > 0);
- block = UT_LIST_GET_NEXT(flush_list, block);
+ bpage = UT_LIST_GET_NEXT(list, bpage);
- if (block) {
- ut_a(ut_dulint_cmp(om, block->oldest_modification)
- >= 0);
- }
+ ut_a(!bpage || om >= bpage->oldest_modification);
}
return(TRUE);
}
-/**********************************************************************
-Validates the flush list. */
-
+/******************************************************************//**
+Validates the flush list.
+@return TRUE if ok */
+UNIV_INTERN
ibool
buf_flush_validate(void)
/*====================*/
- /* out: TRUE if ok */
{
ibool ret;
- mutex_enter(&(buf_pool->mutex));
+ buf_pool_mutex_enter();
ret = buf_flush_validate_low();
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
return(ret);
}
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
index d3c787d1578..4f19fd13fa5 100644
--- a/storage/innobase/buf/buf0lru.c
+++ b/storage/innobase/buf/buf0lru.c
@@ -1,7 +1,24 @@
-/******************************************************
-The database buffer replacement algorithm
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
-(c) 1995 Innobase Oy
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file buf/buf0lru.c
+The database buffer replacement algorithm
Created 11/5/1995 Heikki Tuuri
*******************************************************/
@@ -10,7 +27,6 @@ Created 11/5/1995 Heikki Tuuri
#ifdef UNIV_NONINL
#include "buf0lru.ic"
-#include "srv0srv.h" /* Needed to getsrv_print_innodb_monitor */
#endif
#include "ut0byte.h"
@@ -22,64 +38,175 @@ Created 11/5/1995 Heikki Tuuri
#include "os0sync.h"
#include "fil0fil.h"
#include "btr0btr.h"
+#include "buf0buddy.h"
#include "buf0buf.h"
#include "buf0flu.h"
#include "buf0rea.h"
#include "btr0sea.h"
+#include "ibuf0ibuf.h"
#include "os0file.h"
+#include "page0zip.h"
#include "log0recv.h"
+#include "srv0srv.h"
-/* The number of blocks from the LRU_old pointer onward, including the block
-pointed to, must be 3/8 of the whole LRU list length, except that the
-tolerance defined below is allowed. Note that the tolerance must be small
-enough such that for even the BUF_LRU_OLD_MIN_LEN long LRU list, the
-LRU_old pointer is not allowed to point to either end of the LRU list. */
+/** The number of blocks from the LRU_old pointer onward, including
+the block pointed to, must be buf_LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
+of the whole LRU list length, except that the tolerance defined below
+is allowed. Note that the tolerance must be small enough such that for
+even the BUF_LRU_OLD_MIN_LEN long LRU list, the LRU_old pointer is not
+allowed to point to either end of the LRU list. */
#define BUF_LRU_OLD_TOLERANCE 20
-/* The whole LRU list length is divided by this number to determine an
-initial segment in buf_LRU_get_recent_limit */
-
-#define BUF_LRU_INITIAL_RATIO 8
+/** The minimum amount of non-old blocks when the LRU_old list exists
+(that is, when there are more than BUF_LRU_OLD_MIN_LEN blocks).
+@see buf_LRU_old_adjust_len */
+#define BUF_LRU_NON_OLD_MIN_LEN 5
+#if BUF_LRU_NON_OLD_MIN_LEN >= BUF_LRU_OLD_MIN_LEN
+# error "BUF_LRU_NON_OLD_MIN_LEN >= BUF_LRU_OLD_MIN_LEN"
+#endif
-/* When dropping the search hash index entries before deleting an ibd
+/** When dropping the search hash index entries before deleting an ibd
file, we build a local array of pages belonging to that tablespace
in the buffer pool. Following is the size of that array. */
#define BUF_LRU_DROP_SEARCH_HASH_SIZE 1024
-/* If we switch on the InnoDB monitor because there are too few available
+/** If we switch on the InnoDB monitor because there are too few available
frames in the buffer pool, we set this to TRUE */
-ibool buf_lru_switched_on_innodb_mon = FALSE;
-
-/**********************************************************************
-Takes a block out of the LRU list and page hash table and sets the block
-state to BUF_BLOCK_REMOVE_HASH. */
+static ibool buf_lru_switched_on_innodb_mon = FALSE;
+
+/******************************************************************//**
+These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O
+and page_zip_decompress() operations. Based on the statistics,
+buf_LRU_evict_from_unzip_LRU() decides if we want to evict from
+unzip_LRU or the regular LRU. From unzip_LRU, we will only evict the
+uncompressed frame (meaning we can evict dirty blocks as well). From
+the regular LRU, we will evict the entire block (i.e.: both the
+uncompressed and compressed data), which must be clean. */
+
+/* @{ */
+
+/** Number of intervals for which we keep the history of these stats.
+Each interval is 1 second, defined by the rate at which
+srv_error_monitor_thread() calls buf_LRU_stat_update(). */
+#define BUF_LRU_STAT_N_INTERVAL 50
+
+/** Co-efficient with which we multiply I/O operations to equate them
+with page_zip_decompress() operations. */
+#define BUF_LRU_IO_TO_UNZIP_FACTOR 50
+
+/** Sampled values of buf_LRU_stat_cur.
+Protected by buf_pool_mutex. Updated by buf_LRU_stat_update(). */
+static buf_LRU_stat_t buf_LRU_stat_arr[BUF_LRU_STAT_N_INTERVAL];
+/** Cursor to buf_LRU_stat_arr[] that is updated in a round-robin fashion. */
+static ulint buf_LRU_stat_arr_ind;
+
+/** Current operation counters. Not protected by any mutex. Cleared
+by buf_LRU_stat_update(). */
+UNIV_INTERN buf_LRU_stat_t buf_LRU_stat_cur;
+
+/** Running sum of past values of buf_LRU_stat_cur.
+Updated by buf_LRU_stat_update(). Protected by buf_pool_mutex. */
+UNIV_INTERN buf_LRU_stat_t buf_LRU_stat_sum;
+
+/* @} */
+
+/** @name Heuristics for detecting index scan @{ */
+/** Reserve this much/BUF_LRU_OLD_RATIO_DIV of the buffer pool for
+"old" blocks. Protected by buf_pool_mutex. */
+UNIV_INTERN uint buf_LRU_old_ratio;
+/** Move blocks to "new" LRU list only if the first access was at
+least this many milliseconds ago. Not protected by any mutex or latch. */
+UNIV_INTERN uint buf_LRU_old_threshold_ms;
+/* @} */
+
+/******************************************************************//**
+Takes a block out of the LRU list and page hash table.
+If the block is compressed-only (BUF_BLOCK_ZIP_PAGE),
+the object will be freed and buf_pool_zip_mutex will be released.
+
+If a compressed page or a compressed-only block descriptor is freed,
+other compressed pages or compressed-only block descriptors may be
+relocated.
+@return the new state of the block (BUF_BLOCK_ZIP_FREE if the state
+was BUF_BLOCK_ZIP_PAGE, or BUF_BLOCK_REMOVE_HASH otherwise) */
static
-void
+enum buf_page_state
buf_LRU_block_remove_hashed_page(
/*=============================*/
- buf_block_t* block); /* in: block, must contain a file page and
+ buf_page_t* bpage, /*!< in: block, must contain a file page and
be in a state where it can be freed; there
may or may not be a hash index to the page */
-/**********************************************************************
+ ibool zip); /*!< in: TRUE if should remove also the
+ compressed page of an uncompressed page */
+/******************************************************************//**
Puts a file page that has no hash index to the free list. */
static
void
buf_LRU_block_free_hashed_page(
/*===========================*/
- buf_block_t* block); /* in: block, must contain a file page and
+ buf_block_t* block); /*!< in: block, must contain a file page and
be in a state where it can be freed */
-/**********************************************************************
+/******************************************************************//**
+Determines if the unzip_LRU list should be used for evicting a victim
+instead of the general LRU list.
+@return TRUE if should use unzip_LRU */
+UNIV_INLINE
+ibool
+buf_LRU_evict_from_unzip_LRU(void)
+/*==============================*/
+{
+ ulint io_avg;
+ ulint unzip_avg;
+
+ ut_ad(buf_pool_mutex_own());
+
+ /* If the unzip_LRU list is empty, we can only use the LRU. */
+ if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
+ return(FALSE);
+ }
+
+ /* If unzip_LRU is at most 10% of the size of the LRU list,
+ then use the LRU. This slack allows us to keep hot
+ decompressed pages in the buffer pool. */
+ if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
+ <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
+ return(FALSE);
+ }
+
+ /* If eviction hasn't started yet, we assume by default
+ that a workload is disk bound. */
+ if (buf_pool->freed_page_clock == 0) {
+ return(TRUE);
+ }
+
+ /* Calculate the average over past intervals, and add the values
+ of the current interval. */
+ io_avg = buf_LRU_stat_sum.io / BUF_LRU_STAT_N_INTERVAL
+ + buf_LRU_stat_cur.io;
+ unzip_avg = buf_LRU_stat_sum.unzip / BUF_LRU_STAT_N_INTERVAL
+ + buf_LRU_stat_cur.unzip;
+
+ /* Decide based on our formula. If the load is I/O bound
+ (unzip_avg is smaller than the weighted io_avg), evict an
+ uncompressed frame from unzip_LRU. Otherwise we assume that
+ the load is CPU bound and evict from the regular LRU. */
+ return(unzip_avg <= io_avg * BUF_LRU_IO_TO_UNZIP_FACTOR);
+}
+
+/******************************************************************//**
Attempts to drop page hash index on a batch of pages belonging to a
particular space id. */
static
void
buf_LRU_drop_page_hash_batch(
/*=========================*/
- ulint id, /* in: space id */
- const ulint* arr, /* in: array of page_no */
- ulint count) /* in: number of entries in array */
+ ulint space_id, /*!< in: space id */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ const ulint* arr, /*!< in: array of page_no */
+ ulint count) /*!< in: number of entries in array */
{
ulint i;
@@ -87,11 +214,12 @@ buf_LRU_drop_page_hash_batch(
ut_ad(count <= BUF_LRU_DROP_SEARCH_HASH_SIZE);
for (i = 0; i < count; ++i) {
- btr_search_drop_page_hash_when_freed(id, arr[i]);
+ btr_search_drop_page_hash_when_freed(space_id, zip_size,
+ arr[i]);
}
}
-/**********************************************************************
+/******************************************************************//**
When doing a DROP TABLE/DISCARD TABLESPACE we have to drop all page
hash index entries belonging to that table. This function tries to
do that in batch. Note that this is a 'best effort' attempt and does
@@ -100,101 +228,110 @@ static
void
buf_LRU_drop_page_hash_for_tablespace(
/*==================================*/
- ulint id) /* in: space id */
+ ulint id) /*!< in: space id */
{
- buf_block_t* block;
+ buf_page_t* bpage;
ulint* page_arr;
ulint num_entries;
+ ulint zip_size;
+
+ zip_size = fil_space_get_zip_size(id);
+
+ if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
+ /* Somehow, the tablespace does not exist. Nothing to drop. */
+ ut_ad(0);
+ return;
+ }
page_arr = ut_malloc(sizeof(ulint)
* BUF_LRU_DROP_SEARCH_HASH_SIZE);
- mutex_enter(&buf_pool->mutex);
+ buf_pool_mutex_enter();
scan_again:
num_entries = 0;
- block = UT_LIST_GET_LAST(buf_pool->LRU);
+ bpage = UT_LIST_GET_LAST(buf_pool->LRU);
- while (block != NULL) {
- buf_block_t* prev_block;
+ while (bpage != NULL) {
+ mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ buf_page_t* prev_bpage;
- mutex_enter(&block->mutex);
- prev_block = UT_LIST_GET_PREV(LRU, block);
+ mutex_enter(block_mutex);
+ prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+ ut_a(buf_page_in_file(bpage));
- if (block->space != id
- || block->buf_fix_count > 0
- || block->io_fix != 0) {
+ if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
+ || bpage->space != id
+ || bpage->buf_fix_count > 0
+ || bpage->io_fix != BUF_IO_NONE) {
/* We leave the fixed pages as is in this scan.
To be dealt with later in the final scan. */
- mutex_exit(&block->mutex);
+ mutex_exit(block_mutex);
goto next_page;
}
- ut_ad(block->space == id);
- if (block->is_hashed) {
+ if (((buf_block_t*) bpage)->is_hashed) {
/* Store the offset(i.e.: page_no) in the array
so that we can drop hash index in a batch
later. */
- page_arr[num_entries] = block->offset;
- mutex_exit(&block->mutex);
+ page_arr[num_entries] = bpage->offset;
+ mutex_exit(block_mutex);
ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE);
++num_entries;
if (num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE) {
goto next_page;
}
- /* Array full. We release the buf_pool->mutex to
+ /* Array full. We release the buf_pool_mutex to
obey the latching order. */
- mutex_exit(&buf_pool->mutex);
+ buf_pool_mutex_exit();
- buf_LRU_drop_page_hash_batch(id, page_arr,
+ buf_LRU_drop_page_hash_batch(id, zip_size, page_arr,
num_entries);
num_entries = 0;
- mutex_enter(&buf_pool->mutex);
+ buf_pool_mutex_enter();
} else {
- mutex_exit(&block->mutex);
+ mutex_exit(block_mutex);
}
next_page:
- /* Note that we may have released the buf_pool->mutex
- above after reading the prev_block during processing
+ /* Note that we may have released the buf_pool mutex
+ above after reading the prev_bpage during processing
of a page_hash_batch (i.e.: when the array was full).
- This means that prev_block can change in LRU list.
+ This means that prev_bpage can change in LRU list.
This is OK because this function is a 'best effort'
to drop as many search hash entries as possible and
it does not guarantee that ALL such entries will be
dropped. */
- block = prev_block;
+ bpage = prev_bpage;
- /* If, however, block has been removed from LRU list
+ /* If, however, bpage has been removed from LRU list
to the free list then we should restart the scan.
- block->state is protected by buf_pool->mutex. */
- if (block && block->state != BUF_BLOCK_FILE_PAGE) {
+ bpage->state is protected by buf_pool mutex. */
+ if (bpage && !buf_page_in_file(bpage)) {
ut_a(num_entries == 0);
goto scan_again;
}
}
- mutex_exit(&buf_pool->mutex);
+ buf_pool_mutex_exit();
/* Drop any remaining batch of search hashed pages. */
- buf_LRU_drop_page_hash_batch(id, page_arr, num_entries);
+ buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
ut_free(page_arr);
}
-/**********************************************************************
+/******************************************************************//**
Invalidates all pages belonging to a given tablespace when we are deleting
the data file(s) of that tablespace. */
-
+UNIV_INTERN
void
buf_LRU_invalidate_tablespace(
/*==========================*/
- ulint id) /* in: space id */
+ ulint id) /*!< in: space id */
{
- buf_block_t* block;
- ulint page_no;
+ buf_page_t* bpage;
ibool all_freed;
/* Before we attempt to drop pages one by one we first
@@ -206,77 +343,96 @@ buf_LRU_invalidate_tablespace(
buf_LRU_drop_page_hash_for_tablespace(id);
scan_again:
- mutex_enter(&(buf_pool->mutex));
+ buf_pool_mutex_enter();
all_freed = TRUE;
- block = UT_LIST_GET_LAST(buf_pool->LRU);
+ bpage = UT_LIST_GET_LAST(buf_pool->LRU);
- while (block != NULL) {
- buf_block_t* prev_block;
+ while (bpage != NULL) {
+ mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ buf_page_t* prev_bpage;
- mutex_enter(&block->mutex);
- prev_block = UT_LIST_GET_PREV(LRU, block);
+ ut_a(buf_page_in_file(bpage));
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+ mutex_enter(block_mutex);
+ prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
- if (block->space == id
- && (block->buf_fix_count > 0 || block->io_fix != 0)) {
+ if (buf_page_get_space(bpage) == id) {
+ if (bpage->buf_fix_count > 0
+ || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
- /* We cannot remove this page during this scan yet;
- maybe the system is currently reading it in, or
- flushing the modifications to the file */
+ /* We cannot remove this page during
+ this scan yet; maybe the system is
+ currently reading it in, or flushing
+ the modifications to the file */
- all_freed = FALSE;
+ all_freed = FALSE;
- goto next_page;
- }
+ goto next_page;
+ }
- if (block->space == id) {
#ifdef UNIV_DEBUG
if (buf_debug_prints) {
fprintf(stderr,
"Dropping space %lu page %lu\n",
- (ulong) block->space,
- (ulong) block->offset);
+ (ulong) buf_page_get_space(bpage),
+ (ulong) buf_page_get_page_no(bpage));
}
#endif
- if (block->is_hashed) {
- page_no = block->offset;
+ if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE
+ && ((buf_block_t*) bpage)->is_hashed) {
+ ulint page_no;
+ ulint zip_size;
+
+ buf_pool_mutex_exit();
- mutex_exit(&block->mutex);
+ zip_size = buf_page_get_zip_size(bpage);
+ page_no = buf_page_get_page_no(bpage);
- mutex_exit(&(buf_pool->mutex));
+ mutex_exit(block_mutex);
/* Note that the following call will acquire
an S-latch on the page */
- btr_search_drop_page_hash_when_freed(id,
- page_no);
+ btr_search_drop_page_hash_when_freed(
+ id, zip_size, page_no);
goto scan_again;
}
- if (0 != ut_dulint_cmp(block->oldest_modification,
- ut_dulint_zero)) {
+ if (bpage->oldest_modification != 0) {
- /* Remove from the flush list of modified
- blocks */
- block->oldest_modification = ut_dulint_zero;
-
- UT_LIST_REMOVE(flush_list,
- buf_pool->flush_list, block);
+ buf_flush_remove(bpage);
}
/* Remove from the LRU list */
- buf_LRU_block_remove_hashed_page(block);
- buf_LRU_block_free_hashed_page(block);
+ if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
+ != BUF_BLOCK_ZIP_FREE) {
+ buf_LRU_block_free_hashed_page((buf_block_t*)
+ bpage);
+ } else {
+ /* The block_mutex should have been
+ released by buf_LRU_block_remove_hashed_page()
+ when it returns BUF_BLOCK_ZIP_FREE. */
+ ut_ad(block_mutex == &buf_pool_zip_mutex);
+ ut_ad(!mutex_own(block_mutex));
+
+ /* The compressed block descriptor
+ (bpage) has been deallocated and
+ block_mutex released. Also,
+ buf_buddy_free() may have relocated
+ prev_bpage. Rescan the LRU list. */
+
+ bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+ continue;
+ }
}
next_page:
- mutex_exit(&block->mutex);
- block = prev_block;
+ mutex_exit(block_mutex);
+ bpage = prev_bpage;
}
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
if (!all_freed) {
os_thread_sleep(20000);
@@ -285,142 +441,215 @@ next_page:
}
}
-/**********************************************************************
-Gets the minimum LRU_position field for the blocks in an initial segment
-(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not
-guaranteed to be precise, because the ulint_clock may wrap around. */
+/********************************************************************//**
+Insert a compressed block into buf_pool->zip_clean in the LRU order. */
+UNIV_INTERN
+void
+buf_LRU_insert_zip_clean(
+/*=====================*/
+ buf_page_t* bpage) /*!< in: pointer to the block in question */
+{
+ buf_page_t* b;
+
+ ut_ad(buf_pool_mutex_own());
+ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
-ulint
-buf_LRU_get_recent_limit(void)
-/*==========================*/
- /* out: the limit; zero if could not determine it */
+ /* Find the first successor of bpage in the LRU list
+ that is in the zip_clean list. */
+ b = bpage;
+ do {
+ b = UT_LIST_GET_NEXT(LRU, b);
+ } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
+
+ /* Insert bpage before b, i.e., after the predecessor of b. */
+ if (b) {
+ b = UT_LIST_GET_PREV(list, b);
+ }
+
+ if (b) {
+ UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
+ } else {
+ UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
+ }
+}
+
+/******************************************************************//**
+Try to free an uncompressed page of a compressed block from the unzip
+LRU list. The compressed page is preserved, and it need not be clean.
+@return TRUE if freed */
+UNIV_INLINE
+ibool
+buf_LRU_free_from_unzip_LRU_list(
+/*=============================*/
+ ulint n_iterations) /*!< in: how many times this has been called
+ repeatedly without result: a high value means
+ that we should search farther; we will search
+ n_iterations / 5 of the unzip_LRU list,
+ or nothing if n_iterations >= 5 */
{
buf_block_t* block;
- ulint len;
- ulint limit;
+ ulint distance;
- mutex_enter(&(buf_pool->mutex));
+ ut_ad(buf_pool_mutex_own());
- len = UT_LIST_GET_LEN(buf_pool->LRU);
+ /* Theoretically it should be much easier to find a victim
+ from unzip_LRU as we can choose even a dirty block (as we'll
+ be evicting only the uncompressed frame). In a very unlikely
+ eventuality that we are unable to find a victim from
+ unzip_LRU, we fall back to the regular LRU list. We do this
+ if we have done five iterations so far. */
- if (len < BUF_LRU_OLD_MIN_LEN) {
- /* The LRU list is too short to do read-ahead */
+ if (UNIV_UNLIKELY(n_iterations >= 5)
+ || !buf_LRU_evict_from_unzip_LRU()) {
- mutex_exit(&(buf_pool->mutex));
-
- return(0);
+ return(FALSE);
}
- block = UT_LIST_GET_FIRST(buf_pool->LRU);
+ distance = 100 + (n_iterations
+ * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
- limit = block->LRU_position - len / BUF_LRU_INITIAL_RATIO;
+ for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
+ UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
+ block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
- mutex_exit(&(buf_pool->mutex));
+ enum buf_lru_free_block_status freed;
- return(limit);
-}
+ ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+ ut_ad(block->in_unzip_LRU_list);
+ ut_ad(block->page.in_LRU_list);
-/**********************************************************************
-Look for a replaceable block from the end of the LRU list and put it to
-the free list if found. */
+ mutex_enter(&block->mutex);
+ freed = buf_LRU_free_block(&block->page, FALSE, NULL);
+ mutex_exit(&block->mutex);
+
+ switch (freed) {
+ case BUF_LRU_FREED:
+ return(TRUE);
+
+ case BUF_LRU_CANNOT_RELOCATE:
+ /* If we failed to relocate, try
+ regular LRU eviction. */
+ return(FALSE);
+
+ case BUF_LRU_NOT_FREED:
+ /* The block was buffer-fixed or I/O-fixed.
+ Keep looking. */
+ continue;
+ }
+
+ /* inappropriate return value from
+ buf_LRU_free_block() */
+ ut_error;
+ }
+ return(FALSE);
+}
+
+/******************************************************************//**
+Try to free a clean page from the common LRU list.
+@return TRUE if freed */
+UNIV_INLINE
ibool
-buf_LRU_search_and_free_block(
-/*==========================*/
- /* out: TRUE if freed */
- ulint n_iterations) /* in: how many times this has been called
+buf_LRU_free_from_common_LRU_list(
+/*==============================*/
+ ulint n_iterations) /*!< in: how many times this has been called
repeatedly without result: a high value means
- that we should search farther; if value is
- k < 10, then we only search k/10 * [number
- of pages in the buffer pool] from the end
- of the LRU list */
+ that we should search farther; if
+ n_iterations < 10, then we search
+ n_iterations / 10 * buf_pool->curr_size
+ pages from the end of the LRU list */
{
- buf_block_t* block;
- ulint distance = 0;
- ibool freed;
+ buf_page_t* bpage;
+ ulint distance;
- mutex_enter(&(buf_pool->mutex));
+ ut_ad(buf_pool_mutex_own());
- freed = FALSE;
- block = UT_LIST_GET_LAST(buf_pool->LRU);
+ distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
- while (block != NULL) {
- ut_a(block->in_LRU_list);
+ for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+ UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
+ bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
- mutex_enter(&block->mutex);
+ enum buf_lru_free_block_status freed;
+ unsigned accessed;
+ mutex_t* block_mutex
+ = buf_page_get_mutex(bpage);
- if (buf_flush_ready_for_replace(block)) {
+ ut_ad(buf_page_in_file(bpage));
+ ut_ad(bpage->in_LRU_list);
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr,
- "Putting space %lu page %lu"
- " to free list\n",
- (ulong) block->space,
- (ulong) block->offset);
- }
-#endif /* UNIV_DEBUG */
+ mutex_enter(block_mutex);
+ accessed = buf_page_is_accessed(bpage);
+ freed = buf_LRU_free_block(bpage, TRUE, NULL);
+ mutex_exit(block_mutex);
- buf_LRU_block_remove_hashed_page(block);
-
- mutex_exit(&(buf_pool->mutex));
- mutex_exit(&block->mutex);
-
- /* Remove possible adaptive hash index built on the
- page; in the case of AWE the block may not have a
- frame at all */
-
- if (block->frame) {
- /* The page was declared uninitialized
- by buf_LRU_block_remove_hashed_page().
- We need to flag the contents of the
- page valid (which it still is) in
- order to avoid bogus Valgrind
- warnings. */
- UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
- btr_search_drop_page_hash_index(block->frame);
- UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
+ switch (freed) {
+ case BUF_LRU_FREED:
+ /* Keep track of pages that are evicted without
+ ever being accessed. This gives us a measure of
+ the effectiveness of readahead */
+ if (!accessed) {
+ ++buf_pool->stat.n_ra_pages_evicted;
}
+ return(TRUE);
- ut_a(block->buf_fix_count == 0);
-
- mutex_enter(&(buf_pool->mutex));
- mutex_enter(&block->mutex);
-
- buf_LRU_block_free_hashed_page(block);
- freed = TRUE;
- mutex_exit(&block->mutex);
+ case BUF_LRU_NOT_FREED:
+ /* The block was dirty, buffer-fixed, or I/O-fixed.
+ Keep looking. */
+ continue;
+ case BUF_LRU_CANNOT_RELOCATE:
+ /* This should never occur, because we
+ want to discard the compressed page too. */
break;
}
- mutex_exit(&block->mutex);
+ /* inappropriate return value from
+ buf_LRU_free_block() */
+ ut_error;
+ }
- block = UT_LIST_GET_PREV(LRU, block);
- distance++;
+ return(FALSE);
+}
- if (!freed && n_iterations <= 10
- && distance > 100 + (n_iterations * buf_pool->curr_size)
- / 10) {
- buf_pool->LRU_flush_ended = 0;
+/******************************************************************//**
+Try to free a replaceable block.
+@return TRUE if found and freed */
+UNIV_INTERN
+ibool
+buf_LRU_search_and_free_block(
+/*==========================*/
+ ulint n_iterations) /*!< in: how many times this has been called
+ repeatedly without result: a high value means
+ that we should search farther; if
+ n_iterations < 10, then we search
+ n_iterations / 10 * buf_pool->curr_size
+ pages from the end of the LRU list; if
+ n_iterations < 5, then we will also search
+ n_iterations / 5 of the unzip_LRU list. */
+{
+ ibool freed = FALSE;
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_enter();
- return(FALSE);
- }
- }
- if (buf_pool->LRU_flush_ended > 0) {
- buf_pool->LRU_flush_ended--;
+ freed = buf_LRU_free_from_unzip_LRU_list(n_iterations);
+
+ if (!freed) {
+ freed = buf_LRU_free_from_common_LRU_list(n_iterations);
}
+
if (!freed) {
buf_pool->LRU_flush_ended = 0;
+ } else if (buf_pool->LRU_flush_ended > 0) {
+ buf_pool->LRU_flush_ended--;
}
- mutex_exit(&(buf_pool->mutex));
+
+ buf_pool_mutex_exit();
return(freed);
}
-/**********************************************************************
+/******************************************************************//**
Tries to remove LRU flushed blocks from the end of the LRU list and put them
to the free list. This is beneficial for the efficiency of the insert buffer
operation, as flushed pages from non-unique non-clustered indexes are here
@@ -428,62 +657,95 @@ taken out of the buffer pool, and their inserts redirected to the insert
buffer. Otherwise, the flushed blocks could get modified again before read
operations need new buffer blocks, and the i/o work done in flushing would be
wasted. */
-
+UNIV_INTERN
void
buf_LRU_try_free_flushed_blocks(void)
/*=================================*/
{
- mutex_enter(&(buf_pool->mutex));
+ buf_pool_mutex_enter();
while (buf_pool->LRU_flush_ended > 0) {
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
buf_LRU_search_and_free_block(1);
- mutex_enter(&(buf_pool->mutex));
+ buf_pool_mutex_enter();
}
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
}
-/**********************************************************************
+/******************************************************************//**
Returns TRUE if less than 25 % of the buffer pool is available. This can be
used in heuristics to prevent huge transactions eating up the whole buffer
-pool for their locks. */
-
+pool for their locks.
+@return TRUE if less than 25 % of buffer pool left */
+UNIV_INTERN
ibool
buf_LRU_buf_pool_running_out(void)
/*==============================*/
- /* out: TRUE if less than 25 % of buffer pool
- left */
{
ibool ret = FALSE;
- mutex_enter(&(buf_pool->mutex));
+ buf_pool_mutex_enter();
if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
- + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 4) {
+ + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 4) {
ret = TRUE;
}
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
return(ret);
}
-/**********************************************************************
-Returns a free block from buf_pool. The block is taken off the free list.
-If it is empty, blocks are moved from the end of the LRU list to the free
-list. */
+/******************************************************************//**
+Returns a free block from the buf_pool. The block is taken off the
+free list. If it is empty, returns NULL.
+@return a free control block, or NULL if the buf_pool->free list is empty */
+UNIV_INTERN
+buf_block_t*
+buf_LRU_get_free_only(void)
+/*=======================*/
+{
+ buf_block_t* block;
+
+ ut_ad(buf_pool_mutex_own());
+
+ block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
+
+ if (block) {
+ ut_ad(block->page.in_free_list);
+ ut_d(block->page.in_free_list = FALSE);
+ ut_ad(!block->page.in_flush_list);
+ ut_ad(!block->page.in_LRU_list);
+ ut_a(!buf_page_in_file(&block->page));
+ UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
+
+ mutex_enter(&block->mutex);
+
+ buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
+ UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
+
+ mutex_exit(&block->mutex);
+ }
+
+ return(block);
+}
+/******************************************************************//**
+Returns a free block from the buf_pool. The block is taken off the
+free list. If it is empty, blocks are moved from the end of the
+LRU list to the free list.
+@return the free control block, in state BUF_BLOCK_READY_FOR_USE */
+UNIV_INTERN
buf_block_t*
-buf_LRU_get_free_block(void)
-/*========================*/
- /* out: the free control block; also if AWE is
- used, it is guaranteed that the block has its
- page mapped to a frame when we return */
+buf_LRU_get_free_block(
+/*===================*/
+ ulint zip_size) /*!< in: compressed page size in bytes,
+ or 0 if uncompressed tablespace */
{
buf_block_t* block = NULL;
ibool freed;
@@ -491,10 +753,10 @@ buf_LRU_get_free_block(void)
ibool mon_value_was = FALSE;
ibool started_monitor = FALSE;
loop:
- mutex_enter(&(buf_pool->mutex));
+ buf_pool_mutex_enter();
if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
- + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 20) {
+ + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
ut_print_timestamp(stderr);
fprintf(stderr,
@@ -514,12 +776,14 @@ loop:
ut_error;
- } else if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
- + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 3) {
+ } else if (!recv_recovery_on
+ && (UT_LIST_GET_LEN(buf_pool->free)
+ + UT_LIST_GET_LEN(buf_pool->LRU))
+ < buf_pool->curr_size / 3) {
if (!buf_lru_switched_on_innodb_mon) {
- /* Over 67 % of the buffer pool is occupied by lock
+ /* Over 67 % of the buffer pool is occupied by lock
heaps or the adaptive hash index. This may be a memory
leak! */
@@ -556,39 +820,27 @@ loop:
}
/* If there is a block in the free list, take it */
- if (UT_LIST_GET_LEN(buf_pool->free) > 0) {
-
- block = UT_LIST_GET_FIRST(buf_pool->free);
- ut_a(block->in_free_list);
- UT_LIST_REMOVE(free, buf_pool->free, block);
- block->in_free_list = FALSE;
- ut_a(block->state != BUF_BLOCK_FILE_PAGE);
- ut_a(!block->in_LRU_list);
-
- if (srv_use_awe) {
- if (block->frame) {
- /* Remove from the list of mapped pages */
-
- UT_LIST_REMOVE(awe_LRU_free_mapped,
- buf_pool->awe_LRU_free_mapped,
- block);
- } else {
- /* We map the page to a frame; second param
- FALSE below because we do not want it to be
- added to the awe_LRU_free_mapped list */
+ block = buf_LRU_get_free_only();
+ if (block) {
- buf_awe_map_page_to_frame(block, FALSE);
- }
+#ifdef UNIV_DEBUG
+ block->page.zip.m_start =
+#endif /* UNIV_DEBUG */
+ block->page.zip.m_end =
+ block->page.zip.m_nonempty =
+ block->page.zip.n_blobs = 0;
+
+ if (UNIV_UNLIKELY(zip_size)) {
+ ibool lru;
+ page_zip_set_size(&block->page.zip, zip_size);
+ block->page.zip.data = buf_buddy_alloc(zip_size, &lru);
+ UNIV_MEM_DESC(block->page.zip.data, zip_size, block);
+ } else {
+ page_zip_set_size(&block->page.zip, 0);
+ block->page.zip.data = NULL;
}
- mutex_enter(&block->mutex);
-
- block->state = BUF_BLOCK_READY_FOR_USE;
- UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
-
- mutex_exit(&block->mutex);
-
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
if (started_monitor) {
srv_print_innodb_monitor = mon_value_was;
@@ -600,7 +852,7 @@ loop:
/* If no block was in the free list, search from the end of the LRU
list and try to free a block there */
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
freed = buf_LRU_search_and_free_block(n_iterations);
@@ -611,7 +863,7 @@ loop:
if (n_iterations > 30) {
ut_print_timestamp(stderr);
fprintf(stderr,
- "InnoDB: Warning: difficult to find free blocks from\n"
+ " InnoDB: Warning: difficult to find free blocks in\n"
"InnoDB: the buffer pool (%lu search iterations)!"
" Consider\n"
"InnoDB: increasing the buffer pool size.\n"
@@ -649,18 +901,18 @@ loop:
os_aio_simulated_wake_handler_threads();
- mutex_enter(&(buf_pool->mutex));
+ buf_pool_mutex_enter();
if (buf_pool->LRU_flush_ended > 0) {
/* We have written pages in an LRU flush. To make the insert
buffer more efficient, we try to move these pages to the free
list. */
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
buf_LRU_try_free_flushed_blocks();
} else {
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
}
if (n_iterations > 10) {
@@ -673,7 +925,7 @@ loop:
goto loop;
}
-/***********************************************************************
+/*******************************************************************//**
Moves the LRU_old pointer so that the length of the old blocks list
is inside the allowed limits. */
UNIV_INLINE
@@ -685,39 +937,62 @@ buf_LRU_old_adjust_len(void)
ulint new_len;
ut_a(buf_pool->LRU_old);
- ut_ad(mutex_own(&(buf_pool->mutex)));
- ut_ad(3 * (BUF_LRU_OLD_MIN_LEN / 8) > BUF_LRU_OLD_TOLERANCE + 5);
+ ut_ad(buf_pool_mutex_own());
+ ut_ad(buf_LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
+ ut_ad(buf_LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
+#if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
+# error "BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)"
+#endif
+#ifdef UNIV_LRU_DEBUG
+ /* buf_pool->LRU_old must be the first item in the LRU list
+ whose "old" flag is set. */
+ ut_a(buf_pool->LRU_old->old);
+ ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
+ || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
+ ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
+ || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
+#endif /* UNIV_LRU_DEBUG */
+
+ old_len = buf_pool->LRU_old_len;
+ new_len = ut_min(UT_LIST_GET_LEN(buf_pool->LRU)
+ * buf_LRU_old_ratio / BUF_LRU_OLD_RATIO_DIV,
+ UT_LIST_GET_LEN(buf_pool->LRU)
+ - (BUF_LRU_OLD_TOLERANCE
+ + BUF_LRU_NON_OLD_MIN_LEN));
for (;;) {
- old_len = buf_pool->LRU_old_len;
- new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8);
+ buf_page_t* LRU_old = buf_pool->LRU_old;
- ut_a(buf_pool->LRU_old->in_LRU_list);
+ ut_a(LRU_old);
+ ut_ad(LRU_old->in_LRU_list);
+#ifdef UNIV_LRU_DEBUG
+ ut_a(LRU_old->old);
+#endif /* UNIV_LRU_DEBUG */
/* Update the LRU_old pointer if necessary */
- if (old_len < new_len - BUF_LRU_OLD_TOLERANCE) {
+ if (old_len + BUF_LRU_OLD_TOLERANCE < new_len) {
- buf_pool->LRU_old = UT_LIST_GET_PREV(
- LRU, buf_pool->LRU_old);
- (buf_pool->LRU_old)->old = TRUE;
- buf_pool->LRU_old_len++;
+ buf_pool->LRU_old = LRU_old = UT_LIST_GET_PREV(
+ LRU, LRU_old);
+#ifdef UNIV_LRU_DEBUG
+ ut_a(!LRU_old->old);
+#endif /* UNIV_LRU_DEBUG */
+ old_len = ++buf_pool->LRU_old_len;
+ buf_page_set_old(LRU_old, TRUE);
} else if (old_len > new_len + BUF_LRU_OLD_TOLERANCE) {
- (buf_pool->LRU_old)->old = FALSE;
- buf_pool->LRU_old = UT_LIST_GET_NEXT(
- LRU, buf_pool->LRU_old);
- buf_pool->LRU_old_len--;
+ buf_pool->LRU_old = UT_LIST_GET_NEXT(LRU, LRU_old);
+ old_len = --buf_pool->LRU_old_len;
+ buf_page_set_old(LRU_old, FALSE);
} else {
- ut_a(buf_pool->LRU_old); /* Check that we did not
- fall out of the LRU list */
return;
}
}
}
-/***********************************************************************
+/*******************************************************************//**
Initializes the old blocks pointer in the LRU list. This function should be
called when the LRU list grows to BUF_LRU_OLD_MIN_LEN length. */
static
@@ -725,22 +1000,22 @@ void
buf_LRU_old_init(void)
/*==================*/
{
- buf_block_t* block;
+ buf_page_t* bpage;
- ut_ad(mutex_own(&(buf_pool->mutex)));
+ ut_ad(buf_pool_mutex_own());
ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
/* We first initialize all blocks in the LRU list as old and then use
the adjust function to move the LRU_old pointer to the right
position */
- block = UT_LIST_GET_FIRST(buf_pool->LRU);
-
- while (block != NULL) {
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
- ut_a(block->in_LRU_list);
- block->old = TRUE;
- block = UT_LIST_GET_NEXT(LRU, block);
+ for (bpage = UT_LIST_GET_LAST(buf_pool->LRU); bpage != NULL;
+ bpage = UT_LIST_GET_PREV(LRU, bpage)) {
+ ut_ad(bpage->in_LRU_list);
+ ut_ad(buf_page_in_file(bpage));
+ /* This loop temporarily violates the
+ assertions of buf_page_set_old(). */
+ bpage->old = TRUE;
}
buf_pool->LRU_old = UT_LIST_GET_FIRST(buf_pool->LRU);
@@ -749,52 +1024,86 @@ buf_LRU_old_init(void)
buf_LRU_old_adjust_len();
}
-/**********************************************************************
+/******************************************************************//**
+Remove a block from the unzip_LRU list if it belonged to the list. */
+static
+void
+buf_unzip_LRU_remove_block_if_needed(
+/*=================================*/
+ buf_page_t* bpage) /*!< in/out: control block */
+{
+ ut_ad(buf_pool);
+ ut_ad(bpage);
+ ut_ad(buf_page_in_file(bpage));
+ ut_ad(buf_pool_mutex_own());
+
+ if (buf_page_belongs_to_unzip_LRU(bpage)) {
+ buf_block_t* block = (buf_block_t*) bpage;
+
+ ut_ad(block->in_unzip_LRU_list);
+ ut_d(block->in_unzip_LRU_list = FALSE);
+
+ UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
+ }
+}
+
+/******************************************************************//**
Removes a block from the LRU list. */
UNIV_INLINE
void
buf_LRU_remove_block(
/*=================*/
- buf_block_t* block) /* in: control block */
+ buf_page_t* bpage) /*!< in: control block */
{
ut_ad(buf_pool);
- ut_ad(block);
- ut_ad(mutex_own(&(buf_pool->mutex)));
+ ut_ad(bpage);
+ ut_ad(buf_pool_mutex_own());
+
+ ut_a(buf_page_in_file(bpage));
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
- ut_a(block->in_LRU_list);
+ ut_ad(bpage->in_LRU_list);
/* If the LRU_old pointer is defined and points to just this block,
move it backward one step */
- if (block == buf_pool->LRU_old) {
+ if (UNIV_UNLIKELY(bpage == buf_pool->LRU_old)) {
- /* Below: the previous block is guaranteed to exist, because
- the LRU_old pointer is only allowed to differ by the
- tolerance value from strict 3/8 of the LRU list length. */
+ /* Below: the previous block is guaranteed to exist,
+ because the LRU_old pointer is only allowed to differ
+ by BUF_LRU_OLD_TOLERANCE from strict
+ buf_LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV of the LRU
+ list length. */
+ buf_page_t* prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
- buf_pool->LRU_old = UT_LIST_GET_PREV(LRU, block);
- (buf_pool->LRU_old)->old = TRUE;
+ ut_a(prev_bpage);
+#ifdef UNIV_LRU_DEBUG
+ ut_a(!prev_bpage->old);
+#endif /* UNIV_LRU_DEBUG */
+ buf_pool->LRU_old = prev_bpage;
+ buf_page_set_old(prev_bpage, TRUE);
buf_pool->LRU_old_len++;
- ut_a(buf_pool->LRU_old);
}
/* Remove the block from the LRU list */
- UT_LIST_REMOVE(LRU, buf_pool->LRU, block);
- block->in_LRU_list = FALSE;
+ UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
+ ut_d(bpage->in_LRU_list = FALSE);
- if (srv_use_awe && block->frame) {
- /* Remove from the list of mapped pages */
-
- UT_LIST_REMOVE(awe_LRU_free_mapped,
- buf_pool->awe_LRU_free_mapped, block);
- }
+ buf_unzip_LRU_remove_block_if_needed(bpage);
- /* If the LRU list is so short that LRU_old not defined, return */
+ /* If the LRU list is so short that LRU_old is not defined,
+ clear the "old" flags and return */
if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
+ for (bpage = UT_LIST_GET_FIRST(buf_pool->LRU); bpage != NULL;
+ bpage = UT_LIST_GET_NEXT(LRU, bpage)) {
+ /* This loop temporarily violates the
+ assertions of buf_page_set_old(). */
+ bpage->old = FALSE;
+ }
+
buf_pool->LRU_old = NULL;
+ buf_pool->LRU_old_len = 0;
return;
}
@@ -802,7 +1111,7 @@ buf_LRU_remove_block(
ut_ad(buf_pool->LRU_old);
/* Update the LRU_old_len field if necessary */
- if (block->old) {
+ if (buf_page_is_old(bpage)) {
buf_pool->LRU_old_len--;
}
@@ -811,47 +1120,49 @@ buf_LRU_remove_block(
buf_LRU_old_adjust_len();
}
-/**********************************************************************
-Adds a block to the LRU list end. */
-UNIV_INLINE
+/******************************************************************//**
+Adds a block to the LRU list of decompressed zip pages. */
+UNIV_INTERN
void
-buf_LRU_add_block_to_end_low(
-/*=========================*/
- buf_block_t* block) /* in: control block */
+buf_unzip_LRU_add_block(
+/*====================*/
+ buf_block_t* block, /*!< in: control block */
+ ibool old) /*!< in: TRUE if should be put to the end
+ of the list, else put to the start */
{
- buf_block_t* last_block;
-
ut_ad(buf_pool);
ut_ad(block);
- ut_ad(mutex_own(&(buf_pool->mutex)));
-
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+ ut_ad(buf_pool_mutex_own());
- block->old = TRUE;
+ ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
- last_block = UT_LIST_GET_LAST(buf_pool->LRU);
+ ut_ad(!block->in_unzip_LRU_list);
+ ut_d(block->in_unzip_LRU_list = TRUE);
- if (last_block) {
- block->LRU_position = last_block->LRU_position;
+ if (old) {
+ UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
} else {
- block->LRU_position = buf_pool_clock_tic();
+ UT_LIST_ADD_FIRST(unzip_LRU, buf_pool->unzip_LRU, block);
}
+}
- ut_a(!block->in_LRU_list);
- UT_LIST_ADD_LAST(LRU, buf_pool->LRU, block);
- block->in_LRU_list = TRUE;
-
- if (srv_use_awe && block->frame) {
- /* Add to the list of mapped pages */
-
- UT_LIST_ADD_LAST(awe_LRU_free_mapped,
- buf_pool->awe_LRU_free_mapped, block);
- }
+/******************************************************************//**
+Adds a block to the LRU list end. */
+UNIV_INLINE
+void
+buf_LRU_add_block_to_end_low(
+/*=========================*/
+ buf_page_t* bpage) /*!< in: control block */
+{
+ ut_ad(buf_pool);
+ ut_ad(bpage);
+ ut_ad(buf_pool_mutex_own());
- if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
+ ut_a(buf_page_in_file(bpage));
- buf_pool->LRU_old_len++;
- }
+ ut_ad(!bpage->in_LRU_list);
+ UT_LIST_ADD_LAST(LRU, buf_pool->LRU, bpage);
+ ut_d(bpage->in_LRU_list = TRUE);
if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
@@ -859,6 +1170,8 @@ buf_LRU_add_block_to_end_low(
/* Adjust the length of the old block list if necessary */
+ buf_page_set_old(bpage, TRUE);
+ buf_pool->LRU_old_len++;
buf_LRU_old_adjust_len();
} else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {
@@ -867,60 +1180,57 @@ buf_LRU_add_block_to_end_low(
defined: init it */
buf_LRU_old_init();
+ } else {
+ buf_page_set_old(bpage, buf_pool->LRU_old != NULL);
+ }
+
+ /* If this is a zipped block with decompressed frame as well
+ then put it on the unzip_LRU list */
+ if (buf_page_belongs_to_unzip_LRU(bpage)) {
+ buf_unzip_LRU_add_block((buf_block_t*) bpage, TRUE);
}
}
-/**********************************************************************
+/******************************************************************//**
Adds a block to the LRU list. */
UNIV_INLINE
void
buf_LRU_add_block_low(
/*==================*/
- buf_block_t* block, /* in: control block */
- ibool old) /* in: TRUE if should be put to the old blocks
+ buf_page_t* bpage, /*!< in: control block */
+ ibool old) /*!< in: TRUE if should be put to the old blocks
in the LRU list, else put to the start; if the
LRU list is very short, the block is added to
the start, regardless of this parameter */
{
- ulint cl;
-
ut_ad(buf_pool);
- ut_ad(block);
- ut_ad(mutex_own(&(buf_pool->mutex)));
-
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
- ut_a(!block->in_LRU_list);
-
- block->old = old;
- cl = buf_pool_clock_tic();
+ ut_ad(bpage);
+ ut_ad(buf_pool_mutex_own());
- if (srv_use_awe && block->frame) {
- /* Add to the list of mapped pages; for simplicity we always
- add to the start, even if the user would have set 'old'
- TRUE */
-
- UT_LIST_ADD_FIRST(awe_LRU_free_mapped,
- buf_pool->awe_LRU_free_mapped, block);
- }
+ ut_a(buf_page_in_file(bpage));
+ ut_ad(!bpage->in_LRU_list);
if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) {
- UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, block);
+ UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, bpage);
- block->LRU_position = cl;
- block->freed_page_clock = buf_pool->freed_page_clock;
+ bpage->freed_page_clock = buf_pool->freed_page_clock;
} else {
+#ifdef UNIV_LRU_DEBUG
+ /* buf_pool->LRU_old must be the first item in the LRU list
+ whose "old" flag is set. */
+ ut_a(buf_pool->LRU_old->old);
+ ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
+ || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
+ ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
+ || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
+#endif /* UNIV_LRU_DEBUG */
UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, buf_pool->LRU_old,
- block);
+ bpage);
buf_pool->LRU_old_len++;
-
- /* We copy the LRU position field of the previous block
- to the new block */
-
- block->LRU_position = (buf_pool->LRU_old)->LRU_position;
}
- block->in_LRU_list = TRUE;
+ ut_d(bpage->in_LRU_list = TRUE);
if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
@@ -928,6 +1238,7 @@ buf_LRU_add_block_low(
/* Adjust the length of the old block list if necessary */
+ buf_page_set_old(bpage, old);
buf_LRU_old_adjust_len();
} else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {
@@ -936,302 +1247,846 @@ buf_LRU_add_block_low(
defined: init it */
buf_LRU_old_init();
+ } else {
+ buf_page_set_old(bpage, buf_pool->LRU_old != NULL);
+ }
+
+ /* If this is a zipped block with decompressed frame as well
+ then put it on the unzip_LRU list */
+ if (buf_page_belongs_to_unzip_LRU(bpage)) {
+ buf_unzip_LRU_add_block((buf_block_t*) bpage, old);
}
}
-/**********************************************************************
+/******************************************************************//**
Adds a block to the LRU list. */
-
+UNIV_INTERN
void
buf_LRU_add_block(
/*==============*/
- buf_block_t* block, /* in: control block */
- ibool old) /* in: TRUE if should be put to the old
+ buf_page_t* bpage, /*!< in: control block */
+ ibool old) /*!< in: TRUE if should be put to the old
blocks in the LRU list, else put to the start;
if the LRU list is very short, the block is
added to the start, regardless of this
parameter */
{
- buf_LRU_add_block_low(block, old);
+ buf_LRU_add_block_low(bpage, old);
}
-/**********************************************************************
+/******************************************************************//**
Moves a block to the start of the LRU list. */
-
+UNIV_INTERN
void
buf_LRU_make_block_young(
/*=====================*/
- buf_block_t* block) /* in: control block */
+ buf_page_t* bpage) /*!< in: control block */
{
- buf_LRU_remove_block(block);
- buf_LRU_add_block_low(block, FALSE);
+ ut_ad(buf_pool_mutex_own());
+
+ if (bpage->old) {
+ buf_pool->stat.n_pages_made_young++;
+ }
+
+ buf_LRU_remove_block(bpage);
+ buf_LRU_add_block_low(bpage, FALSE);
}
-/**********************************************************************
+/******************************************************************//**
Moves a block to the end of the LRU list. */
-
+UNIV_INTERN
void
buf_LRU_make_block_old(
/*===================*/
- buf_block_t* block) /* in: control block */
+ buf_page_t* bpage) /*!< in: control block */
{
- buf_LRU_remove_block(block);
- buf_LRU_add_block_to_end_low(block);
+ buf_LRU_remove_block(bpage);
+ buf_LRU_add_block_to_end_low(bpage);
}
-/**********************************************************************
-Puts a block back to the free list. */
+/******************************************************************//**
+Try to free a block. If bpage is a descriptor of a compressed-only
+page, the descriptor object will be freed as well.
+
+NOTE: If this function returns BUF_LRU_FREED, it may have temporarily
+released buf_pool_mutex. Furthermore, the page frame will no longer be
+accessible via bpage.
+
+The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and
+release these two mutexes after the call. No other
+buf_page_get_mutex() may be held when calling this function.
+@return BUF_LRU_FREED if freed, BUF_LRU_CANNOT_RELOCATE or
+BUF_LRU_NOT_FREED otherwise. */
+UNIV_INTERN
+enum buf_lru_free_block_status
+buf_LRU_free_block(
+/*===============*/
+ buf_page_t* bpage, /*!< in: block to be freed */
+ ibool zip, /*!< in: TRUE if should remove also the
+ compressed page of an uncompressed page */
+ ibool* buf_pool_mutex_released)
+ /*!< in: pointer to a variable that will
+ be assigned TRUE if buf_pool_mutex
+ was temporarily released, or NULL */
+{
+ buf_page_t* b = NULL;
+ mutex_t* block_mutex = buf_page_get_mutex(bpage);
+
+ ut_ad(buf_pool_mutex_own());
+ ut_ad(mutex_own(block_mutex));
+ ut_ad(buf_page_in_file(bpage));
+ ut_ad(bpage->in_LRU_list);
+ ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
+ UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
+
+ if (!buf_page_can_relocate(bpage)) {
+
+ /* Do not free buffer-fixed or I/O-fixed blocks. */
+ return(BUF_LRU_NOT_FREED);
+ }
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
+ ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
+#endif /* UNIV_IBUF_COUNT_DEBUG */
+
+ if (zip || !bpage->zip.data) {
+ /* This would completely free the block. */
+ /* Do not completely free dirty blocks. */
+
+ if (bpage->oldest_modification) {
+ return(BUF_LRU_NOT_FREED);
+ }
+ } else if (bpage->oldest_modification) {
+ /* Do not completely free dirty blocks. */
+
+ if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
+ ut_ad(buf_page_get_state(bpage)
+ == BUF_BLOCK_ZIP_DIRTY);
+ return(BUF_LRU_NOT_FREED);
+ }
+
+ goto alloc;
+ } else if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
+ /* Allocate the control block for the compressed page.
+ If it cannot be allocated (without freeing a block
+ from the LRU list), refuse to free bpage. */
+alloc:
+ buf_pool_mutex_exit_forbid();
+ b = buf_buddy_alloc(sizeof *b, NULL);
+ buf_pool_mutex_exit_allow();
+
+ if (UNIV_UNLIKELY(!b)) {
+ return(BUF_LRU_CANNOT_RELOCATE);
+ }
+
+ memcpy(b, bpage, sizeof *b);
+ }
+
+#ifdef UNIV_DEBUG
+ if (buf_debug_prints) {
+ fprintf(stderr, "Putting space %lu page %lu to free list\n",
+ (ulong) buf_page_get_space(bpage),
+ (ulong) buf_page_get_page_no(bpage));
+ }
+#endif /* UNIV_DEBUG */
+
+ if (buf_LRU_block_remove_hashed_page(bpage, zip)
+ != BUF_BLOCK_ZIP_FREE) {
+ ut_a(bpage->buf_fix_count == 0);
+
+ if (b) {
+ buf_page_t* prev_b = UT_LIST_GET_PREV(LRU, b);
+ const ulint fold = buf_page_address_fold(
+ bpage->space, bpage->offset);
+
+ ut_a(!buf_page_hash_get(bpage->space, bpage->offset));
+
+ b->state = b->oldest_modification
+ ? BUF_BLOCK_ZIP_DIRTY
+ : BUF_BLOCK_ZIP_PAGE;
+ UNIV_MEM_DESC(b->zip.data,
+ page_zip_get_size(&b->zip), b);
+
+ /* The fields in_page_hash and in_LRU_list of
+ the to-be-freed block descriptor should have
+ been cleared in
+ buf_LRU_block_remove_hashed_page(), which
+ invokes buf_LRU_remove_block(). */
+ ut_ad(!bpage->in_page_hash);
+ ut_ad(!bpage->in_LRU_list);
+ /* bpage->state was BUF_BLOCK_FILE_PAGE because
+ b != NULL. The type cast below is thus valid. */
+ ut_ad(!((buf_block_t*) bpage)->in_unzip_LRU_list);
+
+ /* The fields of bpage were copied to b before
+ buf_LRU_block_remove_hashed_page() was invoked. */
+ ut_ad(!b->in_zip_hash);
+ ut_ad(b->in_page_hash);
+ ut_ad(b->in_LRU_list);
+
+ HASH_INSERT(buf_page_t, hash,
+ buf_pool->page_hash, fold, b);
+
+ /* Insert b where bpage was in the LRU list. */
+ if (UNIV_LIKELY(prev_b != NULL)) {
+ ulint lru_len;
+
+ ut_ad(prev_b->in_LRU_list);
+ ut_ad(buf_page_in_file(prev_b));
+ UNIV_MEM_ASSERT_RW(prev_b, sizeof *prev_b);
+
+ UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU,
+ prev_b, b);
+
+ if (buf_page_is_old(b)) {
+ buf_pool->LRU_old_len++;
+ if (UNIV_UNLIKELY
+ (buf_pool->LRU_old
+ == UT_LIST_GET_NEXT(LRU, b))) {
+
+ buf_pool->LRU_old = b;
+ }
+ }
+
+ lru_len = UT_LIST_GET_LEN(buf_pool->LRU);
+
+ if (lru_len > BUF_LRU_OLD_MIN_LEN) {
+ ut_ad(buf_pool->LRU_old);
+ /* Adjust the length of the
+ old block list if necessary */
+ buf_LRU_old_adjust_len();
+ } else if (lru_len == BUF_LRU_OLD_MIN_LEN) {
+ /* The LRU list is now long
+ enough for LRU_old to become
+ defined: init it */
+ buf_LRU_old_init();
+ }
+#ifdef UNIV_LRU_DEBUG
+ /* Check that the "old" flag is consistent
+ in the block and its neighbours. */
+ buf_page_set_old(b, buf_page_is_old(b));
+#endif /* UNIV_LRU_DEBUG */
+ } else {
+ ut_d(b->in_LRU_list = FALSE);
+ buf_LRU_add_block_low(b, buf_page_is_old(b));
+ }
+
+ if (b->state == BUF_BLOCK_ZIP_PAGE) {
+ buf_LRU_insert_zip_clean(b);
+ } else {
+ buf_page_t* prev;
+
+ ut_ad(b->in_flush_list);
+ ut_d(bpage->in_flush_list = FALSE);
+
+ prev = UT_LIST_GET_PREV(list, b);
+ UT_LIST_REMOVE(list, buf_pool->flush_list, b);
+
+ if (prev) {
+ ut_ad(prev->in_flush_list);
+ UT_LIST_INSERT_AFTER(
+ list,
+ buf_pool->flush_list,
+ prev, b);
+ } else {
+ UT_LIST_ADD_FIRST(
+ list,
+ buf_pool->flush_list,
+ b);
+ }
+ }
+
+ bpage->zip.data = NULL;
+ page_zip_set_size(&bpage->zip, 0);
+
+ /* Prevent buf_page_get_gen() from
+ decompressing the block while we release
+ buf_pool_mutex and block_mutex. */
+ b->buf_fix_count++;
+ b->io_fix = BUF_IO_READ;
+ }
+
+ if (buf_pool_mutex_released) {
+ *buf_pool_mutex_released = TRUE;
+ }
+
+ buf_pool_mutex_exit();
+ mutex_exit(block_mutex);
+
+ /* Remove possible adaptive hash index on the page.
+ The page was declared uninitialized by
+ buf_LRU_block_remove_hashed_page(). We need to flag
+ the contents of the page valid (which it still is) in
+ order to avoid bogus Valgrind warnings.*/
+
+ UNIV_MEM_VALID(((buf_block_t*) bpage)->frame,
+ UNIV_PAGE_SIZE);
+ btr_search_drop_page_hash_index((buf_block_t*) bpage);
+ UNIV_MEM_INVALID(((buf_block_t*) bpage)->frame,
+ UNIV_PAGE_SIZE);
+
+ if (b) {
+ /* Compute and stamp the compressed page
+ checksum while not holding any mutex. The
+ block is already half-freed
+ (BUF_BLOCK_REMOVE_HASH) and removed from
+ buf_pool->page_hash, thus inaccessible by any
+ other thread. */
+
+ mach_write_to_4(
+ b->zip.data + FIL_PAGE_SPACE_OR_CHKSUM,
+ UNIV_LIKELY(srv_use_checksums)
+ ? page_zip_calc_checksum(
+ b->zip.data,
+ page_zip_get_size(&b->zip))
+ : BUF_NO_CHECKSUM_MAGIC);
+ }
+
+ buf_pool_mutex_enter();
+ mutex_enter(block_mutex);
+
+ if (b) {
+ mutex_enter(&buf_pool_zip_mutex);
+ b->buf_fix_count--;
+ buf_page_set_io_fix(b, BUF_IO_NONE);
+ mutex_exit(&buf_pool_zip_mutex);
+ }
+
+ buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
+ } else {
+ /* The block_mutex should have been released by
+ buf_LRU_block_remove_hashed_page() when it returns
+ BUF_BLOCK_ZIP_FREE. */
+ ut_ad(block_mutex == &buf_pool_zip_mutex);
+ mutex_enter(block_mutex);
+ }
+
+ return(BUF_LRU_FREED);
+}
+/******************************************************************//**
+Puts a block back to the free list. */
+UNIV_INTERN
void
buf_LRU_block_free_non_file_page(
/*=============================*/
- buf_block_t* block) /* in: block, must not contain a file page */
+ buf_block_t* block) /*!< in: block, must not contain a file page */
{
+ void* data;
- ut_ad(mutex_own(&(buf_pool->mutex)));
- ut_ad(mutex_own(&block->mutex));
ut_ad(block);
+ ut_ad(buf_pool_mutex_own());
+ ut_ad(mutex_own(&block->mutex));
- ut_a((block->state == BUF_BLOCK_MEMORY)
- || (block->state == BUF_BLOCK_READY_FOR_USE));
+ switch (buf_block_get_state(block)) {
+ case BUF_BLOCK_MEMORY:
+ case BUF_BLOCK_READY_FOR_USE:
+ break;
+ default:
+ ut_error;
+ }
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
ut_a(block->n_pointers == 0);
- ut_a(!block->in_free_list);
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+ ut_ad(!block->page.in_free_list);
+ ut_ad(!block->page.in_flush_list);
+ ut_ad(!block->page.in_LRU_list);
- block->state = BUF_BLOCK_NOT_USED;
+ buf_block_set_state(block, BUF_BLOCK_NOT_USED);
UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
#ifdef UNIV_DEBUG
/* Wipe contents of page to reveal possible stale pointers to it */
memset(block->frame, '\0', UNIV_PAGE_SIZE);
+#else
+ /* Wipe page_no and space_id */
+ memset(block->frame + FIL_PAGE_OFFSET, 0xfe, 4);
+ memset(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xfe, 4);
#endif
- UT_LIST_ADD_FIRST(free, buf_pool->free, block);
- block->in_free_list = TRUE;
+ data = block->page.zip.data;
- UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
+ if (data) {
+ block->page.zip.data = NULL;
+ mutex_exit(&block->mutex);
+ buf_pool_mutex_exit_forbid();
+ buf_buddy_free(data, page_zip_get_size(&block->page.zip));
+ buf_pool_mutex_exit_allow();
+ mutex_enter(&block->mutex);
+ page_zip_set_size(&block->page.zip, 0);
+ }
- if (srv_use_awe && block->frame) {
- /* Add to the list of mapped pages */
+ UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
+ ut_d(block->page.in_free_list = TRUE);
- UT_LIST_ADD_FIRST(awe_LRU_free_mapped,
- buf_pool->awe_LRU_free_mapped, block);
- }
+ UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
}
-/**********************************************************************
-Takes a block out of the LRU list and page hash table and sets the block
-state to BUF_BLOCK_REMOVE_HASH. */
+/******************************************************************//**
+Takes a block out of the LRU list and page hash table.
+If the block is compressed-only (BUF_BLOCK_ZIP_PAGE),
+the object will be freed and buf_pool_zip_mutex will be released.
+
+If a compressed page or a compressed-only block descriptor is freed,
+other compressed pages or compressed-only block descriptors may be
+relocated.
+@return the new state of the block (BUF_BLOCK_ZIP_FREE if the state
+was BUF_BLOCK_ZIP_PAGE, or BUF_BLOCK_REMOVE_HASH otherwise) */
static
-void
+enum buf_page_state
buf_LRU_block_remove_hashed_page(
/*=============================*/
- buf_block_t* block) /* in: block, must contain a file page and
+ buf_page_t* bpage, /*!< in: block, must contain a file page and
be in a state where it can be freed; there
may or may not be a hash index to the page */
+ ibool zip) /*!< in: TRUE if should remove also the
+ compressed page of an uncompressed page */
{
- ut_ad(mutex_own(&(buf_pool->mutex)));
- ut_ad(mutex_own(&block->mutex));
- ut_ad(block);
+ const buf_page_t* hashed_bpage;
+ ut_ad(bpage);
+ ut_ad(buf_pool_mutex_own());
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
- ut_a(block->io_fix == 0);
- ut_a(block->buf_fix_count == 0);
- ut_a(ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) == 0);
+ ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
+ ut_a(bpage->buf_fix_count == 0);
- buf_LRU_remove_block(block);
+ UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
+
+ buf_LRU_remove_block(bpage);
buf_pool->freed_page_clock += 1;
- /* Note that if AWE is enabled the block may not have a frame at all */
+ switch (buf_page_get_state(bpage)) {
+ case BUF_BLOCK_FILE_PAGE:
+ UNIV_MEM_ASSERT_W(bpage, sizeof(buf_block_t));
+ UNIV_MEM_ASSERT_W(((buf_block_t*) bpage)->frame,
+ UNIV_PAGE_SIZE);
+ buf_block_modify_clock_inc((buf_block_t*) bpage);
+ if (bpage->zip.data) {
+ const page_t* page = ((buf_block_t*) bpage)->frame;
+ const ulint zip_size
+ = page_zip_get_size(&bpage->zip);
+
+ ut_a(!zip || bpage->oldest_modification == 0);
+
+ switch (UNIV_EXPECT(fil_page_get_type(page),
+ FIL_PAGE_INDEX)) {
+ case FIL_PAGE_TYPE_ALLOCATED:
+ case FIL_PAGE_INODE:
+ case FIL_PAGE_IBUF_BITMAP:
+ case FIL_PAGE_TYPE_FSP_HDR:
+ case FIL_PAGE_TYPE_XDES:
+ /* These are essentially uncompressed pages. */
+ if (!zip) {
+ /* InnoDB writes the data to the
+ uncompressed page frame. Copy it
+ to the compressed page, which will
+ be preserved. */
+ memcpy(bpage->zip.data, page,
+ zip_size);
+ }
+ break;
+ case FIL_PAGE_TYPE_ZBLOB:
+ case FIL_PAGE_TYPE_ZBLOB2:
+ break;
+ case FIL_PAGE_INDEX:
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(page_zip_validate(&bpage->zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+ break;
+ default:
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: ERROR: The compressed page"
+ " to be evicted seems corrupt:", stderr);
+ ut_print_buf(stderr, page, zip_size);
+ fputs("\nInnoDB: Possibly older version"
+ " of the page:", stderr);
+ ut_print_buf(stderr, bpage->zip.data,
+ zip_size);
+ putc('\n', stderr);
+ ut_error;
+ }
+
+ break;
+ }
+ /* fall through */
+ case BUF_BLOCK_ZIP_PAGE:
+ ut_a(bpage->oldest_modification == 0);
+ UNIV_MEM_ASSERT_W(bpage->zip.data,
+ page_zip_get_size(&bpage->zip));
+ break;
+ case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_ZIP_DIRTY:
+ case BUF_BLOCK_NOT_USED:
+ case BUF_BLOCK_READY_FOR_USE:
+ case BUF_BLOCK_MEMORY:
+ case BUF_BLOCK_REMOVE_HASH:
+ ut_error;
+ break;
+ }
- buf_block_modify_clock_inc(block);
+ hashed_bpage = buf_page_hash_get(bpage->space, bpage->offset);
- if (block != buf_page_hash_get(block->space, block->offset)) {
+ if (UNIV_UNLIKELY(bpage != hashed_bpage)) {
fprintf(stderr,
"InnoDB: Error: page %lu %lu not found"
" in the hash table\n",
- (ulong) block->space,
- (ulong) block->offset);
- if (buf_page_hash_get(block->space, block->offset)) {
+ (ulong) bpage->space,
+ (ulong) bpage->offset);
+ if (hashed_bpage) {
fprintf(stderr,
"InnoDB: In hash table we find block"
" %p of %lu %lu which is not %p\n",
- (void*) buf_page_hash_get
- (block->space, block->offset),
- (ulong) buf_page_hash_get
- (block->space, block->offset)->space,
- (ulong) buf_page_hash_get
- (block->space, block->offset)->offset,
- (void*) block);
+ (const void*) hashed_bpage,
+ (ulong) hashed_bpage->space,
+ (ulong) hashed_bpage->offset,
+ (const void*) bpage);
}
-#ifdef UNIV_DEBUG
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ mutex_exit(buf_page_get_mutex(bpage));
+ buf_pool_mutex_exit();
buf_print();
buf_LRU_print();
buf_validate();
buf_LRU_validate();
-#endif
- ut_a(0);
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+ ut_error;
}
- HASH_DELETE(buf_block_t, hash, buf_pool->page_hash,
- buf_page_address_fold(block->space, block->offset),
- block);
+ ut_ad(!bpage->in_zip_hash);
+ ut_ad(bpage->in_page_hash);
+ ut_d(bpage->in_page_hash = FALSE);
+ HASH_DELETE(buf_page_t, hash, buf_pool->page_hash,
+ buf_page_address_fold(bpage->space, bpage->offset),
+ bpage);
+ switch (buf_page_get_state(bpage)) {
+ case BUF_BLOCK_ZIP_PAGE:
+ ut_ad(!bpage->in_free_list);
+ ut_ad(!bpage->in_flush_list);
+ ut_ad(!bpage->in_LRU_list);
+ ut_a(bpage->zip.data);
+ ut_a(buf_page_get_zip_size(bpage));
+
+ UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
+
+ mutex_exit(&buf_pool_zip_mutex);
+ buf_pool_mutex_exit_forbid();
+ buf_buddy_free(bpage->zip.data,
+ page_zip_get_size(&bpage->zip));
+ buf_buddy_free(bpage, sizeof(*bpage));
+ buf_pool_mutex_exit_allow();
+ UNIV_MEM_UNDESC(bpage);
+ return(BUF_BLOCK_ZIP_FREE);
+
+ case BUF_BLOCK_FILE_PAGE:
+ memset(((buf_block_t*) bpage)->frame
+ + FIL_PAGE_OFFSET, 0xff, 4);
+ memset(((buf_block_t*) bpage)->frame
+ + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4);
+ UNIV_MEM_INVALID(((buf_block_t*) bpage)->frame,
+ UNIV_PAGE_SIZE);
+ buf_page_set_state(bpage, BUF_BLOCK_REMOVE_HASH);
+
+ if (zip && bpage->zip.data) {
+ /* Free the compressed page. */
+ void* data = bpage->zip.data;
+ bpage->zip.data = NULL;
+
+ ut_ad(!bpage->in_free_list);
+ ut_ad(!bpage->in_flush_list);
+ ut_ad(!bpage->in_LRU_list);
+ mutex_exit(&((buf_block_t*) bpage)->mutex);
+ buf_pool_mutex_exit_forbid();
+ buf_buddy_free(data, page_zip_get_size(&bpage->zip));
+ buf_pool_mutex_exit_allow();
+ mutex_enter(&((buf_block_t*) bpage)->mutex);
+ page_zip_set_size(&bpage->zip, 0);
+ }
+
+ return(BUF_BLOCK_REMOVE_HASH);
- UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
- block->state = BUF_BLOCK_REMOVE_HASH;
+ case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_ZIP_DIRTY:
+ case BUF_BLOCK_NOT_USED:
+ case BUF_BLOCK_READY_FOR_USE:
+ case BUF_BLOCK_MEMORY:
+ case BUF_BLOCK_REMOVE_HASH:
+ break;
+ }
+
+ ut_error;
+ return(BUF_BLOCK_ZIP_FREE);
}
-/**********************************************************************
+/******************************************************************//**
Puts a file page which has no hash index to the free list.
static
void
buf_LRU_block_free_hashed_page(
/*===========================*/
- buf_block_t* block) /* in: block, must contain a file page and
+ buf_block_t* block) /*!< in: block, must contain a file page and
be in a state where it can be freed */
{
- ut_ad(mutex_own(&(buf_pool->mutex)));
+ ut_ad(buf_pool_mutex_own());
ut_ad(mutex_own(&block->mutex));
- ut_a(block->state == BUF_BLOCK_REMOVE_HASH);
-
- block->state = BUF_BLOCK_MEMORY;
+ buf_block_set_state(block, BUF_BLOCK_MEMORY);
buf_LRU_block_free_non_file_page(block);
}
-#ifdef UNIV_DEBUG
-/**************************************************************************
-Validates the LRU list. */
+/**********************************************************************//**
+Updates buf_LRU_old_ratio.
+@return updated old_pct */
+UNIV_INTERN
+uint
+buf_LRU_old_ratio_update(
+/*=====================*/
+ uint old_pct,/*!< in: Reserve this percentage of
+ the buffer pool for "old" blocks. */
+ ibool adjust) /*!< in: TRUE=adjust the LRU list;
+ FALSE=just assign buf_LRU_old_ratio
+ during the initialization of InnoDB */
+{
+ uint ratio;
+
+ ratio = old_pct * BUF_LRU_OLD_RATIO_DIV / 100;
+ if (ratio < BUF_LRU_OLD_RATIO_MIN) {
+ ratio = BUF_LRU_OLD_RATIO_MIN;
+ } else if (ratio > BUF_LRU_OLD_RATIO_MAX) {
+ ratio = BUF_LRU_OLD_RATIO_MAX;
+ }
+
+ if (adjust) {
+ buf_pool_mutex_enter();
+
+ if (ratio != buf_LRU_old_ratio) {
+ buf_LRU_old_ratio = ratio;
+
+ if (UT_LIST_GET_LEN(buf_pool->LRU)
+ >= BUF_LRU_OLD_MIN_LEN) {
+ buf_LRU_old_adjust_len();
+ }
+ }
+ buf_pool_mutex_exit();
+ } else {
+ buf_LRU_old_ratio = ratio;
+ }
+
+ /* the reverse of
+ ratio = old_pct * BUF_LRU_OLD_RATIO_DIV / 100 */
+ return((uint) (ratio * 100 / (double) BUF_LRU_OLD_RATIO_DIV + 0.5));
+}
+
+/********************************************************************//**
+Update the historical stats that we are collecting for LRU eviction
+policy at the end of each interval. */
+UNIV_INTERN
+void
+buf_LRU_stat_update(void)
+/*=====================*/
+{
+ buf_LRU_stat_t* item;
+
+ /* If we haven't started eviction yet then don't update stats. */
+ if (buf_pool->freed_page_clock == 0) {
+ goto func_exit;
+ }
+
+ buf_pool_mutex_enter();
+
+ /* Update the index. */
+ item = &buf_LRU_stat_arr[buf_LRU_stat_arr_ind];
+ buf_LRU_stat_arr_ind++;
+ buf_LRU_stat_arr_ind %= BUF_LRU_STAT_N_INTERVAL;
+
+ /* Add the current value and subtract the obsolete entry. */
+ buf_LRU_stat_sum.io += buf_LRU_stat_cur.io - item->io;
+ buf_LRU_stat_sum.unzip += buf_LRU_stat_cur.unzip - item->unzip;
+
+ /* Put current entry in the array. */
+ memcpy(item, &buf_LRU_stat_cur, sizeof *item);
+
+ buf_pool_mutex_exit();
+
+func_exit:
+ /* Clear the current entry. */
+ memset(&buf_LRU_stat_cur, 0, sizeof buf_LRU_stat_cur);
+}
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/**********************************************************************//**
+Validates the LRU list.
+@return TRUE */
+UNIV_INTERN
ibool
buf_LRU_validate(void)
/*==================*/
{
+ buf_page_t* bpage;
buf_block_t* block;
ulint old_len;
ulint new_len;
- ulint LRU_pos;
ut_ad(buf_pool);
- mutex_enter(&(buf_pool->mutex));
+ buf_pool_mutex_enter();
if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
ut_a(buf_pool->LRU_old);
old_len = buf_pool->LRU_old_len;
- new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8);
+ new_len = ut_min(UT_LIST_GET_LEN(buf_pool->LRU)
+ * buf_LRU_old_ratio / BUF_LRU_OLD_RATIO_DIV,
+ UT_LIST_GET_LEN(buf_pool->LRU)
+ - (BUF_LRU_OLD_TOLERANCE
+ + BUF_LRU_NON_OLD_MIN_LEN));
ut_a(old_len >= new_len - BUF_LRU_OLD_TOLERANCE);
ut_a(old_len <= new_len + BUF_LRU_OLD_TOLERANCE);
}
- UT_LIST_VALIDATE(LRU, buf_block_t, buf_pool->LRU);
+ UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU,
+ ut_ad(ut_list_node_313->in_LRU_list));
- block = UT_LIST_GET_FIRST(buf_pool->LRU);
+ bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
old_len = 0;
- while (block != NULL) {
-
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
- if (block->old) {
- old_len++;
- }
+ while (bpage != NULL) {
- if (buf_pool->LRU_old && (old_len == 1)) {
- ut_a(buf_pool->LRU_old == block);
+ switch (buf_page_get_state(bpage)) {
+ case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_NOT_USED:
+ case BUF_BLOCK_READY_FOR_USE:
+ case BUF_BLOCK_MEMORY:
+ case BUF_BLOCK_REMOVE_HASH:
+ ut_error;
+ break;
+ case BUF_BLOCK_FILE_PAGE:
+ ut_ad(((buf_block_t*) bpage)->in_unzip_LRU_list
+ == buf_page_belongs_to_unzip_LRU(bpage));
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_ZIP_DIRTY:
+ break;
}
- LRU_pos = block->LRU_position;
+ if (buf_page_is_old(bpage)) {
+ const buf_page_t* prev
+ = UT_LIST_GET_PREV(LRU, bpage);
+ const buf_page_t* next
+ = UT_LIST_GET_NEXT(LRU, bpage);
- block = UT_LIST_GET_NEXT(LRU, block);
+ if (!old_len++) {
+ ut_a(buf_pool->LRU_old == bpage);
+ } else {
+ ut_a(!prev || buf_page_is_old(prev));
+ }
- if (block) {
- /* If the following assert fails, it may
- not be an error: just the buf_pool clock
- has wrapped around */
- ut_a(LRU_pos >= block->LRU_position);
+ ut_a(!next || buf_page_is_old(next));
}
- }
- if (buf_pool->LRU_old) {
- ut_a(buf_pool->LRU_old_len == old_len);
+ bpage = UT_LIST_GET_NEXT(LRU, bpage);
}
- UT_LIST_VALIDATE(free, buf_block_t, buf_pool->free);
+ ut_a(buf_pool->LRU_old_len == old_len);
+
+ UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free,
+ ut_ad(ut_list_node_313->in_free_list));
+
+ for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
+ bpage != NULL;
+ bpage = UT_LIST_GET_NEXT(list, bpage)) {
+
+ ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
+ }
- block = UT_LIST_GET_FIRST(buf_pool->free);
+ UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
+ ut_ad(ut_list_node_313->in_unzip_LRU_list
+ && ut_list_node_313->page.in_LRU_list));
- while (block != NULL) {
- ut_a(block->state == BUF_BLOCK_NOT_USED);
+ for (block = UT_LIST_GET_FIRST(buf_pool->unzip_LRU);
+ block;
+ block = UT_LIST_GET_NEXT(unzip_LRU, block)) {
- block = UT_LIST_GET_NEXT(free, block);
+ ut_ad(block->in_unzip_LRU_list);
+ ut_ad(block->page.in_LRU_list);
+ ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
}
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
return(TRUE);
}
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-/**************************************************************************
+#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/**********************************************************************//**
Prints the LRU list. */
-
+UNIV_INTERN
void
buf_LRU_print(void)
/*===============*/
{
- buf_block_t* block;
- buf_frame_t* frame;
- ulint len;
+ const buf_page_t* bpage;
ut_ad(buf_pool);
- mutex_enter(&(buf_pool->mutex));
-
- fprintf(stderr, "Pool ulint clock %lu\n",
- (ulong) buf_pool->ulint_clock);
+ buf_pool_mutex_enter();
- block = UT_LIST_GET_FIRST(buf_pool->LRU);
+ bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
- len = 0;
+ while (bpage != NULL) {
- while (block != NULL) {
+ fprintf(stderr, "BLOCK space %lu page %lu ",
+ (ulong) buf_page_get_space(bpage),
+ (ulong) buf_page_get_page_no(bpage));
- fprintf(stderr, "BLOCK %lu ", (ulong) block->offset);
-
- if (block->old) {
+ if (buf_page_is_old(bpage)) {
fputs("old ", stderr);
}
- if (block->buf_fix_count) {
+ if (bpage->buf_fix_count) {
fprintf(stderr, "buffix count %lu ",
- (ulong) block->buf_fix_count);
+ (ulong) bpage->buf_fix_count);
}
- if (block->io_fix) {
- fprintf(stderr, "io_fix %lu ", (ulong) block->io_fix);
+ if (buf_page_get_io_fix(bpage)) {
+ fprintf(stderr, "io_fix %lu ",
+ (ulong) buf_page_get_io_fix(bpage));
}
- if (ut_dulint_cmp(block->oldest_modification,
- ut_dulint_zero) > 0) {
+ if (bpage->oldest_modification) {
fputs("modif. ", stderr);
}
- frame = buf_block_get_frame(block);
-
- fprintf(stderr, "LRU pos %lu type %lu index id %lu ",
- (ulong) block->LRU_position,
- (ulong) fil_page_get_type(frame),
- (ulong) ut_dulint_get_low
- (btr_page_get_index_id(frame)));
+ switch (buf_page_get_state(bpage)) {
+ const byte* frame;
+ case BUF_BLOCK_FILE_PAGE:
+ frame = buf_block_get_frame((buf_block_t*) bpage);
+ fprintf(stderr, "\ntype %lu"
+ " index id %lu\n",
+ (ulong) fil_page_get_type(frame),
+ (ulong) ut_dulint_get_low(
+ btr_page_get_index_id(frame)));
+ break;
+ case BUF_BLOCK_ZIP_PAGE:
+ frame = bpage->zip.data;
+ fprintf(stderr, "\ntype %lu size %lu"
+ " index id %lu\n",
+ (ulong) fil_page_get_type(frame),
+ (ulong) buf_page_get_zip_size(bpage),
+ (ulong) ut_dulint_get_low(
+ btr_page_get_index_id(frame)));
+ break;
- block = UT_LIST_GET_NEXT(LRU, block);
- if (++len == 10) {
- len = 0;
- putc('\n', stderr);
+ default:
+ fprintf(stderr, "\n!state %lu!\n",
+ (ulong) buf_page_get_state(bpage));
+ break;
}
+
+ bpage = UT_LIST_GET_NEXT(LRU, bpage);
}
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
}
-#endif /* UNIV_DEBUG */
+#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
diff --git a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
index fdec0206990..dd98ea17eb5 100644
--- a/storage/innobase/buf/buf0rea.c
+++ b/storage/innobase/buf/buf0rea.c
@@ -1,7 +1,24 @@
-/******************************************************
-The database buffer read
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1995 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file buf/buf0rea.c
+The database buffer read
Created 11/5/1995 Heikki Tuuri
*******************************************************/
@@ -19,60 +36,48 @@ Created 11/5/1995 Heikki Tuuri
#include "trx0sys.h"
#include "os0file.h"
#include "srv0start.h"
+#include "srv0srv.h"
-extern ulint srv_read_ahead_rnd;
-extern ulint srv_read_ahead_seq;
-extern ulint srv_buf_pool_reads;
-
-/* The size in blocks of the area where the random read-ahead algorithm counts
-the accessed pages when deciding whether to read-ahead */
-#define BUF_READ_AHEAD_RANDOM_AREA BUF_READ_AHEAD_AREA
-
-/* There must be at least this many pages in buf_pool in the area to start
-a random read-ahead */
-#define BUF_READ_AHEAD_RANDOM_THRESHOLD (5 + BUF_READ_AHEAD_RANDOM_AREA / 8)
-
-/* The linear read-ahead area size */
+/** The linear read-ahead area size */
#define BUF_READ_AHEAD_LINEAR_AREA BUF_READ_AHEAD_AREA
-/* The linear read-ahead threshold */
-#define BUF_READ_AHEAD_LINEAR_THRESHOLD (3 * BUF_READ_AHEAD_LINEAR_AREA / 8)
-
-/* If there are buf_pool->curr_size per the number below pending reads, then
+/** If there are buf_pool->curr_size per the number below pending reads, then
read-ahead is not done: this is to prevent flooding the buffer pool with
i/o-fixed buffer blocks */
#define BUF_READ_AHEAD_PEND_LIMIT 2
-/************************************************************************
+/********************************************************************//**
Low-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there, in which case does nothing.
Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
-flag is cleared and the x-lock released by an i/o-handler thread. */
+flag is cleared and the x-lock released by an i/o-handler thread.
+@return 1 if a read request was queued; 0 if the page already resided
+in buf_pool, or if the page is in the doublewrite buffer blocks in
+which case it is never read into the pool, or if the tablespace does
+not exist or is being dropped, i.e. whenever no read request was
+issued */
static
ulint
buf_read_page_low(
/*==============*/
- /* out: 1 if a read request was queued, 0 if the page
- already resided in buf_pool, or if the page is in
- the doublewrite buffer blocks in which case it is never
- read into the pool, or if the tablespace does not
- exist or is being dropped */
- ulint* err, /* out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
+ ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
trying to read from a non-existent tablespace, or a
tablespace which is just now being dropped */
- ibool sync, /* in: TRUE if synchronous aio is desired */
- ulint mode, /* in: BUF_READ_IBUF_PAGES_ONLY, ...,
+ ibool sync, /*!< in: TRUE if synchronous aio is desired */
+ ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ...,
ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
at read-ahead functions) */
- ulint space, /* in: space id */
- ib_longlong tablespace_version, /* in: if the space memory object has
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size, or 0 */
+ ibool unzip, /*!< in: TRUE=request uncompressed page */
+ ib_int64_t tablespace_version, /*!< in: if the space memory object has
this timestamp different from what we are giving here,
treat the tablespace as dropped; this is a timestamp we
use to stop dangling page reads from a tablespace
which we have DISCARDed + IMPORTed back */
- ulint offset) /* in: page number */
+ ulint offset) /*!< in: page number */
{
- buf_block_t* block;
+ buf_page_t* bpage;
ulint wake_later;
*err = DB_SUCCESS;
@@ -96,7 +101,8 @@ buf_read_page_low(
return(0);
}
- if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) {
+ if (ibuf_bitmap_page(zip_size, offset)
+ || trx_sys_hdr_page(space, offset)) {
/* Trx sys header is so low in the latching order that we play
safe and do not leave the i/o-completion to an asynchronous
@@ -111,9 +117,9 @@ buf_read_page_low(
or is being dropped; if we succeed in initing the page in the buffer
pool for read, then DISCARD cannot proceed until the read has
completed */
- block = buf_page_init_for_read(err, mode, space, tablespace_version,
- offset);
- if (block == NULL) {
+ bpage = buf_page_init_for_read(err, mode, space, zip_size, unzip,
+ tablespace_version, offset);
+ if (bpage == NULL) {
return(0);
}
@@ -127,202 +133,57 @@ buf_read_page_low(
}
#endif
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+ ut_ad(buf_page_in_file(bpage));
- *err = fil_io(OS_FILE_READ | wake_later,
- sync, space,
- offset, 0, UNIV_PAGE_SIZE,
- (void*)block->frame, (void*)block);
+ if (zip_size) {
+ *err = fil_io(OS_FILE_READ | wake_later,
+ sync, space, zip_size, offset, 0, zip_size,
+ bpage->zip.data, bpage);
+ } else {
+ ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
+
+ *err = fil_io(OS_FILE_READ | wake_later,
+ sync, space, 0, offset, 0, UNIV_PAGE_SIZE,
+ ((buf_block_t*) bpage)->frame, bpage);
+ }
ut_a(*err == DB_SUCCESS);
if (sync) {
/* The i/o is already completed when we arrive from
fil_read */
- buf_page_io_complete(block);
+ buf_page_io_complete(bpage);
}
return(1);
}
-/************************************************************************
-Applies a random read-ahead in buf_pool if there are at least a threshold
-value of accessed pages from the random read-ahead area. Does not read any
-page, not even the one at the position (space, offset), if the read-ahead
-mechanism is not activated. NOTE 1: the calling thread may own latches on
-pages: to avoid deadlocks this function must be written such that it cannot
-end up waiting for these latches! NOTE 2: the calling thread must want
-access to the page given: this rule is set to prevent unintended read-aheads
-performed by ibuf routines, a situation which could result in a deadlock if
-the OS does not support asynchronous i/o. */
-static
-ulint
-buf_read_ahead_random(
-/*==================*/
- /* out: number of page read requests issued; NOTE
- that if we read ibuf pages, it may happen that
- the page at the given page number does not get
- read even if we return a value > 0! */
- ulint space, /* in: space id */
- ulint offset) /* in: page number of a page which the current thread
- wants to access */
-{
- ib_longlong tablespace_version;
- buf_block_t* block;
- ulint recent_blocks = 0;
- ulint count;
- ulint LRU_recent_limit;
- ulint ibuf_mode;
- ulint low, high;
- ulint err;
- ulint i;
-
- if (srv_startup_is_before_trx_rollback_phase) {
- /* No read-ahead to avoid thread deadlocks */
- return(0);
- }
-
- if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) {
-
- /* If it is an ibuf bitmap page or trx sys hdr, we do
- no read-ahead, as that could break the ibuf page access
- order */
-
- return(0);
- }
-
- /* Remember the tablespace version before we ask te tablespace size
- below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
- do not try to read outside the bounds of the tablespace! */
-
- tablespace_version = fil_space_get_version(space);
-
- low = (offset / BUF_READ_AHEAD_RANDOM_AREA)
- * BUF_READ_AHEAD_RANDOM_AREA;
- high = (offset / BUF_READ_AHEAD_RANDOM_AREA + 1)
- * BUF_READ_AHEAD_RANDOM_AREA;
- if (high > fil_space_get_size(space)) {
-
- high = fil_space_get_size(space);
- }
-
- /* Get the minimum LRU_position field value for an initial segment
- of the LRU list, to determine which blocks have recently been added
- to the start of the list. */
-
- LRU_recent_limit = buf_LRU_get_recent_limit();
-
- mutex_enter(&(buf_pool->mutex));
-
- if (buf_pool->n_pend_reads
- > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
- mutex_exit(&(buf_pool->mutex));
-
- return(0);
- }
-
- /* Count how many blocks in the area have been recently accessed,
- that is, reside near the start of the LRU list. */
-
- for (i = low; i < high; i++) {
- block = buf_page_hash_get(space, i);
-
- if ((block)
- && (block->LRU_position > LRU_recent_limit)
- && block->accessed) {
-
- recent_blocks++;
- }
- }
-
- mutex_exit(&(buf_pool->mutex));
-
- if (recent_blocks < BUF_READ_AHEAD_RANDOM_THRESHOLD) {
- /* Do nothing */
-
- return(0);
- }
-
- /* Read all the suitable blocks within the area */
-
- if (ibuf_inside()) {
- ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
- } else {
- ibuf_mode = BUF_READ_ANY_PAGE;
- }
-
- count = 0;
-
- for (i = low; i < high; i++) {
- /* It is only sensible to do read-ahead in the non-sync aio
- mode: hence FALSE as the first parameter */
-
- if (!ibuf_bitmap_page(i)) {
- count += buf_read_page_low(
- &err, FALSE,
- ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
- space, tablespace_version, i);
- if (err == DB_TABLESPACE_DELETED) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: in random"
- " readahead trying to access\n"
- "InnoDB: tablespace %lu page %lu,\n"
- "InnoDB: but the tablespace does not"
- " exist or is just being dropped.\n",
- (ulong) space, (ulong) i);
- }
- }
- }
-
- /* In simulated aio we wake the aio handler threads only after
- queuing all aio requests, in native aio the following call does
- nothing: */
-
- os_aio_simulated_wake_handler_threads();
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints && (count > 0)) {
- fprintf(stderr,
- "Random read-ahead space %lu offset %lu pages %lu\n",
- (ulong) space, (ulong) offset,
- (ulong) count);
- }
-#endif /* UNIV_DEBUG */
-
- ++srv_read_ahead_rnd;
- return(count);
-}
-
-/************************************************************************
+/********************************************************************//**
High-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
-released by the i/o-handler thread. Does a random read-ahead if it seems
-sensible. */
-
-ulint
+released by the i/o-handler thread.
+@return TRUE if page has been read in, FALSE in case of failure */
+UNIV_INTERN
+ibool
buf_read_page(
/*==========*/
- /* out: number of page read requests issued: this can
- be > 1 if read-ahead occurred */
- ulint space, /* in: space id */
- ulint offset) /* in: page number */
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
+ ulint offset) /*!< in: page number */
{
- ib_longlong tablespace_version;
+ ib_int64_t tablespace_version;
ulint count;
- ulint count2;
ulint err;
tablespace_version = fil_space_get_version(space);
- count = buf_read_ahead_random(space, offset);
-
/* We do the i/o in the synchronous aio mode to save thread
switches: hence TRUE */
- count2 = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
- tablespace_version, offset);
- srv_buf_pool_reads+= count2;
+ count = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
+ zip_size, FALSE,
+ tablespace_version, offset);
+ srv_buf_pool_reads += count;
if (err == DB_TABLESPACE_DELETED) {
ut_print_timestamp(stderr);
fprintf(stderr,
@@ -336,14 +197,17 @@ buf_read_page(
/* Flush pages from the end of the LRU list if necessary */
buf_flush_free_margin();
- return(count + count2);
+ /* Increment number of I/O operations used for LRU policy. */
+ buf_LRU_stat_inc_io();
+
+ return(count > 0);
}
-/************************************************************************
+/********************************************************************//**
Applies linear read-ahead if in the buf_pool the page is a border page of
a linear read-ahead area and all the pages in the area have been accessed.
Does not read any page if the read-ahead mechanism is not activated. Note
-that the the algorithm looks at the 'natural' adjacent successor and
+that the algorithm looks at the 'natural' adjacent successor and
predecessor of the page, which on the leaf level of a B-tree are the next
and previous page in the chain of leaves. To know these, the page specified
in (space, offset) must already be present in the buf_pool. Thus, the
@@ -361,20 +225,21 @@ function must be written such that it cannot end up waiting for these
latches!
NOTE 3: the calling thread must want access to the page given: this rule is
set to prevent unintended read-aheads performed by ibuf routines, a situation
-which could result in a deadlock if the OS does not support asynchronous io. */
-
+which could result in a deadlock if the OS does not support asynchronous io.
+@return number of page read requests issued */
+UNIV_INTERN
ulint
buf_read_ahead_linear(
/*==================*/
- /* out: number of page read requests issued */
- ulint space, /* in: space id */
- ulint offset) /* in: page number of a page; NOTE: the current thread
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
+ ulint offset) /*!< in: page number of a page; NOTE: the current thread
must want access to this page (see NOTE 3 above) */
{
- ib_longlong tablespace_version;
- buf_block_t* block;
+ ib_int64_t tablespace_version;
+ buf_page_t* bpage;
buf_frame_t* frame;
- buf_block_t* pred_block = NULL;
+ buf_page_t* pred_bpage = NULL;
ulint pred_offset;
ulint succ_offset;
ulint count;
@@ -385,28 +250,32 @@ buf_read_ahead_linear(
ulint low, high;
ulint err;
ulint i;
+ const ulint buf_read_ahead_linear_area
+ = BUF_READ_AHEAD_LINEAR_AREA;
+ ulint threshold;
- if (srv_startup_is_before_trx_rollback_phase) {
+ if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
/* No read-ahead to avoid thread deadlocks */
return(0);
}
- if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) {
+ low = (offset / buf_read_ahead_linear_area)
+ * buf_read_ahead_linear_area;
+ high = (offset / buf_read_ahead_linear_area + 1)
+ * buf_read_ahead_linear_area;
- /* If it is an ibuf bitmap page or trx sys hdr, we do
- no read-ahead, as that could break the ibuf page access
- order */
+ if ((offset != low) && (offset != high - 1)) {
+ /* This is not a border page of the area: return */
return(0);
}
- low = (offset / BUF_READ_AHEAD_LINEAR_AREA)
- * BUF_READ_AHEAD_LINEAR_AREA;
- high = (offset / BUF_READ_AHEAD_LINEAR_AREA + 1)
- * BUF_READ_AHEAD_LINEAR_AREA;
+ if (ibuf_bitmap_page(zip_size, offset)
+ || trx_sys_hdr_page(space, offset)) {
- if ((offset != low) && (offset != high - 1)) {
- /* This is not a border page of the area: return */
+ /* If it is an ibuf bitmap page or trx sys hdr, we do
+ no read-ahead, as that could break the ibuf page access
+ order */
return(0);
}
@@ -417,10 +286,10 @@ buf_read_ahead_linear(
tablespace_version = fil_space_get_version(space);
- mutex_enter(&(buf_pool->mutex));
+ buf_pool_mutex_enter();
if (high > fil_space_get_size(space)) {
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
/* The area is not whole, return */
return(0);
@@ -428,7 +297,7 @@ buf_read_ahead_linear(
if (buf_pool->n_pend_reads
> buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
return(0);
}
@@ -443,47 +312,71 @@ buf_read_ahead_linear(
asc_or_desc = -1;
}
+ /* How many out of order accessed pages can we ignore
+ when working out the access pattern for linear readahead */
+ threshold = ut_min((64 - srv_read_ahead_threshold),
+ BUF_READ_AHEAD_AREA);
+
fail_count = 0;
for (i = low; i < high; i++) {
- block = buf_page_hash_get(space, i);
+ bpage = buf_page_hash_get(space, i);
- if ((block == NULL) || !block->accessed) {
+ if ((bpage == NULL) || !buf_page_is_accessed(bpage)) {
/* Not accessed */
fail_count++;
- } else if (pred_block
- && (ut_ulint_cmp(block->LRU_position,
- pred_block->LRU_position)
- != asc_or_desc)) {
+ } else if (pred_bpage) {
+ /* Note that buf_page_is_accessed() returns
+ the time of the first access. If some blocks
+ of the extent existed in the buffer pool at
+ the time of a linear access pattern, the first
+ access times may be nonmonotonic, even though
+ the latest access times were linear. The
+ threshold (srv_read_ahead_threshold) should help
+ a little against this. */
+ int res = ut_ulint_cmp(
+ buf_page_is_accessed(bpage),
+ buf_page_is_accessed(pred_bpage));
/* Accesses not in the right order */
-
- fail_count++;
- pred_block = block;
+ if (res != 0 && res != asc_or_desc) {
+ fail_count++;
+ }
}
- }
-
- if (fail_count > BUF_READ_AHEAD_LINEAR_AREA
- - BUF_READ_AHEAD_LINEAR_THRESHOLD) {
- /* Too many failures: return */
- mutex_exit(&(buf_pool->mutex));
+ if (fail_count > threshold) {
+ /* Too many failures: return */
+ buf_pool_mutex_exit();
+ return(0);
+ }
- return(0);
+ if (bpage && buf_page_is_accessed(bpage)) {
+ pred_bpage = bpage;
+ }
}
/* If we got this far, we know that enough pages in the area have
been accessed in the right order: linear read-ahead can be sensible */
- block = buf_page_hash_get(space, offset);
+ bpage = buf_page_hash_get(space, offset);
- if (block == NULL) {
- mutex_exit(&(buf_pool->mutex));
+ if (bpage == NULL) {
+ buf_pool_mutex_exit();
return(0);
}
- frame = block->frame;
+ switch (buf_page_get_state(bpage)) {
+ case BUF_BLOCK_ZIP_PAGE:
+ frame = bpage->zip.data;
+ break;
+ case BUF_BLOCK_FILE_PAGE:
+ frame = ((buf_block_t*) bpage)->frame;
+ break;
+ default:
+ ut_error;
+ break;
+ }
/* Read the natural predecessor and successor page addresses from
the page; NOTE that because the calling thread may have an x-latch
@@ -494,7 +387,7 @@ buf_read_ahead_linear(
pred_offset = fil_page_get_prev(frame);
succ_offset = fil_page_get_next(frame);
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
if ((offset == low) && (succ_offset == offset + 1)) {
@@ -511,10 +404,10 @@ buf_read_ahead_linear(
return(0);
}
- low = (new_offset / BUF_READ_AHEAD_LINEAR_AREA)
- * BUF_READ_AHEAD_LINEAR_AREA;
- high = (new_offset / BUF_READ_AHEAD_LINEAR_AREA + 1)
- * BUF_READ_AHEAD_LINEAR_AREA;
+ low = (new_offset / buf_read_ahead_linear_area)
+ * buf_read_ahead_linear_area;
+ high = (new_offset / buf_read_ahead_linear_area + 1)
+ * buf_read_ahead_linear_area;
if ((new_offset != low) && (new_offset != high - 1)) {
/* This is not a border page of the area: return */
@@ -548,11 +441,11 @@ buf_read_ahead_linear(
/* It is only sensible to do read-ahead in the non-sync
aio mode: hence FALSE as the first parameter */
- if (!ibuf_bitmap_page(i)) {
+ if (!ibuf_bitmap_page(zip_size, i)) {
count += buf_read_page_low(
&err, FALSE,
ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
- space, tablespace_version, i);
+ space, zip_size, FALSE, tablespace_version, i);
if (err == DB_TABLESPACE_DELETED) {
ut_print_timestamp(stderr);
fprintf(stderr,
@@ -583,32 +476,42 @@ buf_read_ahead_linear(
}
#endif /* UNIV_DEBUG */
- ++srv_read_ahead_seq;
+ /* Read ahead is considered one I/O operation for the purpose of
+ LRU policy decision. */
+ buf_LRU_stat_inc_io();
+
+ buf_pool->stat.n_ra_pages_read += count;
return(count);
}
-/************************************************************************
+/********************************************************************//**
Issues read requests for pages which the ibuf module wants to read in, in
order to contract the insert buffer tree. Technically, this function is like
a read-ahead function. */
-
+UNIV_INTERN
void
buf_read_ibuf_merge_pages(
/*======================*/
- ibool sync, /* in: TRUE if the caller wants this function
- to wait for the highest address page to get
- read in, before this function returns */
- ulint* space_ids, /* in: array of space ids */
- ib_longlong* space_versions,/* in: the spaces must have this version
- number (timestamp), otherwise we discard the
- read; we use this to cancel reads if
- DISCARD + IMPORT may have changed the
- tablespace size */
- ulint* page_nos, /* in: array of page numbers to read, with the
- highest page number the last in the array */
- ulint n_stored) /* in: number of page numbers in the array */
+ ibool sync, /*!< in: TRUE if the caller
+ wants this function to wait
+ for the highest address page
+ to get read in, before this
+ function returns */
+ const ulint* space_ids, /*!< in: array of space ids */
+ const ib_int64_t* space_versions,/*!< in: the spaces must have
+ this version number
+ (timestamp), otherwise we
+ discard the read; we use this
+ to cancel reads if DISCARD +
+ IMPORT may have changed the
+ tablespace size */
+ const ulint* page_nos, /*!< in: array of page numbers
+ to read, with the highest page
+ number the last in the
+ array */
+ ulint n_stored) /*!< in: number of elements
+ in the arrays */
{
- ulint err;
ulint i;
ut_ad(!ibuf_inside());
@@ -621,18 +524,27 @@ buf_read_ibuf_merge_pages(
}
for (i = 0; i < n_stored; i++) {
- buf_read_page_low(&err,
- (i + 1 == n_stored) && sync,
- BUF_READ_ANY_PAGE,
- space_ids[i], space_versions[i],
+ ulint zip_size = fil_space_get_zip_size(space_ids[i]);
+ ulint err;
+
+ if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
+
+ goto tablespace_deleted;
+ }
+
+ buf_read_page_low(&err, sync && (i + 1 == n_stored),
+ BUF_READ_ANY_PAGE, space_ids[i],
+ zip_size, TRUE, space_versions[i],
page_nos[i]);
- if (err == DB_TABLESPACE_DELETED) {
+ if (UNIV_UNLIKELY(err == DB_TABLESPACE_DELETED)) {
+tablespace_deleted:
/* We have deleted or are deleting the single-table
tablespace: remove the entries for that page */
ibuf_merge_or_delete_for_page(NULL, space_ids[i],
- page_nos[i], FALSE);
+ page_nos[i],
+ zip_size, FALSE);
}
}
@@ -650,25 +562,41 @@ buf_read_ibuf_merge_pages(
#endif /* UNIV_DEBUG */
}
-/************************************************************************
+/********************************************************************//**
Issues read requests for pages which recovery wants to read in. */
-
+UNIV_INTERN
void
buf_read_recv_pages(
/*================*/
- ibool sync, /* in: TRUE if the caller wants this function
- to wait for the highest address page to get
- read in, before this function returns */
- ulint space, /* in: space id */
- ulint* page_nos, /* in: array of page numbers to read, with the
- highest page number the last in the array */
- ulint n_stored) /* in: number of page numbers in the array */
+ ibool sync, /*!< in: TRUE if the caller
+ wants this function to wait
+ for the highest address page
+ to get read in, before this
+ function returns */
+ ulint space, /*!< in: space id */
+ ulint zip_size, /*!< in: compressed page size in
+ bytes, or 0; NOTE: the value passed in
+ is overwritten on entry with
+ fil_space_get_zip_size(space) */
+ const ulint* page_nos, /*!< in: array of page numbers
+ to read, with the highest page
+ number the last in the
+ array */
+ ulint n_stored) /*!< in: number of page numbers
+ in the array */
{
- ib_longlong tablespace_version;
+ ib_int64_t tablespace_version;
ulint count;
ulint err;
ulint i;
+ zip_size = fil_space_get_zip_size(space);
+
+ if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
+ /* It is a single table tablespace and the .ibd file is
+ missing: do nothing */
+
+ return;
+ }
+
tablespace_version = fil_space_get_version(space);
for (i = 0; i < n_stored; i++) {
@@ -702,14 +630,14 @@ buf_read_recv_pages(
os_aio_print_debug = FALSE;
if ((i + 1 == n_stored) && sync) {
- buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE,
- space, tablespace_version,
+ buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
+ zip_size, TRUE, tablespace_version,
page_nos[i]);
} else {
buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
| OS_AIO_SIMULATED_WAKE_LATER,
- space, tablespace_version,
- page_nos[i]);
+ space, zip_size, TRUE,
+ tablespace_version, page_nos[i]);
}
}
diff --git a/storage/innodb_plugin/compile-innodb b/storage/innobase/compile-innodb
index 82601f03ae9..82601f03ae9 100755
--- a/storage/innodb_plugin/compile-innodb
+++ b/storage/innobase/compile-innodb
diff --git a/storage/innodb_plugin/compile-innodb-debug b/storage/innobase/compile-innodb-debug
index efb4abf88d5..efb4abf88d5 100755
--- a/storage/innodb_plugin/compile-innodb-debug
+++ b/storage/innobase/compile-innodb-debug
diff --git a/storage/innobase/data/data0data.c b/storage/innobase/data/data0data.c
index 0f03de4ca9d..e3c1f1b4f23 100644
--- a/storage/innobase/data/data0data.c
+++ b/storage/innobase/data/data0data.c
@@ -1,7 +1,24 @@
-/************************************************************************
-SQL data field and tuple
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1994-1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file data/data0data.c
+SQL data field and tuple
Created 5/30/1994 Heikki Tuuri
*************************************************************************/
@@ -12,69 +29,42 @@ Created 5/30/1994 Heikki Tuuri
#include "data0data.ic"
#endif
+#ifndef UNIV_HOTBACKUP
#include "rem0rec.h"
#include "rem0cmp.h"
#include "page0page.h"
+#include "page0zip.h"
#include "dict0dict.h"
#include "btr0cur.h"
#include <ctype.h>
+#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_DEBUG
-byte data_error; /* data pointers of tuple fields are initialized
- to point here for error checking */
-
-ulint data_dummy; /* this is used to fool the compiler in
- dtuple_validate */
+/** Dummy variable to catch access to uninitialized fields. In the
+debug version, dtuple_create() will make all fields of dtuple_t point
+to data_error. */
+UNIV_INTERN byte data_error;
+
+# ifndef UNIV_DEBUG_VALGRIND
+/** this is used to fool the compiler in dtuple_validate */
+UNIV_INTERN ulint data_dummy;
+# endif /* !UNIV_DEBUG_VALGRIND */
#endif /* UNIV_DEBUG */
-/* Some non-inlined functions used in the MySQL interface: */
-void
-dfield_set_data_noninline(
- dfield_t* field, /* in: field */
- void* data, /* in: data */
- ulint len) /* in: length or UNIV_SQL_NULL */
-{
- dfield_set_data(field, data, len);
-}
-void*
-dfield_get_data_noninline(
- dfield_t* field) /* in: field */
-{
- return(dfield_get_data(field));
-}
-ulint
-dfield_get_len_noninline(
- dfield_t* field) /* in: field */
-{
- return(dfield_get_len(field));
-}
-ulint
-dtuple_get_n_fields_noninline(
- dtuple_t* tuple) /* in: tuple */
-{
- return(dtuple_get_n_fields(tuple));
-}
-dfield_t*
-dtuple_get_nth_field_noninline(
- dtuple_t* tuple, /* in: tuple */
- ulint n) /* in: index of field */
-{
- return(dtuple_get_nth_field(tuple, n));
-}
-
-/*************************************************************************
-Tests if dfield data length and content is equal to the given. */
-
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Tests if dfield data length and content is equal to the given.
+@return TRUE if equal */
+UNIV_INTERN
ibool
dfield_data_is_binary_equal(
/*========================*/
- /* out: TRUE if equal */
- dfield_t* field, /* in: field */
- ulint len, /* in: data length or UNIV_SQL_NULL */
- byte* data) /* in: data */
+ const dfield_t* field, /*!< in: field */
+ ulint len, /*!< in: data length or UNIV_SQL_NULL */
+ const byte* data) /*!< in: data */
{
- if (len != field->len) {
+ if (len != dfield_get_len(field)) {
return(FALSE);
}
@@ -84,7 +74,7 @@ dfield_data_is_binary_equal(
return(TRUE);
}
- if (0 != ut_memcmp(field->data, data, len)) {
+ if (0 != memcmp(dfield_get_data(field), data, len)) {
return(FALSE);
}
@@ -92,25 +82,19 @@ dfield_data_is_binary_equal(
return(TRUE);
}
-/****************************************************************
-Returns TRUE if lengths of two dtuples are equal and respective data fields
-in them are equal when compared with collation in char fields (not as binary
-strings). */
-
-ibool
-dtuple_datas_are_ordering_equal(
-/*============================*/
- /* out: TRUE if length and fieds are equal
- when compared with cmp_data_data:
- NOTE: in character type fields some letters
- are identified with others! (collation) */
- dtuple_t* tuple1, /* in: tuple 1 */
- dtuple_t* tuple2) /* in: tuple 2 */
+/************************************************************//**
+Compare two data tuples, respecting the collation of character fields.
+@return 1, 0, -1 if tuple1 is greater, equal, less, respectively,
+than tuple2 */
+UNIV_INTERN
+int
+dtuple_coll_cmp(
+/*============*/
+ const dtuple_t* tuple1, /*!< in: tuple 1 */
+ const dtuple_t* tuple2) /*!< in: tuple 2 */
{
- dfield_t* field1;
- dfield_t* field2;
- ulint n_fields;
- ulint i;
+ ulint n_fields;
+ ulint i;
ut_ad(tuple1 && tuple2);
ut_ad(tuple1->magic_n == DATA_TUPLE_MAGIC_N);
@@ -122,58 +106,33 @@ dtuple_datas_are_ordering_equal(
if (n_fields != dtuple_get_n_fields(tuple2)) {
- return(FALSE);
+ return(n_fields < dtuple_get_n_fields(tuple2) ? -1 : 1);
}
for (i = 0; i < n_fields; i++) {
+ int cmp;
+ const dfield_t* field1 = dtuple_get_nth_field(tuple1, i);
+ const dfield_t* field2 = dtuple_get_nth_field(tuple2, i);
- field1 = dtuple_get_nth_field(tuple1, i);
- field2 = dtuple_get_nth_field(tuple2, i);
+ cmp = cmp_dfield_dfield(field1, field2);
- if (0 != cmp_dfield_dfield(field1, field2)) {
-
- return(FALSE);
+ if (cmp) {
+ return(cmp);
}
}
- return(TRUE);
+ return(0);
}
-/*************************************************************************
-Creates a dtuple for use in MySQL. */
-
-dtuple_t*
-dtuple_create_for_mysql(
-/*====================*/
- /* out, own created dtuple */
- void** heap, /* out: created memory heap */
- ulint n_fields) /* in: number of fields */
-{
- *heap = (void*)mem_heap_create(500);
-
- return(dtuple_create(*((mem_heap_t**)heap), n_fields));
-}
-
-/*************************************************************************
-Frees a dtuple used in MySQL. */
-
-void
-dtuple_free_for_mysql(
-/*==================*/
- void* heap) /* in: memory heap where tuple was created */
-{
- mem_heap_free((mem_heap_t*)heap);
-}
-
-/*************************************************************************
+/*********************************************************************//**
Sets number of fields used in a tuple. Normally this is set in
dtuple_create, but if you want later to set it smaller, you can use this. */
-
+UNIV_INTERN
void
dtuple_set_n_fields(
/*================*/
- dtuple_t* tuple, /* in: tuple */
- ulint n_fields) /* in: number of fields */
+ dtuple_t* tuple, /*!< in: tuple */
+ ulint n_fields) /*!< in: number of fields */
{
ut_ad(tuple);
@@ -181,14 +140,14 @@ dtuple_set_n_fields(
tuple->n_fields_cmp = n_fields;
}
-/**************************************************************
-Checks that a data field is typed. */
+/**********************************************************//**
+Checks that a data field is typed.
+@return TRUE if ok */
static
ibool
dfield_check_typed_no_assert(
/*=========================*/
- /* out: TRUE if ok */
- dfield_t* field) /* in: data field */
+ const dfield_t* field) /*!< in: data field */
{
if (dfield_get_type(field)->mtype > DATA_MYSQL
|| dfield_get_type(field)->mtype < DATA_VARCHAR) {
@@ -203,16 +162,16 @@ dfield_check_typed_no_assert(
return(TRUE);
}
-/**************************************************************
-Checks that a data tuple is typed. */
-
+/**********************************************************//**
+Checks that a data tuple is typed.
+@return TRUE if ok */
+UNIV_INTERN
ibool
dtuple_check_typed_no_assert(
/*=========================*/
- /* out: TRUE if ok */
- dtuple_t* tuple) /* in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
{
- dfield_t* field;
+ const dfield_t* field;
ulint i;
if (dtuple_get_n_fields(tuple) > REC_MAX_N_FIELDS) {
@@ -238,15 +197,17 @@ dump:
return(TRUE);
}
+#endif /* !UNIV_HOTBACKUP */
-/**************************************************************
-Checks that a data field is typed. Asserts an error if not. */
-
+#ifdef UNIV_DEBUG
+/**********************************************************//**
+Checks that a data field is typed. Asserts an error if not.
+@return TRUE if ok */
+UNIV_INTERN
ibool
dfield_check_typed(
/*===============*/
- /* out: TRUE if ok */
- dfield_t* field) /* in: data field */
+ const dfield_t* field) /*!< in: data field */
{
if (dfield_get_type(field)->mtype > DATA_MYSQL
|| dfield_get_type(field)->mtype < DATA_VARCHAR) {
@@ -262,16 +223,16 @@ dfield_check_typed(
return(TRUE);
}
-/**************************************************************
-Checks that a data tuple is typed. Asserts an error if not. */
-
+/**********************************************************//**
+Checks that a data tuple is typed. Asserts an error if not.
+@return TRUE if ok */
+UNIV_INTERN
ibool
dtuple_check_typed(
/*===============*/
- /* out: TRUE if ok */
- dtuple_t* tuple) /* in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
{
- dfield_t* field;
+ const dfield_t* field;
ulint i;
for (i = 0; i < dtuple_get_n_fields(tuple); i++) {
@@ -284,23 +245,20 @@ dtuple_check_typed(
return(TRUE);
}
-#ifdef UNIV_DEBUG
-/**************************************************************
+/**********************************************************//**
Validates the consistency of a tuple which must be complete, i.e,
-all fields must have been set. */
-
+all fields must have been set.
+@return TRUE if ok */
+UNIV_INTERN
ibool
dtuple_validate(
/*============*/
- /* out: TRUE if ok */
- dtuple_t* tuple) /* in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
{
- dfield_t* field;
- byte* data;
+ const dfield_t* field;
ulint n_fields;
ulint len;
ulint i;
- ulint j;
ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
@@ -314,9 +272,11 @@ dtuple_validate(
field = dtuple_get_nth_field(tuple, i);
len = dfield_get_len(field);
- if (len != UNIV_SQL_NULL) {
+ if (!dfield_is_null(field)) {
- data = field->data;
+ const byte* data = dfield_get_data(field);
+#ifndef UNIV_DEBUG_VALGRIND
+ ulint j;
for (j = 0; j < len; j++) {
@@ -325,6 +285,9 @@ dtuple_validate(
code */
data++;
}
+#endif /* !UNIV_DEBUG_VALGRIND */
+
+ UNIV_MEM_ASSERT_RW(data, len);
}
}
@@ -334,134 +297,252 @@ dtuple_validate(
}
#endif /* UNIV_DEBUG */
-/*****************************************************************
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
Pretty prints a dfield value according to its data type. */
-
+UNIV_INTERN
void
dfield_print(
/*=========*/
- dfield_t* dfield) /* in: dfield */
+ const dfield_t* dfield) /*!< in: dfield */
{
- byte* data;
- ulint len;
- ulint mtype;
- ulint i;
+ const byte* data;
+ ulint len;
+ ulint i;
len = dfield_get_len(dfield);
data = dfield_get_data(dfield);
- if (len == UNIV_SQL_NULL) {
+ if (dfield_is_null(dfield)) {
fputs("NULL", stderr);
return;
}
- mtype = dtype_get_mtype(dfield_get_type(dfield));
-
- if ((mtype == DATA_CHAR) || (mtype == DATA_VARCHAR)) {
-
+ switch (dtype_get_mtype(dfield_get_type(dfield))) {
+ case DATA_CHAR:
+ case DATA_VARCHAR:
for (i = 0; i < len; i++) {
int c = *data++;
putc(isprint(c) ? c : ' ', stderr);
}
- } else if (mtype == DATA_INT) {
+
+ if (dfield_is_ext(dfield)) {
+ fputs("(external)", stderr);
+ }
+ break;
+ case DATA_INT:
ut_a(len == 4); /* only works for 32-bit integers */
fprintf(stderr, "%d", (int)mach_read_from_4(data));
- } else {
+ break;
+ default:
ut_error;
}
}
-/*****************************************************************
+/*************************************************************//**
Pretty prints a dfield value according to its data type. Also the hex string
is printed if a string contains non-printable characters. */
-
+UNIV_INTERN
void
dfield_print_also_hex(
/*==================*/
- dfield_t* dfield) /* in: dfield */
+ const dfield_t* dfield) /*!< in: dfield */
{
- byte* data;
- ulint len;
- ulint mtype;
- ulint i;
- ibool print_also_hex;
+ const byte* data;
+ ulint len;
+ ulint prtype;
+ ulint i;
+ ibool print_also_hex;
len = dfield_get_len(dfield);
data = dfield_get_data(dfield);
- if (len == UNIV_SQL_NULL) {
+ if (dfield_is_null(dfield)) {
fputs("NULL", stderr);
return;
}
- mtype = dtype_get_mtype(dfield_get_type(dfield));
+ prtype = dtype_get_prtype(dfield_get_type(dfield));
+
+ switch (dtype_get_mtype(dfield_get_type(dfield))) {
+ dulint id;
+ case DATA_INT:
+ switch (len) {
+ ulint val;
+ case 1:
+ val = mach_read_from_1(data);
+
+ if (!(prtype & DATA_UNSIGNED)) {
+ val &= ~0x80;
+ fprintf(stderr, "%ld", (long) val);
+ } else {
+ fprintf(stderr, "%lu", (ulong) val);
+ }
+ break;
+
+ case 2:
+ val = mach_read_from_2(data);
+
+ if (!(prtype & DATA_UNSIGNED)) {
+ val &= ~0x8000;
+ fprintf(stderr, "%ld", (long) val);
+ } else {
+ fprintf(stderr, "%lu", (ulong) val);
+ }
+ break;
+
+ case 3:
+ val = mach_read_from_3(data);
+
+ if (!(prtype & DATA_UNSIGNED)) {
+ val &= ~0x800000;
+ fprintf(stderr, "%ld", (long) val);
+ } else {
+ fprintf(stderr, "%lu", (ulong) val);
+ }
+ break;
+
+ case 4:
+ val = mach_read_from_4(data);
+
+ if (!(prtype & DATA_UNSIGNED)) {
+ val &= ~0x80000000;
+ fprintf(stderr, "%ld", (long) val);
+ } else {
+ fprintf(stderr, "%lu", (ulong) val);
+ }
+ break;
+
+ case 6:
+ id = mach_read_from_6(data);
+ fprintf(stderr, "{%lu %lu}",
+ ut_dulint_get_high(id),
+ ut_dulint_get_low(id));
+ break;
+
+ case 7:
+ id = mach_read_from_7(data);
+ fprintf(stderr, "{%lu %lu}",
+ ut_dulint_get_high(id),
+ ut_dulint_get_low(id));
+ break;
+ case 8:
+ id = mach_read_from_8(data);
+ fprintf(stderr, "{%lu %lu}",
+ ut_dulint_get_high(id),
+ ut_dulint_get_low(id));
+ break;
+ default:
+ goto print_hex;
+ }
+ break;
+
+ case DATA_SYS:
+ switch (prtype & DATA_SYS_PRTYPE_MASK) {
+ case DATA_TRX_ID:
+ id = mach_read_from_6(data);
+
+ fprintf(stderr, "trx_id " TRX_ID_FMT,
+ TRX_ID_PREP_PRINTF(id));
+ break;
+
+ case DATA_ROLL_PTR:
+ id = mach_read_from_7(data);
+
+ fprintf(stderr, "roll_ptr {%lu %lu}",
+ ut_dulint_get_high(id), ut_dulint_get_low(id));
+ break;
- if ((mtype == DATA_CHAR) || (mtype == DATA_VARCHAR)) {
+ case DATA_ROW_ID:
+ id = mach_read_from_6(data);
+ fprintf(stderr, "row_id {%lu %lu}",
+ ut_dulint_get_high(id), ut_dulint_get_low(id));
+ break;
+
+ default:
+ id = mach_dulint_read_compressed(data);
+
+ fprintf(stderr, "mix_id {%lu %lu}",
+ ut_dulint_get_high(id), ut_dulint_get_low(id));
+ }
+ break;
+
+ case DATA_CHAR:
+ case DATA_VARCHAR:
print_also_hex = FALSE;
for (i = 0; i < len; i++) {
int c = *data++;
+
if (!isprint(c)) {
print_also_hex = TRUE;
- c = ' ';
+
+ fprintf(stderr, "\\x%02x", (unsigned char) c);
+ } else {
+ putc(c, stderr);
}
- putc(c, stderr);
}
- if (!print_also_hex) {
-
- return;
+ if (dfield_is_ext(dfield)) {
+ fputs("(external)", stderr);
}
- fputs(" Hex: ", stderr);
+ if (!print_also_hex) {
+ break;
+ }
data = dfield_get_data(dfield);
+ /* fall through */
+
+ case DATA_BINARY:
+ default:
+print_hex:
+ fputs(" Hex: ",stderr);
for (i = 0; i < len; i++) {
- fprintf(stderr, "%02lx", (ulint)*data);
+ fprintf(stderr, "%02lx", (ulint) *data++);
+ }
- data++;
+ if (dfield_is_ext(dfield)) {
+ fputs("(external)", stderr);
}
- } else if (mtype == DATA_INT) {
- ut_a(len == 4); /* only works for 32-bit integers */
- fprintf(stderr, "%d", (int)mach_read_from_4(data));
- } else {
- ut_error;
}
}
-/*****************************************************************
+/*************************************************************//**
Print a dfield value using ut_print_buf. */
static
void
dfield_print_raw(
/*=============*/
- FILE* f, /* in: output stream */
- dfield_t* dfield) /* in: dfield */
+ FILE* f, /*!< in: output stream */
+ const dfield_t* dfield) /*!< in: dfield */
{
- ulint len = dfield->len;
- if (len != UNIV_SQL_NULL) {
+ ulint len = dfield_get_len(dfield);
+ if (!dfield_is_null(dfield)) {
ulint print_len = ut_min(len, 1000);
- ut_print_buf(f, dfield->data, print_len);
+ ut_print_buf(f, dfield_get_data(dfield), print_len);
if (len != print_len) {
- fprintf(f, "(total %lu bytes)", (ulong) len);
+ fprintf(f, "(total %lu bytes%s)",
+ (ulong) len,
+ dfield_is_ext(dfield) ? ", external" : "");
}
} else {
fputs(" SQL NULL", f);
}
}
-/**************************************************************
+/**********************************************************//**
The following function prints the contents of a tuple. */
-
+UNIV_INTERN
void
dtuple_print(
/*=========*/
- FILE* f, /* in: output stream */
- dtuple_t* tuple) /* in: tuple */
+ FILE* f, /*!< in: output stream */
+ const dtuple_t* tuple) /*!< in: tuple */
{
ulint n_fields;
ulint i;
@@ -476,47 +557,53 @@ dtuple_print(
dfield_print_raw(f, dtuple_get_nth_field(tuple, i));
putc(';', f);
+ putc('\n', f);
}
- putc('\n', f);
ut_ad(dtuple_validate(tuple));
}
-/******************************************************************
+/**************************************************************//**
Moves parts of long fields in entry to the big record vector so that
the size of tuple drops below the maximum record size allowed in the
database. Moves data only from those fields which are not necessary
-to determine uniquely the insertion place of the tuple in the index. */
-
+to determine uniquely the insertion place of the tuple in the index.
+@return own: created big record vector, NULL if we are not able to
+shorten the entry enough, i.e., if there are too many fixed-length or
+short fields in entry or the index is not clustered */
+UNIV_INTERN
big_rec_t*
dtuple_convert_big_rec(
/*===================*/
- /* out, own: created big record vector,
- NULL if we are not able to shorten
- the entry enough, i.e., if there are
- too many short fields in entry */
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: index entry */
- ulint* ext_vec,/* in: array of externally stored fields,
- or NULL: if a field already is externally
- stored, then we cannot move it to the vector
- this function returns */
- ulint n_ext_vec)/* in: number of elements is ext_vec */
+ dict_index_t* index, /*!< in: index */
+ dtuple_t* entry, /*!< in/out: index entry */
+ ulint* n_ext) /*!< in/out: number of
+ externally stored columns */
{
mem_heap_t* heap;
big_rec_t* vector;
dfield_t* dfield;
+ dict_field_t* ifield;
ulint size;
ulint n_fields;
- ulint longest;
- ulint longest_i = ULINT_MAX;
- ibool is_externally_stored;
- ulint i;
- ulint j;
+ ulint local_len;
+ ulint local_prefix_len;
+
+ if (UNIV_UNLIKELY(!dict_index_is_clust(index))) {
+ return(NULL);
+ }
+
+ if (dict_table_get_format(index->table) < DICT_TF_FORMAT_ZIP) {
+ /* up to MySQL 5.1: store a 768-byte prefix locally */
+ local_len = BTR_EXTERN_FIELD_REF_SIZE + DICT_MAX_INDEX_COL_LEN;
+ } else {
+ /* new-format table: do not store any BLOB prefix locally */
+ local_len = BTR_EXTERN_FIELD_REF_SIZE;
+ }
ut_a(dtuple_check_typed_no_assert(entry));
- size = rec_get_converted_size(index, entry);
+ size = rec_get_converted_size(index, entry, *n_ext);
if (UNIV_UNLIKELY(size > 1000000000)) {
fprintf(stderr,
@@ -537,55 +624,56 @@ dtuple_convert_big_rec(
* sizeof(big_rec_field_t));
/* Decide which fields to shorten: the algorithm is to look for
- the longest field whose type is DATA_BLOB */
+ a variable-length field that yields the biggest savings when
+ stored externally */
n_fields = 0;
- while (rec_get_converted_size(index, entry)
- >= ut_min(page_get_free_space_of_empty(
- dict_table_is_comp(index->table)) / 2,
- REC_MAX_DATA_SIZE)) {
+ while (page_zip_rec_needs_ext(rec_get_converted_size(index, entry,
+ *n_ext),
+ dict_table_is_comp(index->table),
+ dict_index_get_n_fields(index),
+ dict_table_zip_size(index->table))) {
+ ulint i;
+ ulint longest = 0;
+ ulint longest_i = ULINT_MAX;
+ byte* data;
+ big_rec_field_t* b;
- longest = 0;
for (i = dict_index_get_n_unique_in_tree(index);
i < dtuple_get_n_fields(entry); i++) {
+ ulint savings;
- /* Skip over fields which already are externally
- stored */
+ dfield = dtuple_get_nth_field(entry, i);
+ ifield = dict_index_get_nth_field(index, i);
- is_externally_stored = FALSE;
+ /* Skip fixed-length, NULL, externally stored,
+ or short columns */
- if (ext_vec) {
- for (j = 0; j < n_ext_vec; j++) {
- if (ext_vec[j] == i) {
- is_externally_stored = TRUE;
- }
- }
+ if (ifield->fixed_len
+ || dfield_is_null(dfield)
+ || dfield_is_ext(dfield)
+ || dfield_get_len(dfield) <= local_len
+ || dfield_get_len(dfield)
+ <= BTR_EXTERN_FIELD_REF_SIZE * 2) {
+ goto skip_field;
}
- if (!is_externally_stored) {
+ savings = dfield_get_len(dfield) - local_len;
- dfield = dtuple_get_nth_field(entry, i);
-
- if (dfield->len != UNIV_SQL_NULL
- && dfield->len > longest) {
-
- longest = dfield->len;
-
- longest_i = i;
- }
+ /* Check that there would be savings */
+ if (longest >= savings) {
+ goto skip_field;
}
- }
- /* We do not store externally fields which are smaller than
- DICT_MAX_INDEX_COL_LEN */
+ longest_i = i;
+ longest = savings;
-#if DICT_MAX_INDEX_COL_LEN <= REC_1BYTE_OFFS_LIMIT
-# error "DICT_MAX_INDEX_COL_LEN <= REC_1BYTE_OFFS_LIMIT"
-#endif
+skip_field:
+ continue;
+ }
- if (longest < BTR_EXTERN_FIELD_REF_SIZE + 10
- + DICT_MAX_INDEX_COL_LEN) {
+ if (!longest) {
/* Cannot shorten more */
mem_heap_free(heap);
@@ -593,89 +681,84 @@ dtuple_convert_big_rec(
return(NULL);
}
- /* Move data from field longest_i to big rec vector;
- we do not let data size of the remaining entry
- drop below 128 which is the limit for the 2-byte
- offset storage format in a physical record. This
- we accomplish by storing 128 bytes of data in entry
- itself, and only the remaining part to big rec vec.
+ /* Move data from field longest_i to big rec vector.
We store the first bytes locally to the record. Then
we can calculate all ordering fields in all indexes
from locally stored data. */
dfield = dtuple_get_nth_field(entry, longest_i);
- vector->fields[n_fields].field_no = longest_i;
-
- ut_a(dfield->len > DICT_MAX_INDEX_COL_LEN);
-
- vector->fields[n_fields].len = dfield->len
- - DICT_MAX_INDEX_COL_LEN;
-
- vector->fields[n_fields].data = mem_heap_alloc(
- heap, vector->fields[n_fields].len);
-
- /* Copy data (from the end of field) to big rec vector */
+ ifield = dict_index_get_nth_field(index, longest_i);
+ local_prefix_len = local_len - BTR_EXTERN_FIELD_REF_SIZE;
+
+ b = &vector->fields[n_fields];
+ b->field_no = longest_i;
+ b->len = dfield_get_len(dfield) - local_prefix_len;
+ b->data = (char*) dfield_get_data(dfield) + local_prefix_len;
+
+ /* Allocate the locally stored part of the column. */
+ data = mem_heap_alloc(heap, local_len);
+
+ /* Copy the local prefix. */
+ memcpy(data, dfield_get_data(dfield), local_prefix_len);
+ /* Clear the extern field reference (BLOB pointer). */
+ memset(data + local_prefix_len, 0, BTR_EXTERN_FIELD_REF_SIZE);
+#if 0
+ /* The following would fail the Valgrind checks in
+ page_cur_insert_rec_low() and page_cur_insert_rec_zip().
+ The BLOB pointers in the record will be initialized after
+ the record and the BLOBs have been written. */
+ UNIV_MEM_ALLOC(data + local_prefix_len,
+ BTR_EXTERN_FIELD_REF_SIZE);
+#endif
- ut_memcpy(vector->fields[n_fields].data,
- ((byte*)dfield->data) + dfield->len
- - vector->fields[n_fields].len,
- vector->fields[n_fields].len);
- dfield->len = dfield->len - vector->fields[n_fields].len
- + BTR_EXTERN_FIELD_REF_SIZE;
+ dfield_set_data(dfield, data, local_len);
+ dfield_set_ext(dfield);
- /* Set the extern field reference in dfield to zero */
- memset(((byte*)dfield->data)
- + dfield->len - BTR_EXTERN_FIELD_REF_SIZE,
- 0, BTR_EXTERN_FIELD_REF_SIZE);
n_fields++;
+ (*n_ext)++;
+ ut_ad(n_fields < dtuple_get_n_fields(entry));
}
vector->n_fields = n_fields;
return(vector);
}
-/******************************************************************
+/**************************************************************//**
Puts back to entry the data stored in vector. Note that to ensure the
fields in entry can accommodate the data, vector must have been created
from entry with dtuple_convert_big_rec. */
-
+UNIV_INTERN
void
dtuple_convert_back_big_rec(
/*========================*/
- dict_index_t* index __attribute__((unused)), /* in: index */
- dtuple_t* entry, /* in: entry whose data was put to vector */
- big_rec_t* vector) /* in, own: big rec vector; it is
+ dict_index_t* index __attribute__((unused)), /*!< in: index */
+ dtuple_t* entry, /*!< in: entry whose data was put to vector */
+ big_rec_t* vector) /*!< in, own: big rec vector; it is
freed in this function */
{
- dfield_t* dfield;
- ulint i;
+ big_rec_field_t* b = vector->fields;
+ const big_rec_field_t* const end = b + vector->n_fields;
- for (i = 0; i < vector->n_fields; i++) {
+ for (; b < end; b++) {
+ dfield_t* dfield;
+ ulint local_len;
- dfield = dtuple_get_nth_field(entry,
- vector->fields[i].field_no);
- /* Copy data from big rec vector */
+ dfield = dtuple_get_nth_field(entry, b->field_no);
+ local_len = dfield_get_len(dfield);
- ut_memcpy(((byte*)dfield->data)
- + dfield->len - BTR_EXTERN_FIELD_REF_SIZE,
- vector->fields[i].data,
- vector->fields[i].len);
- dfield->len = dfield->len + vector->fields[i].len
- - BTR_EXTERN_FIELD_REF_SIZE;
- }
+ ut_ad(dfield_is_ext(dfield));
+ ut_ad(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
- mem_heap_free(vector->heap);
-}
+ local_len -= BTR_EXTERN_FIELD_REF_SIZE;
-/******************************************************************
-Frees the memory in a big rec vector. */
+ ut_ad(local_len <= DICT_MAX_INDEX_COL_LEN);
+
+ dfield_set_data(dfield,
+ (char*) b->data - local_len,
+ b->len + local_len);
+ }
-void
-dtuple_big_rec_free(
-/*================*/
- big_rec_t* vector) /* in, own: big rec vector; it is
- freed in this function */
-{
mem_heap_free(vector->heap);
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/data/data0type.c b/storage/innobase/data/data0type.c
index a3cfe691404..e834fd2ec55 100644
--- a/storage/innobase/data/data0type.c
+++ b/storage/innobase/data/data0type.c
@@ -1,7 +1,24 @@
-/******************************************************
-Data types
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file data/data0type.c
+Data types
Created 1/16/1996 Heikki Tuuri
*******************************************************/
@@ -12,57 +29,37 @@ Created 1/16/1996 Heikki Tuuri
#include "data0type.ic"
#endif
-/**********************************************************************
-This function is used to find the storage length in bytes of the first n
-characters for prefix indexes using a multibyte character set. The function
-finds charset information and returns length of prefix_len characters in the
-index field in bytes.
-
-NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
-this function, you MUST change also the prototype here! */
-
-ulint
-innobase_get_at_most_n_mbchars(
-/*===========================*/
- /* out: number of bytes occupied by the first
- n characters */
- ulint charset_id, /* in: character set id */
- ulint prefix_len, /* in: prefix length in bytes of the index
- (this has to be divided by mbmaxlen to get the
- number of CHARACTERS n in the prefix) */
- ulint data_len, /* in: length of the string in bytes */
- const char* str); /* in: character string */
+#ifndef UNIV_HOTBACKUP
+# include "ha_prototypes.h"
/* At the database startup we store the default-charset collation number of
this MySQL installation to this global variable. If we have < 4.1.2 format
column definitions, or records in the insert buffer, we use this
charset-collation code for them. */
-ulint data_mysql_default_charset_coll = 99999999;
+UNIV_INTERN ulint data_mysql_default_charset_coll;
-/*************************************************************************
+/*********************************************************************//**
Determine how many bytes the first n characters of the given string occupy.
If the string is shorter than n characters, returns the number of bytes
-the characters in the string occupy. */
-
+the characters in the string occupy.
+@return length of the prefix, in bytes */
+UNIV_INTERN
ulint
dtype_get_at_most_n_mbchars(
/*========================*/
- /* out: length of the prefix,
- in bytes */
- ulint prtype, /* in: precise type */
- ulint mbminlen, /* in: minimum length of a
+ ulint prtype, /*!< in: precise type */
+ ulint mbminlen, /*!< in: minimum length of a
multi-byte character */
- ulint mbmaxlen, /* in: maximum length of a
+ ulint mbmaxlen, /*!< in: maximum length of a
multi-byte character */
- ulint prefix_len, /* in: length of the requested
+ ulint prefix_len, /*!< in: length of the requested
prefix, in characters, multiplied by
dtype_get_mbmaxlen(dtype) */
- ulint data_len, /* in: length of str (in bytes) */
- const char* str) /* in: the string whose prefix
+ ulint data_len, /*!< in: length of str (in bytes) */
+ const char* str) /*!< in: the string whose prefix
length is being determined */
{
-#ifndef UNIV_HOTBACKUP
ut_a(data_len != UNIV_SQL_NULL);
ut_ad(!mbmaxlen || !(prefix_len % mbmaxlen));
@@ -80,23 +77,18 @@ dtype_get_at_most_n_mbchars(
}
return(data_len);
-#else /* UNIV_HOTBACKUP */
- /* This function depends on MySQL code that is not included in
- InnoDB Hot Backup builds. Besides, this function should never
- be called in InnoDB Hot Backup. */
- ut_error;
-#endif /* UNIV_HOTBACKUP */
}
+#endif /* !UNIV_HOTBACKUP */
-/*************************************************************************
+/*********************************************************************//**
Checks if a data main type is a string type. Also a BLOB is considered a
-string type. */
-
+string type.
+@return TRUE if string type */
+UNIV_INTERN
ibool
dtype_is_string_type(
/*=================*/
- /* out: TRUE if string type */
- ulint mtype) /* in: InnoDB main data type code: DATA_CHAR, ... */
+ ulint mtype) /*!< in: InnoDB main data type code: DATA_CHAR, ... */
{
if (mtype <= DATA_BLOB
|| mtype == DATA_MYSQL
@@ -108,17 +100,17 @@ dtype_is_string_type(
return(FALSE);
}
-/*************************************************************************
+/*********************************************************************//**
Checks if a type is a binary string type. Note that for tables created with
< 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For
-those DATA_BLOB columns this function currently returns FALSE. */
-
+those DATA_BLOB columns this function currently returns FALSE.
+@return TRUE if binary string type */
+UNIV_INTERN
ibool
dtype_is_binary_string_type(
/*========================*/
- /* out: TRUE if binary string type */
- ulint mtype, /* in: main data type */
- ulint prtype) /* in: precise type */
+ ulint mtype, /*!< in: main data type */
+ ulint prtype) /*!< in: precise type */
{
if ((mtype == DATA_FIXBINARY)
|| (mtype == DATA_BINARY)
@@ -130,18 +122,18 @@ dtype_is_binary_string_type(
return(FALSE);
}
-/*************************************************************************
+/*********************************************************************//**
Checks if a type is a non-binary string type. That is, dtype_is_string_type is
TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created
with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column.
-For those DATA_BLOB columns this function currently returns TRUE. */
-
+For those DATA_BLOB columns this function currently returns TRUE.
+@return TRUE if non-binary string type */
+UNIV_INTERN
ibool
dtype_is_non_binary_string_type(
/*============================*/
- /* out: TRUE if non-binary string type */
- ulint mtype, /* in: main data type */
- ulint prtype) /* in: precise type */
+ ulint mtype, /*!< in: main data type */
+ ulint prtype) /*!< in: precise type */
{
if (dtype_is_string_type(mtype) == TRUE
&& dtype_is_binary_string_type(mtype, prtype) == FALSE) {
@@ -152,27 +144,17 @@ dtype_is_non_binary_string_type(
return(FALSE);
}
-/*************************************************************************
-Gets the MySQL charset-collation code for MySQL string types. */
-
-ulint
-dtype_get_charset_coll_noninline(
-/*=============================*/
- ulint prtype) /* in: precise data type */
-{
- return(dtype_get_charset_coll(prtype));
-}
-
-/*************************************************************************
+/*********************************************************************//**
Forms a precise type from the < 4.1.2 format precise type plus the
-charset-collation code. */
-
+charset-collation code.
+@return precise type, including the charset-collation code */
+UNIV_INTERN
ulint
dtype_form_prtype(
/*==============*/
- ulint old_prtype, /* in: the MySQL type code and the flags
+ ulint old_prtype, /*!< in: the MySQL type code and the flags
DATA_BINARY_TYPE etc. */
- ulint charset_coll) /* in: MySQL charset-collation code */
+ ulint charset_coll) /*!< in: MySQL charset-collation code */
{
ut_a(old_prtype < 256 * 256);
ut_a(charset_coll < 256);
@@ -180,14 +162,14 @@ dtype_form_prtype(
return(old_prtype + (charset_coll << 16));
}
-/*************************************************************************
-Validates a data type structure. */
-
+/*********************************************************************//**
+Validates a data type structure.
+@return TRUE if ok */
+UNIV_INTERN
ibool
dtype_validate(
/*===========*/
- /* out: TRUE if ok */
- dtype_t* type) /* in: type struct to validate */
+ const dtype_t* type) /*!< in: type struct to validate */
{
ut_a(type);
ut_a(type->mtype >= DATA_VARCHAR);
@@ -197,18 +179,21 @@ dtype_validate(
ut_a((type->prtype & DATA_MYSQL_TYPE_MASK) < DATA_N_SYS_COLS);
}
+#ifndef UNIV_HOTBACKUP
ut_a(type->mbminlen <= type->mbmaxlen);
+#endif /* !UNIV_HOTBACKUP */
return(TRUE);
}
-/*************************************************************************
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
Prints a data type structure. */
-
+UNIV_INTERN
void
dtype_print(
/*========*/
- dtype_t* type) /* in: type */
+ const dtype_t* type) /*!< in: type */
{
ulint mtype;
ulint prtype;
@@ -309,3 +294,4 @@ dtype_print(
fprintf(stderr, " len %lu", (ulong) len);
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/dict/dict0boot.c b/storage/innobase/dict/dict0boot.c
index 5f9aaf71e18..e55de30481b 100644
--- a/storage/innobase/dict/dict0boot.c
+++ b/storage/innobase/dict/dict0boot.c
@@ -1,7 +1,24 @@
-/******************************************************
-Data dictionary creation and booting
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file dict/dict0boot.c
+Data dictionary creation and booting
Created 4/18/1996 Heikki Tuuri
*******************************************************/
@@ -23,36 +40,35 @@ Created 4/18/1996 Heikki Tuuri
#include "log0recv.h"
#include "os0file.h"
-/**************************************************************************
-Gets a pointer to the dictionary header and x-latches its page. */
-
+/**********************************************************************//**
+Gets a pointer to the dictionary header and x-latches its page.
+@return pointer to the dictionary header, page x-latched */
+UNIV_INTERN
dict_hdr_t*
dict_hdr_get(
/*=========*/
- /* out: pointer to the dictionary header,
- page x-latched */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
+ buf_block_t* block;
dict_hdr_t* header;
- ut_ad(mtr);
+ block = buf_page_get(DICT_HDR_SPACE, 0, DICT_HDR_PAGE_NO,
+ RW_X_LATCH, mtr);
+ header = DICT_HDR + buf_block_get_frame(block);
+
+ buf_block_dbg_add_level(block, SYNC_DICT_HEADER);
- header = DICT_HDR + buf_page_get(DICT_HDR_SPACE, DICT_HDR_PAGE_NO,
- RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(header, SYNC_DICT_HEADER);
-#endif /* UNIV_SYNC_DEBUG */
return(header);
}
-/**************************************************************************
-Returns a new table, index, or tree id. */
-
+/**********************************************************************//**
+Returns a new table, index, or tree id.
+@return the new id */
+UNIV_INTERN
dulint
dict_hdr_get_new_id(
/*================*/
- /* out: the new id */
- ulint type) /* in: DICT_HDR_ROW_ID, ... */
+ ulint type) /*!< in: DICT_HDR_ROW_ID, ... */
{
dict_hdr_t* dict_hdr;
dulint id;
@@ -74,10 +90,10 @@ dict_hdr_get_new_id(
return(id);
}
-/**************************************************************************
+/**********************************************************************//**
Writes the current value of the row id counter to the dictionary header file
page. */
-
+UNIV_INTERN
void
dict_hdr_flush_row_id(void)
/*=======================*/
@@ -99,31 +115,28 @@ dict_hdr_flush_row_id(void)
mtr_commit(&mtr);
}
-/*********************************************************************
+/*****************************************************************//**
Creates the file page for the dictionary header. This function is
-called only at the database creation. */
+called only at the database creation.
+@return TRUE if succeed */
static
ibool
dict_hdr_create(
/*============*/
- /* out: TRUE if succeed */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
+ buf_block_t* block;
dict_hdr_t* dict_header;
- ulint hdr_page_no;
ulint root_page_no;
- page_t* page;
ut_ad(mtr);
/* Create the dictionary header file block in a new, allocated file
segment in the system tablespace */
- page = fseg_create(DICT_HDR_SPACE, 0,
- DICT_HDR + DICT_HDR_FSEG_HEADER, mtr);
+ block = fseg_create(DICT_HDR_SPACE, 0,
+ DICT_HDR + DICT_HDR_FSEG_HEADER, mtr);
- hdr_page_no = buf_frame_get_page_no(page);
-
- ut_a(DICT_HDR_PAGE_NO == hdr_page_no);
+ ut_a(DICT_HDR_PAGE_NO == buf_block_get_page_no(block));
dict_header = dict_hdr_get(mtr);
@@ -147,7 +160,8 @@ dict_hdr_create(
/*--------------------------*/
root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
- DICT_HDR_SPACE, DICT_TABLES_ID, FALSE, mtr);
+ DICT_HDR_SPACE, 0, DICT_TABLES_ID,
+ dict_ind_redundant, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
@@ -156,8 +170,9 @@ dict_hdr_create(
mlog_write_ulint(dict_header + DICT_HDR_TABLES, root_page_no,
MLOG_4BYTES, mtr);
/*--------------------------*/
- root_page_no = btr_create(DICT_UNIQUE, DICT_HDR_SPACE,
- DICT_TABLE_IDS_ID, FALSE, mtr);
+ root_page_no = btr_create(DICT_UNIQUE, DICT_HDR_SPACE, 0,
+ DICT_TABLE_IDS_ID,
+ dict_ind_redundant, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
@@ -167,7 +182,8 @@ dict_hdr_create(
MLOG_4BYTES, mtr);
/*--------------------------*/
root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
- DICT_HDR_SPACE, DICT_COLUMNS_ID, FALSE, mtr);
+ DICT_HDR_SPACE, 0, DICT_COLUMNS_ID,
+ dict_ind_redundant, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
@@ -177,7 +193,8 @@ dict_hdr_create(
MLOG_4BYTES, mtr);
/*--------------------------*/
root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
- DICT_HDR_SPACE, DICT_INDEXES_ID, FALSE, mtr);
+ DICT_HDR_SPACE, 0, DICT_INDEXES_ID,
+ dict_ind_redundant, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
@@ -187,7 +204,8 @@ dict_hdr_create(
MLOG_4BYTES, mtr);
/*--------------------------*/
root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
- DICT_HDR_SPACE, DICT_FIELDS_ID, FALSE, mtr);
+ DICT_HDR_SPACE, 0, DICT_FIELDS_ID,
+ dict_ind_redundant, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
@@ -200,10 +218,10 @@ dict_hdr_create(
return(TRUE);
}
-/*********************************************************************
+/*****************************************************************//**
Initializes the data dictionary memory structures when the database is
started. This function is also called when the data dictionary is created. */
-
+UNIV_INTERN
void
dict_boot(void)
/*===========*/
@@ -213,6 +231,7 @@ dict_boot(void)
dict_hdr_t* dict_hdr;
mem_heap_t* heap;
mtr_t mtr;
+ ulint error;
mtr_start(&mtr);
@@ -249,7 +268,10 @@ dict_boot(void)
dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
+ /* ROW_FORMAT = (N_COLS >> 31) ? COMPACT : REDUNDANT */
dict_mem_table_add_col(table, heap, "N_COLS", DATA_INT, 0, 4);
+ /* TYPE is either DICT_TABLE_ORDINARY, or (TYPE & DICT_TF_COMPACT)
+ and (TYPE & DICT_TF_FORMAT_MASK) are nonzero and TYPE = table->flags */
dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "MIX_ID", DATA_BINARY, 0, 0);
dict_mem_table_add_col(table, heap, "MIX_LEN", DATA_INT, 0, 4);
@@ -270,9 +292,12 @@ dict_boot(void)
index->id = DICT_TABLES_ID;
- dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr + DICT_HDR_TABLES,
- MLOG_4BYTES, &mtr));
+ error = dict_index_add_to_cache(table, index,
+ mtr_read_ulint(dict_hdr
+ + DICT_HDR_TABLES,
+ MLOG_4BYTES, &mtr),
+ FALSE);
+ ut_a(error == DB_SUCCESS);
/*-------------------------*/
index = dict_mem_index_create("SYS_TABLES", "ID_IND",
@@ -280,9 +305,12 @@ dict_boot(void)
dict_mem_index_add_field(index, "ID", 0);
index->id = DICT_TABLE_IDS_ID;
- dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr + DICT_HDR_TABLE_IDS,
- MLOG_4BYTES, &mtr));
+ error = dict_index_add_to_cache(table, index,
+ mtr_read_ulint(dict_hdr
+ + DICT_HDR_TABLE_IDS,
+ MLOG_4BYTES, &mtr),
+ FALSE);
+ ut_a(error == DB_SUCCESS);
/*-------------------------*/
table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, 7, 0);
@@ -309,9 +337,12 @@ dict_boot(void)
dict_mem_index_add_field(index, "POS", 0);
index->id = DICT_COLUMNS_ID;
- dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr + DICT_HDR_COLUMNS,
- MLOG_4BYTES, &mtr));
+ error = dict_index_add_to_cache(table, index,
+ mtr_read_ulint(dict_hdr
+ + DICT_HDR_COLUMNS,
+ MLOG_4BYTES, &mtr),
+ FALSE);
+ ut_a(error == DB_SUCCESS);
/*-------------------------*/
table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, 7, 0);
@@ -348,9 +379,12 @@ dict_boot(void)
dict_mem_index_add_field(index, "ID", 0);
index->id = DICT_INDEXES_ID;
- dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr + DICT_HDR_INDEXES,
- MLOG_4BYTES, &mtr));
+ error = dict_index_add_to_cache(table, index,
+ mtr_read_ulint(dict_hdr
+ + DICT_HDR_INDEXES,
+ MLOG_4BYTES, &mtr),
+ FALSE);
+ ut_a(error == DB_SUCCESS);
/*-------------------------*/
table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, 0);
@@ -372,9 +406,12 @@ dict_boot(void)
dict_mem_index_add_field(index, "POS", 0);
index->id = DICT_FIELDS_ID;
- dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr + DICT_HDR_FIELDS,
- MLOG_4BYTES, &mtr));
+ error = dict_index_add_to_cache(table, index,
+ mtr_read_ulint(dict_hdr
+ + DICT_HDR_FIELDS,
+ MLOG_4BYTES, &mtr),
+ FALSE);
+ ut_a(error == DB_SUCCESS);
mtr_commit(&mtr);
/*-------------------------*/
@@ -393,7 +430,7 @@ dict_boot(void)
mutex_exit(&(dict_sys->mutex));
}
-/*********************************************************************
+/*****************************************************************//**
Inserts the basic system table data into themselves in the database
creation. */
static
@@ -404,9 +441,9 @@ dict_insert_initial_data(void)
/* Does nothing yet */
}
-/*********************************************************************
+/*****************************************************************//**
Creates and initializes the data dictionary at the database creation. */
-
+UNIV_INTERN
void
dict_create(void)
/*=============*/
diff --git a/storage/innobase/dict/dict0crea.c b/storage/innobase/dict/dict0crea.c
index 4116230347d..96a9bd8152e 100644
--- a/storage/innobase/dict/dict0crea.c
+++ b/storage/innobase/dict/dict0crea.c
@@ -1,7 +1,24 @@
-/******************************************************
-Database object creation
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
-(c) 1996 Innobase Oy
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file dict/dict0crea.c
+Database object creation
Created 1/8/1996 Heikki Tuuri
*******************************************************/
@@ -26,16 +43,16 @@ Created 1/8/1996 Heikki Tuuri
#include "usr0sess.h"
#include "ut0vec.h"
-/*********************************************************************
+/*****************************************************************//**
Based on a table object, this function builds the entry to be inserted
-in the SYS_TABLES system table. */
+in the SYS_TABLES system table.
+@return the tuple which should be inserted */
static
dtuple_t*
dict_create_sys_tables_tuple(
/*=========================*/
- /* out: the tuple which should be inserted */
- dict_table_t* table, /* in: table */
- mem_heap_t* heap) /* in: memory heap from which the memory for
+ dict_table_t* table, /*!< in: table */
+ mem_heap_t* heap) /*!< in: memory heap from which the memory for
the built tuple is allocated */
{
dict_table_t* sys_tables;
@@ -49,6 +66,8 @@ dict_create_sys_tables_tuple(
entry = dtuple_create(heap, 8 + DATA_N_SYS_COLS);
+ dict_table_copy_types(entry, sys_tables);
+
/* 0: NAME -----------------------------*/
dfield = dtuple_get_nth_field(entry, 0);
@@ -75,27 +94,34 @@ dict_create_sys_tables_tuple(
dfield = dtuple_get_nth_field(entry, 3);
ptr = mem_heap_alloc(heap, 4);
- mach_write_to_4(ptr, DICT_TABLE_ORDINARY);
+ if (table->flags & ~DICT_TF_COMPACT) {
+ ut_a(table->flags & DICT_TF_COMPACT);
+ ut_a(dict_table_get_format(table) >= DICT_TF_FORMAT_ZIP);
+ ut_a((table->flags & DICT_TF_ZSSIZE_MASK)
+ <= (DICT_TF_ZSSIZE_MAX << DICT_TF_ZSSIZE_SHIFT));
+ ut_a(!(table->flags & (~0 << DICT_TF_BITS)));
+ mach_write_to_4(ptr, table->flags);
+ } else {
+ mach_write_to_4(ptr, DICT_TABLE_ORDINARY);
+ }
dfield_set_data(dfield, ptr, 4);
/* 6: MIX_ID (obsolete) ---------------------------*/
dfield = dtuple_get_nth_field(entry, 4);
- ptr = mem_heap_alloc(heap, 8);
- memset(ptr, 0, 8);
+ ptr = mem_heap_zalloc(heap, 8);
dfield_set_data(dfield, ptr, 8);
/* 7: MIX_LEN (obsolete) --------------------------*/
dfield = dtuple_get_nth_field(entry, 5);
- ptr = mem_heap_alloc(heap, 4);
- memset(ptr, 0, 4);
+ ptr = mem_heap_zalloc(heap, 4);
dfield_set_data(dfield, ptr, 4);
/* 8: CLUSTER_NAME ---------------------*/
dfield = dtuple_get_nth_field(entry, 6);
- dfield_set_data(dfield, NULL, UNIV_SQL_NULL); /* not supported */
+ dfield_set_null(dfield); /* not supported */
/* 9: SPACE ----------------------------*/
dfield = dtuple_get_nth_field(entry, 7);
@@ -106,22 +132,20 @@ dict_create_sys_tables_tuple(
dfield_set_data(dfield, ptr, 4);
/*----------------------------------*/
- dict_table_copy_types(entry, sys_tables);
-
return(entry);
}
-/*********************************************************************
+/*****************************************************************//**
Based on a table object, this function builds the entry to be inserted
-in the SYS_COLUMNS system table. */
+in the SYS_COLUMNS system table.
+@return the tuple which should be inserted */
static
dtuple_t*
dict_create_sys_columns_tuple(
/*==========================*/
- /* out: the tuple which should be inserted */
- dict_table_t* table, /* in: table */
- ulint i, /* in: column number */
- mem_heap_t* heap) /* in: memory heap from which the memory for
+ dict_table_t* table, /*!< in: table */
+ ulint i, /*!< in: column number */
+ mem_heap_t* heap) /*!< in: memory heap from which the memory for
the built tuple is allocated */
{
dict_table_t* sys_columns;
@@ -139,6 +163,8 @@ dict_create_sys_columns_tuple(
entry = dtuple_create(heap, 7 + DATA_N_SYS_COLS);
+ dict_table_copy_types(entry, sys_columns);
+
/* 0: TABLE_ID -----------------------*/
dfield = dtuple_get_nth_field(entry, 0);
@@ -188,20 +214,18 @@ dict_create_sys_columns_tuple(
dfield_set_data(dfield, ptr, 4);
/*---------------------------------*/
- dict_table_copy_types(entry, sys_columns);
-
return(entry);
}
-/*******************************************************************
-Builds a table definition to insert. */
+/***************************************************************//**
+Builds a table definition to insert.
+@return DB_SUCCESS or error code */
static
ulint
dict_build_table_def_step(
/*======================*/
- /* out: DB_SUCCESS or error code */
- que_thr_t* thr, /* in: query thread */
- tab_node_t* node) /* in: table create node */
+ que_thr_t* thr, /*!< in: query thread */
+ tab_node_t* node) /*!< in: table create node */
{
dict_table_t* table;
dtuple_t* row;
@@ -209,8 +233,6 @@ dict_build_table_def_step(
const char* path_or_name;
ibool is_path;
mtr_t mtr;
- ulint i;
- ulint row_len;
ut_ad(mutex_own(&(dict_sys->mutex)));
@@ -220,14 +242,6 @@ dict_build_table_def_step(
thr_get_trx(thr)->table_id = table->id;
- row_len = 0;
- for (i = 0; i < table->n_def; i++) {
- row_len += dict_col_get_min_size(&table->cols[i]);
- }
- if (row_len > BTR_PAGE_MAX_REC_SIZE) {
- return(DB_TOO_BIG_RECORD);
- }
-
if (srv_file_per_table) {
/* We create a new single-table tablespace for the table.
We initially let it be 4 pages:
@@ -250,8 +264,13 @@ dict_build_table_def_step(
is_path = FALSE;
}
+ ut_ad(dict_table_get_format(table) <= DICT_TF_FORMAT_MAX);
+ ut_ad(!dict_table_zip_size(table)
+ || dict_table_get_format(table) >= DICT_TF_FORMAT_ZIP);
+
error = fil_create_new_single_table_tablespace(
&space, path_or_name, is_path,
+ table->flags == DICT_TF_COMPACT ? 0 : table->flags,
FIL_IBD_FILE_INITIAL_SIZE);
table->space = (unsigned int) space;
@@ -265,6 +284,9 @@ dict_build_table_def_step(
fsp_header_init(table->space, FIL_IBD_FILE_INITIAL_SIZE, &mtr);
mtr_commit(&mtr);
+ } else {
+ /* Create in the system tablespace: disallow new features */
+ table->flags &= DICT_TF_COMPACT;
}
row = dict_create_sys_tables_tuple(table, node->heap);
@@ -274,14 +296,14 @@ dict_build_table_def_step(
return(DB_SUCCESS);
}
-/*******************************************************************
-Builds a column definition to insert. */
+/***************************************************************//**
+Builds a column definition to insert.
+@return DB_SUCCESS */
static
ulint
dict_build_col_def_step(
/*====================*/
- /* out: DB_SUCCESS */
- tab_node_t* node) /* in: table create node */
+ tab_node_t* node) /*!< in: table create node */
{
dtuple_t* row;
@@ -292,16 +314,16 @@ dict_build_col_def_step(
return(DB_SUCCESS);
}
-/*********************************************************************
+/*****************************************************************//**
Based on an index object, this function builds the entry to be inserted
-in the SYS_INDEXES system table. */
+in the SYS_INDEXES system table.
+@return the tuple which should be inserted */
static
dtuple_t*
dict_create_sys_indexes_tuple(
/*==========================*/
- /* out: the tuple which should be inserted */
- dict_index_t* index, /* in: index */
- mem_heap_t* heap) /* in: memory heap from which the memory for
+ dict_index_t* index, /*!< in: index */
+ mem_heap_t* heap) /*!< in: memory heap from which the memory for
the built tuple is allocated */
{
dict_table_t* sys_indexes;
@@ -319,6 +341,8 @@ dict_create_sys_indexes_tuple(
entry = dtuple_create(heap, 7 + DATA_N_SYS_COLS);
+ dict_table_copy_types(entry, sys_indexes);
+
/* 0: TABLE_ID -----------------------*/
dfield = dtuple_get_nth_field(entry, 0);
@@ -377,22 +401,20 @@ dict_create_sys_indexes_tuple(
dfield_set_data(dfield, ptr, 4);
/*--------------------------------*/
- dict_table_copy_types(entry, sys_indexes);
-
return(entry);
}
-/*********************************************************************
+/*****************************************************************//**
Based on an index object, this function builds the entry to be inserted
-in the SYS_FIELDS system table. */
+in the SYS_FIELDS system table.
+@return the tuple which should be inserted */
static
dtuple_t*
dict_create_sys_fields_tuple(
/*=========================*/
- /* out: the tuple which should be inserted */
- dict_index_t* index, /* in: index */
- ulint i, /* in: field number */
- mem_heap_t* heap) /* in: memory heap from which the memory for
+ dict_index_t* index, /*!< in: index */
+ ulint i, /*!< in: field number */
+ mem_heap_t* heap) /*!< in: memory heap from which the memory for
the built tuple is allocated */
{
dict_table_t* sys_fields;
@@ -408,6 +430,7 @@ dict_create_sys_fields_tuple(
for (j = 0; j < index->n_fields; j++) {
if (dict_index_get_nth_field(index, j)->prefix_len > 0) {
index_contains_column_prefix_field = TRUE;
+ break;
}
}
@@ -417,6 +440,8 @@ dict_create_sys_fields_tuple(
entry = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+ dict_table_copy_types(entry, sys_fields);
+
/* 0: INDEX_ID -----------------------*/
dfield = dtuple_get_nth_field(entry, 0);
@@ -452,26 +477,24 @@ dict_create_sys_fields_tuple(
ut_strlen(field->name));
/*---------------------------------*/
- dict_table_copy_types(entry, sys_fields);
-
return(entry);
}
-/*********************************************************************
+/*****************************************************************//**
Creates the tuple with which the index entry is searched for writing the index
-tree root page number, if such a tree is created. */
+tree root page number, if such a tree is created.
+@return the tuple for search */
static
dtuple_t*
dict_create_search_tuple(
/*=====================*/
- /* out: the tuple for search */
- dtuple_t* tuple, /* in: the tuple inserted in the SYS_INDEXES
+ const dtuple_t* tuple, /*!< in: the tuple inserted in the SYS_INDEXES
table */
- mem_heap_t* heap) /* in: memory heap from which the memory for
+ mem_heap_t* heap) /*!< in: memory heap from which the memory for
the built tuple is allocated */
{
dtuple_t* search_tuple;
- dfield_t* field1;
+ const dfield_t* field1;
dfield_t* field2;
ut_ad(tuple && heap);
@@ -493,15 +516,15 @@ dict_create_search_tuple(
return(search_tuple);
}
-/*******************************************************************
-Builds an index definition row to insert. */
+/***************************************************************//**
+Builds an index definition row to insert.
+@return DB_SUCCESS or error code */
static
ulint
dict_build_index_def_step(
/*======================*/
- /* out: DB_SUCCESS or error code */
- que_thr_t* thr, /* in: query thread */
- ind_node_t* node) /* in: index create node */
+ que_thr_t* thr, /*!< in: query thread */
+ ind_node_t* node) /*!< in: index create node */
{
dict_table_t* table;
dict_index_t* index;
@@ -525,7 +548,7 @@ dict_build_index_def_step(
node->table = table;
ut_ad((UT_LIST_GET_LEN(table->indexes) > 0)
- || (index->type & DICT_CLUSTERED));
+ || dict_index_is_clust(index));
index->id = dict_hdr_get_new_id(DICT_HDR_INDEX_ID);
@@ -539,17 +562,20 @@ dict_build_index_def_step(
ins_node_set_new_row(node->ind_def, row);
+ /* Note that the index was created by this transaction. */
+ index->trx_id = (ib_uint64_t) ut_conv_dulint_to_longlong(trx->id);
+
return(DB_SUCCESS);
}
-/*******************************************************************
-Builds a field definition row to insert. */
+/***************************************************************//**
+Builds a field definition row to insert.
+@return DB_SUCCESS */
static
ulint
dict_build_field_def_step(
/*======================*/
- /* out: DB_SUCCESS */
- ind_node_t* node) /* in: index create node */
+ ind_node_t* node) /*!< in: index create node */
{
dict_index_t* index;
dtuple_t* row;
@@ -563,14 +589,14 @@ dict_build_field_def_step(
return(DB_SUCCESS);
}
-/*******************************************************************
-Creates an index tree for the index if it is not a member of a cluster. */
+/***************************************************************//**
+Creates an index tree for the index if it is not a member of a cluster.
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static
ulint
dict_create_index_tree_step(
/*========================*/
- /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
- ind_node_t* node) /* in: index create node */
+ ind_node_t* node) /*!< in: index create node */
{
dict_index_t* index;
dict_table_t* sys_indexes;
@@ -600,8 +626,9 @@ dict_create_index_tree_step(
btr_pcur_move_to_next_user_rec(&pcur, &mtr);
- node->page_no = btr_create(index->type, index->space, index->id,
- dict_table_is_comp(table), &mtr);
+ node->page_no = btr_create(index->type, index->space,
+ dict_table_zip_size(index->table),
+ index->id, index, &mtr);
/* printf("Created a new index tree in space %lu root page %lu\n",
index->space, index->page_no); */
@@ -619,20 +646,21 @@ dict_create_index_tree_step(
return(DB_SUCCESS);
}
-/***********************************************************************
+/*******************************************************************//**
Drops the index tree associated with a row in SYS_INDEXES table. */
-
+UNIV_INTERN
void
dict_drop_index_tree(
/*=================*/
- rec_t* rec, /* in: record in the clustered index of SYS_INDEXES
- table */
- mtr_t* mtr) /* in: mtr having the latch on the record page */
+ rec_t* rec, /*!< in/out: record in the clustered index
+ of SYS_INDEXES table */
+ mtr_t* mtr) /*!< in: mtr having the latch on the record page */
{
- ulint root_page_no;
- ulint space;
- byte* ptr;
- ulint len;
+ ulint root_page_no;
+ ulint space;
+ ulint zip_size;
+ const byte* ptr;
+ ulint len;
ut_ad(mutex_own(&(dict_sys->mutex)));
ut_a(!dict_table_is_comp(dict_sys->sys_indexes));
@@ -654,8 +682,9 @@ dict_drop_index_tree(
ut_ad(len == 4);
space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
+ zip_size = fil_space_get_zip_size(space);
- if (!fil_tablespace_exists_in_mem(space)) {
+ if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
/* It is a single table tablespace and the .ibd file is
missing: do nothing */
@@ -665,7 +694,7 @@ dict_drop_index_tree(
/* We free all the pages but the root page first; this operation
may span several mini-transactions */
- btr_free_but_not_root(space, root_page_no);
+ btr_free_but_not_root(space, zip_size, root_page_no);
/* Then we free the root page in the same mini-transaction where
we write FIL_NULL to the appropriate field in the SYS_INDEXES
@@ -673,38 +702,40 @@ dict_drop_index_tree(
/* printf("Dropping index tree in space %lu root page %lu\n", space,
root_page_no); */
- btr_free_root(space, root_page_no, mtr);
+ btr_free_root(space, zip_size, root_page_no, mtr);
page_rec_write_index_page_no(rec,
DICT_SYS_INDEXES_PAGE_NO_FIELD,
FIL_NULL, mtr);
}
-/***********************************************************************
-Truncates the index tree associated with a row in SYS_INDEXES table. */
-
+/*******************************************************************//**
+Truncates the index tree associated with a row in SYS_INDEXES table.
+@return new root page number, or FIL_NULL on failure */
+UNIV_INTERN
ulint
dict_truncate_index_tree(
/*=====================*/
- /* out: new root page number, or
- FIL_NULL on failure */
- dict_table_t* table, /* in: the table the index belongs to */
- btr_pcur_t* pcur, /* in/out: persistent cursor pointing to
+ dict_table_t* table, /*!< in: the table the index belongs to */
+ ulint space, /*!< in: 0=truncate,
+ nonzero=create the index tree in the
+ given tablespace */
+ btr_pcur_t* pcur, /*!< in/out: persistent cursor pointing to
record in the clustered index of
SYS_INDEXES table. The cursor may be
repositioned in this call. */
- mtr_t* mtr) /* in: mtr having the latch
+ mtr_t* mtr) /*!< in: mtr having the latch
on the record page. The mtr may be
committed and restarted in this call. */
{
ulint root_page_no;
- ulint space;
+ ibool drop = !space;
+ ulint zip_size;
ulint type;
dulint index_id;
rec_t* rec;
- byte* ptr;
+ const byte* ptr;
ulint len;
- ulint comp;
dict_index_t* index;
ut_ad(mutex_own(&(dict_sys->mutex)));
@@ -716,13 +747,13 @@ dict_truncate_index_tree(
root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
- if (root_page_no == FIL_NULL) {
+ if (drop && root_page_no == FIL_NULL) {
/* The tree has been freed. */
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB: Trying to TRUNCATE"
" a missing index of table %s!\n", table->name);
- return(FIL_NULL);
+ drop = FALSE;
}
ptr = rec_get_nth_field_old(rec,
@@ -730,9 +761,13 @@ dict_truncate_index_tree(
ut_ad(len == 4);
- space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
+ if (drop) {
+ space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
+ }
+
+ zip_size = fil_space_get_zip_size(space);
- if (!fil_tablespace_exists_in_mem(space)) {
+ if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
/* It is a single table tablespace and the .ibd file is
missing: do nothing */
@@ -751,20 +786,25 @@ dict_truncate_index_tree(
ut_ad(len == 8);
index_id = mach_read_from_8(ptr);
+ if (!drop) {
+
+ goto create;
+ }
+
/* We free all the pages but the root page first; this operation
may span several mini-transactions */
- btr_free_but_not_root(space, root_page_no);
+ btr_free_but_not_root(space, zip_size, root_page_no);
/* Then we free the root page in the same mini-transaction where
we create the b-tree and write its new root page number to the
appropriate field in the SYS_INDEXES record: this mini-transaction
marks the B-tree totally truncated */
- comp = page_is_comp(btr_page_get(space, root_page_no, RW_X_LATCH,
- mtr));
+ btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, mtr);
- btr_free_root(space, root_page_no, mtr);
+ btr_free_root(space, zip_size, root_page_no, mtr);
+create:
/* We will temporarily write FIL_NULL to the PAGE_NO field
in SYS_INDEXES, so that the database will not get into an
inconsistent state in case it crashes between the mtr_commit()
@@ -786,36 +826,34 @@ dict_truncate_index_tree(
index;
index = UT_LIST_GET_NEXT(indexes, index)) {
if (!ut_dulint_cmp(index->id, index_id)) {
- break;
+ root_page_no = btr_create(type, space, zip_size,
+ index_id, index, mtr);
+ index->page = (unsigned int) root_page_no;
+ return(root_page_no);
}
}
- root_page_no = btr_create(type, space, index_id, comp, mtr);
- if (index) {
- index->page = (unsigned int) root_page_no;
- } else {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Index %lu %lu of table %s is missing\n"
- "InnoDB: from the data dictionary during TRUNCATE!\n",
- ut_dulint_get_high(index_id),
- ut_dulint_get_low(index_id),
- table->name);
- }
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Index %lu %lu of table %s is missing\n"
+ "InnoDB: from the data dictionary during TRUNCATE!\n",
+ ut_dulint_get_high(index_id),
+ ut_dulint_get_low(index_id),
+ table->name);
- return(root_page_no);
+ return(FIL_NULL);
}
-/*************************************************************************
-Creates a table create graph. */
-
+/*********************************************************************//**
+Creates a table create graph.
+@return own: table create node */
+UNIV_INTERN
tab_node_t*
tab_create_graph_create(
/*====================*/
- /* out, own: table create node */
- dict_table_t* table, /* in: table to create, built as a memory data
+ dict_table_t* table, /*!< in: table to create, built as a memory data
structure */
- mem_heap_t* heap) /* in: heap where created */
+ mem_heap_t* heap) /*!< in: heap where created */
{
tab_node_t* node;
@@ -842,16 +880,16 @@ tab_create_graph_create(
return(node);
}
-/*************************************************************************
-Creates an index create graph. */
-
+/*********************************************************************//**
+Creates an index create graph.
+@return own: index create node */
+UNIV_INTERN
ind_node_t*
ind_create_graph_create(
/*====================*/
- /* out, own: index create node */
- dict_index_t* index, /* in: index to create, built as a memory data
+ dict_index_t* index, /*!< in: index to create, built as a memory data
structure */
- mem_heap_t* heap) /* in: heap where created */
+ mem_heap_t* heap) /*!< in: heap where created */
{
ind_node_t* node;
@@ -879,14 +917,14 @@ ind_create_graph_create(
return(node);
}
-/***************************************************************
-Creates a table. This is a high-level function used in SQL execution graphs. */
-
+/***********************************************************//**
+Creates a table. This is a high-level function used in SQL execution graphs.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
dict_create_table_step(
/*===================*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
tab_node_t* node;
ulint err = DB_ERROR;
@@ -985,15 +1023,15 @@ function_exit:
return(thr);
}
-/***************************************************************
+/***********************************************************//**
Creates an index. This is a high-level function used in SQL execution
-graphs. */
-
+graphs.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
dict_create_index_step(
/*===================*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
ind_node_t* node;
ulint err = DB_ERROR;
@@ -1046,19 +1084,40 @@ dict_create_index_step(
return(thr);
} else {
- node->state = INDEX_CREATE_INDEX_TREE;
+ node->state = INDEX_ADD_TO_CACHE;
}
}
+ if (node->state == INDEX_ADD_TO_CACHE) {
+
+ dulint index_id = node->index->id;
+
+ err = dict_index_add_to_cache(node->table, node->index,
+ FIL_NULL, TRUE);
+
+ node->index = dict_index_get_if_in_cache_low(index_id);
+ ut_a(!node->index == (err != DB_SUCCESS));
+
+ if (err != DB_SUCCESS) {
+
+ goto function_exit;
+ }
+
+ node->state = INDEX_CREATE_INDEX_TREE;
+ }
+
if (node->state == INDEX_CREATE_INDEX_TREE) {
err = dict_create_index_tree_step(node);
if (err != DB_SUCCESS) {
+ dict_index_remove_from_cache(node->table, node->index);
+ node->index = NULL;
goto function_exit;
}
+ node->index->page = node->page_no;
node->state = INDEX_COMMIT_WORK;
}
@@ -1068,21 +1127,13 @@ dict_create_index_step(
(CREATE INDEX does NOT currently do an implicit commit of
the current transaction) */
- node->state = INDEX_ADD_TO_CACHE;
+ node->state = INDEX_CREATE_INDEX_TREE;
/* thr->run_node = node->commit_node;
return(thr); */
}
- if (node->state == INDEX_ADD_TO_CACHE) {
-
- dict_index_add_to_cache(node->table, node->index,
- node->page_no);
-
- err = DB_SUCCESS;
- }
-
function_exit:
trx->error_state = err;
@@ -1103,15 +1154,15 @@ function_exit:
return(thr);
}
-/********************************************************************
+/****************************************************************//**
Creates the foreign key constraints system tables inside InnoDB
at database creation or database start if they are not found or are
-not of the right form. */
-
+not of the right form.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
ulint
dict_create_or_check_foreign_constraint_tables(void)
/*================================================*/
- /* out: DB_SUCCESS or error code */
{
dict_table_t* table1;
dict_table_t* table2;
@@ -1187,7 +1238,6 @@ dict_create_or_check_foreign_constraint_tables(void)
" FOR_COL_NAME CHAR, REF_COL_NAME CHAR);\n"
"CREATE UNIQUE CLUSTERED INDEX ID_IND"
" ON SYS_FOREIGN_COLS (ID, POS);\n"
- "COMMIT WORK;\n"
"END;\n"
, FALSE, trx);
@@ -1210,7 +1260,7 @@ dict_create_or_check_foreign_constraint_tables(void)
error = DB_MUST_GET_MORE_FILE_SPACE;
}
- trx->op_info = "";
+ trx_commit_for_mysql(trx);
row_mysql_unlock_data_dictionary(trx);
@@ -1225,18 +1275,18 @@ dict_create_or_check_foreign_constraint_tables(void)
return(error);
}
-/********************************************************************
-Evaluate the given foreign key SQL statement. */
-
+/****************************************************************//**
+Evaluate the given foreign key SQL statement.
+@return error code or DB_SUCCESS */
+static
ulint
dict_foreign_eval_sql(
/*==================*/
- /* out: error code or DB_SUCCESS */
- pars_info_t* info, /* in: info struct, or NULL */
- const char* sql, /* in: SQL string to evaluate */
- dict_table_t* table, /* in: table */
- dict_foreign_t* foreign,/* in: foreign */
- trx_t* trx) /* in: transaction */
+ pars_info_t* info, /*!< in: info struct, or NULL */
+ const char* sql, /*!< in: SQL string to evaluate */
+ dict_table_t* table, /*!< in: table */
+ dict_foreign_t* foreign,/*!< in: foreign */
+ trx_t* trx) /*!< in: transaction */
{
ulint error;
FILE* ef = dict_foreign_err_file;
@@ -1251,12 +1301,11 @@ dict_foreign_eval_sql(
ef);
ut_print_name(ef, trx, TRUE, table->name);
fputs(".\nA foreign key constraint of name ", ef);
- ut_print_name(ef, trx, FALSE, foreign->id);
+ ut_print_name(ef, trx, TRUE, foreign->id);
fputs("\nalready exists."
- " (Note that internally InnoDB adds 'databasename/'\n"
- "in front of the user-defined constraint name).\n",
- ef);
- fputs("Note that InnoDB's FOREIGN KEY system tables store\n"
+ " (Note that internally InnoDB adds 'databasename'\n"
+ "in front of the user-defined constraint name.)\n"
+ "Note that InnoDB's FOREIGN KEY system tables store\n"
"constraint names as case-insensitive, with the\n"
"MySQL standard latin1_swedish_ci collation. If you\n"
"create tables or databases whose names differ only in\n"
@@ -1291,18 +1340,18 @@ dict_foreign_eval_sql(
return(DB_SUCCESS);
}
-/************************************************************************
+/********************************************************************//**
Add a single foreign key field definition to the data dictionary tables in
-the database. */
+the database.
+@return error code or DB_SUCCESS */
static
ulint
dict_create_add_foreign_field_to_dictionary(
/*========================================*/
- /* out: error code or DB_SUCCESS */
- ulint field_nr, /* in: foreign field number */
- dict_table_t* table, /* in: table */
- dict_foreign_t* foreign, /* in: foreign */
- trx_t* trx) /* in: transaction */
+ ulint field_nr, /*!< in: foreign field number */
+ dict_table_t* table, /*!< in: table */
+ dict_foreign_t* foreign, /*!< in: foreign */
+ trx_t* trx) /*!< in: transaction */
{
pars_info_t* info = pars_info_create();
@@ -1326,23 +1375,23 @@ dict_create_add_foreign_field_to_dictionary(
table, foreign, trx));
}
-/************************************************************************
+/********************************************************************//**
Add a single foreign key definition to the data dictionary tables in the
database. We also generate names to constraints that were not named by the
user. A generated constraint has a name of the format
-databasename/tablename_ibfk_<number>, where the numbers start from 1, and
+databasename/tablename_ibfk_NUMBER, where the numbers start from 1, and
are given locally for this table, that is, the number is not global, as in
-the old format constraints < 4.0.18 it used to be. */
+the old format constraints < 4.0.18 it used to be.
+@return error code or DB_SUCCESS */
static
ulint
dict_create_add_foreign_to_dictionary(
/*==================================*/
- /* out: error code or DB_SUCCESS */
- ulint* id_nr, /* in/out: number to use in id generation;
+ ulint* id_nr, /*!< in/out: number to use in id generation;
incremented if used */
- dict_table_t* table, /* in: table */
- dict_foreign_t* foreign,/* in: foreign */
- trx_t* trx) /* in: transaction */
+ dict_table_t* table, /*!< in: table */
+ dict_foreign_t* foreign,/*!< in: foreign */
+ trx_t* trx) /*!< in: transaction */
{
ulint error;
ulint i;
@@ -1401,14 +1450,14 @@ dict_create_add_foreign_to_dictionary(
return(error);
}
-/************************************************************************
-Adds foreign key definitions to data dictionary tables in the database. */
-
+/********************************************************************//**
+Adds foreign key definitions to data dictionary tables in the database.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
ulint
dict_create_add_foreigns_to_dictionary(
/*===================================*/
- /* out: error code or DB_SUCCESS */
- ulint start_id,/* in: if we are actually doing ALTER TABLE
+ ulint start_id,/*!< in: if we are actually doing ALTER TABLE
ADD CONSTRAINT, we want to generate constraint
numbers which are bigger than in the table so
far; we number the constraints from
@@ -1416,8 +1465,8 @@ dict_create_add_foreigns_to_dictionary(
we are creating a new table, or if the table
so far has no constraints for which the name
was generated here */
- dict_table_t* table, /* in: table */
- trx_t* trx) /* in: transaction */
+ dict_table_t* table, /*!< in: table */
+ trx_t* trx) /*!< in: transaction */
{
dict_foreign_t* foreign;
ulint number = start_id + 1;
diff --git a/storage/innobase/dict/dict0dict.c b/storage/innobase/dict/dict0dict.c
index b8251a99105..2e524a5a2e3 100644
--- a/storage/innobase/dict/dict0dict.c
+++ b/storage/innobase/dict/dict0dict.c
@@ -1,7 +1,24 @@
-/**********************************************************************
-Data dictionary system
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1996 Innobase Oy
+*****************************************************************************/
+
+/******************************************************************//**
+@file dict/dict0dict.c
+Data dictionary system
Created 1/8/1996 Heikki Tuuri
***********************************************************************/
@@ -12,6 +29,12 @@ Created 1/8/1996 Heikki Tuuri
#include "dict0dict.ic"
#endif
+/** dummy index for ROW_FORMAT=REDUNDANT supremum and infimum records */
+UNIV_INTERN dict_index_t* dict_ind_redundant;
+/** dummy index for ROW_FORMAT=COMPACT supremum and infimum records */
+UNIV_INTERN dict_index_t* dict_ind_compact;
+
+#ifndef UNIV_HOTBACKUP
#include "buf0buf.h"
#include "data0type.h"
#include "mach0data.h"
@@ -22,221 +45,137 @@ Created 1/8/1996 Heikki Tuuri
#include "btr0btr.h"
#include "btr0cur.h"
#include "btr0sea.h"
+#include "page0zip.h"
+#include "page0page.h"
#include "pars0pars.h"
#include "pars0sym.h"
#include "que0que.h"
#include "rem0cmp.h"
-#ifndef UNIV_HOTBACKUP
-# include "m_ctype.h" /* my_isspace() */
-#endif /* !UNIV_HOTBACKUP */
+#include "row0merge.h"
+#include "m_ctype.h" /* my_isspace() */
+#include "ha_prototypes.h" /* innobase_strcasecmp() */
#include <ctype.h>
-dict_sys_t* dict_sys = NULL; /* the dictionary system */
-
-rw_lock_t dict_operation_lock; /* table create, drop, etc. reserve
- this in X-mode; implicit or backround
- operations purge, rollback, foreign
- key checks reserve this in S-mode; we
- cannot trust that MySQL protects
- implicit or background operations
- a table drop since MySQL does not
- know of them; therefore we need this;
- NOTE: a transaction which reserves
- this must keep book on the mode in
- trx->dict_operation_lock_mode */
-
-#define DICT_HEAP_SIZE 100 /* initial memory heap size when
+/** the dictionary system */
+UNIV_INTERN dict_sys_t* dict_sys = NULL;
+
+/** @brief the data dictionary rw-latch protecting dict_sys
+
+table create, drop, etc. reserve this in X-mode; implicit or
+background operations purge, rollback, foreign key checks reserve this
+in S-mode; we cannot trust that MySQL protects implicit or background
+operations from a table drop since MySQL does not know of them; therefore
+we need this; NOTE: a transaction which reserves this must keep book
+on the mode in trx_struct::dict_operation_lock_mode */
+UNIV_INTERN rw_lock_t dict_operation_lock;
+
+#define DICT_HEAP_SIZE 100 /*!< initial memory heap size when
creating a table or index object */
-#define DICT_POOL_PER_TABLE_HASH 512 /* buffer pool max size per table
+#define DICT_POOL_PER_TABLE_HASH 512 /*!< buffer pool max size per table
hash table fixed size in bytes */
-#define DICT_POOL_PER_VARYING 4 /* buffer pool max size per data
+#define DICT_POOL_PER_VARYING 4 /*!< buffer pool max size per data
dictionary varying size in bytes */
-/* Identifies generated InnoDB foreign key names */
+/** Identifies generated InnoDB foreign key names */
static char dict_ibfk[] = "_ibfk_";
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************
-Converts an identifier to a table name.
-
-NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
-this function, you MUST change also the prototype here! */
-extern
-void
-innobase_convert_from_table_id(
-/*===========================*/
- char* to, /* out: converted identifier */
- const char* from, /* in: identifier to convert */
- ulint len); /* in: length of 'to', in bytes;
- should be at least 5 * strlen(to) + 1 */
-/**********************************************************************
-Converts an identifier to UTF-8.
-
-NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
-this function, you MUST change also the prototype here! */
-extern
-void
-innobase_convert_from_id(
-/*=====================*/
- char* to, /* out: converted identifier */
- const char* from, /* in: identifier to convert */
- ulint len); /* in: length of 'to', in bytes;
- should be at least 3 * strlen(to) + 1 */
-/**********************************************************************
-Compares NUL-terminated UTF-8 strings case insensitively.
-
-NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
-this function, you MUST change also the prototype here! */
-extern
-int
-innobase_strcasecmp(
-/*================*/
- /* out: 0 if a=b, <0 if a<b, >1 if a>b */
- const char* a, /* in: first string to compare */
- const char* b); /* in: second string to compare */
-
-/**********************************************************************
-Makes all characters in a NUL-terminated UTF-8 string lower case.
-
-NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
-this function, you MUST change also the prototype here! */
-extern
-void
-innobase_casedn_str(
-/*================*/
- char* a); /* in/out: string to put in lower case */
-
-/**************************************************************************
-Determines the connection character set.
-
-NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
-this function, you MUST change also the prototype here! */
-struct charset_info_st*
-innobase_get_charset(
-/*=================*/
- /* out: connection character set */
- void* mysql_thd); /* in: MySQL thread handle */
-#endif /* !UNIV_HOTBACKUP */
-
-/**************************************************************************
-Removes an index from the dictionary cache. */
-static
-void
-dict_index_remove_from_cache(
-/*=========================*/
- dict_table_t* table, /* in: table */
- dict_index_t* index); /* in, own: index */
-/***********************************************************************
-Copies fields contained in index2 to index1. */
-static
-void
-dict_index_copy(
-/*============*/
- dict_index_t* index1, /* in: index to copy to */
- dict_index_t* index2, /* in: index to copy from */
- dict_table_t* table, /* in: table */
- ulint start, /* in: first position to copy */
- ulint end); /* in: last position to copy */
-/***********************************************************************
+/*******************************************************************//**
Tries to find column names for the index and sets the col field of the
-index. */
+index.
+@return TRUE if the column names were found */
static
-void
+ibool
dict_index_find_cols(
/*=================*/
- dict_table_t* table, /* in: table */
- dict_index_t* index); /* in: index */
-/***********************************************************************
+ dict_table_t* table, /*!< in: table */
+ dict_index_t* index); /*!< in: index */
+/*******************************************************************//**
Builds the internal dictionary cache representation for a clustered
-index, containing also system fields not defined by the user. */
+index, containing also system fields not defined by the user.
+@return own: the internal representation of the clustered index */
static
dict_index_t*
dict_index_build_internal_clust(
/*============================*/
- /* out, own: the internal representation
- of the clustered index */
- dict_table_t* table, /* in: table */
- dict_index_t* index); /* in: user representation of a clustered
- index */
-/***********************************************************************
+ const dict_table_t* table, /*!< in: table */
+ dict_index_t* index); /*!< in: user representation of
+ a clustered index */
+/*******************************************************************//**
Builds the internal dictionary cache representation for a non-clustered
-index, containing also system fields not defined by the user. */
+index, containing also system fields not defined by the user.
+@return own: the internal representation of the non-clustered index */
static
dict_index_t*
dict_index_build_internal_non_clust(
/*================================*/
- /* out, own: the internal representation
- of the non-clustered index */
- dict_table_t* table, /* in: table */
- dict_index_t* index); /* in: user representation of a non-clustered
- index */
-/**************************************************************************
+ const dict_table_t* table, /*!< in: table */
+ dict_index_t* index); /*!< in: user representation of
+ a non-clustered index */
+/**********************************************************************//**
Removes a foreign constraint struct from the dictionary cache. */
static
void
dict_foreign_remove_from_cache(
/*===========================*/
- dict_foreign_t* foreign); /* in, own: foreign constraint */
-/**************************************************************************
+ dict_foreign_t* foreign); /*!< in, own: foreign constraint */
+/**********************************************************************//**
Prints a column data. */
static
void
dict_col_print_low(
/*===============*/
- const dict_table_t* table, /* in: table */
- const dict_col_t* col); /* in: column */
-/**************************************************************************
+ const dict_table_t* table, /*!< in: table */
+ const dict_col_t* col); /*!< in: column */
+/**********************************************************************//**
Prints an index data. */
static
void
dict_index_print_low(
/*=================*/
- dict_index_t* index); /* in: index */
-/**************************************************************************
+ dict_index_t* index); /*!< in: index */
+/**********************************************************************//**
Prints a field data. */
static
void
dict_field_print_low(
/*=================*/
- dict_field_t* field); /* in: field */
-/*************************************************************************
+ dict_field_t* field); /*!< in: field */
+/*********************************************************************//**
Frees a foreign key struct. */
static
void
dict_foreign_free(
/*==============*/
- dict_foreign_t* foreign); /* in, own: foreign key struct */
+ dict_foreign_t* foreign); /*!< in, own: foreign key struct */
/* Stream for storing detailed information about the latest foreign key
and unique key errors */
-FILE* dict_foreign_err_file = NULL;
-mutex_t dict_foreign_err_mutex; /* mutex protecting the foreign
- and unique error buffers */
+UNIV_INTERN FILE* dict_foreign_err_file = NULL;
+/* mutex protecting the foreign and unique error buffers */
+UNIV_INTERN mutex_t dict_foreign_err_mutex;
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************
+/******************************************************************//**
Makes all characters in a NUL-terminated UTF-8 string lower case. */
-
+UNIV_INTERN
void
dict_casedn_str(
/*============*/
- char* a) /* in/out: string to put in lower case */
+ char* a) /*!< in/out: string to put in lower case */
{
innobase_casedn_str(a);
}
-#endif /* !UNIV_HOTBACKUP */
-
-/************************************************************************
-Checks if the database name in two table names is the same. */
+/********************************************************************//**
+Checks if the database name in two table names is the same.
+@return TRUE if same db name */
+UNIV_INTERN
ibool
dict_tables_have_same_db(
/*=====================*/
- /* out: TRUE if same db name */
- const char* name1, /* in: table name in the form
+ const char* name1, /*!< in: table name in the form
dbname '/' tablename */
- const char* name2) /* in: table name in the form
+ const char* name2) /*!< in: table name in the form
dbname '/' tablename */
{
for (; *name1 == *name2; name1++, name2++) {
@@ -248,14 +187,14 @@ dict_tables_have_same_db(
return(FALSE);
}
-/************************************************************************
-Return the end of table name where we have removed dbname and '/'. */
-
+/********************************************************************//**
+Return the end of table name where we have removed dbname and '/'.
+@return table name */
+UNIV_INTERN
const char*
dict_remove_db_name(
/*================*/
- /* out: table name */
- const char* name) /* in: table name in the form
+ const char* name) /*!< in: table name in the form
dbname '/' tablename */
{
const char* s = strchr(name, '/');
@@ -264,14 +203,14 @@ dict_remove_db_name(
return(s + 1);
}
-/************************************************************************
-Get the database name length in a table name. */
-
+/********************************************************************//**
+Get the database name length in a table name.
+@return database name length */
+UNIV_INTERN
ulint
dict_get_db_name_len(
/*=================*/
- /* out: database name length */
- const char* name) /* in: table name in the form
+ const char* name) /*!< in: table name in the form
dbname '/' tablename */
{
const char* s;
@@ -280,9 +219,9 @@ dict_get_db_name_len(
return(s - name);
}
-/************************************************************************
+/********************************************************************//**
Reserves the dictionary system mutex for MySQL. */
-
+UNIV_INTERN
void
dict_mutex_enter_for_mysql(void)
/*============================*/
@@ -290,9 +229,9 @@ dict_mutex_enter_for_mysql(void)
mutex_enter(&(dict_sys->mutex));
}
-/************************************************************************
+/********************************************************************//**
Releases the dictionary system mutex for MySQL. */
-
+UNIV_INTERN
void
dict_mutex_exit_for_mysql(void)
/*===========================*/
@@ -300,97 +239,40 @@ dict_mutex_exit_for_mysql(void)
mutex_exit(&(dict_sys->mutex));
}
-/************************************************************************
+/********************************************************************//**
Decrements the count of open MySQL handles to a table. */
-
+UNIV_INTERN
void
dict_table_decrement_handle_count(
/*==============================*/
- dict_table_t* table) /* in: table */
+ dict_table_t* table, /*!< in/out: table */
+ ibool dict_locked) /*!< in: TRUE=data dictionary locked */
{
- mutex_enter(&(dict_sys->mutex));
+ if (!dict_locked) {
+ mutex_enter(&dict_sys->mutex);
+ }
+ ut_ad(mutex_own(&dict_sys->mutex));
ut_a(table->n_mysql_handles_opened > 0);
table->n_mysql_handles_opened--;
- mutex_exit(&(dict_sys->mutex));
-}
-
-/*************************************************************************
-Gets the column data type. */
-
-void
-dict_col_copy_type_noninline(
-/*=========================*/
- const dict_col_t* col, /* in: column */
- dtype_t* type) /* out: data type */
-{
- dict_col_copy_type(col, type);
-}
-
-/************************************************************************
-Gets the nth column of a table. */
-
-const dict_col_t*
-dict_table_get_nth_col_noninline(
-/*=============================*/
- /* out: pointer to column object */
- const dict_table_t* table, /* in: table */
- ulint pos) /* in: position of column */
-{
- return(dict_table_get_nth_col(table, pos));
-}
-
-/************************************************************************
-Gets the first index on the table (the clustered index). */
-
-dict_index_t*
-dict_table_get_first_index_noninline(
-/*=================================*/
- /* out: index, NULL if none exists */
- dict_table_t* table) /* in: table */
-{
- return(dict_table_get_first_index(table));
-}
-
-/************************************************************************
-Gets the next index on the table. */
-
-dict_index_t*
-dict_table_get_next_index_noninline(
-/*================================*/
- /* out: index, NULL if none left */
- dict_index_t* index) /* in: index */
-{
- return(dict_table_get_next_index(index));
-}
-
-/**************************************************************************
-Returns an index object. */
-
-dict_index_t*
-dict_table_get_index_noninline(
-/*===========================*/
- /* out: index, NULL if does not exist */
- dict_table_t* table, /* in: table */
- const char* name) /* in: index name */
-{
- return(dict_table_get_index(table, name));
+ if (!dict_locked) {
+ mutex_exit(&dict_sys->mutex);
+ }
}
+#endif /* !UNIV_HOTBACKUP */
-/**************************************************************************
-Returns a column's name. */
-
+/**********************************************************************//**
+Returns a column's name.
+@return column name. NOTE: not guaranteed to stay valid if table is
+modified in any way (columns added, etc.). */
+UNIV_INTERN
const char*
dict_table_get_col_name(
/*====================*/
- /* out: column name. NOTE: not
- guaranteed to stay valid if table is
- modified in any way (columns added,
- etc.). */
- const dict_table_t* table, /* in: table */
- ulint col_nr) /* in: column number */
+ const dict_table_t* table, /*!< in: table */
+ ulint col_nr) /*!< in: column number */
{
ulint i;
const char* s;
@@ -409,57 +291,57 @@ dict_table_get_col_name(
return(s);
}
-
-/************************************************************************
-Acquire the autoinc lock.*/
-
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Acquire the autoinc lock. */
+UNIV_INTERN
void
dict_table_autoinc_lock(
/*====================*/
- dict_table_t* table)
+ dict_table_t* table) /*!< in/out: table */
{
mutex_enter(&table->autoinc_mutex);
}
-/************************************************************************
+/********************************************************************//**
Unconditionally set the autoinc counter. */
-
+UNIV_INTERN
void
dict_table_autoinc_initialize(
/*==========================*/
- dict_table_t* table, /* in: table */
- ib_ulonglong value) /* in: next value to assign to a row */
+ dict_table_t* table, /*!< in/out: table */
+ ib_uint64_t value) /*!< in: next value to assign to a row */
{
ut_ad(mutex_own(&table->autoinc_mutex));
table->autoinc = value;
}
-/************************************************************************
+/********************************************************************//**
Reads the next autoinc value (== autoinc counter value), 0 if not yet
-initialized. */
-
-ib_ulonglong
+initialized.
+@return value for a new row, or 0 */
+UNIV_INTERN
+ib_uint64_t
dict_table_autoinc_read(
/*====================*/
- /* out: value for a new row, or 0 */
- dict_table_t* table) /* in: table */
+ const dict_table_t* table) /*!< in: table */
{
ut_ad(mutex_own(&table->autoinc_mutex));
return(table->autoinc);
}
-/************************************************************************
+/********************************************************************//**
Updates the autoinc counter if the value supplied is greater than the
current value. */
-
+UNIV_INTERN
void
dict_table_autoinc_update_if_greater(
/*=================================*/
- dict_table_t* table, /* in: table */
- ib_ulonglong value) /* in: value which was assigned to a row */
+ dict_table_t* table, /*!< in/out: table */
+ ib_uint64_t value) /*!< in: value which was assigned to a row */
{
ut_ad(mutex_own(&table->autoinc_mutex));
@@ -469,28 +351,56 @@ dict_table_autoinc_update_if_greater(
}
}
-/************************************************************************
-Release the autoinc lock.*/
-
+/********************************************************************//**
+Release the autoinc lock. */
+UNIV_INTERN
void
dict_table_autoinc_unlock(
/*======================*/
- dict_table_t* table) /* in: release autoinc lock for this table */
+ dict_table_t* table) /*!< in/out: table */
{
mutex_exit(&table->autoinc_mutex);
}
-/************************************************************************
-Looks for column n in an index. */
+/**********************************************************************//**
+Looks for an index with the given table and index id.
+NOTE that we do not reserve the dictionary mutex.
+@return index or NULL if not found from cache */
+UNIV_INTERN
+dict_index_t*
+dict_index_get_on_id_low(
+/*=====================*/
+ dict_table_t* table, /*!< in: table */
+ dulint id) /*!< in: index id */
+{
+ dict_index_t* index;
+
+ index = dict_table_get_first_index(table);
+ while (index) {
+ if (0 == ut_dulint_cmp(id, index->id)) {
+ /* Found */
+
+ return(index);
+ }
+
+ index = dict_table_get_next_index(index);
+ }
+
+ return(NULL);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/********************************************************************//**
+Looks for column n in an index.
+@return position in internal representation of the index;
+ULINT_UNDEFINED if not contained */
+UNIV_INTERN
ulint
dict_index_get_nth_col_pos(
/*=======================*/
- /* out: position in internal representation
- of the index; if not contained, returns
- ULINT_UNDEFINED */
- dict_index_t* index, /* in: index */
- ulint n) /* in: column number */
+ const dict_index_t* index, /*!< in: index */
+ ulint n) /*!< in: column number */
{
const dict_field_t* field;
const dict_col_t* col;
@@ -502,7 +412,7 @@ dict_index_get_nth_col_pos(
col = dict_table_get_nth_col(index->table, n);
- if (index->type & DICT_CLUSTERED) {
+ if (dict_index_is_clust(index)) {
return(dict_col_get_clust_pos(col, index));
}
@@ -521,16 +431,16 @@ dict_index_get_nth_col_pos(
return(ULINT_UNDEFINED);
}
-/************************************************************************
-Returns TRUE if the index contains a column or a prefix of that column. */
-
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Returns TRUE if the index contains a column or a prefix of that column.
+@return TRUE if contains the column or its prefix */
+UNIV_INTERN
ibool
dict_index_contains_col_or_prefix(
/*==============================*/
- /* out: TRUE if contains the column or its
- prefix */
- dict_index_t* index, /* in: index */
- ulint n) /* in: column number */
+ const dict_index_t* index, /*!< in: index */
+ ulint n) /*!< in: column number */
{
const dict_field_t* field;
const dict_col_t* col;
@@ -540,7 +450,7 @@ dict_index_contains_col_or_prefix(
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- if (index->type & DICT_CLUSTERED) {
+ if (dict_index_is_clust(index)) {
return(TRUE);
}
@@ -561,26 +471,25 @@ dict_index_contains_col_or_prefix(
return(FALSE);
}
-/************************************************************************
+/********************************************************************//**
Looks for a matching field in an index. The column has to be the same. The
column in index must be complete, or must contain a prefix longer than the
column in index2. That is, we must be able to construct the prefix in index2
-from the prefix in index. */
-
+from the prefix in index.
+@return position in internal representation of the index;
+ULINT_UNDEFINED if not contained */
+UNIV_INTERN
ulint
dict_index_get_nth_field_pos(
/*=========================*/
- /* out: position in internal representation
- of the index; if not contained, returns
- ULINT_UNDEFINED */
- dict_index_t* index, /* in: index from which to search */
- dict_index_t* index2, /* in: index */
- ulint n) /* in: field number in index2 */
+ const dict_index_t* index, /*!< in: index from which to search */
+ const dict_index_t* index2, /*!< in: index */
+ ulint n) /*!< in: field number in index2 */
{
- dict_field_t* field;
- dict_field_t* field2;
- ulint n_fields;
- ulint pos;
+ const dict_field_t* field;
+ const dict_field_t* field2;
+ ulint n_fields;
+ ulint pos;
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
@@ -604,15 +513,15 @@ dict_index_get_nth_field_pos(
return(ULINT_UNDEFINED);
}
-/**************************************************************************
-Returns a table object based on table id. */
-
+/**********************************************************************//**
+Returns a table object based on table id.
+@return table, NULL if it does not exist */
+UNIV_INTERN
dict_table_t*
dict_table_get_on_id(
/*=================*/
- /* out: table, NULL if does not exist */
- dulint table_id, /* in: table id */
- trx_t* trx) /* in: transaction handle */
+ dulint table_id, /*!< in: table id */
+ trx_t* trx) /*!< in: transaction handle */
{
dict_table_t* table;
@@ -638,47 +547,32 @@ dict_table_get_on_id(
return(table);
}
-/************************************************************************
-Looks for column n position in the clustered index. */
-
+/********************************************************************//**
+Looks for column n position in the clustered index.
+@return position in internal representation of the clustered index */
+UNIV_INTERN
ulint
dict_table_get_nth_col_pos(
/*=======================*/
- /* out: position in internal representation
- of the clustered index */
- dict_table_t* table, /* in: table */
- ulint n) /* in: column number */
+ const dict_table_t* table, /*!< in: table */
+ ulint n) /*!< in: column number */
{
return(dict_index_get_nth_col_pos(dict_table_get_first_index(table),
n));
}
-/************************************************************************
-Check whether the table uses the compact page format. */
-
-ibool
-dict_table_is_comp_noninline(
-/*=========================*/
- /* out: TRUE if table uses the
- compact page format */
- const dict_table_t* table) /* in: table */
-{
- return(dict_table_is_comp(table));
-}
-
-/************************************************************************
+/********************************************************************//**
Checks if a column is in the ordering columns of the clustered index of a
-table. Column prefixes are treated like whole columns. */
-
+table. Column prefixes are treated like whole columns.
+@return TRUE if the column, or its prefix, is in the clustered key */
+UNIV_INTERN
ibool
dict_table_col_in_clustered_key(
/*============================*/
- /* out: TRUE if the column, or its prefix, is
- in the clustered key */
- dict_table_t* table, /* in: table */
- ulint n) /* in: column number */
+ const dict_table_t* table, /*!< in: table */
+ ulint n) /*!< in: column number */
{
- dict_index_t* index;
+ const dict_index_t* index;
const dict_field_t* field;
const dict_col_t* col;
ulint pos;
@@ -704,9 +598,9 @@ dict_table_col_in_clustered_key(
return(FALSE);
}
-/**************************************************************************
+/**********************************************************************//**
Inits the data dictionary module. */
-
+UNIV_INTERN
void
dict_init(void)
/*===========*/
@@ -715,10 +609,10 @@ dict_init(void)
mutex_create(&dict_sys->mutex, SYNC_DICT);
- dict_sys->table_hash = hash_create(buf_pool_get_max_size()
+ dict_sys->table_hash = hash_create(buf_pool_get_curr_size()
/ (DICT_POOL_PER_TABLE_HASH
* UNIV_WORD_SIZE));
- dict_sys->table_id_hash = hash_create(buf_pool_get_max_size()
+ dict_sys->table_id_hash = hash_create(buf_pool_get_curr_size()
/ (DICT_POOL_PER_TABLE_HASH
* UNIV_WORD_SIZE));
dict_sys->size = 0;
@@ -733,20 +627,18 @@ dict_init(void)
mutex_create(&dict_foreign_err_mutex, SYNC_ANY_LATCH);
}
-/**************************************************************************
+/**********************************************************************//**
Returns a table object and optionally increment its MySQL open handle count.
NOTE! This is a high-level function to be used mainly from outside the
'dict' directory. Inside this directory dict_table_get_low is usually the
-appropriate function. */
-
+appropriate function.
+@return table, NULL if it does not exist */
+UNIV_INTERN
dict_table_t*
dict_table_get(
/*===========*/
- /* out: table, NULL if
- does not exist */
- const char* table_name, /* in: table name */
- ibool inc_mysql_count)
- /* in: whether to increment the open
+ const char* table_name, /*!< in: table name */
+ ibool inc_mysql_count)/*!< in: whether to increment the open
handle count on the table */
{
dict_table_t* table;
@@ -772,15 +664,16 @@ dict_table_get(
return(table);
}
+#endif /* !UNIV_HOTBACKUP */
-/**************************************************************************
+/**********************************************************************//**
Adds system columns to a table object. */
-
+UNIV_INTERN
void
dict_table_add_system_columns(
/*==========================*/
- dict_table_t* table, /* in/out: table */
- mem_heap_t* heap) /* in: temporary heap */
+ dict_table_t* table, /*!< in/out: table */
+ mem_heap_t* heap) /*!< in: temporary heap */
{
ut_ad(table);
ut_ad(table->n_def == table->n_cols - DATA_N_SYS_COLS);
@@ -819,14 +712,15 @@ dict_table_add_system_columns(
#endif
}
-/**************************************************************************
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
Adds a table object to the dictionary cache. */
-
+UNIV_INTERN
void
dict_table_add_to_cache(
/*====================*/
- dict_table_t* table, /* in: table */
- mem_heap_t* heap) /* in: temporary heap */
+ dict_table_t* table, /*!< in: table */
+ mem_heap_t* heap) /*!< in: temporary heap */
{
ulint fold;
ulint id_fold;
@@ -866,17 +760,35 @@ dict_table_add_to_cache(
/* Look for a table with the same name: error if such exists */
{
dict_table_t* table2;
- HASH_SEARCH(name_hash, dict_sys->table_hash, fold, table2,
- (ut_strcmp(table2->name, table->name) == 0));
+ HASH_SEARCH(name_hash, dict_sys->table_hash, fold,
+ dict_table_t*, table2, ut_ad(table2->cached),
+ ut_strcmp(table2->name, table->name) == 0);
ut_a(table2 == NULL);
+
+#ifdef UNIV_DEBUG
+ /* Look for the same table pointer with a different name */
+ HASH_SEARCH_ALL(name_hash, dict_sys->table_hash,
+ dict_table_t*, table2, ut_ad(table2->cached),
+ table2 == table);
+ ut_ad(table2 == NULL);
+#endif /* UNIV_DEBUG */
}
/* Look for a table with the same id: error if such exists */
{
dict_table_t* table2;
- HASH_SEARCH(id_hash, dict_sys->table_id_hash, id_fold, table2,
- (ut_dulint_cmp(table2->id, table->id) == 0));
+ HASH_SEARCH(id_hash, dict_sys->table_id_hash, id_fold,
+ dict_table_t*, table2, ut_ad(table2->cached),
+ ut_dulint_cmp(table2->id, table->id) == 0);
ut_a(table2 == NULL);
+
+#ifdef UNIV_DEBUG
+ /* Look for the same table pointer with a different id */
+ HASH_SEARCH_ALL(id_hash, dict_sys->table_id_hash,
+ dict_table_t*, table2, ut_ad(table2->cached),
+ table2 == table);
+ ut_ad(table2 == NULL);
+#endif /* UNIV_DEBUG */
}
/* Add table to hash table of tables */
@@ -892,16 +804,16 @@ dict_table_add_to_cache(
dict_sys->size += mem_heap_get_size(table->heap);
}
-/**************************************************************************
+/**********************************************************************//**
Looks for an index with the given id. NOTE that we do not reserve
the dictionary mutex: this function is for emergency purposes like
-printing info of a corrupt database page! */
-
+printing info of a corrupt database page!
+@return index or NULL if not found in the cache */
+UNIV_INTERN
dict_index_t*
dict_index_find_on_id_low(
/*======================*/
- /* out: index or NULL if not found from cache */
- dulint id) /* in: index id */
+ dulint id) /*!< in: index id */
{
dict_table_t* table;
dict_index_t* index;
@@ -927,16 +839,16 @@ dict_index_find_on_id_low(
return(NULL);
}
-/**************************************************************************
-Renames a table object. */
-
+/**********************************************************************//**
+Renames a table object.
+@return TRUE if success */
+UNIV_INTERN
ibool
dict_table_rename_in_cache(
/*=======================*/
- /* out: TRUE if success */
- dict_table_t* table, /* in: table */
- const char* new_name, /* in: new name */
- ibool rename_also_foreigns)/* in: in ALTER TABLE we want
+ dict_table_t* table, /*!< in/out: table */
+ const char* new_name, /*!< in: new name */
+ ibool rename_also_foreigns)/*!< in: in ALTER TABLE we want
to preserve the original table name
in constraints which reference it */
{
@@ -944,26 +856,31 @@ dict_table_rename_in_cache(
dict_index_t* index;
ulint fold;
ulint old_size;
- char* old_name;
- ibool success;
+ const char* old_name;
ut_ad(table);
ut_ad(mutex_own(&(dict_sys->mutex)));
old_size = mem_heap_get_size(table->heap);
+ old_name = table->name;
fold = ut_fold_string(new_name);
/* Look for a table with the same name: error if such exists */
{
dict_table_t* table2;
- HASH_SEARCH(name_hash, dict_sys->table_hash, fold, table2,
+ HASH_SEARCH(name_hash, dict_sys->table_hash, fold,
+ dict_table_t*, table2, ut_ad(table2->cached),
(ut_strcmp(table2->name, new_name) == 0));
- if (table2) {
- fprintf(stderr,
- "InnoDB: Error: dictionary cache"
- " already contains a table of name %s\n",
- new_name);
+ if (UNIV_LIKELY_NULL(table2)) {
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: Error: dictionary cache"
+ " already contains a table ", stderr);
+ ut_print_name(stderr, NULL, TRUE, new_name);
+ fputs("\n"
+ "InnoDB: cannot rename table ", stderr);
+ ut_print_name(stderr, NULL, TRUE, old_name);
+ putc('\n', stderr);
return(FALSE);
}
}
@@ -973,27 +890,24 @@ dict_table_rename_in_cache(
if (table->space != 0) {
if (table->dir_path_of_temp_table != NULL) {
- fprintf(stderr,
- "InnoDB: Error: trying to rename a table"
- " %s (%s) created with CREATE\n"
- "InnoDB: TEMPORARY TABLE\n",
- table->name, table->dir_path_of_temp_table);
- success = FALSE;
- } else {
- success = fil_rename_tablespace(
- table->name, table->space, new_name);
- }
-
- if (!success) {
-
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: Error: trying to rename a"
+ " TEMPORARY TABLE ", stderr);
+ ut_print_name(stderr, NULL, TRUE, old_name);
+ fputs(" (", stderr);
+ ut_print_filename(stderr,
+ table->dir_path_of_temp_table);
+ fputs(" )\n", stderr);
+ return(FALSE);
+ } else if (!fil_rename_tablespace(old_name, table->space,
+ new_name)) {
return(FALSE);
}
}
/* Remove table from the hash tables of tables */
HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash,
- ut_fold_string(table->name), table);
- old_name = mem_heap_strdup(table->heap, table->name);
+ ut_fold_string(old_name), table);
table->name = mem_heap_strdup(table->heap, new_name);
/* Add table to hash table of tables */
@@ -1141,15 +1055,15 @@ dict_table_rename_in_cache(
return(TRUE);
}
-/**************************************************************************
+/**********************************************************************//**
Change the id of a table object in the dictionary cache. This is used in
DISCARD TABLESPACE. */
-
+UNIV_INTERN
void
dict_table_change_id_in_cache(
/*==========================*/
- dict_table_t* table, /* in: table object already in cache */
- dulint new_id) /* in: new id to set */
+ dict_table_t* table, /*!< in/out: table object already in cache */
+ dulint new_id) /*!< in: new id to set */
{
ut_ad(table);
ut_ad(mutex_own(&(dict_sys->mutex)));
@@ -1166,13 +1080,13 @@ dict_table_change_id_in_cache(
ut_fold_dulint(table->id), table);
}
-/**************************************************************************
+/**********************************************************************//**
Removes a table object from the dictionary cache. */
-
+UNIV_INTERN
void
dict_table_remove_from_cache(
/*=========================*/
- dict_table_t* table) /* in, own: table */
+ dict_table_t* table) /*!< in, own: table */
{
dict_foreign_t* foreign;
dict_index_t* index;
@@ -1233,27 +1147,15 @@ dict_table_remove_from_cache(
dict_mem_table_free(table);
}
-/*************************************************************************
-Gets the column position in the clustered index. */
-
-ulint
-dict_col_get_clust_pos_noninline(
-/*=============================*/
- const dict_col_t* col, /* in: table column */
- const dict_index_t* clust_index) /* in: clustered index */
-{
- return(dict_col_get_clust_pos(col, clust_index));
-}
-
-/********************************************************************
+/****************************************************************//**
If the given column name is reserved for InnoDB system columns, return
-TRUE. */
-
+TRUE.
+@return TRUE if name is reserved */
+UNIV_INTERN
ibool
dict_col_name_is_reserved(
/*======================*/
- /* out: TRUE if name is reserved */
- const char* name) /* in: column name */
+ const char* name) /*!< in: column name */
{
/* This check reminds that if a new system column is added to
the program, it should be dealt with here. */
@@ -1277,16 +1179,271 @@ dict_col_name_is_reserved(
return(FALSE);
}
-/**************************************************************************
-Adds an index to the dictionary cache. */
+/****************************************************************//**
+If an undo log record for this table might not fit on a single page,
+return TRUE.
+@return TRUE if the undo log record could become too big */
+static
+ibool
+dict_index_too_big_for_undo(
+/*========================*/
+ const dict_table_t* table, /*!< in: table */
+ const dict_index_t* new_index) /*!< in: index */
+{
+ /* Make sure that all column prefixes will fit in the undo log record
+ in trx_undo_page_report_modify() right after trx_undo_page_init(). */
-void
+ ulint i;
+ const dict_index_t* clust_index
+ = dict_table_get_first_index(table);
+ ulint undo_page_len
+ = TRX_UNDO_PAGE_HDR - TRX_UNDO_PAGE_HDR_SIZE
+ + 2 /* next record pointer */
+ + 1 /* type_cmpl */
+ + 11 /* trx->undo_no */ + 11 /* table->id */
+ + 1 /* rec_get_info_bits() */
+ + 11 /* DB_TRX_ID */
+ + 11 /* DB_ROLL_PTR */
+ + 10 + FIL_PAGE_DATA_END /* trx_undo_left() */
+ + 2/* pointer to previous undo log record */;
+
+ if (UNIV_UNLIKELY(!clust_index)) {
+ ut_a(dict_index_is_clust(new_index));
+ clust_index = new_index;
+ }
+
+ /* Add the size of the ordering columns in the
+ clustered index. */
+ for (i = 0; i < clust_index->n_uniq; i++) {
+ const dict_col_t* col
+ = dict_index_get_nth_col(clust_index, i);
+
+ /* Use the maximum output size of
+ mach_write_compressed(), although the encoded
+ length should always fit in 2 bytes. */
+ undo_page_len += 5 + dict_col_get_max_size(col);
+ }
+
+ /* Add the old values of the columns to be updated.
+ First, the amount and the numbers of the columns.
+ These are written by mach_write_compressed() whose
+ maximum output length is 5 bytes. However, given that
+ the quantities are below REC_MAX_N_FIELDS (10 bits),
+ the maximum length is 2 bytes per item. */
+ undo_page_len += 2 * (dict_table_get_n_cols(table) + 1);
+
+ for (i = 0; i < clust_index->n_def; i++) {
+ const dict_col_t* col
+ = dict_index_get_nth_col(clust_index, i);
+ ulint max_size
+ = dict_col_get_max_size(col);
+ ulint fixed_size
+ = dict_col_get_fixed_size(col,
+ dict_table_is_comp(table));
+
+ if (fixed_size) {
+ /* Fixed-size columns are stored locally. */
+ max_size = fixed_size;
+ } else if (max_size <= BTR_EXTERN_FIELD_REF_SIZE * 2) {
+ /* Short columns are stored locally. */
+ } else if (!col->ord_part) {
+ /* See if col->ord_part would be set
+ because of new_index. */
+ ulint j;
+
+ for (j = 0; j < new_index->n_uniq; j++) {
+ if (dict_index_get_nth_col(
+ new_index, j) == col) {
+
+ goto is_ord_part;
+ }
+ }
+
+ /* This is not an ordering column in any index.
+ Thus, it can be stored completely externally. */
+ max_size = BTR_EXTERN_FIELD_REF_SIZE;
+ } else {
+is_ord_part:
+ /* This is an ordering column in some index.
+ A long enough prefix must be written to the
+ undo log. See trx_undo_page_fetch_ext(). */
+
+ if (max_size > REC_MAX_INDEX_COL_LEN) {
+ max_size = REC_MAX_INDEX_COL_LEN;
+ }
+
+ max_size += BTR_EXTERN_FIELD_REF_SIZE;
+ }
+
+ undo_page_len += 5 + max_size;
+ }
+
+ return(undo_page_len >= UNIV_PAGE_SIZE);
+}
+
+/****************************************************************//**
+If a record of this index might not fit on a single B-tree page,
+return TRUE.
+@return TRUE if the index record could become too big */
+static
+ibool
+dict_index_too_big_for_tree(
+/*========================*/
+ const dict_table_t* table, /*!< in: table */
+ const dict_index_t* new_index) /*!< in: index */
+{
+ ulint zip_size;
+ ulint comp;
+ ulint i;
+ /* maximum possible storage size of a record */
+ ulint rec_max_size;
+ /* maximum allowed size of a record on a leaf page */
+ ulint page_rec_max;
+ /* maximum allowed size of a node pointer record */
+ ulint page_ptr_max;
+
+ comp = dict_table_is_comp(table);
+ zip_size = dict_table_zip_size(table);
+
+ if (zip_size && zip_size < UNIV_PAGE_SIZE) {
+ /* On a compressed page, two records must fit in the
+ uncompressed page modification log. On compressed
+ pages with zip_size == UNIV_PAGE_SIZE, this limit will
+ never be reached. */
+ ut_ad(comp);
+ /* The maximum allowed record size is the size of
+ an empty page, minus a byte for recording the heap
+ number in the page modification log. The maximum
+ allowed node pointer size is half that. */
+ page_rec_max = page_zip_empty_size(new_index->n_fields,
+ zip_size) - 1;
+ page_ptr_max = page_rec_max / 2;
+ /* On a compressed page, there is a two-byte entry in
+ the dense page directory for every record. But there
+ is no record header. */
+ rec_max_size = 2;
+ } else {
+ /* The maximum allowed record size is half a B-tree
+ page. No additional sparse page directory entry will
+ be generated for the first few user records. */
+ page_rec_max = page_get_free_space_of_empty(comp) / 2;
+ page_ptr_max = page_rec_max;
+ /* Each record has a header. */
+ rec_max_size = comp
+ ? REC_N_NEW_EXTRA_BYTES
+ : REC_N_OLD_EXTRA_BYTES;
+ }
+
+ if (comp) {
+ /* Include the "null" flags in the
+ maximum possible record size. */
+ rec_max_size += UT_BITS_IN_BYTES(new_index->n_nullable);
+ } else {
+ /* For each column, include a 2-byte offset and a
+ "null" flag. The 1-byte format is only used in short
+ records that do not contain externally stored columns.
+ Such records could never exceed the page limit, even
+ when using the 2-byte format. */
+ rec_max_size += 2 * new_index->n_fields;
+ }
+
+ /* Compute the maximum possible record size. */
+ for (i = 0; i < new_index->n_fields; i++) {
+ const dict_field_t* field
+ = dict_index_get_nth_field(new_index, i);
+ const dict_col_t* col
+ = dict_field_get_col(field);
+ ulint field_max_size;
+ ulint field_ext_max_size;
+
+ /* In dtuple_convert_big_rec(), variable-length columns
+ that are longer than BTR_EXTERN_FIELD_REF_SIZE * 2
+ may be chosen for external storage.
+
+ Fixed-length columns, and all columns of secondary
+ index records are always stored inline. */
+
+ /* Determine the maximum length of the index field.
+ The field_ext_max_size should be computed as the worst
+ case in rec_get_converted_size_comp() for
+ REC_STATUS_ORDINARY records. */
+
+ field_max_size = dict_col_get_fixed_size(col, comp);
+ if (field_max_size) {
+ /* dict_index_add_col() should guarantee this */
+ ut_ad(!field->prefix_len
+ || field->fixed_len == field->prefix_len);
+ /* Fixed lengths are not encoded
+ in ROW_FORMAT=COMPACT. */
+ field_ext_max_size = 0;
+ goto add_field_size;
+ }
+
+ field_max_size = dict_col_get_max_size(col);
+ field_ext_max_size = field_max_size < 256 ? 1 : 2;
+
+ if (field->prefix_len) {
+ if (field->prefix_len < field_max_size) {
+ field_max_size = field->prefix_len;
+ }
+ } else if (field_max_size > BTR_EXTERN_FIELD_REF_SIZE * 2
+ && dict_index_is_clust(new_index)) {
+
+ /* In the worst case, we have a locally stored
+ column of BTR_EXTERN_FIELD_REF_SIZE * 2 bytes.
+ The length can be stored in one byte. If the
+ column were stored externally, the lengths in
+ the clustered index page would be
+ BTR_EXTERN_FIELD_REF_SIZE and 2. */
+ field_max_size = BTR_EXTERN_FIELD_REF_SIZE * 2;
+ field_ext_max_size = 1;
+ }
+
+ if (comp) {
+ /* Add the extra size for ROW_FORMAT=COMPACT.
+ For ROW_FORMAT=REDUNDANT, these bytes were
+ added to rec_max_size before this loop. */
+ rec_max_size += field_ext_max_size;
+ }
+add_field_size:
+ rec_max_size += field_max_size;
+
+ /* Check the size limit on leaf pages. */
+ if (UNIV_UNLIKELY(rec_max_size >= page_rec_max)) {
+
+ return(TRUE);
+ }
+
+ /* Check the size limit on non-leaf pages. Records
+ stored in non-leaf B-tree pages consist of the unique
+ columns of the record (the key columns of the B-tree)
+ and a node pointer field. When we have processed the
+ unique columns, rec_max_size equals the size of the
+ node pointer record minus the node pointer column. */
+ if (i + 1 == dict_index_get_n_unique_in_tree(new_index)
+ && rec_max_size + REC_NODE_PTR_SIZE >= page_ptr_max) {
+
+ return(TRUE);
+ }
+ }
+
+ return(FALSE);
+}
+
+/**********************************************************************//**
+Adds an index to the dictionary cache.
+@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
+UNIV_INTERN
+ulint
dict_index_add_to_cache(
/*====================*/
- dict_table_t* table, /* in: table on which the index is */
- dict_index_t* index, /* in, own: index; NOTE! The index memory
+ dict_table_t* table, /*!< in: table on which the index is */
+ dict_index_t* index, /*!< in, own: index; NOTE! The index memory
object is freed in this function! */
- ulint page_no)/* in: root page number of the index */
+ ulint page_no,/*!< in: root page number of the index */
+ ibool strict) /*!< in: TRUE=refuse to create the index
+ if records could be too big to fit in
+ a B-tree page */
{
dict_index_t* new_index;
ulint n_ord;
@@ -1298,61 +1455,117 @@ dict_index_add_to_cache(
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
ut_ad(mem_heap_validate(index->heap));
+ ut_a(!dict_index_is_clust(index)
+ || UT_LIST_GET_LEN(table->indexes) == 0);
-#ifdef UNIV_DEBUG
- {
- dict_index_t* index2;
- index2 = UT_LIST_GET_FIRST(table->indexes);
-
- while (index2 != NULL) {
- ut_ad(ut_strcmp(index->name, index2->name) != 0);
+ if (!dict_index_find_cols(table, index)) {
- index2 = UT_LIST_GET_NEXT(indexes, index2);
- }
+ return(DB_CORRUPTION);
}
-#endif /* UNIV_DEBUG */
-
- ut_a(!(index->type & DICT_CLUSTERED)
- || UT_LIST_GET_LEN(table->indexes) == 0);
-
- dict_index_find_cols(table, index);
/* Build the cache internal representation of the index,
containing also the added system fields */
- if (index->type & DICT_CLUSTERED) {
+ if (dict_index_is_clust(index)) {
new_index = dict_index_build_internal_clust(table, index);
} else {
new_index = dict_index_build_internal_non_clust(table, index);
}
- new_index->search_info = btr_search_info_create(new_index->heap);
-
/* Set the n_fields value in new_index to the actual defined
number of fields in the cache internal representation */
new_index->n_fields = new_index->n_def;
- /* Add the new index as the last index for the table */
-
- UT_LIST_ADD_LAST(indexes, table->indexes, new_index);
- new_index->table = table;
- new_index->table_name = table->name;
-
- /* Increment the ord_part counts in columns which are ordering */
+ if (strict && dict_index_too_big_for_tree(table, new_index)) {
+too_big:
+ dict_mem_index_free(new_index);
+ dict_mem_index_free(index);
+ return(DB_TOO_BIG_RECORD);
+ }
if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
n_ord = new_index->n_fields;
} else {
- n_ord = dict_index_get_n_unique(new_index);
+ n_ord = new_index->n_uniq;
+ }
+
+ switch (dict_table_get_format(table)) {
+ case DICT_TF_FORMAT_51:
+ /* ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT store
+ prefixes of externally stored columns locally within
+ the record. There are no special considerations for
+ the undo log record size. */
+ goto undo_size_ok;
+
+ case DICT_TF_FORMAT_ZIP:
+ /* In ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPRESSED,
+ column prefix indexes require that prefixes of
+ externally stored columns are written to the undo log.
+ This may make the undo log record bigger than the
+ record on the B-tree page. The maximum size of an
+ undo log record is the page size. That must be
+ checked for below. */
+ break;
+
+#if DICT_TF_FORMAT_ZIP != DICT_TF_FORMAT_MAX
+# error "DICT_TF_FORMAT_ZIP != DICT_TF_FORMAT_MAX"
+#endif
+ }
+
+ for (i = 0; i < n_ord; i++) {
+ const dict_field_t* field
+ = dict_index_get_nth_field(new_index, i);
+ const dict_col_t* col
+ = dict_field_get_col(field);
+
+ /* In dtuple_convert_big_rec(), variable-length columns
+ that are longer than BTR_EXTERN_FIELD_REF_SIZE * 2
+ may be chosen for external storage. If the column appears
+ in an ordering column of an index, a longer prefix of
+ REC_MAX_INDEX_COL_LEN will be copied to the undo log
+ by trx_undo_page_report_modify() and
+ trx_undo_page_fetch_ext(). It suffices to check the
+ capacity of the undo log whenever new_index includes
+ a column prefix on a column that may be stored externally. */
+
+ if (field->prefix_len /* prefix index */
+ && !col->ord_part /* not yet ordering column */
+ && !dict_col_get_fixed_size(col, TRUE) /* variable-length */
+ && dict_col_get_max_size(col)
+ > BTR_EXTERN_FIELD_REF_SIZE * 2 /* long enough */) {
+
+ if (dict_index_too_big_for_undo(table, new_index)) {
+ /* An undo log record might not fit in
+ a single page. Refuse to create this index. */
+
+ goto too_big;
+ }
+
+ break;
+ }
}
+undo_size_ok:
+ /* Flag the ordering columns */
+
for (i = 0; i < n_ord; i++) {
dict_index_get_nth_field(new_index, i)->col->ord_part = 1;
}
- new_index->page = (unsigned int) page_no;
+ /* Add the new index as the last index for the table */
+
+ UT_LIST_ADD_LAST(indexes, table->indexes, new_index);
+ new_index->table = table;
+ new_index->table_name = table->name;
+
+ new_index->search_info = btr_search_info_create(new_index->heap);
+
+ new_index->stat_index_size = 1;
+ new_index->stat_n_leaf_pages = 1;
+
+ new_index->page = page_no;
rw_lock_create(&new_index->lock, SYNC_INDEX_TREE);
if (!UNIV_UNLIKELY(new_index->type & DICT_UNIVERSAL)) {
@@ -1360,7 +1573,7 @@ dict_index_add_to_cache(
new_index->stat_n_diff_key_vals = mem_heap_alloc(
new_index->heap,
(1 + dict_index_get_n_unique(new_index))
- * sizeof(ib_longlong));
+ * sizeof(ib_int64_t));
/* Give some sensible values to stat_n_... in case we do
not calculate statistics quickly enough */
@@ -1373,16 +1586,18 @@ dict_index_add_to_cache(
dict_sys->size += mem_heap_get_size(new_index->heap);
dict_mem_index_free(index);
+
+ return(DB_SUCCESS);
}
-/**************************************************************************
+/**********************************************************************//**
Removes an index from the dictionary cache. */
-static
+UNIV_INTERN
void
dict_index_remove_from_cache(
/*=========================*/
- dict_table_t* table, /* in: table */
- dict_index_t* index) /* in, own: index */
+ dict_table_t* table, /*!< in/out: table */
+ dict_index_t* index) /*!< in, own: index */
{
ulint size;
ulint retries = 0;
@@ -1452,15 +1667,16 @@ dict_index_remove_from_cache(
dict_mem_index_free(index);
}
-/***********************************************************************
+/*******************************************************************//**
Tries to find column names for the index and sets the col field of the
-index. */
+index.
+@return TRUE if the column names were found */
static
-void
+ibool
dict_index_find_cols(
/*=================*/
- dict_table_t* table, /* in: table */
- dict_index_t* index) /* in: index */
+ dict_table_t* table, /*!< in: table */
+ dict_index_t* index) /*!< in: index */
{
ulint i;
@@ -1475,31 +1691,40 @@ dict_index_find_cols(
for (j = 0; j < table->n_cols; j++) {
if (!strcmp(dict_table_get_col_name(table, j),
field->name)) {
- field->col = (dict_col_t*)
- dict_table_get_nth_col(table, j);
+ field->col = dict_table_get_nth_col(table, j);
goto found;
}
}
+#ifdef UNIV_DEBUG
/* It is an error not to find a matching column. */
- ut_error;
+ fputs("InnoDB: Error: no matching column for ", stderr);
+ ut_print_name(stderr, NULL, FALSE, field->name);
+ fputs(" in ", stderr);
+ dict_index_name_print(stderr, NULL, index);
+ fputs("!\n", stderr);
+#endif /* UNIV_DEBUG */
+ return(FALSE);
- found:
+found:
;
}
+
+ return(TRUE);
}
+#endif /* !UNIV_HOTBACKUP */
-/***********************************************************************
+/*******************************************************************//**
Adds a column to index. */
-
+UNIV_INTERN
void
dict_index_add_col(
/*===============*/
- dict_index_t* index, /* in: index */
- dict_table_t* table, /* in: table */
- dict_col_t* col, /* in: column */
- ulint prefix_len) /* in: column prefix length */
+ dict_index_t* index, /*!< in/out: index */
+ const dict_table_t* table, /*!< in: table */
+ dict_col_t* col, /*!< in: column */
+ ulint prefix_len) /*!< in: column prefix length */
{
dict_field_t* field;
const char* col_name;
@@ -1511,7 +1736,8 @@ dict_index_add_col(
field = dict_index_get_nth_field(index, index->n_def - 1);
field->col = col;
- field->fixed_len = (unsigned int) dict_col_get_fixed_size(col);
+ field->fixed_len = (unsigned int) dict_col_get_fixed_size(
+ col, dict_table_is_comp(table));
if (prefix_len && field->fixed_len > prefix_len) {
field->fixed_len = (unsigned int) prefix_len;
@@ -1536,17 +1762,18 @@ dict_index_add_col(
}
}
-/***********************************************************************
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
Copies fields contained in index2 to index1. */
static
void
dict_index_copy(
/*============*/
- dict_index_t* index1, /* in: index to copy to */
- dict_index_t* index2, /* in: index to copy from */
- dict_table_t* table, /* in: table */
- ulint start, /* in: first position to copy */
- ulint end) /* in: last position to copy */
+ dict_index_t* index1, /*!< in: index to copy to */
+ dict_index_t* index2, /*!< in: index to copy from */
+ const dict_table_t* table, /*!< in: table */
+ ulint start, /*!< in: first position to copy */
+ ulint end) /*!< in: last position to copy */
{
dict_field_t* field;
ulint i;
@@ -1561,15 +1788,16 @@ dict_index_copy(
}
}
-/***********************************************************************
+/*******************************************************************//**
Copies types of fields contained in index to tuple. */
-
+UNIV_INTERN
void
dict_index_copy_types(
/*==================*/
- dtuple_t* tuple, /* in: data tuple */
- dict_index_t* index, /* in: index */
- ulint n_fields) /* in: number of field types to copy */
+ dtuple_t* tuple, /*!< in/out: data tuple */
+ const dict_index_t* index, /*!< in: index */
+ ulint n_fields) /*!< in: number of
+ field types to copy */
{
ulint i;
@@ -1580,8 +1808,8 @@ dict_index_copy_types(
}
for (i = 0; i < n_fields; i++) {
- dict_field_t* ifield;
- dtype_t* dfield_type;
+ const dict_field_t* ifield;
+ dtype_t* dfield_type;
ifield = dict_index_get_nth_field(index, i);
dfield_type = dfield_get_type(dtuple_get_nth_field(tuple, i));
@@ -1589,38 +1817,40 @@ dict_index_copy_types(
}
}
-/***********************************************************************
-Copies types of columns contained in table to tuple. */
-
+/*******************************************************************//**
+Copies types of columns contained in table to tuple and sets all
+fields of the tuple to the SQL NULL value. This function should
+be called right after dtuple_create(). */
+UNIV_INTERN
void
dict_table_copy_types(
/*==================*/
- dtuple_t* tuple, /* in: data tuple */
- dict_table_t* table) /* in: index */
+ dtuple_t* tuple, /*!< in/out: data tuple */
+ const dict_table_t* table) /*!< in: table */
{
- dtype_t* dfield_type;
ulint i;
for (i = 0; i < dtuple_get_n_fields(tuple); i++) {
- dfield_type = dfield_get_type(dtuple_get_nth_field(tuple, i));
- dict_col_copy_type(dict_table_get_nth_col(table, i),
- dfield_type);
+ dfield_t* dfield = dtuple_get_nth_field(tuple, i);
+ dtype_t* dtype = dfield_get_type(dfield);
+
+ dfield_set_null(dfield);
+ dict_col_copy_type(dict_table_get_nth_col(table, i), dtype);
}
}
-/***********************************************************************
+/*******************************************************************//**
Builds the internal dictionary cache representation for a clustered
-index, containing also system fields not defined by the user. */
+index, containing also system fields not defined by the user.
+@return own: the internal representation of the clustered index */
static
dict_index_t*
dict_index_build_internal_clust(
/*============================*/
- /* out, own: the internal representation
- of the clustered index */
- dict_table_t* table, /* in: table */
- dict_index_t* index) /* in: user representation of a clustered
- index */
+ const dict_table_t* table, /*!< in: table */
+ dict_index_t* index) /*!< in: user representation of
+ a clustered index */
{
dict_index_t* new_index;
dict_field_t* field;
@@ -1630,7 +1860,7 @@ dict_index_build_internal_clust(
ibool* indexed;
ut_ad(table && index);
- ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(dict_index_is_clust(index));
ut_ad(mutex_own(&(dict_sys->mutex)));
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
@@ -1655,7 +1885,7 @@ dict_index_build_internal_clust(
new_index->n_uniq = REC_MAX_N_FIELDS;
- } else if (index->type & DICT_UNIQUE) {
+ } else if (dict_index_is_unique(index)) {
/* Only the fields defined so far are needed to identify
the index entry uniquely */
@@ -1667,7 +1897,7 @@ dict_index_build_internal_clust(
new_index->trx_id_offset = 0;
- if (!(index->type & DICT_IBUF)) {
+ if (!dict_index_is_ibuf(index)) {
/* Add system columns, trx id first */
trx_id_pos = new_index->n_def;
@@ -1682,19 +1912,19 @@ dict_index_build_internal_clust(
# error "DATA_ROLL_PTR != 2"
#endif
- if (!(index->type & DICT_UNIQUE)) {
- dict_index_add_col(new_index, table, (dict_col_t*)
+ if (!dict_index_is_unique(index)) {
+ dict_index_add_col(new_index, table,
dict_table_get_sys_col(
table, DATA_ROW_ID),
0);
trx_id_pos++;
}
- dict_index_add_col(new_index, table, (dict_col_t*)
+ dict_index_add_col(new_index, table,
dict_table_get_sys_col(table, DATA_TRX_ID),
0);
- dict_index_add_col(new_index, table, (dict_col_t*)
+ dict_index_add_col(new_index, table,
dict_table_get_sys_col(table,
DATA_ROLL_PTR),
0);
@@ -1702,7 +1932,8 @@ dict_index_build_internal_clust(
for (i = 0; i < trx_id_pos; i++) {
fixed_size = dict_col_get_fixed_size(
- dict_index_get_nth_col(new_index, i));
+ dict_index_get_nth_col(new_index, i),
+ dict_table_is_comp(table));
if (fixed_size == 0) {
new_index->trx_id_offset = 0;
@@ -1723,10 +1954,9 @@ dict_index_build_internal_clust(
}
/* Remember the table columns already contained in new_index */
- indexed = mem_alloc(table->n_cols * sizeof *indexed);
- memset(indexed, 0, table->n_cols * sizeof *indexed);
+ indexed = mem_zalloc(table->n_cols * sizeof *indexed);
- /* Mark with 0 the table columns already contained in new_index */
+ /* Mark the table columns already contained in new_index */
for (i = 0; i < new_index->n_def; i++) {
field = dict_index_get_nth_field(new_index, i);
@@ -1744,8 +1974,7 @@ dict_index_build_internal_clust(
there */
for (i = 0; i + DATA_N_SYS_COLS < (ulint) table->n_cols; i++) {
- dict_col_t* col = (dict_col_t*)
- dict_table_get_nth_col(table, i);
+ dict_col_t* col = dict_table_get_nth_col(table, i);
ut_ad(col->mtype != DATA_SYS);
if (!indexed[col->ind]) {
@@ -1755,7 +1984,7 @@ dict_index_build_internal_clust(
mem_free(indexed);
- ut_ad((index->type & DICT_IBUF)
+ ut_ad(dict_index_is_ibuf(index)
|| (UT_LIST_GET_LEN(table->indexes) == 0));
new_index->cached = TRUE;
@@ -1763,18 +1992,17 @@ dict_index_build_internal_clust(
return(new_index);
}
-/***********************************************************************
+/*******************************************************************//**
Builds the internal dictionary cache representation for a non-clustered
-index, containing also system fields not defined by the user. */
+index, containing also system fields not defined by the user.
+@return own: the internal representation of the non-clustered index */
static
dict_index_t*
dict_index_build_internal_non_clust(
/*================================*/
- /* out, own: the internal representation
- of the non-clustered index */
- dict_table_t* table, /* in: table */
- dict_index_t* index) /* in: user representation of a non-clustered
- index */
+ const dict_table_t* table, /*!< in: table */
+ dict_index_t* index) /*!< in: user representation of
+ a non-clustered index */
{
dict_field_t* field;
dict_index_t* new_index;
@@ -1783,7 +2011,7 @@ dict_index_build_internal_non_clust(
ibool* indexed;
ut_ad(table && index);
- ut_ad(0 == (index->type & DICT_CLUSTERED));
+ ut_ad(!dict_index_is_clust(index));
ut_ad(mutex_own(&(dict_sys->mutex)));
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
@@ -1791,7 +2019,7 @@ dict_index_build_internal_non_clust(
clust_index = UT_LIST_GET_FIRST(table->indexes);
ut_ad(clust_index);
- ut_ad(clust_index->type & DICT_CLUSTERED);
+ ut_ad(dict_index_is_clust(clust_index));
ut_ad(!(clust_index->type & DICT_UNIVERSAL));
/* Create a new index */
@@ -1810,10 +2038,9 @@ dict_index_build_internal_non_clust(
dict_index_copy(new_index, index, table, 0, index->n_fields);
/* Remember the table columns already contained in new_index */
- indexed = mem_alloc(table->n_cols * sizeof *indexed);
- memset(indexed, 0, table->n_cols * sizeof *indexed);
+ indexed = mem_zalloc(table->n_cols * sizeof *indexed);
- /* Mark with 0 table columns already contained in new_index */
+ /* Mark the table columns already contained in new_index */
for (i = 0; i < new_index->n_def; i++) {
field = dict_index_get_nth_field(new_index, i);
@@ -1842,7 +2069,7 @@ dict_index_build_internal_non_clust(
mem_free(indexed);
- if ((index->type) & DICT_UNIQUE) {
+ if (dict_index_is_unique(index)) {
new_index->n_uniq = index->n_fields;
} else {
new_index->n_uniq = new_index->n_def;
@@ -1860,42 +2087,98 @@ dict_index_build_internal_non_clust(
/*====================== FOREIGN KEY PROCESSING ========================*/
-/*************************************************************************
-Checks if a table is referenced by foreign keys. */
-
+/*********************************************************************//**
+Checks if a table is referenced by foreign keys.
+@return TRUE if table is referenced by a foreign key */
+UNIV_INTERN
ibool
-dict_table_referenced_by_foreign_key(
+dict_table_is_referenced_by_foreign_key(
+/*====================================*/
+ const dict_table_t* table) /*!< in: InnoDB table */
+{
+ return(UT_LIST_GET_LEN(table->referenced_list) > 0);
+}
+
+/*********************************************************************//**
+Checks if the index is referenced by a foreign key; if so, returns the
+foreign key struct, otherwise returns NULL.
+@return pointer to foreign key struct if index is defined for foreign
+key, otherwise NULL */
+UNIV_INTERN
+dict_foreign_t*
+dict_table_get_referenced_constraint(
/*=================================*/
- /* out: TRUE if table is referenced by a
- foreign key */
- dict_table_t* table) /* in: InnoDB table */
+ dict_table_t* table, /*!< in: InnoDB table */
+ dict_index_t* index) /*!< in: InnoDB index */
{
- if (UT_LIST_GET_LEN(table->referenced_list) > 0) {
+ dict_foreign_t* foreign;
- return(TRUE);
+ ut_ad(index != NULL);
+ ut_ad(table != NULL);
+
+ for (foreign = UT_LIST_GET_FIRST(table->referenced_list);
+ foreign;
+ foreign = UT_LIST_GET_NEXT(referenced_list, foreign)) {
+
+ if (foreign->referenced_index == index) {
+
+ return(foreign);
+ }
}
- return(FALSE);
+ return(NULL);
+}
+
+/*********************************************************************//**
+Checks if an index is defined for a foreign key constraint. An index is
+part of a foreign key constraint if the index is referenced by a foreign
+key or the index is a foreign key index.
+@return pointer to foreign key struct if index is defined for foreign
+key, otherwise NULL */
+UNIV_INTERN
+dict_foreign_t*
+dict_table_get_foreign_constraint(
+/*==============================*/
+ dict_table_t* table, /*!< in: InnoDB table */
+ dict_index_t* index) /*!< in: InnoDB index */
+{
+ dict_foreign_t* foreign;
+
+ ut_ad(index != NULL);
+ ut_ad(table != NULL);
+
+ for (foreign = UT_LIST_GET_FIRST(table->foreign_list);
+ foreign;
+ foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) {
+
+ if (foreign->foreign_index == index
+ || foreign->referenced_index == index) {
+
+ return(foreign);
+ }
+ }
+
+ return(NULL);
}
-/*************************************************************************
+/*********************************************************************//**
Frees a foreign key struct. */
static
void
dict_foreign_free(
/*==============*/
- dict_foreign_t* foreign) /* in, own: foreign key struct */
+ dict_foreign_t* foreign) /*!< in, own: foreign key struct */
{
mem_heap_free(foreign->heap);
}
-/**************************************************************************
+/**********************************************************************//**
Removes a foreign constraint struct from the dictionary cache. */
static
void
dict_foreign_remove_from_cache(
/*===========================*/
- dict_foreign_t* foreign) /* in, own: foreign constraint */
+ dict_foreign_t* foreign) /*!< in, own: foreign constraint */
{
ut_ad(mutex_own(&(dict_sys->mutex)));
ut_a(foreign);
@@ -1915,16 +2198,16 @@ dict_foreign_remove_from_cache(
dict_foreign_free(foreign);
}
-/**************************************************************************
+/**********************************************************************//**
Looks for the foreign constraint from the foreign and referenced lists
-of a table. */
+of a table.
+@return foreign constraint */
static
dict_foreign_t*
dict_foreign_find(
/*==============*/
- /* out: foreign constraint */
- dict_table_t* table, /* in: table object */
- const char* id) /* in: foreign constraint id */
+ dict_table_t* table, /*!< in: table object */
+ const char* id) /*!< in: foreign constraint id */
{
dict_foreign_t* foreign;
@@ -1955,38 +2238,45 @@ dict_foreign_find(
return(NULL);
}
-#ifndef UNIV_HOTBACKUP
-/*************************************************************************
+/*********************************************************************//**
Tries to find an index whose first fields are the columns in the array,
-in the same order. */
+in the same order, and which is neither marked for deletion nor the same
+index as types_idx.
+@return matching index, NULL if not found */
static
dict_index_t*
dict_foreign_find_index(
/*====================*/
- /* out: matching index, NULL if not found */
- dict_table_t* table, /* in: table */
- const char** columns,/* in: array of column names */
- ulint n_cols, /* in: number of columns */
- dict_index_t* types_idx, /* in: NULL or an index to whose types the
+ dict_table_t* table, /*!< in: table */
+ const char** columns,/*!< in: array of column names */
+ ulint n_cols, /*!< in: number of columns */
+ dict_index_t* types_idx, /*!< in: NULL or an index to whose types the
column types must match */
ibool check_charsets,
- /* in: whether to check charsets.
+ /*!< in: whether to check charsets.
only has an effect if types_idx != NULL */
ulint check_null)
- /* in: nonzero if none of the columns must
+ /*!< in: nonzero if none of the columns must
be declared NOT NULL */
{
dict_index_t* index;
- dict_field_t* field;
- const char* col_name;
- ulint i;
index = dict_table_get_first_index(table);
while (index != NULL) {
- if (dict_index_get_n_fields(index) >= n_cols) {
+ /* Ignore an index that is to be dropped, or that is
+ the same instance as types_idx */
+ if (index->to_be_dropped || types_idx == index) {
+
+ goto next_rec;
+
+ } else if (dict_index_get_n_fields(index) >= n_cols) {
+ ulint i;
for (i = 0; i < n_cols; i++) {
+ dict_field_t* field;
+ const char* col_name;
+
field = dict_index_get_nth_field(index, i);
col_name = dict_table_get_col_name(
@@ -2027,20 +2317,104 @@ dict_foreign_find_index(
}
}
+next_rec:
index = dict_table_get_next_index(index);
}
return(NULL);
}
-/**************************************************************************
+/**********************************************************************//**
+Find an index that is equivalent to the one passed in and is not marked
+for deletion.
+@return index equivalent to foreign->foreign_index, or NULL */
+UNIV_INTERN
+dict_index_t*
+dict_foreign_find_equiv_index(
+/*==========================*/
+ dict_foreign_t* foreign)/*!< in: foreign key */
+{
+ ut_a(foreign != NULL);
+
+ /* Try to find an index which contains the columns as the
+ first fields and in the right order, and the types are the
+ same as in foreign->foreign_index */
+
+ return(dict_foreign_find_index(
+ foreign->foreign_table,
+ foreign->foreign_col_names, foreign->n_fields,
+ foreign->foreign_index, TRUE, /* check charsets */
+ FALSE/* columns may be declared NOT NULL */));
+}
+
+/**********************************************************************//**
+Returns an index object by matching on the name and column names; if
+more than one index matches, returns the index with the max id.
+@return matching index, NULL if not found */
+UNIV_INTERN
+dict_index_t*
+dict_table_get_index_by_max_id(
+/*===========================*/
+ dict_table_t* table, /*!< in: table */
+ const char* name, /*!< in: the index name to find */
+ const char** columns,/*!< in: array of column names */
+ ulint n_cols) /*!< in: number of columns */
+{
+ dict_index_t* index;
+ dict_index_t* found;
+
+ found = NULL;
+ index = dict_table_get_first_index(table);
+
+ while (index != NULL) {
+ if (ut_strcmp(index->name, name) == 0
+ && dict_index_get_n_ordering_defined_by_user(index)
+ == n_cols) {
+
+ ulint i;
+
+ for (i = 0; i < n_cols; i++) {
+ dict_field_t* field;
+ const char* col_name;
+
+ field = dict_index_get_nth_field(index, i);
+
+ col_name = dict_table_get_col_name(
+ table, dict_col_get_no(field->col));
+
+ if (0 != innobase_strcasecmp(
+ columns[i], col_name)) {
+
+ break;
+ }
+ }
+
+ if (i == n_cols) {
+ /* We found a matching index; select
+ the index with the higher id */
+
+ if (!found
+ || ut_dulint_cmp(index->id, found->id) > 0) {
+
+ found = index;
+ }
+ }
+ }
+
+ index = dict_table_get_next_index(index);
+ }
+
+ return(found);
+}
+
+/**********************************************************************//**
Report an error in a foreign key definition. */
static
void
dict_foreign_error_report_low(
/*==========================*/
- FILE* file, /* in: output stream */
- const char* name) /* in: table name */
+ FILE* file, /*!< in: output stream */
+ const char* name) /*!< in: table name */
{
rewind(file);
ut_print_timestamp(file);
@@ -2048,15 +2422,15 @@ dict_foreign_error_report_low(
name);
}
-/**************************************************************************
+/**********************************************************************//**
Report an error in a foreign key definition. */
static
void
dict_foreign_error_report(
/*======================*/
- FILE* file, /* in: output stream */
- dict_foreign_t* fk, /* in: foreign key constraint */
- const char* msg) /* in: the error message */
+ FILE* file, /*!< in: output stream */
+ dict_foreign_t* fk, /*!< in: foreign key constraint */
+ const char* msg) /*!< in: the error message */
{
mutex_enter(&dict_foreign_err_mutex);
dict_foreign_error_report_low(file, fk->foreign_table_name);
@@ -2068,26 +2442,25 @@ dict_foreign_error_report(
fputs("The index in the foreign key in table is ", file);
ut_print_name(file, NULL, FALSE, fk->foreign_index->name);
fputs("\n"
- "See http://dev.mysql.com/doc/refman/5.1/en/"
- "innodb-foreign-key-constraints.html\n"
+ "See " REFMAN "innodb-foreign-key-constraints.html\n"
"for correct foreign key definition.\n",
file);
}
mutex_exit(&dict_foreign_err_mutex);
}
-/**************************************************************************
+/**********************************************************************//**
Adds a foreign key constraint object to the dictionary cache. May free
the object if there already is an object with the same identifier in.
At least one of the foreign table and the referenced table must already
-be in the dictionary cache! */
-
+be in the dictionary cache!
+@return DB_SUCCESS or error code */
+UNIV_INTERN
ulint
dict_foreign_add_to_cache(
/*======================*/
- /* out: DB_SUCCESS or error code */
- dict_foreign_t* foreign, /* in, own: foreign key constraint */
- ibool check_charsets) /* in: TRUE=check charset
+ dict_foreign_t* foreign, /*!< in, own: foreign key constraint */
+ ibool check_charsets) /*!< in: TRUE=check charset
compatibility */
{
dict_table_t* for_table;
@@ -2124,7 +2497,7 @@ dict_foreign_add_to_cache(
if (for_in_cache->referenced_table == NULL && ref_table) {
index = dict_foreign_find_index(
ref_table,
- (const char**) for_in_cache->referenced_col_names,
+ for_in_cache->referenced_col_names,
for_in_cache->n_fields, for_in_cache->foreign_index,
check_charsets, FALSE);
@@ -2156,7 +2529,7 @@ dict_foreign_add_to_cache(
if (for_in_cache->foreign_table == NULL && for_table) {
index = dict_foreign_find_index(
for_table,
- (const char**) for_in_cache->foreign_col_names,
+ for_in_cache->foreign_col_names,
for_in_cache->n_fields,
for_in_cache->referenced_index, check_charsets,
for_in_cache->type
@@ -2199,17 +2572,17 @@ dict_foreign_add_to_cache(
return(DB_SUCCESS);
}
-/*************************************************************************
+/*********************************************************************//**
Scans from pointer onwards. Stops if is at the start of a copy of
'string' where characters are compared without case sensitivity, and
-only outside `` or "" quotes. Stops also at '\0'. */
-
+only outside `` or "" quotes. Stops also at NUL.
+@return scanned up to this */
+static
const char*
dict_scan_to(
/*=========*/
- /* out: scanned up to this */
- const char* ptr, /* in: scan from */
- const char* string) /* in: look for this */
+ const char* ptr, /*!< in: scan from */
+ const char* string) /*!< in: look for this */
{
char quote = '\0';
@@ -2242,19 +2615,19 @@ nomatch:
return(ptr);
}
-/*************************************************************************
-Accepts a specified string. Comparisons are case-insensitive. */
+/*********************************************************************//**
+Accepts a specified string. Comparisons are case-insensitive.
+@return if string was accepted, the pointer is moved after that, else
+ptr is returned */
static
const char*
dict_accept(
/*========*/
- /* out: if string was accepted, the pointer
- is moved after that, else ptr is returned */
- struct charset_info_st* cs,/* in: the character set of ptr */
- const char* ptr, /* in: scan from this */
- const char* string, /* in: accept only this string as the next
+ struct charset_info_st* cs,/*!< in: the character set of ptr */
+ const char* ptr, /*!< in: scan from this */
+ const char* string, /*!< in: accept only this string as the next
non-whitespace string */
- ibool* success)/* out: TRUE if accepted */
+ ibool* success)/*!< out: TRUE if accepted */
{
const char* old_ptr = ptr;
const char* old_ptr2;
@@ -2278,25 +2651,25 @@ dict_accept(
return(ptr + ut_strlen(string));
}
-/*************************************************************************
+/*********************************************************************//**
Scans an id. For the lexical definition of an 'id', see the code below.
-Strips backquotes or double quotes from around the id. */
+Strips backquotes or double quotes from around the id.
+@return scanned to */
static
const char*
dict_scan_id(
/*=========*/
- /* out: scanned to */
- struct charset_info_st* cs,/* in: the character set of ptr */
- const char* ptr, /* in: scanned to */
- mem_heap_t* heap, /* in: heap where to allocate the id
+ struct charset_info_st* cs,/*!< in: the character set of ptr */
+ const char* ptr, /*!< in: scanned to */
+ mem_heap_t* heap, /*!< in: heap where to allocate the id
(NULL=id will not be allocated, but it
will point to string near ptr) */
- const char** id, /* out,own: the id; NULL if no id was
+ const char** id, /*!< out,own: the id; NULL if no id was
scannable */
- ibool table_id,/* in: TRUE=convert the allocated id
+ ibool table_id,/*!< in: TRUE=convert the allocated id
as a table name; FALSE=convert to UTF-8 */
ibool accept_also_dot)
- /* in: TRUE if also a dot can appear in a
+ /*!< in: TRUE if also a dot can appear in a
non-quoted id; in a quoted id it can appear
always */
{
@@ -2378,7 +2751,7 @@ convert_id:
len = 3 * len + 1;
*id = dst = mem_heap_alloc(heap, len);
- innobase_convert_from_id(dst, str, len);
+ innobase_convert_from_id(cs, dst, str, len);
} else if (!strncmp(str, srv_mysql50_table_name_prefix,
sizeof srv_mysql50_table_name_prefix)) {
/* This is a pre-5.1 table name
@@ -2392,26 +2765,26 @@ convert_id:
len = 5 * len + 1;
*id = dst = mem_heap_alloc(heap, len);
- innobase_convert_from_table_id(dst, str, len);
+ innobase_convert_from_table_id(cs, dst, str, len);
}
return(ptr);
}
-/*************************************************************************
-Tries to scan a column name. */
+/*********************************************************************//**
+Tries to scan a column name.
+@return scanned to */
static
const char*
dict_scan_col(
/*==========*/
- /* out: scanned to */
- struct charset_info_st* cs, /* in: the character set of ptr */
- const char* ptr, /* in: scanned to */
- ibool* success,/* out: TRUE if success */
- dict_table_t* table, /* in: table in which the column is */
- const dict_col_t** column, /* out: pointer to column if success */
- mem_heap_t* heap, /* in: heap where to allocate */
- const char** name) /* out,own: the column name;
+ struct charset_info_st* cs, /*!< in: the character set of ptr */
+ const char* ptr, /*!< in: scanned to */
+ ibool* success,/*!< out: TRUE if success */
+ dict_table_t* table, /*!< in: table in which the column is */
+ const dict_col_t** column, /*!< out: pointer to column if success */
+ mem_heap_t* heap, /*!< in: heap where to allocate */
+ const char** name) /*!< out,own: the column name;
NULL if no name was scannable */
{
ulint i;
@@ -2449,20 +2822,20 @@ dict_scan_col(
return(ptr);
}
-/*************************************************************************
-Scans a table name from an SQL string. */
+/*********************************************************************//**
+Scans a table name from an SQL string.
+@return scanned to */
static
const char*
dict_scan_table_name(
/*=================*/
- /* out: scanned to */
- struct charset_info_st* cs,/* in: the character set of ptr */
- const char* ptr, /* in: scanned to */
- dict_table_t** table, /* out: table object or NULL */
- const char* name, /* in: foreign key table name */
- ibool* success,/* out: TRUE if ok name found */
- mem_heap_t* heap, /* in: heap where to allocate the id */
- const char** ref_name)/* out,own: the table name;
+ struct charset_info_st* cs,/*!< in: the character set of ptr */
+ const char* ptr, /*!< in: scanned to */
+ dict_table_t** table, /*!< out: table object or NULL */
+ const char* name, /*!< in: foreign key table name */
+ ibool* success,/*!< out: TRUE if ok name found */
+ mem_heap_t* heap, /*!< in: heap where to allocate the id */
+ const char** ref_name)/*!< out,own: the table name;
NULL if no name was scannable */
{
const char* database_name = NULL;
@@ -2548,16 +2921,16 @@ dict_scan_table_name(
return(ptr);
}
-/*************************************************************************
-Skips one id. The id is allowed to contain also '.'. */
+/*********************************************************************//**
+Skips one id. The id is allowed to contain also '.'.
+@return scanned to */
static
const char*
dict_skip_word(
/*===========*/
- /* out: scanned to */
- struct charset_info_st* cs,/* in: the character set of ptr */
- const char* ptr, /* in: scanned to */
- ibool* success)/* out: TRUE if success, FALSE if just spaces
+ struct charset_info_st* cs,/*!< in: the character set of ptr */
+ const char* ptr, /*!< in: scanned to */
+ ibool* success)/*!< out: TRUE if success, FALSE if just spaces
left in string or a syntax error */
{
const char* start;
@@ -2573,20 +2946,19 @@ dict_skip_word(
return(ptr);
}
-/*************************************************************************
+/*********************************************************************//**
Removes MySQL comments from an SQL string. A comment is either
(a) '#' to the end of the line,
-(b) '--<space>' to the end of the line, or
-(c) '<slash><asterisk>' till the next '<asterisk><slash>' (like the familiar
-C comment syntax). */
+(b) '--[space]' to the end of the line, or
+(c) '[slash][asterisk]' till the next '[asterisk][slash]' (like the familiar
+C comment syntax).
+@return own: SQL string stripped of comments; the caller must free
+this with mem_free()!
static
char*
dict_strip_comments(
/*================*/
- /* out, own: SQL string stripped from
- comments; the caller must free this
- with mem_free()! */
- const char* sql_string) /* in: SQL string */
+ const char* sql_string) /*!< in: SQL string */
{
char* str;
const char* sptr;
@@ -2660,17 +3032,16 @@ scan_more:
}
}
-/*************************************************************************
-Finds the highest <number> for foreign key constraints of the table. Looks
+/*********************************************************************//**
+Finds the highest [number] for foreign key constraints of the table. Looks
only at the >= 4.0.18-format id's, which are of the form
-databasename/tablename_ibfk_<number>. */
+databasename/tablename_ibfk_[number].
+@return highest number, 0 if table has no new format foreign key constraints */
static
ulint
dict_table_get_highest_foreign_id(
/*==============================*/
- /* out: highest number, 0 if table has no new
- format foreign key constraints */
- dict_table_t* table) /* in: table in the dictionary memory cache */
+ dict_table_t* table) /*!< in: table in the dictionary memory cache */
{
dict_foreign_t* foreign;
char* endp;
@@ -2709,19 +3080,19 @@ dict_table_get_highest_foreign_id(
return(biggest_id);
}
-/*************************************************************************
+/*********************************************************************//**
Reports a simple foreign key create clause syntax error. */
static
void
dict_foreign_report_syntax_err(
/*===========================*/
- const char* name, /* in: table name */
+ const char* name, /*!< in: table name */
const char* start_of_latest_foreign,
- /* in: start of the foreign key clause
+ /*!< in: start of the foreign key clause
in the SQL string */
- const char* ptr) /* in: place of the syntax error */
+ const char* ptr) /*!< in: place of the syntax error */
{
- FILE* ef = dict_foreign_err_file;
+ FILE* ef = dict_foreign_err_file;
mutex_enter(&dict_foreign_err_mutex);
dict_foreign_error_report_low(ef, name);
@@ -2730,31 +3101,31 @@ dict_foreign_report_syntax_err(
mutex_exit(&dict_foreign_err_mutex);
}
-/*************************************************************************
+/*********************************************************************//**
Scans a table create SQL string and adds to the data dictionary the foreign
key constraints declared in the string. This function should be called after
the indexes for a table have been created. Each foreign key constraint must
be accompanied with indexes in both participating tables. The indexes are
-allowed to contain more fields than mentioned in the constraint. */
+allowed to contain more fields than mentioned in the constraint.
+@return error code or DB_SUCCESS */
static
ulint
dict_create_foreign_constraints_low(
/*================================*/
- /* out: error code or DB_SUCCESS */
- trx_t* trx, /* in: transaction */
- mem_heap_t* heap, /* in: memory heap */
- struct charset_info_st* cs,/* in: the character set of sql_string */
+ trx_t* trx, /*!< in: transaction */
+ mem_heap_t* heap, /*!< in: memory heap */
+ struct charset_info_st* cs,/*!< in: the character set of sql_string */
const char* sql_string,
- /* in: CREATE TABLE or ALTER TABLE statement
+ /*!< in: CREATE TABLE or ALTER TABLE statement
where foreign keys are declared like:
FOREIGN KEY (a, b) REFERENCES table2(c, d),
table2 can be written also with the database
name before it: test.table2; the default
database is the database of parameter name */
- const char* name, /* in: table full name in the normalized form
+ const char* name, /*!< in: table full name in the normalized form
database_name/table_name */
ibool reject_fks)
- /* in: if TRUE, fail with error code
+ /*!< in: if TRUE, fail with error code
DB_CANNOT_ADD_CONSTRAINT if any foreign
keys are found. */
{
@@ -2828,8 +3199,8 @@ dict_create_foreign_constraints_low(
}
/* Starting from 4.0.18 and 4.1.2, we generate foreign key id's in the
- format databasename/tablename_ibfk_<number>, where <number> is local
- to the table; look for the highest <number> for table_to_alter, so
+ format databasename/tablename_ibfk_[number], where [number] is local
+ to the table; look for the highest [number] for table_to_alter, so
that we can assign to new constraints higher numbers. */
/* If we are altering a temporary table, the table name after ALTER
@@ -2989,8 +3360,7 @@ col_loop1:
ut_print_name(ef, NULL, TRUE, name);
fprintf(ef, " where the columns appear\n"
"as the first columns. Constraint:\n%s\n"
- "See http://dev.mysql.com/doc/refman/5.1/en/"
- "innodb-foreign-key-constraints.html\n"
+ "See " REFMAN "innodb-foreign-key-constraints.html\n"
"for correct foreign key definition.\n",
start_of_latest_foreign);
mutex_exit(&dict_foreign_err_mutex);
@@ -3270,7 +3640,7 @@ try_find_index:
" and such columns in old tables\n"
"cannot be referenced by such columns"
" in new tables.\n"
- "See http://dev.mysql.com/doc/refman/5.1/en/"
+ "See " REFMAN
"innodb-foreign-key-constraints.html\n"
"for correct foreign key definition.\n",
start_of_latest_foreign);
@@ -3309,38 +3679,19 @@ try_find_index:
goto loop;
}
-/**************************************************************************
-Determines whether a string starts with the specified keyword. */
-
-ibool
-dict_str_starts_with_keyword(
-/*=========================*/
- /* out: TRUE if str starts
- with keyword */
- void* mysql_thd, /* in: MySQL thread handle */
- const char* str, /* in: string to scan for keyword */
- const char* keyword) /* in: keyword to look for */
-{
- struct charset_info_st* cs = innobase_get_charset(mysql_thd);
- ibool success;
-
- dict_accept(cs, str, keyword, &success);
- return(success);
-}
-
-/*************************************************************************
+/*********************************************************************//**
Scans a table create SQL string and adds to the data dictionary the foreign
key constraints declared in the string. This function should be called after
the indexes for a table have been created. Each foreign key constraint must
be accompanied with indexes in both participating tables. The indexes are
-allowed to contain more fields than mentioned in the constraint. */
-
+allowed to contain more fields than mentioned in the constraint.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
ulint
dict_create_foreign_constraints(
/*============================*/
- /* out: error code or DB_SUCCESS */
- trx_t* trx, /* in: transaction */
- const char* sql_string, /* in: table create statement where
+ trx_t* trx, /*!< in: transaction */
+ const char* sql_string, /*!< in: table create statement where
foreign keys are declared like:
FOREIGN KEY (a, b) REFERENCES
table2(c, d), table2 can be written
@@ -3348,10 +3699,10 @@ dict_create_foreign_constraints(
name before it: test.table2; the
default database id the database of
parameter name */
- const char* name, /* in: table full name in the
+ const char* name, /*!< in: table full name in the
normalized form
database_name/table_name */
- ibool reject_fks) /* in: if TRUE, fail with error
+ ibool reject_fks) /*!< in: if TRUE, fail with error
code DB_CANNOT_ADD_CONSTRAINT if
any foreign keys are found. */
{
@@ -3375,23 +3726,21 @@ dict_create_foreign_constraints(
return(err);
}
-/**************************************************************************
-Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement. */
-
+/**********************************************************************//**
+Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement.
+@return DB_SUCCESS or DB_CANNOT_DROP_CONSTRAINT if syntax error or the
+constraint id does not match */
+UNIV_INTERN
ulint
dict_foreign_parse_drop_constraints(
/*================================*/
- /* out: DB_SUCCESS or
- DB_CANNOT_DROP_CONSTRAINT if
- syntax error or the constraint
- id does not match */
- mem_heap_t* heap, /* in: heap from which we can
+ mem_heap_t* heap, /*!< in: heap from which we can
allocate memory */
- trx_t* trx, /* in: transaction */
- dict_table_t* table, /* in: table */
- ulint* n, /* out: number of constraints
+ trx_t* trx, /*!< in: transaction */
+ dict_table_t* table, /*!< in: table */
+ ulint* n, /*!< out: number of constraints
to drop */
- const char*** constraints_to_drop) /* out: id's of the
+ const char*** constraints_to_drop) /*!< out: id's of the
constraints to drop */
{
dict_foreign_t* foreign;
@@ -3509,19 +3858,33 @@ syntax_error:
return(DB_CANNOT_DROP_CONSTRAINT);
}
-#endif /* UNIV_HOTBACKUP */
/*==================== END OF FOREIGN KEY PROCESSING ====================*/
-#ifdef UNIV_DEBUG
-/**************************************************************************
-Returns an index object if it is found in the dictionary cache. */
+/**********************************************************************//**
+Looks up an index object in the dictionary cache by its id.
+The caller must already be holding dict_sys->mutex.
+@return	index, NULL if not found */
+UNIV_INTERN
+dict_index_t*
+dict_index_get_if_in_cache_low(
+/*===========================*/
+	dulint	index_id)	/*!< in: index id */
+{
+	dict_index_t*	found;
+
+	/* the lookup below is only done with the dictionary mutex owned */
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	found = dict_index_find_on_id_low(index_id);
+
+	return(found);
+}
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/**********************************************************************//**
+Returns an index object if it is found in the dictionary cache.
+@return index, NULL if not found */
+UNIV_INTERN
dict_index_t*
dict_index_get_if_in_cache(
/*=======================*/
- /* out: index, NULL if not found */
- dulint index_id) /* in: index id */
+ dulint index_id) /*!< in: index id */
{
dict_index_t* index;
@@ -3531,25 +3894,25 @@ dict_index_get_if_in_cache(
mutex_enter(&(dict_sys->mutex));
- index = dict_index_find_on_id_low(index_id);
+ index = dict_index_get_if_in_cache_low(index_id);
mutex_exit(&(dict_sys->mutex));
return(index);
}
-#endif /* UNIV_DEBUG */
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG
-/**************************************************************************
+/**********************************************************************//**
Checks that a tuple has n_fields_cmp value in a sensible range, so that
-no comparison can occur with the page number field in a node pointer. */
-
+no comparison can occur with the page number field in a node pointer.
+@return TRUE if ok */
+UNIV_INTERN
ibool
dict_index_check_search_tuple(
/*==========================*/
- /* out: TRUE if ok */
- dict_index_t* index, /* in: index tree */
- dtuple_t* tuple) /* in: tuple used in a search */
+ const dict_index_t* index, /*!< in: index tree */
+ const dtuple_t* tuple) /*!< in: tuple used in a search */
{
ut_a(index);
ut_a(dtuple_get_n_fields_cmp(tuple)
@@ -3558,20 +3921,22 @@ dict_index_check_search_tuple(
}
#endif /* UNIV_DEBUG */
-/**************************************************************************
-Builds a node pointer out of a physical record and a page number. */
-
+/**********************************************************************//**
+Builds a node pointer out of a physical record and a page number.
+@return own: node pointer */
+UNIV_INTERN
dtuple_t*
dict_index_build_node_ptr(
/*======================*/
- /* out, own: node pointer */
- dict_index_t* index, /* in: index tree */
- rec_t* rec, /* in: record for which to build node
- pointer */
- ulint page_no,/* in: page number to put in node pointer */
- mem_heap_t* heap, /* in: memory heap where pointer created */
- ulint level) /* in: level of rec in tree: 0 means leaf
- level */
+ const dict_index_t* index, /*!< in: index */
+ const rec_t* rec, /*!< in: record for which to build node
+ pointer */
+ ulint page_no,/*!< in: page number to put in node
+ pointer */
+ mem_heap_t* heap, /*!< in: memory heap where pointer
+ created */
+ ulint level) /*!< in: level of rec in tree:
+ 0 means leaf level */
{
dtuple_t* tuple;
dfield_t* field;
@@ -3625,20 +3990,21 @@ dict_index_build_node_ptr(
return(tuple);
}
-/**************************************************************************
+/**********************************************************************//**
Copies an initial segment of a physical record, long enough to specify an
-index entry uniquely. */
-
+index entry uniquely.
+@return pointer to the prefix record */
+UNIV_INTERN
rec_t*
dict_index_copy_rec_order_prefix(
/*=============================*/
- /* out: pointer to the prefix record */
- dict_index_t* index, /* in: index tree */
- rec_t* rec, /* in: record for which to copy prefix */
- ulint* n_fields,/* out: number of fields copied */
- byte** buf, /* in/out: memory buffer for the copied prefix,
- or NULL */
- ulint* buf_size)/* in/out: buffer size */
+ const dict_index_t* index, /*!< in: index */
+ const rec_t* rec, /*!< in: record for which to
+ copy prefix */
+ ulint* n_fields,/*!< out: number of fields copied */
+ byte** buf, /*!< in/out: memory buffer for the
+ copied prefix, or NULL */
+ ulint* buf_size)/*!< in/out: buffer size */
{
ulint n;
@@ -3655,17 +4021,17 @@ dict_index_copy_rec_order_prefix(
return(rec_copy_prefix_to_buf(rec, index, n, buf, buf_size));
}
-/**************************************************************************
-Builds a typed data tuple out of a physical record. */
-
+/**********************************************************************//**
+Builds a typed data tuple out of a physical record.
+@return own: data tuple */
+UNIV_INTERN
dtuple_t*
dict_index_build_data_tuple(
/*========================*/
- /* out, own: data tuple */
- dict_index_t* index, /* in: index tree */
- rec_t* rec, /* in: record for which to build data tuple */
- ulint n_fields,/* in: number of data fields */
- mem_heap_t* heap) /* in: memory heap where tuple created */
+ dict_index_t* index, /*!< in: index tree */
+ rec_t* rec, /*!< in: record for which to build data tuple */
+ ulint n_fields,/*!< in: number of data fields */
+ mem_heap_t* heap) /*!< in: memory heap where tuple created */
{
dtuple_t* tuple;
@@ -3683,24 +4049,25 @@ dict_index_build_data_tuple(
return(tuple);
}
-/*************************************************************************
+/*********************************************************************//**
Calculates the minimum record length in an index. */
-
+UNIV_INTERN
ulint
dict_index_calc_min_rec_len(
/*========================*/
- dict_index_t* index) /* in: index */
+ const dict_index_t* index) /*!< in: index */
{
ulint sum = 0;
ulint i;
+ ulint comp = dict_table_is_comp(index->table);
- if (dict_table_is_comp(index->table)) {
+ if (comp) {
ulint nullable = 0;
sum = REC_N_NEW_EXTRA_BYTES;
for (i = 0; i < dict_index_get_n_fields(index); i++) {
const dict_col_t* col
= dict_index_get_nth_col(index, i);
- ulint size = dict_col_get_fixed_size(col);
+ ulint size = dict_col_get_fixed_size(col, comp);
sum += size;
if (!size) {
size = col->len;
@@ -3719,7 +4086,7 @@ dict_index_calc_min_rec_len(
for (i = 0; i < dict_index_get_n_fields(index); i++) {
sum += dict_col_get_fixed_size(
- dict_index_get_nth_col(index, i));
+ dict_index_get_nth_col(index, i), comp);
}
if (sum > 127) {
@@ -3733,16 +4100,16 @@ dict_index_calc_min_rec_len(
return(sum);
}
-/*************************************************************************
+/*********************************************************************//**
Calculates new estimates for table and index statistics. The statistics
are used in query optimization. */
-
+UNIV_INTERN
void
dict_update_statistics_low(
/*=======================*/
- dict_table_t* table, /* in: table */
+ dict_table_t* table, /*!< in/out: table */
ibool has_dict_mutex __attribute__((unused)))
- /* in: TRUE if the caller has the
+ /*!< in: TRUE if the caller has the
dictionary mutex */
{
dict_index_t* index;
@@ -3755,8 +4122,7 @@ dict_update_statistics_low(
" InnoDB: cannot calculate statistics for table %s\n"
"InnoDB: because the .ibd file is missing. For help,"
" please refer to\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "innodb-troubleshooting.html\n",
+ "InnoDB: " REFMAN "innodb-troubleshooting.html\n",
table->name);
return;
@@ -3817,37 +4183,25 @@ dict_update_statistics_low(
table->stat_modified_counter = 0;
}
-/*************************************************************************
+/*********************************************************************//**
Calculates new estimates for table and index statistics. The statistics
are used in query optimization. */
-
+UNIV_INTERN
void
dict_update_statistics(
/*===================*/
- dict_table_t* table) /* in: table */
+ dict_table_t* table) /*!< in/out: table */
{
dict_update_statistics_low(table, FALSE);
}
-/**************************************************************************
-A noninlined version of dict_table_get_low. */
-
-dict_table_t*
-dict_table_get_low_noninlined(
-/*==========================*/
- /* out: table, NULL if not found */
- const char* table_name) /* in: table name */
-{
- return(dict_table_get_low(table_name));
-}
-
-/**************************************************************************
+/**********************************************************************//**
Prints info of a foreign key constraint. */
static
void
dict_foreign_print_low(
/*===================*/
- dict_foreign_t* foreign) /* in: foreign key constraint */
+ dict_foreign_t* foreign) /*!< in: foreign key constraint */
{
ulint i;
@@ -3871,26 +4225,26 @@ dict_foreign_print_low(
fputs(" )\n", stderr);
}
-/**************************************************************************
+/**********************************************************************//**
Prints a table data. */
-
+UNIV_INTERN
void
dict_table_print(
/*=============*/
- dict_table_t* table) /* in: table */
+ dict_table_t* table) /*!< in: table */
{
mutex_enter(&(dict_sys->mutex));
dict_table_print_low(table);
mutex_exit(&(dict_sys->mutex));
}
-/**************************************************************************
+/**********************************************************************//**
Prints a table data when we know the table name. */
-
+UNIV_INTERN
void
dict_table_print_by_name(
/*=====================*/
- const char* name)
+ const char* name) /*!< in: table name */
{
dict_table_t* table;
@@ -3904,13 +4258,13 @@ dict_table_print_by_name(
mutex_exit(&(dict_sys->mutex));
}
-/**************************************************************************
+/**********************************************************************//**
Prints a table data. */
-
+UNIV_INTERN
void
dict_table_print_low(
/*=================*/
- dict_table_t* table) /* in: table */
+ dict_table_t* table) /*!< in: table */
{
dict_index_t* index;
dict_foreign_t* foreign;
@@ -3922,12 +4276,13 @@ dict_table_print_low(
fprintf(stderr,
"--------------------------------------\n"
- "TABLE: name %s, id %lu %lu, columns %lu, indexes %lu,"
- " appr.rows %lu\n"
+ "TABLE: name %s, id %lu %lu, flags %lx, columns %lu,"
+ " indexes %lu, appr.rows %lu\n"
" COLUMNS: ",
table->name,
(ulong) ut_dulint_get_high(table->id),
(ulong) ut_dulint_get_low(table->id),
+ (ulong) table->flags,
(ulong) table->n_cols,
(ulong) UT_LIST_GET_LEN(table->indexes),
(ulong) table->stat_n_rows);
@@ -3961,14 +4316,14 @@ dict_table_print_low(
}
}
-/**************************************************************************
+/**********************************************************************//**
Prints a column data. */
static
void
dict_col_print_low(
/*===============*/
- const dict_table_t* table, /* in: table */
- const dict_col_t* col) /* in: column */
+ const dict_table_t* table, /*!< in: table */
+ const dict_col_t* col) /*!< in: column */
{
dtype_t type;
@@ -3981,16 +4336,17 @@ dict_col_print_low(
dtype_print(&type);
}
-/**************************************************************************
+/**********************************************************************//**
Prints an index data. */
static
void
dict_index_print_low(
/*=================*/
- dict_index_t* index) /* in: index */
+ dict_index_t* index) /*!< in: index */
{
- ib_longlong n_vals;
+ ib_int64_t n_vals;
ulint i;
+ const char* type_string;
ut_ad(mutex_own(&(dict_sys->mutex)));
@@ -4001,6 +4357,14 @@ dict_index_print_low(
n_vals = index->stat_n_diff_key_vals[1];
}
+ if (dict_index_is_clust(index)) {
+ type_string = "clustered index";
+ } else if (dict_index_is_unique(index)) {
+ type_string = "unique index";
+ } else {
+ type_string = "secondary index";
+ }
+
fprintf(stderr,
" INDEX: name %s, id %lu %lu, fields %lu/%lu,"
" uniq %lu, type %lu\n"
@@ -4032,13 +4396,13 @@ dict_index_print_low(
#endif /* UNIV_BTR_PRINT */
}
-/**************************************************************************
+/**********************************************************************//**
Prints a field data. */
static
void
dict_field_print_low(
/*=================*/
- dict_field_t* field) /* in: field */
+ dict_field_t* field) /*!< in: field */
{
ut_ad(mutex_own(&(dict_sys->mutex)));
@@ -4049,17 +4413,17 @@ dict_field_print_low(
}
}
-/**************************************************************************
+/**********************************************************************//**
Outputs info on a foreign key of a table in a format suitable for
CREATE TABLE. */
-
+UNIV_INTERN
void
dict_print_info_on_foreign_key_in_create_format(
/*============================================*/
- FILE* file, /* in: file where to print */
- trx_t* trx, /* in: transaction */
- dict_foreign_t* foreign, /* in: foreign key constraint */
- ibool add_newline) /* in: whether to add a newline */
+ FILE* file, /*!< in: file where to print */
+ trx_t* trx, /*!< in: transaction */
+ dict_foreign_t* foreign, /*!< in: foreign key constraint */
+ ibool add_newline) /*!< in: whether to add a newline */
{
const char* stripped_id;
ulint i;
@@ -4147,19 +4511,19 @@ dict_print_info_on_foreign_key_in_create_format(
}
}
-/**************************************************************************
+/**********************************************************************//**
Outputs info on foreign keys of a table. */
-
+UNIV_INTERN
void
dict_print_info_on_foreign_keys(
/*============================*/
- ibool create_table_format, /* in: if TRUE then print in
+ ibool create_table_format, /*!< in: if TRUE then print in
a format suitable to be inserted into
a CREATE TABLE, otherwise in the format
of SHOW TABLE STATUS */
- FILE* file, /* in: file where to print */
- trx_t* trx, /* in: transaction */
- dict_table_t* table) /* in: table */
+ FILE* file, /*!< in: file where to print */
+ trx_t* trx, /*!< in: transaction */
+ dict_table_t* table) /*!< in: table */
{
dict_foreign_t* foreign;
@@ -4237,17 +4601,251 @@ dict_print_info_on_foreign_keys(
mutex_exit(&(dict_sys->mutex));
}
-/************************************************************************
+/********************************************************************//**
Displays the names of the index and the table. */
+UNIV_INTERN
void
dict_index_name_print(
/*==================*/
- FILE* file, /* in: output stream */
- trx_t* trx, /* in: transaction */
- const dict_index_t* index) /* in: index to print */
+ FILE* file, /*!< in: output stream */
+ trx_t* trx, /*!< in: transaction */
+ const dict_index_t* index) /*!< in: index to print */
{
fputs("index ", file);
ut_print_name(file, trx, FALSE, index->name);
fputs(" of table ", file);
ut_print_name(file, trx, TRUE, index->table_name);
}
+#endif /* !UNIV_HOTBACKUP */
+
+/**********************************************************************//**
+Inits dict_ind_redundant and dict_ind_compact.
+For each of the two globals a dummy table is created (SYS_DUMMY1, and
+SYS_DUMMY2 which is created with DICT_TF_COMPACT), a DATA_CHAR column is
+added to it, an index is created and given that column, and the index is
+pointed at its table. Both indexes are finally flagged as cached. */
+UNIV_INTERN
+void
+dict_ind_init(void)
+/*===============*/
+{
+ dict_table_t* table;
+
+ /* create dummy table and index for REDUNDANT infimum and supremum */
+ table = dict_mem_table_create("SYS_DUMMY1", DICT_HDR_SPACE, 1, 0);
+ dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR,
+ DATA_ENGLISH | DATA_NOT_NULL, 8);
+
+ dict_ind_redundant = dict_mem_index_create("SYS_DUMMY1", "SYS_DUMMY1",
+ DICT_HDR_SPACE, 0, 1);
+ dict_index_add_col(dict_ind_redundant, table,
+ dict_table_get_nth_col(table, 0), 0);
+ dict_ind_redundant->table = table;
+ /* create dummy table and index for COMPACT infimum and supremum;
+ the local variable table is reused, the first table stays reachable
+ through dict_ind_redundant->table */
+ table = dict_mem_table_create("SYS_DUMMY2",
+ DICT_HDR_SPACE, 1, DICT_TF_COMPACT);
+ dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR,
+ DATA_ENGLISH | DATA_NOT_NULL, 8);
+ dict_ind_compact = dict_mem_index_create("SYS_DUMMY2", "SYS_DUMMY2",
+ DICT_HDR_SPACE, 0, 1);
+ dict_index_add_col(dict_ind_compact, table,
+ dict_table_get_nth_col(table, 0), 0);
+ dict_ind_compact->table = table;
+
+ /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree:
+ the dummy indexes are marked as cached so that the assertion holds
+ for them as well */
+ dict_ind_redundant->cached = dict_ind_compact->cached = TRUE;
+}
+
+/**********************************************************************//**
+Frees dict_ind_redundant and dict_ind_compact.
+For each of the two indexes the pointer to its table is saved first, then
+the index is freed and its global pointer is reset to NULL, and finally
+the saved table is freed. */
+static
+void
+dict_ind_free(void)
+/*===============*/
+{
+ dict_table_t* table;
+
+ table = dict_ind_compact->table; /* save: the index is freed next */
+ dict_mem_index_free(dict_ind_compact);
+ dict_ind_compact = NULL;
+ dict_mem_table_free(table);
+
+ table = dict_ind_redundant->table; /* save: the index is freed next */
+ dict_mem_index_free(dict_ind_redundant);
+ dict_ind_redundant = NULL;
+ dict_mem_table_free(table);
+}
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Walks the index list of a table and looks for an index with the given
+name.
+@return	the first index whose name matches, NULL if there is none */
+UNIV_INTERN
+dict_index_t*
+dict_table_get_index_on_name(
+/*=========================*/
+	dict_table_t*	table,	/*!< in: table */
+	const char*	name)	/*!< in: name of the index to find */
+{
+	dict_index_t*	cur;
+
+	for (cur = dict_table_get_first_index(table);
+	     cur != NULL;
+	     cur = dict_table_get_next_index(cur)) {
+
+		if (!ut_strcmp(cur->name, name)) {
+			/* match: cur is what gets returned */
+			break;
+		}
+	}
+
+	/* cur is NULL here when the whole list was scanned in vain */
+	return(cur);
+}
+
+/**********************************************************************//**
+Goes through the foreign key list of the table and, for every constraint
+that uses the given index as its foreign index, switches the constraint
+over to an equivalent index. An equivalent index must exist. */
+UNIV_INTERN
+void
+dict_table_replace_index_in_foreign_list(
+/*=====================================*/
+	dict_table_t*	table,	/*!< in/out: table */
+	dict_index_t*	index)	/*!< in: index to be replaced */
+{
+	dict_foreign_t*	fk = UT_LIST_GET_FIRST(table->foreign_list);
+
+	while (fk != NULL) {
+
+		if (fk->foreign_index == index) {
+			dict_index_t*	replacement;
+
+			replacement = dict_foreign_find_equiv_index(fk);
+			/* the constraint cannot be left without an index */
+			ut_a(replacement);
+
+			fk->foreign_index = replacement;
+		}
+
+		fk = UT_LIST_GET_NEXT(foreign_list, fk);
+	}
+}
+
+/**********************************************************************//**
+Looks for an index by name; when several indexes of the table carry the
+same name, the one with the smallest id wins.
+@return	index, NULL if does not exist */
+UNIV_INTERN
+dict_index_t*
+dict_table_get_index_on_name_and_min_id(
+/*=====================================*/
+	dict_table_t*	table,	/*!< in: table */
+	const char*	name)	/*!< in: name of the index to find */
+{
+	dict_index_t*	best = NULL;	/* matching index with min(id) so far */
+	dict_index_t*	cur;
+
+	for (cur = dict_table_get_first_index(table);
+	     cur != NULL;
+	     cur = dict_table_get_next_index(cur)) {
+
+		if (ut_strcmp(cur->name, name) != 0) {
+			/* different name: not a candidate */
+			continue;
+		}
+
+		if (best == NULL
+		    || ut_dulint_cmp(cur->id, best->id) < 0) {
+
+			best = cur;
+		}
+	}
+
+	return(best);
+}
+
+#ifdef UNIV_DEBUG
+/**********************************************************************//**
+Debug check: asserts that no two indexes of the table share a name.
+Every index is compared against all indexes that follow it in the list;
+a following index that is marked as to be dropped is left out of the
+comparison. */
+UNIV_INTERN
+void
+dict_table_check_for_dup_indexes(
+/*=============================*/
+	const dict_table_t*	table)	/*!< in: Check for dup indexes
+					in this table */
+{
+	const dict_index_t*	outer;
+	const dict_index_t*	inner;
+
+	/* The primary index _must_ exist */
+	ut_a(UT_LIST_GET_LEN(table->indexes) > 0);
+
+	for (outer = UT_LIST_GET_FIRST(table->indexes);
+	     outer != NULL;
+	     outer = UT_LIST_GET_NEXT(indexes, outer)) {
+
+		/* only the indexes after outer need to be looked at; the
+		pairs with earlier indexes were already covered */
+		for (inner = UT_LIST_GET_NEXT(indexes, outer);
+		     inner != NULL;
+		     inner = UT_LIST_GET_NEXT(indexes, inner)) {
+
+			if (!inner->to_be_dropped) {
+				ut_ad(ut_strcmp(outer->name, inner->name));
+			}
+		}
+	}
+}
+#endif /* UNIV_DEBUG */
+
+/**********************************************************************//**
+Closes the data dictionary module.
+Every table found in dict_sys->table_hash is removed from the cache, both
+hash tables are freed, the dummy indexes are released with dict_ind_free(),
+the dictionary mutex, dict_operation_lock and dict_foreign_err_mutex are
+freed, and finally dict_sys itself is freed and reset to NULL. */
+UNIV_INTERN
+void
+dict_close(void)
+/*============*/
+{
+ ulint i;
+
+ /* Free the hash elements. We don't remove them from the table
+ because we are going to destroy the table anyway.
+ NOTE(review): each element is released through
+ dict_table_remove_from_cache(); confirm whether that call also
+ unlinks the element from table_hash, which would make the sentence
+ above inaccurate. */
+ for (i = 0; i < hash_get_n_cells(dict_sys->table_hash); i++) {
+ dict_table_t* table;
+
+ table = HASH_GET_FIRST(dict_sys->table_hash, i);
+
+ while (table) {
+ dict_table_t* prev_table = table;
+
+ table = HASH_GET_NEXT(name_hash, prev_table); /* read the successor before prev_table is removed below */
+#ifdef UNIV_DEBUG
+ ut_a(prev_table->magic_n == DICT_TABLE_MAGIC_N);
+#endif
+ /* Acquire only because it's a pre-condition. */
+ mutex_enter(&dict_sys->mutex);
+
+ dict_table_remove_from_cache(prev_table);
+
+ mutex_exit(&dict_sys->mutex);
+ }
+ }
+
+ hash_table_free(dict_sys->table_hash);
+
+ /* The elements are the same instance as in dict_sys->table_hash,
+ therefore we don't delete the individual elements. */
+ hash_table_free(dict_sys->table_id_hash);
+
+ dict_ind_free();
+
+ mutex_free(&dict_sys->mutex);
+
+ rw_lock_free(&dict_operation_lock);
+ memset(&dict_operation_lock, 0x0, sizeof(dict_operation_lock)); /* zero the storage of the freed lock */
+
+ mutex_free(&dict_foreign_err_mutex);
+
+ mem_free(dict_sys);
+ dict_sys = NULL;
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/dict/dict0load.c b/storage/innobase/dict/dict0load.c
index 65f1c9536bd..842a129c1a6 100644
--- a/storage/innobase/dict/dict0load.c
+++ b/storage/innobase/dict/dict0load.c
@@ -1,16 +1,31 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file dict/dict0load.c
Loads to the memory cache database object definitions
from dictionary tables
-(c) 1996 Innobase Oy
-
Created 4/24/1996 Heikki Tuuri
*******************************************************/
#include "dict0load.h"
-#ifndef UNIV_HOTBACKUP
#include "mysql_version.h"
-#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_NONINL
#include "dict0load.ic"
@@ -26,17 +41,17 @@ Created 4/24/1996 Heikki Tuuri
#include "srv0start.h"
#include "srv0srv.h"
-/********************************************************************
-Returns TRUE if index's i'th column's name is 'name' .*/
+/****************************************************************//**
+Compare the name of an index column.
+@return TRUE if the i'th column of index is 'name'. */
static
ibool
name_of_col_is(
/*===========*/
- /* out: */
- dict_table_t* table, /* in: table */
- dict_index_t* index, /* in: index */
- ulint i, /* in: */
- const char* name) /* in: name to compare to */
+ const dict_table_t* table, /*!< in: table */
+ const dict_index_t* index, /*!< in: index */
+ ulint i, /*!< in: index field offset */
+ const char* name) /*!< in: name to compare to */
{
ulint tmp = dict_col_get_no(dict_field_get_col(
dict_index_get_nth_field(
@@ -45,16 +60,15 @@ name_of_col_is(
return(strcmp(name, dict_table_get_col_name(table, tmp)) == 0);
}
-/************************************************************************
-Finds the first table name in the given database. */
-
+/********************************************************************//**
+Finds the first table name in the given database.
+@return own: table name, NULL if does not exist; the caller must free
+the memory in the string! */
+UNIV_INTERN
char*
dict_get_first_table_name_in_db(
/*============================*/
- /* out, own: table name, NULL if
- does not exist; the caller must
- free the memory in the string! */
- const char* name) /* in: database name which ends in '/' */
+ const char* name) /*!< in: database name which ends in '/' */
{
dict_table_t* sys_tables;
btr_pcur_t pcur;
@@ -62,8 +76,8 @@ dict_get_first_table_name_in_db(
dtuple_t* tuple;
mem_heap_t* heap;
dfield_t* dfield;
- rec_t* rec;
- byte* field;
+ const rec_t* rec;
+ const byte* field;
ulint len;
mtr_t mtr;
@@ -88,7 +102,7 @@ dict_get_first_table_name_in_db(
loop:
rec = btr_pcur_get_rec(&pcur);
- if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
+ if (!btr_pcur_is_on_user_rec(&pcur)) {
/* Not found */
btr_pcur_close(&pcur);
@@ -129,10 +143,10 @@ loop:
goto loop;
}
-/************************************************************************
+/********************************************************************//**
Prints to the standard output information on all tables found in the data
dictionary system table. */
-
+UNIV_INTERN
void
dict_print(void)
/*============*/
@@ -141,8 +155,8 @@ dict_print(void)
dict_index_t* sys_index;
dict_table_t* table;
btr_pcur_t pcur;
- rec_t* rec;
- byte* field;
+ const rec_t* rec;
+ const byte* field;
ulint len;
mtr_t mtr;
@@ -167,7 +181,7 @@ loop:
rec = btr_pcur_get_rec(&pcur);
- if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
+ if (!btr_pcur_is_on_user_rec(&pcur)) {
/* end of index */
btr_pcur_close(&pcur);
@@ -222,7 +236,69 @@ loop:
goto loop;
}
-/************************************************************************
+/********************************************************************//**
+Determine the flags of a table described in SYS_TABLES.
+The 4-byte value in field 5 of the record is read and, unless it is
+DICT_TABLE_ORDINARY, validated: the high bit of the 4-byte value in field 4
+must be set, the file format and compact bits must name a supported
+combination, the compressed page size bits must not exceed
+DICT_TF_ZSSIZE_MAX, and no bit at or above DICT_TF_BITS may be set.
+@return	0 if field 5 is DICT_TABLE_ORDINARY, otherwise the validated
+flags exactly as read from field 5; ULINT_UNDEFINED on error */
+static
+ulint
+dict_sys_tables_get_flags(
+/*======================*/
+	const rec_t*	rec)	/*!< in: a record of SYS_TABLES */
+{
+	const byte*	field;
+	ulint		len;
+	ulint		n_cols;
+	ulint		flags;
+
+	field = rec_get_nth_field_old(rec, 5, &len);
+	ut_a(len == 4);
+
+	flags = mach_read_from_4(field);
+
+	if (UNIV_LIKELY(flags == DICT_TABLE_ORDINARY)) {
+		return(0);
+	}
+
+	field = rec_get_nth_field_old(rec, 4, &len);
+	n_cols = mach_read_from_4(field);
+
+	if (UNIV_UNLIKELY(!(n_cols & 0x80000000UL))) {
+		/* New file formats require ROW_FORMAT=COMPACT. */
+		return(ULINT_UNDEFINED);
+	}
+
+	switch (flags & (DICT_TF_FORMAT_MASK | DICT_TF_COMPACT)) {
+	default:
+	case DICT_TF_FORMAT_51 << DICT_TF_FORMAT_SHIFT:
+	case DICT_TF_FORMAT_51 << DICT_TF_FORMAT_SHIFT | DICT_TF_COMPACT:
+		/* flags should be DICT_TABLE_ORDINARY,
+		or DICT_TF_FORMAT_MASK should be nonzero. */
+		return(ULINT_UNDEFINED);
+
+	case DICT_TF_FORMAT_ZIP << DICT_TF_FORMAT_SHIFT | DICT_TF_COMPACT:
+#if DICT_TF_FORMAT_MAX > DICT_TF_FORMAT_ZIP
+# error "missing case labels for DICT_TF_FORMAT_ZIP .. DICT_TF_FORMAT_MAX"
+#endif
+		/* We support this format. */
+		break;
+	}
+
+	if (UNIV_UNLIKELY((flags & DICT_TF_ZSSIZE_MASK)
+			  > (DICT_TF_ZSSIZE_MAX << DICT_TF_ZSSIZE_SHIFT))) {
+		/* Unsupported compressed page size. */
+		return(ULINT_UNDEFINED);
+	}
+
+	/* The mask is built from an unsigned all-ones value of the same
+	type as flags: left-shifting the negative int ~0 is undefined
+	behavior in C. */
+	if (UNIV_UNLIKELY(flags & (~(ulint) 0 << DICT_TF_BITS))) {
+		/* Some unused bits are set. */
+		return(ULINT_UNDEFINED);
+	}
+
+	return(flags);
+}
+
+/********************************************************************//**
In a crash recovery we already have all the tablespace objects created.
This function compares the space id information in the InnoDB data dictionary
to what we already read with fil_load_single_table_tablespaces().
@@ -230,19 +306,16 @@ to what we already read with fil_load_single_table_tablespaces().
In a normal startup, we create the tablespace objects for every table in
InnoDB's data dictionary, if the corresponding .ibd file exists.
We also scan the biggest space id, and store it to fil_system. */
-
+UNIV_INTERN
void
dict_check_tablespaces_and_store_max_id(
/*====================================*/
- ibool in_crash_recovery) /* in: are we doing a crash recovery */
+ ibool in_crash_recovery) /*!< in: are we doing a crash recovery */
{
dict_table_t* sys_tables;
dict_index_t* sys_index;
btr_pcur_t pcur;
- rec_t* rec;
- byte* field;
- ulint len;
- ulint space_id;
+ const rec_t* rec;
ulint max_space_id = 0;
mtr_t mtr;
@@ -261,7 +334,7 @@ loop:
rec = btr_pcur_get_rec(&pcur);
- if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
+ if (!btr_pcur_is_on_user_rec(&pcur)) {
/* end of index */
btr_pcur_close(&pcur);
@@ -279,13 +352,34 @@ loop:
return;
}
- field = rec_get_nth_field_old(rec, 0, &len);
-
if (!rec_get_deleted_flag(rec, 0)) {
/* We found one */
+ const byte* field;
+ ulint len;
+ ulint space_id;
+ ulint flags;
+ char* name;
+
+ field = rec_get_nth_field_old(rec, 0, &len);
+ name = mem_strdupl((char*) field, len);
- char* name = mem_strdupl((char*) field, len);
+ flags = dict_sys_tables_get_flags(rec);
+ if (UNIV_UNLIKELY(flags == ULINT_UNDEFINED)) {
+
+ field = rec_get_nth_field_old(rec, 5, &len);
+ flags = mach_read_from_4(field);
+
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: Error: table ", stderr);
+ ut_print_filename(stderr, name);
+ fprintf(stderr, "\n"
+ "InnoDB: in InnoDB data dictionary"
+ " has unknown type %lx.\n",
+ (ulong) flags);
+
+ goto loop;
+ }
field = rec_get_nth_field_old(rec, 9, &len);
ut_a(len == 4);
@@ -309,7 +403,7 @@ loop:
object and check that the .ibd file exists. */
fil_open_single_table_tablespace(FALSE, space_id,
- name);
+ flags, name);
}
mem_free(name);
@@ -326,22 +420,22 @@ loop:
goto loop;
}
-/************************************************************************
+/********************************************************************//**
Loads definitions for table columns. */
static
void
dict_load_columns(
/*==============*/
- dict_table_t* table, /* in: table */
- mem_heap_t* heap) /* in: memory heap for temporary storage */
+ dict_table_t* table, /*!< in: table */
+ mem_heap_t* heap) /*!< in: memory heap for temporary storage */
{
dict_table_t* sys_columns;
dict_index_t* sys_index;
btr_pcur_t pcur;
dtuple_t* tuple;
dfield_t* dfield;
- rec_t* rec;
- byte* field;
+ const rec_t* rec;
+ const byte* field;
ulint len;
byte* buf;
char* name;
@@ -374,7 +468,7 @@ dict_load_columns(
rec = btr_pcur_get_rec(&pcur);
- ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr));
+ ut_a(btr_pcur_is_on_user_rec(&pcur));
ut_a(!rec_get_deleted_flag(rec, 0));
@@ -432,34 +526,14 @@ dict_load_columns(
mtr_commit(&mtr);
}
-/************************************************************************
-Report that an index field or index for a table has been delete marked. */
-static
-void
-dict_load_report_deleted_index(
-/*===========================*/
- const char* name, /* in: table name */
- ulint field) /* in: index field, or ULINT_UNDEFINED */
-{
- fprintf(stderr, "InnoDB: Error: data dictionary entry"
- " for table %s is corrupt!\n", name);
- if (field != ULINT_UNDEFINED) {
- fprintf(stderr,
- "InnoDB: Index field %lu is delete marked.\n", field);
- } else {
- fputs("InnoDB: An index is delete marked.\n", stderr);
- }
-}
-
-/************************************************************************
+/********************************************************************//**
Loads definitions for index fields. */
static
void
dict_load_fields(
/*=============*/
- dict_table_t* table, /* in: table */
- dict_index_t* index, /* in: index whose fields to load */
- mem_heap_t* heap) /* in: memory heap for temporary storage */
+ dict_index_t* index, /*!< in: index whose fields to load */
+ mem_heap_t* heap) /*!< in: memory heap for temporary storage */
{
dict_table_t* sys_fields;
dict_index_t* sys_index;
@@ -468,8 +542,8 @@ dict_load_fields(
dfield_t* dfield;
ulint pos_and_prefix_len;
ulint prefix_len;
- rec_t* rec;
- byte* field;
+ const rec_t* rec;
+ const byte* field;
ulint len;
byte* buf;
ulint i;
@@ -498,14 +572,19 @@ dict_load_fields(
rec = btr_pcur_get_rec(&pcur);
- ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr));
+ ut_a(btr_pcur_is_on_user_rec(&pcur));
+
+ /* There could be delete marked records in SYS_FIELDS
+ because SYS_FIELDS.INDEX_ID can be updated
+ by ALTER TABLE ADD INDEX. */
+
if (rec_get_deleted_flag(rec, 0)) {
- dict_load_report_deleted_index(table->name, i);
+
+ goto next_rec;
}
field = rec_get_nth_field_old(rec, 0, &len);
ut_ad(len == 8);
- ut_a(ut_memcmp(buf, field, len) == 0);
field = rec_get_nth_field_old(rec, 1, &len);
ut_a(len == 4);
@@ -540,6 +619,7 @@ dict_load_fields(
(char*) field, len),
prefix_len);
+next_rec:
btr_pcur_move_to_next_user_rec(&pcur, &mtr);
}
@@ -547,19 +627,17 @@ dict_load_fields(
mtr_commit(&mtr);
}
-/************************************************************************
+/********************************************************************//**
Loads definitions for table indexes. Adds them to the data dictionary
-cache. */
+cache.
+@return DB_SUCCESS if ok, DB_CORRUPTION if corruption of dictionary
+table or DB_UNSUPPORTED if table has unknown index type */
static
ulint
dict_load_indexes(
/*==============*/
- /* out: DB_SUCCESS if ok, DB_CORRUPTION
- if corruption of dictionary table or
- DB_UNSUPPORTED if table has unknown index
- type */
- dict_table_t* table, /* in: table */
- mem_heap_t* heap) /* in: memory heap for temporary storage */
+ dict_table_t* table, /*!< in: table */
+ mem_heap_t* heap) /*!< in: memory heap for temporary storage */
{
dict_table_t* sys_indexes;
dict_index_t* sys_index;
@@ -567,8 +645,8 @@ dict_load_indexes(
btr_pcur_t pcur;
dtuple_t* tuple;
dfield_t* dfield;
- rec_t* rec;
- byte* field;
+ const rec_t* rec;
+ const byte* field;
ulint len;
ulint name_len;
char* name_buf;
@@ -609,7 +687,7 @@ dict_load_indexes(
btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
BTR_SEARCH_LEAF, &pcur, &mtr);
for (;;) {
- if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
+ if (!btr_pcur_is_on_user_rec(&pcur)) {
break;
}
@@ -621,14 +699,9 @@ dict_load_indexes(
if (ut_memcmp(buf, field, len) != 0) {
break;
- }
-
- if (rec_get_deleted_flag(rec, 0)) {
- dict_load_report_deleted_index(table->name,
- ULINT_UNDEFINED);
-
- error = DB_CORRUPTION;
- goto func_exit;
+ } else if (rec_get_deleted_flag(rec, 0)) {
+ /* Skip delete marked records */
+ goto next_rec;
}
field = rec_get_nth_field_old(rec, 1, &len);
@@ -678,12 +751,13 @@ dict_load_indexes(
} else if ((type & DICT_CLUSTERED) == 0
&& NULL == dict_table_get_first_index(table)) {
- fprintf(stderr,
- "InnoDB: Error: trying to load index %s"
- " for table %s\n"
- "InnoDB: but the first index"
- " is not clustered!\n",
- name_buf, table->name);
+ fputs("InnoDB: Error: trying to load index ",
+ stderr);
+ ut_print_name(stderr, NULL, FALSE, name_buf);
+ fputs(" for table ", stderr);
+ ut_print_name(stderr, NULL, TRUE, table->name);
+ fputs("\nInnoDB: but the first index"
+ " is not clustered!\n", stderr);
error = DB_CORRUPTION;
goto func_exit;
@@ -701,10 +775,21 @@ dict_load_indexes(
space, type, n_fields);
index->id = id;
- dict_load_fields(table, index, heap);
- dict_index_add_to_cache(table, index, page_no);
+ dict_load_fields(index, heap);
+ error = dict_index_add_to_cache(table, index, page_no,
+ FALSE);
+ /* The data dictionary tables should never contain
+ invalid index definitions. If we ignored this error
+ and simply did not load this index definition, the
+ .frm file would disagree with the index definitions
+ inside InnoDB. */
+ if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
+
+ goto func_exit;
+ }
}
+next_rec:
btr_pcur_move_to_next_user_rec(&pcur, &mtr);
}
@@ -715,22 +800,20 @@ func_exit:
return(error);
}
-/************************************************************************
+/********************************************************************//**
Loads a table definition and also all its index definitions, and also
the cluster definition if the table is a member in a cluster. Also loads
all foreign key constraints where the foreign key is in the table or where
a foreign key references columns in this table. Adds all these to the data
-dictionary cache. */
-
+dictionary cache.
+@return table, NULL if it does not exist; if the table is stored in an
+.ibd file, but the file does not exist, then we set the
+ibd_file_missing flag TRUE in the table object we return */
+UNIV_INTERN
dict_table_t*
dict_load_table(
/*============*/
- /* out: table, NULL if does not exist;
- if the table is stored in an .ibd file,
- but the file does not exist,
- then we set the ibd_file_missing flag TRUE
- in the table object we return */
- const char* name) /* in: table name in the
+ const char* name) /*!< in: table name in the
databasename/tablename format */
{
ibool ibd_file_missing = FALSE;
@@ -741,8 +824,8 @@ dict_load_table(
dtuple_t* tuple;
mem_heap_t* heap;
dfield_t* dfield;
- rec_t* rec;
- byte* field;
+ const rec_t* rec;
+ const byte* field;
ulint len;
ulint space;
ulint n_cols;
@@ -770,7 +853,7 @@ dict_load_table(
BTR_SEARCH_LEAF, &pcur, &mtr);
rec = btr_pcur_get_rec(&pcur);
- if (!btr_pcur_is_on_user_rec(&pcur, &mtr)
+ if (!btr_pcur_is_on_user_rec(&pcur)
|| rec_get_deleted_flag(rec, 0)) {
/* Not found */
err_exit:
@@ -796,6 +879,22 @@ err_exit:
/* Check if the tablespace exists and has the right name */
if (space != 0) {
+ flags = dict_sys_tables_get_flags(rec);
+
+ if (UNIV_UNLIKELY(flags == ULINT_UNDEFINED)) {
+ field = rec_get_nth_field_old(rec, 5, &len);
+ flags = mach_read_from_4(field);
+
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: Error: table ", stderr);
+ ut_print_filename(stderr, name);
+ fprintf(stderr, "\n"
+ "InnoDB: in InnoDB data dictionary"
+ " has unknown type %lx.\n",
+ (ulong) flags);
+ goto err_exit;
+ }
+
if (fil_space_for_table_exists_in_mem(space, name, FALSE,
FALSE, FALSE)) {
/* Ok; (if we did a crash recovery then the tablespace
@@ -812,14 +911,16 @@ err_exit:
" Retrying an open.\n",
name, (ulong)space);
/* Try to open the tablespace */
- if (!fil_open_single_table_tablespace(TRUE,
- space, name)) {
+ if (!fil_open_single_table_tablespace(
+ TRUE, space, flags, name)) {
/* We failed to find a sensible tablespace
file */
ibd_file_missing = TRUE;
}
}
+ } else {
+ flags = 0;
}
ut_a(name_of_col_is(sys_tables, sys_index, 4, "N_COLS"));
@@ -827,8 +928,6 @@ err_exit:
field = rec_get_nth_field_old(rec, 4, &len);
n_cols = mach_read_from_4(field);
- flags = 0;
-
/* The high-order bit of N_COLS is the "compact format" flag. */
if (n_cols & 0x80000000UL) {
flags |= DICT_TF_COMPACT;
@@ -844,15 +943,6 @@ err_exit:
field = rec_get_nth_field_old(rec, 3, &len);
table->id = mach_read_from_8(field);
- field = rec_get_nth_field_old(rec, 5, &len);
- if (UNIV_UNLIKELY(mach_read_from_4(field) != DICT_TABLE_ORDINARY)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: table %s: unknown table type %lu\n",
- name, (ulong) mach_read_from_4(field));
- goto err_exit;
- }
-
btr_pcur_close(&pcur);
mtr_commit(&mtr);
@@ -903,14 +993,14 @@ err_exit:
return(table);
}
-/***************************************************************************
-Loads a table object based on the table id. */
-
+/***********************************************************************//**
+Loads a table object based on the table id.
+@return table; NULL if table does not exist */
+UNIV_INTERN
dict_table_t*
dict_load_table_on_id(
/*==================*/
- /* out: table; NULL if table does not exist */
- dulint table_id) /* in: table id */
+ dulint table_id) /*!< in: table id */
{
byte id_buf[8];
btr_pcur_t pcur;
@@ -919,8 +1009,8 @@ dict_load_table_on_id(
dfield_t* dfield;
dict_index_t* sys_table_ids;
dict_table_t* sys_tables;
- rec_t* rec;
- byte* field;
+ const rec_t* rec;
+ const byte* field;
ulint len;
dict_table_t* table;
mtr_t mtr;
@@ -953,7 +1043,7 @@ dict_load_table_on_id(
BTR_SEARCH_LEAF, &pcur, &mtr);
rec = btr_pcur_get_rec(&pcur);
- if (!btr_pcur_is_on_user_rec(&pcur, &mtr)
+ if (!btr_pcur_is_on_user_rec(&pcur)
|| rec_get_deleted_flag(rec, 0)) {
/* Not found */
@@ -994,15 +1084,15 @@ dict_load_table_on_id(
return(table);
}
-/************************************************************************
+/********************************************************************//**
This function is called when the database is booted. Loads system table
index definitions except for the clustered index which is added to the
dictionary cache at booting before calling this function. */
-
+UNIV_INTERN
void
dict_load_sys_table(
/*================*/
- dict_table_t* table) /* in: system table */
+ dict_table_t* table) /*!< in: system table */
{
mem_heap_t* heap;
@@ -1015,23 +1105,23 @@ dict_load_sys_table(
mem_heap_free(heap);
}
-/************************************************************************
+/********************************************************************//**
Loads foreign key constraint col names (also for the referenced table). */
static
void
dict_load_foreign_cols(
/*===================*/
- const char* id, /* in: foreign constraint id as a
+ const char* id, /*!< in: foreign constraint id as a
null-terminated string */
- dict_foreign_t* foreign)/* in: foreign constraint object */
+ dict_foreign_t* foreign)/*!< in: foreign constraint object */
{
dict_table_t* sys_foreign_cols;
dict_index_t* sys_index;
btr_pcur_t pcur;
dtuple_t* tuple;
dfield_t* dfield;
- rec_t* rec;
- byte* field;
+ const rec_t* rec;
+ const byte* field;
ulint len;
ulint i;
mtr_t mtr;
@@ -1061,7 +1151,7 @@ dict_load_foreign_cols(
rec = btr_pcur_get_rec(&pcur);
- ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr));
+ ut_a(btr_pcur_is_on_user_rec(&pcur));
ut_a(!rec_get_deleted_flag(rec, 0));
field = rec_get_nth_field_old(rec, 0, &len);
@@ -1087,17 +1177,17 @@ dict_load_foreign_cols(
mtr_commit(&mtr);
}
-/***************************************************************************
-Loads a foreign key constraint to the dictionary cache. */
+/***********************************************************************//**
+Loads a foreign key constraint to the dictionary cache.
+@return DB_SUCCESS or error code */
static
ulint
dict_load_foreign(
/*==============*/
- /* out: DB_SUCCESS or error code */
- const char* id, /* in: foreign constraint id as a
+ const char* id, /*!< in: foreign constraint id as a
null-terminated string */
ibool check_charsets)
- /* in: TRUE=check charset compatibility */
+ /*!< in: TRUE=check charset compatibility */
{
dict_foreign_t* foreign;
dict_table_t* sys_foreign;
@@ -1106,8 +1196,8 @@ dict_load_foreign(
dtuple_t* tuple;
mem_heap_t* heap2;
dfield_t* dfield;
- rec_t* rec;
- byte* field;
+ const rec_t* rec;
+ const byte* field;
ulint len;
ulint n_fields_and_type;
mtr_t mtr;
@@ -1132,7 +1222,7 @@ dict_load_foreign(
BTR_SEARCH_LEAF, &pcur, &mtr);
rec = btr_pcur_get_rec(&pcur);
- if (!btr_pcur_is_on_user_rec(&pcur, &mtr)
+ if (!btr_pcur_is_on_user_rec(&pcur)
|| rec_get_deleted_flag(rec, 0)) {
/* Not found */
@@ -1212,19 +1302,19 @@ dict_load_foreign(
return(dict_foreign_add_to_cache(foreign, check_charsets));
}
-/***************************************************************************
+/***********************************************************************//**
Loads foreign key constraints where the table is either the foreign key
holder or where the table is referenced by a foreign key. Adds these
constraints to the data dictionary. Note that we know that the dictionary
cache already contains all constraints where the other relevant table is
-already in the dictionary cache. */
-
+already in the dictionary cache.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
ulint
dict_load_foreigns(
/*===============*/
- /* out: DB_SUCCESS or error code */
- const char* table_name, /* in: table name */
- ibool check_charsets) /* in: TRUE=check charset
+ const char* table_name, /*!< in: table name */
+ ibool check_charsets) /*!< in: TRUE=check charset
compatibility */
{
btr_pcur_t pcur;
@@ -1233,8 +1323,8 @@ dict_load_foreigns(
dfield_t* dfield;
dict_index_t* sec_index;
dict_table_t* sys_foreign;
- rec_t* rec;
- byte* field;
+ const rec_t* rec;
+ const byte* field;
ulint len;
char* id ;
ulint err;
@@ -1276,7 +1366,7 @@ start_load:
loop:
rec = btr_pcur_get_rec(&pcur);
- if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
+ if (!btr_pcur_is_on_user_rec(&pcur)) {
/* End of index */
goto load_next_index;
diff --git a/storage/innobase/dict/dict0mem.c b/storage/innobase/dict/dict0mem.c
index 168771ca307..6458cbab92d 100644
--- a/storage/innobase/dict/dict0mem.c
+++ b/storage/innobase/dict/dict0mem.c
@@ -1,7 +1,24 @@
-/**********************************************************************
-Data dictionary memory object creation
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file dict/dict0mem.c
+Data dictionary memory object creation
Created 1/8/1996 Heikki Tuuri
***********************************************************************/
@@ -16,76 +33,50 @@ Created 1/8/1996 Heikki Tuuri
#include "data0type.h"
#include "mach0data.h"
#include "dict0dict.h"
-#include "que0que.h"
-#include "pars0pars.h"
-#include "lock0lock.h"
+#ifndef UNIV_HOTBACKUP
+# include "lock0lock.h"
+#endif /* !UNIV_HOTBACKUP */
-#define DICT_HEAP_SIZE 100 /* initial memory heap size when
+#define DICT_HEAP_SIZE 100 /*!< initial memory heap size when
creating a table or index object */
-/**************************************************************************
-Creates a table memory object. */
-
+/**********************************************************************//**
+Creates a table memory object.
+@return own: table object */
+UNIV_INTERN
dict_table_t*
dict_mem_table_create(
/*==================*/
- /* out, own: table object */
- const char* name, /* in: table name */
- ulint space, /* in: space where the clustered index of
+ const char* name, /*!< in: table name */
+ ulint space, /*!< in: space where the clustered index of
the table is placed; this parameter is
ignored if the table is made a member of
a cluster */
- ulint n_cols, /* in: number of columns */
- ulint flags) /* in: table flags */
+ ulint n_cols, /*!< in: number of columns */
+ ulint flags) /*!< in: table flags */
{
dict_table_t* table;
mem_heap_t* heap;
ut_ad(name);
- ut_ad(!(flags & ~DICT_TF_COMPACT));
+ ut_a(!(flags & (~0 << DICT_TF_BITS)));
heap = mem_heap_create(DICT_HEAP_SIZE);
- table = mem_heap_alloc(heap, sizeof(dict_table_t));
+ table = mem_heap_zalloc(heap, sizeof(dict_table_t));
table->heap = heap;
table->flags = (unsigned int) flags;
table->name = mem_heap_strdup(heap, name);
- table->dir_path_of_temp_table = NULL;
table->space = (unsigned int) space;
- table->ibd_file_missing = FALSE;
- table->tablespace_discarded = FALSE;
- table->n_def = 0;
table->n_cols = (unsigned int) (n_cols + DATA_N_SYS_COLS);
- table->n_mysql_handles_opened = 0;
- table->n_foreign_key_checks_running = 0;
-
- table->cached = FALSE;
-
table->cols = mem_heap_alloc(heap, (n_cols + DATA_N_SYS_COLS)
* sizeof(dict_col_t));
- table->col_names = NULL;
- UT_LIST_INIT(table->indexes);
-
- table->auto_inc_lock = mem_heap_alloc(heap, lock_get_size());
-
- table->query_cache_inv_trx_id = ut_dulint_zero;
-
- UT_LIST_INIT(table->locks);
- UT_LIST_INIT(table->foreign_list);
- UT_LIST_INIT(table->referenced_list);
-
-#ifdef UNIV_DEBUG
- table->does_not_fit_in_memory = FALSE;
-#endif /* UNIV_DEBUG */
- table->stat_initialized = FALSE;
-
- table->stat_modified_counter = 0;
-
- table->big_rows = 0;
+#ifndef UNIV_HOTBACKUP
+ table->autoinc_lock = mem_heap_alloc(heap, lock_get_size());
mutex_create(&table->autoinc_mutex, SYNC_DICT_AUTOINC_MUTEX);
@@ -94,40 +85,42 @@ dict_mem_table_create(
/* The number of transactions that are either waiting on the
AUTOINC lock or have been granted the lock. */
table->n_waiting_or_granted_auto_inc_locks = 0;
+#endif /* !UNIV_HOTBACKUP */
-#ifdef UNIV_DEBUG
- table->magic_n = DICT_TABLE_MAGIC_N;
-#endif /* UNIV_DEBUG */
+ ut_d(table->magic_n = DICT_TABLE_MAGIC_N);
return(table);
}
-/********************************************************************
+/****************************************************************//**
Free a table memory object. */
-
+UNIV_INTERN
void
dict_mem_table_free(
/*================*/
- dict_table_t* table) /* in: table */
+ dict_table_t* table) /*!< in: table */
{
ut_ad(table);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+ ut_d(table->cached = FALSE);
+#ifndef UNIV_HOTBACKUP
mutex_free(&(table->autoinc_mutex));
+#endif /* !UNIV_HOTBACKUP */
mem_heap_free(table->heap);
}
-/********************************************************************
-Append 'name' to 'col_names' (@see dict_table_t::col_names). */
+/****************************************************************//**
+Append 'name' to 'col_names'. @see dict_table_t::col_names
+@return new column names array */
static
const char*
dict_add_col_name(
/*==============*/
- /* out: new column names array */
- const char* col_names, /* in: existing column names, or
+ const char* col_names, /*!< in: existing column names, or
NULL */
- ulint cols, /* in: number of existing columns */
- const char* name, /* in: new column name */
- mem_heap_t* heap) /* in: heap */
+ ulint cols, /*!< in: number of existing columns */
+ const char* name, /*!< in: new column name */
+ mem_heap_t* heap) /*!< in: heap */
{
ulint old_len;
ulint new_len;
@@ -164,22 +157,24 @@ dict_add_col_name(
return(res);
}
-/**************************************************************************
+/**********************************************************************//**
Adds a column definition to a table. */
-
+UNIV_INTERN
void
dict_mem_table_add_col(
/*===================*/
- dict_table_t* table, /* in: table */
- mem_heap_t* heap, /* in: temporary memory heap, or NULL */
- const char* name, /* in: column name, or NULL */
- ulint mtype, /* in: main datatype */
- ulint prtype, /* in: precise type */
- ulint len) /* in: precision */
+ dict_table_t* table, /*!< in: table */
+ mem_heap_t* heap, /*!< in: temporary memory heap, or NULL */
+ const char* name, /*!< in: column name, or NULL */
+ ulint mtype, /*!< in: main datatype */
+ ulint prtype, /*!< in: precise type */
+ ulint len) /*!< in: precision */
{
dict_col_t* col;
+#ifndef UNIV_HOTBACKUP
ulint mbminlen;
ulint mbmaxlen;
+#endif /* !UNIV_HOTBACKUP */
ulint i;
ut_ad(table);
@@ -194,8 +189,7 @@ dict_mem_table_add_col(
}
if (UNIV_LIKELY(i) && UNIV_UNLIKELY(!table->col_names)) {
/* All preceding column names are empty. */
- char* s = mem_heap_alloc(heap, table->n_def);
- memset(s, 0, table->n_def);
+ char* s = mem_heap_zalloc(heap, table->n_def);
table->col_names = s;
}
@@ -203,7 +197,7 @@ dict_mem_table_add_col(
i, name, heap);
}
- col = (dict_col_t*) dict_table_get_nth_col(table, i);
+ col = dict_table_get_nth_col(table, i);
col->ind = (unsigned int) i;
col->ord_part = 0;
@@ -212,27 +206,29 @@ dict_mem_table_add_col(
col->prtype = (unsigned int) prtype;
col->len = (unsigned int) len;
+#ifndef UNIV_HOTBACKUP
dtype_get_mblen(mtype, prtype, &mbminlen, &mbmaxlen);
col->mbminlen = (unsigned int) mbminlen;
col->mbmaxlen = (unsigned int) mbmaxlen;
+#endif /* !UNIV_HOTBACKUP */
}
-/**************************************************************************
-Creates an index memory object. */
-
+/**********************************************************************//**
+Creates an index memory object.
+@return own: index object */
+UNIV_INTERN
dict_index_t*
dict_mem_index_create(
/*==================*/
- /* out, own: index object */
- const char* table_name, /* in: table name */
- const char* index_name, /* in: index name */
- ulint space, /* in: space where the index tree is
+ const char* table_name, /*!< in: table name */
+ const char* index_name, /*!< in: index name */
+ ulint space, /*!< in: space where the index tree is
placed, ignored if the index is of
the clustered type */
- ulint type, /* in: DICT_UNIQUE,
+ ulint type, /*!< in: DICT_UNIQUE,
DICT_CLUSTERED, ... ORed */
- ulint n_fields) /* in: number of fields */
+ ulint n_fields) /*!< in: number of fields */
{
dict_index_t* index;
mem_heap_t* heap;
@@ -240,79 +236,58 @@ dict_mem_index_create(
ut_ad(table_name && index_name);
heap = mem_heap_create(DICT_HEAP_SIZE);
- index = mem_heap_alloc(heap, sizeof(dict_index_t));
+ index = mem_heap_zalloc(heap, sizeof(dict_index_t));
index->heap = heap;
index->type = type;
+#ifndef UNIV_HOTBACKUP
index->space = (unsigned int) space;
- index->page = 0;
+#endif /* !UNIV_HOTBACKUP */
index->name = mem_heap_strdup(heap, index_name);
index->table_name = table_name;
- index->table = NULL;
- index->n_def = index->n_nullable = 0;
index->n_fields = (unsigned int) n_fields;
index->fields = mem_heap_alloc(heap, 1 + n_fields
* sizeof(dict_field_t));
/* The '1 +' above prevents allocation
of an empty mem block */
- index->stat_n_diff_key_vals = NULL;
-
- index->cached = FALSE;
- memset(&index->lock, 0, sizeof index->lock);
#ifdef UNIV_DEBUG
index->magic_n = DICT_INDEX_MAGIC_N;
#endif /* UNIV_DEBUG */
return(index);
}
-/**************************************************************************
-Creates and initializes a foreign constraint memory object. */
-
+/**********************************************************************//**
+Creates and initializes a foreign constraint memory object.
+@return own: foreign constraint struct */
+UNIV_INTERN
dict_foreign_t*
dict_mem_foreign_create(void)
/*=========================*/
- /* out, own: foreign constraint struct */
{
dict_foreign_t* foreign;
mem_heap_t* heap;
heap = mem_heap_create(100);
- foreign = mem_heap_alloc(heap, sizeof(dict_foreign_t));
+ foreign = mem_heap_zalloc(heap, sizeof(dict_foreign_t));
foreign->heap = heap;
- foreign->id = NULL;
-
- foreign->type = 0;
- foreign->foreign_table_name = NULL;
- foreign->foreign_table = NULL;
- foreign->foreign_col_names = NULL;
-
- foreign->referenced_table_name = NULL;
- foreign->referenced_table = NULL;
- foreign->referenced_col_names = NULL;
-
- foreign->n_fields = 0;
-
- foreign->foreign_index = NULL;
- foreign->referenced_index = NULL;
-
return(foreign);
}
-/**************************************************************************
+/**********************************************************************//**
Adds a field definition to an index. NOTE: does not take a copy
of the column name if the field is a column. The memory occupied
by the column name may be released only after publishing the index. */
-
+UNIV_INTERN
void
dict_mem_index_add_field(
/*=====================*/
- dict_index_t* index, /* in: index */
- const char* name, /* in: column name */
- ulint prefix_len) /* in: 0 or the column prefix length
+ dict_index_t* index, /*!< in: index */
+ const char* name, /*!< in: column name */
+ ulint prefix_len) /*!< in: 0 or the column prefix length
in a MySQL index like
INDEX (textcol(25)) */
{
@@ -329,13 +304,13 @@ dict_mem_index_add_field(
field->prefix_len = (unsigned int) prefix_len;
}
-/**************************************************************************
+/**********************************************************************//**
Frees an index memory object. */
-
+UNIV_INTERN
void
dict_mem_index_free(
/*================*/
- dict_index_t* index) /* in: index */
+ dict_index_t* index) /*!< in: index */
{
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
diff --git a/storage/innobase/dyn/dyn0dyn.c b/storage/innobase/dyn/dyn0dyn.c
index bcf2fda2b08..e1275f040f3 100644
--- a/storage/innobase/dyn/dyn0dyn.c
+++ b/storage/innobase/dyn/dyn0dyn.c
@@ -1,7 +1,24 @@
-/******************************************************
-The dynamically allocated array
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file dyn/dyn0dyn.c
+The dynamically allocated array
Created 2/5/1996 Heikki Tuuri
*******************************************************/
@@ -11,14 +28,14 @@ Created 2/5/1996 Heikki Tuuri
#include "dyn0dyn.ic"
#endif
-/****************************************************************
-Adds a new block to a dyn array. */
-
+/************************************************************//**
+Adds a new block to a dyn array.
+@return created block */
+UNIV_INTERN
dyn_block_t*
dyn_array_add_block(
/*================*/
- /* out: created block */
- dyn_array_t* arr) /* in: dyn array */
+ dyn_array_t* arr) /*!< in: dyn array */
{
mem_heap_t* heap;
dyn_block_t* block;
diff --git a/storage/innobase/eval/eval0eval.c b/storage/innobase/eval/eval0eval.c
index cbc47ec508f..589b0fa1576 100644
--- a/storage/innobase/eval/eval0eval.c
+++ b/storage/innobase/eval/eval0eval.c
@@ -1,9 +1,26 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file eval/eval0eval.c
SQL evaluator: evaluates simple data structures, like expressions, in
a query graph
-(c) 1997 Innobase Oy
-
Created 12/29/1997 Heikki Tuuri
*******************************************************/
@@ -16,29 +33,29 @@ Created 12/29/1997 Heikki Tuuri
#include "data0data.h"
#include "row0sel.h"
-/* The RND function seed */
-ulint eval_rnd = 128367121;
+/** The RND function seed */
+static ulint eval_rnd = 128367121;
-/* Dummy adress used when we should allocate a buffer of size 0 in
-the function below */
+/** Dummy address used when we should allocate a buffer of size 0 in
+eval_node_alloc_val_buf */
-byte eval_dummy;
+static byte eval_dummy;
-/*********************************************************************
+/*****************************************************************//**
Allocate a buffer from global dynamic memory for a value of a que_node.
NOTE that this memory must be explicitly freed when the query graph is
freed. If the node already has an allocated buffer, that buffer is freed
here. NOTE that this is the only function where dynamic memory should be
-allocated for a query node val field. */
-
+allocated for a query node val field.
+@return pointer to allocated buffer */
+UNIV_INTERN
byte*
eval_node_alloc_val_buf(
/*====================*/
- /* out: pointer to allocated buffer */
- que_node_t* node, /* in: query graph node; sets the val field
+ que_node_t* node, /*!< in: query graph node; sets the val field
data field to point to the new buffer, and
len field equal to size */
- ulint size) /* in: buffer size */
+ ulint size) /*!< in: buffer size */
{
dfield_t* dfield;
byte* data;
@@ -67,15 +84,15 @@ eval_node_alloc_val_buf(
return(data);
}
-/*********************************************************************
+/*****************************************************************//**
Free the buffer from global dynamic memory for a value of a que_node,
if it has been allocated in the above function. The freeing for pushed
column values is done in sel_col_prefetch_buf_free. */
-
+UNIV_INTERN
void
eval_node_free_val_buf(
/*===================*/
- que_node_t* node) /* in: query graph node */
+ que_node_t* node) /*!< in: query graph node */
{
dfield_t* dfield;
byte* data;
@@ -94,14 +111,14 @@ eval_node_free_val_buf(
}
}
-/*********************************************************************
-Evaluates a comparison node. */
-
+/*****************************************************************//**
+Evaluates a comparison node.
+@return the result of the comparison */
+UNIV_INTERN
ibool
eval_cmp(
/*=====*/
- /* out: the result of the comparison */
- func_node_t* cmp_node) /* in: comparison node */
+ func_node_t* cmp_node) /*!< in: comparison node */
{
que_node_t* arg1;
que_node_t* arg2;
@@ -153,13 +170,13 @@ eval_cmp(
return(val);
}
-/*********************************************************************
+/*****************************************************************//**
Evaluates a logical operation node. */
UNIV_INLINE
void
eval_logical(
/*=========*/
- func_node_t* logical_node) /* in: logical operation node */
+ func_node_t* logical_node) /*!< in: logical operation node */
{
que_node_t* arg1;
que_node_t* arg2;
@@ -194,13 +211,13 @@ eval_logical(
eval_node_set_ibool_val(logical_node, val);
}
-/*********************************************************************
+/*****************************************************************//**
Evaluates an arithmetic operation node. */
UNIV_INLINE
void
eval_arith(
/*=======*/
- func_node_t* arith_node) /* in: arithmetic operation node */
+ func_node_t* arith_node) /*!< in: arithmetic operation node */
{
que_node_t* arg1;
que_node_t* arg2;
@@ -238,13 +255,13 @@ eval_arith(
eval_node_set_int_val(arith_node, val);
}
-/*********************************************************************
+/*****************************************************************//**
Evaluates an aggregate operation node. */
UNIV_INLINE
void
eval_aggregate(
/*===========*/
- func_node_t* node) /* in: aggregate operation node */
+ func_node_t* node) /*!< in: aggregate operation node */
{
que_node_t* arg;
lint val;
@@ -272,14 +289,14 @@ eval_aggregate(
eval_node_set_int_val(node, val);
}
-/*********************************************************************
+/*****************************************************************//**
Evaluates a predefined function node where the function is not relevant
in benchmarks. */
static
void
eval_predefined_2(
/*==============*/
- func_node_t* func_node) /* in: predefined function node */
+ func_node_t* func_node) /*!< in: predefined function node */
{
que_node_t* arg;
que_node_t* arg1;
@@ -359,13 +376,13 @@ eval_predefined_2(
}
}
-/*********************************************************************
+/*****************************************************************//**
Evaluates a notfound-function node. */
UNIV_INLINE
void
eval_notfound(
/*==========*/
- func_node_t* func_node) /* in: function node */
+ func_node_t* func_node) /*!< in: function node */
{
que_node_t* arg1;
que_node_t* arg2;
@@ -401,13 +418,13 @@ eval_notfound(
eval_node_set_ibool_val(func_node, ibool_val);
}
-/*********************************************************************
+/*****************************************************************//**
Evaluates a substr-function node. */
UNIV_INLINE
void
eval_substr(
/*========*/
- func_node_t* func_node) /* in: function node */
+ func_node_t* func_node) /*!< in: function node */
{
que_node_t* arg1;
que_node_t* arg2;
@@ -434,13 +451,13 @@ eval_substr(
dfield_set_data(dfield, str1 + len1, len2);
}
-/*********************************************************************
+/*****************************************************************//**
Evaluates a replstr-procedure node. */
static
void
eval_replstr(
/*=========*/
- func_node_t* func_node) /* in: function node */
+ func_node_t* func_node) /*!< in: function node */
{
que_node_t* arg1;
que_node_t* arg2;
@@ -474,13 +491,13 @@ eval_replstr(
ut_memcpy(str1 + len1, str2, len2);
}
-/*********************************************************************
+/*****************************************************************//**
Evaluates an instr-function node. */
static
void
eval_instr(
/*=======*/
- func_node_t* func_node) /* in: function node */
+ func_node_t* func_node) /*!< in: function node */
{
que_node_t* arg1;
que_node_t* arg2;
@@ -546,13 +563,13 @@ match_found:
eval_node_set_int_val(func_node, int_val);
}
-/*********************************************************************
+/*****************************************************************//**
Evaluates a predefined function node. */
UNIV_INLINE
void
eval_binary_to_number(
/*==================*/
- func_node_t* func_node) /* in: function node */
+ func_node_t* func_node) /*!< in: function node */
{
que_node_t* arg1;
dfield_t* dfield;
@@ -584,13 +601,13 @@ eval_binary_to_number(
eval_node_copy_and_alloc_val(func_node, str2, 4);
}
-/*********************************************************************
+/*****************************************************************//**
Evaluates a predefined function node. */
static
void
eval_concat(
/*========*/
- func_node_t* func_node) /* in: function node */
+ func_node_t* func_node) /*!< in: function node */
{
que_node_t* arg;
dfield_t* dfield;
@@ -626,7 +643,7 @@ eval_concat(
}
}
-/*********************************************************************
+/*****************************************************************//**
Evaluates a predefined function node. If the first argument is an integer,
this function looks at the second argument which is the integer length in
bytes, and converts the integer to a VARCHAR.
@@ -636,7 +653,7 @@ UNIV_INLINE
void
eval_to_binary(
/*===========*/
- func_node_t* func_node) /* in: function node */
+ func_node_t* func_node) /*!< in: function node */
{
que_node_t* arg1;
que_node_t* arg2;
@@ -674,13 +691,13 @@ eval_to_binary(
dfield_set_data(dfield, str1 + (4 - len1), len1);
}
-/*********************************************************************
+/*****************************************************************//**
Evaluates a predefined function node. */
UNIV_INLINE
void
eval_predefined(
/*============*/
- func_node_t* func_node) /* in: function node */
+ func_node_t* func_node) /*!< in: function node */
{
que_node_t* arg1;
lint int_val;
@@ -746,8 +763,7 @@ eval_predefined(
}
}
- dfield_set_len((dfield_t*) que_node_get_val(func_node),
- int_len);
+ dfield_set_len(que_node_get_val(func_node), int_len);
return;
@@ -767,13 +783,13 @@ eval_predefined(
eval_node_set_int_val(func_node, int_val);
}
-/*********************************************************************
+/*****************************************************************//**
Evaluates a function node. */
-
+UNIV_INTERN
void
eval_func(
/*======*/
- func_node_t* func_node) /* in: function node */
+ func_node_t* func_node) /*!< in: function node */
{
que_node_t* arg;
ulint class;
@@ -793,7 +809,7 @@ eval_func(
/* The functions are not defined for SQL null argument
values, except for eval_cmp and notfound */
- if ((dfield_get_len(que_node_get_val(arg)) == UNIV_SQL_NULL)
+ if (dfield_is_null(que_node_get_val(arg))
&& (class != PARS_FUNC_CMP)
&& (func != PARS_NOTFOUND_TOKEN)
&& (func != PARS_PRINTF_TOKEN)) {
diff --git a/storage/innobase/eval/eval0proc.c b/storage/innobase/eval/eval0proc.c
index a513e8e4024..3a4218d92bf 100644
--- a/storage/innobase/eval/eval0proc.c
+++ b/storage/innobase/eval/eval0proc.c
@@ -1,7 +1,24 @@
-/******************************************************
-Executes SQL stored procedures and their control structures
+/*****************************************************************************
+
+Copyright (c) 1998, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1998 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file eval/eval0proc.c
+Executes SQL stored procedures and their control structures
Created 1/20/1998 Heikki Tuuri
*******************************************************/
@@ -12,14 +29,14 @@ Created 1/20/1998 Heikki Tuuri
#include "eval0proc.ic"
#endif
-/**************************************************************************
-Performs an execution step of an if-statement node. */
-
+/**********************************************************************//**
+Performs an execution step of an if-statement node.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
if_step(
/*====*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
if_node_t* node;
elsif_node_t* elsif_node;
@@ -88,14 +105,14 @@ if_step(
return(thr);
}
-/**************************************************************************
-Performs an execution step of a while-statement node. */
-
+/**********************************************************************//**
+Performs an execution step of a while-statement node.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
while_step(
/*=======*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
while_node_t* node;
@@ -124,14 +141,14 @@ while_step(
return(thr);
}
-/**************************************************************************
-Performs an execution step of an assignment statement node. */
-
+/**********************************************************************//**
+Performs an execution step of an assignment statement node.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
assign_step(
/*========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
assign_node_t* node;
@@ -151,14 +168,14 @@ assign_step(
return(thr);
}
-/**************************************************************************
-Performs an execution step of a for-loop node. */
-
+/**********************************************************************//**
+Performs an execution step of a for-loop node.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
for_step(
/*=====*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
for_node_t* node;
que_node_t* parent;
@@ -213,14 +230,14 @@ for_step(
return(thr);
}
-/**************************************************************************
-Performs an execution step of an exit statement node. */
-
+/**********************************************************************//**
+Performs an execution step of an exit statement node.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
exit_step(
/*======*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
exit_node_t* node;
que_node_t* loop_node;
@@ -245,14 +262,14 @@ exit_step(
return(thr);
}
-/**************************************************************************
-Performs an execution step of a return-statement node. */
-
+/**********************************************************************//**
+Performs an execution step of a return-statement node.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
return_step(
/*========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
return_node_t* node;
que_node_t* parent;
diff --git a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
index 42e5166c9e4..112a0e27d50 100644
--- a/storage/innobase/fil/fil0fil.c
+++ b/storage/innobase/fil/fil0fil.c
@@ -1,7 +1,24 @@
-/******************************************************
-The tablespace memory cache
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file fil/fil0fil.c
+The tablespace memory cache
Created 10/25/1995 Heikki Tuuri
*******************************************************/
@@ -9,16 +26,11 @@ Created 10/25/1995 Heikki Tuuri
#include "fil0fil.h"
#include "mem0mem.h"
-#include "sync0sync.h"
#include "hash0hash.h"
#include "os0file.h"
-#include "os0sync.h"
#include "mach0data.h"
-#include "ibuf0ibuf.h"
#include "buf0buf.h"
#include "buf0flu.h"
-#include "buf0lru.h"
-#include "log0log.h"
#include "log0recv.h"
#include "fsp0fsp.h"
#include "srv0srv.h"
@@ -26,7 +38,15 @@ Created 10/25/1995 Heikki Tuuri
#include "mtr0mtr.h"
#include "mtr0log.h"
#include "dict0dict.h"
-
+#include "page0zip.h"
+#ifndef UNIV_HOTBACKUP
+# include "buf0lru.h"
+# include "ibuf0ibuf.h"
+# include "sync0sync.h"
+# include "os0sync.h"
+#else /* !UNIV_HOTBACKUP */
+static ulint srv_data_read, srv_data_written;
+#endif /* !UNIV_HOTBACKUP */
/*
IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE
@@ -84,131 +104,144 @@ out of the LRU-list and keep a count of pending operations. When an operation
completes, we decrement the count and return the file node to the LRU-list if
the count drops to zero. */
-/* When mysqld is run, the default directory "." is the mysqld datadir,
+/** When mysqld is run, the default directory "." is the mysqld datadir,
but in the MySQL Embedded Server Library and ibbackup it is not the default
directory, and we must set the base file path explicitly */
-const char* fil_path_to_mysql_datadir = ".";
+UNIV_INTERN const char* fil_path_to_mysql_datadir = ".";
-/* The number of fsyncs done to the log */
-ulint fil_n_log_flushes = 0;
+/** The number of fsyncs done to the log */
+UNIV_INTERN ulint fil_n_log_flushes = 0;
-ulint fil_n_pending_log_flushes = 0;
-ulint fil_n_pending_tablespace_flushes = 0;
+/** Number of pending redo log flushes */
+UNIV_INTERN ulint fil_n_pending_log_flushes = 0;
+/** Number of pending tablespace flushes */
+UNIV_INTERN ulint fil_n_pending_tablespace_flushes = 0;
-/* Null file address */
-fil_addr_t fil_addr_null = {FIL_NULL, 0};
+/** The null file address */
+UNIV_INTERN fil_addr_t fil_addr_null = {FIL_NULL, 0};
-/* File node of a tablespace or the log data space */
+/** File node of a tablespace or the log data space */
struct fil_node_struct {
- fil_space_t* space; /* backpointer to the space where this node
+ fil_space_t* space; /*!< backpointer to the space where this node
belongs */
- char* name; /* path to the file */
- ibool open; /* TRUE if file open */
- os_file_t handle; /* OS handle to the file, if file open */
- ibool is_raw_disk;/* TRUE if the 'file' is actually a raw
+ char* name; /*!< path to the file */
+ ibool open; /*!< TRUE if file open */
+ os_file_t handle; /*!< OS handle to the file, if file open */
+ ibool is_raw_disk;/*!< TRUE if the 'file' is actually a raw
device or a raw disk partition */
- ulint size; /* size of the file in database pages, 0 if
+ ulint size; /*!< size of the file in database pages, 0 if
not known yet; the possible last incomplete
megabyte may be ignored if space == 0 */
ulint n_pending;
- /* count of pending i/o's on this file;
+ /*!< count of pending i/o's on this file;
closing of the file is not allowed if
this is > 0 */
ulint n_pending_flushes;
- /* count of pending flushes on this file;
+ /*!< count of pending flushes on this file;
closing of the file is not allowed if
this is > 0 */
- ib_longlong modification_counter;/* when we write to the file we
+ ib_int64_t modification_counter;/*!< when we write to the file we
increment this by one */
- ib_longlong flush_counter;/* up to what modification_counter value
- we have flushed the modifications to disk */
+ ib_int64_t flush_counter;/*!< up to what
+ modification_counter value we have
+ flushed the modifications to disk */
UT_LIST_NODE_T(fil_node_t) chain;
- /* link field for the file chain */
+ /*!< link field for the file chain */
UT_LIST_NODE_T(fil_node_t) LRU;
- /* link field for the LRU list */
- ulint magic_n;
+ /*!< link field for the LRU list */
+ ulint magic_n;/*!< FIL_NODE_MAGIC_N */
};
+/** Value of fil_node_struct::magic_n */
#define FIL_NODE_MAGIC_N 89389
-/* Tablespace or log data space: let us call them by a common name space */
+/** Tablespace or log data space: let us call them by a common name space */
struct fil_space_struct {
- char* name; /* space name = the path to the first file in
+ char* name; /*!< space name = the path to the first file in
it */
- ulint id; /* space id */
- ib_longlong tablespace_version;
- /* in DISCARD/IMPORT this timestamp is used to
- check if we should ignore an insert buffer
- merge request for a page because it actually
- was for the previous incarnation of the
- space */
- ibool mark; /* this is set to TRUE at database startup if
+ ulint id; /*!< space id */
+ ib_int64_t tablespace_version;
+ /*!< in DISCARD/IMPORT this timestamp
+ is used to check if we should ignore
+ an insert buffer merge request for a
+ page because it actually was for the
+ previous incarnation of the space */
+ ibool mark; /*!< this is set to TRUE at database startup if
the space corresponds to a table in the InnoDB
data dictionary; so we can print a warning of
orphaned tablespaces */
- ibool stop_ios;/* TRUE if we want to rename the .ibd file of
- tablespace and want to stop temporarily
- posting of new i/o requests on the file */
+ ibool stop_ios;/*!< TRUE if we want to rename the
+ .ibd file of tablespace and want to
+ stop temporarily posting of new i/o
+ requests on the file */
ibool stop_ibuf_merges;
- /* we set this TRUE when we start deleting a
- single-table tablespace */
+ /*!< we set this TRUE when we start
+ deleting a single-table tablespace */
ibool is_being_deleted;
- /* this is set to TRUE when we start
+ /*!< this is set to TRUE when we start
deleting a single-table tablespace and its
file; when this flag is set no further i/o
or flush requests can be placed on this space,
though there may be such requests still being
processed on this space */
- ulint purpose;/* FIL_TABLESPACE, FIL_LOG, or FIL_ARCH_LOG */
+ ulint purpose;/*!< FIL_TABLESPACE, FIL_LOG, or
+ FIL_ARCH_LOG */
UT_LIST_BASE_NODE_T(fil_node_t) chain;
- /* base node for the file chain */
- ulint size; /* space size in pages; 0 if a single-table
+ /*!< base node for the file chain */
+ ulint size; /*!< space size in pages; 0 if a single-table
tablespace whose size we do not know yet;
last incomplete megabytes in data files may be
ignored if space == 0 */
+ ulint flags; /*!< compressed page size and file format, or 0 */
ulint n_reserved_extents;
- /* number of reserved free extents for
+ /*!< number of reserved free extents for
ongoing operations like B-tree page split */
- ulint n_pending_flushes; /* this is > 0 when flushing
+ ulint n_pending_flushes; /*!< this is positive when flushing
the tablespace to disk; dropping of the
- tablespace is forbidden if this is > 0 */
- ulint n_pending_ibuf_merges;/* this is > 0 when merging
- insert buffer entries to a page so that we
- may need to access the ibuf bitmap page in the
- tablespade: dropping of the tablespace is
- forbidden if this is > 0 */
- hash_node_t hash; /* hash chain node */
- hash_node_t name_hash;/* hash chain the name_hash table */
- rw_lock_t latch; /* latch protecting the file space storage
+ tablespace is forbidden if this is positive */
+ ulint n_pending_ibuf_merges;/*!< this is positive
+ when merging insert buffer entries to
+ a page so that we may need to access
+ the ibuf bitmap page in the
+ tablespace: dropping of the tablespace
+ is forbidden if this is positive */
+ hash_node_t hash; /*!< hash chain node */
+ hash_node_t name_hash;/*!< hash chain the name_hash table */
+#ifndef UNIV_HOTBACKUP
+ rw_lock_t latch; /*!< latch protecting the file space storage
allocation */
+#endif /* !UNIV_HOTBACKUP */
UT_LIST_NODE_T(fil_space_t) unflushed_spaces;
- /* list of spaces with at least one unflushed
+ /*!< list of spaces with at least one unflushed
file we have written to */
- ibool is_in_unflushed_spaces; /* TRUE if this space is
- currently in the list above */
+ ibool is_in_unflushed_spaces; /*!< TRUE if this space is
+ currently in unflushed_spaces */
UT_LIST_NODE_T(fil_space_t) space_list;
- /* list of all spaces */
- ibuf_data_t* ibuf_data;
- /* insert buffer data */
- ulint magic_n;
+ /*!< list of all spaces */
+ ulint magic_n;/*!< FIL_SPACE_MAGIC_N */
};
+/** Value of fil_space_struct::magic_n */
#define FIL_SPACE_MAGIC_N 89472
-/* The tablespace memory cache; also the totality of logs = the log data space,
-is stored here; below we talk about tablespaces, but also the ib_logfiles
-form a 'space' and it is handled here */
-
+/** The tablespace memory cache */
typedef struct fil_system_struct fil_system_t;
+
+/** The tablespace memory cache; also the totality of logs (the log
+data space) is stored here; below we talk about tablespaces, but also
+the ib_logfiles form a 'space' and it is handled here */
+
struct fil_system_struct {
- mutex_t mutex; /* The mutex protecting the cache */
- hash_table_t* spaces; /* The hash table of spaces in the
+#ifndef UNIV_HOTBACKUP
+ mutex_t mutex; /*!< The mutex protecting the cache */
+#endif /* !UNIV_HOTBACKUP */
+ hash_table_t* spaces; /*!< The hash table of spaces in the
system; they are hashed on the space
id */
- hash_table_t* name_hash; /* hash table based on the space
+ hash_table_t* name_hash; /*!< hash table based on the space
name */
UT_LIST_BASE_NODE_T(fil_node_t) LRU;
- /* base node for the LRU list of the
+ /*!< base node for the LRU list of the
most recently used open files with no
pending i/o's; if we start an i/o on
the file, we first remove it from this
@@ -219,24 +252,24 @@ struct fil_system_struct {
after the startup, and kept open until
shutdown */
UT_LIST_BASE_NODE_T(fil_space_t) unflushed_spaces;
- /* base node for the list of those
+ /*!< base node for the list of those
tablespaces whose files contain
unflushed writes; those spaces have
at least one file node where
modification_counter > flush_counter */
- ulint n_open; /* number of files currently open */
- ulint max_n_open; /* n_open is not allowed to exceed
+ ulint n_open; /*!< number of files currently open */
+ ulint max_n_open; /*!< n_open is not allowed to exceed
this */
- ib_longlong modification_counter;/* when we write to a file we
+ ib_int64_t modification_counter;/*!< when we write to a file we
increment this by one */
- ulint max_assigned_id;/* maximum space id in the existing
+ ulint max_assigned_id;/*!< maximum space id in the existing
tables, or assigned during the time
mysqld has been up; at an InnoDB
startup we scan the data dictionary
and set here the maximum of the
space id's of the tables there */
- ib_longlong tablespace_version;
- /* a counter which is incremented for
+ ib_int64_t tablespace_version;
+ /*!< a counter which is incremented for
every space object memory creation;
every space mem object gets a
'timestamp' from this; in DISCARD/
@@ -244,15 +277,15 @@ struct fil_system_struct {
should ignore an insert buffer merge
request */
UT_LIST_BASE_NODE_T(fil_space_t) space_list;
- /* list of all file spaces */
+ /*!< list of all file spaces */
};
-/* The tablespace memory cache. This variable is NULL before the module is
+/** The tablespace memory cache. This variable is NULL before the module is
initialized. */
-fil_system_t* fil_system = NULL;
+static fil_system_t* fil_system = NULL;
-/************************************************************************
+/********************************************************************//**
NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
Prepares a file node for i/o. Opens the file if it is closed. Updates the
@@ -263,153 +296,239 @@ static
void
fil_node_prepare_for_io(
/*====================*/
- fil_node_t* node, /* in: file node */
- fil_system_t* system, /* in: tablespace memory cache */
- fil_space_t* space); /* in: space */
-/************************************************************************
+ fil_node_t* node, /*!< in: file node */
+ fil_system_t* system, /*!< in: tablespace memory cache */
+ fil_space_t* space); /*!< in: space */
+/********************************************************************//**
Updates the data structures when an i/o operation finishes. Updates the
pending i/o's field in the node appropriately. */
static
void
fil_node_complete_io(
/*=================*/
- fil_node_t* node, /* in: file node */
- fil_system_t* system, /* in: tablespace memory cache */
- ulint type); /* in: OS_FILE_WRITE or OS_FILE_READ; marks
+ fil_node_t* node, /*!< in: file node */
+ fil_system_t* system, /*!< in: tablespace memory cache */
+ ulint type); /*!< in: OS_FILE_WRITE or OS_FILE_READ; marks
the node as modified if
type == OS_FILE_WRITE */
-/***********************************************************************
+/*******************************************************************//**
Checks if a single-table tablespace for a given table name exists in the
-tablespace memory cache. */
+tablespace memory cache.
+@return space id, ULINT_UNDEFINED if not found */
static
ulint
fil_get_space_id_for_table(
/*=======================*/
- /* out: space id, ULINT_UNDEFINED if not
- found */
- const char* name); /* in: table name in the standard
+ const char* name); /*!< in: table name in the standard
'databasename/tablename' format */
+/*******************************************************************//**
+Frees a space object from the tablespace memory cache. Closes the files in
+the chain but does not delete them. There must not be any pending i/o's or
+flushes on the files.
+@return TRUE if success */
+static
+ibool
+fil_space_free(
+/*===========*/
+ ulint id, /*!< in: space id */
+ ibool own_mutex);/*!< in: TRUE if own system->mutex */
+/********************************************************************//**
+Reads data from a space to a buffer. Remember that the possible incomplete
+blocks at the end of file are ignored: they are not taken into account when
+calculating the byte offset within a space.
+This is a thin wrapper: every argument is forwarded unchanged to fil_io()
+with the request type fixed to OS_FILE_READ.
+@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
+i/o on a tablespace which does not exist */
+UNIV_INLINE
+ulint
+fil_read(
+/*=====*/
+ ibool sync, /*!< in: TRUE if synchronous aio is desired */
+ ulint space_id, /*!< in: space id */
+ ulint zip_size, /*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint block_offset, /*!< in: offset in number of blocks */
+ ulint byte_offset, /*!< in: remainder of offset in bytes; in aio
+ this must be divisible by the OS block size */
+ ulint len, /*!< in: how many bytes to read; this must not
+ cross a file boundary; in aio this must be a
+ block size multiple */
+ void* buf, /*!< in/out: buffer where to store data read;
+ in aio this must be appropriately aligned */
+ void* message) /*!< in: message for aio handler if non-sync
+ aio used, else ignored */
+{
+ return(fil_io(OS_FILE_READ, sync, space_id, zip_size, block_offset,
+ byte_offset, len, buf, message));
+}
+
+/********************************************************************//**
+Writes data to a space from a buffer. Remember that the possible incomplete
+blocks at the end of file are ignored: they are not taken into account when
+calculating the byte offset within a space.
+This is a thin wrapper: every argument is forwarded unchanged to fil_io()
+with the request type fixed to OS_FILE_WRITE.
+@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
+i/o on a tablespace which does not exist */
+UNIV_INLINE
+ulint
+fil_write(
+/*======*/
+ ibool sync, /*!< in: TRUE if synchronous aio is desired */
+ ulint space_id, /*!< in: space id */
+ ulint zip_size, /*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint block_offset, /*!< in: offset in number of blocks */
+ ulint byte_offset, /*!< in: remainder of offset in bytes; in aio
+ this must be divisible by the OS block size */
+ ulint len, /*!< in: how many bytes to write; this must
+ not cross a file boundary; in aio this must
+ be a block size multiple */
+ void* buf, /*!< in: buffer from which to write; in aio
+ this must be appropriately aligned */
+ void* message) /*!< in: message for aio handler if non-sync
+ aio used, else ignored */
+{
+ return(fil_io(OS_FILE_WRITE, sync, space_id, zip_size, block_offset,
+ byte_offset, len, buf, message));
+}
+
+/*******************************************************************//**
+Looks up a tablespace by its id in the fil_system->spaces hash table.
+The caller must hold fil_system->mutex (checked with ut_ad).
+@return the space object, NULL if not found */
+UNIV_INLINE
+fil_space_t*
+fil_space_get_by_id(
+/*================*/
+ ulint id) /*!< in: space id */
+{
+ fil_space_t* space;
+
+ ut_ad(mutex_own(&fil_system->mutex));
+
+ HASH_SEARCH(hash, fil_system->spaces, id,
+ fil_space_t*, space,
+ ut_ad(space->magic_n == FIL_SPACE_MAGIC_N),
+ space->id == id);
+
+ return(space);
+}
+/*******************************************************************//**
+Looks up a tablespace by its name in the fil_system->name_hash hash table.
+The hash key is ut_fold_string(name) and candidates are compared against
+space->name with strcmp(). The caller must hold fil_system->mutex (checked
+with ut_ad).
+@return the space object, NULL if not found */
+UNIV_INLINE
+fil_space_t*
+fil_space_get_by_name(
+/*==================*/
+ const char* name) /*!< in: space name */
+{
+ fil_space_t* space;
+ ulint fold;
-/***********************************************************************
-Returns the version number of a tablespace, -1 if not found. */
+ ut_ad(mutex_own(&fil_system->mutex));
-ib_longlong
+ fold = ut_fold_string(name);
+
+ HASH_SEARCH(name_hash, fil_system->name_hash, fold,
+ fil_space_t*, space,
+ ut_ad(space->magic_n == FIL_SPACE_MAGIC_N),
+ !strcmp(name, space->name));
+
+ return(space);
+}
+
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
+Returns the version number of a tablespace, -1 if not found.
+@return version number, -1 if the tablespace does not exist in the
+memory cache */
+UNIV_INTERN
+ib_int64_t
fil_space_get_version(
/*==================*/
- /* out: version number, -1 if the tablespace does not
- exist in the memory cache */
- ulint id) /* in: space id */
+ ulint id) /*!< in: space id */
{
- fil_system_t* system = fil_system;
fil_space_t* space;
- ib_longlong version = -1;
+ ib_int64_t version = -1;
- ut_ad(system);
+ ut_ad(fil_system);
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+ space = fil_space_get_by_id(id);
if (space) {
version = space->tablespace_version;
}
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return(version);
}
-/***********************************************************************
-Returns the latch of a file space. */
-
+/*******************************************************************//**
+Returns the latch of a file space and, if requested, its flags. The space
+must exist in the tablespace memory cache; this is asserted with ut_a.
+@return latch protecting storage allocation */
+UNIV_INTERN
rw_lock_t*
fil_space_get_latch(
/*================*/
- /* out: latch protecting storage allocation */
- ulint id) /* in: space id */
+ ulint id, /*!< in: space id */
+ ulint* flags) /*!< out: tablespace flags, or NULL if not wanted */
{
- fil_system_t* system = fil_system;
fil_space_t* space;
- ut_ad(system);
+ ut_ad(fil_system);
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+ space = fil_space_get_by_id(id);
ut_a(space);
- mutex_exit(&(system->mutex));
+ if (flags) {
+ *flags = space->flags;
+ }
+
+ mutex_exit(&fil_system->mutex);
return(&(space->latch));
}
-/***********************************************************************
-Returns the type of a file space. */
-
+/*******************************************************************//**
+Returns the type of a file space.
+@return FIL_TABLESPACE or FIL_LOG */
+UNIV_INTERN
ulint
fil_space_get_type(
/*===============*/
- /* out: FIL_TABLESPACE or FIL_LOG */
- ulint id) /* in: space id */
+ ulint id) /*!< in: space id */
{
- fil_system_t* system = fil_system;
fil_space_t* space;
- ut_ad(system);
+ ut_ad(fil_system);
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+ space = fil_space_get_by_id(id);
ut_a(space);
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return(space->purpose);
}
+#endif /* !UNIV_HOTBACKUP */
-/***********************************************************************
-Returns the ibuf data of a file space. */
-
-ibuf_data_t*
-fil_space_get_ibuf_data(
-/*====================*/
- /* out: ibuf data for this space */
- ulint id) /* in: space id */
-{
- fil_system_t* system = fil_system;
- fil_space_t* space;
-
- ut_ad(system);
-
- ut_a(id == 0);
-
- mutex_enter(&(system->mutex));
-
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
-
- mutex_exit(&(system->mutex));
-
- ut_a(space);
-
- return(space->ibuf_data);
-}
-
-/**************************************************************************
+/**********************************************************************//**
Checks if all the file nodes in a space are flushed. The caller must hold
-the fil_system mutex. */
+the fil_system mutex.
+@return TRUE if all are flushed */
static
ibool
fil_space_is_flushed(
/*=================*/
- /* out: TRUE if all are flushed */
- fil_space_t* space) /* in: space */
+ fil_space_t* space) /*!< in: space */
{
fil_node_t* node;
- ut_ad(mutex_own(&(fil_system->mutex)));
+ ut_ad(mutex_own(&fil_system->mutex));
node = UT_LIST_GET_FIRST(space->chain);
@@ -425,27 +544,26 @@ fil_space_is_flushed(
return(TRUE);
}
-/***********************************************************************
+/*******************************************************************//**
Appends a new file to the chain of files of a space. File must be closed. */
-
+UNIV_INTERN
void
fil_node_create(
/*============*/
- const char* name, /* in: file name (file must be closed) */
- ulint size, /* in: file size in database blocks, rounded
+ const char* name, /*!< in: file name (file must be closed) */
+ ulint size, /*!< in: file size in database blocks, rounded
downwards to an integer */
- ulint id, /* in: space id where to append */
- ibool is_raw) /* in: TRUE if a raw device or
+ ulint id, /*!< in: space id where to append */
+ ibool is_raw) /*!< in: TRUE if a raw device or
a raw disk partition */
{
- fil_system_t* system = fil_system;
fil_node_t* node;
fil_space_t* space;
- ut_a(system);
+ ut_a(fil_system);
ut_a(name);
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
node = mem_alloc(sizeof(fil_node_t));
@@ -463,7 +581,7 @@ fil_node_create(
node->modification_counter = 0;
node->flush_counter = 0;
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+ space = fil_space_get_by_id(id);
if (!space) {
ut_print_timestamp(stderr);
@@ -476,7 +594,7 @@ fil_node_create(
mem_free(node);
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return;
}
@@ -487,30 +605,34 @@ fil_node_create(
UT_LIST_ADD_LAST(chain, space->chain, node);
- mutex_exit(&(system->mutex));
+ if (id < SRV_LOG_SPACE_FIRST_ID && fil_system->max_assigned_id < id) {
+
+ fil_system->max_assigned_id = id;
+ }
+
+ mutex_exit(&fil_system->mutex);
}
-/************************************************************************
+/********************************************************************//**
Opens the file of a node of a tablespace. The caller must own the fil_system
mutex. */
static
void
fil_node_open_file(
/*===============*/
- fil_node_t* node, /* in: file node */
- fil_system_t* system, /* in: tablespace memory cache */
- fil_space_t* space) /* in: space */
+ fil_node_t* node, /*!< in: file node */
+ fil_system_t* system, /*!< in: tablespace memory cache */
+ fil_space_t* space) /*!< in: space */
{
- ib_longlong size_bytes;
+ ib_int64_t size_bytes;
ulint size_low;
ulint size_high;
ibool ret;
ibool success;
-#ifndef UNIV_HOTBACKUP
byte* buf2;
byte* page;
ulint space_id;
-#endif /* !UNIV_HOTBACKUP */
+ ulint flags;
ut_ad(mutex_own(&(system->mutex)));
ut_a(node->n_pending == 0);
@@ -543,12 +665,15 @@ fil_node_open_file(
os_file_get_size(node->handle, &size_low, &size_high);
- size_bytes = (((ib_longlong)size_high) << 32)
- + (ib_longlong)size_low;
+ size_bytes = (((ib_int64_t)size_high) << 32)
+ + (ib_int64_t)size_low;
#ifdef UNIV_HOTBACKUP
- node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
-
-#else
+ if (space->id == 0) {
+ node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
+ os_file_close(node->handle);
+ goto add_size;
+ }
+#endif /* UNIV_HOTBACKUP */
ut_a(space->purpose != FIL_LOG);
ut_a(space->id != 0);
@@ -577,6 +702,7 @@ fil_node_open_file(
success = os_file_read(node->handle, page, 0, 0,
UNIV_PAGE_SIZE);
space_id = fsp_header_get_space_id(page);
+ flags = fsp_header_get_flags(page);
ut_free(buf2);
@@ -584,33 +710,52 @@ fil_node_open_file(
os_file_close(node->handle);
- if (space_id == ULINT_UNDEFINED || space_id == 0) {
+ if (UNIV_UNLIKELY(space_id != space->id)) {
+ fprintf(stderr,
+ "InnoDB: Error: tablespace id is %lu"
+ " in the data dictionary\n"
+ "InnoDB: but in file %s it is %lu!\n",
+ space->id, node->name, space_id);
+
+ ut_error;
+ }
+
+ if (UNIV_UNLIKELY(space_id == ULINT_UNDEFINED
+ || space_id == 0)) {
fprintf(stderr,
"InnoDB: Error: tablespace id %lu"
" in file %s is not sensible\n",
(ulong) space_id, node->name);
- ut_a(0);
+ ut_error;
}
- if (space_id != space->id) {
+ if (UNIV_UNLIKELY(space->flags != flags)) {
fprintf(stderr,
- "InnoDB: Error: tablespace id is %lu"
+ "InnoDB: Error: table flags are %lx"
" in the data dictionary\n"
- "InnoDB: but in file %s it is %lu!\n",
- space->id, node->name, space_id);
+ "InnoDB: but the flags in file %s are %lx!\n",
+ space->flags, node->name, flags);
- ut_a(0);
+ ut_error;
}
- if (size_bytes >= FSP_EXTENT_SIZE * UNIV_PAGE_SIZE) {
- node->size = (ulint)
- ((size_bytes / (1024 * 1024))
- * ((1024 * 1024) / UNIV_PAGE_SIZE));
- } else {
+ if (size_bytes >= 1024 * 1024) {
+ /* Truncate the size to whole megabytes. */
+ size_bytes = ut_2pow_round(size_bytes, 1024 * 1024);
+ }
+
+ if (!(flags & DICT_TF_ZSSIZE_MASK)) {
node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
+ } else {
+ node->size = (ulint)
+ (size_bytes
+ / dict_table_flags_to_zip_size(flags));
}
-#endif
+
+#ifdef UNIV_HOTBACKUP
+add_size:
+#endif /* UNIV_HOTBACKUP */
space->size += node->size;
}
@@ -644,14 +789,14 @@ fil_node_open_file(
}
}
-/**************************************************************************
+/**********************************************************************//**
Closes a file. */
static
void
fil_node_close_file(
/*================*/
- fil_node_t* node, /* in: file node */
- fil_system_t* system) /* in: tablespace memory cache */
+ fil_node_t* node, /*!< in: file node */
+ fil_system_t* system) /*!< in: tablespace memory cache */
{
ibool ret;
@@ -679,41 +824,38 @@ fil_node_close_file(
}
}
-/************************************************************************
+/********************************************************************//**
Tries to close a file in the LRU list. The caller must hold the fil_sys
-mutex. */
+mutex.
+@return TRUE if success, FALSE if should retry later; since i/o's
+generally complete in < 100 ms, and as InnoDB writes at most 128 pages
+from the buffer pool in a batch, and then immediately flushes the
+files, there is a good chance that the next time we find a suitable
+node from the LRU list */
static
ibool
fil_try_to_close_file_in_LRU(
/*=========================*/
- /* out: TRUE if success, FALSE if should retry
- later; since i/o's generally complete in <
- 100 ms, and as InnoDB writes at most 128 pages
- from the buffer pool in a batch, and then
- immediately flushes the files, there is a good
- chance that the next time we find a suitable
- node from the LRU list */
- ibool print_info) /* in: if TRUE, prints information why it
+ ibool print_info) /*!< in: if TRUE, prints information why it
cannot close a file */
{
- fil_system_t* system = fil_system;
fil_node_t* node;
- ut_ad(mutex_own(&(system->mutex)));
+ ut_ad(mutex_own(&fil_system->mutex));
- node = UT_LIST_GET_LAST(system->LRU);
+ node = UT_LIST_GET_LAST(fil_system->LRU);
if (print_info) {
fprintf(stderr,
"InnoDB: fil_sys open file LRU len %lu\n",
- (ulong) UT_LIST_GET_LEN(system->LRU));
+ (ulong) UT_LIST_GET_LEN(fil_system->LRU));
}
while (node != NULL) {
if (node->modification_counter == node->flush_counter
&& node->n_pending_flushes == 0) {
- fil_node_close_file(node, system);
+ fil_node_close_file(node, fil_system);
return(TRUE);
}
@@ -741,7 +883,7 @@ fil_try_to_close_file_in_LRU(
return(FALSE);
}
-/***********************************************************************
+/*******************************************************************//**
Reserves the fil_system mutex and tries to make sure we can open at least one
file while holding it. This should be called before calling
fil_node_prepare_for_io(), because that function may need to open a file. */
@@ -749,18 +891,16 @@ static
void
fil_mutex_enter_and_prepare_for_io(
/*===============================*/
- ulint space_id) /* in: space id */
+ ulint space_id) /*!< in: space id */
{
- fil_system_t* system = fil_system;
fil_space_t* space;
ibool success;
ibool print_info = FALSE;
ulint count = 0;
ulint count2 = 0;
- ut_ad(!mutex_own(&(system->mutex)));
retry:
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) {
/* We keep log files and system tablespace files always open;
@@ -772,13 +912,13 @@ retry:
return;
}
- if (system->n_open < system->max_n_open) {
+ if (fil_system->n_open < fil_system->max_n_open) {
return;
}
- HASH_SEARCH(hash, system->spaces, space_id, space,
- space->id == space_id);
+ space = fil_space_get_by_id(space_id);
+
if (space != NULL && space->stop_ios) {
/* We are going to do a rename file and want to stop new i/o's
for a while */
@@ -791,7 +931,7 @@ retry:
(ulong) count2);
}
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
os_thread_sleep(20000);
@@ -817,12 +957,12 @@ retry:
close_more:
success = fil_try_to_close_file_in_LRU(print_info);
- if (success && system->n_open >= system->max_n_open) {
+ if (success && fil_system->n_open >= fil_system->max_n_open) {
goto close_more;
}
- if (system->n_open < system->max_n_open) {
+ if (fil_system->n_open < fil_system->max_n_open) {
/* Ok */
return;
@@ -835,14 +975,15 @@ close_more:
" while the maximum\n"
"InnoDB: allowed value would be %lu.\n"
"InnoDB: You may need to raise the value of"
- " innodb_max_files_open in\n"
+ " innodb_open_files in\n"
"InnoDB: my.cnf.\n",
- (ulong) system->n_open, (ulong) system->max_n_open);
+ (ulong) fil_system->n_open,
+ (ulong) fil_system->max_n_open);
return;
}
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
#ifndef UNIV_HOTBACKUP
/* Wake the i/o-handler threads to make sure pending i/o's are
@@ -861,15 +1002,15 @@ close_more:
goto retry;
}
-/***********************************************************************
+/*******************************************************************//**
Frees a file node object from a tablespace memory cache. */
static
void
fil_node_free(
/*==========*/
- fil_node_t* node, /* in, own: file node */
- fil_system_t* system, /* in: tablespace memory cache */
- fil_space_t* space) /* in: space where the file node is chained */
+ fil_node_t* node, /*!< in, own: file node */
+ fil_system_t* system, /*!< in: tablespace memory cache */
+ fil_space_t* space) /*!< in: space where the file node is chained */
{
ut_ad(node && system && space);
ut_ad(mutex_own(&(system->mutex)));
@@ -903,69 +1044,79 @@ fil_node_free(
mem_free(node);
}
-/********************************************************************
+#ifdef UNIV_LOG_ARCHIVE
+/****************************************************************//**
Drops files from the start of a file space, so that its size is cut by
the amount given. */
-
+UNIV_INTERN
void
fil_space_truncate_start(
/*=====================*/
- ulint id, /* in: space id */
- ulint trunc_len) /* in: truncate by this much; it is an error
+ ulint id, /*!< in: space id */
+ ulint trunc_len) /*!< in: truncate by this much; it is an error
 if this does not equal the combined size of
some initial files in the space */
{
- fil_system_t* system = fil_system;
fil_node_t* node;
fil_space_t* space;
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+ space = fil_space_get_by_id(id);
ut_a(space);
while (trunc_len > 0) {
node = UT_LIST_GET_FIRST(space->chain);
- ut_a(node->size * UNIV_PAGE_SIZE >= trunc_len);
+ ut_a(node->size * UNIV_PAGE_SIZE <= trunc_len);
trunc_len -= node->size * UNIV_PAGE_SIZE;
- fil_node_free(node, system, space);
+ fil_node_free(node, fil_system, space);
}
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
}
+#endif /* UNIV_LOG_ARCHIVE */
-/***********************************************************************
+/*******************************************************************//**
Creates a space memory object and puts it to the tablespace memory cache. If
-there is an error, prints an error message to the .err log. */
-
+there is an error, prints an error message to the .err log.
+@return TRUE if success */
+UNIV_INTERN
ibool
fil_space_create(
/*=============*/
- /* out: TRUE if success */
- const char* name, /* in: space name */
- ulint id, /* in: space id */
- ulint purpose)/* in: FIL_TABLESPACE, or FIL_LOG if log */
+ const char* name, /*!< in: space name */
+ ulint id, /*!< in: space id */
+ ulint flags, /*!< in: compressed page size
+ and file format, or 0 */
+ ulint purpose)/*!< in: FIL_TABLESPACE, or FIL_LOG if log */
{
- fil_system_t* system = fil_system;
fil_space_t* space;
- ulint namesake_id;
+
+ /* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
+ ROW_FORMAT=COMPACT (table->flags == DICT_TF_COMPACT) and
+ ROW_FORMAT=REDUNDANT (table->flags == 0). For any other
+ format, the tablespace flags should equal table->flags. */
+ ut_a(flags != DICT_TF_COMPACT);
+
try_again:
/*printf(
"InnoDB: Adding tablespace %lu of name %s, purpose %lu\n", id, name,
purpose);*/
- ut_a(system);
+ ut_a(fil_system);
ut_a(name);
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
+
+ space = fil_space_get_by_name(name);
+
+ if (UNIV_LIKELY_NULL(space)) {
+ ulint namesake_id;
- HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(name), space,
- 0 == strcmp(name, space->name));
- if (space != NULL) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Warning: trying to init to the"
@@ -980,7 +1131,7 @@ try_again:
if (id == 0 || purpose != FIL_TABLESPACE) {
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return(FALSE);
}
@@ -1002,16 +1153,16 @@ try_again:
namesake_id = space->id;
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
- fil_space_free(namesake_id);
+ fil_space_free(namesake_id, FALSE);
goto try_again;
}
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+ space = fil_space_get_by_id(id);
- if (space != NULL) {
+ if (UNIV_LIKELY_NULL(space)) {
fprintf(stderr,
"InnoDB: Error: trying to add tablespace %lu"
" of name ", (ulong) id);
@@ -1024,7 +1175,7 @@ try_again:
fputs(" already exists in the tablespace\n"
"InnoDB: memory cache!\n", stderr);
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return(FALSE);
}
@@ -1034,12 +1185,12 @@ try_again:
space->name = mem_strdup(name);
space->id = id;
- system->tablespace_version++;
- space->tablespace_version = system->tablespace_version;
+ fil_system->tablespace_version++;
+ space->tablespace_version = fil_system->tablespace_version;
space->mark = FALSE;
- if (purpose == FIL_TABLESPACE && id > system->max_assigned_id) {
- system->max_assigned_id = id;
+ if (purpose == FIL_TABLESPACE && id > fil_system->max_assigned_id) {
+ fil_system->max_assigned_id = id;
}
space->stop_ios = FALSE;
@@ -1047,6 +1198,7 @@ try_again:
space->is_being_deleted = FALSE;
space->purpose = purpose;
space->size = 0;
+ space->flags = flags;
space->n_reserved_extents = 0;
@@ -1056,42 +1208,38 @@ try_again:
UT_LIST_INIT(space->chain);
space->magic_n = FIL_SPACE_MAGIC_N;
- space->ibuf_data = NULL;
-
rw_lock_create(&space->latch, SYNC_FSP);
- HASH_INSERT(fil_space_t, hash, system->spaces, id, space);
+ HASH_INSERT(fil_space_t, hash, fil_system->spaces, id, space);
- HASH_INSERT(fil_space_t, name_hash, system->name_hash,
+ HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
ut_fold_string(name), space);
space->is_in_unflushed_spaces = FALSE;
- UT_LIST_ADD_LAST(space_list, system->space_list, space);
+ UT_LIST_ADD_LAST(space_list, fil_system->space_list, space);
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return(TRUE);
}
-/***********************************************************************
+/*******************************************************************//**
Assigns a new space id for a new single-table tablespace. This works simply by
incrementing the global counter. If 4 billion id's is not enough, we may need
-to recycle id's. */
+to recycle id's.
+@return new tablespace id; ULINT_UNDEFINED if could not assign an id */
static
ulint
fil_assign_new_space_id(void)
/*=========================*/
- /* out: new tablespace id; ULINT_UNDEFINED if could
- not assign an id */
{
- fil_system_t* system = fil_system;
ulint id;
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
- system->max_assigned_id++;
+ fil_system->max_assigned_id++;
- id = system->max_assigned_id;
+ id = fil_system->max_assigned_id;
if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) {
ut_print_timestamp(stderr);
@@ -1117,35 +1265,38 @@ fil_assign_new_space_id(void)
" have to dump all your tables and\n"
"InnoDB: recreate the whole InnoDB installation.\n",
(ulong) id);
- system->max_assigned_id--;
+ fil_system->max_assigned_id--;
id = ULINT_UNDEFINED;
}
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return(id);
}
-/***********************************************************************
+/*******************************************************************//**
Frees a space object from the tablespace memory cache. Closes the files in
the chain but does not delete them. There must not be any pending i/o's or
-flushes on the files. */
-
+flushes on the files.
+@return TRUE if success */
+static
ibool
fil_space_free(
/*===========*/
- /* out: TRUE if success */
- ulint id) /* in: space id */
+ /* out: TRUE if success */
+ ulint id, /* in: space id */
+ ibool own_mutex) /* in: TRUE if own system->mutex */
{
- fil_system_t* system = fil_system;
fil_space_t* space;
fil_space_t* namespace;
fil_node_t* fil_node;
- mutex_enter(&(system->mutex));
+ if (!own_mutex) {
+ mutex_enter(&fil_system->mutex);
+ }
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+ space = fil_space_get_by_id(id);
if (!space) {
ut_print_timestamp(stderr);
@@ -1154,29 +1305,28 @@ fil_space_free(
" from the cache but\n"
"InnoDB: it is not there.\n", (ulong) id);
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return(FALSE);
}
- HASH_DELETE(fil_space_t, hash, system->spaces, id, space);
+ HASH_DELETE(fil_space_t, hash, fil_system->spaces, id, space);
- HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(space->name),
- namespace, 0 == strcmp(space->name, namespace->name));
+ namespace = fil_space_get_by_name(space->name);
ut_a(namespace);
ut_a(space == namespace);
- HASH_DELETE(fil_space_t, name_hash, system->name_hash,
+ HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
ut_fold_string(space->name), space);
if (space->is_in_unflushed_spaces) {
space->is_in_unflushed_spaces = FALSE;
- UT_LIST_REMOVE(unflushed_spaces, system->unflushed_spaces,
+ UT_LIST_REMOVE(unflushed_spaces, fil_system->unflushed_spaces,
space);
}
- UT_LIST_REMOVE(space_list, system->space_list, space);
+ UT_LIST_REMOVE(space_list, fil_system->space_list, space);
ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
ut_a(0 == space->n_pending_flushes);
@@ -1184,14 +1334,16 @@ fil_space_free(
fil_node = UT_LIST_GET_FIRST(space->chain);
while (fil_node != NULL) {
- fil_node_free(fil_node, system, space);
+ fil_node_free(fil_node, fil_system, space);
fil_node = UT_LIST_GET_FIRST(space->chain);
}
ut_a(0 == UT_LIST_GET_LEN(space->chain));
- mutex_exit(&(system->mutex));
+ if (!own_mutex) {
+ mutex_exit(&fil_system->mutex);
+ }
rw_lock_free(&(space->latch));
@@ -1201,52 +1353,28 @@ fil_space_free(
return(TRUE);
}
-#ifdef UNIV_HOTBACKUP
-/***********************************************************************
-Returns the tablespace object for a given id, or NULL if not found from the
-tablespace memory cache. */
-static
-fil_space_t*
-fil_get_space_for_id_low(
-/*=====================*/
- /* out: tablespace object or NULL; NOTE that you must
- own &(fil_system->mutex) to call this function! */
- ulint id) /* in: space id */
-{
- fil_system_t* system = fil_system;
- fil_space_t* space;
-
- ut_ad(system);
-
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
-
- return(space);
-}
-#endif
-
-/***********************************************************************
+/*******************************************************************//**
Returns the size of the space in pages. The tablespace must be cached in the
-memory cache. */
-
+memory cache.
+@return space size, 0 if space not found */
+UNIV_INTERN
ulint
fil_space_get_size(
/*===============*/
- /* out: space size, 0 if space not found */
- ulint id) /* in: space id */
+ ulint id) /*!< in: space id */
{
- fil_system_t* system = fil_system;
fil_node_t* node;
fil_space_t* space;
ulint size;
- ut_ad(system);
+ ut_ad(fil_system);
fil_mutex_enter_and_prepare_for_io(id);
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+ space = fil_space_get_by_id(id);
if (space == NULL) {
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return(0);
}
@@ -1262,114 +1390,162 @@ fil_space_get_size(
the file yet; the following calls will open it and update the
size fields */
- fil_node_prepare_for_io(node, system, space);
- fil_node_complete_io(node, system, OS_FILE_READ);
+ fil_node_prepare_for_io(node, fil_system, space);
+ fil_node_complete_io(node, fil_system, OS_FILE_READ);
}
size = space->size;
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return(size);
}
-/***********************************************************************
-Checks if the pair space, page_no refers to an existing page in a tablespace
-file space. The tablespace must be cached in the memory cache. */
-
-ibool
-fil_check_adress_in_tablespace(
-/*===========================*/
- /* out: TRUE if the address is meaningful */
- ulint id, /* in: space id */
- ulint page_no)/* in: page number */
+/*******************************************************************//**
+Returns the flags of the space. The tablespace must be cached
+in the memory cache.
+@return flags, ULINT_UNDEFINED if space not found */
+UNIV_INTERN
+ulint
+fil_space_get_flags(
+/*================*/
+ ulint id) /*!< in: space id */
{
- if (fil_space_get_size(id) > page_no) {
+ fil_node_t* node;
+ fil_space_t* space;
+ ulint flags;
- return(TRUE);
+ ut_ad(fil_system);
+
+ if (UNIV_UNLIKELY(!id)) {
+ return(0);
}
- return(FALSE);
-}
+ fil_mutex_enter_and_prepare_for_io(id);
-/********************************************************************
-Creates a the tablespace memory cache. */
-static
-fil_system_t*
-fil_system_create(
-/*==============*/
- /* out, own: tablespace memory cache */
- ulint hash_size, /* in: hash table size */
- ulint max_n_open) /* in: maximum number of open files; must be
- > 10 */
-{
- fil_system_t* system;
+ space = fil_space_get_by_id(id);
- ut_a(hash_size > 0);
- ut_a(max_n_open > 0);
+ if (space == NULL) {
+ mutex_exit(&fil_system->mutex);
- system = mem_alloc(sizeof(fil_system_t));
+ return(ULINT_UNDEFINED);
+ }
- mutex_create(&system->mutex, SYNC_ANY_LATCH);
+ if (space->size == 0 && space->purpose == FIL_TABLESPACE) {
+ ut_a(id != 0);
- system->spaces = hash_create(hash_size);
- system->name_hash = hash_create(hash_size);
+ ut_a(1 == UT_LIST_GET_LEN(space->chain));
- UT_LIST_INIT(system->LRU);
+ node = UT_LIST_GET_FIRST(space->chain);
- system->n_open = 0;
- system->max_n_open = max_n_open;
+ /* It must be a single-table tablespace and we have not opened
+ the file yet; the following calls will open it and update the
+ size fields */
- system->modification_counter = 0;
- system->max_assigned_id = 0;
+ fil_node_prepare_for_io(node, fil_system, space);
+ fil_node_complete_io(node, fil_system, OS_FILE_READ);
+ }
- system->tablespace_version = 0;
+ flags = space->flags;
- UT_LIST_INIT(system->unflushed_spaces);
- UT_LIST_INIT(system->space_list);
+ mutex_exit(&fil_system->mutex);
- return(system);
+ return(flags);
}
-/********************************************************************
-Initializes the tablespace memory cache. */
+/*******************************************************************//**
+Returns the compressed page size of the space, or 0 if the space
+is not compressed. The tablespace must be cached in the memory cache.
+@return compressed page size, ULINT_UNDEFINED if space not found */
+UNIV_INTERN
+ulint
+fil_space_get_zip_size(
+/*===================*/
+ ulint id) /*!< in: space id */
+{
+ ulint flags;
+
+ flags = fil_space_get_flags(id);
+
+ if (flags && flags != ULINT_UNDEFINED) {
+
+ return(dict_table_flags_to_zip_size(flags));
+ }
+
+ return(flags);
+}
+
+/*******************************************************************//**
+Checks if the pair space, page_no refers to an existing page in a tablespace
+file space. The tablespace must be cached in the memory cache.
+@return TRUE if the address is meaningful */
+UNIV_INTERN
+ibool
+fil_check_adress_in_tablespace(
+/*===========================*/
+ ulint id, /*!< in: space id */
+ ulint page_no)/*!< in: page number */
+{
+ if (fil_space_get_size(id) > page_no) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+/****************************************************************//**
+Initializes the tablespace memory cache. */
+UNIV_INTERN
void
fil_init(
/*=====*/
- ulint max_n_open) /* in: max number of open files */
+ ulint hash_size, /*!< in: hash table size */
+ ulint max_n_open) /*!< in: max number of open files */
{
- ulint hash_size;
-
ut_a(fil_system == NULL);
- if (srv_file_per_table) {
- hash_size = 50000;
- } else {
- hash_size = 5000;
- }
+ ut_a(hash_size > 0);
+ ut_a(max_n_open > 0);
+
+ fil_system = mem_alloc(sizeof(fil_system_t));
+
+ mutex_create(&fil_system->mutex, SYNC_ANY_LATCH);
+
+ fil_system->spaces = hash_create(hash_size);
+ fil_system->name_hash = hash_create(hash_size);
+
+ UT_LIST_INIT(fil_system->LRU);
- fil_system = fil_system_create(hash_size, max_n_open);
+ fil_system->n_open = 0;
+ fil_system->max_n_open = max_n_open;
+
+ fil_system->modification_counter = 0;
+ fil_system->max_assigned_id = 0;
+
+ fil_system->tablespace_version = 0;
+
+ UT_LIST_INIT(fil_system->unflushed_spaces);
+ UT_LIST_INIT(fil_system->space_list);
}
-/***********************************************************************
+/*******************************************************************//**
Opens all log files and system tablespace data files. They stay open until the
database server shutdown. This should be called at a server startup after the
space objects for the log and the system tablespace have been created. The
purpose of this operation is to make sure we never run out of file descriptors
if we need to read from the insert buffer or to write to the log. */
-
+UNIV_INTERN
void
fil_open_log_and_system_tablespace_files(void)
/*==========================================*/
{
- fil_system_t* system = fil_system;
fil_space_t* space;
fil_node_t* node;
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
- space = UT_LIST_GET_FIRST(system->space_list);
+ space = UT_LIST_GET_FIRST(fil_system->space_list);
while (space != NULL) {
if (space->purpose != FIL_TABLESPACE || space->id == 0) {
@@ -1377,14 +1553,15 @@ fil_open_log_and_system_tablespace_files(void)
while (node != NULL) {
if (!node->open) {
- fil_node_open_file(node, system,
+ fil_node_open_file(node, fil_system,
space);
}
- if (system->max_n_open < 10 + system->n_open) {
+ if (fil_system->max_n_open
+ < 10 + fil_system->n_open) {
fprintf(stderr,
"InnoDB: Warning: you must"
" raise the value of"
- " innodb_max_open_files in\n"
+ " innodb_open_files in\n"
"InnoDB: my.cnf! Remember that"
" InnoDB keeps all log files"
" and all system\n"
@@ -1398,8 +1575,8 @@ fil_open_log_and_system_tablespace_files(void)
" Current open files %lu,"
" max allowed"
" open files %lu.\n",
- (ulong) system->n_open,
- (ulong) system->max_n_open);
+ (ulong) fil_system->n_open,
+ (ulong) fil_system->max_n_open);
}
node = UT_LIST_GET_NEXT(chain, node);
}
@@ -1407,100 +1584,81 @@ fil_open_log_and_system_tablespace_files(void)
space = UT_LIST_GET_NEXT(space_list, space);
}
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
}
-/***********************************************************************
+/*******************************************************************//**
Closes all open files. There must not be any pending i/o's or not flushed
modifications in the files. */
-
+UNIV_INTERN
void
fil_close_all_files(void)
/*=====================*/
{
- fil_system_t* system = fil_system;
fil_space_t* space;
fil_node_t* node;
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
- space = UT_LIST_GET_FIRST(system->space_list);
+ space = UT_LIST_GET_FIRST(fil_system->space_list);
while (space != NULL) {
+ fil_space_t* prev_space = space;
+
node = UT_LIST_GET_FIRST(space->chain);
while (node != NULL) {
if (node->open) {
- fil_node_close_file(node, system);
+ fil_node_close_file(node, fil_system);
}
node = UT_LIST_GET_NEXT(chain, node);
}
space = UT_LIST_GET_NEXT(space_list, space);
+ fil_space_free(prev_space->id, TRUE);
}
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
}
-/***********************************************************************
+/*******************************************************************//**
Sets the max tablespace id counter if the given number is bigger than the
previous value. */
-
+UNIV_INTERN
void
fil_set_max_space_id_if_bigger(
/*===========================*/
- ulint max_id) /* in: maximum known id */
+ ulint max_id) /*!< in: maximum known id */
{
- fil_system_t* system = fil_system;
-
if (max_id >= SRV_LOG_SPACE_FIRST_ID) {
fprintf(stderr,
"InnoDB: Fatal error: max tablespace id"
" is too high, %lu\n", (ulong) max_id);
- ut_a(0);
+ ut_error;
}
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
- if (system->max_assigned_id < max_id) {
+ if (fil_system->max_assigned_id < max_id) {
- system->max_assigned_id = max_id;
+ fil_system->max_assigned_id = max_id;
}
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
}
-/********************************************************************
-Initializes the ibuf data structure for space 0 == the system tablespace.
-This can be called after the file space headers have been created and the
-dictionary system has been initialized. */
-
-void
-fil_ibuf_init_at_db_start(void)
-/*===========================*/
-{
- fil_space_t* space;
-
- space = UT_LIST_GET_FIRST(fil_system->space_list);
-
- ut_a(space);
- ut_a(space->purpose == FIL_TABLESPACE);
-
- space->ibuf_data = ibuf_data_init_for_space(space->id);
-}
-
-/********************************************************************
+/****************************************************************//**
Writes the flushed lsn and the latest archived log number to the page header
-of the first page of a data file. */
+of the first page of a data file of the system tablespace (space 0),
+which is uncompressed. */
static
ulint
fil_write_lsn_and_arch_no_to_file(
/*==============================*/
- ulint space_id, /* in: space number */
- ulint sum_of_sizes, /* in: combined size of previous files in
- space, in database pages */
- dulint lsn, /* in: lsn to write */
- ulint arch_log_no /* in: archived log number to write */
- __attribute__((unused)))
+ ulint sum_of_sizes, /*!< in: combined size of previous files
+ in space, in database pages */
+ ib_uint64_t lsn, /*!< in: lsn to write */
+ ulint arch_log_no __attribute__((unused)))
+ /*!< in: archived log number to write */
{
byte* buf1;
byte* buf;
@@ -1508,32 +1666,35 @@ fil_write_lsn_and_arch_no_to_file(
buf1 = mem_alloc(2 * UNIV_PAGE_SIZE);
buf = ut_align(buf1, UNIV_PAGE_SIZE);
- fil_read(TRUE, space_id, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
+ fil_read(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
+
+ mach_write_ull(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
- mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
+ fil_write(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
- fil_write(TRUE, space_id, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
+ mem_free(buf1);
return(DB_SUCCESS);
}
-/********************************************************************
+/****************************************************************//**
Writes the flushed lsn and the latest archived log number to the page
-header of the first page of each data file in the system tablespace. */
-
+header of the first page of each data file in the system tablespace.
+@return DB_SUCCESS or error number */
+UNIV_INTERN
ulint
fil_write_flushed_lsn_to_data_files(
/*================================*/
- /* out: DB_SUCCESS or error number */
- dulint lsn, /* in: lsn to write */
- ulint arch_log_no) /* in: latest archived log file number */
+ ib_uint64_t lsn, /*!< in: lsn to write */
+ ulint arch_log_no) /*!< in: latest archived log
+ file number */
{
fil_space_t* space;
fil_node_t* node;
ulint sum_of_sizes;
ulint err;
- mutex_enter(&(fil_system->mutex));
+ mutex_enter(&fil_system->mutex);
space = UT_LIST_GET_FIRST(fil_system->space_list);
@@ -1550,17 +1711,16 @@ fil_write_flushed_lsn_to_data_files(
node = UT_LIST_GET_FIRST(space->chain);
while (node) {
- mutex_exit(&(fil_system->mutex));
+ mutex_exit(&fil_system->mutex);
err = fil_write_lsn_and_arch_no_to_file(
- space->id, sum_of_sizes, lsn,
- arch_log_no);
+ sum_of_sizes, lsn, arch_log_no);
if (err != DB_SUCCESS) {
return(err);
}
- mutex_enter(&(fil_system->mutex));
+ mutex_enter(&fil_system->mutex);
sum_of_sizes += node->size;
node = UT_LIST_GET_NEXT(chain, node);
@@ -1569,31 +1729,32 @@ fil_write_flushed_lsn_to_data_files(
space = UT_LIST_GET_NEXT(space_list, space);
}
- mutex_exit(&(fil_system->mutex));
+ mutex_exit(&fil_system->mutex);
return(DB_SUCCESS);
}
-/***********************************************************************
+/*******************************************************************//**
Reads the flushed lsn and arch no fields from a data file at database
startup. */
-
+UNIV_INTERN
void
fil_read_flushed_lsn_and_arch_log_no(
/*=================================*/
- os_file_t data_file, /* in: open data file */
- ibool one_read_already, /* in: TRUE if min and max parameters
- below already contain sensible data */
+ os_file_t data_file, /*!< in: open data file */
+ ibool one_read_already, /*!< in: TRUE if min and max
+ parameters below already
+ contain sensible data */
#ifdef UNIV_LOG_ARCHIVE
- ulint* min_arch_log_no, /* in/out: */
- ulint* max_arch_log_no, /* in/out: */
+ ulint* min_arch_log_no, /*!< in/out: */
+ ulint* max_arch_log_no, /*!< in/out: */
#endif /* UNIV_LOG_ARCHIVE */
- dulint* min_flushed_lsn, /* in/out: */
- dulint* max_flushed_lsn) /* in/out: */
+ ib_uint64_t* min_flushed_lsn, /*!< in/out: */
+ ib_uint64_t* max_flushed_lsn) /*!< in/out: */
{
- byte* buf;
- byte* buf2;
- dulint flushed_lsn;
+ byte* buf;
+ byte* buf2;
+ ib_uint64_t flushed_lsn;
buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
/* Align the memory for a possible read from a raw device */
@@ -1601,7 +1762,7 @@ fil_read_flushed_lsn_and_arch_log_no(
os_file_read(data_file, buf, 0, 0, UNIV_PAGE_SIZE);
- flushed_lsn = mach_read_from_8(buf + FIL_PAGE_FILE_FLUSH_LSN);
+ flushed_lsn = mach_read_ull(buf + FIL_PAGE_FILE_FLUSH_LSN);
ut_free(buf2);
@@ -1615,10 +1776,10 @@ fil_read_flushed_lsn_and_arch_log_no(
return;
}
- if (ut_dulint_cmp(*min_flushed_lsn, flushed_lsn) > 0) {
+ if (*min_flushed_lsn > flushed_lsn) {
*min_flushed_lsn = flushed_lsn;
}
- if (ut_dulint_cmp(*max_flushed_lsn, flushed_lsn) < 0) {
+ if (*max_flushed_lsn < flushed_lsn) {
*max_flushed_lsn = flushed_lsn;
}
#ifdef UNIV_LOG_ARCHIVE
@@ -1633,23 +1794,22 @@ fil_read_flushed_lsn_and_arch_log_no(
/*================ SINGLE-TABLE TABLESPACES ==========================*/
-/***********************************************************************
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
Increments the count of pending insert buffer page merges, if space is not
-being deleted. */
-
+being deleted.
+@return TRUE if being deleted, and ibuf merges should be skipped */
+UNIV_INTERN
ibool
fil_inc_pending_ibuf_merges(
/*========================*/
- /* out: TRUE if being deleted, and ibuf merges should
- be skipped */
- ulint id) /* in: space id */
+ ulint id) /*!< in: space id */
{
- fil_system_t* system = fil_system;
fil_space_t* space;
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+ space = fil_space_get_by_id(id);
if (space == NULL) {
fprintf(stderr,
@@ -1659,32 +1819,31 @@ fil_inc_pending_ibuf_merges(
}
if (space == NULL || space->stop_ibuf_merges) {
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return(TRUE);
}
space->n_pending_ibuf_merges++;
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return(FALSE);
}
-/***********************************************************************
+/*******************************************************************//**
Decrements the count of pending insert buffer page merges. */
-
+UNIV_INTERN
void
fil_decr_pending_ibuf_merges(
/*=========================*/
- ulint id) /* in: space id */
+ ulint id) /*!< in: space id */
{
- fil_system_t* system = fil_system;
fil_space_t* space;
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+ space = fil_space_get_by_id(id);
if (space == NULL) {
fprintf(stderr,
@@ -1697,16 +1856,17 @@ fil_decr_pending_ibuf_merges(
space->n_pending_ibuf_merges--;
}
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
}
+#endif /* !UNIV_HOTBACKUP */
-/************************************************************
+/********************************************************//**
Creates the database directory for a table if it does not exist yet. */
static
void
fil_create_directory_for_tablename(
/*===============================*/
- const char* name) /* in: name in the standard
+ const char* name) /*!< in: name in the standard
'databasename/tablename' format */
{
const char* namend;
@@ -1730,29 +1890,35 @@ fil_create_directory_for_tablename(
}
#ifndef UNIV_HOTBACKUP
-/************************************************************
+/********************************************************//**
Writes a log record about an .ibd file create/rename/delete. */
static
void
fil_op_write_log(
/*=============*/
- ulint type, /* in: MLOG_FILE_CREATE,
+ ulint type, /*!< in: MLOG_FILE_CREATE,
+ MLOG_FILE_CREATE2,
MLOG_FILE_DELETE, or
MLOG_FILE_RENAME */
- ulint space_id, /* in: space id */
- const char* name, /* in: table name in the familiar
+ ulint space_id, /*!< in: space id */
+ ulint log_flags, /*!< in: redo log flags (stored
+ in the page number field) */
+ ulint flags, /*!< in: compressed page size
+ and file format
+ if type==MLOG_FILE_CREATE2, or 0 */
+ const char* name, /*!< in: table name in the familiar
'databasename/tablename' format, or
the file path in the case of
MLOG_FILE_DELETE */
- const char* new_name, /* in: if type is MLOG_FILE_RENAME,
+ const char* new_name, /*!< in: if type is MLOG_FILE_RENAME,
the new table name in the
'databasename/tablename' format */
- mtr_t* mtr) /* in: mini-transaction handle */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
byte* log_ptr;
ulint len;
- log_ptr = mlog_open(mtr, 11 + 2);
+ log_ptr = mlog_open(mtr, 11 + 2 + 1);
if (!log_ptr) {
/* Logging in mtr is switched off during crash recovery:
@@ -1760,8 +1926,12 @@ fil_op_write_log(
return;
}
- log_ptr = mlog_write_initial_log_record_for_file_op(type, space_id, 0,
- log_ptr, mtr);
+ log_ptr = mlog_write_initial_log_record_for_file_op(
+ type, space_id, log_flags, log_ptr, mtr);
+ if (type == MLOG_FILE_CREATE2) {
+ mach_write_to_4(log_ptr, flags);
+ log_ptr += 4;
+ }
/* Let us store the strings as null-terminated for easier readability
and handling */
@@ -1774,7 +1944,7 @@ fil_op_write_log(
mlog_catenate_string(mtr, (byte*) name, len);
if (type == MLOG_FILE_RENAME) {
- ulint len = strlen(new_name) + 1;
+ len = strlen(new_name) + 1;
log_ptr = mlog_open(mtr, 2 + len);
ut_a(log_ptr);
mach_write_to_2(log_ptr, len);
@@ -1786,7 +1956,7 @@ fil_op_write_log(
}
#endif
-/***********************************************************************
+/*******************************************************************//**
Parses the body of a log record written about an .ibd file operation. That is,
the log record part after the standard (type, space id, page no) header of the
log record.
@@ -1797,29 +1967,39 @@ at that path does not exist yet. If the database directory for the file to be
created does not exist, then we create the directory, too.
Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the
-datadir that we should use in replaying the file operations. */
-
+datadir that we should use in replaying the file operations.
+@return end of log record, or NULL if the record was not completely
+contained between ptr and end_ptr */
+UNIV_INTERN
byte*
fil_op_log_parse_or_replay(
/*=======================*/
- /* out: end of log record, or NULL if the
- record was not completely contained between
- ptr and end_ptr */
- byte* ptr, /* in: buffer containing the log record body,
+ byte* ptr, /*!< in: buffer containing the log record body,
or an initial segment of it, if the record does
not fir completely between ptr and end_ptr */
- byte* end_ptr, /* in: buffer end */
- ulint type, /* in: the type of this log record */
- ibool do_replay, /* in: TRUE if we want to replay the
- operation, and not just parse the log record */
- ulint space_id) /* in: if do_replay is TRUE, the space id of
- the tablespace in question; otherwise
- ignored */
+ byte* end_ptr, /*!< in: buffer end */
+ ulint type, /*!< in: the type of this log record */
+ ulint space_id, /*!< in: the space id of the tablespace in
+ question, or 0 if the log record should
+ only be parsed but not replayed */
+ ulint log_flags) /*!< in: redo log flags
+ (stored in the page number parameter) */
{
ulint name_len;
ulint new_name_len;
const char* name;
const char* new_name = NULL;
+ ulint flags = 0;
+
+ if (type == MLOG_FILE_CREATE2) {
+ if (end_ptr < ptr + 4) {
+
+ return(NULL);
+ }
+
+ flags = mach_read_from_4(ptr);
+ ptr += 4;
+ }
if (end_ptr < ptr + 2) {
@@ -1868,7 +2048,7 @@ fil_op_log_parse_or_replay(
printf("new name %s\n", new_name);
}
*/
- if (do_replay == FALSE) {
+ if (!space_id) {
return(ptr);
}
@@ -1881,11 +2061,15 @@ fil_op_log_parse_or_replay(
were renames of tables during the backup. See ibbackup code for more
on the problem. */
- if (type == MLOG_FILE_DELETE) {
+ switch (type) {
+ case MLOG_FILE_DELETE:
if (fil_tablespace_exists_in_mem(space_id)) {
ut_a(fil_delete_tablespace(space_id));
}
- } else if (type == MLOG_FILE_RENAME) {
+
+ break;
+
+ case MLOG_FILE_RENAME:
/* We do the rename based on space id, not old file name;
this should guarantee that after the log replay each .ibd file
has the correct name for the latest log sequence number; the
@@ -1909,43 +2093,49 @@ fil_op_log_parse_or_replay(
}
}
}
- } else {
- ut_a(type == MLOG_FILE_CREATE);
+ break;
+
+ case MLOG_FILE_CREATE:
+ case MLOG_FILE_CREATE2:
if (fil_tablespace_exists_in_mem(space_id)) {
/* Do nothing */
} else if (fil_get_space_id_for_table(name)
!= ULINT_UNDEFINED) {
/* Do nothing */
+ } else if (log_flags & MLOG_FILE_FLAG_TEMP) {
+ /* Temporary table, do nothing */
} else {
/* Create the database directory for name, if it does
not exist yet */
fil_create_directory_for_tablename(name);
- ut_a(space_id != 0);
-
if (fil_create_new_single_table_tablespace(
- &space_id, name, FALSE,
+ &space_id, name, FALSE, flags,
FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) {
ut_error;
}
}
+
+ break;
+
+ default:
+ ut_error;
}
return(ptr);
}
-/***********************************************************************
+/*******************************************************************//**
Deletes a single-table tablespace. The tablespace must be cached in the
-memory cache. */
-
+memory cache.
+@return TRUE if success */
+UNIV_INTERN
ibool
fil_delete_tablespace(
/*==================*/
- /* out: TRUE if success */
- ulint id) /* in: space id */
+ ulint id) /*!< in: space id */
{
- fil_system_t* system = fil_system;
ibool success;
fil_space_t* space;
fil_node_t* node;
@@ -1954,15 +2144,15 @@ fil_delete_tablespace(
ut_a(id != 0);
stop_ibuf_merges:
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+ space = fil_space_get_by_id(id);
if (space != NULL) {
space->stop_ibuf_merges = TRUE;
if (space->n_pending_ibuf_merges == 0) {
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
count = 0;
@@ -1981,7 +2171,7 @@ stop_ibuf_merges:
(ulong) count);
}
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
os_thread_sleep(20000);
count++;
@@ -1990,13 +2180,13 @@ stop_ibuf_merges:
}
}
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
count = 0;
try_again:
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+ space = fil_space_get_by_id(id);
if (space == NULL) {
ut_print_timestamp(stderr);
@@ -2006,7 +2196,7 @@ try_again:
" tablespace memory cache.\n",
(ulong) id);
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return(FALSE);
}
@@ -2033,7 +2223,7 @@ try_again:
(ulong) node->n_pending,
(ulong) count);
}
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
os_thread_sleep(20000);
count++;
@@ -2043,7 +2233,7 @@ try_again:
path = mem_strdup(space->name);
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
#ifndef UNIV_HOTBACKUP
/* Invalidate in the buffer pool all pages belonging to the
tablespace. Since we have set space->is_being_deleted = TRUE, readahead
@@ -2056,7 +2246,7 @@ try_again:
#endif
/* printf("Deleting tablespace %s id %lu\n", space->name, id); */
- success = fil_space_free(id);
+ success = fil_space_free(id, FALSE);
if (success) {
success = os_file_delete(path);
@@ -2078,7 +2268,7 @@ try_again:
to write any log record */
mtr_start(&mtr);
- fil_op_write_log(MLOG_FILE_DELETE, id, path, NULL, &mtr);
+ fil_op_write_log(MLOG_FILE_DELETE, id, 0, 0, path, NULL, &mtr);
mtr_commit(&mtr);
#endif
mem_free(path);
@@ -2091,20 +2281,21 @@ try_again:
return(FALSE);
}
-/***********************************************************************
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
Discards a single-table tablespace. The tablespace must be cached in the
memory cache. Discarding is like deleting a tablespace, but
1) we do not drop the table from the data dictionary;
2) we remove all insert buffer entries for the tablespace immediately; in DROP
TABLE they are only removed gradually in the background;
3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
-as it originally had. */
-
+as it originally had.
+@return TRUE if success */
+UNIV_INTERN
ibool
fil_discard_tablespace(
/*===================*/
- /* out: TRUE if success */
- ulint id) /* in: space id */
+ ulint id) /*!< in: space id */
{
ibool success;
@@ -2123,26 +2314,27 @@ fil_discard_tablespace(
ibuf_delete_for_discarded_space(id);
- return(TRUE);
+ return(success);
}
+#endif /* !UNIV_HOTBACKUP */
-/***********************************************************************
-Renames the memory cache structures of a single-table tablespace. */
+/*******************************************************************//**
+Renames the memory cache structures of a single-table tablespace.
+@return TRUE if success */
static
ibool
fil_rename_tablespace_in_mem(
/*=========================*/
- /* out: TRUE if success */
- fil_space_t* space, /* in: tablespace memory object */
- fil_node_t* node, /* in: file node of that tablespace */
- const char* path) /* in: new name */
+ fil_space_t* space, /*!< in: tablespace memory object */
+ fil_node_t* node, /*!< in: file node of that tablespace */
+ const char* path) /*!< in: new name */
{
- fil_system_t* system = fil_system;
fil_space_t* space2;
const char* old_name = space->name;
- HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(old_name),
- space2, 0 == strcmp(old_name, space2->name));
+ ut_ad(mutex_own(&fil_system->mutex));
+
+ space2 = fil_space_get_by_name(old_name);
if (space != space2) {
fputs("InnoDB: Error: cannot find ", stderr);
ut_print_filename(stderr, old_name);
@@ -2151,8 +2343,7 @@ fil_rename_tablespace_in_mem(
return(FALSE);
}
- HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(path),
- space2, 0 == strcmp(path, space2->name));
+ space2 = fil_space_get_by_name(path);
if (space2 != NULL) {
fputs("InnoDB: Error: ", stderr);
ut_print_filename(stderr, path);
@@ -2161,7 +2352,7 @@ fil_rename_tablespace_in_mem(
return(FALSE);
}
- HASH_DELETE(fil_space_t, name_hash, system->name_hash,
+ HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
ut_fold_string(space->name), space);
mem_free(space->name);
mem_free(node->name);
@@ -2169,22 +2360,22 @@ fil_rename_tablespace_in_mem(
space->name = mem_strdup(path);
node->name = mem_strdup(path);
- HASH_INSERT(fil_space_t, name_hash, system->name_hash,
+ HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
ut_fold_string(path), space);
return(TRUE);
}
-/***********************************************************************
+/*******************************************************************//**
Allocates a file name for a single-table tablespace. The string must be freed
-by caller with mem_free(). */
+by caller with mem_free().
+@return own: file name */
static
char*
fil_make_ibd_name(
/*==============*/
- /* out, own: file name */
- const char* name, /* in: table name or a dir path of a
+ const char* name, /*!< in: table name or a dir path of a
TEMPORARY table */
- ibool is_temp) /* in: TRUE if it is a dir path */
+ ibool is_temp) /*!< in: TRUE if it is a dir path */
{
ulint namelen = strlen(name);
ulint dirlen = strlen(fil_path_to_mysql_datadir);
@@ -2206,24 +2397,23 @@ fil_make_ibd_name(
return(filename);
}
-/***********************************************************************
+/*******************************************************************//**
Renames a single-table tablespace. The tablespace must be cached in the
-tablespace memory cache. */
-
+tablespace memory cache.
+@return TRUE if success */
+UNIV_INTERN
ibool
fil_rename_tablespace(
/*==================*/
- /* out: TRUE if success */
- const char* old_name, /* in: old table name in the standard
+ const char* old_name, /*!< in: old table name in the standard
databasename/tablename format of
InnoDB, or NULL if we do the rename
based on the space id only */
- ulint id, /* in: space id */
- const char* new_name) /* in: new table name in the standard
+ ulint id, /*!< in: space id */
+ const char* new_name) /*!< in: new table name in the standard
databasename/tablename format
of InnoDB */
{
- fil_system_t* system = fil_system;
ibool success;
fil_space_t* space;
fil_node_t* node;
@@ -2250,9 +2440,9 @@ retry:
fprintf(stderr, ", %lu iterations\n", (ulong) count);
}
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+ space = fil_space_get_by_id(id);
if (space == NULL) {
fprintf(stderr,
@@ -2261,14 +2451,14 @@ retry:
"InnoDB: though the table ", (ulong) id);
ut_print_filename(stderr, old_name);
fputs(" in a rename operation should have that id\n", stderr);
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return(FALSE);
}
if (count > 25000) {
space->stop_ios = FALSE;
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return(FALSE);
}
@@ -2286,7 +2476,7 @@ retry:
/* There are pending i/o's or flushes, sleep for a while and
retry */
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
os_thread_sleep(20000);
@@ -2295,7 +2485,7 @@ retry:
} else if (node->modification_counter > node->flush_counter) {
/* Flush the space */
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
os_thread_sleep(20000);
@@ -2306,7 +2496,7 @@ retry:
} else if (node->open) {
/* Close the file */
- fil_node_close_file(node, system);
+ fil_node_close_file(node, fil_system);
}
/* Check that the old name in the space is right */
@@ -2341,7 +2531,7 @@ retry:
space->stop_ios = FALSE;
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
#ifndef UNIV_HOTBACKUP
if (success) {
@@ -2349,7 +2539,7 @@ retry:
mtr_start(&mtr);
- fil_op_write_log(MLOG_FILE_RENAME, id, old_name, new_name,
+ fil_op_write_log(MLOG_FILE_RENAME, id, 0, 0, old_name, new_name,
&mtr);
mtr_commit(&mtr);
}
@@ -2357,27 +2547,28 @@ retry:
return(success);
}
-/***********************************************************************
+/*******************************************************************//**
Creates a new single-table tablespace to a database directory of MySQL.
Database directories are under the 'datadir' of MySQL. The datadir is the
directory of a running mysqld program. We can refer to it by simply the
path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
-dir of the mysqld server. */
-
+dir of the mysqld server.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
ulint
fil_create_new_single_table_tablespace(
/*===================================*/
- /* out: DB_SUCCESS or error code */
- ulint* space_id, /* in/out: space id; if this is != 0,
+ ulint* space_id, /*!< in/out: space id; if this is != 0,
then this is an input parameter,
otherwise output */
- const char* tablename, /* in: the table name in the usual
+ const char* tablename, /*!< in: the table name in the usual
databasename/tablename format
of InnoDB, or a dir path to a temp
table */
- ibool is_temp, /* in: TRUE if a table created with
+ ibool is_temp, /*!< in: TRUE if a table created with
CREATE TEMPORARY TABLE */
- ulint size) /* in: the initial size of the
+ ulint flags, /*!< in: tablespace flags */
+ ulint size) /*!< in: the initial size of the
tablespace file in pages,
must be >= FIL_IBD_FILE_INITIAL_SIZE */
{
@@ -2390,6 +2581,11 @@ fil_create_new_single_table_tablespace(
char* path;
ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE);
+ /* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
+ ROW_FORMAT=COMPACT (table->flags == DICT_TF_COMPACT) and
+ ROW_FORMAT=REDUNDANT (table->flags == 0). For any other
+ format, the tablespace flags should equal table->flags. */
+ ut_a(flags != DICT_TF_COMPACT);
path = fil_make_ibd_name(tablename, is_temp);
@@ -2437,7 +2633,7 @@ fil_create_new_single_table_tablespace(
return(DB_ERROR);
}
- buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
+ buf2 = ut_malloc(3 * UNIV_PAGE_SIZE);
/* Align the memory for file i/o if we might have O_DIRECT set */
page = ut_align(buf2, UNIV_PAGE_SIZE);
@@ -2480,11 +2676,30 @@ error_exit2:
memset(page, '\0', UNIV_PAGE_SIZE);
- fsp_header_write_space_id(page, *space_id);
+ fsp_header_init_fields(page, *space_id, flags);
+ mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, *space_id);
+
+ if (!(flags & DICT_TF_ZSSIZE_MASK)) {
+ buf_flush_init_for_writing(page, NULL, 0);
+ ret = os_file_write(path, file, page, 0, 0, UNIV_PAGE_SIZE);
+ } else {
+ page_zip_des_t page_zip;
+ ulint zip_size;
- buf_flush_init_for_writing(page, ut_dulint_zero, *space_id, 0);
+ zip_size = ((PAGE_ZIP_MIN_SIZE >> 1)
+ << ((flags & DICT_TF_ZSSIZE_MASK)
+ >> DICT_TF_ZSSIZE_SHIFT));
- ret = os_file_write(path, file, page, 0, 0, UNIV_PAGE_SIZE);
+ page_zip_set_size(&page_zip, zip_size);
+ page_zip.data = page + UNIV_PAGE_SIZE;
+#ifdef UNIV_DEBUG
+ page_zip.m_start =
+#endif /* UNIV_DEBUG */
+ page_zip.m_end = page_zip.m_nonempty =
+ page_zip.n_blobs = 0;
+ buf_flush_init_for_writing(page, &page_zip, 0);
+ ret = os_file_write(path, file, page_zip.data, 0, 0, zip_size);
+ }
ut_free(buf2);
@@ -2511,7 +2726,7 @@ error_exit2:
goto error_exit2;
}
- success = fil_space_create(path, *space_id, FIL_TABLESPACE);
+ success = fil_space_create(path, *space_id, flags, FIL_TABLESPACE);
if (!success) {
goto error_exit2;
@@ -2525,8 +2740,13 @@ error_exit2:
mtr_start(&mtr);
- fil_op_write_log(MLOG_FILE_CREATE, *space_id, tablename,
- NULL, &mtr);
+ fil_op_write_log(flags
+ ? MLOG_FILE_CREATE2
+ : MLOG_FILE_CREATE,
+ *space_id,
+ is_temp ? MLOG_FILE_FLAG_TEMP : 0,
+ flags,
+ tablename, NULL, &mtr);
mtr_commit(&mtr);
}
@@ -2535,7 +2755,8 @@ error_exit2:
return(DB_SUCCESS);
}
-/************************************************************************
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
It is possible, though very improbable, that the lsn's in the tablespace to be
imported have risen above the current system lsn, if a lengthy purge, ibuf
merge, or rollback was performed on a backup taken with ibbackup. If that is
@@ -2543,15 +2764,15 @@ the case, reset page lsn's in the file. We assume that mysqld was shut down
after it performed these cleanup operations on the .ibd file, so that it at
the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the
first page of the .ibd file, and we can determine whether we need to reset the
-lsn's just by looking at that flush lsn. */
-
+lsn's just by looking at that flush lsn.
+@return TRUE if success */
+UNIV_INTERN
ibool
fil_reset_too_high_lsns(
/*====================*/
- /* out: TRUE if success */
- const char* name, /* in: table name in the
+ const char* name, /*!< in: table name in the
databasename/tablename format */
- dulint current_lsn) /* in: reset lsn's if the lsn stamped
+ ib_uint64_t current_lsn) /*!< in: reset lsn's if the lsn stamped
to FIL_PAGE_FILE_FLUSH_LSN in the
first page is too high */
{
@@ -2559,11 +2780,11 @@ fil_reset_too_high_lsns(
char* filepath;
byte* page;
byte* buf2;
- dulint flush_lsn;
+ ib_uint64_t flush_lsn;
ulint space_id;
- ib_longlong file_size;
- ib_longlong offset;
- ulint page_no;
+ ib_int64_t file_size;
+ ib_int64_t offset;
+ ulint zip_size;
ibool success;
filepath = fil_make_ibd_name(name, FALSE);
@@ -2588,7 +2809,7 @@ fil_reset_too_high_lsns(
/* Read the first page of the tablespace */
- buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
+ buf2 = ut_malloc(3 * UNIV_PAGE_SIZE);
/* Align the memory for file i/o if we might have O_DIRECT set */
page = ut_align(buf2, UNIV_PAGE_SIZE);
@@ -2600,9 +2821,9 @@ fil_reset_too_high_lsns(
/* We have to read the file flush lsn from the header of the file */
- flush_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN);
+ flush_lsn = mach_read_ull(page + FIL_PAGE_FILE_FLUSH_LSN);
- if (ut_dulint_cmp(current_lsn, flush_lsn) >= 0) {
+ if (current_lsn >= flush_lsn) {
/* Ok */
success = TRUE;
@@ -2610,48 +2831,56 @@ fil_reset_too_high_lsns(
}
space_id = fsp_header_get_space_id(page);
+ zip_size = fsp_header_get_zip_size(page);
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Flush lsn in the tablespace file %lu"
" to be imported\n"
- "InnoDB: is %lu %lu, which exceeds current"
- " system lsn %lu %lu.\n"
+ "InnoDB: is %llu, which exceeds current"
+ " system lsn %llu.\n"
"InnoDB: We reset the lsn's in the file ",
(ulong) space_id,
- (ulong) ut_dulint_get_high(flush_lsn),
- (ulong) ut_dulint_get_low(flush_lsn),
- (ulong) ut_dulint_get_high(current_lsn),
- (ulong) ut_dulint_get_low(current_lsn));
+ flush_lsn, current_lsn);
ut_print_filename(stderr, filepath);
fputs(".\n", stderr);
+ ut_a(ut_is_2pow(zip_size));
+ ut_a(zip_size <= UNIV_PAGE_SIZE);
+
/* Loop through all the pages in the tablespace and reset the lsn and
the page checksum if necessary */
file_size = os_file_get_size_as_iblonglong(file);
- for (offset = 0; offset < file_size; offset += UNIV_PAGE_SIZE) {
+ for (offset = 0; offset < file_size;
+ offset += zip_size ? zip_size : UNIV_PAGE_SIZE) {
success = os_file_read(file, page,
(ulint)(offset & 0xFFFFFFFFUL),
- (ulint)(offset >> 32), UNIV_PAGE_SIZE);
+ (ulint)(offset >> 32),
+ zip_size ? zip_size : UNIV_PAGE_SIZE);
if (!success) {
goto func_exit;
}
- if (ut_dulint_cmp(mach_read_from_8(page + FIL_PAGE_LSN),
- current_lsn) > 0) {
+ if (mach_read_ull(page + FIL_PAGE_LSN) > current_lsn) {
/* We have to reset the lsn */
- space_id = mach_read_from_4(
- page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
- buf_flush_init_for_writing(page, current_lsn, space_id,
- page_no);
+ if (zip_size) {
+ memcpy(page + UNIV_PAGE_SIZE, page, zip_size);
+ buf_flush_init_for_writing(
+ page, page + UNIV_PAGE_SIZE,
+ current_lsn);
+ } else {
+ buf_flush_init_for_writing(
+ page, NULL, current_lsn);
+ }
success = os_file_write(filepath, file, page,
(ulint)(offset & 0xFFFFFFFFUL),
(ulint)(offset >> 32),
- UNIV_PAGE_SIZE);
+ zip_size
+ ? zip_size
+ : UNIV_PAGE_SIZE);
if (!success) {
goto func_exit;
@@ -2666,15 +2895,17 @@ fil_reset_too_high_lsns(
}
/* We now update the flush_lsn stamp at the start of the file */
- success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
+ success = os_file_read(file, page, 0, 0,
+ zip_size ? zip_size : UNIV_PAGE_SIZE);
if (!success) {
goto func_exit;
}
- mach_write_to_8(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn);
+ mach_write_ull(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn);
- success = os_file_write(filepath, file, page, 0, 0, UNIV_PAGE_SIZE);
+ success = os_file_write(filepath, file, page, 0, 0,
+ zip_size ? zip_size : UNIV_PAGE_SIZE);
if (!success) {
goto func_exit;
@@ -2688,7 +2919,7 @@ func_exit:
return(success);
}
-/************************************************************************
+/********************************************************************//**
Tries to open a single-table tablespace and optionally checks the space id is
right in it. If does not succeed, prints an error message to the .err log. This
function is used to open a tablespace when we start up mysqld, and also in
@@ -2696,21 +2927,22 @@ IMPORT TABLESPACE.
NOTE that we assume this operation is used either at the database startup
or under the protection of the dictionary mutex, so that two users cannot
race here. This operation does not leave the file associated with the
-tablespace open, but closes it after we have looked at the space id in it. */
-
+tablespace open, but closes it after we have looked at the space id in it.
+@return TRUE if success */
+UNIV_INTERN
ibool
fil_open_single_table_tablespace(
/*=============================*/
- /* out: TRUE if success */
- ibool check_space_id, /* in: should we check that the space
+ ibool check_space_id, /*!< in: should we check that the space
id in the file is right; we assume
that this function runs much faster
if no check is made, since accessing
the file inode probably is much
faster (the OS caches them) than
accessing the first page of the file */
- ulint id, /* in: space id */
- const char* name) /* in: table name in the
+ ulint id, /*!< in: space id */
+ ulint flags, /*!< in: tablespace flags */
+ const char* name) /*!< in: table name in the
databasename/tablename format */
{
os_file_t file;
@@ -2719,10 +2951,16 @@ fil_open_single_table_tablespace(
byte* buf2;
byte* page;
ulint space_id;
- ibool ret = TRUE;
+ ulint space_flags;
filepath = fil_make_ibd_name(name, FALSE);
+ /* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
+ ROW_FORMAT=COMPACT (table->flags == DICT_TF_COMPACT) and
+ ROW_FORMAT=REDUNDANT (table->flags == 0). For any other
+ format, the tablespace flags should equal table->flags. */
+ ut_a(flags != DICT_TF_COMPACT);
+
file = os_file_create_simple_no_error_handling(
filepath, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
if (!success) {
@@ -2744,8 +2982,7 @@ fil_open_single_table_tablespace(
" a temporary table #sql...,\n"
"InnoDB: and MySQL removed the .ibd file for this.\n"
"InnoDB: Please refer to\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "innodb-troubleshooting-datadict.html\n"
+ "InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n"
"InnoDB: for how to resolve the issue.\n", stderr);
mem_free(filepath);
@@ -2767,36 +3004,38 @@ fil_open_single_table_tablespace(
success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
- /* We have to read the tablespace id from the file */
+ /* We have to read the tablespace id and flags from the file. */
space_id = fsp_header_get_space_id(page);
+ space_flags = fsp_header_get_flags(page);
ut_free(buf2);
- if (space_id != id) {
+ if (UNIV_UNLIKELY(space_id != id || space_flags != flags)) {
ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: tablespace id in file ", stderr);
+ fputs(" InnoDB: Error: tablespace id and flags in file ",
+ stderr);
ut_print_filename(stderr, filepath);
- fprintf(stderr, " is %lu, but in the InnoDB\n"
- "InnoDB: data dictionary it is %lu.\n"
+ fprintf(stderr, " are %lu and %lu, but in the InnoDB\n"
+ "InnoDB: data dictionary they are %lu and %lu.\n"
"InnoDB: Have you moved InnoDB .ibd files"
" around without using the\n"
"InnoDB: commands DISCARD TABLESPACE and"
" IMPORT TABLESPACE?\n"
"InnoDB: Please refer to\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "innodb-troubleshooting-datadict.html\n"
+ "InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n"
"InnoDB: for how to resolve the issue.\n",
- (ulong) space_id, (ulong) id);
+ (ulong) space_id, (ulong) space_flags,
+ (ulong) id, (ulong) flags);
- ret = FALSE;
+ success = FALSE;
goto func_exit;
}
skip_check:
- success = fil_space_create(filepath, space_id, FIL_TABLESPACE);
+ success = fil_space_create(filepath, space_id, flags, FIL_TABLESPACE);
if (!success) {
goto func_exit;
@@ -2810,19 +3049,20 @@ func_exit:
os_file_close(file);
mem_free(filepath);
- return(ret);
+ return(success);
}
+#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_HOTBACKUP
-/***********************************************************************
+/*******************************************************************//**
Allocates a file name for an old version of a single-table tablespace.
-The string must be freed by caller with mem_free()! */
+The string must be freed by caller with mem_free()!
+@return own: file name */
static
char*
fil_make_ibbackup_old_name(
/*=======================*/
- /* out, own: file name */
- const char* name) /* in: original file name */
+ const char* name) /*!< in: original file name */
{
static const char suffix[] = "_ibbackup_old_vers_";
ulint len = strlen(name);
@@ -2835,15 +3075,15 @@ fil_make_ibbackup_old_name(
}
#endif /* UNIV_HOTBACKUP */
-/************************************************************************
+/********************************************************************//**
Opens an .ibd file and adds the associated single-table tablespace to the
InnoDB fil0fil.c data structures. */
static
void
fil_load_single_table_tablespace(
/*=============================*/
- const char* dbname, /* in: database name */
- const char* filename) /* in: file name (not a path),
+ const char* dbname, /*!< in: database name */
+ const char* filename) /*!< in: file name (not a path),
including the .ibd extension */
{
os_file_t file;
@@ -2852,9 +3092,10 @@ fil_load_single_table_tablespace(
byte* buf2;
byte* page;
ulint space_id;
+ ulint flags;
ulint size_low;
ulint size_high;
- ib_longlong size;
+ ib_int64_t size;
#ifdef UNIV_HOTBACKUP
fil_space_t* space;
#endif
@@ -2974,7 +3215,7 @@ fil_load_single_table_tablespace(
/* Every .ibd file is created >= 4 pages in size. Smaller files
cannot be ok. */
- size = (((ib_longlong)size_high) << 32) + (ib_longlong)size_low;
+ size = (((ib_int64_t)size_high) << 32) + (ib_int64_t)size_low;
#ifndef UNIV_HOTBACKUP
if (size < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
fprintf(stderr,
@@ -3002,8 +3243,10 @@ fil_load_single_table_tablespace(
/* We have to read the tablespace id from the file */
space_id = fsp_header_get_space_id(page);
+ flags = fsp_header_get_flags(page);
} else {
space_id = ULINT_UNDEFINED;
+ flags = 0;
}
#ifndef UNIV_HOTBACKUP
@@ -3022,7 +3265,7 @@ fil_load_single_table_tablespace(
fprintf(stderr,
"InnoDB: Renaming tablespace %s of id %lu,\n"
"InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
- "InnoDB: because its size %lld is too small"
+ "InnoDB: because its size %" PRId64 " is too small"
" (< 4 pages 16 kB each),\n"
"InnoDB: or the space id in the file header"
" is not sensible.\n"
@@ -3048,9 +3291,9 @@ fil_load_single_table_tablespace(
file than delete it, because if there is a bug, we do not want to
destroy valuable data. */
- mutex_enter(&(fil_system->mutex));
+ mutex_enter(&fil_system->mutex);
- space = fil_get_space_for_id_low(space_id);
+ space = fil_space_get_by_id(space_id);
if (space) {
char* new_path;
@@ -3068,7 +3311,7 @@ fil_load_single_table_tablespace(
new_path = fil_make_ibbackup_old_name(filepath);
- mutex_exit(&(fil_system->mutex));
+ mutex_exit(&fil_system->mutex);
ut_a(os_file_rename(filepath, new_path));
@@ -3078,13 +3321,23 @@ fil_load_single_table_tablespace(
return;
}
- mutex_exit(&(fil_system->mutex));
+ mutex_exit(&fil_system->mutex);
#endif
- success = fil_space_create(filepath, space_id, FIL_TABLESPACE);
+ success = fil_space_create(filepath, space_id, flags, FIL_TABLESPACE);
if (!success) {
- goto func_exit;
+ if (srv_force_recovery > 0) {
+ fprintf(stderr,
+ "InnoDB: innodb_force_recovery"
+ " was set to %lu. Continuing crash recovery\n"
+ "InnoDB: even though the tablespace creation"
+ " of this table failed.\n",
+ srv_force_recovery);
+ goto func_exit;
+ }
+
+ exit(1);
}
/* We do not use the size information we have about the file, because
@@ -3098,21 +3351,21 @@ func_exit:
mem_free(filepath);
}
-/***************************************************************************
+/***********************************************************************//**
A fault-tolerant function that tries to read the next file name in the
directory. We retry 100 times if os_file_readdir_next_file() returns -1. The
-idea is to read as much good data as we can and jump over bad data. */
+idea is to read as much good data as we can and jump over bad data.
+@return 0 if ok, -1 if error even after the retries, 1 if at the end
+of the directory */
static
int
fil_file_readdir_next_file(
/*=======================*/
- /* out: 0 if ok, -1 if error even after the
- retries, 1 if at the end of the directory */
- ulint* err, /* out: this is set to DB_ERROR if an error
+ ulint* err, /*!< out: this is set to DB_ERROR if an error
was encountered, otherwise not changed */
- const char* dirname,/* in: directory name or path */
- os_file_dir_t dir, /* in: directory stream */
- os_file_stat_t* info) /* in/out: buffer where the info is returned */
+ const char* dirname,/*!< in: directory name or path */
+ os_file_dir_t dir, /*!< in: directory stream */
+ os_file_stat_t* info) /*!< in/out: buffer where the info is returned */
{
ulint i;
int ret;
@@ -3138,18 +3391,18 @@ fil_file_readdir_next_file(
return(-1);
}
-/************************************************************************
+/********************************************************************//**
At the server startup, if we need crash recovery, scans the database
directories under the MySQL datadir, looking for .ibd files. Those files are
single-table tablespaces. We need to know the space id in each of them so that
we know into which file we should look to check the contents of a page stored
in the doublewrite buffer, also to know where to apply log records where the
-space id is != 0. */
-
+space id is != 0.
+@return DB_SUCCESS or error number */
+UNIV_INTERN
ulint
fil_load_single_table_tablespaces(void)
/*===================================*/
- /* out: DB_SUCCESS or error number */
{
int ret;
char* dbpath = NULL;
@@ -3267,23 +3520,22 @@ next_datadir_item:
return(err);
}
-/************************************************************************
+/********************************************************************//**
If we need crash recovery, and we have called
fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(),
we can call this function to print an error message of orphaned .ibd files
for which there is not a data dictionary entry with a matching table name
and space id. */
-
+UNIV_INTERN
void
fil_print_orphaned_tablespaces(void)
/*================================*/
{
- fil_system_t* system = fil_system;
fil_space_t* space;
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
- space = UT_LIST_GET_FIRST(system->space_list);
+ space = UT_LIST_GET_FIRST(fil_system->space_list);
while (space) {
if (space->purpose == FIL_TABLESPACE && space->id != 0
@@ -3298,128 +3550,115 @@ fil_print_orphaned_tablespaces(void)
space = UT_LIST_GET_NEXT(space_list, space);
}
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
}
-/***********************************************************************
+/*******************************************************************//**
Returns TRUE if a single-table tablespace does not exist in the memory cache,
-or is being deleted there. */
-
+or is being deleted there.
+@return TRUE if does not exist or is being deleted */
+UNIV_INTERN
ibool
fil_tablespace_deleted_or_being_deleted_in_mem(
/*===========================================*/
- /* out: TRUE if does not exist or is being\
- deleted */
- ulint id, /* in: space id */
- ib_longlong version)/* in: tablespace_version should be this; if
+ ulint id, /*!< in: space id */
+ ib_int64_t version)/*!< in: tablespace_version should be this; if
you pass -1 as the value of this, then this
parameter is ignored */
{
- fil_system_t* system = fil_system;
fil_space_t* space;
- ut_ad(system);
+ ut_ad(fil_system);
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+ space = fil_space_get_by_id(id);
if (space == NULL || space->is_being_deleted) {
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return(TRUE);
}
- if (version != ((ib_longlong)-1)
+ if (version != ((ib_int64_t)-1)
&& space->tablespace_version != version) {
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return(TRUE);
}
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return(FALSE);
}
-/***********************************************************************
-Returns TRUE if a single-table tablespace exists in the memory cache. */
-
+/*******************************************************************//**
+Returns TRUE if a single-table tablespace exists in the memory cache.
+@return TRUE if exists */
+UNIV_INTERN
ibool
fil_tablespace_exists_in_mem(
/*=========================*/
- /* out: TRUE if exists */
- ulint id) /* in: space id */
+ ulint id) /*!< in: space id */
{
- fil_system_t* system = fil_system;
fil_space_t* space;
- ut_ad(system);
+ ut_ad(fil_system);
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+ space = fil_space_get_by_id(id);
- if (space == NULL) {
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
- return(FALSE);
- }
-
- mutex_exit(&(system->mutex));
-
- return(TRUE);
+ return(space != NULL);
}
-/***********************************************************************
+/*******************************************************************//**
Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
cache. Note that if we have not done a crash recovery at the database startup,
-there may be many tablespaces which are not yet in the memory cache. */
-
+there may be many tablespaces which are not yet in the memory cache.
+@return TRUE if a matching tablespace exists in the memory cache */
+UNIV_INTERN
ibool
fil_space_for_table_exists_in_mem(
/*==============================*/
- /* out: TRUE if a matching tablespace
- exists in the memory cache */
- ulint id, /* in: space id */
- const char* name, /* in: table name in the standard
+ ulint id, /*!< in: space id */
+ const char* name, /*!< in: table name in the standard
'databasename/tablename' format or
the dir path to a temp table */
- ibool is_temp, /* in: TRUE if created with CREATE
+ ibool is_temp, /*!< in: TRUE if created with CREATE
TEMPORARY TABLE */
- ibool mark_space, /* in: in crash recovery, at database
+ ibool mark_space, /*!< in: in crash recovery, at database
startup we mark all spaces which have
an associated table in the InnoDB
data dictionary, so that
we can print a warning about orphaned
tablespaces */
ibool print_error_if_does_not_exist)
- /* in: print detailed error
+ /*!< in: print detailed error
information to the .err log if a
matching tablespace is not found from
memory */
{
- fil_system_t* system = fil_system;
fil_space_t* namespace;
fil_space_t* space;
char* path;
- ut_ad(system);
+ ut_ad(fil_system);
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
path = fil_make_ibd_name(name, is_temp);
/* Look if there is a space with the same id */
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+ space = fil_space_get_by_id(id);
/* Look if there is a space with the same name; the name is the
directory path from the datadir to the file */
- HASH_SEARCH(name_hash, system->name_hash,
- ut_fold_string(path), namespace,
- 0 == strcmp(namespace->name, path));
+ namespace = fil_space_get_by_name(path);
if (space && space == namespace) {
/* Found */
@@ -3428,7 +3667,7 @@ fil_space_for_table_exists_in_mem(
}
mem_free(path);
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return(TRUE);
}
@@ -3436,7 +3675,7 @@ fil_space_for_table_exists_in_mem(
if (!print_error_if_does_not_exist) {
mem_free(path);
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return(FALSE);
}
@@ -3476,12 +3715,11 @@ fil_space_for_table_exists_in_mem(
}
error_exit:
fputs("InnoDB: Please refer to\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "innodb-troubleshooting-datadict.html\n"
+ "InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n"
"InnoDB: for how to resolve the issue.\n", stderr);
mem_free(path);
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return(FALSE);
}
@@ -3511,69 +3749,65 @@ error_exit:
}
mem_free(path);
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return(FALSE);
}
-/***********************************************************************
+/*******************************************************************//**
Checks if a single-table tablespace for a given table name exists in the
-tablespace memory cache. */
+tablespace memory cache.
+@return space id, ULINT_UNDEFINED if not found */
static
ulint
fil_get_space_id_for_table(
/*=======================*/
- /* out: space id, ULINT_UNDEFINED if not
- found */
- const char* name) /* in: table name in the standard
+ const char* name) /*!< in: table name in the standard
'databasename/tablename' format */
{
- fil_system_t* system = fil_system;
fil_space_t* namespace;
ulint id = ULINT_UNDEFINED;
char* path;
- ut_ad(system);
+ ut_ad(fil_system);
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
path = fil_make_ibd_name(name, FALSE);
/* Look if there is a space with the same name; the name is the
directory path to the file */
- HASH_SEARCH(name_hash, system->name_hash,
- ut_fold_string(path), namespace,
- 0 == strcmp(namespace->name, path));
+ namespace = fil_space_get_by_name(path);
+
if (namespace) {
id = namespace->id;
}
mem_free(path);
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return(id);
}
-/**************************************************************************
+/**********************************************************************//**
Tries to extend a data file so that it would accommodate the number of pages
given. The tablespace must be cached in the memory cache. If the space is big
-enough already, does nothing. */
-
+enough already, does nothing.
+@return TRUE if success */
+UNIV_INTERN
ibool
fil_extend_space_to_desired_size(
/*=============================*/
- /* out: TRUE if success */
- ulint* actual_size, /* out: size of the space after extension;
+ ulint* actual_size, /*!< out: size of the space after extension;
if we ran out of disk space this may be lower
than the desired size */
- ulint space_id, /* in: space id */
- ulint size_after_extend)/* in: desired size in pages after the
+ ulint space_id, /*!< in: space id */
+ ulint size_after_extend)/*!< in: desired size in pages after the
extension; if the current space size is bigger
than this already, the function does nothing */
{
- fil_system_t* system = fil_system;
fil_node_t* node;
fil_space_t* space;
byte* buf2;
@@ -3583,12 +3817,12 @@ fil_extend_space_to_desired_size(
ulint file_start_page_no;
ulint offset_high;
ulint offset_low;
+ ulint page_size;
ibool success = TRUE;
fil_mutex_enter_and_prepare_for_io(space_id);
- HASH_SEARCH(hash, system->spaces, space_id, space,
- space->id == space_id);
+ space = fil_space_get_by_id(space_id);
ut_a(space);
if (space->size >= size_after_extend) {
@@ -3596,44 +3830,48 @@ fil_extend_space_to_desired_size(
*actual_size = space->size;
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return(TRUE);
}
+ page_size = dict_table_flags_to_zip_size(space->flags);
+ if (!page_size) {
+ page_size = UNIV_PAGE_SIZE;
+ }
+
node = UT_LIST_GET_LAST(space->chain);
- fil_node_prepare_for_io(node, system, space);
+ fil_node_prepare_for_io(node, fil_system, space);
start_page_no = space->size;
file_start_page_no = space->size - node->size;
/* Extend at most 64 pages at a time */
- buf_size = ut_min(64, size_after_extend - start_page_no)
- * UNIV_PAGE_SIZE;
- buf2 = mem_alloc(buf_size + UNIV_PAGE_SIZE);
- buf = ut_align(buf2, UNIV_PAGE_SIZE);
+ buf_size = ut_min(64, size_after_extend - start_page_no) * page_size;
+ buf2 = mem_alloc(buf_size + page_size);
+ buf = ut_align(buf2, page_size);
memset(buf, 0, buf_size);
while (start_page_no < size_after_extend) {
- ulint n_pages = ut_min(buf_size / UNIV_PAGE_SIZE,
+ ulint n_pages = ut_min(buf_size / page_size,
size_after_extend - start_page_no);
offset_high = (start_page_no - file_start_page_no)
- / (4096 * ((1024 * 1024) / UNIV_PAGE_SIZE));
+ / (4096 * ((1024 * 1024) / page_size));
offset_low = ((start_page_no - file_start_page_no)
- % (4096 * ((1024 * 1024) / UNIV_PAGE_SIZE)))
- * UNIV_PAGE_SIZE;
+ % (4096 * ((1024 * 1024) / page_size)))
+ * page_size;
#ifdef UNIV_HOTBACKUP
success = os_file_write(node->name, node->handle, buf,
offset_low, offset_high,
- UNIV_PAGE_SIZE * n_pages);
+ page_size * n_pages);
#else
success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
node->name, node->handle, buf,
offset_low, offset_high,
- UNIV_PAGE_SIZE * n_pages,
+ page_size * n_pages,
NULL, NULL);
#endif
if (success) {
@@ -3646,9 +3884,9 @@ fil_extend_space_to_desired_size(
how much we were able to extend it */
n_pages = ((ulint)
- (os_file_get_size_as_iblonglong
- (node->handle)
- / UNIV_PAGE_SIZE)) - node->size;
+ (os_file_get_size_as_iblonglong(
+ node->handle)
+ / page_size)) - node->size;
node->size += n_pages;
space->size += n_pages;
@@ -3661,13 +3899,13 @@ fil_extend_space_to_desired_size(
mem_free(buf2);
- fil_node_complete_io(node, system, OS_FILE_WRITE);
+ fil_node_complete_io(node, fil_system, OS_FILE_WRITE);
*actual_size = space->size;
#ifndef UNIV_HOTBACKUP
if (space_id == 0) {
- ulint pages_per_mb = (1024 * 1024) / UNIV_PAGE_SIZE;
+ ulint pages_per_mb = (1024 * 1024) / page_size;
/* Keep the last data file size info up to date, rounded to
full megabytes */
@@ -3680,7 +3918,7 @@ fil_extend_space_to_desired_size(
/*
printf("Extended %s to %lu, actual size %lu pages\n", space->name,
size_after_extend, *actual_size); */
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
fil_flush(space_id);
@@ -3688,17 +3926,16 @@ fil_extend_space_to_desired_size(
}
#ifdef UNIV_HOTBACKUP
-/************************************************************************
+/********************************************************************//**
Extends all tablespaces to the size stored in the space header. During the
ibbackup --apply-log phase we extended the spaces on-demand so that log records
could be applied, but that may have left spaces still too small compared to
the size stored in the space header. */
-
+UNIV_INTERN
void
fil_extend_tablespaces_to_stored_len(void)
/*======================================*/
{
- fil_system_t* system = fil_system;
fil_space_t* space;
byte* buf;
ulint actual_size;
@@ -3708,18 +3945,19 @@ fil_extend_tablespaces_to_stored_len(void)
buf = mem_alloc(UNIV_PAGE_SIZE);
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
- space = UT_LIST_GET_FIRST(system->space_list);
+ space = UT_LIST_GET_FIRST(fil_system->space_list);
while (space) {
ut_a(space->purpose == FIL_TABLESPACE);
- mutex_exit(&(system->mutex)); /* no need to protect with a
+ mutex_exit(&fil_system->mutex); /* no need to protect with a
mutex, because this is a
single-threaded operation */
- error = fil_read(TRUE, space->id, 0, 0, UNIV_PAGE_SIZE, buf,
- NULL);
+ error = fil_read(TRUE, space->id,
+ dict_table_flags_to_zip_size(space->flags),
+ 0, 0, UNIV_PAGE_SIZE, buf, NULL);
ut_a(error == DB_SUCCESS);
size_in_header = fsp_get_size_low(buf);
@@ -3739,12 +3977,12 @@ fil_extend_tablespaces_to_stored_len(void)
exit(1);
}
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
space = UT_LIST_GET_NEXT(space_list, space);
}
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
mem_free(buf);
}
@@ -3752,26 +3990,25 @@ fil_extend_tablespaces_to_stored_len(void)
/*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/
-/***********************************************************************
-Tries to reserve free extents in a file space. */
-
+/*******************************************************************//**
+Tries to reserve free extents in a file space.
+@return TRUE if succeeded */
+UNIV_INTERN
ibool
fil_space_reserve_free_extents(
/*===========================*/
- /* out: TRUE if succeed */
- ulint id, /* in: space id */
- ulint n_free_now, /* in: number of free extents now */
- ulint n_to_reserve) /* in: how many one wants to reserve */
+ ulint id, /*!< in: space id */
+ ulint n_free_now, /*!< in: number of free extents now */
+ ulint n_to_reserve) /*!< in: how many one wants to reserve */
{
- fil_system_t* system = fil_system;
fil_space_t* space;
ibool success;
- ut_ad(system);
+ ut_ad(fil_system);
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+ space = fil_space_get_by_id(id);
ut_a(space);
@@ -3782,68 +4019,66 @@ fil_space_reserve_free_extents(
success = TRUE;
}
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return(success);
}
-/***********************************************************************
+/*******************************************************************//**
Releases free extents in a file space. */
-
+UNIV_INTERN
void
fil_space_release_free_extents(
/*===========================*/
- ulint id, /* in: space id */
- ulint n_reserved) /* in: how many one reserved */
+ ulint id, /*!< in: space id */
+ ulint n_reserved) /*!< in: how many one reserved */
{
- fil_system_t* system = fil_system;
fil_space_t* space;
- ut_ad(system);
+ ut_ad(fil_system);
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+ space = fil_space_get_by_id(id);
ut_a(space);
ut_a(space->n_reserved_extents >= n_reserved);
space->n_reserved_extents -= n_reserved;
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
}
-/***********************************************************************
+/*******************************************************************//**
Gets the number of reserved extents. If the database is silent, this number
should be zero. */
-
+UNIV_INTERN
ulint
fil_space_get_n_reserved_extents(
/*=============================*/
- ulint id) /* in: space id */
+ ulint id) /*!< in: space id */
{
- fil_system_t* system = fil_system;
fil_space_t* space;
ulint n;
- ut_ad(system);
+ ut_ad(fil_system);
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+ space = fil_space_get_by_id(id);
ut_a(space);
n = space->n_reserved_extents;
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return(n);
}
/*============================ FILE I/O ================================*/
-/************************************************************************
+/********************************************************************//**
NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
Prepares a file node for i/o. Opens the file if it is closed. Updates the
@@ -3854,9 +4089,9 @@ static
void
fil_node_prepare_for_io(
/*====================*/
- fil_node_t* node, /* in: file node */
- fil_system_t* system, /* in: tablespace memory cache */
- fil_space_t* space) /* in: space */
+ fil_node_t* node, /*!< in: file node */
+ fil_system_t* system, /*!< in: tablespace memory cache */
+ fil_space_t* space) /*!< in: space */
{
ut_ad(node && system && space);
ut_ad(mutex_own(&(system->mutex)));
@@ -3889,16 +4124,16 @@ fil_node_prepare_for_io(
node->n_pending++;
}
-/************************************************************************
+/********************************************************************//**
Updates the data structures when an i/o operation finishes. Updates the
pending i/o's field in the node appropriately. */
static
void
fil_node_complete_io(
/*=================*/
- fil_node_t* node, /* in: file node */
- fil_system_t* system, /* in: tablespace memory cache */
- ulint type) /* in: OS_FILE_WRITE or OS_FILE_READ; marks
+ fil_node_t* node, /*!< in: file node */
+ fil_system_t* system, /*!< in: tablespace memory cache */
+ ulint type) /*!< in: OS_FILE_WRITE or OS_FILE_READ; marks
the node as modified if
type == OS_FILE_WRITE */
{
@@ -3930,18 +4165,18 @@ fil_node_complete_io(
}
}
-/************************************************************************
+/********************************************************************//**
Report information about an invalid page access. */
static
void
fil_report_invalid_page_access(
/*===========================*/
- ulint block_offset, /* in: block offset */
- ulint space_id, /* in: space id */
- const char* space_name, /* in: space name */
- ulint byte_offset, /* in: byte offset */
- ulint len, /* in: I/O length */
- ulint type) /* in: I/O type */
+ ulint block_offset, /*!< in: block offset */
+ ulint space_id, /*!< in: space id */
+ const char* space_name, /*!< in: space name */
+ ulint byte_offset, /*!< in: byte offset */
+ ulint len, /*!< in: I/O length */
+ ulint type) /*!< in: I/O type */
{
fprintf(stderr,
"InnoDB: Error: trying to access page number %lu"
@@ -3958,16 +4193,15 @@ fil_report_invalid_page_access(
(ulong) byte_offset, (ulong) len, (ulong) type);
}
-/************************************************************************
-Reads or writes data. This operation is asynchronous (aio). */
-
+/********************************************************************//**
+Reads or writes data. This operation is asynchronous (aio).
+@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
+i/o on a tablespace which does not exist */
+UNIV_INTERN
ulint
fil_io(
/*===*/
- /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
- if we are trying to do i/o on a tablespace
- which does not exist */
- ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE,
+ ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE,
ORed to OS_FILE_LOG, if a log i/o
and ORed to OS_AIO_SIMULATED_WAKE_LATER
if simulated aio and we want to post a
@@ -3976,22 +4210,23 @@ fil_io(
because i/os are not actually handled until
all have been posted: use with great
caution! */
- ibool sync, /* in: TRUE if synchronous aio is desired */
- ulint space_id, /* in: space id */
- ulint block_offset, /* in: offset in number of blocks */
- ulint byte_offset, /* in: remainder of offset in bytes; in
+ ibool sync, /*!< in: TRUE if synchronous aio is desired */
+ ulint space_id, /*!< in: space id */
+ ulint zip_size, /*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint block_offset, /*!< in: offset in number of blocks */
+ ulint byte_offset, /*!< in: remainder of offset in bytes; in
aio this must be divisible by the OS block
size */
- ulint len, /* in: how many bytes to read or write; this
+ ulint len, /*!< in: how many bytes to read or write; this
must not cross a file boundary; in aio this
must be a block size multiple */
- void* buf, /* in/out: buffer where to store read data
+ void* buf, /*!< in/out: buffer where to store read data
or from where to write; in aio this must be
appropriately aligned */
- void* message) /* in: message for aio handler if non-sync
+ void* message) /*!< in: message for aio handler if non-sync
aio used, else ignored */
{
- fil_system_t* system = fil_system;
ulint mode;
fil_space_t* space;
fil_node_t* node;
@@ -4008,29 +4243,38 @@ fil_io(
type = type & ~OS_AIO_SIMULATED_WAKE_LATER;
ut_ad(byte_offset < UNIV_PAGE_SIZE);
+ ut_ad(!zip_size || !byte_offset);
+ ut_ad(ut_is_2pow(zip_size));
ut_ad(buf);
ut_ad(len > 0);
- ut_a((1 << UNIV_PAGE_SIZE_SHIFT) == UNIV_PAGE_SIZE);
+#if (1 << UNIV_PAGE_SIZE_SHIFT) != UNIV_PAGE_SIZE
+# error "(1 << UNIV_PAGE_SIZE_SHIFT) != UNIV_PAGE_SIZE"
+#endif
ut_ad(fil_validate());
-#ifndef UNIV_LOG_DEBUG
+#ifndef UNIV_HOTBACKUP
+# ifndef UNIV_LOG_DEBUG
/* ibuf bitmap pages must be read in the sync aio mode: */
ut_ad(recv_no_ibuf_operations || (type == OS_FILE_WRITE)
- || !ibuf_bitmap_page(block_offset) || sync || is_log);
-#ifdef UNIV_SYNC_DEBUG
+ || !ibuf_bitmap_page(zip_size, block_offset)
+ || sync || is_log);
ut_ad(!ibuf_inside() || is_log || (type == OS_FILE_WRITE)
- || ibuf_page(space_id, block_offset));
-#endif
-#endif
+ || ibuf_page(space_id, zip_size, block_offset, NULL));
+# endif /* UNIV_LOG_DEBUG */
if (sync) {
mode = OS_AIO_SYNC;
- } else if (type == OS_FILE_READ && !is_log
- && ibuf_page(space_id, block_offset)) {
- mode = OS_AIO_IBUF;
} else if (is_log) {
mode = OS_AIO_LOG;
+ } else if (type == OS_FILE_READ
+ && !recv_no_ibuf_operations
+ && ibuf_page(space_id, zip_size, block_offset, NULL)) {
+ mode = OS_AIO_IBUF;
} else {
mode = OS_AIO_NORMAL;
}
+#else /* !UNIV_HOTBACKUP */
+ ut_a(sync);
+ mode = OS_AIO_SYNC;
+#endif /* !UNIV_HOTBACKUP */
if (type == OS_FILE_READ) {
srv_data_read+= len;
@@ -4043,10 +4287,10 @@ fil_io(
fil_mutex_enter_and_prepare_for_io(space_id);
- HASH_SEARCH(hash, system->spaces, space_id, space,
- space->id == space_id);
+ space = fil_space_get_by_id(space_id);
+
if (!space) {
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
ut_print_timestamp(stderr);
fprintf(stderr,
@@ -4065,7 +4309,7 @@ fil_io(
node = UT_LIST_GET_FIRST(space->chain);
for (;;) {
- if (node == NULL) {
+ if (UNIV_UNLIKELY(node == NULL)) {
fil_report_invalid_page_access(
block_offset, space_id, space->name,
byte_offset, len, type);
@@ -4090,12 +4334,12 @@ fil_io(
}
/* Open file if closed */
- fil_node_prepare_for_io(node, system, space);
+ fil_node_prepare_for_io(node, fil_system, space);
/* Check that at least the start offset is within the bounds of a
single-table tablespace */
- if (space->purpose == FIL_TABLESPACE && space->id != 0
- && node->size <= block_offset) {
+ if (UNIV_UNLIKELY(node->size <= block_offset)
+ && space->id != 0 && space->purpose == FIL_TABLESPACE) {
fil_report_invalid_page_access(
block_offset, space_id, space->name, byte_offset,
@@ -4104,17 +4348,35 @@ fil_io(
ut_error;
}
- /* Now we have made the changes in the data structures of system */
- mutex_exit(&(system->mutex));
+ /* Now we have made the changes in the data structures of fil_system */
+ mutex_exit(&fil_system->mutex);
/* Calculate the low 32 bits and the high 32 bits of the file offset */
- offset_high = (block_offset >> (32 - UNIV_PAGE_SIZE_SHIFT));
- offset_low = ((block_offset << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL)
- + byte_offset;
+ if (!zip_size) {
+ offset_high = (block_offset >> (32 - UNIV_PAGE_SIZE_SHIFT));
+ offset_low = ((block_offset << UNIV_PAGE_SIZE_SHIFT)
+ & 0xFFFFFFFFUL) + byte_offset;
- ut_a(node->size - block_offset
- >= (byte_offset + len + (UNIV_PAGE_SIZE - 1)) / UNIV_PAGE_SIZE);
+ ut_a(node->size - block_offset
+ >= ((byte_offset + len + (UNIV_PAGE_SIZE - 1))
+ / UNIV_PAGE_SIZE));
+ } else {
+ ulint zip_size_shift;
+ switch (zip_size) {
+ case 1024: zip_size_shift = 10; break;
+ case 2048: zip_size_shift = 11; break;
+ case 4096: zip_size_shift = 12; break;
+ case 8192: zip_size_shift = 13; break;
+ case 16384: zip_size_shift = 14; break;
+ default: ut_error;
+ }
+ offset_high = block_offset >> (32 - zip_size_shift);
+ offset_low = (block_offset << zip_size_shift & 0xFFFFFFFFUL)
+ + byte_offset;
+ ut_a(node->size - block_offset
+ >= (len + (zip_size - 1)) / zip_size);
+ }
/* Do aio */
@@ -4141,11 +4403,11 @@ fil_io(
/* The i/o operation is already completed when we return from
os_aio: */
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
- fil_node_complete_io(node, system, type);
+ fil_node_complete_io(node, fil_system, type);
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
ut_ad(fil_validate());
}
@@ -4153,75 +4415,19 @@ fil_io(
return(DB_SUCCESS);
}
-/************************************************************************
-Reads data from a space to a buffer. Remember that the possible incomplete
-blocks at the end of file are ignored: they are not taken into account when
-calculating the byte offset within a space. */
-
-ulint
-fil_read(
-/*=====*/
- /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
- if we are trying to do i/o on a tablespace
- which does not exist */
- ibool sync, /* in: TRUE if synchronous aio is desired */
- ulint space_id, /* in: space id */
- ulint block_offset, /* in: offset in number of blocks */
- ulint byte_offset, /* in: remainder of offset in bytes; in aio
- this must be divisible by the OS block size */
- ulint len, /* in: how many bytes to read; this must not
- cross a file boundary; in aio this must be a
- block size multiple */
- void* buf, /* in/out: buffer where to store data read;
- in aio this must be appropriately aligned */
- void* message) /* in: message for aio handler if non-sync
- aio used, else ignored */
-{
- return(fil_io(OS_FILE_READ, sync, space_id, block_offset,
- byte_offset, len, buf, message));
-}
-
-/************************************************************************
-Writes data to a space from a buffer. Remember that the possible incomplete
-blocks at the end of file are ignored: they are not taken into account when
-calculating the byte offset within a space. */
-
-ulint
-fil_write(
-/*======*/
- /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
- if we are trying to do i/o on a tablespace
- which does not exist */
- ibool sync, /* in: TRUE if synchronous aio is desired */
- ulint space_id, /* in: space id */
- ulint block_offset, /* in: offset in number of blocks */
- ulint byte_offset, /* in: remainder of offset in bytes; in aio
- this must be divisible by the OS block size */
- ulint len, /* in: how many bytes to write; this must
- not cross a file boundary; in aio this must
- be a block size multiple */
- void* buf, /* in: buffer from which to write; in aio
- this must be appropriately aligned */
- void* message) /* in: message for aio handler if non-sync
- aio used, else ignored */
-{
- return(fil_io(OS_FILE_WRITE, sync, space_id, block_offset,
- byte_offset, len, buf, message));
-}
-
-/**************************************************************************
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
Waits for an aio operation to complete. This function is used to write the
handler for completed requests. The aio array of pending requests is divided
into segments (see os0file.c for more info). The thread specifies which
segment it wants to wait for. */
-
+UNIV_INTERN
void
fil_aio_wait(
/*=========*/
- ulint segment) /* in: the number of the segment in the aio
+ ulint segment) /*!< in: the number of the segment in the aio
array to wait for */
{
- fil_system_t* system = fil_system;
ibool ret;
fil_node_t* fil_node;
void* message;
@@ -4234,8 +4440,6 @@ fil_aio_wait(
#ifdef WIN_ASYNC_IO
ret = os_aio_windows_handle(segment, 0, &fil_node,
&message, &type);
-#elif defined(POSIX_ASYNC_IO)
- ret = os_aio_posix_handle(segment, &fil_node, &message);
#else
ret = 0; /* Eliminate compiler warning */
ut_error;
@@ -4251,11 +4455,11 @@ fil_aio_wait(
srv_set_io_thread_op_info(segment, "complete io for fil node");
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
fil_node_complete_io(fil_node, fil_system, type);
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
ut_ad(fil_validate());
@@ -4265,7 +4469,7 @@ fil_aio_wait(
deadlocks in the i/o system. We keep tablespace 0 data files always
open, and use a special i/o thread to serve insert buffer requests. */
- if (buf_pool_is_block(message)) {
+ if (fil_node->space->purpose == FIL_TABLESPACE) {
srv_set_io_thread_op_info(segment, "complete io for buf page");
buf_page_io_complete(message);
} else {
@@ -4273,34 +4477,34 @@ fil_aio_wait(
log_io_complete(message);
}
}
+#endif /* UNIV_HOTBACKUP */
-/**************************************************************************
+/**********************************************************************//**
Flushes to disk possible writes cached by the OS. If the space does not exist
or is being dropped, does not do anything. */
-
+UNIV_INTERN
void
fil_flush(
/*======*/
- ulint space_id) /* in: file space id (this can be a group of
+ ulint space_id) /*!< in: file space id (this can be a group of
log files or a tablespace of the database) */
{
- fil_system_t* system = fil_system;
fil_space_t* space;
fil_node_t* node;
os_file_t file;
- ib_longlong old_mod_counter;
+ ib_int64_t old_mod_counter;
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
+
+ space = fil_space_get_by_id(space_id);
- HASH_SEARCH(hash, system->spaces, space_id, space,
- space->id == space_id);
if (!space || space->is_being_deleted) {
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return;
}
- space->n_pending_flushes++; /* prevent dropping of the space while
+ space->n_pending_flushes++; /* prevent dropping of the space while
we are flushing */
node = UT_LIST_GET_FIRST(space->chain);
@@ -4331,11 +4535,11 @@ retry:
not know what bugs OS's may contain in file
i/o; sleep for a while */
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
os_thread_sleep(20000);
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
if (node->flush_counter >= old_mod_counter) {
@@ -4349,14 +4553,14 @@ retry:
file = node->handle;
node->n_pending_flushes++;
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
/* fprintf(stderr, "Flushing to file %s\n",
node->name); */
os_file_flush(file);
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
node->n_pending_flushes--;
skip_flush:
@@ -4370,7 +4574,7 @@ skip_flush:
UT_LIST_REMOVE(
unflushed_spaces,
- system->unflushed_spaces,
+ fil_system->unflushed_spaces,
space);
}
}
@@ -4387,42 +4591,41 @@ skip_flush:
space->n_pending_flushes--;
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
}
-/**************************************************************************
+/**********************************************************************//**
Flushes to disk the writes in file spaces of the given type possibly cached by
the OS. */
-
+UNIV_INTERN
void
fil_flush_file_spaces(
/*==================*/
- ulint purpose) /* in: FIL_TABLESPACE, FIL_LOG */
+ ulint purpose) /*!< in: FIL_TABLESPACE, FIL_LOG */
{
- fil_system_t* system = fil_system;
fil_space_t* space;
ulint* space_ids;
ulint n_space_ids;
ulint i;
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
- n_space_ids = UT_LIST_GET_LEN(system->unflushed_spaces);
+ n_space_ids = UT_LIST_GET_LEN(fil_system->unflushed_spaces);
if (n_space_ids == 0) {
- mutex_exit(&system->mutex);
+ mutex_exit(&fil_system->mutex);
return;
}
/* Assemble a list of space ids to flush. Previously, we
- traversed system->unflushed_spaces and called UT_LIST_GET_NEXT()
+ traversed fil_system->unflushed_spaces and called UT_LIST_GET_NEXT()
on a space that was just removed from the list by fil_flush().
Thus, the space could be dropped and the memory overwritten. */
space_ids = mem_alloc(n_space_ids * sizeof *space_ids);
n_space_ids = 0;
- for (space = UT_LIST_GET_FIRST(system->unflushed_spaces);
+ for (space = UT_LIST_GET_FIRST(fil_system->unflushed_spaces);
space;
space = UT_LIST_GET_NEXT(unflushed_spaces, space)) {
@@ -4432,7 +4635,7 @@ fil_flush_file_spaces(
}
}
- mutex_exit(&system->mutex);
+ mutex_exit(&fil_system->mutex);
/* Flush the spaces. It will not hurt to call fil_flush() on
a non-existing space id. */
@@ -4444,30 +4647,31 @@ fil_flush_file_spaces(
mem_free(space_ids);
}
-/**********************************************************************
-Checks the consistency of the tablespace cache. */
-
+/******************************************************************//**
+Checks the consistency of the tablespace cache.
+@return TRUE if ok */
+UNIV_INTERN
ibool
fil_validate(void)
/*==============*/
- /* out: TRUE if ok */
{
- fil_system_t* system = fil_system;
fil_space_t* space;
fil_node_t* fil_node;
ulint n_open = 0;
ulint i;
- mutex_enter(&(system->mutex));
+ mutex_enter(&fil_system->mutex);
/* Look for spaces in the hash table */
- for (i = 0; i < hash_get_n_cells(system->spaces); i++) {
+ for (i = 0; i < hash_get_n_cells(fil_system->spaces); i++) {
- space = HASH_GET_FIRST(system->spaces, i);
+ space = HASH_GET_FIRST(fil_system->spaces, i);
while (space != NULL) {
- UT_LIST_VALIDATE(chain, fil_node_t, space->chain);
+ UT_LIST_VALIDATE(chain, fil_node_t, space->chain,
+ ut_a(ut_list_node_313->open
+ || !ut_list_node_313->n_pending));
fil_node = UT_LIST_GET_FIRST(space->chain);
@@ -4485,11 +4689,11 @@ fil_validate(void)
}
}
- ut_a(system->n_open == n_open);
+ ut_a(fil_system->n_open == n_open);
- UT_LIST_VALIDATE(LRU, fil_node_t, system->LRU);
+ UT_LIST_VALIDATE(LRU, fil_node_t, fil_system->LRU, (void) 0);
- fil_node = UT_LIST_GET_FIRST(system->LRU);
+ fil_node = UT_LIST_GET_FIRST(fil_system->LRU);
while (fil_node != NULL) {
ut_a(fil_node->n_pending == 0);
@@ -4500,67 +4704,95 @@ fil_validate(void)
fil_node = UT_LIST_GET_NEXT(LRU, fil_node);
}
- mutex_exit(&(system->mutex));
+ mutex_exit(&fil_system->mutex);
return(TRUE);
}
-/************************************************************************
-Returns TRUE if file address is undefined. */
+/********************************************************************//**
+Returns TRUE if file address is undefined.
+@return TRUE if undefined */
+UNIV_INTERN
ibool
fil_addr_is_null(
/*=============*/
- /* out: TRUE if undefined */
- fil_addr_t addr) /* in: address */
+ fil_addr_t addr) /*!< in: address */
{
- if (addr.page == FIL_NULL) {
-
- return(TRUE);
- }
-
- return(FALSE);
+ return(addr.page == FIL_NULL);
}
-/************************************************************************
-Accessor functions for a file page */
-
+/********************************************************************//**
+Get the predecessor of a file page.
+@return FIL_PAGE_PREV */
+UNIV_INTERN
ulint
-fil_page_get_prev(byte* page)
+fil_page_get_prev(
+/*==============*/
+ const byte* page) /*!< in: file page */
{
return(mach_read_from_4(page + FIL_PAGE_PREV));
}
+/********************************************************************//**
+Get the successor of a file page.
+@return FIL_PAGE_NEXT */
+UNIV_INTERN
ulint
-fil_page_get_next(byte* page)
+fil_page_get_next(
+/*==============*/
+ const byte* page) /*!< in: file page */
{
return(mach_read_from_4(page + FIL_PAGE_NEXT));
}
-/*************************************************************************
+/*********************************************************************//**
Sets the file page type. */
-
+UNIV_INTERN
void
fil_page_set_type(
/*==============*/
- byte* page, /* in: file page */
- ulint type) /* in: type */
+ byte* page, /*!< in/out: file page */
+ ulint type) /*!< in: type */
{
ut_ad(page);
mach_write_to_2(page + FIL_PAGE_TYPE, type);
}
-/*************************************************************************
-Gets the file page type. */
-
+/*********************************************************************//**
+Gets the file page type.
+@return type; NOTE that if the type has not been written to page, the
+return value is not defined */
+UNIV_INTERN
ulint
fil_page_get_type(
/*==============*/
- /* out: type; NOTE that if the type has not been
- written to page, the return value not defined */
- byte* page) /* in: file page */
+ const byte* page) /*!< in: file page */
{
ut_ad(page);
return(mach_read_from_2(page + FIL_PAGE_TYPE));
}
+
+/********************************************************************
+Closes the tablespace memory cache and frees its memory. */
+UNIV_INTERN
+void
+fil_close(void)
+/*===========*/
+{
+ /* The mutex should already have been freed. */
+ ut_ad(fil_system->mutex.magic_n == 0);
+
+ hash_table_free(fil_system->spaces);
+
+ hash_table_free(fil_system->name_hash);
+
+ ut_a(UT_LIST_GET_LEN(fil_system->LRU) == 0);
+ ut_a(UT_LIST_GET_LEN(fil_system->unflushed_spaces) == 0);
+ ut_a(UT_LIST_GET_LEN(fil_system->space_list) == 0);
+
+ mem_free(fil_system);
+
+ fil_system = NULL;
+}
diff --git a/storage/innobase/fsp/fsp0fsp.c b/storage/innobase/fsp/fsp0fsp.c
index e1074933fe8..3cc4318fc06 100644
--- a/storage/innobase/fsp/fsp0fsp.c
+++ b/storage/innobase/fsp/fsp0fsp.c
@@ -1,7 +1,24 @@
-/**********************************************************************
-File space management
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file fsp/fsp0fsp.c
+File space management
Created 11/29/1995 Heikki Tuuri
***********************************************************************/
@@ -14,18 +31,23 @@ Created 11/29/1995 Heikki Tuuri
#include "buf0buf.h"
#include "fil0fil.h"
-#include "sync0sync.h"
#include "mtr0log.h"
-#include "fut0fut.h"
#include "ut0byte.h"
-#include "srv0srv.h"
-#include "page0types.h"
-#include "ibuf0ibuf.h"
-#include "btr0btr.h"
-#include "btr0sea.h"
-#include "dict0boot.h"
+#include "page0page.h"
+#include "page0zip.h"
+#ifdef UNIV_HOTBACKUP
+# include "fut0lst.h"
+#else /* UNIV_HOTBACKUP */
+# include "sync0sync.h"
+# include "fut0fut.h"
+# include "srv0srv.h"
+# include "ibuf0ibuf.h"
+# include "btr0btr.h"
+# include "btr0sea.h"
+# include "dict0boot.h"
+# include "log0log.h"
+#endif /* UNIV_HOTBACKUP */
#include "dict0mem.h"
-#include "log0log.h"
#define FSP_HEADER_OFFSET FIL_PAGE_DATA /* Offset of the space header
@@ -60,11 +82,7 @@ descriptor page, but used only in the first. */
about the first extent, but have not
physically allocted those pages to the
file */
-#define FSP_LOWEST_NO_WRITE 16 /* The lowest page offset for which
- the page has not been written to disk
- (if it has been written, we know that
- the OS has really reserved the
- physical space for the page) */
+#define FSP_SPACE_FLAGS 16 /* table->flags & ~DICT_TF_COMPACT */
#define FSP_FRAG_N_USED 20 /* number of used pages in the
FSP_FREE_FRAG list */
#define FSP_FREE 24 /* list of free extents */
@@ -139,8 +157,9 @@ typedef byte fseg_inode_t;
(16 + 3 * FLST_BASE_NODE_SIZE \
+ FSEG_FRAG_ARR_N_SLOTS * FSEG_FRAG_SLOT_SIZE)
-#define FSP_SEG_INODES_PER_PAGE \
- ((UNIV_PAGE_SIZE - FSEG_ARR_OFFSET - 10) / FSEG_INODE_SIZE)
+#define FSP_SEG_INODES_PER_PAGE(zip_size) \
+ (((zip_size ? zip_size : UNIV_PAGE_SIZE) \
+ - FSEG_ARR_OFFSET - 10) / FSEG_INODE_SIZE)
/* Number of segment inodes which fit on a
single page */
@@ -212,60 +231,73 @@ the extent are free and which contain old tuple version to clean. */
/* Offset of the descriptor array on a descriptor page */
#define XDES_ARR_OFFSET (FSP_HEADER_OFFSET + FSP_HEADER_SIZE)
-/**************************************************************************
+#ifndef UNIV_HOTBACKUP
+/* Flag to indicate if we have printed the tablespace full error. */
+static ibool fsp_tbs_full_error_printed = FALSE;
+
+/**********************************************************************//**
Returns an extent to the free list of a space. */
static
void
fsp_free_extent(
/*============*/
- ulint space, /* in: space id */
- ulint page, /* in: page offset in the extent */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page, /*!< in: page offset in the extent */
+ mtr_t* mtr); /*!< in: mtr */
+/**********************************************************************//**
Frees an extent of a segment to the space free list. */
static
void
fseg_free_extent(
/*=============*/
- fseg_inode_t* seg_inode, /* in: segment inode */
- ulint space, /* in: space id */
- ulint page, /* in: page offset in the extent */
- mtr_t* mtr); /* in: mtr handle */
-/**************************************************************************
+ fseg_inode_t* seg_inode, /*!< in: segment inode */
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page, /*!< in: page offset in the extent */
+ mtr_t* mtr); /*!< in: mtr handle */
+/**********************************************************************//**
Calculates the number of pages reserved by a segment, and how
-many pages are currently used. */
+many pages are currently used.
+@return number of reserved pages */
static
ulint
fseg_n_reserved_pages_low(
/*======================*/
- /* out: number of reserved pages */
- fseg_inode_t* header, /* in: segment inode */
- ulint* used, /* out: number of pages used (<= reserved) */
- mtr_t* mtr); /* in: mtr handle */
-/************************************************************************
+ fseg_inode_t* header, /*!< in: segment inode */
+ ulint* used, /*!< out: number of pages used (not
+ more than reserved) */
+ mtr_t* mtr); /*!< in: mtr handle */
+/********************************************************************//**
Marks a page used. The page must reside within the extents of the given
segment. */
static
void
fseg_mark_page_used(
/*================*/
- fseg_inode_t* seg_inode,/* in: segment inode */
- ulint space, /* in: space id */
- ulint page, /* in: page offset */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
+ fseg_inode_t* seg_inode,/*!< in: segment inode */
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page, /*!< in: page offset */
+ mtr_t* mtr); /*!< in: mtr */
+/**********************************************************************//**
Returns the first extent descriptor for a segment. We think of the extent
lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL
--> FSEG_FREE. */
+-> FSEG_FREE.
+@return the first extent descriptor, or NULL if none */
static
xdes_t*
fseg_get_first_extent(
/*==================*/
- /* out: the first extent descriptor, or NULL if
- none */
- fseg_inode_t* inode, /* in: segment inode */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
+ fseg_inode_t* inode, /*!< in: segment inode */
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ mtr_t* mtr); /*!< in: mtr */
+/**********************************************************************//**
Puts new extents to the free list if
there are free extents above the free limit. If an extent happens
to contain an extent descriptor page, the extent is put to
@@ -274,87 +306,97 @@ static
void
fsp_fill_free_list(
/*===============*/
- ibool init_space, /* in: TRUE if this is a single-table
+ ibool init_space, /*!< in: TRUE if this is a single-table
tablespace and we are only initing
the tablespace's first extent
descriptor page and ibuf bitmap page;
then we do not allocate more extents */
- ulint space, /* in: space */
- fsp_header_t* header, /* in: space header */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
+ ulint space, /*!< in: space */
+ fsp_header_t* header, /*!< in: space header */
+ mtr_t* mtr); /*!< in: mtr */
+/**********************************************************************//**
Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize file space
-fragmentation. */
+fragmentation.
+@return the allocated page number, FIL_NULL if no page could be allocated */
static
ulint
fseg_alloc_free_page_low(
/*=====================*/
- /* out: the allocated page number, FIL_NULL
- if no page could be allocated */
- ulint space, /* in: space */
- fseg_inode_t* seg_inode, /* in: segment inode */
- ulint hint, /* in: hint of which page would be desirable */
- byte direction, /* in: if the new page is needed because
+ ulint space, /*!< in: space */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ fseg_inode_t* seg_inode, /*!< in: segment inode */
+ ulint hint, /*!< in: hint of which page would be desirable */
+ byte direction, /*!< in: if the new page is needed because
of an index page split, and records are
inserted there in order, into which
direction they go alphabetically: FSP_DOWN,
FSP_UP, FSP_NO_DIR */
- mtr_t* mtr); /* in: mtr handle */
-
-
-/**************************************************************************
-Reads the file space size stored in the header page. */
+ mtr_t* mtr); /*!< in: mtr handle */
+#endif /* !UNIV_HOTBACKUP */
+/**********************************************************************//**
+Reads the file space size stored in the header page.
+@return tablespace size stored in the space header */
+UNIV_INTERN
ulint
fsp_get_size_low(
/*=============*/
- /* out: tablespace size stored in the space header */
- page_t* page) /* in: header page (page 0 in the tablespace) */
+ page_t* page) /*!< in: header page (page 0 in the tablespace) */
{
return(mach_read_from_4(page + FSP_HEADER_OFFSET + FSP_SIZE));
}
-/**************************************************************************
-Gets a pointer to the space header and x-locks its page. */
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Gets a pointer to the space header and x-locks its page.
+@return pointer to the space header, page x-locked */
UNIV_INLINE
fsp_header_t*
fsp_get_space_header(
/*=================*/
- /* out: pointer to the space header, page x-locked */
- ulint id, /* in: space id */
- mtr_t* mtr) /* in: mtr */
+ ulint id, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ mtr_t* mtr) /*!< in: mtr */
{
+ buf_block_t* block;
fsp_header_t* header;
- ut_ad(mtr);
+ ut_ad(ut_is_2pow(zip_size));
+ ut_ad(zip_size <= UNIV_PAGE_SIZE);
+ ut_ad(!zip_size || zip_size >= PAGE_ZIP_MIN_SIZE);
+ ut_ad(id || !zip_size);
+
+ block = buf_page_get(id, zip_size, 0, RW_X_LATCH, mtr);
+ header = FSP_HEADER_OFFSET + buf_block_get_frame(block);
+ buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
- header = FSP_HEADER_OFFSET + buf_page_get(id, 0, RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(header, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(id == mach_read_from_4(FSP_SPACE_ID + header));
+ ut_ad(zip_size == dict_table_flags_to_zip_size(
+ mach_read_from_4(FSP_SPACE_FLAGS + header)));
return(header);
}
-/**************************************************************************
-Gets a descriptor bit of a page. */
+/**********************************************************************//**
+Gets a descriptor bit of a page.
+@return TRUE if free */
UNIV_INLINE
ibool
xdes_get_bit(
/*=========*/
- /* out: TRUE if free */
- xdes_t* descr, /* in: descriptor */
- ulint bit, /* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
- ulint offset, /* in: page offset within extent:
+ xdes_t* descr, /*!< in: descriptor */
+ ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
+ ulint offset, /*!< in: page offset within extent:
0 ... FSP_EXTENT_SIZE - 1 */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint index;
ulint byte_index;
ulint bit_index;
- ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT));
ut_ad(offset < FSP_EXTENT_SIZE);
@@ -368,26 +410,25 @@ xdes_get_bit(
bit_index));
}
-/**************************************************************************
+/**********************************************************************//**
Sets a descriptor bit of a page. */
UNIV_INLINE
void
xdes_set_bit(
/*=========*/
- xdes_t* descr, /* in: descriptor */
- ulint bit, /* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
- ulint offset, /* in: page offset within extent:
+ xdes_t* descr, /*!< in: descriptor */
+ ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
+ ulint offset, /*!< in: page offset within extent:
0 ... FSP_EXTENT_SIZE - 1 */
- ibool val, /* in: bit value */
- mtr_t* mtr) /* in: mtr */
+ ibool val, /*!< in: bit value */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint index;
ulint byte_index;
ulint bit_index;
ulint descr_byte;
- ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT));
ut_ad(offset < FSP_EXTENT_SIZE);
@@ -404,29 +445,27 @@ xdes_set_bit(
MLOG_1BYTE, mtr);
}
-/**************************************************************************
+/**********************************************************************//**
Looks for a descriptor bit having the desired value. Starts from hint
and scans upward; at the end of the extent the search is wrapped to
-the start of the extent. */
+the start of the extent.
+@return bit index of the bit, ULINT_UNDEFINED if not found */
UNIV_INLINE
ulint
xdes_find_bit(
/*==========*/
- /* out: bit index of the bit, ULINT_UNDEFINED if not
- found */
- xdes_t* descr, /* in: descriptor */
- ulint bit, /* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
- ibool val, /* in: desired bit value */
- ulint hint, /* in: hint of which bit position would be desirable */
- mtr_t* mtr) /* in: mtr */
+ xdes_t* descr, /*!< in: descriptor */
+ ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
+ ibool val, /*!< in: desired bit value */
+ ulint hint, /*!< in: hint of which bit position would be desirable */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint i;
ut_ad(descr && mtr);
ut_ad(val <= TRUE);
ut_ad(hint < FSP_EXTENT_SIZE);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
for (i = hint; i < FSP_EXTENT_SIZE; i++) {
if (val == xdes_get_bit(descr, bit, i, mtr)) {
@@ -444,28 +483,26 @@ xdes_find_bit(
return(ULINT_UNDEFINED);
}
-/**************************************************************************
+/**********************************************************************//**
Looks for a descriptor bit having the desired value. Scans the extent in
-a direction opposite to xdes_find_bit. */
+a direction opposite to xdes_find_bit.
+@return bit index of the bit, ULINT_UNDEFINED if not found */
UNIV_INLINE
ulint
xdes_find_bit_downward(
/*===================*/
- /* out: bit index of the bit, ULINT_UNDEFINED if not
- found */
- xdes_t* descr, /* in: descriptor */
- ulint bit, /* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
- ibool val, /* in: desired bit value */
- ulint hint, /* in: hint of which bit position would be desirable */
- mtr_t* mtr) /* in: mtr */
+ xdes_t* descr, /*!< in: descriptor */
+ ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
+ ibool val, /*!< in: desired bit value */
+ ulint hint, /*!< in: hint of which bit position would be desirable */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint i;
ut_ad(descr && mtr);
ut_ad(val <= TRUE);
ut_ad(hint < FSP_EXTENT_SIZE);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
for (i = hint + 1; i > 0; i--) {
if (val == xdes_get_bit(descr, bit, i - 1, mtr)) {
@@ -483,22 +520,21 @@ xdes_find_bit_downward(
return(ULINT_UNDEFINED);
}
-/**************************************************************************
-Returns the number of used pages in a descriptor. */
+/**********************************************************************//**
+Returns the number of used pages in a descriptor.
+@return number of pages used */
UNIV_INLINE
ulint
xdes_get_n_used(
/*============*/
- /* out: number of pages used */
- xdes_t* descr, /* in: descriptor */
- mtr_t* mtr) /* in: mtr */
+ xdes_t* descr, /*!< in: descriptor */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint i;
ulint count = 0;
ut_ad(descr && mtr);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
for (i = 0; i < FSP_EXTENT_SIZE; i++) {
if (FALSE == xdes_get_bit(descr, XDES_FREE_BIT, i, mtr)) {
count++;
@@ -508,15 +544,15 @@ xdes_get_n_used(
return(count);
}
-/**************************************************************************
-Returns true if extent contains no used pages. */
+/**********************************************************************//**
+Returns true if extent contains no used pages.
+@return TRUE if totally free */
UNIV_INLINE
ibool
xdes_is_free(
/*=========*/
- /* out: TRUE if totally free */
- xdes_t* descr, /* in: descriptor */
- mtr_t* mtr) /* in: mtr */
+ xdes_t* descr, /*!< in: descriptor */
+ mtr_t* mtr) /*!< in: mtr */
{
if (0 == xdes_get_n_used(descr, mtr)) {
@@ -526,15 +562,15 @@ xdes_is_free(
return(FALSE);
}
-/**************************************************************************
-Returns true if extent contains no free pages. */
+/**********************************************************************//**
+Returns true if extent contains no free pages.
+@return TRUE if full */
UNIV_INLINE
ibool
xdes_is_full(
/*=========*/
- /* out: TRUE if full */
- xdes_t* descr, /* in: descriptor */
- mtr_t* mtr) /* in: mtr */
+ xdes_t* descr, /*!< in: descriptor */
+ mtr_t* mtr) /*!< in: mtr */
{
if (FSP_EXTENT_SIZE == xdes_get_n_used(descr, mtr)) {
@@ -544,56 +580,57 @@ xdes_is_full(
return(FALSE);
}
-/**************************************************************************
+/**********************************************************************//**
Sets the state of an xdes. */
UNIV_INLINE
void
xdes_set_state(
/*===========*/
- xdes_t* descr, /* in: descriptor */
- ulint state, /* in: state to set */
- mtr_t* mtr) /* in: mtr handle */
+ xdes_t* descr, /*!< in: descriptor */
+ ulint state, /*!< in: state to set */
+ mtr_t* mtr) /*!< in: mtr handle */
{
ut_ad(descr && mtr);
ut_ad(state >= XDES_FREE);
ut_ad(state <= XDES_FSEG);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
mlog_write_ulint(descr + XDES_STATE, state, MLOG_4BYTES, mtr);
}
-/**************************************************************************
-Gets the state of an xdes. */
+/**********************************************************************//**
+Gets the state of an xdes.
+@return state */
UNIV_INLINE
ulint
xdes_get_state(
/*===========*/
- /* out: state */
- xdes_t* descr, /* in: descriptor */
- mtr_t* mtr) /* in: mtr handle */
+ xdes_t* descr, /*!< in: descriptor */
+ mtr_t* mtr) /*!< in: mtr handle */
{
+ ulint state;
+
ut_ad(descr && mtr);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
- return(mtr_read_ulint(descr + XDES_STATE, MLOG_4BYTES, mtr));
+ state = mtr_read_ulint(descr + XDES_STATE, MLOG_4BYTES, mtr);
+ ut_ad(state - 1 < XDES_FSEG);
+ return(state);
}
-/**************************************************************************
+/**********************************************************************//**
Inits an extent descriptor to the free and clean state. */
UNIV_INLINE
void
xdes_init(
/*======*/
- xdes_t* descr, /* in: descriptor */
- mtr_t* mtr) /* in: mtr */
+ xdes_t* descr, /*!< in: descriptor */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint i;
ut_ad(descr && mtr);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
ut_ad((XDES_SIZE - XDES_BITMAP) % 4 == 0);
for (i = XDES_BITMAP; i < XDES_SIZE; i += 4) {
@@ -603,68 +640,96 @@ xdes_init(
xdes_set_state(descr, XDES_FREE, mtr);
}
-/************************************************************************
-Calculates the page where the descriptor of a page resides. */
+/********************************************************************//**
+Calculates the page where the descriptor of a page resides.
+@return descriptor page offset */
UNIV_INLINE
ulint
xdes_calc_descriptor_page(
/*======================*/
- /* out: descriptor page offset */
- ulint offset) /* in: page offset */
+ ulint zip_size, /*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint offset) /*!< in: page offset */
{
-#if UNIV_PAGE_SIZE <= XDES_ARR_OFFSET \
- + (XDES_DESCRIBED_PER_PAGE / FSP_EXTENT_SIZE) * XDES_SIZE
-# error
-#endif
-
- return(ut_2pow_round(offset, XDES_DESCRIBED_PER_PAGE));
+#ifndef DOXYGEN /* Doxygen gets confused by these */
+# if UNIV_PAGE_SIZE <= XDES_ARR_OFFSET \
+ + (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE) * XDES_SIZE
+# error
+# endif
+# if PAGE_ZIP_MIN_SIZE <= XDES_ARR_OFFSET \
+ + (PAGE_ZIP_MIN_SIZE / FSP_EXTENT_SIZE) * XDES_SIZE
+# error
+# endif
+#endif /* !DOXYGEN */
+ ut_ad(ut_is_2pow(zip_size));
+
+ if (!zip_size) {
+ return(ut_2pow_round(offset, UNIV_PAGE_SIZE));
+ } else {
+ ut_ad(zip_size > XDES_ARR_OFFSET
+ + (zip_size / FSP_EXTENT_SIZE) * XDES_SIZE);
+ return(ut_2pow_round(offset, zip_size));
+ }
}
-/************************************************************************
-Calculates the descriptor index within a descriptor page. */
+/********************************************************************//**
+Calculates the descriptor index within a descriptor page.
+@return descriptor index */
UNIV_INLINE
ulint
xdes_calc_descriptor_index(
/*=======================*/
- /* out: descriptor index */
- ulint offset) /* in: page offset */
+ ulint zip_size, /*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint offset) /*!< in: page offset */
{
- return(ut_2pow_remainder(offset, XDES_DESCRIBED_PER_PAGE)
- / FSP_EXTENT_SIZE);
+ ut_ad(ut_is_2pow(zip_size));
+
+ if (!zip_size) {
+ return(ut_2pow_remainder(offset, UNIV_PAGE_SIZE)
+ / FSP_EXTENT_SIZE);
+ } else {
+ return(ut_2pow_remainder(offset, zip_size) / FSP_EXTENT_SIZE);
+ }
}
-/************************************************************************
+/********************************************************************//**
Gets a pointer to the extent descriptor of a page. The page where the extent
descriptor resides is x-locked. If the page offset is equal to the free limit
of the space, adds new extents from above the free limit to the space free
list, if not free limit == space size. This adding is necessary to make the
-descriptor defined, as they are uninitialized above the free limit. */
+descriptor defined, as they are uninitialized above the free limit.
+@return pointer to the extent descriptor, NULL if the page does not
+exist in the space or if the offset exceeds the free limit */
UNIV_INLINE
xdes_t*
xdes_get_descriptor_with_space_hdr(
/*===============================*/
- /* out: pointer to the extent descriptor,
- NULL if the page does not exist in the
- space or if offset > free limit */
- fsp_header_t* sp_header,/* in: space header, x-latched */
- ulint space, /* in: space id */
- ulint offset, /* in: page offset;
+ fsp_header_t* sp_header,/*!< in: space header, x-latched */
+ ulint space, /*!< in: space id */
+ ulint offset, /*!< in: page offset;
if equal to the free limit,
we try to add new extents to
the space free list */
- mtr_t* mtr) /* in: mtr handle */
+ mtr_t* mtr) /*!< in: mtr handle */
{
ulint limit;
ulint size;
+ ulint zip_size;
ulint descr_page_no;
page_t* descr_page;
ut_ad(mtr);
- ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space),
+ ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL),
MTR_MEMO_X_LOCK));
+ ut_ad(mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_S_FIX)
+ || mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(page_offset(sp_header) == FSP_HEADER_OFFSET);
/* Read free limit and space size */
- limit = mtr_read_ulint(sp_header + FSP_FREE_LIMIT, MLOG_4BYTES, mtr);
- size = mtr_read_ulint(sp_header + FSP_SIZE, MLOG_4BYTES, mtr);
+ limit = mach_read_from_4(sp_header + FSP_FREE_LIMIT);
+ size = mach_read_from_4(sp_header + FSP_SIZE);
+ zip_size = dict_table_flags_to_zip_size(
+ mach_read_from_4(sp_header + FSP_SPACE_FLAGS));
/* If offset is >= size or > limit, return NULL */
@@ -679,174 +744,181 @@ xdes_get_descriptor_with_space_hdr(
fsp_fill_free_list(FALSE, space, sp_header, mtr);
}
- descr_page_no = xdes_calc_descriptor_page(offset);
+ descr_page_no = xdes_calc_descriptor_page(zip_size, offset);
if (descr_page_no == 0) {
/* It is on the space header page */
- descr_page = buf_frame_align(sp_header);
+ descr_page = page_align(sp_header);
} else {
- descr_page = buf_page_get(space, descr_page_no, RW_X_LATCH,
- mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(descr_page, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
+ buf_block_t* block;
+
+ block = buf_page_get(space, zip_size, descr_page_no,
+ RW_X_LATCH, mtr);
+ buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
+
+ descr_page = buf_block_get_frame(block);
}
return(descr_page + XDES_ARR_OFFSET
- + XDES_SIZE * xdes_calc_descriptor_index(offset));
+ + XDES_SIZE * xdes_calc_descriptor_index(zip_size, offset));
}
-/************************************************************************
+/********************************************************************//**
Gets a pointer to the extent descriptor of a page. The page where the
extent descriptor resides is x-locked. If the page offset is equal to
the free limit of the space, adds new extents from above the free limit
to the space free list, if not free limit == space size. This adding
is necessary to make the descriptor defined, as they are uninitialized
-above the free limit. */
+above the free limit.
+@return pointer to the extent descriptor, NULL if the page does not
+exist in the space or if the offset exceeds the free limit */
static
xdes_t*
xdes_get_descriptor(
/*================*/
- /* out: pointer to the extent descriptor, NULL if the
- page does not exist in the space or if offset > free
- limit */
- ulint space, /* in: space id */
- ulint offset, /* in: page offset; if equal to the free limit,
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint offset, /*!< in: page offset; if equal to the free limit,
we try to add new extents to the space free list */
- mtr_t* mtr) /* in: mtr handle */
+ mtr_t* mtr) /*!< in: mtr handle */
{
+ buf_block_t* block;
fsp_header_t* sp_header;
- sp_header = FSP_HEADER_OFFSET
- + buf_page_get(space, 0, RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(sp_header, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
+ block = buf_page_get(space, zip_size, 0, RW_X_LATCH, mtr);
+ buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
+
+ sp_header = FSP_HEADER_OFFSET + buf_block_get_frame(block);
return(xdes_get_descriptor_with_space_hdr(sp_header, space, offset,
mtr));
}
-/************************************************************************
+/********************************************************************//**
Gets a pointer to the extent descriptor if the file address
of the descriptor list node is known. The page where the
-extent descriptor resides is x-locked. */
+extent descriptor resides is x-locked.
+@return pointer to the extent descriptor */
UNIV_INLINE
xdes_t*
xdes_lst_get_descriptor(
/*====================*/
- /* out: pointer to the extent descriptor */
- ulint space, /* in: space id */
- fil_addr_t lst_node,/* in: file address of the list node
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ fil_addr_t lst_node,/*!< in: file address of the list node
contained in the descriptor */
- mtr_t* mtr) /* in: mtr handle */
+ mtr_t* mtr) /*!< in: mtr handle */
{
xdes_t* descr;
ut_ad(mtr);
- ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space),
+ ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL),
MTR_MEMO_X_LOCK));
- descr = fut_get_ptr(space, lst_node, RW_X_LATCH, mtr) - XDES_FLST_NODE;
+ descr = fut_get_ptr(space, zip_size, lst_node, RW_X_LATCH, mtr)
+ - XDES_FLST_NODE;
return(descr);
}
-/************************************************************************
-Gets pointer to the next descriptor in a descriptor list and x-locks its
-page. */
-UNIV_INLINE
-xdes_t*
-xdes_lst_get_next(
-/*==============*/
- xdes_t* descr, /* in: pointer to a descriptor */
- mtr_t* mtr) /* in: mtr handle */
-{
- ulint space;
-
- ut_ad(mtr && descr);
-
- space = buf_frame_get_space_id(descr);
-
- return(xdes_lst_get_descriptor(
- space,
- flst_get_next_addr(descr + XDES_FLST_NODE, mtr), mtr));
-}
-
-/************************************************************************
-Returns page offset of the first page in extent described by a descriptor. */
+/********************************************************************//**
+Returns page offset of the first page in extent described by a descriptor.
+@return offset of the first page in extent */
UNIV_INLINE
ulint
xdes_get_offset(
/*============*/
- /* out: offset of the first page in extent */
- xdes_t* descr) /* in: extent descriptor */
+ xdes_t* descr) /*!< in: extent descriptor */
{
ut_ad(descr);
- return(buf_frame_get_page_no(descr)
- + ((descr - buf_frame_align(descr) - XDES_ARR_OFFSET)
- / XDES_SIZE)
+ return(page_get_page_no(page_align(descr))
+ + ((page_offset(descr) - XDES_ARR_OFFSET) / XDES_SIZE)
* FSP_EXTENT_SIZE);
}
+#endif /* !UNIV_HOTBACKUP */
-/***************************************************************
+/***********************************************************//**
Inits a file page whose prior contents should be ignored. */
static
void
fsp_init_file_page_low(
/*===================*/
- byte* ptr) /* in: pointer to a page */
+ buf_block_t* block) /*!< in: pointer to a page */
{
- page_t* page;
- page = buf_frame_align(ptr);
-
- buf_block_align(page)->check_index_page_at_flush = FALSE;
+ page_t* page = buf_block_get_frame(block);
+ page_zip_des_t* page_zip= buf_block_get_page_zip(block);
+
+#ifndef UNIV_HOTBACKUP
+ block->check_index_page_at_flush = FALSE;
+#endif /* !UNIV_HOTBACKUP */
+
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ memset(page, 0, UNIV_PAGE_SIZE);
+ memset(page_zip->data, 0, page_zip_get_size(page_zip));
+ mach_write_to_4(page + FIL_PAGE_OFFSET,
+ buf_block_get_page_no(block));
+ mach_write_to_4(page
+ + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
+ buf_block_get_space(block));
+ memcpy(page_zip->data + FIL_PAGE_OFFSET,
+ page + FIL_PAGE_OFFSET, 4);
+ memcpy(page_zip->data + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
+ page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 4);
+ return;
+ }
#ifdef UNIV_BASIC_LOG_DEBUG
memset(page, 0xff, UNIV_PAGE_SIZE);
#endif
- mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
- ut_dulint_zero);
- mach_write_to_8(page + FIL_PAGE_LSN, ut_dulint_zero);
+ mach_write_to_4(page + FIL_PAGE_OFFSET, buf_block_get_page_no(block));
+ memset(page + FIL_PAGE_LSN, 0, 8);
+ mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
+ buf_block_get_space(block));
+ memset(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM, 0, 8);
}
-/***************************************************************
+#ifndef UNIV_HOTBACKUP
+/***********************************************************//**
Inits a file page whose prior contents should be ignored. */
static
void
fsp_init_file_page(
/*===============*/
- page_t* page, /* in: page */
- mtr_t* mtr) /* in: mtr */
+ buf_block_t* block, /*!< in: pointer to a page */
+ mtr_t* mtr) /*!< in: mtr */
{
- fsp_init_file_page_low(page);
+ fsp_init_file_page_low(block);
- mlog_write_initial_log_record(page, MLOG_INIT_FILE_PAGE, mtr);
+ mlog_write_initial_log_record(buf_block_get_frame(block),
+ MLOG_INIT_FILE_PAGE, mtr);
}
+#endif /* !UNIV_HOTBACKUP */
-/***************************************************************
-Parses a redo log record of a file page init. */
-
+/***********************************************************//**
+Parses a redo log record of a file page init.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
fsp_parse_init_file_page(
/*=====================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr __attribute__((unused)), /* in: buffer end */
- page_t* page) /* in: page or NULL */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr __attribute__((unused)), /*!< in: buffer end */
+ buf_block_t* block) /*!< in: block or NULL */
{
ut_ad(ptr && end_ptr);
- if (page) {
- fsp_init_file_page_low(page);
+ if (block) {
+ fsp_init_file_page_low(block);
}
return(ptr);
}
-/**************************************************************************
+/**********************************************************************//**
Initializes the fsp system. */
-
+UNIV_INTERN
void
fsp_init(void)
/*==========*/
@@ -854,46 +926,62 @@ fsp_init(void)
/* Does nothing at the moment */
}
-/**************************************************************************
-Writes the space id to a tablespace header. This function is used past the
-buffer pool when we in fil0fil.c create a new single-table tablespace. */
-
+/**********************************************************************//**
+Writes the space id and compressed page size to a tablespace header.
+This function is used, bypassing the buffer pool, when fil0fil.c creates
+a new single-table tablespace. */
+UNIV_INTERN
void
-fsp_header_write_space_id(
-/*======================*/
- page_t* page, /* in: first page in the space */
- ulint space_id) /* in: space id */
+fsp_header_init_fields(
+/*===================*/
+ page_t* page, /*!< in/out: first page in the space */
+ ulint space_id, /*!< in: space id */
+ ulint flags) /*!< in: tablespace flags (FSP_SPACE_FLAGS):
+ 0, or table->flags if newer than COMPACT */
{
- mach_write_to_4(page + FSP_HEADER_OFFSET + FSP_SPACE_ID, space_id);
+ /* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
+ ROW_FORMAT=COMPACT (table->flags == DICT_TF_COMPACT) and
+ ROW_FORMAT=REDUNDANT (table->flags == 0). For any other
+ format, the tablespace flags should equal table->flags. */
+ ut_a(flags != DICT_TF_COMPACT);
+
+ mach_write_to_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + page,
+ space_id);
+ mach_write_to_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page,
+ flags);
}
-/**************************************************************************
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
Initializes the space header of a newly created space and also creates the
insert buffer tree root if space == 0. */
-
+UNIV_INTERN
void
fsp_header_init(
/*============*/
- ulint space, /* in: space id */
- ulint size, /* in: current size in blocks */
- mtr_t* mtr) /* in: mini-transaction handle */
+ ulint space, /*!< in: space id */
+ ulint size, /*!< in: current size in blocks */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
fsp_header_t* header;
+ buf_block_t* block;
page_t* page;
+ ulint flags;
+ ulint zip_size;
ut_ad(mtr);
- mtr_x_lock(fil_space_get_latch(space), mtr);
+ mtr_x_lock(fil_space_get_latch(space, &flags), mtr);
- page = buf_page_create(space, 0, mtr);
- buf_page_get(space, 0, RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
+ zip_size = dict_table_flags_to_zip_size(flags);
+ block = buf_page_create(space, 0, zip_size, mtr);
+ buf_page_get(space, zip_size, 0, RW_X_LATCH, mtr);
+ buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
/* The prior contents of the file page should be ignored */
- fsp_init_file_page(page, mtr);
+ fsp_init_file_page(block, mtr);
+ page = buf_block_get_frame(block);
mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_FSP_HDR,
MLOG_2BYTES, mtr);
@@ -905,7 +993,8 @@ fsp_header_init(
mlog_write_ulint(header + FSP_SIZE, size, MLOG_4BYTES, mtr);
mlog_write_ulint(header + FSP_FREE_LIMIT, 0, MLOG_4BYTES, mtr);
- mlog_write_ulint(header + FSP_LOWEST_NO_WRITE, 0, MLOG_4BYTES, mtr);
+ mlog_write_ulint(header + FSP_SPACE_FLAGS, flags,
+ MLOG_4BYTES, mtr);
mlog_write_ulint(header + FSP_FRAG_N_USED, 0, MLOG_4BYTES, mtr);
flst_init(header + FSP_FREE, mtr);
@@ -917,21 +1006,23 @@ fsp_header_init(
mlog_write_dulint(header + FSP_SEG_ID, ut_dulint_create(0, 1), mtr);
if (space == 0) {
fsp_fill_free_list(FALSE, space, header, mtr);
- btr_create(DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, space,
- ut_dulint_add(DICT_IBUF_ID_MIN, space), FALSE, mtr);
+ btr_create(DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF,
+ 0, 0, ut_dulint_add(DICT_IBUF_ID_MIN, space),
+ dict_ind_redundant, mtr);
} else {
fsp_fill_free_list(TRUE, space, header, mtr);
}
}
+#endif /* !UNIV_HOTBACKUP */
-/**************************************************************************
-Reads the space id from the first page of a tablespace. */
-
+/**********************************************************************//**
+Reads the space id from the first page of a tablespace.
+@return space id, ULINT_UNDEFINED if error */
+UNIV_INTERN
ulint
fsp_header_get_space_id(
/*====================*/
- /* out: space id, ULINT UNDEFINED if error */
- page_t* page) /* in: first page of a tablespace */
+ const page_t* page) /*!< in: first page of a tablespace */
{
ulint fsp_id;
ulint id;
@@ -952,24 +1043,56 @@ fsp_header_get_space_id(
return(id);
}
-/**************************************************************************
-Increases the space size field of a space. */
+/**********************************************************************//**
+Reads the space flags from the first page of a tablespace.
+@return flags */
+UNIV_INTERN
+ulint
+fsp_header_get_flags(
+/*=================*/
+ const page_t* page) /*!< in: first page of a tablespace */
+{
+ ut_ad(!page_offset(page));
+
+ return(mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page));
+}
+
+/**********************************************************************//**
+Reads the compressed page size from the first page of a tablespace.
+@return compressed page size in bytes, or 0 if uncompressed */
+UNIV_INTERN
+ulint
+fsp_header_get_zip_size(
+/*====================*/
+ const page_t* page) /*!< in: first page of a tablespace */
+{
+ ulint flags = fsp_header_get_flags(page);
+
+ return(dict_table_flags_to_zip_size(flags));
+}
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Increases the space size field of a space. */
+UNIV_INTERN
void
fsp_header_inc_size(
/*================*/
- ulint space, /* in: space id */
- ulint size_inc,/* in: size increment in pages */
- mtr_t* mtr) /* in: mini-transaction handle */
+ ulint space, /*!< in: space id */
+ ulint size_inc,/*!< in: size increment in pages */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
fsp_header_t* header;
ulint size;
+ ulint flags;
ut_ad(mtr);
- mtr_x_lock(fil_space_get_latch(space), mtr);
+ mtr_x_lock(fil_space_get_latch(space, &flags), mtr);
- header = fsp_get_space_header(space, mtr);
+ header = fsp_get_space_header(space,
+ dict_table_flags_to_zip_size(flags),
+ mtr);
size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
@@ -977,33 +1100,30 @@ fsp_header_inc_size(
mtr);
}
-/**************************************************************************
-Gets the current free limit of a tablespace. The free limit means the
-place of the first page which has never been put to the the free list
-for allocation. The space above that address is initialized to zero.
-Sets also the global variable log_fsp_current_free_limit. */
-
+/**********************************************************************//**
+Gets the current free limit of the system tablespace. The free limit
+means the place of the first page which has never been put to the
+free list for allocation. The space above that address is initialized
+to zero. Sets also the global variable log_fsp_current_free_limit.
+@return free limit in megabytes */
+UNIV_INTERN
ulint
-fsp_header_get_free_limit(
-/*======================*/
- /* out: free limit in megabytes */
- ulint space) /* in: space id, must be 0 */
+fsp_header_get_free_limit(void)
+/*===========================*/
{
fsp_header_t* header;
ulint limit;
mtr_t mtr;
- ut_a(space == 0); /* We have only one log_fsp_current_... variable */
-
mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
+ mtr_x_lock(fil_space_get_latch(0, NULL), &mtr);
- header = fsp_get_space_header(space, &mtr);
+ header = fsp_get_space_header(0, 0, &mtr);
limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES, &mtr);
- limit = limit / ((1024 * 1024) / UNIV_PAGE_SIZE);
+ limit /= ((1024 * 1024) / UNIV_PAGE_SIZE);
log_fsp_current_free_limit_set_and_checkpoint(limit);
@@ -1012,28 +1132,26 @@ fsp_header_get_free_limit(
return(limit);
}
-/**************************************************************************
-Gets the size of the tablespace from the tablespace header. If we do not
-have an auto-extending data file, this should be equal to the size of the
-data files. If there is an auto-extending data file, this can be smaller. */
-
+/**********************************************************************//**
+Gets the size of the system tablespace from the tablespace header. If
+we do not have an auto-extending data file, this should be equal to
+the size of the data files. If there is an auto-extending data file,
+this can be smaller.
+@return size in pages */
+UNIV_INTERN
ulint
-fsp_header_get_tablespace_size(
-/*===========================*/
- /* out: size in pages */
- ulint space) /* in: space id, must be 0 */
+fsp_header_get_tablespace_size(void)
+/*================================*/
{
fsp_header_t* header;
ulint size;
mtr_t mtr;
- ut_a(space == 0); /* We have only one log_fsp_current_... variable */
-
mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
+ mtr_x_lock(fil_space_get_latch(0, NULL), &mtr);
- header = fsp_get_space_header(space, &mtr);
+ header = fsp_get_space_header(0, 0, &mtr);
size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);
@@ -1042,18 +1160,18 @@ fsp_header_get_tablespace_size(
return(size);
}
-/***************************************************************************
+/***********************************************************************//**
Tries to extend a single-table tablespace so that a page would fit in the
-data file. */
+data file.
+@return TRUE if success */
static
ibool
fsp_try_extend_data_file_with_pages(
/*================================*/
- /* out: TRUE if success */
- ulint space, /* in: space */
- ulint page_no, /* in: page number */
- fsp_header_t* header, /* in: space header */
- mtr_t* mtr) /* in: mtr */
+ ulint space, /*!< in: space */
+ ulint page_no, /*!< in: page number */
+ fsp_header_t* header, /*!< in: space header */
+ mtr_t* mtr) /*!< in: mtr */
{
ibool success;
ulint actual_size;
@@ -1075,23 +1193,24 @@ fsp_try_extend_data_file_with_pages(
return(success);
}
-/***************************************************************************
-Tries to extend the last data file of a tablespace if it is auto-extending. */
+/***********************************************************************//**
+Tries to extend the last data file of a tablespace if it is auto-extending.
+@return FALSE if not auto-extending */
static
ibool
fsp_try_extend_data_file(
/*=====================*/
- /* out: FALSE if not auto-extending */
- ulint* actual_increase,/* out: actual increase in pages, where
+ ulint* actual_increase,/*!< out: actual increase in pages, where
we measure the tablespace size from
what the header field says; it may be
the actual file size rounded down to
megabyte */
- ulint space, /* in: space */
- fsp_header_t* header, /* in: space header */
- mtr_t* mtr) /* in: mtr */
+ ulint space, /*!< in: space */
+ fsp_header_t* header, /*!< in: space header */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint size;
+ ulint zip_size;
ulint new_size;
ulint old_size;
ulint size_increase;
@@ -1102,65 +1221,86 @@ fsp_try_extend_data_file(
if (space == 0 && !srv_auto_extend_last_data_file) {
+ /* We print the error message only once to avoid
+ spamming the error log. Note that we don't need
+ to reset the flag to FALSE as dealing with this
+ error requires server restart. */
+ if (fsp_tbs_full_error_printed == FALSE) {
+ fprintf(stderr,
+ "InnoDB: Error: Data file(s) ran"
+ " out of space.\n"
+ "Please add another data file or"
+ " use \'autoextend\' for the last"
+ " data file.\n");
+ fsp_tbs_full_error_printed = TRUE;
+ }
return(FALSE);
}
size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
+ zip_size = dict_table_flags_to_zip_size(
+ mach_read_from_4(header + FSP_SPACE_FLAGS));
old_size = size;
- if (space == 0 && srv_last_file_size_max != 0) {
- if (srv_last_file_size_max
- < srv_data_file_sizes[srv_n_data_files - 1]) {
+ if (space == 0) {
+ if (!srv_last_file_size_max) {
+ size_increase = SRV_AUTO_EXTEND_INCREMENT;
+ } else {
+ if (srv_last_file_size_max
+ < srv_data_file_sizes[srv_n_data_files - 1]) {
- fprintf(stderr,
- "InnoDB: Error: Last data file size is %lu,"
- " max size allowed %lu\n",
- (ulong) srv_data_file_sizes[
- srv_n_data_files - 1],
- (ulong) srv_last_file_size_max);
- }
+ fprintf(stderr,
+ "InnoDB: Error: Last data file size"
+ " is %lu, max size allowed %lu\n",
+ (ulong) srv_data_file_sizes[
+ srv_n_data_files - 1],
+ (ulong) srv_last_file_size_max);
+ }
- size_increase = srv_last_file_size_max
- - srv_data_file_sizes[srv_n_data_files - 1];
- if (size_increase > SRV_AUTO_EXTEND_INCREMENT) {
- size_increase = SRV_AUTO_EXTEND_INCREMENT;
+ size_increase = srv_last_file_size_max
+ - srv_data_file_sizes[srv_n_data_files - 1];
+ if (size_increase > SRV_AUTO_EXTEND_INCREMENT) {
+ size_increase = SRV_AUTO_EXTEND_INCREMENT;
+ }
}
} else {
- if (space == 0) {
- size_increase = SRV_AUTO_EXTEND_INCREMENT;
+ /* We extend single-table tablespaces first one extent
+ at a time, but for bigger tablespaces more. It is not
+ enough to extend always by one extent, because some
+ extents are frag page extents. */
+ ulint extent_size; /*!< one megabyte, in pages */
+
+ if (!zip_size) {
+ extent_size = FSP_EXTENT_SIZE;
} else {
- /* We extend single-table tablespaces first one extent
- at a time, but for bigger tablespaces more. It is not
- enough to extend always by one extent, because some
- extents are frag page extents. */
-
- if (size < FSP_EXTENT_SIZE) {
- /* Let us first extend the file to 64 pages */
- success = fsp_try_extend_data_file_with_pages(
- space, FSP_EXTENT_SIZE - 1,
- header, mtr);
- if (!success) {
- new_size = mtr_read_ulint(
- header + FSP_SIZE,
- MLOG_4BYTES, mtr);
-
- *actual_increase = new_size - old_size;
-
- return(FALSE);
- }
-
- size = FSP_EXTENT_SIZE;
- }
+ extent_size = FSP_EXTENT_SIZE
+ * UNIV_PAGE_SIZE / zip_size;
+ }
- if (size < 32 * FSP_EXTENT_SIZE) {
- size_increase = FSP_EXTENT_SIZE;
- } else {
- /* Below in fsp_fill_free_list() we assume
- that we add at most FSP_FREE_ADD extents at
- a time */
- size_increase = FSP_FREE_ADD * FSP_EXTENT_SIZE;
+ if (size < extent_size) {
+ /* Let us first extend the file to extent_size */
+ success = fsp_try_extend_data_file_with_pages(
+ space, extent_size - 1, header, mtr);
+ if (!success) {
+ new_size = mtr_read_ulint(header + FSP_SIZE,
+ MLOG_4BYTES, mtr);
+
+ *actual_increase = new_size - old_size;
+
+ return(FALSE);
}
+
+ size = extent_size;
+ }
+
+ if (size < 32 * extent_size) {
+ size_increase = extent_size;
+ } else {
+ /* Below in fsp_fill_free_list() we assume
+ that we add at most FSP_FREE_ADD extents at
+ a time */
+ size_increase = FSP_FREE_ADD * extent_size;
}
}
@@ -1174,18 +1314,21 @@ fsp_try_extend_data_file(
/* We ignore any fragments of a full megabyte when storing the size
to the space header */
- mlog_write_ulint(header + FSP_SIZE,
- ut_calc_align_down(actual_size,
- (1024 * 1024) / UNIV_PAGE_SIZE),
- MLOG_4BYTES, mtr);
- new_size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
+ if (!zip_size) {
+ new_size = ut_calc_align_down(actual_size,
+ (1024 * 1024) / UNIV_PAGE_SIZE);
+ } else {
+ new_size = ut_calc_align_down(actual_size,
+ (1024 * 1024) / zip_size);
+ }
+ mlog_write_ulint(header + FSP_SIZE, new_size, MLOG_4BYTES, mtr);
*actual_increase = new_size - old_size;
return(TRUE);
}
-/**************************************************************************
+/**********************************************************************//**
Puts new extents to the free list if there are free extents above the free
limit. If an extent happens to contain an extent descriptor page, the extent
is put to the FSP_FREE_FRAG list with the page marked as used. */
@@ -1193,32 +1336,38 @@ static
void
fsp_fill_free_list(
/*===============*/
- ibool init_space, /* in: TRUE if this is a single-table
+ ibool init_space, /*!< in: TRUE if this is a single-table
tablespace and we are only initing
the tablespace's first extent
descriptor page and ibuf bitmap page;
then we do not allocate more extents */
- ulint space, /* in: space */
- fsp_header_t* header, /* in: space header */
- mtr_t* mtr) /* in: mtr */
+ ulint space, /*!< in: space */
+ fsp_header_t* header, /*!< in: space header */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint limit;
ulint size;
+ ulint zip_size;
xdes_t* descr;
ulint count = 0;
ulint frag_n_used;
- page_t* descr_page;
- page_t* ibuf_page;
ulint actual_increase;
ulint i;
mtr_t ibuf_mtr;
ut_ad(header && mtr);
+ ut_ad(page_offset(header) == FSP_HEADER_OFFSET);
/* Check if we can fill free list from above the free list limit */
size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES, mtr);
+ zip_size = dict_table_flags_to_zip_size(
+ mach_read_from_4(FSP_SPACE_FLAGS + header));
+ ut_a(ut_is_2pow(zip_size));
+ ut_a(zip_size <= UNIV_PAGE_SIZE);
+ ut_a(!zip_size || zip_size >= PAGE_ZIP_MIN_SIZE);
+
if (space == 0 && srv_auto_extend_last_data_file
&& size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) {
@@ -1240,32 +1389,44 @@ fsp_fill_free_list(
while ((init_space && i < 1)
|| ((i + FSP_EXTENT_SIZE <= size) && (count < FSP_FREE_ADD))) {
+ ibool init_xdes;
+ if (zip_size) {
+ init_xdes = ut_2pow_remainder(i, zip_size) == 0;
+ } else {
+ init_xdes = ut_2pow_remainder(i, UNIV_PAGE_SIZE) == 0;
+ }
+
mlog_write_ulint(header + FSP_FREE_LIMIT, i + FSP_EXTENT_SIZE,
MLOG_4BYTES, mtr);
/* Update the free limit info in the log system and make
a checkpoint */
if (space == 0) {
+ ut_a(!zip_size);
log_fsp_current_free_limit_set_and_checkpoint(
(i + FSP_EXTENT_SIZE)
/ ((1024 * 1024) / UNIV_PAGE_SIZE));
}
- if (0 == i % XDES_DESCRIBED_PER_PAGE) {
+ if (UNIV_UNLIKELY(init_xdes)) {
+
+ buf_block_t* block;
/* We are going to initialize a new descriptor page
and a new ibuf bitmap page: the prior contents of the
pages should be ignored. */
if (i > 0) {
- descr_page = buf_page_create(space, i, mtr);
- buf_page_get(space, i, RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(descr_page,
- SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
- fsp_init_file_page(descr_page, mtr);
- mlog_write_ulint(descr_page + FIL_PAGE_TYPE,
+ block = buf_page_create(
+ space, i, zip_size, mtr);
+ buf_page_get(space, zip_size, i,
+ RW_X_LATCH, mtr);
+ buf_block_dbg_add_level(block,
+ SYNC_FSP_PAGE);
+
+ fsp_init_file_page(block, mtr);
+ mlog_write_ulint(buf_block_get_frame(block)
+ + FIL_PAGE_TYPE,
FIL_PAGE_TYPE_XDES,
MLOG_2BYTES, mtr);
}
@@ -1277,17 +1438,17 @@ fsp_fill_free_list(
mtr_start(&ibuf_mtr);
- ibuf_page = buf_page_create(space,
+ block = buf_page_create(space,
i + FSP_IBUF_BITMAP_OFFSET,
- &ibuf_mtr);
- buf_page_get(space, i + FSP_IBUF_BITMAP_OFFSET,
+ zip_size, &ibuf_mtr);
+ buf_page_get(space, zip_size,
+ i + FSP_IBUF_BITMAP_OFFSET,
RW_X_LATCH, &ibuf_mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(ibuf_page, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
- fsp_init_file_page(ibuf_page, &ibuf_mtr);
+ buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
+
+ fsp_init_file_page(block, &ibuf_mtr);
- ibuf_bitmap_page_init(ibuf_page, &ibuf_mtr);
+ ibuf_bitmap_page_init(block, &ibuf_mtr);
mtr_commit(&ibuf_mtr);
}
@@ -1296,11 +1457,14 @@ fsp_fill_free_list(
mtr);
xdes_init(descr, mtr);
-#if XDES_DESCRIBED_PER_PAGE % FSP_EXTENT_SIZE
-# error "XDES_DESCRIBED_PER_PAGE % FSP_EXTENT_SIZE != 0"
+#if UNIV_PAGE_SIZE % FSP_EXTENT_SIZE
+# error "UNIV_PAGE_SIZE % FSP_EXTENT_SIZE != 0"
+#endif
+#if PAGE_ZIP_MIN_SIZE % FSP_EXTENT_SIZE
+# error "PAGE_ZIP_MIN_SIZE % FSP_EXTENT_SIZE != 0"
#endif
- if (0 == i % XDES_DESCRIBED_PER_PAGE) {
+ if (UNIV_UNLIKELY(init_xdes)) {
/* The first page in the extent is a descriptor page
and the second is an ibuf bitmap page: mark them
@@ -1327,19 +1491,20 @@ fsp_fill_free_list(
}
}
-/**************************************************************************
-Allocates a new free extent. */
+/**********************************************************************//**
+Allocates a new free extent.
+@return extent descriptor, NULL if cannot be allocated */
static
xdes_t*
fsp_alloc_free_extent(
/*==================*/
- /* out: extent descriptor, NULL if cannot be
- allocated */
- ulint space, /* in: space id */
- ulint hint, /* in: hint of which extent would be desirable: any
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint hint, /*!< in: hint of which extent would be desirable: any
page offset in the extent goes; the hint must not
be > FSP_FREE_LIMIT */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
fsp_header_t* header;
fil_addr_t first;
@@ -1347,7 +1512,7 @@ fsp_alloc_free_extent(
ut_ad(mtr);
- header = fsp_get_space_header(space, mtr);
+ header = fsp_get_space_header(space, zip_size, mtr);
descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr);
@@ -1368,7 +1533,7 @@ fsp_alloc_free_extent(
return(NULL); /* No free extents left */
}
- descr = xdes_lst_get_descriptor(space, first, mtr);
+ descr = xdes_lst_get_descriptor(space, zip_size, first, mtr);
}
flst_remove(header + FSP_FREE, descr + XDES_FLST_NODE, mtr);
@@ -1376,22 +1541,23 @@ fsp_alloc_free_extent(
return(descr);
}
-/**************************************************************************
-Allocates a single free page from a space. The page is marked as used. */
+/**********************************************************************//**
+Allocates a single free page from a space. The page is marked as used.
+@return the page offset, FIL_NULL if no page could be allocated */
static
ulint
fsp_alloc_free_page(
/*================*/
- /* out: the page offset, FIL_NULL if no page could
- be allocated */
- ulint space, /* in: space id */
- ulint hint, /* in: hint of which page would be desirable */
- mtr_t* mtr) /* in: mtr handle */
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint hint, /*!< in: hint of which page would be desirable */
+ mtr_t* mtr) /*!< in: mtr handle */
{
fsp_header_t* header;
fil_addr_t first;
xdes_t* descr;
- page_t* page;
+ buf_block_t* block;
ulint free;
ulint frag_n_used;
ulint page_no;
@@ -1400,7 +1566,7 @@ fsp_alloc_free_page(
ut_ad(mtr);
- header = fsp_get_space_header(space, mtr);
+ header = fsp_get_space_header(space, zip_size, mtr);
/* Get the hinted descriptor */
descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr);
@@ -1419,7 +1585,8 @@ fsp_alloc_free_page(
FREE_FRAG list. But we will allocate our page from the
the free extent anyway. */
- descr = fsp_alloc_free_extent(space, hint, mtr);
+ descr = fsp_alloc_free_extent(space, zip_size,
+ hint, mtr);
if (descr == NULL) {
/* No free space left */
@@ -1431,7 +1598,8 @@ fsp_alloc_free_page(
flst_add_last(header + FSP_FREE_FRAG,
descr + XDES_FLST_NODE, mtr);
} else {
- descr = xdes_lst_get_descriptor(space, first, mtr);
+ descr = xdes_lst_get_descriptor(space, zip_size,
+ first, mtr);
}
/* Reset the hint */
@@ -1446,6 +1614,7 @@ fsp_alloc_free_page(
if (free == ULINT_UNDEFINED) {
ut_print_buf(stderr, ((byte*)descr) - 500, 1000);
+ putc('\n', stderr);
ut_error;
}
@@ -1502,28 +1671,28 @@ fsp_alloc_free_page(
be obtained immediately with buf_page_get without need for a disk
read. */
- buf_page_create(space, page_no, mtr);
+ buf_page_create(space, page_no, zip_size, mtr);
- page = buf_page_get(space, page_no, RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
+ block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr);
+ buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
/* Prior contents of the page should be ignored */
- fsp_init_file_page(page, mtr);
+ fsp_init_file_page(block, mtr);
return(page_no);
}
-/**************************************************************************
+/**********************************************************************//**
Frees a single page of a space. The page is marked as free and clean. */
static
void
fsp_free_page(
/*==========*/
- ulint space, /* in: space id */
- ulint page, /* in: page offset */
- mtr_t* mtr) /* in: mtr handle */
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page, /*!< in: page offset */
+ mtr_t* mtr) /*!< in: mtr handle */
{
fsp_header_t* header;
xdes_t* descr;
@@ -1534,7 +1703,7 @@ fsp_free_page(
/* fprintf(stderr, "Freeing page %lu in space %lu\n", page, space); */
- header = fsp_get_space_header(space, mtr);
+ header = fsp_get_space_header(space, zip_size, mtr);
descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr);
@@ -1599,32 +1768,35 @@ fsp_free_page(
/* The extent has become free: move it to another list */
flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
mtr);
- fsp_free_extent(space, page, mtr);
+ fsp_free_extent(space, zip_size, page, mtr);
}
}
-/**************************************************************************
+/**********************************************************************//**
Returns an extent to the free list of a space. */
static
void
fsp_free_extent(
/*============*/
- ulint space, /* in: space id */
- ulint page, /* in: page offset in the extent */
- mtr_t* mtr) /* in: mtr */
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page, /*!< in: page offset in the extent */
+ mtr_t* mtr) /*!< in: mtr */
{
fsp_header_t* header;
xdes_t* descr;
ut_ad(mtr);
- header = fsp_get_space_header(space, mtr);
+ header = fsp_get_space_header(space, zip_size, mtr);
descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr);
if (xdes_get_state(descr, mtr) == XDES_FREE) {
ut_print_buf(stderr, (byte*)descr - 500, 1000);
+ putc('\n', stderr);
ut_error;
}
@@ -1634,46 +1806,50 @@ fsp_free_extent(
flst_add_last(header + FSP_FREE, descr + XDES_FLST_NODE, mtr);
}
-/**************************************************************************
-Returns the nth inode slot on an inode page. */
+/**********************************************************************//**
+Returns the nth inode slot on an inode page.
+@return segment inode */
UNIV_INLINE
fseg_inode_t*
fsp_seg_inode_page_get_nth_inode(
/*=============================*/
- /* out: segment inode */
- page_t* page, /* in: segment inode page */
- ulint i, /* in: inode index on page */
- mtr_t* mtr __attribute__((unused))) /* in: mini-transaction handle */
+ page_t* page, /*!< in: segment inode page */
+ ulint i, /*!< in: inode index on page */
+ ulint zip_size __attribute__((unused)),
+ /*!< in: compressed page size, or 0 */
+ mtr_t* mtr __attribute__((unused)))
+ /*!< in: mini-transaction handle */
{
- ut_ad(i < FSP_SEG_INODES_PER_PAGE);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(i < FSP_SEG_INODES_PER_PAGE(zip_size));
+ ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
return(page + FSEG_ARR_OFFSET + FSEG_INODE_SIZE * i);
}
-/**************************************************************************
-Looks for a used segment inode on a segment inode page. */
+/**********************************************************************//**
+Looks for a used segment inode on a segment inode page.
+@return segment inode index, or ULINT_UNDEFINED if not found */
static
ulint
fsp_seg_inode_page_find_used(
/*=========================*/
- /* out: segment inode index, or ULINT_UNDEFINED
- if not found */
- page_t* page, /* in: segment inode page */
- mtr_t* mtr) /* in: mini-transaction handle */
+ page_t* page, /*!< in: segment inode page */
+ ulint zip_size,/*!< in: compressed page size, or 0 */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
ulint i;
fseg_inode_t* inode;
- for (i = 0; i < FSP_SEG_INODES_PER_PAGE; i++) {
+ for (i = 0; i < FSP_SEG_INODES_PER_PAGE(zip_size); i++) {
- inode = fsp_seg_inode_page_get_nth_inode(page, i, mtr);
+ inode = fsp_seg_inode_page_get_nth_inode(
+ page, i, zip_size, mtr);
- if (ut_dulint_cmp(mach_read_from_8(inode + FSEG_ID),
- ut_dulint_zero) != 0) {
+ if (!ut_dulint_is_zero(mach_read_from_8(inode + FSEG_ID))) {
/* This is used */
+ ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N)
+ == FSEG_MAGIC_N_VALUE);
return(i);
}
}
@@ -1681,74 +1857,83 @@ fsp_seg_inode_page_find_used(
return(ULINT_UNDEFINED);
}
-/**************************************************************************
-Looks for an unused segment inode on a segment inode page. */
+/**********************************************************************//**
+Looks for an unused segment inode on a segment inode page.
+@return segment inode index, or ULINT_UNDEFINED if not found */
static
ulint
fsp_seg_inode_page_find_free(
/*=========================*/
- /* out: segment inode index, or ULINT_UNDEFINED
- if not found */
- page_t* page, /* in: segment inode page */
- ulint j, /* in: search forward starting from this index */
- mtr_t* mtr) /* in: mini-transaction handle */
+ page_t* page, /*!< in: segment inode page */
+ ulint i, /*!< in: search forward starting from this index */
+ ulint zip_size,/*!< in: compressed page size, or 0 */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
- ulint i;
fseg_inode_t* inode;
- for (i = j; i < FSP_SEG_INODES_PER_PAGE; i++) {
+ for (; i < FSP_SEG_INODES_PER_PAGE(zip_size); i++) {
- inode = fsp_seg_inode_page_get_nth_inode(page, i, mtr);
+ inode = fsp_seg_inode_page_get_nth_inode(
+ page, i, zip_size, mtr);
- if (ut_dulint_cmp(mach_read_from_8(inode + FSEG_ID),
- ut_dulint_zero) == 0) {
+ if (ut_dulint_is_zero(mach_read_from_8(inode + FSEG_ID))) {
/* This is unused */
return(i);
}
+
+ ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N)
+ == FSEG_MAGIC_N_VALUE);
}
return(ULINT_UNDEFINED);
}
-/**************************************************************************
-Allocates a new file segment inode page. */
+/**********************************************************************//**
+Allocates a new file segment inode page.
+@return TRUE if could be allocated */
static
ibool
fsp_alloc_seg_inode_page(
/*=====================*/
- /* out: TRUE if could be allocated */
- fsp_header_t* space_header, /* in: space header */
- mtr_t* mtr) /* in: mini-transaction handle */
+ fsp_header_t* space_header, /*!< in: space header */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
fseg_inode_t* inode;
+ buf_block_t* block;
page_t* page;
ulint page_no;
ulint space;
+ ulint zip_size;
ulint i;
- space = buf_frame_get_space_id(space_header);
+ ut_ad(page_offset(space_header) == FSP_HEADER_OFFSET);
+
+ space = page_get_space_id(page_align(space_header));
+ zip_size = dict_table_flags_to_zip_size(
+ mach_read_from_4(FSP_SPACE_FLAGS + space_header));
- page_no = fsp_alloc_free_page(space, 0, mtr);
+ page_no = fsp_alloc_free_page(space, zip_size, 0, mtr);
if (page_no == FIL_NULL) {
return(FALSE);
}
- page = buf_page_get(space, page_no, RW_X_LATCH, mtr);
+ block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr);
+ buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
+
+ block->check_index_page_at_flush = FALSE;
- buf_block_align(page)->check_index_page_at_flush = FALSE;
+ page = buf_block_get_frame(block);
mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_INODE,
MLOG_2BYTES, mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
- for (i = 0; i < FSP_SEG_INODES_PER_PAGE; i++) {
+ for (i = 0; i < FSP_SEG_INODES_PER_PAGE(zip_size); i++) {
- inode = fsp_seg_inode_page_get_nth_inode(page, i, mtr);
+ inode = fsp_seg_inode_page_get_nth_inode(page, i,
+ zip_size, mtr);
mlog_write_dulint(inode + FSEG_ID, ut_dulint_zero, mtr);
}
@@ -1758,23 +1943,26 @@ fsp_alloc_seg_inode_page(
return(TRUE);
}
-/**************************************************************************
-Allocates a new file segment inode. */
+/**********************************************************************//**
+Allocates a new file segment inode.
+@return segment inode, or NULL if not enough space */
static
fseg_inode_t*
fsp_alloc_seg_inode(
/*================*/
- /* out: segment inode, or NULL if
- not enough space */
- fsp_header_t* space_header, /* in: space header */
- mtr_t* mtr) /* in: mini-transaction handle */
+ fsp_header_t* space_header, /*!< in: space header */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
ulint page_no;
+ buf_block_t* block;
page_t* page;
fseg_inode_t* inode;
ibool success;
+ ulint zip_size;
ulint n;
+ ut_ad(page_offset(space_header) == FSP_HEADER_OFFSET);
+
if (flst_get_len(space_header + FSP_SEG_INODES_FREE, mtr) == 0) {
/* Allocate a new segment inode page */
@@ -1788,20 +1976,22 @@ fsp_alloc_seg_inode(
page_no = flst_get_first(space_header + FSP_SEG_INODES_FREE, mtr).page;
- page = buf_page_get(buf_frame_get_space_id(space_header), page_no,
- RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
+ zip_size = dict_table_flags_to_zip_size(
+ mach_read_from_4(FSP_SPACE_FLAGS + space_header));
+ block = buf_page_get(page_get_space_id(page_align(space_header)),
+ zip_size, page_no, RW_X_LATCH, mtr);
+ buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
+
+ page = buf_block_get_frame(block);
- n = fsp_seg_inode_page_find_free(page, 0, mtr);
+ n = fsp_seg_inode_page_find_free(page, 0, zip_size, mtr);
ut_a(n != ULINT_UNDEFINED);
- inode = fsp_seg_inode_page_get_nth_inode(page, n, mtr);
+ inode = fsp_seg_inode_page_get_nth_inode(page, n, zip_size, mtr);
if (ULINT_UNDEFINED == fsp_seg_inode_page_find_free(page, n + 1,
- mtr)) {
+ zip_size, mtr)) {
/* There are no other unused headers left on the page: move it
to another list */
@@ -1812,29 +2002,34 @@ fsp_alloc_seg_inode(
page + FSEG_INODE_PAGE_NODE, mtr);
}
+ ut_ad(ut_dulint_is_zero(mach_read_from_8(inode + FSEG_ID))
+ || mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
return(inode);
}
-/**************************************************************************
+/**********************************************************************//**
Frees a file segment inode. */
static
void
fsp_free_seg_inode(
/*===============*/
- ulint space, /* in: space id */
- fseg_inode_t* inode, /* in: segment inode */
- mtr_t* mtr) /* in: mini-transaction handle */
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ fseg_inode_t* inode, /*!< in: segment inode */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
page_t* page;
fsp_header_t* space_header;
- page = buf_frame_align(inode);
+ page = page_align(inode);
- space_header = fsp_get_space_header(space, mtr);
+ space_header = fsp_get_space_header(space, zip_size, mtr);
ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
- if (ULINT_UNDEFINED == fsp_seg_inode_page_find_free(page, 0, mtr)) {
+ if (ULINT_UNDEFINED
+ == fsp_seg_inode_page_find_free(page, 0, zip_size, mtr)) {
/* Move the page to another list */
@@ -1846,92 +2041,121 @@ fsp_free_seg_inode(
}
mlog_write_dulint(inode + FSEG_ID, ut_dulint_zero, mtr);
- mlog_write_ulint(inode + FSEG_MAGIC_N, 0, MLOG_4BYTES, mtr);
+ mlog_write_ulint(inode + FSEG_MAGIC_N, 0xfa051ce3, MLOG_4BYTES, mtr);
- if (ULINT_UNDEFINED == fsp_seg_inode_page_find_used(page, mtr)) {
+ if (ULINT_UNDEFINED
+ == fsp_seg_inode_page_find_used(page, zip_size, mtr)) {
/* There are no other used headers left on the page: free it */
flst_remove(space_header + FSP_SEG_INODES_FREE,
page + FSEG_INODE_PAGE_NODE, mtr);
- fsp_free_page(space, buf_frame_get_page_no(page), mtr);
+ fsp_free_page(space, zip_size, page_get_page_no(page), mtr);
}
}
-/**************************************************************************
-Returns the file segment inode, page x-latched. */
+/**********************************************************************//**
+Returns the file segment inode, page x-latched.
+@return segment inode, page x-latched; NULL if the inode is free */
static
fseg_inode_t*
-fseg_inode_get(
-/*===========*/
- /* out: segment inode, page x-latched */
- fseg_header_t* header, /* in: segment header */
- mtr_t* mtr) /* in: mtr handle */
+fseg_inode_try_get(
+/*===============*/
+ fseg_header_t* header, /*!< in: segment header */
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ mtr_t* mtr) /*!< in: mtr handle */
{
fil_addr_t inode_addr;
fseg_inode_t* inode;
inode_addr.page = mach_read_from_4(header + FSEG_HDR_PAGE_NO);
inode_addr.boffset = mach_read_from_2(header + FSEG_HDR_OFFSET);
+ ut_ad(space == mach_read_from_4(header + FSEG_HDR_SPACE));
- inode = fut_get_ptr(mach_read_from_4(header + FSEG_HDR_SPACE),
- inode_addr, RW_X_LATCH, mtr);
+ inode = fut_get_ptr(space, zip_size, inode_addr, RW_X_LATCH, mtr);
- ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
+ if (UNIV_UNLIKELY
+ (ut_dulint_is_zero(mach_read_from_8(inode + FSEG_ID)))) {
+
+ inode = NULL;
+ } else {
+ ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N)
+ == FSEG_MAGIC_N_VALUE);
+ }
return(inode);
}
-/**************************************************************************
-Gets the page number from the nth fragment page slot. */
+/**********************************************************************//**
+Returns the file segment inode, page x-latched.
+@return segment inode, page x-latched */
+static
+fseg_inode_t*
+fseg_inode_get(
+/*===========*/
+ fseg_header_t* header, /*!< in: segment header */
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ mtr_t* mtr) /*!< in: mtr handle */
+{
+ fseg_inode_t* inode
+ = fseg_inode_try_get(header, space, zip_size, mtr);
+ ut_a(inode);
+ return(inode);
+}
+
+/**********************************************************************//**
+Gets the page number from the nth fragment page slot.
+@return page number, FIL_NULL if not in use */
UNIV_INLINE
ulint
fseg_get_nth_frag_page_no(
/*======================*/
- /* out: page number, FIL_NULL if not in use */
- fseg_inode_t* inode, /* in: segment inode */
- ulint n, /* in: slot index */
- mtr_t* mtr __attribute__((unused))) /* in: mtr handle */
+ fseg_inode_t* inode, /*!< in: segment inode */
+ ulint n, /*!< in: slot index */
+ mtr_t* mtr __attribute__((unused))) /*!< in: mtr handle */
{
ut_ad(inode && mtr);
ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(inode),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
return(mach_read_from_4(inode + FSEG_FRAG_ARR
+ n * FSEG_FRAG_SLOT_SIZE));
}
-/**************************************************************************
+/**********************************************************************//**
Sets the page number in the nth fragment page slot. */
UNIV_INLINE
void
fseg_set_nth_frag_page_no(
/*======================*/
- fseg_inode_t* inode, /* in: segment inode */
- ulint n, /* in: slot index */
- ulint page_no,/* in: page number to set */
- mtr_t* mtr) /* in: mtr handle */
+ fseg_inode_t* inode, /*!< in: segment inode */
+ ulint n, /*!< in: slot index */
+ ulint page_no,/*!< in: page number to set */
+ mtr_t* mtr) /*!< in: mtr handle */
{
ut_ad(inode && mtr);
ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(inode),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
mlog_write_ulint(inode + FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE,
page_no, MLOG_4BYTES, mtr);
}
-/**************************************************************************
-Finds a fragment page slot which is free. */
+/**********************************************************************//**
+Finds a fragment page slot which is free.
+@return slot index; ULINT_UNDEFINED if none found */
static
ulint
fseg_find_free_frag_page_slot(
/*==========================*/
- /* out: slot index; ULINT_UNDEFINED if none
- found */
- fseg_inode_t* inode, /* in: segment inode */
- mtr_t* mtr) /* in: mtr handle */
+ fseg_inode_t* inode, /*!< in: segment inode */
+ mtr_t* mtr) /*!< in: mtr handle */
{
ulint i;
ulint page_no;
@@ -1950,16 +2174,15 @@ fseg_find_free_frag_page_slot(
return(ULINT_UNDEFINED);
}
-/**************************************************************************
-Finds a fragment page slot which is used and last in the array. */
+/**********************************************************************//**
+Finds a fragment page slot which is used and last in the array.
+@return slot index; ULINT_UNDEFINED if none found */
static
ulint
fseg_find_last_used_frag_page_slot(
/*===============================*/
- /* out: slot index; ULINT_UNDEFINED if none
- found */
- fseg_inode_t* inode, /* in: segment inode */
- mtr_t* mtr) /* in: mtr handle */
+ fseg_inode_t* inode, /*!< in: segment inode */
+ mtr_t* mtr) /*!< in: mtr handle */
{
ulint i;
ulint page_no;
@@ -1979,15 +2202,15 @@ fseg_find_last_used_frag_page_slot(
return(ULINT_UNDEFINED);
}
-/**************************************************************************
-Calculates reserved fragment page slots. */
+/**********************************************************************//**
+Calculates reserved fragment page slots.
+@return number of fragment pages */
static
ulint
fseg_get_n_frag_pages(
/*==================*/
- /* out: number of fragment pages */
- fseg_inode_t* inode, /* in: segment inode */
- mtr_t* mtr) /* in: mtr handle */
+ fseg_inode_t* inode, /*!< in: segment inode */
+ mtr_t* mtr) /*!< in: mtr handle */
{
ulint i;
ulint count = 0;
@@ -2003,51 +2226,55 @@ fseg_get_n_frag_pages(
return(count);
}
-/**************************************************************************
-Creates a new segment. */
-
-page_t*
+/**********************************************************************//**
+Creates a new segment.
+@return the block where the segment header is placed, x-latched, NULL
+if could not create segment because of lack of space */
+UNIV_INTERN
+buf_block_t*
fseg_create_general(
/*================*/
- /* out: the page where the segment header is placed,
- x-latched, NULL if could not create segment
- because of lack of space */
- ulint space, /* in: space id */
- ulint page, /* in: page where the segment header is placed: if
+ ulint space, /*!< in: space id */
+ ulint page, /*!< in: page where the segment header is placed: if
this is != 0, the page must belong to another segment,
if this is 0, a new page will be allocated and it
will belong to the created segment */
- ulint byte_offset, /* in: byte offset of the created segment header
+ ulint byte_offset, /*!< in: byte offset of the created segment header
on the page */
- ibool has_done_reservation, /* in: TRUE if the caller has already
+ ibool has_done_reservation, /*!< in: TRUE if the caller has already
done the reservation for the pages with
fsp_reserve_free_extents (at least 2 extents: one for
the inode and the other for the segment) then there is
no need to do the check for this individual
operation */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
+ ulint flags;
+ ulint zip_size;
fsp_header_t* space_header;
fseg_inode_t* inode;
dulint seg_id;
- fseg_header_t* header = 0; /* remove warning */
+ buf_block_t* block = 0; /* remove warning */
+ fseg_header_t* header = 0; /* remove warning */
rw_lock_t* latch;
ibool success;
ulint n_reserved;
- page_t* ret = NULL;
ulint i;
ut_ad(mtr);
+ ut_ad(byte_offset + FSEG_HEADER_SIZE
+ <= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END);
+
+ latch = fil_space_get_latch(space, &flags);
+ zip_size = dict_table_flags_to_zip_size(flags);
if (page != 0) {
- header = byte_offset + buf_page_get(space, page, RW_X_LATCH,
- mtr);
+ block = buf_page_get(space, zip_size, page, RW_X_LATCH, mtr);
+ header = byte_offset + buf_block_get_frame(block);
}
ut_ad(!mutex_own(&kernel_mutex)
- || mtr_memo_contains(mtr, fil_space_get_latch(space),
- MTR_MEMO_X_LOCK));
- latch = fil_space_get_latch(space);
+ || mtr_memo_contains(mtr, latch, MTR_MEMO_X_LOCK));
mtr_x_lock(latch, mtr);
@@ -2055,8 +2282,8 @@ fseg_create_general(
/* This thread did not own the latch before this call: free
excess pages from the insert buffer free list */
- if (space == 0) {
- ibuf_free_excess_pages(space);
+ if (space == IBUF_SPACE_ID) {
+ ibuf_free_excess_pages();
}
}
@@ -2068,7 +2295,7 @@ fseg_create_general(
}
}
- space_header = fsp_get_space_header(space, mtr);
+ space_header = fsp_get_space_header(space, zip_size, mtr);
inode = fsp_alloc_seg_inode(space_header, mtr);
@@ -2099,78 +2326,77 @@ fseg_create_general(
}
if (page == 0) {
- page = fseg_alloc_free_page_low(space, inode, 0, FSP_UP, mtr);
+ page = fseg_alloc_free_page_low(space, zip_size,
+ inode, 0, FSP_UP, mtr);
if (page == FIL_NULL) {
- fsp_free_seg_inode(space, inode, mtr);
+ fsp_free_seg_inode(space, zip_size, inode, mtr);
goto funct_exit;
}
- header = byte_offset
- + buf_page_get(space, page, RW_X_LATCH, mtr);
+ block = buf_page_get(space, zip_size, page, RW_X_LATCH, mtr);
+ header = byte_offset + buf_block_get_frame(block);
mlog_write_ulint(header - byte_offset + FIL_PAGE_TYPE,
FIL_PAGE_TYPE_SYS, MLOG_2BYTES, mtr);
}
mlog_write_ulint(header + FSEG_HDR_OFFSET,
- inode - buf_frame_align(inode), MLOG_2BYTES, mtr);
+ page_offset(inode), MLOG_2BYTES, mtr);
mlog_write_ulint(header + FSEG_HDR_PAGE_NO,
- buf_frame_get_page_no(inode), MLOG_4BYTES, mtr);
+ page_get_page_no(page_align(inode)),
+ MLOG_4BYTES, mtr);
mlog_write_ulint(header + FSEG_HDR_SPACE, space, MLOG_4BYTES, mtr);
- ret = buf_frame_align(header);
-
funct_exit:
if (!has_done_reservation) {
fil_space_release_free_extents(space, n_reserved);
}
- return(ret);
+ return(block);
}
-/**************************************************************************
-Creates a new segment. */
-
-page_t*
+/**********************************************************************//**
+Creates a new segment.
+@return the block where the segment header is placed, x-latched, NULL
+if could not create segment because of lack of space */
+UNIV_INTERN
+buf_block_t*
fseg_create(
/*========*/
- /* out: the page where the segment header is placed,
- x-latched, NULL if could not create segment
- because of lack of space */
- ulint space, /* in: space id */
- ulint page, /* in: page where the segment header is placed: if
+ ulint space, /*!< in: space id */
+ ulint page, /*!< in: page where the segment header is placed: if
this is != 0, the page must belong to another segment,
if this is 0, a new page will be allocated and it
will belong to the created segment */
- ulint byte_offset, /* in: byte offset of the created segment header
+ ulint byte_offset, /*!< in: byte offset of the created segment header
on the page */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
return(fseg_create_general(space, page, byte_offset, FALSE, mtr));
}
-/**************************************************************************
+/**********************************************************************//**
Calculates the number of pages reserved by a segment, and how many pages are
-currently used. */
+currently used.
+@return number of reserved pages */
static
ulint
fseg_n_reserved_pages_low(
/*======================*/
- /* out: number of reserved pages */
- fseg_inode_t* inode, /* in: segment inode */
- ulint* used, /* out: number of pages used (<= reserved) */
- mtr_t* mtr) /* in: mtr handle */
+ fseg_inode_t* inode, /*!< in: segment inode */
+ ulint* used, /*!< out: number of pages used (not
+ more than reserved) */
+ mtr_t* mtr) /*!< in: mtr handle */
{
ulint ret;
ut_ad(inode && used && mtr);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(inode),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX));
*used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED, MLOG_4BYTES, mtr)
+ FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FULL, mtr)
@@ -2184,38 +2410,42 @@ fseg_n_reserved_pages_low(
return(ret);
}
-/**************************************************************************
+/**********************************************************************//**
Calculates the number of pages reserved by a segment, and how many pages are
-currently used. */
-
+currently used.
+@return number of reserved pages */
+UNIV_INTERN
ulint
fseg_n_reserved_pages(
/*==================*/
- /* out: number of reserved pages */
- fseg_header_t* header, /* in: segment header */
- ulint* used, /* out: number of pages used (<= reserved) */
- mtr_t* mtr) /* in: mtr handle */
+ fseg_header_t* header, /*!< in: segment header */
+ ulint* used, /*!< out: number of pages used (<= reserved) */
+ mtr_t* mtr) /*!< in: mtr handle */
{
ulint ret;
fseg_inode_t* inode;
ulint space;
+ ulint flags;
+ ulint zip_size;
+ rw_lock_t* latch;
- space = buf_frame_get_space_id(header);
+ space = page_get_space_id(page_align(header));
+ latch = fil_space_get_latch(space, &flags);
+ zip_size = dict_table_flags_to_zip_size(flags);
ut_ad(!mutex_own(&kernel_mutex)
- || mtr_memo_contains(mtr, fil_space_get_latch(space),
- MTR_MEMO_X_LOCK));
+ || mtr_memo_contains(mtr, latch, MTR_MEMO_X_LOCK));
- mtr_x_lock(fil_space_get_latch(space), mtr);
+ mtr_x_lock(latch, mtr);
- inode = fseg_inode_get(header, mtr);
+ inode = fseg_inode_get(header, space, zip_size, mtr);
ret = fseg_n_reserved_pages_low(inode, used, mtr);
return(ret);
}
-/*************************************************************************
+/*********************************************************************//**
Tries to fill the free list of a segment with consecutive free extents.
This happens if the segment is big enough to allow extents in the free list,
the free list is empty, and the extents can be allocated consecutively from
@@ -2224,11 +2454,13 @@ static
void
fseg_fill_free_list(
/*================*/
- fseg_inode_t* inode, /* in: segment inode */
- ulint space, /* in: space id */
- ulint hint, /* in: hint which extent would be good as
+ fseg_inode_t* inode, /*!< in: segment inode */
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint hint, /*!< in: hint which extent would be good as
the first extent */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
xdes_t* descr;
ulint i;
@@ -2237,6 +2469,7 @@ fseg_fill_free_list(
ulint used;
ut_ad(inode && mtr);
+ ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
reserved = fseg_n_reserved_pages_low(inode, &used, mtr);
@@ -2254,7 +2487,7 @@ fseg_fill_free_list(
}
for (i = 0; i < FSEG_FREE_LIST_MAX_LEN; i++) {
- descr = xdes_get_descriptor(space, hint, mtr);
+ descr = xdes_get_descriptor(space, zip_size, hint, mtr);
if ((descr == NULL)
|| (XDES_FREE != xdes_get_state(descr, mtr))) {
@@ -2264,11 +2497,13 @@ fseg_fill_free_list(
return;
}
- descr = fsp_alloc_free_extent(space, hint, mtr);
+ descr = fsp_alloc_free_extent(space, zip_size, hint, mtr);
xdes_set_state(descr, XDES_FSEG, mtr);
seg_id = mtr_read_dulint(inode + FSEG_ID, mtr);
+ ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N)
+ == FSEG_MAGIC_N_VALUE);
mlog_write_dulint(descr + XDES_ID, seg_id, mtr);
flst_add_last(inode + FSEG_FREE, descr + XDES_FLST_NODE, mtr);
@@ -2276,34 +2511,38 @@ fseg_fill_free_list(
}
}
-/*************************************************************************
+/*********************************************************************//**
Allocates a free extent for the segment: looks first in the free list of the
segment, then tries to allocate from the space free list. NOTE that the extent
-returned still resides in the segment free list, it is not yet taken off it! */
+returned still resides in the segment free list, it is not yet taken off it!
+@return allocated extent, still placed in the segment free list, NULL
+if could not be allocated */
static
xdes_t*
fseg_alloc_free_extent(
/*===================*/
- /* out: allocated extent, still placed in the
- segment free list, NULL if could
- not be allocated */
- fseg_inode_t* inode, /* in: segment inode */
- ulint space, /* in: space id */
- mtr_t* mtr) /* in: mtr */
+ fseg_inode_t* inode, /*!< in: segment inode */
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ mtr_t* mtr) /*!< in: mtr */
{
xdes_t* descr;
dulint seg_id;
fil_addr_t first;
+ ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
+ ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
+
if (flst_get_len(inode + FSEG_FREE, mtr) > 0) {
/* Segment free list is not empty, allocate from it */
first = flst_get_first(inode + FSEG_FREE, mtr);
- descr = xdes_lst_get_descriptor(space, first, mtr);
+ descr = xdes_lst_get_descriptor(space, zip_size, first, mtr);
} else {
/* Segment free list was empty, allocate from space */
- descr = fsp_alloc_free_extent(space, 0, mtr);
+ descr = fsp_alloc_free_extent(space, zip_size, 0, mtr);
if (descr == NULL) {
@@ -2317,7 +2556,7 @@ fseg_alloc_free_extent(
flst_add_last(inode + FSEG_FREE, descr + XDES_FLST_NODE, mtr);
/* Try to fill the segment free list */
- fseg_fill_free_list(inode, space,
+ fseg_fill_free_list(inode, space, zip_size,
xdes_get_offset(descr) + FSP_EXTENT_SIZE,
mtr);
}
@@ -2325,36 +2564,36 @@ fseg_alloc_free_extent(
return(descr);
}
-/**************************************************************************
+/**********************************************************************//**
Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize file space
-fragmentation. */
+fragmentation.
+@return the allocated page number, FIL_NULL if no page could be allocated */
static
ulint
fseg_alloc_free_page_low(
/*=====================*/
- /* out: the allocated page number, FIL_NULL
- if no page could be allocated */
- ulint space, /* in: space */
- fseg_inode_t* seg_inode, /* in: segment inode */
- ulint hint, /* in: hint of which page would be desirable */
- byte direction, /* in: if the new page is needed because
+ ulint space, /*!< in: space */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ fseg_inode_t* seg_inode, /*!< in: segment inode */
+ ulint hint, /*!< in: hint of which page would be desirable */
+ byte direction, /*!< in: if the new page is needed because
of an index page split, and records are
inserted there in order, into which
direction they go alphabetically: FSP_DOWN,
FSP_UP, FSP_NO_DIR */
- mtr_t* mtr) /* in: mtr handle */
+ mtr_t* mtr) /*!< in: mtr handle */
{
fsp_header_t* space_header;
ulint space_size;
dulint seg_id;
ulint used;
ulint reserved;
- xdes_t* descr; /* extent of the hinted page */
- ulint ret_page; /* the allocated page offset, FIL_NULL
+ xdes_t* descr; /*!< extent of the hinted page */
+ ulint ret_page; /*!< the allocated page offset, FIL_NULL
if could not be allocated */
- xdes_t* ret_descr; /* the extent of the allocated page */
- page_t* page;
+ xdes_t* ret_descr; /*!< the extent of the allocated page */
ibool frag_page_allocated = FALSE;
ibool success;
ulint n;
@@ -2363,13 +2602,14 @@ fseg_alloc_free_page_low(
ut_ad((direction >= FSP_UP) && (direction <= FSP_NO_DIR));
ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
== FSEG_MAGIC_N_VALUE);
+ ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
seg_id = mtr_read_dulint(seg_inode + FSEG_ID, mtr);
- ut_ad(ut_dulint_cmp(seg_id, ut_dulint_zero) > 0);
+ ut_ad(!ut_dulint_is_zero(seg_id));
reserved = fseg_n_reserved_pages_low(seg_inode, &used, mtr);
- space_header = fsp_get_space_header(space, mtr);
+ space_header = fsp_get_space_header(space, zip_size, mtr);
descr = xdes_get_descriptor_with_space_hdr(space_header, space,
hint, mtr);
@@ -2377,7 +2617,7 @@ fseg_alloc_free_page_low(
/* Hint outside space or too high above free limit: reset
hint */
hint = 0;
- descr = xdes_get_descriptor(space, hint, mtr);
+ descr = xdes_get_descriptor(space, zip_size, hint, mtr);
}
/* In the big if-else below we look for ret_page and ret_descr */
@@ -2401,7 +2641,7 @@ fseg_alloc_free_page_low(
=========================================================
the hinted page
===============*/
- ret_descr = fsp_alloc_free_extent(space, hint, mtr);
+ ret_descr = fsp_alloc_free_extent(space, zip_size, hint, mtr);
ut_a(ret_descr == descr);
@@ -2411,7 +2651,7 @@ fseg_alloc_free_page_low(
ret_descr + XDES_FLST_NODE, mtr);
/* Try to fill the segment free list */
- fseg_fill_free_list(seg_inode, space,
+ fseg_fill_free_list(seg_inode, space, zip_size,
hint + FSP_EXTENT_SIZE, mtr);
ret_page = hint;
/*-----------------------------------------------------------*/
@@ -2419,7 +2659,8 @@ fseg_alloc_free_page_low(
&& ((reserved - used) < reserved / FSEG_FILLFACTOR)
&& (used >= FSEG_FRAG_LIMIT)
&& (!!(ret_descr
- = fseg_alloc_free_extent(seg_inode, space, mtr)))) {
+ = fseg_alloc_free_extent(seg_inode,
+ space, zip_size, mtr)))) {
/* 3. We take any free extent (which was already assigned above
===============================================================
@@ -2464,7 +2705,8 @@ fseg_alloc_free_page_low(
return(FIL_NULL);
}
- ret_descr = xdes_lst_get_descriptor(space, first, mtr);
+ ret_descr = xdes_lst_get_descriptor(space, zip_size,
+ first, mtr);
ret_page = xdes_get_offset(ret_descr)
+ xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE,
0, mtr);
@@ -2472,7 +2714,7 @@ fseg_alloc_free_page_low(
} else if (used < FSEG_FRAG_LIMIT) {
/* 6. We allocate an individual page from the space
===================================================*/
- ret_page = fsp_alloc_free_page(space, hint, mtr);
+ ret_page = fsp_alloc_free_page(space, zip_size, hint, mtr);
ret_descr = NULL;
frag_page_allocated = TRUE;
@@ -2490,7 +2732,8 @@ fseg_alloc_free_page_low(
} else {
/* 7. We allocate a new extent and take its first page
======================================================*/
- ret_descr = fseg_alloc_free_extent(seg_inode, space, mtr);
+ ret_descr = fseg_alloc_free_extent(seg_inode,
+ space, zip_size, mtr);
if (ret_descr == NULL) {
ret_page = FIL_NULL;
@@ -2536,27 +2779,32 @@ fseg_alloc_free_page_low(
/* Initialize the allocated page to buffer pool, so that it
can be obtained immediately with buf_page_get without need
for a disk read */
+ buf_block_t* block;
+ ulint zip_size = dict_table_flags_to_zip_size(
+ mach_read_from_4(FSP_SPACE_FLAGS + space_header));
- page = buf_page_create(space, ret_page, mtr);
-
- ut_a(page == buf_page_get(space, ret_page, RW_X_LATCH, mtr));
+ block = buf_page_create(space, ret_page, zip_size, mtr);
+ buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
+ if (UNIV_UNLIKELY(block != buf_page_get(space, zip_size,
+ ret_page, RW_X_LATCH,
+ mtr))) {
+ ut_error;
+ }
/* The prior contents of the page should be ignored */
- fsp_init_file_page(page, mtr);
+ fsp_init_file_page(block, mtr);
/* At this point we know the extent and the page offset.
The extent is still in the appropriate list (FSEG_NOT_FULL
or FSEG_FREE), and the page is not yet marked as used. */
- ut_ad(xdes_get_descriptor(space, ret_page, mtr) == ret_descr);
+ ut_ad(xdes_get_descriptor(space, zip_size, ret_page, mtr)
+ == ret_descr);
ut_ad(xdes_get_bit(ret_descr, XDES_FREE_BIT,
ret_page % FSP_EXTENT_SIZE, mtr) == TRUE);
- fseg_mark_page_used(seg_inode, space, ret_page, mtr);
+ fseg_mark_page_used(seg_inode, space, zip_size, ret_page, mtr);
}
buf_reset_check_index_page_at_flush(space, ret_page);
@@ -2564,43 +2812,46 @@ fseg_alloc_free_page_low(
return(ret_page);
}
-/**************************************************************************
+/**********************************************************************//**
Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize file space
-fragmentation. */
-
+fragmentation.
+@return allocated page offset, FIL_NULL if no page could be allocated */
+UNIV_INTERN
ulint
fseg_alloc_free_page_general(
/*=========================*/
- /* out: allocated page offset, FIL_NULL if no
- page could be allocated */
- fseg_header_t* seg_header,/* in: segment header */
- ulint hint, /* in: hint of which page would be desirable */
- byte direction,/* in: if the new page is needed because
+ fseg_header_t* seg_header,/*!< in: segment header */
+ ulint hint, /*!< in: hint of which page would be desirable */
+ byte direction,/*!< in: if the new page is needed because
of an index page split, and records are
inserted there in order, into which
direction they go alphabetically: FSP_DOWN,
FSP_UP, FSP_NO_DIR */
- ibool has_done_reservation, /* in: TRUE if the caller has
+ ibool has_done_reservation, /*!< in: TRUE if the caller has
already done the reservation for the page
with fsp_reserve_free_extents, then there
is no need to do the check for this individual
page */
- mtr_t* mtr) /* in: mtr handle */
+ mtr_t* mtr) /*!< in: mtr handle */
{
fseg_inode_t* inode;
ulint space;
+ ulint flags;
+ ulint zip_size;
rw_lock_t* latch;
ibool success;
ulint page_no;
ulint n_reserved;
- space = buf_frame_get_space_id(seg_header);
+ space = page_get_space_id(page_align(seg_header));
+
+ latch = fil_space_get_latch(space, &flags);
+
+ zip_size = dict_table_flags_to_zip_size(flags);
ut_ad(!mutex_own(&kernel_mutex)
- || mtr_memo_contains(mtr, fil_space_get_latch(space),
- MTR_MEMO_X_LOCK));
- latch = fil_space_get_latch(space);
+ || mtr_memo_contains(mtr, latch, MTR_MEMO_X_LOCK));
mtr_x_lock(latch, mtr);
@@ -2608,12 +2859,12 @@ fseg_alloc_free_page_general(
/* This thread did not own the latch before this call: free
excess pages from the insert buffer free list */
- if (space == 0) {
- ibuf_free_excess_pages(space);
+ if (space == IBUF_SPACE_ID) {
+ ibuf_free_excess_pages();
}
}
- inode = fseg_inode_get(seg_header, mtr);
+ inode = fseg_inode_get(seg_header, space, zip_size, mtr);
if (!has_done_reservation) {
success = fsp_reserve_free_extents(&n_reserved, space, 2,
@@ -2623,7 +2874,7 @@ fseg_alloc_free_page_general(
}
}
- page_no = fseg_alloc_free_page_low(buf_frame_get_space_id(inode),
+ page_no = fseg_alloc_free_page_low(space, zip_size,
inode, hint, direction, mtr);
if (!has_done_reservation) {
fil_space_release_free_extents(space, n_reserved);
@@ -2632,47 +2883,45 @@ fseg_alloc_free_page_general(
return(page_no);
}
-/**************************************************************************
+/**********************************************************************//**
Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize file space
-fragmentation. */
-
+fragmentation.
+@return allocated page offset, FIL_NULL if no page could be allocated */
+UNIV_INTERN
ulint
fseg_alloc_free_page(
/*=================*/
- /* out: allocated page offset, FIL_NULL if no
- page could be allocated */
- fseg_header_t* seg_header,/* in: segment header */
- ulint hint, /* in: hint of which page would be desirable */
- byte direction,/* in: if the new page is needed because
+ fseg_header_t* seg_header,/*!< in: segment header */
+ ulint hint, /*!< in: hint of which page would be desirable */
+ byte direction,/*!< in: if the new page is needed because
of an index page split, and records are
inserted there in order, into which
direction they go alphabetically: FSP_DOWN,
FSP_UP, FSP_NO_DIR */
- mtr_t* mtr) /* in: mtr handle */
+ mtr_t* mtr) /*!< in: mtr handle */
{
return(fseg_alloc_free_page_general(seg_header, hint, direction,
FALSE, mtr));
}
-/**************************************************************************
+/**********************************************************************//**
Checks that we have at least 2 frag pages free in the first extent of a
single-table tablespace, and they are also physically initialized to the data
file. That is we have already extended the data file so that those pages are
inside the data file. If not, this function extends the tablespace with
-pages. */
+pages.
+@return TRUE if there were >= 3 free pages, or we were able to extend */
static
ibool
fsp_reserve_free_pages(
/*===================*/
- /* out: TRUE if there were >= 3 free
- pages, or we were able to extend */
- ulint space, /* in: space id, must be != 0 */
- fsp_header_t* space_header, /* in: header of that space,
+ ulint space, /*!< in: space id, must be != 0 */
+ fsp_header_t* space_header, /*!< in: header of that space,
x-latched */
- ulint size, /* in: size of the tablespace in pages,
+ ulint size, /*!< in: size of the tablespace in pages,
must be < FSP_EXTENT_SIZE / 2 */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
xdes_t* descr;
ulint n_used;
@@ -2695,7 +2944,7 @@ fsp_reserve_free_pages(
space_header, mtr));
}
-/**************************************************************************
+/**********************************************************************//**
Reserves free pages from a tablespace. All mini-transactions which may
use several pages from the tablespace should call this function beforehand
and reserve enough free extents so that they certainly will be able
@@ -2719,25 +2968,27 @@ Single-table tablespaces whose size is < 32 pages are a special case. In this
function we would liberally reserve several 64 page extents for every page
split or merge in a B-tree. But we do not want to waste disk space if the table
only occupies < 32 pages. That is why we apply different rules in that special
-case, just ensuring that there are 3 free pages available. */
-
+case, just ensuring that there are 3 free pages available.
+@return TRUE if we were able to make the reservation */
+UNIV_INTERN
ibool
fsp_reserve_free_extents(
/*=====================*/
- /* out: TRUE if we were able to make the reservation */
- ulint* n_reserved,/* out: number of extents actually reserved; if we
+ ulint* n_reserved,/*!< out: number of extents actually reserved; if we
return TRUE and the tablespace size is < 64 pages,
then this can be 0, otherwise it is n_ext */
- ulint space, /* in: space id */
- ulint n_ext, /* in: number of extents to reserve */
- ulint alloc_type,/* in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
- mtr_t* mtr) /* in: mtr */
+ ulint space, /*!< in: space id */
+ ulint n_ext, /*!< in: number of extents to reserve */
+ ulint alloc_type,/*!< in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
+ mtr_t* mtr) /*!< in: mtr */
{
fsp_header_t* space_header;
rw_lock_t* latch;
ulint n_free_list_ext;
ulint free_limit;
ulint size;
+ ulint flags;
+ ulint zip_size;
ulint n_free;
ulint n_free_up;
ulint reserve;
@@ -2745,16 +2996,17 @@ fsp_reserve_free_extents(
ulint n_pages_added;
ut_ad(mtr);
- ut_ad(!mutex_own(&kernel_mutex)
- || mtr_memo_contains(mtr, fil_space_get_latch(space),
- MTR_MEMO_X_LOCK));
*n_reserved = n_ext;
- latch = fil_space_get_latch(space);
+ latch = fil_space_get_latch(space, &flags);
+ zip_size = dict_table_flags_to_zip_size(flags);
+
+ ut_ad(!mutex_own(&kernel_mutex)
+ || mtr_memo_contains(mtr, latch, MTR_MEMO_X_LOCK));
mtr_x_lock(latch, mtr);
- space_header = fsp_get_space_header(space, mtr);
+ space_header = fsp_get_space_header(space, zip_size, mtr);
try_again:
size = mtr_read_ulint(space_header + FSP_SIZE, MLOG_4BYTES, mtr);
@@ -2777,8 +3029,13 @@ try_again:
if (n_free_up > 0) {
n_free_up--;
- n_free_up = n_free_up - n_free_up
- / (XDES_DESCRIBED_PER_PAGE / FSP_EXTENT_SIZE);
+ if (!zip_size) {
+ n_free_up -= n_free_up
+ / (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE);
+ } else {
+ n_free_up -= n_free_up
+ / (zip_size / FSP_EXTENT_SIZE);
+ }
}
n_free = n_free_list_ext + n_free_up;
@@ -2823,22 +3080,24 @@ try_to_extend:
return(FALSE);
}
-/**************************************************************************
+/**********************************************************************//**
This function should be used to get information on how much we still
will be able to insert new data to the database without running out the
tablespace. Only free extents are taken into account and we also subtract
-the safety margin required by the above function fsp_reserve_free_extents. */
-
+the safety margin required by the above function fsp_reserve_free_extents.
+@return available space in kB */
+UNIV_INTERN
ullint
fsp_get_available_space_in_free_extents(
/*====================================*/
- /* out: available space in kB */
- ulint space) /* in: space id */
+ ulint space) /*!< in: space id */
{
fsp_header_t* space_header;
ulint n_free_list_ext;
ulint free_limit;
ulint size;
+ ulint flags;
+ ulint zip_size;
ulint n_free;
ulint n_free_up;
ulint reserve;
@@ -2849,11 +3108,12 @@ fsp_get_available_space_in_free_extents(
mtr_start(&mtr);
- latch = fil_space_get_latch(space);
+ latch = fil_space_get_latch(space, &flags);
+ zip_size = dict_table_flags_to_zip_size(flags);
mtr_x_lock(latch, &mtr);
- space_header = fsp_get_space_header(space, &mtr);
+ space_header = fsp_get_space_header(space, zip_size, &mtr);
size = mtr_read_ulint(space_header + FSP_SIZE, MLOG_4BYTES, &mtr);
@@ -2879,8 +3139,13 @@ fsp_get_available_space_in_free_extents(
if (n_free_up > 0) {
n_free_up--;
- n_free_up = n_free_up - n_free_up
- / (XDES_DESCRIBED_PER_PAGE / FSP_EXTENT_SIZE);
+ if (!zip_size) {
+ n_free_up -= n_free_up
+ / (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE);
+ } else {
+ n_free_up -= n_free_up
+ / (zip_size / FSP_EXTENT_SIZE);
+ }
}
n_free = n_free_list_ext + n_free_up;
@@ -2895,29 +3160,40 @@ fsp_get_available_space_in_free_extents(
return(0);
}
- return((ullint)(n_free - reserve)
- * FSP_EXTENT_SIZE
- * (UNIV_PAGE_SIZE / 1024));
+ if (!zip_size) {
+ return((ullint) (n_free - reserve)
+ * FSP_EXTENT_SIZE
+ * (UNIV_PAGE_SIZE / 1024));
+ } else {
+ return((ullint) (n_free - reserve)
+ * FSP_EXTENT_SIZE
+ * (zip_size / 1024));
+ }
}
-/************************************************************************
+/********************************************************************//**
Marks a page used. The page must reside within the extents of the given
segment. */
static
void
fseg_mark_page_used(
/*================*/
- fseg_inode_t* seg_inode,/* in: segment inode */
- ulint space, /* in: space id */
- ulint page, /* in: page offset */
- mtr_t* mtr) /* in: mtr */
+ fseg_inode_t* seg_inode,/*!< in: segment inode */
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page, /*!< in: page offset */
+ mtr_t* mtr) /*!< in: mtr */
{
xdes_t* descr;
ulint not_full_n_used;
ut_ad(seg_inode && mtr);
+ ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
+ ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
+ == FSEG_MAGIC_N_VALUE);
- descr = xdes_get_descriptor(space, page, mtr);
+ descr = xdes_get_descriptor(space, zip_size, page, mtr);
ut_ad(mtr_read_ulint(seg_inode + FSEG_ID, MLOG_4BYTES, mtr)
== mtr_read_ulint(descr + XDES_ID, MLOG_4BYTES, mtr));
@@ -2955,16 +3231,18 @@ fseg_mark_page_used(
}
}
-/**************************************************************************
+/**********************************************************************//**
Frees a single page of a segment. */
static
void
fseg_free_page_low(
/*===============*/
- fseg_inode_t* seg_inode, /* in: segment inode */
- ulint space, /* in: space id */
- ulint page, /* in: page offset */
- mtr_t* mtr) /* in: mtr handle */
+ fseg_inode_t* seg_inode, /*!< in: segment inode */
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page, /*!< in: page offset */
+ mtr_t* mtr) /*!< in: mtr handle */
{
xdes_t* descr;
ulint not_full_n_used;
@@ -2976,13 +3254,14 @@ fseg_free_page_low(
ut_ad(seg_inode && mtr);
ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
== FSEG_MAGIC_N_VALUE);
+ ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
/* Drop search system page hash index if the page is found in
the pool and is hashed */
- btr_search_drop_page_hash_when_freed(space, page);
+ btr_search_drop_page_hash_when_freed(space, zip_size, page);
- descr = xdes_get_descriptor(space, page, mtr);
+ descr = xdes_get_descriptor(space, zip_size, page, mtr);
ut_a(descr);
if (xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)) {
@@ -3001,8 +3280,7 @@ fseg_free_page_low(
"InnoDB: database!\n", (ulong) page);
crash:
fputs("InnoDB: Please refer to\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "forcing-recovery.html\n"
+ "InnoDB: " REFMAN "forcing-recovery.html\n"
"InnoDB: about forcing recovery.\n", stderr);
ut_error;
}
@@ -3022,7 +3300,7 @@ crash:
}
}
- fsp_free_page(space, page, mtr);
+ fsp_free_page(space, zip_size, page, mtr);
return;
}
@@ -3088,48 +3366,55 @@ crash:
/* The extent has become free: free it to space */
flst_remove(seg_inode + FSEG_NOT_FULL,
descr + XDES_FLST_NODE, mtr);
- fsp_free_extent(space, page, mtr);
+ fsp_free_extent(space, zip_size, page, mtr);
}
}
-/**************************************************************************
+/**********************************************************************//**
Frees a single page of a segment. */
-
+UNIV_INTERN
void
fseg_free_page(
/*===========*/
- fseg_header_t* seg_header, /* in: segment header */
- ulint space, /* in: space id */
- ulint page, /* in: page offset */
- mtr_t* mtr) /* in: mtr handle */
+ fseg_header_t* seg_header, /*!< in: segment header */
+ ulint space, /*!< in: space id */
+ ulint page, /*!< in: page offset */
+ mtr_t* mtr) /*!< in: mtr handle */
{
+ ulint flags;
+ ulint zip_size;
fseg_inode_t* seg_inode;
+ rw_lock_t* latch;
+
+ latch = fil_space_get_latch(space, &flags);
+ zip_size = dict_table_flags_to_zip_size(flags);
ut_ad(!mutex_own(&kernel_mutex)
- || mtr_memo_contains(mtr, fil_space_get_latch(space),
- MTR_MEMO_X_LOCK));
+ || mtr_memo_contains(mtr, latch, MTR_MEMO_X_LOCK));
- mtr_x_lock(fil_space_get_latch(space), mtr);
+ mtr_x_lock(latch, mtr);
- seg_inode = fseg_inode_get(seg_header, mtr);
+ seg_inode = fseg_inode_get(seg_header, space, zip_size, mtr);
- fseg_free_page_low(seg_inode, space, page, mtr);
+ fseg_free_page_low(seg_inode, space, zip_size, page, mtr);
#ifdef UNIV_DEBUG_FILE_ACCESSES
buf_page_set_file_page_was_freed(space, page);
#endif
}
-/**************************************************************************
+/**********************************************************************//**
Frees an extent of a segment to the space free list. */
static
void
fseg_free_extent(
/*=============*/
- fseg_inode_t* seg_inode, /* in: segment inode */
- ulint space, /* in: space id */
- ulint page, /* in: a page in the extent */
- mtr_t* mtr) /* in: mtr handle */
+ fseg_inode_t* seg_inode, /*!< in: segment inode */
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page, /*!< in: a page in the extent */
+ mtr_t* mtr) /*!< in: mtr handle */
{
ulint first_page_in_extent;
xdes_t* descr;
@@ -3139,11 +3424,13 @@ fseg_free_extent(
ut_ad(seg_inode && mtr);
- descr = xdes_get_descriptor(space, page, mtr);
+ descr = xdes_get_descriptor(space, zip_size, page, mtr);
ut_a(xdes_get_state(descr, mtr) == XDES_FSEG);
ut_a(0 == ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID, mtr),
mtr_read_dulint(seg_inode + FSEG_ID, mtr)));
+ ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
+ == FSEG_MAGIC_N_VALUE);
first_page_in_extent = page - (page % FSP_EXTENT_SIZE);
@@ -3154,7 +3441,7 @@ fseg_free_extent(
found in the pool and is hashed */
btr_search_drop_page_hash_when_freed(
- space, first_page_in_extent + i);
+ space, zip_size, first_page_in_extent + i);
}
}
@@ -3178,7 +3465,7 @@ fseg_free_extent(
MLOG_4BYTES, mtr);
}
- fsp_free_extent(space, page, mtr);
+ fsp_free_extent(space, zip_size, page, mtr);
#ifdef UNIV_DEBUG_FILE_ACCESSES
for (i = 0; i < FSP_EXTENT_SIZE; i++) {
@@ -3189,53 +3476,66 @@ fseg_free_extent(
#endif
}
-/**************************************************************************
+/**********************************************************************//**
Frees part of a segment. This function can be used to free a segment by
repeatedly calling this function in different mini-transactions. Doing
the freeing in a single mini-transaction might result in too big a
-mini-transaction. */
-
+mini-transaction.
+@return TRUE if freeing completed */
+UNIV_INTERN
ibool
fseg_free_step(
/*===========*/
- /* out: TRUE if freeing completed */
- fseg_header_t* header, /* in, own: segment header; NOTE: if the header
+ fseg_header_t* header, /*!< in, own: segment header; NOTE: if the header
resides on the first page of the frag list
of the segment, this pointer becomes obsolete
after the last freeing step */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint n;
ulint page;
xdes_t* descr;
fseg_inode_t* inode;
ulint space;
+ ulint flags;
+ ulint zip_size;
+ ulint header_page;
+ rw_lock_t* latch;
- space = buf_frame_get_space_id(header);
+ space = page_get_space_id(page_align(header));
+ header_page = page_get_page_no(page_align(header));
+
+ latch = fil_space_get_latch(space, &flags);
+ zip_size = dict_table_flags_to_zip_size(flags);
ut_ad(!mutex_own(&kernel_mutex)
- || mtr_memo_contains(mtr, fil_space_get_latch(space),
- MTR_MEMO_X_LOCK));
+ || mtr_memo_contains(mtr, latch, MTR_MEMO_X_LOCK));
- mtr_x_lock(fil_space_get_latch(space), mtr);
+ mtr_x_lock(latch, mtr);
- descr = xdes_get_descriptor(space, buf_frame_get_page_no(header), mtr);
+ descr = xdes_get_descriptor(space, zip_size, header_page, mtr);
/* Check that the header resides on a page which has not been
freed yet */
ut_a(descr);
- ut_a(xdes_get_bit(descr, XDES_FREE_BIT, buf_frame_get_page_no(header)
- % FSP_EXTENT_SIZE, mtr) == FALSE);
- inode = fseg_inode_get(header, mtr);
+ ut_a(xdes_get_bit(descr, XDES_FREE_BIT,
+ header_page % FSP_EXTENT_SIZE, mtr) == FALSE);
+ inode = fseg_inode_try_get(header, space, zip_size, mtr);
+
+ if (UNIV_UNLIKELY(inode == NULL)) {
+ fprintf(stderr, "double free of inode from %u:%u\n",
+ (unsigned) space, (unsigned) header_page);
+ return(TRUE);
+ }
- descr = fseg_get_first_extent(inode, mtr);
+ descr = fseg_get_first_extent(inode, space, zip_size, mtr);
if (descr != NULL) {
/* Free the extent held by the segment */
page = xdes_get_offset(descr);
- fseg_free_extent(inode, space, page, mtr);
+ fseg_free_extent(inode, space, zip_size, page, mtr);
return(FALSE);
}
@@ -3245,19 +3545,19 @@ fseg_free_step(
if (n == ULINT_UNDEFINED) {
/* Freeing completed: free the segment inode */
- fsp_free_seg_inode(space, inode, mtr);
+ fsp_free_seg_inode(space, zip_size, inode, mtr);
return(TRUE);
}
- fseg_free_page_low(inode, space,
+ fseg_free_page_low(inode, space, zip_size,
fseg_get_nth_frag_page_no(inode, n, mtr), mtr);
n = fseg_find_last_used_frag_page_slot(inode, mtr);
if (n == ULINT_UNDEFINED) {
/* Freeing completed: free the segment inode */
- fsp_free_seg_inode(space, inode, mtr);
+ fsp_free_seg_inode(space, zip_size, inode, mtr);
return(TRUE);
}
@@ -3265,43 +3565,47 @@ fseg_free_step(
return(FALSE);
}
-/**************************************************************************
+/**********************************************************************//**
Frees part of a segment. Differs from fseg_free_step because this function
-leaves the header page unfreed. */
-
+leaves the header page unfreed.
+@return TRUE if freeing completed, except the header page */
+UNIV_INTERN
ibool
fseg_free_step_not_header(
/*======================*/
- /* out: TRUE if freeing completed, except the
- header page */
- fseg_header_t* header, /* in: segment header which must reside on
+ fseg_header_t* header, /*!< in: segment header which must reside on
the first fragment page of the segment */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint n;
ulint page;
xdes_t* descr;
fseg_inode_t* inode;
ulint space;
+ ulint flags;
+ ulint zip_size;
ulint page_no;
+ rw_lock_t* latch;
- space = buf_frame_get_space_id(header);
+ space = page_get_space_id(page_align(header));
+
+ latch = fil_space_get_latch(space, &flags);
+ zip_size = dict_table_flags_to_zip_size(flags);
ut_ad(!mutex_own(&kernel_mutex)
- || mtr_memo_contains(mtr, fil_space_get_latch(space),
- MTR_MEMO_X_LOCK));
+ || mtr_memo_contains(mtr, latch, MTR_MEMO_X_LOCK));
- mtr_x_lock(fil_space_get_latch(space), mtr);
+ mtr_x_lock(latch, mtr);
- inode = fseg_inode_get(header, mtr);
+ inode = fseg_inode_get(header, space, zip_size, mtr);
- descr = fseg_get_first_extent(inode, mtr);
+ descr = fseg_get_first_extent(inode, space, zip_size, mtr);
if (descr != NULL) {
/* Free the extent held by the segment */
page = xdes_get_offset(descr);
- fseg_free_extent(inode, space, page, mtr);
+ fseg_free_extent(inode, space, zip_size, page, mtr);
return(FALSE);
}
@@ -3316,73 +3620,38 @@ fseg_free_step_not_header(
page_no = fseg_get_nth_frag_page_no(inode, n, mtr);
- if (page_no == buf_frame_get_page_no(header)) {
+ if (page_no == page_get_page_no(page_align(header))) {
return(TRUE);
}
- fseg_free_page_low(inode, space, page_no, mtr);
+ fseg_free_page_low(inode, space, zip_size, page_no, mtr);
return(FALSE);
}
-/***********************************************************************
-Frees a segment. The freeing is performed in several mini-transactions,
-so that there is no danger of bufferfixing too many buffer pages. */
-
-void
-fseg_free(
-/*======*/
- ulint space, /* in: space id */
- ulint page_no,/* in: page number where the segment header is
- placed */
- ulint offset) /* in: byte offset of the segment header on that
- page */
-{
- mtr_t mtr;
- ibool finished;
- fseg_header_t* header;
- fil_addr_t addr;
-
- addr.page = page_no;
- addr.boffset = offset;
-
- for (;;) {
- mtr_start(&mtr);
-
- header = fut_get_ptr(space, addr, RW_X_LATCH, &mtr);
-
- finished = fseg_free_step(header, &mtr);
-
- mtr_commit(&mtr);
-
- if (finished) {
-
- return;
- }
- }
-}
-
-/**************************************************************************
+/**********************************************************************//**
Returns the first extent descriptor for a segment. We think of the extent
lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL
--> FSEG_FREE. */
+-> FSEG_FREE.
+@return the first extent descriptor, or NULL if none */
static
xdes_t*
fseg_get_first_extent(
/*==================*/
- /* out: the first extent descriptor, or NULL if
- none */
- fseg_inode_t* inode, /* in: segment inode */
- mtr_t* mtr) /* in: mtr */
+ fseg_inode_t* inode, /*!< in: segment inode */
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ mtr_t* mtr) /*!< in: mtr */
{
fil_addr_t first;
- ulint space;
xdes_t* descr;
ut_ad(inode && mtr);
- space = buf_frame_get_space_id(inode);
+ ut_ad(space == page_get_space_id(page_align(inode)));
+ ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
first = fil_addr_null;
@@ -3403,20 +3672,20 @@ fseg_get_first_extent(
return(NULL);
}
- descr = xdes_lst_get_descriptor(space, first, mtr);
+ descr = xdes_lst_get_descriptor(space, zip_size, first, mtr);
return(descr);
}
-/***********************************************************************
-Validates a segment. */
+/*******************************************************************//**
+Validates a segment.
+@return TRUE if ok */
static
ibool
fseg_validate_low(
/*==============*/
- /* out: TRUE if ok */
- fseg_inode_t* inode, /* in: segment inode */
- mtr_t* mtr2) /* in: mtr */
+ fseg_inode_t* inode, /*!< in: segment inode */
+ mtr_t* mtr2) /*!< in: mtr */
{
ulint space;
dulint seg_id;
@@ -3426,11 +3695,10 @@ fseg_validate_low(
ulint n_used = 0;
ulint n_used2 = 0;
- ut_ad(mtr_memo_contains(mtr2, buf_block_align(inode),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr2, inode, MTR_MEMO_PAGE_X_FIX));
ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
- space = buf_frame_get_space_id(inode);
+ space = page_get_space_id(page_align(inode));
seg_id = mtr_read_dulint(inode + FSEG_ID, mtr2);
n_used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED,
@@ -3443,10 +3711,15 @@ fseg_validate_low(
node_addr = flst_get_first(inode + FSEG_FREE, mtr2);
while (!fil_addr_is_null(node_addr)) {
+ ulint flags;
+ ulint zip_size;
+
mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
+ mtr_x_lock(fil_space_get_latch(space, &flags), &mtr);
+ zip_size = dict_table_flags_to_zip_size(flags);
- descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
+ descr = xdes_lst_get_descriptor(space, zip_size,
+ node_addr, &mtr);
ut_a(xdes_get_n_used(descr, &mtr) == 0);
ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
@@ -3462,10 +3735,15 @@ fseg_validate_low(
node_addr = flst_get_first(inode + FSEG_NOT_FULL, mtr2);
while (!fil_addr_is_null(node_addr)) {
+ ulint flags;
+ ulint zip_size;
+
mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
+ mtr_x_lock(fil_space_get_latch(space, &flags), &mtr);
+ zip_size = dict_table_flags_to_zip_size(flags);
- descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
+ descr = xdes_lst_get_descriptor(space, zip_size,
+ node_addr, &mtr);
ut_a(xdes_get_n_used(descr, &mtr) > 0);
ut_a(xdes_get_n_used(descr, &mtr) < FSP_EXTENT_SIZE);
@@ -3484,10 +3762,15 @@ fseg_validate_low(
node_addr = flst_get_first(inode + FSEG_FULL, mtr2);
while (!fil_addr_is_null(node_addr)) {
+ ulint flags;
+ ulint zip_size;
+
mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
+ mtr_x_lock(fil_space_get_latch(space, &flags), &mtr);
+ zip_size = dict_table_flags_to_zip_size(flags);
- descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
+ descr = xdes_lst_get_descriptor(space, zip_size,
+ node_addr, &mtr);
ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE);
ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
@@ -3503,39 +3786,44 @@ fseg_validate_low(
return(TRUE);
}
-/***********************************************************************
-Validates a segment. */
-
+#ifdef UNIV_DEBUG
+/*******************************************************************//**
+Validates a segment.
+@return TRUE if ok */
+UNIV_INTERN
ibool
fseg_validate(
/*==========*/
- /* out: TRUE if ok */
- fseg_header_t* header, /* in: segment header */
- mtr_t* mtr2) /* in: mtr */
+ fseg_header_t* header, /*!< in: segment header */
+ mtr_t* mtr) /*!< in: mtr */
{
fseg_inode_t* inode;
ibool ret;
ulint space;
+ ulint flags;
+ ulint zip_size;
- space = buf_frame_get_space_id(header);
+ space = page_get_space_id(page_align(header));
- mtr_x_lock(fil_space_get_latch(space), mtr2);
+ mtr_x_lock(fil_space_get_latch(space, &flags), mtr);
+ zip_size = dict_table_flags_to_zip_size(flags);
- inode = fseg_inode_get(header, mtr2);
+ inode = fseg_inode_get(header, space, zip_size, mtr);
- ret = fseg_validate_low(inode, mtr2);
+ ret = fseg_validate_low(inode, mtr);
return(ret);
}
+#endif /* UNIV_DEBUG */
-/***********************************************************************
+/*******************************************************************//**
Writes info of a segment. */
static
void
fseg_print_low(
/*===========*/
- fseg_inode_t* inode, /* in: segment inode */
- mtr_t* mtr) /* in: mtr */
+ fseg_inode_t* inode, /*!< in: segment inode */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint space;
ulint seg_id_low;
@@ -3550,10 +3838,9 @@ fseg_print_low(
ulint page_no;
dulint d_var;
- ut_ad(mtr_memo_contains(mtr, buf_block_align(inode),
- MTR_MEMO_PAGE_X_FIX));
- space = buf_frame_get_space_id(inode);
- page_no = buf_frame_get_page_no(inode);
+ ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX));
+ space = page_get_space_id(page_align(inode));
+ page_no = page_get_page_no(page_align(inode));
reserved = fseg_n_reserved_pages_low(inode, &used, mtr);
@@ -3579,42 +3866,51 @@ fseg_print_low(
(ulong) reserved, (ulong) used, (ulong) n_full,
(ulong) n_frag, (ulong) n_free, (ulong) n_not_full,
(ulong) n_used);
+ ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
}
-/***********************************************************************
+#ifdef UNIV_BTR_PRINT
+/*******************************************************************//**
Writes info of a segment. */
-
+UNIV_INTERN
void
fseg_print(
/*=======*/
- fseg_header_t* header, /* in: segment header */
- mtr_t* mtr) /* in: mtr */
+ fseg_header_t* header, /*!< in: segment header */
+ mtr_t* mtr) /*!< in: mtr */
{
fseg_inode_t* inode;
ulint space;
+ ulint flags;
+ ulint zip_size;
- space = buf_frame_get_space_id(header);
+ space = page_get_space_id(page_align(header));
- mtr_x_lock(fil_space_get_latch(space), mtr);
+ mtr_x_lock(fil_space_get_latch(space, &flags), mtr);
+ zip_size = dict_table_flags_to_zip_size(flags);
- inode = fseg_inode_get(header, mtr);
+ inode = fseg_inode_get(header, space, zip_size, mtr);
fseg_print_low(inode, mtr);
}
+#endif /* UNIV_BTR_PRINT */
-/***********************************************************************
-Validates the file space system and its segments. */
-
+/*******************************************************************//**
+Validates the file space system and its segments.
+@return TRUE if ok */
+UNIV_INTERN
ibool
fsp_validate(
/*=========*/
- /* out: TRUE if ok */
- ulint space) /* in: space id */
+ ulint space) /*!< in: space id */
{
fsp_header_t* header;
fseg_inode_t* seg_inode;
page_t* seg_inode_page;
+ rw_lock_t* latch;
ulint size;
+ ulint flags;
+ ulint zip_size;
ulint free_limit;
ulint frag_n_used;
mtr_t mtr;
@@ -3630,15 +3926,21 @@ fsp_validate(
ulint seg_inode_len_free;
ulint seg_inode_len_full;
+ latch = fil_space_get_latch(space, &flags);
+ zip_size = dict_table_flags_to_zip_size(flags);
+ ut_a(ut_is_2pow(zip_size));
+ ut_a(zip_size <= UNIV_PAGE_SIZE);
+ ut_a(!zip_size || zip_size >= PAGE_ZIP_MIN_SIZE);
+
/* Start first a mini-transaction mtr2 to lock out all other threads
from the fsp system */
mtr_start(&mtr2);
- mtr_x_lock(fil_space_get_latch(space), &mtr2);
+ mtr_x_lock(latch, &mtr2);
mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
+ mtr_x_lock(latch, &mtr);
- header = fsp_get_space_header(space, &mtr);
+ header = fsp_get_space_header(space, zip_size, &mtr);
size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);
free_limit = mtr_read_ulint(header + FSP_FREE_LIMIT,
@@ -3663,19 +3965,20 @@ fsp_validate(
/* Validate FSP_FREE list */
mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
+ mtr_x_lock(latch, &mtr);
- header = fsp_get_space_header(space, &mtr);
+ header = fsp_get_space_header(space, zip_size, &mtr);
node_addr = flst_get_first(header + FSP_FREE, &mtr);
mtr_commit(&mtr);
while (!fil_addr_is_null(node_addr)) {
mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
+ mtr_x_lock(latch, &mtr);
descr_count++;
- descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
+ descr = xdes_lst_get_descriptor(space, zip_size,
+ node_addr, &mtr);
ut_a(xdes_get_n_used(descr, &mtr) == 0);
ut_a(xdes_get_state(descr, &mtr) == XDES_FREE);
@@ -3686,19 +3989,20 @@ fsp_validate(
/* Validate FSP_FREE_FRAG list */
mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
+ mtr_x_lock(latch, &mtr);
- header = fsp_get_space_header(space, &mtr);
+ header = fsp_get_space_header(space, zip_size, &mtr);
node_addr = flst_get_first(header + FSP_FREE_FRAG, &mtr);
mtr_commit(&mtr);
while (!fil_addr_is_null(node_addr)) {
mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
+ mtr_x_lock(latch, &mtr);
descr_count++;
- descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
+ descr = xdes_lst_get_descriptor(space, zip_size,
+ node_addr, &mtr);
ut_a(xdes_get_n_used(descr, &mtr) > 0);
ut_a(xdes_get_n_used(descr, &mtr) < FSP_EXTENT_SIZE);
@@ -3712,19 +4016,20 @@ fsp_validate(
/* Validate FSP_FULL_FRAG list */
mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
+ mtr_x_lock(latch, &mtr);
- header = fsp_get_space_header(space, &mtr);
+ header = fsp_get_space_header(space, zip_size, &mtr);
node_addr = flst_get_first(header + FSP_FULL_FRAG, &mtr);
mtr_commit(&mtr);
while (!fil_addr_is_null(node_addr)) {
mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
+ mtr_x_lock(latch, &mtr);
descr_count++;
- descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
+ descr = xdes_lst_get_descriptor(space, zip_size,
+ node_addr, &mtr);
ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE);
ut_a(xdes_get_state(descr, &mtr) == XDES_FULL_FRAG);
@@ -3735,9 +4040,9 @@ fsp_validate(
/* Validate segments */
mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
+ mtr_x_lock(latch, &mtr);
- header = fsp_get_space_header(space, &mtr);
+ header = fsp_get_space_header(space, zip_size, &mtr);
node_addr = flst_get_first(header + FSP_SEG_INODES_FULL, &mtr);
@@ -3747,20 +4052,19 @@ fsp_validate(
while (!fil_addr_is_null(node_addr)) {
- for (n = 0; n < FSP_SEG_INODES_PER_PAGE; n++) {
-
+ n = 0;
+ do {
mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
+ mtr_x_lock(latch, &mtr);
seg_inode_page = fut_get_ptr(
- space, node_addr, RW_X_LATCH, &mtr)
+ space, zip_size, node_addr, RW_X_LATCH, &mtr)
- FSEG_INODE_PAGE_NODE;
seg_inode = fsp_seg_inode_page_get_nth_inode(
- seg_inode_page, n, &mtr);
- ut_a(ut_dulint_cmp(
- mach_read_from_8(seg_inode + FSEG_ID),
- ut_dulint_zero) != 0);
+ seg_inode_page, n, zip_size, &mtr);
+ ut_a(!ut_dulint_is_zero(
+ mach_read_from_8(seg_inode + FSEG_ID)));
fseg_validate_low(seg_inode, &mtr);
descr_count += flst_get_len(seg_inode + FSEG_FREE,
@@ -3775,15 +4079,15 @@ fsp_validate(
next_node_addr = flst_get_next_addr(
seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr);
mtr_commit(&mtr);
- }
+ } while (++n < FSP_SEG_INODES_PER_PAGE(zip_size));
node_addr = next_node_addr;
}
mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
+ mtr_x_lock(latch, &mtr);
- header = fsp_get_space_header(space, &mtr);
+ header = fsp_get_space_header(space, zip_size, &mtr);
node_addr = flst_get_first(header + FSP_SEG_INODES_FREE, &mtr);
@@ -3793,20 +4097,20 @@ fsp_validate(
while (!fil_addr_is_null(node_addr)) {
- for (n = 0; n < FSP_SEG_INODES_PER_PAGE; n++) {
+ n = 0;
+ do {
mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
+ mtr_x_lock(latch, &mtr);
seg_inode_page = fut_get_ptr(
- space, node_addr, RW_X_LATCH, &mtr)
+ space, zip_size, node_addr, RW_X_LATCH, &mtr)
- FSEG_INODE_PAGE_NODE;
seg_inode = fsp_seg_inode_page_get_nth_inode(
- seg_inode_page, n, &mtr);
- if (ut_dulint_cmp(
- mach_read_from_8(seg_inode + FSEG_ID),
- ut_dulint_zero) != 0) {
+ seg_inode_page, n, zip_size, &mtr);
+ if (!ut_dulint_is_zero(
+ mach_read_from_8(seg_inode + FSEG_ID))) {
fseg_validate_low(seg_inode, &mtr);
descr_count += flst_get_len(
@@ -3822,16 +4126,23 @@ fsp_validate(
next_node_addr = flst_get_next_addr(
seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr);
mtr_commit(&mtr);
- }
+ } while (++n < FSP_SEG_INODES_PER_PAGE(zip_size));
node_addr = next_node_addr;
}
ut_a(descr_count * FSP_EXTENT_SIZE == free_limit);
- ut_a(n_used + n_full_frag_pages
- == n_used2 + 2* ((free_limit + XDES_DESCRIBED_PER_PAGE - 1)
- / XDES_DESCRIBED_PER_PAGE)
- + seg_inode_len_full + seg_inode_len_free);
+ if (!zip_size) {
+ ut_a(n_used + n_full_frag_pages
+ == n_used2 + 2 * ((free_limit + (UNIV_PAGE_SIZE - 1))
+ / UNIV_PAGE_SIZE)
+ + seg_inode_len_full + seg_inode_len_free);
+ } else {
+ ut_a(n_used + n_full_frag_pages
+ == n_used2 + 2 * ((free_limit + (zip_size - 1))
+ / zip_size)
+ + seg_inode_len_full + seg_inode_len_free);
+ }
ut_a(frag_n_used == n_used);
mtr_commit(&mtr2);
@@ -3839,17 +4150,20 @@ fsp_validate(
return(TRUE);
}
-/***********************************************************************
+/*******************************************************************//**
Prints info of a file space. */
-
+UNIV_INTERN
void
fsp_print(
/*======*/
- ulint space) /* in: space id */
+ ulint space) /*!< in: space id */
{
fsp_header_t* header;
fseg_inode_t* seg_inode;
page_t* seg_inode_page;
+ rw_lock_t* latch;
+ ulint flags;
+ ulint zip_size;
ulint size;
ulint free_limit;
ulint frag_n_used;
@@ -3866,18 +4180,21 @@ fsp_print(
mtr_t mtr;
mtr_t mtr2;
+ latch = fil_space_get_latch(space, &flags);
+ zip_size = dict_table_flags_to_zip_size(flags);
+
/* Start first a mini-transaction mtr2 to lock out all other threads
from the fsp system */
mtr_start(&mtr2);
- mtr_x_lock(fil_space_get_latch(space), &mtr2);
+ mtr_x_lock(latch, &mtr2);
mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
+ mtr_x_lock(latch, &mtr);
- header = fsp_get_space_header(space, &mtr);
+ header = fsp_get_space_header(space, zip_size, &mtr);
size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);
@@ -3900,7 +4217,7 @@ fsp_print(
"not full frag extents %lu: used pages %lu,"
" full frag extents %lu\n"
"first seg id not used %lu %lu\n",
- (long) space,
+ (ulong) space,
(ulong) size, (ulong) free_limit, (ulong) n_free,
(ulong) n_free_frag, (ulong) frag_n_used, (ulong) n_full_frag,
(ulong) seg_id_high, (ulong) seg_id_low);
@@ -3910,9 +4227,9 @@ fsp_print(
/* Print segments */
mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
+ mtr_x_lock(latch, &mtr);
- header = fsp_get_space_header(space, &mtr);
+ header = fsp_get_space_header(space, zip_size, &mtr);
node_addr = flst_get_first(header + FSP_SEG_INODES_FULL, &mtr);
@@ -3920,20 +4237,21 @@ fsp_print(
while (!fil_addr_is_null(node_addr)) {
- for (n = 0; n < FSP_SEG_INODES_PER_PAGE; n++) {
+ n = 0;
+
+ do {
mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
+ mtr_x_lock(latch, &mtr);
seg_inode_page = fut_get_ptr(
- space, node_addr, RW_X_LATCH, &mtr)
+ space, zip_size, node_addr, RW_X_LATCH, &mtr)
- FSEG_INODE_PAGE_NODE;
seg_inode = fsp_seg_inode_page_get_nth_inode(
- seg_inode_page, n, &mtr);
- ut_a(ut_dulint_cmp(
- mach_read_from_8(seg_inode + FSEG_ID),
- ut_dulint_zero) != 0);
+ seg_inode_page, n, zip_size, &mtr);
+ ut_a(!ut_dulint_is_zero(
+ mach_read_from_8(seg_inode + FSEG_ID)));
fseg_print_low(seg_inode, &mtr);
n_segs++;
@@ -3941,15 +4259,15 @@ fsp_print(
next_node_addr = flst_get_next_addr(
seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr);
mtr_commit(&mtr);
- }
+ } while (++n < FSP_SEG_INODES_PER_PAGE(zip_size));
node_addr = next_node_addr;
}
mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
+ mtr_x_lock(latch, &mtr);
- header = fsp_get_space_header(space, &mtr);
+ header = fsp_get_space_header(space, zip_size, &mtr);
node_addr = flst_get_first(header + FSP_SEG_INODES_FREE, &mtr);
@@ -3957,20 +4275,21 @@ fsp_print(
while (!fil_addr_is_null(node_addr)) {
- for (n = 0; n < FSP_SEG_INODES_PER_PAGE; n++) {
+ n = 0;
+
+ do {
mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
+ mtr_x_lock(latch, &mtr);
seg_inode_page = fut_get_ptr(
- space, node_addr, RW_X_LATCH, &mtr)
+ space, zip_size, node_addr, RW_X_LATCH, &mtr)
- FSEG_INODE_PAGE_NODE;
seg_inode = fsp_seg_inode_page_get_nth_inode(
- seg_inode_page, n, &mtr);
- if (ut_dulint_cmp(
- mach_read_from_8(seg_inode + FSEG_ID),
- ut_dulint_zero) != 0) {
+ seg_inode_page, n, zip_size, &mtr);
+ if (!ut_dulint_is_zero(
+ mach_read_from_8(seg_inode + FSEG_ID))) {
fseg_print_low(seg_inode, &mtr);
n_segs++;
@@ -3979,7 +4298,7 @@ fsp_print(
next_node_addr = flst_get_next_addr(
seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr);
mtr_commit(&mtr);
- }
+ } while (++n < FSP_SEG_INODES_PER_PAGE(zip_size));
node_addr = next_node_addr;
}
@@ -3988,3 +4307,4 @@ fsp_print(
fprintf(stderr, "NUMBER of file segments: %lu\n", (ulong) n_segs);
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/fut/fut0fut.c b/storage/innobase/fut/fut0fut.c
index 7f7a8fa39e7..20b45a575e6 100644
--- a/storage/innobase/fut/fut0fut.c
+++ b/storage/innobase/fut/fut0fut.c
@@ -1,7 +1,24 @@
-/**********************************************************************
-File-based utilities
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file fut/fut0fut.c
+File-based utilities
Created 12/13/1995 Heikki Tuuri
***********************************************************************/
diff --git a/storage/innobase/fut/fut0lst.c b/storage/innobase/fut/fut0lst.c
index 75fa8bf5552..a1e21c22725 100644
--- a/storage/innobase/fut/fut0lst.c
+++ b/storage/innobase/fut/fut0lst.c
@@ -1,7 +1,24 @@
-/**********************************************************************
-File-based list utilities
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
-(c) 1995 Innobase Oy
+/******************************************************************//**
+@file fut/fut0lst.c
+File-based list utilities
Created 11/28/1995 Heikki Tuuri
***********************************************************************/
@@ -13,18 +30,18 @@ Created 11/28/1995 Heikki Tuuri
#endif
#include "buf0buf.h"
+#include "page0page.h"
-
-/************************************************************************
+/********************************************************************//**
Adds a node to an empty list. */
static
void
flst_add_to_empty(
/*==============*/
- flst_base_node_t* base, /* in: pointer to base node of
+ flst_base_node_t* base, /*!< in: pointer to base node of
empty list */
- flst_node_t* node, /* in: node to add */
- mtr_t* mtr) /* in: mini-transaction handle */
+ flst_node_t* node, /*!< in: node to add */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
ulint space;
fil_addr_t node_addr;
@@ -32,10 +49,8 @@ flst_add_to_empty(
ut_ad(mtr && base && node);
ut_ad(base != node);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(node),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX));
len = flst_get_len(base, mtr);
ut_a(len == 0);
@@ -53,15 +68,15 @@ flst_add_to_empty(
mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
}
-/************************************************************************
+/********************************************************************//**
Adds a node as the last node in a list. */
-
+UNIV_INTERN
void
flst_add_last(
/*==========*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node, /* in: node to add */
- mtr_t* mtr) /* in: mini-transaction handle */
+ flst_base_node_t* base, /*!< in: pointer to base node of list */
+ flst_node_t* node, /*!< in: node to add */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
ulint space;
fil_addr_t node_addr;
@@ -71,10 +86,8 @@ flst_add_last(
ut_ad(mtr && base && node);
ut_ad(base != node);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(node),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX));
len = flst_get_len(base, mtr);
last_addr = flst_get_last(base, mtr);
@@ -83,10 +96,12 @@ flst_add_last(
/* If the list is not empty, call flst_insert_after */
if (len != 0) {
if (last_addr.page == node_addr.page) {
- last_node = buf_frame_align(node) + last_addr.boffset;
+ last_node = page_align(node) + last_addr.boffset;
} else {
- last_node = fut_get_ptr(space, last_addr, RW_X_LATCH,
- mtr);
+ ulint zip_size = fil_space_get_zip_size(space);
+
+ last_node = fut_get_ptr(space, zip_size, last_addr,
+ RW_X_LATCH, mtr);
}
flst_insert_after(base, last_node, node, mtr);
@@ -96,15 +111,15 @@ flst_add_last(
}
}
-/************************************************************************
+/********************************************************************//**
Adds a node as the first node in a list. */
-
+UNIV_INTERN
void
flst_add_first(
/*===========*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node, /* in: node to add */
- mtr_t* mtr) /* in: mini-transaction handle */
+ flst_base_node_t* base, /*!< in: pointer to base node of list */
+ flst_node_t* node, /*!< in: node to add */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
ulint space;
fil_addr_t node_addr;
@@ -114,10 +129,8 @@ flst_add_first(
ut_ad(mtr && base && node);
ut_ad(base != node);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(node),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX));
len = flst_get_len(base, mtr);
first_addr = flst_get_first(base, mtr);
@@ -126,10 +139,11 @@ flst_add_first(
/* If the list is not empty, call flst_insert_before */
if (len != 0) {
if (first_addr.page == node_addr.page) {
- first_node = buf_frame_align(node)
- + first_addr.boffset;
+ first_node = page_align(node) + first_addr.boffset;
} else {
- first_node = fut_get_ptr(space, first_addr,
+ ulint zip_size = fil_space_get_zip_size(space);
+
+ first_node = fut_get_ptr(space, zip_size, first_addr,
RW_X_LATCH, mtr);
}
@@ -140,16 +154,16 @@ flst_add_first(
}
}
-/************************************************************************
+/********************************************************************//**
Inserts a node after another in a list. */
-
+UNIV_INTERN
void
flst_insert_after(
/*==============*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node1, /* in: node to insert after */
- flst_node_t* node2, /* in: node to add */
- mtr_t* mtr) /* in: mini-transaction handle */
+ flst_base_node_t* base, /*!< in: pointer to base node of list */
+ flst_node_t* node1, /*!< in: node to insert after */
+ flst_node_t* node2, /*!< in: node to add */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
ulint space;
fil_addr_t node1_addr;
@@ -162,12 +176,9 @@ flst_insert_after(
ut_ad(base != node1);
ut_ad(base != node2);
ut_ad(node2 != node1);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(node1),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, node1, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
buf_ptr_get_fsp_addr(node1, &space, &node1_addr);
buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
@@ -180,7 +191,10 @@ flst_insert_after(
if (!fil_addr_is_null(node3_addr)) {
/* Update prev field of node3 */
- node3 = fut_get_ptr(space, node3_addr, RW_X_LATCH, mtr);
+ ulint zip_size = fil_space_get_zip_size(space);
+
+ node3 = fut_get_ptr(space, zip_size,
+ node3_addr, RW_X_LATCH, mtr);
flst_write_addr(node3 + FLST_PREV, node2_addr, mtr);
} else {
/* node1 was last in list: update last field in base */
@@ -195,16 +209,16 @@ flst_insert_after(
mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
}
-/************************************************************************
+/********************************************************************//**
Inserts a node before another in a list. */
-
+UNIV_INTERN
void
flst_insert_before(
/*===============*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node2, /* in: node to insert */
- flst_node_t* node3, /* in: node to insert before */
- mtr_t* mtr) /* in: mini-transaction handle */
+ flst_base_node_t* base, /*!< in: pointer to base node of list */
+ flst_node_t* node2, /*!< in: node to insert */
+ flst_node_t* node3, /*!< in: node to insert before */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
ulint space;
flst_node_t* node1;
@@ -217,12 +231,9 @@ flst_insert_before(
ut_ad(base != node2);
ut_ad(base != node3);
ut_ad(node2 != node3);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(node3),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, node3, MTR_MEMO_PAGE_X_FIX));
buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
buf_ptr_get_fsp_addr(node3, &space, &node3_addr);
@@ -234,8 +245,10 @@ flst_insert_before(
flst_write_addr(node2 + FLST_NEXT, node3_addr, mtr);
if (!fil_addr_is_null(node1_addr)) {
+ ulint zip_size = fil_space_get_zip_size(space);
/* Update next field of node1 */
- node1 = fut_get_ptr(space, node1_addr, RW_X_LATCH, mtr);
+ node1 = fut_get_ptr(space, zip_size, node1_addr,
+ RW_X_LATCH, mtr);
flst_write_addr(node1 + FLST_NEXT, node2_addr, mtr);
} else {
/* node3 was first in list: update first field in base */
@@ -250,17 +263,18 @@ flst_insert_before(
mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
}
-/************************************************************************
+/********************************************************************//**
Removes a node. */
-
+UNIV_INTERN
void
flst_remove(
/*========*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node2, /* in: node to remove */
- mtr_t* mtr) /* in: mini-transaction handle */
+ flst_base_node_t* base, /*!< in: pointer to base node of list */
+ flst_node_t* node2, /*!< in: node to remove */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
ulint space;
+ ulint zip_size;
flst_node_t* node1;
fil_addr_t node1_addr;
fil_addr_t node2_addr;
@@ -269,12 +283,11 @@ flst_remove(
ulint len;
ut_ad(mtr && node2 && base);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
+ zip_size = fil_space_get_zip_size(space);
node1_addr = flst_get_prev_addr(node2, mtr);
node3_addr = flst_get_next_addr(node2, mtr);
@@ -285,10 +298,10 @@ flst_remove(
if (node1_addr.page == node2_addr.page) {
- node1 = buf_frame_align(node2) + node1_addr.boffset;
+ node1 = page_align(node2) + node1_addr.boffset;
} else {
- node1 = fut_get_ptr(space, node1_addr, RW_X_LATCH,
- mtr);
+ node1 = fut_get_ptr(space, zip_size,
+ node1_addr, RW_X_LATCH, mtr);
}
ut_ad(node1 != node2);
@@ -304,10 +317,10 @@ flst_remove(
if (node3_addr.page == node2_addr.page) {
- node3 = buf_frame_align(node2) + node3_addr.boffset;
+ node3 = page_align(node2) + node3_addr.boffset;
} else {
- node3 = fut_get_ptr(space, node3_addr, RW_X_LATCH,
- mtr);
+ node3 = fut_get_ptr(space, zip_size,
+ node3_addr, RW_X_LATCH, mtr);
}
ut_ad(node2 != node3);
@@ -325,19 +338,19 @@ flst_remove(
mlog_write_ulint(base + FLST_LEN, len - 1, MLOG_4BYTES, mtr);
}
-/************************************************************************
+/********************************************************************//**
Cuts off the tail of the list, including the node given. The number of
nodes which will be removed must be provided by the caller, as this function
does not measure the length of the tail. */
-
+UNIV_INTERN
void
flst_cut_end(
/*=========*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node2, /* in: first node to remove */
- ulint n_nodes,/* in: number of nodes to remove,
+ flst_base_node_t* base, /*!< in: pointer to base node of list */
+ flst_node_t* node2, /*!< in: first node to remove */
+ ulint n_nodes,/*!< in: number of nodes to remove,
must be >= 1 */
- mtr_t* mtr) /* in: mini-transaction handle */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
ulint space;
flst_node_t* node1;
@@ -346,10 +359,8 @@ flst_cut_end(
ulint len;
ut_ad(mtr && node2 && base);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
ut_ad(n_nodes > 0);
buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
@@ -362,10 +373,11 @@ flst_cut_end(
if (node1_addr.page == node2_addr.page) {
- node1 = buf_frame_align(node2) + node1_addr.boffset;
+ node1 = page_align(node2) + node1_addr.boffset;
} else {
- node1 = fut_get_ptr(space, node1_addr, RW_X_LATCH,
- mtr);
+ node1 = fut_get_ptr(space,
+ fil_space_get_zip_size(space),
+ node1_addr, RW_X_LATCH, mtr);
}
flst_write_addr(node1 + FLST_NEXT, fil_addr_null, mtr);
@@ -383,28 +395,26 @@ flst_cut_end(
mlog_write_ulint(base + FLST_LEN, len - n_nodes, MLOG_4BYTES, mtr);
}
-/************************************************************************
+/********************************************************************//**
Cuts off the tail of the list, not including the given node. The number of
nodes which will be removed must be provided by the caller, as this function
does not measure the length of the tail. */
-
+UNIV_INTERN
void
flst_truncate_end(
/*==============*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node2, /* in: first node not to remove */
- ulint n_nodes,/* in: number of nodes to remove */
- mtr_t* mtr) /* in: mini-transaction handle */
+ flst_base_node_t* base, /*!< in: pointer to base node of list */
+ flst_node_t* node2, /*!< in: first node not to remove */
+ ulint n_nodes,/*!< in: number of nodes to remove */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
fil_addr_t node2_addr;
ulint len;
ulint space;
ut_ad(mtr && node2 && base);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
if (n_nodes == 0) {
ut_ad(fil_addr_is_null(flst_get_next_addr(node2, mtr)));
@@ -426,27 +436,27 @@ flst_truncate_end(
mlog_write_ulint(base + FLST_LEN, len - n_nodes, MLOG_4BYTES, mtr);
}
-/************************************************************************
-Validates a file-based list. */
-
+/********************************************************************//**
+Validates a file-based list.
+@return TRUE if ok */
+UNIV_INTERN
ibool
flst_validate(
/*==========*/
- /* out: TRUE if ok */
- flst_base_node_t* base, /* in: pointer to base node of list */
- mtr_t* mtr1) /* in: mtr */
+ const flst_base_node_t* base, /*!< in: pointer to base node of list */
+ mtr_t* mtr1) /*!< in: mtr */
{
- ulint space;
- flst_node_t* node;
- fil_addr_t node_addr;
- fil_addr_t base_addr;
- ulint len;
- ulint i;
- mtr_t mtr2;
+ ulint space;
+ ulint zip_size;
+ const flst_node_t* node;
+ fil_addr_t node_addr;
+ fil_addr_t base_addr;
+ ulint len;
+ ulint i;
+ mtr_t mtr2;
ut_ad(base);
- ut_ad(mtr_memo_contains(mtr1, buf_block_align(base),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr1, base, MTR_MEMO_PAGE_X_FIX));
/* We use two mini-transaction handles: the first is used to
lock the base node, and prevent other threads from modifying the
@@ -457,6 +467,7 @@ flst_validate(
/* Find out the space id */
buf_ptr_get_fsp_addr(base, &space, &base_addr);
+ zip_size = fil_space_get_zip_size(space);
len = flst_get_len(base, mtr1);
node_addr = flst_get_first(base, mtr1);
@@ -464,7 +475,8 @@ flst_validate(
for (i = 0; i < len; i++) {
mtr_start(&mtr2);
- node = fut_get_ptr(space, node_addr, RW_X_LATCH, &mtr2);
+ node = fut_get_ptr(space, zip_size,
+ node_addr, RW_X_LATCH, &mtr2);
node_addr = flst_get_next_addr(node, &mtr2);
mtr_commit(&mtr2); /* Commit mtr2 each round to prevent buffer
@@ -478,7 +490,8 @@ flst_validate(
for (i = 0; i < len; i++) {
mtr_start(&mtr2);
- node = fut_get_ptr(space, node_addr, RW_X_LATCH, &mtr2);
+ node = fut_get_ptr(space, zip_size,
+ node_addr, RW_X_LATCH, &mtr2);
node_addr = flst_get_prev_addr(node, &mtr2);
mtr_commit(&mtr2); /* Commit mtr2 each round to prevent buffer
@@ -490,29 +503,28 @@ flst_validate(
return(TRUE);
}
-/************************************************************************
+/********************************************************************//**
Prints info of a file-based list. */
-
+UNIV_INTERN
void
flst_print(
/*=======*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- mtr_t* mtr) /* in: mtr */
+ const flst_base_node_t* base, /*!< in: pointer to base node of list */
+ mtr_t* mtr) /*!< in: mtr */
{
- buf_frame_t* frame;
- ulint len;
+ const buf_frame_t* frame;
+ ulint len;
ut_ad(base && mtr);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
- MTR_MEMO_PAGE_X_FIX));
- frame = buf_frame_align(base);
+ ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
+ frame = page_align((byte*) base);
len = flst_get_len(base, mtr);
fprintf(stderr,
"FILE-BASED LIST:\n"
"Base node in space %lu page %lu byte offset %lu; len %lu\n",
- (ulong) buf_frame_get_space_id(frame),
- (ulong) buf_frame_get_page_no(frame),
- (ulong) (base - frame), (ulong) len);
+ (ulong) page_get_space_id(frame),
+ (ulong) page_get_page_no(frame),
+ (ulong) page_offset(base), (ulong) len);
}
diff --git a/storage/innobase/ha/ha0ha.c b/storage/innobase/ha/ha0ha.c
index 077497493b4..cb5e541b55d 100644
--- a/storage/innobase/ha/ha0ha.c
+++ b/storage/innobase/ha/ha0ha.c
@@ -1,7 +1,24 @@
-/************************************************************************
-The hash table with external chains
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
-(c) 1994-1997 Innobase Oy
+/********************************************************************//**
+@file ha/ha0ha.c
+The hash table with external chains
Created 8/22/1994 Heikki Tuuri
*************************************************************************/
@@ -11,92 +28,129 @@ Created 8/22/1994 Heikki Tuuri
#include "ha0ha.ic"
#endif
-#include "buf0buf.h"
-
-/*****************************************************************
-Creates a hash table with >= n array cells. The actual number of cells is
-chosen to be a prime number slightly bigger than n. */
+#ifdef UNIV_DEBUG
+# include "buf0buf.h"
+#endif /* UNIV_DEBUG */
+#ifdef UNIV_SYNC_DEBUG
+# include "btr0sea.h"
+#endif /* UNIV_SYNC_DEBUG */
+#include "page0page.h"
+/*************************************************************//**
+Creates a hash table with at least n array cells. The actual number
+of cells is chosen to be a prime number slightly bigger than n.
+@return own: created table */
+UNIV_INTERN
hash_table_t*
ha_create_func(
/*===========*/
- /* out, own: created table */
- ibool in_btr_search, /* in: TRUE if the hash table is used in
- the btr_search module */
- ulint n, /* in: number of array cells */
+ ulint n, /*!< in: number of array cells */
#ifdef UNIV_SYNC_DEBUG
- ulint mutex_level, /* in: level of the mutexes in the latching
+ ulint mutex_level, /*!< in: level of the mutexes in the latching
order: this is used in the debug version */
#endif /* UNIV_SYNC_DEBUG */
- ulint n_mutexes) /* in: number of mutexes to protect the
+ ulint n_mutexes) /*!< in: number of mutexes to protect the
hash table: must be a power of 2, or 0 */
{
hash_table_t* table;
+#ifndef UNIV_HOTBACKUP
ulint i;
+#endif /* !UNIV_HOTBACKUP */
+ ut_ad(ut_is_2pow(n_mutexes));
table = hash_create(n);
- if (in_btr_search) {
- table->adaptive = TRUE;
- } else {
- table->adaptive = FALSE;
- }
-
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+# ifndef UNIV_HOTBACKUP
+ table->adaptive = TRUE;
+# endif /* !UNIV_HOTBACKUP */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
/* Creating MEM_HEAP_BTR_SEARCH type heaps can potentially fail,
but in practise it never should in this case, hence the asserts. */
if (n_mutexes == 0) {
- if (in_btr_search) {
- table->heap = mem_heap_create_in_btr_search(4096);
- ut_a(table->heap);
- } else {
- table->heap = mem_heap_create_in_buffer(4096);
- }
+ table->heap = mem_heap_create_in_btr_search(
+ ut_min(4096, MEM_MAX_ALLOC_IN_BUF));
+ ut_a(table->heap);
return(table);
}
+#ifndef UNIV_HOTBACKUP
hash_create_mutexes(table, n_mutexes, mutex_level);
table->heaps = mem_alloc(n_mutexes * sizeof(void*));
for (i = 0; i < n_mutexes; i++) {
- if (in_btr_search) {
- table->heaps[i] = mem_heap_create_in_btr_search(4096);
- ut_a(table->heaps[i]);
- } else {
- table->heaps[i] = mem_heap_create_in_buffer(4096);
- }
+ table->heaps[i] = mem_heap_create_in_btr_search(4096);
+ ut_a(table->heaps[i]);
}
+#endif /* !UNIV_HOTBACKUP */
return(table);
}
-/*****************************************************************
+/*************************************************************//**
+Empties a hash table: frees table->heaps[] and sets every cell to NULL. */
+UNIV_INTERN
+void
+ha_clear(
+/*=====*/
+ hash_table_t* table) /*!< in, own: hash table */
+{
+ ulint i;
+ ulint n;
+ /* UNIV_SYNC_DEBUG builds assert an exclusive btr_search_latch. */
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EXCLUSIVE));
+#endif /* UNIV_SYNC_DEBUG */
+
+#ifndef UNIV_HOTBACKUP
+ /* Free the per-mutex memory heaps, one for each of n_mutexes. */
+ n = table->n_mutexes;
+
+ for (i = 0; i < n; i++) {
+ mem_heap_free(table->heaps[i]);
+ }
+#endif /* !UNIV_HOTBACKUP */
+ /* NOTE(review): table->heap is not freed here -- confirm intended. */
+ /* Clear the hash table: detach the node chain from every cell. */
+ n = hash_get_n_cells(table);
+
+ for (i = 0; i < n; i++) {
+ hash_get_nth_cell(table, i)->node = NULL;
+ }
+}
+
+/*************************************************************//**
Inserts an entry into a hash table. If an entry with the same fold number
is found, its node is updated to point to the new data, and no new node
-is inserted. */
-
+is inserted.
+@return TRUE if succeed, FALSE if no more memory could be allocated */
+UNIV_INTERN
ibool
-ha_insert_for_fold(
-/*===============*/
- /* out: TRUE if succeed, FALSE if no more
- memory could be allocated */
- hash_table_t* table, /* in: hash table */
- ulint fold, /* in: folded value of data; if a node with
+ha_insert_for_fold_func(
+/*====================*/
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold, /*!< in: folded value of data; if a node with
the same fold value already exists, it is
updated to point to the same data, and no new
node is created! */
- void* data) /* in: data, must not be NULL */
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+ buf_block_t* block, /*!< in: buffer block containing the data */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+ void* data) /*!< in: data, must not be NULL */
{
hash_cell_t* cell;
ha_node_t* node;
ha_node_t* prev_node;
- buf_block_t* prev_block;
ulint hash;
ut_ad(table && data);
- ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+ ut_a(block->frame == page_align(data));
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+ ASSERT_HASH_MUTEX_OWN(table, fold);
hash = hash_calc_hash(fold, table);
@@ -106,13 +160,20 @@ ha_insert_for_fold(
while (prev_node != NULL) {
if (prev_node->fold == fold) {
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+# ifndef UNIV_HOTBACKUP
if (table->adaptive) {
- prev_block = buf_block_align(prev_node->data);
+ buf_block_t* prev_block = prev_node->block;
+ ut_a(prev_block->frame
+ == page_align(prev_node->data));
ut_a(prev_block->n_pointers > 0);
prev_block->n_pointers--;
- buf_block_align(data)->n_pointers++;
+ block->n_pointers++;
}
+# endif /* !UNIV_HOTBACKUP */
+ prev_node->block = block;
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
prev_node->data = data;
return(TRUE);
@@ -134,11 +195,15 @@ ha_insert_for_fold(
return(FALSE);
}
- ha_node_set_data(node, data);
+ ha_node_set_data(node, block, data);
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+# ifndef UNIV_HOTBACKUP
if (table->adaptive) {
- buf_block_align(data)->n_pointers++;
+ block->n_pointers++;
}
+# endif /* !UNIV_HOTBACKUP */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
node->fold = fold;
@@ -163,93 +228,88 @@ ha_insert_for_fold(
return(TRUE);
}
-/***************************************************************
+/***********************************************************//**
Deletes a hash node. */
-
+UNIV_INTERN
void
ha_delete_hash_node(
/*================*/
- hash_table_t* table, /* in: hash table */
- ha_node_t* del_node) /* in: node to be deleted */
+ hash_table_t* table, /*!< in: hash table */
+ ha_node_t* del_node) /*!< in: node to be deleted */
{
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+# ifndef UNIV_HOTBACKUP
if (table->adaptive) {
- ut_a(buf_block_align(del_node->data)->n_pointers > 0);
- buf_block_align(del_node->data)->n_pointers--;
+ ut_a(del_node->block->frame == page_align(del_node->data));
+ ut_a(del_node->block->n_pointers > 0);
+ del_node->block->n_pointers--;
}
+# endif /* !UNIV_HOTBACKUP */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
HASH_DELETE_AND_COMPACT(ha_node_t, next, table, del_node);
}
-/*****************************************************************
-Deletes an entry from a hash table. */
-
-void
-ha_delete(
-/*======*/
- hash_table_t* table, /* in: hash table */
- ulint fold, /* in: folded value of data */
- void* data) /* in: data, must not be NULL and must exist
- in the hash table */
-{
- ha_node_t* node;
-
- ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
-
- node = ha_search_with_data(table, fold, data);
-
- ut_a(node);
-
- ha_delete_hash_node(table, node);
-}
-
-/*************************************************************
+/*********************************************************//**
Looks for an element when we know the pointer to the data, and updates
the pointer to data, if found. */
-
+UNIV_INTERN
void
-ha_search_and_update_if_found(
-/*==========================*/
- hash_table_t* table, /* in: hash table */
- ulint fold, /* in: folded value of the searched data */
- void* data, /* in: pointer to the data */
- void* new_data)/* in: new pointer to the data */
+ha_search_and_update_if_found_func(
+/*===============================*/
+ hash_table_t* table, /*!< in/out: hash table */
+ ulint fold, /*!< in: folded value of the searched data */
+ void* data, /*!< in: pointer to the data */
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+ buf_block_t* new_block,/*!< in: block containing new_data */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+ void* new_data)/*!< in: new pointer to the data */
{
ha_node_t* node;
- ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+ ASSERT_HASH_MUTEX_OWN(table, fold);
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+ ut_a(new_block->frame == page_align(new_data));
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
node = ha_search_with_data(table, fold, data);
if (node) {
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+# ifndef UNIV_HOTBACKUP
if (table->adaptive) {
- ut_a(buf_block_align(node->data)->n_pointers > 0);
- buf_block_align(node->data)->n_pointers--;
- buf_block_align(new_data)->n_pointers++;
+ ut_a(node->block->n_pointers > 0);
+ node->block->n_pointers--;
+ new_block->n_pointers++;
}
+# endif /* !UNIV_HOTBACKUP */
+ node->block = new_block;
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
node->data = new_data;
}
}
-/*********************************************************************
+#ifndef UNIV_HOTBACKUP
+/*****************************************************************//**
Removes from the chain determined by fold all nodes whose data pointer
points to the page given. */
-
+UNIV_INTERN
void
ha_remove_all_nodes_to_page(
/*========================*/
- hash_table_t* table, /* in: hash table */
- ulint fold, /* in: fold value */
- page_t* page) /* in: buffer page */
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold, /*!< in: fold value */
+ const page_t* page) /*!< in: buffer page */
{
ha_node_t* node;
- ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+ ASSERT_HASH_MUTEX_OWN(table, fold);
node = ha_chain_get_first(table, fold);
while (node) {
- if (buf_frame_align(ha_node_get_data(node)) == page) {
+ if (page_align(ha_node_get_data(node)) == page) {
/* Remove the hash node */
@@ -270,23 +330,23 @@ ha_remove_all_nodes_to_page(
node = ha_chain_get_first(table, fold);
while (node) {
- ut_a(buf_frame_align(ha_node_get_data(node)) != page);
+ ut_a(page_align(ha_node_get_data(node)) != page);
node = ha_chain_get_next(node);
}
#endif
}
-/*****************************************************************
-Validates a given range of the cells in hash table. */
-
+/*************************************************************//**
+Validates a given range of the cells in hash table.
+@return TRUE if ok */
+UNIV_INTERN
ibool
ha_validate(
/*========*/
- /* out: TRUE if ok */
- hash_table_t* table, /* in: hash table */
- ulint start_index, /* in: start index */
- ulint end_index) /* in: end index */
+ hash_table_t* table, /*!< in: hash table */
+ ulint start_index, /*!< in: start index */
+ ulint end_index) /*!< in: end index */
{
hash_cell_t* cell;
ha_node_t* node;
@@ -322,14 +382,14 @@ ha_validate(
return(ok);
}
-/*****************************************************************
+/*************************************************************//**
Prints info of a hash table. */
-
+UNIV_INTERN
void
ha_print_info(
/*==========*/
- FILE* file, /* in: file where to print */
- hash_table_t* table) /* in: hash table */
+ FILE* file, /*!< in: file where to print */
+ hash_table_t* table) /*!< in: hash table */
{
#ifdef UNIV_DEBUG
/* Some of the code here is disabled for performance reasons in production
@@ -378,3 +438,4 @@ builds, see http://bugs.mysql.com/36941 */
(ulong) n_bufs);
}
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/ha/ha0storage.c b/storage/innobase/ha/ha0storage.c
index 698e34f1166..698e34f1166 100644
--- a/storage/innodb_plugin/ha/ha0storage.c
+++ b/storage/innobase/ha/ha0storage.c
diff --git a/storage/innobase/ha/hash0hash.c b/storage/innobase/ha/hash0hash.c
index 4807015eee5..2800d7793f8 100644
--- a/storage/innobase/ha/hash0hash.c
+++ b/storage/innobase/ha/hash0hash.c
@@ -1,7 +1,24 @@
-/******************************************************
-The simple hash table utility
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1997 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file ha/hash0hash.c
+The simple hash table utility
Created 5/20/1997 Heikki Tuuri
*******************************************************/
@@ -13,37 +30,38 @@ Created 5/20/1997 Heikki Tuuri
#include "mem0mem.h"
-/****************************************************************
+#ifndef UNIV_HOTBACKUP
+/************************************************************//**
Reserves the mutex for a fold value in a hash table. */
-
+UNIV_INTERN
void
hash_mutex_enter(
/*=============*/
- hash_table_t* table, /* in: hash table */
- ulint fold) /* in: fold */
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold) /*!< in: fold */
{
mutex_enter(hash_get_mutex(table, fold));
}
-/****************************************************************
+/************************************************************//**
Releases the mutex for a fold value in a hash table. */
-
+UNIV_INTERN
void
hash_mutex_exit(
/*============*/
- hash_table_t* table, /* in: hash table */
- ulint fold) /* in: fold */
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold) /*!< in: fold */
{
mutex_exit(hash_get_mutex(table, fold));
}
-/****************************************************************
+/************************************************************//**
Reserves all the mutexes of a hash table, in an ascending order. */
-
+UNIV_INTERN
void
hash_mutex_enter_all(
/*=================*/
- hash_table_t* table) /* in: hash table */
+ hash_table_t* table) /*!< in: hash table */
{
ulint i;
@@ -53,13 +71,13 @@ hash_mutex_enter_all(
}
}
-/****************************************************************
+/************************************************************//**
Releases all the mutexes of a hash table. */
-
+UNIV_INTERN
void
hash_mutex_exit_all(
/*================*/
- hash_table_t* table) /* in: hash table */
+ hash_table_t* table) /*!< in: hash table */
{
ulint i;
@@ -68,22 +86,21 @@ hash_mutex_exit_all(
mutex_exit(table->mutexes + i);
}
}
+#endif /* !UNIV_HOTBACKUP */
-/*****************************************************************
+/*************************************************************//**
Creates a hash table with >= n array cells. The actual number of cells is
-chosen to be a prime number slightly bigger than n. */
-
+chosen to be a prime number slightly bigger than n.
+@return own: created table */
+UNIV_INTERN
hash_table_t*
hash_create(
/*========*/
- /* out, own: created table */
- ulint n) /* in: number of array cells */
+ ulint n) /*!< in: number of array cells */
{
hash_cell_t* array;
ulint prime;
hash_table_t* table;
- ulint i;
- hash_cell_t* cell;
prime = ut_find_prime(n);
@@ -91,57 +108,60 @@ hash_create(
array = ut_malloc(sizeof(hash_cell_t) * prime);
- table->adaptive = FALSE;
table->array = array;
table->n_cells = prime;
+#ifndef UNIV_HOTBACKUP
+# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+ table->adaptive = FALSE;
+# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
table->n_mutexes = 0;
table->mutexes = NULL;
table->heaps = NULL;
+#endif /* !UNIV_HOTBACKUP */
table->heap = NULL;
table->magic_n = HASH_TABLE_MAGIC_N;
/* Initialize the cell array */
-
- for (i = 0; i < prime; i++) {
-
- cell = hash_get_nth_cell(table, i);
- cell->node = NULL;
- }
+ hash_table_clear(table);
return(table);
}
-/*****************************************************************
+/*************************************************************//**
Frees a hash table. */
-
+UNIV_INTERN
void
hash_table_free(
/*============*/
- hash_table_t* table) /* in, own: hash table */
+ hash_table_t* table) /*!< in, own: hash table */
{
+#ifndef UNIV_HOTBACKUP
ut_a(table->mutexes == NULL);
+#endif /* !UNIV_HOTBACKUP */
ut_free(table->array);
mem_free(table);
}
-/*****************************************************************
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
Creates a mutex array to protect a hash table. */
-
+UNIV_INTERN
void
hash_create_mutexes_func(
/*=====================*/
- hash_table_t* table, /* in: hash table */
+ hash_table_t* table, /*!< in: hash table */
#ifdef UNIV_SYNC_DEBUG
- ulint sync_level, /* in: latching order level of the
+ ulint sync_level, /*!< in: latching order level of the
mutexes: used in the debug version */
#endif /* UNIV_SYNC_DEBUG */
- ulint n_mutexes) /* in: number of mutexes, must be a
+ ulint n_mutexes) /*!< in: number of mutexes, must be a
power of 2 */
{
ulint i;
- ut_a(n_mutexes == ut_2_power_up(n_mutexes));
+ ut_a(n_mutexes > 0);
+ ut_a(ut_is_2pow(n_mutexes));
table->mutexes = mem_alloc(n_mutexes * sizeof(mutex_t));
@@ -151,3 +171,4 @@ hash_create_mutexes_func(
table->n_mutexes = n_mutexes;
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/ha_innodb.def b/storage/innobase/ha_innodb.def
index e0faa62deb1..e0faa62deb1 100644
--- a/storage/innodb_plugin/ha_innodb.def
+++ b/storage/innobase/ha_innodb.def
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index f17635c69cc..919f8736043 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -1,21 +1,53 @@
-/* Copyright (C) 2000-2005 MySQL AB & Innobase Oy
+/*****************************************************************************
+
+Copyright (c) 2000, 2009, MySQL AB & Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, 2009 Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+/***********************************************************************
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; version 2 of the License.
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2009, Percona Inc.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; version 2 of the License.
-/* This file defines the InnoDB handler: the interface between MySQL and InnoDB
-NOTE: You can only use noninlined InnoDB functions in this file, because we
-have disabled the InnoDB inlining in this file. */
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+***********************************************************************/
/* TODO list for the InnoDB handler in 5.0:
- Remove the flag trx->active_trans and look at trx->conc_state
@@ -30,20 +62,63 @@ have disabled the InnoDB inlining in this file. */
#endif
#include <mysql_priv.h>
-#include <mysqld_error.h>
#include <m_ctype.h>
-#include <hash.h>
-#include <myisampack.h>
#include <mysys_err.h>
-#include <my_sys.h>
-#include "ha_innodb.h"
#include <mysql/plugin.h>
+/** @file ha_innodb.cc */
+
+/* Include necessary InnoDB headers */
+extern "C" {
+#include "univ.i"
+#include "buf0lru.h"
+#include "btr0sea.h"
+#include "os0file.h"
+#include "os0thread.h"
+#include "srv0start.h"
+#include "srv0srv.h"
+#include "trx0roll.h"
+#include "trx0trx.h"
+#include "trx0sys.h"
+#include "mtr0mtr.h"
+#include "row0ins.h"
+#include "row0mysql.h"
+#include "row0sel.h"
+#include "row0upd.h"
+#include "log0log.h"
+#include "lock0lock.h"
+#include "dict0crea.h"
+#include "btr0cur.h"
+#include "btr0btr.h"
+#include "fsp0fsp.h"
+#include "sync0sync.h"
+#include "fil0fil.h"
+#include "trx0xa.h"
+#include "row0merge.h"
+#include "thr0loc.h"
+#include "dict0boot.h"
+#include "ha_prototypes.h"
+#include "ut0mem.h"
+#include "ibuf0ibuf.h"
+}
+
+#include "ha_innodb.h"
+#include "i_s.h"
+
#ifndef MYSQL_SERVER
+# ifndef MYSQL_PLUGIN_IMPORT
+# define MYSQL_PLUGIN_IMPORT /* nothing */
+# endif /* MYSQL_PLUGIN_IMPORT */
/* This is needed because of Bug #3596. Let us hope that pthread_mutex_t
is defined the same in both builds: the MySQL server and the InnoDB plugin. */
-extern pthread_mutex_t LOCK_thread_count;
+extern MYSQL_PLUGIN_IMPORT pthread_mutex_t LOCK_thread_count;
+
+#if MYSQL_VERSION_ID < 50124
+/* this is defined in mysql_priv.h inside #ifdef MYSQL_SERVER
+but we need it here */
+bool check_global_access(THD *thd, ulong want_access);
+#endif /* MYSQL_VERSION_ID < 50124 */
#endif /* MYSQL_SERVER */
/** to protect innobase_open_files */
@@ -57,64 +132,50 @@ static pthread_mutex_t commit_cond_m;
static pthread_mutex_t analyze_mutex;
static bool innodb_inited = 0;
-/*
- This needs to exist until the query cache callback is removed
- or learns to pass hton.
-*/
-static handlerton *innodb_hton_ptr;
-
#define INSIDE_HA_INNOBASE_CC
-/* Include necessary InnoDB headers */
-extern "C" {
-#include "../storage/innobase/include/univ.i"
-#include "../storage/innobase/include/os0file.h"
-#include "../storage/innobase/include/os0thread.h"
-#include "../storage/innobase/include/srv0start.h"
-#include "../storage/innobase/include/srv0srv.h"
-#include "../storage/innobase/include/trx0roll.h"
-#include "../storage/innobase/include/trx0trx.h"
-#include "../storage/innobase/include/trx0sys.h"
-#include "../storage/innobase/include/mtr0mtr.h"
-#include "../storage/innobase/include/row0ins.h"
-#include "../storage/innobase/include/row0mysql.h"
-#include "../storage/innobase/include/row0sel.h"
-#include "../storage/innobase/include/row0upd.h"
-#include "../storage/innobase/include/log0log.h"
-#include "../storage/innobase/include/lock0lock.h"
-#include "../storage/innobase/include/dict0crea.h"
-#include "../storage/innobase/include/btr0cur.h"
-#include "../storage/innobase/include/btr0btr.h"
-#include "../storage/innobase/include/fsp0fsp.h"
-#include "../storage/innobase/include/sync0sync.h"
-#include "../storage/innobase/include/fil0fil.h"
-#include "../storage/innobase/include/trx0xa.h"
-#include "../storage/innobase/include/thr0loc.h"
-#include "../storage/innobase/include/ha_prototypes.h"
-}
+/* Tests whether thd is the THD of the calling thread. NOTE: current_thd
+is compared directly here; no Windows-specific mapping to NULL is made. */
+
+#define EQ_CURRENT_THD(thd) ((thd) == current_thd)
+
+
+static struct handlerton* innodb_hton_ptr;
static const long AUTOINC_OLD_STYLE_LOCKING = 0;
static const long AUTOINC_NEW_STYLE_LOCKING = 1;
static const long AUTOINC_NO_LOCKING = 2;
static long innobase_mirrored_log_groups, innobase_log_files_in_group,
- innobase_log_buffer_size, innobase_buffer_pool_awe_mem_mb,
+ innobase_log_buffer_size,
innobase_additional_mem_pool_size, innobase_file_io_threads,
- innobase_lock_wait_timeout, innobase_force_recovery,
- innobase_open_files, innobase_autoinc_lock_mode;
+ innobase_force_recovery, innobase_open_files,
+ innobase_autoinc_lock_mode;
static ulong innobase_commit_concurrency = 0;
+static ulong innobase_read_io_threads;
+static ulong innobase_write_io_threads;
static long long innobase_buffer_pool_size, innobase_log_file_size;
+/** Percentage of the buffer pool to reserve for 'old' blocks.
+Connected to buf_LRU_old_ratio. */
+static uint innobase_old_blocks_pct;
+
/* The default values for the following char* start-up parameters
are determined in innobase_init below: */
static char* innobase_data_home_dir = NULL;
static char* innobase_data_file_path = NULL;
static char* innobase_log_group_home_dir = NULL;
-/* The following has a misleading name: starting from 4.0.5, this also
-affects Windows: */
-static char* innobase_unix_file_flush_method = NULL;
+static char* innobase_file_format_name = NULL;
+static char* innobase_change_buffering = NULL;
+
+/* Note: This variable can be set to on/off and any of the supported
+file formats in the configuration file, but can only be set to any
+of the supported file formats during runtime. */
+static char* innobase_file_format_check = NULL;
+
+static char* innobase_file_flush_method = NULL;
/* Below we have boolean-valued start-up parameters, and their default
values */
@@ -126,15 +187,15 @@ static char* innobase_log_arch_dir = NULL;
#endif /* UNIV_LOG_ARCHIVE */
static my_bool innobase_use_doublewrite = TRUE;
static my_bool innobase_use_checksums = TRUE;
-static my_bool innobase_file_per_table = FALSE;
static my_bool innobase_locks_unsafe_for_binlog = FALSE;
static my_bool innobase_rollback_on_timeout = FALSE;
static my_bool innobase_create_status_file = FALSE;
static my_bool innobase_stats_on_metadata = TRUE;
-static my_bool innobase_adaptive_hash_index = TRUE;
static char* internal_innobase_data_file_path = NULL;
+static char* innodb_version_str = (char*) INNODB_VERSION_STR;
+
/* The following counter is used to convey information to InnoDB
about server activity: in selects it is not sensible to call
srv_active_wake_master_thread after each fetch or search, we only do
@@ -143,14 +204,18 @@ it every INNOBASE_WAKE_INTERVAL'th step. */
#define INNOBASE_WAKE_INTERVAL 32
static ulong innobase_active_counter = 0;
-static HASH innobase_open_tables;
+static hash_table_t* innobase_open_tables;
#ifdef __NETWARE__ /* some special cleanup for NetWare */
bool nw_panic = FALSE;
#endif
-static uchar* innobase_get_key(INNOBASE_SHARE *share, size_t *length,
- my_bool not_used __attribute__((unused)));
+/** Allowed values of innodb_change_buffering */
+static const char* innobase_change_buffering_values[IBUF_USE_COUNT] = {
+ "none", /* IBUF_USE_NONE */
+ "inserts" /* IBUF_USE_INSERT */
+};
+
static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share);
static int innobase_close_connection(handlerton *hton, THD* thd);
@@ -165,23 +230,6 @@ static handler *innobase_create_handler(handlerton *hton,
TABLE_SHARE *table,
MEM_ROOT *mem_root);
-/***********************************************************************
-This function checks each index name for a table against reserved
-system default primary index name 'GEN_CLUST_INDEX'. If a name matches,
-this function pushes an error message to the client, and returns true. */
-static
-bool
-innobase_index_name_is_reserved(
-/*============================*/
- /* out: true if index name matches a
- reserved name */
- const trx_t* trx, /* in: InnoDB transaction handle */
- const TABLE* form, /* in: information on table
- columns and indexes */
- const char* norm_name); /* in: table name */
-
-static const char innobase_hton_name[]= "InnoDB";
-
/* "GEN_CLUST_INDEX" is the name reserved for Innodb default
system primary index. */
static const char innobase_index_reserve_name[]= "GEN_CLUST_INDEX";
@@ -200,20 +248,56 @@ void
innobase_commit_concurrency_init_default(void);
/*==========================================*/
-/*****************************************************************
-Check for a valid value of innobase_commit_concurrency. */
+/************************************************************//**
+Validate the file format name and return its corresponding id.
+@return valid file format id */
+static
+uint
+innobase_file_format_name_lookup(
+/*=============================*/
+ const char* format_name); /*!< in: pointer to file format
+ name */
+/************************************************************//**
+Validate the file format check config parameters, as a side effect it
+sets the srv_check_file_format_at_startup variable.
+@return true if one of "on" or "off" */
+static
+bool
+innobase_file_format_check_on_off(
+/*==============================*/
+ const char* format_check); /*!< in: parameter value */
+/************************************************************//**
+Validate the file format check config parameters, as a side effect it
+sets the srv_check_file_format_at_startup variable.
+@return the format_id if valid config value, otherwise, return -1 */
+static
+int
+innobase_file_format_validate_and_set(
+/*================================*/
+ const char* format_check); /*!< in: parameter value */
+/****************************************************************//**
+Return alter table flags supported in an InnoDB database. */
+static
+uint
+innobase_alter_table_flags(
+/*=======================*/
+ uint flags);
+
+static const char innobase_hton_name[]= "InnoDB";
+
+/*************************************************************//**
+Check for a valid value of innobase_commit_concurrency.
+@return 0 for valid innodb_commit_concurrency */
static
int
innobase_commit_concurrency_validate(
/*=================================*/
- /* out: 0 for valid
- innodb_commit_concurrency */
- THD* thd, /* in: thread handle */
- struct st_mysql_sys_var* var, /* in: pointer to system
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to system
variable */
- void* save, /* out: immediate result
+ void* save, /*!< out: immediate result
for update function */
- struct st_mysql_value* value) /* in: incoming string */
+ struct st_mysql_value* value) /*!< in: incoming string */
{
long long intbuf;
ulong commit_concurrency;
@@ -243,6 +327,15 @@ static MYSQL_THDVAR_BOOL(table_locks, PLUGIN_VAR_OPCMDARG,
/* check_func */ NULL, /* update_func */ NULL,
/* default */ TRUE);
+static MYSQL_THDVAR_BOOL(strict_mode, PLUGIN_VAR_OPCMDARG,
+ "Use strict mode when evaluating create options.",
+ NULL, NULL, FALSE);
+
+static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG,
+ "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.",
+ NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0);
+
+
static handler *innobase_create_handler(handlerton *hton,
TABLE_SHARE *table,
MEM_ROOT *mem_root)
@@ -250,62 +343,64 @@ static handler *innobase_create_handler(handlerton *hton,
return new (mem_root) ha_innobase(hton, table);
}
-/***********************************************************************
-This function is used to prepare X/Open XA distributed transaction */
+/*******************************************************************//**
+This function is used to prepare an X/Open XA distributed transaction.
+@return 0 or error number */
static
int
innobase_xa_prepare(
/*================*/
- /* out: 0 or error number */
- handlerton* hton,
- THD* thd, /* in: handle to the MySQL thread of the user
- whose XA transaction should be prepared */
- bool all); /* in: TRUE - commit transaction
- FALSE - the current SQL statement ended */
-/***********************************************************************
-This function is used to recover X/Open XA distributed transactions */
+ handlerton* hton, /*!< in: InnoDB handlerton */
+ THD* thd, /*!< in: handle to the MySQL thread of
+ the user whose XA transaction should
+ be prepared */
+ bool all); /*!< in: TRUE - commit transaction
+ FALSE - the current SQL statement
+ ended */
+/*******************************************************************//**
+This function is used to recover X/Open XA distributed transactions.
+@return number of prepared transactions stored in xid_list */
static
int
innobase_xa_recover(
/*================*/
- /* out: number of prepared transactions
- stored in xid_list */
- handlerton* hton,
- XID* xid_list, /* in/out: prepared transactions */
- uint len); /* in: number of slots in xid_list */
-/***********************************************************************
+ handlerton* hton, /*!< in: InnoDB handlerton */
+ XID* xid_list,/*!< in/out: prepared transactions */
+ uint len); /*!< in: number of slots in xid_list */
+/*******************************************************************//**
This function is used to commit one X/Open XA distributed transaction
-which is in the prepared state */
+which is in the prepared state
+@return 0 or error number */
static
int
innobase_commit_by_xid(
/*===================*/
- /* out: 0 or error number */
handlerton* hton,
- XID* xid); /* in: X/Open XA transaction identification */
-/***********************************************************************
+ XID* xid); /*!< in: X/Open XA transaction identification */
+/*******************************************************************//**
This function is used to rollback one X/Open XA distributed transaction
-which is in the prepared state */
+which is in the prepared state
+@return 0 or error number */
static
int
innobase_rollback_by_xid(
/*=====================*/
- /* out: 0 or error number */
- handlerton* hton,
- XID *xid); /* in: X/Open XA transaction identification */
-/***********************************************************************
+ handlerton* hton, /*!< in: InnoDB handlerton */
+ XID* xid); /*!< in: X/Open XA transaction
+ identification */
+/*******************************************************************//**
Create a consistent view for a cursor based on current transaction
which is created if the corresponding MySQL thread still lacks one.
This consistent view is then used inside of MySQL when accessing records
-using a cursor. */
+using a cursor.
+@return pointer to cursor view or NULL */
static
void*
innobase_create_cursor_view(
/*========================*/
- /* out: pointer to cursor view or NULL */
- handlerton* hton, /* in: innobase hton */
- THD* thd); /* in: user thread handle */
-/***********************************************************************
+ handlerton* hton, /*!< in: innobase hton */
+ THD* thd); /*!< in: user thread handle */
+/*******************************************************************//**
Set the given consistent cursor view to a transaction which is created
if the corresponding MySQL thread still lacks one. If the given
consistent cursor view is NULL global read view of a transaction is
@@ -315,9 +410,9 @@ void
innobase_set_cursor_view(
/*=====================*/
handlerton* hton,
- THD* thd, /* in: user thread handle */
- void* curview);/* in: Consistent cursor view to be set */
-/***********************************************************************
+ THD* thd, /*!< in: user thread handle */
+ void* curview);/*!< in: Consistent cursor view to be set */
+/*******************************************************************//**
Close the given consistent cursor view of a transaction and restore
global read view to a transaction read view. Transaction is created if the
corresponding MySQL thread still lacks one. */
@@ -326,71 +421,70 @@ void
innobase_close_cursor_view(
/*=======================*/
handlerton* hton,
- THD* thd, /* in: user thread handle */
- void* curview);/* in: Consistent read view to be closed */
-/*********************************************************************
+ THD* thd, /*!< in: user thread handle */
+ void* curview);/*!< in: Consistent read view to be closed */
+/*****************************************************************//**
Removes all tables in the named database inside InnoDB. */
static
void
innobase_drop_database(
/*===================*/
- /* out: error number */
- handlerton* hton, /* in: handlerton of Innodb */
- char* path); /* in: database path; inside InnoDB the name
+ handlerton* hton, /*!< in: handlerton of Innodb */
+ char* path); /*!< in: database path; inside InnoDB the name
of the last directory in the path is used as
the database name: for example, in 'mysql/data/test'
the database name is 'test' */
-/***********************************************************************
+/*******************************************************************//**
Closes an InnoDB database. */
static
int
innobase_end(handlerton *hton, ha_panic_function type);
-/*********************************************************************
+/*****************************************************************//**
Creates an InnoDB transaction struct for the thd if it does not yet have one.
Starts a new InnoDB transaction if a transaction is not yet started. And
assigns a new snapshot for a consistent read if the transaction does not yet
-have one. */
+have one.
+@return 0 */
static
int
innobase_start_trx_and_assign_read_view(
/*====================================*/
- /* out: 0 */
- handlerton* hton, /* in: Innodb handlerton */
- THD* thd); /* in: MySQL thread handle of the user for whom
+ handlerton* hton, /*!< in: Innodb handlerton */
+ THD* thd); /*!< in: MySQL thread handle of the user for whom
the transaction should be started */
-/********************************************************************
+/****************************************************************//**
Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes
-the logs, and the name of this function should be innobase_checkpoint. */
+the logs, and the name of this function should be innobase_checkpoint.
+@return TRUE if error */
static
bool
innobase_flush_logs(
/*================*/
- /* out: TRUE if error */
- handlerton* hton); /* in: InnoDB handlerton */
+ handlerton* hton); /*!< in: InnoDB handlerton */
-/****************************************************************************
+/************************************************************************//**
Implements the SHOW INNODB STATUS command. Sends the output of the InnoDB
Monitor to the client. */
static
bool
innodb_show_status(
/*===============*/
- handlerton* hton, /* in: the innodb handlerton */
- THD* thd, /* in: the MySQL query thread of the caller */
+ handlerton* hton, /*!< in: the innodb handlerton */
+ THD* thd, /*!< in: the MySQL query thread of the caller */
stat_print_fn *stat_print);
static
bool innobase_show_status(handlerton *hton, THD* thd,
stat_print_fn* stat_print,
enum ha_stat_type stat_type);
-/*********************************************************************
+/*****************************************************************//**
Commits a transaction in an InnoDB database. */
static
void
innobase_commit_low(
/*================*/
- trx_t* trx); /* in: transaction handle */
+ trx_t* trx); /*!< in: transaction handle */
static SHOW_VAR innodb_status_variables[]= {
{"buffer_pool_pages_data",
@@ -409,10 +503,10 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &export_vars.innodb_buffer_pool_pages_misc, SHOW_LONG},
{"buffer_pool_pages_total",
(char*) &export_vars.innodb_buffer_pool_pages_total, SHOW_LONG},
- {"buffer_pool_read_ahead_rnd",
- (char*) &export_vars.innodb_buffer_pool_read_ahead_rnd, SHOW_LONG},
- {"buffer_pool_read_ahead_seq",
- (char*) &export_vars.innodb_buffer_pool_read_ahead_seq, SHOW_LONG},
+ {"buffer_pool_read_ahead",
+ (char*) &export_vars.innodb_buffer_pool_read_ahead, SHOW_LONG},
+ {"buffer_pool_read_ahead_evicted",
+ (char*) &export_vars.innodb_buffer_pool_read_ahead_evicted, SHOW_LONG},
{"buffer_pool_read_requests",
(char*) &export_vars.innodb_buffer_pool_read_requests, SHOW_LONG},
{"buffer_pool_reads",
@@ -441,6 +535,8 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &export_vars.innodb_dblwr_pages_written, SHOW_LONG},
{"dblwr_writes",
(char*) &export_vars.innodb_dblwr_writes, SHOW_LONG},
+ {"have_atomic_builtins",
+ (char*) &export_vars.innodb_have_atomic_builtins, SHOW_BOOL},
{"log_waits",
(char*) &export_vars.innodb_log_waits, SHOW_LONG},
{"log_write_requests",
@@ -486,30 +582,30 @@ static SHOW_VAR innodb_status_variables[]= {
/* General functions */
-/**********************************************************************
+/******************************************************************//**
Returns true if the thread is the replication thread on the slave
server. Used in srv_conc_enter_innodb() to determine if the thread
should be allowed to enter InnoDB - the replication thread is treated
differently than other threads. Also used in
-srv_conc_force_exit_innodb(). */
-extern "C"
+srv_conc_force_exit_innodb().
+@return true if thd is the replication thread */
+extern "C" UNIV_INTERN
ibool
thd_is_replication_slave_thread(
/*============================*/
- /* out: true if thd is the replication thread */
- void* thd) /* in: thread handle (THD*) */
+ void* thd) /*!< in: thread handle (THD*) */
{
return((ibool) thd_slave_thread((THD*) thd));
}
-/**********************************************************************
+/******************************************************************//**
Save some CPU by testing the value of srv_thread_concurrency in inline
functions. */
-inline
+static inline
void
innodb_srv_conc_enter_innodb(
/*=========================*/
- trx_t* trx) /* in: transaction handle */
+ trx_t* trx) /*!< in: transaction handle */
{
if (UNIV_LIKELY(!srv_thread_concurrency)) {
@@ -519,14 +615,14 @@ innodb_srv_conc_enter_innodb(
srv_conc_enter_innodb(trx);
}
-/**********************************************************************
+/******************************************************************//**
Save some CPU by testing the value of srv_thread_concurrency in inline
functions. */
-inline
+static inline
void
innodb_srv_conc_exit_innodb(
/*========================*/
- trx_t* trx) /* in: transaction handle */
+ trx_t* trx) /*!< in: transaction handle */
{
if (UNIV_LIKELY(!trx->declared_to_be_inside_innodb)) {
@@ -536,16 +632,16 @@ innodb_srv_conc_exit_innodb(
srv_conc_exit_innodb(trx);
}
-/**********************************************************************
+/******************************************************************//**
Releases possible search latch and InnoDB thread FIFO ticket. These should
be released at each SQL statement end, and also when mysqld passes the
control to the client. It does no harm to release these also in the middle
of an SQL statement. */
-inline
+static inline
void
innobase_release_stat_resources(
/*============================*/
- trx_t* trx) /* in: transaction object */
+ trx_t* trx) /*!< in: transaction object */
{
if (trx->has_search_latch) {
trx_search_latch_release_if_reserved(trx);
@@ -558,57 +654,85 @@ innobase_release_stat_resources(
}
}
-/**********************************************************************
+/******************************************************************//**
Returns true if the transaction this thread is processing has edited
non-transactional tables. Used by the deadlock detector when deciding
which transaction to rollback in case of a deadlock - we try to avoid
-rolling back transactions that have edited non-transactional tables. */
-extern "C"
+rolling back transactions that have edited non-transactional tables.
+@return true if non-transactional tables have been edited */
+extern "C" UNIV_INTERN
ibool
thd_has_edited_nontrans_tables(
/*===========================*/
- /* out: true if non-transactional tables have
- been edited */
- void* thd) /* in: thread handle (THD*) */
+ void* thd) /*!< in: thread handle (THD*) */
{
return((ibool) thd_non_transactional_update((THD*) thd));
}
-/**********************************************************************
-Returns true if the thread is executing a SELECT statement. */
-extern "C"
+/******************************************************************//**
+Returns true if the thread is executing a SELECT statement.
+@return true if thd is executing SELECT */
+extern "C" UNIV_INTERN
ibool
thd_is_select(
/*==========*/
- /* out: true if thd is executing SELECT */
- const void* thd) /* in: thread handle (THD*) */
+ const void* thd) /*!< in: thread handle (THD*) */
{
return(thd_sql_command((const THD*) thd) == SQLCOM_SELECT);
}
-/************************************************************************
-Obtain the InnoDB transaction of a MySQL thread. */
-inline
+/******************************************************************//**
+Returns true if the thread supports XA, or the
+global value of innodb_supports_xa if thd is NULL.
+@return true if thd has XA support */
+extern "C" UNIV_INTERN
+ibool
+thd_supports_xa(
+/*============*/
+ void* thd) /*!< in: thread handle (THD*), or NULL to query
+ the global innodb_supports_xa */
+{
+ return(THDVAR((THD*) thd, support_xa));
+}
+
+/******************************************************************//**
+Returns the lock wait timeout for the current connection.
+@return the lock wait timeout, in seconds */
+extern "C" UNIV_INTERN
+ulong
+thd_lock_wait_timeout(
+/*==================*/
+ void* thd) /*!< in: thread handle (THD*), or NULL to query
+ the global innodb_lock_wait_timeout */
+{
+ /* According to <mysql/plugin.h>, passing thd == NULL
+ returns the global value of the session variable. */
+ return(THDVAR((THD*) thd, lock_wait_timeout));
+}
+
+/********************************************************************//**
+Obtain the InnoDB transaction of a MySQL thread.
+@return reference to transaction pointer */
+static inline
trx_t*&
thd_to_trx(
/*=======*/
- /* out: reference to transaction pointer */
- THD* thd) /* in: MySQL thread */
+ THD* thd) /*!< in: MySQL thread */
{
return(*(trx_t**) thd_ha_data(thd, innodb_hton_ptr));
}
-/************************************************************************
+/********************************************************************//**
Call this function when mysqld passes control to the client. That is to
avoid deadlocks on the adaptive hash S-latch possibly held by thd. For more
-documentation, see handler.cc. */
+documentation, see handler.cc.
+@return 0 */
static
int
innobase_release_temporary_latches(
/*===============================*/
- /* out: 0 */
- handlerton* hton, /* in: handlerton */
- THD* thd) /* in: MySQL thread */
+ handlerton* hton, /*!< in: handlerton */
+ THD* thd) /*!< in: MySQL thread */
{
trx_t* trx;
@@ -616,7 +740,7 @@ innobase_release_temporary_latches(
if (!innodb_inited) {
- return 0;
+ return(0);
}
trx = thd_to_trx(thd);
@@ -624,15 +748,15 @@ innobase_release_temporary_latches(
if (trx) {
innobase_release_stat_resources(trx);
}
- return 0;
+ return(0);
}
-/************************************************************************
+/********************************************************************//**
Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth
time calls srv_active_wake_master_thread. This function should be used
when a single database operation may introduce a small need for
server utility activity, like checkpointing. */
-inline
+static inline
void
innobase_active_small(void)
/*=======================*/
@@ -644,24 +768,31 @@ innobase_active_small(void)
}
}
-/************************************************************************
+/********************************************************************//**
Converts an InnoDB error code to a MySQL error code and also tells MySQL
about a possible transaction rollback inside InnoDB caused by a lock wait
-timeout or a deadlock. */
-static
+timeout or a deadlock.
+@return MySQL error code */
+extern "C" UNIV_INTERN
int
convert_error_code_to_mysql(
/*========================*/
- /* out: MySQL error code */
- int error, /* in: InnoDB error code */
- THD* thd) /* in: user thread handle or NULL */
+ int error, /*!< in: InnoDB error code */
+ ulint flags, /*!< in: InnoDB table flags, or 0 */
+ THD* thd) /*!< in: user thread handle or NULL */
{
- if (error == DB_SUCCESS) {
-
+ switch (error) {
+ case DB_SUCCESS:
return(0);
- } else if (error == (int) DB_DUPLICATE_KEY) {
+ case DB_INTERRUPTED:
+ my_error(ER_QUERY_INTERRUPTED, MYF(0));
+ /* fall through */
+ case DB_ERROR:
+ default:
+ return(-1); /* unspecified error */
+ case DB_DUPLICATE_KEY:
/* Be cautious with returning this error, since
mysql could re-enter the storage layer to get
duplicated key info, the operation requires a
@@ -670,19 +801,16 @@ convert_error_code_to_mysql(
handling stage. */
return(HA_ERR_FOUND_DUPP_KEY);
- } else if (error == (int) DB_FOREIGN_DUPLICATE_KEY) {
-
+ case DB_FOREIGN_DUPLICATE_KEY:
return(HA_ERR_FOREIGN_DUPLICATE_KEY);
- } else if (error == (int) DB_RECORD_NOT_FOUND) {
+ case DB_MISSING_HISTORY:
+ return(HA_ERR_TABLE_DEF_CHANGED);
+ case DB_RECORD_NOT_FOUND:
return(HA_ERR_NO_ACTIVE_RECORD);
- } else if (error == (int) DB_ERROR) {
-
- return(-1); /* unspecified error */
-
- } else if (error == (int) DB_DEADLOCK) {
+ case DB_DEADLOCK:
/* Since we rolled back the whole transaction, we must
also tell MySQL, so that MySQL knows to empty the
cached binlog for this transaction */
@@ -692,8 +820,8 @@ convert_error_code_to_mysql(
}
return(HA_ERR_LOCK_DEADLOCK);
- } else if (error == (int) DB_LOCK_WAIT_TIMEOUT) {
+ case DB_LOCK_WAIT_TIMEOUT:
/* Starting from 5.0.13, we let MySQL just roll back the
latest SQL statement in a lock wait timeout. Previously, we
rolled back the whole transaction. */
@@ -705,134 +833,128 @@ convert_error_code_to_mysql(
return(HA_ERR_LOCK_WAIT_TIMEOUT);
- } else if (error == (int) DB_NO_REFERENCED_ROW) {
-
+ case DB_NO_REFERENCED_ROW:
return(HA_ERR_NO_REFERENCED_ROW);
- } else if (error == (int) DB_ROW_IS_REFERENCED) {
-
+ case DB_ROW_IS_REFERENCED:
return(HA_ERR_ROW_IS_REFERENCED);
- } else if (error == (int) DB_CANNOT_ADD_CONSTRAINT) {
-
+ case DB_CANNOT_ADD_CONSTRAINT:
return(HA_ERR_CANNOT_ADD_FOREIGN);
- } else if (error == (int) DB_CANNOT_DROP_CONSTRAINT) {
+ case DB_CANNOT_DROP_CONSTRAINT:
return(HA_ERR_ROW_IS_REFERENCED); /* TODO: This is a bit
misleading, a new MySQL error
code should be introduced */
- } else if (error == (int) DB_COL_APPEARS_TWICE_IN_INDEX) {
+ case DB_COL_APPEARS_TWICE_IN_INDEX:
+ case DB_CORRUPTION:
return(HA_ERR_CRASHED);
- } else if (error == (int) DB_OUT_OF_FILE_SPACE) {
-
+ case DB_OUT_OF_FILE_SPACE:
return(HA_ERR_RECORD_FILE_FULL);
- } else if (error == (int) DB_TABLE_IS_BEING_USED) {
-
+ case DB_TABLE_IS_BEING_USED:
return(HA_ERR_WRONG_COMMAND);
- } else if (error == (int) DB_TABLE_NOT_FOUND) {
-
+ case DB_TABLE_NOT_FOUND:
return(HA_ERR_NO_SUCH_TABLE);
- } else if (error == (int) DB_TOO_BIG_RECORD) {
-
+ case DB_TOO_BIG_RECORD:
+ my_error(ER_TOO_BIG_ROWSIZE, MYF(0),
+ page_get_free_space_of_empty(flags
+ & DICT_TF_COMPACT) / 2);
return(HA_ERR_TO_BIG_ROW);
- } else if (error == (int) DB_CORRUPTION) {
-
- return(HA_ERR_CRASHED);
- } else if (error == (int) DB_NO_SAVEPOINT) {
-
+ case DB_NO_SAVEPOINT:
return(HA_ERR_NO_SAVEPOINT);
- } else if (error == (int) DB_LOCK_TABLE_FULL) {
- /* Since we rolled back the whole transaction, we must
- tell it also to MySQL so that MySQL knows to empty the
- cached binlog for this transaction */
+
+ case DB_LOCK_TABLE_FULL:
+ /* Since we rolled back the whole transaction, we must
+ also tell MySQL, so that MySQL knows to empty the
+ cached binlog for this transaction */
if (thd) {
thd_mark_transaction_to_rollback(thd, TRUE);
}
- return(HA_ERR_LOCK_TABLE_FULL);
- } else if (error == DB_TOO_MANY_CONCURRENT_TRXS) {
-
- return(HA_ERR_TOO_MANY_CONCURRENT_TRXS);
+ return(HA_ERR_LOCK_TABLE_FULL);
- } else if (error == DB_UNSUPPORTED) {
+ case DB_PRIMARY_KEY_IS_NULL:
+ return(ER_PRIMARY_CANT_HAVE_NULL);
+ case DB_TOO_MANY_CONCURRENT_TRXS:
+ /* New error code HA_ERR_TOO_MANY_CONCURRENT_TRXS is only
+ available in 5.1.38 and later, but the plugin should still
+ work with previous versions of MySQL. */
+#ifdef HA_ERR_TOO_MANY_CONCURRENT_TRXS
+ return(HA_ERR_TOO_MANY_CONCURRENT_TRXS);
+#else /* HA_ERR_TOO_MANY_CONCURRENT_TRXS */
+ return(HA_ERR_RECORD_FILE_FULL);
+#endif /* HA_ERR_TOO_MANY_CONCURRENT_TRXS */
+ case DB_UNSUPPORTED:
return(HA_ERR_UNSUPPORTED);
- } else {
- return(-1); // Unknown error
- }
+ }
}
-/*****************************************************************
+/*************************************************************//**
If you want to print a thd that is not associated with the current thread,
you must call this function before reserving the InnoDB kernel_mutex, to
protect MySQL from setting thd->query NULL. If you print a thd of the current
thread, we know that MySQL cannot modify thd->query, and it is not necessary
to call this. Call innobase_mysql_end_print_arbitrary_thd() after you release
-the kernel_mutex.
-NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
-function! */
-extern "C"
+the kernel_mutex. */
+extern "C" UNIV_INTERN
void
innobase_mysql_prepare_print_arbitrary_thd(void)
/*============================================*/
{
+ ut_ad(!mutex_own(&kernel_mutex));
VOID(pthread_mutex_lock(&LOCK_thread_count));
}
-/*****************************************************************
+/*************************************************************//**
Releases the mutex reserved by innobase_mysql_prepare_print_arbitrary_thd().
-NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
-function! */
-extern "C"
+In the InnoDB latching order, the mutex sits right above the
+kernel_mutex. In debug builds, we assert that the kernel_mutex is
+released before this function is invoked. */
+extern "C" UNIV_INTERN
void
innobase_mysql_end_print_arbitrary_thd(void)
/*========================================*/
{
+ ut_ad(!mutex_own(&kernel_mutex));
VOID(pthread_mutex_unlock(&LOCK_thread_count));
}
-/*****************************************************************
-Prints info of a THD object (== user session thread) to the given file.
-NOTE that /mysql/innobase/trx/trx0trx.c must contain the prototype for
-this function! */
-extern "C"
+/*************************************************************//**
+Prints info of a THD object (== user session thread) to the given file. */
+extern "C" UNIV_INTERN
void
innobase_mysql_print_thd(
/*=====================*/
- FILE* f, /* in: output stream */
- void* input_thd, /* in: pointer to a MySQL THD object */
- uint max_query_len) /* in: max query length to print, or 0 to
+ FILE* f, /*!< in: output stream */
+ void* thd, /*!< in: pointer to a MySQL THD object */
+ uint max_query_len) /*!< in: max query length to print, or 0 to
use the default max length */
{
- THD* thd;
char buffer[1024];
- thd = (THD*) input_thd;
- fputs(thd_security_context(thd, buffer, sizeof(buffer),
+ fputs(thd_security_context((THD*) thd, buffer, sizeof buffer,
max_query_len), f);
putc('\n', f);
}
-/**********************************************************************
-Get the variable length bounds of the given character set.
-
-NOTE that the exact prototype of this function has to be in
-/innobase/include/data0type.ic! */
-extern "C"
+/******************************************************************//**
+Get the variable length bounds of the given character set. */
+extern "C" UNIV_INTERN
void
innobase_get_cset_width(
/*====================*/
- ulint cset, /* in: MySQL charset-collation code */
- ulint* mbminlen, /* out: minimum length of a char (in bytes) */
- ulint* mbmaxlen) /* out: maximum length of a char (in bytes) */
+ ulint cset, /*!< in: MySQL charset-collation code */
+ ulint* mbminlen, /*!< out: minimum length of a char (in bytes) */
+ ulint* mbmaxlen) /*!< out: maximum length of a char (in bytes) */
{
CHARSET_INFO* cs;
ut_ad(cset < 256);
@@ -844,8 +966,9 @@ innobase_get_cset_width(
*mbminlen = cs->mbminlen;
*mbmaxlen = cs->mbmaxlen;
} else {
- if (current_thd
- && (thd_sql_command(current_thd) == SQLCOM_DROP_TABLE)) {
+ THD* thd = current_thd;
+
+ if (thd && thd_sql_command(thd) == SQLCOM_DROP_TABLE) {
/* Fix bug#46256: allow tables to be dropped if the
collation is not found, but issue a warning. */
@@ -864,85 +987,70 @@ innobase_get_cset_width(
}
}
-/**********************************************************************
-Converts an identifier to a table name.
-
-NOTE that the exact prototype of this function has to be in
-/innobase/dict/dict0dict.c! */
-extern "C"
+/******************************************************************//**
+Converts an identifier to a table name. */
+extern "C" UNIV_INTERN
void
innobase_convert_from_table_id(
/*===========================*/
- char* to, /* out: converted identifier */
- const char* from, /* in: identifier to convert */
- ulint len) /* in: length of 'to', in bytes */
+ struct charset_info_st* cs, /*!< in: the 'from' character set */
+ char* to, /*!< out: converted identifier */
+ const char* from, /*!< in: identifier to convert */
+ ulint len) /*!< in: length of 'to', in bytes */
{
uint errors;
- strconvert(thd_charset(current_thd), from,
- &my_charset_filename, to, (uint) len, &errors);
+ strconvert(cs, from, &my_charset_filename, to, (uint) len, &errors);
}
-/**********************************************************************
-Converts an identifier to UTF-8.
-
-NOTE that the exact prototype of this function has to be in
-/innobase/dict/dict0dict.c! */
-extern "C"
+/******************************************************************//**
+Converts an identifier to UTF-8. */
+extern "C" UNIV_INTERN
void
innobase_convert_from_id(
/*=====================*/
- char* to, /* out: converted identifier */
- const char* from, /* in: identifier to convert */
- ulint len) /* in: length of 'to', in bytes */
+ struct charset_info_st* cs, /*!< in: the 'from' character set */
+ char* to, /*!< out: converted identifier */
+ const char* from, /*!< in: identifier to convert */
+ ulint len) /*!< in: length of 'to', in bytes */
{
uint errors;
- strconvert(thd_charset(current_thd), from,
- system_charset_info, to, (uint) len, &errors);
+ strconvert(cs, from, system_charset_info, to, (uint) len, &errors);
}
-/**********************************************************************
+/******************************************************************//**
Compares NUL-terminated UTF-8 strings case insensitively.
-
-NOTE that the exact prototype of this function has to be in
-/innobase/dict/dict0dict.c! */
-extern "C"
+@return 0 if a=b, <0 if a<b, >0 if a>b */
+extern "C" UNIV_INTERN
int
innobase_strcasecmp(
/*================*/
- /* out: 0 if a=b, <0 if a<b, >1 if a>b */
- const char* a, /* in: first string to compare */
- const char* b) /* in: second string to compare */
+ const char* a, /*!< in: first string to compare */
+ const char* b) /*!< in: second string to compare */
{
return(my_strcasecmp(system_charset_info, a, b));
}
-/**********************************************************************
-Makes all characters in a NUL-terminated UTF-8 string lower case.
-
-NOTE that the exact prototype of this function has to be in
-/innobase/dict/dict0dict.c! */
-extern "C"
+/******************************************************************//**
+Makes all characters in a NUL-terminated UTF-8 string lower case. */
+extern "C" UNIV_INTERN
void
innobase_casedn_str(
/*================*/
- char* a) /* in/out: string to put in lower case */
+ char* a) /*!< in/out: string to put in lower case */
{
my_casedn_str(system_charset_info, a);
}
-/**************************************************************************
+/**********************************************************************//**
Determines the connection character set.
-
-NOTE that the exact prototype of this function has to be in
-/innobase/dict/dict0dict.c! */
-extern "C"
+@return connection character set */
+extern "C" UNIV_INTERN
struct charset_info_st*
innobase_get_charset(
/*=================*/
- /* out: connection character set */
- void* mysql_thd) /* in: MySQL thread handle */
+ void* mysql_thd) /*!< in: MySQL thread handle */
{
return(thd_charset((THD*) mysql_thd));
}
@@ -960,7 +1068,7 @@ _dosmaperr(
/*********************************************************************//**
Creates a temporary file.
@return temporary file descriptor, or < 0 on error */
-extern "C"
+extern "C" UNIV_INTERN
int
innobase_mysql_tmpfile(void)
/*========================*/
@@ -1041,13 +1149,13 @@ innobase_mysql_tmpfile(void)
DBUG_RETURN(fd);
}
#else
-/*************************************************************************
-Creates a temporary file. */
-extern "C"
+/*********************************************************************//**
+Creates a temporary file.
+@return temporary file descriptor, or < 0 on error */
+extern "C" UNIV_INTERN
int
innobase_mysql_tmpfile(void)
/*========================*/
- /* out: temporary file descriptor, or < 0 on error */
{
int fd2 = -1;
File fd = mysql_tmpfile("ib");
@@ -1072,29 +1180,69 @@ innobase_mysql_tmpfile(void)
}
return(fd2);
}
-#endif
+#endif /* defined (__WIN__) && defined (MYSQL_DYNAMIC_PLUGIN) */
-/*************************************************************************
-Wrapper around MySQL's copy_and_convert function, see it for
-documentation. */
-extern "C"
+/*********************************************************************//**
+Wrapper around MySQL's copy_and_convert function.
+@return number of bytes copied to 'to' */
+extern "C" UNIV_INTERN
ulint
innobase_convert_string(
/*====================*/
- void* to,
- ulint to_length,
- CHARSET_INFO* to_cs,
- const void* from,
- ulint from_length,
- CHARSET_INFO* from_cs,
- uint* errors)
+ void* to, /*!< out: converted string */
+ ulint to_length, /*!< in: number of bytes reserved
+ for the converted string */
+ CHARSET_INFO* to_cs, /*!< in: character set to convert to */
+ const void* from, /*!< in: string to convert */
+ ulint from_length, /*!< in: number of bytes to convert */
+ CHARSET_INFO* from_cs, /*!< in: character set to convert from */
+ uint* errors) /*!< out: number of errors encountered
+ during the conversion */
{
return(copy_and_convert((char*)to, (uint32) to_length, to_cs,
(const char*)from, (uint32) from_length, from_cs,
errors));
}
-/*************************************************************************
+/*******************************************************************//**
+Formats the raw data in "data" (in InnoDB on-disk format) that is of
+type DATA_(CHAR|VARCHAR|MYSQL|VARMYSQL) using "charset_coll" and writes
+the result to "buf". The result is converted to "system_charset_info".
+Not more than "buf_size" bytes are written to "buf".
+The result is always NUL-terminated (provided buf_size > 0) and the
+number of bytes that were written to "buf" is returned (including the
+terminating NUL).
+@return number of bytes that were written */
+extern "C" UNIV_INTERN
+ulint
+innobase_raw_format(
+/*================*/
+ const char* data, /*!< in: raw data */
+ ulint data_len, /*!< in: raw data length
+ in bytes */
+ ulint charset_coll, /*!< in: charset collation */
+ char* buf, /*!< out: output buffer */
+ ulint buf_size) /*!< in: output buffer size
+ in bytes */
+{
+ /* XXX we use a hard limit instead of allocating
+ buf_size bytes from the heap */
+ CHARSET_INFO* data_cs;
+ char buf_tmp[8192];
+ ulint buf_tmp_used;
+ uint num_errors;
+
+ data_cs = all_charsets[charset_coll];
+
+ buf_tmp_used = innobase_convert_string(buf_tmp, sizeof(buf_tmp),
+ system_charset_info,
+ data, data_len, data_cs,
+ &num_errors);
+
+ return(ut_str_sql_format(buf_tmp, buf_tmp_used, buf, buf_size));
+}
+
+/*********************************************************************//**
Compute the next autoinc value.
For MySQL replication the autoincrement values can be partitioned among
@@ -1110,16 +1258,16 @@ values we want to reserve for multi-value inserts e.g.,
innobase_next_autoinc() will be called with increment set to
n * 3 where autoinc_lock_mode != TRADITIONAL because we want
-to reserve 3 values for the multi-value INSERT above. */
+to reserve 3 values for the multi-value INSERT above.
+@return the next value */
static
ulonglong
innobase_next_autoinc(
/*==================*/
- /* out: the next value */
- ulonglong current, /* in: Current value */
- ulonglong increment, /* in: increment current by */
- ulonglong offset, /* in: AUTOINC offset */
- ulonglong max_value) /* in: max value for type */
+ ulonglong current, /*!< in: Current value */
+ ulonglong increment, /*!< in: increment current by */
+ ulonglong offset, /*!< in: AUTOINC offset */
+ ulonglong max_value) /*!< in: max value for type */
{
ulonglong next_value;
@@ -1142,7 +1290,7 @@ innobase_next_autoinc(
} else {
next_value = current + increment;
}
- } else {
+ } else if (max_value > current) {
if (current > offset) {
next_value = ((current - offset) / increment) + 1;
} else {
@@ -1168,6 +1316,8 @@ innobase_next_autoinc(
next_value += offset;
}
}
+ } else {
+ next_value = max_value;
}
ut_a(next_value <= max_value);
@@ -1175,58 +1325,84 @@ innobase_next_autoinc(
return(next_value);
}
-/*************************************************************************
+/*********************************************************************//**
+Initializes some fields in an InnoDB transaction object. */
+static
+void
+innobase_trx_init(
+/*==============*/
+ THD* thd, /*!< in: user thread handle */
+ trx_t* trx) /*!< in/out: InnoDB transaction handle */
+{
+ DBUG_ENTER("innobase_trx_init");
+ DBUG_ASSERT(EQ_CURRENT_THD(thd));
+ DBUG_ASSERT(thd == trx->mysql_thd);
+
+ trx->check_foreigns = !thd_test_options(
+ thd, OPTION_NO_FOREIGN_KEY_CHECKS);
+
+ trx->check_unique_secondary = !thd_test_options(
+ thd, OPTION_RELAXED_UNIQUE_CHECKS);
+
+ DBUG_VOID_RETURN;
+}
+
+/*********************************************************************//**
+Allocates an InnoDB transaction for a MySQL handler object.
+@return InnoDB transaction handle */
+extern "C" UNIV_INTERN
+trx_t*
+innobase_trx_allocate(
+/*==================*/
+ THD* thd) /*!< in: user thread handle */
+{
+ trx_t* trx;
+
+ DBUG_ENTER("innobase_trx_allocate");
+ DBUG_ASSERT(thd != NULL);
+ DBUG_ASSERT(EQ_CURRENT_THD(thd));
+
+ trx = trx_allocate_for_mysql();
+
+ trx->mysql_thd = thd;
+ trx->mysql_query_str = thd_query(thd);
+
+ innobase_trx_init(thd, trx);
+
+ DBUG_RETURN(trx);
+}
+
+/*********************************************************************//**
Gets the InnoDB transaction handle for a MySQL handler object, creates
an InnoDB transaction struct if the corresponding MySQL thread struct still
-lacks one. */
+lacks one.
+@return InnoDB transaction handle */
static
trx_t*
check_trx_exists(
/*=============*/
- /* out: InnoDB transaction handle */
- THD* thd) /* in: user thread handle */
+ THD* thd) /*!< in: user thread handle */
{
trx_t*& trx = thd_to_trx(thd);
- ut_ad(thd == current_thd);
+ ut_ad(EQ_CURRENT_THD(thd));
if (trx == NULL) {
- DBUG_ASSERT(thd != NULL);
- trx = trx_allocate_for_mysql();
-
- trx->mysql_thd = thd;
- trx->mysql_query_str = thd_query(thd);
-
- /* Update the info whether we should skip XA steps that eat
- CPU time */
- trx->support_xa = THDVAR(thd, support_xa);
- } else {
- if (trx->magic_n != TRX_MAGIC_N) {
- mem_analyze_corruption(trx);
-
- ut_error;
- }
+ trx = innobase_trx_allocate(thd);
+ } else if (UNIV_UNLIKELY(trx->magic_n != TRX_MAGIC_N)) {
+ mem_analyze_corruption(trx);
+ ut_error;
}
- if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
- trx->check_foreigns = FALSE;
- } else {
- trx->check_foreigns = TRUE;
- }
-
- if (thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)) {
- trx->check_unique_secondary = FALSE;
- } else {
- trx->check_unique_secondary = TRUE;
- }
+ innobase_trx_init(thd, trx);
return(trx);
}
-/*************************************************************************
+/*********************************************************************//**
Construct ha_innobase handler. */
-
+UNIV_INTERN
ha_innobase::ha_innobase(handlerton *hton, TABLE_SHARE *table_arg)
:handler(hton, table_arg),
int_table_flags(HA_REC_NOT_IN_SEQ |
@@ -1242,16 +1418,22 @@ ha_innobase::ha_innobase(handlerton *hton, TABLE_SHARE *table_arg)
num_write_row(0)
{}
-/*************************************************************************
+/*********************************************************************//**
+Destruct ha_innobase handler. */
+UNIV_INTERN
+ha_innobase::~ha_innobase()
+{
+}
+
+/*********************************************************************//**
Updates the user_thd field in a handle and also allocates a new InnoDB
transaction handle if needed, and updates the transaction fields in the
prebuilt struct. */
-inline
-int
+UNIV_INTERN inline
+void
ha_innobase::update_thd(
/*====================*/
- /* out: 0 or error code */
- THD* thd) /* in: thd to use the handle */
+ THD* thd) /*!< in: thd to use the handle */
{
trx_t* trx;
@@ -1263,39 +1445,52 @@ ha_innobase::update_thd(
}
user_thd = thd;
+}
- return(0);
+/*********************************************************************//**
+Updates the user_thd field in a handle and also allocates a new InnoDB
+transaction handle if needed, and updates the transaction fields in the
+prebuilt struct. */
+UNIV_INTERN
+void
+ha_innobase::update_thd()
+/*=====================*/
+{
+ THD* thd = ha_thd();
+ ut_ad(EQ_CURRENT_THD(thd));
+ update_thd(thd);
}
-/*************************************************************************
+/*********************************************************************//**
Registers that InnoDB takes part in an SQL statement, so that MySQL knows to
roll back the statement if the statement results in an error. This MUST be
called for every SQL statement that may be rolled back by MySQL. Calling this
several times to register the same statement is allowed, too. */
-inline
+static inline
void
innobase_register_stmt(
/*===================*/
- handlerton* hton, /* in: Innobase hton */
- THD* thd) /* in: MySQL thd (connection) object */
+ handlerton* hton, /*!< in: Innobase hton */
+ THD* thd) /*!< in: MySQL thd (connection) object */
{
+ DBUG_ASSERT(hton == innodb_hton_ptr);
/* Register the statement */
trans_register_ha(thd, FALSE, hton);
}
-/*************************************************************************
+/*********************************************************************//**
Registers an InnoDB transaction in MySQL, so that the MySQL XA code knows
to call the InnoDB prepare and commit, or rollback for the transaction. This
MUST be called for every transaction for which the user may call commit or
rollback. Calling this several times to register the same transaction is
allowed, too.
This function also registers the current SQL statement. */
-inline
+static inline
void
innobase_register_trx_and_stmt(
/*===========================*/
- handlerton *hton, /* in: Innobase handlerton */
- THD* thd) /* in: MySQL thd (connection) object */
+ handlerton *hton, /*!< in: Innobase handlerton */
+ THD* thd) /*!< in: MySQL thd (connection) object */
{
/* NOTE that actually innobase_register_stmt() registers also
the transaction in the AUTOCOMMIT=1 mode. */
@@ -1352,7 +1547,7 @@ AUTOCOMMIT==0 or we are inside BEGIN ... COMMIT. Thus transactions no longer
put restrictions on the use of the query cache.
*/
-/**********************************************************************
+/******************************************************************//**
The MySQL query cache uses this to check from InnoDB if the query cache at
the moment is allowed to operate on an InnoDB table. The SQL query must
be a non-locking SELECT.
@@ -1369,24 +1564,23 @@ at the start of a SELECT processing. Then the calling thread cannot be
holding any InnoDB semaphores. The calling thread is holding the
query cache mutex, and this function will reserve the InnoDB kernel mutex.
Thus, the 'rank' in sync0sync.h of the MySQL query cache mutex is above
-the InnoDB kernel mutex. */
+the InnoDB kernel mutex.
+@return TRUE if permitted, FALSE if not; note that the value FALSE
+does not mean we should invalidate the query cache: invalidation is
+called explicitly */
static
my_bool
innobase_query_caching_of_table_permitted(
/*======================================*/
- /* out: TRUE if permitted, FALSE if not;
- note that the value FALSE does not mean
- we should invalidate the query cache:
- invalidation is called explicitly */
- THD* thd, /* in: thd of the user who is trying to
+ THD* thd, /*!< in: thd of the user who is trying to
store a result to the query cache or
retrieve it */
- char* full_name, /* in: concatenation of database name,
- the null character '\0', and the table
+ char* full_name, /*!< in: concatenation of database name,
+ the null character NUL, and the table
name */
- uint full_name_len, /* in: length of the full name, i.e.
+ uint full_name_len, /*!< in: length of the full name, i.e.
len(dbname) + len(tablename) + 1 */
- ulonglong *unused) /* unused for this engine */
+ ulonglong *unused) /*!< unused for this engine */
{
ibool is_autocommit;
trx_t* trx;
@@ -1408,9 +1602,9 @@ innobase_query_caching_of_table_permitted(
"search, latch though calling "
"innobase_query_caching_of_table_permitted.");
- mutex_enter_noninline(&kernel_mutex);
+ mutex_enter(&kernel_mutex);
trx_print(stderr, trx, 1024);
- mutex_exit_noninline(&kernel_mutex);
+ mutex_exit(&kernel_mutex);
}
innobase_release_stat_resources(trx);
@@ -1476,21 +1670,21 @@ innobase_query_caching_of_table_permitted(
return((my_bool)FALSE);
}
-/*********************************************************************
-Invalidates the MySQL query cache for the table.
-NOTE that the exact prototype of this function has to be in
-/innobase/row/row0ins.c! */
-extern "C"
+/*****************************************************************//**
+Invalidates the MySQL query cache for the table. */
+extern "C" UNIV_INTERN
void
innobase_invalidate_query_cache(
/*============================*/
- trx_t* trx, /* in: transaction which modifies the table */
- char* full_name, /* in: concatenation of database name, null
- char '\0', table name, null char'\0';
- NOTE that in Windows this is always
- in LOWER CASE! */
- ulint full_name_len) /* in: full name length where also the null
- chars count */
+ trx_t* trx, /*!< in: transaction which
+ modifies the table */
+ const char* full_name, /*!< in: concatenation of
+ database name, null char NUL,
+ table name, null char NUL;
+ NOTE that in Windows this is
+ always in LOWER CASE! */
+ ulint full_name_len) /*!< in: full name length where
+ also the null chars count */
{
/* Note that the sync0sync.h rank of the query cache mutex is just
above the InnoDB kernel mutex. The caller of this function must not
@@ -1499,7 +1693,7 @@ innobase_invalidate_query_cache(
/* Argument TRUE below means we are using transactions */
#ifdef HAVE_QUERY_CACHE
mysql_query_cache_invalidate4((THD*) trx->mysql_thd,
- (const char*) full_name,
+ full_name,
(uint32) full_name_len,
TRUE);
#endif
@@ -1607,7 +1801,7 @@ no_quote:
Convert a table or index name to the MySQL system_charset_info (UTF-8)
and quote it if needed.
@return pointer to the end of buf */
-extern "C"
+extern "C" UNIV_INTERN
char*
innobase_convert_name(
/*==================*/
@@ -1639,6 +1833,17 @@ innobase_convert_name(
- (slash - id) - 1,
thd, TRUE);
}
+ } else if (UNIV_UNLIKELY(*id == TEMP_INDEX_PREFIX)) {
+ /* Temporary index name (smart ALTER TABLE) */
+ const char temp_index_suffix[]= "--temporary--";
+
+ s = innobase_convert_identifier(buf, buflen, id + 1, idlen - 1,
+ thd, FALSE);
+ if (s - buf + (sizeof temp_index_suffix - 1) < buflen) {
+ memcpy(s, temp_index_suffix,
+ sizeof temp_index_suffix - 1);
+ s += sizeof temp_index_suffix - 1;
+ }
} else {
no_db_name:
s = innobase_convert_identifier(buf, buflen, id, idlen,
@@ -1649,38 +1854,38 @@ no_db_name:
}
-/**************************************************************************
-Determines if the currently running transaction has been interrupted. */
-extern "C"
+/**********************************************************************//**
+Determines if the currently running transaction has been interrupted.
+@return TRUE if interrupted */
+extern "C" UNIV_INTERN
ibool
trx_is_interrupted(
/*===============*/
- /* out: TRUE if interrupted */
- trx_t* trx) /* in: transaction */
+ trx_t* trx) /*!< in: transaction */
{
return(trx && trx->mysql_thd && thd_killed((THD*) trx->mysql_thd));
}
-/******************************************************************
+/**************************************************************//**
Resets some fields of a prebuilt struct. The template is used in fast
retrieval of just those column values MySQL needs in its processing. */
static
void
reset_template(
/*===========*/
- row_prebuilt_t* prebuilt) /* in/out: prebuilt struct */
+ row_prebuilt_t* prebuilt) /*!< in/out: prebuilt struct */
{
prebuilt->keep_other_fields_on_keyread = 0;
prebuilt->read_just_key = 0;
}
-/*********************************************************************
+/*****************************************************************//**
Call this when you have opened a new table handle in HANDLER, before you
call index_read_idx() etc. Actually, we can let the cursor stay open even
over a transaction commit! Then you should call this before every operation,
fetch next etc. This function inits the necessary things even after a
transaction commit. */
-
+UNIV_INTERN
void
ha_innobase::init_table_handle_for_HANDLER(void)
/*============================================*/
@@ -1699,7 +1904,7 @@ ha_innobase::init_table_handle_for_HANDLER(void)
/* If the transaction is not started yet, start it */
- trx_start_if_not_started_noninline(prebuilt->trx);
+ trx_start_if_not_started(prebuilt->trx);
/* Assign a read view if the transaction does not have it yet */
@@ -1736,19 +1941,20 @@ ha_innobase::init_table_handle_for_HANDLER(void)
reset_template(prebuilt);
}
-/*************************************************************************
-Opens an InnoDB database. */
+/*********************************************************************//**
+Opens an InnoDB database.
+@return 0 on success, error code on failure */
static
int
innobase_init(
/*==========*/
- /* out: 0 on success, error code on failure */
- void *p) /* in: InnoDB handlerton */
+ void *p) /*!< in: InnoDB handlerton */
{
- static char current_dir[3]; /* Set if using current lib */
+ static char current_dir[3]; /*!< Set if using current lib */
int err;
bool ret;
char *default_path;
+ uint format_id;
DBUG_ENTER("innobase_init");
handlerton *innobase_hton= (handlerton *)p;
@@ -1778,6 +1984,7 @@ innobase_init(
innobase_hton->show_status=innobase_show_status;
innobase_hton->flags=HTON_NO_FLAGS;
innobase_hton->release_temporary_latches=innobase_release_temporary_latches;
+ innobase_hton->alter_table_flags = innobase_alter_table_flags;
ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);
@@ -1871,16 +2078,12 @@ innobase_init(
MYF(MY_FAE));
ret = (bool) srv_parse_data_file_paths_and_sizes(
- internal_innobase_data_file_path,
- &srv_data_file_names,
- &srv_data_file_sizes,
- &srv_data_file_is_raw_partition,
- &srv_n_data_files,
- &srv_auto_extend_last_data_file,
- &srv_last_file_size_max);
+ internal_innobase_data_file_path);
if (ret == FALSE) {
sql_print_error(
"InnoDB: syntax error in innodb_data_file_path");
+mem_free_and_error:
+ srv_free_paths_and_sizes();
my_free(internal_innobase_data_file_path,
MYF(MY_ALLOW_ZERO_PTR));
goto error;
@@ -1905,21 +2108,96 @@ innobase_init(
#endif /* UNIV_LOG_ARCHIVE */
ret = (bool)
- srv_parse_log_group_home_dirs(innobase_log_group_home_dir,
- &srv_log_group_home_dirs);
+ srv_parse_log_group_home_dirs(innobase_log_group_home_dir);
if (ret == FALSE || innobase_mirrored_log_groups != 1) {
sql_print_error("syntax error in innodb_log_group_home_dir, or a "
"wrong number of mirrored log groups");
- my_free(internal_innobase_data_file_path,
- MYF(MY_ALLOW_ZERO_PTR));
- goto error;
+ goto mem_free_and_error;
+ }
+
+ /* Validate the file format by animal name */
+ if (innobase_file_format_name != NULL) {
+
+ format_id = innobase_file_format_name_lookup(
+ innobase_file_format_name);
+
+ if (format_id > DICT_TF_FORMAT_MAX) {
+
+ sql_print_error("InnoDB: wrong innodb_file_format.");
+
+ goto mem_free_and_error;
+ }
+ } else {
+ /* Set it to the default file format id. Though this
+ should never happen. */
+ format_id = 0;
+ }
+
+ srv_file_format = format_id;
+
+ /* Given the type of innobase_file_format_name we have little
+ choice but to cast away the constness from the returned name.
+ innobase_file_format_name is used in the MySQL set variable
+ interface and so can't be const. */
+
+ innobase_file_format_name =
+ (char*) trx_sys_file_format_id_to_name(format_id);
+
+ /* Process innobase_file_format_check variable */
+ ut_a(innobase_file_format_check != NULL);
+
+ /* As a side effect it will set srv_check_file_format_at_startup
+ on valid input. First we check for "on"/"off". */
+ if (!innobase_file_format_check_on_off(innobase_file_format_check)) {
+
+ /* Did the user specify a format name that we support ?
+ As a side effect it will update the variable
+ srv_check_file_format_at_startup */
+ if (innobase_file_format_validate_and_set(
+ innobase_file_format_check) < 0) {
+
+ sql_print_error("InnoDB: invalid "
+ "innodb_file_format_check value: "
+ "should be either 'on' or 'off' or "
+ "any value up to %s or its "
+ "equivalent numeric id",
+ trx_sys_file_format_id_to_name(
+ DICT_TF_FORMAT_MAX));
+
+ goto mem_free_and_error;
+ }
}
+ if (innobase_change_buffering) {
+ ulint use;
+
+ for (use = 0;
+ use < UT_ARR_SIZE(innobase_change_buffering_values);
+ use++) {
+ if (!innobase_strcasecmp(
+ innobase_change_buffering,
+ innobase_change_buffering_values[use])) {
+ ibuf_use = (ibuf_use_t) use;
+ goto innobase_change_buffering_inited_ok;
+ }
+ }
+
+ sql_print_error("InnoDB: invalid value "
+ "innodb_file_format_check=%s",
+ innobase_change_buffering);
+ goto mem_free_and_error;
+ }
+
+innobase_change_buffering_inited_ok:
+ ut_a((ulint) ibuf_use < UT_ARR_SIZE(innobase_change_buffering_values));
+ innobase_change_buffering = (char*)
+ innobase_change_buffering_values[ibuf_use];
+
/* --------------------------------------------------*/
- srv_file_flush_method_str = innobase_unix_file_flush_method;
+ srv_file_flush_method_str = innobase_file_flush_method;
srv_n_log_groups = (ulint) innobase_mirrored_log_groups;
srv_n_log_files = (ulint) innobase_log_files_in_group;
@@ -1930,28 +2208,14 @@ innobase_init(
#endif /* UNIV_LOG_ARCHIVE */
srv_log_buffer_size = (ulint) innobase_log_buffer_size;
- /* We set srv_pool_size here in units of 1 kB. InnoDB internally
- changes the value so that it becomes the number of database pages. */
-
- if (innobase_buffer_pool_awe_mem_mb == 0) {
- srv_pool_size = (ulint)(innobase_buffer_pool_size / 1024);
- } else {
- srv_use_awe = TRUE;
- srv_pool_size = (ulint)
- (1024 * innobase_buffer_pool_awe_mem_mb);
- srv_awe_window_size = (ulint) innobase_buffer_pool_size;
-
- /* Note that what the user specified as
- innodb_buffer_pool_size is actually the AWE memory window
- size in this case, and the real buffer pool size is
- determined by .._awe_mem_mb. */
- }
+ srv_buf_pool_size = (ulint) innobase_buffer_pool_size;
srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
srv_n_file_io_threads = (ulint) innobase_file_io_threads;
+ srv_n_read_io_threads = (ulint) innobase_read_io_threads;
+ srv_n_write_io_threads = (ulint) innobase_write_io_threads;
- srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout;
srv_force_recovery = (ulint) innobase_force_recovery;
srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
@@ -1964,15 +2228,11 @@ innobase_init(
row_rollback_on_timeout = (ibool) innobase_rollback_on_timeout;
- srv_file_per_table = (ibool) innobase_file_per_table;
srv_locks_unsafe_for_binlog = (ibool) innobase_locks_unsafe_for_binlog;
srv_max_n_open_files = (ulint) innobase_open_files;
srv_innodb_status = (ibool) innobase_create_status_file;
- srv_use_adaptive_hash_indexes =
- (ibool) innobase_adaptive_hash_index;
-
srv_print_verbose_log = mysqld_embedded ? 0 : 1;
/* Store the default charset-collation number of this MySQL
@@ -1989,9 +2249,11 @@ innobase_init(
and consequently we do not need to know the ordering internally in
InnoDB. */
- ut_a(0 == strcmp((char*)my_charset_latin1.name,
- (char*)"latin1_swedish_ci"));
- memcpy(srv_latin1_ordering, my_charset_latin1.sort_order, 256);
+ ut_a(0 == strcmp(my_charset_latin1.name, "latin1_swedish_ci"));
+ srv_latin1_ordering = my_charset_latin1.sort_order;
+
+ innobase_old_blocks_pct = buf_LRU_old_ratio_update(
+ innobase_old_blocks_pct, FALSE);
innobase_commit_concurrency_init_default();
@@ -2001,18 +2263,13 @@ innobase_init(
modules, we check at run time that the size is the same in
these compilation modules. */
- srv_sizeof_trx_t_in_ha_innodb_cc = sizeof(trx_t);
-
err = innobase_start_or_create_for_mysql();
if (err != DB_SUCCESS) {
- my_free(internal_innobase_data_file_path,
- MYF(MY_ALLOW_ZERO_PTR));
- goto error;
+ goto mem_free_and_error;
}
- (void) hash_init(&innobase_open_tables,system_charset_info, 32, 0, 0,
- (hash_get_key) innobase_get_key, 0, 0);
+ innobase_open_tables = hash_create(200);
pthread_mutex_init(&innobase_share_mutex, MY_MUTEX_INIT_FAST);
pthread_mutex_init(&prepare_commit_mutex, MY_MUTEX_INIT_FAST);
pthread_mutex_init(&commit_threads_m, MY_MUTEX_INIT_FAST);
@@ -2020,23 +2277,36 @@ innobase_init(
pthread_mutex_init(&analyze_mutex, MY_MUTEX_INIT_FAST);
pthread_cond_init(&commit_cond, NULL);
innodb_inited= 1;
+#ifdef MYSQL_DYNAMIC_PLUGIN
+ if (innobase_hton != p) {
+ innobase_hton = reinterpret_cast<handlerton*>(p);
+ *innobase_hton = *innodb_hton_ptr;
+ }
+#endif /* MYSQL_DYNAMIC_PLUGIN */
+
+ /* Get the current high water mark format. */
+ innobase_file_format_check = (char*) trx_sys_file_format_max_get();
DBUG_RETURN(FALSE);
error:
DBUG_RETURN(TRUE);
}
-/***********************************************************************
-Closes an InnoDB database. */
+/*******************************************************************//**
+Closes an InnoDB database.
+@return TRUE if error */
static
int
-innobase_end(handlerton *hton, ha_panic_function type)
-/*==============*/
- /* out: TRUE if error */
+innobase_end(
+/*=========*/
+ handlerton* hton, /*!< in/out: InnoDB handlerton */
+ ha_panic_function type __attribute__((unused)))
+ /*!< in: ha_panic() parameter */
{
int err= 0;
DBUG_ENTER("innobase_end");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
#ifdef __NETWARE__ /* some special cleanup for NetWare */
if (nw_panic) {
@@ -2047,10 +2317,12 @@ innobase_end(handlerton *hton, ha_panic_function type)
srv_fast_shutdown = (ulint) innobase_fast_shutdown;
innodb_inited = 0;
+ hash_table_free(innobase_open_tables);
+ innobase_open_tables = NULL;
if (innobase_shutdown_for_mysql() != DB_SUCCESS) {
err = 1;
}
- hash_free(&innobase_open_tables);
+ srv_free_paths_and_sizes();
my_free(internal_innobase_data_file_path,
MYF(MY_ALLOW_ZERO_PTR));
pthread_mutex_destroy(&innobase_share_mutex);
@@ -2064,31 +2336,48 @@ innobase_end(handlerton *hton, ha_panic_function type)
DBUG_RETURN(err);
}
-/********************************************************************
+/****************************************************************//**
Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes
-the logs, and the name of this function should be innobase_checkpoint. */
+the logs, and the name of this function should be innobase_checkpoint.
+@return TRUE if error */
static
bool
-innobase_flush_logs(handlerton *hton)
-/*=====================*/
- /* out: TRUE if error */
+innobase_flush_logs(
+/*================*/
+ handlerton* hton) /*!< in/out: InnoDB handlerton */
{
bool result = 0;
DBUG_ENTER("innobase_flush_logs");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
log_buffer_flush_to_disk();
DBUG_RETURN(result);
}
-/*********************************************************************
+/****************************************************************//**
+Return alter table flags supported in an InnoDB database. */
+static
+uint
+innobase_alter_table_flags(
+/*=======================*/
+ uint flags)
+{
+ return(HA_ONLINE_ADD_INDEX_NO_WRITES
+ | HA_ONLINE_DROP_INDEX_NO_WRITES
+ | HA_ONLINE_ADD_UNIQUE_INDEX_NO_WRITES
+ | HA_ONLINE_DROP_UNIQUE_INDEX_NO_WRITES
+ | HA_ONLINE_ADD_PK_INDEX_NO_WRITES);
+}
+
+/*****************************************************************//**
Commits a transaction in an InnoDB database. */
static
void
innobase_commit_low(
/*================*/
- trx_t* trx) /* in: transaction handle */
+ trx_t* trx) /*!< in: transaction handle */
{
if (trx->conc_state == TRX_NOT_STARTED) {
@@ -2098,23 +2387,24 @@ innobase_commit_low(
trx_commit_for_mysql(trx);
}
-/*********************************************************************
+/*****************************************************************//**
Creates an InnoDB transaction struct for the thd if it does not yet have one.
Starts a new InnoDB transaction if a transaction is not yet started. And
assigns a new snapshot for a consistent read if the transaction does not yet
-have one. */
+have one.
+@return 0 */
static
int
innobase_start_trx_and_assign_read_view(
/*====================================*/
- /* out: 0 */
- handlerton *hton, /* in: Innodb handlerton */
- THD* thd) /* in: MySQL thread handle of the user for whom
+ handlerton *hton, /*!< in: Innodb handlerton */
+ THD* thd) /*!< in: MySQL thread handle of the user for whom
the transaction should be committed */
{
trx_t* trx;
DBUG_ENTER("innobase_start_trx_and_assign_read_view");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
/* Create a new trx struct for thd, if it does not yet have one */
@@ -2128,7 +2418,7 @@ innobase_start_trx_and_assign_read_view(
/* If the transaction is not started yet, start it */
- trx_start_if_not_started_noninline(trx);
+ trx_start_if_not_started(trx);
/* Assign a read view if the transaction does not have it yet */
@@ -2137,37 +2427,35 @@ innobase_start_trx_and_assign_read_view(
/* Set the MySQL flag to mark that there is an active transaction */
if (trx->active_trans == 0) {
- innobase_register_trx_and_stmt(hton, current_thd);
+ innobase_register_trx_and_stmt(hton, thd);
trx->active_trans = 1;
}
DBUG_RETURN(0);
}
-/*********************************************************************
+/*****************************************************************//**
Commits a transaction in an InnoDB database or marks an SQL statement
-ended. */
+ended.
+@return 0 */
static
int
innobase_commit(
/*============*/
- /* out: 0 */
- handlerton *hton, /* in: Innodb handlerton */
- THD* thd, /* in: MySQL thread handle of the user for whom
+ handlerton *hton, /*!< in: Innodb handlerton */
+ THD* thd, /*!< in: MySQL thread handle of the user for whom
the transaction should be committed */
- bool all) /* in: TRUE - commit transaction
+ bool all) /*!< in: TRUE - commit transaction
FALSE - the current SQL statement ended */
{
trx_t* trx;
DBUG_ENTER("innobase_commit");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
DBUG_PRINT("trans", ("ending transaction"));
trx = check_trx_exists(thd);
- /* Update the info whether we should skip XA steps that eat CPU time */
- trx->support_xa = THDVAR(thd, support_xa);
-
/* Since we will reserve the kernel mutex, we have to release
the search system latch first to obey the latching order. */
@@ -2222,10 +2510,28 @@ retry:
}
}
+ /* The following calls to read the MySQL binary log
+ file name and the position return consistent results:
+ 1) Other InnoDB transactions cannot intervene between
+ these calls as we are holding prepare_commit_mutex.
+ 2) Binary logging of other engines is not relevant
+ to InnoDB as all InnoDB requires is that committing
+ InnoDB transactions appear in the same order in the
+ MySQL binary log as they appear in InnoDB logs.
+ 3) A MySQL log file rotation cannot happen because
+ MySQL protects against this by having a counter of
+ transactions in prepared state and it only allows
+ a rotation when the counter drops to zero. See
+ LOCK_prep_xids and COND_prep_xids in log.cc. */
trx->mysql_log_file_name = mysql_bin_log_file_name();
- trx->mysql_log_offset = (ib_longlong) mysql_bin_log_file_pos();
+ trx->mysql_log_offset = (ib_int64_t) mysql_bin_log_file_pos();
+ /* Don't do write + flush right now. For group commit
+ to work we want to do the flush after releasing the
+ prepare_commit_mutex. */
+ trx->flush_log_later = TRUE;
innobase_commit_low(trx);
+ trx->flush_log_later = FALSE;
if (innobase_commit_concurrency > 0) {
pthread_mutex_lock(&commit_cond_m);
@@ -2239,6 +2545,8 @@ retry:
pthread_mutex_unlock(&prepare_commit_mutex);
}
+ /* Now do a write + flush of logs. */
+ trx_commit_complete_for_mysql(trx);
trx->active_trans = 0;
} else {
@@ -2272,30 +2580,28 @@ retry:
DBUG_RETURN(0);
}
-/*********************************************************************
-Rolls back a transaction or the latest SQL statement. */
+/*****************************************************************//**
+Rolls back a transaction or the latest SQL statement.
+@return 0 or error number */
static
int
innobase_rollback(
/*==============*/
- /* out: 0 or error number */
- handlerton *hton, /* in: Innodb handlerton */
- THD* thd, /* in: handle to the MySQL thread of the user
+ handlerton *hton, /*!< in: Innodb handlerton */
+ THD* thd, /*!< in: handle to the MySQL thread of the user
whose transaction should be rolled back */
- bool all) /* in: TRUE - commit transaction
+ bool all) /*!< in: TRUE - roll back the transaction
FALSE - the current SQL statement ended */
{
int error = 0;
trx_t* trx;
DBUG_ENTER("innobase_rollback");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
DBUG_PRINT("trans", ("aborting transaction"));
trx = check_trx_exists(thd);
- /* Update the info whether we should skip XA steps that eat CPU time */
- trx->support_xa = THDVAR(thd, support_xa);
-
/* Release a possible FIFO ticket and search latch. Since we will
reserve the kernel mutex, we have to release the search system latch
first to obey the latching order. */
@@ -2319,17 +2625,17 @@ innobase_rollback(
error = trx_rollback_last_sql_stat_for_mysql(trx);
}
- DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
+ DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
}
-/*********************************************************************
-Rolls back a transaction */
+/*****************************************************************//**
+Rolls back a transaction
+@return 0 or error number */
static
int
innobase_rollback_trx(
/*==================*/
- /* out: 0 or error number */
- trx_t* trx) /* in: transaction */
+ trx_t* trx) /*!< in: transaction */
{
int error = 0;
@@ -2350,28 +2656,29 @@ innobase_rollback_trx(
error = trx_rollback_for_mysql(trx);
- DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
+ DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
}
-/*********************************************************************
-Rolls back a transaction to a savepoint. */
+/*****************************************************************//**
+Rolls back a transaction to a savepoint.
+@return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
+given name */
static
int
innobase_rollback_to_savepoint(
/*===========================*/
- /* out: 0 if success, HA_ERR_NO_SAVEPOINT if
- no savepoint with the given name */
- handlerton *hton, /* in: Innodb handlerton */
- THD* thd, /* in: handle to the MySQL thread of the user
+ handlerton *hton, /*!< in: Innodb handlerton */
+ THD* thd, /*!< in: handle to the MySQL thread of the user
whose transaction should be rolled back */
- void* savepoint) /* in: savepoint data */
+ void* savepoint) /*!< in: savepoint data */
{
- ib_longlong mysql_binlog_cache_pos;
+ ib_int64_t mysql_binlog_cache_pos;
int error = 0;
trx_t* trx;
char name[64];
DBUG_ENTER("innobase_rollback_to_savepoint");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
trx = check_trx_exists(thd);
@@ -2387,27 +2694,28 @@ innobase_rollback_to_savepoint(
error = (int) trx_rollback_to_savepoint_for_mysql(trx, name,
&mysql_binlog_cache_pos);
- DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
+ DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
}
-/*********************************************************************
-Release transaction savepoint name. */
+/*****************************************************************//**
+Release transaction savepoint name.
+@return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
+given name */
static
int
innobase_release_savepoint(
/*=======================*/
- /* out: 0 if success, HA_ERR_NO_SAVEPOINT if
- no savepoint with the given name */
- handlerton* hton, /* in: handlerton for Innodb */
- THD* thd, /* in: handle to the MySQL thread of the user
+ handlerton* hton, /*!< in: handlerton for Innodb */
+ THD* thd, /*!< in: handle to the MySQL thread of the user
whose transaction should be rolled back */
- void* savepoint) /* in: savepoint data */
+ void* savepoint) /*!< in: savepoint data */
{
int error = 0;
trx_t* trx;
char name[64];
DBUG_ENTER("innobase_release_savepoint");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
trx = check_trx_exists(thd);
@@ -2417,24 +2725,25 @@ innobase_release_savepoint(
error = (int) trx_release_savepoint_for_mysql(trx, name);
- DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
+ DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
}
-/*********************************************************************
-Sets a transaction savepoint. */
+/*****************************************************************//**
+Sets a transaction savepoint.
+@return always 0, that is, always succeeds */
static
int
innobase_savepoint(
/*===============*/
- /* out: always 0, that is, always succeeds */
- handlerton* hton, /* in: handle to the Innodb handlerton */
- THD* thd, /* in: handle to the MySQL thread */
- void* savepoint) /* in: savepoint data */
+ handlerton* hton, /*!< in: handle to the Innodb handlerton */
+ THD* thd, /*!< in: handle to the MySQL thread */
+ void* savepoint) /*!< in: savepoint data */
{
int error = 0;
trx_t* trx;
DBUG_ENTER("innobase_savepoint");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
/*
In the autocommit mode there is no sense to set a savepoint
@@ -2461,20 +2770,20 @@ innobase_savepoint(
char name[64];
longlong2str((ulint)savepoint,name,36);
- error = (int) trx_savepoint_for_mysql(trx, name, (ib_longlong)0);
+ error = (int) trx_savepoint_for_mysql(trx, name, (ib_int64_t)0);
- DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
+ DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
}
-/*********************************************************************
-Frees a possible InnoDB trx object associated with the current THD. */
+/*****************************************************************//**
+Frees a possible InnoDB trx object associated with the current THD.
+@return 0 or error number */
static
int
innobase_close_connection(
/*======================*/
- /* out: 0 or error number */
- handlerton* hton, /* in: innobase handlerton */
- THD* thd) /* in: handle to the MySQL thread of the user
+ handlerton* hton, /*!< in: innobase handlerton */
+ THD* thd) /*!< in: handle to the MySQL thread of the user
whose resources should be free'd */
{
trx_t* trx;
@@ -2511,23 +2820,41 @@ innobase_close_connection(
}
-/*****************************************************************************
+/*************************************************************************//**
** InnoDB database tables
*****************************************************************************/
-/********************************************************************
-Get the record format from the data dictionary. */
+/****************************************************************//**
+Get the record format from the data dictionary.
+@return one of ROW_TYPE_REDUNDANT, ROW_TYPE_COMPACT,
+ROW_TYPE_COMPRESSED, ROW_TYPE_DYNAMIC */
+UNIV_INTERN
enum row_type
ha_innobase::get_row_type() const
/*=============================*/
- /* out: ROW_TYPE_REDUNDANT or ROW_TYPE_COMPACT */
{
if (prebuilt && prebuilt->table) {
- if (dict_table_is_comp_noninline(prebuilt->table)) {
- return(ROW_TYPE_COMPACT);
- } else {
+ const ulint flags = prebuilt->table->flags;
+
+ if (UNIV_UNLIKELY(!flags)) {
return(ROW_TYPE_REDUNDANT);
}
+
+ ut_ad(flags & DICT_TF_COMPACT);
+
+ switch (flags & DICT_TF_FORMAT_MASK) {
+ case DICT_TF_FORMAT_51 << DICT_TF_FORMAT_SHIFT:
+ return(ROW_TYPE_COMPACT);
+ case DICT_TF_FORMAT_ZIP << DICT_TF_FORMAT_SHIFT:
+ if (flags & DICT_TF_ZSSIZE_MASK) {
+ return(ROW_TYPE_COMPRESSED);
+ } else {
+ return(ROW_TYPE_DYNAMIC);
+ }
+#if DICT_TF_FORMAT_ZIP != DICT_TF_FORMAT_MAX
+# error "DICT_TF_FORMAT_ZIP != DICT_TF_FORMAT_MAX"
+#endif
+ }
}
ut_ad(0);
return(ROW_TYPE_NOT_USED);
@@ -2535,36 +2862,137 @@ ha_innobase::get_row_type() const
-/********************************************************************
-Get the table flags to use for the statement. */
+/****************************************************************//**
+Get the table flags to use for the statement.
+@return table flags */
+UNIV_INTERN
handler::Table_flags
ha_innobase::table_flags() const
+/*============================*/
{
/* Need to use tx_isolation here since table flags is (also)
called before prebuilt is inited. */
- ulong const tx_isolation = thd_tx_isolation(current_thd);
+ ulong const tx_isolation = thd_tx_isolation(ha_thd());
if (tx_isolation <= ISO_READ_COMMITTED)
return int_table_flags;
return int_table_flags | HA_BINLOG_STMT_CAPABLE;
}
-/********************************************************************
+/****************************************************************//**
Gives the file extension of an InnoDB single-table tablespace. */
static const char* ha_innobase_exts[] = {
".ibd",
NullS
};
+/****************************************************************//**
+Returns the table type (storage engine name).
+@return table type */
+UNIV_INTERN
+const char*
+ha_innobase::table_type() const
+/*===========================*/
+{
+ return(innobase_hton_name);
+}
+
+/****************************************************************//**
+Returns the index type. */
+UNIV_INTERN
+const char*
+ha_innobase::index_type(
+/*====================*/
+ uint)
+ /*!< out: index type */
+{
+ return("BTREE");
+}
+
+/****************************************************************//**
+Returns the table file name extension.
+@return file extension string */
+UNIV_INTERN
const char**
ha_innobase::bas_ext() const
/*========================*/
- /* out: file extension string */
{
- return ha_innobase_exts;
+ return(ha_innobase_exts);
}
+/****************************************************************//**
+Returns the operations supported for indexes.
+@return flags of supported operations */
+UNIV_INTERN
+ulong
+ha_innobase::index_flags(
+/*=====================*/
+ uint,
+ uint,
+ bool)
+const
+{
+ return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER
+ | HA_READ_RANGE | HA_KEYREAD_ONLY);
+}
-/*********************************************************************
+/****************************************************************//**
+Returns the maximum number of keys.
+@return MAX_KEY */
+UNIV_INTERN
+uint
+ha_innobase::max_supported_keys() const
+/*===================================*/
+{
+ return(MAX_KEY);
+}
+
+/****************************************************************//**
+Returns the maximum key length.
+@return maximum supported key length, in bytes */
+UNIV_INTERN
+uint
+ha_innobase::max_supported_key_length() const
+/*=========================================*/
+{
+ /* An InnoDB page must store >= 2 keys; a secondary key record
+ must also contain the primary key value: max key length is
+ therefore set to slightly less than 1 / 4 of page size which
+ is 16 kB; but currently MySQL does not work with keys whose
+ size is > MAX_KEY_LENGTH */
+ return(3500);
+}
+
+/****************************************************************//**
+Returns the key map of keys that are usable for scanning.
+@return key_map_full */
+UNIV_INTERN
+const key_map*
+ha_innobase::keys_to_use_for_scanning()
+{
+ return(&key_map_full);
+}
+
+/****************************************************************//**
+Determines if table caching is supported.
+@return HA_CACHE_TBL_ASKTRANSACT */
+UNIV_INTERN
+uint8
+ha_innobase::table_cache_type()
+{
+ return(HA_CACHE_TBL_ASKTRANSACT);
+}
+
+/****************************************************************//**
+Determines if the primary key is clustered index.
+@return true */
+UNIV_INTERN
+bool
+ha_innobase::primary_key_is_clustered()
+{
+ return(true);
+}
+
+/*****************************************************************//**
Normalizes a table name string. A normalized name consists of the
database name catenated to '/' and table name. An example:
test/mytable. On Windows normalization puts both the database name and the
@@ -2573,9 +3001,9 @@ static
void
normalize_table_name(
/*=================*/
- char* norm_name, /* out: normalized name as a
+ char* norm_name, /*!< out: normalized name as a
null-terminated string */
- const char* name) /* in: table name string */
+ const char* name) /*!< in: table name string */
{
char* name_ptr;
char* db_ptr;
@@ -2610,19 +3038,19 @@ normalize_table_name(
#endif
}
-/************************************************************************
+/********************************************************************//**
Set the autoinc column max value. This should only be called once from
-ha_innobase::open(). Therefore there's no need for a covering lock. */
-
-ulong
+ha_innobase::open(). Therefore there's no need for a covering lock.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+ulint
ha_innobase::innobase_initialize_autoinc()
/*======================================*/
{
dict_index_t* index;
ulonglong auto_inc;
const char* col_name;
- ulint error = DB_SUCCESS;
- dict_table_t* innodb_table = prebuilt->table;
+ ulint error;
col_name = table->found_next_number_field->field_name;
index = innobase_get_index(table->s->next_number_index);
@@ -2630,15 +3058,15 @@ ha_innobase::innobase_initialize_autoinc()
/* Execute SELECT MAX(col_name) FROM TABLE; */
error = row_search_max_autoinc(index, col_name, &auto_inc);
- if (error == DB_SUCCESS) {
+ switch (error) {
+ case DB_SUCCESS:
- /* At the this stage we dont' know the increment
+ /* At this stage we don't know the increment
or the offset, so use default inrement of 1. */
++auto_inc;
+ break;
- dict_table_autoinc_initialize(innodb_table, auto_inc);
-
- } else if (error == DB_RECORD_NOT_FOUND) {
+ case DB_RECORD_NOT_FOUND:
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB: MySQL and InnoDB data "
"dictionaries are out of sync.\n"
@@ -2655,26 +3083,28 @@ ha_innobase::innobase_initialize_autoinc()
col_name, index->table->name);
auto_inc = 0xFFFFFFFFFFFFFFFFULL;
+ break;
- dict_table_autoinc_initialize(innodb_table, auto_inc);
+ default:
+ return(error);
+ }
- error = DB_SUCCESS;
- } /* else other errors are still fatal */
+ dict_table_autoinc_initialize(prebuilt->table, auto_inc);
- return(ulong(error));
+ return(DB_SUCCESS);
}
-/*********************************************************************
+/*****************************************************************//**
Creates and opens a handle to a table which already exists in an InnoDB
-database. */
-
+database.
+@return 1 if error, 0 if success */
+UNIV_INTERN
int
ha_innobase::open(
/*==============*/
- /* out: 1 if error, 0 if success */
- const char* name, /* in: table name */
- int mode, /* in: not used */
- uint test_if_locked) /* in: not used */
+ const char* name, /*!< in: table name */
+ int mode, /*!< in: not used */
+ uint test_if_locked) /*!< in: not used */
{
dict_table_t* ib_table;
char norm_name[1000];
@@ -2741,7 +3171,7 @@ retry:
if (is_part) {
sql_print_error("Failed to open table %s after "
- "%lu attemtps.\n", norm_name,
+ "%lu attempts.\n", norm_name,
retries);
}
@@ -2757,7 +3187,7 @@ retry:
"or, the table contains indexes that this "
"version of the engine\n"
"doesn't support.\n"
- "See http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n"
+ "See " REFMAN "innodb-troubleshooting.html\n"
"how you can resolve the problem.\n",
norm_name);
free_share(share);
@@ -2773,14 +3203,14 @@ retry:
"Have you deleted the .ibd file from the "
"database directory under\nthe MySQL datadir, "
"or have you used DISCARD TABLESPACE?\n"
- "See http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n"
+ "See " REFMAN "innodb-troubleshooting.html\n"
"how you can resolve the problem.\n",
norm_name);
free_share(share);
my_free(upd_buff, MYF(0));
my_errno = ENOENT;
- dict_table_decrement_handle_count(ib_table);
+ dict_table_decrement_handle_count(ib_table, FALSE);
DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
}
@@ -2849,12 +3279,21 @@ retry:
}
}
- stats.block_size = 16 * 1024; /* Index block size in InnoDB: used by MySQL
- in query optimization */
+ /* Index block size in InnoDB: used by MySQL in query optimization */
+ stats.block_size = 16 * 1024;
/* Init table lock structure */
thr_lock_data_init(&share->lock,&lock,(void*) 0);
+ if (prebuilt->table) {
+ /* We update the highest file format in the system
+ tablespace, if this table has a higher file format setting. */
+
+ trx_sys_file_format_max_upgrade(
+ (const char**) &innobase_file_format_check,
+ dict_table_get_format(prebuilt->table));
+ }
+
info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
/* Only if the table has an AUTOINC column. */
@@ -2879,30 +3318,31 @@ retry:
DBUG_RETURN(0);
}
+UNIV_INTERN
uint
ha_innobase::max_supported_key_part_length() const
{
return(DICT_MAX_INDEX_COL_LEN - 1);
}
-/**********************************************************************
-Closes a handle to an InnoDB table. */
-
+/******************************************************************//**
+Closes a handle to an InnoDB table.
+@return 0 */
+UNIV_INTERN
int
ha_innobase::close(void)
/*====================*/
- /* out: 0 */
{
THD* thd;
DBUG_ENTER("ha_innobase::close");
- thd = current_thd; // avoid calling current_thd twice, it may be slow
+ thd = ha_thd();
if (thd != NULL) {
innobase_release_temporary_latches(ht, thd);
}
- row_prebuilt_free(prebuilt);
+ row_prebuilt_free(prebuilt, FALSE);
my_free(upd_buff, MYF(0));
free_share(share);
@@ -2917,30 +3357,30 @@ ha_innobase::close(void)
/* The following accessor functions should really be inside MySQL code! */
-/******************************************************************
-Gets field offset for a field in a table. */
-inline
+/**************************************************************//**
+Gets field offset for a field in a table.
+@return offset */
+static inline
uint
get_field_offset(
/*=============*/
- /* out: offset */
- TABLE* table, /* in: MySQL table object */
- Field* field) /* in: MySQL field object */
+ TABLE* table, /*!< in: MySQL table object */
+ Field* field) /*!< in: MySQL field object */
{
return((uint) (field->ptr - table->record[0]));
}
-/******************************************************************
+/**************************************************************//**
Checks if a field in a record is SQL NULL. Uses the record format
-information in table to track the null bit in record. */
+information in table to track the null bit in record.
+@return 1 if NULL, 0 otherwise */
static inline
uint
field_in_record_is_null(
/*====================*/
- /* out: 1 if NULL, 0 otherwise */
- TABLE* table, /* in: MySQL table object */
- Field* field, /* in: MySQL field object */
- char* record) /* in: a row in MySQL format */
+ TABLE* table, /*!< in: MySQL table object */
+ Field* field, /*!< in: MySQL field object */
+ char* record) /*!< in: a row in MySQL format */
{
int null_offset;
@@ -2960,16 +3400,16 @@ field_in_record_is_null(
return(0);
}
-/******************************************************************
+/**************************************************************//**
Sets a field in a record to SQL NULL. Uses the record format
information in table to track the null bit in record. */
-inline
+static inline
void
set_field_in_record_to_null(
/*========================*/
- TABLE* table, /* in: MySQL table object */
- Field* field, /* in: MySQL field object */
- char* record) /* in: a row in MySQL format */
+ TABLE* table, /*!< in: MySQL table object */
+ Field* field, /*!< in: MySQL field object */
+ char* record) /*!< in: a row in MySQL format */
{
int null_offset;
@@ -2979,25 +3419,23 @@ set_field_in_record_to_null(
record[null_offset] = record[null_offset] | field->null_bit;
}
-extern "C" {
-/*****************************************************************
+/*************************************************************//**
InnoDB uses this function to compare two data fields for which the data type
is such that we must use MySQL code to compare them. NOTE that the prototype
of this function is in rem0cmp.c in InnoDB source code! If you change this
-function, remember to update the prototype there! */
-
+function, remember to update the prototype there!
+@return 1, 0, -1, if a is greater, equal, less than b, respectively */
+extern "C" UNIV_INTERN
int
innobase_mysql_cmp(
/*===============*/
- /* out: 1, 0, -1, if a is greater,
- equal, less than b, respectively */
- int mysql_type, /* in: MySQL type */
- uint charset_number, /* in: number of the charset */
- unsigned char* a, /* in: data field */
- unsigned int a_length, /* in: data field length,
+ int mysql_type, /*!< in: MySQL type */
+ uint charset_number, /*!< in: number of the charset */
+ const unsigned char* a, /*!< in: data field */
+ unsigned int a_length, /*!< in: data field length,
not UNIV_SQL_NULL */
- unsigned char* b, /* in: data field */
- unsigned int b_length) /* in: data field length,
+ const unsigned char* b, /*!< in: data field */
+ unsigned int b_length) /*!< in: data field length,
not UNIV_SQL_NULL */
{
CHARSET_INFO* charset;
@@ -3056,27 +3494,30 @@ innobase_mysql_cmp(
return(0);
}
default:
- assert(0);
+ ut_error;
}
return(0);
}
-}
-/******************************************************************
+/**************************************************************//**
Converts a MySQL type to an InnoDB type. Note that this function returns
the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
-VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'. */
-inline
+VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'.
+@return DATA_BINARY, DATA_VARCHAR, ... */
+extern "C" UNIV_INTERN
ulint
get_innobase_type_from_mysql_type(
/*==============================*/
- /* out: DATA_BINARY, DATA_VARCHAR, ... */
- ulint* unsigned_flag, /* out: DATA_UNSIGNED if an 'unsigned type';
- at least ENUM and SET, and unsigned integer
- types are 'unsigned types' */
- Field* field) /* in: MySQL field */
+ ulint* unsigned_flag, /*!< out: DATA_UNSIGNED if an
+ 'unsigned type';
+ at least ENUM and SET,
+ and unsigned integer
+ types are 'unsigned types' */
+ const void* f) /*!< in: MySQL Field */
{
+ const class Field* field = reinterpret_cast<const class Field*>(f);
+
/* The following asserts try to check that the MySQL type code fits in
8 bits: this is used in ibuf and also when DATA_NOT_NULL is ORed to
the type */
@@ -3160,21 +3601,21 @@ get_innobase_type_from_mysql_type(
case MYSQL_TYPE_LONG_BLOB:
return(DATA_BLOB);
default:
- assert(0);
+ ut_error;
}
return(0);
}
-/***********************************************************************
+/*******************************************************************//**
Writes an unsigned integer value < 64k to 2 bytes, in the little-endian
storage format. */
-inline
+static inline
void
innobase_write_to_2_little_endian(
/*==============================*/
- byte* buf, /* in: where to store */
- ulint val) /* in: value to write, must be < 64k */
+ byte* buf, /*!< in: where to store */
+ ulint val) /*!< in: value to write, must be < 64k */
{
ut_a(val < 256 * 256);
@@ -3182,31 +3623,31 @@ innobase_write_to_2_little_endian(
buf[1] = (byte)(val / 256);
}
-/***********************************************************************
+/*******************************************************************//**
Reads an unsigned integer value < 64k from 2 bytes, in the little-endian
-storage format. */
-inline
+storage format.
+@return value */
+static inline
uint
innobase_read_from_2_little_endian(
/*===============================*/
- /* out: value */
- const uchar* buf) /* in: from where to read */
+ const uchar* buf) /*!< in: from where to read */
{
return (uint) ((ulint)(buf[0]) + 256 * ((ulint)(buf[1])));
}
-/***********************************************************************
-Stores a key value for a row to a buffer. */
-
+/*******************************************************************//**
+Stores a key value for a row to a buffer.
+@return key value length as stored in buff */
+UNIV_INTERN
uint
ha_innobase::store_key_val_for_row(
/*===============================*/
- /* out: key value length as stored in buff */
- uint keynr, /* in: key number */
- char* buff, /* in/out: buffer for the key value (in MySQL
+ uint keynr, /*!< in: key number */
+ char* buff, /*!< in/out: buffer for the key value (in MySQL
format) */
- uint buff_len,/* in: buffer length */
- const uchar* record)/* in: row in MySQL format */
+ uint buff_len,/*!< in: buffer length */
+ const uchar* record)/*!< in: row in MySQL format */
{
KEY* key_info = table->key_info + keynr;
KEY_PART_INFO* key_part = key_info->key_part;
@@ -3264,13 +3705,13 @@ ha_innobase::store_key_val_for_row(
if (mysql_type == MYSQL_TYPE_VARCHAR) {
/* >= 5.0.3 true VARCHAR */
- ulint lenlen;
- ulint len;
- byte* data;
- ulint key_len;
- ulint true_len;
+ ulint lenlen;
+ ulint len;
+ const byte* data;
+ ulint key_len;
+ ulint true_len;
CHARSET_INFO* cs;
- int error=0;
+ int error=0;
key_len = key_part->length;
@@ -3339,7 +3780,7 @@ ha_innobase::store_key_val_for_row(
ulint true_len;
int error=0;
ulint blob_len;
- byte* blob_data;
+ const byte* blob_data;
ut_a(key_part->key_part_flag & HA_PART_KEY_SEG);
@@ -3472,19 +3913,19 @@ ha_innobase::store_key_val_for_row(
DBUG_RETURN((uint)(buff - buff_start));
}
-/******************************************************************
+/**************************************************************//**
Builds a 'template' to the prebuilt struct. The template is used in fast
retrieval of just those column values MySQL needs in its processing. */
static
void
build_template(
/*===========*/
- row_prebuilt_t* prebuilt, /* in/out: prebuilt struct */
- THD* thd, /* in: current user thread, used
+ row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct */
+ THD* thd, /*!< in: current user thread, used
only if templ_type is
ROW_MYSQL_REC_FIELDS */
- TABLE* table, /* in: MySQL table */
- uint templ_type) /* in: ROW_MYSQL_WHOLE_ROW or
+ TABLE* table, /*!< in: MySQL table */
+ uint templ_type) /*!< in: ROW_MYSQL_WHOLE_ROW or
ROW_MYSQL_REC_FIELDS */
{
dict_index_t* index;
@@ -3539,7 +3980,7 @@ build_template(
}
}
- clust_index = dict_table_get_first_index_noninline(prebuilt->table);
+ clust_index = dict_table_get_first_index(prebuilt->table);
if (templ_type == ROW_MYSQL_REC_FIELDS) {
index = prebuilt->index;
@@ -3559,8 +4000,7 @@ build_template(
if (!prebuilt->mysql_template) {
prebuilt->mysql_template = (mysql_row_templ_t*)
- mem_alloc_noninline(
- n_fields * sizeof(mysql_row_templ_t));
+ mem_alloc(n_fields * sizeof(mysql_row_templ_t));
}
prebuilt->template_type = templ_type;
@@ -3618,7 +4058,7 @@ include_field:
templ->col_no = i;
if (index == clust_index) {
- templ->rec_field_no = dict_col_get_clust_pos_noninline(
+ templ->rec_field_no = dict_col_get_clust_pos(
&index->table->cols[i], index);
} else {
templ->rec_field_no = dict_index_get_nth_col_pos(
@@ -3656,8 +4096,8 @@ include_field:
(((Field_varstring*)field)->length_bytes);
}
- templ->charset = dtype_get_charset_coll_noninline(
- index->table->cols[i].prtype);
+ templ->charset = dtype_get_charset_coll(
+ index->table->cols[i].prtype);
templ->mbminlen = index->table->cols[i].mbminlen;
templ->mbmaxlen = index->table->cols[i].mbmaxlen;
templ->is_unsigned = index->table->cols[i].prtype
@@ -3678,16 +4118,16 @@ skip_field:
for (i = 0; i < n_requested_fields; i++) {
templ = prebuilt->mysql_template + i;
- templ->rec_field_no = dict_col_get_clust_pos_noninline(
+ templ->rec_field_no = dict_col_get_clust_pos(
&index->table->cols[templ->col_no],
clust_index);
}
}
}
-/************************************************************************
+/********************************************************************//**
Get the upper limit of the MySQL integral and floating-point type. */
-
+UNIV_INTERN
ulonglong
ha_innobase::innobase_get_int_col_max_value(
/*========================================*/
@@ -3746,18 +4186,17 @@ ha_innobase::innobase_get_int_col_max_value(
return(max_value);
}
-/************************************************************************
+/********************************************************************//**
This special handling is really to overcome the limitations of MySQL's
binlogging. We need to eliminate the non-determinism that will arise in
INSERT ... SELECT type of statements, since MySQL binlog only stores the
min value of the autoinc interval. Once that is fixed we can get rid of
-the special lock handling.*/
-
-ulong
+the special lock handling.
+@return DB_SUCCESS if all OK else error code */
+UNIV_INTERN
+ulint
ha_innobase::innobase_lock_autoinc(void)
/*====================================*/
- /* out: DB_SUCCESS if all OK else
- error code */
{
ulint error = DB_SUCCESS;
@@ -3807,15 +4246,14 @@ ha_innobase::innobase_lock_autoinc(void)
return(ulong(error));
}
-/************************************************************************
-Reset the autoinc value in the table.*/
-
-ulong
+/********************************************************************//**
+Reset the autoinc value in the table.
+@return DB_SUCCESS if all went well else error code */
+UNIV_INTERN
+ulint
ha_innobase::innobase_reset_autoinc(
/*================================*/
- /* out: DB_SUCCESS if all went well
- else error code */
- ulonglong autoinc) /* in: value to store */
+ ulonglong autoinc) /*!< in: value to store */
{
ulint error;
@@ -3831,16 +4269,15 @@ ha_innobase::innobase_reset_autoinc(
return(ulong(error));
}
-/************************************************************************
+/********************************************************************//**
Store the autoinc value in the table. The autoinc value is only set if
-it's greater than the existing autoinc value in the table.*/
-
-ulong
+it's greater than the existing autoinc value in the table.
+@return DB_SUCCESS if all went well else error code */
+UNIV_INTERN
+ulint
ha_innobase::innobase_set_max_autoinc(
/*==================================*/
- /* out: DB_SUCCES if all went well
- else error code */
- ulonglong auto_inc) /* in: value to store */
+ ulonglong auto_inc) /*!< in: value to store */
{
ulint error;
@@ -3856,15 +4293,15 @@ ha_innobase::innobase_set_max_autoinc(
return(ulong(error));
}
-/************************************************************************
+/********************************************************************//**
Stores a row in an InnoDB database, to the table specified in this
-handle. */
-
+handle.
+@return error code */
+UNIV_INTERN
int
ha_innobase::write_row(
/*===================*/
- /* out: error code */
- uchar* record) /* in: a row in MySQL format */
+ uchar* record) /*!< in: a row in MySQL format */
{
ulint error = 0;
int error_result= 0;
@@ -3877,7 +4314,7 @@ ha_innobase::write_row(
if (prebuilt->trx != trx) {
sql_print_error("The transaction object for the table handle is at "
"%p, but for the current thread it is at %p",
- prebuilt->trx, trx);
+ (const void*) prebuilt->trx, (const void*) trx);
fputs("InnoDB: Dump of 200 bytes around prebuilt: ", stderr);
ut_print_buf(stderr, ((const byte*)prebuilt) - 100, 200);
@@ -3911,7 +4348,7 @@ ha_innobase::write_row(
being blocked by a MySQL table lock TL_WRITE_ALLOW_READ. */
dict_table_t* src_table;
- ulint mode;
+ enum lock_mode mode;
num_write_row = 0;
@@ -4046,7 +4483,6 @@ no_commit:
case SQLCOM_INSERT_SELECT:
case SQLCOM_REPLACE_SELECT:
goto set_max_autoinc;
- break;
default:
break;
@@ -4060,29 +4496,24 @@ no_commit:
update the table upper limit. Note: last_value
will be 0 if get_auto_increment() was not called.*/
- if (auto_inc >= prebuilt->autoinc_last_value) {
+ if (auto_inc <= col_max_value
+ && auto_inc >= prebuilt->autoinc_last_value) {
set_max_autoinc:
- /* This should filter out the negative
- values set explicitly by the user. */
- if (auto_inc <= col_max_value) {
- ut_a(prebuilt->autoinc_increment > 0);
+ ut_a(prebuilt->autoinc_increment > 0);
- ulonglong need;
- ulonglong offset;
+ ulonglong need;
+ ulonglong offset;
- offset = prebuilt->autoinc_offset;
- need = prebuilt->autoinc_increment;
+ offset = prebuilt->autoinc_offset;
+ need = prebuilt->autoinc_increment;
- auto_inc = innobase_next_autoinc(
- auto_inc,
- need, offset, col_max_value);
+ auto_inc = innobase_next_autoinc(
+ auto_inc, need, offset, col_max_value);
- err = innobase_set_max_autoinc(
- auto_inc);
+ err = innobase_set_max_autoinc(auto_inc);
- if (err != DB_SUCCESS) {
- error = err;
- }
+ if (err != DB_SUCCESS) {
+ error = err;
}
}
break;
@@ -4092,7 +4523,9 @@ set_max_autoinc:
innodb_srv_conc_exit_innodb(prebuilt->trx);
report_error:
- error_result = convert_error_code_to_mysql((int) error, user_thd);
+ error_result = convert_error_code_to_mysql((int) error,
+ prebuilt->table->flags,
+ user_thd);
func_exit:
innobase_active_small();
@@ -4100,23 +4533,23 @@ func_exit:
DBUG_RETURN(error_result);
}
-/**************************************************************************
+/**********************************************************************//**
Checks which fields have changed in a row and stores information
-of them to an update vector. */
+of them to an update vector.
+@return error number or 0 */
static
int
calc_row_difference(
/*================*/
- /* out: error number or 0 */
- upd_t* uvect, /* in/out: update vector */
- uchar* old_row, /* in: old row in MySQL format */
- uchar* new_row, /* in: new row in MySQL format */
- struct st_table* table, /* in: table in MySQL data
+ upd_t* uvect, /*!< in/out: update vector */
+ uchar* old_row, /*!< in: old row in MySQL format */
+ uchar* new_row, /*!< in: new row in MySQL format */
+ struct st_table* table, /*!< in: table in MySQL data
dictionary */
- uchar* upd_buff, /* in: buffer to use */
- ulint buff_len, /* in: buffer length */
- row_prebuilt_t* prebuilt, /* in: InnoDB prebuilt struct */
- THD* thd) /* in: user thread */
+ uchar* upd_buff, /*!< in: buffer to use */
+ ulint buff_len, /*!< in: buffer length */
+ row_prebuilt_t* prebuilt, /*!< in: InnoDB prebuilt struct */
+ THD* thd) /*!< in: user thread */
{
uchar* original_upd_buff = upd_buff;
Field* field;
@@ -4125,9 +4558,9 @@ calc_row_difference(
ulint o_len;
ulint n_len;
ulint col_pack_len;
- byte* new_mysql_row_col;
- byte* o_ptr;
- byte* n_ptr;
+ const byte* new_mysql_row_col;
+ const byte* o_ptr;
+ const byte* n_ptr;
byte* buf;
upd_field_t* ufield;
ulint col_type;
@@ -4137,7 +4570,7 @@ calc_row_difference(
uint i;
n_fields = table->s->fields;
- clust_index = dict_table_get_first_index_noninline(prebuilt->table);
+ clust_index = dict_table_get_first_index(prebuilt->table);
/* We use upd_buff to convert changed fields */
buf = (byte*) upd_buff;
@@ -4145,8 +4578,8 @@ calc_row_difference(
for (i = 0; i < n_fields; i++) {
field = table->field[i];
- o_ptr = (byte*) old_row + get_field_offset(table, field);
- n_ptr = (byte*) new_row + get_field_offset(table, field);
+ o_ptr = (const byte*) old_row + get_field_offset(table, field);
+ n_ptr = (const byte*) new_row + get_field_offset(table, field);
/* Use new_mysql_row_col and col_pack_len save the values */
@@ -4216,8 +4649,8 @@ calc_row_difference(
/* Let us use a dummy dfield to make the conversion
from the MySQL column format to the InnoDB format */
- dict_col_copy_type_noninline(prebuilt->table->cols + i,
- &dfield.type);
+ dict_col_copy_type(prebuilt->table->cols + i,
+ dfield_get_type(&dfield));
if (n_len != UNIV_SQL_NULL) {
buf = row_mysql_store_col_in_innobase_format(
@@ -4226,17 +4659,15 @@ calc_row_difference(
TRUE,
new_mysql_row_col,
col_pack_len,
- dict_table_is_comp_noninline(
- prebuilt->table));
- ufield->new_val.data = dfield.data;
- ufield->new_val.len = dfield.len;
+ dict_table_is_comp(prebuilt->table));
+ dfield_copy_data(&ufield->new_val, &dfield);
} else {
- ufield->new_val.data = NULL;
- ufield->new_val.len = UNIV_SQL_NULL;
+ dfield_set_null(&ufield->new_val);
}
ufield->exp = NULL;
- ufield->field_no = dict_col_get_clust_pos_noninline(
+ ufield->orig_len = 0;
+ ufield->field_no = dict_col_get_clust_pos(
&prebuilt->table->cols[i], clust_index);
n_changed++;
}
@@ -4250,20 +4681,20 @@ calc_row_difference(
return(0);
}
-/**************************************************************************
+/**********************************************************************//**
Updates a row given as a parameter to a new value. Note that we are given
whole rows, not just the fields which are updated: this incurs some
overhead for CPU when we check which fields are actually updated.
TODO: currently InnoDB does not prevent the 'Halloween problem':
in a searched update a single row can get updated several times
-if its index columns are updated! */
-
+if its index columns are updated!
+@return error number or 0 */
+UNIV_INTERN
int
ha_innobase::update_row(
/*====================*/
- /* out: error number or 0 */
- const uchar* old_row, /* in: old row in MySQL format */
- uchar* new_row) /* in: new row in MySQL format */
+ const uchar* old_row, /*!< in: old row in MySQL format */
+ uchar* new_row) /*!< in: new row in MySQL format */
{
upd_t* uvect;
int error = 0;
@@ -4294,7 +4725,7 @@ ha_innobase::update_row(
/* This is not a delete */
prebuilt->upd_node->is_delete = FALSE;
- assert(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
+ ut_a(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
innodb_srv_conc_enter_innodb(trx);
@@ -4342,7 +4773,8 @@ ha_innobase::update_row(
innodb_srv_conc_exit_innodb(trx);
- error = convert_error_code_to_mysql(error, user_thd);
+ error = convert_error_code_to_mysql(error,
+ prebuilt->table->flags, user_thd);
if (error == 0 /* success */
&& uvect->n_fields == 0 /* no columns were updated */) {
@@ -4362,14 +4794,14 @@ ha_innobase::update_row(
DBUG_RETURN(error);
}
-/**************************************************************************
-Deletes a row given as the parameter. */
-
+/**********************************************************************//**
+Deletes a row given as the parameter.
+@return error number or 0 */
+UNIV_INTERN
int
ha_innobase::delete_row(
/*====================*/
- /* out: error number or 0 */
- const uchar* record) /* in: a row in MySQL format */
+ const uchar* record) /*!< in: a row in MySQL format */
{
int error = 0;
trx_t* trx = thd_to_trx(user_thd);
@@ -4394,7 +4826,8 @@ ha_innobase::delete_row(
innodb_srv_conc_exit_innodb(trx);
- error = convert_error_code_to_mysql(error, user_thd);
+ error = convert_error_code_to_mysql(
+ error, prebuilt->table->flags, user_thd);
/* Tell the InnoDB server that there might be work for
utility threads: */
@@ -4404,11 +4837,11 @@ ha_innobase::delete_row(
DBUG_RETURN(error);
}
-/**************************************************************************
+/**********************************************************************//**
Removes a new lock set on a row, if it was not read optimistically. This can
be called after a row has been read in the processing of an UPDATE or a DELETE
query, if the option innodb_locks_unsafe_for_binlog is set. */
-
+UNIV_INTERN
void
ha_innobase::unlock_row(void)
/*=========================*/
@@ -4442,6 +4875,7 @@ ha_innobase::unlock_row(void)
}
/* See handler.h and row0mysql.h for docs on this function. */
+UNIV_INTERN
bool
ha_innobase::was_semi_consistent_read(void)
/*=======================================*/
@@ -4450,6 +4884,7 @@ ha_innobase::was_semi_consistent_read(void)
}
/* See handler.h and row0mysql.h for docs on this function. */
+UNIV_INTERN
void
ha_innobase::try_semi_consistent_read(bool yes)
/*===========================================*/
@@ -4470,27 +4905,25 @@ ha_innobase::try_semi_consistent_read(bool yes)
}
}
-/**********************************************************************
-Initializes a handle to use an index. */
-
+/******************************************************************//**
+Initializes a handle to use an index.
+@return 0 or error number */
+UNIV_INTERN
int
ha_innobase::index_init(
/*====================*/
- /* out: 0 or error number */
- uint keynr, /* in: key (index) number */
- bool sorted) /* in: 1 if result MUST be sorted according to index */
+ uint keynr, /*!< in: key (index) number */
+ bool sorted) /*!< in: 1 if result MUST be sorted according to index */
{
- int error = 0;
DBUG_ENTER("index_init");
- error = change_active_index(keynr);
-
- DBUG_RETURN(error);
+ DBUG_RETURN(change_active_index(keynr));
}
-/**********************************************************************
-Currently does nothing. */
-
+/******************************************************************//**
+Currently does nothing.
+@return 0 */
+UNIV_INTERN
int
ha_innobase::index_end(void)
/*========================*/
@@ -4501,10 +4934,10 @@ ha_innobase::index_end(void)
DBUG_RETURN(error);
}
-/*************************************************************************
+/*********************************************************************//**
Converts a search mode flag understood by MySQL to a flag understood
by InnoDB. */
-inline
+static inline
ulint
convert_search_mode_to_innobase(
/*============================*/
@@ -4606,18 +5039,17 @@ overwrap, we use this test only as a secondary way of determining the
start of a new SQL statement. */
-/**************************************************************************
+/**********************************************************************//**
Positions an index cursor to the index specified in the handle. Fetches the
-row if any. */
-
+row if any.
+@return 0, HA_ERR_KEY_NOT_FOUND, or error number */
+UNIV_INTERN
int
ha_innobase::index_read(
/*====================*/
- /* out: 0, HA_ERR_KEY_NOT_FOUND,
- or error number */
- uchar* buf, /* in/out: buffer for the returned
+ uchar* buf, /*!< in/out: buffer for the returned
row */
- const uchar* key_ptr, /* in: key value; if this is NULL
+ const uchar* key_ptr, /*!< in: key value; if this is NULL
we position the cursor at the
start or end of index; this can
also contain an InnoDB row id, in
@@ -4626,8 +5058,8 @@ ha_innobase::index_read(
also be a prefix of a full key value,
and the last column can be a prefix
of a full column */
- uint key_len,/* in: key value length */
- enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
+ uint key_len,/*!< in: key value length */
+ enum ha_rkey_function find_flag)/*!< in: search flags from my_base.h */
{
ulint mode;
dict_index_t* index;
@@ -4643,24 +5075,30 @@ ha_innobase::index_read(
index = prebuilt->index;
+ if (UNIV_UNLIKELY(index == NULL)) {
+ prebuilt->index_usable = FALSE;
+ DBUG_RETURN(HA_ERR_CRASHED);
+ }
+
/* Note that if the index for which the search template is built is not
necessarily prebuilt->index, but can also be the clustered index */
if (prebuilt->sql_stat_start) {
- build_template(prebuilt, user_thd, table,
- ROW_MYSQL_REC_FIELDS);
+ build_template(prebuilt, user_thd, table, ROW_MYSQL_REC_FIELDS);
}
if (key_ptr) {
/* Convert the search key value to InnoDB format into
prebuilt->search_tuple */
- row_sel_convert_mysql_key_to_innobase(prebuilt->search_tuple,
- (byte*) key_val_buff,
- (ulint)upd_and_key_val_buff_len,
- index,
- (byte*) key_ptr,
- (ulint) key_len, prebuilt->trx);
+ row_sel_convert_mysql_key_to_innobase(
+ prebuilt->search_tuple,
+ (byte*) key_val_buff,
+ (ulint)upd_and_key_val_buff_len,
+ index,
+ (byte*) key_ptr,
+ (ulint) key_len,
+ prebuilt->trx);
} else {
/* We position the cursor to the last or the first entry
in the index */
@@ -4673,10 +5111,12 @@ ha_innobase::index_read(
match_mode = 0;
if (find_flag == HA_READ_KEY_EXACT) {
+
match_mode = ROW_SEL_EXACT;
} else if (find_flag == HA_READ_PREFIX
- || find_flag == HA_READ_PREFIX_LAST) {
+ || find_flag == HA_READ_PREFIX_LAST) {
+
match_mode = ROW_SEL_EXACT_PREFIX;
}
@@ -4695,51 +5135,55 @@ ha_innobase::index_read(
ret = DB_UNSUPPORTED;
}
- if (ret == DB_SUCCESS) {
+ switch (ret) {
+ case DB_SUCCESS:
error = 0;
table->status = 0;
-
- } else if (ret == DB_RECORD_NOT_FOUND) {
+ break;
+ case DB_RECORD_NOT_FOUND:
error = HA_ERR_KEY_NOT_FOUND;
table->status = STATUS_NOT_FOUND;
-
- } else if (ret == DB_END_OF_INDEX) {
+ break;
+ case DB_END_OF_INDEX:
error = HA_ERR_KEY_NOT_FOUND;
table->status = STATUS_NOT_FOUND;
- } else {
- error = convert_error_code_to_mysql((int) ret, user_thd);
+ break;
+ default:
+ error = convert_error_code_to_mysql((int) ret,
+ prebuilt->table->flags,
+ user_thd);
table->status = STATUS_NOT_FOUND;
+ break;
}
DBUG_RETURN(error);
}
-/***********************************************************************
+/*******************************************************************//**
The following functions works like index_read, but it find the last
-row with the current key value or prefix. */
-
+row with the current key value or prefix.
+@return 0, HA_ERR_KEY_NOT_FOUND, or an error code */
+UNIV_INTERN
int
ha_innobase::index_read_last(
/*=========================*/
- /* out: 0, HA_ERR_KEY_NOT_FOUND, or an
- error code */
- uchar* buf, /* out: fetched row */
- const uchar* key_ptr,/* in: key value, or a prefix of a full
+ uchar* buf, /*!< out: fetched row */
+ const uchar* key_ptr,/*!< in: key value, or a prefix of a full
key value */
- uint key_len)/* in: length of the key val or prefix
+ uint key_len)/*!< in: length of the key val or prefix
in bytes */
{
return(index_read(buf, key_ptr, key_len, HA_READ_PREFIX_LAST));
}
-/************************************************************************
-Get the index for a handle. Does not change active index.*/
-
+/********************************************************************//**
+Get the index for a handle. Does not change active index.
+@return NULL or index instance. */
+UNIV_INTERN
dict_index_t*
ha_innobase::innobase_get_index(
/*============================*/
- /* out: NULL or index instance. */
- uint keynr) /* in: use this index; MAX_KEY means always
+ uint keynr) /*!< in: use this index; MAX_KEY means always
clustered index, even if it was internally
generated by InnoDB */
{
@@ -4755,10 +5199,10 @@ ha_innobase::innobase_get_index(
if (keynr != MAX_KEY && table->s->keys > 0) {
key = table->key_info + keynr;
- index = dict_table_get_index_noninline(
- prebuilt->table, key->name);
+ index = dict_table_get_index_on_name(prebuilt->table,
+ key->name);
} else {
- index = dict_table_get_first_index_noninline(prebuilt->table);
+ index = dict_table_get_first_index(prebuilt->table);
}
if (!index) {
@@ -4772,14 +5216,14 @@ ha_innobase::innobase_get_index(
DBUG_RETURN(index);
}
-/************************************************************************
-Changes the active index of a handle. */
-
+/********************************************************************//**
+Changes the active index of a handle.
+@return 0 or error code */
+UNIV_INTERN
int
ha_innobase::change_active_index(
/*=============================*/
- /* out: 0 or error code */
- uint keynr) /* in: use this index; MAX_KEY means always clustered
+ uint keynr) /*!< in: use this index; MAX_KEY means always clustered
index, even if it was internally generated by
InnoDB */
{
@@ -4792,11 +5236,27 @@ ha_innobase::change_active_index(
prebuilt->index = innobase_get_index(keynr);
- if (!prebuilt->index) {
+ if (UNIV_UNLIKELY(!prebuilt->index)) {
+ sql_print_warning("InnoDB: change_active_index(%u) failed",
+ keynr);
+ prebuilt->index_usable = FALSE;
DBUG_RETURN(1);
}
- assert(prebuilt->search_tuple != 0);
+ prebuilt->index_usable = row_merge_is_index_usable(prebuilt->trx,
+ prebuilt->index);
+
+ if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
+ push_warning_printf(user_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+ HA_ERR_TABLE_DEF_CHANGED,
+ "InnoDB: insufficient history for index %u",
+ keynr);
+ /* The caller seems to ignore this. Thus, we must check
+ this again in row_search_for_mysql(). */
+ DBUG_RETURN(2);
+ }
+
+ ut_a(prebuilt->search_tuple != 0);
dtuple_set_n_fields(prebuilt->search_tuple, prebuilt->index->n_fields);
@@ -4814,23 +5274,23 @@ ha_innobase::change_active_index(
DBUG_RETURN(0);
}
-/**************************************************************************
+/**********************************************************************//**
Positions an index cursor to the index specified in keynr. Fetches the
-row if any. */
-/* ??? This is only used to read whole keys ??? */
-
+row if any.
+??? This is only used to read whole keys ???
+@return error number or 0 */
+UNIV_INTERN
int
ha_innobase::index_read_idx(
/*========================*/
- /* out: error number or 0 */
- uchar* buf, /* in/out: buffer for the returned
+ uchar* buf, /*!< in/out: buffer for the returned
row */
- uint keynr, /* in: use this index */
- const uchar* key, /* in: key value; if this is NULL
+ uint keynr, /*!< in: use this index */
+ const uchar* key, /*!< in: key value; if this is NULL
we position the cursor at the
start or end of index */
- uint key_len, /* in: key value length */
- enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
+ uint key_len, /*!< in: key value length */
+ enum ha_rkey_function find_flag)/*!< in: search flags from my_base.h */
{
if (change_active_index(keynr)) {
@@ -4840,19 +5300,18 @@ ha_innobase::index_read_idx(
return(index_read(buf, key, key_len, find_flag));
}
-/***************************************************************************
+/***********************************************************************//**
Reads the next or previous row from a cursor, which must have previously been
-positioned using index_read. */
-
+positioned using index_read.
+@return 0, HA_ERR_END_OF_FILE, or error number */
+UNIV_INTERN
int
ha_innobase::general_fetch(
/*=======================*/
- /* out: 0, HA_ERR_END_OF_FILE, or error
- number */
- uchar* buf, /* in/out: buffer for next row in MySQL
+ uchar* buf, /*!< in/out: buffer for next row in MySQL
format */
- uint direction, /* in: ROW_SEL_NEXT or ROW_SEL_PREV */
- uint match_mode) /* in: 0, ROW_SEL_EXACT, or
+ uint direction, /*!< in: ROW_SEL_NEXT or ROW_SEL_PREV */
+ uint match_mode) /*!< in: 0, ROW_SEL_EXACT, or
ROW_SEL_EXACT_PREFIX */
{
ulint ret;
@@ -4864,39 +5323,43 @@ ha_innobase::general_fetch(
innodb_srv_conc_enter_innodb(prebuilt->trx);
- ret = row_search_for_mysql((byte*)buf, 0, prebuilt, match_mode,
- direction);
+ ret = row_search_for_mysql(
+ (byte*)buf, 0, prebuilt, match_mode, direction);
+
innodb_srv_conc_exit_innodb(prebuilt->trx);
- if (ret == DB_SUCCESS) {
+ switch (ret) {
+ case DB_SUCCESS:
error = 0;
table->status = 0;
-
- } else if (ret == DB_RECORD_NOT_FOUND) {
+ break;
+ case DB_RECORD_NOT_FOUND:
error = HA_ERR_END_OF_FILE;
table->status = STATUS_NOT_FOUND;
-
- } else if (ret == DB_END_OF_INDEX) {
+ break;
+ case DB_END_OF_INDEX:
error = HA_ERR_END_OF_FILE;
table->status = STATUS_NOT_FOUND;
- } else {
- error = convert_error_code_to_mysql((int) ret, user_thd);
+ break;
+ default:
+ error = convert_error_code_to_mysql(
+ (int) ret, prebuilt->table->flags, user_thd);
table->status = STATUS_NOT_FOUND;
+ break;
}
DBUG_RETURN(error);
}
-/***************************************************************************
+/***********************************************************************//**
Reads the next row from a cursor, which must have previously been
-positioned using index_read. */
-
+positioned using index_read.
+@return 0, HA_ERR_END_OF_FILE, or error number */
+UNIV_INTERN
int
ha_innobase::index_next(
/*====================*/
- /* out: 0, HA_ERR_END_OF_FILE, or error
- number */
- uchar* buf) /* in/out: buffer for next row in MySQL
+ uchar* buf) /*!< in/out: buffer for next row in MySQL
format */
{
ha_statistic_increment(&SSV::ha_read_next_count);
@@ -4904,47 +5367,46 @@ ha_innobase::index_next(
return(general_fetch(buf, ROW_SEL_NEXT, 0));
}
-/***********************************************************************
-Reads the next row matching to the key value given as the parameter. */
-
+/*******************************************************************//**
+Reads the next row matching to the key value given as the parameter.
+@return 0, HA_ERR_END_OF_FILE, or error number */
+UNIV_INTERN
int
ha_innobase::index_next_same(
/*=========================*/
- /* out: 0, HA_ERR_END_OF_FILE, or error
- number */
- uchar* buf, /* in/out: buffer for the row */
- const uchar* key, /* in: key value */
- uint keylen) /* in: key value length */
+ uchar* buf, /*!< in/out: buffer for the row */
+ const uchar* key, /*!< in: key value */
+ uint keylen) /*!< in: key value length */
{
ha_statistic_increment(&SSV::ha_read_next_count);
return(general_fetch(buf, ROW_SEL_NEXT, last_match_mode));
}
-/***************************************************************************
+/***********************************************************************//**
Reads the previous row from a cursor, which must have previously been
-positioned using index_read. */
-
+positioned using index_read.
+@return 0, HA_ERR_END_OF_FILE, or error number */
+UNIV_INTERN
int
ha_innobase::index_prev(
/*====================*/
- /* out: 0, HA_ERR_END_OF_FILE, or error number */
- uchar* buf) /* in/out: buffer for previous row in MySQL format */
+ uchar* buf) /*!< in/out: buffer for previous row in MySQL format */
{
ha_statistic_increment(&SSV::ha_read_prev_count);
return(general_fetch(buf, ROW_SEL_PREV, 0));
}
-/************************************************************************
+/********************************************************************//**
Positions a cursor on the first record in an index and reads the
-corresponding row to buf. */
-
+corresponding row to buf.
+@return 0, HA_ERR_END_OF_FILE, or error code */
+UNIV_INTERN
int
ha_innobase::index_first(
/*=====================*/
- /* out: 0, HA_ERR_END_OF_FILE, or error code */
- uchar* buf) /* in/out: buffer for the row */
+ uchar* buf) /*!< in/out: buffer for the row */
{
int error;
@@ -4962,15 +5424,15 @@ ha_innobase::index_first(
DBUG_RETURN(error);
}
-/************************************************************************
+/********************************************************************//**
Positions a cursor on the last record in an index and reads the
-corresponding row to buf. */
-
+corresponding row to buf.
+@return 0, HA_ERR_END_OF_FILE, or error code */
+UNIV_INTERN
int
ha_innobase::index_last(
/*====================*/
- /* out: 0, HA_ERR_END_OF_FILE, or error code */
- uchar* buf) /* in/out: buffer for the row */
+ uchar* buf) /*!< in/out: buffer for the row */
{
int error;
@@ -4988,14 +5450,14 @@ ha_innobase::index_last(
DBUG_RETURN(error);
}
-/********************************************************************
-Initialize a table scan. */
-
+/****************************************************************//**
+Initialize a table scan.
+@return 0 or error number */
+UNIV_INTERN
int
ha_innobase::rnd_init(
/*==================*/
- /* out: 0 or error number */
- bool scan) /* in: ???????? */
+ bool scan) /*!< in: TRUE if table/index scan, FALSE otherwise */
{
int err;
@@ -5020,26 +5482,26 @@ ha_innobase::rnd_init(
return(err);
}
-/*********************************************************************
-Ends a table scan. */
-
+/*****************************************************************//**
+Ends a table scan.
+@return 0 or error number */
+UNIV_INTERN
int
ha_innobase::rnd_end(void)
/*======================*/
- /* out: 0 or error number */
{
return(index_end());
}
-/*********************************************************************
+/*****************************************************************//**
Reads the next row in a table scan (also used to read the FIRST row
-in a table scan). */
-
+in a table scan).
+@return 0, HA_ERR_END_OF_FILE, or error number */
+UNIV_INTERN
int
ha_innobase::rnd_next(
/*==================*/
- /* out: 0, HA_ERR_END_OF_FILE, or error number */
- uchar* buf) /* in/out: returns the row in this buffer,
+ uchar* buf) /*!< in/out: returns the row in this buffer,
in MySQL format */
{
int error;
@@ -5049,9 +5511,11 @@ ha_innobase::rnd_next(
if (start_of_scan) {
error = index_first(buf);
+
if (error == HA_ERR_KEY_NOT_FOUND) {
error = HA_ERR_END_OF_FILE;
}
+
start_of_scan = 0;
} else {
error = general_fetch(buf, ROW_SEL_NEXT, 0);
@@ -5060,15 +5524,15 @@ ha_innobase::rnd_next(
DBUG_RETURN(error);
}
-/**************************************************************************
-Fetches a row from the table based on a row reference. */
-
+/**********************************************************************//**
+Fetches a row from the table based on a row reference.
+@return 0, HA_ERR_KEY_NOT_FOUND, or error code */
+UNIV_INTERN
int
ha_innobase::rnd_pos(
/*=================*/
- /* out: 0, HA_ERR_KEY_NOT_FOUND, or error code */
- uchar* buf, /* in/out: buffer for the row */
- uchar* pos) /* in: primary key value of the row in the
+ uchar* buf, /*!< in/out: buffer for the row */
+ uchar* pos) /*!< in: primary key value of the row in the
MySQL format, or the row id if the clustered
index was internally generated by InnoDB; the
length of data in pos has to be ref_length */
@@ -5112,7 +5576,7 @@ ha_innobase::rnd_pos(
DBUG_RETURN(error);
}
-/*************************************************************************
+/*********************************************************************//**
Stores a reference to the current row to 'ref' field of the handle. Note
that in the case where we have generated the clustered index for the
table, the function parameter is illogical: we MUST ASSUME that 'record'
@@ -5120,11 +5584,11 @@ is the current 'position' of the handle, because if row ref is actually
the row id internally generated in InnoDB, then 'record' does not contain
it. We just guess that the row id must be for the record where the handle
was positioned the last time. */
-
+UNIV_INTERN
void
ha_innobase::position(
/*==================*/
- const uchar* record) /* in: row in MySQL format */
+ const uchar* record) /*!< in: row in MySQL format */
{
uint len;
@@ -5153,41 +5617,23 @@ ha_innobase::position(
}
}
-/*********************************************************************
-If it's a DB_TOO_BIG_RECORD error then set a suitable message to
-return to the client.*/
-inline
-void
-innodb_check_for_record_too_big_error(
-/*==================================*/
- ulint comp, /* in: ROW_FORMAT: nonzero=COMPACT, 0=REDUNDANT */
- int error) /* in: error code to check */
-{
- if (error == (int)DB_TOO_BIG_RECORD) {
- ulint max_row_size
- = page_get_free_space_of_empty_noninline(comp) / 2;
-
- my_error(ER_TOO_BIG_ROWSIZE, MYF(0), max_row_size);
- }
-}
-
/* limit innodb monitor access to users with PROCESS privilege.
See http://bugs.mysql.com/32710 for expl. why we choose PROCESS. */
#define IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(table_name, thd) \
(row_is_magic_monitor_table(table_name) \
&& check_global_access(thd, PROCESS_ACL))
-/*********************************************************************
+/*****************************************************************//**
Creates a table definition to an InnoDB database. */
static
int
create_table_def(
/*=============*/
- trx_t* trx, /* in: InnoDB transaction handle */
- TABLE* form, /* in: information on table
+ trx_t* trx, /*!< in: InnoDB transaction handle */
+ TABLE* form, /*!< in: information on table
columns and indexes */
- const char* table_name, /* in: table name */
- const char* path_of_temp_table,/* in: if this is a table explicitly
+ const char* table_name, /*!< in: table name */
+ const char* path_of_temp_table,/*!< in: if this is a table explicitly
created by the user with the
TEMPORARY keyword, then this
parameter is the dir path where the
@@ -5195,7 +5641,7 @@ create_table_def(
an .ibd file for it (no .ibd extension
in the path, though); otherwise this
is NULL */
- ulint flags) /* in: table flags */
+ ulint flags) /*!< in: table flags */
{
Field* field;
dict_table_t* table;
@@ -5254,9 +5700,19 @@ create_table_def(
charset_no = (ulint)field->charset()->number;
- ut_a(charset_no < 256); /* in data0type.h we assume
- that the number fits in one
- byte */
+ if (UNIV_UNLIKELY(charset_no >= 256)) {
+ /* in data0type.h we assume that the
+ number fits in one byte in prtype */
+ push_warning_printf(
+ (THD*) trx->mysql_thd,
+ MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_CANT_CREATE_TABLE,
+ "In InnoDB, charset-collation codes"
+ " must be below 256."
+ " Unsupported code %lu.",
+ (ulong) charset_no);
+ DBUG_RETURN(ER_CANT_CREATE_TABLE);
+ }
}
ut_a(field->type() < 256); /* we assume in dtype_form_prtype()
@@ -5281,17 +5737,8 @@ create_table_def(
/* First check whether the column to be added has a
system reserved name. */
if (dict_col_name_is_reserved(field->field_name)){
- push_warning_printf(
- (THD*) trx->mysql_thd,
- MYSQL_ERROR::WARN_LEVEL_WARN,
- ER_CANT_CREATE_TABLE,
- "Error creating table '%s' with "
- "column name '%s'. '%s' is a "
- "reserved name. Please try to "
- "re-create the table with a "
- "different column name.",
- table->name, (char*) field->field_name,
- (char*) field->field_name);
+ my_error(ER_WRONG_COLUMN_NAME, MYF(0),
+ field->field_name);
dict_mem_table_free(table);
trx_commit_for_mysql(trx);
@@ -5313,25 +5760,32 @@ create_table_def(
error = row_create_table_for_mysql(table, trx);
- innodb_check_for_record_too_big_error(flags & DICT_TF_COMPACT, error);
+ if (error == DB_DUPLICATE_KEY) {
+ char buf[100];
+ innobase_convert_identifier(buf, sizeof buf,
+ table_name, strlen(table_name),
+ trx->mysql_thd, TRUE);
+ my_error(ER_TABLE_EXISTS_ERROR, MYF(0), buf);
+ }
error_ret:
- error = convert_error_code_to_mysql(error, NULL);
+ error = convert_error_code_to_mysql(error, flags, NULL);
DBUG_RETURN(error);
}
-/*********************************************************************
+/*****************************************************************//**
Creates an index in an InnoDB database. */
static
int
create_index(
/*=========*/
- trx_t* trx, /* in: InnoDB transaction handle */
- TABLE* form, /* in: information on table
+ trx_t* trx, /*!< in: InnoDB transaction handle */
+ TABLE* form, /*!< in: information on table
columns and indexes */
- const char* table_name, /* in: table name */
- uint key_num) /* in: index number */
+ ulint flags, /*!< in: InnoDB table flags */
+ const char* table_name, /*!< in: table name */
+ uint key_num) /*!< in: index number */
{
Field* field;
dict_index_t* index;
@@ -5369,8 +5823,8 @@ create_index(
/* We pass 0 as the space id, and determine at a lower level the space
id where to store the table */
- index = dict_mem_index_create((char*) table_name, key->name, 0,
- ind_type, n_fields);
+ index = dict_mem_index_create(table_name, key->name, 0,
+ ind_type, n_fields);
field_lengths = (ulint*) my_malloc(sizeof(ulint) * n_fields,
MYF(MY_FAE));
@@ -5441,27 +5895,23 @@ create_index(
sure we don't create too long indexes. */
error = row_create_index_for_mysql(index, trx, field_lengths);
- innodb_check_for_record_too_big_error(form->s->row_type
- != ROW_TYPE_REDUNDANT, error);
-
- error = convert_error_code_to_mysql(error, NULL);
+ error = convert_error_code_to_mysql(error, flags, NULL);
my_free(field_lengths, MYF(0));
DBUG_RETURN(error);
}
-/*********************************************************************
+/*****************************************************************//**
Creates an index to an InnoDB table when the user has defined no
primary index. */
static
int
create_clustered_index_when_no_primary(
/*===================================*/
- trx_t* trx, /* in: InnoDB transaction handle */
- ulint comp, /* in: ROW_FORMAT:
- nonzero=COMPACT, 0=REDUNDANT */
- const char* table_name) /* in: table name */
+ trx_t* trx, /*!< in: InnoDB transaction handle */
+ ulint flags, /*!< in: InnoDB table flags */
+ const char* table_name) /*!< in: table name */
{
dict_index_t* index;
int error;
@@ -5471,22 +5921,187 @@ create_clustered_index_when_no_primary(
index = dict_mem_index_create(table_name,
innobase_index_reserve_name,
0, DICT_CLUSTERED, 0);
- error = row_create_index_for_mysql(index, trx, NULL);
- innodb_check_for_record_too_big_error(comp, error);
+ error = row_create_index_for_mysql(index, trx, NULL);
- error = convert_error_code_to_mysql(error, NULL);
+ error = convert_error_code_to_mysql(error, flags, NULL);
return(error);
}
-/*********************************************************************
-Update create_info. Used in SHOW CREATE TABLE et al. */
+/*****************************************************************//**
+Validates the create options. We may build on this function
+in future. For now, it checks two specifiers:
+KEY_BLOCK_SIZE and ROW_FORMAT.
+If innodb_strict_mode is not set then this function is a no-op
+and returns TRUE without looking at form or create_info.
+In strict mode every violation pushes a warning with code
+ER_ILLEGAL_HA_CREATE_OPTION to thd; checking continues after the
+first violation, so several warnings may be pushed by one call.
+@return TRUE if valid, FALSE if at least one check failed. */
+static
+ibool
+create_options_are_valid(
+/*=====================*/
+ THD* thd, /*!< in: connection thread; the warnings
+ are pushed to it. */
+ TABLE* form, /*!< in: information on table
+ columns and indexes; only
+ form->s->row_type is read here */
+ HA_CREATE_INFO* create_info) /*!< in: create info; key_block_size
+ and used_fields are read here. */
+{
+ ibool kbs_specified = FALSE;
+ ibool ret = TRUE;
+
+
+ ut_ad(thd != NULL);
+
+ /* If innodb_strict_mode is not set don't do any validation:
+ every combination of options is accepted by this function. */
+ if (!(THDVAR(thd, strict_mode))) {
+ return(TRUE);
+ }
+ ut_ad(form != NULL);
+ ut_ad(create_info != NULL);
+
+ /* First check if KEY_BLOCK_SIZE was specified. It counts as
+ specified when the value is nonzero or when the
+ HA_CREATE_USED_KEY_BLOCK_SIZE bit is set in used_fields, so a
+ flagged value of 0 reaches the default branch below and is
+ reported as invalid. */
+ if (create_info->key_block_size
+ || (create_info->used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE)) {
+
+ kbs_specified = TRUE;
+ switch (create_info->key_block_size) {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ case 16:
+ /* Valid value. */
+ break;
+ default: /* any other value, including 0 */
+ push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: invalid"
+ " KEY_BLOCK_SIZE = %lu."
+ " Valid values are"
+ " [1, 2, 4, 8, 16]",
+ create_info->key_block_size);
+ ret = FALSE;
+ }
+ }
+
+ /* If KEY_BLOCK_SIZE was specified, check for its
+ dependencies: srv_file_per_table must be set and
+ srv_file_format must not be below DICT_TF_FORMAT_ZIP.
+ The two checks are independent, so both warnings can be
+ pushed for the same statement. */
+ if (kbs_specified && !srv_file_per_table) {
+ push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: KEY_BLOCK_SIZE"
+ " requires innodb_file_per_table.");
+ ret = FALSE;
+ }
+
+ if (kbs_specified && srv_file_format < DICT_TF_FORMAT_ZIP) {
+ push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: KEY_BLOCK_SIZE"
+ " requires innodb_file_format >"
+ " Antelope.");
+ ret = FALSE;
+ }
+
+ /* Now check for ROW_FORMAT specifier. This is done only when
+ the HA_CREATE_USED_ROW_FORMAT bit is set in used_fields. */
+ if (create_info->used_fields & HA_CREATE_USED_ROW_FORMAT) {
+ switch (form->s->row_type) {
+ const char* row_format_name;
+ case ROW_TYPE_COMPRESSED:
+ case ROW_TYPE_DYNAMIC:
+ row_format_name
+ = form->s->row_type == ROW_TYPE_COMPRESSED
+ ? "COMPRESSED"
+ : "DYNAMIC";
+
+ /* These two ROW_FORMATs require
+ srv_file_per_table and srv_file_format */
+ if (!srv_file_per_table) {
+ push_warning_printf(
+ thd,
+ MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: ROW_FORMAT=%s"
+ " requires innodb_file_per_table.",
+ row_format_name);
+ ret = FALSE;
+
+ }
+
+ if (srv_file_format < DICT_TF_FORMAT_ZIP) {
+ push_warning_printf(
+ thd,
+ MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: ROW_FORMAT=%s"
+ " requires innodb_file_format >"
+ " Antelope.",
+ row_format_name);
+ ret = FALSE;
+ }
+
+ /* Cannot specify KEY_BLOCK_SIZE with
+ ROW_FORMAT = DYNAMIC.
+ However, we do allow COMPRESSED to be
+ specified with KEY_BLOCK_SIZE. */
+ if (kbs_specified
+ && form->s->row_type == ROW_TYPE_DYNAMIC) {
+ push_warning_printf(
+ thd,
+ MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: cannot specify"
+ " ROW_FORMAT = DYNAMIC with"
+ " KEY_BLOCK_SIZE.");
+ ret = FALSE;
+ }
+
+ break;
+
+ case ROW_TYPE_REDUNDANT:
+ case ROW_TYPE_COMPACT:
+ case ROW_TYPE_DEFAULT:
+ /* Default is COMPACT: ROW_TYPE_DEFAULT is named
+ "COMPACT" in the warning text below. */
+ row_format_name
+ = form->s->row_type == ROW_TYPE_REDUNDANT
+ ? "REDUNDANT"
+ : "COMPACT";
+
+ /* Cannot specify KEY_BLOCK_SIZE with these
+ format specifiers. */
+ if (kbs_specified) {
+ push_warning_printf(
+ thd,
+ MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: cannot specify"
+ " ROW_FORMAT = %s with"
+ " KEY_BLOCK_SIZE.",
+ row_format_name);
+ ret = FALSE;
+ }
+
+ break;
+
+ default: /* every other row type is rejected */
+ push_warning(thd,
+ MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: invalid ROW_FORMAT specifier.");
+ ret = FALSE;
+
+ }
+ }
+
+ return(ret);
+}
+
+/*****************************************************************//**
+Update create_info. Used in SHOW CREATE TABLE et al. */
+UNIV_INTERN
void
ha_innobase::update_create_info(
/*============================*/
- HA_CREATE_INFO* create_info) /* in/out: create info */
+ HA_CREATE_INFO* create_info) /*!< in/out: create info */
{
if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) {
ha_innobase::info(HA_STATUS_AUTO);
@@ -5494,17 +6109,17 @@ ha_innobase::update_create_info(
}
}
-/*********************************************************************
-Creates a new table to an InnoDB database. */
-
+/*****************************************************************//**
+Creates a new table to an InnoDB database.
+@return error number */
+UNIV_INTERN
int
ha_innobase::create(
/*================*/
- /* out: error number */
- const char* name, /* in: table name */
- TABLE* form, /* in: information on table
+ const char* name, /*!< in: table name */
+ TABLE* form, /*!< in: information on table
columns and indexes */
- HA_CREATE_INFO* create_info) /* in: more information of the
+ HA_CREATE_INFO* create_info) /*!< in: more information of the
created table, contains also the
create statement string */
{
@@ -5517,8 +6132,11 @@ ha_innobase::create(
char name2[FN_REFLEN];
char norm_name[FN_REFLEN];
THD* thd = ha_thd();
- ib_longlong auto_inc_value;
+ ib_int64_t auto_inc_value;
ulint flags;
+ /* Cache the value of innodb_file_format, in case it is
+ modified by another thread while the table is being created. */
+ const ulint file_format = srv_file_format;
DBUG_ENTER("ha_innobase::create");
@@ -5566,18 +6184,7 @@ ha_innobase::create(
trx_search_latch_release_if_reserved(parent_trx);
- trx = trx_allocate_for_mysql();
-
- trx->mysql_thd = thd;
- trx->mysql_query_str = thd_query(thd);
-
- if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
- trx->check_foreigns = FALSE;
- }
-
- if (thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)) {
- trx->check_unique_secondary = FALSE;
- }
+ trx = innobase_trx_allocate(thd);
if (lower_case_table_names) {
srv_lower_case_table_names = TRUE;
@@ -5599,8 +6206,145 @@ ha_innobase::create(
flags = 0;
- if (form->s->row_type != ROW_TYPE_REDUNDANT) {
- flags |= DICT_TF_COMPACT;
+ /* Validate create options if innodb_strict_mode is set. */
+ if (!create_options_are_valid(thd, form, create_info)) {
+ error = ER_ILLEGAL_HA_CREATE_OPTION;
+ goto cleanup;
+ }
+
+ if (create_info->key_block_size
+ || (create_info->used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE)) {
+ /* Determine the page_zip.ssize corresponding to the
+ requested page size (key_block_size) in kilobytes. */
+
+ ulint ssize, ksize;
+ ulint key_block_size = create_info->key_block_size;
+
+ for (ssize = ksize = 1; ssize <= DICT_TF_ZSSIZE_MAX;
+ ssize++, ksize <<= 1) {
+ if (key_block_size == ksize) {
+ flags = ssize << DICT_TF_ZSSIZE_SHIFT
+ | DICT_TF_COMPACT
+ | DICT_TF_FORMAT_ZIP
+ << DICT_TF_FORMAT_SHIFT;
+ break;
+ }
+ }
+
+ if (!srv_file_per_table) {
+ push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: KEY_BLOCK_SIZE"
+ " requires innodb_file_per_table.");
+ flags = 0;
+ }
+
+ if (file_format < DICT_TF_FORMAT_ZIP) {
+ push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: KEY_BLOCK_SIZE"
+ " requires innodb_file_format >"
+ " Antelope.");
+ flags = 0;
+ }
+
+ if (!flags) {
+ push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: ignoring"
+ " KEY_BLOCK_SIZE=%lu.",
+ create_info->key_block_size);
+ }
+ }
+
+ if (create_info->used_fields & HA_CREATE_USED_ROW_FORMAT) {
+ if (flags) {
+ /* KEY_BLOCK_SIZE was specified. */
+ if (form->s->row_type != ROW_TYPE_COMPRESSED) {
+ /* ROW_FORMAT other than COMPRESSED
+ ignores KEY_BLOCK_SIZE. It does not
+ make sense to reject conflicting
+ KEY_BLOCK_SIZE and ROW_FORMAT, because
+ such combinations can be obtained
+ with ALTER TABLE anyway. */
+ push_warning_printf(
+ thd,
+ MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: ignoring KEY_BLOCK_SIZE=%lu"
+ " unless ROW_FORMAT=COMPRESSED.",
+ create_info->key_block_size);
+ flags = 0;
+ }
+ } else {
+ /* No KEY_BLOCK_SIZE */
+ if (form->s->row_type == ROW_TYPE_COMPRESSED) {
+ /* ROW_FORMAT=COMPRESSED without
+ KEY_BLOCK_SIZE implies half the
+ maximum KEY_BLOCK_SIZE. */
+ flags = (DICT_TF_ZSSIZE_MAX - 1)
+ << DICT_TF_ZSSIZE_SHIFT
+ | DICT_TF_COMPACT
+ | DICT_TF_FORMAT_ZIP
+ << DICT_TF_FORMAT_SHIFT;
+#if DICT_TF_ZSSIZE_MAX < 1
+# error "DICT_TF_ZSSIZE_MAX < 1"
+#endif
+ }
+ }
+
+ switch (form->s->row_type) {
+ const char* row_format_name;
+ case ROW_TYPE_REDUNDANT:
+ break;
+ case ROW_TYPE_COMPRESSED:
+ case ROW_TYPE_DYNAMIC:
+ row_format_name
+ = form->s->row_type == ROW_TYPE_COMPRESSED
+ ? "COMPRESSED"
+ : "DYNAMIC";
+
+ if (!srv_file_per_table) {
+ push_warning_printf(
+ thd,
+ MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: ROW_FORMAT=%s"
+ " requires innodb_file_per_table.",
+ row_format_name);
+ } else if (file_format < DICT_TF_FORMAT_ZIP) {
+ push_warning_printf(
+ thd,
+ MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: ROW_FORMAT=%s"
+ " requires innodb_file_format >"
+ " Antelope.",
+ row_format_name);
+ } else {
+ flags |= DICT_TF_COMPACT
+ | (DICT_TF_FORMAT_ZIP
+ << DICT_TF_FORMAT_SHIFT);
+ break;
+ }
+
+ /* fall through */
+ case ROW_TYPE_NOT_USED:
+ case ROW_TYPE_FIXED:
+ default:
+ push_warning(thd,
+ MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: assuming ROW_FORMAT=COMPACT.");
+ case ROW_TYPE_DEFAULT:
+ case ROW_TYPE_COMPACT:
+ flags = DICT_TF_COMPACT;
+ break;
+ }
+ } else if (!flags) {
+ /* No KEY_BLOCK_SIZE or ROW_FORMAT specified:
+ use ROW_FORMAT=COMPACT by default. */
+ flags = DICT_TF_COMPACT;
}
/* Look for a primary key */
@@ -5612,11 +6356,12 @@ ha_innobase::create(
/* Our function row_get_mysql_key_number_for_index assumes
the primary key is always number 0, if it exists */
- DBUG_ASSERT(primary_key_no == -1 || primary_key_no == 0);
+ ut_a(primary_key_no == -1 || primary_key_no == 0);
/* Check for name conflicts (with reserved name) for
any user indices to be created. */
- if (innobase_index_name_is_reserved(trx, form, norm_name)) {
+ if (innobase_index_name_is_reserved(trx, form->key_info,
+ form->s->keys)) {
error = -1;
goto cleanup;
}
@@ -5638,8 +6383,7 @@ ha_innobase::create(
by InnoDB */
error = create_clustered_index_when_no_primary(
- trx, form->s->row_type != ROW_TYPE_REDUNDANT,
- norm_name);
+ trx, flags, norm_name);
if (error) {
goto cleanup;
}
@@ -5648,7 +6392,7 @@ ha_innobase::create(
if (primary_key_no != -1) {
/* In InnoDB the clustered index must always be created
first */
- if ((error = create_index(trx, form, norm_name,
+ if ((error = create_index(trx, form, flags, norm_name,
(uint) primary_key_no))) {
goto cleanup;
}
@@ -5658,7 +6402,8 @@ ha_innobase::create(
if (i != (uint) primary_key_no) {
- if ((error = create_index(trx, form, norm_name, i))) {
+ if ((error = create_index(trx, form, flags, norm_name,
+ i))) {
goto cleanup;
}
}
@@ -5669,7 +6414,7 @@ ha_innobase::create(
*trx->mysql_query_str, norm_name,
create_info->options & HA_LEX_CREATE_TMP_TABLE);
- error = convert_error_code_to_mysql(error, NULL);
+ error = convert_error_code_to_mysql(error, flags, NULL);
if (error) {
goto cleanup;
@@ -5690,6 +6435,15 @@ ha_innobase::create(
DBUG_ASSERT(innobase_table != 0);
+ if (innobase_table) {
+ /* We update the highest file format in the system table
+ space, if this table has higher file format setting. */
+
+ trx_sys_file_format_max_upgrade(
+ (const char**) &innobase_file_format_check,
+ dict_table_get_format(innobase_table));
+ }
+
/* Note: We can't call update_thd() as prebuilt will not be
setup at this stage and so we use thd. */
@@ -5737,14 +6491,14 @@ cleanup:
DBUG_RETURN(error);
}
-/*********************************************************************
-Discards or imports an InnoDB tablespace. */
-
+/*****************************************************************//**
+Discards or imports an InnoDB tablespace.
+@return 0 == success, -1 == error */
+UNIV_INTERN
int
ha_innobase::discard_or_import_tablespace(
/*======================================*/
- /* out: 0 == success, -1 == error */
- my_bool discard) /* in: TRUE if discard, else import */
+ my_bool discard) /*!< in: TRUE if discard, else import */
{
dict_table_t* dict_table;
trx_t* trx;
@@ -5765,18 +6519,18 @@ ha_innobase::discard_or_import_tablespace(
err = row_import_tablespace_for_mysql(dict_table->name, trx);
}
- err = convert_error_code_to_mysql(err, NULL);
+ err = convert_error_code_to_mysql(err, dict_table->flags, NULL);
DBUG_RETURN(err);
}
-/*********************************************************************
-Deletes all rows of an InnoDB table. */
-
+/*****************************************************************//**
+Deletes all rows of an InnoDB table.
+@return error number */
+UNIV_INTERN
int
ha_innobase::delete_all_rows(void)
/*==============================*/
- /* out: error number */
{
int error;
@@ -5803,23 +6557,24 @@ ha_innobase::delete_all_rows(void)
goto fallback;
}
- error = convert_error_code_to_mysql(error, NULL);
+ error = convert_error_code_to_mysql(error, prebuilt->table->flags,
+ NULL);
DBUG_RETURN(error);
}
-/*********************************************************************
+/*****************************************************************//**
Drops a table from an InnoDB database. Before calling this function,
MySQL calls innobase_commit to commit the transaction of the current user.
Then the current user cannot have locks set on the table. Drop table
operation inside InnoDB will remove all locks any user has on the table
-inside InnoDB. */
-
+inside InnoDB.
+@return error number */
+UNIV_INTERN
int
ha_innobase::delete_table(
/*======================*/
- /* out: error number */
- const char* name) /* in: table name */
+ const char* name) /*!< in: table name */
{
ulint name_len;
int error;
@@ -5848,28 +6603,17 @@ ha_innobase::delete_table(
trx_search_latch_release_if_reserved(parent_trx);
+ trx = innobase_trx_allocate(thd);
+
if (lower_case_table_names) {
srv_lower_case_table_names = TRUE;
} else {
srv_lower_case_table_names = FALSE;
}
- trx = trx_allocate_for_mysql();
-
- trx->mysql_thd = thd;
- trx->mysql_query_str = thd_query(thd);
-
- if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
- trx->check_foreigns = FALSE;
- }
-
- if (thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)) {
- trx->check_unique_secondary = FALSE;
- }
-
name_len = strlen(name);
- assert(name_len < 1000);
+ ut_a(name_len < 1000);
/* Drop the table in InnoDB */
@@ -5892,26 +6636,24 @@ ha_innobase::delete_table(
trx_free_for_mysql(trx);
- error = convert_error_code_to_mysql(error, NULL);
+ error = convert_error_code_to_mysql(error, 0, NULL);
DBUG_RETURN(error);
}
-/*********************************************************************
+/*****************************************************************//**
Removes all tables in the named database inside InnoDB. */
static
void
innobase_drop_database(
/*===================*/
- /* out: error number */
- handlerton *hton, /* in: handlerton of Innodb */
- char* path) /* in: database path; inside InnoDB the name
+ handlerton *hton, /*!< in: handlerton of Innodb */
+ char* path) /*!< in: database path; inside InnoDB the name
of the last directory in the path is used as
the database name: for example, in 'mysql/data/test'
the database name is 'test' */
{
ulint len = 0;
- trx_t* parent_trx;
trx_t* trx;
char* ptr;
int error;
@@ -5921,12 +6663,18 @@ innobase_drop_database(
/* Get the transaction associated with the current thd, or create one
if not yet created */
- parent_trx = check_trx_exists(thd);
+ DBUG_ASSERT(hton == innodb_hton_ptr);
- /* In case MySQL calls this in the middle of a SELECT query, release
- possible adaptive hash latch to avoid deadlocks of threads */
+ /* In the Windows plugin, thd = current_thd is always NULL */
+ if (thd) {
+ trx_t* parent_trx = check_trx_exists(thd);
- trx_search_latch_release_if_reserved(parent_trx);
+ /* In case MySQL calls this in the middle of a SELECT
+ query, release possible adaptive hash latch to avoid
+ deadlocks of threads */
+
+ trx_search_latch_release_if_reserved(parent_trx);
+ }
ptr = strend(path) - 2;
@@ -5944,14 +6692,14 @@ innobase_drop_database(
#ifdef __WIN__
innobase_casedn_str(namebuf);
#endif
+#if defined __WIN__ && !defined MYSQL_SERVER
+ /* In the Windows plugin, thd = current_thd is always NULL */
trx = trx_allocate_for_mysql();
- trx->mysql_thd = thd;
- trx->mysql_query_str = thd_query(thd);
-
- if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
- trx->check_foreigns = FALSE;
- }
-
+ trx->mysql_thd = NULL;
+ trx->mysql_query_str = NULL;
+#else
+ trx = innobase_trx_allocate(thd);
+#endif
error = row_drop_database_for_mysql(namebuf, trx);
my_free(namebuf, MYF(0));
@@ -5968,32 +6716,85 @@ innobase_drop_database(
innobase_commit_low(trx);
trx_free_for_mysql(trx);
-#ifdef NO_LONGER_INTERESTED_IN_DROP_DB_ERROR
- error = convert_error_code_to_mysql(error, NULL);
-
- return(error);
-#else
- return;
-#endif
}
+/*********************************************************************//**
+Renames an InnoDB table. Both names are passed through
+normalize_table_name() into temporary buffers, which are freed before
+returning, and the rename itself is done by row_rename_table_for_mysql().
+If that call does not return DB_SUCCESS, a message naming both tables is
+written to dict_foreign_err_file.
+@return 0 or error code */
+static
+int
+innobase_rename_table(
+/*==================*/
+ trx_t* trx, /*!< in: transaction */
+ const char* from, /*!< in: old name of the table */
+ const char* to, /*!< in: new name of the table */
+ ibool lock_and_commit)
+ /*!< in: TRUE=lock data dictionary and commit;
+ when TRUE the log buffer is also flushed to disk
+ after the data dictionary has been unlocked */
+{
+ int error;
+ char* norm_to;
+ char* norm_from;
-/*************************************************************************
-Renames an InnoDB table. */
+ if (lower_case_table_names) {
+ srv_lower_case_table_names = TRUE;
+ } else {
+ srv_lower_case_table_names = FALSE;
+ }
+
+ /* Magic number 64 arbitrary: extra room beyond strlen() for the
+ normalized name. NOTE(review): the my_malloc() results are used
+ below without a NULL check. */
+ norm_to = (char*) my_malloc(strlen(to) + 64, MYF(0));
+ norm_from = (char*) my_malloc(strlen(from) + 64, MYF(0));
+ normalize_table_name(norm_to, to);
+ normalize_table_name(norm_from, from);
+
+ /* Serialize data dictionary operations with dictionary mutex:
+ no deadlocks can occur then in these operations. With
+ lock_and_commit == FALSE the lock is not taken here; presumably
+ the caller already holds it -- TODO confirm against callers. */
+
+ if (lock_and_commit) {
+ row_mysql_lock_data_dictionary(trx);
+ }
+
+ error = row_rename_table_for_mysql(
+ norm_from, norm_to, trx, lock_and_commit);
+
+ if (error != DB_SUCCESS) { /* report the failed rename */
+ FILE* ef = dict_foreign_err_file;
+
+ fputs("InnoDB: Renaming table ", ef);
+ ut_print_name(ef, trx, TRUE, norm_from);
+ fputs(" to ", ef);
+ ut_print_name(ef, trx, TRUE, norm_to);
+ fputs(" failed!\n", ef);
+ }
+
+ if (lock_and_commit) {
+ row_mysql_unlock_data_dictionary(trx);
+
+ /* Flush the log to reduce probability that the .frm
+ files and the InnoDB data dictionary get out-of-sync
+ if the user runs with innodb_flush_log_at_trx_commit = 0 */
+
+ log_buffer_flush_to_disk();
+ }
+
+ my_free(norm_to, MYF(0));
+ my_free(norm_from, MYF(0));
+
+ return error;
+}
+/*********************************************************************//**
+Renames an InnoDB table.
+@return 0 or error code */
+UNIV_INTERN
int
ha_innobase::rename_table(
/*======================*/
- /* out: 0 or error code */
- const char* from, /* in: old name of the table */
- const char* to) /* in: new name of the table */
+ const char* from, /*!< in: old name of the table */
+ const char* to) /*!< in: new name of the table */
{
- ulint name_len1;
- ulint name_len2;
+ trx_t* trx;
int error;
trx_t* parent_trx;
- trx_t* trx;
- char norm_from[1000];
- char norm_to[1000];
THD* thd = ha_thd();
DBUG_ENTER("ha_innobase::rename_table");
@@ -6008,38 +6809,9 @@ ha_innobase::rename_table(
trx_search_latch_release_if_reserved(parent_trx);
- if (lower_case_table_names) {
- srv_lower_case_table_names = TRUE;
- } else {
- srv_lower_case_table_names = FALSE;
- }
-
- trx = trx_allocate_for_mysql();
- trx->mysql_thd = thd;
- trx->mysql_query_str = thd_query(thd);
-
- if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
- trx->check_foreigns = FALSE;
- }
+ trx = innobase_trx_allocate(thd);
- name_len1 = strlen(from);
- name_len2 = strlen(to);
-
- assert(name_len1 < 1000);
- assert(name_len2 < 1000);
-
- normalize_table_name(norm_from, from);
- normalize_table_name(norm_to, to);
-
- /* Rename the table in InnoDB */
-
- error = row_rename_table_for_mysql(norm_from, norm_to, trx);
-
- /* Flush the log to reduce probability that the .frm files and
- the InnoDB data dictionary get out-of-sync if the user runs
- with innodb_flush_log_at_trx_commit = 0 */
-
- log_buffer_flush_to_disk();
+ error = innobase_rename_table(trx, from, to, TRUE);
/* Tell the InnoDB server that there might be work for
utility threads: */
@@ -6067,23 +6839,22 @@ ha_innobase::rename_table(
error = DB_ERROR;
}
- error = convert_error_code_to_mysql(error, NULL);
+ error = convert_error_code_to_mysql(error, 0, NULL);
DBUG_RETURN(error);
}
-/*************************************************************************
-Estimates the number of index records in a range. */
-
+/*********************************************************************//**
+Estimates the number of index records in a range.
+@return estimated number of rows */
+UNIV_INTERN
ha_rows
ha_innobase::records_in_range(
/*==========================*/
- /* out: estimated number of
- rows */
- uint keynr, /* in: index number */
- key_range *min_key, /* in: start key value of the
+ uint keynr, /*!< in: index number */
+ key_range *min_key, /*!< in: start key value of the
range, may also be 0 */
- key_range *max_key) /* in: range end key val, may
+ key_range *max_key) /*!< in: range end key val, may
also be 0 */
{
KEY* key;
@@ -6096,11 +6867,10 @@ ha_innobase::records_in_range(
+ table->s->max_key_length + 100;
dtuple_t* range_start;
dtuple_t* range_end;
- ib_longlong n_rows;
+ ib_int64_t n_rows;
ulint mode1;
ulint mode2;
- void* heap1;
- void* heap2;
+ mem_heap_t* heap;
DBUG_ENTER("records_in_range");
@@ -6117,12 +6887,18 @@ ha_innobase::records_in_range(
key = table->key_info + active_index;
- index = dict_table_get_index_noninline(prebuilt->table, key->name);
+ index = dict_table_get_index_on_name(prebuilt->table, key->name);
+
+ /* MySQL knows about this index and so we must be able to find it.*/
+ ut_a(index);
- range_start = dtuple_create_for_mysql(&heap1, key->key_parts);
+ heap = mem_heap_create(2 * (key->key_parts * sizeof(dfield_t)
+ + sizeof(dtuple_t)));
+
+ range_start = dtuple_create(heap, key->key_parts);
dict_index_copy_types(range_start, index, key->key_parts);
- range_end = dtuple_create_for_mysql(&heap2, key->key_parts);
+ range_end = dtuple_create(heap, key->key_parts);
dict_index_copy_types(range_end, index, key->key_parts);
row_sel_convert_mysql_key_to_innobase(
@@ -6157,8 +6933,7 @@ ha_innobase::records_in_range(
n_rows = HA_POS_ERROR;
}
- dtuple_free_for_mysql(heap1);
- dtuple_free_for_mysql(heap2);
+ mem_heap_free(heap);
my_free(key_val_buff2, MYF(0));
@@ -6177,14 +6952,14 @@ ha_innobase::records_in_range(
DBUG_RETURN((ha_rows) n_rows);
}
-/*************************************************************************
+/*********************************************************************//**
Gives an UPPER BOUND to the number of rows in a table. This is used in
-filesort.cc. */
-
+filesort.cc.
+@return upper bound of rows */
+UNIV_INTERN
ha_rows
ha_innobase::estimate_rows_upper_bound(void)
/*======================================*/
- /* out: upper bound of rows */
{
dict_index_t* index;
ulonglong estimate;
@@ -6206,10 +6981,13 @@ ha_innobase::estimate_rows_upper_bound(void)
trx_search_latch_release_if_reserved(prebuilt->trx);
- index = dict_table_get_first_index_noninline(prebuilt->table);
+ index = dict_table_get_first_index(prebuilt->table);
+
+ ut_a(index->stat_n_leaf_pages > 0);
+
+ local_data_file_length =
+ ((ulonglong) index->stat_n_leaf_pages) * UNIV_PAGE_SIZE;
- local_data_file_length = ((ulonglong) index->stat_n_leaf_pages)
- * UNIV_PAGE_SIZE;
/* Calculate a minimum length for a clustered index record and from
that an upper bound for the number of rows. Since we only calculate
@@ -6224,15 +7002,15 @@ ha_innobase::estimate_rows_upper_bound(void)
DBUG_RETURN((ha_rows) estimate);
}
-/*************************************************************************
+/*********************************************************************//**
How many seeks it will take to read through the table. This is to be
comparable to the number returned by records_in_range so that we can
-decide if we should scan the table or use keys. */
-
+decide if we should scan the table or use keys.
+@return estimated time measured in disk seeks */
+UNIV_INTERN
double
ha_innobase::scan_time()
/*====================*/
- /* out: estimated time measured in disk seeks */
{
/* Since MySQL seems to favor table scans too much over index
searches, we pretend that a sequential read takes the same time
@@ -6242,17 +7020,17 @@ ha_innobase::scan_time()
return((double) (prebuilt->table->stat_clustered_index_size));
}
-/**********************************************************************
+/******************************************************************//**
Calculate the time it takes to read a set of ranges through an index
-This enables us to optimise reads for clustered indexes. */
-
+This enables us to optimise reads for clustered indexes.
+@return estimated time measured in disk seeks */
+UNIV_INTERN
double
ha_innobase::read_time(
/*===================*/
- /* out: estimated time measured in disk seeks */
- uint index, /* in: key number */
- uint ranges, /* in: how many ranges */
- ha_rows rows) /* in: estimated number of rows in the ranges */
+ uint index, /*!< in: key number */
+ uint ranges, /*!< in: how many ranges */
+ ha_rows rows) /*!< in: estimated number of rows in the ranges */
{
ha_rows total_rows;
double time_for_scan;
@@ -6280,19 +7058,19 @@ ha_innobase::read_time(
return(ranges + (double) rows / (double) total_rows * time_for_scan);
}
-/*************************************************************************
+/*********************************************************************//**
Returns statistics information of the table to the MySQL interpreter,
in various fields of the handle object. */
-
+UNIV_INTERN
int
ha_innobase::info(
/*==============*/
- uint flag) /* in: what information MySQL requests */
+ uint flag) /*!< in: what information MySQL requests */
{
dict_table_t* ib_table;
dict_index_t* index;
ha_rows rec_per_key;
- ib_longlong n_rows;
+ ib_int64_t n_rows;
ulong j;
ulong i;
char path[FN_REFLEN];
@@ -6458,10 +7236,10 @@ ha_innobase::info(
}
if (flag & HA_STATUS_CONST) {
- index = dict_table_get_first_index_noninline(ib_table);
+ index = dict_table_get_first_index(ib_table);
if (prebuilt->clust_index_was_generated) {
- index = dict_table_get_next_index_noninline(index);
+ index = dict_table_get_next_index(index);
}
for (i = 0; i < table->s->keys; i++) {
@@ -6472,8 +7250,8 @@ ha_innobase::info(
".frm file. Have you mixed up "
".frm files from different "
"installations? See "
-"http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n",
-
+ REFMAN
+ "innodb-troubleshooting.html\n",
ib_table->name);
break;
}
@@ -6485,7 +7263,7 @@ ha_innobase::info(
"Index %s of %s has %lu columns unique inside InnoDB, but MySQL is asking "
"statistics for %lu columns. Have you mixed up .frm files from different "
"installations? "
-"See http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n",
+"See " REFMAN "innodb-troubleshooting.html\n",
index->name,
ib_table->name,
(unsigned long)
@@ -6517,37 +7295,45 @@ ha_innobase::info(
(ulong) rec_per_key;
}
- index = dict_table_get_next_index_noninline(index);
+ index = dict_table_get_next_index(index);
}
}
if (flag & HA_STATUS_ERRKEY) {
+ const dict_index_t* err_index;
+
ut_a(prebuilt->trx);
ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
- errkey = (unsigned int) row_get_mysql_key_number_for_index(
- (dict_index_t*) trx_get_error_info(prebuilt->trx));
+ err_index = trx_get_error_info(prebuilt->trx);
+
+ if (err_index) {
+ errkey = (unsigned int)
+ row_get_mysql_key_number_for_index(err_index);
+ } else {
+ errkey = (unsigned int) prebuilt->trx->error_key_num;
+ }
}
- if (flag & HA_STATUS_AUTO && table->found_next_number_field) {
- stats.auto_increment_value = innobase_peek_autoinc();
+ if ((flag & HA_STATUS_AUTO) && table->found_next_number_field) {
+ stats.auto_increment_value = innobase_peek_autoinc();
}
prebuilt->trx->op_info = (char*)"";
- DBUG_RETURN(0);
+ DBUG_RETURN(0);
}
-/**************************************************************************
+/**********************************************************************//**
Updates index cardinalities of the table, based on 8 random dives into
-each index tree. This does NOT calculate exact statistics on the table. */
-
+each index tree. This does NOT calculate exact statistics on the table.
+@return always 0 (success) */
+UNIV_INTERN
int
ha_innobase::analyze(
/*=================*/
- /* out: returns always 0 (success) */
- THD* thd, /* in: connection thread handle */
- HA_CHECK_OPT* check_opt) /* in: currently ignored */
+ THD* thd, /*!< in: connection thread handle */
+ HA_CHECK_OPT* check_opt) /*!< in: currently ignored */
{
/* Serialize ANALYZE TABLE inside InnoDB, see
Bug#38996 Race condition in ANALYZE TABLE */
@@ -6561,31 +7347,30 @@ ha_innobase::analyze(
return(0);
}
-/**************************************************************************
+/**********************************************************************//**
This is mapped to "ALTER TABLE tablename ENGINE=InnoDB", which rebuilds
the table in MySQL. */
-
+UNIV_INTERN
int
ha_innobase::optimize(
/*==================*/
- THD* thd, /* in: connection thread handle */
- HA_CHECK_OPT* check_opt) /* in: currently ignored */
+ THD* thd, /*!< in: connection thread handle */
+ HA_CHECK_OPT* check_opt) /*!< in: currently ignored */
{
return(HA_ADMIN_TRY_ALTER);
}
-/***********************************************************************
+/*******************************************************************//**
Tries to check that an InnoDB table is not corrupted. If corruption is
noticed, prints to stderr information about it. In case of corruption
-may also assert a failure and crash the server. */
-
+may also assert a failure and crash the server.
+@return HA_ADMIN_CORRUPT or HA_ADMIN_OK */
+UNIV_INTERN
int
ha_innobase::check(
/*===============*/
- /* out: HA_ADMIN_CORRUPT or
- HA_ADMIN_OK */
- THD* thd, /* in: user thread handle */
- HA_CHECK_OPT* check_opt) /* in: check options, currently
+ THD* thd, /*!< in: user thread handle */
+ HA_CHECK_OPT* check_opt) /*!< in: check options, currently
ignored */
{
ulint ret;
@@ -6604,24 +7389,27 @@ ha_innobase::check(
ret = row_check_table_for_mysql(prebuilt);
- if (ret == DB_SUCCESS) {
+ switch (ret) {
+ case DB_SUCCESS:
return(HA_ADMIN_OK);
+ case DB_INTERRUPTED:
+ my_error(ER_QUERY_INTERRUPTED, MYF(0));
+ return(-1);
+ default:
+ return(HA_ADMIN_CORRUPT);
}
-
- return(HA_ADMIN_CORRUPT);
}
-/*****************************************************************
+/*************************************************************//**
Adds information about free space in the InnoDB tablespace to a table comment
which is printed out when a user calls SHOW TABLE STATUS. Adds also info on
-foreign keys. */
-
+foreign keys.
+@return table comment + InnoDB free space + info on foreign keys */
+UNIV_INTERN
char*
ha_innobase::update_table_comment(
/*==============================*/
- /* out: table comment + InnoDB free space +
- info on foreign keys */
- const char* comment)/* in: table comment defined by user */
+ const char* comment)/*!< in: table comment defined by user */
{
uint length = (uint) strlen(comment);
char* str;
@@ -6647,7 +7435,7 @@ ha_innobase::update_table_comment(
/* output the data to a temporary file */
- mutex_enter_noninline(&srv_dict_tmpfile_mutex);
+ mutex_enter(&srv_dict_tmpfile_mutex);
rewind(srv_dict_tmpfile);
fprintf(srv_dict_tmpfile, "InnoDB free: %llu kB",
@@ -6680,22 +7468,22 @@ ha_innobase::update_table_comment(
pos[flen] = 0;
}
- mutex_exit_noninline(&srv_dict_tmpfile_mutex);
+ mutex_exit(&srv_dict_tmpfile_mutex);
prebuilt->trx->op_info = (char*)"";
return(str ? str : (char*) comment);
}
-/***********************************************************************
-Gets the foreign key create info for a table stored in InnoDB. */
-
+/*******************************************************************//**
+Gets the foreign key create info for a table stored in InnoDB.
+@return own: character string in the form which can be inserted to the
+CREATE TABLE statement, MUST be freed with
+ha_innobase::free_foreign_key_create_info */
+UNIV_INTERN
char*
ha_innobase::get_foreign_key_create_info(void)
/*==========================================*/
- /* out, own: character string in the form which
- can be inserted to the CREATE TABLE statement,
- MUST be freed with ::free_foreign_key_create_info */
{
char* str = 0;
long flen;
@@ -6716,7 +7504,7 @@ ha_innobase::get_foreign_key_create_info(void)
trx_search_latch_release_if_reserved(prebuilt->trx);
- mutex_enter_noninline(&srv_dict_tmpfile_mutex);
+ mutex_enter(&srv_dict_tmpfile_mutex);
rewind(srv_dict_tmpfile);
/* output the data to a temporary file */
@@ -6742,12 +7530,13 @@ ha_innobase::get_foreign_key_create_info(void)
str[flen] = 0;
}
- mutex_exit_noninline(&srv_dict_tmpfile_mutex);
+ mutex_exit(&srv_dict_tmpfile_mutex);
return(str);
}
+UNIV_INTERN
int
ha_innobase::get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list)
{
@@ -6758,7 +7547,7 @@ ha_innobase::get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list)
update_thd(ha_thd());
prebuilt->trx->op_info = (char*)"getting list of foreign keys";
trx_search_latch_release_if_reserved(prebuilt->trx);
- mutex_enter_noninline(&(dict_sys->mutex));
+ mutex_enter(&(dict_sys->mutex));
foreign = UT_LIST_GET_FIRST(prebuilt->table->foreign_list);
while (foreign != NULL) {
@@ -6815,7 +7604,7 @@ ha_innobase::get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list)
{
length=7;
tmp_buff= "CASCADE";
- }
+ }
else if (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL)
{
length=8;
@@ -6833,8 +7622,8 @@ ha_innobase::get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list)
}
f_key_info.delete_method = thd_make_lex_string(
thd, f_key_info.delete_method, tmp_buff, length, 1);
-
-
+
+
if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE)
{
length=7;
@@ -6873,17 +7662,18 @@ ha_innobase::get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list)
f_key_list->push_back(pf_key_info);
foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
}
- mutex_exit_noninline(&(dict_sys->mutex));
+ mutex_exit(&(dict_sys->mutex));
prebuilt->trx->op_info = (char*)"";
DBUG_RETURN(0);
}
-/*********************************************************************
+/*****************************************************************//**
Checks if ALTER TABLE may change the storage engine of the table.
Changing storage engines is not allowed for tables for which there
-are foreign key constraints (parent or child tables). */
-
+are foreign key constraints (parent or child tables).
+@return TRUE if can switch engines */
+UNIV_INTERN
bool
ha_innobase::can_switch_engines(void)
/*=================================*/
@@ -6907,18 +7697,18 @@ ha_innobase::can_switch_engines(void)
DBUG_RETURN(can_switch);
}
-/***********************************************************************
+/*******************************************************************//**
Checks if a table is referenced by a foreign key. The MySQL manual states that
a REPLACE is either equivalent to an INSERT, or DELETE(s) + INSERT. Only a
delete is then allowed internally to resolve a duplicate key conflict in
-REPLACE, not an update. */
-
+REPLACE, not an update.
+@return > 0 if referenced by a FOREIGN KEY */
+UNIV_INTERN
uint
ha_innobase::referenced_by_foreign_key(void)
/*========================================*/
- /* out: > 0 if referenced by a FOREIGN KEY */
{
- if (dict_table_referenced_by_foreign_key(prebuilt->table)) {
+ if (dict_table_is_referenced_by_foreign_key(prebuilt->table)) {
return(1);
}
@@ -6926,29 +7716,29 @@ ha_innobase::referenced_by_foreign_key(void)
return(0);
}
-/***********************************************************************
+/*******************************************************************//**
Frees the foreign key create info for a table stored in InnoDB, if it is
non-NULL. */
-
+UNIV_INTERN
void
ha_innobase::free_foreign_key_create_info(
/*======================================*/
- char* str) /* in, own: create info string to free */
+ char* str) /*!< in, own: create info string to free */
{
if (str) {
my_free(str, MYF(0));
}
}
-/***********************************************************************
-Tells something additional to the handler about how to do things. */
-
+/*******************************************************************//**
+Tells something additional to the handler about how to do things.
+@return 0 or error number */
+UNIV_INTERN
int
ha_innobase::extra(
/*===============*/
- /* out: 0 or error number */
enum ha_extra_function operation)
- /* in: HA_EXTRA_FLUSH or some other flag */
+ /*!< in: HA_EXTRA_FLUSH or some other flag */
{
/* Warning: since it is not sure that MySQL calls external_lock
before calling this function, the trx field in prebuilt can be
@@ -6999,11 +7789,9 @@ ha_innobase::extra(
return(0);
}
-/**********************************************************************
+/******************************************************************//**
Reset state of file to after 'open'.
This function is called after every statement for all tables used
by that statement. */
-int ha_innobase::reset()
+UNIV_INTERN
+int
+ha_innobase::reset()
{
if (prebuilt->blob_heap) {
row_mysql_prebuilt_free_blob_heap(prebuilt);
@@ -7020,7 +7808,7 @@ int ha_innobase::reset()
return(0);
}
-/**********************************************************************
+/******************************************************************//**
MySQL calls this function at the start of each SQL statement inside LOCK
TABLES. Inside LOCK TABLES the ::external_lock method does not work to
mark SQL statement borders. Note also a special case: if a temporary table
@@ -7030,13 +7818,13 @@ MySQL-5.0 also calls this before each statement in an execution of a stored
procedure. To make the execution more deterministic for binlogging, MySQL-5.0
locks all tables involved in a stored procedure with full explicit table
locks (thd_in_lock_tables(thd) holds in store_lock()) before executing the
-procedure. */
-
+procedure.
+@return 0 or error code */
+UNIV_INTERN
int
ha_innobase::start_stmt(
/*====================*/
- /* out: 0 or error code */
- THD* thd, /* in: handle to the user thread */
+ THD* thd, /*!< in: handle to the user thread */
thr_lock_type lock_type)
{
trx_t* trx;
@@ -7105,14 +7893,14 @@ ha_innobase::start_stmt(
return(0);
}
-/**********************************************************************
-Maps a MySQL trx isolation level code to the InnoDB isolation level code */
-inline
+/******************************************************************//**
+Maps a MySQL trx isolation level code to the InnoDB isolation level code
+@return InnoDB isolation level */
+static inline
ulint
innobase_map_isolation_level(
/*=========================*/
- /* out: InnoDB isolation level */
- enum_tx_isolation iso) /* in: MySQL isolation level code */
+ enum_tx_isolation iso) /*!< in: MySQL isolation level code */
{
switch(iso) {
case ISO_REPEATABLE_READ: return(TRX_ISO_REPEATABLE_READ);
@@ -7123,21 +7911,21 @@ innobase_map_isolation_level(
}
}
-/**********************************************************************
+/******************************************************************//**
As MySQL will execute an external lock for every new table it uses when it
starts to process an SQL statement (an exception is when MySQL calls
start_stmt for the handle) we can use this function to store the pointer to
the THD in the handle. We will also use this function to communicate
to InnoDB that a new SQL statement has started and that we must store a
savepoint to our transaction handle, so that we are able to roll back
-the SQL statement in case of an error. */
-
+the SQL statement in case of an error.
+@return 0 */
+UNIV_INTERN
int
ha_innobase::external_lock(
/*=======================*/
- /* out: 0 */
- THD* thd, /* in: handle to the user thread */
- int lock_type) /* in: lock type */
+ THD* thd, /*!< in: handle to the user thread */
+ int lock_type) /*!< in: lock type */
{
trx_t* trx;
@@ -7153,10 +7941,13 @@ ha_innobase::external_lock(
if (lock_type == F_WRLCK)
{
ulong const binlog_format= thd_binlog_format(thd);
- ulong const tx_isolation = thd_tx_isolation(current_thd);
- if (tx_isolation <= ISO_READ_COMMITTED
- && binlog_format == BINLOG_FORMAT_STMT
- && thd_binlog_filter_ok(thd))
+ ulong const tx_isolation = thd_tx_isolation(ha_thd());
+ if (tx_isolation <= ISO_READ_COMMITTED
+ && binlog_format == BINLOG_FORMAT_STMT
+#if MYSQL_VERSION_ID > 50140
+ && thd_binlog_filter_ok(thd)
+#endif /* MYSQL_VERSION_ID > 50140 */
+ )
{
char buf[256];
my_snprintf(buf, sizeof(buf),
@@ -7240,7 +8031,7 @@ ha_innobase::external_lock(
if (error != DB_SUCCESS) {
error = convert_error_code_to_mysql(
- (int) error, thd);
+ (int) error, 0, thd);
DBUG_RETURN((int) error);
}
}
@@ -7292,16 +8083,16 @@ ha_innobase::external_lock(
DBUG_RETURN(0);
}
-/**********************************************************************
+/******************************************************************//**
-With this function MySQL request a transactional lock to a table when
-user issued query LOCK TABLES..WHERE ENGINE = InnoDB. */
-
+With this function MySQL requests a transactional lock on a table when
+the user issues the query LOCK TABLES ... WHERE ENGINE = InnoDB.
+@return error code */
+UNIV_INTERN
int
ha_innobase::transactional_table_lock(
/*==================================*/
- /* out: error code */
- THD* thd, /* in: handle to the user thread */
- int lock_type) /* in: lock type */
+ THD* thd, /*!< in: handle to the user thread */
+ int lock_type) /*!< in: lock type */
{
trx_t* trx;
@@ -7323,8 +8114,8 @@ ha_innobase::transactional_table_lock(
"InnoDB: Have you deleted the .ibd file"
" from the database directory under\n"
"InnoDB: the MySQL datadir?"
- "InnoDB: See"
- " http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n"
+ "InnoDB: See " REFMAN
+ "innodb-troubleshooting.html\n"
"InnoDB: how you can resolve the problem.\n",
prebuilt->table->name);
DBUG_RETURN(HA_ERR_CRASHED);
@@ -7367,7 +8158,8 @@ ha_innobase::transactional_table_lock(
error = row_lock_table_for_mysql(prebuilt, NULL, 0);
if (error != DB_SUCCESS) {
- error = convert_error_code_to_mysql((int) error, thd);
+ error = convert_error_code_to_mysql(
+ (int) error, prebuilt->table->flags, thd);
DBUG_RETURN((int) error);
}
@@ -7384,29 +8176,27 @@ ha_innobase::transactional_table_lock(
DBUG_RETURN(0);
}
-/****************************************************************************
-Here we export InnoDB status variables to MySQL. */
+/************************************************************************//**
+Here we export InnoDB status variables to MySQL. */
static
-int
-innodb_export_status()
-/*==================*/
+void
+innodb_export_status(void)
+/*======================*/
{
if (innodb_inited) {
srv_export_innodb_status();
}
-
- return 0;
}
-/****************************************************************************
+/************************************************************************//**
Implements the SHOW INNODB STATUS command. Sends the output of the InnoDB
Monitor to the client. */
static
bool
innodb_show_status(
/*===============*/
- handlerton* hton, /* in: the innodb handlerton */
- THD* thd, /* in: the MySQL query thread of the caller */
+ handlerton* hton, /*!< in: the innodb handlerton */
+ THD* thd, /*!< in: the MySQL query thread of the caller */
stat_print_fn *stat_print)
{
trx_t* trx;
@@ -7416,6 +8206,7 @@ innodb_show_status(
ulint trx_list_end = ULINT_UNDEFINED;
DBUG_ENTER("innodb_show_status");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
trx = check_trx_exists(thd);
@@ -7427,7 +8218,7 @@ innodb_show_status(
long flen, usable_len;
char* str;
- mutex_enter_noninline(&srv_monitor_file_mutex);
+ mutex_enter(&srv_monitor_file_mutex);
rewind(srv_monitor_file);
srv_printf_innodb_monitor(srv_monitor_file,
&trx_list_start, &trx_list_end);
@@ -7448,7 +8239,7 @@ innodb_show_status(
read the contents of the temporary file */
if (!(str = (char*) my_malloc(usable_len + 1, MYF(0)))) {
- mutex_exit_noninline(&srv_monitor_file_mutex);
+ mutex_exit(&srv_monitor_file_mutex);
DBUG_RETURN(TRUE);
}
@@ -7473,7 +8264,7 @@ innodb_show_status(
flen = (long) fread(str, 1, MAX_STATUS_SIZE - 1, srv_monitor_file);
}
- mutex_exit_noninline(&srv_monitor_file_mutex);
+ mutex_exit(&srv_monitor_file_mutex);
bool result = FALSE;
@@ -7486,19 +8277,20 @@ innodb_show_status(
DBUG_RETURN(FALSE);
}
-/****************************************************************************
+/************************************************************************//**
-Implements the SHOW MUTEX STATUS command. . */
+Implements the SHOW MUTEX STATUS command. */
static
bool
innodb_mutex_show_status(
/*=====================*/
- handlerton* hton, /* in: the innodb handlerton */
- THD* thd, /* in: the MySQL query thread of the
+ handlerton* hton, /*!< in: the innodb handlerton */
+ THD* thd, /*!< in: the MySQL query thread of the
caller */
stat_print_fn* stat_print)
{
char buf1[IO_SIZE], buf2[IO_SIZE];
- mutex_t* mutex;
+ mutex_t* mutex;
+ rw_lock_t* lock;
#ifdef UNIV_DEBUG
ulint rw_lock_count= 0;
ulint rw_lock_count_spin_loop= 0;
@@ -7509,12 +8301,17 @@ innodb_mutex_show_status(
#endif /* UNIV_DEBUG */
uint hton_name_len= (uint) strlen(innobase_hton_name), buf1len, buf2len;
DBUG_ENTER("innodb_mutex_show_status");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
- mutex_enter_noninline(&mutex_list_mutex);
+ mutex_enter(&mutex_list_mutex);
mutex = UT_LIST_GET_FIRST(mutex_list);
while (mutex != NULL) {
+ if (mutex->count_os_wait == 0
+ || buf_pool_is_block_mutex(mutex)) {
+ goto next_mutex;
+ }
#ifdef UNIV_DEBUG
if (mutex->mutex_type != 1) {
if (mutex->count_using > 0) {
@@ -7536,8 +8333,7 @@ innodb_mutex_show_status(
if (stat_print(thd, innobase_hton_name,
hton_name_len, buf1, buf1len,
buf2, buf2len)) {
- mutex_exit_noninline(
- &mutex_list_mutex);
+ mutex_exit(&mutex_list_mutex);
DBUG_RETURN(1);
}
}
@@ -7559,15 +8355,40 @@ innodb_mutex_show_status(
if (stat_print(thd, innobase_hton_name,
hton_name_len, buf1, buf1len,
buf2, buf2len)) {
- mutex_exit_noninline(&mutex_list_mutex);
+ mutex_exit(&mutex_list_mutex);
DBUG_RETURN(1);
}
#endif /* UNIV_DEBUG */
+next_mutex:
mutex = UT_LIST_GET_NEXT(list, mutex);
}
- mutex_exit_noninline(&mutex_list_mutex);
+ mutex_exit(&mutex_list_mutex);
+
+ mutex_enter(&rw_lock_list_mutex);
+
+ lock = UT_LIST_GET_FIRST(rw_lock_list);
+
+ while (lock != NULL) {
+ if (lock->count_os_wait
+ && !buf_pool_is_block_lock(lock)) {
+ buf1len= my_snprintf(buf1, sizeof(buf1), "%s:%lu",
+ lock->cfile_name, (ulong) lock->cline);
+ buf2len= my_snprintf(buf2, sizeof(buf2),
+ "os_waits=%lu", lock->count_os_wait);
+
+ if (stat_print(thd, innobase_hton_name,
+ hton_name_len, buf1, buf1len,
+ buf2, buf2len)) {
+ mutex_exit(&rw_lock_list_mutex);
+ DBUG_RETURN(1);
+ }
+ }
+ lock = UT_LIST_GET_NEXT(list, lock);
+ }
+
+ mutex_exit(&rw_lock_list_mutex);
#ifdef UNIV_DEBUG
buf2len= my_snprintf(buf2, sizeof(buf2),
@@ -7592,100 +8413,111 @@ bool innobase_show_status(handlerton *hton, THD* thd,
stat_print_fn* stat_print,
enum ha_stat_type stat_type)
{
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+
switch (stat_type) {
case HA_ENGINE_STATUS:
return innodb_show_status(hton, thd, stat_print);
case HA_ENGINE_MUTEX:
return innodb_mutex_show_status(hton, thd, stat_print);
default:
- return FALSE;
+ return(FALSE);
}
}
-
-/****************************************************************************
+/************************************************************************//**
Handling the shared INNOBASE_SHARE structure that is needed to provide table
locking.
****************************************************************************/
-static uchar* innobase_get_key(INNOBASE_SHARE* share, size_t *length,
- my_bool not_used __attribute__((unused)))
-{
- *length=share->table_name_length;
-
- return (uchar*) share->table_name;
-}
-
static INNOBASE_SHARE* get_share(const char* table_name)
{
INNOBASE_SHARE *share;
pthread_mutex_lock(&innobase_share_mutex);
- uint length=(uint) strlen(table_name);
- if (!(share=(INNOBASE_SHARE*) hash_search(&innobase_open_tables,
- (uchar*) table_name,
- length))) {
+ ulint fold = ut_fold_string(table_name);
+
+ HASH_SEARCH(table_name_hash, innobase_open_tables, fold,
+ INNOBASE_SHARE*, share,
+ ut_ad(share->use_count > 0),
+ !strcmp(share->table_name, table_name));
+
+ if (!share) {
+
+ uint length = (uint) strlen(table_name);
+
+ /* TODO: invoke HASH_MIGRATE if innobase_open_tables
+ grows too big */
share = (INNOBASE_SHARE *) my_malloc(sizeof(*share)+length+1,
MYF(MY_FAE | MY_ZEROFILL));
- share->table_name_length=length;
- share->table_name=(char*) (share+1);
- strmov(share->table_name,table_name);
+ share->table_name = (char*) memcpy(share + 1,
+ table_name, length + 1);
- if (my_hash_insert(&innobase_open_tables,
- (uchar*) share)) {
- pthread_mutex_unlock(&innobase_share_mutex);
- my_free(share,0);
-
- return 0;
- }
+ HASH_INSERT(INNOBASE_SHARE, table_name_hash,
+ innobase_open_tables, fold, share);
thr_lock_init(&share->lock);
- pthread_mutex_init(&share->mutex,MY_MUTEX_INIT_FAST);
}
share->use_count++;
pthread_mutex_unlock(&innobase_share_mutex);
- return share;
+ return(share);
}
static void free_share(INNOBASE_SHARE* share)
{
pthread_mutex_lock(&innobase_share_mutex);
+#ifdef UNIV_DEBUG
+ INNOBASE_SHARE* share2;
+ ulint fold = ut_fold_string(share->table_name);
+
+ HASH_SEARCH(table_name_hash, innobase_open_tables, fold,
+ INNOBASE_SHARE*, share2,
+ ut_ad(share->use_count > 0),
+ !strcmp(share->table_name, share2->table_name));
+
+ ut_a(share2 == share);
+#endif /* UNIV_DEBUG */
+
if (!--share->use_count) {
- hash_delete(&innobase_open_tables, (uchar*) share);
+ ulint fold = ut_fold_string(share->table_name);
+
+ HASH_DELETE(INNOBASE_SHARE, table_name_hash,
+ innobase_open_tables, fold, share);
thr_lock_delete(&share->lock);
- pthread_mutex_destroy(&share->mutex);
my_free(share, MYF(0));
+
+ /* TODO: invoke HASH_MIGRATE if innobase_open_tables
+ shrinks too much */
}
pthread_mutex_unlock(&innobase_share_mutex);
}
-/*********************************************************************
+/*****************************************************************//**
Converts a MySQL table lock stored in the 'lock' field of the handle to
a proper type before storing pointer to the lock into an array of pointers.
MySQL also calls this if it wants to reset some table locks to a not-locked
state during the processing of an SQL query. An example is that during a
SELECT the read lock is released early on the 'const' tables where we only
fetch one row. MySQL does not call this when it releases all locks at the
-end of an SQL statement. */
-
+end of an SQL statement.
+@return pointer to the next element in the 'to' array */
+UNIV_INTERN
THR_LOCK_DATA**
ha_innobase::store_lock(
/*====================*/
- /* out: pointer to the next
- element in the 'to' array */
- THD* thd, /* in: user thread handle */
- THR_LOCK_DATA** to, /* in: pointer to an array
+ THD* thd, /*!< in: user thread handle */
+ THR_LOCK_DATA** to, /*!< in: pointer to an array
of pointers to lock structs;
pointer to the 'lock' field
of current handle is stored
next to this array */
- enum thr_lock_type lock_type) /* in: lock type to store in
+ enum thr_lock_type lock_type) /*!< in: lock type to store in
'lock'; this may also be
TL_IGNORE */
{
@@ -7719,7 +8551,7 @@ ha_innobase::store_lock(
}
}
- DBUG_ASSERT(thd == current_thd);
+ DBUG_ASSERT(EQ_CURRENT_THD(thd));
const bool in_lock_tables = thd_in_lock_tables(thd);
const uint sql_command = thd_sql_command(thd);
@@ -7763,6 +8595,7 @@ ha_innobase::store_lock(
&& isolation_level != TRX_ISO_SERIALIZABLE
&& (lock_type == TL_READ || lock_type == TL_READ_NO_INSERT)
&& (sql_command == SQLCOM_INSERT_SELECT
+ || sql_command == SQLCOM_REPLACE_SELECT
|| sql_command == SQLCOM_UPDATE
|| sql_command == SQLCOM_CREATE_TABLE)) {
@@ -7770,10 +8603,11 @@ ha_innobase::store_lock(
option set or this session is using READ COMMITTED
isolation level and isolation level of the transaction
is not set to serializable and MySQL is doing
- INSERT INTO...SELECT or UPDATE ... = (SELECT ...) or
- CREATE ... SELECT... without FOR UPDATE or
- IN SHARE MODE in select, then we use consistent
- read for select. */
+ INSERT INTO...SELECT or REPLACE INTO...SELECT
+ or UPDATE ... = (SELECT ...) or CREATE ...
+ SELECT... without FOR UPDATE or IN SHARE
+ MODE in select, then we use consistent read
+ for select. */
prebuilt->select_lock_type = LOCK_NONE;
prebuilt->stored_select_lock_type = LOCK_NONE;
@@ -7868,16 +8702,16 @@ ha_innobase::store_lock(
return(to);
}
-/*******************************************************************************
+/*********************************************************************//**
Read the next autoinc value. Acquire the relevant locks before reading
the AUTOINC value. If SUCCESS then the table AUTOINC mutex will be locked
-on return and all relevant locks acquired. */
-
-ulong
+on return and all relevant locks acquired.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+ulint
ha_innobase::innobase_get_autoinc(
/*==============================*/
- /* out: DB_SUCCESS or error code */
- ulonglong* value) /* out: autoinc value */
+ ulonglong* value) /*!< out: autoinc value */
{
*value = 0;
@@ -7891,18 +8725,18 @@ ha_innobase::innobase_get_autoinc(
/* It should have been initialized during open. */
ut_a(*value != 0);
}
-
- return(ulong(prebuilt->autoinc_error));
+
+ return(prebuilt->autoinc_error);
}
-/***********************************************************************
+/*******************************************************************//**
This function reads the global auto-inc counter. It doesn't use the
-AUTOINC lock even if the lock mode is set to TRADITIONAL. */
-
+AUTOINC lock even if the lock mode is set to TRADITIONAL.
+@return the autoinc value */
+UNIV_INTERN
ulonglong
-ha_innobase::innobase_peek_autoinc()
-/*================================*/
- /* out: the autoinc value */
+ha_innobase::innobase_peek_autoinc(void)
+/*====================================*/
{
ulonglong auto_inc;
dict_table_t* innodb_table;
@@ -7919,26 +8753,26 @@ ha_innobase::innobase_peek_autoinc()
ut_a(auto_inc > 0);
dict_table_autoinc_unlock(innodb_table);
-
+
return(auto_inc);
}
-/*******************************************************************************
+/*********************************************************************//**
This function initializes the auto-inc counter if it has not been
initialized yet. This function does not change the value of the auto-inc
counter if it already has been initialized. Returns the value of the
auto-inc counter in *first_value, and ULONGLONG_MAX in *nb_reserved_values (as
we have a table-level lock). offset, increment, nb_desired_values are ignored.
-*first_value is set to -1 if error (deadlock or lock wait timeout) */
-
+*first_value is set to -1 if error (deadlock or lock wait timeout) */
+UNIV_INTERN
void
ha_innobase::get_auto_increment(
/*============================*/
- ulonglong offset, /* in: */
- ulonglong increment, /* in: table autoinc increment */
- ulonglong nb_desired_values, /* in: number of values reqd */
- ulonglong *first_value, /* out: the autoinc value */
- ulonglong *nb_reserved_values) /* out: count of reserved values */
+ ulonglong offset, /*!< in: table autoinc offset */
+ ulonglong increment, /*!< in: table autoinc increment */
+ ulonglong nb_desired_values, /*!< in: number of values reqd */
+ ulonglong *first_value, /*!< out: the autoinc value */
+ ulonglong *nb_reserved_values) /*!< out: count of reserved values */
{
trx_t* trx;
ulint error;
@@ -8034,11 +8868,17 @@ ha_innobase::get_auto_increment(
dict_table_autoinc_unlock(prebuilt->table);
}
-/* See comment in handler.h */
+/*******************************************************************//**
+Reset the auto-increment counter to the given value, i.e. the next row
+inserted will get the given value. This is called e.g. after TRUNCATE
+is emulated by doing a 'DELETE FROM t'. HA_ERR_WRONG_COMMAND is
+returned by storage engines that don't support this operation.
+@return 0 or error code */
+UNIV_INTERN
int
ha_innobase::reset_auto_increment(
/*==============================*/
- ulonglong value) /* in: new value for table autoinc */
+ ulonglong value) /*!< in: new value for table autoinc */
{
DBUG_ENTER("ha_innobase::reset_auto_increment");
@@ -8049,7 +8889,9 @@ ha_innobase::reset_auto_increment(
error = row_lock_table_autoinc_for_mysql(prebuilt);
if (error != DB_SUCCESS) {
- error = convert_error_code_to_mysql(error, user_thd);
+ error = convert_error_code_to_mysql(error,
+ prebuilt->table->flags,
+ user_thd);
DBUG_RETURN(error);
}
@@ -8065,6 +8907,7 @@ ha_innobase::reset_auto_increment(
}
/* See comment in handler.cc */
+UNIV_INTERN
bool
ha_innobase::get_error_message(int error, String *buf)
{
@@ -8073,22 +8916,21 @@ ha_innobase::get_error_message(int error, String *buf)
buf->copy(trx->detailed_error, (uint) strlen(trx->detailed_error),
system_charset_info);
- return FALSE;
+ return(FALSE);
}
-/***********************************************************************
+/*******************************************************************//**
Compares two 'refs'. A 'ref' is the (internal) primary key value of the row.
If there is no explicitly declared non-null unique key or a primary key, then
-InnoDB internally uses the row id as the primary key. */
-
+InnoDB internally uses the row id as the primary key.
+@return < 0 if ref1 < ref2, 0 if equal, else > 0 */
+UNIV_INTERN
int
ha_innobase::cmp_ref(
/*=================*/
- /* out: < 0 if ref1 < ref2, 0 if equal, else
- > 0 */
- const uchar* ref1, /* in: an (internal) primary key value in the
+ const uchar* ref1, /*!< in: an (internal) primary key value in the
MySQL key value format */
- const uchar* ref2) /* in: an (internal) primary key value in the
+ const uchar* ref2) /*!< in: an (internal) primary key value in the
MySQL key value format */
{
enum_field_types mysql_type;
@@ -8148,25 +8990,24 @@ ha_innobase::cmp_ref(
return(0);
}
-/***********************************************************************
-Ask InnoDB if a query to a table can be cached. */
-
+/*******************************************************************//**
+Ask InnoDB if a query to a table can be cached.
+@return TRUE if query caching of the table is permitted */
+UNIV_INTERN
my_bool
ha_innobase::register_query_cache_table(
/*====================================*/
- /* out: TRUE if query caching
- of the table is permitted */
- THD* thd, /* in: user thread handle */
- char* table_key, /* in: concatenation of database name,
- the null character '\0',
+ THD* thd, /*!< in: user thread handle */
+ char* table_key, /*!< in: concatenation of database name,
+ the null character NUL,
and the table name */
- uint key_length, /* in: length of the full name, i.e.
+ uint key_length, /*!< in: length of the full name, i.e.
len(dbname) + len(tablename) + 1 */
qc_engine_callback*
- call_back, /* out: pointer to function for
+ call_back, /*!< out: pointer to function for
checking if query caching
is permitted */
- ulonglong *engine_data) /* in/out: data to call_back */
+ ulonglong *engine_data) /*!< in/out: data to call_back */
{
*call_back = innobase_query_caching_of_table_permitted;
*engine_data = 0;
@@ -8175,45 +9016,43 @@ ha_innobase::register_query_cache_table(
engine_data));
}
+UNIV_INTERN
char*
ha_innobase::get_mysql_bin_log_name()
{
return(trx_sys_mysql_bin_log_name);
}
+UNIV_INTERN
ulonglong
ha_innobase::get_mysql_bin_log_pos()
{
- /* trx... is ib_longlong, which is a typedef for a 64-bit integer
+ /* trx... is ib_int64_t, which is a typedef for a 64-bit integer
(__int64 or longlong) so it's ok to cast it to ulonglong. */
return(trx_sys_mysql_bin_log_pos);
}
-/**********************************************************************
+/******************************************************************//**
This function is used to find the storage length in bytes of the first n
characters for prefix indexes using a multibyte character set. The function
finds charset information and returns length of prefix_len characters in the
index field in bytes.
-
-NOTE: the prototype of this function is copied to data0type.c! If you change
-this function, you MUST change also data0type.c! */
-extern "C"
+@return number of bytes occupied by the first n characters */
+extern "C" UNIV_INTERN
ulint
innobase_get_at_most_n_mbchars(
/*===========================*/
- /* out: number of bytes occupied by the first
- n characters */
- ulint charset_id, /* in: character set id */
- ulint prefix_len, /* in: prefix length in bytes of the index
+ ulint charset_id, /*!< in: character set id */
+ ulint prefix_len, /*!< in: prefix length in bytes of the index
(this has to be divided by mbmaxlen to get the
number of CHARACTERS n in the prefix) */
- ulint data_len, /* in: length of the string in bytes */
- const char* str) /* in: character string */
+ ulint data_len, /*!< in: length of the string in bytes */
+ const char* str) /*!< in: character string */
{
- ulint char_length; /* character length in bytes */
- ulint n_chars; /* number of characters in prefix */
- CHARSET_INFO* charset; /* charset used in the field */
+ ulint char_length; /*!< character length in bytes */
+ ulint n_chars; /*!< number of characters in prefix */
+ CHARSET_INFO* charset; /*!< charset used in the field */
charset = get_charset((uint) charset_id, MYF(MY_WME));
@@ -8264,49 +9103,30 @@ innobase_get_at_most_n_mbchars(
return(char_length);
}
-/***********************************************************************
-This function is used to prepare X/Open XA distributed transaction */
+/*******************************************************************//**
+This function is used to prepare an X/Open XA distributed transaction.
+@return 0 or error number */
static
int
innobase_xa_prepare(
/*================*/
- /* out: 0 or error number */
- handlerton *hton,
- THD* thd, /* in: handle to the MySQL thread of the user
- whose XA transaction should be prepared */
- bool all) /* in: TRUE - commit transaction
- FALSE - the current SQL statement ended */
+ handlerton* hton, /*!< in: InnoDB handlerton */
+ THD* thd, /*!< in: handle to the MySQL thread of
+ the user whose XA transaction should
+ be prepared */
+ bool all) /*!< in: TRUE - commit transaction
+ FALSE - the current SQL statement
+ ended */
{
int error = 0;
trx_t* trx = check_trx_exists(thd);
- if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
- (all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
- {
-
- /* For ibbackup to work the order of transactions in binlog
- and InnoDB must be the same. Consider the situation
-
- thread1> prepare; write to binlog; ...
- <context switch>
- thread2> prepare; write to binlog; commit
- thread1> ... commit
-
- To ensure this will not happen we're taking the mutex on
- prepare, and releasing it on commit.
-
- Note: only do it for normal commits, done via ha_commit_trans.
- If 2pc protocol is executed by external transaction
- coordinator, it will be just a regular MySQL client
- executing XA PREPARE and XA COMMIT commands.
- In this case we cannot know how many minutes or hours
- will be between XA PREPARE and XA COMMIT, and we don't want
- to block for undefined period of time. */
- pthread_mutex_lock(&prepare_commit_mutex);
- trx->active_trans = 2;
- }
+ DBUG_ASSERT(hton == innodb_hton_ptr);
- if (!THDVAR(thd, support_xa)) {
+ /* we use support_xa value as it was seen at transaction start
+ time, not the current session variable value. Any possible changes
+ to the session variable take effect only in the next transaction */
+ if (!trx->support_xa) {
return(0);
}
@@ -8355,21 +9175,48 @@ innobase_xa_prepare(
srv_active_wake_master_thread();
- return error;
+ if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
+ (all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
+ {
+
+ /* For ibbackup to work the order of transactions in binlog
+ and InnoDB must be the same. Consider the situation
+
+ thread1> prepare; write to binlog; ...
+ <context switch>
+ thread2> prepare; write to binlog; commit
+ thread1> ... commit
+
+ To ensure this will not happen we're taking the mutex on
+ prepare, and releasing it on commit.
+
+ Note: only do it for normal commits, done via ha_commit_trans.
+ If 2pc protocol is executed by external transaction
+ coordinator, it will be just a regular MySQL client
+ executing XA PREPARE and XA COMMIT commands.
+ In this case we cannot know how many minutes or hours
+ will be between XA PREPARE and XA COMMIT, and we don't want
+ to block for undefined period of time. */
+ pthread_mutex_lock(&prepare_commit_mutex);
+ trx->active_trans = 2;
+ }
+
+ return(error);
}
-/***********************************************************************
-This function is used to recover X/Open XA distributed transactions */
+/*******************************************************************//**
+This function is used to recover X/Open XA distributed transactions.
+@return number of prepared transactions stored in xid_list */
static
int
innobase_xa_recover(
/*================*/
- /* out: number of prepared transactions
- stored in xid_list */
- handlerton *hton,
- XID* xid_list, /* in/out: prepared transactions */
- uint len) /* in: number of slots in xid_list */
+ handlerton* hton, /*!< in: InnoDB handlerton */
+ XID* xid_list,/*!< in/out: prepared transactions */
+ uint len) /*!< in: number of slots in xid_list */
{
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+
if (len == 0 || xid_list == NULL) {
return(0);
@@ -8378,19 +9225,21 @@ innobase_xa_recover(
return(trx_recover_for_mysql(xid_list, len));
}
-/***********************************************************************
+/*******************************************************************//**
This function is used to commit one X/Open XA distributed transaction
-which is in the prepared state */
+which is in the prepared state
+@return 0 or error number */
static
int
innobase_commit_by_xid(
/*===================*/
- /* out: 0 or error number */
handlerton *hton,
- XID* xid) /* in: X/Open XA transaction identification */
+ XID* xid) /*!< in: X/Open XA transaction identification */
{
trx_t* trx;
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+
trx = trx_get_trx_by_xid(xid);
if (trx) {
@@ -8402,19 +9251,22 @@ innobase_commit_by_xid(
}
}
-/***********************************************************************
+/*******************************************************************//**
This function is used to rollback one X/Open XA distributed transaction
-which is in the prepared state */
+which is in the prepared state
+@return 0 or error number */
static
int
innobase_rollback_by_xid(
/*=====================*/
- /* out: 0 or error number */
- handlerton *hton,
- XID *xid) /* in: X/Open XA transaction identification */
+ handlerton* hton, /*!< in: InnoDB handlerton */
+ XID* xid) /*!< in: X/Open XA transaction
+ identification */
{
trx_t* trx;
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+
trx = trx_get_trx_by_xid(xid);
if (trx) {
@@ -8424,23 +9276,25 @@ innobase_rollback_by_xid(
}
}
-/***********************************************************************
+/*******************************************************************//**
Create a consistent view for a cursor based on current transaction
which is created if the corresponding MySQL thread still lacks one.
This consistent view is then used inside of MySQL when accessing records
-using a cursor. */
+using a cursor.
+@return pointer to cursor view or NULL */
static
void*
innobase_create_cursor_view(
/*========================*/
- /* out: pointer to cursor view or NULL */
- handlerton *hton, /* in: innobase hton */
- THD* thd) /* in: user thread handle */
+ handlerton *hton, /*!< in: innobase hton */
+ THD* thd) /*!< in: user thread handle */
{
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+
return(read_cursor_view_create_for_mysql(check_trx_exists(thd)));
}
-/***********************************************************************
+/*******************************************************************//**
Close the given consistent cursor view of a transaction and restore
global read view to a transaction read view. Transaction is created if the
corresponding MySQL thread still lacks one. */
@@ -8449,14 +9303,16 @@ void
innobase_close_cursor_view(
/*=======================*/
handlerton *hton,
- THD* thd, /* in: user thread handle */
- void* curview)/* in: Consistent read view to be closed */
+ THD* thd, /*!< in: user thread handle */
+ void* curview)/*!< in: Consistent read view to be closed */
{
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+
read_cursor_view_close_for_mysql(check_trx_exists(thd),
(cursor_view_t*) curview);
}
-/***********************************************************************
+/*******************************************************************//**
Set the given consistent cursor view to a transaction which is created
if the corresponding MySQL thread still lacks one. If the given
consistent cursor view is NULL global read view of a transaction is
@@ -8466,9 +9322,11 @@ void
innobase_set_cursor_view(
/*=====================*/
handlerton *hton,
- THD* thd, /* in: user thread handle */
- void* curview)/* in: Consistent cursor view to be set */
+ THD* thd, /*!< in: user thread handle */
+ void* curview)/*!< in: Consistent cursor view to be set */
{
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+
read_cursor_set_for_mysql(check_trx_exists(thd),
(cursor_view_t*) curview);
}
@@ -8565,20 +9423,22 @@ foreign_key_column_is_being_renamed(
return(false);
}
-bool ha_innobase::check_if_incompatible_data(
+UNIV_INTERN
+bool
+ha_innobase::check_if_incompatible_data(
HA_CREATE_INFO* info,
uint table_changes)
{
if (table_changes != IS_EQUAL_YES) {
- return COMPATIBLE_DATA_NO;
+ return(COMPATIBLE_DATA_NO);
}
/* Check that auto_increment value was not changed */
if ((info->used_fields & HA_CREATE_USED_AUTO) &&
info->auto_increment_value != 0) {
- return COMPATIBLE_DATA_NO;
+ return(COMPATIBLE_DATA_NO);
}
/* Check if a column participating in a foreign key is being renamed.
@@ -8589,13 +9449,418 @@ bool ha_innobase::check_if_incompatible_data(
}
/* Check that row format didn't change */
- if ((info->used_fields & HA_CREATE_USED_ROW_FORMAT) &&
- get_row_type() != info->row_type) {
+ if ((info->used_fields & HA_CREATE_USED_ROW_FORMAT)
+ && info->row_type != ROW_TYPE_DEFAULT
+ && info->row_type != get_row_type()) {
- return COMPATIBLE_DATA_NO;
+ return(COMPATIBLE_DATA_NO);
}
- return COMPATIBLE_DATA_YES;
+ /* Specifying KEY_BLOCK_SIZE requests a rebuild of the table. */
+ if (info->used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE) {
+ return(COMPATIBLE_DATA_NO);
+ }
+
+ return(COMPATIBLE_DATA_YES);
+}
+
+/************************************************************//**
+Validate the file format name and return its corresponding id. The
+name may also be the decimal format id itself.
+@return valid file format id, or DICT_TF_FORMAT_MAX + 1 if not recognized */
+static
+uint
+innobase_file_format_name_lookup(
+/*=============================*/
+ const char* format_name) /*!< in: pointer to file format name */
+{
+ char* endp;
+ unsigned long parsed_id;
+ uint format_id;
+
+ ut_a(format_name != NULL);
+
+ /* The format name can contain the format id itself instead of
+ the name and we check for that. Keep the full strtoul() width:
+ narrowing before the range check lets huge numbers wrap around. */
+ parsed_id = strtoul(format_name, &endp, 10);
+
+ /* Check for valid parse. */
+ if (*endp == '\0' && *format_name != '\0') {
+
+ if (parsed_id <= DICT_TF_FORMAT_MAX) {
+ return((uint) parsed_id);
+ }
+ } else {
+ for (format_id = 0; format_id <= DICT_TF_FORMAT_MAX;
+ format_id++) {
+ const char* name;
+
+ name = trx_sys_file_format_id_to_name(format_id);
+
+ if (!innobase_strcasecmp(format_name, name)) {
+ return(format_id);
+ }
+ }
+ }
+
+ return(DICT_TF_FORMAT_MAX + 1);
+}
+
+/************************************************************//**
+Check whether the file format check value is "on" or "off" (compared with
+innobase_strcasecmp()); on a match, set srv_check_file_format_at_startup.
+@return true if the config value is one of "on" or "off" */
+static
+bool
+innobase_file_format_check_on_off(
+/*==============================*/
+ const char* format_check) /*!< in: parameter value */
+{
+ bool ret = true;
+
+ if (!innobase_strcasecmp(format_check, "off")) {
+
+ /* Set the value to disable checking. */
+ srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX + 1;
+
+ } else if (!innobase_strcasecmp(format_check, "on")) {
+
+ /* Set the value to the lowest supported format. */
+ srv_check_file_format_at_startup = DICT_TF_FORMAT_51;
+ } else {
+ ret = FALSE; /* neither keyword: the variable is left untouched */
+ }
+
+ return(ret);
+}
+
+/************************************************************//**
+Validate the file format check config parameter. If it names a known format
+(by name or numeric id), set srv_check_file_format_at_startup to that id.
+@return the format_id if valid config value, otherwise -1 */
+static
+int
+innobase_file_format_validate_and_set(
+/*================================*/
+ const char* format_check) /*!< in: parameter value */
+{
+ uint format_id;
+
+ format_id = innobase_file_format_name_lookup(format_check);
+
+ if (format_id < DICT_TF_FORMAT_MAX + 1) { /* lookup found a match */
+ srv_check_file_format_at_startup = format_id;
+
+ return((int) format_id);
+ } else {
+ return(-1);
+ }
+}
+
+/*************************************************************//**
+Check if it is a valid file format name or numeric id. This function is
+registered as a callback with MySQL.
+@return 0 for valid file format, 1 otherwise (*save is then set to NULL) */
+static
+int
+innodb_file_format_name_validate(
+/*=============================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to system
+ variable */
+ void* save, /*!< out: immediate result
+ for update function */
+ struct st_mysql_value* value) /*!< in: incoming string */
+{
+ const char* file_format_input;
+ char buff[STRING_BUFFER_USUAL_SIZE];
+ int len = sizeof(buff);
+
+ ut_a(save != NULL);
+ ut_a(value != NULL);
+
+ file_format_input = value->val_str(value, buff, &len);
+
+ if (file_format_input != NULL) {
+ uint format_id;
+
+ format_id = innobase_file_format_name_lookup(
+ file_format_input);
+
+ if (format_id <= DICT_TF_FORMAT_MAX) {
+
+ /* Save the name returned for this id rather than the
+ caller's input, which may live in the local buff. */
+ *static_cast<const char**>(save) =
+ trx_sys_file_format_id_to_name(format_id);
+
+ return(0);
+ }
+ }
+
+ *static_cast<const char**>(save) = NULL;
+ return(1);
+}
+
+/****************************************************************//**
+Update the system variable innodb_file_format using the "saved" value (if
+valid) and store the format name in *var_ptr. Callback registered with MySQL. */
+static
+void
+innodb_file_format_name_update(
+/*===========================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to
+ system variable */
+ void* var_ptr, /*!< out: where the
+ format name goes */
+ const void* save) /*!< in: immediate result
+ from check function */
+{
+ const char* format_name;
+
+ ut_a(var_ptr != NULL);
+ ut_a(save != NULL);
+
+ format_name = *static_cast<const char*const*>(save);
+
+ if (format_name) {
+ uint format_id;
+
+ format_id = innobase_file_format_name_lookup(format_name);
+
+ if (format_id <= DICT_TF_FORMAT_MAX) {
+ srv_file_format = format_id;
+ }
+ }
+
+ *static_cast<const char**>(var_ptr)
+ = trx_sys_file_format_id_to_name(srv_file_format);
+}
+
+/*************************************************************//**
+Check if valid argument to innodb_file_format_check; "on"/"off" are rejected
+with a warning. This function is registered as a callback with MySQL.
+@return 0 for valid file format, 1 otherwise (*save is then set to NULL) */
+static
+int
+innodb_file_format_check_validate(
+/*==============================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to system
+ variable */
+ void* save, /*!< out: immediate result
+ for update function */
+ struct st_mysql_value* value) /*!< in: incoming string */
+{
+ const char* file_format_input;
+ char buff[STRING_BUFFER_USUAL_SIZE];
+ int len = sizeof(buff);
+ int format_id;
+
+ ut_a(save != NULL);
+ ut_a(value != NULL);
+
+ file_format_input = value->val_str(value, buff, &len);
+
+ if (file_format_input != NULL) {
+
+ /* Check if user set on/off to print a suitable message. NOTE(review):
+ the helper has by now set srv_check_file_format_at_startup anyway. */
+
+ if (innobase_file_format_check_on_off(file_format_input)) {
+ push_warning_printf(thd,
+ MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+ "InnoDB: invalid innodb_file_format_check "
+ "value; on/off can only be set at startup or "
+ "in the configuration file");
+ } else {
+ format_id = innobase_file_format_validate_and_set(
+ file_format_input);
+
+ if (format_id >= 0) {
+ /* Save the name returned for this id rather
+ than the caller's input string. */
+ *static_cast<const char**>(save) =
+ trx_sys_file_format_id_to_name(
+ (uint)format_id);
+
+ return(0);
+
+ } else {
+ push_warning_printf(thd,
+ MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+ "InnoDB: invalid innodb_file_format_check "
+ "value; can be any format up to %s "
+ "or its equivalent numeric id",
+ trx_sys_file_format_id_to_name(
+ DICT_TF_FORMAT_MAX));
+ }
+ }
+ }
+
+ *static_cast<const char**>(save) = NULL;
+ return(1);
+}
+
+/****************************************************************//**
+Update the system variable innodb_file_format_check using the "saved" value;
+unknown names are ignored with a warning. Callback registered with MySQL. */
+static
+void
+innodb_file_format_check_update(
+/*============================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to
+ system variable */
+ void* var_ptr, /*!< out: where the
+ format name goes */
+ const void* save) /*!< in: immediate result
+ from check function */
+{
+ const char* format_name_in;
+ const char** format_name_out;
+ uint format_id;
+
+ ut_a(save != NULL);
+ ut_a(var_ptr != NULL);
+
+ format_name_in = *static_cast<const char*const*>(save);
+
+ if (!format_name_in) {
+ /* No name was saved; leave the variable unchanged. */
+ return;
+ }
+
+ format_id = innobase_file_format_name_lookup(format_name_in);
+
+ if (format_id > DICT_TF_FORMAT_MAX) {
+ /* DEFAULT is "on", which is invalid at runtime. */
+ push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+ "Ignoring SET innodb_file_format_check=%s",
+ format_name_in);
+ return;
+ }
+
+ format_name_out = static_cast<const char**>(var_ptr);
+
+ /* Update the max format id in the system tablespace. */
+ if (trx_sys_file_format_max_set(format_id, format_name_out)) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " [Info] InnoDB: the file format in the system "
+ "tablespace is now set to %s.\n", *format_name_out);
+ }
+}
+
+/****************************************************************//**
+Update the system variable innodb_adaptive_hash_index: call btr_search_enable()
+or btr_search_disable() according to the "saved" my_bool. MySQL callback. */
+static
+void
+innodb_adaptive_hash_index_update(
+/*==============================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to
+ system variable */
+ void* var_ptr, /*!< out: not written
+ by this function */
+ const void* save) /*!< in: immediate result
+ from check function */
+{
+ if (*(my_bool*) save) {
+ btr_search_enable();
+ } else {
+ btr_search_disable();
+ }
+}
+
+/****************************************************************//**
+Pass the "saved" innodb_old_blocks_pct value to buf_LRU_old_ratio_update()
+and keep its result in innobase_old_blocks_pct. Registered as a MySQL callback. */
+static
+void
+innodb_old_blocks_pct_update(
+/*=========================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to
+ system variable */
+ void* var_ptr,/*!< out: not written
+ by this function */
+ const void* save) /*!< in: immediate result
+ from check function */
+{
+ innobase_old_blocks_pct = buf_LRU_old_ratio_update(
+ *static_cast<const uint*>(save), TRUE);
+}
+
+/*************************************************************//**
+Check if it is a valid value of innodb_change_buffering; on a match, its
+index in innobase_change_buffering_values is stored in *save. MySQL callback.
+@return 0 for valid innodb_change_buffering, 1 otherwise */
+static
+int
+innodb_change_buffering_validate(
+/*=============================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to system
+ variable */
+ void* save, /*!< out: immediate result
+ for update function */
+ struct st_mysql_value* value) /*!< in: incoming string */
+{
+ const char* change_buffering_input;
+ char buff[STRING_BUFFER_USUAL_SIZE];
+ int len = sizeof(buff);
+
+ ut_a(save != NULL);
+ ut_a(value != NULL);
+
+ change_buffering_input = value->val_str(value, buff, &len);
+
+ if (change_buffering_input != NULL) {
+ ulint use;
+
+ for (use = 0; use < UT_ARR_SIZE(innobase_change_buffering_values);
+ use++) {
+ if (!innobase_strcasecmp(
+ change_buffering_input,
+ innobase_change_buffering_values[use])) {
+ *(ibuf_use_t*) save = (ibuf_use_t) use;
+ return(0);
+ }
+ }
+ }
+
+ return(1);
+}
+
+/****************************************************************//**
+Set ibuf_use from the "saved" value and point the system variable
+innodb_change_buffering at the matching name. Callback registered with MySQL. */
+static
+void
+innodb_change_buffering_update(
+/*===========================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to
+ system variable */
+ void* var_ptr, /*!< out: where the
+ name string goes */
+ const void* save) /*!< in: immediate result
+ from check function */
+{
+ ut_a(var_ptr != NULL);
+ ut_a(save != NULL);
+ ut_a((*(ibuf_use_t*) save) < IBUF_USE_COUNT);
+
+ ibuf_use = *(const ibuf_use_t*) save;
+
+ *(const char**) var_ptr = innobase_change_buffering_values[ibuf_use];
}
static int show_innodb_vars(THD *thd, SHOW_VAR *var, char *buff)
@@ -8609,36 +9874,39 @@ static int show_innodb_vars(THD *thd, SHOW_VAR *var, char *buff)
/***********************************************************************
This function checks each index name for a table against reserved
system default primary index name 'GEN_CLUST_INDEX'. If a name matches,
-this function pushes an error message to the client, and returns true. */
-static
+this function pushes a warning message to the client, and returns true. */
+extern "C" UNIV_INTERN
bool
innobase_index_name_is_reserved(
/*============================*/
/* out: true if an index name
matches the reserved name */
const trx_t* trx, /* in: InnoDB transaction handle */
- const TABLE* form, /* in: information on table
- columns and indexes */
- const char* norm_name) /* in: table name */
+ const KEY* key_info, /* in: Indexes to be created */
+ ulint num_of_keys) /* in: Number of indexes to
+ be created. */
{
- KEY* key;
+ const KEY* key;
uint key_num; /* index number */
- for (key_num = 0; key_num < form->s->keys; key_num++) {
- key = form->key_info + key_num;
+ for (key_num = 0; key_num < num_of_keys; key_num++) {
+ key = &key_info[key_num];
if (innobase_strcasecmp(key->name,
innobase_index_reserve_name) == 0) {
/* Push warning to mysql */
push_warning_printf((THD*) trx->mysql_thd,
MYSQL_ERROR::WARN_LEVEL_WARN,
- ER_CANT_CREATE_TABLE,
+ ER_WRONG_NAME_FOR_INDEX,
"Cannot Create Index with name "
"'%s'. The name is reserved "
"for the system default primary "
"index.",
innobase_index_reserve_name);
+ my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
+ innobase_index_reserve_name);
+
return(true);
}
}
@@ -8672,6 +9940,11 @@ static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite,
"Disable with --skip-innodb-doublewrite.",
NULL, NULL, TRUE);
+static MYSQL_SYSVAR_ULONG(io_capacity, srv_io_capacity,
+ PLUGIN_VAR_RQCMDARG,
+ "Number of IOPs the server can do. Tunes the background IO rate",
+ NULL, NULL, 200, 100, ~0L, 0);
+
static MYSQL_SYSVAR_ULONG(fast_shutdown, innobase_fast_shutdown,
PLUGIN_VAR_OPCMDARG,
"Speeds up the shutdown process of the InnoDB storage engine. Possible "
@@ -8684,11 +9957,27 @@ static MYSQL_SYSVAR_ULONG(fast_shutdown, innobase_fast_shutdown,
".",
NULL, NULL, 1, 0, IF_NETWARE(1,2), 0);
-static MYSQL_SYSVAR_BOOL(file_per_table, innobase_file_per_table,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+static MYSQL_SYSVAR_BOOL(file_per_table, srv_file_per_table,
+ PLUGIN_VAR_NOCMDARG,
"Stores each InnoDB table to an .ibd file in the database dir.",
NULL, NULL, FALSE);
+static MYSQL_SYSVAR_STR(file_format, innobase_file_format_name,
+ PLUGIN_VAR_RQCMDARG,
+ "File format to use for new tables in .ibd files.",
+ innodb_file_format_name_validate,
+ innodb_file_format_name_update, "Antelope");
+
+/* If a new file format is introduced, the file format
+name needs to be updated accordingly. Please refer to
+file_format_name_map[] defined in trx0sys.c for the next
+file format name. */
+static MYSQL_SYSVAR_STR(file_format_check, innobase_file_format_check,
+ PLUGIN_VAR_OPCMDARG,
+ "The highest file format in the tablespace.",
+ innodb_file_format_check_validate,
+ innodb_file_format_check_update, "Barracuda");
+
static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
PLUGIN_VAR_OPCMDARG,
"Set to 0 (write and flush once per second),"
@@ -8696,7 +9985,7 @@ static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
" or 2 (write at commit, flush once per second).",
NULL, NULL, 1, 0, 2, 0);
-static MYSQL_SYSVAR_STR(flush_method, innobase_unix_file_flush_method,
+static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"With which method to flush data.", NULL, NULL, NULL);
@@ -8722,7 +10011,12 @@ static MYSQL_SYSVAR_STR(log_group_home_dir, innobase_log_group_home_dir,
static MYSQL_SYSVAR_ULONG(max_dirty_pages_pct, srv_max_buf_pool_modified_pct,
PLUGIN_VAR_RQCMDARG,
"Percentage of dirty pages allowed in bufferpool.",
- NULL, NULL, 90, 0, 100, 0);
+ NULL, NULL, 75, 0, 99, 0);
+
+static MYSQL_SYSVAR_BOOL(adaptive_flushing, srv_adaptive_flushing,
+ PLUGIN_VAR_NOCMDARG,
+ "Attempt flushing dirty pages to avoid IO bursts at checkpoints.",
+ NULL, NULL, TRUE);
static MYSQL_SYSVAR_ULONG(max_purge_lag, srv_max_purge_lag,
PLUGIN_VAR_RQCMDARG,
@@ -8744,24 +10038,27 @@ static MYSQL_SYSVAR_BOOL(stats_on_metadata, innobase_stats_on_metadata,
"Enable statistics gathering for metadata commands such as SHOW TABLE STATUS (on by default)",
NULL, NULL, TRUE);
-static MYSQL_SYSVAR_BOOL(use_legacy_cardinality_algorithm,
- srv_use_legacy_cardinality_algorithm,
- PLUGIN_VAR_OPCMDARG,
- "Use legacy algorithm for picking random pages during index cardinality "
- "estimation. Disable this to use a better algorithm, but note that your "
- "query plans may change (enabled by default).",
- NULL, NULL, TRUE);
+static MYSQL_SYSVAR_ULONGLONG(stats_sample_pages, srv_stats_sample_pages,
+ PLUGIN_VAR_RQCMDARG,
+ "The number of index pages to sample when calculating statistics (default 8)",
+ NULL, NULL, 8, 1, ~0ULL, 0);
-static MYSQL_SYSVAR_BOOL(adaptive_hash_index, innobase_adaptive_hash_index,
- PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
+static MYSQL_SYSVAR_BOOL(adaptive_hash_index, btr_search_enabled,
+ PLUGIN_VAR_OPCMDARG,
"Enable InnoDB adaptive hash index (enabled by default). "
"Disable with --skip-innodb-adaptive-hash-index.",
- NULL, NULL, TRUE);
+ NULL, innodb_adaptive_hash_index_update, TRUE);
+
+static MYSQL_SYSVAR_ULONG(replication_delay, srv_replication_delay,
+ PLUGIN_VAR_RQCMDARG,
+ "Replication thread delay (ms) on the slave server if "
+ "innodb_thread_concurrency is reached (0 by default)",
+ NULL, NULL, 0, 0, ~0UL, 0);
static MYSQL_SYSVAR_LONG(additional_mem_pool_size, innobase_additional_mem_pool_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Size of a memory pool InnoDB uses to store data dictionary information and other internal data structures.",
- NULL, NULL, 1*1024*1024L, 512*1024L, LONG_MAX, 1024);
+ NULL, NULL, 8*1024*1024L, 512*1024L, LONG_MAX, 1024);
static MYSQL_SYSVAR_ULONG(autoextend_increment, srv_auto_extend_increment,
PLUGIN_VAR_RQCMDARG,
@@ -8771,7 +10068,7 @@ static MYSQL_SYSVAR_ULONG(autoextend_increment, srv_auto_extend_increment,
static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
- NULL, NULL, 8*1024*1024L, 1024*1024L, LONGLONG_MAX, 1024*1024L);
+ NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L);
static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency,
PLUGIN_VAR_RQCMDARG,
@@ -8784,24 +10081,29 @@ static MYSQL_SYSVAR_ULONG(concurrency_tickets, srv_n_free_tickets_to_enter,
NULL, NULL, 500L, 1L, ~0L, 0);
static MYSQL_SYSVAR_LONG(file_io_threads, innobase_file_io_threads,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY | PLUGIN_VAR_NOSYSVAR,
"Number of file I/O threads in InnoDB.",
NULL, NULL, 4, 4, 64, 0);
+static MYSQL_SYSVAR_ULONG(read_io_threads, innobase_read_io_threads,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Number of background read I/O threads in InnoDB.",
+ NULL, NULL, 4, 1, 64, 0);
+
+static MYSQL_SYSVAR_ULONG(write_io_threads, innobase_write_io_threads,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Number of background write I/O threads in InnoDB.",
+ NULL, NULL, 4, 1, 64, 0);
+
static MYSQL_SYSVAR_LONG(force_recovery, innobase_force_recovery,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Helps to save your data in case the disk image of the database becomes corrupt.",
NULL, NULL, 0, 0, 6, 0);
-static MYSQL_SYSVAR_LONG(lock_wait_timeout, innobase_lock_wait_timeout,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back.",
- NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0);
-
static MYSQL_SYSVAR_LONG(log_buffer_size, innobase_log_buffer_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"The size of the buffer which InnoDB uses to write log to the log files on disk.",
- NULL, NULL, 1024*1024L, 256*1024L, LONG_MAX, 1024);
+ NULL, NULL, 8*1024*1024L, 256*1024L, LONG_MAX, 1024);
static MYSQL_SYSVAR_LONGLONG(log_file_size, innobase_log_file_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
@@ -8818,6 +10120,18 @@ static MYSQL_SYSVAR_LONG(mirrored_log_groups, innobase_mirrored_log_groups,
"Number of identical copies of log groups we keep for the database. Currently this should be set to 1.",
NULL, NULL, 1, 1, 10, 0);
+static MYSQL_SYSVAR_UINT(old_blocks_pct, innobase_old_blocks_pct,
+ PLUGIN_VAR_RQCMDARG,
+ "Percentage of the buffer pool to reserve for 'old' blocks.",
+ NULL, innodb_old_blocks_pct_update, 100 * 3 / 8, 5, 95, 0);
+
+static MYSQL_SYSVAR_UINT(old_blocks_time, buf_LRU_old_threshold_ms,
+ PLUGIN_VAR_RQCMDARG,
+ "Move blocks to the 'new' end of the buffer pool if the first access"
+ " was at least this many milliseconds ago."
+ " The timeout is disabled if 0 (the default).",
+ NULL, NULL, 0, 0, UINT_MAX32, 0);
+
static MYSQL_SYSVAR_LONG(open_files, innobase_open_files,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"How many files at the maximum InnoDB keeps open at the same time.",
@@ -8825,13 +10139,18 @@ static MYSQL_SYSVAR_LONG(open_files, innobase_open_files,
static MYSQL_SYSVAR_ULONG(sync_spin_loops, srv_n_spin_wait_rounds,
PLUGIN_VAR_RQCMDARG,
- "Count of spin-loop rounds in InnoDB mutexes",
- NULL, NULL, 20L, 0L, ~0L, 0);
+ "Count of spin-loop rounds in InnoDB mutexes (30 by default)",
+ NULL, NULL, 30L, 0L, ~0L, 0);
+
+static MYSQL_SYSVAR_ULONG(spin_wait_delay, srv_spin_wait_delay,
+ PLUGIN_VAR_OPCMDARG,
+ "Maximum delay between polling for a spin lock (6 by default)",
+ NULL, NULL, 6L, 0L, ~0L, 0);
static MYSQL_SYSVAR_ULONG(thread_concurrency, srv_thread_concurrency,
PLUGIN_VAR_RQCMDARG,
"Helps in performance tuning in heavily concurrent environments. Sets the maximum number of threads allowed inside InnoDB. Value 0 will disable the thread throttling.",
- NULL, NULL, 8, 0, 1000, 0);
+ NULL, NULL, 0, 0, 1000, 0);
static MYSQL_SYSVAR_ULONG(thread_sleep_delay, srv_thread_sleep_delay,
PLUGIN_VAR_RQCMDARG,
@@ -8855,6 +10174,28 @@ static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode,
AUTOINC_OLD_STYLE_LOCKING, /* Minimum value */
AUTOINC_NO_LOCKING, 0); /* Maximum value */
+static MYSQL_SYSVAR_STR(version, innodb_version_str,
+ PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_READONLY,
+ "InnoDB version", NULL, NULL, INNODB_VERSION_STR);
+
+static MYSQL_SYSVAR_BOOL(use_sys_malloc, srv_use_sys_malloc,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+ "Use OS memory allocator instead of InnoDB's internal memory allocator",
+ NULL, NULL, TRUE);
+
+static MYSQL_SYSVAR_STR(change_buffering, innobase_change_buffering,
+ PLUGIN_VAR_RQCMDARG,
+ "Buffer changes to reduce random access: "
+ "OFF, ON, inserting, deleting, changing, or purging.",
+ innodb_change_buffering_validate,
+ innodb_change_buffering_update, NULL);
+
+static MYSQL_SYSVAR_ULONG(read_ahead_threshold, srv_read_ahead_threshold,
+ PLUGIN_VAR_RQCMDARG,
+ "Number of pages that must be accessed sequentially for InnoDB to "
+ "trigger a readahead.",
+ NULL, NULL, 56, 0, 64, 0);
+
static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(additional_mem_pool_size),
MYSQL_SYSVAR(autoextend_increment),
@@ -8867,7 +10208,11 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(doublewrite),
MYSQL_SYSVAR(fast_shutdown),
MYSQL_SYSVAR(file_io_threads),
+ MYSQL_SYSVAR(read_io_threads),
+ MYSQL_SYSVAR(write_io_threads),
MYSQL_SYSVAR(file_per_table),
+ MYSQL_SYSVAR(file_format),
+ MYSQL_SYSVAR(file_format_check),
MYSQL_SYSVAR(flush_log_at_trx_commit),
MYSQL_SYSVAR(flush_method),
MYSQL_SYSVAR(force_recovery),
@@ -8882,20 +10227,31 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(log_files_in_group),
MYSQL_SYSVAR(log_group_home_dir),
MYSQL_SYSVAR(max_dirty_pages_pct),
+ MYSQL_SYSVAR(adaptive_flushing),
MYSQL_SYSVAR(max_purge_lag),
MYSQL_SYSVAR(mirrored_log_groups),
+ MYSQL_SYSVAR(old_blocks_pct),
+ MYSQL_SYSVAR(old_blocks_time),
MYSQL_SYSVAR(open_files),
MYSQL_SYSVAR(rollback_on_timeout),
MYSQL_SYSVAR(stats_on_metadata),
- MYSQL_SYSVAR(use_legacy_cardinality_algorithm),
+ MYSQL_SYSVAR(stats_sample_pages),
MYSQL_SYSVAR(adaptive_hash_index),
+ MYSQL_SYSVAR(replication_delay),
MYSQL_SYSVAR(status_file),
+ MYSQL_SYSVAR(strict_mode),
MYSQL_SYSVAR(support_xa),
MYSQL_SYSVAR(sync_spin_loops),
+ MYSQL_SYSVAR(spin_wait_delay),
MYSQL_SYSVAR(table_locks),
MYSQL_SYSVAR(thread_concurrency),
MYSQL_SYSVAR(thread_sleep_delay),
MYSQL_SYSVAR(autoinc_lock_mode),
+ MYSQL_SYSVAR(version),
+ MYSQL_SYSVAR(use_sys_malloc),
+ MYSQL_SYSVAR(change_buffering),
+ MYSQL_SYSVAR(read_ahead_threshold),
+ MYSQL_SYSVAR(io_capacity),
NULL
};
@@ -8904,16 +10260,23 @@ mysql_declare_plugin(innobase)
MYSQL_STORAGE_ENGINE_PLUGIN,
&innobase_storage_engine,
innobase_hton_name,
- "Innobase OY",
+ "Innobase Oy",
"Supports transactions, row-level locking, and foreign keys",
PLUGIN_LICENSE_GPL,
innobase_init, /* Plugin Init */
NULL, /* Plugin Deinit */
- 0x0100 /* 1.0 */,
+ INNODB_VERSION_SHORT,
innodb_status_variables_export,/* status variables */
innobase_system_variables, /* system variables */
NULL /* reserved */
-}
+},
+i_s_innodb_trx,
+i_s_innodb_locks,
+i_s_innodb_lock_waits,
+i_s_innodb_cmp,
+i_s_innodb_cmp_reset,
+i_s_innodb_cmpmem,
+i_s_innodb_cmpmem_reset
mysql_declare_plugin_end;
/** @brief Initialize the default value of innodb_commit_concurrency.
@@ -8933,3 +10296,125 @@ innobase_commit_concurrency_init_default(void)
MYSQL_SYSVAR_NAME(commit_concurrency).def_val
= innobase_commit_concurrency;
}
+
+#ifdef UNIV_COMPILE_TEST_FUNCS
+
+typedef struct innobase_convert_name_test_struct {
+ char* buf;
+ ulint buflen;
+ const char* id;
+ ulint idlen;
+ void* thd;
+ ibool file_id;
+
+ const char* expected;
+} innobase_convert_name_test_t;
+
+void
+test_innobase_convert_name()
+{
+ char buf[1024];
+ ulint i;
+
+ innobase_convert_name_test_t test_input[] = {
+ {buf, sizeof(buf), "abcd", 4, NULL, TRUE, "\"abcd\""},
+ {buf, 7, "abcd", 4, NULL, TRUE, "\"abcd\""},
+ {buf, 6, "abcd", 4, NULL, TRUE, "\"abcd\""},
+ {buf, 5, "abcd", 4, NULL, TRUE, "\"abc\""},
+ {buf, 4, "abcd", 4, NULL, TRUE, "\"ab\""},
+
+ {buf, sizeof(buf), "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""},
+ {buf, 9, "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""},
+ {buf, 8, "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""},
+ {buf, 7, "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""},
+ {buf, 6, "ab@0060cd", 9, NULL, TRUE, "\"ab`c\""},
+ {buf, 5, "ab@0060cd", 9, NULL, TRUE, "\"ab`\""},
+ {buf, 4, "ab@0060cd", 9, NULL, TRUE, "\"ab\""},
+
+ {buf, sizeof(buf), "ab\"cd", 5, NULL, TRUE,
+ "\"#mysql50#ab\"\"cd\""},
+ {buf, 17, "ab\"cd", 5, NULL, TRUE,
+ "\"#mysql50#ab\"\"cd\""},
+ {buf, 16, "ab\"cd", 5, NULL, TRUE,
+ "\"#mysql50#ab\"\"c\""},
+ {buf, 15, "ab\"cd", 5, NULL, TRUE,
+ "\"#mysql50#ab\"\"\""},
+ {buf, 14, "ab\"cd", 5, NULL, TRUE,
+ "\"#mysql50#ab\""},
+ {buf, 13, "ab\"cd", 5, NULL, TRUE,
+ "\"#mysql50#ab\""},
+ {buf, 12, "ab\"cd", 5, NULL, TRUE,
+ "\"#mysql50#a\""},
+ {buf, 11, "ab\"cd", 5, NULL, TRUE,
+ "\"#mysql50#\""},
+ {buf, 10, "ab\"cd", 5, NULL, TRUE,
+ "\"#mysql50\""},
+
+ {buf, sizeof(buf), "ab/cd", 5, NULL, TRUE, "\"ab\".\"cd\""},
+ {buf, 9, "ab/cd", 5, NULL, TRUE, "\"ab\".\"cd\""},
+ {buf, 8, "ab/cd", 5, NULL, TRUE, "\"ab\".\"c\""},
+ {buf, 7, "ab/cd", 5, NULL, TRUE, "\"ab\".\"\""},
+ {buf, 6, "ab/cd", 5, NULL, TRUE, "\"ab\"."},
+ {buf, 5, "ab/cd", 5, NULL, TRUE, "\"ab\"."},
+ {buf, 4, "ab/cd", 5, NULL, TRUE, "\"ab\""},
+ {buf, 3, "ab/cd", 5, NULL, TRUE, "\"a\""},
+ {buf, 2, "ab/cd", 5, NULL, TRUE, "\"\""},
+ /* XXX probably "" is a better result in this case
+ {buf, 1, "ab/cd", 5, NULL, TRUE, "."},
+ */
+ {buf, 0, "ab/cd", 5, NULL, TRUE, ""},
+ };
+
+ for (i = 0; i < sizeof(test_input) / sizeof(test_input[0]); i++) {
+
+ char* end;
+ ibool ok = TRUE;
+ size_t res_len;
+
+ fprintf(stderr, "TESTING %lu, %s, %lu, %s\n",
+ test_input[i].buflen,
+ test_input[i].id,
+ test_input[i].idlen,
+ test_input[i].expected);
+
+ end = innobase_convert_name(
+ test_input[i].buf,
+ test_input[i].buflen,
+ test_input[i].id,
+ test_input[i].idlen,
+ test_input[i].thd,
+ test_input[i].file_id);
+
+ res_len = (size_t) (end - test_input[i].buf);
+
+ if (res_len != strlen(test_input[i].expected)) {
+
+ fprintf(stderr, "unexpected len of the result: %u, "
+ "expected: %u\n", (unsigned) res_len,
+ (unsigned) strlen(test_input[i].expected));
+ ok = FALSE;
+ }
+
+ if (memcmp(test_input[i].buf,
+ test_input[i].expected,
+ strlen(test_input[i].expected)) != 0
+ || !ok) {
+
+ fprintf(stderr, "unexpected result: %.*s, "
+ "expected: %s\n", (int) res_len,
+ test_input[i].buf,
+ test_input[i].expected);
+ ok = FALSE;
+ }
+
+ if (ok) {
+ fprintf(stderr, "OK: res: %.*s\n\n", (int) res_len,
+ buf);
+ } else {
+ fprintf(stderr, "FAILED\n\n");
+ return;
+ }
+ }
+}
+
+#endif /* UNIV_COMPILE_TEST_FUNCS */
diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h
index 9ddb516c3dc..31e88ed8530 100644
--- a/storage/innobase/handler/ha_innodb.h
+++ b/storage/innobase/handler/ha_innodb.h
@@ -1,17 +1,20 @@
-/* Copyright (C) 2000-2005 MySQL AB && Innobase Oy
+/*****************************************************************************
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; version 2 of the License.
+Copyright (c) 2000, 2009, MySQL AB & Innobase Oy. All Rights Reserved.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
/*
This file is based on ha_berkeley.h of MySQL distribution
@@ -24,34 +27,43 @@
#pragma interface /* gcc class implementation */
#endif
+/** InnoDB table share */
typedef struct st_innobase_share {
- THR_LOCK lock;
- pthread_mutex_t mutex;
- char *table_name;
- uint table_name_length,use_count;
+ THR_LOCK lock; /*!< MySQL lock protecting
+ this structure */
+ const char* table_name; /*!< InnoDB table name */
+ uint use_count; /*!< reference count,
+ incremented in get_share()
+ and decremented in free_share() */
+ void* table_name_hash;/*!< hash table chain node */
} INNOBASE_SHARE;
+/** InnoDB B-tree index */
struct dict_index_struct;
+/** Prebuilt structures in an Innobase table handle used within MySQL */
struct row_prebuilt_struct;
+/** InnoDB B-tree index */
typedef struct dict_index_struct dict_index_t;
+/** Prebuilt structures in an Innobase table handle used within MySQL */
typedef struct row_prebuilt_struct row_prebuilt_t;
-/* The class defining a handle to an Innodb table */
+/** The class defining a handle to an Innodb table */
class ha_innobase: public handler
{
- row_prebuilt_t* prebuilt; /* prebuilt struct in InnoDB, used
+ row_prebuilt_t* prebuilt; /*!< prebuilt struct in InnoDB, used
to save CPU time with prebuilt data
structures*/
- THD* user_thd; /* the thread handle of the user
+ THD* user_thd; /*!< the thread handle of the user
currently using the handle; this is
set in external_lock function */
THR_LOCK_DATA lock;
- INNOBASE_SHARE *share;
+ INNOBASE_SHARE* share; /*!< information for MySQL
+ table locking */
- uchar* upd_buff; /* buffer used in updates */
- uchar* key_val_buff; /* buffer used in converting
+ uchar* upd_buff; /*!< buffer used in updates */
+ uchar* key_val_buff; /*!< buffer used in converting
search key values from MySQL format
to Innodb format */
ulong upd_and_key_val_buff_len;
@@ -59,62 +71,49 @@ class ha_innobase: public handler
two buffers */
Table_flags int_table_flags;
uint primary_key;
- ulong start_of_scan; /* this is set to 1 when we are
+ ulong start_of_scan; /*!< this is set to 1 when we are
starting a table scan but have not
yet fetched any row, else 0 */
uint last_match_mode;/* match mode of the latest search:
ROW_SEL_EXACT, ROW_SEL_EXACT_PREFIX,
or undefined */
- uint num_write_row; /* number of write_row() calls */
+ uint num_write_row; /*!< number of write_row() calls */
uint store_key_val_for_row(uint keynr, char* buff, uint buff_len,
const uchar* record);
- int update_thd(THD* thd);
+ inline void update_thd(THD* thd);
+ void update_thd();
int change_active_index(uint keynr);
int general_fetch(uchar* buf, uint direction, uint match_mode);
- ulong innobase_lock_autoinc();
+ ulint innobase_lock_autoinc();
ulonglong innobase_peek_autoinc();
- ulong innobase_set_max_autoinc(ulonglong auto_inc);
- ulong innobase_reset_autoinc(ulonglong auto_inc);
- ulong innobase_get_autoinc(ulonglong* value);
- ulong innobase_update_autoinc(ulonglong auto_inc);
- ulong innobase_initialize_autoinc();
+ ulint innobase_set_max_autoinc(ulonglong auto_inc);
+ ulint innobase_reset_autoinc(ulonglong auto_inc);
+ ulint innobase_get_autoinc(ulonglong* value);
+ ulint innobase_update_autoinc(ulonglong auto_inc);
+ ulint innobase_initialize_autoinc();
dict_index_t* innobase_get_index(uint keynr);
ulonglong innobase_get_int_col_max_value(const Field* field);
/* Init values for the class: */
public:
ha_innobase(handlerton *hton, TABLE_SHARE *table_arg);
- ~ha_innobase() {}
+ ~ha_innobase();
/*
Get the row type from the storage engine. If this method returns
ROW_TYPE_NOT_USED, the information in HA_CREATE_INFO should be used.
*/
enum row_type get_row_type() const;
- const char* table_type() const { return("InnoDB");}
- const char *index_type(uint key_number) { return "BTREE"; }
+ const char* table_type() const;
+ const char* index_type(uint key_number);
const char** bas_ext() const;
Table_flags table_flags() const;
- ulong index_flags(uint idx, uint part, bool all_parts) const
- {
- return (HA_READ_NEXT |
- HA_READ_PREV |
- HA_READ_ORDER |
- HA_READ_RANGE |
- HA_KEYREAD_ONLY);
- }
- uint max_supported_keys() const { return MAX_KEY; }
- /* An InnoDB page must store >= 2 keys;
- a secondary key record must also contain the
- primary key value:
- max key length is therefore set to slightly
- less than 1 / 4 of page size which is 16 kB;
- but currently MySQL does not work with keys
- whose size is > MAX_KEY_LENGTH */
- uint max_supported_key_length() const { return 3500; }
+ ulong index_flags(uint idx, uint part, bool all_parts) const;
+ uint max_supported_keys() const;
+ uint max_supported_key_length() const;
uint max_supported_key_part_length() const;
- const key_map *keys_to_use_for_scanning() { return &key_map_full; }
+ const key_map* keys_to_use_for_scanning();
int open(const char *name, int mode, uint test_if_locked);
int close(void);
@@ -185,7 +184,7 @@ class ha_innobase: public handler
virtual bool get_error_message(int error, String *buf);
- uint8 table_cache_type() { return HA_CACHE_TBL_ASKTRANSACT; }
+ uint8 table_cache_type();
/*
ask handler about permission to cache table during query registration
*/
@@ -195,8 +194,14 @@ class ha_innobase: public handler
ulonglong *engine_data);
static char *get_mysql_bin_log_name();
static ulonglong get_mysql_bin_log_pos();
- bool primary_key_is_clustered() { return true; }
+ bool primary_key_is_clustered();
int cmp_ref(const uchar *ref1, const uchar *ref2);
+ /** Fast index creation (smart ALTER TABLE) @see handler0alter.cc @{ */
+ int add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys);
+ int prepare_drop_index(TABLE *table_arg, uint *key_num,
+ uint num_of_keys);
+ int final_drop_index(TABLE *table_arg);
+ /** @} */
bool check_if_incompatible_data(HA_CREATE_INFO *info,
uint table_changes);
};
@@ -253,10 +258,54 @@ int thd_binlog_format(const MYSQL_THD thd);
*/
void thd_mark_transaction_to_rollback(MYSQL_THD thd, bool all);
+#if MYSQL_VERSION_ID > 50140
/**
Check if binary logging is filtered for thread's current db.
@param thd Thread handle
@retval 1 the query is not filtered, 0 otherwise.
*/
bool thd_binlog_filter_ok(const MYSQL_THD thd);
+#endif /* MYSQL_VERSION_ID > 50140 */
}
+
+typedef struct trx_struct trx_t;
+/********************************************************************//**
+@file handler/ha_innodb.h
+Converts an InnoDB error code to a MySQL error code and also tells to MySQL
+about a possible transaction rollback inside InnoDB caused by a lock wait
+timeout or a deadlock.
+@return MySQL error code */
+extern "C"
+int
+convert_error_code_to_mysql(
+/*========================*/
+ int error, /*!< in: InnoDB error code */
+ ulint flags, /*!< in: InnoDB table flags, or 0 */
+ MYSQL_THD thd); /*!< in: user thread handle or NULL */
+
+/*********************************************************************//**
+Allocates an InnoDB transaction for a MySQL handler object.
+@return InnoDB transaction handle */
+extern "C"
+trx_t*
+innobase_trx_allocate(
+/*==================*/
+ MYSQL_THD thd); /*!< in: user thread handle */
+
+
+/*********************************************************************//**
+This function checks each index name for a table against reserved
+system default primary index name 'GEN_CLUST_INDEX'. If a name
+matches, this function pushes a warning message to the client,
+and returns true. */
+extern "C"
+bool
+innobase_index_name_is_reserved(
+/*============================*/
+ /* out: true if the index name
+ matches the reserved name */
+ const trx_t* trx, /* in: InnoDB transaction handle */
+ const KEY* key_info, /* in: Indexes to be created */
+ ulint num_of_keys); /* in: Number of indexes to
+ be created. */
+
diff --git a/storage/innodb_plugin/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
index a5008991400..a5008991400 100644
--- a/storage/innodb_plugin/handler/handler0alter.cc
+++ b/storage/innobase/handler/handler0alter.cc
diff --git a/storage/innodb_plugin/handler/i_s.cc b/storage/innobase/handler/i_s.cc
index 524fe696de2..524fe696de2 100644
--- a/storage/innodb_plugin/handler/i_s.cc
+++ b/storage/innobase/handler/i_s.cc
diff --git a/storage/innodb_plugin/handler/i_s.h b/storage/innobase/handler/i_s.h
index 402c88bbedb..402c88bbedb 100644
--- a/storage/innodb_plugin/handler/i_s.h
+++ b/storage/innobase/handler/i_s.h
diff --git a/storage/innodb_plugin/handler/mysql_addons.cc b/storage/innobase/handler/mysql_addons.cc
index eae1fe9fbc2..eae1fe9fbc2 100644
--- a/storage/innodb_plugin/handler/mysql_addons.cc
+++ b/storage/innobase/handler/mysql_addons.cc
diff --git a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
index d54a3378993..08986fac0ef 100644
--- a/storage/innobase/ibuf/ibuf0ibuf.c
+++ b/storage/innobase/ibuf/ibuf0ibuf.c
@@ -1,17 +1,44 @@
-/******************************************************
-Insert buffer
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1997 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file ibuf/ibuf0ibuf.c
+Insert buffer
Created 7/19/1997 Heikki Tuuri
*******************************************************/
#include "ibuf0ibuf.h"
+/** Number of bits describing a single page */
+#define IBUF_BITS_PER_PAGE 4
+#if IBUF_BITS_PER_PAGE % 2
+# error "IBUF_BITS_PER_PAGE must be an even number!"
+#endif
+/** The start address for an insert buffer bitmap page bitmap */
+#define IBUF_BITMAP PAGE_DATA
+
#ifdef UNIV_NONINL
#include "ibuf0ibuf.ic"
#endif
+#ifndef UNIV_HOTBACKUP
+
#include "buf0buf.h"
#include "buf0rea.h"
#include "fsp0fsp.h"
@@ -134,39 +161,45 @@ level 2 i/o. However, if an OS thread does the i/o handling for itself, i.e.,
it uses synchronous aio, it can access any pages, as long as it obeys the
access order rules. */
-/* Buffer pool size per the maximum insert buffer size */
+/** Buffer pool size per the maximum insert buffer size */
#define IBUF_POOL_SIZE_PER_MAX_SIZE 2
-/* The insert buffer control structure */
-ibuf_t* ibuf = NULL;
+/** Table name for the insert buffer. */
+#define IBUF_TABLE_NAME "SYS_IBUF_TABLE"
-static ulint ibuf_rnd = 986058871;
+/** Operations that can currently be buffered. */
+UNIV_INTERN ibuf_use_t ibuf_use = IBUF_USE_INSERT;
-ulint ibuf_flush_count = 0;
+/** The insert buffer control structure */
+UNIV_INTERN ibuf_t* ibuf = NULL;
-#ifdef UNIV_IBUF_DEBUG
-/* Dimensions for the ibuf_count array */
-#define IBUF_COUNT_N_SPACES 500
-#define IBUF_COUNT_N_PAGES 2000
+/** Counter for ibuf_should_try() */
+UNIV_INTERN ulint ibuf_flush_count = 0;
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
+/** Number of tablespaces in the ibuf_counts array */
+#define IBUF_COUNT_N_SPACES 4
+/** Number of pages within each tablespace in the ibuf_counts array */
+#define IBUF_COUNT_N_PAGES 130000
-/* Buffered entry counts for file pages, used in debugging */
+/** Buffered entry counts for file pages, used in debugging */
static ulint ibuf_counts[IBUF_COUNT_N_SPACES][IBUF_COUNT_N_PAGES];
-/**********************************************************************
+/******************************************************************//**
Checks that the indexes to ibuf_counts[][] are within limits. */
UNIV_INLINE
void
ibuf_count_check(
/*=============*/
- ulint space_id, /* in: space identifier */
- ulint page_no) /* in: page number */
+ ulint space_id, /*!< in: space identifier */
+ ulint page_no) /*!< in: page number */
{
if (space_id < IBUF_COUNT_N_SPACES && page_no < IBUF_COUNT_N_PAGES) {
return;
}
fprintf(stderr,
- "InnoDB: UNIV_IBUF_DEBUG limits space_id and page_no\n"
+ "InnoDB: UNIV_IBUF_COUNT_DEBUG limits space_id and page_no\n"
"InnoDB: and breaks crash recovery.\n"
"InnoDB: space_id=%lu, should be 0<=space_id<%lu\n"
"InnoDB: page_no=%lu, should be 0<=page_no<%lu\n",
@@ -176,52 +209,52 @@ ibuf_count_check(
}
#endif
-/* The start address for an insert buffer bitmap page bitmap */
-#define IBUF_BITMAP PAGE_DATA
-
-/* Offsets in bits for the bits describing a single page in the bitmap */
-#define IBUF_BITMAP_FREE 0
-#define IBUF_BITMAP_BUFFERED 2
-#define IBUF_BITMAP_IBUF 3 /* TRUE if page is a part of the ibuf
- tree, excluding the root page, or is
- in the free list of the ibuf */
-
-/* Number of bits describing a single page */
-#define IBUF_BITS_PER_PAGE 4
-#if IBUF_BITS_PER_PAGE % 2
-# error "IBUF_BITS_PER_PAGE must be an even number!"
-#endif
-
-/* The mutex used to block pessimistic inserts to ibuf trees */
+/** @name Offsets to the per-page bits in the insert buffer bitmap */
+/* @{ */
+#define IBUF_BITMAP_FREE 0 /*!< Bits indicating the
+ amount of free space */
+#define IBUF_BITMAP_BUFFERED 2 /*!< TRUE if there are buffered
+ changes for the page */
+#define IBUF_BITMAP_IBUF 3 /*!< TRUE if page is a part of
+ the ibuf tree, excluding the
+ root page, or is in the free
+ list of the ibuf */
+/* @} */
+
+/** The mutex used to block pessimistic inserts to ibuf trees */
static mutex_t ibuf_pessimistic_insert_mutex;
-/* The mutex protecting the insert buffer structs */
+/** The mutex protecting the insert buffer structs */
static mutex_t ibuf_mutex;
-/* The mutex protecting the insert buffer bitmaps */
+/** The mutex protecting the insert buffer bitmaps */
static mutex_t ibuf_bitmap_mutex;
-/* The area in pages from which contract looks for page numbers for merge */
+/** The area in pages from which contract looks for page numbers for merge */
#define IBUF_MERGE_AREA 8
-/* Inside the merge area, pages which have at most 1 per this number less
+/** Inside the merge area, pages which have at most 1 per this number less
buffered entries compared to maximum volume that can buffered for a single
page are merged along with the page whose buffer became full */
#define IBUF_MERGE_THRESHOLD 4
-/* In ibuf_contract at most this number of pages is read to memory in one
+/** In ibuf_contract at most this number of pages is read to memory in one
batch, in order to merge the entries for them in the insert buffer */
#define IBUF_MAX_N_PAGES_MERGED IBUF_MERGE_AREA
-/* If the combined size of the ibuf trees exceeds ibuf->max_size by this
+/** If the combined size of the ibuf trees exceeds ibuf->max_size by this
many pages, we start to contract it in connection to inserts there, using
non-synchronous contract */
#define IBUF_CONTRACT_ON_INSERT_NON_SYNC 0
-/* Same as above, but use synchronous contract */
+/** If the combined size of the ibuf trees exceeds ibuf->max_size by this
+many pages, we start to contract it in connection to inserts there, using
+synchronous contract */
#define IBUF_CONTRACT_ON_INSERT_SYNC 5
-/* Same as above, but no insert is done, only contract is called */
+/** If the combined size of the ibuf trees exceeds ibuf->max_size by
+this many pages, we start to contract it using synchronous contract,
+but do not insert */
#define IBUF_CONTRACT_DO_NOT_INSERT 10
/* TODO: how to cope with drop table if there are records in the insert
@@ -230,15 +263,7 @@ because ibuf merge is done to a page when it is read in, and it is
still physically like the index page even if the index would have been
dropped! So, there seems to be no problem. */
-/**********************************************************************
-Validates the ibuf data structures when the caller owns ibuf_mutex. */
-
-ibool
-ibuf_validate_low(void);
-/*===================*/
- /* out: TRUE if ok */
-
-/**********************************************************************
+/******************************************************************//**
Sets the flag in the current OS thread local storage denoting that it is
inside an insert buffer routine. */
UNIV_INLINE
@@ -255,7 +280,7 @@ ibuf_enter(void)
*ptr = TRUE;
}
-/**********************************************************************
+/******************************************************************//**
Sets the flag in the current OS thread local storage denoting that it is
exiting an insert buffer routine. */
UNIV_INLINE
@@ -272,97 +297,90 @@ ibuf_exit(void)
*ptr = FALSE;
}
-/**********************************************************************
+/******************************************************************//**
Returns TRUE if the current OS thread is performing an insert buffer
-routine. */
+routine.
+For instance, a read-ahead of non-ibuf pages is forbidden by threads
+that are executing an insert buffer routine.
+@return TRUE if inside an insert buffer routine */
+UNIV_INTERN
ibool
ibuf_inside(void)
/*=============*/
- /* out: TRUE if inside an insert buffer routine: for instance,
- a read-ahead of non-ibuf pages is then forbidden */
{
return(*thr_local_get_in_ibuf_field());
}
-/**********************************************************************
-Gets the ibuf header page and x-latches it. */
+/******************************************************************//**
+Gets the ibuf header page and x-latches it.
+@return insert buffer header page */
static
page_t*
ibuf_header_page_get(
/*=================*/
- /* out: insert buffer header page */
- ulint space, /* in: space id */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
- page_t* page;
-
- ut_a(space == 0);
+ buf_block_t* block;
ut_ad(!ibuf_inside());
- page = buf_page_get(space, FSP_IBUF_HEADER_PAGE_NO, RW_X_LATCH, mtr);
-
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_IBUF_HEADER);
-#endif /* UNIV_SYNC_DEBUG */
+ block = buf_page_get(
+ IBUF_SPACE_ID, 0, FSP_IBUF_HEADER_PAGE_NO, RW_X_LATCH, mtr);
+ buf_block_dbg_add_level(block, SYNC_IBUF_HEADER);
- return(page);
+ return(buf_block_get_frame(block));
}
-/**********************************************************************
-Gets the root page and x-latches it. */
+/******************************************************************//**
+Gets the root page and x-latches it.
+@return insert buffer tree root page */
static
page_t*
ibuf_tree_root_get(
/*===============*/
- /* out: insert buffer tree root page */
- ibuf_data_t* data, /* in: ibuf data */
- ulint space, /* in: space id */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
- page_t* page;
+ buf_block_t* block;
- ut_a(space == 0);
ut_ad(ibuf_inside());
- mtr_x_lock(dict_index_get_lock(data->index), mtr);
+ mtr_x_lock(dict_index_get_lock(ibuf->index), mtr);
- page = buf_page_get(space, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH,
- mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_TREE_NODE);
-#endif /* UNIV_SYNC_DEBUG */
+ block = buf_page_get(
+ IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH, mtr);
- return(page);
-}
+ buf_block_dbg_add_level(block, SYNC_TREE_NODE);
-#ifdef UNIV_IBUF_DEBUG
-/**********************************************************************
-Gets the ibuf count for a given page. */
+ return(buf_block_get_frame(block));
+}
+#ifdef UNIV_IBUF_COUNT_DEBUG
+/******************************************************************//**
+Gets the ibuf count for a given page.
+@return number of entries in the insert buffer currently buffered for
+this page */
+UNIV_INTERN
ulint
ibuf_count_get(
/*===========*/
- /* out: number of entries in the insert buffer
- currently buffered for this page */
- ulint space, /* in: space id */
- ulint page_no)/* in: page number */
+ ulint space, /*!< in: space id */
+ ulint page_no)/*!< in: page number */
{
ibuf_count_check(space, page_no);
return(ibuf_counts[space][page_no]);
}
-/**********************************************************************
+/******************************************************************//**
Sets the ibuf count for a given page. */
static
void
ibuf_count_set(
/*===========*/
- ulint space, /* in: space id */
- ulint page_no,/* in: page number */
- ulint val) /* in: value to set */
+ ulint space, /*!< in: space id */
+ ulint page_no,/*!< in: page number */
+ ulint val) /*!< in: value to set */
{
ibuf_count_check(space, page_no);
ut_a(val < UNIV_PAGE_SIZE);
@@ -371,116 +389,92 @@ ibuf_count_set(
}
#endif
-/**********************************************************************
-Creates the insert buffer data structure at a database startup and initializes
-the data structures for the insert buffer. */
-
+/******************************************************************//**
+Closes insert buffer and frees the data structures.
+Each of the three ibuf mutexes is released with mutex_free() and its
+storage is then zero-filled using that mutex's own size; finally the
+ibuf control structure is freed and the global pointer is set to NULL. */
+UNIV_INTERN
void
-ibuf_init_at_db_start(void)
-/*=======================*/
+ibuf_close(void)
+/*============*/
{
- ibuf = mem_alloc(sizeof(ibuf_t));
-
- /* Note that also a pessimistic delete can sometimes make a B-tree
- grow in size, as the references on the upper levels of the tree can
- change */
-
- ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE
- / IBUF_POOL_SIZE_PER_MAX_SIZE;
-
- UT_LIST_INIT(ibuf->data_list);
-
- ibuf->size = 0;
-
- mutex_create(&ibuf_pessimistic_insert_mutex,
- SYNC_IBUF_PESS_INSERT_MUTEX);
+ mutex_free(&ibuf_pessimistic_insert_mutex);
+ memset(&ibuf_pessimistic_insert_mutex,
+ 0x0, sizeof(ibuf_pessimistic_insert_mutex));
- mutex_create(&ibuf_mutex, SYNC_IBUF_MUTEX);
+ mutex_free(&ibuf_mutex);
+ memset(&ibuf_mutex, 0x0, sizeof(ibuf_mutex));
- mutex_create(&ibuf_bitmap_mutex, SYNC_IBUF_BITMAP_MUTEX);
+ mutex_free(&ibuf_bitmap_mutex);
+ memset(&ibuf_bitmap_mutex, 0x0, sizeof(ibuf_bitmap_mutex));
- fil_ibuf_init_at_db_start();
+ mem_free(ibuf);
+ ibuf = NULL;
}
-/**********************************************************************
-Updates the size information in an ibuf data, assuming the segment size has
-not changed. */
+/******************************************************************//**
+Updates the size information of the ibuf, assuming the segment size has not
+changed. Recomputes ibuf->free_list_len, ibuf->height, ibuf->size and
+ibuf->empty from the root page; ibuf->seg_size is only read. The caller
+must own ibuf_mutex (checked with a debug assertion). */
static
void
-ibuf_data_sizes_update(
-/*===================*/
- ibuf_data_t* data, /* in: ibuf data struct */
- page_t* root, /* in: ibuf tree root */
- mtr_t* mtr) /* in: mtr */
+ibuf_size_update(
+/*=============*/
+ const page_t* root, /*!< in: ibuf tree root */
+ mtr_t* mtr) /*!< in: mtr */
{
- ulint old_size;
-
ut_ad(mutex_own(&ibuf_mutex));
- old_size = data->size;
-
- data->free_list_len = flst_get_len(root + PAGE_HEADER
+ ibuf->free_list_len = flst_get_len(root + PAGE_HEADER
+ PAGE_BTR_IBUF_FREE_LIST, mtr);
- data->height = 1 + btr_page_get_level(root, mtr);
+ ibuf->height = 1 + btr_page_get_level(root, mtr);
- data->size = data->seg_size - (1 + data->free_list_len);
/* the '1 +' is the ibuf header page */
- ut_ad(data->size < data->seg_size);
-
- if (page_get_n_recs(root) == 0) {
+ ibuf->size = ibuf->seg_size - (1 + ibuf->free_list_len);
- data->empty = TRUE;
- } else {
- data->empty = FALSE;
- }
-
- ut_ad(ibuf->size + data->size >= old_size);
-
- ibuf->size = ibuf->size + data->size - old_size;
-
-#if 0
- fprintf(stderr, "ibuf size %lu, space ibuf size %lu\n",
- ibuf->size, data->size);
-#endif
+ ibuf->empty = page_get_n_recs(root) == 0; /* nonzero exactly when the root page holds no records */
}
-/**********************************************************************
-Creates the insert buffer data struct for a single tablespace. Reads the
-root page of the insert buffer tree in the tablespace. This function can
-be called only after the dictionary system has been initialized, as this
-creates also the insert buffer table and index into this tablespace. */
-
-ibuf_data_t*
-ibuf_data_init_for_space(
-/*=====================*/
- /* out, own: ibuf data struct, linked to the list
- in ibuf control structure */
- ulint space) /* in: space id */
+/******************************************************************//**
+Creates the insert buffer data structure at a database startup and initializes
+the data structures for the insert buffer. */
+UNIV_INTERN
+void
+ibuf_init_at_db_start(void)
+/*=======================*/
{
- ibuf_data_t* data;
page_t* root;
- page_t* header_page;
mtr_t mtr;
- char* buf;
- mem_heap_t* heap;
dict_table_t* table;
+ mem_heap_t* heap;
dict_index_t* index;
ulint n_used;
+ page_t* header_page;
+ ulint error;
- ut_a(space == 0);
+ ibuf = mem_alloc(sizeof(ibuf_t));
- data = mem_alloc(sizeof(ibuf_data_t));
+ memset(ibuf, 0, sizeof(*ibuf));
- data->space = space;
+ /* Note that also a pessimistic delete can sometimes make a B-tree
+ grow in size, as the references on the upper levels of the tree can
+ change */
+
+ ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE
+ / IBUF_POOL_SIZE_PER_MAX_SIZE;
+
+ mutex_create(&ibuf_pessimistic_insert_mutex,
+ SYNC_IBUF_PESS_INSERT_MUTEX);
+
+ mutex_create(&ibuf_mutex, SYNC_IBUF_MUTEX);
+
+ mutex_create(&ibuf_bitmap_mutex, SYNC_IBUF_BITMAP_MUTEX);
mtr_start(&mtr);
mutex_enter(&ibuf_mutex);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
+ mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, NULL), &mtr);
- header_page = ibuf_header_page_get(space, &mtr);
+ header_page = ibuf_header_page_get(&mtr);
fseg_n_reserved_pages(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
&n_used, &mtr);
@@ -488,29 +482,20 @@ ibuf_data_init_for_space(
ut_ad(n_used >= 2);
- data->seg_size = n_used;
+ ibuf->seg_size = n_used;
- root = buf_page_get(space, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH,
- &mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(root, SYNC_TREE_NODE);
-#endif /* UNIV_SYNC_DEBUG */
+ {
+ buf_block_t* block;
- data->size = 0;
- data->n_inserts = 0;
- data->n_merges = 0;
- data->n_merged_recs = 0;
+ block = buf_page_get(
+ IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO,
+ RW_X_LATCH, &mtr);
+ buf_block_dbg_add_level(block, SYNC_TREE_NODE);
- ibuf_data_sizes_update(data, root, &mtr);
- /*
- if (!data->empty) {
- fprintf(stderr,
- "InnoDB: index entries found in the insert buffer\n");
- } else {
- fprintf(stderr,
- "InnoDB: insert buffer empty\n");
+ root = buf_block_get_frame(block);
}
- */
+
+ ibuf_size_update(root, &mtr);
mutex_exit(&ibuf_mutex);
mtr_commit(&mtr);
@@ -518,104 +503,105 @@ ibuf_data_init_for_space(
ibuf_exit();
heap = mem_heap_create(450);
- buf = mem_heap_alloc(heap, 50);
- sprintf(buf, "SYS_IBUF_TABLE_%lu", (ulong) space);
- /* use old-style record format for the insert buffer */
- table = dict_mem_table_create(buf, space, 2, 0);
+ /* Use old-style record format for the insert buffer. */
+ table = dict_mem_table_create(IBUF_TABLE_NAME, IBUF_SPACE_ID, 1, 0);
- dict_mem_table_add_col(table, heap, "PAGE_NO", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "TYPES", DATA_BINARY, 0, 0);
+ dict_mem_table_add_col(table, heap, "DUMMY_COLUMN", DATA_BINARY, 0, 0);
- table->id = ut_dulint_add(DICT_IBUF_ID_MIN, space);
+ table->id = ut_dulint_add(DICT_IBUF_ID_MIN, IBUF_SPACE_ID);
dict_table_add_to_cache(table, heap);
mem_heap_free(heap);
index = dict_mem_index_create(
- buf, "CLUST_IND", space,
- DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, 2);
-
- dict_mem_index_add_field(index, "PAGE_NO", 0);
- dict_mem_index_add_field(index, "TYPES", 0);
+ IBUF_TABLE_NAME, "CLUST_IND",
+ IBUF_SPACE_ID, DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, 1);
- index->id = ut_dulint_add(DICT_IBUF_ID_MIN, space);
+ dict_mem_index_add_field(index, "DUMMY_COLUMN", 0);
- dict_index_add_to_cache(table, index, FSP_IBUF_TREE_ROOT_PAGE_NO);
+ index->id = ut_dulint_add(DICT_IBUF_ID_MIN, IBUF_SPACE_ID);
- data->index = dict_table_get_first_index(table);
+ error = dict_index_add_to_cache(table, index,
+ FSP_IBUF_TREE_ROOT_PAGE_NO, FALSE);
+ ut_a(error == DB_SUCCESS);
- mutex_enter(&ibuf_mutex);
-
- UT_LIST_ADD_LAST(data_list, ibuf->data_list, data);
-
- mutex_exit(&ibuf_mutex);
-
- return(data);
+ ibuf->index = dict_table_get_first_index(table);
}
-
-/*************************************************************************
+#endif /* !UNIV_HOTBACKUP */
+/*********************************************************************//**
Initializes an ibuf bitmap page. */
-
+UNIV_INTERN
void
ibuf_bitmap_page_init(
/*==================*/
- page_t* page, /* in: bitmap page */
- mtr_t* mtr) /* in: mtr */
+ buf_block_t* block, /*!< in: buffer block of the bitmap page; its
+ frame is typed FIL_PAGE_IBUF_BITMAP and its
+ bitmap area is zero-filled */
+ mtr_t* mtr) /*!< in: mtr */
{
- ulint bit_offset;
+ page_t* page;
ulint byte_offset;
+ ulint zip_size = buf_block_get_zip_size(block);
- /* Write all zeros to the bitmap */
+ ut_a(ut_is_2pow(zip_size));
- bit_offset = XDES_DESCRIBED_PER_PAGE * IBUF_BITS_PER_PAGE;
+ page = buf_block_get_frame(block);
+ fil_page_set_type(page, FIL_PAGE_IBUF_BITMAP);
- byte_offset = bit_offset / 8 + 1;
- /* better: byte_offset = UT_BITS_IN_BYTES(bit_offset); */
+ /* Write all zeros to the bitmap. The bit count is IBUF_BITS_PER_PAGE
+ times the page size in use: zip_size for a compressed page,
+ UNIV_PAGE_SIZE when zip_size is 0. UT_BITS_IN_BYTES presumably
+ rounds that bit count up to whole bytes - confirm against its
+ definition. */
- fil_page_set_type(page, FIL_PAGE_IBUF_BITMAP);
+ if (!zip_size) {
+ byte_offset = UT_BITS_IN_BYTES(UNIV_PAGE_SIZE
+ * IBUF_BITS_PER_PAGE);
+ } else {
+ byte_offset = UT_BITS_IN_BYTES(zip_size * IBUF_BITS_PER_PAGE);
+ }
memset(page + IBUF_BITMAP, 0, byte_offset);
/* The remaining area (up to the page trailer) is uninitialized. */
+#ifndef UNIV_HOTBACKUP /* the redo log record is written only outside hot-backup builds */
mlog_write_initial_log_record(page, MLOG_IBUF_BITMAP_INIT, mtr);
+#endif /* !UNIV_HOTBACKUP */
}
-/*************************************************************************
-Parses a redo log record of an ibuf bitmap page init. */
-
+/*********************************************************************//**
+Parses a redo log record of an ibuf bitmap page init.
+No bytes are consumed from the buffer: ptr is returned unchanged, so the
+code shown here never returns NULL. When block is non-NULL the bitmap
+page is re-initialized through ibuf_bitmap_page_init().
+@return end of log record or NULL */
+UNIV_INTERN
byte*
ibuf_parse_bitmap_init(
/*===================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr __attribute__((unused)), /* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr) /* in: mtr or NULL */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr __attribute__((unused)), /*!< in: buffer end */
+ buf_block_t* block, /*!< in: block or NULL */
+ mtr_t* mtr) /*!< in: mtr or NULL */
{
ut_ad(ptr && end_ptr);
- if (page) {
- ibuf_bitmap_page_init(page, mtr);
+ if (block) {
+ ibuf_bitmap_page_init(block, mtr);
}
return(ptr);
}
-
-/************************************************************************
-Gets the desired bits for a given page from a bitmap page. */
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Gets the desired bits for a given page from a bitmap page.
+@return value of bits */
UNIV_INLINE
ulint
ibuf_bitmap_page_get_bits(
/*======================*/
- /* out: value of bits */
- page_t* page, /* in: bitmap page */
- ulint page_no,/* in: page whose bits to get */
- ulint bit, /* in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... */
- mtr_t* mtr __attribute__((unused))) /* in: mtr containing an
- x-latch to the bitmap
- page */
+ const page_t* page, /*!< in: bitmap page */
+ ulint page_no,/*!< in: page whose bits to get */
+ ulint zip_size,/*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint bit, /*!< in: IBUF_BITMAP_FREE,
+ IBUF_BITMAP_BUFFERED, ... */
+ mtr_t* mtr __attribute__((unused)))
+ /*!< in: mtr containing an
+ x-latch to the bitmap page */
{
ulint byte_offset;
ulint bit_offset;
@@ -626,11 +612,16 @@ ibuf_bitmap_page_get_bits(
#if IBUF_BITS_PER_PAGE % 2
# error "IBUF_BITS_PER_PAGE % 2 != 0"
#endif
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(ut_is_2pow(zip_size));
+ ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
- bit_offset = (page_no % XDES_DESCRIBED_PER_PAGE) * IBUF_BITS_PER_PAGE
- + bit;
+ if (!zip_size) {
+ bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE
+ + bit;
+ } else {
+ bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE
+ + bit;
+ }
byte_offset = bit_offset / 8;
bit_offset = bit_offset % 8;
@@ -650,17 +641,19 @@ ibuf_bitmap_page_get_bits(
return(value);
}
-/************************************************************************
+/********************************************************************//**
Sets the desired bit for a given page in a bitmap page. */
static
void
ibuf_bitmap_page_set_bits(
/*======================*/
- page_t* page, /* in: bitmap page */
- ulint page_no,/* in: page whose bits to set */
- ulint bit, /* in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... */
- ulint val, /* in: value to set */
- mtr_t* mtr) /* in: mtr containing an x-latch to the bitmap page */
+ page_t* page, /*!< in: bitmap page */
+ ulint page_no,/*!< in: page whose bits to set */
+ ulint zip_size,/*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint bit, /*!< in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... */
+ ulint val, /*!< in: value to set */
+ mtr_t* mtr) /*!< in: mtr containing an x-latch to the bitmap page */
{
ulint byte_offset;
ulint bit_offset;
@@ -670,15 +663,20 @@ ibuf_bitmap_page_set_bits(
#if IBUF_BITS_PER_PAGE % 2
# error "IBUF_BITS_PER_PAGE % 2 != 0"
#endif
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
-#ifdef UNIV_IBUF_DEBUG
+ ut_ad(ut_is_2pow(zip_size));
+ ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
+#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a((bit != IBUF_BITMAP_BUFFERED) || (val != FALSE)
- || (0 == ibuf_count_get(buf_frame_get_space_id(page),
+ || (0 == ibuf_count_get(page_get_space_id(page),
page_no)));
#endif
- bit_offset = (page_no % XDES_DESCRIBED_PER_PAGE) * IBUF_BITS_PER_PAGE
- + bit;
+ if (!zip_size) {
+ bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE
+ + bit;
+ } else {
+ bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE
+ + bit;
+ }
byte_offset = bit_offset / 8;
bit_offset = bit_offset % 8;
@@ -702,48 +700,55 @@ ibuf_bitmap_page_set_bits(
MLOG_1BYTE, mtr);
}
-/************************************************************************
-Calculates the bitmap page number for a given page number. */
+/********************************************************************//**
+Calculates the bitmap page number for a given page number.
+The page number is rounded down to a multiple of the page size in use
+(zip_size, or UNIV_PAGE_SIZE when zip_size is 0) by masking off its low
+bits, and FSP_IBUF_BITMAP_OFFSET is added to the result. The masking
+relies on the page size being a power of two: zip_size is debug-asserted
+to be one, UNIV_PAGE_SIZE is assumed to be one.
+@return the bitmap page number where the file page is mapped */
UNIV_INLINE
ulint
ibuf_bitmap_page_no_calc(
/*=====================*/
- /* out: the bitmap page number where
- the file page is mapped */
- ulint page_no) /* in: tablespace page number */
+ ulint zip_size, /*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint page_no) /*!< in: tablespace page number */
{
- return(FSP_IBUF_BITMAP_OFFSET
- + XDES_DESCRIBED_PER_PAGE
- * (page_no / XDES_DESCRIBED_PER_PAGE));
+ ut_ad(ut_is_2pow(zip_size));
+
+ if (!zip_size) {
+ return(FSP_IBUF_BITMAP_OFFSET
+ + (page_no & ~(UNIV_PAGE_SIZE - 1)));
+ } else {
+ return(FSP_IBUF_BITMAP_OFFSET
+ + (page_no & ~(zip_size - 1)));
+ }
}
-/************************************************************************
+/********************************************************************//**
Gets the ibuf bitmap page where the bits describing a given file page are
-stored. */
+stored.
+@return bitmap page where the file page is mapped, that is, the bitmap
+page containing the descriptor bits for the file page; the bitmap page
+is x-latched */
static
page_t*
ibuf_bitmap_get_map_page(
/*=====================*/
- /* out: bitmap page where the file page is mapped,
- that is, the bitmap page containing the descriptor
- bits for the file page; the bitmap page is
- x-latched */
- ulint space, /* in: space id of the file page */
- ulint page_no,/* in: page number of the file page */
- mtr_t* mtr) /* in: mtr */
+ ulint space, /*!< in: space id of the file page */
+ ulint page_no,/*!< in: page number of the file page */
+ ulint zip_size,/*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ mtr_t* mtr) /*!< in: mtr in which the bitmap page is
+ fetched with RW_X_LATCH */
{
- page_t* page;
+ buf_block_t* block;
- page = buf_page_get(space, ibuf_bitmap_page_no_calc(page_no),
- RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_IBUF_BITMAP);
-#endif /* UNIV_SYNC_DEBUG */
+ block = buf_page_get(space, zip_size,
+ ibuf_bitmap_page_no_calc(zip_size, page_no),
+ RW_X_LATCH, mtr);
+ buf_block_dbg_add_level(block, SYNC_IBUF_BITMAP);
- return(page);
+ return(buf_block_get_frame(block)); /* callers receive the page frame, not the block */
}
-/****************************************************************************
+/************************************************************************//**
Sets the free bits of the page in the ibuf bitmap. This is done in a separate
mini-transaction, hence this operation does not restrict further work to only
ibuf bitmap operations, which would result if the latch to the bitmap page
@@ -752,184 +757,229 @@ UNIV_INLINE
void
ibuf_set_free_bits_low(
/*===================*/
- ulint type, /* in: index type */
- page_t* page, /* in: index page; free bit is set if the index is
- non-clustered and page level is 0 */
- ulint val, /* in: value to set: < 4 */
- mtr_t* mtr) /* in: mtr */
+ ulint zip_size,/*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ const buf_block_t* block, /*!< in: index page; free bits are set if
+ the index is non-clustered and page
+ level is 0 */
+ ulint val, /*!< in: value to set: < 4 */
+ mtr_t* mtr) /*!< in/out: mtr */
{
page_t* bitmap_page;
+ ulint space;
+ ulint page_no;
- if (type & DICT_CLUSTERED) {
-
- return;
- }
-
- if (btr_page_get_level_low(page) != 0) {
+ if (!page_is_leaf(buf_block_get_frame(block))) {
return;
}
- bitmap_page = ibuf_bitmap_get_map_page(
- buf_frame_get_space_id(page),
- buf_frame_get_page_no(page), mtr);
+ space = buf_block_get_space(block);
+ page_no = buf_block_get_page_no(block);
+ bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
#ifdef UNIV_IBUF_DEBUG
# if 0
fprintf(stderr,
- "Setting page no %lu free bits to %lu should be %lu\n",
- buf_frame_get_page_no(page), val,
- ibuf_index_page_calc_free(page));
+ "Setting space %lu page %lu free bits to %lu should be %lu\n",
+ space, page_no, val,
+ ibuf_index_page_calc_free(zip_size, block));
# endif
- ut_a(val <= ibuf_index_page_calc_free(page));
+ ut_a(val <= ibuf_index_page_calc_free(zip_size, block));
#endif /* UNIV_IBUF_DEBUG */
- ibuf_bitmap_page_set_bits(bitmap_page, buf_frame_get_page_no(page),
+ ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
IBUF_BITMAP_FREE, val, mtr);
-
}
-/****************************************************************************
+/************************************************************************//**
Sets the free bit of the page in the ibuf bitmap. This is done in a separate
mini-transaction, hence this operation does not restrict further work to only
ibuf bitmap operations, which would result if the latch to the bitmap page
were kept. */
-
+UNIV_INTERN
void
-ibuf_set_free_bits(
-/*===============*/
- ulint type, /* in: index type */
- page_t* page, /* in: index page; free bit is set if the index is
- non-clustered and page level is 0 */
- ulint val, /* in: value to set: < 4 */
- ulint max_val)/* in: ULINT_UNDEFINED or a maximum value which
- the bits must have before setting; this is for
- debugging */
+ibuf_set_free_bits_func(
+/*====================*/
+ buf_block_t* block, /*!< in: index page of a non-clustered index;
+ the free bits are set to val if the page is
+ a leaf page, otherwise nothing is written */
+#ifdef UNIV_IBUF_DEBUG
+ ulint max_val,/*!< in: ULINT_UNDEFINED or a maximum
+ value which the bits must have before
+ setting; this is for debugging */
+#endif /* UNIV_IBUF_DEBUG */
+ ulint val) /*!< in: value to set: < 4 */
{
mtr_t mtr;
+ page_t* page;
page_t* bitmap_page;
+ ulint space;
+ ulint page_no;
+ ulint zip_size;
- if (type & DICT_CLUSTERED) {
-
- return;
- }
+ page = buf_block_get_frame(block);
- if (btr_page_get_level_low(page) != 0) {
+ if (!page_is_leaf(page)) { /* non-leaf pages return before any mtr is started */
return;
}
mtr_start(&mtr);
- bitmap_page = ibuf_bitmap_get_map_page(
- buf_frame_get_space_id(page), buf_frame_get_page_no(page),
- &mtr);
+ space = buf_block_get_space(block);
+ page_no = buf_block_get_page_no(block);
+ zip_size = buf_block_get_zip_size(block);
+ bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, &mtr);
- if (max_val != ULINT_UNDEFINED) {
#ifdef UNIV_IBUF_DEBUG
+ if (max_val != ULINT_UNDEFINED) { /* max_val is declared only under UNIV_IBUF_DEBUG,
+ so its use has to stay inside this #ifdef */
ulint old_val;
old_val = ibuf_bitmap_page_get_bits(
- bitmap_page, buf_frame_get_page_no(page),
+ bitmap_page, page_no, zip_size,
IBUF_BITMAP_FREE, &mtr);
# if 0
if (old_val != max_val) {
fprintf(stderr,
"Ibuf: page %lu old val %lu max val %lu\n",
- buf_frame_get_page_no(page),
+ page_get_page_no(page),
old_val, max_val);
}
# endif
ut_a(old_val <= max_val);
-#endif
}
-#ifdef UNIV_IBUF_DEBUG
# if 0
fprintf(stderr, "Setting page no %lu free bits to %lu should be %lu\n",
- buf_frame_get_page_no(page), val,
- ibuf_index_page_calc_free(page));
+ page_get_page_no(page), val,
+ ibuf_index_page_calc_free(zip_size, block));
# endif
- ut_a(val <= ibuf_index_page_calc_free(page));
-#endif
- ibuf_bitmap_page_set_bits(bitmap_page, buf_frame_get_page_no(page),
+ ut_a(val <= ibuf_index_page_calc_free(zip_size, block));
+#endif /* UNIV_IBUF_DEBUG */
+ ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
IBUF_BITMAP_FREE, val, &mtr);
mtr_commit(&mtr);
}
-/****************************************************************************
-Resets the free bits of the page in the ibuf bitmap. This is done in a
-separate mini-transaction, hence this operation does not restrict further
-work to only ibuf bitmap operations, which would result if the latch to the
-bitmap page were kept. */
-
-void
-ibuf_reset_free_bits_with_type(
-/*===========================*/
- ulint type, /* in: index type */
- page_t* page) /* in: index page; free bits are set to 0 if the index
- is non-clustered and non-unique and the page level is
- 0 */
-{
- ibuf_set_free_bits(type, page, 0, ULINT_UNDEFINED);
-}
-
-/****************************************************************************
+/************************************************************************//**
Resets the free bits of the page in the ibuf bitmap. This is done in a
-separate mini-transaction, hence this operation does not restrict further
-work to solely ibuf bitmap operations, which would result if the latch to
-the bitmap page were kept. */
-
+separate mini-transaction, hence this operation does not restrict
+further work to only ibuf bitmap operations, which would result if the
+latch to the bitmap page were kept. NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page. It is safe
+to decrement or reset the bits in the bitmap in a mini-transaction
+that is committed before the mini-transaction that affects the free
+space. */
+UNIV_INTERN
void
ibuf_reset_free_bits(
/*=================*/
- dict_index_t* index, /* in: index */
- page_t* page) /* in: index page; free bits are set to 0 if
- the index is non-clustered and non-unique and
- the page level is 0 */
+ buf_block_t* block) /*!< in: index page; free bits are set to 0
+ if the index is non-clustered and
+ non-unique, and the page level is 0 */
{
- ibuf_set_free_bits(index->type, page, 0, ULINT_UNDEFINED);
+ ibuf_set_free_bits(block, 0, ULINT_UNDEFINED); /* presumably (block, val, max_val): 0 is the
+ value written - confirm against the
+ ibuf_set_free_bits definition */
}
-/**************************************************************************
-Updates the free bits for a page to reflect the present state. Does this
-in the mtr given, which means that the latching order rules virtually prevent
-any further operations for this OS thread until mtr is committed. */
-
+/**********************************************************************//**
+Updates the free bits for an uncompressed page to reflect the present
+state. Does this in the mtr given, which means that the latching
+order rules virtually prevent any further operations for this OS
+thread until mtr is committed. NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page. It is safe
+to set the free bits in the same mini-transaction that updated the
+page. The bitmap is written only when the free-bits value derived
+from max_ins_size differs from the value computed for the page as it
+is now. A block that has a compressed page is rejected with ut_a(),
+and 0 is passed as zip_size to every helper. */
+UNIV_INTERN
void
ibuf_update_free_bits_low(
/*======================*/
- dict_index_t* index, /* in: index */
- page_t* page, /* in: index page */
- ulint max_ins_size, /* in: value of maximum insert size
- with reorganize before the latest
- operation performed to the page */
- mtr_t* mtr) /* in: mtr */
+ const buf_block_t* block, /*!< in: index page */
+ ulint max_ins_size, /*!< in: value of
+ maximum insert size
+ with reorganize before
+ the latest operation
+ performed to the page */
+ mtr_t* mtr) /*!< in/out: mtr */
{
ulint before;
ulint after;
- before = ibuf_index_page_calc_free_bits(max_ins_size);
+ ut_a(!buf_block_get_page_zip(block));
- after = ibuf_index_page_calc_free(page);
+ before = ibuf_index_page_calc_free_bits(0, max_ins_size);
+ after = ibuf_index_page_calc_free(0, block);
+
+ /* This approach cannot be used on compressed pages, since the
+ computed value of "before" often does not match the current
+ state of the bitmap. This is because the free space may
+ increase or decrease when a compressed page is reorganized. */
if (before != after) {
- ibuf_set_free_bits_low(index->type, page, after, mtr);
+ ibuf_set_free_bits_low(0, block, after, mtr);
}
}
-/**************************************************************************
-Updates the free bits for the two pages to reflect the present state. Does
-this in the mtr given, which means that the latching order rules virtually
-prevent any further operations until mtr is committed. */
+/**********************************************************************//**
+Updates the free bits for a compressed page to reflect the present
+state. Does this in the mtr given, which means that the latching
+order rules virtually prevent any further operations for this OS
+thread until mtr is committed. NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page. It is safe
+to set the free bits in the same mini-transaction that updated the
+page. The block must be a leaf page with a nonzero compressed page
+size (both enforced with ut_a()). The IBUF_BITMAP_FREE bits are
+always rewritten with the freshly computed value; no comparison
+against a previous value is made here. */
+UNIV_INTERN
+void
+ibuf_update_free_bits_zip(
+/*======================*/
+ buf_block_t* block, /*!< in/out: index page */
+ mtr_t* mtr) /*!< in/out: mtr */
+{
+ page_t* bitmap_page;
+ ulint space;
+ ulint page_no;
+ ulint zip_size;
+ ulint after;
+
+ space = buf_block_get_space(block);
+ page_no = buf_block_get_page_no(block);
+ zip_size = buf_block_get_zip_size(block);
+
+ ut_a(page_is_leaf(buf_block_get_frame(block)));
+ ut_a(zip_size);
+
+ bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
+
+ after = ibuf_index_page_calc_free_zip(zip_size, block);
+
+ if (after == 0) {
+ /* We move the page to the front of the buffer pool LRU list:
+ the purpose of this is to prevent those pages to which we
+ cannot make inserts using the insert buffer from slipping
+ out of the buffer pool */
+
+ buf_page_make_young(&block->page);
+ }
+
+ ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
+ IBUF_BITMAP_FREE, after, mtr);
+}
+/**********************************************************************//**
+Updates the free bits for the two pages to reflect the present state.
+Does this in the mtr given, which means that the latching order rules
+virtually prevent any further operations until mtr is committed.
+NOTE: The free bits in the insert buffer bitmap must never exceed the
+free space on a page. It is safe to set the free bits in the same
+mini-transaction that updated the pages. */
+UNIV_INTERN
void
ibuf_update_free_bits_for_two_pages_low(
/*====================================*/
- dict_index_t* index, /* in: index */
- page_t* page1, /* in: index page */
- page_t* page2, /* in: index page */
- mtr_t* mtr) /* in: mtr */
+ ulint zip_size,/*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ buf_block_t* block1, /*!< in: index page */
+ buf_block_t* block2, /*!< in: index page */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint state;
@@ -939,115 +989,93 @@ ibuf_update_free_bits_for_two_pages_low(
mutex_enter(&ibuf_bitmap_mutex);
- state = ibuf_index_page_calc_free(page1);
+ state = ibuf_index_page_calc_free(zip_size, block1);
- ibuf_set_free_bits_low(index->type, page1, state, mtr);
+ ibuf_set_free_bits_low(zip_size, block1, state, mtr);
- state = ibuf_index_page_calc_free(page2);
+ state = ibuf_index_page_calc_free(zip_size, block2);
- ibuf_set_free_bits_low(index->type, page2, state, mtr);
+ ibuf_set_free_bits_low(zip_size, block2, state, mtr);
mutex_exit(&ibuf_bitmap_mutex);
}
-/**************************************************************************
-Returns TRUE if the page is one of the fixed address ibuf pages. */
+/**********************************************************************//**
+Returns TRUE if the page is one of the fixed address ibuf pages.
+A page qualifies either as the ibuf tree root (IBUF_TREE_ROOT_PAGE_NO in
+IBUF_SPACE_ID) or when ibuf_bitmap_page() accepts it; the bitmap test is
+applied whatever the space id is.
+@return TRUE if a fixed address ibuf i/o page */
UNIV_INLINE
ibool
ibuf_fixed_addr_page(
/*=================*/
- /* out: TRUE if a fixed address ibuf i/o page */
- ulint space, /* in: space id */
- ulint page_no)/* in: page number */
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint page_no)/*!< in: page number */
{
- return((space == 0 && page_no == IBUF_TREE_ROOT_PAGE_NO)
- || ibuf_bitmap_page(page_no));
+ return((space == IBUF_SPACE_ID && page_no == IBUF_TREE_ROOT_PAGE_NO)
+ || ibuf_bitmap_page(zip_size, page_no));
}
-/***************************************************************************
-Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
-
+/***********************************************************************//**
+Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
+Must not be called when recv_no_ibuf_operations==TRUE.
+Fixed-address ibuf pages yield TRUE and pages outside IBUF_SPACE_ID yield
+FALSE without reading the bitmap. Otherwise the IBUF_BITMAP_IBUF bit of
+the page is read from its bitmap page. When mtr is NULL a local
+mini-transaction is started for that read and committed before returning;
+a caller-supplied mtr is not committed here.
+@return TRUE if level 2 or level 3 page */
+UNIV_INTERN
ibool
ibuf_page(
/*======*/
- /* out: TRUE if level 2 or level 3 page */
- ulint space, /* in: space id */
- ulint page_no)/* in: page number */
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
+ ulint page_no,/*!< in: page number */
+ mtr_t* mtr) /*!< in: mtr which will contain an x-latch to the
+ bitmap page if the page is not one of the fixed
+ address ibuf pages, or NULL, in which case a new
+ transaction is created. */
{
- page_t* bitmap_page;
- mtr_t mtr;
ibool ret;
+ mtr_t local_mtr;
+ page_t* bitmap_page;
- if (recv_no_ibuf_operations) {
- /* Recovery is running: no ibuf operations should be
- performed */
-
- return(FALSE);
- }
+ ut_ad(!recv_no_ibuf_operations);
- if (ibuf_fixed_addr_page(space, page_no)) {
+ if (ibuf_fixed_addr_page(space, zip_size, page_no)) {
return(TRUE);
- }
-
- if (space != 0) {
- /* Currently we only have an ibuf tree in space 0 */
+ } else if (space != IBUF_SPACE_ID) {
return(FALSE);
}
- ut_ad(fil_space_get_type(space) == FIL_TABLESPACE);
-
- mtr_start(&mtr);
-
- bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
-
- ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF,
- &mtr);
- mtr_commit(&mtr);
-
- return(ret);
-}
+ ut_ad(fil_space_get_type(IBUF_SPACE_ID) == FIL_TABLESPACE);
-/***************************************************************************
-Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
+ if (mtr == NULL) {
+ mtr = &local_mtr;
+ mtr_start(mtr);
+ }
-ibool
-ibuf_page_low(
-/*==========*/
- /* out: TRUE if level 2 or level 3 page */
- ulint space, /* in: space id */
- ulint page_no,/* in: page number */
- mtr_t* mtr) /* in: mtr which will contain an x-latch to the
- bitmap page if the page is not one of the fixed
- address ibuf pages */
-{
- page_t* bitmap_page;
- ibool ret;
+ bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
- if (ibuf_fixed_addr_page(space, page_no)) {
+ ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size,
+ IBUF_BITMAP_IBUF, mtr);
- return(TRUE);
+ if (mtr == &local_mtr) { /* commit only the mini-transaction started in this function */
+ mtr_commit(mtr);
}
- bitmap_page = ibuf_bitmap_get_map_page(space, page_no, mtr);
-
- ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF,
- mtr);
return(ret);
}
-/************************************************************************
-Returns the page number field of an ibuf record. */
+/********************************************************************//**
+Returns the page number field of an ibuf record.
+@return page number */
static
ulint
ibuf_rec_get_page_no(
/*=================*/
- /* out: page number */
- rec_t* rec) /* in: ibuf record */
+ const rec_t* rec) /*!< in: ibuf record */
{
- byte* field;
- ulint len;
+ const byte* field;
+ ulint len;
ut_ad(ibuf_inside());
ut_ad(rec_get_n_fields_old(rec) > 2);
@@ -1071,18 +1099,18 @@ ibuf_rec_get_page_no(
return(mach_read_from_4(field));
}
-/************************************************************************
+/********************************************************************//**
Returns the space id field of an ibuf record. For < 4.1.x format records
-returns 0. */
+returns 0.
+@return space id */
static
ulint
ibuf_rec_get_space(
/*===============*/
- /* out: space id */
- rec_t* rec) /* in: ibuf record */
+ const rec_t* rec) /*!< in: ibuf record */
{
- byte* field;
- ulint len;
+ const byte* field;
+ ulint len;
ut_ad(ibuf_inside());
ut_ad(rec_get_n_fields_old(rec) > 2);
@@ -1105,16 +1133,16 @@ ibuf_rec_get_space(
return(0);
}
-/************************************************************************
+/********************************************************************//**
Creates a dummy index for inserting a record to a non-clustered index.
-*/
+
+@return dummy index */
static
dict_index_t*
ibuf_dummy_index_create(
/*====================*/
- /* out: dummy index */
- ulint n, /* in: number of fields */
- ibool comp) /* in: TRUE=use compact record format */
+ ulint n, /*!< in: number of fields */
+ ibool comp) /*!< in: TRUE=use compact record format */
{
dict_table_t* table;
dict_index_t* index;
@@ -1133,32 +1161,31 @@ ibuf_dummy_index_create(
return(index);
}
-/************************************************************************
+/********************************************************************//**
Add a column to the dummy index */
static
void
ibuf_dummy_index_add_col(
/*=====================*/
- dict_index_t* index, /* in: dummy index */
- dtype_t* type, /* in: the data type of the column */
- ulint len) /* in: length of the column */
+ dict_index_t* index, /*!< in: dummy index */
+ const dtype_t* type, /*!< in: the data type of the column */
+ ulint len) /*!< in: length of the column */
{
ulint i = index->table->n_def;
dict_mem_table_add_col(index->table, NULL, NULL,
dtype_get_mtype(type),
dtype_get_prtype(type),
dtype_get_len(type));
- dict_index_add_col(index, index->table, (dict_col_t*)
+ dict_index_add_col(index, index->table,
dict_table_get_nth_col(index->table, i), len);
}
-/************************************************************************
-Deallocates a dummy index for inserting a record to a non-clustered index.
-*/
+/********************************************************************//**
+Deallocates a dummy index for inserting a record to a non-clustered index. */
static
void
ibuf_dummy_index_free(
/*==================*/
- dict_index_t* index) /* in: dummy index */
+ dict_index_t* index) /*!< in, own: dummy index */
{
dict_table_t* table = index->table;
@@ -1166,28 +1193,79 @@ ibuf_dummy_index_free(
dict_mem_table_free(table);
}
-/*************************************************************************
+/*********************************************************************//**
+Builds the entry to insert into a non-clustered index when we have the
+corresponding record in an ibuf index.
+
+NOTE that as we copy pointers to fields in ibuf_rec, the caller must
+hold a latch to the ibuf_rec page as long as the entry is used!
+
+@return own: entry to insert to a non-clustered index */
+UNIV_INLINE
+dtuple_t*
+ibuf_build_entry_pre_4_1_x(
+/*=======================*/
+ const rec_t* ibuf_rec, /*!< in: record in an insert buffer */
+ mem_heap_t* heap, /*!< in: heap where built */
+ dict_index_t** pindex) /*!< out, own: dummy index that
+ describes the entry */
+{
+ ulint i;
+ ulint len;
+ const byte* types;
+ dtuple_t* tuple;
+ ulint n_fields;
+
+ ut_a(trx_doublewrite_must_reset_space_ids);
+ ut_a(!trx_sys_multiple_tablespace_format);
+
+ n_fields = rec_get_n_fields_old(ibuf_rec) - 2;
+ tuple = dtuple_create(heap, n_fields);
+ types = rec_get_nth_field_old(ibuf_rec, 1, &len);
+
+ ut_a(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
+
+ for (i = 0; i < n_fields; i++) {
+ const byte* data;
+ dfield_t* field;
+
+ field = dtuple_get_nth_field(tuple, i);
+
+ data = rec_get_nth_field_old(ibuf_rec, i + 2, &len);
+
+ dfield_set_data(field, data, len);
+
+ dtype_read_for_order_and_null_size(
+ dfield_get_type(field),
+ types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE);
+ }
+
+ *pindex = ibuf_dummy_index_create(n_fields, FALSE);
+
+ return(tuple);
+}
+
+/*********************************************************************//**
Builds the entry to insert into a non-clustered index when we have the
-corresponding record in an ibuf index. */
+corresponding record in an ibuf index.
+
+NOTE that as we copy pointers to fields in ibuf_rec, the caller must
+hold a latch to the ibuf_rec page as long as the entry is used!
+
+@return own: entry to insert to a non-clustered index */
static
dtuple_t*
ibuf_build_entry_from_ibuf_rec(
/*===========================*/
- /* out, own: entry to insert to
- a non-clustered index; NOTE that
- as we copy pointers to fields in
- ibuf_rec, the caller must hold a
- latch to the ibuf_rec page as long
- as the entry is used! */
- rec_t* ibuf_rec, /* in: record in an insert buffer */
- mem_heap_t* heap, /* in: heap where built */
- dict_index_t** pindex) /* out, own: dummy index that
+ const rec_t* ibuf_rec, /*!< in: record in an insert buffer */
+ mem_heap_t* heap, /*!< in: heap where built */
+ dict_index_t** pindex) /*!< out, own: dummy index that
describes the entry */
{
dtuple_t* tuple;
dfield_t* field;
ulint n_fields;
- byte* types;
+ const byte* types;
const byte* data;
ulint len;
ulint i;
@@ -1198,29 +1276,7 @@ ibuf_build_entry_from_ibuf_rec(
if (len > 1) {
/* This a < 4.1.x format record */
- ut_a(trx_doublewrite_must_reset_space_ids);
- ut_a(!trx_sys_multiple_tablespace_format);
-
- n_fields = rec_get_n_fields_old(ibuf_rec) - 2;
- tuple = dtuple_create(heap, n_fields);
- types = rec_get_nth_field_old(ibuf_rec, 1, &len);
-
- ut_a(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
-
- for (i = 0; i < n_fields; i++) {
- field = dtuple_get_nth_field(tuple, i);
-
- data = rec_get_nth_field_old(ibuf_rec, i + 2, &len);
-
- dfield_set_data(field, data, len);
-
- dtype_read_for_order_and_null_size(
- dfield_get_type(field),
- types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE);
- }
-
- *pindex = ibuf_dummy_index_create(n_fields, FALSE);
- return(tuple);
+ return(ibuf_build_entry_pre_4_1_x(ibuf_rec, heap, pindex));
}
/* This a >= 4.1.x format record */
@@ -1262,29 +1318,38 @@ ibuf_build_entry_from_ibuf_rec(
ibuf_dummy_index_add_col(index, dfield_get_type(field), len);
}
+ /* Prevent an ut_ad() failure in page_zip_write_rec() by
+ adding system columns to the dummy table pointed to by the
+ dummy secondary index. The insert buffer is only used for
+ secondary indexes, whose records never contain any system
+ columns, such as DB_TRX_ID. */
+ ut_d(dict_table_add_system_columns(index->table, index->table->heap));
+
*pindex = index;
+
return(tuple);
}
-/************************************************************************
+/********************************************************************//**
Returns the space taken by a stored non-clustered index entry if converted to
-an index record. */
+an index record.
+@return size of index record in bytes + an upper limit of the space
+taken in the page directory */
static
ulint
ibuf_rec_get_volume(
/*================*/
- /* out: size of index record in bytes + an upper
- limit of the space taken in the page directory */
- rec_t* ibuf_rec)/* in: ibuf record */
+ const rec_t* ibuf_rec)/*!< in: ibuf record */
{
- dtype_t dtype;
- ibool new_format = FALSE;
- ulint data_size = 0;
- ulint n_fields;
- byte* types;
- byte* data;
- ulint len;
- ulint i;
+ dtype_t dtype;
+ ibool new_format = FALSE;
+ ulint data_size = 0;
+ ulint n_fields;
+ const byte* types;
+ const byte* data;
+ ulint len;
+ ulint i;
+ ulint comp;
ut_ad(ibuf_inside());
ut_ad(rec_get_n_fields_old(ibuf_rec) > 2);
@@ -1302,6 +1367,7 @@ ibuf_rec_get_volume(
types = rec_get_nth_field_old(ibuf_rec, 1, &len);
ut_ad(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
+ comp = 0;
} else {
/* >= 4.1.x format record */
@@ -1310,15 +1376,17 @@ ibuf_rec_get_volume(
types = rec_get_nth_field_old(ibuf_rec, 3, &len);
- ut_a(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE <= 1);
- if (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) {
+ comp = len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
+
+ ut_a(comp <= 1);
+ if (comp) {
/* compact record format */
ulint volume;
dict_index_t* dummy_index;
mem_heap_t* heap = mem_heap_create(500);
dtuple_t* entry = ibuf_build_entry_from_ibuf_rec(
ibuf_rec, heap, &dummy_index);
- volume = rec_get_converted_size(dummy_index, entry);
+ volume = rec_get_converted_size(dummy_index, entry, 0);
ibuf_dummy_index_free(dummy_index);
mem_heap_free(heap);
return(volume + page_dir_calc_reserved_space(1));
@@ -1345,37 +1413,38 @@ ibuf_rec_get_volume(
}
if (len == UNIV_SQL_NULL) {
- data_size += dtype_get_sql_null_size(&dtype);
+ data_size += dtype_get_sql_null_size(&dtype, comp);
} else {
data_size += len;
}
}
- return(data_size + rec_get_converted_extra_size(data_size, n_fields)
+ return(data_size + rec_get_converted_extra_size(data_size, n_fields, 0)
+ page_dir_calc_reserved_space(1));
}
-/*************************************************************************
+/*********************************************************************//**
Builds the tuple to insert to an ibuf tree when we have an entry for a
-non-clustered index. */
+non-clustered index.
+
+NOTE that the original entry must be kept because we copy pointers to
+its fields.
+
+@return own: entry to insert into an ibuf index tree */
static
dtuple_t*
ibuf_entry_build(
/*=============*/
- /* out, own: entry to insert into an ibuf
- index tree; NOTE that the original entry
- must be kept because we copy pointers to its
- fields */
- dict_index_t* index, /* in: non-clustered index */
- dtuple_t* entry, /* in: entry for a non-clustered index */
- ulint space, /* in: space id */
- ulint page_no,/* in: index page number where entry should
+ dict_index_t* index, /*!< in: non-clustered index */
+ const dtuple_t* entry, /*!< in: entry for a non-clustered index */
+ ulint space, /*!< in: space id */
+ ulint page_no,/*!< in: index page number where entry should
be inserted */
- mem_heap_t* heap) /* in: heap into which to build */
+ mem_heap_t* heap) /*!< in: heap into which to build */
{
dtuple_t* tuple;
dfield_t* field;
- dfield_t* entry_field;
+ const dfield_t* entry_field;
ulint n_fields;
byte* buf;
byte* buf2;
@@ -1461,12 +1530,13 @@ ibuf_entry_build(
#ifdef UNIV_DEBUG
if (fixed_len) {
/* dict_index_add_col() should guarantee these */
- ut_ad(fixed_len <= (ulint) entry_field->type.len);
+ ut_ad(fixed_len <= (ulint)
+ dfield_get_type(entry_field)->len);
if (ifield->prefix_len) {
ut_ad(ifield->prefix_len == fixed_len);
} else {
- ut_ad(fixed_len
- == (ulint) entry_field->type.len);
+ ut_ad(fixed_len == (ulint)
+ dfield_get_type(entry_field)->len);
}
}
#endif /* UNIV_DEBUG */
@@ -1494,17 +1564,17 @@ ibuf_entry_build(
return(tuple);
}
-/*************************************************************************
+/*********************************************************************//**
Builds a search tuple used to search buffered inserts for an index page.
-This is for < 4.1.x format records */
+This is for < 4.1.x format records
+@return own: search tuple */
static
dtuple_t*
ibuf_search_tuple_build(
/*====================*/
- /* out, own: search tuple */
- ulint space, /* in: space id */
- ulint page_no,/* in: index page number */
- mem_heap_t* heap) /* in: heap into which to build */
+ ulint space, /*!< in: space id */
+ ulint page_no,/*!< in: index page number */
+ mem_heap_t* heap) /*!< in: heap into which to build */
{
dtuple_t* tuple;
dfield_t* field;
@@ -1531,17 +1601,17 @@ ibuf_search_tuple_build(
return(tuple);
}
-/*************************************************************************
+/*********************************************************************//**
Builds a search tuple used to search buffered inserts for an index page.
-This is for >= 4.1.x format records. */
+This is for >= 4.1.x format records.
+@return own: search tuple */
static
dtuple_t*
ibuf_new_search_tuple_build(
/*========================*/
- /* out, own: search tuple */
- ulint space, /* in: space id */
- ulint page_no,/* in: index page number */
- mem_heap_t* heap) /* in: heap into which to build */
+ ulint space, /*!< in: space id */
+ ulint page_no,/*!< in: index page number */
+ mem_heap_t* heap) /*!< in: heap into which to build */
{
dtuple_t* tuple;
dfield_t* field;
@@ -1586,15 +1656,14 @@ ibuf_new_search_tuple_build(
return(tuple);
}
-/*************************************************************************
+/*********************************************************************//**
Checks if there are enough pages in the free list of the ibuf tree that we
-dare to start a pessimistic insert to the insert buffer. */
+dare to start a pessimistic insert to the insert buffer.
+@return TRUE if enough free pages in list */
UNIV_INLINE
ibool
-ibuf_data_enough_free_for_insert(
-/*=============================*/
- /* out: TRUE if enough free pages in list */
- ibuf_data_t* data) /* in: ibuf data for the space */
+ibuf_data_enough_free_for_insert(void)
+/*==================================*/
{
ut_ad(mutex_own(&ibuf_mutex));
@@ -1604,57 +1673,49 @@ ibuf_data_enough_free_for_insert(
inserts buffered for pages that we read to the buffer pool, without
any risk of running out of free space in the insert buffer. */
- if (data->free_list_len >= data->size / 2 + 3 * data->height) {
-
- return(TRUE);
- }
-
- return(FALSE);
+ return(ibuf->free_list_len >= (ibuf->size / 2) + 3 * ibuf->height);
}
-/*************************************************************************
+/*********************************************************************//**
Checks if there are enough pages in the free list of the ibuf tree that we
-should remove them and free to the file space management. */
+should remove them and free to the file space management.
+@return TRUE if enough free pages in list */
UNIV_INLINE
ibool
-ibuf_data_too_much_free(
-/*====================*/
- /* out: TRUE if enough free pages in list */
- ibuf_data_t* data) /* in: ibuf data for the space */
+ibuf_data_too_much_free(void)
+/*=========================*/
{
ut_ad(mutex_own(&ibuf_mutex));
- return(data->free_list_len >= 3 + data->size / 2 + 3 * data->height);
+ return(ibuf->free_list_len >= 3 + (ibuf->size / 2) + 3 * ibuf->height);
}
-/*************************************************************************
+/*********************************************************************//**
Allocates a new page from the ibuf file segment and adds it to the free
-list. */
+list.
+@return DB_SUCCESS, or DB_STRONG_FAIL if no space left */
static
ulint
-ibuf_add_free_page(
-/*===============*/
- /* out: DB_SUCCESS, or DB_STRONG_FAIL
- if no space left */
- ulint space, /* in: space id */
- ibuf_data_t* ibuf_data) /* in: ibuf data for the space */
+ibuf_add_free_page(void)
+/*====================*/
{
mtr_t mtr;
page_t* header_page;
+ ulint flags;
+ ulint zip_size;
ulint page_no;
page_t* page;
page_t* root;
page_t* bitmap_page;
- ut_a(space == 0);
-
mtr_start(&mtr);
/* Acquire the fsp latch before the ibuf header, obeying the latching
order */
- mtr_x_lock(fil_space_get_latch(space), &mtr);
+ mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr);
+ zip_size = dict_table_flags_to_zip_size(flags);
- header_page = ibuf_header_page_get(space, &mtr);
+ header_page = ibuf_header_page_get(&mtr);
/* Allocate a new page: NOTE that if the page has been a part of a
non-clustered index which has subsequently been dropped, then the
@@ -1666,26 +1727,33 @@ ibuf_add_free_page(
of a deadlock. This is the reason why we created a special ibuf
header page apart from the ibuf tree. */
- page_no = fseg_alloc_free_page(header_page + IBUF_HEADER
- + IBUF_TREE_SEG_HEADER, 0, FSP_UP,
- &mtr);
+ page_no = fseg_alloc_free_page(
+ header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, 0, FSP_UP,
+ &mtr);
+
if (page_no == FIL_NULL) {
mtr_commit(&mtr);
return(DB_STRONG_FAIL);
}
- page = buf_page_get(space, page_no, RW_X_LATCH, &mtr);
+ {
+ buf_block_t* block;
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_TREE_NODE_NEW);
-#endif /* UNIV_SYNC_DEBUG */
+ block = buf_page_get(
+ IBUF_SPACE_ID, 0, page_no, RW_X_LATCH, &mtr);
+
+ buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
+
+
+ page = buf_block_get_frame(block);
+ }
ibuf_enter();
mutex_enter(&ibuf_mutex);
- root = ibuf_tree_root_get(ibuf_data, space, &mtr);
+ root = ibuf_tree_root_get(&mtr);
/* Add the page to the free list and update the ibuf size data */
@@ -1695,16 +1763,18 @@ ibuf_add_free_page(
mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_IBUF_FREE_LIST,
MLOG_2BYTES, &mtr);
- ibuf_data->seg_size++;
- ibuf_data->free_list_len++;
+ ibuf->seg_size++;
+ ibuf->free_list_len++;
/* Set the bit indicating that this page is now an ibuf tree page
(level 2 page) */
- bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
+ bitmap_page = ibuf_bitmap_get_map_page(
+ IBUF_SPACE_ID, page_no, zip_size, &mtr);
+
+ ibuf_bitmap_page_set_bits(
+ bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, TRUE, &mtr);
- ibuf_bitmap_page_set_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF,
- TRUE, &mtr);
mtr_commit(&mtr);
mutex_exit(&ibuf_mutex);
@@ -1714,32 +1784,31 @@ ibuf_add_free_page(
return(DB_SUCCESS);
}
-/*************************************************************************
+/*********************************************************************//**
Removes a page from the free list and frees it to the fsp system. */
static
void
-ibuf_remove_free_page(
-/*==================*/
- ulint space, /* in: space id */
- ibuf_data_t* ibuf_data) /* in: ibuf data for the space */
+ibuf_remove_free_page(void)
+/*=======================*/
{
mtr_t mtr;
mtr_t mtr2;
page_t* header_page;
+ ulint flags;
+ ulint zip_size;
ulint page_no;
page_t* page;
page_t* root;
page_t* bitmap_page;
- ut_a(space == 0);
-
mtr_start(&mtr);
/* Acquire the fsp latch before the ibuf header, obeying the latching
order */
- mtr_x_lock(fil_space_get_latch(space), &mtr);
+ mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr);
+ zip_size = dict_table_flags_to_zip_size(flags);
- header_page = ibuf_header_page_get(space, &mtr);
+ header_page = ibuf_header_page_get(&mtr);
/* Prevent pessimistic inserts to insert buffer trees for a while */
mutex_enter(&ibuf_pessimistic_insert_mutex);
@@ -1748,7 +1817,7 @@ ibuf_remove_free_page(
mutex_enter(&ibuf_mutex);
- if (!ibuf_data_too_much_free(ibuf_data)) {
+ if (!ibuf_data_too_much_free()) {
mutex_exit(&ibuf_mutex);
@@ -1763,11 +1832,10 @@ ibuf_remove_free_page(
mtr_start(&mtr2);
- root = ibuf_tree_root_get(ibuf_data, space, &mtr2);
+ root = ibuf_tree_root_get(&mtr2);
page_no = flst_get_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- &mtr2)
- .page;
+ &mtr2).page;
/* NOTE that we must release the latch on the ibuf tree root
because in fseg_free_page we access level 1 pages, and the root
@@ -1785,45 +1853,54 @@ ibuf_remove_free_page(
page from it. */
fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
- space, page_no, &mtr);
+ IBUF_SPACE_ID, page_no, &mtr);
+
#ifdef UNIV_DEBUG_FILE_ACCESSES
- buf_page_reset_file_page_was_freed(space, page_no);
+ buf_page_reset_file_page_was_freed(IBUF_SPACE_ID, page_no);
#endif
+
ibuf_enter();
mutex_enter(&ibuf_mutex);
- root = ibuf_tree_root_get(ibuf_data, space, &mtr);
+ root = ibuf_tree_root_get(&mtr);
ut_ad(page_no == flst_get_last(root + PAGE_HEADER
- + PAGE_BTR_IBUF_FREE_LIST, &mtr)
- .page);
+ + PAGE_BTR_IBUF_FREE_LIST, &mtr).page);
- page = buf_page_get(space, page_no, RW_X_LATCH, &mtr);
+ {
+ buf_block_t* block;
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_TREE_NODE);
-#endif /* UNIV_SYNC_DEBUG */
+ block = buf_page_get(
+ IBUF_SPACE_ID, 0, page_no, RW_X_LATCH, &mtr);
+
+ buf_block_dbg_add_level(block, SYNC_TREE_NODE);
+
+
+ page = buf_block_get_frame(block);
+ }
/* Remove the page from the free list and update the ibuf size data */
flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
- ibuf_data->seg_size--;
- ibuf_data->free_list_len--;
+ ibuf->seg_size--;
+ ibuf->free_list_len--;
mutex_exit(&ibuf_pessimistic_insert_mutex);
/* Set the bit indicating that this page is no more an ibuf tree page
(level 2 page) */
- bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
+ bitmap_page = ibuf_bitmap_get_map_page(
+ IBUF_SPACE_ID, page_no, zip_size, &mtr);
+
+ ibuf_bitmap_page_set_bits(
+ bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, FALSE, &mtr);
- ibuf_bitmap_page_set_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF,
- FALSE, &mtr);
#ifdef UNIV_DEBUG_FILE_ACCESSES
- buf_page_set_file_page_was_freed(space, page_no);
+ buf_page_set_file_page_was_freed(IBUF_SPACE_ID, page_no);
#endif
mtr_commit(&mtr);
@@ -1832,45 +1909,34 @@ ibuf_remove_free_page(
ibuf_exit();
}
-/***************************************************************************
+/***********************************************************************//**
Frees excess pages from the ibuf free list. This function is called when an OS
thread calls fsp services to allocate a new file segment, or a new page to a
file segment, and the thread did not own the fsp latch before this call. */
-
+UNIV_INTERN
void
-ibuf_free_excess_pages(
-/*===================*/
- ulint space) /* in: space id */
+ibuf_free_excess_pages(void)
+/*========================*/
{
- ibuf_data_t* ibuf_data;
ulint i;
- if (space != 0) {
- fprintf(stderr,
- "InnoDB: Error: calling ibuf_free_excess_pages"
- " for space %lu\n", (ulong) space);
- return;
- }
-
#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(fil_space_get_latch(space), RW_LOCK_EX));
+ ut_ad(rw_lock_own(fil_space_get_latch(IBUF_SPACE_ID, NULL),
+ RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
- ut_ad(rw_lock_get_x_lock_count(fil_space_get_latch(space)) == 1);
+
+ ut_ad(rw_lock_get_x_lock_count(
+ fil_space_get_latch(IBUF_SPACE_ID, NULL)) == 1);
+
ut_ad(!ibuf_inside());
/* NOTE: We require that the thread did not own the latch before,
because then we know that we can obey the correct latching order
for ibuf latches */
- ibuf_data = fil_space_get_ibuf_data(space);
-
- if (ibuf_data == NULL) {
- /* Not yet initialized */
-
-#if 0 /* defined UNIV_DEBUG */
- fprintf(stderr,
- "Ibuf for space %lu not yet initialized\n", space);
-#endif
+ if (!ibuf) {
+ /* Not yet initialized; not sure if this is possible, but
+ does no harm to check for it. */
return;
}
@@ -1882,7 +1948,7 @@ ibuf_free_excess_pages(
mutex_enter(&ibuf_mutex);
- if (!ibuf_data_too_much_free(ibuf_data)) {
+ if (!ibuf_data_too_much_free()) {
mutex_exit(&ibuf_mutex);
@@ -1891,32 +1957,32 @@ ibuf_free_excess_pages(
mutex_exit(&ibuf_mutex);
- ibuf_remove_free_page(space, ibuf_data);
+ ibuf_remove_free_page();
}
}
-/*************************************************************************
-Reads page numbers from a leaf in an ibuf tree. */
+/*********************************************************************//**
+Reads page numbers from a leaf in an ibuf tree.
+@return a lower limit for the combined volume of records which will be
+merged */
static
ulint
ibuf_get_merge_page_nos(
/*====================*/
- /* out: a lower limit for the combined volume
- of records which will be merged */
- ibool contract,/* in: TRUE if this function is called to
+ ibool contract,/*!< in: TRUE if this function is called to
contract the tree, FALSE if this is called
when a single page becomes full and we look
if it pays to read also nearby pages */
- rec_t* rec, /* in: record from which we read up and down
+ rec_t* rec, /*!< in: record from which we read up and down
in the chain of records */
- ulint* space_ids,/* in/out: space id's of the pages */
- ib_longlong* space_versions,/* in/out: tablespace version
+ ulint* space_ids,/*!< in/out: space id's of the pages */
+ ib_int64_t* space_versions,/*!< in/out: tablespace version
timestamps; used to prevent reading in old
pages after DISCARD + IMPORT tablespace */
- ulint* page_nos,/* in/out: buffer for at least
+ ulint* page_nos,/*!< in/out: buffer for at least
IBUF_MAX_N_PAGES_MERGED many page numbers;
the page numbers are in an ascending order */
- ulint* n_stored)/* out: number of page numbers stored to
+ ulint* n_stored)/*!< out: number of page numbers stored to
page_nos in this function */
{
ulint prev_page_no;
@@ -1966,8 +2032,8 @@ ibuf_get_merge_page_nos(
rec_space_id = ibuf_rec_get_space(rec);
if (rec_space_id != first_space_id
- || rec_page_no / IBUF_MERGE_AREA
- != first_page_no / IBUF_MERGE_AREA) {
+ || (rec_page_no / IBUF_MERGE_AREA)
+ != (first_page_no / IBUF_MERGE_AREA)) {
break;
}
@@ -2068,81 +2134,40 @@ ibuf_get_merge_page_nos(
return(sum_volumes);
}
-/*************************************************************************
-Contracts insert buffer trees by reading pages to the buffer pool. */
+/*********************************************************************//**
+Contracts insert buffer trees by reading pages to the buffer pool.
+@return a lower limit for the combined size in bytes of entries which
+will be merged from ibuf trees to the pages read, 0 if ibuf is
+empty */
static
ulint
ibuf_contract_ext(
/*==============*/
- /* out: a lower limit for the combined size in bytes
- of entries which will be merged from ibuf trees to the
- pages read, 0 if ibuf is empty */
- ulint* n_pages,/* out: number of pages to which merged */
- ibool sync) /* in: TRUE if the caller wants to wait for the
+ ulint* n_pages,/*!< out: number of pages to which merged */
+ ibool sync) /*!< in: TRUE if the caller wants to wait for the
issued read with the highest tablespace address
to complete */
{
- ulint rnd_pos;
- ibuf_data_t* data;
btr_pcur_t pcur;
- ulint space;
- ibool all_trees_empty;
ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
- ib_longlong space_versions[IBUF_MAX_N_PAGES_MERGED];
+ ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED];
ulint n_stored;
ulint sum_sizes;
mtr_t mtr;
*n_pages = 0;
-loop:
ut_ad(!ibuf_inside());
mutex_enter(&ibuf_mutex);
- ut_ad(ibuf_validate_low());
-
- /* Choose an ibuf tree at random (though there really is only one tree
- in the current implementation) */
- ibuf_rnd += 865558671;
-
- rnd_pos = ibuf_rnd % ibuf->size;
-
- all_trees_empty = TRUE;
-
- data = UT_LIST_GET_FIRST(ibuf->data_list);
-
- for (;;) {
- if (!data->empty) {
- all_trees_empty = FALSE;
-
- if (rnd_pos < data->size) {
-
- break;
- }
-
- rnd_pos -= data->size;
- }
-
- data = UT_LIST_GET_NEXT(data_list, data);
-
- if (data == NULL) {
- if (all_trees_empty) {
- mutex_exit(&ibuf_mutex);
-
- return(0);
- }
+ if (ibuf->empty) {
+ibuf_is_empty:
+ mutex_exit(&ibuf_mutex);
- data = UT_LIST_GET_FIRST(ibuf->data_list);
- }
+ return(0);
}
- ut_ad(data);
-
- space = data->index->space;
-
- ut_a(space == 0); /* We currently only have an ibuf tree in
- space 0 */
mtr_start(&mtr);
ibuf_enter();
@@ -2150,22 +2175,23 @@ loop:
/* Open a cursor to a randomly chosen leaf of the tree, at a random
position within the leaf */
- btr_pcur_open_at_rnd_pos(data->index, BTR_SEARCH_LEAF, &pcur, &mtr);
-
- if (0 == page_get_n_recs(btr_pcur_get_page(&pcur))) {
+ btr_pcur_open_at_rnd_pos(ibuf->index, BTR_SEARCH_LEAF, &pcur, &mtr);
- /* This tree is empty */
+ if (page_get_n_recs(btr_pcur_get_page(&pcur)) == 0) {
+ /* When the ibuf tree is emptied completely, the last record
+ is removed using an optimistic delete and ibuf_size_update
+ is not called, causing ibuf->empty to remain FALSE. If we do
+ not reset it to TRUE here then database shutdown will hang
+ in the loop in ibuf_contract_for_n_pages. */
- data->empty = TRUE;
+ ibuf->empty = TRUE;
ibuf_exit();
mtr_commit(&mtr);
btr_pcur_close(&pcur);
- mutex_exit(&ibuf_mutex);
-
- goto loop;
+ goto ibuf_is_empty;
}
mutex_exit(&ibuf_mutex);
@@ -2189,16 +2215,16 @@ loop:
return(sum_sizes + 1);
}
-/*************************************************************************
-Contracts insert buffer trees by reading pages to the buffer pool. */
-
+/*********************************************************************//**
+Contracts insert buffer trees by reading pages to the buffer pool.
+@return a lower limit for the combined size in bytes of entries which
+will be merged from ibuf trees to the pages read, 0 if ibuf is
+empty */
+UNIV_INTERN
ulint
ibuf_contract(
/*==========*/
- /* out: a lower limit for the combined size in bytes
- of entries which will be merged from ibuf trees to the
- pages read, 0 if ibuf is empty */
- ibool sync) /* in: TRUE if the caller wants to wait for the
+ ibool sync) /*!< in: TRUE if the caller wants to wait for the
issued read with the highest tablespace address
to complete */
{
@@ -2207,19 +2233,19 @@ ibuf_contract(
return(ibuf_contract_ext(&n_pages, sync));
}
-/*************************************************************************
-Contracts insert buffer trees by reading pages to the buffer pool. */
-
+/*********************************************************************//**
+Contracts insert buffer trees by reading pages to the buffer pool.
+@return a lower limit for the combined size in bytes of entries which
+will be merged from ibuf trees to the pages read, 0 if ibuf is
+empty */
+UNIV_INTERN
ulint
ibuf_contract_for_n_pages(
/*======================*/
- /* out: a lower limit for the combined size in bytes
- of entries which will be merged from ibuf trees to the
- pages read, 0 if ibuf is empty */
- ibool sync, /* in: TRUE if the caller wants to wait for the
+ ibool sync, /*!< in: TRUE if the caller wants to wait for the
issued read with the highest tablespace address
to complete */
- ulint n_pages)/* in: try to read at least this many pages to
+ ulint n_pages)/*!< in: try to read at least this many pages to
the buffer pool and merge the ibuf contents to
them */
{
@@ -2242,13 +2268,13 @@ ibuf_contract_for_n_pages(
return(sum_bytes);
}
-/*************************************************************************
+/*********************************************************************//**
Contract insert buffer trees after insert if they are too big. */
UNIV_INLINE
void
ibuf_contract_after_insert(
/*=======================*/
- ulint entry_size) /* in: size of a record which was inserted
+ ulint entry_size) /*!< in: size of a record which was inserted
into an ibuf tree */
{
ibool sync;
@@ -2283,26 +2309,24 @@ ibuf_contract_after_insert(
}
}
-/*************************************************************************
+/*********************************************************************//**
Gets an upper limit for the combined size of entries buffered in the insert
-buffer for a given page. */
-
+buffer for a given page.
+@return upper limit for the volume of buffered inserts for the index
+page, in bytes; UNIV_PAGE_SIZE, if the entries for the index page span
+several pages in the insert buffer */
+static
ulint
ibuf_get_volume_buffered(
/*=====================*/
- /* out: upper limit for the volume of
- buffered inserts for the index page, in bytes;
- we may also return UNIV_PAGE_SIZE, if the
- entries for the index page span on several
- pages in the insert buffer */
- btr_pcur_t* pcur, /* in: pcur positioned at a place in an
+ btr_pcur_t* pcur, /*!< in: pcur positioned at a place in an
insert buffer tree where we would insert an
entry for the index page whose number is
page_no, latch mode has to be BTR_MODIFY_PREV
or BTR_MODIFY_TREE */
- ulint space, /* in: space id */
- ulint page_no,/* in: page number of an index page */
- mtr_t* mtr) /* in: mtr */
+ ulint space, /*!< in: space id */
+ ulint page_no,/*!< in: page number of an index page */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint volume;
rec_t* rec;
@@ -2323,8 +2347,7 @@ ibuf_get_volume_buffered(
volume = 0;
rec = btr_pcur_get_rec(pcur);
-
- page = buf_frame_align(rec);
+ page = page_align(rec);
if (page_rec_is_supremum(rec)) {
rec = page_rec_get_prev(rec);
@@ -2356,16 +2379,23 @@ ibuf_get_volume_buffered(
goto count_later;
}
- prev_page = buf_page_get(0, prev_page_no, RW_X_LATCH, mtr);
+ {
+ buf_block_t* block;
+
+ block = buf_page_get(
+ IBUF_SPACE_ID, 0, prev_page_no, RW_X_LATCH, mtr);
+
+ buf_block_dbg_add_level(block, SYNC_TREE_NODE);
+
+
+ prev_page = buf_block_get_frame(block);
+ }
+
#ifdef UNIV_BTR_DEBUG
ut_a(btr_page_get_next(prev_page, mtr)
- == buf_frame_get_page_no(page));
+ == page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(prev_page, SYNC_TREE_NODE);
-#endif /* UNIV_SYNC_DEBUG */
-
rec = page_get_supremum_rec(prev_page);
rec = page_rec_get_prev(rec);
@@ -2423,16 +2453,22 @@ count_later:
return(volume);
}
- next_page = buf_page_get(0, next_page_no, RW_X_LATCH, mtr);
+ {
+ buf_block_t* block;
+
+ block = buf_page_get(
+ IBUF_SPACE_ID, 0, next_page_no, RW_X_LATCH, mtr);
+
+ buf_block_dbg_add_level(block, SYNC_TREE_NODE);
+
+
+ next_page = buf_block_get_frame(block);
+ }
+
#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_prev(next_page, mtr)
- == buf_frame_get_page_no(page));
+ ut_a(btr_page_get_prev(next_page, mtr) == page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(next_page, SYNC_TREE_NODE);
-#endif /* UNIV_SYNC_DEBUG */
-
rec = page_get_infimum_rec(next_page);
rec = page_rec_get_next(rec);
@@ -2456,37 +2492,33 @@ count_later:
}
}
-/*************************************************************************
+/*********************************************************************//**
Reads the biggest tablespace id from the high end of the insert buffer
tree and updates the counter in fil_system. */
-
+UNIV_INTERN
void
ibuf_update_max_tablespace_id(void)
/*===============================*/
{
ulint max_space_id;
- rec_t* rec;
- byte* field;
+ const rec_t* rec;
+ const byte* field;
ulint len;
- ibuf_data_t* ibuf_data;
- dict_index_t* ibuf_index;
btr_pcur_t pcur;
mtr_t mtr;
- ibuf_data = fil_space_get_ibuf_data(0);
-
- ibuf_index = ibuf_data->index;
- ut_a(!dict_table_is_comp(ibuf_index->table));
+ ut_a(!dict_table_is_comp(ibuf->index->table));
ibuf_enter();
mtr_start(&mtr);
- btr_pcur_open_at_index_side(FALSE, ibuf_index, BTR_SEARCH_LEAF,
- &pcur, TRUE, &mtr);
+ btr_pcur_open_at_index_side(
+ FALSE, ibuf->index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
+
btr_pcur_move_to_prev(&pcur, &mtr);
- if (btr_pcur_is_before_first_on_page(&pcur, &mtr)) {
+ if (btr_pcur_is_before_first_on_page(&pcur)) {
/* The tree is empty */
max_space_id = 0;
@@ -2508,24 +2540,26 @@ ibuf_update_max_tablespace_id(void)
fil_set_max_space_id_if_bigger(max_space_id);
}
-/*************************************************************************
+/*********************************************************************//**
Makes an index insert to the insert buffer, instead of directly to the disk
-page, if this is possible. */
+page, if this is possible.
+@return DB_SUCCESS, DB_FAIL, DB_STRONG_FAIL */
static
ulint
ibuf_insert_low(
/*============*/
- /* out: DB_SUCCESS, DB_FAIL, DB_STRONG_FAIL */
- ulint mode, /* in: BTR_MODIFY_PREV or BTR_MODIFY_TREE */
- dtuple_t* entry, /* in: index entry to insert */
- dict_index_t* index, /* in: index where to insert; must not be
+ ulint mode, /*!< in: BTR_MODIFY_PREV or BTR_MODIFY_TREE */
+ const dtuple_t* entry, /*!< in: index entry to insert */
+ ulint entry_size,
+ /*!< in: rec_get_converted_size(index, entry) */
+ dict_index_t* index, /*!< in: index where to insert; must not be
unique or clustered */
- ulint space, /* in: space id where to insert */
- ulint page_no,/* in: page number where to insert */
- que_thr_t* thr) /* in: query thread */
+ ulint space, /*!< in: space id where to insert */
+ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
+ ulint page_no,/*!< in: page number where to insert */
+ que_thr_t* thr) /*!< in: query thread */
{
big_rec_t* dummy_big_rec;
- ulint entry_size;
btr_pcur_t pcur;
btr_cur_t* cursor;
dtuple_t* ibuf_entry;
@@ -2534,33 +2568,25 @@ ibuf_insert_low(
rec_t* ins_rec;
ibool old_bit_value;
page_t* bitmap_page;
- ibuf_data_t* ibuf_data;
- dict_index_t* ibuf_index;
page_t* root;
ulint err;
ibool do_merge;
ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
- ib_longlong space_versions[IBUF_MAX_N_PAGES_MERGED];
+ ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED];
ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
ulint n_stored;
ulint bits;
mtr_t mtr;
mtr_t bitmap_mtr;
- ut_a(!(index->type & DICT_CLUSTERED));
+ ut_a(!dict_index_is_clust(index));
ut_ad(dtuple_check_typed(entry));
+ ut_ad(ut_is_2pow(zip_size));
ut_a(trx_sys_multiple_tablespace_format);
do_merge = FALSE;
- /* Currently the insert buffer of space 0 takes care of inserts to all
- tablespaces */
-
- ibuf_data = fil_space_get_ibuf_data(0);
-
- ibuf_index = ibuf_data->index;
-
mutex_enter(&ibuf_mutex);
if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT) {
@@ -2587,7 +2613,7 @@ ibuf_insert_low(
mutex_enter(&ibuf_mutex);
- while (!ibuf_data_enough_free_for_insert(ibuf_data)) {
+ while (!ibuf_data_enough_free_for_insert()) {
mutex_exit(&ibuf_mutex);
@@ -2595,7 +2621,7 @@ ibuf_insert_low(
mutex_exit(&ibuf_pessimistic_insert_mutex);
- err = ibuf_add_free_page(0, ibuf_data);
+ err = ibuf_add_free_page();
if (err == DB_STRONG_FAIL) {
@@ -2612,8 +2638,6 @@ ibuf_insert_low(
ibuf_enter();
}
- entry_size = rec_get_converted_size(index, entry);
-
heap = mem_heap_create(512);
/* Build the entry which contains the space id and the page number as
@@ -2628,18 +2652,19 @@ ibuf_insert_low(
mtr_start(&mtr);
- btr_pcur_open(ibuf_index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr);
+ btr_pcur_open(ibuf->index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr);
/* Find out the volume of already buffered inserts for the same index
page */
buffered = ibuf_get_volume_buffered(&pcur, space, page_no, &mtr);
-#ifdef UNIV_IBUF_DEBUG
+#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a((buffered == 0) || ibuf_count_get(space, page_no));
#endif
mtr_start(&bitmap_mtr);
- bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &bitmap_mtr);
+ bitmap_page = ibuf_bitmap_get_map_page(space, page_no,
+ zip_size, &bitmap_mtr);
/* We check if the index page is suitable for buffered entries */
@@ -2652,11 +2677,11 @@ ibuf_insert_low(
goto function_exit;
}
- bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no,
+ bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size,
IBUF_BITMAP_FREE, &bitmap_mtr);
if (buffered + entry_size + page_dir_calc_reserved_space(1)
- > ibuf_index_page_calc_free_from_bits(bits)) {
+ > ibuf_index_page_calc_free_from_bits(zip_size, bits)) {
mtr_commit(&bitmap_mtr);
/* It may not fit */
@@ -2673,11 +2698,12 @@ ibuf_insert_low(
/* Set the bitmap bit denoting that the insert buffer contains
buffered entries for this index page, if the bit is not set yet */
- old_bit_value = ibuf_bitmap_page_get_bits(bitmap_page, page_no,
- IBUF_BITMAP_BUFFERED,
- &bitmap_mtr);
+ old_bit_value = ibuf_bitmap_page_get_bits(
+ bitmap_page, page_no, zip_size,
+ IBUF_BITMAP_BUFFERED, &bitmap_mtr);
+
if (!old_bit_value) {
- ibuf_bitmap_page_set_bits(bitmap_page, page_no,
+ ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
IBUF_BITMAP_BUFFERED, TRUE,
&bitmap_mtr);
}
@@ -2689,12 +2715,11 @@ ibuf_insert_low(
if (mode == BTR_MODIFY_PREV) {
err = btr_cur_optimistic_insert(BTR_NO_LOCKING_FLAG, cursor,
ibuf_entry, &ins_rec,
- &dummy_big_rec, thr,
- &mtr);
+ &dummy_big_rec, 0, thr, &mtr);
if (err == DB_SUCCESS) {
/* Update the page max trx id field */
- page_update_max_trx_id(buf_frame_align(ins_rec),
- thr_get_trx(thr)->id);
+ page_update_max_trx_id(btr_cur_get_block(cursor), NULL,
+ thr_get_trx(thr)->id, &mtr);
}
} else {
ut_ad(mode == BTR_MODIFY_TREE);
@@ -2704,25 +2729,24 @@ ibuf_insert_low(
which would cause the x-latching of the root after that to
break the latching order. */
- root = ibuf_tree_root_get(ibuf_data, 0, &mtr);
+ root = ibuf_tree_root_get(&mtr);
err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
| BTR_NO_UNDO_LOG_FLAG,
cursor,
ibuf_entry, &ins_rec,
- &dummy_big_rec, thr,
- &mtr);
+ &dummy_big_rec, 0, thr, &mtr);
if (err == DB_SUCCESS) {
/* Update the page max trx id field */
- page_update_max_trx_id(buf_frame_align(ins_rec),
- thr_get_trx(thr)->id);
+ page_update_max_trx_id(btr_cur_get_block(cursor), NULL,
+ thr_get_trx(thr)->id, &mtr);
}
- ibuf_data_sizes_update(ibuf_data, root, &mtr);
+ ibuf_size_update(root, &mtr);
}
function_exit:
-#ifdef UNIV_IBUF_DEBUG
+#ifdef UNIV_IBUF_COUNT_DEBUG
if (err == DB_SUCCESS) {
fprintf(stderr,
"Incrementing ibuf count of space %lu page %lu\n"
@@ -2734,7 +2758,6 @@ function_exit:
}
#endif
if (mode == BTR_MODIFY_TREE) {
- ut_ad(ibuf_validate_low());
mutex_exit(&ibuf_mutex);
mutex_exit(&ibuf_pessimistic_insert_mutex);
@@ -2746,17 +2769,17 @@ function_exit:
mem_heap_free(heap);
- mutex_enter(&ibuf_mutex);
-
if (err == DB_SUCCESS) {
- ibuf_data->empty = FALSE;
- ibuf_data->n_inserts++;
- }
+ mutex_enter(&ibuf_mutex);
- mutex_exit(&ibuf_mutex);
+ ibuf->empty = FALSE;
+ ibuf->n_inserts++;
+
+ mutex_exit(&ibuf_mutex);
- if ((mode == BTR_MODIFY_TREE) && (err == DB_SUCCESS)) {
- ibuf_contract_after_insert(entry_size);
+ if (mode == BTR_MODIFY_TREE) {
+ ibuf_contract_after_insert(entry_size);
+ }
}
if (do_merge) {
@@ -2770,39 +2793,56 @@ function_exit:
return(err);
}
-/*************************************************************************
+/*********************************************************************//**
Makes an index insert to the insert buffer, instead of directly to the disk
page, if this is possible. Does not do insert if the index is clustered
-or unique. */
-
+or unique.
+@return TRUE if success */
+UNIV_INTERN
ibool
ibuf_insert(
/*========*/
- /* out: TRUE if success */
- dtuple_t* entry, /* in: index entry to insert */
- dict_index_t* index, /* in: index where to insert */
- ulint space, /* in: space id where to insert */
- ulint page_no,/* in: page number where to insert */
- que_thr_t* thr) /* in: query thread */
+ const dtuple_t* entry, /*!< in: index entry to insert */
+ dict_index_t* index, /*!< in: index where to insert */
+ ulint space, /*!< in: space id where to insert */
+ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
+ ulint page_no,/*!< in: page number where to insert */
+ que_thr_t* thr) /*!< in: query thread */
{
ulint err;
+ ulint entry_size;
ut_a(trx_sys_multiple_tablespace_format);
ut_ad(dtuple_check_typed(entry));
+ ut_ad(ut_is_2pow(zip_size));
+
+ ut_a(!dict_index_is_clust(index));
+
+ switch (UNIV_EXPECT(ibuf_use, IBUF_USE_INSERT)) {
+ case IBUF_USE_NONE:
+ return(FALSE);
+ case IBUF_USE_INSERT:
+ goto do_insert;
+ case IBUF_USE_COUNT:
+ break;
+ }
- ut_a(!(index->type & DICT_CLUSTERED));
+ ut_error; /* unknown value of ibuf_use */
- if (rec_get_converted_size(index, entry)
+do_insert:
+ entry_size = rec_get_converted_size(index, entry, 0);
+
+ if (entry_size
>= (page_get_free_space_of_empty(dict_table_is_comp(index->table))
/ 2)) {
return(FALSE);
}
- err = ibuf_insert_low(BTR_MODIFY_PREV, entry, index, space, page_no,
- thr);
+ err = ibuf_insert_low(BTR_MODIFY_PREV, entry, entry_size,
+ index, space, zip_size, page_no, thr);
if (err == DB_FAIL) {
- err = ibuf_insert_low(BTR_MODIFY_TREE, entry, index, space,
- page_no, thr);
+ err = ibuf_insert_low(BTR_MODIFY_TREE, entry, entry_size,
+ index, space, zip_size, page_no, thr);
}
if (err == DB_SUCCESS) {
@@ -2819,21 +2859,22 @@ ibuf_insert(
}
}
-/************************************************************************
+/********************************************************************//**
During merge, inserts to an index page a secondary index entry extracted
from the insert buffer. */
static
void
ibuf_insert_to_index_page(
/*======================*/
- dtuple_t* entry, /* in: buffered entry to insert */
- page_t* page, /* in: index page where the buffered entry
+ dtuple_t* entry, /*!< in: buffered entry to insert */
+ buf_block_t* block, /*!< in/out: index page where the buffered entry
should be placed */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr) /* in: mtr */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
{
page_cur_t page_cur;
ulint low_match;
+ page_t* page = buf_block_get_frame(block);
rec_t* rec;
page_t* bitmap_page;
ulint old_bits;
@@ -2859,7 +2900,7 @@ ibuf_insert_to_index_page(
"InnoDB: but the number of fields does not match!\n",
stderr);
dump:
- buf_page_print(page);
+ buf_page_print(page, 0);
dtuple_print(stderr, entry);
@@ -2874,97 +2915,108 @@ dump:
return;
}
- low_match = page_cur_search(page, index, entry,
+ low_match = page_cur_search(block, index, entry,
PAGE_CUR_LE, &page_cur);
if (low_match == dtuple_get_n_fields(entry)) {
+ page_zip_des_t* page_zip;
+
rec = page_cur_get_rec(&page_cur);
+ page_zip = buf_block_get_page_zip(block);
- btr_cur_del_unmark_for_ibuf(rec, mtr);
+ btr_cur_del_unmark_for_ibuf(rec, page_zip, mtr);
} else {
- rec = page_cur_tuple_insert(&page_cur, entry, index, mtr);
+ rec = page_cur_tuple_insert(&page_cur, entry, index, 0, mtr);
- if (rec == NULL) {
- /* If the record did not fit, reorganize */
+ if (UNIV_LIKELY(rec != NULL)) {
+ return;
+ }
- btr_page_reorganize(page, index, mtr);
+ /* If the record did not fit, reorganize */
- page_cur_search(page, index, entry,
- PAGE_CUR_LE, &page_cur);
+ btr_page_reorganize(block, index, mtr);
+ page_cur_search(block, index, entry, PAGE_CUR_LE, &page_cur);
- /* This time the record must fit */
- if (UNIV_UNLIKELY(!page_cur_tuple_insert(
- &page_cur, entry, index,
- mtr))) {
+ /* This time the record must fit */
+ if (UNIV_UNLIKELY
+ (!page_cur_tuple_insert(&page_cur, entry, index,
+ 0, mtr))) {
+ ulint space;
+ ulint page_no;
+ ulint zip_size;
- ut_print_timestamp(stderr);
+ ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: Insert buffer insert"
- " fails; page free %lu,"
- " dtuple size %lu\n",
- (ulong) page_get_max_insert_size(
- page, 1),
- (ulong) rec_get_converted_size(
- index, entry));
- fputs("InnoDB: Cannot insert index record ",
- stderr);
- dtuple_print(stderr, entry);
- fputs("\nInnoDB: The table where"
- " this index record belongs\n"
- "InnoDB: is now probably corrupt."
- " Please run CHECK TABLE on\n"
- "InnoDB: that table.\n", stderr);
-
- bitmap_page = ibuf_bitmap_get_map_page(
- buf_frame_get_space_id(page),
- buf_frame_get_page_no(page),
- mtr);
- old_bits = ibuf_bitmap_page_get_bits(
- bitmap_page,
- buf_frame_get_page_no(page),
- IBUF_BITMAP_FREE, mtr);
-
- fprintf(stderr, "InnoDB: Bitmap bits %lu\n",
- (ulong) old_bits);
-
- fputs("InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n", stderr);
- }
+ fprintf(stderr,
+ " InnoDB: Error: Insert buffer insert"
+ " fails; page free %lu,"
+ " dtuple size %lu\n",
+ (ulong) page_get_max_insert_size(
+ page, 1),
+ (ulong) rec_get_converted_size(
+ index, entry, 0));
+ fputs("InnoDB: Cannot insert index record ",
+ stderr);
+ dtuple_print(stderr, entry);
+ fputs("\nInnoDB: The table where"
+ " this index record belongs\n"
+ "InnoDB: is now probably corrupt."
+ " Please run CHECK TABLE on\n"
+ "InnoDB: that table.\n", stderr);
+
+ space = page_get_space_id(page);
+ zip_size = buf_block_get_zip_size(block);
+ page_no = page_get_page_no(page);
+
+ bitmap_page = ibuf_bitmap_get_map_page(
+ space, page_no, zip_size, mtr);
+ old_bits = ibuf_bitmap_page_get_bits(
+ bitmap_page, page_no, zip_size,
+ IBUF_BITMAP_FREE, mtr);
+
+ fprintf(stderr,
+ "InnoDB: space %lu, page %lu,"
+ " zip_size %lu, bitmap bits %lu\n",
+ (ulong) space, (ulong) page_no,
+ (ulong) zip_size, (ulong) old_bits);
+
+ fputs("InnoDB: Submit a detailed bug report"
+ " to http://bugs.mysql.com\n", stderr);
}
}
}
-/*************************************************************************
+/*********************************************************************//**
Deletes from ibuf the record on which pcur is positioned. If we have to
resort to a pessimistic delete, this function commits mtr and closes
-the cursor. */
+the cursor.
+@return TRUE if mtr was committed and pcur closed in this operation */
static
ibool
ibuf_delete_rec(
/*============*/
- /* out: TRUE if mtr was committed and pcur
- closed in this operation */
- ulint space, /* in: space id */
- ulint page_no,/* in: index page number where the record
+ ulint space, /*!< in: space id */
+ ulint page_no,/*!< in: index page number where the record
should belong */
- btr_pcur_t* pcur, /* in: pcur positioned on the record to
+ btr_pcur_t* pcur, /*!< in: pcur positioned on the record to
delete, having latch mode BTR_MODIFY_LEAF */
- dtuple_t* search_tuple,
- /* in: search tuple for entries of page_no */
- mtr_t* mtr) /* in: mtr */
+ const dtuple_t* search_tuple,
+ /*!< in: search tuple for entries of page_no */
+ mtr_t* mtr) /*!< in: mtr */
{
ibool success;
- ibuf_data_t* ibuf_data;
page_t* root;
ulint err;
ut_ad(ibuf_inside());
+ ut_ad(page_rec_is_user_rec(btr_pcur_get_rec(pcur)));
+ ut_ad(ibuf_rec_get_page_no(btr_pcur_get_rec(pcur)) == page_no);
+ ut_ad(ibuf_rec_get_space(btr_pcur_get_rec(pcur)) == space);
success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), mtr);
if (success) {
-#ifdef UNIV_IBUF_DEBUG
+#ifdef UNIV_IBUF_COUNT_DEBUG
fprintf(stderr,
"Decrementing ibuf count of space %lu page %lu\n"
"from %lu by 1\n", space, page_no,
@@ -2975,16 +3027,15 @@ ibuf_delete_rec(
return(FALSE);
}
+ ut_ad(page_rec_is_user_rec(btr_pcur_get_rec(pcur)));
+ ut_ad(ibuf_rec_get_page_no(btr_pcur_get_rec(pcur)) == page_no);
+ ut_ad(ibuf_rec_get_space(btr_pcur_get_rec(pcur)) == space);
+
/* We have to resort to a pessimistic delete from ibuf */
btr_pcur_store_position(pcur, mtr);
btr_pcur_commit_specify_mtr(pcur, mtr);
- /* Currently the insert buffer of space 0 takes care of inserts to all
- tablespaces */
-
- ibuf_data = fil_space_get_ibuf_data(0);
-
mutex_enter(&ibuf_mutex);
mtr_start(mtr);
@@ -2992,7 +3043,7 @@ ibuf_delete_rec(
success = btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr);
if (!success) {
- if (fil_space_get_version(space) == -1) {
+ if (fil_space_get_flags(space) == ULINT_UNDEFINED) {
/* The tablespace has been dropped. It is possible
that another thread has deleted the insert buffer
entry. Do not complain. */
@@ -3018,7 +3069,7 @@ ibuf_delete_rec(
btr_pcur_commit_specify_mtr(pcur, mtr);
fputs("InnoDB: Validating insert buffer tree:\n", stderr);
- if (!btr_validate_index(ibuf_data->index, NULL)) {
+ if (!btr_validate_index(ibuf->index, NULL)) {
ut_error;
}
@@ -3028,18 +3079,16 @@ ibuf_delete_rec(
goto func_exit;
}
- root = ibuf_tree_root_get(ibuf_data, 0, mtr);
+ root = ibuf_tree_root_get(mtr);
btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur),
- FALSE, mtr);
+ RB_NONE, mtr);
ut_a(err == DB_SUCCESS);
-#ifdef UNIV_IBUF_DEBUG
+#ifdef UNIV_IBUF_COUNT_DEBUG
ibuf_count_set(space, page_no, ibuf_count_get(space, page_no) - 1);
#endif
- ibuf_data_sizes_update(ibuf_data, root, mtr);
-
- ut_ad(ibuf_validate_low());
+ ibuf_size_update(root, mtr);
commit_and_exit:
btr_pcur_commit_specify_mtr(pcur, mtr);
@@ -3052,53 +3101,74 @@ func_exit:
return(TRUE);
}
-/*************************************************************************
+/*********************************************************************//**
When an index page is read from a disk to the buffer pool, this function
inserts to the page the possible index entries buffered in the insert buffer.
The entries are deleted from the insert buffer. If the page is not read, but
created in the buffer pool, this function deletes its buffered entries from
the insert buffer; there can exist entries for such a page if the page
belonged to an index which subsequently was dropped. */
-
+UNIV_INTERN
void
ibuf_merge_or_delete_for_page(
/*==========================*/
- page_t* page, /* in: if page has been read from disk, pointer to
- the page x-latched, else NULL */
- ulint space, /* in: space id of the index page */
- ulint page_no,/* in: page number of the index page */
- ibool update_ibuf_bitmap)/* in: normally this is set to TRUE, but if
- we have deleted or are deleting the tablespace, then we
- naturally do not want to update a non-existent bitmap
- page */
+ buf_block_t* block, /*!< in: if page has been read from
+ disk, pointer to the page x-latched,
+ else NULL */
+ ulint space, /*!< in: space id of the index page */
+ ulint page_no,/*!< in: page number of the index page */
+ ulint zip_size,/*!< in: compressed page size in bytes,
+ or 0 */
+ ibool update_ibuf_bitmap)/*!< in: normally this is set
+ to TRUE, but if we have deleted or are
+ deleting the tablespace, then we
+ naturally do not want to update a
+ non-existent bitmap page */
{
mem_heap_t* heap;
btr_pcur_t pcur;
- dtuple_t* entry;
dtuple_t* search_tuple;
- rec_t* ibuf_rec;
- buf_block_t* block;
- page_t* bitmap_page;
- ibuf_data_t* ibuf_data;
ulint n_inserts;
#ifdef UNIV_IBUF_DEBUG
ulint volume;
#endif
+ page_zip_des_t* page_zip = NULL;
ibool tablespace_being_deleted = FALSE;
ibool corruption_noticed = FALSE;
mtr_t mtr;
- if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
+ ut_ad(!block || buf_block_get_space(block) == space);
+ ut_ad(!block || buf_block_get_page_no(block) == page_no);
+ ut_ad(!block || buf_block_get_zip_size(block) == zip_size);
+ if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE
+ || trx_sys_hdr_page(space, page_no)) {
return;
}
- if (ibuf_fixed_addr_page(space, page_no) || fsp_descr_page(page_no)
- || trx_sys_hdr_page(space, page_no)) {
+ /* We cannot refer to zip_size in the following, because
+ zip_size is passed as ULINT_UNDEFINED (it is unknown) when
+ buf_read_ibuf_merge_pages() is merging (discarding) changes
+ for a dropped tablespace. When block != NULL or
+ update_ibuf_bitmap is specified, the zip_size must be known.
+ That is why we will repeat the check below, with zip_size in
+ place of 0. Passing zip_size as 0 assumes that the
+ uncompressed page size always is a power-of-2 multiple of the
+ compressed page size. */
+
+ if (ibuf_fixed_addr_page(space, 0, page_no)
+ || fsp_descr_page(0, page_no)) {
return;
}
- if (update_ibuf_bitmap) {
+ if (UNIV_LIKELY(update_ibuf_bitmap)) {
+ ut_a(ut_is_2pow(zip_size));
+
+ if (ibuf_fixed_addr_page(space, zip_size, page_no)
+ || fsp_descr_page(zip_size, page_no)) {
+ return;
+ }
+
/* If the following returns FALSE, we get the counter
incremented, and must decrement it when we leave this
function. When the counter is > 0, that prevents tablespace
@@ -3106,37 +3176,41 @@ ibuf_merge_or_delete_for_page(
tablespace_being_deleted = fil_inc_pending_ibuf_merges(space);
- if (tablespace_being_deleted) {
+ if (UNIV_UNLIKELY(tablespace_being_deleted)) {
/* Do not try to read the bitmap page from space;
just delete the ibuf records for the page */
- page = NULL;
+ block = NULL;
update_ibuf_bitmap = FALSE;
- }
- }
+ } else {
+ page_t* bitmap_page;
- if (update_ibuf_bitmap) {
- mtr_start(&mtr);
- bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
+ mtr_start(&mtr);
- if (!ibuf_bitmap_page_get_bits(bitmap_page, page_no,
- IBUF_BITMAP_BUFFERED, &mtr)) {
- /* No inserts buffered for this page */
- mtr_commit(&mtr);
+ bitmap_page = ibuf_bitmap_get_map_page(
+ space, page_no, zip_size, &mtr);
- if (!tablespace_being_deleted) {
- fil_decr_pending_ibuf_merges(space);
- }
+ if (!ibuf_bitmap_page_get_bits(bitmap_page, page_no,
+ zip_size,
+ IBUF_BITMAP_BUFFERED,
+ &mtr)) {
+ /* No inserts buffered for this page */
+ mtr_commit(&mtr);
- return;
- }
- mtr_commit(&mtr);
- }
+ if (!tablespace_being_deleted) {
+ fil_decr_pending_ibuf_merges(space);
+ }
- /* Currently the insert buffer of space 0 takes care of inserts to all
- tablespaces */
+ return;
+ }
+ mtr_commit(&mtr);
+ }
+ } else if (block
+ && (ibuf_fixed_addr_page(space, zip_size, page_no)
+ || fsp_descr_page(zip_size, page_no))) {
- ibuf_data = fil_space_get_ibuf_data(0);
+ return;
+ }
ibuf_enter();
@@ -3150,16 +3224,20 @@ ibuf_merge_or_delete_for_page(
heap);
}
- if (page) {
+ if (block) {
/* Move the ownership of the x-latch on the page to this OS
thread, so that we can acquire a second x-latch on it. This
is needed for the insert operations to the index page to pass
the debug checks. */
- block = buf_block_align(page);
rw_lock_x_lock_move_ownership(&(block->lock));
+ page_zip = buf_block_get_page_zip(block);
+
+ if (UNIV_UNLIKELY(fil_page_get_type(block->frame)
+ != FIL_PAGE_INDEX)
+ || UNIV_UNLIKELY(!page_is_leaf(block->frame))) {
- if (fil_page_get_type(page) != FIL_PAGE_INDEX) {
+ page_t* bitmap_page;
corruption_noticed = TRUE;
@@ -3171,14 +3249,14 @@ ibuf_merge_or_delete_for_page(
stderr);
bitmap_page = ibuf_bitmap_get_map_page(space, page_no,
- &mtr);
- buf_page_print(bitmap_page);
+ zip_size, &mtr);
+ buf_page_print(bitmap_page, 0);
mtr_commit(&mtr);
fputs("\nInnoDB: Dump of the page:\n", stderr);
- buf_page_print(page);
+ buf_page_print(block->frame, 0);
fprintf(stderr,
"InnoDB: Error: corruption in the tablespace."
@@ -3186,7 +3264,7 @@ ibuf_merge_or_delete_for_page(
"InnoDB: buffer records to page n:o %lu"
" though the page\n"
"InnoDB: type is %lu, which is"
- " not an index page!\n"
+ " not an index leaf page!\n"
"InnoDB: We try to resolve the problem"
" by skipping the insert buffer\n"
"InnoDB: merge for this page."
@@ -3196,7 +3274,8 @@ ibuf_merge_or_delete_for_page(
"InnoDB: Please submit a detailed bug report"
" to http://bugs.mysql.com\n\n",
(ulong) page_no,
- (ulong) fil_page_get_type(page));
+ (ulong)
+ fil_page_get_type(block->frame));
}
}
@@ -3207,65 +3286,76 @@ ibuf_merge_or_delete_for_page(
loop:
mtr_start(&mtr);
- if (page) {
- ibool success = buf_page_get_known_nowait(RW_X_LATCH, page,
- BUF_KEEP_OLD,
- __FILE__, __LINE__,
- &mtr);
+ if (block) {
+ ibool success;
+
+ success = buf_page_get_known_nowait(
+ RW_X_LATCH, block,
+ BUF_KEEP_OLD, __FILE__, __LINE__, &mtr);
+
ut_a(success);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_TREE_NODE);
-#endif /* UNIV_SYNC_DEBUG */
+
+ buf_block_dbg_add_level(block, SYNC_TREE_NODE);
}
/* Position pcur in the insert buffer at the first entry for this
index page */
- btr_pcur_open_on_user_rec(ibuf_data->index, search_tuple, PAGE_CUR_GE,
- BTR_MODIFY_LEAF, &pcur, &mtr);
- if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
+ btr_pcur_open_on_user_rec(
+ ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF,
+ &pcur, &mtr);
+
+ if (!btr_pcur_is_on_user_rec(&pcur)) {
ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
goto reset_bit;
}
for (;;) {
- ut_ad(btr_pcur_is_on_user_rec(&pcur, &mtr));
+ rec_t* rec;
- ibuf_rec = btr_pcur_get_rec(&pcur);
+ ut_ad(btr_pcur_is_on_user_rec(&pcur));
+
+ rec = btr_pcur_get_rec(&pcur);
/* Check if the entry is for this index page */
- if (ibuf_rec_get_page_no(ibuf_rec) != page_no
- || ibuf_rec_get_space(ibuf_rec) != space) {
- if (page) {
- page_header_reset_last_insert(page, &mtr);
+ if (ibuf_rec_get_page_no(rec) != page_no
+ || ibuf_rec_get_space(rec) != space) {
+
+ if (block) {
+ page_header_reset_last_insert(
+ block->frame, page_zip, &mtr);
}
+
goto reset_bit;
}
- if (corruption_noticed) {
+ if (UNIV_UNLIKELY(corruption_noticed)) {
fputs("InnoDB: Discarding record\n ", stderr);
- rec_print_old(stderr, ibuf_rec);
- fputs("\n from the insert buffer!\n\n", stderr);
- } else if (page) {
+ rec_print_old(stderr, rec);
+ fputs("\nInnoDB: from the insert buffer!\n\n", stderr);
+ } else if (block) {
/* Now we have at pcur a record which should be
inserted to the index page; NOTE that the call below
- copies pointers to fields in ibuf_rec, and we must
- keep the latch to the ibuf_rec page until the
+ copies pointers to fields in rec, and we must
+ keep the latch to the rec page until the
insertion is finished! */
+ dtuple_t* entry;
+ trx_id_t max_trx_id;
dict_index_t* dummy_index;
- dulint max_trx_id = page_get_max_trx_id(
- buf_frame_align(ibuf_rec));
- page_update_max_trx_id(page, max_trx_id);
+
+ max_trx_id = page_get_max_trx_id(page_align(rec));
+ page_update_max_trx_id(block, page_zip, max_trx_id,
+ &mtr);
entry = ibuf_build_entry_from_ibuf_rec(
- ibuf_rec, heap, &dummy_index);
+ rec, heap, &dummy_index);
#ifdef UNIV_IBUF_DEBUG
- volume += rec_get_converted_size(dummy_index, entry)
+ volume += rec_get_converted_size(dummy_index, entry, 0)
+ page_dir_calc_reserved_space(1);
ut_a(volume <= 4 * UNIV_PAGE_SIZE
/ IBUF_PAGE_SIZE_PER_FREE_SPACE);
#endif
- ibuf_insert_to_index_page(entry, page,
+ ibuf_insert_to_index_page(entry, block,
dummy_index, &mtr);
ibuf_dummy_index_free(dummy_index);
}
@@ -3279,9 +3369,7 @@ loop:
we start from the beginning again */
goto loop;
- }
-
- if (btr_pcur_is_after_last_on_page(&pcur, &mtr)) {
+ } else if (btr_pcur_is_after_last_on_page(&pcur)) {
mtr_commit(&mtr);
btr_pcur_close(&pcur);
@@ -3290,39 +3378,38 @@ loop:
}
reset_bit:
-#ifdef UNIV_IBUF_DEBUG
+#ifdef UNIV_IBUF_COUNT_DEBUG
if (ibuf_count_get(space, page_no) > 0) {
/* btr_print_tree(ibuf_data->index->tree, 100);
ibuf_print(); */
}
#endif
- if (update_ibuf_bitmap) {
- bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
- ibuf_bitmap_page_set_bits(bitmap_page, page_no,
- IBUF_BITMAP_BUFFERED, FALSE, &mtr);
- if (page) {
+ if (UNIV_LIKELY(update_ibuf_bitmap)) {
+ page_t* bitmap_page;
+
+ bitmap_page = ibuf_bitmap_get_map_page(
+ space, page_no, zip_size, &mtr);
+
+ ibuf_bitmap_page_set_bits(
+ bitmap_page, page_no, zip_size,
+ IBUF_BITMAP_BUFFERED, FALSE, &mtr);
+
+ if (block) {
ulint old_bits = ibuf_bitmap_page_get_bits(
- bitmap_page, page_no, IBUF_BITMAP_FREE, &mtr);
- ulint new_bits = ibuf_index_page_calc_free(page);
-#if 0 /* defined UNIV_IBUF_DEBUG */
- fprintf(stderr, "Old bits %lu new bits %lu"
- " max size %lu\n",
- old_bits, new_bits,
- page_get_max_insert_size_after_reorganize(
- page, 1));
-#endif
+ bitmap_page, page_no, zip_size,
+ IBUF_BITMAP_FREE, &mtr);
+
+ ulint new_bits = ibuf_index_page_calc_free(
+ zip_size, block);
+
if (old_bits != new_bits) {
- ibuf_bitmap_page_set_bits(bitmap_page, page_no,
- IBUF_BITMAP_FREE,
- new_bits, &mtr);
+ ibuf_bitmap_page_set_bits(
+ bitmap_page, page_no, zip_size,
+ IBUF_BITMAP_FREE, new_bits, &mtr);
}
}
}
-#if 0 /* defined UNIV_IBUF_DEBUG */
- fprintf(stderr,
- "Ibuf merge %lu records volume %lu to page no %lu\n",
- n_inserts, volume, page_no);
-#endif
+
mtr_commit(&mtr);
btr_pcur_close(&pcur);
mem_heap_free(heap);
@@ -3330,8 +3417,8 @@ reset_bit:
/* Protect our statistics keeping from race conditions */
mutex_enter(&ibuf_mutex);
- ibuf_data->n_merges++;
- ibuf_data->n_merged_recs += n_inserts;
+ ibuf->n_merges++;
+ ibuf->n_merged_recs += n_inserts;
mutex_exit(&ibuf_mutex);
@@ -3341,21 +3428,22 @@ reset_bit:
}
ibuf_exit();
-#ifdef UNIV_IBUF_DEBUG
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(ibuf_count_get(space, page_no) == 0);
#endif
}
-/*************************************************************************
+/*********************************************************************//**
Deletes all entries in the insert buffer for a given space id. This is used
in DISCARD TABLESPACE and IMPORT TABLESPACE.
NOTE: this does not update the page free bitmaps in the space. The space will
become CORRUPT when you call this function! */
-
+UNIV_INTERN
void
ibuf_delete_for_discarded_space(
/*============================*/
- ulint space) /* in: space id */
+ ulint space) /*!< in: space id */
{
mem_heap_t* heap;
btr_pcur_t pcur;
@@ -3363,15 +3451,9 @@ ibuf_delete_for_discarded_space(
rec_t* ibuf_rec;
ulint page_no;
ibool closed;
- ibuf_data_t* ibuf_data;
ulint n_inserts;
mtr_t mtr;
- /* Currently the insert buffer of space 0 takes care of inserts to all
- tablespaces */
-
- ibuf_data = fil_space_get_ibuf_data(0);
-
heap = mem_heap_create(512);
/* Use page number 0 to build the search tuple so that we get the
@@ -3387,16 +3469,18 @@ loop:
/* Position pcur in the insert buffer at the first entry for the
space */
- btr_pcur_open_on_user_rec(ibuf_data->index, search_tuple, PAGE_CUR_GE,
- BTR_MODIFY_LEAF, &pcur, &mtr);
- if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
+ btr_pcur_open_on_user_rec(
+ ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF,
+ &pcur, &mtr);
+
+ if (!btr_pcur_is_on_user_rec(&pcur)) {
ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
goto leave_loop;
}
for (;;) {
- ut_ad(btr_pcur_is_on_user_rec(&pcur, &mtr));
+ ut_ad(btr_pcur_is_on_user_rec(&pcur));
ibuf_rec = btr_pcur_get_rec(&pcur);
@@ -3422,7 +3506,7 @@ loop:
goto loop;
}
- if (btr_pcur_is_after_last_on_page(&pcur, &mtr)) {
+ if (btr_pcur_is_after_last_on_page(&pcur)) {
mtr_commit(&mtr);
btr_pcur_close(&pcur);
@@ -3439,77 +3523,41 @@ leave_loop:
/* Protect our statistics keeping from race conditions */
mutex_enter(&ibuf_mutex);
- ibuf_data->n_merges++;
- ibuf_data->n_merged_recs += n_inserts;
+ ibuf->n_merges++;
+ ibuf->n_merged_recs += n_inserts;
mutex_exit(&ibuf_mutex);
- /*
- fprintf(stderr,
- "InnoDB: Discarded %lu ibuf entries for space %lu\n",
- (ulong) n_inserts, (ulong) space);
- */
+
ibuf_exit();
mem_heap_free(heap);
}
-
-/**********************************************************************
-Validates the ibuf data structures when the caller owns ibuf_mutex. */
-
-ibool
-ibuf_validate_low(void)
-/*===================*/
- /* out: TRUE if ok */
-{
- ibuf_data_t* data;
- ulint sum_sizes;
-
- ut_ad(mutex_own(&ibuf_mutex));
-
- sum_sizes = 0;
-
- data = UT_LIST_GET_FIRST(ibuf->data_list);
-
- while (data) {
- sum_sizes += data->size;
-
- data = UT_LIST_GET_NEXT(data_list, data);
- }
-
- ut_a(sum_sizes == ibuf->size);
-
- return(TRUE);
-}
-
-/**********************************************************************
-Looks if the insert buffer is empty. */
-
+/******************************************************************//**
+Looks if the insert buffer is empty.
+@return TRUE if empty */
+UNIV_INTERN
ibool
ibuf_is_empty(void)
/*===============*/
- /* out: TRUE if empty */
{
- ibuf_data_t* data;
ibool is_empty;
- page_t* root;
+ const page_t* root;
mtr_t mtr;
ibuf_enter();
mutex_enter(&ibuf_mutex);
- data = UT_LIST_GET_FIRST(ibuf->data_list);
-
mtr_start(&mtr);
- root = ibuf_tree_root_get(data, 0, &mtr);
+ root = ibuf_tree_root_get(&mtr);
if (page_get_n_recs(root) == 0) {
is_empty = TRUE;
- if (data->empty == FALSE) {
+ if (ibuf->empty == FALSE) {
fprintf(stderr,
"InnoDB: Warning: insert buffer tree is empty"
" but the data struct does not\n"
@@ -3518,15 +3566,13 @@ ibuf_is_empty(void)
"InnoDB: run to completion.\n");
}
} else {
- ut_a(data->empty == FALSE);
+ ut_a(ibuf->empty == FALSE);
is_empty = FALSE;
}
mtr_commit(&mtr);
- ut_a(data->space == 0);
-
mutex_exit(&ibuf_mutex);
ibuf_exit();
@@ -3534,47 +3580,45 @@ ibuf_is_empty(void)
return(is_empty);
}
-/**********************************************************************
+/******************************************************************//**
Prints info of ibuf. */
-
+UNIV_INTERN
void
ibuf_print(
/*=======*/
- FILE* file) /* in: file where to print */
+ FILE* file) /*!< in: file where to print */
{
- ibuf_data_t* data;
-#ifdef UNIV_IBUF_DEBUG
+#ifdef UNIV_IBUF_COUNT_DEBUG
ulint i;
+ ulint j;
#endif
mutex_enter(&ibuf_mutex);
- data = UT_LIST_GET_FIRST(ibuf->data_list);
-
- while (data) {
- fprintf(file,
- "Ibuf: size %lu, free list len %lu, seg size %lu,\n"
- "%lu inserts, %lu merged recs, %lu merges\n",
- (ulong) data->size,
- (ulong) data->free_list_len,
- (ulong) data->seg_size,
- (ulong) data->n_inserts,
- (ulong) data->n_merged_recs,
- (ulong) data->n_merges);
-#ifdef UNIV_IBUF_DEBUG
- for (i = 0; i < IBUF_COUNT_N_PAGES; i++) {
- if (ibuf_count_get(data->space, i) > 0) {
-
+ fprintf(file,
+ "Ibuf: size %lu, free list len %lu, seg size %lu,\n"
+ "%lu inserts, %lu merged recs, %lu merges\n",
+ (ulong) ibuf->size,
+ (ulong) ibuf->free_list_len,
+ (ulong) ibuf->seg_size,
+ (ulong) ibuf->n_inserts,
+ (ulong) ibuf->n_merged_recs,
+ (ulong) ibuf->n_merges);
+#ifdef UNIV_IBUF_COUNT_DEBUG
+ for (i = 0; i < IBUF_COUNT_N_SPACES; i++) {
+ for (j = 0; j < IBUF_COUNT_N_PAGES; j++) {
+ ulint count = ibuf_count_get(i, j);
+
+ if (count > 0) {
fprintf(stderr,
- "Ibuf count for page %lu is %lu\n",
- (ulong) i,
- (ulong)
- ibuf_count_get(data->space, i));
+ "Ibuf count for space/page %lu/%lu"
+ " is %lu\n",
+ (ulong) i, (ulong) j, (ulong) count);
}
}
-#endif
- data = UT_LIST_GET_NEXT(data_list, data);
}
+#endif /* UNIV_IBUF_COUNT_DEBUG */
mutex_exit(&ibuf_mutex);
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
index 1573de7e818..d5c8258513c 100644
--- a/storage/innobase/include/btr0btr.h
+++ b/storage/innobase/include/btr0btr.h
@@ -1,7 +1,24 @@
-/******************************************************
-The B-tree
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
-(c) 1994-1996 Innobase Oy
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/btr0btr.h
+The B-tree
Created 6/2/1994 Heikki Tuuri
*******************************************************/
@@ -14,292 +31,330 @@ Created 6/2/1994 Heikki Tuuri
#include "dict0dict.h"
#include "data0data.h"
#include "page0cur.h"
-#include "rem0rec.h"
#include "mtr0mtr.h"
#include "btr0types.h"
-/* Maximum record size which can be stored on a page, without using the
+#ifndef UNIV_HOTBACKUP
+/** Maximum record size which can be stored on a page, without using the
special big record storage structure */
-
#define BTR_PAGE_MAX_REC_SIZE (UNIV_PAGE_SIZE / 2 - 200)
-/* Maximum depth of a B-tree in InnoDB. Note that this isn't a maximum as
-such; none of the tree operations avoid producing trees bigger than this. It
-is instead a "max depth that other code must work with", useful for e.g.
-fixed-size arrays that must store some information about each level in a
-tree. In other words: if a B-tree with bigger depth than this is
-encountered, it is not acceptable for it to lead to mysterious memory
-corruption, but it is acceptable for the program to die with a clear assert
-failure. */
-#define BTR_MAX_LEVELS 100
+/** @brief Maximum depth of a B-tree in InnoDB.
-/* Latching modes for btr_cur_search_to_nth_level(). */
-#define BTR_SEARCH_LEAF RW_S_LATCH
-#define BTR_MODIFY_LEAF RW_X_LATCH
-#define BTR_NO_LATCHES RW_NO_LATCH
-#define BTR_MODIFY_TREE 33
-#define BTR_CONT_MODIFY_TREE 34
-#define BTR_SEARCH_PREV 35
-#define BTR_MODIFY_PREV 36
+Note that this isn't a maximum as such; none of the tree operations
+avoid producing trees bigger than this. It is instead a "max depth
+that other code must work with", useful for e.g. fixed-size arrays
+that must store some information about each level in a tree. In other
+words: if a B-tree with bigger depth than this is encountered, it is
+not acceptable for it to lead to mysterious memory corruption, but it
+is acceptable for the program to die with a clear assert failure. */
+#define BTR_MAX_LEVELS 100
-/* If this is ORed to the latch mode, it means that the search tuple will be
-inserted to the index, at the searched position */
+/** Latching modes for btr_cur_search_to_nth_level(). */
+enum btr_latch_mode {
+ /** Search a record on a leaf page and S-latch it. */
+ BTR_SEARCH_LEAF = RW_S_LATCH,
+ /** (Prepare to) modify a record on a leaf page and X-latch it. */
+ BTR_MODIFY_LEAF = RW_X_LATCH,
+ /** Obtain no latches. */
+ BTR_NO_LATCHES = RW_NO_LATCH,
+ /** Start modifying the entire B-tree. */
+ BTR_MODIFY_TREE = 33,
+ /** Continue modifying the entire B-tree. */
+ BTR_CONT_MODIFY_TREE = 34,
+ /** Search the previous record. */
+ BTR_SEARCH_PREV = 35,
+ /** Modify the previous record. */
+ BTR_MODIFY_PREV = 36
+};
+
+/** If this is ORed to btr_latch_mode, it means that the search tuple
+will be inserted to the index, at the searched position */
#define BTR_INSERT 512
-/* This flag ORed to latch mode says that we do the search in query
+/** This flag ORed to btr_latch_mode says that we do the search in query
optimization */
#define BTR_ESTIMATE 1024
-/* This flag ORed to latch mode says that we can ignore possible
-UNIQUE definition on secondary indexes when we decide if we can use the
-insert buffer to speed up inserts */
+/** This flag ORed to btr_latch_mode says that we can ignore possible
+UNIQUE definition on secondary indexes when we decide if we can use
+the insert buffer to speed up inserts */
#define BTR_IGNORE_SEC_UNIQUE 2048
-/******************************************************************
-Gets the root node of a tree and x-latches it. */
-
+/**************************************************************//**
+Gets the root node of a tree and x-latches it.
+@return root page, x-latched */
+UNIV_INTERN
page_t*
btr_root_get(
/*=========*/
- /* out: root page, x-latched */
- dict_index_t* index, /* in: index tree */
- mtr_t* mtr); /* in: mtr */
-/******************************************************************
+ dict_index_t* index, /*!< in: index tree */
+ mtr_t* mtr); /*!< in: mtr */
+/**************************************************************//**
+Gets a buffer page and declares its latching order level. */
+UNIV_INLINE
+buf_block_t*
+btr_block_get(
+/*==========*/
+ ulint space, /*!< in: space id */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page_no, /*!< in: page number */
+ ulint mode, /*!< in: latch mode */
+ mtr_t* mtr); /*!< in: mtr */
+/**************************************************************//**
Gets a buffer page and declares its latching order level. */
UNIV_INLINE
page_t*
btr_page_get(
/*=========*/
- ulint space, /* in: space id */
- ulint page_no, /* in: page number */
- ulint mode, /* in: latch mode */
- mtr_t* mtr); /* in: mtr */
-/******************************************************************
-Gets the index id field of a page. */
+ ulint space, /*!< in: space id */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page_no, /*!< in: page number */
+ ulint mode, /*!< in: latch mode */
+ mtr_t* mtr); /*!< in: mtr */
+#endif /* !UNIV_HOTBACKUP */
+/**************************************************************//**
+Gets the index id field of a page.
+@return index id */
UNIV_INLINE
dulint
btr_page_get_index_id(
/*==================*/
- /* out: index id */
- page_t* page); /* in: index page */
-/************************************************************
-Gets the node level field in an index page. */
+ const page_t* page); /*!< in: index page */
+#ifndef UNIV_HOTBACKUP
+/********************************************************//**
+Gets the node level field in an index page.
+@return level, leaf level == 0 */
UNIV_INLINE
ulint
btr_page_get_level_low(
/*===================*/
- /* out: level, leaf level == 0 */
- page_t* page); /* in: index page */
-/************************************************************
-Gets the node level field in an index page. */
+ const page_t* page); /*!< in: index page */
+/********************************************************//**
+Gets the node level field in an index page.
+@return level, leaf level == 0 */
UNIV_INLINE
ulint
btr_page_get_level(
/*===============*/
- /* out: level, leaf level == 0 */
- page_t* page, /* in: index page */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************
-Gets the next index page number. */
+ const page_t* page, /*!< in: index page */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/********************************************************//**
+Gets the next index page number.
+@return next page number */
UNIV_INLINE
ulint
btr_page_get_next(
/*==============*/
- /* out: next page number */
- page_t* page, /* in: index page */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************
-Gets the previous index page number. */
+ const page_t* page, /*!< in: index page */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/********************************************************//**
+Gets the previous index page number.
+@return prev page number */
UNIV_INLINE
ulint
btr_page_get_prev(
/*==============*/
- /* out: prev page number */
- page_t* page, /* in: index page */
- mtr_t* mtr); /* in: mini-transaction handle */
-/*****************************************************************
+ const page_t* page, /*!< in: index page */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/*************************************************************//**
Gets pointer to the previous user record in the tree. It is assumed
-that the caller has appropriate latches on the page and its neighbor. */
-
+that the caller has appropriate latches on the page and its neighbor.
+@return previous user record, NULL if there is none */
+UNIV_INTERN
rec_t*
btr_get_prev_user_rec(
/*==================*/
- /* out: previous user record, NULL if there is none */
- rec_t* rec, /* in: record on leaf level */
- mtr_t* mtr); /* in: mtr holding a latch on the page, and if
+ rec_t* rec, /*!< in: record on leaf level */
+ mtr_t* mtr); /*!< in: mtr holding a latch on the page, and if
needed, also to the previous page */
-/*****************************************************************
+/*************************************************************//**
Gets pointer to the next user record in the tree. It is assumed
-that the caller has appropriate latches on the page and its neighbor. */
-
+that the caller has appropriate latches on the page and its neighbor.
+@return next user record, NULL if there is none */
+UNIV_INTERN
rec_t*
btr_get_next_user_rec(
/*==================*/
- /* out: next user record, NULL if there is none */
- rec_t* rec, /* in: record on leaf level */
- mtr_t* mtr); /* in: mtr holding a latch on the page, and if
+ rec_t* rec, /*!< in: record on leaf level */
+ mtr_t* mtr); /*!< in: mtr holding a latch on the page, and if
needed, also to the next page */
-/******************************************************************
+/**************************************************************//**
Releases the latch on a leaf page and bufferunfixes it. */
UNIV_INLINE
void
btr_leaf_page_release(
/*==================*/
- page_t* page, /* in: page */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF or BTR_MODIFY_LEAF */
- mtr_t* mtr); /* in: mtr */
-/******************************************************************
-Gets the child node file address in a node pointer. */
+ buf_block_t* block, /*!< in: buffer block */
+ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or
+ BTR_MODIFY_LEAF */
+ mtr_t* mtr); /*!< in: mtr */
+/**************************************************************//**
+Gets the child node file address in a node pointer.
+@return child node address */
UNIV_INLINE
ulint
btr_node_ptr_get_child_page_no(
/*===========================*/
- /* out: child node address */
- rec_t* rec, /* in: node pointer record */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/****************************************************************
-Creates the root node for a new index tree. */
-
+ const rec_t* rec, /*!< in: node pointer record */
+ const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+/************************************************************//**
+Creates the root node for a new index tree.
+@return page number of the created root, FIL_NULL if did not succeed */
+UNIV_INTERN
ulint
btr_create(
/*=======*/
- /* out: page number of the created root, FIL_NULL if
- did not succeed */
- ulint type, /* in: type of the index */
- ulint space, /* in: space where created */
- dulint index_id,/* in: index id */
- ulint comp, /* in: nonzero=compact page format */
- mtr_t* mtr); /* in: mini-transaction handle */
-/****************************************************************
+ ulint type, /*!< in: type of the index */
+ ulint space, /*!< in: space where created */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ dulint index_id,/*!< in: index id */
+ dict_index_t* index, /*!< in: index */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/************************************************************//**
Frees a B-tree except the root page, which MUST be freed after this
by calling btr_free_root. */
-
+UNIV_INTERN
void
btr_free_but_not_root(
/*==================*/
- ulint space, /* in: space where created */
- ulint root_page_no); /* in: root page number */
-/****************************************************************
+ ulint space, /*!< in: space where created */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint root_page_no); /*!< in: root page number */
+/************************************************************//**
Frees the B-tree root page. Other tree MUST already have been freed. */
-
+UNIV_INTERN
void
btr_free_root(
/*==========*/
- ulint space, /* in: space where created */
- ulint root_page_no, /* in: root page number */
- mtr_t* mtr); /* in: a mini-transaction which has already
+ ulint space, /*!< in: space where created */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint root_page_no, /*!< in: root page number */
+ mtr_t* mtr); /*!< in: a mini-transaction which has already
been started */
-/*****************************************************************
+/*************************************************************//**
Makes tree one level higher by splitting the root, and inserts
the tuple. It is assumed that mtr contains an x-latch on the tree.
NOTE that the operation of this function must always succeed,
we cannot reverse it: therefore enough free disk space must be
-guaranteed to be available before this function is called. */
-
+guaranteed to be available before this function is called.
+@return inserted record */
+UNIV_INTERN
rec_t*
btr_root_raise_and_insert(
/*======================*/
- /* out: inserted record */
- btr_cur_t* cursor, /* in: cursor at which to insert: must be
+ btr_cur_t* cursor, /*!< in: cursor at which to insert: must be
on the root page; when the function returns,
the cursor is positioned on the predecessor
of the inserted record */
- dtuple_t* tuple, /* in: tuple to insert */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
-Reorganizes an index page. */
-
-void
+ const dtuple_t* tuple, /*!< in: tuple to insert */
+ ulint n_ext, /*!< in: number of externally stored columns */
+ mtr_t* mtr); /*!< in: mtr */
+/*************************************************************//**
+Reorganizes an index page.
+IMPORTANT: if btr_page_reorganize() is invoked on a compressed leaf
+page of a non-clustered index, the caller must update the insert
+buffer free bits in the same mini-transaction in such a way that the
+modification will be redo-logged.
+@return TRUE on success, FALSE on failure */
+UNIV_INTERN
+ibool
btr_page_reorganize(
/*================*/
- page_t* page, /* in: page to be reorganized */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
+ buf_block_t* block, /*!< in: page to be reorganized */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr); /*!< in: mtr */
+/*************************************************************//**
Decides if the page should be split at the convergence point of
-inserts converging to left. */
-
+inserts converging to left.
+@return TRUE if split recommended */
+UNIV_INTERN
ibool
btr_page_get_split_rec_to_left(
/*===========================*/
- /* out: TRUE if split recommended */
- btr_cur_t* cursor, /* in: cursor at which to insert */
- rec_t** split_rec);/* out: if split recommended,
+ btr_cur_t* cursor, /*!< in: cursor at which to insert */
+ rec_t** split_rec);/*!< out: if split recommended,
the first record on upper half page,
or NULL if tuple should be first */
-/*****************************************************************
+/*************************************************************//**
Decides if the page should be split at the convergence point of
-inserts converging to right. */
-
+inserts converging to right.
+@return TRUE if split recommended */
+UNIV_INTERN
ibool
btr_page_get_split_rec_to_right(
/*============================*/
- /* out: TRUE if split recommended */
- btr_cur_t* cursor, /* in: cursor at which to insert */
- rec_t** split_rec);/* out: if split recommended,
+ btr_cur_t* cursor, /*!< in: cursor at which to insert */
+ rec_t** split_rec);/*!< out: if split recommended,
the first record on upper half page,
or NULL if tuple should be first */
-/*****************************************************************
+/*************************************************************//**
Splits an index page to halves and inserts the tuple. It is assumed
-that mtr holds an x-latch to the index tree. NOTE: the tree x-latch
-is released within this function! NOTE that the operation of this
-function must always succeed, we cannot reverse it: therefore
-enough free disk space must be guaranteed to be available before
-this function is called. */
-
+that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is
+released within this function! NOTE that the operation of this
+function must always succeed, we cannot reverse it: therefore enough
+free disk space (2 pages) must be guaranteed to be available before
+this function is called.
+
+@return inserted record */
+UNIV_INTERN
rec_t*
btr_page_split_and_insert(
/*======================*/
- /* out: inserted record; NOTE: the tree
- x-latch is released! NOTE: 2 free disk
- pages must be available! */
- btr_cur_t* cursor, /* in: cursor at which to insert; when the
+ btr_cur_t* cursor, /*!< in: cursor at which to insert; when the
function returns, the cursor is positioned
on the predecessor of the inserted record */
- dtuple_t* tuple, /* in: tuple to insert */
- mtr_t* mtr); /* in: mtr */
-/***********************************************************
+ const dtuple_t* tuple, /*!< in: tuple to insert */
+ ulint n_ext, /*!< in: number of externally stored columns */
+ mtr_t* mtr); /*!< in: mtr */
+/*******************************************************//**
Inserts a data tuple to a tree on a non-leaf level. It is assumed
that mtr holds an x-latch on the tree. */
-
+UNIV_INTERN
void
btr_insert_on_non_leaf_level(
/*=========================*/
- dict_index_t* index, /* in: index */
- ulint level, /* in: level, must be > 0 */
- dtuple_t* tuple, /* in: the record to be inserted */
- mtr_t* mtr); /* in: mtr */
-/********************************************************************
+ dict_index_t* index, /*!< in: index */
+ ulint level, /*!< in: level, must be > 0 */
+ dtuple_t* tuple, /*!< in: the record to be inserted */
+ mtr_t* mtr); /*!< in: mtr */
+#endif /* !UNIV_HOTBACKUP */
+/****************************************************************//**
Sets a record as the predefined minimum record. */
-
+UNIV_INTERN
void
btr_set_min_rec_mark(
/*=================*/
- rec_t* rec, /* in: record */
- ulint comp, /* in: nonzero=compact page format */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
+ rec_t* rec, /*!< in/out: record */
+ mtr_t* mtr); /*!< in: mtr */
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
Deletes on the upper level the node pointer to a page. */
-
+UNIV_INTERN
void
btr_node_ptr_delete(
/*================*/
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: page whose node pointer is deleted */
- mtr_t* mtr); /* in: mtr */
+ dict_index_t* index, /*!< in: index tree */
+ buf_block_t* block, /*!< in: page whose node pointer is deleted */
+ mtr_t* mtr); /*!< in: mtr */
#ifdef UNIV_DEBUG
-/****************************************************************
-Checks that the node pointer to a page is appropriate. */
-
+/************************************************************//**
+Checks that the node pointer to a page is appropriate.
+@return TRUE */
+UNIV_INTERN
ibool
btr_check_node_ptr(
/*===============*/
- /* out: TRUE */
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: index page */
- mtr_t* mtr); /* in: mtr */
+ dict_index_t* index, /*!< in: index tree */
+ buf_block_t* block, /*!< in: index page */
+ mtr_t* mtr); /*!< in: mtr */
#endif /* UNIV_DEBUG */
-/*****************************************************************
+/*************************************************************//**
Tries to merge the page first to the left immediate brother if such a
brother exists, and the node pointers to the current page and to the
brother reside on the same page. If the left brother does not satisfy these
@@ -307,142 +362,145 @@ conditions, looks at the right brother. If the page is the only one on that
level lifts the records of the page to the father page, thus reducing the
tree height. It is assumed that mtr holds an x-latch on the tree and on the
page. If cursor is on the leaf level, mtr must also hold x-latches to
-the brothers, if they exist. NOTE: it is assumed that the caller has reserved
-enough free extents so that the compression will always succeed if done! */
-void
+the brothers, if they exist.
+@return TRUE on success */
+UNIV_INTERN
+ibool
btr_compress(
/*=========*/
- btr_cur_t* cursor, /* in: cursor on the page to merge or lift;
+ btr_cur_t* cursor, /*!< in: cursor on the page to merge or lift;
the page must not be empty: in record delete
use btr_discard_page if the page would become
empty */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
+ mtr_t* mtr); /*!< in: mtr */
+/*************************************************************//**
Discards a page from a B-tree. This is used to remove the last record from
a B-tree page: the whole page must be removed at the same time. This cannot
be used for the root page, which is allowed to be empty. */
-
+UNIV_INTERN
void
btr_discard_page(
/*=============*/
- btr_cur_t* cursor, /* in: cursor on the page to discard: not on
+ btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on
the root page */
- mtr_t* mtr); /* in: mtr */
-/********************************************************************
+ mtr_t* mtr); /*!< in: mtr */
+#endif /* !UNIV_HOTBACKUP */
+/****************************************************************//**
Parses the redo log record for setting an index record as the predefined
-minimum record. */
-
+minimum record.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
btr_parse_set_min_rec_mark(
/*=======================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- ulint comp, /* in: nonzero=compact page format */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
-/***************************************************************
-Parses a redo log record of reorganizing a page. */
-
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ ulint comp, /*!< in: nonzero=compact page format */
+ page_t* page, /*!< in: page or NULL */
+ mtr_t* mtr); /*!< in: mtr or NULL */
+/***********************************************************//**
+Parses a redo log record of reorganizing a page.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
btr_parse_page_reorganize(
/*======================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- dict_index_t* index, /* in: record descriptor */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
-/******************************************************************
-Gets the number of pages in a B-tree. */
-
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ dict_index_t* index, /*!< in: record descriptor */
+ buf_block_t* block, /*!< in: page to be reorganized, or NULL */
+ mtr_t* mtr); /*!< in: mtr or NULL */
+#ifndef UNIV_HOTBACKUP
+/**************************************************************//**
+Gets the number of pages in a B-tree.
+@return number of pages */
+UNIV_INTERN
ulint
btr_get_size(
/*=========*/
- /* out: number of pages */
- dict_index_t* index, /* in: index */
- ulint flag); /* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
-/******************************************************************
+ dict_index_t* index, /*!< in: index */
+ ulint flag); /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
+/**************************************************************//**
Allocates a new file page to be used in an index tree. NOTE: we assume
-that the caller has made the reservation for free extents! */
-
-page_t*
+that the caller has made the reservation for free extents!
+@return new allocated block, x-latched; NULL if out of space */
+UNIV_INTERN
+buf_block_t*
btr_page_alloc(
/*===========*/
- /* out: new allocated page, x-latched;
- NULL if out of space */
- dict_index_t* index, /* in: index tree */
- ulint hint_page_no, /* in: hint of a good page */
- byte file_direction, /* in: direction where a possible
+ dict_index_t* index, /*!< in: index tree */
+ ulint hint_page_no, /*!< in: hint of a good page */
+ byte file_direction, /*!< in: direction where a possible
page split is made */
- ulint level, /* in: level where the page is placed
+ ulint level, /*!< in: level where the page is placed
in the tree */
- mtr_t* mtr); /* in: mtr */
-/******************************************************************
+ mtr_t* mtr); /*!< in: mtr */
+/**************************************************************//**
Frees a file page used in an index tree. NOTE: cannot free field external
storage pages because the page must contain info on its level. */
-
+UNIV_INTERN
void
btr_page_free(
/*==========*/
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: page to be freed, x-latched */
- mtr_t* mtr); /* in: mtr */
-/******************************************************************
+ dict_index_t* index, /*!< in: index tree */
+ buf_block_t* block, /*!< in: block to be freed, x-latched */
+ mtr_t* mtr); /*!< in: mtr */
+/**************************************************************//**
Frees a file page used in an index tree. Can be used also to BLOB
external storage pages, because the page level 0 can be given as an
argument. */
-
+UNIV_INTERN
void
btr_page_free_low(
/*==============*/
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: page to be freed, x-latched */
- ulint level, /* in: page level */
- mtr_t* mtr); /* in: mtr */
+ dict_index_t* index, /*!< in: index tree */
+ buf_block_t* block, /*!< in: block to be freed, x-latched */
+ ulint level, /*!< in: page level */
+ mtr_t* mtr); /*!< in: mtr */
#ifdef UNIV_BTR_PRINT
-/*****************************************************************
+/*************************************************************//**
Prints size info of a B-tree. */
-
+UNIV_INTERN
void
btr_print_size(
/*===========*/
- dict_index_t* index); /* in: index tree */
-/******************************************************************
+ dict_index_t* index); /*!< in: index tree */
+/**************************************************************//**
Prints directories and other info of all nodes in the index. */
-
+UNIV_INTERN
void
btr_print_index(
/*============*/
- dict_index_t* index, /* in: index */
- ulint width); /* in: print this many entries from start
+ dict_index_t* index, /*!< in: index */
+ ulint width); /*!< in: print this many entries from start
and end */
#endif /* UNIV_BTR_PRINT */
-/****************************************************************
+/************************************************************//**
Checks the size and number of fields in a record based on the definition of
-the index. */
-
+the index.
+@return TRUE if ok */
+UNIV_INTERN
ibool
btr_index_rec_validate(
/*===================*/
- /* out: TRUE if ok */
- rec_t* rec, /* in: index record */
- dict_index_t* index, /* in: index */
- ibool dump_on_error); /* in: TRUE if the function
- should print hex dump of record
- and page on error */
-/******************************************************************
-Checks the consistency of an index tree. */
-
+ const rec_t* rec, /*!< in: index record */
+ const dict_index_t* index, /*!< in: index */
+ ibool dump_on_error); /*!< in: TRUE if the function
+ should print hex dump of record
+ and page on error */
+/**************************************************************//**
+Checks the consistency of an index tree.
+@return TRUE if ok */
+UNIV_INTERN
ibool
btr_validate_index(
/*===============*/
- /* out: TRUE if ok */
- dict_index_t* index, /* in: index */
- trx_t* trx); /* in: transaction or NULL */
+ dict_index_t* index, /*!< in: index */
+ trx_t* trx); /*!< in: transaction or NULL */
#define BTR_N_LEAF_PAGES 1
#define BTR_TOTAL_SIZE 2
+#endif /* !UNIV_HOTBACKUP */
#ifndef UNIV_NONINL
#include "btr0btr.ic"
diff --git a/storage/innobase/include/btr0btr.ic b/storage/innobase/include/btr0btr.ic
index 4a88f58b318..2259d22c9a6 100644
--- a/storage/innobase/include/btr0btr.ic
+++ b/storage/innobase/include/btr0btr.ic
@@ -1,73 +1,127 @@
-/******************************************************
-The B-tree
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1994-1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/btr0btr.ic
+The B-tree
Created 6/2/1994 Heikki Tuuri
*******************************************************/
#include "mach0data.h"
+#ifndef UNIV_HOTBACKUP
#include "mtr0mtr.h"
#include "mtr0log.h"
+#include "page0zip.h"
-#define BTR_MAX_NODE_LEVEL 50 /* used in debug checking */
+#define BTR_MAX_NODE_LEVEL 50 /*!< Maximum B-tree page level
+ (not really a hard limit).
+ Used in debug assertions
+ in btr_page_set_level and
+ btr_page_get_level_low */
-/******************************************************************
+/**************************************************************//**
Gets a buffer page and declares its latching order level. */
UNIV_INLINE
-page_t*
-btr_page_get(
-/*=========*/
- ulint space, /* in: space id */
- ulint page_no, /* in: page number */
- ulint mode, /* in: latch mode */
- mtr_t* mtr) /* in: mtr */
+buf_block_t*
+btr_block_get(
+/*==========*/
+ ulint space, /*!< in: space id */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page_no, /*!< in: page number */
+ ulint mode, /*!< in: latch mode */
+ mtr_t* mtr) /*!< in: mtr */
{
- page_t* page;
+ buf_block_t* block;
+
+ block = buf_page_get(space, zip_size, page_no, mode, mtr);
- page = buf_page_get(space, page_no, mode, mtr);
-#ifdef UNIV_SYNC_DEBUG
if (mode != RW_NO_LATCH) {
- buf_page_dbg_add_level(page, SYNC_TREE_NODE);
+ buf_block_dbg_add_level(block, SYNC_TREE_NODE);
}
-#endif
- return(page);
+
+ return(block);
}
-/******************************************************************
+/**************************************************************//**
+Gets a buffer page and declares its latching order level. */
+UNIV_INLINE
+page_t*
+btr_page_get(
+/*=========*/
+ ulint space, /*!< in: space id */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page_no, /*!< in: page number */
+ ulint mode, /*!< in: latch mode */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ return(buf_block_get_frame(btr_block_get(space, zip_size, page_no,
+ mode, mtr)));
+}
+
+/**************************************************************//**
Sets the index id field of a page. */
UNIV_INLINE
void
btr_page_set_index_id(
/*==================*/
- page_t* page, /* in: page to be created */
- dulint id, /* in: index id */
- mtr_t* mtr) /* in: mtr */
+ page_t* page, /*!< in: page to be created */
+ page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
+ part will be updated, or NULL */
+ dulint id, /*!< in: index id */
+ mtr_t* mtr) /*!< in: mtr */
{
- mlog_write_dulint(page + PAGE_HEADER + PAGE_INDEX_ID, id, mtr);
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ mach_write_to_8(page + (PAGE_HEADER + PAGE_INDEX_ID), id);
+ page_zip_write_header(page_zip,
+ page + (PAGE_HEADER + PAGE_INDEX_ID),
+ 8, mtr);
+ } else {
+ mlog_write_dulint(page + (PAGE_HEADER + PAGE_INDEX_ID),
+ id, mtr);
+ }
}
+#endif /* !UNIV_HOTBACKUP */
-/******************************************************************
-Gets the index id field of a page. */
+/**************************************************************//**
+Gets the index id field of a page.
+@return index id */
UNIV_INLINE
dulint
btr_page_get_index_id(
/*==================*/
- /* out: index id */
- page_t* page) /* in: index page */
+ const page_t* page) /*!< in: index page */
{
return(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID));
}
-/************************************************************
-Gets the node level field in an index page. */
+#ifndef UNIV_HOTBACKUP
+/********************************************************//**
+Gets the node level field in an index page.
+@return level, leaf level == 0 */
UNIV_INLINE
ulint
btr_page_get_level_low(
/*===================*/
- /* out: level, leaf level == 0 */
- page_t* page) /* in: index page */
+ const page_t* page) /*!< in: index page */
{
ulint level;
@@ -80,115 +134,138 @@ btr_page_get_level_low(
return(level);
}
-/************************************************************
-Gets the node level field in an index page. */
+/********************************************************//**
+Gets the node level field in an index page.
+@return level, leaf level == 0 */
UNIV_INLINE
ulint
btr_page_get_level(
/*===============*/
- /* out: level, leaf level == 0 */
- page_t* page, /* in: index page */
- mtr_t* mtr __attribute__((unused))) /* in: mini-transaction handle */
+ const page_t* page, /*!< in: index page */
+ mtr_t* mtr __attribute__((unused)))
+ /*!< in: mini-transaction handle */
{
ut_ad(page && mtr);
return(btr_page_get_level_low(page));
}
-/************************************************************
+/********************************************************//**
Sets the node level field in an index page. */
UNIV_INLINE
void
btr_page_set_level(
/*===============*/
- page_t* page, /* in: index page */
- ulint level, /* in: level, leaf level == 0 */
- mtr_t* mtr) /* in: mini-transaction handle */
+ page_t* page, /*!< in: index page */
+ page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
+ part will be updated, or NULL */
+ ulint level, /*!< in: level, leaf level == 0 */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
ut_ad(page && mtr);
ut_ad(level <= BTR_MAX_NODE_LEVEL);
- mlog_write_ulint(page + PAGE_HEADER + PAGE_LEVEL, level,
- MLOG_2BYTES, mtr);
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ mach_write_to_2(page + (PAGE_HEADER + PAGE_LEVEL), level);
+ page_zip_write_header(page_zip,
+ page + (PAGE_HEADER + PAGE_LEVEL),
+ 2, mtr);
+ } else {
+ mlog_write_ulint(page + (PAGE_HEADER + PAGE_LEVEL), level,
+ MLOG_2BYTES, mtr);
+ }
}
-/************************************************************
-Gets the next index page number. */
+/********************************************************//**
+Gets the next index page number.
+@return next page number */
UNIV_INLINE
ulint
btr_page_get_next(
/*==============*/
- /* out: next page number */
- page_t* page, /* in: index page */
- mtr_t* mtr __attribute__((unused))) /* in: mini-transaction handle */
+ const page_t* page, /*!< in: index page */
+ mtr_t* mtr __attribute__((unused)))
+ /*!< in: mini-transaction handle */
{
ut_ad(page && mtr);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_S_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX)
+ || mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_S_FIX));
return(mach_read_from_4(page + FIL_PAGE_NEXT));
}
-/************************************************************
+/********************************************************//**
Sets the next index page field. */
UNIV_INLINE
void
btr_page_set_next(
/*==============*/
- page_t* page, /* in: index page */
- ulint next, /* in: next page number */
- mtr_t* mtr) /* in: mini-transaction handle */
+ page_t* page, /*!< in: index page */
+ page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
+ part will be updated, or NULL */
+ ulint next, /*!< in: next page number */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
ut_ad(page && mtr);
- mlog_write_ulint(page + FIL_PAGE_NEXT, next, MLOG_4BYTES, mtr);
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ mach_write_to_4(page + FIL_PAGE_NEXT, next);
+ page_zip_write_header(page_zip, page + FIL_PAGE_NEXT, 4, mtr);
+ } else {
+ mlog_write_ulint(page + FIL_PAGE_NEXT, next, MLOG_4BYTES, mtr);
+ }
}
-/************************************************************
-Gets the previous index page number. */
+/********************************************************//**
+Gets the previous index page number.
+@return prev page number */
UNIV_INLINE
ulint
btr_page_get_prev(
/*==============*/
- /* out: prev page number */
- page_t* page, /* in: index page */
- mtr_t* mtr __attribute__((unused))) /* in: mini-transaction handle */
+ const page_t* page, /*!< in: index page */
+ mtr_t* mtr __attribute__((unused))) /*!< in: mini-transaction handle */
{
ut_ad(page && mtr);
return(mach_read_from_4(page + FIL_PAGE_PREV));
}
-/************************************************************
+/********************************************************//**
Sets the previous index page field. */
UNIV_INLINE
void
btr_page_set_prev(
/*==============*/
- page_t* page, /* in: index page */
- ulint prev, /* in: previous page number */
- mtr_t* mtr) /* in: mini-transaction handle */
+ page_t* page, /*!< in: index page */
+ page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
+ part will be updated, or NULL */
+ ulint prev, /*!< in: previous page number */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
ut_ad(page && mtr);
- mlog_write_ulint(page + FIL_PAGE_PREV, prev, MLOG_4BYTES, mtr);
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ mach_write_to_4(page + FIL_PAGE_PREV, prev);
+ page_zip_write_header(page_zip, page + FIL_PAGE_PREV, 4, mtr);
+ } else {
+ mlog_write_ulint(page + FIL_PAGE_PREV, prev, MLOG_4BYTES, mtr);
+ }
}
-/******************************************************************
-Gets the child node file address in a node pointer. */
+/**************************************************************//**
+Gets the child node file address in a node pointer.
+@return child node address */
UNIV_INLINE
ulint
btr_node_ptr_get_child_page_no(
/*===========================*/
- /* out: child node address */
- rec_t* rec, /* in: node pointer record */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
+ const rec_t* rec, /*!< in: node pointer record */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
- byte* field;
- ulint len;
- ulint page_no;
+ const byte* field;
+ ulint len;
+ ulint page_no;
ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));
@@ -205,30 +282,29 @@ btr_node_ptr_get_child_page_no(
"InnoDB: a nonsensical page number 0"
" in a node ptr record at offset %lu\n",
(ulong) page_offset(rec));
- buf_page_print(buf_frame_align(rec));
+ buf_page_print(page_align(rec), 0);
}
return(page_no);
}
-/******************************************************************
+/**************************************************************//**
Releases the latches on a leaf page and buffer-unfixes it. */
UNIV_INLINE
void
btr_leaf_page_release(
/*==================*/
- page_t* page, /* in: page */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF or BTR_MODIFY_LEAF */
- mtr_t* mtr) /* in: mtr */
+ buf_block_t* block, /*!< in: buffer block */
+ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or
+ BTR_MODIFY_LEAF */
+ mtr_t* mtr) /*!< in: mtr */
{
- ut_ad(!mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_MODIFY));
- if (latch_mode == BTR_SEARCH_LEAF) {
- mtr_memo_release(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_S_FIX);
- } else {
- ut_ad(latch_mode == BTR_MODIFY_LEAF);
- mtr_memo_release(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX);
- }
+ ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF);
+ ut_ad(!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY));
+
+ mtr_memo_release(mtr, block,
+ latch_mode == BTR_SEARCH_LEAF
+ ? MTR_MEMO_PAGE_S_FIX
+ : MTR_MEMO_PAGE_X_FIX);
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
index 213dcb7f568..480a3877e54 100644
--- a/storage/innobase/include/btr0cur.h
+++ b/storage/innobase/include/btr0cur.h
@@ -1,7 +1,24 @@
-/******************************************************
-The index tree cursor
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1994-1996 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/btr0cur.h
+The index tree cursor
Created 10/16/1994 Heikki Tuuri
*******************************************************/
@@ -11,12 +28,8 @@ Created 10/16/1994 Heikki Tuuri
#include "univ.i"
#include "dict0dict.h"
-#include "data0data.h"
#include "page0cur.h"
#include "btr0types.h"
-#include "que0types.h"
-#include "row0types.h"
-#include "ha0ha.h"
/* Mode flags for btr_cur operations; these can be ORed */
#define BTR_NO_UNDO_LOG_FLAG 1 /* do no undo logging */
@@ -24,83 +37,109 @@ Created 10/16/1994 Heikki Tuuri
#define BTR_KEEP_SYS_FLAG 4 /* sys fields will be found from the
update vector or inserted entry */
+#ifndef UNIV_HOTBACKUP
+#include "que0types.h"
+#include "row0types.h"
+#include "ha0ha.h"
+
#define BTR_CUR_ADAPT
#define BTR_CUR_HASH_ADAPT
-/*************************************************************
-Returns the page cursor component of a tree cursor. */
+#ifdef UNIV_DEBUG
+/*********************************************************//**
+Returns the page cursor component of a tree cursor.
+@return pointer to page cursor component */
UNIV_INLINE
page_cur_t*
btr_cur_get_page_cur(
/*=================*/
- /* out: pointer to page cursor component */
- btr_cur_t* cursor);/* in: tree cursor */
-/*************************************************************
-Returns the record pointer of a tree cursor. */
+ const btr_cur_t* cursor);/*!< in: tree cursor */
+#else /* UNIV_DEBUG */
+# define btr_cur_get_page_cur(cursor) (&(cursor)->page_cur)
+#endif /* UNIV_DEBUG */
+/*********************************************************//**
+Returns the buffer block on which the tree cursor is positioned.
+@return pointer to buffer block */
+UNIV_INLINE
+buf_block_t*
+btr_cur_get_block(
+/*==============*/
+ btr_cur_t* cursor);/*!< in: tree cursor */
+/*********************************************************//**
+Returns the record pointer of a tree cursor.
+@return pointer to record */
UNIV_INLINE
rec_t*
btr_cur_get_rec(
/*============*/
- /* out: pointer to record */
- btr_cur_t* cursor);/* in: tree cursor */
-/*************************************************************
+ btr_cur_t* cursor);/*!< in: tree cursor */
+/*********************************************************//**
+Returns the compressed page on which the tree cursor is positioned.
+@return pointer to compressed page, or NULL if the page is not compressed */
+UNIV_INLINE
+page_zip_des_t*
+btr_cur_get_page_zip(
+/*=================*/
+ btr_cur_t* cursor);/*!< in: tree cursor */
+/*********************************************************//**
Invalidates a tree cursor by setting record pointer to NULL. */
UNIV_INLINE
void
btr_cur_invalidate(
/*===============*/
- btr_cur_t* cursor);/* in: tree cursor */
-/*************************************************************
-Returns the page of a tree cursor. */
+ btr_cur_t* cursor);/*!< in: tree cursor */
+/*********************************************************//**
+Returns the page of a tree cursor.
+@return pointer to page */
UNIV_INLINE
page_t*
btr_cur_get_page(
/*=============*/
- /* out: pointer to page */
- btr_cur_t* cursor);/* in: tree cursor */
-/*************************************************************
-Returns the index of a cursor. */
+ btr_cur_t* cursor);/*!< in: tree cursor */
+/*********************************************************//**
+Returns the index of a cursor.
+@return index */
UNIV_INLINE
dict_index_t*
btr_cur_get_index(
/*==============*/
- /* out: index */
- btr_cur_t* cursor);/* in: B-tree cursor */
-/*************************************************************
+ btr_cur_t* cursor);/*!< in: B-tree cursor */
+/*********************************************************//**
Positions a tree cursor at a given record. */
UNIV_INLINE
void
btr_cur_position(
/*=============*/
- dict_index_t* index, /* in: index */
- rec_t* rec, /* in: record in tree */
- btr_cur_t* cursor);/* in: cursor */
-/************************************************************************
+ dict_index_t* index, /*!< in: index */
+ rec_t* rec, /*!< in: record in tree */
+ buf_block_t* block, /*!< in: buffer block of rec */
+ btr_cur_t* cursor);/*!< in: cursor */
+/********************************************************************//**
Searches an index tree and positions a tree cursor on a given level.
NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
to node pointer page number fields on the upper levels of the tree!
Note that if mode is PAGE_CUR_LE, which is used in inserts, then
cursor->up_match and cursor->low_match both will have sensible values.
If mode is PAGE_CUR_GE, then up_match will have a sensible value. */
-
+UNIV_INTERN
void
btr_cur_search_to_nth_level(
/*========================*/
- dict_index_t* index, /* in: index */
- ulint level, /* in: the tree level of search */
- dtuple_t* tuple, /* in: data tuple; NOTE: n_fields_cmp in
+ dict_index_t* index, /*!< in: index */
+ ulint level, /*!< in: the tree level of search */
+ const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in
tuple must be set so that it cannot get
compared to the node ptr page number field! */
- ulint mode, /* in: PAGE_CUR_L, ...;
+ ulint mode, /*!< in: PAGE_CUR_L, ...;
NOTE that if the search is made using a unique
prefix of a record, mode should be PAGE_CUR_LE,
not PAGE_CUR_GE, as the latter may end up on
the previous page of the record! Inserts
should always be made using PAGE_CUR_LE to
search the position! */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF, ..., ORed with
+ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with
BTR_INSERT and BTR_ESTIMATE;
- cursor->left_page is used to store a pointer
+ cursor->left_block is used to store a pointer
to the left neighbor page, in the cases
BTR_SEARCH_PREV and BTR_MODIFY_PREV;
NOTE that if has_search_latch
@@ -108,375 +147,379 @@ btr_cur_search_to_nth_level(
on the cursor page, we assume
the caller uses his search latch
to protect the record! */
- btr_cur_t* cursor, /* in/out: tree cursor; the cursor page is
+ btr_cur_t* cursor, /*!< in/out: tree cursor; the cursor page is
s- or x-latched, but see also above! */
- ulint has_search_latch,/* in: latch mode the caller
+ ulint has_search_latch,/*!< in: latch mode the caller
currently has on btr_search_latch:
RW_S_LATCH, or 0 */
- mtr_t* mtr); /* in: mtr */
-/*********************************************************************
+ mtr_t* mtr); /*!< in: mtr */
+/*****************************************************************//**
Opens a cursor at either end of an index. */
-
+UNIV_INTERN
void
btr_cur_open_at_index_side(
/*=======================*/
- ibool from_left, /* in: TRUE if open to the low end,
+ ibool from_left, /*!< in: TRUE if open to the low end,
FALSE if to the high end */
- dict_index_t* index, /* in: index */
- ulint latch_mode, /* in: latch mode */
- btr_cur_t* cursor, /* in: cursor */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
+ dict_index_t* index, /*!< in: index */
+ ulint latch_mode, /*!< in: latch mode */
+ btr_cur_t* cursor, /*!< in: cursor */
+ mtr_t* mtr); /*!< in: mtr */
+/**********************************************************************//**
Positions a cursor at a randomly chosen position within a B-tree. */
-
+UNIV_INTERN
void
btr_cur_open_at_rnd_pos(
/*====================*/
- dict_index_t* index, /* in: index */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
- btr_cur_t* cursor, /* in/out: B-tree cursor */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
+ dict_index_t* index, /*!< in: index */
+ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
+ btr_cur_t* cursor, /*!< in/out: B-tree cursor */
+ mtr_t* mtr); /*!< in: mtr */
+/*************************************************************//**
Tries to perform an insert to a page in an index tree, next to cursor.
It is assumed that mtr holds an x-latch on the page. The operation does
not succeed if there is too little space on the page. If there is just
one record on the page, the insert will always succeed; this is to
-prevent trying to split a page with just one record. */
-
+prevent trying to split a page with just one record.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_FAIL, or error number */
+UNIV_INTERN
ulint
btr_cur_optimistic_insert(
/*======================*/
- /* out: DB_SUCCESS, DB_WAIT_LOCK,
- DB_FAIL, or error number */
- ulint flags, /* in: undo logging and locking flags: if not
+ ulint flags, /*!< in: undo logging and locking flags: if not
zero, the parameters index and thr should be
specified */
- btr_cur_t* cursor, /* in: cursor on page after which to insert;
+ btr_cur_t* cursor, /*!< in: cursor on page after which to insert;
cursor stays valid */
- dtuple_t* entry, /* in: entry to insert */
- rec_t** rec, /* out: pointer to inserted record if
+ dtuple_t* entry, /*!< in/out: entry to insert */
+ rec_t** rec, /*!< out: pointer to inserted record if
the insert succeeds */
- big_rec_t** big_rec,/* out: big rec vector whose fields have to
+ big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
be stored externally by the caller, or
NULL */
- que_thr_t* thr, /* in: query thread or NULL */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
+ ulint n_ext, /*!< in: number of externally stored columns */
+ que_thr_t* thr, /*!< in: query thread or NULL */
+ mtr_t* mtr); /*!< in: mtr; if this function returns
+ DB_SUCCESS on a leaf page of a secondary
+ index in a compressed tablespace, the
+ mtr must be committed before latching
+ any further pages */
+/*************************************************************//**
Performs an insert on a page of an index tree. It is assumed that mtr
holds an x-latch on the tree and on the cursor page. If the insert is
made on the leaf level, to avoid deadlocks, mtr must also own x-latches
-to brothers of page, if those brothers exist. */
-
+to brothers of page, if those brothers exist.
+@return DB_SUCCESS or error number */
+UNIV_INTERN
ulint
btr_cur_pessimistic_insert(
/*=======================*/
- /* out: DB_SUCCESS or error number */
- ulint flags, /* in: undo logging and locking flags: if not
+ ulint flags, /*!< in: undo logging and locking flags: if not
zero, the parameter thr should be
specified; if no undo logging is specified,
then the caller must have reserved enough
free extents in the file space so that the
insertion will certainly succeed */
- btr_cur_t* cursor, /* in: cursor after which to insert;
+ btr_cur_t* cursor, /*!< in: cursor after which to insert;
cursor stays valid */
- dtuple_t* entry, /* in: entry to insert */
- rec_t** rec, /* out: pointer to inserted record if
+ dtuple_t* entry, /*!< in/out: entry to insert */
+ rec_t** rec, /*!< out: pointer to inserted record if
the insert succeeds */
- big_rec_t** big_rec,/* out: big rec vector whose fields have to
+ big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
be stored externally by the caller, or
NULL */
- que_thr_t* thr, /* in: query thread or NULL */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
-Updates a record when the update causes no size changes in its fields. */
-
+ ulint n_ext, /*!< in: number of externally stored columns */
+ que_thr_t* thr, /*!< in: query thread or NULL */
+ mtr_t* mtr); /*!< in: mtr */
+/*************************************************************//**
+Updates a record when the update causes no size changes in its fields.
+@return DB_SUCCESS or error number */
+UNIV_INTERN
ulint
btr_cur_update_in_place(
/*====================*/
- /* out: DB_SUCCESS or error number */
- ulint flags, /* in: undo logging and locking flags */
- btr_cur_t* cursor, /* in: cursor on the record to update;
+ ulint flags, /*!< in: undo logging and locking flags */
+ btr_cur_t* cursor, /*!< in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
- upd_t* update, /* in: update vector */
- ulint cmpl_info,/* in: compiler info on secondary index
+ const upd_t* update, /*!< in: update vector */
+ ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr); /*!< in: mtr; must be committed before
+ latching any further pages */
+/*************************************************************//**
Tries to update a record on a page in an index tree. It is assumed that mtr
holds an x-latch on the page. The operation does not succeed if there is too
little space on the page or if the update would result in too empty a page,
-so that tree compression is recommended. */
-
+so that tree compression is recommended.
+@return DB_SUCCESS, or DB_OVERFLOW if the updated record does not fit,
+DB_UNDERFLOW if the page would become too empty, or DB_ZIP_OVERFLOW if
+there is not enough space left on the compressed page */
+UNIV_INTERN
ulint
btr_cur_optimistic_update(
/*======================*/
- /* out: DB_SUCCESS, or DB_OVERFLOW if the
- updated record does not fit, DB_UNDERFLOW
- if the page would become too empty */
- ulint flags, /* in: undo logging and locking flags */
- btr_cur_t* cursor, /* in: cursor on the record to update;
+ ulint flags, /*!< in: undo logging and locking flags */
+ btr_cur_t* cursor, /*!< in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
- upd_t* update, /* in: update vector; this must also
+ const upd_t* update, /*!< in: update vector; this must also
contain trx id and roll ptr fields */
- ulint cmpl_info,/* in: compiler info on secondary index
+ ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr); /*!< in: mtr; must be committed before
+ latching any further pages */
+/*************************************************************//**
Performs an update of a record on a page of a tree. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. If the
update is made on the leaf level, to avoid deadlocks, mtr must also
-own x-latches to brothers of page, if those brothers exist. */
-
+own x-latches to brothers of page, if those brothers exist.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
ulint
btr_cur_pessimistic_update(
/*=======================*/
- /* out: DB_SUCCESS or error code */
- ulint flags, /* in: undo logging, locking, and rollback
+ ulint flags, /*!< in: undo logging, locking, and rollback
flags */
- btr_cur_t* cursor, /* in: cursor on the record to update */
- big_rec_t** big_rec,/* out: big rec vector whose fields have to
+ btr_cur_t* cursor, /*!< in: cursor on the record to update */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
+ big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
be stored externally by the caller, or NULL */
- upd_t* update, /* in: update vector; this is allowed also
+ const upd_t* update, /*!< in: update vector; this is also allowed to
contain trx id and roll ptr fields, but
the values in update vector have no effect */
- ulint cmpl_info,/* in: compiler info on secondary index
+ ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr); /* in: mtr */
-/***************************************************************
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr); /*!< in: mtr; must be committed before
+ latching any further pages */
+/***********************************************************//**
Marks a clustered index record deleted. Writes an undo log record to
undo log on this delete marking. Writes in the trx id field the id
of the deleting transaction, and in the roll ptr field pointer to the
-undo log record created. */
-
+undo log record created.
+@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
+UNIV_INTERN
ulint
btr_cur_del_mark_set_clust_rec(
/*===========================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT, or error
- number */
- ulint flags, /* in: undo logging and locking flags */
- btr_cur_t* cursor, /* in: cursor */
- ibool val, /* in: value to set */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr); /* in: mtr */
-/***************************************************************
-Sets a secondary index record delete mark to TRUE or FALSE. */
-
+ ulint flags, /*!< in: undo logging and locking flags */
+ btr_cur_t* cursor, /*!< in: cursor */
+ ibool val, /*!< in: value to set */
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr); /*!< in: mtr */
+/***********************************************************//**
+Sets a secondary index record delete mark to TRUE or FALSE.
+@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
+UNIV_INTERN
ulint
btr_cur_del_mark_set_sec_rec(
/*=========================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT, or error
- number */
- ulint flags, /* in: locking flag */
- btr_cur_t* cursor, /* in: cursor */
- ibool val, /* in: value to set */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr); /* in: mtr */
-/***************************************************************
-Sets a secondary index record delete mark to FALSE. This function is
-only used by the insert buffer insert merge mechanism. */
-
+ ulint flags, /*!< in: locking flag */
+ btr_cur_t* cursor, /*!< in: cursor */
+ ibool val, /*!< in: value to set */
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr); /*!< in: mtr */
+/***********************************************************//**
+Clear a secondary index record's delete mark. This function is only
+used by the insert buffer insert merge mechanism. */
+UNIV_INTERN
void
btr_cur_del_unmark_for_ibuf(
/*========================*/
- rec_t* rec, /* in: record to delete unmark */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
-Tries to compress a page of the tree on the leaf level. It is assumed
-that mtr holds an x-latch on the tree and on the cursor page. To avoid
-deadlocks, mtr must also own x-latches to brothers of page, if those
-brothers exist. NOTE: it is assumed that the caller has reserved enough
-free extents so that the compression will always succeed if done! */
-
-void
-btr_cur_compress(
-/*=============*/
- btr_cur_t* cursor, /* in: cursor on the page to compress;
- cursor does not stay valid */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
+ rec_t* rec, /*!< in/out: record to delete unmark */
+ page_zip_des_t* page_zip, /*!< in/out: compressed page
+ corresponding to rec, or NULL
+ when the tablespace is
+ uncompressed */
+ mtr_t* mtr); /*!< in: mtr */
+/*************************************************************//**
Tries to compress a page of the tree if it seems useful. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. To avoid
deadlocks, mtr must also own x-latches to brothers of page, if those
brothers exist. NOTE: it is assumed that the caller has reserved enough
-free extents so that the compression will always succeed if done! */
-
+free extents so that the compression will always succeed if done!
+@return TRUE if compression occurred */
+UNIV_INTERN
ibool
btr_cur_compress_if_useful(
/*=======================*/
- /* out: TRUE if compression occurred */
- btr_cur_t* cursor, /* in: cursor on the page to compress;
+ btr_cur_t* cursor, /*!< in: cursor on the page to compress;
cursor does not stay valid if compression
occurs */
- mtr_t* mtr); /* in: mtr */
-/***********************************************************
+ mtr_t* mtr); /*!< in: mtr */
+/*******************************************************//**
Removes the record on which the tree cursor is positioned. It is assumed
that the mtr has an x-latch on the page where the cursor is positioned,
-but no latch on the whole tree. */
-
+but no latch on the whole tree.
+@return TRUE if success, i.e., the page did not become too empty */
+UNIV_INTERN
ibool
btr_cur_optimistic_delete(
/*======================*/
- /* out: TRUE if success, i.e., the page
- did not become too empty */
- btr_cur_t* cursor, /* in: cursor on the record to delete;
+ btr_cur_t* cursor, /*!< in: cursor on the record to delete;
cursor stays valid: if deletion succeeds,
on function exit it points to the successor
of the deleted record */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
+ mtr_t* mtr); /*!< in: mtr; if this function returns
+ TRUE on a leaf page of a secondary
+ index, the mtr must be committed
+ before latching any further pages */
+/*************************************************************//**
Removes the record on which the tree cursor is positioned. Tries
to compress the page if its fillfactor drops below a threshold
or if it is the only page on the level. It is assumed that mtr holds
an x-latch on the tree and on the cursor page. To avoid deadlocks,
mtr must also own x-latches to brothers of page, if those brothers
-exist. */
-
+exist.
+@return TRUE if compression occurred */
+UNIV_INTERN
ibool
btr_cur_pessimistic_delete(
/*=======================*/
- /* out: TRUE if compression occurred */
- ulint* err, /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
+ ulint* err, /*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
the latter may occur because we may have
to update node pointers on upper levels,
and in the case of variable length keys
these may actually grow in size */
- ibool has_reserved_extents, /* in: TRUE if the
+ ibool has_reserved_extents, /*!< in: TRUE if the
caller has already reserved enough free
extents so that he knows that the operation
will succeed */
- btr_cur_t* cursor, /* in: cursor on the record to delete;
+ btr_cur_t* cursor, /*!< in: cursor on the record to delete;
if compression does not occur, the cursor
stays valid: it points to successor of
deleted record on function exit */
- ibool in_rollback,/* in: TRUE if called in rollback */
- mtr_t* mtr); /* in: mtr */
-/***************************************************************
-Parses a redo log record of updating a record in-place. */
-
+ enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
+ mtr_t* mtr); /*!< in: mtr */
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************//**
+Parses a redo log record of updating a record in-place.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
btr_cur_parse_update_in_place(
/*==========================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- dict_index_t* index); /* in: index corresponding to page */
-/********************************************************************
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ page_t* page, /*!< in/out: page or NULL */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ dict_index_t* index); /*!< in: index corresponding to page */
+/****************************************************************//**
Parses the redo log record for delete marking or unmarking of a clustered
-index record. */
-
+index record.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
btr_cur_parse_del_mark_set_clust_rec(
/*=================================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- dict_index_t* index, /* in: index corresponding to page */
- page_t* page); /* in: page or NULL */
-/********************************************************************
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ page_t* page, /*!< in/out: page or NULL */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ dict_index_t* index); /*!< in: index corresponding to page */
+/****************************************************************//**
Parses the redo log record for delete marking or unmarking of a secondary
-index record. */
-
+index record.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
btr_cur_parse_del_mark_set_sec_rec(
/*===============================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page); /* in: page or NULL */
-/***********************************************************************
-Estimates the number of rows in a given index range. */
-
-ib_longlong
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ page_t* page, /*!< in/out: page or NULL */
+ page_zip_des_t* page_zip);/*!< in/out: compressed page, or NULL */
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
+Estimates the number of rows in a given index range.
+@return estimated number of rows */
+UNIV_INTERN
+ib_int64_t
btr_estimate_n_rows_in_range(
/*=========================*/
- /* out: estimated number of rows */
- dict_index_t* index, /* in: index */
- dtuple_t* tuple1, /* in: range start, may also be empty tuple */
- ulint mode1, /* in: search mode for range start */
- dtuple_t* tuple2, /* in: range end, may also be empty tuple */
- ulint mode2); /* in: search mode for range end */
-/***********************************************************************
+ dict_index_t* index, /*!< in: index */
+ const dtuple_t* tuple1, /*!< in: range start, may also be empty tuple */
+ ulint mode1, /*!< in: search mode for range start */
+ const dtuple_t* tuple2, /*!< in: range end, may also be empty tuple */
+ ulint mode2); /*!< in: search mode for range end */
+/*******************************************************************//**
Estimates the number of different key values in a given index, for
each n-column prefix of the index where n <= dict_index_get_n_unique(index).
The estimates are stored in the array index->stat_n_diff_key_vals. */
-
+UNIV_INTERN
void
btr_estimate_number_of_different_key_vals(
/*======================================*/
- dict_index_t* index); /* in: index */
-/***********************************************************************
+ dict_index_t* index); /*!< in: index */
+/*******************************************************************//**
Marks not updated extern fields as not-owned by this record. The ownership
is transferred to the updated record which is inserted elsewhere in the
index tree. In purge only the owner of externally stored field is allowed
to free the field. */
-
+UNIV_INTERN
void
btr_cur_mark_extern_inherited_fields(
/*=================================*/
- rec_t* rec, /* in: record in a clustered index */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- upd_t* update, /* in: update vector */
- mtr_t* mtr); /* in: mtr */
-/***********************************************************************
+ page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed
+ part will be updated, or NULL */
+ rec_t* rec, /*!< in/out: record in a clustered index */
+ dict_index_t* index, /*!< in: index of the page */
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const upd_t* update, /*!< in: update vector */
+ mtr_t* mtr); /*!< in: mtr, or NULL if not logged */
+/*******************************************************************//**
The complement of the previous function: in an update entry may inherit
some externally stored fields from a record. We must mark them as inherited
in entry, so that they are not freed in a rollback. */
-
+UNIV_INTERN
void
btr_cur_mark_dtuple_inherited_extern(
/*=================================*/
- dtuple_t* entry, /* in: updated entry to be inserted to
- clustered index */
- ulint* ext_vec, /* in: array of extern fields in the
- original record */
- ulint n_ext_vec, /* in: number of elements in ext_vec */
- upd_t* update); /* in: update vector */
-/***********************************************************************
+ dtuple_t* entry, /*!< in/out: updated entry to be
+ inserted to clustered index */
+ const upd_t* update); /*!< in: update vector */
+/*******************************************************************//**
Marks all extern fields in a dtuple as owned by the record. */
-
+UNIV_INTERN
void
btr_cur_unmark_dtuple_extern_fields(
/*================================*/
- dtuple_t* entry, /* in: clustered index entry */
- ulint* ext_vec, /* in: array of numbers of fields
- which have been stored externally */
- ulint n_ext_vec); /* in: number of elements in ext_vec */
-/***********************************************************************
+ dtuple_t* entry); /*!< in/out: clustered index entry */
+/*******************************************************************//**
Stores the fields in big_rec_vec to the tablespace and puts pointers to
-them in rec. The fields are stored on pages allocated from leaf node
-file segment of the index tree. */
-
+them in rec. The extern flags in rec will have to be set beforehand.
+The fields are stored on pages allocated from leaf node
+file segment of the index tree.
+@return DB_SUCCESS or error */
+UNIV_INTERN
ulint
btr_store_big_rec_extern_fields(
/*============================*/
- /* out: DB_SUCCESS or error */
- dict_index_t* index, /* in: index of rec; the index tree
+ dict_index_t* index, /*!< in: index of rec; the index tree
MUST be X-latched */
- rec_t* rec, /* in: record */
- const ulint* offsets, /* in: rec_get_offsets(rec, index);
+ buf_block_t* rec_block, /*!< in/out: block containing rec */
+ rec_t* rec, /*!< in: record */
+ const ulint* offsets, /*!< in: rec_get_offsets(rec, index);
the "external storage" flags in offsets
will not correspond to rec when
this function returns */
- big_rec_t* big_rec_vec, /* in: vector containing fields
+ big_rec_t* big_rec_vec, /*!< in: vector containing fields
to be stored externally */
- mtr_t* local_mtr); /* in: mtr containing the latch to
+ mtr_t* local_mtr); /*!< in: mtr containing the latch to
rec and to the tree */
-/***********************************************************************
+/*******************************************************************//**
Frees the space in an externally stored field to the file space
management if the field in data owns the externally stored field;
in a rollback we may have the additional condition that the field must
not be inherited. */
-
+UNIV_INTERN
void
btr_free_externally_stored_field(
/*=============================*/
- dict_index_t* index, /* in: index of the data, the index
+ dict_index_t* index, /*!< in: index of the data, the index
tree MUST be X-latched; if the tree
height is 1, then also the root page
must be X-latched! (this is relevant
@@ -484,130 +527,131 @@ btr_free_externally_stored_field(
from purge where 'data' is located on
an undo log page, not an index
page) */
- byte* data, /* in: internally stored data
- + reference to the externally
- stored part */
- ulint local_len, /* in: length of data */
- ibool do_not_free_inherited,/* in: TRUE if called in a
- rollback and we do not want to free
- inherited fields */
- mtr_t* local_mtr); /* in: mtr containing the latch to
+ byte* field_ref, /*!< in/out: field reference */
+ const rec_t* rec, /*!< in: record containing field_ref, for
+ page_zip_write_blob_ptr(), or NULL */
+ const ulint* offsets, /*!< in: rec_get_offsets(rec, index),
+ or NULL */
+ page_zip_des_t* page_zip, /*!< in: compressed page corresponding
+ to rec, or NULL if rec == NULL */
+ ulint i, /*!< in: field number of field_ref;
+ ignored if rec == NULL */
+ enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
+ mtr_t* local_mtr); /*!< in: mtr containing the latch to
data and an X-latch to the index
tree */
-/***************************************************************
-Frees the externally stored fields for a record. */
-
-void
-btr_rec_free_externally_stored_fields(
-/*==================================*/
- dict_index_t* index, /* in: index of the data, the index
- tree MUST be X-latched */
- rec_t* rec, /* in: record */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- ibool do_not_free_inherited,/* in: TRUE if called in a
- rollback and we do not want to free
- inherited fields */
- mtr_t* mtr); /* in: mini-transaction handle which contains
- an X-latch to record page and to the index
- tree */
-/***********************************************************************
-Copies an externally stored field of a record to mem heap. */
-
+/*******************************************************************//**
+Copies the prefix of an externally stored field of a record. The
+clustered index record must be protected by a lock or a page latch.
+@return the length of the copied field, or 0 if the column was being
+or has been deleted */
+UNIV_INTERN
+ulint
+btr_copy_externally_stored_field_prefix(
+/*====================================*/
+ byte* buf, /*!< out: the field, or a prefix of it */
+ ulint len, /*!< in: length of buf, in bytes */
+ ulint zip_size,/*!< in: nonzero=compressed BLOB page size,
+ zero for uncompressed BLOBs */
+ const byte* data, /*!< in: 'internally' stored part of the
+ field containing also the reference to
+ the external part; must be protected by
+ a lock or a page latch */
+ ulint local_len);/*!< in: length of data, in bytes */
+/*******************************************************************//**
+Copies an externally stored field of a record to mem heap.
+@return the field copied to heap */
+UNIV_INTERN
byte*
btr_rec_copy_externally_stored_field(
/*=================================*/
- /* out: the field copied to heap */
- rec_t* rec, /* in: record */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint no, /* in: field number */
- ulint* len, /* out: length of the field */
- mem_heap_t* heap); /* in: mem heap */
-/***********************************************************************
-Copies an externally stored field of a record to mem heap. Parameter
-data contains a pointer to 'internally' stored part of the field:
-possibly some data, and the reference to the externally stored part in
-the last 20 bytes of data. */
-
-byte*
-btr_copy_externally_stored_field(
-/*=============================*/
- /* out: the whole field copied to heap */
- ulint* len, /* out: length of the whole field */
- byte* data, /* in: 'internally' stored part of the
- field containing also the reference to
- the external part */
- ulint local_len,/* in: length of data */
- mem_heap_t* heap); /* in: mem heap */
-/***********************************************************************
-Stores the positions of the fields marked as extern storage in the update
-vector, and also those fields who are marked as extern storage in rec
-and not mentioned in updated fields. We use this function to remember
-which fields we must mark as extern storage in a record inserted for an
-update. */
-
+ const rec_t* rec, /*!< in: record in a clustered index;
+ must be protected by a lock or a page latch */
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint zip_size,/*!< in: nonzero=compressed BLOB page size,
+ zero for uncompressed BLOBs */
+ ulint no, /*!< in: field number */
+ ulint* len, /*!< out: length of the field */
+ mem_heap_t* heap); /*!< in: mem heap */
+/*******************************************************************//**
+Flags the data tuple fields that are marked as extern storage in the
+update vector. We use this function to remember which fields we must
+mark as extern storage in a record inserted for an update.
+@return number of flagged external columns */
+UNIV_INTERN
ulint
btr_push_update_extern_fields(
/*==========================*/
- /* out: number of values stored in ext_vect */
- ulint* ext_vect,/* in: array of ulints, must be preallocated
- to have space for all fields in rec */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- upd_t* update);/* in: update vector or NULL */
-
+ dtuple_t* tuple, /*!< in/out: data tuple */
+ const upd_t* update, /*!< in: update vector */
+ mem_heap_t* heap) /*!< in: memory heap */
+ __attribute__((nonnull));
/*######################################################################*/
-/* In the pessimistic delete, if the page data size drops below this
+/** In the pessimistic delete, if the page data size drops below this
limit, merging it to a neighbor is tried */
-
#define BTR_CUR_PAGE_COMPRESS_LIMIT (UNIV_PAGE_SIZE / 2)
-/* A slot in the path array. We store here info on a search path down the
+/** A slot in the path array. We store here info on a search path down the
tree. Each slot contains data on a single level of the tree. */
typedef struct btr_path_struct btr_path_t;
struct btr_path_struct{
- ulint nth_rec; /* index of the record
+ ulint nth_rec; /*!< index of the record
where the page cursor stopped on
this level (index in alphabetical
order); value ULINT_UNDEFINED
denotes array end */
- ulint n_recs; /* number of records on the page */
+ ulint n_recs; /*!< number of records on the page */
};
-#define BTR_PATH_ARRAY_N_SLOTS 250 /* size of path array (in slots) */
+#define BTR_PATH_ARRAY_N_SLOTS 250 /*!< size of path array (in slots) */
+
+/** Values for the flag documenting the used search method */
+enum btr_cur_method {
+ BTR_CUR_HASH = 1, /*!< successful shortcut using
+ the hash index */
+ BTR_CUR_HASH_FAIL, /*!< failure using hash, success using
+ binary search: the misleading hash
+ reference is stored in the field
+ hash_node, and might be necessary to
+ update */
+ BTR_CUR_BINARY, /*!< success using the binary search */
+ BTR_CUR_INSERT_TO_IBUF /*!< performed the intended insert to
+ the insert buffer */
+};
-/* The tree cursor: the definition appears here only for the compiler
+/** The tree cursor: the definition appears here only for the compiler
to know struct size! */
-
struct btr_cur_struct {
- dict_index_t* index; /* index where positioned */
- page_cur_t page_cur; /* page cursor */
- page_t* left_page; /* this field is used to store
+ dict_index_t* index; /*!< index where positioned */
+ page_cur_t page_cur; /*!< page cursor */
+ buf_block_t* left_block; /*!< this field is used to store
a pointer to the left neighbor
page, in the cases
BTR_SEARCH_PREV and
BTR_MODIFY_PREV */
/*------------------------------*/
- que_thr_t* thr; /* this field is only used when
- btr_cur_search_... is called for an
- index entry insertion: the calling
- query thread is passed here to be
+ que_thr_t* thr; /*!< this field is only used
+ when btr_cur_search_to_nth_level
+ is called for an index entry
+ insertion: the calling query
+ thread is passed here to be
used in the insert buffer */
/*------------------------------*/
- /* The following fields are used in btr_cur_search... to pass
- information: */
- ulint flag; /* BTR_CUR_HASH, BTR_CUR_HASH_FAIL,
- BTR_CUR_BINARY, or
- BTR_CUR_INSERT_TO_IBUF */
- ulint tree_height; /* Tree height if the search is done
+ /** The following fields are used in
+ btr_cur_search_to_nth_level to pass information: */
+ /* @{ */
+ enum btr_cur_method flag; /*!< Search method used */
+ ulint tree_height; /*!< Tree height if the search is done
for a pessimistic insert or update
operation */
- ulint up_match; /* If the search mode was PAGE_CUR_LE,
+ ulint up_match; /*!< If the search mode was PAGE_CUR_LE,
the number of matched fields to the
first user record to the right of
the cursor record after
- btr_cur_search_...;
+ btr_cur_search_to_nth_level;
for the mode PAGE_CUR_GE, the matched
fields to the first user record AT THE
CURSOR or to the right of it;
@@ -617,87 +661,90 @@ struct btr_cur_struct {
record if that record is on a
different leaf page! (See the note in
row_ins_duplicate_key.) */
- ulint up_bytes; /* number of matched bytes to the
+ ulint up_bytes; /*!< number of matched bytes to the
right at the time cursor positioned;
only used internally in searches: not
defined after the search */
- ulint low_match; /* if search mode was PAGE_CUR_LE,
+ ulint low_match; /*!< if search mode was PAGE_CUR_LE,
the number of matched fields to the
first user record AT THE CURSOR or
to the left of it after
- btr_cur_search_...;
+ btr_cur_search_to_nth_level;
NOT defined for PAGE_CUR_GE or any
other search modes; see also the NOTE
in up_match! */
- ulint low_bytes; /* number of matched bytes to the
+ ulint low_bytes; /*!< number of matched bytes to the
right at the time cursor positioned;
only used internally in searches: not
defined after the search */
- ulint n_fields; /* prefix length used in a hash
+ ulint n_fields; /*!< prefix length used in a hash
search if hash_node != NULL */
- ulint n_bytes; /* hash prefix bytes if hash_node !=
+ ulint n_bytes; /*!< hash prefix bytes if hash_node !=
NULL */
- ulint fold; /* fold value used in the search if
+ ulint fold; /*!< fold value used in the search if
flag is BTR_CUR_HASH */
/*------------------------------*/
- btr_path_t* path_arr; /* in estimating the number of
+ /* @} */
+ btr_path_t* path_arr; /*!< in estimating the number of
rows in range, we store in this array
information of the path through
the tree */
};
-/* Values for the flag documenting the used search method */
-#define BTR_CUR_HASH 1 /* successful shortcut using the hash
- index */
-#define BTR_CUR_HASH_FAIL 2 /* failure using hash, success using
- binary search: the misleading hash
- reference is stored in the field
- hash_node, and might be necessary to
- update */
-#define BTR_CUR_BINARY 3 /* success using the binary search */
-#define BTR_CUR_INSERT_TO_IBUF 4 /* performed the intended insert to
- the insert buffer */
-
-/* If pessimistic delete fails because of lack of file space,
-there is still a good change of success a little later: try this many times,
-and sleep this many microseconds in between */
+/** If pessimistic delete fails because of lack of file space, there
+is still a good chance of success a little later. Try this many
+times. */
#define BTR_CUR_RETRY_DELETE_N_TIMES 100
+/** If pessimistic delete fails because of lack of file space, there
+is still a good chance of success a little later. Sleep this many
+microseconds between retries. */
#define BTR_CUR_RETRY_SLEEP_TIME 50000
-/* The reference in a field for which data is stored on a different page.
+/** The reference in a field for which data is stored on a different page.
The reference is at the end of the 'locally' stored part of the field.
'Locally' means storage in the index record.
We store locally a long enough prefix of each column so that we can determine
the ordering parts of each index record without looking into the externally
stored part. */
-
-/*--------------------------------------*/
-#define BTR_EXTERN_SPACE_ID 0 /* space id where stored */
-#define BTR_EXTERN_PAGE_NO 4 /* page no where stored */
-#define BTR_EXTERN_OFFSET 8 /* offset of BLOB header
+/*-------------------------------------- @{ */
+#define BTR_EXTERN_SPACE_ID 0 /*!< space id where stored */
+#define BTR_EXTERN_PAGE_NO 4 /*!< page no where stored */
+#define BTR_EXTERN_OFFSET 8 /*!< offset of BLOB header
on that page */
-#define BTR_EXTERN_LEN 12 /* 8 bytes containing the
+#define BTR_EXTERN_LEN 12 /*!< 8 bytes containing the
length of the externally
stored part of the BLOB.
The 2 highest bits are
reserved to the flags below. */
-/*--------------------------------------*/
-#define BTR_EXTERN_FIELD_REF_SIZE 20
-
-/* The highest bit of BTR_EXTERN_LEN (i.e., the highest bit of the byte
-at lowest address) is set to 1 if this field does not 'own' the externally
-stored field; only the owner field is allowed to free the field in purge!
-If the 2nd highest bit is 1 then it means that the externally stored field
-was inherited from an earlier version of the row. In rollback we are not
-allowed to free an inherited external field. */
+/*-------------------------------------- @} */
+/* #define BTR_EXTERN_FIELD_REF_SIZE 20 // moved to btr0types.h */
+/** The most significant bit of BTR_EXTERN_LEN (i.e., the most
+significant bit of the byte at smallest address) is set to 1 if this
+field does not 'own' the externally stored field; only the owner field
+is allowed to free the field in purge! */
#define BTR_EXTERN_OWNER_FLAG 128
+/** If the second most significant bit of BTR_EXTERN_LEN (i.e., the
+second most significant bit of the byte at smallest address) is 1 then
+it means that the externally stored field was inherited from an
+earlier version of the row. In rollback we are not allowed to free an
+inherited external field. */
#define BTR_EXTERN_INHERITED_FLAG 64
+/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */
extern ulint btr_cur_n_non_sea;
+/** Number of successful adaptive hash index lookups in
+btr_cur_search_to_nth_level(). */
extern ulint btr_cur_n_sea;
+/** Old value of btr_cur_n_non_sea. Copied by
+srv_refresh_innodb_monitor_stats(). Referenced by
+srv_printf_innodb_monitor(). */
extern ulint btr_cur_n_non_sea_old;
+/** Old value of btr_cur_n_sea. Copied by
+srv_refresh_innodb_monitor_stats(). Referenced by
+srv_printf_innodb_monitor(). */
extern ulint btr_cur_n_sea_old;
+#endif /* !UNIV_HOTBACKUP */
#ifndef UNIV_NONINL
#include "btr0cur.ic"
diff --git a/storage/innobase/include/btr0cur.ic b/storage/innobase/include/btr0cur.ic
index bd2c46eb734..280583f6ccf 100644
--- a/storage/innobase/include/btr0cur.ic
+++ b/storage/innobase/include/btr0cur.ic
@@ -1,101 +1,147 @@
-/******************************************************
-The index tree cursor
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1994-1996 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/btr0cur.ic
+The index tree cursor
Created 10/16/1994 Heikki Tuuri
*******************************************************/
+#ifndef UNIV_HOTBACKUP
#include "btr0btr.h"
-/*************************************************************
-Returns the page cursor component of a tree cursor. */
+#ifdef UNIV_DEBUG
+/*********************************************************//**
+Returns the page cursor component of a tree cursor.
+@return pointer to page cursor component */
UNIV_INLINE
page_cur_t*
btr_cur_get_page_cur(
/*=================*/
- /* out: pointer to page cursor component */
- btr_cur_t* cursor) /* in: tree cursor */
+ const btr_cur_t* cursor) /*!< in: tree cursor */
{
- return(&(cursor->page_cur));
+ return(&((btr_cur_t*) cursor)->page_cur);
+}
+#endif /* UNIV_DEBUG */
+/*********************************************************//**
+Returns the buffer block on which the tree cursor is positioned.
+@return pointer to buffer block */
+UNIV_INLINE
+buf_block_t*
+btr_cur_get_block(
+/*==============*/
+ btr_cur_t* cursor) /*!< in: tree cursor */
+{
+ return(page_cur_get_block(btr_cur_get_page_cur(cursor)));
}
-/*************************************************************
-Returns the record pointer of a tree cursor. */
+/*********************************************************//**
+Returns the record pointer of a tree cursor.
+@return pointer to record */
UNIV_INLINE
rec_t*
btr_cur_get_rec(
/*============*/
- /* out: pointer to record */
- btr_cur_t* cursor) /* in: tree cursor */
+ btr_cur_t* cursor) /*!< in: tree cursor */
{
return(page_cur_get_rec(&(cursor->page_cur)));
}
-/*************************************************************
+/*********************************************************//**
+Returns the compressed page on which the tree cursor is positioned.
+@return pointer to compressed page, or NULL if the page is not compressed */
+UNIV_INLINE
+page_zip_des_t*
+btr_cur_get_page_zip(
+/*=================*/
+ btr_cur_t* cursor) /*!< in: tree cursor */
+{
+ return(buf_block_get_page_zip(btr_cur_get_block(cursor)));
+}
+
+/*********************************************************//**
Invalidates a tree cursor by setting record pointer to NULL. */
UNIV_INLINE
void
btr_cur_invalidate(
/*===============*/
- btr_cur_t* cursor) /* in: tree cursor */
+ btr_cur_t* cursor) /*!< in: tree cursor */
{
page_cur_invalidate(&(cursor->page_cur));
}
-/*************************************************************
-Returns the page of a tree cursor. */
+/*********************************************************//**
+Returns the page of a tree cursor.
+@return pointer to page */
UNIV_INLINE
page_t*
btr_cur_get_page(
/*=============*/
- /* out: pointer to page */
- btr_cur_t* cursor) /* in: tree cursor */
+ btr_cur_t* cursor) /*!< in: tree cursor */
{
- return(buf_frame_align(page_cur_get_rec(&(cursor->page_cur))));
+ return(page_align(page_cur_get_rec(&(cursor->page_cur))));
}
-/*************************************************************
-Returns the index of a cursor. */
+/*********************************************************//**
+Returns the index of a cursor.
+@return index */
UNIV_INLINE
dict_index_t*
btr_cur_get_index(
/*==============*/
- /* out: index */
- btr_cur_t* cursor) /* in: B-tree cursor */
+ btr_cur_t* cursor) /*!< in: B-tree cursor */
{
return(cursor->index);
}
-/*************************************************************
+/*********************************************************//**
Positions a tree cursor at a given record. */
UNIV_INLINE
void
btr_cur_position(
/*=============*/
- dict_index_t* index, /* in: index */
- rec_t* rec, /* in: record in tree */
- btr_cur_t* cursor) /* in: cursor */
+ dict_index_t* index, /*!< in: index */
+ rec_t* rec, /*!< in: record in tree */
+ buf_block_t* block, /*!< in: buffer block of rec */
+ btr_cur_t* cursor) /*!< out: cursor */
{
- page_cur_position(rec, btr_cur_get_page_cur(cursor));
+ ut_ad(page_align(rec) == block->frame);
+
+ page_cur_position(rec, block, btr_cur_get_page_cur(cursor));
cursor->index = index;
}
-/*************************************************************************
+/*********************************************************************//**
Checks if compressing an index page where a btr cursor is placed makes
-sense. */
+sense.
+@return TRUE if compression is recommended */
UNIV_INLINE
ibool
btr_cur_compress_recommendation(
/*============================*/
- /* out: TRUE if compression is recommended */
- btr_cur_t* cursor, /* in: btr cursor */
- mtr_t* mtr) /* in: mtr */
+ btr_cur_t* cursor, /*!< in: btr cursor */
+ mtr_t* mtr) /*!< in: mtr */
{
page_t* page;
- ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)),
+ ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
MTR_MEMO_PAGE_X_FIX));
page = btr_cur_get_page(cursor);
@@ -110,28 +156,27 @@ btr_cur_compress_recommendation(
root page. */
return(dict_index_get_page(cursor->index)
- != buf_frame_get_page_no(page));
+ != page_get_page_no(page));
}
return(FALSE);
}
-/*************************************************************************
+/*********************************************************************//**
Checks if the record on which the cursor is placed can be deleted without
-making tree compression necessary (or, recommended). */
+making tree compression necessary (or, recommended).
+@return TRUE if can be deleted without recommended compression */
UNIV_INLINE
ibool
btr_cur_can_delete_without_compress(
/*================================*/
- /* out: TRUE if can be deleted without
- recommended compression */
- btr_cur_t* cursor, /* in: btr cursor */
- ulint rec_size,/* in: rec_get_size(btr_cur_get_rec(cursor))*/
- mtr_t* mtr) /* in: mtr */
+ btr_cur_t* cursor, /*!< in: btr cursor */
+ ulint rec_size,/*!< in: rec_get_size(btr_cur_get_rec(cursor))*/
+ mtr_t* mtr) /*!< in: mtr */
{
page_t* page;
- ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)),
+ ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
MTR_MEMO_PAGE_X_FIX));
page = btr_cur_get_page(cursor);
@@ -147,8 +192,9 @@ btr_cur_can_delete_without_compress(
compression if this is not the root page. */
return(dict_index_get_page(cursor->index)
- == buf_frame_get_page_no(page));
+ == page_get_page_no(page));
}
return(TRUE);
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h
index ee40e905544..12b1375d8b7 100644
--- a/storage/innobase/include/btr0pcur.h
+++ b/storage/innobase/include/btr0pcur.h
@@ -1,7 +1,24 @@
-/******************************************************
-The index tree persistent cursor
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1996 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/btr0pcur.h
+The index tree persistent cursor
Created 2/23/1996 Heikki Tuuri
*******************************************************/
@@ -29,164 +46,162 @@ of a scroll cursor easier */
#define BTR_PCUR_BEFORE_FIRST_IN_TREE 4 /* in an empty tree */
#define BTR_PCUR_AFTER_LAST_IN_TREE 5 /* in an empty tree */
-/******************************************************************
-Allocates memory for a persistent cursor object and initializes the cursor. */
-
+/**************************************************************//**
+Allocates memory for a persistent cursor object and initializes the cursor.
+@return own: persistent cursor */
+UNIV_INTERN
btr_pcur_t*
btr_pcur_create_for_mysql(void);
/*============================*/
- /* out, own: persistent cursor */
-/******************************************************************
+/**************************************************************//**
Frees the memory for a persistent cursor object. */
-
+UNIV_INTERN
void
btr_pcur_free_for_mysql(
/*====================*/
- btr_pcur_t* cursor); /* in, own: persistent cursor */
-/******************************************************************
+ btr_pcur_t* cursor); /*!< in, own: persistent cursor */
+/**************************************************************//**
Copies the stored position of a pcur to another pcur. */
-
+UNIV_INTERN
void
btr_pcur_copy_stored_position(
/*==========================*/
- btr_pcur_t* pcur_receive, /* in: pcur which will receive the
+ btr_pcur_t* pcur_receive, /*!< in: pcur which will receive the
position info */
- btr_pcur_t* pcur_donate); /* in: pcur from which the info is
+ btr_pcur_t* pcur_donate); /*!< in: pcur from which the info is
copied */
-/******************************************************************
+/**************************************************************//**
Sets the old_rec_buf field to NULL. */
UNIV_INLINE
void
btr_pcur_init(
/*==========*/
- btr_pcur_t* pcur); /* in: persistent cursor */
-/******************************************************************
+ btr_pcur_t* pcur); /*!< in: persistent cursor */
+/**************************************************************//**
Initializes and opens a persistent cursor to an index tree. It should be
closed with btr_pcur_close. */
UNIV_INLINE
void
btr_pcur_open(
/*==========*/
- dict_index_t* index, /* in: index */
- dtuple_t* tuple, /* in: tuple on which search done */
- ulint mode, /* in: PAGE_CUR_L, ...;
+ dict_index_t* index, /*!< in: index */
+ const dtuple_t* tuple, /*!< in: tuple on which search done */
+ ulint mode, /*!< in: PAGE_CUR_L, ...;
NOTE that if the search is made using a unique
prefix of a record, mode should be
PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
may end up on the previous page from the
record! */
- ulint latch_mode,/* in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */
- mtr_t* mtr); /* in: mtr */
-/******************************************************************
+ ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
+ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
+ mtr_t* mtr); /*!< in: mtr */
+/**************************************************************//**
Opens a persistent cursor to an index tree without initializing the
cursor. */
UNIV_INLINE
void
btr_pcur_open_with_no_init(
/*=======================*/
- dict_index_t* index, /* in: index */
- dtuple_t* tuple, /* in: tuple on which search done */
- ulint mode, /* in: PAGE_CUR_L, ...;
+ dict_index_t* index, /*!< in: index */
+ const dtuple_t* tuple, /*!< in: tuple on which search done */
+ ulint mode, /*!< in: PAGE_CUR_L, ...;
NOTE that if the search is made using a unique
prefix of a record, mode should be
PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
may end up on the previous page of the
record! */
- ulint latch_mode,/* in: BTR_SEARCH_LEAF, ...;
+ ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ...;
NOTE that if has_search_latch != 0 then
we maybe do not acquire a latch on the cursor
page, but assume that the caller uses his
btr search latch to protect the record! */
- btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */
- ulint has_search_latch,/* in: latch mode the caller
+ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
+ ulint has_search_latch,/*!< in: latch mode the caller
currently has on btr_search_latch:
RW_S_LATCH, or 0 */
- mtr_t* mtr); /* in: mtr */
-/*********************************************************************
+ mtr_t* mtr); /*!< in: mtr */
+/*****************************************************************//**
Opens a persistent cursor at either end of an index. */
UNIV_INLINE
void
btr_pcur_open_at_index_side(
/*========================*/
- ibool from_left, /* in: TRUE if open to the low end,
+ ibool from_left, /*!< in: TRUE if open to the low end,
FALSE if to the high end */
- dict_index_t* index, /* in: index */
- ulint latch_mode, /* in: latch mode */
- btr_pcur_t* pcur, /* in: cursor */
- ibool do_init, /* in: TRUE if should be initialized */
- mtr_t* mtr); /* in: mtr */
-/******************************************************************
-Gets the up_match value for a pcur after a search. */
+ dict_index_t* index, /*!< in: index */
+ ulint latch_mode, /*!< in: latch mode */
+ btr_pcur_t* pcur, /*!< in: cursor */
+ ibool do_init, /*!< in: TRUE if should be initialized */
+ mtr_t* mtr); /*!< in: mtr */
+/**************************************************************//**
+Gets the up_match value for a pcur after a search.
+@return number of matched fields at the cursor or to the right if
+search mode was PAGE_CUR_GE, otherwise undefined */
UNIV_INLINE
ulint
btr_pcur_get_up_match(
/*==================*/
- /* out: number of matched fields at the cursor
- or to the right if search mode was PAGE_CUR_GE,
- otherwise undefined */
- btr_pcur_t* cursor); /* in: memory buffer for persistent cursor */
-/******************************************************************
-Gets the low_match value for a pcur after a search. */
+ btr_pcur_t* cursor); /*!< in: memory buffer for persistent cursor */
+/**************************************************************//**
+Gets the low_match value for a pcur after a search.
+@return number of matched fields at the cursor or to the right if
+search mode was PAGE_CUR_LE, otherwise undefined */
UNIV_INLINE
ulint
btr_pcur_get_low_match(
/*===================*/
- /* out: number of matched fields at the cursor
- or to the right if search mode was PAGE_CUR_LE,
- otherwise undefined */
- btr_pcur_t* cursor); /* in: memory buffer for persistent cursor */
-/******************************************************************
+ btr_pcur_t* cursor); /*!< in: memory buffer for persistent cursor */
+/**************************************************************//**
If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first
user record satisfying the search condition, in the case PAGE_CUR_L or
PAGE_CUR_LE, on the last user record. If no such user record exists, then
in the first case sets the cursor after last in tree, and in the latter case
before first in tree. The latching mode must be BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF. */
-
+UNIV_INTERN
void
btr_pcur_open_on_user_rec(
/*======================*/
- dict_index_t* index, /* in: index */
- dtuple_t* tuple, /* in: tuple on which search done */
- ulint mode, /* in: PAGE_CUR_L, ... */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF or
+ dict_index_t* index, /*!< in: index */
+ const dtuple_t* tuple, /*!< in: tuple on which search done */
+ ulint mode, /*!< in: PAGE_CUR_L, ... */
+ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF */
- btr_pcur_t* cursor, /* in: memory buffer for persistent
+ btr_pcur_t* cursor, /*!< in: memory buffer for persistent
cursor */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
+ mtr_t* mtr); /*!< in: mtr */
+/**********************************************************************//**
Positions a cursor at a randomly chosen position within a B-tree. */
UNIV_INLINE
void
btr_pcur_open_at_rnd_pos(
/*=====================*/
- dict_index_t* index, /* in: index */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /* in/out: B-tree pcur */
- mtr_t* mtr); /* in: mtr */
-/******************************************************************
+ dict_index_t* index, /*!< in: index */
+ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
+ btr_pcur_t* cursor, /*!< in/out: B-tree pcur */
+ mtr_t* mtr); /*!< in: mtr */
+/**************************************************************//**
Frees the possible old_rec_buf buffer of a persistent cursor and sets the
latch mode of the persistent cursor to BTR_NO_LATCHES. */
UNIV_INLINE
void
btr_pcur_close(
/*===========*/
- btr_pcur_t* cursor); /* in: persistent cursor */
-/******************************************************************
+ btr_pcur_t* cursor); /*!< in: persistent cursor */
+/**************************************************************//**
The position of the cursor is stored by taking an initial segment of the
record the cursor is positioned on, before, or after, and copying it to the
cursor data structure, or just setting a flag if the cursor is before the
first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the
page where the cursor is positioned must not be empty if the index tree is
not totally empty! */
-
+UNIV_INTERN
void
btr_pcur_store_position(
/*====================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr); /* in: mtr */
-/******************************************************************
+ btr_pcur_t* cursor, /*!< in: persistent cursor */
+ mtr_t* mtr); /*!< in: mtr */
+/**************************************************************//**
Restores the stored position of a persistent cursor bufferfixing the page and
obtaining the specified latches. If the cursor position was saved when the
(1) cursor was positioned on a user record: this function restores the position
@@ -197,56 +212,54 @@ infimum;
(3) cursor was positioned on the page supremum: restores to the first record
GREATER than the user record which was the predecessor of the supremum.
(4) cursor was positioned before the first or after the last in an empty tree:
-restores to before first or after the last in the tree. */
-
+restores to before first or after the last in the tree.
+@return TRUE if the cursor position was stored when it was on a user
+record and it can be restored on a user record whose ordering fields
+are identical to the ones of the original user record */
+UNIV_INTERN
ibool
btr_pcur_restore_position(
/*======================*/
- /* out: TRUE if the cursor position
- was stored when it was on a user record
- and it can be restored on a user record
- whose ordering fields are identical to
- the ones of the original user record */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /* in: detached persistent cursor */
- mtr_t* mtr); /* in: mtr */
-/******************************************************************
+ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
+ btr_pcur_t* cursor, /*!< in: detached persistent cursor */
+ mtr_t* mtr); /*!< in: mtr */
+/**************************************************************//**
If the latch mode of the cursor is BTR_SEARCH_LEAF or BTR_MODIFY_LEAF,
releases the page latch and bufferfix reserved by the cursor.
NOTE! In the case of BTR_MODIFY_LEAF, there should not exist changes
made by the current mini-transaction to the data protected by the
cursor latch, as then the latch must not be released until mtr_commit. */
-
+UNIV_INTERN
void
btr_pcur_release_leaf(
/*==================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************
-Gets the rel_pos field for a cursor whose position has been stored. */
+ btr_pcur_t* cursor, /*!< in: persistent cursor */
+ mtr_t* mtr); /*!< in: mtr */
+/*********************************************************//**
+Gets the rel_pos field for a cursor whose position has been stored.
+@return BTR_PCUR_ON, ... */
UNIV_INLINE
ulint
btr_pcur_get_rel_pos(
/*=================*/
- /* out: BTR_PCUR_ON, ... */
- btr_pcur_t* cursor);/* in: persistent cursor */
-/*************************************************************
+ const btr_pcur_t* cursor);/*!< in: persistent cursor */
+/*********************************************************//**
Sets the mtr field for a pcur. */
UNIV_INLINE
void
btr_pcur_set_mtr(
/*=============*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr); /* in, own: mtr */
-/*************************************************************
-Gets the mtr field for a pcur. */
+ btr_pcur_t* cursor, /*!< in: persistent cursor */
+ mtr_t* mtr); /*!< in, own: mtr */
+/*********************************************************//**
+Gets the mtr field for a pcur.
+@return mtr */
UNIV_INLINE
mtr_t*
btr_pcur_get_mtr(
/*=============*/
- /* out: mtr */
- btr_pcur_t* cursor); /* in: persistent cursor */
-/******************************************************************
+ btr_pcur_t* cursor); /*!< in: persistent cursor */
+/**************************************************************//**
Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES,
that is, the cursor becomes detached. If there have been modifications
to the page where pcur is positioned, this can be used instead of
@@ -256,80 +269,77 @@ UNIV_INLINE
void
btr_pcur_commit(
/*============*/
- btr_pcur_t* pcur); /* in: persistent cursor */
-/******************************************************************
+ btr_pcur_t* pcur); /*!< in: persistent cursor */
+/**************************************************************//**
Differs from btr_pcur_commit in that we can specify the mtr to commit. */
UNIV_INLINE
void
btr_pcur_commit_specify_mtr(
/*========================*/
- btr_pcur_t* pcur, /* in: persistent cursor */
- mtr_t* mtr); /* in: mtr to commit */
-/******************************************************************
-Tests if a cursor is detached: that is the latch mode is BTR_NO_LATCHES. */
+ btr_pcur_t* pcur, /*!< in: persistent cursor */
+ mtr_t* mtr); /*!< in: mtr to commit */
+/**************************************************************//**
+Tests if a cursor is detached: that is the latch mode is BTR_NO_LATCHES.
+@return TRUE if detached */
UNIV_INLINE
ibool
btr_pcur_is_detached(
/*=================*/
- /* out: TRUE if detached */
- btr_pcur_t* pcur); /* in: persistent cursor */
-/*************************************************************
+ btr_pcur_t* pcur); /*!< in: persistent cursor */
+/*********************************************************//**
Moves the persistent cursor to the next record in the tree. If no records are
-left, the cursor stays 'after last in tree'. */
+left, the cursor stays 'after last in tree'.
+@return TRUE if the cursor was not after last in tree */
UNIV_INLINE
ibool
btr_pcur_move_to_next(
/*==================*/
- /* out: TRUE if the cursor was not after last
- in tree */
- btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
+ btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the
function may release the page latch */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************
+ mtr_t* mtr); /*!< in: mtr */
+/*********************************************************//**
Moves the persistent cursor to the previous record in the tree. If no records
-are left, the cursor stays 'before first in tree'. */
-
+are left, the cursor stays 'before first in tree'.
+@return TRUE if the cursor was not before first in tree */
+UNIV_INTERN
ibool
btr_pcur_move_to_prev(
/*==================*/
- /* out: TRUE if the cursor was not before first
- in tree */
- btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
+ btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the
function may release the page latch */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************
+ mtr_t* mtr); /*!< in: mtr */
+/*********************************************************//**
Moves the persistent cursor to the last record on the same page. */
UNIV_INLINE
void
btr_pcur_move_to_last_on_page(
/*==========================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************
+ btr_pcur_t* cursor, /*!< in: persistent cursor */
+ mtr_t* mtr); /*!< in: mtr */
+/*********************************************************//**
Moves the persistent cursor to the next user record in the tree. If no user
-records are left, the cursor ends up 'after last in tree'. */
+records are left, the cursor ends up 'after last in tree'.
+@return TRUE if the cursor moved forward, ending on a user record */
UNIV_INLINE
ibool
btr_pcur_move_to_next_user_rec(
/*===========================*/
- /* out: TRUE if the cursor moved forward,
- ending on a user record */
- btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
+ btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the
function may release the page latch */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************
+ mtr_t* mtr); /*!< in: mtr */
+/*********************************************************//**
Moves the persistent cursor to the first record on the next page.
Releases the latch on the current page, and bufferunfixes it.
Note that there must not be modifications on the current page,
as then the x-latch can be released only in mtr_commit. */
-
+UNIV_INTERN
void
btr_pcur_move_to_next_page(
/*=======================*/
- btr_pcur_t* cursor, /* in: persistent cursor; must be on the
+ btr_pcur_t* cursor, /*!< in: persistent cursor; must be on the
last record of the current page */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************
+ mtr_t* mtr); /*!< in: mtr */
+/*********************************************************//**
Moves the persistent cursor backward if it is on the first record
of the page. Releases the latch on the current page, and bufferunfixes
it. Note that to prevent a possible deadlock, the operation first
@@ -339,113 +349,121 @@ The alphabetical position of the cursor is guaranteed to be sensible
on return, but it may happen that the cursor is not positioned on the
last record of any page, because the structure of the tree may have
changed while the cursor had no latches. */
-
+UNIV_INTERN
void
btr_pcur_move_backward_from_page(
/*=============================*/
- btr_pcur_t* cursor, /* in: persistent cursor, must be on the
+ btr_pcur_t* cursor, /*!< in: persistent cursor, must be on the
first record of the current page */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************
-Returns the btr cursor component of a persistent cursor. */
+ mtr_t* mtr); /*!< in: mtr */
+#ifdef UNIV_DEBUG
+/*********************************************************//**
+Returns the btr cursor component of a persistent cursor.
+@return pointer to btr cursor component */
UNIV_INLINE
btr_cur_t*
btr_pcur_get_btr_cur(
/*=================*/
- /* out: pointer to btr cursor component */
- btr_pcur_t* cursor); /* in: persistent cursor */
-/*************************************************************
-Returns the page cursor component of a persistent cursor. */
+ const btr_pcur_t* cursor); /*!< in: persistent cursor */
+/*********************************************************//**
+Returns the page cursor component of a persistent cursor.
+@return pointer to page cursor component */
UNIV_INLINE
page_cur_t*
btr_pcur_get_page_cur(
/*==================*/
- /* out: pointer to page cursor component */
- btr_pcur_t* cursor); /* in: persistent cursor */
-/*************************************************************
-Returns the page of a persistent cursor. */
+ const btr_pcur_t* cursor); /*!< in: persistent cursor */
+#else /* UNIV_DEBUG */
+# define btr_pcur_get_btr_cur(cursor) (&(cursor)->btr_cur)
+# define btr_pcur_get_page_cur(cursor) (&(cursor)->btr_cur.page_cur)
+#endif /* UNIV_DEBUG */
+/*********************************************************//**
+Returns the page of a persistent cursor.
+@return pointer to the page */
UNIV_INLINE
page_t*
btr_pcur_get_page(
/*==============*/
- /* out: pointer to the page */
- btr_pcur_t* cursor);/* in: persistent cursor */
-/*************************************************************
-Returns the record of a persistent cursor. */
+ btr_pcur_t* cursor);/*!< in: persistent cursor */
+/*********************************************************//**
+Returns the buffer block of a persistent cursor.
+@return pointer to the block */
+UNIV_INLINE
+buf_block_t*
+btr_pcur_get_block(
+/*===============*/
+ btr_pcur_t* cursor);/*!< in: persistent cursor */
+/*********************************************************//**
+Returns the record of a persistent cursor.
+@return pointer to the record */
UNIV_INLINE
rec_t*
btr_pcur_get_rec(
/*=============*/
- /* out: pointer to the record */
- btr_pcur_t* cursor);/* in: persistent cursor */
-/*************************************************************
+ btr_pcur_t* cursor);/*!< in: persistent cursor */
+/*********************************************************//**
Checks if the persistent cursor is on a user record. */
UNIV_INLINE
ibool
btr_pcur_is_on_user_rec(
/*====================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************
+ const btr_pcur_t* cursor);/*!< in: persistent cursor */
+/*********************************************************//**
Checks if the persistent cursor is after the last user record on
a page. */
UNIV_INLINE
ibool
btr_pcur_is_after_last_on_page(
/*===========================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************
+ const btr_pcur_t* cursor);/*!< in: persistent cursor */
+/*********************************************************//**
Checks if the persistent cursor is before the first user record on
a page. */
UNIV_INLINE
ibool
btr_pcur_is_before_first_on_page(
/*=============================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************
+ const btr_pcur_t* cursor);/*!< in: persistent cursor */
+/*********************************************************//**
Checks if the persistent cursor is before the first user record in
the index tree. */
UNIV_INLINE
ibool
btr_pcur_is_before_first_in_tree(
/*=============================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************
+ btr_pcur_t* cursor, /*!< in: persistent cursor */
+ mtr_t* mtr); /*!< in: mtr */
+/*********************************************************//**
Checks if the persistent cursor is after the last user record in
the index tree. */
UNIV_INLINE
ibool
btr_pcur_is_after_last_in_tree(
/*===========================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************
+ btr_pcur_t* cursor, /*!< in: persistent cursor */
+ mtr_t* mtr); /*!< in: mtr */
+/*********************************************************//**
Moves the persistent cursor to the next record on the same page. */
UNIV_INLINE
void
btr_pcur_move_to_next_on_page(
/*==========================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************
+ btr_pcur_t* cursor);/*!< in/out: persistent cursor */
+/*********************************************************//**
Moves the persistent cursor to the previous record on the same page. */
UNIV_INLINE
void
btr_pcur_move_to_prev_on_page(
/*==========================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr); /* in: mtr */
+ btr_pcur_t* cursor);/*!< in/out: persistent cursor */
/* The persistent B-tree cursor structure. This is used mainly for SQL
selects, updates, and deletes. */
struct btr_pcur_struct{
- btr_cur_t btr_cur; /* a B-tree cursor */
- ulint latch_mode; /* see TODO note below!
+ btr_cur_t btr_cur; /*!< a B-tree cursor */
+ ulint latch_mode; /*!< see TODO note below!
BTR_SEARCH_LEAF, BTR_MODIFY_LEAF,
BTR_MODIFY_TREE, or BTR_NO_LATCHES,
depending on the latching state of
@@ -456,29 +474,28 @@ struct btr_pcur_struct{
detached; it can be restored to
attached if the old position was
stored in old_rec */
- ulint old_stored; /* BTR_PCUR_OLD_STORED
+ ulint old_stored; /*!< BTR_PCUR_OLD_STORED
or BTR_PCUR_OLD_NOT_STORED */
- rec_t* old_rec; /* if cursor position is stored,
+ rec_t* old_rec; /*!< if cursor position is stored,
contains an initial segment of the
latest record cursor was positioned
either on, before, or after */
- ulint old_n_fields; /* number of fields in old_rec */
- ulint rel_pos; /* BTR_PCUR_ON, BTR_PCUR_BEFORE, or
+ ulint old_n_fields; /*!< number of fields in old_rec */
+ ulint rel_pos; /*!< BTR_PCUR_ON, BTR_PCUR_BEFORE, or
BTR_PCUR_AFTER, depending on whether
cursor was on, before, or after the
old_rec record */
buf_block_t* block_when_stored;/* buffer block when the position was
- stored; note that if AWE is on, frames
- may move */
- dulint modify_clock; /* the modify clock value of the
+ stored */
+ ib_uint64_t modify_clock; /*!< the modify clock value of the
buffer block when the cursor position
was stored */
- ulint pos_state; /* see TODO note below!
+ ulint pos_state; /*!< see TODO note below!
BTR_PCUR_IS_POSITIONED,
BTR_PCUR_WAS_POSITIONED,
BTR_PCUR_NOT_POSITIONED */
- ulint search_mode; /* PAGE_CUR_G, ... */
- trx_t* trx_if_known; /* the transaction, if we know it;
+ ulint search_mode; /*!< PAGE_CUR_G, ... */
+ trx_t* trx_if_known; /*!< the transaction, if we know it;
otherwise this field is not defined;
can ONLY BE USED in error prints in
fatal assertion failures! */
@@ -486,12 +503,12 @@ struct btr_pcur_struct{
/* NOTE that the following fields may possess dynamically allocated
memory which should be freed if not needed anymore! */
- mtr_t* mtr; /* NULL, or this field may contain
+ mtr_t* mtr; /*!< NULL, or this field may contain
a mini-transaction which holds the
latch on the cursor page */
- byte* old_rec_buf; /* NULL, or a dynamically allocated
+ byte* old_rec_buf; /*!< NULL, or a dynamically allocated
buffer for old_rec */
- ulint buf_size; /* old_rec_buf size if old_rec_buf
+ ulint buf_size; /*!< old_rec_buf size if old_rec_buf
is not NULL */
};
diff --git a/storage/innobase/include/btr0pcur.ic b/storage/innobase/include/btr0pcur.ic
index 66462530716..0ca7223f861 100644
--- a/storage/innobase/include/btr0pcur.ic
+++ b/storage/innobase/include/btr0pcur.ic
@@ -1,20 +1,37 @@
-/******************************************************
-The index tree persistent cursor
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1996 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/btr0pcur.ic
+The index tree persistent cursor
Created 2/23/1996 Heikki Tuuri
*******************************************************/
-/*************************************************************
-Gets the rel_pos field for a cursor whose position has been stored. */
+/*********************************************************//**
+Gets the rel_pos field for a cursor whose position has been stored.
+@return BTR_PCUR_ON, ... */
UNIV_INLINE
ulint
btr_pcur_get_rel_pos(
/*=================*/
- /* out: BTR_PCUR_ON, ... */
- btr_pcur_t* cursor) /* in: persistent cursor */
+ const btr_pcur_t* cursor) /*!< in: persistent cursor */
{
ut_ad(cursor);
ut_ad(cursor->old_rec);
@@ -25,97 +42,112 @@ btr_pcur_get_rel_pos(
return(cursor->rel_pos);
}
-/*************************************************************
+/*********************************************************//**
Sets the mtr field for a pcur. */
UNIV_INLINE
void
btr_pcur_set_mtr(
/*=============*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr) /* in, own: mtr */
+ btr_pcur_t* cursor, /*!< in: persistent cursor */
+ mtr_t* mtr) /*!< in, own: mtr */
{
ut_ad(cursor);
cursor->mtr = mtr;
}
-/*************************************************************
-Gets the mtr field for a pcur. */
+/*********************************************************//**
+Gets the mtr field for a pcur.
+@return mtr */
UNIV_INLINE
mtr_t*
btr_pcur_get_mtr(
/*=============*/
- /* out: mtr */
- btr_pcur_t* cursor) /* in: persistent cursor */
+ btr_pcur_t* cursor) /*!< in: persistent cursor */
{
ut_ad(cursor);
return(cursor->mtr);
}
-/*************************************************************
-Returns the btr cursor component of a persistent cursor. */
+#ifdef UNIV_DEBUG
+/*********************************************************//**
+Returns the btr cursor component of a persistent cursor.
+@return pointer to btr cursor component */
UNIV_INLINE
btr_cur_t*
btr_pcur_get_btr_cur(
/*=================*/
- /* out: pointer to btr cursor component */
- btr_pcur_t* cursor) /* in: persistent cursor */
+ const btr_pcur_t* cursor) /*!< in: persistent cursor */
{
- return(&(cursor->btr_cur));
+ const btr_cur_t* btr_cur = &cursor->btr_cur;
+ return((btr_cur_t*) btr_cur);
}
-/*************************************************************
-Returns the page cursor component of a persistent cursor. */
+/*********************************************************//**
+Returns the page cursor component of a persistent cursor.
+@return pointer to page cursor component */
UNIV_INLINE
page_cur_t*
btr_pcur_get_page_cur(
/*==================*/
- /* out: pointer to page cursor component */
- btr_pcur_t* cursor) /* in: persistent cursor */
+ const btr_pcur_t* cursor) /*!< in: persistent cursor */
{
- return(btr_cur_get_page_cur(&(cursor->btr_cur)));
+ return(btr_cur_get_page_cur(btr_pcur_get_btr_cur(cursor)));
}
-
-/*************************************************************
-Returns the page of a persistent cursor. */
+#endif /* UNIV_DEBUG */
+/*********************************************************//**
+Returns the page of a persistent cursor.
+@return pointer to the page */
UNIV_INLINE
page_t*
btr_pcur_get_page(
/*==============*/
- /* out: pointer to the page */
- btr_pcur_t* cursor) /* in: persistent cursor */
+ btr_pcur_t* cursor) /*!< in: persistent cursor */
+{
+ ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+
+ return(btr_cur_get_page(btr_pcur_get_btr_cur(cursor)));
+}
+
+/*********************************************************//**
+Returns the buffer block of a persistent cursor.
+@return pointer to the block */
+UNIV_INLINE
+buf_block_t*
+btr_pcur_get_block(
+/*===============*/
+ btr_pcur_t* cursor) /*!< in: persistent cursor */
{
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- return(page_cur_get_page(btr_pcur_get_page_cur(cursor)));
+ return(btr_cur_get_block(btr_pcur_get_btr_cur(cursor)));
}
-/*************************************************************
-Returns the record of a persistent cursor. */
+/*********************************************************//**
+Returns the record of a persistent cursor.
+@return pointer to the record */
UNIV_INLINE
rec_t*
btr_pcur_get_rec(
/*=============*/
- /* out: pointer to the record */
- btr_pcur_t* cursor) /* in: persistent cursor */
+ btr_pcur_t* cursor) /*!< in: persistent cursor */
{
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
- return(page_cur_get_rec(btr_pcur_get_page_cur(cursor)));
+ return(btr_cur_get_rec(btr_pcur_get_btr_cur(cursor)));
}
-/******************************************************************
-Gets the up_match value for a pcur after a search. */
+/**************************************************************//**
+Gets the up_match value for a pcur after a search.
+@return number of matched fields at the cursor or to the right if
+search mode was PAGE_CUR_GE, otherwise undefined */
UNIV_INLINE
ulint
btr_pcur_get_up_match(
/*==================*/
- /* out: number of matched fields at the cursor
- or to the right if search mode was PAGE_CUR_GE,
- otherwise undefined */
- btr_pcur_t* cursor) /* in: memory buffer for persistent cursor */
+ btr_pcur_t* cursor) /*!< in: memory buffer for persistent cursor */
{
btr_cur_t* btr_cursor;
@@ -129,16 +161,15 @@ btr_pcur_get_up_match(
return(btr_cursor->up_match);
}
-/******************************************************************
-Gets the low_match value for a pcur after a search. */
+/**************************************************************//**
+Gets the low_match value for a pcur after a search.
+@return number of matched fields at the cursor or to the right if
+search mode was PAGE_CUR_LE, otherwise undefined */
UNIV_INLINE
ulint
btr_pcur_get_low_match(
/*===================*/
- /* out: number of matched fields at the cursor
- or to the right if search mode was PAGE_CUR_LE,
- otherwise undefined */
- btr_pcur_t* cursor) /* in: memory buffer for persistent cursor */
+ btr_pcur_t* cursor) /*!< in: memory buffer for persistent cursor */
{
btr_cur_t* btr_cursor;
@@ -151,54 +182,49 @@ btr_pcur_get_low_match(
return(btr_cursor->low_match);
}
-/*************************************************************
+/*********************************************************//**
Checks if the persistent cursor is after the last user record on
a page. */
UNIV_INLINE
ibool
btr_pcur_is_after_last_on_page(
/*===========================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr) /* in: mtr */
+ const btr_pcur_t* cursor) /*!< in: persistent cursor */
{
- UT_NOT_USED(mtr);
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor)));
}
-/*************************************************************
+/*********************************************************//**
Checks if the persistent cursor is before the first user record on
a page. */
UNIV_INLINE
ibool
btr_pcur_is_before_first_on_page(
/*=============================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr) /* in: mtr */
+ const btr_pcur_t* cursor) /*!< in: persistent cursor */
{
- UT_NOT_USED(mtr);
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor)));
}
-/*************************************************************
+/*********************************************************//**
Checks if the persistent cursor is on a user record. */
UNIV_INLINE
ibool
btr_pcur_is_on_user_rec(
/*====================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr) /* in: mtr */
+ const btr_pcur_t* cursor) /*!< in: persistent cursor */
{
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
- if ((btr_pcur_is_before_first_on_page(cursor, mtr))
- || (btr_pcur_is_after_last_on_page(cursor, mtr))) {
+ if (btr_pcur_is_before_first_on_page(cursor)
+ || btr_pcur_is_after_last_on_page(cursor)) {
return(FALSE);
}
@@ -206,15 +232,15 @@ btr_pcur_is_on_user_rec(
return(TRUE);
}
-/*************************************************************
+/*********************************************************//**
Checks if the persistent cursor is before the first user record in
the index tree. */
UNIV_INLINE
ibool
btr_pcur_is_before_first_in_tree(
/*=============================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr) /* in: mtr */
+ btr_pcur_t* cursor, /*!< in: persistent cursor */
+ mtr_t* mtr) /*!< in: mtr */
{
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
@@ -227,15 +253,15 @@ btr_pcur_is_before_first_in_tree(
return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor)));
}
-/*************************************************************
+/*********************************************************//**
Checks if the persistent cursor is after the last user record in
the index tree. */
UNIV_INLINE
ibool
btr_pcur_is_after_last_in_tree(
/*===========================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr) /* in: mtr */
+ btr_pcur_t* cursor, /*!< in: persistent cursor */
+ mtr_t* mtr) /*!< in: mtr */
{
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
@@ -248,16 +274,14 @@ btr_pcur_is_after_last_in_tree(
return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor)));
}
-/*************************************************************
+/*********************************************************//**
Moves the persistent cursor to the next record on the same page. */
UNIV_INLINE
void
btr_pcur_move_to_next_on_page(
/*==========================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr) /* in: mtr */
+ btr_pcur_t* cursor) /*!< in/out: persistent cursor */
{
- UT_NOT_USED(mtr);
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
@@ -266,16 +290,14 @@ btr_pcur_move_to_next_on_page(
cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
}
-/*************************************************************
+/*********************************************************//**
Moves the persistent cursor to the previous record on the same page. */
UNIV_INLINE
void
btr_pcur_move_to_prev_on_page(
/*==========================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr) /* in: mtr */
+ btr_pcur_t* cursor) /*!< in/out: persistent cursor */
{
- UT_NOT_USED(mtr);
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
@@ -284,42 +306,41 @@ btr_pcur_move_to_prev_on_page(
cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
}
-/*************************************************************
+/*********************************************************//**
Moves the persistent cursor to the last record on the same page. */
UNIV_INLINE
void
btr_pcur_move_to_last_on_page(
/*==========================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr) /* in: mtr */
+ btr_pcur_t* cursor, /*!< in: persistent cursor */
+ mtr_t* mtr) /*!< in: mtr */
{
UT_NOT_USED(mtr);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
- page_cur_set_after_last(buf_frame_align(btr_pcur_get_rec(cursor)),
+ page_cur_set_after_last(btr_pcur_get_block(cursor),
btr_pcur_get_page_cur(cursor));
cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
}
-/*************************************************************
+/*********************************************************//**
Moves the persistent cursor to the next user record in the tree. If no user
-records are left, the cursor ends up 'after last in tree'. */
+records are left, the cursor ends up 'after last in tree'.
+@return TRUE if the cursor moved forward, ending on a user record */
UNIV_INLINE
ibool
btr_pcur_move_to_next_user_rec(
/*===========================*/
- /* out: TRUE if the cursor moved forward,
- ending on a user record */
- btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
+ btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the
function may release the page latch */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
loop:
- if (btr_pcur_is_after_last_on_page(cursor, mtr)) {
+ if (btr_pcur_is_after_last_on_page(cursor)) {
if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
@@ -328,10 +349,10 @@ loop:
btr_pcur_move_to_next_page(cursor, mtr);
} else {
- btr_pcur_move_to_next_on_page(cursor, mtr);
+ btr_pcur_move_to_next_on_page(cursor);
}
- if (btr_pcur_is_on_user_rec(cursor, mtr)) {
+ if (btr_pcur_is_on_user_rec(cursor)) {
return(TRUE);
}
@@ -339,25 +360,24 @@ loop:
goto loop;
}
-/*************************************************************
+/*********************************************************//**
Moves the persistent cursor to the next record in the tree. If no records are
-left, the cursor stays 'after last in tree'. */
+left, the cursor stays 'after last in tree'.
+@return TRUE if the cursor was not after last in tree */
UNIV_INLINE
ibool
btr_pcur_move_to_next(
/*==================*/
- /* out: TRUE if the cursor was not after last
- in tree */
- btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
+ btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the
function may release the page latch */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
- if (btr_pcur_is_after_last_on_page(cursor, mtr)) {
+ if (btr_pcur_is_after_last_on_page(cursor)) {
if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
@@ -369,12 +389,12 @@ btr_pcur_move_to_next(
return(TRUE);
}
- btr_pcur_move_to_next_on_page(cursor, mtr);
+ btr_pcur_move_to_next_on_page(cursor);
return(TRUE);
}
-/******************************************************************
+/**************************************************************//**
Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES,
that is, the cursor becomes detached. If there have been modifications
to the page where pcur is positioned, this can be used instead of
@@ -384,7 +404,7 @@ UNIV_INLINE
void
btr_pcur_commit(
/*============*/
- btr_pcur_t* pcur) /* in: persistent cursor */
+ btr_pcur_t* pcur) /*!< in: persistent cursor */
{
ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
@@ -395,14 +415,14 @@ btr_pcur_commit(
pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
}
-/******************************************************************
+/**************************************************************//**
Differs from btr_pcur_commit in that we can specify the mtr to commit. */
UNIV_INLINE
void
btr_pcur_commit_specify_mtr(
/*========================*/
- btr_pcur_t* pcur, /* in: persistent cursor */
- mtr_t* mtr) /* in: mtr to commit */
+ btr_pcur_t* pcur, /*!< in: persistent cursor */
+ mtr_t* mtr) /*!< in: mtr to commit */
{
ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
@@ -413,13 +433,13 @@ btr_pcur_commit_specify_mtr(
pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
}
-/******************************************************************
+/**************************************************************//**
Sets the pcur latch mode to BTR_NO_LATCHES. */
UNIV_INLINE
void
btr_pcur_detach(
/*============*/
- btr_pcur_t* pcur) /* in: persistent cursor */
+ btr_pcur_t* pcur) /*!< in: persistent cursor */
{
ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
@@ -428,14 +448,14 @@ btr_pcur_detach(
pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
}
-/******************************************************************
-Tests if a cursor is detached: that is the latch mode is BTR_NO_LATCHES. */
+/**************************************************************//**
+Tests if a cursor is detached: that is the latch mode is BTR_NO_LATCHES.
+@return TRUE if detached */
UNIV_INLINE
ibool
btr_pcur_is_detached(
/*=================*/
- /* out: TRUE if detached */
- btr_pcur_t* pcur) /* in: persistent cursor */
+ btr_pcur_t* pcur) /*!< in: persistent cursor */
{
if (pcur->latch_mode == BTR_NO_LATCHES) {
@@ -445,37 +465,37 @@ btr_pcur_is_detached(
return(FALSE);
}
-/******************************************************************
+/**************************************************************//**
Sets the old_rec_buf field to NULL. */
UNIV_INLINE
void
btr_pcur_init(
/*==========*/
- btr_pcur_t* pcur) /* in: persistent cursor */
+ btr_pcur_t* pcur) /*!< in: persistent cursor */
{
pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
pcur->old_rec_buf = NULL;
pcur->old_rec = NULL;
}
-/******************************************************************
+/**************************************************************//**
Initializes and opens a persistent cursor to an index tree. It should be
closed with btr_pcur_close. */
UNIV_INLINE
void
btr_pcur_open(
/*==========*/
- dict_index_t* index, /* in: index */
- dtuple_t* tuple, /* in: tuple on which search done */
- ulint mode, /* in: PAGE_CUR_L, ...;
+ dict_index_t* index, /*!< in: index */
+ const dtuple_t* tuple, /*!< in: tuple on which search done */
+ ulint mode, /*!< in: PAGE_CUR_L, ...;
NOTE that if the search is made using a unique
prefix of a record, mode should be
PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
may end up on the previous page from the
record! */
- ulint latch_mode,/* in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */
- mtr_t* mtr) /* in: mtr */
+ ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
+ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
+ mtr_t* mtr) /*!< in: mtr */
{
btr_cur_t* btr_cursor;
@@ -497,31 +517,31 @@ btr_pcur_open(
cursor->trx_if_known = NULL;
}
-/******************************************************************
+/**************************************************************//**
Opens a persistent cursor to an index tree without initializing the
cursor. */
UNIV_INLINE
void
btr_pcur_open_with_no_init(
/*=======================*/
- dict_index_t* index, /* in: index */
- dtuple_t* tuple, /* in: tuple on which search done */
- ulint mode, /* in: PAGE_CUR_L, ...;
+ dict_index_t* index, /*!< in: index */
+ const dtuple_t* tuple, /*!< in: tuple on which search done */
+ ulint mode, /*!< in: PAGE_CUR_L, ...;
NOTE that if the search is made using a unique
prefix of a record, mode should be
PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
may end up on the previous page of the
record! */
- ulint latch_mode,/* in: BTR_SEARCH_LEAF, ...;
+ ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ...;
NOTE that if has_search_latch != 0 then
we maybe do not acquire a latch on the cursor
page, but assume that the caller uses his
btr search latch to protect the record! */
- btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */
- ulint has_search_latch,/* in: latch mode the caller
+ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
+ ulint has_search_latch,/*!< in: latch mode the caller
currently has on btr_search_latch:
RW_S_LATCH, or 0 */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
btr_cur_t* btr_cursor;
@@ -541,19 +561,19 @@ btr_pcur_open_with_no_init(
cursor->trx_if_known = NULL;
}
-/*********************************************************************
+/*****************************************************************//**
Opens a persistent cursor at either end of an index. */
UNIV_INLINE
void
btr_pcur_open_at_index_side(
/*========================*/
- ibool from_left, /* in: TRUE if open to the low end,
+ ibool from_left, /*!< in: TRUE if open to the low end,
FALSE if to the high end */
- dict_index_t* index, /* in: index */
- ulint latch_mode, /* in: latch mode */
- btr_pcur_t* pcur, /* in: cursor */
- ibool do_init, /* in: TRUE if should be initialized */
- mtr_t* mtr) /* in: mtr */
+ dict_index_t* index, /*!< in: index */
+ ulint latch_mode, /*!< in: latch mode */
+ btr_pcur_t* pcur, /*!< in: cursor */
+ ibool do_init, /*!< in: TRUE if should be initialized */
+ mtr_t* mtr) /*!< in: mtr */
{
pcur->latch_mode = latch_mode;
@@ -576,16 +596,16 @@ btr_pcur_open_at_index_side(
pcur->trx_if_known = NULL;
}
-/**************************************************************************
+/**********************************************************************//**
Positions a cursor at a randomly chosen position within a B-tree. */
UNIV_INLINE
void
btr_pcur_open_at_rnd_pos(
/*=====================*/
- dict_index_t* index, /* in: index */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /* in/out: B-tree pcur */
- mtr_t* mtr) /* in: mtr */
+ dict_index_t* index, /*!< in: index */
+ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
+ btr_pcur_t* cursor, /*!< in/out: B-tree pcur */
+ mtr_t* mtr) /*!< in: mtr */
{
/* Initialize the cursor */
@@ -602,14 +622,14 @@ btr_pcur_open_at_rnd_pos(
cursor->trx_if_known = NULL;
}
-/******************************************************************
+/**************************************************************//**
Frees the possible memory heap of a persistent cursor and sets the latch
mode of the persistent cursor to BTR_NO_LATCHES. */
UNIV_INLINE
void
btr_pcur_close(
/*===========*/
- btr_pcur_t* cursor) /* in: persistent cursor */
+ btr_pcur_t* cursor) /*!< in: persistent cursor */
{
if (cursor->old_rec_buf != NULL) {
@@ -620,6 +640,7 @@ btr_pcur_close(
}
cursor->btr_cur.page_cur.rec = NULL;
+ cursor->btr_cur.page_cur.block = NULL;
cursor->old_rec = NULL;
cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
diff --git a/storage/innobase/include/btr0sea.h b/storage/innobase/include/btr0sea.h
index 6d1c2bb86d3..f98ba386f9c 100644
--- a/storage/innobase/include/btr0sea.h
+++ b/storage/innobase/include/btr0sea.h
@@ -1,7 +1,24 @@
-/************************************************************************
-The index tree adaptive search
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/btr0sea.h
+The index tree adaptive search
Created 2/17/1996 Heikki Tuuri
*************************************************************************/
@@ -17,195 +34,235 @@ Created 2/17/1996 Heikki Tuuri
#include "mtr0mtr.h"
#include "ha0ha.h"
-/*********************************************************************
+/*****************************************************************//**
Creates and initializes the adaptive search system at a database start. */
-
+UNIV_INTERN
void
btr_search_sys_create(
/*==================*/
- ulint hash_size); /* in: hash index hash table size */
-/************************************************************************
-Returns search info for an index. */
+ ulint hash_size); /*!< in: hash index hash table size */
+/*****************************************************************//**
+Frees the adaptive search system at a database shutdown. */
+UNIV_INTERN
+void
+btr_search_sys_free(void);
+/*=====================*/
+
+/********************************************************************//**
+Disable the adaptive hash search system and empty the index. */
+UNIV_INTERN
+void
+btr_search_disable(void);
+/*====================*/
+/********************************************************************//**
+Enable the adaptive hash search system. */
+UNIV_INTERN
+void
+btr_search_enable(void);
+/*====================*/
+
+/********************************************************************//**
+Returns search info for an index.
+@return search info; search mutex reserved */
UNIV_INLINE
btr_search_t*
btr_search_get_info(
/*================*/
- /* out: search info; search mutex reserved */
- dict_index_t* index); /* in: index */
-/*********************************************************************
-Creates and initializes a search info struct. */
-
+ dict_index_t* index); /*!< in: index */
+/*****************************************************************//**
+Creates and initializes a search info struct.
+@return own: search info struct */
+UNIV_INTERN
btr_search_t*
btr_search_info_create(
/*===================*/
- /* out, own: search info struct */
- mem_heap_t* heap); /* in: heap where created */
-/*********************************************************************
+ mem_heap_t* heap); /*!< in: heap where created */
+/*****************************************************************//**
Returns the value of ref_count. The value is protected by
-btr_search_latch. */
+btr_search_latch.
+@return ref_count value. */
+UNIV_INTERN
ulint
btr_search_info_get_ref_count(
/*==========================*/
- /* out: ref_count value. */
- btr_search_t* info); /* in: search info. */
-/*************************************************************************
+ btr_search_t* info); /*!< in: search info. */
+/*********************************************************************//**
Updates the search info. */
UNIV_INLINE
void
btr_search_info_update(
/*===================*/
- dict_index_t* index, /* in: index of the cursor */
- btr_cur_t* cursor);/* in: cursor which was just positioned */
-/**********************************************************************
+ dict_index_t* index, /*!< in: index of the cursor */
+ btr_cur_t* cursor);/*!< in: cursor which was just positioned */
+/******************************************************************//**
Tries to guess the right search position based on the hash search info
of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
and the function returns TRUE, then cursor->up_match and cursor->low_match
-both have sensible values. */
-
+both have sensible values.
+@return TRUE if succeeded */
+UNIV_INTERN
ibool
btr_search_guess_on_hash(
/*=====================*/
- /* out: TRUE if succeeded */
- dict_index_t* index, /* in: index */
- btr_search_t* info, /* in: index search info */
- dtuple_t* tuple, /* in: logical record */
- ulint mode, /* in: PAGE_CUR_L, ... */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
- btr_cur_t* cursor, /* out: tree cursor */
- ulint has_search_latch,/* in: latch mode the caller
+ dict_index_t* index, /*!< in: index */
+ btr_search_t* info, /*!< in: index search info */
+ const dtuple_t* tuple, /*!< in: logical record */
+ ulint mode, /*!< in: PAGE_CUR_L, ... */
+ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
+ btr_cur_t* cursor, /*!< out: tree cursor */
+ ulint has_search_latch,/*!< in: latch mode the caller
currently has on btr_search_latch:
RW_S_LATCH, RW_X_LATCH, or 0 */
- mtr_t* mtr); /* in: mtr */
-/************************************************************************
+ mtr_t* mtr); /*!< in: mtr */
+/********************************************************************//**
Moves or deletes hash entries for moved records. If new_page is already hashed,
then the hash index for page, if any, is dropped. If new_page is not hashed,
and page is hashed, then a new hash index is built to new_page with the same
parameters as page (this often happens when a page is split). */
-
+UNIV_INTERN
void
btr_search_move_or_delete_hash_entries(
/*===================================*/
- page_t* new_page, /* in: records are copied
+ buf_block_t* new_block, /*!< in: records are copied
to this page */
- page_t* page, /* in: index page */
- dict_index_t* index); /* in: record descriptor */
-/************************************************************************
+ buf_block_t* block, /*!< in: index page from which
+ records were copied, and the
+ copied records will be deleted
+ from this page */
+ dict_index_t* index); /*!< in: record descriptor */
+/********************************************************************//**
Drops a page hash index. */
-
+UNIV_INTERN
void
btr_search_drop_page_hash_index(
/*============================*/
- page_t* page); /* in: index page, s- or x-latched */
-/************************************************************************
+ buf_block_t* block); /*!< in: block containing index page,
+ s- or x-latched, or an index page
+ for which we know that
+ block->buf_fix_count == 0 */
+/********************************************************************//**
Drops a page hash index when a page is freed from a fseg to the file system.
Drops possible hash index if the page happens to be in the buffer pool. */
-
+UNIV_INTERN
void
btr_search_drop_page_hash_when_freed(
/*=================================*/
- ulint space, /* in: space id */
- ulint page_no); /* in: page number */
-/************************************************************************
+ ulint space, /*!< in: space id */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page_no); /*!< in: page number */
+/********************************************************************//**
Updates the page hash index when a single record is inserted on a page. */
-
+UNIV_INTERN
void
btr_search_update_hash_node_on_insert(
/*==================================*/
- btr_cur_t* cursor);/* in: cursor which was positioned to the
+ btr_cur_t* cursor);/*!< in: cursor which was positioned to the
place to insert using btr_cur_search_...,
and the new record has been inserted next
to the cursor */
-/************************************************************************
+/********************************************************************//**
Updates the page hash index when a single record is inserted on a page. */
-
+UNIV_INTERN
void
btr_search_update_hash_on_insert(
/*=============================*/
- btr_cur_t* cursor);/* in: cursor which was positioned to the
+ btr_cur_t* cursor);/*!< in: cursor which was positioned to the
place to insert using btr_cur_search_...,
and the new record has been inserted next
to the cursor */
-/************************************************************************
+/********************************************************************//**
Updates the page hash index when a single record is deleted from a page. */
-
+UNIV_INTERN
void
btr_search_update_hash_on_delete(
/*=============================*/
- btr_cur_t* cursor);/* in: cursor which was positioned on the
+ btr_cur_t* cursor);/*!< in: cursor which was positioned on the
record to delete using btr_cur_search_...,
the record is not yet deleted */
-/************************************************************************
-Validates the search system. */
-
+/********************************************************************//**
+Validates the search system.
+@return TRUE if ok */
+UNIV_INTERN
ibool
btr_search_validate(void);
/*======================*/
- /* out: TRUE if ok */
-/* The search info struct in an index */
+/** Flag: has the search system been enabled?
+Protected by btr_search_latch and btr_search_enabled_mutex. */
+extern char btr_search_enabled;
+/** The search info struct in an index */
struct btr_search_struct{
- ulint ref_count; /* Number of blocks in this index tree
+ ulint ref_count; /*!< Number of blocks in this index tree
that have search index built
i.e. block->index points to this index.
Protected by btr_search_latch except
when during initialization in
btr_search_info_create(). */
- /* The following fields are not protected by any latch.
+ /* @{ The following fields are not protected by any latch.
Unfortunately, this means that they must be aligned to
the machine word, i.e., they cannot be turned into bit-fields. */
- page_t* root_guess; /* the root page frame when it was last time
+ buf_block_t* root_guess;/*!< the root page frame when it was last time
fetched, or NULL */
- ulint hash_analysis; /* when this exceeds BTR_SEARCH_HASH_ANALYSIS,
- the hash analysis starts; this is reset if no
+ ulint hash_analysis; /*!< when this exceeds
+ BTR_SEARCH_HASH_ANALYSIS, the hash
+ analysis starts; this is reset if no
success noticed */
- ibool last_hash_succ; /* TRUE if the last search would have
+ ibool last_hash_succ; /*!< TRUE if the last search would have
succeeded, or did succeed, using the hash
index; NOTE that the value here is not exact:
it is not calculated for every search, and the
calculation itself is not always accurate! */
ulint n_hash_potential;
- /* number of consecutive searches
+ /*!< number of consecutive searches
which would have succeeded, or did succeed,
using the hash index;
the range is 0 .. BTR_SEARCH_BUILD_LIMIT + 5 */
- /*----------------------*/
- ulint n_fields; /* recommended prefix length for hash search:
+ /* @} */
+ /*---------------------- @{ */
+ ulint n_fields; /*!< recommended prefix length for hash search:
number of full fields */
- ulint n_bytes; /* recommended prefix: number of bytes in
- an incomplete field;
- see also BTR_PAGE_MAX_REC_SIZE */
- ibool left_side; /* TRUE or FALSE, depending on whether
+ ulint n_bytes; /*!< recommended prefix: number of bytes in
+ an incomplete field
+ @see BTR_PAGE_MAX_REC_SIZE */
+ ibool left_side; /*!< TRUE or FALSE, depending on whether
the leftmost record of several records with
the same prefix should be indexed in the
hash index */
- /*----------------------*/
+ /*---------------------- @} */
#ifdef UNIV_SEARCH_PERF_STAT
- ulint n_hash_succ; /* number of successful hash searches thus
+ ulint n_hash_succ; /*!< number of successful hash searches thus
far */
- ulint n_hash_fail; /* number of failed hash searches */
- ulint n_patt_succ; /* number of successful pattern searches thus
+ ulint n_hash_fail; /*!< number of failed hash searches */
+ ulint n_patt_succ; /*!< number of successful pattern searches thus
far */
- ulint n_searches; /* number of searches */
+ ulint n_searches; /*!< number of searches */
#endif /* UNIV_SEARCH_PERF_STAT */
#ifdef UNIV_DEBUG
- ulint magic_n; /* magic number */
+ ulint magic_n; /*!< magic number @see BTR_SEARCH_MAGIC_N */
+/** value of btr_search_struct::magic_n, used in assertions */
# define BTR_SEARCH_MAGIC_N 1112765
#endif /* UNIV_DEBUG */
};
-/* The hash index system */
-
+/** The hash index system */
typedef struct btr_search_sys_struct btr_search_sys_t;
+/** The hash index system */
struct btr_search_sys_struct{
- hash_table_t* hash_index;
+ hash_table_t* hash_index; /*!< the adaptive hash index,
+ mapping dtuple_fold values
+ to rec_t pointers on index pages */
};
+/** The adaptive hash index */
extern btr_search_sys_t* btr_search_sys;
-/* The latch protecting the adaptive search system: this latch protects the
+/** @brief The latch protecting the adaptive search system
+
+This latch protects the
(1) hash index;
(2) columns of a record to which we have a pointer in the hash index;
@@ -216,36 +273,34 @@ but does NOT protect:
Bear in mind (3) and (4) when using the hash index.
*/
-
extern rw_lock_t* btr_search_latch_temp;
+/** The latch protecting the adaptive search system */
#define btr_search_latch (*btr_search_latch_temp)
#ifdef UNIV_SEARCH_PERF_STAT
+/** Number of successful adaptive hash index lookups */
extern ulint btr_search_n_succ;
+/** Number of failed adaptive hash index lookups */
extern ulint btr_search_n_hash_fail;
#endif /* UNIV_SEARCH_PERF_STAT */
-/* After change in n_fields or n_bytes in info, this many rounds are waited
+/** After change in n_fields or n_bytes in info, this many rounds are waited
before starting the hash analysis again: this is to save CPU time when there
is no hope in building a hash index. */
-
#define BTR_SEARCH_HASH_ANALYSIS 17
-/* Limit of consecutive searches for trying a search shortcut on the search
+/** Limit of consecutive searches for trying a search shortcut on the search
pattern */
-
#define BTR_SEARCH_ON_PATTERN_LIMIT 3
-/* Limit of consecutive searches for trying a search shortcut using the hash
-index */
-
+/** Limit of consecutive searches for trying a search shortcut using
+the hash index */
#define BTR_SEARCH_ON_HASH_LIMIT 3
-/* We do this many searches before trying to keep the search latch over calls
-from MySQL. If we notice someone waiting for the latch, we again set this
-much timeout. This is to reduce contention. */
-
+/** We do this many searches before trying to keep the search latch
+over calls from MySQL. If we notice someone waiting for the latch, we
+again set this much timeout. This is to reduce contention. */
#define BTR_SEA_TIMEOUT 10000
#ifndef UNIV_NONINL
diff --git a/storage/innobase/include/btr0sea.ic b/storage/innobase/include/btr0sea.ic
index f4e33027c25..beadeeb8d02 100644
--- a/storage/innobase/include/btr0sea.ic
+++ b/storage/innobase/include/btr0sea.ic
@@ -1,7 +1,24 @@
-/************************************************************************
-The index tree adaptive search
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/btr0sea.ic
+The index tree adaptive search
Created 2/17/1996 Heikki Tuuri
*************************************************************************/
@@ -10,37 +27,37 @@ Created 2/17/1996 Heikki Tuuri
#include "btr0cur.h"
#include "buf0buf.h"
-/*************************************************************************
+/*********************************************************************//**
Updates the search info. */
-
+UNIV_INTERN
void
btr_search_info_update_slow(
/*========================*/
- btr_search_t* info, /* in/out: search info */
- btr_cur_t* cursor);/* in: cursor which was just positioned */
+ btr_search_t* info, /*!< in/out: search info */
+ btr_cur_t* cursor);/*!< in: cursor which was just positioned */
-/************************************************************************
-Returns search info for an index. */
+/********************************************************************//**
+Returns search info for an index.
+@return search info; search mutex reserved */
UNIV_INLINE
btr_search_t*
btr_search_get_info(
/*================*/
- /* out: search info; search mutex reserved */
- dict_index_t* index) /* in: index */
+ dict_index_t* index) /*!< in: index */
{
ut_ad(index);
return(index->search_info);
}
-/*************************************************************************
+/*********************************************************************//**
Updates the search info. */
UNIV_INLINE
void
btr_search_info_update(
/*===================*/
- dict_index_t* index, /* in: index of the cursor */
- btr_cur_t* cursor) /* in: cursor which was just positioned */
+ dict_index_t* index, /*!< in: index of the cursor */
+ btr_cur_t* cursor) /*!< in: cursor which was just positioned */
{
btr_search_t* info;
diff --git a/storage/innobase/include/btr0types.h b/storage/innobase/include/btr0types.h
index 8fa0bf0602d..ef4a6b04b34 100644
--- a/storage/innobase/include/btr0types.h
+++ b/storage/innobase/include/btr0types.h
@@ -1,7 +1,24 @@
-/************************************************************************
-The index tree general types
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1996 Innobase Oy
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/btr0types.h
+The index tree general types
Created 2/17/1996 Heikki Tuuri
*************************************************************************/
@@ -14,8 +31,21 @@ Created 2/17/1996 Heikki Tuuri
#include "rem0types.h"
#include "page0types.h"
+/** Persistent cursor */
typedef struct btr_pcur_struct btr_pcur_t;
+/** B-tree cursor */
typedef struct btr_cur_struct btr_cur_t;
+/** B-tree search information for the adaptive hash index */
typedef struct btr_search_struct btr_search_t;
+/** The size of a reference to data stored on a different page.
+The reference is stored at the end of the prefix of the field
+in the index record. */
+#define BTR_EXTERN_FIELD_REF_SIZE 20
+
+/** A BLOB field reference full of zero, for use in assertions and tests.
+Initially, BLOB field references are set to zero, in
+dtuple_convert_big_rec(). */
+extern const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE];
+
#endif
diff --git a/storage/innodb_plugin/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h
index 7648950d5d1..7648950d5d1 100644
--- a/storage/innodb_plugin/include/buf0buddy.h
+++ b/storage/innobase/include/buf0buddy.h
diff --git a/storage/innodb_plugin/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic
index c419a2374d9..c419a2374d9 100644
--- a/storage/innodb_plugin/include/buf0buddy.ic
+++ b/storage/innobase/include/buf0buddy.ic
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index 3e8972d9182..927ff893e39 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -1,21 +1,24 @@
-/* Innobase relational database engine; Copyright (C) 2001 Innobase Oy
+/*****************************************************************************
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License 2
- as published by the Free Software Foundation in June 1991.
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
- You should have received a copy of the GNU General Public License 2
- along with this program (in file COPYING); if not, write to the Free
- Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
-/******************************************************
-The database buffer pool high-level routines
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1995 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0buf.h
+The database buffer pool high-level routines
Created 11/5/1995 Heikki Tuuri
*******************************************************/
@@ -27,621 +30,925 @@ Created 11/5/1995 Heikki Tuuri
#include "fil0fil.h"
#include "mtr0types.h"
#include "buf0types.h"
-#include "sync0rw.h"
#include "hash0hash.h"
#include "ut0byte.h"
+#include "page0types.h"
+#ifndef UNIV_HOTBACKUP
#include "os0proc.h"
-/* Flags for flush types */
-#define BUF_FLUSH_LRU 1
-#define BUF_FLUSH_SINGLE_PAGE 2
-#define BUF_FLUSH_LIST 3 /* An array in the pool struct
- has size BUF_FLUSH_LIST + 1: if you
- add more flush types, put them in
- the middle! */
-/* Modes for buf_page_get_gen */
-#define BUF_GET 10 /* get always */
-#define BUF_GET_IF_IN_POOL 11 /* get if in pool */
-#define BUF_GET_NOWAIT 12 /* get if can set the latch without
- waiting */
-#define BUF_GET_NO_LATCH 14 /* get and bufferfix, but set no latch;
- we have separated this case, because
- it is error-prone programming not to
- set a latch, and it should be used
- with care */
-/* Modes for buf_page_get_known_nowait */
-#define BUF_MAKE_YOUNG 51
-#define BUF_KEEP_OLD 52
-/* Magic value to use instead of checksums when they are disabled */
-#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
-
-extern buf_pool_t* buf_pool; /* The buffer pool of the database */
+/** @name Modes for buf_page_get_gen */
+/* @{ */
+#define BUF_GET 10 /*!< get always */
+#define BUF_GET_IF_IN_POOL 11 /*!< get if in pool */
+#define BUF_GET_NO_LATCH 14 /*!< get and bufferfix, but
+ set no latch; we have
+ separated this case, because
+ it is error-prone programming
+ not to set a latch, and it
+ should be used with care */
+/* @} */
+/** @name Modes for buf_page_get_known_nowait */
+/* @{ */
+#define BUF_MAKE_YOUNG 51 /*!< Move the block to the
+ start of the LRU list if there
+ is a danger that the block
+ would drift out of the buffer
+ pool */
+#define BUF_KEEP_OLD 52 /*!< Preserve the current LRU
+ position of the block. */
+/* @} */
+
+extern buf_pool_t* buf_pool; /*!< The buffer pool of the database */
#ifdef UNIV_DEBUG
-extern ibool buf_debug_prints;/* If this is set TRUE, the program
+extern ibool buf_debug_prints;/*!< If this is set TRUE, the program
prints info whenever read or flush
occurs */
#endif /* UNIV_DEBUG */
-extern ulint srv_buf_pool_write_requests; /* variable to count write request
+extern ulint srv_buf_pool_write_requests; /*!< variable to count write request
issued */
+#else /* !UNIV_HOTBACKUP */
+extern buf_block_t* back_block1; /*!< first block, for --apply-log */
+extern buf_block_t* back_block2; /*!< second block, for page reorganize */
+#endif /* !UNIV_HOTBACKUP */
+
+/** Magic value to use instead of checksums when they are disabled */
+#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
-/************************************************************************
-Creates the buffer pool. */
+/** @brief States of a control block
+@see buf_page_struct
+
+The enumeration values must be 0..7. */
+enum buf_page_state {
+ BUF_BLOCK_ZIP_FREE = 0, /*!< contains a free
+ compressed page */
+ BUF_BLOCK_ZIP_PAGE, /*!< contains a clean
+ compressed page */
+ BUF_BLOCK_ZIP_DIRTY, /*!< contains a compressed
+ page that is in the
+ buf_pool->flush_list */
+
+ BUF_BLOCK_NOT_USED, /*!< is in the free list;
+ must be after the BUF_BLOCK_ZIP_
+ constants for compressed-only pages
+ @see buf_block_state_valid() */
+ BUF_BLOCK_READY_FOR_USE, /*!< when buf_LRU_get_free_block
+ returns a block, it is in this state */
+ BUF_BLOCK_FILE_PAGE, /*!< contains a buffered file page */
+ BUF_BLOCK_MEMORY, /*!< contains some main memory
+ object */
+ BUF_BLOCK_REMOVE_HASH /*!< hash index should be removed
+ before putting to the free list */
+};
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Creates the buffer pool.
+@return own: buf_pool object, NULL if not enough memory or error */
+UNIV_INTERN
buf_pool_t*
-buf_pool_init(
-/*==========*/
- /* out, own: buf_pool object, NULL if not
- enough memory or error */
- ulint max_size, /* in: maximum size of the buf_pool in
- blocks */
- ulint curr_size, /* in: current size to use, must be <=
- max_size, currently must be equal to
- max_size */
- ulint n_frames); /* in: number of frames; if AWE is used,
- this is the size of the address space window
- where physical memory pages are mapped; if
- AWE is not used then this must be the same
- as max_size */
-/*************************************************************************
-Gets the current size of buffer buf_pool in bytes. In the case of AWE, the
-size of AWE window (= the frames). */
+buf_pool_init(void);
+/*===============*/
+/********************************************************************//**
+Frees the buffer pool at shutdown. This must not be invoked before
+freeing all mutexes. */
+UNIV_INTERN
+void
+buf_pool_free(void);
+/*===============*/
+
+/********************************************************************//**
+Drops the adaptive hash index. To prevent a livelock, this function
+is only to be called while holding btr_search_latch and while
+btr_search_enabled == FALSE. */
+UNIV_INTERN
+void
+buf_pool_drop_hash_index(void);
+/*==========================*/
+
+/********************************************************************//**
+Relocate a buffer control block. Relocates the block on the LRU list
+and in buf_pool->page_hash. Does not relocate bpage->list.
+The caller must take care of relocating bpage->list. */
+UNIV_INTERN
+void
+buf_relocate(
+/*=========*/
+ buf_page_t* bpage, /*!< in/out: control block being relocated;
+ buf_page_get_state(bpage) must be
+ BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
+ buf_page_t* dpage) /*!< in/out: destination control block */
+ __attribute__((nonnull));
+/********************************************************************//**
+Resizes the buffer pool. */
+UNIV_INTERN
+void
+buf_pool_resize(void);
+/*=================*/
+/*********************************************************************//**
+Gets the current size of buffer buf_pool in bytes.
+@return size in bytes */
UNIV_INLINE
ulint
buf_pool_get_curr_size(void);
/*========================*/
- /* out: size in bytes */
-/*************************************************************************
-Gets the maximum size of buffer pool in bytes. In the case of AWE, the
-size of AWE window (= the frames). */
-UNIV_INLINE
-ulint
-buf_pool_get_max_size(void);
-/*=======================*/
- /* out: size in bytes */
-/************************************************************************
+/********************************************************************//**
Gets the smallest oldest_modification lsn for any page in the pool. Returns
-ut_dulint_zero if all modified pages have been flushed to disk. */
+zero if all modified pages have been flushed to disk.
+@return oldest modification in pool, zero if none */
UNIV_INLINE
-dulint
+ib_uint64_t
buf_pool_get_oldest_modification(void);
/*==================================*/
- /* out: oldest modification in pool,
- ut_dulint_zero if none */
-/*************************************************************************
-Allocates a buffer frame. */
-
-buf_frame_t*
-buf_frame_alloc(void);
-/*==================*/
- /* out: buffer frame */
-/*************************************************************************
-Frees a buffer frame which does not contain a file page. */
-
+/********************************************************************//**
+Allocates a buffer block.
+@return own: the allocated block, in state BUF_BLOCK_MEMORY */
+UNIV_INLINE
+buf_block_t*
+buf_block_alloc(
+/*============*/
+ ulint zip_size); /*!< in: compressed page size in bytes,
+ or 0 if uncompressed tablespace */
+/********************************************************************//**
+Frees a buffer block which does not contain a file page. */
+UNIV_INLINE
void
-buf_frame_free(
+buf_block_free(
/*===========*/
- buf_frame_t* frame); /* in: buffer frame */
-/*************************************************************************
-Copies contents of a buffer frame to a given buffer. */
+ buf_block_t* block); /*!< in, own: block to be freed */
+#endif /* !UNIV_HOTBACKUP */
+/*********************************************************************//**
+Copies contents of a buffer frame to a given buffer.
+@return buf */
UNIV_INLINE
byte*
buf_frame_copy(
/*===========*/
- /* out: buf */
- byte* buf, /* in: buffer to copy to */
- buf_frame_t* frame); /* in: buffer frame */
-/******************************************************************
+ byte* buf, /*!< in: buffer to copy to */
+ const buf_frame_t* frame); /*!< in: buffer frame */
+#ifndef UNIV_HOTBACKUP
+/**************************************************************//**
NOTE! The following macros should be used instead of buf_page_get_gen,
to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed
in LA! */
-#define buf_page_get(SP, OF, LA, MTR) buf_page_get_gen(\
- SP, OF, LA, NULL,\
+#define buf_page_get(SP, ZS, OF, LA, MTR) buf_page_get_gen(\
+ SP, ZS, OF, LA, NULL,\
BUF_GET, __FILE__, __LINE__, MTR)
-/******************************************************************
+/**************************************************************//**
Use these macros to bufferfix a page with no latching. Remember not to
read the contents of the page unless you know it is safe. Do not modify
the contents of the page! We have separated this case, because it is
error-prone programming not to set a latch, and it should be used
with care. */
-#define buf_page_get_with_no_latch(SP, OF, MTR) buf_page_get_gen(\
- SP, OF, RW_NO_LATCH, NULL,\
+#define buf_page_get_with_no_latch(SP, ZS, OF, MTR) buf_page_get_gen(\
+ SP, ZS, OF, RW_NO_LATCH, NULL,\
BUF_GET_NO_LATCH, __FILE__, __LINE__, MTR)
-/******************************************************************
-NOTE! The following macros should be used instead of buf_page_get_gen, to
-improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed as LA! */
-#define buf_page_get_nowait(SP, OF, LA, MTR) buf_page_get_gen(\
- SP, OF, LA, NULL,\
- BUF_GET_NOWAIT, __FILE__, __LINE__, MTR)
-/******************************************************************
+/**************************************************************//**
NOTE! The following macros should be used instead of
buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and
RW_X_LATCH are allowed as LA! */
-#define buf_page_optimistic_get(LA, BL, G, MC, MTR) \
- buf_page_optimistic_get_func(LA, BL, G, MC, __FILE__, __LINE__, MTR)
-/************************************************************************
+#define buf_page_optimistic_get(LA, BL, MC, MTR) \
+ buf_page_optimistic_get_func(LA, BL, MC, __FILE__, __LINE__, MTR)
+/********************************************************************//**
This is the general function used to get optimistic access to a database
-page. */
-
+page.
+@return TRUE if success */
+UNIV_INTERN
ibool
buf_page_optimistic_get_func(
/*=========================*/
- /* out: TRUE if success */
- ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
- buf_block_t* block, /* in: guessed block */
- buf_frame_t* guess, /* in: guessed frame; note that AWE may move
- frames */
- dulint modify_clock,/* in: modify clock value if mode is
+ ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
+ buf_block_t* block, /*!< in: guessed block */
+ ib_uint64_t modify_clock,/*!< in: modify clock value if mode is
..._GUESS_ON_CLOCK */
- const char* file, /* in: file name */
- ulint line, /* in: line where called */
- mtr_t* mtr); /* in: mini-transaction */
-/************************************************************************
-Tries to get the page, but if file io is required, releases all latches
-in mtr down to the given savepoint. If io is required, this function
-retrieves the page to buffer buf_pool, but does not bufferfix it or latch
-it. */
-UNIV_INLINE
-buf_frame_t*
-buf_page_get_release_on_io(
-/*=======================*/
- /* out: pointer to the frame, or NULL
- if not in buffer buf_pool */
- ulint space, /* in: space id */
- ulint offset, /* in: offset of the page within space
- in units of a page */
- buf_frame_t* guess, /* in: guessed frame or NULL */
- ulint rw_latch, /* in: RW_X_LATCH, RW_S_LATCH,
- or RW_NO_LATCH */
- ulint savepoint, /* in: mtr savepoint */
- mtr_t* mtr); /* in: mtr */
-/************************************************************************
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
+ mtr_t* mtr); /*!< in: mini-transaction */
+/********************************************************************//**
This is used to get access to a known database page, when no waiting can be
-done. */
-
+done.
+@return TRUE if success */
+UNIV_INTERN
ibool
buf_page_get_known_nowait(
/*======================*/
- /* out: TRUE if success */
- ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
- buf_frame_t* guess, /* in: the known page frame */
- ulint mode, /* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
- const char* file, /* in: file name */
- ulint line, /* in: line where called */
- mtr_t* mtr); /* in: mini-transaction */
-/************************************************************************
-This is the general function used to get access to a database page. */
-
-buf_frame_t*
+ ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
+ buf_block_t* block, /*!< in: the known page */
+ ulint mode, /*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
+ mtr_t* mtr); /*!< in: mini-transaction */
+
+/*******************************************************************//**
+Given a tablespace id and page number tries to get that page, without
+loading it. Suitable for using when holding the kernel mutex.
+@return the block, or NULL if the page is not in the buffer pool */
+UNIV_INTERN
+const buf_block_t*
+buf_page_try_get_func(
+/*==================*/
+ ulint space_id,/*!< in: tablespace id */
+ ulint page_no,/*!< in: page number */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
+ mtr_t* mtr); /*!< in: mini-transaction */
+
+/** Tries to get a page. If the page is not in the buffer pool it is
+not loaded. Suitable for using when holding the kernel mutex.
+@param space_id in: tablespace id
+@param page_no in: page number
+@param mtr in: mini-transaction
+@return the page if in buffer pool, NULL if not */
+#define buf_page_try_get(space_id, page_no, mtr) \
+ buf_page_try_get_func(space_id, page_no, __FILE__, __LINE__, mtr)
+
+/********************************************************************//**
+Get read access to a compressed page (usually of type
+FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
+The page must be released with buf_page_release_zip().
+NOTE: the page is not protected by any latch. Mutual exclusion has to
+be implemented at a higher level. In other words, all possible
+accesses to a given page through this function must be protected by
+the same set of mutexes or latches.
+@return pointer to the block, or NULL if not compressed */
+UNIV_INTERN
+buf_page_t*
+buf_page_get_zip(
+/*=============*/
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size */
+ ulint offset);/*!< in: page number */
+/********************************************************************//**
+This is the general function used to get access to a database page.
+@return pointer to the block or NULL */
+UNIV_INTERN
+buf_block_t*
buf_page_get_gen(
/*=============*/
- /* out: pointer to the frame or NULL */
- ulint space, /* in: space id */
- ulint offset, /* in: page number */
- ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
- buf_frame_t* guess, /* in: guessed frame or NULL */
- ulint mode, /* in: BUF_GET, BUF_GET_IF_IN_POOL,
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint offset, /*!< in: page number */
+ ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
+ buf_block_t* guess, /*!< in: guessed block or NULL */
+ ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
BUF_GET_NO_LATCH */
- const char* file, /* in: file name */
- ulint line, /* in: line where called */
- mtr_t* mtr); /* in: mini-transaction */
-/************************************************************************
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
+ mtr_t* mtr); /*!< in: mini-transaction */
+/********************************************************************//**
Initializes a page to the buffer buf_pool. The page is usually not read
from a file even if it cannot be found in the buffer buf_pool. This is one
of the functions which perform to a block a state transition NOT_USED =>
-FILE_PAGE (the other is buf_page_init_for_read above). */
-
-buf_frame_t*
+FILE_PAGE (the other is buf_page_get_gen).
+@return pointer to the block, page bufferfixed */
+UNIV_INTERN
+buf_block_t*
buf_page_create(
/*============*/
- /* out: pointer to the frame, page bufferfixed */
- ulint space, /* in: space id */
- ulint offset, /* in: offset of the page within space in units of
+ ulint space, /*!< in: space id */
+ ulint offset, /*!< in: offset of the page within space in units of
a page */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
+ ulint zip_size,/*!< in: compressed page size, or 0 */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+#else /* !UNIV_HOTBACKUP */
+/********************************************************************//**
Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
-
+UNIV_INTERN
void
buf_page_init_for_backup_restore(
/*=============================*/
- ulint space, /* in: space id */
- ulint offset, /* in: offset of the page within space
+ ulint space, /*!< in: space id */
+ ulint offset, /*!< in: offset of the page within space
in units of a page */
- buf_block_t* block); /* in: block to init */
-/************************************************************************
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ buf_block_t* block); /*!< in: block to init */
+#endif /* !UNIV_HOTBACKUP */
+
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Releases a compressed-only page acquired with buf_page_get_zip(). */
+UNIV_INLINE
+void
+buf_page_release_zip(
+/*=================*/
+ buf_page_t* bpage); /*!< in: buffer block */
+/********************************************************************//**
Decrements the bufferfix count of a buffer control block and releases
a latch, if specified. */
UNIV_INLINE
void
buf_page_release(
/*=============*/
- buf_block_t* block, /* in: buffer block */
- ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH,
+ buf_block_t* block, /*!< in: buffer block */
+ ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH,
RW_NO_LATCH */
- mtr_t* mtr); /* in: mtr */
-/************************************************************************
+ mtr_t* mtr); /*!< in: mtr */
+/********************************************************************//**
Moves a page to the start of the buffer pool LRU list. This high-level
-function can be used to prevent an important page from from slipping out of
+function can be used to prevent an important page from slipping out of
the buffer pool. */
-
+UNIV_INTERN
void
buf_page_make_young(
/*================*/
- buf_frame_t* frame); /* in: buffer frame of a file page */
-/************************************************************************
-Returns TRUE if the page can be found in the buffer pool hash table. NOTE
-that it is possible that the page is not yet read from disk, though. */
+ buf_page_t* bpage); /*!< in: buffer block of a file page */
+/********************************************************************//**
+Returns TRUE if the page can be found in the buffer pool hash table.
+NOTE that it is possible that the page is not yet read from disk,
+though.
+
+@return TRUE if found in the page hash table */
+UNIV_INLINE
ibool
buf_page_peek(
/*==========*/
- /* out: TRUE if found from page hash table,
- NOTE that the page is not necessarily yet read
- from disk! */
- ulint space, /* in: space id */
- ulint offset);/* in: page number */
-/************************************************************************
-Returns the buffer control block if the page can be found in the buffer
-pool. NOTE that it is possible that the page is not yet read
-from disk, though. This is a very low-level function: use with care! */
-
-buf_block_t*
-buf_page_peek_block(
-/*================*/
- /* out: control block if found from page hash table,
- otherwise NULL; NOTE that the page is not necessarily
- yet read from disk! */
- ulint space, /* in: space id */
- ulint offset);/* in: page number */
-/************************************************************************
+ ulint space, /*!< in: space id */
+ ulint offset);/*!< in: page number */
+/********************************************************************//**
Resets the check_index_page_at_flush field of a page if found in the buffer
pool. */
-
+UNIV_INTERN
void
buf_reset_check_index_page_at_flush(
/*================================*/
- ulint space, /* in: space id */
- ulint offset);/* in: page number */
-/************************************************************************
+ ulint space, /*!< in: space id */
+ ulint offset);/*!< in: page number */
+#ifdef UNIV_DEBUG_FILE_ACCESSES
+/********************************************************************//**
Sets file_page_was_freed TRUE if the page is found in the buffer pool.
This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless
-reallocated. */
-
-buf_block_t*
+reallocated.
+@return control block if found in page hash table, otherwise NULL */
+UNIV_INTERN
+buf_page_t*
buf_page_set_file_page_was_freed(
/*=============================*/
- /* out: control block if found from page hash table,
- otherwise NULL */
- ulint space, /* in: space id */
- ulint offset); /* in: page number */
-/************************************************************************
+ ulint space, /*!< in: space id */
+ ulint offset);/*!< in: page number */
+/********************************************************************//**
Sets file_page_was_freed FALSE if the page is found in the buffer pool.
This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless
-reallocated. */
-
-buf_block_t*
+reallocated.
+@return control block if found in page hash table, otherwise NULL */
+UNIV_INTERN
+buf_page_t*
buf_page_reset_file_page_was_freed(
/*===============================*/
- /* out: control block if found from page hash table,
- otherwise NULL */
- ulint space, /* in: space id */
- ulint offset); /* in: page number */
-/************************************************************************
+ ulint space, /*!< in: space id */
+ ulint offset); /*!< in: page number */
+#endif /* UNIV_DEBUG_FILE_ACCESSES */
+/********************************************************************//**
+Reads the freed_page_clock of a buffer block.
+@return freed_page_clock */
+UNIV_INLINE
+ulint
+buf_page_get_freed_page_clock(
+/*==========================*/
+ const buf_page_t* bpage) /*!< in: block */
+ __attribute__((pure));
+/********************************************************************//**
+Reads the freed_page_clock of a buffer block.
+@return freed_page_clock */
+UNIV_INLINE
+ulint
+buf_block_get_freed_page_clock(
+/*===========================*/
+ const buf_block_t* block) /*!< in: block */
+ __attribute__((pure));
+
+/********************************************************************//**
Recommends a move of a block to the start of the LRU list if there is danger
of dropping from the buffer pool. NOTE: does not reserve the buffer pool
-mutex. */
+mutex.
+@return TRUE if should be made younger */
UNIV_INLINE
ibool
-buf_block_peek_if_too_old(
-/*======================*/
- /* out: TRUE if should be made younger */
- buf_block_t* block); /* in: block to make younger */
-/************************************************************************
+buf_page_peek_if_too_old(
+/*=====================*/
+ const buf_page_t* bpage); /*!< in: block to make younger */
+/********************************************************************//**
Returns the current state of is_hashed of a page. FALSE if the page is
not in the pool. NOTE that this operation does not fix the page in the
-pool if it is found there. */
-
+pool if it is found there.
+@return TRUE if page hash index is built in search system */
+UNIV_INTERN
ibool
buf_page_peek_if_search_hashed(
/*===========================*/
- /* out: TRUE if page hash index is built in search
- system */
- ulint space, /* in: space id */
- ulint offset);/* in: page number */
-/************************************************************************
+ ulint space, /*!< in: space id */
+ ulint offset);/*!< in: page number */
+/********************************************************************//**
Gets the youngest modification log sequence number for a frame.
-Returns zero if not file page or no modification occurred yet. */
-UNIV_INLINE
-dulint
-buf_frame_get_newest_modification(
-/*==============================*/
- /* out: newest modification to page */
- buf_frame_t* frame); /* in: pointer to a frame */
-/************************************************************************
-Increments the modify clock of a frame by 1. The caller must (1) own the
-pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
-on the block. */
+Returns zero if not file page or no modification occurred yet.
+@return newest modification to page */
UNIV_INLINE
-dulint
-buf_frame_modify_clock_inc(
-/*=======================*/
- /* out: new value */
- buf_frame_t* frame); /* in: pointer to a frame */
-/************************************************************************
+ib_uint64_t
+buf_page_get_newest_modification(
+/*=============================*/
+ const buf_page_t* bpage); /*!< in: block containing the
+ page frame */
+/********************************************************************//**
Increments the modify clock of a frame by 1. The caller must (1) own the
buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
on the block. */
UNIV_INLINE
-dulint
+void
buf_block_modify_clock_inc(
/*=======================*/
- /* out: new value */
- buf_block_t* block); /* in: block */
-/************************************************************************
+ buf_block_t* block); /*!< in: block */
+/********************************************************************//**
Returns the value of the modify clock. The caller must have an s-lock
-or x-lock on the block. */
+or x-lock on the block.
+@return value */
UNIV_INLINE
-dulint
+ib_uint64_t
buf_block_get_modify_clock(
/*=======================*/
- /* out: value */
- buf_block_t* block); /* in: block */
-/************************************************************************
+ buf_block_t* block); /*!< in: block */
+#else /* !UNIV_HOTBACKUP */
+# define buf_block_modify_clock_inc(block) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************************//**
Calculates a page checksum which is stored to the page when it is written
to a file. Note that we must be careful to calculate the same value
-on 32-bit and 64-bit architectures. */
-
+on 32-bit and 64-bit architectures.
+@return checksum */
+UNIV_INTERN
ulint
buf_calc_page_new_checksum(
/*=======================*/
- /* out: checksum */
- byte* page); /* in: buffer page */
-/************************************************************************
+ const byte* page); /*!< in: buffer page */
+/********************************************************************//**
In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
looked at the first few bytes of the page. This calculates that old
checksum.
NOTE: we must first store the new formula checksum to
FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
-because this takes that field as an input! */
-
+because this takes that field as an input!
+@return checksum */
+UNIV_INTERN
ulint
buf_calc_page_old_checksum(
/*=======================*/
- /* out: checksum */
- byte* page); /* in: buffer page */
-/************************************************************************
-Checks if a page is corrupt. */
-
+ const byte* page); /*!< in: buffer page */
+/********************************************************************//**
+Checks if a page is corrupt.
+@return TRUE if corrupted */
+UNIV_INTERN
ibool
buf_page_is_corrupted(
/*==================*/
- /* out: TRUE if corrupted */
- byte* read_buf); /* in: a database page */
-/**************************************************************************
-Gets the page number of a pointer pointing within a buffer frame containing
-a file page. */
-UNIV_INLINE
-ulint
-buf_frame_get_page_no(
-/*==================*/
- /* out: page number */
- byte* ptr); /* in: pointer to within a buffer frame */
-/**************************************************************************
-Gets the space id of a pointer pointing within a buffer frame containing a
-file page. */
-UNIV_INLINE
-ulint
-buf_frame_get_space_id(
-/*===================*/
- /* out: space id */
- byte* ptr); /* in: pointer to within a buffer frame */
-/**************************************************************************
+ const byte* read_buf, /*!< in: a database page */
+ ulint zip_size); /*!< in: size of compressed page;
+ 0 for uncompressed pages */
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
Gets the space id, page offset, and byte offset within page of a
pointer pointing to a buffer frame containing a file page. */
UNIV_INLINE
void
buf_ptr_get_fsp_addr(
/*=================*/
- byte* ptr, /* in: pointer to a buffer frame */
- ulint* space, /* out: space id */
- fil_addr_t* addr); /* out: page offset and byte offset */
-/**************************************************************************
-Gets the hash value of the page the pointer is pointing to. This can be used
-in searches in the lock hash table. */
+ const void* ptr, /*!< in: pointer to a buffer frame */
+ ulint* space, /*!< out: space id */
+ fil_addr_t* addr); /*!< out: page offset and byte offset */
+/**********************************************************************//**
+Gets the hash value of a block. This can be used in searches in the
+lock hash table.
+@return lock hash value */
UNIV_INLINE
ulint
-buf_frame_get_lock_hash_val(
+buf_block_get_lock_hash_val(
/*========================*/
- /* out: lock hash value */
- byte* ptr); /* in: pointer to within a buffer frame */
-/**************************************************************************
-Gets the mutex number protecting the page record lock hash chain in the lock
-table. */
-UNIV_INLINE
-mutex_t*
-buf_frame_get_mutex(
-/*================*/
- /* out: mutex */
- byte* ptr); /* in: pointer to within a buffer frame */
-/***********************************************************************
-Gets the frame the pointer is pointing to. */
-UNIV_INLINE
-buf_frame_t*
-buf_frame_align(
-/*============*/
- /* out: pointer to frame */
- byte* ptr); /* in: pointer to a frame */
-/***********************************************************************
-Checks if a pointer points to the block array of the buffer pool (blocks, not
-the frames). */
-UNIV_INLINE
-ibool
-buf_pool_is_block(
-/*==============*/
- /* out: TRUE if pointer to block */
- void* ptr); /* in: pointer to memory */
+ const buf_block_t* block) /*!< in: block */
+ __attribute__((pure));
#ifdef UNIV_DEBUG
-/*************************************************************************
-Validates the buffer pool data structure. */
-
+/*********************************************************************//**
+Finds a block in the buffer pool that points to a
+given compressed page.
+@return buffer block pointing to the compressed page, or NULL */
+UNIV_INTERN
+buf_block_t*
+buf_pool_contains_zip(
+/*==================*/
+ const void* data); /*!< in: pointer to compressed page */
+#endif /* UNIV_DEBUG */
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/*********************************************************************//**
+Validates the buffer pool data structure.
+@return TRUE */
+UNIV_INTERN
ibool
buf_validate(void);
/*==============*/
-/*************************************************************************
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/*********************************************************************//**
Prints info of the buffer pool data structure. */
-
+UNIV_INTERN
void
buf_print(void);
/*============*/
-
-/*************************************************************************
-Returns the number of latched pages in the buffer pool. */
-
-ulint
-buf_get_latched_pages_number(void);
-/*==============================*/
-#endif /* UNIV_DEBUG */
-
-/************************************************************************
+#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************************//**
Prints a page to stderr. */
-
+UNIV_INTERN
void
buf_page_print(
/*===========*/
- byte* read_buf); /* in: a database page */
-
-/*************************************************************************
-Returns the number of pending buf pool ios. */
-
+ const byte* read_buf, /*!< in: a database page */
+ ulint zip_size); /*!< in: compressed page size, or
+ 0 for uncompressed pages */
+/********************************************************************//**
+Decompress a block.
+@return TRUE if successful */
+UNIV_INTERN
+ibool
+buf_zip_decompress(
+/*===============*/
+ buf_block_t* block, /*!< in/out: block */
+ ibool check); /*!< in: TRUE=verify the page checksum */
+#ifndef UNIV_HOTBACKUP
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Returns the number of latched pages in the buffer pool.
+@return number of latched pages */
+UNIV_INTERN
+ulint
+buf_get_latched_pages_number(void);
+/*==============================*/
+#endif /* UNIV_DEBUG */
+/*********************************************************************//**
+Returns the number of pending buf pool ios.
+@return number of pending I/O operations */
+UNIV_INTERN
ulint
buf_get_n_pending_ios(void);
/*=======================*/
-/*************************************************************************
+/*********************************************************************//**
Prints info of the buffer i/o. */
-
+UNIV_INTERN
void
buf_print_io(
/*=========*/
- FILE* file); /* in: file where to print */
-/*************************************************************************
+ FILE* file); /*!< in: file where to print */
+/*********************************************************************//**
Returns the ratio in percents of modified pages in the buffer pool /
-database pages in the buffer pool. */
-
+database pages in the buffer pool.
+@return modified page percentage ratio */
+UNIV_INTERN
ulint
buf_get_modified_ratio_pct(void);
/*============================*/
-/**************************************************************************
+/**********************************************************************//**
Refreshes the statistics used to print per-second averages. */
-
+UNIV_INTERN
void
buf_refresh_io_stats(void);
/*======================*/
-/*************************************************************************
-Checks that all file pages in the buffer are in a replaceable state. */
-
+/*********************************************************************//**
+Asserts that all file pages in the buffer are in a replaceable state.
+@return TRUE */
+UNIV_INTERN
ibool
buf_all_freed(void);
/*===============*/
-/*************************************************************************
+/*********************************************************************//**
Checks that there currently are no pending i/o-operations for the buffer
-pool. */
-
+pool.
+@return TRUE if there is no pending i/o */
+UNIV_INTERN
ibool
buf_pool_check_no_pending_io(void);
/*==============================*/
- /* out: TRUE if there is no pending i/o */
-/*************************************************************************
+/*********************************************************************//**
Invalidates the file pages in the buffer pool when an archive recovery is
completed. All the file pages buffered must be in a replaceable state when
this function is called: not latched and not modified. */
-
+UNIV_INTERN
void
buf_pool_invalidate(void);
/*=====================*/
+#endif /* !UNIV_HOTBACKUP */
/*========================================================================
--------------------------- LOWER LEVEL ROUTINES -------------------------
=========================================================================*/
-/************************************************************************
-Maps the page of block to a frame, if not mapped yet. Unmaps some page
-from the end of the awe_LRU_free_mapped. */
-
-void
-buf_awe_map_page_to_frame(
-/*======================*/
- buf_block_t* block, /* in: block whose page should be
- mapped to a frame */
- ibool add_to_mapped_list);/* in: TRUE if we in the case
- we need to map the page should also
- add the block to the
- awe_LRU_free_mapped list */
#ifdef UNIV_SYNC_DEBUG
-/*************************************************************************
+/*********************************************************************//**
Adds latch level info for the rw-lock protecting the buffer frame. This
should be called in the debug version after a successful latching of a
page if we know the latching order level of the acquired latch. */
UNIV_INLINE
void
-buf_page_dbg_add_level(
-/*===================*/
- buf_frame_t* frame, /* in: buffer page where we have acquired
- a latch */
- ulint level); /* in: latching order level */
+buf_block_dbg_add_level(
+/*====================*/
+ buf_block_t* block, /*!< in: buffer page
+ where we have acquired a latch */
+ ulint level); /*!< in: latching order level */
+#else /* UNIV_SYNC_DEBUG */
+# define buf_block_dbg_add_level(block, level) /* nothing */
#endif /* UNIV_SYNC_DEBUG */
-/*************************************************************************
-Gets a pointer to the memory frame of a block. */
+/*********************************************************************//**
+Gets the state of a block.
+@return state */
+UNIV_INLINE
+enum buf_page_state
+buf_page_get_state(
+/*===============*/
+ const buf_page_t* bpage); /*!< in: pointer to the control block */
+/*********************************************************************//**
+Gets the state of a block.
+@return state */
+UNIV_INLINE
+enum buf_page_state
+buf_block_get_state(
+/*================*/
+ const buf_block_t* block) /*!< in: pointer to the control block */
+ __attribute__((pure));
+/*********************************************************************//**
+Sets the state of a block. */
+UNIV_INLINE
+void
+buf_page_set_state(
+/*===============*/
+ buf_page_t* bpage, /*!< in/out: pointer to control block */
+ enum buf_page_state state); /*!< in: state */
+/*********************************************************************//**
+Sets the state of a block. */
+UNIV_INLINE
+void
+buf_block_set_state(
+/*================*/
+ buf_block_t* block, /*!< in/out: pointer to control block */
+ enum buf_page_state state); /*!< in: state */
+/*********************************************************************//**
+Determines if a block is mapped to a tablespace.
+@return TRUE if mapped */
+UNIV_INLINE
+ibool
+buf_page_in_file(
+/*=============*/
+ const buf_page_t* bpage) /*!< in: pointer to control block */
+ __attribute__((pure));
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Determines if a block should be on unzip_LRU list.
+@return TRUE if block belongs to unzip_LRU */
+UNIV_INLINE
+ibool
+buf_page_belongs_to_unzip_LRU(
+/*==========================*/
+ const buf_page_t* bpage) /*!< in: pointer to control block */
+ __attribute__((pure));
+
+/*********************************************************************//**
+Gets the mutex of a block.
+@return pointer to mutex protecting bpage */
+UNIV_INLINE
+mutex_t*
+buf_page_get_mutex(
+/*===============*/
+ const buf_page_t* bpage) /*!< in: pointer to control block */
+ __attribute__((pure));
+
+/*********************************************************************//**
+Get the flush type of a page.
+@return flush type */
+UNIV_INLINE
+enum buf_flush
+buf_page_get_flush_type(
+/*====================*/
+ const buf_page_t* bpage) /*!< in: buffer page */
+ __attribute__((pure));
+/*********************************************************************//**
+Set the flush type of a page. */
+UNIV_INLINE
+void
+buf_page_set_flush_type(
+/*====================*/
+ buf_page_t* bpage, /*!< in/out: buffer page */
+ enum buf_flush flush_type); /*!< in: flush type */
+/*********************************************************************//**
+Map a block to a file page. */
+UNIV_INLINE
+void
+buf_block_set_file_page(
+/*====================*/
+ buf_block_t* block, /*!< in/out: pointer to control block */
+ ulint space, /*!< in: tablespace id */
+ ulint page_no);/*!< in: page number */
+/*********************************************************************//**
+Gets the io_fix state of a block.
+@return io_fix state */
+UNIV_INLINE
+enum buf_io_fix
+buf_page_get_io_fix(
+/*================*/
+ const buf_page_t* bpage) /*!< in: pointer to the control block */
+ __attribute__((pure));
+/*********************************************************************//**
+Gets the io_fix state of a block.
+@return io_fix state */
+UNIV_INLINE
+enum buf_io_fix
+buf_block_get_io_fix(
+/*================*/
+ const buf_block_t* block) /*!< in: pointer to the control block */
+ __attribute__((pure));
+/*********************************************************************//**
+Sets the io_fix state of a block. */
+UNIV_INLINE
+void
+buf_page_set_io_fix(
+/*================*/
+ buf_page_t* bpage, /*!< in/out: control block */
+ enum buf_io_fix io_fix);/*!< in: io_fix state */
+/*********************************************************************//**
+Sets the io_fix state of a block. */
+UNIV_INLINE
+void
+buf_block_set_io_fix(
+/*=================*/
+ buf_block_t* block, /*!< in/out: control block */
+ enum buf_io_fix io_fix);/*!< in: io_fix state */
+
+/********************************************************************//**
+Determine if a buffer block can be relocated in memory. The block
+can be dirty, but it must not be I/O-fixed or bufferfixed. */
+UNIV_INLINE
+ibool
+buf_page_can_relocate(
+/*==================*/
+ const buf_page_t* bpage) /*!< in: control block being relocated */
+ __attribute__((pure));
+
+/*********************************************************************//**
+Determine if a block has been flagged old.
+@return TRUE if old */
+UNIV_INLINE
+ibool
+buf_page_is_old(
+/*============*/
+ const buf_page_t* bpage) /*!< in: control block */
+ __attribute__((pure));
+/*********************************************************************//**
+Flag a block old. */
+UNIV_INLINE
+void
+buf_page_set_old(
+/*=============*/
+ buf_page_t* bpage, /*!< in/out: control block */
+ ibool old); /*!< in: old */
+/*********************************************************************//**
+Determine the time of first access of a block in the buffer pool.
+@return ut_time_ms() at the time of first access, 0 if not accessed */
+UNIV_INLINE
+unsigned
+buf_page_is_accessed(
+/*=================*/
+ const buf_page_t* bpage) /*!< in: control block */
+ __attribute__((nonnull, pure));
+/*********************************************************************//**
+Flag a block accessed. */
+UNIV_INLINE
+void
+buf_page_set_accessed(
+/*==================*/
+ buf_page_t* bpage, /*!< in/out: control block */
+ ulint time_ms) /*!< in: ut_time_ms() */
+ __attribute__((nonnull));
+/*********************************************************************//**
+Gets the buf_block_t handle of a buffered file block if an uncompressed
+page frame exists, or NULL.
+@return control block, or NULL */
+UNIV_INLINE
+buf_block_t*
+buf_page_get_block(
+/*===============*/
+ buf_page_t* bpage) /*!< in: control block, or NULL */
+ __attribute__((pure));
+#endif /* !UNIV_HOTBACKUP */
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Gets a pointer to the memory frame of a block.
+@return pointer to the frame */
UNIV_INLINE
buf_frame_t*
buf_block_get_frame(
/*================*/
- /* out: pointer to the frame */
- buf_block_t* block); /* in: pointer to the control block */
-/*************************************************************************
-Gets the space id of a block. */
+ const buf_block_t* block) /*!< in: pointer to the control block */
+ __attribute__((pure));
+#else /* UNIV_DEBUG */
+# define buf_block_get_frame(block) (block)->frame
+#endif /* UNIV_DEBUG */
+/*********************************************************************//**
+Gets the space id of a block.
+@return space id */
+UNIV_INLINE
+ulint
+buf_page_get_space(
+/*===============*/
+ const buf_page_t* bpage) /*!< in: pointer to the control block */
+ __attribute__((pure));
+/*********************************************************************//**
+Gets the space id of a block.
+@return space id */
UNIV_INLINE
ulint
buf_block_get_space(
/*================*/
- /* out: space id */
- buf_block_t* block); /* in: pointer to the control block */
-/*************************************************************************
-Gets the page number of a block. */
+ const buf_block_t* block) /*!< in: pointer to the control block */
+ __attribute__((pure));
+/*********************************************************************//**
+Gets the page number of a block.
+@return page number */
+UNIV_INLINE
+ulint
+buf_page_get_page_no(
+/*=================*/
+ const buf_page_t* bpage) /*!< in: pointer to the control block */
+ __attribute__((pure));
+/*********************************************************************//**
+Gets the page number of a block.
+@return page number */
UNIV_INLINE
ulint
buf_block_get_page_no(
/*==================*/
- /* out: page number */
- buf_block_t* block); /* in: pointer to the control block */
-/***********************************************************************
-Gets the block to whose frame the pointer is pointing to. */
+ const buf_block_t* block) /*!< in: pointer to the control block */
+ __attribute__((pure));
+/*********************************************************************//**
+Gets the compressed page size of a block.
+@return compressed page size, or 0 */
+UNIV_INLINE
+ulint
+buf_page_get_zip_size(
+/*==================*/
+ const buf_page_t* bpage) /*!< in: pointer to the control block */
+ __attribute__((pure));
+/*********************************************************************//**
+Gets the compressed page size of a block.
+@return compressed page size, or 0 */
UNIV_INLINE
+ulint
+buf_block_get_zip_size(
+/*===================*/
+ const buf_block_t* block) /*!< in: pointer to the control block */
+ __attribute__((pure));
+/*********************************************************************//**
+Gets the compressed page descriptor corresponding to an uncompressed page
+if applicable. */
+#define buf_block_get_page_zip(block) \
+ (UNIV_LIKELY_NULL((block)->page.zip.data) ? &(block)->page.zip : NULL)
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
+Gets the block to whose frame the pointer is pointing.
+@return pointer to block, never NULL */
+UNIV_INTERN
buf_block_t*
buf_block_align(
/*============*/
- /* out: pointer to block */
- byte* ptr); /* in: pointer to a frame */
-/************************************************************************
-This function is used to get info if there is an io operation
-going on on a buffer page. */
-UNIV_INLINE
+ const byte* ptr); /*!< in: pointer to a frame */
+/********************************************************************//**
+Find out if a pointer belongs to a buf_block_t. It can be a pointer to
+the buf_block_t itself or a member of it.
+@return TRUE if ptr belongs to a buf_block_t struct */
+UNIV_INTERN
ibool
-buf_page_io_query(
-/*==============*/
- /* out: TRUE if io going on */
- buf_block_t* block); /* in: pool block, must be bufferfixed */
-/***********************************************************************
-Accessor function for block array. */
+buf_pointer_is_block_field(
+/*=======================*/
+ const void* ptr); /*!< in: pointer not
+ dereferenced */
+/** Find out if a pointer corresponds to a buf_block_t::mutex.
+@param m in: mutex candidate
+@return TRUE if m is a buf_block_t::mutex */
+#define buf_pool_is_block_mutex(m) \
+ buf_pointer_is_block_field((const void*)(m))
+/** Find out if a pointer corresponds to a buf_block_t::lock.
+@param l in: rw-lock candidate
+@return TRUE if l is a buf_block_t::lock */
+#define buf_pool_is_block_lock(l) \
+ buf_pointer_is_block_field((const void*)(l))
+
+#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
+/*********************************************************************//**
+Gets the compressed page descriptor corresponding to an uncompressed page
+if applicable.
+@return compressed page descriptor, or NULL */
UNIV_INLINE
-buf_block_t*
-buf_pool_get_nth_block(
+const page_zip_des_t*
+buf_frame_get_page_zip(
/*===================*/
- /* out: pointer to block */
- buf_pool_t* pool, /* in: pool */
- ulint i); /* in: index of the block */
-/************************************************************************
+ const byte* ptr); /*!< in: pointer to the page */
+#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
+/********************************************************************//**
Function which inits a page for read to the buffer buf_pool. If the page is
(1) already in buf_pool, or
(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
@@ -649,179 +956,249 @@ Function which inits a page for read to the buffer buf_pool. If the page is
then this function does nothing.
Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
on the buffer frame. The io-handler must take care that the flag is cleared
-and the lock released later. This is one of the functions which perform the
-state transition NOT_USED => FILE_PAGE to a block (the other is
-buf_page_create). */
-
-buf_block_t*
+and the lock released later.
+@return pointer to the block or NULL */
+UNIV_INTERN
+buf_page_t*
buf_page_init_for_read(
/*===================*/
- /* out: pointer to the block or NULL */
- ulint* err, /* out: DB_SUCCESS or DB_TABLESPACE_DELETED */
- ulint mode, /* in: BUF_READ_IBUF_PAGES_ONLY, ... */
- ulint space, /* in: space id */
- ib_longlong tablespace_version,/* in: prevents reading from a wrong
+ ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
+ ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size, or 0 */
+ ibool unzip, /*!< in: TRUE=request uncompressed page */
+ ib_int64_t tablespace_version,/*!< in: prevents reading from a wrong
version of the tablespace in case we have done
DISCARD + IMPORT */
- ulint offset);/* in: page number */
-/************************************************************************
+ ulint offset);/*!< in: page number */
+/********************************************************************//**
Completes an asynchronous read or write request of a file page to or from
the buffer pool. */
-
+UNIV_INTERN
void
buf_page_io_complete(
/*=================*/
- buf_block_t* block); /* in: pointer to the block in question */
-/************************************************************************
+ buf_page_t* bpage); /*!< in: pointer to the block in question */
+/********************************************************************//**
Calculates a folded value of a file page address to use in the page hash
-table. */
+table.
+@return the folded value */
UNIV_INLINE
ulint
buf_page_address_fold(
/*==================*/
- /* out: the folded value */
- ulint space, /* in: space id */
- ulint offset);/* in: offset of the page within space */
-/**********************************************************************
-Returns the control block of a file page, NULL if not found. */
+ ulint space, /*!< in: space id */
+ ulint offset) /*!< in: offset of the page within space */
+ __attribute__((const));
+/******************************************************************//**
+Returns the control block of a file page, NULL if not found.
+@return block, NULL if not found */
UNIV_INLINE
-buf_block_t*
+buf_page_t*
buf_page_hash_get(
/*==============*/
- /* out: block, NULL if not found */
- ulint space, /* in: space id */
- ulint offset);/* in: offset of the page within space */
-/***********************************************************************
-Increments the pool clock by one and returns its new value. Remember that
-in the 32 bit version the clock wraps around at 4 billion! */
+ ulint space, /*!< in: space id */
+ ulint offset);/*!< in: offset of the page within space */
+/******************************************************************//**
+Returns the control block of a file page, or NULL if the page is not found
+or if no uncompressed page frame exists for it.
+@return block, NULL if not found or if there is no uncompressed frame */
UNIV_INLINE
-ulint
-buf_pool_clock_tic(void);
-/*====================*/
- /* out: new clock value */
-/*************************************************************************
-Gets the current length of the free list of buffer blocks. */
-
+buf_block_t*
+buf_block_hash_get(
+/*===============*/
+ ulint space, /*!< in: space id */
+ ulint offset);/*!< in: offset of the page within space */
+/*********************************************************************//**
+Gets the current length of the free list of buffer blocks.
+@return length of the free list */
+UNIV_INTERN
ulint
buf_get_free_list_len(void);
/*=======================*/
+#endif /* !UNIV_HOTBACKUP */
+
+
+/** The common buffer control block structure
+for compressed and uncompressed frames */
+
+struct buf_page_struct{
+ /** @name General fields
+ None of these bit-fields must be modified without holding
+ buf_page_get_mutex() [buf_block_struct::mutex or
+ buf_pool_zip_mutex], since they can be stored in the same
+ machine word. Some of these fields are additionally protected
+ by buf_pool_mutex. */
+ /* @{ */
+
+ unsigned space:32; /*!< tablespace id; also protected
+ by buf_pool_mutex. */
+ unsigned offset:32; /*!< page number; also protected
+ by buf_pool_mutex. */
+
+ unsigned state:3; /*!< state of the control block; also
+ protected by buf_pool_mutex.
+ State transitions from
+ BUF_BLOCK_READY_FOR_USE to
+ BUF_BLOCK_MEMORY need not be
+ protected by buf_page_get_mutex().
+ @see enum buf_page_state */
+#ifndef UNIV_HOTBACKUP
+ unsigned flush_type:2; /*!< if this block is currently being
+ flushed to disk, this tells the
+ flush_type.
+ @see enum buf_flush */
+ unsigned io_fix:2; /*!< type of pending I/O operation;
+ also protected by buf_pool_mutex
+ @see enum buf_io_fix */
+ unsigned buf_fix_count:25;/*!< count of how manyfold this block
+ is currently bufferfixed */
+ /* @} */
+#endif /* !UNIV_HOTBACKUP */
+ page_zip_des_t zip; /*!< compressed page; zip.data
+ (but not the data it points to) is
+ also protected by buf_pool_mutex */
+#ifndef UNIV_HOTBACKUP
+ buf_page_t* hash; /*!< node used in chaining to
+ buf_pool->page_hash or
+ buf_pool->zip_hash */
+#ifdef UNIV_DEBUG
+ ibool in_page_hash; /*!< TRUE if in buf_pool->page_hash */
+ ibool in_zip_hash; /*!< TRUE if in buf_pool->zip_hash */
+#endif /* UNIV_DEBUG */
+ /** @name Page flushing fields
+ All these are protected by buf_pool_mutex. */
+ /* @{ */
+
+ UT_LIST_NODE_T(buf_page_t) list;
+ /*!< based on state, this is a
+ list node, protected only by
+ buf_pool_mutex, in one of the
+ following lists in buf_pool:
+
+ - BUF_BLOCK_NOT_USED: free
+ - BUF_BLOCK_FILE_PAGE: flush_list
+ - BUF_BLOCK_ZIP_DIRTY: flush_list
+ - BUF_BLOCK_ZIP_PAGE: zip_clean
+ - BUF_BLOCK_ZIP_FREE: zip_free[]
+
+ The contents of the list node
+ are undefined if !in_flush_list
+ && state == BUF_BLOCK_FILE_PAGE,
+ or if state is one of
+ BUF_BLOCK_MEMORY,
+ BUF_BLOCK_REMOVE_HASH or
+ BUF_BLOCK_READY_FOR_USE. */
+#ifdef UNIV_DEBUG
+ ibool in_flush_list; /*!< TRUE if in buf_pool->flush_list;
+ when buf_pool_mutex is free, the
+ following should hold: in_flush_list
+ == (state == BUF_BLOCK_FILE_PAGE
+ || state == BUF_BLOCK_ZIP_DIRTY) */
+ ibool in_free_list; /*!< TRUE if in buf_pool->free; when
+ buf_pool_mutex is free, the following
+ should hold: in_free_list
+ == (state == BUF_BLOCK_NOT_USED) */
+#endif /* UNIV_DEBUG */
+ ib_uint64_t newest_modification;
+ /*!< log sequence number of
+ the youngest modification to
+ this block, zero if not
+ modified */
+ ib_uint64_t oldest_modification;
+ /*!< log sequence number of
+ the START of the log entry
+ written of the oldest
+ modification to this block
+ which has not yet been flushed
+ on disk; zero if all
+ modifications are on disk */
+ /* @} */
+ /** @name LRU replacement algorithm fields
+ These fields are protected by buf_pool_mutex only (not
+ buf_pool_zip_mutex or buf_block_struct::mutex). */
+ /* @{ */
+
+ UT_LIST_NODE_T(buf_page_t) LRU;
+ /*!< node of the LRU list */
+#ifdef UNIV_DEBUG
+ ibool in_LRU_list; /*!< TRUE if the page is in
+ the LRU list; used in
+ debugging */
+#endif /* UNIV_DEBUG */
+ unsigned old:1; /*!< TRUE if the block is in the old
+ blocks in buf_pool->LRU_old */
+ unsigned freed_page_clock:31;/*!< the value of
+ buf_pool->freed_page_clock
+ when this block was the last
+ time put to the head of the
+ LRU list; a thread is allowed
+ to read this for heuristic
+ purposes without holding any
+ mutex or latch */
+ unsigned access_time:32; /*!< time of first access, or
+ 0 if the block was never accessed
+ in the buffer pool */
+ /* @} */
+# ifdef UNIV_DEBUG_FILE_ACCESSES
+ ibool file_page_was_freed;
+ /*!< this is set to TRUE when fsp
+ frees a page in buffer pool */
+# endif /* UNIV_DEBUG_FILE_ACCESSES */
+#endif /* !UNIV_HOTBACKUP */
+};
-/* The buffer control block structure */
+/** The buffer control block structure */
struct buf_block_struct{
- /* 1. General fields */
+ /** @name General fields */
+ /* @{ */
- ulint magic_n; /* magic number to check */
- ulint state; /* state of the control block:
- BUF_BLOCK_NOT_USED, ...; changing
- this is only allowed when a thread
- has BOTH the buffer pool mutex AND
- block->mutex locked */
- byte* frame; /* pointer to buffer frame which
+ buf_page_t page; /*!< page information; this must
+ be the first field, so that
+ buf_pool->page_hash can point
+ to buf_page_t or buf_block_t */
+ byte* frame; /*!< pointer to buffer frame which
is of size UNIV_PAGE_SIZE, and
aligned to an address divisible by
- UNIV_PAGE_SIZE; if AWE is used, this
- will be NULL for the pages which are
- currently not mapped into the virtual
- address space window of the buffer
- pool */
- os_awe_t* awe_info; /* if AWE is used, then an array of
- awe page infos for
- UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE
- (normally = 4) physical memory
- pages; otherwise NULL */
- ulint space; /* space id of the page */
- ulint offset; /* page number within the space */
- ulint lock_hash_val; /* hashed value of the page address
- in the record lock hash table */
- mutex_t mutex; /* mutex protecting this block:
+ UNIV_PAGE_SIZE */
+#ifndef UNIV_HOTBACKUP
+ UT_LIST_NODE_T(buf_block_t) unzip_LRU;
+ /*!< node of the decompressed LRU list;
+ a block is in the unzip_LRU list
+ if page.state == BUF_BLOCK_FILE_PAGE
+ and page.zip.data != NULL */
+#ifdef UNIV_DEBUG
+ ibool in_unzip_LRU_list;/*!< TRUE if the page is in the
+ decompressed LRU list;
+ used in debugging */
+#endif /* UNIV_DEBUG */
+ mutex_t mutex; /*!< mutex protecting this block:
state (also protected by the buffer
pool mutex), io_fix, buf_fix_count,
and accessed; we introduce this new
mutex in InnoDB-5.1 to relieve
contention on the buffer pool mutex */
- rw_lock_t lock; /* read-write lock of the buffer
+ rw_lock_t lock; /*!< read-write lock of the buffer
frame */
- buf_block_t* hash; /* node used in chaining to the page
- hash table */
- ibool check_index_page_at_flush;
- /* TRUE if we know that this is
+ unsigned lock_hash_val:32;/*!< hashed value of the page address
+ in the record lock hash table */
+ unsigned check_index_page_at_flush:1;
+ /*!< TRUE if we know that this is
an index page, and want the database
to check its consistency before flush;
note that there may be pages in the
buffer pool which are index pages,
but this flag is not set because
we do not keep track of all pages */
- /* 2. Page flushing fields */
-
- UT_LIST_NODE_T(buf_block_t) flush_list;
- /* node of the modified, not yet
- flushed blocks list */
- dulint newest_modification;
- /* log sequence number of the youngest
- modification to this block, zero if
- not modified */
- dulint oldest_modification;
- /* log sequence number of the START of
- the log entry written of the oldest
- modification to this block which has
- not yet been flushed on disk; zero if
- all modifications are on disk */
- ulint flush_type; /* if this block is currently being
- flushed to disk, this tells the
- flush_type: BUF_FLUSH_LRU or
- BUF_FLUSH_LIST */
+ /* @} */
+ /** @name Optimistic search field */
+ /* @{ */
- /* 3. LRU replacement algorithm fields */
-
- UT_LIST_NODE_T(buf_block_t) free;
- /* node of the free block list */
- ibool in_free_list; /* TRUE if in the free list; used in
- debugging */
- UT_LIST_NODE_T(buf_block_t) LRU;
- /* node of the LRU list */
- UT_LIST_NODE_T(buf_block_t) awe_LRU_free_mapped;
- /* in the AWE version node in the
- list of free and LRU blocks which are
- mapped to a frame */
- ibool in_LRU_list; /* TRUE of the page is in the LRU list;
- used in debugging */
- ulint LRU_position; /* value which monotonically
- decreases (or may stay constant if
- the block is in the old blocks) toward
- the end of the LRU list, if the pool
- ulint_clock has not wrapped around:
- NOTE that this value can only be used
- in heuristic algorithms, because of
- the possibility of a wrap-around! */
- ulint freed_page_clock;/* the value of freed_page_clock
- of the buffer pool when this block was
- the last time put to the head of the
- LRU list; a thread is allowed to
- read this for heuristic purposes
- without holding any mutex or latch */
- ibool old; /* TRUE if the block is in the old
- blocks in the LRU list */
- ibool accessed; /* TRUE if the page has been accessed
- while in the buffer pool: read-ahead
- may read in pages which have not been
- accessed yet; this is protected by
- block->mutex; a thread is allowed to
- read this for heuristic purposes
- without holding any mutex or latch */
- ulint buf_fix_count; /* count of how manyfold this block
- is currently bufferfixed; this is
- protected by block->mutex */
- ulint io_fix; /* if a read is pending to the frame,
- io_fix is BUF_IO_READ, in the case
- of a write BUF_IO_WRITE, otherwise 0;
- this is protected by block->mutex */
- /* 4. Optimistic search field */
-
- dulint modify_clock; /* this clock is incremented every
+ ib_uint64_t modify_clock; /*!< this clock is incremented every
time a pointer to a record on the
page may become obsolete; this is
used in the optimistic cursor
@@ -832,198 +1209,268 @@ struct buf_block_struct{
pool mutex and the page is not
bufferfixed, or (2) the thread has an
x-latch on the block */
+ /* @} */
+ /** @name Hash search fields (unprotected)
+ NOTE that these fields are NOT protected by any semaphore! */
+ /* @{ */
- /* 5. Hash search fields: NOTE that the first 4 fields are NOT
- protected by any semaphore! */
-
- ulint n_hash_helps; /* counter which controls building
+ ulint n_hash_helps; /*!< counter which controls building
of a new hash index for the page */
- ulint n_fields; /* recommended prefix length for hash
+ ulint n_fields; /*!< recommended prefix length for hash
search: number of full fields */
- ulint n_bytes; /* recommended prefix: number of bytes
+ ulint n_bytes; /*!< recommended prefix: number of bytes
in an incomplete field */
- ibool left_side; /* TRUE or FALSE, depending on
+ ibool left_side; /*!< TRUE or FALSE, depending on
whether the leftmost record of several
records with the same prefix should be
indexed in the hash index */
+ /* @} */
- /* These 6 fields may only be modified when we have
+ /** @name Hash search fields
+ These 6 fields may only be modified when we have
an x-latch on btr_search_latch AND
- a) we are holding an s-latch or x-latch on block->lock or
- b) we know that block->buf_fix_count == 0.
+ - we are holding an s-latch or x-latch on buf_block_struct::lock or
+ - we know that buf_block_struct::buf_fix_count == 0.
An exception to this is when we init or create a page
in the buffer pool in buf0buf.c. */
- ibool is_hashed; /* TRUE if hash index has already been
- built on this page; note that it does
- not guarantee that the index is
- complete, though: there may have been
- hash collisions, record deletions,
- etc. */
- ulint n_pointers; /* used in debugging: the number of
+ /* @{ */
+
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+ ulint n_pointers; /*!< used in debugging: the number of
pointers in the adaptive hash index
pointing to this frame */
- ulint curr_n_fields; /* prefix length for hash indexing:
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+ unsigned is_hashed:1; /*!< TRUE if hash index has
+ already been built on this
+ page; note that it does not
+ guarantee that the index is
+ complete, though: there may
+ have been hash collisions,
+ record deletions, etc. */
+ unsigned curr_n_fields:10;/*!< prefix length for hash indexing:
number of full fields */
- ulint curr_n_bytes; /* number of bytes in hash indexing */
- ibool curr_left_side; /* TRUE or FALSE in hash indexing */
- dict_index_t* index; /* Index for which the adaptive
+ unsigned curr_n_bytes:15;/*!< number of bytes in hash
+ indexing */
+ unsigned curr_left_side:1;/*!< TRUE or FALSE in hash indexing */
+ dict_index_t* index; /*!< Index for which the adaptive
hash index has been created. */
- /* 6. Debug fields */
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_t debug_latch; /* in the debug version, each thread
+ /* @} */
+# ifdef UNIV_SYNC_DEBUG
+ /** @name Debug fields */
+ /* @{ */
+ rw_lock_t debug_latch; /*!< in the debug version, each thread
which bufferfixes the block acquires
an s-latch here; so we can use the
debug utilities in sync0rw */
-#endif
- ibool file_page_was_freed;
- /* this is set to TRUE when fsp
- frees a page in buffer pool */
+ /* @} */
+# endif
+#endif /* !UNIV_HOTBACKUP */
+};
+
+/** Check if a buf_block_t object is in a valid state
+@param block buffer block
+@return TRUE if valid */
+#define buf_block_state_valid(block) \
+(buf_block_get_state(block) >= BUF_BLOCK_NOT_USED \
+ && (buf_block_get_state(block) <= BUF_BLOCK_REMOVE_HASH))
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Compute the hash fold value for blocks in buf_pool->zip_hash. */
+/* @{ */
+#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE)
+#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
+#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
+/* @} */
+
+/** @brief The buffer pool statistics structure. */
+struct buf_pool_stat_struct{
+ ulint n_page_gets; /*!< number of page gets performed;
+ also successful searches through
+ the adaptive hash index are
+ counted as page gets; this field
+ is NOT protected by the buffer
+ pool mutex */
+ ulint n_pages_read; /*!< number of read operations */
+ ulint n_pages_written;/*!< number of write operations */
+ ulint n_pages_created;/*!< number of pages created
+ in the pool with no read */
+ ulint n_ra_pages_read;/*!< number of pages read in
+ as part of read ahead */
+ ulint n_ra_pages_evicted;/*!< number of read ahead
+ pages that are evicted without
+ being accessed */
+ ulint n_pages_made_young; /*!< number of pages made young, in
+ calls to buf_LRU_make_block_young() */
+ ulint n_pages_not_made_young; /*!< number of pages not made
+ young because the first access
+ was not long enough ago, in
+ buf_page_peek_if_too_old() */
};
-#define BUF_BLOCK_MAGIC_N 41526563
+/** @brief The buffer pool structure.
-/* The buffer pool structure. NOTE! The definition appears here only for
-other modules of this directory (buf) to see it. Do not use from outside! */
+NOTE! The definition appears here only for other modules of this
+directory (buf) to see it. Do not use from outside! */
struct buf_pool_struct{
- /* 1. General fields */
-
- mutex_t mutex; /* mutex protecting the buffer pool
- struct and control blocks, except the
- read-write lock in them */
- byte* frame_mem; /* pointer to the memory area which
- was allocated for the frames; in AWE
- this is the virtual address space
- window where we map pages stored
- in physical memory */
- byte* frame_zero; /* pointer to the first buffer frame:
- this may differ from frame_mem, because
- this is aligned by the frame size */
- byte* high_end; /* pointer to the end of the buffer
- frames */
- ulint n_frames; /* number of frames */
- buf_block_t* blocks; /* array of buffer control blocks */
- buf_block_t** blocks_of_frames;/* inverse mapping which can be used
- to retrieve the buffer control block
- of a frame; this is an array which
- lists the blocks of frames in the
- order frame_zero,
- frame_zero + UNIV_PAGE_SIZE, ...
- a control block is always assigned
- for each frame, even if the frame does
- not contain any data; note that in AWE
- there are more control blocks than
- buffer frames */
- os_awe_t* awe_info; /* if AWE is used, AWE info for the
- physical 4 kB memory pages associated
- with buffer frames */
- ulint max_size; /* number of control blocks ==
- maximum pool size in pages */
- ulint curr_size; /* current pool size in pages;
- currently always the same as
- max_size */
- hash_table_t* page_hash; /* hash table of the file pages */
-
- ulint n_pend_reads; /* number of pending read operations */
-
- time_t last_printout_time; /* when buf_print was last time
+ /** @name General fields */
+ /* @{ */
+
+ ulint n_chunks; /*!< number of buffer pool chunks */
+ buf_chunk_t* chunks; /*!< buffer pool chunks */
+ ulint curr_size; /*!< current pool size in pages */
+ hash_table_t* page_hash; /*!< hash table of buf_page_t or
+ buf_block_t file pages,
+ buf_page_in_file() == TRUE,
+ indexed by (space_id, offset) */
+ hash_table_t* zip_hash; /*!< hash table of buf_block_t blocks
+ whose frames are allocated to the
+ zip buddy system,
+ indexed by block->frame */
+ ulint n_pend_reads; /*!< number of pending read operations */
+ ulint n_pend_unzip; /*!< number of pending decompressions */
+
+ time_t last_printout_time;
+ /*!< when buf_print_io was last time
called */
- ulint n_pages_read; /* number read operations */
- ulint n_pages_written;/* number write operations */
- ulint n_pages_created;/* number of pages created in the pool
- with no read */
- ulint n_page_gets; /* number of page gets performed;
- also successful searches through
- the adaptive hash index are
- counted as page gets; this field
- is NOT protected by the buffer
- pool mutex */
- ulint n_pages_awe_remapped; /* if AWE is enabled, the
- number of remaps of blocks to
- buffer frames */
- ulint n_page_gets_old;/* n_page_gets when buf_print was
- last time called: used to calculate
- hit rate */
- ulint n_pages_read_old;/* n_pages_read when buf_print was
- last time called */
- ulint n_pages_written_old;/* number write operations */
- ulint n_pages_created_old;/* number of pages created in
- the pool with no read */
- ulint n_pages_awe_remapped_old;
- /* 2. Page flushing algorithm fields */
-
- UT_LIST_BASE_NODE_T(buf_block_t) flush_list;
- /* base node of the modified block
+ buf_pool_stat_t stat; /*!< current statistics */
+ buf_pool_stat_t old_stat; /*!< old statistics */
+
+ /* @} */
+
+ /** @name Page flushing algorithm fields */
+
+ /* @{ */
+
+ UT_LIST_BASE_NODE_T(buf_page_t) flush_list;
+ /*!< base node of the modified block
list */
- ibool init_flush[BUF_FLUSH_LIST + 1];
- /* this is TRUE when a flush of the
+ ibool init_flush[BUF_FLUSH_N_TYPES];
+ /*!< this is TRUE when a flush of the
given type is being initialized */
- ulint n_flush[BUF_FLUSH_LIST + 1];
- /* this is the number of pending
+ ulint n_flush[BUF_FLUSH_N_TYPES];
+ /*!< this is the number of pending
writes in the given flush type */
- os_event_t no_flush[BUF_FLUSH_LIST + 1];
- /* this is in the set state when there
- is no flush batch of the given type
- running */
- ulint ulint_clock; /* a sequence number used to count
- time. NOTE! This counter wraps
- around at 4 billion (if ulint ==
- 32 bits)! */
- ulint freed_page_clock;/* a sequence number used to count the
- number of buffer blocks removed from
- the end of the LRU list; NOTE that
- this counter may wrap around at 4
- billion! A thread is allowed to
- read this for heuristic purposes
- without holding any mutex or latch */
- ulint LRU_flush_ended;/* when an LRU flush ends for a page,
+ os_event_t no_flush[BUF_FLUSH_N_TYPES];
+ /*!< this is in the set state
+ when there is no flush batch
+ of the given type running */
+ ulint freed_page_clock;/*!< a sequence number used
+ to count the number of buffer
+ blocks removed from the end of
+ the LRU list; NOTE that this
+ counter may wrap around at 4
+ billion! A thread is allowed
+ to read this for heuristic
+ purposes without holding any
+ mutex or latch */
+ ulint LRU_flush_ended;/*!< when an LRU flush ends for a page,
this is incremented by one; this is
set to zero when a buffer block is
allocated */
- /* 3. LRU replacement algorithm fields */
-
- UT_LIST_BASE_NODE_T(buf_block_t) free;
- /* base node of the free block list;
- in the case of AWE, at the start are
- always free blocks for which the
- physical memory is mapped to a frame */
- UT_LIST_BASE_NODE_T(buf_block_t) LRU;
- /* base node of the LRU list */
- buf_block_t* LRU_old; /* pointer to the about 3/8 oldest
- blocks in the LRU list; NULL if LRU
- length less than BUF_LRU_OLD_MIN_LEN */
- ulint LRU_old_len; /* length of the LRU list from
+ /* @} */
+ /** @name LRU replacement algorithm fields */
+ /* @{ */
+
+ UT_LIST_BASE_NODE_T(buf_page_t) free;
+ /*!< base node of the free
+ block list */
+ UT_LIST_BASE_NODE_T(buf_page_t) LRU;
+ /*!< base node of the LRU list */
+ buf_page_t* LRU_old; /*!< pointer to the about
+ buf_LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
+ oldest blocks in the LRU list;
+ NULL if LRU length less than
+ BUF_LRU_OLD_MIN_LEN;
+ NOTE: when LRU_old != NULL, its length
+ should always equal LRU_old_len */
+ ulint LRU_old_len; /*!< length of the LRU list from
the block to which LRU_old points
onward, including that block;
see buf0lru.c for the restrictions
- on this value; not defined if
- LRU_old == NULL */
- UT_LIST_BASE_NODE_T(buf_block_t) awe_LRU_free_mapped;
- /* list of those blocks which are
- in the LRU list or the free list, and
- where the page is mapped to a frame;
- thus, frames allocated, e.g., to the
- locki table, are not in this list */
+ on this value; 0 if LRU_old == NULL;
+ NOTE: LRU_old_len must be adjusted
+ whenever LRU_old shrinks or grows! */
+
+ UT_LIST_BASE_NODE_T(buf_block_t) unzip_LRU;
+ /*!< base node of the
+ unzip_LRU list */
+
+ /* @} */
+ /** @name Buddy allocator fields
+ The buddy allocator is used for allocating compressed page
+ frames and buf_page_t descriptors of blocks that exist
+ in the buffer pool only in compressed form. */
+ /* @{ */
+ UT_LIST_BASE_NODE_T(buf_page_t) zip_clean;
+ /*!< unmodified compressed pages */
+ UT_LIST_BASE_NODE_T(buf_page_t) zip_free[BUF_BUDDY_SIZES];
+ /*!< buddy free lists */
+#if BUF_BUDDY_HIGH != UNIV_PAGE_SIZE
+# error "BUF_BUDDY_HIGH != UNIV_PAGE_SIZE"
+#endif
+#if BUF_BUDDY_LOW > PAGE_ZIP_MIN_SIZE
+# error "BUF_BUDDY_LOW > PAGE_ZIP_MIN_SIZE"
+#endif
+ /* @} */
};
-/* States of a control block */
-#define BUF_BLOCK_NOT_USED 211 /* is in the free list */
-#define BUF_BLOCK_READY_FOR_USE 212 /* when buf_get_free_block returns
- a block, it is in this state */
-#define BUF_BLOCK_FILE_PAGE 213 /* contains a buffered file page */
-#define BUF_BLOCK_MEMORY 214 /* contains some main memory object */
-#define BUF_BLOCK_REMOVE_HASH 215 /* hash index should be removed
- before putting to the free list */
-
-/* Io_fix states of a control block; these must be != 0 */
-#define BUF_IO_READ 561
-#define BUF_IO_WRITE 562
+/** mutex protecting the buffer pool struct and control blocks, except the
+read-write lock in them */
+extern mutex_t buf_pool_mutex;
+/** mutex protecting the control blocks of compressed-only pages
+(of type buf_page_t, not buf_block_t) */
+extern mutex_t buf_pool_zip_mutex;
+
+/** @name Accessors for buf_pool_mutex.
+Use these instead of accessing buf_pool_mutex directly. */
+/* @{ */
+
+/** Test if buf_pool_mutex is owned. */
+#define buf_pool_mutex_own() mutex_own(&buf_pool_mutex)
+/** Acquire the buffer pool mutex. */
+#define buf_pool_mutex_enter() do { \
+ ut_ad(!mutex_own(&buf_pool_zip_mutex)); \
+ mutex_enter(&buf_pool_mutex); \
+} while (0)
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/** Flag to forbid the release of the buffer pool mutex.
+Protected by buf_pool_mutex. */
+extern ulint buf_pool_mutex_exit_forbidden;
+/** Forbid the release of the buffer pool mutex. */
+# define buf_pool_mutex_exit_forbid() do { \
+ ut_ad(buf_pool_mutex_own()); \
+ buf_pool_mutex_exit_forbidden++; \
+} while (0)
+/** Allow the release of the buffer pool mutex. */
+# define buf_pool_mutex_exit_allow() do { \
+ ut_ad(buf_pool_mutex_own()); \
+ ut_a(buf_pool_mutex_exit_forbidden); \
+ buf_pool_mutex_exit_forbidden--; \
+} while (0)
+/** Release the buffer pool mutex. */
+# define buf_pool_mutex_exit() do { \
+ ut_a(!buf_pool_mutex_exit_forbidden); \
+ mutex_exit(&buf_pool_mutex); \
+} while (0)
+#else
+/** Forbid the release of the buffer pool mutex. */
+# define buf_pool_mutex_exit_forbid() ((void) 0)
+/** Allow the release of the buffer pool mutex. */
+# define buf_pool_mutex_exit_allow() ((void) 0)
+/** Release the buffer pool mutex. */
+# define buf_pool_mutex_exit() mutex_exit(&buf_pool_mutex)
+#endif
+#endif /* !UNIV_HOTBACKUP */
+/* @} */
-/************************************************************************
+/**********************************************************************
Let us list the consistency conditions for different control block states.
NOT_USED: is in free list, not in LRU list, not in flush list, nor
@@ -1034,8 +1481,8 @@ MEMORY: is not in free list, LRU list, or flush list, nor page
hash table
FILE_PAGE: space and offset are defined, is in page hash table
if io_fix == BUF_IO_WRITE,
- pool: no_flush[block->flush_type] is in reset state,
- pool: n_flush[block->flush_type] > 0
+ pool: no_flush[flush_type] is in reset state,
+ pool: n_flush[flush_type] > 0
(1) if buf_fix_count == 0, then
is in LRU list, not in free list
diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
index b077ff0c181..0f92a59a1c7 100644
--- a/storage/innobase/include/buf0buf.ic
+++ b/storage/innobase/include/buf0buf.ic
@@ -1,361 +1,763 @@
-/******************************************************
-The database buffer buf_pool
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0buf.ic
+The database buffer buf_pool
Created 11/5/1995 Heikki Tuuri
*******************************************************/
+#include "mtr0mtr.h"
+#ifndef UNIV_HOTBACKUP
#include "buf0flu.h"
#include "buf0lru.h"
#include "buf0rea.h"
-#include "mtr0mtr.h"
-#ifdef UNIV_DEBUG
-extern ulint buf_dbg_counter; /* This is used to insert validation
- operations in execution in the
- debug version */
-#endif /* UNIV_DEBUG */
-/************************************************************************
+/********************************************************************//**
+Reads the freed_page_clock of a buffer block.
+@return freed_page_clock */
+UNIV_INLINE
+ulint
+buf_page_get_freed_page_clock(
+/*==========================*/
+ const buf_page_t* bpage) /*!< in: block */
+{
+ /* This is sometimes read without holding buf_pool_mutex. */
+ return(bpage->freed_page_clock);
+}
+
+/********************************************************************//**
+Reads the freed_page_clock of a buffer block.
+@return freed_page_clock */
+UNIV_INLINE
+ulint
+buf_block_get_freed_page_clock(
+/*===========================*/
+ const buf_block_t* block) /*!< in: block */
+{
+ return(buf_page_get_freed_page_clock(&block->page));
+}
+
+/********************************************************************//**
Recommends a move of a block to the start of the LRU list if there is danger
of dropping from the buffer pool. NOTE: does not reserve the buffer pool
-mutex. */
+mutex.
+@return TRUE if should be made younger */
UNIV_INLINE
ibool
-buf_block_peek_if_too_old(
-/*======================*/
- /* out: TRUE if should be made younger */
- buf_block_t* block) /* in: block to make younger */
+buf_page_peek_if_too_old(
+/*=====================*/
+ const buf_page_t* bpage) /*!< in: block to make younger */
{
- return(buf_pool->freed_page_clock >= block->freed_page_clock
- + 1 + (buf_pool->curr_size / 4));
+ if (UNIV_UNLIKELY(buf_pool->freed_page_clock == 0)) {
+ /* If eviction has not started yet, do not update the
+ statistics or move blocks in the LRU list. This is
+ either the warm-up phase or an in-memory workload. */
+ return(FALSE);
+ } else if (buf_LRU_old_threshold_ms && bpage->old) {
+ unsigned access_time = buf_page_is_accessed(bpage);
+
+ if (access_time > 0
+ && (ut_time_ms() - access_time)
+ >= buf_LRU_old_threshold_ms) {
+ return(TRUE);
+ }
+
+ buf_pool->stat.n_pages_not_made_young++;
+ return(FALSE);
+ } else {
+ /* FIXME: bpage->freed_page_clock is 31 bits */
+ return((buf_pool->freed_page_clock & ((1UL << 31) - 1))
+ > ((ulint) bpage->freed_page_clock
+ + (buf_pool->curr_size
+ * (BUF_LRU_OLD_RATIO_DIV - buf_LRU_old_ratio)
+ / (BUF_LRU_OLD_RATIO_DIV * 4))));
+ }
}
-/*************************************************************************
-Gets the current size of buffer buf_pool in bytes. In the case of AWE, the
-size of AWE window (= the frames). */
+/*********************************************************************//**
+Gets the current size of buffer buf_pool in bytes.
+@return size in bytes */
UNIV_INLINE
ulint
buf_pool_get_curr_size(void)
/*========================*/
- /* out: size in bytes */
{
- return((buf_pool->n_frames) * UNIV_PAGE_SIZE);
+ return(buf_pool->curr_size * UNIV_PAGE_SIZE);
}
-/*************************************************************************
-Gets the maximum size of buffer buf_pool in bytes. In the case of AWE, the
-size of AWE window (= the frames). */
+/********************************************************************//**
+Gets the smallest oldest_modification lsn for any page in the pool. Returns
+zero if all modified pages have been flushed to disk.
+@return oldest modification in pool, zero if none */
UNIV_INLINE
-ulint
-buf_pool_get_max_size(void)
-/*=======================*/
- /* out: size in bytes */
+ib_uint64_t
+buf_pool_get_oldest_modification(void)
+/*==================================*/
{
- return((buf_pool->n_frames) * UNIV_PAGE_SIZE);
+ buf_page_t* bpage;
+ ib_uint64_t lsn;
+
+ buf_pool_mutex_enter();
+
+ bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
+
+ if (bpage == NULL) {
+ lsn = 0;
+ } else {
+ ut_ad(bpage->in_flush_list);
+ lsn = bpage->oldest_modification;
+ }
+
+ buf_pool_mutex_exit();
+
+ /* The returned answer may be out of date: the flush_list can
+ change after the mutex has been released. */
+
+ return(lsn);
}
+#endif /* !UNIV_HOTBACKUP */
-/***********************************************************************
-Accessor function for block array. */
+/*********************************************************************//**
+Gets the state of a block.
+@return state */
UNIV_INLINE
-buf_block_t*
-buf_pool_get_nth_block(
-/*===================*/
- /* out: pointer to block */
- buf_pool_t* buf_pool,/* in: buf_pool */
- ulint i) /* in: index of the block */
+enum buf_page_state
+buf_page_get_state(
+/*===============*/
+ const buf_page_t* bpage) /*!< in: pointer to the control block */
{
- ut_ad(buf_pool);
- ut_ad(i < buf_pool->max_size);
+ enum buf_page_state state = (enum buf_page_state) bpage->state;
+
+#ifdef UNIV_DEBUG
+ switch (state) {
+ case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_ZIP_DIRTY:
+ case BUF_BLOCK_NOT_USED:
+ case BUF_BLOCK_READY_FOR_USE:
+ case BUF_BLOCK_FILE_PAGE:
+ case BUF_BLOCK_MEMORY:
+ case BUF_BLOCK_REMOVE_HASH:
+ break;
+ default:
+ ut_error;
+ }
+#endif /* UNIV_DEBUG */
+
+ return(state);
+}
+/*********************************************************************//**
+Gets the state of a block.
+@return state */
+UNIV_INLINE
+enum buf_page_state
+buf_block_get_state(
+/*================*/
+ const buf_block_t* block) /*!< in: pointer to the control block */
+{
+ return(buf_page_get_state(&block->page));
+}
+/*********************************************************************//**
+Sets the state of a block. */
+UNIV_INLINE
+void
+buf_page_set_state(
+/*===============*/
+ buf_page_t* bpage, /*!< in/out: pointer to control block */
+ enum buf_page_state state) /*!< in: state */
+{
+#ifdef UNIV_DEBUG
+ enum buf_page_state old_state = buf_page_get_state(bpage);
+
+ switch (old_state) {
+ case BUF_BLOCK_ZIP_FREE:
+ ut_error;
+ break;
+ case BUF_BLOCK_ZIP_PAGE:
+ ut_a(state == BUF_BLOCK_ZIP_DIRTY);
+ break;
+ case BUF_BLOCK_ZIP_DIRTY:
+ ut_a(state == BUF_BLOCK_ZIP_PAGE);
+ break;
+ case BUF_BLOCK_NOT_USED:
+ ut_a(state == BUF_BLOCK_READY_FOR_USE);
+ break;
+ case BUF_BLOCK_READY_FOR_USE:
+ ut_a(state == BUF_BLOCK_MEMORY
+ || state == BUF_BLOCK_FILE_PAGE
+ || state == BUF_BLOCK_NOT_USED);
+ break;
+ case BUF_BLOCK_MEMORY:
+ ut_a(state == BUF_BLOCK_NOT_USED);
+ break;
+ case BUF_BLOCK_FILE_PAGE:
+ ut_a(state == BUF_BLOCK_NOT_USED
+ || state == BUF_BLOCK_REMOVE_HASH);
+ break;
+ case BUF_BLOCK_REMOVE_HASH:
+ ut_a(state == BUF_BLOCK_MEMORY);
+ break;
+ }
+#endif /* UNIV_DEBUG */
+ bpage->state = state;
+ ut_ad(buf_page_get_state(bpage) == state);
+}
- return(i + buf_pool->blocks);
+/*********************************************************************//**
+Sets the state of a block. */
+UNIV_INLINE
+void
+buf_block_set_state(
+/*================*/
+ buf_block_t* block, /*!< in/out: pointer to control block */
+ enum buf_page_state state) /*!< in: state */
+{
+ buf_page_set_state(&block->page, state);
}
-/***********************************************************************
-Checks if a pointer points to the block array of the buffer pool (blocks, not
-the frames). */
+/*********************************************************************//**
+Determines if a block is mapped to a tablespace.
+@return TRUE if mapped */
UNIV_INLINE
ibool
-buf_pool_is_block(
-/*==============*/
- /* out: TRUE if pointer to block */
- void* ptr) /* in: pointer to memory */
+buf_page_in_file(
+/*=============*/
+ const buf_page_t* bpage) /*!< in: pointer to control block */
{
- if ((buf_pool->blocks <= (buf_block_t*)ptr)
- && ((buf_block_t*)ptr < buf_pool->blocks
- + buf_pool->max_size)) {
-
+ switch (buf_page_get_state(bpage)) {
+ case BUF_BLOCK_ZIP_FREE:
+ /* This is a free page in buf_pool->zip_free[].
+ Such pages should only be accessed by the buddy allocator. */
+ ut_error;
+ break;
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_ZIP_DIRTY:
+ case BUF_BLOCK_FILE_PAGE:
return(TRUE);
+ case BUF_BLOCK_NOT_USED:
+ case BUF_BLOCK_READY_FOR_USE:
+ case BUF_BLOCK_MEMORY:
+ case BUF_BLOCK_REMOVE_HASH:
+ break;
}
return(FALSE);
}
-/************************************************************************
-Gets the smallest oldest_modification lsn for any page in the pool. Returns
-ut_dulint_zero if all modified pages have been flushed to disk. */
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Determines if a block should be on unzip_LRU list.
+@return TRUE if block belongs to unzip_LRU */
UNIV_INLINE
-dulint
-buf_pool_get_oldest_modification(void)
-/*==================================*/
- /* out: oldest modification in pool,
- ut_dulint_zero if none */
+ibool
+buf_page_belongs_to_unzip_LRU(
+/*==========================*/
+ const buf_page_t* bpage) /*!< in: pointer to control block */
{
- buf_block_t* block;
- dulint lsn;
+ ut_ad(buf_page_in_file(bpage));
- mutex_enter(&(buf_pool->mutex));
-
- block = UT_LIST_GET_LAST(buf_pool->flush_list);
+ return(bpage->zip.data
+ && buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
+}
- if (block == NULL) {
- lsn = ut_dulint_zero;
- } else {
- lsn = block->oldest_modification;
+/*********************************************************************//**
+Gets the mutex of a block.
+@return pointer to mutex protecting bpage */
+UNIV_INLINE
+mutex_t*
+buf_page_get_mutex(
+/*===============*/
+ const buf_page_t* bpage) /*!< in: pointer to control block */
+{
+ switch (buf_page_get_state(bpage)) {
+ case BUF_BLOCK_ZIP_FREE:
+ ut_error;
+ return(NULL);
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_ZIP_DIRTY:
+ return(&buf_pool_zip_mutex);
+ default:
+ return(&((buf_block_t*) bpage)->mutex);
}
+}
- mutex_exit(&(buf_pool->mutex));
+/*********************************************************************//**
+Get the flush type of a page.
+@return flush type */
+UNIV_INLINE
+enum buf_flush
+buf_page_get_flush_type(
+/*====================*/
+ const buf_page_t* bpage) /*!< in: buffer page */
+{
+ enum buf_flush flush_type = (enum buf_flush) bpage->flush_type;
- return(lsn);
+#ifdef UNIV_DEBUG
+ switch (flush_type) {
+ case BUF_FLUSH_LRU:
+ case BUF_FLUSH_SINGLE_PAGE:
+ case BUF_FLUSH_LIST:
+ return(flush_type);
+ case BUF_FLUSH_N_TYPES:
+ break;
+ }
+ ut_error;
+#endif /* UNIV_DEBUG */
+ return(flush_type);
+}
+/*********************************************************************//**
+Set the flush type of a page. */
+UNIV_INLINE
+void
+buf_page_set_flush_type(
+/*====================*/
+ buf_page_t* bpage, /*!< in/out: buffer page */
+ enum buf_flush flush_type) /*!< in: flush type */
+{
+ bpage->flush_type = flush_type;
+ ut_ad(buf_page_get_flush_type(bpage) == flush_type);
}
-/***********************************************************************
-Increments the buf_pool clock by one and returns its new value. Remember
-that in the 32 bit version the clock wraps around at 4 billion! */
+/*********************************************************************//**
+Map a block to a file page. */
UNIV_INLINE
-ulint
-buf_pool_clock_tic(void)
+void
+buf_block_set_file_page(
/*====================*/
- /* out: new clock value */
+ buf_block_t* block, /*!< in/out: pointer to control block */
+ ulint space, /*!< in: tablespace id */
+ ulint page_no)/*!< in: page number */
{
- ut_ad(mutex_own(&(buf_pool->mutex)));
+ buf_block_set_state(block, BUF_BLOCK_FILE_PAGE);
+ block->page.space = space;
+ block->page.offset = page_no;
+}
- buf_pool->ulint_clock++;
+/*********************************************************************//**
+Gets the io_fix state of a block.
+@return io_fix state */
+UNIV_INLINE
+enum buf_io_fix
+buf_page_get_io_fix(
+/*================*/
+ const buf_page_t* bpage) /*!< in: pointer to the control block */
+{
+ enum buf_io_fix io_fix = (enum buf_io_fix) bpage->io_fix;
+#ifdef UNIV_DEBUG
+ switch (io_fix) {
+ case BUF_IO_NONE:
+ case BUF_IO_READ:
+ case BUF_IO_WRITE:
+ return(io_fix);
+ }
+ ut_error;
+#endif /* UNIV_DEBUG */
+ return(io_fix);
+}
- return(buf_pool->ulint_clock);
+/*********************************************************************//**
+Gets the io_fix state of a block.
+@return io_fix state */
+UNIV_INLINE
+enum buf_io_fix
+buf_block_get_io_fix(
+/*================*/
+ const buf_block_t* block) /*!< in: pointer to the control block */
+{
+ return(buf_page_get_io_fix(&block->page));
}
-/*************************************************************************
-Gets a pointer to the memory frame of a block. */
+/*********************************************************************//**
+Sets the io_fix state of a block. */
UNIV_INLINE
-buf_frame_t*
-buf_block_get_frame(
+void
+buf_page_set_io_fix(
/*================*/
- /* out: pointer to the frame */
- buf_block_t* block) /* in: pointer to the control block */
+ buf_page_t* bpage, /*!< in/out: control block */
+ enum buf_io_fix io_fix) /*!< in: io_fix state */
{
- ut_ad(block);
- ut_ad(block >= buf_pool->blocks);
- ut_ad(block < buf_pool->blocks + buf_pool->max_size);
- ut_ad(block->state != BUF_BLOCK_NOT_USED);
- ut_ad((block->state != BUF_BLOCK_FILE_PAGE)
- || (block->buf_fix_count > 0));
+ ut_ad(buf_pool_mutex_own());
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
- return(block->frame);
+ bpage->io_fix = io_fix;
+ ut_ad(buf_page_get_io_fix(bpage) == io_fix);
}
-/*************************************************************************
-Gets the space id of a block. */
+/*********************************************************************//**
+Sets the io_fix state of a block. */
UNIV_INLINE
-ulint
-buf_block_get_space(
-/*================*/
- /* out: space id */
- buf_block_t* block) /* in: pointer to the control block */
+void
+buf_block_set_io_fix(
+/*=================*/
+ buf_block_t* block, /*!< in/out: control block */
+ enum buf_io_fix io_fix) /*!< in: io_fix state */
{
- ut_ad(block);
- ut_ad(block >= buf_pool->blocks);
- ut_ad(block < buf_pool->blocks + buf_pool->max_size);
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
- ut_ad(block->buf_fix_count > 0);
+ buf_page_set_io_fix(&block->page, io_fix);
+}
+
+/********************************************************************//**
+Determine if a buffer block can be relocated in memory. The block
+can be dirty, but it must not be I/O-fixed or bufferfixed. */
+UNIV_INLINE
+ibool
+buf_page_can_relocate(
+/*==================*/
+ const buf_page_t* bpage) /*!< in: control block being relocated */
+{
+ ut_ad(buf_pool_mutex_own());
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ ut_ad(buf_page_in_file(bpage));
+ ut_ad(bpage->in_LRU_list);
- return(block->space);
+ return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
+ && bpage->buf_fix_count == 0);
}
-/*************************************************************************
-Gets the page number of a block. */
+/*********************************************************************//**
+Determine if a block has been flagged old.
+@return TRUE if old */
UNIV_INLINE
-ulint
-buf_block_get_page_no(
+ibool
+buf_page_is_old(
+/*============*/
+ const buf_page_t* bpage) /*!< in: control block */
+{
+ ut_ad(buf_page_in_file(bpage));
+ ut_ad(buf_pool_mutex_own());
+
+ return(bpage->old);
+}
+
+/*********************************************************************//**
+Flag a block old. */
+UNIV_INLINE
+void
+buf_page_set_old(
+/*=============*/
+ buf_page_t* bpage, /*!< in/out: control block */
+ ibool old) /*!< in: old */
+{
+ ut_a(buf_page_in_file(bpage));
+ ut_ad(buf_pool_mutex_own());
+ ut_ad(bpage->in_LRU_list);
+
+#ifdef UNIV_LRU_DEBUG
+ ut_a((buf_pool->LRU_old_len == 0) == (buf_pool->LRU_old == NULL));
+ /* If a block is flagged "old", the LRU_old list must exist. */
+ ut_a(!old || buf_pool->LRU_old);
+
+ if (UT_LIST_GET_PREV(LRU, bpage) && UT_LIST_GET_NEXT(LRU, bpage)) {
+ const buf_page_t* prev = UT_LIST_GET_PREV(LRU, bpage);
+ const buf_page_t* next = UT_LIST_GET_NEXT(LRU, bpage);
+ if (prev->old == next->old) {
+ ut_a(prev->old == old);
+ } else {
+ ut_a(!prev->old);
+ ut_a(buf_pool->LRU_old == (old ? bpage : next));
+ }
+ }
+#endif /* UNIV_LRU_DEBUG */
+
+ bpage->old = old;
+}
+
+/*********************************************************************//**
+Determine the time of first access of a block in the buffer pool.
+@return ut_time_ms() at the time of first access, 0 if not accessed */
+UNIV_INLINE
+unsigned
+buf_page_is_accessed(
+/*=================*/
+ const buf_page_t* bpage) /*!< in: control block */
+{
+ ut_ad(buf_page_in_file(bpage));
+
+ return(bpage->access_time);
+}
+
+/*********************************************************************//**
+Flag a block accessed. */
+UNIV_INLINE
+void
+buf_page_set_accessed(
/*==================*/
- /* out: page number */
- buf_block_t* block) /* in: pointer to the control block */
+ buf_page_t* bpage, /*!< in/out: control block */
+ ulint time_ms) /*!< in: ut_time_ms() */
{
- ut_ad(block);
- ut_ad(block >= buf_pool->blocks);
- ut_ad(block < buf_pool->blocks + buf_pool->max_size);
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
- ut_ad(block->buf_fix_count > 0);
+ ut_a(buf_page_in_file(bpage));
+ ut_ad(buf_pool_mutex_own());
- return(block->offset);
+ if (!bpage->access_time) {
+ /* Make this the time of the first access. */
+ bpage->access_time = time_ms;
+ }
}
-/***********************************************************************
-Gets the block to whose frame the pointer is pointing to. */
+/*********************************************************************//**
+Gets the buf_block_t handle of a buffered file block if an uncompressed
+page frame exists, or NULL.
+@return control block, or NULL */
UNIV_INLINE
buf_block_t*
-buf_block_align(
-/*============*/
- /* out: pointer to block */
- byte* ptr) /* in: pointer to a frame */
+buf_page_get_block(
+/*===============*/
+ buf_page_t* bpage) /*!< in: control block, or NULL */
{
- buf_block_t* block;
- buf_frame_t* frame_zero;
-
- ut_ad(ptr);
-
- frame_zero = buf_pool->frame_zero;
-
- if (UNIV_UNLIKELY((ulint)ptr < (ulint)frame_zero)
- || UNIV_UNLIKELY((ulint)ptr > (ulint)(buf_pool->high_end))) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- "InnoDB: Error: trying to access a stray pointer %p\n"
- "InnoDB: buf pool start is at %p, end at %p\n"
- "InnoDB: Probable reason is database corruption"
- " or memory\n"
- "InnoDB: corruption. If this happens in an"
- " InnoDB database recovery, see\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "forcing-recovery.html\n"
- "InnoDB: how to force recovery.\n",
- ptr, frame_zero,
- buf_pool->high_end);
- ut_error;
+ if (UNIV_LIKELY(bpage != NULL)) {
+ ut_ad(buf_page_in_file(bpage));
+
+ if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
+ return((buf_block_t*) bpage);
+ }
}
- block = *(buf_pool->blocks_of_frames + (((ulint)(ptr - frame_zero))
- >> UNIV_PAGE_SIZE_SHIFT));
- return(block);
+ return(NULL);
}
+#endif /* !UNIV_HOTBACKUP */
-/***********************************************************************
-Gets the frame the pointer is pointing to. */
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Gets a pointer to the memory frame of a block.
+@return pointer to the frame */
UNIV_INLINE
buf_frame_t*
-buf_frame_align(
-/*============*/
- /* out: pointer to frame */
- byte* ptr) /* in: pointer to a frame */
-{
- buf_frame_t* frame;
-
- ut_ad(ptr);
-
- frame = ut_align_down(ptr, UNIV_PAGE_SIZE);
-
- if (UNIV_UNLIKELY((ulint)frame < (ulint)(buf_pool->frame_zero))
- || UNIV_UNLIKELY((ulint)frame >= (ulint)(buf_pool->high_end))) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- "InnoDB: Error: trying to access a stray pointer %p\n"
- "InnoDB: buf pool start is at %p, end at %p\n"
- "InnoDB: Probable reason is database corruption"
- " or memory\n"
- "InnoDB: corruption. If this happens in an"
- " InnoDB database recovery, see\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "forcing-recovery.html\n"
- "InnoDB: how to force recovery.\n",
- ptr, buf_pool->frame_zero,
- buf_pool->high_end);
+buf_block_get_frame(
+/*================*/
+ const buf_block_t* block) /*!< in: pointer to the control block */
+{
+ ut_ad(block);
+
+ switch (buf_block_get_state(block)) {
+ case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_ZIP_DIRTY:
+ case BUF_BLOCK_NOT_USED:
ut_error;
+ break;
+ case BUF_BLOCK_FILE_PAGE:
+# ifndef UNIV_HOTBACKUP
+ ut_a(block->page.buf_fix_count > 0);
+# endif /* !UNIV_HOTBACKUP */
+ /* fall through */
+ case BUF_BLOCK_READY_FOR_USE:
+ case BUF_BLOCK_MEMORY:
+ case BUF_BLOCK_REMOVE_HASH:
+ goto ok;
}
+ ut_error;
+ok:
+ return((buf_frame_t*) block->frame);
+}
+#endif /* UNIV_DEBUG */
+
+/*********************************************************************//**
+Gets the space id of a block.
+@return space id */
+UNIV_INLINE
+ulint
+buf_page_get_space(
+/*===============*/
+ const buf_page_t* bpage) /*!< in: pointer to the control block */
+{
+ ut_ad(bpage);
+ ut_a(buf_page_in_file(bpage));
+
+ return(bpage->space);
+}
+
+/*********************************************************************//**
+Gets the space id of a block.
+@return space id */
+UNIV_INLINE
+ulint
+buf_block_get_space(
+/*================*/
+ const buf_block_t* block) /*!< in: pointer to the control block */
+{
+ ut_ad(block);
+ ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+
+ return(block->page.space);
+}
+
+/*********************************************************************//**
+Gets the page number of a block.
+@return page number */
+UNIV_INLINE
+ulint
+buf_page_get_page_no(
+/*=================*/
+ const buf_page_t* bpage) /*!< in: pointer to the control block */
+{
+ ut_ad(bpage);
+ ut_a(buf_page_in_file(bpage));
+
+ return(bpage->offset);
+}
+
+/*********************************************************************//**
+Gets the page number of a block.
+@return page number */
+UNIV_INLINE
+ulint
+buf_block_get_page_no(
+/*==================*/
+ const buf_block_t* block) /*!< in: pointer to the control block */
+{
+ ut_ad(block);
+ ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- return(frame);
+ return(block->page.offset);
}
-/**************************************************************************
-Gets the page number of a pointer pointing within a buffer frame containing
-a file page. */
+/*********************************************************************//**
+Gets the compressed page size of a block.
+@return compressed page size, or 0 */
UNIV_INLINE
ulint
-buf_frame_get_page_no(
+buf_page_get_zip_size(
/*==================*/
- /* out: page number */
- byte* ptr) /* in: pointer to within a buffer frame */
+ const buf_page_t* bpage) /*!< in: pointer to the control block */
{
- return(buf_block_get_page_no(buf_block_align(ptr)));
+ return(bpage->zip.ssize ? 512 << bpage->zip.ssize : 0);
}
-/**************************************************************************
-Gets the space id of a pointer pointing within a buffer frame containing a
-file page. */
+/*********************************************************************//**
+Gets the compressed page size of a block.
+@return compressed page size, or 0 */
UNIV_INLINE
ulint
-buf_frame_get_space_id(
+buf_block_get_zip_size(
+/*===================*/
+ const buf_block_t* block) /*!< in: pointer to the control block */
+{
+ return(block->page.zip.ssize ? 512 << block->page.zip.ssize : 0);
+}
+
+#ifndef UNIV_HOTBACKUP
+#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
+/*********************************************************************//**
+Gets the compressed page descriptor corresponding to an uncompressed page
+if applicable.
+@return compressed page descriptor, or NULL */
+UNIV_INLINE
+const page_zip_des_t*
+buf_frame_get_page_zip(
/*===================*/
- /* out: space id */
- byte* ptr) /* in: pointer to within a buffer frame */
+ const byte* ptr) /*!< in: pointer to the page */
{
- return(buf_block_get_space(buf_block_align(ptr)));
+ return(buf_block_get_page_zip(buf_block_align(ptr)));
}
+#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
-/**************************************************************************
+/**********************************************************************//**
Gets the space id, page offset, and byte offset within page of a
pointer pointing to a buffer frame containing a file page. */
UNIV_INLINE
void
buf_ptr_get_fsp_addr(
/*=================*/
- byte* ptr, /* in: pointer to a buffer frame */
- ulint* space, /* out: space id */
- fil_addr_t* addr) /* out: page offset and byte offset */
+ const void* ptr, /*!< in: pointer to a buffer frame */
+ ulint* space, /*!< out: space id */
+ fil_addr_t* addr) /*!< out: page offset and byte offset */
{
- buf_block_t* block;
-
- block = buf_block_align(ptr);
+ const page_t* page = (const page_t*) ut_align_down(ptr,
+ UNIV_PAGE_SIZE);
- *space = buf_block_get_space(block);
- addr->page = buf_block_get_page_no(block);
- addr->boffset = ptr - buf_frame_align(ptr);
+ *space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+ addr->page = mach_read_from_4(page + FIL_PAGE_OFFSET);
+ addr->boffset = ut_align_offset(ptr, UNIV_PAGE_SIZE);
}
-/**************************************************************************
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
Gets the hash value of the page the pointer is pointing to. This can be used
-in searches in the lock hash table. */
+in searches in the lock hash table.
+@return lock hash value */
UNIV_INLINE
ulint
-buf_frame_get_lock_hash_val(
+buf_block_get_lock_hash_val(
/*========================*/
- /* out: lock hash value */
- byte* ptr) /* in: pointer to within a buffer frame */
+ const buf_block_t* block) /*!< in: block */
+{
+ return(block->lock_hash_val);
+}
+
+/********************************************************************//**
+Allocates a buffer block.
+@return own: the allocated block, in state BUF_BLOCK_MEMORY */
+UNIV_INLINE
+buf_block_t*
+buf_block_alloc(
+/*============*/
+ ulint zip_size) /*!< in: compressed page size in bytes,
+ or 0 if uncompressed tablespace */
{
buf_block_t* block;
- block = buf_block_align(ptr);
+ block = buf_LRU_get_free_block(zip_size);
- return(block->lock_hash_val);
+ buf_block_set_state(block, BUF_BLOCK_MEMORY);
+
+ return(block);
}
-/**************************************************************************
-Gets the mutex number protecting the page record lock hash chain in the lock
-table. */
+/********************************************************************//**
+Frees a buffer block which does not contain a file page. */
UNIV_INLINE
-mutex_t*
-buf_frame_get_mutex(
-/*================*/
- /* out: mutex */
- byte* ptr) /* in: pointer to within a buffer frame */
+void
+buf_block_free(
+/*===========*/
+ buf_block_t* block) /*!< in, own: block to be freed */
{
- buf_block_t* block;
+ buf_pool_mutex_enter();
+
+ mutex_enter(&block->mutex);
- block = buf_block_align(ptr);
+ ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
- return(&block->mutex);
+ buf_LRU_block_free_non_file_page(block);
+
+ mutex_exit(&block->mutex);
+
+ buf_pool_mutex_exit();
}
+#endif /* !UNIV_HOTBACKUP */
-/*************************************************************************
-Copies contents of a buffer frame to a given buffer. */
+/*********************************************************************//**
+Copies contents of a buffer frame to a given buffer.
+@return buf */
UNIV_INLINE
byte*
buf_frame_copy(
/*===========*/
- /* out: buf */
- byte* buf, /* in: buffer to copy to */
- buf_frame_t* frame) /* in: buffer frame */
+ byte* buf, /*!< in: buffer to copy to */
+ const buf_frame_t* frame) /*!< in: buffer frame */
{
ut_ad(buf && frame);
@@ -364,133 +766,76 @@ buf_frame_copy(
return(buf);
}
-/************************************************************************
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
Calculates a folded value of a file page address to use in the page hash
-table. */
+table.
+@return the folded value */
UNIV_INLINE
ulint
buf_page_address_fold(
/*==================*/
- /* out: the folded value */
- ulint space, /* in: space id */
- ulint offset) /* in: offset of the page within space */
+ ulint space, /*!< in: space id */
+ ulint offset) /*!< in: offset of the page within space */
{
return((space << 20) + space + offset);
}
-/************************************************************************
-This function is used to get info if there is an io operation
-going on on a buffer page. */
-UNIV_INLINE
-ibool
-buf_page_io_query(
-/*==============*/
- /* out: TRUE if io going on */
- buf_block_t* block) /* in: buf_pool block, must be bufferfixed */
-{
- mutex_enter(&(buf_pool->mutex));
-
- ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
- ut_ad(block->buf_fix_count > 0);
-
- if (block->io_fix != 0) {
- mutex_exit(&(buf_pool->mutex));
-
- return(TRUE);
- }
-
- mutex_exit(&(buf_pool->mutex));
-
- return(FALSE);
-}
-
-/************************************************************************
-Gets the youngest modification log sequence number for a frame. Returns zero
-if not a file page or no modification occurred yet. */
+/********************************************************************//**
+Gets the youngest modification log sequence number for a frame.
+Returns zero if not a file page or no modification occurred yet.
+@return newest modification to page */
UNIV_INLINE
-dulint
-buf_frame_get_newest_modification(
-/*==============================*/
- /* out: newest modification to the page */
- buf_frame_t* frame) /* in: pointer to a frame */
+ib_uint64_t
+buf_page_get_newest_modification(
+/*=============================*/
+ const buf_page_t* bpage) /*!< in: block containing the
+ page frame */
{
- buf_block_t* block;
- dulint lsn;
-
- ut_ad(frame);
+ ib_uint64_t lsn;
+ mutex_t* block_mutex = buf_page_get_mutex(bpage);
- block = buf_block_align(frame);
+ mutex_enter(block_mutex);
- mutex_enter(&(buf_pool->mutex));
-
- if (block->state == BUF_BLOCK_FILE_PAGE) {
- lsn = block->newest_modification;
+ if (buf_page_in_file(bpage)) {
+ lsn = bpage->newest_modification;
} else {
- lsn = ut_dulint_zero;
+ lsn = 0;
}
- mutex_exit(&(buf_pool->mutex));
+ mutex_exit(block_mutex);
return(lsn);
}
-/************************************************************************
-Increments the modify clock of a frame by 1. The caller must (1) own the
-buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
-on the block. */
-UNIV_INLINE
-dulint
-buf_frame_modify_clock_inc(
-/*=======================*/
- /* out: new value */
- buf_frame_t* frame) /* in: pointer to a frame */
-{
- buf_block_t* block;
-
- ut_ad(frame);
-
- block = buf_block_align(frame);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0))
- || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
-#endif /* UNIV_SYNC_DEBUG */
-
- UT_DULINT_INC(block->modify_clock);
-
- return(block->modify_clock);
-}
-
-/************************************************************************
+/********************************************************************//**
Increments the modify clock of a frame by 1. The caller must (1) own the
buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
on the block. */
UNIV_INLINE
-dulint
+void
buf_block_modify_clock_inc(
/*=======================*/
- /* out: new value */
- buf_block_t* block) /* in: block */
+ buf_block_t* block) /*!< in: block */
{
#ifdef UNIV_SYNC_DEBUG
- ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0))
+ ut_ad((buf_pool_mutex_own()
+ && (block->page.buf_fix_count == 0))
|| rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
#endif /* UNIV_SYNC_DEBUG */
- UT_DULINT_INC(block->modify_clock);
-
- return(block->modify_clock);
+ block->modify_clock++;
}
-/************************************************************************
+/********************************************************************//**
Returns the value of the modify clock. The caller must have an s-lock
-or x-lock on the block. */
+or x-lock on the block.
+@return value */
UNIV_INLINE
-dulint
+ib_uint64_t
buf_block_get_modify_clock(
/*=======================*/
- /* out: value */
- buf_block_t* block) /* in: block */
+ buf_block_t* block) /*!< in: block */
{
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
@@ -500,133 +845,192 @@ buf_block_get_modify_clock(
return(block->modify_clock);
}
-#ifdef UNIV_SYNC_DEBUG
-/***********************************************************************
+/*******************************************************************//**
Increments the bufferfix count. */
UNIV_INLINE
void
-buf_block_buf_fix_inc_debug(
-/*========================*/
- buf_block_t* block, /* in: block to bufferfix */
- const char* file __attribute__ ((unused)), /* in: file name */
- ulint line __attribute__ ((unused))) /* in: line */
+buf_block_buf_fix_inc_func(
+/*=======================*/
+#ifdef UNIV_SYNC_DEBUG
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line */
+#endif /* UNIV_SYNC_DEBUG */
+ buf_block_t* block) /*!< in/out: block to bufferfix */
{
+#ifdef UNIV_SYNC_DEBUG
ibool ret;
- ret = rw_lock_s_lock_func_nowait(&(block->debug_latch), file, line);
-
- ut_ad(ret == TRUE);
+ ret = rw_lock_s_lock_nowait(&(block->debug_latch), file, line);
+ ut_a(ret);
+#endif /* UNIV_SYNC_DEBUG */
ut_ad(mutex_own(&block->mutex));
- block->buf_fix_count++;
+
+ block->page.buf_fix_count++;
}
+#ifdef UNIV_SYNC_DEBUG
+/** Increments the bufferfix count.
+@param b in/out: block to bufferfix
+@param f in: file name where requested
+@param l in: line number where requested */
+# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b)
#else /* UNIV_SYNC_DEBUG */
-/***********************************************************************
-Increments the bufferfix count. */
+/** Increments the bufferfix count.
+@param b in/out: block to bufferfix
+@param f in: file name where requested
+@param l in: line number where requested */
+# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b)
+#endif /* UNIV_SYNC_DEBUG */
+
+/*******************************************************************//**
+Decrements the bufferfix count. */
UNIV_INLINE
void
-buf_block_buf_fix_inc(
+buf_block_buf_fix_dec(
/*==================*/
- buf_block_t* block) /* in: block to bufferfix */
+ buf_block_t* block) /*!< in/out: block to bufferunfix */
{
ut_ad(mutex_own(&block->mutex));
- block->buf_fix_count++;
+ block->page.buf_fix_count--;
+#ifdef UNIV_SYNC_DEBUG
+ rw_lock_s_unlock(&block->debug_latch);
+#endif
}
-#endif /* UNIV_SYNC_DEBUG */
-/**********************************************************************
-Returns the control block of a file page, NULL if not found. */
+
+/******************************************************************//**
+Returns the control block of a file page, NULL if not found.
+@return block, NULL if not found */
UNIV_INLINE
-buf_block_t*
+buf_page_t*
buf_page_hash_get(
/*==============*/
- /* out: block, NULL if not found */
- ulint space, /* in: space id */
- ulint offset) /* in: offset of the page within space */
+ ulint space, /*!< in: space id */
+ ulint offset) /*!< in: offset of the page within space */
{
- buf_block_t* block;
+ buf_page_t* bpage;
ulint fold;
ut_ad(buf_pool);
- ut_ad(mutex_own(&(buf_pool->mutex)));
+ ut_ad(buf_pool_mutex_own());
/* Look for the page in the hash table */
fold = buf_page_address_fold(space, offset);
- HASH_SEARCH(hash, buf_pool->page_hash, fold, block,
- (block->space == space) && (block->offset == offset));
- ut_a(block == NULL || block->state == BUF_BLOCK_FILE_PAGE);
+ HASH_SEARCH(hash, buf_pool->page_hash, fold, buf_page_t*, bpage,
+ ut_ad(bpage->in_page_hash && !bpage->in_zip_hash
+ && buf_page_in_file(bpage)),
+ bpage->space == space && bpage->offset == offset);
+ if (bpage) {
+ ut_a(buf_page_in_file(bpage));
+ ut_ad(bpage->in_page_hash);
+ ut_ad(!bpage->in_zip_hash);
+ UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
+ }
- return(block);
+ return(bpage);
}
-/************************************************************************
-Tries to get the page, but if file io is required, releases all latches
-in mtr down to the given savepoint. If io is required, this function
-retrieves the page to buffer buf_pool, but does not bufferfix it or latch
-it. */
+/******************************************************************//**
+Returns the control block of a file page, NULL if not found
+or an uncompressed page frame does not exist.
+@return block, NULL if not found or no uncompressed page frame exists */
UNIV_INLINE
-buf_frame_t*
-buf_page_get_release_on_io(
-/*=======================*/
- /* out: pointer to the frame, or NULL
- if not in buffer buf_pool */
- ulint space, /* in: space id */
- ulint offset, /* in: offset of the page within space
- in units of a page */
- buf_frame_t* guess, /* in: guessed frame or NULL */
- ulint rw_latch, /* in: RW_X_LATCH, RW_S_LATCH,
- or RW_NO_LATCH */
- ulint savepoint, /* in: mtr savepoint */
- mtr_t* mtr) /* in: mtr */
-{
- buf_frame_t* frame;
-
- frame = buf_page_get_gen(space, offset, rw_latch, guess,
- BUF_GET_IF_IN_POOL,
- __FILE__, __LINE__,
- mtr);
- if (frame != NULL) {
-
- return(frame);
- }
+buf_block_t*
+buf_block_hash_get(
+/*===============*/
+ ulint space, /*!< in: space id */
+ ulint offset) /*!< in: offset of the page within space */
+{
+ return(buf_page_get_block(buf_page_hash_get(space, offset)));
+}
- /* The page was not in the buffer buf_pool: release the latches
- down to the savepoint */
+/********************************************************************//**
+Returns TRUE if the page can be found in the buffer pool hash table.
- mtr_rollback_to_savepoint(mtr, savepoint);
+NOTE that it is possible that the page is not yet read from disk,
+though.
- buf_page_get(space, offset, RW_S_LATCH, mtr);
+@return TRUE if found in the page hash table */
+UNIV_INLINE
+ibool
+buf_page_peek(
+/*==========*/
+ ulint space, /*!< in: space id */
+ ulint offset) /*!< in: page number */
+{
+ const buf_page_t* bpage;
- /* When we get here, the page is in buffer, but we release
- the latches again down to the savepoint, before returning */
+ buf_pool_mutex_enter();
- mtr_rollback_to_savepoint(mtr, savepoint);
+ bpage = buf_page_hash_get(space, offset);
- return(NULL);
+ buf_pool_mutex_exit();
+
+ return(bpage != NULL);
}
-/************************************************************************
+/********************************************************************//**
+Releases a compressed-only page acquired with buf_page_get_zip(). */
+UNIV_INLINE
+void
+buf_page_release_zip(
+/*=================*/
+ buf_page_t* bpage) /*!< in: buffer block */
+{
+ buf_block_t* block;
+
+ ut_ad(bpage);
+ ut_a(bpage->buf_fix_count > 0);
+
+ switch (buf_page_get_state(bpage)) {
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_ZIP_DIRTY:
+ mutex_enter(&buf_pool_zip_mutex);
+ bpage->buf_fix_count--;
+ mutex_exit(&buf_pool_zip_mutex);
+ return;
+ case BUF_BLOCK_FILE_PAGE:
+ block = (buf_block_t*) bpage;
+ mutex_enter(&block->mutex);
+#ifdef UNIV_SYNC_DEBUG
+ rw_lock_s_unlock(&block->debug_latch);
+#endif
+ bpage->buf_fix_count--;
+ mutex_exit(&block->mutex);
+ return;
+ case BUF_BLOCK_ZIP_FREE:
+ case BUF_BLOCK_NOT_USED:
+ case BUF_BLOCK_READY_FOR_USE:
+ case BUF_BLOCK_MEMORY:
+ case BUF_BLOCK_REMOVE_HASH:
+ break;
+ }
+
+ ut_error;
+}
+
+/********************************************************************//**
Decrements the bufferfix count of a buffer control block and releases
a latch, if specified. */
UNIV_INLINE
void
buf_page_release(
/*=============*/
- buf_block_t* block, /* in: buffer block */
- ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH,
+ buf_block_t* block, /*!< in: buffer block */
+ ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH,
RW_NO_LATCH */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
ut_ad(block);
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
- ut_a(block->buf_fix_count > 0);
+ ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+ ut_a(block->page.buf_fix_count > 0);
if (rw_latch == RW_X_LATCH && mtr->modifications) {
- mutex_enter(&buf_pool->mutex);
+ buf_pool_mutex_enter();
buf_flush_note_modification(block, mtr);
- mutex_exit(&buf_pool->mutex);
+ buf_pool_mutex_exit();
}
mutex_enter(&block->mutex);
@@ -634,7 +1038,7 @@ buf_page_release(
#ifdef UNIV_SYNC_DEBUG
rw_lock_s_unlock(&(block->debug_latch));
#endif
- block->buf_fix_count--;
+ block->page.buf_fix_count--;
mutex_exit(&block->mutex);
@@ -646,20 +1050,19 @@ buf_page_release(
}
#ifdef UNIV_SYNC_DEBUG
-/*************************************************************************
+/*********************************************************************//**
Adds latch level info for the rw-lock protecting the buffer frame. This
should be called in the debug version after a successful latching of a
-page if we know the latching order level of the acquired latch. If
-UNIV_SYNC_DEBUG is not defined, compiles to an empty function. */
+page if we know the latching order level of the acquired latch. */
UNIV_INLINE
void
-buf_page_dbg_add_level(
-/*===================*/
- buf_frame_t* frame __attribute__((unused)), /* in: buffer page
+buf_block_dbg_add_level(
+/*====================*/
+ buf_block_t* block, /*!< in: buffer page
where we have acquired latch */
- ulint level __attribute__((unused))) /* in: latching order
- level */
+ ulint level) /*!< in: latching order level */
{
- sync_thread_add_level(&(buf_block_align(frame)->lock), level);
+ sync_thread_add_level(&block->lock, level);
}
#endif /* UNIV_SYNC_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
index 322848509f4..6c751852f54 100644
--- a/storage/innobase/include/buf0flu.h
+++ b/storage/innobase/include/buf0flu.h
@@ -1,7 +1,24 @@
-/******************************************************
-The database buffer pool flush algorithm
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
-(c) 1995 Innobase Oy
+/**************************************************//**
+@file include/buf0flu.h
+The database buffer pool flush algorithm
Created 11/5/1995 Heikki Tuuri
*******************************************************/
@@ -10,64 +27,76 @@ Created 11/5/1995 Heikki Tuuri
#define buf0flu_h
#include "univ.i"
-#include "buf0types.h"
#include "ut0byte.h"
+#ifndef UNIV_HOTBACKUP
#include "mtr0types.h"
+#include "buf0types.h"
-/************************************************************************
+/********************************************************************//**
+Remove a block from the flush list of modified blocks. */
+UNIV_INTERN
+void
+buf_flush_remove(
+/*=============*/
+ buf_page_t* bpage); /*!< in: pointer to the block in question */
+/********************************************************************//**
Updates the flush system data structures when a write is completed. */
-
+UNIV_INTERN
void
buf_flush_write_complete(
/*=====================*/
- buf_block_t* block); /* in: pointer to the block in question */
-/*************************************************************************
+ buf_page_t* bpage); /*!< in: pointer to the block in question */
+/*********************************************************************//**
Flushes pages from the end of the LRU list if there is too small
a margin of replaceable pages there. */
-
+UNIV_INTERN
void
buf_flush_free_margin(void);
/*=======================*/
-/************************************************************************
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************************//**
Initializes a page for writing to the tablespace. */
-
+UNIV_INTERN
void
buf_flush_init_for_writing(
/*=======================*/
- byte* page, /* in: page */
- dulint newest_lsn, /* in: newest modification lsn to the page */
- ulint space, /* in: space id */
- ulint page_no); /* in: page number */
-/***********************************************************************
+ byte* page, /*!< in/out: page */
+ void* page_zip_, /*!< in/out: compressed page, or NULL */
+ ib_uint64_t newest_lsn); /*!< in: newest modification lsn
+ to the page */
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
This utility flushes dirty blocks from the end of the LRU list or flush_list.
NOTE 1: in the case of an LRU flush the calling thread may own latches to
pages: to avoid deadlocks, this function must be written so that it cannot
end up waiting for these latches! NOTE 2: in the case of a flush list flush,
-the calling thread is not allowed to own any latches on pages! */
-
+the calling thread is not allowed to own any latches on pages!
+@return number of blocks for which the write request was queued;
+ULINT_UNDEFINED if there was a flush of the same type already running */
+UNIV_INTERN
ulint
buf_flush_batch(
/*============*/
- /* out: number of blocks for which the write
- request was queued */
- ulint flush_type, /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST; if
- BUF_FLUSH_LIST, then the caller must not own
- any latches on pages */
- ulint min_n, /* in: wished minimum mumber of blocks flushed
- (it is not guaranteed that the actual number
- is that big, though) */
- dulint lsn_limit); /* in the case BUF_FLUSH_LIST all blocks whose
- oldest_modification is smaller than this
- should be flushed (if their number does not
- exceed min_n), otherwise ignored */
-/**********************************************************************
+ enum buf_flush flush_type, /*!< in: BUF_FLUSH_LRU or
+ BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
+ then the caller must not own any
+ latches on pages */
+ ulint min_n, /*!< in: wished minimum number of blocks
+ flushed (it is not guaranteed that the
+ actual number is that big, though) */
+ ib_uint64_t lsn_limit); /*!< in: in the case BUF_FLUSH_LIST all
+ blocks whose oldest_modification is
+ smaller than this should be flushed
+ (if their number does not exceed
+ min_n), otherwise ignored */
+/******************************************************************//**
Waits until a flush batch of the given type ends */
-
+UNIV_INTERN
void
buf_flush_wait_batch_end(
/*=====================*/
- ulint type); /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
-/************************************************************************
+ enum buf_flush type); /*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
+/********************************************************************//**
This function should be called at a mini-transaction commit, if a page was
modified in it. Puts the block to the list of modified blocks, if it not
already in it. */
@@ -75,43 +104,85 @@ UNIV_INLINE
void
buf_flush_note_modification(
/*========================*/
- buf_block_t* block, /* in: block which is modified */
- mtr_t* mtr); /* in: mtr */
-/************************************************************************
+ buf_block_t* block, /*!< in: block which is modified */
+ mtr_t* mtr); /*!< in: mtr */
+/********************************************************************//**
This function should be called when recovery has modified a buffer page. */
UNIV_INLINE
void
buf_flush_recv_note_modification(
/*=============================*/
- buf_block_t* block, /* in: block which is modified */
- dulint start_lsn, /* in: start lsn of the first mtr in a
+ buf_block_t* block, /*!< in: block which is modified */
+ ib_uint64_t start_lsn, /*!< in: start lsn of the first mtr in a
set of mtr's */
- dulint end_lsn); /* in: end lsn of the last mtr in the
+ ib_uint64_t end_lsn); /*!< in: end lsn of the last mtr in the
set of mtr's */
-/************************************************************************
+/********************************************************************//**
Returns TRUE if the file page block is immediately suitable for replacement,
-i.e., transition FILE_PAGE => NOT_USED allowed. */
+i.e., transition FILE_PAGE => NOT_USED allowed.
+@return TRUE if can replace immediately */
+UNIV_INTERN
ibool
buf_flush_ready_for_replace(
/*========================*/
- /* out: TRUE if can replace immediately */
- buf_block_t* block); /* in: buffer control block, must be in state
- BUF_BLOCK_FILE_PAGE and in the LRU list */
-/**********************************************************************
-Validates the flush list. */
+ buf_page_t* bpage); /*!< in: buffer control block, must be
+ buf_page_in_file(bpage) and in the LRU list */
+
+/** @brief Statistics for selecting flush rate based on redo log
+generation speed.
+
+These statistics are generated for heuristics used in estimating the
+rate at which we should flush the dirty blocks to avoid bursty IO
+activity. Note that the rate of flushing not only depends on how many
+dirty pages we have in the buffer pool but it is also a function of
+how much redo the workload is generating and at what rate. */
+
+struct buf_flush_stat_struct
+{
+ ib_uint64_t redo; /**< amount of redo generated. */
+ ulint n_flushed; /**< number of pages flushed. */
+};
+/** Statistics for selecting flush rate of dirty pages. */
+typedef struct buf_flush_stat_struct buf_flush_stat_t;
+/******************************************************************//**
+Update the historical stats that we are collecting for flush rate
+heuristics at the end of each interval. */
+UNIV_INTERN
+void
+buf_flush_stat_update(void);
+/*=======================*/
+/******************************************************************//**
+Determines the fraction of dirty pages that need to be flushed based
+on the speed at which we generate redo log. Note that if redo log
+is generated at significant rate without a corresponding increase
+in the number of dirty pages (for example, an in-memory workload)
+it can cause IO bursts of flushing. This function implements heuristics
+to avoid this burstiness.
+@return number of dirty pages to be flushed / second */
+UNIV_INTERN
+ulint
+buf_flush_get_desired_flush_rate(void);
+/*==================================*/
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/******************************************************************//**
+Validates the flush list.
+@return TRUE if ok */
+UNIV_INTERN
ibool
buf_flush_validate(void);
/*====================*/
- /* out: TRUE if ok */
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-/* When buf_flush_free_margin is called, it tries to make this many blocks
+/** When buf_flush_free_margin is called, it tries to make this many blocks
available to replacement in the free list and at the end of the LRU list (to
make sure that a read-ahead batch can be read efficiently in a single
sweep). */
-
#define BUF_FLUSH_FREE_BLOCK_MARGIN (5 + BUF_READ_AHEAD_AREA)
+/** Extra margin to apply above BUF_FLUSH_FREE_BLOCK_MARGIN */
#define BUF_FLUSH_EXTRA_MARGIN (BUF_FLUSH_FREE_BLOCK_MARGIN / 4 + 100)
+#endif /* !UNIV_HOTBACKUP */
#ifndef UNIV_NONINL
#include "buf0flu.ic"
diff --git a/storage/innobase/include/buf0flu.ic b/storage/innobase/include/buf0flu.ic
index ae873c42088..c90cd59e4b6 100644
--- a/storage/innobase/include/buf0flu.ic
+++ b/storage/innobase/include/buf0flu.ic
@@ -1,32 +1,50 @@
-/******************************************************
-The database buffer pool flush algorithm
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1995 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0flu.ic
+The database buffer pool flush algorithm
Created 11/5/1995 Heikki Tuuri
*******************************************************/
+#ifndef UNIV_HOTBACKUP
#include "buf0buf.h"
#include "mtr0mtr.h"
-/************************************************************************
+/********************************************************************//**
Inserts a modified block into the flush list. */
-
+UNIV_INTERN
void
buf_flush_insert_into_flush_list(
/*=============================*/
- buf_block_t* block); /* in: block which is modified */
-/************************************************************************
+ buf_block_t* block); /*!< in/out: block which is modified */
+/********************************************************************//**
Inserts a modified block into the flush list in the right sorted position.
This function is used by recovery, because there the modifications do not
necessarily come in the order of lsn's. */
-
+UNIV_INTERN
void
buf_flush_insert_sorted_into_flush_list(
/*====================================*/
- buf_block_t* block); /* in: block which is modified */
+ buf_block_t* block); /*!< in/out: block which is modified */
-/************************************************************************
+/********************************************************************//**
This function should be called at a mini-transaction commit, if a page was
modified in it. Puts the block to the list of modified blocks, if it is not
already in it. */
@@ -34,73 +52,72 @@ UNIV_INLINE
void
buf_flush_note_modification(
/*========================*/
- buf_block_t* block, /* in: block which is modified */
- mtr_t* mtr) /* in: mtr */
+ buf_block_t* block, /*!< in: block which is modified */
+ mtr_t* mtr) /*!< in: mtr */
{
ut_ad(block);
- ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
- ut_ad(block->buf_fix_count > 0);
+ ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+ ut_ad(block->page.buf_fix_count > 0);
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
- ut_ad(mutex_own(&(buf_pool->mutex)));
+ ut_ad(buf_pool_mutex_own());
- ut_ad(ut_dulint_cmp(mtr->start_lsn, ut_dulint_zero) != 0);
+ ut_ad(mtr->start_lsn != 0);
ut_ad(mtr->modifications);
- ut_ad(ut_dulint_cmp(block->newest_modification, mtr->end_lsn) <= 0);
+ ut_ad(block->page.newest_modification <= mtr->end_lsn);
- block->newest_modification = mtr->end_lsn;
+ block->page.newest_modification = mtr->end_lsn;
- if (ut_dulint_is_zero(block->oldest_modification)) {
+ if (!block->page.oldest_modification) {
- block->oldest_modification = mtr->start_lsn;
- ut_ad(!ut_dulint_is_zero(block->oldest_modification));
+ block->page.oldest_modification = mtr->start_lsn;
+ ut_ad(block->page.oldest_modification != 0);
buf_flush_insert_into_flush_list(block);
} else {
- ut_ad(ut_dulint_cmp(block->oldest_modification,
- mtr->start_lsn) <= 0);
+ ut_ad(block->page.oldest_modification <= mtr->start_lsn);
}
++srv_buf_pool_write_requests;
}
-/************************************************************************
+/********************************************************************//**
This function should be called when recovery has modified a buffer page. */
UNIV_INLINE
void
buf_flush_recv_note_modification(
/*=============================*/
- buf_block_t* block, /* in: block which is modified */
- dulint start_lsn, /* in: start lsn of the first mtr in a
+ buf_block_t* block, /*!< in: block which is modified */
+ ib_uint64_t start_lsn, /*!< in: start lsn of the first mtr in a
set of mtr's */
- dulint end_lsn) /* in: end lsn of the last mtr in the
+ ib_uint64_t end_lsn) /*!< in: end lsn of the last mtr in the
set of mtr's */
{
ut_ad(block);
- ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
- ut_ad(block->buf_fix_count > 0);
+ ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+ ut_ad(block->page.buf_fix_count > 0);
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
- mutex_enter(&(buf_pool->mutex));
+ buf_pool_mutex_enter();
- ut_ad(ut_dulint_cmp(block->newest_modification, end_lsn) <= 0);
+ ut_ad(block->page.newest_modification <= end_lsn);
- block->newest_modification = end_lsn;
+ block->page.newest_modification = end_lsn;
- if (ut_dulint_is_zero(block->oldest_modification)) {
+ if (!block->page.oldest_modification) {
- block->oldest_modification = start_lsn;
+ block->page.oldest_modification = start_lsn;
- ut_ad(!ut_dulint_is_zero(block->oldest_modification));
+ ut_ad(block->page.oldest_modification != 0);
buf_flush_insert_sorted_into_flush_list(block);
} else {
- ut_ad(ut_dulint_cmp(block->oldest_modification,
- start_lsn) <= 0);
+ ut_ad(block->page.oldest_modification <= start_lsn);
}
- mutex_exit(&(buf_pool->mutex));
+ buf_pool_mutex_exit();
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
index 6d26fd4d3b2..009430af35b 100644
--- a/storage/innobase/include/buf0lru.h
+++ b/storage/innobase/include/buf0lru.h
@@ -1,7 +1,24 @@
-/******************************************************
-The database buffer pool LRU replacement algorithm
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
-(c) 1995 Innobase Oy
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0lru.h
+The database buffer pool LRU replacement algorithm
Created 11/5/1995 Heikki Tuuri
*******************************************************/
@@ -13,7 +30,19 @@ Created 11/5/1995 Heikki Tuuri
#include "ut0byte.h"
#include "buf0types.h"
-/**********************************************************************
+/** The return type of buf_LRU_free_block() */
+enum buf_lru_free_block_status {
+ /** freed */
+ BUF_LRU_FREED = 0,
+ /** not freed because the caller asked to remove the
+ uncompressed frame but the control block cannot be
+ relocated */
+ BUF_LRU_CANNOT_RELOCATE,
+ /** not freed because of some other reason */
+ BUF_LRU_NOT_FREED
+};
+
+/******************************************************************//**
Tries to remove LRU flushed blocks from the end of the LRU list and put them
to the free list. This is beneficial for the efficiency of the insert buffer
operation, as flushed pages from non-unique non-clustered indexes are here
@@ -21,121 +50,243 @@ taken out of the buffer pool, and their inserts redirected to the insert
buffer. Otherwise, the flushed blocks could get modified again before read
operations need new buffer blocks, and the i/o work done in flushing would be
wasted. */
-
+UNIV_INTERN
void
buf_LRU_try_free_flushed_blocks(void);
/*==================================*/
-/**********************************************************************
+/******************************************************************//**
Returns TRUE if less than 25 % of the buffer pool is available. This can be
used in heuristics to prevent huge transactions eating up the whole buffer
-pool for their locks. */
-
+pool for their locks.
+@return TRUE if less than 25 % of buffer pool left */
+UNIV_INTERN
ibool
buf_LRU_buf_pool_running_out(void);
/*==============================*/
- /* out: TRUE if less than 25 % of buffer pool
- left */
/*#######################################################################
These are low-level functions
#########################################################################*/
-/* Minimum LRU list length for which the LRU_old pointer is defined */
-
-#define BUF_LRU_OLD_MIN_LEN 80
+/** Minimum LRU list length for which the LRU_old pointer is defined */
+#define BUF_LRU_OLD_MIN_LEN 512 /* 8 megabytes of 16k pages */
+/** Maximum LRU list search length in buf_flush_LRU_recommendation() */
#define BUF_LRU_FREE_SEARCH_LEN (5 + 2 * BUF_READ_AHEAD_AREA)
-/**********************************************************************
+/******************************************************************//**
Invalidates all pages belonging to a given tablespace when we are deleting
the data file(s) of that tablespace. A PROBLEM: if readahead is being started,
what guarantees that it will not try to read in pages after this operation has
completed? */
-
+UNIV_INTERN
void
buf_LRU_invalidate_tablespace(
/*==========================*/
- ulint id); /* in: space id */
-/**********************************************************************
-Gets the minimum LRU_position field for the blocks in an initial segment
-(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not
-guaranteed to be precise, because the ulint_clock may wrap around. */
-
-ulint
-buf_LRU_get_recent_limit(void);
-/*==========================*/
- /* out: the limit; zero if could not determine it */
-/**********************************************************************
-Look for a replaceable block from the end of the LRU list and put it to
-the free list if found. */
+ ulint id); /*!< in: space id */
+/********************************************************************//**
+Insert a compressed block into buf_pool->zip_clean in the LRU order. */
+UNIV_INTERN
+void
+buf_LRU_insert_zip_clean(
+/*=====================*/
+ buf_page_t* bpage); /*!< in: pointer to the block in question */
+
+/******************************************************************//**
+Try to free a block. If bpage is a descriptor of a compressed-only
+page, the descriptor object will be freed as well.
+NOTE: If this function returns BUF_LRU_FREED, it will not temporarily
+release buf_pool_mutex. Furthermore, the page frame will no longer be
+accessible via bpage.
+
+The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and
+release these two mutexes after the call. No other
+buf_page_get_mutex() may be held when calling this function.
+@return BUF_LRU_FREED if freed, BUF_LRU_CANNOT_RELOCATE or
+BUF_LRU_NOT_FREED otherwise. */
+UNIV_INTERN
+enum buf_lru_free_block_status
+buf_LRU_free_block(
+/*===============*/
+ buf_page_t* bpage, /*!< in: block to be freed */
+ ibool zip, /*!< in: TRUE if should remove also the
+ compressed page of an uncompressed page */
+ ibool* buf_pool_mutex_released);
+ /*!< in: pointer to a variable that will
+ be assigned TRUE if buf_pool_mutex
+ was temporarily released, or NULL */
+/******************************************************************//**
+Try to free a replaceable block.
+@return TRUE if found and freed */
+UNIV_INTERN
ibool
buf_LRU_search_and_free_block(
/*==========================*/
- /* out: TRUE if freed */
- ulint n_iterations); /* in: how many times this has been called
+ ulint n_iterations); /*!< in: how many times this has been called
repeatedly without result: a high value means
- that we should search farther; if value is
- k < 10, then we only search k/10 * number
- of pages in the buffer pool from the end
- of the LRU list */
-/**********************************************************************
+ that we should search farther; if
+ n_iterations < 10, then we search
+ n_iterations / 10 * buf_pool->curr_size
+ pages from the end of the LRU list; if
+ n_iterations < 5, then we will also search
+ n_iterations / 5 of the unzip_LRU list. */
+/******************************************************************//**
+Returns a free block from the buf_pool. The block is taken off the
+free list. If it is empty, returns NULL.
+@return a free control block, or NULL if the buf_pool->free list is empty */
+UNIV_INTERN
+buf_block_t*
+buf_LRU_get_free_only(void);
+/*=======================*/
+/******************************************************************//**
Returns a free block from the buf_pool. The block is taken off the
free list. If it is empty, blocks are moved from the end of the
-LRU list to the free list. */
-
+LRU list to the free list.
+@return the free control block, in state BUF_BLOCK_READY_FOR_USE */
+UNIV_INTERN
buf_block_t*
-buf_LRU_get_free_block(void);
-/*=========================*/
- /* out: the free control block; also if AWE is
- used, it is guaranteed that the block has its
- page mapped to a frame when we return */
-/**********************************************************************
-Puts a block back to the free list. */
+buf_LRU_get_free_block(
+/*===================*/
+ ulint zip_size); /*!< in: compressed page size in bytes,
+ or 0 if uncompressed tablespace */
+/******************************************************************//**
+Puts a block back to the free list. */
+UNIV_INTERN
void
buf_LRU_block_free_non_file_page(
/*=============================*/
- buf_block_t* block); /* in: block, must not contain a file page */
-/**********************************************************************
+ buf_block_t* block); /*!< in: block, must not contain a file page */
+/******************************************************************//**
Adds a block to the LRU list. */
-
+UNIV_INTERN
void
buf_LRU_add_block(
/*==============*/
- buf_block_t* block, /* in: control block */
- ibool old); /* in: TRUE if should be put to the old
+ buf_page_t* bpage, /*!< in: control block */
+ ibool old); /*!< in: TRUE if should be put to the old
blocks in the LRU list, else put to the
start; if the LRU list is very short, added to
the start regardless of this parameter */
-/**********************************************************************
+/******************************************************************//**
+Adds a block to the LRU list of decompressed zip pages. */
+UNIV_INTERN
+void
+buf_unzip_LRU_add_block(
+/*====================*/
+ buf_block_t* block, /*!< in: control block */
+ ibool old); /*!< in: TRUE if should be put to the end
+ of the list, else put to the start */
+/******************************************************************//**
Moves a block to the start of the LRU list. */
-
+UNIV_INTERN
void
buf_LRU_make_block_young(
/*=====================*/
- buf_block_t* block); /* in: control block */
-/**********************************************************************
+ buf_page_t* bpage); /*!< in: control block */
+/******************************************************************//**
Moves a block to the end of the LRU list. */
-
+UNIV_INTERN
void
buf_LRU_make_block_old(
/*===================*/
- buf_block_t* block); /* in: control block */
-#ifdef UNIV_DEBUG
-/**************************************************************************
-Validates the LRU list. */
+ buf_page_t* bpage); /*!< in: control block */
+/**********************************************************************//**
+Updates buf_LRU_old_ratio.
+@return updated old_pct */
+UNIV_INTERN
+uint
+buf_LRU_old_ratio_update(
+/*=====================*/
+ uint old_pct,/*!< in: Reserve this percentage of
+ the buffer pool for "old" blocks. */
+ ibool adjust);/*!< in: TRUE=adjust the LRU list;
+ FALSE=just assign buf_LRU_old_ratio
+ during the initialization of InnoDB */
+/********************************************************************//**
+Update the historical stats that we are collecting for LRU eviction
+policy at the end of each interval. */
+UNIV_INTERN
+void
+buf_LRU_stat_update(void);
+/*=====================*/
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/**********************************************************************//**
+Validates the LRU list.
+@return TRUE */
+UNIV_INTERN
ibool
buf_LRU_validate(void);
/*==================*/
-/**************************************************************************
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/**********************************************************************//**
Prints the LRU list. */
-
+UNIV_INTERN
void
buf_LRU_print(void);
/*===============*/
-#endif /* UNIV_DEBUG */
+#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+/** @name Heuristics for detecting index scan @{ */
+/** Reserve buf_LRU_old_ratio / BUF_LRU_OLD_RATIO_DIV of the buffer pool for
+"old" blocks. Protected by buf_pool_mutex. */
+extern uint buf_LRU_old_ratio;
+/** The denominator of buf_LRU_old_ratio. */
+#define BUF_LRU_OLD_RATIO_DIV 1024
+/** Maximum value of buf_LRU_old_ratio.
+@see buf_LRU_old_adjust_len
+@see buf_LRU_old_ratio_update */
+#define BUF_LRU_OLD_RATIO_MAX BUF_LRU_OLD_RATIO_DIV
+/** Minimum value of buf_LRU_old_ratio.
+@see buf_LRU_old_adjust_len
+@see buf_LRU_old_ratio_update
+The minimum must exceed
+(BUF_LRU_OLD_TOLERANCE + 5) * BUF_LRU_OLD_RATIO_DIV / BUF_LRU_OLD_MIN_LEN. */
+#define BUF_LRU_OLD_RATIO_MIN 51
+
+#if BUF_LRU_OLD_RATIO_MIN >= BUF_LRU_OLD_RATIO_MAX
+# error "BUF_LRU_OLD_RATIO_MIN >= BUF_LRU_OLD_RATIO_MAX"
+#endif
+#if BUF_LRU_OLD_RATIO_MAX > BUF_LRU_OLD_RATIO_DIV
+# error "BUF_LRU_OLD_RATIO_MAX > BUF_LRU_OLD_RATIO_DIV"
+#endif
+
+/** Move blocks to "new" LRU list only if the first access was at
+least this many milliseconds ago. Not protected by any mutex or latch. */
+extern uint buf_LRU_old_threshold_ms;
+/* @} */
+
+/** @brief Statistics for selecting the LRU list for eviction.
+
+These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O
+and page_zip_decompress() operations. Based on the statistics we decide
+if we want to evict from buf_pool->unzip_LRU or buf_pool->LRU. */
+struct buf_LRU_stat_struct
+{
+ ulint io; /**< Counter of buffer pool I/O operations. */
+ ulint unzip; /**< Counter of page_zip_decompress operations. */
+};
+
+/** Statistics for selecting the LRU list for eviction. */
+typedef struct buf_LRU_stat_struct buf_LRU_stat_t;
+
+/** Current operation counters. Not protected by any mutex.
+Cleared by buf_LRU_stat_update(). */
+extern buf_LRU_stat_t buf_LRU_stat_cur;
+
+/** Running sum of past values of buf_LRU_stat_cur.
+Updated by buf_LRU_stat_update(). Protected by buf_pool_mutex. */
+extern buf_LRU_stat_t buf_LRU_stat_sum;
+
+/********************************************************************//**
+Increments the I/O counter in buf_LRU_stat_cur. */
+#define buf_LRU_stat_inc_io() buf_LRU_stat_cur.io++
+/********************************************************************//**
+Increments the page_zip_decompress() counter in buf_LRU_stat_cur. */
+#define buf_LRU_stat_inc_unzip() buf_LRU_stat_cur.unzip++
#ifndef UNIV_NONINL
#include "buf0lru.ic"
diff --git a/storage/innobase/include/buf0lru.ic b/storage/innobase/include/buf0lru.ic
index 7b8ee457b0b..556f45d987f 100644
--- a/storage/innobase/include/buf0lru.ic
+++ b/storage/innobase/include/buf0lru.ic
@@ -1,7 +1,24 @@
-/******************************************************
-The database buffer replacement algorithm
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0lru.ic
+The database buffer replacement algorithm
Created 11/5/1995 Heikki Tuuri
*******************************************************/
diff --git a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h
index e4620172860..093750623d6 100644
--- a/storage/innobase/include/buf0rea.h
+++ b/storage/innobase/include/buf0rea.h
@@ -1,7 +1,24 @@
-/******************************************************
-The database buffer read
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0rea.h
+The database buffer read
Created 11/5/1995 Heikki Tuuri
*******************************************************/
@@ -12,25 +29,24 @@ Created 11/5/1995 Heikki Tuuri
#include "univ.i"
#include "buf0types.h"
-/************************************************************************
+/********************************************************************//**
High-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
-released by the i/o-handler thread. Does a random read-ahead if it seems
-sensible. */
-
-ulint
+released by the i/o-handler thread.
+@return TRUE if page has been read in, FALSE in case of failure */
+UNIV_INTERN
+ibool
buf_read_page(
/*==========*/
- /* out: number of page read requests issued: this can
- be > 1 if read-ahead occurred */
- ulint space, /* in: space id */
- ulint offset);/* in: page number */
-/************************************************************************
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
+ ulint offset);/*!< in: page number */
+/********************************************************************//**
Applies linear read-ahead if in the buf_pool the page is a border page of
a linear read-ahead area and all the pages in the area have been accessed.
Does not read any page if the read-ahead mechanism is not activated. Note
-that the the algorithm looks at the 'natural' adjacent successor and
+that the algorithm looks at the 'natural' adjacent successor and
predecessor of the page, which on the leaf level of a B-tree are the next
and previous page in the chain of leaves. To know these, the page specified
in (space, offset) must already be present in the buf_pool. Thus, the
@@ -48,57 +64,74 @@ function must be written such that it cannot end up waiting for these
latches!
NOTE 3: the calling thread must want access to the page given: this rule is
set to prevent unintended read-aheads performed by ibuf routines, a situation
-which could result in a deadlock if the OS does not support asynchronous io. */
-
+which could result in a deadlock if the OS does not support asynchronous io.
+@return number of page read requests issued */
+UNIV_INTERN
ulint
buf_read_ahead_linear(
/*==================*/
- /* out: number of page read requests issued */
- ulint space, /* in: space id */
- ulint offset);/* in: page number of a page; NOTE: the current thread
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
+ ulint offset);/*!< in: page number of a page; NOTE: the current thread
must want access to this page (see NOTE 3 above) */
-/************************************************************************
+/********************************************************************//**
Issues read requests for pages which the ibuf module wants to read in, in
order to contract the insert buffer tree. Technically, this function is like
a read-ahead function. */
-
+UNIV_INTERN
void
buf_read_ibuf_merge_pages(
/*======================*/
- ibool sync, /* in: TRUE if the caller wants this function
- to wait for the highest address page to get
- read in, before this function returns */
- ulint* space_ids, /* in: array of space ids */
- ib_longlong* space_versions,/* in: the spaces must have this version
- number (timestamp), otherwise we discard the
- read; we use this to cancel reads if
- DISCARD + IMPORT may have changed the
- tablespace size */
- ulint* page_nos, /* in: array of page numbers to read, with the
- highest page number the last in the array */
- ulint n_stored); /* in: number of page numbers in the array */
-/************************************************************************
+ ibool sync, /*!< in: TRUE if the caller
+ wants this function to wait
+ for the highest address page
+ to get read in, before this
+ function returns */
+ const ulint* space_ids, /*!< in: array of space ids */
+ const ib_int64_t* space_versions,/*!< in: the spaces must have
+ this version number
+ (timestamp), otherwise we
+ discard the read; we use this
+ to cancel reads if DISCARD +
+ IMPORT may have changed the
+ tablespace size */
+ const ulint* page_nos, /*!< in: array of page numbers
+ to read, with the highest page
+ number the last in the
+ array */
+ ulint n_stored); /*!< in: number of elements
+ in the arrays */
+/********************************************************************//**
Issues read requests for pages which recovery wants to read in. */
-
+UNIV_INTERN
void
buf_read_recv_pages(
/*================*/
- ibool sync, /* in: TRUE if the caller wants this function
- to wait for the highest address page to get
- read in, before this function returns */
- ulint space, /* in: space id */
- ulint* page_nos, /* in: array of page numbers to read, with the
- highest page number the last in the array */
- ulint n_stored); /* in: number of page numbers in the array */
+ ibool sync, /*!< in: TRUE if the caller
+ wants this function to wait
+ for the highest address page
+ to get read in, before this
+ function returns */
+ ulint space, /*!< in: space id */
+ ulint zip_size, /*!< in: compressed page size in
+ bytes, or 0 */
+ const ulint* page_nos, /*!< in: array of page numbers
+ to read, with the highest page
+ number the last in the
+ array */
+ ulint n_stored); /*!< in: number of page numbers
+ in the array */
-/* The size in pages of the area which the read-ahead algorithms read if
+/** The size in pages of the area which the read-ahead algorithms read if
invoked */
-
#define BUF_READ_AHEAD_AREA \
ut_min(64, ut_2_power_up(buf_pool->curr_size / 32))
-/* Modes used in read-ahead */
+/** @name Modes used in read-ahead @{ */
+/** read only pages belonging to the insert buffer tree */
#define BUF_READ_IBUF_PAGES_ONLY 131
+/** read any page */
#define BUF_READ_ANY_PAGE 132
+/* @} */
#endif
diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h
index 44fdfa80e73..bfae6477135 100644
--- a/storage/innobase/include/buf0types.h
+++ b/storage/innobase/include/buf0types.h
@@ -1,7 +1,24 @@
-/******************************************************
-The database buffer pool global types for the directory
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
-(c) 1995 Innobase Oy
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0types.h
+The database buffer pool global types for the directory
Created 11/17/1995 Heikki Tuuri
*******************************************************/
@@ -9,12 +26,57 @@ Created 11/17/1995 Heikki Tuuri
#ifndef buf0types_h
#define buf0types_h
+/** Buffer page (uncompressed or compressed) */
+typedef struct buf_page_struct buf_page_t;
+/** Buffer block for which an uncompressed page exists */
typedef struct buf_block_struct buf_block_t;
+/** Buffer pool chunk comprising buf_block_t */
+typedef struct buf_chunk_struct buf_chunk_t;
+/** Buffer pool comprising buf_chunk_t */
typedef struct buf_pool_struct buf_pool_t;
+/** Buffer pool statistics struct */
+typedef struct buf_pool_stat_struct buf_pool_stat_t;
-/* The 'type' used of a buffer frame */
+/** A buffer frame. @see page_t */
typedef byte buf_frame_t;
+/** Flags for flush types */
+enum buf_flush {
+ BUF_FLUSH_LRU = 0, /*!< flush via the LRU list */
+ BUF_FLUSH_SINGLE_PAGE, /*!< flush a single page */
+ BUF_FLUSH_LIST, /*!< flush via the flush list
+ of dirty blocks */
+ BUF_FLUSH_N_TYPES /*!< index of last element + 1 */
+};
+
+/** Flags for io_fix types */
+enum buf_io_fix {
+ BUF_IO_NONE = 0, /**< no pending I/O */
+ BUF_IO_READ, /**< read pending */
+ BUF_IO_WRITE /**< write pending */
+};
+
+/** Parameters of binary buddy system for compressed pages (buf0buddy.h) */
+/* @{ */
+#if UNIV_WORD_SIZE <= 4 /* 32-bit system */
+/** Base-2 logarithm of the smallest buddy block size */
+# define BUF_BUDDY_LOW_SHIFT 6
+#else /* 64-bit system */
+/** Base-2 logarithm of the smallest buddy block size */
+# define BUF_BUDDY_LOW_SHIFT 7
+#endif
+#define BUF_BUDDY_LOW (1 << BUF_BUDDY_LOW_SHIFT)
+ /*!< minimum block size in the binary
+ buddy system; must be at least
+ sizeof(buf_page_t) */
+#define BUF_BUDDY_SIZES (UNIV_PAGE_SIZE_SHIFT - BUF_BUDDY_LOW_SHIFT)
+ /*!< number of buddy sizes */
+
+/** twice the maximum block size of the buddy system;
+the underlying memory is aligned by this amount:
+this must be equal to UNIV_PAGE_SIZE */
+#define BUF_BUDDY_HIGH (BUF_BUDDY_LOW << BUF_BUDDY_SIZES)
+/* @} */
#endif
diff --git a/storage/innobase/include/data0data.h b/storage/innobase/include/data0data.h
index 40592c3c0ce..f9fce3f3657 100644
--- a/storage/innobase/include/data0data.h
+++ b/storage/innobase/include/data0data.h
@@ -1,7 +1,24 @@
-/************************************************************************
-SQL data field and tuple
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1994-1996 Innobase Oy
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/data0data.h
+SQL data field and tuple
Created 5/30/1994 Heikki Tuuri
*************************************************************************/
@@ -16,405 +33,447 @@ Created 5/30/1994 Heikki Tuuri
#include "mem0mem.h"
#include "dict0types.h"
+/** Storage for overflow data in a big record, that is, a clustered
+index record which needs external storage of data fields */
typedef struct big_rec_struct big_rec_t;
-/* Some non-inlined functions used in the MySQL interface: */
-void
-dfield_set_data_noninline(
- dfield_t* field, /* in: field */
- void* data, /* in: data */
- ulint len); /* in: length or UNIV_SQL_NULL */
-void*
-dfield_get_data_noninline(
- dfield_t* field); /* in: field */
-ulint
-dfield_get_len_noninline(
- dfield_t* field); /* in: field */
-ulint
-dtuple_get_n_fields_noninline(
- dtuple_t* tuple); /* in: tuple */
-dfield_t*
-dtuple_get_nth_field_noninline(
- dtuple_t* tuple, /* in: tuple */
- ulint n); /* in: index of field */
-
-/*************************************************************************
-Gets pointer to the type struct of SQL data field. */
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Gets pointer to the type struct of SQL data field.
+@return pointer to the type struct */
UNIV_INLINE
dtype_t*
dfield_get_type(
/*============*/
- /* out: pointer to the type struct */
- dfield_t* field); /* in: SQL data field */
-/*************************************************************************
+ const dfield_t* field); /*!< in: SQL data field */
+/*********************************************************************//**
+Gets pointer to the data in a field.
+@return pointer to data */
+UNIV_INLINE
+void*
+dfield_get_data(
+/*============*/
+ const dfield_t* field); /*!< in: field */
+#else /* UNIV_DEBUG */
+# define dfield_get_type(field) (&(field)->type)
+# define dfield_get_data(field) ((field)->data)
+#endif /* UNIV_DEBUG */
+/*********************************************************************//**
Sets the type struct of SQL data field. */
UNIV_INLINE
void
dfield_set_type(
/*============*/
- dfield_t* field, /* in: SQL data field */
- dtype_t* type); /* in: pointer to data type struct */
-/*************************************************************************
-Gets pointer to the data in a field. */
-UNIV_INLINE
-void*
-dfield_get_data(
-/*============*/
- /* out: pointer to data */
- dfield_t* field); /* in: field */
-/*************************************************************************
-Gets length of field data. */
+ dfield_t* field, /*!< in: SQL data field */
+ dtype_t* type); /*!< in: pointer to data type struct */
+/*********************************************************************//**
+Gets length of field data.
+@return length of data; UNIV_SQL_NULL if SQL null data */
UNIV_INLINE
ulint
dfield_get_len(
/*===========*/
- /* out: length of data; UNIV_SQL_NULL if
- SQL null data */
- dfield_t* field); /* in: field */
-/*************************************************************************
+ const dfield_t* field); /*!< in: field */
+/*********************************************************************//**
Sets length in a field. */
UNIV_INLINE
void
dfield_set_len(
/*===========*/
- dfield_t* field, /* in: field */
- ulint len); /* in: length or UNIV_SQL_NULL */
-/*************************************************************************
+ dfield_t* field, /*!< in: field */
+ ulint len); /*!< in: length or UNIV_SQL_NULL */
+/*********************************************************************//**
+Determines if a field is SQL NULL.
+@return nonzero if SQL null data */
+UNIV_INLINE
+ulint
+dfield_is_null(
+/*===========*/
+ const dfield_t* field); /*!< in: field */
+/*********************************************************************//**
+Determines if a field is externally stored.
+@return nonzero if externally stored */
+UNIV_INLINE
+ulint
+dfield_is_ext(
+/*==========*/
+ const dfield_t* field); /*!< in: field */
+/*********************************************************************//**
+Sets the "external storage" flag. */
+UNIV_INLINE
+void
+dfield_set_ext(
+/*===========*/
+ dfield_t* field); /*!< in/out: field */
+/*********************************************************************//**
Sets pointer to the data and length in a field. */
UNIV_INLINE
void
dfield_set_data(
/*============*/
- dfield_t* field, /* in: field */
- const void* data, /* in: data */
- ulint len); /* in: length or UNIV_SQL_NULL */
-/**************************************************************************
+ dfield_t* field, /*!< in: field */
+ const void* data, /*!< in: data */
+ ulint len); /*!< in: length or UNIV_SQL_NULL */
+/*********************************************************************//**
+Sets a data field to SQL NULL. */
+UNIV_INLINE
+void
+dfield_set_null(
+/*============*/
+ dfield_t* field); /*!< in/out: field */
+/**********************************************************************//**
Writes an SQL null field full of zeros. */
UNIV_INLINE
void
data_write_sql_null(
/*================*/
- byte* data, /* in: pointer to a buffer of size len */
- ulint len); /* in: SQL null size in bytes */
-/*************************************************************************
+ byte* data, /*!< in: pointer to a buffer of size len */
+ ulint len); /*!< in: SQL null size in bytes */
+/*********************************************************************//**
Copies the data and len fields. */
UNIV_INLINE
void
dfield_copy_data(
/*=============*/
- dfield_t* field1, /* in: field to copy to */
- dfield_t* field2);/* in: field to copy from */
-/*************************************************************************
+ dfield_t* field1, /*!< out: field to copy to */
+ const dfield_t* field2);/*!< in: field to copy from */
+/*********************************************************************//**
Copies a data field to another. */
UNIV_INLINE
void
dfield_copy(
/*========*/
- dfield_t* field1, /* in: field to copy to */
- dfield_t* field2);/* in: field to copy from */
-/*************************************************************************
-Tests if data length and content is equal for two dfields. */
+ dfield_t* field1, /*!< out: field to copy to */
+ const dfield_t* field2);/*!< in: field to copy from */
+/*********************************************************************//**
+Copies the data pointed to by a data field. */
+UNIV_INLINE
+void
+dfield_dup(
+/*=======*/
+ dfield_t* field, /*!< in/out: data field */
+ mem_heap_t* heap); /*!< in: memory heap where allocated */
+/*********************************************************************//**
+Tests if data length and content is equal for two dfields.
+@return TRUE if equal */
UNIV_INLINE
ibool
dfield_datas_are_binary_equal(
/*==========================*/
- /* out: TRUE if equal */
- dfield_t* field1, /* in: field */
- dfield_t* field2);/* in: field */
-/*************************************************************************
-Tests if dfield data length and content is equal to the given. */
-
+ const dfield_t* field1, /*!< in: field */
+ const dfield_t* field2);/*!< in: field */
+/*********************************************************************//**
+Tests if dfield data length and content is equal to the given.
+@return TRUE if equal */
+UNIV_INTERN
ibool
dfield_data_is_binary_equal(
/*========================*/
- /* out: TRUE if equal */
- dfield_t* field, /* in: field */
- ulint len, /* in: data length or UNIV_SQL_NULL */
- byte* data); /* in: data */
-/*************************************************************************
-Gets number of fields in a data tuple. */
+ const dfield_t* field, /*!< in: field */
+ ulint len, /*!< in: data length or UNIV_SQL_NULL */
+ const byte* data); /*!< in: data */
+/*********************************************************************//**
+Gets number of fields in a data tuple.
+@return number of fields */
UNIV_INLINE
ulint
dtuple_get_n_fields(
/*================*/
- /* out: number of fields */
- dtuple_t* tuple); /* in: tuple */
-/*************************************************************************
-Gets nth field of a tuple. */
+ const dtuple_t* tuple); /*!< in: tuple */
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Gets nth field of a tuple.
+@return nth field */
UNIV_INLINE
dfield_t*
dtuple_get_nth_field(
/*=================*/
- /* out: nth field */
- dtuple_t* tuple, /* in: tuple */
- ulint n); /* in: index of field */
-/*************************************************************************
-Gets info bits in a data tuple. */
+ const dtuple_t* tuple, /*!< in: tuple */
+ ulint n); /*!< in: index of field */
+#else /* UNIV_DEBUG */
+# define dtuple_get_nth_field(tuple, n) ((tuple)->fields + (n))
+#endif /* UNIV_DEBUG */
+/*********************************************************************//**
+Gets info bits in a data tuple.
+@return info bits */
UNIV_INLINE
ulint
dtuple_get_info_bits(
/*=================*/
- /* out: info bits */
- dtuple_t* tuple); /* in: tuple */
-/*************************************************************************
+ const dtuple_t* tuple); /*!< in: tuple */
+/*********************************************************************//**
Sets info bits in a data tuple. */
UNIV_INLINE
void
dtuple_set_info_bits(
/*=================*/
- dtuple_t* tuple, /* in: tuple */
- ulint info_bits); /* in: info bits */
-/*************************************************************************
-Gets number of fields used in record comparisons. */
+ dtuple_t* tuple, /*!< in: tuple */
+ ulint info_bits); /*!< in: info bits */
+/*********************************************************************//**
+Gets number of fields used in record comparisons.
+@return number of fields used in comparisons in rem0cmp.* */
UNIV_INLINE
ulint
dtuple_get_n_fields_cmp(
/*====================*/
- /* out: number of fields used in comparisons
- in rem0cmp.* */
- dtuple_t* tuple); /* in: tuple */
-/*************************************************************************
+ const dtuple_t* tuple); /*!< in: tuple */
+/*********************************************************************//**
Gets number of fields used in record comparisons. */
UNIV_INLINE
void
dtuple_set_n_fields_cmp(
/*====================*/
- dtuple_t* tuple, /* in: tuple */
- ulint n_fields_cmp); /* in: number of fields used in
+ dtuple_t* tuple, /*!< in: tuple */
+ ulint n_fields_cmp); /*!< in: number of fields used in
comparisons in rem0cmp.* */
-/**************************************************************
+/**********************************************************//**
Creates a data tuple to a memory heap. The default value for number
-of fields used in record comparisons for this tuple is n_fields. */
+of fields used in record comparisons for this tuple is n_fields.
+@return own: created tuple */
UNIV_INLINE
dtuple_t*
dtuple_create(
/*==========*/
- /* out, own: created tuple */
- mem_heap_t* heap, /* in: memory heap where the tuple
+ mem_heap_t* heap, /*!< in: memory heap where the tuple
is created */
- ulint n_fields); /* in: number of fields */
-
-/*************************************************************************
-Creates a dtuple for use in MySQL. */
+ ulint n_fields); /*!< in: number of fields */
-dtuple_t*
-dtuple_create_for_mysql(
-/*====================*/
- /* out, own created dtuple */
- void** heap, /* out: created memory heap */
- ulint n_fields); /* in: number of fields */
-/*************************************************************************
-Frees a dtuple used in MySQL. */
+/**********************************************************//**
+Wrap data fields in a tuple. The default value for number
+of fields used in record comparisons for this tuple is n_fields.
+@return data tuple */
+UNIV_INLINE
+const dtuple_t*
+dtuple_from_fields(
+/*===============*/
+ dtuple_t* tuple, /*!< in: storage for data tuple */
+ const dfield_t* fields, /*!< in: fields */
+ ulint n_fields); /*!< in: number of fields */
-void
-dtuple_free_for_mysql(
-/*==================*/
- void* heap);
-/*************************************************************************
+/*********************************************************************//**
Sets number of fields used in a tuple. Normally this is set in
dtuple_create, but if you want later to set it smaller, you can use this. */
-
+UNIV_INTERN
void
dtuple_set_n_fields(
/*================*/
- dtuple_t* tuple, /* in: tuple */
- ulint n_fields); /* in: number of fields */
-/**************************************************************
+ dtuple_t* tuple, /*!< in: tuple */
+ ulint n_fields); /*!< in: number of fields */
+/*********************************************************************//**
+Copies a data tuple to another. This is a shallow copy; if a deep copy
+is desired, dfield_dup() will have to be invoked on each field.
+@return own: copy of tuple */
+UNIV_INLINE
+dtuple_t*
+dtuple_copy(
+/*========*/
+ const dtuple_t* tuple, /*!< in: tuple to copy from */
+ mem_heap_t* heap); /*!< in: memory heap
+ where the tuple is created */
+/**********************************************************//**
The following function returns the sum of data lengths of a tuple. The space
-occupied by the field structs or the tuple struct is not counted. */
+occupied by the field structs or the tuple struct is not counted.
+@return sum of data lens */
UNIV_INLINE
ulint
dtuple_get_data_size(
/*=================*/
- /* out: sum of data lens */
- dtuple_t* tuple); /* in: typed data tuple */
-/****************************************************************
-Returns TRUE if lengths of two dtuples are equal and respective data fields
-in them are equal when compared with collation in char fields (not as binary
-strings). */
-
-ibool
-dtuple_datas_are_ordering_equal(
-/*============================*/
- /* out: TRUE if length and fieds are equal
- when compared with cmp_data_data:
- NOTE: in character type fields some letters
- are identified with others! (collation) */
- dtuple_t* tuple1, /* in: tuple 1 */
- dtuple_t* tuple2);/* in: tuple 2 */
-/****************************************************************
-Folds a prefix given as the number of fields of a tuple. */
+ const dtuple_t* tuple, /*!< in: typed data tuple */
+ ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
+/*********************************************************************//**
+Computes the number of externally stored fields in a data tuple.
+@return number of fields */
+UNIV_INLINE
+ulint
+dtuple_get_n_ext(
+/*=============*/
+ const dtuple_t* tuple); /*!< in: tuple */
+/************************************************************//**
+Compare two data tuples, respecting the collation of character fields.
+@return 1, 0 , -1 if tuple1 is greater, equal, less, respectively,
+than tuple2 */
+UNIV_INTERN
+int
+dtuple_coll_cmp(
+/*============*/
+ const dtuple_t* tuple1, /*!< in: tuple 1 */
+ const dtuple_t* tuple2);/*!< in: tuple 2 */
+/************************************************************//**
+Folds a prefix given as the number of fields of a tuple.
+@return the folded value */
UNIV_INLINE
ulint
dtuple_fold(
/*========*/
- /* out: the folded value */
- dtuple_t* tuple, /* in: the tuple */
- ulint n_fields,/* in: number of complete fields to fold */
- ulint n_bytes,/* in: number of bytes to fold in an
+ const dtuple_t* tuple, /*!< in: the tuple */
+ ulint n_fields,/*!< in: number of complete fields to fold */
+ ulint n_bytes,/*!< in: number of bytes to fold in an
incomplete last field */
- dulint tree_id);/* in: index tree id */
-/***********************************************************************
+ dulint tree_id)/*!< in: index tree id */
+ __attribute__((pure));
+/*******************************************************************//**
Sets types of fields binary in a tuple. */
UNIV_INLINE
void
dtuple_set_types_binary(
/*====================*/
- dtuple_t* tuple, /* in: data tuple */
- ulint n); /* in: number of fields to set */
-/**************************************************************************
-Checks if a dtuple contains an SQL null value. */
+ dtuple_t* tuple, /*!< in: data tuple */
+ ulint n); /*!< in: number of fields to set */
+/**********************************************************************//**
+Checks if a dtuple contains an SQL null value.
+@return TRUE if some field is SQL null */
UNIV_INLINE
ibool
dtuple_contains_null(
/*=================*/
- /* out: TRUE if some field is SQL null */
- dtuple_t* tuple); /* in: dtuple */
-/**************************************************************
-Checks that a data field is typed. Asserts an error if not. */
-
+ const dtuple_t* tuple); /*!< in: dtuple */
+/**********************************************************//**
+Checks that a data field is typed. Asserts an error if not.
+@return TRUE if ok */
+UNIV_INTERN
ibool
dfield_check_typed(
/*===============*/
- /* out: TRUE if ok */
- dfield_t* field); /* in: data field */
-/**************************************************************
-Checks that a data tuple is typed. Asserts an error if not. */
-
+ const dfield_t* field); /*!< in: data field */
+/**********************************************************//**
+Checks that a data tuple is typed. Asserts an error if not.
+@return TRUE if ok */
+UNIV_INTERN
ibool
dtuple_check_typed(
/*===============*/
- /* out: TRUE if ok */
- dtuple_t* tuple); /* in: tuple */
-/**************************************************************
-Checks that a data tuple is typed. */
-
+ const dtuple_t* tuple); /*!< in: tuple */
+/**********************************************************//**
+Checks that a data tuple is typed.
+@return TRUE if ok */
+UNIV_INTERN
ibool
dtuple_check_typed_no_assert(
/*=========================*/
- /* out: TRUE if ok */
- dtuple_t* tuple); /* in: tuple */
+ const dtuple_t* tuple); /*!< in: tuple */
#ifdef UNIV_DEBUG
-/**************************************************************
+/**********************************************************//**
Validates the consistency of a tuple which must be complete, i.e,
-all fields must have been set. */
-
+all fields must have been set.
+@return TRUE if ok */
+UNIV_INTERN
ibool
dtuple_validate(
/*============*/
- /* out: TRUE if ok */
- dtuple_t* tuple); /* in: tuple */
+ const dtuple_t* tuple); /*!< in: tuple */
#endif /* UNIV_DEBUG */
-/*****************************************************************
+/*************************************************************//**
Pretty prints a dfield value according to its data type. */
-
+UNIV_INTERN
void
dfield_print(
/*=========*/
- dfield_t* dfield);/* in: dfield */
-/*****************************************************************
+ const dfield_t* dfield);/*!< in: dfield */
+/*************************************************************//**
Pretty prints a dfield value according to its data type. Also the hex string
is printed if a string contains non-printable characters. */
-
+UNIV_INTERN
void
dfield_print_also_hex(
/*==================*/
- dfield_t* dfield); /* in: dfield */
-/**************************************************************
+ const dfield_t* dfield); /*!< in: dfield */
+/**********************************************************//**
The following function prints the contents of a tuple. */
-
+UNIV_INTERN
void
dtuple_print(
/*=========*/
- FILE* f, /* in: output stream */
- dtuple_t* tuple); /* in: tuple */
-/******************************************************************
+ FILE* f, /*!< in: output stream */
+ const dtuple_t* tuple); /*!< in: tuple */
+/**************************************************************//**
Moves parts of long fields in entry to the big record vector so that
the size of tuple drops below the maximum record size allowed in the
database. Moves data only from those fields which are not necessary
-to determine uniquely the insertion place of the tuple in the index. */
-
+to determine uniquely the insertion place of the tuple in the index.
+@return own: created big record vector, NULL if we are not able to
+shorten the entry enough, i.e., if there are too many fixed-length or
+short fields in entry or the index is clustered */
+UNIV_INTERN
big_rec_t*
dtuple_convert_big_rec(
/*===================*/
- /* out, own: created big record vector,
- NULL if we are not able to shorten
- the entry enough, i.e., if there are
- too many short fields in entry */
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: index entry */
- ulint* ext_vec,/* in: array of externally stored fields,
- or NULL: if a field already is externally
- stored, then we cannot move it to the vector
- this function returns */
- ulint n_ext_vec);/* in: number of elements is ext_vec */
-/******************************************************************
+ dict_index_t* index, /*!< in: index */
+ dtuple_t* entry, /*!< in/out: index entry */
+ ulint* n_ext); /*!< in/out: number of
+ externally stored columns */
+/**************************************************************//**
Puts back to entry the data stored in vector. Note that to ensure the
fields in entry can accommodate the data, vector must have been created
from entry with dtuple_convert_big_rec. */
-
+UNIV_INTERN
void
dtuple_convert_back_big_rec(
/*========================*/
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: entry whose data was put to vector */
- big_rec_t* vector);/* in, own: big rec vector; it is
+ dict_index_t* index, /*!< in: index */
+ dtuple_t* entry, /*!< in: entry whose data was put to vector */
+ big_rec_t* vector);/*!< in, own: big rec vector; it is
freed in this function */
-/******************************************************************
+/**************************************************************//**
Frees the memory in a big rec vector. */
-
+UNIV_INLINE
void
dtuple_big_rec_free(
/*================*/
- big_rec_t* vector); /* in, own: big rec vector; it is
+ big_rec_t* vector); /*!< in, own: big rec vector; it is
freed in this function */
/*######################################################################*/
-/* Structure for an SQL data field */
+/** Structure for an SQL data field */
struct dfield_struct{
- void* data; /* pointer to data */
- ulint len; /* data length; UNIV_SQL_NULL if SQL null; */
- dtype_t type; /* type of data */
+ void* data; /*!< pointer to data */
+ unsigned ext:1; /*!< TRUE=externally stored, FALSE=local */
+ unsigned len:32; /*!< data length; UNIV_SQL_NULL if SQL null */
+ dtype_t type; /*!< type of data */
};
+/** Structure for an SQL data tuple of fields (logical record) */
struct dtuple_struct {
- ulint info_bits; /* info bits of an index record:
+ ulint info_bits; /*!< info bits of an index record:
the default is 0; this field is used
if an index record is built from
a data tuple */
- ulint n_fields; /* number of fields in dtuple */
- ulint n_fields_cmp; /* number of fields which should
+ ulint n_fields; /*!< number of fields in dtuple */
+ ulint n_fields_cmp; /*!< number of fields which should
be used in comparison services
of rem0cmp.*; the index search
is performed by comparing only these
fields, others are ignored; the
default value in dtuple creation is
the same value as n_fields */
- dfield_t* fields; /* fields */
+ dfield_t* fields; /*!< fields */
UT_LIST_NODE_T(dtuple_t) tuple_list;
- /* data tuples can be linked into a
+ /*!< data tuples can be linked into a
list using this field */
- ulint magic_n;
+#ifdef UNIV_DEBUG
+ ulint magic_n; /*!< magic number, used in
+ debug assertions */
+/** Value of dtuple_struct::magic_n */
+# define DATA_TUPLE_MAGIC_N 65478679
+#endif /* UNIV_DEBUG */
};
-#define DATA_TUPLE_MAGIC_N 65478679
-
-/* A slot for a field in a big rec vector */
+/** A slot for a field in a big rec vector */
typedef struct big_rec_field_struct big_rec_field_t;
+/** A slot for a field in a big rec vector */
struct big_rec_field_struct {
- ulint field_no; /* field number in record */
- ulint len; /* stored data len */
- byte* data; /* stored data */
+ ulint field_no; /*!< field number in record */
+ ulint len; /*!< stored data length, in bytes */
+ const void* data; /*!< stored data */
};
-/* Storage format for overflow data in a big record, that is, a record
-which needs external storage of data fields */
-
+/** Storage format for overflow data in a big record, that is, a
+clustered index record which needs external storage of data fields */
struct big_rec_struct {
- mem_heap_t* heap; /* memory heap from which allocated */
- ulint n_fields; /* number of stored fields */
- big_rec_field_t* fields; /* stored fields */
+ mem_heap_t* heap; /*!< memory heap from which
+ allocated */
+ ulint n_fields; /*!< number of stored fields */
+ big_rec_field_t*fields; /*!< stored fields */
};
#ifndef UNIV_NONINL
diff --git a/storage/innobase/include/data0data.ic b/storage/innobase/include/data0data.ic
index 753fa9ba45f..da79aa33702 100644
--- a/storage/innobase/include/data0data.ic
+++ b/storage/innobase/include/data0data.ic
@@ -1,7 +1,24 @@
-/************************************************************************
-SQL data field and tuple
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
-(c) 1994-1996 Innobase Oy
+/********************************************************************//**
+@file include/data0data.ic
+SQL data field and tuple
Created 5/30/1994 Heikki Tuuri
*************************************************************************/
@@ -10,62 +27,66 @@ Created 5/30/1994 Heikki Tuuri
#include "ut0rnd.h"
#ifdef UNIV_DEBUG
+/** Dummy variable to catch access to uninitialized fields. In the
+debug version, dtuple_create() will make all fields of dtuple_t point
+to data_error. */
extern byte data_error;
-#endif /* UNIV_DEBUG */
-/*************************************************************************
-Gets pointer to the type struct of SQL data field. */
+/*********************************************************************//**
+Gets pointer to the type struct of SQL data field.
+@return pointer to the type struct */
UNIV_INLINE
dtype_t*
dfield_get_type(
/*============*/
- /* out: pointer to the type struct */
- dfield_t* field) /* in: SQL data field */
+ const dfield_t* field) /*!< in: SQL data field */
{
ut_ad(field);
- return(&(field->type));
+ return((dtype_t*) &(field->type));
}
+#endif /* UNIV_DEBUG */
-/*************************************************************************
+/*********************************************************************//**
Sets the type struct of SQL data field. */
UNIV_INLINE
void
dfield_set_type(
/*============*/
- dfield_t* field, /* in: SQL data field */
- dtype_t* type) /* in: pointer to data type struct */
+ dfield_t* field, /*!< in: SQL data field */
+ dtype_t* type) /*!< in: pointer to data type struct */
{
ut_ad(field && type);
field->type = *type;
}
-/*************************************************************************
-Gets pointer to the data in a field. */
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Gets pointer to the data in a field.
+@return pointer to data */
UNIV_INLINE
void*
dfield_get_data(
/*============*/
- /* out: pointer to data */
- dfield_t* field) /* in: field */
+ const dfield_t* field) /*!< in: field */
{
ut_ad(field);
ut_ad((field->len == UNIV_SQL_NULL)
|| (field->data != &data_error));
- return(field->data);
+ return((void*) field->data);
}
+#endif /* UNIV_DEBUG */
-/*************************************************************************
-Gets length of field data. */
+/*********************************************************************//**
+Gets length of field data.
+@return length of data; UNIV_SQL_NULL if SQL null data */
UNIV_INLINE
ulint
dfield_get_len(
/*===========*/
- /* out: length of data; UNIV_SQL_NULL if
- SQL null data */
- dfield_t* field) /* in: field */
+ const dfield_t* field) /*!< in: field */
{
ut_ad(field);
ut_ad((field->len == UNIV_SQL_NULL)
@@ -74,139 +95,208 @@ dfield_get_len(
return(field->len);
}
-/*************************************************************************
+/*********************************************************************//**
Sets length in a field. */
UNIV_INLINE
void
dfield_set_len(
/*===========*/
- dfield_t* field, /* in: field */
- ulint len) /* in: length or UNIV_SQL_NULL */
+ dfield_t* field, /*!< in: field */
+ ulint len) /*!< in: length or UNIV_SQL_NULL */
{
ut_ad(field);
+#ifdef UNIV_VALGRIND_DEBUG
+ if (len != UNIV_SQL_NULL) UNIV_MEM_ASSERT_RW(field->data, len);
+#endif /* UNIV_VALGRIND_DEBUG */
+ field->ext = 0;
field->len = len;
}
-/*************************************************************************
+/*********************************************************************//**
+Determines if a field is SQL NULL
+@return nonzero if SQL null data */
+UNIV_INLINE
+ulint
+dfield_is_null(
+/*===========*/
+ const dfield_t* field) /*!< in: field */
+{
+ ut_ad(field);
+
+ return(field->len == UNIV_SQL_NULL);
+}
+
+/*********************************************************************//**
+Determines if a field is externally stored
+@return nonzero if externally stored */
+UNIV_INLINE
+ulint
+dfield_is_ext(
+/*==========*/
+ const dfield_t* field) /*!< in: field */
+{
+ ut_ad(field);
+
+ return(UNIV_UNLIKELY(field->ext));
+}
+
+/*********************************************************************//**
+Sets the "external storage" flag */
+UNIV_INLINE
+void
+dfield_set_ext(
+/*===========*/
+ dfield_t* field) /*!< in/out: field */
+{
+ ut_ad(field);
+
+ field->ext = 1;
+}
+
+/*********************************************************************//**
Sets pointer to the data and length in a field. */
UNIV_INLINE
void
dfield_set_data(
/*============*/
- dfield_t* field, /* in: field */
- const void* data, /* in: data */
- ulint len) /* in: length or UNIV_SQL_NULL */
+ dfield_t* field, /*!< in: field */
+ const void* data, /*!< in: data */
+ ulint len) /*!< in: length or UNIV_SQL_NULL */
{
ut_ad(field);
+#ifdef UNIV_VALGRIND_DEBUG
+ if (len != UNIV_SQL_NULL) UNIV_MEM_ASSERT_RW(data, len);
+#endif /* UNIV_VALGRIND_DEBUG */
field->data = (void*) data;
+ field->ext = 0;
field->len = len;
}
-/*************************************************************************
+/*********************************************************************//**
+Sets a data field to SQL NULL. */
+UNIV_INLINE
+void
+dfield_set_null(
+/*============*/
+ dfield_t* field) /*!< in/out: field */
+{
+ dfield_set_data(field, NULL, UNIV_SQL_NULL);
+}
+
+/*********************************************************************//**
Copies the data and len fields. */
UNIV_INLINE
void
dfield_copy_data(
/*=============*/
- dfield_t* field1, /* in: field to copy to */
- dfield_t* field2) /* in: field to copy from */
+ dfield_t* field1, /*!< out: field to copy to */
+ const dfield_t* field2) /*!< in: field to copy from */
{
ut_ad(field1 && field2);
field1->data = field2->data;
field1->len = field2->len;
+ field1->ext = field2->ext;
}
-/*************************************************************************
+/*********************************************************************//**
Copies a data field to another. */
UNIV_INLINE
void
dfield_copy(
/*========*/
- dfield_t* field1, /* in: field to copy to */
- dfield_t* field2) /* in: field to copy from */
+ dfield_t* field1, /*!< out: field to copy to */
+ const dfield_t* field2) /*!< in: field to copy from */
{
*field1 = *field2;
}
-/*************************************************************************
-Tests if data length and content is equal for two dfields. */
+/*********************************************************************//**
+Copies the data pointed to by a data field. */
+UNIV_INLINE
+void
+dfield_dup(
+/*=======*/
+ dfield_t* field, /*!< in/out: data field */
+ mem_heap_t* heap) /*!< in: memory heap where allocated */
+{
+ if (!dfield_is_null(field)) {
+ UNIV_MEM_ASSERT_RW(field->data, field->len);
+ field->data = mem_heap_dup(heap, field->data, field->len);
+ }
+}
+
+/*********************************************************************//**
+Tests if data length and content is equal for two dfields.
+@return TRUE if equal */
UNIV_INLINE
ibool
dfield_datas_are_binary_equal(
/*==========================*/
- /* out: TRUE if equal */
- dfield_t* field1, /* in: field */
- dfield_t* field2) /* in: field */
+ const dfield_t* field1, /*!< in: field */
+ const dfield_t* field2) /*!< in: field */
{
ulint len;
len = field1->len;
- if ((len != field2->len)
- || ((len != UNIV_SQL_NULL)
- && (0 != ut_memcmp(field1->data, field2->data,
- len)))) {
-
- return(FALSE);
- }
-
- return(TRUE);
+ return(len == field2->len
+ && (len == UNIV_SQL_NULL
+ || !memcmp(field1->data, field2->data, len)));
}
-/*************************************************************************
-Gets info bits in a data tuple. */
+/*********************************************************************//**
+Gets info bits in a data tuple.
+@return info bits */
UNIV_INLINE
ulint
dtuple_get_info_bits(
/*=================*/
- /* out: info bits */
- dtuple_t* tuple) /* in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
{
ut_ad(tuple);
return(tuple->info_bits);
}
-/*************************************************************************
+/*********************************************************************//**
Sets info bits in a data tuple. */
UNIV_INLINE
void
dtuple_set_info_bits(
/*=================*/
- dtuple_t* tuple, /* in: tuple */
- ulint info_bits) /* in: info bits */
+ dtuple_t* tuple, /*!< in: tuple */
+ ulint info_bits) /*!< in: info bits */
{
ut_ad(tuple);
tuple->info_bits = info_bits;
}
-/*************************************************************************
-Gets number of fields used in record comparisons. */
+/*********************************************************************//**
+Gets number of fields used in record comparisons.
+@return number of fields used in comparisons in rem0cmp.* */
UNIV_INLINE
ulint
dtuple_get_n_fields_cmp(
/*====================*/
- /* out: number of fields used in comparisons
- in rem0cmp.* */
- dtuple_t* tuple) /* in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
{
ut_ad(tuple);
return(tuple->n_fields_cmp);
}
-/*************************************************************************
+/*********************************************************************//**
Sets number of fields used in record comparisons. */
UNIV_INLINE
void
dtuple_set_n_fields_cmp(
/*====================*/
- dtuple_t* tuple, /* in: tuple */
- ulint n_fields_cmp) /* in: number of fields used in
+ dtuple_t* tuple, /*!< in: tuple */
+ ulint n_fields_cmp) /*!< in: number of fields used in
comparisons in rem0cmp.* */
{
ut_ad(tuple);
@@ -215,47 +305,49 @@ dtuple_set_n_fields_cmp(
tuple->n_fields_cmp = n_fields_cmp;
}
-/*************************************************************************
-Gets number of fields in a data tuple. */
+/*********************************************************************//**
+Gets number of fields in a data tuple.
+@return number of fields */
UNIV_INLINE
ulint
dtuple_get_n_fields(
/*================*/
- /* out: number of fields */
- dtuple_t* tuple) /* in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
{
ut_ad(tuple);
return(tuple->n_fields);
}
-/*************************************************************************
-Gets nth field of a tuple. */
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Gets nth field of a tuple.
+@return nth field */
UNIV_INLINE
dfield_t*
dtuple_get_nth_field(
/*=================*/
- /* out: nth field */
- dtuple_t* tuple, /* in: tuple */
- ulint n) /* in: index of field */
+ const dtuple_t* tuple, /*!< in: tuple */
+ ulint n) /*!< in: index of field */
{
ut_ad(tuple);
ut_ad(n < tuple->n_fields);
- return(tuple->fields + n);
+ return((dfield_t*) tuple->fields + n);
}
+#endif /* UNIV_DEBUG */
-/**************************************************************
+/**********************************************************//**
Creates a data tuple to a memory heap. The default value for number
-of fields used in record comparisons for this tuple is n_fields. */
+of fields used in record comparisons for this tuple is n_fields.
+@return own: created tuple */
UNIV_INLINE
dtuple_t*
dtuple_create(
/*==========*/
- /* out, own: created tuple */
- mem_heap_t* heap, /* in: memory heap where the tuple
+ mem_heap_t* heap, /*!< in: memory heap where the tuple
is created */
- ulint n_fields) /* in: number of fields */
+ ulint n_fields) /*!< in: number of fields */
{
dtuple_t* tuple;
@@ -266,7 +358,7 @@ dtuple_create(
tuple->info_bits = 0;
tuple->n_fields = n_fields;
tuple->n_fields_cmp = n_fields;
- tuple->fields = (dfield_t*)(((byte*)tuple) + sizeof(dtuple_t));
+ tuple->fields = (dfield_t*) &tuple[1];
#ifdef UNIV_DEBUG
tuple->magic_n = DATA_TUPLE_MAGIC_N;
@@ -275,26 +367,78 @@ dtuple_create(
ulint i;
for (i = 0; i < n_fields; i++) {
- (tuple->fields + i)->data = &data_error;
- dfield_get_type(tuple->fields + i)->mtype = DATA_ERROR;
+ dfield_t* field;
+
+ field = dtuple_get_nth_field(tuple, i);
+
+ dfield_set_len(field, UNIV_SQL_NULL);
+ field->data = &data_error;
+ dfield_get_type(field)->mtype = DATA_ERROR;
}
}
+
+ UNIV_MEM_INVALID(tuple->fields, n_fields * sizeof *tuple->fields);
#endif
return(tuple);
}
-/**************************************************************
+/**********************************************************//**
+Wrap data fields in a tuple. The default value for number
+of fields used in record comparisons for this tuple is n_fields.
+@return data tuple */
+UNIV_INLINE
+const dtuple_t*
+dtuple_from_fields(
+/*===============*/
+ dtuple_t* tuple, /*!< in: storage for data tuple */
+ const dfield_t* fields, /*!< in: fields */
+ ulint n_fields) /*!< in: number of fields */
+{
+ tuple->info_bits = 0;
+ tuple->n_fields = tuple->n_fields_cmp = n_fields;
+ tuple->fields = (dfield_t*) fields;
+ ut_d(tuple->magic_n = DATA_TUPLE_MAGIC_N);
+
+ return(tuple);
+}
+
+/*********************************************************************//**
+Copies a data tuple to another. This is a shallow copy; if a deep copy
+is desired, dfield_dup() will have to be invoked on each field.
+@return own: copy of tuple */
+UNIV_INLINE
+dtuple_t*
+dtuple_copy(
+/*========*/
+ const dtuple_t* tuple, /*!< in: tuple to copy from */
+ mem_heap_t* heap) /*!< in: memory heap
+ where the tuple is created */
+{
+ ulint n_fields = dtuple_get_n_fields(tuple);
+ dtuple_t* new_tuple = dtuple_create(heap, n_fields);
+ ulint i;
+
+ for (i = 0; i < n_fields; i++) {
+ dfield_copy(dtuple_get_nth_field(new_tuple, i),
+ dtuple_get_nth_field(tuple, i));
+ }
+
+ return(new_tuple);
+}
+
+/**********************************************************//**
The following function returns the sum of data lengths of a tuple. The space
occupied by the field structs or the tuple struct is not counted. Neither
-is possible space in externally stored parts of the field. */
+is possible space in externally stored parts of the field.
+@return sum of data lengths */
UNIV_INLINE
ulint
dtuple_get_data_size(
/*=================*/
- /* out: sum of data lengths */
- dtuple_t* tuple) /* in: typed data tuple */
+ const dtuple_t* tuple, /*!< in: typed data tuple */
+ ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
{
- dfield_t* field;
+ const dfield_t* field;
ulint n_fields;
ulint len;
ulint i;
@@ -311,7 +455,8 @@ dtuple_get_data_size(
len = dfield_get_len(field);
if (len == UNIV_SQL_NULL) {
- len = dtype_get_sql_null_size(dfield_get_type(field));
+ len = dtype_get_sql_null_size(dfield_get_type(field),
+ comp);
}
sum += len;
@@ -320,14 +465,38 @@ dtuple_get_data_size(
return(sum);
}
-/***********************************************************************
+/*********************************************************************//**
+Computes the number of externally stored fields in a data tuple by
+adding up the ext member of every field of the tuple.
+@return number of externally stored fields */
+UNIV_INLINE
+ulint
+dtuple_get_n_ext(
+/*=============*/
+ const dtuple_t* tuple) /*!< in: tuple */
+{
+ ulint n_ext = 0;
+ ulint n_fields;
+ ulint i;
+
+ /* Check the pointer before it is dereferenced for the first
+ time; reading tuple->n_fields in the declarations above would
+ make the ut_ad(tuple) check pointless. */
+ ut_ad(tuple);
+ ut_ad(dtuple_check_typed(tuple));
+ ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
+
+ n_fields = tuple->n_fields;
+
+ for (i = 0; i < n_fields; i++) {
+ n_ext += dtuple_get_nth_field(tuple, i)->ext;
+ }
+
+ return(n_ext);
+}
+
+/*******************************************************************//**
Sets types of fields binary in a tuple. */
UNIV_INLINE
void
dtuple_set_types_binary(
/*====================*/
- dtuple_t* tuple, /* in: data tuple */
- ulint n) /* in: number of fields to set */
+ dtuple_t* tuple, /*!< in: data tuple */
+ ulint n) /*!< in: number of fields to set */
{
dtype_t* dfield_type;
ulint i;
@@ -338,22 +507,22 @@ dtuple_set_types_binary(
}
}
-/****************************************************************
-Folds a prefix given as the number of fields of a tuple. */
+/************************************************************//**
+Folds a prefix given as the number of fields of a tuple.
+@return the folded value */
UNIV_INLINE
ulint
dtuple_fold(
/*========*/
- /* out: the folded value */
- dtuple_t* tuple, /* in: the tuple */
- ulint n_fields,/* in: number of complete fields to fold */
- ulint n_bytes,/* in: number of bytes to fold in an
+ const dtuple_t* tuple, /*!< in: the tuple */
+ ulint n_fields,/*!< in: number of complete fields to fold */
+ ulint n_bytes,/*!< in: number of bytes to fold in an
incomplete last field */
- dulint tree_id)/* in: index tree id */
+ dulint tree_id)/*!< in: index tree id */
{
- dfield_t* field;
+ const dfield_t* field;
ulint i;
- byte* data;
+ const byte* data;
ulint len;
ulint fold;
@@ -366,7 +535,7 @@ dtuple_fold(
for (i = 0; i < n_fields; i++) {
field = dtuple_get_nth_field(tuple, i);
- data = (byte*) dfield_get_data(field);
+ data = (const byte*) dfield_get_data(field);
len = dfield_get_len(field);
if (len != UNIV_SQL_NULL) {
@@ -378,7 +547,7 @@ dtuple_fold(
if (n_bytes > 0) {
field = dtuple_get_nth_field(tuple, i);
- data = (byte*) dfield_get_data(field);
+ data = (const byte*) dfield_get_data(field);
len = dfield_get_len(field);
if (len != UNIV_SQL_NULL) {
@@ -394,30 +563,26 @@ dtuple_fold(
return(fold);
}
-/**************************************************************************
+/**********************************************************************//**
Writes an SQL null field full of zeros. */
UNIV_INLINE
void
data_write_sql_null(
/*================*/
- byte* data, /* in: pointer to a buffer of size len */
- ulint len) /* in: SQL null size in bytes */
+ byte* data, /*!< in: pointer to a buffer of size len */
+ ulint len) /*!< in: SQL null size in bytes */
{
- ulint j;
-
- for (j = 0; j < len; j++) {
- data[j] = '\0';
- }
+ memset(data, 0, len);
}
-/**************************************************************************
-Checks if a dtuple contains an SQL null value. */
+/**********************************************************************//**
+Checks if a dtuple contains an SQL null value.
+@return TRUE if some field is SQL null */
UNIV_INLINE
ibool
dtuple_contains_null(
/*=================*/
- /* out: TRUE if some field is SQL null */
- dtuple_t* tuple) /* in: dtuple */
+ const dtuple_t* tuple) /*!< in: dtuple */
{
ulint n;
ulint i;
@@ -425,8 +590,7 @@ dtuple_contains_null(
n = dtuple_get_n_fields(tuple);
for (i = 0; i < n; i++) {
- if (dfield_get_len(dtuple_get_nth_field(tuple, i))
- == UNIV_SQL_NULL) {
+ if (dfield_is_null(dtuple_get_nth_field(tuple, i))) {
return(TRUE);
}
@@ -434,3 +598,15 @@ dtuple_contains_null(
return(FALSE);
}
+
+/**************************************************************//**
+Frees the memory in a big rec vector. This is done by freeing the
+memory heap that the vector points to (vector->heap).
+NOTE(review): presumably the vector itself lives in that heap, so it
+must not be used after this call -- confirm where the vector is created. */
+UNIV_INLINE
+void
+dtuple_big_rec_free(
+/*================*/
+ big_rec_t* vector) /*!< in, own: big rec vector; it is
+ freed in this function */
+{
+ mem_heap_free(vector->heap);
+}
diff --git a/storage/innobase/include/data0type.h b/storage/innobase/include/data0type.h
index e5e9c5076be..a73bed3a9f5 100644
--- a/storage/innobase/include/data0type.h
+++ b/storage/innobase/include/data0type.h
@@ -1,7 +1,24 @@
-/******************************************************
-Data types
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
-(c) 1996 Innobase Oy
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/data0type.h
+Data types
Created 1/16/1996 Heikki Tuuri
*******************************************************/
@@ -122,6 +139,8 @@ be less than 256 */
#define DATA_N_SYS_COLS 3 /* number of system columns defined above */
+#define DATA_SYS_PRTYPE_MASK 0xF /* mask to extract the above from prtype */
+
/* Flags ORed to the precise data type */
#define DATA_NOT_NULL 256 /* this is ORed to the precise type when
the column is declared as NOT NULL */
@@ -149,225 +168,240 @@ SQL null*/
store the charset-collation number; one byte is left unused, though */
#define DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE 6
-/*************************************************************************
-Gets the MySQL type code from a dtype. */
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Gets the MySQL type code from a dtype.
+@return MySQL type code; this is NOT an InnoDB type code! */
UNIV_INLINE
ulint
dtype_get_mysql_type(
/*=================*/
- /* out: MySQL type code; this is NOT an InnoDB
- type code! */
- dtype_t* type); /* in: type struct */
-/*************************************************************************
+ const dtype_t* type); /*!< in: type struct */
+/*********************************************************************//**
Determine how many bytes the first n characters of the given string occupy.
If the string is shorter than n characters, returns the number of bytes
-the characters in the string occupy. */
-
+the characters in the string occupy.
+@return length of the prefix, in bytes */
+UNIV_INTERN
ulint
dtype_get_at_most_n_mbchars(
/*========================*/
- /* out: length of the prefix,
- in bytes */
- ulint prtype, /* in: precise type */
- ulint mbminlen, /* in: minimum length of a
+ ulint prtype, /*!< in: precise type */
+ ulint mbminlen, /*!< in: minimum length of a
multi-byte character */
- ulint mbmaxlen, /* in: maximum length of a
+ ulint mbmaxlen, /*!< in: maximum length of a
multi-byte character */
- ulint prefix_len, /* in: length of the requested
+ ulint prefix_len, /*!< in: length of the requested
prefix, in characters, multiplied by
dtype_get_mbmaxlen(dtype) */
- ulint data_len, /* in: length of str (in bytes) */
- const char* str); /* in: the string whose prefix
+ ulint data_len, /*!< in: length of str (in bytes) */
+ const char* str); /*!< in: the string whose prefix
length is being determined */
-/*************************************************************************
+#endif /* !UNIV_HOTBACKUP */
+/*********************************************************************//**
Checks if a data main type is a string type. Also a BLOB is considered a
-string type. */
-
+string type.
+@return TRUE if string type */
+UNIV_INTERN
ibool
dtype_is_string_type(
/*=================*/
- /* out: TRUE if string type */
- ulint mtype); /* in: InnoDB main data type code: DATA_CHAR, ... */
-/*************************************************************************
+ ulint mtype); /*!< in: InnoDB main data type code: DATA_CHAR, ... */
+/*********************************************************************//**
Checks if a type is a binary string type. Note that for tables created with
< 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For
-those DATA_BLOB columns this function currently returns FALSE. */
-
+those DATA_BLOB columns this function currently returns FALSE.
+@return TRUE if binary string type */
+UNIV_INTERN
ibool
dtype_is_binary_string_type(
/*========================*/
- /* out: TRUE if binary string type */
- ulint mtype, /* in: main data type */
- ulint prtype);/* in: precise type */
-/*************************************************************************
+ ulint mtype, /*!< in: main data type */
+ ulint prtype);/*!< in: precise type */
+/*********************************************************************//**
Checks if a type is a non-binary string type. That is, dtype_is_string_type is
TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created
with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column.
-For those DATA_BLOB columns this function currently returns TRUE. */
-
+For those DATA_BLOB columns this function currently returns TRUE.
+@return TRUE if non-binary string type */
+UNIV_INTERN
ibool
dtype_is_non_binary_string_type(
/*============================*/
- /* out: TRUE if non-binary string type */
- ulint mtype, /* in: main data type */
- ulint prtype);/* in: precise type */
-/*************************************************************************
+ ulint mtype, /*!< in: main data type */
+ ulint prtype);/*!< in: precise type */
+/*********************************************************************//**
Sets a data type structure. */
UNIV_INLINE
void
dtype_set(
/*======*/
- dtype_t* type, /* in: type struct to init */
- ulint mtype, /* in: main data type */
- ulint prtype, /* in: precise type */
- ulint len); /* in: precision of type */
-/*************************************************************************
+ dtype_t* type, /*!< in: type struct to init */
+ ulint mtype, /*!< in: main data type */
+ ulint prtype, /*!< in: precise type */
+ ulint len); /*!< in: precision of type */
+/*********************************************************************//**
Copies a data type structure. */
UNIV_INLINE
void
dtype_copy(
/*=======*/
- dtype_t* type1, /* in: type struct to copy to */
- const dtype_t* type2); /* in: type struct to copy from */
-/*************************************************************************
-Gets the SQL main data type. */
+ dtype_t* type1, /*!< in: type struct to copy to */
+ const dtype_t* type2); /*!< in: type struct to copy from */
+/*********************************************************************//**
+Gets the SQL main data type.
+@return SQL main data type */
UNIV_INLINE
ulint
dtype_get_mtype(
/*============*/
- dtype_t* type);
-/*************************************************************************
-Gets the precise data type. */
+ const dtype_t* type); /*!< in: data type */
+/*********************************************************************//**
+Gets the precise data type.
+@return precise data type */
UNIV_INLINE
ulint
dtype_get_prtype(
/*=============*/
- dtype_t* type);
-/*************************************************************************
+ const dtype_t* type); /*!< in: data type */
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
Compute the mbminlen and mbmaxlen members of a data type structure. */
UNIV_INLINE
void
dtype_get_mblen(
/*============*/
- ulint mtype, /* in: main type */
- ulint prtype, /* in: precise type (and collation) */
- ulint* mbminlen, /* out: minimum length of a
+ ulint mtype, /*!< in: main type */
+ ulint prtype, /*!< in: precise type (and collation) */
+ ulint* mbminlen, /*!< out: minimum length of a
multi-byte character */
- ulint* mbmaxlen); /* out: maximum length of a
+ ulint* mbmaxlen); /*!< out: maximum length of a
multi-byte character */
-/*************************************************************************
-Gets the MySQL charset-collation code for MySQL string types. */
-
-ulint
-dtype_get_charset_coll_noninline(
-/*=============================*/
- ulint prtype);/* in: precise data type */
-/*************************************************************************
-Gets the MySQL charset-collation code for MySQL string types. */
+/*********************************************************************//**
+Gets the MySQL charset-collation code for MySQL string types.
+@return MySQL charset-collation code */
UNIV_INLINE
ulint
dtype_get_charset_coll(
/*===================*/
- ulint prtype);/* in: precise data type */
-/*************************************************************************
+ ulint prtype);/*!< in: precise data type */
+/*********************************************************************//**
Forms a precise type from the < 4.1.2 format precise type plus the
-charset-collation code. */
-
+charset-collation code.
+@return precise type, including the charset-collation code */
+UNIV_INTERN
ulint
dtype_form_prtype(
/*==============*/
- ulint old_prtype, /* in: the MySQL type code and the flags
+ ulint old_prtype, /*!< in: the MySQL type code and the flags
DATA_BINARY_TYPE etc. */
- ulint charset_coll); /* in: MySQL charset-collation code */
-/*************************************************************************
-Gets the type length. */
+ ulint charset_coll); /*!< in: MySQL charset-collation code */
+/*********************************************************************//**
+Determines if a MySQL string type is a subset of UTF-8. This function
+may return false negatives, in case further character-set collation
+codes are introduced in MySQL later.
+@return TRUE if a subset of UTF-8 */
+UNIV_INLINE
+ibool
+dtype_is_utf8(
+/*==========*/
+ ulint prtype);/*!< in: precise data type */
+#endif /* !UNIV_HOTBACKUP */
+/*********************************************************************//**
+Gets the type length.
+@return fixed length of the type, in bytes, or 0 if variable-length */
UNIV_INLINE
ulint
dtype_get_len(
/*==========*/
- dtype_t* type);
-/*************************************************************************
-Gets the minimum length of a character, in bytes. */
+ const dtype_t* type); /*!< in: data type */
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Gets the minimum length of a character, in bytes.
+@return minimum length of a char, in bytes, or 0 if this is not a
+character type */
UNIV_INLINE
ulint
dtype_get_mbminlen(
/*===============*/
- /* out: minimum length of a char, in bytes,
- or 0 if this is not a character type */
- const dtype_t* type); /* in: type */
-/*************************************************************************
-Gets the maximum length of a character, in bytes. */
+ const dtype_t* type); /*!< in: type */
+/*********************************************************************//**
+Gets the maximum length of a character, in bytes.
+@return maximum length of a char, in bytes, or 0 if this is not a
+character type */
UNIV_INLINE
ulint
dtype_get_mbmaxlen(
/*===============*/
- /* out: maximum length of a char, in bytes,
- or 0 if this is not a character type */
- const dtype_t* type); /* in: type */
-/*************************************************************************
-Gets the padding character code for the type. */
+ const dtype_t* type); /*!< in: type */
+/*********************************************************************//**
+Gets the padding character code for the type.
+@return padding character code, or ULINT_UNDEFINED if no padding specified */
UNIV_INLINE
ulint
dtype_get_pad_char(
/*===============*/
- /* out: padding character code, or
- ULINT_UNDEFINED if no padding specified */
- ulint mtype, /* in: main type */
- ulint prtype); /* in: precise type */
-/***************************************************************************
-Returns the size of a fixed size data type, 0 if not a fixed size type. */
+ ulint mtype, /*!< in: main type */
+ ulint prtype); /*!< in: precise type */
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************************//**
+Returns the size of a fixed size data type, 0 if not a fixed size type.
+@return fixed size, or 0 */
UNIV_INLINE
ulint
dtype_get_fixed_size_low(
/*=====================*/
- /* out: fixed size, or 0 */
- ulint mtype, /* in: main type */
- ulint prtype, /* in: precise type */
- ulint len, /* in: length */
- ulint mbminlen, /* in: minimum length of a multibyte char */
- ulint mbmaxlen); /* in: maximum length of a multibyte char */
-/***************************************************************************
-Returns the minimum size of a data type. */
+ ulint mtype, /*!< in: main type */
+ ulint prtype, /*!< in: precise type */
+ ulint len, /*!< in: length */
+ ulint mbminlen, /*!< in: minimum length of a multibyte char */
+ ulint mbmaxlen, /*!< in: maximum length of a multibyte char */
+ ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
+#ifndef UNIV_HOTBACKUP
+/***********************************************************************//**
+Returns the minimum size of a data type.
+@return minimum size */
UNIV_INLINE
ulint
dtype_get_min_size_low(
/*===================*/
- /* out: minimum size */
- ulint mtype, /* in: main type */
- ulint prtype, /* in: precise type */
- ulint len, /* in: length */
- ulint mbminlen, /* in: minimum length of a multibyte char */
- ulint mbmaxlen); /* in: maximum length of a multibyte char */
-/***************************************************************************
+ ulint mtype, /*!< in: main type */
+ ulint prtype, /*!< in: precise type */
+ ulint len, /*!< in: length */
+ ulint mbminlen, /*!< in: minimum length of a multibyte char */
+ ulint mbmaxlen); /*!< in: maximum length of a multibyte char */
+/***********************************************************************//**
Returns the maximum size of a data type. Note: types in system tables may be
-incomplete and return incorrect information. */
+incomplete and return incorrect information.
+@return maximum size */
UNIV_INLINE
ulint
dtype_get_max_size_low(
/*===================*/
- /* out: maximum size */
- ulint mtype, /* in: main type */
- ulint len); /* in: length */
-/***************************************************************************
+ ulint mtype, /*!< in: main type */
+ ulint len); /*!< in: length */
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************************//**
Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type.
-For fixed length types it is the fixed length of the type, otherwise 0. */
+For fixed length types it is the fixed length of the type, otherwise 0.
+@return SQL null storage size in ROW_FORMAT=REDUNDANT */
UNIV_INLINE
ulint
dtype_get_sql_null_size(
/*====================*/
- /* out: SQL null storage size
- in ROW_FORMAT=REDUNDANT */
- const dtype_t* type); /* in: type */
-/**************************************************************************
+ const dtype_t* type, /*!< in: type */
+ ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
Reads to a type the stored information which determines its alphabetical
ordering and the storage size of an SQL NULL value. This is the < 4.1.x
storage format. */
UNIV_INLINE
void
dtype_read_for_order_and_null_size(
/*===============================*/
- dtype_t* type, /* in: type struct */
- byte* buf); /* in: buffer for the stored order info */
-/**************************************************************************
+ dtype_t* type, /*!< in: type struct */
+ const byte* buf); /*!< in: buffer for the stored order info */
+/**********************************************************************//**
Stores for a type the information which determines its alphabetical ordering
and the storage size of an SQL NULL value. This is the >= 4.1.x storage
format. */
@@ -375,13 +409,13 @@ UNIV_INLINE
void
dtype_new_store_for_order_and_null_size(
/*====================================*/
- byte* buf, /* in: buffer for
+ byte* buf, /*!< in: buffer for
DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
bytes where we store the info */
- dtype_t* type, /* in: type struct */
- ulint prefix_len);/* in: prefix length to
+ const dtype_t* type, /*!< in: type struct */
+ ulint prefix_len);/*!< in: prefix length to
replace type->len, or 0 */
-/**************************************************************************
+/**********************************************************************//**
Reads to a type the stored information which determines its alphabetical
ordering and the storage size of an SQL NULL value. This is the >= 4.1.x storage
format. */
@@ -389,24 +423,25 @@ UNIV_INLINE
void
dtype_new_read_for_order_and_null_size(
/*===================================*/
- dtype_t* type, /* in: type struct */
- byte* buf); /* in: buffer for stored type order info */
-
-/*************************************************************************
-Validates a data type structure. */
-
+ dtype_t* type, /*!< in: type struct */
+ const byte* buf); /*!< in: buffer for stored type order info */
+#endif /* !UNIV_HOTBACKUP */
+
+/*********************************************************************//**
+Validates a data type structure.
+@return TRUE if ok */
+UNIV_INTERN
ibool
dtype_validate(
/*===========*/
- /* out: TRUE if ok */
- dtype_t* type); /* in: type struct to validate */
-/*************************************************************************
+ const dtype_t* type); /*!< in: type struct to validate */
+/*********************************************************************//**
Prints a data type structure. */
-
+UNIV_INTERN
void
dtype_print(
/*========*/
- dtype_t* type); /* in: type */
+ const dtype_t* type); /*!< in: type */
/* Structure for an SQL data type.
If you add fields to this structure, be sure to initialize them everywhere.
@@ -417,8 +452,8 @@ dtype_new_read_for_order_and_null_size()
sym_tab_add_null_lit() */
struct dtype_struct{
- unsigned mtype:8; /* main data type */
- unsigned prtype:24; /* precise type; MySQL data
+ unsigned mtype:8; /*!< main data type */
+ unsigned prtype:24; /*!< precise type; MySQL data
type, charset code, flags to
indicate nullability,
signedness, whether this is a
@@ -428,7 +463,7 @@ struct dtype_struct{
/* the remaining fields do not affect alphabetical ordering: */
- unsigned len:16; /* length; for MySQL data this
+ unsigned len:16; /*!< length; for MySQL data this
is field->pack_length(),
except that for a >= 5.0.3
type true VARCHAR this is the
@@ -436,11 +471,12 @@ struct dtype_struct{
string data (in addition to
the string, MySQL uses 1 or 2
bytes to store the string length) */
-
- unsigned mbminlen:2; /* minimum length of a
+#ifndef UNIV_HOTBACKUP
+ unsigned mbminlen:2; /*!< minimum length of a
character, in bytes */
- unsigned mbmaxlen:3; /* maximum length of a
+ unsigned mbmaxlen:3; /*!< maximum length of a
character, in bytes */
+#endif /* !UNIV_HOTBACKUP */
};
#ifndef UNIV_NONINL
diff --git a/storage/innobase/include/data0type.ic b/storage/innobase/include/data0type.ic
index ad0f95755d2..240b4288f39 100644
--- a/storage/innobase/include/data0type.ic
+++ b/storage/innobase/include/data0type.ic
@@ -1,88 +1,112 @@
-/******************************************************
-Data types
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
-(c) 1996 Innobase Oy
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/data0type.ic
+Data types
Created 1/16/1996 Heikki Tuuri
*******************************************************/
#include "mach0data.h"
-
#ifndef UNIV_HOTBACKUP
-/**********************************************************************
-Get the variable length bounds of the given character set.
+# include "ha_prototypes.h"
-NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
-this function, you MUST change also the prototype here! */
-extern
-void
-innobase_get_cset_width(
-/*====================*/
- ulint cset, /* in: MySQL charset-collation code */
- ulint* mbminlen, /* out: minimum length of a char (in bytes) */
- ulint* mbmaxlen); /* out: maximum length of a char (in bytes) */
-#endif /* !UNIV_HOTBACKUP */
-
-/*************************************************************************
-Gets the MySQL charset-collation code for MySQL string types. */
+/*********************************************************************//**
+Gets the MySQL charset-collation code for MySQL string types.
+@return MySQL charset-collation code */
UNIV_INLINE
ulint
dtype_get_charset_coll(
/*===================*/
- ulint prtype) /* in: precise data type */
+ ulint prtype) /*!< in: precise data type */
{
return((prtype >> 16) & 0xFFUL);
}
-/*************************************************************************
-Gets the MySQL type code from a dtype. */
+/*********************************************************************//**
+Determines if a MySQL string type is a subset of UTF-8. This function
+may return false negatives, in case further character-set collation
+codes are introduced in MySQL later.
+The charset-collation code is extracted from prtype with
+dtype_get_charset_coll() and compared against a fixed list of codes.
+@return TRUE if a subset of UTF-8 */
+UNIV_INLINE
+ibool
+dtype_is_utf8(
+/*==========*/
+ ulint prtype) /*!< in: precise data type */
+{
+ /* These codes have been copied from strings/ctype-extra.c
+ and strings/ctype-utf8.c. */
+ switch (dtype_get_charset_coll(prtype)) {
+ case 11: /* ascii_general_ci */
+ case 65: /* ascii_bin */
+ case 33: /* utf8_general_ci */
+ case 83: /* utf8_bin */
+ case 254: /* utf8_general_cs */
+ return(TRUE);
+ }
+
+ return(FALSE); /* code is not in the list above */
+}
+
+/*********************************************************************//**
+Gets the MySQL type code from a dtype.
+@return MySQL type code; this is NOT an InnoDB type code! */
UNIV_INLINE
ulint
dtype_get_mysql_type(
/*=================*/
- /* out: MySQL type code; this is NOT an InnoDB
- type code! */
- dtype_t* type) /* in: type struct */
+ const dtype_t* type) /*!< in: type struct */
{
return(type->prtype & 0xFFUL);
}
-/*************************************************************************
+/*********************************************************************//**
Compute the mbminlen and mbmaxlen members of a data type structure. */
UNIV_INLINE
void
dtype_get_mblen(
/*============*/
- ulint mtype, /* in: main type */
- ulint prtype, /* in: precise type (and collation) */
- ulint* mbminlen, /* out: minimum length of a
+ ulint mtype, /*!< in: main type */
+ ulint prtype, /*!< in: precise type (and collation) */
+ ulint* mbminlen, /*!< out: minimum length of a
multi-byte character */
- ulint* mbmaxlen) /* out: maximum length of a
+ ulint* mbmaxlen) /*!< out: maximum length of a
multi-byte character */
{
if (dtype_is_string_type(mtype)) {
-#ifndef UNIV_HOTBACKUP
innobase_get_cset_width(dtype_get_charset_coll(prtype),
mbminlen, mbmaxlen);
ut_ad(*mbminlen <= *mbmaxlen);
ut_ad(*mbminlen <= 2); /* mbminlen in dtype_t is 0..3 */
ut_ad(*mbmaxlen < 1 << 3); /* mbmaxlen in dtype_t is 0..7 */
-#else /* !UNIV_HOTBACKUP */
- ut_a(mtype <= DATA_BINARY);
- *mbminlen = *mbmaxlen = 1;
-#endif /* !UNIV_HOTBACKUP */
} else {
*mbminlen = *mbmaxlen = 0;
}
}
-/*************************************************************************
+/*********************************************************************//**
Compute the mbminlen and mbmaxlen members of a data type structure. */
UNIV_INLINE
void
dtype_set_mblen(
/*============*/
- dtype_t* type) /* in/out: type */
+ dtype_t* type) /*!< in/out: type */
{
ulint mbminlen;
ulint mbmaxlen;
@@ -93,17 +117,20 @@ dtype_set_mblen(
ut_ad(dtype_validate(type));
}
+#else /* !UNIV_HOTBACKUP */
+# define dtype_set_mblen(type) (void) 0
+#endif /* !UNIV_HOTBACKUP */
-/*************************************************************************
+/*********************************************************************//**
Sets a data type structure. */
UNIV_INLINE
void
dtype_set(
/*======*/
- dtype_t* type, /* in: type struct to init */
- ulint mtype, /* in: main data type */
- ulint prtype, /* in: precise type */
- ulint len) /* in: precision of type */
+ dtype_t* type, /*!< in: type struct to init */
+ ulint mtype, /*!< in: main data type */
+ ulint prtype, /*!< in: precise type */
+ ulint len) /*!< in: precision of type */
{
ut_ad(type);
ut_ad(mtype <= DATA_MTYPE_MAX);
@@ -115,96 +142,99 @@ dtype_set(
dtype_set_mblen(type);
}
-/*************************************************************************
+/*********************************************************************//**
Copies a data type structure. */
UNIV_INLINE
void
dtype_copy(
/*=======*/
- dtype_t* type1, /* in: type struct to copy to */
- const dtype_t* type2) /* in: type struct to copy from */
+ dtype_t* type1, /*!< in: type struct to copy to */
+ const dtype_t* type2) /*!< in: type struct to copy from */
{
*type1 = *type2;
ut_ad(dtype_validate(type1));
}
-/*************************************************************************
-Gets the SQL main data type. */
+/*********************************************************************//**
+Gets the SQL main data type.
+@return SQL main data type */
UNIV_INLINE
ulint
dtype_get_mtype(
/*============*/
- dtype_t* type)
+ const dtype_t* type) /*!< in: data type */
{
ut_ad(type);
return(type->mtype);
}
-/*************************************************************************
-Gets the precise data type. */
+/*********************************************************************//**
+Gets the precise data type.
+@return precise data type */
UNIV_INLINE
ulint
dtype_get_prtype(
/*=============*/
- dtype_t* type)
+ const dtype_t* type) /*!< in: data type */
{
ut_ad(type);
return(type->prtype);
}
-/*************************************************************************
-Gets the type length. */
+/*********************************************************************//**
+Gets the type length.
+@return fixed length of the type, in bytes, or 0 if variable-length */
UNIV_INLINE
ulint
dtype_get_len(
/*==========*/
- dtype_t* type)
+ const dtype_t* type) /*!< in: data type */
{
ut_ad(type);
return(type->len);
}
-/*************************************************************************
-Gets the minimum length of a character, in bytes. */
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Gets the minimum length of a character, in bytes.
+@return minimum length of a char, in bytes, or 0 if this is not a
+character type */
UNIV_INLINE
ulint
dtype_get_mbminlen(
/*===============*/
- /* out: minimum length of a char, in bytes,
- or 0 if this is not a character type */
- const dtype_t* type) /* in: type */
+ const dtype_t* type) /*!< in: type */
{
ut_ad(type);
return(type->mbminlen);
}
-/*************************************************************************
-Gets the maximum length of a character, in bytes. */
+/*********************************************************************//**
+Gets the maximum length of a character, in bytes.
+@return maximum length of a char, in bytes, or 0 if this is not a
+character type */
UNIV_INLINE
ulint
dtype_get_mbmaxlen(
/*===============*/
- /* out: maximum length of a char, in bytes,
- or 0 if this is not a character type */
- const dtype_t* type) /* in: type */
+ const dtype_t* type) /*!< in: type */
{
ut_ad(type);
return(type->mbmaxlen);
}
-/*************************************************************************
-Gets the padding character code for a type. */
+/*********************************************************************//**
+Gets the padding character code for a type.
+@return padding character code, or ULINT_UNDEFINED if no padding specified */
UNIV_INLINE
ulint
dtype_get_pad_char(
/*===============*/
- /* out: padding character code, or
- ULINT_UNDEFINED if no padding specified */
- ulint mtype, /* in: main type */
- ulint prtype) /* in: precise type */
+ ulint mtype, /*!< in: main type */
+ ulint prtype) /*!< in: precise type */
{
switch (mtype) {
case DATA_FIXBINARY:
@@ -235,7 +265,7 @@ dtype_get_pad_char(
}
}
-/**************************************************************************
+/**********************************************************************//**
Stores for a type the information which determines its alphabetical ordering
and the storage size of an SQL NULL value. This is the >= 4.1.x storage
format. */
@@ -243,11 +273,11 @@ UNIV_INLINE
void
dtype_new_store_for_order_and_null_size(
/*====================================*/
- byte* buf, /* in: buffer for
+ byte* buf, /*!< in: buffer for
DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
bytes where we store the info */
- dtype_t* type, /* in: type struct */
- ulint prefix_len)/* in: prefix length to
+ const dtype_t* type, /*!< in: type struct */
+ ulint prefix_len)/*!< in: prefix length to
replace type->len, or 0 */
{
#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
@@ -280,7 +310,7 @@ dtype_new_store_for_order_and_null_size(
}
}
-/**************************************************************************
+/**********************************************************************//**
Reads to a type the stored information which determines its alphabetical
ordering and the storage size of an SQL NULL value. This is the < 4.1.x
storage format. */
@@ -288,8 +318,8 @@ UNIV_INLINE
void
dtype_read_for_order_and_null_size(
/*===============================*/
- dtype_t* type, /* in: type struct */
- byte* buf) /* in: buffer for stored type order info */
+ dtype_t* type, /*!< in: type struct */
+ const byte* buf) /*!< in: buffer for stored type order info */
{
#if 4 != DATA_ORDER_NULL_TYPE_BUF_SIZE
# error "4 != DATA_ORDER_NULL_TYPE_BUF_SIZE"
@@ -309,7 +339,7 @@ dtype_read_for_order_and_null_size(
dtype_set_mblen(type);
}
-/**************************************************************************
+/**********************************************************************//**
Reads to a type the stored information which determines its alphabetical
ordering and the storage size of an SQL NULL value. This is the >= 4.1.x
storage format. */
@@ -317,8 +347,8 @@ UNIV_INLINE
void
dtype_new_read_for_order_and_null_size(
/*===================================*/
- dtype_t* type, /* in: type struct */
- byte* buf) /* in: buffer for stored type order info */
+ dtype_t* type, /*!< in: type struct */
+ const byte* buf) /*!< in: buffer for stored type order info */
{
ulint charset_coll;
@@ -339,8 +369,6 @@ dtype_new_read_for_order_and_null_size(
type->len = mach_read_from_2(buf + 2);
- mach_read_from_2(buf + 4);
-
charset_coll = mach_read_from_2(buf + 4) & 0x7fff;
if (dtype_is_string_type(type->mtype)) {
@@ -360,19 +388,21 @@ dtype_new_read_for_order_and_null_size(
}
dtype_set_mblen(type);
}
+#endif /* !UNIV_HOTBACKUP */
-/***************************************************************************
-Returns the size of a fixed size data type, 0 if not a fixed size type. */
+/***********************************************************************//**
+Returns the size of a fixed size data type, 0 if not a fixed size type.
+@return fixed size, or 0 */
UNIV_INLINE
ulint
dtype_get_fixed_size_low(
/*=====================*/
- /* out: fixed size, or 0 */
- ulint mtype, /* in: main type */
- ulint prtype, /* in: precise type */
- ulint len, /* in: length */
- ulint mbminlen, /* in: minimum length of a multibyte char */
- ulint mbmaxlen) /* in: maximum length of a multibyte char */
+ ulint mtype, /*!< in: main type */
+ ulint prtype, /*!< in: precise type */
+ ulint len, /*!< in: length */
+ ulint mbminlen, /*!< in: minimum length of a multibyte char */
+ ulint mbmaxlen, /*!< in: maximum length of a multibyte char */
+ ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
{
switch (mtype) {
case DATA_SYS:
@@ -399,14 +429,12 @@ dtype_get_fixed_size_low(
case DATA_DOUBLE:
return(len);
case DATA_MYSQL:
+#ifndef UNIV_HOTBACKUP
if (prtype & DATA_BINARY_TYPE) {
return(len);
+ } else if (!comp) {
+ return(len);
} else {
-#ifdef UNIV_HOTBACKUP
- if (mbminlen == mbmaxlen) {
- return(len);
- }
-#else /* UNIV_HOTBACKUP */
/* We play it safe here and ask MySQL for
mbminlen and mbmaxlen. Although
mbminlen and mbmaxlen are
@@ -438,8 +466,10 @@ dtype_get_fixed_size_low(
if (mbminlen == mbmaxlen) {
return(len);
}
-#endif /* !UNIV_HOTBACKUP */
}
+#else /* !UNIV_HOTBACKUP */
+ return(len);
+#endif /* !UNIV_HOTBACKUP */
/* fall through for variable-length charsets */
case DATA_VARCHAR:
case DATA_BINARY:
@@ -454,18 +484,19 @@ dtype_get_fixed_size_low(
return(0);
}
-/***************************************************************************
-Returns the minimum size of a data type. */
+#ifndef UNIV_HOTBACKUP
+/***********************************************************************//**
+Returns the minimum size of a data type.
+@return minimum size */
UNIV_INLINE
ulint
dtype_get_min_size_low(
/*===================*/
- /* out: minimum size */
- ulint mtype, /* in: main type */
- ulint prtype, /* in: precise type */
- ulint len, /* in: length */
- ulint mbminlen, /* in: minimum length of a multibyte char */
- ulint mbmaxlen) /* in: maximum length of a multibyte char */
+ ulint mtype, /*!< in: main type */
+ ulint prtype, /*!< in: precise type */
+ ulint len, /*!< in: length */
+ ulint mbminlen, /*!< in: minimum length of a multibyte char */
+ ulint mbmaxlen) /*!< in: maximum length of a multibyte char */
{
switch (mtype) {
case DATA_SYS:
@@ -513,16 +544,16 @@ dtype_get_min_size_low(
return(0);
}
-/***************************************************************************
+/***********************************************************************//**
Returns the maximum size of a data type. Note: types in system tables may be
-incomplete and return incorrect information. */
+incomplete and return incorrect information.
+@return maximum size */
UNIV_INLINE
ulint
dtype_get_max_size_low(
/*===================*/
- /* out: maximum size */
- ulint mtype, /* in: main type */
- ulint len) /* in: length */
+ ulint mtype, /*!< in: main type */
+ ulint len) /*!< in: length */
{
switch (mtype) {
case DATA_SYS:
@@ -545,18 +576,24 @@ dtype_get_max_size_low(
return(ULINT_MAX);
}
+#endif /* !UNIV_HOTBACKUP */
-/***************************************************************************
+/***********************************************************************//**
Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type.
-For fixed length types it is the fixed length of the type, otherwise 0. */
+For fixed length types it is the fixed length of the type, otherwise 0.
+@return SQL null storage size in ROW_FORMAT=REDUNDANT */
UNIV_INLINE
ulint
dtype_get_sql_null_size(
/*====================*/
- /* out: SQL null storage size
- in ROW_FORMAT=REDUNDANT */
- const dtype_t* type) /* in: type */
+ const dtype_t* type, /*!< in: type */
+ ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
{
+#ifndef UNIV_HOTBACKUP
return(dtype_get_fixed_size_low(type->mtype, type->prtype, type->len,
- type->mbminlen, type->mbmaxlen));
+ type->mbminlen, type->mbmaxlen, comp));
+#else /* !UNIV_HOTBACKUP */
+ return(dtype_get_fixed_size_low(type->mtype, type->prtype, type->len,
+ 0, 0, 0));
+#endif /* !UNIV_HOTBACKUP */
}
diff --git a/storage/innobase/include/data0types.h b/storage/innobase/include/data0types.h
index ab314f8f471..04e835bc401 100644
--- a/storage/innobase/include/data0types.h
+++ b/storage/innobase/include/data0types.h
@@ -1,7 +1,24 @@
-/************************************************************************
-Some type definitions
+/*****************************************************************************
+
+Copyright (c) 2000, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1994-2000 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/data0types.h
+Some type definitions
Created 9/21/2000 Heikki Tuuri
*************************************************************************/
diff --git a/storage/innobase/include/db0err.h b/storage/innobase/include/db0err.h
index ed7ce151718..747e9b5364e 100644
--- a/storage/innobase/include/db0err.h
+++ b/storage/innobase/include/db0err.h
@@ -1,7 +1,24 @@
-/******************************************************
-Global error codes for the database
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1996 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/db0err.h
+Global error codes for the database
Created 5/24/1996 Heikki Tuuri
*******************************************************/
@@ -10,71 +27,80 @@ Created 5/24/1996 Heikki Tuuri
#define db0err_h
-#define DB_SUCCESS 10
+enum db_err {
+ DB_SUCCESS = 10,
-/* The following are error codes */
-#define DB_ERROR 11
-#define DB_OUT_OF_MEMORY 12
-#define DB_OUT_OF_FILE_SPACE 13
-#define DB_LOCK_WAIT 14
-#define DB_DEADLOCK 15
-#define DB_ROLLBACK 16
-#define DB_DUPLICATE_KEY 17
-#define DB_QUE_THR_SUSPENDED 18
-#define DB_MISSING_HISTORY 19 /* required history data has been
+ /* The following are error codes */
+ DB_ERROR,
+ DB_INTERRUPTED,
+ DB_OUT_OF_MEMORY,
+ DB_OUT_OF_FILE_SPACE,
+ DB_LOCK_WAIT,
+ DB_DEADLOCK,
+ DB_ROLLBACK,
+ DB_DUPLICATE_KEY,
+ DB_QUE_THR_SUSPENDED,
+ DB_MISSING_HISTORY, /* required history data has been
deleted due to lack of space in
rollback segment */
-#define DB_CLUSTER_NOT_FOUND 30
-#define DB_TABLE_NOT_FOUND 31
-#define DB_MUST_GET_MORE_FILE_SPACE 32 /* the database has to be stopped
+ DB_CLUSTER_NOT_FOUND = 30,
+ DB_TABLE_NOT_FOUND,
+ DB_MUST_GET_MORE_FILE_SPACE, /* the database has to be stopped
and restarted with more file space */
-#define DB_TABLE_IS_BEING_USED 33
-#define DB_TOO_BIG_RECORD 34 /* a record in an index would become
- bigger than 1/2 free space in a page
- frame */
-#define DB_LOCK_WAIT_TIMEOUT 35 /* lock wait lasted too long */
-#define DB_NO_REFERENCED_ROW 36 /* referenced key value not found
+ DB_TABLE_IS_BEING_USED,
+ DB_TOO_BIG_RECORD, /* a record in an index would not fit
+ on a compressed page, or it would
+ become bigger than 1/2 free space in
+ an uncompressed page frame */
+ DB_LOCK_WAIT_TIMEOUT, /* lock wait lasted too long */
+ DB_NO_REFERENCED_ROW, /* referenced key value not found
for a foreign key in an insert or
update of a row */
-#define DB_ROW_IS_REFERENCED 37 /* cannot delete or update a row
+ DB_ROW_IS_REFERENCED, /* cannot delete or update a row
because it contains a key value
which is referenced */
-#define DB_CANNOT_ADD_CONSTRAINT 38 /* adding a foreign key constraint
+ DB_CANNOT_ADD_CONSTRAINT, /* adding a foreign key constraint
to a table failed */
-#define DB_CORRUPTION 39 /* data structure corruption noticed */
-#define DB_COL_APPEARS_TWICE_IN_INDEX 40/* InnoDB cannot handle an index
+ DB_CORRUPTION, /* data structure corruption noticed */
+ DB_COL_APPEARS_TWICE_IN_INDEX, /* InnoDB cannot handle an index
where same column appears twice */
-#define DB_CANNOT_DROP_CONSTRAINT 41 /* dropping a foreign key constraint
+ DB_CANNOT_DROP_CONSTRAINT, /* dropping a foreign key constraint
from a table failed */
-#define DB_NO_SAVEPOINT 42 /* no savepoint exists with the given
+ DB_NO_SAVEPOINT, /* no savepoint exists with the given
name */
-#define DB_TABLESPACE_ALREADY_EXISTS 43 /* we cannot create a new single-table
+ DB_TABLESPACE_ALREADY_EXISTS, /* we cannot create a new single-table
tablespace because a file of the same
name already exists */
-#define DB_TABLESPACE_DELETED 44 /* tablespace does not exist or is
+ DB_TABLESPACE_DELETED, /* tablespace does not exist or is
being dropped right now */
-#define DB_LOCK_TABLE_FULL 45 /* lock structs have exhausted the
+ DB_LOCK_TABLE_FULL, /* lock structs have exhausted the
buffer pool (for big transactions,
InnoDB stores the lock structs in the
buffer pool) */
-#define DB_FOREIGN_DUPLICATE_KEY 46 /* foreign key constraints
+ DB_FOREIGN_DUPLICATE_KEY, /* foreign key constraints
activated by the operation would
lead to a duplicate key in some
table */
-#define DB_TOO_MANY_CONCURRENT_TRXS 47 /* when InnoDB runs out of the
+ DB_TOO_MANY_CONCURRENT_TRXS, /* when InnoDB runs out of the
preconfigured undo slots, this can
only happen when there are too many
concurrent transactions */
-#define DB_UNSUPPORTED 48 /* when InnoDB sees any artefact or
+ DB_UNSUPPORTED, /* when InnoDB sees any artefact or
a feature that it can't recognize or
work with e.g., FT indexes created by
a later version of the engine. */
-/* The following are partial failure codes */
-#define DB_FAIL 1000
-#define DB_OVERFLOW 1001
-#define DB_UNDERFLOW 1002
-#define DB_STRONG_FAIL 1003
-#define DB_RECORD_NOT_FOUND 1500
-#define DB_END_OF_INDEX 1501
+
+ DB_PRIMARY_KEY_IS_NULL, /* a column in the PRIMARY KEY
+ was found to be NULL */
+
+ /* The following are partial failure codes */
+ DB_FAIL = 1000,
+ DB_OVERFLOW,
+ DB_UNDERFLOW,
+ DB_STRONG_FAIL,
+ DB_ZIP_OVERFLOW,
+ DB_RECORD_NOT_FOUND = 1500,
+ DB_END_OF_INDEX
+};
#endif
diff --git a/storage/innobase/include/dict0boot.h b/storage/innobase/include/dict0boot.h
index cac79410b24..51d37ee98d1 100644
--- a/storage/innobase/include/dict0boot.h
+++ b/storage/innobase/include/dict0boot.h
@@ -1,7 +1,24 @@
-/******************************************************
-Data dictionary creation and booting
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
-(c) 1996 Innobase Oy
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0boot.h
+Data dictionary creation and booting
Created 4/18/1996 Heikki Tuuri
*******************************************************/
@@ -20,56 +37,55 @@ Created 4/18/1996 Heikki Tuuri
typedef byte dict_hdr_t;
-/**************************************************************************
-Gets a pointer to the dictionary header and x-latches its page. */
-
+/**********************************************************************//**
+Gets a pointer to the dictionary header and x-latches its page.
+@return pointer to the dictionary header, page x-latched */
+UNIV_INTERN
dict_hdr_t*
dict_hdr_get(
/*=========*/
- /* out: pointer to the dictionary header,
- page x-latched */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
-Returns a new row, table, index, or tree id. */
-
+ mtr_t* mtr); /*!< in: mtr */
+/**********************************************************************//**
+Returns a new row, table, index, or tree id.
+@return the new id */
+UNIV_INTERN
dulint
dict_hdr_get_new_id(
/*================*/
- /* out: the new id */
- ulint type); /* in: DICT_HDR_ROW_ID, ... */
-/**************************************************************************
-Returns a new row id. */
+ ulint type); /*!< in: DICT_HDR_ROW_ID, ... */
+/**********************************************************************//**
+Returns a new row id.
+@return the new id */
UNIV_INLINE
dulint
dict_sys_get_new_row_id(void);
/*=========================*/
- /* out: the new id */
-/**************************************************************************
-Reads a row id from a record or other 6-byte stored form. */
+/**********************************************************************//**
+Reads a row id from a record or other 6-byte stored form.
+@return row id */
UNIV_INLINE
dulint
dict_sys_read_row_id(
/*=================*/
- /* out: row id */
- byte* field); /* in: record field */
-/**************************************************************************
+ byte* field); /*!< in: record field */
+/**********************************************************************//**
Writes a row id to a record or other 6-byte stored form. */
UNIV_INLINE
void
dict_sys_write_row_id(
/*==================*/
- byte* field, /* in: record field */
- dulint row_id);/* in: row id */
-/*********************************************************************
+ byte* field, /*!< in: record field */
+ dulint row_id);/*!< in: row id */
+/*****************************************************************//**
Initializes the data dictionary memory structures when the database is
started. This function is also called when the data dictionary is created. */
-
+UNIV_INTERN
void
dict_boot(void);
/*===========*/
-/*********************************************************************
+/*****************************************************************//**
Creates and initializes the data dictionary at the database creation. */
-
+UNIV_INTERN
void
dict_create(void);
/*=============*/
diff --git a/storage/innobase/include/dict0boot.ic b/storage/innobase/include/dict0boot.ic
index fe2a9e36653..d5f372e38c4 100644
--- a/storage/innobase/include/dict0boot.ic
+++ b/storage/innobase/include/dict0boot.ic
@@ -1,27 +1,44 @@
-/******************************************************
-Data dictionary creation and booting
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0boot.ic
+Data dictionary creation and booting
Created 4/18/1996 Heikki Tuuri
*******************************************************/
-/**************************************************************************
+/**********************************************************************//**
Writes the current value of the row id counter to the dictionary header file
page. */
-
+UNIV_INTERN
void
dict_hdr_flush_row_id(void);
/*=======================*/
-/**************************************************************************
-Returns a new row id. */
+/**********************************************************************//**
+Returns a new row id.
+@return the new id */
UNIV_INLINE
dulint
dict_sys_get_new_row_id(void)
/*=========================*/
- /* out: the new id */
{
dulint id;
@@ -41,14 +58,14 @@ dict_sys_get_new_row_id(void)
return(id);
}
-/**************************************************************************
-Reads a row id from a record or other 6-byte stored form. */
+/**********************************************************************//**
+Reads a row id from a record or other 6-byte stored form.
+@return row id */
UNIV_INLINE
dulint
dict_sys_read_row_id(
/*=================*/
- /* out: row id */
- byte* field) /* in: record field */
+ byte* field) /*!< in: record field */
{
#if DATA_ROW_ID_LEN != 6
# error "DATA_ROW_ID_LEN != 6"
@@ -57,14 +74,14 @@ dict_sys_read_row_id(
return(mach_read_from_6(field));
}
-/**************************************************************************
+/**********************************************************************//**
Writes a row id to a record or other 6-byte stored form. */
UNIV_INLINE
void
dict_sys_write_row_id(
/*==================*/
- byte* field, /* in: record field */
- dulint row_id) /* in: row id */
+ byte* field, /*!< in: record field */
+ dulint row_id) /*!< in: row id */
{
#if DATA_ROW_ID_LEN != 6
# error "DATA_ROW_ID_LEN != 6"
diff --git a/storage/innobase/include/dict0crea.h b/storage/innobase/include/dict0crea.h
index f0f30481abe..cce1246b789 100644
--- a/storage/innobase/include/dict0crea.h
+++ b/storage/innobase/include/dict0crea.h
@@ -1,7 +1,24 @@
-/******************************************************
-Database object creation
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0crea.h
+Database object creation
Created 1/8/1996 Heikki Tuuri
*******************************************************/
@@ -16,90 +33,92 @@ Created 1/8/1996 Heikki Tuuri
#include "row0types.h"
#include "mtr0mtr.h"
-/*************************************************************************
-Creates a table create graph. */
-
+/*********************************************************************//**
+Creates a table create graph.
+@return own: table create node */
+UNIV_INTERN
tab_node_t*
tab_create_graph_create(
/*====================*/
- /* out, own: table create node */
- dict_table_t* table, /* in: table to create, built as a memory data
+ dict_table_t* table, /*!< in: table to create, built as a memory data
structure */
- mem_heap_t* heap); /* in: heap where created */
-/*************************************************************************
-Creates an index create graph. */
-
+ mem_heap_t* heap); /*!< in: heap where created */
+/*********************************************************************//**
+Creates an index create graph.
+@return own: index create node */
+UNIV_INTERN
ind_node_t*
ind_create_graph_create(
/*====================*/
- /* out, own: index create node */
- dict_index_t* index, /* in: index to create, built as a memory data
+ dict_index_t* index, /*!< in: index to create, built as a memory data
structure */
- mem_heap_t* heap); /* in: heap where created */
-/***************************************************************
-Creates a table. This is a high-level function used in SQL execution graphs. */
-
+ mem_heap_t* heap); /*!< in: heap where created */
+/***********************************************************//**
+Creates a table. This is a high-level function used in SQL execution graphs.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
dict_create_table_step(
/*===================*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/***************************************************************
+ que_thr_t* thr); /*!< in: query thread */
+/***********************************************************//**
Creates an index. This is a high-level function used in SQL execution
-graphs. */
-
+graphs.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
dict_create_index_step(
/*===================*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/***********************************************************************
-Truncates the index tree associated with a row in SYS_INDEXES table. */
-
+ que_thr_t* thr); /*!< in: query thread */
+/*******************************************************************//**
+Truncates the index tree associated with a row in SYS_INDEXES table.
+@return new root page number, or FIL_NULL on failure */
+UNIV_INTERN
ulint
dict_truncate_index_tree(
/*=====================*/
- /* out: new root page number, or
- FIL_NULL on failure */
- dict_table_t* table, /* in: the table the index belongs to */
- btr_pcur_t* pcur, /* in/out: persistent cursor pointing to
+ dict_table_t* table, /*!< in: the table the index belongs to */
+ ulint space, /*!< in: 0=truncate,
+ nonzero=create the index tree in the
+ given tablespace */
+ btr_pcur_t* pcur, /*!< in/out: persistent cursor pointing to
record in the clustered index of
SYS_INDEXES table. The cursor may be
repositioned in this call. */
- mtr_t* mtr); /* in: mtr having the latch
+ mtr_t* mtr); /*!< in: mtr having the latch
on the record page. The mtr may be
committed and restarted in this call. */
-/***********************************************************************
+/*******************************************************************//**
Drops the index tree associated with a row in SYS_INDEXES table. */
-
+UNIV_INTERN
void
dict_drop_index_tree(
/*=================*/
- rec_t* rec, /* in: record in the clustered index of SYS_INDEXES
- table */
- mtr_t* mtr); /* in: mtr having the latch on the record page */
-/********************************************************************
+ rec_t* rec, /*!< in/out: record in the clustered index
+ of SYS_INDEXES table */
+ mtr_t* mtr); /*!< in: mtr having the latch on the record page */
+/****************************************************************//**
Creates the foreign key constraints system tables inside InnoDB
at database creation or database start if they are not found or are
-not of the right form. */
-
+not of the right form.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
ulint
dict_create_or_check_foreign_constraint_tables(void);
/*================================================*/
- /* out: DB_SUCCESS or error code */
-/************************************************************************
+/********************************************************************//**
Adds foreign key definitions to data dictionary tables in the database. We
look at table->foreign_list, and also generate names to constraints that were
not named by the user. A generated constraint has a name of the format
-databasename/tablename_ibfk_<number>, where the numbers start from 1, and are
+databasename/tablename_ibfk_NUMBER, where the numbers start from 1, and are
given locally for this table, that is, the number is not global, as in the
-old format constraints < 4.0.18 it used to be. */
-
+old format constraints < 4.0.18 it used to be.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
ulint
dict_create_add_foreigns_to_dictionary(
/*===================================*/
- /* out: error code or DB_SUCCESS */
- ulint start_id,/* in: if we are actually doing ALTER TABLE
+ ulint start_id,/*!< in: if we are actually doing ALTER TABLE
ADD CONSTRAINT, we want to generate constraint
numbers which are bigger than in the table so
far; we number the constraints from
@@ -107,15 +126,14 @@ dict_create_add_foreigns_to_dictionary(
we are creating a new table, or if the table
so far has no constraints for which the name
was generated here */
- dict_table_t* table, /* in: table */
- trx_t* trx); /* in: transaction */
-
+ dict_table_t* table, /*!< in: table */
+ trx_t* trx); /*!< in: transaction */
/* Table create node structure */
struct tab_node_struct{
- que_common_t common; /* node type: QUE_NODE_TABLE_CREATE */
- dict_table_t* table; /* table to create, built as a memory data
+ que_common_t common; /*!< node type: QUE_NODE_TABLE_CREATE */
+ dict_table_t* table; /*!< table to create, built as a memory data
structure with dict_mem_... functions */
ins_node_t* tab_def; /* child node which does the insert of
the table definition; the row to be inserted
@@ -128,9 +146,9 @@ struct tab_node_struct{
a successful table creation */
/*----------------------*/
/* Local storage for this graph node */
- ulint state; /* node execution state */
- ulint col_no; /* next column definition to insert */
- mem_heap_t* heap; /* memory heap used as auxiliary storage */
+ ulint state; /*!< node execution state */
+ ulint col_no; /*!< next column definition to insert */
+ mem_heap_t* heap; /*!< memory heap used as auxiliary storage */
};
/* Table create node states */
@@ -143,8 +161,8 @@ struct tab_node_struct{
/* Index create node struct */
struct ind_node_struct{
- que_common_t common; /* node type: QUE_NODE_INDEX_CREATE */
- dict_index_t* index; /* index to create, built as a memory data
+ que_common_t common; /*!< node type: QUE_NODE_INDEX_CREATE */
+ dict_index_t* index; /*!< index to create, built as a memory data
structure with dict_mem_... functions */
ins_node_t* ind_def; /* child node which does the insert of
the index definition; the row to be inserted
@@ -157,12 +175,12 @@ struct ind_node_struct{
a successful index creation */
/*----------------------*/
/* Local storage for this graph node */
- ulint state; /* node execution state */
+ ulint state; /*!< node execution state */
ulint page_no;/* root page number of the index */
- dict_table_t* table; /* table which owns the index */
+ dict_table_t* table; /*!< table which owns the index */
dtuple_t* ind_row;/* index definition row built */
ulint field_no;/* next field definition to insert */
- mem_heap_t* heap; /* memory heap used as auxiliary storage */
+ mem_heap_t* heap; /*!< memory heap used as auxiliary storage */
};
/* Index create node states */
diff --git a/storage/innobase/include/dict0crea.ic b/storage/innobase/include/dict0crea.ic
index b4da2d7e03f..c5365ce7489 100644
--- a/storage/innobase/include/dict0crea.ic
+++ b/storage/innobase/include/dict0crea.ic
@@ -1,7 +1,24 @@
-/******************************************************
-Database object creation
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0crea.ic
+Database object creation
Created 1/8/1996 Heikki Tuuri
*******************************************************/
diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
index 7d5ff09c7a6..12396556c2d 100644
--- a/storage/innobase/include/dict0dict.h
+++ b/storage/innobase/include/dict0dict.h
@@ -1,7 +1,24 @@
-/******************************************************
-Data dictionary system
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
-(c) 1996 Innobase Oy
+/**************************************************//**
+@file include/dict0dict.h
+Data dictionary system
Created 1/8/1996 Heikki Tuuri
*******************************************************/
@@ -14,11 +31,8 @@ Created 1/8/1996 Heikki Tuuri
#include "dict0mem.h"
#include "data0type.h"
#include "data0data.h"
-#include "sync0sync.h"
-#include "sync0rw.h"
#include "mem0mem.h"
#include "rem0types.h"
-#include "btr0types.h"
#include "ut0mem.h"
#include "ut0lst.h"
#include "hash0hash.h"
@@ -27,281 +41,310 @@ Created 1/8/1996 Heikki Tuuri
#include "trx0types.h"
#ifndef UNIV_HOTBACKUP
-/**********************************************************************
+# include "sync0sync.h"
+# include "sync0rw.h"
+/******************************************************************//**
Makes all characters in a NUL-terminated UTF-8 string lower case. */
-
+UNIV_INTERN
void
dict_casedn_str(
/*============*/
- char* a); /* in/out: string to put in lower case */
-#endif /* !UNIV_HOTBACKUP */
-/************************************************************************
-Get the database name length in a table name. */
-
+ char* a); /*!< in/out: string to put in lower case */
+/********************************************************************//**
+Get the database name length in a table name.
+@return database name length */
+UNIV_INTERN
ulint
dict_get_db_name_len(
/*=================*/
- /* out: database name length */
- const char* name); /* in: table name in the form
+ const char* name); /*!< in: table name in the form
dbname '/' tablename */
-/************************************************************************
-Return the end of table name where we have removed dbname and '/'. */
+/********************************************************************//**
+Return the end of table name where we have removed dbname and '/'.
+@return table name */
const char*
dict_remove_db_name(
/*================*/
- /* out: table name */
- const char* name); /* in: table name in the form
+ const char* name); /*!< in: table name in the form
dbname '/' tablename */
-/************************************************************************
+/**********************************************************************//**
+Returns a table object based on table id.
+@return table, NULL if does not exist */
+UNIV_INTERN
+dict_table_t*
+dict_table_get_on_id(
+/*=================*/
+ dulint table_id, /*!< in: table id */
+ trx_t* trx); /*!< in: transaction handle */
+/********************************************************************//**
Decrements the count of open MySQL handles to a table. */
-
+UNIV_INTERN
void
dict_table_decrement_handle_count(
/*==============================*/
- dict_table_t* table); /* in: table */
-/**************************************************************************
+ dict_table_t* table, /*!< in/out: table */
+ ibool dict_locked); /*!< in: TRUE=data dictionary locked */
+/**********************************************************************//**
Inits the data dictionary module. */
-
+UNIV_INTERN
void
dict_init(void);
/*===========*/
-/************************************************************************
+/********************************************************************//**
Gets the space id of every table of the data dictionary and makes a linear
list and a hash table of them to the data dictionary cache. This function
can be called at database startup if we did not need to do a crash recovery.
In crash recovery we must scan the space id's from the .ibd files in MySQL
database directories. */
-
+UNIV_INTERN
void
dict_load_space_id_list(void);
/*=========================*/
-/*************************************************************************
+/*********************************************************************//**
Gets the column data type. */
UNIV_INLINE
void
dict_col_copy_type(
/*===============*/
- const dict_col_t* col, /* in: column */
- dtype_t* type); /* out: data type */
-/*************************************************************************
-Gets the column data type. */
-
-void
-dict_col_copy_type_noninline(
-/*=========================*/
- const dict_col_t* col, /* in: column */
- dtype_t* type); /* out: data type */
+ const dict_col_t* col, /*!< in: column */
+ dtype_t* type); /*!< out: data type */
+#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_DEBUG
-/*************************************************************************
-Assert that a column and a data type match. */
+/*********************************************************************//**
+Assert that a column and a data type match.
+@return TRUE */
UNIV_INLINE
ibool
dict_col_type_assert_equal(
/*=======================*/
- /* out: TRUE */
- const dict_col_t* col, /* in: column */
- const dtype_t* type); /* in: data type */
+ const dict_col_t* col, /*!< in: column */
+ const dtype_t* type); /*!< in: data type */
#endif /* UNIV_DEBUG */
-/***************************************************************************
-Returns the minimum size of the column. */
+#ifndef UNIV_HOTBACKUP
+/***********************************************************************//**
+Returns the minimum size of the column.
+@return minimum size */
UNIV_INLINE
ulint
dict_col_get_min_size(
/*==================*/
- /* out: minimum size */
- const dict_col_t* col); /* in: column */
-/***************************************************************************
-Returns the maximum size of the column. */
+ const dict_col_t* col); /*!< in: column */
+/***********************************************************************//**
+Returns the maximum size of the column.
+@return maximum size */
UNIV_INLINE
ulint
dict_col_get_max_size(
/*==================*/
- /* out: maximum size */
- const dict_col_t* col); /* in: column */
-/***************************************************************************
-Returns the size of a fixed size column, 0 if not a fixed size column. */
+ const dict_col_t* col); /*!< in: column */
+/***********************************************************************//**
+Returns the size of a fixed size column, 0 if not a fixed size column.
+@return fixed size, or 0 */
UNIV_INLINE
ulint
dict_col_get_fixed_size(
/*====================*/
- /* out: fixed size, or 0 */
- const dict_col_t* col); /* in: column */
-/***************************************************************************
+ const dict_col_t* col, /*!< in: column */
+ ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
+/***********************************************************************//**
Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column.
-For fixed length types it is the fixed length of the type, otherwise 0. */
+For fixed length types it is the fixed length of the type, otherwise 0.
+@return SQL null storage size in ROW_FORMAT=REDUNDANT */
UNIV_INLINE
ulint
dict_col_get_sql_null_size(
/*=======================*/
- /* out: SQL null storage size
- in ROW_FORMAT=REDUNDANT */
- const dict_col_t* col); /* in: column */
+ const dict_col_t* col, /*!< in: column */
+ ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
-/*************************************************************************
-Gets the column number. */
+/*********************************************************************//**
+Gets the column number.
+@return col->ind, table column position (starting from 0) */
UNIV_INLINE
ulint
dict_col_get_no(
/*============*/
- const dict_col_t* col);
-/*************************************************************************
+ const dict_col_t* col); /*!< in: column */
+/*********************************************************************//**
Gets the column position in the clustered index. */
UNIV_INLINE
ulint
dict_col_get_clust_pos(
/*===================*/
- const dict_col_t* col, /* in: table column */
- const dict_index_t* clust_index); /* in: clustered index */
-/*************************************************************************
-Gets the column position in the clustered index. */
-
-ulint
-dict_col_get_clust_pos_noninline(
-/*=============================*/
- const dict_col_t* col, /* in: table column */
- const dict_index_t* clust_index); /* in: clustered index */
-/********************************************************************
+ const dict_col_t* col, /*!< in: table column */
+ const dict_index_t* clust_index); /*!< in: clustered index */
+/****************************************************************//**
If the given column name is reserved for InnoDB system columns, return
-TRUE. */
-
+TRUE.
+@return TRUE if name is reserved */
+UNIV_INTERN
ibool
dict_col_name_is_reserved(
/*======================*/
- /* out: TRUE if name is reserved */
- const char* name); /* in: column name */
-/************************************************************************
-Acquire the autoinc lock.*/
-
+ const char* name); /*!< in: column name */
+/********************************************************************//**
+Acquire the autoinc lock. */
+UNIV_INTERN
void
dict_table_autoinc_lock(
/*====================*/
- dict_table_t* table); /* in: table */
-/************************************************************************
+ dict_table_t* table); /*!< in/out: table */
+/********************************************************************//**
Unconditionally set the autoinc counter. */
-
+UNIV_INTERN
void
dict_table_autoinc_initialize(
/*==========================*/
- dict_table_t* table, /* in: table */
- ib_ulonglong value); /* in: next value to assign to a row */
-/************************************************************************
+ dict_table_t* table, /*!< in/out: table */
+ ib_uint64_t value); /*!< in: next value to assign to a row */
+/********************************************************************//**
Reads the next autoinc value (== autoinc counter value), 0 if not yet
-initialized. */
-
-ib_ulonglong
+initialized.
+@return value for a new row, or 0 */
+UNIV_INTERN
+ib_uint64_t
dict_table_autoinc_read(
/*====================*/
- /* out: value for a new row, or 0 */
- dict_table_t* table); /* in: table */
-/************************************************************************
+ const dict_table_t* table); /*!< in: table */
+/********************************************************************//**
Updates the autoinc counter if the value supplied is greater than the
current value. */
-
+UNIV_INTERN
void
dict_table_autoinc_update_if_greater(
/*=================================*/
- dict_table_t* table, /* in: table */
- ib_ulonglong value); /* in: value which was assigned to a row */
-/************************************************************************
-Release the autoinc lock.*/
-
+ dict_table_t* table, /*!< in/out: table */
+ ib_uint64_t value); /*!< in: value which was assigned to a row */
+/********************************************************************//**
+Release the autoinc lock. */
+UNIV_INTERN
void
dict_table_autoinc_unlock(
/*======================*/
- dict_table_t* table); /* in: table */
-/**************************************************************************
+ dict_table_t* table); /*!< in/out: table */
+#endif /* !UNIV_HOTBACKUP */
+/**********************************************************************//**
Adds system columns to a table object. */
-
+UNIV_INTERN
void
dict_table_add_system_columns(
/*==========================*/
- dict_table_t* table, /* in/out: table */
- mem_heap_t* heap); /* in: temporary heap */
-/**************************************************************************
+ dict_table_t* table, /*!< in/out: table */
+ mem_heap_t* heap); /*!< in: temporary heap */
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
Adds a table object to the dictionary cache. */
-
+UNIV_INTERN
void
dict_table_add_to_cache(
/*====================*/
- dict_table_t* table, /* in: table */
- mem_heap_t* heap); /* in: temporary heap */
-/**************************************************************************
+ dict_table_t* table, /*!< in: table */
+ mem_heap_t* heap); /*!< in: temporary heap */
+/**********************************************************************//**
Removes a table object from the dictionary cache. */
-
+UNIV_INTERN
void
dict_table_remove_from_cache(
/*=========================*/
- dict_table_t* table); /* in, own: table */
-/**************************************************************************
-Renames a table object. */
-
+ dict_table_t* table); /*!< in, own: table */
+/**********************************************************************//**
+Renames a table object.
+@return TRUE if success */
+UNIV_INTERN
ibool
dict_table_rename_in_cache(
/*=======================*/
- /* out: TRUE if success */
- dict_table_t* table, /* in: table */
- const char* new_name, /* in: new name */
- ibool rename_also_foreigns);/* in: in ALTER TABLE we want
+ dict_table_t* table, /*!< in/out: table */
+ const char* new_name, /*!< in: new name */
+ ibool rename_also_foreigns);/*!< in: in ALTER TABLE we want
to preserve the original table name
in constraints which reference it */
-/**************************************************************************
+/**********************************************************************//**
+Removes an index from the dictionary cache. */
+UNIV_INTERN
+void
+dict_index_remove_from_cache(
+/*=========================*/
+ dict_table_t* table, /*!< in/out: table */
+ dict_index_t* index); /*!< in, own: index */
+/**********************************************************************//**
Change the id of a table object in the dictionary cache. This is used in
DISCARD TABLESPACE. */
-
+UNIV_INTERN
void
dict_table_change_id_in_cache(
/*==========================*/
- dict_table_t* table, /* in: table object already in cache */
- dulint new_id);/* in: new id to set */
-/**************************************************************************
+ dict_table_t* table, /*!< in/out: table object already in cache */
+ dulint new_id);/*!< in: new id to set */
+/**********************************************************************//**
Adds a foreign key constraint object to the dictionary cache. May free
the object if there already is an object with the same identifier in.
At least one of foreign table or referenced table must already be in
-the dictionary cache! */
-
+the dictionary cache!
+@return DB_SUCCESS or error code */
+UNIV_INTERN
ulint
dict_foreign_add_to_cache(
/*======================*/
- /* out: DB_SUCCESS or error code */
- dict_foreign_t* foreign, /* in, own: foreign key constraint */
- ibool check_charsets);/* in: TRUE=check charset
+ dict_foreign_t* foreign, /*!< in, own: foreign key constraint */
+ ibool check_charsets);/*!< in: TRUE=check charset
compatibility */
-/*************************************************************************
-Checks if a table is referenced by foreign keys. */
-
-ibool
-dict_table_referenced_by_foreign_key(
+/*********************************************************************//**
+Check if the index is referenced by a foreign key. If it is, return the
+matching instance, NULL otherwise.
+@return pointer to foreign key struct if index is defined for foreign
+key, otherwise NULL */
+UNIV_INTERN
+dict_foreign_t*
+dict_table_get_referenced_constraint(
/*=================================*/
- /* out: TRUE if table is referenced by a
- foreign key */
- dict_table_t* table); /* in: InnoDB table */
-/**************************************************************************
-Determines whether a string starts with the specified keyword. */
-
+ dict_table_t* table, /*!< in: InnoDB table */
+ dict_index_t* index); /*!< in: InnoDB index */
+/*********************************************************************//**
+Checks if a table is referenced by foreign keys.
+@return TRUE if table is referenced by a foreign key */
+UNIV_INTERN
ibool
-dict_str_starts_with_keyword(
-/*=========================*/
- /* out: TRUE if str starts
- with keyword */
- void* mysql_thd, /* in: MySQL thread handle */
- const char* str, /* in: string to scan for keyword */
- const char* keyword); /* in: keyword to look for */
-/*************************************************************************
+dict_table_is_referenced_by_foreign_key(
+/*====================================*/
+ const dict_table_t* table); /*!< in: InnoDB table */
+/**********************************************************************//**
+Replace the index in the foreign key list that matches this index's
+definition with an equivalent index. */
+UNIV_INTERN
+void
+dict_table_replace_index_in_foreign_list(
+/*=====================================*/
+ dict_table_t* table, /*!< in/out: table */
+ dict_index_t* index); /*!< in: index to be replaced */
+/*********************************************************************//**
+Checks if an index is defined for a foreign key constraint. Index is a part
+of a foreign key constraint if the index is referenced by foreign key
+or index is a foreign key index.
+@return pointer to foreign key struct if index is defined for foreign
+key, otherwise NULL */
+UNIV_INTERN
+dict_foreign_t*
+dict_table_get_foreign_constraint(
+/*==============================*/
+ dict_table_t* table, /*!< in: InnoDB table */
+ dict_index_t* index); /*!< in: InnoDB index */
+/*********************************************************************//**
Scans a table create SQL string and adds to the data dictionary
the foreign key constraints declared in the string. This function
should be called after the indexes for a table have been created.
Each foreign key constraint must be accompanied with indexes in
both participating tables. The indexes are allowed to contain more
-fields than mentioned in the constraint. */
-
+fields than mentioned in the constraint.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
ulint
dict_create_foreign_constraints(
/*============================*/
- /* out: error code or DB_SUCCESS */
- trx_t* trx, /* in: transaction */
- const char* sql_string, /* in: table create statement where
+ trx_t* trx, /*!< in: transaction */
+ const char* sql_string, /*!< in: table create statement where
foreign keys are declared like:
FOREIGN KEY (a, b) REFERENCES
table2(c, d), table2 can be written
@@ -309,691 +352,811 @@ dict_create_foreign_constraints(
name before it: test.table2; the
default database is the database of
parameter name */
- const char* name, /* in: table full name in the
+ const char* name, /*!< in: table full name in the
normalized form
database_name/table_name */
- ibool reject_fks); /* in: if TRUE, fail with error
+ ibool reject_fks); /*!< in: if TRUE, fail with error
code DB_CANNOT_ADD_CONSTRAINT if
any foreign keys are found. */
-/**************************************************************************
-Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement. */
-
+/**********************************************************************//**
+Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement.
+@return DB_SUCCESS or DB_CANNOT_DROP_CONSTRAINT if syntax error or the
+constraint id does not match */
+UNIV_INTERN
ulint
dict_foreign_parse_drop_constraints(
/*================================*/
- /* out: DB_SUCCESS or
- DB_CANNOT_DROP_CONSTRAINT if
- syntax error or the constraint
- id does not match */
- mem_heap_t* heap, /* in: heap from which we can
+ mem_heap_t* heap, /*!< in: heap from which we can
allocate memory */
- trx_t* trx, /* in: transaction */
- dict_table_t* table, /* in: table */
- ulint* n, /* out: number of constraints
+ trx_t* trx, /*!< in: transaction */
+ dict_table_t* table, /*!< in: table */
+ ulint* n, /*!< out: number of constraints
to drop */
- const char*** constraints_to_drop); /* out: id's of the
+ const char*** constraints_to_drop); /*!< out: id's of the
constraints to drop */
-/**************************************************************************
+/**********************************************************************//**
Returns a table object and optionally increment its MySQL open handle count.
NOTE! This is a high-level function to be used mainly from outside the
'dict' directory. Inside this directory dict_table_get_low is usually the
-appropriate function. */
-
+appropriate function.
+@return table, NULL if does not exist */
+UNIV_INTERN
dict_table_t*
dict_table_get(
/*===========*/
- /* out: table, NULL if
- does not exist */
- const char* table_name, /* in: table name */
+ const char* table_name, /*!< in: table name */
ibool inc_mysql_count);
- /* in: whether to increment the open
+ /*!< in: whether to increment the open
handle count on the table */
-/**************************************************************************
-Returns a table object based on table id. */
-
-dict_table_t*
-dict_table_get_on_id(
-/*=================*/
- /* out: table, NULL if does not exist */
- dulint table_id, /* in: table id */
- trx_t* trx); /* in: transaction handle */
-/**************************************************************************
-Returns a table object based on table id. */
-UNIV_INLINE
-dict_table_t*
-dict_table_get_on_id_low(
+/**********************************************************************//**
+Returns an index object, based on table and index id, and memoryfixes it.
+@return index, NULL if does not exist */
+UNIV_INTERN
+dict_index_t*
+dict_index_get_on_id_low(
/*=====================*/
- /* out: table, NULL if does not exist */
- dulint table_id); /* in: table id */
-/**************************************************************************
-Checks if a table is in the dictionary cache. */
+ dict_table_t* table, /*!< in: table */
+ dulint index_id); /*!< in: index id */
+/**********************************************************************//**
+Checks if a table is in the dictionary cache.
+@return table, NULL if not found */
+
UNIV_INLINE
dict_table_t*
dict_table_check_if_in_cache_low(
/*=============================*/
- /* out: table, NULL if not found */
- const char* table_name); /* in: table name */
-/**************************************************************************
+ const char* table_name); /*!< in: table name */
+/**********************************************************************//**
Gets a table; loads it to the dictionary cache if necessary. A low-level
-function. */
+function.
+@return table, NULL if not found */
UNIV_INLINE
dict_table_t*
dict_table_get_low(
/*===============*/
- /* out: table, NULL if not found */
- const char* table_name); /* in: table name */
-/**************************************************************************
-A noninlined version of dict_table_get_low. */
-
-dict_table_t*
-dict_table_get_low_noninlined(
-/*==========================*/
- /* out: table, NULL if not found */
- const char* table_name); /* in: table name */
-/**************************************************************************
-Returns an index object. */
+ const char* table_name); /*!< in: table name */
+/**********************************************************************//**
+Returns a table object based on table id.
+@return table, NULL if does not exist */
UNIV_INLINE
+dict_table_t*
+dict_table_get_on_id_low(
+/*=====================*/
+ dulint table_id); /*!< in: table id */
+/**********************************************************************//**
+Find an index that is equivalent to the one passed in and is not marked
+for deletion.
+@return index equivalent to foreign->foreign_index, or NULL */
+UNIV_INTERN
dict_index_t*
-dict_table_get_index(
-/*=================*/
- /* out: index, NULL if does not exist */
- dict_table_t* table, /* in: table */
- const char* name); /* in: index name */
-/**************************************************************************
-Returns an index object. */
-
+dict_foreign_find_equiv_index(
+/*==========================*/
+ dict_foreign_t* foreign);/*!< in: foreign key */
+/**********************************************************************//**
+Returns an index object by matching on the name and column names and,
+if more than one index matches, return the index with the max id.
+@return matching index, NULL if not found */
+UNIV_INTERN
dict_index_t*
-dict_table_get_index_noninline(
+dict_table_get_index_by_max_id(
/*===========================*/
- /* out: index, NULL if does not exist */
- dict_table_t* table, /* in: table */
- const char* name); /* in: index name */
-/**************************************************************************
-Returns a column's name. */
-
+ dict_table_t* table, /*!< in: table */
+ const char* name, /*!< in: the index name to find */
+ const char** columns,/*!< in: array of column names */
+ ulint n_cols);/*!< in: number of columns */
+/**********************************************************************//**
+Returns a column's name.
+@return column name. NOTE: not guaranteed to stay valid if table is
+modified in any way (columns added, etc.). */
+UNIV_INTERN
const char*
dict_table_get_col_name(
/*====================*/
- /* out: column name. NOTE: not
- guaranteed to stay valid if table is
- modified in any way (columns added,
- etc.). */
- const dict_table_t* table, /* in: table */
- ulint col_nr);/* in: column number */
+ const dict_table_t* table, /*!< in: table */
+ ulint col_nr);/*!< in: column number */
-/**************************************************************************
+/**********************************************************************//**
Prints a table definition. */
-
+UNIV_INTERN
void
dict_table_print(
/*=============*/
- dict_table_t* table); /* in: table */
-/**************************************************************************
+ dict_table_t* table); /*!< in: table */
+/**********************************************************************//**
Prints a table data. */
-
+UNIV_INTERN
void
dict_table_print_low(
/*=================*/
- dict_table_t* table); /* in: table */
-/**************************************************************************
+ dict_table_t* table); /*!< in: table */
+/**********************************************************************//**
Prints a table data when we know the table name. */
-
+UNIV_INTERN
void
dict_table_print_by_name(
/*=====================*/
- const char* name);
-/**************************************************************************
+ const char* name); /*!< in: table name */
+/**********************************************************************//**
Outputs info on foreign keys of a table. */
-
+UNIV_INTERN
void
dict_print_info_on_foreign_keys(
/*============================*/
- ibool create_table_format, /* in: if TRUE then print in
+ ibool create_table_format, /*!< in: if TRUE then print in
a format suitable to be inserted into
a CREATE TABLE, otherwise in the format
of SHOW TABLE STATUS */
- FILE* file, /* in: file where to print */
- trx_t* trx, /* in: transaction */
- dict_table_t* table); /* in: table */
-/**************************************************************************
+ FILE* file, /*!< in: file where to print */
+ trx_t* trx, /*!< in: transaction */
+ dict_table_t* table); /*!< in: table */
+/**********************************************************************//**
Outputs info on a foreign key of a table in a format suitable for
CREATE TABLE. */
+UNIV_INTERN
void
dict_print_info_on_foreign_key_in_create_format(
/*============================================*/
- FILE* file, /* in: file where to print */
- trx_t* trx, /* in: transaction */
- dict_foreign_t* foreign, /* in: foreign key constraint */
- ibool add_newline); /* in: whether to add a newline */
-/************************************************************************
+ FILE* file, /*!< in: file where to print */
+ trx_t* trx, /*!< in: transaction */
+ dict_foreign_t* foreign, /*!< in: foreign key constraint */
+ ibool add_newline); /*!< in: whether to add a newline */
+/********************************************************************//**
Displays the names of the index and the table. */
+UNIV_INTERN
void
dict_index_name_print(
/*==================*/
- FILE* file, /* in: output stream */
- trx_t* trx, /* in: transaction */
- const dict_index_t* index); /* in: index to print */
-/************************************************************************
-Gets the first index on the table (the clustered index). */
+ FILE* file, /*!< in: output stream */
+ trx_t* trx, /*!< in: transaction */
+ const dict_index_t* index); /*!< in: index to print */
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Gets the first index on the table (the clustered index).
+@return index, NULL if none exists */
UNIV_INLINE
dict_index_t*
dict_table_get_first_index(
/*=======================*/
- /* out: index, NULL if none exists */
- dict_table_t* table); /* in: table */
-/************************************************************************
-Gets the first index on the table (the clustered index). */
-
-dict_index_t*
-dict_table_get_first_index_noninline(
-/*=================================*/
- /* out: index, NULL if none exists */
- dict_table_t* table); /* in: table */
-/************************************************************************
-Gets the next index on the table. */
+ const dict_table_t* table); /*!< in: table */
+/********************************************************************//**
+Gets the next index on the table.
+@return index, NULL if none left */
UNIV_INLINE
dict_index_t*
dict_table_get_next_index(
/*======================*/
- /* out: index, NULL if none left */
- dict_index_t* index); /* in: index */
-/************************************************************************
-Gets the next index on the table. */
+ const dict_index_t* index); /*!< in: index */
+#else /* UNIV_DEBUG */
+# define dict_table_get_first_index(table) UT_LIST_GET_FIRST((table)->indexes)
+# define dict_table_get_next_index(index) UT_LIST_GET_NEXT(indexes, index)
+#endif /* UNIV_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************************//**
+Check whether the index is the clustered index.
+@return nonzero for clustered index, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_clust(
+/*================*/
+ const dict_index_t* index) /*!< in: index */
+ __attribute__((pure));
+/********************************************************************//**
+Check whether the index is unique.
+@return nonzero for unique index, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_unique(
+/*=================*/
+ const dict_index_t* index) /*!< in: index */
+ __attribute__((pure));
+/********************************************************************//**
+Check whether the index is the insert buffer tree.
+@return nonzero for insert buffer, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_ibuf(
+/*===============*/
+ const dict_index_t* index) /*!< in: index */
+ __attribute__((pure));
+/********************************************************************//**
+Check whether the index is a secondary index or the insert buffer tree.
+@return nonzero for secondary index or insert buffer, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_sec_or_ibuf(
+/*======================*/
+ const dict_index_t* index) /*!< in: index */
+ __attribute__((pure));
-dict_index_t*
-dict_table_get_next_index_noninline(
-/*================================*/
- /* out: index, NULL if none left */
- dict_index_t* index); /* in: index */
-/************************************************************************
+/********************************************************************//**
Gets the number of user-defined columns in a table in the dictionary
-cache. */
+cache.
+@return number of user-defined (e.g., not ROW_ID) columns of a table */
UNIV_INLINE
ulint
dict_table_get_n_user_cols(
/*=======================*/
- /* out: number of user-defined (e.g., not
- ROW_ID) columns of a table */
- dict_table_t* table); /* in: table */
-/************************************************************************
-Gets the number of system columns in a table in the dictionary cache. */
+ const dict_table_t* table); /*!< in: table */
+/********************************************************************//**
+Gets the number of system columns in a table in the dictionary cache.
+@return number of system (e.g., ROW_ID) columns of a table */
UNIV_INLINE
ulint
dict_table_get_n_sys_cols(
/*======================*/
- /* out: number of system (e.g.,
- ROW_ID) columns of a table */
- dict_table_t* table); /* in: table */
-/************************************************************************
+ const dict_table_t* table); /*!< in: table */
+/********************************************************************//**
Gets the number of all columns (also system) in a table in the dictionary
-cache. */
+cache.
+@return number of columns of a table */
UNIV_INLINE
ulint
dict_table_get_n_cols(
/*==================*/
- /* out: number of columns of a table */
- dict_table_t* table); /* in: table */
-/************************************************************************
-Gets the nth column of a table. */
+ const dict_table_t* table); /*!< in: table */
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Gets the nth column of a table.
+@return pointer to column object */
UNIV_INLINE
-const dict_col_t*
+dict_col_t*
dict_table_get_nth_col(
/*===================*/
- /* out: pointer to column object */
- const dict_table_t* table, /* in: table */
- ulint pos); /* in: position of column */
-/************************************************************************
-Gets the nth column of a table. */
-
-const dict_col_t*
-dict_table_get_nth_col_noninline(
-/*=============================*/
- /* out: pointer to column object */
- const dict_table_t* table, /* in: table */
- ulint pos); /* in: position of column */
-/************************************************************************
-Gets the given system column of a table. */
+ const dict_table_t* table, /*!< in: table */
+ ulint pos); /*!< in: position of column */
+/********************************************************************//**
+Gets the given system column of a table.
+@return pointer to column object */
UNIV_INLINE
-const dict_col_t*
+dict_col_t*
dict_table_get_sys_col(
/*===================*/
- /* out: pointer to column object */
- const dict_table_t* table, /* in: table */
- ulint sys); /* in: DATA_ROW_ID, ... */
-/************************************************************************
-Gets the given system column number of a table. */
+ const dict_table_t* table, /*!< in: table */
+ ulint sys); /*!< in: DATA_ROW_ID, ... */
+#else /* UNIV_DEBUG */
+#define dict_table_get_nth_col(table, pos) \
+((table)->cols + (pos))
+#define dict_table_get_sys_col(table, sys) \
+((table)->cols + (table)->n_cols + (sys) - DATA_N_SYS_COLS)
+#endif /* UNIV_DEBUG */
+/********************************************************************//**
+Gets the given system column number of a table.
+@return column number */
UNIV_INLINE
ulint
dict_table_get_sys_col_no(
/*======================*/
- /* out: column number */
- dict_table_t* table, /* in: table */
- ulint sys); /* in: DATA_ROW_ID, ... */
-/************************************************************************
-Check whether the table uses the compact page format. */
+ const dict_table_t* table, /*!< in: table */
+ ulint sys); /*!< in: DATA_ROW_ID, ... */
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Returns the minimum data size of an index record.
+@return minimum data size in bytes */
+UNIV_INLINE
+ulint
+dict_index_get_min_size(
+/*====================*/
+ const dict_index_t* index); /*!< in: index */
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************************//**
+Check whether the table uses the compact page format.
+@return TRUE if table uses the compact page format */
UNIV_INLINE
ibool
dict_table_is_comp(
/*===============*/
- /* out: TRUE if table uses the
- compact page format */
- const dict_table_t* table); /* in: table */
-/************************************************************************
-Check whether the table uses the compact page format. */
-
-ibool
-dict_table_is_comp_noninline(
+ const dict_table_t* table); /*!< in: table */
+/********************************************************************//**
+Determine the file format of a table.
+@return file format version */
+UNIV_INLINE
+ulint
+dict_table_get_format(
+/*==================*/
+ const dict_table_t* table); /*!< in: table */
+/********************************************************************//**
+Set the file format of a table. */
+UNIV_INLINE
+void
+dict_table_set_format(
+/*==================*/
+ dict_table_t* table, /*!< in/out: table */
+ ulint format);/*!< in: file format version */
+/********************************************************************//**
+Extract the compressed page size from table flags.
+@return compressed page size, or 0 if not compressed */
+UNIV_INLINE
+ulint
+dict_table_flags_to_zip_size(
/*=========================*/
- /* out: TRUE if table uses the
- compact page format */
- const dict_table_t* table); /* in: table */
-/************************************************************************
+ ulint flags) /*!< in: flags */
+ __attribute__((const));
+/********************************************************************//**
+Check whether the table uses the compressed compact page format.
+@return compressed page size, or 0 if not compressed */
+UNIV_INLINE
+ulint
+dict_table_zip_size(
+/*================*/
+ const dict_table_t* table); /*!< in: table */
+/********************************************************************//**
Checks if a column is in the ordering columns of the clustered index of a
-table. Column prefixes are treated like whole columns. */
-
+table. Column prefixes are treated like whole columns.
+@return TRUE if the column, or its prefix, is in the clustered key */
+UNIV_INTERN
ibool
dict_table_col_in_clustered_key(
/*============================*/
- /* out: TRUE if the column, or its prefix, is
- in the clustered key */
- dict_table_t* table, /* in: table */
- ulint n); /* in: column number */
-/***********************************************************************
-Copies types of columns contained in table to tuple. */
-
+ const dict_table_t* table, /*!< in: table */
+ ulint n); /*!< in: column number */
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
+Copies types of columns contained in table to tuple and sets all
+fields of the tuple to the SQL NULL value. This function should
+be called right after dtuple_create(). */
+UNIV_INTERN
void
dict_table_copy_types(
/*==================*/
- dtuple_t* tuple, /* in: data tuple */
- dict_table_t* table); /* in: index */
-/**************************************************************************
+ dtuple_t* tuple, /*!< in/out: data tuple */
+ const dict_table_t* table); /*!< in: table */
+/**********************************************************************//**
Looks for an index with the given id. NOTE that we do not reserve
the dictionary mutex: this function is for emergency purposes like
-printing info of a corrupt database page! */
-
+printing info of a corrupt database page!
+@return index, or NULL if not found in the cache */
+UNIV_INTERN
dict_index_t*
dict_index_find_on_id_low(
/*======================*/
- /* out: index or NULL if not found from cache */
- dulint id); /* in: index id */
-/**************************************************************************
-Adds an index to the dictionary cache. */
-
-void
+ dulint id); /*!< in: index id */
+/**********************************************************************//**
+Adds an index to the dictionary cache.
+@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
+UNIV_INTERN
+ulint
dict_index_add_to_cache(
/*====================*/
- dict_table_t* table, /* in: table on which the index is */
- dict_index_t* index, /* in, own: index; NOTE! The index memory
+ dict_table_t* table, /*!< in: table on which the index is */
+ dict_index_t* index, /*!< in, own: index; NOTE! The index memory
object is freed in this function! */
- ulint page_no);/* in: root page number of the index */
-/************************************************************************
+ ulint page_no,/*!< in: root page number of the index */
+ ibool strict);/*!< in: TRUE=refuse to create the index
+ if records could be too big to fit in
+ a B-tree page */
+/**********************************************************************//**
+Removes an index from the dictionary cache. */
+UNIV_INTERN
+void
+dict_index_remove_from_cache(
+/*=========================*/
+ dict_table_t* table, /*!< in/out: table */
+ dict_index_t* index); /*!< in, own: index */
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************************//**
Gets the number of fields in the internal representation of an index,
-including fields added by the dictionary system. */
+including fields added by the dictionary system.
+@return number of fields */
UNIV_INLINE
ulint
dict_index_get_n_fields(
/*====================*/
- /* out: number of fields */
- dict_index_t* index); /* in: an internal representation of index
- (in the dictionary cache) */
-/************************************************************************
+ const dict_index_t* index); /*!< in: an internal
+ representation of index (in
+ the dictionary cache) */
+/********************************************************************//**
Gets the number of fields in the internal representation of an index
that uniquely determine the position of an index entry in the index, if
we do not take multiversioning into account: in the B-tree use the value
-returned by dict_index_get_n_unique_in_tree. */
+returned by dict_index_get_n_unique_in_tree.
+@return number of fields */
UNIV_INLINE
ulint
dict_index_get_n_unique(
/*====================*/
- /* out: number of fields */
- dict_index_t* index); /* in: an internal representation of index
- (in the dictionary cache) */
-/************************************************************************
+ const dict_index_t* index); /*!< in: an internal representation
+ of index (in the dictionary cache) */
+/********************************************************************//**
Gets the number of fields in the internal representation of an index
which uniquely determine the position of an index entry in the index, if
-we also take multiversioning into account. */
+we also take multiversioning into account.
+@return number of fields */
UNIV_INLINE
ulint
dict_index_get_n_unique_in_tree(
/*============================*/
- /* out: number of fields */
- dict_index_t* index); /* in: an internal representation of index
- (in the dictionary cache) */
-/************************************************************************
+ const dict_index_t* index); /*!< in: an internal representation
+ of index (in the dictionary cache) */
+/********************************************************************//**
Gets the number of user-defined ordering fields in the index. In the internal
representation we add the row id to the ordering fields to make all indexes
unique, but this function returns the number of fields the user defined
-in the index as ordering fields. */
+in the index as ordering fields.
+@return number of fields */
UNIV_INLINE
ulint
dict_index_get_n_ordering_defined_by_user(
/*======================================*/
- /* out: number of fields */
- dict_index_t* index); /* in: an internal representation of index
- (in the dictionary cache) */
-/************************************************************************
-Gets the nth field of an index. */
+ const dict_index_t* index); /*!< in: an internal representation
+ of index (in the dictionary cache) */
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Gets the nth field of an index.
+@return pointer to field object */
UNIV_INLINE
dict_field_t*
dict_index_get_nth_field(
/*=====================*/
- /* out: pointer to field object */
- dict_index_t* index, /* in: index */
- ulint pos); /* in: position of field */
-/************************************************************************
-Gets pointer to the nth column in an index. */
+ const dict_index_t* index, /*!< in: index */
+ ulint pos); /*!< in: position of field */
+#else /* UNIV_DEBUG */
+# define dict_index_get_nth_field(index, pos) ((index)->fields + (pos))
+#endif /* UNIV_DEBUG */
+/********************************************************************//**
+Gets pointer to the nth column in an index.
+@return column */
UNIV_INLINE
const dict_col_t*
dict_index_get_nth_col(
/*===================*/
- /* out: column */
- const dict_index_t* index, /* in: index */
- ulint pos); /* in: position of the field */
-/************************************************************************
-Gets the column number of the nth field in an index. */
+ const dict_index_t* index, /*!< in: index */
+ ulint pos); /*!< in: position of the field */
+/********************************************************************//**
+Gets the column number of the nth field in an index.
+@return column number */
UNIV_INLINE
ulint
dict_index_get_nth_col_no(
/*======================*/
- /* out: column number */
- const dict_index_t* index, /* in: index */
- ulint pos); /* in: position of the field */
-/************************************************************************
-Looks for column n in an index. */
-
+ const dict_index_t* index, /*!< in: index */
+ ulint pos); /*!< in: position of the field */
+/********************************************************************//**
+Looks for column n in an index.
+@return position in internal representation of the index;
+ULINT_UNDEFINED if not contained */
+UNIV_INTERN
ulint
dict_index_get_nth_col_pos(
/*=======================*/
- /* out: position in internal representation
- of the index; if not contained, returns
- ULINT_UNDEFINED */
- dict_index_t* index, /* in: index */
- ulint n); /* in: column number */
-/************************************************************************
-Returns TRUE if the index contains a column or a prefix of that column. */
-
+ const dict_index_t* index, /*!< in: index */
+ ulint n); /*!< in: column number */
+/********************************************************************//**
+Returns TRUE if the index contains a column or a prefix of that column.
+@return TRUE if contains the column or its prefix */
+UNIV_INTERN
ibool
dict_index_contains_col_or_prefix(
/*==============================*/
- /* out: TRUE if contains the column or its
- prefix */
- dict_index_t* index, /* in: index */
- ulint n); /* in: column number */
-/************************************************************************
+ const dict_index_t* index, /*!< in: index */
+ ulint n); /*!< in: column number */
+/********************************************************************//**
Looks for a matching field in an index. The column has to be the same. The
column in index must be complete, or must contain a prefix longer than the
column in index2. That is, we must be able to construct the prefix in index2
-from the prefix in index. */
-
+from the prefix in index.
+@return position in internal representation of the index;
+ULINT_UNDEFINED if not contained */
+UNIV_INTERN
ulint
dict_index_get_nth_field_pos(
/*=========================*/
- /* out: position in internal representation
- of the index; if not contained, returns
- ULINT_UNDEFINED */
- dict_index_t* index, /* in: index from which to search */
- dict_index_t* index2, /* in: index */
- ulint n); /* in: field number in index2 */
-/************************************************************************
-Looks for column n position in the clustered index. */
-
+ const dict_index_t* index, /*!< in: index from which to search */
+ const dict_index_t* index2, /*!< in: index */
+ ulint n); /*!< in: field number in index2 */
+/********************************************************************//**
+Looks for column n position in the clustered index.
+@return position in internal representation of the clustered index */
+UNIV_INTERN
ulint
dict_table_get_nth_col_pos(
/*=======================*/
- /* out: position in internal representation
- of the clustered index */
- dict_table_t* table, /* in: table */
- ulint n); /* in: column number */
-/************************************************************************
-Returns the position of a system column in an index. */
+ const dict_table_t* table, /*!< in: table */
+ ulint n); /*!< in: column number */
+/********************************************************************//**
+Returns the position of a system column in an index.
+@return position, ULINT_UNDEFINED if not contained */
UNIV_INLINE
ulint
dict_index_get_sys_col_pos(
/*=======================*/
- /* out: position, ULINT_UNDEFINED if not
- contained */
- dict_index_t* index, /* in: index */
- ulint type); /* in: DATA_ROW_ID, ... */
-/***********************************************************************
+ const dict_index_t* index, /*!< in: index */
+ ulint type); /*!< in: DATA_ROW_ID, ... */
+/*******************************************************************//**
Adds a column to index. */
-
+UNIV_INTERN
void
dict_index_add_col(
/*===============*/
- dict_index_t* index, /* in: index */
- dict_table_t* table, /* in: table */
- dict_col_t* col, /* in: column */
- ulint prefix_len); /* in: column prefix length */
-/***********************************************************************
+ dict_index_t* index, /*!< in/out: index */
+ const dict_table_t* table, /*!< in: table */
+ dict_col_t* col, /*!< in: column */
+ ulint prefix_len); /*!< in: column prefix length */
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
Copies types of fields contained in index to tuple. */
-
+UNIV_INTERN
void
dict_index_copy_types(
/*==================*/
- dtuple_t* tuple, /* in: data tuple */
- dict_index_t* index, /* in: index */
- ulint n_fields); /* in: number of field types to copy */
-/*************************************************************************
-Gets the field column. */
+ dtuple_t* tuple, /*!< in/out: data tuple */
+ const dict_index_t* index, /*!< in: index */
+ ulint n_fields); /*!< in: number of
+ field types to copy */
+#endif /* !UNIV_HOTBACKUP */
+/*********************************************************************//**
+Gets the field column.
+@return field->col, pointer to the table column */
UNIV_INLINE
const dict_col_t*
dict_field_get_col(
/*===============*/
- const dict_field_t* field);
-
-#ifdef UNIV_DEBUG
-/**************************************************************************
-Returns an index object if it is found in the dictionary cache. */
-
+ const dict_field_t* field); /*!< in: index field */
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Returns an index object if it is found in the dictionary cache.
+Assumes that dict_sys->mutex is already being held.
+@return index, NULL if not found */
+UNIV_INTERN
+dict_index_t*
+dict_index_get_if_in_cache_low(
+/*===========================*/
+ dulint index_id); /*!< in: index id */
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/**********************************************************************//**
+Returns an index object if it is found in the dictionary cache.
+@return index, NULL if not found */
+UNIV_INTERN
dict_index_t*
dict_index_get_if_in_cache(
/*=======================*/
- /* out: index, NULL if not found */
- dulint index_id); /* in: index id */
-/**************************************************************************
+ dulint index_id); /*!< in: index id */
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+#ifdef UNIV_DEBUG
+/**********************************************************************//**
Checks that a tuple has n_fields_cmp value in a sensible range, so that
-no comparison can occur with the page number field in a node pointer. */
-
+no comparison can occur with the page number field in a node pointer.
+@return TRUE if ok */
+UNIV_INTERN
ibool
dict_index_check_search_tuple(
/*==========================*/
- /* out: TRUE if ok */
- dict_index_t* index, /* in: index */
- dtuple_t* tuple); /* in: tuple used in a search */
-#endif /* UNIV_DEBUG */
-/**************************************************************************
-Builds a node pointer out of a physical record and a page number. */
+ const dict_index_t* index, /*!< in: index tree */
+ const dtuple_t* tuple); /*!< in: tuple used in a search */
+/**********************************************************************//**
+Check for duplicate index entries in a table [using the index name] */
+UNIV_INTERN
+void
+dict_table_check_for_dup_indexes(
+/*=============================*/
+ const dict_table_t* table); /*!< in: Check for dup indexes
+ in this table */
+#endif /* UNIV_DEBUG */
+/**********************************************************************//**
+Builds a node pointer out of a physical record and a page number.
+@return own: node pointer */
+UNIV_INTERN
dtuple_t*
dict_index_build_node_ptr(
/*======================*/
- /* out, own: node pointer */
- dict_index_t* index, /* in: index */
- rec_t* rec, /* in: record for which to build node
- pointer */
- ulint page_no,/* in: page number to put in node pointer */
- mem_heap_t* heap, /* in: memory heap where pointer created */
- ulint level); /* in: level of rec in tree: 0 means leaf
- level */
-/**************************************************************************
+ const dict_index_t* index, /*!< in: index */
+ const rec_t* rec, /*!< in: record for which to build node
+ pointer */
+ ulint page_no,/*!< in: page number to put in node
+ pointer */
+ mem_heap_t* heap, /*!< in: memory heap where pointer
+ created */
+ ulint level); /*!< in: level of rec in tree:
+ 0 means leaf level */
+/**********************************************************************//**
Copies an initial segment of a physical record, long enough to specify an
-index entry uniquely. */
-
+index entry uniquely.
+@return pointer to the prefix record */
+UNIV_INTERN
rec_t*
dict_index_copy_rec_order_prefix(
/*=============================*/
- /* out: pointer to the prefix record */
- dict_index_t* index, /* in: index */
- rec_t* rec, /* in: record for which to copy prefix */
- ulint* n_fields,/* out: number of fields copied */
- byte** buf, /* in/out: memory buffer for the copied prefix,
- or NULL */
- ulint* buf_size);/* in/out: buffer size */
-/**************************************************************************
-Builds a typed data tuple out of a physical record. */
-
+ const dict_index_t* index, /*!< in: index */
+ const rec_t* rec, /*!< in: record for which to
+ copy prefix */
+ ulint* n_fields,/*!< out: number of fields copied */
+ byte** buf, /*!< in/out: memory buffer for the
+ copied prefix, or NULL */
+ ulint* buf_size);/*!< in/out: buffer size */
+/**********************************************************************//**
+Builds a typed data tuple out of a physical record.
+@return own: data tuple */
+UNIV_INTERN
dtuple_t*
dict_index_build_data_tuple(
/*========================*/
- /* out, own: data tuple */
- dict_index_t* index, /* in: index */
- rec_t* rec, /* in: record for which to build data tuple */
- ulint n_fields,/* in: number of data fields */
- mem_heap_t* heap); /* in: memory heap where tuple created */
-/*************************************************************************
-Gets the space id of the root of the index tree. */
+ dict_index_t* index, /*!< in: index */
+ rec_t* rec, /*!< in: record for which to build data tuple */
+ ulint n_fields,/*!< in: number of data fields */
+ mem_heap_t* heap); /*!< in: memory heap where tuple created */
+/*********************************************************************//**
+Gets the space id of the root of the index tree.
+@return space id */
UNIV_INLINE
ulint
dict_index_get_space(
/*=================*/
- /* out: space id */
- dict_index_t* index); /* in: index */
-/*************************************************************************
+ const dict_index_t* index); /*!< in: index */
+/*********************************************************************//**
Sets the space id of the root of the index tree. */
UNIV_INLINE
void
dict_index_set_space(
/*=================*/
- dict_index_t* index, /* in: index */
- ulint space); /* in: space id */
-/*************************************************************************
-Gets the page number of the root of the index tree. */
+ dict_index_t* index, /*!< in/out: index */
+ ulint space); /*!< in: space id */
+/*********************************************************************//**
+Gets the page number of the root of the index tree.
+@return page number */
UNIV_INLINE
ulint
dict_index_get_page(
/*================*/
- /* out: page number */
- dict_index_t* tree); /* in: index */
-/*************************************************************************
+ const dict_index_t* tree); /*!< in: index */
+/*********************************************************************//**
Sets the page number of the root of index tree. */
UNIV_INLINE
void
dict_index_set_page(
/*================*/
- dict_index_t* index, /* in: index */
- ulint page); /* in: page number */
-/*************************************************************************
-Gets the type of the index tree. */
-UNIV_INLINE
-ulint
-dict_index_get_type(
-/*================*/
- /* out: type */
- dict_index_t* index); /* in: index */
-/*************************************************************************
-Gets the read-write lock of the index tree. */
+ dict_index_t* index, /*!< in/out: index */
+ ulint page); /*!< in: page number */
+/*********************************************************************//**
+Gets the read-write lock of the index tree.
+@return read-write lock */
UNIV_INLINE
rw_lock_t*
dict_index_get_lock(
/*================*/
- /* out: read-write lock */
- dict_index_t* index); /* in: index */
-/************************************************************************
+ dict_index_t* index); /*!< in: index */
+/********************************************************************//**
Returns free space reserved for future updates of records. This is
relevant only in the case of many consecutive inserts, as updates
-which make the records bigger might fragment the index. */
+which make the records bigger might fragment the index.
+@return number of free bytes on page, reserved for updates */
UNIV_INLINE
ulint
dict_index_get_space_reserve(void);
/*==============================*/
- /* out: number of free bytes on page,
- reserved for updates */
-/*************************************************************************
+/*********************************************************************//**
Calculates the minimum record length in an index. */
-
+UNIV_INTERN
ulint
dict_index_calc_min_rec_len(
/*========================*/
- dict_index_t* index); /* in: index */
-/*************************************************************************
+ const dict_index_t* index); /*!< in: index */
+/*********************************************************************//**
Calculates new estimates for table and index statistics. The statistics
are used in query optimization. */
-
+UNIV_INTERN
void
dict_update_statistics_low(
/*=======================*/
- dict_table_t* table, /* in: table */
- ibool has_dict_mutex);/* in: TRUE if the caller has the
+ dict_table_t* table, /*!< in/out: table */
+ ibool has_dict_mutex);/*!< in: TRUE if the caller has the
dictionary mutex */
-/*************************************************************************
+/*********************************************************************//**
Calculates new estimates for table and index statistics. The statistics
are used in query optimization. */
-
+UNIV_INTERN
void
dict_update_statistics(
/*===================*/
- dict_table_t* table); /* in: table */
-/************************************************************************
+ dict_table_t* table); /*!< in/out: table */
+/********************************************************************//**
Reserves the dictionary system mutex for MySQL. */
-
+UNIV_INTERN
void
dict_mutex_enter_for_mysql(void);
/*============================*/
-/************************************************************************
+/********************************************************************//**
Releases the dictionary system mutex for MySQL. */
-
+UNIV_INTERN
void
dict_mutex_exit_for_mysql(void);
/*===========================*/
-/************************************************************************
-Checks if the database name in two table names is the same. */
-
+/********************************************************************//**
+Checks if the database name in two table names is the same.
+@return TRUE if same db name */
+UNIV_INTERN
ibool
dict_tables_have_same_db(
/*=====================*/
- /* out: TRUE if same db name */
- const char* name1, /* in: table name in the form
+ const char* name1, /*!< in: table name in the form
dbname '/' tablename */
- const char* name2); /* in: table name in the form
+ const char* name2); /*!< in: table name in the form
dbname '/' tablename */
-/*************************************************************************
-Scans from pointer onwards. Stops if is at the start of a copy of
-'string' where characters are compared without case sensitivity. Stops
-also at '\0'. */
-
-const char*
-dict_scan_to(
-/*=========*/
- /* out: scanned up to this */
- const char* ptr, /* in: scan from */
- const char* string);/* in: look for this */
+/*********************************************************************//**
+Removes an index from the cache */
+UNIV_INTERN
+void
+dict_index_remove_from_cache(
+/*=========================*/
+ dict_table_t* table, /*!< in/out: table */
+ dict_index_t* index); /*!< in, own: index */
+/**********************************************************************//**
+Get index by name
+@return index, NULL if does not exist */
+UNIV_INTERN
+dict_index_t*
+dict_table_get_index_on_name(
+/*=========================*/
+ dict_table_t* table, /*!< in: table */
+ const char* name); /*!< in: name of the index to find */
+/**********************************************************************//**
+In case there is more than one index with the same name, return the index
+with the min(id).
+@return index, NULL if does not exist */
+UNIV_INTERN
+dict_index_t*
+dict_table_get_index_on_name_and_min_id(
+/*====================================*/
+ dict_table_t* table, /*!< in: table */
+ const char* name); /*!< in: name of the index to find */
/* Buffers for storing detailed information about the latest foreign key
and unique key errors */
extern FILE* dict_foreign_err_file;
extern mutex_t dict_foreign_err_mutex; /* mutex protecting the buffers */
-extern dict_sys_t* dict_sys; /* the dictionary system */
+/** the dictionary system */
+extern dict_sys_t* dict_sys;
+/** the data dictionary rw-latch protecting dict_sys */
extern rw_lock_t dict_operation_lock;
/* Dictionary system struct */
struct dict_sys_struct{
- mutex_t mutex; /* mutex protecting the data
+ mutex_t mutex; /*!< mutex protecting the data
dictionary; protects also the
disk-based dictionary system tables;
this mutex serializes CREATE TABLE
and DROP TABLE, as well as reading
the dictionary data for a table from
system tables */
- dulint row_id; /* the next row id to assign;
+ dulint row_id; /*!< the next row id to assign;
NOTE that at a checkpoint this
must be written to the dict system
header and flushed to a file; in
recovery this must be derived from
the log records */
- hash_table_t* table_hash; /* hash table of the tables, based
+ hash_table_t* table_hash; /*!< hash table of the tables, based
on name */
- hash_table_t* table_id_hash; /* hash table of the tables, based
+ hash_table_t* table_id_hash; /*!< hash table of the tables, based
on id */
UT_LIST_BASE_NODE_T(dict_table_t)
- table_LRU; /* LRU list of tables */
- ulint size; /* varying space in bytes occupied
+ table_LRU; /*!< LRU list of tables */
+ ulint size; /*!< varying space in bytes occupied
by the data dictionary table and
index objects */
- dict_table_t* sys_tables; /* SYS_TABLES table */
- dict_table_t* sys_columns; /* SYS_COLUMNS table */
- dict_table_t* sys_indexes; /* SYS_INDEXES table */
- dict_table_t* sys_fields; /* SYS_FIELDS table */
+ dict_table_t* sys_tables; /*!< SYS_TABLES table */
+ dict_table_t* sys_columns; /*!< SYS_COLUMNS table */
+ dict_table_t* sys_indexes; /*!< SYS_INDEXES table */
+ dict_table_t* sys_fields; /*!< SYS_FIELDS table */
};
+#endif /* !UNIV_HOTBACKUP */
+
+/** dummy index for ROW_FORMAT=REDUNDANT supremum and infimum records */
+extern dict_index_t* dict_ind_redundant;
+/** dummy index for ROW_FORMAT=COMPACT supremum and infimum records */
+extern dict_index_t* dict_ind_compact;
+
+/**********************************************************************//**
+Inits dict_ind_redundant and dict_ind_compact. */
+UNIV_INTERN
+void
+dict_ind_init(void);
+/*===============*/
+
+/**********************************************************************//**
+Closes the data dictionary module. */
+UNIV_INTERN
+void
+dict_close(void);
+/*============*/
#ifndef UNIV_NONINL
#include "dict0dict.ic"
diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic
index 7d38cbcd1fa..46e78df8272 100644
--- a/storage/innobase/include/dict0dict.ic
+++ b/storage/innobase/include/dict0dict.ic
@@ -1,25 +1,41 @@
-/**********************************************************************
-Data dictionary system
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/dict0dict.ic
+Data dictionary system
Created 1/8/1996 Heikki Tuuri
***********************************************************************/
+#include "data0type.h"
+#ifndef UNIV_HOTBACKUP
#include "dict0load.h"
-#include "trx0undo.h"
-#include "trx0sys.h"
#include "rem0types.h"
-#include "data0type.h"
-/*************************************************************************
+/*********************************************************************//**
Gets the column data type. */
UNIV_INLINE
void
dict_col_copy_type(
/*===============*/
- const dict_col_t* col, /* in: column */
- dtype_t* type) /* out: data type */
+ const dict_col_t* col, /*!< in: column */
+ dtype_t* type) /*!< out: data type */
{
ut_ad(col && type);
@@ -29,17 +45,18 @@ dict_col_copy_type(
type->mbminlen = col->mbminlen;
type->mbmaxlen = col->mbmaxlen;
}
+#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_DEBUG
-/*************************************************************************
-Assert that a column and a data type match. */
+/*********************************************************************//**
+Assert that a column and a data type match.
+@return TRUE */
UNIV_INLINE
ibool
dict_col_type_assert_equal(
/*=======================*/
- /* out: TRUE */
- const dict_col_t* col, /* in: column */
- const dtype_t* type) /* in: data type */
+ const dict_col_t* col, /*!< in: column */
+ const dtype_t* type) /*!< in: data type */
{
ut_ad(col);
ut_ad(type);
@@ -47,88 +64,95 @@ dict_col_type_assert_equal(
ut_ad(col->mtype == type->mtype);
ut_ad(col->prtype == type->prtype);
ut_ad(col->len == type->len);
+# ifndef UNIV_HOTBACKUP
ut_ad(col->mbminlen == type->mbminlen);
ut_ad(col->mbmaxlen == type->mbmaxlen);
+# endif /* !UNIV_HOTBACKUP */
return(TRUE);
}
#endif /* UNIV_DEBUG */
-/***************************************************************************
-Returns the minimum size of the column. */
+#ifndef UNIV_HOTBACKUP
+/***********************************************************************//**
+Returns the minimum size of the column.
+@return minimum size */
UNIV_INLINE
ulint
dict_col_get_min_size(
/*==================*/
- /* out: minimum size */
- const dict_col_t* col) /* in: column */
+ const dict_col_t* col) /*!< in: column */
{
return(dtype_get_min_size_low(col->mtype, col->prtype, col->len,
col->mbminlen, col->mbmaxlen));
}
-/***************************************************************************
-Returns the maximum size of the column. */
+/***********************************************************************//**
+Returns the maximum size of the column.
+@return maximum size */
UNIV_INLINE
ulint
dict_col_get_max_size(
/*==================*/
- /* out: maximum size */
- const dict_col_t* col) /* in: column */
+ const dict_col_t* col) /*!< in: column */
{
return(dtype_get_max_size_low(col->mtype, col->len));
}
-/***************************************************************************
-Returns the size of a fixed size column, 0 if not a fixed size column. */
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************************//**
+Returns the size of a fixed size column, 0 if not a fixed size column.
+@return fixed size, or 0 */
UNIV_INLINE
ulint
dict_col_get_fixed_size(
/*====================*/
- /* out: fixed size, or 0 */
- const dict_col_t* col) /* in: column */
+ const dict_col_t* col, /*!< in: column */
+ ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
{
return(dtype_get_fixed_size_low(col->mtype, col->prtype, col->len,
- col->mbminlen, col->mbmaxlen));
+ col->mbminlen, col->mbmaxlen, comp));
}
-/***************************************************************************
+/***********************************************************************//**
Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column.
-For fixed length types it is the fixed length of the type, otherwise 0. */
+For fixed length types it is the fixed length of the type, otherwise 0.
+@return SQL null storage size in ROW_FORMAT=REDUNDANT */
UNIV_INLINE
ulint
dict_col_get_sql_null_size(
/*=======================*/
- /* out: SQL null storage size
- in ROW_FORMAT=REDUNDANT */
- const dict_col_t* col) /* in: column */
+ const dict_col_t* col, /*!< in: column */
+ ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
{
- return(dict_col_get_fixed_size(col));
+ return(dict_col_get_fixed_size(col, comp));
}
-/*************************************************************************
-Gets the column number. */
+/*********************************************************************//**
+Gets the column number.
+@return col->ind, table column position (starting from 0) */
UNIV_INLINE
ulint
dict_col_get_no(
/*============*/
- const dict_col_t* col)
+ const dict_col_t* col) /*!< in: column */
{
ut_ad(col);
return(col->ind);
}
-/*************************************************************************
+/*********************************************************************//**
Gets the column position in the clustered index. */
UNIV_INLINE
ulint
dict_col_get_clust_pos(
/*===================*/
- const dict_col_t* col, /* in: table column */
- const dict_index_t* clust_index) /* in: clustered index */
+ const dict_col_t* col, /*!< in: table column */
+ const dict_index_t* clust_index) /*!< in: clustered index */
{
ulint i;
ut_ad(col);
- ut_ad(clust_index && clust_index->type & DICT_CLUSTERED);
+ ut_ad(clust_index);
+ ut_ad(dict_index_is_clust(clust_index));
for (i = 0; i < clust_index->n_def; i++) {
const dict_field_t* field = &clust_index->fields[i];
@@ -141,46 +165,112 @@ dict_col_get_clust_pos(
return(ULINT_UNDEFINED);
}
-/************************************************************************
-Gets the first index on the table (the clustered index). */
+#ifndef UNIV_HOTBACKUP
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Gets the first index on the table (the clustered index).
+@return index, NULL if none exists */
UNIV_INLINE
dict_index_t*
dict_table_get_first_index(
/*=======================*/
- /* out: index, NULL if none exists */
- dict_table_t* table) /* in: table */
+ const dict_table_t* table) /*!< in: table */
{
ut_ad(table);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
- return(UT_LIST_GET_FIRST(table->indexes));
+ return(UT_LIST_GET_FIRST(((dict_table_t*) table)->indexes));
}
-/************************************************************************
-Gets the next index on the table. */
+/********************************************************************//**
+Gets the next index on the table.
+@return index, NULL if none left */
UNIV_INLINE
dict_index_t*
dict_table_get_next_index(
/*======================*/
- /* out: index, NULL if none left */
- dict_index_t* index) /* in: index */
+ const dict_index_t* index) /*!< in: index */
+{
+ ut_ad(index);
+ ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+ return(UT_LIST_GET_NEXT(indexes, (dict_index_t*) index));
+}
+#endif /* UNIV_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
+
+/********************************************************************//**
+Check whether the index is the clustered index.
+@return nonzero for clustered index, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_clust(
+/*================*/
+ const dict_index_t* index) /*!< in: index */
+{
+ ut_ad(index);
+ ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+ return(UNIV_UNLIKELY(index->type & DICT_CLUSTERED));
+}
+/********************************************************************//**
+Check whether the index is unique.
+@return nonzero for unique index, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_unique(
+/*=================*/
+ const dict_index_t* index) /*!< in: index */
+{
+ ut_ad(index);
+ ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+ return(UNIV_UNLIKELY(index->type & DICT_UNIQUE));
+}
+
+/********************************************************************//**
+Check whether the index is the insert buffer tree.
+@return nonzero for insert buffer, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_ibuf(
+/*===============*/
+ const dict_index_t* index) /*!< in: index */
+{
+ ut_ad(index);
+ ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+ return(UNIV_UNLIKELY(index->type & DICT_IBUF));
+}
+
+/********************************************************************//**
+Check whether the index is a secondary index or the insert buffer tree.
+@return nonzero for secondary or insert buffer index, zero otherwise */
+UNIV_INLINE
+ulint
+dict_index_is_sec_or_ibuf(
+/*======================*/
+ const dict_index_t* index) /*!< in: index */
{
+ ulint type;
+
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- return(UT_LIST_GET_NEXT(indexes, index));
+ type = index->type;
+
+ return(UNIV_LIKELY(!(type & DICT_CLUSTERED) || (type & DICT_IBUF)));
}
-/************************************************************************
+/********************************************************************//**
Gets the number of user-defined columns in a table in the dictionary
-cache. */
+cache.
+@return number of user-defined (e.g., not ROW_ID) columns of a table */
UNIV_INLINE
ulint
dict_table_get_n_user_cols(
/*=======================*/
- /* out: number of user-defined (e.g., not
- ROW_ID) columns of a table */
- dict_table_t* table) /* in: table */
+ const dict_table_t* table) /*!< in: table */
{
ut_ad(table);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
@@ -188,15 +278,14 @@ dict_table_get_n_user_cols(
return(table->n_cols - DATA_N_SYS_COLS);
}
-/************************************************************************
-Gets the number of system columns in a table in the dictionary cache. */
+/********************************************************************//**
+Gets the number of system columns in a table in the dictionary cache.
+@return number of system (e.g., ROW_ID) columns of a table */
UNIV_INLINE
ulint
dict_table_get_n_sys_cols(
/*======================*/
- /* out: number of system (e.g.,
- ROW_ID) columns of a table */
- dict_table_t* table __attribute__((unused))) /* in: table */
+ const dict_table_t* table __attribute__((unused))) /*!< in: table */
{
ut_ad(table);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
@@ -205,15 +294,15 @@ dict_table_get_n_sys_cols(
return(DATA_N_SYS_COLS);
}
-/************************************************************************
+/********************************************************************//**
Gets the number of all columns (also system) in a table in the dictionary
-cache. */
+cache.
+@return number of columns of a table */
UNIV_INLINE
ulint
dict_table_get_n_cols(
/*==================*/
- /* out: number of columns of a table */
- dict_table_t* table) /* in: table */
+ const dict_table_t* table) /*!< in: table */
{
ut_ad(table);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
@@ -221,34 +310,35 @@ dict_table_get_n_cols(
return(table->n_cols);
}
-/************************************************************************
-Gets the nth column of a table. */
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Gets the nth column of a table.
+@return pointer to column object */
UNIV_INLINE
-const dict_col_t*
+dict_col_t*
dict_table_get_nth_col(
/*===================*/
- /* out: pointer to column object */
- const dict_table_t* table, /* in: table */
- ulint pos) /* in: position of column */
+ const dict_table_t* table, /*!< in: table */
+ ulint pos) /*!< in: position of column */
{
ut_ad(table);
ut_ad(pos < table->n_def);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
- return((table->cols) + pos);
+ return((dict_col_t*) (table->cols) + pos);
}
-/************************************************************************
-Gets the given system column of a table. */
+/********************************************************************//**
+Gets the given system column of a table.
+@return pointer to column object */
UNIV_INLINE
-const dict_col_t*
+dict_col_t*
dict_table_get_sys_col(
/*===================*/
- /* out: pointer to column object */
- const dict_table_t* table, /* in: table */
- ulint sys) /* in: DATA_ROW_ID, ... */
+ const dict_table_t* table, /*!< in: table */
+ ulint sys) /*!< in: DATA_ROW_ID, ... */
{
- const dict_col_t* col;
+ dict_col_t* col;
ut_ad(table);
ut_ad(sys < DATA_N_SYS_COLS);
@@ -261,16 +351,17 @@ dict_table_get_sys_col(
return(col);
}
+#endif /* UNIV_DEBUG */
-/************************************************************************
-Gets the given system column number of a table. */
+/********************************************************************//**
+Gets the given system column number of a table.
+@return column number */
UNIV_INLINE
ulint
dict_table_get_sys_col_no(
/*======================*/
- /* out: column number */
- dict_table_t* table, /* in: table */
- ulint sys) /* in: DATA_ROW_ID, ... */
+ const dict_table_t* table, /*!< in: table */
+ ulint sys) /*!< in: DATA_ROW_ID, ... */
{
ut_ad(table);
ut_ad(sys < DATA_N_SYS_COLS);
@@ -279,15 +370,14 @@ dict_table_get_sys_col_no(
return(table->n_cols - DATA_N_SYS_COLS + sys);
}
-/************************************************************************
-Check whether the table uses the compact page format. */
+/********************************************************************//**
+Check whether the table uses the compact page format.
+@return TRUE if table uses the compact page format */
UNIV_INLINE
ibool
dict_table_is_comp(
/*===============*/
- /* out: TRUE if table uses the
- compact page format */
- const dict_table_t* table) /* in: table */
+ const dict_table_t* table) /*!< in: table */
{
ut_ad(table);
@@ -298,16 +388,81 @@ dict_table_is_comp(
return(UNIV_LIKELY(table->flags & DICT_TF_COMPACT));
}
-/************************************************************************
+/********************************************************************//**
+Determine the file format of a table.
+@return file format version */
+UNIV_INLINE
+ulint
+dict_table_get_format(
+/*==================*/
+ const dict_table_t* table) /*!< in: table */
+{
+ ut_ad(table);
+
+ return((table->flags & DICT_TF_FORMAT_MASK) >> DICT_TF_FORMAT_SHIFT);
+}
+
+/********************************************************************//**
+Set the file format of a table. */
+UNIV_INLINE
+void
+dict_table_set_format(
+/*==================*/
+ dict_table_t* table, /*!< in/out: table */
+ ulint format) /*!< in: file format version */
+{
+ ut_ad(table);
+
+ table->flags = (table->flags & ~DICT_TF_FORMAT_MASK)
+ | (format << DICT_TF_FORMAT_SHIFT);
+}
+
+/********************************************************************//**
+Extract the compressed page size from table flags.
+@return compressed page size, or 0 if not compressed */
+UNIV_INLINE
+ulint
+dict_table_flags_to_zip_size(
+/*=========================*/
+ ulint flags) /*!< in: flags */
+{
+ ulint zip_size = flags & DICT_TF_ZSSIZE_MASK;
+
+ if (UNIV_UNLIKELY(zip_size)) {
+ zip_size = ((PAGE_ZIP_MIN_SIZE >> 1)
+ << (zip_size >> DICT_TF_ZSSIZE_SHIFT));
+
+ ut_ad(zip_size <= UNIV_PAGE_SIZE);
+ }
+
+ return(zip_size);
+}
+
+/********************************************************************//**
+Check whether the table uses the compressed compact page format.
+@return compressed page size, or 0 if not compressed */
+UNIV_INLINE
+ulint
+dict_table_zip_size(
+/*================*/
+ const dict_table_t* table) /*!< in: table */
+{
+ ut_ad(table);
+
+ return(dict_table_flags_to_zip_size(table->flags));
+}
+
+/********************************************************************//**
Gets the number of fields in the internal representation of an index,
-including fields added by the dictionary system. */
+including fields added by the dictionary system.
+@return number of fields */
UNIV_INLINE
ulint
dict_index_get_n_fields(
/*====================*/
- /* out: number of fields */
- dict_index_t* index) /* in: an internal representation of index
- (in the dictionary cache) */
+ const dict_index_t* index) /*!< in: an internal
+ representation of index (in
+ the dictionary cache) */
{
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
@@ -315,18 +470,18 @@ dict_index_get_n_fields(
return(index->n_fields);
}
-/************************************************************************
+/********************************************************************//**
Gets the number of fields in the internal representation of an index
that uniquely determine the position of an index entry in the index, if
we do not take multiversioning into account: in the B-tree use the value
-returned by dict_index_get_n_unique_in_tree. */
+returned by dict_index_get_n_unique_in_tree.
+@return number of fields */
UNIV_INLINE
ulint
dict_index_get_n_unique(
/*====================*/
- /* out: number of fields */
- dict_index_t* index) /* in: an internal representation of index
- (in the dictionary cache) */
+ const dict_index_t* index) /*!< in: an internal representation
+ of index (in the dictionary cache) */
{
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
@@ -335,23 +490,23 @@ dict_index_get_n_unique(
return(index->n_uniq);
}
-/************************************************************************
+/********************************************************************//**
Gets the number of fields in the internal representation of an index
which uniquely determine the position of an index entry in the index, if
-we also take multiversioning into account. */
+we also take multiversioning into account.
+@return number of fields */
UNIV_INLINE
ulint
dict_index_get_n_unique_in_tree(
/*============================*/
- /* out: number of fields */
- dict_index_t* index) /* in: an internal representation of index
- (in the dictionary cache) */
+ const dict_index_t* index) /*!< in: an internal representation
+ of index (in the dictionary cache) */
{
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
ut_ad(index->cached);
- if (index->type & DICT_CLUSTERED) {
+ if (dict_index_is_clust(index)) {
return(dict_index_get_n_unique(index));
}
@@ -359,55 +514,56 @@ dict_index_get_n_unique_in_tree(
return(dict_index_get_n_fields(index));
}
-/************************************************************************
+/********************************************************************//**
Gets the number of user-defined ordering fields in the index. In the internal
representation of clustered indexes we add the row id to the ordering fields
to make a clustered index unique, but this function returns the number of
-fields the user defined in the index as ordering fields. */
+fields the user defined in the index as ordering fields.
+@return number of fields */
UNIV_INLINE
ulint
dict_index_get_n_ordering_defined_by_user(
/*======================================*/
- /* out: number of fields */
- dict_index_t* index) /* in: an internal representation of index
- (in the dictionary cache) */
+ const dict_index_t* index) /*!< in: an internal representation
+ of index (in the dictionary cache) */
{
return(index->n_user_defined_cols);
}
-/************************************************************************
-Gets the nth field of an index. */
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Gets the nth field of an index.
+@return pointer to field object */
UNIV_INLINE
dict_field_t*
dict_index_get_nth_field(
/*=====================*/
- /* out: pointer to field object */
- dict_index_t* index, /* in: index */
- ulint pos) /* in: position of field */
+ const dict_index_t* index, /*!< in: index */
+ ulint pos) /*!< in: position of field */
{
ut_ad(index);
ut_ad(pos < index->n_def);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- return((index->fields) + pos);
+ return((dict_field_t*) (index->fields) + pos);
}
+#endif /* UNIV_DEBUG */
-/************************************************************************
-Returns the position of a system column in an index. */
+/********************************************************************//**
+Returns the position of a system column in an index.
+@return position, ULINT_UNDEFINED if not contained */
UNIV_INLINE
ulint
dict_index_get_sys_col_pos(
/*=======================*/
- /* out: position, ULINT_UNDEFINED if not
- contained */
- dict_index_t* index, /* in: index */
- ulint type) /* in: DATA_ROW_ID, ... */
+ const dict_index_t* index, /*!< in: index */
+ ulint type) /*!< in: DATA_ROW_ID, ... */
{
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
ut_ad(!(index->type & DICT_UNIVERSAL));
- if (index->type & DICT_CLUSTERED) {
+ if (dict_index_is_clust(index)) {
return(dict_col_get_clust_pos(
dict_table_get_sys_col(index->table, type),
@@ -418,54 +574,75 @@ dict_index_get_sys_col_pos(
index, dict_table_get_sys_col_no(index->table, type)));
}
-/*************************************************************************
-Gets the field column. */
+/*********************************************************************//**
+Gets the field column.
+@return field->col, pointer to the table column */
UNIV_INLINE
const dict_col_t*
dict_field_get_col(
/*===============*/
- const dict_field_t* field)
+ const dict_field_t* field) /*!< in: index field */
{
ut_ad(field);
return(field->col);
}
-/************************************************************************
-Gets pointer to the nth column in an index. */
+/********************************************************************//**
+Gets pointer to the nth column in an index.
+@return column */
UNIV_INLINE
const dict_col_t*
dict_index_get_nth_col(
/*===================*/
- /* out: column */
- const dict_index_t* index, /* in: index */
- ulint pos) /* in: position of the field */
+ const dict_index_t* index, /*!< in: index */
+ ulint pos) /*!< in: position of the field */
{
- return(dict_field_get_col(dict_index_get_nth_field((dict_index_t*)
- index, pos)));
+ return(dict_field_get_col(dict_index_get_nth_field(index, pos)));
}
-/************************************************************************
-Gets the column number the nth field in an index. */
+/********************************************************************//**
+Gets the column number of the nth field in an index.
+@return column number */
UNIV_INLINE
ulint
dict_index_get_nth_col_no(
/*======================*/
- /* out: column number */
- const dict_index_t* index, /* in: index */
- ulint pos) /* in: position of the field */
+ const dict_index_t* index, /*!< in: index */
+ ulint pos) /*!< in: position of the field */
{
return(dict_col_get_no(dict_index_get_nth_col(index, pos)));
}
-/*************************************************************************
-Gets the space id of the root of the index tree. */
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Returns the minimum data size of an index record.
+@return minimum data size in bytes */
+UNIV_INLINE
+ulint
+dict_index_get_min_size(
+/*====================*/
+ const dict_index_t* index) /*!< in: index */
+{
+ ulint n = dict_index_get_n_fields(index);
+ ulint size = 0;
+
+ while (n--) {
+ size += dict_col_get_min_size(dict_index_get_nth_col(index,
+ n));
+ }
+
+ return(size);
+}
+
+/*********************************************************************//**
+Gets the space id of the root of the index tree.
+@return space id */
UNIV_INLINE
ulint
dict_index_get_space(
/*=================*/
- /* out: space id */
- dict_index_t* index) /* in: index */
+ const dict_index_t* index) /*!< in: index */
{
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
@@ -473,14 +650,14 @@ dict_index_get_space(
return(index->space);
}
-/*************************************************************************
+/*********************************************************************//**
Sets the space id of the root of the index tree. */
UNIV_INLINE
void
dict_index_set_space(
/*=================*/
- dict_index_t* index, /* in: index */
- ulint space) /* in: space id */
+ dict_index_t* index, /*!< in/out: index */
+ ulint space) /*!< in: space id */
{
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
@@ -488,14 +665,14 @@ dict_index_set_space(
index->space = space;
}
-/*************************************************************************
-Gets the page number of the root of the index tree. */
+/*********************************************************************//**
+Gets the page number of the root of the index tree.
+@return page number */
UNIV_INLINE
ulint
dict_index_get_page(
/*================*/
- /* out: page number */
- dict_index_t* index) /* in: index */
+ const dict_index_t* index) /*!< in: index */
{
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
@@ -503,14 +680,14 @@ dict_index_get_page(
return(index->page);
}
-/*************************************************************************
+/*********************************************************************//**
Sets the page number of the root of index tree. */
UNIV_INLINE
void
dict_index_set_page(
/*================*/
- dict_index_t* index, /* in: index */
- ulint page) /* in: page number */
+ dict_index_t* index, /*!< in/out: index */
+ ulint page) /*!< in: page number */
{
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
@@ -518,29 +695,14 @@ dict_index_set_page(
index->page = page;
}
-/*************************************************************************
-Gets the type of the index tree. */
-UNIV_INLINE
-ulint
-dict_index_get_type(
-/*================*/
- /* out: type */
- dict_index_t* index) /* in: index */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return(index->type);
-}
-
-/*************************************************************************
-Gets the read-write lock of the index tree. */
+/*********************************************************************//**
+Gets the read-write lock of the index tree.
+@return read-write lock */
UNIV_INLINE
rw_lock_t*
dict_index_get_lock(
/*================*/
- /* out: read-write lock */
- dict_index_t* index) /* in: index */
+ dict_index_t* index) /*!< in: index */
{
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
@@ -548,28 +710,27 @@ dict_index_get_lock(
return(&(index->lock));
}
-/************************************************************************
+/********************************************************************//**
Returns free space reserved for future updates of records. This is
relevant only in the case of many consecutive inserts, as updates
-which make the records bigger might fragment the index. */
+which make the records bigger might fragment the index.
+@return number of free bytes on page, reserved for updates */
UNIV_INLINE
ulint
dict_index_get_space_reserve(void)
/*==============================*/
- /* out: number of free bytes on page,
- reserved for updates */
{
return(UNIV_PAGE_SIZE / 16);
}
-/**************************************************************************
-Checks if a table is in the dictionary cache. */
+/**********************************************************************//**
+Checks if a table is in the dictionary cache.
+@return table, NULL if not found */
UNIV_INLINE
dict_table_t*
dict_table_check_if_in_cache_low(
/*=============================*/
- /* out: table, NULL if not found */
- const char* table_name) /* in: table name */
+ const char* table_name) /*!< in: table name */
{
dict_table_t* table;
ulint table_fold;
@@ -580,20 +741,21 @@ dict_table_check_if_in_cache_low(
/* Look for the table name in the hash table */
table_fold = ut_fold_string(table_name);
- HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold, table,
- ut_strcmp(table->name, table_name) == 0);
+ HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold,
+ dict_table_t*, table, ut_ad(table->cached),
+ !strcmp(table->name, table_name));
return(table);
}
-/**************************************************************************
+/**********************************************************************//**
Gets a table; loads it to the dictionary cache if necessary. A low-level
-function. */
+function.
+@return table, NULL if not found */
UNIV_INLINE
dict_table_t*
dict_table_get_low(
/*===============*/
- /* out: table, NULL if not found */
- const char* table_name) /* in: table name */
+ const char* table_name) /*!< in: table name */
{
dict_table_t* table;
@@ -606,17 +768,19 @@ dict_table_get_low(
table = dict_load_table(table_name);
}
+ ut_ad(!table || table->cached);
+
return(table);
}
-/**************************************************************************
-Returns a table object based on table id. */
+/**********************************************************************//**
+Returns a table object based on table id.
+@return table, NULL if does not exist */
UNIV_INLINE
dict_table_t*
dict_table_get_on_id_low(
/*=====================*/
- /* out: table, NULL if does not exist */
- dulint table_id) /* in: table id */
+ dulint table_id) /*!< in: table id */
{
dict_table_t* table;
ulint fold;
@@ -626,39 +790,17 @@ dict_table_get_on_id_low(
/* Look for the table name in the hash table */
fold = ut_fold_dulint(table_id);
- HASH_SEARCH(id_hash, dict_sys->table_id_hash, fold, table,
- ut_dulint_cmp(table->id, table_id) == 0);
+ HASH_SEARCH(id_hash, dict_sys->table_id_hash, fold,
+ dict_table_t*, table, ut_ad(table->cached),
+ !ut_dulint_cmp(table->id, table_id));
if (table == NULL) {
table = dict_load_table_on_id(table_id);
}
+ ut_ad(!table || table->cached);
+
/* TODO: should get the type information from MySQL */
return(table);
}
-
-/**************************************************************************
-Returns an index object. */
-UNIV_INLINE
-dict_index_t*
-dict_table_get_index(
-/*=================*/
- /* out: index, NULL if does not exist */
- dict_table_t* table, /* in: table */
- const char* name) /* in: index name */
-{
- dict_index_t* index = NULL;
-
- index = dict_table_get_first_index(table);
-
- while (index != NULL) {
- if (ut_strcmp(name, index->name) == 0) {
-
- break;
- }
-
- index = dict_table_get_next_index(index);
- }
-
- return(index);
-}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/dict0load.h b/storage/innobase/include/dict0load.h
index 7e19c2eb3c0..60b8c1fb632 100644
--- a/storage/innobase/include/dict0load.h
+++ b/storage/innobase/include/dict0load.h
@@ -1,9 +1,26 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0load.h
Loads to the memory cache database object definitions
from dictionary tables
-(c) 1996 Innobase Oy
-
Created 4/24/1996 Heikki Tuuri
*******************************************************/
@@ -13,8 +30,9 @@ Created 4/24/1996 Heikki Tuuri
#include "univ.i"
#include "dict0types.h"
#include "ut0byte.h"
+#include "mem0mem.h"
-/************************************************************************
+/********************************************************************//**
In a crash recovery we already have all the tablespace objects created.
This function compares the space id information in the InnoDB data dictionary
to what we already read with fil_load_single_table_tablespaces().
@@ -22,72 +40,69 @@ to what we already read with fil_load_single_table_tablespaces().
In a normal startup, we create the tablespace objects for every table in
InnoDB's data dictionary, if the corresponding .ibd file exists.
We also scan the biggest space id, and store it to fil_system. */
-
+UNIV_INTERN
void
dict_check_tablespaces_and_store_max_id(
/*====================================*/
- ibool in_crash_recovery); /* in: are we doing a crash recovery */
-/************************************************************************
-Finds the first table name in the given database. */
-
+ ibool in_crash_recovery); /*!< in: are we doing a crash recovery */
+/********************************************************************//**
+Finds the first table name in the given database.
+@return own: table name, NULL if does not exist; the caller must free
+the memory in the string! */
+UNIV_INTERN
char*
dict_get_first_table_name_in_db(
/*============================*/
- /* out, own: table name, NULL if
- does not exist; the caller must free
- the memory in the string! */
- const char* name); /* in: database name which ends to '/' */
-/************************************************************************
+ const char* name); /*!< in: database name ending in '/' */
+/********************************************************************//**
Loads a table definition and also all its index definitions, and also
the cluster definition if the table is a member in a cluster. Also loads
all foreign key constraints where the foreign key is in the table or where
-a foreign key references columns in this table. */
-
+a foreign key references columns in this table.
+@return table, NULL if does not exist; if the table is stored in an
+.ibd file, but the file does not exist, then we set the
+ibd_file_missing flag TRUE in the table object we return */
+UNIV_INTERN
dict_table_t*
dict_load_table(
/*============*/
- /* out: table, NULL if does not exist;
- if the table is stored in an .ibd file,
- but the file does not exist,
- then we set the ibd_file_missing flag TRUE
- in the table object we return */
- const char* name); /* in: table name in the
+ const char* name); /*!< in: table name in the
databasename/tablename format */
-/***************************************************************************
-Loads a table object based on the table id. */
-
+/***********************************************************************//**
+Loads a table object based on the table id.
+@return table; NULL if table does not exist */
+UNIV_INTERN
dict_table_t*
dict_load_table_on_id(
/*==================*/
- /* out: table; NULL if table does not exist */
- dulint table_id); /* in: table id */
-/************************************************************************
+ dulint table_id); /*!< in: table id */
+/********************************************************************//**
This function is called when the database is booted.
Loads system table index definitions except for the clustered index which
is added to the dictionary cache at booting before calling this function. */
-
+UNIV_INTERN
void
dict_load_sys_table(
/*================*/
- dict_table_t* table); /* in: system table */
-/***************************************************************************
+ dict_table_t* table); /*!< in: system table */
+/***********************************************************************//**
Loads foreign key constraints where the table is either the foreign key
holder or where the table is referenced by a foreign key. Adds these
constraints to the data dictionary. Note that we know that the dictionary
cache already contains all constraints where the other relevant table is
-already in the dictionary cache. */
-
+already in the dictionary cache.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
ulint
dict_load_foreigns(
/*===============*/
- /* out: DB_SUCCESS or error code */
- const char* table_name, /* in: table name */
- ibool check_charsets);/* in: TRUE=check charsets
+ const char* table_name, /*!< in: table name */
+ ibool check_charsets);/*!< in: TRUE=check charsets
compatibility */
-/************************************************************************
+/********************************************************************//**
Prints to the standard output information on all tables found in the data
dictionary system table. */
-
+UNIV_INTERN
void
dict_print(void);
/*============*/
diff --git a/storage/innobase/include/dict0load.ic b/storage/innobase/include/dict0load.ic
index 1a207fbf0fd..ccc16db165b 100644
--- a/storage/innobase/include/dict0load.ic
+++ b/storage/innobase/include/dict0load.ic
@@ -1,9 +1,26 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0load.ic
Loads to the memory cache database object definitions
from dictionary tables
-(c) 1996 Innobase Oy
-
Created 4/24/1996 Heikki Tuuri
*******************************************************/
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
index ac28fdb1bae..2d001111938 100644
--- a/storage/innobase/include/dict0mem.h
+++ b/storage/innobase/include/dict0mem.h
@@ -1,7 +1,24 @@
-/******************************************************
-Data dictionary memory object creation
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1996 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0mem.h
+Data dictionary memory object creation
Created 1/8/1996 Heikki Tuuri
*******************************************************/
@@ -12,121 +29,156 @@ Created 1/8/1996 Heikki Tuuri
#include "univ.i"
#include "dict0types.h"
#include "data0type.h"
-#include "data0data.h"
#include "mem0mem.h"
#include "rem0types.h"
#include "btr0types.h"
+#ifndef UNIV_HOTBACKUP
+# include "lock0types.h"
+# include "que0types.h"
+# include "sync0rw.h"
+#endif /* !UNIV_HOTBACKUP */
#include "ut0mem.h"
#include "ut0lst.h"
#include "ut0rnd.h"
#include "ut0byte.h"
-#include "sync0rw.h"
-#include "lock0types.h"
#include "hash0hash.h"
-#include "que0types.h"
+#include "trx0types.h"
-/* Type flags of an index: OR'ing of the flags is allowed to define a
+/** Type flags of an index: OR'ing of the flags is allowed to define a
combination of types */
-#define DICT_CLUSTERED 1 /* clustered index */
-#define DICT_UNIQUE 2 /* unique index */
-#define DICT_UNIVERSAL 4 /* index which can contain records from any
+/* @{ */
+#define DICT_CLUSTERED 1 /*!< clustered index */
+#define DICT_UNIQUE 2 /*!< unique index */
+#define DICT_UNIVERSAL 4 /*!< index which can contain records from any
other index */
-#define DICT_IBUF 8 /* insert buffer tree */
+#define DICT_IBUF 8 /*!< insert buffer tree */
+/* @} */
-/* Types for a table object */
-#define DICT_TABLE_ORDINARY 1
+/** Types for a table object */
+#define DICT_TABLE_ORDINARY 1 /*!< ordinary table */
#if 0 /* not implemented */
#define DICT_TABLE_CLUSTER_MEMBER 2
#define DICT_TABLE_CLUSTER 3 /* this means that the table is
really a cluster definition */
#endif
-/* Table flags */
-#define DICT_TF_COMPACT 1 /* compact page format */
-
-/**************************************************************************
-Creates a table memory object. */
+/** Table flags. All unused bits must be 0. */
+/* @{ */
+#define DICT_TF_COMPACT 1 /*!< Compact page format.
+ This must be set for
+ new file formats
+ (later than
+ DICT_TF_FORMAT_51). */
+
+/** Compressed page size (0=uncompressed, up to 15 compressed sizes) */
+/* @{ */
+#define DICT_TF_ZSSIZE_SHIFT 1
+#define DICT_TF_ZSSIZE_MASK (15 << DICT_TF_ZSSIZE_SHIFT)
+#define DICT_TF_ZSSIZE_MAX (UNIV_PAGE_SIZE_SHIFT - PAGE_ZIP_MIN_SIZE_SHIFT + 1)
+/* @} */
+
+/** File format */
+/* @{ */
+#define DICT_TF_FORMAT_SHIFT 5 /*!< bit offset of the file format field */
+#define DICT_TF_FORMAT_MASK (127 << DICT_TF_FORMAT_SHIFT)
+#define DICT_TF_FORMAT_51 0 /*!< InnoDB/MySQL up to 5.1 */
+#define DICT_TF_FORMAT_ZIP 1 /*!< InnoDB plugin for 5.1:
+ compressed tables,
+ new BLOB treatment */
+/** Maximum supported file format */
+#define DICT_TF_FORMAT_MAX DICT_TF_FORMAT_ZIP
+
+#define DICT_TF_BITS 6 /*!< number of flag bits */
+#if (1 << (DICT_TF_BITS - DICT_TF_FORMAT_SHIFT)) <= DICT_TF_FORMAT_MAX
+# error "DICT_TF_BITS is insufficient for DICT_TF_FORMAT_MAX"
+#endif
+/* @} */
+/* @} */
+/**********************************************************************//**
+Creates a table memory object.
+@return own: table object */
+UNIV_INTERN
dict_table_t*
dict_mem_table_create(
/*==================*/
- /* out, own: table object */
- const char* name, /* in: table name */
- ulint space, /* in: space where the clustered index
+ const char* name, /*!< in: table name */
+ ulint space, /*!< in: space where the clustered index
of the table is placed; this parameter
is ignored if the table is made
a member of a cluster */
- ulint n_cols, /* in: number of columns */
- ulint flags); /* in: table flags */
-/********************************************************************
+ ulint n_cols, /*!< in: number of columns */
+ ulint flags); /*!< in: table flags */
+/****************************************************************//**
Free a table memory object. */
-
+UNIV_INTERN
void
dict_mem_table_free(
/*================*/
- dict_table_t* table); /* in: table */
-/**************************************************************************
+ dict_table_t* table); /*!< in: table */
+/**********************************************************************//**
Adds a column definition to a table. */
-
+UNIV_INTERN
void
dict_mem_table_add_col(
/*===================*/
- dict_table_t* table, /* in: table */
- mem_heap_t* heap, /* in: temporary memory heap, or NULL */
- const char* name, /* in: column name, or NULL */
- ulint mtype, /* in: main datatype */
- ulint prtype, /* in: precise type */
- ulint len); /* in: precision */
-/**************************************************************************
-Creates an index memory object. */
-
+ dict_table_t* table, /*!< in: table */
+ mem_heap_t* heap, /*!< in: temporary memory heap, or NULL */
+ const char* name, /*!< in: column name, or NULL */
+ ulint mtype, /*!< in: main datatype */
+ ulint prtype, /*!< in: precise type */
+ ulint len); /*!< in: precision */
+/**********************************************************************//**
+Creates an index memory object.
+@return own: index object */
+UNIV_INTERN
dict_index_t*
dict_mem_index_create(
/*==================*/
- /* out, own: index object */
- const char* table_name, /* in: table name */
- const char* index_name, /* in: index name */
- ulint space, /* in: space where the index tree is
+ const char* table_name, /*!< in: table name */
+ const char* index_name, /*!< in: index name */
+ ulint space, /*!< in: space where the index tree is
placed, ignored if the index is of
the clustered type */
- ulint type, /* in: DICT_UNIQUE,
+ ulint type, /*!< in: DICT_UNIQUE,
DICT_CLUSTERED, ... ORed */
- ulint n_fields); /* in: number of fields */
-/**************************************************************************
+ ulint n_fields); /*!< in: number of fields */
+/**********************************************************************//**
Adds a field definition to an index. NOTE: does not take a copy
of the column name if the field is a column. The memory occupied
by the column name may be released only after publishing the index. */
-
+UNIV_INTERN
void
dict_mem_index_add_field(
/*=====================*/
- dict_index_t* index, /* in: index */
- const char* name, /* in: column name */
- ulint prefix_len); /* in: 0 or the column prefix length
+ dict_index_t* index, /*!< in: index */
+ const char* name, /*!< in: column name */
+ ulint prefix_len); /*!< in: 0 or the column prefix length
in a MySQL index like
INDEX (textcol(25)) */
-/**************************************************************************
+/**********************************************************************//**
Frees an index memory object. */
-
+UNIV_INTERN
void
dict_mem_index_free(
/*================*/
- dict_index_t* index); /* in: index */
-/**************************************************************************
-Creates and initializes a foreign constraint memory object. */
-
+ dict_index_t* index); /*!< in: index */
+/**********************************************************************//**
+Creates and initializes a foreign constraint memory object.
+@return own: foreign constraint struct */
+UNIV_INTERN
dict_foreign_t*
dict_mem_foreign_create(void);
/*=========================*/
- /* out, own: foreign constraint struct */
-/* Data structure for a column in a table */
+/** Data structure for a column in a table */
struct dict_col_struct{
/*----------------------*/
- /* The following are copied from dtype_t,
+ /** The following are copied from dtype_t,
so that all bit-fields can be packed tightly. */
- unsigned mtype:8; /* main data type */
- unsigned prtype:24; /* precise type; MySQL data
+ /* @{ */
+ unsigned mtype:8; /*!< main data type */
+ unsigned prtype:24; /*!< precise type; MySQL data
type, charset code, flags to
indicate nullability,
signedness, whether this is a
@@ -136,7 +188,7 @@ struct dict_col_struct{
/* the remaining fields do not affect alphabetical ordering: */
- unsigned len:16; /* length; for MySQL data this
+ unsigned len:16; /*!< length; for MySQL data this
is field->pack_length(),
except that for a >= 5.0.3
type true VARCHAR this is the
@@ -145,252 +197,276 @@ struct dict_col_struct{
the string, MySQL uses 1 or 2
bytes to store the string length) */
- unsigned mbminlen:2; /* minimum length of a
+ unsigned mbminlen:2; /*!< minimum length of a
character, in bytes */
- unsigned mbmaxlen:3; /* maximum length of a
+ unsigned mbmaxlen:3; /*!< maximum length of a
character, in bytes */
/*----------------------*/
/* End of definitions copied from dtype_t */
+ /* @} */
- unsigned ind:10; /* table column position
+ unsigned ind:10; /*!< table column position
(starting from 0) */
- unsigned ord_part:1; /* nonzero if this column
+ unsigned ord_part:1; /*!< nonzero if this column
appears in the ordering fields
of an index */
};
-/* DICT_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
-indexed column length (or indexed prefix length). It is set to 3*256,
-so that one can create a column prefix index on 256 characters of a
-TEXT or VARCHAR column also in the UTF-8 charset. In that charset,
-a character may take at most 3 bytes.
-This constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
-files would be at risk! */
+/** @brief DICT_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
+indexed column length (or indexed prefix length).
-#define DICT_MAX_INDEX_COL_LEN 768
+It is set to 3*256, so that one can create a column prefix index on
+256 characters of a TEXT or VARCHAR column also in the UTF-8
+charset. In that charset, a character may take at most 3 bytes. This
+constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
+files would be at risk! */
+#define DICT_MAX_INDEX_COL_LEN REC_MAX_INDEX_COL_LEN
-/* Data structure for a field in an index */
+/** Data structure for a field in an index */
struct dict_field_struct{
- dict_col_t* col; /* pointer to the table column */
- const char* name; /* name of the column */
- unsigned prefix_len:10; /* 0 or the length of the column
+ dict_col_t* col; /*!< pointer to the table column */
+ const char* name; /*!< name of the column */
+ unsigned prefix_len:10; /*!< 0 or the length of the column
prefix in bytes in a MySQL index of
type, e.g., INDEX (textcol(25));
must be smaller than
DICT_MAX_INDEX_COL_LEN; NOTE that
in the UTF-8 charset, MySQL sets this
to 3 * the prefix len in UTF-8 chars */
- unsigned fixed_len:10; /* 0 or the fixed length of the
+ unsigned fixed_len:10; /*!< 0 or the fixed length of the
column if smaller than
DICT_MAX_INDEX_COL_LEN */
};
-/* Data structure for an index */
+/** Data structure for an index. Most fields will be
+initialized to 0, NULL or FALSE in dict_mem_index_create(). */
struct dict_index_struct{
- dulint id; /* id of the index */
- mem_heap_t* heap; /* memory heap */
- ulint type; /* index type */
- const char* name; /* index name */
- const char* table_name; /* table name */
- dict_table_t* table; /* back pointer to table */
+ dulint id; /*!< id of the index */
+ mem_heap_t* heap; /*!< memory heap */
+ const char* name; /*!< index name */
+ const char* table_name;/*!< table name */
+ dict_table_t* table; /*!< back pointer to table */
+#ifndef UNIV_HOTBACKUP
unsigned space:32;
- /* space where the index tree is placed */
- unsigned page:32;/* index tree root page number */
- unsigned trx_id_offset:10;/* position of the the trx id column
+ /*!< space where the index tree is placed */
+ unsigned page:32;/*!< index tree root page number */
+#endif /* !UNIV_HOTBACKUP */
+ unsigned type:4; /*!< index type (DICT_CLUSTERED, DICT_UNIQUE,
+ DICT_UNIVERSAL, DICT_IBUF) */
+ unsigned trx_id_offset:10;/*!< position of the trx id column
in a clustered index record, if the fields
before it are known to be of a fixed size,
0 otherwise */
unsigned n_user_defined_cols:10;
- /* number of columns the user defined to
+ /*!< number of columns the user defined to
be in the index: in the internal
representation we add more columns */
- unsigned n_uniq:10;/* number of fields from the beginning
+ unsigned n_uniq:10;/*!< number of fields from the beginning
which are enough to determine an index
entry uniquely */
- unsigned n_def:10;/* number of fields defined so far */
- unsigned n_fields:10;/* number of fields in the index */
- unsigned n_nullable:10;/* number of nullable fields */
- unsigned cached:1;/* TRUE if the index object is in the
+ unsigned n_def:10;/*!< number of fields defined so far */
+ unsigned n_fields:10;/*!< number of fields in the index */
+ unsigned n_nullable:10;/*!< number of nullable fields */
+ unsigned cached:1;/*!< TRUE if the index object is in the
dictionary cache */
- dict_field_t* fields; /* array of field descriptions */
+ unsigned to_be_dropped:1;
+ /*!< TRUE if this index is marked to be
+ dropped in ha_innobase::prepare_drop_index(),
+ otherwise FALSE */
+ dict_field_t* fields; /*!< array of field descriptions */
+#ifndef UNIV_HOTBACKUP
UT_LIST_NODE_T(dict_index_t)
- indexes;/* list of indexes of the table */
- btr_search_t* search_info; /* info used in optimistic searches */
+ indexes;/*!< list of indexes of the table */
+ btr_search_t* search_info; /*!< info used in optimistic searches */
/*----------------------*/
- ib_longlong* stat_n_diff_key_vals;
- /* approximate number of different key values
- for this index, for each n-column prefix
- where n <= dict_get_n_unique(index); we
- periodically calculate new estimates */
+ /** Statistics for query optimization */
+ /* @{ */
+ ib_int64_t* stat_n_diff_key_vals;
+ /*!< approximate number of different
+ key values for this index, for each
+ n-column prefix where n <=
+ dict_get_n_unique(index); we
+ periodically calculate new
+ estimates */
ulint stat_index_size;
- /* approximate index size in database pages */
+ /*!< approximate index size in
+ database pages */
ulint stat_n_leaf_pages;
- /* approximate number of leaf pages in the
+ /*!< approximate number of leaf pages in the
index tree */
- rw_lock_t lock; /* read-write lock protecting the upper levels
- of the index tree */
+ /* @} */
+ rw_lock_t lock; /*!< read-write lock protecting the
+ upper levels of the index tree */
+ ib_uint64_t trx_id; /*!< id of the transaction that created this
+ index, or 0 if the index existed
+ when InnoDB was started up */
+#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_DEBUG
- ulint magic_n;/* magic number */
+ ulint magic_n;/*!< magic number */
+/** Value of dict_index_struct::magic_n */
# define DICT_INDEX_MAGIC_N 76789786
#endif
};
-/* Data structure for a foreign key constraint; an example:
-FOREIGN KEY (A, B) REFERENCES TABLE2 (C, D) */
-
+/** Data structure for a foreign key constraint; an example:
+FOREIGN KEY (A, B) REFERENCES TABLE2 (C, D). Most fields will be
+initialized to 0, NULL or FALSE in dict_mem_foreign_create(). */
struct dict_foreign_struct{
- mem_heap_t* heap; /* this object is allocated from
+ mem_heap_t* heap; /*!< this object is allocated from
this memory heap */
- char* id; /* id of the constraint as a
+ char* id; /*!< id of the constraint as a
null-terminated string */
- unsigned n_fields:10; /* number of indexes' first fields
- for which the the foreign key
+ unsigned n_fields:10; /*!< number of indexes' first fields
+ for which the foreign key
constraint is defined: we allow the
indexes to contain more fields than
mentioned in the constraint, as long
as the first fields are as mentioned */
- unsigned type:6; /* 0 or DICT_FOREIGN_ON_DELETE_CASCADE
+ unsigned type:6; /*!< 0 or DICT_FOREIGN_ON_DELETE_CASCADE
or DICT_FOREIGN_ON_DELETE_SET_NULL */
- char* foreign_table_name;/* foreign table name */
- dict_table_t* foreign_table; /* table where the foreign key is */
- const char** foreign_col_names;/* names of the columns in the
+ char* foreign_table_name;/*!< foreign table name */
+ dict_table_t* foreign_table; /*!< table where the foreign key is */
+ const char** foreign_col_names;/*!< names of the columns in the
foreign key */
- char* referenced_table_name;/* referenced table name */
- dict_table_t* referenced_table;/* table where the referenced key
+ char* referenced_table_name;/*!< referenced table name */
+ dict_table_t* referenced_table;/*!< table where the referenced key
is */
- const char** referenced_col_names;/* names of the referenced
+ const char** referenced_col_names;/*!< names of the referenced
columns in the referenced table */
- dict_index_t* foreign_index; /* foreign index; we require that
+ dict_index_t* foreign_index; /*!< foreign index; we require that
both tables contain explicitly defined
indexes for the constraint: InnoDB
does not generate new indexes
implicitly */
- dict_index_t* referenced_index;/* referenced index */
+ dict_index_t* referenced_index;/*!< referenced index */
UT_LIST_NODE_T(dict_foreign_t)
- foreign_list; /* list node for foreign keys of the
+ foreign_list; /*!< list node for foreign keys of the
table */
UT_LIST_NODE_T(dict_foreign_t)
- referenced_list;/* list node for referenced keys of the
- table */
+ referenced_list;/*!< list node for referenced
+ keys of the table */
};
-/* The flags for ON_UPDATE and ON_DELETE can be ORed; the default is that
+/** The flags for ON_UPDATE and ON_DELETE can be ORed; the default is that
a foreign key constraint is enforced, therefore RESTRICT just means no flag */
-#define DICT_FOREIGN_ON_DELETE_CASCADE 1
-#define DICT_FOREIGN_ON_DELETE_SET_NULL 2
-#define DICT_FOREIGN_ON_UPDATE_CASCADE 4
-#define DICT_FOREIGN_ON_UPDATE_SET_NULL 8
-#define DICT_FOREIGN_ON_DELETE_NO_ACTION 16
-#define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32
-
-
-/* Data structure for a database table */
+/* @{ */
+#define DICT_FOREIGN_ON_DELETE_CASCADE 1 /*!< ON DELETE CASCADE */
+#define DICT_FOREIGN_ON_DELETE_SET_NULL 2 /*!< ON DELETE SET NULL */
+#define DICT_FOREIGN_ON_UPDATE_CASCADE 4 /*!< ON UPDATE CASCADE */
+#define DICT_FOREIGN_ON_UPDATE_SET_NULL 8 /*!< ON UPDATE SET NULL */
+#define DICT_FOREIGN_ON_DELETE_NO_ACTION 16 /*!< ON DELETE NO ACTION */
+#define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32 /*!< ON UPDATE NO ACTION */
+/* @} */
+
+
+/** Data structure for a database table. Most fields will be
+initialized to 0, NULL or FALSE in dict_mem_table_create(). */
struct dict_table_struct{
- dulint id; /* id of the table */
- mem_heap_t* heap; /* memory heap */
- const char* name; /* table name */
- const char* dir_path_of_temp_table;/* NULL or the directory path
+ dulint id; /*!< id of the table */
+ mem_heap_t* heap; /*!< memory heap */
+ const char* name; /*!< table name */
+ const char* dir_path_of_temp_table;/*!< NULL or the directory path
where a TEMPORARY table that was explicitly
created by a user should be placed if
innodb_file_per_table is defined in my.cnf;
in Unix this is usually /tmp/..., in Windows
- \temp\... */
+ temp\... */
unsigned space:32;
- /* space where the clustered index of the
+ /*!< space where the clustered index of the
table is placed */
+ unsigned flags:DICT_TF_BITS;/*!< DICT_TF_COMPACT, ... */
unsigned ibd_file_missing:1;
- /* TRUE if this is in a single-table
+ /*!< TRUE if this is in a single-table
tablespace and the .ibd file is missing; then
we must return in ha_innodb.cc an error if the
user tries to query such an orphaned table */
unsigned tablespace_discarded:1;
- /* this flag is set TRUE when the user
+ /*!< this flag is set TRUE when the user
calls DISCARD TABLESPACE on this
table, and reset to FALSE in IMPORT
TABLESPACE */
- unsigned cached:1;/* TRUE if the table object has been added
+ unsigned cached:1;/*!< TRUE if the table object has been added
to the dictionary cache */
- unsigned flags:8;/* DICT_TF_COMPACT, ... */
- unsigned n_def:10;/* number of columns defined so far */
- unsigned n_cols:10;/* number of columns */
- dict_col_t* cols; /* array of column descriptions */
+ unsigned n_def:10;/*!< number of columns defined so far */
+ unsigned n_cols:10;/*!< number of columns */
+ dict_col_t* cols; /*!< array of column descriptions */
const char* col_names;
- /* Column names packed in a character string
+ /*!< Column names packed in a character string
"name1\0name2\0...nameN\0". Until
the string contains n_cols, it will be
allocated from a temporary heap. The final
string will be allocated from table->heap. */
- hash_node_t name_hash; /* hash chain node */
- hash_node_t id_hash; /* hash chain node */
+#ifndef UNIV_HOTBACKUP
+ hash_node_t name_hash; /*!< hash chain node */
+ hash_node_t id_hash; /*!< hash chain node */
UT_LIST_BASE_NODE_T(dict_index_t)
- indexes; /* list of indexes of the table */
+ indexes; /*!< list of indexes of the table */
UT_LIST_BASE_NODE_T(dict_foreign_t)
- foreign_list;/* list of foreign key constraints
+ foreign_list;/*!< list of foreign key constraints
in the table; these refer to columns
in other tables */
UT_LIST_BASE_NODE_T(dict_foreign_t)
- referenced_list;/* list of foreign key constraints
+ referenced_list;/*!< list of foreign key constraints
which refer to this table */
UT_LIST_NODE_T(dict_table_t)
- table_LRU; /* node of the LRU list of tables */
+ table_LRU; /*!< node of the LRU list of tables */
ulint n_mysql_handles_opened;
- /* count of how many handles MySQL has opened
+ /*!< count of how many handles MySQL has opened
to this table; dropping of the table is
NOT allowed until this count gets to zero;
MySQL does NOT itself check the number of
open handles at drop */
ulint n_foreign_key_checks_running;
- /* count of how many foreign key check
+ /*!< count of how many foreign key check
operations are currently being performed
on the table: we cannot drop the table while
there are foreign key checks running on
it! */
- lock_t* auto_inc_lock;/* a buffer for an auto-inc lock
- for this table: we allocate the memory here
- so that individual transactions can get it
- and release it without a need to allocate
- space from the lock heap of the trx:
- otherwise the lock heap would grow rapidly
- if we do a large insert from a select */
- dulint query_cache_inv_trx_id;
- /* transactions whose trx id < than this
- number are not allowed to store to the MySQL
- query cache or retrieve from it; when a trx
- with undo logs commits, it sets this to the
- value of the trx id counter for the tables it
- had an IX lock on */
+ trx_id_t query_cache_inv_trx_id;
+ /*!< transactions whose trx id is
+ smaller than this number are not
+ allowed to store to the MySQL query
+ cache or retrieve from it; when a trx
+ with undo logs commits, it sets this
+ to the value of the trx id counter for
+ the tables it had an IX lock on */
UT_LIST_BASE_NODE_T(lock_t)
- locks; /* list of locks on the table */
+ locks; /*!< list of locks on the table */
#ifdef UNIV_DEBUG
/*----------------------*/
ibool does_not_fit_in_memory;
- /* this field is used to specify in simulations
- tables which are so big that disk should be
- accessed: disk access is simulated by
- putting the thread to sleep for a while;
- NOTE that this flag is not stored to the data
- dictionary on disk, and the database will
- forget about value TRUE if it has to reload
- the table definition from disk */
+ /*!< this field is used to specify in
+ simulations tables which are so big
+ that disk should be accessed: disk
+ access is simulated by putting the
+ thread to sleep for a while; NOTE that
+ this flag is not stored to the data
+ dictionary on disk, and the database
+ will forget about value TRUE if it has
+ to reload the table definition from
+ disk */
#endif /* UNIV_DEBUG */
/*----------------------*/
unsigned big_rows:1;
- /* flag: TRUE if the maximum length of
+ /*!< flag: TRUE if the maximum length of
a single row exceeds BIG_ROW_SIZE;
initialized in dict_table_add_to_cache() */
- unsigned stat_initialized:1; /* TRUE if statistics have
+ /** Statistics for query optimization */
+ /* @{ */
+ unsigned stat_initialized:1; /*!< TRUE if statistics have
been calculated the first time
after database startup or table creation */
- ib_longlong stat_n_rows;
- /* approximate number of rows in the table;
+ ib_int64_t stat_n_rows;
+ /*!< approximate number of rows in the table;
we periodically calculate new estimates */
ulint stat_clustered_index_size;
- /* approximate clustered index size in
+ /*!< approximate clustered index size in
database pages */
ulint stat_sum_of_other_index_sizes;
- /* other indexes in database pages */
+ /*!< other indexes in database pages */
ulint stat_modified_counter;
- /* when a row is inserted, updated, or deleted,
+ /*!< when a row is inserted, updated,
+ or deleted,
we add 1 to this number; we calculate new
estimates for the stat_... values for the
table and the indexes at an interval of 2 GB
@@ -401,14 +477,38 @@ struct dict_table_struct{
calculation; this counter is not protected by
any latch, because this is only used for
heuristics */
+ /* @} */
/*----------------------*/
+ /** The following fields are used by the
+ AUTOINC code. The actual collection of
+ tables locked during AUTOINC read/write is
+ kept in trx_t. In order to quickly determine
+ whether a transaction has locked the AUTOINC
+ lock we keep a pointer to the transaction
+ here in the autoinc_trx variable. This is to
+ avoid acquiring the kernel mutex and scanning
+ the vector in trx_t.
+
+ When an AUTOINC lock has to wait, the
+ corresponding lock instance is created on
+ the trx lock heap rather than use the
+ pre-allocated instance in autoinc_lock below.*/
+ /* @{ */
+ lock_t* autoinc_lock;
+ /*!< a buffer for an AUTOINC lock
+ for this table: we allocate the memory here
+ so that individual transactions can get it
+ and release it without a need to allocate
+ space from the lock heap of the trx:
+ otherwise the lock heap would grow rapidly
+ if we do a large insert from a select */
mutex_t autoinc_mutex;
- /* mutex protecting the autoincrement
+ /*!< mutex protecting the autoincrement
counter */
- ib_ulonglong autoinc;/* autoinc counter value to give to the
+ ib_uint64_t autoinc;/*!< autoinc counter value to give to the
next inserted row */
ulong n_waiting_or_granted_auto_inc_locks;
- /* This counter is used to track the number
+ /*!< This counter is used to track the number
of granted and pending autoinc locks on this
table. This value is set after acquiring the
kernel mutex but we peek the contents to
@@ -416,10 +516,16 @@ struct dict_table_struct{
acquired the AUTOINC lock or not. Of course
only one transaction can be granted the
lock but there can be multiple waiters. */
+ const trx_t* autoinc_trx;
+ /*!< The transaction that currently holds the
+ AUTOINC lock on this table. */
+ /* @} */
/*----------------------*/
+#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_DEBUG
- ulint magic_n;/* magic number */
+ ulint magic_n;/*!< magic number */
+/** Value of dict_table_struct::magic_n */
# define DICT_TABLE_MAGIC_N 76333786
#endif /* UNIV_DEBUG */
};
diff --git a/storage/innobase/include/dict0mem.ic b/storage/innobase/include/dict0mem.ic
index 9bcefc2a51f..c36adb07a18 100644
--- a/storage/innobase/include/dict0mem.ic
+++ b/storage/innobase/include/dict0mem.ic
@@ -1,7 +1,24 @@
-/**********************************************************************
-Data dictionary memory object creation
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/dict0mem.ic
+Data dictionary memory object creation
Created 1/8/1996 Heikki Tuuri
***********************************************************************/
diff --git a/storage/innobase/include/dict0types.h b/storage/innobase/include/dict0types.h
index b90545f2105..7ad69193cc9 100644
--- a/storage/innobase/include/dict0types.h
+++ b/storage/innobase/include/dict0types.h
@@ -1,7 +1,24 @@
-/******************************************************
-Data dictionary global types
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0types.h
+Data dictionary global types
Created 1/8/1996 Heikki Tuuri
*******************************************************/
@@ -24,4 +41,8 @@ typedef dict_table_t dict_cluster_t;
typedef struct ind_node_struct ind_node_t;
typedef struct tab_node_struct tab_node_t;
+/* Space id and page no where the dictionary header resides */
+#define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */
+#define DICT_HDR_PAGE_NO FSP_DICT_HDR_PAGE_NO
+
#endif
diff --git a/storage/innobase/include/dyn0dyn.h b/storage/innobase/include/dyn0dyn.h
index 7affccbf67e..121a5946ac7 100644
--- a/storage/innobase/include/dyn0dyn.h
+++ b/storage/innobase/include/dyn0dyn.h
@@ -1,7 +1,24 @@
-/******************************************************
-The dynamically allocated array
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dyn0dyn.h
+The dynamically allocated array
Created 2/5/1996 Heikki Tuuri
*******************************************************/
@@ -13,148 +30,153 @@ Created 2/5/1996 Heikki Tuuri
#include "ut0lst.h"
#include "mem0mem.h"
+/** A block in a dynamically allocated array */
typedef struct dyn_block_struct dyn_block_t;
+/** Dynamically allocated array */
typedef dyn_block_t dyn_array_t;
-/* This is the initial 'payload' size of a dynamic array;
+/** This is the initial 'payload' size of a dynamic array;
this must be > MLOG_BUF_MARGIN + 30! */
#define DYN_ARRAY_DATA_SIZE 512
-/*************************************************************************
-Initializes a dynamic array. */
+/*********************************************************************//**
+Initializes a dynamic array.
+@return initialized dyn array */
UNIV_INLINE
dyn_array_t*
dyn_array_create(
/*=============*/
- /* out: initialized dyn array */
- dyn_array_t* arr); /* in: pointer to a memory buffer of
+ dyn_array_t* arr); /*!< in: pointer to a memory buffer of
size sizeof(dyn_array_t) */
-/****************************************************************
+/************************************************************//**
Frees a dynamic array. */
UNIV_INLINE
void
dyn_array_free(
/*===========*/
- dyn_array_t* arr); /* in: dyn array */
-/*************************************************************************
+ dyn_array_t* arr); /*!< in: dyn array */
+/*********************************************************************//**
Makes room on top of a dyn array and returns a pointer to a buffer in it.
After copying the elements, the caller must close the buffer using
-dyn_array_close. */
+dyn_array_close.
+@return pointer to the buffer */
UNIV_INLINE
byte*
dyn_array_open(
/*===========*/
- /* out: pointer to the buffer */
- dyn_array_t* arr, /* in: dynamic array */
- ulint size); /* in: size in bytes of the buffer; MUST be
+ dyn_array_t* arr, /*!< in: dynamic array */
+ ulint size); /*!< in: size in bytes of the buffer; MUST be
smaller than DYN_ARRAY_DATA_SIZE! */
-/*************************************************************************
+/*********************************************************************//**
Closes the buffer returned by dyn_array_open. */
UNIV_INLINE
void
dyn_array_close(
/*============*/
- dyn_array_t* arr, /* in: dynamic array */
- byte* ptr); /* in: buffer space from ptr up was not used */
-/*************************************************************************
+ dyn_array_t* arr, /*!< in: dynamic array */
+ byte* ptr); /*!< in: buffer space from ptr up was not used */
+/*********************************************************************//**
Makes room on top of a dyn array and returns a pointer to
the added element. The caller must copy the element to
-the pointer returned. */
+the pointer returned.
+@return pointer to the element */
UNIV_INLINE
void*
dyn_array_push(
/*===========*/
- /* out: pointer to the element */
- dyn_array_t* arr, /* in: dynamic array */
- ulint size); /* in: size in bytes of the element */
-/****************************************************************
-Returns pointer to an element in dyn array. */
+ dyn_array_t* arr, /*!< in: dynamic array */
+ ulint size); /*!< in: size in bytes of the element */
+/************************************************************//**
+Returns pointer to an element in dyn array.
+@return pointer to element */
UNIV_INLINE
void*
dyn_array_get_element(
/*==================*/
- /* out: pointer to element */
- dyn_array_t* arr, /* in: dyn array */
- ulint pos); /* in: position of element as bytes
+ dyn_array_t* arr, /*!< in: dyn array */
+ ulint pos); /*!< in: position of element as bytes
from array start */
-/****************************************************************
-Returns the size of stored data in a dyn array. */
+/************************************************************//**
+Returns the size of stored data in a dyn array.
+@return data size in bytes */
UNIV_INLINE
ulint
dyn_array_get_data_size(
/*====================*/
- /* out: data size in bytes */
- dyn_array_t* arr); /* in: dyn array */
-/****************************************************************
+ dyn_array_t* arr); /*!< in: dyn array */
+/************************************************************//**
Gets the first block in a dyn array. */
UNIV_INLINE
dyn_block_t*
dyn_array_get_first_block(
/*======================*/
- dyn_array_t* arr); /* in: dyn array */
-/****************************************************************
+ dyn_array_t* arr); /*!< in: dyn array */
+/************************************************************//**
Gets the last block in a dyn array. */
UNIV_INLINE
dyn_block_t*
dyn_array_get_last_block(
/*=====================*/
- dyn_array_t* arr); /* in: dyn array */
-/************************************************************************
-Gets the next block in a dyn array. */
+ dyn_array_t* arr); /*!< in: dyn array */
+/********************************************************************//**
+Gets the next block in a dyn array.
+@return pointer to next, NULL if end of list */
UNIV_INLINE
dyn_block_t*
dyn_array_get_next_block(
/*=====================*/
- /* out: pointer to next, NULL if end of list */
- dyn_array_t* arr, /* in: dyn array */
- dyn_block_t* block); /* in: dyn array block */
-/************************************************************************
-Gets the number of used bytes in a dyn array block. */
+ dyn_array_t* arr, /*!< in: dyn array */
+ dyn_block_t* block); /*!< in: dyn array block */
+/********************************************************************//**
+Gets the number of used bytes in a dyn array block.
+@return number of bytes used */
UNIV_INLINE
ulint
dyn_block_get_used(
/*===============*/
- /* out: number of bytes used */
- dyn_block_t* block); /* in: dyn array block */
-/************************************************************************
-Gets pointer to the start of data in a dyn array block. */
+ dyn_block_t* block); /*!< in: dyn array block */
+/********************************************************************//**
+Gets pointer to the start of data in a dyn array block.
+@return pointer to data */
UNIV_INLINE
byte*
dyn_block_get_data(
/*===============*/
- /* out: pointer to data */
- dyn_block_t* block); /* in: dyn array block */
-/************************************************************
+ dyn_block_t* block); /*!< in: dyn array block */
+/********************************************************//**
Pushes n bytes to a dyn array. */
UNIV_INLINE
void
dyn_push_string(
/*============*/
- dyn_array_t* arr, /* in: dyn array */
- const byte* str, /* in: string to write */
- ulint len); /* in: string length */
+ dyn_array_t* arr, /*!< in: dyn array */
+ const byte* str, /*!< in: string to write */
+ ulint len); /*!< in: string length */
/*#################################################################*/
-/* NOTE! Do not use the fields of the struct directly: the definition
+/** @brief A block in a dynamically allocated array.
+NOTE! Do not access the fields of the struct directly: the definition
appears here only for the compiler to know its size! */
struct dyn_block_struct{
- mem_heap_t* heap; /* in the first block this is != NULL
+ mem_heap_t* heap; /*!< in the first block this is != NULL
if dynamic allocation has been needed */
- ulint used; /* number of data bytes used in this block */
+ ulint used; /*!< number of data bytes used in this block;
+ DYN_BLOCK_FULL_FLAG is set when the block
+ becomes full */
byte data[DYN_ARRAY_DATA_SIZE];
- /* storage for array elements */
+ /*!< storage for array elements */
UT_LIST_BASE_NODE_T(dyn_block_t) base;
- /* linear list of dyn blocks: this node is
+ /*!< linear list of dyn blocks: this node is
used only in the first block */
UT_LIST_NODE_T(dyn_block_t) list;
- /* linear list node: used in all blocks */
+ /*!< linear list node: used in all blocks */
#ifdef UNIV_DEBUG
- ulint buf_end;/* only in the debug version: if dyn array is
- opened, this is the buffer end offset, else
- this is 0 */
- ulint magic_n;
+ ulint buf_end;/*!< only in the debug version: if dyn
+ array is opened, this is the buffer
+ end offset, else this is 0 */
+ ulint magic_n;/*!< magic number (DYN_BLOCK_MAGIC_N) */
#endif
};
diff --git a/storage/innobase/include/dyn0dyn.ic b/storage/innobase/include/dyn0dyn.ic
index fcb3c17287a..110e674abff 100644
--- a/storage/innobase/include/dyn0dyn.ic
+++ b/storage/innobase/include/dyn0dyn.ic
@@ -1,42 +1,61 @@
-/******************************************************
-The dynamically allocated array
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dyn0dyn.ic
+The dynamically allocated array
Created 2/5/1996 Heikki Tuuri
*******************************************************/
+/** Value of dyn_block_struct::magic_n */
#define DYN_BLOCK_MAGIC_N 375767
+/** Flag for dyn_block_struct::used that indicates a full block */
#define DYN_BLOCK_FULL_FLAG 0x1000000UL
-/****************************************************************
-Adds a new block to a dyn array. */
-
+/************************************************************//**
+Adds a new block to a dyn array.
+@return created block */
+UNIV_INTERN
dyn_block_t*
dyn_array_add_block(
/*================*/
- /* out: created block */
- dyn_array_t* arr); /* in: dyn array */
+ dyn_array_t* arr); /*!< in: dyn array */
-/****************************************************************
+/************************************************************//**
Gets the first block in a dyn array. */
UNIV_INLINE
dyn_block_t*
dyn_array_get_first_block(
/*======================*/
- dyn_array_t* arr) /* in: dyn array */
+ dyn_array_t* arr) /*!< in: dyn array */
{
return(arr);
}
-/****************************************************************
+/************************************************************//**
Gets the last block in a dyn array. */
UNIV_INLINE
dyn_block_t*
dyn_array_get_last_block(
/*=====================*/
- dyn_array_t* arr) /* in: dyn array */
+ dyn_array_t* arr) /*!< in: dyn array */
{
if (arr->heap == NULL) {
@@ -46,15 +65,15 @@ dyn_array_get_last_block(
return(UT_LIST_GET_LAST(arr->base));
}
-/************************************************************************
-Gets the next block in a dyn array. */
+/********************************************************************//**
+Gets the next block in a dyn array.
+@return pointer to next, NULL if end of list */
UNIV_INLINE
dyn_block_t*
dyn_array_get_next_block(
/*=====================*/
- /* out: pointer to next, NULL if end of list */
- dyn_array_t* arr, /* in: dyn array */
- dyn_block_t* block) /* in: dyn array block */
+ dyn_array_t* arr, /*!< in: dyn array */
+ dyn_block_t* block) /*!< in: dyn array block */
{
ut_ad(arr && block);
@@ -67,42 +86,42 @@ dyn_array_get_next_block(
return(UT_LIST_GET_NEXT(list, block));
}
-/************************************************************************
-Gets the number of used bytes in a dyn array block. */
+/********************************************************************//**
+Gets the number of used bytes in a dyn array block.
+@return number of bytes used */
UNIV_INLINE
ulint
dyn_block_get_used(
/*===============*/
- /* out: number of bytes used */
- dyn_block_t* block) /* in: dyn array block */
+ dyn_block_t* block) /*!< in: dyn array block */
{
ut_ad(block);
return((block->used) & ~DYN_BLOCK_FULL_FLAG);
}
-/************************************************************************
-Gets pointer to the start of data in a dyn array block. */
+/********************************************************************//**
+Gets pointer to the start of data in a dyn array block.
+@return pointer to data */
UNIV_INLINE
byte*
dyn_block_get_data(
/*===============*/
- /* out: pointer to data */
- dyn_block_t* block) /* in: dyn array block */
+ dyn_block_t* block) /*!< in: dyn array block */
{
ut_ad(block);
return(block->data);
}
-/*************************************************************************
-Initializes a dynamic array. */
+/*********************************************************************//**
+Initializes a dynamic array.
+@return initialized dyn array */
UNIV_INLINE
dyn_array_t*
dyn_array_create(
/*=============*/
- /* out: initialized dyn array */
- dyn_array_t* arr) /* in: pointer to a memory buffer of
+ dyn_array_t* arr) /*!< in: pointer to a memory buffer of
size sizeof(dyn_array_t) */
{
ut_ad(arr);
@@ -120,13 +139,13 @@ dyn_array_create(
return(arr);
}
-/****************************************************************
+/************************************************************//**
Frees a dynamic array. */
UNIV_INLINE
void
dyn_array_free(
/*===========*/
- dyn_array_t* arr) /* in: dyn array */
+ dyn_array_t* arr) /*!< in: dyn array */
{
if (arr->heap != NULL) {
mem_heap_free(arr->heap);
@@ -137,16 +156,16 @@ dyn_array_free(
#endif
}
-/*************************************************************************
+/*********************************************************************//**
Makes room on top of a dyn array and returns a pointer to the added element.
-The caller must copy the element to the pointer returned. */
+The caller must copy the element to the pointer returned.
+@return pointer to the element */
UNIV_INLINE
void*
dyn_array_push(
/*===========*/
- /* out: pointer to the element */
- dyn_array_t* arr, /* in: dynamic array */
- ulint size) /* in: size in bytes of the element */
+ dyn_array_t* arr, /*!< in: dynamic array */
+ ulint size) /*!< in: size in bytes of the element */
{
dyn_block_t* block;
ulint used;
@@ -177,17 +196,17 @@ dyn_array_push(
return((block->data) + used);
}
-/*************************************************************************
+/*********************************************************************//**
Makes room on top of a dyn array and returns a pointer to a buffer in it.
After copying the elements, the caller must close the buffer using
-dyn_array_close. */
+dyn_array_close.
+@return pointer to the buffer */
UNIV_INLINE
byte*
dyn_array_open(
/*===========*/
- /* out: pointer to the buffer */
- dyn_array_t* arr, /* in: dynamic array */
- ulint size) /* in: size in bytes of the buffer; MUST be
+ dyn_array_t* arr, /*!< in: dynamic array */
+ ulint size) /*!< in: size in bytes of the buffer; MUST be
smaller than DYN_ARRAY_DATA_SIZE! */
{
dyn_block_t* block;
@@ -223,14 +242,14 @@ dyn_array_open(
return((block->data) + used);
}
-/*************************************************************************
+/*********************************************************************//**
Closes the buffer returned by dyn_array_open. */
UNIV_INLINE
void
dyn_array_close(
/*============*/
- dyn_array_t* arr, /* in: dynamic array */
- byte* ptr) /* in: buffer space from ptr up was not used */
+ dyn_array_t* arr, /*!< in: dynamic array */
+ byte* ptr) /*!< in: buffer space from ptr up was not used */
{
dyn_block_t* block;
@@ -250,15 +269,15 @@ dyn_array_close(
#endif
}
-/****************************************************************
-Returns pointer to an element in dyn array. */
+/************************************************************//**
+Returns pointer to an element in dyn array.
+@return pointer to element */
UNIV_INLINE
void*
dyn_array_get_element(
/*==================*/
- /* out: pointer to element */
- dyn_array_t* arr, /* in: dyn array */
- ulint pos) /* in: position of element as bytes
+ dyn_array_t* arr, /*!< in: dyn array */
+ ulint pos) /*!< in: position of element as bytes
from array start */
{
dyn_block_t* block;
@@ -288,14 +307,14 @@ dyn_array_get_element(
return(block->data + pos);
}
-/****************************************************************
-Returns the size of stored data in a dyn array. */
+/************************************************************//**
+Returns the size of stored data in a dyn array.
+@return data size in bytes */
UNIV_INLINE
ulint
dyn_array_get_data_size(
/*====================*/
- /* out: data size in bytes */
- dyn_array_t* arr) /* in: dyn array */
+ dyn_array_t* arr) /*!< in: dyn array */
{
dyn_block_t* block;
ulint sum = 0;
@@ -319,15 +338,15 @@ dyn_array_get_data_size(
return(sum);
}
-/************************************************************
+/********************************************************//**
Pushes n bytes to a dyn array. */
UNIV_INLINE
void
dyn_push_string(
/*============*/
- dyn_array_t* arr, /* in: dyn array */
- const byte* str, /* in: string to write */
- ulint len) /* in: string length */
+ dyn_array_t* arr, /*!< in: dyn array */
+ const byte* str, /*!< in: string to write */
+ ulint len) /*!< in: string length */
{
ulint n_copied;
diff --git a/storage/innobase/include/eval0eval.h b/storage/innobase/include/eval0eval.h
index f950512adfd..60aefd8d453 100644
--- a/storage/innobase/include/eval0eval.h
+++ b/storage/innobase/include/eval0eval.h
@@ -1,9 +1,26 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/eval0eval.h
SQL evaluator: evaluates simple data structures, like expressions, in
a query graph
-(c) 1997 Innobase Oy
-
Created 12/29/1997 Heikki Tuuri
*******************************************************/
@@ -15,79 +32,79 @@ Created 12/29/1997 Heikki Tuuri
#include "pars0sym.h"
#include "pars0pars.h"
-/*********************************************************************
+/*****************************************************************//**
Free the buffer from global dynamic memory for a value of a que_node,
if it has been allocated in the above function. The freeing for pushed
column values is done in sel_col_prefetch_buf_free. */
-
+UNIV_INTERN
void
eval_node_free_val_buf(
/*===================*/
- que_node_t* node); /* in: query graph node */
-/*********************************************************************
+ que_node_t* node); /*!< in: query graph node */
+/*****************************************************************//**
Evaluates a symbol table symbol. */
UNIV_INLINE
void
eval_sym(
/*=====*/
- sym_node_t* sym_node); /* in: symbol table node */
-/*********************************************************************
+ sym_node_t* sym_node); /*!< in: symbol table node */
+/*****************************************************************//**
Evaluates an expression. */
UNIV_INLINE
void
eval_exp(
/*=====*/
- que_node_t* exp_node); /* in: expression */
-/*********************************************************************
+ que_node_t* exp_node); /*!< in: expression */
+/*****************************************************************//**
Sets an integer value as the value of an expression node. */
UNIV_INLINE
void
eval_node_set_int_val(
/*==================*/
- que_node_t* node, /* in: expression node */
- lint val); /* in: value to set */
-/*********************************************************************
-Gets an integer value from an expression node. */
+ que_node_t* node, /*!< in: expression node */
+ lint val); /*!< in: value to set */
+/*****************************************************************//**
+Gets an integer value from an expression node.
+@return integer value */
UNIV_INLINE
lint
eval_node_get_int_val(
/*==================*/
- /* out: integer value */
- que_node_t* node); /* in: expression node */
-/*********************************************************************
+ que_node_t* node); /*!< in: expression node */
+/*****************************************************************//**
Copies a binary string value as the value of a query graph node. Allocates a
new buffer if necessary. */
UNIV_INLINE
void
eval_node_copy_and_alloc_val(
/*=========================*/
- que_node_t* node, /* in: query graph node */
- byte* str, /* in: binary string */
- ulint len); /* in: string length or UNIV_SQL_NULL */
-/*********************************************************************
+ que_node_t* node, /*!< in: query graph node */
+ const byte* str, /*!< in: binary string */
+ ulint len); /*!< in: string length or UNIV_SQL_NULL */
+/*****************************************************************//**
Copies a query node value to another node. */
UNIV_INLINE
void
eval_node_copy_val(
/*===============*/
- que_node_t* node1, /* in: node to copy to */
- que_node_t* node2); /* in: node to copy from */
-/*********************************************************************
-Gets a iboolean value from a query node. */
+ que_node_t* node1, /*!< in: node to copy to */
+ que_node_t* node2); /*!< in: node to copy from */
+/*****************************************************************//**
+Gets an ibool value from a query node.
+@return ibool value */
UNIV_INLINE
ibool
eval_node_get_ibool_val(
/*====================*/
- /* out: iboolean value */
- que_node_t* node); /* in: query graph node */
-/*********************************************************************
-Evaluates a comparison node. */
-
+ que_node_t* node); /*!< in: query graph node */
+/*****************************************************************//**
+Evaluates a comparison node.
+@return the result of the comparison */
+UNIV_INTERN
ibool
eval_cmp(
/*=====*/
- /* out: the result of the comparison */
- func_node_t* cmp_node); /* in: comparison node */
+ func_node_t* cmp_node); /*!< in: comparison node */
#ifndef UNIV_NONINL
diff --git a/storage/innobase/include/eval0eval.ic b/storage/innobase/include/eval0eval.ic
index caffa2e0bfd..fe767f39b00 100644
--- a/storage/innobase/include/eval0eval.ic
+++ b/storage/innobase/include/eval0eval.ic
@@ -1,9 +1,26 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/eval0eval.ic
SQL evaluator: evaluates simple data structures, like expressions, in
a query graph
-(c) 1997 Innobase Oy
-
Created 12/29/1997 Heikki Tuuri
*******************************************************/
@@ -11,41 +28,41 @@ Created 12/29/1997 Heikki Tuuri
#include "rem0cmp.h"
#include "pars0grm.h"
-/*********************************************************************
+/*****************************************************************//**
Evaluates a function node. */
-
+UNIV_INTERN
void
eval_func(
/*======*/
- func_node_t* func_node); /* in: function node */
-/*********************************************************************
+ func_node_t* func_node); /*!< in: function node */
+/*****************************************************************//**
Allocate a buffer from global dynamic memory for a value of a que_node.
NOTE that this memory must be explicitly freed when the query graph is
freed. If the node already has allocated buffer, that buffer is freed
here. NOTE that this is the only function where dynamic memory should be
-allocated for a query node val field. */
-
+allocated for a query node val field.
+@return pointer to allocated buffer */
+UNIV_INTERN
byte*
eval_node_alloc_val_buf(
/*====================*/
- /* out: pointer to allocated buffer */
- que_node_t* node, /* in: query graph node; sets the val field
+ que_node_t* node, /*!< in: query graph node; sets the val field
data field to point to the new buffer, and
len field equal to size */
- ulint size); /* in: buffer size */
+ ulint size); /*!< in: buffer size */
-/*********************************************************************
-Allocates a new buffer if needed. */
+/*****************************************************************//**
+Allocates a new buffer if needed.
+@return pointer to buffer */
UNIV_INLINE
byte*
eval_node_ensure_val_buf(
/*=====================*/
- /* out: pointer to buffer */
- que_node_t* node, /* in: query graph node; sets the val field
+ que_node_t* node, /*!< in: query graph node; sets the val field
data field to point to the new buffer, and
len field equal to size */
- ulint size) /* in: buffer size */
+ ulint size) /*!< in: buffer size */
{
dfield_t* dfield;
byte* data;
@@ -63,13 +80,13 @@ eval_node_ensure_val_buf(
return(data);
}
-/*********************************************************************
+/*****************************************************************//**
Evaluates a symbol table symbol. */
UNIV_INLINE
void
eval_sym(
/*=====*/
- sym_node_t* sym_node) /* in: symbol table node */
+ sym_node_t* sym_node) /*!< in: symbol table node */
{
ut_ad(que_node_get_type(sym_node) == QUE_NODE_SYMBOL);
@@ -83,13 +100,13 @@ eval_sym(
}
}
-/*********************************************************************
+/*****************************************************************//**
Evaluates an expression. */
UNIV_INLINE
void
eval_exp(
/*=====*/
- que_node_t* exp_node) /* in: expression */
+ que_node_t* exp_node) /*!< in: expression */
{
if (que_node_get_type(exp_node) == QUE_NODE_SYMBOL) {
@@ -101,14 +118,14 @@ eval_exp(
eval_func(exp_node);
}
-/*********************************************************************
+/*****************************************************************//**
Sets an integer value as the value of an expression node. */
UNIV_INLINE
void
eval_node_set_int_val(
/*==================*/
- que_node_t* node, /* in: expression node */
- lint val) /* in: value to set */
+ que_node_t* node, /*!< in: expression node */
+ lint val) /*!< in: value to set */
{
dfield_t* dfield;
byte* data;
@@ -126,14 +143,14 @@ eval_node_set_int_val(
mach_write_to_4(data, (ulint)val);
}
-/*********************************************************************
-Gets an integer non-SQL null value from an expression node. */
+/*****************************************************************//**
+Gets an integer value, which must not be SQL NULL, from an expression node.
+@return integer value */
UNIV_INLINE
lint
eval_node_get_int_val(
/*==================*/
- /* out: integer value */
- que_node_t* node) /* in: expression node */
+ que_node_t* node) /*!< in: expression node */
{
dfield_t* dfield;
@@ -144,14 +161,14 @@ eval_node_get_int_val(
return((int)mach_read_from_4(dfield_get_data(dfield)));
}
-/*********************************************************************
-Gets a iboolean value from a query node. */
+/*****************************************************************//**
+Gets an ibool value from a query node.
+@return ibool value */
UNIV_INLINE
ibool
eval_node_get_ibool_val(
/*====================*/
- /* out: iboolean value */
- que_node_t* node) /* in: query graph node */
+ que_node_t* node) /*!< in: query graph node */
{
dfield_t* dfield;
byte* data;
@@ -165,14 +182,14 @@ eval_node_get_ibool_val(
return(mach_read_from_1(data));
}
-/*********************************************************************
+/*****************************************************************//**
Sets a iboolean value as the value of a function node. */
UNIV_INLINE
void
eval_node_set_ibool_val(
/*====================*/
- func_node_t* func_node, /* in: function node */
- ibool val) /* in: value to set */
+ func_node_t* func_node, /*!< in: function node */
+ ibool val) /*!< in: value to set */
{
dfield_t* dfield;
byte* data;
@@ -192,16 +209,16 @@ eval_node_set_ibool_val(
mach_write_to_1(data, val);
}
-/*********************************************************************
+/*****************************************************************//**
Copies a binary string value as the value of a query graph node. Allocates a
new buffer if necessary. */
UNIV_INLINE
void
eval_node_copy_and_alloc_val(
/*=========================*/
- que_node_t* node, /* in: query graph node */
- byte* str, /* in: binary string */
- ulint len) /* in: string length or UNIV_SQL_NULL */
+ que_node_t* node, /*!< in: query graph node */
+ const byte* str, /*!< in: binary string */
+ ulint len) /*!< in: string length or UNIV_SQL_NULL */
{
byte* data;
@@ -216,14 +233,14 @@ eval_node_copy_and_alloc_val(
ut_memcpy(data, str, len);
}
-/*********************************************************************
+/*****************************************************************//**
Copies a query node value to another node. */
UNIV_INLINE
void
eval_node_copy_val(
/*===============*/
- que_node_t* node1, /* in: node to copy to */
- que_node_t* node2) /* in: node to copy from */
+ que_node_t* node1, /*!< in: node to copy to */
+ que_node_t* node2) /*!< in: node to copy from */
{
dfield_t* dfield2;
diff --git a/storage/innobase/include/eval0proc.h b/storage/innobase/include/eval0proc.h
index 8416551d0ba..13e2e365320 100644
--- a/storage/innobase/include/eval0proc.h
+++ b/storage/innobase/include/eval0proc.h
@@ -1,7 +1,24 @@
-/******************************************************
-Executes SQL stored procedures and their control structures
+/*****************************************************************************
+
+Copyright (c) 1998, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1998 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/eval0proc.h
+Executes SQL stored procedures and their control structures
Created 1/20/1998 Heikki Tuuri
*******************************************************/
@@ -14,70 +31,70 @@ Created 1/20/1998 Heikki Tuuri
#include "pars0sym.h"
#include "pars0pars.h"
-/**************************************************************************
-Performs an execution step of a procedure node. */
+/**********************************************************************//**
+Performs an execution step of a procedure node.
+@return query thread to run next or NULL */
UNIV_INLINE
que_thr_t*
proc_step(
/*======*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
-Performs an execution step of an if-statement node. */
-
+ que_thr_t* thr); /*!< in: query thread */
+/**********************************************************************//**
+Performs an execution step of an if-statement node.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
if_step(
/*====*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
-Performs an execution step of a while-statement node. */
-
+ que_thr_t* thr); /*!< in: query thread */
+/**********************************************************************//**
+Performs an execution step of a while-statement node.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
while_step(
/*=======*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
-Performs an execution step of a for-loop node. */
-
+ que_thr_t* thr); /*!< in: query thread */
+/**********************************************************************//**
+Performs an execution step of a for-loop node.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
for_step(
/*=====*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
-Performs an execution step of an assignment statement node. */
-
+ que_thr_t* thr); /*!< in: query thread */
+/**********************************************************************//**
+Performs an execution step of an assignment statement node.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
assign_step(
/*========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
-Performs an execution step of a procedure call node. */
+ que_thr_t* thr); /*!< in: query thread */
+/**********************************************************************//**
+Performs an execution step of a procedure call node.
+@return query thread to run next or NULL */
UNIV_INLINE
que_thr_t*
proc_eval_step(
/*===========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
-Performs an execution step of an exit statement node. */
-
+ que_thr_t* thr); /*!< in: query thread */
+/**********************************************************************//**
+Performs an execution step of an exit statement node.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
exit_step(
/*======*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
-Performs an execution step of a return-statement node. */
-
+ que_thr_t* thr); /*!< in: query thread */
+/**********************************************************************//**
+Performs an execution step of a return-statement node.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
return_step(
/*========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
+ que_thr_t* thr); /*!< in: query thread */
#ifndef UNIV_NONINL
diff --git a/storage/innobase/include/eval0proc.ic b/storage/innobase/include/eval0proc.ic
index cf738056576..c602af0a694 100644
--- a/storage/innobase/include/eval0proc.ic
+++ b/storage/innobase/include/eval0proc.ic
@@ -1,7 +1,24 @@
-/******************************************************
-Executes SQL stored procedures and their control structures
+/*****************************************************************************
+
+Copyright (c) 1998, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1998 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/eval0proc.ic
+Executes SQL stored procedures and their control structures
Created 1/20/1998 Heikki Tuuri
*******************************************************/
@@ -10,14 +27,14 @@ Created 1/20/1998 Heikki Tuuri
#include "que0que.h"
#include "eval0eval.h"
-/**************************************************************************
-Performs an execution step of a procedure node. */
+/**********************************************************************//**
+Performs an execution step of a procedure node.
+@return query thread to run next or NULL */
UNIV_INLINE
que_thr_t*
proc_step(
/*======*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
proc_node_t* node;
@@ -45,14 +62,14 @@ proc_step(
return(thr);
}
-/**************************************************************************
-Performs an execution step of a procedure call node. */
+/**********************************************************************//**
+Performs an execution step of a procedure call node.
+@return query thread to run next or NULL */
UNIV_INLINE
que_thr_t*
proc_eval_step(
/*===========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
func_node_t* node;
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index 6b8fd4b03d5..74d0fbcdacd 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -1,7 +1,24 @@
-/******************************************************
-The low-level file system
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
-(c) 1995 Innobase Oy
+/**************************************************//**
+@file include/fil0fil.h
+The low-level file system
Created 10/25/1995 Heikki Tuuri
*******************************************************/
@@ -10,59 +27,62 @@ Created 10/25/1995 Heikki Tuuri
#define fil0fil_h
#include "univ.i"
+#ifndef UNIV_HOTBACKUP
#include "sync0rw.h"
+#endif /* !UNIV_HOTBACKUP */
#include "dict0types.h"
-#include "ibuf0types.h"
#include "ut0byte.h"
#include "os0file.h"
-/* When mysqld is run, the default directory "." is the mysqld datadir, but in
-ibbackup we must set it explicitly; the patgh must NOT contain the trailing
-'/' or '\' */
+/** When mysqld is run, the default directory "." is the mysqld datadir,
+but in the MySQL Embedded Server Library and ibbackup it is not the default
+directory, and we must set the base file path explicitly */
extern const char* fil_path_to_mysql_datadir;
-/* Initial size of a single-table tablespace in pages */
+/** Initial size of a single-table tablespace in pages */
#define FIL_IBD_FILE_INITIAL_SIZE 4
-/* 'null' (undefined) page offset in the context of file spaces */
+/** 'null' (undefined) page offset in the context of file spaces */
#define FIL_NULL ULINT32_UNDEFINED
/* Space address data type; this is intended to be used when
addresses accurate to a byte are stored in file pages. If the page part
of the address is FIL_NULL, the address is considered undefined. */
-typedef byte fil_faddr_t; /* 'type' definition in C: an address
+typedef byte fil_faddr_t; /*!< 'type' definition in C: an address
stored in a file page is a string of bytes */
#define FIL_ADDR_PAGE 0 /* first in address is the page offset */
#define FIL_ADDR_BYTE 4 /* then comes 2-byte byte offset within page*/
#define FIL_ADDR_SIZE 6 /* address size is 6 bytes */
-/* A struct for storing a space address FIL_ADDR, when it is used
+/** A struct for storing a space address FIL_ADDR, when it is used
in C program data structures. */
typedef struct fil_addr_struct fil_addr_t;
+/** File space address */
struct fil_addr_struct{
- ulint page; /* page number within a space */
- ulint boffset; /* byte offset within the page */
+ ulint page; /*!< page number within a space */
+ ulint boffset; /*!< byte offset within the page */
};
-/* Null file address */
+/** The null file address */
extern fil_addr_t fil_addr_null;
-/* The byte offsets on a file page for various variables */
-#define FIL_PAGE_SPACE_OR_CHKSUM 0 /* in < MySQL-4.0.14 space id the
+/** The byte offsets on a file page for various variables @{ */
+#define FIL_PAGE_SPACE_OR_CHKSUM 0 /*!< in < MySQL-4.0.14 space id the
page belongs to (== 0) but in later
versions the 'new' checksum of the
page */
-#define FIL_PAGE_OFFSET 4 /* page offset inside space */
-#define FIL_PAGE_PREV 8 /* if there is a 'natural' predecessor
- of the page, its offset.
- Otherwise FIL_NULL.
- This field is not set on BLOB pages,
- which are stored as a singly-linked
- list. See also FIL_PAGE_NEXT. */
-#define FIL_PAGE_NEXT 12 /* if there is a 'natural' successor
+#define FIL_PAGE_OFFSET 4 /*!< page offset inside space */
+#define FIL_PAGE_PREV 8 /*!< if there is a 'natural'
+ predecessor of the page, its
+ offset. Otherwise FIL_NULL.
+ This field is not set on BLOB
+ pages, which are stored as a
+ singly-linked list. See also
+ FIL_PAGE_NEXT. */
+#define FIL_PAGE_NEXT 12 /*!< if there is a 'natural' successor
of the page, its offset.
Otherwise FIL_NULL.
B-tree index pages
@@ -72,9 +92,9 @@ extern fil_addr_t fil_addr_null;
FIL_PAGE_PREV and FIL_PAGE_NEXT
in the collation order of the
smallest user record on each page. */
-#define FIL_PAGE_LSN 16 /* lsn of the end of the newest
+#define FIL_PAGE_LSN 16 /*!< lsn of the end of the newest
modification log record to the page */
-#define FIL_PAGE_TYPE 24 /* file page type: FIL_PAGE_INDEX,...,
+#define FIL_PAGE_TYPE 24 /*!< file page type: FIL_PAGE_INDEX,...,
2 bytes.
The contents of this field can only
@@ -89,224 +109,243 @@ extern fil_addr_t fil_addr_null;
MySQL/InnoDB 5.1.7 or later, the
contents of this field is valid
for all uncompressed pages. */
-#define FIL_PAGE_FILE_FLUSH_LSN 26 /* this is only defined for the
+#define FIL_PAGE_FILE_FLUSH_LSN 26 /*!< this is only defined for the
first page in a data file: the file
has been flushed to disk at least up
to this lsn */
-#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /* starting from 4.1.x this
+#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /*!< starting from 4.1.x this
contains the space id of the page */
-#define FIL_PAGE_DATA 38 /* start of the data on the page */
-
-/* File page trailer */
-#define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /* the low 4 bytes of this are used
+#define FIL_PAGE_DATA 38 /*!< start of the data on the page */
+/* @} */
+/** File page trailer @{ */
+#define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /*!< the low 4 bytes of this are used
to store the page checksum, the
last 4 bytes should be identical
to the last 4 bytes of FIL_PAGE_LSN */
-#define FIL_PAGE_DATA_END 8
-
-/* File page types (values of FIL_PAGE_TYPE) */
-#define FIL_PAGE_INDEX 17855 /* B-tree node */
-#define FIL_PAGE_UNDO_LOG 2 /* Undo log page */
-#define FIL_PAGE_INODE 3 /* Index node */
-#define FIL_PAGE_IBUF_FREE_LIST 4 /* Insert buffer free list */
+#define FIL_PAGE_DATA_END 8 /*!< size of the page trailer */
+/* @} */
+
+/** File page types (values of FIL_PAGE_TYPE) @{ */
+#define FIL_PAGE_INDEX 17855 /*!< B-tree node */
+#define FIL_PAGE_UNDO_LOG 2 /*!< Undo log page */
+#define FIL_PAGE_INODE 3 /*!< Index node */
+#define FIL_PAGE_IBUF_FREE_LIST 4 /*!< Insert buffer free list */
/* File page types introduced in MySQL/InnoDB 5.1.7 */
-#define FIL_PAGE_TYPE_ALLOCATED 0 /* Freshly allocated page */
-#define FIL_PAGE_IBUF_BITMAP 5 /* Insert buffer bitmap */
-#define FIL_PAGE_TYPE_SYS 6 /* System page */
-#define FIL_PAGE_TYPE_TRX_SYS 7 /* Transaction system data */
-#define FIL_PAGE_TYPE_FSP_HDR 8 /* File space header */
-#define FIL_PAGE_TYPE_XDES 9 /* Extent descriptor page */
-#define FIL_PAGE_TYPE_BLOB 10 /* Uncompressed BLOB page */
-
-/* Space types */
-#define FIL_TABLESPACE 501
-#define FIL_LOG 502
-
+#define FIL_PAGE_TYPE_ALLOCATED 0 /*!< Freshly allocated page */
+#define FIL_PAGE_IBUF_BITMAP 5 /*!< Insert buffer bitmap */
+#define FIL_PAGE_TYPE_SYS 6 /*!< System page */
+#define FIL_PAGE_TYPE_TRX_SYS 7 /*!< Transaction system data */
+#define FIL_PAGE_TYPE_FSP_HDR 8 /*!< File space header */
+#define FIL_PAGE_TYPE_XDES 9 /*!< Extent descriptor page */
+#define FIL_PAGE_TYPE_BLOB 10 /*!< Uncompressed BLOB page */
+#define FIL_PAGE_TYPE_ZBLOB 11 /*!< First compressed BLOB page */
+#define FIL_PAGE_TYPE_ZBLOB2 12 /*!< Subsequent compressed BLOB page */
+/* @} */
+
+/** Space types @{ */
+#define FIL_TABLESPACE 501 /*!< tablespace */
+#define FIL_LOG 502 /*!< redo log */
+/* @} */
+
+/** The number of fsyncs done to the log */
extern ulint fil_n_log_flushes;
+/** Number of pending redo log flushes */
extern ulint fil_n_pending_log_flushes;
+/** Number of pending tablespace flushes */
extern ulint fil_n_pending_tablespace_flushes;
-/***********************************************************************
-Returns the version number of a tablespace, -1 if not found. */
-
-ib_longlong
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
+Returns the version number of a tablespace, -1 if not found.
+@return version number, -1 if the tablespace does not exist in the
+memory cache */
+UNIV_INTERN
+ib_int64_t
fil_space_get_version(
/*==================*/
- /* out: version number, -1 if the tablespace does not
- exist in the memory cache */
- ulint id); /* in: space id */
-/***********************************************************************
-Returns the latch of a file space. */
-
+ ulint id); /*!< in: space id */
+/*******************************************************************//**
+Returns the latch of a file space.
+@return latch protecting storage allocation */
+UNIV_INTERN
rw_lock_t*
fil_space_get_latch(
/*================*/
- /* out: latch protecting storage allocation */
- ulint id); /* in: space id */
-/***********************************************************************
-Returns the type of a file space. */
-
+ ulint id, /*!< in: space id */
+ ulint* zip_size);/*!< out: compressed page size, or
+ 0 for uncompressed tablespaces */
+/*******************************************************************//**
+Returns the type of a file space.
+@return FIL_TABLESPACE or FIL_LOG */
+UNIV_INTERN
ulint
fil_space_get_type(
/*===============*/
- /* out: FIL_TABLESPACE or FIL_LOG */
- ulint id); /* in: space id */
-/***********************************************************************
-Returns the ibuf data of a file space. */
-
-ibuf_data_t*
-fil_space_get_ibuf_data(
-/*====================*/
- /* out: ibuf data for this space */
- ulint id); /* in: space id */
-/***********************************************************************
+ ulint id); /*!< in: space id */
+#endif /* !UNIV_HOTBACKUP */
+/*******************************************************************//**
Appends a new file to the chain of files of a space. File must be closed. */
-
+UNIV_INTERN
void
fil_node_create(
/*============*/
- const char* name, /* in: file name (file must be closed) */
- ulint size, /* in: file size in database blocks, rounded
+ const char* name, /*!< in: file name (file must be closed) */
+ ulint size, /*!< in: file size in database blocks, rounded
downwards to an integer */
- ulint id, /* in: space id where to append */
- ibool is_raw);/* in: TRUE if a raw device or
+ ulint id, /*!< in: space id where to append */
+ ibool is_raw);/*!< in: TRUE if a raw device or
a raw disk partition */
-/********************************************************************
+#ifdef UNIV_LOG_ARCHIVE
+/****************************************************************//**
Drops files from the start of a file space, so that its size is cut by
the amount given. */
-
+UNIV_INTERN
void
fil_space_truncate_start(
/*=====================*/
- ulint id, /* in: space id */
- ulint trunc_len); /* in: truncate by this much; it is an error
+ ulint id, /*!< in: space id */
+ ulint trunc_len); /*!< in: truncate by this much; it is an error
if this does not equal to the combined size of
some initial files in the space */
-/***********************************************************************
+#endif /* UNIV_LOG_ARCHIVE */
+/*******************************************************************//**
Creates a space memory object and puts it to the 'fil system' hash table. If
-there is an error, prints an error message to the .err log. */
-
+there is an error, prints an error message to the .err log.
+@return TRUE if success */
+UNIV_INTERN
ibool
fil_space_create(
/*=============*/
- /* out: TRUE if success */
- const char* name, /* in: space name */
- ulint id, /* in: space id */
- ulint purpose);/* in: FIL_TABLESPACE, or FIL_LOG if log */
-/***********************************************************************
-Frees a space object from a the tablespace memory cache. Closes the files in
-the chain but does not delete them. */
-
-ibool
-fil_space_free(
-/*===========*/
- /* out: TRUE if success */
- ulint id); /* in: space id */
-/***********************************************************************
+ const char* name, /*!< in: space name */
+ ulint id, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size, or
+ 0 for uncompressed tablespaces */
+ ulint purpose);/*!< in: FIL_TABLESPACE, or FIL_LOG if log */
+/*******************************************************************//**
Returns the size of the space in pages. The tablespace must be cached in the
-memory cache. */
-
+memory cache.
+@return space size, 0 if space not found */
+UNIV_INTERN
ulint
fil_space_get_size(
/*===============*/
- /* out: space size, 0 if space not found */
- ulint id); /* in: space id */
-/***********************************************************************
+ ulint id); /*!< in: space id */
+/*******************************************************************//**
+Returns the flags of the space. The tablespace must be cached
+in the memory cache.
+@return flags, ULINT_UNDEFINED if space not found */
+UNIV_INTERN
+ulint
+fil_space_get_flags(
+/*================*/
+ ulint id); /*!< in: space id */
+/*******************************************************************//**
+Returns the compressed page size of the space, or 0 if the space
+is not compressed. The tablespace must be cached in the memory cache.
+@return compressed page size, ULINT_UNDEFINED if space not found */
+UNIV_INTERN
+ulint
+fil_space_get_zip_size(
+/*===================*/
+ ulint id); /*!< in: space id */
+/*******************************************************************//**
Checks if the pair space, page_no refers to an existing page in a tablespace
-file space. The tablespace must be cached in the memory cache. */
-
+file space. The tablespace must be cached in the memory cache.
+@return TRUE if the address is meaningful */
+UNIV_INTERN
ibool
fil_check_adress_in_tablespace(
/*===========================*/
- /* out: TRUE if the address is meaningful */
- ulint id, /* in: space id */
- ulint page_no);/* in: page number */
-/********************************************************************
+ ulint id, /*!< in: space id */
+ ulint page_no);/*!< in: page number */
+/****************************************************************//**
Initializes the tablespace memory cache. */
-
+UNIV_INTERN
void
fil_init(
/*=====*/
- ulint max_n_open); /* in: max number of open files */
-/***********************************************************************
+ ulint hash_size, /*!< in: hash table size */
+ ulint max_n_open); /*!< in: max number of open files */
+/*******************************************************************//**
+Closes the tablespace memory cache. */
+UNIV_INTERN
+void
+fil_close(void);
+/*===========*/
+/*******************************************************************//**
Opens all log files and system tablespace data files. They stay open until the
database server shutdown. This should be called at a server startup after the
space objects for the log and the system tablespace have been created. The
purpose of this operation is to make sure we never run out of file descriptors
if we need to read from the insert buffer or to write to the log. */
-
+UNIV_INTERN
void
fil_open_log_and_system_tablespace_files(void);
/*==========================================*/
-/***********************************************************************
+/*******************************************************************//**
Closes all open files. There must not be any pending i/o's or not flushed
modifications in the files. */
-
+UNIV_INTERN
void
fil_close_all_files(void);
/*=====================*/
-/***********************************************************************
+/*******************************************************************//**
Sets the max tablespace id counter if the given number is bigger than the
previous value. */
-
+UNIV_INTERN
void
fil_set_max_space_id_if_bigger(
/*===========================*/
- ulint max_id);/* in: maximum known id */
-/********************************************************************
-Initializes the ibuf data structure for space 0 == the system tablespace.
-This can be called after the file space headers have been created and the
-dictionary system has been initialized. */
-
-void
-fil_ibuf_init_at_db_start(void);
-/*===========================*/
-/********************************************************************
+ ulint max_id);/*!< in: maximum known id */
+#ifndef UNIV_HOTBACKUP
+/****************************************************************//**
Writes the flushed lsn and the latest archived log number to the page
-header of the first page of each data file in the system tablespace. */
-
+header of the first page of each data file in the system tablespace.
+@return DB_SUCCESS or error number */
+UNIV_INTERN
ulint
fil_write_flushed_lsn_to_data_files(
/*================================*/
- /* out: DB_SUCCESS or error number */
- dulint lsn, /* in: lsn to write */
- ulint arch_log_no); /* in: latest archived log file number */
-/***********************************************************************
+ ib_uint64_t lsn, /*!< in: lsn to write */
+ ulint arch_log_no); /*!< in: latest archived log
+ file number */
+/*******************************************************************//**
Reads the flushed lsn and arch no fields from a data file at database
startup. */
-
+UNIV_INTERN
void
fil_read_flushed_lsn_and_arch_log_no(
/*=================================*/
- os_file_t data_file, /* in: open data file */
- ibool one_read_already, /* in: TRUE if min and max parameters
- below already contain sensible data */
+ os_file_t data_file, /*!< in: open data file */
+ ibool one_read_already, /*!< in: TRUE if min and max
+ parameters below already
+ contain sensible data */
#ifdef UNIV_LOG_ARCHIVE
- ulint* min_arch_log_no, /* in/out: */
- ulint* max_arch_log_no, /* in/out: */
+ ulint* min_arch_log_no, /*!< in/out: */
+ ulint* max_arch_log_no, /*!< in/out: */
#endif /* UNIV_LOG_ARCHIVE */
- dulint* min_flushed_lsn, /* in/out: */
- dulint* max_flushed_lsn); /* in/out: */
-/***********************************************************************
+ ib_uint64_t* min_flushed_lsn, /*!< in/out: */
+ ib_uint64_t* max_flushed_lsn); /*!< in/out: */
+/*******************************************************************//**
Increments the count of pending insert buffer page merges, if space is not
-being deleted. */
-
+being deleted.
+@return TRUE if being deleted, and ibuf merges should be skipped */
+UNIV_INTERN
ibool
fil_inc_pending_ibuf_merges(
/*========================*/
- /* out: TRUE if being deleted, and ibuf merges should
- be skipped */
- ulint id); /* in: space id */
-/***********************************************************************
+ ulint id); /*!< in: space id */
+/*******************************************************************//**
Decrements the count of pending insert buffer page merges. */
-
+UNIV_INTERN
void
fil_decr_pending_ibuf_merges(
/*=========================*/
- ulint id); /* in: space id */
-/***********************************************************************
+ ulint id); /*!< in: space id */
+#endif /* !UNIV_HOTBACKUP */
+/*******************************************************************//**
Parses the body of a log record written about an .ibd file operation. That is,
the log record part after the standard (type, space id, page no) header of the
log record.
@@ -317,88 +356,91 @@ at that path does not exist yet. If the database directory for the file to be
created does not exist, then we create the directory, too.
Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the
-datadir that we should use in replaying the file operations. */
-
+datadir that we should use in replaying the file operations.
+@return end of log record, or NULL if the record was not completely
+contained between ptr and end_ptr */
+UNIV_INTERN
byte*
fil_op_log_parse_or_replay(
/*=======================*/
- /* out: end of log record, or NULL if the
- record was not completely contained between
- ptr and end_ptr */
- byte* ptr, /* in: buffer containing the log record body,
+ byte* ptr, /*!< in: buffer containing the log record body,
or an initial segment of it, if the record does
not fir completely between ptr and end_ptr */
- byte* end_ptr, /* in: buffer end */
- ulint type, /* in: the type of this log record */
- ibool do_replay, /* in: TRUE if we want to replay the
- operation, and not just parse the log record */
- ulint space_id); /* in: if do_replay is TRUE, the space id of
- the tablespace in question; otherwise
- ignored */
-/***********************************************************************
+ byte* end_ptr, /*!< in: buffer end */
+ ulint type, /*!< in: the type of this log record */
+ ulint space_id, /*!< in: the space id of the tablespace in
+ question, or 0 if the log record should
+ only be parsed but not replayed */
+ ulint log_flags); /*!< in: redo log flags
+ (stored in the page number parameter) */
+/*******************************************************************//**
Deletes a single-table tablespace. The tablespace must be cached in the
-memory cache. */
-
+memory cache.
+@return TRUE if success */
+UNIV_INTERN
ibool
fil_delete_tablespace(
/*==================*/
- /* out: TRUE if success */
- ulint id); /* in: space id */
-/***********************************************************************
+ ulint id); /*!< in: space id */
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
Discards a single-table tablespace. The tablespace must be cached in the
memory cache. Discarding is like deleting a tablespace, but
1) we do not drop the table from the data dictionary;
2) we remove all insert buffer entries for the tablespace immediately; in DROP
TABLE they are only removed gradually in the background;
3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
-as it originally had. */
-
+as it originally had.
+@return TRUE if success */
+UNIV_INTERN
ibool
fil_discard_tablespace(
/*===================*/
- /* out: TRUE if success */
- ulint id); /* in: space id */
-/***********************************************************************
+ ulint id); /*!< in: space id */
+#endif /* !UNIV_HOTBACKUP */
+/*******************************************************************//**
Renames a single-table tablespace. The tablespace must be cached in the
-tablespace memory cache. */
-
+tablespace memory cache.
+@return TRUE if success */
+UNIV_INTERN
ibool
fil_rename_tablespace(
/*==================*/
- /* out: TRUE if success */
- const char* old_name, /* in: old table name in the standard
+ const char* old_name, /*!< in: old table name in the standard
databasename/tablename format of
InnoDB, or NULL if we do the rename
based on the space id only */
- ulint id, /* in: space id */
- const char* new_name); /* in: new table name in the standard
+ ulint id, /*!< in: space id */
+ const char* new_name); /*!< in: new table name in the standard
databasename/tablename format
of InnoDB */
-/***********************************************************************
+/*******************************************************************//**
Creates a new single-table tablespace to a database directory of MySQL.
Database directories are under the 'datadir' of MySQL. The datadir is the
directory of a running mysqld program. We can refer to it by simply the
path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
-dir of the mysqld server. */
-
+dir of the mysqld server.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
ulint
fil_create_new_single_table_tablespace(
/*===================================*/
- /* out: DB_SUCCESS or error code */
- ulint* space_id, /* in/out: space id; if this is != 0,
+ ulint* space_id, /*!< in/out: space id; if this is != 0,
then this is an input parameter,
otherwise output */
- const char* tablename, /* in: the table name in the usual
+ const char* tablename, /*!< in: the table name in the usual
databasename/tablename format
of InnoDB, or a dir path to a temp
table */
- ibool is_temp, /* in: TRUE if a table created with
+ ibool is_temp, /*!< in: TRUE if a table created with
CREATE TEMPORARY TABLE */
- ulint size); /* in: the initial size of the
+ ulint flags, /*!< in: tablespace flags */
+ ulint size); /*!< in: the initial size of the
tablespace file in pages,
must be >= FIL_IBD_FILE_INITIAL_SIZE */
-/************************************************************************
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
Tries to open a single-table tablespace and optionally checks the space id is
right in it. If does not succeed, prints an error message to the .err log. This
function is used to open a tablespace when we start up mysqld, and also in
@@ -406,23 +448,24 @@ IMPORT TABLESPACE.
NOTE that we assume this operation is used either at the database startup
or under the protection of the dictionary mutex, so that two users cannot
race here. This operation does not leave the file associated with the
-tablespace open, but closes it after we have looked at the space id in it. */
-
+tablespace open, but closes it after we have looked at the space id in it.
+@return TRUE if success */
+UNIV_INTERN
ibool
fil_open_single_table_tablespace(
/*=============================*/
- /* out: TRUE if success */
- ibool check_space_id, /* in: should we check that the space
+ ibool check_space_id, /*!< in: should we check that the space
id in the file is right; we assume
that this function runs much faster
if no check is made, since accessing
the file inode probably is much
faster (the OS caches them) than
accessing the first page of the file */
- ulint id, /* in: space id */
- const char* name); /* in: table name in the
+ ulint id, /*!< in: space id */
+ ulint flags, /*!< in: tablespace flags */
+ const char* name); /*!< in: table name in the
databasename/tablename format */
-/************************************************************************
+/********************************************************************//**
It is possible, though very improbable, that the lsn's in the tablespace to be
imported have risen above the current system lsn, if a lengthy purge, ibuf
merge, or rollback was performed on a backup taken with ibbackup. If that is
@@ -430,150 +473,149 @@ the case, reset page lsn's in the file. We assume that mysqld was shut down
after it performed these cleanup operations on the .ibd file, so that it at
the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the
first page of the .ibd file, and we can determine whether we need to reset the
-lsn's just by looking at that flush lsn. */
-
+lsn's just by looking at that flush lsn.
+@return TRUE if success */
+UNIV_INTERN
ibool
fil_reset_too_high_lsns(
/*====================*/
- /* out: TRUE if success */
- const char* name, /* in: table name in the
+ const char* name, /*!< in: table name in the
databasename/tablename format */
- dulint current_lsn); /* in: reset lsn's if the lsn stamped
+ ib_uint64_t current_lsn); /*!< in: reset lsn's if the lsn stamped
to FIL_PAGE_FILE_FLUSH_LSN in the
first page is too high */
-/************************************************************************
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************************//**
At the server startup, if we need crash recovery, scans the database
directories under the MySQL datadir, looking for .ibd files. Those files are
single-table tablespaces. We need to know the space id in each of them so that
we know into which file we should look to check the contents of a page stored
in the doublewrite buffer, also to know where to apply log records where the
-space id is != 0. */
-
+space id is != 0.
+@return DB_SUCCESS or error number */
+UNIV_INTERN
ulint
fil_load_single_table_tablespaces(void);
/*===================================*/
- /* out: DB_SUCCESS or error number */
-/************************************************************************
+/********************************************************************//**
If we need crash recovery, and we have called
fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(),
we can call this function to print an error message of orphaned .ibd files
for which there is not a data dictionary entry with a matching table name
and space id. */
-
+UNIV_INTERN
void
fil_print_orphaned_tablespaces(void);
/*================================*/
-/***********************************************************************
+/*******************************************************************//**
Returns TRUE if a single-table tablespace does not exist in the memory cache,
-or is being deleted there. */
-
+or is being deleted there.
+@return TRUE if does not exist or is being deleted */
+UNIV_INTERN
ibool
fil_tablespace_deleted_or_being_deleted_in_mem(
/*===========================================*/
- /* out: TRUE if does not exist or is being\
- deleted */
- ulint id, /* in: space id */
- ib_longlong version);/* in: tablespace_version should be this; if
+ ulint id, /*!< in: space id */
+ ib_int64_t version);/*!< in: tablespace_version should be this; if
you pass -1 as the value of this, then this
parameter is ignored */
-/***********************************************************************
-Returns TRUE if a single-table tablespace exists in the memory cache. */
-
+/*******************************************************************//**
+Returns TRUE if a single-table tablespace exists in the memory cache.
+@return TRUE if exists */
+UNIV_INTERN
ibool
fil_tablespace_exists_in_mem(
/*=========================*/
- /* out: TRUE if exists */
- ulint id); /* in: space id */
-/***********************************************************************
+ ulint id); /*!< in: space id */
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
cache. Note that if we have not done a crash recovery at the database startup,
-there may be many tablespaces which are not yet in the memory cache. */
-
+there may be many tablespaces which are not yet in the memory cache.
+@return TRUE if a matching tablespace exists in the memory cache */
+UNIV_INTERN
ibool
fil_space_for_table_exists_in_mem(
/*==============================*/
- /* out: TRUE if a matching tablespace
- exists in the memory cache */
- ulint id, /* in: space id */
- const char* name, /* in: table name in the standard
+ ulint id, /*!< in: space id */
+ const char* name, /*!< in: table name in the standard
'databasename/tablename' format or
the dir path to a temp table */
- ibool is_temp, /* in: TRUE if created with CREATE
+ ibool is_temp, /*!< in: TRUE if created with CREATE
TEMPORARY TABLE */
- ibool mark_space, /* in: in crash recovery, at database
+ ibool mark_space, /*!< in: in crash recovery, at database
startup we mark all spaces which have
an associated table in the InnoDB
data dictionary, so that
we can print a warning about orphaned
tablespaces */
ibool print_error_if_does_not_exist);
- /* in: print detailed error
+ /*!< in: print detailed error
information to the .err log if a
matching tablespace is not found from
memory */
-/**************************************************************************
+#else /* !UNIV_HOTBACKUP */
+/********************************************************************//**
+Extends all tablespaces to the size stored in the space header. During the
+ibbackup --apply-log phase we extended the spaces on-demand so that log records
+could be applied, but that may have left spaces still too small compared to
+the size stored in the space header. */
+UNIV_INTERN
+void
+fil_extend_tablespaces_to_stored_len(void);
+/*======================================*/
+#endif /* !UNIV_HOTBACKUP */
+/**********************************************************************//**
Tries to extend a data file so that it would accommodate the number of pages
given. The tablespace must be cached in the memory cache. If the space is big
-enough already, does nothing. */
-
+enough already, does nothing.
+@return TRUE if success */
+UNIV_INTERN
ibool
fil_extend_space_to_desired_size(
/*=============================*/
- /* out: TRUE if success */
- ulint* actual_size, /* out: size of the space after extension;
+ ulint* actual_size, /*!< out: size of the space after extension;
if we ran out of disk space this may be lower
than the desired size */
- ulint space_id, /* in: space id */
- ulint size_after_extend);/* in: desired size in pages after the
+ ulint space_id, /*!< in: space id */
+ ulint size_after_extend);/*!< in: desired size in pages after the
extension; if the current space size is bigger
than this already, the function does nothing */
-#ifdef UNIV_HOTBACKUP
-/************************************************************************
-Extends all tablespaces to the size stored in the space header. During the
-ibbackup --apply-log phase we extended the spaces on-demand so that log records
-could be appllied, but that may have left spaces still too small compared to
-the size stored in the space header. */
-
-void
-fil_extend_tablespaces_to_stored_len(void);
-/*======================================*/
-#endif
-/***********************************************************************
-Tries to reserve free extents in a file space. */
-
+/*******************************************************************//**
+Tries to reserve free extents in a file space.
+@return TRUE if success */
+UNIV_INTERN
ibool
fil_space_reserve_free_extents(
/*===========================*/
- /* out: TRUE if succeed */
- ulint id, /* in: space id */
- ulint n_free_now, /* in: number of free extents now */
- ulint n_to_reserve); /* in: how many one wants to reserve */
-/***********************************************************************
+ ulint id, /*!< in: space id */
+ ulint n_free_now, /*!< in: number of free extents now */
+ ulint n_to_reserve); /*!< in: how many one wants to reserve */
+/*******************************************************************//**
Releases free extents in a file space. */
-
+UNIV_INTERN
void
fil_space_release_free_extents(
/*===========================*/
- ulint id, /* in: space id */
- ulint n_reserved); /* in: how many one reserved */
-/***********************************************************************
+ ulint id, /*!< in: space id */
+ ulint n_reserved); /*!< in: how many one reserved */
+/*******************************************************************//**
Gets the number of reserved extents. If the database is silent, this number
should be zero. */
-
+UNIV_INTERN
ulint
fil_space_get_n_reserved_extents(
/*=============================*/
- ulint id); /* in: space id */
-/************************************************************************
-Reads or writes data. This operation is asynchronous (aio). */
-
+ ulint id); /*!< in: space id */
+/********************************************************************//**
+Reads or writes data. This operation is asynchronous (aio).
+@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
+i/o on a tablespace which does not exist */
+UNIV_INTERN
ulint
fil_io(
/*===*/
- /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
- if we are trying to do i/o on a tablespace
- which does not exist */
- ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE,
+ ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE,
ORed to OS_FILE_LOG, if a log i/o
and ORed to OS_AIO_SIMULATED_WAKE_LATER
if simulated aio and we want to post a
@@ -582,133 +624,98 @@ fil_io(
because i/os are not actually handled until
all have been posted: use with great
caution! */
- ibool sync, /* in: TRUE if synchronous aio is desired */
- ulint space_id, /* in: space id */
- ulint block_offset, /* in: offset in number of blocks */
- ulint byte_offset, /* in: remainder of offset in bytes; in
+ ibool sync, /*!< in: TRUE if synchronous aio is desired */
+ ulint space_id, /*!< in: space id */
+ ulint zip_size, /*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint block_offset, /*!< in: offset in number of blocks */
+ ulint byte_offset, /*!< in: remainder of offset in bytes; in
aio this must be divisible by the OS block
size */
- ulint len, /* in: how many bytes to read or write; this
+ ulint len, /*!< in: how many bytes to read or write; this
must not cross a file boundary; in aio this
must be a block size multiple */
- void* buf, /* in/out: buffer where to store read data
+ void* buf, /*!< in/out: buffer where to store read data
or from where to write; in aio this must be
appropriately aligned */
- void* message); /* in: message for aio handler if non-sync
- aio used, else ignored */
-/************************************************************************
-Reads data from a space to a buffer. Remember that the possible incomplete
-blocks at the end of file are ignored: they are not taken into account when
-calculating the byte offset within a space. */
-
-ulint
-fil_read(
-/*=====*/
- /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
- if we are trying to do i/o on a tablespace
- which does not exist */
- ibool sync, /* in: TRUE if synchronous aio is desired */
- ulint space_id, /* in: space id */
- ulint block_offset, /* in: offset in number of blocks */
- ulint byte_offset, /* in: remainder of offset in bytes; in aio
- this must be divisible by the OS block size */
- ulint len, /* in: how many bytes to read; this must not
- cross a file boundary; in aio this must be a
- block size multiple */
- void* buf, /* in/out: buffer where to store data read;
- in aio this must be appropriately aligned */
- void* message); /* in: message for aio handler if non-sync
- aio used, else ignored */
-/************************************************************************
-Writes data to a space from a buffer. Remember that the possible incomplete
-blocks at the end of file are ignored: they are not taken into account when
-calculating the byte offset within a space. */
-
-ulint
-fil_write(
-/*======*/
- /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
- if we are trying to do i/o on a tablespace
- which does not exist */
- ibool sync, /* in: TRUE if synchronous aio is desired */
- ulint space_id, /* in: space id */
- ulint block_offset, /* in: offset in number of blocks */
- ulint byte_offset, /* in: remainder of offset in bytes; in aio
- this must be divisible by the OS block size */
- ulint len, /* in: how many bytes to write; this must
- not cross a file boundary; in aio this must
- be a block size multiple */
- void* buf, /* in: buffer from which to write; in aio
- this must be appropriately aligned */
- void* message); /* in: message for aio handler if non-sync
+ void* message); /*!< in: message for aio handler if non-sync
aio used, else ignored */
-/**************************************************************************
+/**********************************************************************//**
Waits for an aio operation to complete. This function is used to write the
handler for completed requests. The aio array of pending requests is divided
into segments (see os0file.c for more info). The thread specifies which
segment it wants to wait for. */
-
+UNIV_INTERN
void
fil_aio_wait(
/*=========*/
- ulint segment); /* in: the number of the segment in the aio
+ ulint segment); /*!< in: the number of the segment in the aio
array to wait for */
-/**************************************************************************
+/**********************************************************************//**
Flushes to disk possible writes cached by the OS. If the space does not exist
or is being dropped, does not do anything. */
-
+UNIV_INTERN
void
fil_flush(
/*======*/
- ulint space_id); /* in: file space id (this can be a group of
+ ulint space_id); /*!< in: file space id (this can be a group of
log files or a tablespace of the database) */
-/**************************************************************************
+/**********************************************************************//**
Flushes to disk writes in file spaces of the given type possibly cached by
the OS. */
-
+UNIV_INTERN
void
fil_flush_file_spaces(
/*==================*/
- ulint purpose); /* in: FIL_TABLESPACE, FIL_LOG */
-/**********************************************************************
-Checks the consistency of the tablespace cache. */
-
+ ulint purpose); /*!< in: FIL_TABLESPACE, FIL_LOG */
+/******************************************************************//**
+Checks the consistency of the tablespace cache.
+@return TRUE if ok */
+UNIV_INTERN
ibool
fil_validate(void);
/*==============*/
- /* out: TRUE if ok */
-/************************************************************************
-Returns TRUE if file address is undefined. */
-
+/********************************************************************//**
+Returns TRUE if file address is undefined.
+@return TRUE if undefined */
+UNIV_INTERN
ibool
fil_addr_is_null(
/*=============*/
- /* out: TRUE if undefined */
- fil_addr_t addr); /* in: address */
-/************************************************************************
-Accessor functions for a file page */
-
+ fil_addr_t addr); /*!< in: address */
+/********************************************************************//**
+Get the predecessor of a file page.
+@return FIL_PAGE_PREV */
+UNIV_INTERN
ulint
-fil_page_get_prev(byte* page);
+fil_page_get_prev(
+/*==============*/
+ const byte* page); /*!< in: file page */
+/********************************************************************//**
+Get the successor of a file page.
+@return FIL_PAGE_NEXT */
+UNIV_INTERN
ulint
-fil_page_get_next(byte* page);
-/*************************************************************************
+fil_page_get_next(
+/*==============*/
+ const byte* page); /*!< in: file page */
+/*********************************************************************//**
Sets the file page type. */
-
+UNIV_INTERN
void
fil_page_set_type(
/*==============*/
- byte* page, /* in: file page */
- ulint type); /* in: type */
-/*************************************************************************
-Gets the file page type. */
-
+ byte* page, /*!< in/out: file page */
+ ulint type); /*!< in: type */
+/*********************************************************************//**
+Gets the file page type.
+@return type; NOTE that if the type has not been written to page, the
+return value is not defined */
+UNIV_INTERN
ulint
fil_page_get_type(
/*==============*/
- /* out: type; NOTE that if the type has not been
- written to page, the return value not defined */
- byte* page); /* in: file page */
+ const byte* page); /*!< in: file page */
typedef struct fil_space_struct fil_space_t;
diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h
index 17bfbeec2c1..7abd3914eda 100644
--- a/storage/innobase/include/fsp0fsp.h
+++ b/storage/innobase/include/fsp0fsp.h
@@ -1,7 +1,24 @@
-/******************************************************
-File space management
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
-(c) 1995 Innobase Oy
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/fsp0fsp.h
+File space management
Created 12/18/1995 Heikki Tuuri
*******************************************************/
@@ -17,170 +34,184 @@ Created 12/18/1995 Heikki Tuuri
#include "page0types.h"
#include "fsp0types.h"
-/**************************************************************************
+/**********************************************************************//**
Initializes the file space system. */
-
+UNIV_INTERN
void
fsp_init(void);
/*==========*/
-/**************************************************************************
-Gets the current free limit of a tablespace. The free limit means the
-place of the first page which has never been put to the the free list
-for allocation. The space above that address is initialized to zero.
-Sets also the global variable log_fsp_current_free_limit. */
-
+/**********************************************************************//**
+Gets the current free limit of the system tablespace. The free limit
+means the place of the first page which has never been put to the
+free list for allocation. The space above that address is initialized
+to zero. Sets also the global variable log_fsp_current_free_limit.
+@return free limit in megabytes */
+UNIV_INTERN
ulint
-fsp_header_get_free_limit(
-/*======================*/
- /* out: free limit in megabytes */
- ulint space); /* in: space id, must be 0 */
-/**************************************************************************
-Gets the size of the tablespace from the tablespace header. If we do not
-have an auto-extending data file, this should be equal to the size of the
-data files. If there is an auto-extending data file, this can be smaller. */
-
-ulint
-fsp_header_get_tablespace_size(
+fsp_header_get_free_limit(void);
/*===========================*/
- /* out: size in pages */
- ulint space); /* in: space id, must be 0 */
-/**************************************************************************
-Reads the file space size stored in the header page. */
-
+/**********************************************************************//**
+Gets the size of the system tablespace from the tablespace header. If
+we do not have an auto-extending data file, this should be equal to
+the size of the data files. If there is an auto-extending data file,
+this can be smaller.
+@return size in pages */
+UNIV_INTERN
+ulint
+fsp_header_get_tablespace_size(void);
+/*================================*/
+/**********************************************************************//**
+Reads the file space size stored in the header page.
+@return tablespace size stored in the space header */
+UNIV_INTERN
ulint
fsp_get_size_low(
/*=============*/
- /* out: tablespace size stored in the space header */
- page_t* page); /* in: header page (page 0 in the tablespace) */
-/**************************************************************************
-Reads the space id from the first page of a tablespace. */
-
+ page_t* page); /*!< in: header page (page 0 in the tablespace) */
+/**********************************************************************//**
+Reads the space id from the first page of a tablespace.
+@return space id, ULINT_UNDEFINED if error */
+UNIV_INTERN
ulint
fsp_header_get_space_id(
/*====================*/
- /* out: space id, ULINT UNDEFINED if error */
- page_t* page); /* in: first page of a tablespace */
-/**************************************************************************
-Writes the space id to a tablespace header. This function is used past the
-buffer pool when we in fil0fil.c create a new single-table tablespace. */
-
+ const page_t* page); /*!< in: first page of a tablespace */
+/**********************************************************************//**
+Reads the space flags from the first page of a tablespace.
+@return flags */
+UNIV_INTERN
+ulint
+fsp_header_get_flags(
+/*=================*/
+ const page_t* page); /*!< in: first page of a tablespace */
+/**********************************************************************//**
+Reads the compressed page size from the first page of a tablespace.
+@return compressed page size in bytes, or 0 if uncompressed */
+UNIV_INTERN
+ulint
+fsp_header_get_zip_size(
+/*====================*/
+ const page_t* page); /*!< in: first page of a tablespace */
+/**********************************************************************//**
+Writes the space id and compressed page size to a tablespace header.
+This function is used past the buffer pool when we in fil0fil.c create
+a new single-table tablespace. */
+UNIV_INTERN
void
-fsp_header_write_space_id(
-/*======================*/
- page_t* page, /* in: first page in the space */
- ulint space_id); /* in: space id */
-/**************************************************************************
+fsp_header_init_fields(
+/*===================*/
+ page_t* page, /*!< in/out: first page in the space */
+ ulint space_id, /*!< in: space id */
+ ulint flags); /*!< in: tablespace flags (FSP_SPACE_FLAGS):
+ 0, or table->flags if newer than COMPACT */
+/**********************************************************************//**
Initializes the space header of a new created space and creates also the
insert buffer tree root if space == 0. */
-
+UNIV_INTERN
void
fsp_header_init(
/*============*/
- ulint space, /* in: space id */
- ulint size, /* in: current size in blocks */
- mtr_t* mtr); /* in: mini-transaction handle */
-/**************************************************************************
+ ulint space, /*!< in: space id */
+ ulint size, /*!< in: current size in blocks */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/**********************************************************************//**
Increases the space size field of a space. */
-
+UNIV_INTERN
void
fsp_header_inc_size(
/*================*/
- ulint space, /* in: space id */
- ulint size_inc,/* in: size increment in pages */
- mtr_t* mtr); /* in: mini-transaction handle */
-/**************************************************************************
-Creates a new segment. */
-
-page_t*
+ ulint space, /*!< in: space id */
+ ulint size_inc,/*!< in: size increment in pages */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/**********************************************************************//**
+Creates a new segment.
+@return the block where the segment header is placed, x-latched, NULL
+if could not create segment because of lack of space */
+UNIV_INTERN
+buf_block_t*
fseg_create(
/*========*/
- /* out: the page where the segment header is placed,
- x-latched, NULL if could not create segment
- because of lack of space */
- ulint space, /* in: space id */
- ulint page, /* in: page where the segment header is placed: if
+ ulint space, /*!< in: space id */
+ ulint page, /*!< in: page where the segment header is placed: if
this is != 0, the page must belong to another segment,
if this is 0, a new page will be allocated and it
will belong to the created segment */
- ulint byte_offset, /* in: byte offset of the created segment header
+ ulint byte_offset, /*!< in: byte offset of the created segment header
on the page */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
-Creates a new segment. */
-
-page_t*
+ mtr_t* mtr); /*!< in: mtr */
+/**********************************************************************//**
+Creates a new segment.
+@return the block where the segment header is placed, x-latched, NULL
+if could not create segment because of lack of space */
+UNIV_INTERN
+buf_block_t*
fseg_create_general(
/*================*/
- /* out: the page where the segment header is placed,
- x-latched, NULL if could not create segment
- because of lack of space */
- ulint space, /* in: space id */
- ulint page, /* in: page where the segment header is placed: if
+ ulint space, /*!< in: space id */
+ ulint page, /*!< in: page where the segment header is placed: if
this is != 0, the page must belong to another segment,
if this is 0, a new page will be allocated and it
will belong to the created segment */
- ulint byte_offset, /* in: byte offset of the created segment header
+ ulint byte_offset, /*!< in: byte offset of the created segment header
on the page */
- ibool has_done_reservation, /* in: TRUE if the caller has already
+ ibool has_done_reservation, /*!< in: TRUE if the caller has already
done the reservation for the pages with
fsp_reserve_free_extents (at least 2 extents: one for
the inode and the other for the segment) then there is
no need to do the check for this individual
operation */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
+ mtr_t* mtr); /*!< in: mtr */
+/**********************************************************************//**
Calculates the number of pages reserved by a segment, and how many pages are
-currently used. */
-
+currently used.
+@return number of reserved pages */
+UNIV_INTERN
ulint
fseg_n_reserved_pages(
/*==================*/
- /* out: number of reserved pages */
- fseg_header_t* header, /* in: segment header */
- ulint* used, /* out: number of pages used (<= reserved) */
- mtr_t* mtr); /* in: mtr handle */
-/**************************************************************************
+ fseg_header_t* header, /*!< in: segment header */
+ ulint* used, /*!< out: number of pages used (<= reserved) */
+ mtr_t* mtr); /*!< in: mtr handle */
+/**********************************************************************//**
Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize
-file space fragmentation. */
-
+file space fragmentation.
+@return the allocated page offset, FIL_NULL if no page could be allocated */
+UNIV_INTERN
ulint
fseg_alloc_free_page(
/*=================*/
- /* out: the allocated page offset
- FIL_NULL if no page could be allocated */
- fseg_header_t* seg_header, /* in: segment header */
- ulint hint, /* in: hint of which page would be desirable */
- byte direction, /* in: if the new page is needed because
+ fseg_header_t* seg_header, /*!< in: segment header */
+ ulint hint, /*!< in: hint of which page would be desirable */
+ byte direction, /*!< in: if the new page is needed because
of an index page split, and records are
inserted there in order, into which
direction they go alphabetically: FSP_DOWN,
FSP_UP, FSP_NO_DIR */
- mtr_t* mtr); /* in: mtr handle */
-/**************************************************************************
+ mtr_t* mtr); /*!< in: mtr handle */
+/**********************************************************************//**
Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize file space
-fragmentation. */
-
+fragmentation.
+@return allocated page offset, FIL_NULL if no page could be allocated */
+UNIV_INTERN
ulint
fseg_alloc_free_page_general(
/*=========================*/
- /* out: allocated page offset, FIL_NULL if no
- page could be allocated */
- fseg_header_t* seg_header,/* in: segment header */
- ulint hint, /* in: hint of which page would be desirable */
- byte direction,/* in: if the new page is needed because
+ fseg_header_t* seg_header,/*!< in: segment header */
+ ulint hint, /*!< in: hint of which page would be desirable */
+ byte direction,/*!< in: if the new page is needed because
of an index page split, and records are
inserted there in order, into which
direction they go alphabetically: FSP_DOWN,
FSP_UP, FSP_NO_DIR */
- ibool has_done_reservation, /* in: TRUE if the caller has
+ ibool has_done_reservation, /*!< in: TRUE if the caller has
already done the reservation for the page
with fsp_reserve_free_extents, then there
is no need to do the check for this individual
page */
- mtr_t* mtr); /* in: mtr handle */
-/**************************************************************************
+ mtr_t* mtr); /*!< in: mtr handle */
+/**********************************************************************//**
Reserves free pages from a tablespace. All mini-transactions which may
use several pages from the tablespace should call this function beforehand
and reserve enough free extents so that they certainly will be able
@@ -204,129 +235,122 @@ Single-table tablespaces whose size is < 32 pages are a special case. In this
function we would liberally reserve several 64 page extents for every page
split or merge in a B-tree. But we do not want to waste disk space if the table
only occupies < 32 pages. That is why we apply different rules in that special
-case, just ensuring that there are 3 free pages available. */
-
+case, just ensuring that there are 3 free pages available.
+@return TRUE if we were able to make the reservation */
+UNIV_INTERN
ibool
fsp_reserve_free_extents(
/*=====================*/
- /* out: TRUE if we were able to make the reservation */
- ulint* n_reserved,/* out: number of extents actually reserved; if we
+ ulint* n_reserved,/*!< out: number of extents actually reserved; if we
return TRUE and the tablespace size is < 64 pages,
then this can be 0, otherwise it is n_ext */
- ulint space, /* in: space id */
- ulint n_ext, /* in: number of extents to reserve */
- ulint alloc_type,/* in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
+ ulint space, /*!< in: space id */
+ ulint n_ext, /*!< in: number of extents to reserve */
+ ulint alloc_type,/*!< in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
+ mtr_t* mtr); /*!< in: mtr */
+/**********************************************************************//**
This function should be used to get information on how much we still
will be able to insert new data to the database without running out the
tablespace. Only free extents are taken into account and we also subtract
-the safety margin required by the above function fsp_reserve_free_extents. */
-
+the safety margin required by the above function fsp_reserve_free_extents.
+@return available space in kB */
+UNIV_INTERN
ullint
fsp_get_available_space_in_free_extents(
/*====================================*/
- /* out: available space in kB */
- ulint space); /* in: space id */
-/**************************************************************************
+ ulint space); /*!< in: space id */
+/**********************************************************************//**
Frees a single page of a segment. */
-
+UNIV_INTERN
void
fseg_free_page(
/*===========*/
- fseg_header_t* seg_header, /* in: segment header */
- ulint space, /* in: space id */
- ulint page, /* in: page offset */
- mtr_t* mtr); /* in: mtr handle */
-/***********************************************************************
-Frees a segment. The freeing is performed in several mini-transactions,
-so that there is no danger of bufferfixing too many buffer pages. */
-
-void
-fseg_free(
-/*======*/
- ulint space, /* in: space id */
- ulint page_no,/* in: page number where the segment header is
- placed */
- ulint offset);/* in: byte offset of the segment header on that
- page */
-/**************************************************************************
+ fseg_header_t* seg_header, /*!< in: segment header */
+ ulint space, /*!< in: space id */
+ ulint page, /*!< in: page offset */
+ mtr_t* mtr); /*!< in: mtr handle */
+/**********************************************************************//**
Frees part of a segment. This function can be used to free a segment
by repeatedly calling this function in different mini-transactions.
Doing the freeing in a single mini-transaction might result in
-too big a mini-transaction. */
-
+too big a mini-transaction.
+@return TRUE if freeing completed */
+UNIV_INTERN
ibool
fseg_free_step(
/*===========*/
- /* out: TRUE if freeing completed */
- fseg_header_t* header, /* in, own: segment header; NOTE: if the header
+ fseg_header_t* header, /*!< in, own: segment header; NOTE: if the header
resides on the first page of the frag list
of the segment, this pointer becomes obsolete
after the last freeing step */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
+ mtr_t* mtr); /*!< in: mtr */
+/**********************************************************************//**
Frees part of a segment. Differs from fseg_free_step because this function
-leaves the header page unfreed. */
-
+leaves the header page unfreed.
+@return TRUE if freeing completed, except the header page */
+UNIV_INTERN
ibool
fseg_free_step_not_header(
/*======================*/
- /* out: TRUE if freeing completed, except the
- header page */
- fseg_header_t* header, /* in: segment header which must reside on
+ fseg_header_t* header, /*!< in: segment header which must reside on
the first fragment page of the segment */
- mtr_t* mtr); /* in: mtr */
-/***************************************************************************
-Checks if a page address is an extent descriptor page address. */
+ mtr_t* mtr); /*!< in: mtr */
+/***********************************************************************//**
+Checks if a page address is an extent descriptor page address.
+@return TRUE if a descriptor page */
UNIV_INLINE
ibool
fsp_descr_page(
/*===========*/
- /* out: TRUE if a descriptor page */
- ulint page_no);/* in: page number */
-/***************************************************************
-Parses a redo log record of a file page init. */
-
+ ulint zip_size,/*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint page_no);/*!< in: page number */
+/***********************************************************//**
+Parses a redo log record of a file page init.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
fsp_parse_init_file_page(
/*=====================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page); /* in: page or NULL */
-/***********************************************************************
-Validates the file space system and its segments. */
-
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr, /*!< in: buffer end */
+ buf_block_t* block); /*!< in: block or NULL */
+/*******************************************************************//**
+Validates the file space system and its segments.
+@return TRUE if ok */
+UNIV_INTERN
ibool
fsp_validate(
/*=========*/
- /* out: TRUE if ok */
- ulint space); /* in: space id */
-/***********************************************************************
+ ulint space); /*!< in: space id */
+/*******************************************************************//**
Prints info of a file space. */
-
+UNIV_INTERN
void
fsp_print(
/*======*/
- ulint space); /* in: space id */
-/***********************************************************************
-Validates a segment. */
-
+ ulint space); /*!< in: space id */
+#ifdef UNIV_DEBUG
+/*******************************************************************//**
+Validates a segment.
+@return TRUE if ok */
+UNIV_INTERN
ibool
fseg_validate(
/*==========*/
- /* out: TRUE if ok */
- fseg_header_t* header, /* in: segment header */
- mtr_t* mtr2); /* in: mtr */
-/***********************************************************************
+ fseg_header_t* header, /*!< in: segment header */
+ mtr_t* mtr); /*!< in: mtr */
+#endif /* UNIV_DEBUG */
+#ifdef UNIV_BTR_PRINT
+/*******************************************************************//**
Writes info of a segment. */
-
+UNIV_INTERN
void
fseg_print(
/*=======*/
- fseg_header_t* header, /* in: segment header */
- mtr_t* mtr); /* in: mtr */
+ fseg_header_t* header, /*!< in: segment header */
+ mtr_t* mtr); /*!< in: mtr */
+#endif /* UNIV_BTR_PRINT */
#ifndef UNIV_NONINL
#include "fsp0fsp.ic"
diff --git a/storage/innobase/include/fsp0fsp.ic b/storage/innobase/include/fsp0fsp.ic
index 89cd9263bd6..434c370b527 100644
--- a/storage/innobase/include/fsp0fsp.ic
+++ b/storage/innobase/include/fsp0fsp.ic
@@ -1,24 +1,45 @@
-/******************************************************
-File space management
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/fsp0fsp.ic
+File space management
Created 12/18/1995 Heikki Tuuri
*******************************************************/
-/***************************************************************************
-Checks if a page address is an extent descriptor page address. */
+/***********************************************************************//**
+Checks if a page address is an extent descriptor page address.
+@return TRUE if a descriptor page */
UNIV_INLINE
ibool
fsp_descr_page(
/*===========*/
- /* out: TRUE if a descriptor page */
- ulint page_no)/* in: page number */
+ ulint zip_size,/*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint page_no)/*!< in: page number */
{
- if (page_no % XDES_DESCRIBED_PER_PAGE == FSP_XDES_OFFSET) {
+ ut_ad(ut_is_2pow(zip_size));
- return(TRUE);
+ if (!zip_size) {
+ return(UNIV_UNLIKELY((page_no & (UNIV_PAGE_SIZE - 1))
+ == FSP_XDES_OFFSET));
}
- return(FALSE);
+ return(UNIV_UNLIKELY((page_no & (zip_size - 1)) == FSP_XDES_OFFSET));
}
diff --git a/storage/innobase/include/fsp0types.h b/storage/innobase/include/fsp0types.h
index 6756d9d285c..496081c2346 100644
--- a/storage/innobase/include/fsp0types.h
+++ b/storage/innobase/include/fsp0types.h
@@ -17,6 +17,7 @@ Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/******************************************************
+@file include/fsp0types.h
File space management types
Created May 26, 2009 Vasil Dimov
@@ -29,61 +30,81 @@ Created May 26, 2009 Vasil Dimov
#include "fil0fil.h" /* for FIL_PAGE_DATA */
-/* If records are inserted in order, there are the following
+/** @name Flags for inserting records in order
+If records are inserted in order, there are the following
flags to tell this (their type is made byte for the compiler
to warn if direction and hint parameters are switched in
-fseg_alloc_free_page): */
-#define FSP_UP ((byte)111) /* alphabetically upwards */
-#define FSP_DOWN ((byte)112) /* alphabetically downwards */
-#define FSP_NO_DIR ((byte)113) /* no order */
-
-/* File space extent size in pages */
-#define FSP_EXTENT_SIZE 64
-
-/* On a page of any file segment, data may be put starting from this offset: */
+fseg_alloc_free_page) */
+/* @{ */
+#define FSP_UP ((byte)111) /*!< alphabetically upwards */
+#define FSP_DOWN ((byte)112) /*!< alphabetically downwards */
+#define FSP_NO_DIR ((byte)113) /*!< no order */
+/* @} */
+
+/** File space extent size (one megabyte) in pages */
+#define FSP_EXTENT_SIZE (1 << (20 - UNIV_PAGE_SIZE_SHIFT))
+
+/** On a page of any file segment, data may be put starting from this
+offset */
#define FSEG_PAGE_DATA FIL_PAGE_DATA
-/* File segment header which points to the inode describing the file segment */
+/** @name File segment header
+The file segment header points to the inode describing the file segment. */
+/* @{ */
+/** Data type for file segment header */
typedef byte fseg_header_t;
-#define FSEG_HDR_SPACE 0 /* space id of the inode */
-#define FSEG_HDR_PAGE_NO 4 /* page number of the inode */
-#define FSEG_HDR_OFFSET 8 /* byte offset of the inode */
+#define FSEG_HDR_SPACE 0 /*!< space id of the inode */
+#define FSEG_HDR_PAGE_NO 4 /*!< page number of the inode */
+#define FSEG_HDR_OFFSET 8 /*!< byte offset of the inode */
-#define FSEG_HEADER_SIZE 10
+#define FSEG_HEADER_SIZE 10 /*!< Length of the file segment
+ header, in bytes */
+/* @} */
-/* Flags for fsp_reserve_free_extents */
+/** Flags for fsp_reserve_free_extents @{ */
#define FSP_NORMAL 1000000
#define FSP_UNDO 2000000
#define FSP_CLEANING 3000000
+/* @} */
/* Number of pages described in a single descriptor page: currently each page
description takes less than 1 byte; a descriptor page is repeated every
this many file pages */
-#define XDES_DESCRIBED_PER_PAGE UNIV_PAGE_SIZE
+/* #define XDES_DESCRIBED_PER_PAGE UNIV_PAGE_SIZE */
+/* This has been replaced with either UNIV_PAGE_SIZE or page_zip->size. */
-/* The space low address page map */
+/** @name The space low address page map
+The pages at FSP_XDES_OFFSET and FSP_IBUF_BITMAP_OFFSET are repeated
+every UNIV_PAGE_SIZE (or page_zip->size, if compressed) pages in every tablespace. */
+/* @{ */
/*--------------------------------------*/
- /* The following two pages are repeated
- every XDES_DESCRIBED_PER_PAGE pages in
- every tablespace. */
-#define FSP_XDES_OFFSET 0 /* extent descriptor */
-#define FSP_IBUF_BITMAP_OFFSET 1 /* insert buffer bitmap */
+#define FSP_XDES_OFFSET 0 /*!< extent descriptor */
+#define FSP_IBUF_BITMAP_OFFSET 1 /*!< insert buffer bitmap */
/* The ibuf bitmap pages are the ones whose
page number is the number above plus a
multiple of XDES_DESCRIBED_PER_PAGE */
-#define FSP_FIRST_INODE_PAGE_NO 2 /* in every tablespace */
+#define FSP_FIRST_INODE_PAGE_NO 2 /*!< in every tablespace */
/* The following pages exist
in the system tablespace (space 0). */
-#define FSP_IBUF_HEADER_PAGE_NO 3 /* in tablespace 0 */
-#define FSP_IBUF_TREE_ROOT_PAGE_NO 4 /* in tablespace 0 */
+#define FSP_IBUF_HEADER_PAGE_NO 3 /*!< insert buffer
+ header page, in
+ tablespace 0 */
+#define FSP_IBUF_TREE_ROOT_PAGE_NO 4 /*!< insert buffer
+ B-tree root page in
+ tablespace 0 */
/* The ibuf tree root page number in
tablespace 0; its fseg inode is on the page
number FSP_FIRST_INODE_PAGE_NO */
-#define FSP_TRX_SYS_PAGE_NO 5 /* in tablespace 0 */
-#define FSP_FIRST_RSEG_PAGE_NO 6 /* in tablespace 0 */
-#define FSP_DICT_HDR_PAGE_NO 7 /* in tablespace 0 */
+#define FSP_TRX_SYS_PAGE_NO 5 /*!< transaction
+ system header, in
+ tablespace 0 */
+#define FSP_FIRST_RSEG_PAGE_NO 6 /*!< first rollback segment
+ page, in tablespace 0 */
+#define FSP_DICT_HDR_PAGE_NO 7 /*!< data dictionary header
+ page, in tablespace 0 */
/*--------------------------------------*/
+/* @} */
#endif /* fsp0types_h */
diff --git a/storage/innobase/include/fut0fut.h b/storage/innobase/include/fut0fut.h
index b9546b4e1a0..dce20b3bad6 100644
--- a/storage/innobase/include/fut0fut.h
+++ b/storage/innobase/include/fut0fut.h
@@ -1,7 +1,24 @@
-/**********************************************************************
-File-based utilities
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fut0fut.h
+File-based utilities
Created 12/13/1995 Heikki Tuuri
***********************************************************************/
@@ -15,18 +32,20 @@ Created 12/13/1995 Heikki Tuuri
#include "fil0fil.h"
#include "mtr0mtr.h"
-/************************************************************************
-Gets a pointer to a file address and latches the page. */
+/********************************************************************//**
+Gets a pointer to a file address and latches the page.
+@return pointer to a byte in a frame; the file page in the frame is
+bufferfixed and latched */
UNIV_INLINE
byte*
fut_get_ptr(
/*========*/
- /* out: pointer to a byte in a frame; the file
- page in the frame is bufferfixed and latched */
- ulint space, /* in: space id */
- fil_addr_t addr, /* in: file address */
- ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH */
- mtr_t* mtr); /* in: mtr handle */
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ fil_addr_t addr, /*!< in: file address */
+ ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */
+ mtr_t* mtr); /*!< in: mtr handle */
#ifndef UNIV_NONINL
#include "fut0fut.ic"
diff --git a/storage/innobase/include/fut0fut.ic b/storage/innobase/include/fut0fut.ic
index 6a107786376..0b52719a055 100644
--- a/storage/innobase/include/fut0fut.ic
+++ b/storage/innobase/include/fut0fut.ic
@@ -1,7 +1,24 @@
-/**********************************************************************
-File-based utilities
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fut0fut.ic
+File-based utilities
Created 12/13/1995 Heikki Tuuri
***********************************************************************/
@@ -9,30 +26,31 @@ Created 12/13/1995 Heikki Tuuri
#include "sync0rw.h"
#include "buf0buf.h"
-/************************************************************************
-Gets a pointer to a file address and latches the page. */
+/********************************************************************//**
+Gets a pointer to a file address and latches the page.
+@return pointer to a byte in a frame; the file page in the frame is
+bufferfixed and latched */
UNIV_INLINE
byte*
fut_get_ptr(
/*========*/
- /* out: pointer to a byte in a frame; the file
- page in the frame is bufferfixed and latched */
- ulint space, /* in: space id */
- fil_addr_t addr, /* in: file address */
- ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH */
- mtr_t* mtr) /* in: mtr handle */
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ fil_addr_t addr, /*!< in: file address */
+ ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */
+ mtr_t* mtr) /*!< in: mtr handle */
{
- byte* ptr;
+ buf_block_t* block;
+ byte* ptr;
- ut_ad(mtr);
ut_ad(addr.boffset < UNIV_PAGE_SIZE);
ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
- ptr = buf_page_get(space, addr.page, rw_latch, mtr) + addr.boffset;
+ block = buf_page_get(space, zip_size, addr.page, rw_latch, mtr);
+ ptr = buf_block_get_frame(block) + addr.boffset;
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(ptr, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
+ buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
return(ptr);
}
diff --git a/storage/innobase/include/fut0lst.h b/storage/innobase/include/fut0lst.h
index 5427e2248da..fe024c2498f 100644
--- a/storage/innobase/include/fut0lst.h
+++ b/storage/innobase/include/fut0lst.h
@@ -1,7 +1,24 @@
-/**********************************************************************
-File-based list utilities
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1995 Innobase Oy
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fut0lst.h
+File-based list utilities
Created 11/28/1995 Heikki Tuuri
***********************************************************************/
@@ -28,171 +45,173 @@ typedef byte flst_node_t;
/* The physical size of a list node in bytes */
#define FLST_NODE_SIZE (2 * FIL_ADDR_SIZE)
-
-/************************************************************************
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
Initializes a list base node. */
UNIV_INLINE
void
flst_init(
/*======*/
- flst_base_node_t* base, /* in: pointer to base node */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
+ flst_base_node_t* base, /*!< in: pointer to base node */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/********************************************************************//**
Adds a node as the last node in a list. */
-
+UNIV_INTERN
void
flst_add_last(
/*==========*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node, /* in: node to add */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
+ flst_base_node_t* base, /*!< in: pointer to base node of list */
+ flst_node_t* node, /*!< in: node to add */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/********************************************************************//**
Adds a node as the first node in a list. */
-
+UNIV_INTERN
void
flst_add_first(
/*===========*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node, /* in: node to add */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
+ flst_base_node_t* base, /*!< in: pointer to base node of list */
+ flst_node_t* node, /*!< in: node to add */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/********************************************************************//**
Inserts a node after another in a list. */
-
+UNIV_INTERN
void
flst_insert_after(
/*==============*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node1, /* in: node to insert after */
- flst_node_t* node2, /* in: node to add */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
+ flst_base_node_t* base, /*!< in: pointer to base node of list */
+ flst_node_t* node1, /*!< in: node to insert after */
+ flst_node_t* node2, /*!< in: node to add */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/********************************************************************//**
Inserts a node before another in a list. */
-
+UNIV_INTERN
void
flst_insert_before(
/*===============*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node2, /* in: node to insert */
- flst_node_t* node3, /* in: node to insert before */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
+ flst_base_node_t* base, /*!< in: pointer to base node of list */
+ flst_node_t* node2, /*!< in: node to insert */
+ flst_node_t* node3, /*!< in: node to insert before */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/********************************************************************//**
Removes a node. */
-
+UNIV_INTERN
void
flst_remove(
/*========*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node2, /* in: node to remove */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
+ flst_base_node_t* base, /*!< in: pointer to base node of list */
+ flst_node_t* node2, /*!< in: node to remove */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/********************************************************************//**
Cuts off the tail of the list, including the node given. The number of
nodes which will be removed must be provided by the caller, as this function
does not measure the length of the tail. */
-
+UNIV_INTERN
void
flst_cut_end(
/*=========*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node2, /* in: first node to remove */
- ulint n_nodes,/* in: number of nodes to remove,
+ flst_base_node_t* base, /*!< in: pointer to base node of list */
+ flst_node_t* node2, /*!< in: first node to remove */
+ ulint n_nodes,/*!< in: number of nodes to remove,
must be >= 1 */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/********************************************************************//**
Cuts off the tail of the list, not including the given node. The number of
nodes which will be removed must be provided by the caller, as this function
does not measure the length of the tail. */
-
+UNIV_INTERN
void
flst_truncate_end(
/*==============*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node2, /* in: first node not to remove */
- ulint n_nodes,/* in: number of nodes to remove */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
-Gets list length. */
+ flst_base_node_t* base, /*!< in: pointer to base node of list */
+ flst_node_t* node2, /*!< in: first node not to remove */
+ ulint n_nodes,/*!< in: number of nodes to remove */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/********************************************************************//**
+Gets list length.
+@return length */
UNIV_INLINE
ulint
flst_get_len(
/*=========*/
- /* out: length */
- flst_base_node_t* base, /* in: pointer to base node */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
-Gets list first node address. */
+ const flst_base_node_t* base, /*!< in: pointer to base node */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/********************************************************************//**
+Gets list first node address.
+@return file address */
UNIV_INLINE
fil_addr_t
flst_get_first(
/*===========*/
- /* out: file address */
- flst_base_node_t* base, /* in: pointer to base node */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
-Gets list last node address. */
+ const flst_base_node_t* base, /*!< in: pointer to base node */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/********************************************************************//**
+Gets list last node address.
+@return file address */
UNIV_INLINE
fil_addr_t
flst_get_last(
/*==========*/
- /* out: file address */
- flst_base_node_t* base, /* in: pointer to base node */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
-Gets list next node address. */
+ const flst_base_node_t* base, /*!< in: pointer to base node */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/********************************************************************//**
+Gets list next node address.
+@return file address */
UNIV_INLINE
fil_addr_t
flst_get_next_addr(
/*===============*/
- /* out: file address */
- flst_node_t* node, /* in: pointer to node */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
-Gets list prev node address. */
+ const flst_node_t* node, /*!< in: pointer to node */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/********************************************************************//**
+Gets list prev node address.
+@return file address */
UNIV_INLINE
fil_addr_t
flst_get_prev_addr(
/*===============*/
- /* out: file address */
- flst_node_t* node, /* in: pointer to node */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
+ const flst_node_t* node, /*!< in: pointer to node */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/********************************************************************//**
Writes a file address. */
UNIV_INLINE
void
flst_write_addr(
/*============*/
- fil_faddr_t* faddr, /* in: pointer to file faddress */
- fil_addr_t addr, /* in: file address */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
-Reads a file address. */
+ fil_faddr_t* faddr, /*!< in: pointer to file faddress */
+ fil_addr_t addr, /*!< in: file address */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/********************************************************************//**
+Reads a file address.
+@return file address */
UNIV_INLINE
fil_addr_t
flst_read_addr(
/*===========*/
- /* out: file address */
- fil_faddr_t* faddr, /* in: pointer to file faddress */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
-Validates a file-based list. */
-
+ const fil_faddr_t* faddr, /*!< in: pointer to file faddress */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/********************************************************************//**
+Validates a file-based list.
+@return TRUE if ok */
+UNIV_INTERN
ibool
flst_validate(
/*==========*/
- /* out: TRUE if ok */
- flst_base_node_t* base, /* in: pointer to base node of list */
- mtr_t* mtr1); /* in: mtr */
-/************************************************************************
+ const flst_base_node_t* base, /*!< in: pointer to base node of list */
+ mtr_t* mtr1); /*!< in: mtr */
+/********************************************************************//**
Prints info of a file-based list. */
-
+UNIV_INTERN
void
flst_print(
/*=======*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- mtr_t* mtr); /* in: mtr */
+ const flst_base_node_t* base, /*!< in: pointer to base node of list */
+ mtr_t* mtr); /*!< in: mtr */
#ifndef UNIV_NONINL
#include "fut0lst.ic"
#endif
+#endif /* !UNIV_HOTBACKUP */
+
#endif
diff --git a/storage/innobase/include/fut0lst.ic b/storage/innobase/include/fut0lst.ic
index 6c7e863b078..dcd13c61871 100644
--- a/storage/innobase/include/fut0lst.ic
+++ b/storage/innobase/include/fut0lst.ic
@@ -1,7 +1,24 @@
-/**********************************************************************
-File-based list utilities
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fut0lst.ic
+File-based list utilities
Created 11/28/1995 Heikki Tuuri
***********************************************************************/
@@ -26,34 +43,35 @@ Created 11/28/1995 Heikki Tuuri
last element of the list; undefined
if empty list */
-/************************************************************************
+/********************************************************************//**
Writes a file address. */
UNIV_INLINE
void
flst_write_addr(
/*============*/
- fil_faddr_t* faddr, /* in: pointer to file faddress */
- fil_addr_t addr, /* in: file address */
- mtr_t* mtr) /* in: mini-transaction handle */
+ fil_faddr_t* faddr, /*!< in: pointer to file faddress */
+ fil_addr_t addr, /*!< in: file address */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
ut_ad(faddr && mtr);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(faddr),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, faddr, MTR_MEMO_PAGE_X_FIX));
+ ut_a(addr.page == FIL_NULL || addr.boffset >= FIL_PAGE_DATA);
+ ut_a(ut_align_offset(faddr, UNIV_PAGE_SIZE) >= FIL_PAGE_DATA);
mlog_write_ulint(faddr + FIL_ADDR_PAGE, addr.page, MLOG_4BYTES, mtr);
mlog_write_ulint(faddr + FIL_ADDR_BYTE, addr.boffset,
MLOG_2BYTES, mtr);
}
-/************************************************************************
-Reads a file address. */
+/********************************************************************//**
+Reads a file address.
+@return file address */
UNIV_INLINE
fil_addr_t
flst_read_addr(
/*===========*/
- /* out: file address */
- fil_faddr_t* faddr, /* in: pointer to file faddress */
- mtr_t* mtr) /* in: mini-transaction handle */
+ const fil_faddr_t* faddr, /*!< in: pointer to file faddress */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
fil_addr_t addr;
@@ -62,86 +80,88 @@ flst_read_addr(
addr.page = mtr_read_ulint(faddr + FIL_ADDR_PAGE, MLOG_4BYTES, mtr);
addr.boffset = mtr_read_ulint(faddr + FIL_ADDR_BYTE, MLOG_2BYTES,
mtr);
+ ut_a(addr.page == FIL_NULL || addr.boffset >= FIL_PAGE_DATA);
+ ut_a(ut_align_offset(faddr, UNIV_PAGE_SIZE) >= FIL_PAGE_DATA);
return(addr);
}
-/************************************************************************
+/********************************************************************//**
Initializes a list base node. */
UNIV_INLINE
void
flst_init(
/*======*/
- flst_base_node_t* base, /* in: pointer to base node */
- mtr_t* mtr) /* in: mini-transaction handle */
+ flst_base_node_t* base, /*!< in: pointer to base node */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
- ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
+
mlog_write_ulint(base + FLST_LEN, 0, MLOG_4BYTES, mtr);
flst_write_addr(base + FLST_FIRST, fil_addr_null, mtr);
flst_write_addr(base + FLST_LAST, fil_addr_null, mtr);
}
-/************************************************************************
-Gets list length. */
+/********************************************************************//**
+Gets list length.
+@return length */
UNIV_INLINE
ulint
flst_get_len(
/*=========*/
- /* out: length */
- flst_base_node_t* base, /* in: pointer to base node */
- mtr_t* mtr) /* in: mini-transaction handle */
+ const flst_base_node_t* base, /*!< in: pointer to base node */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
return(mtr_read_ulint(base + FLST_LEN, MLOG_4BYTES, mtr));
}
-/************************************************************************
-Gets list first node address. */
+/********************************************************************//**
+Gets list first node address.
+@return file address */
UNIV_INLINE
fil_addr_t
flst_get_first(
/*===========*/
- /* out: file address */
- flst_base_node_t* base, /* in: pointer to base node */
- mtr_t* mtr) /* in: mini-transaction handle */
+ const flst_base_node_t* base, /*!< in: pointer to base node */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
return(flst_read_addr(base + FLST_FIRST, mtr));
}
-/************************************************************************
-Gets list last node address. */
+/********************************************************************//**
+Gets list last node address.
+@return file address */
UNIV_INLINE
fil_addr_t
flst_get_last(
/*==========*/
- /* out: file address */
- flst_base_node_t* base, /* in: pointer to base node */
- mtr_t* mtr) /* in: mini-transaction handle */
+ const flst_base_node_t* base, /*!< in: pointer to base node */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
return(flst_read_addr(base + FLST_LAST, mtr));
}
-/************************************************************************
-Gets list next node address. */
+/********************************************************************//**
+Gets list next node address.
+@return file address */
UNIV_INLINE
fil_addr_t
flst_get_next_addr(
/*===============*/
- /* out: file address */
- flst_node_t* node, /* in: pointer to node */
- mtr_t* mtr) /* in: mini-transaction handle */
+ const flst_node_t* node, /*!< in: pointer to node */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
return(flst_read_addr(node + FLST_NEXT, mtr));
}
-/************************************************************************
-Gets list prev node address. */
+/********************************************************************//**
+Gets list prev node address.
+@return file address */
UNIV_INLINE
fil_addr_t
flst_get_prev_addr(
/*===============*/
- /* out: file address */
- flst_node_t* node, /* in: pointer to node */
- mtr_t* mtr) /* in: mini-transaction handle */
+ const flst_node_t* node, /*!< in: pointer to node */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
return(flst_read_addr(node + FLST_PREV, mtr));
}
diff --git a/storage/innobase/include/ha0ha.h b/storage/innobase/include/ha0ha.h
index beaa06ae755..1ffbd3440aa 100644
--- a/storage/innobase/include/ha0ha.h
+++ b/storage/innobase/include/ha0ha.h
@@ -1,7 +1,24 @@
-/******************************************************
-The hash table with external chains
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1994-1997 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/ha0ha.h
+The hash table with external chains
Created 8/18/1994 Heikki Tuuri
*******************************************************/
@@ -13,126 +30,210 @@ Created 8/18/1994 Heikki Tuuri
#include "hash0hash.h"
#include "page0types.h"
+#include "buf0types.h"
-/*****************************************************************
-Looks for an element in a hash table. */
+/*************************************************************//**
+Looks for an element in a hash table.
+@return pointer to the data of the first hash table node in chain
+having the fold number, NULL if not found */
UNIV_INLINE
void*
ha_search_and_get_data(
/*===================*/
- /* out: pointer to the data of the first hash
- table node in chain having the fold number,
- NULL if not found */
- hash_table_t* table, /* in: hash table */
- ulint fold); /* in: folded value of the searched data */
-/*************************************************************
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold); /*!< in: folded value of the searched data */
+/*********************************************************//**
Looks for an element when we know the pointer to the data and updates
the pointer to data if found. */
-
+UNIV_INTERN
void
-ha_search_and_update_if_found(
-/*==========================*/
- hash_table_t* table, /* in: hash table */
- ulint fold, /* in: folded value of the searched data */
- void* data, /* in: pointer to the data */
- void* new_data);/* in: new pointer to the data */
-/*****************************************************************
-Creates a hash table with >= n array cells. The actual number of cells is
-chosen to be a prime number slightly bigger than n. */
-
+ha_search_and_update_if_found_func(
+/*===============================*/
+ hash_table_t* table, /*!< in/out: hash table */
+ ulint fold, /*!< in: folded value of the searched data */
+ void* data, /*!< in: pointer to the data */
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+ buf_block_t* new_block,/*!< in: block containing new_data */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+ void* new_data);/*!< in: new pointer to the data */
+
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+/** Looks for an element when we know the pointer to the data and
+updates the pointer to data if found.
+@param table in/out: hash table
+@param fold in: folded value of the searched data
+@param data in: pointer to the data
+@param new_block in: block containing new_data
+@param new_data in: new pointer to the data */
+# define ha_search_and_update_if_found(table,fold,data,new_block,new_data) \
+ ha_search_and_update_if_found_func(table,fold,data,new_block,new_data)
+#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+/** Looks for an element when we know the pointer to the data and
+updates the pointer to data if found.
+@param table in/out: hash table
+@param fold in: folded value of the searched data
+@param data in: pointer to the data
+@param new_block ignored: block containing new_data
+@param new_data in: new pointer to the data */
+# define ha_search_and_update_if_found(table,fold,data,new_block,new_data) \
+ ha_search_and_update_if_found_func(table,fold,data,new_data)
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+/*************************************************************//**
+Creates a hash table with at least n array cells. The actual number
+of cells is chosen to be a prime number slightly bigger than n.
+@return own: created table */
+UNIV_INTERN
hash_table_t*
ha_create_func(
/*===========*/
- /* out, own: created table */
- ibool in_btr_search, /* in: TRUE if the hash table is used in
- the btr_search module */
- ulint n, /* in: number of array cells */
+ ulint n, /*!< in: number of array cells */
#ifdef UNIV_SYNC_DEBUG
- ulint mutex_level, /* in: level of the mutexes in the latching
+ ulint mutex_level, /*!< in: level of the mutexes in the latching
order: this is used in the debug version */
#endif /* UNIV_SYNC_DEBUG */
- ulint n_mutexes); /* in: number of mutexes to protect the
- hash table: must be a power of 2 */
+ ulint n_mutexes); /*!< in: number of mutexes to protect the
+ hash table: must be a power of 2, or 0 */
#ifdef UNIV_SYNC_DEBUG
-# define ha_create(b,n_c,n_m,level) ha_create_func(b,n_c,level,n_m)
+/** Creates a hash table.
+@return own: created table
+@param n_c in: number of array cells. The actual number of cells is
+chosen to be a slightly bigger prime number.
+@param level in: level of the mutexes in the latching order
+@param n_m in: number of mutexes to protect the hash table;
+ must be a power of 2, or 0 */
+# define ha_create(n_c,n_m,level) ha_create_func(n_c,level,n_m)
#else /* UNIV_SYNC_DEBUG */
-# define ha_create(b,n_c,n_m,level) ha_create_func(b,n_c,n_m)
+/** Creates a hash table.
+@return own: created table
+@param n_c in: number of array cells. The actual number of cells is
+chosen to be a slightly bigger prime number.
+@param level ignored: level of the mutexes in the latching order
+@param n_m in: number of mutexes to protect the hash table;
+ must be a power of 2, or 0 */
+# define ha_create(n_c,n_m,level) ha_create_func(n_c,n_m)
#endif /* UNIV_SYNC_DEBUG */
-/*****************************************************************
+
+/*************************************************************//**
+Empties a hash table and frees the memory heaps. */
+UNIV_INTERN
+void
+ha_clear(
+/*=====*/
+ hash_table_t* table); /*!< in, own: hash table */
+
+/*************************************************************//**
Inserts an entry into a hash table. If an entry with the same fold number
is found, its node is updated to point to the new data, and no new node
-is inserted. */
-
+is inserted.
+@return TRUE if succeeded, FALSE if no more memory could be allocated */
+UNIV_INTERN
ibool
-ha_insert_for_fold(
-/*===============*/
- /* out: TRUE if succeed, FALSE if no more
- memory could be allocated */
- hash_table_t* table, /* in: hash table */
- ulint fold, /* in: folded value of data; if a node with
+ha_insert_for_fold_func(
+/*====================*/
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold, /*!< in: folded value of data; if a node with
the same fold value already exists, it is
updated to point to the same data, and no new
node is created! */
- void* data); /* in: data, must not be NULL */
-/*****************************************************************
-Deletes an entry from a hash table. */
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+ buf_block_t* block, /*!< in: buffer block containing the data */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+ void* data); /*!< in: data, must not be NULL */
-void
-ha_delete(
-/*======*/
- hash_table_t* table, /* in: hash table */
- ulint fold, /* in: folded value of data */
- void* data); /* in: data, must not be NULL and must exist
- in the hash table */
-/*************************************************************
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+/**
+Inserts an entry into a hash table. If an entry with the same fold number
+is found, its node is updated to point to the new data, and no new node
+is inserted.
+@return TRUE if succeeded, FALSE if no more memory could be allocated
+@param t in: hash table
+@param f in: folded value of data
+@param b in: buffer block containing the data
+@param d in: data, must not be NULL */
+# define ha_insert_for_fold(t,f,b,d) ha_insert_for_fold_func(t,f,b,d)
+#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+/**
+Inserts an entry into a hash table. If an entry with the same fold number
+is found, its node is updated to point to the new data, and no new node
+is inserted.
+@return TRUE if succeeded, FALSE if no more memory could be allocated
+@param t in: hash table
+@param f in: folded value of data
+@param b ignored: buffer block containing the data
+@param d in: data, must not be NULL */
+# define ha_insert_for_fold(t,f,b,d) ha_insert_for_fold_func(t,f,d)
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+
+/*********************************************************//**
Looks for an element when we know the pointer to the data and deletes
-it from the hash table if found. */
+it from the hash table if found.
+@return TRUE if found */
UNIV_INLINE
ibool
ha_search_and_delete_if_found(
/*==========================*/
- /* out: TRUE if found */
- hash_table_t* table, /* in: hash table */
- ulint fold, /* in: folded value of the searched data */
- void* data); /* in: pointer to the data */
-/*********************************************************************
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold, /*!< in: folded value of the searched data */
+ void* data); /*!< in: pointer to the data */
+#ifndef UNIV_HOTBACKUP
+/*****************************************************************//**
Removes from the chain determined by fold all nodes whose data pointer
points to the page given. */
-
+UNIV_INTERN
void
ha_remove_all_nodes_to_page(
/*========================*/
- hash_table_t* table, /* in: hash table */
- ulint fold, /* in: fold value */
- page_t* page); /* in: buffer page */
-/*****************************************************************
-Validates a given range of the cells in hash table. */
-
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold, /*!< in: fold value */
+ const page_t* page); /*!< in: buffer page */
+/*************************************************************//**
+Validates a given range of the cells in hash table.
+@return TRUE if ok */
+UNIV_INTERN
ibool
ha_validate(
/*========*/
- /* out: TRUE if ok */
- hash_table_t* table, /* in: hash table */
- ulint start_index, /* in: start index */
- ulint end_index); /* in: end index */
-/*****************************************************************
+ hash_table_t* table, /*!< in: hash table */
+ ulint start_index, /*!< in: start index */
+ ulint end_index); /*!< in: end index */
+/*************************************************************//**
Prints info of a hash table. */
-
+UNIV_INTERN
void
ha_print_info(
/*==========*/
- FILE* file, /* in: file where to print */
- hash_table_t* table); /* in: hash table */
-
-/* The hash table external chain node */
+ FILE* file, /*!< in: file where to print */
+ hash_table_t* table); /*!< in: hash table */
+#endif /* !UNIV_HOTBACKUP */
+/** The hash table external chain node */
typedef struct ha_node_struct ha_node_t;
+
+/** The hash table external chain node */
struct ha_node_struct {
- ha_node_t* next; /* next chain node or NULL if none */
- void* data; /* pointer to the data */
- ulint fold; /* fold value for the data */
+ ha_node_t* next; /*!< next chain node or NULL if none */
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+ buf_block_t* block; /*!< buffer block containing the data, or NULL */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+ void* data; /*!< pointer to the data */
+ ulint fold; /*!< fold value for the data */
};
+#ifndef UNIV_HOTBACKUP
+/** Assert that the current thread is holding the mutex protecting a
+hash bucket corresponding to a fold value.
+@param table in: hash table
+@param fold in: fold value */
+# define ASSERT_HASH_MUTEX_OWN(table, fold) \
+ ut_ad(!(table)->mutexes || mutex_own(hash_get_mutex(table, fold)))
+#else /* !UNIV_HOTBACKUP */
+/** Assert that the current thread is holding the mutex protecting a
+hash bucket corresponding to a fold value.
+@param table in: hash table
+@param fold in: fold value */
+# define ASSERT_HASH_MUTEX_OWN(table, fold) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+
#ifndef UNIV_NONINL
#include "ha0ha.ic"
#endif
diff --git a/storage/innobase/include/ha0ha.ic b/storage/innobase/include/ha0ha.ic
index fb264377f28..734403c4cd9 100644
--- a/storage/innobase/include/ha0ha.ic
+++ b/storage/innobase/include/ha0ha.ic
@@ -1,7 +1,24 @@
-/************************************************************************
-The hash table with external chains
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1994-1997 Innobase Oy
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/ha0ha.ic
+The hash table with external chains
Created 8/18/1994 Heikki Tuuri
*************************************************************************/
@@ -9,79 +26,99 @@ Created 8/18/1994 Heikki Tuuri
#include "ut0rnd.h"
#include "mem0mem.h"
-/***************************************************************
+/***********************************************************//**
Deletes a hash node. */
-
+UNIV_INTERN
void
ha_delete_hash_node(
/*================*/
- hash_table_t* table, /* in: hash table */
- ha_node_t* del_node); /* in: node to be deleted */
+ hash_table_t* table, /*!< in: hash table */
+ ha_node_t* del_node); /*!< in: node to be deleted */
-/**********************************************************************
-Gets a hash node data. */
+/******************************************************************//**
+Gets a hash node data.
+@return pointer to the data */
UNIV_INLINE
void*
ha_node_get_data(
/*=============*/
- /* out: pointer to the data */
- ha_node_t* node) /* in: hash chain node */
+ ha_node_t* node) /*!< in: hash chain node */
{
return(node->data);
}
-/**********************************************************************
+/******************************************************************//**
Sets hash node data. */
UNIV_INLINE
void
-ha_node_set_data(
-/*=============*/
- ha_node_t* node, /* in: hash chain node */
- void* data) /* in: pointer to the data */
+ha_node_set_data_func(
+/*==================*/
+ ha_node_t* node, /*!< in: hash chain node */
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+ buf_block_t* block, /*!< in: buffer block containing the data */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+ void* data) /*!< in: pointer to the data */
{
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+ node->block = block;
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
node->data = data;
}
-/**********************************************************************
-Gets the next node in a hash chain. */
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+/** Sets hash node data.
+@param n in: hash chain node
+@param b in: buffer block containing the data
+@param d in: pointer to the data */
+# define ha_node_set_data(n,b,d) ha_node_set_data_func(n,b,d)
+#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+/** Sets hash node data.
+@param n in: hash chain node
+@param b ignored: buffer block containing the data
+@param d in: pointer to the data */
+# define ha_node_set_data(n,b,d) ha_node_set_data_func(n,d)
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+
+/******************************************************************//**
+Gets the next node in a hash chain.
+@return next node, NULL if none */
UNIV_INLINE
ha_node_t*
ha_chain_get_next(
/*==============*/
- /* out: next node, NULL if none */
- ha_node_t* node) /* in: hash chain node */
+ ha_node_t* node) /*!< in: hash chain node */
{
return(node->next);
}
-/**********************************************************************
-Gets the first node in a hash chain. */
+/******************************************************************//**
+Gets the first node in a hash chain.
+@return first node, NULL if none */
UNIV_INLINE
ha_node_t*
ha_chain_get_first(
/*===============*/
- /* out: first node, NULL if none */
- hash_table_t* table, /* in: hash table */
- ulint fold) /* in: fold value determining the chain */
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold) /*!< in: fold value determining the chain */
{
- return(hash_get_nth_cell(table, hash_calc_hash(fold, table))->node);
+ return((ha_node_t*)
+ hash_get_nth_cell(table, hash_calc_hash(fold, table))->node);
}
-/*****************************************************************
-Looks for an element in a hash table. */
+/*************************************************************//**
+Looks for an element in a hash table.
+@return pointer to the first hash table node in chain having the fold
+number, NULL if not found */
UNIV_INLINE
ha_node_t*
ha_search(
/*======*/
- /* out: pointer to the first hash table node
- in chain having the fold number, NULL if not
- found */
- hash_table_t* table, /* in: hash table */
- ulint fold) /* in: folded value of the searched data */
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold) /*!< in: folded value of the searched data */
{
ha_node_t* node;
- ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+ ASSERT_HASH_MUTEX_OWN(table, fold);
node = ha_chain_get_first(table, fold);
@@ -97,21 +134,20 @@ ha_search(
return(NULL);
}
-/*****************************************************************
-Looks for an element in a hash table. */
+/*************************************************************//**
+Looks for an element in a hash table.
+@return pointer to the data of the first hash table node in chain
+having the fold number, NULL if not found */
UNIV_INLINE
void*
ha_search_and_get_data(
/*===================*/
- /* out: pointer to the data of the first hash
- table node in chain having the fold number,
- NULL if not found */
- hash_table_t* table, /* in: hash table */
- ulint fold) /* in: folded value of the searched data */
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold) /*!< in: folded value of the searched data */
{
ha_node_t* node;
- ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+ ASSERT_HASH_MUTEX_OWN(table, fold);
node = ha_chain_get_first(table, fold);
@@ -127,21 +163,20 @@ ha_search_and_get_data(
return(NULL);
}
-/*************************************************************
-Looks for an element when we know the pointer to the data. */
+/*********************************************************//**
+Looks for an element when we know the pointer to the data.
+@return pointer to the hash table node, NULL if not found in the table */
UNIV_INLINE
ha_node_t*
ha_search_with_data(
/*================*/
- /* out: pointer to the hash table node, NULL
- if not found in the table */
- hash_table_t* table, /* in: hash table */
- ulint fold, /* in: folded value of the searched data */
- void* data) /* in: pointer to the data */
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold, /*!< in: folded value of the searched data */
+ void* data) /*!< in: pointer to the data */
{
ha_node_t* node;
- ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+ ASSERT_HASH_MUTEX_OWN(table, fold);
node = ha_chain_get_first(table, fold);
@@ -157,21 +192,21 @@ ha_search_with_data(
return(NULL);
}
-/*************************************************************
+/*********************************************************//**
Looks for an element when we know the pointer to the data, and deletes
-it from the hash table, if found. */
+it from the hash table, if found.
+@return TRUE if found */
UNIV_INLINE
ibool
ha_search_and_delete_if_found(
/*==========================*/
- /* out: TRUE if found */
- hash_table_t* table, /* in: hash table */
- ulint fold, /* in: folded value of the searched data */
- void* data) /* in: pointer to the data */
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold, /*!< in: folded value of the searched data */
+ void* data) /*!< in: pointer to the data */
{
ha_node_t* node;
- ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+ ASSERT_HASH_MUTEX_OWN(table, fold);
node = ha_search_with_data(table, fold, data);
diff --git a/storage/innodb_plugin/include/ha0storage.h b/storage/innobase/include/ha0storage.h
index c30bd840579..c30bd840579 100644
--- a/storage/innodb_plugin/include/ha0storage.h
+++ b/storage/innobase/include/ha0storage.h
diff --git a/storage/innodb_plugin/include/ha0storage.ic b/storage/innobase/include/ha0storage.ic
index 5acbf82f005..5acbf82f005 100644
--- a/storage/innodb_plugin/include/ha0storage.ic
+++ b/storage/innobase/include/ha0storage.ic
diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h
index ce790814818..e8789d1638b 100644
--- a/storage/innobase/include/ha_prototypes.h
+++ b/storage/innobase/include/ha_prototypes.h
@@ -1,34 +1,94 @@
-#ifndef HA_INNODB_PROTOTYPES_H
-#define HA_INNODB_PROTOTYPES_H
+/*****************************************************************************
-#ifndef UNIV_HOTBACKUP
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
-#include "univ.i" /* ulint, uint */
-#include "m_ctype.h" /* CHARSET_INFO */
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-/* Prototypes for global functions in ha_innodb.cc that are called by
-InnoDB's C-code. */
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/ha_prototypes.h
+Prototypes for global functions in ha_innodb.cc that are called by
+InnoDB C code
+
+Created 5/11/2006 Osku Salerma
+************************************************************************/
+
+#ifndef HA_INNODB_PROTOTYPES_H
+#define HA_INNODB_PROTOTYPES_H
-/*************************************************************************
-Wrapper around MySQL's copy_and_convert function, see it for
-documentation. */
+#include "trx0types.h"
+#include "m_ctype.h" /* CHARSET_INFO */
+/*********************************************************************//**
+Wrapper around MySQL's copy_and_convert function.
+@return number of bytes copied to 'to' */
+UNIV_INTERN
ulint
innobase_convert_string(
/*====================*/
- void* to,
- ulint to_length,
- CHARSET_INFO* to_cs,
- const void* from,
- ulint from_length,
- CHARSET_INFO* from_cs,
- uint* errors);
+ void* to, /*!< out: converted string */
+ ulint to_length, /*!< in: number of bytes reserved
+ for the converted string */
+ CHARSET_INFO* to_cs, /*!< in: character set to convert to */
+ const void* from, /*!< in: string to convert */
+ ulint from_length, /*!< in: number of bytes to convert */
+ CHARSET_INFO* from_cs, /*!< in: character set to convert from */
+ uint* errors); /*!< out: number of errors encountered
+ during the conversion */
+
+/*******************************************************************//**
+Formats the raw data in "data" (in InnoDB on-disk format) that is of
+type DATA_(CHAR|VARCHAR|MYSQL|VARMYSQL) using "charset_coll" and writes
+the result to "buf". The result is converted to "system_charset_info".
+Not more than "buf_size" bytes are written to "buf".
+The result is always NUL-terminated (provided buf_size > 0) and the
+number of bytes that were written to "buf" is returned (including the
+terminating NUL).
+@return number of bytes that were written */
+UNIV_INTERN
+ulint
+innobase_raw_format(
+/*================*/
+ const char* data, /*!< in: raw data */
+ ulint data_len, /*!< in: raw data length
+ in bytes */
+ ulint charset_coll, /*!< in: charset collation */
+ char* buf, /*!< out: output buffer */
+ ulint buf_size); /*!< in: output buffer size
+ in bytes */
+
+/*****************************************************************//**
+Invalidates the MySQL query cache for the table. */
+UNIV_INTERN
+void
+innobase_invalidate_query_cache(
+/*============================*/
+ trx_t* trx, /*!< in: transaction which
+ modifies the table */
+ const char* full_name, /*!< in: concatenation of
+ database name, null char NUL,
+ table name, null char NUL;
+ NOTE that in Windows this is
+ always in LOWER CASE! */
+ ulint full_name_len); /*!< in: full name length where
+ also the null chars count */
/*****************************************************************//**
Convert a table or index name to the MySQL system_charset_info (UTF-8)
and quote it if needed.
@return pointer to the end of buf */
-
+UNIV_INTERN
char*
innobase_convert_name(
/*==================*/
@@ -40,40 +100,184 @@ innobase_convert_name(
ibool table_id);/*!< in: TRUE=id is a table or database name;
FALSE=id is an index name */
-/**********************************************************************
+/******************************************************************//**
Returns true if the thread is the replication thread on the slave
server. Used in srv_conc_enter_innodb() to determine if the thread
should be allowed to enter InnoDB - the replication thread is treated
differently than other threads. Also used in
-srv_conc_force_exit_innodb(). */
-
+srv_conc_force_exit_innodb().
+@return true if thd is the replication thread */
+UNIV_INTERN
ibool
thd_is_replication_slave_thread(
/*============================*/
- /* out: true if thd is the replication thread */
- void* thd); /* in: thread handle (THD*) */
+ void* thd); /*!< in: thread handle (THD*) */
-/**********************************************************************
+/******************************************************************//**
Returns true if the transaction this thread is processing has edited
non-transactional tables. Used by the deadlock detector when deciding
which transaction to rollback in case of a deadlock - we try to avoid
-rolling back transactions that have edited non-transactional tables. */
-
+rolling back transactions that have edited non-transactional tables.
+@return true if non-transactional tables have been edited */
+UNIV_INTERN
ibool
thd_has_edited_nontrans_tables(
/*===========================*/
- /* out: true if non-transactional tables have
- been edited */
- void* thd); /* in: thread handle (THD*) */
+ void* thd); /*!< in: thread handle (THD*) */
+
+/*************************************************************//**
+Prints info of a THD object (== user session thread) to the given file. */
+UNIV_INTERN
+void
+innobase_mysql_print_thd(
+/*=====================*/
+ FILE* f, /*!< in: output stream */
+ void* thd, /*!< in: pointer to a MySQL THD object */
+ uint max_query_len); /*!< in: max query length to print, or 0 to
+ use the default max length */
-/**********************************************************************
-Returns true if the thread is executing a SELECT statement. */
+/**************************************************************//**
+Converts a MySQL type to an InnoDB type. Note that this function returns
+the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
+VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'.
+@return DATA_BINARY, DATA_VARCHAR, ... */
+UNIV_INTERN
+ulint
+get_innobase_type_from_mysql_type(
+/*==============================*/
+ ulint* unsigned_flag, /*!< out: DATA_UNSIGNED if an
+ 'unsigned type';
+ at least ENUM and SET,
+ and unsigned integer
+ types are 'unsigned types' */
+ const void* field) /*!< in: MySQL Field */
+ __attribute__((nonnull));
+
+/*************************************************************//**
+If you want to print a thd that is not associated with the current thread,
+you must call this function before reserving the InnoDB kernel_mutex, to
+protect MySQL from setting thd->query NULL. If you print a thd of the current
+thread, we know that MySQL cannot modify thd->query, and it is not necessary
+to call this. Call innobase_mysql_end_print_arbitrary_thd() after you release
+the kernel_mutex. */
+UNIV_INTERN
+void
+innobase_mysql_prepare_print_arbitrary_thd(void);
+/*============================================*/
+
+/*************************************************************//**
+Releases the mutex reserved by innobase_mysql_prepare_print_arbitrary_thd().
+In the InnoDB latching order, the mutex sits right above the
+kernel_mutex. In debug builds, we assert that the kernel_mutex is
+released before this function is invoked. */
+UNIV_INTERN
+void
+innobase_mysql_end_print_arbitrary_thd(void);
+/*========================================*/
+
+/******************************************************************//**
+Get the variable length bounds of the given character set. */
+UNIV_INTERN
+void
+innobase_get_cset_width(
+/*====================*/
+ ulint cset, /*!< in: MySQL charset-collation code */
+ ulint* mbminlen, /*!< out: minimum length of a char (in bytes) */
+ ulint* mbmaxlen); /*!< out: maximum length of a char (in bytes) */
+
+/******************************************************************//**
+Compares NUL-terminated UTF-8 strings case insensitively.
+@return 0 if a=b, <0 if a<b, >0 if a>b */
+UNIV_INTERN
+int
+innobase_strcasecmp(
+/*================*/
+ const char* a, /*!< in: first string to compare */
+ const char* b); /*!< in: second string to compare */
+
+/******************************************************************//**
+Returns true if the thread is executing a SELECT statement.
+@return true if thd is executing SELECT */
ibool
thd_is_select(
/*==========*/
- /* out: true if thd is executing SELECT */
- const void* thd); /* in: thread handle (THD*) */
+ const void* thd); /*!< in: thread handle (THD*) */
+
+/******************************************************************//**
+Converts an identifier to a table name. */
+UNIV_INTERN
+void
+innobase_convert_from_table_id(
+/*===========================*/
+ struct charset_info_st* cs, /*!< in: the 'from' character set */
+ char* to, /*!< out: converted identifier */
+ const char* from, /*!< in: identifier to convert */
+ ulint len); /*!< in: length of 'to', in bytes; should
+ be at least 5 * strlen(from) + 1 */
+/******************************************************************//**
+Converts an identifier to UTF-8. */
+UNIV_INTERN
+void
+innobase_convert_from_id(
+/*=====================*/
+ struct charset_info_st* cs, /*!< in: the 'from' character set */
+ char* to, /*!< out: converted identifier */
+ const char* from, /*!< in: identifier to convert */
+ ulint len); /*!< in: length of 'to', in bytes; should
+ be at least 3 * strlen(from) + 1 */
+/******************************************************************//**
+Makes all characters in a NUL-terminated UTF-8 string lower case. */
+UNIV_INTERN
+void
+innobase_casedn_str(
+/*================*/
+ char* a); /*!< in/out: string to put in lower case */
+
+/**********************************************************************//**
+Determines the connection character set.
+@return connection character set */
+struct charset_info_st*
+innobase_get_charset(
+/*=================*/
+ void* mysql_thd); /*!< in: MySQL thread handle */
+
+/******************************************************************//**
+This function is used to find the storage length in bytes of the first n
+characters for prefix indexes using a multibyte character set. The function
+finds charset information and returns length of prefix_len characters in the
+index field in bytes.
+@return number of bytes occupied by the first n characters */
+UNIV_INTERN
+ulint
+innobase_get_at_most_n_mbchars(
+/*===========================*/
+ ulint charset_id, /*!< in: character set id */
+ ulint prefix_len, /*!< in: prefix length in bytes of the index
+ (this has to be divided by mbmaxlen to get the
+ number of CHARACTERS n in the prefix) */
+ ulint data_len, /*!< in: length of the string in bytes */
+ const char* str); /*!< in: character string */
+
+/******************************************************************//**
+Returns true if the thread supports XA,
+global value of innodb_supports_xa if thd is NULL.
+@return true if thd supports XA */
+
+ibool
+thd_supports_xa(
+/*============*/
+ void* thd); /*!< in: thread handle (THD*), or NULL to query
+ the global innodb_supports_xa */
+
+/******************************************************************//**
+Returns the lock wait timeout for the current connection.
+@return the lock wait timeout, in seconds */
+
+ulong
+thd_lock_wait_timeout(
+/*==================*/
+ void* thd); /*!< in: thread handle (THD*), or NULL to query
+ the global innodb_lock_wait_timeout */
-#endif
#endif
diff --git a/storage/innodb_plugin/include/handler0alter.h b/storage/innobase/include/handler0alter.h
index 985b76f4f50..985b76f4f50 100644
--- a/storage/innodb_plugin/include/handler0alter.h
+++ b/storage/innobase/include/handler0alter.h
diff --git a/storage/innobase/include/hash0hash.h b/storage/innobase/include/hash0hash.h
index e119a117c94..977cb829f35 100644
--- a/storage/innobase/include/hash0hash.h
+++ b/storage/innobase/include/hash0hash.h
@@ -1,7 +1,24 @@
-/******************************************************
-The simple hash table utility
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1997 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/hash0hash.h
+The simple hash table utility
Created 5/20/1997 Heikki Tuuri
*******************************************************/
@@ -11,7 +28,9 @@ Created 5/20/1997 Heikki Tuuri
#include "univ.i"
#include "mem0mem.h"
-#include "sync0sync.h"
+#ifndef UNIV_HOTBACKUP
+# include "sync0sync.h"
+#endif /* !UNIV_HOTBACKUP */
typedef struct hash_table_struct hash_table_t;
typedef struct hash_cell_struct hash_cell_t;
@@ -21,59 +40,61 @@ typedef void* hash_node_t;
/* Fix Bug #13859: symbol collision between imap/mysql */
#define hash_create hash0_create
-/*****************************************************************
+/*************************************************************//**
Creates a hash table with >= n array cells. The actual number
-of cells is chosen to be a prime number slightly bigger than n. */
-
+of cells is chosen to be a prime number slightly bigger than n.
+@return own: created table */
+UNIV_INTERN
hash_table_t*
hash_create(
/*========*/
- /* out, own: created table */
- ulint n); /* in: number of array cells */
-/*****************************************************************
+ ulint n); /*!< in: number of array cells */
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
Creates a mutex array to protect a hash table. */
-
+UNIV_INTERN
void
hash_create_mutexes_func(
/*=====================*/
- hash_table_t* table, /* in: hash table */
+ hash_table_t* table, /*!< in: hash table */
#ifdef UNIV_SYNC_DEBUG
- ulint sync_level, /* in: latching order level of the
+ ulint sync_level, /*!< in: latching order level of the
mutexes: used in the debug version */
#endif /* UNIV_SYNC_DEBUG */
- ulint n_mutexes); /* in: number of mutexes */
+ ulint n_mutexes); /*!< in: number of mutexes */
#ifdef UNIV_SYNC_DEBUG
# define hash_create_mutexes(t,n,level) hash_create_mutexes_func(t,level,n)
#else /* UNIV_SYNC_DEBUG */
# define hash_create_mutexes(t,n,level) hash_create_mutexes_func(t,n)
#endif /* UNIV_SYNC_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
-/*****************************************************************
+/*************************************************************//**
Frees a hash table. */
-
+UNIV_INTERN
void
hash_table_free(
/*============*/
- hash_table_t* table); /* in, own: hash table */
-/******************************************************************
-Calculates the hash value from a folded value. */
+ hash_table_t* table); /*!< in, own: hash table */
+/**************************************************************//**
+Calculates the hash value from a folded value.
+@return hashed value */
UNIV_INLINE
ulint
hash_calc_hash(
/*===========*/
- /* out: hashed value */
- ulint fold, /* in: folded value */
- hash_table_t* table); /* in: hash table */
-/************************************************************************
+ ulint fold, /*!< in: folded value */
+ hash_table_t* table); /*!< in: hash table */
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
Assert that the mutex for the table in a hash operation is owned. */
-#ifdef UNIV_SYNC_DEBUG
-# define HASH_ASSERT_OWNED(TABLE, FOLD) \
+# define HASH_ASSERT_OWNED(TABLE, FOLD) \
ut_ad(!(TABLE)->mutexes || mutex_own(hash_get_mutex(TABLE, FOLD)));
-#else
+#else /* !UNIV_HOTBACKUP */
# define HASH_ASSERT_OWNED(TABLE, FOLD)
-#endif
+#endif /* !UNIV_HOTBACKUP */
-/***********************************************************************
+/*******************************************************************//**
Inserts a struct to a hash table. */
#define HASH_INSERT(TYPE, NAME, TABLE, FOLD, DATA)\
@@ -90,18 +111,26 @@ do {\
if (cell3333->node == NULL) {\
cell3333->node = DATA;\
} else {\
- struct3333 = cell3333->node;\
+ struct3333 = (TYPE*) cell3333->node;\
\
while (struct3333->NAME != NULL) {\
\
- struct3333 = struct3333->NAME;\
+ struct3333 = (TYPE*) struct3333->NAME;\
}\
\
struct3333->NAME = DATA;\
}\
} while (0)
-/***********************************************************************
+#ifdef UNIV_HASH_DEBUG
+# define HASH_ASSERT_VALID(DATA) ut_a((void*) (DATA) != (void*) -1)
+# define HASH_INVALIDATE(DATA, NAME) DATA->NAME = (void*) -1
+#else
+# define HASH_ASSERT_VALID(DATA) do {} while (0)
+# define HASH_INVALIDATE(DATA, NAME) do {} while (0)
+#endif
+
+/*******************************************************************//**
Deletes a struct from a hash table. */
#define HASH_DELETE(TYPE, NAME, TABLE, FOLD, DATA)\
@@ -114,67 +143,107 @@ do {\
cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\
\
if (cell3333->node == DATA) {\
+ HASH_ASSERT_VALID(DATA->NAME);\
cell3333->node = DATA->NAME;\
} else {\
- struct3333 = cell3333->node;\
+ struct3333 = (TYPE*) cell3333->node;\
\
while (struct3333->NAME != DATA) {\
\
- struct3333 = struct3333->NAME;\
+ struct3333 = (TYPE*) struct3333->NAME;\
ut_a(struct3333);\
}\
\
struct3333->NAME = DATA->NAME;\
}\
+ HASH_INVALIDATE(DATA, NAME);\
} while (0)
-/***********************************************************************
+/*******************************************************************//**
Gets the first struct in a hash chain, NULL if none. */
#define HASH_GET_FIRST(TABLE, HASH_VAL)\
(hash_get_nth_cell(TABLE, HASH_VAL)->node)
-/***********************************************************************
+/*******************************************************************//**
Gets the next struct in a hash chain, NULL if none. */
#define HASH_GET_NEXT(NAME, DATA) ((DATA)->NAME)
-/************************************************************************
+/********************************************************************//**
Looks for a struct in a hash table. */
-#define HASH_SEARCH(NAME, TABLE, FOLD, DATA, TEST)\
+#define HASH_SEARCH(NAME, TABLE, FOLD, TYPE, DATA, ASSERTION, TEST)\
{\
\
HASH_ASSERT_OWNED(TABLE, FOLD)\
\
- (DATA) = HASH_GET_FIRST(TABLE, hash_calc_hash(FOLD, TABLE));\
+ (DATA) = (TYPE) HASH_GET_FIRST(TABLE, hash_calc_hash(FOLD, TABLE));\
+ HASH_ASSERT_VALID(DATA);\
\
while ((DATA) != NULL) {\
+ ASSERTION;\
if (TEST) {\
break;\
} else {\
- (DATA) = HASH_GET_NEXT(NAME, DATA);\
+ HASH_ASSERT_VALID(HASH_GET_NEXT(NAME, DATA));\
+ (DATA) = (TYPE) HASH_GET_NEXT(NAME, DATA);\
}\
}\
}
-/****************************************************************
-Gets the nth cell in a hash table. */
+/********************************************************************//**
+Looks for an item in all hash buckets. */
+#define HASH_SEARCH_ALL(NAME, TABLE, TYPE, DATA, ASSERTION, TEST) \
+do { \
+ ulint i3333; \
+ \
+ for (i3333 = (TABLE)->n_cells; i3333--; ) { \
+ (DATA) = (TYPE) HASH_GET_FIRST(TABLE, i3333); \
+ \
+ while ((DATA) != NULL) { \
+ HASH_ASSERT_VALID(DATA); \
+ ASSERTION; \
+ \
+ if (TEST) { \
+ break; \
+ } \
+ \
+ (DATA) = (TYPE) HASH_GET_NEXT(NAME, DATA); \
+ } \
+ \
+ if ((DATA) != NULL) { \
+ break; \
+ } \
+ } \
+} while (0)
+
+/************************************************************//**
+Gets the nth cell in a hash table.
+@return pointer to cell */
UNIV_INLINE
hash_cell_t*
hash_get_nth_cell(
/*==============*/
- /* out: pointer to cell */
- hash_table_t* table, /* in: hash table */
- ulint n); /* in: cell index */
-/*****************************************************************
-Returns the number of cells in a hash table. */
+ hash_table_t* table, /*!< in: hash table */
+ ulint n); /*!< in: cell index */
+
+/*************************************************************//**
+Clears a hash table so that all the cells become empty. */
+UNIV_INLINE
+void
+hash_table_clear(
+/*=============*/
+ hash_table_t* table); /*!< in/out: hash table */
+
+/*************************************************************//**
+Returns the number of cells in a hash table.
+@return number of cells */
UNIV_INLINE
ulint
hash_get_n_cells(
/*=============*/
- /* out: number of cells */
- hash_table_t* table); /* in: table */
-/***********************************************************************
+ hash_table_t* table); /*!< in: table */
+/*******************************************************************//**
Deletes a struct which is stored in the heap of the hash table, and compacts
the heap. The fold value must be stored in the struct NODE in a field named
'fold'. */
@@ -233,8 +302,9 @@ do {\
mem_heap_free_top(hash_get_heap(TABLE, fold111), sizeof(TYPE));\
} while (0)
-/********************************************************************
-Move all hash table entries from OLD_TABLE to NEW_TABLE.*/
+#ifndef UNIV_HOTBACKUP
+/****************************************************************//**
+Move all hash table entries from OLD_TABLE to NEW_TABLE. */
#define HASH_MIGRATE(OLD_TABLE, NEW_TABLE, NODE_TYPE, PTR_NAME, FOLD_FUNC) \
do {\
@@ -258,102 +328,111 @@ do {\
}\
} while (0)
-
-/****************************************************************
-Gets the mutex index for a fold value in a hash table. */
+/************************************************************//**
+Gets the mutex index for a fold value in a hash table.
+@return mutex number */
UNIV_INLINE
ulint
hash_get_mutex_no(
/*==============*/
- /* out: mutex number */
- hash_table_t* table, /* in: hash table */
- ulint fold); /* in: fold */
-/****************************************************************
-Gets the nth heap in a hash table. */
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold); /*!< in: fold */
+/************************************************************//**
+Gets the nth heap in a hash table.
+@return mem heap */
UNIV_INLINE
mem_heap_t*
hash_get_nth_heap(
/*==============*/
- /* out: mem heap */
- hash_table_t* table, /* in: hash table */
- ulint i); /* in: index of the heap */
-/****************************************************************
-Gets the heap for a fold value in a hash table. */
+ hash_table_t* table, /*!< in: hash table */
+ ulint i); /*!< in: index of the heap */
+/************************************************************//**
+Gets the heap for a fold value in a hash table.
+@return mem heap */
UNIV_INLINE
mem_heap_t*
hash_get_heap(
/*==========*/
- /* out: mem heap */
- hash_table_t* table, /* in: hash table */
- ulint fold); /* in: fold */
-/****************************************************************
-Gets the nth mutex in a hash table. */
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold); /*!< in: fold */
+/************************************************************//**
+Gets the nth mutex in a hash table.
+@return mutex */
UNIV_INLINE
mutex_t*
hash_get_nth_mutex(
/*===============*/
- /* out: mutex */
- hash_table_t* table, /* in: hash table */
- ulint i); /* in: index of the mutex */
-/****************************************************************
-Gets the mutex for a fold value in a hash table. */
+ hash_table_t* table, /*!< in: hash table */
+ ulint i); /*!< in: index of the mutex */
+/************************************************************//**
+Gets the mutex for a fold value in a hash table.
+@return mutex */
UNIV_INLINE
mutex_t*
hash_get_mutex(
/*===========*/
- /* out: mutex */
- hash_table_t* table, /* in: hash table */
- ulint fold); /* in: fold */
-/****************************************************************
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold); /*!< in: fold */
+/************************************************************//**
Reserves the mutex for a fold value in a hash table. */
-
+UNIV_INTERN
void
hash_mutex_enter(
/*=============*/
- hash_table_t* table, /* in: hash table */
- ulint fold); /* in: fold */
-/****************************************************************
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold); /*!< in: fold */
+/************************************************************//**
Releases the mutex for a fold value in a hash table. */
-
+UNIV_INTERN
void
hash_mutex_exit(
/*============*/
- hash_table_t* table, /* in: hash table */
- ulint fold); /* in: fold */
-/****************************************************************
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold); /*!< in: fold */
+/************************************************************//**
Reserves all the mutexes of a hash table, in an ascending order. */
-
+UNIV_INTERN
void
hash_mutex_enter_all(
/*=================*/
- hash_table_t* table); /* in: hash table */
-/****************************************************************
+ hash_table_t* table); /*!< in: hash table */
+/************************************************************//**
Releases all the mutexes of a hash table. */
-
+UNIV_INTERN
void
hash_mutex_exit_all(
/*================*/
- hash_table_t* table); /* in: hash table */
-
+ hash_table_t* table); /*!< in: hash table */
+#else /* !UNIV_HOTBACKUP */
+# define hash_get_heap(table, fold) ((table)->heap)
+# define hash_mutex_enter(table, fold) ((void) 0)
+# define hash_mutex_exit(table, fold) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
struct hash_cell_struct{
- void* node; /* hash chain node, NULL if none */
+ void* node; /*!< hash chain node, NULL if none */
};
/* The hash table structure */
struct hash_table_struct {
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+# ifndef UNIV_HOTBACKUP
ibool adaptive;/* TRUE if this is the hash table of the
adaptive hash index */
+# endif /* !UNIV_HOTBACKUP */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
ulint n_cells;/* number of cells in the hash table */
- hash_cell_t* array; /* pointer to cell array */
+ hash_cell_t* array; /*!< pointer to cell array */
+#ifndef UNIV_HOTBACKUP
ulint n_mutexes;/* if mutexes != NULL, then the number of
mutexes, must be a power of 2 */
mutex_t* mutexes;/* NULL, or an array of mutexes used to
protect segments of the hash table */
- mem_heap_t** heaps; /* if this is non-NULL, hash chain nodes for
+ mem_heap_t** heaps; /*!< if this is non-NULL, hash chain nodes for
external chaining can be allocated from these
memory heaps; there are then n_mutexes many of
these heaps */
+#endif /* !UNIV_HOTBACKUP */
mem_heap_t* heap;
ulint magic_n;
};
diff --git a/storage/innobase/include/hash0hash.ic b/storage/innobase/include/hash0hash.ic
index d246d8ee831..19da2d50701 100644
--- a/storage/innobase/include/hash0hash.ic
+++ b/storage/innobase/include/hash0hash.ic
@@ -1,91 +1,122 @@
-/******************************************************
-The simple hash table utility
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
-(c) 1997 Innobase Oy
+/**************************************************//**
+@file include/hash0hash.ic
+The simple hash table utility
Created 5/20/1997 Heikki Tuuri
*******************************************************/
#include "ut0rnd.h"
-/****************************************************************
-Gets the nth cell in a hash table. */
+/************************************************************//**
+Gets the nth cell in a hash table.
+@return pointer to cell */
UNIV_INLINE
hash_cell_t*
hash_get_nth_cell(
/*==============*/
- /* out: pointer to cell */
- hash_table_t* table, /* in: hash table */
- ulint n) /* in: cell index */
+ hash_table_t* table, /*!< in: hash table */
+ ulint n) /*!< in: cell index */
{
ut_ad(n < table->n_cells);
return(table->array + n);
}
-/*****************************************************************
-Returns the number of cells in a hash table. */
+/*************************************************************//**
+Clears a hash table so that all the cells become empty. */
+UNIV_INLINE
+void
+hash_table_clear(
+/*=============*/
+ hash_table_t* table) /*!< in/out: hash table */
+{
+ memset(table->array, 0x0,
+ table->n_cells * sizeof(*table->array));
+}
+
+/*************************************************************//**
+Returns the number of cells in a hash table.
+@return number of cells */
UNIV_INLINE
ulint
hash_get_n_cells(
/*=============*/
- /* out: number of cells */
- hash_table_t* table) /* in: table */
+ hash_table_t* table) /*!< in: table */
{
return(table->n_cells);
}
-/******************************************************************
-Calculates the hash value from a folded value. */
+/**************************************************************//**
+Calculates the hash value from a folded value.
+@return hashed value */
UNIV_INLINE
ulint
hash_calc_hash(
/*===========*/
- /* out: hashed value */
- ulint fold, /* in: folded value */
- hash_table_t* table) /* in: hash table */
+ ulint fold, /*!< in: folded value */
+ hash_table_t* table) /*!< in: hash table */
{
return(ut_hash_ulint(fold, table->n_cells));
}
-/****************************************************************
-Gets the mutex index for a fold value in a hash table. */
+#ifndef UNIV_HOTBACKUP
+/************************************************************//**
+Gets the mutex index for a fold value in a hash table.
+@return mutex number */
UNIV_INLINE
ulint
hash_get_mutex_no(
/*==============*/
- /* out: mutex number */
- hash_table_t* table, /* in: hash table */
- ulint fold) /* in: fold */
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold) /*!< in: fold */
{
+ ut_ad(ut_is_2pow(table->n_mutexes));
return(ut_2pow_remainder(hash_calc_hash(fold, table),
table->n_mutexes));
}
-/****************************************************************
-Gets the nth heap in a hash table. */
+/************************************************************//**
+Gets the nth heap in a hash table.
+@return mem heap */
UNIV_INLINE
mem_heap_t*
hash_get_nth_heap(
/*==============*/
- /* out: mem heap */
- hash_table_t* table, /* in: hash table */
- ulint i) /* in: index of the heap */
+ hash_table_t* table, /*!< in: hash table */
+ ulint i) /*!< in: index of the heap */
{
ut_ad(i < table->n_mutexes);
return(table->heaps[i]);
}
-/****************************************************************
-Gets the heap for a fold value in a hash table. */
+/************************************************************//**
+Gets the heap for a fold value in a hash table.
+@return mem heap */
UNIV_INLINE
mem_heap_t*
hash_get_heap(
/*==========*/
- /* out: mem heap */
- hash_table_t* table, /* in: hash table */
- ulint fold) /* in: fold */
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold) /*!< in: fold */
{
ulint i;
@@ -98,30 +129,30 @@ hash_get_heap(
return(hash_get_nth_heap(table, i));
}
-/****************************************************************
-Gets the nth mutex in a hash table. */
+/************************************************************//**
+Gets the nth mutex in a hash table.
+@return mutex */
UNIV_INLINE
mutex_t*
hash_get_nth_mutex(
/*===============*/
- /* out: mutex */
- hash_table_t* table, /* in: hash table */
- ulint i) /* in: index of the mutex */
+ hash_table_t* table, /*!< in: hash table */
+ ulint i) /*!< in: index of the mutex */
{
ut_ad(i < table->n_mutexes);
return(table->mutexes + i);
}
-/****************************************************************
-Gets the mutex for a fold value in a hash table. */
+/************************************************************//**
+Gets the mutex for a fold value in a hash table.
+@return mutex */
UNIV_INLINE
mutex_t*
hash_get_mutex(
/*===========*/
- /* out: mutex */
- hash_table_t* table, /* in: hash table */
- ulint fold) /* in: fold */
+ hash_table_t* table, /*!< in: hash table */
+ ulint fold) /*!< in: fold */
{
ulint i;
@@ -129,3 +160,4 @@ hash_get_mutex(
return(hash_get_nth_mutex(table, i));
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/ibuf0ibuf.h b/storage/innobase/include/ibuf0ibuf.h
index 77fefe2020b..8aa21fb9d95 100644
--- a/storage/innobase/include/ibuf0ibuf.h
+++ b/storage/innobase/include/ibuf0ibuf.h
@@ -1,7 +1,24 @@
-/******************************************************
-Insert buffer
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
-(c) 1997 Innobase Oy
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/ibuf0ibuf.h
+Insert buffer
Created 7/19/1997 Heikki Tuuri
*******************************************************/
@@ -11,297 +28,354 @@ Created 7/19/1997 Heikki Tuuri
#include "univ.i"
-#include "dict0mem.h"
-#include "dict0dict.h"
#include "mtr0mtr.h"
-#include "que0types.h"
-#include "ibuf0types.h"
+#include "dict0mem.h"
#include "fsp0fsp.h"
-extern ibuf_t* ibuf;
-
-/**********************************************************************
-Creates the insert buffer data struct for a single tablespace. Reads the
-root page of the insert buffer tree in the tablespace. This function can
-be called only after the dictionary system has been initialized, as this
-creates also the insert buffer table and index for this tablespace. */
-
-ibuf_data_t*
-ibuf_data_init_for_space(
-/*=====================*/
- /* out, own: ibuf data struct, linked to the list
- in ibuf control structure. */
- ulint space); /* in: space id */
-/**********************************************************************
+#ifndef UNIV_HOTBACKUP
+# include "ibuf0types.h"
+
+/** Combinations of operations that can be buffered. Because the enum
+values are used for indexing innobase_change_buffering_values[], they
+should start at 0 and there should not be any gaps. */
+typedef enum {
+ IBUF_USE_NONE = 0,
+ IBUF_USE_INSERT, /* insert */
+
+ IBUF_USE_COUNT /* number of entries in ibuf_use_t */
+} ibuf_use_t;
+
+/** Operations that can currently be buffered. */
+extern ibuf_use_t ibuf_use;
+
+/** The insert buffer control structure */
+extern ibuf_t* ibuf;
+
+/* The purpose of the insert buffer is to reduce random disk access.
+When we wish to insert a record into a non-unique secondary index and
+the B-tree leaf page to which the record belongs is not in the buffer
+pool, we insert the record into the insert buffer B-tree, indexed by
+(space_id, page_no). When the page is eventually read into the buffer
+pool, we look up the insert buffer B-tree for any modifications to the
+page, and apply these upon the completion of the read operation. This
+is called the insert buffer merge. */
+
+/* The insert buffer merge must always succeed. To guarantee this,
+the insert buffer subsystem keeps track of the free space in pages for
+which it can buffer operations. Two bits per page in the insert
+buffer bitmap indicate the available space in coarse increments. The
+free bits in the insert buffer bitmap must never exceed the free space
+on a page. It is safe to decrement or reset the bits in the bitmap in
+a mini-transaction that is committed before the mini-transaction that
+affects the free space. It is unsafe to increment the bits in a
+separately committed mini-transaction, because in crash recovery, the
+free bits could momentarily be set too high. */
+
+/******************************************************************//**
Creates the insert buffer data structure at a database startup and
initializes the data structures for the insert buffer of each tablespace. */
-
+UNIV_INTERN
void
ibuf_init_at_db_start(void);
/*=======================*/
-/*************************************************************************
+/*********************************************************************//**
Reads the biggest tablespace id from the high end of the insert buffer
tree and updates the counter in fil_system. */
-
+UNIV_INTERN
void
ibuf_update_max_tablespace_id(void);
/*===============================*/
-/*************************************************************************
+/*********************************************************************//**
Initializes an ibuf bitmap page. */
-
+UNIV_INTERN
void
ibuf_bitmap_page_init(
/*==================*/
- page_t* page, /* in: bitmap page */
- mtr_t* mtr); /* in: mtr */
-/****************************************************************************
-Resets the free bits of the page in the ibuf bitmap. This is done in a
-separate mini-transaction, hence this operation does not restrict further
-work to only ibuf bitmap operations, which would result if the latch to the
-bitmap page were kept. */
-
-void
-ibuf_reset_free_bits_with_type(
-/*===========================*/
- ulint type, /* in: index type */
- page_t* page); /* in: index page; free bits are set to 0 if the index
- is non-clustered and non-unique and the page level is
- 0 */
-/****************************************************************************
+ buf_block_t* block, /*!< in: bitmap page */
+ mtr_t* mtr); /*!< in: mtr */
+/************************************************************************//**
Resets the free bits of the page in the ibuf bitmap. This is done in a
-separate mini-transaction, hence this operation does not restrict further
-work to solely ibuf bitmap operations, which would result if the latch to
-the bitmap page were kept. */
-
+separate mini-transaction, hence this operation does not restrict
+further work to only ibuf bitmap operations, which would result if the
+latch to the bitmap page were kept. NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page. It is safe
+to decrement or reset the bits in the bitmap in a mini-transaction
+that is committed before the mini-transaction that affects the free
+space. */
+UNIV_INTERN
void
ibuf_reset_free_bits(
/*=================*/
- dict_index_t* index, /* in: index */
- page_t* page); /* in: index page; free bits are set to 0 if
- the index is non-clustered and non-unique and
- the page level is 0 */
-/****************************************************************************
-Updates the free bits of the page in the ibuf bitmap if there is not enough
-free on the page any more. This is done in a separate mini-transaction, hence
-this operation does not restrict further work to only ibuf bitmap operations,
-which would result if the latch to the bitmap page were kept. */
+ buf_block_t* block); /*!< in: index page; free bits are set to 0
+ if the index is non-clustered and
+ non-unique, and the page level is 0 */
+/************************************************************************//**
+Updates the free bits of an uncompressed page in the ibuf bitmap if
+there is not enough free space on the page any more. This is done in a
+separate mini-transaction, hence this operation does not restrict
+further work to only ibuf bitmap operations, which would result if the
+latch to the bitmap page were kept. NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page. It is
+unsafe to increment the bits in a separately committed
+mini-transaction, because in crash recovery, the free bits could
+momentarily be set too high. It is only safe to use this function for
+decrementing the free bits. Should more free space become available,
+we must not update the free bits here, because that would break crash
+recovery. */
UNIV_INLINE
void
ibuf_update_free_bits_if_full(
/*==========================*/
- dict_index_t* index, /* in: index */
- page_t* page, /* in: index page to which we have added new
+ buf_block_t* block, /*!< in: index page to which we have added new
records; the free bits are updated if the
index is non-clustered and non-unique and
the page level is 0, and the page becomes
fuller */
- ulint max_ins_size,/* in: value of maximum insert size with
+ ulint max_ins_size,/*!< in: value of maximum insert size with
reorganize before the latest operation
performed to the page */
- ulint increase);/* in: upper limit for the additional space
+ ulint increase);/*!< in: upper limit for the additional space
used in the latest operation, if known, or
ULINT_UNDEFINED */
-/**************************************************************************
-Updates the free bits for the page to reflect the present state. Does this
-in the mtr given, which means that the latching order rules virtually
-prevent any further operations for this OS thread until mtr is committed. */
-
+/**********************************************************************//**
+Updates the free bits for an uncompressed page to reflect the present
+state. Does this in the mtr given, which means that the latching
+order rules virtually prevent any further operations for this OS
+thread until mtr is committed. NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page. It is safe
+to set the free bits in the same mini-transaction that updated the
+page. */
+UNIV_INTERN
void
ibuf_update_free_bits_low(
/*======================*/
- dict_index_t* index, /* in: index */
- page_t* page, /* in: index page */
- ulint max_ins_size, /* in: value of maximum insert size
- with reorganize before the latest
- operation performed to the page */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
-Updates the free bits for the two pages to reflect the present state. Does
-this in the mtr given, which means that the latching order rules virtually
-prevent any further operations until mtr is committed. */
-
+ const buf_block_t* block, /*!< in: index page */
+ ulint max_ins_size, /*!< in: value of
+ maximum insert size
+ with reorganize before
+ the latest operation
+ performed to the page */
+ mtr_t* mtr); /*!< in/out: mtr */
+/**********************************************************************//**
+Updates the free bits for a compressed page to reflect the present
+state. Does this in the mtr given, which means that the latching
+order rules virtually prevent any further operations for this OS
+thread until mtr is committed. NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page. It is safe
+to set the free bits in the same mini-transaction that updated the
+page. */
+UNIV_INTERN
+void
+ibuf_update_free_bits_zip(
+/*======================*/
+ buf_block_t* block, /*!< in/out: index page */
+ mtr_t* mtr); /*!< in/out: mtr */
+/**********************************************************************//**
+Updates the free bits for the two pages to reflect the present state.
+Does this in the mtr given, which means that the latching order rules
+virtually prevent any further operations until mtr is committed.
+NOTE: The free bits in the insert buffer bitmap must never exceed the
+free space on a page. It is safe to set the free bits in the same
+mini-transaction that updated the pages. */
+UNIV_INTERN
void
ibuf_update_free_bits_for_two_pages_low(
/*====================================*/
- dict_index_t* index, /* in: index */
- page_t* page1, /* in: index page */
- page_t* page2, /* in: index page */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
+ ulint zip_size,/*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ buf_block_t* block1, /*!< in: index page */
+ buf_block_t* block2, /*!< in: index page */
+ mtr_t* mtr); /*!< in: mtr */
+/**********************************************************************//**
A basic partial test if an insert to the insert buffer could be possible and
recommended. */
UNIV_INLINE
ibool
ibuf_should_try(
/*============*/
- dict_index_t* index, /* in: index where to insert */
- ulint ignore_sec_unique); /* in: if != 0, we should
+ dict_index_t* index, /*!< in: index where to insert */
+ ulint ignore_sec_unique); /*!< in: if != 0, we should
ignore UNIQUE constraint on
a secondary index when we
decide */
-/**********************************************************************
+/******************************************************************//**
Returns TRUE if the current OS thread is performing an insert buffer
-routine. */
+routine.
+For instance, a read-ahead of non-ibuf pages is forbidden for threads
+that are executing an insert buffer routine.
+@return TRUE if inside an insert buffer routine */
+UNIV_INTERN
ibool
ibuf_inside(void);
/*=============*/
- /* out: TRUE if inside an insert buffer routine: for instance,
- a read-ahead of non-ibuf pages is then forbidden */
-/***************************************************************************
-Checks if a page address is an ibuf bitmap page (level 3 page) address. */
+/***********************************************************************//**
+Checks if a page address is an ibuf bitmap page (level 3 page) address.
+@return TRUE if a bitmap page */
UNIV_INLINE
ibool
ibuf_bitmap_page(
/*=============*/
- /* out: TRUE if a bitmap page */
- ulint page_no);/* in: page number */
-/***************************************************************************
-Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
-
+ ulint zip_size,/*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint page_no);/*!< in: page number */
+/***********************************************************************//**
+Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
+Must not be called when recv_no_ibuf_operations==TRUE.
+@return TRUE if level 2 or level 3 page */
+UNIV_INTERN
ibool
ibuf_page(
/*======*/
- /* out: TRUE if level 2 or level 3 page */
- ulint space, /* in: space id */
- ulint page_no);/* in: page number */
-/***************************************************************************
-Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
-
-ibool
-ibuf_page_low(
-/*==========*/
- /* out: TRUE if level 2 or level 3 page */
- ulint space, /* in: space id */
- ulint page_no,/* in: page number */
- mtr_t* mtr); /* in: mtr which will contain an x-latch to the
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
+ ulint page_no,/*!< in: page number */
+ mtr_t* mtr); /*!< in: mtr which will contain an x-latch to the
bitmap page if the page is not one of the fixed
- address ibuf pages */
-/***************************************************************************
+ address ibuf pages, or NULL, in which case a new
+ mini-transaction is created. */
+/***********************************************************************//**
Frees excess pages from the ibuf free list. This function is called when an OS
thread calls fsp services to allocate a new file segment, or a new page to a
file segment, and the thread did not own the fsp latch before this call. */
-
+UNIV_INTERN
void
-ibuf_free_excess_pages(
-/*===================*/
- ulint space); /* in: space id */
-/*************************************************************************
+ibuf_free_excess_pages(void);
+/*========================*/
+/*********************************************************************//**
Makes an index insert to the insert buffer, instead of directly to the disk
page, if this is possible. Does not do insert if the index is clustered
-or unique. */
-
+or unique.
+@return TRUE if success */
+UNIV_INTERN
ibool
ibuf_insert(
/*========*/
- /* out: TRUE if success */
- dtuple_t* entry, /* in: index entry to insert */
- dict_index_t* index, /* in: index where to insert */
- ulint space, /* in: space id where to insert */
- ulint page_no,/* in: page number where to insert */
- que_thr_t* thr); /* in: query thread */
-/*************************************************************************
+ const dtuple_t* entry, /*!< in: index entry to insert */
+ dict_index_t* index, /*!< in: index where to insert */
+ ulint space, /*!< in: space id where to insert */
+ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
+ ulint page_no,/*!< in: page number where to insert */
+ que_thr_t* thr); /*!< in: query thread */
+/*********************************************************************//**
When an index page is read from a disk to the buffer pool, this function
inserts to the page the possible index entries buffered in the insert buffer.
The entries are deleted from the insert buffer. If the page is not read, but
created in the buffer pool, this function deletes its buffered entries from
the insert buffer; there can exist entries for such a page if the page
belonged to an index which subsequently was dropped. */
-
+UNIV_INTERN
void
ibuf_merge_or_delete_for_page(
/*==========================*/
- page_t* page, /* in: if page has been read from disk, pointer to
- the page x-latched, else NULL */
- ulint space, /* in: space id of the index page */
- ulint page_no,/* in: page number of the index page */
- ibool update_ibuf_bitmap);/* in: normally this is set to TRUE, but if
- we have deleted or are deleting the tablespace, then we
- naturally do not want to update a non-existent bitmap
- page */
-/*************************************************************************
+ buf_block_t* block, /*!< in: if page has been read from
+ disk, pointer to the page x-latched,
+ else NULL */
+ ulint space, /*!< in: space id of the index page */
+ ulint page_no,/*!< in: page number of the index page */
+ ulint zip_size,/*!< in: compressed page size in bytes,
+ or 0 */
+ ibool update_ibuf_bitmap);/*!< in: normally this is set
+ to TRUE, but if we have deleted or are
+ deleting the tablespace, then we
+ naturally do not want to update a
+ non-existent bitmap page */
+/*********************************************************************//**
Deletes all entries in the insert buffer for a given space id. This is used
in DISCARD TABLESPACE and IMPORT TABLESPACE.
NOTE: this does not update the page free bitmaps in the space. The space will
become CORRUPT when you call this function! */
-
+UNIV_INTERN
void
ibuf_delete_for_discarded_space(
/*============================*/
- ulint space); /* in: space id */
-/*************************************************************************
-Contracts insert buffer trees by reading pages to the buffer pool. */
-
+ ulint space); /*!< in: space id */
+/*********************************************************************//**
+Contracts insert buffer trees by reading pages to the buffer pool.
+@return a lower limit for the combined size in bytes of entries which
+will be merged from ibuf trees to the pages read, 0 if ibuf is
+empty */
+UNIV_INTERN
ulint
ibuf_contract(
/*==========*/
- /* out: a lower limit for the combined size in bytes
- of entries which will be merged from ibuf trees to the
- pages read, 0 if ibuf is empty */
- ibool sync); /* in: TRUE if the caller wants to wait for the
+ ibool sync); /*!< in: TRUE if the caller wants to wait for the
issued read with the highest tablespace address
to complete */
-/*************************************************************************
-Contracts insert buffer trees by reading pages to the buffer pool. */
-
+/*********************************************************************//**
+Contracts insert buffer trees by reading pages to the buffer pool.
+@return a lower limit for the combined size in bytes of entries which
+will be merged from ibuf trees to the pages read, 0 if ibuf is
+empty */
+UNIV_INTERN
ulint
ibuf_contract_for_n_pages(
/*======================*/
- /* out: a lower limit for the combined size in bytes
- of entries which will be merged from ibuf trees to the
- pages read, 0 if ibuf is empty */
- ibool sync, /* in: TRUE if the caller wants to wait for the
+ ibool sync, /*!< in: TRUE if the caller wants to wait for the
issued read with the highest tablespace address
to complete */
- ulint n_pages);/* in: try to read at least this many pages to
+ ulint n_pages);/*!< in: try to read at least this many pages to
the buffer pool and merge the ibuf contents to
them */
-/*************************************************************************
-Parses a redo log record of an ibuf bitmap page init. */
-
+#endif /* !UNIV_HOTBACKUP */
+/*********************************************************************//**
+Parses a redo log record of an ibuf bitmap page init.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
ibuf_parse_bitmap_init(
/*===================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
-#ifdef UNIV_IBUF_DEBUG
-/**********************************************************************
-Gets the ibuf count for a given page. */
-
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ buf_block_t* block, /*!< in: block or NULL */
+ mtr_t* mtr); /*!< in: mtr or NULL */
+#ifndef UNIV_HOTBACKUP
+#ifdef UNIV_IBUF_COUNT_DEBUG
+/******************************************************************//**
+Gets the ibuf count for a given page.
+@return number of entries in the insert buffer currently buffered for
+this page */
+UNIV_INTERN
ulint
ibuf_count_get(
/*===========*/
- /* out: number of entries in the insert buffer
- currently buffered for this page */
- ulint space, /* in: space id */
- ulint page_no);/* in: page number */
+ ulint space, /*!< in: space id */
+ ulint page_no);/*!< in: page number */
#endif
-/**********************************************************************
-Looks if the insert buffer is empty. */
-
+/******************************************************************//**
+Looks if the insert buffer is empty.
+@return TRUE if empty */
+UNIV_INTERN
ibool
ibuf_is_empty(void);
/*===============*/
- /* out: TRUE if empty */
-/**********************************************************************
+/******************************************************************//**
Prints info of ibuf. */
-
+UNIV_INTERN
void
ibuf_print(
/*=======*/
- FILE* file); /* in: file where to print */
+ FILE* file); /*!< in: file where to print */
+/******************************************************************//**
+Closes insert buffer and frees the data structures. */
+UNIV_INTERN
+void
+ibuf_close(void);
+/*============*/
#define IBUF_HEADER_PAGE_NO FSP_IBUF_HEADER_PAGE_NO
#define IBUF_TREE_ROOT_PAGE_NO FSP_IBUF_TREE_ROOT_PAGE_NO
+#endif /* !UNIV_HOTBACKUP */
+
/* The ibuf header page currently contains only the file segment header
for the file segment from which the pages for the ibuf tree are allocated */
#define IBUF_HEADER PAGE_DATA
#define IBUF_TREE_SEG_HEADER 0 /* fseg header for ibuf tree */
+/* The insert buffer tree itself is always located in space 0. */
+#define IBUF_SPACE_ID 0
+
#ifndef UNIV_NONINL
#include "ibuf0ibuf.ic"
#endif
diff --git a/storage/innobase/include/ibuf0ibuf.ic b/storage/innobase/include/ibuf0ibuf.ic
index 4d65a7f5250..15bbe61ab30 100644
--- a/storage/innobase/include/ibuf0ibuf.ic
+++ b/storage/innobase/include/ibuf0ibuf.ic
@@ -1,90 +1,109 @@
-/******************************************************
-Insert buffer
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
-(c) 1997 Innobase Oy
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/ibuf0ibuf.ic
+Insert buffer
Created 7/19/1997 Heikki Tuuri
*******************************************************/
-#include "buf0lru.h"
#include "page0page.h"
+#include "page0zip.h"
+#ifndef UNIV_HOTBACKUP
+#include "buf0lru.h"
+/** Counter for ibuf_should_try() */
extern ulint ibuf_flush_count;
-/* If this number is n, an index page must contain at least the page size
-per n bytes of free space for ibuf to try to buffer inserts to this page.
-If there is this much of free space, the corresponding bits are set in the
-ibuf bitmap. */
+/** An index page must contain at least UNIV_PAGE_SIZE /
+IBUF_PAGE_SIZE_PER_FREE_SPACE bytes of free space for ibuf to try to
+buffer inserts to this page. If there is this much free space, the
+corresponding bits are set in the ibuf bitmap. */
#define IBUF_PAGE_SIZE_PER_FREE_SPACE 32
-/* Insert buffer data struct for a single tablespace */
-struct ibuf_data_struct{
- ulint space; /* space id */
- ulint seg_size;/* allocated pages if the file segment
- containing ibuf header and tree */
- ulint size; /* size of the insert buffer tree in pages */
- ibool empty; /* after an insert to the ibuf tree is
- performed, this is set to FALSE, and if a
- contract operation finds the tree empty, this
- is set to TRUE */
- ulint free_list_len;
- /* length of the free list */
- ulint height; /* tree height */
- dict_index_t* index; /* insert buffer index */
- UT_LIST_NODE_T(ibuf_data_t) data_list;
- /* list of ibuf data structs */
- ulint n_inserts;/* number of inserts made to the insert
- buffer */
- ulint n_merges;/* number of pages merged */
- ulint n_merged_recs;/* number of records merged */
-};
-
+/** Insert buffer struct */
struct ibuf_struct{
- ulint size; /* current size of the ibuf index
- trees in pages */
- ulint max_size; /* recommended maximum size in pages
- for the ibuf index tree */
- UT_LIST_BASE_NODE_T(ibuf_data_t) data_list;
- /* list of ibuf data structs for
- each tablespace */
+ ulint size; /*!< current size of the ibuf index
+ tree, in pages */
+ ulint max_size; /*!< recommended maximum size of the
+ ibuf index tree, in pages */
+ ulint seg_size; /*!< allocated pages of the file
+ segment containing ibuf header and
+ tree */
+ ibool empty; /*!< after an insert to the ibuf tree
+ is performed, this is set to FALSE,
+ and if a contract operation finds
+ the tree empty, this is set to
+ TRUE */
+ ulint free_list_len; /*!< length of the free list */
+ ulint height; /*!< tree height */
+ dict_index_t* index; /*!< insert buffer index */
+
+ ulint n_inserts; /*!< number of inserts made to
+ the insert buffer */
+ ulint n_merges; /*!< number of pages merged */
+ ulint n_merged_recs; /*!< number of records merged */
};
-/****************************************************************************
+/************************************************************************//**
Sets the free bit of the page in the ibuf bitmap. This is done in a separate
mini-transaction, hence this operation does not restrict further work to only
ibuf bitmap operations, which would result if the latch to the bitmap page
were kept. */
-
+UNIV_INTERN
void
-ibuf_set_free_bits(
-/*===============*/
- ulint type, /* in: index type */
- page_t* page, /* in: index page; free bit is reset if the index is
- a non-clustered non-unique, and page level is 0 */
- ulint val, /* in: value to set: < 4 */
- ulint max_val);/* in: ULINT_UNDEFINED or a maximum value which
- the bits must have before setting; this is for
- debugging */
-
-/**************************************************************************
+ibuf_set_free_bits_func(
+/*====================*/
+ buf_block_t* block, /*!< in: index page of a non-clustered index;
+ free bit is reset if page level is 0 */
+#ifdef UNIV_IBUF_DEBUG
+ ulint max_val,/*!< in: ULINT_UNDEFINED or a maximum
+ value which the bits must have before
+ setting; this is for debugging */
+#endif /* UNIV_IBUF_DEBUG */
+ ulint val); /*!< in: value to set: < 4 */
+#ifdef UNIV_IBUF_DEBUG
+# define ibuf_set_free_bits(b,v,max) ibuf_set_free_bits_func(b,max,v)
+#else /* UNIV_IBUF_DEBUG */
+# define ibuf_set_free_bits(b,v,max) ibuf_set_free_bits_func(b,v)
+#endif /* UNIV_IBUF_DEBUG */
+
+/**********************************************************************//**
A basic partial test if an insert to the insert buffer could be possible and
recommended. */
UNIV_INLINE
ibool
ibuf_should_try(
/*============*/
- dict_index_t* index, /* in: index where to insert */
- ulint ignore_sec_unique) /* in: if != 0, we should
+ dict_index_t* index, /*!< in: index where to insert */
+ ulint ignore_sec_unique) /*!< in: if != 0, we should
ignore UNIQUE constraint on
a secondary index when we
decide */
{
- if (!(index->type & DICT_CLUSTERED)
- && (ignore_sec_unique || !(index->type & DICT_UNIQUE))) {
+ if (ibuf_use != IBUF_USE_NONE
+ && !dict_index_is_clust(index)
+ && (ignore_sec_unique || !dict_index_is_unique(index))) {
ibuf_flush_count++;
- if (ibuf_flush_count % 8 == 0) {
+ if (ibuf_flush_count % 4 == 0) {
buf_LRU_try_free_flushed_blocks();
}
@@ -95,36 +114,52 @@ ibuf_should_try(
return(FALSE);
}
-/***************************************************************************
-Checks if a page address is an ibuf bitmap page address. */
+/***********************************************************************//**
+Checks if a page address is an ibuf bitmap page address.
+@return TRUE if a bitmap page */
UNIV_INLINE
ibool
ibuf_bitmap_page(
/*=============*/
- /* out: TRUE if a bitmap page */
- ulint page_no)/* in: page number */
+ ulint zip_size,/*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint page_no)/*!< in: page number */
{
- if (page_no % XDES_DESCRIBED_PER_PAGE == FSP_IBUF_BITMAP_OFFSET) {
+ ut_ad(ut_is_2pow(zip_size));
- return(TRUE);
+ if (!zip_size) {
+ return(UNIV_UNLIKELY((page_no & (UNIV_PAGE_SIZE - 1))
+ == FSP_IBUF_BITMAP_OFFSET));
}
- return(FALSE);
+ return(UNIV_UNLIKELY((page_no & (zip_size - 1))
+ == FSP_IBUF_BITMAP_OFFSET));
}
-/*************************************************************************
-Translates the free space on a page to a value in the ibuf bitmap.*/
+/*********************************************************************//**
+Translates the free space on a page to a value in the ibuf bitmap.
+@return value for ibuf bitmap bits */
UNIV_INLINE
ulint
ibuf_index_page_calc_free_bits(
/*===========================*/
- /* out: value for ibuf bitmap bits */
- ulint max_ins_size) /* in: maximum insert size after reorganize
+ ulint zip_size, /*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint max_ins_size) /*!< in: maximum insert size after reorganize
for the page */
{
ulint n;
+ ut_ad(ut_is_2pow(zip_size));
+ ut_ad(!zip_size || zip_size > IBUF_PAGE_SIZE_PER_FREE_SPACE);
+ ut_ad(zip_size <= UNIV_PAGE_SIZE);
- n = max_ins_size / (UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+ if (zip_size) {
+ n = max_ins_size
+ / (zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+ } else {
+ n = max_ins_size
+ / (UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+ }
if (n == 3) {
n = 2;
@@ -137,76 +172,143 @@ ibuf_index_page_calc_free_bits(
return(n);
}
-/*************************************************************************
-Translates the ibuf free bits to the free space on a page in bytes. */
+/*********************************************************************//**
+Translates the ibuf free bits to the free space on a page in bytes.
+@return maximum insert size after reorganize for the page */
UNIV_INLINE
ulint
ibuf_index_page_calc_free_from_bits(
/*================================*/
- /* out: maximum insert size after reorganize for the
- page */
- ulint bits) /* in: value for ibuf bitmap bits */
+ ulint zip_size,/*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint bits) /*!< in: value for ibuf bitmap bits */
{
ut_ad(bits < 4);
+ ut_ad(ut_is_2pow(zip_size));
+ ut_ad(!zip_size || zip_size > IBUF_PAGE_SIZE_PER_FREE_SPACE);
+ ut_ad(zip_size <= UNIV_PAGE_SIZE);
+
+ if (zip_size) {
+ if (bits == 3) {
+ return(4 * zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+ }
+
+ return(bits * zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+ }
if (bits == 3) {
return(4 * UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
}
- return(bits * UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+ return(bits * (UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE));
+}
+
+/*********************************************************************//**
+Translates the free space on a compressed page to a value in the ibuf bitmap.
+@return value for ibuf bitmap bits */
+UNIV_INLINE
+ulint
+ibuf_index_page_calc_free_zip(
+/*==========================*/
+ ulint zip_size,
+ /*!< in: compressed page size in bytes */
+ const buf_block_t* block) /*!< in: buffer block */
+{
+ ulint max_ins_size;
+ const page_zip_des_t* page_zip;
+ lint zip_max_ins;
+
+ ut_ad(zip_size == buf_block_get_zip_size(block));
+ ut_ad(zip_size);
+
+ max_ins_size = page_get_max_insert_size_after_reorganize(
+ buf_block_get_frame(block), 1);
+
+ page_zip = buf_block_get_page_zip(block);
+ zip_max_ins = page_zip_max_ins_size(page_zip,
+ FALSE/* not clustered */);
+
+ if (UNIV_UNLIKELY(zip_max_ins < 0)) {
+ return(0);
+ } else if (UNIV_LIKELY(max_ins_size > (ulint) zip_max_ins)) {
+ max_ins_size = (ulint) zip_max_ins;
+ }
+
+ return(ibuf_index_page_calc_free_bits(zip_size, max_ins_size));
}
-/*************************************************************************
-Translates the free space on a page to a value in the ibuf bitmap.*/
+/*********************************************************************//**
+Translates the free space on a page to a value in the ibuf bitmap.
+@return value for ibuf bitmap bits */
UNIV_INLINE
ulint
ibuf_index_page_calc_free(
/*======================*/
- /* out: value for ibuf bitmap bits */
- page_t* page) /* in: non-unique secondary index page */
+ ulint zip_size,/*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ const buf_block_t* block) /*!< in: buffer block */
{
- return(ibuf_index_page_calc_free_bits(
- page_get_max_insert_size_after_reorganize(page, 1)));
+ ut_ad(zip_size == buf_block_get_zip_size(block));
+
+ if (!zip_size) {
+ ulint max_ins_size;
+
+ max_ins_size = page_get_max_insert_size_after_reorganize(
+ buf_block_get_frame(block), 1);
+
+ return(ibuf_index_page_calc_free_bits(0, max_ins_size));
+ } else {
+ return(ibuf_index_page_calc_free_zip(zip_size, block));
+ }
}
-/****************************************************************************
-Updates the free bits of the page in the ibuf bitmap if there is not enough
-free on the page any more. This is done in a separate mini-transaction, hence
-this operation does not restrict further work to only ibuf bitmap operations,
-which would result if the latch to the bitmap page were kept. */
+/************************************************************************//**
+Updates the free bits of an uncompressed page in the ibuf bitmap if
+there is not enough free space on the page any more. This is done in a
+separate mini-transaction, hence this operation does not restrict
+further work to only ibuf bitmap operations, which would result if the
+latch to the bitmap page were kept. NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page. It is
+unsafe to increment the bits in a separately committed
+mini-transaction, because in crash recovery, the free bits could
+momentarily be set too high. It is only safe to use this function for
+decrementing the free bits. Should more free space become available,
+we must not update the free bits here, because that would break crash
+recovery. */
UNIV_INLINE
void
ibuf_update_free_bits_if_full(
/*==========================*/
- dict_index_t* index, /* in: index */
- page_t* page, /* in: index page to which we have added new
+ buf_block_t* block, /*!< in: index page to which we have added new
records; the free bits are updated if the
index is non-clustered and non-unique and
the page level is 0, and the page becomes
fuller */
- ulint max_ins_size,/* in: value of maximum insert size with
+ ulint max_ins_size,/*!< in: value of maximum insert size with
reorganize before the latest operation
performed to the page */
- ulint increase)/* in: upper limit for the additional space
+ ulint increase)/*!< in: upper limit for the additional space
used in the latest operation, if known, or
ULINT_UNDEFINED */
{
ulint before;
ulint after;
- before = ibuf_index_page_calc_free_bits(max_ins_size);
+ ut_ad(!buf_block_get_page_zip(block));
+
+ before = ibuf_index_page_calc_free_bits(0, max_ins_size);
if (max_ins_size >= increase) {
#if ULINT32_UNDEFINED <= UNIV_PAGE_SIZE
# error "ULINT32_UNDEFINED <= UNIV_PAGE_SIZE"
#endif
- after = ibuf_index_page_calc_free_bits(max_ins_size
+ after = ibuf_index_page_calc_free_bits(0, max_ins_size
- increase);
#ifdef UNIV_IBUF_DEBUG
- ut_a(after <= ibuf_index_page_calc_free(page));
+ ut_a(after <= ibuf_index_page_calc_free(0, block));
#endif
} else {
- after = ibuf_index_page_calc_free(page);
+ after = ibuf_index_page_calc_free(0, block);
}
if (after == 0) {
@@ -215,10 +317,11 @@ ibuf_update_free_bits_if_full(
cannot make inserts using the insert buffer from slipping
out of the buffer pool */
- buf_page_make_young(page);
+ buf_page_make_young(&block->page);
}
if (before > after) {
- ibuf_set_free_bits(index->type, page, after, before);
+ ibuf_set_free_bits(block, after, before);
}
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/ibuf0types.h b/storage/innobase/include/ibuf0types.h
index fb202ac44b0..55944f879b2 100644
--- a/storage/innobase/include/ibuf0types.h
+++ b/storage/innobase/include/ibuf0types.h
@@ -1,7 +1,24 @@
-/******************************************************
-Insert buffer global types
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1997 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/ibuf0types.h
+Insert buffer global types
Created 7/29/1997 Heikki Tuuri
*******************************************************/
@@ -9,7 +26,6 @@ Created 7/29/1997 Heikki Tuuri
#ifndef ibuf0types_h
#define ibuf0types_h
-typedef struct ibuf_data_struct ibuf_data_t;
typedef struct ibuf_struct ibuf_t;
#endif
diff --git a/storage/innobase/include/lock0iter.h b/storage/innobase/include/lock0iter.h
index d063a360c1f..25a57c9740c 100644
--- a/storage/innobase/include/lock0iter.h
+++ b/storage/innobase/include/lock0iter.h
@@ -1,7 +1,24 @@
-/******************************************************
-Lock queue iterator type and function prototypes.
+/*****************************************************************************
+
+Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 2007 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/lock0iter.h
+Lock queue iterator type and function prototypes.
Created July 16, 2007 Vasil Dimov
*******************************************************/
@@ -13,14 +30,14 @@ Created July 16, 2007 Vasil Dimov
#include "lock0types.h"
typedef struct lock_queue_iterator_struct {
- lock_t* current_lock;
+ const lock_t* current_lock;
/* In case this is a record lock queue (not table lock queue)
then bit_no is the record number within the heap in which the
record is stored. */
- ulint bit_no;
+ ulint bit_no;
} lock_queue_iterator_t;
-/***********************************************************************
+/*******************************************************************//**
Initialize lock queue iterator so that it starts to iterate from
"lock". bit_no specifies the record number within the heap where the
record is stored. It can be undefined (ULINT_UNDEFINED) in two cases:
@@ -29,24 +46,24 @@ record is stored. It can be undefined (ULINT_UNDEFINED) in two cases:
bit_no is calculated in this function by using
lock_rec_find_set_bit(). There is exactly one bit set in the bitmap
of a wait lock. */
-
+UNIV_INTERN
void
lock_queue_iterator_reset(
/*======================*/
- lock_queue_iterator_t* iter, /* out: iterator */
- lock_t* lock, /* in: lock to start from */
- ulint bit_no);/* in: record number in the
+ lock_queue_iterator_t* iter, /*!< out: iterator */
+ const lock_t* lock, /*!< in: lock to start from */
+ ulint bit_no);/*!< in: record number in the
heap */
-/***********************************************************************
+/*******************************************************************//**
Gets the previous lock in the lock queue, returns NULL if there are no
more locks (i.e. the current lock is the first one). The iterator is
-receded (if not-NULL is returned). */
+receded (if not-NULL is returned).
+@return previous lock or NULL */
-lock_t*
+const lock_t*
lock_queue_iterator_get_prev(
/*=========================*/
- /* out: previous lock or NULL */
- lock_queue_iterator_t* iter); /* in/out: iterator */
+ lock_queue_iterator_t* iter); /*!< in/out: iterator */
#endif /* lock0iter_h */
diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h
index 635724bf5a1..82e4c9bd976 100644
--- a/storage/innobase/include/lock0lock.h
+++ b/storage/innobase/include/lock0lock.h
@@ -1,7 +1,24 @@
-/******************************************************
-The transaction lock system
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/lock0lock.h
+The transaction lock system
Created 5/7/1996 Heikki Tuuri
*******************************************************/
@@ -10,14 +27,16 @@ Created 5/7/1996 Heikki Tuuri
#define lock0lock_h
#include "univ.i"
+#include "buf0types.h"
#include "trx0types.h"
+#include "mtr0types.h"
#include "rem0types.h"
#include "dict0types.h"
#include "que0types.h"
-#include "page0types.h"
#include "lock0types.h"
#include "read0types.h"
#include "hash0hash.h"
+#include "ut0vec.h"
#ifdef UNIV_DEBUG
extern ibool lock_print_waits;
@@ -25,321 +44,356 @@ extern ibool lock_print_waits;
/* Buffer for storing information about the most recent deadlock error */
extern FILE* lock_latest_err_file;
-/*************************************************************************
-Gets the size of a lock struct. */
-
+/*********************************************************************//**
+Gets the size of a lock struct.
+@return size in bytes */
+UNIV_INTERN
ulint
lock_get_size(void);
/*===============*/
- /* out: size in bytes */
-/*************************************************************************
+/*********************************************************************//**
Creates the lock system at database start. */
-
+UNIV_INTERN
void
lock_sys_create(
/*============*/
- ulint n_cells); /* in: number of slots in lock hash table */
-/*************************************************************************
-Checks if some transaction has an implicit x-lock on a record in a secondary
-index. */
-
-trx_t*
-lock_sec_rec_some_has_impl_off_kernel(
-/*==================================*/
- /* out: transaction which has the x-lock, or
- NULL */
- rec_t* rec, /* in: user record */
- dict_index_t* index, /* in: secondary index */
- const ulint* offsets);/* in: rec_get_offsets(rec, index) */
-/*************************************************************************
+ ulint n_cells); /*!< in: number of slots in lock hash table */
+/*********************************************************************//**
+Closes the lock system at database shutdown. */
+UNIV_INTERN
+void
+lock_sys_close(void);
+/*================*/
+/*********************************************************************//**
Checks if some transaction has an implicit x-lock on a record in a clustered
-index. */
+index.
+@return transaction which has the x-lock, or NULL */
UNIV_INLINE
trx_t*
lock_clust_rec_some_has_impl(
/*=========================*/
- /* out: transaction which has the x-lock, or
- NULL */
- rec_t* rec, /* in: user record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets);/* in: rec_get_offsets(rec, index) */
-/*****************************************************************
-Makes a record to inherit the locks of another record as gap type
-locks, but does not reset the lock bits of the other record. Also
-waiting lock requests on rec are inherited as GRANTED gap locks. */
-
-void
-lock_rec_inherit_to_gap(
-/*====================*/
- rec_t* heir, /* in: record which inherits */
- rec_t* rec); /* in: record from which inherited; does NOT reset
- the locks on this record */
-/*****************************************************************
+ const rec_t* rec, /*!< in: user record */
+ dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets);/*!< in: rec_get_offsets(rec, index) */
+/*********************************************************************//**
+Gets the heap_no of the smallest user record on a page.
+@return heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */
+UNIV_INLINE
+ulint
+lock_get_min_heap_no(
+/*=================*/
+ const buf_block_t* block); /*!< in: buffer block */
+/*************************************************************//**
Updates the lock table when we have reorganized a page. NOTE: we copy
also the locks set on the infimum of the page; the infimum may carry
locks if an update of a record is occurring on the page, and its locks
were temporarily stored on the infimum. */
-
+UNIV_INTERN
void
lock_move_reorganize_page(
/*======================*/
- page_t* page, /* in: old index page */
- page_t* new_page); /* in: reorganized page */
-/*****************************************************************
+ const buf_block_t* block, /*!< in: old index page, now
+ reorganized */
+ const buf_block_t* oblock);/*!< in: copy of the old, not
+ reorganized page */
+/*************************************************************//**
Moves the explicit locks on user records to another page if a record
list end is moved to another page. */
-
+UNIV_INTERN
void
lock_move_rec_list_end(
/*===================*/
- page_t* new_page, /* in: index page to move to */
- page_t* page, /* in: index page */
- rec_t* rec); /* in: record on page: this is the
- first record moved */
-/*****************************************************************
+ const buf_block_t* new_block, /*!< in: index page to move to */
+ const buf_block_t* block, /*!< in: index page */
+ const rec_t* rec); /*!< in: record on page: this
+ is the first record moved */
+/*************************************************************//**
Moves the explicit locks on user records to another page if a record
list start is moved to another page. */
-
+UNIV_INTERN
void
lock_move_rec_list_start(
/*=====================*/
- page_t* new_page, /* in: index page to move to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page: this is the
- first record NOT copied */
- rec_t* old_end); /* in: old previous-to-last record on
- new_page before the records were copied */
-/*****************************************************************
+ const buf_block_t* new_block, /*!< in: index page to move to */
+ const buf_block_t* block, /*!< in: index page */
+ const rec_t* rec, /*!< in: record on page:
+ this is the first
+ record NOT copied */
+ const rec_t* old_end); /*!< in: old
+ previous-to-last
+ record on new_page
+ before the records
+ were copied */
+/*************************************************************//**
Updates the lock table when a page is split to the right. */
-
+UNIV_INTERN
void
lock_update_split_right(
/*====================*/
- page_t* right_page, /* in: right page */
- page_t* left_page); /* in: left page */
-/*****************************************************************
+ const buf_block_t* right_block, /*!< in: right page */
+ const buf_block_t* left_block); /*!< in: left page */
+/*************************************************************//**
Updates the lock table when a page is merged to the right. */
-
+UNIV_INTERN
void
lock_update_merge_right(
/*====================*/
- rec_t* orig_succ, /* in: original successor of infimum
- on the right page before merge */
- page_t* left_page); /* in: merged index page which will be
- discarded */
-/*****************************************************************
+ const buf_block_t* right_block, /*!< in: right page to
+ which merged */
+ const rec_t* orig_succ, /*!< in: original
+ successor of infimum
+ on the right page
+ before merge */
+ const buf_block_t* left_block); /*!< in: merged index
+ page which will be
+ discarded */
+/*************************************************************//**
Updates the lock table when the root page is copied to another in
btr_root_raise_and_insert. Note that we leave lock structs on the
root page, even though they do not make sense on other than leaf
pages: the reason is that in a pessimistic update the infimum record
of the root page will act as a dummy carrier of the locks of the record
to be updated. */
-
+UNIV_INTERN
void
lock_update_root_raise(
/*===================*/
- page_t* new_page, /* in: index page to which copied */
- page_t* root); /* in: root page */
-/*****************************************************************
+ const buf_block_t* block, /*!< in: index page to which copied */
+ const buf_block_t* root); /*!< in: root page */
+/*************************************************************//**
Updates the lock table when a page is copied to another and the original page
is removed from the chain of leaf pages, except if page is the root! */
-
+UNIV_INTERN
void
lock_update_copy_and_discard(
/*=========================*/
- page_t* new_page, /* in: index page to which copied */
- page_t* page); /* in: index page; NOT the root! */
-/*****************************************************************
+ const buf_block_t* new_block, /*!< in: index page to
+ which copied */
+ const buf_block_t* block); /*!< in: index page;
+ NOT the root! */
+/*************************************************************//**
Updates the lock table when a page is split to the left. */
-
+UNIV_INTERN
void
lock_update_split_left(
/*===================*/
- page_t* right_page, /* in: right page */
- page_t* left_page); /* in: left page */
-/*****************************************************************
+ const buf_block_t* right_block, /*!< in: right page */
+ const buf_block_t* left_block); /*!< in: left page */
+/*************************************************************//**
Updates the lock table when a page is merged to the left. */
-
+UNIV_INTERN
void
lock_update_merge_left(
/*===================*/
- page_t* left_page, /* in: left page to which merged */
- rec_t* orig_pred, /* in: original predecessor of supremum
- on the left page before merge */
- page_t* right_page); /* in: merged index page which will be
- discarded */
-/*****************************************************************
+ const buf_block_t* left_block, /*!< in: left page to
+ which merged */
+ const rec_t* orig_pred, /*!< in: original predecessor
+ of supremum on the left page
+ before merge */
+ const buf_block_t* right_block); /*!< in: merged index page
+ which will be discarded */
+/*************************************************************//**
Resets the original locks on heir and replaces them with gap type locks
inherited from rec. */
-
+UNIV_INTERN
void
lock_rec_reset_and_inherit_gap_locks(
/*=================================*/
- rec_t* heir, /* in: heir record */
- rec_t* rec); /* in: record */
-/*****************************************************************
+ const buf_block_t* heir_block, /*!< in: block containing the
+ record which inherits */
+ const buf_block_t* block, /*!< in: block containing the
+ record from which inherited;
+ does NOT reset the locks on
+ this record */
+ ulint heir_heap_no, /*!< in: heap_no of the
+ inheriting record */
+ ulint heap_no); /*!< in: heap_no of the
+ donating record */
+/*************************************************************//**
Updates the lock table when a page is discarded. */
-
+UNIV_INTERN
void
lock_update_discard(
/*================*/
- rec_t* heir, /* in: record which will inherit the locks */
- page_t* page); /* in: index page which will be discarded */
-/*****************************************************************
+ const buf_block_t* heir_block, /*!< in: index page
+ which will inherit the locks */
+ ulint heir_heap_no, /*!< in: heap_no of the record
+ which will inherit the locks */
+ const buf_block_t* block); /*!< in: index page
+ which will be discarded */
+/*************************************************************//**
Updates the lock table when a new user record is inserted. */
-
+UNIV_INTERN
void
lock_update_insert(
/*===============*/
- rec_t* rec); /* in: the inserted record */
-/*****************************************************************
+ const buf_block_t* block, /*!< in: buffer block containing rec */
+ const rec_t* rec); /*!< in: the inserted record */
+/*************************************************************//**
Updates the lock table when a record is removed. */
-
+UNIV_INTERN
void
lock_update_delete(
/*===============*/
- rec_t* rec); /* in: the record to be removed */
-/*************************************************************************
+ const buf_block_t* block, /*!< in: buffer block containing rec */
+ const rec_t* rec); /*!< in: the record to be removed */
+/*********************************************************************//**
Stores on the page infimum record the explicit locks of another record.
This function is used to store the lock state of a record when it is
updated and the size of the record changes in the update. The record
is in such an update moved, perhaps to another page. The infimum record
acts as a dummy carrier record, taking care of lock releases while the
actual record is being moved. */
-
+UNIV_INTERN
void
lock_rec_store_on_page_infimum(
/*===========================*/
- page_t* page, /* in: page containing the record */
- rec_t* rec); /* in: record whose lock state is stored
- on the infimum record of the same page; lock
- bits are reset on the record */
-/*************************************************************************
+ const buf_block_t* block, /*!< in: buffer block containing rec */
+ const rec_t* rec); /*!< in: record whose lock state
+ is stored on the infimum
+ record of the same page; lock
+ bits are reset on the
+ record */
+/*********************************************************************//**
Restores the state of explicit lock requests on a single record, where the
state was stored on the infimum of the page. */
-
+UNIV_INTERN
void
lock_rec_restore_from_page_infimum(
/*===============================*/
- rec_t* rec, /* in: record whose lock state is restored */
- page_t* page); /* in: page (rec is not necessarily on this page)
- whose infimum stored the lock state; lock bits are
- reset on the infimum */
-/*************************************************************************
-Returns TRUE if there are explicit record locks on a page. */
-
+ const buf_block_t* block, /*!< in: buffer block containing rec */
+ const rec_t* rec, /*!< in: record whose lock state
+ is restored */
+ const buf_block_t* donator);/*!< in: page (rec is not
+ necessarily on this page)
+ whose infimum stored the lock
+ state; lock bits are reset on
+ the infimum */
+/*********************************************************************//**
+Returns TRUE if there are explicit record locks on a page.
+@return TRUE if there are explicit record locks on the page */
+UNIV_INTERN
ibool
lock_rec_expl_exist_on_page(
/*========================*/
- /* out: TRUE if there are explicit record locks on
- the page */
- ulint space, /* in: space id */
- ulint page_no);/* in: page number */
-/*************************************************************************
+ ulint space, /*!< in: space id */
+ ulint page_no);/*!< in: page number */
+/*********************************************************************//**
Checks if locks of other transactions prevent an immediate insert of
a record. If they do, first tests if the query thread should anyway
be suspended for some reason; if not, then puts the transaction and
the query thread to the lock wait state and inserts a waiting request
-for a gap x-lock to the lock queue. */
-
+for a gap x-lock to the lock queue.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
ulint
lock_rec_insert_check_and_lock(
/*===========================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- rec_t* rec, /* in: record after which to insert */
- dict_index_t* index, /* in: index */
- que_thr_t* thr, /* in: query thread */
- ibool* inherit);/* out: set to TRUE if the new inserted
- record maybe should inherit LOCK_GAP type
- locks from the successor record */
-/*************************************************************************
+ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is
+ set, does nothing */
+ const rec_t* rec, /*!< in: record after which to insert */
+ buf_block_t* block, /*!< in/out: buffer block of rec */
+ dict_index_t* index, /*!< in: index */
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr, /*!< in/out: mini-transaction */
+ ibool* inherit);/*!< out: set to TRUE if the new
+ inserted record may need to inherit
+ LOCK_GAP type locks from the successor
+ record */
+/*********************************************************************//**
Checks if locks of other transactions prevent an immediate modify (update,
delete mark, or delete unmark) of a clustered index record. If they do,
first tests if the query thread should anyway be suspended for some
reason; if not, then puts the transaction and the query thread to the
lock wait state and inserts a waiting request for a record x-lock to the
-lock queue. */
-
+lock queue.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
ulint
lock_clust_rec_modify_check_and_lock(
/*=================================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- rec_t* rec, /* in: record which should be modified */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- que_thr_t* thr); /* in: query thread */
-/*************************************************************************
+ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
+ bit is set, does nothing */
+ const buf_block_t* block, /*!< in: buffer block of rec */
+ const rec_t* rec, /*!< in: record which should be
+ modified */
+ dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ que_thr_t* thr); /*!< in: query thread */
+/*********************************************************************//**
Checks if locks of other transactions prevent an immediate modify
-(delete mark or delete unmark) of a secondary index record. */
-
+(delete mark or delete unmark) of a secondary index record.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
ulint
lock_sec_rec_modify_check_and_lock(
/*===============================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- rec_t* rec, /* in: record which should be modified;
- NOTE: as this is a secondary index, we
- always have to modify the clustered index
- record first: see the comment below */
- dict_index_t* index, /* in: secondary index */
- que_thr_t* thr); /* in: query thread */
-/*************************************************************************
+ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
+ bit is set, does nothing */
+ buf_block_t* block, /*!< in/out: buffer block of rec */
+ const rec_t* rec, /*!< in: record which should be
+ modified; NOTE: as this is a secondary
+ index, we always have to modify the
+ clustered index record first: see the
+ comment below */
+ dict_index_t* index, /*!< in: secondary index */
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr); /*!< in/out: mini-transaction */
+/*********************************************************************//**
Like the counterpart for a clustered index below, but now we read a
-secondary index record. */
-
+secondary index record.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
ulint
lock_sec_rec_read_check_and_lock(
/*=============================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- rec_t* rec, /* in: user record or page supremum record
- which should be read or passed over by a read
- cursor */
- dict_index_t* index, /* in: secondary index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- ulint mode, /* in: mode of the lock which the read cursor
- should set on records: LOCK_S or LOCK_X; the
- latter is possible in SELECT FOR UPDATE */
- ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP */
- que_thr_t* thr); /* in: query thread */
-/*************************************************************************
+ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
+ bit is set, does nothing */
+ const buf_block_t* block, /*!< in: buffer block of rec */
+ const rec_t* rec, /*!< in: user record or page
+ supremum record which should
+ be read or passed over by a
+ read cursor */
+ dict_index_t* index, /*!< in: secondary index */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ enum lock_mode mode, /*!< in: mode of the lock which
+ the read cursor should set on
+ records: LOCK_S or LOCK_X; the
+ latter is possible in
+ SELECT FOR UPDATE */
+ ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
+ LOCK_REC_NOT_GAP */
+ que_thr_t* thr); /*!< in: query thread */
+/*********************************************************************//**
Checks if locks of other transactions prevent an immediate read, or passing
over by a read cursor, of a clustered index record. If they do, first tests
if the query thread should anyway be suspended for some reason; if not, then
puts the transaction and the query thread to the lock wait state and inserts a
waiting request for a record lock to the lock queue. Sets the requested mode
-lock on the record. */
-
+lock on the record.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
ulint
lock_clust_rec_read_check_and_lock(
/*===============================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- rec_t* rec, /* in: user record or page supremum record
- which should be read or passed over by a read
- cursor */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- ulint mode, /* in: mode of the lock which the read cursor
- should set on records: LOCK_S or LOCK_X; the
- latter is possible in SELECT FOR UPDATE */
- ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP */
- que_thr_t* thr); /* in: query thread */
-/*************************************************************************
+ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
+ bit is set, does nothing */
+ const buf_block_t* block, /*!< in: buffer block of rec */
+ const rec_t* rec, /*!< in: user record or page
+ supremum record which should
+ be read or passed over by a
+ read cursor */
+ dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ enum lock_mode mode, /*!< in: mode of the lock which
+ the read cursor should set on
+ records: LOCK_S or LOCK_X; the
+ latter is possible in
+ SELECT FOR UPDATE */
+ ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
+ LOCK_REC_NOT_GAP */
+ que_thr_t* thr); /*!< in: query thread */
+/*********************************************************************//**
Checks if locks of other transactions prevent an immediate read, or passing
over by a read cursor, of a clustered index record. If they do, first tests
if the query thread should anyway be suspended for some reason; if not, then
@@ -347,322 +401,381 @@ puts the transaction and the query thread to the lock wait state and inserts a
waiting request for a record lock to the lock queue. Sets the requested mode
lock on the record. This is an alternative version of
lock_clust_rec_read_check_and_lock() that does not require the parameter
-"offsets". */
-
+"offsets".
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
ulint
lock_clust_rec_read_check_and_lock_alt(
/*===================================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- rec_t* rec, /* in: user record or page supremum record
- which should be read or passed over by a read
- cursor */
- dict_index_t* index, /* in: clustered index */
- ulint mode, /* in: mode of the lock which the read cursor
- should set on records: LOCK_S or LOCK_X; the
- latter is possible in SELECT FOR UPDATE */
- ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP */
- que_thr_t* thr); /* in: query thread */
-/*************************************************************************
-Checks that a record is seen in a consistent read. */
-
+ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
+ bit is set, does nothing */
+ const buf_block_t* block, /*!< in: buffer block of rec */
+ const rec_t* rec, /*!< in: user record or page
+ supremum record which should
+ be read or passed over by a
+ read cursor */
+ dict_index_t* index, /*!< in: clustered index */
+ enum lock_mode mode, /*!< in: mode of the lock which
+ the read cursor should set on
+ records: LOCK_S or LOCK_X; the
+ latter is possible in
+ SELECT FOR UPDATE */
+ ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
+ LOCK_REC_NOT_GAP */
+ que_thr_t* thr); /*!< in: query thread */
+/*********************************************************************//**
+Checks that a record is seen in a consistent read.
+@return TRUE if sees, or FALSE if an earlier version of the record
+should be retrieved */
+UNIV_INTERN
ibool
lock_clust_rec_cons_read_sees(
/*==========================*/
- /* out: TRUE if sees, or FALSE if an earlier
- version of the record should be retrieved */
- rec_t* rec, /* in: user record which should be read or
+ const rec_t* rec, /*!< in: user record which should be read or
passed over by a read cursor */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- read_view_t* view); /* in: consistent read view */
-/*************************************************************************
-Checks that a non-clustered index record is seen in a consistent read. */
-
+ dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ read_view_t* view); /*!< in: consistent read view */
+/*********************************************************************//**
+Checks that a non-clustered index record is seen in a consistent read.
+
+NOTE that a non-clustered index page contains so little information on
+its modifications that even when FALSE is returned, the present version
+of rec may be the right one; this must be checked from the clustered
+index record.
+
+@return TRUE if certainly sees, or FALSE if an earlier version of the
+clustered index record might be needed */
+UNIV_INTERN
ulint
lock_sec_rec_cons_read_sees(
/*========================*/
- /* out: TRUE if certainly sees, or FALSE if an
- earlier version of the clustered index record
- might be needed: NOTE that a non-clustered
- index page contains so little information on
- its modifications that also in the case FALSE,
- the present version of rec may be the right,
- but we must check this from the clustered
- index record */
- rec_t* rec, /* in: user record which should be read or
- passed over by a read cursor */
- dict_index_t* index, /* in: non-clustered index */
- read_view_t* view); /* in: consistent read view */
-/*************************************************************************
+ const rec_t* rec, /*!< in: user record which
+ should be read or passed over
+ by a read cursor */
+ const read_view_t* view); /*!< in: consistent read view */
+/*********************************************************************//**
Locks the specified database table in the mode given. If the lock cannot
-be granted immediately, the query thread is put to wait. */
-
+be granted immediately, the query thread is put to wait.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
ulint
lock_table(
/*=======*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
+ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is set,
does nothing */
- dict_table_t* table, /* in: database table in dictionary cache */
- ulint mode, /* in: lock mode */
- que_thr_t* thr); /* in: query thread */
-/*************************************************************************
-Checks if there are any locks set on the table. */
-
-ibool
-lock_is_on_table(
-/*=============*/
- /* out: TRUE if there are lock(s) */
- dict_table_t* table); /* in: database table in dictionary cache */
-/*****************************************************************
+ dict_table_t* table, /*!< in: database table in dictionary cache */
+ enum lock_mode mode, /*!< in: lock mode */
+ que_thr_t* thr); /*!< in: query thread */
+/*************************************************************//**
Removes a granted record lock of a transaction from the queue and grants
locks to other transactions waiting in the queue if they now are entitled
to a lock. */
-
+UNIV_INTERN
void
lock_rec_unlock(
/*============*/
- trx_t* trx, /* in: transaction that has set a record
- lock */
- rec_t* rec, /* in: record */
- ulint lock_mode); /* in: LOCK_S or LOCK_X */
-/*************************************************************************
-Releases a table lock.
-Releases possible other transactions waiting for this lock. */
-
-void
-lock_table_unlock(
-/*==============*/
- lock_t* lock); /* in: lock */
-/*************************************************************************
-Releases an auto-inc lock a transaction possibly has on a table.
-Releases possible other transactions waiting for this lock. */
-
-void
-lock_table_unlock_auto_inc(
-/*=======================*/
- trx_t* trx); /* in: transaction */
-/*************************************************************************
+ trx_t* trx, /*!< in: transaction that has
+ set a record lock */
+ const buf_block_t* block, /*!< in: buffer block containing rec */
+ const rec_t* rec, /*!< in: record */
+ enum lock_mode lock_mode);/*!< in: LOCK_S or LOCK_X */
+/*********************************************************************//**
Releases transaction locks, and releases possible other transactions waiting
because of these locks. */
-
+UNIV_INTERN
void
lock_release_off_kernel(
/*====================*/
- trx_t* trx); /* in: transaction */
-/*************************************************************************
+ trx_t* trx); /*!< in: transaction */
+/*********************************************************************//**
Cancels a waiting lock request and releases possible other transactions
waiting behind it. */
-
+UNIV_INTERN
void
lock_cancel_waiting_and_release(
/*============================*/
- lock_t* lock); /* in: waiting lock request */
+ lock_t* lock); /*!< in: waiting lock request */
-/*************************************************************************
+/*********************************************************************//**
Removes locks on a table to be dropped or truncated.
If remove_also_table_sx_locks is TRUE then table-level S and X locks are
also removed in addition to other table-level and record-level locks.
No lock, that is going to be removed, is allowed to be a wait lock. */
-
+UNIV_INTERN
void
lock_remove_all_on_table(
/*=====================*/
- dict_table_t* table, /* in: table to be dropped
+ dict_table_t* table, /*!< in: table to be dropped
or truncated */
- ibool remove_also_table_sx_locks);/* in: also removes
+ ibool remove_also_table_sx_locks);/*!< in: also removes
table S and X locks */
-/*************************************************************************
+/*********************************************************************//**
Calculates the fold value of a page file address: used in inserting or
-searching for a lock in the hash table. */
+searching for a lock in the hash table.
+@return folded value */
UNIV_INLINE
ulint
lock_rec_fold(
/*==========*/
- /* out: folded value */
- ulint space, /* in: space */
- ulint page_no);/* in: page number */
-/*************************************************************************
+ ulint space, /*!< in: space */
+ ulint page_no)/*!< in: page number */
+ __attribute__((const));
+/*********************************************************************//**
Calculates the hash value of a page file address: used in inserting or
-searching for a lock in the hash table. */
+searching for a lock in the hash table.
+@return hashed value */
UNIV_INLINE
ulint
lock_rec_hash(
/*==========*/
- /* out: hashed value */
- ulint space, /* in: space */
- ulint page_no);/* in: page number */
-/*************************************************************************
-Gets the source table of an ALTER TABLE transaction. The table must be
-covered by an IX or IS table lock. */
+ ulint space, /*!< in: space */
+ ulint page_no);/*!< in: page number */
+
+/**********************************************************************//**
+Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED,
+if none found.
+@return bit index == heap number of the record, or ULINT_UNDEFINED if
+none found */
+UNIV_INTERN
+ulint
+lock_rec_find_set_bit(
+/*==================*/
+ const lock_t* lock); /*!< in: record lock with at least one
+ bit set */
+/*********************************************************************//**
+Gets the source table of an ALTER TABLE transaction. The table must be
+covered by an IX or IS table lock.
+@return the source table of transaction, if it is covered by an IX or
+IS table lock; dest if there is no source table, and NULL if the
+transaction is locking more than two tables or an inconsistency is
+found */
+UNIV_INTERN
dict_table_t*
lock_get_src_table(
/*===============*/
- /* out: the source table of transaction,
- if it is covered by an IX or IS table lock;
- dest if there is no source table, and
- NULL if the transaction is locking more than
- two tables or an inconsistency is found */
- trx_t* trx, /* in: transaction */
- dict_table_t* dest, /* in: destination of ALTER TABLE */
- ulint* mode); /* out: lock mode of the source table */
-/*************************************************************************
+ trx_t* trx, /*!< in: transaction */
+ dict_table_t* dest, /*!< in: destination of ALTER TABLE */
+ enum lock_mode* mode); /*!< out: lock mode of the source table */
+/*********************************************************************//**
Determine if the given table is exclusively "owned" by the given
transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC
-on the table. */
-
+on the table.
+@return TRUE if table is only locked by trx, with LOCK_IX, and
+possibly LOCK_AUTO_INC */
+UNIV_INTERN
ibool
lock_is_table_exclusive(
/*====================*/
- /* out: TRUE if table is only locked by trx,
- with LOCK_IX, and possibly LOCK_AUTO_INC */
- dict_table_t* table, /* in: table */
- trx_t* trx); /* in: transaction */
-/*************************************************************************
-Checks if a lock request lock1 has to wait for request lock2. */
-
+ dict_table_t* table, /*!< in: table */
+ trx_t* trx); /*!< in: transaction */
+/*********************************************************************//**
+Checks if a lock request lock1 has to wait for request lock2.
+@return TRUE if lock1 has to wait for lock2 to be removed */
+UNIV_INTERN
ibool
lock_has_to_wait(
/*=============*/
- /* out: TRUE if lock1 has to wait for lock2 to be
- removed */
- lock_t* lock1, /* in: waiting lock */
- lock_t* lock2); /* in: another lock; NOTE that it is assumed that this
- has a lock bit set on the same record as in lock1 if
- the locks are record locks */
-/*************************************************************************
-Checks that a transaction id is sensible, i.e., not in the future. */
-
+ const lock_t* lock1, /*!< in: waiting lock */
+ const lock_t* lock2); /*!< in: another lock; NOTE that it is
+ assumed that this has a lock bit set
+ on the same record as in lock1 if the
+ locks are record locks */
+/*********************************************************************//**
+Checks that a transaction id is sensible, i.e., not in the future.
+@return TRUE if ok */
+UNIV_INTERN
ibool
lock_check_trx_id_sanity(
/*=====================*/
- /* out: TRUE if ok */
- dulint trx_id, /* in: trx id */
- rec_t* rec, /* in: user record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets, /* in: rec_get_offsets(rec, index) */
- ibool has_kernel_mutex);/* in: TRUE if the caller owns the
+ trx_id_t trx_id, /*!< in: trx id */
+ const rec_t* rec, /*!< in: user record */
+ dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
+ ibool has_kernel_mutex);/*!< in: TRUE if the caller owns the
kernel mutex */
-/*************************************************************************
-Validates the lock queue on a single record. */
-
-ibool
-lock_rec_queue_validate(
-/*====================*/
- /* out: TRUE if ok */
- rec_t* rec, /* in: record to look at */
- dict_index_t* index, /* in: index, or NULL if not known */
- const ulint* offsets);/* in: rec_get_offsets(rec, index) */
-/*************************************************************************
+/*********************************************************************//**
Prints info of a table lock. */
-
+UNIV_INTERN
void
lock_table_print(
/*=============*/
- FILE* file, /* in: file where to print */
- lock_t* lock); /* in: table type lock */
-/*************************************************************************
+ FILE* file, /*!< in: file where to print */
+ const lock_t* lock); /*!< in: table type lock */
+/*********************************************************************//**
Prints info of a record lock. */
-
+UNIV_INTERN
void
lock_rec_print(
/*===========*/
- FILE* file, /* in: file where to print */
- lock_t* lock); /* in: record type lock */
-/*************************************************************************
+ FILE* file, /*!< in: file where to print */
+ const lock_t* lock); /*!< in: record type lock */
+/*********************************************************************//**
Prints info of locks for all transactions. */
-
+UNIV_INTERN
void
lock_print_info_summary(
/*====================*/
- FILE* file); /* in: file where to print */
-/*************************************************************************
+ FILE* file); /*!< in: file where to print */
+/*********************************************************************//**
Prints info of locks for each transaction. */
-
+UNIV_INTERN
void
lock_print_info_all_transactions(
/*=============================*/
- FILE* file); /* in: file where to print */
-/*************************************************************************
-Validates the lock queue on a table. */
-
-ibool
-lock_table_queue_validate(
-/*======================*/
- /* out: TRUE if ok */
- dict_table_t* table); /* in: table */
-/*************************************************************************
-Validates the record lock queues on a page. */
-
-ibool
-lock_rec_validate_page(
-/*===================*/
- /* out: TRUE if ok */
- ulint space, /* in: space id */
- ulint page_no);/* in: page number */
-/*************************************************************************
-Validates the lock system. */
-
-ibool
-lock_validate(void);
-/*===============*/
- /* out: TRUE if ok */
-/*************************************************************************
+ FILE* file); /*!< in: file where to print */
+/*********************************************************************//**
Return approximate number of record locks (bits set in the bitmap) for
this transaction. Since delete-marked records may be removed, the
record count will not be precise. */
-
+UNIV_INTERN
ulint
lock_number_of_rows_locked(
/*=======================*/
- trx_t* trx); /* in: transaction */
+ trx_t* trx); /*!< in: transaction */
+/*******************************************************************//**
+Check if a transaction holds any autoinc locks.
+@return TRUE if the transaction holds any AUTOINC locks. */
+UNIV_INTERN
+ibool
+lock_trx_holds_autoinc_locks(
+/*=========================*/
+ const trx_t* trx); /*!< in: transaction */
+/*******************************************************************//**
+Release all the transaction's autoinc locks. */
+UNIV_INTERN
+void
+lock_release_autoinc_locks(
+/*=======================*/
+ trx_t* trx); /*!< in/out: transaction */
-/* The lock system */
-extern lock_sys_t* lock_sys;
+/*******************************************************************//**
+Gets the type of a lock. Non-inline version for using outside of the
+lock module.
+@return LOCK_TABLE or LOCK_REC */
+UNIV_INTERN
+ulint
+lock_get_type(
+/*==========*/
+ const lock_t* lock); /*!< in: lock */
+
+/*******************************************************************//**
+Gets the id of the transaction owning a lock.
+@return transaction id */
+UNIV_INTERN
+ullint
+lock_get_trx_id(
+/*============*/
+ const lock_t* lock); /*!< in: lock */
+
+/*******************************************************************//**
+Gets the mode of a lock in a human readable string.
+The string should not be free()'d or modified.
+@return lock mode */
+UNIV_INTERN
+const char*
+lock_get_mode_str(
+/*==============*/
+ const lock_t* lock); /*!< in: lock */
+
+/*******************************************************************//**
+Gets the type of a lock in a human readable string.
+The string should not be free()'d or modified.
+@return lock type */
+UNIV_INTERN
+const char*
+lock_get_type_str(
+/*==============*/
+ const lock_t* lock); /*!< in: lock */
+
+/*******************************************************************//**
+Gets the id of the table on which the lock is.
+@return id of the table */
+UNIV_INTERN
+ullint
+lock_get_table_id(
+/*==============*/
+ const lock_t* lock); /*!< in: lock */
+
+/*******************************************************************//**
+Gets the name of the table on which the lock is.
+The string should not be free()'d or modified.
+@return name of the table */
+UNIV_INTERN
+const char*
+lock_get_table_name(
+/*================*/
+ const lock_t* lock); /*!< in: lock */
+
+/*******************************************************************//**
+For a record lock, gets the index on which the lock is.
+@return index */
+UNIV_INTERN
+const dict_index_t*
+lock_rec_get_index(
+/*===============*/
+ const lock_t* lock); /*!< in: lock */
+
+/*******************************************************************//**
+For a record lock, gets the name of the index on which the lock is.
+The string should not be free()'d or modified.
+@return name of the index */
+UNIV_INTERN
+const char*
+lock_rec_get_index_name(
+/*====================*/
+ const lock_t* lock); /*!< in: lock */
+
+/*******************************************************************//**
+For a record lock, gets the tablespace number on which the lock is.
+@return tablespace number */
+UNIV_INTERN
+ulint
+lock_rec_get_space_id(
+/*==================*/
+ const lock_t* lock); /*!< in: lock */
+
+/*******************************************************************//**
+For a record lock, gets the page number on which the lock is.
+@return page number */
+UNIV_INTERN
+ulint
+lock_rec_get_page_no(
+/*=================*/
+ const lock_t* lock); /*!< in: lock */
-/* Lock modes and types */
-/* Basic modes */
-#define LOCK_NONE 0 /* this flag is used elsewhere to note
- consistent read */
-#define LOCK_IS 2 /* intention shared */
-#define LOCK_IX 3 /* intention exclusive */
-#define LOCK_S 4 /* shared */
-#define LOCK_X 5 /* exclusive */
-#define LOCK_AUTO_INC 6 /* locks the auto-inc counter of a table
- in an exclusive mode */
-#define LOCK_MODE_MASK 0xFUL /* mask used to extract mode from the
+/** Lock modes and types */
+/* @{ */
+#define LOCK_MODE_MASK 0xFUL /*!< mask used to extract mode from the
type_mode field in a lock */
-/* Lock types */
-#define LOCK_TABLE 16 /* these type values should be so high that */
-#define LOCK_REC 32 /* they can be ORed to the lock mode */
-#define LOCK_TYPE_MASK 0xF0UL /* mask used to extract lock type from the
+/** Lock types */
+/* @{ */
+#define LOCK_TABLE 16 /*!< table lock */
+#define LOCK_REC 32 /*!< record lock */
+#define LOCK_TYPE_MASK 0xF0UL /*!< mask used to extract lock type from the
type_mode field in a lock */
-/* Waiting lock flag */
-#define LOCK_WAIT 256 /* this wait bit should be so high that
- it can be ORed to the lock mode and type;
- when this bit is set, it means that the
- lock has not yet been granted, it is just
- waiting for its turn in the wait queue */
+#if LOCK_MODE_MASK & LOCK_TYPE_MASK
+# error "LOCK_MODE_MASK & LOCK_TYPE_MASK"
+#endif
+
+#define LOCK_WAIT 256 /*!< Waiting lock flag; when set, it
+ means that the lock has not yet been
+ granted, it is just waiting for its
+ turn in the wait queue */
/* Precise modes */
-#define LOCK_ORDINARY 0 /* this flag denotes an ordinary next-key lock
- in contrast to LOCK_GAP or LOCK_REC_NOT_GAP */
-#define LOCK_GAP 512 /* this gap bit should be so high that
- it can be ORed to the other flags;
- when this bit is set, it means that the
+#define LOCK_ORDINARY 0 /*!< this flag denotes an ordinary
+ next-key lock in contrast to LOCK_GAP
+ or LOCK_REC_NOT_GAP */
+#define LOCK_GAP 512 /*!< when this bit is set, it means that the
lock holds only on the gap before the record;
for instance, an x-lock on the gap does not
give permission to modify the record on which
the bit is set; locks of this type are created
when records are removed from the index chain
of records */
-#define LOCK_REC_NOT_GAP 1024 /* this bit means that the lock is only on
+#define LOCK_REC_NOT_GAP 1024 /*!< this bit means that the lock is only on
the index record and does NOT block inserts
to the gap before the index record; this is
used in the case when we retrieve a record
@@ -670,7 +783,7 @@ extern lock_sys_t* lock_sys;
locking plain SELECTs (not part of UPDATE
or DELETE) when the user has set the READ
COMMITTED isolation level */
-#define LOCK_INSERT_INTENTION 2048 /* this bit is set when we place a waiting
+#define LOCK_INSERT_INTENTION 2048 /*!< this bit is set when we place a waiting
gap type record lock request in order to let
an insert of an index record to wait until
there are no conflicting locks by other
@@ -678,27 +791,28 @@ extern lock_sys_t* lock_sys;
remains set when the waiting lock is granted,
or if the lock is inherited to a neighboring
record */
+#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_MODE_MASK
+# error
+#endif
+#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_TYPE_MASK
+# error
+#endif
+/* @} */
-/* When lock bits are reset, the following flags are available: */
-#define LOCK_RELEASE_WAIT 1
-#define LOCK_NOT_RELEASE_WAIT 2
-
-/* Lock operation struct */
+/** Lock operation struct */
typedef struct lock_op_struct lock_op_t;
+/** Lock operation struct */
struct lock_op_struct{
- dict_table_t* table; /* table to be locked */
- ulint mode; /* lock mode */
+ dict_table_t* table; /*!< table to be locked */
+ enum lock_mode mode; /*!< lock mode */
};
-#define LOCK_OP_START 1
-#define LOCK_OP_COMPLETE 2
-
-/* The lock system struct */
+/** The lock system struct */
struct lock_sys_struct{
- hash_table_t* rec_hash; /* hash table of the record locks */
+ hash_table_t* rec_hash; /*!< hash table of the record locks */
};
-/* The lock system */
+/** The lock system */
extern lock_sys_t* lock_sys;
diff --git a/storage/innobase/include/lock0lock.ic b/storage/innobase/include/lock0lock.ic
index 311623b190b..014722f51c4 100644
--- a/storage/innobase/include/lock0lock.ic
+++ b/storage/innobase/include/lock0lock.ic
@@ -1,7 +1,24 @@
-/******************************************************
-The transaction lock system
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/lock0lock.ic
+The transaction lock system
Created 5/7/1996 Heikki Tuuri
*******************************************************/
@@ -21,52 +38,51 @@ Created 5/7/1996 Heikki Tuuri
#include "read0read.h"
#include "log0recv.h"
-/*************************************************************************
+/*********************************************************************//**
Calculates the fold value of a page file address: used in inserting or
-searching for a lock in the hash table. */
+searching for a lock in the hash table.
+@return folded value */
UNIV_INLINE
ulint
lock_rec_fold(
/*==========*/
- /* out: folded value */
- ulint space, /* in: space */
- ulint page_no)/* in: page number */
+ ulint space, /*!< in: space */
+ ulint page_no)/*!< in: page number */
{
return(ut_fold_ulint_pair(space, page_no));
}
-/*************************************************************************
+/*********************************************************************//**
Calculates the hash value of a page file address: used in inserting or
-searching for a lock in the hash table. */
+searching for a lock in the hash table.
+@return hashed value */
UNIV_INLINE
ulint
lock_rec_hash(
/*==========*/
- /* out: hashed value */
- ulint space, /* in: space */
- ulint page_no)/* in: page number */
+ ulint space, /*!< in: space */
+ ulint page_no)/*!< in: page number */
{
return(hash_calc_hash(lock_rec_fold(space, page_no),
lock_sys->rec_hash));
}
-/*************************************************************************
+/*********************************************************************//**
Checks if some transaction has an implicit x-lock on a record in a clustered
-index. */
+index.
+@return transaction which has the x-lock, or NULL */
UNIV_INLINE
trx_t*
lock_clust_rec_some_has_impl(
/*=========================*/
- /* out: transaction which has the x-lock, or
- NULL */
- rec_t* rec, /* in: user record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets)/* in: rec_get_offsets(rec, index) */
+ const rec_t* rec, /*!< in: user record */
+ dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
{
- dulint trx_id;
+ trx_id_t trx_id;
ut_ad(mutex_own(&kernel_mutex));
- ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(dict_index_is_clust(index));
ut_ad(page_rec_is_user_rec(rec));
trx_id = row_get_rec_trx_id(rec, index, offsets);
@@ -79,3 +95,27 @@ lock_clust_rec_some_has_impl(
return(NULL);
}
+
+/*********************************************************************//**
+Gets the heap_no of the smallest user record on a page. The record is
+located by following the next-record offset stored in the page infimum
+record; the page format reported by page_is_comp() selects between the
+"new" and "old" infimum offset and heap_no accessor.
+@return heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM
+(presumably when the infimum is directly followed by the supremum,
+i.e. the page holds no user records -- confirm against callers) */
+UNIV_INLINE
+ulint
+lock_get_min_heap_no(
+/*=================*/
+ const buf_block_t* block) /*!< in: buffer block */
+{
+ const page_t* page = block->frame; /* page frame held by the block */
+
+ if (page_is_comp(page)) { /* use the "new" record accessors */
+ return(rec_get_heap_no_new(
+ page
+ + rec_get_next_offs(page + PAGE_NEW_INFIMUM,
+ TRUE)));
+ } else { /* use the "old" record accessors */
+ return(rec_get_heap_no_old(
+ page
+ + rec_get_next_offs(page + PAGE_OLD_INFIMUM,
+ FALSE)));
+ }
+}
diff --git a/storage/innobase/include/lock0priv.h b/storage/innobase/include/lock0priv.h
index 7703a2b7def..287c151b19f 100644
--- a/storage/innobase/include/lock0priv.h
+++ b/storage/innobase/include/lock0priv.h
@@ -1,7 +1,24 @@
-/******************************************************
-Lock module internal structures and methods.
+/*****************************************************************************
+
+Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
-(c) 2007 Innobase Oy
+/**************************************************//**
+@file include/lock0priv.h
+Lock module internal structures and methods.
Created July 12, 2007 Vasil Dimov
*******************************************************/
@@ -22,77 +39,67 @@ those functions in lock/ */
#include "trx0types.h"
#include "ut0lst.h"
-/* A table lock */
+/** A table lock */
typedef struct lock_table_struct lock_table_t;
+/** A table lock */
struct lock_table_struct {
- dict_table_t* table; /* database table in dictionary
+ dict_table_t* table; /*!< database table in dictionary
cache */
UT_LIST_NODE_T(lock_t)
- locks; /* list of locks on the same
+ locks; /*!< list of locks on the same
table */
};
-/* Record lock for a page */
+/** Record lock for a page */
typedef struct lock_rec_struct lock_rec_t;
+/** Record lock for a page */
struct lock_rec_struct {
- ulint space; /* space id */
- ulint page_no; /* page number */
- ulint n_bits; /* number of bits in the lock
+ ulint space; /*!< space id */
+ ulint page_no; /*!< page number */
+ ulint n_bits; /*!< number of bits in the lock
bitmap; NOTE: the lock bitmap is
placed immediately after the
lock struct */
};
-/* Lock struct */
+/** Lock struct */
struct lock_struct {
- trx_t* trx; /* transaction owning the
+ trx_t* trx; /*!< transaction owning the
lock */
UT_LIST_NODE_T(lock_t)
- trx_locks; /* list of the locks of the
+ trx_locks; /*!< list of the locks of the
transaction */
- ulint type_mode; /* lock type, mode, LOCK_GAP or
+ ulint type_mode; /*!< lock type, mode, LOCK_GAP or
LOCK_REC_NOT_GAP,
LOCK_INSERT_INTENTION,
wait flag, ORed */
- hash_node_t hash; /* hash chain node for a record
+ hash_node_t hash; /*!< hash chain node for a record
lock */
- dict_index_t* index; /* index for a record lock */
+ dict_index_t* index; /*!< index for a record lock */
union {
- lock_table_t tab_lock;/* table lock */
- lock_rec_t rec_lock;/* record lock */
- } un_member;
+ lock_table_t tab_lock;/*!< table lock */
+ lock_rec_t rec_lock;/*!< record lock */
+ } un_member; /*!< lock details */
};
-/*************************************************************************
-Gets the type of a lock. */
+/*********************************************************************//**
+Gets the type of a lock.
+@return LOCK_TABLE or LOCK_REC */
UNIV_INLINE
ulint
-lock_get_type(
-/*==========*/
- /* out: LOCK_TABLE or LOCK_REC */
- const lock_t* lock); /* in: lock */
-
-/**************************************************************************
-Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED,
-if none found. */
-
-ulint
-lock_rec_find_set_bit(
-/*==================*/
- /* out: bit index == heap number of the record, or
- ULINT_UNDEFINED if none found */
- lock_t* lock); /* in: record lock with at least one bit set */
-
-/*************************************************************************
-Gets the previous record lock set on a record. */
+lock_get_type_low(
+/*==============*/
+ const lock_t* lock); /*!< in: lock */
-lock_t*
+/*********************************************************************//**
+Gets the previous record lock set on a record.
+@return previous lock on the same record, NULL if none exists */
+UNIV_INTERN
+const lock_t*
lock_rec_get_prev(
/*==============*/
- /* out: previous lock on the same record, NULL if
- none exists */
- lock_t* in_lock,/* in: record lock */
- ulint heap_no);/* in: heap number of the record */
+ const lock_t* in_lock,/*!< in: record lock */
+ ulint heap_no);/*!< in: heap number of the record */
#ifndef UNIV_NONINL
#include "lock0priv.ic"
diff --git a/storage/innobase/include/lock0priv.ic b/storage/innobase/include/lock0priv.ic
index 4bc8397509d..30447c99848 100644
--- a/storage/innobase/include/lock0priv.ic
+++ b/storage/innobase/include/lock0priv.ic
@@ -1,7 +1,24 @@
-/******************************************************
-Lock module internal inline methods.
+/*****************************************************************************
+
+Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 2007 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/lock0priv.ic
+Lock module internal inline methods.
Created July 16, 2007 Vasil Dimov
*******************************************************/
@@ -15,14 +32,14 @@ methods but they are used only in that file. */
#error Do not include lock0priv.ic outside of the lock/ module
#endif
-/*************************************************************************
-Gets the type of a lock. */
+/*********************************************************************//**
+Gets the type of a lock.
+@return LOCK_TABLE or LOCK_REC */
UNIV_INLINE
ulint
-lock_get_type(
-/*==========*/
- /* out: LOCK_TABLE or LOCK_REC */
- const lock_t* lock) /* in: lock */
+lock_get_type_low(
+/*==============*/
+ const lock_t* lock) /*!< in: lock */
{
ut_ad(lock);
diff --git a/storage/innobase/include/lock0types.h b/storage/innobase/include/lock0types.h
index 43fd2d60da5..45f29e90fe9 100644
--- a/storage/innobase/include/lock0types.h
+++ b/storage/innobase/include/lock0types.h
@@ -1,7 +1,24 @@
-/******************************************************
-The transaction lock system global types
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/lock0types.h
+The transaction lock system global types
Created 5/7/1996 Heikki Tuuri
*******************************************************/
@@ -13,4 +30,16 @@ Created 5/7/1996 Heikki Tuuri
typedef struct lock_struct lock_t;
typedef struct lock_sys_struct lock_sys_t;
+/** Basic lock modes. The enumerators are numbered consecutively from 0,
+so LOCK_NUM (an alias of LOCK_NONE) equals the number of modes that
+precede LOCK_NONE. */
+enum lock_mode {
+ LOCK_IS = 0, /*!< intention shared */
+ LOCK_IX, /*!< intention exclusive */
+ LOCK_S, /*!< shared */
+ LOCK_X, /*!< exclusive */
+ LOCK_AUTO_INC, /*!< locks the auto-inc counter of a table
+ in an exclusive mode */
+ LOCK_NONE, /*!< this is used elsewhere to note consistent read */
+ LOCK_NUM = LOCK_NONE/*!< number of lock modes, not counting
+ LOCK_NONE itself */
+};
+
#endif
diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h
index 43662d02a34..135aeb69e2d 100644
--- a/storage/innobase/include/log0log.h
+++ b/storage/innobase/include/log0log.h
@@ -1,7 +1,48 @@
-/******************************************************
-Database log
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2009, Google Inc.
-(c) 1995 Innobase Oy
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/log0log.h
+Database log
Created 12/9/1995 Heikki Tuuri
*******************************************************/
@@ -11,71 +52,82 @@ Created 12/9/1995 Heikki Tuuri
#include "univ.i"
#include "ut0byte.h"
+#include "ut0lst.h"
+#ifndef UNIV_HOTBACKUP
#include "sync0sync.h"
#include "sync0rw.h"
+#endif /* !UNIV_HOTBACKUP */
+/** Redo log buffer */
typedef struct log_struct log_t;
+/** Redo log group */
typedef struct log_group_struct log_group_t;
#ifdef UNIV_DEBUG
+/** Flag: write to log file? */
extern ibool log_do_write;
+/** Flag: enable debug output when writing to the log? */
extern ibool log_debug_writes;
#else /* UNIV_DEBUG */
+/** Flag: write to log file? Always TRUE when UNIV_DEBUG is not defined */
# define log_do_write TRUE
#endif /* UNIV_DEBUG */
-/* Wait modes for log_write_up_to */
+/** Wait modes for log_write_up_to @{ */
#define LOG_NO_WAIT 91
#define LOG_WAIT_ONE_GROUP 92
#define LOG_WAIT_ALL_GROUPS 93
+/* @} */
+/** Maximum number of log groups in log_group_struct::checkpoint_buf */
#define LOG_MAX_N_GROUPS 32
-/********************************************************************
+#ifndef UNIV_HOTBACKUP
+/****************************************************************//**
Sets the global variable log_fsp_current_free_limit. Also makes a checkpoint,
so that we know that the limit has been written to a log checkpoint field
on disk. */
-
+UNIV_INTERN
void
log_fsp_current_free_limit_set_and_checkpoint(
/*==========================================*/
- ulint limit); /* in: limit to set */
-/***********************************************************************
-Calculates where in log files we find a specified lsn. */
-
+ ulint limit); /*!< in: limit to set */
+#endif /* !UNIV_HOTBACKUP */
+/*******************************************************************//**
+Calculates where in log files we find a specified lsn.
+@return log file number */
+UNIV_INTERN
ulint
log_calc_where_lsn_is(
/*==================*/
- /* out: log file number */
- ib_longlong* log_file_offset, /* out: offset in that file
+ ib_int64_t* log_file_offset, /*!< out: offset in that file
(including the header) */
- dulint first_header_lsn, /* in: first log file start
+ ib_uint64_t first_header_lsn, /*!< in: first log file start
lsn */
- dulint lsn, /* in: lsn whose position to
+ ib_uint64_t lsn, /*!< in: lsn whose position to
determine */
- ulint n_log_files, /* in: total number of log
+ ulint n_log_files, /*!< in: total number of log
files */
- ib_longlong log_file_size); /* in: log file size
+ ib_int64_t log_file_size); /*!< in: log file size
(including the header) */
-/****************************************************************
+#ifndef UNIV_HOTBACKUP
+/************************************************************//**
Writes to the log the string given. The log must be released with
-log_release. */
+log_release.
+@return end lsn of the log record, zero if did not succeed */
UNIV_INLINE
-dulint
+ib_uint64_t
log_reserve_and_write_fast(
/*=======================*/
- /* out: end lsn of the log record, ut_dulint_zero if
- did not succeed */
- byte* str, /* in: string */
- ulint len, /* in: string length */
- dulint* start_lsn,/* out: start lsn of the log record */
- ibool* success);/* out: TRUE if success */
-/***************************************************************************
+ const void* str, /*!< in: string */
+ ulint len, /*!< in: string length */
+ ib_uint64_t* start_lsn);/*!< out: start lsn of the log record */
+/***********************************************************************//**
Releases the log mutex. */
UNIV_INLINE
void
log_release(void);
/*=============*/
-/***************************************************************************
+/***********************************************************************//**
Checks if there is need for a log buffer flush or a new checkpoint, and does
this if yes. Any database operation should call this when it has modified
more than about 4 pages. NOTE that this function may only be called when the
@@ -84,435 +136,467 @@ UNIV_INLINE
void
log_free_check(void);
/*================*/
-/****************************************************************
+/************************************************************//**
Opens the log for log_write_low. The log must be closed with log_close and
-released with log_release. */
-
-dulint
+released with log_release.
+@return start lsn of the log record */
+UNIV_INTERN
+ib_uint64_t
log_reserve_and_open(
/*=================*/
- /* out: start lsn of the log record */
- ulint len); /* in: length of data to be catenated */
-/****************************************************************
+ ulint len); /*!< in: length of data to be catenated */
+/************************************************************//**
Writes to the log the string given. It is assumed that the caller holds the
log mutex. */
-
+UNIV_INTERN
void
log_write_low(
/*==========*/
- byte* str, /* in: string */
- ulint str_len); /* in: string length */
-/****************************************************************
-Closes the log. */
-
-dulint
+ byte* str, /*!< in: string */
+ ulint str_len); /*!< in: string length */
+/************************************************************//**
+Closes the log.
+@return lsn */
+UNIV_INTERN
+ib_uint64_t
log_close(void);
/*===========*/
- /* out: lsn */
-/****************************************************************
-Gets the current lsn. */
+/************************************************************//**
+Gets the current lsn.
+@return current lsn */
UNIV_INLINE
-dulint
+ib_uint64_t
log_get_lsn(void);
/*=============*/
- /* out: current lsn */
-/**********************************************************
+/************************************************************//**
+Gets the log group capacity. It is OK to read the value without
+holding log_sys->mutex because it is constant.
+@return log group capacity */
+UNIV_INLINE
+ulint
+log_get_capacity(void);
+/*==================*/
+/******************************************************//**
Initializes the log. */
-
+UNIV_INTERN
void
log_init(void);
/*==========*/
-/**********************************************************************
+/******************************************************************//**
Inits a log group to the log system. */
-
+UNIV_INTERN
void
log_group_init(
/*===========*/
- ulint id, /* in: group id */
- ulint n_files, /* in: number of log files */
- ulint file_size, /* in: log file size in bytes */
- ulint space_id, /* in: space id of the file space
+ ulint id, /*!< in: group id */
+ ulint n_files, /*!< in: number of log files */
+ ulint file_size, /*!< in: log file size in bytes */
+ ulint space_id, /*!< in: space id of the file space
which contains the log files of this
group */
- ulint archive_space_id); /* in: space id of the file space
+ ulint archive_space_id); /*!< in: space id of the file space
which contains some archived log
files for this group; currently, only
for the first log group this is
used */
-/**********************************************************
+/******************************************************//**
Completes an i/o to a log file. */
-
+UNIV_INTERN
void
log_io_complete(
/*============*/
- log_group_t* group); /* in: log group */
-/**********************************************************
+ log_group_t* group); /*!< in: log group */
+/******************************************************//**
This function is called, e.g., when a transaction wants to commit. It checks
that the log has been written to the log file up to the last log entry written
by the transaction. If there is a flush running, it waits and checks if the
flush flushed enough. If not, starts a new flush. */
-
+UNIV_INTERN
void
log_write_up_to(
/*============*/
- dulint lsn, /* in: log sequence number up to which the log should
- be written, ut_dulint_max if not specified */
- ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
- or LOG_WAIT_ALL_GROUPS */
- ibool flush_to_disk);
- /* in: TRUE if we want the written log also to be
- flushed to disk */
-/********************************************************************
+ ib_uint64_t lsn, /*!< in: log sequence number up to which
+ the log should be written,
+ IB_ULONGLONG_MAX if not specified */
+ ulint wait, /*!< in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
+ or LOG_WAIT_ALL_GROUPS */
+ ibool flush_to_disk);
+ /*!< in: TRUE if we want the written log
+ also to be flushed to disk */
+/****************************************************************//**
Does a syncronous flush of the log buffer to disk. */
-
+UNIV_INTERN
void
log_buffer_flush_to_disk(void);
/*==========================*/
-/********************************************************************
+/****************************************************************//**
+This function writes the log buffer to the log file and if 'flush'
+is set it forces a flush of the log file as well. This is meant to be
+called from background master thread only as it does not wait for
+the write (+ possible flush) to finish. */
+UNIV_INTERN
+void
+log_buffer_sync_in_background(
+/*==========================*/
+ ibool flush); /*!< in: flush the logs to disk */
+/****************************************************************//**
Advances the smallest lsn for which there are unflushed dirty blocks in the
buffer pool and also may make a new checkpoint. NOTE: this function may only
-be called if the calling thread owns no synchronization objects! */
-
+be called if the calling thread owns no synchronization objects!
+@return FALSE if there was a flush batch of the same type running,
+which means that we could not start this flush batch */
+UNIV_INTERN
ibool
log_preflush_pool_modified_pages(
/*=============================*/
- /* out: FALSE if there was a flush batch of
- the same type running, which means that we
- could not start this flush batch */
- dulint new_oldest, /* in: try to advance oldest_modified_lsn
- at least to this lsn */
- ibool sync); /* in: TRUE if synchronous operation is
- desired */
-/**********************************************************
+ ib_uint64_t new_oldest, /*!< in: try to advance
+ oldest_modified_lsn at least
+ to this lsn */
+ ibool sync); /*!< in: TRUE if synchronous
+ operation is desired */
+/******************************************************//**
Makes a checkpoint. Note that this function does not flush dirty
blocks from the buffer pool: it only checks what is lsn of the oldest
modification in the pool, and writes information about the lsn in
-log files. Use log_make_checkpoint_at to flush also the pool. */
-
+log files. Use log_make_checkpoint_at to flush also the pool.
+@return TRUE if success, FALSE if a checkpoint write was already running */
+UNIV_INTERN
ibool
log_checkpoint(
/*===========*/
- /* out: TRUE if success, FALSE if a checkpoint
- write was already running */
- ibool sync, /* in: TRUE if synchronous operation is
+ ibool sync, /*!< in: TRUE if synchronous operation is
desired */
- ibool write_always); /* in: the function normally checks if the
+ ibool write_always); /*!< in: the function normally checks if
the new checkpoint would have a greater
lsn than the previous one: if not, then no
physical write is done; by setting this
parameter TRUE, a physical write will always be
made to log files */
-/********************************************************************
+/****************************************************************//**
Makes a checkpoint at a given lsn or later. */
-
+UNIV_INTERN
void
log_make_checkpoint_at(
/*===================*/
- dulint lsn, /* in: make a checkpoint at this or a later
- lsn, if ut_dulint_max, makes a checkpoint at
- the latest lsn */
- ibool write_always); /* in: the function normally checks if the
- the new checkpoint would have a greater
- lsn than the previous one: if not, then no
- physical write is done; by setting this
- parameter TRUE, a physical write will always be
- made to log files */
-/********************************************************************
+ ib_uint64_t lsn, /*!< in: make a checkpoint at this or a
+ later lsn, if IB_ULONGLONG_MAX, makes
+ a checkpoint at the latest lsn */
+ ibool write_always); /*!< in: the function normally checks if
+ the new checkpoint would have a
+ greater lsn than the previous one: if
+ not, then no physical write is done;
+ by setting this parameter TRUE, a
+ physical write will always be made to
+ log files */
+/****************************************************************//**
Makes a checkpoint at the latest lsn and writes it to first page of each
data file in the database, so that we know that the file spaces contain
all modifications up to that lsn. This can only be called at database
shutdown. This function also writes all log in log files to the log archive. */
-
+UNIV_INTERN
void
logs_empty_and_mark_files_at_shutdown(void);
/*=======================================*/
-/**********************************************************
+/******************************************************//**
Reads a checkpoint info from a log group header to log_sys->checkpoint_buf. */
-
+UNIV_INTERN
void
log_group_read_checkpoint_info(
/*===========================*/
- log_group_t* group, /* in: log group */
- ulint field); /* in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
-/***********************************************************************
+ log_group_t* group, /*!< in: log group */
+ ulint field); /*!< in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
+/*******************************************************************//**
Gets info from a checkpoint about a log group. */
-
+UNIV_INTERN
void
log_checkpoint_get_nth_group_info(
/*==============================*/
- byte* buf, /* in: buffer containing checkpoint info */
- ulint n, /* in: nth slot */
- ulint* file_no,/* out: archived file number */
- ulint* offset);/* out: archived file offset */
-/**********************************************************
+ const byte* buf, /*!< in: buffer containing checkpoint info */
+ ulint n, /*!< in: nth slot */
+ ulint* file_no,/*!< out: archived file number */
+ ulint* offset);/*!< out: archived file offset */
+/******************************************************//**
Writes checkpoint info to groups. */
-
+UNIV_INTERN
void
log_groups_write_checkpoint_info(void);
/*==================================*/
-/**********************************************************
-Writes info to a buffer of a log group when log files are created in
-backup restoration. */
-
-void
-log_reset_first_header_and_checkpoint(
-/*==================================*/
- byte* hdr_buf,/* in: buffer which will be written to the start
- of the first log file */
- dulint start); /* in: lsn of the start of the first log file;
- we pretend that there is a checkpoint at
- start + LOG_BLOCK_HDR_SIZE */
-/************************************************************************
-Starts an archiving operation. */
-
+/********************************************************************//**
+Starts an archiving operation.
+@return TRUE if succeed, FALSE if an archiving operation was already running */
+UNIV_INTERN
ibool
log_archive_do(
/*===========*/
- /* out: TRUE if succeed, FALSE if an archiving
- operation was already running */
- ibool sync, /* in: TRUE if synchronous operation is desired */
- ulint* n_bytes);/* out: archive log buffer size, 0 if nothing to
+ ibool sync, /*!< in: TRUE if synchronous operation is desired */
+ ulint* n_bytes);/*!< out: archive log buffer size, 0 if nothing to
archive */
-/********************************************************************
+/****************************************************************//**
Writes the log contents to the archive up to the lsn when this function was
called, and stops the archiving. When archiving is started again, the archived
log file numbers start from a number one higher, so that the archiving will
not write again to the archived log files which exist when this function
-returns. */
-
+returns.
+@return DB_SUCCESS or DB_ERROR */
+UNIV_INTERN
ulint
log_archive_stop(void);
/*==================*/
- /* out: DB_SUCCESS or DB_ERROR */
-/********************************************************************
-Starts again archiving which has been stopped. */
-
+/****************************************************************//**
+Starts again archiving which has been stopped.
+@return DB_SUCCESS or DB_ERROR */
+UNIV_INTERN
ulint
log_archive_start(void);
/*===================*/
- /* out: DB_SUCCESS or DB_ERROR */
-/********************************************************************
-Stop archiving the log so that a gap may occur in the archived log files. */
-
+/****************************************************************//**
+Stop archiving the log so that a gap may occur in the archived log files.
+@return DB_SUCCESS or DB_ERROR */
+UNIV_INTERN
ulint
log_archive_noarchivelog(void);
/*==========================*/
- /* out: DB_SUCCESS or DB_ERROR */
-/********************************************************************
-Start archiving the log so that a gap may occur in the archived log files. */
-
+/****************************************************************//**
+Start archiving the log so that a gap may occur in the archived log files.
+@return DB_SUCCESS or DB_ERROR */
+UNIV_INTERN
ulint
log_archive_archivelog(void);
/*========================*/
- /* out: DB_SUCCESS or DB_ERROR */
-/**********************************************************
+/******************************************************//**
Generates an archived log file name. */
-
+UNIV_INTERN
void
log_archived_file_name_gen(
/*=======================*/
- char* buf, /* in: buffer where to write */
- ulint id, /* in: group id */
- ulint file_no);/* in: file number */
-/************************************************************************
+ char* buf, /*!< in: buffer where to write */
+ ulint id, /*!< in: group id */
+ ulint file_no);/*!< in: file number */
+#else /* !UNIV_HOTBACKUP */
+/******************************************************//**
+Writes info to a buffer of a log group when log files are created in
+backup restoration. */
+UNIV_INTERN
+void
+log_reset_first_header_and_checkpoint(
+/*==================================*/
+ byte* hdr_buf,/*!< in: buffer which will be written to the
+ start of the first log file */
+ ib_uint64_t start); /*!< in: lsn of the start of the first log file;
+ we pretend that there is a checkpoint at
+ start + LOG_BLOCK_HDR_SIZE */
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************************//**
Checks that there is enough free space in the log to start a new query step.
Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
function may only be called if the calling thread owns no synchronization
objects! */
-
+UNIV_INTERN
void
log_check_margins(void);
/*===================*/
-/**********************************************************
+#ifndef UNIV_HOTBACKUP
+/******************************************************//**
Reads a specified log segment to a buffer. */
-
+UNIV_INTERN
void
log_group_read_log_seg(
/*===================*/
- ulint type, /* in: LOG_ARCHIVE or LOG_RECOVER */
- byte* buf, /* in: buffer where to read */
- log_group_t* group, /* in: log group */
- dulint start_lsn, /* in: read area start */
- dulint end_lsn); /* in: read area end */
-/**********************************************************
+ ulint type, /*!< in: LOG_ARCHIVE or LOG_RECOVER */
+ byte* buf, /*!< in: buffer where to read */
+ log_group_t* group, /*!< in: log group */
+ ib_uint64_t start_lsn, /*!< in: read area start */
+ ib_uint64_t end_lsn); /*!< in: read area end */
+/******************************************************//**
Writes a buffer to a log file group. */
-
+UNIV_INTERN
void
log_group_write_buf(
/*================*/
- log_group_t* group, /* in: log group */
- byte* buf, /* in: buffer */
- ulint len, /* in: buffer len; must be divisible
+ log_group_t* group, /*!< in: log group */
+ byte* buf, /*!< in: buffer */
+ ulint len, /*!< in: buffer len; must be divisible
by OS_FILE_LOG_BLOCK_SIZE */
- dulint start_lsn, /* in: start lsn of the buffer; must
+ ib_uint64_t start_lsn, /*!< in: start lsn of the buffer; must
be divisible by
OS_FILE_LOG_BLOCK_SIZE */
- ulint new_data_offset);/* in: start offset of new data in
+ ulint new_data_offset);/*!< in: start offset of new data in
buf: this parameter is used to decide
if we have to write a new log file
header */
-/************************************************************
+/********************************************************//**
Sets the field values in group to correspond to a given lsn. For this function
to work, the values must already be correctly initialized to correspond to
some lsn, for instance, a checkpoint lsn. */
-
+UNIV_INTERN
void
log_group_set_fields(
/*=================*/
- log_group_t* group, /* in: group */
- dulint lsn); /* in: lsn for which the values should be
+ log_group_t* group, /*!< in/out: group */
+ ib_uint64_t lsn); /*!< in: lsn for which the values should be
set */
-/**********************************************************
+/******************************************************//**
Calculates the data capacity of a log group, when the log file headers are not
-included. */
-
+included.
+@return capacity in bytes */
+UNIV_INTERN
ulint
log_group_get_capacity(
/*===================*/
- /* out: capacity in bytes */
- log_group_t* group); /* in: log group */
-/****************************************************************
-Gets a log block flush bit. */
+ const log_group_t* group); /*!< in: log group */
+#endif /* !UNIV_HOTBACKUP */
+/************************************************************//**
+Gets a log block flush bit.
+@return TRUE if this block was the first to be written in a log flush */
UNIV_INLINE
ibool
log_block_get_flush_bit(
/*====================*/
- /* out: TRUE if this block was the first
- to be written in a log flush */
- byte* log_block); /* in: log block */
-/****************************************************************
-Gets a log block number stored in the header. */
+ const byte* log_block); /*!< in: log block */
+/************************************************************//**
+Gets a log block number stored in the header.
+@return log block number stored in the block header */
UNIV_INLINE
ulint
log_block_get_hdr_no(
/*=================*/
- /* out: log block number stored in the block
- header */
- byte* log_block); /* in: log block */
-/****************************************************************
-Gets a log block data length. */
+ const byte* log_block); /*!< in: log block */
+/************************************************************//**
+Gets a log block data length.
+@return log block data length measured as a byte offset from the block start */
UNIV_INLINE
ulint
log_block_get_data_len(
/*===================*/
- /* out: log block data length measured as a
- byte offset from the block start */
- byte* log_block); /* in: log block */
-/****************************************************************
+ const byte* log_block); /*!< in: log block */
+/************************************************************//**
Sets the log block data length. */
UNIV_INLINE
void
log_block_set_data_len(
/*===================*/
- byte* log_block, /* in: log block */
- ulint len); /* in: data length */
-/****************************************************************
-Calculates the checksum for a log block. */
+ byte* log_block, /*!< in/out: log block */
+ ulint len); /*!< in: data length */
+/************************************************************//**
+Calculates the checksum for a log block.
+@return checksum */
UNIV_INLINE
ulint
log_block_calc_checksum(
/*====================*/
- /* out: checksum */
- byte* block); /* in: log block */
-/****************************************************************
-Gets a log block checksum field value. */
+ const byte* block); /*!< in: log block */
+/************************************************************//**
+Gets a log block checksum field value.
+@return checksum */
UNIV_INLINE
ulint
log_block_get_checksum(
/*===================*/
- /* out: checksum */
- byte* log_block); /* in: log block */
-/****************************************************************
+ const byte* log_block); /*!< in: log block */
+/************************************************************//**
Sets a log block checksum field value. */
UNIV_INLINE
void
log_block_set_checksum(
/*===================*/
- byte* log_block, /* in: log block */
- ulint checksum); /* in: checksum */
-/****************************************************************
-Gets a log block first mtr log record group offset. */
+ byte* log_block, /*!< in/out: log block */
+ ulint checksum); /*!< in: checksum */
+/************************************************************//**
+Gets a log block first mtr log record group offset.
+@return first mtr log record group byte offset from the block start, 0
+if none */
UNIV_INLINE
ulint
log_block_get_first_rec_group(
/*==========================*/
- /* out: first mtr log record group byte offset
- from the block start, 0 if none */
- byte* log_block); /* in: log block */
-/****************************************************************
+ const byte* log_block); /*!< in: log block */
+/************************************************************//**
Sets the log block first mtr log record group offset. */
UNIV_INLINE
void
log_block_set_first_rec_group(
/*==========================*/
- byte* log_block, /* in: log block */
- ulint offset); /* in: offset, 0 if none */
-/****************************************************************
-Gets a log block checkpoint number field (4 lowest bytes). */
+ byte* log_block, /*!< in/out: log block */
+ ulint offset); /*!< in: offset, 0 if none */
+/************************************************************//**
+Gets a log block checkpoint number field (4 lowest bytes).
+@return checkpoint no (4 lowest bytes) */
UNIV_INLINE
ulint
log_block_get_checkpoint_no(
/*========================*/
- /* out: checkpoint no (4 lowest bytes) */
- byte* log_block); /* in: log block */
-/****************************************************************
+ const byte* log_block); /*!< in: log block */
+/************************************************************//**
Initializes a log block in the log buffer. */
UNIV_INLINE
void
log_block_init(
/*===========*/
- byte* log_block, /* in: pointer to the log buffer */
- dulint lsn); /* in: lsn within the log block */
-/****************************************************************
+ byte* log_block, /*!< in: pointer to the log buffer */
+ ib_uint64_t lsn); /*!< in: lsn within the log block */
+/************************************************************//**
Initializes a log block in the log buffer in the old, < 3.23.52 format, where
there was no checksum yet. */
UNIV_INLINE
void
log_block_init_in_old_format(
/*=========================*/
- byte* log_block, /* in: pointer to the log buffer */
- dulint lsn); /* in: lsn within the log block */
-/****************************************************************
-Converts a lsn to a log block number. */
+ byte* log_block, /*!< in: pointer to the log buffer */
+ ib_uint64_t lsn); /*!< in: lsn within the log block */
+/************************************************************//**
+Converts a lsn to a log block number.
+@return log block number, it is > 0 and <= 1G */
UNIV_INLINE
ulint
log_block_convert_lsn_to_no(
/*========================*/
- /* out: log block number, it is > 0 and <= 1G */
- dulint lsn); /* in: lsn of a byte within the block */
-/**********************************************************
+ ib_uint64_t lsn); /*!< in: lsn of a byte within the block */
+/******************************************************//**
Prints info of the log. */
-
+UNIV_INTERN
void
log_print(
/*======*/
- FILE* file); /* in: file where to print */
-/**********************************************************
-Peeks the current lsn. */
-
+ FILE* file); /*!< in: file where to print */
+/******************************************************//**
+Peeks the current lsn.
+@return TRUE if success, FALSE if could not get the log system mutex */
+UNIV_INTERN
ibool
log_peek_lsn(
/*=========*/
- /* out: TRUE if success, FALSE if could not get the
- log system mutex */
- dulint* lsn); /* out: if returns TRUE, current lsn is here */
-/**************************************************************************
+ ib_uint64_t* lsn); /*!< out: if returns TRUE, current lsn is here */
+/**********************************************************************//**
Refreshes the statistics used to print per-second averages. */
-
+UNIV_INTERN
void
log_refresh_stats(void);
/*===================*/
+/******************************************************//**
+Shutdown the log system but do not release all the memory. */
+UNIV_INTERN
+void
+log_shutdown(void);
+/*==============*/
+/******************************************************//**
+Free the log system data structures. */
+UNIV_INTERN
+void
+log_mem_free(void);
+/*==============*/
extern log_t* log_sys;
/* Values used as flags */
#define LOG_FLUSH 7652559
#define LOG_CHECKPOINT 78656949
-#define LOG_ARCHIVE 11122331
+#ifdef UNIV_LOG_ARCHIVE
+# define LOG_ARCHIVE 11122331
+#endif /* UNIV_LOG_ARCHIVE */
#define LOG_RECOVER 98887331
/* The counting of lsn's starts from this value: this must be non-zero */
-#define LOG_START_LSN ut_dulint_create(0, 16 * OS_FILE_LOG_BLOCK_SIZE)
+#define LOG_START_LSN ((ib_uint64_t) (16 * OS_FILE_LOG_BLOCK_SIZE))
#define LOG_BUFFER_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE)
#define LOG_ARCHIVE_BUF_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE / 4)
@@ -564,7 +648,7 @@ extern log_t* log_sys;
#define LOG_CHECKPOINT_ARCHIVED_LSN 24
#define LOG_CHECKPOINT_GROUP_ARRAY 32
-/* For each value < LOG_MAX_N_GROUPS the following 8 bytes: */
+/* For each value smaller than LOG_MAX_N_GROUPS the following 8 bytes: */
#define LOG_CHECKPOINT_ARCHIVED_FILE_NO 0
#define LOG_CHECKPOINT_ARCHIVED_OFFSET 4
@@ -632,74 +716,84 @@ extern log_t* log_sys;
#define LOG_GROUP_OK 301
#define LOG_GROUP_CORRUPTED 302
-/* Log group consists of a number of log files, each of the same size; a log
+/** Log group consists of a number of log files, each of the same size; a log
group is implemented as a space in the sense of the module fil0fil. */
-
struct log_group_struct{
/* The following fields are protected by log_sys->mutex */
- ulint id; /* log group id */
- ulint n_files; /* number of files in the group */
- ulint file_size; /* individual log file size in bytes,
+ ulint id; /*!< log group id */
+ ulint n_files; /*!< number of files in the group */
+ ulint file_size; /*!< individual log file size in bytes,
including the log file header */
- ulint space_id; /* file space which implements the log
+ ulint space_id; /*!< file space which implements the log
group */
- ulint state; /* LOG_GROUP_OK or
+ ulint state; /*!< LOG_GROUP_OK or
LOG_GROUP_CORRUPTED */
- dulint lsn; /* lsn used to fix coordinates within
+ ib_uint64_t lsn; /*!< lsn used to fix coordinates within
the log group */
- ulint lsn_offset; /* the offset of the above lsn */
- ulint n_pending_writes;/* number of currently pending flush
+ ulint lsn_offset; /*!< the offset of the above lsn */
+ ulint n_pending_writes;/*!< number of currently pending flush
writes for this log group */
- byte** file_header_bufs;/* buffers for each file header in the
- group */
+ byte** file_header_bufs_ptr;/*!< unaligned buffers */
+ byte** file_header_bufs;/*!< buffers for each file
+ header in the group */
+#ifdef UNIV_LOG_ARCHIVE
/*-----------------------------*/
- byte** archive_file_header_bufs;/* buffers for each file
+ byte** archive_file_header_bufs_ptr;/*!< unaligned buffers */
+ byte** archive_file_header_bufs;/*!< buffers for each file
header in the group */
- ulint archive_space_id;/* file space which implements the log
- group archive */
- ulint archived_file_no;/* file number corresponding to
+ ulint archive_space_id;/*!< file space which
+ implements the log group
+ archive */
+ ulint archived_file_no;/*!< file number corresponding to
log_sys->archived_lsn */
- ulint archived_offset;/* file offset corresponding to
+ ulint archived_offset;/*!< file offset corresponding to
log_sys->archived_lsn, 0 if we have
not yet written to the archive file
number archived_file_no */
- ulint next_archived_file_no;/* during an archive write,
+ ulint next_archived_file_no;/*!< during an archive write,
until the write is completed, we
store the next value for
archived_file_no here: the write
completion function then sets the new
value to ..._file_no */
- ulint next_archived_offset; /* like the preceding field */
+ ulint next_archived_offset; /*!< like the preceding field */
+#endif /* UNIV_LOG_ARCHIVE */
/*-----------------------------*/
- dulint scanned_lsn; /* used only in recovery: recovery scan
+ ib_uint64_t scanned_lsn; /*!< used only in recovery: recovery scan
succeeded up to this lsn in this log
group */
- byte* checkpoint_buf; /* checkpoint header is written from
+ byte* checkpoint_buf_ptr;/*!< unaligned checkpoint header */
+ byte* checkpoint_buf; /*!< checkpoint header is written from
this buffer to the group */
UT_LIST_NODE_T(log_group_t)
- log_groups; /* list of log groups */
+ log_groups; /*!< list of log groups */
};
+/** Redo log buffer */
struct log_struct{
- byte pad[64]; /* padding to prevent other memory
+ byte pad[64]; /*!< padding to prevent other memory
update hotspots from residing on the
same memory cache line */
- dulint lsn; /* log sequence number */
- ulint buf_free; /* first free offset within the log
+ ib_uint64_t lsn; /*!< log sequence number */
+ ulint buf_free; /*!< first free offset within the log
buffer */
- mutex_t mutex; /* mutex protecting the log */
- byte* buf; /* log buffer */
- ulint buf_size; /* log buffer size in bytes */
- ulint max_buf_free; /* recommended maximum value of
+#ifndef UNIV_HOTBACKUP
+ mutex_t mutex; /*!< mutex protecting the log */
+#endif /* !UNIV_HOTBACKUP */
+ byte* buf_ptr; /*!< unaligned log buffer */
+ byte* buf; /*!< log buffer */
+ ulint buf_size; /*!< log buffer size in bytes */
+ ulint max_buf_free; /*!< recommended maximum value of
buf_free, after which the buffer is
flushed */
- ulint old_buf_free; /* value of buf free when log was
+ ulint old_buf_free; /*!< value of buf free when log was
last time opened; only in the debug
version */
- dulint old_lsn; /* value of lsn when log was last time
- opened; only in the debug version */
+ ib_uint64_t old_lsn; /*!< value of lsn when log was
+ last time opened; only in the
+ debug version */
ibool check_flush_or_checkpoint;
- /* this is set to TRUE when there may
+ /*!< this is set to TRUE when there may
be need to flush the log buffer, or
preflush buffer pool pages, or make
a checkpoint; this MUST be TRUE when
@@ -708,56 +802,59 @@ struct log_struct{
peeked at by log_free_check(), which
does not reserve the log mutex */
UT_LIST_BASE_NODE_T(log_group_t)
- log_groups; /* log groups */
+ log_groups; /*!< log groups */
- /* The fields involved in the log buffer flush */
+#ifndef UNIV_HOTBACKUP
+ /** The fields involved in the log buffer flush @{ */
- ulint buf_next_to_write;/* first offset in the log buffer
+ ulint buf_next_to_write;/*!< first offset in the log buffer
where the byte content may not yet have been
written to file, e.g., the start
offset of a log record catenated
later; this is advanced when a flush
operation is completed to all the log
groups */
- dulint written_to_some_lsn;
- /* first log sequence number not yet
+ ib_uint64_t written_to_some_lsn;
+ /*!< first log sequence number not yet
written to any log group; for this to
be advanced, it is enough that the
write i/o has been completed for any
one log group */
- dulint written_to_all_lsn;
- /* first log sequence number not yet
+ ib_uint64_t written_to_all_lsn;
+ /*!< first log sequence number not yet
written to some log group; for this to
be advanced, it is enough that the
write i/o has been completed for all
log groups */
- dulint write_lsn; /* end lsn for the current running
+ ib_uint64_t write_lsn; /*!< end lsn for the current running
write */
- ulint write_end_offset;/* the data in buffer has been written
- up to this offset when the current
- write ends: this field will then
- be copied to buf_next_to_write */
- dulint current_flush_lsn;/* end lsn for the current running
+ ulint write_end_offset;/*!< the data in buffer has
+ been written up to this offset
+ when the current write ends:
+ this field will then be copied
+ to buf_next_to_write */
+ ib_uint64_t current_flush_lsn;/*!< end lsn for the current running
write + flush operation */
- dulint flushed_to_disk_lsn;
- /* how far we have written the log
+ ib_uint64_t flushed_to_disk_lsn;
+ /*!< how far we have written the log
AND flushed to disk */
- ulint n_pending_writes;/* number of currently pending flushes
- or writes */
+ ulint n_pending_writes;/*!< number of currently
+ pending flushes or writes */
/* NOTE on the 'flush' in names of the fields below: starting from
4.0.14, we separate the write of the log file and the actual fsync()
or other method to flush it to disk. The names below should really
be 'flush_or_write'! */
- os_event_t no_flush_event; /* this event is in the reset state
+ os_event_t no_flush_event; /*!< this event is in the reset state
when a flush or a write is running;
a thread should wait for this without
owning the log mutex, but NOTE that
to set or reset this event, the
thread MUST own the log mutex! */
- ibool one_flushed; /* during a flush, this is first FALSE
- and becomes TRUE when one log group
- has been written or flushed */
- os_event_t one_flushed_event;/* this event is reset when the
+ ibool one_flushed; /*!< during a flush, this is
+ first FALSE and becomes TRUE
+ when one log group has been
+ written or flushed */
+ os_event_t one_flushed_event;/*!< this event is reset when the
flush or write has not yet completed
for any log group; e.g., this means
that a transaction has been committed
@@ -766,97 +863,111 @@ struct log_struct{
but NOTE that to set or reset this
event, the thread MUST own the log
mutex! */
- ulint n_log_ios; /* number of log i/os initiated thus
+ ulint n_log_ios; /*!< number of log i/os initiated thus
far */
- ulint n_log_ios_old; /* number of log i/o's at the
+ ulint n_log_ios_old; /*!< number of log i/o's at the
previous printout */
- time_t last_printout_time;/* when log_print was last time
+ time_t last_printout_time;/*!< when log_print was last time
called */
+ /* @} */
- /* Fields involved in checkpoints */
- ulint log_group_capacity; /* capacity of the log group; if
+ /** Fields involved in checkpoints @{ */
+ ulint log_group_capacity; /*!< capacity of the log group; if
the checkpoint age exceeds this, it is
a serious error because it is possible
we will then overwrite log and spoil
crash recovery */
ulint max_modified_age_async;
- /* when this recommended value for lsn
- - buf_pool_get_oldest_modification()
- is exceeded, we start an asynchronous
- preflush of pool pages */
+ /*!< when this recommended
+ value for lsn -
+ buf_pool_get_oldest_modification()
+ is exceeded, we start an
+ asynchronous preflush of pool pages */
ulint max_modified_age_sync;
- /* when this recommended value for lsn
- - buf_pool_get_oldest_modification()
- is exceeded, we start a synchronous
- preflush of pool pages */
+ /*!< when this recommended
+ value for lsn -
+ buf_pool_get_oldest_modification()
+ is exceeded, we start a
+ synchronous preflush of pool pages */
ulint adm_checkpoint_interval;
- /* administrator-specified checkpoint
+ /*!< administrator-specified checkpoint
interval in terms of log growth in
bytes; the interval actually used by
the database can be smaller */
ulint max_checkpoint_age_async;
- /* when this checkpoint age is exceeded
- we start an asynchronous writing of a
- new checkpoint */
+ /*!< when this checkpoint age
+ is exceeded we start an
+ asynchronous writing of a new
+ checkpoint */
ulint max_checkpoint_age;
- /* this is the maximum allowed value
+ /*!< this is the maximum allowed value
for lsn - last_checkpoint_lsn when a
new query step is started */
- dulint next_checkpoint_no;
- /* next checkpoint number */
- dulint last_checkpoint_lsn;
- /* latest checkpoint lsn */
- dulint next_checkpoint_lsn;
- /* next checkpoint lsn */
+ ib_uint64_t next_checkpoint_no;
+ /*!< next checkpoint number */
+ ib_uint64_t last_checkpoint_lsn;
+ /*!< latest checkpoint lsn */
+ ib_uint64_t next_checkpoint_lsn;
+ /*!< next checkpoint lsn */
ulint n_pending_checkpoint_writes;
- /* number of currently pending
+ /*!< number of currently pending
checkpoint writes */
- rw_lock_t checkpoint_lock;/* this latch is x-locked when a
+ rw_lock_t checkpoint_lock;/*!< this latch is x-locked when a
checkpoint write is running; a thread
should wait for this without owning
the log mutex */
- byte* checkpoint_buf; /* checkpoint header is read to this
+#endif /* !UNIV_HOTBACKUP */
+ byte* checkpoint_buf_ptr;/*!< unaligned checkpoint header */
+ byte* checkpoint_buf; /*!< checkpoint header is read to this
buffer */
- /* Fields involved in archiving */
- ulint archiving_state;/* LOG_ARCH_ON, LOG_ARCH_STOPPING
+ /* @} */
+#ifdef UNIV_LOG_ARCHIVE
+ /** Fields involved in archiving @{ */
+ ulint archiving_state;/*!< LOG_ARCH_ON, LOG_ARCH_STOPPING
LOG_ARCH_STOPPED, LOG_ARCH_OFF */
- dulint archived_lsn; /* archiving has advanced to this
+ ib_uint64_t archived_lsn; /*!< archiving has advanced to this
lsn */
ulint max_archived_lsn_age_async;
- /* recommended maximum age of
+ /*!< recommended maximum age of
archived_lsn, before we start
asynchronous copying to the archive */
ulint max_archived_lsn_age;
- /* maximum allowed age for
+ /*!< maximum allowed age for
archived_lsn */
- dulint next_archived_lsn;/* during an archive write,
+ ib_uint64_t next_archived_lsn;/*!< during an archive write,
until the write is completed, we
store the next value for
archived_lsn here: the write
completion function then sets the new
value to archived_lsn */
- ulint archiving_phase;/* LOG_ARCHIVE_READ or
+ ulint archiving_phase;/*!< LOG_ARCHIVE_READ or
LOG_ARCHIVE_WRITE */
ulint n_pending_archive_ios;
- /* number of currently pending reads
+ /*!< number of currently pending reads
or writes in archiving */
- rw_lock_t archive_lock; /* this latch is x-locked when an
+ rw_lock_t archive_lock; /*!< this latch is x-locked when an
archive write is running; a thread
should wait for this without owning
the log mutex */
- ulint archive_buf_size;/* size of archive_buf */
- byte* archive_buf; /* log segment is written to the
+ ulint archive_buf_size;/*!< size of archive_buf */
+ byte* archive_buf; /*!< log segment is written to the
archive from this buffer */
- os_event_t archiving_on; /* if archiving has been stopped,
+ os_event_t archiving_on; /*!< if archiving has been stopped,
a thread can wait for this event to
become signaled */
+ /* @} */
+#endif /* UNIV_LOG_ARCHIVE */
};
+#ifdef UNIV_LOG_ARCHIVE
+/** Archiving state @{ */
#define LOG_ARCH_ON 71
#define LOG_ARCH_STOPPING 72
#define LOG_ARCH_STOPPING2 73
#define LOG_ARCH_STOPPED 74
#define LOG_ARCH_OFF 75
+/* @} */
+#endif /* UNIV_LOG_ARCHIVE */
#ifndef UNIV_NONINL
#include "log0log.ic"
diff --git a/storage/innobase/include/log0log.ic b/storage/innobase/include/log0log.ic
index df0a8baf2d5..36d151a3064 100644
--- a/storage/innobase/include/log0log.ic
+++ b/storage/innobase/include/log0log.ic
@@ -1,7 +1,24 @@
-/******************************************************
-Database log
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
-(c) 1995 Innobase Oy
+/**************************************************//**
+@file include/log0log.ic
+Database log
Created 12/9/1995 Heikki Tuuri
*******************************************************/
@@ -10,27 +27,29 @@ Created 12/9/1995 Heikki Tuuri
#include "mach0data.h"
#include "mtr0mtr.h"
-/**********************************************************
+#ifdef UNIV_LOG_DEBUG
+/******************************************************//**
Checks by parsing that the catenated log segment for a single mtr is
consistent. */
-
+UNIV_INTERN
ibool
log_check_log_recs(
/*===============*/
- byte* buf, /* in: pointer to the start of the log segment
- in the log_sys->buf log buffer */
- ulint len, /* in: segment length in bytes */
- dulint buf_start_lsn); /* in: buffer start lsn */
-
-/****************************************************************
-Gets a log block flush bit. */
+ const byte* buf, /*!< in: pointer to the start of
+ the log segment in the
+ log_sys->buf log buffer */
+ ulint len, /*!< in: segment length in bytes */
+ ib_uint64_t buf_start_lsn); /*!< in: buffer start lsn */
+#endif /* UNIV_LOG_DEBUG */
+
+/************************************************************//**
+Gets a log block flush bit.
+@return TRUE if this block was the first to be written in a log flush */
UNIV_INLINE
ibool
log_block_get_flush_bit(
/*====================*/
- /* out: TRUE if this block was the first
- to be written in a log flush */
- byte* log_block) /* in: log block */
+ const byte* log_block) /*!< in: log block */
{
if (LOG_BLOCK_FLUSH_BIT_MASK
& mach_read_from_4(log_block + LOG_BLOCK_HDR_NO)) {
@@ -41,14 +60,14 @@ log_block_get_flush_bit(
return(FALSE);
}
-/****************************************************************
+/************************************************************//**
Sets the log block flush bit. */
UNIV_INLINE
void
log_block_set_flush_bit(
/*====================*/
- byte* log_block, /* in: log block */
- ibool val) /* in: value to set */
+ byte* log_block, /*!< in/out: log block */
+ ibool val) /*!< in: value to set */
{
ulint field;
@@ -63,29 +82,28 @@ log_block_set_flush_bit(
mach_write_to_4(log_block + LOG_BLOCK_HDR_NO, field);
}
-/****************************************************************
-Gets a log block number stored in the header. */
+/************************************************************//**
+Gets a log block number stored in the header.
+@return log block number stored in the block header */
UNIV_INLINE
ulint
log_block_get_hdr_no(
/*=================*/
- /* out: log block number stored in the block
- header */
- byte* log_block) /* in: log block */
+ const byte* log_block) /*!< in: log block */
{
return(~LOG_BLOCK_FLUSH_BIT_MASK
& mach_read_from_4(log_block + LOG_BLOCK_HDR_NO));
}
-/****************************************************************
+/************************************************************//**
Sets the log block number stored in the header; NOTE that this must be set
before the flush bit! */
UNIV_INLINE
void
log_block_set_hdr_no(
/*=================*/
- byte* log_block, /* in: log block */
- ulint n) /* in: log block number: must be > 0 and
+ byte* log_block, /*!< in/out: log block */
+ ulint n) /*!< in: log block number: must be > 0 and
< LOG_BLOCK_FLUSH_BIT_MASK */
{
ut_ad(n > 0);
@@ -94,109 +112,99 @@ log_block_set_hdr_no(
mach_write_to_4(log_block + LOG_BLOCK_HDR_NO, n);
}
-/****************************************************************
-Gets a log block data length. */
+/************************************************************//**
+Gets a log block data length.
+@return log block data length measured as a byte offset from the block start */
UNIV_INLINE
ulint
log_block_get_data_len(
/*===================*/
- /* out: log block data length measured as a
- byte offset from the block start */
- byte* log_block) /* in: log block */
+ const byte* log_block) /*!< in: log block */
{
return(mach_read_from_2(log_block + LOG_BLOCK_HDR_DATA_LEN));
}
-/****************************************************************
+/************************************************************//**
Sets the log block data length. */
UNIV_INLINE
void
log_block_set_data_len(
/*===================*/
- byte* log_block, /* in: log block */
- ulint len) /* in: data length */
+ byte* log_block, /*!< in/out: log block */
+ ulint len) /*!< in: data length */
{
mach_write_to_2(log_block + LOG_BLOCK_HDR_DATA_LEN, len);
}
-/****************************************************************
-Gets a log block first mtr log record group offset. */
+/************************************************************//**
+Gets a log block first mtr log record group offset.
+@return first mtr log record group byte offset from the block start, 0
+if none */
UNIV_INLINE
ulint
log_block_get_first_rec_group(
/*==========================*/
- /* out: first mtr log record group byte offset
- from the block start, 0 if none */
- byte* log_block) /* in: log block */
+ const byte* log_block) /*!< in: log block */
{
return(mach_read_from_2(log_block + LOG_BLOCK_FIRST_REC_GROUP));
}
-/****************************************************************
+/************************************************************//**
Sets the log block first mtr log record group offset. */
UNIV_INLINE
void
log_block_set_first_rec_group(
/*==========================*/
- byte* log_block, /* in: log block */
- ulint offset) /* in: offset, 0 if none */
+ byte* log_block, /*!< in/out: log block */
+ ulint offset) /*!< in: offset, 0 if none */
{
mach_write_to_2(log_block + LOG_BLOCK_FIRST_REC_GROUP, offset);
}
-/****************************************************************
-Gets a log block checkpoint number field (4 lowest bytes). */
+/************************************************************//**
+Gets a log block checkpoint number field (4 lowest bytes).
+@return checkpoint no (4 lowest bytes) */
UNIV_INLINE
ulint
log_block_get_checkpoint_no(
/*========================*/
- /* out: checkpoint no (4 lowest bytes) */
- byte* log_block) /* in: log block */
+ const byte* log_block) /*!< in: log block */
{
return(mach_read_from_4(log_block + LOG_BLOCK_CHECKPOINT_NO));
}
-/****************************************************************
+/************************************************************//**
Sets a log block checkpoint number field (4 lowest bytes). */
UNIV_INLINE
void
log_block_set_checkpoint_no(
/*========================*/
- byte* log_block, /* in: log block */
- dulint no) /* in: checkpoint no */
+ byte* log_block, /*!< in/out: log block */
+ ib_uint64_t no) /*!< in: checkpoint no */
{
- mach_write_to_4(log_block + LOG_BLOCK_CHECKPOINT_NO,
- ut_dulint_get_low(no));
+ mach_write_to_4(log_block + LOG_BLOCK_CHECKPOINT_NO, (ulint) no);
}
-/****************************************************************
-Converts a lsn to a log block number. */
+/************************************************************//**
+Converts a lsn to a log block number.
+@return log block number, it is > 0 and <= 1G */
UNIV_INLINE
ulint
log_block_convert_lsn_to_no(
/*========================*/
- /* out: log block number, it is > 0 and <= 1G */
- dulint lsn) /* in: lsn of a byte within the block */
+ ib_uint64_t lsn) /*!< in: lsn of a byte within the block */
{
- ulint no;
-
- no = ut_dulint_get_low(lsn) / OS_FILE_LOG_BLOCK_SIZE;
- no += (ut_dulint_get_high(lsn) % OS_FILE_LOG_BLOCK_SIZE)
- * 2 * (0x80000000UL / OS_FILE_LOG_BLOCK_SIZE);
-
- no = no & 0x3FFFFFFFUL;
-
- return(no + 1);
+ return(((ulint) (lsn / OS_FILE_LOG_BLOCK_SIZE) & 0x3FFFFFFFUL) + 1);
}
-/****************************************************************
-Calculates the checksum for a log block. */
+/************************************************************//**
+Calculates the checksum for a log block.
+@return checksum */
UNIV_INLINE
ulint
log_block_calc_checksum(
/*====================*/
- /* out: checksum */
- byte* block) /* in: log block */
+ const byte* block) /*!< in: log block */
{
ulint sum;
ulint sh;
@@ -206,8 +214,10 @@ log_block_calc_checksum(
sh = 0;
for (i = 0; i < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE; i++) {
- sum = sum & 0x7FFFFFFFUL;
- sum += (((ulint)(*(block + i))) << sh) + (ulint)(*(block + i));
+ ulint b = (ulint) block[i];
+ sum &= 0x7FFFFFFFUL;
+ sum += b;
+ sum += b << sh;
sh++;
if (sh > 24) {
sh = 0;
@@ -217,41 +227,41 @@ log_block_calc_checksum(
return(sum);
}
-/****************************************************************
-Gets a log block checksum field value. */
+/************************************************************//**
+Gets a log block checksum field value.
+@return checksum */
UNIV_INLINE
ulint
log_block_get_checksum(
/*===================*/
- /* out: checksum */
- byte* log_block) /* in: log block */
+ const byte* log_block) /*!< in: log block */
{
return(mach_read_from_4(log_block + OS_FILE_LOG_BLOCK_SIZE
- LOG_BLOCK_CHECKSUM));
}
-/****************************************************************
+/************************************************************//**
Sets a log block checksum field value. */
UNIV_INLINE
void
log_block_set_checksum(
/*===================*/
- byte* log_block, /* in: log block */
- ulint checksum) /* in: checksum */
+ byte* log_block, /*!< in/out: log block */
+ ulint checksum) /*!< in: checksum */
{
mach_write_to_4(log_block + OS_FILE_LOG_BLOCK_SIZE
- LOG_BLOCK_CHECKSUM,
checksum);
}
-/****************************************************************
+/************************************************************//**
Initializes a log block in the log buffer. */
UNIV_INLINE
void
log_block_init(
/*===========*/
- byte* log_block, /* in: pointer to the log buffer */
- dulint lsn) /* in: lsn within the log block */
+ byte* log_block, /*!< in: pointer to the log buffer */
+ ib_uint64_t lsn) /*!< in: lsn within the log block */
{
ulint no;
@@ -265,15 +275,15 @@ log_block_init(
log_block_set_first_rec_group(log_block, 0);
}
-/****************************************************************
+/************************************************************//**
Initializes a log block in the log buffer in the old format, where there
was no checksum yet. */
UNIV_INLINE
void
log_block_init_in_old_format(
/*=========================*/
- byte* log_block, /* in: pointer to the log buffer */
- dulint lsn) /* in: lsn within the log block */
+ byte* log_block, /*!< in: pointer to the log buffer */
+ ib_uint64_t lsn) /*!< in: lsn within the log block */
{
ulint no;
@@ -288,69 +298,88 @@ log_block_init_in_old_format(
log_block_set_first_rec_group(log_block, 0);
}
-/****************************************************************
+#ifndef UNIV_HOTBACKUP
+/************************************************************//**
Writes to the log the string given. The log must be released with
-log_release. */
+log_release.
+@return end lsn of the log record, zero if did not succeed */
UNIV_INLINE
-dulint
+ib_uint64_t
log_reserve_and_write_fast(
/*=======================*/
- /* out: end lsn of the log record, ut_dulint_zero if
- did not succeed */
- byte* str, /* in: string */
- ulint len, /* in: string length */
- dulint* start_lsn,/* out: start lsn of the log record */
- ibool* success)/* out: TRUE if success */
+ const void* str, /*!< in: string */
+ ulint len, /*!< in: string length */
+ ib_uint64_t* start_lsn)/*!< out: start lsn of the log record */
{
- log_t* log = log_sys;
- ulint data_len;
- dulint lsn;
-
- *success = TRUE;
-
- mutex_enter(&(log->mutex));
-
- data_len = len + log->buf_free % OS_FILE_LOG_BLOCK_SIZE;
+ ulint data_len;
+#ifdef UNIV_LOG_LSN_DEBUG
+ /* length of the LSN pseudo-record */
+ ulint lsn_len = 1
+ + mach_get_compressed_size(log_sys->lsn >> 32)
+ + mach_get_compressed_size(log_sys->lsn & 0xFFFFFFFFUL);
+#endif /* UNIV_LOG_LSN_DEBUG */
+
+ mutex_enter(&log_sys->mutex);
+
+ data_len = len
+#ifdef UNIV_LOG_LSN_DEBUG
+ + lsn_len
+#endif /* UNIV_LOG_LSN_DEBUG */
+ + log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE;
if (data_len >= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
/* The string does not fit within the current log block
or the log block would become full */
- *success = FALSE;
-
- mutex_exit(&(log->mutex));
+ mutex_exit(&log_sys->mutex);
- return(ut_dulint_zero);
+ return(0);
}
- *start_lsn = log->lsn;
-
- ut_memcpy(log->buf + log->buf_free, str, len);
+ *start_lsn = log_sys->lsn;
+
+#ifdef UNIV_LOG_LSN_DEBUG
+ {
+ /* Write the LSN pseudo-record. */
+ byte* b = &log_sys->buf[log_sys->buf_free];
+ *b++ = MLOG_LSN | (MLOG_SINGLE_REC_FLAG & *(const byte*) str);
+ /* Write the LSN in two parts,
+ as a pseudo page number and space id. */
+ b += mach_write_compressed(b, log_sys->lsn >> 32);
+ b += mach_write_compressed(b, log_sys->lsn & 0xFFFFFFFFUL);
+ ut_a(b - lsn_len == &log_sys->buf[log_sys->buf_free]);
+
+ memcpy(b, str, len);
+ len += lsn_len;
+ }
+#else /* UNIV_LOG_LSN_DEBUG */
+ memcpy(log_sys->buf + log_sys->buf_free, str, len);
+#endif /* UNIV_LOG_LSN_DEBUG */
- log_block_set_data_len(ut_align_down(log->buf + log->buf_free,
- OS_FILE_LOG_BLOCK_SIZE),
+ log_block_set_data_len((byte*) ut_align_down(log_sys->buf
+ + log_sys->buf_free,
+ OS_FILE_LOG_BLOCK_SIZE),
data_len);
#ifdef UNIV_LOG_DEBUG
- log->old_buf_free = log->buf_free;
- log->old_lsn = log->lsn;
+ log_sys->old_buf_free = log_sys->buf_free;
+ log_sys->old_lsn = log_sys->lsn;
#endif
- log->buf_free += len;
-
- ut_ad(log->buf_free <= log->buf_size);
+ log_sys->buf_free += len;
- lsn = ut_dulint_add(log->lsn, len);
+ ut_ad(log_sys->buf_free <= log_sys->buf_size);
- log->lsn = lsn;
+ log_sys->lsn += len;
#ifdef UNIV_LOG_DEBUG
- log_check_log_recs(log->buf + log->old_buf_free,
- log->buf_free - log->old_buf_free, log->old_lsn);
+ log_check_log_recs(log_sys->buf + log_sys->old_buf_free,
+ log_sys->buf_free - log_sys->old_buf_free,
+ log_sys->old_lsn);
#endif
- return(lsn);
+ return(log_sys->lsn);
}
-/***************************************************************************
+/***********************************************************************//**
Releases the log mutex. */
UNIV_INLINE
void
@@ -360,15 +389,15 @@ log_release(void)
mutex_exit(&(log_sys->mutex));
}
-/****************************************************************
-Gets the current lsn. */
+/************************************************************//**
+Gets the current lsn.
+@return current lsn */
UNIV_INLINE
-dulint
+ib_uint64_t
log_get_lsn(void)
/*=============*/
- /* out: current lsn */
{
- dulint lsn;
+ ib_uint64_t lsn;
mutex_enter(&(log_sys->mutex));
@@ -379,7 +408,19 @@ log_get_lsn(void)
return(lsn);
}
-/***************************************************************************
+/************************************************************//**
+Gets the log group capacity. It is OK to read the value without
+holding log_sys->mutex because it is constant.
+@return log group capacity */
+UNIV_INLINE
+ulint
+log_get_capacity(void)
+/*==================*/
+{
+ return(log_sys->log_group_capacity);
+}
+
+/***********************************************************************//**
Checks if there is need for a log buffer flush or a new checkpoint, and does
this if yes. Any database operation should call this when it has modified
more than about 4 pages. NOTE that this function may only be called when the
@@ -396,3 +437,4 @@ log_free_check(void)
log_check_margins();
}
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h
index 091bbe34562..a3d2bd050f5 100644
--- a/storage/innobase/include/log0recv.h
+++ b/storage/innobase/include/log0recv.h
@@ -1,7 +1,24 @@
-/******************************************************
-Recovery
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1997 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/log0recv.h
+Recovery
Created 9/20/1997 Heikki Tuuri
*******************************************************/
@@ -11,173 +28,250 @@ Created 9/20/1997 Heikki Tuuri
#include "univ.i"
#include "ut0byte.h"
-#include "page0types.h"
+#include "buf0types.h"
#include "hash0hash.h"
#include "log0log.h"
#ifdef UNIV_HOTBACKUP
extern ibool recv_replay_file_ops;
-#endif /* UNIV_HOTBACKUP */
-
-/***********************************************************************
-Reads the checkpoint info needed in hot backup. */
+/*******************************************************************//**
+Reads the checkpoint info needed in hot backup.
+@return TRUE if success */
+UNIV_INTERN
ibool
recv_read_cp_info_for_backup(
/*=========================*/
- /* out: TRUE if success */
- byte* hdr, /* in: buffer containing the log group header */
- dulint* lsn, /* out: checkpoint lsn */
- ulint* offset, /* out: checkpoint offset in the log group */
- ulint* fsp_limit,/* out: fsp limit of space 0, 1000000000 if the
- database is running with < version 3.23.50 of InnoDB */
- dulint* cp_no, /* out: checkpoint number */
- dulint* first_header_lsn);
- /* out: lsn of of the start of the first log file */
-/***********************************************************************
+ const byte* hdr, /*!< in: buffer containing the log group
+ header */
+ ib_uint64_t* lsn, /*!< out: checkpoint lsn */
+ ulint* offset, /*!< out: checkpoint offset in the log group */
+ ulint* fsp_limit,/*!< out: fsp limit of space 0,
+ 1000000000 if the database is running
+ with < version 3.23.50 of InnoDB */
+ ib_uint64_t* cp_no, /*!< out: checkpoint number */
+ ib_uint64_t* first_header_lsn);
+ /*!< out: lsn of the start of the
+ first log file */
+/*******************************************************************//**
Scans the log segment and n_bytes_scanned is set to the length of valid
log scanned. */
-
+UNIV_INTERN
void
recv_scan_log_seg_for_backup(
/*=========================*/
- byte* buf, /* in: buffer containing log data */
- ulint buf_len, /* in: data length in that buffer */
- dulint* scanned_lsn, /* in/out: lsn of buffer start,
+ byte* buf, /*!< in: buffer containing log data */
+ ulint buf_len, /*!< in: data length in that buffer */
+ ib_uint64_t* scanned_lsn, /*!< in/out: lsn of buffer start,
we return scanned lsn */
ulint* scanned_checkpoint_no,
- /* in/out: 4 lowest bytes of the
+ /*!< in/out: 4 lowest bytes of the
highest scanned checkpoint number so
far */
- ulint* n_bytes_scanned);/* out: how much we were able to
+ ulint* n_bytes_scanned);/*!< out: how much we were able to
scan, smaller than buf_len if log
data ended here */
-/***********************************************************************
-Returns TRUE if recovery is currently running. */
+#endif /* UNIV_HOTBACKUP */
+/*******************************************************************//**
+Returns TRUE if recovery is currently running.
+@return recv_recovery_on */
UNIV_INLINE
ibool
recv_recovery_is_on(void);
/*=====================*/
-/***********************************************************************
-Returns TRUE if recovery from backup is currently running. */
+#ifdef UNIV_LOG_ARCHIVE
+/*******************************************************************//**
+Returns TRUE if recovery from backup is currently running.
+@return recv_recovery_from_backup_on */
UNIV_INLINE
ibool
recv_recovery_from_backup_is_on(void);
/*=================================*/
-/****************************************************************************
+#endif /* UNIV_LOG_ARCHIVE */
+/************************************************************************//**
Applies the hashed log records to the page, if the page lsn is less than the
lsn of a log record. This can be called when a buffer page has just been
read in, or also for a page already in the buffer pool. */
-
+UNIV_INTERN
void
-recv_recover_page(
-/*==============*/
- ibool recover_backup, /* in: TRUE if we are recovering a backup
- page: then we do not acquire any latches
- since the page was read in outside the
- buffer pool */
- ibool just_read_in, /* in: TRUE if the i/o-handler calls this for
- a freshly read page */
- page_t* page, /* in: buffer page */
- ulint space, /* in: space id */
- ulint page_no); /* in: page number */
-/************************************************************
+recv_recover_page_func(
+/*===================*/
+#ifndef UNIV_HOTBACKUP
+ ibool just_read_in,
+ /*!< in: TRUE if the i/o handler calls
+ this for a freshly read page */
+#endif /* !UNIV_HOTBACKUP */
+ buf_block_t* block); /*!< in/out: buffer block */
+#ifndef UNIV_HOTBACKUP
+/** Wrapper for recv_recover_page_func().
+Applies the hashed log records to the page, if the page lsn is less than the
+lsn of a log record. This can be called when a buffer page has just been
+read in, or also for a page already in the buffer pool.
+@param jri in: TRUE if just read in (the i/o handler calls this for
+a freshly read page)
+@param block in/out: the buffer block
+*/
+# define recv_recover_page(jri, block) recv_recover_page_func(jri, block)
+#else /* !UNIV_HOTBACKUP */
+/** Wrapper for recv_recover_page_func().
+Applies the hashed log records to the page, if the page lsn is less than the
+lsn of a log record. This can be called when a buffer page has just been
+read in, or also for a page already in the buffer pool.
+@param jri ignored: TRUE if just read in (the i/o handler calls this for
+a freshly read page)
+@param block in/out: the buffer block
+*/
+# define recv_recover_page(jri, block) recv_recover_page_func(block)
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************//**
Recovers from a checkpoint. When this function returns, the database is able
to start processing of new user transactions, but the function
recv_recovery_from_checkpoint_finish should be called later to complete
-the recovery and free the resources used in it. */
-
+the recovery and free the resources used in it.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
ulint
-recv_recovery_from_checkpoint_start(
-/*================================*/
- /* out: error code or DB_SUCCESS */
- ulint type, /* in: LOG_CHECKPOINT or LOG_ARCHIVE */
- dulint limit_lsn, /* in: recover up to this lsn if possible */
- dulint min_flushed_lsn,/* in: min flushed lsn from data files */
- dulint max_flushed_lsn);/* in: max flushed lsn from data files */
-/************************************************************
+recv_recovery_from_checkpoint_start_func(
+/*=====================================*/
+#ifdef UNIV_LOG_ARCHIVE
+ ulint type, /*!< in: LOG_CHECKPOINT or
+ LOG_ARCHIVE */
+ ib_uint64_t limit_lsn, /*!< in: recover up to this lsn
+ if possible */
+#endif /* UNIV_LOG_ARCHIVE */
+ ib_uint64_t min_flushed_lsn,/*!< in: min flushed lsn from
+ data files */
+ ib_uint64_t max_flushed_lsn);/*!< in: max flushed lsn from
+ data files */
+#ifdef UNIV_LOG_ARCHIVE
+/** Wrapper for recv_recovery_from_checkpoint_start_func().
+Recovers from a checkpoint. When this function returns, the database is able
+to start processing of new user transactions, but the function
+recv_recovery_from_checkpoint_finish should be called later to complete
+the recovery and free the resources used in it.
+@param type in: LOG_CHECKPOINT or LOG_ARCHIVE
+@param lim in: recover up to this log sequence number if possible
+@param min in: minimum flushed log sequence number from data files
+@param max in: maximum flushed log sequence number from data files
+@return error code or DB_SUCCESS */
+# define recv_recovery_from_checkpoint_start(type,lim,min,max) \
+ recv_recovery_from_checkpoint_start_func(type,lim,min,max)
+#else /* UNIV_LOG_ARCHIVE */
+/** Wrapper for recv_recovery_from_checkpoint_start_func().
+Recovers from a checkpoint. When this function returns, the database is able
+to start processing of new user transactions, but the function
+recv_recovery_from_checkpoint_finish should be called later to complete
+the recovery and free the resources used in it.
+@param type ignored: LOG_CHECKPOINT or LOG_ARCHIVE
+@param lim ignored: recover up to this log sequence number if possible
+@param min in: minimum flushed log sequence number from data files
+@param max in: maximum flushed log sequence number from data files
+@return error code or DB_SUCCESS */
+# define recv_recovery_from_checkpoint_start(type,lim,min,max) \
+ recv_recovery_from_checkpoint_start_func(min,max)
+#endif /* UNIV_LOG_ARCHIVE */
+/********************************************************//**
Completes recovery from a checkpoint. */
-
+UNIV_INTERN
void
recv_recovery_from_checkpoint_finish(void);
/*======================================*/
-/***********************************************************
-Scans log from a buffer and stores new log data to the parsing buffer. Parses
-and hashes the log records if new data found. */
-
+/*******************************************************//**
+Scans log from a buffer and stores new log data to the parsing buffer.
+Parses and hashes the log records if new data found. Unless
+UNIV_HOTBACKUP is defined, this function will apply log records
+automatically when the hash table becomes full.
+@return TRUE if limit_lsn has been reached, or not able to scan any
+more in this log group */
+UNIV_INTERN
ibool
recv_scan_log_recs(
/*===============*/
- /* out: TRUE if limit_lsn has been reached, or
- not able to scan any more in this log group */
- ibool apply_automatically,/* in: TRUE if we want this function to
- apply log records automatically when the
- hash table becomes full; in the hot backup tool
- the tool does the applying, not this
- function */
- ulint available_memory,/* in: we let the hash table of recs to grow
- to this size, at the maximum */
- ibool store_to_hash, /* in: TRUE if the records should be stored
- to the hash table; this is set to FALSE if just
- debug checking is needed */
- byte* buf, /* in: buffer containing a log segment or
- garbage */
- ulint len, /* in: buffer length */
- dulint start_lsn, /* in: buffer start lsn */
- dulint* contiguous_lsn, /* in/out: it is known that all log groups
- contain contiguous log data up to this lsn */
- dulint* group_scanned_lsn);/* out: scanning succeeded up to this lsn */
-/**********************************************************
+ ulint available_memory,/*!< in: we let the hash table of recs
+ to grow to this size, at the maximum */
+ ibool store_to_hash, /*!< in: TRUE if the records should be
+ stored to the hash table; this is set
+ to FALSE if just debug checking is
+ needed */
+ const byte* buf, /*!< in: buffer containing a log
+ segment or garbage */
+ ulint len, /*!< in: buffer length */
+ ib_uint64_t start_lsn, /*!< in: buffer start lsn */
+ ib_uint64_t* contiguous_lsn, /*!< in/out: it is known that all log
+ groups contain contiguous log data up
+ to this lsn */
+ ib_uint64_t* group_scanned_lsn);/*!< out: scanning succeeded up to
+ this lsn */
+/******************************************************//**
Resets the logs. The contents of log files will be lost! */
-
+UNIV_INTERN
void
recv_reset_logs(
/*============*/
- dulint lsn, /* in: reset to this lsn rounded up to
- be divisible by OS_FILE_LOG_BLOCK_SIZE,
- after which we add LOG_BLOCK_HDR_SIZE */
+ ib_uint64_t lsn, /*!< in: reset to this lsn
+ rounded up to be divisible by
+ OS_FILE_LOG_BLOCK_SIZE, after
+ which we add
+ LOG_BLOCK_HDR_SIZE */
#ifdef UNIV_LOG_ARCHIVE
- ulint arch_log_no, /* in: next archived log file number */
+ ulint arch_log_no, /*!< in: next archived log file number */
#endif /* UNIV_LOG_ARCHIVE */
- ibool new_logs_created);/* in: TRUE if resetting logs is done
- at the log creation; FALSE if it is done
- after archive recovery */
+ ibool new_logs_created);/*!< in: TRUE if resetting logs
+ is done at the log creation;
+ FALSE if it is done after
+ archive recovery */
#ifdef UNIV_HOTBACKUP
-/**********************************************************
+/******************************************************//**
Creates new log files after a backup has been restored. */
-
+UNIV_INTERN
void
recv_reset_log_files_for_backup(
/*============================*/
- const char* log_dir, /* in: log file directory path */
- ulint n_log_files, /* in: number of log files */
- ulint log_file_size, /* in: log file size */
- dulint lsn); /* in: new start lsn, must be
+ const char* log_dir, /*!< in: log file directory path */
+ ulint n_log_files, /*!< in: number of log files */
+ ulint log_file_size, /*!< in: log file size */
+ ib_uint64_t lsn); /*!< in: new start lsn, must be
divisible by OS_FILE_LOG_BLOCK_SIZE */
#endif /* UNIV_HOTBACKUP */
-/************************************************************
+/********************************************************//**
Creates the recovery system. */
-
+UNIV_INTERN
void
recv_sys_create(void);
/*=================*/
-/************************************************************
+/**********************************************************//**
+Release recovery system mutexes. */
+UNIV_INTERN
+void
+recv_sys_close(void);
+/*================*/
+/********************************************************//**
+Frees the recovery system memory. */
+UNIV_INTERN
+void
+recv_sys_mem_free(void);
+/*===================*/
+/********************************************************//**
Inits the recovery system for a recovery operation. */
-
+UNIV_INTERN
void
recv_sys_init(
/*==========*/
- ibool recover_from_backup, /* in: TRUE if this is called
- to recover from a hot backup */
- ulint available_memory); /* in: available memory in bytes */
-/***********************************************************************
+ ulint available_memory); /*!< in: available memory in bytes */
+/********************************************************//**
+Reset the state of the recovery system variables. */
+UNIV_INTERN
+void
+recv_sys_var_init(void);
+/*===================*/
+/*******************************************************************//**
Empties the hash table of stored log records, applying them to appropriate
pages. */
-
+UNIV_INTERN
void
recv_apply_hashed_log_recs(
/*=======================*/
- ibool allow_ibuf); /* in: if TRUE, also ibuf operations are
+ ibool allow_ibuf); /*!< in: if TRUE, also ibuf operations are
allowed during the application; if FALSE,
no ibuf operations are allowed, and after
the application all file pages are flushed to
@@ -185,161 +279,207 @@ recv_apply_hashed_log_recs(
alternative means that no new log records
can be generated during the application */
#ifdef UNIV_HOTBACKUP
-/***********************************************************************
+/*******************************************************************//**
Applies log records in the hash table to a backup. */
-
+UNIV_INTERN
void
recv_apply_log_recs_for_backup(void);
/*================================*/
#endif
#ifdef UNIV_LOG_ARCHIVE
-/************************************************************
-Recovers from archived log files, and also from log files, if they exist. */
-
+/********************************************************//**
+Recovers from archived log files, and also from log files, if they exist.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
ulint
recv_recovery_from_archive_start(
/*=============================*/
- /* out: error code or DB_SUCCESS */
- dulint min_flushed_lsn,/* in: min flushed lsn field from the
- data files */
- dulint limit_lsn, /* in: recover up to this lsn if possible */
- ulint first_log_no); /* in: number of the first archived log file
- to use in the recovery; the file will be
- searched from INNOBASE_LOG_ARCH_DIR specified
- in server config file */
-/************************************************************
+ ib_uint64_t min_flushed_lsn,/*!< in: min flushed lsn field from the
+ data files */
+ ib_uint64_t limit_lsn, /*!< in: recover up to this lsn if
+ possible */
+ ulint first_log_no); /*!< in: number of the first archived
+ log file to use in the recovery; the
+ file will be searched from
+ INNOBASE_LOG_ARCH_DIR specified in
+ server config file */
+/********************************************************//**
Completes recovery from archive. */
-
+UNIV_INTERN
void
recv_recovery_from_archive_finish(void);
/*===================================*/
#endif /* UNIV_LOG_ARCHIVE */
-/* Block of log record data */
+/** Block of log record data */
typedef struct recv_data_struct recv_data_t;
+/** Block of log record data */
struct recv_data_struct{
- recv_data_t* next; /* pointer to the next block or NULL */
- /* the log record data is stored physically
+ recv_data_t* next; /*!< pointer to the next block or NULL */
+ /*!< the log record data is stored physically
immediately after this struct, max amount
RECV_DATA_BLOCK_SIZE bytes of it */
};
-/* Stored log record struct */
+/** Stored log record struct */
typedef struct recv_struct recv_t;
+/** Stored log record struct */
struct recv_struct{
- byte type; /* log record type */
- ulint len; /* log record body length in bytes */
- recv_data_t* data; /* chain of blocks containing the log record
+ byte type; /*!< log record type */
+ ulint len; /*!< log record body length in bytes */
+ recv_data_t* data; /*!< chain of blocks containing the log record
body */
- dulint start_lsn;/* start lsn of the log segment written by
+ ib_uint64_t start_lsn;/*!< start lsn of the log segment written by
the mtr which generated this log record: NOTE
that this is not necessarily the start lsn of
this log record */
- dulint end_lsn;/* end lsn of the log segment written by
+ ib_uint64_t end_lsn;/*!< end lsn of the log segment written by
the mtr which generated this log record: NOTE
that this is not necessarily the end lsn of
this log record */
UT_LIST_NODE_T(recv_t)
- rec_list;/* list of log records for this page */
+ rec_list;/*!< list of log records for this page */
+};
+
+/** States of recv_addr_struct */
+enum recv_addr_state {
+ /** not yet processed */
+ RECV_NOT_PROCESSED,
+ /** page is being read */
+ RECV_BEING_READ,
+ /** log records are being applied on the page */
+ RECV_BEING_PROCESSED,
+ /** log records have been applied on the page, or they have
+ been discarded because the tablespace does not exist */
+ RECV_PROCESSED
};
-/* Hashed page file address struct */
+/** Hashed page file address struct */
typedef struct recv_addr_struct recv_addr_t;
+/** Hashed page file address struct */
struct recv_addr_struct{
- ulint state; /* RECV_NOT_PROCESSED, RECV_BEING_PROCESSED,
- or RECV_PROCESSED */
- ulint space; /* space id */
- ulint page_no;/* page number */
+ enum recv_addr_state state;
+ /*!< recovery state of the page */
+ ulint space; /*!< space id */
+ ulint page_no;/*!< page number */
UT_LIST_BASE_NODE_T(recv_t)
- rec_list;/* list of log records for this page */
- hash_node_t addr_hash;
+ rec_list;/*!< list of log records for this page */
+ hash_node_t addr_hash;/*!< hash node in the hash bucket chain */
};
-/* Recovery system data structure */
+/** Recovery system data structure */
typedef struct recv_sys_struct recv_sys_t;
+/** Recovery system data structure */
struct recv_sys_struct{
- mutex_t mutex; /* mutex protecting the fields apply_log_recs,
+#ifndef UNIV_HOTBACKUP
+ mutex_t mutex; /*!< mutex protecting the fields apply_log_recs,
n_addrs, and the state field in each recv_addr
struct */
+#endif /* !UNIV_HOTBACKUP */
ibool apply_log_recs;
- /* this is TRUE when log rec application to
+ /*!< this is TRUE when log rec application to
pages is allowed; this flag tells the
i/o-handler if it should do log record
application */
ibool apply_batch_on;
- /* this is TRUE when a log rec application
+ /*!< this is TRUE when a log rec application
batch is running */
- dulint lsn; /* log sequence number */
+ ib_uint64_t lsn; /*!< log sequence number */
ulint last_log_buf_size;
- /* size of the log buffer when the database
+ /*!< size of the log buffer when the database
last time wrote to the log */
byte* last_block;
- /* possible incomplete last recovered log
+ /*!< possible incomplete last recovered log
block */
byte* last_block_buf_start;
- /* the nonaligned start address of the
+ /*!< the nonaligned start address of the
preceding buffer */
- byte* buf; /* buffer for parsing log records */
- ulint len; /* amount of data in buf */
- dulint parse_start_lsn;
- /* this is the lsn from which we were able to
+ byte* buf; /*!< buffer for parsing log records */
+ ulint len; /*!< amount of data in buf */
+ ib_uint64_t parse_start_lsn;
+ /*!< this is the lsn from which we were able to
start parsing log records and adding them to
- the hash table; ut_dulint_zero if a suitable
+ the hash table; zero if a suitable
start point not found yet */
- dulint scanned_lsn;
- /* the log data has been scanned up to this
+ ib_uint64_t scanned_lsn;
+ /*!< the log data has been scanned up to this
lsn */
ulint scanned_checkpoint_no;
- /* the log data has been scanned up to this
+ /*!< the log data has been scanned up to this
checkpoint number (lowest 4 bytes) */
ulint recovered_offset;
- /* start offset of non-parsed log records in
+ /*!< start offset of non-parsed log records in
buf */
- dulint recovered_lsn;
- /* the log records have been parsed up to
+ ib_uint64_t recovered_lsn;
+ /*!< the log records have been parsed up to
this lsn */
- dulint limit_lsn;/* recovery should be made at most up to this
- lsn */
+ ib_uint64_t limit_lsn;/*!< recovery should be made at most
+ up to this lsn */
ibool found_corrupt_log;
- /* this is set to TRUE if we during log
+ /*!< this is set to TRUE if we during log
scan find a corrupt log block, or a corrupt
log record, or there is a log parsing
buffer overflow */
+#ifdef UNIV_LOG_ARCHIVE
log_group_t* archive_group;
- /* in archive recovery: the log group whose
+ /*!< in archive recovery: the log group whose
archive is read */
- mem_heap_t* heap; /* memory heap of log records and file
+#endif /* UNIV_LOG_ARCHIVE */
+ mem_heap_t* heap; /*!< memory heap of log records and file
addresses*/
- hash_table_t* addr_hash;/* hash table of file addresses of pages */
- ulint n_addrs;/* number of not processed hashed file
+ hash_table_t* addr_hash;/*!< hash table of file addresses of pages */
+ ulint n_addrs;/*!< number of not processed hashed file
addresses in the hash table */
};
+/** The recovery system */
extern recv_sys_t* recv_sys;
+
+/** TRUE when applying redo log records during crash recovery; FALSE
+otherwise. Note that this is FALSE while a background thread is
+rolling back incomplete transactions. */
extern ibool recv_recovery_on;
+/** If the following is TRUE, the buffer pool file pages must be invalidated
+after recovery and no ibuf operations are allowed; this becomes TRUE if
+the log record hash table becomes too full, and log records must be merged
+to file pages already before the recovery is finished: in this case no
+ibuf operations are allowed, as they could modify the pages read in the
+buffer pool before the pages have been recovered to the up-to-date state.
+
+TRUE means that recovery is running and no operations on the log files
+are allowed yet: the variable name is misleading. */
extern ibool recv_no_ibuf_operations;
+/** TRUE when recv_init_crash_recovery() has been called. */
extern ibool recv_needed_recovery;
-
+#ifdef UNIV_DEBUG
+/** TRUE if writing to the redo log (mtr_commit) is forbidden.
+Protected by log_sys->mutex. */
+extern ibool recv_no_log_write;
+#endif /* UNIV_DEBUG */
+
+/** TRUE if buf_page_is_corrupted() should check if the log sequence
+number (FIL_PAGE_LSN) is in the future. Initially FALSE, and set by
+recv_recovery_from_checkpoint_start_func(). */
extern ibool recv_lsn_checks_on;
#ifdef UNIV_HOTBACKUP
+/** TRUE when the redo log is being backed up */
extern ibool recv_is_making_a_backup;
#endif /* UNIV_HOTBACKUP */
+/** Maximum page number encountered in the redo log */
extern ulint recv_max_parsed_page_no;
-/* Size of the parsing buffer; it must accommodate RECV_SCAN_SIZE many
+/** Size of the parsing buffer; it must accommodate RECV_SCAN_SIZE many
times! */
#define RECV_PARSING_BUF_SIZE (2 * 1024 * 1024)
-/* Size of block reads when the log groups are scanned forward to do a
+/** Size of block reads when the log groups are scanned forward to do a
roll-forward */
#define RECV_SCAN_SIZE (4 * UNIV_PAGE_SIZE)
-/* States of recv_addr_struct */
-#define RECV_NOT_PROCESSED 71
-#define RECV_BEING_READ 72
-#define RECV_BEING_PROCESSED 73
-#define RECV_PROCESSED 74
-
+/** This many frames must be left free in the buffer pool when we scan
+the log and store the scanned log records in the buffer pool: we will
+use these free frames to read in pages when we start applying the
+log records to the database. */
extern ulint recv_n_pool_free_frames;
#ifndef UNIV_NONINL
diff --git a/storage/innobase/include/log0recv.ic b/storage/innobase/include/log0recv.ic
index 489641bade2..0a8e55b96fa 100644
--- a/storage/innobase/include/log0recv.ic
+++ b/storage/innobase/include/log0recv.ic
@@ -1,30 +1,48 @@
-/******************************************************
-Recovery
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
-(c) 1997 Innobase Oy
+/**************************************************//**
+@file include/log0recv.ic
+Recovery
Created 9/20/1997 Heikki Tuuri
*******************************************************/
-#include "sync0sync.h"
-#include "mem0mem.h"
-#include "log0log.h"
-#include "os0file.h"
-
-extern ibool recv_recovery_from_backup_on;
+#include "univ.i"
-/***********************************************************************
-Returns TRUE if recovery is currently running. */
+/*******************************************************************//**
+Returns TRUE if recovery is currently running.
+@return recv_recovery_on */
UNIV_INLINE
ibool
recv_recovery_is_on(void)
/*=====================*/
{
- return(recv_recovery_on);
+ return(UNIV_UNLIKELY(recv_recovery_on));
}
-/***********************************************************************
-Returns TRUE if recovery from backup is currently running. */
+#ifdef UNIV_LOG_ARCHIVE
+/** TRUE when applying redo log records from an archived log file */
+extern ibool recv_recovery_from_backup_on;
+
+/*******************************************************************//**
+Returns TRUE if recovery from backup is currently running.
+@return recv_recovery_from_backup_on */
UNIV_INLINE
ibool
recv_recovery_from_backup_is_on(void)
@@ -32,4 +50,4 @@ recv_recovery_from_backup_is_on(void)
{
return(recv_recovery_from_backup_on);
}
-
+#endif /* UNIV_LOG_ARCHIVE */
diff --git a/storage/innobase/include/mach0data.h b/storage/innobase/include/mach0data.h
index d6e040ba9ca..44ee3df22ce 100644
--- a/storage/innobase/include/mach0data.h
+++ b/storage/innobase/include/mach0data.h
@@ -1,9 +1,26 @@
-/**********************************************************************
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/mach0data.h
Utilities for converting data from the database file
to the machine format.
-(c) 1995 Innobase Oy
-
Created 11/28/1995 Heikki Tuuri
***********************************************************************/
@@ -18,326 +35,364 @@ in the same format: ascii, big-endian, ... .
All data in the files MUST be accessed using the functions in this
module. */
-/***********************************************************
+/*******************************************************//**
The following function is used to store data in one byte. */
UNIV_INLINE
void
mach_write_to_1(
/*============*/
- byte* b, /* in: pointer to byte where to store */
- ulint n); /* in: ulint integer to be stored, >= 0, < 256 */
-/************************************************************
-The following function is used to fetch data from one byte. */
+ byte* b, /*!< in: pointer to byte where to store */
+ ulint n); /*!< in: ulint integer to be stored, >= 0, < 256 */
+/********************************************************//**
+The following function is used to fetch data from one byte.
+@return ulint integer, >= 0, < 256 */
UNIV_INLINE
ulint
mach_read_from_1(
/*=============*/
- /* out: ulint integer, >= 0, < 256 */
- byte* b); /* in: pointer to byte */
-/***********************************************************
+ const byte* b) /*!< in: pointer to byte */
+ __attribute__((nonnull, pure));
+/*******************************************************//**
The following function is used to store data in two consecutive
bytes. We store the most significant byte to the lower address. */
UNIV_INLINE
void
mach_write_to_2(
/*============*/
- byte* b, /* in: pointer to two bytes where to store */
- ulint n); /* in: ulint integer to be stored, >= 0, < 64k */
-/************************************************************
+ byte* b, /*!< in: pointer to two bytes where to store */
+ ulint n); /*!< in: ulint integer to be stored, >= 0, < 64k */
+/********************************************************//**
The following function is used to fetch data from two consecutive
-bytes. The most significant byte is at the lowest address. */
+bytes. The most significant byte is at the lowest address.
+@return ulint integer, >= 0, < 64k */
UNIV_INLINE
ulint
mach_read_from_2(
/*=============*/
- /* out: ulint integer, >= 0, < 64k */
- byte* b); /* in: pointer to two bytes */
+ const byte* b) /*!< in: pointer to two bytes */
+ __attribute__((nonnull, pure));
-/************************************************************
+/********************************************************//**
The following function is used to convert a 16-bit data item
to the canonical format, for fast bytewise equality test
-against memory. */
+against memory.
+@return 16-bit integer in canonical format */
UNIV_INLINE
uint16
mach_encode_2(
/*==========*/
- /* out: 16-bit integer in canonical format */
- ulint n); /* in: integer in machine-dependent format */
-/************************************************************
+ ulint n) /*!< in: integer in machine-dependent format */
+ __attribute__((const));
+/********************************************************//**
The following function is used to convert a 16-bit data item
from the canonical format, for fast bytewise equality test
-against memory. */
+against memory.
+@return integer in machine-dependent format */
UNIV_INLINE
ulint
mach_decode_2(
/*==========*/
- /* out: integer in machine-dependent format */
- uint16 n); /* in: 16-bit integer in canonical format */
-/***********************************************************
+ uint16 n) /*!< in: 16-bit integer in canonical format */
+ __attribute__((const));
+/*******************************************************//**
The following function is used to store data in 3 consecutive
bytes. We store the most significant byte to the lowest address. */
UNIV_INLINE
void
mach_write_to_3(
/*============*/
- byte* b, /* in: pointer to 3 bytes where to store */
- ulint n); /* in: ulint integer to be stored */
-/************************************************************
+ byte* b, /*!< in: pointer to 3 bytes where to store */
+ ulint n); /*!< in: ulint integer to be stored */
+/********************************************************//**
The following function is used to fetch data from 3 consecutive
-bytes. The most significant byte is at the lowest address. */
+bytes. The most significant byte is at the lowest address.
+@return ulint integer */
UNIV_INLINE
ulint
mach_read_from_3(
/*=============*/
- /* out: ulint integer */
- byte* b); /* in: pointer to 3 bytes */
-/***********************************************************
+ const byte* b) /*!< in: pointer to 3 bytes */
+ __attribute__((nonnull, pure));
+/*******************************************************//**
The following function is used to store data in four consecutive
bytes. We store the most significant byte to the lowest address. */
UNIV_INLINE
void
mach_write_to_4(
/*============*/
- byte* b, /* in: pointer to four bytes where to store */
- ulint n); /* in: ulint integer to be stored */
-/************************************************************
+ byte* b, /*!< in: pointer to four bytes where to store */
+ ulint n); /*!< in: ulint integer to be stored */
+/********************************************************//**
The following function is used to fetch data from 4 consecutive
-bytes. The most significant byte is at the lowest address. */
+bytes. The most significant byte is at the lowest address.
+@return ulint integer */
UNIV_INLINE
ulint
mach_read_from_4(
/*=============*/
- /* out: ulint integer */
- byte* b); /* in: pointer to four bytes */
-/*************************************************************
-Writes a ulint in a compressed form (1..5 bytes). */
+ const byte* b) /*!< in: pointer to four bytes */
+ __attribute__((nonnull, pure));
+/*********************************************************//**
+Writes a ulint in a compressed form (1..5 bytes).
+@return stored size in bytes */
UNIV_INLINE
ulint
mach_write_compressed(
/*==================*/
- /* out: stored size in bytes */
- byte* b, /* in: pointer to memory where to store */
- ulint n); /* in: ulint integer to be stored */
-/*************************************************************
-Returns the size of an ulint when written in the compressed form. */
+ byte* b, /*!< in: pointer to memory where to store */
+ ulint n); /*!< in: ulint integer to be stored */
+/*********************************************************//**
+Returns the size of a ulint when written in the compressed form.
+@return compressed size in bytes */
UNIV_INLINE
ulint
mach_get_compressed_size(
/*=====================*/
- /* out: compressed size in bytes */
- ulint n); /* in: ulint integer to be stored */
-/*************************************************************
-Reads a ulint in a compressed form. */
+ ulint n) /*!< in: ulint integer to be stored */
+ __attribute__((const));
+/*********************************************************//**
+Reads a ulint in a compressed form.
+@return read integer (< 2^32) */
UNIV_INLINE
ulint
mach_read_compressed(
/*=================*/
- /* out: read integer */
- byte* b); /* in: pointer to memory from where to read */
-/***********************************************************
+ const byte* b) /*!< in: pointer to memory from where to read */
+ __attribute__((nonnull, pure));
+/*******************************************************//**
The following function is used to store data in 6 consecutive
bytes. We store the most significant byte to the lowest address. */
UNIV_INLINE
void
mach_write_to_6(
/*============*/
- byte* b, /* in: pointer to 6 bytes where to store */
- dulint n); /* in: dulint integer to be stored */
-/************************************************************
+ byte* b, /*!< in: pointer to 6 bytes where to store */
+ dulint n); /*!< in: dulint integer to be stored */
+/********************************************************//**
The following function is used to fetch data from 6 consecutive
-bytes. The most significant byte is at the lowest address. */
+bytes. The most significant byte is at the lowest address.
+@return dulint integer */
UNIV_INLINE
dulint
mach_read_from_6(
/*=============*/
- /* out: dulint integer */
- byte* b); /* in: pointer to 6 bytes */
-/***********************************************************
+ const byte* b) /*!< in: pointer to 6 bytes */
+ __attribute__((nonnull, pure));
+/*******************************************************//**
The following function is used to store data in 7 consecutive
bytes. We store the most significant byte to the lowest address. */
UNIV_INLINE
void
mach_write_to_7(
/*============*/
- byte* b, /* in: pointer to 7 bytes where to store */
- dulint n); /* in: dulint integer to be stored */
-/************************************************************
+ byte* b, /*!< in: pointer to 7 bytes where to store */
+ dulint n); /*!< in: dulint integer to be stored */
+/********************************************************//**
The following function is used to fetch data from 7 consecutive
-bytes. The most significant byte is at the lowest address. */
+bytes. The most significant byte is at the lowest address.
+@return dulint integer */
UNIV_INLINE
dulint
mach_read_from_7(
/*=============*/
- /* out: dulint integer */
- byte* b); /* in: pointer to 7 bytes */
-/***********************************************************
+ const byte* b) /*!< in: pointer to 7 bytes */
+ __attribute__((nonnull, pure));
+/*******************************************************//**
The following function is used to store data in 8 consecutive
bytes. We store the most significant byte to the lowest address. */
UNIV_INLINE
void
mach_write_to_8(
/*============*/
- byte* b, /* in: pointer to 8 bytes where to store */
- dulint n); /* in: dulint integer to be stored */
-/************************************************************
+ byte* b, /*!< in: pointer to 8 bytes where to store */
+ dulint n); /*!< in: dulint integer to be stored */
+/*******************************************************//**
+The following function is used to store data in 8 consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_ull(
+/*===========*/
+ byte* b, /*!< in: pointer to 8 bytes where to store */
+ ib_uint64_t n); /*!< in: 64-bit integer to be stored */
+/********************************************************//**
The following function is used to fetch data from 8 consecutive
-bytes. The most significant byte is at the lowest address. */
+bytes. The most significant byte is at the lowest address.
+@return dulint integer */
UNIV_INLINE
dulint
mach_read_from_8(
/*=============*/
- /* out: dulint integer */
- byte* b); /* in: pointer to 8 bytes */
-/*************************************************************
-Writes a dulint in a compressed form (5..9 bytes). */
+ const byte* b) /*!< in: pointer to 8 bytes */
+ __attribute__((nonnull, pure));
+/********************************************************//**
+The following function is used to fetch data from 8 consecutive
+bytes. The most significant byte is at the lowest address.
+@return 64-bit integer */
+UNIV_INLINE
+ib_uint64_t
+mach_read_ull(
+/*==========*/
+ const byte* b) /*!< in: pointer to 8 bytes */
+ __attribute__((nonnull, pure));
+/*********************************************************//**
+Writes a dulint in a compressed form (5..9 bytes).
+@return size in bytes */
UNIV_INLINE
ulint
mach_dulint_write_compressed(
/*=========================*/
- /* out: size in bytes */
- byte* b, /* in: pointer to memory where to store */
- dulint n); /* in: dulint integer to be stored */
-/*************************************************************
-Returns the size of a dulint when written in the compressed form. */
+ byte* b, /*!< in: pointer to memory where to store */
+ dulint n); /*!< in: dulint integer to be stored */
+/*********************************************************//**
+Returns the size of a dulint when written in the compressed form.
+@return compressed size in bytes */
UNIV_INLINE
ulint
mach_dulint_get_compressed_size(
/*============================*/
- /* out: compressed size in bytes */
- dulint n); /* in: dulint integer to be stored */
-/*************************************************************
-Reads a dulint in a compressed form. */
+ dulint n); /*!< in: dulint integer to be stored */
+/*********************************************************//**
+Reads a dulint in a compressed form.
+@return read dulint */
UNIV_INLINE
dulint
mach_dulint_read_compressed(
/*========================*/
- /* out: read dulint */
- byte* b); /* in: pointer to memory from where to read */
-/*************************************************************
-Writes a dulint in a compressed form (1..11 bytes). */
+ const byte* b) /*!< in: pointer to memory from where to read */
+ __attribute__((nonnull, pure));
+/*********************************************************//**
+Writes a dulint in a compressed form (1..11 bytes).
+@return size in bytes */
UNIV_INLINE
ulint
mach_dulint_write_much_compressed(
/*==============================*/
- /* out: size in bytes */
- byte* b, /* in: pointer to memory where to store */
- dulint n); /* in: dulint integer to be stored */
-/*************************************************************
-Returns the size of a dulint when written in the compressed form. */
+ byte* b, /*!< in: pointer to memory where to store */
+ dulint n); /*!< in: dulint integer to be stored */
+/*********************************************************//**
+Returns the size of a dulint when written in the much compressed form.
+@return compressed size in bytes */
UNIV_INLINE
ulint
mach_dulint_get_much_compressed_size(
/*=================================*/
- /* out: compressed size in bytes */
- dulint n); /* in: dulint integer to be stored */
-/*************************************************************
-Reads a dulint in a compressed form. */
+ dulint n) /*!< in: dulint integer to be stored */
+ __attribute__((const));
+/*********************************************************//**
+Reads a dulint in a compressed form.
+@return read dulint */
UNIV_INLINE
dulint
mach_dulint_read_much_compressed(
/*=============================*/
- /* out: read dulint */
- byte* b); /* in: pointer to memory from where to read */
-/*************************************************************
-Reads a ulint in a compressed form if the log record fully contains it. */
-
+ const byte* b) /*!< in: pointer to memory from where to read */
+ __attribute__((nonnull, pure));
+/*********************************************************//**
+Reads a ulint in a compressed form if the log record fully contains it.
+@return pointer to end of the stored field, NULL if not complete */
+UNIV_INTERN
byte*
mach_parse_compressed(
/*==================*/
- /* out: pointer to end of the stored field, NULL if
- not complete */
- byte* ptr, /* in: pointer to buffer from where to read */
- byte* end_ptr,/* in: pointer to end of the buffer */
- ulint* val); /* out: read value */
-/*************************************************************
-Reads a dulint in a compressed form if the log record fully contains it. */
-
+ byte* ptr, /*!< in: pointer to buffer from where to read */
+ byte* end_ptr,/*!< in: pointer to end of the buffer */
+ ulint* val); /*!< out: read value */
+/*********************************************************//**
+Reads a dulint in a compressed form if the log record fully contains it.
+@return pointer to end of the stored field, NULL if not complete */
+UNIV_INTERN
byte*
mach_dulint_parse_compressed(
/*=========================*/
- /* out: pointer to end of the stored field, NULL if
- not complete */
- byte* ptr, /* in: pointer to buffer from where to read */
- byte* end_ptr,/* in: pointer to end of the buffer */
- dulint* val); /* out: read value */
-/*************************************************************
-Reads a double. It is stored in a little-endian format. */
+ byte* ptr, /*!< in: pointer to buffer from where to read */
+ byte* end_ptr,/*!< in: pointer to end of the buffer */
+ dulint* val); /*!< out: read value */
+#ifndef UNIV_HOTBACKUP
+/*********************************************************//**
+Reads a double. It is stored in a little-endian format.
+@return double read */
UNIV_INLINE
double
mach_double_read(
/*=============*/
- /* out: double read */
- const byte* b); /* in: pointer to memory from where to read */
-/*************************************************************
+ const byte* b) /*!< in: pointer to memory from where to read */
+ __attribute__((nonnull, pure));
+/*********************************************************//**
Writes a double. It is stored in a little-endian format. */
UNIV_INLINE
void
mach_double_write(
/*==============*/
- byte* b, /* in: pointer to memory where to write */
- double d); /* in: double */
-/*************************************************************
-Reads a float. It is stored in a little-endian format. */
+ byte* b, /*!< in: pointer to memory where to write */
+ double d); /*!< in: double */
+/*********************************************************//**
+Reads a float. It is stored in a little-endian format.
+@return float read */
UNIV_INLINE
float
mach_float_read(
/*============*/
- /* out: float read */
- const byte* b); /* in: pointer to memory from where to read */
-/*************************************************************
+ const byte* b) /*!< in: pointer to memory from where to read */
+ __attribute__((nonnull, pure));
+/*********************************************************//**
Writes a float. It is stored in a little-endian format. */
UNIV_INLINE
void
mach_float_write(
/*=============*/
- byte* b, /* in: pointer to memory where to write */
- float d); /* in: float */
-/*************************************************************
-Reads a ulint stored in the little-endian format. */
+ byte* b, /*!< in: pointer to memory where to write */
+ float d); /*!< in: float */
+/*********************************************************//**
+Reads a ulint stored in the little-endian format.
+@return unsigned long int */
UNIV_INLINE
ulint
mach_read_from_n_little_endian(
/*===========================*/
- /* out: unsigned long int */
- byte* buf, /* in: from where to read */
- ulint buf_size); /* in: from how many bytes to read */
-/*************************************************************
+ const byte* buf, /*!< in: from where to read */
+ ulint buf_size) /*!< in: from how many bytes to read */
+ __attribute__((nonnull, pure));
+/*********************************************************//**
Writes a ulint in the little-endian format. */
UNIV_INLINE
void
mach_write_to_n_little_endian(
/*==========================*/
- byte* dest, /* in: where to write */
- ulint dest_size, /* in: into how many bytes to write */
- ulint n); /* in: unsigned long int to write */
-/*************************************************************
-Reads a ulint stored in the little-endian format. */
+ byte* dest, /*!< in: where to write */
+ ulint dest_size, /*!< in: into how many bytes to write */
+ ulint n); /*!< in: unsigned long int to write */
+/*********************************************************//**
+Reads a ulint stored in the little-endian format.
+@return unsigned long int */
UNIV_INLINE
ulint
mach_read_from_2_little_endian(
/*===========================*/
- /* out: unsigned long int */
- byte* buf); /* in: from where to read */
-/*************************************************************
+ const byte* buf) /*!< in: from where to read */
+ __attribute__((nonnull, pure));
+/*********************************************************//**
Writes a ulint in the little-endian format. */
UNIV_INLINE
void
mach_write_to_2_little_endian(
/*==========================*/
- byte* dest, /* in: where to write */
- ulint n); /* in: unsigned long int to write */
+ byte* dest, /*!< in: where to write */
+ ulint n); /*!< in: unsigned long int to write */
-/*************************************************************
+/*********************************************************//**
Convert integral type from storage byte order (big endian) to
-host byte order. */
+host byte order.
+@return integer value */
UNIV_INLINE
ullint
mach_read_int_type(
/*===============*/
- /* out: integer value */
- const byte* src, /* in: where to read from */
- ulint len, /* in: length of src */
- ibool unsigned_type); /* in: signed or unsigned flag */
+ const byte* src, /*!< in: where to read from */
+ ulint len, /*!< in: length of src */
+ ibool unsigned_type); /*!< in: signed or unsigned flag */
+#endif /* !UNIV_HOTBACKUP */
+
#ifndef UNIV_NONINL
#include "mach0data.ic"
#endif
diff --git a/storage/innobase/include/mach0data.ic b/storage/innobase/include/mach0data.ic
index dc7918c287b..ef20356bd31 100644
--- a/storage/innobase/include/mach0data.ic
+++ b/storage/innobase/include/mach0data.ic
@@ -1,22 +1,39 @@
-/**********************************************************************
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/mach0data.ic
Utilities for converting data from the database file
to the machine format.
-(c) 1995 Innobase Oy
-
Created 11/28/1995 Heikki Tuuri
***********************************************************************/
#include "ut0mem.h"
-/***********************************************************
+/*******************************************************//**
The following function is used to store data in one byte. */
UNIV_INLINE
void
mach_write_to_1(
/*============*/
- byte* b, /* in: pointer to byte where to store */
- ulint n) /* in: ulint integer to be stored, >= 0, < 256 */
+ byte* b, /*!< in: pointer to byte where to store */
+ ulint n) /*!< in: ulint integer to be stored, >= 0, < 256 */
{
ut_ad(b);
ut_ad(n <= 0xFFUL);
@@ -24,28 +41,28 @@ mach_write_to_1(
b[0] = (byte)n;
}
-/************************************************************
-The following function is used to fetch data from one byte. */
+/********************************************************//**
+The following function is used to fetch data from one byte.
+@return ulint integer, >= 0, < 256 */
UNIV_INLINE
ulint
mach_read_from_1(
/*=============*/
- /* out: ulint integer, >= 0, < 256 */
- byte* b) /* in: pointer to byte */
+ const byte* b) /*!< in: pointer to byte */
{
ut_ad(b);
return((ulint)(b[0]));
}
-/***********************************************************
+/*******************************************************//**
The following function is used to store data in two consecutive
bytes. We store the most significant byte to the lowest address. */
UNIV_INLINE
void
mach_write_to_2(
/*============*/
- byte* b, /* in: pointer to two bytes where to store */
- ulint n) /* in: ulint integer to be stored */
+ byte* b, /*!< in: pointer to two bytes where to store */
+ ulint n) /*!< in: ulint integer to be stored */
{
ut_ad(b);
ut_ad(n <= 0xFFFFUL);
@@ -54,15 +71,15 @@ mach_write_to_2(
b[1] = (byte)(n);
}
-/************************************************************
+/********************************************************//**
The following function is used to fetch data from 2 consecutive
-bytes. The most significant byte is at the lowest address. */
+bytes. The most significant byte is at the lowest address.
+@return ulint integer, >= 0, < 64k */
UNIV_INLINE
ulint
mach_read_from_2(
/*=============*/
- /* out: ulint integer */
- byte* b) /* in: pointer to 2 bytes */
+ const byte* b) /*!< in: pointer to 2 bytes */
{
ut_ad(b);
return( ((ulint)(b[0]) << 8)
@@ -70,46 +87,46 @@ mach_read_from_2(
);
}
-/************************************************************
+/********************************************************//**
The following function is used to convert a 16-bit data item
to the canonical format, for fast bytewise equality test
-against memory. */
+against memory.
+@return 16-bit integer in canonical format */
UNIV_INLINE
uint16
mach_encode_2(
/*==========*/
- /* out: 16-bit integer in canonical format */
- ulint n) /* in: integer in machine-dependent format */
+ ulint n) /*!< in: integer in machine-dependent format */
{
uint16 ret;
ut_ad(2 == sizeof ret);
mach_write_to_2((byte*) &ret, n);
return(ret);
}
-/************************************************************
+/********************************************************//**
The following function is used to convert a 16-bit data item
from the canonical format, for fast bytewise equality test
-against memory. */
+against memory.
+@return integer in machine-dependent format */
UNIV_INLINE
ulint
mach_decode_2(
/*==========*/
- /* out: integer in machine-dependent format */
- uint16 n) /* in: 16-bit integer in canonical format */
+ uint16 n) /*!< in: 16-bit integer in canonical format */
{
ut_ad(2 == sizeof n);
- return(mach_read_from_2((byte*) &n));
+ return(mach_read_from_2((const byte*) &n));
}
-/***********************************************************
+/*******************************************************//**
The following function is used to store data in 3 consecutive
bytes. We store the most significant byte to the lowest address. */
UNIV_INLINE
void
mach_write_to_3(
/*============*/
- byte* b, /* in: pointer to 3 bytes where to store */
- ulint n) /* in: ulint integer to be stored */
+ byte* b, /*!< in: pointer to 3 bytes where to store */
+ ulint n) /*!< in: ulint integer to be stored */
{
ut_ad(b);
ut_ad(n <= 0xFFFFFFUL);
@@ -119,15 +136,15 @@ mach_write_to_3(
b[2] = (byte)(n);
}
-/************************************************************
+/********************************************************//**
The following function is used to fetch data from 3 consecutive
-bytes. The most significant byte is at the lowest address. */
+bytes. The most significant byte is at the lowest address.
+@return ulint integer */
UNIV_INLINE
ulint
mach_read_from_3(
/*=============*/
- /* out: ulint integer */
- byte* b) /* in: pointer to 3 bytes */
+ const byte* b) /*!< in: pointer to 3 bytes */
{
ut_ad(b);
return( ((ulint)(b[0]) << 16)
@@ -136,15 +153,15 @@ mach_read_from_3(
);
}
-/***********************************************************
+/*******************************************************//**
The following function is used to store data in four consecutive
bytes. We store the most significant byte to the lowest address. */
UNIV_INLINE
void
mach_write_to_4(
/*============*/
- byte* b, /* in: pointer to four bytes where to store */
- ulint n) /* in: ulint integer to be stored */
+ byte* b, /*!< in: pointer to four bytes where to store */
+ ulint n) /*!< in: ulint integer to be stored */
{
ut_ad(b);
@@ -154,15 +171,15 @@ mach_write_to_4(
b[3] = (byte)n;
}
-/************************************************************
+/********************************************************//**
The following function is used to fetch data from 4 consecutive
-bytes. The most significant byte is at the lowest address. */
+bytes. The most significant byte is at the lowest address.
+@return ulint integer */
UNIV_INLINE
ulint
mach_read_from_4(
/*=============*/
- /* out: ulint integer */
- byte* b) /* in: pointer to four bytes */
+ const byte* b) /*!< in: pointer to four bytes */
{
ut_ad(b);
return( ((ulint)(b[0]) << 24)
@@ -172,20 +189,20 @@ mach_read_from_4(
);
}
-/*************************************************************
+/*********************************************************//**
Writes a ulint in a compressed form where the first byte codes the
length of the stored ulint. We look at the most significant bits of
the byte. If the most significant bit is zero, it means 1-byte storage,
else if the 2nd bit is 0, it means 2-byte storage, else if 3rd is 0,
it means 3-byte storage, else if 4th is 0, it means 4-byte storage,
-else the storage is 5-byte. */
+else the storage is 5-byte.
+@return compressed size in bytes */
UNIV_INLINE
ulint
mach_write_compressed(
/*==================*/
- /* out: compressed size in bytes */
- byte* b, /* in: pointer to memory where to store */
- ulint n) /* in: ulint integer (< 2^32) to be stored */
+ byte* b, /*!< in: pointer to memory where to store */
+ ulint n) /*!< in: ulint integer (< 2^32) to be stored */
{
ut_ad(b);
@@ -208,14 +225,14 @@ mach_write_compressed(
}
}
-/*************************************************************
-Returns the size of a ulint when written in the compressed form. */
+/*********************************************************//**
+Returns the size of a ulint when written in the compressed form.
+@return compressed size in bytes */
UNIV_INLINE
ulint
mach_get_compressed_size(
/*=====================*/
- /* out: compressed size in bytes */
- ulint n) /* in: ulint integer (< 2^32) to be stored */
+ ulint n) /*!< in: ulint integer (< 2^32) to be stored */
{
if (n < 0x80UL) {
return(1);
@@ -230,14 +247,14 @@ mach_get_compressed_size(
}
}
-/*************************************************************
-Reads a ulint in a compressed form. */
+/*********************************************************//**
+Reads a ulint in a compressed form.
+@return read integer (< 2^32) */
UNIV_INLINE
ulint
mach_read_compressed(
/*=================*/
- /* out: read integer (< 2^32) */
- byte* b) /* in: pointer to memory from where to read */
+ const byte* b) /*!< in: pointer to memory from where to read */
{
ulint flag;
@@ -259,15 +276,15 @@ mach_read_compressed(
}
}
-/***********************************************************
+/*******************************************************//**
The following function is used to store data in 8 consecutive
bytes. We store the most significant byte to the lowest address. */
UNIV_INLINE
void
mach_write_to_8(
/*============*/
- byte* b, /* in: pointer to 8 bytes where to store */
- dulint n) /* in: dulint integer to be stored */
+ byte* b, /*!< in: pointer to 8 bytes where to store */
+ dulint n) /*!< in: dulint integer to be stored */
{
ut_ad(b);
@@ -275,15 +292,31 @@ mach_write_to_8(
mach_write_to_4(b + 4, ut_dulint_get_low(n));
}
-/************************************************************
+/*******************************************************//**
+The following function is used to store data in 8 consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_ull(
+/*===========*/
+ byte* b, /*!< in: pointer to 8 bytes where to store */
+ ib_uint64_t n) /*!< in: 64-bit integer to be stored */
+{
+ ut_ad(b);
+
+ mach_write_to_4(b, (ulint) (n >> 32));
+ mach_write_to_4(b + 4, (ulint) n);
+}
+
+/********************************************************//**
The following function is used to fetch data from 8 consecutive
-bytes. The most significant byte is at the lowest address. */
+bytes. The most significant byte is at the lowest address.
+@return dulint integer */
UNIV_INLINE
dulint
mach_read_from_8(
/*=============*/
- /* out: dulint integer */
- byte* b) /* in: pointer to 8 bytes */
+ const byte* b) /*!< in: pointer to 8 bytes */
{
ulint high;
ulint low;
@@ -296,15 +329,33 @@ mach_read_from_8(
return(ut_dulint_create(high, low));
}
-/***********************************************************
+/********************************************************//**
+The following function is used to fetch data from 8 consecutive
+bytes. The most significant byte is at the lowest address.
+@return 64-bit integer */
+UNIV_INLINE
+ib_uint64_t
+mach_read_ull(
+/*==========*/
+ const byte* b) /*!< in: pointer to 8 bytes */
+{
+ ib_uint64_t ull;
+
+ ull = ((ib_uint64_t) mach_read_from_4(b)) << 32;
+ ull |= (ib_uint64_t) mach_read_from_4(b + 4);
+
+ return(ull);
+}
+
+/*******************************************************//**
The following function is used to store data in 7 consecutive
bytes. We store the most significant byte to the lowest address. */
UNIV_INLINE
void
mach_write_to_7(
/*============*/
- byte* b, /* in: pointer to 7 bytes where to store */
- dulint n) /* in: dulint integer to be stored */
+ byte* b, /*!< in: pointer to 7 bytes where to store */
+ dulint n) /*!< in: dulint integer to be stored */
{
ut_ad(b);
@@ -312,15 +363,15 @@ mach_write_to_7(
mach_write_to_4(b + 3, ut_dulint_get_low(n));
}
-/************************************************************
+/********************************************************//**
The following function is used to fetch data from 7 consecutive
-bytes. The most significant byte is at the lowest address. */
+bytes. The most significant byte is at the lowest address.
+@return dulint integer */
UNIV_INLINE
dulint
mach_read_from_7(
/*=============*/
- /* out: dulint integer */
- byte* b) /* in: pointer to 7 bytes */
+ const byte* b) /*!< in: pointer to 7 bytes */
{
ulint high;
ulint low;
@@ -333,15 +384,15 @@ mach_read_from_7(
return(ut_dulint_create(high, low));
}
-/***********************************************************
+/*******************************************************//**
The following function is used to store data in 6 consecutive
bytes. We store the most significant byte to the lowest address. */
UNIV_INLINE
void
mach_write_to_6(
/*============*/
- byte* b, /* in: pointer to 6 bytes where to store */
- dulint n) /* in: dulint integer to be stored */
+ byte* b, /*!< in: pointer to 6 bytes where to store */
+ dulint n) /*!< in: dulint integer to be stored */
{
ut_ad(b);
@@ -349,15 +400,15 @@ mach_write_to_6(
mach_write_to_4(b + 2, ut_dulint_get_low(n));
}
-/************************************************************
+/********************************************************//**
The following function is used to fetch data from 6 consecutive
-bytes. The most significant byte is at the lowest address. */
+bytes. The most significant byte is at the lowest address.
+@return dulint integer */
UNIV_INLINE
dulint
mach_read_from_6(
/*=============*/
- /* out: dulint integer */
- byte* b) /* in: pointer to 7 bytes */
+ const byte* b) /*!< in: pointer to 6 bytes */
{
ulint high;
ulint low;
@@ -370,15 +421,15 @@ mach_read_from_6(
return(ut_dulint_create(high, low));
}
-/*************************************************************
-Writes a dulint in a compressed form (5..9 bytes). */
+/*********************************************************//**
+Writes a dulint in a compressed form (5..9 bytes).
+@return size in bytes */
UNIV_INLINE
ulint
mach_dulint_write_compressed(
/*=========================*/
- /* out: size in bytes */
- byte* b, /* in: pointer to memory where to store */
- dulint n) /* in: dulint integer to be stored */
+ byte* b, /*!< in: pointer to memory where to store */
+ dulint n) /*!< in: dulint integer to be stored */
{
ulint size;
@@ -390,26 +441,26 @@ mach_dulint_write_compressed(
return(size + 4);
}
-/*************************************************************
-Returns the size of a dulint when written in the compressed form. */
+/*********************************************************//**
+Returns the size of a dulint when written in the compressed form.
+@return compressed size in bytes */
UNIV_INLINE
ulint
mach_dulint_get_compressed_size(
/*============================*/
- /* out: compressed size in bytes */
- dulint n) /* in: dulint integer to be stored */
+ dulint n) /*!< in: dulint integer to be stored */
{
return(4 + mach_get_compressed_size(ut_dulint_get_high(n)));
}
-/*************************************************************
-Reads a dulint in a compressed form. */
+/*********************************************************//**
+Reads a dulint in a compressed form.
+@return read dulint */
UNIV_INLINE
dulint
mach_dulint_read_compressed(
/*========================*/
- /* out: read dulint */
- byte* b) /* in: pointer to memory from where to read */
+ const byte* b) /*!< in: pointer to memory from where to read */
{
ulint high;
ulint low;
@@ -426,15 +477,15 @@ mach_dulint_read_compressed(
return(ut_dulint_create(high, low));
}
-/*************************************************************
-Writes a dulint in a compressed form (1..11 bytes). */
+/*********************************************************//**
+Writes a dulint in a compressed form (1..11 bytes).
+@return size in bytes */
UNIV_INLINE
ulint
mach_dulint_write_much_compressed(
/*==============================*/
- /* out: size in bytes */
- byte* b, /* in: pointer to memory where to store */
- dulint n) /* in: dulint integer to be stored */
+ byte* b, /*!< in: pointer to memory where to store */
+ dulint n) /*!< in: dulint integer to be stored */
{
ulint size;
@@ -452,14 +503,14 @@ mach_dulint_write_much_compressed(
return(size);
}
-/*************************************************************
-Returns the size of a dulint when written in the compressed form. */
+/*********************************************************//**
+Returns the size of a dulint when written in the compressed form.
+@return compressed size in bytes */
UNIV_INLINE
ulint
mach_dulint_get_much_compressed_size(
/*=================================*/
- /* out: compressed size in bytes */
- dulint n) /* in: dulint integer to be stored */
+ dulint n) /*!< in: dulint integer to be stored */
{
if (0 == ut_dulint_get_high(n)) {
return(mach_get_compressed_size(ut_dulint_get_low(n)));
@@ -469,14 +520,14 @@ mach_dulint_get_much_compressed_size(
+ mach_get_compressed_size(ut_dulint_get_low(n)));
}
-/*************************************************************
-Reads a dulint in a compressed form. */
+/*********************************************************//**
+Reads a dulint in a compressed form.
+@return read dulint */
UNIV_INLINE
dulint
mach_dulint_read_much_compressed(
/*=============================*/
- /* out: read dulint */
- byte* b) /* in: pointer to memory from where to read */
+ const byte* b) /*!< in: pointer to memory from where to read */
{
ulint high;
ulint low;
@@ -497,15 +548,15 @@ mach_dulint_read_much_compressed(
return(ut_dulint_create(high, low));
}
-
-/*************************************************************
-Reads a double. It is stored in a little-endian format. */
+#ifndef UNIV_HOTBACKUP
+/*********************************************************//**
+Reads a double. It is stored in a little-endian format.
+@return double read */
UNIV_INLINE
double
mach_double_read(
/*=============*/
- /* out: double read */
- const byte* b) /* in: pointer to memory from where to read */
+ const byte* b) /*!< in: pointer to memory from where to read */
{
double d;
ulint i;
@@ -524,14 +575,14 @@ mach_double_read(
return(d);
}
-/*************************************************************
+/*********************************************************//**
Writes a double. It is stored in a little-endian format. */
UNIV_INLINE
void
mach_double_write(
/*==============*/
- byte* b, /* in: pointer to memory where to write */
- double d) /* in: double */
+ byte* b, /*!< in: pointer to memory where to write */
+ double d) /*!< in: double */
{
ulint i;
byte* ptr;
@@ -547,14 +598,14 @@ mach_double_write(
}
}
-/*************************************************************
-Reads a float. It is stored in a little-endian format. */
+/*********************************************************//**
+Reads a float. It is stored in a little-endian format.
+@return float read */
UNIV_INLINE
float
mach_float_read(
/*============*/
- /* out: float read */
- const byte* b) /* in: pointer to memory from where to read */
+ const byte* b) /*!< in: pointer to memory from where to read */
{
float d;
ulint i;
@@ -573,14 +624,14 @@ mach_float_read(
return(d);
}
-/*************************************************************
+/*********************************************************//**
Writes a float. It is stored in a little-endian format. */
UNIV_INLINE
void
mach_float_write(
/*=============*/
- byte* b, /* in: pointer to memory where to write */
- float d) /* in: float */
+ byte* b, /*!< in: pointer to memory where to write */
+ float d) /*!< in: float */
{
ulint i;
byte* ptr;
@@ -596,18 +647,18 @@ mach_float_write(
}
}
-/*************************************************************
-Reads a ulint stored in the little-endian format. */
+/*********************************************************//**
+Reads a ulint stored in the little-endian format.
+@return unsigned long int */
UNIV_INLINE
ulint
mach_read_from_n_little_endian(
/*===========================*/
- /* out: unsigned long int */
- byte* buf, /* in: from where to read */
- ulint buf_size) /* in: from how many bytes to read */
+ const byte* buf, /*!< in: from where to read */
+ ulint buf_size) /*!< in: from how many bytes to read */
{
ulint n = 0;
- byte* ptr;
+ const byte* ptr;
ut_ad(buf_size <= sizeof(ulint));
ut_ad(buf_size > 0);
@@ -629,15 +680,15 @@ mach_read_from_n_little_endian(
return(n);
}
-/*************************************************************
+/*********************************************************//**
Writes a ulint in the little-endian format. */
UNIV_INLINE
void
mach_write_to_n_little_endian(
/*==========================*/
- byte* dest, /* in: where to write */
- ulint dest_size, /* in: into how many bytes to write */
- ulint n) /* in: unsigned long int to write */
+ byte* dest, /*!< in: where to write */
+ ulint dest_size, /*!< in: into how many bytes to write */
+ ulint n) /*!< in: unsigned long int to write */
{
byte* end;
@@ -661,26 +712,26 @@ mach_write_to_n_little_endian(
ut_ad(n == 0);
}
-/*************************************************************
-Reads a ulint stored in the little-endian format. */
+/*********************************************************//**
+Reads a ulint stored in the little-endian format.
+@return unsigned long int */
UNIV_INLINE
ulint
mach_read_from_2_little_endian(
/*===========================*/
- /* out: unsigned long int */
- byte* buf) /* in: from where to read */
+ const byte* buf) /*!< in: from where to read */
{
return((ulint)(*buf) + ((ulint)(*(buf + 1))) * 256);
}
-/*************************************************************
+/*********************************************************//**
Writes a ulint in the little-endian format. */
UNIV_INLINE
void
mach_write_to_2_little_endian(
/*==========================*/
- byte* dest, /* in: where to write */
- ulint n) /* in: unsigned long int to write */
+ byte* dest, /*!< in: where to write */
+ ulint n) /*!< in: unsigned long int to write */
{
ut_ad(n < 256 * 256);
@@ -692,17 +743,17 @@ mach_write_to_2_little_endian(
*dest = (byte)(n & 0xFFUL);
}
-/*************************************************************
+/*********************************************************//**
Convert integral type from storage byte order (big endian) to
-host byte order. */
+host byte order.
+@return integer value */
UNIV_INLINE
ullint
mach_read_int_type(
/*===============*/
- /* out: integer value */
- const byte* src, /* in: where to read from */
- ulint len, /* in: length of src */
- ibool unsigned_type) /* in: signed or unsigned flag */
+ const byte* src, /*!< in: where to read from */
+ ulint len, /*!< in: length of src */
+ ibool unsigned_type) /*!< in: signed or unsigned flag */
{
/* XXX this can be optimized on big-endian machines */
@@ -732,3 +783,4 @@ mach_read_int_type(
return(ret);
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/mem0dbg.h b/storage/innobase/include/mem0dbg.h
index 2393e4edb54..a064af5c678 100644
--- a/storage/innobase/include/mem0dbg.h
+++ b/storage/innobase/include/mem0dbg.h
@@ -1,9 +1,26 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/mem0dbg.h
The memory management: the debug code. This is not a compilation module,
but is included in mem0mem.* !
-(c) 1994, 1995 Innobase Oy
-
Created 6/9/1994 Heikki Tuuri
*******************************************************/
@@ -31,96 +48,96 @@ check fields at the both ends of the field. */
#endif
#if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG
-/*******************************************************************
+/***************************************************************//**
Checks a memory heap for consistency and prints the contents if requested.
Outputs the sum of sizes of buffers given to the user (only in
the debug version), the physical size of the heap and the number of
blocks in the heap. In case of error returns 0 as sizes and number
of blocks. */
-
+UNIV_INTERN
void
mem_heap_validate_or_print(
/*=======================*/
- mem_heap_t* heap, /* in: memory heap */
- byte* top, /* in: calculate and validate only until
+ mem_heap_t* heap, /*!< in: memory heap */
+ byte* top, /*!< in: calculate and validate only until
this top pointer in the heap is reached,
if this pointer is NULL, ignored */
- ibool print, /* in: if TRUE, prints the contents
+ ibool print, /*!< in: if TRUE, prints the contents
of the heap; works only in
the debug version */
- ibool* error, /* out: TRUE if error */
- ulint* us_size,/* out: allocated memory
+ ibool* error, /*!< out: TRUE if error */
+ ulint* us_size,/*!< out: allocated memory
(for the user) in the heap,
if a NULL pointer is passed as this
argument, it is ignored; in the
non-debug version this is always -1 */
- ulint* ph_size,/* out: physical size of the heap,
+ ulint* ph_size,/*!< out: physical size of the heap,
if a NULL pointer is passed as this
argument, it is ignored */
- ulint* n_blocks); /* out: number of blocks in the heap,
+ ulint* n_blocks); /*!< out: number of blocks in the heap,
if a NULL pointer is passed as this
argument, it is ignored */
-/******************************************************************
-Validates the contents of a memory heap. */
-
+/**************************************************************//**
+Validates the contents of a memory heap.
+@return TRUE if ok */
+UNIV_INTERN
ibool
mem_heap_validate(
/*==============*/
- /* out: TRUE if ok */
- mem_heap_t* heap); /* in: memory heap */
+ mem_heap_t* heap); /*!< in: memory heap */
#endif /* UNIV_MEM_DEBUG || UNIV_DEBUG */
#ifdef UNIV_DEBUG
-/******************************************************************
-Checks that an object is a memory heap (or a block of it) */
-
+/**************************************************************//**
+Checks that an object is a memory heap (or a block of it).
+@return TRUE if ok */
+UNIV_INTERN
ibool
mem_heap_check(
/*===========*/
- /* out: TRUE if ok */
- mem_heap_t* heap); /* in: memory heap */
+ mem_heap_t* heap); /*!< in: memory heap */
#endif /* UNIV_DEBUG */
#ifdef UNIV_MEM_DEBUG
-/*********************************************************************
-TRUE if no memory is currently allocated. */
-
+/*****************************************************************//**
+TRUE if no memory is currently allocated.
+@return TRUE if no heaps exist */
+UNIV_INTERN
ibool
mem_all_freed(void);
/*===============*/
- /* out: TRUE if no heaps exist */
-/*********************************************************************
-Validates the dynamic memory */
-
+/*****************************************************************//**
+Validates the dynamic memory.
+@return TRUE if error */
+UNIV_INTERN
ibool
mem_validate_no_assert(void);
/*=========================*/
- /* out: TRUE if error */
-/****************************************************************
-Validates the dynamic memory */
-
+/************************************************************//**
+Validates the dynamic memory.
+@return TRUE if ok */
+UNIV_INTERN
ibool
mem_validate(void);
/*===============*/
- /* out: TRUE if ok */
#endif /* UNIV_MEM_DEBUG */
-/****************************************************************
+/************************************************************//**
Tries to find neigboring memory allocation blocks and dumps to stderr
the neighborhood of a given pointer. */
-
+UNIV_INTERN
void
mem_analyze_corruption(
/*===================*/
- void* ptr); /* in: pointer to place of possible corruption */
-/*********************************************************************
+ void* ptr); /*!< in: pointer to place of possible corruption */
+/*****************************************************************//**
Prints information of dynamic memory usage and currently allocated memory
heaps or buffers. Can only be used in the debug version. */
-
+UNIV_INTERN
void
mem_print_info(void);
/*================*/
-/*********************************************************************
+/*****************************************************************//**
Prints information of dynamic memory usage and currently allocated memory
heaps or buffers since the last ..._print_info or..._print_new_info. */
-
+UNIV_INTERN
void
mem_print_new_info(void);
/*====================*/
diff --git a/storage/innobase/include/mem0dbg.ic b/storage/innobase/include/mem0dbg.ic
index e8a34adb3fa..cb9245411dc 100644
--- a/storage/innobase/include/mem0dbg.ic
+++ b/storage/innobase/include/mem0dbg.ic
@@ -1,62 +1,81 @@
-/************************************************************************
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/mem0dbg.ic
The memory management: the debug code. This is not an independent
compilation module but is included in mem0mem.*.
-(c) 1994, 1995 Innobase Oy
-
Created 6/8/1994 Heikki Tuuri
*************************************************************************/
#ifdef UNIV_MEM_DEBUG
+# ifndef UNIV_HOTBACKUP
extern mutex_t mem_hash_mutex;
+# endif /* !UNIV_HOTBACKUP */
extern ulint mem_current_allocated_memory;
-/**********************************************************************
+/******************************************************************//**
Initializes an allocated memory field in the debug version. */
-
+UNIV_INTERN
void
mem_field_init(
/*===========*/
- byte* buf, /* in: memory field */
- ulint n); /* in: how many bytes the user requested */
-/**********************************************************************
+ byte* buf, /*!< in: memory field */
+ ulint n); /*!< in: how many bytes the user requested */
+/******************************************************************//**
Erases an allocated memory field in the debug version. */
-
+UNIV_INTERN
void
mem_field_erase(
/*============*/
- byte* buf, /* in: memory field */
- ulint n); /* in: how many bytes the user requested */
-/*******************************************************************
+ byte* buf, /*!< in: memory field */
+ ulint n); /*!< in: how many bytes the user requested */
+/***************************************************************//**
Initializes a buffer to a random combination of hex BA and BE.
Used to initialize allocated memory. */
-
+UNIV_INTERN
void
mem_init_buf(
/*=========*/
- byte* buf, /* in: pointer to buffer */
- ulint n); /* in: length of buffer */
-/*******************************************************************
+ byte* buf, /*!< in: pointer to buffer */
+ ulint n); /*!< in: length of buffer */
+/***************************************************************//**
Initializes a buffer to a random combination of hex DE and AD.
-Used to erase freed memory.*/
-
+Used to erase freed memory. */
+UNIV_INTERN
void
mem_erase_buf(
/*==========*/
- byte* buf, /* in: pointer to buffer */
- ulint n); /* in: length of buffer */
-/*******************************************************************
+ byte* buf, /*!< in: pointer to buffer */
+ ulint n); /*!< in: length of buffer */
+/***************************************************************//**
Inserts a created memory heap to the hash table of
current allocated memory heaps.
Initializes the hash table when first called. */
-
+UNIV_INTERN
void
mem_hash_insert(
/*============*/
- mem_heap_t* heap, /* in: the created heap */
- const char* file_name, /* in: file name of creation */
- ulint line); /* in: line where created */
-/*******************************************************************
+ mem_heap_t* heap, /*!< in: the created heap */
+ const char* file_name, /*!< in: file name of creation */
+ ulint line); /*!< in: line where created */
+/***************************************************************//**
Removes a memory heap (which is going to be freed by the caller)
from the list of live memory heaps. Returns the size of the heap
in terms of how much memory in bytes was allocated for the user of
@@ -64,13 +83,13 @@ the heap (not the total space occupied by the heap).
Also validates the heap.
NOTE: This function does not free the storage occupied by the
heap itself, only the node in the list of heaps. */
-
+UNIV_INTERN
void
mem_hash_remove(
/*============*/
- mem_heap_t* heap, /* in: the heap to be freed */
- const char* file_name, /* in: file name of freeing */
- ulint line); /* in: line where freed */
+ mem_heap_t* heap, /*!< in: the heap to be freed */
+ const char* file_name, /*!< in: file name of freeing */
+ ulint line); /*!< in: line where freed */
void
diff --git a/storage/innobase/include/mem0mem.h b/storage/innobase/include/mem0mem.h
index 2d5fd1db6c3..98f8748e529 100644
--- a/storage/innobase/include/mem0mem.h
+++ b/storage/innobase/include/mem0mem.h
@@ -1,7 +1,24 @@
-/******************************************************
-The memory management
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1994, 1995 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/mem0mem.h
+The memory management
Created 6/9/1994 Heikki Tuuri
*******************************************************/
@@ -12,9 +29,10 @@ Created 6/9/1994 Heikki Tuuri
#include "univ.i"
#include "ut0mem.h"
#include "ut0byte.h"
-#include "ut0ut.h"
#include "ut0rnd.h"
-#include "sync0sync.h"
+#ifndef UNIV_HOTBACKUP
+# include "sync0sync.h"
+#endif /* !UNIV_HOTBACKUP */
#include "ut0lst.h"
#include "mach0data.h"
@@ -50,84 +68,71 @@ create. The standard size is the maximum (payload) size of the blocks used for
allocations of small buffers. */
#define MEM_BLOCK_START_SIZE 64
-#define MEM_BLOCK_STANDARD_SIZE 8000
+#define MEM_BLOCK_STANDARD_SIZE \
+ (UNIV_PAGE_SIZE >= 16384 ? 8000 : MEM_MAX_ALLOC_IN_BUF)
/* If a memory heap is allowed to grow into the buffer pool, the following
is the maximum size for a single allocated buffer: */
#define MEM_MAX_ALLOC_IN_BUF (UNIV_PAGE_SIZE - 200)
-/**********************************************************************
+/******************************************************************//**
Initializes the memory system. */
-
+UNIV_INTERN
void
mem_init(
/*=====*/
- ulint size); /* in: common pool size in bytes */
-/******************************************************************
+ ulint size); /*!< in: common pool size in bytes */
+/******************************************************************//**
+Closes the memory system. */
+UNIV_INTERN
+void
+mem_close(void);
+/*===========*/
+
+/**************************************************************//**
Use this macro instead of the corresponding function! Macro for memory
heap creation. */
#define mem_heap_create(N) mem_heap_create_func(\
- (N), NULL, MEM_HEAP_DYNAMIC, __FILE__, __LINE__)
-/******************************************************************
+ (N), MEM_HEAP_DYNAMIC, __FILE__, __LINE__)
+/**************************************************************//**
Use this macro instead of the corresponding function! Macro for memory
heap creation. */
#define mem_heap_create_in_buffer(N) mem_heap_create_func(\
- (N), NULL, MEM_HEAP_BUFFER, __FILE__, __LINE__)
-/******************************************************************
+ (N), MEM_HEAP_BUFFER, __FILE__, __LINE__)
+/**************************************************************//**
Use this macro instead of the corresponding function! Macro for memory
heap creation. */
#define mem_heap_create_in_btr_search(N) mem_heap_create_func(\
- (N), NULL, MEM_HEAP_BTR_SEARCH | MEM_HEAP_BUFFER,\
+ (N), MEM_HEAP_BTR_SEARCH | MEM_HEAP_BUFFER,\
__FILE__, __LINE__)
-/******************************************************************
-Use this macro instead of the corresponding function! Macro for fast
-memory heap creation. An initial block of memory B is given by the
-caller, N is its size, and this memory block is not freed by
-mem_heap_free. See the parameter comment in mem_heap_create_func below. */
-
-#define mem_heap_fast_create(N, B) mem_heap_create_func(\
- (N), (B), MEM_HEAP_DYNAMIC, __FILE__, __LINE__)
-/******************************************************************
+/**************************************************************//**
Use this macro instead of the corresponding function! Macro for memory
heap freeing. */
#define mem_heap_free(heap) mem_heap_free_func(\
(heap), __FILE__, __LINE__)
-/*********************************************************************
+/*****************************************************************//**
NOTE: Use the corresponding macros instead of this function. Creates a
memory heap. For debugging purposes, takes also the file name and line as
-arguments. */
+arguments.
+@return own: memory heap, NULL if did not succeed (only possible for
+MEM_HEAP_BTR_SEARCH type heaps) */
UNIV_INLINE
mem_heap_t*
mem_heap_create_func(
/*=================*/
- /* out, own: memory heap, NULL if
- did not succeed (only possible for
- MEM_HEAP_BTR_SEARCH type heaps)*/
- ulint n, /* in: desired start block size,
+ ulint n, /*!< in: desired start block size,
this means that a single user buffer
of size n will fit in the block,
- 0 creates a default size block;
- if init_block is not NULL, n tells
- its size in bytes */
- void* init_block, /* in: if very fast creation is
- wanted, the caller can reserve some
- memory from its stack, for example,
- and pass it as the the initial block
- to the heap: then no OS call of malloc
- is needed at the creation. CAUTION:
- the caller must make sure the initial
- block is not unintentionally erased
- (if allocated in the stack), before
- the memory heap is explicitly freed. */
- ulint type, /* in: heap type */
- const char* file_name, /* in: file name where created */
- ulint line); /* in: line where created */
-/*********************************************************************
+ 0 creates a default size block */
+ ulint type, /*!< in: heap type */
+ const char* file_name, /*!< in: file name where created */
+ ulint line); /*!< in: line where created */
+/*****************************************************************//**
NOTE: Use the corresponding macro instead of this function. Frees the space
occupied by a memory heap. In the debug version erases the heap memory
blocks. */
@@ -135,31 +140,41 @@ UNIV_INLINE
void
mem_heap_free_func(
/*===============*/
- mem_heap_t* heap, /* in, own: heap to be freed */
- const char* file_name, /* in: file name where freed */
- ulint line); /* in: line where freed */
-/*******************************************************************
-Allocates n bytes of memory from a memory heap. */
+ mem_heap_t* heap, /*!< in, own: heap to be freed */
+ const char* file_name, /*!< in: file name where freed */
+ ulint line); /*!< in: line where freed */
+/***************************************************************//**
+Allocates and zero-fills n bytes of memory from a memory heap.
+@return allocated, zero-filled storage */
+UNIV_INLINE
+void*
+mem_heap_zalloc(
+/*============*/
+ mem_heap_t* heap, /*!< in: memory heap */
+ ulint n); /*!< in: number of bytes; if the heap is allowed
+ to grow into the buffer pool, this must be
+ <= MEM_MAX_ALLOC_IN_BUF */
+/***************************************************************//**
+Allocates n bytes of memory from a memory heap.
+@return allocated storage, NULL if did not succeed (only possible for
+MEM_HEAP_BTR_SEARCH type heaps) */
UNIV_INLINE
void*
mem_heap_alloc(
/*===========*/
- /* out: allocated storage, NULL if did not
- succeed (only possible for
- MEM_HEAP_BTR_SEARCH type heaps) */
- mem_heap_t* heap, /* in: memory heap */
- ulint n); /* in: number of bytes; if the heap is allowed
+ mem_heap_t* heap, /*!< in: memory heap */
+ ulint n); /*!< in: number of bytes; if the heap is allowed
to grow into the buffer pool, this must be
<= MEM_MAX_ALLOC_IN_BUF */
-/*********************************************************************
-Returns a pointer to the heap top. */
+/*****************************************************************//**
+Returns a pointer to the heap top.
+@return pointer to the heap top */
UNIV_INLINE
byte*
mem_heap_get_heap_top(
/*==================*/
- /* out: pointer to the heap top */
- mem_heap_t* heap); /* in: memory heap */
-/*********************************************************************
+ mem_heap_t* heap); /*!< in: memory heap */
+/*****************************************************************//**
Frees the space in a memory heap exceeding the pointer given. The
pointer must have been acquired from mem_heap_get_heap_top. The first
memory block of the heap is not freed. */
@@ -167,86 +182,71 @@ UNIV_INLINE
void
mem_heap_free_heap_top(
/*===================*/
- mem_heap_t* heap, /* in: heap from which to free */
- byte* old_top);/* in: pointer to old top of heap */
-/*********************************************************************
+ mem_heap_t* heap, /*!< in: heap from which to free */
+ byte* old_top);/*!< in: pointer to old top of heap */
+/*****************************************************************//**
Empties a memory heap. The first memory block of the heap is not freed. */
UNIV_INLINE
void
mem_heap_empty(
/*===========*/
- mem_heap_t* heap); /* in: heap to empty */
-/*********************************************************************
+ mem_heap_t* heap); /*!< in: heap to empty */
+/*****************************************************************//**
Returns a pointer to the topmost element in a memory heap.
-The size of the element must be given. */
+The size of the element must be given.
+@return pointer to the topmost element */
UNIV_INLINE
void*
mem_heap_get_top(
/*=============*/
- /* out: pointer to the topmost element */
- mem_heap_t* heap, /* in: memory heap */
- ulint n); /* in: size of the topmost element */
-/*********************************************************************
+ mem_heap_t* heap, /*!< in: memory heap */
+ ulint n); /*!< in: size of the topmost element */
+/*****************************************************************//**
Frees the topmost element in a memory heap.
The size of the element must be given. */
UNIV_INLINE
void
mem_heap_free_top(
/*==============*/
- mem_heap_t* heap, /* in: memory heap */
- ulint n); /* in: size of the topmost element */
-/*********************************************************************
+ mem_heap_t* heap, /*!< in: memory heap */
+ ulint n); /*!< in: size of the topmost element */
+/*****************************************************************//**
Returns the space in bytes occupied by a memory heap. */
UNIV_INLINE
ulint
mem_heap_get_size(
/*==============*/
- mem_heap_t* heap); /* in: heap */
-/******************************************************************
+ mem_heap_t* heap); /*!< in: heap */
+/**************************************************************//**
Use this macro instead of the corresponding function!
Macro for memory buffer allocation */
-#define mem_alloc(N) mem_alloc_func((N), __FILE__, __LINE__)
-/******************************************************************
-Use this macro instead of the corresponding function!
-Macro for memory buffer allocation */
+#define mem_zalloc(N) memset(mem_alloc(N), 0, (N)) /* expression macro: no trailing ';'; N is evaluated twice */
-#define mem_alloc_noninline(N) mem_alloc_func_noninline(\
- (N), __FILE__, __LINE__)
-/*******************************************************************
+#define mem_alloc(N) mem_alloc_func((N), NULL, __FILE__, __LINE__)
+#define mem_alloc2(N,S) mem_alloc_func((N), (S), __FILE__, __LINE__)
+/***************************************************************//**
NOTE: Use the corresponding macro instead of this function.
Allocates a single buffer of memory from the dynamic memory of
the C compiler. Is like malloc of C. The buffer must be freed
-with mem_free. */
+with mem_free.
+@return own: free storage */
UNIV_INLINE
void*
mem_alloc_func(
/*===========*/
- /* out, own: free storage */
- ulint n, /* in: desired number of bytes */
- const char* file_name, /* in: file name where created */
- ulint line /* in: line where created */
-);
-/*******************************************************************
-NOTE: Use the corresponding macro instead of this function.
-Allocates a single buffer of memory from the dynamic memory of
-the C compiler. Is like malloc of C. The buffer must be freed
-with mem_free. */
+ ulint n, /*!< in: requested size in bytes */
+ ulint* size, /*!< out: allocated size in bytes,
+ or NULL */
+ const char* file_name, /*!< in: file name where created */
+ ulint line); /*!< in: line where created */
-void*
-mem_alloc_func_noninline(
-/*=====================*/
- /* out, own: free storage */
- ulint n, /* in: desired number of bytes */
- const char* file_name, /* in: file name where created */
- ulint line /* in: line where created */
- );
-/******************************************************************
+/**************************************************************//**
Use this macro instead of the corresponding function!
Macro for memory buffer freeing */
#define mem_free(PTR) mem_free_func((PTR), __FILE__, __LINE__)
-/*******************************************************************
+/***************************************************************//**
NOTE: Use the corresponding macro instead of this function.
Frees a single buffer of storage from
the dynamic memory of C compiler. Similar to free of C. */
@@ -254,106 +254,90 @@ UNIV_INLINE
void
mem_free_func(
/*==========*/
- void* ptr, /* in, own: buffer to be freed */
- const char* file_name, /* in: file name where created */
- ulint line /* in: line where created */
-);
+ void* ptr, /*!< in, own: buffer to be freed */
+ const char* file_name, /*!< in: file name where created */
+ ulint line); /*!< in: line where created */
-/**************************************************************************
-Duplicates a NUL-terminated string. */
+/**********************************************************************//**
+Duplicates a NUL-terminated string.
+@return own: a copy of the string, must be deallocated with mem_free */
UNIV_INLINE
char*
mem_strdup(
/*=======*/
- /* out, own: a copy of the string,
- must be deallocated with mem_free */
- const char* str); /* in: string to be copied */
-/**************************************************************************
-Makes a NUL-terminated copy of a nonterminated string. */
+ const char* str); /*!< in: string to be copied */
+/**********************************************************************//**
+Makes a NUL-terminated copy of a nonterminated string.
+@return own: a copy of the string, must be deallocated with mem_free */
UNIV_INLINE
char*
mem_strdupl(
/*========*/
- /* out, own: a copy of the string,
- must be deallocated with mem_free */
- const char* str, /* in: string to be copied */
- ulint len); /* in: length of str, in bytes */
-
-/**************************************************************************
-Duplicates a NUL-terminated string, allocated from a memory heap. */
+ const char* str, /*!< in: string to be copied */
+ ulint len); /*!< in: length of str, in bytes */
+/**********************************************************************//**
+Duplicates a NUL-terminated string, allocated from a memory heap.
+@return own: a copy of the string */
+UNIV_INTERN
char*
mem_heap_strdup(
/*============*/
- /* out, own: a copy of the string */
- mem_heap_t* heap, /* in: memory heap where string is allocated */
- const char* str); /* in: string to be copied */
-/**************************************************************************
+ mem_heap_t* heap, /*!< in: memory heap where string is allocated */
+ const char* str); /*!< in: string to be copied */
+/**********************************************************************//**
Makes a NUL-terminated copy of a nonterminated string,
-allocated from a memory heap. */
+allocated from a memory heap.
+@return own: a copy of the string */
UNIV_INLINE
char*
mem_heap_strdupl(
/*=============*/
- /* out, own: a copy of the string */
- mem_heap_t* heap, /* in: memory heap where string is allocated */
- const char* str, /* in: string to be copied */
- ulint len); /* in: length of str, in bytes */
-
-/**************************************************************************
-Concatenate two strings and return the result, using a memory heap. */
-
+ mem_heap_t* heap, /*!< in: memory heap where string is allocated */
+ const char* str, /*!< in: string to be copied */
+ ulint len); /*!< in: length of str, in bytes */
+
+/**********************************************************************//**
+Concatenate two strings and return the result, using a memory heap.
+@return own: the result */
+UNIV_INTERN
char*
mem_heap_strcat(
/*============*/
- /* out, own: the result */
- mem_heap_t* heap, /* in: memory heap where string is allocated */
- const char* s1, /* in: string 1 */
- const char* s2); /* in: string 2 */
-
-/**************************************************************************
-Duplicate a block of data, allocated from a memory heap. */
-
+ mem_heap_t* heap, /*!< in: memory heap where string is allocated */
+ const char* s1, /*!< in: string 1 */
+ const char* s2); /*!< in: string 2 */
+
+/**********************************************************************//**
+Duplicate a block of data, allocated from a memory heap.
+@return own: a copy of the data */
+UNIV_INTERN
void*
mem_heap_dup(
/*=========*/
- /* out, own: a copy of the data */
- mem_heap_t* heap, /* in: memory heap where copy is allocated */
- const void* data, /* in: data to be copied */
- ulint len); /* in: length of data, in bytes */
+ mem_heap_t* heap, /*!< in: memory heap where copy is allocated */
+ const void* data, /*!< in: data to be copied */
+ ulint len); /*!< in: length of data, in bytes */
-/**************************************************************************
-Concatenate two memory blocks and return the result, using a memory heap. */
-
-void*
-mem_heap_cat(
-/*=========*/
- /* out, own: the result */
- mem_heap_t* heap, /* in: memory heap where result is allocated */
- const void* b1, /* in: block 1 */
- ulint len1, /* in: length of b1, in bytes */
- const void* b2, /* in: block 2 */
- ulint len2); /* in: length of b2, in bytes */
-
-/********************************************************************
+/****************************************************************//**
A simple (s)printf replacement that dynamically allocates the space for the
formatted string from the given heap. This supports a very limited set of
the printf syntax: types 's' and 'u' and length modifier 'l' (which is
-required for the 'u' type). */
-
+required for the 'u' type).
+@return heap-allocated formatted string */
+UNIV_INTERN
char*
mem_heap_printf(
/*============*/
- /* out: heap-allocated formatted string */
- mem_heap_t* heap, /* in: memory heap */
- const char* format, /* in: format string */
+ mem_heap_t* heap, /*!< in: memory heap */
+ const char* format, /*!< in: format string */
...) __attribute__ ((format (printf, 2, 3)));
#ifdef MEM_PERIODIC_CHECK
-/**********************************************************************
+/******************************************************************//**
Goes through the list of all allocated mem blocks, checks their magic
numbers, and reports possible corruption. */
-
+UNIV_INTERN
void
mem_validate_all_blocks(void);
/*=========================*/
@@ -366,7 +350,7 @@ mem_validate_all_blocks(void);
struct mem_block_info_struct {
ulint magic_n;/* magic number for debugging */
char file_name[8];/* file name where the mem heap was created */
- ulint line; /* line number where the mem heap was created */
+ ulint line; /*!< line number where the mem heap was created */
UT_LIST_BASE_NODE_T(mem_block_t) base; /* In the first block in the
the list this is the base node of the list of blocks;
in subsequent blocks this is undefined */
@@ -374,22 +358,25 @@ struct mem_block_info_struct {
and prev in the list. The first block allocated
to the heap is also the first block in this list,
though it also contains the base node of the list. */
- ulint len; /* physical length of this block in bytes */
- ulint type; /* type of heap: MEM_HEAP_DYNAMIC, or
+ ulint len; /*!< physical length of this block in bytes */
+ ulint type; /*!< type of heap: MEM_HEAP_DYNAMIC, or
MEM_HEAP_BUF possibly ORed to MEM_HEAP_BTR_SEARCH */
- ibool init_block; /* TRUE if this is the first block used in fast
- creation of a heap: the memory will be freed
- by the creator, not by mem_heap_free */
- ulint free; /* offset in bytes of the first free position for
+ ulint free; /*!< offset in bytes of the first free position for
user data in the block */
- ulint start; /* the value of the struct field 'free' at the
+ ulint start; /*!< the value of the struct field 'free' at the
creation of the block */
- byte* free_block;
+#ifndef UNIV_HOTBACKUP
+ void* free_block;
/* if the MEM_HEAP_BTR_SEARCH bit is set in type,
and this is the heap root, this can contain an
allocated buffer frame, which can be appended as a
free block to the heap, if we need more space;
otherwise, this is NULL */
+ void* buf_block;
+ /* if this block has been allocated from the buffer
+ pool, this contains the buf_block_t handle;
+ otherwise, this is NULL */
+#endif /* !UNIV_HOTBACKUP */
#ifdef MEM_PERIODIC_CHECK
UT_LIST_NODE_T(mem_block_t) mem_block_list;
/* List of all mem blocks allocated; protected
diff --git a/storage/innobase/include/mem0mem.ic b/storage/innobase/include/mem0mem.ic
index 6227a27f277..e7080d8c508 100644
--- a/storage/innobase/include/mem0mem.ic
+++ b/storage/innobase/include/mem0mem.ic
@@ -1,60 +1,75 @@
-/************************************************************************
-The memory management
+/*****************************************************************************
-(c) 1994, 1995 Innobase Oy
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-Created 6/8/1994 Heikki Tuuri
-*************************************************************************/
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
-#include "mem0dbg.ic"
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-#include "mem0pool.h"
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-/*******************************************************************
-Creates a memory heap block where data can be allocated. */
+*****************************************************************************/
+/********************************************************************//**
+@file include/mem0mem.ic
+The memory management
+
+Created 6/8/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "mem0dbg.ic"
+#ifndef UNIV_HOTBACKUP
+# include "mem0pool.h"
+#endif /* !UNIV_HOTBACKUP */
+
+/***************************************************************//**
+Creates a memory heap block where data can be allocated.
+@return own: memory heap block, NULL if did not succeed (only possible
+for MEM_HEAP_BTR_SEARCH type heaps) */
+UNIV_INTERN
mem_block_t*
mem_heap_create_block(
/*==================*/
- /* out, own: memory heap block, NULL if
- did not succeed (only possible for
- MEM_HEAP_BTR_SEARCH type heaps) */
- mem_heap_t* heap, /* in: memory heap or NULL if first block
+ mem_heap_t* heap, /*!< in: memory heap or NULL if first block
should be created */
- ulint n, /* in: number of bytes needed for user data, or
- if init_block is not NULL, its size in bytes */
- void* init_block, /* in: init block in fast create,
- type must be MEM_HEAP_DYNAMIC */
- ulint type, /* in: type of heap: MEM_HEAP_DYNAMIC or
+ ulint n, /*!< in: number of bytes needed for user data */
+ ulint type, /*!< in: type of heap: MEM_HEAP_DYNAMIC or
MEM_HEAP_BUFFER */
- const char* file_name,/* in: file name where created */
- ulint line); /* in: line where created */
-/**********************************************************************
+ const char* file_name,/*!< in: file name where created */
+ ulint line); /*!< in: line where created */
+/******************************************************************//**
Frees a block from a memory heap. */
-
+UNIV_INTERN
void
mem_heap_block_free(
/*================*/
- mem_heap_t* heap, /* in: heap */
- mem_block_t* block); /* in: block to free */
-/**********************************************************************
+ mem_heap_t* heap, /*!< in: heap */
+ mem_block_t* block); /*!< in: block to free */
+#ifndef UNIV_HOTBACKUP
+/******************************************************************//**
Frees the free_block field from a memory heap. */
-
+UNIV_INTERN
void
mem_heap_free_block_free(
/*=====================*/
- mem_heap_t* heap); /* in: heap */
-/*******************************************************************
-Adds a new block to a memory heap. */
-
+ mem_heap_t* heap); /*!< in: heap */
+#endif /* !UNIV_HOTBACKUP */
+/***************************************************************//**
+Adds a new block to a memory heap.
+@return created block, NULL if did not succeed (only possible for
+MEM_HEAP_BTR_SEARCH type heaps) */
+UNIV_INTERN
mem_block_t*
mem_heap_add_block(
/*===============*/
- /* out: created block, NULL if did not
- succeed (only possible for
- MEM_HEAP_BTR_SEARCH type heaps)*/
- mem_heap_t* heap, /* in: memory heap */
- ulint n); /* in: number of bytes user needs */
+ mem_heap_t* heap, /*!< in: memory heap */
+ ulint n); /*!< in: number of bytes user needs */
UNIV_INLINE
void
@@ -122,17 +137,33 @@ mem_block_get_start(mem_block_t* block)
return(block->start);
}
-/*******************************************************************
-Allocates n bytes of memory from a memory heap. */
+/***************************************************************//**
+Allocates n bytes from a memory heap and fills them with zero bytes.
+@return allocated, zero-filled storage */
+UNIV_INLINE
+void*
+mem_heap_zalloc(
+/*============*/
+ mem_heap_t* heap, /*!< in: memory heap */
+ ulint n) /*!< in: number of bytes; if the heap is allowed
+ to grow into the buffer pool, this must be
+ <= MEM_MAX_ALLOC_IN_BUF */
+{
+ ut_ad(heap);
+ ut_ad(!(heap->type & MEM_HEAP_BTR_SEARCH)); /* the only heap type for which mem_heap_alloc() may return NULL */
+ return(memset(mem_heap_alloc(heap, n), 0, n)); /* result is passed to memset() unchecked */
+}
+
+/***************************************************************//**
+Allocates n bytes of memory from a memory heap.
+@return allocated storage, NULL if did not succeed (only possible for
+MEM_HEAP_BTR_SEARCH type heaps) */
UNIV_INLINE
void*
mem_heap_alloc(
/*===========*/
- /* out: allocated storage, NULL if did not
- succeed (only possible for
- MEM_HEAP_BTR_SEARCH type heaps) */
- mem_heap_t* heap, /* in: memory heap */
- ulint n) /* in: number of bytes; if the heap is allowed
+ mem_heap_t* heap, /*!< in: memory heap */
+ ulint n) /*!< in: number of bytes; if the heap is allowed
to grow into the buffer pool, this must be
<= MEM_MAX_ALLOC_IN_BUF */
{
@@ -186,14 +217,14 @@ mem_heap_alloc(
return(buf);
}
-/*********************************************************************
-Returns a pointer to the heap top. */
+/*****************************************************************//**
+Returns a pointer to the heap top.
+@return pointer to the heap top */
UNIV_INLINE
byte*
mem_heap_get_heap_top(
/*==================*/
- /* out: pointer to the heap top */
- mem_heap_t* heap) /* in: memory heap */
+ mem_heap_t* heap) /*!< in: memory heap */
{
mem_block_t* block;
byte* buf;
@@ -207,7 +238,7 @@ mem_heap_get_heap_top(
return(buf);
}
-/*********************************************************************
+/*****************************************************************//**
Frees the space in a memory heap exceeding the pointer given. The
pointer must have been acquired from mem_heap_get_heap_top. The first
memory block of the heap is not freed. */
@@ -215,8 +246,8 @@ UNIV_INLINE
void
mem_heap_free_heap_top(
/*===================*/
- mem_heap_t* heap, /* in: heap from which to free */
- byte* old_top)/* in: pointer to old top of heap */
+ mem_heap_t* heap, /*!< in: heap from which to free */
+ byte* old_top)/*!< in: pointer to old top of heap */
{
mem_block_t* block;
mem_block_t* prev_block;
@@ -291,31 +322,32 @@ mem_heap_free_heap_top(
}
}
-/*********************************************************************
+/*****************************************************************//**
Empties a memory heap. The first memory block of the heap is not freed. */
UNIV_INLINE
void
mem_heap_empty(
/*===========*/
- mem_heap_t* heap) /* in: heap to empty */
+ mem_heap_t* heap) /*!< in: heap to empty */
{
mem_heap_free_heap_top(heap, (byte*)heap + mem_block_get_start(heap));
-
+#ifndef UNIV_HOTBACKUP
if (heap->free_block) {
mem_heap_free_block_free(heap);
}
+#endif /* !UNIV_HOTBACKUP */
}
-/*********************************************************************
+/*****************************************************************//**
Returns a pointer to the topmost element in a memory heap. The size of the
-element must be given. */
+element must be given.
+@return pointer to the topmost element */
UNIV_INLINE
void*
mem_heap_get_top(
/*=============*/
- /* out: pointer to the topmost element */
- mem_heap_t* heap, /* in: memory heap */
- ulint n) /* in: size of the topmost element */
+ mem_heap_t* heap, /*!< in: memory heap */
+ ulint n) /*!< in: size of the topmost element */
{
mem_block_t* block;
void* buf;
@@ -341,15 +373,15 @@ mem_heap_get_top(
return(buf);
}
-/*********************************************************************
+/*****************************************************************//**
Frees the topmost element in a memory heap. The size of the element must be
given. */
UNIV_INLINE
void
mem_heap_free_top(
/*==============*/
- mem_heap_t* heap, /* in: memory heap */
- ulint n) /* in: size of the topmost element */
+ mem_heap_t* heap, /*!< in: memory heap */
+ ulint n) /*!< in: size of the topmost element */
{
mem_block_t* block;
@@ -384,48 +416,32 @@ mem_heap_free_top(
}
}
-/*********************************************************************
+/*****************************************************************//**
NOTE: Use the corresponding macros instead of this function. Creates a
memory heap. For debugging purposes, takes also the file name and line as
-argument. */
+argument.
+@return own: memory heap, NULL if did not succeed (only possible for
+MEM_HEAP_BTR_SEARCH type heaps) */
UNIV_INLINE
mem_heap_t*
mem_heap_create_func(
/*=================*/
- /* out, own: memory heap, NULL if
- did not succeed (only possible for
- MEM_HEAP_BTR_SEARCH type heaps)*/
- ulint n, /* in: desired start block size,
+ ulint n, /*!< in: desired start block size,
this means that a single user buffer
of size n will fit in the block,
- 0 creates a default size block;
- if init_block is not NULL, n tells
- its size in bytes */
- void* init_block, /* in: if very fast creation is
- wanted, the caller can reserve some
- memory from its stack, for example,
- and pass it as the the initial block
- to the heap: then no OS call of malloc
- is needed at the creation. CAUTION:
- the caller must make sure the initial
- block is not unintentionally erased
- (if allocated in the stack), before
- the memory heap is explicitly freed. */
- ulint type, /* in: heap type */
- const char* file_name, /* in: file name where created */
- ulint line) /* in: line where created */
+ 0 creates a default size block */
+ ulint type, /*!< in: heap type */
+ const char* file_name, /*!< in: file name where created */
+ ulint line) /*!< in: line where created */
{
mem_block_t* block;
- if (n > 0) {
- block = mem_heap_create_block(NULL, n, init_block, type,
- file_name, line);
- } else {
- block = mem_heap_create_block(NULL, MEM_BLOCK_START_SIZE,
- init_block, type,
- file_name, line);
+ if (!n) {
+ n = MEM_BLOCK_START_SIZE;
}
+ block = mem_heap_create_block(NULL, n, type, file_name, line);
+
if (block == NULL) {
return(NULL);
@@ -445,7 +461,7 @@ mem_heap_create_func(
return(block);
}
-/*********************************************************************
+/*****************************************************************//**
NOTE: Use the corresponding macro instead of this function. Frees the space
occupied by a memory heap. In the debug version erases the heap memory
blocks. */
@@ -453,9 +469,9 @@ UNIV_INLINE
void
mem_heap_free_func(
/*===============*/
- mem_heap_t* heap, /* in, own: heap to be freed */
+ mem_heap_t* heap, /*!< in, own: heap to be freed */
const char* file_name __attribute__((unused)),
- /* in: file name where freed */
+ /*!< in: file name where freed */
ulint line __attribute__((unused)))
{
mem_block_t* block;
@@ -473,10 +489,11 @@ mem_heap_free_func(
mem_hash_remove(heap, file_name, line);
#endif
-
+#ifndef UNIV_HOTBACKUP
if (heap->free_block) {
mem_heap_free_block_free(heap);
}
+#endif /* !UNIV_HOTBACKUP */
while (block != NULL) {
/* Store the contents of info before freeing current block
@@ -490,32 +507,44 @@ mem_heap_free_func(
}
}
-/*******************************************************************
+/***************************************************************//**
NOTE: Use the corresponding macro instead of this function.
Allocates a single buffer of memory from the dynamic memory of
the C compiler. Is like malloc of C. The buffer must be freed
-with mem_free. */
+with mem_free.
+@return own: free storage */
UNIV_INLINE
void*
mem_alloc_func(
/*===========*/
- /* out, own: free storage */
- ulint n, /* in: desired number of bytes */
- const char* file_name, /* in: file name where created */
- ulint line /* in: line where created */
- )
+ ulint n, /*!< in: desired number of bytes */
+ ulint* size, /*!< out: allocated size in bytes,
+ or NULL */
+ const char* file_name, /*!< in: file name where created */
+ ulint line) /*!< in: line where created */
{
mem_heap_t* heap;
void* buf;
- heap = mem_heap_create_func(n, NULL, MEM_HEAP_DYNAMIC, file_name,
- line);
+ heap = mem_heap_create_func(n, MEM_HEAP_DYNAMIC, file_name, line);
/* Note that as we created the first block in the heap big enough
for the buffer requested by the caller, the buffer will be in the
first block and thus we can calculate the pointer to the heap from
the pointer to the buffer when we free the memory buffer. */
+ if (UNIV_LIKELY_NULL(size)) {
+ /* Adjust the allocation to the actual size of the
+ memory block. */
+ ulint m = mem_block_get_len(heap)
+ - mem_block_get_free(heap);
+#ifdef UNIV_MEM_DEBUG
+ m -= MEM_FIELD_HEADER_SIZE + MEM_FIELD_TRAILER_SIZE;
+#endif /* UNIV_MEM_DEBUG */
+ ut_ad(m >= n);
+ *size = n = m;
+ }
+
buf = mem_heap_alloc(heap, n);
ut_a((byte*)heap == (byte*)buf - MEM_BLOCK_HEADER_SIZE
@@ -523,7 +552,7 @@ mem_alloc_func(
return(buf);
}
-/*******************************************************************
+/***************************************************************//**
NOTE: Use the corresponding macro instead of this function. Frees a single
buffer of storage from the dynamic memory of the C compiler. Similar to the
free of C. */
@@ -531,10 +560,9 @@ UNIV_INLINE
void
mem_free_func(
/*==========*/
- void* ptr, /* in, own: buffer to be freed */
- const char* file_name, /* in: file name where created */
- ulint line /* in: line where created */
- )
+ void* ptr, /*!< in, own: buffer to be freed */
+ const char* file_name, /*!< in: file name where created */
+ ulint line) /*!< in: line where created */
{
mem_heap_t* heap;
@@ -543,13 +571,13 @@ mem_free_func(
mem_heap_free_func(heap, file_name, line);
}
-/*********************************************************************
+/*****************************************************************//**
Returns the space in bytes occupied by a memory heap. */
UNIV_INLINE
ulint
mem_heap_get_size(
/*==============*/
- mem_heap_t* heap) /* in: heap */
+ mem_heap_t* heap) /*!< in: heap */
{
mem_block_t* block;
ulint size = 0;
@@ -563,57 +591,56 @@ mem_heap_get_size(
size += mem_block_get_len(block);
block = UT_LIST_GET_NEXT(list, block);
}
-
+#ifndef UNIV_HOTBACKUP
if (heap->free_block) {
size += UNIV_PAGE_SIZE;
}
+#endif /* !UNIV_HOTBACKUP */
return(size);
}
-/**************************************************************************
-Duplicates a NUL-terminated string. */
+/**********************************************************************//**
+Duplicates a NUL-terminated string.
+@return own: a copy of the string, must be deallocated with mem_free */
UNIV_INLINE
char*
mem_strdup(
/*=======*/
- /* out, own: a copy of the string,
- must be deallocated with mem_free */
- const char* str) /* in: string to be copied */
+ const char* str) /*!< in: string to be copied */
{
ulint len = strlen(str) + 1;
- return(memcpy(mem_alloc(len), str, len));
+ return((char*) memcpy(mem_alloc(len), str, len));
}
-/**************************************************************************
-Makes a NUL-terminated copy of a nonterminated string. */
+/**********************************************************************//**
+Makes a NUL-terminated copy of a nonterminated string.
+@return own: a copy of the string, must be deallocated with mem_free */
UNIV_INLINE
char*
mem_strdupl(
/*========*/
- /* out, own: a copy of the string,
- must be deallocated with mem_free */
- const char* str, /* in: string to be copied */
- ulint len) /* in: length of str, in bytes */
+ const char* str, /*!< in: string to be copied */
+ ulint len) /*!< in: length of str, in bytes */
{
- char* s = mem_alloc(len + 1);
+ char* s = (char*) mem_alloc(len + 1);
s[len] = 0;
- return(memcpy(s, str, len));
+ return((char*) memcpy(s, str, len));
}
-/**************************************************************************
+/**********************************************************************//**
Makes a NUL-terminated copy of a nonterminated string,
-allocated from a memory heap. */
+allocated from a memory heap.
+@return own: a copy of the string */
UNIV_INLINE
char*
mem_heap_strdupl(
/*=============*/
- /* out, own: a copy of the string */
- mem_heap_t* heap, /* in: memory heap where string is allocated */
- const char* str, /* in: string to be copied */
- ulint len) /* in: length of str, in bytes */
+ mem_heap_t* heap, /*!< in: memory heap where string is allocated */
+ const char* str, /*!< in: string to be copied */
+ ulint len) /*!< in: length of str, in bytes */
{
- char* s = mem_heap_alloc(heap, len + 1);
+ char* s = (char*) mem_heap_alloc(heap, len + 1);
s[len] = 0;
- return(memcpy(s, str, len));
+ return((char*) memcpy(s, str, len));
}
diff --git a/storage/innobase/include/mem0pool.h b/storage/innobase/include/mem0pool.h
index bf659ca9a72..5e93bf88a47 100644
--- a/storage/innobase/include/mem0pool.h
+++ b/storage/innobase/include/mem0pool.h
@@ -1,7 +1,24 @@
-/******************************************************
-The lowest-level memory management
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1994, 1995 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/mem0pool.h
+The lowest-level memory management
Created 6/9/1994 Heikki Tuuri
*******************************************************/
@@ -13,92 +30,103 @@ Created 6/9/1994 Heikki Tuuri
#include "os0file.h"
#include "ut0lst.h"
+/** Memory area header */
typedef struct mem_area_struct mem_area_t;
+/** Memory pool */
typedef struct mem_pool_struct mem_pool_t;
-/* The common memory pool */
+/** The common memory pool */
extern mem_pool_t* mem_comm_pool;
-/* Memory area header */
+/** Memory area header */
struct mem_area_struct{
- ulint size_and_free; /* memory area size is obtained by
+ ulint size_and_free; /*!< memory area size is obtained by
anding with ~MEM_AREA_FREE; area in
a free list if ANDing with
MEM_AREA_FREE results in nonzero */
UT_LIST_NODE_T(mem_area_t)
- free_list; /* free list node */
+ free_list; /*!< free list node */
};
-/* Each memory area takes this many extra bytes for control information */
+/** Each memory area takes this many extra bytes for control information */
#define MEM_AREA_EXTRA_SIZE (ut_calc_align(sizeof(struct mem_area_struct),\
UNIV_MEM_ALIGNMENT))
-/************************************************************************
-Creates a memory pool. */
-
+/********************************************************************//**
+Creates a memory pool.
+@return memory pool */
+UNIV_INTERN
mem_pool_t*
mem_pool_create(
/*============*/
- /* out: memory pool */
- ulint size); /* in: pool size in bytes */
-/************************************************************************
+ ulint size); /*!< in: pool size in bytes */
+/********************************************************************//**
+Frees a memory pool. */
+UNIV_INTERN
+void
+mem_pool_free(
+/*==========*/
+ mem_pool_t* pool); /*!< in, own: memory pool */
+/********************************************************************//**
Allocates memory from a pool. NOTE: This low-level function should only be
-used in mem0mem.*! */
-
+used in mem0mem.*!
+@return own: allocated memory buffer */
+UNIV_INTERN
void*
mem_area_alloc(
/*===========*/
- /* out, own: allocated memory buffer */
- ulint size, /* in: allocated size in bytes; for optimum
+ ulint* psize, /*!< in: requested size in bytes; for optimum
space usage, the size should be a power of 2
- minus MEM_AREA_EXTRA_SIZE */
- mem_pool_t* pool); /* in: memory pool */
-/************************************************************************
+ minus MEM_AREA_EXTRA_SIZE;
+ out: allocated size in bytes (greater than
+ or equal to the requested size) */
+ mem_pool_t* pool); /*!< in: memory pool */
+/********************************************************************//**
Frees memory to a pool. */
-
+UNIV_INTERN
void
mem_area_free(
/*==========*/
- void* ptr, /* in, own: pointer to allocated memory
+ void* ptr, /*!< in, own: pointer to allocated memory
buffer */
- mem_pool_t* pool); /* in: memory pool */
-/************************************************************************
-Returns the amount of reserved memory. */
-
+ mem_pool_t* pool); /*!< in: memory pool */
+/********************************************************************//**
+Returns the amount of reserved memory.
+@return reserved memory in bytes */
+UNIV_INTERN
ulint
mem_pool_get_reserved(
/*==================*/
- /* out: reserved mmeory in bytes */
- mem_pool_t* pool); /* in: memory pool */
-/************************************************************************
+ mem_pool_t* pool); /*!< in: memory pool */
+/********************************************************************//**
Reserves the mem pool mutex. */
-
+UNIV_INTERN
void
mem_pool_mutex_enter(void);
/*======================*/
-/************************************************************************
+/********************************************************************//**
Releases the mem pool mutex. */
-
+UNIV_INTERN
void
mem_pool_mutex_exit(void);
/*=====================*/
-/************************************************************************
-Validates a memory pool. */
-
+/********************************************************************//**
+Validates a memory pool.
+@return TRUE if ok */
+UNIV_INTERN
ibool
mem_pool_validate(
/*==============*/
- /* out: TRUE if ok */
- mem_pool_t* pool); /* in: memory pool */
-/************************************************************************
+ mem_pool_t* pool); /*!< in: memory pool */
+/********************************************************************//**
Prints info of a memory pool. */
-
+UNIV_INTERN
void
mem_pool_print_info(
/*================*/
- FILE* outfile,/* in: output file to write to */
- mem_pool_t* pool); /* in: memory pool */
+ FILE* outfile,/*!< in: output file to write to */
+ mem_pool_t* pool); /*!< in: memory pool */
#ifndef UNIV_NONINL
diff --git a/storage/innobase/include/mem0pool.ic b/storage/innobase/include/mem0pool.ic
index 4e8c08733ed..b891dd6dea0 100644
--- a/storage/innobase/include/mem0pool.ic
+++ b/storage/innobase/include/mem0pool.ic
@@ -1,7 +1,24 @@
-/************************************************************************
-The lowest-level memory management
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1994, 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/mem0pool.ic
+The lowest-level memory management
Created 6/8/1994 Heikki Tuuri
*************************************************************************/
diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h
index 6a3920aa8a1..6322af2a569 100644
--- a/storage/innobase/include/mtr0log.h
+++ b/storage/innobase/include/mtr0log.h
@@ -1,7 +1,24 @@
-/******************************************************
-Mini-transaction logging routines
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1995 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/mtr0log.h
+Mini-transaction logging routines
Created 12/7/1995 Heikki Tuuri
*******************************************************/
@@ -13,202 +30,218 @@ Created 12/7/1995 Heikki Tuuri
#include "mtr0mtr.h"
#include "dict0types.h"
-/************************************************************
+#ifndef UNIV_HOTBACKUP
+/********************************************************//**
Writes 1 - 4 bytes to a file page buffered in the buffer pool.
Writes the corresponding log record to the mini-transaction log. */
-
+UNIV_INTERN
void
mlog_write_ulint(
/*=============*/
- byte* ptr, /* in: pointer where to write */
- ulint val, /* in: value to write */
- byte type, /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************
+ byte* ptr, /*!< in: pointer where to write */
+ ulint val, /*!< in: value to write */
+ byte type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/********************************************************//**
Writes 8 bytes to a file page buffered in the buffer pool.
Writes the corresponding log record to the mini-transaction log. */
-
+UNIV_INTERN
void
mlog_write_dulint(
/*==============*/
- byte* ptr, /* in: pointer where to write */
- dulint val, /* in: value to write */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************
+ byte* ptr, /*!< in: pointer where to write */
+ dulint val, /*!< in: value to write */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/********************************************************//**
Writes a string to a file page buffered in the buffer pool. Writes the
corresponding log record to the mini-transaction log. */
-
+UNIV_INTERN
void
mlog_write_string(
/*==============*/
- byte* ptr, /* in: pointer where to write */
- const byte* str, /* in: string to write */
- ulint len, /* in: string length */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************
+ byte* ptr, /*!< in: pointer where to write */
+ const byte* str, /*!< in: string to write */
+ ulint len, /*!< in: string length */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/********************************************************//**
+Logs a write of a string to a file page buffered in the buffer pool.
+Writes the corresponding log record to the mini-transaction log. */
+UNIV_INTERN
+void
+mlog_log_string(
+/*============*/
+ byte* ptr, /*!< in: pointer written to */
+ ulint len, /*!< in: string length */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/********************************************************//**
Writes initial part of a log record consisting of one-byte item
type and four-byte space and page numbers. */
-
+UNIV_INTERN
void
mlog_write_initial_log_record(
/*==========================*/
- byte* ptr, /* in: pointer to (inside) a buffer frame
- holding the file page where modification
- is made */
- byte type, /* in: log item type: MLOG_1BYTE, ... */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************
-Writes a log record about an .ibd file create/delete/rename. */
+ const byte* ptr, /*!< in: pointer to (inside) a buffer
+ frame holding the file page where
+ modification is made */
+ byte type, /*!< in: log item type: MLOG_1BYTE, ... */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/********************************************************//**
+Writes a log record about an .ibd file create/delete/rename.
+@return new value of log_ptr */
UNIV_INLINE
byte*
mlog_write_initial_log_record_for_file_op(
/*======================================*/
- /* out: new value of log_ptr */
- ulint type, /* in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or
+ ulint type, /*!< in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or
MLOG_FILE_RENAME */
- ulint space_id,/* in: space id, if applicable */
- ulint page_no,/* in: page number (not relevant currently) */
- byte* log_ptr,/* in: pointer to mtr log which has been opened */
- mtr_t* mtr); /* in: mtr */
-/************************************************************
+ ulint space_id,/*!< in: space id, if applicable */
+ ulint page_no,/*!< in: page number (not relevant currently) */
+ byte* log_ptr,/*!< in: pointer to mtr log which has been opened */
+ mtr_t* mtr); /*!< in: mtr */
+/********************************************************//**
Catenates 1 - 4 bytes to the mtr log. */
UNIV_INLINE
void
mlog_catenate_ulint(
/*================*/
- mtr_t* mtr, /* in: mtr */
- ulint val, /* in: value to write */
- ulint type); /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
-/************************************************************
+ mtr_t* mtr, /*!< in: mtr */
+ ulint val, /*!< in: value to write */
+ ulint type); /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+/********************************************************//**
Catenates n bytes to the mtr log. */
-
+UNIV_INTERN
void
mlog_catenate_string(
/*=================*/
- mtr_t* mtr, /* in: mtr */
- const byte* str, /* in: string to write */
- ulint len); /* in: string length */
-/************************************************************
+ mtr_t* mtr, /*!< in: mtr */
+ const byte* str, /*!< in: string to write */
+ ulint len); /*!< in: string length */
+/********************************************************//**
Catenates a compressed ulint to mlog. */
UNIV_INLINE
void
mlog_catenate_ulint_compressed(
/*===========================*/
- mtr_t* mtr, /* in: mtr */
- ulint val); /* in: value to write */
-/************************************************************
+ mtr_t* mtr, /*!< in: mtr */
+ ulint val); /*!< in: value to write */
+/********************************************************//**
Catenates a compressed dulint to mlog. */
UNIV_INLINE
void
mlog_catenate_dulint_compressed(
/*============================*/
- mtr_t* mtr, /* in: mtr */
- dulint val); /* in: value to write */
-/************************************************************
-Opens a buffer to mlog. It must be closed with mlog_close. */
+ mtr_t* mtr, /*!< in: mtr */
+ dulint val); /*!< in: value to write */
+/********************************************************//**
+Opens a buffer to mlog. It must be closed with mlog_close.
+@return buffer, NULL if log mode MTR_LOG_NONE */
UNIV_INLINE
byte*
mlog_open(
/*======*/
- /* out: buffer, NULL if log mode MTR_LOG_NONE */
- mtr_t* mtr, /* in: mtr */
- ulint size); /* in: buffer size in bytes; MUST be
+ mtr_t* mtr, /*!< in: mtr */
+ ulint size); /*!< in: buffer size in bytes; MUST be
smaller than DYN_ARRAY_DATA_SIZE! */
-/************************************************************
+/********************************************************//**
Closes a buffer opened to mlog. */
UNIV_INLINE
void
mlog_close(
/*=======*/
- mtr_t* mtr, /* in: mtr */
- byte* ptr); /* in: buffer space from ptr up was not used */
-/************************************************************
+ mtr_t* mtr, /*!< in: mtr */
+ byte* ptr); /*!< in: buffer space from ptr up was not used */
+/********************************************************//**
Writes the initial part of a log record (3..11 bytes).
If the implementation of this function is changed, all
-size parameters to mlog_open() should be adjusted accordingly! */
+size parameters to mlog_open() should be adjusted accordingly!
+@return new value of log_ptr */
UNIV_INLINE
byte*
mlog_write_initial_log_record_fast(
/*===============================*/
- /* out: new value of log_ptr */
- byte* ptr, /* in: pointer to (inside) a buffer frame holding the
- file page where modification is made */
- byte type, /* in: log item type: MLOG_1BYTE, ... */
- byte* log_ptr,/* in: pointer to mtr log which has been opened */
- mtr_t* mtr); /* in: mtr */
-/************************************************************
-Parses an initial log record written by mlog_write_initial_log_record. */
-
+ const byte* ptr, /*!< in: pointer to (inside) a buffer
+ frame holding the file page where
+ modification is made */
+ byte type, /*!< in: log item type: MLOG_1BYTE, ... */
+ byte* log_ptr,/*!< in: pointer to mtr log which has
+ been opened */
+ mtr_t* mtr); /*!< in: mtr */
+#else /* !UNIV_HOTBACKUP */
+# define mlog_write_initial_log_record(ptr,type,mtr) ((void) 0)
+# define mlog_write_initial_log_record_fast(ptr,type,log_ptr,mtr) ((byte *) 0)
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************//**
+Parses an initial log record written by mlog_write_initial_log_record.
+@return parsed record end, NULL if not a complete record */
+UNIV_INTERN
byte*
mlog_parse_initial_log_record(
/*==========================*/
- /* out: parsed record end, NULL if not a complete
- record */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- byte* type, /* out: log record type: MLOG_1BYTE, ... */
- ulint* space, /* out: space id */
- ulint* page_no);/* out: page number */
-/************************************************************
-Parses a log record written by mlog_write_ulint or mlog_write_dulint. */
-
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ byte* type, /*!< out: log record type: MLOG_1BYTE, ... */
+ ulint* space, /*!< out: space id */
+ ulint* page_no);/*!< out: page number */
+/********************************************************//**
+Parses a log record written by mlog_write_ulint or mlog_write_dulint.
+@return parsed record end, NULL if not a complete record */
+UNIV_INTERN
byte*
mlog_parse_nbytes(
/*==============*/
- /* out: parsed record end, NULL if not a complete
- record */
- ulint type, /* in: log record type: MLOG_1BYTE, ... */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- byte* page); /* in: page where to apply the log record, or NULL */
-/************************************************************
-Parses a log record written by mlog_write_string. */
-
+ ulint type, /*!< in: log record type: MLOG_1BYTE, ... */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ byte* page, /*!< in: page where to apply the log record, or NULL */
+ void* page_zip);/*!< in/out: compressed page, or NULL */
+/********************************************************//**
+Parses a log record written by mlog_write_string.
+@return parsed record end, NULL if not a complete record */
+UNIV_INTERN
byte*
mlog_parse_string(
/*==============*/
- /* out: parsed record end, NULL if not a complete
- record */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- byte* page); /* in: page where to apply the log record, or NULL */
-
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ byte* page, /*!< in: page where to apply the log record, or NULL */
+ void* page_zip);/*!< in/out: compressed page, or NULL */
-/************************************************************
+#ifndef UNIV_HOTBACKUP
+/********************************************************//**
Opens a buffer for mlog, writes the initial log record and,
if needed, the field lengths of an index. Reserves space
for further log entries. The log entry must be closed with
-mtr_close(). */
-
+mlog_close().
+@return buffer, NULL if log mode MTR_LOG_NONE */
+UNIV_INTERN
byte*
mlog_open_and_write_index(
/*======================*/
- /* out: buffer, NULL if log mode
- MTR_LOG_NONE */
- mtr_t* mtr, /* in: mtr */
- byte* rec, /* in: index record or page */
- dict_index_t* index, /* in: record descriptor */
- byte type, /* in: log item type */
- ulint size); /* in: requested buffer size in bytes
+ mtr_t* mtr, /*!< in: mtr */
+ const byte* rec, /*!< in: index record or page */
+ dict_index_t* index, /*!< in: record descriptor */
+ byte type, /*!< in: log item type */
+ ulint size); /*!< in: requested buffer size in bytes
(if 0, calls mlog_close() and returns NULL) */
+#endif /* !UNIV_HOTBACKUP */
-/************************************************************
-Parses a log record written by mlog_open_and_write_index. */
-
+/********************************************************//**
+Parses a log record written by mlog_open_and_write_index.
+@return parsed record end, NULL if not a complete record */
+UNIV_INTERN
byte*
mlog_parse_index(
/*=============*/
- /* out: parsed record end,
- NULL if not a complete record */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- /* out: new value of log_ptr */
- ibool comp, /* in: TRUE=compact record format */
- dict_index_t** index); /* out, own: dummy index */
+ byte* ptr, /*!< in: buffer */
+ const byte* end_ptr,/*!< in: buffer end */
+ ibool comp, /*!< in: TRUE=compact record format */
+ dict_index_t** index); /*!< out, own: dummy index */
+#ifndef UNIV_HOTBACKUP
/* Insert, update, and maybe other functions may use this value to define an
extra mlog buffer size for variable size data */
#define MLOG_BUF_MARGIN 256
+#endif /* !UNIV_HOTBACKUP */
#ifndef UNIV_NONINL
#include "mtr0log.ic"
diff --git a/storage/innobase/include/mtr0log.ic b/storage/innobase/include/mtr0log.ic
index 1626f1e77e5..5c24c38b337 100644
--- a/storage/innobase/include/mtr0log.ic
+++ b/storage/innobase/include/mtr0log.ic
@@ -1,7 +1,24 @@
-/******************************************************
-Mini-transaction logging routines
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1995 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/mtr0log.ic
+Mini-transaction logging routines
Created 12/7/1995 Heikki Tuuri
*******************************************************/
@@ -12,15 +29,15 @@ Created 12/7/1995 Heikki Tuuri
#include "fsp0types.h"
#include "trx0sys.h"
-/************************************************************
-Opens a buffer to mlog. It must be closed with mlog_close. */
+/********************************************************//**
+Opens a buffer to mlog. It must be closed with mlog_close.
+@return buffer, NULL if log mode MTR_LOG_NONE */
UNIV_INLINE
byte*
mlog_open(
/*======*/
- /* out: buffer, NULL if log mode MTR_LOG_NONE */
- mtr_t* mtr, /* in: mtr */
- ulint size) /* in: buffer size in bytes; MUST be
+ mtr_t* mtr, /*!< in: mtr */
+ ulint size) /*!< in: buffer size in bytes; MUST be
smaller than DYN_ARRAY_DATA_SIZE! */
{
dyn_array_t* mlog;
@@ -37,14 +54,14 @@ mlog_open(
return(dyn_array_open(mlog, size));
}
-/************************************************************
+/********************************************************//**
Closes a buffer opened to mlog. */
UNIV_INLINE
void
mlog_close(
/*=======*/
- mtr_t* mtr, /* in: mtr */
- byte* ptr) /* in: buffer space from ptr up was not used */
+ mtr_t* mtr, /*!< in: mtr */
+ byte* ptr) /*!< in: buffer space from ptr up was not used */
{
dyn_array_t* mlog;
@@ -55,15 +72,16 @@ mlog_close(
dyn_array_close(mlog, ptr);
}
-/************************************************************
+#ifndef UNIV_HOTBACKUP
+/********************************************************//**
Catenates 1 - 4 bytes to the mtr log. The value is not compressed. */
UNIV_INLINE
void
mlog_catenate_ulint(
/*================*/
- mtr_t* mtr, /* in: mtr */
- ulint val, /* in: value to write */
- ulint type) /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+ mtr_t* mtr, /*!< in: mtr */
+ ulint val, /*!< in: value to write */
+ ulint type) /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
{
dyn_array_t* mlog;
byte* ptr;
@@ -87,7 +105,7 @@ mlog_catenate_ulint(
#if MLOG_8BYTES != 8
# error "MLOG_8BYTES != 8"
#endif
- ptr = dyn_array_push(mlog, type);
+ ptr = (byte*) dyn_array_push(mlog, type);
if (type == MLOG_4BYTES) {
mach_write_to_4(ptr, val);
@@ -99,14 +117,14 @@ mlog_catenate_ulint(
}
}
-/************************************************************
+/********************************************************//**
Catenates a compressed ulint to mlog. */
UNIV_INLINE
void
mlog_catenate_ulint_compressed(
/*===========================*/
- mtr_t* mtr, /* in: mtr */
- ulint val) /* in: value to write */
+ mtr_t* mtr, /*!< in: mtr */
+ ulint val) /*!< in: value to write */
{
byte* log_ptr;
@@ -123,14 +141,14 @@ mlog_catenate_ulint_compressed(
mlog_close(mtr, log_ptr);
}
-/************************************************************
+/********************************************************//**
Catenates a compressed dulint to mlog. */
UNIV_INLINE
void
mlog_catenate_dulint_compressed(
/*============================*/
- mtr_t* mtr, /* in: mtr */
- dulint val) /* in: value to write */
+ mtr_t* mtr, /*!< in: mtr */
+ dulint val) /*!< in: value to write */
{
byte* log_ptr;
@@ -147,34 +165,37 @@ mlog_catenate_dulint_compressed(
mlog_close(mtr, log_ptr);
}
-/************************************************************
+/********************************************************//**
Writes the initial part of a log record (3..11 bytes).
If the implementation of this function is changed, all
-size parameters to mlog_open() should be adjusted accordingly! */
+size parameters to mlog_open() should be adjusted accordingly!
+@return new value of log_ptr */
UNIV_INLINE
byte*
mlog_write_initial_log_record_fast(
/*===============================*/
- /* out: new value of log_ptr */
- byte* ptr, /* in: pointer to (inside) a buffer frame holding the
- file page where modification is made */
- byte type, /* in: log item type: MLOG_1BYTE, ... */
- byte* log_ptr,/* in: pointer to mtr log which has been opened */
- mtr_t* mtr) /* in: mtr */
+ const byte* ptr, /*!< in: pointer to (inside) a buffer
+ frame holding the file page where
+ modification is made */
+ byte type, /*!< in: log item type: MLOG_1BYTE, ... */
+ byte* log_ptr,/*!< in: pointer to mtr log which has
+ been opened */
+ mtr_t* mtr) /*!< in: mtr */
{
+#ifdef UNIV_DEBUG
buf_block_t* block;
+#endif
+ const byte* page;
ulint space;
ulint offset;
- ut_ad(mtr_memo_contains(mtr, buf_block_align(ptr),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_X_FIX));
ut_ad(type <= MLOG_BIGGEST_TYPE);
ut_ad(ptr && log_ptr);
- block = buf_block_align(ptr);
-
- space = buf_block_get_space(block);
- offset = buf_block_get_page_no(block);
+ page = (const byte*) ut_align_down(ptr, UNIV_PAGE_SIZE);
+ space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+ offset = mach_read_from_4(page + FIL_PAGE_OFFSET);
/* check whether the page is in the doublewrite buffer;
the doublewrite buffer is located in pages
@@ -206,13 +227,14 @@ mlog_write_initial_log_record_fast(
mtr->n_log_recs++;
#ifdef UNIV_LOG_DEBUG
- /* fprintf(stderr,
- "Adding to mtr log record type %lu space %lu page no %lu\n",
- type, space, offset); */
+ fprintf(stderr,
+ "Adding to mtr log record type %lu space %lu page no %lu\n",
+ (ulong) type, space, offset);
#endif
#ifdef UNIV_DEBUG
/* We now assume that all x-latched pages have been modified! */
+ block = (buf_block_t*) buf_block_align(ptr);
if (!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY)) {
@@ -222,19 +244,19 @@ mlog_write_initial_log_record_fast(
return(log_ptr);
}
-/************************************************************
-Writes a log record about an .ibd file create/delete/rename. */
+/********************************************************//**
+Writes a log record about an .ibd file create/delete/rename.
+@return new value of log_ptr */
UNIV_INLINE
byte*
mlog_write_initial_log_record_for_file_op(
/*======================================*/
- /* out: new value of log_ptr */
- ulint type, /* in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or
+ ulint type, /*!< in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or
MLOG_FILE_RENAME */
- ulint space_id,/* in: space id, if applicable */
- ulint page_no,/* in: page number (not relevant currently) */
- byte* log_ptr,/* in: pointer to mtr log which has been opened */
- mtr_t* mtr) /* in: mtr */
+ ulint space_id,/*!< in: space id, if applicable */
+ ulint page_no,/*!< in: page number (not relevant currently) */
+ byte* log_ptr,/*!< in: pointer to mtr log which has been opened */
+ mtr_t* mtr) /*!< in: mtr */
{
ut_ad(log_ptr);
@@ -249,3 +271,4 @@ mlog_write_initial_log_record_for_file_op(
return(log_ptr);
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h
index 2a160d27e0c..bc3f1951be9 100644
--- a/storage/innobase/include/mtr0mtr.h
+++ b/storage/innobase/include/mtr0mtr.h
@@ -1,7 +1,24 @@
-/******************************************************
-Mini-transaction buffer
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/mtr0mtr.h
+Mini-transaction buffer
Created 11/26/1995 Heikki Tuuri
*******************************************************/
@@ -37,288 +54,339 @@ first 3 values must be RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
#define MTR_MEMO_S_LOCK 55
#define MTR_MEMO_X_LOCK 56
-/* Log item types: we have made them to be of the type 'byte'
-for the compiler to warn if val and type parameters are switched
-in a call to mlog_write_ulint. NOTE! For 1 - 8 bytes, the
-flag value must give the length also! */
-#define MLOG_SINGLE_REC_FLAG 128 /* if the mtr contains only
+/** @name Log item types
+The log items are declared 'byte' so that the compiler can warn if val
+and type parameters are switched in a call to mlog_write_ulint. NOTE!
+For 1 - 8 bytes, the flag value must give the length also! @{ */
+#define MLOG_SINGLE_REC_FLAG 128 /*!< if the mtr contains only
one log record for one page,
i.e., write_initial_log_record
has been called only once,
this flag is ORed to the type
of that first log record */
-#define MLOG_1BYTE (1) /* one byte is written */
-#define MLOG_2BYTES (2) /* 2 bytes ... */
-#define MLOG_4BYTES (4) /* 4 bytes ... */
-#define MLOG_8BYTES (8) /* 8 bytes ... */
-#define MLOG_REC_INSERT ((byte)9) /* record insert */
-#define MLOG_REC_CLUST_DELETE_MARK ((byte)10) /* mark clustered index record
+#define MLOG_1BYTE (1) /*!< one byte is written */
+#define MLOG_2BYTES (2) /*!< 2 bytes ... */
+#define MLOG_4BYTES (4) /*!< 4 bytes ... */
+#define MLOG_8BYTES (8) /*!< 8 bytes ... */
+#define MLOG_REC_INSERT ((byte)9) /*!< record insert */
+#define MLOG_REC_CLUST_DELETE_MARK ((byte)10) /*!< mark clustered index record
deleted */
-#define MLOG_REC_SEC_DELETE_MARK ((byte)11) /* mark secondary index record
+#define MLOG_REC_SEC_DELETE_MARK ((byte)11) /*!< mark secondary index record
deleted */
-#define MLOG_REC_UPDATE_IN_PLACE ((byte)13) /* update of a record,
+#define MLOG_REC_UPDATE_IN_PLACE ((byte)13) /*!< update of a record,
preserves record field sizes */
-#define MLOG_REC_DELETE ((byte)14) /* delete a record from a
+#define MLOG_REC_DELETE ((byte)14) /*!< delete a record from a
page */
-#define MLOG_LIST_END_DELETE ((byte)15) /* delete record list end on
+#define MLOG_LIST_END_DELETE ((byte)15) /*!< delete record list end on
index page */
-#define MLOG_LIST_START_DELETE ((byte)16) /* delete record list start on
+#define MLOG_LIST_START_DELETE ((byte)16) /*!< delete record list start on
index page */
-#define MLOG_LIST_END_COPY_CREATED ((byte)17) /* copy record list end to a
+#define MLOG_LIST_END_COPY_CREATED ((byte)17) /*!< copy record list end to a
new created index page */
-#define MLOG_PAGE_REORGANIZE ((byte)18) /* reorganize an index page */
-#define MLOG_PAGE_CREATE ((byte)19) /* create an index page */
-#define MLOG_UNDO_INSERT ((byte)20) /* insert entry in an undo
+#define MLOG_PAGE_REORGANIZE ((byte)18) /*!< reorganize an
+ index page in
+ ROW_FORMAT=REDUNDANT */
+#define MLOG_PAGE_CREATE ((byte)19) /*!< create an index page */
+#define MLOG_UNDO_INSERT ((byte)20) /*!< insert entry in an undo
log */
-#define MLOG_UNDO_ERASE_END ((byte)21) /* erase an undo log
+#define MLOG_UNDO_ERASE_END ((byte)21) /*!< erase an undo log
page end */
-#define MLOG_UNDO_INIT ((byte)22) /* initialize a page in an
+#define MLOG_UNDO_INIT ((byte)22) /*!< initialize a page in an
undo log */
-#define MLOG_UNDO_HDR_DISCARD ((byte)23) /* discard an update undo log
+#define MLOG_UNDO_HDR_DISCARD ((byte)23) /*!< discard an update undo log
header */
-#define MLOG_UNDO_HDR_REUSE ((byte)24) /* reuse an insert undo log
+#define MLOG_UNDO_HDR_REUSE ((byte)24) /*!< reuse an insert undo log
header */
-#define MLOG_UNDO_HDR_CREATE ((byte)25) /* create an undo log header */
-#define MLOG_REC_MIN_MARK ((byte)26) /* mark an index record as the
- predefined minimum record */
-#define MLOG_IBUF_BITMAP_INIT ((byte)27) /* initialize an ibuf bitmap
- page */
+#define MLOG_UNDO_HDR_CREATE ((byte)25) /*!< create an undo
+ log header */
+#define MLOG_REC_MIN_MARK ((byte)26) /*!< mark an index
+ record as the
+ predefined minimum
+ record */
+#define MLOG_IBUF_BITMAP_INIT ((byte)27) /*!< initialize an
+ ibuf bitmap page */
/*#define MLOG_FULL_PAGE ((byte)28) full contents of a page */
-#define MLOG_INIT_FILE_PAGE ((byte)29) /* this means that a file page
- is taken into use and the prior
- contents of the page should be
- ignored: in recovery we must
- not trust the lsn values stored
- to the file page */
-#define MLOG_WRITE_STRING ((byte)30) /* write a string to a page */
-#define MLOG_MULTI_REC_END ((byte)31) /* if a single mtr writes
- log records for several pages,
+#ifdef UNIV_LOG_LSN_DEBUG
+# define MLOG_LSN ((byte)28) /*!< current LSN */
+#endif
+#define MLOG_INIT_FILE_PAGE ((byte)29) /*!< this means that a
+ file page is taken
+ into use and the prior
+ contents of the page
+ should be ignored: in
+ recovery we must not
+ trust the lsn values
+ stored to the file
+ page */
+#define MLOG_WRITE_STRING ((byte)30) /*!< write a string to
+ a page */
+#define MLOG_MULTI_REC_END ((byte)31) /*!< if a single mtr writes
+ several log records,
this log record ends the
sequence of these records */
-#define MLOG_DUMMY_RECORD ((byte)32) /* dummy log record used to
+#define MLOG_DUMMY_RECORD ((byte)32) /*!< dummy log record used to
pad a log block full */
-#define MLOG_FILE_CREATE ((byte)33) /* log record about an .ibd
+#define MLOG_FILE_CREATE ((byte)33) /*!< log record about an .ibd
file creation */
-#define MLOG_FILE_RENAME ((byte)34) /* log record about an .ibd
+#define MLOG_FILE_RENAME ((byte)34) /*!< log record about an .ibd
file rename */
-#define MLOG_FILE_DELETE ((byte)35) /* log record about an .ibd
+#define MLOG_FILE_DELETE ((byte)35) /*!< log record about an .ibd
file deletion */
-#define MLOG_COMP_REC_MIN_MARK ((byte)36) /* mark a compact index record
- as the predefined minimum
+#define MLOG_COMP_REC_MIN_MARK ((byte)36) /*!< mark a compact
+ index record as the
+ predefined minimum
record */
-#define MLOG_COMP_PAGE_CREATE ((byte)37) /* create a compact
+#define MLOG_COMP_PAGE_CREATE ((byte)37) /*!< create a compact
index page */
-#define MLOG_COMP_REC_INSERT ((byte)38) /* compact record insert */
+#define MLOG_COMP_REC_INSERT ((byte)38) /*!< compact record insert */
#define MLOG_COMP_REC_CLUST_DELETE_MARK ((byte)39)
- /* mark compact clustered index
- record deleted */
-#define MLOG_COMP_REC_SEC_DELETE_MARK ((byte)40)/* mark compact secondary index
- record deleted; this log
- record type is redundant, as
- MLOG_REC_SEC_DELETE_MARK is
- independent of the record
- format. */
-#define MLOG_COMP_REC_UPDATE_IN_PLACE ((byte)41)/* update of a compact record,
- preserves record field sizes */
-#define MLOG_COMP_REC_DELETE ((byte)42) /* delete a compact record
+ /*!< mark compact
+ clustered index record
+ deleted */
+#define MLOG_COMP_REC_SEC_DELETE_MARK ((byte)40)/*!< mark compact
+ secondary index record
+ deleted; this log
+ record type is
+ redundant, as
+ MLOG_REC_SEC_DELETE_MARK
+ is independent of the
+ record format. */
+#define MLOG_COMP_REC_UPDATE_IN_PLACE ((byte)41)/*!< update of a
+ compact record,
+ preserves record field
+ sizes */
+#define MLOG_COMP_REC_DELETE ((byte)42) /*!< delete a compact record
from a page */
-#define MLOG_COMP_LIST_END_DELETE ((byte)43) /* delete compact record list
+#define MLOG_COMP_LIST_END_DELETE ((byte)43) /*!< delete compact record list
end on index page */
-#define MLOG_COMP_LIST_START_DELETE ((byte)44) /* delete compact record list
+#define MLOG_COMP_LIST_START_DELETE ((byte)44) /*!< delete compact record list
start on index page */
#define MLOG_COMP_LIST_END_COPY_CREATED ((byte)45)
- /* copy compact record list end
- to a new created index page */
-#define MLOG_COMP_PAGE_REORGANIZE ((byte)46) /* reorganize an index page */
+ /*!< copy compact
+ record list end to a
+ newly created index
+ page */
+#define MLOG_COMP_PAGE_REORGANIZE ((byte)46) /*!< reorganize an index page */
+#define MLOG_FILE_CREATE2 ((byte)47) /*!< log record about creating
+ an .ibd file, with format */
+#define MLOG_ZIP_WRITE_NODE_PTR ((byte)48) /*!< write the node pointer of
+ a record on a compressed
+ non-leaf B-tree page */
+#define MLOG_ZIP_WRITE_BLOB_PTR ((byte)49) /*!< write the BLOB pointer
+ of an externally stored column
+ on a compressed page */
+#define MLOG_ZIP_WRITE_HEADER ((byte)50) /*!< write to compressed page
+ header */
+#define MLOG_ZIP_PAGE_COMPRESS ((byte)51) /*!< compress an index page */
+#define MLOG_BIGGEST_TYPE ((byte)51) /*!< biggest value (used in
+ assertions) */
+/* @} */
-#define MLOG_BIGGEST_TYPE ((byte)46) /* biggest value (used in
- asserts) */
+/** @name Flags for MLOG_FILE operations
+(stored in the page number parameter, called log_flags in the
+functions). The page number parameter was originally written as 0. @{ */
+#define MLOG_FILE_FLAG_TEMP 1 /*!< identifies TEMPORARY TABLE in
+ MLOG_FILE_CREATE, MLOG_FILE_CREATE2 */
+/* @} */
-/*******************************************************************
+/***************************************************************//**
Starts a mini-transaction and creates a mini-transaction handle
-and buffer in the memory buffer given by the caller. */
+and buffer in the memory buffer given by the caller.
+@return mtr buffer which also acts as the mtr handle */
UNIV_INLINE
mtr_t*
mtr_start(
/*======*/
- /* out: mtr buffer which also acts as
- the mtr handle */
- mtr_t* mtr); /* in: memory buffer for the mtr buffer */
-/*******************************************************************
-Starts a mini-transaction and creates a mini-transaction handle
-and buffer in the memory buffer given by the caller. */
-
-mtr_t*
-mtr_start_noninline(
-/*================*/
- /* out: mtr buffer which also acts as
- the mtr handle */
- mtr_t* mtr); /* in: memory buffer for the mtr buffer */
-/*******************************************************************
+ mtr_t* mtr); /*!< in: memory buffer for the mtr buffer */
+/***************************************************************//**
Commits a mini-transaction. */
-
+UNIV_INTERN
void
mtr_commit(
/*=======*/
- mtr_t* mtr); /* in: mini-transaction */
-/**************************************************************
-Sets and returns a savepoint in mtr. */
+ mtr_t* mtr); /*!< in: mini-transaction */
+/**********************************************************//**
+Sets and returns a savepoint in mtr.
+@return savepoint */
UNIV_INLINE
ulint
mtr_set_savepoint(
/*==============*/
- /* out: savepoint */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************
+ mtr_t* mtr); /*!< in: mtr */
+/**********************************************************//**
Releases the latches stored in an mtr memo down to a savepoint.
NOTE! The mtr must not have made changes to buffer pages after the
savepoint, as these can be handled only by mtr_commit. */
-
+UNIV_INTERN
void
mtr_rollback_to_savepoint(
/*======================*/
- mtr_t* mtr, /* in: mtr */
- ulint savepoint); /* in: savepoint */
-/**************************************************************
+ mtr_t* mtr, /*!< in: mtr */
+ ulint savepoint); /*!< in: savepoint */
+#ifndef UNIV_HOTBACKUP
+/**********************************************************//**
Releases the (index tree) s-latch stored in an mtr memo after a
savepoint. */
UNIV_INLINE
void
mtr_release_s_latch_at_savepoint(
/*=============================*/
- mtr_t* mtr, /* in: mtr */
- ulint savepoint, /* in: savepoint */
- rw_lock_t* lock); /* in: latch to release */
-/*******************************************************************
-Gets the logging mode of a mini-transaction. */
+ mtr_t* mtr, /*!< in: mtr */
+ ulint savepoint, /*!< in: savepoint */
+ rw_lock_t* lock); /*!< in: latch to release */
+#else /* !UNIV_HOTBACKUP */
+# define mtr_release_s_latch_at_savepoint(mtr,savepoint,lock) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+/***************************************************************//**
+Gets the logging mode of a mini-transaction.
+@return logging mode: MTR_LOG_NONE, ... */
UNIV_INLINE
ulint
mtr_get_log_mode(
/*=============*/
- /* out: logging mode: MTR_LOG_NONE, ... */
- mtr_t* mtr); /* in: mtr */
-/*******************************************************************
-Changes the logging mode of a mini-transaction. */
+ mtr_t* mtr); /*!< in: mtr */
+/***************************************************************//**
+Changes the logging mode of a mini-transaction.
+@return old mode */
UNIV_INLINE
ulint
mtr_set_log_mode(
/*=============*/
- /* out: old mode */
- mtr_t* mtr, /* in: mtr */
- ulint mode); /* in: logging mode: MTR_LOG_NONE, ... */
-/************************************************************
-Reads 1 - 4 bytes from a file page buffered in the buffer pool. */
-
+ mtr_t* mtr, /*!< in: mtr */
+ ulint mode); /*!< in: logging mode: MTR_LOG_NONE, ... */
+/********************************************************//**
+Reads 1 - 4 bytes from a file page buffered in the buffer pool.
+@return value read */
+UNIV_INTERN
ulint
mtr_read_ulint(
/*===========*/
- /* out: value read */
- byte* ptr, /* in: pointer from where to read */
- ulint type, /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************
-Reads 8 bytes from a file page buffered in the buffer pool. */
-
+ const byte* ptr, /*!< in: pointer from where to read */
+ ulint type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/********************************************************//**
+Reads 8 bytes from a file page buffered in the buffer pool.
+@return value read */
+UNIV_INTERN
dulint
mtr_read_dulint(
/*============*/
- /* out: value read */
- byte* ptr, /* in: pointer from where to read */
- mtr_t* mtr); /* in: mini-transaction handle */
-/*************************************************************************
+ const byte* ptr, /*!< in: pointer from where to read */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
This macro locks an rw-lock in s-mode. */
#define mtr_s_lock(B, MTR) mtr_s_lock_func((B), __FILE__, __LINE__,\
(MTR))
-/*************************************************************************
+/*********************************************************************//**
This macro locks an rw-lock in x-mode. */
#define mtr_x_lock(B, MTR) mtr_x_lock_func((B), __FILE__, __LINE__,\
(MTR))
-/*************************************************************************
+/*********************************************************************//**
NOTE! Use the macro above!
Locks a lock in s-mode. */
UNIV_INLINE
void
mtr_s_lock_func(
/*============*/
- rw_lock_t* lock, /* in: rw-lock */
- const char* file, /* in: file name */
- ulint line, /* in: line number */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************************
+ rw_lock_t* lock, /*!< in: rw-lock */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line number */
+ mtr_t* mtr); /*!< in: mtr */
+/*********************************************************************//**
NOTE! Use the macro above!
Locks a lock in x-mode. */
UNIV_INLINE
void
mtr_x_lock_func(
/*============*/
- rw_lock_t* lock, /* in: rw-lock */
- const char* file, /* in: file name */
- ulint line, /* in: line number */
- mtr_t* mtr); /* in: mtr */
+ rw_lock_t* lock, /*!< in: rw-lock */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line number */
+ mtr_t* mtr); /*!< in: mtr */
+#endif /* !UNIV_HOTBACKUP */
-/*******************************************************
+/***************************************************//**
Releases an object in the memo stack. */
-
+UNIV_INTERN
void
mtr_memo_release(
/*=============*/
- mtr_t* mtr, /* in: mtr */
- void* object, /* in: object */
- ulint type); /* in: object type: MTR_MEMO_S_LOCK, ... */
+ mtr_t* mtr, /*!< in: mtr */
+ void* object, /*!< in: object */
+ ulint type); /*!< in: object type: MTR_MEMO_S_LOCK, ... */
#ifdef UNIV_DEBUG
-/**************************************************************
-Checks if memo contains the given item. */
+# ifndef UNIV_HOTBACKUP
+/**********************************************************//**
+Checks if memo contains the given item.
+@return TRUE if contains */
UNIV_INLINE
ibool
mtr_memo_contains(
/*==============*/
- /* out: TRUE if contains */
- mtr_t* mtr, /* in: mtr */
- void* object, /* in: object to search */
- ulint type); /* in: type of object */
-/*************************************************************
-Prints info of an mtr handle. */
+ mtr_t* mtr, /*!< in: mtr */
+ const void* object, /*!< in: object to search */
+ ulint type); /*!< in: type of object */
+/**********************************************************//**
+Checks if memo contains the given page.
+@return TRUE if contains */
+UNIV_INTERN
+ibool
+mtr_memo_contains_page(
+/*===================*/
+ mtr_t* mtr, /*!< in: mtr */
+ const byte* ptr, /*!< in: pointer to buffer frame */
+ ulint type); /*!< in: type of object */
+/*********************************************************//**
+Prints info of an mtr handle. */
+UNIV_INTERN
void
mtr_print(
/*======*/
- mtr_t* mtr); /* in: mtr */
+ mtr_t* mtr); /*!< in: mtr */
+# else /* !UNIV_HOTBACKUP */
+# define mtr_memo_contains(mtr, object, type) TRUE
+# define mtr_memo_contains_page(mtr, ptr, type) TRUE
+# endif /* !UNIV_HOTBACKUP */
#endif /* UNIV_DEBUG */
/*######################################################################*/
#define MTR_BUF_MEMO_SIZE 200 /* number of slots in memo */
-/*******************************************************************
-Returns the log object of a mini-transaction buffer. */
+/***************************************************************//**
+Returns the log object of a mini-transaction buffer.
+@return log */
UNIV_INLINE
dyn_array_t*
mtr_get_log(
/*========*/
- /* out: log */
- mtr_t* mtr); /* in: mini-transaction */
-/*******************************************************
+ mtr_t* mtr); /*!< in: mini-transaction */
+/***************************************************//**
Pushes an object to an mtr memo stack. */
UNIV_INLINE
void
mtr_memo_push(
/*==========*/
- mtr_t* mtr, /* in: mtr */
- void* object, /* in: object */
- ulint type); /* in: object type: MTR_MEMO_S_LOCK, ... */
+ mtr_t* mtr, /*!< in: mtr */
+ void* object, /*!< in: object */
+ ulint type); /*!< in: object type: MTR_MEMO_S_LOCK, ... */
/* Type definition of a mini-transaction memo stack slot. */
typedef struct mtr_memo_slot_struct mtr_memo_slot_t;
struct mtr_memo_slot_struct{
- ulint type; /* type of the stored object (MTR_MEMO_S_LOCK, ...) */
- void* object; /* pointer to the object */
+ ulint type; /*!< type of the stored object (MTR_MEMO_S_LOCK, ...) */
+ void* object; /*!< pointer to the object */
};
/* Mini-transaction handle and buffer */
struct mtr_struct{
- ulint state; /* MTR_ACTIVE, MTR_COMMITTING, MTR_COMMITTED */
- dyn_array_t memo; /* memo stack for locks etc. */
- dyn_array_t log; /* mini-transaction log */
+#ifdef UNIV_DEBUG
+ ulint state; /*!< MTR_ACTIVE, MTR_COMMITTING, MTR_COMMITTED */
+#endif
+ dyn_array_t memo; /*!< memo stack for locks etc. */
+ dyn_array_t log; /*!< mini-transaction log */
ibool modifications;
/* TRUE if the mtr made modifications to
buffer pool pages */
@@ -327,14 +395,18 @@ struct mtr_struct{
have been written to the mtr log */
ulint log_mode; /* specifies which operations should be
logged; default value MTR_LOG_ALL */
- dulint start_lsn;/* start lsn of the possible log entry for
+ ib_uint64_t start_lsn;/* start lsn of the possible log entry for
this mtr */
- dulint end_lsn;/* end lsn of the possible log entry for
+ ib_uint64_t end_lsn;/* end lsn of the possible log entry for
this mtr */
+#ifdef UNIV_DEBUG
ulint magic_n;
+#endif /* UNIV_DEBUG */
};
-#define MTR_MAGIC_N 54551
+#ifdef UNIV_DEBUG
+# define MTR_MAGIC_N 54551
+#endif /* UNIV_DEBUG */
#define MTR_ACTIVE 12231
#define MTR_COMMITTING 56456
diff --git a/storage/innobase/include/mtr0mtr.ic b/storage/innobase/include/mtr0mtr.ic
index 81eec3bfc92..310c7c4117f 100644
--- a/storage/innobase/include/mtr0mtr.ic
+++ b/storage/innobase/include/mtr0mtr.ic
@@ -1,25 +1,43 @@
-/******************************************************
-Mini-transaction buffer
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/mtr0mtr.ic
+Mini-transaction buffer
Created 11/26/1995 Heikki Tuuri
*******************************************************/
-#include "sync0sync.h"
-#include "sync0rw.h"
+#ifndef UNIV_HOTBACKUP
+# include "sync0sync.h"
+# include "sync0rw.h"
+#endif /* !UNIV_HOTBACKUP */
#include "mach0data.h"
-/*******************************************************************
+/***************************************************************//**
Starts a mini-transaction and creates a mini-transaction handle
-and a buffer in the memory buffer given by the caller. */
+and a buffer in the memory buffer given by the caller.
+@return mtr buffer which also acts as the mtr handle */
UNIV_INLINE
mtr_t*
mtr_start(
/*======*/
- /* out: mtr buffer which also acts as
- the mtr handle */
- mtr_t* mtr) /* in: memory buffer for the mtr buffer */
+ mtr_t* mtr) /*!< in: memory buffer for the mtr buffer */
{
dyn_array_create(&(mtr->memo));
dyn_array_create(&(mtr->log));
@@ -28,22 +46,21 @@ mtr_start(
mtr->modifications = FALSE;
mtr->n_log_recs = 0;
-#ifdef UNIV_DEBUG
- mtr->state = MTR_ACTIVE;
- mtr->magic_n = MTR_MAGIC_N;
-#endif
+ ut_d(mtr->state = MTR_ACTIVE);
+ ut_d(mtr->magic_n = MTR_MAGIC_N);
+
return(mtr);
}
-/*******************************************************
+/***************************************************//**
Pushes an object to an mtr memo stack. */
UNIV_INLINE
void
mtr_memo_push(
/*==========*/
- mtr_t* mtr, /* in: mtr */
- void* object, /* in: object */
- ulint type) /* in: object type: MTR_MEMO_S_LOCK, ... */
+ mtr_t* mtr, /*!< in: mtr */
+ void* object, /*!< in: object */
+ ulint type) /*!< in: object type: MTR_MEMO_S_LOCK, ... */
{
dyn_array_t* memo;
mtr_memo_slot_t* slot;
@@ -56,20 +73,20 @@ mtr_memo_push(
memo = &(mtr->memo);
- slot = dyn_array_push(memo, sizeof(mtr_memo_slot_t));
+ slot = (mtr_memo_slot_t*) dyn_array_push(memo, sizeof *slot);
slot->object = object;
slot->type = type;
}
-/**************************************************************
-Sets and returns a savepoint in mtr. */
+/**********************************************************//**
+Sets and returns a savepoint in mtr.
+@return savepoint */
UNIV_INLINE
ulint
mtr_set_savepoint(
/*==============*/
- /* out: savepoint */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
dyn_array_t* memo;
@@ -81,16 +98,17 @@ mtr_set_savepoint(
return(dyn_array_get_data_size(memo));
}
-/**************************************************************
+#ifndef UNIV_HOTBACKUP
+/**********************************************************//**
Releases the (index tree) s-latch stored in an mtr memo after a
savepoint. */
UNIV_INLINE
void
mtr_release_s_latch_at_savepoint(
/*=============================*/
- mtr_t* mtr, /* in: mtr */
- ulint savepoint, /* in: savepoint */
- rw_lock_t* lock) /* in: latch to release */
+ mtr_t* mtr, /*!< in: mtr */
+ ulint savepoint, /*!< in: savepoint */
+ rw_lock_t* lock) /*!< in: latch to release */
{
mtr_memo_slot_t* slot;
dyn_array_t* memo;
@@ -103,7 +121,7 @@ mtr_release_s_latch_at_savepoint(
ut_ad(dyn_array_get_data_size(memo) > savepoint);
- slot = dyn_array_get_element(memo, savepoint);
+ slot = (mtr_memo_slot_t*) dyn_array_get_element(memo, savepoint);
ut_ad(slot->object == lock);
ut_ad(slot->type == MTR_MEMO_S_LOCK);
@@ -113,17 +131,17 @@ mtr_release_s_latch_at_savepoint(
slot->object = NULL;
}
-#ifdef UNIV_DEBUG
-/**************************************************************
-Checks if memo contains the given item. */
+# ifdef UNIV_DEBUG
+/**********************************************************//**
+Checks if memo contains the given item.
+@return TRUE if contains */
UNIV_INLINE
ibool
mtr_memo_contains(
/*==============*/
- /* out: TRUE if contains */
- mtr_t* mtr, /* in: mtr */
- void* object, /* in: object to search */
- ulint type) /* in: type of object */
+ mtr_t* mtr, /*!< in: mtr */
+ const void* object, /*!< in: object to search */
+ ulint type) /*!< in: type of object */
{
mtr_memo_slot_t* slot;
dyn_array_t* memo;
@@ -149,16 +167,17 @@ mtr_memo_contains(
return(FALSE);
}
-#endif /* UNIV_DEBUG */
+# endif /* UNIV_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
-/*******************************************************************
-Returns the log object of a mini-transaction buffer. */
+/***************************************************************//**
+Returns the log object of a mini-transaction buffer.
+@return log */
UNIV_INLINE
dyn_array_t*
mtr_get_log(
/*========*/
- /* out: log */
- mtr_t* mtr) /* in: mini-transaction */
+ mtr_t* mtr) /*!< in: mini-transaction */
{
ut_ad(mtr);
ut_ad(mtr->magic_n == MTR_MAGIC_N);
@@ -166,14 +185,14 @@ mtr_get_log(
return(&(mtr->log));
}
-/*******************************************************************
-Gets the logging mode of a mini-transaction. */
+/***************************************************************//**
+Gets the logging mode of a mini-transaction.
+@return logging mode: MTR_LOG_NONE, ... */
UNIV_INLINE
ulint
mtr_get_log_mode(
/*=============*/
- /* out: logging mode: MTR_LOG_NONE, ... */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
ut_ad(mtr);
ut_ad(mtr->log_mode >= MTR_LOG_ALL);
@@ -182,15 +201,15 @@ mtr_get_log_mode(
return(mtr->log_mode);
}
-/*******************************************************************
-Changes the logging mode of a mini-transaction. */
+/***************************************************************//**
+Changes the logging mode of a mini-transaction.
+@return old mode */
UNIV_INLINE
ulint
mtr_set_log_mode(
/*=============*/
- /* out: old mode */
- mtr_t* mtr, /* in: mtr */
- ulint mode) /* in: logging mode: MTR_LOG_NONE, ... */
+ mtr_t* mtr, /*!< in: mtr */
+ ulint mode) /*!< in: logging mode: MTR_LOG_NONE, ... */
{
ulint old_mode;
@@ -212,16 +231,17 @@ mtr_set_log_mode(
return(old_mode);
}
-/*************************************************************************
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
Locks a lock in s-mode. */
UNIV_INLINE
void
mtr_s_lock_func(
/*============*/
- rw_lock_t* lock, /* in: rw-lock */
- const char* file, /* in: file name */
- ulint line, /* in: line number */
- mtr_t* mtr) /* in: mtr */
+ rw_lock_t* lock, /*!< in: rw-lock */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line number */
+ mtr_t* mtr) /*!< in: mtr */
{
ut_ad(mtr);
ut_ad(lock);
@@ -231,16 +251,16 @@ mtr_s_lock_func(
mtr_memo_push(mtr, lock, MTR_MEMO_S_LOCK);
}
-/*************************************************************************
+/*********************************************************************//**
Locks a lock in x-mode. */
UNIV_INLINE
void
mtr_x_lock_func(
/*============*/
- rw_lock_t* lock, /* in: rw-lock */
- const char* file, /* in: file name */
- ulint line, /* in: line number */
- mtr_t* mtr) /* in: mtr */
+ rw_lock_t* lock, /*!< in: rw-lock */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line number */
+ mtr_t* mtr) /*!< in: mtr */
{
ut_ad(mtr);
ut_ad(lock);
@@ -249,3 +269,4 @@ mtr_x_lock_func(
mtr_memo_push(mtr, lock, MTR_MEMO_X_LOCK);
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/mtr0types.h b/storage/innobase/include/mtr0types.h
index e3b6ec9a84f..83a7aaf3839 100644
--- a/storage/innobase/include/mtr0types.h
+++ b/storage/innobase/include/mtr0types.h
@@ -1,7 +1,24 @@
-/******************************************************
-Mini-transaction buffer global types
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/mtr0types.h
+Mini-transaction buffer global types
Created 11/26/1995 Heikki Tuuri
*******************************************************/
diff --git a/storage/innodb_plugin/include/mysql_addons.h b/storage/innobase/include/mysql_addons.h
index 17660c18710..17660c18710 100644
--- a/storage/innodb_plugin/include/mysql_addons.h
+++ b/storage/innobase/include/mysql_addons.h
diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
index e09e1e00408..16568579f31 100644
--- a/storage/innobase/include/os0file.h
+++ b/storage/innobase/include/os0file.h
@@ -1,7 +1,50 @@
-/******************************************************
-The interface to the operating system file io
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+/***********************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2009, Percona Inc.
+
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; version 2 of the License.
-(c) 1995 Innobase Oy
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+***********************************************************************/
+
+/**************************************************//**
+@file include/os0file.h
+The interface to the operating system file io
Created 10/21/1995 Heikki Tuuri
*******************************************************/
@@ -17,47 +60,63 @@ Created 10/21/1995 Heikki Tuuri
#include <time.h>
#endif
+/** File node of a tablespace or the log data space */
typedef struct fil_node_struct fil_node_t;
#ifdef UNIV_DO_FLUSH
extern ibool os_do_not_call_flush_at_each_write;
#endif /* UNIV_DO_FLUSH */
extern ibool os_has_said_disk_full;
+/** Flag: enable debug printout for asynchronous i/o */
extern ibool os_aio_print_debug;
+/** Number of pending os_file_pread() operations */
extern ulint os_file_n_pending_preads;
+/** Number of pending os_file_pwrite() operations */
extern ulint os_file_n_pending_pwrites;
+/** Number of pending read operations */
extern ulint os_n_pending_reads;
+/** Number of pending write operations */
extern ulint os_n_pending_writes;
#ifdef __WIN__
-/* We define always WIN_ASYNC_IO, and check at run-time whether
+/** We always define WIN_ASYNC_IO, and check at run-time whether
the OS actually supports it: Win 95 does not, NT does. */
#define WIN_ASYNC_IO
+/** Use unbuffered I/O */
#define UNIV_NON_BUFFERED_IO
#endif
#ifdef __WIN__
+/** File handle */
#define os_file_t HANDLE
+/** Convert a C file descriptor to a native file handle
+@param fd file descriptor
+@return native file handle */
+#define OS_FILE_FROM_FD(fd) (HANDLE) _get_osfhandle(fd)
#else
+/** File handle */
typedef int os_file_t;
+/** Convert a C file descriptor to a native file handle
+@param fd file descriptor
+@return native file handle */
+#define OS_FILE_FROM_FD(fd) fd
#endif
+/** Umask for creating files */
extern ulint os_innodb_umask;
-/* If this flag is TRUE, then we will use the native aio of the
+/** If this flag is TRUE, then we will use the native aio of the
OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio we build below with threads */
extern ibool os_aio_use_native_aio;
-#define OS_FILE_SECTOR_SIZE 512
-
-/* The next value should be smaller or equal to the smallest sector size used
+/** The next value should be smaller than or equal to the smallest sector size used
on any disk. A log block is required to be a portion of disk which is written
so that if the start and the end of a block get written to disk, then the
whole block gets written. This should be true even in most cases of a crash:
@@ -66,7 +125,7 @@ log. */
#define OS_FILE_LOG_BLOCK_SIZE 512
-/* Options for file_create */
+/** Options for file_create @{ */
#define OS_FILE_OPEN 51
#define OS_FILE_CREATE 52
#define OS_FILE_OVERWRITE 53
@@ -82,12 +141,14 @@ log. */
/* Options for file_create */
#define OS_FILE_AIO 61
#define OS_FILE_NORMAL 62
+/* @} */
-/* Types for file create */
+/** Types for file create @{ */
#define OS_DATA_FILE 100
#define OS_LOG_FILE 101
+/* @} */
-/* Error codes from os_file_get_last_error */
+/** Error codes from os_file_get_last_error @{ */
#define OS_FILE_NOT_FOUND 71
#define OS_FILE_DISK_FULL 72
#define OS_FILE_ALREADY_EXISTS 73
@@ -96,25 +157,27 @@ log. */
to become available again */
#define OS_FILE_SHARING_VIOLATION 76
#define OS_FILE_ERROR_NOT_SPECIFIED 77
- /* 78 is used in the plugin */
+#define OS_FILE_INSUFFICIENT_RESOURCE 78
#define OS_FILE_OPERATION_ABORTED 79
+/* @} */
-/* Types for aio operations */
+/** Types for aio operations @{ */
#define OS_FILE_READ 10
#define OS_FILE_WRITE 11
#define OS_FILE_LOG 256 /* This can be ORed to type */
+/* @} */
-#define OS_AIO_N_PENDING_IOS_PER_THREAD 32 /* Win NT does not allow more
+#define OS_AIO_N_PENDING_IOS_PER_THREAD 32 /*!< Win NT does not allow more
than 64 */
-/* Modes for aio operations */
-#define OS_AIO_NORMAL 21 /* Normal asynchronous i/o not for ibuf
+/** Modes for aio operations @{ */
+#define OS_AIO_NORMAL 21 /*!< Normal asynchronous i/o not for ibuf
pages or ibuf bitmap pages */
-#define OS_AIO_IBUF 22 /* Asynchronous i/o for ibuf pages or ibuf
+#define OS_AIO_IBUF 22 /*!< Asynchronous i/o for ibuf pages or ibuf
bitmap pages */
-#define OS_AIO_LOG 23 /* Asynchronous i/o for the log */
-#define OS_AIO_SYNC 24 /* Asynchronous i/o where the calling thread
+#define OS_AIO_LOG 23 /*!< Asynchronous i/o for the log */
+#define OS_AIO_SYNC 24 /*!< Asynchronous i/o where the calling thread
will itself wait for the i/o to complete,
doing also the job of the i/o-handler thread;
can be used for any pages, ibuf or non-ibuf.
@@ -124,16 +187,18 @@ log. */
the file seek and read or write, causing a
bottleneck for parallelism. */
-#define OS_AIO_SIMULATED_WAKE_LATER 512 /* This can be ORed to mode
+#define OS_AIO_SIMULATED_WAKE_LATER 512 /*!< This can be ORed to mode
in the call of os_aio(...),
if the caller wants to post several i/o
requests in a batch, and only after that
wake the i/o-handler thread; this has
effect only in simulated aio */
-#define OS_WIN31 1
-#define OS_WIN95 2
-#define OS_WINNT 3
-#define OS_WIN2000 4
+/* @} */
+
+#define OS_WIN31 1 /*!< Microsoft Windows 3.x */
+#define OS_WIN95 2 /*!< Microsoft Windows 95 */
+#define OS_WINNT 3 /*!< Microsoft Windows NT 3.x */
+#define OS_WIN2000 4 /*!< Microsoft Windows 2000 */
extern ulint os_n_file_reads;
extern ulint os_n_file_writes;
@@ -157,150 +222,157 @@ bigger than 4000 bytes */
/* Struct used in fetching information of a file in a directory */
struct os_file_stat_struct{
- char name[OS_FILE_MAX_PATH]; /* path to a file */
- os_file_type_t type; /* file type */
- ib_longlong size; /* file size */
- time_t ctime; /* creation time */
- time_t mtime; /* modification time */
- time_t atime; /* access time */
+ char name[OS_FILE_MAX_PATH]; /*!< path to a file */
+ os_file_type_t type; /*!< file type */
+ ib_int64_t size; /*!< file size */
+ time_t ctime; /*!< creation time */
+ time_t mtime; /*!< modification time */
+ time_t atime; /*!< access time */
};
typedef struct os_file_stat_struct os_file_stat_t;
#ifdef __WIN__
-typedef HANDLE os_file_dir_t; /* directory stream */
+typedef HANDLE os_file_dir_t; /*!< directory stream */
#else
-typedef DIR* os_file_dir_t; /* directory stream */
+typedef DIR* os_file_dir_t; /*!< directory stream */
#endif
-/***************************************************************************
-Gets the operating system version. Currently works only on Windows. */
-
+/***********************************************************************//**
+Gets the operating system version. Currently works only on Windows.
+@return OS_WIN95, OS_WIN31, OS_WINNT, or OS_WIN2000 */
+UNIV_INTERN
ulint
os_get_os_version(void);
/*===================*/
- /* out: OS_WIN95, OS_WIN31, OS_WINNT, or OS_WIN2000 */
-/********************************************************************
+#ifndef UNIV_HOTBACKUP
+/****************************************************************//**
Creates the seek mutexes used in positioned reads and writes. */
-
+UNIV_INTERN
void
os_io_init_simple(void);
/*===================*/
-/***************************************************************************
+/***********************************************************************//**
Creates a temporary file. This function is like tmpfile(3), but
the temporary file is created in the MySQL temporary directory.
On Netware, this function is like tmpfile(3), because the C run-time
-library of Netware does not expose the delete-on-close flag. */
+library of Netware does not expose the delete-on-close flag.
+@return temporary file handle, or NULL on error */
FILE*
os_file_create_tmpfile(void);
/*========================*/
- /* out: temporary file handle, or NULL on error */
-/***************************************************************************
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************************//**
The os_file_opendir() function opens a directory stream corresponding to the
directory named by the dirname argument. The directory stream is positioned
at the first entry. In both Unix and Windows we automatically skip the '.'
-and '..' items at the start of the directory listing. */
-
+and '..' items at the start of the directory listing.
+@return directory stream, NULL if error */
+UNIV_INTERN
os_file_dir_t
os_file_opendir(
/*============*/
- /* out: directory stream, NULL if
- error */
- const char* dirname, /* in: directory name; it must not
+ const char* dirname, /*!< in: directory name; it must not
contain a trailing '\' or '/' */
- ibool error_is_fatal);/* in: TRUE if we should treat an
+ ibool error_is_fatal);/*!< in: TRUE if we should treat an
error as a fatal error; if we try to
open symlinks then we do not wish a
fatal error if it happens not to be
a directory */
-/***************************************************************************
-Closes a directory stream. */
-
+/***********************************************************************//**
+Closes a directory stream.
+@return 0 if success, -1 if failure */
+UNIV_INTERN
int
os_file_closedir(
/*=============*/
- /* out: 0 if success, -1 if failure */
- os_file_dir_t dir); /* in: directory stream */
-/***************************************************************************
+ os_file_dir_t dir); /*!< in: directory stream */
+/***********************************************************************//**
This function returns information of the next file in the directory. We jump
-over the '.' and '..' entries in the directory. */
-
+over the '.' and '..' entries in the directory.
+@return 0 if ok, -1 if error, 1 if at the end of the directory */
+UNIV_INTERN
int
os_file_readdir_next_file(
/*======================*/
- /* out: 0 if ok, -1 if error, 1 if at the end
- of the directory */
- const char* dirname,/* in: directory name or path */
- os_file_dir_t dir, /* in: directory stream */
- os_file_stat_t* info); /* in/out: buffer where the info is returned */
-/*********************************************************************
+ const char* dirname,/*!< in: directory name or path */
+ os_file_dir_t dir, /*!< in: directory stream */
+ os_file_stat_t* info); /*!< in/out: buffer where the info is returned */
+/*****************************************************************//**
This function attempts to create a directory named pathname. The new directory
gets default permissions. On Unix, the permissions are (0770 & ~umask). If the
directory exists already, nothing is done and the call succeeds, unless the
-fail_if_exists arguments is true. */
-
+fail_if_exists argument is true.
+@return TRUE if call succeeds, FALSE on error */
+UNIV_INTERN
ibool
os_file_create_directory(
/*=====================*/
- /* out: TRUE if call succeeds,
- FALSE on error */
- const char* pathname, /* in: directory name as
+ const char* pathname, /*!< in: directory name as
null-terminated string */
- ibool fail_if_exists);/* in: if TRUE, pre-existing directory
+ ibool fail_if_exists);/*!< in: if TRUE, pre-existing directory
is treated as an error. */
-/********************************************************************
-A simple function to open or create a file. */
-
+/****************************************************************//**
+A simple function to open or create a file.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INTERN
os_file_t
os_file_create_simple(
/*==================*/
- /* out, own: handle to the file, not defined
- if error, error number can be retrieved with
- os_file_get_last_error */
- const char* name, /* in: name of the file or path as a
+ const char* name, /*!< in: name of the file or path as a
null-terminated string */
- ulint create_mode,/* in: OS_FILE_OPEN if an existing file is
+ ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file is
opened (if does not exist, error), or
OS_FILE_CREATE if a new file is created
(if exists, error), or
OS_FILE_CREATE_PATH if new file
(if exists, error) and subdirectories along
its path are created (if needed)*/
- ulint access_type,/* in: OS_FILE_READ_ONLY or
+ ulint access_type,/*!< in: OS_FILE_READ_ONLY or
OS_FILE_READ_WRITE */
- ibool* success);/* out: TRUE if succeed, FALSE if error */
-/********************************************************************
-A simple function to open or create a file. */
-
+ ibool* success);/*!< out: TRUE if succeed, FALSE if error */
+/****************************************************************//**
+A simple function to open or create a file.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INTERN
os_file_t
os_file_create_simple_no_error_handling(
/*====================================*/
- /* out, own: handle to the file, not defined
- if error, error number can be retrieved with
- os_file_get_last_error */
- const char* name, /* in: name of the file or path as a
+ const char* name, /*!< in: name of the file or path as a
null-terminated string */
- ulint create_mode,/* in: OS_FILE_OPEN if an existing file
+ ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
is opened (if does not exist, error), or
OS_FILE_CREATE if a new file is created
(if exists, error) */
- ulint access_type,/* in: OS_FILE_READ_ONLY,
+ ulint access_type,/*!< in: OS_FILE_READ_ONLY,
OS_FILE_READ_WRITE, or
OS_FILE_READ_ALLOW_DELETE; the last option is
used by a backup program reading the file */
- ibool* success);/* out: TRUE if succeed, FALSE if error */
-/********************************************************************
-Opens an existing file or creates a new. */
-
+ ibool* success);/*!< out: TRUE if succeed, FALSE if error */
+/****************************************************************//**
+Tries to disable OS caching on an opened file descriptor. */
+UNIV_INTERN
+void
+os_file_set_nocache(
+/*================*/
+ int fd, /*!< in: file descriptor to alter */
+ const char* file_name, /*!< in: file name, used in the
+ diagnostic message */
+ const char* operation_name);/*!< in: "open" or "create"; used in the
+ diagnostic message */
+/****************************************************************//**
+Opens an existing file or creates a new one.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INTERN
os_file_t
os_file_create(
/*===========*/
- /* out, own: handle to the file, not defined
- if error, error number can be retrieved with
- os_file_get_last_error */
- const char* name, /* in: name of the file or path as a
+ const char* name, /*!< in: name of the file or path as a
null-terminated string */
- ulint create_mode,/* in: OS_FILE_OPEN if an existing file
+ ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
is opened (if does not exist, error), or
OS_FILE_CREATE if a new file is created
(if exists, error),
@@ -308,192 +380,190 @@ os_file_create(
or an old overwritten;
OS_FILE_OPEN_RAW, if a raw device or disk
partition should be opened */
- ulint purpose,/* in: OS_FILE_AIO, if asynchronous,
+ ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous,
non-buffered i/o is desired,
OS_FILE_NORMAL, if any normal file;
NOTE that it also depends on type, os_aio_..
and srv_.. variables whether we really use
async i/o or unbuffered i/o: look in the
function source code for the exact rules */
- ulint type, /* in: OS_DATA_FILE or OS_LOG_FILE */
- ibool* success);/* out: TRUE if succeed, FALSE if error */
-/***************************************************************************
-Deletes a file. The file has to be closed before calling this. */
-
+ ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
+ ibool* success);/*!< out: TRUE if succeed, FALSE if error */
+/***********************************************************************//**
+Deletes a file. The file has to be closed before calling this.
+@return TRUE if success */
+UNIV_INTERN
ibool
os_file_delete(
/*===========*/
- /* out: TRUE if success */
- const char* name); /* in: file path as a null-terminated string */
-
-/***************************************************************************
-Deletes a file if it exists. The file has to be closed before calling this. */
+ const char* name); /*!< in: file path as a null-terminated string */
+/***********************************************************************//**
+Deletes a file if it exists. The file has to be closed before calling this.
+@return TRUE if success */
+UNIV_INTERN
ibool
os_file_delete_if_exists(
/*=====================*/
- /* out: TRUE if success */
- const char* name); /* in: file path as a null-terminated string */
-/***************************************************************************
+ const char* name); /*!< in: file path as a null-terminated string */
+/***********************************************************************//**
Renames a file (can also move it to another directory). It is safest that the
-file is closed before calling this function. */
-
+file is closed before calling this function.
+@return TRUE if success */
+UNIV_INTERN
ibool
os_file_rename(
/*===========*/
- /* out: TRUE if success */
- const char* oldpath, /* in: old file path as a
+ const char* oldpath, /*!< in: old file path as a
null-terminated string */
- const char* newpath); /* in: new file path */
-/***************************************************************************
+ const char* newpath); /*!< in: new file path */
+/***********************************************************************//**
Closes a file handle. In case of error, error number can be retrieved with
-os_file_get_last_error. */
-
+os_file_get_last_error.
+@return TRUE if success */
+UNIV_INTERN
ibool
os_file_close(
/*==========*/
- /* out: TRUE if success */
- os_file_t file); /* in, own: handle to a file */
-/***************************************************************************
-Closes a file handle. */
-
+ os_file_t file); /*!< in, own: handle to a file */
+#ifdef UNIV_HOTBACKUP
+/***********************************************************************//**
+Closes a file handle.
+@return TRUE if success */
+UNIV_INTERN
ibool
os_file_close_no_error_handling(
/*============================*/
- /* out: TRUE if success */
- os_file_t file); /* in, own: handle to a file */
-/***************************************************************************
-Gets a file size. */
-
+ os_file_t file); /*!< in, own: handle to a file */
+#endif /* UNIV_HOTBACKUP */
+/***********************************************************************//**
+Gets a file size.
+@return TRUE if success */
+UNIV_INTERN
ibool
os_file_get_size(
/*=============*/
- /* out: TRUE if success */
- os_file_t file, /* in: handle to a file */
- ulint* size, /* out: least significant 32 bits of file
+ os_file_t file, /*!< in: handle to a file */
+ ulint* size, /*!< out: least significant 32 bits of file
size */
- ulint* size_high);/* out: most significant 32 bits of size */
-/***************************************************************************
-Gets file size as a 64-bit integer ib_longlong. */
-
-ib_longlong
+ ulint* size_high);/*!< out: most significant 32 bits of size */
+/***********************************************************************//**
+Gets file size as a 64-bit integer ib_int64_t.
+@return size in bytes, -1 if error */
+UNIV_INTERN
+ib_int64_t
os_file_get_size_as_iblonglong(
/*===========================*/
- /* out: size in bytes, -1 if error */
- os_file_t file); /* in: handle to a file */
-/***************************************************************************
-Write the specified number of zeros to a newly created file. */
-
+ os_file_t file); /*!< in: handle to a file */
+/***********************************************************************//**
+Write the specified number of zeros to a newly created file.
+@return TRUE if success */
+UNIV_INTERN
ibool
os_file_set_size(
/*=============*/
- /* out: TRUE if success */
- const char* name, /* in: name of the file or path as a
+ const char* name, /*!< in: name of the file or path as a
null-terminated string */
- os_file_t file, /* in: handle to a file */
- ulint size, /* in: least significant 32 bits of file
+ os_file_t file, /*!< in: handle to a file */
+ ulint size, /*!< in: least significant 32 bits of file
size */
- ulint size_high);/* in: most significant 32 bits of size */
-/***************************************************************************
-Truncates a file at its current position. */
-
+ ulint size_high);/*!< in: most significant 32 bits of size */
+/***********************************************************************//**
+Truncates a file at its current position.
+@return TRUE if success */
+UNIV_INTERN
ibool
os_file_set_eof(
/*============*/
- /* out: TRUE if success */
- FILE* file); /* in: file to be truncated */
-/***************************************************************************
-Flushes the write buffers of a given file to the disk. */
-
+ FILE* file); /*!< in: file to be truncated */
+/***********************************************************************//**
+Flushes the write buffers of a given file to the disk.
+@return TRUE if success */
+UNIV_INTERN
ibool
os_file_flush(
/*==========*/
- /* out: TRUE if success */
- os_file_t file); /* in, own: handle to a file */
-/***************************************************************************
+ os_file_t file); /*!< in, own: handle to a file */
+/***********************************************************************//**
Retrieves the last error number if an error occurs in a file io function.
The number should be retrieved before any other OS calls (because they may
overwrite the error number). If the number is not known to this program,
-the OS error number + 100 is returned. */
-
+the OS error number + 100 is returned.
+@return error number, or OS error number + 100 */
+UNIV_INTERN
ulint
os_file_get_last_error(
/*===================*/
- /* out: error number, or OS error
- number + 100 */
- ibool report_all_errors); /* in: TRUE if we want an error message
+ ibool report_all_errors); /*!< in: TRUE if we want an error message
printed of all errors */
-/***********************************************************************
-Requests a synchronous read operation. */
-
+/*******************************************************************//**
+Requests a synchronous read operation.
+@return TRUE if request was successful, FALSE if fail */
+UNIV_INTERN
ibool
os_file_read(
/*=========*/
- /* out: TRUE if request was
- successful, FALSE if fail */
- os_file_t file, /* in: handle to a file */
- void* buf, /* in: buffer where to read */
- ulint offset, /* in: least significant 32 bits of file
+ os_file_t file, /*!< in: handle to a file */
+ void* buf, /*!< in: buffer where to read */
+ ulint offset, /*!< in: least significant 32 bits of file
offset where to read */
- ulint offset_high,/* in: most significant 32 bits of
+ ulint offset_high,/*!< in: most significant 32 bits of
offset */
- ulint n); /* in: number of bytes to read */
-/***********************************************************************
+ ulint n); /*!< in: number of bytes to read */
+/*******************************************************************//**
Rewind file to its start, read at most size - 1 bytes from it to str, and
NUL-terminate str. All errors are silently ignored. This function is
mostly meant to be used with temporary files. */
-
+UNIV_INTERN
void
os_file_read_string(
/*================*/
- FILE* file, /* in: file to read from */
- char* str, /* in: buffer where to read */
- ulint size); /* in: size of buffer */
-/***********************************************************************
+ FILE* file, /*!< in: file to read from */
+ char* str, /*!< in: buffer where to read */
+ ulint size); /*!< in: size of buffer */
+/*******************************************************************//**
Requests a synchronous positioned read operation. This function does not do
-any error handling. In case of error it returns FALSE. */
-
+any error handling. In case of error it returns FALSE.
+@return TRUE if request was successful, FALSE if fail */
+UNIV_INTERN
ibool
os_file_read_no_error_handling(
/*===========================*/
- /* out: TRUE if request was
- successful, FALSE if fail */
- os_file_t file, /* in: handle to a file */
- void* buf, /* in: buffer where to read */
- ulint offset, /* in: least significant 32 bits of file
+ os_file_t file, /*!< in: handle to a file */
+ void* buf, /*!< in: buffer where to read */
+ ulint offset, /*!< in: least significant 32 bits of file
offset where to read */
- ulint offset_high,/* in: most significant 32 bits of
+ ulint offset_high,/*!< in: most significant 32 bits of
offset */
- ulint n); /* in: number of bytes to read */
-
-/***********************************************************************
-Requests a synchronous write operation. */
+ ulint n); /*!< in: number of bytes to read */
+/*******************************************************************//**
+Requests a synchronous write operation.
+@return TRUE if request was successful, FALSE if fail */
+UNIV_INTERN
ibool
os_file_write(
/*==========*/
- /* out: TRUE if request was
- successful, FALSE if fail */
- const char* name, /* in: name of the file or path as a
+ const char* name, /*!< in: name of the file or path as a
null-terminated string */
- os_file_t file, /* in: handle to a file */
- const void* buf, /* in: buffer from which to write */
- ulint offset, /* in: least significant 32 bits of file
+ os_file_t file, /*!< in: handle to a file */
+ const void* buf, /*!< in: buffer from which to write */
+ ulint offset, /*!< in: least significant 32 bits of file
offset where to write */
- ulint offset_high,/* in: most significant 32 bits of
+ ulint offset_high,/*!< in: most significant 32 bits of
offset */
- ulint n); /* in: number of bytes to write */
-/***********************************************************************
-Check the existence and type of the given file. */
-
+ ulint n); /*!< in: number of bytes to write */
+/*******************************************************************//**
+Check the existence and type of the given file.
+@return TRUE if call succeeded */
+UNIV_INTERN
ibool
os_file_status(
/*===========*/
- /* out: TRUE if call succeeded */
- const char* path, /* in: pathname of the file */
- ibool* exists, /* out: TRUE if file exists */
- os_file_type_t* type); /* out: type of the file (if it exists) */
-/********************************************************************
+ const char* path, /*!< in: pathname of the file */
+ ibool* exists, /*!< out: TRUE if file exists */
+ os_file_type_t* type); /*!< out: type of the file (if it exists) */
+/****************************************************************//**
The function os_file_dirname returns a directory component of a
null-terminated pathname string. In the usual case, dirname returns
the string up to, but not including, the final '/', and basename
@@ -519,50 +589,54 @@ returned by dirname and basename for different paths:
"/" "/" "/"
"." "." "."
".." "." ".."
-*/
+@return own: directory component of the pathname */
+UNIV_INTERN
char*
os_file_dirname(
/*============*/
- /* out, own: directory component of the
- pathname */
- const char* path); /* in: pathname */
-/********************************************************************
-Creates all missing subdirectories along the given path. */
-
+ const char* path); /*!< in: pathname */
+/****************************************************************//**
+Creates all missing subdirectories along the given path.
+@return TRUE if call succeeded, FALSE otherwise */
+UNIV_INTERN
ibool
os_file_create_subdirs_if_needed(
/*=============================*/
- /* out: TRUE if call succeeded
- FALSE otherwise */
- const char* path); /* in: path name */
-/****************************************************************************
-Initializes the asynchronous io system. Creates separate aio array for
-non-ibuf read and write, a third aio array for the ibuf i/o, with just one
-segment, two aio arrays for log reads and writes with one segment, and a
-synchronous aio array of the specified size. The combined number of segments
-in the three first aio arrays is the parameter n_segments given to the
-function. The caller must create an i/o handler thread for each segment in
-the four first arrays, but not for the sync aio array. */
-
+ const char* path); /*!< in: path name */
+/*******************************************************************//**
+Initializes the asynchronous io system. Creates one array each for ibuf
+and log i/o. Also creates one array each for read and write where each
+array is divided logically into n_read_segs and n_write_segs
+respectively. The caller must create an i/o handler thread for each
+segment in these arrays. This function also creates the sync array.
+No i/o handler thread needs to be created for that */
+UNIV_INTERN
void
os_aio_init(
/*========*/
- ulint n, /* in: maximum number of pending aio operations
- allowed; n must be divisible by n_segments */
- ulint n_segments, /* in: combined number of segments in the four
- first aio arrays; must be >= 4 */
- ulint n_slots_sync); /* in: number of slots in the sync aio array */
+ ulint n_per_seg, /*!< in: maximum number of pending aio
+ operations allowed per segment */
+ ulint n_read_segs, /*!< in: number of reader threads */
+ ulint n_write_segs, /*!< in: number of writer threads */
+ ulint n_slots_sync); /*!< in: number of slots in the sync aio
+ array */
/***********************************************************************
-Requests an asynchronous i/o operation. */
+Frees the asynchronous io system. */
+UNIV_INTERN
+void
+os_aio_free(void);
+/*=============*/
+/*******************************************************************//**
+Requests an asynchronous i/o operation.
+@return TRUE if request was queued successfully, FALSE if fail */
+UNIV_INTERN
ibool
os_aio(
/*===*/
- /* out: TRUE if request was queued
- successfully, FALSE if fail */
- ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */
- ulint mode, /* in: OS_AIO_NORMAL, ..., possibly ORed
+ ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
+ ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed
to OS_AIO_SIMULATED_WAKE_LATER: the
last flag advises this function not to wake
i/o-handler threads, but the caller will
@@ -575,65 +649,68 @@ os_aio(
because i/os are not actually handled until
all have been posted: use with great
caution! */
- const char* name, /* in: name of the file or path as a
+ const char* name, /*!< in: name of the file or path as a
null-terminated string */
- os_file_t file, /* in: handle to a file */
- void* buf, /* in: buffer where to read or from which
+ os_file_t file, /*!< in: handle to a file */
+ void* buf, /*!< in: buffer where to read or from which
to write */
- ulint offset, /* in: least significant 32 bits of file
+ ulint offset, /*!< in: least significant 32 bits of file
offset where to read or write */
- ulint offset_high, /* in: most significant 32 bits of
+ ulint offset_high, /*!< in: most significant 32 bits of
offset */
- ulint n, /* in: number of bytes to read or write */
- fil_node_t* message1,/* in: messages for the aio handler (these
- can be used to identify a completed aio
- operation); if mode is OS_AIO_SYNC, these
- are ignored */
- void* message2);
-/****************************************************************************
+ ulint n, /*!< in: number of bytes to read or write */
+ fil_node_t* message1,/*!< in: message for the aio handler
+ (can be used to identify a completed
+ aio operation); ignored if mode is
+ OS_AIO_SYNC */
+ void* message2);/*!< in: message for the aio handler
+ (can be used to identify a completed
+ aio operation); ignored if mode is
+ OS_AIO_SYNC */
+/************************************************************************//**
Wakes up all async i/o threads so that they know to exit themselves in
shutdown. */
-
+UNIV_INTERN
void
os_aio_wake_all_threads_at_shutdown(void);
/*=====================================*/
-/****************************************************************************
+/************************************************************************//**
Waits until there are no pending writes in os_aio_write_array. There can
be other, synchronous, pending writes. */
-
+UNIV_INTERN
void
os_aio_wait_until_no_pending_writes(void);
/*=====================================*/
-/**************************************************************************
+/**********************************************************************//**
Wakes up simulated aio i/o-handler threads if they have something to do. */
-
+UNIV_INTERN
void
os_aio_simulated_wake_handler_threads(void);
/*=======================================*/
-/**************************************************************************
+/**********************************************************************//**
This function can be called if one wants to post a batch of reads and
prefers an i/o-handler thread to handle them all at once later. You must
call os_aio_simulated_wake_handler_threads later to ensure the threads
are not left sleeping! */
-
+UNIV_INTERN
void
os_aio_simulated_put_read_threads_to_sleep(void);
/*============================================*/
#ifdef WIN_ASYNC_IO
-/**************************************************************************
+/**********************************************************************//**
This function is only used in Windows asynchronous i/o.
Waits for an aio operation to complete. This function is used to wait the
for completed requests. The aio array of pending requests is divided
into segments. The thread specifies which segment or slot it wants to wait
for. NOTE: this function will also take care of freeing the aio slot,
-therefore no other thread is allowed to do the freeing! */
-
+therefore no other thread is allowed to do the freeing!
+@return TRUE if the aio operation succeeded */
+UNIV_INTERN
ibool
os_aio_windows_handle(
/*==================*/
- /* out: TRUE if the aio operation succeeded */
- ulint segment, /* in: the number of the segment in the aio
+ ulint segment, /*!< in: the number of the segment in the aio
arrays to wait for; segment 0 is the ibuf
i/o thread, segment 1 the log i/o thread,
then follow the non-ibuf read threads, and as
@@ -641,95 +718,88 @@ os_aio_windows_handle(
this is ULINT_UNDEFINED, then it means that
sync aio is used, and this parameter is
ignored */
- ulint pos, /* this parameter is used only in sync aio:
+ ulint pos, /*!< in: this parameter is used only in sync aio:
wait for the aio slot at this position */
- fil_node_t**message1, /* out: the messages passed with the aio
+ fil_node_t**message1, /*!< out: the messages passed with the aio
request; note that also in the case where
the aio operation failed, these output
parameters are valid and can be used to
restart the operation, for example */
void** message2,
- ulint* type); /* out: OS_FILE_WRITE or ..._READ */
+ ulint* type); /*!< out: OS_FILE_WRITE or ..._READ */
#endif
-/* Currently we do not use Posix async i/o */
-#ifdef POSIX_ASYNC_IO
-/**************************************************************************
-This function is only used in Posix asynchronous i/o. Waits for an aio
-operation to complete. */
-
-ibool
-os_aio_posix_handle(
-/*================*/
- /* out: TRUE if the aio operation succeeded */
- ulint array_no, /* in: array number 0 - 3 */
- fil_node_t**message1, /* out: the messages passed with the aio
- request; note that also in the case where
- the aio operation failed, these output
- parameters are valid and can be used to
- restart the operation, for example */
- void** message2);
-#endif
-/**************************************************************************
+/**********************************************************************//**
Does simulated aio. This function should be called by an i/o-handler
-thread. */
-
+thread.
+@return TRUE if the aio operation succeeded */
+UNIV_INTERN
ibool
os_aio_simulated_handle(
/*====================*/
- /* out: TRUE if the aio operation succeeded */
- ulint segment, /* in: the number of the segment in the aio
+ ulint segment, /*!< in: the number of the segment in the aio
arrays to wait for; segment 0 is the ibuf
i/o thread, segment 1 the log i/o thread,
then follow the non-ibuf read threads, and as
the last are the non-ibuf write threads */
- fil_node_t**message1, /* out: the messages passed with the aio
+ fil_node_t**message1, /*!< out: the messages passed with the aio
request; note that also in the case where
the aio operation failed, these output
parameters are valid and can be used to
restart the operation, for example */
void** message2,
- ulint* type); /* out: OS_FILE_WRITE or ..._READ */
-/**************************************************************************
-Validates the consistency of the aio system. */
-
+ ulint* type); /*!< out: OS_FILE_WRITE or ..._READ */
+/**********************************************************************//**
+Validates the consistency of the aio system.
+@return TRUE if ok */
+UNIV_INTERN
ibool
os_aio_validate(void);
/*=================*/
- /* out: TRUE if ok */
-/**************************************************************************
+/**********************************************************************//**
Prints info of the aio arrays. */
-
+UNIV_INTERN
void
os_aio_print(
/*=========*/
- FILE* file); /* in: file where to print */
-/**************************************************************************
+ FILE* file); /*!< in: file where to print */
+/**********************************************************************//**
Refreshes the statistics used to print per-second averages. */
-
+UNIV_INTERN
void
os_aio_refresh_stats(void);
/*======================*/
#ifdef UNIV_DEBUG
-/**************************************************************************
+/**********************************************************************//**
Checks that all slots in the system have been freed, that is, there are
no pending io operations. */
-
+UNIV_INTERN
ibool
os_aio_all_slots_free(void);
/*=======================*/
#endif /* UNIV_DEBUG */
-/***********************************************************************
-This function returns information about the specified file */
+/*******************************************************************//**
+This function returns information about the specified file
+@return TRUE if stat information found */
+UNIV_INTERN
ibool
os_file_get_status(
/*===============*/
- /* out: TRUE if stat
- information found */
- const char* path, /* in: pathname of the file */
- os_file_stat_t* stat_info); /* information of a file in a
+ const char* path, /*!< in: pathname of the file */
+ os_file_stat_t* stat_info); /*!< information of a file in a
directory */
+#if !defined(UNIV_HOTBACKUP) && !defined(__NETWARE__)
+/*********************************************************************//**
+Creates a temporary file that will be deleted on close.
+This function is defined in ha_innodb.cc.
+@return temporary file descriptor, or < 0 on error */
+UNIV_INTERN
+int
+innobase_mysql_tmpfile(void);
+/*========================*/
+#endif /* !UNIV_HOTBACKUP && !__NETWARE__ */
+
#endif
diff --git a/storage/innobase/include/os0proc.h b/storage/innobase/include/os0proc.h
index f54e08de7ee..fd46bd7db87 100644
--- a/storage/innobase/include/os0proc.h
+++ b/storage/innobase/include/os0proc.h
@@ -1,9 +1,26 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/os0proc.h
The interface to the operating system
process control primitives
-(c) 1995 Innobase Oy
-
Created 9/30/1995 Heikki Tuuri
*******************************************************/
@@ -20,126 +37,38 @@ Created 9/30/1995 Heikki Tuuri
typedef void* os_process_t;
typedef unsigned long int os_process_id_t;
-/* The cell type in os_awe_allocate_mem page info */
-#if defined(__WIN2000__) && defined(ULONG_PTR)
-typedef ULONG_PTR os_awe_t;
-#else
-typedef ulint os_awe_t;
-#endif
-
-/* Physical page size when Windows AWE is used. This is the normal
-page size of an Intel x86 processor. We cannot use AWE with 2 MB or 4 MB
-pages. */
-#define OS_AWE_X86_PAGE_SIZE 4096
-
extern ibool os_use_large_pages;
/* Large page size. This may be a boot-time option on some platforms */
extern ulint os_large_page_size;
-/********************************************************************
-Windows AWE support. Tries to enable the "lock pages in memory" privilege for
-the current process so that the current process can allocate memory-locked
-virtual address space to act as the window where AWE maps physical memory. */
-
-ibool
-os_awe_enable_lock_pages_in_mem(void);
-/*=================================*/
- /* out: TRUE if success, FALSE if error;
- prints error info to stderr if no success */
-/********************************************************************
-Allocates physical RAM memory up to 64 GB in an Intel 32-bit x86
-processor. */
-
-ibool
-os_awe_allocate_physical_mem(
-/*=========================*/
- /* out: TRUE if success */
- os_awe_t** page_info, /* out, own: array of opaque data containing
- the info for allocated physical memory pages;
- each allocated 4 kB physical memory page has
- one slot of type os_awe_t in the array */
- ulint n_megabytes); /* in: number of megabytes to allocate */
-/********************************************************************
-Allocates a window in the virtual address space where we can map then
-pages of physical memory. */
-
-byte*
-os_awe_allocate_virtual_mem_window(
-/*===============================*/
- /* out, own: allocated memory, or NULL if did not
- succeed */
- ulint size); /* in: virtual memory allocation size in bytes, must
- be < 2 GB */
-/********************************************************************
-With this function you can map parts of physical memory allocated with
-the ..._allocate_physical_mem to the virtual address space allocated with
-the previous function. Intel implements this so that the process page
-tables are updated accordingly. A test on a 1.5 GHz AMD processor and XP
-showed that this takes < 1 microsecond, much better than the estimated 80 us
-for copying a 16 kB page memory to memory. But, the operation will at least
-partially invalidate the translation lookaside buffer (TLB) of all
-processors. Under a real-world load the performance hit may be bigger. */
-
-ibool
-os_awe_map_physical_mem_to_window(
-/*==============================*/
- /* out: TRUE if success; the function
- calls exit(1) in case of an error */
- byte* ptr, /* in: a page-aligned pointer to
- somewhere in the virtual address
- space window; we map the physical mem
- pages here */
- ulint n_mem_pages, /* in: number of 4 kB mem pages to
- map */
- os_awe_t* page_info); /* in: array of page infos for those
- pages; each page has one slot in the
- array */
-/********************************************************************
+/****************************************************************//**
Converts the current process id to a number. It is not guaranteed that the
number is unique. In Linux returns the 'process number' of the current
thread. That number is the same as one sees in 'top', for example. In Linux
-the thread id is not the same as one sees in 'top'. */
-
+the thread id is not the same as one sees in 'top'.
+@return process id as a number */
+UNIV_INTERN
ulint
os_proc_get_number(void);
/*====================*/
-/********************************************************************
-Allocates non-cacheable memory. */
-
-void*
-os_mem_alloc_nocache(
-/*=================*/
- /* out: allocated memory */
- ulint n); /* in: number of bytes */
-/********************************************************************
-Allocates large pages memory. */
-
+/****************************************************************//**
+Allocates large pages memory.
+@return allocated memory */
+UNIV_INTERN
void*
os_mem_alloc_large(
/*===============*/
- /* out: allocated memory */
- ulint n, /* in: number of bytes */
- ibool set_to_zero, /* in: TRUE if allocated memory
- should be set to zero if
- UNIV_SET_MEM_TO_ZERO is defined */
- ibool assert_on_error);/* in: if TRUE, we crash mysqld if
- the memory cannot be allocated */
-/********************************************************************
+ ulint* n); /*!< in/out: number of bytes */
+/****************************************************************//**
Frees large pages memory. */
-
+UNIV_INTERN
void
os_mem_free_large(
/*==============*/
-void *ptr); /* in: number of bytes */
-/********************************************************************
-Sets the priority boost for threads released from waiting within the current
-process. */
-
-void
-os_process_set_priority_boost(
-/*==========================*/
- ibool do_boost); /* in: TRUE if priority boost should be done,
- FALSE if not */
+ void *ptr, /*!< in: pointer returned by
+ os_mem_alloc_large() */
+ ulint size); /*!< in: size returned by
+ os_mem_alloc_large() */
#ifndef UNIV_NONINL
#include "os0proc.ic"
diff --git a/storage/innobase/include/os0proc.ic b/storage/innobase/include/os0proc.ic
index 651ba1f17e3..c9641644525 100644
--- a/storage/innobase/include/os0proc.ic
+++ b/storage/innobase/include/os0proc.ic
@@ -1,9 +1,26 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/os0proc.ic
The interface to the operating system
process control primitives
-(c) 1995 Innobase Oy
-
Created 9/30/1995 Heikki Tuuri
*******************************************************/
diff --git a/storage/innobase/include/os0sync.h b/storage/innobase/include/os0sync.h
index a39a331c297..0c22162b900 100644
--- a/storage/innobase/include/os0sync.h
+++ b/storage/innobase/include/os0sync.h
@@ -1,11 +1,36 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/os0sync.h
The interface to the operating system
synchronization primitives.
-(c) 1995 Innobase Oy
-
Created 9/6/1995 Heikki Tuuri
*******************************************************/
+
#ifndef os0sync_h
#define os0sync_h
@@ -14,52 +39,65 @@ Created 9/6/1995 Heikki Tuuri
#ifdef __WIN__
+/** Native mutex */
#define os_fast_mutex_t CRITICAL_SECTION
+/** Native event */
typedef HANDLE os_native_event_t;
+/** Operating system event */
typedef struct os_event_struct os_event_struct_t;
+/** Operating system event handle */
typedef os_event_struct_t* os_event_t;
+/** An asynchronous signal sent between threads */
struct os_event_struct {
os_native_event_t handle;
- /* Windows event */
+ /*!< Windows event */
UT_LIST_NODE_T(os_event_struct_t) os_event_list;
- /* list of all created events */
+ /*!< list of all created events */
};
#else
+/** Native mutex */
typedef pthread_mutex_t os_fast_mutex_t;
+/** Operating system event */
typedef struct os_event_struct os_event_struct_t;
+/** Operating system event handle */
typedef os_event_struct_t* os_event_t;
+/** An asynchronous signal sent between threads */
struct os_event_struct {
- os_fast_mutex_t os_mutex; /* this mutex protects the next
+ os_fast_mutex_t os_mutex; /*!< this mutex protects the next
fields */
- ibool is_set; /* this is TRUE when the event is
+ ibool is_set; /*!< this is TRUE when the event is
in the signaled state, i.e., a thread
does not stop if it tries to wait for
this event */
- ib_longlong signal_count; /* this is incremented each time
+ ib_int64_t signal_count; /*!< this is incremented each time
the event becomes signaled */
- pthread_cond_t cond_var; /* condition variable is used in
+ pthread_cond_t cond_var; /*!< condition variable is used in
waiting for the event */
UT_LIST_NODE_T(os_event_struct_t) os_event_list;
- /* list of all created events */
+ /*!< list of all created events */
};
#endif
+/** Operating system mutex */
typedef struct os_mutex_struct os_mutex_str_t;
+/** Operating system mutex handle */
typedef os_mutex_str_t* os_mutex_t;
+/** Denotes an infinite delay for os_event_wait_time() */
#define OS_SYNC_INFINITE_TIME ((ulint)(-1))
+/** Return value of os_event_wait_time() when the time is exceeded */
#define OS_SYNC_TIME_EXCEEDED 1
-/* Mutex protecting counts and the event and OS 'slow' mutex lists */
+/** Mutex protecting counts and the event and OS 'slow' mutex lists */
extern os_mutex_t os_sync_mutex;
-/* This is incremented by 1 in os_thread_create and decremented by 1 in
+/** This is incremented by 1 in os_thread_create and decremented by 1 in
os_thread_exit */
extern ulint os_thread_count;
@@ -67,70 +105,58 @@ extern ulint os_event_count;
extern ulint os_mutex_count;
extern ulint os_fast_mutex_count;
-/*************************************************************
+/*********************************************************//**
Initializes global event and OS 'slow' mutex lists. */
-
+UNIV_INTERN
void
os_sync_init(void);
/*==============*/
-/*************************************************************
+/*********************************************************//**
Frees created events and OS 'slow' mutexes. */
-
+UNIV_INTERN
void
os_sync_free(void);
/*==============*/
-/*************************************************************
+/*********************************************************//**
Creates an event semaphore, i.e., a semaphore which may just have two states:
signaled and nonsignaled. The created event is manual reset: it must be reset
-explicitly by calling sync_os_reset_event. */
-
+explicitly by calling os_event_reset().
+@return the event handle */
+UNIV_INTERN
os_event_t
os_event_create(
/*============*/
- /* out: the event handle */
- const char* name); /* in: the name of the event, if NULL
+ const char* name); /*!< in: the name of the event, if NULL
the event is created without a name */
-#ifdef __WIN__
-/*************************************************************
-Creates an auto-reset event semaphore, i.e., an event which is automatically
-reset when a single thread is released. Works only in Windows. */
-
-os_event_t
-os_event_create_auto(
-/*=================*/
- /* out: the event handle */
- const char* name); /* in: the name of the event, if NULL
- the event is created without a name */
-#endif
-/**************************************************************
+/**********************************************************//**
Sets an event semaphore to the signaled state: lets waiting threads
proceed. */
-
+UNIV_INTERN
void
os_event_set(
/*=========*/
- os_event_t event); /* in: event to set */
-/**************************************************************
+ os_event_t event); /*!< in: event to set */
+/**********************************************************//**
Resets an event semaphore to the nonsignaled state. Threads that try to
wait for the event will stop and wait until it is signaled again.
The return value should be passed to os_event_wait_low() if it is desired
that this thread should not wait in case of an intervening call to
os_event_set() between this os_event_reset() and the
os_event_wait_low() call. See comments for os_event_wait_low(). */
-
-ib_longlong
+UNIV_INTERN
+ib_int64_t
os_event_reset(
/*===========*/
- os_event_t event); /* in: event to reset */
-/**************************************************************
+ os_event_t event); /*!< in: event to reset */
+/**********************************************************//**
Frees an event object. */
-
+UNIV_INTERN
void
os_event_free(
/*==========*/
- os_event_t event); /* in: event to free */
+ os_event_t event); /*!< in: event to free */
-/**************************************************************
+/**********************************************************//**
Waits for an event object until it is in the signaled state. If
srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the
waiting thread when the event becomes signaled (or immediately if the
@@ -150,116 +176,267 @@ thread C calls os_event_wait() [infinite wait!]
Where such a scenario is possible, to avoid infinite wait, the
value returned by os_event_reset() should be passed in as
reset_sig_count. */
-
-#define os_event_wait(event) os_event_wait_low((event), 0)
-
+UNIV_INTERN
void
os_event_wait_low(
/*==============*/
- os_event_t event, /* in: event to wait */
- ib_longlong reset_sig_count);/* in: zero or the value
+ os_event_t event, /*!< in: event to wait */
+ ib_int64_t reset_sig_count);/*!< in: zero or the value
returned by previous call of
os_event_reset(). */
-/**************************************************************
-Waits for an event object until it is in the signaled state or
-a timeout is exceeded. In Unix the timeout is always infinite. */
+#define os_event_wait(event) os_event_wait_low(event, 0)
+/**********************************************************//**
+Waits for an event object until it is in the signaled state or
+a timeout is exceeded. In Unix the timeout is always infinite.
+@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
+UNIV_INTERN
ulint
os_event_wait_time(
/*===============*/
- /* out: 0 if success,
- OS_SYNC_TIME_EXCEEDED if timeout
- was exceeded */
- os_event_t event, /* in: event to wait */
- ulint time); /* in: timeout in microseconds, or
+ os_event_t event, /*!< in: event to wait */
+ ulint time); /*!< in: timeout in microseconds, or
OS_SYNC_INFINITE_TIME */
#ifdef __WIN__
-/**************************************************************
+/**********************************************************//**
Waits for any event in an OS native event array. Returns if even a single
-one is signaled or becomes signaled. */
-
+one is signaled or becomes signaled.
+@return index of the event which was signaled */
+UNIV_INTERN
ulint
os_event_wait_multiple(
/*===================*/
- /* out: index of the event
- which was signaled */
- ulint n, /* in: number of events in the
+ ulint n, /*!< in: number of events in the
array */
os_native_event_t* native_event_array);
- /* in: pointer to an array of event
+ /*!< in: pointer to an array of event
handles */
#endif
-/*************************************************************
+/*********************************************************//**
Creates an operating system mutex semaphore. Because these are slow, the
-mutex semaphore of InnoDB itself (mutex_t) should be used where possible. */
-
+mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
+@return the mutex handle */
+UNIV_INTERN
os_mutex_t
os_mutex_create(
/*============*/
- /* out: the mutex handle */
- const char* name); /* in: the name of the mutex, if NULL
+ const char* name); /*!< in: the name of the mutex, if NULL
the mutex is created without a name */
-/**************************************************************
+/**********************************************************//**
Acquires ownership of a mutex semaphore. */
-
+UNIV_INTERN
void
os_mutex_enter(
/*===========*/
- os_mutex_t mutex); /* in: mutex to acquire */
-/**************************************************************
+ os_mutex_t mutex); /*!< in: mutex to acquire */
+/**********************************************************//**
Releases ownership of a mutex. */
-
+UNIV_INTERN
void
os_mutex_exit(
/*==========*/
- os_mutex_t mutex); /* in: mutex to release */
-/**************************************************************
+ os_mutex_t mutex); /*!< in: mutex to release */
+/**********************************************************//**
Frees a mutex object. */
-
+UNIV_INTERN
void
os_mutex_free(
/*==========*/
- os_mutex_t mutex); /* in: mutex to free */
-/**************************************************************
+ os_mutex_t mutex); /*!< in: mutex to free */
+/**********************************************************//**
Acquires ownership of a fast mutex. Currently in Windows this is the same
-as os_fast_mutex_lock! */
+as os_fast_mutex_lock!
+@return 0 if success, != 0 if was reserved by another thread */
UNIV_INLINE
ulint
os_fast_mutex_trylock(
/*==================*/
- /* out: 0 if success, != 0 if
- was reserved by another
- thread */
- os_fast_mutex_t* fast_mutex); /* in: mutex to acquire */
-/**************************************************************
+ os_fast_mutex_t* fast_mutex); /*!< in: mutex to acquire */
+/**********************************************************//**
Releases ownership of a fast mutex. */
-
+UNIV_INTERN
void
os_fast_mutex_unlock(
/*=================*/
- os_fast_mutex_t* fast_mutex); /* in: mutex to release */
-/*************************************************************
+ os_fast_mutex_t* fast_mutex); /*!< in: mutex to release */
+/*********************************************************//**
Initializes an operating system fast mutex semaphore. */
-
+UNIV_INTERN
void
os_fast_mutex_init(
/*===============*/
- os_fast_mutex_t* fast_mutex); /* in: fast mutex */
-/**************************************************************
+ os_fast_mutex_t* fast_mutex); /*!< in: fast mutex */
+/**********************************************************//**
Acquires ownership of a fast mutex. */
-
+UNIV_INTERN
void
os_fast_mutex_lock(
/*===============*/
- os_fast_mutex_t* fast_mutex); /* in: mutex to acquire */
-/**************************************************************
+ os_fast_mutex_t* fast_mutex); /*!< in: mutex to acquire */
+/**********************************************************//**
Frees a fast mutex object. */
-
+UNIV_INTERN
void
os_fast_mutex_free(
/*===============*/
- os_fast_mutex_t* fast_mutex); /* in: mutex to free */
+ os_fast_mutex_t* fast_mutex); /*!< in: mutex to free */
+
+/**********************************************************//**
+Atomic compare-and-swap and increment for InnoDB. */
+
+#if defined(HAVE_IB_GCC_ATOMIC_BUILTINS)
+
+#define HAVE_ATOMIC_BUILTINS
+
+/**********************************************************//**
+Returns true if swapped, ptr is pointer to target, old_val is value to
+compare to, new_val is the value to swap in. */
+
+# define os_compare_and_swap(ptr, old_val, new_val) \
+ __sync_bool_compare_and_swap(ptr, old_val, new_val)
+
+# define os_compare_and_swap_ulint(ptr, old_val, new_val) \
+ os_compare_and_swap(ptr, old_val, new_val)
+
+# define os_compare_and_swap_lint(ptr, old_val, new_val) \
+ os_compare_and_swap(ptr, old_val, new_val)
+
+# ifdef HAVE_IB_ATOMIC_PTHREAD_T_GCC
+# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
+ os_compare_and_swap(ptr, old_val, new_val)
+# define INNODB_RW_LOCKS_USE_ATOMICS
+# define IB_ATOMICS_STARTUP_MSG \
+ "Mutexes and rw_locks use GCC atomic builtins"
+# else /* HAVE_IB_ATOMIC_PTHREAD_T_GCC */
+# define IB_ATOMICS_STARTUP_MSG \
+ "Mutexes use GCC atomic builtins, rw_locks do not"
+# endif /* HAVE_IB_ATOMIC_PTHREAD_T_GCC */
+
+/**********************************************************//**
+Returns the resulting value, ptr is pointer to target, amount is the
+amount of increment. */
+
+# define os_atomic_increment(ptr, amount) \
+ __sync_add_and_fetch(ptr, amount)
+
+# define os_atomic_increment_lint(ptr, amount) \
+ os_atomic_increment(ptr, amount)
+
+# define os_atomic_increment_ulint(ptr, amount) \
+ os_atomic_increment(ptr, amount)
+
+/**********************************************************//**
+Returns the old value of *ptr, atomically sets *ptr to new_val */
+
+# define os_atomic_test_and_set_byte(ptr, new_val) \
+ __sync_lock_test_and_set(ptr, new_val)
+
+#elif defined(HAVE_IB_SOLARIS_ATOMICS)
+
+#define HAVE_ATOMIC_BUILTINS
+
+/* If not compiling with GCC, or if GCC does not support the atomic
+intrinsics, and we are running on Solaris >= 10, use Solaris atomics */
+
+#include <atomic.h>
+
+/**********************************************************//**
+Returns true if swapped, ptr is pointer to target, old_val is value to
+compare to, new_val is the value to swap in. */
+
+# define os_compare_and_swap_ulint(ptr, old_val, new_val) \
+ (atomic_cas_ulong(ptr, old_val, new_val) == old_val)
+
+# define os_compare_and_swap_lint(ptr, old_val, new_val) \
+ ((lint)atomic_cas_ulong((ulong_t*) ptr, old_val, new_val) == old_val)
+
+# ifdef HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS
+# if SIZEOF_PTHREAD_T == 4
+# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
+ ((pthread_t)atomic_cas_32(ptr, old_val, new_val) == old_val)
+# elif SIZEOF_PTHREAD_T == 8
+# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
+ ((pthread_t)atomic_cas_64(ptr, old_val, new_val) == old_val)
+# else
+# error "SIZEOF_PTHREAD_T != 4 or 8"
+# endif /* SIZEOF_PTHREAD_T CHECK */
+# define INNODB_RW_LOCKS_USE_ATOMICS
+# define IB_ATOMICS_STARTUP_MSG \
+ "Mutexes and rw_locks use Solaris atomic functions"
+# else /* HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS */
+# define IB_ATOMICS_STARTUP_MSG \
+ "Mutexes use Solaris atomic functions, rw_locks do not"
+# endif /* HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS */
+
+/**********************************************************//**
+Returns the resulting value, ptr is pointer to target, amount is the
+amount of increment. */
+
+# define os_atomic_increment_lint(ptr, amount) \
+ atomic_add_long_nv((ulong_t*) ptr, amount)
+
+# define os_atomic_increment_ulint(ptr, amount) \
+ atomic_add_long_nv(ptr, amount)
+
+/**********************************************************//**
+Returns the old value of *ptr, atomically sets *ptr to new_val */
+
+# define os_atomic_test_and_set_byte(ptr, new_val) \
+ atomic_swap_uchar(ptr, new_val)
+
+#elif defined(HAVE_WINDOWS_ATOMICS)
+
+#define HAVE_ATOMIC_BUILTINS
+
+/* On Windows, use Windows atomics / interlocked */
+# ifdef _WIN64
+# define win_cmp_and_xchg InterlockedCompareExchange64
+# define win_xchg_and_add InterlockedExchangeAdd64
+# else /* _WIN64 */
+# define win_cmp_and_xchg InterlockedCompareExchange
+# define win_xchg_and_add InterlockedExchangeAdd
+# endif
+
+/**********************************************************//**
+Returns true if swapped, ptr is pointer to target, old_val is value to
+compare to, new_val is the value to swap in. */
+
+# define os_compare_and_swap_ulint(ptr, old_val, new_val) \
+ (win_cmp_and_xchg(ptr, new_val, old_val) == old_val)
+
+# define os_compare_and_swap_lint(ptr, old_val, new_val) \
+ (win_cmp_and_xchg(ptr, new_val, old_val) == old_val)
+
+/* windows thread objects can always be passed to windows atomic functions */
+# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
+ (InterlockedCompareExchange(ptr, new_val, old_val) == old_val)
+# define INNODB_RW_LOCKS_USE_ATOMICS
+# define IB_ATOMICS_STARTUP_MSG \
+ "Mutexes and rw_locks use Windows interlocked functions"
+
+/**********************************************************//**
+Returns the resulting value, ptr is pointer to target, amount is the
+amount of increment. */
+
+# define os_atomic_increment_lint(ptr, amount) \
+ (win_xchg_and_add(ptr, amount) + amount)
+
+# define os_atomic_increment_ulint(ptr, amount) \
+ ((ulint) (win_xchg_and_add(ptr, amount) + amount))
+
+/**********************************************************//**
+Returns the old value of *ptr, atomically sets *ptr to new_val.
+InterlockedExchange() operates on LONG, and the LONG will be
+clobbered */
+
+# define os_atomic_test_and_set_byte(ptr, new_val) \
+ ((byte) InterlockedExchange(ptr, new_val))
+
+#else
+# define IB_ATOMICS_STARTUP_MSG \
+ "Mutexes and rw_locks use InnoDB's own implementation"
+#endif
#ifndef UNIV_NONINL
#include "os0sync.ic"
diff --git a/storage/innobase/include/os0sync.ic b/storage/innobase/include/os0sync.ic
index 75dea9369c2..1f3ce38fa65 100644
--- a/storage/innobase/include/os0sync.ic
+++ b/storage/innobase/include/os0sync.ic
@@ -1,7 +1,24 @@
-/******************************************************
-The interface to the operating system synchronization primitives.
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1995 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/os0sync.ic
+The interface to the operating system synchronization primitives.
Created 9/6/1995 Heikki Tuuri
*******************************************************/
@@ -10,30 +27,21 @@ Created 9/6/1995 Heikki Tuuri
#include <winbase.h>
#endif
-/**************************************************************
+/**********************************************************//**
Acquires ownership of a fast mutex. Currently in Windows this is the same
-as os_fast_mutex_lock! */
+as os_fast_mutex_lock!
+@return 0 if success, != 0 if was reserved by another thread */
UNIV_INLINE
ulint
os_fast_mutex_trylock(
/*==================*/
- /* out: 0 if success, != 0 if
- was reserved by another
- thread */
- os_fast_mutex_t* fast_mutex) /* in: mutex to acquire */
+ os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */
{
#ifdef __WIN__
EnterCriticalSection(fast_mutex);
return(0);
#else
-#if defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10)
- /* Since the hot backup version is standalone, MySQL does not redefine
- pthread_mutex_trylock for HP-UX-10.20, and consequently we must invert
- the return value here */
-
- return((ulint) (1 - pthread_mutex_trylock(fast_mutex)));
-#else
/* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock
so that it returns 0 on success. In the operating system
libraries, HP-UX-10.20 follows the old Posix 1003.4a Draft 4 and
@@ -42,5 +50,4 @@ os_fast_mutex_trylock(
return((ulint) pthread_mutex_trylock(fast_mutex));
#endif
-#endif
}
diff --git a/storage/innobase/include/os0thread.h b/storage/innobase/include/os0thread.h
index 3cf05feb3a9..6583de0005f 100644
--- a/storage/innobase/include/os0thread.h
+++ b/storage/innobase/include/os0thread.h
@@ -1,9 +1,26 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/os0thread.h
The interface to the operating system
process and thread control primitives
-(c) 1995 Innobase Oy
-
Created 9/8/1995 Heikki Tuuri
*******************************************************/
@@ -27,11 +44,11 @@ can wait inside InnoDB */
#ifdef __WIN__
typedef void* os_thread_t;
-typedef ulint os_thread_id_t; /* In Windows the thread id
+typedef unsigned long os_thread_id_t; /*!< In Windows the thread id
is an unsigned long int */
#else
typedef pthread_t os_thread_t;
-typedef os_thread_t os_thread_id_t; /* In Unix we use the thread
+typedef os_thread_t os_thread_id_t; /*!< In Unix we use the thread
handle itself as the id of
the thread */
#endif
@@ -39,101 +56,101 @@ typedef os_thread_t os_thread_id_t; /* In Unix we use the thread
/* Define a function pointer type to use in a typecast */
typedef void* (*os_posix_f_t) (void*);
-/*******************************************************************
-Compares two thread ids for equality. */
-
+/***************************************************************//**
+Compares two thread ids for equality.
+@return TRUE if equal */
+UNIV_INTERN
ibool
os_thread_eq(
/*=========*/
- /* out: TRUE if equal */
- os_thread_id_t a, /* in: OS thread or thread id */
- os_thread_id_t b); /* in: OS thread or thread id */
-/********************************************************************
+ os_thread_id_t a, /*!< in: OS thread or thread id */
+ os_thread_id_t b); /*!< in: OS thread or thread id */
+/****************************************************************//**
Converts an OS thread id to a ulint. It is NOT guaranteed that the ulint is
-unique for the thread though! */
-
+unique for the thread though!
+@return thread identifier as a number */
+UNIV_INTERN
ulint
os_thread_pf(
/*=========*/
- /* out: unsigned long int */
- os_thread_id_t a); /* in: thread or thread id */
-/********************************************************************
+ os_thread_id_t a); /*!< in: OS thread identifier */
+/****************************************************************//**
Creates a new thread of execution. The execution starts from
the function given. The start function takes a void* parameter
and returns a ulint.
NOTE: We count the number of threads in os_thread_exit(). A created
-thread should always use that to exit and not use return() to exit. */
-
+thread should always use that to exit and not use return() to exit.
+@return handle to the thread */
+UNIV_INTERN
os_thread_t
os_thread_create(
/*=============*/
- /* out: handle to the thread */
#ifndef __WIN__
os_posix_f_t start_f,
#else
- ulint (*start_f)(void*), /* in: pointer to function
+ ulint (*start_f)(void*), /*!< in: pointer to function
from which to start */
#endif
- void* arg, /* in: argument to start
+ void* arg, /*!< in: argument to start
function */
- os_thread_id_t* thread_id); /* out: id of the created
+ os_thread_id_t* thread_id); /*!< out: id of the created
thread, or NULL */
-int
-os_thread_join(
-/*===========*/
- os_thread_id_t thread_id); /* in: id of the thread to join */
-/*********************************************************************
-Exits the current thread. */
+/*****************************************************************//**
+Exits the current thread. */
+UNIV_INTERN
void
os_thread_exit(
/*===========*/
- void* exit_value); /* in: exit value; in Windows this void*
+ void* exit_value); /*!< in: exit value; in Windows this void*
is cast as a DWORD */
-/*********************************************************************
-Returns the thread identifier of current thread. */
-
+/*****************************************************************//**
+Returns the thread identifier of current thread.
+@return current thread identifier */
+UNIV_INTERN
os_thread_id_t
os_thread_get_curr_id(void);
/*========================*/
-/*********************************************************************
-Returns handle to the current thread. */
-
+/*****************************************************************//**
+Returns handle to the current thread.
+@return current thread handle */
+UNIV_INTERN
os_thread_t
os_thread_get_curr(void);
/*====================*/
-/*********************************************************************
+/*****************************************************************//**
Advises the os to give up remainder of the thread's time slice. */
-
+UNIV_INTERN
void
os_thread_yield(void);
/*=================*/
-/*********************************************************************
+/*****************************************************************//**
The thread sleeps at least the time given in microseconds. */
-
+UNIV_INTERN
void
os_thread_sleep(
/*============*/
- ulint tm); /* in: time in microseconds */
-/**********************************************************************
-Gets a thread priority. */
-
+ ulint tm); /*!< in: time in microseconds */
+/******************************************************************//**
+Gets a thread priority.
+@return priority */
+UNIV_INTERN
ulint
os_thread_get_priority(
/*===================*/
- /* out: priority */
- os_thread_t handle);/* in: OS handle to the thread */
-/**********************************************************************
+ os_thread_t handle);/*!< in: OS handle to the thread */
+/******************************************************************//**
Sets a thread priority. */
-
+UNIV_INTERN
void
os_thread_set_priority(
/*===================*/
- os_thread_t handle, /* in: OS handle to the thread */
- ulint pri); /* in: priority: one of OS_PRIORITY_... */
-/**********************************************************************
-Gets the last operating system error code for the calling thread. */
-
+ os_thread_t handle, /*!< in: OS handle to the thread */
+ ulint pri); /*!< in: priority: one of OS_PRIORITY_... */
+/******************************************************************//**
+Gets the last operating system error code for the calling thread.
+@return last error on Windows, 0 otherwise */
+UNIV_INTERN
ulint
os_thread_get_last_error(void);
/*==========================*/
diff --git a/storage/innobase/include/os0thread.ic b/storage/innobase/include/os0thread.ic
index a75aa3abb34..f89bc40b4fa 100644
--- a/storage/innobase/include/os0thread.ic
+++ b/storage/innobase/include/os0thread.ic
@@ -1,8 +1,25 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/os0thread.ic
The interface to the operating system
process and thread control primitives
-(c) 1995 Innobase Oy
-
Created 9/8/1995 Heikki Tuuri
*******************************************************/
diff --git a/storage/innobase/include/page0cur.h b/storage/innobase/include/page0cur.h
index 04f731414a3..1544b0abe1c 100644
--- a/storage/innobase/include/page0cur.h
+++ b/storage/innobase/include/page0cur.h
@@ -1,7 +1,24 @@
-/************************************************************************
-The page cursor
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
-(c) 1994-1996 Innobase Oy
+/********************************************************************//**
+@file include/page0cur.h
+The page cursor
Created 10/4/1994 Heikki Tuuri
*************************************************************************/
@@ -11,7 +28,7 @@ Created 10/4/1994 Heikki Tuuri
#include "univ.i"
-#include "page0types.h"
+#include "buf0types.h"
#include "page0page.h"
#include "rem0rec.h"
#include "data0data.h"
@@ -35,248 +52,291 @@ Created 10/4/1994 Heikki Tuuri
# define PAGE_CUR_DBG 6 /* As PAGE_CUR_LE, but skips search shortcut */
#endif /* UNIV_SEARCH_DEBUG */
-#ifdef PAGE_CUR_ADAPT
-# ifdef UNIV_SEARCH_PERF_STAT
-extern ulint page_cur_short_succ;
-# endif /* UNIV_SEARCH_PERF_STAT */
-#endif /* PAGE_CUR_ADAPT */
-
-/*************************************************************
-Gets pointer to the page frame where the cursor is positioned. */
+#ifdef UNIV_DEBUG
+/*********************************************************//**
+Gets pointer to the page frame where the cursor is positioned.
+@return page */
UNIV_INLINE
page_t*
page_cur_get_page(
/*==============*/
- /* out: page */
- page_cur_t* cur); /* in: page cursor */
-/*************************************************************
-Gets the record where the cursor is positioned. */
+ page_cur_t* cur); /*!< in: page cursor */
+/*********************************************************//**
+Gets pointer to the buffer block where the cursor is positioned.
+@return buffer block */
+UNIV_INLINE
+buf_block_t*
+page_cur_get_block(
+/*===============*/
+ page_cur_t* cur); /*!< in: page cursor */
+/*********************************************************//**
+Gets pointer to the page_zip_des_t of the block where the cursor is positioned.
+@return buf_block_get_page_zip() of the cursor's buffer block */
+UNIV_INLINE
+page_zip_des_t*
+page_cur_get_page_zip(
+/*==================*/
+ page_cur_t* cur); /*!< in: page cursor */
+/*********************************************************//**
+Gets the record where the cursor is positioned.
+@return record */
UNIV_INLINE
rec_t*
page_cur_get_rec(
/*=============*/
- /* out: record */
- page_cur_t* cur); /* in: page cursor */
-/*************************************************************
+ page_cur_t* cur); /*!< in: page cursor */
+#else /* UNIV_DEBUG */
+# define page_cur_get_page(cur) page_align((cur)->rec)
+# define page_cur_get_block(cur) (cur)->block
+# define page_cur_get_page_zip(cur) buf_block_get_page_zip((cur)->block)
+# define page_cur_get_rec(cur) (cur)->rec
+#endif /* UNIV_DEBUG */
+/*********************************************************//**
Sets the cursor object to point before the first user record
on the page. */
UNIV_INLINE
void
page_cur_set_before_first(
/*======================*/
- page_t* page, /* in: index page */
- page_cur_t* cur); /* in: cursor */
-/*************************************************************
+ const buf_block_t* block, /*!< in: index page */
+ page_cur_t* cur); /*!< in: cursor */
+/*********************************************************//**
Sets the cursor object to point after the last user record on
the page. */
UNIV_INLINE
void
page_cur_set_after_last(
/*====================*/
- page_t* page, /* in: index page */
- page_cur_t* cur); /* in: cursor */
-/*************************************************************
-Returns TRUE if the cursor is before first user record on page. */
+ const buf_block_t* block, /*!< in: index page */
+ page_cur_t* cur); /*!< in: cursor */
+/*********************************************************//**
+Returns TRUE if the cursor is before first user record on page.
+@return TRUE if at start */
UNIV_INLINE
ibool
page_cur_is_before_first(
/*=====================*/
- /* out: TRUE if at start */
- const page_cur_t* cur); /* in: cursor */
-/*************************************************************
-Returns TRUE if the cursor is after last user record. */
+ const page_cur_t* cur); /*!< in: cursor */
+/*********************************************************//**
+Returns TRUE if the cursor is after last user record.
+@return TRUE if at end */
UNIV_INLINE
ibool
page_cur_is_after_last(
/*===================*/
- /* out: TRUE if at end */
- const page_cur_t* cur); /* in: cursor */
-/**************************************************************
+ const page_cur_t* cur); /*!< in: cursor */
+/**********************************************************//**
Positions the cursor on the given record. */
UNIV_INLINE
void
page_cur_position(
/*==============*/
- rec_t* rec, /* in: record on a page */
- page_cur_t* cur); /* in: page cursor */
-/**************************************************************
+ const rec_t* rec, /*!< in: record on a page */
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+ page_cur_t* cur); /*!< out: page cursor */
+/**********************************************************//**
Invalidates a page cursor by setting the record pointer NULL. */
UNIV_INLINE
void
page_cur_invalidate(
/*================*/
- page_cur_t* cur); /* in: page cursor */
-/**************************************************************
+ page_cur_t* cur); /*!< out: page cursor */
+/**********************************************************//**
Moves the cursor to the next record on page. */
UNIV_INLINE
void
page_cur_move_to_next(
/*==================*/
- page_cur_t* cur); /* in: cursor; must not be after last */
-/**************************************************************
+ page_cur_t* cur); /*!< in/out: cursor; must not be after last */
+/**********************************************************//**
Moves the cursor to the previous record on page. */
UNIV_INLINE
void
page_cur_move_to_prev(
/*==================*/
- page_cur_t* cur); /* in: cursor; must not before first */
-/***************************************************************
+ page_cur_t* cur); /*!< in/out: cursor; not before first */
+#ifndef UNIV_HOTBACKUP
+/***********************************************************//**
Inserts a record next to page cursor. Returns pointer to inserted record if
succeed, i.e., enough space available, NULL otherwise. The cursor stays at
-the same position. */
+the same logical position, but the physical position may change if it is
+pointing to a compressed page that was reorganized.
+@return pointer to record if succeed, NULL otherwise */
UNIV_INLINE
rec_t*
page_cur_tuple_insert(
/*==================*/
- /* out: pointer to record if succeed, NULL
- otherwise */
- page_cur_t* cursor, /* in: a page cursor */
- dtuple_t* tuple, /* in: pointer to a data tuple */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr); /* in: mini-transaction handle */
-/***************************************************************
+ page_cur_t* cursor, /*!< in/out: a page cursor */
+ const dtuple_t* tuple, /*!< in: pointer to a data tuple */
+ dict_index_t* index, /*!< in: record descriptor */
+ ulint n_ext, /*!< in: number of externally stored columns */
+ mtr_t* mtr); /*!< in: mini-transaction handle, or NULL */
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************//**
Inserts a record next to page cursor. Returns pointer to inserted record if
succeed, i.e., enough space available, NULL otherwise. The cursor stays at
-the same position. */
+the same logical position, but the physical position may change if it is
+pointing to a compressed page that was reorganized.
+@return pointer to record if succeed, NULL otherwise */
UNIV_INLINE
rec_t*
page_cur_rec_insert(
/*================*/
- /* out: pointer to record if succeed, NULL
- otherwise */
- page_cur_t* cursor, /* in: a page cursor */
- rec_t* rec, /* in: record to insert */
- dict_index_t* index, /* in: record descriptor */
- ulint* offsets,/* in: rec_get_offsets(rec, index) */
- mtr_t* mtr); /* in: mini-transaction handle */
-/***************************************************************
-Inserts a record next to page cursor. Returns pointer to inserted record if
-succeed, i.e., enough space available, NULL otherwise. The record to be
-inserted can be in a data tuple or as a physical record. The other parameter
-must then be NULL. The cursor stays at the same position. */
-
+ page_cur_t* cursor, /*!< in/out: a page cursor */
+ const rec_t* rec, /*!< in: record to insert */
+ dict_index_t* index, /*!< in: record descriptor */
+ ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
+ mtr_t* mtr); /*!< in: mini-transaction handle, or NULL */
+/***********************************************************//**
+Inserts a record next to page cursor on an uncompressed page.
+Returns pointer to inserted record if succeed, i.e., enough
+space available, NULL otherwise. The cursor stays at the same position.
+@return pointer to record if succeed, NULL otherwise */
+UNIV_INTERN
rec_t*
page_cur_insert_rec_low(
/*====================*/
- /* out: pointer to record if succeed, NULL
- otherwise */
- page_cur_t* cursor, /* in: a page cursor */
- dtuple_t* tuple, /* in: pointer to a data tuple or NULL */
- dict_index_t* index, /* in: record descriptor */
- rec_t* rec, /* in: pointer to a physical record or NULL */
- ulint* offsets,/* in: rec_get_offsets(rec, index) or NULL */
- mtr_t* mtr); /* in: mini-transaction handle */
-/*****************************************************************
+ rec_t* current_rec,/*!< in: pointer to current record after
+ which the new record is inserted */
+ dict_index_t* index, /*!< in: record descriptor */
+ const rec_t* rec, /*!< in: pointer to a physical record */
+ ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
+ mtr_t* mtr); /*!< in: mini-transaction handle, or NULL */
+/***********************************************************//**
+Inserts a record next to page cursor on a compressed and uncompressed
+page. Returns pointer to inserted record if succeed, i.e.,
+enough space available, NULL otherwise.
+The cursor stays at the same position.
+@return pointer to record if succeed, NULL otherwise */
+UNIV_INTERN
+rec_t*
+page_cur_insert_rec_zip(
+/*====================*/
+ rec_t** current_rec,/*!< in/out: pointer to current record after
+ which the new record is inserted */
+ buf_block_t* block, /*!< in: buffer block of *current_rec */
+ dict_index_t* index, /*!< in: record descriptor */
+ const rec_t* rec, /*!< in: pointer to a physical record */
+ ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
+ mtr_t* mtr); /*!< in: mini-transaction handle, or NULL */
+/*************************************************************//**
Copies records from page to a newly created page, from a given record onward,
including that record. Infimum and supremum records are not copied. */
-
+UNIV_INTERN
void
page_copy_rec_list_end_to_created_page(
/*===================================*/
- page_t* new_page, /* in: index page to copy to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: first record to copy */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr); /* in: mtr */
-/***************************************************************
+ page_t* new_page, /*!< in/out: index page to copy to */
+ rec_t* rec, /*!< in: first record to copy */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr); /*!< in: mtr */
+/***********************************************************//**
Deletes a record at the page cursor. The cursor is moved to the
next record after the deleted one. */
-
+UNIV_INTERN
void
page_cur_delete_rec(
/*================*/
- page_cur_t* cursor, /* in: a page cursor */
- dict_index_t* index, /* in: record descriptor */
- const ulint* offsets,/* in: rec_get_offsets(cursor->rec, index) */
- mtr_t* mtr); /* in: mini-transaction handle */
-/********************************************************************
-Searches the right position for a page cursor. */
+ page_cur_t* cursor, /*!< in/out: a page cursor */
+ dict_index_t* index, /*!< in: record descriptor */
+ const ulint* offsets,/*!< in: rec_get_offsets(cursor->rec, index) */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+#ifndef UNIV_HOTBACKUP
+/****************************************************************//**
+Searches the right position for a page cursor.
+@return number of matched fields on the left */
UNIV_INLINE
ulint
page_cur_search(
/*============*/
- /* out: number of matched fields on the left */
- page_t* page, /* in: index page */
- dict_index_t* index, /* in: record descriptor */
- dtuple_t* tuple, /* in: data tuple */
- ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
- or PAGE_CUR_GE */
- page_cur_t* cursor);/* out: page cursor */
-/********************************************************************
+ const buf_block_t* block, /*!< in: buffer block */
+ const dict_index_t* index, /*!< in: record descriptor */
+ const dtuple_t* tuple, /*!< in: data tuple */
+ ulint mode, /*!< in: PAGE_CUR_L,
+ PAGE_CUR_LE, PAGE_CUR_G, or
+ PAGE_CUR_GE */
+ page_cur_t* cursor);/*!< out: page cursor */
+/****************************************************************//**
Searches the right position for a page cursor. */
-
+UNIV_INTERN
void
page_cur_search_with_match(
/*=======================*/
- page_t* page, /* in: index page */
- dict_index_t* index, /* in: record descriptor */
- dtuple_t* tuple, /* in: data tuple */
- ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
- or PAGE_CUR_GE */
- ulint* iup_matched_fields,
- /* in/out: already matched fields in upper
- limit record */
- ulint* iup_matched_bytes,
- /* in/out: already matched bytes in a field
- not yet completely matched */
- ulint* ilow_matched_fields,
- /* in/out: already matched fields in lower
- limit record */
- ulint* ilow_matched_bytes,
- /* in/out: already matched bytes in a field
- not yet completely matched */
- page_cur_t* cursor); /* out: page cursor */
-/***************************************************************
+ const buf_block_t* block, /*!< in: buffer block */
+ const dict_index_t* index, /*!< in: record descriptor */
+ const dtuple_t* tuple, /*!< in: data tuple */
+ ulint mode, /*!< in: PAGE_CUR_L,
+ PAGE_CUR_LE, PAGE_CUR_G, or
+ PAGE_CUR_GE */
+ ulint* iup_matched_fields,
+ /*!< in/out: already matched
+ fields in upper limit record */
+ ulint* iup_matched_bytes,
+ /*!< in/out: already matched
+ bytes in a field not yet
+ completely matched */
+ ulint* ilow_matched_fields,
+ /*!< in/out: already matched
+ fields in lower limit record */
+ ulint* ilow_matched_bytes,
+ /*!< in/out: already matched
+ bytes in a field not yet
+ completely matched */
+ page_cur_t* cursor);/*!< out: page cursor */
+/***********************************************************//**
Positions a page cursor on a randomly chosen user record on a page. If there
are no user records, sets the cursor on the infimum record. */
-
+UNIV_INTERN
void
page_cur_open_on_rnd_user_rec(
/*==========================*/
- page_t* page, /* in: page */
- page_cur_t* cursor);/* in/out: page cursor */
-/***************************************************************
-Parses a log record of a record insert on a page. */
-
+ buf_block_t* block, /*!< in: page */
+ page_cur_t* cursor);/*!< out: page cursor */
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************//**
+Parses a log record of a record insert on a page.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
page_cur_parse_insert_rec(
/*======================*/
- /* out: end of log record or NULL */
- ibool is_short,/* in: TRUE if short inserts */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- dict_index_t* index, /* in: record descriptor */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
-/**************************************************************
-Parses a log record of copying a record list end to a new created page. */
-
+ ibool is_short,/*!< in: TRUE if short inserts */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ buf_block_t* block, /*!< in: page or NULL */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr); /*!< in: mtr or NULL */
+/**********************************************************//**
+Parses a log record of copying a record list end to a new created page.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
page_parse_copy_rec_list_to_created_page(
/*=====================================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- dict_index_t* index, /* in: record descriptor */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
-/***************************************************************
-Parses log record of a record delete on a page. */
-
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ buf_block_t* block, /*!< in: page or NULL */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr); /*!< in: mtr or NULL */
+/***********************************************************//**
+Parses log record of a record delete on a page.
+@return pointer to record end or NULL */
+UNIV_INTERN
byte*
page_cur_parse_delete_rec(
/*======================*/
- /* out: pointer to record end or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- dict_index_t* index, /* in: record descriptor */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ buf_block_t* block, /*!< in: page or NULL */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr); /*!< in: mtr or NULL */
-/* Index page cursor */
+/** Index page cursor */
struct page_cur_struct{
- byte* rec; /* pointer to a record on page */
+ byte* rec; /*!< pointer to a record on page */
+ buf_block_t* block; /*!< pointer to the block containing rec */
};
#ifndef UNIV_NONINL
diff --git a/storage/innobase/include/page0cur.ic b/storage/innobase/include/page0cur.ic
index b747874abc2..3520677dfb3 100644
--- a/storage/innobase/include/page0cur.ic
+++ b/storage/innobase/include/page0cur.ic
@@ -1,158 +1,218 @@
-/************************************************************************
-The page cursor
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
-(c) 1994-1996 Innobase Oy
+/********************************************************************//**
+@file include/page0cur.ic
+The page cursor
Created 10/4/1994 Heikki Tuuri
*************************************************************************/
#include "page0page.h"
+#include "buf0types.h"
-
-/*************************************************************
-Gets pointer to the page frame where the cursor is positioned. */
+#ifdef UNIV_DEBUG
+/*********************************************************//**
+Gets pointer to the page frame where the cursor is positioned.
+@return page */
UNIV_INLINE
page_t*
page_cur_get_page(
/*==============*/
- /* out: page */
- page_cur_t* cur) /* in: page cursor */
+ page_cur_t* cur) /*!< in: page cursor */
+{
+ ut_ad(cur);
+ ut_ad(page_align(cur->rec) == cur->block->frame);
+
+ return(page_align(cur->rec));
+}
+
+/*********************************************************//**
+Gets pointer to the buffer block where the cursor is positioned.
+@return buffer block */
+UNIV_INLINE
+buf_block_t*
+page_cur_get_block(
+/*===============*/
+ page_cur_t* cur) /*!< in: page cursor */
{
ut_ad(cur);
+ ut_ad(page_align(cur->rec) == cur->block->frame);
+ return(cur->block);
+}
- return(buf_frame_align(cur->rec));
+/*********************************************************//**
+Gets pointer to the page_zip_des_t of the block where the cursor is positioned.
+@return buf_block_get_page_zip() of the cursor's buffer block */
+UNIV_INLINE
+page_zip_des_t*
+page_cur_get_page_zip(
+/*==================*/
+ page_cur_t* cur) /*!< in: page cursor */
+{
+ return(buf_block_get_page_zip(page_cur_get_block(cur)));
}
-/*************************************************************
-Gets the record where the cursor is positioned. */
+/*********************************************************//**
+Gets the record where the cursor is positioned.
+@return record */
UNIV_INLINE
rec_t*
page_cur_get_rec(
/*=============*/
- /* out: record */
- page_cur_t* cur) /* in: page cursor */
+ page_cur_t* cur) /*!< in: page cursor */
{
ut_ad(cur);
+ ut_ad(page_align(cur->rec) == cur->block->frame);
return(cur->rec);
}
+#endif /* UNIV_DEBUG */
-/*************************************************************
+/*********************************************************//**
Sets the cursor object to point before the first user record
on the page. */
UNIV_INLINE
void
page_cur_set_before_first(
/*======================*/
- page_t* page, /* in: index page */
- page_cur_t* cur) /* in: cursor */
+ const buf_block_t* block, /*!< in: index page */
+ page_cur_t* cur) /*!< in: cursor */
{
- cur->rec = page_get_infimum_rec(page);
+ cur->block = (buf_block_t*) block;
+ cur->rec = page_get_infimum_rec(buf_block_get_frame(cur->block));
}
-/*************************************************************
+/*********************************************************//**
Sets the cursor object to point after the last user record on
the page. */
UNIV_INLINE
void
page_cur_set_after_last(
/*====================*/
- page_t* page, /* in: index page */
- page_cur_t* cur) /* in: cursor */
+ const buf_block_t* block, /*!< in: index page */
+ page_cur_t* cur) /*!< in: cursor */
{
- cur->rec = page_get_supremum_rec(page);
+ cur->block = (buf_block_t*) block;
+ cur->rec = page_get_supremum_rec(buf_block_get_frame(cur->block));
}
-/*************************************************************
-Returns TRUE if the cursor is before first user record on page. */
+/*********************************************************//**
+Returns TRUE if the cursor is before first user record on page.
+@return TRUE if at start */
UNIV_INLINE
ibool
page_cur_is_before_first(
/*=====================*/
- /* out: TRUE if at start */
- const page_cur_t* cur) /* in: cursor */
+ const page_cur_t* cur) /*!< in: cursor */
{
+ ut_ad(cur);
+ ut_ad(page_align(cur->rec) == cur->block->frame);
return(page_rec_is_infimum(cur->rec));
}
-/*************************************************************
-Returns TRUE if the cursor is after last user record. */
+/*********************************************************//**
+Returns TRUE if the cursor is after last user record.
+@return TRUE if at end */
UNIV_INLINE
ibool
page_cur_is_after_last(
/*===================*/
- /* out: TRUE if at end */
- const page_cur_t* cur) /* in: cursor */
+ const page_cur_t* cur) /*!< in: cursor */
{
+ ut_ad(cur);
+ ut_ad(page_align(cur->rec) == cur->block->frame);
return(page_rec_is_supremum(cur->rec));
}
-/**************************************************************
+/**********************************************************//**
Positions the cursor on the given record. */
UNIV_INLINE
void
page_cur_position(
/*==============*/
- rec_t* rec, /* in: record on a page */
- page_cur_t* cur) /* in: page cursor */
+ const rec_t* rec, /*!< in: record on a page */
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+ page_cur_t* cur) /*!< out: page cursor */
{
- ut_ad(rec && cur);
+ ut_ad(rec && block && cur);
+ ut_ad(page_align(rec) == block->frame);
- cur->rec = rec;
+ cur->rec = (rec_t*) rec;
+ cur->block = (buf_block_t*) block;
}
-/**************************************************************
+/**********************************************************//**
Invalidates a page cursor by setting the record pointer NULL. */
UNIV_INLINE
void
page_cur_invalidate(
/*================*/
- page_cur_t* cur) /* in: page cursor */
+ page_cur_t* cur) /*!< out: page cursor */
{
ut_ad(cur);
cur->rec = NULL;
+ cur->block = NULL;
}
-/**************************************************************
+/**********************************************************//**
Moves the cursor to the next record on page. */
UNIV_INLINE
void
page_cur_move_to_next(
/*==================*/
- page_cur_t* cur) /* in: cursor; must not be after last */
+ page_cur_t* cur) /*!< in/out: cursor; must not be after last */
{
ut_ad(!page_cur_is_after_last(cur));
cur->rec = page_rec_get_next(cur->rec);
}
-/**************************************************************
+/**********************************************************//**
Moves the cursor to the previous record on page. */
UNIV_INLINE
void
page_cur_move_to_prev(
/*==================*/
- page_cur_t* cur) /* in: page cursor, not before first */
+ page_cur_t* cur) /*!< in/out: page cursor, not before first */
{
ut_ad(!page_cur_is_before_first(cur));
cur->rec = page_rec_get_prev(cur->rec);
}
-/********************************************************************
-Searches the right position for a page cursor. */
+#ifndef UNIV_HOTBACKUP
+/****************************************************************//**
+Searches the right position for a page cursor.
+@return number of matched fields on the left */
UNIV_INLINE
ulint
page_cur_search(
/*============*/
- /* out: number of matched fields on the left */
- page_t* page, /* in: index page */
- dict_index_t* index, /* in: record descriptor */
- dtuple_t* tuple, /* in: data tuple */
- ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
- or PAGE_CUR_GE */
- page_cur_t* cursor) /* out: page cursor */
+ const buf_block_t* block, /*!< in: buffer block */
+ const dict_index_t* index, /*!< in: record descriptor */
+ const dtuple_t* tuple, /*!< in: data tuple */
+ ulint mode, /*!< in: PAGE_CUR_L,
+ PAGE_CUR_LE, PAGE_CUR_G, or
+ PAGE_CUR_GE */
+ page_cur_t* cursor) /*!< out: page cursor */
{
ulint low_matched_fields = 0;
ulint low_matched_bytes = 0;
@@ -161,7 +221,7 @@ page_cur_search(
ut_ad(dtuple_check_typed(tuple));
- page_cur_search_with_match(page, index, tuple, mode,
+ page_cur_search_with_match(block, index, tuple, mode,
&up_matched_fields,
&up_matched_bytes,
&low_matched_fields,
@@ -170,41 +230,70 @@ page_cur_search(
return(low_matched_fields);
}
-/***************************************************************
+/***********************************************************//**
Inserts a record next to page cursor. Returns pointer to inserted record if
succeed, i.e., enough space available, NULL otherwise. The cursor stays at
-the same position. */
+the same logical position, but the physical position may change if it is
+pointing to a compressed page that was reorganized.
+@return pointer to the inserted record on success, NULL otherwise */
UNIV_INLINE
rec_t*
page_cur_tuple_insert(
/*==================*/
- /* out: pointer to record if succeed, NULL
- otherwise */
- page_cur_t* cursor, /* in: a page cursor */
- dtuple_t* tuple, /* in: pointer to a data tuple */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr) /* in: mini-transaction handle */
+ page_cur_t* cursor, /*!< in/out: a page cursor */
+ const dtuple_t* tuple, /*!< in: pointer to a data tuple */
+ dict_index_t* index, /*!< in: record descriptor */
+ ulint n_ext, /*!< in: number of externally stored columns */
+ mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
{
- return(page_cur_insert_rec_low(cursor, tuple, index, NULL, NULL, mtr));
+ mem_heap_t* heap; /* scratch heap; freed before returning */
+ ulint* offsets; /* rec_get_offsets() result for the converted record */
+ ulint size
+ = rec_get_converted_size(index, tuple, n_ext);
+ rec_t* rec; /* first the converted tuple, later the insert result */
+
+ heap = mem_heap_create(size
+ + (4 + REC_OFFS_HEADER_SIZE
+ + dtuple_get_n_fields(tuple))
+ * sizeof *offsets); /* record bytes plus, presumably, room for the offsets array */
+ rec = rec_convert_dtuple_to_rec((byte*) mem_heap_alloc(heap, size),
+ index, tuple, n_ext); /* temporary physical record stored in heap */
+ offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
+
+ if (buf_block_get_page_zip(cursor->block)) { /* block carries a compressed page */
+ rec = page_cur_insert_rec_zip(&cursor->rec, cursor->block,
+ index, rec, offsets, mtr);
+ } else {
+ rec = page_cur_insert_rec_low(cursor->rec,
+ index, rec, offsets, mtr);
+ }
+
+ mem_heap_free(heap); /* releases the temporary record; rec now holds the insert result */
+ return(rec);
}
+#endif /* !UNIV_HOTBACKUP */
-/***************************************************************
+/***********************************************************//**
Inserts a record next to page cursor. Returns pointer to inserted record if
succeed, i.e., enough space available, NULL otherwise. The cursor stays at
-the same position. */
+the same logical position, but the physical position may change if it is
+pointing to a compressed page that was reorganized.
+@return pointer to the inserted record on success, NULL otherwise */
UNIV_INLINE
rec_t*
page_cur_rec_insert(
/*================*/
- /* out: pointer to record if succeed, NULL
- otherwise */
- page_cur_t* cursor, /* in: a page cursor */
- rec_t* rec, /* in: record to insert */
- dict_index_t* index, /* in: record descriptor */
- ulint* offsets,/* in: rec_get_offsets(rec, index) */
- mtr_t* mtr) /* in: mini-transaction handle */
+ page_cur_t* cursor, /*!< in/out: a page cursor */
+ const rec_t* rec, /*!< in: record to insert */
+ dict_index_t* index, /*!< in: record descriptor */
+ ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
+ mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
{
- return(page_cur_insert_rec_low(cursor, NULL, index, rec,
- offsets, mtr));
+ if (buf_block_get_page_zip(cursor->block)) { /* block carries a compressed page */
+ return(page_cur_insert_rec_zip(&cursor->rec, cursor->block,
+ index, rec, offsets, mtr)); /* takes &cursor->rec, presumably so it can reposition the cursor */
+ } else {
+ return(page_cur_insert_rec_low(cursor->rec,
+ index, rec, offsets, mtr)); /* cursor->rec is passed by value here */
+ }
}
-
diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h
index 273007c2778..3899499fb6a 100644
--- a/storage/innobase/include/page0page.h
+++ b/storage/innobase/include/page0page.h
@@ -1,7 +1,24 @@
-/******************************************************
-Index page routines
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
-(c) 1994-1996 Innobase Oy
+/**************************************************//**
+@file include/page0page.h
+Index page routines
Created 2/2/1994 Heikki Tuuri
*******************************************************/
@@ -50,8 +67,8 @@ typedef byte page_header_t;
#define PAGE_N_RECS 16 /* number of user records on the page */
#define PAGE_MAX_TRX_ID 18 /* highest id of a trx which may have modified
a record on the page; a dulint; defined only
- in secondary indexes; specifically, not in an
- ibuf tree; NOTE: this may be modified only
+ in secondary indexes and in the insert buffer
+ tree; NOTE: this may be modified only
when the thread has an x-latch to the page,
and ALSO an x-latch to btr_search_latch
if there is a hash index to the page! */
@@ -59,8 +76,11 @@ typedef byte page_header_t;
header which are set in a page create */
/*----*/
#define PAGE_LEVEL 26 /* level of the node in an index tree; the
- leaf level is the level 0 */
-#define PAGE_INDEX_ID 28 /* index id where the page belongs */
+ leaf level is the level 0. This field should
+ not be written to after page creation. */
+#define PAGE_INDEX_ID 28 /* index id where the page belongs.
+ This field should not be written to after
+ page creation. */
#define PAGE_BTR_SEG_LEAF 36 /* file segment header for the leaf pages in
a B-tree: defined only on the root page of a
B-tree, but not in the root of an ibuf tree */
@@ -100,6 +120,14 @@ typedef byte page_header_t;
a new-style compact page */
/*-----------------------------*/
+/* Heap numbers */
+#define PAGE_HEAP_NO_INFIMUM 0 /* page infimum */
+#define PAGE_HEAP_NO_SUPREMUM 1 /* page supremum */
+#define PAGE_HEAP_NO_USER_LOW 2 /* first user record in
+ creation (insertion) order,
+ not necessarily collation order;
+ this record may have been deleted */
+
/* Directions of cursor movement */
#define PAGE_LEFT 1
#define PAGE_RIGHT 2
@@ -132,245 +160,296 @@ directory. */
#define PAGE_DIR_SLOT_MAX_N_OWNED 8
#define PAGE_DIR_SLOT_MIN_N_OWNED 4
-/****************************************************************
-Gets the start of a page. */
+/************************************************************//**
+Gets the start of a page.
+@return start of the page */
UNIV_INLINE
page_t*
page_align(
/*=======*/
- /* out: start of the page */
- void* ptr) /* in: pointer to page frame */
+ const void* ptr) /*!< in: pointer to page frame */
__attribute__((const));
-/****************************************************************
-Gets the offset within a page. */
+/************************************************************//**
+Gets the offset within a page.
+@return offset from the start of the page */
UNIV_INLINE
ulint
page_offset(
/*========*/
- /* out: offset from the start of the page */
- const void* ptr) /* in: pointer to page frame */
+ const void* ptr) /*!< in: pointer to page frame */
__attribute__((const));
-/*****************************************************************
+/*************************************************************//**
Returns the max trx id field value. */
UNIV_INLINE
-dulint
+trx_id_t
page_get_max_trx_id(
/*================*/
- page_t* page); /* in: page */
-/*****************************************************************
+ const page_t* page); /*!< in: page */
+/*************************************************************//**
Sets the max trx id field value. */
-
+UNIV_INTERN
void
page_set_max_trx_id(
/*================*/
- page_t* page, /* in: page */
- dulint trx_id);/* in: transaction id */
-/*****************************************************************
+ buf_block_t* block, /*!< in/out: page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ trx_id_t trx_id, /*!< in: transaction id */
+ mtr_t* mtr); /*!< in/out: mini-transaction, or NULL */
+/*************************************************************//**
Sets the max trx id field value if trx_id is bigger than the previous
value. */
UNIV_INLINE
void
page_update_max_trx_id(
/*===================*/
- page_t* page, /* in: page */
- dulint trx_id); /* in: transaction id */
-/*****************************************************************
+ buf_block_t* block, /*!< in/out: page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page whose
+ uncompressed part will be updated, or NULL */
+ trx_id_t trx_id, /*!< in: transaction id */
+ mtr_t* mtr); /*!< in/out: mini-transaction */
+/*************************************************************//**
Reads the given header field. */
UNIV_INLINE
ulint
page_header_get_field(
/*==================*/
- page_t* page, /* in: page */
- ulint field); /* in: PAGE_N_DIR_SLOTS, ... */
-/*****************************************************************
+ const page_t* page, /*!< in: page */
+ ulint field); /*!< in: PAGE_N_DIR_SLOTS, ... */
+/*************************************************************//**
Sets the given header field. */
UNIV_INLINE
void
page_header_set_field(
/*==================*/
- page_t* page, /* in: page */
- ulint field, /* in: PAGE_N_DIR_SLOTS, ... */
- ulint val); /* in: value */
-/*****************************************************************
-Returns the pointer stored in the given header field. */
+ page_t* page, /*!< in/out: page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page whose
+ uncompressed part will be updated, or NULL */
+ ulint field, /*!< in: PAGE_N_DIR_SLOTS, ... */
+ ulint val); /*!< in: value */
+/*************************************************************//**
+Returns the offset stored in the given header field.
+@return offset from the start of the page, or 0 */
UNIV_INLINE
-byte*
-page_header_get_ptr(
-/*================*/
- /* out: pointer or NULL */
- page_t* page, /* in: page */
- ulint field); /* in: PAGE_FREE, ... */
-/*****************************************************************
+ulint
+page_header_get_offs(
+/*=================*/
+ const page_t* page, /*!< in: page */
+ ulint field) /*!< in: PAGE_FREE, ... */
+ __attribute__((nonnull, pure));
+
+/*************************************************************//**
+Returns the pointer stored in the given header field, or NULL if the stored offset is 0. The arguments are evaluated more than once. */
+#define page_header_get_ptr(page, field) \
+ (page_header_get_offs(page, field) \
+ ? (page) + page_header_get_offs(page, field) : NULL)
+/*************************************************************//**
Sets the pointer stored in the given header field. */
UNIV_INLINE
void
page_header_set_ptr(
/*================*/
- page_t* page, /* in: page */
- ulint field, /* in: PAGE_FREE, ... */
- byte* ptr); /* in: pointer or NULL*/
-/*****************************************************************
+ page_t* page, /*!< in/out: page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page whose
+ uncompressed part will be updated, or NULL */
+ ulint field, /*!< in: PAGE_FREE, ... */
+ const byte* ptr); /*!< in: pointer or NULL*/
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
Resets the last insert info field in the page header. Writes to mlog
about this operation. */
UNIV_INLINE
void
page_header_reset_last_insert(
/*==========================*/
- page_t* page, /* in: page */
- mtr_t* mtr); /* in: mtr */
-/****************************************************************
-Gets the first record on the page. */
+ page_t* page, /*!< in/out: page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page whose
+ uncompressed part will be updated, or NULL */
+ mtr_t* mtr); /*!< in: mtr */
+#endif /* !UNIV_HOTBACKUP */
+/************************************************************//**
+Gets the offset of the first record on the page.
+@return offset of the first record in record list, relative to the start of the page */
UNIV_INLINE
-rec_t*
-page_get_infimum_rec(
-/*=================*/
- /* out: the first record in record list */
- page_t* page); /* in: page which must have record(s) */
-/****************************************************************
-Gets the last record on the page. */
+ulint
+page_get_infimum_offset(
+/*====================*/
+ const page_t* page); /*!< in: page which must have record(s) */
+/************************************************************//**
+Gets the offset of the last record on the page.
+@return offset of the last record in record list, relative to the start of the page */
UNIV_INLINE
-rec_t*
-page_get_supremum_rec(
-/*==================*/
- /* out: the last record in record list */
- page_t* page); /* in: page which must have record(s) */
-/****************************************************************
+ulint
+page_get_supremum_offset(
+/*=====================*/
+ const page_t* page); /*!< in: page which must have record(s) */
+#define page_get_infimum_rec(page) ((page) + page_get_infimum_offset(page))
+#define page_get_supremum_rec(page) ((page) + page_get_supremum_offset(page))
+/************************************************************//**
Returns the middle record of record list. If there are an even number
-of records in the list, returns the first record of upper half-list. */
-
+of records in the list, returns the first record of upper half-list.
+@return middle record */
+UNIV_INTERN
rec_t*
page_get_middle_rec(
/*================*/
- /* out: middle record */
- page_t* page); /* in: page */
-/*****************************************************************
+ page_t* page); /*!< in: page */
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
Compares a data tuple to a physical record. Differs from the function
cmp_dtuple_rec_with_match in the way that the record must reside on an
index page, and also page infimum and supremum records can be given in
the parameter rec. These are considered as the negative infinity and
-the positive infinity in the alphabetical order. */
+the positive infinity in the alphabetical order.
+@return 1, 0, -1, if dtuple is greater, equal, less than rec,
+respectively, when only the common first fields are compared */
UNIV_INLINE
int
page_cmp_dtuple_rec_with_match(
/*===========================*/
- /* out: 1, 0, -1, if dtuple is greater, equal,
- less than rec, respectively, when only the
- common first fields are compared */
- dtuple_t* dtuple, /* in: data tuple */
- rec_t* rec, /* in: physical record on a page; may also
+ const dtuple_t* dtuple, /*!< in: data tuple */
+ const rec_t* rec, /*!< in: physical record on a page; may also
be page infimum or supremum, in which case
matched-parameter values below are not
affected */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint* matched_fields, /* in/out: number of already completely
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint* matched_fields, /*!< in/out: number of already completely
matched fields; when function returns
contains the value for current comparison */
- ulint* matched_bytes); /* in/out: number of already matched
+ ulint* matched_bytes); /*!< in/out: number of already matched
bytes within the first field not completely
matched; when function returns contains the
value for current comparison */
-/*****************************************************************
+#endif /* !UNIV_HOTBACKUP */
+/*************************************************************//**
+Gets the page number.
+@return page number */
+UNIV_INLINE
+ulint
+page_get_page_no(
+/*=============*/
+ const page_t* page); /*!< in: page */
+/*************************************************************//**
+Gets the tablespace identifier.
+@return space id */
+UNIV_INLINE
+ulint
+page_get_space_id(
+/*==============*/
+ const page_t* page); /*!< in: page */
+/*************************************************************//**
Gets the number of user records on page (the infimum and supremum records
-are not user records). */
+are not user records).
+@return number of user records */
UNIV_INLINE
ulint
page_get_n_recs(
/*============*/
- /* out: number of user records */
- page_t* page); /* in: index page */
-/*******************************************************************
+ const page_t* page); /*!< in: index page */
+/***************************************************************//**
Returns the number of records before the given record in chain.
-The number includes infimum and supremum records. */
-
+The number includes infimum and supremum records.
+@return number of records */
+UNIV_INTERN
ulint
page_rec_get_n_recs_before(
/*=======================*/
- /* out: number of records */
- rec_t* rec); /* in: the physical record */
-/*****************************************************************
-Gets the number of records in the heap. */
+ const rec_t* rec); /*!< in: the physical record */
+/*************************************************************//**
+Gets the number of records in the heap.
+@return number of records in the heap */
UNIV_INLINE
ulint
page_dir_get_n_heap(
/*================*/
- /* out: number of user records */
- page_t* page); /* in: index page */
-/*****************************************************************
+ const page_t* page); /*!< in: index page */
+/*************************************************************//**
Sets the number of records in the heap. */
UNIV_INLINE
void
page_dir_set_n_heap(
/*================*/
- page_t* page, /* in: index page */
- ulint n_heap);/* in: number of records */
-/*****************************************************************
-Gets the number of dir slots in directory. */
+ page_t* page, /*!< in/out: index page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page whose
+ uncompressed part will be updated, or NULL.
+ Note that the size of the dense page directory
+ in the compressed page trailer is
+ n_heap * PAGE_ZIP_DIR_SLOT_SIZE. */
+ ulint n_heap);/*!< in: number of records */
+/*************************************************************//**
+Gets the number of dir slots in directory.
+@return number of slots */
UNIV_INLINE
ulint
page_dir_get_n_slots(
/*=================*/
- /* out: number of slots */
- page_t* page); /* in: index page */
-/*****************************************************************
+ const page_t* page); /*!< in: index page */
+/*************************************************************//**
Sets the number of dir slots in directory. */
UNIV_INLINE
void
page_dir_set_n_slots(
/*=================*/
- /* out: number of slots */
- page_t* page, /* in: index page */
- ulint n_slots);/* in: number of slots */
-/*****************************************************************
-Gets pointer to nth directory slot. */
+ page_t* page, /*!< in/out: page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page whose
+ uncompressed part will be updated, or NULL */
+ ulint n_slots);/*!< in: number of slots */
+#ifdef UNIV_DEBUG
+/*************************************************************//**
+Gets pointer to nth directory slot.
+@return pointer to dir slot */
UNIV_INLINE
page_dir_slot_t*
page_dir_get_nth_slot(
/*==================*/
- /* out: pointer to dir slot */
- page_t* page, /* in: index page */
- ulint n); /* in: position */
-/******************************************************************
-Used to check the consistency of a record on a page. */
+ const page_t* page, /*!< in: index page */
+ ulint n); /*!< in: position */
+#else /* UNIV_DEBUG */
+# define page_dir_get_nth_slot(page, n) /* slot n, counted downwards from page + UNIV_PAGE_SIZE - PAGE_DIR */ \
+ ((page) + UNIV_PAGE_SIZE - PAGE_DIR \
+ - ((n) + 1) * PAGE_DIR_SLOT_SIZE)
+#endif /* UNIV_DEBUG */
+/**************************************************************//**
+Used to check the consistency of a record on a page.
+@return TRUE if the check succeeds */
UNIV_INLINE
ibool
page_rec_check(
/*===========*/
- /* out: TRUE if succeed */
- rec_t* rec); /* in: record */
-/*******************************************************************
-Gets the record pointed to by a directory slot. */
+ const rec_t* rec); /*!< in: record */
+/***************************************************************//**
+Gets the record pointed to by a directory slot.
+@return pointer to record */
UNIV_INLINE
-rec_t*
+const rec_t*
page_dir_slot_get_rec(
/*==================*/
- /* out: pointer to record */
- page_dir_slot_t* slot); /* in: directory slot */
-/*******************************************************************
+ const page_dir_slot_t* slot); /*!< in: directory slot */
+/***************************************************************//**
This is used to set the record offset in a directory slot. */
UNIV_INLINE
void
page_dir_slot_set_rec(
/*==================*/
- page_dir_slot_t* slot, /* in: directory slot */
- rec_t* rec); /* in: record on the page */
-/*******************************************************************
-Gets the number of records owned by a directory slot. */
+ page_dir_slot_t* slot, /*!< in: directory slot */
+ rec_t* rec); /*!< in: record on the page */
+/***************************************************************//**
+Gets the number of records owned by a directory slot.
+@return number of records */
UNIV_INLINE
ulint
page_dir_slot_get_n_owned(
/*======================*/
- /* out: number of records */
- page_dir_slot_t* slot); /* in: page directory slot */
-/*******************************************************************
+ const page_dir_slot_t* slot); /*!< in: page directory slot */
+/***************************************************************//**
This is used to set the owned records field of a directory slot. */
UNIV_INLINE
void
page_dir_slot_set_n_owned(
/*======================*/
- page_dir_slot_t* slot, /* in: directory slot */
- ulint n); /* in: number of records owned
- by the slot */
-/****************************************************************
+ page_dir_slot_t*slot, /*!< in/out: directory slot */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ ulint n); /*!< in: number of records owned by the slot */
+/************************************************************//**
Calculates the space reserved for directory slots of a given
number of records. The exact value is a fraction number
n * PAGE_DIR_SLOT_SIZE / PAGE_DIR_SLOT_MIN_N_OWNED, and it is
@@ -379,443 +458,550 @@ UNIV_INLINE
ulint
page_dir_calc_reserved_space(
/*=========================*/
- ulint n_recs); /* in: number of records */
-/*******************************************************************
-Looks for the directory slot which owns the given record. */
-
+ ulint n_recs); /*!< in: number of records */
+/***************************************************************//**
+Looks for the directory slot which owns the given record.
+@return the directory slot number */
+UNIV_INTERN
ulint
page_dir_find_owner_slot(
/*=====================*/
- /* out: the directory slot number */
- rec_t* rec); /* in: the physical record */
-/****************************************************************
-Determine whether the page is in new-style compact format. */
+ const rec_t* rec); /*!< in: the physical record */
+/************************************************************//**
+Determine whether the page is in new-style compact format.
+@return nonzero if the page is in compact format, zero if it is in
+old-style format */
UNIV_INLINE
ulint
page_is_comp(
/*=========*/
- /* out: nonzero if the page is in compact
- format, zero if it is in old-style format */
- page_t* page); /* in: index page */
-/****************************************************************
-TRUE if the record is on a page in compact format. */
+ const page_t* page); /*!< in: index page */
+/************************************************************//**
+TRUE if the record is on a page in compact format.
+@return nonzero if in compact format */
UNIV_INLINE
ulint
page_rec_is_comp(
/*=============*/
- /* out: nonzero if in compact format */
- const rec_t* rec); /* in: record */
-/****************************************************************
-Gets the pointer to the next record on the page. */
+ const rec_t* rec); /*!< in: record */
+/***************************************************************//**
+Returns the heap number of a record.
+@return heap number */
+UNIV_INLINE
+ulint
+page_rec_get_heap_no(
+/*=================*/
+ const rec_t* rec); /*!< in: the physical record */
+/************************************************************//**
+Determine whether the page is a B-tree leaf.
+@return TRUE if the page is a B-tree leaf */
+UNIV_INLINE
+ibool
+page_is_leaf(
+/*=========*/
+ const page_t* page) /*!< in: page */
+ __attribute__((nonnull, pure));
+/************************************************************//**
+Gets the pointer to the next record on the page.
+@return pointer to next record */
+UNIV_INLINE
+const rec_t*
+page_rec_get_next_low(
+/*==================*/
+ const rec_t* rec, /*!< in: pointer to record */
+ ulint comp); /*!< in: nonzero=compact page layout */
+/************************************************************//**
+Gets the pointer to the next record on the page.
+@return pointer to next record */
UNIV_INLINE
rec_t*
page_rec_get_next(
/*==============*/
- /* out: pointer to next record */
- rec_t* rec); /* in: pointer to record, must not be page
- supremum */
-/****************************************************************
+ rec_t* rec); /*!< in: pointer to record */
+/************************************************************//**
+Gets the pointer to the next record on the page.
+@return pointer to next record */
+UNIV_INLINE
+const rec_t*
+page_rec_get_next_const(
+/*====================*/
+ const rec_t* rec); /*!< in: pointer to record */
+/************************************************************//**
Sets the pointer to the next record on the page. */
UNIV_INLINE
void
page_rec_set_next(
/*==============*/
- rec_t* rec, /* in: pointer to record, must not be
- page supremum */
- rec_t* next); /* in: pointer to next record, must not
- be page infimum */
-/****************************************************************
-Gets the pointer to the previous record. */
+ rec_t* rec, /*!< in: pointer to record,
+ must not be page supremum */
+ rec_t* next); /*!< in: pointer to next record,
+ must not be page infimum */
+/************************************************************//**
+Gets the pointer to the previous record.
+@return pointer to previous record */
+UNIV_INLINE
+const rec_t*
+page_rec_get_prev_const(
+/*====================*/
+ const rec_t* rec); /*!< in: pointer to record, must not be page
+ infimum */
+/************************************************************//**
+Gets the pointer to the previous record.
+@return pointer to previous record */
UNIV_INLINE
rec_t*
page_rec_get_prev(
/*==============*/
- /* out: pointer to previous record */
- rec_t* rec); /* in: pointer to record,
+ rec_t* rec); /*!< in: pointer to record,
must not be page infimum */
-/****************************************************************
-TRUE if the record is a user record on the page. */
+/************************************************************//**
+TRUE if the record is a user record on the page.
+@return TRUE if a user record */
UNIV_INLINE
ibool
page_rec_is_user_rec_low(
/*=====================*/
- /* out: TRUE if a user record */
- ulint offset);/* in: record offset on page */
-/****************************************************************
-TRUE if the record is the supremum record on a page. */
+ ulint offset) /*!< in: record offset on page */
+ __attribute__((const));
+/************************************************************//**
+TRUE if the record is the supremum record on a page.
+@return TRUE if the supremum record */
UNIV_INLINE
ibool
page_rec_is_supremum_low(
/*=====================*/
- /* out: TRUE if the supremum record */
- ulint offset);/* in: record offset on page */
-/****************************************************************
-TRUE if the record is the infimum record on a page. */
+ ulint offset) /*!< in: record offset on page */
+ __attribute__((const));
+/************************************************************//**
+TRUE if the record is the infimum record on a page.
+@return TRUE if the infimum record */
UNIV_INLINE
ibool
page_rec_is_infimum_low(
/*====================*/
- /* out: TRUE if the infimum record */
- ulint offset);/* in: record offset on page */
+ ulint offset) /*!< in: record offset on page */
+ __attribute__((const));
-/****************************************************************
-TRUE if the record is a user record on the page. */
+/************************************************************//**
+TRUE if the record is a user record on the page.
+@return TRUE if a user record */
UNIV_INLINE
ibool
page_rec_is_user_rec(
/*=================*/
- /* out: TRUE if a user record */
- const rec_t* rec); /* in: record */
-/****************************************************************
-TRUE if the record is the supremum record on a page. */
+ const rec_t* rec) /*!< in: record */
+ __attribute__((const));
+/************************************************************//**
+TRUE if the record is the supremum record on a page.
+@return TRUE if the supremum record */
UNIV_INLINE
ibool
page_rec_is_supremum(
/*=================*/
- /* out: TRUE if the supremum record */
- const rec_t* rec); /* in: record */
-/****************************************************************
-TRUE if the record is the infimum record on a page. */
+ const rec_t* rec) /*!< in: record */
+ __attribute__((const));
+
+/************************************************************//**
+TRUE if the record is the infimum record on a page.
+@return TRUE if the infimum record */
UNIV_INLINE
ibool
page_rec_is_infimum(
/*================*/
- /* out: TRUE if the infimum record */
- const rec_t* rec); /* in: record */
-/*******************************************************************
-Looks for the record which owns the given record. */
+ const rec_t* rec) /*!< in: record */
+ __attribute__((const));
+/***************************************************************//**
+Looks for the record which owns the given record.
+@return the owner record */
UNIV_INLINE
rec_t*
page_rec_find_owner_rec(
/*====================*/
- /* out: the owner record */
- rec_t* rec); /* in: the physical record */
-/***************************************************************************
+ rec_t* rec); /*!< in: the physical record */
+/***********************************************************************//**
This is a low-level operation which is used in a database index creation
to update the page number of a created B-tree to a data dictionary
record. */
-
+UNIV_INTERN
void
page_rec_write_index_page_no(
/*=========================*/
- rec_t* rec, /* in: record to update */
- ulint i, /* in: index of the field to update */
- ulint page_no,/* in: value to write */
- mtr_t* mtr); /* in: mtr */
-/****************************************************************
+ rec_t* rec, /*!< in: record to update */
+ ulint i, /*!< in: index of the field to update */
+ ulint page_no,/*!< in: value to write */
+ mtr_t* mtr); /*!< in: mtr */
+/************************************************************//**
Returns the maximum combined size of records which can be inserted on top
-of record heap. */
+of record heap.
+@return maximum combined size for inserted records */
UNIV_INLINE
ulint
page_get_max_insert_size(
/*=====================*/
- /* out: maximum combined size for inserted records */
- page_t* page, /* in: index page */
- ulint n_recs); /* in: number of records */
-/****************************************************************
+ const page_t* page, /*!< in: index page */
+ ulint n_recs);/*!< in: number of records */
+/************************************************************//**
Returns the maximum combined size of records which can be inserted on top
-of record heap if page is first reorganized. */
+of record heap if page is first reorganized.
+@return maximum combined size for inserted records */
UNIV_INLINE
ulint
page_get_max_insert_size_after_reorganize(
/*======================================*/
- /* out: maximum combined size for inserted records */
- page_t* page, /* in: index page */
- ulint n_recs);/* in: number of records */
-/*****************************************************************
-Calculates free space if a page is emptied. */
+ const page_t* page, /*!< in: index page */
+ ulint n_recs);/*!< in: number of records */
+/*************************************************************//**
+Calculates free space if a page is emptied.
+@return free space */
UNIV_INLINE
ulint
page_get_free_space_of_empty(
/*=========================*/
- /* out: free space */
- ulint comp) /* in: nonzero=compact page format */
+ ulint comp) /*!< in: nonzero=compact page format */
__attribute__((const));
-/*****************************************************************
-Calculates free space if a page is emptied. */
-
+/**********************************************************//**
+Returns the base extra size of a physical record. This is the
+size of the fixed header, independent of the record size.
+@return REC_N_NEW_EXTRA_BYTES or REC_N_OLD_EXTRA_BYTES */
+UNIV_INLINE
ulint
-page_get_free_space_of_empty_noninline(
-/*===================================*/
- /* out: free space */
- ulint comp) /* in: nonzero=compact page format */
- __attribute__((const));
-/****************************************************************
+page_rec_get_base_extra_size(
+/*=========================*/
+ const rec_t* rec); /*!< in: physical record */
+/************************************************************//**
Returns the sum of the sizes of the records in the record list
-excluding the infimum and supremum records. */
+excluding the infimum and supremum records.
+@return data in bytes */
UNIV_INLINE
ulint
page_get_data_size(
/*===============*/
- /* out: data in bytes */
- page_t* page); /* in: index page */
-/****************************************************************
-Allocates a block of memory from an index page. */
-
+ const page_t* page); /*!< in: index page */
+/************************************************************//**
+Allocates a block of memory from the head of the free list
+of an index page. */
+UNIV_INLINE
+void
+page_mem_alloc_free(
+/*================*/
+ page_t* page, /*!< in/out: index page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page with enough
+ space available for inserting the record,
+ or NULL */
+ rec_t* next_rec,/*!< in: pointer to the new head of the
+ free record list */
+ ulint need); /*!< in: number of bytes allocated */
+/************************************************************//**
+Allocates a block of memory from the heap of an index page.
+@return pointer to start of allocated buffer, or NULL if allocation fails */
+UNIV_INTERN
byte*
-page_mem_alloc(
-/*===========*/
- /* out: pointer to start of allocated
- buffer, or NULL if allocation fails */
- page_t* page, /* in: index page */
- ulint need, /* in: number of bytes needed */
- dict_index_t* index, /* in: record descriptor */
- ulint* heap_no);/* out: this contains the heap number
+page_mem_alloc_heap(
+/*================*/
+ page_t* page, /*!< in/out: index page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page with enough
+ space available for inserting the record,
+ or NULL */
+ ulint need, /*!< in: total number of bytes needed */
+ ulint* heap_no);/*!< out: this contains the heap number
of the allocated record
if allocation succeeds */
-/****************************************************************
+/************************************************************//**
Puts a record to free list. */
UNIV_INLINE
void
page_mem_free(
/*==========*/
- page_t* page, /* in: index page */
- rec_t* rec, /* in: pointer to the (origin of) record */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/**************************************************************
-The index page creation function. */
-
+ page_t* page, /*!< in/out: index page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ rec_t* rec, /*!< in: pointer to the (origin of) record */
+ dict_index_t* index, /*!< in: index of rec */
+ const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+/**********************************************************//**
+Create an uncompressed B-tree index page.
+@return pointer to the page */
+UNIV_INTERN
page_t*
page_create(
/*========*/
- /* out: pointer to the page */
- buf_frame_t* frame, /* in: a buffer frame where the page is
- created */
- mtr_t* mtr, /* in: mini-transaction handle */
- ulint comp); /* in: nonzero=compact page format */
-/*****************************************************************
-Differs from page_copy_rec_list_end, because this function does not
-touch the lock table and max trx id on page. */
+ buf_block_t* block, /*!< in: a buffer block where the
+ page is created */
+ mtr_t* mtr, /*!< in: mini-transaction handle */
+ ulint comp); /*!< in: nonzero=compact page format */
+/**********************************************************//**
+Create a compressed B-tree index page.
+@return pointer to the page */
+UNIV_INTERN
+page_t*
+page_create_zip(
+/*============*/
+ buf_block_t* block, /*!< in/out: a buffer block where the
+ page is created */
+ dict_index_t* index, /*!< in: the index of the page */
+ ulint level, /*!< in: the B-tree level of the page */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/*************************************************************//**
+Differs from page_copy_rec_list_end, because this function does not
+touch the lock table and max trx id on page or compress the page. */
+UNIV_INTERN
void
page_copy_rec_list_end_no_locks(
/*============================*/
- page_t* new_page, /* in: index page to copy to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
+ buf_block_t* new_block, /*!< in: index page to copy to */
+ buf_block_t* block, /*!< in: index page of rec */
+ rec_t* rec, /*!< in: record on page */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr); /*!< in: mtr */
+/*************************************************************//**
Copies records from page to new_page, from the given record onward,
including that record. Infimum and supremum records are not copied.
-The records are copied to the start of the record list on new_page. */
-
-void
+The records are copied to the start of the record list on new_page.
+@return pointer to the original successor of the infimum record on
+new_page, or NULL on zip overflow (new_block will be decompressed) */
+UNIV_INTERN
+rec_t*
page_copy_rec_list_end(
/*===================*/
- page_t* new_page, /* in: index page to copy to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
+ buf_block_t* new_block, /*!< in/out: index page to copy to */
+ buf_block_t* block, /*!< in: index page containing rec */
+ rec_t* rec, /*!< in: record on page */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
+/*************************************************************//**
Copies records from page to new_page, up to the given record, NOT
including that record. Infimum and supremum records are not copied.
-The records are copied to the end of the record list on new_page. */
-
-void
+The records are copied to the end of the record list on new_page.
+@return pointer to the original predecessor of the supremum record on
+new_page, or NULL on zip overflow (new_block will be decompressed) */
+UNIV_INTERN
+rec_t*
page_copy_rec_list_start(
/*=====================*/
- page_t* new_page, /* in: index page to copy to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
+ buf_block_t* new_block, /*!< in/out: index page to copy to */
+ buf_block_t* block, /*!< in: index page containing rec */
+ rec_t* rec, /*!< in: record on page */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
+/*************************************************************//**
Deletes records from a page from a given record onward, including that record.
The infimum and supremum records are not deleted. */
-
+UNIV_INTERN
void
page_delete_rec_list_end(
/*=====================*/
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- dict_index_t* index, /* in: record descriptor */
- ulint n_recs, /* in: number of records to delete,
+ rec_t* rec, /*!< in: pointer to record on page */
+ buf_block_t* block, /*!< in: buffer block of the page */
+ dict_index_t* index, /*!< in: record descriptor */
+ ulint n_recs, /*!< in: number of records to delete,
or ULINT_UNDEFINED if not known */
- ulint size, /* in: the sum of the sizes of the
+ ulint size, /*!< in: the sum of the sizes of the
records in the end of the chain to
delete, or ULINT_UNDEFINED if not known */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
+/*************************************************************//**
Deletes records from page, up to the given record, NOT including
that record. Infimum and supremum records are not deleted. */
-
+UNIV_INTERN
void
page_delete_rec_list_start(
/*=======================*/
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
+ rec_t* rec, /*!< in: record on page */
+ buf_block_t* block, /*!< in: buffer block of the page */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
+/*************************************************************//**
Moves record list end to another page. Moved records include
-split_rec. */
-
-void
+split_rec.
+@return TRUE on success; FALSE on compression failure (new_block will
+be decompressed) */
+UNIV_INTERN
+ibool
page_move_rec_list_end(
/*===================*/
- page_t* new_page, /* in: index page where to move */
- page_t* page, /* in: index page */
- rec_t* split_rec, /* in: first record to move */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
+ buf_block_t* new_block, /*!< in/out: index page where to move */
+ buf_block_t* block, /*!< in: index page from where to move */
+ rec_t* split_rec, /*!< in: first record to move */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull(1, 2, 4, 5)));
+/*************************************************************//**
Moves record list start to another page. Moved records do not include
-split_rec. */
-
-void
+split_rec.
+@return TRUE on success; FALSE on compression failure */
+UNIV_INTERN
+ibool
page_move_rec_list_start(
/*=====================*/
- page_t* new_page, /* in: index page where to move */
- page_t* page, /* in: index page */
- rec_t* split_rec, /* in: first record not to move */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr); /* in: mtr */
-/********************************************************************
+ buf_block_t* new_block, /*!< in/out: index page where to move */
+ buf_block_t* block, /*!< in/out: page containing split_rec */
+ rec_t* split_rec, /*!< in: first record not to move */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull(1, 2, 4, 5)));
+/****************************************************************//**
Splits a directory slot which owns too many records. */
-
+UNIV_INTERN
void
page_dir_split_slot(
/*================*/
- page_t* page, /* in: the index page in question */
- ulint slot_no); /* in: the directory slot */
-/*****************************************************************
+ page_t* page, /*!< in: index page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page whose
+ uncompressed part will be written, or NULL */
+ ulint slot_no)/*!< in: the directory slot */
+ __attribute__((nonnull(1)));
+/*************************************************************//**
Tries to balance the given directory slot with too few records
with the upper neighbor, so that there are at least the minimum number
of records owned by the slot; this may result in the merging of
two slots. */
-
+UNIV_INTERN
void
page_dir_balance_slot(
/*==================*/
- page_t* page, /* in: index page */
- ulint slot_no); /* in: the directory slot */
-/**************************************************************
-Parses a log record of a record list end or start deletion. */
-
+ page_t* page, /*!< in/out: index page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ ulint slot_no)/*!< in: the directory slot */
+ __attribute__((nonnull(1)));
+/**********************************************************//**
+Parses a log record of a record list end or start deletion.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
page_parse_delete_rec_list(
/*=======================*/
- /* out: end of log record or NULL */
- byte type, /* in: MLOG_LIST_END_DELETE,
+ byte type, /*!< in: MLOG_LIST_END_DELETE,
MLOG_LIST_START_DELETE,
MLOG_COMP_LIST_END_DELETE or
MLOG_COMP_LIST_START_DELETE */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- dict_index_t* index, /* in: record descriptor */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
-/***************************************************************
-Parses a redo log record of creating a page. */
-
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ buf_block_t* block, /*!< in/out: buffer block or NULL */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr); /*!< in: mtr or NULL */
+/***********************************************************//**
+Parses a redo log record of creating a page.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
page_parse_create(
/*==============*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- ulint comp, /* in: nonzero=compact page format */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
-/****************************************************************
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ ulint comp, /*!< in: nonzero=compact page format */
+ buf_block_t* block, /*!< in: block or NULL */
+ mtr_t* mtr); /*!< in: mtr or NULL */
+/************************************************************//**
Prints record contents including the data relevant only in
the index page context. */
-
+UNIV_INTERN
void
page_rec_print(
/*===========*/
- rec_t* rec, /* in: physical record */
- const ulint* offsets);/* in: record descriptor */
-/*******************************************************************
+ const rec_t* rec, /*!< in: physical record */
+ const ulint* offsets);/*!< in: record descriptor */
+/***************************************************************//**
This is used to print the contents of the directory for
debugging purposes. */
-
+UNIV_INTERN
void
page_dir_print(
/*===========*/
- page_t* page, /* in: index page */
- ulint pr_n); /* in: print n first and n last entries */
-/*******************************************************************
+ page_t* page, /*!< in: index page */
+ ulint pr_n); /*!< in: print n first and n last entries */
+/***************************************************************//**
This is used to print the contents of the page record list for
debugging purposes. */
-
+UNIV_INTERN
void
page_print_list(
/*============*/
- page_t* page, /* in: index page */
- dict_index_t* index, /* in: dictionary index of the page */
- ulint pr_n); /* in: print n first and n last entries */
-/*******************************************************************
+ buf_block_t* block, /*!< in: index page */
+ dict_index_t* index, /*!< in: dictionary index of the page */
+ ulint pr_n); /*!< in: print n first and n last entries */
+/***************************************************************//**
Prints the info in a page header. */
-
+UNIV_INTERN
void
page_header_print(
/*==============*/
- page_t* page);
-/*******************************************************************
+ const page_t* page); /*!< in: index page */
+/***************************************************************//**
This is used to print the contents of the page for
debugging purposes. */
-
+UNIV_INTERN
void
page_print(
/*=======*/
- page_t* page, /* in: index page */
- dict_index_t* index, /* in: dictionary index of the page */
- ulint dn, /* in: print dn first and last entries
+ buf_block_t* block, /*!< in: index page */
+ dict_index_t* index, /*!< in: dictionary index of the page */
+ ulint dn, /*!< in: print dn first and last entries
in directory */
- ulint rn); /* in: print rn first and last records
+ ulint rn); /*!< in: print rn first and last records
in directory */
-/*******************************************************************
+/***************************************************************//**
The following is used to validate a record on a page. This function
differs from rec_validate as it can also check the n_owned field and
-the heap_no field. */
-
+the heap_no field.
+@return TRUE if ok */
+UNIV_INTERN
ibool
page_rec_validate(
/*==============*/
- /* out: TRUE if ok */
- rec_t* rec, /* in: physical record */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/*******************************************************************
+ rec_t* rec, /*!< in: physical record */
+ const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+/***************************************************************//**
Checks that the first directory slot points to the infimum record and
the last to the supremum. This function is intended to track if the
bug fixed in 4.0.14 has caused corruption to users' databases. */
-
+UNIV_INTERN
void
page_check_dir(
/*===========*/
- page_t* page); /* in: index page */
-/*******************************************************************
+ const page_t* page); /*!< in: index page */
+/***************************************************************//**
This function checks the consistency of an index page when we do not
know the index. This is also resilient so that this should never crash
-even if the page is total garbage. */
-
+even if the page is total garbage.
+@return TRUE if ok */
+UNIV_INTERN
ibool
-page_simple_validate(
-/*=================*/
- /* out: TRUE if ok */
- page_t* page); /* in: index page */
-/*******************************************************************
-This function checks the consistency of an index page. */
-
+page_simple_validate_old(
+/*=====================*/
+ page_t* page); /*!< in: old-style index page */
+/***************************************************************//**
+This function checks the consistency of an index page when we do not
+know the index. This is also resilient so that this should never crash
+even if the page is total garbage.
+@return TRUE if ok */
+UNIV_INTERN
+ibool
+page_simple_validate_new(
+/*=====================*/
+ page_t* block); /*!< in: new-style index page */
+/***************************************************************//**
+This function checks the consistency of an index page.
+@return TRUE if ok */
+UNIV_INTERN
ibool
page_validate(
/*==========*/
- /* out: TRUE if ok */
- page_t* page, /* in: index page */
- dict_index_t* index); /* in: data dictionary index containing
+ page_t* page, /*!< in: index page */
+ dict_index_t* index); /*!< in: data dictionary index containing
the page record type definition */
-/*******************************************************************
-Looks in the page record list for a record with the given heap number. */
+/***************************************************************//**
+Looks in the page record list for a record with the given heap number.
+@return record, NULL if not found */
-rec_t*
+const rec_t*
page_find_rec_with_heap_no(
/*=======================*/
- /* out: record, NULL if not found */
- page_t* page, /* in: index page */
- ulint heap_no);/* in: heap number */
+ const page_t* page, /*!< in: index page */
+ ulint heap_no);/*!< in: heap number */
#ifdef UNIV_MATERIALIZE
#undef UNIV_INLINE
diff --git a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic
index d9e67f3eeeb..8f794410f20 100644
--- a/storage/innobase/include/page0page.ic
+++ b/storage/innobase/include/page0page.ic
@@ -1,81 +1,117 @@
-/******************************************************
-Index page routines
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1994-1996 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/page0page.ic
+Index page routines
Created 2/2/1994 Heikki Tuuri
*******************************************************/
#include "mach0data.h"
-#include "rem0cmp.h"
+#ifdef UNIV_DEBUG
+# include "log0recv.h"
+#endif /* UNIV_DEBUG */
+#ifndef UNIV_HOTBACKUP
+# include "rem0cmp.h"
+#endif /* !UNIV_HOTBACKUP */
#include "mtr0log.h"
+#include "page0zip.h"
#ifdef UNIV_MATERIALIZE
#undef UNIV_INLINE
#define UNIV_INLINE
#endif
-/****************************************************************
-Gets the start of a page. */
+/************************************************************//**
+Gets the start of a page.
+@return start of the page */
UNIV_INLINE
page_t*
page_align(
/*=======*/
- /* out: start of the page */
- void* ptr) /* in: pointer to page frame */
+ const void* ptr) /*!< in: pointer to page frame */
{
return((page_t*) ut_align_down(ptr, UNIV_PAGE_SIZE));
}
-/****************************************************************
-Gets the offset within a page. */
+/************************************************************//**
+Gets the offset within a page.
+@return offset from the start of the page */
UNIV_INLINE
ulint
page_offset(
/*========*/
- /* out: offset from the start of the page */
- const void* ptr) /* in: pointer to page frame */
+ const void* ptr) /*!< in: pointer to page frame */
{
return(ut_align_offset(ptr, UNIV_PAGE_SIZE));
}
-/*****************************************************************
+/*************************************************************//**
Returns the max trx id field value. */
UNIV_INLINE
-dulint
+trx_id_t
page_get_max_trx_id(
/*================*/
- page_t* page) /* in: page */
+ const page_t* page) /*!< in: page */
{
ut_ad(page);
return(mach_read_from_8(page + PAGE_HEADER + PAGE_MAX_TRX_ID));
}
-/*****************************************************************
+/*************************************************************//**
Sets the max trx id field value if trx_id is bigger than the previous
value. */
UNIV_INLINE
void
page_update_max_trx_id(
/*===================*/
- page_t* page, /* in: page */
- dulint trx_id) /* in: transaction id */
+ buf_block_t* block, /*!< in/out: page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page whose
+ uncompressed part will be updated, or NULL */
+ trx_id_t trx_id, /*!< in: transaction id */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
- ut_ad(page);
-
- if (ut_dulint_cmp(page_get_max_trx_id(page), trx_id) < 0) {
-
- page_set_max_trx_id(page, trx_id);
+ ut_ad(block);
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+ /* During crash recovery, this function may be called on
+ something other than a leaf page of a secondary index or the
+ insert buffer index tree (dict_index_is_sec_or_ibuf() returns
+ TRUE for the dummy indexes constructed during redo log
+ application). In that case, PAGE_MAX_TRX_ID is unused,
+ and trx_id is usually zero. */
+ ut_ad(!ut_dulint_is_zero(trx_id) || recv_recovery_is_on());
+ ut_ad(page_is_leaf(buf_block_get_frame(block)));
+
+ if (ut_dulint_cmp(page_get_max_trx_id(buf_block_get_frame(block)),
+ trx_id) < 0) {
+
+ page_set_max_trx_id(block, page_zip, trx_id, mtr);
}
}
-/*****************************************************************
+/*************************************************************//**
Reads the given header field. */
UNIV_INLINE
ulint
page_header_get_field(
/*==================*/
- page_t* page, /* in: page */
- ulint field) /* in: PAGE_LEVEL, ... */
+ const page_t* page, /*!< in: page */
+ ulint field) /*!< in: PAGE_LEVEL, ... */
{
ut_ad(page);
ut_ad(field <= PAGE_INDEX_ID);
@@ -83,15 +119,17 @@ page_header_get_field(
return(mach_read_from_2(page + PAGE_HEADER + field));
}
-/*****************************************************************
+/*************************************************************//**
Sets the given header field. */
UNIV_INLINE
void
page_header_set_field(
/*==================*/
- page_t* page, /* in: page */
- ulint field, /* in: PAGE_LEVEL, ... */
- ulint val) /* in: value */
+ page_t* page, /*!< in/out: page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page whose
+ uncompressed part will be updated, or NULL */
+ ulint field, /*!< in: PAGE_N_DIR_SLOTS, ... */
+ ulint val) /*!< in: value */
{
ut_ad(page);
ut_ad(field <= PAGE_N_RECS);
@@ -99,17 +137,21 @@ page_header_set_field(
ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < UNIV_PAGE_SIZE);
mach_write_to_2(page + PAGE_HEADER + field, val);
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ page_zip_write_header(page_zip,
+ page + PAGE_HEADER + field, 2, NULL);
+ }
}
-/*****************************************************************
-Returns the pointer stored in the given header field. */
+/*************************************************************//**
+Returns the offset stored in the given header field.
+@return offset from the start of the page, or 0 */
UNIV_INLINE
-byte*
-page_header_get_ptr(
-/*================*/
- /* out: pointer or NULL */
- page_t* page, /* in: page */
- ulint field) /* in: PAGE_FREE, ... */
+ulint
+page_header_get_offs(
+/*=================*/
+ const page_t* page, /*!< in: page */
+ ulint field) /*!< in: PAGE_FREE, ... */
{
ulint offs;
@@ -122,23 +164,20 @@ page_header_get_ptr(
ut_ad((field != PAGE_HEAP_TOP) || offs);
- if (offs == 0) {
-
- return(NULL);
- }
-
- return(page + offs);
+ return(offs);
}
-/*****************************************************************
+/*************************************************************//**
Sets the pointer stored in the given header field. */
UNIV_INLINE
void
page_header_set_ptr(
/*================*/
- page_t* page, /* in: page */
- ulint field, /* in: PAGE_FREE, ... */
- byte* ptr) /* in: pointer or NULL*/
+ page_t* page, /*!< in/out: page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page whose
+ uncompressed part will be updated, or NULL */
+ ulint field, /*!< in: PAGE_FREE, ... */
+ const byte* ptr) /*!< in: pointer or NULL*/
{
ulint offs;
@@ -155,95 +194,136 @@ page_header_set_ptr(
ut_ad((field != PAGE_HEAP_TOP) || offs);
- page_header_set_field(page, field, offs);
+ page_header_set_field(page, page_zip, field, offs);
}
-/*****************************************************************
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
Resets the last insert info field in the page header. Writes to mlog
about this operation. */
UNIV_INLINE
void
page_header_reset_last_insert(
/*==========================*/
- page_t* page, /* in: page */
- mtr_t* mtr) /* in: mtr */
+ page_t* page, /*!< in/out: page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page whose
+ uncompressed part will be updated, or NULL */
+ mtr_t* mtr) /*!< in: mtr */
{
ut_ad(page && mtr);
- mlog_write_ulint(page + PAGE_HEADER + PAGE_LAST_INSERT, 0,
- MLOG_2BYTES, mtr);
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ mach_write_to_2(page + (PAGE_HEADER + PAGE_LAST_INSERT), 0);
+ page_zip_write_header(page_zip,
+ page + (PAGE_HEADER + PAGE_LAST_INSERT),
+ 2, mtr);
+ } else {
+ mlog_write_ulint(page + (PAGE_HEADER + PAGE_LAST_INSERT), 0,
+ MLOG_2BYTES, mtr);
+ }
}
+#endif /* !UNIV_HOTBACKUP */
-/****************************************************************
-Determine whether the page is in new-style compact format. */
+/************************************************************//**
+Determine whether the page is in new-style compact format.
+@return nonzero if the page is in compact format, zero if it is in
+old-style format */
UNIV_INLINE
ulint
page_is_comp(
/*=========*/
- /* out: nonzero if the page is in compact
- format, zero if it is in old-style format */
- page_t* page) /* in: index page */
+ const page_t* page) /*!< in: index page */
{
return(UNIV_EXPECT(page_header_get_field(page, PAGE_N_HEAP) & 0x8000,
0x8000));
}
-/****************************************************************
-TRUE if the record is on a page in compact format. */
+/************************************************************//**
+TRUE if the record is on a page in compact format.
+@return nonzero if in compact format */
UNIV_INLINE
ulint
page_rec_is_comp(
/*=============*/
- /* out: nonzero if in compact format */
- const rec_t* rec) /* in: record */
+ const rec_t* rec) /*!< in: record */
{
- return(page_is_comp(page_align((rec_t*) rec)));
+ return(page_is_comp(page_align(rec)));
}
-/****************************************************************
-Gets the first record on the page. */
+/***************************************************************//**
+Returns the heap number of a record.
+@return heap number */
UNIV_INLINE
-rec_t*
-page_get_infimum_rec(
+ulint
+page_rec_get_heap_no(
/*=================*/
- /* out: the first record in record list */
- page_t* page) /* in: page which must have record(s) */
+ const rec_t* rec) /*!< in: the physical record */
+{
+ if (page_rec_is_comp(rec)) {
+ return(rec_get_heap_no_new(rec));
+ } else {
+ return(rec_get_heap_no_old(rec));
+ }
+}
+
+/************************************************************//**
+Determine whether the page is a B-tree leaf.
+@return TRUE if the page is a B-tree leaf */
+UNIV_INLINE
+ibool
+page_is_leaf(
+/*=========*/
+ const page_t* page) /*!< in: page */
+{
+ return(!*(const uint16*) (page + (PAGE_HEADER + PAGE_LEVEL)));
+}
+
+/************************************************************//**
+Gets the offset of the first record on the page.
+@return offset of the first record in record list, relative to the start of the page */
+UNIV_INLINE
+ulint
+page_get_infimum_offset(
+/*====================*/
+ const page_t* page) /*!< in: page which must have record(s) */
{
ut_ad(page);
+ ut_ad(!page_offset(page));
if (page_is_comp(page)) {
- return(page + PAGE_NEW_INFIMUM);
+ return(PAGE_NEW_INFIMUM);
} else {
- return(page + PAGE_OLD_INFIMUM);
+ return(PAGE_OLD_INFIMUM);
}
}
-/****************************************************************
-Gets the last record on the page. */
+/************************************************************//**
+Gets the offset of the last record on the page.
+@return offset of the last record in record list, relative to the start of the page */
UNIV_INLINE
-rec_t*
-page_get_supremum_rec(
-/*==================*/
- /* out: the last record in record list */
- page_t* page) /* in: page which must have record(s) */
+ulint
+page_get_supremum_offset(
+/*=====================*/
+ const page_t* page) /*!< in: page which must have record(s) */
{
ut_ad(page);
+ ut_ad(!page_offset(page));
if (page_is_comp(page)) {
- return(page + PAGE_NEW_SUPREMUM);
+ return(PAGE_NEW_SUPREMUM);
} else {
- return(page + PAGE_OLD_SUPREMUM);
+ return(PAGE_OLD_SUPREMUM);
}
}
-/****************************************************************
-TRUE if the record is a user record on the page. */
+/************************************************************//**
+TRUE if the record is a user record on the page.
+@return TRUE if a user record */
UNIV_INLINE
ibool
page_rec_is_user_rec_low(
/*=====================*/
- /* out: TRUE if a user record */
- ulint offset) /* in: record offset on page */
+ ulint offset) /*!< in: record offset on page */
{
ut_ad(offset >= PAGE_NEW_INFIMUM);
#if PAGE_OLD_INFIMUM < PAGE_NEW_INFIMUM
@@ -272,14 +352,14 @@ page_rec_is_user_rec_low(
&& UNIV_LIKELY(offset != PAGE_OLD_SUPREMUM));
}
-/****************************************************************
-TRUE if the record is the supremum record on a page. */
+/************************************************************//**
+TRUE if the record is the supremum record on a page.
+@return TRUE if the supremum record */
UNIV_INLINE
ibool
page_rec_is_supremum_low(
/*=====================*/
- /* out: TRUE if the supremum record */
- ulint offset) /* in: record offset on page */
+ ulint offset) /*!< in: record offset on page */
{
ut_ad(offset >= PAGE_NEW_INFIMUM);
ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
@@ -288,14 +368,14 @@ page_rec_is_supremum_low(
|| UNIV_UNLIKELY(offset == PAGE_OLD_SUPREMUM));
}
-/****************************************************************
-TRUE if the record is the infimum record on a page. */
+/************************************************************//**
+TRUE if the record is the infimum record on a page.
+@return TRUE if the infimum record */
UNIV_INLINE
ibool
page_rec_is_infimum_low(
/*====================*/
- /* out: TRUE if the infimum record */
- ulint offset) /* in: record offset on page */
+ ulint offset) /*!< in: record offset on page */
{
ut_ad(offset >= PAGE_NEW_INFIMUM);
ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
@@ -304,65 +384,65 @@ page_rec_is_infimum_low(
|| UNIV_UNLIKELY(offset == PAGE_OLD_INFIMUM));
}
-/****************************************************************
-TRUE if the record is a user record on the page. */
+/************************************************************//**
+TRUE if the record is a user record on the page.
+@return TRUE if a user record */
UNIV_INLINE
ibool
page_rec_is_user_rec(
/*=================*/
- /* out: TRUE if a user record */
- const rec_t* rec) /* in: record */
+ const rec_t* rec) /*!< in: record */
{
return(page_rec_is_user_rec_low(page_offset(rec)));
}
-/****************************************************************
-TRUE if the record is the supremum record on a page. */
+/************************************************************//**
+TRUE if the record is the supremum record on a page.
+@return TRUE if the supremum record */
UNIV_INLINE
ibool
page_rec_is_supremum(
/*=================*/
- /* out: TRUE if the supremum record */
- const rec_t* rec) /* in: record */
+ const rec_t* rec) /*!< in: record */
{
return(page_rec_is_supremum_low(page_offset(rec)));
}
-/****************************************************************
-TRUE if the record is the infimum record on a page. */
+/************************************************************//**
+TRUE if the record is the infimum record on a page.
+@return TRUE if the infimum record */
UNIV_INLINE
ibool
page_rec_is_infimum(
/*================*/
- /* out: TRUE if the infimum record */
- const rec_t* rec) /* in: record */
+ const rec_t* rec) /*!< in: record */
{
return(page_rec_is_infimum_low(page_offset(rec)));
}
-/*****************************************************************
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
Compares a data tuple to a physical record. Differs from the function
cmp_dtuple_rec_with_match in the way that the record must reside on an
index page, and also page infimum and supremum records can be given in
the parameter rec. These are considered as the negative infinity and
-the positive infinity in the alphabetical order. */
+the positive infinity in the alphabetical order.
+@return 1, 0, -1, if dtuple is greater, equal, less than rec,
+respectively, when only the common first fields are compared */
UNIV_INLINE
int
page_cmp_dtuple_rec_with_match(
/*===========================*/
- /* out: 1, 0, -1, if dtuple is greater, equal,
- less than rec, respectively, when only the
- common first fields are compared */
- dtuple_t* dtuple, /* in: data tuple */
- rec_t* rec, /* in: physical record on a page; may also
+ const dtuple_t* dtuple, /*!< in: data tuple */
+ const rec_t* rec, /*!< in: physical record on a page; may also
be page infimum or supremum, in which case
matched-parameter values below are not
affected */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint* matched_fields, /* in/out: number of already completely
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint* matched_fields, /*!< in/out: number of already completely
matched fields; when function returns
contains the value for current comparison */
- ulint* matched_bytes) /* in/out: number of already matched
+ ulint* matched_bytes) /*!< in/out: number of already matched
bytes within the first field not completely
matched; when function returns contains the
value for current comparison */
@@ -388,163 +468,208 @@ page_cmp_dtuple_rec_with_match(
matched_fields,
matched_bytes));
}
+#endif /* !UNIV_HOTBACKUP */
+
+/*************************************************************//**
+Gets the page number.
+@return page number */
+UNIV_INLINE
+ulint
+page_get_page_no(
+/*=============*/
+ const page_t* page) /*!< in: page */
+{
+ ut_ad(page == page_align((page_t*) page));
+ return(mach_read_from_4(page + FIL_PAGE_OFFSET));
+}
+
+/*************************************************************//**
+Gets the tablespace identifier.
+@return space id */
+UNIV_INLINE
+ulint
+page_get_space_id(
+/*==============*/
+ const page_t* page) /*!< in: page */
+{
+ ut_ad(page == page_align((page_t*) page));
+ return(mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
+}
-/*****************************************************************
+/*************************************************************//**
Gets the number of user records on page (infimum and supremum records
-are not user records). */
+are not user records).
+@return number of user records */
UNIV_INLINE
ulint
page_get_n_recs(
/*============*/
- /* out: number of user records */
- page_t* page) /* in: index page */
+ const page_t* page) /*!< in: index page */
{
return(page_header_get_field(page, PAGE_N_RECS));
}
-/*****************************************************************
-Gets the number of dir slots in directory. */
+/*************************************************************//**
+Gets the number of dir slots in directory.
+@return number of slots */
UNIV_INLINE
ulint
page_dir_get_n_slots(
/*=================*/
- /* out: number of slots */
- page_t* page) /* in: index page */
+ const page_t* page) /*!< in: index page */
{
return(page_header_get_field(page, PAGE_N_DIR_SLOTS));
}
-/*****************************************************************
+/*************************************************************//**
Sets the number of dir slots in directory. */
UNIV_INLINE
void
page_dir_set_n_slots(
/*=================*/
- /* out: number of slots */
- page_t* page, /* in: index page */
- ulint n_slots)/* in: number of slots */
+ page_t* page, /*!< in/out: page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page whose
+ uncompressed part will be updated, or NULL */
+ ulint n_slots)/*!< in: number of slots */
{
- page_header_set_field(page, PAGE_N_DIR_SLOTS, n_slots);
+ page_header_set_field(page, page_zip, PAGE_N_DIR_SLOTS, n_slots);
}
-/*****************************************************************
-Gets the number of records in the heap. */
+/*************************************************************//**
+Gets the number of records in the heap.
+@return number of records in the heap */
UNIV_INLINE
ulint
page_dir_get_n_heap(
/*================*/
- /* out: number of user records */
- page_t* page) /* in: index page */
+ const page_t* page) /*!< in: index page */
{
return(page_header_get_field(page, PAGE_N_HEAP) & 0x7fff);
}
-/*****************************************************************
+/*************************************************************//**
Sets the number of records in the heap. */
UNIV_INLINE
void
page_dir_set_n_heap(
/*================*/
- page_t* page, /* in: index page */
- ulint n_heap) /* in: number of records */
+ page_t* page, /*!< in/out: index page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page whose
+ uncompressed part will be updated, or NULL.
+ Note that the size of the dense page directory
+ in the compressed page trailer is
+ n_heap * PAGE_ZIP_DIR_SLOT_SIZE. */
+ ulint n_heap) /*!< in: number of records */
{
ut_ad(n_heap < 0x8000);
+ ut_ad(!page_zip || n_heap
+ == (page_header_get_field(page, PAGE_N_HEAP) & 0x7fff) + 1);
- page_header_set_field(page, PAGE_N_HEAP, n_heap
+ page_header_set_field(page, page_zip, PAGE_N_HEAP, n_heap
| (0x8000
& page_header_get_field(page, PAGE_N_HEAP)));
}
-/*****************************************************************
-Gets pointer to nth directory slot. */
+#ifdef UNIV_DEBUG
+/*************************************************************//**
+Gets pointer to nth directory slot.
+@return pointer to dir slot */
UNIV_INLINE
page_dir_slot_t*
page_dir_get_nth_slot(
/*==================*/
- /* out: pointer to dir slot */
- page_t* page, /* in: index page */
- ulint n) /* in: position */
+ const page_t* page, /*!< in: index page */
+ ulint n) /*!< in: position */
{
ut_ad(page_dir_get_n_slots(page) > n);
- return(page + UNIV_PAGE_SIZE - PAGE_DIR
+ return((page_dir_slot_t*)
+ page + UNIV_PAGE_SIZE - PAGE_DIR
- (n + 1) * PAGE_DIR_SLOT_SIZE);
}
+#endif /* UNIV_DEBUG */
-/******************************************************************
-Used to check the consistency of a record on a page. */
+/**************************************************************//**
+Used to check the consistency of a record on a page.
+@return TRUE if the checks succeed */
UNIV_INLINE
ibool
page_rec_check(
/*===========*/
- /* out: TRUE if succeed */
- rec_t* rec) /* in: record */
+ const rec_t* rec) /*!< in: record */
{
- page_t* page;
+ const page_t* page = page_align(rec);
ut_a(rec);
- page = buf_frame_align(rec);
-
- ut_a(rec <= page_header_get_ptr(page, PAGE_HEAP_TOP));
- ut_a(rec >= page + PAGE_DATA);
+ ut_a(page_offset(rec) <= page_header_get_field(page, PAGE_HEAP_TOP));
+ ut_a(page_offset(rec) >= PAGE_DATA);
return(TRUE);
}
-/*******************************************************************
-Gets the record pointed to by a directory slot. */
+/***************************************************************//**
+Gets the record pointed to by a directory slot.
+@return pointer to record */
UNIV_INLINE
-rec_t*
+const rec_t*
page_dir_slot_get_rec(
/*==================*/
- /* out: pointer to record */
- page_dir_slot_t* slot) /* in: directory slot */
+ const page_dir_slot_t* slot) /*!< in: directory slot */
{
- return(buf_frame_align(slot) + mach_read_from_2(slot));
+ return(page_align(slot) + mach_read_from_2(slot));
}
-/*******************************************************************
+/***************************************************************//**
This is used to set the record offset in a directory slot. */
UNIV_INLINE
void
page_dir_slot_set_rec(
/*==================*/
- page_dir_slot_t* slot, /* in: directory slot */
- rec_t* rec) /* in: record on the page */
+ page_dir_slot_t* slot, /*!< in: directory slot */
+ rec_t* rec) /*!< in: record on the page */
{
ut_ad(page_rec_check(rec));
mach_write_to_2(slot, page_offset(rec));
}
-/*******************************************************************
-Gets the number of records owned by a directory slot. */
+/***************************************************************//**
+Gets the number of records owned by a directory slot.
+@return number of records */
UNIV_INLINE
ulint
page_dir_slot_get_n_owned(
/*======================*/
- /* out: number of records */
- page_dir_slot_t* slot) /* in: page directory slot */
+ const page_dir_slot_t* slot) /*!< in: page directory slot */
{
- rec_t* rec = page_dir_slot_get_rec(slot);
- return(rec_get_n_owned(rec, page_rec_is_comp(rec)));
+ const rec_t* rec = page_dir_slot_get_rec(slot);
+ if (page_rec_is_comp(slot)) {
+ return(rec_get_n_owned_new(rec));
+ } else {
+ return(rec_get_n_owned_old(rec));
+ }
}
-/*******************************************************************
+/***************************************************************//**
This is used to set the owned records field of a directory slot. */
UNIV_INLINE
void
page_dir_slot_set_n_owned(
/*======================*/
- page_dir_slot_t* slot, /* in: directory slot */
- ulint n) /* in: number of records owned
- by the slot */
+ page_dir_slot_t*slot, /*!< in/out: directory slot */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ ulint n) /*!< in: number of records owned by the slot */
{
- rec_t* rec = page_dir_slot_get_rec(slot);
- rec_set_n_owned(rec, page_rec_is_comp(rec), n);
+ rec_t* rec = (rec_t*) page_dir_slot_get_rec(slot);
+ if (page_rec_is_comp(slot)) {
+ rec_set_n_owned_new(rec, page_zip, n);
+ } else {
+ ut_ad(!page_zip);
+ rec_set_n_owned_old(rec, n);
+ }
}
-/****************************************************************
+/************************************************************//**
Calculates the space reserved for directory slots of a given number of
records. The exact value is a fraction number n * PAGE_DIR_SLOT_SIZE /
PAGE_DIR_SLOT_MIN_N_OWNED, and it is rounded upwards to an integer. */
@@ -552,41 +677,41 @@ UNIV_INLINE
ulint
page_dir_calc_reserved_space(
/*=========================*/
- ulint n_recs) /* in: number of records */
+ ulint n_recs) /*!< in: number of records */
{
return((PAGE_DIR_SLOT_SIZE * n_recs + PAGE_DIR_SLOT_MIN_N_OWNED - 1)
/ PAGE_DIR_SLOT_MIN_N_OWNED);
}
-/****************************************************************
-Gets the pointer to the next record on the page. */
+/************************************************************//**
+Gets the pointer to the next record on the page.
+@return pointer to next record */
UNIV_INLINE
-rec_t*
-page_rec_get_next(
-/*==============*/
- /* out: pointer to next record */
- rec_t* rec) /* in: pointer to record */
+const rec_t*
+page_rec_get_next_low(
+/*==================*/
+ const rec_t* rec, /*!< in: pointer to record */
+ ulint comp) /*!< in: nonzero=compact page layout */
{
- ulint offs;
- page_t* page;
+ ulint offs;
+ const page_t* page;
ut_ad(page_rec_check(rec));
page = page_align(rec);
- offs = rec_get_next_offs(rec, page_is_comp(page));
+ offs = rec_get_next_offs(rec, comp);
if (UNIV_UNLIKELY(offs >= UNIV_PAGE_SIZE)) {
fprintf(stderr,
"InnoDB: Next record offset is nonsensical %lu"
" in record at offset %lu\n"
- "InnoDB: rec address %p, first buffer frame %p\n"
- "InnoDB: buffer pool high end %p, buf fix count %lu\n",
- (ulong)offs, (ulong)(rec - page),
- (void*) rec, (void*) buf_pool->frame_zero,
- (void*) buf_pool->high_end,
- (ulong) buf_block_align(rec)->buf_fix_count);
- buf_page_print(page);
+ "InnoDB: rec address %p, space id %lu, page %lu\n",
+ (ulong)offs, (ulong) page_offset(rec),
+ (void*) rec,
+ (ulong) page_get_space_id(page),
+ (ulong) page_get_page_no(page));
+ buf_page_print(page, 0);
ut_error;
}
@@ -599,49 +724,78 @@ page_rec_get_next(
return(page + offs);
}
-/****************************************************************
+/************************************************************//**
+Gets the pointer to the next record on the page.
+@return pointer to next record */
+UNIV_INLINE
+rec_t*
+page_rec_get_next(
+/*==============*/
+ rec_t* rec) /*!< in: pointer to record */
+{
+ return((rec_t*) page_rec_get_next_low(rec, page_rec_is_comp(rec)));
+}
+
+/************************************************************//**
+Gets the pointer to the next record on the page.
+@return pointer to next record */
+UNIV_INLINE
+const rec_t*
+page_rec_get_next_const(
+/*====================*/
+ const rec_t* rec) /*!< in: pointer to record */
+{
+ return(page_rec_get_next_low(rec, page_rec_is_comp(rec)));
+}
+
+/************************************************************//**
Sets the pointer to the next record on the page. */
UNIV_INLINE
void
page_rec_set_next(
/*==============*/
- rec_t* rec, /* in: pointer to record, must not be page supremum */
- rec_t* next) /* in: pointer to next record, must not be page
- infimum */
+ rec_t* rec, /*!< in: pointer to record,
+ must not be page supremum */
+ rec_t* next) /*!< in: pointer to next record,
+ must not be page infimum */
{
- page_t* page;
ulint offs;
ut_ad(page_rec_check(rec));
ut_ad(!page_rec_is_supremum(rec));
- page = page_align(rec);
+ ut_ad(rec != next);
+
+ ut_ad(!next || !page_rec_is_infimum(next));
+ ut_ad(!next || page_align(rec) == page_align(next));
- if (next) {
- ut_ad(!page_rec_is_infimum(next));
- ut_ad(page == page_align(next));
- offs = (ulint) (next - page);
+ if (UNIV_LIKELY(next != NULL)) {
+ offs = page_offset(next);
} else {
offs = 0;
}
- rec_set_next_offs(rec, page_is_comp(page), offs);
+ if (page_rec_is_comp(rec)) {
+ rec_set_next_offs_new(rec, offs);
+ } else {
+ rec_set_next_offs_old(rec, offs);
+ }
}
-/****************************************************************
-Gets the pointer to the previous record. */
+/************************************************************//**
+Gets the pointer to the previous record.
+@return pointer to previous record */
UNIV_INLINE
-rec_t*
-page_rec_get_prev(
-/*==============*/
- /* out: pointer to previous record */
- rec_t* rec) /* in: pointer to record, must not be page
- infimum */
+const rec_t*
+page_rec_get_prev_const(
+/*====================*/
+ const rec_t* rec) /*!< in: pointer to record, must not be page
+ infimum */
{
- page_dir_slot_t* slot;
+ const page_dir_slot_t* slot;
ulint slot_no;
- rec_t* rec2;
- rec_t* prev_rec = NULL;
- page_t* page;
+ const rec_t* rec2;
+ const rec_t* prev_rec = NULL;
+ const page_t* page;
ut_ad(page_rec_check(rec));
@@ -657,9 +811,16 @@ page_rec_get_prev(
rec2 = page_dir_slot_get_rec(slot);
- while (rec != rec2) {
- prev_rec = rec2;
- rec2 = page_rec_get_next(rec2);
+ if (page_is_comp(page)) {
+ while (rec != rec2) {
+ prev_rec = rec2;
+ rec2 = page_rec_get_next_low(rec2, TRUE);
+ }
+ } else {
+ while (rec != rec2) {
+ prev_rec = rec2;
+ rec2 = page_rec_get_next_low(rec2, FALSE);
+ }
}
ut_a(prev_rec);
@@ -667,23 +828,36 @@ page_rec_get_prev(
return(prev_rec);
}
-/*******************************************************************
-Looks for the record which owns the given record. */
+/************************************************************//**
+Gets the pointer to the previous record.
+@return pointer to previous record */
+UNIV_INLINE
+rec_t*
+page_rec_get_prev(
+/*==============*/
+ rec_t* rec) /*!< in: pointer to record, must not be page
+ infimum */
+{
+ return((rec_t*) page_rec_get_prev_const(rec));
+}
+
+/***************************************************************//**
+Looks for the record which owns the given record.
+@return the owner record */
UNIV_INLINE
rec_t*
page_rec_find_owner_rec(
/*====================*/
- /* out: the owner record */
- rec_t* rec) /* in: the physical record */
+ rec_t* rec) /*!< in: the physical record */
{
ut_ad(page_rec_check(rec));
if (page_rec_is_comp(rec)) {
- while (rec_get_n_owned(rec, TRUE) == 0) {
+ while (rec_get_n_owned_new(rec) == 0) {
rec = page_rec_get_next(rec);
}
} else {
- while (rec_get_n_owned(rec, FALSE) == 0) {
+ while (rec_get_n_owned_old(rec) == 0) {
rec = page_rec_get_next(rec);
}
}
@@ -691,15 +865,31 @@ page_rec_find_owner_rec(
return(rec);
}
-/****************************************************************
+/**********************************************************//**
+Returns the base extra size of a physical record. This is the
+size of the fixed header, independent of the record size.
+@return REC_N_NEW_EXTRA_BYTES or REC_N_OLD_EXTRA_BYTES */
+UNIV_INLINE
+ulint
+page_rec_get_base_extra_size(
+/*=========================*/
+ const rec_t* rec) /*!< in: physical record */
+{
+#if REC_N_NEW_EXTRA_BYTES + 1 != REC_N_OLD_EXTRA_BYTES
+# error "REC_N_NEW_EXTRA_BYTES + 1 != REC_N_OLD_EXTRA_BYTES"
+#endif
+ return(REC_N_NEW_EXTRA_BYTES + (ulint) !page_rec_is_comp(rec));
+}
+
+/************************************************************//**
Returns the sum of the sizes of the records in the record list, excluding
-the infimum and supremum records. */
+the infimum and supremum records.
+@return data in bytes */
UNIV_INLINE
ulint
page_get_data_size(
/*===============*/
- /* out: data in bytes */
- page_t* page) /* in: index page */
+ const page_t* page) /*!< in: index page */
{
ulint ret;
@@ -714,14 +904,48 @@ page_get_data_size(
return(ret);
}
-/*****************************************************************
-Calculates free space if a page is emptied. */
+
+/************************************************************//**
+Allocates a block of memory from the free list of an index page. */
+UNIV_INLINE
+void
+page_mem_alloc_free(
+/*================*/
+ page_t* page, /*!< in/out: index page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page with enough
+ space available for inserting the record,
+ or NULL */
+ rec_t* next_rec,/*!< in: pointer to the new head of the
+ free record list */
+ ulint need) /*!< in: number of bytes allocated */
+{
+ ulint garbage;
+
+#ifdef UNIV_DEBUG
+ const rec_t* old_rec = page_header_get_ptr(page, PAGE_FREE);
+ ulint next_offs;
+
+ ut_ad(old_rec);
+ next_offs = rec_get_next_offs(old_rec, page_is_comp(page));
+ ut_ad(next_rec == (next_offs ? page + next_offs : NULL));
+#endif
+
+ page_header_set_ptr(page, page_zip, PAGE_FREE, next_rec);
+
+ garbage = page_header_get_field(page, PAGE_GARBAGE);
+ ut_ad(garbage >= need);
+
+ page_header_set_field(page, page_zip, PAGE_GARBAGE, garbage - need);
+}
+
+/*************************************************************//**
+Calculates free space if a page is emptied.
+@return free space */
UNIV_INLINE
ulint
page_get_free_space_of_empty(
/*=========================*/
- /* out: free space */
- ulint comp) /* in: nonzero=compact page layout */
+ ulint comp) /*!< in: nonzero=compact page layout */
{
if (UNIV_LIKELY(comp)) {
return((ulint)(UNIV_PAGE_SIZE
@@ -736,20 +960,20 @@ page_get_free_space_of_empty(
- 2 * PAGE_DIR_SLOT_SIZE));
}
-/****************************************************************
+/************************************************************//**
Each user record on a page, and also the deleted user records in the heap
takes its size plus the fraction of the dir cell size /
PAGE_DIR_SLOT_MIN_N_OWNED bytes for it. If the sum of these exceeds the
value of page_get_free_space_of_empty, the insert is impossible, otherwise
it is allowed. This function returns the maximum combined size of records
-which can be inserted on top of the record heap. */
+which can be inserted on top of the record heap.
+@return maximum combined size for inserted records */
UNIV_INLINE
ulint
page_get_max_insert_size(
/*=====================*/
- /* out: maximum combined size for inserted records */
- page_t* page, /* in: index page */
- ulint n_recs) /* in: number of records */
+ const page_t* page, /*!< in: index page */
+ ulint n_recs) /*!< in: number of records */
{
ulint occupied;
ulint free_space;
@@ -782,16 +1006,16 @@ page_get_max_insert_size(
return(free_space - occupied);
}
-/****************************************************************
+/************************************************************//**
Returns the maximum combined size of records which can be inserted on top
-of the record heap if a page is first reorganized. */
+of the record heap if a page is first reorganized.
+@return maximum combined size for inserted records */
UNIV_INLINE
ulint
page_get_max_insert_size_after_reorganize(
/*======================================*/
- /* out: maximum combined size for inserted records */
- page_t* page, /* in: index page */
- ulint n_recs) /* in: number of records */
+ const page_t* page, /*!< in: index page */
+ ulint n_recs) /*!< in: number of records */
{
ulint occupied;
ulint free_space;
@@ -809,40 +1033,38 @@ page_get_max_insert_size_after_reorganize(
return(free_space - occupied);
}
-/****************************************************************
+/************************************************************//**
Puts a record to free list. */
UNIV_INLINE
void
page_mem_free(
/*==========*/
- page_t* page, /* in: index page */
- rec_t* rec, /* in: pointer to the (origin of) record */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
+ page_t* page, /*!< in/out: index page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ rec_t* rec, /*!< in: pointer to the (origin of) record */
+ dict_index_t* index, /*!< in: index of rec */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
rec_t* free;
ulint garbage;
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- ut_ad(!rec_offs_comp(offsets) == !page_rec_is_comp(rec));
+ ut_ad(rec_offs_validate(rec, index, offsets));
free = page_header_get_ptr(page, PAGE_FREE);
page_rec_set_next(rec, free);
- page_header_set_ptr(page, PAGE_FREE, rec);
-
-#if 0 /* It's better not to destroy the user's data. */
-
- /* Clear the data bytes of the deleted record in order to improve
- the compression ratio of the page and to make it easier to read
- page dumps in corruption reports. The extra bytes of the record
- cannot be cleared, because page_mem_alloc() needs them in order
- to determine the size of the deleted record. */
- memset(rec, 0, rec_offs_data_size(offsets));
-#endif
+ page_header_set_ptr(page, page_zip, PAGE_FREE, rec);
garbage = page_header_get_field(page, PAGE_GARBAGE);
- page_header_set_field(page, PAGE_GARBAGE,
+ page_header_set_field(page, page_zip, PAGE_GARBAGE,
garbage + rec_offs_size(offsets));
+
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ page_zip_dir_delete(page_zip, rec, index, offsets, free);
+ } else {
+ page_header_set_field(page, page_zip, PAGE_N_RECS,
+ page_get_n_recs(page) - 1);
+ }
}
#ifdef UNIV_MATERIALIZE
diff --git a/storage/innobase/include/page0types.h b/storage/innobase/include/page0types.h
index 1fbeeb0f60f..d9a277bf208 100644
--- a/storage/innobase/include/page0types.h
+++ b/storage/innobase/include/page0types.h
@@ -1,7 +1,24 @@
-/******************************************************
-Index page routines
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1994-1996 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/page0types.h
+Index page routines
Created 2/2/1994 Heikki Tuuri
*******************************************************/
@@ -10,13 +27,124 @@ Created 2/2/1994 Heikki Tuuri
#define page0types_h
#include "univ.i"
+#include "dict0types.h"
+#include "mtr0types.h"
-/* Type of the index page */
-/* The following define eliminates a name collision on HP-UX */
+/** Eliminates a name collision on HP-UX */
#define page_t ib_page_t
+/** Type of the index page */
typedef byte page_t;
-typedef struct page_search_struct page_search_t;
+/** Index page cursor */
typedef struct page_cur_struct page_cur_t;
+/** Compressed index page */
+typedef byte page_zip_t;
+/** Compressed page descriptor */
+typedef struct page_zip_des_struct page_zip_des_t;
+
+/* The following definitions would better belong to page0zip.h,
+but we cannot include page0zip.h from rem0rec.ic, because
+page0*.h includes rem0rec.h and may include rem0rec.ic. */
+
+/** Number of bits needed for representing different compressed page sizes */
+#define PAGE_ZIP_SSIZE_BITS 3
+
+/** log2 of smallest compressed page size */
+#define PAGE_ZIP_MIN_SIZE_SHIFT 10
+/** Smallest compressed page size */
+#define PAGE_ZIP_MIN_SIZE (1 << PAGE_ZIP_MIN_SIZE_SHIFT)
+
+/** Number of supported compressed page sizes */
+#define PAGE_ZIP_NUM_SSIZE (UNIV_PAGE_SIZE_SHIFT - PAGE_ZIP_MIN_SIZE_SHIFT + 2)
+#if PAGE_ZIP_NUM_SSIZE > (1 << PAGE_ZIP_SSIZE_BITS)
+# error "PAGE_ZIP_NUM_SSIZE > (1 << PAGE_ZIP_SSIZE_BITS)"
+#endif
+
+/** Compressed page descriptor */
+struct page_zip_des_struct
+{
+ page_zip_t* data; /*!< compressed page data */
+
+#ifdef UNIV_DEBUG
+ unsigned m_start:16; /*!< start offset of modification log */
+#endif /* UNIV_DEBUG */
+ unsigned m_end:16; /*!< end offset of modification log */
+ unsigned m_nonempty:1; /*!< TRUE if the modification log
+ is not empty */
+ unsigned n_blobs:12; /*!< number of externally stored
+ columns on the page; the maximum
+ is 744 on a 16 KiB page */
+ unsigned ssize:PAGE_ZIP_SSIZE_BITS;
+ /*!< 0 or compressed page size;
+ the size in bytes is
+ PAGE_ZIP_MIN_SIZE << (ssize - 1). */
+};
+
+/** Compression statistics for a given page size */
+struct page_zip_stat_struct {
+ /** Number of page compressions */
+ ulint compressed;
+ /** Number of successful page compressions */
+ ulint compressed_ok;
+ /** Number of page decompressions */
+ ulint decompressed;
+ /** Duration of page compressions in microseconds */
+ ib_uint64_t compressed_usec;
+ /** Duration of page decompressions in microseconds */
+ ib_uint64_t decompressed_usec;
+};
+
+/** Compression statistics */
+typedef struct page_zip_stat_struct page_zip_stat_t;
+
+/** Statistics on compression, indexed by page_zip_des_struct::ssize - 1 */
+extern page_zip_stat_t page_zip_stat[PAGE_ZIP_NUM_SSIZE - 1];
+
+/**********************************************************************//**
+Write the "deleted" flag of a record on a compressed page. The flag must
+already have been written on the uncompressed page. */
+UNIV_INTERN
+void
+page_zip_rec_set_deleted(
+/*=====================*/
+ page_zip_des_t* page_zip,/*!< in/out: compressed page */
+ const byte* rec, /*!< in: record on the uncompressed page */
+ ulint flag) /*!< in: the deleted flag (nonzero=TRUE) */
+ __attribute__((nonnull));
+
+/**********************************************************************//**
+Write the "owned" flag of a record on a compressed page. The n_owned field
+must already have been written on the uncompressed page. */
+UNIV_INTERN
+void
+page_zip_rec_set_owned(
+/*===================*/
+ page_zip_des_t* page_zip,/*!< in/out: compressed page */
+ const byte* rec, /*!< in: record on the uncompressed page */
+ ulint flag) /*!< in: the owned flag (nonzero=TRUE) */
+ __attribute__((nonnull));
+
+/**********************************************************************//**
+Shift the dense page directory when a record is deleted. */
+UNIV_INTERN
+void
+page_zip_dir_delete(
+/*================*/
+ page_zip_des_t* page_zip,/*!< in/out: compressed page */
+ byte* rec, /*!< in: deleted record */
+ dict_index_t* index, /*!< in: index of rec */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec) */
+ const byte* free) /*!< in: previous start of the free list */
+ __attribute__((nonnull(1,2,3,4)));
+/**********************************************************************//**
+Add a slot to the dense page directory. */
+UNIV_INTERN
+void
+page_zip_dir_add_slot(
+/*==================*/
+ page_zip_des_t* page_zip, /*!< in/out: compressed page */
+ ulint is_clustered) /*!< in: nonzero for clustered index,
+ zero for others */
+ __attribute__((nonnull));
#endif
diff --git a/storage/innodb_plugin/include/page0zip.h b/storage/innobase/include/page0zip.h
index 574809e5227..574809e5227 100644
--- a/storage/innodb_plugin/include/page0zip.h
+++ b/storage/innobase/include/page0zip.h
diff --git a/storage/innodb_plugin/include/page0zip.ic b/storage/innobase/include/page0zip.ic
index 75cc7a9fcc4..75cc7a9fcc4 100644
--- a/storage/innodb_plugin/include/page0zip.ic
+++ b/storage/innobase/include/page0zip.ic
diff --git a/storage/innobase/include/pars0grm.h b/storage/innobase/include/pars0grm.h
index 0062b8314ee..3de233eed3a 100644
--- a/storage/innobase/include/pars0grm.h
+++ b/storage/innobase/include/pars0grm.h
@@ -1,27 +1,29 @@
-/* A Bison parser, made by GNU Bison 1.875d. */
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software
+Foundation, Inc.
-/* Skeleton parser for Yacc-like parsing with Bison,
- Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+As a special exception, when this file is copied by Bison into a
+Bison output file, you may use that output file without restriction.
+This special exception was added by the Free Software Foundation
+in version 1.24 of Bison.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-/* As a special exception, when this file is copied by Bison into a
- Bison output file, you may use that output file without restriction.
- This special exception was added by the Free Software Foundation
- in version 1.24 of Bison. */
+*****************************************************************************/
+
+/* A Bison parser, made by GNU Bison 1.875d. */
/* Tokens. */
#ifndef YYTOKENTYPE
diff --git a/storage/innobase/include/pars0opt.h b/storage/innobase/include/pars0opt.h
index ff92cc062d9..42d956068f8 100644
--- a/storage/innobase/include/pars0opt.h
+++ b/storage/innobase/include/pars0opt.h
@@ -1,7 +1,24 @@
-/******************************************************
-Simple SQL optimizer
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1997 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/pars0opt.h
+Simple SQL optimizer
Created 12/21/1997 Heikki Tuuri
*******************************************************/
@@ -16,40 +33,40 @@ Created 12/21/1997 Heikki Tuuri
#include "dict0types.h"
#include "row0sel.h"
-/***********************************************************************
+/*******************************************************************//**
Optimizes a select. Decides which indexes to tables to use. The tables
are accessed in the order that they were written to the FROM part in the
select statement. */
-
+UNIV_INTERN
void
opt_search_plan(
/*============*/
- sel_node_t* sel_node); /* in: parsed select node */
-/***********************************************************************
+ sel_node_t* sel_node); /*!< in: parsed select node */
+/*******************************************************************//**
Looks for occurrences of the columns of the table in the query subgraph and
adds them to the list of columns if an occurrence of the same column does not
already exist in the list. If the column is already in the list, puts a value
indirection to point to the occurrence in the column list, except if the
column occurrence we are looking at is in the column list, in which case
nothing is done. */
-
+UNIV_INTERN
void
opt_find_all_cols(
/*==============*/
- ibool copy_val, /* in: if TRUE, new found columns are
+ ibool copy_val, /*!< in: if TRUE, new found columns are
added as columns to copy */
- dict_index_t* index, /* in: index to use */
- sym_node_list_t* col_list, /* in: base node of a list where
+ dict_index_t* index, /*!< in: index to use */
+ sym_node_list_t* col_list, /*!< in: base node of a list where
to add new found columns */
- plan_t* plan, /* in: plan or NULL */
- que_node_t* exp); /* in: expression or condition */
-/************************************************************************
+ plan_t* plan, /*!< in: plan or NULL */
+ que_node_t* exp); /*!< in: expression or condition */
+/********************************************************************//**
Prints info of a query plan. */
-
+UNIV_INTERN
void
opt_print_query_plan(
/*=================*/
- sel_node_t* sel_node); /* in: select node */
+ sel_node_t* sel_node); /*!< in: select node */
#ifndef UNIV_NONINL
#include "pars0opt.ic"
diff --git a/storage/innobase/include/pars0opt.ic b/storage/innobase/include/pars0opt.ic
index 0bfa8526bee..e0bb6bf1af2 100644
--- a/storage/innobase/include/pars0opt.ic
+++ b/storage/innobase/include/pars0opt.ic
@@ -1,7 +1,24 @@
-/******************************************************
-Simple SQL optimizer
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1997 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/pars0opt.ic
+Simple SQL optimizer
Created 12/21/1997 Heikki Tuuri
*******************************************************/
diff --git a/storage/innobase/include/pars0pars.h b/storage/innobase/include/pars0pars.h
index 1c6c550d313..fe5d76ebbb0 100644
--- a/storage/innobase/include/pars0pars.h
+++ b/storage/innobase/include/pars0pars.h
@@ -1,7 +1,24 @@
-/******************************************************
-SQL parser
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/pars0pars.h
+SQL parser
Created 11/19/1996 Heikki Tuuri
*******************************************************/
@@ -17,18 +34,19 @@ Created 11/19/1996 Heikki Tuuri
#include "trx0types.h"
#include "ut0vec.h"
-/* Type of the user functions. The first argument is always InnoDB-supplied
+/** Type of the user functions. The first argument is always InnoDB-supplied
and varies in type, while 'user_arg' is a user-supplied argument. The
meaning of the return type also varies. See the individual use cases, e.g.
the FETCH statement, for details on them. */
typedef void* (*pars_user_func_cb_t)(void* arg, void* user_arg);
+/** If the following is set TRUE, the parser will emit debugging
+information */
extern int yydebug;
-/* If the following is set TRUE, the lexer will print the SQL string
-as it tokenizes it */
-
#ifdef UNIV_SQL_DEBUG
+/** If the following is set TRUE, the lexer will print the SQL string
+as it tokenizes it */
extern ibool pars_print_lexed;
#endif /* UNIV_SQL_DEBUG */
@@ -77,315 +95,309 @@ extern ulint pars_star_denoter;
int
yyparse(void);
-/*****************************************************************
-Parses an SQL string returning the query graph. */
-
+/*************************************************************//**
+Parses an SQL string returning the query graph.
+@return own: the query graph */
+UNIV_INTERN
que_t*
pars_sql(
/*=====*/
- /* out, own: the query graph */
- pars_info_t* info, /* in: extra information, or NULL */
- const char* str); /* in: SQL string */
-/*****************************************************************
+ pars_info_t* info, /*!< in: extra information, or NULL */
+ const char* str); /*!< in: SQL string */
+/*************************************************************//**
Retrieves characters to the lexical analyzer. */
-
+UNIV_INTERN
void
pars_get_lex_chars(
/*===============*/
- char* buf, /* in/out: buffer where to copy */
- int* result, /* out: number of characters copied or EOF */
- int max_size); /* in: maximum number of characters which fit
+ char* buf, /*!< in/out: buffer where to copy */
+ int* result, /*!< out: number of characters copied or EOF */
+ int max_size); /*!< in: maximum number of characters which fit
in the buffer */
-/*****************************************************************
+/*************************************************************//**
Called by yyparse on error. */
-
+UNIV_INTERN
void
yyerror(
/*====*/
- const char* s); /* in: error message string */
-/*************************************************************************
-Parses a variable declaration. */
-
+ const char* s); /*!< in: error message string */
+/*********************************************************************//**
+Parses a variable declaration.
+@return own: symbol table node of type SYM_VAR */
+UNIV_INTERN
sym_node_t*
pars_variable_declaration(
/*======================*/
- /* out, own: symbol table node of type
- SYM_VAR */
- sym_node_t* node, /* in: symbol table node allocated for the
+ sym_node_t* node, /*!< in: symbol table node allocated for the
id of the variable */
- pars_res_word_t* type); /* in: pointer to a type token */
-/*************************************************************************
-Parses a function expression. */
-
+ pars_res_word_t* type); /*!< in: pointer to a type token */
+/*********************************************************************//**
+Parses a function expression.
+@return own: function node in a query tree */
+UNIV_INTERN
func_node_t*
pars_func(
/*======*/
- /* out, own: function node in a query tree */
- que_node_t* res_word,/* in: function name reserved word */
- que_node_t* arg); /* in: first argument in the argument list */
-/*************************************************************************
-Parses an operator expression. */
-
+ que_node_t* res_word,/*!< in: function name reserved word */
+ que_node_t* arg); /*!< in: first argument in the argument list */
+/*********************************************************************//**
+Parses an operator expression.
+@return own: function node in a query tree */
+UNIV_INTERN
func_node_t*
pars_op(
/*====*/
- /* out, own: function node in a query tree */
- int func, /* in: operator token code */
- que_node_t* arg1, /* in: first argument */
- que_node_t* arg2); /* in: second argument or NULL for an unary
+ int func, /*!< in: operator token code */
+ que_node_t* arg1, /*!< in: first argument */
+ que_node_t* arg2); /*!< in: second argument or NULL for an unary
operator */
-/*************************************************************************
-Parses an ORDER BY clause. Order by a single column only is supported. */
-
+/*********************************************************************//**
+Parses an ORDER BY clause. Order by a single column only is supported.
+@return own: order-by node in a query tree */
+UNIV_INTERN
order_node_t*
pars_order_by(
/*==========*/
- /* out, own: order-by node in a query tree */
- sym_node_t* column, /* in: column name */
- pars_res_word_t* asc); /* in: &pars_asc_token or pars_desc_token */
-/*************************************************************************
+ sym_node_t* column, /*!< in: column name */
+ pars_res_word_t* asc); /*!< in: &pars_asc_token or &pars_desc_token */
+/*********************************************************************//**
Parses a select list; creates a query graph node for the whole SELECT
-statement. */
-
+statement.
+@return own: select node in a query tree */
+UNIV_INTERN
sel_node_t*
pars_select_list(
/*=============*/
- /* out, own: select node in a query
- tree */
- que_node_t* select_list, /* in: select list */
- sym_node_t* into_list); /* in: variables list or NULL */
-/*************************************************************************
-Parses a cursor declaration. */
-
+ que_node_t* select_list, /*!< in: select list */
+ sym_node_t* into_list); /*!< in: variables list or NULL */
+/*********************************************************************//**
+Parses a cursor declaration.
+@return sym_node */
+UNIV_INTERN
que_node_t*
pars_cursor_declaration(
/*====================*/
- /* out: sym_node */
- sym_node_t* sym_node, /* in: cursor id node in the symbol
+ sym_node_t* sym_node, /*!< in: cursor id node in the symbol
table */
- sel_node_t* select_node); /* in: select node */
-/*************************************************************************
-Parses a function declaration. */
-
+ sel_node_t* select_node); /*!< in: select node */
+/*********************************************************************//**
+Parses a function declaration.
+@return sym_node */
+UNIV_INTERN
que_node_t*
pars_function_declaration(
/*======================*/
- /* out: sym_node */
- sym_node_t* sym_node); /* in: function id node in the symbol
+ sym_node_t* sym_node); /*!< in: function id node in the symbol
table */
-/*************************************************************************
-Parses a select statement. */
-
+/*********************************************************************//**
+Parses a select statement.
+@return own: select node in a query tree */
+UNIV_INTERN
sel_node_t*
pars_select_statement(
/*==================*/
- /* out, own: select node in a query
- tree */
- sel_node_t* select_node, /* in: select node already containing
+ sel_node_t* select_node, /*!< in: select node already containing
the select list */
- sym_node_t* table_list, /* in: table list */
- que_node_t* search_cond, /* in: search condition or NULL */
- pars_res_word_t* for_update, /* in: NULL or &pars_update_token */
- pars_res_word_t* consistent_read,/* in: NULL or
+ sym_node_t* table_list, /*!< in: table list */
+ que_node_t* search_cond, /*!< in: search condition or NULL */
+ pars_res_word_t* for_update, /*!< in: NULL or &pars_update_token */
+ pars_res_word_t* consistent_read,/*!< in: NULL or
&pars_consistent_token */
- order_node_t* order_by); /* in: NULL or an order-by node */
-/*************************************************************************
-Parses a column assignment in an update. */
-
+ order_node_t* order_by); /*!< in: NULL or an order-by node */
+/*********************************************************************//**
+Parses a column assignment in an update.
+@return column assignment node */
+UNIV_INTERN
col_assign_node_t*
pars_column_assignment(
/*===================*/
- /* out: column assignment node */
- sym_node_t* column, /* in: column to assign */
- que_node_t* exp); /* in: value to assign */
-/*************************************************************************
-Parses a delete or update statement start. */
-
+ sym_node_t* column, /*!< in: column to assign */
+ que_node_t* exp); /*!< in: value to assign */
+/*********************************************************************//**
+Parses a delete or update statement start.
+@return own: update node in a query tree */
+UNIV_INTERN
upd_node_t*
pars_update_statement_start(
/*========================*/
- /* out, own: update node in a query
- tree */
- ibool is_delete, /* in: TRUE if delete */
- sym_node_t* table_sym, /* in: table name node */
- col_assign_node_t* col_assign_list);/* in: column assignment list, NULL
+ ibool is_delete, /*!< in: TRUE if delete */
+ sym_node_t* table_sym, /*!< in: table name node */
+ col_assign_node_t* col_assign_list);/*!< in: column assignment list, NULL
if delete */
-/*************************************************************************
-Parses an update or delete statement. */
-
+/*********************************************************************//**
+Parses an update or delete statement.
+@return own: update node in a query tree */
+UNIV_INTERN
upd_node_t*
pars_update_statement(
/*==================*/
- /* out, own: update node in a query
- tree */
- upd_node_t* node, /* in: update node */
- sym_node_t* cursor_sym, /* in: pointer to a cursor entry in
+ upd_node_t* node, /*!< in: update node */
+ sym_node_t* cursor_sym, /*!< in: pointer to a cursor entry in
the symbol table or NULL */
- que_node_t* search_cond); /* in: search condition or NULL */
-/*************************************************************************
-Parses an insert statement. */
-
+ que_node_t* search_cond); /*!< in: search condition or NULL */
+/*********************************************************************//**
+Parses an insert statement.
+@return own: insert node in a query tree */
+UNIV_INTERN
ins_node_t*
pars_insert_statement(
/*==================*/
- /* out, own: update node in a query
- tree */
- sym_node_t* table_sym, /* in: table name node */
- que_node_t* values_list, /* in: value expression list or NULL */
- sel_node_t* select); /* in: select condition or NULL */
-/*************************************************************************
-Parses a procedure parameter declaration. */
-
+ sym_node_t* table_sym, /*!< in: table name node */
+ que_node_t* values_list, /*!< in: value expression list or NULL */
+ sel_node_t* select); /*!< in: select condition or NULL */
+/*********************************************************************//**
+Parses a procedure parameter declaration.
+@return own: symbol table node of type SYM_VAR */
+UNIV_INTERN
sym_node_t*
pars_parameter_declaration(
/*=======================*/
- /* out, own: symbol table node of type
- SYM_VAR */
- sym_node_t* node, /* in: symbol table node allocated for the
+ sym_node_t* node, /*!< in: symbol table node allocated for the
id of the parameter */
ulint param_type,
- /* in: PARS_INPUT or PARS_OUTPUT */
- pars_res_word_t* type); /* in: pointer to a type token */
-/*************************************************************************
-Parses an elsif element. */
-
+ /*!< in: PARS_INPUT or PARS_OUTPUT */
+ pars_res_word_t* type); /*!< in: pointer to a type token */
+/*********************************************************************//**
+Parses an elsif element.
+@return elsif node */
+UNIV_INTERN
elsif_node_t*
pars_elsif_element(
/*===============*/
- /* out: elsif node */
- que_node_t* cond, /* in: if-condition */
- que_node_t* stat_list); /* in: statement list */
-/*************************************************************************
-Parses an if-statement. */
-
+ que_node_t* cond, /*!< in: if-condition */
+ que_node_t* stat_list); /*!< in: statement list */
+/*********************************************************************//**
+Parses an if-statement.
+@return if-statement node */
+UNIV_INTERN
if_node_t*
pars_if_statement(
/*==============*/
- /* out: if-statement node */
- que_node_t* cond, /* in: if-condition */
- que_node_t* stat_list, /* in: statement list */
- que_node_t* else_part); /* in: else-part statement list */
-/*************************************************************************
-Parses a for-loop-statement. */
-
+ que_node_t* cond, /*!< in: if-condition */
+ que_node_t* stat_list, /*!< in: statement list */
+ que_node_t* else_part); /*!< in: else-part statement list */
+/*********************************************************************//**
+Parses a for-loop-statement.
+@return for-statement node */
+UNIV_INTERN
for_node_t*
pars_for_statement(
/*===============*/
- /* out: for-statement node */
- sym_node_t* loop_var, /* in: loop variable */
- que_node_t* loop_start_limit,/* in: loop start expression */
- que_node_t* loop_end_limit, /* in: loop end expression */
- que_node_t* stat_list); /* in: statement list */
-/*************************************************************************
-Parses a while-statement. */
-
+ sym_node_t* loop_var, /*!< in: loop variable */
+ que_node_t* loop_start_limit,/*!< in: loop start expression */
+ que_node_t* loop_end_limit, /*!< in: loop end expression */
+ que_node_t* stat_list); /*!< in: statement list */
+/*********************************************************************//**
+Parses a while-statement.
+@return while-statement node */
+UNIV_INTERN
while_node_t*
pars_while_statement(
/*=================*/
- /* out: while-statement node */
- que_node_t* cond, /* in: while-condition */
- que_node_t* stat_list); /* in: statement list */
-/*************************************************************************
-Parses an exit statement. */
-
+ que_node_t* cond, /*!< in: while-condition */
+ que_node_t* stat_list); /*!< in: statement list */
+/*********************************************************************//**
+Parses an exit statement.
+@return exit statement node */
+UNIV_INTERN
exit_node_t*
pars_exit_statement(void);
/*=====================*/
- /* out: exit statement node */
-/*************************************************************************
-Parses a return-statement. */
-
+/*********************************************************************//**
+Parses a return-statement.
+@return return-statement node */
+UNIV_INTERN
return_node_t*
pars_return_statement(void);
/*=======================*/
- /* out: return-statement node */
-/*************************************************************************
-Parses a procedure call. */
-
+/*********************************************************************//**
+Parses a procedure call.
+@return function node */
+UNIV_INTERN
func_node_t*
pars_procedure_call(
/*================*/
- /* out: function node */
- que_node_t* res_word,/* in: procedure name reserved word */
- que_node_t* args); /* in: argument list */
-/*************************************************************************
-Parses an assignment statement. */
-
+ que_node_t* res_word,/*!< in: procedure name reserved word */
+ que_node_t* args); /*!< in: argument list */
+/*********************************************************************//**
+Parses an assignment statement.
+@return assignment statement node */
+UNIV_INTERN
assign_node_t*
pars_assignment_statement(
/*======================*/
- /* out: assignment statement node */
- sym_node_t* var, /* in: variable to assign */
- que_node_t* val); /* in: value to assign */
-/*************************************************************************
+ sym_node_t* var, /*!< in: variable to assign */
+ que_node_t* val); /*!< in: value to assign */
+/*********************************************************************//**
Parses a fetch statement. into_list or user_func (but not both) must be
-non-NULL. */
-
+non-NULL.
+@return fetch statement node */
+UNIV_INTERN
fetch_node_t*
pars_fetch_statement(
/*=================*/
- /* out: fetch statement node */
- sym_node_t* cursor, /* in: cursor node */
- sym_node_t* into_list, /* in: variables to set, or NULL */
- sym_node_t* user_func); /* in: user function name, or NULL */
-/*************************************************************************
-Parses an open or close cursor statement. */
-
+ sym_node_t* cursor, /*!< in: cursor node */
+ sym_node_t* into_list, /*!< in: variables to set, or NULL */
+ sym_node_t* user_func); /*!< in: user function name, or NULL */
+/*********************************************************************//**
+Parses an open or close cursor statement.
+@return open statement node */
+UNIV_INTERN
open_node_t*
pars_open_statement(
/*================*/
- /* out: fetch statement node */
- ulint type, /* in: ROW_SEL_OPEN_CURSOR
+ ulint type, /*!< in: ROW_SEL_OPEN_CURSOR
or ROW_SEL_CLOSE_CURSOR */
- sym_node_t* cursor); /* in: cursor node */
-/*************************************************************************
-Parses a row_printf-statement. */
-
+ sym_node_t* cursor); /*!< in: cursor node */
+/*********************************************************************//**
+Parses a row_printf-statement.
+@return row_printf-statement node */
+UNIV_INTERN
row_printf_node_t*
pars_row_printf_statement(
/*======================*/
- /* out: row_printf-statement node */
- sel_node_t* sel_node); /* in: select node */
-/*************************************************************************
-Parses a commit statement. */
-
+ sel_node_t* sel_node); /*!< in: select node */
+/*********************************************************************//**
+Parses a commit statement.
+@return own: commit node struct */
+UNIV_INTERN
commit_node_t*
pars_commit_statement(void);
/*=======================*/
-/*************************************************************************
-Parses a rollback statement. */
-
+/*********************************************************************//**
+Parses a rollback statement.
+@return own: rollback node struct */
+UNIV_INTERN
roll_node_t*
pars_rollback_statement(void);
/*=========================*/
-/*************************************************************************
-Parses a column definition at a table creation. */
-
+/*********************************************************************//**
+Parses a column definition at a table creation.
+@return column sym table node */
+UNIV_INTERN
sym_node_t*
pars_column_def(
/*============*/
- /* out: column sym table
- node */
- sym_node_t* sym_node, /* in: column node in the
+ sym_node_t* sym_node, /*!< in: column node in the
symbol table */
- pars_res_word_t* type, /* in: data type */
- sym_node_t* len, /* in: length of column, or
+ pars_res_word_t* type, /*!< in: data type */
+ sym_node_t* len, /*!< in: length of column, or
NULL */
- void* is_unsigned, /* in: if not NULL, column
+ void* is_unsigned, /*!< in: if not NULL, column
is of type UNSIGNED. */
- void* is_not_null); /* in: if not NULL, column
+ void* is_not_null); /*!< in: if not NULL, column
is of type NOT NULL. */
-/*************************************************************************
-Parses a table creation operation. */
-
+/*********************************************************************//**
+Parses a table creation operation.
+@return table create subgraph */
+UNIV_INTERN
tab_node_t*
pars_create_table(
/*==============*/
- /* out: table create subgraph */
- sym_node_t* table_sym, /* in: table name node in the symbol
+ sym_node_t* table_sym, /*!< in: table name node in the symbol
table */
- sym_node_t* column_defs, /* in: list of column names */
- void* not_fit_in_memory);/* in: a non-NULL pointer means that
+ sym_node_t* column_defs, /*!< in: list of column names */
+ void* not_fit_in_memory);/*!< in: a non-NULL pointer means that
this is a table which in simulations
should be simulated as not fitting
in memory; thread is put to sleep
@@ -395,99 +407,99 @@ pars_create_table(
will forget about non-NULL value if
it has to reload the table definition
from disk */
-/*************************************************************************
-Parses an index creation operation. */
-
+/*********************************************************************//**
+Parses an index creation operation.
+@return index create subgraph */
+UNIV_INTERN
ind_node_t*
pars_create_index(
/*==============*/
- /* out: index create subgraph */
- pars_res_word_t* unique_def, /* in: not NULL if a unique index */
- pars_res_word_t* clustered_def, /* in: not NULL if a clustered index */
- sym_node_t* index_sym, /* in: index name node in the symbol
+ pars_res_word_t* unique_def, /*!< in: not NULL if a unique index */
+ pars_res_word_t* clustered_def, /*!< in: not NULL if a clustered index */
+ sym_node_t* index_sym, /*!< in: index name node in the symbol
table */
- sym_node_t* table_sym, /* in: table name node in the symbol
+ sym_node_t* table_sym, /*!< in: table name node in the symbol
table */
- sym_node_t* column_list); /* in: list of column names */
-/*************************************************************************
-Parses a procedure definition. */
-
+ sym_node_t* column_list); /*!< in: list of column names */
+/*********************************************************************//**
+Parses a procedure definition.
+@return query fork node */
+UNIV_INTERN
que_fork_t*
pars_procedure_definition(
/*======================*/
- /* out: query fork node */
- sym_node_t* sym_node, /* in: procedure id node in the symbol
+ sym_node_t* sym_node, /*!< in: procedure id node in the symbol
table */
- sym_node_t* param_list, /* in: parameter declaration list */
- que_node_t* stat_list); /* in: statement list */
+ sym_node_t* param_list, /*!< in: parameter declaration list */
+ que_node_t* stat_list); /*!< in: statement list */
-/*****************************************************************
+/*************************************************************//**
Parses a stored procedure call, when this is not within another stored
procedure, that is, the client issues a procedure call directly.
In MySQL/InnoDB, stored InnoDB procedures are invoked via the
-parsed procedure tree, not via InnoDB SQL, so this function is not used. */
-
+parsed procedure tree, not via InnoDB SQL, so this function is not used.
+@return query graph */
+UNIV_INTERN
que_fork_t*
pars_stored_procedure_call(
/*=======================*/
- /* out: query graph */
- sym_node_t* sym_node); /* in: stored procedure name */
-/**********************************************************************
+ sym_node_t* sym_node); /*!< in: stored procedure name */
+/******************************************************************//**
Completes a query graph by adding query thread and fork nodes
above it and prepares the graph for running. The fork created is of
-type QUE_FORK_MYSQL_INTERFACE. */
-
+type QUE_FORK_MYSQL_INTERFACE.
+@return query thread node to run */
+UNIV_INTERN
que_thr_t*
pars_complete_graph_for_exec(
/*=========================*/
- /* out: query thread node to run */
- que_node_t* node, /* in: root node for an incomplete
+ que_node_t* node, /*!< in: root node for an incomplete
query graph */
- trx_t* trx, /* in: transaction handle */
- mem_heap_t* heap); /* in: memory heap from which allocated */
-
-/********************************************************************
-Create parser info struct.*/
+ trx_t* trx, /*!< in: transaction handle */
+ mem_heap_t* heap); /*!< in: memory heap from which allocated */
+/****************************************************************//**
+Create parser info struct.
+@return own: info struct */
+UNIV_INTERN
pars_info_t*
pars_info_create(void);
/*==================*/
- /* out, own: info struct */
-
-/********************************************************************
-Free info struct and everything it contains.*/
+/****************************************************************//**
+Free info struct and everything it contains. */
+UNIV_INTERN
void
pars_info_free(
/*===========*/
- pars_info_t* info); /* in: info struct */
+ pars_info_t* info); /*!< in, own: info struct */
-/********************************************************************
+/****************************************************************//**
Add bound literal. */
-
+UNIV_INTERN
void
pars_info_add_literal(
/*==================*/
- pars_info_t* info, /* in: info struct */
- const char* name, /* in: name */
- const void* address, /* in: address */
- ulint length, /* in: length of data */
- ulint type, /* in: type, e.g. DATA_FIXBINARY */
- ulint prtype); /* in: precise type, e.g.
+ pars_info_t* info, /*!< in: info struct */
+ const char* name, /*!< in: name */
+ const void* address, /*!< in: address */
+ ulint length, /*!< in: length of data */
+ ulint type, /*!< in: type, e.g. DATA_FIXBINARY */
+ ulint prtype); /*!< in: precise type, e.g.
DATA_UNSIGNED */
-/********************************************************************
+/****************************************************************//**
Equivalent to pars_info_add_literal(info, name, str, strlen(str),
DATA_VARCHAR, DATA_ENGLISH). */
-
+UNIV_INTERN
void
pars_info_add_str_literal(
/*======================*/
- pars_info_t* info, /* in: info struct */
- const char* name, /* in: name */
- const char* str); /* in: string */
+ pars_info_t* info, /*!< in: info struct */
+ const char* name, /*!< in: name */
+ const char* str); /*!< in: string */
-/********************************************************************
+/****************************************************************//**
Equivalent to:
char buf[4];
@@ -496,15 +508,15 @@ pars_info_add_literal(info, name, buf, 4, DATA_INT, 0);
except that the buffer is dynamically allocated from the info struct's
heap. */
-
+UNIV_INTERN
void
pars_info_add_int4_literal(
/*=======================*/
- pars_info_t* info, /* in: info struct */
- const char* name, /* in: name */
- lint val); /* in: value */
+ pars_info_t* info, /*!< in: info struct */
+ const char* name, /*!< in: name */
+ lint val); /*!< in: value */
-/********************************************************************
+/****************************************************************//**
Equivalent to:
char buf[8];
@@ -513,216 +525,221 @@ pars_info_add_literal(info, name, buf, 8, DATA_BINARY, 0);
except that the buffer is dynamically allocated from the info struct's
heap. */
-
+UNIV_INTERN
void
pars_info_add_dulint_literal(
/*=========================*/
- pars_info_t* info, /* in: info struct */
- const char* name, /* in: name */
- dulint val); /* in: value */
-/********************************************************************
+ pars_info_t* info, /*!< in: info struct */
+ const char* name, /*!< in: name */
+ dulint val); /*!< in: value */
+/****************************************************************//**
Add user function. */
-
+UNIV_INTERN
void
pars_info_add_function(
/*===================*/
- pars_info_t* info, /* in: info struct */
- const char* name, /* in: function name */
- pars_user_func_cb_t func, /* in: function address */
- void* arg); /* in: user-supplied argument */
+ pars_info_t* info, /*!< in: info struct */
+ const char* name, /*!< in: function name */
+ pars_user_func_cb_t func, /*!< in: function address */
+ void* arg); /*!< in: user-supplied argument */
-/********************************************************************
+/****************************************************************//**
Add bound id. */
-
+UNIV_INTERN
void
pars_info_add_id(
/*=============*/
- pars_info_t* info, /* in: info struct */
- const char* name, /* in: name */
- const char* id); /* in: id */
-
-/********************************************************************
-Get user function with the given name.*/
-
+ pars_info_t* info, /*!< in: info struct */
+ const char* name, /*!< in: name */
+ const char* id); /*!< in: id */
+
+/****************************************************************//**
+Get user function with the given name.
+@return user func, or NULL if not found */
+UNIV_INTERN
pars_user_func_t*
pars_info_get_user_func(
/*====================*/
- /* out: user func, or NULL if not
- found */
- pars_info_t* info, /* in: info struct */
- const char* name); /* in: function name to find*/
-
-/********************************************************************
-Get bound literal with the given name.*/
+ pars_info_t* info, /*!< in: info struct */
+ const char* name); /*!< in: function name to find */
+/****************************************************************//**
+Get bound literal with the given name.
+@return bound literal, or NULL if not found */
+UNIV_INTERN
pars_bound_lit_t*
pars_info_get_bound_lit(
/*====================*/
- /* out: bound literal, or NULL if
- not found */
- pars_info_t* info, /* in: info struct */
- const char* name); /* in: bound literal name to find */
-
-/********************************************************************
-Get bound id with the given name.*/
+ pars_info_t* info, /*!< in: info struct */
+ const char* name); /*!< in: bound literal name to find */
+/****************************************************************//**
+Get bound id with the given name.
+@return bound id, or NULL if not found */
+UNIV_INTERN
pars_bound_id_t*
pars_info_get_bound_id(
/*===================*/
- /* out: bound id, or NULL if not
- found */
- pars_info_t* info, /* in: info struct */
- const char* name); /* in: bound id name to find */
+ pars_info_t* info, /*!< in: info struct */
+ const char* name); /*!< in: bound id name to find */
+/******************************************************************//**
+Release any resources used by the lexer. */
+UNIV_INTERN
+void
+pars_lexer_close(void);
+/*==================*/
-/* Extra information supplied for pars_sql(). */
+/** Extra information supplied for pars_sql(). */
struct pars_info_struct {
- mem_heap_t* heap; /* our own memory heap */
+ mem_heap_t* heap; /*!< our own memory heap */
- ib_vector_t* funcs; /* user functions, or NUll
+ ib_vector_t* funcs; /*!< user functions, or NULL
(pars_user_func_t*) */
- ib_vector_t* bound_lits; /* bound literals, or NULL
+ ib_vector_t* bound_lits; /*!< bound literals, or NULL
(pars_bound_lit_t*) */
- ib_vector_t* bound_ids; /* bound ids, or NULL
+ ib_vector_t* bound_ids; /*!< bound ids, or NULL
(pars_bound_id_t*) */
- ibool graph_owns_us; /* if TRUE (which is the default),
+ ibool graph_owns_us; /*!< if TRUE (which is the default),
que_graph_free() will free us */
};
-/* User-supplied function and argument. */
+/** User-supplied function and argument. */
struct pars_user_func_struct {
- const char* name; /* function name */
- pars_user_func_cb_t func; /* function address */
- void* arg; /* user-supplied argument */
+ const char* name; /*!< function name */
+ pars_user_func_cb_t func; /*!< function address */
+ void* arg; /*!< user-supplied argument */
};
-/* Bound literal. */
+/** Bound literal. */
struct pars_bound_lit_struct {
- const char* name; /* name */
- const void* address; /* address */
- ulint length; /* length of data */
- ulint type; /* type, e.g. DATA_FIXBINARY */
- ulint prtype; /* precise type, e.g. DATA_UNSIGNED */
+ const char* name; /*!< name */
+ const void* address; /*!< address */
+ ulint length; /*!< length of data */
+ ulint type; /*!< type, e.g. DATA_FIXBINARY */
+ ulint prtype; /*!< precise type, e.g. DATA_UNSIGNED */
};
-/* Bound id. */
+/** Bound identifier. */
struct pars_bound_id_struct {
- const char* name; /* name */
- const char* id; /* id */
+ const char* name; /*!< name */
+ const char* id; /*!< identifier */
};
-/* Struct used to denote a reserved word in a parsing tree */
+/** Struct used to denote a reserved word in a parsing tree */
struct pars_res_word_struct{
- int code; /* the token code for the reserved word from
+ int code; /*!< the token code for the reserved word from
pars0grm.h */
};
-/* A predefined function or operator node in a parsing tree; this construct
+/** A predefined function or operator node in a parsing tree; this construct
is also used for some non-functions like the assignment ':=' */
struct func_node_struct{
- que_common_t common; /* type: QUE_NODE_FUNC */
- int func; /* token code of the function name */
- ulint class; /* class of the function */
- que_node_t* args; /* argument(s) of the function */
+ que_common_t common; /*!< type: QUE_NODE_FUNC */
+ int func; /*!< token code of the function name */
+ ulint class; /*!< class of the function */
+ que_node_t* args; /*!< argument(s) of the function */
UT_LIST_NODE_T(func_node_t) cond_list;
- /* list of comparison conditions; defined
+ /*!< list of comparison conditions; defined
only for comparison operator nodes except,
presently, for OPT_SCROLL_TYPE ones */
UT_LIST_NODE_T(func_node_t) func_node_list;
- /* list of function nodes in a parsed
+ /*!< list of function nodes in a parsed
query graph */
};
-/* An order-by node in a select */
+/** An order-by node in a select */
struct order_node_struct{
- que_common_t common; /* type: QUE_NODE_ORDER */
- sym_node_t* column; /* order-by column */
- ibool asc; /* TRUE if ascending, FALSE if descending */
+ que_common_t common; /*!< type: QUE_NODE_ORDER */
+ sym_node_t* column; /*!< order-by column */
+ ibool asc; /*!< TRUE if ascending, FALSE if descending */
};
-/* Procedure definition node */
+/** Procedure definition node */
struct proc_node_struct{
- que_common_t common; /* type: QUE_NODE_PROC */
- sym_node_t* proc_id; /* procedure name symbol in the symbol
+ que_common_t common; /*!< type: QUE_NODE_PROC */
+ sym_node_t* proc_id; /*!< procedure name symbol in the symbol
table of this same procedure */
- sym_node_t* param_list; /* input and output parameters */
- que_node_t* stat_list; /* statement list */
- sym_tab_t* sym_tab; /* symbol table of this procedure */
+ sym_node_t* param_list; /*!< input and output parameters */
+ que_node_t* stat_list; /*!< statement list */
+ sym_tab_t* sym_tab; /*!< symbol table of this procedure */
};
-/* elsif-element node */
+/** elsif-element node */
struct elsif_node_struct{
- que_common_t common; /* type: QUE_NODE_ELSIF */
- que_node_t* cond; /* if condition */
- que_node_t* stat_list; /* statement list */
+ que_common_t common; /*!< type: QUE_NODE_ELSIF */
+ que_node_t* cond; /*!< if condition */
+ que_node_t* stat_list; /*!< statement list */
};
-/* if-statement node */
+/** if-statement node */
struct if_node_struct{
- que_common_t common; /* type: QUE_NODE_IF */
- que_node_t* cond; /* if condition */
- que_node_t* stat_list; /* statement list */
- que_node_t* else_part; /* else-part statement list */
- elsif_node_t* elsif_list; /* elsif element list */
+ que_common_t common; /*!< type: QUE_NODE_IF */
+ que_node_t* cond; /*!< if condition */
+ que_node_t* stat_list; /*!< statement list */
+ que_node_t* else_part; /*!< else-part statement list */
+ elsif_node_t* elsif_list; /*!< elsif element list */
};
-/* while-statement node */
+/** while-statement node */
struct while_node_struct{
- que_common_t common; /* type: QUE_NODE_WHILE */
- que_node_t* cond; /* while condition */
- que_node_t* stat_list; /* statement list */
+ que_common_t common; /*!< type: QUE_NODE_WHILE */
+ que_node_t* cond; /*!< while condition */
+ que_node_t* stat_list; /*!< statement list */
};
-/* for-loop-statement node */
+/** for-loop-statement node */
struct for_node_struct{
- que_common_t common; /* type: QUE_NODE_FOR */
- sym_node_t* loop_var; /* loop variable: this is the
+ que_common_t common; /*!< type: QUE_NODE_FOR */
+ sym_node_t* loop_var; /*!< loop variable: this is the
dereferenced symbol from the
variable declarations, not the
symbol occurrence in the for loop
definition */
- que_node_t* loop_start_limit;/* initial value of loop variable */
- que_node_t* loop_end_limit; /* end value of loop variable */
- lint loop_end_value; /* evaluated value for the end value:
+ que_node_t* loop_start_limit;/*!< initial value of loop variable */
+ que_node_t* loop_end_limit; /*!< end value of loop variable */
+ lint loop_end_value; /*!< evaluated value for the end value:
it is calculated only when the loop
is entered, and will not change within
the loop */
- que_node_t* stat_list; /* statement list */
+ que_node_t* stat_list; /*!< statement list */
};
-/* exit statement node */
+/** exit statement node */
struct exit_node_struct{
- que_common_t common; /* type: QUE_NODE_EXIT */
+ que_common_t common; /*!< type: QUE_NODE_EXIT */
};
-/* return-statement node */
+/** return-statement node */
struct return_node_struct{
- que_common_t common; /* type: QUE_NODE_RETURN */
+ que_common_t common; /*!< type: QUE_NODE_RETURN */
};
-/* Assignment statement node */
+/** Assignment statement node */
struct assign_node_struct{
- que_common_t common; /* type: QUE_NODE_ASSIGNMENT */
- sym_node_t* var; /* variable to set */
- que_node_t* val; /* value to assign */
+ que_common_t common; /*!< type: QUE_NODE_ASSIGNMENT */
+ sym_node_t* var; /*!< variable to set */
+ que_node_t* val; /*!< value to assign */
};
-/* Column assignment node */
+/** Column assignment node */
struct col_assign_node_struct{
- que_common_t common; /* type: QUE_NODE_COL_ASSIGN */
- sym_node_t* col; /* column to set */
- que_node_t* val; /* value to assign */
+ que_common_t common; /*!< type: QUE_NODE_COL_ASSIGN */
+ sym_node_t* col; /*!< column to set */
+ que_node_t* val; /*!< value to assign */
};
-/* Classes of functions */
-#define PARS_FUNC_ARITH 1 /* +, -, *, / */
-#define PARS_FUNC_LOGICAL 2
-#define PARS_FUNC_CMP 3
-#define PARS_FUNC_PREDEFINED 4 /* TO_NUMBER, SUBSTR, ... */
-#define PARS_FUNC_AGGREGATE 5 /* COUNT, DISTINCT, SUM */
-#define PARS_FUNC_OTHER 6 /* these are not real functions,
+/** Classes of functions */
+/* @{ */
+#define PARS_FUNC_ARITH 1 /*!< +, -, *, / */
+#define PARS_FUNC_LOGICAL 2 /*!< AND, OR, NOT */
+#define PARS_FUNC_CMP 3 /*!< comparison operators */
+#define PARS_FUNC_PREDEFINED 4 /*!< TO_NUMBER, SUBSTR, ... */
+#define PARS_FUNC_AGGREGATE 5 /*!< COUNT, DISTINCT, SUM */
+#define PARS_FUNC_OTHER 6 /*!< these are not real functions,
e.g., := */
+/* @} */
#ifndef UNIV_NONINL
#include "pars0pars.ic"
diff --git a/storage/innobase/include/pars0pars.ic b/storage/innobase/include/pars0pars.ic
index 155b6659ace..ae6c13cd671 100644
--- a/storage/innobase/include/pars0pars.ic
+++ b/storage/innobase/include/pars0pars.ic
@@ -1,7 +1,24 @@
-/******************************************************
-SQL parser
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/pars0pars.ic
+SQL parser
Created 11/19/1996 Heikki Tuuri
*******************************************************/
diff --git a/storage/innobase/include/pars0sym.h b/storage/innobase/include/pars0sym.h
index fc7df92ff60..6d1a4b82414 100644
--- a/storage/innobase/include/pars0sym.h
+++ b/storage/innobase/include/pars0sym.h
@@ -1,7 +1,24 @@
-/******************************************************
-SQL parser symbol table
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1997 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/pars0sym.h
+SQL parser symbol table
Created 12/15/1997 Heikki Tuuri
*******************************************************/
@@ -16,87 +33,105 @@ Created 12/15/1997 Heikki Tuuri
#include "pars0types.h"
#include "row0types.h"
-/**********************************************************************
-Creates a symbol table for a single stored procedure or query. */
-
+/******************************************************************//**
+Creates a symbol table for a single stored procedure or query.
+@return own: symbol table */
+UNIV_INTERN
sym_tab_t*
sym_tab_create(
/*===========*/
- /* out, own: symbol table */
- mem_heap_t* heap); /* in: memory heap where to create */
-/**********************************************************************
+ mem_heap_t* heap); /*!< in: memory heap where to create */
+/******************************************************************//**
Frees the memory allocated dynamically AFTER parsing phase for variables
etc. in the symbol table. Does not free the mem heap where the table was
originally created. Frees also SQL explicit cursor definitions. */
-
+UNIV_INTERN
void
sym_tab_free_private(
/*=================*/
- sym_tab_t* sym_tab); /* in, own: symbol table */
-/**********************************************************************
-Adds an integer literal to a symbol table. */
-
+ sym_tab_t* sym_tab); /*!< in, own: symbol table */
+/******************************************************************//**
+Adds an integer literal to a symbol table.
+@return symbol table node */
+UNIV_INTERN
sym_node_t*
sym_tab_add_int_lit(
/*================*/
- /* out: symbol table node */
- sym_tab_t* sym_tab, /* in: symbol table */
- ulint val); /* in: integer value */
-/**********************************************************************
-Adds an string literal to a symbol table. */
-
+ sym_tab_t* sym_tab, /*!< in: symbol table */
+ ulint val); /*!< in: integer value */
+/******************************************************************//**
+Adds a string literal to a symbol table.
+@return symbol table node */
+UNIV_INTERN
sym_node_t*
sym_tab_add_str_lit(
/*================*/
- /* out: symbol table node */
- sym_tab_t* sym_tab, /* in: symbol table */
- byte* str, /* in: string with no quotes around
+ sym_tab_t* sym_tab, /*!< in: symbol table */
+ byte* str, /*!< in: string with no quotes around
it */
- ulint len); /* in: string length */
-/**********************************************************************
-Add a bound literal to a symbol table. */
-
+ ulint len); /*!< in: string length */
+/******************************************************************//**
+Add a bound literal to a symbol table.
+@return symbol table node */
+UNIV_INTERN
sym_node_t*
sym_tab_add_bound_lit(
/*==================*/
- /* out: symbol table node */
- sym_tab_t* sym_tab, /* in: symbol table */
- const char* name, /* in: name of bound literal */
- ulint* lit_type); /* out: type of literal (PARS_*_LIT) */
-/**********************************************************************
-Adds an SQL null literal to a symbol table. */
-
+ sym_tab_t* sym_tab, /*!< in: symbol table */
+ const char* name, /*!< in: name of bound literal */
+ ulint* lit_type); /*!< out: type of literal (PARS_*_LIT) */
+/******************************************************************//**
+Adds an SQL null literal to a symbol table.
+@return symbol table node */
+UNIV_INTERN
sym_node_t*
sym_tab_add_null_lit(
/*=================*/
- /* out: symbol table node */
- sym_tab_t* sym_tab); /* in: symbol table */
-/**********************************************************************
-Adds an identifier to a symbol table. */
-
+ sym_tab_t* sym_tab); /*!< in: symbol table */
+/******************************************************************//**
+Adds an identifier to a symbol table.
+@return symbol table node */
+UNIV_INTERN
sym_node_t*
sym_tab_add_id(
/*===========*/
- /* out: symbol table node */
- sym_tab_t* sym_tab, /* in: symbol table */
- byte* name, /* in: identifier name */
- ulint len); /* in: identifier length */
-
-/**********************************************************************
-Add a bound identifier to a symbol table. */
-
+ sym_tab_t* sym_tab, /*!< in: symbol table */
+ byte* name, /*!< in: identifier name */
+ ulint len); /*!< in: identifier length */
+
+/******************************************************************//**
+Add a bound identifier to a symbol table.
+@return symbol table node */
+UNIV_INTERN
sym_node_t*
sym_tab_add_bound_id(
/*===========*/
- /* out: symbol table node */
- sym_tab_t* sym_tab, /* in: symbol table */
- const char* name); /* in: name of bound id */
+ sym_tab_t* sym_tab, /*!< in: symbol table */
+ const char* name); /*!< in: name of bound id */
+/** Index of sym_node_struct::field_nos corresponding to the clustered index */
#define SYM_CLUST_FIELD_NO 0
+/** Index of sym_node_struct::field_nos corresponding to a secondary index */
#define SYM_SEC_FIELD_NO 1
+/** Types of a symbol table node */
+enum sym_tab_entry {
+ SYM_VAR = 91, /*!< declared parameter or local
+ variable of a procedure */
+ SYM_IMPLICIT_VAR, /*!< storage for an intermediate result
+ of a calculation */
+ SYM_LIT, /*!< literal */
+ SYM_TABLE, /*!< database table name */
+ SYM_COLUMN, /*!< database column name */
+ SYM_CURSOR, /*!< named cursor */
+ SYM_PROCEDURE_NAME, /*!< stored procedure name */
+ SYM_INDEX, /*!< database index name */
+ SYM_FUNCTION /*!< user function name */
+};
+
+/** Symbol table node */
struct sym_node_struct{
- que_common_t common; /* node type:
+ que_common_t common; /*!< node type:
QUE_NODE_SYMBOL */
/* NOTE: if the data field in 'common.val' is not NULL and the symbol
table node is not for a temporary column, the memory for the value has
@@ -116,25 +151,25 @@ struct sym_node_struct{
TODO: It would be cleaner to make 'indirection' a boolean field and
always use 'alias' to refer to the primary node. */
- sym_node_t* indirection; /* pointer to
+ sym_node_t* indirection; /*!< pointer to
another symbol table
node which contains
the value for this
node, NULL otherwise */
- sym_node_t* alias; /* pointer to
+ sym_node_t* alias; /*!< pointer to
another symbol table
node for which this
node is an alias,
NULL otherwise */
- UT_LIST_NODE_T(sym_node_t) col_var_list; /* list of table
+ UT_LIST_NODE_T(sym_node_t) col_var_list; /*!< list of table
columns or a list of
input variables for an
explicit cursor */
- ibool copy_val; /* TRUE if a column
+ ibool copy_val; /*!< TRUE if a column
and its value should
be copied to dynamic
memory when fetched */
- ulint field_nos[2]; /* if a column, in
+ ulint field_nos[2]; /*!< if a column, in
the position
SYM_CLUST_FIELD_NO is
the field number in the
@@ -146,76 +181,62 @@ struct sym_node_struct{
use first; if not found
from the index, then
ULINT_UNDEFINED */
- ibool resolved; /* TRUE if the
+ ibool resolved; /*!< TRUE if the
meaning of a variable
or a column has been
resolved; for literals
this is always TRUE */
- ulint token_type; /* SYM_VAR, SYM_COLUMN,
- SYM_IMPLICIT_VAR,
- SYM_LIT, SYM_TABLE,
- SYM_CURSOR, ... */
- const char* name; /* name of an id */
- ulint name_len; /* id name length */
- dict_table_t* table; /* table definition
+ enum sym_tab_entry token_type; /*!< type of the
+ parsed token */
+ const char* name; /*!< name of an id */
+ ulint name_len; /*!< id name length */
+ dict_table_t* table; /*!< table definition
if a table id or a
column id */
- ulint col_no; /* column number if a
+ ulint col_no; /*!< column number if a
column */
- sel_buf_t* prefetch_buf; /* NULL, or a buffer
+ sel_buf_t* prefetch_buf; /*!< NULL, or a buffer
for cached column
values for prefetched
rows */
- sel_node_t* cursor_def; /* cursor definition
+ sel_node_t* cursor_def; /*!< cursor definition
select node if a
named cursor */
- ulint param_type; /* PARS_INPUT,
+ ulint param_type; /*!< PARS_INPUT,
PARS_OUTPUT, or
PARS_NOT_PARAM if not a
procedure parameter */
- sym_tab_t* sym_table; /* back pointer to
+ sym_tab_t* sym_table; /*!< back pointer to
the symbol table */
- UT_LIST_NODE_T(sym_node_t) sym_list; /* list of symbol
+ UT_LIST_NODE_T(sym_node_t) sym_list; /*!< list of symbol
nodes */
};
+/** Symbol table */
struct sym_tab_struct{
que_t* query_graph;
- /* query graph generated by the
+ /*!< query graph generated by the
parser */
const char* sql_string;
- /* SQL string to parse */
+ /*!< SQL string to parse */
size_t string_len;
- /* SQL string length */
+ /*!< SQL string length */
int next_char_pos;
- /* position of the next character in
+ /*!< position of the next character in
sql_string to give to the lexical
analyzer */
- pars_info_t* info; /* extra information, or NULL */
+ pars_info_t* info; /*!< extra information, or NULL */
sym_node_list_t sym_list;
- /* list of symbol nodes in the symbol
+ /*!< list of symbol nodes in the symbol
table */
UT_LIST_BASE_NODE_T(func_node_t)
func_node_list;
- /* list of function nodes in the
+ /*!< list of function nodes in the
parsed query graph */
- mem_heap_t* heap; /* memory heap from which we can
+ mem_heap_t* heap; /*!< memory heap from which we can
allocate space */
};
-/* Types of a symbol table entry */
-#define SYM_VAR 91 /* declared parameter or local
- variable of a procedure */
-#define SYM_IMPLICIT_VAR 92 /* storage for a intermediate result
- of a calculation */
-#define SYM_LIT 93 /* literal */
-#define SYM_TABLE 94 /* database table name */
-#define SYM_COLUMN 95 /* database table name */
-#define SYM_CURSOR 96 /* named cursor */
-#define SYM_PROCEDURE_NAME 97 /* stored procedure name */
-#define SYM_INDEX 98 /* database index name */
-#define SYM_FUNCTION 99 /* user function name */
-
#ifndef UNIV_NONINL
#include "pars0sym.ic"
#endif
diff --git a/storage/innobase/include/pars0sym.ic b/storage/innobase/include/pars0sym.ic
index 9508d423769..9eb09db3a47 100644
--- a/storage/innobase/include/pars0sym.ic
+++ b/storage/innobase/include/pars0sym.ic
@@ -1,7 +1,24 @@
-/******************************************************
-SQL parser symbol table
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1997 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/pars0sym.ic
+SQL parser symbol table
Created 12/15/1997 Heikki Tuuri
*******************************************************/
diff --git a/storage/innobase/include/pars0types.h b/storage/innobase/include/pars0types.h
index bf7df89a883..e0a8a86bf07 100644
--- a/storage/innobase/include/pars0types.h
+++ b/storage/innobase/include/pars0types.h
@@ -1,7 +1,24 @@
-/******************************************************
-SQL parser global types
+/*****************************************************************************
+
+Copyright (c) 1998, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1997 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/pars0types.h
+SQL parser global types
Created 1/11/1998 Heikki Tuuri
*******************************************************/
diff --git a/storage/innobase/include/que0que.h b/storage/innobase/include/que0que.h
index 8fbf5330c89..420f34550e2 100644
--- a/storage/innobase/include/que0que.h
+++ b/storage/innobase/include/que0que.h
@@ -1,7 +1,24 @@
-/******************************************************
-Query graph
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/que0que.h
+Query graph
Created 5/27/1996 Heikki Tuuri
*******************************************************/
@@ -23,349 +40,335 @@ Created 5/27/1996 Heikki Tuuri
of SQL execution in the UNIV_SQL_DEBUG version */
extern ibool que_trace_on;
-/***************************************************************************
+/***********************************************************************//**
Adds a query graph to the session's list of graphs. */
-
+UNIV_INTERN
void
que_graph_publish(
/*==============*/
- que_t* graph, /* in: graph */
- sess_t* sess); /* in: session */
-/***************************************************************************
-Creates a query graph fork node. */
-
+ que_t* graph, /*!< in: graph */
+ sess_t* sess); /*!< in: session */
+/***********************************************************************//**
+Creates a query graph fork node.
+@return own: fork node */
+UNIV_INTERN
que_fork_t*
que_fork_create(
/*============*/
- /* out, own: fork node */
- que_t* graph, /* in: graph, if NULL then this
+ que_t* graph, /*!< in: graph, if NULL then this
fork node is assumed to be the
graph root */
- que_node_t* parent, /* in: parent node */
- ulint fork_type, /* in: fork type */
- mem_heap_t* heap); /* in: memory heap where created */
-/***************************************************************************
+ que_node_t* parent, /*!< in: parent node */
+ ulint fork_type, /*!< in: fork type */
+ mem_heap_t* heap); /*!< in: memory heap where created */
+/***********************************************************************//**
Gets the first thr in a fork. */
UNIV_INLINE
que_thr_t*
que_fork_get_first_thr(
/*===================*/
- que_fork_t* fork); /* in: query fork */
-/***************************************************************************
+ que_fork_t* fork); /*!< in: query fork */
+/***********************************************************************//**
Gets the child node of the first thr in a fork. */
UNIV_INLINE
que_node_t*
que_fork_get_child(
/*===============*/
- que_fork_t* fork); /* in: query fork */
-/***************************************************************************
+ que_fork_t* fork); /*!< in: query fork */
+/***********************************************************************//**
Sets the parent of a graph node. */
UNIV_INLINE
void
que_node_set_parent(
/*================*/
- que_node_t* node, /* in: graph node */
- que_node_t* parent);/* in: parent */
-/***************************************************************************
-Creates a query graph thread node. */
-
+ que_node_t* node, /*!< in: graph node */
+ que_node_t* parent);/*!< in: parent */
+/***********************************************************************//**
+Creates a query graph thread node.
+@return own: query thread node */
+UNIV_INTERN
que_thr_t*
que_thr_create(
/*===========*/
- /* out, own: query thread node */
- que_fork_t* parent, /* in: parent node, i.e., a fork node */
- mem_heap_t* heap); /* in: memory heap where created */
-/**************************************************************************
-Checks if the query graph is in a state where it should be freed, and
-frees it in that case. If the session is in a state where it should be
-closed, also this is done. */
-
-ibool
-que_graph_try_free(
-/*===============*/
- /* out: TRUE if freed */
- que_t* graph); /* in: query graph */
-/**************************************************************************
+ que_fork_t* parent, /*!< in: parent node, i.e., a fork node */
+ mem_heap_t* heap); /*!< in: memory heap where created */
+/**********************************************************************//**
Frees a query graph, but not the heap where it was created. Does not free
explicit cursor declarations, they are freed in que_graph_free. */
-
+UNIV_INTERN
void
que_graph_free_recursive(
/*=====================*/
- que_node_t* node); /* in: query graph node */
-/**************************************************************************
+ que_node_t* node); /*!< in: query graph node */
+/**********************************************************************//**
Frees a query graph. */
-
+UNIV_INTERN
void
que_graph_free(
/*===========*/
- que_t* graph); /* in: query graph; we assume that the memory
+ que_t* graph); /*!< in: query graph; we assume that the memory
heap where this graph was created is private
to this graph: if not, then use
que_graph_free_recursive and free the heap
afterwards! */
-/**************************************************************************
+/**********************************************************************//**
Stops a query thread if graph or trx is in a state requiring it. The
conditions are tested in the order (1) graph, (2) trx. The kernel mutex has
-to be reserved. */
-
+to be reserved.
+@return TRUE if stopped */
+UNIV_INTERN
ibool
que_thr_stop(
/*=========*/
- /* out: TRUE if stopped */
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
+ que_thr_t* thr); /*!< in: query thread */
+/**********************************************************************//**
Moves a thread from another state to the QUE_THR_RUNNING state. Increments
the n_active_thrs counters of the query graph and transaction. */
-
+UNIV_INTERN
void
que_thr_move_to_run_state_for_mysql(
/*================================*/
- que_thr_t* thr, /* in: an query thread */
- trx_t* trx); /* in: transaction */
-/**************************************************************************
+ que_thr_t* thr, /*!< in: a query thread */
+ trx_t* trx); /*!< in: transaction */
+/**********************************************************************//**
A patch for MySQL used to 'stop' a dummy query thread used in MySQL
select, when there is no error or lock wait. */
-
+UNIV_INTERN
void
que_thr_stop_for_mysql_no_error(
/*============================*/
- que_thr_t* thr, /* in: query thread */
- trx_t* trx); /* in: transaction */
-/**************************************************************************
+ que_thr_t* thr, /*!< in: query thread */
+ trx_t* trx); /*!< in: transaction */
+/**********************************************************************//**
A patch for MySQL used to 'stop' a dummy query thread used in MySQL. The
query thread is stopped and made inactive, except in the case where
it was put to the lock wait state in lock0lock.c, but the lock has already
been granted or the transaction chosen as a victim in deadlock resolution. */
-
+UNIV_INTERN
void
que_thr_stop_for_mysql(
/*===================*/
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
+ que_thr_t* thr); /*!< in: query thread */
+/**********************************************************************//**
Run a query thread. Handles lock waits. */
-
+UNIV_INTERN
void
que_run_threads(
/*============*/
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
+ que_thr_t* thr); /*!< in: query thread */
+/**********************************************************************//**
After signal handling is finished, returns control to a query graph error
handling routine. (Currently, just returns the control to the root of the
graph so that the graph can communicate an error message to the client.) */
-
+UNIV_INTERN
void
que_fork_error_handle(
/*==================*/
- trx_t* trx, /* in: trx */
- que_t* fork); /* in: query graph which was run before signal
+ trx_t* trx, /*!< in: trx */
+ que_t* fork); /*!< in: query graph which was run before signal
handling started, NULL not allowed */
-/**************************************************************************
+/**********************************************************************//**
Moves a suspended query thread to the QUE_THR_RUNNING state and releases
a single worker thread to execute it. This function should be used to end
the wait state of a query thread waiting for a lock or a stored procedure
completion. */
-
+UNIV_INTERN
void
que_thr_end_wait(
/*=============*/
- que_thr_t* thr, /* in: query thread in the
+ que_thr_t* thr, /*!< in: query thread in the
QUE_THR_LOCK_WAIT,
or QUE_THR_PROCEDURE_WAIT, or
QUE_THR_SIG_REPLY_WAIT state */
- que_thr_t** next_thr); /* in/out: next query thread to run;
+ que_thr_t** next_thr); /*!< in/out: next query thread to run;
if the value which is passed in is
a pointer to a NULL pointer, then the
calling function can start running
a new query thread */
-/**************************************************************************
+/**********************************************************************//**
Same as que_thr_end_wait, but no parameter next_thr available. */
-
+UNIV_INTERN
void
que_thr_end_wait_no_next_thr(
/*=========================*/
- que_thr_t* thr); /* in: query thread in the
+ que_thr_t* thr); /*!< in: query thread in the
QUE_THR_LOCK_WAIT,
or QUE_THR_PROCEDURE_WAIT, or
QUE_THR_SIG_REPLY_WAIT state */
-/**************************************************************************
+/**********************************************************************//**
Starts execution of a command in a query fork. Picks a query thread which
is not in the QUE_THR_RUNNING state and moves it to that state. If none
can be chosen, a situation which may arise in parallelized fetches, NULL
-is returned. */
-
+is returned.
+@return a query thread of the graph moved to QUE_THR_RUNNING state, or
+NULL; the query thread should be executed by que_run_threads by the
+caller */
+UNIV_INTERN
que_thr_t*
que_fork_start_command(
/*===================*/
- /* out: a query thread of the graph moved to
- QUE_THR_RUNNING state, or NULL; the query
- thread should be executed by que_run_threads
- by the caller */
- que_fork_t* fork); /* in: a query fork */
-/***************************************************************************
+ que_fork_t* fork); /*!< in: a query fork */
+/***********************************************************************//**
Gets the trx of a query thread. */
UNIV_INLINE
trx_t*
thr_get_trx(
/*========*/
- que_thr_t* thr); /* in: query thread */
-/***************************************************************************
+ que_thr_t* thr); /*!< in: query thread */
+/***********************************************************************//**
Gets the type of a graph node. */
UNIV_INLINE
ulint
que_node_get_type(
/*==============*/
- que_node_t* node); /* in: graph node */
-/***************************************************************************
+ que_node_t* node); /*!< in: graph node */
+/***********************************************************************//**
Gets pointer to the value data type field of a graph node. */
UNIV_INLINE
dtype_t*
que_node_get_data_type(
/*===================*/
- que_node_t* node); /* in: graph node */
-/***************************************************************************
+ que_node_t* node); /*!< in: graph node */
+/***********************************************************************//**
Gets pointer to the value dfield of a graph node. */
UNIV_INLINE
dfield_t*
que_node_get_val(
/*=============*/
- que_node_t* node); /* in: graph node */
-/***************************************************************************
-Gets the value buffer size of a graph node. */
+ que_node_t* node); /*!< in: graph node */
+/***********************************************************************//**
+Gets the value buffer size of a graph node.
+@return val buffer size, not defined if val.data == NULL in node */
UNIV_INLINE
ulint
que_node_get_val_buf_size(
/*======================*/
- /* out: val buffer size, not defined if
- val.data == NULL in node */
- que_node_t* node); /* in: graph node */
-/***************************************************************************
+ que_node_t* node); /*!< in: graph node */
+/***********************************************************************//**
Sets the value buffer size of a graph node. */
UNIV_INLINE
void
que_node_set_val_buf_size(
/*======================*/
- que_node_t* node, /* in: graph node */
- ulint size); /* in: size */
-/*************************************************************************
+ que_node_t* node, /*!< in: graph node */
+ ulint size); /*!< in: size */
+/*********************************************************************//**
Gets the next list node in a list of query graph nodes. */
UNIV_INLINE
que_node_t*
que_node_get_next(
/*==============*/
- que_node_t* node); /* in: node in a list */
-/*************************************************************************
-Gets the parent node of a query graph node. */
+ que_node_t* node); /*!< in: node in a list */
+/*********************************************************************//**
+Gets the parent node of a query graph node.
+@return parent node or NULL */
UNIV_INLINE
que_node_t*
que_node_get_parent(
/*================*/
- /* out: parent node or NULL */
- que_node_t* node); /* in: node */
-/********************************************************************
+ que_node_t* node); /*!< in: node */
+/****************************************************************//**
Get the first containing loop node (e.g. while_node_t or for_node_t) for the
-given node, or NULL if the node is not within a loop. */
-
+given node, or NULL if the node is not within a loop.
+@return containing loop node, or NULL. */
+UNIV_INTERN
que_node_t*
que_node_get_containing_loop_node(
/*==============================*/
- /* out: containing loop node, or NULL. */
- que_node_t* node); /* in: node */
-/*************************************************************************
-Catenates a query graph node to a list of them, possible empty list. */
+ que_node_t* node); /*!< in: node */
+/*********************************************************************//**
+Catenates a query graph node to a list of them, possibly an empty list.
+@return one-way list of nodes */
UNIV_INLINE
que_node_t*
que_node_list_add_last(
/*===================*/
- /* out: one-way list of nodes */
- que_node_t* node_list, /* in: node list, or NULL */
- que_node_t* node); /* in: node */
-/*************************************************************************
-Gets a query graph node list length. */
+ que_node_t* node_list, /*!< in: node list, or NULL */
+ que_node_t* node); /*!< in: node */
+/*********************************************************************//**
+Gets a query graph node list length.
+@return length, for NULL list 0 */
UNIV_INLINE
ulint
que_node_list_get_len(
/*==================*/
- /* out: length, for NULL list 0 */
- que_node_t* node_list); /* in: node list, or NULL */
-/**************************************************************************
+ que_node_t* node_list); /*!< in: node list, or NULL */
+/**********************************************************************//**
Checks if graph, trx, or session is in a state where the query thread should
-be stopped. */
+be stopped.
+@return TRUE if should be stopped; NOTE that if the peek is made
+without reserving the kernel mutex, then another peek with the mutex
+reserved is necessary before deciding the actual stopping */
UNIV_INLINE
ibool
que_thr_peek_stop(
/*==============*/
- /* out: TRUE if should be stopped; NOTE that
- if the peek is made without reserving the
- kernel mutex, then another peek with the
- mutex reserved is necessary before deciding
- the actual stopping */
- que_thr_t* thr); /* in: query thread */
-/***************************************************************************
-Returns TRUE if the query graph is for a SELECT statement. */
+ que_thr_t* thr); /*!< in: query thread */
+/***********************************************************************//**
+Returns TRUE if the query graph is for a SELECT statement.
+@return TRUE if a select */
UNIV_INLINE
ibool
que_graph_is_select(
/*================*/
- /* out: TRUE if a select */
- que_t* graph); /* in: graph */
-/**************************************************************************
+ que_t* graph); /*!< in: graph */
+/**********************************************************************//**
Prints info of an SQL query graph node. */
-
+UNIV_INTERN
void
que_node_print_info(
/*================*/
- que_node_t* node); /* in: query graph node */
-/*************************************************************************
-Evaluate the given SQL */
-
+ que_node_t* node); /*!< in: query graph node */
+/*********************************************************************//**
+Evaluate the given SQL
+@return error code or DB_SUCCESS */
+UNIV_INTERN
ulint
que_eval_sql(
/*=========*/
- /* out: error code or DB_SUCCESS */
- pars_info_t* info, /* in: info struct, or NULL */
- const char* sql, /* in: SQL string */
+ pars_info_t* info, /*!< in: info struct, or NULL */
+ const char* sql, /*!< in: SQL string */
ibool reserve_dict_mutex,
- /* in: if TRUE, acquire/release
+ /*!< in: if TRUE, acquire/release
dict_sys->mutex around call to pars_sql. */
- trx_t* trx); /* in: trx */
+ trx_t* trx); /*!< in: trx */
/* Query graph query thread node: the fields are protected by the kernel
mutex with the exceptions named below */
struct que_thr_struct{
- que_common_t common; /* type: QUE_NODE_THR */
- ulint magic_n; /* magic number to catch memory
+ que_common_t common; /*!< type: QUE_NODE_THR */
+ ulint magic_n; /*!< magic number to catch memory
corruption */
- que_node_t* child; /* graph child node */
- que_t* graph; /* graph where this node belongs */
- ibool is_active; /* TRUE if the thread has been set
+ que_node_t* child; /*!< graph child node */
+ que_t* graph; /*!< graph where this node belongs */
+ ibool is_active; /*!< TRUE if the thread has been set
to the run state in
que_thr_move_to_run_state, but not
deactivated in
que_thr_dec_reference_count */
- ulint state; /* state of the query thread */
+ ulint state; /*!< state of the query thread */
UT_LIST_NODE_T(que_thr_t)
- thrs; /* list of thread nodes of the fork
+ thrs; /*!< list of thread nodes of the fork
node */
UT_LIST_NODE_T(que_thr_t)
- trx_thrs; /* lists of threads in wait list of
+ trx_thrs; /*!< lists of threads in wait list of
the trx */
UT_LIST_NODE_T(que_thr_t)
- queue; /* list of runnable thread nodes in
+ queue; /*!< list of runnable thread nodes in
the server task queue */
/*------------------------------*/
/* The following fields are private to the OS thread executing the
query thread, and are not protected by the kernel mutex: */
- que_node_t* run_node; /* pointer to the node where the
+ que_node_t* run_node; /*!< pointer to the node where the
subgraph down from this node is
currently executed */
- que_node_t* prev_node; /* pointer to the node from which
+ que_node_t* prev_node; /*!< pointer to the node from which
the control came */
- ulint resource; /* resource usage of the query thread
+ ulint resource; /*!< resource usage of the query thread
thus far */
- ulint lock_state; /* lock state of thread (table or
+ ulint lock_state; /*!< lock state of thread (table or
row) */
};
@@ -374,49 +377,49 @@ struct que_thr_struct{
/* Query graph fork node: its fields are protected by the kernel mutex */
struct que_fork_struct{
- que_common_t common; /* type: QUE_NODE_FORK */
- que_t* graph; /* query graph of this node */
- ulint fork_type; /* fork type */
- ulint n_active_thrs; /* if this is the root of a graph, the
+ que_common_t common; /*!< type: QUE_NODE_FORK */
+ que_t* graph; /*!< query graph of this node */
+ ulint fork_type; /*!< fork type */
+ ulint n_active_thrs; /*!< if this is the root of a graph, the
number query threads that have been
started in que_thr_move_to_run_state
but for which que_thr_dec_refer_count
has not yet been called */
- trx_t* trx; /* transaction: this is set only in
+ trx_t* trx; /*!< transaction: this is set only in
the root node */
- ulint state; /* state of the fork node */
- que_thr_t* caller; /* pointer to a possible calling query
+ ulint state; /*!< state of the fork node */
+ que_thr_t* caller; /*!< pointer to a possible calling query
thread */
UT_LIST_BASE_NODE_T(que_thr_t)
- thrs; /* list of query threads */
+ thrs; /*!< list of query threads */
/*------------------------------*/
/* The fields in this section are defined only in the root node */
- sym_tab_t* sym_tab; /* symbol table of the query,
+ sym_tab_t* sym_tab; /*!< symbol table of the query,
generated by the parser, or NULL
if the graph was created 'by hand' */
- pars_info_t* info; /* in: info struct, or NULL */
+ pars_info_t* info; /*!< info struct, or NULL */
/* The following cur_... fields are relevant only in a select graph */
- ulint cur_end; /* QUE_CUR_NOT_DEFINED, QUE_CUR_START,
+ ulint cur_end; /*!< QUE_CUR_NOT_DEFINED, QUE_CUR_START,
QUE_CUR_END */
- ulint cur_pos; /* if there are n rows in the result
+ ulint cur_pos; /*!< if there are n rows in the result
set, values 0 and n + 1 mean before
first row, or after last row, depending
on cur_end; values 1...n mean a row
index */
- ibool cur_on_row; /* TRUE if cursor is on a row, i.e.,
+ ibool cur_on_row; /*!< TRUE if cursor is on a row, i.e.,
it is not before the first row or
after the last row */
- dulint n_inserts; /* number of rows inserted */
- dulint n_updates; /* number of rows updated */
- dulint n_deletes; /* number of rows deleted */
- sel_node_t* last_sel_node; /* last executed select node, or NULL
+ dulint n_inserts; /*!< number of rows inserted */
+ dulint n_updates; /*!< number of rows updated */
+ dulint n_deletes; /*!< number of rows deleted */
+ sel_node_t* last_sel_node; /*!< last executed select node, or NULL
if none */
UT_LIST_NODE_T(que_fork_t)
- graphs; /* list of query graphs of a session
+ graphs; /*!< list of query graphs of a session
or a stored procedure */
/*------------------------------*/
- mem_heap_t* heap; /* memory heap where the fork was
+ mem_heap_t* heap; /*!< memory heap where the fork was
created */
};
diff --git a/storage/innobase/include/que0que.ic b/storage/innobase/include/que0que.ic
index a20108a7820..a1c0dc1e77a 100644
--- a/storage/innobase/include/que0que.ic
+++ b/storage/innobase/include/que0que.ic
@@ -1,44 +1,61 @@
-/******************************************************
-Query graph
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/que0que.ic
+Query graph
Created 5/27/1996 Heikki Tuuri
*******************************************************/
#include "usr0sess.h"
-/***************************************************************************
+/***********************************************************************//**
Gets the trx of a query thread. */
UNIV_INLINE
trx_t*
thr_get_trx(
/*========*/
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
ut_ad(thr);
return(thr->graph->trx);
}
-/***************************************************************************
+/***********************************************************************//**
Gets the first thr in a fork. */
UNIV_INLINE
que_thr_t*
que_fork_get_first_thr(
/*===================*/
- que_fork_t* fork) /* in: query fork */
+ que_fork_t* fork) /*!< in: query fork */
{
return(UT_LIST_GET_FIRST(fork->thrs));
}
-/***************************************************************************
+/***********************************************************************//**
Gets the child node of the first thr in a fork. */
UNIV_INLINE
que_node_t*
que_fork_get_child(
/*===============*/
- que_fork_t* fork) /* in: query fork */
+ que_fork_t* fork) /*!< in: query fork */
{
que_thr_t* thr;
@@ -47,102 +64,101 @@ que_fork_get_child(
return(thr->child);
}
-/***************************************************************************
+/***********************************************************************//**
Gets the type of a graph node. */
UNIV_INLINE
ulint
que_node_get_type(
/*==============*/
- que_node_t* node) /* in: graph node */
+ que_node_t* node) /*!< in: graph node */
{
ut_ad(node);
return(((que_common_t*)node)->type);
}
-/***************************************************************************
+/***********************************************************************//**
Gets pointer to the value dfield of a graph node. */
UNIV_INLINE
dfield_t*
que_node_get_val(
/*=============*/
- que_node_t* node) /* in: graph node */
+ que_node_t* node) /*!< in: graph node */
{
ut_ad(node);
return(&(((que_common_t*)node)->val));
}
-/***************************************************************************
-Gets the value buffer size of a graph node. */
+/***********************************************************************//**
+Gets the value buffer size of a graph node.
+@return val buffer size, not defined if val.data == NULL in node */
UNIV_INLINE
ulint
que_node_get_val_buf_size(
/*======================*/
- /* out: val buffer size, not defined if
- val.data == NULL in node */
- que_node_t* node) /* in: graph node */
+ que_node_t* node) /*!< in: graph node */
{
ut_ad(node);
return(((que_common_t*)node)->val_buf_size);
}
-/***************************************************************************
+/***********************************************************************//**
Sets the value buffer size of a graph node. */
UNIV_INLINE
void
que_node_set_val_buf_size(
/*======================*/
- que_node_t* node, /* in: graph node */
- ulint size) /* in: size */
+ que_node_t* node, /*!< in: graph node */
+ ulint size) /*!< in: size */
{
ut_ad(node);
((que_common_t*)node)->val_buf_size = size;
}
-/***************************************************************************
+/***********************************************************************//**
Sets the parent of a graph node. */
UNIV_INLINE
void
que_node_set_parent(
/*================*/
- que_node_t* node, /* in: graph node */
- que_node_t* parent) /* in: parent */
+ que_node_t* node, /*!< in: graph node */
+ que_node_t* parent) /*!< in: parent */
{
ut_ad(node);
((que_common_t*)node)->parent = parent;
}
-/***************************************************************************
+/***********************************************************************//**
Gets pointer to the value data type field of a graph node. */
UNIV_INLINE
dtype_t*
que_node_get_data_type(
/*===================*/
- que_node_t* node) /* in: graph node */
+ que_node_t* node) /*!< in: graph node */
{
ut_ad(node);
- return(&(((que_common_t*)node)->val.type));
+ return(dfield_get_type(&((que_common_t*) node)->val));
}
-/*************************************************************************
-Catenates a query graph node to a list of them, possible empty list. */
+/*********************************************************************//**
+Catenates a query graph node to a list of them, possibly an empty list.
+@return one-way list of nodes */
UNIV_INLINE
que_node_t*
que_node_list_add_last(
/*===================*/
- /* out: one-way list of nodes */
- que_node_t* node_list, /* in: node list, or NULL */
- que_node_t* node) /* in: node */
+ que_node_t* node_list, /*!< in: node list, or NULL */
+ que_node_t* node) /*!< in: node */
{
que_common_t* cnode;
que_common_t* cnode2;
- cnode = node;
+ cnode = (que_common_t*) node;
cnode->brother = NULL;
@@ -151,10 +167,10 @@ que_node_list_add_last(
return(node);
}
- cnode2 = node_list;
+ cnode2 = (que_common_t*) node_list;
while (cnode2->brother != NULL) {
- cnode2 = cnode2->brother;
+ cnode2 = (que_common_t*) cnode2->brother;
}
cnode2->brother = node;
@@ -162,66 +178,64 @@ que_node_list_add_last(
return(node_list);
}
-/*************************************************************************
-Gets the next list node in a list of query graph nodes. */
+/*********************************************************************//**
+Gets the next list node in a list of query graph nodes.
+@return next node in a list of nodes */
UNIV_INLINE
que_node_t*
que_node_get_next(
/*==============*/
- /* out: next node in a list of nodes */
- que_node_t* node) /* in: node in a list */
+ que_node_t* node) /*!< in: node in a list */
{
return(((que_common_t*)node)->brother);
}
-/*************************************************************************
-Gets a query graph node list length. */
+/*********************************************************************//**
+Gets a query graph node list length.
+@return length, for NULL list 0 */
UNIV_INLINE
ulint
que_node_list_get_len(
/*==================*/
- /* out: length, for NULL list 0 */
- que_node_t* node_list) /* in: node list, or NULL */
+ que_node_t* node_list) /*!< in: node list, or NULL */
{
- que_common_t* cnode;
- ulint len;
+ const que_common_t* cnode;
+ ulint len;
- cnode = node_list;
+ cnode = (const que_common_t*) node_list;
len = 0;
while (cnode != NULL) {
len++;
- cnode = cnode->brother;
+ cnode = (const que_common_t*) cnode->brother;
}
return(len);
}
-/*************************************************************************
-Gets the parent node of a query graph node. */
+/*********************************************************************//**
+Gets the parent node of a query graph node.
+@return parent node or NULL */
UNIV_INLINE
que_node_t*
que_node_get_parent(
/*================*/
- /* out: parent node or NULL */
- que_node_t* node) /* in: node */
+ que_node_t* node) /*!< in: node */
{
return(((que_common_t*)node)->parent);
}
-/**************************************************************************
+/**********************************************************************//**
Checks if graph, trx, or session is in a state where the query thread should
-be stopped. */
+be stopped.
+@return TRUE if should be stopped; NOTE that if the peek is made
+without reserving the kernel mutex, then another peek with the mutex
+reserved is necessary before deciding the actual stopping */
UNIV_INLINE
ibool
que_thr_peek_stop(
/*==============*/
- /* out: TRUE if should be stopped; NOTE that
- if the peek is made without reserving the
- kernel mutex, then another peek with the
- mutex reserved is necessary before deciding
- the actual stopping */
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
trx_t* trx;
que_t* graph;
@@ -240,14 +254,14 @@ que_thr_peek_stop(
return(FALSE);
}
-/***************************************************************************
-Returns TRUE if the query graph is for a SELECT statement. */
+/***********************************************************************//**
+Returns TRUE if the query graph is for a SELECT statement.
+@return TRUE if a select */
UNIV_INLINE
ibool
que_graph_is_select(
/*================*/
- /* out: TRUE if a select */
- que_t* graph) /* in: graph */
+ que_t* graph) /*!< in: graph */
{
if (graph->fork_type == QUE_FORK_SELECT_SCROLL
|| graph->fork_type == QUE_FORK_SELECT_NON_SCROLL) {
diff --git a/storage/innobase/include/que0types.h b/storage/innobase/include/que0types.h
index 30e3f0a172b..ea976074768 100644
--- a/storage/innobase/include/que0types.h
+++ b/storage/innobase/include/que0types.h
@@ -1,7 +1,24 @@
-/******************************************************
-Query graph global types
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/que0types.h
+Query graph global types
Created 5/27/1996 Heikki Tuuri
*******************************************************/
@@ -27,10 +44,10 @@ typedef struct que_common_struct que_common_t;
substruct must be 'common' */
struct que_common_struct{
- ulint type; /* query node type */
- que_node_t* parent; /* back pointer to parent node, or NULL */
+ ulint type; /*!< query node type */
+ que_node_t* parent; /*!< back pointer to parent node, or NULL */
que_node_t* brother;/* pointer to a possible brother node */
- dfield_t val; /* evaluated value for an expression */
+ dfield_t val; /*!< evaluated value for an expression */
ulint val_buf_size;
/* buffer size for the evaluated value data,
if the buffer has been allocated dynamically:
diff --git a/storage/innobase/include/read0read.h b/storage/innobase/include/read0read.h
index 97b6d7e9dd9..4d9a9fade36 100644
--- a/storage/innobase/include/read0read.h
+++ b/storage/innobase/include/read0read.h
@@ -1,7 +1,24 @@
-/******************************************************
-Cursor read
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1997 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/read0read.h
+Cursor read
Created 2/16/1997 Heikki Tuuri
*******************************************************/
@@ -17,144 +34,156 @@ Created 2/16/1997 Heikki Tuuri
#include "trx0trx.h"
#include "read0types.h"
-/*************************************************************************
+/*********************************************************************//**
Opens a read view where exactly the transactions serialized before this
-point in time are seen in the view. */
-
+point in time are seen in the view.
+@return own: read view struct */
+UNIV_INTERN
read_view_t*
read_view_open_now(
/*===============*/
- /* out, own: read view struct */
- dulint cr_trx_id, /* in: trx_id of creating
- transaction, or (0, 0) used in
- purge */
- mem_heap_t* heap); /* in: memory heap from which
+ trx_id_t cr_trx_id, /*!< in: trx_id of creating
+ transaction, or ut_dulint_zero
+ used in purge */
+ mem_heap_t* heap); /*!< in: memory heap from which
allocated */
-/*************************************************************************
+/*********************************************************************//**
Makes a copy of the oldest existing read view, or opens a new. The view
-must be closed with ..._close. */
-
+must be closed with ..._close.
+@return own: read view struct */
+UNIV_INTERN
read_view_t*
read_view_oldest_copy_or_open_new(
/*==============================*/
- /* out, own: read view struct */
- dulint cr_trx_id, /* in: trx_id of creating
- transaction, or (0, 0) used in
- purge */
- mem_heap_t* heap); /* in: memory heap from which
+ trx_id_t cr_trx_id, /*!< in: trx_id of creating
+ transaction, or ut_dulint_zero
+ used in purge */
+ mem_heap_t* heap); /*!< in: memory heap from which
allocated */
-/*************************************************************************
+/*********************************************************************//**
Closes a read view. */
-
+UNIV_INTERN
void
read_view_close(
/*============*/
- read_view_t* view); /* in: read view */
-/*************************************************************************
+ read_view_t* view); /*!< in: read view */
+/*********************************************************************//**
Closes a consistent read view for MySQL. This function is called at an SQL
statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */
-
+UNIV_INTERN
void
read_view_close_for_mysql(
/*======================*/
- trx_t* trx); /* in: trx which has a read view */
-/*************************************************************************
-Checks if a read view sees the specified transaction. */
+ trx_t* trx); /*!< in: trx which has a read view */
+/*********************************************************************//**
+Checks if a read view sees the specified transaction.
+@return TRUE if sees */
UNIV_INLINE
ibool
read_view_sees_trx_id(
/*==================*/
- /* out: TRUE if sees */
- read_view_t* view, /* in: read view */
- dulint trx_id);/* in: trx id */
-/*************************************************************************
+ const read_view_t* view, /*!< in: read view */
+ trx_id_t trx_id);/*!< in: trx id */
+/*********************************************************************//**
Prints a read view to stderr. */
-
+UNIV_INTERN
void
read_view_print(
/*============*/
- read_view_t* view); /* in: read view */
-/*************************************************************************
+ const read_view_t* view); /*!< in: read view */
+/*********************************************************************//**
Create a consistent cursor view for mysql to be used in cursors. In this
consistent read view modifications done by the creating transaction or future
transactions are not visible. */
-
+UNIV_INTERN
cursor_view_t*
read_cursor_view_create_for_mysql(
/*==============================*/
- trx_t* cr_trx);/* in: trx where cursor view is created */
-/*************************************************************************
+ trx_t* cr_trx);/*!< in: trx where cursor view is created */
+/*********************************************************************//**
Close a given consistent cursor view for mysql and restore global read view
back to a transaction read view. */
-
+UNIV_INTERN
void
read_cursor_view_close_for_mysql(
/*=============================*/
- trx_t* trx, /* in: trx */
- cursor_view_t* curview); /* in: cursor view to be closed */
-/*************************************************************************
+ trx_t* trx, /*!< in: trx */
+ cursor_view_t* curview); /*!< in: cursor view to be closed */
+/*********************************************************************//**
This function sets a given consistent cursor view to a transaction
read view if given consistent cursor view is not NULL. Otherwise, function
restores a global read view to a transaction read view. */
-
+UNIV_INTERN
void
read_cursor_set_for_mysql(
/*======================*/
- trx_t* trx, /* in: transaction where cursor is set */
- cursor_view_t* curview);/* in: consistent cursor view to be set */
+ trx_t* trx, /*!< in: transaction where cursor is set */
+ cursor_view_t* curview);/*!< in: consistent cursor view to be set */
-/* Read view lists the trx ids of those transactions for which a consistent
+/** Read view lists the trx ids of those transactions for which a consistent
read should not see the modifications to the database. */
struct read_view_struct{
- ulint type; /* VIEW_NORMAL, VIEW_HIGH_GRANULARITY */
- dulint undo_no; /* (0, 0) or if type is VIEW_HIGH_GRANULARITY
+ ulint type; /*!< VIEW_NORMAL, VIEW_HIGH_GRANULARITY */
+ undo_no_t undo_no;/*!< ut_dulint_zero or if type is
+ VIEW_HIGH_GRANULARITY
transaction undo_no when this high-granularity
consistent read view was created */
- dulint low_limit_no; /* The view does not need to see the undo
+ trx_id_t low_limit_no;
+ /*!< The view does not need to see the undo
logs for transactions whose transaction number
is strictly smaller (<) than this value: they
can be removed in purge if not needed by other
views */
- dulint low_limit_id; /* The read should not see any transaction
- with trx id >= this value */
- dulint up_limit_id; /* The read should see all trx ids which
- are strictly smaller (<) than this value */
- ulint n_trx_ids; /* Number of cells in the trx_ids array */
- dulint* trx_ids; /* Additional trx ids which the read should
+ trx_id_t low_limit_id;
+ /*!< The read should not see any transaction
+ with trx id >= this value. In other words,
+ this is the "high water mark". */
+ trx_id_t up_limit_id;
+ /*!< The read should see all trx ids which
+ are strictly smaller (<) than this value.
+ In other words,
+ this is the "low water mark". */
+ ulint n_trx_ids;
+ /*!< Number of cells in the trx_ids array */
+ trx_id_t* trx_ids;/*!< Additional trx ids which the read should
not see: typically, these are the active
transactions at the time when the read is
serialized, except the reading transaction
itself; the trx ids in this array are in a
- descending order */
- dulint creator_trx_id; /* trx id of creating transaction, or
- (0, 0) used in purge */
+ descending order. These trx_ids should be
+ between the "low" and "high" water marks,
+ that is, up_limit_id and low_limit_id. */
+ trx_id_t creator_trx_id;
+ /*!< trx id of creating transaction, or
+ ut_dulint_zero used in purge */
UT_LIST_NODE_T(read_view_t) view_list;
- /* List of read views in trx_sys */
+ /*!< List of read views in trx_sys */
};
-/* Read view types */
-#define VIEW_NORMAL 1 /* Normal consistent read view
+/** Read view types @{ */
+#define VIEW_NORMAL 1 /*!< Normal consistent read view
where transaction does not see changes
made by active transactions except
creating transaction. */
-#define VIEW_HIGH_GRANULARITY 2 /* High-granularity read view where
+#define VIEW_HIGH_GRANULARITY 2 /*!< High-granularity read view where
transaction does not see changes
made by active transactions and own
changes after a point in time when this
read view was created. */
+/** @} */
-/* Implement InnoDB framework to support consistent read views in
+/** Implement InnoDB framework to support consistent read views in
cursors. This struct holds both heap where consistent read view
is allocated and pointer to a read view. */
struct cursor_view_struct{
mem_heap_t* heap;
- /* Memory heap for the cursor view */
+ /*!< Memory heap for the cursor view */
read_view_t* read_view;
- /* Consistent read view of the cursor*/
+ /*!< Consistent read view of the cursor*/
ulint n_mysql_tables_in_use;
- /* number of Innobase tables used in the
+ /*!< number of Innobase tables used in the
processing of this cursor */
};
diff --git a/storage/innobase/include/read0read.ic b/storage/innobase/include/read0read.ic
index 3aded1ca07c..9924967cc2d 100644
--- a/storage/innobase/include/read0read.ic
+++ b/storage/innobase/include/read0read.ic
@@ -1,50 +1,67 @@
-/******************************************************
-Cursor read
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1997 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/read0read.ic
+Cursor read
Created 2/16/1997 Heikki Tuuri
*******************************************************/
-/*************************************************************************
-Gets the nth trx id in a read view. */
+/*********************************************************************//**
+Gets the nth trx id in a read view.
+@return trx id */
UNIV_INLINE
-dulint
+trx_id_t
read_view_get_nth_trx_id(
/*=====================*/
- /* out: trx id */
- read_view_t* view, /* in: read view */
- ulint n) /* in: position */
+ const read_view_t* view, /*!< in: read view */
+ ulint n) /*!< in: position */
{
ut_ad(n < view->n_trx_ids);
return(*(view->trx_ids + n));
}
-/*************************************************************************
+/*********************************************************************//**
Sets the nth trx id in a read view. */
UNIV_INLINE
void
read_view_set_nth_trx_id(
/*=====================*/
- read_view_t* view, /* in: read view */
- ulint n, /* in: position */
- dulint trx_id) /* in: trx id to set */
+ read_view_t* view, /*!< in: read view */
+ ulint n, /*!< in: position */
+ trx_id_t trx_id) /*!< in: trx id to set */
{
ut_ad(n < view->n_trx_ids);
*(view->trx_ids + n) = trx_id;
}
-/*************************************************************************
-Checks if a read view sees the specified transaction. */
+/*********************************************************************//**
+Checks if a read view sees the specified transaction.
+@return TRUE if sees */
UNIV_INLINE
ibool
read_view_sees_trx_id(
/*==================*/
- /* out: TRUE if sees */
- read_view_t* view, /* in: read view */
- dulint trx_id) /* in: trx id */
+ const read_view_t* view, /*!< in: read view */
+ trx_id_t trx_id) /*!< in: trx id */
{
ulint n_ids;
int cmp;
diff --git a/storage/innobase/include/read0types.h b/storage/innobase/include/read0types.h
index 7d42728523e..caf69e3fb51 100644
--- a/storage/innobase/include/read0types.h
+++ b/storage/innobase/include/read0types.h
@@ -1,7 +1,24 @@
-/******************************************************
-Cursor read
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1997 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/read0types.h
+Cursor read
Created 2/16/1997 Heikki Tuuri
*******************************************************/
diff --git a/storage/innobase/include/rem0cmp.h b/storage/innobase/include/rem0cmp.h
index c6a6e5de4db..072f74267ea 100644
--- a/storage/innobase/include/rem0cmp.h
+++ b/storage/innobase/include/rem0cmp.h
@@ -1,7 +1,24 @@
-/***********************************************************************
-Comparison services for records
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1994-2001 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/rem0cmp.h
+Comparison services for records
Created 7/1/1994 Heikki Tuuri
************************************************************************/
@@ -15,155 +32,159 @@ Created 7/1/1994 Heikki Tuuri
#include "dict0dict.h"
#include "rem0rec.h"
-/*****************************************************************
-Returns TRUE if two columns are equal for comparison purposes. */
-
+/*************************************************************//**
+Returns TRUE if two columns are equal for comparison purposes.
+@return TRUE if the columns are considered equal in comparisons */
+UNIV_INTERN
ibool
cmp_cols_are_equal(
/*===============*/
- /* out: TRUE if the columns are
- considered equal in comparisons */
- const dict_col_t* col1, /* in: column 1 */
- const dict_col_t* col2, /* in: column 2 */
+ const dict_col_t* col1, /*!< in: column 1 */
+ const dict_col_t* col2, /*!< in: column 2 */
ibool check_charsets);
- /* in: whether to check charsets */
-/*****************************************************************
+ /*!< in: whether to check charsets */
+/*************************************************************//**
This function is used to compare two data fields for which we know the
-data type. */
+data type.
+@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */
UNIV_INLINE
int
cmp_data_data(
/*==========*/
- /* out: 1, 0, -1, if data1 is greater, equal,
- less than data2, respectively */
- ulint mtype, /* in: main type */
- ulint prtype, /* in: precise type */
- byte* data1, /* in: data field (== a pointer to a memory
+ ulint mtype, /*!< in: main type */
+ ulint prtype, /*!< in: precise type */
+ const byte* data1, /*!< in: data field (== a pointer to a memory
buffer) */
- ulint len1, /* in: data field length or UNIV_SQL_NULL */
- byte* data2, /* in: data field (== a pointer to a memory
+ ulint len1, /*!< in: data field length or UNIV_SQL_NULL */
+ const byte* data2, /*!< in: data field (== a pointer to a memory
buffer) */
- ulint len2); /* in: data field length or UNIV_SQL_NULL */
-/*****************************************************************
+ ulint len2); /*!< in: data field length or UNIV_SQL_NULL */
+/*************************************************************//**
This function is used to compare two data fields for which we know the
-data type. */
-
+data type.
+@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */
+UNIV_INTERN
int
cmp_data_data_slow(
/*===============*/
- /* out: 1, 0, -1, if data1 is greater, equal,
- less than data2, respectively */
- ulint mtype, /* in: main type */
- ulint prtype, /* in: precise type */
- byte* data1, /* in: data field (== a pointer to a memory
+ ulint mtype, /*!< in: main type */
+ ulint prtype, /*!< in: precise type */
+ const byte* data1, /*!< in: data field (== a pointer to a memory
buffer) */
- ulint len1, /* in: data field length or UNIV_SQL_NULL */
- byte* data2, /* in: data field (== a pointer to a memory
+ ulint len1, /*!< in: data field length or UNIV_SQL_NULL */
+ const byte* data2, /*!< in: data field (== a pointer to a memory
buffer) */
- ulint len2); /* in: data field length or UNIV_SQL_NULL */
-/*****************************************************************
+ ulint len2); /*!< in: data field length or UNIV_SQL_NULL */
+/*************************************************************//**
This function is used to compare two dfields where at least the first
-has its data type field set. */
+has its data type field set.
+@return 1, 0, -1, if dfield1 is greater, equal, less than dfield2,
+respectively */
UNIV_INLINE
int
cmp_dfield_dfield(
/*==============*/
- /* out: 1, 0, -1, if dfield1 is greater, equal,
- less than dfield2, respectively */
- dfield_t* dfield1,/* in: data field; must have type field set */
- dfield_t* dfield2);/* in: data field */
-/*****************************************************************
+ const dfield_t* dfield1,/*!< in: data field; must have type field set */
+ const dfield_t* dfield2);/*!< in: data field */
+/*************************************************************//**
This function is used to compare a data tuple to a physical record.
Only dtuple->n_fields_cmp first fields are taken into account for
-the the data tuple! If we denote by n = n_fields_cmp, then rec must
+the data tuple! If we denote by n = n_fields_cmp, then rec must
have either m >= n fields, or it must differ from dtuple in some of
the m fields rec has. If rec has an externally stored field we do not
compare it but return with value 0 if such a comparison should be
-made. */
-
+made.
+@return 1, 0, -1, if dtuple is greater, equal, less than rec,
+respectively, when only the common first fields are compared, or until
+the first externally stored field in rec */
+UNIV_INTERN
int
cmp_dtuple_rec_with_match(
/*======================*/
- /* out: 1, 0, -1, if dtuple is greater, equal,
- less than rec, respectively, when only the
- common first fields are compared, or
- until the first externally stored field in
- rec */
- dtuple_t* dtuple, /* in: data tuple */
- rec_t* rec, /* in: physical record which differs from
+ const dtuple_t* dtuple, /*!< in: data tuple */
+ const rec_t* rec, /*!< in: physical record which differs from
dtuple in some of the common fields, or which
has an equal number or more fields than
dtuple */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint* matched_fields, /* in/out: number of already completely
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint* matched_fields, /*!< in/out: number of already completely
matched fields; when function returns,
contains the value for current comparison */
- ulint* matched_bytes); /* in/out: number of already matched
+ ulint* matched_bytes); /*!< in/out: number of already matched
bytes within the first field not completely
matched; when function returns, contains the
value for current comparison */
-/******************************************************************
-Compares a data tuple to a physical record. */
-
+/**************************************************************//**
+Compares a data tuple to a physical record.
+@see cmp_dtuple_rec_with_match
+@return 1, 0, -1, if dtuple is greater, equal, less than rec, respectively */
+UNIV_INTERN
int
cmp_dtuple_rec(
/*===========*/
- /* out: 1, 0, -1, if dtuple is greater, equal,
- less than rec, respectively; see the comments
- for cmp_dtuple_rec_with_match */
- dtuple_t* dtuple, /* in: data tuple */
- rec_t* rec, /* in: physical record */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/******************************************************************
+ const dtuple_t* dtuple, /*!< in: data tuple */
+ const rec_t* rec, /*!< in: physical record */
+ const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+/**************************************************************//**
Checks if a dtuple is a prefix of a record. The last field in dtuple
-is allowed to be a prefix of the corresponding field in the record. */
-
+is allowed to be a prefix of the corresponding field in the record.
+@return TRUE if prefix */
+UNIV_INTERN
ibool
cmp_dtuple_is_prefix_of_rec(
/*========================*/
- /* out: TRUE if prefix */
- dtuple_t* dtuple, /* in: data tuple */
- rec_t* rec, /* in: physical record */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/*****************************************************************
+ const dtuple_t* dtuple, /*!< in: data tuple */
+ const rec_t* rec, /*!< in: physical record */
+ const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+/*************************************************************//**
+Compare two physical records that contain the same number of columns,
+none of which are stored externally.
+@return 1, 0, -1 if rec1 is greater, equal, less, respectively, than rec2 */
+UNIV_INTERN
+int
+cmp_rec_rec_simple(
+/*===============*/
+ const rec_t* rec1, /*!< in: physical record */
+ const rec_t* rec2, /*!< in: physical record */
+ const ulint* offsets1,/*!< in: rec_get_offsets(rec1, ...) */
+ const ulint* offsets2,/*!< in: rec_get_offsets(rec2, ...) */
+ const dict_index_t* index); /*!< in: data dictionary index */
+/*************************************************************//**
This function is used to compare two physical records. Only the common
first fields are compared, and if an externally stored field is
-encountered, then 0 is returned. */
-
+encountered, then 0 is returned.
+@return 1, 0, -1 if rec1 is greater, equal, less, respectively, than rec2 */
+UNIV_INTERN
int
cmp_rec_rec_with_match(
/*===================*/
- /* out: 1, 0 , -1 if rec1 is greater, equal,
- less, respectively, than rec2; only the common
- first fields are compared */
- rec_t* rec1, /* in: physical record */
- rec_t* rec2, /* in: physical record */
- const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */
- const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */
- dict_index_t* index, /* in: data dictionary index */
- ulint* matched_fields, /* in/out: number of already completely
+ const rec_t* rec1, /*!< in: physical record */
+ const rec_t* rec2, /*!< in: physical record */
+ const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */
+ const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */
+ dict_index_t* index, /*!< in: data dictionary index */
+ ulint* matched_fields, /*!< in/out: number of already completely
matched fields; when the function returns,
contains the value the for current
comparison */
- ulint* matched_bytes);/* in/out: number of already matched
+ ulint* matched_bytes);/*!< in/out: number of already matched
bytes within the first field not completely
matched; when the function returns, contains
the value for the current comparison */
-/*****************************************************************
+/*************************************************************//**
This function is used to compare two physical records. Only the common
-first fields are compared. */
+first fields are compared.
+@return 1, 0, -1 if rec1 is greater, equal, less, respectively, than
+rec2; only the common first fields are compared */
UNIV_INLINE
int
cmp_rec_rec(
/*========*/
- /* out: 1, 0 , -1 if rec1 is greater, equal,
- less, respectively, than rec2; only the common
- first fields are compared */
- rec_t* rec1, /* in: physical record */
- rec_t* rec2, /* in: physical record */
- const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */
- const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */
- dict_index_t* index); /* in: data dictionary index */
+ const rec_t* rec1, /*!< in: physical record */
+ const rec_t* rec2, /*!< in: physical record */
+ const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */
+ const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */
+ dict_index_t* index); /*!< in: data dictionary index */
#ifndef UNIV_NONINL
diff --git a/storage/innobase/include/rem0cmp.ic b/storage/innobase/include/rem0cmp.ic
index 52dc7ff5dc9..39ef5f4fba3 100644
--- a/storage/innobase/include/rem0cmp.ic
+++ b/storage/innobase/include/rem0cmp.ic
@@ -1,43 +1,59 @@
-/***********************************************************************
-Comparison services for records
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1994-1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/rem0cmp.ic
+Comparison services for records
Created 7/1/1994 Heikki Tuuri
************************************************************************/
-/*****************************************************************
+/*************************************************************//**
This function is used to compare two data fields for which we know the
-data type. */
+data type.
+@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */
UNIV_INLINE
int
cmp_data_data(
/*==========*/
- /* out: 1, 0, -1, if data1 is greater, equal,
- less than data2, respectively */
- ulint mtype, /* in: main type */
- ulint prtype, /* in: precise type */
- byte* data1, /* in: data field (== a pointer to a memory
+ ulint mtype, /*!< in: main type */
+ ulint prtype, /*!< in: precise type */
+ const byte* data1, /*!< in: data field (== a pointer to a memory
buffer) */
- ulint len1, /* in: data field length or UNIV_SQL_NULL */
- byte* data2, /* in: data field (== a pointer to a memory
+ ulint len1, /*!< in: data field length or UNIV_SQL_NULL */
+ const byte* data2, /*!< in: data field (== a pointer to a memory
buffer) */
- ulint len2) /* in: data field length or UNIV_SQL_NULL */
+ ulint len2) /*!< in: data field length or UNIV_SQL_NULL */
{
return(cmp_data_data_slow(mtype, prtype, data1, len1, data2, len2));
}
-/*****************************************************************
+/*************************************************************//**
This function is used to compare two dfields where at least the first
-has its data type field set. */
+has its data type field set.
+@return 1, 0, -1, if dfield1 is greater, equal, less than dfield2,
+respectively */
UNIV_INLINE
int
cmp_dfield_dfield(
/*==============*/
- /* out: 1, 0, -1, if dfield1 is greater, equal,
- less than dfield2, respectively */
- dfield_t* dfield1,/* in: data field; must have type field set */
- dfield_t* dfield2)/* in: data field */
+ const dfield_t* dfield1,/*!< in: data field; must have type field set */
+ const dfield_t* dfield2)/*!< in: data field */
{
const dtype_t* type;
@@ -46,27 +62,26 @@ cmp_dfield_dfield(
type = dfield_get_type(dfield1);
return(cmp_data_data(type->mtype, type->prtype,
- dfield_get_data(dfield1),
+ (const byte*) dfield_get_data(dfield1),
dfield_get_len(dfield1),
- dfield_get_data(dfield2),
+ (const byte*) dfield_get_data(dfield2),
dfield_get_len(dfield2)));
}
-/*****************************************************************
+/*************************************************************//**
This function is used to compare two physical records. Only the common
-first fields are compared. */
+first fields are compared.
+@return 1, 0, -1 if rec1 is greater, equal, less, respectively, than
+rec2; only the common first fields are compared */
UNIV_INLINE
int
cmp_rec_rec(
/*========*/
- /* out: 1, 0 , -1 if rec1 is greater, equal,
- less, respectively, than rec2; only the common
- first fields are compared */
- rec_t* rec1, /* in: physical record */
- rec_t* rec2, /* in: physical record */
- const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */
- const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */
- dict_index_t* index) /* in: data dictionary index */
+ const rec_t* rec1, /*!< in: physical record */
+ const rec_t* rec2, /*!< in: physical record */
+ const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */
+ const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */
+ dict_index_t* index) /*!< in: data dictionary index */
{
ulint match_f = 0;
ulint match_b = 0;
diff --git a/storage/innobase/include/rem0rec.h b/storage/innobase/include/rem0rec.h
index abc204bb583..17d08afabb9 100644
--- a/storage/innobase/include/rem0rec.h
+++ b/storage/innobase/include/rem0rec.h
@@ -1,7 +1,24 @@
-/************************************************************************
-Record manager
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1994-1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/rem0rec.h
+Record manager
Created 5/30/1994 Heikki Tuuri
*************************************************************************/
@@ -13,12 +30,16 @@ Created 5/30/1994 Heikki Tuuri
#include "data0data.h"
#include "rem0types.h"
#include "mtr0types.h"
+#include "page0types.h"
/* Info bit denoting the predefined minimum record: this bit is set
if and only if the record is the first user record on a non-leaf
B-tree page that is the leftmost page on its level
(PAGE_LEVEL is nonzero and FIL_PAGE_PREV is FIL_NULL). */
#define REC_INFO_MIN_REC_FLAG 0x10UL
+/* The deleted flag in info bits */
+#define REC_INFO_DELETED_FLAG 0x20UL /* when bit is set to 1, it means the
+ record has been delete marked */
/* Number of extra bytes in an old-style record,
in addition to the data and the offsets */
@@ -33,336 +54,469 @@ in addition to the data and the offsets */
#define REC_STATUS_INFIMUM 2
#define REC_STATUS_SUPREMUM 3
+/* The following four constants are needed in page0zip.c in order to
+efficiently compress and decompress pages. */
+
+/* The offset of heap_no in a compact record */
+#define REC_NEW_HEAP_NO 4
+/* The shift of heap_no in a compact record.
+The status is stored in the low-order bits. */
+#define REC_HEAP_NO_SHIFT 3
+
+/* Length of a B-tree node pointer, in bytes */
+#define REC_NODE_PTR_SIZE 4
+
+#ifdef UNIV_DEBUG
+/* Length of the rec_get_offsets() header */
+# define REC_OFFS_HEADER_SIZE 4
+#else /* UNIV_DEBUG */
+/* Length of the rec_get_offsets() header */
+# define REC_OFFS_HEADER_SIZE 2
+#endif /* UNIV_DEBUG */
+
/* Number of elements that should be initially allocated for the
offsets[] array, first passed to rec_get_offsets() */
#define REC_OFFS_NORMAL_SIZE 100
#define REC_OFFS_SMALL_SIZE 10
-/**********************************************************
+/******************************************************//**
+The following function is used to get the pointer of the next chained record
+on the same page.
+@return pointer to the next chained record, or NULL if none */
+UNIV_INLINE
+const rec_t*
+rec_get_next_ptr_const(
+/*===================*/
+ const rec_t* rec, /*!< in: physical record */
+ ulint comp); /*!< in: nonzero=compact page format */
+/******************************************************//**
+The following function is used to get the pointer of the next chained record
+on the same page.
+@return pointer to the next chained record, or NULL if none */
+UNIV_INLINE
+rec_t*
+rec_get_next_ptr(
+/*=============*/
+ rec_t* rec, /*!< in: physical record */
+ ulint comp); /*!< in: nonzero=compact page format */
+/******************************************************//**
The following function is used to get the offset of the
-next chained record on the same page. */
+next chained record on the same page.
+@return the page offset of the next chained record, or 0 if none */
UNIV_INLINE
ulint
rec_get_next_offs(
/*==============*/
- /* out: the page offset of the next
- chained record */
- rec_t* rec, /* in: physical record */
- ulint comp); /* in: nonzero=compact page format */
-/**********************************************************
+ const rec_t* rec, /*!< in: physical record */
+ ulint comp); /*!< in: nonzero=compact page format */
+/******************************************************//**
The following function is used to set the next record offset field
-of the record. */
+of an old-style record. */
UNIV_INLINE
void
-rec_set_next_offs(
-/*==============*/
- rec_t* rec, /* in: physical record */
- ulint comp, /* in: nonzero=compact page format */
- ulint next); /* in: offset of the next record */
-/**********************************************************
+rec_set_next_offs_old(
+/*==================*/
+ rec_t* rec, /*!< in: old-style physical record */
+ ulint next); /*!< in: offset of the next record */
+/******************************************************//**
+The following function is used to set the next record offset field
+of a new-style record. */
+UNIV_INLINE
+void
+rec_set_next_offs_new(
+/*==================*/
+ rec_t* rec, /*!< in/out: new-style physical record */
+ ulint next); /*!< in: offset of the next record */
+/******************************************************//**
The following function is used to get the number of fields
-in an old-style record. */
+in an old-style record.
+@return number of data fields */
UNIV_INLINE
ulint
rec_get_n_fields_old(
/*=================*/
- /* out: number of data fields */
- rec_t* rec); /* in: physical record */
-/**********************************************************
+ const rec_t* rec); /*!< in: physical record */
+/******************************************************//**
The following function is used to get the number of fields
-in a record. */
+in a record.
+@return number of data fields */
UNIV_INLINE
ulint
rec_get_n_fields(
/*=============*/
- /* out: number of data fields */
- rec_t* rec, /* in: physical record */
- dict_index_t* index); /* in: record descriptor */
-/**********************************************************
-The following function is used to get the number of records
-owned by the previous directory record. */
-UNIV_INLINE
-ulint
-rec_get_n_owned(
-/*============*/
- /* out: number of owned records */
- rec_t* rec, /* in: physical record */
- ulint comp); /* in: nonzero=compact page format */
-/**********************************************************
-The following function is used to set the number of owned
-records. */
+ const rec_t* rec, /*!< in: physical record */
+ const dict_index_t* index); /*!< in: record descriptor */
+/******************************************************//**
+The following function is used to get the number of records owned by the
+previous directory record.
+@return number of owned records */
+UNIV_INLINE
+ulint
+rec_get_n_owned_old(
+/*================*/
+ const rec_t* rec); /*!< in: old-style physical record */
+/******************************************************//**
+The following function is used to set the number of owned records. */
+UNIV_INLINE
+void
+rec_set_n_owned_old(
+/*================*/
+ rec_t* rec, /*!< in: old-style physical record */
+ ulint n_owned); /*!< in: the number of owned records */
+/******************************************************//**
+The following function is used to get the number of records owned by the
+previous directory record.
+@return number of owned records */
+UNIV_INLINE
+ulint
+rec_get_n_owned_new(
+/*================*/
+ const rec_t* rec); /*!< in: new-style physical record */
+/******************************************************//**
+The following function is used to set the number of owned records. */
UNIV_INLINE
void
-rec_set_n_owned(
-/*============*/
- rec_t* rec, /* in: physical record */
- ulint comp, /* in: nonzero=compact page format */
- ulint n_owned); /* in: the number of owned */
-/**********************************************************
+rec_set_n_owned_new(
+/*================*/
+ rec_t* rec, /*!< in/out: new-style physical record */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ ulint n_owned);/*!< in: the number of owned records */
+/******************************************************//**
The following function is used to retrieve the info bits of
-a record. */
+a record.
+@return info bits */
UNIV_INLINE
ulint
rec_get_info_bits(
/*==============*/
- /* out: info bits */
- rec_t* rec, /* in: physical record */
- ulint comp); /* in: nonzero=compact page format */
-/**********************************************************
+ const rec_t* rec, /*!< in: physical record */
+ ulint comp); /*!< in: nonzero=compact page format */
+/******************************************************//**
The following function is used to set the info bits of a record. */
UNIV_INLINE
void
-rec_set_info_bits(
-/*==============*/
- rec_t* rec, /* in: physical record */
- ulint comp, /* in: nonzero=compact page format */
- ulint bits); /* in: info bits */
-/**********************************************************
-The following function retrieves the status bits of a new-style record. */
+rec_set_info_bits_old(
+/*==================*/
+ rec_t* rec, /*!< in: old-style physical record */
+ ulint bits); /*!< in: info bits */
+/******************************************************//**
+The following function is used to set the info bits of a new-style record. */
+UNIV_INLINE
+void
+rec_set_info_bits_new(
+/*==================*/
+ rec_t* rec, /*!< in/out: new-style physical record */
+ ulint bits); /*!< in: info bits */
+/******************************************************//**
+The following function retrieves the status bits of a new-style record.
+@return status bits */
UNIV_INLINE
ulint
rec_get_status(
/*===========*/
- /* out: status bits */
- rec_t* rec); /* in: physical record */
+ const rec_t* rec); /*!< in: physical record */
-/**********************************************************
+/******************************************************//**
The following function is used to set the status bits of a new-style record. */
UNIV_INLINE
void
rec_set_status(
/*===========*/
- rec_t* rec, /* in: physical record */
- ulint bits); /* in: info bits */
+ rec_t* rec, /*!< in/out: physical record */
+ ulint bits); /*!< in: status bits */
-/**********************************************************
+/******************************************************//**
The following function is used to retrieve the info and status
-bits of a record. (Only compact records have status bits.) */
+bits of a record. (Only compact records have status bits.)
+@return info and status bits */
UNIV_INLINE
ulint
rec_get_info_and_status_bits(
/*=========================*/
- /* out: info bits */
- rec_t* rec, /* in: physical record */
- ulint comp); /* in: nonzero=compact page format */
-/**********************************************************
+ const rec_t* rec, /*!< in: physical record */
+ ulint comp); /*!< in: nonzero=compact page format */
+/******************************************************//**
The following function is used to set the info and status
bits of a record. (Only compact records have status bits.) */
UNIV_INLINE
void
rec_set_info_and_status_bits(
/*=========================*/
- rec_t* rec, /* in: physical record */
- ulint comp, /* in: nonzero=compact page format */
- ulint bits); /* in: info bits */
+ rec_t* rec, /*!< in/out: compact physical record */
+ ulint bits); /*!< in: info bits */
-/**********************************************************
-The following function tells if record is delete marked. */
+/******************************************************//**
+The following function tells if record is delete marked.
+@return nonzero if delete marked */
UNIV_INLINE
ulint
rec_get_deleted_flag(
/*=================*/
- /* out: nonzero if delete marked */
- rec_t* rec, /* in: physical record */
- ulint comp); /* in: nonzero=compact page format */
-/**********************************************************
+ const rec_t* rec, /*!< in: physical record */
+ ulint comp); /*!< in: nonzero=compact page format */
+/******************************************************//**
The following function is used to set the deleted bit. */
UNIV_INLINE
void
-rec_set_deleted_flag(
-/*=================*/
- rec_t* rec, /* in: physical record */
- ulint comp, /* in: nonzero=compact page format */
- ulint flag); /* in: nonzero if delete marked */
-/**********************************************************
-The following function tells if a new-style record is a node pointer. */
+rec_set_deleted_flag_old(
+/*=====================*/
+ rec_t* rec, /*!< in: old-style physical record */
+ ulint flag); /*!< in: nonzero if delete marked */
+/******************************************************//**
+The following function is used to set the deleted bit. */
+UNIV_INLINE
+void
+rec_set_deleted_flag_new(
+/*=====================*/
+ rec_t* rec, /*!< in/out: new-style physical record */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ ulint flag); /*!< in: nonzero if delete marked */
+/******************************************************//**
+The following function tells if a new-style record is a node pointer.
+@return TRUE if node pointer */
UNIV_INLINE
ibool
rec_get_node_ptr_flag(
/*==================*/
- /* out: TRUE if node pointer */
- rec_t* rec); /* in: physical record */
-/**********************************************************
+ const rec_t* rec); /*!< in: physical record */
+/******************************************************//**
The following function is used to get the order number
-of the record in the heap of the index page. */
+of an old-style record in the heap of the index page.
+@return heap order number */
UNIV_INLINE
ulint
-rec_get_heap_no(
-/*============*/
- /* out: heap order number */
- rec_t* rec, /* in: physical record */
- ulint comp); /* in: nonzero=compact page format */
-/**********************************************************
+rec_get_heap_no_old(
+/*================*/
+ const rec_t* rec); /*!< in: physical record */
+/******************************************************//**
The following function is used to set the heap number
-field in the record. */
+field in an old-style record. */
UNIV_INLINE
void
-rec_set_heap_no(
-/*============*/
- rec_t* rec, /* in: physical record */
- ulint comp, /* in: nonzero=compact page format */
- ulint heap_no);/* in: the heap number */
-/**********************************************************
+rec_set_heap_no_old(
+/*================*/
+ rec_t* rec, /*!< in: physical record */
+ ulint heap_no);/*!< in: the heap number */
+/******************************************************//**
+The following function is used to get the order number
+of a new-style record in the heap of the index page.
+@return heap order number */
+UNIV_INLINE
+ulint
+rec_get_heap_no_new(
+/*================*/
+ const rec_t* rec); /*!< in: physical record */
+/******************************************************//**
+The following function is used to set the heap number
+field in a new-style record. */
+UNIV_INLINE
+void
+rec_set_heap_no_new(
+/*================*/
+ rec_t* rec, /*!< in/out: physical record */
+ ulint heap_no);/*!< in: the heap number */
+/******************************************************//**
The following function is used to test whether the data offsets
-in the record are stored in one-byte or two-byte format. */
+in the record are stored in one-byte or two-byte format.
+@return TRUE if 1-byte form */
UNIV_INLINE
ibool
rec_get_1byte_offs_flag(
/*====================*/
- /* out: TRUE if 1-byte form */
- rec_t* rec); /* in: physical record */
-/**********************************************************
-The following function determines the offsets to each field
-in the record. It can reuse a previously allocated array. */
+ const rec_t* rec); /*!< in: physical record */
+/******************************************************//**
+Determine how many of the first n columns in a compact
+physical record are stored externally.
+@return number of externally stored columns */
+UNIV_INTERN
+ulint
+rec_get_n_extern_new(
+/*=================*/
+ const rec_t* rec, /*!< in: compact physical record */
+ dict_index_t* index, /*!< in: record descriptor */
+ ulint n); /*!< in: number of columns to scan */
+
+/******************************************************//**
+The following function determines the offsets to each field
+in the record. It can reuse a previously allocated array.
+@return the new offsets */
+UNIV_INTERN
ulint*
rec_get_offsets_func(
/*=================*/
- /* out: the new offsets */
- rec_t* rec, /* in: physical record */
- dict_index_t* index, /* in: record descriptor */
- ulint* offsets,/* in: array consisting of offsets[0]
- allocated elements, or an array from
- rec_get_offsets(), or NULL */
- ulint n_fields,/* in: maximum number of initialized fields
- (ULINT_UNDEFINED if all fields) */
- mem_heap_t** heap, /* in/out: memory heap */
- const char* file, /* in: file name where called */
- ulint line); /* in: line number where called */
+ const rec_t* rec, /*!< in: physical record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ ulint* offsets,/*!< in/out: array consisting of
+ offsets[0] allocated elements,
+ or an array from rec_get_offsets(),
+ or NULL */
+ ulint n_fields,/*!< in: maximum number of
+ initialized fields
+ (ULINT_UNDEFINED if all fields) */
+ mem_heap_t** heap, /*!< in/out: memory heap */
+ const char* file, /*!< in: file name where called */
+ ulint line); /*!< in: line number where called */
#define rec_get_offsets(rec,index,offsets,n,heap) \
rec_get_offsets_func(rec,index,offsets,n,heap,__FILE__,__LINE__)
-/****************************************************************
-Validates offsets returned by rec_get_offsets(). */
+/******************************************************//**
+Determine the offset to each field in a leaf-page record
+in ROW_FORMAT=COMPACT. This is a special case of
+rec_init_offsets() and rec_get_offsets_func(). */
+UNIV_INTERN
+void
+rec_init_offsets_comp_ordinary(
+/*===========================*/
+ const rec_t* rec, /*!< in: physical record in
+ ROW_FORMAT=COMPACT */
+ ulint extra, /*!< in: number of bytes to reserve
+ between the record header and
+ the data payload
+ (usually REC_N_NEW_EXTRA_BYTES) */
+ const dict_index_t* index, /*!< in: record descriptor */
+ ulint* offsets);/*!< in/out: array of offsets;
+ in: n=rec_offs_n_fields(offsets) */
+
+/******************************************************//**
+The following function determines the offsets to each field
+in the record. It can reuse a previously allocated array. */
+UNIV_INTERN
+void
+rec_get_offsets_reverse(
+/*====================*/
+ const byte* extra, /*!< in: the extra bytes of a
+ compact record in reverse order,
+ excluding the fixed-size
+ REC_N_NEW_EXTRA_BYTES */
+ const dict_index_t* index, /*!< in: record descriptor */
+ ulint node_ptr,/*!< in: nonzero=node pointer,
+ 0=leaf node */
+ ulint* offsets);/*!< in/out: array consisting of
+ offsets[0] allocated elements */
+
+/************************************************************//**
+Validates offsets returned by rec_get_offsets().
+@return TRUE if valid */
UNIV_INLINE
ibool
rec_offs_validate(
/*==============*/
- /* out: TRUE if valid */
- rec_t* rec, /* in: record or NULL */
- dict_index_t* index, /* in: record descriptor or NULL */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/****************************************************************
+ const rec_t* rec, /*!< in: record or NULL */
+ const dict_index_t* index, /*!< in: record descriptor or NULL */
+ const ulint* offsets);/*!< in: array returned by
+ rec_get_offsets() */
+#ifdef UNIV_DEBUG
+/************************************************************//**
Updates debug data in offsets, in order to avoid bogus
rec_offs_validate() failures. */
UNIV_INLINE
void
rec_offs_make_valid(
/*================*/
- rec_t* rec, /* in: record */
- dict_index_t* index,/* in: record descriptor */
- ulint* offsets);/* in: array returned by rec_get_offsets() */
-
-/****************************************************************
-The following function is used to get a pointer to the nth
-data field in an old-style record. */
+ const rec_t* rec, /*!< in: record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ ulint* offsets);/*!< in/out: array returned by
+ rec_get_offsets() */
+#else
+# define rec_offs_make_valid(rec, index, offsets) ((void) 0)
+#endif /* UNIV_DEBUG */
-byte*
-rec_get_nth_field_old(
-/*==================*/
- /* out: pointer to the field */
- rec_t* rec, /* in: record */
- ulint n, /* in: index of the field */
- ulint* len); /* out: length of the field; UNIV_SQL_NULL
- if SQL null */
-/****************************************************************
+/************************************************************//**
+The following function is used to get the offset to the nth
+data field in an old-style record.
+@return offset to the field */
+UNIV_INTERN
+ulint
+rec_get_nth_field_offs_old(
+/*=======================*/
+ const rec_t* rec, /*!< in: record */
+ ulint n, /*!< in: index of the field */
+ ulint* len); /*!< out: length of the field; UNIV_SQL_NULL
+ if SQL null */
+#define rec_get_nth_field_old(rec, n, len) \
+((rec) + rec_get_nth_field_offs_old(rec, n, len))
+/************************************************************//**
Gets the physical size of an old-style field.
Also an SQL null may have a field of size > 0,
-if the data type is of a fixed size. */
+if the data type is of a fixed size.
+@return field size in bytes */
UNIV_INLINE
ulint
rec_get_nth_field_size(
/*===================*/
- /* out: field size in bytes */
- rec_t* rec, /* in: record */
- ulint n); /* in: index of the field */
-/****************************************************************
-The following function is used to get a pointer to the nth
-data field in a record. */
+ const rec_t* rec, /*!< in: record */
+ ulint n); /*!< in: index of the field */
+/************************************************************//**
+The following function is used to get an offset to the nth
+data field in a record.
+@return offset from the origin of rec */
UNIV_INLINE
-byte*
-rec_get_nth_field(
-/*==============*/
- /* out: pointer to the field */
- rec_t* rec, /* in: record */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint n, /* in: index of the field */
- ulint* len); /* out: length of the field; UNIV_SQL_NULL
+ulint
+rec_get_nth_field_offs(
+/*===================*/
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint n, /*!< in: index of the field */
+ ulint* len); /*!< out: length of the field; UNIV_SQL_NULL
if SQL null */
-/**********************************************************
+#define rec_get_nth_field(rec, offsets, n, len) \
+((rec) + rec_get_nth_field_offs(offsets, n, len))
+/******************************************************//**
Determine if the offsets are for a record in the new
-compact format. */
+compact format.
+@return nonzero if compact format */
UNIV_INLINE
ulint
rec_offs_comp(
/*==========*/
- /* out: nonzero if compact format */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/**********************************************************
-Returns nonzero if the extern bit is set in nth field of rec. */
+ const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+/******************************************************//**
+Determine if the offsets are for a record containing
+externally stored columns.
+@return nonzero if externally stored */
+UNIV_INLINE
+ulint
+rec_offs_any_extern(
+/*================*/
+ const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+/******************************************************//**
+Returns nonzero if the extern bit is set in nth field of rec.
+@return nonzero if externally stored */
UNIV_INLINE
ulint
rec_offs_nth_extern(
/*================*/
- /* out: nonzero if externally stored */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint n); /* in: nth field */
-/**********************************************************
-Returns nonzero if the SQL NULL bit is set in nth field of rec. */
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint n); /*!< in: nth field */
+/******************************************************//**
+Returns nonzero if the SQL NULL bit is set in nth field of rec.
+@return nonzero if SQL NULL */
UNIV_INLINE
ulint
rec_offs_nth_sql_null(
/*==================*/
- /* out: nonzero if SQL NULL */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint n); /* in: nth field */
-/**********************************************************
-Gets the physical size of a field. */
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint n); /*!< in: nth field */
+/******************************************************//**
+Gets the physical size of a field.
+@return length of field */
UNIV_INLINE
ulint
rec_offs_nth_size(
/*==============*/
- /* out: length of field */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint n); /* in: nth field */
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint n); /*!< in: nth field */
-/**********************************************************
-Returns TRUE if the extern bit is set in any of the fields
-of rec. */
-UNIV_INLINE
-ibool
-rec_offs_any_extern(
-/*================*/
- /* out: TRUE if a field is stored externally */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/***************************************************************
-Sets the value of the ith field extern storage bit. */
+/******************************************************//**
+Returns the number of extern bits set in a record.
+@return number of externally stored fields */
UNIV_INLINE
-void
-rec_set_nth_field_extern_bit(
-/*=========================*/
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: record descriptor */
- ulint i, /* in: ith field */
- ibool val, /* in: value to set */
- mtr_t* mtr); /* in: mtr holding an X-latch to the page
- where rec is, or NULL; in the NULL case
- we do not write to log about the change */
-/***************************************************************
-Sets TRUE the extern storage bits of fields mentioned in an array. */
-
-void
-rec_set_field_extern_bits(
-/*======================*/
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: record descriptor */
- const ulint* vec, /* in: array of field numbers */
- ulint n_fields,/* in: number of fields numbers */
- mtr_t* mtr); /* in: mtr holding an X-latch to the page
- where rec is, or NULL; in the NULL case
- we do not write to log about the change */
-/***************************************************************
+ulint
+rec_offs_n_extern(
+/*==============*/
+ const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+/***********************************************************//**
This is used to modify the value of an already existing field in a record.
The previous value must have exactly the same size as the new value. If len
is UNIV_SQL_NULL then the field is treated as an SQL null.
@@ -372,197 +526,285 @@ UNIV_INLINE
void
rec_set_nth_field(
/*==============*/
- rec_t* rec, /* in: record */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint n, /* in: index number of the field */
- const void* data, /* in: pointer to the data if not SQL null */
- ulint len); /* in: length of the data or UNIV_SQL_NULL */
-/**************************************************************
+ rec_t* rec, /*!< in: record */
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint n, /*!< in: index number of the field */
+ const void* data, /*!< in: pointer to the data if not SQL null */
+ ulint len); /*!< in: length of the data or UNIV_SQL_NULL */
+/**********************************************************//**
The following function returns the data size of an old-style physical
record, that is the sum of field lengths. SQL null fields
are counted as length 0 fields. The value returned by the function
-is the distance from record origin to record end in bytes. */
+is the distance from record origin to record end in bytes.
+@return size */
UNIV_INLINE
ulint
rec_get_data_size_old(
/*==================*/
- /* out: size */
- rec_t* rec); /* in: physical record */
-/**************************************************************
-The following function returns the number of fields in a record. */
+ const rec_t* rec); /*!< in: physical record */
+/**********************************************************//**
+The following function returns the number of allocated elements
+for an array of offsets.
+@return number of elements */
+UNIV_INLINE
+ulint
+rec_offs_get_n_alloc(
+/*=================*/
+ const ulint* offsets);/*!< in: array for rec_get_offsets() */
+/**********************************************************//**
+The following function sets the number of allocated elements
+for an array of offsets. */
+UNIV_INLINE
+void
+rec_offs_set_n_alloc(
+/*=================*/
+ ulint* offsets, /*!< out: array for rec_get_offsets(),
+ must be allocated */
+ ulint n_alloc); /*!< in: number of elements */
+#define rec_offs_init(offsets) \
+ rec_offs_set_n_alloc(offsets, (sizeof offsets) / sizeof *offsets)
+/**********************************************************//**
+The following function returns the number of fields in a record.
+@return number of fields */
UNIV_INLINE
ulint
rec_offs_n_fields(
/*==============*/
- /* out: number of fields */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/**************************************************************
+ const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+/**********************************************************//**
The following function returns the data size of a physical
record, that is the sum of field lengths. SQL null fields
are counted as length 0 fields. The value returned by the function
-is the distance from record origin to record end in bytes. */
+is the distance from record origin to record end in bytes.
+@return size */
UNIV_INLINE
ulint
rec_offs_data_size(
/*===============*/
- /* out: size */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/**************************************************************
+ const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+/**********************************************************//**
Returns the total size of record minus data size of record.
The value returned by the function is the distance from record
-start to record origin in bytes. */
+start to record origin in bytes.
+@return size */
UNIV_INLINE
ulint
rec_offs_extra_size(
/*================*/
- /* out: size */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/**************************************************************
-Returns the total size of a physical record. */
+ const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+/**********************************************************//**
+Returns the total size of a physical record.
+@return size */
UNIV_INLINE
ulint
rec_offs_size(
/*==========*/
- /* out: size */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/**************************************************************
-Returns a pointer to the start of the record. */
+ const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+/**********************************************************//**
+Returns a pointer to the start of the record.
+@return pointer to start */
UNIV_INLINE
byte*
rec_get_start(
/*==========*/
- /* out: pointer to start */
- rec_t* rec, /* in: pointer to record */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/**************************************************************
-Returns a pointer to the end of the record. */
+ rec_t* rec, /*!< in: pointer to record */
+ const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+/**********************************************************//**
+Returns a pointer to the end of the record.
+@return pointer to end */
UNIV_INLINE
byte*
rec_get_end(
/*========*/
- /* out: pointer to end */
- rec_t* rec, /* in: pointer to record */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/*******************************************************************
-Copies a physical record to a buffer. */
+ rec_t* rec, /*!< in: pointer to record */
+ const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+/***************************************************************//**
+Copies a physical record to a buffer.
+@return pointer to the origin of the copy */
UNIV_INLINE
rec_t*
rec_copy(
/*=====*/
- /* out: pointer to the origin of the copy */
- void* buf, /* in: buffer */
- const rec_t* rec, /* in: physical record */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/******************************************************************
+ void* buf, /*!< in: buffer */
+ const rec_t* rec, /*!< in: physical record */
+ const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+#ifndef UNIV_HOTBACKUP
+/**************************************************************//**
Copies the first n fields of a physical record to a new physical record in
-a buffer. */
-
+a buffer.
+@return own: copied record */
+UNIV_INTERN
rec_t*
rec_copy_prefix_to_buf(
/*===================*/
- /* out, own: copied record */
- rec_t* rec, /* in: physical record */
- dict_index_t* index, /* in: record descriptor */
- ulint n_fields, /* in: number of fields to copy */
- byte** buf, /* in/out: memory buffer
- for the copied prefix, or NULL */
- ulint* buf_size); /* in/out: buffer size */
-/****************************************************************
-Folds a prefix of a physical record to a ulint. */
+ const rec_t* rec, /*!< in: physical record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ ulint n_fields, /*!< in: number of fields
+ to copy */
+ byte** buf, /*!< in/out: memory buffer
+ for the copied prefix,
+ or NULL */
+ ulint* buf_size); /*!< in/out: buffer size */
+/************************************************************//**
+Folds a prefix of a physical record to a ulint.
+@return the folded value */
UNIV_INLINE
ulint
rec_fold(
/*=====*/
- /* out: the folded value */
- rec_t* rec, /* in: the physical record */
- const ulint* offsets, /* in: array returned by
+ const rec_t* rec, /*!< in: the physical record */
+ const ulint* offsets, /*!< in: array returned by
rec_get_offsets() */
- ulint n_fields, /* in: number of complete
+ ulint n_fields, /*!< in: number of complete
fields to fold */
- ulint n_bytes, /* in: number of bytes to fold
+ ulint n_bytes, /*!< in: number of bytes to fold
in an incomplete last field */
- dulint tree_id); /* in: index tree id */
-/*************************************************************
-Builds a physical record out of a data tuple and stores it beginning from
-address destination. */
-
+ dulint tree_id) /*!< in: index tree id */
+ __attribute__((pure));
+#endif /* !UNIV_HOTBACKUP */
+/*********************************************************//**
+Builds a ROW_FORMAT=COMPACT record out of a data tuple. */
+UNIV_INTERN
+void
+rec_convert_dtuple_to_rec_comp(
+/*===========================*/
+ rec_t* rec, /*!< in: origin of record */
+ ulint extra, /*!< in: number of bytes to
+ reserve between the record
+ header and the data payload
+ (normally REC_N_NEW_EXTRA_BYTES) */
+ const dict_index_t* index, /*!< in: record descriptor */
+ ulint status, /*!< in: status bits of the record */
+ const dfield_t* fields, /*!< in: array of data fields */
+ ulint n_fields);/*!< in: number of data fields */
+/*********************************************************//**
+Builds a physical record out of a data tuple and
+stores it into the given buffer.
+@return pointer to the origin of physical record */
+UNIV_INTERN
rec_t*
rec_convert_dtuple_to_rec(
/*======================*/
- /* out: pointer to the origin
- of physical record */
- byte* buf, /* in: start address of the
- physical record */
- dict_index_t* index, /* in: record descriptor */
- dtuple_t* dtuple);/* in: data tuple */
-/**************************************************************
+ byte* buf, /*!< in: start address of the
+ physical record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ const dtuple_t* dtuple, /*!< in: data tuple */
+ ulint n_ext); /*!< in: number of
+ externally stored columns */
+/**********************************************************//**
Returns the extra size of an old-style physical record if we know its
-data size and number of fields. */
+data size and number of fields.
+@return extra size */
UNIV_INLINE
ulint
rec_get_converted_extra_size(
/*=========================*/
- /* out: extra size */
- ulint data_size, /* in: data size */
- ulint n_fields) /* in: number of fields */
+ ulint data_size, /*!< in: data size */
+ ulint n_fields, /*!< in: number of fields */
+ ulint n_ext) /*!< in: number of externally stored columns */
__attribute__((const));
-/**************************************************************
+/**********************************************************//**
+Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
+@return total size */
+UNIV_INTERN
+ulint
+rec_get_converted_size_comp_prefix(
+/*===============================*/
+ const dict_index_t* index, /*!< in: record descriptor;
+ dict_table_is_comp() is
+ assumed to hold, even if
+ it does not */
+ const dfield_t* fields, /*!< in: array of data fields */
+ ulint n_fields,/*!< in: number of data fields */
+ ulint* extra); /*!< out: extra size */
+/**********************************************************//**
+Determines the size of a data tuple in ROW_FORMAT=COMPACT.
+@return total size */
+UNIV_INTERN
+ulint
+rec_get_converted_size_comp(
+/*========================*/
+ const dict_index_t* index, /*!< in: record descriptor;
+ dict_table_is_comp() is
+ assumed to hold, even if
+ it does not */
+ ulint status, /*!< in: status bits of the record */
+ const dfield_t* fields, /*!< in: array of data fields */
+ ulint n_fields,/*!< in: number of data fields */
+ ulint* extra); /*!< out: extra size */
+/**********************************************************//**
The following function returns the size of a data tuple when converted to
-a physical record. */
+a physical record.
+@return size */
UNIV_INLINE
ulint
rec_get_converted_size(
/*===================*/
- /* out: size */
- dict_index_t* index, /* in: record descriptor */
- dtuple_t* dtuple);/* in: data tuple */
-/******************************************************************
+ dict_index_t* index, /*!< in: record descriptor */
+ const dtuple_t* dtuple, /*!< in: data tuple */
+ ulint n_ext); /*!< in: number of externally stored columns */
+#ifndef UNIV_HOTBACKUP
+/**************************************************************//**
Copies the first n fields of a physical record to a data tuple.
The fields are copied to the memory heap. */
-
+UNIV_INTERN
void
rec_copy_prefix_to_dtuple(
/*======================*/
- dtuple_t* tuple, /* in: data tuple */
- rec_t* rec, /* in: physical record */
- dict_index_t* index, /* in: record descriptor */
- ulint n_fields, /* in: number of fields to copy */
- mem_heap_t* heap); /* in: memory heap */
-/*******************************************************************
-Validates the consistency of a physical record. */
-
+ dtuple_t* tuple, /*!< out: data tuple */
+ const rec_t* rec, /*!< in: physical record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ ulint n_fields, /*!< in: number of fields
+ to copy */
+ mem_heap_t* heap); /*!< in: memory heap */
+#endif /* !UNIV_HOTBACKUP */
+/***************************************************************//**
+Validates the consistency of a physical record.
+@return TRUE if ok */
+UNIV_INTERN
ibool
rec_validate(
/*=========*/
- /* out: TRUE if ok */
- rec_t* rec, /* in: physical record */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/*******************************************************************
+ const rec_t* rec, /*!< in: physical record */
+ const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+/***************************************************************//**
Prints an old-style physical record. */
-
+UNIV_INTERN
void
rec_print_old(
/*==========*/
- FILE* file, /* in: file where to print */
- rec_t* rec); /* in: physical record */
-/*******************************************************************
+ FILE* file, /*!< in: file where to print */
+ const rec_t* rec); /*!< in: physical record */
+#ifndef UNIV_HOTBACKUP
+/***************************************************************//**
+Prints a physical record in ROW_FORMAT=COMPACT. Ignores the
+record header. */
+UNIV_INTERN
+void
+rec_print_comp(
+/*===========*/
+ FILE* file, /*!< in: file where to print */
+ const rec_t* rec, /*!< in: physical record */
+ const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+/***************************************************************//**
Prints a physical record. */
-
+UNIV_INTERN
void
rec_print_new(
/*==========*/
- FILE* file, /* in: file where to print */
- rec_t* rec, /* in: physical record */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/*******************************************************************
+ FILE* file, /*!< in: file where to print */
+ const rec_t* rec, /*!< in: physical record */
+ const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+/***************************************************************//**
Prints a physical record. */
-
+UNIV_INTERN
void
rec_print(
/*======*/
- FILE* file, /* in: file where to print */
- rec_t* rec, /* in: physical record */
- dict_index_t* index); /* in: record descriptor */
+ FILE* file, /*!< in: file where to print */
+ const rec_t* rec, /*!< in: physical record */
+ dict_index_t* index); /*!< in: record descriptor */
+#endif /* UNIV_HOTBACKUP */
#define REC_INFO_BITS 6 /* This is single byte bit-field */
diff --git a/storage/innobase/include/rem0rec.ic b/storage/innobase/include/rem0rec.ic
index d91fb4c4391..8e5bd9a7fcd 100644
--- a/storage/innobase/include/rem0rec.ic
+++ b/storage/innobase/include/rem0rec.ic
@@ -1,7 +1,24 @@
-/************************************************************************
-Record manager
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1994-1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/rem0rec.ic
+Record manager
Created 5/30/1994 Heikki Tuuri
*************************************************************************/
@@ -48,7 +65,7 @@ most significant bytes and bits are written below less significant.
- offset_of_this_record) mod 64Ki,
where mod is the modulo as a non-negative
number;
- we can calculate the the offset of the next
+ we can calculate the offset of the next
record with the formula:
relative_offset + offset_of_this_record
mod UNIV_PAGE_SIZE
@@ -84,9 +101,11 @@ and the shift needed to obtain each bit-field of the record. */
#define REC_NEW_STATUS_SHIFT 0
#define REC_OLD_HEAP_NO 5
-#define REC_NEW_HEAP_NO 4
#define REC_HEAP_NO_MASK 0xFFF8UL
+#if 0 /* defined in rem0rec.h for use of page0zip.c */
+#define REC_NEW_HEAP_NO 4
#define REC_HEAP_NO_SHIFT 3
+#endif
#define REC_OLD_N_OWNED 6 /* This is single byte bit-field */
#define REC_NEW_N_OWNED 5 /* This is single byte bit-field */
@@ -98,9 +117,6 @@ and the shift needed to obtain each bit-field of the record. */
#define REC_INFO_BITS_MASK 0xF0UL
#define REC_INFO_BITS_SHIFT 0
-/* The deleted flag in info bits */
-#define REC_INFO_DELETED_FLAG 0x20UL /* when bit is set to 1, it means the
- record has been delete marked */
/* The following masks are used to filter the SQL null bit from
one-byte and two-byte offsets */
@@ -128,78 +144,52 @@ a field stored to another page: */
# error "sum of new-style masks != 0xFFFFFFUL"
#endif
-/***************************************************************
+/***********************************************************//**
Sets the value of the ith field SQL null bit of an old-style record. */
-
+UNIV_INTERN
void
rec_set_nth_field_null_bit(
/*=======================*/
- rec_t* rec, /* in: record */
- ulint i, /* in: ith field */
- ibool val); /* in: value to set */
-/***************************************************************
+ rec_t* rec, /*!< in: record */
+ ulint i, /*!< in: ith field */
+ ibool val); /*!< in: value to set */
+/***********************************************************//**
Sets an old-style record field to SQL null.
The physical size of the field is not changed. */
-
+UNIV_INTERN
void
rec_set_nth_field_sql_null(
/*=======================*/
- rec_t* rec, /* in: record */
- ulint n); /* in: index of the field */
-
-/***************************************************************
-Sets the value of the ith field extern storage bit of an old-style record. */
+ rec_t* rec, /*!< in: record */
+ ulint n); /*!< in: index of the field */
-void
-rec_set_nth_field_extern_bit_old(
-/*=============================*/
- rec_t* rec, /* in: old-style record */
- ulint i, /* in: ith field */
- ibool val, /* in: value to set */
- mtr_t* mtr); /* in: mtr holding an X-latch to the page where
- rec is, or NULL; in the NULL case we do not
- write to log about the change */
-/***************************************************************
-Sets the value of the ith field extern storage bit of a new-style record. */
-
-void
-rec_set_nth_field_extern_bit_new(
-/*=============================*/
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: record descriptor */
- ulint ith, /* in: ith field */
- ibool val, /* in: value to set */
- mtr_t* mtr); /* in: mtr holding an X-latch to the page
- where rec is, or NULL; in the NULL case
- we do not write to log about the change */
-
-/**********************************************************
+/******************************************************//**
Gets a bit field from within 1 byte. */
UNIV_INLINE
ulint
rec_get_bit_field_1(
/*================*/
- rec_t* rec, /* in: pointer to record origin */
- ulint offs, /* in: offset from the origin down */
- ulint mask, /* in: mask used to filter bits */
- ulint shift) /* in: shift right applied after masking */
+ const rec_t* rec, /*!< in: pointer to record origin */
+ ulint offs, /*!< in: offset from the origin down */
+ ulint mask, /*!< in: mask used to filter bits */
+ ulint shift) /*!< in: shift right applied after masking */
{
ut_ad(rec);
return((mach_read_from_1(rec - offs) & mask) >> shift);
}
-/**********************************************************
+/******************************************************//**
Sets a bit field within 1 byte. */
UNIV_INLINE
void
rec_set_bit_field_1(
/*================*/
- rec_t* rec, /* in: pointer to record origin */
- ulint val, /* in: value to set */
- ulint offs, /* in: offset from the origin down */
- ulint mask, /* in: mask used to filter bits */
- ulint shift) /* in: shift right applied after masking */
+ rec_t* rec, /*!< in: pointer to record origin */
+ ulint val, /*!< in: value to set */
+ ulint offs, /*!< in: offset from the origin down */
+ ulint mask, /*!< in: mask used to filter bits */
+ ulint shift) /*!< in: shift right applied after masking */
{
ut_ad(rec);
ut_ad(offs <= REC_N_OLD_EXTRA_BYTES);
@@ -213,33 +203,33 @@ rec_set_bit_field_1(
| (val << shift));
}
-/**********************************************************
+/******************************************************//**
Gets a bit field from within 2 bytes. */
UNIV_INLINE
ulint
rec_get_bit_field_2(
/*================*/
- rec_t* rec, /* in: pointer to record origin */
- ulint offs, /* in: offset from the origin down */
- ulint mask, /* in: mask used to filter bits */
- ulint shift) /* in: shift right applied after masking */
+ const rec_t* rec, /*!< in: pointer to record origin */
+ ulint offs, /*!< in: offset from the origin down */
+ ulint mask, /*!< in: mask used to filter bits */
+ ulint shift) /*!< in: shift right applied after masking */
{
ut_ad(rec);
return((mach_read_from_2(rec - offs) & mask) >> shift);
}
-/**********************************************************
+/******************************************************//**
Sets a bit field within 2 bytes. */
UNIV_INLINE
void
rec_set_bit_field_2(
/*================*/
- rec_t* rec, /* in: pointer to record origin */
- ulint val, /* in: value to set */
- ulint offs, /* in: offset from the origin down */
- ulint mask, /* in: mask used to filter bits */
- ulint shift) /* in: shift right applied after masking */
+ rec_t* rec, /*!< in: pointer to record origin */
+ ulint val, /*!< in: value to set */
+ ulint offs, /*!< in: offset from the origin down */
+ ulint mask, /*!< in: mask used to filter bits */
+ ulint shift) /*!< in: shift right applied after masking */
{
ut_ad(rec);
ut_ad(offs <= REC_N_OLD_EXTRA_BYTES);
@@ -255,17 +245,86 @@ rec_set_bit_field_2(
| (val << shift));
}
-/**********************************************************
+/******************************************************//**
+The following function is used to get the pointer of the next chained record
+on the same page.
+@return pointer to the next chained record, or NULL if none */
+UNIV_INLINE
+const rec_t*
+rec_get_next_ptr_const(
+/*===================*/
+ const rec_t* rec, /*!< in: physical record */
+ ulint comp) /*!< in: nonzero=compact page format */
+{
+ ulint field_value;
+
+ ut_ad(REC_NEXT_MASK == 0xFFFFUL);
+ ut_ad(REC_NEXT_SHIFT == 0);
+
+ field_value = mach_read_from_2(rec - REC_NEXT);
+
+ if (UNIV_UNLIKELY(field_value == 0)) {
+
+ return(NULL);
+ }
+
+ if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) {
+#if UNIV_PAGE_SIZE <= 32768
+ /* Note that for 64 KiB pages, field_value can 'wrap around'
+ and the debug assertion is not valid */
+
+ /* In the following assertion, field_value is interpreted
+ as signed 16-bit integer in 2's complement arithmetics.
+ If all platforms defined int16_t in the standard headers,
+ the expression could be written simpler as
+ (int16_t) field_value + ut_align_offset(...) < UNIV_PAGE_SIZE
+ */
+ ut_ad((field_value >= 32768
+ ? field_value - 65536
+ : field_value)
+ + ut_align_offset(rec, UNIV_PAGE_SIZE)
+ < UNIV_PAGE_SIZE);
+#endif
+ /* There must be at least REC_N_NEW_EXTRA_BYTES + 1
+ between each record. */
+ ut_ad((field_value > REC_N_NEW_EXTRA_BYTES
+ && field_value < 32768)
+ || field_value < (uint16) -REC_N_NEW_EXTRA_BYTES);
+
+ return((byte*) ut_align_down(rec, UNIV_PAGE_SIZE)
+ + ut_align_offset(rec + field_value, UNIV_PAGE_SIZE));
+ } else {
+ ut_ad(field_value < UNIV_PAGE_SIZE);
+
+ return((byte*) ut_align_down(rec, UNIV_PAGE_SIZE)
+ + field_value);
+ }
+}
+
+/******************************************************//**
+The following function is used to get the pointer of the next chained record
+on the same page.
+@return pointer to the next chained record, or NULL if none */
+UNIV_INLINE
+rec_t*
+rec_get_next_ptr(
+/*=============*/
+ rec_t* rec, /*!< in: physical record */
+ ulint comp) /*!< in: nonzero=compact page format */
+{
+ return((rec_t*) rec_get_next_ptr_const(rec, comp));
+}
+
+/******************************************************//**
The following function is used to get the offset of the next chained record
-on the same page. */
+on the same page.
+@return the page offset of the next chained record, or 0 if none */
UNIV_INLINE
ulint
rec_get_next_offs(
/*==============*/
- /* out: the page offset of the next chained record, or
- 0 if none */
- rec_t* rec, /* in: physical record */
- ulint comp) /* in: nonzero=compact page format */
+ const rec_t* rec, /*!< in: physical record */
+ ulint comp) /*!< in: nonzero=compact page format */
{
ulint field_value;
#if REC_NEXT_MASK != 0xFFFFUL
@@ -277,7 +336,7 @@ rec_get_next_offs(
field_value = mach_read_from_2(rec - REC_NEXT);
- if (comp) {
+ if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) {
#if UNIV_PAGE_SIZE <= 32768
/* Note that for 64 KiB pages, field_value can 'wrap around'
and the debug assertion is not valid */
@@ -294,11 +353,17 @@ rec_get_next_offs(
+ ut_align_offset(rec, UNIV_PAGE_SIZE)
< UNIV_PAGE_SIZE);
#endif
- if (field_value == 0) {
+ if (UNIV_UNLIKELY(field_value == 0)) {
return(0);
}
+ /* There must be at least REC_N_NEW_EXTRA_BYTES + 1
+ between each record. */
+ ut_ad((field_value > REC_N_NEW_EXTRA_BYTES
+ && field_value < 32768)
+ || field_value < (uint16) -REC_N_NEW_EXTRA_BYTES);
+
return(ut_align_offset(rec + field_value, UNIV_PAGE_SIZE));
} else {
ut_ad(field_value < UNIV_PAGE_SIZE);
@@ -307,16 +372,15 @@ rec_get_next_offs(
}
}
-/**********************************************************
-The following function is used to set the next record offset field of the
-record. */
+/******************************************************//**
+The following function is used to set the next record offset field
+of an old-style record. */
UNIV_INLINE
void
-rec_set_next_offs(
-/*==============*/
- rec_t* rec, /* in: physical record */
- ulint comp, /* in: nonzero=compact page format */
- ulint next) /* in: offset of the next record, or 0 if none */
+rec_set_next_offs_old(
+/*==================*/
+ rec_t* rec, /*!< in: old-style physical record */
+ ulint next) /*!< in: offset of the next record */
{
ut_ad(rec);
ut_ad(UNIV_PAGE_SIZE > next);
@@ -327,37 +391,49 @@ rec_set_next_offs(
# error "REC_NEXT_SHIFT != 0"
#endif
- if (comp) {
- ulint field_value;
+ mach_write_to_2(rec - REC_NEXT, next);
+}
- if (next) {
- /* The following two statements calculate
- next - offset_of_rec mod 64Ki, where mod is the modulo
- as a non-negative number */
+/******************************************************//**
+The following function is used to set the next record offset field
+of a new-style record. */
+UNIV_INLINE
+void
+rec_set_next_offs_new(
+/*==================*/
+ rec_t* rec, /*!< in/out: new-style physical record */
+ ulint next) /*!< in: offset of the next record */
+{
+ ulint field_value;
- field_value = (ulint)((lint)next
- - (lint)ut_align_offset(
- rec, UNIV_PAGE_SIZE));
- field_value &= REC_NEXT_MASK;
- } else {
- field_value = 0;
- }
+ ut_ad(rec);
+ ut_ad(UNIV_PAGE_SIZE > next);
- mach_write_to_2(rec - REC_NEXT, field_value);
+ if (UNIV_UNLIKELY(!next)) {
+ field_value = 0;
} else {
- mach_write_to_2(rec - REC_NEXT, next);
+ /* The following two statements calculate
+ next - offset_of_rec mod 64Ki, where mod is the modulo
+ as a non-negative number */
+
+ field_value = (ulint)
+ ((lint) next
+ - (lint) ut_align_offset(rec, UNIV_PAGE_SIZE));
+ field_value &= REC_NEXT_MASK;
}
+
+ mach_write_to_2(rec - REC_NEXT, field_value);
}
-/**********************************************************
+/******************************************************//**
The following function is used to get the number of fields
-in an old-style record. */
+in an old-style record.
+@return number of data fields */
UNIV_INLINE
ulint
rec_get_n_fields_old(
/*=================*/
- /* out: number of data fields */
- rec_t* rec) /* in: physical record */
+ const rec_t* rec) /*!< in: physical record */
{
ulint ret;
@@ -372,15 +448,15 @@ rec_get_n_fields_old(
return(ret);
}
-/**********************************************************
+/******************************************************//**
The following function is used to set the number of fields
in an old-style record. */
UNIV_INLINE
void
rec_set_n_fields_old(
/*=================*/
- rec_t* rec, /* in: physical record */
- ulint n_fields) /* in: the number of fields */
+ rec_t* rec, /*!< in: physical record */
+ ulint n_fields) /*!< in: the number of fields */
{
ut_ad(rec);
ut_ad(n_fields <= REC_MAX_N_FIELDS);
@@ -390,14 +466,14 @@ rec_set_n_fields_old(
REC_OLD_N_FIELDS_MASK, REC_OLD_N_FIELDS_SHIFT);
}
-/**********************************************************
-The following function retrieves the status bits of a new-style record. */
+/******************************************************//**
+The following function retrieves the status bits of a new-style record.
+@return status bits */
UNIV_INLINE
ulint
rec_get_status(
/*===========*/
- /* out: status bits */
- rec_t* rec) /* in: physical record */
+ const rec_t* rec) /*!< in: physical record */
{
ulint ret;
@@ -410,16 +486,16 @@ rec_get_status(
return(ret);
}
-/**********************************************************
+/******************************************************//**
The following function is used to get the number of fields
-in a record. */
+in a record.
+@return number of data fields */
UNIV_INLINE
ulint
rec_get_n_fields(
/*=============*/
- /* out: number of data fields */
- rec_t* rec, /* in: physical record */
- dict_index_t* index) /* in: record descriptor */
+ const rec_t* rec, /*!< in: physical record */
+ const dict_index_t* index) /*!< in: record descriptor */
{
ut_ad(rec);
ut_ad(index);
@@ -442,113 +518,129 @@ rec_get_n_fields(
}
}
-/**********************************************************
+/******************************************************//**
The following function is used to get the number of records owned by the
-previous directory record. */
+previous directory record.
+@return number of owned records */
UNIV_INLINE
ulint
-rec_get_n_owned(
-/*============*/
- /* out: number of owned records */
- rec_t* rec, /* in: physical record */
- ulint comp) /* in: nonzero=compact page format */
+rec_get_n_owned_old(
+/*================*/
+ const rec_t* rec) /*!< in: old-style physical record */
{
- ulint ret;
-
- ut_ad(rec);
+ return(rec_get_bit_field_1(rec, REC_OLD_N_OWNED,
+ REC_N_OWNED_MASK, REC_N_OWNED_SHIFT));
+}
- ret = rec_get_bit_field_1(rec,
- comp ? REC_NEW_N_OWNED : REC_OLD_N_OWNED,
- REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
- ut_ad(ret <= REC_MAX_N_OWNED);
+/******************************************************//**
+The following function is used to set the number of owned records. */
+UNIV_INLINE
+void
+rec_set_n_owned_old(
+/*================*/
+ rec_t* rec, /*!< in: old-style physical record */
+ ulint n_owned) /*!< in: the number of owned */
+{
+ rec_set_bit_field_1(rec, n_owned, REC_OLD_N_OWNED,
+ REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
+}
- return(ret);
+/******************************************************//**
+The following function is used to get the number of records owned by the
+previous directory record.
+@return number of owned records */
+UNIV_INLINE
+ulint
+rec_get_n_owned_new(
+/*================*/
+ const rec_t* rec) /*!< in: new-style physical record */
+{
+ return(rec_get_bit_field_1(rec, REC_NEW_N_OWNED,
+ REC_N_OWNED_MASK, REC_N_OWNED_SHIFT));
}
-/**********************************************************
+/******************************************************//**
The following function is used to set the number of owned records. */
UNIV_INLINE
void
-rec_set_n_owned(
-/*============*/
- rec_t* rec, /* in: physical record */
- ulint comp, /* in: nonzero=compact page format */
- ulint n_owned) /* in: the number of owned */
+rec_set_n_owned_new(
+/*================*/
+ rec_t* rec, /*!< in/out: new-style physical record */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ ulint n_owned)/*!< in: the number of owned */
{
- ut_ad(rec);
- ut_ad(n_owned <= REC_MAX_N_OWNED);
-
- rec_set_bit_field_1(rec, n_owned,
- comp ? REC_NEW_N_OWNED : REC_OLD_N_OWNED,
+ rec_set_bit_field_1(rec, n_owned, REC_NEW_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
+ if (UNIV_LIKELY_NULL(page_zip)
+ && UNIV_LIKELY(rec_get_status(rec)
+ != REC_STATUS_SUPREMUM)) {
+ page_zip_rec_set_owned(page_zip, rec, n_owned);
+ }
}
-/**********************************************************
-The following function is used to retrieve the info bits of a record. */
+/******************************************************//**
+The following function is used to retrieve the info bits of a record.
+@return info bits */
UNIV_INLINE
ulint
rec_get_info_bits(
/*==============*/
- /* out: info bits */
- rec_t* rec, /* in: physical record */
- ulint comp) /* in: nonzero=compact page format */
+ const rec_t* rec, /*!< in: physical record */
+ ulint comp) /*!< in: nonzero=compact page format */
{
- ulint ret;
-
- ut_ad(rec);
-
- ret = rec_get_bit_field_1(rec,
- comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS,
- REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
- ut_ad((ret & ~REC_INFO_BITS_MASK) == 0);
-
- return(ret);
+ return(rec_get_bit_field_1(
+ rec, comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS,
+ REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT));
}
-/**********************************************************
+/******************************************************//**
The following function is used to set the info bits of a record. */
UNIV_INLINE
void
-rec_set_info_bits(
-/*==============*/
- rec_t* rec, /* in: physical record */
- ulint comp, /* in: nonzero=compact page format */
- ulint bits) /* in: info bits */
+rec_set_info_bits_old(
+/*==================*/
+ rec_t* rec, /*!< in: old-style physical record */
+ ulint bits) /*!< in: info bits */
{
- ut_ad(rec);
- ut_ad((bits & ~REC_INFO_BITS_MASK) == 0);
-
- rec_set_bit_field_1(rec, bits,
- comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS,
+ rec_set_bit_field_1(rec, bits, REC_OLD_INFO_BITS,
+ REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
+}
+/******************************************************//**
+The following function is used to set the info bits of a record. */
+UNIV_INLINE
+void
+rec_set_info_bits_new(
+/*==================*/
+ rec_t* rec, /*!< in/out: new-style physical record */
+ ulint bits) /*!< in: info bits */
+{
+ rec_set_bit_field_1(rec, bits, REC_NEW_INFO_BITS,
REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
}
-/**********************************************************
+/******************************************************//**
The following function is used to set the status bits of a new-style record. */
UNIV_INLINE
void
rec_set_status(
/*===========*/
- rec_t* rec, /* in: physical record */
- ulint bits) /* in: info bits */
+ rec_t* rec, /*!< in/out: physical record */
+ ulint bits) /*!< in: info bits */
{
- ut_ad(rec);
- ut_ad((bits & ~REC_NEW_STATUS_MASK) == 0);
-
rec_set_bit_field_1(rec, bits, REC_NEW_STATUS,
REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT);
}
-/**********************************************************
+/******************************************************//**
The following function is used to retrieve the info and status
-bits of a record. (Only compact records have status bits.) */
+bits of a record. (Only compact records have status bits.)
+@return info and status bits */
UNIV_INLINE
ulint
rec_get_info_and_status_bits(
/*=========================*/
- /* out: info bits */
- rec_t* rec, /* in: physical record */
- ulint comp) /* in: nonzero=compact page format */
+ const rec_t* rec, /*!< in: physical record */
+ ulint comp) /*!< in: nonzero=compact page format */
{
ulint bits;
#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \
@@ -563,38 +655,33 @@ rec_get_info_and_status_bits(
}
return(bits);
}
-/**********************************************************
+/******************************************************//**
The following function is used to set the info and status
bits of a record. (Only compact records have status bits.) */
UNIV_INLINE
void
rec_set_info_and_status_bits(
/*=========================*/
- rec_t* rec, /* in: physical record */
- ulint comp, /* in: nonzero=compact page format */
- ulint bits) /* in: info bits */
+ rec_t* rec, /*!< in/out: physical record */
+ ulint bits) /*!< in: info bits */
{
#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \
& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)
# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap"
#endif
- if (comp) {
- rec_set_status(rec, bits & REC_NEW_STATUS_MASK);
- } else {
- ut_ad(!(bits & ~(REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)));
- }
- rec_set_info_bits(rec, comp, bits & ~REC_NEW_STATUS_MASK);
+ rec_set_status(rec, bits & REC_NEW_STATUS_MASK);
+ rec_set_info_bits_new(rec, bits & ~REC_NEW_STATUS_MASK);
}
-/**********************************************************
-The following function tells if record is delete marked. */
+/******************************************************//**
+The following function tells if record is delete marked.
+@return nonzero if delete marked */
UNIV_INLINE
ulint
rec_get_deleted_flag(
/*=================*/
- /* out: nonzero if delete marked */
- rec_t* rec, /* in: physical record */
- ulint comp) /* in: nonzero=compact page format */
+ const rec_t* rec, /*!< in: physical record */
+ ulint comp) /*!< in: nonzero=compact page format */
{
if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) {
return(UNIV_UNLIKELY(
@@ -609,19 +696,18 @@ rec_get_deleted_flag(
}
}
-/**********************************************************
+/******************************************************//**
The following function is used to set the deleted bit. */
UNIV_INLINE
void
-rec_set_deleted_flag(
-/*=================*/
- rec_t* rec, /* in: physical record */
- ulint comp, /* in: nonzero=compact page format */
- ulint flag) /* in: nonzero if delete marked */
+rec_set_deleted_flag_old(
+/*=====================*/
+ rec_t* rec, /*!< in: old-style physical record */
+ ulint flag) /*!< in: nonzero if delete marked */
{
ulint val;
- val = rec_get_info_bits(rec, comp);
+ val = rec_get_info_bits(rec, FALSE);
if (flag) {
val |= REC_INFO_DELETED_FLAG;
@@ -629,70 +715,113 @@ rec_set_deleted_flag(
val &= ~REC_INFO_DELETED_FLAG;
}
- rec_set_info_bits(rec, comp, val);
+ rec_set_info_bits_old(rec, val);
}
-/**********************************************************
-The following function tells if a new-style record is a node pointer. */
+/******************************************************//**
+The following function is used to set the deleted bit. */
+UNIV_INLINE
+void
+rec_set_deleted_flag_new(
+/*=====================*/
+ rec_t* rec, /*!< in/out: new-style physical record */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ ulint flag) /*!< in: nonzero if delete marked */
+{
+ ulint val;
+
+ val = rec_get_info_bits(rec, TRUE);
+
+ if (flag) {
+ val |= REC_INFO_DELETED_FLAG;
+ } else {
+ val &= ~REC_INFO_DELETED_FLAG;
+ }
+
+ rec_set_info_bits_new(rec, val);
+
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ page_zip_rec_set_deleted(page_zip, rec, flag);
+ }
+}
+
+/******************************************************//**
+The following function tells if a new-style record is a node pointer.
+@return TRUE if node pointer */
UNIV_INLINE
ibool
rec_get_node_ptr_flag(
/*==================*/
- /* out: TRUE if node pointer */
- rec_t* rec) /* in: physical record */
+ const rec_t* rec) /*!< in: physical record */
{
return(REC_STATUS_NODE_PTR == rec_get_status(rec));
}
-/**********************************************************
-The following function is used to get the order number of the record in the
-heap of the index page. */
+/******************************************************//**
+The following function is used to get the order number
+of an old-style record in the heap of the index page.
+@return heap order number */
UNIV_INLINE
ulint
-rec_get_heap_no(
-/*============*/
- /* out: heap order number */
- rec_t* rec, /* in: physical record */
- ulint comp) /* in: nonzero=compact page format */
+rec_get_heap_no_old(
+/*================*/
+ const rec_t* rec) /*!< in: physical record */
{
- ulint ret;
-
- ut_ad(rec);
+ return(rec_get_bit_field_2(rec, REC_OLD_HEAP_NO,
+ REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT));
+}
- ret = rec_get_bit_field_2(rec,
- comp ? REC_NEW_HEAP_NO : REC_OLD_HEAP_NO,
- REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
- ut_ad(ret <= REC_MAX_HEAP_NO);
+/******************************************************//**
+The following function is used to set the heap number
+field in an old-style record. */
+UNIV_INLINE
+void
+rec_set_heap_no_old(
+/*================*/
+ rec_t* rec, /*!< in: physical record */
+ ulint heap_no)/*!< in: the heap number */
+{
+ rec_set_bit_field_2(rec, heap_no, REC_OLD_HEAP_NO,
+ REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
+}
- return(ret);
+/******************************************************//**
+The following function is used to get the order number
+of a new-style record in the heap of the index page.
+@return heap order number */
+UNIV_INLINE
+ulint
+rec_get_heap_no_new(
+/*================*/
+ const rec_t* rec) /*!< in: physical record */
+{
+ return(rec_get_bit_field_2(rec, REC_NEW_HEAP_NO,
+ REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT));
}
-/**********************************************************
-The following function is used to set the heap number field in the record. */
+/******************************************************//**
+The following function is used to set the heap number
+field in a new-style record. */
UNIV_INLINE
void
-rec_set_heap_no(
-/*============*/
- rec_t* rec, /* in: physical record */
- ulint comp, /* in: nonzero=compact page format */
- ulint heap_no)/* in: the heap number */
+rec_set_heap_no_new(
+/*================*/
+ rec_t* rec, /*!< in/out: physical record */
+ ulint heap_no)/*!< in: the heap number */
{
- ut_ad(heap_no <= REC_MAX_HEAP_NO);
-
- rec_set_bit_field_2(rec, heap_no,
- comp ? REC_NEW_HEAP_NO : REC_OLD_HEAP_NO,
+ rec_set_bit_field_2(rec, heap_no, REC_NEW_HEAP_NO,
REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
}
-/**********************************************************
+/******************************************************//**
The following function is used to test whether the data offsets in the record
-are stored in one-byte or two-byte format. */
+are stored in one-byte or two-byte format.
+@return TRUE if 1-byte form */
UNIV_INLINE
ibool
rec_get_1byte_offs_flag(
/*====================*/
- /* out: TRUE if 1-byte form */
- rec_t* rec) /* in: physical record */
+ const rec_t* rec) /*!< in: physical record */
{
#if TRUE != 1
#error "TRUE != 1"
@@ -702,14 +831,14 @@ rec_get_1byte_offs_flag(
REC_OLD_SHORT_SHIFT));
}
-/**********************************************************
+/******************************************************//**
The following function is used to set the 1-byte offsets flag. */
UNIV_INLINE
void
rec_set_1byte_offs_flag(
/*====================*/
- rec_t* rec, /* in: physical record */
- ibool flag) /* in: TRUE if 1byte form */
+ rec_t* rec, /*!< in: physical record */
+ ibool flag) /*!< in: TRUE if 1byte form */
{
#if TRUE != 1
#error "TRUE != 1"
@@ -720,18 +849,17 @@ rec_set_1byte_offs_flag(
REC_OLD_SHORT_SHIFT);
}
-/**********************************************************
+/******************************************************//**
Returns the offset of nth field end if the record is stored in the 1-byte
offsets form. If the field is SQL null, the flag is ORed in the returned
-value. */
+value.
+@return offset of the end of the field, SQL null flag ORed */
UNIV_INLINE
ulint
rec_1_get_field_end_info(
/*=====================*/
- /* out: offset of the start of the field, SQL null
- flag ORed */
- rec_t* rec, /* in: record */
- ulint n) /* in: field index */
+ const rec_t* rec, /*!< in: record */
+ ulint n) /*!< in: field index */
{
ut_ad(rec_get_1byte_offs_flag(rec));
ut_ad(n < rec_get_n_fields_old(rec));
@@ -739,18 +867,18 @@ rec_1_get_field_end_info(
return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1)));
}
-/**********************************************************
+/******************************************************//**
Returns the offset of nth field end if the record is stored in the 2-byte
offsets form. If the field is SQL null, the flag is ORed in the returned
-value. */
+value.
+@return offset of the end of the field, SQL null flag and extern
+storage flag ORed */
UNIV_INLINE
ulint
rec_2_get_field_end_info(
/*=====================*/
- /* out: offset of the start of the field, SQL null
- flag and extern storage flag ORed */
- rec_t* rec, /* in: record */
- ulint n) /* in: field index */
+ const rec_t* rec, /*!< in: record */
+ ulint n) /*!< in: field index */
{
ut_ad(!rec_get_1byte_offs_flag(rec));
ut_ad(n < rec_get_n_fields_old(rec));
@@ -758,61 +886,54 @@ rec_2_get_field_end_info(
return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2)));
}
-#ifdef UNIV_DEBUG
-/* Length of the rec_get_offsets() header */
-# define REC_OFFS_HEADER_SIZE 4
-#else /* UNIV_DEBUG */
-/* Length of the rec_get_offsets() header */
-# define REC_OFFS_HEADER_SIZE 2
-#endif /* UNIV_DEBUG */
-
/* Get the base address of offsets. The extra_size is stored at
this position, and following positions hold the end offsets of
the fields. */
#define rec_offs_base(offsets) (offsets + REC_OFFS_HEADER_SIZE)
-/**************************************************************
+/**********************************************************//**
The following function returns the number of allocated elements
-for an array of offsets. */
+for an array of offsets.
+@return number of elements */
UNIV_INLINE
ulint
rec_offs_get_n_alloc(
/*=================*/
- /* out: number of elements */
- const ulint* offsets)/* in: array for rec_get_offsets() */
+ const ulint* offsets)/*!< in: array for rec_get_offsets() */
{
ulint n_alloc;
ut_ad(offsets);
n_alloc = offsets[0];
- ut_ad(n_alloc > 0);
+ ut_ad(n_alloc > REC_OFFS_HEADER_SIZE);
+ UNIV_MEM_ASSERT_W(offsets, n_alloc * sizeof *offsets);
return(n_alloc);
}
-/**************************************************************
+/**********************************************************//**
The following function sets the number of allocated elements
for an array of offsets. */
UNIV_INLINE
void
rec_offs_set_n_alloc(
/*=================*/
- ulint* offsets, /* out: array for rec_get_offsets(),
+ ulint* offsets, /*!< out: array for rec_get_offsets(),
must be allocated */
- ulint n_alloc) /* in: number of elements */
+ ulint n_alloc) /*!< in: number of elements */
{
ut_ad(offsets);
- ut_ad(n_alloc > 0);
+ ut_ad(n_alloc > REC_OFFS_HEADER_SIZE);
UNIV_MEM_ASSERT_AND_ALLOC(offsets, n_alloc * sizeof *offsets);
offsets[0] = n_alloc;
}
-/**************************************************************
-The following function returns the number of fields in a record. */
+/**********************************************************//**
+The following function returns the number of fields in a record.
+@return number of fields */
UNIV_INLINE
ulint
rec_offs_n_fields(
/*==============*/
- /* out: number of fields */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
ulint n_fields;
ut_ad(offsets);
@@ -824,16 +945,17 @@ rec_offs_n_fields(
return(n_fields);
}
-/****************************************************************
-Validates offsets returned by rec_get_offsets(). */
+/************************************************************//**
+Validates offsets returned by rec_get_offsets().
+@return TRUE if valid */
UNIV_INLINE
ibool
rec_offs_validate(
/*==============*/
- /* out: TRUE if valid */
- rec_t* rec, /* in: record or NULL */
- dict_index_t* index, /* in: record descriptor or NULL */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
+ const rec_t* rec, /*!< in: record or NULL */
+ const dict_index_t* index, /*!< in: record descriptor or NULL */
+ const ulint* offsets)/*!< in: array returned by
+ rec_get_offsets() */
{
ulint i = rec_offs_n_fields(offsets);
ulint last = ULINT_MAX;
@@ -878,52 +1000,50 @@ rec_offs_validate(
}
return(TRUE);
}
-/****************************************************************
+#ifdef UNIV_DEBUG
+/************************************************************//**
Updates debug data in offsets, in order to avoid bogus
rec_offs_validate() failures. */
UNIV_INLINE
void
rec_offs_make_valid(
/*================*/
- rec_t* rec __attribute__((unused)),
- /* in: record */
- dict_index_t* index __attribute__((unused)),
- /* in: record descriptor */
- ulint* offsets __attribute__((unused)))
- /* in: array returned by rec_get_offsets() */
+ const rec_t* rec, /*!< in: record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ ulint* offsets)/*!< in: array returned by
+ rec_get_offsets() */
{
-#ifdef UNIV_DEBUG
+ ut_ad(rec);
+ ut_ad(index);
+ ut_ad(offsets);
ut_ad(rec_get_n_fields(rec, index) >= rec_offs_n_fields(offsets));
offsets[2] = (ulint) rec;
offsets[3] = (ulint) index;
-#endif /* UNIV_DEBUG */
}
+#endif /* UNIV_DEBUG */
-/****************************************************************
-The following function is used to get a pointer to the nth
-data field in a record. */
+/************************************************************//**
+The following function is used to get an offset to the nth
+data field in a record.
+@return offset from the origin of rec */
UNIV_INLINE
-byte*
-rec_get_nth_field(
-/*==============*/
- /* out: pointer to the field */
- rec_t* rec, /* in: record */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint n, /* in: index of the field */
- ulint* len) /* out: length of the field; UNIV_SQL_NULL
+ulint
+rec_get_nth_field_offs(
+/*===================*/
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint n, /*!< in: index of the field */
+ ulint* len) /*!< out: length of the field; UNIV_SQL_NULL
if SQL null */
{
- byte* field;
+ ulint offs;
ulint length;
- ut_ad(rec);
- ut_ad(rec_offs_validate(rec, NULL, offsets));
ut_ad(n < rec_offs_n_fields(offsets));
ut_ad(len);
if (UNIV_UNLIKELY(n == 0)) {
- field = rec;
+ offs = 0;
} else {
- field = rec + (rec_offs_base(offsets)[n] & REC_OFFS_MASK);
+ offs = rec_offs_base(offsets)[n] & REC_OFFS_MASK;
}
length = rec_offs_base(offsets)[1 + n];
@@ -932,36 +1052,50 @@ rec_get_nth_field(
length = UNIV_SQL_NULL;
} else {
length &= REC_OFFS_MASK;
- length -= field - rec;
+ length -= offs;
}
*len = length;
- return(field);
+ return(offs);
}
-/**********************************************************
+/******************************************************//**
Determine if the offsets are for a record in the new
-compact format. */
+compact format.
+@return nonzero if compact format */
UNIV_INLINE
ulint
rec_offs_comp(
/*==========*/
- /* out: nonzero if compact format */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
ut_ad(rec_offs_validate(NULL, NULL, offsets));
return(*rec_offs_base(offsets) & REC_OFFS_COMPACT);
}
-/**********************************************************
-Returns nonzero if the extern bit is set in nth field of rec. */
+/******************************************************//**
+Determine if the offsets are for a record containing
+externally stored columns.
+@return nonzero if externally stored */
+UNIV_INLINE
+ulint
+rec_offs_any_extern(
+/*================*/
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+{
+ ut_ad(rec_offs_validate(NULL, NULL, offsets));
+ return(UNIV_UNLIKELY(*rec_offs_base(offsets) & REC_OFFS_EXTERNAL));
+}
+
+/******************************************************//**
+Returns nonzero if the extern bit is set in nth field of rec.
+@return nonzero if externally stored */
UNIV_INLINE
ulint
rec_offs_nth_extern(
/*================*/
- /* out: nonzero if externally stored */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint n) /* in: nth field */
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint n) /*!< in: nth field */
{
ut_ad(rec_offs_validate(NULL, NULL, offsets));
ut_ad(n < rec_offs_n_fields(offsets));
@@ -969,15 +1103,15 @@ rec_offs_nth_extern(
& REC_OFFS_EXTERNAL));
}
-/**********************************************************
-Returns nonzero if the SQL NULL bit is set in nth field of rec. */
+/******************************************************//**
+Returns nonzero if the SQL NULL bit is set in nth field of rec.
+@return nonzero if SQL NULL */
UNIV_INLINE
ulint
rec_offs_nth_sql_null(
/*==================*/
- /* out: nonzero if SQL NULL */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint n) /* in: nth field */
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint n) /*!< in: nth field */
{
ut_ad(rec_offs_validate(NULL, NULL, offsets));
ut_ad(n < rec_offs_n_fields(offsets));
@@ -985,15 +1119,15 @@ rec_offs_nth_sql_null(
& REC_OFFS_SQL_NULL));
}
-/**********************************************************
-Gets the physical size of a field. */
+/******************************************************//**
+Gets the physical size of a field.
+@return length of field */
UNIV_INLINE
ulint
rec_offs_nth_size(
/*==============*/
- /* out: length of field */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint n) /* in: nth field */
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint n) /*!< in: nth field */
{
ut_ad(rec_offs_validate(NULL, NULL, offsets));
ut_ad(n < rec_offs_n_fields(offsets));
@@ -1004,60 +1138,43 @@ rec_offs_nth_size(
& REC_OFFS_MASK);
}
-/**********************************************************
-Returns TRUE if the extern bit is set in any of the fields
-of an old-style record. */
+/******************************************************//**
+Returns the number of extern bits set in a record.
+@return number of externally stored fields */
UNIV_INLINE
-ibool
-rec_offs_any_extern(
-/*================*/
- /* out: TRUE if a field is stored externally */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
+ulint
+rec_offs_n_extern(
+/*==============*/
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
- ulint i;
- for (i = rec_offs_n_fields(offsets); i--; ) {
- if (rec_offs_nth_extern(offsets, i)) {
- return(TRUE);
+ ulint n = 0;
+
+ if (rec_offs_any_extern(offsets)) {
+ ulint i;
+
+ for (i = rec_offs_n_fields(offsets); i--; ) {
+ if (rec_offs_nth_extern(offsets, i)) {
+ n++;
+ }
}
}
- return(FALSE);
-}
-/***************************************************************
-Sets the value of the ith field extern storage bit. */
-UNIV_INLINE
-void
-rec_set_nth_field_extern_bit(
-/*=========================*/
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: record descriptor */
- ulint i, /* in: ith field */
- ibool val, /* in: value to set */
- mtr_t* mtr) /* in: mtr holding an X-latch to the page
- where rec is, or NULL; in the NULL case
- we do not write to log about the change */
-{
- if (dict_table_is_comp(index->table)) {
- rec_set_nth_field_extern_bit_new(rec, index, i, val, mtr);
- } else {
- rec_set_nth_field_extern_bit_old(rec, i, val, mtr);
- }
+ return(n);
}
-/**********************************************************
+/******************************************************//**
Returns the offset of n - 1th field end if the record is stored in the 1-byte
offsets form. If the field is SQL null, the flag is ORed in the returned
value. This function and the 2-byte counterpart are defined here because the
C-compiler was not able to sum negative and positive constant offsets, and
-warned of constant arithmetic overflow within the compiler. */
+warned of constant arithmetic overflow within the compiler.
+@return offset of the end of the PREVIOUS field, SQL null flag ORed */
UNIV_INLINE
ulint
rec_1_get_prev_field_end_info(
/*==========================*/
- /* out: offset of the start of the PREVIOUS field, SQL
- null flag ORed */
- rec_t* rec, /* in: record */
- ulint n) /* in: field index */
+ const rec_t* rec, /*!< in: record */
+ ulint n) /*!< in: field index */
{
ut_ad(rec_get_1byte_offs_flag(rec));
ut_ad(n <= rec_get_n_fields_old(rec));
@@ -1065,18 +1182,17 @@ rec_1_get_prev_field_end_info(
return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n)));
}
-/**********************************************************
+/******************************************************//**
Returns the offset of n - 1th field end if the record is stored in the 2-byte
offsets form. If the field is SQL null, the flag is ORed in the returned
-value. */
+value.
+@return offset of the end of the PREVIOUS field, SQL null flag ORed */
UNIV_INLINE
ulint
rec_2_get_prev_field_end_info(
/*==========================*/
- /* out: offset of the start of the PREVIOUS field, SQL
- null flag ORed */
- rec_t* rec, /* in: record */
- ulint n) /* in: field index */
+ const rec_t* rec, /*!< in: record */
+ ulint n) /*!< in: field index */
{
ut_ad(!rec_get_1byte_offs_flag(rec));
ut_ad(n <= rec_get_n_fields_old(rec));
@@ -1084,16 +1200,16 @@ rec_2_get_prev_field_end_info(
return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n)));
}
-/**********************************************************
+/******************************************************//**
Sets the field end info for the nth field if the record is stored in the
1-byte format. */
UNIV_INLINE
void
rec_1_set_field_end_info(
/*=====================*/
- rec_t* rec, /* in: record */
- ulint n, /* in: field index */
- ulint info) /* in: value to set */
+ rec_t* rec, /*!< in: record */
+ ulint n, /*!< in: field index */
+ ulint info) /*!< in: value to set */
{
ut_ad(rec_get_1byte_offs_flag(rec));
ut_ad(n < rec_get_n_fields_old(rec));
@@ -1101,16 +1217,16 @@ rec_1_set_field_end_info(
mach_write_to_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1), info);
}
-/**********************************************************
+/******************************************************//**
Sets the field end info for the nth field if the record is stored in the
2-byte format. */
UNIV_INLINE
void
rec_2_set_field_end_info(
/*=====================*/
- rec_t* rec, /* in: record */
- ulint n, /* in: field index */
- ulint info) /* in: value to set */
+ rec_t* rec, /*!< in: record */
+ ulint n, /*!< in: field index */
+ ulint info) /*!< in: value to set */
{
ut_ad(!rec_get_1byte_offs_flag(rec));
ut_ad(n < rec_get_n_fields_old(rec));
@@ -1118,16 +1234,16 @@ rec_2_set_field_end_info(
mach_write_to_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2), info);
}
-/**********************************************************
+/******************************************************//**
Returns the offset of nth field start if the record is stored in the 1-byte
-offsets form. */
+offsets form.
+@return offset of the start of the field */
UNIV_INLINE
ulint
rec_1_get_field_start_offs(
/*=======================*/
- /* out: offset of the start of the field */
- rec_t* rec, /* in: record */
- ulint n) /* in: field index */
+ const rec_t* rec, /*!< in: record */
+ ulint n) /*!< in: field index */
{
ut_ad(rec_get_1byte_offs_flag(rec));
ut_ad(n <= rec_get_n_fields_old(rec));
@@ -1141,16 +1257,16 @@ rec_1_get_field_start_offs(
& ~REC_1BYTE_SQL_NULL_MASK);
}
-/**********************************************************
+/******************************************************//**
Returns the offset of nth field start if the record is stored in the 2-byte
-offsets form. */
+offsets form.
+@return offset of the start of the field */
UNIV_INLINE
ulint
rec_2_get_field_start_offs(
/*=======================*/
- /* out: offset of the start of the field */
- rec_t* rec, /* in: record */
- ulint n) /* in: field index */
+ const rec_t* rec, /*!< in: record */
+ ulint n) /*!< in: field index */
{
ut_ad(!rec_get_1byte_offs_flag(rec));
ut_ad(n <= rec_get_n_fields_old(rec));
@@ -1164,18 +1280,18 @@ rec_2_get_field_start_offs(
& ~(REC_2BYTE_SQL_NULL_MASK | REC_2BYTE_EXTERN_MASK));
}
-/**********************************************************
+/******************************************************//**
The following function is used to read the offset of the start of a data field
in the record. The start of an SQL null field is the end offset of the
previous non-null field, or 0, if none exists. If n is the number of the last
-field + 1, then the end offset of the last field is returned. */
+field + 1, then the end offset of the last field is returned.
+@return offset of the start of the field */
UNIV_INLINE
ulint
rec_get_field_start_offs(
/*=====================*/
- /* out: offset of the start of the field */
- rec_t* rec, /* in: record */
- ulint n) /* in: field index */
+ const rec_t* rec, /*!< in: record */
+ ulint n) /*!< in: field index */
{
ut_ad(rec);
ut_ad(n <= rec_get_n_fields_old(rec));
@@ -1193,17 +1309,17 @@ rec_get_field_start_offs(
return(rec_2_get_field_start_offs(rec, n));
}
-/****************************************************************
+/************************************************************//**
Gets the physical size of an old-style field.
Also an SQL null may have a field of size > 0,
-if the data type is of a fixed size. */
+if the data type is of a fixed size.
+@return field size in bytes */
UNIV_INLINE
ulint
rec_get_nth_field_size(
/*===================*/
- /* out: field size in bytes */
- rec_t* rec, /* in: record */
- ulint n) /* in: index of the field */
+ const rec_t* rec, /*!< in: record */
+ ulint n) /*!< in: index of the field */
{
ulint os;
ulint next_os;
@@ -1216,7 +1332,7 @@ rec_get_nth_field_size(
return(next_os - os);
}
-/***************************************************************
+/***********************************************************//**
This is used to modify the value of an already existing field in a record.
The previous value must have exactly the same size as the new value. If len
is UNIV_SQL_NULL then the field is treated as an SQL null.
@@ -1226,12 +1342,12 @@ UNIV_INLINE
void
rec_set_nth_field(
/*==============*/
- rec_t* rec, /* in: record */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint n, /* in: index number of the field */
- const void* data, /* in: pointer to the data
+ rec_t* rec, /*!< in: record */
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint n, /*!< in: index number of the field */
+ const void* data, /*!< in: pointer to the data
if not SQL null */
- ulint len) /* in: length of the data or UNIV_SQL_NULL */
+ ulint len) /*!< in: length of the data or UNIV_SQL_NULL */
{
byte* data2;
ulint len2;
@@ -1260,32 +1376,32 @@ rec_set_nth_field(
ut_memcpy(data2, data, len);
}
-/**************************************************************
+/**********************************************************//**
The following function returns the data size of an old-style physical
record, that is the sum of field lengths. SQL null fields
are counted as length 0 fields. The value returned by the function
-is the distance from record origin to record end in bytes. */
+is the distance from record origin to record end in bytes.
+@return size */
UNIV_INLINE
ulint
rec_get_data_size_old(
/*==================*/
- /* out: size */
- rec_t* rec) /* in: physical record */
+ const rec_t* rec) /*!< in: physical record */
{
ut_ad(rec);
return(rec_get_field_start_offs(rec, rec_get_n_fields_old(rec)));
}
-/**************************************************************
+/**********************************************************//**
The following function sets the number of fields in offsets. */
UNIV_INLINE
void
rec_offs_set_n_fields(
/*==================*/
- ulint* offsets, /* in/out: array returned by
+ ulint* offsets, /*!< in/out: array returned by
rec_get_offsets() */
- ulint n_fields) /* in: number of fields */
+ ulint n_fields) /*!< in: number of fields */
{
ut_ad(offsets);
ut_ad(n_fields > 0);
@@ -1295,17 +1411,17 @@ rec_offs_set_n_fields(
offsets[1] = n_fields;
}
-/**************************************************************
+/**********************************************************//**
The following function returns the data size of a physical
record, that is the sum of field lengths. SQL null fields
are counted as length 0 fields. The value returned by the function
-is the distance from record origin to record end in bytes. */
+is the distance from record origin to record end in bytes.
+@return size */
UNIV_INLINE
ulint
rec_offs_data_size(
/*===============*/
- /* out: size */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
ulint size;
@@ -1316,79 +1432,81 @@ rec_offs_data_size(
return(size);
}
-/**************************************************************
+/**********************************************************//**
Returns the total size of record minus data size of record. The value
returned by the function is the distance from record start to record origin
-in bytes. */
+in bytes.
+@return size */
UNIV_INLINE
ulint
rec_offs_extra_size(
/*================*/
- /* out: size */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
ulint size;
ut_ad(rec_offs_validate(NULL, NULL, offsets));
- size = *rec_offs_base(offsets) & ~REC_OFFS_COMPACT;
+ size = *rec_offs_base(offsets) & ~(REC_OFFS_COMPACT | REC_OFFS_EXTERNAL);
ut_ad(size < UNIV_PAGE_SIZE);
return(size);
}
-/**************************************************************
-Returns the total size of a physical record. */
+/**********************************************************//**
+Returns the total size of a physical record.
+@return size */
UNIV_INLINE
ulint
rec_offs_size(
/*==========*/
- /* out: size */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
return(rec_offs_data_size(offsets) + rec_offs_extra_size(offsets));
}
-/**************************************************************
-Returns a pointer to the end of the record. */
+/**********************************************************//**
+Returns a pointer to the end of the record.
+@return pointer to end */
UNIV_INLINE
byte*
rec_get_end(
/*========*/
- /* out: pointer to end */
- rec_t* rec, /* in: pointer to record */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
+ rec_t* rec, /*!< in: pointer to record */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
return(rec + rec_offs_data_size(offsets));
}
-/**************************************************************
-Returns a pointer to the start of the record. */
+/**********************************************************//**
+Returns a pointer to the start of the record.
+@return pointer to start */
UNIV_INLINE
byte*
rec_get_start(
/*==========*/
- /* out: pointer to start */
- rec_t* rec, /* in: pointer to record */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
+ rec_t* rec, /*!< in: pointer to record */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
return(rec - rec_offs_extra_size(offsets));
}
-/*******************************************************************
-Copies a physical record to a buffer. */
+/***************************************************************//**
+Copies a physical record to a buffer.
+@return pointer to the origin of the copy */
UNIV_INLINE
rec_t*
rec_copy(
/*=====*/
- /* out: pointer to the origin of the copy */
- void* buf, /* in: buffer */
- const rec_t* rec, /* in: physical record */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
+ void* buf, /*!< in: buffer */
+ const rec_t* rec, /*!< in: physical record */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
ulint extra_len;
ulint data_len;
ut_ad(rec && buf);
ut_ad(rec_offs_validate((rec_t*) rec, NULL, offsets));
- ut_ad(rec_validate((rec_t*) rec, offsets));
+ ut_ad(rec_validate(rec, offsets));
extra_len = rec_offs_extra_size(offsets);
data_len = rec_offs_data_size(offsets);
@@ -1398,18 +1516,19 @@ rec_copy(
return((byte*)buf + extra_len);
}
-/**************************************************************
+/**********************************************************//**
Returns the extra size of an old-style physical record if we know its
-data size and number of fields. */
+data size and number of fields.
+@return extra size */
UNIV_INLINE
ulint
rec_get_converted_extra_size(
/*=========================*/
- /* out: extra size */
- ulint data_size, /* in: data size */
- ulint n_fields) /* in: number of fields */
+ ulint data_size, /*!< in: data size */
+ ulint n_fields, /*!< in: number of fields */
+ ulint n_ext) /*!< in: number of externally stored columns */
{
- if (data_size <= REC_1BYTE_OFFS_LIMIT) {
+ if (!n_ext && data_size <= REC_1BYTE_OFFS_LIMIT) {
return(REC_N_OLD_EXTRA_BYTES + n_fields);
}
@@ -1417,26 +1536,17 @@ rec_get_converted_extra_size(
return(REC_N_OLD_EXTRA_BYTES + 2 * n_fields);
}
-/**************************************************************
-The following function returns the size of a data tuple when converted to
-a new-style physical record. */
-
-ulint
-rec_get_converted_size_new(
-/*=======================*/
- /* out: size */
- dict_index_t* index, /* in: record descriptor */
- dtuple_t* dtuple);/* in: data tuple */
-/**************************************************************
+/**********************************************************//**
The following function returns the size of a data tuple when converted to
-a physical record. */
+a physical record.
+@return size */
UNIV_INLINE
ulint
rec_get_converted_size(
/*===================*/
- /* out: size */
- dict_index_t* index, /* in: record descriptor */
- dtuple_t* dtuple) /* in: data tuple */
+ dict_index_t* index, /*!< in: record descriptor */
+ const dtuple_t* dtuple, /*!< in: data tuple */
+ ulint n_ext) /*!< in: number of externally stored columns */
{
ulint data_size;
ulint extra_size;
@@ -1453,42 +1563,47 @@ rec_get_converted_size(
: dict_index_get_n_fields(index)));
if (dict_table_is_comp(index->table)) {
- return(rec_get_converted_size_new(index, dtuple));
+ return(rec_get_converted_size_comp(index,
+ dtuple_get_info_bits(dtuple)
+ & REC_NEW_STATUS_MASK,
+ dtuple->fields,
+ dtuple->n_fields, NULL));
}
- data_size = dtuple_get_data_size(dtuple);
+ data_size = dtuple_get_data_size(dtuple, 0);
extra_size = rec_get_converted_extra_size(
- data_size, dtuple_get_n_fields(dtuple));
+ data_size, dtuple_get_n_fields(dtuple), n_ext);
return(data_size + extra_size);
}
-/****************************************************************
+#ifndef UNIV_HOTBACKUP
+/************************************************************//**
Folds a prefix of a physical record to a ulint. Folds only existing fields,
-that is, checks that we do not run out of the record. */
+that is, checks that we do not run out of the record.
+@return the folded value */
UNIV_INLINE
ulint
rec_fold(
/*=====*/
- /* out: the folded value */
- rec_t* rec, /* in: the physical record */
- const ulint* offsets, /* in: array returned by
+ const rec_t* rec, /*!< in: the physical record */
+ const ulint* offsets, /*!< in: array returned by
rec_get_offsets() */
- ulint n_fields, /* in: number of complete
+ ulint n_fields, /*!< in: number of complete
fields to fold */
- ulint n_bytes, /* in: number of bytes to fold
+ ulint n_bytes, /*!< in: number of bytes to fold
in an incomplete last field */
- dulint tree_id) /* in: index tree id */
+ dulint tree_id) /*!< in: index tree id */
{
- ulint i;
- byte* data;
- ulint len;
- ulint fold;
- ulint n_fields_rec;
+ ulint i;
+ const byte* data;
+ ulint len;
+ ulint fold;
+ ulint n_fields_rec;
ut_ad(rec_offs_validate(rec, NULL, offsets));
- ut_ad(rec_validate((rec_t*) rec, offsets));
+ ut_ad(rec_validate(rec, offsets));
ut_ad(n_fields + n_bytes > 0);
n_fields_rec = rec_offs_n_fields(offsets);
@@ -1529,3 +1644,4 @@ rec_fold(
return(fold);
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/rem0types.h b/storage/innobase/include/rem0types.h
index 79c162392d2..8b84d4af233 100644
--- a/storage/innobase/include/rem0types.h
+++ b/storage/innobase/include/rem0types.h
@@ -1,7 +1,24 @@
-/************************************************************************
-Record manager global types
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1994-1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/rem0types.h
+Record manager global types
Created 5/30/1994 Heikki Tuuri
*************************************************************************/
@@ -17,4 +34,13 @@ typedef byte rec_t;
#define REC_MAX_HEAP_NO (2 * 8192 - 1)
#define REC_MAX_N_OWNED (16 - 1)
+/* REC_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
+indexed column length (or indexed prefix length). It is set to 3*256,
+so that one can create a column prefix index on 256 characters of a
+TEXT or VARCHAR column also in the UTF-8 charset. In that charset,
+a character may take at most 3 bytes.
+This constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
+files would be at risk! */
+#define REC_MAX_INDEX_COL_LEN 768
+
#endif
diff --git a/storage/innodb_plugin/include/row0ext.h b/storage/innobase/include/row0ext.h
index 43d82d644e6..43d82d644e6 100644
--- a/storage/innodb_plugin/include/row0ext.h
+++ b/storage/innobase/include/row0ext.h
diff --git a/storage/innodb_plugin/include/row0ext.ic b/storage/innobase/include/row0ext.ic
index 82771a9312a..82771a9312a 100644
--- a/storage/innodb_plugin/include/row0ext.ic
+++ b/storage/innobase/include/row0ext.ic
diff --git a/storage/innobase/include/row0ins.h b/storage/innobase/include/row0ins.h
index b4bcc8ac5ca..9f93565ddb7 100644
--- a/storage/innobase/include/row0ins.h
+++ b/storage/innobase/include/row0ins.h
@@ -1,7 +1,24 @@
-/******************************************************
-Insert into a table
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1996 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0ins.h
+Insert into a table
Created 4/20/1996 Heikki Tuuri
*******************************************************/
@@ -16,128 +33,98 @@ Created 4/20/1996 Heikki Tuuri
#include "trx0types.h"
#include "row0types.h"
-/*******************************************************************
+/***************************************************************//**
Checks if foreign key constraint fails for an index entry. Sets shared locks
which lock either the success or the failure of the constraint. NOTE that
-the caller must have a shared latch on dict_foreign_key_check_lock. */
-
+the caller must have a shared latch on dict_foreign_key_check_lock.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_NO_REFERENCED_ROW, or
+DB_ROW_IS_REFERENCED */
+UNIV_INTERN
ulint
row_ins_check_foreign_constraint(
/*=============================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_NO_REFERENCED_ROW,
- or DB_ROW_IS_REFERENCED */
- ibool check_ref,/* in: TRUE If we want to check that
+ ibool check_ref,/*!< in: TRUE if we want to check that
the referenced table is ok, FALSE if we
- want to to check the foreign key table */
- dict_foreign_t* foreign,/* in: foreign constraint; NOTE that the
+ want to check the foreign key table */
+ dict_foreign_t* foreign,/*!< in: foreign constraint; NOTE that the
tables mentioned in it must be in the
dictionary cache if they exist at all */
- dict_table_t* table, /* in: if check_ref is TRUE, then the foreign
+ dict_table_t* table, /*!< in: if check_ref is TRUE, then the foreign
table, else the referenced table */
- dtuple_t* entry, /* in: index entry for index */
- que_thr_t* thr); /* in: query thread */
-/*************************************************************************
-Creates an insert node struct. */
-
+ dtuple_t* entry, /*!< in: index entry for index */
+ que_thr_t* thr); /*!< in: query thread */
+/*********************************************************************//**
+Creates an insert node struct.
+@return own: insert node struct */
+UNIV_INTERN
ins_node_t*
ins_node_create(
/*============*/
- /* out, own: insert node struct */
- ulint ins_type, /* in: INS_VALUES, ... */
- dict_table_t* table, /* in: table where to insert */
- mem_heap_t* heap); /* in: mem heap where created */
-/*************************************************************************
+ ulint ins_type, /*!< in: INS_VALUES, ... */
+ dict_table_t* table, /*!< in: table where to insert */
+ mem_heap_t* heap); /*!< in: mem heap where created */
+/*********************************************************************//**
Sets a new row to insert for an INS_DIRECT node. This function is only used
if we have constructed the row separately, which is a rare case; this
function is quite slow. */
-
+UNIV_INTERN
void
ins_node_set_new_row(
/*=================*/
- ins_node_t* node, /* in: insert node */
- dtuple_t* row); /* in: new row (or first row) for the node */
-/*******************************************************************
-Tries to insert an index entry to an index. If the index is clustered
-and a record with the same unique key is found, the other record is
-necessarily marked deleted by a committed transaction, or a unique key
-violation error occurs. The delete marked record is then updated to an
-existing record, and we must write an undo log record on the delete
-marked record. If the index is secondary, and a record with exactly the
-same fields is found, the other record is necessarily marked deleted.
-It is then unmarked. Otherwise, the entry is just inserted to the index. */
-
-ulint
-row_ins_index_entry_low(
-/*====================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT, DB_FAIL
- if pessimistic retry needed, or error code */
- ulint mode, /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
- depending on whether we wish optimistic or
- pessimistic descent down the index tree */
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: index entry to insert */
- ulint* ext_vec,/* in: array containing field numbers of
- externally stored fields in entry, or NULL */
- ulint n_ext_vec,/* in: number of fields in ext_vec */
- que_thr_t* thr); /* in: query thread */
-/*******************************************************************
+ ins_node_t* node, /*!< in: insert node */
+ dtuple_t* row); /*!< in: new row (or first row) for the node */
+/***************************************************************//**
Inserts an index entry to index. Tries first optimistic, then pessimistic
descent down the tree. If the entry matches enough to a delete marked record,
performs the insert by updating or delete unmarking the delete marked
-record. */
-
+record.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
+UNIV_INTERN
ulint
row_ins_index_entry(
/*================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DUPLICATE_KEY, or some other error code */
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: index entry to insert */
- ulint* ext_vec,/* in: array containing field numbers of
- externally stored fields in entry, or NULL */
- ulint n_ext_vec,/* in: number of fields in ext_vec */
- que_thr_t* thr); /* in: query thread */
-/***************************************************************
-Inserts a row to a table. */
-
-ulint
-row_ins(
-/*====*/
- /* out: DB_SUCCESS if operation successfully
- completed, else error code or DB_LOCK_WAIT */
- ins_node_t* node, /* in: row insert node */
- que_thr_t* thr); /* in: query thread */
-/***************************************************************
+ dict_index_t* index, /*!< in: index */
+ dtuple_t* entry, /*!< in: index entry to insert */
+ ulint n_ext, /*!< in: number of externally stored columns */
+ ibool foreign,/*!< in: TRUE=check foreign key constraints */
+ que_thr_t* thr); /*!< in: query thread */
+/***********************************************************//**
Inserts a row to a table. This is a high-level function used in
-SQL execution graphs. */
-
+SQL execution graphs.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
row_ins_step(
/*=========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
+ que_thr_t* thr); /*!< in: query thread */
+/***********************************************************//**
+Creates an entry template for each index of a table. */
+UNIV_INTERN
+void
+ins_node_create_entry_list(
+/*=======================*/
+ ins_node_t* node); /*!< in: row insert node */
/* Insert node structure */
struct ins_node_struct{
- que_common_t common; /* node type: QUE_NODE_INSERT */
+ que_common_t common; /*!< node type: QUE_NODE_INSERT */
ulint ins_type;/* INS_VALUES, INS_SEARCHED, or INS_DIRECT */
- dtuple_t* row; /* row to insert */
- dict_table_t* table; /* table where to insert */
- sel_node_t* select; /* select in searched insert */
+ dtuple_t* row; /*!< row to insert */
+ dict_table_t* table; /*!< table where to insert */
+ sel_node_t* select; /*!< select in searched insert */
que_node_t* values_list;/* list of expressions to evaluate and
insert in an INS_VALUES insert */
- ulint state; /* node execution state */
- dict_index_t* index; /* NULL, or the next index where the index
+ ulint state; /*!< node execution state */
+ dict_index_t* index; /*!< NULL, or the next index where the index
entry should be inserted */
- dtuple_t* entry; /* NULL, or entry to insert in the index;
+ dtuple_t* entry; /*!< NULL, or entry to insert in the index;
after a successful insert of the entry,
this should be reset to NULL */
UT_LIST_BASE_NODE_T(dtuple_t)
entry_list;/* list of entries, one for each index */
byte* row_id_buf;/* buffer for the row id sys field in row */
- dulint trx_id; /* trx id or the last trx which executed the
+ trx_id_t trx_id; /*!< trx id of the last trx which executed the
node */
byte* trx_id_buf;/* buffer for the trx id sys field in row */
mem_heap_t* entry_sys_heap;
diff --git a/storage/innobase/include/row0ins.ic b/storage/innobase/include/row0ins.ic
index 80a232d41ee..84f6da255bf 100644
--- a/storage/innobase/include/row0ins.ic
+++ b/storage/innobase/include/row0ins.ic
@@ -1,7 +1,24 @@
-/******************************************************
-Insert into a table
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0ins.ic
+Insert into a table
Created 4/20/1996 Heikki Tuuri
*******************************************************/
diff --git a/storage/innodb_plugin/include/row0merge.h b/storage/innobase/include/row0merge.h
index 62a5efd11f7..62a5efd11f7 100644
--- a/storage/innodb_plugin/include/row0merge.h
+++ b/storage/innobase/include/row0merge.h
diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h
index 5430190fa51..b05241f00f8 100644
--- a/storage/innobase/include/row0mysql.h
+++ b/storage/innobase/include/row0mysql.h
@@ -1,9 +1,26 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 2000, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0mysql.h
Interface between Innobase row operations and MySQL.
Contains also create table and other data dictionary operations.
-(c) 2000 Innobase Oy
-
Created 9/17/2000 Heikki Tuuri
*******************************************************/
@@ -23,229 +40,239 @@ extern ibool row_rollback_on_timeout;
typedef struct row_prebuilt_struct row_prebuilt_t;
-/***********************************************************************
+/*******************************************************************//**
Frees the blob heap in prebuilt when no longer needed. */
-
+UNIV_INTERN
void
row_mysql_prebuilt_free_blob_heap(
/*==============================*/
- row_prebuilt_t* prebuilt); /* in: prebuilt struct of a
+ row_prebuilt_t* prebuilt); /*!< in: prebuilt struct of a
ha_innobase:: table handle */
-/***********************************************************************
+/*******************************************************************//**
Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row
-format. */
-
+format.
+@return pointer to the data, we skip the 1 or 2 bytes at the start
+that are used to store the len */
+UNIV_INTERN
byte*
row_mysql_store_true_var_len(
/*=========================*/
- /* out: pointer to the data, we skip the 1 or 2 bytes
- at the start that are used to store the len */
- byte* dest, /* in: where to store */
- ulint len, /* in: length, must fit in two bytes */
- ulint lenlen);/* in: storage length of len: either 1 or 2 bytes */
-/***********************************************************************
+ byte* dest, /*!< in: where to store */
+ ulint len, /*!< in: length, must fit in two bytes */
+ ulint lenlen);/*!< in: storage length of len: either 1 or 2 bytes */
+/*******************************************************************//**
Reads a >= 5.0.3 format true VARCHAR length, in the MySQL row format, and
-returns a pointer to the data. */
-
-byte*
+returns a pointer to the data.
+@return pointer to the data, we skip the 1 or 2 bytes at the start
+that are used to store the len */
+UNIV_INTERN
+const byte*
row_mysql_read_true_varchar(
/*========================*/
- /* out: pointer to the data, we skip the 1 or 2 bytes
- at the start that are used to store the len */
- ulint* len, /* out: variable-length field length */
- byte* field, /* in: field in the MySQL format */
- ulint lenlen);/* in: storage length of len: either 1 or 2 bytes */
-/***********************************************************************
+ ulint* len, /*!< out: variable-length field length */
+ const byte* field, /*!< in: field in the MySQL format */
+ ulint lenlen);/*!< in: storage length of len: either 1
+ or 2 bytes */
+/*******************************************************************//**
Stores a reference to a BLOB in the MySQL format. */
-
+UNIV_INTERN
void
row_mysql_store_blob_ref(
/*=====================*/
- byte* dest, /* in: where to store */
- ulint col_len, /* in: dest buffer size: determines into
+ byte* dest, /*!< in: where to store */
+ ulint col_len,/*!< in: dest buffer size: determines into
how many bytes the BLOB length is stored,
- this may vary from 1 to 4 bytes */
- byte* data, /* in: BLOB data */
- ulint len); /* in: BLOB length */
-/***********************************************************************
-Reads a reference to a BLOB in the MySQL format. */
-
-byte*
+ the space for the length may vary from 1
+ to 4 bytes */
+ const void* data, /*!< in: BLOB data; if the value to store
+ is SQL NULL this should be NULL pointer */
+ ulint len); /*!< in: BLOB length; if the value to store
+ is SQL NULL this should be 0; remember
+ also to set the NULL bit in the MySQL record
+ header! */
+/*******************************************************************//**
+Reads a reference to a BLOB in the MySQL format.
+@return pointer to BLOB data */
+UNIV_INTERN
+const byte*
row_mysql_read_blob_ref(
/*====================*/
- /* out: pointer to BLOB data */
- ulint* len, /* out: BLOB length */
- byte* ref, /* in: BLOB reference in the MySQL format */
- ulint col_len); /* in: BLOB reference length (not BLOB
- length) */
-/******************************************************************
+ ulint* len, /*!< out: BLOB length */
+ const byte* ref, /*!< in: BLOB reference in the
+ MySQL format */
+ ulint col_len); /*!< in: BLOB reference length
+ (not BLOB length) */
+/**************************************************************//**
Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format.
The counterpart of this function is row_sel_field_store_in_mysql_format() in
-row0sel.c. */
-
+row0sel.c.
+@return up to which byte we used buf in the conversion */
+UNIV_INTERN
byte*
row_mysql_store_col_in_innobase_format(
/*===================================*/
- /* out: up to which byte we used
- buf in the conversion */
- dfield_t* dfield, /* in/out: dfield where dtype
+ dfield_t* dfield, /*!< in/out: dfield where dtype
information must be already set when
this function is called! */
- byte* buf, /* in/out: buffer for a converted
+ byte* buf, /*!< in/out: buffer for a converted
integer value; this must be at least
col_len long then! */
- ibool row_format_col, /* TRUE if the mysql_data is from
+ ibool row_format_col, /*!< TRUE if the mysql_data is from
a MySQL row, FALSE if from a MySQL
key value;
in MySQL, a true VARCHAR storage
format differs in a row and in a
key value: in a key value the length
is always stored in 2 bytes! */
- byte* mysql_data, /* in: MySQL column value, not
+ const byte* mysql_data, /*!< in: MySQL column value, not
SQL NULL; NOTE that dfield may also
get a pointer to mysql_data,
therefore do not discard this as long
as dfield is used! */
- ulint col_len, /* in: MySQL column length; NOTE that
+ ulint col_len, /*!< in: MySQL column length; NOTE that
this is the storage length of the
column in the MySQL format row, not
necessarily the length of the actual
payload data; if the column is a true
VARCHAR then this is irrelevant */
- ulint comp); /* in: nonzero=compact format */
-/********************************************************************
-Handles user errors and lock waits detected by the database engine. */
-
+ ulint comp); /*!< in: nonzero=compact format */
+/****************************************************************//**
+Handles user errors and lock waits detected by the database engine.
+@return TRUE if it was a lock wait and we should continue running the
+query thread */
+UNIV_INTERN
ibool
row_mysql_handle_errors(
/*====================*/
- /* out: TRUE if it was a lock wait and
- we should continue running the query thread */
- ulint* new_err,/* out: possible new error encountered in
+ ulint* new_err,/*!< out: possible new error encountered in
rollback, or the old error which was
during the function entry */
- trx_t* trx, /* in: transaction */
- que_thr_t* thr, /* in: query thread */
- trx_savept_t* savept);/* in: savepoint */
-/************************************************************************
-Create a prebuilt struct for a MySQL table handle. */
-
+ trx_t* trx, /*!< in: transaction */
+ que_thr_t* thr, /*!< in: query thread */
+ trx_savept_t* savept);/*!< in: savepoint */
+/********************************************************************//**
+Create a prebuilt struct for a MySQL table handle.
+@return own: a prebuilt struct */
+UNIV_INTERN
row_prebuilt_t*
row_create_prebuilt(
/*================*/
- /* out, own: a prebuilt struct */
- dict_table_t* table); /* in: Innobase table handle */
-/************************************************************************
+ dict_table_t* table); /*!< in: Innobase table handle */
+/********************************************************************//**
Free a prebuilt struct for a MySQL table handle. */
-
+UNIV_INTERN
void
row_prebuilt_free(
/*==============*/
- row_prebuilt_t* prebuilt); /* in, own: prebuilt struct */
-/*************************************************************************
+ row_prebuilt_t* prebuilt, /*!< in, own: prebuilt struct */
+ ibool dict_locked); /*!< in: TRUE=data dictionary locked */
+/*********************************************************************//**
Updates the transaction pointers in query graphs stored in the prebuilt
struct. */
-
+UNIV_INTERN
void
row_update_prebuilt_trx(
/*====================*/
- /* out: prebuilt dtuple */
- row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL
- handle */
- trx_t* trx); /* in: transaction handle */
-/*************************************************************************
-Unlocks an AUTO_INC type lock possibly reserved by trx. */
-
+ row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct
+ in MySQL handle */
+ trx_t* trx); /*!< in: transaction handle */
+/*********************************************************************//**
+Unlocks AUTO_INC type locks that were possibly reserved by a trx. This
+function should be called at the end of an SQL statement, by the
+connection thread that owns the transaction (trx->mysql_thd). */
+UNIV_INTERN
void
row_unlock_table_autoinc_for_mysql(
/*===============================*/
- trx_t* trx); /* in: transaction */
-/*************************************************************************
+ trx_t* trx); /*!< in/out: transaction */
+/*********************************************************************//**
Sets an AUTO_INC type lock on the table mentioned in prebuilt. The
AUTO_INC lock gives exclusive access to the auto-inc counter of the
table. The lock is reserved only for the duration of an SQL statement.
It is not compatible with another AUTO_INC or exclusive lock on the
-table. */
-
+table.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
row_lock_table_autoinc_for_mysql(
/*=============================*/
- /* out: error code or DB_SUCCESS */
- row_prebuilt_t* prebuilt); /* in: prebuilt struct in the MySQL
+ row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in the MySQL
table handle */
-/*************************************************************************
-Sets a table lock on the table mentioned in prebuilt. */
-
+/*********************************************************************//**
+Sets a table lock on the table mentioned in prebuilt.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
row_lock_table_for_mysql(
/*=====================*/
- /* out: error code or DB_SUCCESS */
- row_prebuilt_t* prebuilt, /* in: prebuilt struct in the MySQL
+ row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in the MySQL
table handle */
- dict_table_t* table, /* in: table to lock, or NULL
+ dict_table_t* table, /*!< in: table to lock, or NULL
if prebuilt->table should be
locked as
prebuilt->select_lock_type */
- ulint mode); /* in: lock mode of table
+ ulint mode); /*!< in: lock mode of table
(ignored if table==NULL) */
-/*************************************************************************
-Does an insert for MySQL. */
-
+/*********************************************************************//**
+Does an insert for MySQL.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
row_insert_for_mysql(
/*=================*/
- /* out: error code or DB_SUCCESS */
- byte* mysql_rec, /* in: row in the MySQL format */
- row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
+ byte* mysql_rec, /*!< in: row in the MySQL format */
+ row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL
handle */
-/*************************************************************************
+/*********************************************************************//**
Builds a dummy query graph used in selects. */
-
+UNIV_INTERN
void
row_prebuild_sel_graph(
/*===================*/
- row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
+ row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL
handle */
-/*************************************************************************
+/*********************************************************************//**
Gets pointer to a prebuilt update vector used in updates. If the update
graph has not yet been built in the prebuilt struct, then this function
-first builds it. */
-
+first builds it.
+@return prebuilt update vector */
+UNIV_INTERN
upd_t*
row_get_prebuilt_update_vector(
/*===========================*/
- /* out: prebuilt update vector */
- row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
+ row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL
handle */
-/*************************************************************************
+/*********************************************************************//**
Checks if a table is such that we automatically created a clustered
-index on it (on row id). */
-
+index on it (on row id).
+@return TRUE if the clustered index was generated automatically */
+UNIV_INTERN
ibool
row_table_got_default_clust_index(
/*==============================*/
- dict_table_t* table);
-/*************************************************************************
+ const dict_table_t* table); /*!< in: table */
+/*********************************************************************//**
Calculates the key number used inside MySQL for an Innobase index. We have
-to take into account if we generated a default clustered index for the table */
-
+to take into account if we generated a default clustered index for the table
+@return the key number used inside MySQL */
+UNIV_INTERN
ulint
row_get_mysql_key_number_for_index(
/*===============================*/
- dict_index_t* index);
-/*************************************************************************
-Does an update or delete of a row for MySQL. */
-
+ const dict_index_t* index); /*!< in: index */
+/*********************************************************************//**
+Does an update or delete of a row for MySQL.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
row_update_for_mysql(
/*=================*/
- /* out: error code or DB_SUCCESS */
- byte* mysql_rec, /* in: the row to be updated, in
+ byte* mysql_rec, /*!< in: the row to be updated, in
the MySQL format */
- row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
+ row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL
handle */
-/*************************************************************************
+/*********************************************************************//**
This can only be used when srv_locks_unsafe_for_binlog is TRUE or
session is using a READ COMMITTED isolation level. Before
calling this function we must use trx_reset_new_rec_lock_info() and
@@ -255,223 +282,235 @@ and also under prebuilt->clust_pcur. Currently, this is only used and tested
in the case of an UPDATE or a DELETE statement, where the row lock is of the
LOCK_X type.
Thus, this implements a 'mini-rollback' that releases the latest record
-locks we set. */
-
+locks we set.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
row_unlock_for_mysql(
/*=================*/
- /* out: error code or DB_SUCCESS */
- row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL
+ row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in MySQL
handle */
- ibool has_latches_on_recs);/* TRUE if called so that we have
+ ibool has_latches_on_recs);/*!< TRUE if called so that we have
the latches on the records under pcur
and clust_pcur, and we do not need to
reposition the cursors. */
-/*************************************************************************
+/*********************************************************************//**
Creates a query graph node of 'update' type to be used in the MySQL
-interface. */
-
+interface.
+@return own: update node */
+UNIV_INTERN
upd_node_t*
row_create_update_node_for_mysql(
/*=============================*/
- /* out, own: update node */
- dict_table_t* table, /* in: table to update */
- mem_heap_t* heap); /* in: mem heap from which allocated */
-/**************************************************************************
-Does a cascaded delete or set null in a foreign key operation. */
-
+ dict_table_t* table, /*!< in: table to update */
+ mem_heap_t* heap); /*!< in: mem heap from which allocated */
+/**********************************************************************//**
+Does a cascaded delete or set null in a foreign key operation.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
ulint
row_update_cascade_for_mysql(
/*=========================*/
- /* out: error code or DB_SUCCESS */
- que_thr_t* thr, /* in: query thread */
- upd_node_t* node, /* in: update node used in the cascade
+ que_thr_t* thr, /*!< in: query thread */
+ upd_node_t* node, /*!< in: update node used in the cascade
or set null operation */
- dict_table_t* table); /* in: table where we do the operation */
-/*************************************************************************
+ dict_table_t* table); /*!< in: table where we do the operation */
+/*********************************************************************//**
Locks the data dictionary exclusively for performing a table create or other
data dictionary modification operation. */
-
+UNIV_INTERN
void
-row_mysql_lock_data_dictionary(
-/*===========================*/
- trx_t* trx); /* in: transaction */
-/*************************************************************************
+row_mysql_lock_data_dictionary_func(
+/*================================*/
+ trx_t* trx, /*!< in/out: transaction */
+ const char* file, /*!< in: file name */
+ ulint line); /*!< in: line number */
+#define row_mysql_lock_data_dictionary(trx) \
+ row_mysql_lock_data_dictionary_func(trx, __FILE__, __LINE__)
+/*********************************************************************//**
Unlocks the data dictionary exclusive lock. */
-
+UNIV_INTERN
void
row_mysql_unlock_data_dictionary(
/*=============================*/
- trx_t* trx); /* in: transaction */
-/*************************************************************************
+ trx_t* trx); /*!< in/out: transaction */
+/*********************************************************************//**
Locks the data dictionary in shared mode from modifications, for performing
foreign key check, rollback, or other operation invisible to MySQL. */
-
+UNIV_INTERN
void
-row_mysql_freeze_data_dictionary(
-/*=============================*/
- trx_t* trx); /* in: transaction */
-/*************************************************************************
+row_mysql_freeze_data_dictionary_func(
+/*==================================*/
+ trx_t* trx, /*!< in/out: transaction */
+ const char* file, /*!< in: file name */
+ ulint line); /*!< in: line number */
+#define row_mysql_freeze_data_dictionary(trx) \
+ row_mysql_freeze_data_dictionary_func(trx, __FILE__, __LINE__)
+/*********************************************************************//**
Unlocks the data dictionary shared lock. */
-
+UNIV_INTERN
void
row_mysql_unfreeze_data_dictionary(
/*===============================*/
- trx_t* trx); /* in: transaction */
-/*************************************************************************
+ trx_t* trx); /*!< in/out: transaction */
+/*********************************************************************//**
Creates a table for MySQL. If the name of the table ends in
one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
"innodb_table_monitor", then this will also start the printing of monitor
output by the master thread. If the table name ends in "innodb_mem_validate",
-InnoDB will try to invoke mem_validate(). */
-
+InnoDB will try to invoke mem_validate().
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
row_create_table_for_mysql(
/*=======================*/
- /* out: error code or DB_SUCCESS */
- dict_table_t* table, /* in: table definition */
- trx_t* trx); /* in: transaction handle */
-/*************************************************************************
+ dict_table_t* table, /*!< in, own: table definition
+ (will be freed) */
+ trx_t* trx); /*!< in: transaction handle */
+/*********************************************************************//**
Does an index creation operation for MySQL. TODO: currently failure
to create an index results in dropping the whole table! This is no problem
-currently as all indexes must be created at the same time as the table. */
-
+currently as all indexes must be created at the same time as the table.
+@return error number or DB_SUCCESS */
+UNIV_INTERN
int
row_create_index_for_mysql(
/*=======================*/
- /* out: error number or DB_SUCCESS */
- dict_index_t* index, /* in: index definition */
- trx_t* trx, /* in: transaction handle */
- const ulint* field_lengths); /* in: if not NULL, must contain
+ dict_index_t* index, /*!< in, own: index definition
+ (will be freed) */
+ trx_t* trx, /*!< in: transaction handle */
+ const ulint* field_lengths); /*!< in: if not NULL, must contain
dict_index_get_n_fields(index)
actual field lengths for the
index columns, which are
then checked for not being too
large. */
-/*************************************************************************
+/*********************************************************************//**
Scans a table create SQL string and adds to the data dictionary
the foreign key constraints declared in the string. This function
should be called after the indexes for a table have been created.
Each foreign key constraint must be accompanied with indexes in
both participating tables. The indexes are allowed to contain more
-fields than mentioned in the constraint. */
-
+fields than mentioned in the constraint.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
row_table_add_foreign_constraints(
/*==============================*/
- /* out: error code or DB_SUCCESS */
- trx_t* trx, /* in: transaction */
- const char* sql_string, /* in: table create statement where
+ trx_t* trx, /*!< in: transaction */
+ const char* sql_string, /*!< in: table create statement where
foreign keys are declared like:
FOREIGN KEY (a, b) REFERENCES table2(c, d),
table2 can be written also with the
database name before it: test.table2 */
- const char* name, /* in: table full name in the
+ const char* name, /*!< in: table full name in the
normalized form
database_name/table_name */
- ibool reject_fks); /* in: if TRUE, fail with error
+ ibool reject_fks); /*!< in: if TRUE, fail with error
code DB_CANNOT_ADD_CONSTRAINT if
any foreign keys are found. */
-/*************************************************************************
+/*********************************************************************//**
The master thread in srv0srv.c calls this regularly to drop tables which
we must drop in background after queries to them have ended. Such lazy
-dropping of tables is needed in ALTER TABLE on Unix. */
-
+dropping of tables is needed in ALTER TABLE on Unix.
+@return how many tables dropped + remaining tables in list */
+UNIV_INTERN
ulint
row_drop_tables_for_mysql_in_background(void);
/*=========================================*/
- /* out: how many tables dropped
- + remaining tables in list */
-/*************************************************************************
+/*********************************************************************//**
Get the background drop list length. NOTE: the caller must own the kernel
-mutex! */
-
+mutex!
+@return how many tables in list */
+UNIV_INTERN
ulint
row_get_background_drop_list_len_low(void);
/*======================================*/
- /* out: how many tables in list */
-/*************************************************************************
-Truncates a table for MySQL. */
-
+/*********************************************************************//**
+Truncates a table for MySQL.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
row_truncate_table_for_mysql(
/*=========================*/
- /* out: error code or DB_SUCCESS */
- dict_table_t* table, /* in: table handle */
- trx_t* trx); /* in: transaction handle */
-/*************************************************************************
-Drops a table for MySQL. If the name of the dropped table ends in
+ dict_table_t* table, /*!< in: table handle */
+ trx_t* trx); /*!< in: transaction handle */
+/*********************************************************************//**
+Drops a table for MySQL. If the name of the dropped table ends in
one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
"innodb_table_monitor", then this will also stop the printing of monitor
-output by the master thread. */
-
+output by the master thread. If the data dictionary was not already locked
+by the transaction, the transaction will be committed. Otherwise, the
+data dictionary will remain locked.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
row_drop_table_for_mysql(
/*=====================*/
- /* out: error code or DB_SUCCESS */
- const char* name, /* in: table name */
- trx_t* trx, /* in: transaction handle */
- ibool drop_db);/* in: TRUE=dropping whole database */
+ const char* name, /*!< in: table name */
+ trx_t* trx, /*!< in: transaction handle */
+ ibool drop_db);/*!< in: TRUE=dropping whole database */
-/*************************************************************************
+/*********************************************************************//**
Discards the tablespace of a table which is stored in an .ibd file. Discarding
means that this function deletes the .ibd file and assigns a new table id for
-the table. Also the flag table->ibd_file_missing is set TRUE. */
-
+the table. Also the flag table->ibd_file_missing is set TRUE.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
row_discard_tablespace_for_mysql(
/*=============================*/
- /* out: error code or DB_SUCCESS */
- const char* name, /* in: table name */
- trx_t* trx); /* in: transaction handle */
-/*********************************************************************
+ const char* name, /*!< in: table name */
+ trx_t* trx); /*!< in: transaction handle */
+/*****************************************************************//**
Imports a tablespace. The space id in the .ibd file must match the space id
-of the table in the data dictionary. */
-
+of the table in the data dictionary.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
row_import_tablespace_for_mysql(
/*============================*/
- /* out: error code or DB_SUCCESS */
- const char* name, /* in: table name */
- trx_t* trx); /* in: transaction handle */
-/*************************************************************************
-Drops a database for MySQL. */
-
+ const char* name, /*!< in: table name */
+ trx_t* trx); /*!< in: transaction handle */
+/*********************************************************************//**
+Drops a database for MySQL.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
row_drop_database_for_mysql(
/*========================*/
- /* out: error code or DB_SUCCESS */
- const char* name, /* in: database name which ends to '/' */
- trx_t* trx); /* in: transaction handle */
-/*************************************************************************
-Renames a table for MySQL. */
-
-int
+ const char* name, /*!< in: database name which ends to '/' */
+ trx_t* trx); /*!< in: transaction handle */
+/*********************************************************************//**
+Renames a table for MySQL.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+ulint
row_rename_table_for_mysql(
/*=======================*/
- /* out: error code or DB_SUCCESS */
- const char* old_name, /* in: old table name */
- const char* new_name, /* in: new table name */
- trx_t* trx); /* in: transaction handle */
-/*************************************************************************
-Checks a table for corruption. */
-
+ const char* old_name, /*!< in: old table name */
+ const char* new_name, /*!< in: new table name */
+ trx_t* trx, /*!< in: transaction handle */
+ ibool commit); /*!< in: if TRUE then commit trx */
+/*********************************************************************//**
+Checks a table for corruption.
+@return DB_ERROR or DB_SUCCESS */
+UNIV_INTERN
ulint
row_check_table_for_mysql(
/*======================*/
- /* out: DB_ERROR or DB_SUCCESS */
- row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
+ row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL
handle */
-/*************************************************************************
-Determines if a table is a magic monitor table. */
-
+/*********************************************************************//**
+Determines if a table is a magic monitor table.
+@return TRUE if monitor table */
+UNIV_INTERN
ibool
row_is_magic_monitor_table(
/*=======================*/
- /* out: TRUE if monitor table */
- const char* table_name); /* in: name of the table, in the
+ const char* table_name); /*!< in: name of the table, in the
form database/table_name */
/* A struct describing a place for an individual column in the MySQL
@@ -481,24 +520,24 @@ Innobase and MySQL. */
typedef struct mysql_row_templ_struct mysql_row_templ_t;
struct mysql_row_templ_struct {
- ulint col_no; /* column number of the column */
- ulint rec_field_no; /* field number of the column in an
+ ulint col_no; /*!< column number of the column */
+ ulint rec_field_no; /*!< field number of the column in an
Innobase record in the current index;
not defined if template_type is
ROW_MYSQL_WHOLE_ROW */
- ulint mysql_col_offset; /* offset of the column in the MySQL
+ ulint mysql_col_offset; /*!< offset of the column in the MySQL
row format */
- ulint mysql_col_len; /* length of the column in the MySQL
+ ulint mysql_col_len; /*!< length of the column in the MySQL
row format */
- ulint mysql_null_byte_offset; /* MySQL NULL bit byte offset in a
+ ulint mysql_null_byte_offset; /*!< MySQL NULL bit byte offset in a
MySQL record */
- ulint mysql_null_bit_mask; /* bit mask to get the NULL bit,
+ ulint mysql_null_bit_mask; /*!< bit mask to get the NULL bit,
zero if column cannot be NULL */
- ulint type; /* column type in Innobase mtype
+ ulint type; /*!< column type in Innobase mtype
numbers DATA_CHAR... */
- ulint mysql_type; /* MySQL type code; this is always
+ ulint mysql_type; /*!< MySQL type code; this is always
< 256 */
- ulint mysql_length_bytes; /* if mysql_type
+ ulint mysql_length_bytes; /*!< if mysql_type
== DATA_MYSQL_TRUE_VARCHAR, this tells
whether we should use 1 or 2 bytes to
store the MySQL true VARCHAR data
@@ -506,13 +545,13 @@ struct mysql_row_templ_struct {
format (NOTE that the MySQL key value
format always uses 2 bytes for the data
len) */
- ulint charset; /* MySQL charset-collation code
+ ulint charset; /*!< MySQL charset-collation code
of the column, or zero */
- ulint mbminlen; /* minimum length of a char, in bytes,
+ ulint mbminlen; /*!< minimum length of a char, in bytes,
or zero if not a char type */
- ulint mbmaxlen; /* maximum length of a char, in bytes,
+ ulint mbmaxlen; /*!< maximum length of a char, in bytes,
or zero if not a char type */
- ulint is_unsigned; /* if a column type is an integer
+ ulint is_unsigned; /*!< if a column type is an integer
type and this field is != 0, then
it is an unsigned integer type */
};
@@ -524,80 +563,81 @@ struct mysql_row_templ_struct {
#define ROW_PREBUILT_ALLOCATED 78540783
#define ROW_PREBUILT_FREED 26423527
-/* A struct for (sometimes lazily) prebuilt structures in an Innobase table
+/** A struct for (sometimes lazily) prebuilt structures in an Innobase table
handle used within MySQL; these are used to save CPU time. */
struct row_prebuilt_struct {
- ulint magic_n; /* this magic number is set to
- ROW_PREBUILT_ALLOCATED when created
- and to ROW_PREBUILT_FREED when the
- struct has been freed; used in
- debugging */
- dict_table_t* table; /* Innobase table handle */
- trx_t* trx; /* current transaction handle */
- ibool sql_stat_start; /* TRUE when we start processing of
+ ulint magic_n; /*!< this magic number is set to
+ ROW_PREBUILT_ALLOCATED when created,
+ or ROW_PREBUILT_FREED when the
+ struct has been freed */
+ dict_table_t* table; /*!< Innobase table handle */
+ dict_index_t* index; /*!< current index for a search, if
+ any */
+ trx_t* trx; /*!< current transaction handle */
+ unsigned sql_stat_start:1;/*!< TRUE when we start processing of
an SQL statement: we may have to set
an intention lock on the table,
create a consistent read view etc. */
- ibool mysql_has_locked; /* this is set TRUE when MySQL
+ unsigned mysql_has_locked:1;/*!< this is set TRUE when MySQL
calls external_lock on this handle
with a lock flag, and set FALSE when
called with the F_UNLOCK flag */
- ibool clust_index_was_generated;
- /* if the user did not define a
+ unsigned clust_index_was_generated:1;
+ /*!< if the user did not define a
primary key in MySQL, then Innobase
automatically generated a clustered
index where the ordering column is
the row id: in this case this flag
is set to TRUE */
- dict_index_t* index; /* current index for a search, if
- any */
- ulint read_just_key; /* set to 1 when MySQL calls
+ unsigned index_usable:1; /*!< caches the value of
+ row_merge_is_index_usable(trx,index) */
+ unsigned read_just_key:1;/*!< set to 1 when MySQL calls
ha_innobase::extra with the
argument HA_EXTRA_KEYREAD; it is enough
to read just columns defined in
the index (i.e., no read of the
clustered index record necessary) */
- ibool used_in_HANDLER;/* TRUE if we have been using this
+ unsigned used_in_HANDLER:1;/*!< TRUE if we have been using this
handle in a MySQL HANDLER low level
index cursor command: then we must
store the pcur position even in a
unique search from a clustered index,
because HANDLER allows NEXT and PREV
in such a situation */
- ulint template_type; /* ROW_MYSQL_WHOLE_ROW,
+ unsigned template_type:2;/*!< ROW_MYSQL_WHOLE_ROW,
ROW_MYSQL_REC_FIELDS,
ROW_MYSQL_DUMMY_TEMPLATE, or
ROW_MYSQL_NO_TEMPLATE */
- ulint n_template; /* number of elements in the
+ unsigned n_template:10; /*!< number of elements in the
template */
- ulint null_bitmap_len;/* number of bytes in the SQL NULL
+ unsigned null_bitmap_len:10;/*!< number of bytes in the SQL NULL
bitmap at the start of a row in the
MySQL format */
- ibool need_to_access_clustered; /* if we are fetching
+ unsigned need_to_access_clustered:1; /*!< if we are fetching
columns through a secondary index
and at least one column is not in
the secondary index, then this is
set to TRUE */
- ibool templ_contains_blob;/* TRUE if the template contains
+ unsigned templ_contains_blob:1;/*!< TRUE if the template contains
BLOB column(s) */
- mysql_row_templ_t* mysql_template;/* template used to transform
+ mysql_row_templ_t* mysql_template;/*!< template used to transform
rows fast between MySQL and Innobase
formats; memory for this template
is not allocated from 'heap' */
- mem_heap_t* heap; /* memory heap from which
+ mem_heap_t* heap; /*!< memory heap from which
these auxiliary structures are
allocated when needed */
- ins_node_t* ins_node; /* Innobase SQL insert node
+ ins_node_t* ins_node; /*!< Innobase SQL insert node
used to perform inserts
to the table */
- byte* ins_upd_rec_buff;/* buffer for storing data converted
+ byte* ins_upd_rec_buff;/*!< buffer for storing data converted
to the Innobase format from the MySQL
format */
- const byte* default_rec; /* the default values of all columns
+ const byte* default_rec; /*!< the default values of all columns
(a "default row") in MySQL format */
ulint hint_need_to_fetch_extra_cols;
- /* normally this is set to 0; if this
+ /*!< normally this is set to 0; if this
is set to ROW_RETRIEVE_PRIMARY_KEY,
then we should at least retrieve all
columns in the primary key; if this
@@ -605,33 +645,33 @@ struct row_prebuilt_struct {
we must retrieve all columns in the
key (if read_just_key == 1), or all
columns in the table */
- upd_node_t* upd_node; /* Innobase SQL update node used
+ upd_node_t* upd_node; /*!< Innobase SQL update node used
to perform updates and deletes */
- que_fork_t* ins_graph; /* Innobase SQL query graph used
+ que_fork_t* ins_graph; /*!< Innobase SQL query graph used
in inserts */
- que_fork_t* upd_graph; /* Innobase SQL query graph used
+ que_fork_t* upd_graph; /*!< Innobase SQL query graph used
in updates or deletes */
- btr_pcur_t* pcur; /* persistent cursor used in selects
+ btr_pcur_t* pcur; /*!< persistent cursor used in selects
and updates */
- btr_pcur_t* clust_pcur; /* persistent cursor used in
+ btr_pcur_t* clust_pcur; /*!< persistent cursor used in
some selects and updates */
- que_fork_t* sel_graph; /* dummy query graph used in
+ que_fork_t* sel_graph; /*!< dummy query graph used in
selects */
- dtuple_t* search_tuple; /* prebuilt dtuple used in selects */
+ dtuple_t* search_tuple; /*!< prebuilt dtuple used in selects */
byte row_id[DATA_ROW_ID_LEN];
- /* if the clustered index was
+ /*!< if the clustered index was
generated, the row id of the
last row fetched is stored
here */
- dtuple_t* clust_ref; /* prebuilt dtuple used in
+ dtuple_t* clust_ref; /*!< prebuilt dtuple used in
sel/upd/del */
- ulint select_lock_type;/* LOCK_NONE, LOCK_S, or LOCK_X */
- ulint stored_select_lock_type;/* this field is used to
+ ulint select_lock_type;/*!< LOCK_NONE, LOCK_S, or LOCK_X */
+ ulint stored_select_lock_type;/*!< this field is used to
remember the original select_lock_type
that was decided in ha_innodb.cc,
::store_lock(), ::external_lock(),
etc. */
- ulint row_read_type; /* ROW_READ_WITH_LOCKS if row locks
+ ulint row_read_type; /*!< ROW_READ_WITH_LOCKS if row locks
should be obtained for records
under an UPDATE or DELETE cursor.
If innodb_locks_unsafe_for_binlog
@@ -656,7 +696,7 @@ struct row_prebuilt_struct {
This eliminates lock waits in some
cases; note that this breaks
serializability. */
- ulint new_rec_locks; /* normally 0; if
+ ulint new_rec_locks; /*!< normally 0; if
srv_locks_unsafe_for_binlog is
TRUE or session is using READ
COMMITTED isolation level, in a
@@ -671,15 +711,15 @@ struct row_prebuilt_struct {
these can be used to implement a
'mini-rollback' that releases
the latest record locks */
- ulint mysql_prefix_len;/* byte offset of the end of
+ ulint mysql_prefix_len;/*!< byte offset of the end of
the last requested column */
- ulint mysql_row_len; /* length in bytes of a row in the
+ ulint mysql_row_len; /*!< length in bytes of a row in the
MySQL format */
- ulint n_rows_fetched; /* number of rows fetched after
+ ulint n_rows_fetched; /*!< number of rows fetched after
positioning the current cursor */
- ulint fetch_direction;/* ROW_SEL_NEXT or ROW_SEL_PREV */
+ ulint fetch_direction;/*!< ROW_SEL_NEXT or ROW_SEL_PREV */
byte* fetch_cache[MYSQL_FETCH_CACHE_SIZE];
- /* a cache for fetched rows if we
+ /*!< a cache for fetched rows if we
fetch many rows from the same cursor:
it saves CPU time to fetch them in a
batch; we reserve mysql_row_len
@@ -688,34 +728,35 @@ struct row_prebuilt_struct {
allocated mem buf start, because
there is a 4 byte magic number at the
start and at the end */
- ibool keep_other_fields_on_keyread; /* when using fetch
+ ibool keep_other_fields_on_keyread; /*!< when using fetch
cache with HA_EXTRA_KEYREAD, don't
overwrite other fields in mysql row
buffer.*/
- ulint fetch_cache_first;/* position of the first not yet
+ ulint fetch_cache_first;/*!< position of the first not yet
fetched row in fetch_cache */
- ulint n_fetch_cached; /* number of not yet fetched rows
+ ulint n_fetch_cached; /*!< number of not yet fetched rows
in fetch_cache */
- mem_heap_t* blob_heap; /* in SELECTS BLOB fields are copied
+ mem_heap_t* blob_heap; /*!< in SELECTS BLOB fields are copied
to this heap */
- mem_heap_t* old_vers_heap; /* memory heap where a previous
+ mem_heap_t* old_vers_heap; /*!< memory heap where a previous
version is built in consistent read */
/*----------------------*/
- ulonglong autoinc_last_value;/* last value of AUTO-INC interval */
- ulonglong autoinc_increment;/* The increment step of the auto
+ ulonglong autoinc_last_value;
+ /*!< last value of AUTO-INC interval */
+ ulonglong autoinc_increment;/*!< The increment step of the auto
increment column. Value must be
greater than or equal to 1. Required to
calculate the next value */
- ulonglong autoinc_offset; /* The offset passed to
+ ulonglong autoinc_offset; /*!< The offset passed to
get_auto_increment() by MySQL. Required
to calculate the next value */
- ulint autoinc_error; /* The actual error code encountered
+ ulint autoinc_error; /*!< The actual error code encountered
while trying to init or read the
autoinc value from the table. We
store it here so that we can return
it to MySQL */
/*----------------------*/
- ulint magic_n2; /* this should be the same as
+ ulint magic_n2; /*!< this should be the same as
magic_n */
};
diff --git a/storage/innobase/include/row0mysql.ic b/storage/innobase/include/row0mysql.ic
index aa8a70d8761..35033aa2ad1 100644
--- a/storage/innobase/include/row0mysql.ic
+++ b/storage/innobase/include/row0mysql.ic
@@ -1,7 +1,24 @@
-/******************************************************
-MySQL interface for Innobase
+/*****************************************************************************
+
+Copyright (c) 2001, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(C) 2001 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0mysql.ic
+MySQL interface for Innobase
Created 1/23/2001 Heikki Tuuri
*******************************************************/
diff --git a/storage/innobase/include/row0purge.h b/storage/innobase/include/row0purge.h
index 174dd239eb5..89ec54fb54a 100644
--- a/storage/innobase/include/row0purge.h
+++ b/storage/innobase/include/row0purge.h
@@ -1,7 +1,24 @@
-/******************************************************
-Purge obsolete records
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1997 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0purge.h
+Purge obsolete records
Created 3/14/1997 Heikki Tuuri
*******************************************************/
@@ -18,56 +35,56 @@ Created 3/14/1997 Heikki Tuuri
#include "que0types.h"
#include "row0types.h"
-/************************************************************************
-Creates a purge node to a query graph. */
-
+/********************************************************************//**
+Creates a purge node to a query graph.
+@return own: purge node */
+UNIV_INTERN
purge_node_t*
row_purge_node_create(
/*==================*/
- /* out, own: purge node */
- que_thr_t* parent, /* in: parent node, i.e., a thr node */
- mem_heap_t* heap); /* in: memory heap where created */
-/***************************************************************
+ que_thr_t* parent, /*!< in: parent node, i.e., a thr node */
+ mem_heap_t* heap); /*!< in: memory heap where created */
+/***********************************************************//**
Does the purge operation for a single undo log record. This is a high-level
-function used in an SQL execution graph. */
-
+function used in an SQL execution graph.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
row_purge_step(
/*===========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
+ que_thr_t* thr); /*!< in: query thread */
/* Purge node structure */
struct purge_node_struct{
- que_common_t common; /* node type: QUE_NODE_PURGE */
+ que_common_t common; /*!< node type: QUE_NODE_PURGE */
/*----------------------*/
/* Local storage for this graph node */
- dulint roll_ptr;/* roll pointer to undo log record */
+ roll_ptr_t roll_ptr;/* roll pointer to undo log record */
trx_undo_rec_t* undo_rec;/* undo log record */
trx_undo_inf_t* reservation;/* reservation for the undo log record in
the purge array */
- dulint undo_no;/* undo number of the record */
+ undo_no_t undo_no;/* undo number of the record */
ulint rec_type;/* undo log record type: TRX_UNDO_INSERT_REC,
... */
- btr_pcur_t pcur; /* persistent cursor used in searching the
+ btr_pcur_t pcur; /*!< persistent cursor used in searching the
clustered index record */
ibool found_clust;/* TRUE if the clustered index record
determined by ref was found in the clustered
index, and we were able to position pcur on
it */
- dict_table_t* table; /* table where purge is done */
+ dict_table_t* table; /*!< table where purge is done */
ulint cmpl_info;/* compiler analysis info of an update */
- upd_t* update; /* update vector for a clustered index
+ upd_t* update; /*!< update vector for a clustered index
record */
- dtuple_t* ref; /* NULL, or row reference to the next row to
+ dtuple_t* ref; /*!< NULL, or row reference to the next row to
handle */
- dtuple_t* row; /* NULL, or a copy (also fields copied to
+ dtuple_t* row; /*!< NULL, or a copy (also fields copied to
heap) of the indexed fields of the row to
handle */
- dict_index_t* index; /* NULL, or the next index whose record should
+ dict_index_t* index; /*!< NULL, or the next index whose record should
be handled */
- mem_heap_t* heap; /* memory heap used as auxiliary storage for
+ mem_heap_t* heap; /*!< memory heap used as auxiliary storage for
row; this must be emptied after a successful
purge of a row */
};
diff --git a/storage/innobase/include/row0purge.ic b/storage/innobase/include/row0purge.ic
index 50aabf0bc1b..23d7d3845a4 100644
--- a/storage/innobase/include/row0purge.ic
+++ b/storage/innobase/include/row0purge.ic
@@ -1,8 +1,25 @@
+/*****************************************************************************
-/******************************************************
-Purge obsolete records
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1997 Innobase Oy
+*****************************************************************************/
+
+
+/**************************************************//**
+@file include/row0purge.ic
+Purge obsolete records
Created 3/14/1997 Heikki Tuuri
*******************************************************/
diff --git a/storage/innobase/include/row0row.h b/storage/innobase/include/row0row.h
index bea7627cd86..723b7b53395 100644
--- a/storage/innobase/include/row0row.h
+++ b/storage/innobase/include/row0row.h
@@ -1,7 +1,24 @@
-/******************************************************
-General row routines
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
-(c) 1996 Innobase Oy
+/**************************************************//**
+@file include/row0row.h
+General row routines
Created 4/20/1996 Heikki Tuuri
*******************************************************/
@@ -17,219 +34,242 @@ Created 4/20/1996 Heikki Tuuri
#include "mtr0mtr.h"
#include "rem0types.h"
#include "read0types.h"
+#include "row0types.h"
#include "btr0types.h"
-/*************************************************************************
-Reads the trx id field from a clustered index record. */
+/*********************************************************************//**
+Gets the offset of the trx id field, in bytes relative to the origin of
+a clustered index record.
+@return offset of DATA_TRX_ID */
+UNIV_INTERN
+ulint
+row_get_trx_id_offset(
+/*==================*/
+ const rec_t* rec, /*!< in: record */
+ dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets);/*!< in: rec_get_offsets(rec, index) */
+/*********************************************************************//**
+Reads the trx id field from a clustered index record.
+@return value of the field */
UNIV_INLINE
-dulint
+trx_id_t
row_get_rec_trx_id(
/*===============*/
- /* out: value of the field */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets);/* in: rec_get_offsets(rec, index) */
-/*************************************************************************
-Reads the roll pointer field from a clustered index record. */
+ const rec_t* rec, /*!< in: record */
+ dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets);/*!< in: rec_get_offsets(rec, index) */
+/*********************************************************************//**
+Reads the roll pointer field from a clustered index record.
+@return value of the field */
UNIV_INLINE
-dulint
+roll_ptr_t
row_get_rec_roll_ptr(
/*=================*/
- /* out: value of the field */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets);/* in: rec_get_offsets(rec, index) */
-/*************************************************************************
-Writes the trx id field to a clustered index record. */
-UNIV_INLINE
-void
-row_set_rec_trx_id(
-/*===============*/
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- dulint trx_id);/* in: value of the field */
-/*************************************************************************
-Sets the roll pointer field in a clustered index record. */
-UNIV_INLINE
-void
-row_set_rec_roll_ptr(
-/*=================*/
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- dulint roll_ptr);/* in: value of the field */
-/*********************************************************************
-When an insert to a table is performed, this function builds the entry which
-has to be inserted to an index on the table. */
-
+ const rec_t* rec, /*!< in: record */
+ dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets);/*!< in: rec_get_offsets(rec, index) */
+/*****************************************************************//**
+When an insert or purge to a table is performed, this function builds
+the entry to be inserted into or purged from an index on the table.
+@return index entry which should be inserted or purged, or NULL if the
+externally stored columns in the clustered index record are
+unavailable and ext != NULL */
+UNIV_INTERN
dtuple_t*
row_build_index_entry(
/*==================*/
- /* out: index entry which should be inserted */
- dtuple_t* row, /* in: row which should be inserted to the
- table */
- dict_index_t* index, /* in: index on the table */
- mem_heap_t* heap); /* in: memory heap from which the memory for
+ const dtuple_t* row, /*!< in: row which should be
+ inserted or purged */
+ row_ext_t* ext, /*!< in: externally stored column prefixes,
+ or NULL */
+ dict_index_t* index, /*!< in: index on the table */
+ mem_heap_t* heap); /*!< in: memory heap from which the memory for
the index entry is allocated */
-/***********************************************************************
-An inverse function to dict_row_build_index_entry. Builds a row from a
-record in a clustered index. */
-
+/*******************************************************************//**
+An inverse function to row_build_index_entry. Builds a row from a
+record in a clustered index.
+@return own: row built; see the NOTE below! */
+UNIV_INTERN
dtuple_t*
row_build(
/*======*/
- /* out, own: row built; see the NOTE below! */
- ulint type, /* in: ROW_COPY_POINTERS or ROW_COPY_DATA;
- the latter copies also the data fields to
- heap while the first only places pointers to
- data fields on the index page, and thus is
- more efficient */
- dict_index_t* index, /* in: clustered index */
- rec_t* rec, /* in: record in the clustered index;
- NOTE: in the case ROW_COPY_POINTERS
- the data fields in the row will point
- directly into this record, therefore,
- the buffer page of this record must be
- at least s-latched and the latch held
- as long as the row dtuple is used! */
- const ulint* offsets,/* in: rec_get_offsets(rec, index)
- or NULL, in which case this function
- will invoke rec_get_offsets() */
- mem_heap_t* heap); /* in: memory heap from which the memory
- needed is allocated */
-/***********************************************************************
-Converts an index record to a typed data tuple. */
-
+ ulint type, /*!< in: ROW_COPY_POINTERS or
+ ROW_COPY_DATA; the latter
+ copies also the data fields to
+ heap while the first only
+ places pointers to data fields
+ on the index page, and thus is
+ more efficient */
+ const dict_index_t* index, /*!< in: clustered index */
+ const rec_t* rec, /*!< in: record in the clustered
+ index; NOTE: in the case
+ ROW_COPY_POINTERS the data
+ fields in the row will point
+ directly into this record,
+ therefore, the buffer page of
+ this record must be at least
+ s-latched and the latch held
+ as long as the row dtuple is used! */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec,index)
+ or NULL, in which case this function
+ will invoke rec_get_offsets() */
+ const dict_table_t* col_table,
+ /*!< in: table, to check which
+ externally stored columns
+ occur in the ordering columns
+ of an index, or NULL if
+ index->table should be
+ consulted instead; the user
+ columns in this table should be
+ the same columns as in index->table */
+ row_ext_t** ext, /*!< out, own: cache of
+ externally stored column
+ prefixes, or NULL */
+ mem_heap_t* heap); /*!< in: memory heap from which
+ the memory needed is allocated */
+/*******************************************************************//**
+Converts an index record to a typed data tuple.
+@return index entry built; does not set info_bits, and the data fields
+in the entry will point directly to rec */
+UNIV_INTERN
+dtuple_t*
+row_rec_to_index_entry_low(
+/*=======================*/
+ const rec_t* rec, /*!< in: record in the index */
+ const dict_index_t* index, /*!< in: index */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ ulint* n_ext, /*!< out: number of externally
+ stored columns */
+ mem_heap_t* heap); /*!< in: memory heap from which
+ the memory needed is allocated */
+/*******************************************************************//**
+Converts an index record to a typed data tuple. NOTE that externally
+stored (often big) fields are NOT copied to heap.
+@return own: index entry built; see the NOTE below! */
+UNIV_INTERN
dtuple_t*
row_rec_to_index_entry(
/*===================*/
- /* out, own: index entry built; see the
- NOTE below! */
- ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
- the former copies also the data fields to
- heap as the latter only places pointers to
- data fields on the index page */
- dict_index_t* index, /* in: index */
- rec_t* rec, /* in: record in the index;
- NOTE: in the case ROW_COPY_POINTERS
- the data fields in the row will point
- directly into this record, therefore,
- the buffer page of this record must be
- at least s-latched and the latch held
- as long as the dtuple is used! */
- mem_heap_t* heap); /* in: memory heap from which the memory
- needed is allocated */
-/***********************************************************************
+ ulint type, /*!< in: ROW_COPY_DATA, or
+ ROW_COPY_POINTERS: the former
+ copies also the data fields to
+ heap as the latter only places
+ pointers to data fields on the
+ index page */
+ const rec_t* rec, /*!< in: record in the index;
+ NOTE: in the case
+ ROW_COPY_POINTERS the data
+ fields in the row will point
+ directly into this record,
+ therefore, the buffer page of
+ this record must be at least
+ s-latched and the latch held
+ as long as the dtuple is used! */
+ const dict_index_t* index, /*!< in: index */
+ ulint* offsets,/*!< in/out: rec_get_offsets(rec) */
+ ulint* n_ext, /*!< out: number of externally
+ stored columns */
+ mem_heap_t* heap); /*!< in: memory heap from which
+ the memory needed is allocated */
+/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
-search the clustered index record. */
-
+search the clustered index record.
+@return own: row reference built; see the NOTE below! */
+UNIV_INTERN
dtuple_t*
row_build_row_ref(
/*==============*/
- /* out, own: row reference built; see the
- NOTE below! */
- ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
+ ulint type, /*!< in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
the former copies also the data fields to
heap, whereas the latter only places pointers
to data fields on the index page */
- dict_index_t* index, /* in: index */
- rec_t* rec, /* in: record in the index;
+ dict_index_t* index, /*!< in: secondary index */
+ const rec_t* rec, /*!< in: record in the index;
NOTE: in the case ROW_COPY_POINTERS
the data fields in the row will point
directly into this record, therefore,
the buffer page of this record must be
at least s-latched and the latch held
as long as the row reference is used! */
- mem_heap_t* heap); /* in: memory heap from which the memory
+ mem_heap_t* heap); /*!< in: memory heap from which the memory
needed is allocated */
-/***********************************************************************
+/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
search the clustered index record. */
-
+UNIV_INTERN
void
row_build_row_ref_in_tuple(
/*=======================*/
- dtuple_t* ref, /* in/out: row reference built; see the
- NOTE below! */
- dict_index_t* index, /* in: index */
- rec_t* rec, /* in: record in the index;
- NOTE: the data fields in ref will point
- directly into this record, therefore,
- the buffer page of this record must be
- at least s-latched and the latch held
- as long as the row reference is used! */
- trx_t* trx); /* in: transaction */
-/***********************************************************************
-From a row build a row reference with which we can search the clustered
-index record. */
-
-void
-row_build_row_ref_from_row(
-/*=======================*/
- dtuple_t* ref, /* in/out: row reference built; see the
- NOTE below! ref must have the right number
- of fields! */
- dict_table_t* table, /* in: table */
- dtuple_t* row); /* in: row
- NOTE: the data fields in ref will point
- directly into data of this row */
-/***********************************************************************
+ dtuple_t* ref, /*!< in/out: row reference built;
+ see the NOTE below! */
+ const rec_t* rec, /*!< in: record in the index;
+ NOTE: the data fields in ref
+ will point directly into this
+ record, therefore, the buffer
+ page of this record must be at
+ least s-latched and the latch
+ held as long as the row
+ reference is used! */
+ const dict_index_t* index, /*!< in: secondary index */
+ ulint* offsets,/*!< in: rec_get_offsets(rec, index)
+ or NULL */
+ trx_t* trx); /*!< in: transaction */
+/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
search the clustered index record. */
UNIV_INLINE
void
row_build_row_ref_fast(
/*===================*/
- dtuple_t* ref, /* in: typed data tuple where the
+ dtuple_t* ref, /*!< in/out: typed data tuple where the
reference is built */
- const ulint* map, /* in: array of field numbers in rec
+ const ulint* map, /*!< in: array of field numbers in rec
telling how ref should be built from
the fields of rec */
- rec_t* rec, /* in: record in the index; must be
+ const rec_t* rec, /*!< in: record in the index; must be
preserved while ref is used, as we do
not copy field values to heap */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/*******************************************************************
+ const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+/***************************************************************//**
Searches the clustered index record for a row, if we have the row
-reference. */
-
+reference.
+@return TRUE if found */
+UNIV_INTERN
ibool
row_search_on_row_ref(
/*==================*/
- /* out: TRUE if found */
- btr_pcur_t* pcur, /* in/out: persistent cursor, which must
- be closed by the caller */
- ulint mode, /* in: BTR_MODIFY_LEAF, ... */
- dict_table_t* table, /* in: table */
- dtuple_t* ref, /* in: row reference */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************************
+ btr_pcur_t* pcur, /*!< out: persistent cursor, which must
+ be closed by the caller */
+ ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
+ const dict_table_t* table, /*!< in: table */
+ const dtuple_t* ref, /*!< in: row reference */
+ mtr_t* mtr); /*!< in/out: mtr */
+/*********************************************************************//**
Fetches the clustered index record for a secondary index record. The latches
-on the secondary index record are preserved. */
-
+on the secondary index record are preserved.
+@return record or NULL, if no record found */
+UNIV_INTERN
rec_t*
row_get_clust_rec(
/*==============*/
- /* out: record or NULL, if no record found */
- ulint mode, /* in: BTR_MODIFY_LEAF, ... */
- rec_t* rec, /* in: record in a secondary index */
- dict_index_t* index, /* in: secondary index */
- dict_index_t** clust_index,/* out: clustered index */
- mtr_t* mtr); /* in: mtr */
-/*******************************************************************
-Searches an index record. */
-
+ ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
+ const rec_t* rec, /*!< in: record in a secondary index */
+ dict_index_t* index, /*!< in: secondary index */
+ dict_index_t** clust_index,/*!< out: clustered index */
+ mtr_t* mtr); /*!< in: mtr */
+/***************************************************************//**
+Searches an index record.
+@return TRUE if found */
+UNIV_INTERN
ibool
row_search_index_entry(
/*===================*/
- /* out: TRUE if found */
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: index entry */
- ulint mode, /* in: BTR_MODIFY_LEAF, ... */
- btr_pcur_t* pcur, /* in/out: persistent cursor, which must
+ dict_index_t* index, /*!< in: index */
+ const dtuple_t* entry, /*!< in: index entry */
+ ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
+ btr_pcur_t* pcur, /*!< in/out: persistent cursor, which must
be closed by the caller */
- mtr_t* mtr); /* in: mtr */
+ mtr_t* mtr); /*!< in: mtr */
#define ROW_COPY_DATA 1
@@ -243,6 +283,26 @@ row_search_index_entry(
No new latches may be obtained while the kernel mutex is reserved.
However, the kernel mutex can be reserved while latches are owned. */
+/*******************************************************************//**
+Formats the raw data in "data" (in InnoDB on-disk format) using
+"dict_field" and writes the result to "buf".
+Not more than "buf_size" bytes are written to "buf".
+The result is always NUL-terminated (provided buf_size is positive) and the
+number of bytes that were written to "buf" is returned (including the
+terminating NUL).
+@return number of bytes written to "buf", including the terminating NUL */
+UNIV_INTERN
+ulint
+row_raw_format(
+/*===========*/
+ const char* data, /*!< in: raw data */
+ ulint data_len, /*!< in: raw data length
+ in bytes */
+ const dict_field_t* dict_field, /*!< in: index field */
+ char* buf, /*!< out: output buffer */
+ ulint buf_size); /*!< in: output buffer size
+ in bytes */
+
#ifndef UNIV_NONINL
#include "row0row.ic"
#endif
diff --git a/storage/innobase/include/row0row.ic b/storage/innobase/include/row0row.ic
index de417f3d971..05c007641af 100644
--- a/storage/innobase/include/row0row.ic
+++ b/storage/innobase/include/row0row.ic
@@ -1,7 +1,24 @@
-/******************************************************
-General row routines
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1996 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0row.ic
+General row routines
Created 4/20/1996 Heikki Tuuri
*******************************************************/
@@ -10,161 +27,82 @@ Created 4/20/1996 Heikki Tuuri
#include "rem0rec.h"
#include "trx0undo.h"
-/*************************************************************************
-Reads the trx id or roll ptr field from a clustered index record: this function
-is slower than the specialized inline functions. */
-
-dulint
-row_get_rec_sys_field(
-/*==================*/
- /* out: value of the field */
- ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets);/* in: rec_get_offsets(rec, index) */
-/*************************************************************************
-Sets the trx id or roll ptr field in a clustered index record: this function
-is slower than the specialized inline functions. */
-
-void
-row_set_rec_sys_field(
-/*==================*/
- /* out: value of the field */
- ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- dulint val); /* in: value to set */
-
-/*************************************************************************
-Reads the trx id field from a clustered index record. */
+/*********************************************************************//**
+Reads the trx id field from a clustered index record.
+@return value of the field */
UNIV_INLINE
-dulint
+trx_id_t
row_get_rec_trx_id(
/*===============*/
- /* out: value of the field */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets)/* in: rec_get_offsets(rec, index) */
+ const rec_t* rec, /*!< in: record */
+ dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
{
ulint offset;
- ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(dict_index_is_clust(index));
ut_ad(rec_offs_validate(rec, index, offsets));
offset = index->trx_id_offset;
- if (offset) {
- return(trx_read_trx_id(rec + offset));
- } else {
- return(row_get_rec_sys_field(DATA_TRX_ID,
- rec, index, offsets));
+ if (!offset) {
+ offset = row_get_trx_id_offset(rec, index, offsets);
}
+
+ return(trx_read_trx_id(rec + offset));
}
-/*************************************************************************
-Reads the roll pointer field from a clustered index record. */
+/*********************************************************************//**
+Reads the roll pointer field from a clustered index record.
+@return value of the field */
UNIV_INLINE
-dulint
+roll_ptr_t
row_get_rec_roll_ptr(
/*=================*/
- /* out: value of the field */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets)/* in: rec_get_offsets(rec, index) */
+ const rec_t* rec, /*!< in: record */
+ dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
{
ulint offset;
- ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(dict_index_is_clust(index));
ut_ad(rec_offs_validate(rec, index, offsets));
offset = index->trx_id_offset;
- if (offset) {
- return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN));
- } else {
- return(row_get_rec_sys_field(DATA_ROLL_PTR,
- rec, index, offsets));
+ if (!offset) {
+ offset = row_get_trx_id_offset(rec, index, offsets);
}
-}
-/*************************************************************************
-Writes the trx id field to a clustered index record. */
-UNIV_INLINE
-void
-row_set_rec_trx_id(
-/*===============*/
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- dulint trx_id) /* in: value of the field */
-{
- ulint offset;
-
- ut_ad(index->type & DICT_CLUSTERED);
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- offset = index->trx_id_offset;
-
- if (offset) {
- trx_write_trx_id(rec + offset, trx_id);
- } else {
- row_set_rec_sys_field(DATA_TRX_ID,
- rec, index, offsets, trx_id);
- }
-}
-
-/*************************************************************************
-Sets the roll pointer field in a clustered index record. */
-UNIV_INLINE
-void
-row_set_rec_roll_ptr(
-/*=================*/
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- dulint roll_ptr)/* in: value of the field */
-{
- ulint offset;
-
- ut_ad(index->type & DICT_CLUSTERED);
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- offset = index->trx_id_offset;
-
- if (offset) {
- trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr);
- } else {
- row_set_rec_sys_field(DATA_ROLL_PTR,
- rec, index, offsets, roll_ptr);
- }
+ return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN));
}
-/***********************************************************************
+/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
search the clustered index record. */
UNIV_INLINE
void
row_build_row_ref_fast(
/*===================*/
- dtuple_t* ref, /* in: typed data tuple where the
+ dtuple_t* ref, /*!< in/out: typed data tuple where the
reference is built */
- const ulint* map, /* in: array of field numbers in rec
+ const ulint* map, /*!< in: array of field numbers in rec
telling how ref should be built from
the fields of rec */
- rec_t* rec, /* in: record in the index; must be
+ const rec_t* rec, /*!< in: record in the index; must be
preserved while ref is used, as we do
not copy field values to heap */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
dfield_t* dfield;
- byte* field;
+ const byte* field;
ulint len;
ulint ref_len;
ulint field_no;
ulint i;
ut_ad(rec_offs_validate(rec, NULL, offsets));
+ ut_ad(!rec_offs_any_extern(offsets));
ref_len = dtuple_get_n_fields(ref);
for (i = 0; i < ref_len; i++) {
diff --git a/storage/innobase/include/row0sel.h b/storage/innobase/include/row0sel.h
index a0a4ccb973b..01a5afaa23e 100644
--- a/storage/innobase/include/row0sel.h
+++ b/storage/innobase/include/row0sel.h
@@ -1,7 +1,24 @@
-/******************************************************
-Select
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1997 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0sel.h
+Select
Created 12/19/1997 Heikki Tuuri
*******************************************************/
@@ -21,191 +38,188 @@ Created 12/19/1997 Heikki Tuuri
#include "read0read.h"
#include "row0mysql.h"
-/*************************************************************************
-Creates a select node struct. */
-
+/*********************************************************************//**
+Creates a select node struct.
+@return own: select node struct */
+UNIV_INTERN
sel_node_t*
sel_node_create(
/*============*/
- /* out, own: select node struct */
- mem_heap_t* heap); /* in: memory heap where created */
-/*************************************************************************
+ mem_heap_t* heap); /*!< in: memory heap where created */
+/*********************************************************************//**
Frees the memory private to a select node when a query graph is freed,
does not free the heap where the node was originally created. */
-
+UNIV_INTERN
void
sel_node_free_private(
/*==================*/
- sel_node_t* node); /* in: select node struct */
-/*************************************************************************
+ sel_node_t* node); /*!< in: select node struct */
+/*********************************************************************//**
Frees a prefetch buffer for a column, including the dynamically allocated
memory for data stored there. */
-
+UNIV_INTERN
void
sel_col_prefetch_buf_free(
/*======================*/
- sel_buf_t* prefetch_buf); /* in, own: prefetch buffer */
-/*************************************************************************
-Gets the plan node for the nth table in a join. */
+ sel_buf_t* prefetch_buf); /*!< in, own: prefetch buffer */
+/*********************************************************************//**
+Gets the plan node for the nth table in a join.
+@return plan node */
UNIV_INLINE
plan_t*
sel_node_get_nth_plan(
/*==================*/
- sel_node_t* node,
- ulint i);
-/**************************************************************************
+ sel_node_t* node, /*!< in: select node */
+ ulint i); /*!< in: index of the plan node to get */
+/**********************************************************************//**
Performs a select step. This is a high-level function used in SQL execution
-graphs. */
-
+graphs.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
row_sel_step(
/*=========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
-Performs an execution step of an open or close cursor statement node. */
+ que_thr_t* thr); /*!< in: query thread */
+/**********************************************************************//**
+Performs an execution step of an open or close cursor statement node.
+@return query thread to run next or NULL */
UNIV_INLINE
que_thr_t*
open_step(
/*======*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
-Performs a fetch for a cursor. */
-
+ que_thr_t* thr); /*!< in: query thread */
+/**********************************************************************//**
+Performs a fetch for a cursor.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
fetch_step(
/*=======*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/********************************************************************
-Sample callback function for fetch that prints each row.*/
-
+ que_thr_t* thr); /*!< in: query thread */
+/****************************************************************//**
+Sample callback function for fetch that prints each row.
+@return always returns non-NULL */
+UNIV_INTERN
void*
row_fetch_print(
/*============*/
- /* out: always returns non-NULL */
- void* row, /* in: sel_node_t* */
- void* user_arg); /* in: not used */
-/********************************************************************
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg); /*!< in: not used */
+/****************************************************************//**
Callback function for fetch that stores an unsigned 4 byte integer to the
location pointed. The column's type must be DATA_INT, DATA_UNSIGNED, length
-= 4. */
-
+= 4.
+@return always returns NULL */
+UNIV_INTERN
void*
row_fetch_store_uint4(
/*==================*/
- /* out: always returns NULL */
- void* row, /* in: sel_node_t* */
- void* user_arg); /* in: data pointer */
-/***************************************************************
-Prints a row in a select result. */
-
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg); /*!< in: data pointer */
+/***********************************************************//**
+Prints a row in a select result.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
row_printf_step(
/*============*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/********************************************************************
+ que_thr_t* thr); /*!< in: query thread */
+/****************************************************************//**
Converts a key value stored in MySQL format to an Innobase dtuple. The last
field of the key value may be just a prefix of a fixed length field: hence
the parameter key_len. But currently we do not allow search keys where the
last field is only a prefix of the full key field len and print a warning if
such appears. */
-
+UNIV_INTERN
void
row_sel_convert_mysql_key_to_innobase(
/*==================================*/
- dtuple_t* tuple, /* in: tuple where to build;
+ dtuple_t* tuple, /*!< in/out: tuple where to build;
NOTE: we assume that the type info
in the tuple is already according
to index! */
- byte* buf, /* in: buffer to use in field
+ byte* buf, /*!< in: buffer to use in field
conversions */
- ulint buf_len, /* in: buffer length */
- dict_index_t* index, /* in: index of the key value */
- byte* key_ptr, /* in: MySQL key value */
- ulint key_len, /* in: MySQL key value length */
- trx_t* trx); /* in: transaction */
-/************************************************************************
+ ulint buf_len, /*!< in: buffer length */
+ dict_index_t* index, /*!< in: index of the key value */
+ const byte* key_ptr, /*!< in: MySQL key value */
+ ulint key_len, /*!< in: MySQL key value length */
+ trx_t* trx); /*!< in: transaction */
+/********************************************************************//**
Searches for rows in the database. This is used in the interface to
MySQL. This function opens a cursor, and also implements fetch next
and fetch prev. NOTE that if we do a search with a full key value
from a unique index (ROW_SEL_EXACT), then we will not store the cursor
-position and fetch next or fetch prev must not be tried to the cursor! */
-
+position and fetch next or fetch prev must not be attempted on the cursor!
+@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK,
+DB_LOCK_TABLE_FULL, or DB_TOO_BIG_RECORD */
+UNIV_INTERN
ulint
row_search_for_mysql(
/*=================*/
- /* out: DB_SUCCESS,
- DB_RECORD_NOT_FOUND,
- DB_END_OF_INDEX, DB_DEADLOCK,
- DB_LOCK_TABLE_FULL,
- or DB_TOO_BIG_RECORD */
- byte* buf, /* in/out: buffer for the fetched
+ byte* buf, /*!< in/out: buffer for the fetched
row in the MySQL format */
- ulint mode, /* in: search mode PAGE_CUR_L, ... */
- row_prebuilt_t* prebuilt, /* in: prebuilt struct for the
+ ulint mode, /*!< in: search mode PAGE_CUR_L, ... */
+ row_prebuilt_t* prebuilt, /*!< in: prebuilt struct for the
table handle; this contains the info
of search_tuple, index; if search
tuple contains 0 fields then we
position the cursor at the start or
the end of the index, depending on
'mode' */
- ulint match_mode, /* in: 0 or ROW_SEL_EXACT or
+ ulint match_mode, /*!< in: 0 or ROW_SEL_EXACT or
ROW_SEL_EXACT_PREFIX */
- ulint direction); /* in: 0 or ROW_SEL_NEXT or
+ ulint direction); /*!< in: 0 or ROW_SEL_NEXT or
ROW_SEL_PREV; NOTE: if this is != 0,
then prebuilt must have a pcur
with stored position! In opening of a
cursor 'direction' should be 0. */
-/***********************************************************************
+/*******************************************************************//**
Checks if MySQL at the moment is allowed for this table to retrieve a
-consistent read result, or store it to the query cache. */
-
+consistent read result, or store it to the query cache.
+@return TRUE if storing or retrieving from the query cache is permitted */
+UNIV_INTERN
ibool
row_search_check_if_query_cache_permitted(
/*======================================*/
- /* out: TRUE if storing or retrieving
- from the query cache is permitted */
- trx_t* trx, /* in: transaction object */
- const char* norm_name); /* in: concatenation of database name,
+ trx_t* trx, /*!< in: transaction object */
+ const char* norm_name); /*!< in: concatenation of database name,
'/' char, table name */
-/***********************************************************************
-Read the max AUTOINC value from an index. */
-
+/*******************************************************************//**
+Read the max AUTOINC value from an index.
+@return DB_SUCCESS if all OK else error code */
+UNIV_INTERN
ulint
row_search_max_autoinc(
/*===================*/
- /* out: DB_SUCCESS if all OK else
- error code */
- dict_index_t* index, /* in: index to search */
- const char* col_name, /* in: autoinc column name */
- ib_ulonglong* value); /* out: AUTOINC value read */
+ dict_index_t* index, /*!< in: index to search */
+ const char* col_name, /*!< in: autoinc column name */
+ ib_uint64_t* value); /*!< out: AUTOINC value read */
-/* A structure for caching column values for prefetched rows */
+/** A structure for caching column values for prefetched rows */
struct sel_buf_struct{
- byte* data; /* data, or NULL; if not NULL, this field
+ byte* data; /*!< data, or NULL; if not NULL, this field
has allocated memory which must be explicitly
freed; can be != NULL even when len is
UNIV_SQL_NULL */
- ulint len; /* data length or UNIV_SQL_NULL */
+ ulint len; /*!< data length or UNIV_SQL_NULL */
ulint val_buf_size;
- /* size of memory buffer allocated for data:
+ /*!< size of memory buffer allocated for data:
this can be more than len; this is defined
when data != NULL */
};
+/** Query plan */
struct plan_struct{
- dict_table_t* table; /* table struct in the dictionary
+ dict_table_t* table; /*!< table struct in the dictionary
cache */
- dict_index_t* index; /* table index used in the search */
- btr_pcur_t pcur; /* persistent cursor used to search
+ dict_index_t* index; /*!< table index used in the search */
+ btr_pcur_t pcur; /*!< persistent cursor used to search
the index */
- ibool asc; /* TRUE if cursor traveling upwards */
- ibool pcur_is_open; /* TRUE if pcur has been positioned
+ ibool asc; /*!< TRUE if cursor traveling upwards */
+ ibool pcur_is_open; /*!< TRUE if pcur has been positioned
and we can try to fetch new rows */
- ibool cursor_at_end; /* TRUE if the cursor is open but
+ ibool cursor_at_end; /*!< TRUE if the cursor is open but
we know that there are no more
qualifying rows left to retrieve from
the index tree; NOTE though, that
@@ -213,31 +227,34 @@ struct plan_struct{
the prefetch stack; always FALSE when
pcur_is_open is FALSE */
ibool stored_cursor_rec_processed;
- /* TRUE if the pcur position has been
+ /*!< TRUE if the pcur position has been
stored and the record it is positioned
on has already been processed */
- que_node_t** tuple_exps; /* array of expressions which are used
- to calculate the field values in the
- search tuple: there is one expression
- for each field in the search tuple */
- dtuple_t* tuple; /* search tuple */
- ulint mode; /* search mode: PAGE_CUR_G, ... */
- ulint n_exact_match; /* number of first fields in the search
- tuple which must be exactly matched */
- ibool unique_search; /* TRUE if we are searching an
+ que_node_t** tuple_exps; /*!< array of expressions
+ which are used to calculate
+ the field values in the search
+ tuple: there is one expression
+ for each field in the search
+ tuple */
+ dtuple_t* tuple; /*!< search tuple */
+ ulint mode; /*!< search mode: PAGE_CUR_G, ... */
+ ulint n_exact_match; /*!< number of first fields in
+ the search tuple which must be
+ exactly matched */
+ ibool unique_search; /*!< TRUE if we are searching an
index record with a unique key */
- ulint n_rows_fetched; /* number of rows fetched using pcur
+ ulint n_rows_fetched; /*!< number of rows fetched using pcur
after it was opened */
- ulint n_rows_prefetched;/* number of prefetched rows cached
+ ulint n_rows_prefetched;/*!< number of prefetched rows cached
for fetch: fetching several rows in
the same mtr saves CPU time */
- ulint first_prefetched;/* index of the first cached row in
+ ulint first_prefetched;/*!< index of the first cached row in
select buffer arrays for each column */
- ibool no_prefetch; /* no prefetch for this table */
- sym_node_list_t columns; /* symbol table nodes for the columns
+ ibool no_prefetch; /*!< no prefetch for this table */
+ sym_node_list_t columns; /*!< symbol table nodes for the columns
to retrieve from the table */
UT_LIST_BASE_NODE_T(func_node_t)
- end_conds; /* conditions which determine the
+ end_conds; /*!< conditions which determine the
fetch limit of the index segment we
have to look at: when one of these
fails, the result set has been
@@ -246,9 +263,9 @@ struct plan_struct{
so that in a comparison the column
for this table is the first argument */
UT_LIST_BASE_NODE_T(func_node_t)
- other_conds; /* the rest of search conditions we can
+ other_conds; /*!< the rest of search conditions we can
test at this table in a join */
- ibool must_get_clust; /* TRUE if index is a non-clustered
+ ibool must_get_clust; /*!< TRUE if index is a non-clustered
index and we must also fetch the
clustered index record; this is the
case if the non-clustered record does
@@ -256,59 +273,63 @@ struct plan_struct{
if this is a single-table explicit
cursor, or a searched update or
delete */
- ulint* clust_map; /* map telling how clust_ref is built
+ ulint* clust_map; /*!< map telling how clust_ref is built
from the fields of a non-clustered
record */
- dtuple_t* clust_ref; /* the reference to the clustered
+ dtuple_t* clust_ref; /*!< the reference to the clustered
index entry is built here if index is
a non-clustered index */
- btr_pcur_t clust_pcur; /* if index is non-clustered, we use
+ btr_pcur_t clust_pcur; /*!< if index is non-clustered, we use
this pcur to search the clustered
index */
- mem_heap_t* old_vers_heap; /* memory heap used in building an old
+ mem_heap_t* old_vers_heap; /*!< memory heap used in building an old
version of a row, or NULL */
};
+/** Select node states */
+enum sel_node_state {
+ SEL_NODE_CLOSED, /*!< it is a declared cursor which is not
+ currently open */
+ SEL_NODE_OPEN, /*!< intention locks not yet set on tables */
+ SEL_NODE_FETCH, /*!< intention locks have been set */
+ SEL_NODE_NO_MORE_ROWS /*!< cursor has reached the result set end */
+};
+
+/** Select statement node */
struct sel_node_struct{
- que_common_t common; /* node type: QUE_NODE_SELECT */
- ulint state; /* node state */
- que_node_t* select_list; /* select list */
- sym_node_t* into_list; /* variables list or NULL */
- sym_node_t* table_list; /* table list */
- ibool asc; /* TRUE if the rows should be fetched
+ que_common_t common; /*!< node type: QUE_NODE_SELECT */
+ enum sel_node_state
+ state; /*!< node state */
+ que_node_t* select_list; /*!< select list */
+ sym_node_t* into_list; /*!< variables list or NULL */
+ sym_node_t* table_list; /*!< table list */
+ ibool asc; /*!< TRUE if the rows should be fetched
in an ascending order */
- ibool set_x_locks; /* TRUE if the cursor is for update or
+ ibool set_x_locks; /*!< TRUE if the cursor is for update or
delete, which means that a row x-lock
should be placed on the cursor row */
- ibool select_will_do_update;
- /* TRUE if the select is for a searched
- update which can be performed in-place:
- in this case the select will take care
- of the update */
- ulint latch_mode; /* BTR_SEARCH_LEAF, or BTR_MODIFY_LEAF
- if select_will_do_update is TRUE */
- ulint row_lock_mode; /* LOCK_X or LOCK_S */
- ulint n_tables; /* number of tables */
- ulint fetch_table; /* number of the next table to access
+ ulint row_lock_mode; /*!< LOCK_X or LOCK_S */
+ ulint n_tables; /*!< number of tables */
+ ulint fetch_table; /*!< number of the next table to access
in the join */
- plan_t* plans; /* array of n_tables many plan nodes
+ plan_t* plans; /*!< array of n_tables many plan nodes
containing the search plan and the
search data structures */
- que_node_t* search_cond; /* search condition */
- read_view_t* read_view; /* if the query is a non-locking
+ que_node_t* search_cond; /*!< search condition */
+ read_view_t* read_view; /*!< if the query is a non-locking
consistent read, its read view is
placed here, otherwise NULL */
- ibool consistent_read;/* TRUE if the select is a consistent,
+ ibool consistent_read;/*!< TRUE if the select is a consistent,
non-locking read */
- order_node_t* order_by; /* order by column definition, or
+ order_node_t* order_by; /*!< order by column definition, or
NULL */
- ibool is_aggregate; /* TRUE if the select list consists of
+ ibool is_aggregate; /*!< TRUE if the select list consists of
aggregate functions */
ibool aggregate_already_fetched;
- /* TRUE if the aggregate row has
+ /*!< TRUE if the aggregate row has
already been fetched for the current
cursor */
- ibool can_get_updated;/* this is TRUE if the select
+ ibool can_get_updated;/*!< this is TRUE if the select
is in a single-table explicit
cursor which can get updated
within the stored procedure,
@@ -319,31 +340,22 @@ struct sel_node_struct{
checks from a stored procedure
if it contains positioned
update or delete statements */
- sym_node_t* explicit_cursor;/* not NULL if an explicit cursor */
+ sym_node_t* explicit_cursor;/*!< not NULL if an explicit cursor */
UT_LIST_BASE_NODE_T(sym_node_t)
- copy_variables; /* variables whose values we have to
+ copy_variables; /*!< variables whose values we have to
copy when an explicit cursor is opened,
so that they do not change between
fetches */
};
-/* Select node states */
-#define SEL_NODE_CLOSED 0 /* it is a declared cursor which is not
- currently open */
-#define SEL_NODE_OPEN 1 /* intention locks not yet set on
- tables */
-#define SEL_NODE_FETCH 2 /* intention locks have been set */
-#define SEL_NODE_NO_MORE_ROWS 3 /* cursor has reached the result set
- end */
-
-/* Fetch statement node */
+/** Fetch statement node */
struct fetch_node_struct{
- que_common_t common; /* type: QUE_NODE_FETCH */
- sel_node_t* cursor_def; /* cursor definition */
- sym_node_t* into_list; /* variables to set */
+ que_common_t common; /*!< type: QUE_NODE_FETCH */
+ sel_node_t* cursor_def; /*!< cursor definition */
+ sym_node_t* into_list; /*!< variables to set */
pars_user_func_t*
- func; /* User callback function or NULL.
+ func; /*!< User callback function or NULL.
The first argument to the function
is a sel_node_t*, containing the
results of the SELECT operation for
@@ -357,33 +369,42 @@ struct fetch_node_struct{
(and a useful debugging tool). */
};
-/* Open or close cursor statement node */
+/** Open or close cursor operation type */
+enum open_node_op {
+ ROW_SEL_OPEN_CURSOR, /*!< open cursor */
+ ROW_SEL_CLOSE_CURSOR /*!< close cursor */
+};
+
+/** Open or close cursor statement node */
struct open_node_struct{
- que_common_t common; /* type: QUE_NODE_OPEN */
- ulint op_type; /* ROW_SEL_OPEN_CURSOR or
- ROW_SEL_CLOSE_CURSOR */
- sel_node_t* cursor_def; /* cursor definition */
+ que_common_t common; /*!< type: QUE_NODE_OPEN */
+ enum open_node_op
+ op_type; /*!< operation type: open or
+ close cursor */
+ sel_node_t* cursor_def; /*!< cursor definition */
};
-/* Row printf statement node */
+/** Row printf statement node */
struct row_printf_node_struct{
- que_common_t common; /* type: QUE_NODE_ROW_PRINTF */
- sel_node_t* sel_node; /* select */
+ que_common_t common; /*!< type: QUE_NODE_ROW_PRINTF */
+ sel_node_t* sel_node; /*!< select */
};
-#define ROW_SEL_OPEN_CURSOR 0
-#define ROW_SEL_CLOSE_CURSOR 1
-
-/* Flags for the MySQL interface */
-#define ROW_SEL_NEXT 1
-#define ROW_SEL_PREV 2
+/** Search direction for the MySQL interface */
+enum row_sel_direction {
+ ROW_SEL_NEXT = 1, /*!< ascending direction */
+ ROW_SEL_PREV = 2 /*!< descending direction */
+};
-#define ROW_SEL_EXACT 1 /* search using a complete key value */
-#define ROW_SEL_EXACT_PREFIX 2 /* search using a key prefix which
- must match to rows: the prefix may
- contain an incomplete field (the
- last field in prefix may be just
- a prefix of a fixed length column) */
+/** Match mode for the MySQL interface */
+enum row_sel_match_mode {
+ ROW_SEL_EXACT = 1, /*!< search using a complete key value */
+ ROW_SEL_EXACT_PREFIX /*!< search using a key prefix which
+ must match rows: the prefix may
+ contain an incomplete field (the last
+ field in prefix may be just a prefix
+ of a fixed length column) */
+};
#ifndef UNIV_NONINL
#include "row0sel.ic"
diff --git a/storage/innobase/include/row0sel.ic b/storage/innobase/include/row0sel.ic
index 1f92b99271e..5907f9913da 100644
--- a/storage/innobase/include/row0sel.ic
+++ b/storage/innobase/include/row0sel.ic
@@ -1,29 +1,46 @@
-/******************************************************
-Select
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1997 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0sel.ic
+Select
Created 12/19/1997 Heikki Tuuri
*******************************************************/
#include "que0que.h"
-/*************************************************************************
-Gets the plan node for the nth table in a join. */
+/*********************************************************************//**
+Gets the plan node for the nth table in a join.
+@return plan node */
UNIV_INLINE
plan_t*
sel_node_get_nth_plan(
/*==================*/
- /* out: plan node */
- sel_node_t* node, /* in: select node */
- ulint i) /* in: get ith plan node */
+ sel_node_t* node, /*!< in: select node */
+ ulint i) /*!< in: get ith plan node */
{
ut_ad(i < node->n_tables);
return(node->plans + i);
}
-/*************************************************************************
+/*********************************************************************//**
Resets the cursor defined by sel_node to the SEL_NODE_OPEN state, which means
that it will start fetching from the start of the result set again, regardless
of where it was before, and it will set intention locks on the tables. */
@@ -31,19 +48,19 @@ UNIV_INLINE
void
sel_node_reset_cursor(
/*==================*/
- sel_node_t* node) /* in: select node */
+ sel_node_t* node) /*!< in: select node */
{
node->state = SEL_NODE_OPEN;
}
-/**************************************************************************
-Performs an execution step of an open or close cursor statement node. */
+/**********************************************************************//**
+Performs an execution step of an open or close cursor statement node.
+@return query thread to run next or NULL */
UNIV_INLINE
que_thr_t*
open_step(
/*======*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
sel_node_t* sel_node;
open_node_t* node;
@@ -51,7 +68,7 @@ open_step(
ut_ad(thr);
- node = thr->run_node;
+ node = (open_node_t*) thr->run_node;
ut_ad(que_node_get_type(node) == QUE_NODE_OPEN);
sel_node = node->cursor_def;
diff --git a/storage/innobase/include/row0types.h b/storage/innobase/include/row0types.h
index 56ca8711848..7920fd75061 100644
--- a/storage/innobase/include/row0types.h
+++ b/storage/innobase/include/row0types.h
@@ -1,7 +1,24 @@
-/******************************************************
-Row operation global types
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1996 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0types.h
+Row operation global types
Created 12/27/1996 Heikki Tuuri
*******************************************************/
@@ -34,4 +51,9 @@ typedef struct undo_node_struct undo_node_t;
typedef struct purge_node_struct purge_node_t;
+typedef struct row_ext_struct row_ext_t;
+
+/* MySQL data types */
+typedef struct st_table TABLE;
+
#endif
diff --git a/storage/innobase/include/row0uins.h b/storage/innobase/include/row0uins.h
index e28d5363048..77b071c3a6b 100644
--- a/storage/innobase/include/row0uins.h
+++ b/storage/innobase/include/row0uins.h
@@ -1,7 +1,24 @@
-/******************************************************
-Fresh insert undo
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1996 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0uins.h
+Fresh insert undo
Created 2/25/1997 Heikki Tuuri
*******************************************************/
@@ -17,17 +34,18 @@ Created 2/25/1997 Heikki Tuuri
#include "row0types.h"
#include "mtr0mtr.h"
-/***************************************************************
+/***********************************************************//**
Undoes a fresh insert of a row to a table. A fresh insert means that
the same clustered index unique key did not have any record, even delete
-marked, at the time of the insert. */
-
+marked, at the time of the insert. InnoDB is eager in a rollback:
+if it figures out that an index record will be removed in the purge
+anyway, it will remove it in the rollback.
+@return DB_SUCCESS */
+UNIV_INTERN
ulint
row_undo_ins(
/*=========*/
- /* out: DB_SUCCESS */
- undo_node_t* node); /* in: row undo node */
-
+ undo_node_t* node); /*!< in: row undo node */
#ifndef UNIV_NONINL
#include "row0uins.ic"
diff --git a/storage/innobase/include/row0uins.ic b/storage/innobase/include/row0uins.ic
index 2b3d5a10f95..27606150d8e 100644
--- a/storage/innobase/include/row0uins.ic
+++ b/storage/innobase/include/row0uins.ic
@@ -1,7 +1,24 @@
-/******************************************************
-Fresh insert undo
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0uins.ic
+Fresh insert undo
Created 2/25/1997 Heikki Tuuri
*******************************************************/
diff --git a/storage/innobase/include/row0umod.h b/storage/innobase/include/row0umod.h
index f22945e6f12..ed44cc8d601 100644
--- a/storage/innobase/include/row0umod.h
+++ b/storage/innobase/include/row0umod.h
@@ -1,7 +1,24 @@
-/******************************************************
-Undo modify of a row
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1997 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0umod.h
+Undo modify of a row
Created 2/27/1997 Heikki Tuuri
*******************************************************/
@@ -17,15 +34,15 @@ Created 2/27/1997 Heikki Tuuri
#include "row0types.h"
#include "mtr0mtr.h"
-/***************************************************************
-Undoes a modify operation on a row of a table. */
-
+/***********************************************************//**
+Undoes a modify operation on a row of a table.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
ulint
row_undo_mod(
/*=========*/
- /* out: DB_SUCCESS or error code */
- undo_node_t* node, /* in: row undo node */
- que_thr_t* thr); /* in: query thread */
+ undo_node_t* node, /*!< in: row undo node */
+ que_thr_t* thr); /*!< in: query thread */
#ifndef UNIV_NONINL
diff --git a/storage/innobase/include/row0umod.ic b/storage/innobase/include/row0umod.ic
index fcbf4dbc1f3..ea3fd3b43c7 100644
--- a/storage/innobase/include/row0umod.ic
+++ b/storage/innobase/include/row0umod.ic
@@ -1,7 +1,24 @@
-/******************************************************
-Undo modify of a row
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1997 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0umod.ic
+Undo modify of a row
Created 2/27/1997 Heikki Tuuri
*******************************************************/
diff --git a/storage/innobase/include/row0undo.h b/storage/innobase/include/row0undo.h
index 0be09ed1822..6eb4ca448b3 100644
--- a/storage/innobase/include/row0undo.h
+++ b/storage/innobase/include/row0undo.h
@@ -1,7 +1,24 @@
-/******************************************************
-Row undo
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
-(c) 1997 Innobase Oy
+/**************************************************//**
+@file include/row0undo.h
+Row undo
Created 1/8/1997 Heikki Tuuri
*******************************************************/
@@ -19,38 +36,37 @@ Created 1/8/1997 Heikki Tuuri
#include "que0types.h"
#include "row0types.h"
-/************************************************************************
-Creates a row undo node to a query graph. */
-
+/********************************************************************//**
+Creates a row undo node for a query graph.
+@return own: undo node */
+UNIV_INTERN
undo_node_t*
row_undo_node_create(
/*=================*/
- /* out, own: undo node */
- trx_t* trx, /* in: transaction */
- que_thr_t* parent, /* in: parent node, i.e., a thr node */
- mem_heap_t* heap); /* in: memory heap where created */
-/***************************************************************
+ trx_t* trx, /*!< in: transaction */
+ que_thr_t* parent, /*!< in: parent node, i.e., a thr node */
+ mem_heap_t* heap); /*!< in: memory heap where created */
+/***********************************************************//**
Looks for the clustered index record when node has the row reference.
The pcur in node is used in the search. If found, stores the row to node,
and stores the position of pcur, and detaches it. The pcur must be closed
-by the caller in any case. */
-
+by the caller in any case.
+@return TRUE if found; NOTE the node->pcur must be closed by the
+caller, regardless of the return value */
+UNIV_INTERN
ibool
row_undo_search_clust_to_pcur(
/*==========================*/
- /* out: TRUE if found; NOTE the node->pcur
- must be closed by the caller, regardless of
- the return value */
- undo_node_t* node); /* in: row undo node */
-/***************************************************************
+ undo_node_t* node); /*!< in: row undo node */
+/***********************************************************//**
Undoes a row operation in a table. This is a high-level function used
-in SQL execution graphs. */
-
+in SQL execution graphs.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
row_undo_step(
/*==========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
+ que_thr_t* thr); /*!< in: query thread */
/* A single query thread will try to perform the undo for all successive
versions of a clustered index record, if the transaction has modified it
@@ -67,46 +83,57 @@ just in the case where the transaction modified the same record several times
and another thread is currently doing the undo for successive versions of
that index record. */
-/* Undo node structure */
+/** Execution state of an undo node */
+enum undo_exec {
+ UNDO_NODE_FETCH_NEXT = 1, /*!< we should fetch the next
+ undo log record */
+ UNDO_NODE_PREV_VERS, /*!< the roll ptr to previous
+ version of a row is stored in
+ node, and undo should be done
+ based on it */
+ UNDO_NODE_INSERT, /*!< undo a fresh insert of a
+ row to a table */
+ UNDO_NODE_MODIFY /*!< undo a modify operation
+ (DELETE or UPDATE) on a row
+ of a table */
+};
+/** Undo node structure */
struct undo_node_struct{
- que_common_t common; /* node type: QUE_NODE_UNDO */
- ulint state; /* node execution state */
- trx_t* trx; /* trx for which undo is done */
- dulint roll_ptr;/* roll pointer to undo log record */
- trx_undo_rec_t* undo_rec;/* undo log record */
- dulint undo_no;/* undo number of the record */
- ulint rec_type;/* undo log record type: TRX_UNDO_INSERT_REC,
+ que_common_t common; /*!< node type: QUE_NODE_UNDO */
+ enum undo_exec state; /*!< node execution state */
+ trx_t* trx; /*!< trx for which undo is done */
+ roll_ptr_t roll_ptr;/*!< roll pointer to undo log record */
+ trx_undo_rec_t* undo_rec;/*!< undo log record */
+ undo_no_t undo_no;/*!< undo number of the record */
+ ulint rec_type;/*!< undo log record type: TRX_UNDO_INSERT_REC,
... */
- dulint new_roll_ptr; /* roll ptr to restore to clustered index
+ roll_ptr_t new_roll_ptr;
+ /*!< roll ptr to restore to clustered index
record */
- dulint new_trx_id; /* trx id to restore to clustered index
+ trx_id_t new_trx_id; /*!< trx id to restore to clustered index
record */
- btr_pcur_t pcur; /* persistent cursor used in searching the
+ btr_pcur_t pcur; /*!< persistent cursor used in searching the
clustered index record */
- dict_table_t* table; /* table where undo is done */
- ulint cmpl_info;/* compiler analysis of an update */
- upd_t* update; /* update vector for a clustered index
+ dict_table_t* table; /*!< table where undo is done */
+ ulint cmpl_info;/*!< compiler analysis of an update */
+ upd_t* update; /*!< update vector for a clustered index
record */
- dtuple_t* ref; /* row reference to the next row to handle */
- dtuple_t* row; /* a copy (also fields copied to heap) of the
+ dtuple_t* ref; /*!< row reference to the next row to handle */
+ dtuple_t* row; /*!< a copy (also fields copied to heap) of the
row to handle */
- dict_index_t* index; /* the next index whose record should be
+ row_ext_t* ext; /*!< NULL, or prefixes of the externally
+ stored columns of the row */
+ dtuple_t* undo_row;/*!< NULL, or the row after undo */
+ row_ext_t* undo_ext;/*!< NULL, or prefixes of the externally
+ stored columns of undo_row */
+ dict_index_t* index; /*!< the next index whose record should be
handled */
- mem_heap_t* heap; /* memory heap used as auxiliary storage for
+ mem_heap_t* heap; /*!< memory heap used as auxiliary storage for
row; this must be emptied after undo is tried
on a row */
};
-/* Execution states for an undo node */
-#define UNDO_NODE_FETCH_NEXT 1 /* we should fetch the next undo log
- record */
-#define UNDO_NODE_PREV_VERS 2 /* the roll ptr to previous version of
- a row is stored in node, and undo
- should be done based on it */
-#define UNDO_NODE_INSERT 3
-#define UNDO_NODE_MODIFY 4
-
#ifndef UNIV_NONINL
#include "row0undo.ic"
diff --git a/storage/innobase/include/row0undo.ic b/storage/innobase/include/row0undo.ic
index e7f89c7de67..dc788debc14 100644
--- a/storage/innobase/include/row0undo.ic
+++ b/storage/innobase/include/row0undo.ic
@@ -1,7 +1,24 @@
-/******************************************************
-Row undo
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1997 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0undo.ic
+Row undo
Created 1/8/1997 Heikki Tuuri
*******************************************************/
diff --git a/storage/innobase/include/row0upd.h b/storage/innobase/include/row0upd.h
index efbc6d6facf..635d746d5a1 100644
--- a/storage/innobase/include/row0upd.h
+++ b/storage/innobase/include/row0upd.h
@@ -1,7 +1,24 @@
-/******************************************************
-Update of a row
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1996 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0upd.h
+Update of a row
Created 12/27/1996 Heikki Tuuri
*******************************************************/
@@ -11,333 +28,364 @@ Created 12/27/1996 Heikki Tuuri
#include "univ.i"
#include "data0data.h"
+#include "row0types.h"
#include "btr0types.h"
-#include "btr0pcur.h"
#include "dict0types.h"
#include "trx0types.h"
-#include "que0types.h"
-#include "row0types.h"
-#include "pars0types.h"
-/*************************************************************************
-Creates an update vector object. */
+#ifndef UNIV_HOTBACKUP
+# include "btr0pcur.h"
+# include "que0types.h"
+# include "pars0types.h"
+#endif /* !UNIV_HOTBACKUP */
+
+/*********************************************************************//**
+Creates an update vector object.
+@return own: update vector object */
UNIV_INLINE
upd_t*
upd_create(
/*=======*/
- /* out, own: update vector object */
- ulint n, /* in: number of fields */
- mem_heap_t* heap); /* in: heap from which memory allocated */
-/*************************************************************************
+ ulint n, /*!< in: number of fields */
+ mem_heap_t* heap); /*!< in: heap from which memory allocated */
+/*********************************************************************//**
Returns the number of fields in the update vector == number of columns
-to be updated by an update vector. */
+to be updated by an update vector.
+@return number of fields */
UNIV_INLINE
ulint
upd_get_n_fields(
/*=============*/
- /* out: number of fields */
- upd_t* update); /* in: update vector */
-/*************************************************************************
-Returns the nth field of an update vector. */
+ const upd_t* update); /*!< in: update vector */
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Returns the nth field of an update vector.
+@return update vector field */
UNIV_INLINE
upd_field_t*
upd_get_nth_field(
/*==============*/
- /* out: update vector field */
- upd_t* update, /* in: update vector */
- ulint n); /* in: field position in update vector */
-/*************************************************************************
+ const upd_t* update, /*!< in: update vector */
+ ulint n); /*!< in: field position in update vector */
+#else
+# define upd_get_nth_field(update, n) ((update)->fields + (n))
+#endif
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
Sets an index field number to be updated by an update vector field. */
UNIV_INLINE
void
upd_field_set_field_no(
/*===================*/
- upd_field_t* upd_field, /* in: update vector field */
- ulint field_no, /* in: field number in a clustered
+ upd_field_t* upd_field, /*!< in: update vector field */
+ ulint field_no, /*!< in: field number in a clustered
index */
- dict_index_t* index, /* in: index */
- trx_t* trx); /* in: transaction */
-/*************************************************************************
+ dict_index_t* index, /*!< in: index */
+ trx_t* trx); /*!< in: transaction */
+/*********************************************************************//**
+Returns a field of an update vector by field_no.
+@return update vector field, or NULL */
+UNIV_INLINE
+const upd_field_t*
+upd_get_field_by_field_no(
+/*======================*/
+ const upd_t* update, /*!< in: update vector */
+ ulint no) /*!< in: field_no */
+ __attribute__((nonnull, pure));
+/*********************************************************************//**
Writes into the redo log the values of trx id and roll ptr and enough info
-to determine their positions within a clustered index record. */
-
+to determine their positions within a clustered index record.
+@return new pointer to mlog */
+UNIV_INTERN
byte*
row_upd_write_sys_vals_to_log(
/*==========================*/
- /* out: new pointer to mlog */
- dict_index_t* index, /* in: clustered index */
- trx_t* trx, /* in: transaction */
- dulint roll_ptr,/* in: roll ptr of the undo log record */
- byte* log_ptr,/* pointer to a buffer of size > 20 opened
+ dict_index_t* index, /*!< in: clustered index */
+ trx_t* trx, /*!< in: transaction */
+ roll_ptr_t roll_ptr,/*!< in: roll ptr of the undo log record */
+ byte* log_ptr,/*!< in: pointer to a buffer of size > 20 opened
in mlog */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************************
+ mtr_t* mtr); /*!< in: mtr */
+/*********************************************************************//**
Updates the trx id and roll ptr field in a clustered index record when
a row is updated or marked deleted. */
UNIV_INLINE
void
row_upd_rec_sys_fields(
/*===================*/
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- trx_t* trx, /* in: transaction */
- dulint roll_ptr);/* in: roll ptr of the undo log record */
-/*************************************************************************
+ rec_t* rec, /*!< in/out: record */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page whose
+ uncompressed part will be updated, or NULL */
+ dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ trx_t* trx, /*!< in: transaction */
+ roll_ptr_t roll_ptr);/*!< in: roll ptr of the undo log record */
+/*********************************************************************//**
Sets the trx id or roll ptr field of a clustered index entry. */
-
+UNIV_INTERN
void
row_upd_index_entry_sys_field(
/*==========================*/
- dtuple_t* entry, /* in: index entry, where the memory buffers
+ const dtuple_t* entry, /*!< in: index entry, where the memory buffers
for sys fields are already allocated:
the function just copies the new values to
them */
- dict_index_t* index, /* in: clustered index */
- ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
- dulint val); /* in: value to write */
-/*************************************************************************
-Creates an update node for a query graph. */
-
+ dict_index_t* index, /*!< in: clustered index */
+ ulint type, /*!< in: DATA_TRX_ID or DATA_ROLL_PTR */
+ dulint val); /*!< in: value to write */
+/*********************************************************************//**
+Creates an update node for a query graph.
+@return own: update node */
+UNIV_INTERN
upd_node_t*
upd_node_create(
/*============*/
- /* out, own: update node */
- mem_heap_t* heap); /* in: mem heap where created */
-/***************************************************************
+ mem_heap_t* heap); /*!< in: mem heap where created */
+/***********************************************************//**
Writes to the redo log the new values of the fields occurring in the index. */
-
+UNIV_INTERN
void
row_upd_index_write_log(
/*====================*/
- upd_t* update, /* in: update vector */
- byte* log_ptr,/* in: pointer to mlog buffer: must contain at least
- MLOG_BUF_MARGIN bytes of free space; the buffer is
- closed within this function */
- mtr_t* mtr); /* in: mtr into whose log to write */
-/***************************************************************
+ const upd_t* update, /*!< in: update vector */
+ byte* log_ptr,/*!< in: pointer to mlog buffer: must
+ contain at least MLOG_BUF_MARGIN bytes
+ of free space; the buffer is closed
+ within this function */
+ mtr_t* mtr); /*!< in: mtr into whose log to write */
+/***********************************************************//**
Returns TRUE if row update changes size of some field in index or if some
-field to be updated is stored externally in rec or update. */
-
+field to be updated is stored externally in rec or update.
+@return TRUE if the update changes the size of some field in index or
+the field is external in rec or update */
+UNIV_INTERN
ibool
row_upd_changes_field_size_or_external(
/*===================================*/
- /* out: TRUE if the update changes the size of
- some field in index or the field is external
- in rec or update */
- dict_index_t* index, /* in: index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- upd_t* update);/* in: update vector */
-/***************************************************************
+ dict_index_t* index, /*!< in: index */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ const upd_t* update);/*!< in: update vector */
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************//**
Replaces the new column values stored in the update vector to the record
-given. No field size changes are allowed. This function is used only for
-a clustered index */
-
+given. No field size changes are allowed. */
+UNIV_INTERN
void
row_upd_rec_in_place(
/*=================*/
- rec_t* rec, /* in/out: record where replaced */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- upd_t* update);/* in: update vector */
-/*******************************************************************
+ rec_t* rec, /*!< in/out: record where replaced */
+ dict_index_t* index, /*!< in: the index the record belongs to */
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const upd_t* update, /*!< in: update vector */
+ page_zip_des_t* page_zip);/*!< in: compressed page with enough space
+ available, or NULL */
+#ifndef UNIV_HOTBACKUP
+/***************************************************************//**
Builds an update vector from those fields which in a secondary index entry
differ from a record that has the equal ordering fields. NOTE: we compare
-the fields as binary strings! */
-
+the fields as binary strings!
+@return own: update vector of differing fields */
+UNIV_INTERN
upd_t*
row_upd_build_sec_rec_difference_binary(
/*====================================*/
- /* out, own: update vector of differing
- fields */
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: entry to insert */
- rec_t* rec, /* in: secondary index record */
- trx_t* trx, /* in: transaction */
- mem_heap_t* heap); /* in: memory heap from which allocated */
-/*******************************************************************
+ dict_index_t* index, /*!< in: index */
+ const dtuple_t* entry, /*!< in: entry to insert */
+ const rec_t* rec, /*!< in: secondary index record */
+ trx_t* trx, /*!< in: transaction */
+ mem_heap_t* heap); /*!< in: memory heap from which allocated */
+/***************************************************************//**
Builds an update vector from those fields, excluding the roll ptr and
trx id fields, which in an index entry differ from a record that has
-the equal ordering fields. NOTE: we compare the fields as binary strings! */
-
+the equal ordering fields. NOTE: we compare the fields as binary strings!
+@return own: update vector of differing fields, excluding roll ptr and
+trx id */
+UNIV_INTERN
upd_t*
row_upd_build_difference_binary(
/*============================*/
- /* out, own: update vector of differing
- fields, excluding roll ptr and trx id */
- dict_index_t* index, /* in: clustered index */
- dtuple_t* entry, /* in: entry to insert */
- ulint* ext_vec,/* in: array containing field numbers of
- externally stored fields in entry, or NULL */
- ulint n_ext_vec,/* in: number of fields in ext_vec */
- rec_t* rec, /* in: clustered index record */
- trx_t* trx, /* in: transaction */
- mem_heap_t* heap); /* in: memory heap from which allocated */
-/***************************************************************
+ dict_index_t* index, /*!< in: clustered index */
+ const dtuple_t* entry, /*!< in: entry to insert */
+ const rec_t* rec, /*!< in: clustered index record */
+ trx_t* trx, /*!< in: transaction */
+ mem_heap_t* heap); /*!< in: memory heap from which allocated */
+/***********************************************************//**
Replaces column values in the given index entry with the new values stored
in the update vector. */
-
+UNIV_INTERN
void
row_upd_index_replace_new_col_vals_index_pos(
/*=========================================*/
- dtuple_t* entry, /* in/out: index entry where replaced */
- dict_index_t* index, /* in: index; NOTE that this may also be a
+ dtuple_t* entry, /*!< in/out: index entry where replaced;
+ the clustered index record must be
+ covered by a lock or a page latch to
+ prevent deletion (rollback or purge) */
+ dict_index_t* index, /*!< in: index; NOTE that this may also be a
non-clustered index */
- upd_t* update, /* in: an update vector built for the index so
+ const upd_t* update, /*!< in: an update vector built for the index so
that the field number in an upd_field is the
index position */
ibool order_only,
- /* in: if TRUE, limit the replacement to
+ /*!< in: if TRUE, limit the replacement to
ordering fields of index; note that this
does not work for non-clustered indexes. */
- mem_heap_t* heap); /* in: memory heap to which we allocate and
- copy the new values, set this as NULL if you
- do not want allocation */
-/***************************************************************
+ mem_heap_t* heap) /*!< in: memory heap for allocating and
+ copying the new values */
+ __attribute__((nonnull));
+/***********************************************************//**
Replaces column values in the given index entry with the new values stored
in the update vector. */
-
+UNIV_INTERN
void
row_upd_index_replace_new_col_vals(
/*===============================*/
- dtuple_t* entry, /* in/out: index entry where replaced */
- dict_index_t* index, /* in: index; NOTE that this may also be a
+ dtuple_t* entry, /*!< in/out: index entry where replaced;
+ the clustered index record must be
+ covered by a lock or a page latch to
+ prevent deletion (rollback or purge) */
+ dict_index_t* index, /*!< in: index; NOTE that this may also be a
non-clustered index */
- upd_t* update, /* in: an update vector built for the
+ const upd_t* update, /*!< in: an update vector built for the
CLUSTERED index so that the field number in
an upd_field is the clustered index position */
- mem_heap_t* heap); /* in: memory heap to which we allocate and
- copy the new values, set this as NULL if you
- do not want allocation */
-/***************************************************************
+ mem_heap_t* heap) /*!< in: memory heap for allocating and
+ copying the new values */
+ __attribute__((nonnull));
+/***********************************************************//**
+Replaces the new column values stored in the update vector. */
+UNIV_INTERN
+void
+row_upd_replace(
+/*============*/
+ dtuple_t* row, /*!< in/out: row where replaced,
+ indexed by col_no;
+ the clustered index record must be
+ covered by a lock or a page latch to
+ prevent deletion (rollback or purge) */
+ row_ext_t** ext, /*!< out, own: NULL, or externally
+ stored column prefixes */
+ const dict_index_t* index, /*!< in: clustered index */
+ const upd_t* update, /*!< in: an update vector built for the
+ clustered index */
+ mem_heap_t* heap); /*!< in: memory heap */
+/***********************************************************//**
Checks if an update vector changes an ordering field of an index record.
+
This function is fast if the update vector is short or the number of ordering
fields in the index is small. Otherwise, this can be quadratic.
-NOTE: we compare the fields as binary strings! */
-
+NOTE: we compare the fields as binary strings!
+@return TRUE if update vector changes an ordering field in the index record */
+UNIV_INTERN
ibool
row_upd_changes_ord_field_binary(
/*=============================*/
- /* out: TRUE if update vector changes
- an ordering field in the index record;
- NOTE: the fields are compared as binary
- strings */
- dtuple_t* row, /* in: old value of row, or NULL if the
+ const dtuple_t* row, /*!< in: old value of row, or NULL if the
row and the data values in update are not
known when this function is called, e.g., at
compile time */
- dict_index_t* index, /* in: index of the record */
- upd_t* update);/* in: update vector for the row; NOTE: the
+ dict_index_t* index, /*!< in: index of the record */
+ const upd_t* update);/*!< in: update vector for the row; NOTE: the
field numbers in this MUST be clustered index
positions! */
-/***************************************************************
+/***********************************************************//**
Checks if an update vector changes an ordering field of an index record.
This function is fast if the update vector is short or the number of ordering
fields in the index is small. Otherwise, this can be quadratic.
-NOTE: we compare the fields as binary strings! */
-
+NOTE: we compare the fields as binary strings!
+@return TRUE if update vector may change an ordering field in an index
+record */
+UNIV_INTERN
ibool
row_upd_changes_some_index_ord_field_binary(
/*========================================*/
- /* out: TRUE if update vector may change
- an ordering field in an index record */
- dict_table_t* table, /* in: table */
- upd_t* update);/* in: update vector for the row */
-/***************************************************************
+ const dict_table_t* table, /*!< in: table */
+ const upd_t* update);/*!< in: update vector for the row */
+/***********************************************************//**
Updates a row in a table. This is a high-level function used
-in SQL execution graphs. */
-
+in SQL execution graphs.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
row_upd_step(
/*=========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/*************************************************************************
-Performs an in-place update for the current clustered index record in
-select. */
-
-void
-row_upd_in_place_in_select(
-/*=======================*/
- sel_node_t* sel_node, /* in: select node */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************************
-Parses the log data of system field values. */
-
+ que_thr_t* thr); /*!< in: query thread */
+#endif /* !UNIV_HOTBACKUP */
+/*********************************************************************//**
+Parses the log data of system field values.
+@return log data end or NULL */
+UNIV_INTERN
byte*
row_upd_parse_sys_vals(
/*===================*/
- /* out: log data end or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- ulint* pos, /* out: TRX_ID position in record */
- dulint* trx_id, /* out: trx id */
- dulint* roll_ptr);/* out: roll ptr */
-/*************************************************************************
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ ulint* pos, /*!< out: TRX_ID position in record */
+ trx_id_t* trx_id, /*!< out: trx id */
+ roll_ptr_t* roll_ptr);/*!< out: roll ptr */
+/*********************************************************************//**
Updates the trx id and roll ptr field in a clustered index record in database
recovery. */
-
+UNIV_INTERN
void
row_upd_rec_sys_fields_in_recovery(
/*===============================*/
- rec_t* rec, /* in: record */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint pos, /* in: TRX_ID position in rec */
- dulint trx_id, /* in: transaction id */
- dulint roll_ptr);/* in: roll ptr of the undo log record */
-/*************************************************************************
-Parses the log data written by row_upd_index_write_log. */
-
+ rec_t* rec, /*!< in/out: record */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint pos, /*!< in: TRX_ID position in rec */
+ trx_id_t trx_id, /*!< in: transaction id */
+ roll_ptr_t roll_ptr);/*!< in: roll ptr of the undo log record */
+/*********************************************************************//**
+Parses the log data written by row_upd_index_write_log.
+@return log data end or NULL */
+UNIV_INTERN
byte*
row_upd_index_parse(
/*================*/
- /* out: log data end or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- mem_heap_t* heap, /* in: memory heap where update vector is
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ mem_heap_t* heap, /*!< in: memory heap where update vector is
built */
- upd_t** update_out);/* out: update vector */
+ upd_t** update_out);/*!< out: update vector */
/* Update vector field */
struct upd_field_struct{
- ulint field_no; /* field number in an index, usually
+ unsigned field_no:16; /*!< field number in an index, usually
the clustered index, but in updating
a secondary index record in btr0cur.c
this is the position in the secondary
index */
- que_node_t* exp; /* expression for calculating a new
+#ifndef UNIV_HOTBACKUP
+ unsigned orig_len:16; /*!< original length of the locally
+ stored part of an externally stored
+ column, or 0 */
+ que_node_t* exp; /*!< expression for calculating a new
value: it refers to column values and
constants in the symbol table of the
query graph */
- dfield_t new_val; /* new value for the column */
- ibool extern_storage; /* this is set to TRUE if dfield
- actually contains a reference to
- an externally stored field */
+#endif /* !UNIV_HOTBACKUP */
+ dfield_t new_val; /*!< new value for the column */
};
/* Update vector structure */
struct upd_struct{
- ulint info_bits; /* new value of info bits to record;
+ ulint info_bits; /*!< new value of info bits to record;
default is 0 */
- ulint n_fields; /* number of update fields */
- upd_field_t* fields; /* array of update fields */
+ ulint n_fields; /*!< number of update fields */
+ upd_field_t* fields; /*!< array of update fields */
};
+#ifndef UNIV_HOTBACKUP
/* Update node structure which also implements the delete operation
of a row */
struct upd_node_struct{
- que_common_t common; /* node type: QUE_NODE_UPDATE */
+ que_common_t common; /*!< node type: QUE_NODE_UPDATE */
ibool is_delete;/* TRUE if delete, FALSE if update */
ibool searched_update;
/* TRUE if searched update, FALSE if
positioned */
- ibool select_will_do_update;
- /* TRUE if a searched update where ordering
- fields will not be updated, and the size of
- the fields will not change: in this case the
- select node will take care of the update */
ibool in_mysql_interface;
/* TRUE if the update node was created
for the MySQL interface */
@@ -349,16 +397,16 @@ struct upd_node_struct{
or ... SET NULL for foreign keys */
mem_heap_t* cascade_heap;/* NULL or a mem heap where the cascade
node is created */
- sel_node_t* select; /* query graph subtree implementing a base
+ sel_node_t* select; /*!< query graph subtree implementing a base
table cursor: the rows returned will be
updated */
- btr_pcur_t* pcur; /* persistent cursor placed on the clustered
+ btr_pcur_t* pcur; /*!< persistent cursor placed on the clustered
index record which should be updated or
deleted; the cursor is stored in the graph
of 'select' field above, except in the case
of the MySQL interface */
- dict_table_t* table; /* table where updated */
- upd_t* update; /* update vector for the row */
+ dict_table_t* table; /*!< table where updated */
+ upd_t* update; /*!< update vector for the row */
ulint update_n_fields;
/* when this struct is used to implement
a cascade operation for foreign keys, we store
@@ -377,17 +425,18 @@ struct upd_node_struct{
UPD_NODE_NO_SIZE_CHANGE, ORed */
/*----------------------*/
/* Local storage for this graph node */
- ulint state; /* node execution state */
- dict_index_t* index; /* NULL, or the next index whose record should
+ ulint state; /*!< node execution state */
+ dict_index_t* index; /*!< NULL, or the next index whose record should
be updated */
- dtuple_t* row; /* NULL, or a copy (also fields copied to
+ dtuple_t* row; /*!< NULL, or a copy (also fields copied to
heap) of the row to update; this must be reset
to NULL after a successful update */
- ulint* ext_vec;/* array describing which fields are stored
- externally in the clustered index record of
- row */
- ulint n_ext_vec;/* number of fields in ext_vec */
- mem_heap_t* heap; /* memory heap used as auxiliary storage;
+ row_ext_t* ext; /*!< NULL, or prefixes of the externally
+ stored columns in the old row */
+ dtuple_t* upd_row;/* NULL, or a copy of the updated row */
+ row_ext_t* upd_ext;/* NULL, or prefixes of the externally
+ stored columns in upd_row */
+ mem_heap_t* heap; /*!< memory heap used as auxiliary storage;
this must be emptied after a successful
update */
/*----------------------*/
@@ -425,6 +474,8 @@ struct upd_node_struct{
#define UPD_NODE_NO_SIZE_CHANGE 2 /* no record field size will be
changed in the update */
+#endif /* !UNIV_HOTBACKUP */
+
#ifndef UNIV_NONINL
#include "row0upd.ic"
#endif
diff --git a/storage/innobase/include/row0upd.ic b/storage/innobase/include/row0upd.ic
index 6173849e68f..18e22f1eca9 100644
--- a/storage/innobase/include/row0upd.ic
+++ b/storage/innobase/include/row0upd.ic
@@ -1,87 +1,107 @@
-/******************************************************
-Update of a row
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
-(c) 1996 Innobase Oy
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0upd.ic
+Update of a row
Created 12/27/1996 Heikki Tuuri
*******************************************************/
#include "mtr0log.h"
-#include "trx0trx.h"
-#include "trx0undo.h"
-#include "row0row.h"
-#include "btr0sea.h"
-
-/*************************************************************************
-Creates an update vector object. */
+#ifndef UNIV_HOTBACKUP
+# include "trx0trx.h"
+# include "trx0undo.h"
+# include "row0row.h"
+# include "btr0sea.h"
+#endif /* !UNIV_HOTBACKUP */
+#include "page0zip.h"
+
+/*********************************************************************//**
+Creates an update vector object.
+@return own: update vector object */
UNIV_INLINE
upd_t*
upd_create(
/*=======*/
- /* out, own: update vector object */
- ulint n, /* in: number of fields */
- mem_heap_t* heap) /* in: heap from which memory allocated */
+ ulint n, /*!< in: number of fields */
+ mem_heap_t* heap) /*!< in: heap from which memory allocated */
{
upd_t* update;
- ulint i;
- update = mem_heap_alloc(heap, sizeof(upd_t));
+ update = (upd_t*) mem_heap_alloc(heap, sizeof(upd_t));
update->info_bits = 0;
update->n_fields = n;
- update->fields = mem_heap_alloc(heap, sizeof(upd_field_t) * n);
-
- for (i = 0; i < n; i++) {
- update->fields[i].extern_storage = 0;
- }
+ update->fields = (upd_field_t*)
+ mem_heap_alloc(heap, sizeof(upd_field_t) * n);
return(update);
}
-/*************************************************************************
+/*********************************************************************//**
Returns the number of fields in the update vector == number of columns
-to be updated by an update vector. */
+to be updated by an update vector.
+@return number of fields */
UNIV_INLINE
ulint
upd_get_n_fields(
/*=============*/
- /* out: number of fields */
- upd_t* update) /* in: update vector */
+ const upd_t* update) /*!< in: update vector */
{
ut_ad(update);
return(update->n_fields);
}
-/*************************************************************************
-Returns the nth field of an update vector. */
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Returns the nth field of an update vector.
+@return update vector field */
UNIV_INLINE
upd_field_t*
upd_get_nth_field(
/*==============*/
- /* out: update vector field */
- upd_t* update, /* in: update vector */
- ulint n) /* in: field position in update vector */
+ const upd_t* update, /*!< in: update vector */
+ ulint n) /*!< in: field position in update vector */
{
ut_ad(update);
ut_ad(n < update->n_fields);
- return(update->fields + n);
+ return((upd_field_t*) update->fields + n);
}
+#endif /* UNIV_DEBUG */
-/*************************************************************************
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
Sets an index field number to be updated by an update vector field. */
UNIV_INLINE
void
upd_field_set_field_no(
/*===================*/
- upd_field_t* upd_field, /* in: update vector field */
- ulint field_no, /* in: field number in a clustered
+ upd_field_t* upd_field, /*!< in: update vector field */
+ ulint field_no, /*!< in: field number in a clustered
index */
- dict_index_t* index, /* in: index */
- trx_t* trx) /* in: transaction */
+ dict_index_t* index, /*!< in: index */
+ trx_t* trx) /*!< in: transaction */
{
upd_field->field_no = field_no;
+ upd_field->orig_len = 0;
if (UNIV_UNLIKELY(field_no >= dict_index_get_n_fields(index))) {
fprintf(stderr,
@@ -94,29 +114,71 @@ upd_field_set_field_no(
}
dict_col_copy_type(dict_index_get_nth_col(index, field_no),
- dfield_get_type(&(upd_field->new_val)));
+ dfield_get_type(&upd_field->new_val));
}
-/*************************************************************************
+/*********************************************************************//**
+Returns a field of an update vector by field_no.
+@return update vector field, or NULL */
+UNIV_INLINE
+const upd_field_t*
+upd_get_field_by_field_no(
+/*======================*/
+ const upd_t* update, /*!< in: update vector */
+ ulint no) /*!< in: field_no */
+{
+ ulint i;
+ for (i = 0; i < upd_get_n_fields(update); i++) {
+ const upd_field_t* uf = upd_get_nth_field(update, i);
+
+ if (uf->field_no == no) {
+
+ return(uf);
+ }
+ }
+
+ return(NULL);
+}
+
+/*********************************************************************//**
Updates the trx id and roll ptr field in a clustered index record when
a row is updated or marked deleted. */
UNIV_INLINE
void
row_upd_rec_sys_fields(
/*===================*/
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- trx_t* trx, /* in: transaction */
- dulint roll_ptr)/* in: roll ptr of the undo log record */
+ rec_t* rec, /*!< in/out: record */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page whose
+ uncompressed part will be updated, or NULL */
+ dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ trx_t* trx, /*!< in: transaction */
+ roll_ptr_t roll_ptr)/*!< in: roll ptr of the undo log record */
{
- ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(dict_index_is_clust(index));
ut_ad(rec_offs_validate(rec, index, offsets));
#ifdef UNIV_SYNC_DEBUG
- ut_ad(!buf_block_align(rec)->is_hashed
- || rw_lock_own(&btr_search_latch, RW_LOCK_EX));
+ if (!rw_lock_own(&btr_search_latch, RW_LOCK_EX)) {
+ ut_ad(!buf_block_align(rec)->is_hashed);
+ }
#endif /* UNIV_SYNC_DEBUG */
- row_set_rec_trx_id(rec, index, offsets, trx->id);
- row_set_rec_roll_ptr(rec, index, offsets, roll_ptr);
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ ulint pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
+ page_zip_write_trx_id_and_roll_ptr(page_zip, rec, offsets,
+ pos, trx->id, roll_ptr);
+ } else {
+ ulint offset = index->trx_id_offset;
+
+ if (!offset) {
+ offset = row_get_trx_id_offset(rec, index, offsets);
+ }
+
+#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
+# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
+#endif
+ trx_write_trx_id(rec + offset, trx->id);
+ trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr);
+ }
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/row0vers.h b/storage/innobase/include/row0vers.h
index e1377112d2a..5a2e38230d5 100644
--- a/storage/innobase/include/row0vers.h
+++ b/storage/innobase/include/row0vers.h
@@ -1,7 +1,24 @@
-/******************************************************
-Row versions
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
-(c) 1997 Innobase Oy
+/**************************************************//**
+@file include/row0vers.h
+Row versions
Created 2/6/1997 Heikki Tuuri
*******************************************************/
@@ -18,103 +35,102 @@ Created 2/6/1997 Heikki Tuuri
#include "mtr0mtr.h"
#include "read0types.h"
-/*********************************************************************
+/*****************************************************************//**
Finds out if an active transaction has inserted or modified a secondary
index record. NOTE: the kernel mutex is temporarily released in this
-function! */
-
+function!
+@return NULL if committed, else the active transaction */
+UNIV_INTERN
trx_t*
row_vers_impl_x_locked_off_kernel(
/*==============================*/
- /* out: NULL if committed, else the active
- transaction; NOTE that the kernel mutex is
- temporarily released! */
- rec_t* rec, /* in: record in a secondary index */
- dict_index_t* index, /* in: the secondary index */
- const ulint* offsets);/* in: rec_get_offsets(rec, index) */
-/*********************************************************************
+ const rec_t* rec, /*!< in: record in a secondary index */
+ dict_index_t* index, /*!< in: the secondary index */
+ const ulint* offsets);/*!< in: rec_get_offsets(rec, index) */
+/*****************************************************************//**
Finds out if we must preserve a delete marked earlier version of a clustered
-index record, because it is >= the purge view. */
-
+index record, because it is >= the purge view.
+@return TRUE if earlier version should be preserved */
+UNIV_INTERN
ibool
row_vers_must_preserve_del_marked(
/*==============================*/
- /* out: TRUE if earlier version should be preserved */
- dulint trx_id, /* in: transaction id in the version */
- mtr_t* mtr); /* in: mtr holding the latch on the clustered index
- record; it will also hold the latch on purge_view */
-/*********************************************************************
+ trx_id_t trx_id, /*!< in: transaction id in the version */
+ mtr_t* mtr); /*!< in: mtr holding the latch on the
+ clustered index record; it will also
+ hold the latch on purge_view */
+/*****************************************************************//**
Finds out if a version of the record, where the version >= the current
purge view, should have ientry as its secondary index entry. We check
if there is any not delete marked version of the record where the trx
id >= purge view, and the secondary index entry == ientry; exactly in
-this case we return TRUE. */
-
+this case we return TRUE.
+@return TRUE if an earlier version should have ientry as its index entry */
+UNIV_INTERN
ibool
row_vers_old_has_index_entry(
/*=========================*/
- /* out: TRUE if earlier version should have */
- ibool also_curr,/* in: TRUE if also rec is included in the
+ ibool also_curr,/*!< in: TRUE if also rec is included in the
versions to search; otherwise only versions
prior to it are searched */
- rec_t* rec, /* in: record in the clustered index; the
+ const rec_t* rec, /*!< in: record in the clustered index; the
caller must have a latch on the page */
- mtr_t* mtr, /* in: mtr holding the latch on rec; it will
+ mtr_t* mtr, /*!< in: mtr holding the latch on rec; it will
also hold the latch on purge_view */
- dict_index_t* index, /* in: the secondary index */
- dtuple_t* ientry); /* in: the secondary index entry */
-/*********************************************************************
+ dict_index_t* index, /*!< in: the secondary index */
+ const dtuple_t* ientry);/*!< in: the secondary index entry */
+/*****************************************************************//**
Constructs the version of a clustered index record which a consistent
read should see. We assume that the trx id stored in rec is such that
-the consistent read should not see rec in its present version. */
-
+the consistent read should not see rec in its present version.
+@return DB_SUCCESS or DB_MISSING_HISTORY */
+UNIV_INTERN
ulint
row_vers_build_for_consistent_read(
/*===============================*/
- /* out: DB_SUCCESS or DB_MISSING_HISTORY */
- rec_t* rec, /* in: record in a clustered index; the
+ const rec_t* rec, /*!< in: record in a clustered index; the
caller must have a latch on the page; this
latch locks the top of the stack of versions
of this record */
- mtr_t* mtr, /* in: mtr holding the latch on rec; it will
+ mtr_t* mtr, /*!< in: mtr holding the latch on rec; it will
also hold the latch on purge_view */
- dict_index_t* index, /* in: the clustered index */
- ulint** offsets,/* in/out: offsets returned by
+ dict_index_t* index, /*!< in: the clustered index */
+ ulint** offsets,/*!< in/out: offsets returned by
rec_get_offsets(rec, index) */
- read_view_t* view, /* in: the consistent read view */
- mem_heap_t** offset_heap,/* in/out: memory heap from which
+ read_view_t* view, /*!< in: the consistent read view */
+ mem_heap_t** offset_heap,/*!< in/out: memory heap from which
the offsets are allocated */
- mem_heap_t* in_heap,/* in: memory heap from which the memory for
- old_vers is allocated; memory for possible
+ mem_heap_t* in_heap,/*!< in: memory heap from which the memory for
+ *old_vers is allocated; memory for possible
intermediate versions is allocated and freed
locally within the function */
- rec_t** old_vers);/* out, own: old version, or NULL if the
+ rec_t** old_vers);/*!< out, own: old version, or NULL if the
record does not exist in the view, that is,
it was freshly inserted afterwards */
-/*********************************************************************
+/*****************************************************************//**
Constructs the last committed version of a clustered index record,
-which should be seen by a semi-consistent read. */
-
+which should be seen by a semi-consistent read.
+@return DB_SUCCESS or DB_MISSING_HISTORY */
+UNIV_INTERN
ulint
row_vers_build_for_semi_consistent_read(
/*====================================*/
- /* out: DB_SUCCESS or DB_MISSING_HISTORY */
- rec_t* rec, /* in: record in a clustered index; the
+ const rec_t* rec, /*!< in: record in a clustered index; the
caller must have a latch on the page; this
latch locks the top of the stack of versions
of this record */
- mtr_t* mtr, /* in: mtr holding the latch on rec */
- dict_index_t* index, /* in: the clustered index */
- ulint** offsets,/* in/out: offsets returned by
+ mtr_t* mtr, /*!< in: mtr holding the latch on rec */
+ dict_index_t* index, /*!< in: the clustered index */
+ ulint** offsets,/*!< in/out: offsets returned by
rec_get_offsets(rec, index) */
- mem_heap_t** offset_heap,/* in/out: memory heap from which
+ mem_heap_t** offset_heap,/*!< in/out: memory heap from which
the offsets are allocated */
- mem_heap_t* in_heap,/* in: memory heap from which the memory for
- old_vers is allocated; memory for possible
+ mem_heap_t* in_heap,/*!< in: memory heap from which the memory for
+ *old_vers is allocated; memory for possible
intermediate versions is allocated and freed
locally within the function */
- rec_t** old_vers);/* out, own: rec, old version, or NULL if the
+ const rec_t** old_vers);/*!< out: rec, old version, or NULL if the
record does not exist in the view, that is,
it was freshly inserted afterwards */
diff --git a/storage/innobase/include/row0vers.ic b/storage/innobase/include/row0vers.ic
index ab1e264635b..8bb3a5c0cb3 100644
--- a/storage/innobase/include/row0vers.ic
+++ b/storage/innobase/include/row0vers.ic
@@ -1,7 +1,24 @@
-/******************************************************
-Row versions
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1997 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0vers.ic
+Row versions
Created 2/6/1997 Heikki Tuuri
*******************************************************/
diff --git a/storage/innobase/include/srv0que.h b/storage/innobase/include/srv0que.h
index 05c339cdd32..82ee7739ef7 100644
--- a/storage/innobase/include/srv0que.h
+++ b/storage/innobase/include/srv0que.h
@@ -1,53 +1,42 @@
-/******************************************************
-Server query execution
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/srv0que.h
+Server query execution
Created 6/5/1996 Heikki Tuuri
*******************************************************/
-
#ifndef srv0que_h
#define srv0que_h
#include "univ.i"
#include "que0types.h"
-/**************************************************************************
-Checks if there is work to do in the server task queue. If there is, the
-thread starts processing a task. Before leaving, it again checks the task
-queue and picks a new task if any exists. This is called by a SRV_WORKER
-thread. */
-
-void
-srv_que_task_queue_check(void);
-/*==========================*/
-/**************************************************************************
-Performs round-robin on the server tasks. This is called by a SRV_WORKER
-thread every second or so. */
-
-que_thr_t*
-srv_que_round_robin(
-/*================*/
- /* out: the new (may be == thr) query thread
- to run */
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
-Enqueues a task to server task queue and releases a worker thread, if
-there exists one suspended. */
-
-void
-srv_que_task_enqueue(
-/*=================*/
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
-Enqueues a task to server task queue and releases a worker thread, if
-there exists one suspended. */
-
+/**********************************************************************//**
+Enqueues a task to server task queue and releases a worker thread, if there
+is a suspended one. */
+UNIV_INTERN
void
srv_que_task_enqueue_low(
/*=====================*/
- que_thr_t* thr); /* in: query thread */
+ que_thr_t* thr); /*!< in: query thread */
#endif
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index 67144a41d3d..228c9f6600a 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -1,16 +1,66 @@
-/******************************************************
-The server main program
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, 2009, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+/***********************************************************************
-(c) 1995 Innobase Oy
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2009, Percona Inc.
+
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+***********************************************************************/
+
+/**************************************************//**
+@file include/srv0srv.h
+The server main program
Created 10/10/1995 Heikki Tuuri
*******************************************************/
-
#ifndef srv0srv_h
#define srv0srv_h
#include "univ.i"
+#ifndef UNIV_HOTBACKUP
#include "sync0sync.h"
#include "os0sync.h"
#include "que0types.h"
@@ -18,7 +68,7 @@ Created 10/10/1995 Heikki Tuuri
extern const char* srv_main_thread_op_info;
-/* Prefix used by MySQL to indicate pre-5.1 table name encoding */
+/** Prefix used by MySQL to indicate pre-5.1 table name encoding */
extern const char srv_mysql50_table_name_prefix[9];
/* When this event is set the lock timeout and InnoDB monitor
@@ -57,8 +107,23 @@ extern char* srv_data_home;
extern char* srv_arch_dir;
#endif /* UNIV_LOG_ARCHIVE */
+/** store to its own file each table created by a user; data
+dictionary tables are in the system tablespace 0 */
+#ifndef UNIV_HOTBACKUP
+extern my_bool srv_file_per_table;
+#else
extern ibool srv_file_per_table;
+#endif /* UNIV_HOTBACKUP */
+/** The file format to use on new *.ibd files. */
+extern ulint srv_file_format;
+/** Whether to check file format during startup. A value of
+DICT_TF_FORMAT_MAX + 1 means no checking i.e. FALSE. The default is to
+set it to the highest format we support. */
+extern ulint srv_check_file_format_at_startup;
+/** Place locks to records only i.e. do not use next-key locking except
+on duplicate key checking and foreign key checking */
extern ibool srv_locks_unsafe_for_binlog;
+#endif /* !UNIV_HOTBACKUP */
extern ulint srv_n_data_files;
extern char** srv_data_file_names;
@@ -67,29 +132,45 @@ extern ulint* srv_data_file_is_raw_partition;
extern ibool srv_auto_extend_last_data_file;
extern ulint srv_last_file_size_max;
+extern char** srv_log_group_home_dirs;
+#ifndef UNIV_HOTBACKUP
extern ulong srv_auto_extend_increment;
extern ibool srv_created_new_raw;
-#define SRV_NEW_RAW 1
-#define SRV_OLD_RAW 2
-
-extern char** srv_log_group_home_dirs;
-
extern ulint srv_n_log_groups;
extern ulint srv_n_log_files;
extern ulint srv_log_file_size;
extern ulint srv_log_buffer_size;
extern ulong srv_flush_log_at_trx_commit;
-
-extern byte srv_latin1_ordering[256];/* The sort order table of the latin1
- character set */
-extern ulint srv_pool_size;
-extern ulint srv_awe_window_size;
+extern char srv_adaptive_flushing;
+
+
+/* The sort order table of the MySQL latin1_swedish_ci character set
+collation */
+extern const byte* srv_latin1_ordering;
+#ifndef UNIV_HOTBACKUP
+extern my_bool srv_use_sys_malloc;
+#else
+extern ibool srv_use_sys_malloc;
+#endif /* UNIV_HOTBACKUP */
+extern ulint srv_buf_pool_size; /*!< requested size in bytes */
+extern ulint srv_buf_pool_old_size; /*!< previously requested size */
+extern ulint srv_buf_pool_curr_size; /*!< current size in bytes */
extern ulint srv_mem_pool_size;
extern ulint srv_lock_table_size;
extern ulint srv_n_file_io_threads;
+extern ulong srv_read_ahead_threshold;
+extern ulint srv_n_read_io_threads;
+extern ulint srv_n_write_io_threads;
+
+/* Number of IO operations per second the server can do */
+extern ulong srv_io_capacity;
+/* Returns the number of IO operations that is p percent of
+srv_io_capacity, e.g. PCT_IO(5) is 5% of srv_io_capacity. The argument
+is parenthesized so that expressions such as PCT_IO(a + b) work. */
+#define PCT_IO(p) ((ulong) (srv_io_capacity * ((double) (p) / 100.0)))
#ifdef UNIV_LOG_ARCHIVE
extern ibool srv_log_archive_on;
@@ -97,8 +178,6 @@ extern ibool srv_archive_recovery;
extern dulint srv_archive_recovery_limit_lsn;
#endif /* UNIV_LOG_ARCHIVE */
-extern ulint srv_lock_wait_timeout;
-
extern char* srv_file_flush_method_str;
extern ulint srv_unix_file_flush_method;
extern ulint srv_win_file_flush_method;
@@ -123,6 +202,8 @@ extern ulint srv_fast_shutdown; /* If this is 1, do not do a
transactions). */
extern ibool srv_innodb_status;
+extern unsigned long long srv_stats_sample_pages;
+
extern ibool srv_use_doublewrite_buf;
extern ibool srv_use_checksums;
@@ -131,8 +212,8 @@ extern int srv_query_thread_priority;
extern ulong srv_max_buf_pool_modified_pct;
extern ulong srv_max_purge_lag;
-extern ibool srv_use_awe;
-extern ibool srv_use_adaptive_hash_indexes;
+
+extern ulong srv_replication_delay;
/*-------------------------------------------*/
extern ulint srv_n_rows_inserted;
@@ -152,18 +233,25 @@ extern ibool srv_error_monitor_active;
extern ulong srv_n_spin_wait_rounds;
extern ulong srv_n_free_tickets_to_enter;
extern ulong srv_thread_sleep_delay;
-extern ulint srv_spin_wait_delay;
+extern ulong srv_spin_wait_delay;
extern ibool srv_priority_boost;
-extern ulint srv_pool_size;
extern ulint srv_mem_pool_size;
extern ulint srv_lock_table_size;
+#ifdef UNIV_DEBUG
extern ibool srv_print_thread_releases;
extern ibool srv_print_lock_waits;
extern ibool srv_print_buf_io;
extern ibool srv_print_log_io;
extern ibool srv_print_latch_waits;
+#else /* UNIV_DEBUG */
+# define srv_print_thread_releases FALSE
+# define srv_print_lock_waits FALSE
+# define srv_print_buf_io FALSE
+# define srv_print_log_io FALSE
+# define srv_print_latch_waits FALSE
+#endif /* UNIV_DEBUG */
extern ulint srv_activity_count;
extern ulint srv_fatal_semaphore_wait_threshold;
@@ -175,7 +263,7 @@ extern mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs,
same DRAM page as other hotspot semaphores */
#define kernel_mutex (*kernel_mutex_temp)
-#define SRV_MAX_N_IO_THREADS 100
+#define SRV_MAX_N_IO_THREADS 130
/* Array of English strings describing the current state of an
i/o handler thread */
@@ -224,332 +312,349 @@ extern ulint srv_buf_pool_wait_free;
buffer pool to disk */
extern ulint srv_buf_pool_flushed;
-/* variable to count the number of buffer pool reads that led to the
+/** Number of buffer pool reads that led to the
reading of a disk page */
extern ulint srv_buf_pool_reads;
-/* variable to count the number of sequential read-aheads were done */
-extern ulint srv_read_ahead_seq;
-
-/* variable to count the number of random read-aheads were done */
-extern ulint srv_read_ahead_rnd;
-
-/* An option to enable the fix for "Bug#43660 SHOW INDEXES/ANALYZE does
-NOT update cardinality for indexes of InnoDB table". By default we are
-running with the fix disabled because MySQL 5.1 is frozen for such
-behavioral changes. */
-extern char srv_use_legacy_cardinality_algorithm;
-
-/* In this structure we store status variables to be passed to MySQL */
+/** Status variables to be passed to MySQL */
typedef struct export_var_struct export_struc;
+/** Status variables to be passed to MySQL */
extern export_struc export_vars;
+/** The server system */
typedef struct srv_sys_struct srv_sys_t;
-/* The server system */
+/** The server system */
extern srv_sys_t* srv_sys;
+#endif /* !UNIV_HOTBACKUP */
+
+/** Types of raw partitions in innodb_data_file_path */
+enum {
+ SRV_NOT_RAW = 0, /*!< Not a raw partition */
+ SRV_NEW_RAW, /*!< A 'newraw' partition, only to be
+ initialized */
+ SRV_OLD_RAW /*!< An initialized raw partition */
+};
-/* Alternatives for the file flush option in Unix; see the InnoDB manual
+/** Alternatives for the file flush option in Unix; see the InnoDB manual
about what these mean */
-#define SRV_UNIX_FSYNC 1 /* This is the default */
-#define SRV_UNIX_O_DSYNC 2
-#define SRV_UNIX_LITTLESYNC 3
-#define SRV_UNIX_NOSYNC 4
-#define SRV_UNIX_O_DIRECT 5
+enum {
+ SRV_UNIX_FSYNC = 1, /*!< fsync, the default */
+ SRV_UNIX_O_DSYNC, /*!< open log files in O_SYNC mode */
+ SRV_UNIX_LITTLESYNC, /*!< do not call os_file_flush()
+ when writing data files, but do flush
+ after writing to log files */
+ SRV_UNIX_NOSYNC, /*!< do not flush after writing */
+ SRV_UNIX_O_DIRECT /*!< invoke os_file_set_nocache() on
+ data files */
+};
-/* Alternatives for file i/o in Windows */
-#define SRV_WIN_IO_NORMAL 1
-#define SRV_WIN_IO_UNBUFFERED 2 /* This is the default */
+/** Alternatives for file i/o in Windows */
+enum {
+ SRV_WIN_IO_NORMAL = 1, /*!< buffered I/O */
+ SRV_WIN_IO_UNBUFFERED /*!< unbuffered I/O; this is the default */
+};
-/* Alternatives for srv_force_recovery. Non-zero values are intended
+/** Alternatives for srv_force_recovery. Non-zero values are intended
to help the user get a damaged database up so that he can dump intact
tables and rows with SELECT INTO OUTFILE. The database must not otherwise
be used with these options! A bigger number below means that all precautions
of lower numbers are included. */
-
-#define SRV_FORCE_IGNORE_CORRUPT 1 /* let the server run even if it
+enum {
+ SRV_FORCE_IGNORE_CORRUPT = 1, /*!< let the server run even if it
detects a corrupt page */
-#define SRV_FORCE_NO_BACKGROUND 2 /* prevent the main thread from
+ SRV_FORCE_NO_BACKGROUND = 2, /*!< prevent the main thread from
running: if a crash would occur
in purge, this prevents it */
-#define SRV_FORCE_NO_TRX_UNDO 3 /* do not run trx rollback after
+ SRV_FORCE_NO_TRX_UNDO = 3, /*!< do not run trx rollback after
recovery */
-#define SRV_FORCE_NO_IBUF_MERGE 4 /* prevent also ibuf operations:
+ SRV_FORCE_NO_IBUF_MERGE = 4, /*!< prevent also ibuf operations:
if they would cause a crash, better
not do them */
-#define SRV_FORCE_NO_UNDO_LOG_SCAN 5 /* do not look at undo logs when
+ SRV_FORCE_NO_UNDO_LOG_SCAN = 5, /*!< do not look at undo logs when
starting the database: InnoDB will
treat even incomplete transactions
as committed */
-#define SRV_FORCE_NO_LOG_REDO 6 /* do not do the log roll-forward
+ SRV_FORCE_NO_LOG_REDO = 6 /*!< do not do the log roll-forward
in connection with recovery */
+};
-/*************************************************************************
-Boots Innobase server. */
+#ifndef UNIV_HOTBACKUP
+/** Types of threads existing in the system. */
+enum srv_thread_type {
+ SRV_COM = 1, /**< threads serving communication and queries */
+ SRV_CONSOLE, /**< thread serving console */
+ SRV_WORKER, /**< threads serving parallelized queries and
+ queries released from lock wait */
+#if 0
+ /* Utility threads */
+ SRV_BUFFER, /**< thread flushing dirty buffer blocks */
+ SRV_RECOVERY, /**< threads finishing a recovery */
+ SRV_INSERT, /**< thread flushing the insert buffer to disk */
+#endif
+ SRV_MASTER /**< the master thread, (whose type number must
+ be biggest) */
+};
+/*********************************************************************//**
+Boots Innobase server.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
ulint
srv_boot(void);
/*==========*/
- /* out: DB_SUCCESS or error code */
-/*************************************************************************
+/*********************************************************************//**
Initializes the server. */
-
+UNIV_INTERN
void
srv_init(void);
/*==========*/
-/*************************************************************************
-Frees the OS fast mutex created in srv_boot(). */
-
+/*********************************************************************//**
+Frees the data structures created in srv_init(). */
+UNIV_INTERN
void
srv_free(void);
/*==========*/
-/*************************************************************************
+/*********************************************************************//**
Initializes the synchronization primitives, memory system, and the thread
local storage. */
-
+UNIV_INTERN
void
srv_general_init(void);
/*==================*/
-/*************************************************************************
-Gets the number of threads in the system. */
-
+/*********************************************************************//**
+Gets the number of threads in the system.
+@return sum of srv_n_threads[] */
+UNIV_INTERN
ulint
srv_get_n_threads(void);
/*===================*/
-/*************************************************************************
-Returns the calling thread type. */
+/*********************************************************************//**
+Returns the calling thread type.
+@return SRV_COM, ... */
-ulint
+enum srv_thread_type
srv_get_thread_type(void);
/*=====================*/
- /* out: SRV_COM, ... */
-/*************************************************************************
+/*********************************************************************//**
Sets the info describing an i/o thread current state. */
-
+UNIV_INTERN
void
srv_set_io_thread_op_info(
/*======================*/
- ulint i, /* in: the 'segment' of the i/o thread */
- const char* str); /* in: constant char string describing the
+ ulint i, /*!< in: the 'segment' of the i/o thread */
+ const char* str); /*!< in: constant char string describing the
state */
-/*************************************************************************
+/*********************************************************************//**
Releases threads of the type given from suspension in the thread table.
-NOTE! The server mutex has to be reserved by the caller! */
-
+NOTE! The server mutex has to be reserved by the caller!
+@return number of threads released: this may be less than n if not
+enough threads were suspended at the moment */
+UNIV_INTERN
ulint
srv_release_threads(
/*================*/
- /* out: number of threads released: this may be
- < n if not enough threads were suspended at the
- moment */
- ulint type, /* in: thread type */
- ulint n); /* in: number of threads to release */
-/*************************************************************************
-The master thread controlling the server. */
-
+ enum srv_thread_type type, /*!< in: thread type */
+ ulint n); /*!< in: number of threads to release */
+/*********************************************************************//**
+The master thread controlling the server.
+@return a dummy parameter */
+UNIV_INTERN
os_thread_ret_t
srv_master_thread(
/*==============*/
- /* out: a dummy parameter */
- void* arg); /* in: a dummy parameter required by
+ void* arg); /*!< in: a dummy parameter required by
os_thread_create */
-/***********************************************************************
+/*******************************************************************//**
Tells the Innobase server that there has been activity in the database
and wakes up the master thread if it is suspended (not sleeping). Used
in the MySQL interface. Note that there is a small chance that the master
thread stays suspended (we do not protect our operation with the kernel
mutex, for performace reasons). */
-
+UNIV_INTERN
void
srv_active_wake_master_thread(void);
/*===============================*/
-/***********************************************************************
+/*******************************************************************//**
Wakes up the master thread if it is suspended or being suspended. */
-
+UNIV_INTERN
void
srv_wake_master_thread(void);
/*========================*/
-/*************************************************************************
+/*********************************************************************//**
Puts an OS thread to wait if there are too many concurrent threads
(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
-
+UNIV_INTERN
void
srv_conc_enter_innodb(
/*==================*/
- trx_t* trx); /* in: transaction object associated with the
+ trx_t* trx); /*!< in: transaction object associated with the
thread */
-/*************************************************************************
+/*********************************************************************//**
This lets a thread enter InnoDB regardless of the number of threads inside
InnoDB. This must be called when a thread ends a lock wait. */
-
+UNIV_INTERN
void
srv_conc_force_enter_innodb(
/*========================*/
- trx_t* trx); /* in: transaction object associated with the
+ trx_t* trx); /*!< in: transaction object associated with the
thread */
-/*************************************************************************
+/*********************************************************************//**
This must be called when a thread exits InnoDB in a lock wait or at the
end of an SQL statement. */
-
+UNIV_INTERN
void
srv_conc_force_exit_innodb(
/*=======================*/
- trx_t* trx); /* in: transaction object associated with the
+ trx_t* trx); /*!< in: transaction object associated with the
thread */
-/*************************************************************************
+/*********************************************************************//**
This must be called when a thread exits InnoDB. */
-
+UNIV_INTERN
void
srv_conc_exit_innodb(
/*=================*/
- trx_t* trx); /* in: transaction object associated with the
+ trx_t* trx); /*!< in: transaction object associated with the
thread */
-/*******************************************************************
+/***************************************************************//**
Puts a MySQL OS thread to wait for a lock to be released. If an error
occurs during the wait trx->error_state associated with thr is
!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
are possible errors. DB_DEADLOCK is returned if selective deadlock
resolution chose this transaction as a victim. */
-
+UNIV_INTERN
void
srv_suspend_mysql_thread(
/*=====================*/
- que_thr_t* thr); /* in: query thread associated with the MySQL
+ que_thr_t* thr); /*!< in: query thread associated with the MySQL
OS thread */
-/************************************************************************
+/********************************************************************//**
Releases a MySQL OS thread waiting for a lock to be released, if the
thread is already suspended. */
-
+UNIV_INTERN
void
srv_release_mysql_thread_if_suspended(
/*==================================*/
- que_thr_t* thr); /* in: query thread associated with the
+ que_thr_t* thr); /*!< in: query thread associated with the
MySQL OS thread */
-/*************************************************************************
+/*********************************************************************//**
A thread which wakes up threads whose lock wait may have lasted too long.
-This also prints the info output by various InnoDB monitors. */
-
+This also prints the info output by various InnoDB monitors.
+@return a dummy parameter */
+UNIV_INTERN
os_thread_ret_t
srv_lock_timeout_and_monitor_thread(
/*================================*/
- /* out: a dummy parameter */
- void* arg); /* in: a dummy parameter required by
+ void* arg); /*!< in: a dummy parameter required by
os_thread_create */
-/*************************************************************************
+/*********************************************************************//**
A thread which prints warnings about semaphore waits which have lasted
-too long. These can be used to track bugs which cause hangs. */
-
+too long. These can be used to track bugs which cause hangs.
+@return a dummy parameter */
+UNIV_INTERN
os_thread_ret_t
srv_error_monitor_thread(
/*=====================*/
- /* out: a dummy parameter */
- void* arg); /* in: a dummy parameter required by
+ void* arg); /*!< in: a dummy parameter required by
os_thread_create */
-/**********************************************************************
+/******************************************************************//**
Outputs to a file the output of the InnoDB Monitor. */
-
+UNIV_INTERN
void
srv_printf_innodb_monitor(
/*======================*/
- FILE* file, /* in: output stream */
- ulint* trx_start, /* out: file position of the start of
+ FILE* file, /*!< in: output stream */
+ ulint* trx_start, /*!< out: file position of the start of
the list of active transactions */
- ulint* trx_end); /* out: file position of the end of
+ ulint* trx_end); /*!< out: file position of the end of
the list of active transactions */
-/**********************************************************************
+/******************************************************************//**
Function to pass InnoDB status variables to MySQL */
-
+UNIV_INTERN
void
srv_export_innodb_status(void);
-/*=====================*/
+/*==========================*/
-/* Types for the threads existing in the system. Threads of types 4 - 9
-are called utility threads. Note that utility threads are mainly disk
-bound, except that version threads 6 - 7 may also be CPU bound, if
-cleaning versions from the buffer pool. */
-
-#define SRV_COM 1 /* threads serving communication and queries */
-#define SRV_CONSOLE 2 /* thread serving console */
-#define SRV_WORKER 3 /* threads serving parallelized queries and
- queries released from lock wait */
-#define SRV_BUFFER 4 /* thread flushing dirty buffer blocks,
- not currently in use */
-#define SRV_RECOVERY 5 /* threads finishing a recovery,
- not currently in use */
-#define SRV_INSERT 6 /* thread flushing the insert buffer to disk,
- not currently in use */
-#define SRV_MASTER 7 /* the master thread, (whose type number must
- be biggest) */
-
-/* Thread slot in the thread table */
+/** Thread slot in the thread table */
typedef struct srv_slot_struct srv_slot_t;
-/* Thread table is an array of slots */
+/** Thread table is an array of slots */
typedef srv_slot_t srv_table_t;
-/* In this structure we store status variables to be passed to MySQL */
+/** Status variables to be passed to MySQL */
struct export_var_struct{
- ulint innodb_data_pending_reads;
- ulint innodb_data_pending_writes;
- ulint innodb_data_pending_fsyncs;
- ulint innodb_data_fsyncs;
- ulint innodb_data_read;
- ulint innodb_data_writes;
- ulint innodb_data_written;
- ulint innodb_data_reads;
- ulint innodb_buffer_pool_pages_total;
- ulint innodb_buffer_pool_pages_data;
- ulint innodb_buffer_pool_pages_dirty;
- ulint innodb_buffer_pool_pages_misc;
- ulint innodb_buffer_pool_pages_free;
+ ulint innodb_data_pending_reads; /*!< Pending reads */
+ ulint innodb_data_pending_writes; /*!< Pending writes */
+ ulint innodb_data_pending_fsyncs; /*!< Pending fsyncs */
+ ulint innodb_data_fsyncs; /*!< Number of fsyncs so far */
+ ulint innodb_data_read; /*!< Data bytes read */
+ ulint innodb_data_writes; /*!< I/O write requests */
+ ulint innodb_data_written; /*!< Data bytes written */
+ ulint innodb_data_reads; /*!< I/O read requests */
+ ulint innodb_buffer_pool_pages_total; /*!< Buffer pool size */
+ ulint innodb_buffer_pool_pages_data; /*!< Data pages */
+ ulint innodb_buffer_pool_pages_dirty; /*!< Dirty data pages */
+ ulint innodb_buffer_pool_pages_misc; /*!< Miscellaneous pages */
+ ulint innodb_buffer_pool_pages_free; /*!< Free pages */
#ifdef UNIV_DEBUG
- ulint innodb_buffer_pool_pages_latched;
+ ulint innodb_buffer_pool_pages_latched; /*!< Latched pages */
#endif /* UNIV_DEBUG */
- ulint innodb_buffer_pool_read_requests;
- ulint innodb_buffer_pool_reads;
- ulint innodb_buffer_pool_wait_free;
- ulint innodb_buffer_pool_pages_flushed;
- ulint innodb_buffer_pool_write_requests;
- ulint innodb_buffer_pool_read_ahead_seq;
- ulint innodb_buffer_pool_read_ahead_rnd;
- ulint innodb_dblwr_pages_written;
- ulint innodb_dblwr_writes;
- ulint innodb_log_waits;
- ulint innodb_log_write_requests;
- ulint innodb_log_writes;
- ulint innodb_os_log_written;
- ulint innodb_os_log_fsyncs;
- ulint innodb_os_log_pending_writes;
- ulint innodb_os_log_pending_fsyncs;
- ulint innodb_page_size;
- ulint innodb_pages_created;
- ulint innodb_pages_read;
- ulint innodb_pages_written;
- ulint innodb_row_lock_waits;
- ulint innodb_row_lock_current_waits;
- ib_longlong innodb_row_lock_time;
- ulint innodb_row_lock_time_avg;
- ulint innodb_row_lock_time_max;
- ulint innodb_rows_read;
- ulint innodb_rows_inserted;
- ulint innodb_rows_updated;
- ulint innodb_rows_deleted;
+ ulint innodb_buffer_pool_read_requests; /*!< buf_pool->stat.n_page_gets */
+ ulint innodb_buffer_pool_reads; /*!< srv_buf_pool_reads */
+ ulint innodb_buffer_pool_wait_free; /*!< srv_buf_pool_wait_free */
+ ulint innodb_buffer_pool_pages_flushed; /*!< srv_buf_pool_flushed */
+ ulint innodb_buffer_pool_write_requests;/*!< srv_buf_pool_write_requests */
+ ulint innodb_buffer_pool_read_ahead; /*!< srv_read_ahead */
+ ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted*/
+ ulint innodb_dblwr_pages_written; /*!< srv_dblwr_pages_written */
+ ulint innodb_dblwr_writes; /*!< srv_dblwr_writes */
+ ibool innodb_have_atomic_builtins; /*!< HAVE_ATOMIC_BUILTINS */
+ ulint innodb_log_waits; /*!< srv_log_waits */
+ ulint innodb_log_write_requests; /*!< srv_log_write_requests */
+ ulint innodb_log_writes; /*!< srv_log_writes */
+ ulint innodb_os_log_written; /*!< srv_os_log_written */
+ ulint innodb_os_log_fsyncs; /*!< fil_n_log_flushes */
+ ulint innodb_os_log_pending_writes; /*!< srv_os_log_pending_writes */
+ ulint innodb_os_log_pending_fsyncs; /*!< fil_n_pending_log_flushes */
+ ulint innodb_page_size; /*!< UNIV_PAGE_SIZE */
+ ulint innodb_pages_created; /*!< buf_pool->stat.n_pages_created */
+ ulint innodb_pages_read; /*!< buf_pool->stat.n_pages_read */
+ ulint innodb_pages_written; /*!< buf_pool->stat.n_pages_written */
+ ulint innodb_row_lock_waits; /*!< srv_n_lock_wait_count */
+ ulint innodb_row_lock_current_waits; /*!< srv_n_lock_wait_current_count */
+ ib_int64_t innodb_row_lock_time; /*!< srv_n_lock_wait_time
+ / 1000 */
+ ulint innodb_row_lock_time_avg; /*!< srv_n_lock_wait_time
+ / 1000
+ / srv_n_lock_wait_count */
+ ulint innodb_row_lock_time_max; /*!< srv_n_lock_max_wait_time
+ / 1000 */
+ ulint innodb_rows_read; /*!< srv_n_rows_read */
+ ulint innodb_rows_inserted; /*!< srv_n_rows_inserted */
+ ulint innodb_rows_updated; /*!< srv_n_rows_updated */
+ ulint innodb_rows_deleted; /*!< srv_n_rows_deleted */
};
-/* The server system struct */
+/** The server system struct */
struct srv_sys_struct{
- srv_table_t* threads; /* server thread table */
+ srv_table_t* threads; /*!< server thread table */
UT_LIST_BASE_NODE_T(que_thr_t)
- tasks; /* task queue */
- dict_index_t* dummy_ind1; /* dummy index for old-style
- supremum and infimum records */
- dict_index_t* dummy_ind2; /* dummy index for new-style
- supremum and infimum records */
+ tasks; /*!< task queue */
};
extern ulint srv_n_threads_active[];
+#else /* !UNIV_HOTBACKUP */
+# define srv_use_checksums TRUE
+# define srv_use_adaptive_hash_indexes FALSE
+# define srv_force_recovery 0UL
+# define srv_set_io_thread_op_info(t,info) ((void) 0)
+# define srv_is_being_started 0
+# define srv_win_file_flush_method SRV_WIN_IO_UNBUFFERED
+# define srv_unix_file_flush_method SRV_UNIX_O_DSYNC
+# define srv_start_raw_disk_in_use 0
+# define srv_file_per_table 1
+#endif /* !UNIV_HOTBACKUP */
#endif
-
diff --git a/storage/innobase/include/srv0srv.ic b/storage/innobase/include/srv0srv.ic
index 73e0729660f..8a1a678a016 100644
--- a/storage/innobase/include/srv0srv.ic
+++ b/storage/innobase/include/srv0srv.ic
@@ -1,7 +1,24 @@
-/******************************************************
-Server main program
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/srv0srv.ic
+Server main program
Created 10/4/1995 Heikki Tuuri
*******************************************************/
diff --git a/storage/innobase/include/srv0start.h b/storage/innobase/include/srv0start.h
index a04930d6516..8abf15da9c1 100644
--- a/storage/innobase/include/srv0start.h
+++ b/storage/innobase/include/srv0start.h
@@ -1,112 +1,134 @@
-/******************************************************
-Starts the Innobase database server
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1995-2000 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/srv0start.h
+Starts the Innobase database server
Created 10/10/1995 Heikki Tuuri
*******************************************************/
-
#ifndef srv0start_h
#define srv0start_h
#include "univ.i"
#include "ut0byte.h"
-/*************************************************************************
+/*********************************************************************//**
Normalizes a directory path for Windows: converts slashes to backslashes. */
-
+UNIV_INTERN
void
srv_normalize_path_for_win(
/*=======================*/
- char* str); /* in/out: null-terminated character string */
-/*************************************************************************
+ char* str); /*!< in/out: null-terminated character string */
+/*********************************************************************//**
Reads the data files and their sizes from a character string given in
-the .cnf file. */
-
+the .cnf file.
+@return TRUE if ok, FALSE on parse error */
+UNIV_INTERN
ibool
srv_parse_data_file_paths_and_sizes(
/*================================*/
- /* out: TRUE if ok, FALSE if parsing
- error */
- char* str, /* in: the data file path string */
- char*** data_file_names, /* out, own: array of data file
- names */
- ulint** data_file_sizes, /* out, own: array of data file sizes
- in megabytes */
- ulint** data_file_is_raw_partition,/* out, own: array of flags
- showing which data files are raw
- partitions */
- ulint* n_data_files, /* out: number of data files */
- ibool* is_auto_extending, /* out: TRUE if the last data file is
- auto-extending */
- ulint* max_auto_extend_size); /* out: max auto extend size for the
- last file if specified, 0 if not */
-/*************************************************************************
+ char* str); /*!< in/out: the data file path string */
+/*********************************************************************//**
Reads log group home directories from a character string given in
-the .cnf file. */
-
+the .cnf file.
+@return TRUE if ok, FALSE on parse error */
+UNIV_INTERN
ibool
srv_parse_log_group_home_dirs(
/*==========================*/
- /* out: TRUE if ok, FALSE if parsing
- error */
- char* str, /* in: character string */
- char*** log_group_home_dirs); /* out, own: log group home dirs */
-/*************************************************************************
+ char* str); /*!< in/out: character string */
+/*********************************************************************//**
+Frees the memory allocated by srv_parse_data_file_paths_and_sizes()
+and srv_parse_log_group_home_dirs(). */
+UNIV_INTERN
+void
+srv_free_paths_and_sizes(void);
+/*==========================*/
+/*********************************************************************//**
Adds a slash or a backslash to the end of a string if it is missing
-and the string is not empty. */
-
+and the string is not empty.
+@return string which has the separator if the string is not empty */
+UNIV_INTERN
char*
srv_add_path_separator_if_needed(
/*=============================*/
- /* out: string which has the separator if the
- string is not empty */
- char* str); /* in: null-terminated character string */
-/********************************************************************
+ char* str); /*!< in: null-terminated character string */
+#ifndef UNIV_HOTBACKUP
+/****************************************************************//**
Starts Innobase and creates a new database if database files
-are not found and the user wants. Server parameters are
-read from a file of name "srv_init" in the ib_home directory. */
-
+are not found and the user wants.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
int
innobase_start_or_create_for_mysql(void);
/*====================================*/
- /* out: DB_SUCCESS or error code */
-/********************************************************************
-Shuts down the Innobase database. */
+/****************************************************************//**
+Shuts down the Innobase database.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
int
innobase_shutdown_for_mysql(void);
/*=============================*/
- /* out: DB_SUCCESS or error code */
-extern dulint srv_shutdown_lsn;
-extern dulint srv_start_lsn;
+/** Log sequence number at shutdown */
+extern ib_uint64_t srv_shutdown_lsn;
+/** Log sequence number immediately after startup */
+extern ib_uint64_t srv_start_lsn;
#ifdef __NETWARE__
void set_panic_flag_for_netware(void);
#endif
#ifdef HAVE_DARWIN_THREADS
+/** TRUE if the F_FULLFSYNC option is available */
extern ibool srv_have_fullfsync;
#endif
-extern ulint srv_sizeof_trx_t_in_ha_innodb_cc;
-
+/** TRUE if the server is being started */
extern ibool srv_is_being_started;
+/** TRUE if the server was successfully started */
+extern ibool srv_was_started;
+/** TRUE if the server is being started, before rolling back any
+incomplete transactions */
extern ibool srv_startup_is_before_trx_rollback_phase;
-extern ibool srv_is_being_shut_down;
+/** TRUE if a raw partition is in use */
extern ibool srv_start_raw_disk_in_use;
-/* At a shutdown the value first climbs from 0 to SRV_SHUTDOWN_CLEANUP
-and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
-extern ulint srv_shutdown_state;
+/** Shutdown state */
+enum srv_shutdown_state {
+ SRV_SHUTDOWN_NONE = 0, /*!< Database running normally */
+ SRV_SHUTDOWN_CLEANUP, /*!< Cleaning up in
+ logs_empty_and_mark_files_at_shutdown() */
+ SRV_SHUTDOWN_LAST_PHASE,/*!< Last phase after ensuring that
+ the buffer pool can be freed: flush
+ all file spaces and close all files */
+ SRV_SHUTDOWN_EXIT_THREADS/*!< Exit all threads */
+};
-#define SRV_SHUTDOWN_CLEANUP 1
-#define SRV_SHUTDOWN_LAST_PHASE 2
-#define SRV_SHUTDOWN_EXIT_THREADS 3
+/** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to
+SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
+extern enum srv_shutdown_state srv_shutdown_state;
+#endif /* !UNIV_HOTBACKUP */
-/* Log 'spaces' have id's >= this */
+/** Log 'spaces' have id's >= this */
#define SRV_LOG_SPACE_FIRST_ID 0xFFFFFFF0UL
#endif
diff --git a/storage/innobase/include/sync0arr.h b/storage/innobase/include/sync0arr.h
index fae26b7a63e..5f1280f5e28 100644
--- a/storage/innobase/include/sync0arr.h
+++ b/storage/innobase/include/sync0arr.h
@@ -1,7 +1,24 @@
-/******************************************************
-The wait array used in synchronization primitives
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1995 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/sync0arr.h
+The wait array used in synchronization primitives
Created 9/5/1995 Heikki Tuuri
*******************************************************/
@@ -14,105 +31,108 @@ Created 9/5/1995 Heikki Tuuri
#include "ut0mem.h"
#include "os0thread.h"
+/** Synchronization wait array cell */
typedef struct sync_cell_struct sync_cell_t;
+/** Synchronization wait array */
typedef struct sync_array_struct sync_array_t;
-#define SYNC_ARRAY_OS_MUTEX 1
-#define SYNC_ARRAY_MUTEX 2
+/** Parameters for sync_array_create() @{ */
+#define SYNC_ARRAY_OS_MUTEX 1 /*!< protected by os_mutex_t */
+#define SYNC_ARRAY_MUTEX 2 /*!< protected by mutex_t */
+/* @} */
-/***********************************************************************
+/*******************************************************************//**
Creates a synchronization wait array. It is protected by a mutex
which is automatically reserved when the functions operating on it
-are called. */
-
+are called.
+@return own: created wait array */
+UNIV_INTERN
sync_array_t*
sync_array_create(
/*==============*/
- /* out, own: created wait array */
- ulint n_cells, /* in: number of cells in the array
+ ulint n_cells, /*!< in: number of cells in the array
to create */
- ulint protection); /* in: either SYNC_ARRAY_OS_MUTEX or
+ ulint protection); /*!< in: either SYNC_ARRAY_OS_MUTEX or
SYNC_ARRAY_MUTEX: determines the type
of mutex protecting the data structure */
-/**********************************************************************
+/******************************************************************//**
Frees the resources in a wait array. */
-
+UNIV_INTERN
void
sync_array_free(
/*============*/
- sync_array_t* arr); /* in, own: sync wait array */
-/**********************************************************************
+ sync_array_t* arr); /*!< in, own: sync wait array */
+/******************************************************************//**
Reserves a wait array cell for waiting for an object.
The event of the cell is reset to nonsignalled state. */
-
+UNIV_INTERN
void
sync_array_reserve_cell(
/*====================*/
- sync_array_t* arr, /* in: wait array */
- void* object, /* in: pointer to the object to wait for */
- ulint type, /* in: lock request type */
- const char* file, /* in: file where requested */
- ulint line, /* in: line where requested */
- ulint* index); /* out: index of the reserved cell */
-/**********************************************************************
+ sync_array_t* arr, /*!< in: wait array */
+ void* object, /*!< in: pointer to the object to wait for */
+ ulint type, /*!< in: lock request type */
+ const char* file, /*!< in: file where requested */
+ ulint line, /*!< in: line where requested */
+ ulint* index); /*!< out: index of the reserved cell */
+/******************************************************************//**
This function should be called when a thread starts to wait on
a wait array cell. In the debug version this function checks
if the wait for a semaphore will result in a deadlock, in which
case prints info and asserts. */
-
+UNIV_INTERN
void
sync_array_wait_event(
/*==================*/
- sync_array_t* arr, /* in: wait array */
- ulint index); /* in: index of the reserved cell */
-/**********************************************************************
+ sync_array_t* arr, /*!< in: wait array */
+ ulint index); /*!< in: index of the reserved cell */
+/******************************************************************//**
Frees the cell. NOTE! sync_array_wait_event frees the cell
automatically! */
-
+UNIV_INTERN
void
sync_array_free_cell(
/*=================*/
- sync_array_t* arr, /* in: wait array */
- ulint index); /* in: index of the cell in array */
-/**************************************************************************
+ sync_array_t* arr, /*!< in: wait array */
+ ulint index); /*!< in: index of the cell in array */
+/**********************************************************************//**
Note that one of the wait objects was signalled. */
-
+UNIV_INTERN
void
sync_array_object_signalled(
/*========================*/
- sync_array_t* arr); /* in: wait array */
-/**************************************************************************
+ sync_array_t* arr); /*!< in: wait array */
+/**********************************************************************//**
If the wakeup algorithm does not work perfectly at semaphore relases,
this function will do the waking (see the comment in mutex_exit). This
function should be called about every 1 second in the server. */
-
+UNIV_INTERN
void
sync_arr_wake_threads_if_sema_free(void);
/*====================================*/
-/**************************************************************************
-Prints warnings of long semaphore waits to stderr. */
-
+/**********************************************************************//**
+Prints warnings of long semaphore waits to stderr.
+@return TRUE if fatal semaphore wait threshold was exceeded */
+UNIV_INTERN
ibool
sync_array_print_long_waits(void);
/*=============================*/
- /* out: TRUE if fatal semaphore wait threshold
- was exceeded */
-/************************************************************************
+/********************************************************************//**
Validates the integrity of the wait array. Checks
that the number of reserved cells equals the count variable. */
-
+UNIV_INTERN
void
sync_array_validate(
/*================*/
- sync_array_t* arr); /* in: sync wait array */
-/**************************************************************************
+ sync_array_t* arr); /*!< in: sync wait array */
+/**********************************************************************//**
Prints info of the wait array. */
-
+UNIV_INTERN
void
sync_array_print_info(
/*==================*/
- FILE* file, /* in: file where to print */
- sync_array_t* arr); /* in: wait array */
+ FILE* file, /*!< in: file where to print */
+ sync_array_t* arr); /*!< in: wait array */
#ifndef UNIV_NONINL
diff --git a/storage/innobase/include/sync0arr.ic b/storage/innobase/include/sync0arr.ic
index dbe35c033e5..bf57f5b2dc2 100644
--- a/storage/innobase/include/sync0arr.ic
+++ b/storage/innobase/include/sync0arr.ic
@@ -1,10 +1,27 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/sync0arr.ic
The wait array for synchronization primitives
Inline code
-(c) 1995 Innobase Oy
-
Created 9/5/1995 Heikki Tuuri
*******************************************************/
diff --git a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
index 008df80a2c7..aedfd5f3f86 100644
--- a/storage/innobase/include/sync0rw.h
+++ b/storage/innobase/include/sync0rw.h
@@ -1,7 +1,31 @@
-/******************************************************
-The read-write lock (for threads, not for database transactions)
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1995 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/sync0rw.h
+The read-write lock (for threads, not for database transactions)
Created 9/11/1995 Heikki Tuuri
*******************************************************/
@@ -10,6 +34,7 @@ Created 9/11/1995 Heikki Tuuri
#define sync0rw_h
#include "univ.i"
+#ifndef UNIV_HOTBACKUP
#include "ut0lst.h"
#include "sync0sync.h"
#include "os0sync.h"
@@ -17,6 +42,7 @@ Created 9/11/1995 Heikki Tuuri
/* The following undef is to prevent a name conflict with a macro
in MySQL: */
#undef rw_lock_t
+#endif /* !UNIV_HOTBACKUP */
/* Latch types; these are used also in btr0btr.h: keep the numerical values
smaller than 30 and the order of the numerical values like below! */
@@ -24,6 +50,13 @@ smaller than 30 and the order of the numerical values like below! */
#define RW_X_LATCH 2
#define RW_NO_LATCH 3
+#ifndef UNIV_HOTBACKUP
+/* We decrement lock_word by this amount for each x_lock. It is also the
+start value for the lock_word, meaning that it limits the maximum number
+of concurrent read locks before the rw_lock breaks. The current value of
+0x00100000 allows 1,048,575 concurrent readers and 2047 recursive writers.*/
+#define X_LOCK_DECR 0x00100000
+
typedef struct rw_lock_struct rw_lock_t;
#ifdef UNIV_SYNC_DEBUG
typedef struct rw_lock_debug_struct rw_lock_debug_t;
@@ -40,23 +73,39 @@ To modify the debug info list of an rw-lock, this mutex has to be
acquired in addition to the mutex protecting the lock. */
extern mutex_t rw_lock_debug_mutex;
-extern os_event_t rw_lock_debug_event; /* If deadlock detection does
+extern os_event_t rw_lock_debug_event; /*!< If deadlock detection does
not get immediately the mutex it
may wait for this event */
-extern ibool rw_lock_debug_waiters; /* This is set to TRUE, if
+extern ibool rw_lock_debug_waiters; /*!< This is set to TRUE, if
there may be waiters for the event */
#endif /* UNIV_SYNC_DEBUG */
-extern ulint rw_s_system_call_count;
-extern ulint rw_s_spin_wait_count;
-extern ulint rw_s_exit_count;
-extern ulint rw_s_os_wait_count;
-extern ulint rw_x_system_call_count;
-extern ulint rw_x_spin_wait_count;
-extern ulint rw_x_os_wait_count;
-extern ulint rw_x_exit_count;
-
-/**********************************************************************
+/** number of spin waits on rw-latches,
+resulted during shared (read) locks */
+extern ib_int64_t rw_s_spin_wait_count;
+/** number of spin loop rounds on rw-latches,
+resulted during shared (read) locks */
+extern ib_int64_t rw_s_spin_round_count;
+/** number of unlocks (that unlock shared locks),
+set only when UNIV_SYNC_PERF_STAT is defined */
+extern ib_int64_t rw_s_exit_count;
+/** number of OS waits on rw-latches,
+resulted during shared (read) locks */
+extern ib_int64_t rw_s_os_wait_count;
+/** number of spin waits on rw-latches,
+resulted during exclusive (write) locks */
+extern ib_int64_t rw_x_spin_wait_count;
+/** number of spin loop rounds on rw-latches,
+resulted during exclusive (write) locks */
+extern ib_int64_t rw_x_spin_round_count;
+/** number of OS waits on rw-latches,
+resulted during exclusive (write) locks */
+extern ib_int64_t rw_x_os_wait_count;
+/** number of unlocks (that unlock exclusive locks),
+set only when UNIV_SYNC_PERF_STAT is defined */
+extern ib_int64_t rw_x_exit_count;
+
+/******************************************************************//**
Creates, or rather, initializes an rw-lock object in a specified memory
location (which must be appropriately aligned). The rw-lock is initialized
to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
@@ -74,62 +123,77 @@ is necessary only if the memory block containing it is freed. */
rw_lock_create_func((L), __FILE__, __LINE__)
#endif /* UNIV_DEBUG */
-/**********************************************************************
+/******************************************************************//**
Creates, or rather, initializes an rw-lock object in a specified memory
location (which must be appropriately aligned). The rw-lock is initialized
to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
is necessary only if the memory block containing it is freed. */
-
+UNIV_INTERN
void
rw_lock_create_func(
/*================*/
- rw_lock_t* lock, /* in: pointer to memory */
+ rw_lock_t* lock, /*!< in: pointer to memory */
#ifdef UNIV_DEBUG
# ifdef UNIV_SYNC_DEBUG
- ulint level, /* in: level */
+ ulint level, /*!< in: level */
# endif /* UNIV_SYNC_DEBUG */
- const char* cmutex_name, /* in: mutex name */
+ const char* cmutex_name, /*!< in: mutex name */
#endif /* UNIV_DEBUG */
- const char* cfile_name, /* in: file name where created */
- ulint cline); /* in: file line where created */
-/**********************************************************************
+ const char* cfile_name, /*!< in: file name where created */
+ ulint cline); /*!< in: file line where created */
+/******************************************************************//**
Calling this function is obligatory only if the memory buffer containing
the rw-lock is freed. Removes an rw-lock object from the global list. The
rw-lock is checked to be in the non-locked state. */
-
+UNIV_INTERN
void
rw_lock_free(
/*=========*/
- rw_lock_t* lock); /* in: rw-lock */
+ rw_lock_t* lock); /*!< in: rw-lock */
#ifdef UNIV_DEBUG
-/**********************************************************************
+/******************************************************************//**
Checks that the rw-lock has been initialized and that there are no
-simultaneous shared and exclusive locks. */
-
+simultaneous shared and exclusive locks.
+@return TRUE */
+UNIV_INTERN
ibool
rw_lock_validate(
/*=============*/
- rw_lock_t* lock);
+ rw_lock_t* lock); /*!< in: rw-lock */
#endif /* UNIV_DEBUG */
-/******************************************************************
+/**************************************************************//**
NOTE! The following macros should be used in rw s-locking, not the
corresponding function. */
#define rw_lock_s_lock(M) rw_lock_s_lock_func(\
(M), 0, __FILE__, __LINE__)
-/******************************************************************
+/**************************************************************//**
NOTE! The following macros should be used in rw s-locking, not the
corresponding function. */
#define rw_lock_s_lock_gen(M, P) rw_lock_s_lock_func(\
(M), (P), __FILE__, __LINE__)
-/******************************************************************
+/**************************************************************//**
NOTE! The following macros should be used in rw s-locking, not the
corresponding function. */
-#define rw_lock_s_lock_nowait(M) rw_lock_s_lock_func_nowait(\
- (M), __FILE__, __LINE__)
-/**********************************************************************
+#define rw_lock_s_lock_nowait(M, F, L) rw_lock_s_lock_low(\
+ (M), 0, (F), (L))
+/******************************************************************//**
+Low-level function which tries to lock an rw-lock in s-mode. Performs no
+spinning.
+@return TRUE if success */
+UNIV_INLINE
+ibool
+rw_lock_s_lock_low(
+/*===============*/
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass __attribute__((unused)),
+ /*!< in: pass value; != 0, if the lock will be
+ passed to another thread to unlock */
+ const char* file_name, /*!< in: file name where lock requested */
+ ulint line); /*!< in: line where requested */
+/******************************************************************//**
NOTE! Use the corresponding macro, not directly this function, except if
you supply the file name and line number. Lock an rw-lock in shared mode
for the current thread. If the rw-lock is locked in exclusive mode, or
@@ -140,82 +204,63 @@ UNIV_INLINE
void
rw_lock_s_lock_func(
/*================*/
- rw_lock_t* lock, /* in: pointer to rw-lock */
- ulint pass, /* in: pass value; != 0, if the lock will
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
be passed to another thread to unlock */
- const char* file_name,/* in: file name where lock requested */
- ulint line); /* in: line where requested */
-/**********************************************************************
-NOTE! Use the corresponding macro, not directly this function, except if
-you supply the file name and line number. Lock an rw-lock in shared mode
-for the current thread if the lock can be acquired immediately. */
-UNIV_INLINE
-ibool
-rw_lock_s_lock_func_nowait(
-/*=======================*/
- /* out: TRUE if success */
- rw_lock_t* lock, /* in: pointer to rw-lock */
- const char* file_name,/* in: file name where lock requested */
- ulint line); /* in: line where requested */
-/**********************************************************************
+ const char* file_name,/*!< in: file name where lock requested */
+ ulint line); /*!< in: line where requested */
+/******************************************************************//**
NOTE! Use the corresponding macro, not directly this function! Lock an
rw-lock in exclusive mode for the current thread if the lock can be
-obtained immediately. */
+obtained immediately.
+@return TRUE if success */
UNIV_INLINE
ibool
rw_lock_x_lock_func_nowait(
/*=======================*/
- /* out: TRUE if success */
- rw_lock_t* lock, /* in: pointer to rw-lock */
- const char* file_name,/* in: file name where lock requested */
- ulint line); /* in: line where requested */
-/**********************************************************************
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ const char* file_name,/*!< in: file name where lock requested */
+ ulint line); /*!< in: line where requested */
+/******************************************************************//**
Releases a shared mode lock. */
UNIV_INLINE
void
rw_lock_s_unlock_func(
/*==================*/
- rw_lock_t* lock /* in: rw-lock */
#ifdef UNIV_SYNC_DEBUG
- ,ulint pass /* in: pass value; != 0, if the lock may have
+ ulint pass, /*!< in: pass value; != 0, if the lock may have
been passed to another thread to unlock */
#endif
- );
-/***********************************************************************
-Releases a shared mode lock. */
+ rw_lock_t* lock); /*!< in/out: rw-lock */
#ifdef UNIV_SYNC_DEBUG
-#define rw_lock_s_unlock(L) rw_lock_s_unlock_func(L, 0)
+# define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(P, L)
#else
-#define rw_lock_s_unlock(L) rw_lock_s_unlock_func(L)
+# define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(L)
#endif
-/***********************************************************************
+/*******************************************************************//**
Releases a shared mode lock. */
+#define rw_lock_s_unlock(L) rw_lock_s_unlock_gen(L, 0)
-#ifdef UNIV_SYNC_DEBUG
-#define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(L, P)
-#else
-#define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(L)
-#endif
-/******************************************************************
+/**************************************************************//**
NOTE! The following macro should be used in rw x-locking, not the
corresponding function. */
#define rw_lock_x_lock(M) rw_lock_x_lock_func(\
(M), 0, __FILE__, __LINE__)
-/******************************************************************
+/**************************************************************//**
NOTE! The following macro should be used in rw x-locking, not the
corresponding function. */
#define rw_lock_x_lock_gen(M, P) rw_lock_x_lock_func(\
(M), (P), __FILE__, __LINE__)
-/******************************************************************
+/**************************************************************//**
NOTE! The following macros should be used in rw x-locking, not the
corresponding function. */
#define rw_lock_x_lock_nowait(M) rw_lock_x_lock_func_nowait(\
(M), __FILE__, __LINE__)
-/**********************************************************************
+/******************************************************************//**
NOTE! Use the corresponding macro, not directly this function! Lock an
rw-lock in exclusive mode for the current thread. If the rw-lock is locked
in shared or exclusive mode, or there is an exclusive lock request waiting,
@@ -224,44 +269,37 @@ for the lock, before suspending the thread. If the same thread has an x-lock
on the rw-lock, locking succeed, with the following exception: if pass != 0,
only a single x-lock may be taken on the lock. NOTE: If the same thread has
an s-lock, locking does not succeed! */
-
+UNIV_INTERN
void
rw_lock_x_lock_func(
/*================*/
- rw_lock_t* lock, /* in: pointer to rw-lock */
- ulint pass, /* in: pass value; != 0, if the lock will
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
be passed to another thread to unlock */
- const char* file_name,/* in: file name where lock requested */
- ulint line); /* in: line where requested */
-/**********************************************************************
+ const char* file_name,/*!< in: file name where lock requested */
+ ulint line); /*!< in: line where requested */
+/******************************************************************//**
Releases an exclusive mode lock. */
UNIV_INLINE
void
rw_lock_x_unlock_func(
/*==================*/
- rw_lock_t* lock /* in: rw-lock */
#ifdef UNIV_SYNC_DEBUG
- ,ulint pass /* in: pass value; != 0, if the lock may have
+ ulint pass, /*!< in: pass value; != 0, if the lock may have
been passed to another thread to unlock */
#endif
- );
-/***********************************************************************
-Releases an exclusive mode lock. */
+ rw_lock_t* lock); /*!< in/out: rw-lock */
#ifdef UNIV_SYNC_DEBUG
-#define rw_lock_x_unlock(L) rw_lock_x_unlock_func(L, 0)
+# define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(P, L)
#else
-#define rw_lock_x_unlock(L) rw_lock_x_unlock_func(L)
+# define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(L)
#endif
-/***********************************************************************
+/*******************************************************************//**
Releases an exclusive mode lock. */
+#define rw_lock_x_unlock(L) rw_lock_x_unlock_gen(L, 0)
-#ifdef UNIV_SYNC_DEBUG
-#define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(L, P)
-#else
-#define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(L)
-#endif
-/**********************************************************************
+/******************************************************************//**
Low-level function which locks an rw-lock in s-mode when we know that it
is possible and none else is currently accessing the rw-lock structure.
Then we can do the locking without reserving the mutex. */
@@ -269,11 +307,10 @@ UNIV_INLINE
void
rw_lock_s_lock_direct(
/*==================*/
- rw_lock_t* lock, /* in: pointer to rw-lock */
- const char* file_name, /* in: file name where requested */
- ulint line /* in: line where lock requested */
-);
-/**********************************************************************
+ rw_lock_t* lock, /*!< in/out: rw-lock */
+ const char* file_name, /*!< in: file name where requested */
+ ulint line); /*!< in: line where lock requested */
+/******************************************************************//**
Low-level function which locks an rw-lock in x-mode when we know that it
is not locked and none else is currently accessing the rw-lock structure.
Then we can do the locking without reserving the mutex. */
@@ -281,11 +318,10 @@ UNIV_INLINE
void
rw_lock_x_lock_direct(
/*==================*/
- rw_lock_t* lock, /* in: pointer to rw-lock */
- const char* file_name, /* in: file name where requested */
- ulint line /* in: line where lock requested */
-);
-/**********************************************************************
+ rw_lock_t* lock, /*!< in/out: rw-lock */
+ const char* file_name, /*!< in: file name where requested */
+ ulint line); /*!< in: line where lock requested */
+/******************************************************************//**
This function is used in the insert buffer to move the ownership of an
x-latch on a buffer frame to the current thread. The x-latch was set by
the buffer read operation and it protected the buffer frame while the
@@ -293,208 +329,250 @@ read was done. The ownership is moved because we want that the current
thread is able to acquire a second x-latch which is stored in an mtr.
This, in turn, is needed to pass the debug checks of index page
operations. */
-
+UNIV_INTERN
void
rw_lock_x_lock_move_ownership(
/*==========================*/
- rw_lock_t* lock); /* in: lock which was x-locked in the
+ rw_lock_t* lock); /*!< in: lock which was x-locked in the
buffer read */
-/**********************************************************************
+/******************************************************************//**
Releases a shared mode lock when we know there are no waiters and none
else will access the lock during the time this function is executed. */
UNIV_INLINE
void
rw_lock_s_unlock_direct(
/*====================*/
- rw_lock_t* lock); /* in: rw-lock */
-/**********************************************************************
+ rw_lock_t* lock); /*!< in/out: rw-lock */
+/******************************************************************//**
Releases an exclusive mode lock when we know there are no waiters, and
none else will access the lock durint the time this function is executed. */
UNIV_INLINE
void
rw_lock_x_unlock_direct(
/*====================*/
- rw_lock_t* lock); /* in: rw-lock */
-/**********************************************************************
+ rw_lock_t* lock); /*!< in/out: rw-lock */
+/******************************************************************//**
Returns the value of writer_count for the lock. Does not reserve the lock
-mutex, so the caller must be sure it is not changed during the call. */
+mutex, so the caller must be sure it is not changed during the call.
+@return value of writer_count */
UNIV_INLINE
ulint
rw_lock_get_x_lock_count(
/*=====================*/
- /* out: value of writer_count */
- rw_lock_t* lock); /* in: rw-lock */
-/************************************************************************
-Accessor functions for rw lock. */
+ const rw_lock_t* lock); /*!< in: rw-lock */
+/********************************************************************//**
+Check if there are threads waiting for the rw-lock.
+@return 1 if waiters, 0 otherwise */
UNIV_INLINE
ulint
rw_lock_get_waiters(
/*================*/
- rw_lock_t* lock);
+ const rw_lock_t* lock); /*!< in: rw-lock */
+/******************************************************************//**
+Returns the write-status of the lock - this function made more sense
+with the old rw_lock implementation.
+@return RW_LOCK_NOT_LOCKED, RW_LOCK_EX, RW_LOCK_WAIT_EX */
UNIV_INLINE
ulint
rw_lock_get_writer(
/*===============*/
- rw_lock_t* lock);
+ const rw_lock_t* lock); /*!< in: rw-lock */
+/******************************************************************//**
+Returns the number of readers.
+@return number of readers */
UNIV_INLINE
ulint
rw_lock_get_reader_count(
/*=====================*/
- rw_lock_t* lock);
+ const rw_lock_t* lock); /*!< in: rw-lock */
+/******************************************************************//**
+Decrements lock_word the specified amount if it is greater than 0.
+This is used by both s_lock and x_lock operations.
+@return TRUE if decr occurs */
+UNIV_INLINE
+ibool
+rw_lock_lock_word_decr(
+/*===================*/
+ rw_lock_t* lock, /*!< in/out: rw-lock */
+ ulint amount); /*!< in: amount to decrement */
+/******************************************************************//**
+Increments lock_word the specified amount and returns new value.
+@return lock->lock_word after increment */
+UNIV_INLINE
+lint
+rw_lock_lock_word_incr(
+/*===================*/
+ rw_lock_t* lock, /*!< in/out: rw-lock */
+ ulint amount); /*!< in: amount to increment */
+/******************************************************************//**
+This function sets the lock->writer_thread and lock->recursive fields.
+For platforms where we are using atomic builtins instead of lock->mutex
+it sets the lock->writer_thread field using atomics to ensure memory
+ordering. Note that it is assumed that the caller of this function
+effectively owns the lock i.e.: nobody else is allowed to modify
+lock->writer_thread at this point in time.
+The protocol is that lock->writer_thread MUST be updated BEFORE the
+lock->recursive flag is set. */
+UNIV_INLINE
+void
+rw_lock_set_writer_id_and_recursion_flag(
+/*=====================================*/
+ rw_lock_t* lock, /*!< in/out: lock to work on */
+ ibool recursive); /*!< in: TRUE if recursion
+ allowed */
#ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
+/******************************************************************//**
Checks if the thread has locked the rw-lock in the specified mode, with
the pass value == 0. */
-
+UNIV_INTERN
ibool
rw_lock_own(
/*========*/
- rw_lock_t* lock, /* in: rw-lock */
- ulint lock_type); /* in: lock type: RW_LOCK_SHARED,
+ rw_lock_t* lock, /*!< in: rw-lock */
+ ulint lock_type); /*!< in: lock type: RW_LOCK_SHARED,
RW_LOCK_EX */
#endif /* UNIV_SYNC_DEBUG */
-/**********************************************************************
+/******************************************************************//**
Checks if somebody has locked the rw-lock in the specified mode. */
-
+UNIV_INTERN
ibool
rw_lock_is_locked(
/*==============*/
- rw_lock_t* lock, /* in: rw-lock */
- ulint lock_type); /* in: lock type: RW_LOCK_SHARED,
+ rw_lock_t* lock, /*!< in: rw-lock */
+ ulint lock_type); /*!< in: lock type: RW_LOCK_SHARED,
RW_LOCK_EX */
#ifdef UNIV_SYNC_DEBUG
-/*******************************************************************
+/***************************************************************//**
Prints debug info of an rw-lock. */
-
+UNIV_INTERN
void
rw_lock_print(
/*==========*/
- rw_lock_t* lock); /* in: rw-lock */
-/*******************************************************************
+ rw_lock_t* lock); /*!< in: rw-lock */
+/***************************************************************//**
Prints debug info of currently locked rw-locks. */
-
+UNIV_INTERN
void
rw_lock_list_print_info(
/*====================*/
- FILE* file); /* in: file where to print */
-/*******************************************************************
+ FILE* file); /*!< in: file where to print */
+/***************************************************************//**
Returns the number of currently locked rw-locks.
-Works only in the debug version. */
-
+Works only in the debug version.
+@return number of locked rw-locks */
+UNIV_INTERN
ulint
rw_lock_n_locked(void);
/*==================*/
/*#####################################################################*/
-/**********************************************************************
+/******************************************************************//**
Acquires the debug mutex. We cannot use the mutex defined in sync0sync,
because the debug mutex is also acquired in sync0arr while holding the OS
mutex protecting the sync array, and the ordinary mutex_enter might
recursively call routines in sync0arr, leading to a deadlock on the OS
mutex. */
-
+UNIV_INTERN
void
rw_lock_debug_mutex_enter(void);
/*==========================*/
-/**********************************************************************
+/******************************************************************//**
Releases the debug mutex. */
-
+UNIV_INTERN
void
rw_lock_debug_mutex_exit(void);
/*==========================*/
-/*************************************************************************
+/*********************************************************************//**
Prints info of a debug struct. */
-
+UNIV_INTERN
void
rw_lock_debug_print(
/*================*/
- rw_lock_debug_t* info); /* in: debug struct */
+ rw_lock_debug_t* info); /*!< in: debug struct */
#endif /* UNIV_SYNC_DEBUG */
/* NOTE! The structure appears here only for the compiler to know its size.
-Do not use its fields directly! The structure used in the spin lock
-implementation of a read-write lock. Several threads may have a shared lock
-simultaneously in this lock, but only one writer may have an exclusive lock,
-in which case no shared locks are allowed. To prevent starving of a writer
-blocked by readers, a writer may queue for the lock by setting the writer
-field. Then no new readers are allowed in. */
-
+Do not use its fields directly! */
+
+/** The structure used in the spin lock implementation of a read-write
+lock. Several threads may have a shared lock simultaneously in this
+lock, but only one writer may have an exclusive lock, in which case no
+shared locks are allowed. To prevent starving of a writer blocked by
+readers, a writer may queue for x-lock by decrementing lock_word: no
+new readers will be let in while the thread waits for readers to
+exit. */
struct rw_lock_struct {
- os_event_t event; /* Used by sync0arr.c for thread queueing */
-
-#ifdef __WIN__
- os_event_t wait_ex_event; /* This windows specific event is
- used by the thread which has set the
- lock state to RW_LOCK_WAIT_EX. The
- rw_lock design guarantees that this
- thread will be the next one to proceed
- once the current the event gets
- signalled. See LEMMA 2 in sync0sync.c */
-#endif
+ volatile lint lock_word;
+ /*!< Holds the state of the lock. */
+ volatile ulint waiters;/*!< 1: there are waiters */
+ volatile ibool recursive;/*!< Default value FALSE which means the lock
+ is non-recursive. The value is typically set
+ to TRUE making normal rw_locks recursive. In
+ case of asynchronous IO, when a non-zero
+ value of 'pass' is passed then we keep the
+ lock non-recursive.
+ This flag also tells us about the state of
+ writer_thread field. If this flag is set
+ then writer_thread MUST contain the thread
+ id of the current x-holder or wait-x thread.
+ This flag must be reset in x_unlock
+ functions before incrementing the lock_word */
+ volatile os_thread_id_t writer_thread;
+ /*!< Thread id of writer thread. Is only
+ guaranteed to have sane and non-stale
+ value iff recursive flag is set. */
+ os_event_t event; /*!< Used by sync0arr.c for thread queueing */
+ os_event_t wait_ex_event;
+ /*!< Event for next-writer to wait on. A thread
+ must decrement lock_word before waiting. */
+#ifndef INNODB_RW_LOCKS_USE_ATOMICS
+ mutex_t mutex; /*!< The mutex protecting rw_lock_struct */
+#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
- ulint reader_count; /* Number of readers who have locked this
- lock in the shared mode */
- ulint writer; /* This field is set to RW_LOCK_EX if there
- is a writer owning the lock (in exclusive
- mode), RW_LOCK_WAIT_EX if a writer is
- queueing for the lock, and
- RW_LOCK_NOT_LOCKED, otherwise. */
- os_thread_id_t writer_thread;
- /* Thread id of a possible writer thread */
- ulint writer_count; /* Number of times the same thread has
- recursively locked the lock in the exclusive
- mode */
- mutex_t mutex; /* The mutex protecting rw_lock_struct */
- ulint pass; /* Default value 0. This is set to some
- value != 0 given by the caller of an x-lock
- operation, if the x-lock is to be passed to
- another thread to unlock (which happens in
- asynchronous i/o). */
- ulint waiters; /* This ulint is set to 1 if there are
- waiters (readers or writers) in the global
- wait array, waiting for this rw_lock.
- Otherwise, == 0. */
UT_LIST_NODE_T(rw_lock_t) list;
- /* All allocated rw locks are put into a
+ /*!< All allocated rw locks are put into a
list */
#ifdef UNIV_SYNC_DEBUG
UT_LIST_BASE_NODE_T(rw_lock_debug_t) debug_list;
- /* In the debug version: pointer to the debug
+ /*!< In the debug version: pointer to the debug
info list of the lock */
- ulint level; /* Level in the global latching order. */
+ ulint level; /*!< Level in the global latching order. */
#endif /* UNIV_SYNC_DEBUG */
- const char* cfile_name;/* File name where lock created */
- const char* last_s_file_name;/* File name where last s-locked */
- const char* last_x_file_name;/* File name where last x-locked */
+ ulint count_os_wait; /*!< Count of os_waits. May not be accurate */
+ const char* cfile_name;/*!< File name where lock created */
+ /* last s-lock file/line is not guaranteed to be correct */
+ const char* last_s_file_name;/*!< File name where last s-locked */
+ const char* last_x_file_name;/*!< File name where last x-locked */
ibool writer_is_wait_ex;
- /* This is TRUE if the writer field is
+ /*!< This is TRUE if the writer field is
RW_LOCK_WAIT_EX; this field is located far
from the memory update hotspot fields which
are at the start of this struct, thus we can
peek this field without causing much memory
bus traffic */
- unsigned cline:14; /* Line where created */
- unsigned last_s_line:14; /* Line number where last time s-locked */
- unsigned last_x_line:14; /* Line number where last time x-locked */
- ulint magic_n;
+ unsigned cline:14; /*!< Line where created */
+ unsigned last_s_line:14; /*!< Line number where last time s-locked */
+ unsigned last_x_line:14; /*!< Line number where last time x-locked */
+ ulint magic_n; /*!< RW_LOCK_MAGIC_N */
};
+/** Value of rw_lock_struct::magic_n */
#define RW_LOCK_MAGIC_N 22643
#ifdef UNIV_SYNC_DEBUG
-/* The structure for storing debug info of an rw-lock */
+/** The structure for storing debug info of an rw-lock */
struct rw_lock_debug_struct {
- os_thread_id_t thread_id; /* The thread id of the thread which
+ os_thread_id_t thread_id; /*!< The thread id of the thread which
locked the rw-lock */
- ulint pass; /* Pass value given in the lock operation */
- ulint lock_type; /* Type of the lock: RW_LOCK_EX,
+ ulint pass; /*!< Pass value given in the lock operation */
+ ulint lock_type; /*!< Type of the lock: RW_LOCK_EX,
RW_LOCK_SHARED, RW_LOCK_WAIT_EX */
- const char* file_name;/* File name where the lock was obtained */
- ulint line; /* Line where the rw-lock was locked */
+ const char* file_name;/*!< File name where the lock was obtained */
+ ulint line; /*!< Line where the rw-lock was locked */
UT_LIST_NODE_T(rw_lock_debug_t) list;
- /* Debug structs are linked in a two-way
+ /*!< Debug structs are linked in a two-way
list */
};
#endif /* UNIV_SYNC_DEBUG */
@@ -502,5 +580,6 @@ struct rw_lock_debug_struct {
#ifndef UNIV_NONINL
#include "sync0rw.ic"
#endif
+#endif /* !UNIV_HOTBACKUP */
#endif
diff --git a/storage/innobase/include/sync0rw.ic b/storage/innobase/include/sync0rw.ic
index eea639f26f4..7116f1b7c9b 100644
--- a/storage/innobase/include/sync0rw.ic
+++ b/storage/innobase/include/sync0rw.ic
@@ -1,101 +1,162 @@
-/******************************************************
-The read-write lock (for threads)
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/sync0rw.ic
+The read-write lock (for threads)
Created 9/11/1995 Heikki Tuuri
*******************************************************/
-/**********************************************************************
+/******************************************************************//**
Lock an rw-lock in shared mode for the current thread. If the rw-lock is
locked in exclusive mode, or there is an exclusive lock request waiting,
the function spins a preset time (controlled by SYNC_SPIN_ROUNDS),
waiting for the lock before suspending the thread. */
-
+UNIV_INTERN
void
rw_lock_s_lock_spin(
/*================*/
- rw_lock_t* lock, /* in: pointer to rw-lock */
- ulint pass, /* in: pass value; != 0, if the lock will
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
be passed to another thread to unlock */
- const char* file_name,/* in: file name where lock requested */
- ulint line); /* in: line where requested */
+ const char* file_name,/*!< in: file name where lock requested */
+ ulint line); /*!< in: line where requested */
#ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
+/******************************************************************//**
Inserts the debug information for an rw-lock. */
-
+UNIV_INTERN
void
rw_lock_add_debug_info(
/*===================*/
- rw_lock_t* lock, /* in: rw-lock */
- ulint pass, /* in: pass value */
- ulint lock_type, /* in: lock type */
- const char* file_name, /* in: file where requested */
- ulint line); /* in: line where requested */
-/**********************************************************************
+ rw_lock_t* lock, /*!< in: rw-lock */
+ ulint pass, /*!< in: pass value */
+ ulint lock_type, /*!< in: lock type */
+ const char* file_name, /*!< in: file where requested */
+ ulint line); /*!< in: line where requested */
+/******************************************************************//**
Removes a debug information struct for an rw-lock. */
-
+UNIV_INTERN
void
rw_lock_remove_debug_info(
/*======================*/
- rw_lock_t* lock, /* in: rw-lock */
- ulint pass, /* in: pass value */
- ulint lock_type); /* in: lock type */
+ rw_lock_t* lock, /*!< in: rw-lock */
+ ulint pass, /*!< in: pass value */
+ ulint lock_type); /*!< in: lock type */
#endif /* UNIV_SYNC_DEBUG */
-/************************************************************************
-Accessor functions for rw lock. */
+/********************************************************************//**
+Check if there are threads waiting for the rw-lock.
+@return 1 if waiters, 0 otherwise */
UNIV_INLINE
ulint
rw_lock_get_waiters(
/*================*/
- rw_lock_t* lock)
+ const rw_lock_t* lock) /*!< in: rw-lock */
{
return(lock->waiters);
}
+
+/********************************************************************//**
+Sets lock->waiters to 1. It is not an error if lock->waiters is already
+1. On platforms where ATOMIC builtins are used this function enforces a
+memory barrier. */
UNIV_INLINE
void
-rw_lock_set_waiters(
-/*================*/
- rw_lock_t* lock,
- ulint flag)
+rw_lock_set_waiter_flag(
+/*====================*/
+ rw_lock_t* lock) /*!< in/out: rw-lock */
{
- lock->waiters = flag;
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ os_compare_and_swap_ulint(&lock->waiters, 0, 1);
+#else /* INNODB_RW_LOCKS_USE_ATOMICS */
+ lock->waiters = 1;
+#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
}
+
+/********************************************************************//**
+Resets lock->waiters to 0. It is not an error if lock->waiters is already
+0. On platforms where ATOMIC builtins are used this function enforces a
+memory barrier. */
UNIV_INLINE
-ulint
-rw_lock_get_writer(
-/*===============*/
- rw_lock_t* lock)
+void
+rw_lock_reset_waiter_flag(
+/*======================*/
+ rw_lock_t* lock) /*!< in/out: rw-lock */
{
- return(lock->writer);
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ os_compare_and_swap_ulint(&lock->waiters, 1, 0);
+#else /* INNODB_RW_LOCKS_USE_ATOMICS */
+ lock->waiters = 0;
+#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
}
+
+/******************************************************************//**
+Returns the write-status of the lock - this function made more sense
+with the old rw_lock implementation.
+@return RW_LOCK_NOT_LOCKED, RW_LOCK_EX, RW_LOCK_WAIT_EX */
UNIV_INLINE
-void
-rw_lock_set_writer(
+ulint
+rw_lock_get_writer(
/*===============*/
- rw_lock_t* lock,
- ulint flag)
+ const rw_lock_t* lock) /*!< in: rw-lock */
{
- lock->writer = flag;
+ lint lock_word = lock->lock_word;
+ if (lock_word > 0) {
+ /* return NOT_LOCKED in s-lock state, like the writer
+ member of the old lock implementation. */
+ return(RW_LOCK_NOT_LOCKED);
+ } else if (((-lock_word) % X_LOCK_DECR) == 0) {
+ return(RW_LOCK_EX);
+ } else {
+ ut_ad(lock_word > -X_LOCK_DECR);
+ return(RW_LOCK_WAIT_EX);
+ }
}
+
+/******************************************************************//**
+Returns the number of readers.
+@return number of readers */
UNIV_INLINE
ulint
rw_lock_get_reader_count(
/*=====================*/
- rw_lock_t* lock)
+ const rw_lock_t* lock) /*!< in: rw-lock */
{
- return(lock->reader_count);
-}
-UNIV_INLINE
-void
-rw_lock_set_reader_count(
-/*=====================*/
- rw_lock_t* lock,
- ulint count)
-{
- lock->reader_count = count;
+ lint lock_word = lock->lock_word;
+ if (lock_word > 0) {
+ /* s-locked, no x-waiters */
+ return(X_LOCK_DECR - lock_word);
+ } else if (lock_word < 0 && lock_word > -X_LOCK_DECR) {
+ /* s-locked, with x-waiters */
+ return((ulint)(-lock_word));
+ }
+ return(0);
}
+
+#ifndef INNODB_RW_LOCKS_USE_ATOMICS
UNIV_INLINE
mutex_t*
rw_lock_get_mutex(
@@ -104,57 +165,167 @@ rw_lock_get_mutex(
{
return(&(lock->mutex));
}
+#endif
-/**********************************************************************
+/******************************************************************//**
Returns the value of writer_count for the lock. Does not reserve the lock
-mutex, so the caller must be sure it is not changed during the call. */
+mutex, so the caller must be sure it is not changed during the call.
+@return number of x-locks held (recursive locks included), 0 if not x-locked */
UNIV_INLINE
ulint
rw_lock_get_x_lock_count(
/*=====================*/
- /* out: value of writer_count */
- rw_lock_t* lock) /* in: rw-lock */
+ const rw_lock_t* lock) /*!< in: rw-lock */
{
- return(lock->writer_count);
+ lint lock_copy = lock->lock_word;
+ /* If there is a reader, lock_word is not divisible by X_LOCK_DECR */
+ if (lock_copy > 0 || (-lock_copy) % X_LOCK_DECR != 0) {
+ return(0);
+ }
+ return(((-lock_copy) / X_LOCK_DECR) + 1);
}
-/**********************************************************************
+/******************************************************************//**
+Two different implementations for decrementing the lock_word of a rw_lock:
+one for systems supporting atomic operations, one for others. This does
+not support recursive x-locks: they should be handled by the caller and
+need not be atomic since they are performed by the current lock holder.
+Returns true if the decrement was made, false if not.
+@return TRUE if decr occurs */
+UNIV_INLINE
+ibool
+rw_lock_lock_word_decr(
+/*===================*/
+ rw_lock_t* lock, /*!< in/out: rw-lock */
+ ulint amount) /*!< in: amount to decrement */
+{
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ lint local_lock_word = lock->lock_word;
+ while (local_lock_word > 0) {
+ if (os_compare_and_swap_lint(&lock->lock_word,
+ local_lock_word,
+ local_lock_word - amount)) {
+ return(TRUE);
+ }
+ local_lock_word = lock->lock_word;
+ }
+ return(FALSE);
+#else /* INNODB_RW_LOCKS_USE_ATOMICS */
+ ibool success = FALSE;
+ mutex_enter(&(lock->mutex));
+ if (lock->lock_word > 0) {
+ lock->lock_word -= amount;
+ success = TRUE;
+ }
+ mutex_exit(&(lock->mutex));
+ return(success);
+#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+}
+
+/******************************************************************//**
+Increments lock_word the specified amount and returns new value.
+@return lock->lock_word after increment */
+UNIV_INLINE
+lint
+rw_lock_lock_word_incr(
+/*===================*/
+ rw_lock_t* lock, /*!< in/out: rw-lock */
+ ulint amount) /*!< in: amount of increment */
+{
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ return(os_atomic_increment_lint(&lock->lock_word, amount));
+#else /* INNODB_RW_LOCKS_USE_ATOMICS */
+ lint local_lock_word;
+
+ mutex_enter(&(lock->mutex));
+
+ lock->lock_word += amount;
+ local_lock_word = lock->lock_word;
+
+ mutex_exit(&(lock->mutex));
+
+ return(local_lock_word);
+#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+}
+
+/******************************************************************//**
+This function sets the lock->writer_thread and lock->recursive fields.
+For platforms where we are using atomic builtins instead of lock->mutex
+it sets the lock->writer_thread field using atomics to ensure memory
+ordering. Note that it is assumed that the caller of this function
+effectively owns the lock i.e.: nobody else is allowed to modify
+lock->writer_thread at this point in time.
+The protocol is that lock->writer_thread MUST be updated BEFORE the
+lock->recursive flag is set. */
+UNIV_INLINE
+void
+rw_lock_set_writer_id_and_recursion_flag(
+/*=====================================*/
+ rw_lock_t* lock, /*!< in/out: lock to work on */
+ ibool recursive) /*!< in: TRUE if recursion
+ allowed */
+{
+ os_thread_id_t curr_thread = os_thread_get_curr_id();
+
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ os_thread_id_t local_thread;
+ ibool success;
+
+ /* Prevent Valgrind warnings about writer_thread being
+ uninitialized. It does not matter if writer_thread is
+ uninitialized, because we are comparing writer_thread against
+ itself, and the operation should always succeed. */
+ UNIV_MEM_VALID(&lock->writer_thread, sizeof lock->writer_thread);
+
+ local_thread = lock->writer_thread;
+ success = os_compare_and_swap_thread_id(
+ &lock->writer_thread, local_thread, curr_thread);
+ ut_a(success);
+ lock->recursive = recursive;
+
+#else /* INNODB_RW_LOCKS_USE_ATOMICS */
+
+ mutex_enter(&lock->mutex);
+ lock->writer_thread = curr_thread;
+ lock->recursive = recursive;
+ mutex_exit(&lock->mutex);
+
+#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+}
+
+/******************************************************************//**
Low-level function which tries to lock an rw-lock in s-mode. Performs no
-spinning. */
+spinning.
+@return TRUE if success */
UNIV_INLINE
ibool
rw_lock_s_lock_low(
/*===============*/
- /* out: TRUE if success */
- rw_lock_t* lock, /* in: pointer to rw-lock */
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
ulint pass __attribute__((unused)),
- /* in: pass value; != 0, if the lock will be
+ /*!< in: pass value; != 0, if the lock will be
passed to another thread to unlock */
- const char* file_name, /* in: file name where lock requested */
- ulint line) /* in: line where requested */
+ const char* file_name, /*!< in: file name where lock requested */
+ ulint line) /*!< in: line where requested */
{
- ut_ad(mutex_own(rw_lock_get_mutex(lock)));
-
- /* Check if the writer field is free */
-
- if (UNIV_LIKELY(lock->writer == RW_LOCK_NOT_LOCKED)) {
- /* Set the shared lock by incrementing the reader count */
- lock->reader_count++;
+ /* TODO: study performance of UNIV_LIKELY branch prediction hints. */
+ if (!rw_lock_lock_word_decr(lock, 1)) {
+ /* Locking did not succeed */
+ return(FALSE);
+ }
#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name,
- line);
+ rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name, line);
#endif
- lock->last_s_file_name = file_name;
- lock->last_s_line = line;
-
- return(TRUE); /* locking succeeded */
- }
+ /* These debugging values are not set safely: they may be incorrect
+ or even refer to a line that is invalid for the file name. */
+ lock->last_s_file_name = file_name;
+ lock->last_s_line = line;
- return(FALSE); /* locking did not succeed */
+ return(TRUE); /* locking succeeded */
}
-/**********************************************************************
+/******************************************************************//**
Low-level function which locks an rw-lock in s-mode when we know that it
is possible and none else is currently accessing the rw-lock structure.
Then we can do the locking without reserving the mutex. */
@@ -162,15 +333,14 @@ UNIV_INLINE
void
rw_lock_s_lock_direct(
/*==================*/
- rw_lock_t* lock, /* in: pointer to rw-lock */
- const char* file_name, /* in: file name where requested */
- ulint line) /* in: line where lock requested */
+ rw_lock_t* lock, /*!< in/out: rw-lock */
+ const char* file_name, /*!< in: file name where requested */
+ ulint line) /*!< in: line where lock requested */
{
- ut_ad(lock->writer == RW_LOCK_NOT_LOCKED);
- ut_ad(rw_lock_get_reader_count(lock) == 0);
+ ut_ad(lock->lock_word == X_LOCK_DECR);
- /* Set the shared lock by incrementing the reader count */
- lock->reader_count++;
+ /* Indicate there is a new reader by decrementing lock_word */
+ lock->lock_word--;
lock->last_s_file_name = file_name;
lock->last_s_line = line;
@@ -180,7 +350,7 @@ rw_lock_s_lock_direct(
#endif
}
-/**********************************************************************
+/******************************************************************//**
Low-level function which locks an rw-lock in x-mode when we know that it
is not locked and none else is currently accessing the rw-lock structure.
Then we can do the locking without reserving the mutex. */
@@ -188,18 +358,16 @@ UNIV_INLINE
void
rw_lock_x_lock_direct(
/*==================*/
- rw_lock_t* lock, /* in: pointer to rw-lock */
- const char* file_name, /* in: file name where requested */
- ulint line) /* in: line where lock requested */
+ rw_lock_t* lock, /*!< in/out: rw-lock */
+ const char* file_name, /*!< in: file name where requested */
+ ulint line) /*!< in: line where lock requested */
{
ut_ad(rw_lock_validate(lock));
- ut_ad(rw_lock_get_reader_count(lock) == 0);
- ut_ad(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
+ ut_ad(lock->lock_word == X_LOCK_DECR);
- rw_lock_set_writer(lock, RW_LOCK_EX);
+ lock->lock_word -= X_LOCK_DECR;
lock->writer_thread = os_thread_get_curr_id();
- lock->writer_count++;
- lock->pass = 0;
+ lock->recursive = TRUE;
lock->last_x_file_name = file_name;
lock->last_x_line = line;
@@ -209,7 +377,7 @@ rw_lock_x_lock_direct(
#endif
}
-/**********************************************************************
+/******************************************************************//**
NOTE! Use the corresponding macro, not directly this function! Lock an
rw-lock in shared mode for the current thread. If the rw-lock is locked
in exclusive mode, or there is an exclusive lock request waiting, the
@@ -219,11 +387,11 @@ UNIV_INLINE
void
rw_lock_s_lock_func(
/*================*/
- rw_lock_t* lock, /* in: pointer to rw-lock */
- ulint pass, /* in: pass value; != 0, if the lock will
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
be passed to another thread to unlock */
- const char* file_name,/* in: file name where lock requested */
- ulint line) /* in: line where requested */
+ const char* file_name,/*!< in: file name where lock requested */
+ ulint line) /*!< in: line where requested */
{
/* NOTE: As we do not know the thread ids for threads which have
s-locked a latch, and s-lockers will be served only after waiting
@@ -240,15 +408,12 @@ rw_lock_s_lock_func(
ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); /* see NOTE above */
#endif /* UNIV_SYNC_DEBUG */
- mutex_enter(rw_lock_get_mutex(lock));
-
- if (UNIV_LIKELY(rw_lock_s_lock_low(lock, pass, file_name, line))) {
- mutex_exit(rw_lock_get_mutex(lock));
+ /* TODO: study performance of UNIV_LIKELY branch prediction hints. */
+ if (rw_lock_s_lock_low(lock, pass, file_name, line)) {
return; /* Success */
} else {
/* Did not succeed, try spin wait */
- mutex_exit(rw_lock_get_mutex(lock));
rw_lock_s_lock_spin(lock, pass, file_name, line);
@@ -256,136 +421,91 @@ rw_lock_s_lock_func(
}
}
-/**********************************************************************
+/******************************************************************//**
NOTE! Use the corresponding macro, not directly this function! Lock an
-rw-lock in shared mode for the current thread if the lock can be acquired
-immediately. */
+rw-lock in exclusive mode for the current thread if the lock can be
+obtained immediately.
+@return TRUE if success */
UNIV_INLINE
ibool
-rw_lock_s_lock_func_nowait(
+rw_lock_x_lock_func_nowait(
/*=======================*/
- /* out: TRUE if success */
- rw_lock_t* lock, /* in: pointer to rw-lock */
- const char* file_name,/* in: file name where lock requested */
- ulint line) /* in: line where requested */
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ const char* file_name,/*!< in: file name where lock requested */
+ ulint line) /*!< in: line where requested */
{
- ibool success = FALSE;
-
- mutex_enter(rw_lock_get_mutex(lock));
+ os_thread_id_t curr_thread = os_thread_get_curr_id();
- if (lock->writer == RW_LOCK_NOT_LOCKED) {
- /* Set the shared lock by incrementing the reader count */
- lock->reader_count++;
+ ibool success;
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name,
- line);
-#endif
-
- lock->last_s_file_name = file_name;
- lock->last_s_line = line;
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ success = os_compare_and_swap_lint(&lock->lock_word, X_LOCK_DECR, 0);
+#else
+ success = FALSE;
+ mutex_enter(&(lock->mutex));
+ if (lock->lock_word == X_LOCK_DECR) {
+ lock->lock_word = 0;
success = TRUE;
}
+ mutex_exit(&(lock->mutex));
- mutex_exit(rw_lock_get_mutex(lock));
-
- return(success);
-}
+#endif
+ if (success) {
+ rw_lock_set_writer_id_and_recursion_flag(lock, TRUE);
-/**********************************************************************
-NOTE! Use the corresponding macro, not directly this function! Lock an
-rw-lock in exclusive mode for the current thread if the lock can be
-obtained immediately. */
-UNIV_INLINE
-ibool
-rw_lock_x_lock_func_nowait(
-/*=======================*/
- /* out: TRUE if success */
- rw_lock_t* lock, /* in: pointer to rw-lock */
- const char* file_name,/* in: file name where lock requested */
- ulint line) /* in: line where requested */
-{
- ibool success = FALSE;
- os_thread_id_t curr_thread = os_thread_get_curr_id();
- mutex_enter(rw_lock_get_mutex(lock));
+ } else if (lock->recursive
+ && os_thread_eq(lock->writer_thread, curr_thread)) {
+ /* Relock: this lock_word modification is safe since no other
+ threads can modify (lock, unlock, or reserve) lock_word while
+ there is an exclusive writer and this is the writer thread. */
+ lock->lock_word -= X_LOCK_DECR;
- if (UNIV_UNLIKELY(rw_lock_get_reader_count(lock) != 0)) {
- } else if (UNIV_LIKELY(rw_lock_get_writer(lock)
- == RW_LOCK_NOT_LOCKED)) {
- rw_lock_set_writer(lock, RW_LOCK_EX);
- lock->writer_thread = curr_thread;
- lock->pass = 0;
-relock:
- lock->writer_count++;
+ ut_ad(((-lock->lock_word) % X_LOCK_DECR) == 0);
+ } else {
+ /* Failure */
+ return(FALSE);
+ }
#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
+ rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
#endif
- lock->last_x_file_name = file_name;
- lock->last_x_line = line;
-
- success = TRUE;
- } else if (rw_lock_get_writer(lock) == RW_LOCK_EX
- && lock->pass == 0
- && os_thread_eq(lock->writer_thread, curr_thread)) {
- goto relock;
- }
-
- mutex_exit(rw_lock_get_mutex(lock));
+ lock->last_x_file_name = file_name;
+ lock->last_x_line = line;
ut_ad(rw_lock_validate(lock));
- return(success);
+ return(TRUE);
}
-/**********************************************************************
+/******************************************************************//**
Releases a shared mode lock. */
UNIV_INLINE
void
rw_lock_s_unlock_func(
/*==================*/
- rw_lock_t* lock /* in: rw-lock */
#ifdef UNIV_SYNC_DEBUG
- ,ulint pass /* in: pass value; != 0, if the lock may have
+ ulint pass, /*!< in: pass value; != 0, if the lock may have
been passed to another thread to unlock */
#endif
- )
+ rw_lock_t* lock) /*!< in/out: rw-lock */
{
- mutex_t* mutex = &(lock->mutex);
- ibool sg = FALSE;
-
- /* Acquire the mutex protecting the rw-lock fields */
- mutex_enter(mutex);
-
- /* Reset the shared lock by decrementing the reader count */
-
- ut_a(lock->reader_count > 0);
- lock->reader_count--;
+ ut_ad((lock->lock_word % X_LOCK_DECR) != 0);
#ifdef UNIV_SYNC_DEBUG
rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED);
#endif
- /* If there may be waiters and this was the last s-lock,
- signal the object */
-
- if (UNIV_UNLIKELY(lock->waiters)
- && lock->reader_count == 0) {
- sg = TRUE;
-
- rw_lock_set_waiters(lock, 0);
- }
-
- mutex_exit(mutex);
+ /* Increment lock_word to indicate 1 less reader */
+ if (rw_lock_lock_word_incr(lock, 1) == 0) {
- if (UNIV_UNLIKELY(sg)) {
-#ifdef __WIN__
+ /* wait_ex waiter exists. It may not be asleep, but we signal
+ anyway. We do not wake other waiters, because they can't
+ exist without wait_ex waiter and wait_ex waiter goes first.*/
os_event_set(lock->wait_ex_event);
-#endif
- os_event_set(lock->event);
sync_array_object_signalled(sync_primary_wait_array);
+
}
ut_ad(rw_lock_validate(lock));
@@ -395,25 +515,24 @@ rw_lock_s_unlock_func(
#endif
}
-/**********************************************************************
+/******************************************************************//**
Releases a shared mode lock when we know there are no waiters and none
else will access the lock during the time this function is executed. */
UNIV_INLINE
void
rw_lock_s_unlock_direct(
/*====================*/
- rw_lock_t* lock) /* in: rw-lock */
+ rw_lock_t* lock) /*!< in/out: rw-lock */
{
- /* Reset the shared lock by decrementing the reader count */
-
- ut_ad(lock->reader_count > 0);
-
- lock->reader_count--;
+ ut_ad(lock->lock_word < X_LOCK_DECR);
#ifdef UNIV_SYNC_DEBUG
rw_lock_remove_debug_info(lock, 0, RW_LOCK_SHARED);
#endif
+ /* Decrease reader count by incrementing lock_word */
+ lock->lock_word++;
+
ut_ad(!lock->waiters);
ut_ad(rw_lock_validate(lock));
#ifdef UNIV_SYNC_PERF_STAT
@@ -421,55 +540,46 @@ rw_lock_s_unlock_direct(
#endif
}
-/**********************************************************************
+/******************************************************************//**
Releases an exclusive mode lock. */
UNIV_INLINE
void
rw_lock_x_unlock_func(
/*==================*/
- rw_lock_t* lock /* in: rw-lock */
#ifdef UNIV_SYNC_DEBUG
- ,ulint pass /* in: pass value; != 0, if the lock may have
+ ulint pass, /*!< in: pass value; != 0, if the lock may have
been passed to another thread to unlock */
#endif
- )
+ rw_lock_t* lock) /*!< in/out: rw-lock */
{
- ibool sg = FALSE;
-
- /* Acquire the mutex protecting the rw-lock fields */
- mutex_enter(&(lock->mutex));
-
- /* Reset the exclusive lock if this thread no longer has an x-mode
- lock */
-
- ut_ad(lock->writer_count > 0);
-
- lock->writer_count--;
-
- if (lock->writer_count == 0) {
- rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
+ ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
+
+ /* lock->recursive flag also indicates if lock->writer_thread is
+ valid or stale. If we are the last of the recursive callers
+ then we must unset lock->recursive flag to indicate that the
+ lock->writer_thread is now stale.
+ Note that since we still hold the x-lock we can safely read the
+ lock_word. */
+ if (lock->lock_word == 0) {
+ /* Last caller in a possible recursive chain. */
+ lock->recursive = FALSE;
+ UNIV_MEM_INVALID(&lock->writer_thread,
+ sizeof lock->writer_thread);
}
#ifdef UNIV_SYNC_DEBUG
rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX);
#endif
- /* If there may be waiters, signal the lock */
- if (UNIV_UNLIKELY(lock->waiters)
- && lock->writer_count == 0) {
-
- sg = TRUE;
- rw_lock_set_waiters(lock, 0);
- }
-
- mutex_exit(&(lock->mutex));
-
- if (UNIV_UNLIKELY(sg)) {
-#ifdef __WIN__
- os_event_set(lock->wait_ex_event);
-#endif
- os_event_set(lock->event);
- sync_array_object_signalled(sync_primary_wait_array);
+ if (rw_lock_lock_word_incr(lock, X_LOCK_DECR) == X_LOCK_DECR) {
+ /* Lock is now free. May have to signal read/write waiters.
+ We do not need to signal wait_ex waiters, since they cannot
+ exist when there is a writer. */
+ if (lock->waiters) {
+ rw_lock_reset_waiter_flag(lock);
+ os_event_set(lock->event);
+ sync_array_object_signalled(sync_primary_wait_array);
+ }
}
ut_ad(rw_lock_validate(lock));
@@ -479,30 +589,32 @@ rw_lock_x_unlock_func(
#endif
}
-/**********************************************************************
+/******************************************************************//**
Releases an exclusive mode lock when we know there are no waiters, and
-none else will access the lock durint the time this function is executed. */
+none else will access the lock during the time this function is executed. */
UNIV_INLINE
void
rw_lock_x_unlock_direct(
/*====================*/
- rw_lock_t* lock) /* in: rw-lock */
+ rw_lock_t* lock) /*!< in/out: rw-lock */
{
/* Reset the exclusive lock if this thread no longer has an x-mode
lock */
- ut_ad(lock->writer_count > 0);
-
- lock->writer_count--;
-
- if (lock->writer_count == 0) {
- rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
- }
+ ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
#ifdef UNIV_SYNC_DEBUG
rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX);
#endif
+ if (lock->lock_word == 0) {
+ lock->recursive = FALSE;
+ UNIV_MEM_INVALID(&lock->writer_thread,
+ sizeof lock->writer_thread);
+ }
+
+ lock->lock_word += X_LOCK_DECR;
+
ut_ad(!lock->waiters);
ut_ad(rw_lock_validate(lock));
diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
index 6a61330f97e..df990823cc4 100644
--- a/storage/innobase/include/sync0sync.h
+++ b/storage/innobase/include/sync0sync.h
@@ -1,7 +1,31 @@
-/******************************************************
-Mutex, the basic synchronization primitive
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1995 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/sync0sync.h
+Mutex, the basic synchronization primitive
Created 9/5/1995 Heikki Tuuri
*******************************************************/
@@ -17,23 +41,30 @@ Created 9/5/1995 Heikki Tuuri
#include "os0sync.h"
#include "sync0arr.h"
-#ifndef UNIV_HOTBACKUP
+#if defined(UNIV_DEBUG) && !defined(UNIV_HOTBACKUP)
extern my_bool timed_mutexes;
-#endif /* UNIV_HOTBACKUP */
+#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
-/**********************************************************************
-Initializes the synchronization data structures. */
+#ifdef HAVE_WINDOWS_ATOMICS
+typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates
+ on LONG variable */
+#else
+typedef byte lock_word_t;
+#endif
+/******************************************************************//**
+Initializes the synchronization data structures. */
+UNIV_INTERN
void
sync_init(void);
/*===========*/
-/**********************************************************************
+/******************************************************************//**
Frees the resources in synchronization data structures. */
-
+UNIV_INTERN
void
sync_close(void);
/*===========*/
-/**********************************************************************
+/******************************************************************//**
Creates, or rather, initializes a mutex object to a specified memory
location (which must be appropriately aligned). The mutex is initialized
in the reset state. Explicit freeing of the mutex with mutex_free is
@@ -52,57 +83,49 @@ necessary only if the memory block containing it is freed. */
mutex_create_func((M), __FILE__, __LINE__)
#endif
-/**********************************************************************
+/******************************************************************//**
Creates, or rather, initializes a mutex object in a specified memory
location (which must be appropriately aligned). The mutex is initialized
in the reset state. Explicit freeing of the mutex with mutex_free is
necessary only if the memory block containing it is freed. */
-
+UNIV_INTERN
void
mutex_create_func(
/*==============*/
- mutex_t* mutex, /* in: pointer to memory */
+ mutex_t* mutex, /*!< in: pointer to memory */
#ifdef UNIV_DEBUG
- const char* cmutex_name, /* in: mutex name */
+ const char* cmutex_name, /*!< in: mutex name */
# ifdef UNIV_SYNC_DEBUG
- ulint level, /* in: level */
+ ulint level, /*!< in: level */
# endif /* UNIV_SYNC_DEBUG */
#endif /* UNIV_DEBUG */
- const char* cfile_name, /* in: file name where created */
- ulint cline); /* in: file line where created */
-/**********************************************************************
+ const char* cfile_name, /*!< in: file name where created */
+ ulint cline); /*!< in: file line where created */
+
+#undef mutex_free /* Fix for MacOS X */
+
+/******************************************************************//**
Calling this function is obligatory only if the memory buffer containing
the mutex is freed. Removes a mutex object from the mutex list. The mutex
is checked to be in the reset state. */
-
-#undef mutex_free /* Fix for MacOS X */
+UNIV_INTERN
void
mutex_free(
/*=======*/
- mutex_t* mutex); /* in: mutex */
-/******************************************************************
+ mutex_t* mutex); /*!< in: mutex */
+/**************************************************************//**
NOTE! The following macro should be used in mutex locking, not the
corresponding function. */
#define mutex_enter(M) mutex_enter_func((M), __FILE__, __LINE__)
-/**********************************************************************
-A noninlined function that reserves a mutex. In ha_innodb.cc we have disabled
-inlining of InnoDB functions, and no inlined functions should be called from
-there. That is why we need to duplicate the inlined function here. */
-
-void
-mutex_enter_noninline(
-/*==================*/
- mutex_t* mutex); /* in: mutex */
-/******************************************************************
+/**************************************************************//**
NOTE! The following macro should be used in mutex locking, not the
corresponding function. */
/* NOTE! currently same as mutex_enter! */
#define mutex_enter_fast(M) mutex_enter_func((M), __FILE__, __LINE__)
-#define mutex_enter_fast_func mutex_enter_func;
-/**********************************************************************
+/******************************************************************//**
NOTE! Use the corresponding macro in the header file, not this function
directly. Locks a mutex for the current thread. If the mutex is reserved
the function spins a preset time (controlled by SYNC_SPIN_ROUNDS) waiting
@@ -111,161 +134,157 @@ UNIV_INLINE
void
mutex_enter_func(
/*=============*/
- mutex_t* mutex, /* in: pointer to mutex */
- const char* file_name, /* in: file name where locked */
- ulint line); /* in: line where locked */
-/******************************************************************
+ mutex_t* mutex, /*!< in: pointer to mutex */
+ const char* file_name, /*!< in: file name where locked */
+ ulint line); /*!< in: line where locked */
+/**************************************************************//**
NOTE! The following macro should be used in mutex locking, not the
corresponding function. */
#define mutex_enter_nowait(M) \
mutex_enter_nowait_func((M), __FILE__, __LINE__)
-/************************************************************************
+/********************************************************************//**
NOTE! Use the corresponding macro in the header file, not this function
directly. Tries to lock the mutex for the current thread. If the lock is not
-acquired immediately, returns with return value 1. */
-
+acquired immediately, returns with return value 1.
+@return 0 on success, 1 if the mutex was not acquired immediately */
+UNIV_INTERN
ulint
mutex_enter_nowait_func(
/*====================*/
- /* out: 0 if succeed, 1 if not */
- mutex_t* mutex, /* in: pointer to mutex */
- const char* file_name, /* in: file name where mutex
+ mutex_t* mutex, /*!< in: pointer to mutex */
+ const char* file_name, /*!< in: file name where mutex
requested */
- ulint line); /* in: line where requested */
-/**********************************************************************
+ ulint line); /*!< in: line where requested */
+/******************************************************************//**
Unlocks a mutex owned by the current thread. */
UNIV_INLINE
void
mutex_exit(
/*=======*/
- mutex_t* mutex); /* in: pointer to mutex */
-/**********************************************************************
-Releases a mutex. */
-
-void
-mutex_exit_noninline(
-/*=================*/
- mutex_t* mutex); /* in: mutex */
-/**********************************************************************
+ mutex_t* mutex); /*!< in: pointer to mutex */
+#ifdef UNIV_SYNC_DEBUG
+/******************************************************************//**
Returns TRUE if no mutex or rw-lock is currently locked.
-Works only in the debug version. */
-
+Works only in the debug version.
+@return TRUE if no mutexes and rw-locks reserved */
+UNIV_INTERN
ibool
sync_all_freed(void);
/*================*/
+#endif /* UNIV_SYNC_DEBUG */
/*#####################################################################
FUNCTION PROTOTYPES FOR DEBUGGING */
-/***********************************************************************
+/*******************************************************************//**
Prints wait info of the sync system. */
-
+UNIV_INTERN
void
sync_print_wait_info(
/*=================*/
- FILE* file); /* in: file where to print */
-/***********************************************************************
+ FILE* file); /*!< in: file where to print */
+/*******************************************************************//**
Prints info of the sync system. */
-
+UNIV_INTERN
void
sync_print(
/*=======*/
- FILE* file); /* in: file where to print */
+ FILE* file); /*!< in: file where to print */
#ifdef UNIV_DEBUG
-/**********************************************************************
-Checks that the mutex has been initialized. */
-
+/******************************************************************//**
+Checks that the mutex has been initialized.
+@return TRUE */
+UNIV_INTERN
ibool
mutex_validate(
/*===========*/
- const mutex_t* mutex);
-/**********************************************************************
+ const mutex_t* mutex); /*!< in: mutex */
+/******************************************************************//**
Checks that the current thread owns the mutex. Works only
-in the debug version. */
-
+in the debug version.
+@return TRUE if owns */
+UNIV_INTERN
ibool
mutex_own(
/*======*/
- /* out: TRUE if owns */
- const mutex_t* mutex); /* in: mutex */
+ const mutex_t* mutex); /*!< in: mutex */
#endif /* UNIV_DEBUG */
#ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
+/******************************************************************//**
Adds a latch and its level in the thread level array. Allocates the memory
for the array if called first time for this OS thread. Makes the checks
against other latch levels stored in the array for this thread. */
-
+UNIV_INTERN
void
sync_thread_add_level(
/*==================*/
- void* latch, /* in: pointer to a mutex or an rw-lock */
- ulint level); /* in: level in the latching order; if
+ void* latch, /*!< in: pointer to a mutex or an rw-lock */
+ ulint level); /*!< in: level in the latching order; if
SYNC_LEVEL_VARYING, nothing is done */
-/**********************************************************************
-Removes a latch from the thread level array if it is found there. */
-
+/******************************************************************//**
+Removes a latch from the thread level array if it is found there.
+@return TRUE if found in the array; it is no error if the latch is
+not found, as we presently are not able to determine the level for
+every latch reservation the program does */
+UNIV_INTERN
ibool
sync_thread_reset_level(
/*====================*/
- /* out: TRUE if found from the array; it is no error
- if the latch is not found, as we presently are not
- able to determine the level for every latch
- reservation the program does */
- void* latch); /* in: pointer to a mutex or an rw-lock */
-/**********************************************************************
-Checks that the level array for the current thread is empty. */
-
+ void* latch); /*!< in: pointer to a mutex or an rw-lock */
+/******************************************************************//**
+Checks that the level array for the current thread is empty.
+@return TRUE if empty */
+UNIV_INTERN
ibool
sync_thread_levels_empty(void);
/*==========================*/
- /* out: TRUE if empty */
-/**********************************************************************
-Checks that the level array for the current thread is empty. */
-
+/******************************************************************//**
+Checks that the level array for the current thread is empty.
+@return TRUE if empty except the exceptions specified below */
+UNIV_INTERN
ibool
sync_thread_levels_empty_gen(
/*=========================*/
- /* out: TRUE if empty except the
- exceptions specified below */
- ibool dict_mutex_allowed); /* in: TRUE if dictionary mutex is
+ ibool dict_mutex_allowed); /*!< in: TRUE if dictionary mutex is
allowed to be owned by the thread,
also purge_is_running mutex is
allowed */
-/**********************************************************************
+/******************************************************************//**
Gets the debug information for a reserved mutex. */
-
+UNIV_INTERN
void
mutex_get_debug_info(
/*=================*/
- mutex_t* mutex, /* in: mutex */
- const char** file_name, /* out: file where requested */
- ulint* line, /* out: line where requested */
- os_thread_id_t* thread_id); /* out: id of the thread which owns
+ mutex_t* mutex, /*!< in: mutex */
+ const char** file_name, /*!< out: file where requested */
+ ulint* line, /*!< out: line where requested */
+ os_thread_id_t* thread_id); /*!< out: id of the thread which owns
the mutex */
-/**********************************************************************
-Counts currently reserved mutexes. Works only in the debug version. */
-
+/******************************************************************//**
+Counts currently reserved mutexes. Works only in the debug version.
+@return number of reserved mutexes */
+UNIV_INTERN
ulint
mutex_n_reserved(void);
/*==================*/
#endif /* UNIV_SYNC_DEBUG */
-/**********************************************************************
+/******************************************************************//**
NOT to be used outside this module except in debugging! Gets the value
of the lock word. */
UNIV_INLINE
-ulint
+lock_word_t
mutex_get_lock_word(
/*================*/
- const mutex_t* mutex); /* in: mutex */
+ const mutex_t* mutex); /*!< in: mutex */
#ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
+/******************************************************************//**
NOT to be used outside this module except in debugging! Gets the waiters
-field in a mutex. */
+field in a mutex.
+@return value of the waiters field */
UNIV_INLINE
ulint
mutex_get_waiters(
/*==============*/
- /* out: value to set */
- const mutex_t* mutex); /* in: mutex */
+ const mutex_t* mutex); /*!< in: mutex */
#endif /* UNIV_SYNC_DEBUG */
/*
@@ -400,6 +419,12 @@ or row lock! */
their level set after the page is
locked; see e.g.
ibuf_bitmap_get_map_page(). */
+#define SYNC_TRX_I_S_RWLOCK 1910 /* Used for
+ trx_i_s_cache_t::rw_lock */
+#define SYNC_TRX_I_S_LAST_READ 1900 /* Used for
+ trx_i_s_cache_t::last_read_mutex */
+#define SYNC_FILE_FORMAT_TAG 1200 /* Used to serialize access to the
+ file format tag */
#define SYNC_DICT_OPERATION 1001 /* table create, drop, etc. reserve
this in X-mode, implicit or backround
operations purge, rollback, foreign
@@ -442,7 +467,8 @@ or row lock! */
#define SYNC_TRX_SYS_HEADER 290
#define SYNC_LOG 170
#define SYNC_RECV 168
-#define SYNC_WORK_QUEUE 161
+#define SYNC_WORK_QUEUE 162
+#define SYNC_SEARCH_SYS_CONF 161 /* for assigning btr_search_enabled */
#define SYNC_SEARCH_SYS 160 /* NOTE that if we have a memory
heap that can be extended to the
buffer pool, its logical level is
@@ -469,79 +495,79 @@ or row lock! */
Do not use its fields directly! The structure used in the spin lock
implementation of a mutual exclusion semaphore. */
+/** InnoDB mutex */
struct mutex_struct {
- os_event_t event; /* Used by sync0arr.c for the wait queue */
- ulint lock_word; /* This ulint is the target of the atomic
- test-and-set instruction in Win32 */
-#if !defined(_WIN32) || !defined(UNIV_CAN_USE_X86_ASSEMBLER)
+ os_event_t event; /*!< Used by sync0arr.c for the wait queue */
+ volatile lock_word_t lock_word; /*!< lock_word is the target
+ of the atomic test-and-set instruction when
+ atomic operations are enabled. */
+
+#if !defined(HAVE_ATOMIC_BUILTINS)
os_fast_mutex_t
- os_fast_mutex; /* In other systems we use this OS mutex
- in place of lock_word */
+ os_fast_mutex; /*!< We use this OS mutex in place of lock_word
+ when atomic operations are not enabled */
#endif
- ulint waiters; /* This ulint is set to 1 if there are (or
+ ulint waiters; /*!< This ulint is set to 1 if there are (or
may be) threads waiting in the global wait
array for this mutex to be released.
Otherwise, this is 0. */
- UT_LIST_NODE_T(mutex_t) list; /* All allocated mutexes are put into
+ UT_LIST_NODE_T(mutex_t) list; /*!< All allocated mutexes are put into
a list. Pointers to the next and prev. */
#ifdef UNIV_SYNC_DEBUG
- const char* file_name; /* File where the mutex was locked */
- ulint line; /* Line where the mutex was locked */
- ulint level; /* Level in the global latching order */
+ const char* file_name; /*!< File where the mutex was locked */
+ ulint line; /*!< Line where the mutex was locked */
+ ulint level; /*!< Level in the global latching order */
#endif /* UNIV_SYNC_DEBUG */
- const char* cfile_name;/* File name where mutex created */
- ulint cline; /* Line where created */
+ const char* cfile_name;/*!< File name where mutex created */
+ ulint cline; /*!< Line where created */
#ifdef UNIV_DEBUG
- os_thread_id_t thread_id; /* The thread id of the thread
+ os_thread_id_t thread_id; /*!< The thread id of the thread
which locked the mutex. */
- ulint magic_n;
+ ulint magic_n; /*!< MUTEX_MAGIC_N */
+/** Value of mutex_struct::magic_n */
# define MUTEX_MAGIC_N (ulint)979585
#endif /* UNIV_DEBUG */
-#ifndef UNIV_HOTBACKUP
- ulong count_os_wait; /* count of os_wait */
-# ifdef UNIV_DEBUG
- ulong count_using; /* count of times mutex used */
- ulong count_spin_loop; /* count of spin loops */
- ulong count_spin_rounds; /* count of spin rounds */
- ulong count_os_yield; /* count of os_wait */
- ulonglong lspent_time; /* mutex os_wait timer msec */
- ulonglong lmax_spent_time; /* mutex os_wait timer msec */
- const char* cmutex_name;/* mutex name */
- ulint mutex_type;/* 0 - usual mutex 1 - rw_lock mutex */
-# endif /* UNIV_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
+ ulong count_os_wait; /*!< count of os_wait */
+#ifdef UNIV_DEBUG
+ ulong count_using; /*!< count of times mutex used */
+ ulong count_spin_loop; /*!< count of spin loops */
+ ulong count_spin_rounds;/*!< count of spin rounds */
+ ulong count_os_yield; /*!< count of os_yield */
+ ulonglong lspent_time; /*!< mutex os_wait timer msec */
+ ulonglong lmax_spent_time;/*!< mutex os_wait timer msec */
+ const char* cmutex_name; /*!< mutex name */
+ ulint mutex_type; /*!< 0=usual mutex, 1=rw_lock mutex */
+#endif /* UNIV_DEBUG */
};
-/* The global array of wait cells for implementation of the databases own
-mutexes and read-write locks. Appears here for debugging purposes only! */
+/** The global array of wait cells for implementation of the databases own
+mutexes and read-write locks. */
+extern sync_array_t* sync_primary_wait_array;/* Appears here for
+ debugging purposes only! */
-extern sync_array_t* sync_primary_wait_array;
-
-/* Constant determining how long spin wait is continued before suspending
+/** Constant determining how long spin wait is continued before suspending
the thread. A value 600 rounds on a 1995 100 MHz Pentium seems to correspond
to 20 microseconds. */
#define SYNC_SPIN_ROUNDS srv_n_spin_wait_rounds
-/* The number of system calls made in this module. Intended for performance
-monitoring. */
-
-extern ulint mutex_system_call_count;
-extern ulint mutex_exit_count;
+/** The number of mutex_exit calls. Intended for performance monitoring. */
+extern ib_int64_t mutex_exit_count;
#ifdef UNIV_SYNC_DEBUG
-/* Latching order checks start when this is set TRUE */
+/** Latching order checks start when this is set TRUE */
extern ibool sync_order_checks_on;
#endif /* UNIV_SYNC_DEBUG */
-/* This variable is set to TRUE when sync_init is called */
+/** This variable is set to TRUE when sync_init is called */
extern ibool sync_initialized;
-/* Global list of database mutexes (not OS mutexes) created. */
+/** Global list of database mutexes (not OS mutexes) created. */
typedef UT_LIST_BASE_NODE_T(mutex_t) ut_list_base_node_t;
+/** Global list of database mutexes (not OS mutexes) created. */
extern ut_list_base_node_t mutex_list;
-/* Mutex protecting the mutex_list variable */
+/** Mutex protecting the mutex_list variable */
extern mutex_t mutex_list_mutex;
diff --git a/storage/innobase/include/sync0sync.ic b/storage/innobase/include/sync0sync.ic
index ee640abefa6..b05020b5660 100644
--- a/storage/innobase/include/sync0sync.ic
+++ b/storage/innobase/include/sync0sync.ic
@@ -1,109 +1,86 @@
-/******************************************************
-Mutex, the basic synchronization primitive
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
-(c) 1995 Innobase Oy
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/sync0sync.ic
+Mutex, the basic synchronization primitive
Created 9/5/1995 Heikki Tuuri
*******************************************************/
-#if defined(not_defined) && defined(__GNUC__) && defined(UNIV_INTEL_X86)
-/* %z0: Use the size of operand %0 which in our case is *m to determine
-instruction size, it should end up as xchgl. "1" in the input constraint,
-says that "in" has to go in the same place as "out".*/
-#define TAS(m, in, out) \
- asm volatile ("xchg%z0 %2, %0" \
- : "=g" (*(m)), "=r" (out) \
- : "1" (in)) /* Note: "1" here refers to "=r" (out) */
-#endif
-
-/**********************************************************************
+/******************************************************************//**
Sets the waiters field in a mutex. */
-
+UNIV_INTERN
void
mutex_set_waiters(
/*==============*/
- mutex_t* mutex, /* in: mutex */
- ulint n); /* in: value to set */
-/**********************************************************************
+ mutex_t* mutex, /*!< in: mutex */
+ ulint n); /*!< in: value to set */
+/******************************************************************//**
Reserves a mutex for the current thread. If the mutex is reserved, the
function spins a preset time (controlled by SYNC_SPIN_ROUNDS) waiting
for the mutex before suspending the thread. */
-
+UNIV_INTERN
void
mutex_spin_wait(
/*============*/
- mutex_t* mutex, /* in: pointer to mutex */
- const char* file_name, /* in: file name where mutex
+ mutex_t* mutex, /*!< in: pointer to mutex */
+ const char* file_name, /*!< in: file name where mutex
requested */
- ulint line); /* in: line where requested */
+ ulint line); /*!< in: line where requested */
#ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
+/******************************************************************//**
Sets the debug information for a reserved mutex. */
-
+UNIV_INTERN
void
mutex_set_debug_info(
/*=================*/
- mutex_t* mutex, /* in: mutex */
- const char* file_name, /* in: file where requested */
- ulint line); /* in: line where requested */
+ mutex_t* mutex, /*!< in: mutex */
+ const char* file_name, /*!< in: file where requested */
+ ulint line); /*!< in: line where requested */
#endif /* UNIV_SYNC_DEBUG */
-/**********************************************************************
+/******************************************************************//**
Releases the threads waiting in the primary wait array for this mutex. */
-
+UNIV_INTERN
void
mutex_signal_object(
/*================*/
- mutex_t* mutex); /* in: mutex */
+ mutex_t* mutex); /*!< in: mutex */
-/**********************************************************************
+/******************************************************************//**
Performs an atomic test-and-set instruction to the lock_word field of a
-mutex. */
+mutex.
+@return the previous value of lock_word: 0 or 1 */
UNIV_INLINE
-ulint
+byte
mutex_test_and_set(
/*===============*/
- /* out: the previous value of lock_word: 0 or
- 1 */
- mutex_t* mutex) /* in: mutex */
+ mutex_t* mutex) /*!< in: mutex */
{
-#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
- ulint res;
- ulint* lw; /* assembler code is used to ensure that
- lock_word is loaded from memory */
- ut_ad(mutex);
- ut_ad(sizeof(ulint) == 4);
-
- lw = &(mutex->lock_word);
-
- __asm MOV ECX, lw
- __asm MOV EDX, 1
- __asm XCHG EDX, DWORD PTR [ECX]
- __asm MOV res, EDX
-
- /* The fence below would prevent this thread from
- reading the data structure protected by the mutex
- before the test-and-set operation is committed, but
- the fence is apparently not needed:
-
- In a posting to comp.arch newsgroup (August 10, 1997)
- Andy Glew said that in P6 a LOCKed instruction like
- XCHG establishes a fence with respect to memory reads
- and writes and thus an explicit fence is not
- needed. In P5 he seemed to agree with a previous
- newsgroup poster that LOCKed instructions serialize
- all instruction execution, and, consequently, also
- memory operations. This is confirmed in Intel Software
- Dev. Manual, Vol. 3. */
-
- /* mutex_fence(); */
-
- return(res);
-#elif defined(not_defined) && defined(__GNUC__) && defined(UNIV_INTEL_X86)
- ulint res;
-
- TAS(&mutex->lock_word, 1, res);
-
- return(res);
+#if defined(HAVE_ATOMIC_BUILTINS)
+ return(os_atomic_test_and_set_byte(&mutex->lock_word, 1));
#else
ibool ret;
@@ -117,33 +94,24 @@ mutex_test_and_set(
mutex->lock_word = 1;
}
- return(ret);
+ return((byte)ret);
#endif
}
-/**********************************************************************
+/******************************************************************//**
Performs a reset instruction to the lock_word field of a mutex. This
instruction also serializes memory operations to the program order. */
UNIV_INLINE
void
mutex_reset_lock_word(
/*==================*/
- mutex_t* mutex) /* in: mutex */
+ mutex_t* mutex) /*!< in: mutex */
{
-#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
- ulint* lw; /* assembler code is used to ensure that
- lock_word is loaded from memory */
- ut_ad(mutex);
-
- lw = &(mutex->lock_word);
-
- __asm MOV EDX, 0
- __asm MOV ECX, lw
- __asm XCHG EDX, DWORD PTR [ECX]
-#elif defined(not_defined) && defined(__GNUC__) && defined(UNIV_INTEL_X86)
- ulint res;
-
- TAS(&mutex->lock_word, 0, res);
+#if defined(HAVE_ATOMIC_BUILTINS)
+ /* In theory __sync_lock_release should be used to release the lock.
+ Unfortunately, it does not work properly alone. The workaround is
+ that more conservative __sync_lock_test_and_set is used instead. */
+ os_atomic_test_and_set_byte(&mutex->lock_word, 0);
#else
mutex->lock_word = 0;
@@ -151,33 +119,29 @@ mutex_reset_lock_word(
#endif
}
-/**********************************************************************
+/******************************************************************//**
Gets the value of the lock word. */
UNIV_INLINE
-ulint
+lock_word_t
mutex_get_lock_word(
/*================*/
- const mutex_t* mutex) /* in: mutex */
+ const mutex_t* mutex) /*!< in: mutex */
{
- const volatile ulint* ptr; /* declared volatile to ensure that
- lock_word is loaded from memory */
ut_ad(mutex);
- ptr = &(mutex->lock_word);
-
- return(*ptr);
+ return(mutex->lock_word);
}
-/**********************************************************************
-Gets the waiters field in a mutex. */
+/******************************************************************//**
+Gets the waiters field in a mutex.
+@return value of the waiters field */
UNIV_INLINE
ulint
mutex_get_waiters(
/*==============*/
- /* out: value to set */
- const mutex_t* mutex) /* in: mutex */
+ const mutex_t* mutex) /*!< in: mutex */
{
- const volatile ulint* ptr; /* declared volatile to ensure that
+ const volatile ulint* ptr; /*!< declared volatile to ensure that
the value is read from memory */
ut_ad(mutex);
@@ -187,13 +151,13 @@ mutex_get_waiters(
word from memory is atomic */
}
-/**********************************************************************
+/******************************************************************//**
Unlocks a mutex owned by the current thread. */
UNIV_INLINE
void
mutex_exit(
/*=======*/
- mutex_t* mutex) /* in: pointer to mutex */
+ mutex_t* mutex) /*!< in: pointer to mutex */
{
ut_ad(mutex_own(mutex));
@@ -226,7 +190,7 @@ mutex_exit(
#endif
}
-/**********************************************************************
+/******************************************************************//**
Locks a mutex for the current thread. If the mutex is reserved, the function
spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting for the mutex
before suspending the thread. */
@@ -234,9 +198,9 @@ UNIV_INLINE
void
mutex_enter_func(
/*=============*/
- mutex_t* mutex, /* in: pointer to mutex */
- const char* file_name, /* in: file name where locked */
- ulint line) /* in: line where locked */
+ mutex_t* mutex, /*!< in: pointer to mutex */
+ const char* file_name, /*!< in: file name where locked */
+ ulint line) /*!< in: line where locked */
{
ut_ad(mutex_validate(mutex));
ut_ad(!mutex_own(mutex));
@@ -244,9 +208,7 @@ mutex_enter_func(
/* Note that we do not peek at the value of lock_word before trying
the atomic test_and_set; we could peek, and possibly save time. */
-#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
- mutex->count_using++;
-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+ ut_d(mutex->count_using++);
if (!mutex_test_and_set(mutex)) {
ut_d(mutex->thread_id = os_thread_get_curr_id());
diff --git a/storage/innobase/include/sync0types.h b/storage/innobase/include/sync0types.h
index 57478426f25..1911bbac7fd 100644
--- a/storage/innobase/include/sync0types.h
+++ b/storage/innobase/include/sync0types.h
@@ -1,7 +1,24 @@
-/******************************************************
-Global types for sync
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/sync0types.h
+Global types for sync
Created 9/5/1995 Heikki Tuuri
*******************************************************/
@@ -9,8 +26,9 @@ Created 9/5/1995 Heikki Tuuri
#ifndef sync0types_h
#define sync0types_h
+/** Rename mutex_t to avoid name space collision on some systems */
#define mutex_t ib_mutex_t
+/** InnoDB mutex */
typedef struct mutex_struct mutex_t;
-
#endif
diff --git a/storage/innobase/include/thr0loc.h b/storage/innobase/include/thr0loc.h
index 32e2dc3ae93..b7eb29f2ed0 100644
--- a/storage/innobase/include/thr0loc.h
+++ b/storage/innobase/include/thr0loc.h
@@ -1,7 +1,24 @@
-/******************************************************
-The thread local storage
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/thr0loc.h
+The thread local storage
Created 10/5/1995 Heikki Tuuri
*******************************************************/
@@ -16,49 +33,55 @@ OS handle to the current thread, or its priority. */
#include "univ.i"
#include "os0thread.h"
-/********************************************************************
+/****************************************************************//**
Initializes the thread local storage module. */
-
+UNIV_INTERN
void
thr_local_init(void);
/*================*/
-/***********************************************************************
+ /****************************************************************//**
+Close the thread local storage module. */
+UNIV_INTERN
+void
+thr_local_close(void);
+/*=================*/
+/*******************************************************************//**
Creates a local storage struct for the calling new thread. */
-
+UNIV_INTERN
void
thr_local_create(void);
/*==================*/
-/***********************************************************************
+/*******************************************************************//**
Frees the local storage struct for the specified thread. */
-
+UNIV_INTERN
void
thr_local_free(
/*===========*/
- os_thread_id_t id); /* in: thread id */
-/***********************************************************************
-Gets the slot number in the thread table of a thread. */
-
+ os_thread_id_t id); /*!< in: thread id */
+/*******************************************************************//**
+Gets the slot number in the thread table of a thread.
+@return slot number */
+UNIV_INTERN
ulint
thr_local_get_slot_no(
/*==================*/
- /* out: slot number */
- os_thread_id_t id); /* in: thread id of the thread */
-/***********************************************************************
+ os_thread_id_t id); /*!< in: thread id of the thread */
+/*******************************************************************//**
Sets in the local storage the slot number in the thread table of a thread. */
-
+UNIV_INTERN
void
thr_local_set_slot_no(
/*==================*/
- os_thread_id_t id, /* in: thread id of the thread */
- ulint slot_no);/* in: slot number */
-/***********************************************************************
+ os_thread_id_t id, /*!< in: thread id of the thread */
+ ulint slot_no);/*!< in: slot number */
+/*******************************************************************//**
Returns pointer to the 'in_ibuf' field within the current thread local
-storage. */
-
+storage.
+@return pointer to the in_ibuf field */
+UNIV_INTERN
ibool*
thr_local_get_in_ibuf_field(void);
/*=============================*/
- /* out: pointer to the in_ibuf field */
#ifndef UNIV_NONINL
#include "thr0loc.ic"
diff --git a/storage/innobase/include/thr0loc.ic b/storage/innobase/include/thr0loc.ic
index b8b8136180c..ce44e512320 100644
--- a/storage/innobase/include/thr0loc.ic
+++ b/storage/innobase/include/thr0loc.ic
@@ -1,7 +1,24 @@
-/******************************************************
-Thread local storage
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/thr0loc.ic
+Thread local storage
Created 10/4/1995 Heikki Tuuri
*******************************************************/
diff --git a/storage/innodb_plugin/include/trx0i_s.h b/storage/innobase/include/trx0i_s.h
index 7bd4e1b88c8..7bd4e1b88c8 100644
--- a/storage/innodb_plugin/include/trx0i_s.h
+++ b/storage/innobase/include/trx0i_s.h
diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h
index c4aab91a93a..908760580f6 100644
--- a/storage/innobase/include/trx0purge.h
+++ b/storage/innobase/include/trx0purge.h
@@ -1,7 +1,24 @@
-/******************************************************
-Purge old versions
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1996 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0purge.h
+Purge old versions
Created 3/26/1996 Heikki Tuuri
*******************************************************/
@@ -18,143 +35,146 @@ Created 3/26/1996 Heikki Tuuri
#include "usr0sess.h"
#include "fil0fil.h"
-/* The global data structure coordinating a purge */
+/** The global data structure coordinating a purge */
extern trx_purge_t* purge_sys;
-/* A dummy undo record used as a return value when we have a whole undo log
+/** A dummy undo record used as a return value when we have a whole undo log
which needs no purge */
extern trx_undo_rec_t trx_purge_dummy_rec;
-/************************************************************************
+/********************************************************************//**
Calculates the file address of an undo log header when we have the file
-address of its history list node. */
+address of its history list node.
+@return file address of the log */
UNIV_INLINE
fil_addr_t
trx_purge_get_log_from_hist(
/*========================*/
- /* out: file address of the log */
- fil_addr_t node_addr); /* in: file address of the history
+ fil_addr_t node_addr); /*!< in: file address of the history
list node of the log */
-/*********************************************************************
+/*****************************************************************//**
Checks if trx_id is >= purge_view: then it is guaranteed that its update
-undo log still exists in the system. */
-
+undo log still exists in the system.
+@return TRUE if it is sure that the undo log is preserved; even if the
+function returns FALSE, it is possible that the undo log still exists
+in the system */
+UNIV_INTERN
ibool
trx_purge_update_undo_must_exist(
/*=============================*/
- /* out: TRUE if is sure that it is preserved, also
- if the function returns FALSE, it is possible that
- the undo log still exists in the system */
- dulint trx_id);/* in: transaction id */
-/************************************************************************
+ trx_id_t trx_id);/*!< in: transaction id */
+/********************************************************************//**
Creates the global purge system control structure and inits the history
mutex. */
-
+UNIV_INTERN
void
trx_purge_sys_create(void);
/*======================*/
+/********************************************************************//**
+Frees the global purge system control structure. */
+UNIV_INTERN
+void
+trx_purge_sys_close(void);
+/*======================*/
/************************************************************************
Adds the update undo log as the first log in the history list. Removes the
update undo log segment from the rseg slot if it is too big for reuse. */
-
+UNIV_INTERN
void
trx_purge_add_update_undo_to_history(
/*=================================*/
- trx_t* trx, /* in: transaction */
- page_t* undo_page, /* in: update undo log header page,
+ trx_t* trx, /*!< in: transaction */
+ page_t* undo_page, /*!< in: update undo log header page,
x-latched */
- mtr_t* mtr); /* in: mtr */
-/************************************************************************
+ mtr_t* mtr); /*!< in: mtr */
+/********************************************************************//**
Fetches the next undo log record from the history list to purge. It must be
-released with the corresponding release function. */
-
+released with the corresponding release function.
+@return copy of an undo log record or pointer to trx_purge_dummy_rec,
+if the whole undo log can be skipped in purge; NULL if none left */
+UNIV_INTERN
trx_undo_rec_t*
trx_purge_fetch_next_rec(
/*=====================*/
- /* out: copy of an undo log record, or
- pointer to the dummy undo log record
- &trx_purge_dummy_rec if the whole undo log
- can skipped in purge; NULL if none left */
- dulint* roll_ptr,/* out: roll pointer to undo record */
- trx_undo_inf_t** cell, /* out: storage cell for the record in the
+ roll_ptr_t* roll_ptr,/*!< out: roll pointer to undo record */
+ trx_undo_inf_t** cell, /*!< out: storage cell for the record in the
purge array */
- mem_heap_t* heap); /* in: memory heap where copied */
-/***********************************************************************
+ mem_heap_t* heap); /*!< in: memory heap where copied */
+/*******************************************************************//**
Releases a reserved purge undo record. */
-
+UNIV_INTERN
void
trx_purge_rec_release(
/*==================*/
- trx_undo_inf_t* cell); /* in: storage cell */
-/***********************************************************************
-This function runs a purge batch. */
-
+ trx_undo_inf_t* cell); /*!< in: storage cell */
+/*******************************************************************//**
+This function runs a purge batch.
+@return number of undo log pages handled in the batch */
+UNIV_INTERN
ulint
trx_purge(void);
/*===========*/
- /* out: number of undo log pages handled in
- the batch */
-/**********************************************************************
+/******************************************************************//**
Prints information of the purge system to stderr. */
-
+UNIV_INTERN
void
trx_purge_sys_print(void);
/*======================*/
-/* The control structure used in the purge operation */
+/** The control structure used in the purge operation */
struct trx_purge_struct{
- ulint state; /* Purge system state */
- sess_t* sess; /* System session running the purge
+ ulint state; /*!< Purge system state */
+ sess_t* sess; /*!< System session running the purge
query */
- trx_t* trx; /* System transaction running the purge
+ trx_t* trx; /*!< System transaction running the purge
query: this trx is not in the trx list
of the trx system and it never ends */
- que_t* query; /* The query graph which will do the
+ que_t* query; /*!< The query graph which will do the
parallelized purge operation */
- rw_lock_t latch; /* The latch protecting the purge view.
+ rw_lock_t latch; /*!< The latch protecting the purge view.
A purge operation must acquire an
x-latch here for the instant at which
it changes the purge view: an undo
log operation can prevent this by
obtaining an s-latch here. */
- read_view_t* view; /* The purge will not remove undo logs
+ read_view_t* view; /*!< The purge will not remove undo logs
which are >= this view (purge view) */
- mutex_t mutex; /* Mutex protecting the fields below */
- ulint n_pages_handled;/* Approximate number of undo log
+ mutex_t mutex; /*!< Mutex protecting the fields below */
+ ulint n_pages_handled;/*!< Approximate number of undo log
pages processed in purge */
- ulint handle_limit; /* Target of how many pages to get
+ ulint handle_limit; /*!< Target of how many pages to get
processed in the current purge */
/*------------------------------*/
/* The following two fields form the 'purge pointer' which advances
during a purge, and which is used in history list truncation */
- dulint purge_trx_no; /* Purge has advanced past all
+ trx_id_t purge_trx_no; /*!< Purge has advanced past all
transactions whose number is less
than this */
- dulint purge_undo_no; /* Purge has advanced past all records
+ undo_no_t purge_undo_no; /*!< Purge has advanced past all records
whose undo number is less than this */
/*-----------------------------*/
- ibool next_stored; /* TRUE if the info of the next record
+ ibool next_stored; /*!< TRUE if the info of the next record
to purge is stored below: if yes, then
the transaction number and the undo
number of the record are stored in
purge_trx_no and purge_undo_no above */
- trx_rseg_t* rseg; /* Rollback segment for the next undo
+ trx_rseg_t* rseg; /*!< Rollback segment for the next undo
record to purge */
- ulint page_no; /* Page number for the next undo
+ ulint page_no; /*!< Page number for the next undo
record to purge, page number of the
log header, if dummy record */
- ulint offset; /* Page offset for the next undo
+ ulint offset; /*!< Page offset for the next undo
record to purge, 0 if the dummy
record */
- ulint hdr_page_no; /* Header page of the undo log where
+ ulint hdr_page_no; /*!< Header page of the undo log where
the next record to purge belongs */
- ulint hdr_offset; /* Header byte offset on the page */
+ ulint hdr_offset; /*!< Header byte offset on the page */
/*-----------------------------*/
- trx_undo_arr_t* arr; /* Array of transaction numbers and
+ trx_undo_arr_t* arr; /*!< Array of transaction numbers and
undo numbers of the undo records
currently under processing in purge */
- mem_heap_t* heap; /* Temporary storage used during a
+ mem_heap_t* heap; /*!< Temporary storage used during a
purge: can be emptied after purge
completes */
};
diff --git a/storage/innobase/include/trx0purge.ic b/storage/innobase/include/trx0purge.ic
index 9f1c0ed96f8..de09e393654 100644
--- a/storage/innobase/include/trx0purge.ic
+++ b/storage/innobase/include/trx0purge.ic
@@ -1,22 +1,39 @@
-/******************************************************
-Purge old versions
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0purge.ic
+Purge old versions
Created 3/26/1996 Heikki Tuuri
*******************************************************/
#include "trx0undo.h"
-/************************************************************************
+/********************************************************************//**
Calculates the file address of an undo log header when we have the file
-address of its history list node. */
+address of its history list node.
+@return file address of the log */
UNIV_INLINE
fil_addr_t
trx_purge_get_log_from_hist(
/*========================*/
- /* out: file address of the log */
- fil_addr_t node_addr) /* in: file address of the history
+ fil_addr_t node_addr) /*!< in: file address of the history
list node of the log */
{
node_addr.boffset -= TRX_UNDO_HISTORY_NODE;
diff --git a/storage/innobase/include/trx0rec.h b/storage/innobase/include/trx0rec.h
index 6447b6a2e35..a6e56e963c6 100644
--- a/storage/innobase/include/trx0rec.h
+++ b/storage/innobase/include/trx0rec.h
@@ -1,7 +1,24 @@
-/******************************************************
-Transaction undo log record
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0rec.h
+Transaction undo log record
Created 3/26/1996 Heikki Tuuri
*******************************************************/
@@ -13,291 +30,309 @@ Created 3/26/1996 Heikki Tuuri
#include "trx0types.h"
#include "row0types.h"
#include "mtr0mtr.h"
-#include "trx0sys.h"
#include "dict0types.h"
-#include "que0types.h"
#include "data0data.h"
#include "rem0types.h"
-/***************************************************************************
-Copies the undo record to the heap. */
+#ifndef UNIV_HOTBACKUP
+# include "que0types.h"
+
+/***********************************************************************//**
+Copies the undo record to the heap.
+@return own: copy of undo log record */
UNIV_INLINE
trx_undo_rec_t*
trx_undo_rec_copy(
/*==============*/
- /* out, own: copy of undo log record */
- trx_undo_rec_t* undo_rec, /* in: undo log record */
- mem_heap_t* heap); /* in: heap where copied */
-/**************************************************************************
-Reads the undo log record type. */
+ const trx_undo_rec_t* undo_rec, /*!< in: undo log record */
+ mem_heap_t* heap); /*!< in: heap where copied */
+/**********************************************************************//**
+Reads the undo log record type.
+@return record type */
UNIV_INLINE
ulint
trx_undo_rec_get_type(
/*==================*/
- /* out: record type */
- trx_undo_rec_t* undo_rec); /* in: undo log record */
-/**************************************************************************
-Reads from an undo log record the record compiler info. */
+ const trx_undo_rec_t* undo_rec); /*!< in: undo log record */
+/**********************************************************************//**
+Reads from an undo log record the record compiler info.
+@return compiler info */
UNIV_INLINE
ulint
trx_undo_rec_get_cmpl_info(
/*=======================*/
- /* out: compiler info */
- trx_undo_rec_t* undo_rec); /* in: undo log record */
-/**************************************************************************
-Returns TRUE if an undo log record contains an extern storage field. */
+ const trx_undo_rec_t* undo_rec); /*!< in: undo log record */
+/**********************************************************************//**
+Returns TRUE if an undo log record contains an extern storage field.
+@return TRUE if extern */
UNIV_INLINE
ibool
trx_undo_rec_get_extern_storage(
/*============================*/
- /* out: TRUE if extern */
- trx_undo_rec_t* undo_rec); /* in: undo log record */
-/**************************************************************************
-Reads the undo log record number. */
+ const trx_undo_rec_t* undo_rec); /*!< in: undo log record */
+/**********************************************************************//**
+Reads the undo log record number.
+@return undo no */
UNIV_INLINE
-dulint
+undo_no_t
trx_undo_rec_get_undo_no(
/*=====================*/
- /* out: undo no */
- trx_undo_rec_t* undo_rec); /* in: undo log record */
-/**************************************************************************
-Reads from an undo log record the general parameters. */
+ const trx_undo_rec_t* undo_rec); /*!< in: undo log record */
+/**********************************************************************//**
+Returns the start of the undo record data area.
+@return offset to the data area */
+UNIV_INLINE
+ulint
+trx_undo_rec_get_offset(
+/*====================*/
+ undo_no_t undo_no) /*!< in: undo no read from node */
+ __attribute__((const));
+/**********************************************************************//**
+Returns a pointer to the start of the undo record data area. */
+#define trx_undo_rec_get_ptr(undo_rec, undo_no) \
+ ((undo_rec) + trx_undo_rec_get_offset(undo_no))
+
+/**********************************************************************//**
+Reads from an undo log record the general parameters.
+@return remaining part of undo log record after reading these values */
+UNIV_INTERN
byte*
trx_undo_rec_get_pars(
/*==================*/
- /* out: remaining part of undo log
- record after reading these values */
- trx_undo_rec_t* undo_rec, /* in: undo log record */
- ulint* type, /* out: undo record type:
+ trx_undo_rec_t* undo_rec, /*!< in: undo log record */
+ ulint* type, /*!< out: undo record type:
TRX_UNDO_INSERT_REC, ... */
- ulint* cmpl_info, /* out: compiler info, relevant only
+ ulint* cmpl_info, /*!< out: compiler info, relevant only
for update type records */
- ibool* updated_extern, /* out: TRUE if we updated an
+ ibool* updated_extern, /*!< out: TRUE if we updated an
externally stored field */
- dulint* undo_no, /* out: undo log record number */
- dulint* table_id); /* out: table id */
-/***********************************************************************
-Builds a row reference from an undo log record. */
-
+ undo_no_t* undo_no, /*!< out: undo log record number */
+ dulint* table_id); /*!< out: table id */
+/*******************************************************************//**
+Builds a row reference from an undo log record.
+@return pointer to remaining part of undo record */
+UNIV_INTERN
byte*
trx_undo_rec_get_row_ref(
/*=====================*/
- /* out: pointer to remaining part of undo
- record */
- byte* ptr, /* in: remaining part of a copy of an undo log
+ byte* ptr, /*!< in: remaining part of a copy of an undo log
record, at the start of the row reference;
NOTE that this copy of the undo log record must
be preserved as long as the row reference is
used, as we do NOT copy the data in the
record! */
- dict_index_t* index, /* in: clustered index */
- dtuple_t** ref, /* out, own: row reference */
- mem_heap_t* heap); /* in: memory heap from which the memory
+ dict_index_t* index, /*!< in: clustered index */
+ dtuple_t** ref, /*!< out, own: row reference */
+ mem_heap_t* heap); /*!< in: memory heap from which the memory
needed is allocated */
-/***********************************************************************
-Skips a row reference from an undo log record. */
-
+/*******************************************************************//**
+Skips a row reference from an undo log record.
+@return pointer to remaining part of undo record */
+UNIV_INTERN
byte*
trx_undo_rec_skip_row_ref(
/*======================*/
- /* out: pointer to remaining part of undo
- record */
- byte* ptr, /* in: remaining part in update undo log
+ byte* ptr, /*!< in: remaining part in update undo log
record, at the start of the row reference */
- dict_index_t* index); /* in: clustered index */
-/**************************************************************************
+ dict_index_t* index); /*!< in: clustered index */
+/**********************************************************************//**
Reads from an undo log update record the system field values of the old
-version. */
-
+version.
+@return remaining part of undo log record after reading these values */
+UNIV_INTERN
byte*
trx_undo_update_rec_get_sys_cols(
/*=============================*/
- /* out: remaining part of undo log
- record after reading these values */
- byte* ptr, /* in: remaining part of undo log
- record after reading general
- parameters */
- dulint* trx_id, /* out: trx id */
- dulint* roll_ptr, /* out: roll ptr */
- ulint* info_bits); /* out: info bits state */
-/***********************************************************************
-Builds an update vector based on a remaining part of an undo log record. */
-
+ byte* ptr, /*!< in: remaining part of undo
+ log record after reading
+ general parameters */
+ trx_id_t* trx_id, /*!< out: trx id */
+ roll_ptr_t* roll_ptr, /*!< out: roll ptr */
+ ulint* info_bits); /*!< out: info bits state */
+/*******************************************************************//**
+Builds an update vector based on a remaining part of an undo log record.
+@return remaining part of the record, NULL if an error detected, which
+means that the record is corrupted */
+UNIV_INTERN
byte*
trx_undo_update_rec_get_update(
/*===========================*/
- /* out: remaining part of the record,
- NULL if an error detected, which means that
- the record is corrupted */
- byte* ptr, /* in: remaining part in update undo log
+ byte* ptr, /*!< in: remaining part in update undo log
record, after reading the row reference
NOTE that this copy of the undo log record must
be preserved as long as the update vector is
used, as we do NOT copy the data in the
record! */
- dict_index_t* index, /* in: clustered index */
- ulint type, /* in: TRX_UNDO_UPD_EXIST_REC,
+ dict_index_t* index, /*!< in: clustered index */
+ ulint type, /*!< in: TRX_UNDO_UPD_EXIST_REC,
TRX_UNDO_UPD_DEL_REC, or
TRX_UNDO_DEL_MARK_REC; in the last case,
only trx id and roll ptr fields are added to
the update vector */
- dulint trx_id, /* in: transaction id from this undorecord */
- dulint roll_ptr,/* in: roll pointer from this undo record */
- ulint info_bits,/* in: info bits from this undo record */
- trx_t* trx, /* in: transaction */
- mem_heap_t* heap, /* in: memory heap from which the memory
+ trx_id_t trx_id, /*!< in: transaction id from this undo record */
+ roll_ptr_t roll_ptr,/*!< in: roll pointer from this undo record */
+ ulint info_bits,/*!< in: info bits from this undo record */
+ trx_t* trx, /*!< in: transaction */
+ mem_heap_t* heap, /*!< in: memory heap from which the memory
needed is allocated */
- upd_t** upd); /* out, own: update vector */
-/***********************************************************************
+ upd_t** upd); /*!< out, own: update vector */
+/*******************************************************************//**
Builds a partial row from an update undo log record. It contains the
-columns which occur as ordering in any index of the table. */
-
+columns which occur as ordering in any index of the table.
+@return pointer to remaining part of undo record */
+UNIV_INTERN
byte*
trx_undo_rec_get_partial_row(
/*=========================*/
- /* out: pointer to remaining part of undo
- record */
- byte* ptr, /* in: remaining part in update undo log
+ byte* ptr, /*!< in: remaining part in update undo log
record of a suitable type, at the start of
the stored index columns;
NOTE that this copy of the undo log record must
be preserved as long as the partial row is
used, as we do NOT copy the data in the
record! */
- dict_index_t* index, /* in: clustered index */
- dtuple_t** row, /* out, own: partial row */
- mem_heap_t* heap); /* in: memory heap from which the memory
+ dict_index_t* index, /*!< in: clustered index */
+ dtuple_t** row, /*!< out, own: partial row */
+ ibool ignore_prefix, /*!< in: flag to indicate if we
+ expect blob prefixes in undo. Used
+ only in the assertion. */
+ mem_heap_t* heap); /*!< in: memory heap from which the memory
needed is allocated */
-/***************************************************************************
+/***********************************************************************//**
Writes information to an undo log about an insert, update, or a delete marking
of a clustered index record. This information is used in a rollback of the
transaction and in consistent reads that must look to the history of this
-transaction. */
-
+transaction.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
ulint
trx_undo_report_row_operation(
/*==========================*/
- /* out: DB_SUCCESS or error code */
- ulint flags, /* in: if BTR_NO_UNDO_LOG_FLAG bit is
+ ulint flags, /*!< in: if BTR_NO_UNDO_LOG_FLAG bit is
set, does nothing */
- ulint op_type, /* in: TRX_UNDO_INSERT_OP or
+ ulint op_type, /*!< in: TRX_UNDO_INSERT_OP or
TRX_UNDO_MODIFY_OP */
- que_thr_t* thr, /* in: query thread */
- dict_index_t* index, /* in: clustered index */
- dtuple_t* clust_entry, /* in: in the case of an insert,
+ que_thr_t* thr, /*!< in: query thread */
+ dict_index_t* index, /*!< in: clustered index */
+ const dtuple_t* clust_entry, /*!< in: in the case of an insert,
index entry to insert into the
clustered index, otherwise NULL */
- upd_t* update, /* in: in the case of an update,
+ const upd_t* update, /*!< in: in the case of an update,
the update vector, otherwise NULL */
- ulint cmpl_info, /* in: compiler info on secondary
+ ulint cmpl_info, /*!< in: compiler info on secondary
index updates */
- rec_t* rec, /* in: case of an update or delete
+ const rec_t* rec, /*!< in: case of an update or delete
marking, the record in the clustered
index, otherwise NULL */
- dulint* roll_ptr); /* out: rollback pointer to the
+ roll_ptr_t* roll_ptr); /*!< out: rollback pointer to the
inserted undo log record,
ut_dulint_zero if BTR_NO_UNDO_LOG
flag was specified */
-/**********************************************************************
+/******************************************************************//**
Copies an undo record to heap. This function can be called if we know that
-the undo log record exists. */
-
+the undo log record exists.
+@return own: copy of the record */
+UNIV_INTERN
trx_undo_rec_t*
trx_undo_get_undo_rec_low(
/*======================*/
- /* out, own: copy of the record */
- dulint roll_ptr, /* in: roll pointer to record */
- mem_heap_t* heap); /* in: memory heap where copied */
-/**********************************************************************
-Copies an undo record to heap. */
+ roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
+ mem_heap_t* heap); /*!< in: memory heap where copied */
+/******************************************************************//**
+Copies an undo record to heap.
+
+NOTE: the caller must have latches on the clustered index page and
+purge_view.
+@return DB_SUCCESS, or DB_MISSING_HISTORY if the undo log has been
+truncated and we cannot fetch the old version */
+UNIV_INTERN
ulint
trx_undo_get_undo_rec(
/*==================*/
- /* out: DB_SUCCESS, or
- DB_MISSING_HISTORY if the undo log
- has been truncated and we cannot
- fetch the old version; NOTE: the
- caller must have latches on the
- clustered index page and purge_view */
- dulint roll_ptr, /* in: roll pointer to record */
- dulint trx_id, /* in: id of the trx that generated
+ roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
+ trx_id_t trx_id, /*!< in: id of the trx that generated
the roll pointer: it points to an
undo log of this transaction */
- trx_undo_rec_t** undo_rec, /* out, own: copy of the record */
- mem_heap_t* heap); /* in: memory heap where copied */
-/***********************************************************************
+ trx_undo_rec_t** undo_rec, /*!< out, own: copy of the record */
+ mem_heap_t* heap); /*!< in: memory heap where copied */
+/*******************************************************************//**
Build a previous version of a clustered index record. This function checks
that the caller has a latch on the index page of the clustered index record
and an s-latch on the purge_view. This guarantees that the stack of versions
-is locked. */
-
+is locked.
+@return DB_SUCCESS, or DB_MISSING_HISTORY if the previous version is
+earlier than purge_view, which means that it may have been removed,
+DB_ERROR if corrupted record */
+UNIV_INTERN
ulint
trx_undo_prev_version_build(
/*========================*/
- /* out: DB_SUCCESS, or DB_MISSING_HISTORY if
- the previous version is not >= purge_view,
- which means that it may have been removed,
- DB_ERROR if corrupted record */
- rec_t* index_rec,/* in: clustered index record in the
+ const rec_t* index_rec,/*!< in: clustered index record in the
index tree */
- mtr_t* index_mtr,/* in: mtr which contains the latch to
+ mtr_t* index_mtr,/*!< in: mtr which contains the latch to
index_rec page and purge_view */
- rec_t* rec, /* in: version of a clustered index record */
- dict_index_t* index, /* in: clustered index */
- ulint* offsets,/* in: rec_get_offsets(rec, index) */
- mem_heap_t* heap, /* in: memory heap from which the memory
+ const rec_t* rec, /*!< in: version of a clustered index record */
+ dict_index_t* index, /*!< in: clustered index */
+ ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ mem_heap_t* heap, /*!< in: memory heap from which the memory
needed is allocated */
- rec_t** old_vers);/* out, own: previous version, or NULL if
+ rec_t** old_vers);/*!< out, own: previous version, or NULL if
rec is the first inserted version, or if
history data has been deleted */
-/***************************************************************
-Parses a redo log record of adding an undo log record. */
-
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************//**
+Parses a redo log record of adding an undo log record.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
trx_undo_parse_add_undo_rec(
/*========================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page); /* in: page or NULL */
-/***************************************************************
-Parses a redo log record of erasing of an undo page end. */
-
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ page_t* page); /*!< in: page or NULL */
+/***********************************************************//**
+Parses a redo log record of erasing of an undo page end.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
trx_undo_parse_erase_page_end(
/*==========================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ page_t* page, /*!< in: page or NULL */
+ mtr_t* mtr); /*!< in: mtr or NULL */
+
+#ifndef UNIV_HOTBACKUP
/* Types of an undo log record: these have to be smaller than 16, as the
compilation info multiplied by 16 is ORed to this value in an undo log
record */
-#define TRX_UNDO_INSERT_REC 11 /* fresh insert into clustered index */
-#define TRX_UNDO_UPD_EXIST_REC 12 /* update of a non-delete-marked
+
+#define TRX_UNDO_INSERT_REC 11 /* fresh insert into clustered index */
+#define TRX_UNDO_UPD_EXIST_REC 12 /* update of a non-delete-marked
record */
#define TRX_UNDO_UPD_DEL_REC 13 /* update of a delete marked record to
a not delete marked record; also the
fields of the record can change */
-#define TRX_UNDO_DEL_MARK_REC 14 /* delete marking of a record; fields
+#define TRX_UNDO_DEL_MARK_REC 14 /* delete marking of a record; fields
do not change */
#define TRX_UNDO_CMPL_INFO_MULT 16 /* compilation info is multiplied by
this and ORed to the type above */
-#define TRX_UNDO_UPD_EXTERN 128 /* This bit can be ORed to type_cmpl
+#define TRX_UNDO_UPD_EXTERN 128 /* This bit can be ORed to type_cmpl
to denote that we updated external
storage fields: used by purge to
free the external storage */
/* Operation type flags used in trx_undo_report_row_operation */
-#define TRX_UNDO_INSERT_OP 1
-#define TRX_UNDO_MODIFY_OP 2
+#define TRX_UNDO_INSERT_OP 1
+#define TRX_UNDO_MODIFY_OP 2
#ifndef UNIV_NONINL
#include "trx0rec.ic"
#endif
-#endif
+#endif /* !UNIV_HOTBACKUP */
+
+#endif /* trx0rec_h */
diff --git a/storage/innobase/include/trx0rec.ic b/storage/innobase/include/trx0rec.ic
index a1ddc127ec7..e7e41d6d9f6 100644
--- a/storage/innobase/include/trx0rec.ic
+++ b/storage/innobase/include/trx0rec.ic
@@ -1,43 +1,61 @@
-/******************************************************
-Transaction undo log record
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0rec.ic
+Transaction undo log record
Created 3/26/1996 Heikki Tuuri
*******************************************************/
-/**************************************************************************
-Reads from an undo log record the record type. */
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Reads from an undo log record the record type.
+@return record type */
UNIV_INLINE
ulint
trx_undo_rec_get_type(
/*==================*/
- /* out: record type */
- trx_undo_rec_t* undo_rec) /* in: undo log record */
+ const trx_undo_rec_t* undo_rec) /*!< in: undo log record */
{
return(mach_read_from_1(undo_rec + 2) & (TRX_UNDO_CMPL_INFO_MULT - 1));
}
-/**************************************************************************
-Reads from an undo log record the record compiler info. */
+/**********************************************************************//**
+Reads from an undo log record the record compiler info.
+@return compiler info */
UNIV_INLINE
ulint
trx_undo_rec_get_cmpl_info(
/*=======================*/
- /* out: compiler info */
- trx_undo_rec_t* undo_rec) /* in: undo log record */
+ const trx_undo_rec_t* undo_rec) /*!< in: undo log record */
{
return(mach_read_from_1(undo_rec + 2) / TRX_UNDO_CMPL_INFO_MULT);
}
-/**************************************************************************
-Returns TRUE if an undo log record contains an extern storage field. */
+/**********************************************************************//**
+Returns TRUE if an undo log record contains an extern storage field.
+@return TRUE if extern */
UNIV_INLINE
ibool
trx_undo_rec_get_extern_storage(
/*============================*/
- /* out: TRUE if extern */
- trx_undo_rec_t* undo_rec) /* in: undo log record */
+ const trx_undo_rec_t* undo_rec) /*!< in: undo log record */
{
if (mach_read_from_1(undo_rec + 2) & TRX_UNDO_UPD_EXTERN) {
@@ -47,40 +65,48 @@ trx_undo_rec_get_extern_storage(
return(FALSE);
}
-/**************************************************************************
-Reads the undo log record number. */
+/**********************************************************************//**
+Reads the undo log record number.
+@return undo no */
UNIV_INLINE
-dulint
+undo_no_t
trx_undo_rec_get_undo_no(
/*=====================*/
- /* out: undo no */
- trx_undo_rec_t* undo_rec) /* in: undo log record */
+ const trx_undo_rec_t* undo_rec) /*!< in: undo log record */
{
- byte* ptr;
+ const byte* ptr;
ptr = undo_rec + 3;
return(mach_dulint_read_much_compressed(ptr));
}
-/***************************************************************************
-Copies the undo record to the heap. */
+/**********************************************************************//**
+Returns the start of the undo record data area.
+@return offset to the data area */
+UNIV_INLINE
+ulint
+trx_undo_rec_get_offset(
+/*====================*/
+ undo_no_t undo_no) /*!< in: undo no read from node */
+{
+ return (3 + mach_dulint_get_much_compressed_size(undo_no));
+}
+
+/***********************************************************************//**
+Copies the undo record to the heap.
+@return own: copy of undo log record */
UNIV_INLINE
trx_undo_rec_t*
trx_undo_rec_copy(
/*==============*/
- /* out, own: copy of undo log record */
- trx_undo_rec_t* undo_rec, /* in: undo log record */
- mem_heap_t* heap) /* in: heap where copied */
+ const trx_undo_rec_t* undo_rec, /*!< in: undo log record */
+ mem_heap_t* heap) /*!< in: heap where copied */
{
ulint len;
- trx_undo_rec_t* rec_copy;
-
- len = mach_read_from_2(undo_rec) + buf_frame_align(undo_rec)
- - undo_rec;
- rec_copy = mem_heap_alloc(heap, len);
-
- ut_memcpy(rec_copy, undo_rec, len);
- return(rec_copy);
+ len = mach_read_from_2(undo_rec)
+ - ut_align_offset(undo_rec, UNIV_PAGE_SIZE);
+ return(mem_heap_dup(heap, undo_rec, len));
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/trx0roll.h b/storage/innobase/include/trx0roll.h
index c1eca3d5753..1dee5655c8c 100644
--- a/storage/innobase/include/trx0roll.h
+++ b/storage/innobase/include/trx0roll.h
@@ -1,7 +1,24 @@
-/******************************************************
-Transaction rollback
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1996 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0roll.h
+Transaction rollback
Created 3/26/1996 Heikki Tuuri
*******************************************************/
@@ -17,296 +34,317 @@ Created 3/26/1996 Heikki Tuuri
#define trx_roll_free_all_savepoints(s) trx_roll_savepoints_free((s), NULL)
-/***********************************************************************
-Returns a transaction savepoint taken at this point in time. */
-
+/*******************************************************************//**
+Determines if this transaction is rolling back an incomplete transaction
+in crash recovery.
+@return TRUE if trx is an incomplete transaction that is being rolled
+back in crash recovery */
+UNIV_INTERN
+ibool
+trx_is_recv(
+/*========*/
+ const trx_t* trx); /*!< in: transaction */
+/*******************************************************************//**
+Returns a transaction savepoint taken at this point in time.
+@return savepoint */
+UNIV_INTERN
trx_savept_t
trx_savept_take(
/*============*/
- /* out: savepoint */
- trx_t* trx); /* in: transaction */
-/***********************************************************************
+ trx_t* trx); /*!< in: transaction */
+/*******************************************************************//**
Creates an undo number array. */
-
+UNIV_INTERN
trx_undo_arr_t*
trx_undo_arr_create(void);
/*=====================*/
-/***********************************************************************
+/*******************************************************************//**
Frees an undo number array. */
-
+UNIV_INTERN
void
trx_undo_arr_free(
/*==============*/
- trx_undo_arr_t* arr); /* in: undo number array */
-/***********************************************************************
-Returns pointer to nth element in an undo number array. */
+ trx_undo_arr_t* arr); /*!< in: undo number array */
+/*******************************************************************//**
+Returns pointer to nth element in an undo number array.
+@return pointer to the nth element */
UNIV_INLINE
trx_undo_inf_t*
trx_undo_arr_get_nth_info(
/*======================*/
- /* out: pointer to the nth element */
- trx_undo_arr_t* arr, /* in: undo number array */
- ulint n); /* in: position */
-/***************************************************************************
+ trx_undo_arr_t* arr, /*!< in: undo number array */
+ ulint n); /*!< in: position */
+/***********************************************************************//**
Tries to truncate the undo logs. */
-
+UNIV_INTERN
void
trx_roll_try_truncate(
/*==================*/
- trx_t* trx); /* in: transaction */
-/************************************************************************
+ trx_t* trx); /*!< in/out: transaction */
+/********************************************************************//**
Pops the topmost record when the two undo logs of a transaction are seen
as a single stack of records ordered by their undo numbers. Inserts the
undo number of the popped undo record to the array of currently processed
undo numbers in the transaction. When the query thread finishes processing
-of this undo record, it must be released with trx_undo_rec_release. */
-
+of this undo record, it must be released with trx_undo_rec_release.
+@return undo log record copied to heap, NULL if none left, or if the
+undo number of the top record would be less than the limit */
+UNIV_INTERN
trx_undo_rec_t*
trx_roll_pop_top_rec_of_trx(
/*========================*/
- /* out: undo log record copied to heap, NULL
- if none left, or if the undo number of the
- top record would be less than the limit */
- trx_t* trx, /* in: transaction */
- dulint limit, /* in: least undo number we need */
- dulint* roll_ptr,/* out: roll pointer to undo record */
- mem_heap_t* heap); /* in: memory heap where copied */
-/************************************************************************
+ trx_t* trx, /*!< in: transaction */
+ undo_no_t limit, /*!< in: least undo number we need */
+ roll_ptr_t* roll_ptr,/*!< out: roll pointer to undo record */
+ mem_heap_t* heap); /*!< in: memory heap where copied */
+/********************************************************************//**
Reserves an undo log record for a query thread to undo. This should be
called if the query thread gets the undo log record not using the pop
-function above. */
-
+function above.
+@return TRUE if succeeded */
+UNIV_INTERN
ibool
trx_undo_rec_reserve(
/*=================*/
- /* out: TRUE if succeeded */
- trx_t* trx, /* in: transaction */
- dulint undo_no);/* in: undo number of the record */
-/***********************************************************************
+ trx_t* trx, /*!< in/out: transaction */
+ undo_no_t undo_no);/*!< in: undo number of the record */
+/*******************************************************************//**
Releases a reserved undo record. */
-
+UNIV_INTERN
void
trx_undo_rec_release(
/*=================*/
- trx_t* trx, /* in: transaction */
- dulint undo_no);/* in: undo number */
-/*************************************************************************
+ trx_t* trx, /*!< in/out: transaction */
+ undo_no_t undo_no);/*!< in: undo number */
+/*********************************************************************//**
Starts a rollback operation. */
-
+UNIV_INTERN
void
trx_rollback(
/*=========*/
- trx_t* trx, /* in: transaction */
- trx_sig_t* sig, /* in: signal starting the rollback */
- que_thr_t** next_thr);/* in/out: next query thread to run;
+ trx_t* trx, /*!< in: transaction */
+ trx_sig_t* sig, /*!< in: signal starting the rollback */
+ que_thr_t** next_thr);/*!< in/out: next query thread to run;
if the value which is passed in is
a pointer to a NULL pointer, then the
calling function can start running
a new query thread */
-/***********************************************************************
-Rollback or clean up transactions which have no user session. If the
-transaction already was committed, then we clean up a possible insert
-undo log. If the transaction was not yet committed, then we roll it back.
-Note: this is done in a background thread. */
-
+/*******************************************************************//**
+Rollback or clean up any incomplete transactions which were
+encountered in crash recovery. If the transaction already was
+committed, then we clean up a possible insert undo log. If the
+transaction was not yet committed, then we roll it back. */
+UNIV_INTERN
+void
+trx_rollback_or_clean_recovered(
+/*============================*/
+ ibool all); /*!< in: FALSE=roll back dictionary transactions;
+ TRUE=roll back all non-PREPARED transactions */
+/*******************************************************************//**
+Rollback or clean up any incomplete transactions which were
+encountered in crash recovery. If the transaction already was
+committed, then we clean up a possible insert undo log. If the
+transaction was not yet committed, then we roll it back.
+Note: this is done in a background thread.
+@return a dummy parameter */
+UNIV_INTERN
os_thread_ret_t
-trx_rollback_or_clean_all_without_sess(
-/*===================================*/
- /* out: a dummy parameter */
+trx_rollback_or_clean_all_recovered(
+/*================================*/
void* arg __attribute__((unused)));
- /* in: a dummy parameter required by
+ /*!< in: a dummy parameter required by
os_thread_create */
-/********************************************************************
+/****************************************************************//**
Finishes a transaction rollback. */
-
+UNIV_INTERN
void
trx_finish_rollback_off_kernel(
/*===========================*/
- que_t* graph, /* in: undo graph which can now be freed */
- trx_t* trx, /* in: transaction */
- que_thr_t** next_thr);/* in/out: next query thread to run;
+ que_t* graph, /*!< in: undo graph which can now be freed */
+ trx_t* trx, /*!< in: transaction */
+ que_thr_t** next_thr);/*!< in/out: next query thread to run;
if the value which is passed in is
a pointer to a NULL pointer, then the
calling function can start running
a new query thread; if this parameter is
NULL, it is ignored */
-/********************************************************************
+/****************************************************************//**
Builds an undo 'query' graph for a transaction. The actual rollback is
performed by executing this query graph like a query subprocedure call.
The reply about the completion of the rollback will be sent by this
-graph. */
-
+graph.
+@return own: the query graph */
+UNIV_INTERN
que_t*
trx_roll_graph_build(
/*=================*/
- /* out, own: the query graph */
- trx_t* trx); /* in: trx handle */
-/*************************************************************************
-Creates a rollback command node struct. */
-
+ trx_t* trx); /*!< in: trx handle */
+/*********************************************************************//**
+Creates a rollback command node struct.
+@return own: rollback node struct */
+UNIV_INTERN
roll_node_t*
roll_node_create(
/*=============*/
- /* out, own: rollback node struct */
- mem_heap_t* heap); /* in: mem heap where created */
-/***************************************************************
-Performs an execution step for a rollback command node in a query graph. */
-
+ mem_heap_t* heap); /*!< in: mem heap where created */
+/***********************************************************//**
+Performs an execution step for a rollback command node in a query graph.
+@return query thread to run next, or NULL */
+UNIV_INTERN
que_thr_t*
trx_rollback_step(
/*==============*/
- /* out: query thread to run next, or NULL */
- que_thr_t* thr); /* in: query thread */
-/***********************************************************************
-Rollback a transaction used in MySQL. */
-
+ que_thr_t* thr); /*!< in: query thread */
+/*******************************************************************//**
+Rollback a transaction used in MySQL.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
trx_rollback_for_mysql(
/*===================*/
- /* out: error code or DB_SUCCESS */
- trx_t* trx); /* in: transaction handle */
-/***********************************************************************
-Rollback the latest SQL statement for MySQL. */
-
+ trx_t* trx); /*!< in: transaction handle */
+/*******************************************************************//**
+Rollback the latest SQL statement for MySQL.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
trx_rollback_last_sql_stat_for_mysql(
/*=================================*/
- /* out: error code or DB_SUCCESS */
- trx_t* trx); /* in: transaction handle */
-/***********************************************************************
-Rollback a transaction used in MySQL. */
-
+ trx_t* trx); /*!< in: transaction handle */
+/*******************************************************************//**
+Rollback a transaction used in MySQL.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
trx_general_rollback_for_mysql(
/*===========================*/
- /* out: error code or DB_SUCCESS */
- trx_t* trx, /* in: transaction handle */
- ibool partial,/* in: TRUE if partial rollback requested */
- trx_savept_t* savept);/* in: pointer to savepoint undo number, if
- partial rollback requested */
-/***********************************************************************
+ trx_t* trx, /*!< in: transaction handle */
+ trx_savept_t* savept);/*!< in: pointer to savepoint undo number, if
+ partial rollback requested, or NULL for
+ complete rollback */
+/*******************************************************************//**
Rolls a transaction back to a named savepoint. Modifications after the
savepoint are undone but InnoDB does NOT release the corresponding locks
which are stored in memory. If a lock is 'implicit', that is, a new inserted
row holds a lock where the lock information is carried by the trx id stored in
the row, these locks are naturally released in the rollback. Savepoints which
-were set after this savepoint are deleted. */
-
+were set after this savepoint are deleted.
+@return if no savepoint of the name found then DB_NO_SAVEPOINT,
+otherwise DB_SUCCESS */
+UNIV_INTERN
ulint
trx_rollback_to_savepoint_for_mysql(
/*================================*/
- /* out: if no savepoint
- of the name found then
- DB_NO_SAVEPOINT,
- otherwise DB_SUCCESS */
- trx_t* trx, /* in: transaction handle */
- const char* savepoint_name, /* in: savepoint name */
- ib_longlong* mysql_binlog_cache_pos);/* out: the MySQL binlog cache
+ trx_t* trx, /*!< in: transaction handle */
+ const char* savepoint_name, /*!< in: savepoint name */
+ ib_int64_t* mysql_binlog_cache_pos);/*!< out: the MySQL binlog cache
position corresponding to this
savepoint; MySQL needs this
information to remove the
binlog entries of the queries
executed after the savepoint */
-/***********************************************************************
+/*******************************************************************//**
Creates a named savepoint. If the transaction is not yet started, starts it.
If there is already a savepoint of the same name, this call erases that old
savepoint and replaces it with a new one. Savepoints are deleted in a transaction
-commit or rollback. */
-
+commit or rollback.
+@return always DB_SUCCESS */
+UNIV_INTERN
ulint
trx_savepoint_for_mysql(
/*====================*/
- /* out: always DB_SUCCESS */
- trx_t* trx, /* in: transaction handle */
- const char* savepoint_name, /* in: savepoint name */
- ib_longlong binlog_cache_pos); /* in: MySQL binlog cache
+ trx_t* trx, /*!< in: transaction handle */
+ const char* savepoint_name, /*!< in: savepoint name */
+ ib_int64_t binlog_cache_pos); /*!< in: MySQL binlog cache
position corresponding to this
connection at the time of the
savepoint */
-/***********************************************************************
+/*******************************************************************//**
Releases a named savepoint. Savepoints which
-were set after this savepoint are deleted. */
-
+were set after this savepoint are deleted.
+@return if no savepoint of the name found then DB_NO_SAVEPOINT,
+otherwise DB_SUCCESS */
+UNIV_INTERN
ulint
trx_release_savepoint_for_mysql(
/*============================*/
- /* out: if no savepoint
- of the name found then
- DB_NO_SAVEPOINT,
- otherwise DB_SUCCESS */
- trx_t* trx, /* in: transaction handle */
- const char* savepoint_name); /* in: savepoint name */
+ trx_t* trx, /*!< in: transaction handle */
+ const char* savepoint_name); /*!< in: savepoint name */
-/***********************************************************************
+/*******************************************************************//**
Frees a single savepoint struct. */
-
+UNIV_INTERN
void
trx_roll_savepoint_free(
/*=====================*/
- trx_t* trx, /* in: transaction handle */
- trx_named_savept_t* savep); /* in: savepoint to free */
+ trx_t* trx, /*!< in: transaction handle */
+ trx_named_savept_t* savep); /*!< in: savepoint to free */
-/***********************************************************************
+/*******************************************************************//**
Frees savepoint structs starting from savep, if savep == NULL then
free all savepoints. */
-
+UNIV_INTERN
void
trx_roll_savepoints_free(
/*=====================*/
- trx_t* trx, /* in: transaction handle */
- trx_named_savept_t* savep); /* in: free all savepoints > this one;
+ trx_t* trx, /*!< in: transaction handle */
+ trx_named_savept_t* savep); /*!< in: free all savepoints > this one;
if this is NULL, free all savepoints
of trx */
-extern sess_t* trx_dummy_sess;
-
-/* A cell in the array used during a rollback and a purge */
+/** A cell of trx_undo_arr_struct; used during a rollback and a purge */
struct trx_undo_inf_struct{
- dulint trx_no; /* transaction number: not defined during
+ trx_id_t trx_no; /*!< transaction number: not defined during
a rollback */
- dulint undo_no; /* undo number of an undo record */
- ibool in_use; /* TRUE if the cell is in use */
+ undo_no_t undo_no;/*!< undo number of an undo record */
+ ibool in_use; /*!< TRUE if the cell is in use */
};
-/* During a rollback and a purge, undo numbers of undo records currently being
+/** During a rollback and a purge, undo numbers of undo records currently being
processed are stored in this array */
struct trx_undo_arr_struct{
- ulint n_cells; /* number of cells in the array */
- ulint n_used; /* number of cells currently in use */
- trx_undo_inf_t* infos; /* the array of undo infos */
- mem_heap_t* heap; /* memory heap from which allocated */
+ ulint n_cells; /*!< number of cells in the array */
+ ulint n_used; /*!< number of cells currently in use */
+ trx_undo_inf_t* infos; /*!< the array of undo infos */
+ mem_heap_t* heap; /*!< memory heap from which allocated */
+};
+
+/** Rollback node states */
+enum roll_node_state {
+ ROLL_NODE_SEND = 1, /*!< about to send a rollback signal to
+ the transaction */
+ ROLL_NODE_WAIT /*!< rollback signal sent to the transaction,
+ waiting for completion */
};
-/* Rollback command node in a query graph */
+/** Rollback command node in a query graph */
struct roll_node_struct{
- que_common_t common; /* node type: QUE_NODE_ROLLBACK */
- ulint state; /* node execution state */
- ibool partial;/* TRUE if we want a partial rollback */
- trx_savept_t savept; /* savepoint to which to roll back, in the
- case of a partial rollback */
+ que_common_t common; /*!< node type: QUE_NODE_ROLLBACK */
+ enum roll_node_state state; /*!< node execution state */
+ ibool partial;/*!< TRUE if we want a partial
+ rollback */
+ trx_savept_t savept; /*!< savepoint to which to
+ roll back, in the case of a
+ partial rollback */
};
-/* A savepoint set with SQL's "SAVEPOINT savepoint_id" command */
+/** A savepoint set with SQL's "SAVEPOINT savepoint_id" command */
struct trx_named_savept_struct{
- char* name; /* savepoint name */
- trx_savept_t savept; /* the undo number corresponding to
+ char* name; /*!< savepoint name */
+ trx_savept_t savept; /*!< the undo number corresponding to
the savepoint */
- ib_longlong mysql_binlog_cache_pos;
- /* the MySQL binlog cache position
+ ib_int64_t mysql_binlog_cache_pos;
+ /*!< the MySQL binlog cache position
corresponding to this savepoint, not
defined if the MySQL binlogging is not
enabled */
UT_LIST_NODE_T(trx_named_savept_t)
- trx_savepoints; /* the list of savepoints of a
+ trx_savepoints; /*!< the list of savepoints of a
transaction */
};
-/* Rollback node states */
-#define ROLL_NODE_SEND 1
-#define ROLL_NODE_WAIT 2
-
#ifndef UNIV_NONINL
#include "trx0roll.ic"
#endif
diff --git a/storage/innobase/include/trx0roll.ic b/storage/innobase/include/trx0roll.ic
index dfde83ac478..3460832b18c 100644
--- a/storage/innobase/include/trx0roll.ic
+++ b/storage/innobase/include/trx0roll.ic
@@ -1,20 +1,37 @@
-/******************************************************
-Transaction rollback
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0roll.ic
+Transaction rollback
Created 3/26/1996 Heikki Tuuri
*******************************************************/
-/***********************************************************************
-Returns pointer to nth element in an undo number array. */
+/*******************************************************************//**
+Returns pointer to nth element in an undo number array.
+@return pointer to the nth element */
UNIV_INLINE
trx_undo_inf_t*
trx_undo_arr_get_nth_info(
/*======================*/
- /* out: pointer to the nth element */
- trx_undo_arr_t* arr, /* in: undo number array */
- ulint n) /* in: position */
+ trx_undo_arr_t* arr, /*!< in: undo number array */
+ ulint n) /*!< in: position */
{
ut_ad(arr);
ut_ad(n < arr->n_cells);
diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h
index 46ba010bd1d..ba1fc88b6c4 100644
--- a/storage/innobase/include/trx0rseg.h
+++ b/storage/innobase/include/trx0rseg.h
@@ -1,7 +1,24 @@
-/******************************************************
-Rollback segment
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0rseg.h
+Rollback segment
Created 3/26/1996 Heikki Tuuri
*******************************************************/
@@ -13,104 +30,112 @@ Created 3/26/1996 Heikki Tuuri
#include "trx0types.h"
#include "trx0sys.h"
-/**********************************************************************
-Gets a rollback segment header. */
+/******************************************************************//**
+Gets a rollback segment header.
+@return rollback segment header, page x-latched */
UNIV_INLINE
trx_rsegf_t*
trx_rsegf_get(
/*==========*/
- /* out: rollback segment header, page
- x-latched */
- ulint space, /* in: space where placed */
- ulint page_no, /* in: page number of the header */
- mtr_t* mtr); /* in: mtr */
-/**********************************************************************
-Gets a newly created rollback segment header. */
+ ulint space, /*!< in: space where placed */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page_no, /*!< in: page number of the header */
+ mtr_t* mtr); /*!< in: mtr */
+/******************************************************************//**
+Gets a newly created rollback segment header.
+@return rollback segment header, page x-latched */
UNIV_INLINE
trx_rsegf_t*
trx_rsegf_get_new(
/*==============*/
- /* out: rollback segment header, page
- x-latched */
- ulint space, /* in: space where placed */
- ulint page_no, /* in: page number of the header */
- mtr_t* mtr); /* in: mtr */
-/*******************************************************************
-Gets the file page number of the nth undo log slot. */
+ ulint space, /*!< in: space where placed */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page_no, /*!< in: page number of the header */
+ mtr_t* mtr); /*!< in: mtr */
+/***************************************************************//**
+Gets the file page number of the nth undo log slot.
+@return page number of the undo log segment */
UNIV_INLINE
ulint
trx_rsegf_get_nth_undo(
/*===================*/
- /* out: page number of the undo log segment */
- trx_rsegf_t* rsegf, /* in: rollback segment header */
- ulint n, /* in: index of slot */
- mtr_t* mtr); /* in: mtr */
-/*******************************************************************
+ trx_rsegf_t* rsegf, /*!< in: rollback segment header */
+ ulint n, /*!< in: index of slot */
+ mtr_t* mtr); /*!< in: mtr */
+/***************************************************************//**
Sets the file page number of the nth undo log slot. */
UNIV_INLINE
void
trx_rsegf_set_nth_undo(
/*===================*/
- trx_rsegf_t* rsegf, /* in: rollback segment header */
- ulint n, /* in: index of slot */
- ulint page_no,/* in: page number of the undo log segment */
- mtr_t* mtr); /* in: mtr */
-/********************************************************************
-Looks for a free slot for an undo log segment. */
+ trx_rsegf_t* rsegf, /*!< in: rollback segment header */
+ ulint n, /*!< in: index of slot */
+ ulint page_no,/*!< in: page number of the undo log segment */
+ mtr_t* mtr); /*!< in: mtr */
+/****************************************************************//**
+Looks for a free slot for an undo log segment.
+@return slot index or ULINT_UNDEFINED if not found */
UNIV_INLINE
ulint
trx_rsegf_undo_find_free(
/*=====================*/
- /* out: slot index or ULINT_UNDEFINED if not
- found */
- trx_rsegf_t* rsegf, /* in: rollback segment header */
- mtr_t* mtr); /* in: mtr */
-/**********************************************************************
-Looks for a rollback segment, based on the rollback segment id. */
-
+ trx_rsegf_t* rsegf, /*!< in: rollback segment header */
+ mtr_t* mtr); /*!< in: mtr */
+/******************************************************************//**
+Looks for a rollback segment, based on the rollback segment id.
+@return rollback segment */
+UNIV_INTERN
trx_rseg_t*
trx_rseg_get_on_id(
/*===============*/
- /* out: rollback segment */
- ulint id); /* in: rollback segment id */
-/********************************************************************
+ ulint id); /*!< in: rollback segment id */
+/****************************************************************//**
Creates a rollback segment header. This function is called only when
-a new rollback segment is created in the database. */
-
+a new rollback segment is created in the database.
+@return page number of the created segment, FIL_NULL if fail */
+UNIV_INTERN
ulint
trx_rseg_header_create(
/*===================*/
- /* out: page number of the created segment,
- FIL_NULL if fail */
- ulint space, /* in: space id */
- ulint max_size, /* in: max size in pages */
- ulint* slot_no, /* out: rseg id == slot number in trx sys */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************************
+ ulint space, /*!< in: space id */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint max_size, /*!< in: max size in pages */
+ ulint* slot_no, /*!< out: rseg id == slot number in trx sys */
+ mtr_t* mtr); /*!< in: mtr */
+/*********************************************************************//**
Creates the memory copies for rollback segments and initializes the
rseg list and array in trx_sys at a database startup. */
-
+UNIV_INTERN
void
trx_rseg_list_and_array_init(
/*=========================*/
- trx_sysf_t* sys_header, /* in: trx system header */
- mtr_t* mtr); /* in: mtr */
-/********************************************************************
-Creates a new rollback segment to the database. */
-
+ trx_sysf_t* sys_header, /*!< in: trx system header */
+ mtr_t* mtr); /*!< in: mtr */
+/****************************************************************//**
+Creates a new rollback segment in the database.
+@return the created segment object, NULL if fail */
+UNIV_INTERN
trx_rseg_t*
trx_rseg_create(
/*============*/
- /* out: the created segment object, NULL if
- fail */
- ulint space, /* in: space id */
- ulint max_size, /* in: max size in pages */
- ulint* id, /* out: rseg id */
- mtr_t* mtr); /* in: mtr */
+ ulint space, /*!< in: space id */
+ ulint max_size, /*!< in: max size in pages */
+ ulint* id, /*!< out: rseg id */
+ mtr_t* mtr); /*!< in: mtr */
+/***********************************************************************//**
+Frees an instance of the rollback segment in memory. */
+UNIV_INTERN
+void
+trx_rseg_mem_free(
+/*==============*/
+ trx_rseg_t* rseg); /*!< in, own: instance to free */
/* Number of undo log slots in a rollback segment file copy */
-#define TRX_RSEG_N_SLOTS 1024
+#define TRX_RSEG_N_SLOTS (UNIV_PAGE_SIZE / 16)
/* Maximum number of transactions supported by a single rollback segment */
#define TRX_RSEG_MAX_N_TRXS (TRX_RSEG_N_SLOTS / 2)
@@ -118,14 +143,16 @@ trx_rseg_create(
/* The rollback segment memory object */
struct trx_rseg_struct{
/*--------------------------------------------------------*/
- ulint id; /* rollback segment id == the index of
+ ulint id; /*!< rollback segment id == the index of
its slot in the trx system file copy */
- mutex_t mutex; /* mutex protecting the fields in this
+ mutex_t mutex; /*!< mutex protecting the fields in this
struct except id; NOTE that the latching
order must always be kernel mutex ->
rseg mutex */
- ulint space; /* space where the rollback segment is
+ ulint space; /*!< space where the rollback segment
header is placed */
+ ulint zip_size;/* compressed page size of space
+ in bytes, or 0 for uncompressed spaces */
ulint page_no;/* page number of the rollback segment
header */
ulint max_size;/* maximum allowed size in pages */
@@ -145,14 +172,14 @@ struct trx_rseg_struct{
/* List of insert undo log segments
cached for fast reuse */
/*--------------------------------------------------------*/
- ulint last_page_no; /* Page number of the last not yet
+ ulint last_page_no; /*!< Page number of the last not yet
purged log header in the history list;
FIL_NULL if all list purged */
- ulint last_offset; /* Byte offset of the last not yet
+ ulint last_offset; /*!< Byte offset of the last not yet
purged log header */
- dulint last_trx_no; /* Transaction number of the last not
+ trx_id_t last_trx_no; /*!< Transaction number of the last not
yet purged log */
- ibool last_del_marks; /* TRUE if the last not yet purged log
+ ibool last_del_marks; /*!< TRUE if the last not yet purged log
needs purging */
/*--------------------------------------------------------*/
UT_LIST_NODE_T(trx_rseg_t) rseg_list;
diff --git a/storage/innobase/include/trx0rseg.ic b/storage/innobase/include/trx0rseg.ic
index 577cd0dee7b..daffa92fc7d 100644
--- a/storage/innobase/include/trx0rseg.ic
+++ b/storage/innobase/include/trx0rseg.ic
@@ -1,7 +1,24 @@
-/******************************************************
-Rollback segment
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0rseg.ic
+Rollback segment
Created 3/26/1996 Heikki Tuuri
*******************************************************/
@@ -9,62 +26,64 @@ Created 3/26/1996 Heikki Tuuri
#include "srv0srv.h"
#include "mtr0log.h"
-/**********************************************************************
-Gets a rollback segment header. */
+/******************************************************************//**
+Gets a rollback segment header.
+@return rollback segment header, page x-latched */
UNIV_INLINE
trx_rsegf_t*
trx_rsegf_get(
/*==========*/
- /* out: rollback segment header, page
- x-latched */
- ulint space, /* in: space where placed */
- ulint page_no, /* in: page number of the header */
- mtr_t* mtr) /* in: mtr */
+ ulint space, /*!< in: space where placed */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page_no, /*!< in: page number of the header */
+ mtr_t* mtr) /*!< in: mtr */
{
+ buf_block_t* block;
trx_rsegf_t* header;
- header = TRX_RSEG + buf_page_get(space, page_no, RW_X_LATCH, mtr);
+ block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr);
+ buf_block_dbg_add_level(block, SYNC_RSEG_HEADER);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(header, SYNC_RSEG_HEADER);
-#endif /* UNIV_SYNC_DEBUG */
+ header = TRX_RSEG + buf_block_get_frame(block);
return(header);
}
-/**********************************************************************
-Gets a newly created rollback segment header. */
+/******************************************************************//**
+Gets a newly created rollback segment header.
+@return rollback segment header, page x-latched */
UNIV_INLINE
trx_rsegf_t*
trx_rsegf_get_new(
/*==============*/
- /* out: rollback segment header, page
- x-latched */
- ulint space, /* in: space where placed */
- ulint page_no, /* in: page number of the header */
- mtr_t* mtr) /* in: mtr */
+ ulint space, /*!< in: space where placed */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page_no, /*!< in: page number of the header */
+ mtr_t* mtr) /*!< in: mtr */
{
+ buf_block_t* block;
trx_rsegf_t* header;
- header = TRX_RSEG + buf_page_get(space, page_no, RW_X_LATCH, mtr);
+ block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr);
+ buf_block_dbg_add_level(block, SYNC_RSEG_HEADER_NEW);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(header, SYNC_RSEG_HEADER_NEW);
-#endif /* UNIV_SYNC_DEBUG */
+ header = TRX_RSEG + buf_block_get_frame(block);
return(header);
}
-/*******************************************************************
-Gets the file page number of the nth undo log slot. */
+/***************************************************************//**
+Gets the file page number of the nth undo log slot.
+@return page number of the undo log segment */
UNIV_INLINE
ulint
trx_rsegf_get_nth_undo(
/*===================*/
- /* out: page number of the undo log segment */
- trx_rsegf_t* rsegf, /* in: rollback segment header */
- ulint n, /* in: index of slot */
- mtr_t* mtr) /* in: mtr */
+ trx_rsegf_t* rsegf, /*!< in: rollback segment header */
+ ulint n, /*!< in: index of slot */
+ mtr_t* mtr) /*!< in: mtr */
{
if (UNIV_UNLIKELY(n >= TRX_RSEG_N_SLOTS)) {
fprintf(stderr,
@@ -77,16 +96,16 @@ trx_rsegf_get_nth_undo(
+ n * TRX_RSEG_SLOT_SIZE, MLOG_4BYTES, mtr));
}
-/*******************************************************************
+/***************************************************************//**
Sets the file page number of the nth undo log slot. */
UNIV_INLINE
void
trx_rsegf_set_nth_undo(
/*===================*/
- trx_rsegf_t* rsegf, /* in: rollback segment header */
- ulint n, /* in: index of slot */
- ulint page_no,/* in: page number of the undo log segment */
- mtr_t* mtr) /* in: mtr */
+ trx_rsegf_t* rsegf, /*!< in: rollback segment header */
+ ulint n, /*!< in: index of slot */
+ ulint page_no,/*!< in: page number of the undo log segment */
+ mtr_t* mtr) /*!< in: mtr */
{
if (UNIV_UNLIKELY(n >= TRX_RSEG_N_SLOTS)) {
fprintf(stderr,
@@ -99,16 +118,15 @@ trx_rsegf_set_nth_undo(
page_no, MLOG_4BYTES, mtr);
}
-/********************************************************************
-Looks for a free slot for an undo log segment. */
+/****************************************************************//**
+Looks for a free slot for an undo log segment.
+@return slot index or ULINT_UNDEFINED if not found */
UNIV_INLINE
ulint
trx_rsegf_undo_find_free(
/*=====================*/
- /* out: slot index or ULINT_UNDEFINED if not
- found */
- trx_rsegf_t* rsegf, /* in: rollback segment header */
- mtr_t* mtr) /* in: mtr */
+ trx_rsegf_t* rsegf, /*!< in: rollback segment header */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint i;
ulint page_no;
diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
index bad3c9d570c..a53296a06d9 100644
--- a/storage/innobase/include/trx0sys.h
+++ b/storage/innobase/include/trx0sys.h
@@ -1,7 +1,24 @@
-/******************************************************
-Transaction system
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0sys.h
+Transaction system
Created 3/26/1996 Heikki Tuuri
*******************************************************/
@@ -12,196 +29,207 @@ Created 3/26/1996 Heikki Tuuri
#include "univ.i"
#include "trx0types.h"
+#include "fsp0types.h"
+#include "fil0fil.h"
+#include "buf0buf.h"
+#ifndef UNIV_HOTBACKUP
#include "mtr0mtr.h"
#include "ut0byte.h"
#include "mem0mem.h"
#include "sync0sync.h"
#include "ut0lst.h"
-#include "buf0buf.h"
-#include "fil0fil.h"
#include "read0types.h"
+#include "page0types.h"
-/* In a MySQL replication slave, in crash recovery we store the master log
-file name and position here. We have successfully got the updates to InnoDB
-up to this position. If .._pos is -1, it means no crash recovery was needed,
-or there was no master log position info inside InnoDB. */
-
+/** In a MySQL replication slave, in crash recovery we store the master log
+file name and position here. */
+/* @{ */
+/** Master binlog file name */
extern char trx_sys_mysql_master_log_name[];
-extern ib_longlong trx_sys_mysql_master_log_pos;
+/** Master binlog file position. We have successfully got the updates
+up to this position. -1 means that no crash recovery was needed, or
+there was no master log position info inside InnoDB.*/
+extern ib_int64_t trx_sys_mysql_master_log_pos;
+/* @} */
-/* If this MySQL server uses binary logging, after InnoDB has been inited
+/** If this MySQL server uses binary logging, after InnoDB has been inited
and if it has done a crash recovery, we store the binlog file name and position
-here. If .._pos is -1, it means there was no binlog position info inside
-InnoDB. */
-
+here. */
+/* @{ */
+/** Binlog file name */
extern char trx_sys_mysql_bin_log_name[];
-extern ib_longlong trx_sys_mysql_bin_log_pos;
+/** Binlog file position, or -1 if unknown */
+extern ib_int64_t trx_sys_mysql_bin_log_pos;
+/* @} */
-/* The transaction system */
+/** The transaction system */
extern trx_sys_t* trx_sys;
-/* Doublewrite system */
+/** Doublewrite system */
extern trx_doublewrite_t* trx_doublewrite;
-/* Set to TRUE when the doublewrite buffer is being created */
-extern ibool trx_doublewrite_buf_is_being_created;
+/** The following is set to TRUE when we are upgrading from pre-4.1
+format data files to the multiple tablespaces format data files */
extern ibool trx_doublewrite_must_reset_space_ids;
+/** Set to TRUE when the doublewrite buffer is being created */
+extern ibool trx_doublewrite_buf_is_being_created;
+/** The following is TRUE when we are using the database in the
+post-4.1 format, i.e., we have successfully upgraded, or have created
+a new database installation */
extern ibool trx_sys_multiple_tablespace_format;
-/********************************************************************
+/****************************************************************//**
Creates the doublewrite buffer to a new InnoDB installation. The header of the
doublewrite buffer is placed on the trx system header page. */
-
+UNIV_INTERN
void
trx_sys_create_doublewrite_buf(void);
/*================================*/
-/********************************************************************
+/****************************************************************//**
At a database startup initializes the doublewrite buffer memory structure if
we already have a doublewrite buffer created in the data files. If we are
upgrading to an InnoDB version which supports multiple tablespaces, then this
function performs the necessary update operations. If we are in a crash
recovery, this function uses a possible doublewrite buffer to restore
half-written pages in the data files. */
-
+UNIV_INTERN
void
trx_sys_doublewrite_init_or_restore_pages(
/*======================================*/
- ibool restore_corrupt_pages);
-/********************************************************************
+ ibool restore_corrupt_pages); /*!< in: TRUE=restore pages */
+/****************************************************************//**
Marks the trx sys header when we have successfully upgraded to the >= 4.1.x
multiple tablespace format. */
-
+UNIV_INTERN
void
trx_sys_mark_upgraded_to_multiple_tablespaces(void);
/*===============================================*/
-/********************************************************************
-Determines if a page number is located inside the doublewrite buffer. */
-
+/****************************************************************//**
+Determines if a page number is located inside the doublewrite buffer.
+@return TRUE if the location is inside the two blocks of the
+doublewrite buffer */
+UNIV_INTERN
ibool
trx_doublewrite_page_inside(
/*========================*/
- /* out: TRUE if the location is inside
- the two blocks of the doublewrite buffer */
- ulint page_no); /* in: page number */
-/*******************************************************************
-Checks if a page address is the trx sys header page. */
+ ulint page_no); /*!< in: page number */
+/***************************************************************//**
+Checks if a page address is the trx sys header page.
+@return TRUE if trx sys header page */
UNIV_INLINE
ibool
trx_sys_hdr_page(
/*=============*/
- /* out: TRUE if trx sys header page */
- ulint space, /* in: space */
- ulint page_no);/* in: page number */
-/*********************************************************************
+ ulint space, /*!< in: space */
+ ulint page_no);/*!< in: page number */
+/*****************************************************************//**
Creates and initializes the central memory structures for the transaction
system. This is called when the database is started. */
-
+UNIV_INTERN
void
trx_sys_init_at_db_start(void);
/*==========================*/
-/*********************************************************************
+/*****************************************************************//**
Creates and initializes the transaction system at the database creation. */
-
+UNIV_INTERN
void
trx_sys_create(void);
/*================*/
-/********************************************************************
-Looks for a free slot for a rollback segment in the trx system file copy. */
-
+/****************************************************************//**
+Looks for a free slot for a rollback segment in the trx system file copy.
+@return slot index or ULINT_UNDEFINED if not found */
+UNIV_INTERN
ulint
trx_sysf_rseg_find_free(
/*====================*/
- /* out: slot index or ULINT_UNDEFINED
- if not found */
- mtr_t* mtr); /* in: mtr */
-/*******************************************************************
-Gets the pointer in the nth slot of the rseg array. */
+ mtr_t* mtr); /*!< in: mtr */
+/***************************************************************//**
+Gets the pointer in the nth slot of the rseg array.
+@return pointer to rseg object, NULL if slot not in use */
UNIV_INLINE
trx_rseg_t*
trx_sys_get_nth_rseg(
/*=================*/
- /* out: pointer to rseg object, NULL if slot
- not in use */
- trx_sys_t* sys, /* in: trx system */
- ulint n); /* in: index of slot */
-/*******************************************************************
+ trx_sys_t* sys, /*!< in: trx system */
+ ulint n); /*!< in: index of slot */
+/***************************************************************//**
Sets the pointer in the nth slot of the rseg array. */
UNIV_INLINE
void
trx_sys_set_nth_rseg(
/*=================*/
- trx_sys_t* sys, /* in: trx system */
- ulint n, /* in: index of slot */
- trx_rseg_t* rseg); /* in: pointer to rseg object, NULL if slot
+ trx_sys_t* sys, /*!< in: trx system */
+ ulint n, /*!< in: index of slot */
+ trx_rseg_t* rseg); /*!< in: pointer to rseg object, NULL if slot
not in use */
-/**************************************************************************
-Gets a pointer to the transaction system file copy and x-locks its page. */
+/**********************************************************************//**
+Gets a pointer to the transaction system file copy and x-locks its page.
+@return pointer to system file copy, page x-locked */
UNIV_INLINE
trx_sysf_t*
trx_sysf_get(
/*=========*/
- /* out: pointer to system file copy, page x-locked */
- mtr_t* mtr); /* in: mtr */
-/*********************************************************************
+ mtr_t* mtr); /*!< in: mtr */
+/*****************************************************************//**
Gets the space of the nth rollback segment slot in the trx system
-file copy. */
+file copy.
+@return space id */
UNIV_INLINE
ulint
trx_sysf_rseg_get_space(
/*====================*/
- /* out: space id */
- trx_sysf_t* sys_header, /* in: trx sys file copy */
- ulint i, /* in: slot index == rseg id */
- mtr_t* mtr); /* in: mtr */
-/*********************************************************************
+ trx_sysf_t* sys_header, /*!< in: trx sys file copy */
+ ulint i, /*!< in: slot index == rseg id */
+ mtr_t* mtr); /*!< in: mtr */
+/*****************************************************************//**
Gets the page number of the nth rollback segment slot in the trx system
-file copy. */
+file copy.
+@return page number, FIL_NULL if slot unused */
UNIV_INLINE
ulint
trx_sysf_rseg_get_page_no(
/*======================*/
- /* out: page number, FIL_NULL
- if slot unused */
- trx_sysf_t* sys_header, /* in: trx sys file copy */
- ulint i, /* in: slot index == rseg id */
- mtr_t* mtr); /* in: mtr */
-/*********************************************************************
+ trx_sysf_t* sys_header, /*!< in: trx sys file copy */
+ ulint i, /*!< in: slot index == rseg id */
+ mtr_t* mtr); /*!< in: mtr */
+/*****************************************************************//**
Sets the space id of the nth rollback segment slot in the trx system
file copy. */
UNIV_INLINE
void
trx_sysf_rseg_set_space(
/*====================*/
- trx_sysf_t* sys_header, /* in: trx sys file copy */
- ulint i, /* in: slot index == rseg id */
- ulint space, /* in: space id */
- mtr_t* mtr); /* in: mtr */
-/*********************************************************************
+ trx_sysf_t* sys_header, /*!< in: trx sys file copy */
+ ulint i, /*!< in: slot index == rseg id */
+ ulint space, /*!< in: space id */
+ mtr_t* mtr); /*!< in: mtr */
+/*****************************************************************//**
Sets the page number of the nth rollback segment slot in the trx system
file copy. */
UNIV_INLINE
void
trx_sysf_rseg_set_page_no(
/*======================*/
- trx_sysf_t* sys_header, /* in: trx sys file copy */
- ulint i, /* in: slot index == rseg id */
- ulint page_no, /* in: page number, FIL_NULL if
+ trx_sysf_t* sys_header, /*!< in: trx sys file copy */
+ ulint i, /*!< in: slot index == rseg id */
+ ulint page_no, /*!< in: page number, FIL_NULL if
the slot is reset to unused */
- mtr_t* mtr); /* in: mtr */
-/*********************************************************************
-Allocates a new transaction id. */
+ mtr_t* mtr); /*!< in: mtr */
+/*****************************************************************//**
+Allocates a new transaction id.
+@return new, allocated trx id */
UNIV_INLINE
-dulint
+trx_id_t
trx_sys_get_new_trx_id(void);
/*========================*/
- /* out: new, allocated trx id */
-/*********************************************************************
-Allocates a new transaction number. */
+/*****************************************************************//**
+Allocates a new transaction number.
+@return new, allocated trx number */
UNIV_INLINE
-dulint
+trx_id_t
trx_sys_get_new_trx_no(void);
/*========================*/
- /* out: new, allocated trx number */
-/*********************************************************************
+#endif /* !UNIV_HOTBACKUP */
+/*****************************************************************//**
Writes a trx id to an index page. In case that the id size changes in
some future version, this function should be used instead of
mach_write_... */
@@ -209,93 +237,198 @@ UNIV_INLINE
void
trx_write_trx_id(
/*=============*/
- byte* ptr, /* in: pointer to memory where written */
- dulint id); /* in: id */
-/*********************************************************************
+ byte* ptr, /*!< in: pointer to memory where written */
+ trx_id_t id); /*!< in: id */
+#ifndef UNIV_HOTBACKUP
+/*****************************************************************//**
Reads a trx id from an index page. In case that the id size changes in
some future version, this function should be used instead of
-mach_read_... */
+mach_read_...
+@return id */
UNIV_INLINE
-dulint
+trx_id_t
trx_read_trx_id(
/*============*/
- /* out: id */
- byte* ptr); /* in: pointer to memory from where to read */
-/********************************************************************
-Looks for the trx handle with the given id in trx_list. */
+ const byte* ptr); /*!< in: pointer to memory from where to read */
+/****************************************************************//**
+Looks for the trx handle with the given id in trx_list.
+@return the trx handle or NULL if not found */
UNIV_INLINE
trx_t*
trx_get_on_id(
/*==========*/
- /* out: the trx handle or NULL if not found */
- dulint trx_id); /* in: trx id to search for */
-/********************************************************************
+ trx_id_t trx_id);/*!< in: trx id to search for */
+/****************************************************************//**
Returns the minimum trx id in trx list. This is the smallest id for which
the trx can possibly be active. (But, you must look at the trx->conc_state to
find out if the minimum trx id transaction itself is active, or already
-committed.) */
+committed.)
+@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
UNIV_INLINE
-dulint
+trx_id_t
trx_list_get_min_trx_id(void);
/*=========================*/
- /* out: the minimum trx id, or trx_sys->max_trx_id
- if the trx list is empty */
-/********************************************************************
-Checks if a transaction with the given id is active. */
+/****************************************************************//**
+Checks if a transaction with the given id is active.
+@return TRUE if active */
UNIV_INLINE
ibool
trx_is_active(
/*==========*/
- /* out: TRUE if active */
- dulint trx_id);/* in: trx id of the transaction */
-/********************************************************************
-Checks that trx is in the trx list. */
-
+ trx_id_t trx_id);/*!< in: trx id of the transaction */
+/****************************************************************//**
+Checks that trx is in the trx list.
+@return TRUE if is in */
+UNIV_INTERN
ibool
trx_in_trx_list(
/*============*/
- /* out: TRUE if is in */
- trx_t* in_trx);/* in: trx */
-/*********************************************************************
+ trx_t* in_trx);/*!< in: trx */
+/*****************************************************************//**
Updates the offset information about the end of the MySQL binlog entry
which corresponds to the transaction just being committed. In a MySQL
replication slave updates the latest master binlog position up to which
replication has proceeded. */
-
+UNIV_INTERN
void
trx_sys_update_mysql_binlog_offset(
/*===============================*/
- const char* file_name,/* in: MySQL log file name */
- ib_longlong offset, /* in: position in that log file */
- ulint field, /* in: offset of the MySQL log info field in
+ const char* file_name,/*!< in: MySQL log file name */
+ ib_int64_t offset, /*!< in: position in that log file */
+ ulint field, /*!< in: offset of the MySQL log info field in
the trx sys header */
- mtr_t* mtr); /* in: mtr */
-/*********************************************************************
+ mtr_t* mtr); /*!< in: mtr */
+/*****************************************************************//**
Prints to stderr the MySQL binlog offset info in the trx system header if
the magic number shows it valid. */
-
+UNIV_INTERN
void
trx_sys_print_mysql_binlog_offset(void);
/*===================================*/
-#ifdef UNIV_HOTBACKUP
-/*********************************************************************
-Prints to stderr the MySQL binlog info in the system header if the
-magic number shows it valid. */
-
-void
-trx_sys_print_mysql_binlog_offset_from_page(
-/*========================================*/
- byte* page); /* in: buffer containing the trx system header page,
- i.e., page number TRX_SYS_PAGE_NO in the tablespace */
-#endif /* UNIV_HOTBACKUP */
-/*********************************************************************
+/*****************************************************************//**
Prints to stderr the MySQL master log offset info in the trx system header if
the magic number shows it valid. */
-
+UNIV_INTERN
void
trx_sys_print_mysql_master_log_pos(void);
/*====================================*/
-
+/*****************************************************************//**
+Initializes the tablespace tag system. */
+UNIV_INTERN
+void
+trx_sys_file_format_init(void);
+/*==========================*/
+/*****************************************************************//**
+Closes the tablespace tag system. */
+UNIV_INTERN
+void
+trx_sys_file_format_close(void);
+/*===========================*/
+/********************************************************************//**
+Tags the system table space with minimum format id if it has not been
+tagged yet.
+WARNING: This function is only called during the startup and AFTER the
+redo log application during recovery has finished. */
+UNIV_INTERN
+void
+trx_sys_file_format_tag_init(void);
+/*==============================*/
+/*****************************************************************//**
+Shutdown/Close the transaction system. */
+UNIV_INTERN
+void
+trx_sys_close(void);
+/*===============*/
+/*****************************************************************//**
+Get the name representation of the file format from its id.
+@return pointer to the name */
+UNIV_INTERN
+const char*
+trx_sys_file_format_id_to_name(
+/*===========================*/
+ const ulint id); /*!< in: id of the file format */
+/*****************************************************************//**
+Set the file format id unconditionally except if it's already the
+same value.
+@return TRUE if value updated */
+UNIV_INTERN
+ibool
+trx_sys_file_format_max_set(
+/*========================*/
+ ulint format_id, /*!< in: file format id */
+ const char** name); /*!< out: max file format name or
+ NULL if not needed. */
+/*****************************************************************//**
+Get the name representation of the max file format.
+@return pointer to the max format name */
+UNIV_INTERN
+const char*
+trx_sys_file_format_max_get(void);
+/*=============================*/
+/*****************************************************************//**
+Check for the max file format tag stored on disk.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+ulint
+trx_sys_file_format_max_check(
+/*==========================*/
+ ulint max_format_id); /*!< in: the max format id to check */
+/********************************************************************//**
+Update the file format tag in the system tablespace only if the given
+format id is greater than the known max id.
+@return TRUE if format_id was bigger than the known max id */
+UNIV_INTERN
+ibool
+trx_sys_file_format_max_upgrade(
+/*============================*/
+ const char** name, /*!< out: max file format name */
+ ulint format_id); /*!< in: file format identifier */
+#else /* !UNIV_HOTBACKUP */
+/*****************************************************************//**
+Prints to stderr the MySQL binlog info in the system header if the
+magic number shows it valid. */
+UNIV_INTERN
+void
+trx_sys_print_mysql_binlog_offset_from_page(
+/*========================================*/
+ const byte* page); /*!< in: buffer containing the trx
+ system header page, i.e., page number
+ TRX_SYS_PAGE_NO in the tablespace */
+/*****************************************************************//**
+Reads the file format id from the first system table space file.
+Even if the call succeeds and returns TRUE, the returned format id
+may be ULINT_UNDEFINED signalling that the format id was not present
+in the data file.
+@return TRUE if call succeeds */
+UNIV_INTERN
+ibool
+trx_sys_read_file_format_id(
+/*========================*/
+ const char *pathname, /*!< in: pathname of the first system
+ table space file */
+ ulint *format_id); /*!< out: file format of the system table
+ space */
+/*****************************************************************//**
+Reads the file format id from the given per-table data file.
+@return TRUE if call succeeds */
+UNIV_INTERN
+ibool
+trx_sys_read_pertable_file_format_id(
+/*=================================*/
+ const char *pathname, /*!< in: pathname of a per-table
+ datafile */
+ ulint *format_id); /*!< out: file format of the per-table
+ data file */
+/*****************************************************************//**
+Get the name representation of the file format from its id.
+@return pointer to the name */
+UNIV_INTERN
+const char*
+trx_sys_file_format_id_to_name(
+/*===========================*/
+ const ulint id); /*!< in: id of the file format */
+
+#endif /* !UNIV_HOTBACKUP */
/* The automatically created system rollback segment has this id */
#define TRX_SYS_SYSTEM_RSEG_ID 0
@@ -307,144 +440,182 @@ trx_sys_print_mysql_master_log_pos(void);
/* The offset of the transaction system header on the page */
#define TRX_SYS FSEG_PAGE_DATA
-/* Transaction system header */
-/*-------------------------------------------------------------*/
-#define TRX_SYS_TRX_ID_STORE 0 /* the maximum trx id or trx number
- modulo TRX_SYS_TRX_ID_UPDATE_MARGIN
+/** Transaction system header */
+/*------------------------------------------------------------- @{ */
+#define TRX_SYS_TRX_ID_STORE 0 /*!< the maximum trx id or trx
+ number modulo
+ TRX_SYS_TRX_ID_UPDATE_MARGIN
written to a file page by any
transaction; the assignment of
- transaction ids continues from this
- number rounded up by .._MARGIN plus
- .._MARGIN when the database is
+ transaction ids continues from
+ this number rounded up by
+ TRX_SYS_TRX_ID_UPDATE_MARGIN
+ plus
+ TRX_SYS_TRX_ID_UPDATE_MARGIN
+ when the database is
started */
-#define TRX_SYS_FSEG_HEADER 8 /* segment header for the tablespace
- segment the trx system is created
- into */
+#define TRX_SYS_FSEG_HEADER 8 /*!< segment header for the
+ tablespace segment the trx
+ system is created into */
#define TRX_SYS_RSEGS (8 + FSEG_HEADER_SIZE)
- /* the start of the array of rollback
- segment specification slots */
-/*-------------------------------------------------------------*/
-
-/* Max number of rollback segments: the number of segment specification slots
-in the transaction system array; rollback segment id must fit in one byte,
-therefore 256; each slot is currently 8 bytes in size */
+ /*!< the start of the array of
+ rollback segment specification
+ slots */
+/*------------------------------------------------------------- @} */
+
+/** Maximum number of rollback segments: the number of segment
+specification slots in the transaction system array; rollback segment
+id must fit in one byte, therefore 256; each slot is currently 8 bytes
+in size */
#define TRX_SYS_N_RSEGS 256
+/** Maximum length of MySQL binlog file name, in bytes.
+@see trx_sys_mysql_master_log_name
+@see trx_sys_mysql_bin_log_name */
#define TRX_SYS_MYSQL_LOG_NAME_LEN 512
+/** Contents of TRX_SYS_MYSQL_LOG_MAGIC_N_FLD */
#define TRX_SYS_MYSQL_LOG_MAGIC_N 873422344
-/* The offset of the MySQL replication info in the trx system header;
+#if UNIV_PAGE_SIZE < 4096
+# error "UNIV_PAGE_SIZE < 4096"
+#endif
+/** The offset of the MySQL replication info in the trx system header;
this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */
#define TRX_SYS_MYSQL_MASTER_LOG_INFO (UNIV_PAGE_SIZE - 2000)
-/* The offset of the MySQL binlog offset info in the trx system header */
+/** The offset of the MySQL binlog offset info in the trx system header */
#define TRX_SYS_MYSQL_LOG_INFO (UNIV_PAGE_SIZE - 1000)
-#define TRX_SYS_MYSQL_LOG_MAGIC_N_FLD 0 /* magic number which shows
+#define TRX_SYS_MYSQL_LOG_MAGIC_N_FLD 0 /*!< magic number which is
+ TRX_SYS_MYSQL_LOG_MAGIC_N
if we have valid data in the
- MySQL binlog info; the value
- is ..._MAGIC_N if yes */
-#define TRX_SYS_MYSQL_LOG_OFFSET_HIGH 4 /* high 4 bytes of the offset
+ MySQL binlog info */
+#define TRX_SYS_MYSQL_LOG_OFFSET_HIGH 4 /*!< high 4 bytes of the offset
within that file */
-#define TRX_SYS_MYSQL_LOG_OFFSET_LOW 8 /* low 4 bytes of the offset
+#define TRX_SYS_MYSQL_LOG_OFFSET_LOW 8 /*!< low 4 bytes of the offset
within that file */
-#define TRX_SYS_MYSQL_LOG_NAME 12 /* MySQL log file name */
+#define TRX_SYS_MYSQL_LOG_NAME 12 /*!< MySQL log file name */
-/* The offset of the doublewrite buffer header on the trx system header page */
+#ifndef UNIV_HOTBACKUP
+/** Doublewrite buffer */
+/* @{ */
+/** The offset of the doublewrite buffer header on the trx system header page */
#define TRX_SYS_DOUBLEWRITE (UNIV_PAGE_SIZE - 200)
/*-------------------------------------------------------------*/
-#define TRX_SYS_DOUBLEWRITE_FSEG 0 /* fseg header of the fseg
+#define TRX_SYS_DOUBLEWRITE_FSEG 0 /*!< fseg header of the fseg
containing the doublewrite
buffer */
#define TRX_SYS_DOUBLEWRITE_MAGIC FSEG_HEADER_SIZE
- /* 4-byte magic number which
+ /*!< 4-byte magic number which
shows if we already have
created the doublewrite
buffer */
#define TRX_SYS_DOUBLEWRITE_BLOCK1 (4 + FSEG_HEADER_SIZE)
- /* page number of the
+ /*!< page number of the
first page in the first
sequence of 64
(= FSP_EXTENT_SIZE) consecutive
pages in the doublewrite
buffer */
#define TRX_SYS_DOUBLEWRITE_BLOCK2 (8 + FSEG_HEADER_SIZE)
- /* page number of the
+ /*!< page number of the
first page in the second
sequence of 64 consecutive
pages in the doublewrite
buffer */
-#define TRX_SYS_DOUBLEWRITE_REPEAT 12 /* we repeat the above 3
- numbers so that if the trx
- sys header is half-written
- to disk, we still may be able
- to recover the information */
+#define TRX_SYS_DOUBLEWRITE_REPEAT 12 /*!< we repeat
+ TRX_SYS_DOUBLEWRITE_MAGIC,
+ TRX_SYS_DOUBLEWRITE_BLOCK1,
+ TRX_SYS_DOUBLEWRITE_BLOCK2
+ so that if the trx sys
+ header is half-written
+ to disk, we still may
+ be able to recover the
+ information */
+/** If this is not yet set to TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
+we must reset the doublewrite buffer, because starting from 4.1.x the
+space id of a data page is stored into
+FIL_PAGE_ARCH_LOG_NO_OR_SPACE_NO. */
#define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED (24 + FSEG_HEADER_SIZE)
- /* If this is not yet set to
- .._N, we must reset the
- doublewrite buffer, because
- starting from 4.1.x the space
- id of a data page is stored to
- FIL_PAGE_ARCH_LOG_NO_OR_SPACE_NO */
+
/*-------------------------------------------------------------*/
+/** Contents of TRX_SYS_DOUBLEWRITE_MAGIC */
#define TRX_SYS_DOUBLEWRITE_MAGIC_N 536853855
+/** Contents of TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED */
#define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N 1783657386
-
+/** Size of the doublewrite block in pages */
#define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE FSP_EXTENT_SIZE
-
-/* Doublewrite control struct */
+/* @} */
+
+/** File format tag */
+/* @{ */
+/** The offset of the file format tag on the trx system header page
+(TRX_SYS_PAGE_NO of TRX_SYS_SPACE) */
+#define TRX_SYS_FILE_FORMAT_TAG (UNIV_PAGE_SIZE - 16)
+
+/** Contents of TRX_SYS_FILE_FORMAT_TAG when valid. The file format
+identifier is added to this constant. */
+#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW 3645922177UL
+/** Contents of TRX_SYS_FILE_FORMAT_TAG+4 when valid */
+#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH 2745987765UL
+/* @} */
+
+/** Doublewrite control struct */
struct trx_doublewrite_struct{
- mutex_t mutex; /* mutex protecting the first_free field and
+ mutex_t mutex; /*!< mutex protecting the first_free field and
write_buf */
- ulint block1; /* the page number of the first
+ ulint block1; /*!< the page number of the first
doublewrite block (64 pages) */
- ulint block2; /* page number of the second block */
- ulint first_free; /* first free position in write_buf measured
+ ulint block2; /*!< page number of the second block */
+ ulint first_free; /*!< first free position in write_buf measured
in units of UNIV_PAGE_SIZE */
- byte* write_buf; /* write buffer used in writing to the
+ byte* write_buf; /*!< write buffer used in writing to the
doublewrite buffer, aligned to an
address divisible by UNIV_PAGE_SIZE
(which is required by Windows aio) */
- byte* write_buf_unaligned; /* pointer to write_buf, but unaligned */
- buf_block_t**
- buf_block_arr; /* array to store pointers to the buffer
+ byte* write_buf_unaligned;
+ /*!< pointer to write_buf, but unaligned */
+ buf_page_t**
+ buf_block_arr; /*!< array to store pointers to the buffer
blocks which have been cached to write_buf */
};
-/* The transaction system central memory data structure; protected by the
+/** The transaction system central memory data structure; protected by the
kernel mutex */
struct trx_sys_struct{
- dulint max_trx_id; /* The smallest number not yet
+ trx_id_t max_trx_id; /*!< The smallest number not yet
assigned as a transaction id or
transaction number */
UT_LIST_BASE_NODE_T(trx_t) trx_list;
- /* List of active and committed in
+ /*!< List of active and committed in
memory transactions, sorted on trx id,
biggest first */
UT_LIST_BASE_NODE_T(trx_t) mysql_trx_list;
- /* List of transactions created
+ /*!< List of transactions created
for MySQL */
UT_LIST_BASE_NODE_T(trx_rseg_t) rseg_list;
- /* List of rollback segment objects */
- trx_rseg_t* latest_rseg; /* Latest rollback segment in the
+ /*!< List of rollback segment
+ objects */
+ trx_rseg_t* latest_rseg; /*!< Latest rollback segment in the
round-robin assignment of rollback
segments to transactions */
trx_rseg_t* rseg_array[TRX_SYS_N_RSEGS];
- /* Pointer array to rollback segments;
- NULL if slot not in use */
- ulint rseg_history_len;/* Length of the TRX_RSEG_HISTORY
+ /*!< Pointer array to rollback
+ segments; NULL if slot not in use */
+ ulint rseg_history_len;/*!< Length of the TRX_RSEG_HISTORY
list (update undo logs for committed
transactions), protected by
rseg->mutex */
UT_LIST_BASE_NODE_T(read_view_t) view_list;
- /* List of read views sorted on trx no,
- biggest first */
+ /*!< List of read views sorted
+ on trx no, biggest first */
};
-/* When a trx id which is zero modulo this number (which must be a power of
+/** When a trx id which is zero modulo this number (which must be a power of
two) is assigned, the field TRX_SYS_TRX_ID_STORE on the transaction system
page is updated */
#define TRX_SYS_TRX_ID_WRITE_MARGIN 256
+#endif /* !UNIV_HOTBACKUP */
#ifndef UNIV_NONINL
#include "trx0sys.ic"
diff --git a/storage/innobase/include/trx0sys.ic b/storage/innobase/include/trx0sys.ic
index 55bcc12a414..820d31d0692 100644
--- a/storage/innobase/include/trx0sys.ic
+++ b/storage/innobase/include/trx0sys.ic
@@ -1,48 +1,66 @@
-/******************************************************
-Transaction system
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0sys.ic
+Transaction system
Created 3/26/1996 Heikki Tuuri
*******************************************************/
-#include "srv0srv.h"
#include "trx0trx.h"
#include "data0type.h"
-#include "mtr0log.h"
+#ifndef UNIV_HOTBACKUP
+# include "srv0srv.h"
+# include "mtr0log.h"
/* The typedef for rseg slot in the file copy */
typedef byte trx_sysf_rseg_t;
/* Rollback segment specification slot offsets */
/*-------------------------------------------------------------*/
-#define TRX_SYS_RSEG_SPACE 0 /* space where the the segment
+#define TRX_SYS_RSEG_SPACE 0 /* space where the segment
header is placed; starting with
MySQL/InnoDB 5.1.7, this is
UNIV_UNDEFINED if the slot is unused */
-#define TRX_SYS_RSEG_PAGE_NO 4 /* page number where the the segment
+#define TRX_SYS_RSEG_PAGE_NO 4 /* page number where the segment
header is placed; this is FIL_NULL
if the slot is unused */
/*-------------------------------------------------------------*/
/* Size of a rollback segment specification slot */
#define TRX_SYS_RSEG_SLOT_SIZE 8
-/*********************************************************************
+/*****************************************************************//**
Writes the value of max_trx_id to the file based trx system header. */
-
+UNIV_INTERN
void
trx_sys_flush_max_trx_id(void);
/*==========================*/
-/*******************************************************************
-Checks if a page address is the trx sys header page. */
+/***************************************************************//**
+Checks if a page address is the trx sys header page.
+@return TRUE if trx sys header page */
UNIV_INLINE
ibool
trx_sys_hdr_page(
/*=============*/
- /* out: TRUE if trx sys header page */
- ulint space, /* in: space */
- ulint page_no)/* in: page number */
+ ulint space, /*!< in: space */
+ ulint page_no)/*!< in: page number */
{
if ((space == TRX_SYS_SPACE) && (page_no == TRX_SYS_PAGE_NO)) {
@@ -52,16 +70,15 @@ trx_sys_hdr_page(
return(FALSE);
}
-/*******************************************************************
-Gets the pointer in the nth slot of the rseg array. */
+/***************************************************************//**
+Gets the pointer in the nth slot of the rseg array.
+@return pointer to rseg object, NULL if slot not in use */
UNIV_INLINE
trx_rseg_t*
trx_sys_get_nth_rseg(
/*=================*/
- /* out: pointer to rseg object, NULL if slot
- not in use */
- trx_sys_t* sys, /* in: trx system */
- ulint n) /* in: index of slot */
+ trx_sys_t* sys, /*!< in: trx system */
+ ulint n) /*!< in: index of slot */
{
ut_ad(mutex_own(&(kernel_mutex)));
ut_ad(n < TRX_SYS_N_RSEGS);
@@ -69,15 +86,15 @@ trx_sys_get_nth_rseg(
return(sys->rseg_array[n]);
}
-/*******************************************************************
+/***************************************************************//**
Sets the pointer in the nth slot of the rseg array. */
UNIV_INLINE
void
trx_sys_set_nth_rseg(
/*=================*/
- trx_sys_t* sys, /* in: trx system */
- ulint n, /* in: index of slot */
- trx_rseg_t* rseg) /* in: pointer to rseg object, NULL if slot
+ trx_sys_t* sys, /*!< in: trx system */
+ ulint n, /*!< in: index of slot */
+ trx_rseg_t* rseg) /*!< in: pointer to rseg object, NULL if slot
not in use */
{
ut_ad(n < TRX_SYS_N_RSEGS);
@@ -85,40 +102,40 @@ trx_sys_set_nth_rseg(
sys->rseg_array[n] = rseg;
}
-/**************************************************************************
-Gets a pointer to the transaction system header and x-latches its page. */
+/**********************************************************************//**
+Gets a pointer to the transaction system header and x-latches its page.
+@return pointer to system header, page x-latched. */
UNIV_INLINE
trx_sysf_t*
trx_sysf_get(
/*=========*/
- /* out: pointer to system header, page x-latched. */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
+ buf_block_t* block;
trx_sysf_t* header;
ut_ad(mtr);
- header = TRX_SYS + buf_page_get(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
- RW_X_LATCH, mtr);
+ block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
+ RW_X_LATCH, mtr);
+ buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(header, SYNC_TRX_SYS_HEADER);
-#endif /* UNIV_SYNC_DEBUG */
+ header = TRX_SYS + buf_block_get_frame(block);
return(header);
}
-/*********************************************************************
+/*****************************************************************//**
Gets the space of the nth rollback segment slot in the trx system
-file copy. */
+file copy.
+@return space id */
UNIV_INLINE
ulint
trx_sysf_rseg_get_space(
/*====================*/
- /* out: space id */
- trx_sysf_t* sys_header, /* in: trx sys header */
- ulint i, /* in: slot index == rseg id */
- mtr_t* mtr) /* in: mtr */
+ trx_sysf_t* sys_header, /*!< in: trx sys header */
+ ulint i, /*!< in: slot index == rseg id */
+ mtr_t* mtr) /*!< in: mtr */
{
ut_ad(mutex_own(&(kernel_mutex)));
ut_ad(sys_header);
@@ -129,18 +146,17 @@ trx_sysf_rseg_get_space(
+ TRX_SYS_RSEG_SPACE, MLOG_4BYTES, mtr));
}
-/*********************************************************************
+/*****************************************************************//**
Gets the page number of the nth rollback segment slot in the trx system
-header. */
+header.
+@return page number, FIL_NULL if slot unused */
UNIV_INLINE
ulint
trx_sysf_rseg_get_page_no(
/*======================*/
- /* out: page number, FIL_NULL
- if slot unused */
- trx_sysf_t* sys_header, /* in: trx system header */
- ulint i, /* in: slot index == rseg id */
- mtr_t* mtr) /* in: mtr */
+ trx_sysf_t* sys_header, /*!< in: trx system header */
+ ulint i, /*!< in: slot index == rseg id */
+ mtr_t* mtr) /*!< in: mtr */
{
ut_ad(sys_header);
ut_ad(mutex_own(&(kernel_mutex)));
@@ -151,17 +167,17 @@ trx_sysf_rseg_get_page_no(
+ TRX_SYS_RSEG_PAGE_NO, MLOG_4BYTES, mtr));
}
-/*********************************************************************
+/*****************************************************************//**
Sets the space id of the nth rollback segment slot in the trx system
file copy. */
UNIV_INLINE
void
trx_sysf_rseg_set_space(
/*====================*/
- trx_sysf_t* sys_header, /* in: trx sys file copy */
- ulint i, /* in: slot index == rseg id */
- ulint space, /* in: space id */
- mtr_t* mtr) /* in: mtr */
+ trx_sysf_t* sys_header, /*!< in: trx sys file copy */
+ ulint i, /*!< in: slot index == rseg id */
+ ulint space, /*!< in: space id */
+ mtr_t* mtr) /*!< in: mtr */
{
ut_ad(mutex_own(&(kernel_mutex)));
ut_ad(sys_header);
@@ -174,18 +190,18 @@ trx_sysf_rseg_set_space(
MLOG_4BYTES, mtr);
}
-/*********************************************************************
+/*****************************************************************//**
Sets the page number of the nth rollback segment slot in the trx system
header. */
UNIV_INLINE
void
trx_sysf_rseg_set_page_no(
/*======================*/
- trx_sysf_t* sys_header, /* in: trx sys header */
- ulint i, /* in: slot index == rseg id */
- ulint page_no, /* in: page number, FIL_NULL if the
+ trx_sysf_t* sys_header, /*!< in: trx sys header */
+ ulint i, /*!< in: slot index == rseg id */
+ ulint page_no, /*!< in: page number, FIL_NULL if the
slot is reset to unused */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
ut_ad(mutex_own(&(kernel_mutex)));
ut_ad(sys_header);
@@ -197,8 +213,9 @@ trx_sysf_rseg_set_page_no(
page_no,
MLOG_4BYTES, mtr);
}
+#endif /* !UNIV_HOTBACKUP */
-/*********************************************************************
+/*****************************************************************//**
Writes a trx id to an index page. In case that the id size changes in
some future version, this function should be used instead of
mach_write_... */
@@ -206,38 +223,41 @@ UNIV_INLINE
void
trx_write_trx_id(
/*=============*/
- byte* ptr, /* in: pointer to memory where written */
- dulint id) /* in: id */
+ byte* ptr, /*!< in: pointer to memory where written */
+ trx_id_t id) /*!< in: id */
{
- ut_ad(DATA_TRX_ID_LEN == 6);
-
+#if DATA_TRX_ID_LEN != 6
+# error "DATA_TRX_ID_LEN != 6"
+#endif
mach_write_to_6(ptr, id);
}
-/*********************************************************************
+#ifndef UNIV_HOTBACKUP
+/*****************************************************************//**
Reads a trx id from an index page. In case that the id size changes in
some future version, this function should be used instead of
-mach_read_... */
+mach_read_...
+@return id */
UNIV_INLINE
-dulint
+trx_id_t
trx_read_trx_id(
/*============*/
- /* out: id */
- byte* ptr) /* in: pointer to memory from where to read */
+ const byte* ptr) /*!< in: pointer to memory from where to read */
{
- ut_ad(DATA_TRX_ID_LEN == 6);
-
+#if DATA_TRX_ID_LEN != 6
+# error "DATA_TRX_ID_LEN != 6"
+#endif
return(mach_read_from_6(ptr));
}
-/********************************************************************
-Looks for the trx handle with the given id in trx_list. */
+/****************************************************************//**
+Looks for the trx handle with the given id in trx_list.
+@return the trx handle or NULL if not found */
UNIV_INLINE
trx_t*
trx_get_on_id(
/*==========*/
- /* out: the trx handle or NULL if not found */
- dulint trx_id) /* in: trx id to search for */
+ trx_id_t trx_id) /*!< in: trx id to search for */
{
trx_t* trx;
@@ -257,17 +277,16 @@ trx_get_on_id(
return(NULL);
}
-/********************************************************************
+/****************************************************************//**
Returns the minimum trx id in trx list. This is the smallest id for which
the trx can possibly be active. (But, you must look at the trx->conc_state to
find out if the minimum trx id transaction itself is active, or already
-committed.) */
+committed.)
+@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
UNIV_INLINE
-dulint
+trx_id_t
trx_list_get_min_trx_id(void)
/*=========================*/
- /* out: the minimum trx id, or trx_sys->max_trx_id
- if the trx list is empty */
{
trx_t* trx;
@@ -283,14 +302,14 @@ trx_list_get_min_trx_id(void)
return(trx->id);
}
-/********************************************************************
-Checks if a transaction with the given id is active. */
+/****************************************************************//**
+Checks if a transaction with the given id is active.
+@return TRUE if active */
UNIV_INLINE
ibool
trx_is_active(
/*==========*/
- /* out: TRUE if active */
- dulint trx_id) /* in: trx id of the transaction */
+ trx_id_t trx_id) /*!< in: trx id of the transaction */
{
trx_t* trx;
@@ -321,15 +340,15 @@ trx_is_active(
return(FALSE);
}
-/*********************************************************************
-Allocates a new transaction id. */
+/*****************************************************************//**
+Allocates a new transaction id.
+@return new, allocated trx id */
UNIV_INLINE
-dulint
+trx_id_t
trx_sys_get_new_trx_id(void)
/*========================*/
- /* out: new, allocated trx id */
{
- dulint id;
+ trx_id_t id;
ut_ad(mutex_own(&kernel_mutex));
@@ -353,15 +372,16 @@ trx_sys_get_new_trx_id(void)
return(id);
}
-/*********************************************************************
-Allocates a new transaction number. */
+/*****************************************************************//**
+Allocates a new transaction number.
+@return new, allocated trx number */
UNIV_INLINE
-dulint
+trx_id_t
trx_sys_get_new_trx_no(void)
/*========================*/
- /* out: new, allocated trx number */
{
ut_ad(mutex_own(&kernel_mutex));
return(trx_sys_get_new_trx_id());
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
index f0833bc6f21..d2a59740c93 100644
--- a/storage/innobase/include/trx0trx.h
+++ b/storage/innobase/include/trx0trx.h
@@ -1,7 +1,24 @@
-/******************************************************
-The transaction
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0trx.h
+The transaction
Created 3/26/1996 Heikki Tuuri
*******************************************************/
@@ -11,365 +28,432 @@ Created 3/26/1996 Heikki Tuuri
#include "univ.i"
#include "trx0types.h"
+#include "dict0types.h"
+#ifndef UNIV_HOTBACKUP
#include "lock0types.h"
#include "usr0types.h"
#include "que0types.h"
#include "mem0mem.h"
#include "read0types.h"
-#include "dict0types.h"
#include "trx0xa.h"
+#include "ut0vec.h"
+/** Dummy session used currently in MySQL interface */
+extern sess_t* trx_dummy_sess;
+
+/** Number of transactions currently allocated for MySQL: protected by
+the kernel mutex */
extern ulint trx_n_mysql_transactions;
-/************************************************************************
+/********************************************************************//**
Releases the search latch if trx has reserved it. */
-
+UNIV_INTERN
void
trx_search_latch_release_if_reserved(
/*=================================*/
- trx_t* trx); /* in: transaction */
-/**********************************************************************
+ trx_t* trx); /*!< in: transaction */
+/******************************************************************//**
Set detailed error message for the transaction. */
+UNIV_INTERN
void
trx_set_detailed_error(
/*===================*/
- trx_t* trx, /* in: transaction struct */
- const char* msg); /* in: detailed error message */
-/*****************************************************************
+ trx_t* trx, /*!< in: transaction struct */
+ const char* msg); /*!< in: detailed error message */
+/*************************************************************//**
Set detailed error message for the transaction from a file. Note that the
file is rewound before reading from it. */
-
+UNIV_INTERN
void
trx_set_detailed_error_from_file(
/*=============================*/
- trx_t* trx, /* in: transaction struct */
- FILE* file); /* in: file to read message from */
-/********************************************************************
-Retrieves the error_info field from a trx. */
-
-void*
+ trx_t* trx, /*!< in: transaction struct */
+ FILE* file); /*!< in: file to read message from */
+/****************************************************************//**
+Retrieves the error_info field from a trx.
+@return the error info */
+UNIV_INLINE
+const dict_index_t*
trx_get_error_info(
/*===============*/
- /* out: the error info */
- trx_t* trx); /* in: trx object */
-/********************************************************************
-Creates and initializes a transaction object. */
-
+ const trx_t* trx); /*!< in: trx object */
+/****************************************************************//**
+Creates and initializes a transaction object.
+@return own: the transaction */
+UNIV_INTERN
trx_t*
trx_create(
/*=======*/
- /* out, own: the transaction */
- sess_t* sess); /* in: session or NULL */
-/************************************************************************
-Creates a transaction object for MySQL. */
-
+ sess_t* sess) /*!< in: session */
+ __attribute__((nonnull));
+/********************************************************************//**
+Creates a transaction object for MySQL.
+@return own: transaction object */
+UNIV_INTERN
trx_t*
trx_allocate_for_mysql(void);
/*========================*/
- /* out, own: transaction object */
-/************************************************************************
-Creates a transaction object for background operations by the master thread. */
-
+/********************************************************************//**
+Creates a transaction object for background operations by the master thread.
+@return own: transaction object */
+UNIV_INTERN
trx_t*
trx_allocate_for_background(void);
/*=============================*/
- /* out, own: transaction object */
-/************************************************************************
+/********************************************************************//**
Frees a transaction object. */
-
+UNIV_INTERN
void
trx_free(
/*=====*/
- trx_t* trx); /* in, own: trx object */
-/************************************************************************
+ trx_t* trx); /*!< in, own: trx object */
+/********************************************************************//**
Frees a transaction object for MySQL. */
-
+UNIV_INTERN
void
trx_free_for_mysql(
/*===============*/
- trx_t* trx); /* in, own: trx object */
-/************************************************************************
+ trx_t* trx); /*!< in, own: trx object */
+/********************************************************************//**
Frees a transaction object of a background operation of the master thread. */
-
+UNIV_INTERN
void
trx_free_for_background(
/*====================*/
- trx_t* trx); /* in, own: trx object */
-/********************************************************************
+ trx_t* trx); /*!< in, own: trx object */
+/****************************************************************//**
Creates trx objects for transactions and initializes the trx list of
trx_sys at database start. Rollback segment and undo log lists must
already exist when this function is called, because the lists of
transactions to be rolled back or cleaned up are built based on the
undo log lists. */
-
+UNIV_INTERN
void
trx_lists_init_at_db_start(void);
/*============================*/
-/********************************************************************
-Starts a new transaction. */
-
+/****************************************************************//**
+Starts a new transaction.
+@return TRUE if success, FALSE if the rollback segment could not
+support this many transactions */
+UNIV_INTERN
ibool
trx_start(
/*======*/
- /* out: TRUE if success, FALSE if the rollback
- segment could not support this many transactions */
- trx_t* trx, /* in: transaction */
- ulint rseg_id);/* in: rollback segment id; if ULINT_UNDEFINED
+ trx_t* trx, /*!< in: transaction */
+ ulint rseg_id);/*!< in: rollback segment id; if ULINT_UNDEFINED
is passed, the system chooses the rollback segment
automatically in a round-robin fashion */
-/********************************************************************
-Starts a new transaction. */
-
+/****************************************************************//**
+Starts a new transaction.
+@return TRUE */
+UNIV_INTERN
ibool
trx_start_low(
/*==========*/
- /* out: TRUE */
- trx_t* trx, /* in: transaction */
- ulint rseg_id);/* in: rollback segment id; if ULINT_UNDEFINED
+ trx_t* trx, /*!< in: transaction */
+ ulint rseg_id);/*!< in: rollback segment id; if ULINT_UNDEFINED
is passed, the system chooses the rollback segment
automatically in a round-robin fashion */
-/*****************************************************************
+/*************************************************************//**
Starts the transaction if it is not yet started. */
UNIV_INLINE
void
trx_start_if_not_started(
/*=====================*/
- trx_t* trx); /* in: transaction */
-/*****************************************************************
+ trx_t* trx); /*!< in: transaction */
+/*************************************************************//**
Starts the transaction if it is not yet started. Assumes we have reserved
the kernel mutex! */
UNIV_INLINE
void
trx_start_if_not_started_low(
/*=========================*/
- trx_t* trx); /* in: transaction */
-/*****************************************************************
-Starts the transaction if it is not yet started. */
-
-void
-trx_start_if_not_started_noninline(
-/*===============================*/
- trx_t* trx); /* in: transaction */
-/********************************************************************
+ trx_t* trx); /*!< in: transaction */
+/****************************************************************//**
Commits a transaction. */
-
+UNIV_INTERN
void
trx_commit_off_kernel(
/*==================*/
- trx_t* trx); /* in: transaction */
-/********************************************************************
+ trx_t* trx); /*!< in: transaction */
+/****************************************************************//**
Cleans up a transaction at database startup. The cleanup is needed if
the transaction already got to the middle of a commit when the database
-crashed, andf we cannot roll it back. */
-
+crashed, and we cannot roll it back. */
+UNIV_INTERN
void
trx_cleanup_at_db_startup(
/*======================*/
- trx_t* trx); /* in: transaction */
-/**************************************************************************
-Does the transaction commit for MySQL. */
-
+ trx_t* trx); /*!< in: transaction */
+/**********************************************************************//**
+Does the transaction commit for MySQL.
+@return DB_SUCCESS or error number */
+UNIV_INTERN
ulint
trx_commit_for_mysql(
/*=================*/
- /* out: 0 or error number */
- trx_t* trx); /* in: trx handle */
-/**************************************************************************
-Does the transaction prepare for MySQL. */
-
+ trx_t* trx); /*!< in: trx handle */
+/**********************************************************************//**
+Does the transaction prepare for MySQL.
+@return 0 or error number */
+UNIV_INTERN
ulint
trx_prepare_for_mysql(
/*==================*/
- /* out: 0 or error number */
- trx_t* trx); /* in: trx handle */
-/**************************************************************************
+ trx_t* trx); /*!< in: trx handle */
+/**********************************************************************//**
This function is used to find number of prepared transactions and
-their transaction objects for a recovery. */
-
+their transaction objects for a recovery.
+@return number of prepared transactions */
+UNIV_INTERN
int
trx_recover_for_mysql(
/*==================*/
- /* out: number of prepared transactions */
- XID* xid_list, /* in/out: prepared transactions */
- ulint len); /* in: number of slots in xid_list */
-/***********************************************************************
+ XID* xid_list, /*!< in/out: prepared transactions */
+ ulint len); /*!< in: number of slots in xid_list */
+/*******************************************************************//**
This function is used to find one X/Open XA distributed transaction
-which is in the prepared state */
+which is in the prepared state
+@return trx or NULL */
+UNIV_INTERN
trx_t *
trx_get_trx_by_xid(
/*===============*/
- /* out: trx or NULL */
- XID* xid); /* in: X/Open XA transaction identification */
-/**************************************************************************
+ XID* xid); /*!< in: X/Open XA transaction identification */
+/**********************************************************************//**
If required, flushes the log to disk if we called trx_commit_for_mysql()
-with trx->flush_log_later == TRUE. */
-
+with trx->flush_log_later == TRUE.
+@return 0 or error number */
+UNIV_INTERN
ulint
trx_commit_complete_for_mysql(
/*==========================*/
- /* out: 0 or error number */
- trx_t* trx); /* in: trx handle */
-/**************************************************************************
+ trx_t* trx); /*!< in: trx handle */
+/**********************************************************************//**
Marks the latest SQL statement ended. */
-
+UNIV_INTERN
void
trx_mark_sql_stat_end(
/*==================*/
- trx_t* trx); /* in: trx handle */
-/************************************************************************
+ trx_t* trx); /*!< in: trx handle */
+/********************************************************************//**
Assigns a read view for a consistent read query. All the consistent reads
within the same transaction will get the same read view, which is created
-when this function is first called for a new started transaction. */
-
+when this function is first called for a new started transaction.
+@return consistent read view */
+UNIV_INTERN
read_view_t*
trx_assign_read_view(
/*=================*/
- /* out: consistent read view */
- trx_t* trx); /* in: active transaction */
-/***************************************************************
+ trx_t* trx); /*!< in: active transaction */
+/***********************************************************//**
The transaction must be in the TRX_QUE_LOCK_WAIT state. Puts it to
the TRX_QUE_RUNNING state and releases query threads which were
waiting for a lock in the wait_thrs list. */
-
+UNIV_INTERN
void
trx_end_lock_wait(
/*==============*/
- trx_t* trx); /* in: transaction */
-/********************************************************************
+ trx_t* trx); /*!< in: transaction */
+/****************************************************************//**
Sends a signal to a trx object. */
-
+UNIV_INTERN
void
trx_sig_send(
/*=========*/
- trx_t* trx, /* in: trx handle */
- ulint type, /* in: signal type */
- ulint sender, /* in: TRX_SIG_SELF or
+ trx_t* trx, /*!< in: trx handle */
+ ulint type, /*!< in: signal type */
+ ulint sender, /*!< in: TRX_SIG_SELF or
TRX_SIG_OTHER_SESS */
- que_thr_t* receiver_thr, /* in: query thread which wants the
+ que_thr_t* receiver_thr, /*!< in: query thread which wants the
reply, or NULL; if type is
TRX_SIG_END_WAIT, this must be NULL */
- trx_savept_t* savept, /* in: possible rollback savepoint, or
+ trx_savept_t* savept, /*!< in: possible rollback savepoint, or
NULL */
- que_thr_t** next_thr); /* in/out: next query thread to run;
+ que_thr_t** next_thr); /*!< in/out: next query thread to run;
if the value which is passed in is
a pointer to a NULL pointer, then the
calling function can start running
a new query thread; if the parameter
is NULL, it is ignored */
-/********************************************************************
+/****************************************************************//**
Send the reply message when a signal in the queue of the trx has
been handled. */
-
+UNIV_INTERN
void
trx_sig_reply(
/*==========*/
- trx_sig_t* sig, /* in: signal */
- que_thr_t** next_thr); /* in/out: next query thread to run;
+ trx_sig_t* sig, /*!< in: signal */
+ que_thr_t** next_thr); /*!< in/out: next query thread to run;
if the value which is passed in is
a pointer to a NULL pointer, then the
calling function can start running
a new query thread */
-/********************************************************************
+/****************************************************************//**
Removes the signal object from a trx signal queue. */
-
+UNIV_INTERN
void
trx_sig_remove(
/*===========*/
- trx_t* trx, /* in: trx handle */
- trx_sig_t* sig); /* in, own: signal */
-/********************************************************************
+ trx_t* trx, /*!< in: trx handle */
+ trx_sig_t* sig); /*!< in, own: signal */
+/****************************************************************//**
Starts handling of a trx signal. */
-
+UNIV_INTERN
void
trx_sig_start_handle(
/*=================*/
- trx_t* trx, /* in: trx handle */
- que_thr_t** next_thr); /* in/out: next query thread to run;
+ trx_t* trx, /*!< in: trx handle */
+ que_thr_t** next_thr); /*!< in/out: next query thread to run;
if the value which is passed in is
a pointer to a NULL pointer, then the
calling function can start running
a new query thread */
-/********************************************************************
+/****************************************************************//**
Ends signal handling. If the session is in the error state, and
trx->graph_before_signal_handling != NULL, returns control to the error
handling routine of the graph (currently only returns the control to the
graph root which then sends an error message to the client). */
-
+UNIV_INTERN
void
trx_end_signal_handling(
/*====================*/
- trx_t* trx); /* in: trx */
-/*************************************************************************
-Creates a commit command node struct. */
-
+ trx_t* trx); /*!< in: trx */
+/*********************************************************************//**
+Creates a commit command node struct.
+@return own: commit node struct */
+UNIV_INTERN
commit_node_t*
commit_node_create(
/*===============*/
- /* out, own: commit node struct */
- mem_heap_t* heap); /* in: mem heap where created */
-/***************************************************************
-Performs an execution step for a commit type node in a query graph. */
-
+ mem_heap_t* heap); /*!< in: mem heap where created */
+/***********************************************************//**
+Performs an execution step for a commit type node in a query graph.
+@return query thread to run next, or NULL */
+UNIV_INTERN
que_thr_t*
trx_commit_step(
/*============*/
- /* out: query thread to run next, or NULL */
- que_thr_t* thr); /* in: query thread */
+ que_thr_t* thr); /*!< in: query thread */
-/**************************************************************************
+/**********************************************************************//**
Prints info about a transaction to the given file. The caller must own the
kernel mutex and must have called
innobase_mysql_prepare_print_arbitrary_thd(), unless he knows that MySQL
or InnoDB cannot meanwhile change the info printed here. */
-
+UNIV_INTERN
void
trx_print(
/*======*/
- FILE* f, /* in: output stream */
- trx_t* trx, /* in: transaction */
- ulint max_query_len); /* in: max query length to print, or 0 to
+ FILE* f, /*!< in: output stream */
+ trx_t* trx, /*!< in: transaction */
+ ulint max_query_len); /*!< in: max query length to print, or 0 to
use the default max length */
-#ifndef UNIV_HOTBACKUP
-/**************************************************************************
-Determines if the currently running transaction has been interrupted. */
+/** Type of data dictionary operation */
+enum trx_dict_op {
+ /** The transaction is not modifying the data dictionary. */
+ TRX_DICT_OP_NONE = 0,
+ /** The transaction is creating a table or an index, or
+ dropping a table. The table must be dropped in crash
+ recovery. This and TRX_DICT_OP_NONE are the only possible
+ operation modes in crash recovery. */
+ TRX_DICT_OP_TABLE = 1,
+ /** The transaction is creating or dropping an index in an
+ existing table. In crash recovery, the data dictionary
+ must be locked, but the table must not be dropped. */
+ TRX_DICT_OP_INDEX = 2
+};
+/**********************************************************************//**
+Determine if a transaction is a dictionary operation.
+@return dictionary operation mode */
+UNIV_INLINE
+enum trx_dict_op
+trx_get_dict_operation(
+/*===================*/
+ const trx_t* trx) /*!< in: transaction */
+ __attribute__((pure));
+/**********************************************************************//**
+Flag a transaction as a dictionary operation. */
+UNIV_INLINE
+void
+trx_set_dict_operation(
+/*===================*/
+ trx_t* trx, /*!< in/out: transaction */
+ enum trx_dict_op op); /*!< in: operation, not
+ TRX_DICT_OP_NONE */
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Determines if the currently running transaction has been interrupted.
+@return TRUE if interrupted */
+UNIV_INTERN
ibool
trx_is_interrupted(
/*===============*/
- /* out: TRUE if interrupted */
- trx_t* trx); /* in: transaction */
+ trx_t* trx); /*!< in: transaction */
#else /* !UNIV_HOTBACKUP */
#define trx_is_interrupted(trx) FALSE
#endif /* !UNIV_HOTBACKUP */
-/***********************************************************************
-Compares the "weight" (or size) of two transactions. The weight of one
-transaction is estimated as the number of altered rows + the number of
-locked rows. Transactions that have edited non-transactional tables are
-considered heavier than ones that have not. */
-
+/*******************************************************************//**
+Calculates the "weight" of a transaction. The weight of one transaction
+is estimated as the number of altered rows + the number of locked rows.
+@param t transaction
+@return transaction weight */
+#define TRX_WEIGHT(t) \
+ ut_dulint_add((t)->undo_no, UT_LIST_GET_LEN((t)->trx_locks))
+
+/*******************************************************************//**
+Compares the "weight" (or size) of two transactions. Transactions that
+have edited non-transactional tables are considered heavier than ones
+that have not.
+@return <0, 0 or >0; similar to strcmp(3) */
+UNIV_INTERN
int
trx_weight_cmp(
/*===========*/
- /* out: <0, 0 or >0; similar to strcmp(3) */
- trx_t* a, /* in: the first transaction to be compared */
- trx_t* b); /* in: the second transaction to be compared */
+ const trx_t* a, /*!< in: the first transaction to be compared */
+ const trx_t* b); /*!< in: the second transaction to be compared */
+
+/*******************************************************************//**
+Retrieves transaction's id, represented as unsigned long long.
+@return transaction's id */
+UNIV_INLINE
+ullint
+trx_get_id(
+/*=======*/
+ const trx_t* trx); /*!< in: transaction */
+
+/* Maximum length of a string that can be returned by
+trx_get_que_state_str(). */
+#define TRX_QUE_STATE_STR_MAX_LEN 12 /* "ROLLING BACK" */
+
+/*******************************************************************//**
+Retrieves transaction's que state in a human readable string. The string
+should not be free()'d or modified.
+@return string in the data segment */
+UNIV_INLINE
+const char*
+trx_get_que_state_str(
+/*==================*/
+ const trx_t* trx); /*!< in: transaction */
/* Signal to a transaction */
struct trx_sig_struct{
- ulint type; /* signal type */
- ulint sender; /* TRX_SIG_SELF or
+ unsigned type:3; /*!< signal type */
+ unsigned sender:1; /*!< TRX_SIG_SELF or
TRX_SIG_OTHER_SESS */
- que_thr_t* receiver; /* non-NULL if the sender of the signal
+ que_thr_t* receiver; /*!< non-NULL if the sender of the signal
wants reply after the operation induced
by the signal is completed */
- trx_savept_t savept; /* possible rollback savepoint */
+ trx_savept_t savept; /*!< possible rollback savepoint */
UT_LIST_NODE_T(trx_sig_t)
- signals; /* queue of pending signals to the
+ signals; /*!< queue of pending signals to the
transaction */
UT_LIST_NODE_T(trx_sig_t)
- reply_signals; /* list of signals for which the sender
+ reply_signals; /*!< list of signals for which the sender
transaction is waiting a reply */
};
@@ -383,65 +467,82 @@ struct trx_struct{
ulint magic_n;
/* All the next fields are protected by the kernel mutex, except the
undo logs which are protected by undo_mutex */
- const char* op_info; /* English text describing the
+ const char* op_info; /*!< English text describing the
current operation, or an empty
string */
- unsigned is_purge:1; /* 0=user transaction, 1=purge */
- ulint conc_state; /* state of the trx from the point
+ unsigned is_purge:1; /*!< 0=user transaction, 1=purge */
+ unsigned is_recovered:1; /*!< 0=normal transaction,
+ 1=recovered, must be rolled back */
+ unsigned conc_state:2; /*!< state of the trx from the point
of view of concurrency control:
TRX_ACTIVE, TRX_COMMITTED_IN_MEMORY,
... */
- time_t start_time; /* time the trx object was created
- or the state last time became
- TRX_ACTIVE */
- ulint isolation_level;/* TRX_ISO_REPEATABLE_READ, ... */
- ibool check_foreigns; /* normally TRUE, but if the user
+ unsigned que_state:2; /*!< valid when conc_state == TRX_ACTIVE:
+ TRX_QUE_RUNNING, TRX_QUE_LOCK_WAIT,
+ ... */
+ unsigned isolation_level:2;/* TRX_ISO_REPEATABLE_READ, ... */
+ unsigned check_foreigns:1;/* normally TRUE, but if the user
wants to suppress foreign key checks,
(in table imports, for example) we
set this FALSE */
- ibool check_unique_secondary;
+ unsigned check_unique_secondary:1;
/* normally TRUE, but if the user
wants to speed up inserts by
suppressing unique key checks
for secondary indexes when we decide
if we can use the insert buffer for
them, we set this FALSE */
- dulint id; /* transaction id */
- XID xid; /* X/Open XA transaction
- identification to identify a
- transaction branch */
- ibool support_xa; /* normally we do the XA two-phase
+ unsigned support_xa:1; /*!< normally we do the XA two-phase
commit steps, but by setting this to
FALSE, one can save CPU time and about
150 bytes in the undo log size as then
we skip XA steps */
- dulint no; /* transaction serialization number ==
- max trx id when the transaction is
- moved to COMMITTED_IN_MEMORY state */
- ibool flush_log_later;/* when we commit the transaction
- in MySQL's binlog write, we will
- flush the log to disk later in
- a separate call */
- ibool must_flush_log_later;/* this flag is set to TRUE in
+ unsigned flush_log_later:1;/* In 2PC, we hold the
+ prepare_commit mutex across
+ both phases. In that case, we
+ defer flush of the logs to disk
+ until after we release the
+ mutex. */
+ unsigned must_flush_log_later:1;/* this flag is set to TRUE in
trx_commit_off_kernel() if
flush_log_later was TRUE, and there
were modifications by the transaction;
in that case we must flush the log
in trx_commit_complete_for_mysql() */
- dulint commit_lsn; /* lsn at the time of the commit */
- ibool dict_operation; /* TRUE if the trx is used to create
- a table, create an index, or drop a
- table. This is a hint that the table
- may need to be dropped in crash
- recovery. */
- dulint table_id; /* table id if the preceding field is
- TRUE */
- /*------------------------------*/
- unsigned duplicates:2; /* TRX_DUP_IGNORE | TRX_DUP_REPLACE */
- unsigned active_trans:2; /* 1 - if a transaction in MySQL
+ unsigned dict_operation:2;/**< @see enum trx_dict_op */
+ unsigned duplicates:2; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
+ unsigned active_trans:2; /*!< 1 - if a transaction in MySQL
is active. 2 - if prepare_commit_mutex
was taken */
- void* mysql_thd; /* MySQL thread handle corresponding
+ unsigned has_search_latch:1;
+ /* TRUE if this trx has latched the
+ search system latch in S-mode */
+ unsigned declared_to_be_inside_innodb:1;
+ /* this is TRUE if we have declared
+ this transaction in
+ srv_conc_enter_innodb to be inside the
+ InnoDB engine */
+ unsigned handling_signals:1;/* this is TRUE as long as the trx
+ is handling signals */
+ unsigned dict_operation_lock_mode:2;
+ /* 0, RW_S_LATCH, or RW_X_LATCH:
+ the latch mode trx currently holds
+ on dict_operation_lock */
+ time_t start_time; /*!< time the trx object was created
+ or the state last time became
+ TRX_ACTIVE */
+ trx_id_t id; /*!< transaction id */
+ XID xid; /*!< X/Open XA transaction
+ identification to identify a
+ transaction branch */
+ trx_id_t no; /*!< transaction serialization number ==
+ max trx id when the transaction is
+ moved to COMMITTED_IN_MEMORY state */
+ ib_uint64_t commit_lsn; /*!< lsn at the time of the commit */
+ trx_id_t table_id; /*!< Table to drop iff dict_operation
+ is TRX_DICT_OP_TABLE, or ut_dulint_zero. */
+ /*------------------------------*/
+ void* mysql_thd; /*!< MySQL thread handle corresponding
to this trx, or NULL */
char** mysql_query_str;/* pointer to the field in mysqld_thd
which contains the pointer to the
@@ -451,7 +552,7 @@ struct trx_struct{
contains a pointer to the latest file
name; this is NULL if binlog is not
used */
- ib_longlong mysql_log_offset;/* if MySQL binlog is used, this field
+ ib_int64_t mysql_log_offset;/* if MySQL binlog is used, this field
contains the end offset of the binlog
entry */
os_thread_id_t mysql_thread_id;/* id of the MySQL thread associated
@@ -467,13 +568,6 @@ struct trx_struct{
/* how many tables the current SQL
statement uses, except those
in consistent read */
- ibool dict_operation_lock_mode;
- /* 0, RW_S_LATCH, or RW_X_LATCH:
- the latch mode trx currently holds
- on dict_operation_lock */
- ibool has_search_latch;
- /* TRUE if this trx has latched the
- search system latch in S-mode */
ulint search_latch_timeout;
/* If we notice that someone is
waiting for our S-lock on the search
@@ -485,64 +579,55 @@ struct trx_struct{
to reduce contention on the search
latch */
/*------------------------------*/
- ibool declared_to_be_inside_innodb;
- /* this is TRUE if we have declared
- this transaction in
- srv_conc_enter_innodb to be inside the
- InnoDB engine */
ulint n_tickets_to_enter_innodb;
/* this can be > 0 only when
declared_to_... is TRUE; when we come
to srv_conc_innodb_enter, if the value
here is > 0, we decrement this by 1 */
/*------------------------------*/
- lock_t* auto_inc_lock; /* possible auto-inc lock reserved by
- the transaction; note that it is also
- in the lock list trx_locks */
UT_LIST_NODE_T(trx_t)
- trx_list; /* list of transactions */
+ trx_list; /*!< list of transactions */
UT_LIST_NODE_T(trx_t)
- mysql_trx_list; /* list of transactions created for
+ mysql_trx_list; /*!< list of transactions created for
MySQL */
/*------------------------------*/
- ulint error_state; /* 0 if no error, otherwise error
+ ulint error_state; /*!< 0 if no error, otherwise error
number; NOTE That ONLY the thread
doing the transaction is allowed to
set this field: this is NOT protected
by the kernel mutex */
- void* error_info; /* if the error number indicates a
+ const dict_index_t*error_info; /*!< if the error number indicates a
duplicate key error, a pointer to
the problematic index is stored here */
- sess_t* sess; /* session of the trx, NULL if none */
- ulint que_state; /* TRX_QUE_RUNNING, TRX_QUE_LOCK_WAIT,
- ... */
- que_t* graph; /* query currently run in the session,
+ ulint error_key_num; /*!< if the index creation fails due to a
+ duplicate key error, a mysql key
+ number of that index is stored here */
+ sess_t* sess; /*!< session of the trx, NULL if none */
+ que_t* graph; /*!< query currently run in the session,
or NULL if none; NOTE that the query
belongs to the session, and it can
survive over a transaction commit, if
it is a stored procedure with a COMMIT
WORK statement, for instance */
- ulint n_active_thrs; /* number of active query threads */
- ibool handling_signals;/* this is TRUE as long as the trx
- is handling signals */
+ ulint n_active_thrs; /*!< number of active query threads */
que_t* graph_before_signal_handling;
/* value of graph when signal handling
for this trx started: this is used to
return control to the original query
graph for error processing */
- trx_sig_t sig; /* one signal object can be allocated
+ trx_sig_t sig; /*!< one signal object can be allocated
in this space, avoiding mem_alloc */
UT_LIST_BASE_NODE_T(trx_sig_t)
- signals; /* queue of processed or pending
+ signals; /*!< queue of processed or pending
signals to the trx */
UT_LIST_BASE_NODE_T(trx_sig_t)
- reply_signals; /* list of signals sent by the query
+ reply_signals; /*!< list of signals sent by the query
threads of this trx for which a thread
is waiting for a reply; if this trx is
killed, the reply requests in the list
must be canceled */
/*------------------------------*/
- lock_t* wait_lock; /* if trx execution state is
+ lock_t* wait_lock; /*!< if trx execution state is
TRX_QUE_LOCK_WAIT, this points to
the lock request, otherwise this is
NULL */
@@ -552,18 +637,21 @@ struct trx_struct{
if another transaction chooses this
transaction as a victim in deadlock
resolution, it sets this to TRUE */
- time_t wait_started; /* lock wait started at this time */
+ time_t wait_started; /*!< lock wait started at this time */
UT_LIST_BASE_NODE_T(que_thr_t)
- wait_thrs; /* query threads belonging to this
+ wait_thrs; /*!< query threads belonging to this
trx that are in the QUE_THR_LOCK_WAIT
state */
- ulint deadlock_mark; /* a mark field used in deadlock
- checking algorithm */
+ ulint deadlock_mark; /*!< a mark field used in deadlock
+ checking algorithm. This must be
+ in its own machine word, because
+ it can be changed by other
+ threads while holding kernel_mutex. */
/*------------------------------*/
- mem_heap_t* lock_heap; /* memory heap for the locks of the
+ mem_heap_t* lock_heap; /*!< memory heap for the locks of the
transaction */
UT_LIST_BASE_NODE_T(lock_t)
- trx_locks; /* locks reserved by the transaction */
+ trx_locks; /*!< locks reserved by the transaction */
/*------------------------------*/
mem_heap_t* global_read_view_heap;
/* memory heap for the global read
@@ -571,7 +659,7 @@ struct trx_struct{
read_view_t* global_read_view;
/* consistent read view associated
to a transaction or NULL */
- read_view_t* read_view; /* consistent read view used in the
+ read_view_t* read_view; /*!< consistent read view used in the
transaction or NULL, this read view
if defined can be normal read view
associated to a transaction (i.e.
@@ -579,16 +667,16 @@ struct trx_struct{
associated to a cursor */
/*------------------------------*/
UT_LIST_BASE_NODE_T(trx_named_savept_t)
- trx_savepoints; /* savepoints set with SAVEPOINT ...,
+ trx_savepoints; /*!< savepoints set with SAVEPOINT ...,
oldest first */
/*------------------------------*/
- mutex_t undo_mutex; /* mutex protecting the fields in this
+ mutex_t undo_mutex; /*!< mutex protecting the fields in this
section (down to undo_no_arr), EXCEPT
last_sql_stat_start, which can be
accessed only when we know that there
cannot be any activity in the undo
logs! */
- dulint undo_no; /* next undo log record number to
+ undo_no_t undo_no; /*!< next undo log record number to
assign; since the undo log is
private for a transaction, this
is a simple ascending sequence
@@ -600,25 +688,31 @@ struct trx_struct{
was started: in case of an error, trx
is rolled back down to this undo
number; see note at undo_mutex! */
- trx_rseg_t* rseg; /* rollback segment assigned to the
+ trx_rseg_t* rseg; /*!< rollback segment assigned to the
transaction, or NULL if not assigned
yet */
- trx_undo_t* insert_undo; /* pointer to the insert undo log, or
+ trx_undo_t* insert_undo; /*!< pointer to the insert undo log, or
NULL if no inserts performed yet */
- trx_undo_t* update_undo; /* pointer to the update undo log, or
+ trx_undo_t* update_undo; /*!< pointer to the update undo log, or
NULL if no update performed yet */
- dulint roll_limit; /* least undo number to undo during
+ undo_no_t roll_limit; /*!< least undo number to undo during
a rollback */
- ulint pages_undone; /* number of undo log pages undone
+ ulint pages_undone; /*!< number of undo log pages undone
since the last undo log truncation */
- trx_undo_arr_t* undo_no_arr; /* array of undo numbers of undo log
+ trx_undo_arr_t* undo_no_arr; /*!< array of undo numbers of undo log
records which are currently processed
by a rollback operation */
- ulint n_autoinc_rows; /* no. of AUTO-INC rows required for
+ /*------------------------------*/
+ ulint n_autoinc_rows; /*!< no. of AUTO-INC rows required for
an SQL statement. This is useful for
multi-row INSERTs */
+ ib_vector_t* autoinc_locks; /* AUTOINC locks held by this
+ transaction. Note that these are
+ also in the lock list trx_locks. This
+ vector needs to be freed explicitly
+ when the trx_t instance is destroyed */
/*------------------------------*/
- char detailed_error[256]; /* detailed error message for last
+ char detailed_error[256]; /*!< detailed error message for last
error, or empty. */
};
@@ -628,19 +722,19 @@ struct trx_struct{
transaction, e.g., a parallel
query */
/* Transaction concurrency states (trx->conc_state) */
-#define TRX_NOT_STARTED 1
-#define TRX_ACTIVE 2
-#define TRX_COMMITTED_IN_MEMORY 3
-#define TRX_PREPARED 4 /* Support for 2PC/XA */
+#define TRX_NOT_STARTED 0
+#define TRX_ACTIVE 1
+#define TRX_COMMITTED_IN_MEMORY 2
+#define TRX_PREPARED 3 /* Support for 2PC/XA */
/* Transaction execution states when trx->conc_state == TRX_ACTIVE */
-#define TRX_QUE_RUNNING 1 /* transaction is running */
-#define TRX_QUE_LOCK_WAIT 2 /* transaction is waiting for a lock */
-#define TRX_QUE_ROLLING_BACK 3 /* transaction is rolling back */
-#define TRX_QUE_COMMITTING 4 /* transaction is committing */
+#define TRX_QUE_RUNNING 0 /* transaction is running */
+#define TRX_QUE_LOCK_WAIT 1 /* transaction is waiting for a lock */
+#define TRX_QUE_ROLLING_BACK 2 /* transaction is rolling back */
+#define TRX_QUE_COMMITTING 3 /* transaction is committing */
/* Transaction isolation levels (trx->isolation_level) */
-#define TRX_ISO_READ_UNCOMMITTED 1 /* dirty read: non-locking
+#define TRX_ISO_READ_UNCOMMITTED 0 /* dirty read: non-locking
SELECTs are performed so that
we do not look at a possible
earlier version of a record;
@@ -649,7 +743,7 @@ struct trx_struct{
level; otherwise like level
2 */
-#define TRX_ISO_READ_COMMITTED 2 /* somewhat Oracle-like
+#define TRX_ISO_READ_COMMITTED 1 /* somewhat Oracle-like
isolation, except that in
range UPDATE and DELETE we
must block phantom rows
@@ -662,7 +756,7 @@ struct trx_struct{
each consistent read reads its
own snapshot */
-#define TRX_ISO_REPEATABLE_READ 3 /* this is the default;
+#define TRX_ISO_REPEATABLE_READ 2 /* this is the default;
all consistent reads in the
same trx read the same
snapshot;
@@ -670,7 +764,7 @@ struct trx_struct{
in locking reads to block
insertions into gaps */
-#define TRX_ISO_SERIALIZABLE 4 /* all plain SELECTs are
+#define TRX_ISO_SERIALIZABLE 3 /* all plain SELECTs are
converted to LOCK IN SHARE
MODE reads */
@@ -681,7 +775,7 @@ Multiple flags can be combined with bitwise OR. */
/* Types of a trx signal */
-#define TRX_SIG_NO_SIGNAL 100
+#define TRX_SIG_NO_SIGNAL 0
#define TRX_SIG_TOTAL_ROLLBACK 1
#define TRX_SIG_ROLLBACK_TO_SAVEPT 2
#define TRX_SIG_COMMIT 3
@@ -689,25 +783,32 @@ Multiple flags can be combined with bitwise OR. */
#define TRX_SIG_BREAK_EXECUTION 5
/* Sender types of a signal */
-#define TRX_SIG_SELF 1 /* sent by the session itself, or
+#define TRX_SIG_SELF 0 /* sent by the session itself, or
by an error occurring within this
session */
-#define TRX_SIG_OTHER_SESS 2 /* sent by another session (which
+#define TRX_SIG_OTHER_SESS 1 /* sent by another session (which
must hold rights to this) */
-/* Commit command node in a query graph */
+/** Commit node states */
+enum commit_node_state {
+ COMMIT_NODE_SEND = 1, /*!< about to send a commit signal to
+ the transaction */
+ COMMIT_NODE_WAIT /*!< commit signal sent to the transaction,
+ waiting for completion */
+};
+
+/** Commit command node in a query graph */
struct commit_node_struct{
- que_common_t common; /* node type: QUE_NODE_COMMIT */
- ulint state; /* node execution state */
+ que_common_t common; /*!< node type: QUE_NODE_COMMIT */
+ enum commit_node_state
+ state; /*!< node execution state */
};
-/* Commit node states */
-#define COMMIT_NODE_SEND 1
-#define COMMIT_NODE_WAIT 2
#ifndef UNIV_NONINL
#include "trx0trx.ic"
#endif
+#endif /* !UNIV_HOTBACKUP */
#endif
diff --git a/storage/innobase/include/trx0trx.ic b/storage/innobase/include/trx0trx.ic
index 09b2f822ff7..7332eeece85 100644
--- a/storage/innobase/include/trx0trx.ic
+++ b/storage/innobase/include/trx0trx.ic
@@ -1,18 +1,35 @@
-/******************************************************
-The transaction
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1996 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0trx.ic
+The transaction
Created 3/26/1996 Heikki Tuuri
*******************************************************/
-/*****************************************************************
+/*************************************************************//**
Starts the transaction if it is not yet started. */
UNIV_INLINE
void
trx_start_if_not_started(
/*=====================*/
- trx_t* trx) /* in: transaction */
+ trx_t* trx) /*!< in: transaction */
{
ut_ad(trx->conc_state != TRX_COMMITTED_IN_MEMORY);
@@ -22,14 +39,14 @@ trx_start_if_not_started(
}
}
-/*****************************************************************
+/*************************************************************//**
Starts the transaction if it is not yet started. Assumes we have reserved
the kernel mutex! */
UNIV_INLINE
void
trx_start_if_not_started_low(
/*=========================*/
- trx_t* trx) /* in: transaction */
+ trx_t* trx) /*!< in: transaction */
{
ut_ad(trx->conc_state != TRX_COMMITTED_IN_MEMORY);
@@ -38,3 +55,110 @@ trx_start_if_not_started_low(
trx_start_low(trx, ULINT_UNDEFINED);
}
}
+
+/****************************************************************//**
+Retrieves the error_info field from a trx.
+@return the error info */
+UNIV_INLINE
+const dict_index_t*
+trx_get_error_info(
+/*===============*/
+ const trx_t* trx) /*!< in: trx object */
+{
+ return(trx->error_info);
+}
+
+/*******************************************************************//**
+Retrieves transaction's id, represented as unsigned long long.
+@return transaction's id */
+UNIV_INLINE
+ullint
+trx_get_id(
+/*=======*/
+ const trx_t* trx) /*!< in: transaction */
+{
+ return((ullint)ut_conv_dulint_to_longlong(trx->id));
+}
+
+/*******************************************************************//**
+Retrieves transaction's que state in a human readable string. The string
+should not be free()'d or modified.
+@return string in the data segment */
+UNIV_INLINE
+const char*
+trx_get_que_state_str(
+/*==================*/
+ const trx_t* trx) /*!< in: transaction */
+{
+ /* be sure to adjust TRX_QUE_STATE_STR_MAX_LEN if you change this */
+ switch (trx->que_state) {
+ case TRX_QUE_RUNNING:
+ return("RUNNING");
+ case TRX_QUE_LOCK_WAIT:
+ return("LOCK WAIT");
+ case TRX_QUE_ROLLING_BACK:
+ return("ROLLING BACK");
+ case TRX_QUE_COMMITTING:
+ return("COMMITTING");
+ default:
+ return("UNKNOWN");
+ }
+}
+
+/**********************************************************************//**
+Determine if a transaction is a dictionary operation.
+@return dictionary operation mode */
+UNIV_INLINE
+enum trx_dict_op
+trx_get_dict_operation(
+/*===================*/
+ const trx_t* trx) /*!< in: transaction */
+{
+ enum trx_dict_op op = (enum trx_dict_op) trx->dict_operation;
+
+#ifdef UNIV_DEBUG
+ switch (op) {
+ case TRX_DICT_OP_NONE:
+ case TRX_DICT_OP_TABLE:
+ case TRX_DICT_OP_INDEX:
+ return(op);
+ }
+ ut_error;
+#endif /* UNIV_DEBUG */
+ return((enum trx_dict_op) UNIV_EXPECT(op, TRX_DICT_OP_NONE));
+}
+/**********************************************************************//**
+Flag a transaction as a dictionary operation. */
+UNIV_INLINE
+void
+trx_set_dict_operation(
+/*===================*/
+ trx_t* trx, /*!< in/out: transaction */
+ enum trx_dict_op op) /*!< in: operation, not
+ TRX_DICT_OP_NONE */
+{
+#ifdef UNIV_DEBUG
+ enum trx_dict_op old_op = trx_get_dict_operation(trx);
+
+ switch (op) {
+ case TRX_DICT_OP_NONE:
+ ut_error;
+ break;
+ case TRX_DICT_OP_TABLE:
+ switch (old_op) {
+ case TRX_DICT_OP_NONE:
+ case TRX_DICT_OP_INDEX:
+ case TRX_DICT_OP_TABLE:
+ goto ok;
+ }
+ ut_error;
+ break;
+ case TRX_DICT_OP_INDEX:
+ ut_ad(old_op == TRX_DICT_OP_NONE);
+ break;
+ }
+ok:
+#endif /* UNIV_DEBUG */
+
+ trx->dict_operation = op;
+}
diff --git a/storage/innobase/include/trx0types.h b/storage/innobase/include/trx0types.h
index 0e6ee79498c..24cf57d53d5 100644
--- a/storage/innobase/include/trx0types.h
+++ b/storage/innobase/include/trx0types.h
@@ -1,7 +1,24 @@
-/******************************************************
-Transaction system global type definitions
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0types.h
+Transaction system global type definitions
Created 3/26/1996 Heikki Tuuri
*******************************************************/
@@ -9,37 +26,83 @@ Created 3/26/1996 Heikki Tuuri
#ifndef trx0types_h
#define trx0types_h
-#include "lock0types.h"
#include "ut0byte.h"
-/* Memory objects */
+/** prepare trx_t::id for being printed via printf(3) */
+#define TRX_ID_PREP_PRINTF(id) (ullint) ut_conv_dulint_to_longlong(id)
+
+/** printf(3) format used for printing TRX_ID_PREP_PRINTF() */
+#define TRX_ID_FMT "%llX"
+
+/** maximum length that a formatted trx_t::id could take, not including
+the terminating NUL character. */
+#define TRX_ID_MAX_LEN 17
+
+/** Memory objects */
+/* @{ */
+/** Transaction */
typedef struct trx_struct trx_t;
+/** Transaction system */
typedef struct trx_sys_struct trx_sys_t;
+/** Doublewrite information */
typedef struct trx_doublewrite_struct trx_doublewrite_t;
+/** Signal */
typedef struct trx_sig_struct trx_sig_t;
+/** Rollback segment */
typedef struct trx_rseg_struct trx_rseg_t;
+/** Transaction undo log */
typedef struct trx_undo_struct trx_undo_t;
+/** Array of undo numbers of undo records being rolled back or purged */
typedef struct trx_undo_arr_struct trx_undo_arr_t;
+/** A cell of trx_undo_arr_t */
typedef struct trx_undo_inf_struct trx_undo_inf_t;
+/** The control structure used in the purge operation */
typedef struct trx_purge_struct trx_purge_t;
+/** Rollback command node in a query graph */
typedef struct roll_node_struct roll_node_t;
+/** Commit command node in a query graph */
typedef struct commit_node_struct commit_node_t;
+/** SAVEPOINT command node in a query graph */
typedef struct trx_named_savept_struct trx_named_savept_t;
+/* @} */
+
+/** Rollback contexts */
+enum trx_rb_ctx {
+ RB_NONE = 0, /*!< no rollback */
+ RB_NORMAL, /*!< normal rollback */
+ RB_RECOVERY /*!< rolling back an incomplete transaction,
+ in crash recovery */
+};
+
+/** Transaction identifier (DB_TRX_ID, DATA_TRX_ID) */
+typedef dulint trx_id_t;
+/** Rollback pointer (DB_ROLL_PTR, DATA_ROLL_PTR) */
+typedef dulint roll_ptr_t;
+/** Undo number */
+typedef dulint undo_no_t;
-/* Transaction savepoint */
+/** Transaction savepoint */
typedef struct trx_savept_struct trx_savept_t;
+/** Transaction savepoint */
struct trx_savept_struct{
- dulint least_undo_no; /* least undo number to undo */
+ undo_no_t least_undo_no; /*!< least undo number to undo */
};
-/* File objects */
+/** File objects */
+/* @{ */
+/** Transaction system header */
typedef byte trx_sysf_t;
+/** Rollback segment header */
typedef byte trx_rsegf_t;
+/** Undo segment header */
typedef byte trx_usegf_t;
+/** Undo log header */
typedef byte trx_ulogf_t;
+/** Undo log page header */
typedef byte trx_upagef_t;
-/* Undo log record */
+/** Undo log record */
typedef byte trx_undo_rec_t;
+/* @} */
#endif
diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h
index 7f10e407746..a084f2394b5 100644
--- a/storage/innobase/include/trx0undo.h
+++ b/storage/innobase/include/trx0undo.h
@@ -1,7 +1,24 @@
-/******************************************************
-Transaction undo log
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0undo.h
+Transaction undo log
Created 3/26/1996 Heikki Tuuri
*******************************************************/
@@ -16,37 +33,40 @@ Created 3/26/1996 Heikki Tuuri
#include "page0types.h"
#include "trx0xa.h"
-/***************************************************************************
-Builds a roll pointer dulint. */
+#ifndef UNIV_HOTBACKUP
+/***********************************************************************//**
+Builds a roll pointer.
+@return roll pointer */
UNIV_INLINE
-dulint
+roll_ptr_t
trx_undo_build_roll_ptr(
/*====================*/
- /* out: roll pointer */
- ibool is_insert, /* in: TRUE if insert undo log */
- ulint rseg_id, /* in: rollback segment id */
- ulint page_no, /* in: page number */
- ulint offset); /* in: offset of the undo entry within page */
-/***************************************************************************
-Decodes a roll pointer dulint. */
+ ibool is_insert, /*!< in: TRUE if insert undo log */
+ ulint rseg_id, /*!< in: rollback segment id */
+ ulint page_no, /*!< in: page number */
+ ulint offset); /*!< in: offset of the undo entry within page */
+/***********************************************************************//**
+Decodes a roll pointer. */
UNIV_INLINE
void
trx_undo_decode_roll_ptr(
/*=====================*/
- dulint roll_ptr, /* in: roll pointer */
- ibool* is_insert, /* out: TRUE if insert undo log */
- ulint* rseg_id, /* out: rollback segment id */
- ulint* page_no, /* out: page number */
- ulint* offset); /* out: offset of the undo entry within page */
-/***************************************************************************
-Returns TRUE if the roll pointer is of the insert type. */
+ roll_ptr_t roll_ptr, /*!< in: roll pointer */
+ ibool* is_insert, /*!< out: TRUE if insert undo log */
+ ulint* rseg_id, /*!< out: rollback segment id */
+ ulint* page_no, /*!< out: page number */
+ ulint* offset); /*!< out: offset of the undo
+ entry within page */
+/***********************************************************************//**
+Returns TRUE if the roll pointer is of the insert type.
+@return TRUE if insert undo log */
UNIV_INLINE
ibool
trx_undo_roll_ptr_is_insert(
/*========================*/
- /* out: TRUE if insert undo log */
- dulint roll_ptr); /* in: roll pointer */
-/*********************************************************************
+ roll_ptr_t roll_ptr); /*!< in: roll pointer */
+#endif /* !UNIV_HOTBACKUP */
+/*****************************************************************//**
Writes a roll ptr to an index page. In case that the size changes in
some future version, this function should be used instead of
mach_write_... */
@@ -54,263 +74,272 @@ UNIV_INLINE
void
trx_write_roll_ptr(
/*===============*/
- byte* ptr, /* in: pointer to memory where written */
- dulint roll_ptr); /* in: roll ptr */
-/*********************************************************************
+ byte* ptr, /*!< in: pointer to memory where
+ written */
+ roll_ptr_t roll_ptr); /*!< in: roll ptr */
+/*****************************************************************//**
Reads a roll ptr from an index page. In case that the roll ptr size
changes in some future version, this function should be used instead of
-mach_read_... */
+mach_read_...
+@return roll ptr */
UNIV_INLINE
-dulint
+roll_ptr_t
trx_read_roll_ptr(
/*==============*/
- /* out: roll ptr */
- byte* ptr); /* in: pointer to memory from where to read */
-/**********************************************************************
-Gets an undo log page and x-latches it. */
+ const byte* ptr); /*!< in: pointer to memory from where to read */
+#ifndef UNIV_HOTBACKUP
+/******************************************************************//**
+Gets an undo log page and x-latches it.
+@return pointer to page x-latched */
UNIV_INLINE
page_t*
trx_undo_page_get(
/*==============*/
- /* out: pointer to page x-latched */
- ulint space, /* in: space where placed */
- ulint page_no, /* in: page number */
- mtr_t* mtr); /* in: mtr */
-/**********************************************************************
-Gets an undo log page and s-latches it. */
+ ulint space, /*!< in: space where placed */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page_no, /*!< in: page number */
+ mtr_t* mtr); /*!< in: mtr */
+/******************************************************************//**
+Gets an undo log page and s-latches it.
+@return pointer to page s-latched */
UNIV_INLINE
page_t*
trx_undo_page_get_s_latched(
/*========================*/
- /* out: pointer to page s-latched */
- ulint space, /* in: space where placed */
- ulint page_no, /* in: page number */
- mtr_t* mtr); /* in: mtr */
-/**********************************************************************
+ ulint space, /*!< in: space where placed */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page_no, /*!< in: page number */
+ mtr_t* mtr); /*!< in: mtr */
+/******************************************************************//**
Returns the previous undo record on the page in the specified log, or
-NULL if none exists. */
+NULL if none exists.
+@return pointer to record, NULL if none */
UNIV_INLINE
trx_undo_rec_t*
trx_undo_page_get_prev_rec(
/*=======================*/
- /* out: pointer to record, NULL if none */
- trx_undo_rec_t* rec, /* in: undo log record */
- ulint page_no,/* in: undo log header page number */
- ulint offset);/* in: undo log header offset on page */
-/**********************************************************************
+ trx_undo_rec_t* rec, /*!< in: undo log record */
+ ulint page_no,/*!< in: undo log header page number */
+ ulint offset);/*!< in: undo log header offset on page */
+/******************************************************************//**
Returns the next undo log record on the page in the specified log, or
-NULL if none exists. */
+NULL if none exists.
+@return pointer to record, NULL if none */
UNIV_INLINE
trx_undo_rec_t*
trx_undo_page_get_next_rec(
/*=======================*/
- /* out: pointer to record, NULL if none */
- trx_undo_rec_t* rec, /* in: undo log record */
- ulint page_no,/* in: undo log header page number */
- ulint offset);/* in: undo log header offset on page */
-/**********************************************************************
+ trx_undo_rec_t* rec, /*!< in: undo log record */
+ ulint page_no,/*!< in: undo log header page number */
+ ulint offset);/*!< in: undo log header offset on page */
+/******************************************************************//**
Returns the last undo record on the page in the specified undo log, or
-NULL if none exists. */
+NULL if none exists.
+@return pointer to record, NULL if none */
UNIV_INLINE
trx_undo_rec_t*
trx_undo_page_get_last_rec(
/*=======================*/
- /* out: pointer to record, NULL if none */
- page_t* undo_page,/* in: undo log page */
- ulint page_no,/* in: undo log header page number */
- ulint offset); /* in: undo log header offset on page */
-/**********************************************************************
+ page_t* undo_page,/*!< in: undo log page */
+ ulint page_no,/*!< in: undo log header page number */
+ ulint offset); /*!< in: undo log header offset on page */
+/******************************************************************//**
Returns the first undo record on the page in the specified undo log, or
-NULL if none exists. */
+NULL if none exists.
+@return pointer to record, NULL if none */
UNIV_INLINE
trx_undo_rec_t*
trx_undo_page_get_first_rec(
/*========================*/
- /* out: pointer to record, NULL if none */
- page_t* undo_page,/* in: undo log page */
- ulint page_no,/* in: undo log header page number */
- ulint offset);/* in: undo log header offset on page */
-/***************************************************************************
-Gets the previous record in an undo log. */
-
+ page_t* undo_page,/*!< in: undo log page */
+ ulint page_no,/*!< in: undo log header page number */
+ ulint offset);/*!< in: undo log header offset on page */
+/***********************************************************************//**
+Gets the previous record in an undo log.
+@return undo log record, the page s-latched, NULL if none */
+UNIV_INTERN
trx_undo_rec_t*
trx_undo_get_prev_rec(
/*==================*/
- /* out: undo log record, the page s-latched,
- NULL if none */
- trx_undo_rec_t* rec, /* in: undo record */
- ulint page_no,/* in: undo log header page number */
- ulint offset, /* in: undo log header offset on page */
- mtr_t* mtr); /* in: mtr */
-/***************************************************************************
-Gets the next record in an undo log. */
-
+ trx_undo_rec_t* rec, /*!< in: undo record */
+ ulint page_no,/*!< in: undo log header page number */
+ ulint offset, /*!< in: undo log header offset on page */
+ mtr_t* mtr); /*!< in: mtr */
+/***********************************************************************//**
+Gets the next record in an undo log.
+@return undo log record, the page s-latched, NULL if none */
+UNIV_INTERN
trx_undo_rec_t*
trx_undo_get_next_rec(
/*==================*/
- /* out: undo log record, the page s-latched,
- NULL if none */
- trx_undo_rec_t* rec, /* in: undo record */
- ulint page_no,/* in: undo log header page number */
- ulint offset, /* in: undo log header offset on page */
- mtr_t* mtr); /* in: mtr */
-/***************************************************************************
-Gets the first record in an undo log. */
-
+ trx_undo_rec_t* rec, /*!< in: undo record */
+ ulint page_no,/*!< in: undo log header page number */
+ ulint offset, /*!< in: undo log header offset on page */
+ mtr_t* mtr); /*!< in: mtr */
+/***********************************************************************//**
+Gets the first record in an undo log.
+@return undo log record, the page latched, NULL if none */
+UNIV_INTERN
trx_undo_rec_t*
trx_undo_get_first_rec(
/*===================*/
- /* out: undo log record, the page latched, NULL if
- none */
- ulint space, /* in: undo log header space */
- ulint page_no,/* in: undo log header page number */
- ulint offset, /* in: undo log header offset on page */
- ulint mode, /* in: latching mode: RW_S_LATCH or RW_X_LATCH */
- mtr_t* mtr); /* in: mtr */
-/************************************************************************
-Tries to add a page to the undo log segment where the undo log is placed. */
-
+ ulint space, /*!< in: undo log header space */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page_no,/*!< in: undo log header page number */
+ ulint offset, /*!< in: undo log header offset on page */
+ ulint mode, /*!< in: latching mode: RW_S_LATCH or RW_X_LATCH */
+ mtr_t* mtr); /*!< in: mtr */
+/********************************************************************//**
+Tries to add a page to the undo log segment where the undo log is placed.
+@return page number if success, else FIL_NULL */
+UNIV_INTERN
ulint
trx_undo_add_page(
/*==============*/
- /* out: page number if success, else
- FIL_NULL */
- trx_t* trx, /* in: transaction */
- trx_undo_t* undo, /* in: undo log memory object */
- mtr_t* mtr); /* in: mtr which does not have a latch to any
+ trx_t* trx, /*!< in: transaction */
+ trx_undo_t* undo, /*!< in: undo log memory object */
+ mtr_t* mtr); /*!< in: mtr which does not have a latch to any
undo log page; the caller must have reserved
the rollback segment mutex */
-/***************************************************************************
+/***********************************************************************//**
Truncates an undo log from the end. This function is used during a rollback
to free space from an undo log. */
-
+UNIV_INTERN
void
trx_undo_truncate_end(
/*==================*/
- trx_t* trx, /* in: transaction whose undo log it is */
- trx_undo_t* undo, /* in: undo log */
- dulint limit); /* in: all undo records with undo number
+ trx_t* trx, /*!< in: transaction whose undo log it is */
+ trx_undo_t* undo, /*!< in: undo log */
+ undo_no_t limit); /*!< in: all undo records with undo number
>= this value should be truncated */
-/***************************************************************************
+/***********************************************************************//**
Truncates an undo log from the start. This function is used during a purge
operation. */
-
+UNIV_INTERN
void
trx_undo_truncate_start(
/*====================*/
- trx_rseg_t* rseg, /* in: rollback segment */
- ulint space, /* in: space id of the log */
- ulint hdr_page_no, /* in: header page number */
- ulint hdr_offset, /* in: header offset on the page */
- dulint limit); /* in: all undo pages with undo numbers <
- this value should be truncated; NOTE that
- the function only frees whole pages; the
- header page is not freed, but emptied, if
- all the records there are < limit */
-/************************************************************************
+ trx_rseg_t* rseg, /*!< in: rollback segment */
+ ulint space, /*!< in: space id of the log */
+ ulint hdr_page_no, /*!< in: header page number */
+ ulint hdr_offset, /*!< in: header offset on the page */
+ undo_no_t limit); /*!< in: all undo pages with
+ undo numbers < this value
+ should be truncated; NOTE that
+ the function only frees whole
+ pages; the header page is not
+ freed, but emptied, if all the
+ records there are < limit */
+/********************************************************************//**
Initializes the undo log lists for a rollback segment memory copy.
This function is only called when the database is started or a new
-rollback segment created. */
-
+rollback segment created.
+@return the combined size of undo log segments in pages */
+UNIV_INTERN
ulint
trx_undo_lists_init(
/*================*/
- /* out: the combined size of undo log segments
- in pages */
- trx_rseg_t* rseg); /* in: rollback segment memory object */
-/**************************************************************************
+ trx_rseg_t* rseg); /*!< in: rollback segment memory object */
+/**********************************************************************//**
Assigns an undo log for a transaction. A new undo log is created or a cached
-undo log reused. */
-
+undo log reused.
+@return DB_SUCCESS if the undo log was assigned successfully; possible
+error codes are: DB_TOO_MANY_CONCURRENT_TRXS, DB_OUT_OF_FILE_SPACE,
+DB_OUT_OF_MEMORY */
+UNIV_INTERN
ulint
trx_undo_assign_undo(
/*=================*/
- /* out: DB_SUCCESS if undo log assign
- * successful, possible error codes are:
- * ER_TOO_MANY_CONCURRENT_TRXS
- * DB_OUT_OF_FILE_SPAC
- * DB_OUT_OF_MEMORY */
- trx_t* trx, /* in: transaction */
- ulint type); /* in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
-/**********************************************************************
-Sets the state of the undo log segment at a transaction finish. */
-
+ trx_t* trx, /*!< in: transaction */
+ ulint type); /*!< in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
+/******************************************************************//**
+Sets the state of the undo log segment at a transaction finish.
+@return undo log segment header page, x-latched */
+UNIV_INTERN
page_t*
trx_undo_set_state_at_finish(
/*=========================*/
- /* out: undo log segment header page,
- x-latched */
- trx_rseg_t* rseg, /* in: rollback segment memory object */
- trx_t* trx, /* in: transaction */
- trx_undo_t* undo, /* in: undo log memory copy */
- mtr_t* mtr); /* in: mtr */
-/**********************************************************************
-Sets the state of the undo log segment at a transaction prepare. */
-
+ trx_rseg_t* rseg, /*!< in: rollback segment memory object */
+ trx_t* trx, /*!< in: transaction */
+ trx_undo_t* undo, /*!< in: undo log memory copy */
+ mtr_t* mtr); /*!< in: mtr */
+/******************************************************************//**
+Sets the state of the undo log segment at a transaction prepare.
+@return undo log segment header page, x-latched */
+UNIV_INTERN
page_t*
trx_undo_set_state_at_prepare(
/*==========================*/
- /* out: undo log segment header page,
- x-latched */
- trx_t* trx, /* in: transaction */
- trx_undo_t* undo, /* in: undo log memory copy */
- mtr_t* mtr); /* in: mtr */
+ trx_t* trx, /*!< in: transaction */
+ trx_undo_t* undo, /*!< in: undo log memory copy */
+ mtr_t* mtr); /*!< in: mtr */
-/**************************************************************************
+/**********************************************************************//**
Adds the update undo log header as the first in the history list, and
frees the memory object, or puts it to the list of cached update undo log
segments. */
-
+UNIV_INTERN
void
trx_undo_update_cleanup(
/*====================*/
- trx_t* trx, /* in: trx owning the update undo log */
- page_t* undo_page, /* in: update undo log header page,
+ trx_t* trx, /*!< in: trx owning the update undo log */
+ page_t* undo_page, /*!< in: update undo log header page,
x-latched */
- mtr_t* mtr); /* in: mtr */
-/**********************************************************************
+ mtr_t* mtr); /*!< in: mtr */
+/******************************************************************//**
Frees or caches an insert undo log after a transaction commit or rollback.
Knowledge of inserts is not needed after a commit or rollback, therefore
the data can be discarded. */
-
+UNIV_INTERN
void
trx_undo_insert_cleanup(
/*====================*/
- trx_t* trx); /* in: transaction handle */
-/***************************************************************
-Parses the redo log entry of an undo log page initialization. */
-
+ trx_t* trx); /*!< in: transaction handle */
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************//**
+Parses the redo log entry of an undo log page initialization.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
trx_undo_parse_page_init(
/*=====================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
-/***************************************************************
-Parses the redo log entry of an undo log page header create or reuse. */
-
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ page_t* page, /*!< in: page or NULL */
+ mtr_t* mtr); /*!< in: mtr or NULL */
+/***********************************************************//**
+Parses the redo log entry of an undo log page header create or reuse.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
trx_undo_parse_page_header(
/*=======================*/
- /* out: end of log record or NULL */
- ulint type, /* in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
-/***************************************************************
-Parses the redo log entry of an undo log page header discard. */
-
+ ulint type, /*!< in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ page_t* page, /*!< in: page or NULL */
+ mtr_t* mtr); /*!< in: mtr or NULL */
+/***********************************************************//**
+Parses the redo log entry of an undo log page header discard.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
trx_undo_parse_discard_latest(
/*==========================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ page_t* page, /*!< in: page or NULL */
+ mtr_t* mtr); /*!< in: mtr or NULL */
+/********************************************************************//**
+Frees an undo log memory copy. */
+UNIV_INTERN
+void
+trx_undo_mem_free(
+/*==============*/
+ trx_undo_t* undo); /*!< in: the undo object to be freed */
/* Types of an undo log segment */
#define TRX_UNDO_INSERT 1 /* contains undo entries for inserts */
@@ -329,18 +358,19 @@ trx_undo_parse_discard_latest(
#define TRX_UNDO_PREPARED 5 /* contains an undo log of an
prepared transaction */
-/* Transaction undo log memory object; this is protected by the undo_mutex
+#ifndef UNIV_HOTBACKUP
+/** Transaction undo log memory object; this is protected by the undo_mutex
in the corresponding transaction object */
struct trx_undo_struct{
/*-----------------------------*/
- ulint id; /* undo log slot number within the
+ ulint id; /*!< undo log slot number within the
rollback segment */
- ulint type; /* TRX_UNDO_INSERT or
+ ulint type; /*!< TRX_UNDO_INSERT or
TRX_UNDO_UPDATE */
- ulint state; /* state of the corresponding undo log
+ ulint state; /*!< state of the corresponding undo log
segment */
- ibool del_marks; /* relevant only in an update undo log:
+ ibool del_marks; /*!< relevant only in an update undo log:
this is TRUE if the transaction may
have delete marked records, because of
a delete of a row or an update of an
@@ -348,65 +378,72 @@ struct trx_undo_struct{
necessary; also TRUE if the transaction
has updated an externally stored
field */
- dulint trx_id; /* id of the trx assigned to the undo
+ trx_id_t trx_id; /*!< id of the trx assigned to the undo
log */
- XID xid; /* X/Open XA transaction
+ XID xid; /*!< X/Open XA transaction
identification */
- ibool dict_operation; /* TRUE if a dict operation trx */
- dulint table_id; /* if a dict operation, then the table
+ ibool dict_operation; /*!< TRUE if a dict operation trx */
+ dulint table_id; /*!< if a dict operation, then the table
id */
- trx_rseg_t* rseg; /* rseg where the undo log belongs */
+ trx_rseg_t* rseg; /*!< rseg where the undo log belongs */
/*-----------------------------*/
- ulint space; /* space id where the undo log
+ ulint space; /*!< space id where the undo log
placed */
- ulint hdr_page_no; /* page number of the header page in
+ ulint zip_size; /*!< compressed page size of space
+ in bytes, or 0 for uncompressed */
+ ulint hdr_page_no; /*!< page number of the header page in
the undo log */
- ulint hdr_offset; /* header offset of the undo log on the
+ ulint hdr_offset; /*!< header offset of the undo log on the
page */
- ulint last_page_no; /* page number of the last page in the
+ ulint last_page_no; /*!< page number of the last page in the
undo log; this may differ from
top_page_no during a rollback */
- ulint size; /* current size in pages */
+ ulint size; /*!< current size in pages */
/*-----------------------------*/
- ulint empty; /* TRUE if the stack of undo log
+ ulint empty; /*!< TRUE if the stack of undo log
records is currently empty */
- ulint top_page_no; /* page number where the latest undo
+ ulint top_page_no; /*!< page number where the latest undo
log record was catenated; during
rollback the page from which the latest
undo record was chosen */
- ulint top_offset; /* offset of the latest undo record,
+ ulint top_offset; /*!< offset of the latest undo record,
i.e., the topmost element in the undo
log if we think of it as a stack */
- dulint top_undo_no; /* undo number of the latest record */
- page_t* guess_page; /* guess for the buffer frame where
+ undo_no_t top_undo_no; /*!< undo number of the latest record */
+ buf_block_t* guess_block; /*!< guess for the buffer block where
the top page might reside */
/*-----------------------------*/
UT_LIST_NODE_T(trx_undo_t) undo_list;
- /* undo log objects in the rollback
+ /*!< undo log objects in the rollback
segment are chained into lists */
};
+#endif /* !UNIV_HOTBACKUP */
-/* The offset of the undo log page header on pages of the undo log */
+/** The offset of the undo log page header on pages of the undo log */
#define TRX_UNDO_PAGE_HDR FSEG_PAGE_DATA
/*-------------------------------------------------------------*/
-/* Transaction undo log page header offsets */
-#define TRX_UNDO_PAGE_TYPE 0 /* TRX_UNDO_INSERT or
+/** Transaction undo log page header offsets */
+/* @{ */
+#define TRX_UNDO_PAGE_TYPE 0 /*!< TRX_UNDO_INSERT or
TRX_UNDO_UPDATE */
-#define TRX_UNDO_PAGE_START 2 /* Byte offset where the undo log
+#define TRX_UNDO_PAGE_START 2 /*!< Byte offset where the undo log
records for the LATEST transaction
start on this page (remember that
in an update undo log, the first page
can contain several undo logs) */
-#define TRX_UNDO_PAGE_FREE 4 /* On each page of the undo log this
+#define TRX_UNDO_PAGE_FREE 4 /*!< On each page of the undo log this
field contains the byte offset of the
first free byte on the page */
-#define TRX_UNDO_PAGE_NODE 6 /* The file list node in the chain
+#define TRX_UNDO_PAGE_NODE 6 /*!< The file list node in the chain
of undo log pages */
/*-------------------------------------------------------------*/
#define TRX_UNDO_PAGE_HDR_SIZE (6 + FLST_NODE_SIZE)
+ /*!< Size of the transaction undo
+ log page header, in bytes */
+/* @} */
-/* An update undo segment with just one page can be reused if it has
-< this number bytes used; we must leave space at least for one new undo
+/** An update undo segment with just one page can be reused if it has
+fewer than this many bytes used; we must leave space for at least one new undo
log header on the page */
#define TRX_UNDO_PAGE_REUSE_LIMIT (3 * UNIV_PAGE_SIZE / 4)
@@ -420,62 +457,67 @@ allowed to have zero undo records, but if the segment extends to several
pages, then all the rest of the pages must contain at least one undo log
record. */
-/* The offset of the undo log segment header on the first page of the undo
+/** The offset of the undo log segment header on the first page of the undo
log segment */
#define TRX_UNDO_SEG_HDR (TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE)
+/** Undo log segment header */
+/* @{ */
/*-------------------------------------------------------------*/
-#define TRX_UNDO_STATE 0 /* TRX_UNDO_ACTIVE, ... */
-#define TRX_UNDO_LAST_LOG 2 /* Offset of the last undo log header
+#define TRX_UNDO_STATE 0 /*!< TRX_UNDO_ACTIVE, ... */
+#define TRX_UNDO_LAST_LOG 2 /*!< Offset of the last undo log header
on the segment header page, 0 if
none */
-#define TRX_UNDO_FSEG_HEADER 4 /* Header for the file segment which
+#define TRX_UNDO_FSEG_HEADER 4 /*!< Header for the file segment which
the undo log segment occupies */
#define TRX_UNDO_PAGE_LIST (4 + FSEG_HEADER_SIZE)
- /* Base node for the list of pages in
+ /*!< Base node for the list of pages in
the undo log segment; defined only on
the undo log segment's first page */
/*-------------------------------------------------------------*/
-/* Size of the undo log segment header */
+/** Size of the undo log segment header */
#define TRX_UNDO_SEG_HDR_SIZE (4 + FSEG_HEADER_SIZE + FLST_BASE_NODE_SIZE)
+/* @} */
-/* The undo log header. There can be several undo log headers on the first
+/** The undo log header. There can be several undo log headers on the first
page of an update undo log segment. */
+/* @{ */
/*-------------------------------------------------------------*/
-#define TRX_UNDO_TRX_ID 0 /* Transaction id */
-#define TRX_UNDO_TRX_NO 8 /* Transaction number of the
+#define TRX_UNDO_TRX_ID 0 /*!< Transaction id */
+#define TRX_UNDO_TRX_NO 8 /*!< Transaction number of the
transaction; defined only if the log
is in a history list */
-#define TRX_UNDO_DEL_MARKS 16 /* Defined only in an update undo
+#define TRX_UNDO_DEL_MARKS 16 /*!< Defined only in an update undo
log: TRUE if the transaction may have
done delete markings of records, and
thus purge is necessary */
-#define TRX_UNDO_LOG_START 18 /* Offset of the first undo log record
+#define TRX_UNDO_LOG_START 18 /*!< Offset of the first undo log record
of this log on the header page; purge
may remove undo log record from the
log start, and therefore this is not
necessarily the same as this log
header end offset */
-#define TRX_UNDO_XID_EXISTS 20 /* TRUE if undo log header includes
+#define TRX_UNDO_XID_EXISTS 20 /*!< TRUE if undo log header includes
X/Open XA transaction identification
XID */
-#define TRX_UNDO_DICT_TRANS 21 /* TRUE if the transaction is a table
+#define TRX_UNDO_DICT_TRANS 21 /*!< TRUE if the transaction is a table
create, index create, or drop
transaction: in recovery
the transaction cannot be rolled back
in the usual way: a 'rollback' rather
means dropping the created or dropped
table, if it still exists */
-#define TRX_UNDO_TABLE_ID 22 /* Id of the table if the preceding
+#define TRX_UNDO_TABLE_ID 22 /*!< Id of the table if the preceding
field is TRUE */
-#define TRX_UNDO_NEXT_LOG 30 /* Offset of the next undo log header
+#define TRX_UNDO_NEXT_LOG 30 /*!< Offset of the next undo log header
on this page, 0 if none */
-#define TRX_UNDO_PREV_LOG 32 /* Offset of the previous undo log
+#define TRX_UNDO_PREV_LOG 32 /*!< Offset of the previous undo log
header on this page, 0 if none */
-#define TRX_UNDO_HISTORY_NODE 34 /* If the log is put to the history
+#define TRX_UNDO_HISTORY_NODE 34 /*!< If the log is put to the history
list, the file list node is here */
/*-------------------------------------------------------------*/
+/** Size of the undo log header without XID information */
#define TRX_UNDO_LOG_OLD_HDR_SIZE (34 + FLST_NODE_SIZE)
/* Note: the writing of the undo log old header is coded by a log record
@@ -486,15 +528,21 @@ is not needed by the user. The XID wastes about 150 bytes of space in every
undo log. In the history list we may have millions of undo logs, which means
quite a large overhead. */
-/* X/Open XA Transaction Identification (XID) */
-
+/** X/Open XA Transaction Identification (XID) */
+/* @{ */
+/** xid_t::formatID */
#define TRX_UNDO_XA_FORMAT (TRX_UNDO_LOG_OLD_HDR_SIZE)
+/** xid_t::gtrid_length */
#define TRX_UNDO_XA_TRID_LEN (TRX_UNDO_XA_FORMAT + 4)
+/** xid_t::bqual_length */
#define TRX_UNDO_XA_BQUAL_LEN (TRX_UNDO_XA_TRID_LEN + 4)
+/** Distributed transaction identifier data */
#define TRX_UNDO_XA_XID (TRX_UNDO_XA_BQUAL_LEN + 4)
/*--------------------------------------------------------------*/
#define TRX_UNDO_LOG_XA_HDR_SIZE (TRX_UNDO_XA_XID + XIDDATASIZE)
- /* Total size of the header with the XA XID */
+ /*!< Total size of the undo log header
+ with the XA XID */
+/* @} */
#ifndef UNIV_NONINL
#include "trx0undo.ic"
diff --git a/storage/innobase/include/trx0undo.ic b/storage/innobase/include/trx0undo.ic
index f28f36ade03..2d289b34ef1 100644
--- a/storage/innobase/include/trx0undo.ic
+++ b/storage/innobase/include/trx0undo.ic
@@ -1,24 +1,43 @@
-/******************************************************
-Transaction undo log
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0undo.ic
+Transaction undo log
Created 3/26/1996 Heikki Tuuri
*******************************************************/
#include "data0type.h"
+#include "page0page.h"
-/***************************************************************************
-Builds a roll pointer dulint. */
+#ifndef UNIV_HOTBACKUP
+/***********************************************************************//**
+Builds a roll pointer.
+@return roll pointer */
UNIV_INLINE
-dulint
+roll_ptr_t
trx_undo_build_roll_ptr(
/*====================*/
- /* out: roll pointer */
- ibool is_insert, /* in: TRUE if insert undo log */
- ulint rseg_id, /* in: rollback segment id */
- ulint page_no, /* in: page number */
- ulint offset) /* in: offset of the undo entry within page */
+ ibool is_insert, /*!< in: TRUE if insert undo log */
+ ulint rseg_id, /*!< in: rollback segment id */
+ ulint page_no, /*!< in: page number */
+ ulint offset) /*!< in: offset of the undo entry within page */
{
#if DATA_ROLL_PTR_LEN != 7
# error "DATA_ROLL_PTR_LEN != 7"
@@ -32,17 +51,18 @@ trx_undo_build_roll_ptr(
+ offset));
}
-/***************************************************************************
-Decodes a roll pointer dulint. */
+/***********************************************************************//**
+Decodes a roll pointer. */
UNIV_INLINE
void
trx_undo_decode_roll_ptr(
/*=====================*/
- dulint roll_ptr, /* in: roll pointer */
- ibool* is_insert, /* out: TRUE if insert undo log */
- ulint* rseg_id, /* out: rollback segment id */
- ulint* page_no, /* out: page number */
- ulint* offset) /* out: offset of the undo entry within page */
+ roll_ptr_t roll_ptr, /*!< in: roll pointer */
+ ibool* is_insert, /*!< out: TRUE if insert undo log */
+ ulint* rseg_id, /*!< out: rollback segment id */
+ ulint* page_no, /*!< out: page number */
+ ulint* offset) /*!< out: offset of the undo
+ entry within page */
{
ulint low;
ulint high;
@@ -64,14 +84,14 @@ trx_undo_decode_roll_ptr(
+ (low / 256) / 256;
}
-/***************************************************************************
-Returns TRUE if the roll pointer is of the insert type. */
+/***********************************************************************//**
+Returns TRUE if the roll pointer is of the insert type.
+@return TRUE if insert undo log */
UNIV_INLINE
ibool
trx_undo_roll_ptr_is_insert(
/*========================*/
- /* out: TRUE if insert undo log */
- dulint roll_ptr) /* in: roll pointer */
+ roll_ptr_t roll_ptr) /*!< in: roll pointer */
{
ulint high;
#if DATA_ROLL_PTR_LEN != 7
@@ -84,8 +104,9 @@ trx_undo_roll_ptr_is_insert(
return(high / (256 * 256 * 128));
}
+#endif /* !UNIV_HOTBACKUP */
-/*********************************************************************
+/*****************************************************************//**
Writes a roll ptr to an index page. In case that the size changes in
some future version, this function should be used instead of
mach_write_... */
@@ -93,24 +114,26 @@ UNIV_INLINE
void
trx_write_roll_ptr(
/*===============*/
- byte* ptr, /* in: pointer to memory where written */
- dulint roll_ptr) /* in: roll ptr */
+ byte* ptr, /*!< in: pointer to memory where
+ written */
+ roll_ptr_t roll_ptr) /*!< in: roll ptr */
{
- ut_ad(DATA_ROLL_PTR_LEN == 7);
-
+#if DATA_ROLL_PTR_LEN != 7
+# error "DATA_ROLL_PTR_LEN != 7"
+#endif
mach_write_to_7(ptr, roll_ptr);
}
-/*********************************************************************
+/*****************************************************************//**
Reads a roll ptr from an index page. In case that the roll ptr size
changes in some future version, this function should be used instead of
-mach_read_... */
+mach_read_...
+@return roll ptr */
UNIV_INLINE
-dulint
+roll_ptr_t
trx_read_roll_ptr(
/*==============*/
- /* out: roll ptr */
- byte* ptr) /* in: pointer to memory from where to read */
+ const byte* ptr) /*!< in: pointer to memory from where to read */
{
#if DATA_ROLL_PTR_LEN != 7
# error "DATA_ROLL_PTR_LEN != 7"
@@ -118,65 +141,62 @@ trx_read_roll_ptr(
return(mach_read_from_7(ptr));
}
-/**********************************************************************
-Gets an undo log page and x-latches it. */
+#ifndef UNIV_HOTBACKUP
+/******************************************************************//**
+Gets an undo log page and x-latches it.
+@return pointer to page x-latched */
UNIV_INLINE
page_t*
trx_undo_page_get(
/*==============*/
- /* out: pointer to page x-latched */
- ulint space, /* in: space where placed */
- ulint page_no, /* in: page number */
- mtr_t* mtr) /* in: mtr */
+ ulint space, /*!< in: space where placed */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page_no, /*!< in: page number */
+ mtr_t* mtr) /*!< in: mtr */
{
- page_t* page;
-
- page = buf_page_get(space, page_no, RW_X_LATCH, mtr);
+ buf_block_t* block = buf_page_get(space, zip_size, page_no,
+ RW_X_LATCH, mtr);
+ buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_TRX_UNDO_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
-
- return(page);
+ return(buf_block_get_frame(block));
}
-/**********************************************************************
-Gets an undo log page and s-latches it. */
+/******************************************************************//**
+Gets an undo log page and s-latches it.
+@return pointer to page s-latched */
UNIV_INLINE
page_t*
trx_undo_page_get_s_latched(
/*========================*/
- /* out: pointer to page s-latched */
- ulint space, /* in: space where placed */
- ulint page_no, /* in: page number */
- mtr_t* mtr) /* in: mtr */
+ ulint space, /*!< in: space where placed */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page_no, /*!< in: page number */
+ mtr_t* mtr) /*!< in: mtr */
{
- page_t* page;
-
- page = buf_page_get(space, page_no, RW_S_LATCH, mtr);
-
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_TRX_UNDO_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
+ buf_block_t* block = buf_page_get(space, zip_size, page_no,
+ RW_S_LATCH, mtr);
+ buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
- return(page);
+ return(buf_block_get_frame(block));
}
-/**********************************************************************
+/******************************************************************//**
Returns the start offset of the undo log records of the specified undo
-log on the page. */
+log on the page.
+@return start offset */
UNIV_INLINE
ulint
trx_undo_page_get_start(
/*====================*/
- /* out: start offset */
- page_t* undo_page,/* in: undo log page */
- ulint page_no,/* in: undo log header page number */
- ulint offset) /* in: undo log header offset on page */
+ page_t* undo_page,/*!< in: undo log page */
+ ulint page_no,/*!< in: undo log header page number */
+ ulint offset) /*!< in: undo log header offset on page */
{
ulint start;
- if (page_no == buf_frame_get_page_no(undo_page)) {
+ if (page_no == page_get_page_no(undo_page)) {
start = mach_read_from_2(offset + undo_page
+ TRX_UNDO_LOG_START);
@@ -187,22 +207,22 @@ trx_undo_page_get_start(
return(start);
}
-/**********************************************************************
+/******************************************************************//**
Returns the end offset of the undo log records of the specified undo
-log on the page. */
+log on the page.
+@return end offset */
UNIV_INLINE
ulint
trx_undo_page_get_end(
/*==================*/
- /* out: end offset */
- page_t* undo_page,/* in: undo log page */
- ulint page_no,/* in: undo log header page number */
- ulint offset) /* in: undo log header offset on page */
+ page_t* undo_page,/*!< in: undo log page */
+ ulint page_no,/*!< in: undo log header page number */
+ ulint offset) /*!< in: undo log header offset on page */
{
trx_ulogf_t* log_hdr;
ulint end;
- if (page_no == buf_frame_get_page_no(undo_page)) {
+ if (page_no == page_get_page_no(undo_page)) {
log_hdr = undo_page + offset;
@@ -220,22 +240,22 @@ trx_undo_page_get_end(
return(end);
}
-/**********************************************************************
+/******************************************************************//**
Returns the previous undo record on the page in the specified log, or
-NULL if none exists. */
+NULL if none exists.
+@return pointer to record, NULL if none */
UNIV_INLINE
trx_undo_rec_t*
trx_undo_page_get_prev_rec(
/*=======================*/
- /* out: pointer to record, NULL if none */
- trx_undo_rec_t* rec, /* in: undo log record */
- ulint page_no,/* in: undo log header page number */
- ulint offset) /* in: undo log header offset on page */
+ trx_undo_rec_t* rec, /*!< in: undo log record */
+ ulint page_no,/*!< in: undo log header page number */
+ ulint offset) /*!< in: undo log header offset on page */
{
page_t* undo_page;
ulint start;
- undo_page = buf_frame_align(rec);
+ undo_page = (page_t*) ut_align_down(rec, UNIV_PAGE_SIZE);
start = trx_undo_page_get_start(undo_page, page_no, offset);
@@ -247,23 +267,23 @@ trx_undo_page_get_prev_rec(
return(undo_page + mach_read_from_2(rec - 2));
}
-/**********************************************************************
+/******************************************************************//**
Returns the next undo log record on the page in the specified log, or
-NULL if none exists. */
+NULL if none exists.
+@return pointer to record, NULL if none */
UNIV_INLINE
trx_undo_rec_t*
trx_undo_page_get_next_rec(
/*=======================*/
- /* out: pointer to record, NULL if none */
- trx_undo_rec_t* rec, /* in: undo log record */
- ulint page_no,/* in: undo log header page number */
- ulint offset) /* in: undo log header offset on page */
+ trx_undo_rec_t* rec, /*!< in: undo log record */
+ ulint page_no,/*!< in: undo log header page number */
+ ulint offset) /*!< in: undo log header offset on page */
{
page_t* undo_page;
ulint end;
ulint next;
- undo_page = buf_frame_align(rec);
+ undo_page = (page_t*) ut_align_down(rec, UNIV_PAGE_SIZE);
end = trx_undo_page_get_end(undo_page, page_no, offset);
@@ -277,17 +297,17 @@ trx_undo_page_get_next_rec(
return(undo_page + next);
}
-/**********************************************************************
+/******************************************************************//**
Returns the last undo record on the page in the specified undo log, or
-NULL if none exists. */
+NULL if none exists.
+@return pointer to record, NULL if none */
UNIV_INLINE
trx_undo_rec_t*
trx_undo_page_get_last_rec(
/*=======================*/
- /* out: pointer to record, NULL if none */
- page_t* undo_page,/* in: undo log page */
- ulint page_no,/* in: undo log header page number */
- ulint offset) /* in: undo log header offset on page */
+ page_t* undo_page,/*!< in: undo log page */
+ ulint page_no,/*!< in: undo log header page number */
+ ulint offset) /*!< in: undo log header offset on page */
{
ulint start;
ulint end;
@@ -303,17 +323,17 @@ trx_undo_page_get_last_rec(
return(undo_page + mach_read_from_2(undo_page + end - 2));
}
-/**********************************************************************
+/******************************************************************//**
Returns the first undo record on the page in the specified undo log, or
-NULL if none exists. */
+NULL if none exists.
+@return pointer to record, NULL if none */
UNIV_INLINE
trx_undo_rec_t*
trx_undo_page_get_first_rec(
/*========================*/
- /* out: pointer to record, NULL if none */
- page_t* undo_page,/* in: undo log page */
- ulint page_no,/* in: undo log header page number */
- ulint offset) /* in: undo log header offset on page */
+ page_t* undo_page,/*!< in: undo log page */
+ ulint page_no,/*!< in: undo log header page number */
+ ulint offset) /*!< in: undo log header offset on page */
{
ulint start;
ulint end;
@@ -328,3 +348,4 @@ trx_undo_page_get_first_rec(
return(undo_page + start);
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/trx0xa.h b/storage/innobase/include/trx0xa.h
index df85cd663cb..e0dd8a1af5b 100644
--- a/storage/innobase/include/trx0xa.h
+++ b/storage/innobase/include/trx0xa.h
@@ -1,3 +1,21 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
/*
* Start of xa.h header
*
@@ -11,172 +29,41 @@
*/
#ifndef XIDDATASIZE
-#define XIDDATASIZE 128 /* size in bytes */
-#define MAXGTRIDSIZE 64 /* maximum size in bytes of gtrid */
-#define MAXBQUALSIZE 64 /* maximum size in bytes of bqual */
+/** Sizes of transaction identifier */
+#define XIDDATASIZE 128 /*!< maximum size of a transaction
+ identifier, in bytes */
+#define MAXGTRIDSIZE 64 /*!< maximum size in bytes of gtrid */
+#define MAXBQUALSIZE 64 /*!< maximum size in bytes of bqual */
+/** X/Open XA distributed transaction identifier */
struct xid_t {
- long formatID; /* format identifier */
- long gtrid_length; /* value from 1 through 64 */
- long bqual_length; /* value from 1 through 64 */
- char data[XIDDATASIZE];
+ long formatID; /*!< format identifier; -1
+ means that the XID is null */
+ long gtrid_length; /*!< value from 1 through 64 */
+ long bqual_length; /*!< value from 1 through 64 */
+ char data[XIDDATASIZE]; /*!< distributed transaction
+ identifier */
};
+/** X/Open XA distributed transaction identifier */
typedef struct xid_t XID;
#endif
-/*
- * A value of -1 in formatID means that the XID is null.
- */
-
-
-#ifdef NOTDEFINED
-/* Let us comment this out to remove compiler errors!!!!!!!!!!!! */
-
-/*
- * Declarations of routines by which RMs call TMs:
- */
-extern int ax_reg __P((int, XID *, long));
-extern int ax_unreg __P((int, long));
-
-/*
- * XA Switch Data Structure
- */
-#define RMNAMESZ 32 /* length of resource manager name, */
- /* including the null terminator */
-#define MAXINFOSIZE 256 /* maximum size in bytes of xa_info */
- /* strings, including the null
- terminator */
-
-
-struct xa_switch_t {
- char name[RMNAMESZ]; /* name of resource manager */
- long flags; /* resource manager specific options */
- long version; /* must be 0 */
- int (*xa_open_entry) /* xa_open function pointer */
- __P((char *, int, long));
- int (*xa_close_entry) /* xa_close function pointer */
- __P((char *, int, long));
- int (*xa_start_entry) /* xa_start function pointer */
- __P((XID *, int, long));
- int (*xa_end_entry) /* xa_end function pointer */
- __P((XID *, int, long));
- int (*xa_rollback_entry) /* xa_rollback function pointer */
- __P((XID *, int, long));
- int (*xa_prepare_entry) /* xa_prepare function pointer */
- __P((XID *, int, long));
- int (*xa_commit_entry) /* xa_commit function pointer */
- __P((XID *, int, long));
- int (*xa_recover_entry) /* xa_recover function pointer */
- __P((XID *, long, int, long));
- int (*xa_forget_entry) /* xa_forget function pointer */
- __P((XID *, int, long));
- int (*xa_complete_entry) /* xa_complete function pointer */
- __P((int *, int *, int, long));
-};
-#endif /* NOTDEFINED */
-
-
-/*
- * Flag definitions for the RM switch
- */
-#define TMNOFLAGS 0x00000000L /* no resource manager features
- selected */
-#define TMREGISTER 0x00000001L /* resource manager dynamically
- registers */
-#define TMNOMIGRATE 0x00000002L /* resource manager does not support
- association migration */
-#define TMUSEASYNC 0x00000004L /* resource manager supports
- asynchronous operations */
-/*
- * Flag definitions for xa_ and ax_ routines
- */
-/* use TMNOFLAGGS, defined above, when not specifying other flags */
-#define TMASYNC 0x80000000L /* perform routine asynchronously */
-#define TMONEPHASE 0x40000000L /* caller is using one-phase commit
- optimisation */
-#define TMFAIL 0x20000000L /* dissociates caller and marks
- transaction branch rollback-only */
-#define TMNOWAIT 0x10000000L /* return if blocking condition
- exists */
-#define TMRESUME 0x08000000L /* caller is resuming association with
- suspended transaction branch */
-#define TMSUCCESS 0x04000000L /* dissociate caller from transaction
- branch */
-#define TMSUSPEND 0x02000000L /* caller is suspending, not ending,
- association */
-#define TMSTARTRSCAN 0x01000000L /* start a recovery scan */
-#define TMENDRSCAN 0x00800000L /* end a recovery scan */
-#define TMMULTIPLE 0x00400000L /* wait for any asynchronous
- operation */
-#define TMJOIN 0x00200000L /* caller is joining existing
- transaction branch */
-#define TMMIGRATE 0x00100000L /* caller intends to perform
- migration */
-
-/*
- * ax_() return codes (transaction manager reports to resource manager)
- */
-#define TM_JOIN 2 /* caller is joining existing
- transaction branch */
-#define TM_RESUME 1 /* caller is resuming association with
- suspended transaction branch */
-#define TM_OK 0 /* normal execution */
-#define TMER_TMERR -1 /* an error occurred in the transaction
- manager */
-#define TMER_INVAL -2 /* invalid arguments were given */
-#define TMER_PROTO -3 /* routine invoked in an improper
- context */
-
-/*
- * xa_() return codes (resource manager reports to transaction manager)
- */
-#define XA_RBBASE 100 /* The inclusive lower bound of the
- rollback codes */
-#define XA_RBROLLBACK XA_RBBASE /* The rollback was caused by an
- unspecified reason */
-#define XA_RBCOMMFAIL XA_RBBASE+1 /* The rollback was caused by a
- communication failure */
-#define XA_RBDEADLOCK XA_RBBASE+2 /* A deadlock was detected */
-#define XA_RBINTEGRITY XA_RBBASE+3 /* A condition that violates the
- integrity of the resources was
- detected */
-#define XA_RBOTHER XA_RBBASE+4 /* The resource manager rolled back the
- transaction branch for a reason not
- on this list */
-#define XA_RBPROTO XA_RBBASE+5 /* A protocol error occurred in the
- resource manager */
-#define XA_RBTIMEOUT XA_RBBASE+6 /* A transaction branch took
- too long */
-#define XA_RBTRANSIENT XA_RBBASE+7 /* May retry the transaction branch */
-#define XA_RBEND XA_RBTRANSIENT /* The inclusive upper bound of the
- rollback codes */
-#define XA_NOMIGRATE 9 /* resumption must occur where
- suspension occurred */
-#define XA_HEURHAZ 8 /* the transaction branch may have
- been heuristically completed */
-#define XA_HEURCOM 7 /* the transaction branch has been
- heuristically committed */
-#define XA_HEURRB 6 /* the transaction branch has been
- heuristically rolled back */
-#define XA_HEURMIX 5 /* the transaction branch has been
- heuristically committed and rolled
- back */
-#define XA_RETRY 4 /* routine returned with no effect and
- may be re-issued */
-#define XA_RDONLY 3 /* the transaction branch was read-only
- and has been committed */
-#define XA_OK 0 /* normal execution */
-#define XAER_ASYNC -2 /* asynchronous operation already
+/** X/Open XA distributed transaction status codes */
+/* @{ */
+#define XA_OK 0 /*!< normal execution */
+#define XAER_ASYNC -2 /*!< asynchronous operation already
outstanding */
-#define XAER_RMERR -3 /* a resource manager error occurred in
- the transaction branch */
-#define XAER_NOTA -4 /* the XID is not valid */
-#define XAER_INVAL -5 /* invalid arguments were given */
-#define XAER_PROTO -6 /* routine invoked in an improper
+#define XAER_RMERR -3 /*!< a resource manager error
+ occurred in the transaction
+ branch */
+#define XAER_NOTA -4 /*!< the XID is not valid */
+#define XAER_INVAL -5 /*!< invalid arguments were given */
+#define XAER_PROTO -6 /*!< routine invoked in an improper
context */
-#define XAER_RMFAIL -7 /* resource manager unavailable */
-#define XAER_DUPID -8 /* the XID already exists */
-#define XAER_OUTSIDE -9 /* resource manager doing work outside
- transaction */
+#define XAER_RMFAIL -7 /*!< resource manager unavailable */
+#define XAER_DUPID -8 /*!< the XID already exists */
+#define XAER_OUTSIDE -9 /*!< resource manager doing
+ work outside transaction */
+/* @} */
#endif /* ifndef XA_H */
/*
* End of xa.h header
diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
index 8ab62e655ff..2081e136590 100644
--- a/storage/innobase/include/univ.i
+++ b/storage/innobase/include/univ.i
@@ -1,7 +1,38 @@
-/***************************************************************************
-Version control for database, common definitions, and include files
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+Copyright (c) 2009, Sun Microsystems, Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+Portions of this file contain modifications contributed and copyrighted by
+Sun Microsystems, Inc. Those modifications are gratefully acknowledged and
+are described briefly in the InnoDB documentation. The contributions by
+Sun Microsystems are incorporated with their permission, and subject to the
+conditions contained in the file COPYING.Sun_Microsystems.
-(c) 1994 - 2000 Innobase Oy
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/***********************************************************************//**
+@file include/univ.i
+Version control for database, common definitions, and include files
Created 1/20/1994 Heikki Tuuri
****************************************************************************/
@@ -9,16 +40,63 @@ Created 1/20/1994 Heikki Tuuri
#ifndef univ_i
#define univ_i
+#ifdef UNIV_HOTBACKUP
+#include "hb_univ.i"
+#endif /* UNIV_HOTBACKUP */
+
+#define INNODB_VERSION_MAJOR 1
+#define INNODB_VERSION_MINOR 0
+#define INNODB_VERSION_BUGFIX 6
+
+/* The following is the InnoDB version as shown in
+SELECT plugin_version FROM information_schema.plugins;
+calculated in make_version_string() in sql/sql_show.cc like this:
+"version >> 8" . "version & 0xff"
+Because the version is shown with only one dot, we skip the last
+component, i.e. we show M.N.P as M.N. */
+#define INNODB_VERSION_SHORT \
+ (INNODB_VERSION_MAJOR << 8 | INNODB_VERSION_MINOR)
+
+/* auxiliary macros to help creating the version as string */
+#define __INNODB_VERSION(a, b, c) (#a "." #b "." #c)
+#define _INNODB_VERSION(a, b, c) __INNODB_VERSION(a, b, c)
+
+#define INNODB_VERSION_STR \
+ _INNODB_VERSION(INNODB_VERSION_MAJOR, \
+ INNODB_VERSION_MINOR, \
+ INNODB_VERSION_BUGFIX)
+
+#define REFMAN "http://dev.mysql.com/doc/refman/5.1/en/"
+
+#ifdef MYSQL_DYNAMIC_PLUGIN
+/* In the dynamic plugin, redefine some externally visible symbols
+in order not to conflict with the symbols of a builtin InnoDB. */
+
+/* Rename all C++ classes that contain virtual functions, because we
+have not figured out how to apply the visibility=hidden attribute to
+the virtual method table (vtable) in GCC 3. */
+# define ha_innobase ha_innodb
+#endif /* MYSQL_DYNAMIC_PLUGIN */
+
+/* If any of the following macros is defined at this point, the code
+from the "right" plug.in was executed and we do not need to include
+ut0auxconf.h, which would either define the same macros or be
+empty. */
+#if !defined(HAVE_IB_GCC_ATOMIC_BUILTINS) \
+ && !defined(HAVE_IB_ATOMIC_PTHREAD_T_GCC) \
+ && !defined(HAVE_IB_SOLARIS_ATOMICS) \
+ && !defined(HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS) \
+ && !defined(SIZEOF_PTHREAD_T) \
+ && !defined(HAVE_IB_PAUSE_INSTRUCTION)
+# include "ut0auxconf.h"
+#endif
+
#if (defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)) && !defined(MYSQL_SERVER) && !defined(__WIN__)
# undef __WIN__
# define __WIN__
# include <windows.h>
-# if !defined(WIN64) && !defined(_WIN64)
-# define UNIV_CAN_USE_X86_ASSEMBLER
-# endif
-
# ifdef _NT_
# define __NT__
# endif
@@ -30,35 +108,32 @@ Created 1/20/1994 Heikki Tuuri
in compiling more Posix-compatible. These headers also define __WIN__
if we are compiling on Windows. */
+#ifndef UNIV_HOTBACKUP
# include <my_global.h>
# include <my_pthread.h>
+#endif /* UNIV_HOTBACKUP */
/* Include <sys/stat.h> to get S_I... macros defined for os0file.c */
# include <sys/stat.h>
-
-# undef PACKAGE
-# undef VERSION
+# if !defined(__NETWARE__) && !defined(__WIN__)
+# include <sys/mman.h> /* mmap() for os0proc.c */
+# endif
/* Include the header file generated by GNU autoconf */
# ifndef __WIN__
-# include "config.h"
+# ifndef UNIV_HOTBACKUP
+# include "config.h"
+# endif /* UNIV_HOTBACKUP */
# endif
# ifdef HAVE_SCHED_H
# include <sched.h>
# endif
-/* When compiling for Itanium IA64, undefine the flag below to prevent use
-of the 32-bit x86 assembler in mutex operations. */
-
-# if defined(__WIN__) && !defined(WIN64) && !defined(_WIN64)
-# define UNIV_CAN_USE_X86_ASSEMBLER
-# endif
-
/* We only try to do explicit inlining of functions with gcc and
-Microsoft Visual C++ */
+Sun Studio */
-# if !defined(__GNUC__)
+# if !defined(__GNUC__) && !(defined(__SUNPRO_C) || defined(__SUNPRO_CC))
# undef UNIV_MUST_NOT_INLINE /* Remove compiler warning */
# define UNIV_MUST_NOT_INLINE
# endif
@@ -80,15 +155,42 @@ memory is read outside the allocated blocks. */
#define UNIV_INIT_MEM_TO_ZERO
*/
-/* Make a non-inline debug version */
+/* When this macro is defined, additional test functions will be
+compiled. These functions live at the end of each relevant source file
+and have a "test_" prefix. They are not called from anywhere in the
+code; they can be called from gdb, using the call command, after
+innobase_start_or_create_for_mysql() has executed.
+Not tested on Windows. */
+/*
+#define UNIV_COMPILE_TEST_FUNCS
+*/
#if 0
#define UNIV_DEBUG_VALGRIND /* Enable extra
Valgrind instrumentation */
-#define UNIV_DEBUG /* Enable ut_ad() assertions */
+#define UNIV_DEBUG_PRINT /* Enable the compilation of
+ some debug print functions */
+#define UNIV_AHI_DEBUG /* Enable adaptive hash index
+ debugging without UNIV_DEBUG */
+#define UNIV_BUF_DEBUG /* Enable buffer pool
+ debugging without UNIV_DEBUG */
+#define UNIV_DEBUG /* Enable ut_ad() assertions
+ and disable UNIV_INLINE */
+#define UNIV_DEBUG_LOCK_VALIDATE /* Enable
+ ut_ad(lock_rec_validate_page())
+ assertions. */
+#define UNIV_DEBUG_FILE_ACCESSES /* Debug .ibd file access
+ (field file_page_was_freed
+ in buf_page_t) */
+#define UNIV_LRU_DEBUG /* debug the buffer pool LRU */
+#define UNIV_HASH_DEBUG /* debug HASH_ macros */
#define UNIV_LIST_DEBUG /* debug UT_LIST_ macros */
+#define UNIV_LOG_LSN_DEBUG /* write LSN to the redo log;
+this will break redo log file compatibility, but it may be useful when
+debugging redo log application problems. */
#define UNIV_MEM_DEBUG /* detect memory leaks etc */
-#define UNIV_IBUF_DEBUG /* debug the insert buffer;
+#define UNIV_IBUF_DEBUG /* debug the insert buffer */
+#define UNIV_IBUF_COUNT_DEBUG /* debug the insert buffer;
this limits the database to IBUF_COUNT_N_SPACES and IBUF_COUNT_N_PAGES,
and the insert buffer must be empty when the database is started */
#define UNIV_SYNC_DEBUG /* debug mutex and latch
@@ -102,6 +204,10 @@ operations (very slow); also UNIV_DEBUG must be defined */
in sync0sync.c */
#define UNIV_BTR_PRINT /* enable functions for
printing B-trees */
+#define UNIV_ZIP_DEBUG /* extensive consistency checks
+ for compressed pages */
+#define UNIV_ZIP_COPY /* call page_zip_copy_recs()
+ more often */
#endif
#define UNIV_BTR_DEBUG /* check B-tree links */
@@ -129,13 +235,23 @@ by one. */
/* the above option enables basic recovery debugging:
new allocated file pages are reset */
-#if (!defined(UNIV_DEBUG) && !defined(INSIDE_HA_INNOBASE_CC) && !defined(UNIV_MUST_NOT_INLINE))
+/* Linkage specifier for non-static InnoDB symbols (variables and functions)
+that are only referenced from within InnoDB, not from MySQL */
+#if defined(__GNUC__) && (__GNUC__ >= 4) || defined(__INTEL_COMPILER)
+# define UNIV_INTERN __attribute__((visibility ("hidden")))
+#else
+# define UNIV_INTERN
+#endif
+
+#if (!defined(UNIV_DEBUG) && !defined(UNIV_MUST_NOT_INLINE))
/* Definition for inline version */
#ifdef __WIN__
-#define UNIV_INLINE __inline
+# define UNIV_INLINE __inline
+#elif defined(__SUNPRO_CC) || defined(__SUNPRO_C)
+# define UNIV_INLINE static inline
#else
-#define UNIV_INLINE static __inline__
+# define UNIV_INLINE static __inline__
#endif
#else
@@ -143,7 +259,7 @@ by one. */
definitions: */
#define UNIV_NONINL
-#define UNIV_INLINE
+#define UNIV_INLINE UNIV_INTERN
#endif /* UNIV_DEBUG */
@@ -168,11 +284,10 @@ management to ensure correct alignment for doubles etc. */
========================
*/
-/* The universal page size of the database */
-#define UNIV_PAGE_SIZE (2 * 8192) /* NOTE! Currently, this has to be a
- power of 2 */
/* The 2-logarithm of UNIV_PAGE_SIZE: */
#define UNIV_PAGE_SIZE_SHIFT 14
+/* The universal page size of the database */
+#define UNIV_PAGE_SIZE (1 << UNIV_PAGE_SIZE_SHIFT)
/* Maximum number of parallel threads in a parallelized operation */
#define UNIV_MAX_PARALLELISM 32
@@ -211,15 +326,17 @@ typedef long int lint;
#endif
#ifdef __WIN__
-typedef __int64 ib_longlong;
-typedef unsigned __int64 ib_ulonglong;
-#else
+typedef __int64 ib_int64_t;
+typedef unsigned __int64 ib_uint64_t;
+#elif !defined(UNIV_HOTBACKUP)
/* Note: longlong and ulonglong come from MySQL headers. */
-typedef longlong ib_longlong;
-typedef ulonglong ib_ulonglong;
+typedef longlong ib_int64_t;
+typedef ulonglong ib_uint64_t;
#endif
+#ifndef UNIV_HOTBACKUP
typedef unsigned long long int ullint;
+#endif /* UNIV_HOTBACKUP */
#ifndef __WIN__
#if SIZEOF_LONG != SIZEOF_VOIDP
@@ -236,6 +353,9 @@ typedef unsigned long long int ullint;
/* Maximum value for a ulint */
#define ULINT_MAX ((ulint)(-2))
+/* Maximum value for ib_uint64_t */
+#define IB_ULONGLONG_MAX ((ib_uint64_t) (~0ULL))
+
/* This 'ibool' type is used within Innobase. Remember that different included
headers may define 'bool' differently. Do not assume that 'bool' is a ulint! */
#define ibool ulint
@@ -274,6 +394,19 @@ it is read. */
/* Minimize cache-miss latency by moving data at addr into a cache before
it is read or written. */
# define UNIV_PREFETCH_RW(addr) __builtin_prefetch(addr, 1, 3)
+/* Sun Studio includes sun_prefetch.h as of version 5.9 */
+#elif (defined(__SUNPRO_C) && __SUNPRO_C >= 0x590) \
+ || (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x590)
+# include <sun_prefetch.h>
+#if __SUNPRO_C >= 0x550
+# undef UNIV_INTERN
+# define UNIV_INTERN __hidden
+#endif /* __SUNPRO_C >= 0x550 */
+/* Use sun_prefetch when compiling with Sun Studio */
+# define UNIV_EXPECT(expr,value) (expr)
+# define UNIV_LIKELY_NULL(expr) (expr)
+# define UNIV_PREFETCH_R(addr) sun_prefetch_read_many(addr)
+# define UNIV_PREFETCH_RW(addr) sun_prefetch_write_many(addr)
#else
/* Dummy versions of the macros */
# define UNIV_EXPECT(expr,value) (expr)
@@ -311,6 +444,8 @@ typedef void* os_thread_ret_t;
# define UNIV_MEM_INVALID(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size)
# define UNIV_MEM_FREE(addr, size) VALGRIND_MAKE_MEM_NOACCESS(addr, size)
# define UNIV_MEM_ALLOC(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size)
+# define UNIV_MEM_DESC(addr, size, b) VALGRIND_CREATE_BLOCK(addr, size, b)
+# define UNIV_MEM_UNDESC(b) VALGRIND_DISCARD(b)
# define UNIV_MEM_ASSERT_RW(addr, size) do { \
const void* _p = (const void*) (ulint) \
VALGRIND_CHECK_MEM_IS_DEFINED(addr, size); \
@@ -334,6 +469,8 @@ typedef void* os_thread_ret_t;
# define UNIV_MEM_INVALID(addr, size) do {} while(0)
# define UNIV_MEM_FREE(addr, size) do {} while(0)
# define UNIV_MEM_ALLOC(addr, size) do {} while(0)
+# define UNIV_MEM_DESC(addr, size, b) do {} while(0)
+# define UNIV_MEM_UNDESC(b) do {} while(0)
# define UNIV_MEM_ASSERT_RW(addr, size) do {} while(0)
# define UNIV_MEM_ASSERT_W(addr, size) do {} while(0)
#endif
diff --git a/storage/innobase/include/usr0sess.h b/storage/innobase/include/usr0sess.h
index 3ed1ea21a4d..2c288f7d455 100644
--- a/storage/innobase/include/usr0sess.h
+++ b/storage/innobase/include/usr0sess.h
@@ -1,7 +1,24 @@
-/******************************************************
-Sessions
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1996 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file include/usr0sess.h
+Sessions
Created 6/25/1996 Heikki Tuuri
*******************************************************/
@@ -19,33 +36,31 @@ Created 6/25/1996 Heikki Tuuri
#include "data0data.h"
#include "rem0rec.h"
-/*************************************************************************
-Opens a session. */
-
+/*********************************************************************//**
+Opens a session.
+@return own: session object */
+UNIV_INTERN
sess_t*
sess_open(void);
/*============*/
- /* out, own: session object */
-/*************************************************************************
-Closes a session, freeing the memory occupied by it, if it is in a state
-where it should be closed. */
-
-ibool
-sess_try_close(
-/*===========*/
- /* out: TRUE if closed */
- sess_t* sess); /* in, own: session object */
+/*********************************************************************//**
+Closes a session, freeing the memory occupied by it. */
+UNIV_INTERN
+void
+sess_close(
+/*=======*/
+ sess_t* sess); /* in, own: session object */
/* The session handle. All fields are protected by the kernel mutex */
struct sess_struct{
- ulint state; /* state of the session */
- trx_t* trx; /* transaction object permanently
+ ulint state; /*!< state of the session */
+ trx_t* trx; /*!< transaction object permanently
assigned for the session: the
transaction instance designated by the
trx id changes, but the memory
structure is preserved */
UT_LIST_BASE_NODE_T(que_t)
- graphs; /* query graphs belonging to this
+ graphs; /*!< query graphs belonging to this
session */
};
diff --git a/storage/innobase/include/usr0sess.ic b/storage/innobase/include/usr0sess.ic
index c851d5745b9..35a75d75acc 100644
--- a/storage/innobase/include/usr0sess.ic
+++ b/storage/innobase/include/usr0sess.ic
@@ -1,7 +1,24 @@
-/******************************************************
-Sessions
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/usr0sess.ic
+Sessions
Created 6/25/1996 Heikki Tuuri
*******************************************************/
diff --git a/storage/innobase/include/usr0types.h b/storage/innobase/include/usr0types.h
index 311471c1a0e..6cc6f015613 100644
--- a/storage/innobase/include/usr0types.h
+++ b/storage/innobase/include/usr0types.h
@@ -1,7 +1,24 @@
-/******************************************************
-Users and sessions global types
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/usr0types.h
+Users and sessions global types
Created 6/25/1996 Heikki Tuuri
*******************************************************/
diff --git a/storage/innodb_plugin/include/ut0auxconf.h b/storage/innobase/include/ut0auxconf.h
index 16bcc308392..16bcc308392 100644
--- a/storage/innodb_plugin/include/ut0auxconf.h
+++ b/storage/innobase/include/ut0auxconf.h
diff --git a/storage/innobase/include/ut0byte.h b/storage/innobase/include/ut0byte.h
index 6533f1166ca..f55e2888c60 100644
--- a/storage/innobase/include/ut0byte.h
+++ b/storage/innobase/include/ut0byte.h
@@ -1,7 +1,24 @@
-/**********************************************************************
-Utilities for byte operations
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
-(c) 1994, 1995 Innobase Oy
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0byte.h
+Utilities for byte operations
Created 1/20/1994 Heikki Tuuri
***********************************************************************/
@@ -12,143 +29,162 @@ Created 1/20/1994 Heikki Tuuri
#include "univ.i"
-/* Type definition for a 64-bit unsigned integer, which works also
+/** Pair of ulint integers. */
+typedef struct dulint_struct dulint;
+/** Type definition for a 64-bit unsigned integer, which works also
in 32-bit machines. NOTE! Access the fields only with the accessor
functions. This definition appears here only for the compiler to
know the size of a dulint. */
-
-typedef struct dulint_struct dulint;
struct dulint_struct{
- ulint high; /* most significant 32 bits */
- ulint low; /* least significant 32 bits */
+ ulint high; /*!< most significant 32 bits */
+ ulint low; /*!< least significant 32 bits */
};
-/* Zero value for a dulint */
-extern dulint ut_dulint_zero;
+/** Zero value for a dulint */
+extern const dulint ut_dulint_zero;
-/* Maximum value for a dulint */
-extern dulint ut_dulint_max;
+/** Maximum value for a dulint */
+extern const dulint ut_dulint_max;
-/***********************************************************
-Creates a 64-bit dulint out of two ulints. */
+/*******************************************************//**
+Creates a 64-bit dulint out of two ulints.
+@return created dulint */
UNIV_INLINE
dulint
ut_dulint_create(
/*=============*/
- /* out: created dulint */
- ulint high, /* in: high-order 32 bits */
- ulint low); /* in: low-order 32 bits */
-/***********************************************************
-Gets the high-order 32 bits of a dulint. */
+ ulint high, /*!< in: high-order 32 bits */
+ ulint low); /*!< in: low-order 32 bits */
+/*******************************************************//**
+Gets the high-order 32 bits of a dulint.
+@return 32 bits in ulint */
UNIV_INLINE
ulint
ut_dulint_get_high(
/*===============*/
- /* out: 32 bits in ulint */
- dulint d); /* in: dulint */
-/***********************************************************
-Gets the low-order 32 bits of a dulint. */
+ dulint d); /*!< in: dulint */
+/*******************************************************//**
+Gets the low-order 32 bits of a dulint.
+@return 32 bits in ulint */
UNIV_INLINE
ulint
ut_dulint_get_low(
/*==============*/
- /* out: 32 bits in ulint */
- dulint d); /* in: dulint */
-/***********************************************************
-Converts a dulint (a struct of 2 ulints) to ib_longlong, which is a 64-bit
-integer type. */
+ dulint d); /*!< in: dulint */
+/*******************************************************//**
+Converts a dulint (a struct of 2 ulints) to ib_int64_t, which is a 64-bit
+integer type.
+@return value in ib_int64_t type */
UNIV_INLINE
-ib_longlong
+ib_int64_t
ut_conv_dulint_to_longlong(
/*=======================*/
- /* out: value in ib_longlong type */
- dulint d); /* in: dulint */
-/***********************************************************
-Tests if a dulint is zero. */
+ dulint d); /*!< in: dulint */
+/*******************************************************//**
+Tests if a dulint is zero.
+@return TRUE if zero */
UNIV_INLINE
ibool
ut_dulint_is_zero(
/*==============*/
- /* out: TRUE if zero */
- dulint a); /* in: dulint */
-/***********************************************************
-Compares two dulints. */
+ dulint a); /*!< in: dulint */
+/*******************************************************//**
+Compares two dulints.
+@return -1 if a < b, 0 if a == b, 1 if a > b */
UNIV_INLINE
int
ut_dulint_cmp(
/*==========*/
- /* out: -1 if a < b, 0 if a == b,
- 1 if a > b */
- dulint a, /* in: dulint */
- dulint b); /* in: dulint */
-/***********************************************************
-Calculates the max of two dulints. */
+ dulint a, /*!< in: dulint */
+ dulint b); /*!< in: dulint */
+/*******************************************************//**
+Calculates the max of two dulints.
+@return max(a, b) */
UNIV_INLINE
dulint
ut_dulint_get_max(
/*==============*/
- /* out: max(a, b) */
- dulint a, /* in: dulint */
- dulint b); /* in: dulint */
-/***********************************************************
-Calculates the min of two dulints. */
+ dulint a, /*!< in: dulint */
+ dulint b); /*!< in: dulint */
+/*******************************************************//**
+Calculates the min of two dulints.
+@return min(a, b) */
UNIV_INLINE
dulint
ut_dulint_get_min(
/*==============*/
- /* out: min(a, b) */
- dulint a, /* in: dulint */
- dulint b); /* in: dulint */
-/***********************************************************
-Adds a ulint to a dulint. */
+ dulint a, /*!< in: dulint */
+ dulint b); /*!< in: dulint */
+/*******************************************************//**
+Adds a ulint to a dulint.
+@return sum a + b */
UNIV_INLINE
dulint
ut_dulint_add(
/*==========*/
- /* out: sum a + b */
- dulint a, /* in: dulint */
- ulint b); /* in: ulint */
-/***********************************************************
-Subtracts a ulint from a dulint. */
+ dulint a, /*!< in: dulint */
+ ulint b); /*!< in: ulint */
+/*******************************************************//**
+Subtracts a ulint from a dulint.
+@return a - b */
UNIV_INLINE
dulint
ut_dulint_subtract(
/*===============*/
- /* out: a - b */
- dulint a, /* in: dulint */
- ulint b); /* in: ulint, b <= a */
-/***********************************************************
+ dulint a, /*!< in: dulint */
+ ulint b); /*!< in: ulint, b <= a */
+/*******************************************************//**
Subtracts a dulint from another. NOTE that the difference must be positive
-and smaller that 4G. */
+and smaller than 4G.
+@return a - b */
UNIV_INLINE
ulint
ut_dulint_minus(
/*============*/
- /* out: a - b */
- dulint a, /* in: dulint; NOTE a must be >= b and at most
+ dulint a, /*!< in: dulint; NOTE a must be >= b and at most
2 to power 32 - 1 greater */
- dulint b); /* in: dulint */
-/************************************************************
-Rounds a dulint downward to a multiple of a power of 2. */
+ dulint b); /*!< in: dulint */
+/********************************************************//**
+Rounds a dulint downward to a multiple of a power of 2.
+@return rounded value */
UNIV_INLINE
dulint
ut_dulint_align_down(
/*=================*/
- /* out: rounded value */
- dulint n, /* in: number to be rounded */
- ulint align_no); /* in: align by this number which must be a
+ dulint n, /*!< in: number to be rounded */
+ ulint align_no); /*!< in: align by this number which must be a
power of 2 */
-/************************************************************
-Rounds a dulint upward to a multiple of a power of 2. */
+/********************************************************//**
+Rounds a dulint upward to a multiple of a power of 2.
+@return rounded value */
UNIV_INLINE
dulint
ut_dulint_align_up(
/*===============*/
- /* out: rounded value */
- dulint n, /* in: number to be rounded */
- ulint align_no); /* in: align by this number which must be a
+ dulint n, /*!< in: number to be rounded */
+ ulint align_no); /*!< in: align by this number which must be a
power of 2 */
-/***********************************************************
+/********************************************************//**
+Rounds ib_uint64_t downward to a multiple of a power of 2.
+@return rounded value */
+UNIV_INLINE
+ib_uint64_t
+ut_uint64_align_down(
+/*=================*/
+ ib_uint64_t n, /*!< in: number to be rounded */
+ ulint align_no); /*!< in: align by this number
+ which must be a power of 2 */
+/********************************************************//**
+Rounds ib_uint64_t upward to a multiple of a power of 2.
+@return rounded value */
+UNIV_INLINE
+ib_uint64_t
+ut_uint64_align_up(
+/*===============*/
+ ib_uint64_t n, /*!< in: number to be rounded */
+ ulint align_no); /*!< in: align by this number
+ which must be a power of 2 */
+/*******************************************************//**
Increments a dulint variable by 1. */
#define UT_DULINT_INC(D)\
{\
@@ -159,89 +195,73 @@ Increments a dulint variable by 1. */
(D).low = (D).low + 1;\
}\
}
-/***********************************************************
+/*******************************************************//**
Tests if two dulints are equal. */
#define UT_DULINT_EQ(D1, D2) (((D1).low == (D2).low)\
&& ((D1).high == (D2).high))
-/****************************************************************
+#ifdef notdefined
+/************************************************************//**
Sort function for dulint arrays. */
+UNIV_INTERN
void
-ut_dulint_sort(dulint* arr, dulint* aux_arr, ulint low, ulint high);
-/*===============================================================*/
-/************************************************************
-The following function calculates the value of an integer n rounded
-to the least product of align_no which is >= n. align_no has to be a
-power of 2. */
-UNIV_INLINE
-ulint
-ut_calc_align(
-/*==========*/
- /* out: rounded value */
- ulint n, /* in: number to be rounded */
- ulint align_no); /* in: align by this number */
-/************************************************************
-The following function calculates the value of an integer n rounded
-to the biggest product of align_no which is <= n. align_no has to be a
-power of 2. */
-UNIV_INLINE
-ulint
-ut_calc_align_down(
-/*===============*/
- /* out: rounded value */
- ulint n, /* in: number to be rounded */
- ulint align_no); /* in: align by this number */
-/*************************************************************
-The following function rounds up a pointer to the nearest aligned address. */
+ut_dulint_sort(
+/*===========*/
+ dulint* arr, /*!< in/out: array to be sorted */
+ dulint* aux_arr,/*!< in/out: auxiliary array (same size as arr) */
+ ulint low, /*!< in: low bound of sort interval, inclusive */
+ ulint high); /*!< in: high bound of sort interval, noninclusive */
+#endif /* notdefined */
+
+/*********************************************************//**
+The following function rounds up a pointer to the nearest aligned address.
+@return aligned pointer */
UNIV_INLINE
void*
ut_align(
/*=====*/
- /* out: aligned pointer */
- void* ptr, /* in: pointer */
- ulint align_no); /* in: align by this number */
-/*************************************************************
+ const void* ptr, /*!< in: pointer */
+ ulint align_no); /*!< in: align by this number */
+/*********************************************************//**
The following function rounds down a pointer to the nearest
-aligned address. */
+aligned address.
+@return aligned pointer */
UNIV_INLINE
void*
ut_align_down(
/*==========*/
- /* out: aligned pointer */
- void* ptr, /* in: pointer */
- ulint align_no) /* in: align by this number */
+ const void* ptr, /*!< in: pointer */
+ ulint align_no) /*!< in: align by this number */
__attribute__((const));
-/*************************************************************
+/*********************************************************//**
The following function computes the offset of a pointer from the nearest
-aligned address. */
+aligned address.
+@return distance from aligned pointer */
UNIV_INLINE
ulint
ut_align_offset(
/*============*/
- /* out: distance from aligned
- pointer */
- const void* ptr, /* in: pointer */
- ulint align_no) /* in: align by this number */
+ const void* ptr, /*!< in: pointer */
+ ulint align_no) /*!< in: align by this number */
__attribute__((const));
-/*********************************************************************
-Gets the nth bit of a ulint. */
+/*****************************************************************//**
+Gets the nth bit of a ulint.
+@return TRUE if nth bit is 1; 0th bit is defined to be the least significant */
UNIV_INLINE
ibool
ut_bit_get_nth(
/*===========*/
- /* out: TRUE if nth bit is 1; 0th bit is defined to
- be the least significant */
- ulint a, /* in: ulint */
- ulint n); /* in: nth bit requested */
-/*********************************************************************
-Sets the nth bit of a ulint. */
+ ulint a, /*!< in: ulint */
+ ulint n); /*!< in: nth bit requested */
+/*****************************************************************//**
+Sets the nth bit of a ulint.
+@return the ulint with the bit set as requested */
UNIV_INLINE
ulint
ut_bit_set_nth(
/*===========*/
- /* out: the ulint with the bit set as requested */
- ulint a, /* in: ulint */
- ulint n, /* in: nth bit requested */
- ibool val); /* in: value for the bit to set */
+ ulint a, /*!< in: ulint */
+ ulint n, /*!< in: nth bit requested */
+ ibool val); /*!< in: value for the bit to set */
#ifndef UNIV_NONINL
#include "ut0byte.ic"
diff --git a/storage/innobase/include/ut0byte.ic b/storage/innobase/include/ut0byte.ic
index 01b6c29d08f..3dd51890cb4 100644
--- a/storage/innobase/include/ut0byte.ic
+++ b/storage/innobase/include/ut0byte.ic
@@ -1,20 +1,37 @@
-/******************************************************************
-Utilities for byte operations
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1994, 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************************//**
+@file include/ut0byte.ic
+Utilities for byte operations
Created 5/30/1994 Heikki Tuuri
*******************************************************************/
-/***********************************************************
-Creates a 64-bit dulint out of two ulints. */
+/*******************************************************//**
+Creates a 64-bit dulint out of two ulints.
+@return created dulint */
UNIV_INLINE
dulint
ut_dulint_create(
/*=============*/
- /* out: created dulint */
- ulint high, /* in: high-order 32 bits */
- ulint low) /* in: low-order 32 bits */
+ ulint high, /*!< in: high-order 32 bits */
+ ulint low) /*!< in: low-order 32 bits */
{
dulint res;
@@ -27,52 +44,52 @@ ut_dulint_create(
return(res);
}
-/***********************************************************
-Gets the high-order 32 bits of a dulint. */
+/*******************************************************//**
+Gets the high-order 32 bits of a dulint.
+@return 32 bits in ulint */
UNIV_INLINE
ulint
ut_dulint_get_high(
/*===============*/
- /* out: 32 bits in ulint */
- dulint d) /* in: dulint */
+ dulint d) /*!< in: dulint */
{
return(d.high);
}
-/***********************************************************
-Gets the low-order 32 bits of a dulint. */
+/*******************************************************//**
+Gets the low-order 32 bits of a dulint.
+@return 32 bits in ulint */
UNIV_INLINE
ulint
ut_dulint_get_low(
/*==============*/
- /* out: 32 bits in ulint */
- dulint d) /* in: dulint */
+ dulint d) /*!< in: dulint */
{
return(d.low);
}
-/***********************************************************
-Converts a dulint (a struct of 2 ulints) to ib_longlong, which is a 64-bit
-integer type. */
+/*******************************************************//**
+Converts a dulint (a struct of 2 ulints) to ib_int64_t, which is a 64-bit
+integer type.
+@return value in ib_int64_t type */
UNIV_INLINE
-ib_longlong
+ib_int64_t
ut_conv_dulint_to_longlong(
/*=======================*/
- /* out: value in ib_longlong type */
- dulint d) /* in: dulint */
+ dulint d) /*!< in: dulint */
{
- return((ib_longlong)d.low
- + (((ib_longlong)d.high) << 32));
+ return((ib_int64_t)d.low
+ + (((ib_int64_t)d.high) << 32));
}
-/***********************************************************
-Tests if a dulint is zero. */
+/*******************************************************//**
+Tests if a dulint is zero.
+@return TRUE if zero */
UNIV_INLINE
ibool
ut_dulint_is_zero(
/*==============*/
- /* out: TRUE if zero */
- dulint a) /* in: dulint */
+ dulint a) /*!< in: dulint */
{
if ((a.low == 0) && (a.high == 0)) {
@@ -82,16 +99,15 @@ ut_dulint_is_zero(
return(FALSE);
}
-/***********************************************************
-Compares two dulints. */
+/*******************************************************//**
+Compares two dulints.
+@return -1 if a < b, 0 if a == b, 1 if a > b */
UNIV_INLINE
int
ut_dulint_cmp(
/*==========*/
- /* out: -1 if a < b, 0 if a == b,
- 1 if a > b */
- dulint a, /* in: dulint */
- dulint b) /* in: dulint */
+ dulint a, /*!< in: dulint */
+ dulint b) /*!< in: dulint */
{
if (a.high > b.high) {
return(1);
@@ -106,15 +122,15 @@ ut_dulint_cmp(
}
}
-/***********************************************************
-Calculates the max of two dulints. */
+/*******************************************************//**
+Calculates the max of two dulints.
+@return max(a, b) */
UNIV_INLINE
dulint
ut_dulint_get_max(
/*==============*/
- /* out: max(a, b) */
- dulint a, /* in: dulint */
- dulint b) /* in: dulint */
+ dulint a, /*!< in: dulint */
+ dulint b) /*!< in: dulint */
{
if (ut_dulint_cmp(a, b) > 0) {
@@ -124,15 +140,15 @@ ut_dulint_get_max(
return(b);
}
-/***********************************************************
-Calculates the min of two dulints. */
+/*******************************************************//**
+Calculates the min of two dulints.
+@return min(a, b) */
UNIV_INLINE
dulint
ut_dulint_get_min(
/*==============*/
- /* out: min(a, b) */
- dulint a, /* in: dulint */
- dulint b) /* in: dulint */
+ dulint a, /*!< in: dulint */
+ dulint b) /*!< in: dulint */
{
if (ut_dulint_cmp(a, b) > 0) {
@@ -142,15 +158,15 @@ ut_dulint_get_min(
return(a);
}
-/***********************************************************
-Adds a ulint to a dulint. */
+/*******************************************************//**
+Adds a ulint to a dulint.
+@return sum a + b */
UNIV_INLINE
dulint
ut_dulint_add(
/*==========*/
- /* out: sum a + b */
- dulint a, /* in: dulint */
- ulint b) /* in: ulint */
+ dulint a, /*!< in: dulint */
+ ulint b) /*!< in: ulint */
{
if (0xFFFFFFFFUL - b >= a.low) {
a.low += b;
@@ -165,15 +181,15 @@ ut_dulint_add(
return(a);
}
-/***********************************************************
-Subtracts a ulint from a dulint. */
+/*******************************************************//**
+Subtracts a ulint from a dulint.
+@return a - b */
UNIV_INLINE
dulint
ut_dulint_subtract(
/*===============*/
- /* out: a - b */
- dulint a, /* in: dulint */
- ulint b) /* in: ulint, b <= a */
+ dulint a, /*!< in: dulint */
+ ulint b) /*!< in: ulint, b <= a */
{
if (a.low >= b) {
a.low -= b;
@@ -192,17 +208,17 @@ ut_dulint_subtract(
return(a);
}
-/***********************************************************
+/*******************************************************//**
Subtracts a dulint from another. NOTE that the difference must be positive
-and smaller that 4G. */
+and smaller than 4G.
+@return a - b */
UNIV_INLINE
ulint
ut_dulint_minus(
/*============*/
- /* out: a - b */
- dulint a, /* in: dulint; NOTE a must be >= b and at most
+ dulint a, /*!< in: dulint; NOTE a must be >= b and at most
2 to power 32 - 1 greater */
- dulint b) /* in: dulint */
+ dulint b) /*!< in: dulint */
{
ulint diff;
@@ -222,15 +238,15 @@ ut_dulint_minus(
return(diff);
}
-/************************************************************
-Rounds a dulint downward to a multiple of a power of 2. */
+/********************************************************//**
+Rounds a dulint downward to a multiple of a power of 2.
+@return rounded value */
UNIV_INLINE
dulint
ut_dulint_align_down(
/*=================*/
- /* out: rounded value */
- dulint n, /* in: number to be rounded */
- ulint align_no) /* in: align by this number which must be a
+ dulint n, /*!< in: number to be rounded */
+ ulint align_no) /*!< in: align by this number which must be a
power of 2 */
{
ulint low, high;
@@ -246,47 +262,65 @@ ut_dulint_align_down(
return(ut_dulint_create(high, low));
}
-/************************************************************
-Rounds a dulint upward to a multiple of a power of 2. */
+/********************************************************//**
+Rounds a dulint upward to a multiple of a power of 2.
+@return rounded value */
UNIV_INLINE
dulint
ut_dulint_align_up(
/*===============*/
- /* out: rounded value */
- dulint n, /* in: number to be rounded */
- ulint align_no) /* in: align by this number which must be a
+ dulint n, /*!< in: number to be rounded */
+ ulint align_no) /*!< in: align by this number which must be a
power of 2 */
{
return(ut_dulint_align_down(ut_dulint_add(n, align_no - 1), align_no));
}
-/************************************************************
-The following function calculates the value of an integer n rounded
-to the least product of align_no which is >= n. align_no
-has to be a power of 2. */
+/********************************************************//**
+Rounds ib_uint64_t downward to a multiple of a power of 2.
+@return rounded value */
UNIV_INLINE
-ulint
-ut_calc_align(
-/*==========*/
- /* out: rounded value */
- ulint n, /* in: number to be rounded */
- ulint align_no) /* in: align by this number */
+ib_uint64_t
+ut_uint64_align_down(
+/*=================*/
+ ib_uint64_t n, /*!< in: number to be rounded */
+ ulint align_no) /*!< in: align by this number
+ which must be a power of 2 */
{
ut_ad(align_no > 0);
- ut_ad(((align_no - 1) & align_no) == 0);
+ ut_ad(ut_is_2pow(align_no));
+
+ return(n & ~((ib_uint64_t) align_no - 1));
+}
+
+/********************************************************//**
+Rounds ib_uint64_t upward to a multiple of a power of 2.
+@return rounded value */
+UNIV_INLINE
+ib_uint64_t
+ut_uint64_align_up(
+/*===============*/
+ ib_uint64_t n, /*!< in: number to be rounded */
+ ulint align_no) /*!< in: align by this number
+ which must be a power of 2 */
+{
+ ib_uint64_t align_1 = (ib_uint64_t) align_no - 1;
- return((n + align_no - 1) & ~(align_no - 1));
+ ut_ad(align_no > 0);
+ ut_ad(ut_is_2pow(align_no));
+
+ return((n + align_1) & ~align_1);
}
-/*************************************************************
-The following function rounds up a pointer to the nearest aligned address. */
+/*********************************************************//**
+The following function rounds up a pointer to the nearest aligned address.
+@return aligned pointer */
UNIV_INLINE
void*
ut_align(
/*=====*/
- /* out: aligned pointer */
- void* ptr, /* in: pointer */
- ulint align_no) /* in: align by this number */
+ const void* ptr, /*!< in: pointer */
+ ulint align_no) /*!< in: align by this number */
{
ut_ad(align_no > 0);
ut_ad(((align_no - 1) & align_no) == 0);
@@ -297,34 +331,16 @@ ut_align(
return((void*)((((ulint)ptr) + align_no - 1) & ~(align_no - 1)));
}
-/************************************************************
-The following function calculates the value of an integer n rounded
-to the biggest product of align_no which is <= n. align_no has to be a
-power of 2. */
-UNIV_INLINE
-ulint
-ut_calc_align_down(
-/*===============*/
- /* out: rounded value */
- ulint n, /* in: number to be rounded */
- ulint align_no) /* in: align by this number */
-{
- ut_ad(align_no > 0);
- ut_ad(((align_no - 1) & align_no) == 0);
-
- return(n & ~(align_no - 1));
-}
-
-/*************************************************************
+/*********************************************************//**
The following function rounds down a pointer to the nearest
-aligned address. */
+aligned address.
+@return aligned pointer */
UNIV_INLINE
void*
ut_align_down(
/*==========*/
- /* out: aligned pointer */
- void* ptr, /* in: pointer */
- ulint align_no) /* in: align by this number */
+ const void* ptr, /*!< in: pointer */
+ ulint align_no) /*!< in: align by this number */
{
ut_ad(align_no > 0);
ut_ad(((align_no - 1) & align_no) == 0);
@@ -335,17 +351,16 @@ ut_align_down(
return((void*)((((ulint)ptr)) & ~(align_no - 1)));
}
-/*************************************************************
+/*********************************************************//**
The following function computes the offset of a pointer from the nearest
-aligned address. */
+aligned address.
+@return distance from aligned pointer */
UNIV_INLINE
ulint
ut_align_offset(
/*============*/
- /* out: distance from
- aligned pointer */
- const void* ptr, /* in: pointer */
- ulint align_no) /* in: align by this number */
+ const void* ptr, /*!< in: pointer */
+ ulint align_no) /*!< in: align by this number */
{
ut_ad(align_no > 0);
ut_ad(((align_no - 1) & align_no) == 0);
@@ -356,16 +371,15 @@ ut_align_offset(
return(((ulint)ptr) & (align_no - 1));
}
-/*********************************************************************
-Gets the nth bit of a ulint. */
+/*****************************************************************//**
+Gets the nth bit of a ulint.
+@return TRUE if nth bit is 1; 0th bit is defined to be the least significant */
UNIV_INLINE
ibool
ut_bit_get_nth(
/*===========*/
- /* out: TRUE if nth bit is 1; 0th bit is defined to
- be the least significant */
- ulint a, /* in: ulint */
- ulint n) /* in: nth bit requested */
+ ulint a, /*!< in: ulint */
+ ulint n) /*!< in: nth bit requested */
{
ut_ad(n < 8 * sizeof(ulint));
#if TRUE != 1
@@ -374,16 +388,16 @@ ut_bit_get_nth(
return(1 & (a >> n));
}
-/*********************************************************************
-Sets the nth bit of a ulint. */
+/*****************************************************************//**
+Sets the nth bit of a ulint.
+@return the ulint with the bit set as requested */
UNIV_INLINE
ulint
ut_bit_set_nth(
/*===========*/
- /* out: the ulint with the bit set as requested */
- ulint a, /* in: ulint */
- ulint n, /* in: nth bit requested */
- ibool val) /* in: value for the bit to set */
+ ulint a, /*!< in: ulint */
+ ulint n, /*!< in: nth bit requested */
+ ibool val) /*!< in: value for the bit to set */
{
ut_ad(n < 8 * sizeof(ulint));
#if TRUE != 1
diff --git a/storage/innobase/include/ut0dbg.h b/storage/innobase/include/ut0dbg.h
index a317f35f4be..78b525c38ab 100644
--- a/storage/innobase/include/ut0dbg.h
+++ b/storage/innobase/include/ut0dbg.h
@@ -1,7 +1,24 @@
-/*********************************************************************
-Debug utilities for Innobase
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1994, 1995 Innobase Oy
+*****************************************************************************/
+
+/*****************************************************************//**
+@file include/ut0dbg.h
+Debug utilities for Innobase
Created 1/30/1994 Heikki Tuuri
**********************************************************************/
@@ -14,32 +31,39 @@ Created 1/30/1994 Heikki Tuuri
#include "os0thread.h"
#if defined(__GNUC__) && (__GNUC__ > 2)
+/** Test if an assertion fails.
+@param EXPR assertion expression
+@return nonzero if EXPR fails (evaluates to zero), zero if it holds */
# define UT_DBG_FAIL(EXPR) UNIV_UNLIKELY(!((ulint)(EXPR)))
#else
-extern ulint ut_dbg_zero; /* This is used to eliminate
- compiler warnings */
+/** This is used to eliminate compiler warnings */
+extern ulint ut_dbg_zero;
+/** Test if an assertion fails.
+@param EXPR assertion expression
+@return nonzero if EXPR fails (evaluates to zero), zero if it holds */
# define UT_DBG_FAIL(EXPR) !((ulint)(EXPR) + ut_dbg_zero)
#endif
-/*****************************************************************
+/*************************************************************//**
Report a failed assertion. */
-
+UNIV_INTERN
void
ut_dbg_assertion_failed(
/*====================*/
- const char* expr, /* in: the failed assertion */
- const char* file, /* in: source file containing the assertion */
- ulint line); /* in: line number of the assertion */
+ const char* expr, /*!< in: the failed assertion */
+ const char* file, /*!< in: source file containing the assertion */
+ ulint line); /*!< in: line number of the assertion */
#ifdef __NETWARE__
-/* Flag for ignoring further assertion failures.
-On NetWare, have a graceful exit rather than a segfault to avoid abends. */
+/** Flag for ignoring further assertion failures. On NetWare, this is set
+to TRUE when an InnoDB assertion failure or other fatal error condition
+requires an immediate shutdown. */
extern ibool panic_shutdown;
/* Abort the execution. */
void ut_dbg_panic(void);
# define UT_DBG_PANIC ut_dbg_panic()
/* Stop threads in ut_a(). */
-# define UT_DBG_STOP while (0) /* We do not do this on NetWare */
+# define UT_DBG_STOP do {} while (0) /* We do not do this on NetWare */
#else /* __NETWARE__ */
# if defined(__WIN__) || defined(__INTEL_COMPILER)
# undef UT_DBG_USE_ABORT
@@ -48,18 +72,18 @@ void ut_dbg_panic(void);
# endif
# ifndef UT_DBG_USE_ABORT
-/* A null pointer that will be dereferenced to trigger a memory trap */
+/** A null pointer that will be dereferenced to trigger a memory trap */
extern ulint* ut_dbg_null_ptr;
# endif
# if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT)
-/* Flag for indicating that all threads should stop. This will be set
-by ut_dbg_assertion_failed(). */
+/** If this is set to TRUE by ut_dbg_assertion_failed(), all threads
+will stop at the next ut_a() or ut_ad(). */
extern ibool ut_dbg_stop_threads;
-/*****************************************************************
+/*************************************************************//**
Stop a thread after assertion failure. */
-
+UNIV_INTERN
void
ut_dbg_stop_thread(
/*===============*/
@@ -68,15 +92,15 @@ ut_dbg_stop_thread(
# endif
# ifdef UT_DBG_USE_ABORT
-/* Abort the execution. */
+/** Abort the execution. */
# define UT_DBG_PANIC abort()
-/* Stop threads (null operation) */
-# define UT_DBG_STOP while (0)
+/** Stop threads (null operation) */
+# define UT_DBG_STOP do {} while (0)
# else /* UT_DBG_USE_ABORT */
-/* Abort the execution. */
+/** Abort the execution. */
# define UT_DBG_PANIC \
if (*(ut_dbg_null_ptr)) ut_dbg_null_ptr = NULL
-/* Stop threads in ut_a(). */
+/** Stop threads in ut_a(). */
# define UT_DBG_STOP do \
if (UNIV_UNLIKELY(ut_dbg_stop_threads)) { \
ut_dbg_stop_thread(__FILE__, (ulint) __LINE__); \
@@ -84,7 +108,8 @@ ut_dbg_stop_thread(
# endif /* UT_DBG_USE_ABORT */
#endif /* __NETWARE__ */
-/* Abort execution if EXPR does not evaluate to nonzero. */
+/** Abort execution if EXPR does not evaluate to nonzero.
+@param EXPR assertion expression that should hold */
#define ut_a(EXPR) do { \
if (UT_DBG_FAIL(EXPR)) { \
ut_dbg_assertion_failed(#EXPR, \
@@ -94,20 +119,57 @@ ut_dbg_stop_thread(
UT_DBG_STOP; \
} while (0)
-/* Abort execution. */
+/** Abort execution. */
#define ut_error do { \
ut_dbg_assertion_failed(0, __FILE__, (ulint) __LINE__); \
UT_DBG_PANIC; \
} while (0)
#ifdef UNIV_DEBUG
+/** Debug assertion. Does nothing unless UNIV_DEBUG is defined. */
#define ut_ad(EXPR) ut_a(EXPR)
+/** Debug statement. Does nothing unless UNIV_DEBUG is defined. */
#define ut_d(EXPR) do {EXPR;} while (0)
#else
+/** Debug assertion. Does nothing unless UNIV_DEBUG is defined. */
#define ut_ad(EXPR)
+/** Debug statement. Does nothing unless UNIV_DEBUG is defined. */
#define ut_d(EXPR)
#endif
+/** Silence warnings about an unused variable by doing a null assignment.
+@param A the unused variable */
#define UT_NOT_USED(A) A = A
+#ifdef UNIV_COMPILE_TEST_FUNCS
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/** structure used for recording usage statistics */
+typedef struct speedo_struct {
+ struct rusage ru; /*!< getrusage() result */
+ struct timeval tv; /*!< gettimeofday() result */
+} speedo_t;
+
+/*******************************************************************//**
+Resets a speedo (records the current time in it). */
+UNIV_INTERN
+void
+speedo_reset(
+/*=========*/
+ speedo_t* speedo); /*!< out: speedo */
+
+/*******************************************************************//**
+Shows the time elapsed and usage statistics since the last reset of a
+speedo. */
+UNIV_INTERN
+void
+speedo_show(
+/*========*/
+ const speedo_t* speedo); /*!< in: speedo */
+
+#endif /* UNIV_COMPILE_TEST_FUNCS */
+
#endif
diff --git a/storage/innobase/include/ut0list.h b/storage/innobase/include/ut0list.h
index c35cf202600..ec67f4e2a0f 100644
--- a/storage/innobase/include/ut0list.h
+++ b/storage/innobase/include/ut0list.h
@@ -1,4 +1,29 @@
-/***********************************************************************
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/ut0list.h
+A double-linked list
+
+Created 4/26/2006 Osku Salerma
+************************************************************************/
+
+/*******************************************************************//**
A double-linked list. This differs from the one in ut0lst.h in that in this
one, each list node contains a pointer to the data, whereas the one in
ut0lst.h uses a strategy where the list pointers are embedded in the data
@@ -18,7 +43,6 @@ automatically freeing the list node when the item's heap is freed.
************************************************************************/
-
#ifndef IB_LIST_H
#define IB_LIST_H
@@ -28,117 +52,117 @@ typedef struct ib_list_struct ib_list_t;
typedef struct ib_list_node_struct ib_list_node_t;
typedef struct ib_list_helper_struct ib_list_helper_t;
-/********************************************************************
+/****************************************************************//**
Create a new list using mem_alloc. Lists created with this function must be
-freed with ib_list_free. */
-
+freed with ib_list_free.
+@return list */
+UNIV_INTERN
ib_list_t*
ib_list_create(void);
/*=================*/
- /* out: list */
-/********************************************************************
+/****************************************************************//**
Create a new list using the given heap. ib_list_free MUST NOT BE CALLED for
-lists created with this function. */
-
+lists created with this function.
+@return list */
+UNIV_INTERN
ib_list_t*
ib_list_create_heap(
/*================*/
- /* out: list */
- mem_heap_t* heap); /* in: memory heap to use */
+ mem_heap_t* heap); /*!< in: memory heap to use */
-/********************************************************************
+/****************************************************************//**
Free a list. */
-
+UNIV_INTERN
void
ib_list_free(
/*=========*/
- ib_list_t* list); /* in: list */
-
-/********************************************************************
-Add the data to the start of the list. */
+ ib_list_t* list); /*!< in: list */
+/****************************************************************//**
+Add the data to the start of the list.
+@return new list node */
+UNIV_INTERN
ib_list_node_t*
ib_list_add_first(
/*==============*/
- /* out: new list node*/
- ib_list_t* list, /* in: list */
- void* data, /* in: data */
- mem_heap_t* heap); /* in: memory heap to use */
-
-/********************************************************************
-Add the data to the end of the list. */
-
+ ib_list_t* list, /*!< in: list */
+ void* data, /*!< in: data */
+ mem_heap_t* heap); /*!< in: memory heap to use */
+
+/****************************************************************//**
+Add the data to the end of the list.
+@return new list node */
+UNIV_INTERN
ib_list_node_t*
ib_list_add_last(
/*=============*/
- /* out: new list node*/
- ib_list_t* list, /* in: list */
- void* data, /* in: data */
- mem_heap_t* heap); /* in: memory heap to use */
-
-/********************************************************************
-Add the data after the indicated node. */
-
+ ib_list_t* list, /*!< in: list */
+ void* data, /*!< in: data */
+ mem_heap_t* heap); /*!< in: memory heap to use */
+
+/****************************************************************//**
+Add the data after the indicated node.
+@return new list node */
+UNIV_INTERN
ib_list_node_t*
ib_list_add_after(
/*==============*/
- /* out: new list node*/
- ib_list_t* list, /* in: list */
- ib_list_node_t* prev_node, /* in: node preceding new node (can
+ ib_list_t* list, /*!< in: list */
+ ib_list_node_t* prev_node, /*!< in: node preceding new node (can
be NULL) */
- void* data, /* in: data */
- mem_heap_t* heap); /* in: memory heap to use */
+ void* data, /*!< in: data */
+ mem_heap_t* heap); /*!< in: memory heap to use */
-/********************************************************************
+/****************************************************************//**
Remove the node from the list. */
-
+UNIV_INTERN
void
ib_list_remove(
/*===========*/
- ib_list_t* list, /* in: list */
- ib_list_node_t* node); /* in: node to remove */
+ ib_list_t* list, /*!< in: list */
+ ib_list_node_t* node); /*!< in: node to remove */
-/********************************************************************
-Get the first node in the list. */
+/****************************************************************//**
+Get the first node in the list.
+@return first node, or NULL */
UNIV_INLINE
ib_list_node_t*
ib_list_get_first(
/*==============*/
- /* out: first node, or NULL */
- ib_list_t* list); /* in: list */
+ ib_list_t* list); /*!< in: list */
-/********************************************************************
-Get the last node in the list. */
+/****************************************************************//**
+Get the last node in the list.
+@return last node, or NULL */
UNIV_INLINE
ib_list_node_t*
ib_list_get_last(
/*=============*/
- /* out: last node, or NULL */
- ib_list_t* list); /* in: list */
+ ib_list_t* list); /*!< in: list */
/* List. */
struct ib_list_struct {
- ib_list_node_t* first; /* first node */
- ib_list_node_t* last; /* last node */
- ibool is_heap_list; /* TRUE if this list was
+ ib_list_node_t* first; /*!< first node */
+ ib_list_node_t* last; /*!< last node */
+ ibool is_heap_list; /*!< TRUE if this list was
allocated through a heap */
};
/* A list node. */
struct ib_list_node_struct {
- ib_list_node_t* prev; /* previous node */
- ib_list_node_t* next; /* next node */
- void* data; /* user data */
+ ib_list_node_t* prev; /*!< previous node */
+ ib_list_node_t* next; /*!< next node */
+ void* data; /*!< user data */
};
/* Quite often, the only additional piece of data you need is the per-item
memory heap, so we have this generic struct available to use in those
cases. */
struct ib_list_helper_struct {
- mem_heap_t* heap; /* memory heap */
- void* data; /* user data */
+ mem_heap_t* heap; /*!< memory heap */
+ void* data; /*!< user data */
};
#ifndef UNIV_NONINL
diff --git a/storage/innobase/include/ut0list.ic b/storage/innobase/include/ut0list.ic
index c2d3e4557f0..eb5c62796e8 100644
--- a/storage/innobase/include/ut0list.ic
+++ b/storage/innobase/include/ut0list.ic
@@ -1,23 +1,48 @@
-/********************************************************************
-Get the first node in the list. */
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/ut0list.ic
+A double-linked list
+
+Created 4/26/2006 Osku Salerma
+************************************************************************/
+
+/****************************************************************//**
+Get the first node in the list.
+@return first node, or NULL */
UNIV_INLINE
ib_list_node_t*
ib_list_get_first(
/*==============*/
- /* out: first node, or NULL */
- ib_list_t* list) /* in: list */
+ ib_list_t* list) /*!< in: list */
{
return(list->first);
}
-/********************************************************************
-Get the last node in the list. */
+/****************************************************************//**
+Get the last node in the list.
+@return last node, or NULL */
UNIV_INLINE
ib_list_node_t*
ib_list_get_last(
/*=============*/
- /* out: last node, or NULL */
- ib_list_t* list) /* in: list */
+ ib_list_t* list) /*!< in: list */
{
return(list->last);
}
diff --git a/storage/innobase/include/ut0lst.h b/storage/innobase/include/ut0lst.h
index ebe2803fe23..261d33963dc 100644
--- a/storage/innobase/include/ut0lst.h
+++ b/storage/innobase/include/ut0lst.h
@@ -1,7 +1,24 @@
-/**********************************************************************
-List utilities
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1995 Innobase Oy
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0lst.h
+List utilities
Created 9/10/1995 Heikki Tuuri
***********************************************************************/
@@ -16,45 +33,46 @@ if a list is used in the database. Note that a single struct may belong
to two or more lists, provided that the list are given different names.
An example of the usage of the lists can be found in fil0fil.c. */
-/***********************************************************************
+/*******************************************************************//**
This macro expands to the unnamed type definition of a struct which acts
as the two-way list base node. The base node contains pointers
to both ends of the list and a count of nodes in the list (excluding
-the base node from the count). TYPE should be the list node type name. */
-
+the base node from the count).
+@param TYPE the name of the list node data type */
#define UT_LIST_BASE_NODE_T(TYPE)\
struct {\
- ulint count; /* count of nodes in list */\
- TYPE * start; /* pointer to list start, NULL if empty */\
- TYPE * end; /* pointer to list end, NULL if empty */\
+ ulint count; /*!< count of nodes in list */\
+ TYPE * start; /*!< pointer to list start, NULL if empty */\
+ TYPE * end; /*!< pointer to list end, NULL if empty */\
}\
-/***********************************************************************
+/*******************************************************************//**
This macro expands to the unnamed type definition of a struct which
should be embedded in the nodes of the list, the node type must be a struct.
This struct contains the pointers to next and previous nodes in the list.
The name of the field in the node struct should be the name given
-to the list. TYPE should be the list node type name. Example of usage:
-
+to the list.
+@param TYPE the list node type name */
+/* Example:
typedef struct LRU_node_struct LRU_node_t;
struct LRU_node_struct {
UT_LIST_NODE_T(LRU_node_t) LRU_list;
...
}
The example implements an LRU list of name LRU_list. Its nodes are of type
-LRU_node_t.
-*/
+LRU_node_t. */
#define UT_LIST_NODE_T(TYPE)\
struct {\
- TYPE * prev; /* pointer to the previous node,\
+ TYPE * prev; /*!< pointer to the previous node,\
NULL if start of list */\
- TYPE * next; /* pointer to next node, NULL if end of list */\
+ TYPE * next; /*!< pointer to next node, NULL if end of list */\
}\
-/***********************************************************************
-Initializes the base node of a two-way list. */
-
+/*******************************************************************//**
+Initializes the base node of a two-way list.
+@param BASE the list base node
+*/
#define UT_LIST_INIT(BASE)\
{\
(BASE).count = 0;\
@@ -62,32 +80,34 @@ Initializes the base node of a two-way list. */
(BASE).end = NULL;\
}\
-/***********************************************************************
+/*******************************************************************//**
Adds the node as the first element in a two-way linked list.
-BASE has to be the base node (not a pointer to it). N has to be
-the pointer to the node to be added to the list. NAME is the list name. */
-
+@param NAME list name
+@param BASE the base node (not a pointer to it)
+@param N pointer to the node to be added to the list.
+*/
#define UT_LIST_ADD_FIRST(NAME, BASE, N)\
{\
ut_ad(N);\
((BASE).count)++;\
((N)->NAME).next = (BASE).start;\
((N)->NAME).prev = NULL;\
- if ((BASE).start != NULL) {\
+ if (UNIV_LIKELY((BASE).start != NULL)) {\
ut_ad((BASE).start != (N));\
(((BASE).start)->NAME).prev = (N);\
}\
(BASE).start = (N);\
- if ((BASE).end == NULL) {\
+ if (UNIV_UNLIKELY((BASE).end == NULL)) {\
(BASE).end = (N);\
}\
}\
-/***********************************************************************
+/*******************************************************************//**
Adds the node as the last element in a two-way linked list.
-BASE has to be the base node (not a pointer to it). N has to be
-the pointer to the node to be added to the list. NAME is the list name. */
-
+@param NAME list name
+@param BASE the base node (not a pointer to it)
+@param N pointer to the node to be added to the list
+*/
#define UT_LIST_ADD_LAST(NAME, BASE, N)\
{\
ut_ad(N);\
@@ -104,11 +124,13 @@ the pointer to the node to be added to the list. NAME is the list name. */
}\
}\
-/***********************************************************************
+/*******************************************************************//**
Inserts a NODE2 after NODE1 in a list.
-BASE has to be the base node (not a pointer to it). NAME is the list
-name, NODE1 and NODE2 are pointers to nodes. */
-
+@param NAME list name
+@param BASE the base node (not a pointer to it)
+@param NODE1 pointer to node after which NODE2 is inserted
+@param NODE2 pointer to node being inserted after NODE1
+*/
#define UT_LIST_INSERT_AFTER(NAME, BASE, NODE1, NODE2)\
{\
ut_ad(NODE1);\
@@ -126,19 +148,25 @@ name, NODE1 and NODE2 are pointers to nodes. */
}\
}\
-/* Invalidate the pointers in a list node. */
#ifdef UNIV_LIST_DEBUG
+/** Invalidate the pointers in a list node.
+@param NAME list name
+@param N pointer to the node that was removed */
# define UT_LIST_REMOVE_CLEAR(NAME, N) \
((N)->NAME.prev = (N)->NAME.next = (void*) -1)
#else
+/** Invalidate the pointers in a list node.
+@param NAME list name
+@param N pointer to the node that was removed */
# define UT_LIST_REMOVE_CLEAR(NAME, N) while (0)
#endif
-/***********************************************************************
-Removes a node from a two-way linked list. BASE has to be the base node
-(not a pointer to it). N has to be the pointer to the node to be removed
-from the list. NAME is the list name. */
-
+/*******************************************************************//**
+Removes a node from a two-way linked list.
+@param NAME list name
+@param BASE the base node (not a pointer to it)
+@param N pointer to the node to be removed from the list
+*/
#define UT_LIST_REMOVE(NAME, BASE, N) \
do { \
ut_ad(N); \
@@ -157,71 +185,77 @@ do { \
UT_LIST_REMOVE_CLEAR(NAME, N); \
} while (0)
-/************************************************************************
-Gets the next node in a two-way list. NAME is the name of the list
-and N is pointer to a node. */
-
+/********************************************************************//**
+Gets the next node in a two-way list.
+@param NAME list name
+@param N pointer to a node
+@return the successor of N in NAME, or NULL */
#define UT_LIST_GET_NEXT(NAME, N)\
(((N)->NAME).next)
-/************************************************************************
-Gets the previous node in a two-way list. NAME is the name of the list
-and N is pointer to a node. */
-
+/********************************************************************//**
+Gets the previous node in a two-way list.
+@param NAME list name
+@param N pointer to a node
+@return the predecessor of N in NAME, or NULL */
#define UT_LIST_GET_PREV(NAME, N)\
(((N)->NAME).prev)
-/************************************************************************
+/********************************************************************//**
Alternative macro to get the number of nodes in a two-way list, i.e.,
-its length. BASE is the base node (not a pointer to it). */
-
+its length.
+@param BASE the base node (not a pointer to it).
+@return the number of nodes in the list */
#define UT_LIST_GET_LEN(BASE)\
(BASE).count
-/************************************************************************
-Gets the first node in a two-way list, or returns NULL,
-if the list is empty. BASE is the base node (not a pointer to it). */
-
+/********************************************************************//**
+Gets the first node in a two-way list.
+@param BASE the base node (not a pointer to it)
+@return first node, or NULL if the list is empty */
#define UT_LIST_GET_FIRST(BASE)\
(BASE).start
-/************************************************************************
-Gets the last node in a two-way list, or returns NULL,
-if the list is empty. BASE is the base node (not a pointer to it). */
-
+/********************************************************************//**
+Gets the last node in a two-way list.
+@param BASE the base node (not a pointer to it)
+@return last node, or NULL if the list is empty */
#define UT_LIST_GET_LAST(BASE)\
(BASE).end
-/************************************************************************
-Checks the consistency of a two-way list. NAME is the name of the list,
-TYPE is the node type, and BASE is the base node (not a pointer to it). */
-
-#define UT_LIST_VALIDATE(NAME, TYPE, BASE)\
-{\
- ulint ut_list_i_313;\
- TYPE * ut_list_node_313;\
-\
- ut_list_node_313 = (BASE).start;\
-\
- for (ut_list_i_313 = 0; ut_list_i_313 < (BASE).count;\
- ut_list_i_313++) {\
- ut_a(ut_list_node_313);\
- ut_list_node_313 = (ut_list_node_313->NAME).next;\
- }\
-\
- ut_a(ut_list_node_313 == NULL);\
-\
- ut_list_node_313 = (BASE).end;\
-\
- for (ut_list_i_313 = 0; ut_list_i_313 < (BASE).count;\
- ut_list_i_313++) {\
- ut_a(ut_list_node_313);\
- ut_list_node_313 = (ut_list_node_313->NAME).prev;\
- }\
-\
- ut_a(ut_list_node_313 == NULL);\
-}\
-
+/********************************************************************//**
+Checks the consistency of a two-way list.
+@param NAME the name of the list
+@param TYPE node type
+@param BASE base node (not a pointer to it)
+@param ASSERTION a condition on ut_list_node_313 */
+#define UT_LIST_VALIDATE(NAME, TYPE, BASE, ASSERTION) \
+do { \
+ ulint ut_list_i_313; \
+ TYPE* ut_list_node_313; \
+ \
+ ut_list_node_313 = (BASE).start; \
+ \
+ for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \
+ ut_a(ut_list_node_313); \
+ ASSERTION; \
+ ut_ad((ut_list_node_313->NAME).next || !ut_list_i_313); \
+ ut_list_node_313 = (ut_list_node_313->NAME).next; \
+ } \
+ \
+ ut_a(ut_list_node_313 == NULL); \
+ \
+ ut_list_node_313 = (BASE).end; \
+ \
+ for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \
+ ut_a(ut_list_node_313); \
+ ASSERTION; \
+ ut_ad((ut_list_node_313->NAME).prev || !ut_list_i_313); \
+ ut_list_node_313 = (ut_list_node_313->NAME).prev; \
+ } \
+ \
+ ut_a(ut_list_node_313 == NULL); \
+} while (0)
#endif
diff --git a/storage/innobase/include/ut0mem.h b/storage/innobase/include/ut0mem.h
index e56895bc142..cf41cba4643 100644
--- a/storage/innobase/include/ut0mem.h
+++ b/storage/innobase/include/ut0mem.h
@@ -1,7 +1,24 @@
-/***********************************************************************
-Memory primitives
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1994, 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/ut0mem.h
+Memory primitives
Created 5/30/1994 Heikki Tuuri
************************************************************************/
@@ -11,65 +28,99 @@ Created 5/30/1994 Heikki Tuuri
#include "univ.i"
#include <string.h>
-#include <stdlib.h>
-
-/* The total amount of memory currently allocated from the OS with malloc */
-extern ulint ut_total_allocated_memory;
-
+#ifndef UNIV_HOTBACKUP
+# include "os0sync.h"
+
+/** The total amount of memory currently allocated from the operating
+system with os_mem_alloc_large() or malloc(). Does not count malloc()
+if srv_use_sys_malloc is set. Protected by ut_list_mutex. */
+extern ulint ut_total_allocated_memory;
+
+/** Mutex protecting ut_total_allocated_memory and ut_mem_block_list */
+extern os_fast_mutex_t ut_list_mutex;
+#endif /* !UNIV_HOTBACKUP */
+
+/** Wrapper for memcpy(3). Copy memory area when the source and
+target are not overlapping.
+* @param dest in: copy to
+* @param sour in: copy from
+* @param n in: number of bytes to copy
+* @return dest */
UNIV_INLINE
void*
ut_memcpy(void* dest, const void* sour, ulint n);
+/** Wrapper for memmove(3). Copy memory area when the source and
+target are overlapping.
+* @param dest in: copy to
+* @param sour in: copy from
+* @param n in: number of bytes to copy
+* @return dest */
UNIV_INLINE
void*
ut_memmove(void* dest, const void* sour, ulint n);
+/** Wrapper for memcmp(3). Compare memory areas.
+* @param str1 in: first memory block to compare
+* @param str2 in: second memory block to compare
+* @param n in: number of bytes to compare
+* @return negative, 0, or positive if str1 is smaller, equal,
+ or greater than str2, respectively. */
UNIV_INLINE
int
ut_memcmp(const void* str1, const void* str2, ulint n);
+/**********************************************************************//**
+Initializes the mem block list at database startup. */
+UNIV_INTERN
+void
+ut_mem_init(void);
+/*=============*/
-/**************************************************************************
+/**********************************************************************//**
Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is
-defined and set_to_zero is TRUE. */
-
+defined and set_to_zero is TRUE.
+@return own: allocated memory */
+UNIV_INTERN
void*
ut_malloc_low(
/*==========*/
- /* out, own: allocated memory */
- ulint n, /* in: number of bytes to allocate */
- ibool set_to_zero, /* in: TRUE if allocated memory
+ ulint n, /*!< in: number of bytes to allocate */
+ ibool set_to_zero, /*!< in: TRUE if allocated memory
should be set to zero if
UNIV_SET_MEM_TO_ZERO is defined */
- ibool assert_on_error); /* in: if TRUE, we crash mysqld if
+ ibool assert_on_error); /*!< in: if TRUE, we crash mysqld if
the memory cannot be allocated */
-/**************************************************************************
+/**********************************************************************//**
Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is
-defined. */
-
+defined.
+@return own: allocated memory */
+UNIV_INTERN
void*
ut_malloc(
/*======*/
- /* out, own: allocated memory */
- ulint n); /* in: number of bytes to allocate */
-/**************************************************************************
+ ulint n); /*!< in: number of bytes to allocate */
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
Tests if malloc of n bytes would succeed. ut_malloc() asserts if memory runs
out. It cannot be used if we want to return an error message. Prints to
-stderr a message if fails. */
-
+stderr a message if the allocation fails.
+@return TRUE if succeeded */
+UNIV_INTERN
ibool
ut_test_malloc(
/*===========*/
- /* out: TRUE if succeeded */
- ulint n); /* in: try to allocate this many bytes */
-/**************************************************************************
+ ulint n); /*!< in: try to allocate this many bytes */
+#endif /* !UNIV_HOTBACKUP */
+/**********************************************************************//**
Frees a memory block allocated with ut_malloc. */
-
+UNIV_INTERN
void
ut_free(
/*====*/
- void* ptr); /* in, own: memory block */
-/**************************************************************************
+ void* ptr); /*!< in, own: memory block */
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
Implements realloc. This is needed by /pars/lexyy.c. Otherwise, you should not
use this function because the allocation functions in mem0mem.h are the
recommended ones in InnoDB.
@@ -92,118 +143,161 @@ RETURN VALUE
size was equal to 0, either NULL or a pointer suitable to
be passed to free() is returned. If realloc() fails the
original block is left untouched - it is not freed or
- moved. */
-
+ moved.
+@return own: pointer to new mem block or NULL */
+UNIV_INTERN
void*
ut_realloc(
/*=======*/
- /* out, own: pointer to new mem block or NULL */
- void* ptr, /* in: pointer to old block or NULL */
- ulint size); /* in: desired size */
-/**************************************************************************
+ void* ptr, /*!< in: pointer to old block or NULL */
+ ulint size); /*!< in: desired size */
+/**********************************************************************//**
Frees in shutdown all allocated memory not freed yet. */
-
+UNIV_INTERN
void
ut_free_all_mem(void);
/*=================*/
+#endif /* !UNIV_HOTBACKUP */
+/** Wrapper for strcpy(3). Copy a NUL-terminated string.
+* @param dest in: copy to
+* @param sour in: copy from
+* @return dest */
UNIV_INLINE
char*
ut_strcpy(char* dest, const char* sour);
+/** Wrapper for strlen(3). Determine the length of a NUL-terminated string.
+* @param str in: string
+* @return length of the string in bytes, excluding the terminating NUL */
UNIV_INLINE
ulint
ut_strlen(const char* str);
+/** Wrapper for strcmp(3). Compare NUL-terminated strings.
+* @param str1 in: first string to compare
+* @param str2 in: second string to compare
+* @return negative, 0, or positive if str1 is smaller, equal,
+ or greater than str2, respectively. */
UNIV_INLINE
int
-ut_strcmp(const void* str1, const void* str2);
+ut_strcmp(const char* str1, const char* str2);
-/**************************************************************************
+/**********************************************************************//**
Copies up to size - 1 characters from the NUL-terminated string src to
dst, NUL-terminating the result. Returns strlen(src), so truncation
-occurred if the return value >= size. */
-
+occurred if the return value >= size.
+@return strlen(src) */
+UNIV_INTERN
ulint
ut_strlcpy(
/*=======*/
- /* out: strlen(src) */
- char* dst, /* in: destination buffer */
- const char* src, /* in: source buffer */
- ulint size); /* in: size of destination buffer */
+ char* dst, /*!< in: destination buffer */
+ const char* src, /*!< in: source buffer */
+ ulint size); /*!< in: size of destination buffer */
-/**************************************************************************
+/**********************************************************************//**
Like ut_strlcpy, but if src doesn't fit in dst completely, copies the last
-(size - 1) bytes of src, not the first. */
-
+(size - 1) bytes of src, not the first.
+@return strlen(src) */
+UNIV_INTERN
ulint
ut_strlcpy_rev(
/*===========*/
- /* out: strlen(src) */
- char* dst, /* in: destination buffer */
- const char* src, /* in: source buffer */
- ulint size); /* in: size of destination buffer */
+ char* dst, /*!< in: destination buffer */
+ const char* src, /*!< in: source buffer */
+ ulint size); /*!< in: size of destination buffer */
-/**************************************************************************
-Compute strlen(ut_strcpyq(str, q)). */
+/**********************************************************************//**
+Compute strlen(ut_strcpyq(str, q)).
+@return length of the string when quoted */
UNIV_INLINE
ulint
ut_strlenq(
/*=======*/
- /* out: length of the string when quoted */
- const char* str, /* in: null-terminated string */
- char q); /* in: the quote character */
+ const char* str, /*!< in: null-terminated string */
+ char q); /*!< in: the quote character */
-/**************************************************************************
+/**********************************************************************//**
Make a quoted copy of a NUL-terminated string. Leading and trailing
quotes will not be included; only embedded quotes will be escaped.
-See also ut_strlenq() and ut_memcpyq(). */
-
+See also ut_strlenq() and ut_memcpyq().
+@return pointer to end of dest */
+UNIV_INTERN
char*
ut_strcpyq(
/*=======*/
- /* out: pointer to end of dest */
- char* dest, /* in: output buffer */
- char q, /* in: the quote character */
- const char* src); /* in: null-terminated string */
+ char* dest, /*!< in: output buffer */
+ char q, /*!< in: the quote character */
+ const char* src); /*!< in: null-terminated string */
-/**************************************************************************
+/**********************************************************************//**
Make a quoted copy of a fixed-length string. Leading and trailing
quotes will not be included; only embedded quotes will be escaped.
-See also ut_strlenq() and ut_strcpyq(). */
-
+See also ut_strlenq() and ut_strcpyq().
+@return pointer to end of dest */
+UNIV_INTERN
char*
ut_memcpyq(
/*=======*/
- /* out: pointer to end of dest */
- char* dest, /* in: output buffer */
- char q, /* in: the quote character */
- const char* src, /* in: string to be quoted */
- ulint len); /* in: length of src */
+ char* dest, /*!< in: output buffer */
+ char q, /*!< in: the quote character */
+ const char* src, /*!< in: string to be quoted */
+ ulint len); /*!< in: length of src */
-/**************************************************************************
+/**********************************************************************//**
Return the number of times s2 occurs in s1. Overlapping instances of s2
-are only counted once. */
-
+are only counted once.
+@return the number of times s2 occurs in s1 */
+UNIV_INTERN
ulint
ut_strcount(
/*========*/
- /* out: the number of times s2 occurs in s1 */
- const char* s1, /* in: string to search in */
- const char* s2); /* in: string to search for */
+ const char* s1, /*!< in: string to search in */
+ const char* s2); /*!< in: string to search for */
-/**************************************************************************
+/**********************************************************************//**
Replace every occurrence of s1 in str with s2. Overlapping instances of s1
-are only replaced once. */
-
-char *
+are only replaced once.
+@return own: modified string, must be freed with mem_free() */
+UNIV_INTERN
+char*
ut_strreplace(
/*==========*/
- /* out, own: modified string, must be
- freed with mem_free() */
- const char* str, /* in: string to operate on */
- const char* s1, /* in: string to replace */
- const char* s2); /* in: string to replace s1 with */
+ const char* str, /*!< in: string to operate on */
+ const char* s1, /*!< in: string to replace */
+ const char* s2); /*!< in: string to replace s1 with */
+
+/**********************************************************************//**
+Converts raw binary data to a NUL-terminated hex string. The output is
+truncated if there is not enough space in "hex"; make sure "hex_size" is at
+least (2 * raw_size + 1) if you do not want this to happen. Returns the
+actual number of characters written to "hex" (including the NUL).
+@return number of chars written */
+UNIV_INLINE
+ulint
+ut_raw_to_hex(
+/*==========*/
+ const void* raw, /*!< in: raw data */
+ ulint raw_size, /*!< in: "raw" length in bytes */
+ char* hex, /*!< out: hex string */
+ ulint hex_size); /*!< in: "hex" size in bytes */
+
+/*******************************************************************//**
+Adds single quotes to the start and end of the string and escapes embedded
+single quotes and backslashes by doubling them (a NUL byte is written as
+backslash + '0'). Returns the number of bytes written to "buf" (including the
+terminating NUL). If buf_size is too small, trailing bytes of "str" are dropped.
+@return number of bytes that were written */
+UNIV_INLINE
+ulint
+ut_str_sql_format(
+/*==============*/
+ const char* str, /*!< in: string */
+ ulint str_len, /*!< in: string length in bytes */
+ char* buf, /*!< out: output buffer */
+ ulint buf_size); /*!< in: output buffer size
+ in bytes */
#ifndef UNIV_NONINL
#include "ut0mem.ic"
diff --git a/storage/innobase/include/ut0mem.ic b/storage/innobase/include/ut0mem.ic
index e0253ebf618..f36c28f1989 100644
--- a/storage/innobase/include/ut0mem.ic
+++ b/storage/innobase/include/ut0mem.ic
@@ -1,11 +1,37 @@
-/***********************************************************************
-Memory primitives
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1994, 1995 Innobase Oy
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/ut0mem.ic
+Memory primitives
Created 5/30/1994 Heikki Tuuri
************************************************************************/
+#include "ut0byte.h"
+#include "mach0data.h"
+
+/** Wrapper for memcpy(3). Copy memory area when the source and
+target are not overlapping.
+* @param dest in: copy to
+* @param sour in: copy from
+* @param n in: number of bytes to copy
+* @return dest */
UNIV_INLINE
void*
ut_memcpy(void* dest, const void* sour, ulint n)
@@ -13,6 +39,12 @@ ut_memcpy(void* dest, const void* sour, ulint n)
return(memcpy(dest, sour, n));
}
+/** Wrapper for memmove(3). Copy memory area when the source and
+target are overlapping.
+* @param dest in: copy to
+* @param sour in: copy from
+* @param n in: number of bytes to copy
+* @return dest */
UNIV_INLINE
void*
ut_memmove(void* dest, const void* sour, ulint n)
@@ -20,6 +52,12 @@ ut_memmove(void* dest, const void* sour, ulint n)
return(memmove(dest, sour, n));
}
+/** Wrapper for memcmp(3). Compare memory areas.
+* @param str1 in: first memory block to compare
+* @param str2 in: second memory block to compare
+* @param n in: number of bytes to compare
+* @return negative, 0, or positive if str1 is smaller, equal,
+ or greater than str2, respectively. */
UNIV_INLINE
int
ut_memcmp(const void* str1, const void* str2, ulint n)
@@ -27,6 +65,10 @@ ut_memcmp(const void* str1, const void* str2, ulint n)
return(memcmp(str1, str2, n));
}
+/** Wrapper for strcpy(3). Copy a NUL-terminated string.
+* @param dest in: copy to
+* @param sour in: copy from
+* @return dest */
UNIV_INLINE
char*
ut_strcpy(char* dest, const char* sour)
@@ -34,6 +76,9 @@ ut_strcpy(char* dest, const char* sour)
return(strcpy(dest, sour));
}
+/** Wrapper for strlen(3). Determine the length of a NUL-terminated string.
+* @param str in: string
+* @return length of the string in bytes, excluding the terminating NUL */
UNIV_INLINE
ulint
ut_strlen(const char* str)
@@ -41,22 +86,27 @@ ut_strlen(const char* str)
return(strlen(str));
}
+/** Wrapper for strcmp(3). Compare NUL-terminated strings.
+* @param str1 in: first string to compare
+* @param str2 in: second string to compare
+* @return negative, 0, or positive if str1 is smaller, equal,
+ or greater than str2, respectively. */
UNIV_INLINE
int
-ut_strcmp(const void* str1, const void* str2)
+ut_strcmp(const char* str1, const char* str2)
{
- return(strcmp((const char*)str1, (const char*)str2));
+ return(strcmp(str1, str2));
}
-/**************************************************************************
-Compute strlen(ut_strcpyq(str, q)). */
+/**********************************************************************//**
+Compute strlen(ut_strcpyq(str, q)).
+@return length of the string when quoted */
UNIV_INLINE
ulint
ut_strlenq(
/*=======*/
- /* out: length of the string when quoted */
- const char* str, /* in: null-terminated string */
- char q) /* in: the quote character */
+ const char* str, /*!< in: null-terminated string */
+ char q) /*!< in: the quote character */
{
ulint len;
@@ -68,3 +118,221 @@ ut_strlenq(
return(len);
}
+
+/**********************************************************************//**
+Converts raw binary data to a NUL-terminated string of uppercase hex digits,
+two digits per input byte. The output is truncated if there is not enough
+space in "hex"; make sure "hex_size" is at least (2 * raw_size + 1) if you
+do not want this to happen. Nothing is written if "hex_size" is 0.
+@return number of chars written to "hex", including the NUL; 0 if none */
+UNIV_INLINE
+ulint
+ut_raw_to_hex(
+/*==========*/
+ const void* raw, /*!< in: raw data */
+ ulint raw_size, /*!< in: "raw" length in bytes */
+ char* hex, /*!< out: hex string */
+ ulint hex_size) /*!< in: "hex" size in bytes */
+{
+ /* Helpers that pack two chars (a, b) into one uint16 and unpack them. */
+#ifdef WORDS_BIGENDIAN
+ /* Big-endian build: 'a' occupies the high-order byte of the pair. */
+#define MK_UINT16(a, b) (((uint16) (a)) << 8 | (uint16) (b))
+
+#define UINT16_GET_A(u) ((unsigned char) ((u) >> 8))
+#define UINT16_GET_B(u) ((unsigned char) ((u) & 0xFF))
+
+#else /* WORDS_BIGENDIAN */
+ /* Otherwise: 'a' occupies the low-order byte of the pair. */
+#define MK_UINT16(a, b) (((uint16) (b)) << 8 | (uint16) (a))
+
+#define UINT16_GET_A(u) ((unsigned char) ((u) & 0xFF))
+#define UINT16_GET_B(u) ((unsigned char) ((u) >> 8))
+ /* The intent appears to be that 'a' is first in memory when stored. */
+#endif /* WORDS_BIGENDIAN */
+ /* Expands to the 16 table entries whose first hex digit is 'a'. */
+#define MK_ALL_UINT16_WITH_A(a) \
+ MK_UINT16(a, '0'), \
+ MK_UINT16(a, '1'), \
+ MK_UINT16(a, '2'), \
+ MK_UINT16(a, '3'), \
+ MK_UINT16(a, '4'), \
+ MK_UINT16(a, '5'), \
+ MK_UINT16(a, '6'), \
+ MK_UINT16(a, '7'), \
+ MK_UINT16(a, '8'), \
+ MK_UINT16(a, '9'), \
+ MK_UINT16(a, 'A'), \
+ MK_UINT16(a, 'B'), \
+ MK_UINT16(a, 'C'), \
+ MK_UINT16(a, 'D'), \
+ MK_UINT16(a, 'E'), \
+ MK_UINT16(a, 'F')
+ /* hex_map[v] holds the two hex digits (high nibble, low nibble) of v. */
+ static const uint16 hex_map[256] = {
+ MK_ALL_UINT16_WITH_A('0'),
+ MK_ALL_UINT16_WITH_A('1'),
+ MK_ALL_UINT16_WITH_A('2'),
+ MK_ALL_UINT16_WITH_A('3'),
+ MK_ALL_UINT16_WITH_A('4'),
+ MK_ALL_UINT16_WITH_A('5'),
+ MK_ALL_UINT16_WITH_A('6'),
+ MK_ALL_UINT16_WITH_A('7'),
+ MK_ALL_UINT16_WITH_A('8'),
+ MK_ALL_UINT16_WITH_A('9'),
+ MK_ALL_UINT16_WITH_A('A'),
+ MK_ALL_UINT16_WITH_A('B'),
+ MK_ALL_UINT16_WITH_A('C'),
+ MK_ALL_UINT16_WITH_A('D'),
+ MK_ALL_UINT16_WITH_A('E'),
+ MK_ALL_UINT16_WITH_A('F')
+ };
+ const unsigned char* rawc; /* read cursor over "raw" */
+ ulint read_bytes; /* number of input bytes to convert */
+ ulint write_bytes; /* return value: chars written incl. NUL */
+ ulint i;
+
+ rawc = (const unsigned char*) raw;
+ /* A zero-sized buffer cannot even hold the terminating NUL. */
+ if (hex_size == 0) {
+
+ return(0);
+ }
+ /* Decide how many input bytes fit and how many chars will be output. */
+ if (hex_size <= 2 * raw_size) {
+ /* Not enough room: truncate and use the whole buffer. */
+ read_bytes = hex_size / 2;
+ write_bytes = hex_size;
+ } else {
+ /* Everything fits: two digits per byte plus the NUL. */
+ read_bytes = raw_size;
+ write_bytes = 2 * raw_size + 1;
+ }
+ /* Runs ASSIGN once per input byte, then advances both cursors. */
+#define LOOP_READ_BYTES(ASSIGN) \
+ for (i = 0; i < read_bytes; i++) { \
+ ASSIGN; \
+ hex += 2; \
+ rawc++; \
+ }
+ /* NOTE(review): presumably tests that "hex" is 2-byte aligned; confirm. */
+ if (ut_align_offset(hex, 2) == 0) {
+ /* Store both digits with a single uint16 write. */
+ LOOP_READ_BYTES(
+ *(uint16*) hex = hex_map[*rawc]
+ );
+ } else {
+ /* Store the two digits one byte at a time. */
+ LOOP_READ_BYTES(
+ *hex = UINT16_GET_A(hex_map[*rawc]);
+ *(hex + 1) = UINT16_GET_B(hex_map[*rawc])
+ );
+ }
+ /* A truncated, even-sized buffer was filled completely by the loop... */
+ if (hex_size <= 2 * raw_size && hex_size % 2 == 0) {
+ /* ...so step back and let the NUL overwrite the last digit. */
+ hex--;
+ }
+
+ *hex = '\0';
+
+ return(write_bytes);
+}
+
+/*******************************************************************//**
+Adds single quotes to the start and end of the string, doubles embedded single
+quotes and backslashes, and writes a NUL byte as backslash + '0'. If buf_size
+is too small, the trailing bytes of "str" are discarded; if buf_size < 4, only
+'' (buf_size 3 and str_len 0), an unquoted empty string, or nothing is written.
+@return number of bytes written to "buf", including the terminating NUL */
+UNIV_INLINE
+ulint
+ut_str_sql_format(
+/*==============*/
+ const char* str, /*!< in: string */
+ ulint str_len, /*!< in: string length in bytes */
+ char* buf, /*!< out: output buffer */
+ ulint buf_size) /*!< in: output buffer size
+ in bytes */
+{
+ ulint str_i; /* read index into "str" */
+ ulint buf_i; /* write index into "buf" */
+
+ buf_i = 0;
+ /* Buffers below 4 bytes cannot hold quote + char + quote + NUL. */
+ switch (buf_size) {
+ case 3:
+ /* Exactly enough room for the two quotes of an empty string. */
+ if (str_len == 0) {
+
+ buf[buf_i] = '\'';
+ buf_i++;
+ buf[buf_i] = '\'';
+ buf_i++;
+ }
+ /* FALLTHROUGH */
+ case 2:
+ case 1:
+ /* Terminate whatever was written (possibly nothing). */
+ buf[buf_i] = '\0';
+ buf_i++;
+ /* FALLTHROUGH */
+ case 0:
+ /* With buf_size == 0 nothing was written and 0 is returned. */
+ return(buf_i);
+ }
+
+ /* buf_size >= 4 */
+
+ buf[0] = '\'';
+ buf_i = 1;
+
+ for (str_i = 0; str_i < str_len; str_i++) {
+
+ char ch;
+ /* Stop when only the closing quote and the NUL still fit. */
+ if (buf_size - buf_i == 2) {
+
+ break;
+ }
+
+ ch = str[str_i];
+
+ switch (ch) {
+ case '\0':
+ /* Two output chars + closing quote + NUL must fit. */
+ if (UNIV_UNLIKELY(buf_size - buf_i < 4)) {
+
+ goto func_exit;
+ }
+ buf[buf_i] = '\\';
+ buf_i++;
+ buf[buf_i] = '0';
+ buf_i++;
+ break;
+ case '\'':
+ case '\\':
+ /* Doubled chars have the same space requirement. */
+ if (UNIV_UNLIKELY(buf_size - buf_i < 4)) {
+
+ goto func_exit;
+ }
+ buf[buf_i] = ch;
+ buf_i++;
+ /* FALLTHROUGH */
+ default:
+ /* Copy the char itself (second copy of a doubled one). */
+ buf[buf_i] = ch;
+ buf_i++;
+ }
+ }
+
+func_exit:
+ /* At least two bytes remain here: close the quote and terminate. */
+ buf[buf_i] = '\'';
+ buf_i++;
+ buf[buf_i] = '\0';
+ buf_i++;
+
+ return(buf_i);
+}
diff --git a/storage/innobase/include/ut0rnd.h b/storage/innobase/include/ut0rnd.h
index 3f3fce1075c..ce5152e942f 100644
--- a/storage/innobase/include/ut0rnd.h
+++ b/storage/innobase/include/ut0rnd.h
@@ -1,7 +1,24 @@
-/**********************************************************************
-Random numbers and hashing
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1994, 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0rnd.h
+Random numbers and hashing
Created 1/20/1994 Heikki Tuuri
***********************************************************************/
@@ -13,105 +30,110 @@ Created 1/20/1994 Heikki Tuuri
#include "ut0byte.h"
-/* The 'character code' for end of field or string (used
+/** The 'character code' for end of field or string, used
in folding records */
#define UT_END_OF_FIELD 257
-/************************************************************
+/********************************************************//**
This is used to set the random number seed. */
UNIV_INLINE
void
ut_rnd_set_seed(
/*============*/
- ulint seed); /* in: seed */
-/************************************************************
-The following function generates a series of 'random' ulint integers. */
+ ulint seed); /*!< in: seed */
+/********************************************************//**
+The following function generates a series of 'random' ulint integers.
+@return the next 'random' number */
UNIV_INLINE
ulint
ut_rnd_gen_next_ulint(
/*==================*/
- /* out: the next 'random' number */
- ulint rnd); /* in: the previous random number value */
-/*************************************************************
+ ulint rnd); /*!< in: the previous random number value */
+/*********************************************************//**
The following function generates 'random' ulint integers which
enumerate the value space (let there be N of them) of ulint integers
in a pseudo-random fashion. Note that the same integer is repeated
-always after N calls to the generator. */
+always after N calls to the generator.
+@return the 'random' number */
UNIV_INLINE
ulint
ut_rnd_gen_ulint(void);
/*==================*/
- /* out: the 'random' number */
-/************************************************************
-Generates a random integer from a given interval. */
+/********************************************************//**
+Generates a random integer from a given interval.
+@return the 'random' number */
UNIV_INLINE
ulint
ut_rnd_interval(
/*============*/
- /* out: the 'random' number */
- ulint low, /* in: low limit; can generate also this value */
- ulint high); /* in: high limit; can generate also this value */
-/*************************************************************
-Generates a random iboolean value. */
+ ulint low, /*!< in: low limit; can generate also this value */
+ ulint high); /*!< in: high limit; can generate also this value */
+/*********************************************************//**
+Generates a random iboolean value.
+@return the random value */
UNIV_INLINE
ibool
ut_rnd_gen_ibool(void);
/*=================*/
- /* out: the random value */
-/***********************************************************
+/*******************************************************//**
The following function generates a hash value for a ulint integer
to a hash table of size table_size, which should be a prime or some
-random number to work reliably. */
+random number to work reliably.
+@return hash value */
UNIV_INLINE
ulint
ut_hash_ulint(
/*==========*/
- /* out: hash value */
- ulint key, /* in: value to be hashed */
- ulint table_size); /* in: hash table size */
-/*****************************************************************
-Folds a pair of ulints. */
+ ulint key, /*!< in: value to be hashed */
+ ulint table_size); /*!< in: hash table size */
+/*************************************************************//**
+Folds a pair of ulints.
+@return folded value */
UNIV_INLINE
ulint
ut_fold_ulint_pair(
/*===============*/
- /* out: folded value */
- ulint n1, /* in: ulint */
- ulint n2); /* in: ulint */
-/*****************************************************************
-Folds a dulint. */
+ ulint n1, /*!< in: ulint */
+ ulint n2) /*!< in: ulint */
+ __attribute__((const));
+/*************************************************************//**
+Folds a dulint.
+@return folded value */
UNIV_INLINE
ulint
ut_fold_dulint(
/*===========*/
- /* out: folded value */
- dulint d); /* in: dulint */
-/*****************************************************************
-Folds a character string ending in the null character. */
+ dulint d) /*!< in: dulint */
+ __attribute__((const));
+/*************************************************************//**
+Folds a character string ending in the null character.
+@return folded value */
UNIV_INLINE
ulint
ut_fold_string(
/*===========*/
- /* out: folded value */
- const char* str); /* in: null-terminated string */
-/*****************************************************************
-Folds a binary string. */
+ const char* str) /*!< in: null-terminated string */
+ __attribute__((pure));
+/*************************************************************//**
+Folds a binary string.
+@return folded value */
UNIV_INLINE
ulint
ut_fold_binary(
/*===========*/
- /* out: folded value */
- const byte* str, /* in: string of bytes */
- ulint len); /* in: length */
-/***************************************************************
+ const byte* str, /*!< in: string of bytes */
+ ulint len) /*!< in: length */
+ __attribute__((pure));
+/***********************************************************//**
Looks for a prime number slightly greater than the given argument.
-The prime is chosen so that it is not near any power of 2. */
-
+The prime is chosen so that it is not near any power of 2.
+@return prime */
+UNIV_INTERN
ulint
ut_find_prime(
/*==========*/
- /* out: prime */
- ulint n); /* in: positive number > 100 */
+ ulint n) /*!< in: positive number > 100 */
+ __attribute__((const));
#ifndef UNIV_NONINL
diff --git a/storage/innobase/include/ut0rnd.ic b/storage/innobase/include/ut0rnd.ic
index 625c378489a..763469142ec 100644
--- a/storage/innobase/include/ut0rnd.ic
+++ b/storage/innobase/include/ut0rnd.ic
@@ -1,7 +1,24 @@
-/******************************************************************
-Random numbers and hashing
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1994, 1995 Innobase Oy
+*****************************************************************************/
+
+/**************************************************************//**
+@file include/ut0rnd.ic
+Random numbers and hashing
Created 5/30/1994 Heikki Tuuri
*******************************************************************/
@@ -18,27 +35,28 @@ Created 5/30/1994 Heikki Tuuri
#define UT_XOR_RND1 187678878
#define UT_XOR_RND2 143537923
+/** Seed value of ut_rnd_gen_ulint() */
extern ulint ut_rnd_ulint_counter;
-/************************************************************
+/********************************************************//**
This is used to set the random number seed. */
UNIV_INLINE
void
ut_rnd_set_seed(
/*============*/
- ulint seed) /* in: seed */
+ ulint seed) /*!< in: seed */
{
ut_rnd_ulint_counter = seed;
}
-/************************************************************
-The following function generates a series of 'random' ulint integers. */
+/********************************************************//**
+The following function generates a series of 'random' ulint integers.
+@return the next 'random' number */
UNIV_INLINE
ulint
ut_rnd_gen_next_ulint(
/*==================*/
- /* out: the next 'random' number */
- ulint rnd) /* in: the previous random number value */
+ ulint rnd) /*!< in: the previous random number value */
{
ulint n_bits;
@@ -55,16 +73,16 @@ ut_rnd_gen_next_ulint(
return(rnd);
}
-/************************************************************
+/********************************************************//**
The following function generates 'random' ulint integers which
enumerate the value space of ulint integers in a pseudo random
fashion. Note that the same integer is repeated always after
-2 to power 32 calls to the generator (if ulint is 32-bit). */
+2 to power 32 calls to the generator (if ulint is 32-bit).
+@return the 'random' number */
UNIV_INLINE
ulint
ut_rnd_gen_ulint(void)
/*==================*/
- /* out: the 'random' number */
{
ulint rnd;
ulint n_bits;
@@ -78,15 +96,15 @@ ut_rnd_gen_ulint(void)
return(rnd);
}
-/************************************************************
-Generates a random integer from a given interval. */
+/********************************************************//**
+Generates a random integer from a given interval.
+@return the 'random' number */
UNIV_INLINE
ulint
ut_rnd_interval(
/*============*/
- /* out: the 'random' number */
- ulint low, /* in: low limit; can generate also this value */
- ulint high) /* in: high limit; can generate also this value */
+ ulint low, /*!< in: low limit; can generate also this value */
+ ulint high) /*!< in: high limit; can generate also this value */
{
ulint rnd;
@@ -102,13 +120,13 @@ ut_rnd_interval(
return(low + (rnd % (high - low + 1)));
}
-/*************************************************************
-Generates a random iboolean value. */
+/*********************************************************//**
+Generates a random iboolean value.
+@return the random value */
UNIV_INLINE
ibool
ut_rnd_gen_ibool(void)
/*=================*/
- /* out: the random value */
{
ulint x;
@@ -122,73 +140,64 @@ ut_rnd_gen_ibool(void)
return(FALSE);
}
-/***********************************************************
+/*******************************************************//**
The following function generates a hash value for a ulint integer
to a hash table of size table_size, which should be a prime
-or some random number for the hash table to work reliably. */
+or some random number for the hash table to work reliably.
+@return hash value */
UNIV_INLINE
ulint
ut_hash_ulint(
/*==========*/
- /* out: hash value */
- ulint key, /* in: value to be hashed */
- ulint table_size) /* in: hash table size */
+ ulint key, /*!< in: value to be hashed */
+ ulint table_size) /*!< in: hash table size */
{
key = key ^ UT_HASH_RANDOM_MASK2;
return(key % table_size);
}
-/*****************************************************************
-Folds a pair of ulints. */
+/*************************************************************//**
+Folds a pair of ulints.
+@return folded value */
UNIV_INLINE
ulint
ut_fold_ulint_pair(
/*===============*/
- /* out: folded value */
- ulint n1, /* in: ulint */
- ulint n2) /* in: ulint */
+ ulint n1, /*!< in: ulint */
+ ulint n2) /*!< in: ulint */
{
return(((((n1 ^ n2 ^ UT_HASH_RANDOM_MASK2) << 8) + n1)
^ UT_HASH_RANDOM_MASK) + n2);
}
-/*****************************************************************
-Folds a dulint. */
+/*************************************************************//**
+Folds a dulint.
+@return folded value */
UNIV_INLINE
ulint
ut_fold_dulint(
/*===========*/
- /* out: folded value */
- dulint d) /* in: dulint */
+ dulint d) /*!< in: dulint */
{
return(ut_fold_ulint_pair(ut_dulint_get_low(d),
ut_dulint_get_high(d)));
}
-/*****************************************************************
-Folds a character string ending in the null character. */
+/*************************************************************//**
+Folds a character string ending in the null character.
+@return folded value */
UNIV_INLINE
ulint
ut_fold_string(
/*===========*/
- /* out: folded value */
- const char* str) /* in: null-terminated string */
+ const char* str) /*!< in: null-terminated string */
{
-#ifdef UNIV_DEBUG
- ulint i = 0;
-#endif
ulint fold = 0;
ut_ad(str);
while (*str != '\0') {
-
-#ifdef UNIV_DEBUG
- i++;
- ut_a(i < 100);
-#endif
-
fold = ut_fold_ulint_pair(fold, (ulint)(*str));
str++;
}
@@ -196,20 +205,20 @@ ut_fold_string(
return(fold);
}
-/*****************************************************************
-Folds a binary string. */
+/*************************************************************//**
+Folds a binary string.
+@return folded value */
UNIV_INLINE
ulint
ut_fold_binary(
/*===========*/
- /* out: folded value */
- const byte* str, /* in: string of bytes */
- ulint len) /* in: length */
+ const byte* str, /*!< in: string of bytes */
+ ulint len) /*!< in: length */
{
const byte* str_end = str + len;
ulint fold = 0;
- ut_ad(str);
+ ut_ad(str || !len);
while (str < str_end) {
fold = ut_fold_ulint_pair(fold, (ulint)(*str));
diff --git a/storage/innobase/include/ut0sort.h b/storage/innobase/include/ut0sort.h
index 87d30dee6f2..5c6647dda9e 100644
--- a/storage/innobase/include/ut0sort.h
+++ b/storage/innobase/include/ut0sort.h
@@ -1,7 +1,24 @@
-/**********************************************************************
-Sort utility
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1995 Innobase Oy
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0sort.h
+Sort utility
Created 11/9/1995 Heikki Tuuri
***********************************************************************/
@@ -18,7 +35,7 @@ the macro. The sort algorithm is mergesort which has logarithmic
worst case.
*/
-/***********************************************************************
+/*******************************************************************//**
This macro expands to the body of a standard sort function.
The sort function uses mergesort and must be defined separately
for each type of array.
@@ -30,8 +47,7 @@ and the low (LOW), inclusive, and high (HIGH), noninclusive,
limits for the sort interval as arguments.
CMP_FUN is the comparison function name. It takes as arguments
two elements from the array and returns 1, if the first is bigger,
-0 if equal, and -1 if the second bigger. For an eaxmaple of use
-see test program in tsut.c. */
+0 if equal, and -1 if the second is bigger. */
#define UT_SORT_FUNCTION_BODY(SORT_FUN, ARR, AUX_ARR, LOW, HIGH, CMP_FUN)\
{\
@@ -81,9 +97,8 @@ see test program in tsut.c. */
}\
}\
\
- for (ut_sort_i77 = (LOW); ut_sort_i77 < (HIGH); ut_sort_i77++) {\
- (ARR)[ut_sort_i77] = (AUX_ARR)[ut_sort_i77];\
- }\
+ memcpy((void*) ((ARR) + (LOW)), (AUX_ARR) + (LOW),\
+ ((HIGH) - (LOW)) * sizeof *(ARR));\
}\
diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h
index 8ad1782b178..197b8401428 100644
--- a/storage/innobase/include/ut0ut.h
+++ b/storage/innobase/include/ut0ut.h
@@ -1,7 +1,31 @@
-/**********************************************************************
-Various utilities
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2009, Sun Microsystems, Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Sun Microsystems, Inc. Those modifications are gratefully acknowledged and
+are described briefly in the InnoDB documentation. The contributions by
+Sun Microsystems are incorporated with their permission, and subject to the
+conditions contained in the file COPYING.Sun_Microsystems.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1994, 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0ut.h
+Various utilities
Created 1/20/1994 Heikki Tuuri
***********************************************************************/
@@ -10,279 +34,365 @@ Created 1/20/1994 Heikki Tuuri
#define ut0ut_h
#include "univ.i"
+
+#ifndef UNIV_HOTBACKUP
+# include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
+#endif /* UNIV_HOTBACKUP */
+
#include <time.h>
#ifndef MYSQL_SERVER
#include <ctype.h>
#endif
+/** Index name prefix in fast index creation */
+#define TEMP_INDEX_PREFIX '\377'
+/** Index name prefix in fast index creation, as a string constant */
+#define TEMP_INDEX_PREFIX_STR "\377"
+
+/** Time stamp */
typedef time_t ib_time_t;
-/************************************************************
+#ifndef UNIV_HOTBACKUP
+#if defined(HAVE_IB_PAUSE_INSTRUCTION)
+# ifdef WIN32
+ /* In the Win32 API, the x86 PAUSE instruction is executed by calling
+ the YieldProcessor macro defined in WinNT.h. Using YieldProcessor is
+ the CPU architecture-independent way of doing this. */
+# define UT_RELAX_CPU() YieldProcessor()
+# else
+ /* According to the gcc info page, asm volatile means that the
+ instruction has important side-effects and must not be removed.
+ Also asm volatile may trigger a memory barrier (spilling all registers
+ to memory). */
+# define UT_RELAX_CPU() __asm__ __volatile__ ("pause")
+# endif
+#elif defined(HAVE_ATOMIC_BUILTINS)
+# define UT_RELAX_CPU() do { \
+ volatile lint volatile_var; \
+ os_compare_and_swap_lint(&volatile_var, 0, 1); \
+ } while (0)
+#else
+# define UT_RELAX_CPU() ((void)0) /* avoid warning for an empty statement */
+#endif
+
+/*********************************************************************//**
+Delays execution for at most max_wait_us microseconds or returns earlier
+if cond becomes true.
+@param cond in: condition to wait for; evaluated every 2 ms
+@param max_wait_us in: maximum delay to wait, in microseconds */
+#define UT_WAIT_FOR(cond, max_wait_us) \
+do { \
+ ullint start_us; \
+ start_us = ut_time_us(NULL); \
+ while (!(cond) \
+ && ut_time_us(NULL) - start_us < (max_wait_us)) {\
+ \
+ os_thread_sleep(2000 /* 2 ms */); \
+ } \
+} while (0)
+#endif /* !UNIV_HOTBACKUP */
+
+/********************************************************//**
Gets the high 32 bits in a ulint. That is makes a shift >> 32,
but since there seem to be compiler bugs in both gcc and Visual C++,
-we do this by a special conversion. */
-
+we do this by a special conversion.
+@return a >> 32 */
+UNIV_INTERN
ulint
ut_get_high32(
/*==========*/
- /* out: a >> 32 */
- ulint a); /* in: ulint */
-/**********************************************************
-Calculates the minimum of two ulints. */
+ ulint a); /*!< in: ulint */
+/******************************************************//**
+Calculates the minimum of two ulints.
+@return minimum */
UNIV_INLINE
ulint
ut_min(
/*===*/
- /* out: minimum */
- ulint n1, /* in: first number */
- ulint n2); /* in: second number */
-/**********************************************************
-Calculates the maximum of two ulints. */
+ ulint n1, /*!< in: first number */
+ ulint n2); /*!< in: second number */
+/******************************************************//**
+Calculates the maximum of two ulints.
+@return maximum */
UNIV_INLINE
ulint
ut_max(
/*===*/
- /* out: maximum */
- ulint n1, /* in: first number */
- ulint n2); /* in: second number */
-/********************************************************************
+ ulint n1, /*!< in: first number */
+ ulint n2); /*!< in: second number */
+/****************************************************************//**
Calculates minimum of two ulint-pairs. */
UNIV_INLINE
void
ut_pair_min(
/*========*/
- ulint* a, /* out: more significant part of minimum */
- ulint* b, /* out: less significant part of minimum */
- ulint a1, /* in: more significant part of first pair */
- ulint b1, /* in: less significant part of first pair */
- ulint a2, /* in: more significant part of second pair */
- ulint b2); /* in: less significant part of second pair */
-/**********************************************************
-Compares two ulints. */
+ ulint* a, /*!< out: more significant part of minimum */
+ ulint* b, /*!< out: less significant part of minimum */
+ ulint a1, /*!< in: more significant part of first pair */
+ ulint b1, /*!< in: less significant part of first pair */
+ ulint a2, /*!< in: more significant part of second pair */
+ ulint b2); /*!< in: less significant part of second pair */
+/******************************************************//**
+Compares two ulints.
+@return 1 if a > b, 0 if a == b, -1 if a < b */
UNIV_INLINE
int
ut_ulint_cmp(
/*=========*/
- /* out: 1 if a > b, 0 if a == b, -1 if a < b */
- ulint a, /* in: ulint */
- ulint b); /* in: ulint */
-/***********************************************************
-Compares two pairs of ulints. */
+ ulint a, /*!< in: ulint */
+ ulint b); /*!< in: ulint */
+/*******************************************************//**
+Compares two pairs of ulints.
+@return -1 if a < b, 0 if a == b, 1 if a > b */
UNIV_INLINE
int
ut_pair_cmp(
/*========*/
- /* out: -1 if a < b, 0 if a == b,
- 1 if a > b */
- ulint a1, /* in: more significant part of first pair */
- ulint a2, /* in: less significant part of first pair */
- ulint b1, /* in: more significant part of second pair */
- ulint b2); /* in: less significant part of second pair */
-/*****************************************************************
-Calculates fast the remainder when divided by a power of two. */
-UNIV_INLINE
-ulint
-ut_2pow_remainder(
-/*==============*/ /* out: remainder */
- ulint n, /* in: number to be divided */
- ulint m); /* in: divisor; power of 2 */
-/*****************************************************************
-Calculates fast value rounded to a multiple of a power of 2. */
-UNIV_INLINE
-ulint
-ut_2pow_round(
-/*==========*/ /* out: value of n rounded down to nearest
- multiple of m */
- ulint n, /* in: number to be rounded */
- ulint m); /* in: divisor; power of 2 */
-/*****************************************************************
+ ulint a1, /*!< in: more significant part of first pair */
+ ulint a2, /*!< in: less significant part of first pair */
+ ulint b1, /*!< in: more significant part of second pair */
+ ulint b2); /*!< in: less significant part of second pair */
+/*************************************************************//**
+Determines if a number is zero or a power of two.
+@param n in: number
+@return nonzero if n is zero or a power of two; zero otherwise */
+#define ut_is_2pow(n) UNIV_LIKELY(!((n) & ((n) - 1)))
+/*************************************************************//**
+Calculates fast the remainder of n/m when m is a power of two.
+@param n in: numerator
+@param m in: denominator, must be a power of two
+@return the remainder of n/m */
+#define ut_2pow_remainder(n, m) ((n) & ((m) - 1))
+/*************************************************************//**
+Calculates the biggest multiple of m that is not bigger than n
+when m is a power of two. In other words, rounds n down to m * k.
+@param n in: number to round down
+@param m in: alignment, must be a power of two
+@return n rounded down to the biggest possible integer multiple of m */
+#define ut_2pow_round(n, m) ((n) & ~((m) - 1))
+/** Align a number down to a multiple of a power of two.
+@param n in: number to round down
+@param m in: alignment, must be a power of two
+@return n rounded down to the biggest possible integer multiple of m */
+#define ut_calc_align_down(n, m) ut_2pow_round(n, m)
+/********************************************************//**
+Calculates the smallest multiple of m that is not smaller than n
+when m is a power of two. In other words, rounds n up to m * k.
+@param n in: number to round up
+@param m in: alignment, must be a power of two
+@return n rounded up to the smallest possible integer multiple of m */
+#define ut_calc_align(n, m) (((n) + ((m) - 1)) & ~((m) - 1))
+/*************************************************************//**
Calculates fast the 2-logarithm of a number, rounded upward to an
-integer. */
+integer.
+@return logarithm in the base 2, rounded upward */
UNIV_INLINE
ulint
ut_2_log(
/*=====*/
- /* out: logarithm in the base 2, rounded upward */
- ulint n); /* in: number */
-/*****************************************************************
-Calculates 2 to power n. */
+ ulint n); /*!< in: number */
+/*************************************************************//**
+Calculates 2 to power n.
+@return 2 to power n */
UNIV_INLINE
ulint
ut_2_exp(
/*=====*/
- /* out: 2 to power n */
- ulint n); /* in: number */
-/*****************************************************************
-Calculates fast the number rounded up to the nearest power of 2. */
-
+ ulint n); /*!< in: number */
+/*************************************************************//**
+Calculates fast the number rounded up to the nearest power of 2.
+@return first power of 2 which is >= n */
+UNIV_INTERN
ulint
ut_2_power_up(
/*==========*/
- /* out: first power of 2 which is >= n */
- ulint n) /* in: number != 0 */
+ ulint n) /*!< in: number != 0 */
__attribute__((const));
-/* Determine how many bytes (groups of 8 bits) are needed to
-store the given number of bits. */
+/** Determine how many bytes (groups of 8 bits) are needed to
+store the given number of bits.
+@param b in: bits
+@return number of bytes (octets) needed to represent b */
#define UT_BITS_IN_BYTES(b) (((b) + 7) / 8)
-/****************************************************************
-Sort function for ulint arrays. */
-
-void
-ut_ulint_sort(ulint* arr, ulint* aux_arr, ulint low, ulint high);
-/*============================================================*/
-/************************************************************
-The following function returns elapsed CPU time in milliseconds. */
-
-ulint
-ut_clock(void);
-/**************************************************************
+/**********************************************************//**
Returns system time. We do not specify the format of the time returned:
-the only way to manipulate it is to use the function ut_difftime. */
-
+the only way to manipulate it is to use the function ut_difftime.
+@return system time */
+UNIV_INTERN
ib_time_t
ut_time(void);
/*=========*/
-/**************************************************************
+#ifndef UNIV_HOTBACKUP
+/**********************************************************//**
Returns system time.
Upon successful completion, the value 0 is returned; otherwise the
value -1 is returned and the global variable errno is set to indicate the
-error. */
-
+error.
+@return 0 on success, -1 otherwise */
+UNIV_INTERN
int
ut_usectime(
/*========*/
- /* out: 0 on success, -1 otherwise */
- ulint* sec, /* out: seconds since the Epoch */
- ulint* ms); /* out: microseconds since the Epoch+*sec */
-/**************************************************************
-Returns the difference of two times in seconds. */
-
+ ulint* sec, /*!< out: seconds since the Epoch */
+ ulint* ms); /*!< out: microseconds since the Epoch+*sec */
+
+/**********************************************************//**
+Returns the number of microseconds since epoch. Similar to
+time(3), the return value is also stored in *tloc, provided
+that tloc is non-NULL.
+@return us since epoch */
+UNIV_INTERN
+ullint
+ut_time_us(
+/*=======*/
+ ullint* tloc); /*!< out: us since epoch, if non-NULL */
+/**********************************************************//**
+Returns the number of milliseconds since some epoch. The
+value may wrap around. It should only be used for heuristic
+purposes.
+@return ms since epoch */
+UNIV_INTERN
+ulint
+ut_time_ms(void);
+/*============*/
+#endif /* !UNIV_HOTBACKUP */
+
+/**********************************************************//**
+Returns the difference of two times in seconds.
+@return time2 - time1 expressed in seconds */
+UNIV_INTERN
double
ut_difftime(
/*========*/
- /* out: time2 - time1 expressed in seconds */
- ib_time_t time2, /* in: time */
- ib_time_t time1); /* in: time */
-/**************************************************************
+ ib_time_t time2, /*!< in: time */
+ ib_time_t time1); /*!< in: time */
+/**********************************************************//**
Prints a timestamp to a file. */
-
+UNIV_INTERN
void
ut_print_timestamp(
/*===============*/
- FILE* file); /* in: file where to print */
-/**************************************************************
+ FILE* file); /*!< in: file where to print */
+/**********************************************************//**
Sprintfs a timestamp to a buffer, 13..14 chars plus terminating NUL. */
-
+UNIV_INTERN
void
ut_sprintf_timestamp(
/*=================*/
- char* buf); /* in: buffer where to sprintf */
-/**************************************************************
+ char* buf); /*!< in: buffer where to sprintf */
+#ifdef UNIV_HOTBACKUP
+/**********************************************************//**
Sprintfs a timestamp to a buffer with no spaces and with ':' characters
replaced by '_'. */
-
+UNIV_INTERN
void
ut_sprintf_timestamp_without_extra_chars(
/*=====================================*/
- char* buf); /* in: buffer where to sprintf */
-/**************************************************************
+ char* buf); /*!< in: buffer where to sprintf */
+/**********************************************************//**
Returns current year, month, day. */
-
+UNIV_INTERN
void
ut_get_year_month_day(
/*==================*/
- ulint* year, /* out: current year */
- ulint* month, /* out: month */
- ulint* day); /* out: day */
-/*****************************************************************
+ ulint* year, /*!< out: current year */
+ ulint* month, /*!< out: month */
+ ulint* day); /*!< out: day */
+#else /* UNIV_HOTBACKUP */
+/*************************************************************//**
Runs an idle loop on CPU. The argument gives the desired delay
-in microseconds on 100 MHz Pentium + Visual C++. */
-
+in microseconds on 100 MHz Pentium + Visual C++.
+@return dummy value */
+UNIV_INTERN
ulint
ut_delay(
/*=====*/
- /* out: dummy value */
- ulint delay); /* in: delay in microseconds on 100 MHz Pentium */
-/*****************************************************************
+ ulint delay); /*!< in: delay in microseconds on 100 MHz Pentium */
+#endif /* UNIV_HOTBACKUP */
+/*************************************************************//**
Prints the contents of a memory buffer in hex and ascii. */
-
+UNIV_INTERN
void
ut_print_buf(
/*=========*/
- FILE* file, /* in: file where to print */
- const void* buf, /* in: memory buffer */
- ulint len); /* in: length of the buffer */
+ FILE* file, /*!< in: file where to print */
+ const void* buf, /*!< in: memory buffer */
+ ulint len); /*!< in: length of the buffer */
-/**************************************************************************
+/**********************************************************************//**
Outputs a NUL-terminated file name, quoted with apostrophes. */
-
+UNIV_INTERN
void
ut_print_filename(
/*==============*/
- FILE* f, /* in: output stream */
- const char* name); /* in: name to print */
+ FILE* f, /*!< in: output stream */
+ const char* name); /*!< in: name to print */
+#ifndef UNIV_HOTBACKUP
/* Forward declaration of transaction handle */
struct trx_struct;
-/**************************************************************************
+/**********************************************************************//**
Outputs a fixed-length string, quoted as an SQL identifier.
If the string contains a slash '/', the string will be
output as two identifiers separated by a period (.),
as in SQL database_name.identifier. */
-
+UNIV_INTERN
void
ut_print_name(
/*==========*/
- FILE* f, /* in: output stream */
- struct trx_struct*trx, /* in: transaction */
- ibool table_id,/* in: TRUE=print a table name,
+ FILE* f, /*!< in: output stream */
+ struct trx_struct*trx, /*!< in: transaction */
+ ibool table_id,/*!< in: TRUE=print a table name,
FALSE=print other identifier */
- const char* name); /* in: name to print */
+ const char* name); /*!< in: name to print */
-/**************************************************************************
+/**********************************************************************//**
Outputs a fixed-length string, quoted as an SQL identifier.
If the string contains a slash '/', the string will be
output as two identifiers separated by a period (.),
as in SQL database_name.identifier. */
-
+UNIV_INTERN
void
ut_print_namel(
/*===========*/
- FILE* f, /* in: output stream */
- struct trx_struct*trx, /* in: transaction (NULL=no quotes) */
- ibool table_id,/* in: TRUE=print a table name,
+ FILE* f, /*!< in: output stream */
+ struct trx_struct*trx, /*!< in: transaction (NULL=no quotes) */
+ ibool table_id,/*!< in: TRUE=print a table name,
FALSE=print other identifier */
- const char* name, /* in: name to print */
- ulint namelen);/* in: length of name */
+ const char* name, /*!< in: name to print */
+ ulint namelen);/*!< in: length of name */
-/**************************************************************************
+/**********************************************************************//**
Catenate files. */
-
+UNIV_INTERN
void
ut_copy_file(
/*=========*/
- FILE* dest, /* in: output file */
- FILE* src); /* in: input file to be appended to output */
-
-/**************************************************************************
-snprintf(). */
+ FILE* dest, /*!< in: output file */
+ FILE* src); /*!< in: input file to be appended to output */
+#endif /* !UNIV_HOTBACKUP */
#ifdef __WIN__
+/**********************************************************************//**
+A substitute for snprintf(3), formatted output conversion into
+a limited buffer.
+@return number of characters that would have been printed if the size
+were unlimited, not including the terminating '\0'. */
+UNIV_INTERN
int
ut_snprintf(
- /* out: number of characters that would
- have been printed if the size were
- unlimited, not including the terminating
- '\0'. */
- char* str, /* out: string */
- size_t size, /* in: str size */
- const char* fmt, /* in: format */
- ...); /* in: format values */
+/*========*/
+ char* str, /*!< out: string */
+ size_t size, /*!< in: str size */
+ const char* fmt, /*!< in: format */
+ ...); /*!< in: format values */
#else
-#define ut_snprintf snprintf
+/**********************************************************************//**
+A wrapper for snprintf(3), formatted output conversion into
+a limited buffer. */
+# define ut_snprintf snprintf
#endif /* __WIN__ */
#ifndef UNIV_NONINL
diff --git a/storage/innobase/include/ut0ut.ic b/storage/innobase/include/ut0ut.ic
index 412717a094e..6f55c7e410e 100644
--- a/storage/innobase/include/ut0ut.ic
+++ b/storage/innobase/include/ut0ut.ic
@@ -1,49 +1,66 @@
-/******************************************************************
-Various utilities
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1994, 1995 Innobase Oy
+*****************************************************************************/
+
+/**************************************************************//**
+@file include/ut0ut.ic
+Various utilities
Created 5/30/1994 Heikki Tuuri
*******************************************************************/
-/**********************************************************
-Calculates the minimum of two ulints. */
+/******************************************************//**
+Calculates the minimum of two ulints.
+@return minimum */
UNIV_INLINE
ulint
ut_min(
/*===*/
- /* out: minimum */
- ulint n1, /* in: first number */
- ulint n2) /* in: second number */
+ ulint n1, /*!< in: first number */
+ ulint n2) /*!< in: second number */
{
return((n1 <= n2) ? n1 : n2);
}
-/**********************************************************
-Calculates the maximum of two ulints. */
+/******************************************************//**
+Calculates the maximum of two ulints.
+@return maximum */
UNIV_INLINE
ulint
ut_max(
/*===*/
- /* out: maximum */
- ulint n1, /* in: first number */
- ulint n2) /* in: second number */
+ ulint n1, /*!< in: first number */
+ ulint n2) /*!< in: second number */
{
return((n1 <= n2) ? n2 : n1);
}
-/********************************************************************
+/****************************************************************//**
Calculates minimum of two ulint-pairs. */
UNIV_INLINE
void
ut_pair_min(
/*========*/
- ulint* a, /* out: more significant part of minimum */
- ulint* b, /* out: less significant part of minimum */
- ulint a1, /* in: more significant part of first pair */
- ulint b1, /* in: less significant part of first pair */
- ulint a2, /* in: more significant part of second pair */
- ulint b2) /* in: less significant part of second pair */
+ ulint* a, /*!< out: more significant part of minimum */
+ ulint* b, /*!< out: less significant part of minimum */
+ ulint a1, /*!< in: more significant part of first pair */
+ ulint b1, /*!< in: less significant part of first pair */
+ ulint a2, /*!< in: more significant part of second pair */
+ ulint b2) /*!< in: less significant part of second pair */
{
if (a1 == a2) {
*a = a1;
@@ -57,15 +74,15 @@ ut_pair_min(
}
}
-/**********************************************************
-Compares two ulints. */
+/******************************************************//**
+Compares two ulints.
+@return 1 if a > b, 0 if a == b, -1 if a < b */
UNIV_INLINE
int
ut_ulint_cmp(
/*=========*/
- /* out: 1 if a > b, 0 if a == b, -1 if a < b */
- ulint a, /* in: ulint */
- ulint b) /* in: ulint */
+ ulint a, /*!< in: ulint */
+ ulint b) /*!< in: ulint */
{
if (a < b) {
return(-1);
@@ -76,17 +93,17 @@ ut_ulint_cmp(
}
}
-/***********************************************************
-Compares two pairs of ulints. */
+/*******************************************************//**
+Compares two pairs of ulints.
+@return -1 if a < b, 0 if a == b, 1 if a > b */
UNIV_INLINE
int
ut_pair_cmp(
/*========*/
- /* out: -1 if a < b, 0 if a == b, 1 if a > b */
- ulint a1, /* in: more significant part of first pair */
- ulint a2, /* in: less significant part of first pair */
- ulint b1, /* in: more significant part of second pair */
- ulint b2) /* in: less significant part of second pair */
+ ulint a1, /*!< in: more significant part of first pair */
+ ulint a2, /*!< in: less significant part of first pair */
+ ulint b1, /*!< in: more significant part of second pair */
+ ulint b2) /*!< in: less significant part of second pair */
{
if (a1 > b1) {
return(1);
@@ -101,44 +118,15 @@ ut_pair_cmp(
}
}
-/*****************************************************************
-Calculates fast the remainder when divided by a power of two. */
-UNIV_INLINE
-ulint
-ut_2pow_remainder(
-/*==============*/ /* out: remainder */
- ulint n, /* in: number to be divided */
- ulint m) /* in: divisor; power of 2 */
-{
- ut_ad(0x80000000UL % m == 0);
-
- return(n & (m - 1));
-}
-
-/*****************************************************************
-Calculates fast a value rounded to a multiple of a power of 2. */
-UNIV_INLINE
-ulint
-ut_2pow_round(
-/*==========*/ /* out: value of n rounded down to nearest
- multiple of m */
- ulint n, /* in: number to be rounded */
- ulint m) /* in: divisor; power of 2 */
-{
- ut_ad(0x80000000UL % m == 0);
-
- return(n & ~(m - 1));
-}
-
-/*****************************************************************
+/*************************************************************//**
Calculates fast the 2-logarithm of a number, rounded upward to an
-integer. */
+integer.
+@return logarithm in the base 2, rounded upward */
UNIV_INLINE
ulint
ut_2_log(
/*=====*/
- /* out: logarithm in the base 2, rounded upward */
- ulint n) /* in: number != 0 */
+ ulint n) /*!< in: number != 0 */
{
ulint res;
@@ -161,14 +149,14 @@ ut_2_log(
return(res + 1);
}
-/*****************************************************************
-Calculates 2 to power n. */
+/*************************************************************//**
+Calculates 2 to power n.
+@return 2 to power n */
UNIV_INLINE
ulint
ut_2_exp(
/*=====*/
- /* out: 2 to power n */
- ulint n) /* in: number */
+ ulint n) /*!< in: number */
{
return((ulint) 1 << n);
}
diff --git a/storage/innobase/include/ut0vec.h b/storage/innobase/include/ut0vec.h
index e0cc4dfb009..a770f671cfc 100644
--- a/storage/innobase/include/ut0vec.h
+++ b/storage/innobase/include/ut0vec.h
@@ -1,9 +1,35 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/ut0vec.h
+A vector of pointers to data items
+
+Created 4/6/2006 Osku Salerma
+************************************************************************/
+
#ifndef IB_VECTOR_H
#define IB_VECTOR_H
#include "univ.i"
#include "mem0mem.h"
+/** An automatically resizing vector data type. */
typedef struct ib_vector_struct ib_vector_t;
/* An automatically resizing vector datatype with the following properties:
@@ -20,50 +46,76 @@ typedef struct ib_vector_struct ib_vector_t;
relatively small or short-lived uses.
*/
-/********************************************************************
-Create a new vector with the given initial size. */
-
+/****************************************************************//**
+Create a new vector with the given initial size.
+@return vector */
+UNIV_INTERN
ib_vector_t*
ib_vector_create(
/*=============*/
- /* out: vector */
- mem_heap_t* heap, /* in: heap */
- ulint size); /* in: initial size */
+ mem_heap_t* heap, /*!< in: heap */
+ ulint size); /*!< in: initial size */
-/********************************************************************
+/****************************************************************//**
Push a new element to the vector, increasing its size if necessary. */
-
+UNIV_INTERN
void
ib_vector_push(
/*===========*/
- ib_vector_t* vec, /* in: vector */
- void* elem); /* in: data element */
+ ib_vector_t* vec, /*!< in: vector */
+ void* elem); /*!< in: data element */
-/********************************************************************
-Get the number of elements in the vector. */
+/****************************************************************//**
+Get the number of elements in the vector.
+@return number of elements in vector */
UNIV_INLINE
ulint
ib_vector_size(
/*===========*/
- /* out: number of elements in vector */
- ib_vector_t* vec); /* in: vector */
+ const ib_vector_t* vec); /*!< in: vector */
-/********************************************************************
-Get the n'th element. */
+/****************************************************************//**
+Test whether a vector is empty or not.
+@return TRUE if empty */
+UNIV_INLINE
+ibool
+ib_vector_is_empty(
+/*===============*/
+ const ib_vector_t* vec); /*!< in: vector */
+
+/****************************************************************//**
+Get the n'th element.
+@return n'th element */
UNIV_INLINE
void*
ib_vector_get(
/*==========*/
- /* out: n'th element */
- ib_vector_t* vec, /* in: vector */
- ulint n); /* in: element index to get */
+ ib_vector_t* vec, /*!< in: vector */
+ ulint n); /*!< in: element index to get */
+
+/****************************************************************//**
+Remove the last element from the vector. The definition in ut0vec.ic
+checks with ut_a() that the vector holds at least one element.
+@return last vector element */
+UNIV_INLINE
+void*
+ib_vector_pop(
+/*==========*/
+ ib_vector_t* vec); /*!< in/out: vector */
+
+/****************************************************************//**
+Free the underlying heap of the vector. Note that vec is invalid
+after this call. */
+UNIV_INLINE
+void
+ib_vector_free(
+/*===========*/
+ ib_vector_t* vec); /*!< in,own: vector */
-/* See comment at beginning of file. */
+/** An automatically resizing vector data type. */
struct ib_vector_struct {
- mem_heap_t* heap; /* heap */
- void** data; /* data elements */
- ulint used; /* number of elements currently used */
- ulint total; /* number of elements allocated */
+ mem_heap_t* heap; /*!< heap */
+ void** data; /*!< data elements */
+ ulint used; /*!< number of elements currently used */
+ ulint total; /*!< number of elements allocated */
};
#ifndef UNIV_NONINL
diff --git a/storage/innobase/include/ut0vec.ic b/storage/innobase/include/ut0vec.ic
index 417a17d951f..02e881f9bca 100644
--- a/storage/innobase/include/ut0vec.ic
+++ b/storage/innobase/include/ut0vec.ic
@@ -1,26 +1,96 @@
-/********************************************************************
-Get number of elements in vector. */
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/ut0vec.ic
+A vector of pointers to data items
+
+Created 4/6/2006 Osku Salerma
+************************************************************************/
+
+/****************************************************************//**
+Get number of elements in vector.
+@return number of elements in vector */
UNIV_INLINE
ulint
ib_vector_size(
/*===========*/
- /* out: number of elements in vector */
- ib_vector_t* vec) /* in: vector */
+ const ib_vector_t* vec) /*!< in: vector */
{
return(vec->used);
}
-/********************************************************************
-Get n'th element. */
+/****************************************************************//**
+Get n'th element.
+@return n'th element */
UNIV_INLINE
void*
ib_vector_get(
/*==========*/
- /* out: n'th element */
- ib_vector_t* vec, /* in: vector */
- ulint n) /* in: element index to get */
+ ib_vector_t* vec, /*!< in: vector */
+ ulint n) /*!< in: element index to get */
{
ut_a(n < vec->used);
return(vec->data[n]);
}
+
+/****************************************************************//**
+Remove the last element from the vector. The element count (vec->used)
+is decremented by one; vec->total and vec->heap are not touched.
+ut_a() checks that the vector holds at least one element.
+@return last vector element */
+UNIV_INLINE
+void*
+ib_vector_pop(
+/*==========*/
+ ib_vector_t* vec) /*!< in/out: vector */
+{
+ void* elem;
+
+ ut_a(vec->used > 0);
+ --vec->used;
+ /* After the decrement, vec->used is the index of the slot that
+ held the last element. */
+ elem = vec->data[vec->used];
+
+ /* The vacated slot is overwritten with NULL through ut_d()
+ (presumably compiled in debug builds only -- confirm in ut0dbg.h)
+ and then passed to UNIV_MEM_INVALID (presumably a memory checker
+ annotation -- confirm in univ.i). */
+ ut_d(vec->data[vec->used] = NULL);
+ UNIV_MEM_INVALID(&vec->data[vec->used], sizeof(*vec->data));
+
+ return(elem);
+}
+
+/****************************************************************//**
+Free the underlying heap of the vector (vec->heap) by calling
+mem_heap_free(). Note that vec is invalid after this call (presumably
+because the vector object itself is allocated from that heap --
+confirm in ib_vector_create()). */
+UNIV_INLINE
+void
+ib_vector_free(
+/*===========*/
+ ib_vector_t* vec) /*!< in, own: vector */
+{
+ mem_heap_free(vec->heap);
+}
+
+/****************************************************************//**
+Test whether a vector is empty or not, that is, whether
+ib_vector_size() reports zero elements for it.
+@return TRUE if empty, FALSE otherwise */
+UNIV_INLINE
+ibool
+ib_vector_is_empty(
+/*===============*/
+ const ib_vector_t* vec) /*!< in: vector */
+{
+ return(ib_vector_size(vec) == 0);
+}
diff --git a/storage/innobase/include/ut0wqueue.h b/storage/innobase/include/ut0wqueue.h
index 57f2297beee..2ec0f16ab05 100644
--- a/storage/innobase/include/ut0wqueue.h
+++ b/storage/innobase/include/ut0wqueue.h
@@ -1,8 +1,32 @@
-/***********************************************************************
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/ut0wqueue.h
+A work queue
+
+Created 4/26/2006 Osku Salerma
+************************************************************************/
+
+/*******************************************************************//**
A Work queue. Threads can add work items to the queue and other threads can
wait for work items to be available and take them off the queue for
processing.
-
************************************************************************/
#ifndef IB_WORK_QUEUE_H
@@ -15,46 +39,47 @@ processing.
typedef struct ib_wqueue_struct ib_wqueue_t;
-/********************************************************************
-Create a new work queue. */
-
+/****************************************************************//**
+Create a new work queue.
+@return work queue */
+UNIV_INTERN
ib_wqueue_t*
ib_wqueue_create(void);
/*===================*/
- /* out: work queue */
-/********************************************************************
+/****************************************************************//**
Free a work queue. */
-
+UNIV_INTERN
void
ib_wqueue_free(
/*===========*/
- ib_wqueue_t* wq); /* in: work queue */
+ ib_wqueue_t* wq); /*!< in: work queue */
-/********************************************************************
+/****************************************************************//**
Add a work item to the queue. */
-
+UNIV_INTERN
void
ib_wqueue_add(
/*==========*/
- ib_wqueue_t* wq, /* in: work queue */
- void* item, /* in: work item */
- mem_heap_t* heap); /* in: memory heap to use for allocating the
+ ib_wqueue_t* wq, /*!< in: work queue */
+ void* item, /*!< in: work item */
+ mem_heap_t* heap); /*!< in: memory heap to use for allocating the
list node */
-/********************************************************************
-Wait for a work item to appear in the queue. */
-
+/****************************************************************//**
+Wait for a work item to appear in the queue.
+@return work item */
+UNIV_INTERN
void*
ib_wqueue_wait(
- /* out: work item */
- ib_wqueue_t* wq); /* in: work queue */
+/*===========*/
+ ib_wqueue_t* wq); /*!< in: work queue */
/* Work queue. */
struct ib_wqueue_struct {
- mutex_t mutex; /* mutex protecting everything */
- ib_list_t* items; /* work item list */
- os_event_t event; /* event we use to signal additions to list */
+ mutex_t mutex; /*!< mutex protecting everything */
+ ib_list_t* items; /*!< work item list */
+ os_event_t event; /*!< event we use to signal additions to list */
};
#endif
diff --git a/storage/innobase/lock/lock0iter.c b/storage/innobase/lock/lock0iter.c
index 0afa7019c86..51d1802ccde 100644
--- a/storage/innobase/lock/lock0iter.c
+++ b/storage/innobase/lock/lock0iter.c
@@ -1,9 +1,26 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file lock/lock0iter.c
Lock queue iterator. Can iterate over table and record
lock queues.
-(c) 2007 Innobase Oy
-
Created July 16, 2007 Vasil Dimov
*******************************************************/
@@ -15,8 +32,11 @@ Created July 16, 2007 Vasil Dimov
#include "lock0priv.h"
#include "ut0dbg.h"
#include "ut0lst.h"
+#ifdef UNIV_DEBUG
+# include "srv0srv.h" /* kernel_mutex */
+#endif /* UNIV_DEBUG */
-/***********************************************************************
+/*******************************************************************//**
Initialize lock queue iterator so that it starts to iterate from
"lock". bit_no specifies the record number within the heap where the
record is stored. It can be undefined (ULINT_UNDEFINED) in two cases:
@@ -25,15 +45,17 @@ record is stored. It can be undefined (ULINT_UNDEFINED) in two cases:
bit_no is calculated in this function by using
lock_rec_find_set_bit(). There is exactly one bit set in the bitmap
of a wait lock. */
-
+UNIV_INTERN
void
lock_queue_iterator_reset(
/*======================*/
- lock_queue_iterator_t* iter, /* out: iterator */
- lock_t* lock, /* in: lock to start from */
- ulint bit_no) /* in: record number in the
+ lock_queue_iterator_t* iter, /*!< out: iterator */
+ const lock_t* lock, /*!< in: lock to start from */
+ ulint bit_no) /*!< in: record number in the
heap */
{
+ ut_ad(mutex_own(&kernel_mutex));
+
iter->current_lock = lock;
if (bit_no != ULINT_UNDEFINED) {
@@ -41,7 +63,7 @@ lock_queue_iterator_reset(
iter->bit_no = bit_no;
} else {
- switch (lock_get_type(lock)) {
+ switch (lock_get_type_low(lock)) {
case LOCK_TABLE:
iter->bit_no = ULINT_UNDEFINED;
break;
@@ -55,20 +77,22 @@ lock_queue_iterator_reset(
}
}
-/***********************************************************************
+/*******************************************************************//**
Gets the previous lock in the lock queue, returns NULL if there are no
more locks (i.e. the current lock is the first one). The iterator is
-receded (if not-NULL is returned). */
-
-lock_t*
+receded (if not-NULL is returned).
+@return previous lock or NULL */
+UNIV_INTERN
+const lock_t*
lock_queue_iterator_get_prev(
/*=========================*/
- /* out: previous lock or NULL */
- lock_queue_iterator_t* iter) /* in/out: iterator */
+ lock_queue_iterator_t* iter) /*!< in/out: iterator */
{
- lock_t* prev_lock;
+ const lock_t* prev_lock;
+
+ ut_ad(mutex_own(&kernel_mutex));
- switch (lock_get_type(iter->current_lock)) {
+ switch (lock_get_type_low(iter->current_lock)) {
case LOCK_REC:
prev_lock = lock_rec_get_prev(
iter->current_lock, iter->bit_no);
diff --git a/storage/innobase/lock/lock0lock.c b/storage/innobase/lock/lock0lock.c
index 5afd19aa7e7..736198dc346 100644
--- a/storage/innobase/lock/lock0lock.c
+++ b/storage/innobase/lock/lock0lock.c
@@ -1,7 +1,24 @@
-/******************************************************
-The transaction lock system
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1996 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file lock/lock0lock.c
+The transaction lock system
Created 5/7/1996 Heikki Tuuri
*******************************************************/
@@ -16,37 +33,12 @@ Created 5/7/1996 Heikki Tuuri
#include "lock0priv.ic"
#endif
+#include "ha_prototypes.h"
#include "usr0sess.h"
#include "trx0purge.h"
#include "dict0mem.h"
#include "trx0sys.h"
-
-/* 2 function prototypes copied from ha_innodb.cc: */
-
-/*****************************************************************
-If you want to print a thd that is not associated with the current thread,
-you must call this function before reserving the InnoDB kernel_mutex, to
-protect MySQL from setting thd->query NULL. If you print a thd of the current
-thread, we know that MySQL cannot modify thd->query, and it is not necessary
-to call this. Call innobase_mysql_end_print_arbitrary_thd() after you release
-the kernel_mutex.
-NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
-function! */
-
-void
-innobase_mysql_prepare_print_arbitrary_thd(void);
-/*============================================*/
-
-/*****************************************************************
-Relases the mutex reserved by innobase_mysql_prepare_print_arbitrary_thd().
-NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
-function! */
-
-void
-innobase_mysql_end_print_arbitrary_thd(void);
-/*========================================*/
-
/* Restricts the length of search we will do in the waits-for
graph of transactions */
#define LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK 1000000
@@ -222,7 +214,7 @@ a waiting s-lock request on the next record? If this s-lock was placed
by a read cursor moving in the ascending order in the index, we cannot
do the insert immediately, because when we finally commit our transaction,
the read cursor should see also the new inserted record. So we should
-move the read cursor backward from the the next record for it to pass over
+move the read cursor backward from the next record for it to pass over
the new inserted record. This move backward may be too cumbersome to
implement. If we in this situation just enqueue a second x-lock request
for our transaction on the next record, then the deadlock mechanism
@@ -315,75 +307,123 @@ locks on the inserted record. */
* statement-level MySQL binlog.
* See also lock_mode_compatible().
*/
+/* LK(a,b) is the bit that stands for the ordered pair of lock modes
+(a,b) in a relation that is packed into one integer: it is bit number
+(a) * LOCK_NUM + (b). */
+#define LK(a,b) (1 << ((a) * LOCK_NUM + (b)))
+/* LKS(a,b) is the pair of bits for (a,b) and (b,a). The expansion is
+parenthesized so that it stays one operand next to operators that bind
+tighter than |, such as &. */
+#define LKS(a,b) (LK(a,b) | LK(b,a))
+
+/* Define the lock compatibility matrix in a ulint. The first line below
+defines the diagonal entries. The following lines define the compatibility
+for LOCK_IX, LOCK_S, and LOCK_AUTO_INC using LKS(), since the matrix
+is symmetric. LKS(LOCK_IS, LOCK_AUTO_INC) and LKS(LOCK_AUTO_INC, LOCK_IS)
+name the same two bits; the repetition is harmless. The whole expression
+is parenthesized so that LOCK_MODE_COMPATIBILITY & LK(mode1, mode2)
+tests one bit of the complete matrix. */
+#define LOCK_MODE_COMPATIBILITY (0 \
+ | LK(LOCK_IS, LOCK_IS) | LK(LOCK_IX, LOCK_IX) | LK(LOCK_S, LOCK_S) \
+ | LKS(LOCK_IX, LOCK_IS) | LKS(LOCK_IS, LOCK_AUTO_INC) \
+ | LKS(LOCK_S, LOCK_IS) \
+ | LKS(LOCK_AUTO_INC, LOCK_IS) | LKS(LOCK_AUTO_INC, LOCK_IX))
+
+/* STRONGER-OR-EQUAL RELATION (mode1=row, mode2=column)
+ * IS IX S X AI
+ * IS + - - - -
+ * IX + + - - -
+ * S + - + - -
+ * X + + + + +
+ * AI - - - - +
+ * See lock_mode_stronger_or_eq().
+ */
+
+/* Define the stronger-or-equal lock relation in a ulint. This relation
+contains all pairs LK(mode1, mode2) where mode1 is stronger than or
+equal to mode2. Each line below lists the '+' entries of one row of the
+table above. The whole expression is parenthesized so that
+LOCK_MODE_STRONGER_OR_EQ & LK(mode1, mode2) tests one bit of the
+complete relation. */
+#define LOCK_MODE_STRONGER_OR_EQ (0 \
+ | LK(LOCK_IS, LOCK_IS) \
+ | LK(LOCK_IX, LOCK_IS) | LK(LOCK_IX, LOCK_IX) \
+ | LK(LOCK_S, LOCK_IS) | LK(LOCK_S, LOCK_S) \
+ | LK(LOCK_AUTO_INC, LOCK_AUTO_INC) \
+ | LK(LOCK_X, LOCK_IS) | LK(LOCK_X, LOCK_IX) | LK(LOCK_X, LOCK_S) \
+ | LK(LOCK_X, LOCK_AUTO_INC) | LK(LOCK_X, LOCK_X))
#ifdef UNIV_DEBUG
-ibool lock_print_waits = FALSE;
+UNIV_INTERN ibool lock_print_waits = FALSE;
+
+/*********************************************************************//**
+Validates the lock system.
+@return TRUE if ok */
+static
+ibool
+lock_validate(void);
+/*===============*/
+
+/*********************************************************************//**
+Validates the record lock queues on a page.
+@return TRUE if ok */
+static
+ibool
+lock_rec_validate_page(
+/*===================*/
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page_no);/*!< in: page number */
#endif /* UNIV_DEBUG */
/* The lock system */
-lock_sys_t* lock_sys = NULL;
+UNIV_INTERN lock_sys_t* lock_sys = NULL;
/* We store info on the latest deadlock error to this buffer. InnoDB
Monitor will then fetch it and print */
-ibool lock_deadlock_found = FALSE;
-FILE* lock_latest_err_file;
+UNIV_INTERN ibool lock_deadlock_found = FALSE;
+UNIV_INTERN FILE* lock_latest_err_file;
/* Flags for recursive deadlock search */
#define LOCK_VICTIM_IS_START 1
#define LOCK_VICTIM_IS_OTHER 2
-/************************************************************************
-Checks if a lock request results in a deadlock. */
+/********************************************************************//**
+Checks if a lock request results in a deadlock.
+@return TRUE if a deadlock was detected and we chose trx as a victim;
+FALSE if no deadlock, or there was a deadlock, but we chose other
+transaction(s) as victim(s) */
static
ibool
lock_deadlock_occurs(
/*=================*/
- /* out: TRUE if a deadlock was detected and we
- chose trx as a victim; FALSE if no deadlock, or
- there was a deadlock, but we chose other
- transaction(s) as victim(s) */
- lock_t* lock, /* in: lock the transaction is requesting */
- trx_t* trx); /* in: transaction */
-/************************************************************************
-Looks recursively for a deadlock. */
+ lock_t* lock, /*!< in: lock the transaction is requesting */
+ trx_t* trx); /*!< in: transaction */
+/********************************************************************//**
+Looks recursively for a deadlock.
+@return 0 if no deadlock found, LOCK_VICTIM_IS_START if there was a
+deadlock and we chose 'start' as the victim, LOCK_VICTIM_IS_OTHER if a
+deadlock was found and we chose some other trx as a victim: we must do
+the search again in this last case because there may be another
+deadlock! */
static
ulint
lock_deadlock_recursive(
/*====================*/
- /* out: 0 if no deadlock found,
- LOCK_VICTIM_IS_START if there was a deadlock
- and we chose 'start' as the victim,
- LOCK_VICTIM_IS_OTHER if a deadlock
- was found and we chose some other trx as a
- victim: we must do the search again in this
- last case because there may be another
- deadlock! */
- trx_t* start, /* in: recursion starting point */
- trx_t* trx, /* in: a transaction waiting for a lock */
- lock_t* wait_lock, /* in: the lock trx is waiting to be granted */
- ulint* cost, /* in/out: number of calculation steps thus
+ trx_t* start, /*!< in: recursion starting point */
+ trx_t* trx, /*!< in: a transaction waiting for a lock */
+ lock_t* wait_lock, /*!< in: the lock trx is waiting to be granted */
+ ulint* cost, /*!< in/out: number of calculation steps thus
far: if this exceeds LOCK_MAX_N_STEPS_...
we return LOCK_VICTIM_IS_START */
- ulint depth); /* in: recursion depth: if this exceeds
+ ulint depth); /*!< in: recursion depth: if this exceeds
LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we
return LOCK_VICTIM_IS_START */
-/*************************************************************************
-Gets the nth bit of a record lock. */
+/*********************************************************************//**
+Gets the nth bit of a record lock.
+@return TRUE if bit set */
UNIV_INLINE
ibool
lock_rec_get_nth_bit(
/*=================*/
- /* out: TRUE if bit set */
- lock_t* lock, /* in: record lock */
- ulint i) /* in: index of the bit */
+ const lock_t* lock, /*!< in: record lock */
+ ulint i) /*!< in: index of the bit */
{
ulint byte_index;
ulint bit_index;
- ulint b;
ut_ad(lock);
- ut_ad(lock_get_type(lock) == LOCK_REC);
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
if (i >= lock->un_member.rec_lock.n_bits) {
@@ -393,9 +433,7 @@ lock_rec_get_nth_bit(
byte_index = i / 8;
bit_index = i % 8;
- b = (ulint)*((byte*)lock + sizeof(lock_t) + byte_index);
-
- return(ut_bit_get_nth(b, bit_index));
+ return(1 & ((const byte*) &lock[1])[byte_index] >> bit_index);
}
/*************************************************************************/
@@ -403,18 +441,18 @@ lock_rec_get_nth_bit(
#define lock_mutex_enter_kernel() mutex_enter(&kernel_mutex)
#define lock_mutex_exit_kernel() mutex_exit(&kernel_mutex)
-/*************************************************************************
-Checks that a transaction id is sensible, i.e., not in the future. */
-
+/*********************************************************************//**
+Checks that a transaction id is sensible, i.e., not in the future.
+@return TRUE if ok */
+UNIV_INTERN
ibool
lock_check_trx_id_sanity(
/*=====================*/
- /* out: TRUE if ok */
- dulint trx_id, /* in: trx id */
- rec_t* rec, /* in: user record */
- dict_index_t* index, /* in: index */
- const ulint* offsets, /* in: rec_get_offsets(rec, index) */
- ibool has_kernel_mutex)/* in: TRUE if the caller owns the
+ trx_id_t trx_id, /*!< in: trx id */
+ const rec_t* rec, /*!< in: user record */
+ dict_index_t* index, /*!< in: index */
+ const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
+ ibool has_kernel_mutex)/*!< in: TRUE if the caller owns the
kernel mutex */
{
ibool is_ok = TRUE;
@@ -437,14 +475,12 @@ lock_check_trx_id_sanity(
fputs("InnoDB: in ", stderr);
dict_index_name_print(stderr, NULL, index);
fprintf(stderr, "\n"
- "InnoDB: is %lu %lu which is higher than the"
- " global trx id counter %lu %lu!\n"
+ "InnoDB: is " TRX_ID_FMT " which is higher than the"
+ " global trx id counter " TRX_ID_FMT "!\n"
"InnoDB: The table is corrupt. You have to do"
" dump + drop + reimport.\n",
- (ulong) ut_dulint_get_high(trx_id),
- (ulong) ut_dulint_get_low(trx_id),
- (ulong) ut_dulint_get_high(trx_sys->max_trx_id),
- (ulong) ut_dulint_get_low(trx_sys->max_trx_id));
+ TRX_ID_PREP_PRINTF(trx_id),
+ TRX_ID_PREP_PRINTF(trx_sys->max_trx_id));
is_ok = FALSE;
}
@@ -456,23 +492,23 @@ lock_check_trx_id_sanity(
return(is_ok);
}
-/*************************************************************************
-Checks that a record is seen in a consistent read. */
-
+/*********************************************************************//**
+Checks that a record is seen in a consistent read.
+@return TRUE if sees, or FALSE if an earlier version of the record
+should be retrieved */
+UNIV_INTERN
ibool
lock_clust_rec_cons_read_sees(
/*==========================*/
- /* out: TRUE if sees, or FALSE if an earlier
- version of the record should be retrieved */
- rec_t* rec, /* in: user record which should be read or
+ const rec_t* rec, /*!< in: user record which should be read or
passed over by a read cursor */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- read_view_t* view) /* in: consistent read view */
+ dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ read_view_t* view) /*!< in: consistent read view */
{
- dulint trx_id;
+ trx_id_t trx_id;
- ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(dict_index_is_clust(index));
ut_ad(page_rec_is_user_rec(rec));
ut_ad(rec_offs_validate(rec, index, offsets));
@@ -485,30 +521,27 @@ lock_clust_rec_cons_read_sees(
return(read_view_sees_trx_id(view, trx_id));
}
-/*************************************************************************
-Checks that a non-clustered index record is seen in a consistent read. */
+/*********************************************************************//**
+Checks that a non-clustered index record is seen in a consistent read.
+NOTE that a non-clustered index page contains so little information on
+its modifications that even when FALSE is returned, the present version
+of rec may be the right one; this must be checked from the clustered
+index record.
+
+@return TRUE if certainly sees, or FALSE if an earlier version of the
+clustered index record might be needed */
+UNIV_INTERN
ulint
lock_sec_rec_cons_read_sees(
/*========================*/
- /* out: TRUE if certainly sees, or FALSE if an
- earlier version of the clustered index record
- might be needed: NOTE that a non-clustered
- index page contains so little information on
- its modifications that also in the case FALSE,
- the present version of rec may be the right,
- but we must check this from the clustered
- index record */
- rec_t* rec, /* in: user record which should be read or
- passed over by a read cursor */
- dict_index_t* index, /* in: non-clustered index */
- read_view_t* view) /* in: consistent read view */
+ const rec_t* rec, /*!< in: user record which
+ should be read or passed over
+ by a read cursor */
+ const read_view_t* view) /*!< in: consistent read view */
{
- dulint max_trx_id;
-
- UT_NOT_USED(index);
+ trx_id_t max_trx_id;
- ut_ad(!(index->type & DICT_CLUSTERED));
ut_ad(page_rec_is_user_rec(rec));
/* NOTE that we might call this function while holding the search
@@ -520,23 +553,19 @@ lock_sec_rec_cons_read_sees(
return(FALSE);
}
- max_trx_id = page_get_max_trx_id(buf_frame_align(rec));
+ max_trx_id = page_get_max_trx_id(page_align(rec));
+ ut_ad(!ut_dulint_is_zero(max_trx_id));
- if (ut_dulint_cmp(max_trx_id, view->up_limit_id) >= 0) {
-
- return(FALSE);
- }
-
- return(TRUE);
+ return(ut_dulint_cmp(max_trx_id, view->up_limit_id) < 0);
}
-/*************************************************************************
+/*********************************************************************//**
Creates the lock system at database start. */
-
+UNIV_INTERN
void
lock_sys_create(
/*============*/
- ulint n_cells) /* in: number of slots in lock hash table */
+ ulint n_cells) /*!< in: number of slots in lock hash table */
{
lock_sys = mem_alloc(sizeof(lock_sys_t));
@@ -548,43 +577,60 @@ lock_sys_create(
ut_a(lock_latest_err_file);
}
-/*************************************************************************
-Gets the size of a lock struct. */
+/*********************************************************************//**
+Closes the lock system at database shutdown. Closes the
+lock_latest_err_file stream if it is open, frees the lock_sys->rec_hash
+hash table and then the lock_sys structure itself. Both globals are
+reset to NULL afterwards. lock_sys is dereferenced unconditionally, so
+it must be non-NULL when this function is called. */
+UNIV_INTERN
+void
+lock_sys_close(void)
+/*================*/
+{
+ /* The file pointer is cleared after closing so that a later call
+ does not fclose() the same stream again. */
+ if (lock_latest_err_file != NULL) {
+ fclose(lock_latest_err_file);
+ lock_latest_err_file = NULL;
+ }
+
+ /* The hash table is released first because its pointer is stored
+ inside lock_sys. */
+ hash_table_free(lock_sys->rec_hash);
+ mem_free(lock_sys);
+ lock_sys = NULL;
+}
+/*********************************************************************//**
+Gets the size of a lock struct.
+@return size in bytes */
+UNIV_INTERN
ulint
lock_get_size(void)
/*===============*/
- /* out: size in bytes */
{
return((ulint)sizeof(lock_t));
}
-/*************************************************************************
-Gets the mode of a lock. */
+/*********************************************************************//**
+Gets the mode of a lock.
+@return mode */
UNIV_INLINE
-ulint
+enum lock_mode
lock_get_mode(
/*==========*/
- /* out: mode */
- const lock_t* lock) /* in: lock */
+ const lock_t* lock) /*!< in: lock */
{
ut_ad(lock);
return(lock->type_mode & LOCK_MODE_MASK);
}
-/*************************************************************************
-Gets the wait flag of a lock. */
+/*********************************************************************//**
+Gets the wait flag of a lock.
+@return TRUE if waiting */
UNIV_INLINE
ibool
lock_get_wait(
/*==========*/
- /* out: TRUE if waiting */
- lock_t* lock) /* in: lock */
+ const lock_t* lock) /*!< in: lock */
{
ut_ad(lock);
- if (lock->type_mode & LOCK_WAIT) {
+ if (UNIV_UNLIKELY(lock->type_mode & LOCK_WAIT)) {
return(TRUE);
}
@@ -592,21 +638,20 @@ lock_get_wait(
return(FALSE);
}
-/*************************************************************************
+/*********************************************************************//**
Gets the source table of an ALTER TABLE transaction. The table must be
-covered by an IX or IS table lock. */
-
+covered by an IX or IS table lock.
+@return the source table of transaction, if it is covered by an IX or
+IS table lock; dest if there is no source table, and NULL if the
+transaction is locking more than two tables or an inconsistency is
+found */
+UNIV_INTERN
dict_table_t*
lock_get_src_table(
/*===============*/
- /* out: the source table of transaction,
- if it is covered by an IX or IS table lock;
- dest if there is no source table, and
- NULL if the transaction is locking more than
- two tables or an inconsistency is found */
- trx_t* trx, /* in: transaction */
- dict_table_t* dest, /* in: destination of ALTER TABLE */
- ulint* mode) /* out: lock mode of the source table */
+ trx_t* trx, /*!< in: transaction */
+ dict_table_t* dest, /*!< in: destination of ALTER TABLE */
+ enum lock_mode* mode) /*!< out: lock mode of the source table */
{
dict_table_t* src;
lock_t* lock;
@@ -618,8 +663,8 @@ lock_get_src_table(
lock;
lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
lock_table_t* tab_lock;
- ulint lock_mode;
- if (!(lock_get_type(lock) & LOCK_TABLE)) {
+ enum lock_mode lock_mode;
+ if (!(lock_get_type_low(lock) & LOCK_TABLE)) {
/* We are only interested in table locks. */
continue;
}
@@ -645,15 +690,12 @@ lock_get_src_table(
/* Check that the source table is locked by
LOCK_IX or LOCK_IS. */
lock_mode = lock_get_mode(lock);
- switch (lock_mode) {
- case LOCK_IX:
- case LOCK_IS:
+ if (lock_mode == LOCK_IX || lock_mode == LOCK_IS) {
if (*mode != LOCK_NONE && *mode != lock_mode) {
/* There are multiple locks on src. */
return(NULL);
}
*mode = lock_mode;
- break;
}
}
@@ -665,21 +707,21 @@ lock_get_src_table(
return(src);
}
-/*************************************************************************
+/*********************************************************************//**
Determine if the given table is exclusively "owned" by the given
transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC
-on the table. */
-
+on the table.
+@return TRUE if table is only locked by trx, with LOCK_IX, and
+possibly LOCK_AUTO_INC */
+UNIV_INTERN
ibool
lock_is_table_exclusive(
/*====================*/
- /* out: TRUE if table is only locked by trx,
- with LOCK_IX, and possibly LOCK_AUTO_INC */
- dict_table_t* table, /* in: table */
- trx_t* trx) /* in: transaction */
+ dict_table_t* table, /*!< in: table */
+ trx_t* trx) /*!< in: transaction */
{
- lock_t* lock;
- ibool ok = FALSE;
+ const lock_t* lock;
+ ibool ok = FALSE;
ut_ad(table);
ut_ad(trx);
@@ -695,7 +737,7 @@ lock_is_table_exclusive(
goto not_ok;
}
- if (!(lock_get_type(lock) & LOCK_TABLE)) {
+ if (!(lock_get_type_low(lock) & LOCK_TABLE)) {
/* We are interested in table locks only. */
continue;
}
@@ -722,30 +764,30 @@ func_exit:
return(ok);
}
-/*************************************************************************
+/*********************************************************************//**
Sets the wait flag of a lock and the back pointer in trx to lock. */
UNIV_INLINE
void
lock_set_lock_and_trx_wait(
/*=======================*/
- lock_t* lock, /* in: lock */
- trx_t* trx) /* in: trx */
+ lock_t* lock, /*!< in: lock */
+ trx_t* trx) /*!< in: trx */
{
ut_ad(lock);
ut_ad(trx->wait_lock == NULL);
trx->wait_lock = lock;
- lock->type_mode = lock->type_mode | LOCK_WAIT;
+ lock->type_mode |= LOCK_WAIT;
}
-/**************************************************************************
+/**********************************************************************//**
The back pointer to a waiting lock request in the transaction is set to NULL
and the wait bit in lock type_mode is reset. */
UNIV_INLINE
void
lock_reset_lock_and_trx_wait(
/*=========================*/
- lock_t* lock) /* in: record lock */
+ lock_t* lock) /*!< in: record lock */
{
ut_ad((lock->trx)->wait_lock == lock);
ut_ad(lock_get_wait(lock));
@@ -753,20 +795,20 @@ lock_reset_lock_and_trx_wait(
/* Reset the back pointer in trx to this waiting lock request */
(lock->trx)->wait_lock = NULL;
- lock->type_mode = lock->type_mode & ~LOCK_WAIT;
+ lock->type_mode &= ~LOCK_WAIT;
}
-/*************************************************************************
-Gets the gap flag of a record lock. */
+/*********************************************************************//**
+Gets the gap flag of a record lock.
+@return TRUE if gap flag set */
UNIV_INLINE
ibool
lock_rec_get_gap(
/*=============*/
- /* out: TRUE if gap flag set */
- lock_t* lock) /* in: record lock */
+ const lock_t* lock) /*!< in: record lock */
{
ut_ad(lock);
- ut_ad(lock_get_type(lock) == LOCK_REC);
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
if (lock->type_mode & LOCK_GAP) {
@@ -776,17 +818,17 @@ lock_rec_get_gap(
return(FALSE);
}
-/*************************************************************************
-Gets the LOCK_REC_NOT_GAP flag of a record lock. */
+/*********************************************************************//**
+Gets the LOCK_REC_NOT_GAP flag of a record lock.
+@return TRUE if LOCK_REC_NOT_GAP flag set */
UNIV_INLINE
ibool
lock_rec_get_rec_not_gap(
/*=====================*/
- /* out: TRUE if LOCK_REC_NOT_GAP flag set */
- lock_t* lock) /* in: record lock */
+ const lock_t* lock) /*!< in: record lock */
{
ut_ad(lock);
- ut_ad(lock_get_type(lock) == LOCK_REC);
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
if (lock->type_mode & LOCK_REC_NOT_GAP) {
@@ -796,17 +838,17 @@ lock_rec_get_rec_not_gap(
return(FALSE);
}
-/*************************************************************************
-Gets the waiting insert flag of a record lock. */
+/*********************************************************************//**
+Gets the waiting insert flag of a record lock.
+@return TRUE if insert intention flag set */
UNIV_INLINE
ibool
lock_rec_get_insert_intention(
/*==========================*/
- /* out: TRUE if gap flag set */
- lock_t* lock) /* in: record lock */
+ const lock_t* lock) /*!< in: record lock */
{
ut_ad(lock);
- ut_ad(lock_get_type(lock) == LOCK_REC);
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
if (lock->type_mode & LOCK_INSERT_INTENTION) {
@@ -816,108 +858,65 @@ lock_rec_get_insert_intention(
return(FALSE);
}
-/*************************************************************************
-Calculates if lock mode 1 is stronger or equal to lock mode 2. */
+/*********************************************************************//**
+Calculates if lock mode 1 is stronger or equal to lock mode 2.
+@return nonzero if mode1 stronger or equal to mode2 */
UNIV_INLINE
-ibool
+ulint
lock_mode_stronger_or_eq(
/*=====================*/
- /* out: TRUE if mode1 stronger or equal to mode2 */
- ulint mode1, /* in: lock mode */
- ulint mode2) /* in: lock mode */
+ enum lock_mode mode1, /*!< in: lock mode */
+ enum lock_mode mode2) /*!< in: lock mode */
{
ut_ad(mode1 == LOCK_X || mode1 == LOCK_S || mode1 == LOCK_IX
|| mode1 == LOCK_IS || mode1 == LOCK_AUTO_INC);
ut_ad(mode2 == LOCK_X || mode2 == LOCK_S || mode2 == LOCK_IX
|| mode2 == LOCK_IS || mode2 == LOCK_AUTO_INC);
- if (mode1 == LOCK_X) {
-
- return(TRUE);
- } else if (mode1 == LOCK_AUTO_INC && mode2 == LOCK_AUTO_INC) {
-
- return(TRUE);
-
- } else if (mode1 == LOCK_S
- && (mode2 == LOCK_S || mode2 == LOCK_IS)) {
- return(TRUE);
-
- } else if (mode1 == LOCK_IS && mode2 == LOCK_IS) {
-
- return(TRUE);
-
- } else if (mode1 == LOCK_IX && (mode2 == LOCK_IX
- || mode2 == LOCK_IS)) {
- return(TRUE);
- }
-
- return(FALSE);
+ return((LOCK_MODE_STRONGER_OR_EQ) & LK(mode1, mode2));
}
-/*************************************************************************
-Calculates if lock mode 1 is compatible with lock mode 2. */
+/*********************************************************************//**
+Calculates if lock mode 1 is compatible with lock mode 2.
+@return nonzero if mode1 compatible with mode2 */
UNIV_INLINE
-ibool
+ulint
lock_mode_compatible(
/*=================*/
- /* out: TRUE if mode1 compatible with mode2 */
- ulint mode1, /* in: lock mode */
- ulint mode2) /* in: lock mode */
+ enum lock_mode mode1, /*!< in: lock mode */
+ enum lock_mode mode2) /*!< in: lock mode */
{
ut_ad(mode1 == LOCK_X || mode1 == LOCK_S || mode1 == LOCK_IX
|| mode1 == LOCK_IS || mode1 == LOCK_AUTO_INC);
ut_ad(mode2 == LOCK_X || mode2 == LOCK_S || mode2 == LOCK_IX
|| mode2 == LOCK_IS || mode2 == LOCK_AUTO_INC);
- if (mode1 == LOCK_S && (mode2 == LOCK_IS || mode2 == LOCK_S)) {
-
- return(TRUE);
-
- } else if (mode1 == LOCK_X) {
-
- return(FALSE);
-
- } else if (mode1 == LOCK_AUTO_INC && (mode2 == LOCK_IS
- || mode2 == LOCK_IX)) {
- return(TRUE);
-
- } else if (mode1 == LOCK_IS && (mode2 == LOCK_IS
- || mode2 == LOCK_IX
- || mode2 == LOCK_AUTO_INC
- || mode2 == LOCK_S)) {
- return(TRUE);
-
- } else if (mode1 == LOCK_IX && (mode2 == LOCK_IS
- || mode2 == LOCK_AUTO_INC
- || mode2 == LOCK_IX)) {
- return(TRUE);
- }
-
- return(FALSE);
+ return((LOCK_MODE_COMPATIBILITY) & LK(mode1, mode2));
}
-/*************************************************************************
-Checks if a lock request for a new lock has to wait for request lock2. */
+/*********************************************************************//**
+Checks if a lock request for a new lock has to wait for request lock2.
+@return TRUE if new lock has to wait for lock2 to be removed */
UNIV_INLINE
ibool
lock_rec_has_to_wait(
/*=================*/
- /* out: TRUE if new lock has to wait for lock2 to be
- removed */
- trx_t* trx, /* in: trx of new lock */
- ulint type_mode,/* in: precise mode of the new lock to set:
- LOCK_S or LOCK_X, possibly ORed to
- LOCK_GAP or LOCK_REC_NOT_GAP, LOCK_INSERT_INTENTION */
- lock_t* lock2, /* in: another record lock; NOTE that it is assumed
- that this has a lock bit set on the same record as
- in the new lock we are setting */
- ibool lock_is_on_supremum) /* in: TRUE if we are setting the lock
- on the 'supremum' record of an index
- page: we know then that the lock request
- is really for a 'gap' type lock */
+ const trx_t* trx, /*!< in: trx of new lock */
+ ulint type_mode,/*!< in: precise mode of the new lock
+ to set: LOCK_S or LOCK_X, possibly
+ ORed to LOCK_GAP or LOCK_REC_NOT_GAP,
+ LOCK_INSERT_INTENTION */
+ const lock_t* lock2, /*!< in: another record lock; NOTE that
+ it is assumed that this has a lock bit
+ set on the same record as in the new
+ lock we are setting */
+ ibool lock_is_on_supremum) /*!< in: TRUE if we are setting the
+ lock on the 'supremum' record of an
+ index page: we know then that the lock
+ request is really for a 'gap' type lock */
{
ut_ad(trx && lock2);
- ut_ad(lock_get_type(lock2) == LOCK_REC);
+ ut_ad(lock_get_type_low(lock2) == LOCK_REC);
if (trx != lock2->trx
&& !lock_mode_compatible(LOCK_MODE_MASK & type_mode,
@@ -977,26 +976,26 @@ lock_rec_has_to_wait(
return(FALSE);
}
-/*************************************************************************
-Checks if a lock request lock1 has to wait for request lock2. */
-
+/*********************************************************************//**
+Checks if a lock request lock1 has to wait for request lock2.
+@return TRUE if lock1 has to wait for lock2 to be removed */
+UNIV_INTERN
ibool
lock_has_to_wait(
/*=============*/
- /* out: TRUE if lock1 has to wait for lock2 to be
- removed */
- lock_t* lock1, /* in: waiting lock */
- lock_t* lock2) /* in: another lock; NOTE that it is assumed that this
- has a lock bit set on the same record as in lock1 if
- the locks are record locks */
+ const lock_t* lock1, /*!< in: waiting lock */
+ const lock_t* lock2) /*!< in: another lock; NOTE that it is
+ assumed that this has a lock bit set
+ on the same record as in lock1 if the
+ locks are record locks */
{
ut_ad(lock1 && lock2);
if (lock1->trx != lock2->trx
&& !lock_mode_compatible(lock_get_mode(lock1),
lock_get_mode(lock2))) {
- if (lock_get_type(lock1) == LOCK_REC) {
- ut_ad(lock_get_type(lock2) == LOCK_REC);
+ if (lock_get_type_low(lock1) == LOCK_REC) {
+ ut_ad(lock_get_type_low(lock2) == LOCK_REC);
/* If this lock request is for a supremum record
then the second bit on the lock bitmap is set */
@@ -1015,58 +1014,50 @@ lock_has_to_wait(
/*============== RECORD LOCK BASIC FUNCTIONS ============================*/
-/*************************************************************************
-Gets the number of bits in a record lock bitmap. */
+/*********************************************************************//**
+Gets the number of bits in a record lock bitmap.
+@return number of bits */
UNIV_INLINE
ulint
lock_rec_get_n_bits(
/*================*/
- /* out: number of bits */
- lock_t* lock) /* in: record lock */
+ const lock_t* lock) /*!< in: record lock */
{
return(lock->un_member.rec_lock.n_bits);
}
-/**************************************************************************
+/**********************************************************************//**
Sets the nth bit of a record lock to TRUE. */
UNIV_INLINE
void
lock_rec_set_nth_bit(
/*=================*/
- lock_t* lock, /* in: record lock */
- ulint i) /* in: index of the bit */
+ lock_t* lock, /*!< in: record lock */
+ ulint i) /*!< in: index of the bit */
{
ulint byte_index;
ulint bit_index;
- byte* ptr;
- ulint b;
ut_ad(lock);
- ut_ad(lock_get_type(lock) == LOCK_REC);
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
ut_ad(i < lock->un_member.rec_lock.n_bits);
byte_index = i / 8;
bit_index = i % 8;
- ptr = (byte*)lock + sizeof(lock_t) + byte_index;
-
- b = (ulint)*ptr;
-
- b = ut_bit_set_nth(b, bit_index, TRUE);
-
- *ptr = (byte)b;
+ ((byte*) &lock[1])[byte_index] |= 1 << bit_index;
}
-/**************************************************************************
+/**********************************************************************//**
Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED,
-if none found. */
-
+if none found.
+@return bit index == heap number of the record, or ULINT_UNDEFINED if
+none found */
+UNIV_INTERN
ulint
lock_rec_find_set_bit(
/*==================*/
- /* out: bit index == heap number of the record, or
- ULINT_UNDEFINED if none found */
- lock_t* lock) /* in: record lock with at least one bit set */
+ const lock_t* lock) /*!< in: record lock with at least one bit set */
{
ulint i;
@@ -1081,51 +1072,43 @@ lock_rec_find_set_bit(
return(ULINT_UNDEFINED);
}
-/**************************************************************************
+/**********************************************************************//**
Resets the nth bit of a record lock. */
UNIV_INLINE
void
lock_rec_reset_nth_bit(
/*===================*/
- lock_t* lock, /* in: record lock */
- ulint i) /* in: index of the bit which must be set to TRUE
+ lock_t* lock, /*!< in: record lock */
+ ulint i) /*!< in: index of the bit which must be set to TRUE
when this function is called */
{
ulint byte_index;
ulint bit_index;
- byte* ptr;
- ulint b;
ut_ad(lock);
- ut_ad(lock_get_type(lock) == LOCK_REC);
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
ut_ad(i < lock->un_member.rec_lock.n_bits);
byte_index = i / 8;
bit_index = i % 8;
- ptr = (byte*)lock + sizeof(lock_t) + byte_index;
-
- b = (ulint)*ptr;
-
- b = ut_bit_set_nth(b, bit_index, FALSE);
-
- *ptr = (byte)b;
+ ((byte*) &lock[1])[byte_index] &= ~(1 << bit_index);
}
-/*************************************************************************
-Gets the first or next record lock on a page. */
+/*********************************************************************//**
+Gets the first or next record lock on a page.
+@return next lock, NULL if none exists */
UNIV_INLINE
lock_t*
lock_rec_get_next_on_page(
/*======================*/
- /* out: next lock, NULL if none exists */
- lock_t* lock) /* in: a record lock */
+ lock_t* lock) /*!< in: a record lock */
{
ulint space;
ulint page_no;
ut_ad(mutex_own(&kernel_mutex));
- ut_ad(lock_get_type(lock) == LOCK_REC);
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
space = lock->un_member.rec_lock.space;
page_no = lock->un_member.rec_lock.page_no;
@@ -1148,16 +1131,16 @@ lock_rec_get_next_on_page(
return(lock);
}
-/*************************************************************************
+/*********************************************************************//**
Gets the first record lock on a page, where the page is identified by its
-file address. */
+file address.
+@return first lock, NULL if none exists */
UNIV_INLINE
lock_t*
lock_rec_get_first_on_page_addr(
/*============================*/
- /* out: first lock, NULL if none exists */
- ulint space, /* in: space */
- ulint page_no)/* in: page number */
+ ulint space, /*!< in: space */
+ ulint page_no)/*!< in: page number */
{
lock_t* lock;
@@ -1178,16 +1161,15 @@ lock_rec_get_first_on_page_addr(
return(lock);
}
-/*************************************************************************
-Returns TRUE if there are explicit record locks on a page. */
-
+/*********************************************************************//**
+Returns TRUE if there are explicit record locks on a page.
+@return TRUE if there are explicit record locks on the page */
+UNIV_INTERN
ibool
lock_rec_expl_exist_on_page(
/*========================*/
- /* out: TRUE if there are explicit record locks on
- the page */
- ulint space, /* in: space id */
- ulint page_no)/* in: page number */
+ ulint space, /*!< in: space id */
+ ulint page_no)/*!< in: page number */
{
ibool ret;
@@ -1204,31 +1186,28 @@ lock_rec_expl_exist_on_page(
return(ret);
}
-/*************************************************************************
+/*********************************************************************//**
Gets the first record lock on a page, where the page is identified by a
-pointer to it. */
+pointer to it.
+@return first lock, NULL if none exists */
UNIV_INLINE
lock_t*
lock_rec_get_first_on_page(
/*=======================*/
- /* out: first lock, NULL if none exists */
- byte* ptr) /* in: pointer to somewhere on the page */
+ const buf_block_t* block) /*!< in: buffer block */
{
ulint hash;
lock_t* lock;
- ulint space;
- ulint page_no;
+ ulint space = buf_block_get_space(block);
+ ulint page_no = buf_block_get_page_no(block);
ut_ad(mutex_own(&kernel_mutex));
- hash = buf_frame_get_lock_hash_val(ptr);
+ hash = buf_block_get_lock_hash_val(block);
lock = HASH_GET_FIRST(lock_sys->rec_hash, hash);
while (lock) {
- space = buf_frame_get_space_id(ptr);
- page_no = buf_frame_get_page_no(ptr);
-
if ((lock->un_member.rec_lock.space == space)
&& (lock->un_member.rec_lock.page_no == page_no)) {
@@ -1241,60 +1220,51 @@ lock_rec_get_first_on_page(
return(lock);
}
-/*************************************************************************
-Gets the next explicit lock request on a record. */
+/*********************************************************************//**
+Gets the next explicit lock request on a record.
+@return next lock, NULL if none exists */
UNIV_INLINE
lock_t*
lock_rec_get_next(
/*==============*/
- /* out: next lock, NULL if none exists */
- rec_t* rec, /* in: record on a page */
- lock_t* lock) /* in: lock */
+ ulint heap_no,/*!< in: heap number of the record */
+ lock_t* lock) /*!< in: lock */
{
ut_ad(mutex_own(&kernel_mutex));
- ut_ad(lock_get_type(lock) == LOCK_REC);
- if (page_rec_is_comp(rec)) {
- do {
- lock = lock_rec_get_next_on_page(lock);
- } while (lock && !lock_rec_get_nth_bit(
- lock, rec_get_heap_no(rec, TRUE)));
- } else {
- do {
- lock = lock_rec_get_next_on_page(lock);
- } while (lock && !lock_rec_get_nth_bit(
- lock, rec_get_heap_no(rec, FALSE)));
- }
+ do {
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
+ lock = lock_rec_get_next_on_page(lock);
+ } while (lock && !lock_rec_get_nth_bit(lock, heap_no));
return(lock);
}
-/*************************************************************************
-Gets the first explicit lock request on a record. */
+/*********************************************************************//**
+Gets the first explicit lock request on a record.
+@return first lock, NULL if none exists */
UNIV_INLINE
lock_t*
lock_rec_get_first(
/*===============*/
- /* out: first lock, NULL if none exists */
- rec_t* rec) /* in: record on a page */
+ const buf_block_t* block, /*!< in: block containing the record */
+ ulint heap_no)/*!< in: heap number of the record */
{
lock_t* lock;
ut_ad(mutex_own(&kernel_mutex));
- lock = lock_rec_get_first_on_page(rec);
- if (UNIV_LIKELY_NULL(lock)) {
- ulint heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
-
- while (lock && !lock_rec_get_nth_bit(lock, heap_no)) {
- lock = lock_rec_get_next_on_page(lock);
+ for (lock = lock_rec_get_first_on_page(block); lock;
+ lock = lock_rec_get_next_on_page(lock)) {
+ if (lock_rec_get_nth_bit(lock, heap_no)) {
+ break;
}
}
return(lock);
}
-/*************************************************************************
+/*********************************************************************//**
Resets the record lock bitmap to zero. NOTE: does not touch the wait_lock
pointer in the transaction! This function is used in lock object creation
and resetting. */
@@ -1302,64 +1272,50 @@ static
void
lock_rec_bitmap_reset(
/*==================*/
- lock_t* lock) /* in: record lock */
+ lock_t* lock) /*!< in: record lock */
{
- byte* ptr;
ulint n_bytes;
- ulint i;
- ut_ad(lock_get_type(lock) == LOCK_REC);
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
/* Reset to zero the bitmap which resides immediately after the lock
struct */
- ptr = (byte*)lock + sizeof(lock_t);
-
n_bytes = lock_rec_get_n_bits(lock) / 8;
ut_ad((lock_rec_get_n_bits(lock) % 8) == 0);
- for (i = 0; i < n_bytes; i++) {
-
- *ptr = 0;
- ptr++;
- }
+ memset(&lock[1], 0, n_bytes);
}
-/*************************************************************************
-Copies a record lock to heap. */
+/*********************************************************************//**
+Copies a record lock to heap.
+@return copy of lock */
static
lock_t*
lock_rec_copy(
/*==========*/
- /* out: copy of lock */
- lock_t* lock, /* in: record lock */
- mem_heap_t* heap) /* in: memory heap */
+ const lock_t* lock, /*!< in: record lock */
+ mem_heap_t* heap) /*!< in: memory heap */
{
- lock_t* dupl_lock;
ulint size;
- ut_ad(lock_get_type(lock) == LOCK_REC);
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
size = sizeof(lock_t) + lock_rec_get_n_bits(lock) / 8;
- dupl_lock = mem_heap_alloc(heap, size);
-
- ut_memcpy(dupl_lock, lock, size);
-
- return(dupl_lock);
+ return(mem_heap_dup(heap, lock, size));
}
-/*************************************************************************
-Gets the previous record lock set on a record. */
-
-lock_t*
+/*********************************************************************//**
+Gets the previous record lock set on a record.
+@return previous lock on the same record, NULL if none exists */
+UNIV_INTERN
+const lock_t*
lock_rec_get_prev(
/*==============*/
- /* out: previous lock on the same record, NULL if
- none exists */
- lock_t* in_lock,/* in: record lock */
- ulint heap_no)/* in: heap number of the record */
+ const lock_t* in_lock,/*!< in: record lock */
+ ulint heap_no)/*!< in: heap number of the record */
{
lock_t* lock;
ulint space;
@@ -1367,7 +1323,7 @@ lock_rec_get_prev(
lock_t* found_lock = NULL;
ut_ad(mutex_own(&kernel_mutex));
- ut_ad(lock_get_type(in_lock) == LOCK_REC);
+ ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
space = in_lock->un_member.rec_lock.space;
page_no = in_lock->un_member.rec_lock.page_no;
@@ -1393,16 +1349,16 @@ lock_rec_get_prev(
/*============= FUNCTIONS FOR ANALYZING TABLE LOCK QUEUE ================*/
-/*************************************************************************
-Checks if a transaction has the specified table lock, or stronger. */
+/*********************************************************************//**
+Checks if a transaction has the specified table lock, or stronger.
+@return lock or NULL */
UNIV_INLINE
lock_t*
lock_table_has(
/*===========*/
- /* out: lock or NULL */
- trx_t* trx, /* in: transaction */
- dict_table_t* table, /* in: table */
- ulint mode) /* in: lock mode */
+ trx_t* trx, /*!< in: transaction */
+ dict_table_t* table, /*!< in: table */
+ enum lock_mode mode) /*!< in: lock mode */
{
lock_t* lock;
@@ -1433,20 +1389,23 @@ lock_table_has(
/*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/
-/*************************************************************************
+/*********************************************************************//**
Checks if a transaction has a GRANTED explicit lock on rec stronger or equal
-to precise_mode. */
+to precise_mode.
+@return lock or NULL */
UNIV_INLINE
lock_t*
lock_rec_has_expl(
/*==============*/
- /* out: lock or NULL */
- ulint precise_mode,/* in: LOCK_S or LOCK_X possibly ORed to
- LOCK_GAP or LOCK_REC_NOT_GAP,
- for a supremum record we regard this always a gap
- type request */
- rec_t* rec, /* in: record */
- trx_t* trx) /* in: transaction */
+ ulint precise_mode,/*!< in: LOCK_S or LOCK_X
+ possibly ORed to LOCK_GAP or
+ LOCK_REC_NOT_GAP, for a
+ supremum record we regard this
+ always a gap type request */
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+ ulint heap_no,/*!< in: heap number of the record */
+ trx_t* trx) /*!< in: transaction */
{
lock_t* lock;
@@ -1455,7 +1414,7 @@ lock_rec_has_expl(
|| (precise_mode & LOCK_MODE_MASK) == LOCK_X);
ut_ad(!(precise_mode & LOCK_INSERT_INTENTION));
- lock = lock_rec_get_first(rec);
+ lock = lock_rec_get_first(block, heap_no);
while (lock) {
if (lock->trx == trx
@@ -1464,37 +1423,42 @@ lock_rec_has_expl(
&& !lock_get_wait(lock)
&& (!lock_rec_get_rec_not_gap(lock)
|| (precise_mode & LOCK_REC_NOT_GAP)
- || page_rec_is_supremum(rec))
+ || heap_no == PAGE_HEAP_NO_SUPREMUM)
&& (!lock_rec_get_gap(lock)
|| (precise_mode & LOCK_GAP)
- || page_rec_is_supremum(rec))
+ || heap_no == PAGE_HEAP_NO_SUPREMUM)
&& (!lock_rec_get_insert_intention(lock))) {
return(lock);
}
- lock = lock_rec_get_next(rec, lock);
+ lock = lock_rec_get_next(heap_no, lock);
}
return(NULL);
}
-#ifndef UNIV_HOTBACKUP
-/*************************************************************************
-Checks if some other transaction has a lock request in the queue. */
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Checks if some other transaction has a lock request in the queue.
+@return lock or NULL */
static
lock_t*
lock_rec_other_has_expl_req(
/*========================*/
- /* out: lock or NULL */
- ulint mode, /* in: LOCK_S or LOCK_X */
- ulint gap, /* in: LOCK_GAP if also gap locks are taken
- into account, or 0 if not */
- ulint wait, /* in: LOCK_WAIT if also waiting locks are
- taken into account, or 0 if not */
- rec_t* rec, /* in: record to look at */
- trx_t* trx) /* in: transaction, or NULL if requests by all
- transactions are taken into account */
+ enum lock_mode mode, /*!< in: LOCK_S or LOCK_X */
+ ulint gap, /*!< in: LOCK_GAP if also gap
+ locks are taken into account,
+ or 0 if not */
+ ulint wait, /*!< in: LOCK_WAIT if also
+ waiting locks are taken into
+ account, or 0 if not */
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+ ulint heap_no,/*!< in: heap number of the record */
+ const trx_t* trx) /*!< in: transaction, or NULL if
+ requests by all transactions
+ are taken into account */
{
lock_t* lock;
@@ -1503,80 +1467,92 @@ lock_rec_other_has_expl_req(
ut_ad(gap == 0 || gap == LOCK_GAP);
ut_ad(wait == 0 || wait == LOCK_WAIT);
- lock = lock_rec_get_first(rec);
+ lock = lock_rec_get_first(block, heap_no);
while (lock) {
if (lock->trx != trx
&& (gap
|| !(lock_rec_get_gap(lock)
- || page_rec_is_supremum(rec)))
+ || heap_no == PAGE_HEAP_NO_SUPREMUM))
&& (wait || !lock_get_wait(lock))
&& lock_mode_stronger_or_eq(lock_get_mode(lock), mode)) {
return(lock);
}
- lock = lock_rec_get_next(rec, lock);
+ lock = lock_rec_get_next(heap_no, lock);
}
return(NULL);
}
-#endif /* !UNIV_HOTBACKUP */
+#endif /* UNIV_DEBUG */
-/*************************************************************************
+/*********************************************************************//**
Checks if some other transaction has a conflicting explicit lock request
-in the queue, so that we have to wait. */
+in the queue, so that we have to wait.
+@return lock or NULL */
static
lock_t*
lock_rec_other_has_conflicting(
/*===========================*/
- /* out: lock or NULL */
- ulint mode, /* in: LOCK_S or LOCK_X,
- possibly ORed to LOCK_GAP or LOC_REC_NOT_GAP,
- LOCK_INSERT_INTENTION */
- rec_t* rec, /* in: record to look at */
- trx_t* trx) /* in: our transaction */
+ enum lock_mode mode, /*!< in: LOCK_S or LOCK_X,
+ possibly ORed to LOCK_GAP or
+ LOCK_REC_NOT_GAP,
+ LOCK_INSERT_INTENTION */
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+ ulint heap_no,/*!< in: heap number of the record */
+ trx_t* trx) /*!< in: our transaction */
{
lock_t* lock;
ut_ad(mutex_own(&kernel_mutex));
- lock = lock_rec_get_first(rec);
+ lock = lock_rec_get_first(block, heap_no);
- while (lock) {
- if (lock_rec_has_to_wait(trx, mode, lock,
- page_rec_is_supremum(rec))) {
+ if (UNIV_LIKELY_NULL(lock)) {
+ if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) {
- return(lock);
- }
+ do {
+ if (lock_rec_has_to_wait(trx, mode, lock,
+ TRUE)) {
+ return(lock);
+ }
- lock = lock_rec_get_next(rec, lock);
+ lock = lock_rec_get_next(heap_no, lock);
+ } while (lock);
+ } else {
+
+ do {
+ if (lock_rec_has_to_wait(trx, mode, lock,
+ FALSE)) {
+ return(lock);
+ }
+
+ lock = lock_rec_get_next(heap_no, lock);
+ } while (lock);
+ }
}
return(NULL);
}
-/*************************************************************************
+/*********************************************************************//**
Looks for a suitable type record lock struct by the same trx on the same page.
This can be used to save space when a new record lock should be set on a page:
-no new struct is needed, if a suitable old is found. */
+no new struct is needed, if a suitable old is found.
+@return lock or NULL */
UNIV_INLINE
lock_t*
lock_rec_find_similar_on_page(
/*==========================*/
- /* out: lock or NULL */
- ulint type_mode, /* in: lock type_mode field */
- rec_t* rec, /* in: record */
- trx_t* trx) /* in: transaction */
+ ulint type_mode, /*!< in: lock type_mode field */
+ ulint heap_no, /*!< in: heap number of the record */
+ lock_t* lock, /*!< in: lock_rec_get_first_on_page() */
+ const trx_t* trx) /*!< in: transaction */
{
- lock_t* lock;
- ulint heap_no;
-
ut_ad(mutex_own(&kernel_mutex));
- heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
- lock = lock_rec_get_first_on_page(rec);
-
while (lock != NULL) {
if (lock->trx == trx
&& lock->type_mode == type_mode
@@ -1591,28 +1567,25 @@ lock_rec_find_similar_on_page(
return(NULL);
}
-/*************************************************************************
+/*********************************************************************//**
Checks if some transaction has an implicit x-lock on a record in a secondary
-index. */
-
+index.
+@return transaction which has the x-lock, or NULL */
+static
trx_t*
lock_sec_rec_some_has_impl_off_kernel(
/*==================================*/
- /* out: transaction which has the x-lock, or
- NULL */
- rec_t* rec, /* in: user record */
- dict_index_t* index, /* in: secondary index */
- const ulint* offsets)/* in: rec_get_offsets(rec, index) */
+ const rec_t* rec, /*!< in: user record */
+ dict_index_t* index, /*!< in: secondary index */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
{
- page_t* page;
+ const page_t* page = page_align(rec);
ut_ad(mutex_own(&kernel_mutex));
- ut_ad(!(index->type & DICT_CLUSTERED));
+ ut_ad(!dict_index_is_clust(index));
ut_ad(page_rec_is_user_rec(rec));
ut_ad(rec_offs_validate(rec, index, offsets));
- page = buf_frame_align(rec);
-
/* Some transaction may have an implicit x-lock on the record only
if the max trx id for the page >= min trx id for the trx list, or
database recovery is running. We do not write the changes of a page
@@ -1631,7 +1604,7 @@ lock_sec_rec_some_has_impl_off_kernel(
if (!lock_check_trx_id_sanity(page_get_max_trx_id(page),
rec, index, offsets, TRUE)) {
- buf_page_print(page);
+ buf_page_print(page, 0);
/* The page is corrupt: try to avoid a crash by returning
NULL */
@@ -1641,15 +1614,15 @@ lock_sec_rec_some_has_impl_off_kernel(
return(row_vers_impl_x_locked_off_kernel(rec, index, offsets));
}
-/*************************************************************************
+/*********************************************************************//**
Return approximate number of record locks (bits set in the bitmap) for
this transaction. Since delete-marked records may be removed, the
record count will not be precise. */
-
+UNIV_INTERN
ulint
lock_number_of_rows_locked(
/*=======================*/
- trx_t* trx) /* in: transaction */
+ trx_t* trx) /*!< in: transaction */
{
lock_t* lock;
ulint n_records = 0;
@@ -1659,7 +1632,7 @@ lock_number_of_rows_locked(
lock = UT_LIST_GET_FIRST(trx->trx_locks);
while (lock) {
- if (lock_get_type(lock) == LOCK_REC) {
+ if (lock_get_type_low(lock) == LOCK_REC) {
n_bits = lock_rec_get_n_bits(lock);
for (n_bit = 0; n_bit < n_bits; n_bit++) {
@@ -1677,34 +1650,35 @@ lock_number_of_rows_locked(
/*============== RECORD LOCK CREATION AND QUEUE MANAGEMENT =============*/
-/*************************************************************************
+/*********************************************************************//**
Creates a new record lock and inserts it to the lock queue. Does NOT check
-for deadlocks or lock compatibility! */
+for deadlocks or lock compatibility!
+@return created lock */
static
lock_t*
lock_rec_create(
/*============*/
- /* out: created lock */
- ulint type_mode,/* in: lock mode and wait flag, type is
- ignored and replaced by LOCK_REC */
- rec_t* rec, /* in: record on page */
- dict_index_t* index, /* in: index of record */
- trx_t* trx) /* in: transaction */
+ ulint type_mode,/*!< in: lock mode and wait
+ flag, type is ignored and
+ replaced by LOCK_REC */
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+ ulint heap_no,/*!< in: heap number of the record */
+ dict_index_t* index, /*!< in: index of record */
+ trx_t* trx) /*!< in: transaction */
{
- page_t* page;
- lock_t* lock;
- ulint page_no;
- ulint heap_no;
- ulint space;
- ulint n_bits;
- ulint n_bytes;
+ lock_t* lock;
+ ulint page_no;
+ ulint space;
+ ulint n_bits;
+ ulint n_bytes;
+ const page_t* page;
ut_ad(mutex_own(&kernel_mutex));
- page = buf_frame_align(rec);
- space = buf_frame_get_space_id(page);
- page_no = buf_frame_get_page_no(page);
- heap_no = rec_get_heap_no(rec, page_is_comp(page));
+ space = buf_block_get_space(block);
+ page_no = buf_block_get_page_no(block);
+ page = block->frame;
ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
@@ -1712,7 +1686,7 @@ lock_rec_create(
LOCK_REC_NOT_GAP bits, as all locks on the supremum are
automatically of the gap type */
- if (rec == page_get_supremum_rec(page)) {
+ if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) {
ut_ad(!(type_mode & LOCK_REC_NOT_GAP));
type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP);
@@ -1745,7 +1719,7 @@ lock_rec_create(
HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
lock_rec_fold(space, page_no), lock);
- if (type_mode & LOCK_WAIT) {
+ if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
lock_set_lock_and_trx_wait(lock, trx);
}
@@ -1753,28 +1727,31 @@ lock_rec_create(
return(lock);
}
-/*************************************************************************
+/*********************************************************************//**
Enqueues a waiting request for a lock which cannot be granted immediately.
-Checks for deadlocks. */
+Checks for deadlocks.
+@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
+DB_SUCCESS; DB_SUCCESS means that there was a deadlock, but another
+transaction was chosen as a victim, and we got the lock immediately:
+no need to wait then */
static
ulint
lock_rec_enqueue_waiting(
/*=====================*/
- /* out: DB_LOCK_WAIT, DB_DEADLOCK, or
- DB_QUE_THR_SUSPENDED, or DB_SUCCESS;
- DB_SUCCESS means that there was a deadlock,
- but another transaction was chosen as a
- victim, and we got the lock immediately:
- no need to wait then */
- ulint type_mode,/* in: lock mode this transaction is
- requesting: LOCK_S or LOCK_X, possibly ORed
- with LOCK_GAP or LOCK_REC_NOT_GAP, ORed
- with LOCK_INSERT_INTENTION if this waiting
- lock request is set when performing an
- insert of an index record */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: index of record */
- que_thr_t* thr) /* in: query thread */
+ ulint type_mode,/*!< in: lock mode this
+ transaction is requesting:
+ LOCK_S or LOCK_X, possibly
+ ORed with LOCK_GAP or
+ LOCK_REC_NOT_GAP, ORed with
+ LOCK_INSERT_INTENTION if this
+ waiting lock request is set
+ when performing an insert of
+ an index record */
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+ ulint heap_no,/*!< in: heap number of the record */
+ dict_index_t* index, /*!< in: index of record */
+ que_thr_t* thr) /*!< in: query thread */
{
lock_t* lock;
trx_t* trx;
@@ -1785,7 +1762,7 @@ lock_rec_enqueue_waiting(
we do not enqueue a lock request if the query thread should be
stopped anyway */
- if (que_thr_stop(thr)) {
+ if (UNIV_UNLIKELY(que_thr_stop(thr))) {
ut_error;
@@ -1794,12 +1771,16 @@ lock_rec_enqueue_waiting(
trx = thr_get_trx(thr);
- if (trx->dict_operation) {
+ switch (trx_get_dict_operation(trx)) {
+ case TRX_DICT_OP_NONE:
+ break;
+ case TRX_DICT_OP_TABLE:
+ case TRX_DICT_OP_INDEX:
ut_print_timestamp(stderr);
fputs(" InnoDB: Error: a record lock wait happens"
" in a dictionary operation!\n"
- "InnoDB: Table name ", stderr);
- ut_print_name(stderr, trx, TRUE, index->table_name);
+ "InnoDB: ", stderr);
+ dict_index_name_print(stderr, trx, index);
fputs(".\n"
"InnoDB: Submit a detailed bug report"
" to http://bugs.mysql.com\n",
@@ -1807,16 +1788,16 @@ lock_rec_enqueue_waiting(
}
/* Enqueue the lock request that will wait to be granted */
- lock = lock_rec_create(type_mode | LOCK_WAIT, rec, index, trx);
+ lock = lock_rec_create(type_mode | LOCK_WAIT,
+ block, heap_no, index, trx);
/* Check if a deadlock occurs: if yes, remove the lock request and
return an error code */
- if (lock_deadlock_occurs(lock, trx)) {
+ if (UNIV_UNLIKELY(lock_deadlock_occurs(lock, trx))) {
lock_reset_lock_and_trx_wait(lock);
- lock_rec_reset_nth_bit(lock, rec_get_heap_no(
- rec, page_rec_is_comp(rec)));
+ lock_rec_reset_nth_bit(lock, heap_no);
return(DB_DEADLOCK);
}
@@ -1846,47 +1827,58 @@ lock_rec_enqueue_waiting(
return(DB_LOCK_WAIT);
}
-/*************************************************************************
+/*********************************************************************//**
Adds a record lock request in the record queue. The request is normally
added as the last in the queue, but if there are no waiting lock requests
on the record, and the request to be added is not a waiting request, we
can reuse a suitable record lock object already existing on the same page,
just setting the appropriate bit in its bitmap. This is a low-level function
-which does NOT check for deadlocks or lock compatibility! */
+which does NOT check for deadlocks or lock compatibility!
+@return lock where the bit was set */
static
lock_t*
lock_rec_add_to_queue(
/*==================*/
- /* out: lock where the bit was set */
- ulint type_mode,/* in: lock mode, wait, gap etc. flags;
- type is ignored and replaced by LOCK_REC */
- rec_t* rec, /* in: record on page */
- dict_index_t* index, /* in: index of record */
- trx_t* trx) /* in: transaction */
+ ulint type_mode,/*!< in: lock mode, wait, gap
+ etc. flags; type is ignored
+ and replaced by LOCK_REC */
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+ ulint heap_no,/*!< in: heap number of the record */
+ dict_index_t* index, /*!< in: index of record */
+ trx_t* trx) /*!< in: transaction */
{
lock_t* lock;
- lock_t* similar_lock = NULL;
- ulint heap_no;
- ibool somebody_waits = FALSE;
ut_ad(mutex_own(&kernel_mutex));
- ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP))
- || ((type_mode & LOCK_MODE_MASK) != LOCK_S)
- || !lock_rec_other_has_expl_req(LOCK_X, 0, LOCK_WAIT,
- rec, trx));
- ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP))
- || ((type_mode & LOCK_MODE_MASK) != LOCK_X)
- || !lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT,
- rec, trx));
+#ifdef UNIV_DEBUG
+ switch (type_mode & LOCK_MODE_MASK) {
+ case LOCK_X:
+ case LOCK_S:
+ break;
+ default:
+ ut_error;
+ }
+
+ if (!(type_mode & (LOCK_WAIT | LOCK_GAP))) {
+ enum lock_mode mode = (type_mode & LOCK_MODE_MASK) == LOCK_S
+ ? LOCK_X
+ : LOCK_S;
+ lock_t* other_lock
+ = lock_rec_other_has_expl_req(mode, 0, LOCK_WAIT,
+ block, heap_no, trx);
+ ut_a(!other_lock);
+ }
+#endif /* UNIV_DEBUG */
- type_mode = type_mode | LOCK_REC;
+ type_mode |= LOCK_REC;
/* If rec is the supremum record, then we can reset the gap bit, as
all locks on the supremum are automatically of the gap type, and we
try to avoid unnecessary memory consumption of a new record lock
struct for a gap type lock */
- if (page_rec_is_supremum(rec)) {
+ if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) {
ut_ad(!(type_mode & LOCK_REC_NOT_GAP));
/* There should never be LOCK_REC_NOT_GAP on a supremum
@@ -1897,57 +1889,66 @@ lock_rec_add_to_queue(
/* Look for a waiting lock request on the same record or on a gap */
- heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
- lock = lock_rec_get_first_on_page(rec);
+ lock = lock_rec_get_first_on_page(block);
while (lock != NULL) {
if (lock_get_wait(lock)
&& (lock_rec_get_nth_bit(lock, heap_no))) {
- somebody_waits = TRUE;
+ goto somebody_waits;
}
lock = lock_rec_get_next_on_page(lock);
}
- /* Look for a similar record lock on the same page: if one is found
- and there are no waiting lock requests, we can just set the bit */
+ if (UNIV_LIKELY(!(type_mode & LOCK_WAIT))) {
+
+ /* Look for a similar record lock on the same page:
+ if one is found and there are no waiting lock requests,
+ we can just set the bit */
- similar_lock = lock_rec_find_similar_on_page(type_mode, rec, trx);
+ lock = lock_rec_find_similar_on_page(
+ type_mode, heap_no,
+ lock_rec_get_first_on_page(block), trx);
- if (similar_lock && !somebody_waits && !(type_mode & LOCK_WAIT)) {
+ if (lock) {
- lock_rec_set_nth_bit(similar_lock, heap_no);
+ lock_rec_set_nth_bit(lock, heap_no);
- return(similar_lock);
+ return(lock);
+ }
}
- return(lock_rec_create(type_mode, rec, index, trx));
+somebody_waits:
+ return(lock_rec_create(type_mode, block, heap_no, index, trx));
}
-/*************************************************************************
+/*********************************************************************//**
This is a fast routine for locking a record in the most common cases:
there are no explicit locks on the page, or there is just one lock, owned
by this transaction, and of the right type_mode. This is a low-level function
which does NOT look at implicit locks! Checks lock compatibility within
explicit locks. This function sets a normal next-key lock, or in the case of
-a page supremum record, a gap type lock. */
+a page supremum record, a gap type lock.
+@return TRUE if locking succeeded */
UNIV_INLINE
ibool
lock_rec_lock_fast(
/*===============*/
- /* out: TRUE if locking succeeded */
- ibool impl, /* in: if TRUE, no lock is set if no wait
- is necessary: we assume that the caller will
- set an implicit lock */
- ulint mode, /* in: lock mode: LOCK_X or LOCK_S possibly
- ORed to either LOCK_GAP or LOCK_REC_NOT_GAP */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: index of record */
- que_thr_t* thr) /* in: query thread */
+ ibool impl, /*!< in: if TRUE, no lock is set
+ if no wait is necessary: we
+ assume that the caller will
+ set an implicit lock */
+ ulint mode, /*!< in: lock mode: LOCK_X or
+ LOCK_S possibly ORed to either
+ LOCK_GAP or LOCK_REC_NOT_GAP */
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+ ulint heap_no,/*!< in: heap number of record */
+ dict_index_t* index, /*!< in: index of record */
+ que_thr_t* thr) /*!< in: query thread */
{
lock_t* lock;
- ulint heap_no;
trx_t* trx;
ut_ad(mutex_own(&kernel_mutex));
@@ -1961,15 +1962,13 @@ lock_rec_lock_fast(
|| mode - (LOCK_MODE_MASK & mode) == 0
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
- heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
-
- lock = lock_rec_get_first_on_page(rec);
+ lock = lock_rec_get_first_on_page(block);
trx = thr_get_trx(thr);
if (lock == NULL) {
if (!impl) {
- lock_rec_create(mode, rec, index, trx);
+ lock_rec_create(mode, block, heap_no, index, trx);
}
return(TRUE);
@@ -1999,25 +1998,28 @@ lock_rec_lock_fast(
return(TRUE);
}
-/*************************************************************************
+/*********************************************************************//**
This is the general, and slower, routine for locking a record. This is a
low-level function which does NOT look at implicit locks! Checks lock
compatibility within explicit locks. This function sets a normal next-key
-lock, or in the case of a page supremum record, a gap type lock. */
+lock, or in the case of a page supremum record, a gap type lock.
+@return DB_SUCCESS, DB_LOCK_WAIT, or error code */
static
ulint
lock_rec_lock_slow(
/*===============*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT, or error
- code */
- ibool impl, /* in: if TRUE, no lock is set if no wait is
- necessary: we assume that the caller will set
- an implicit lock */
- ulint mode, /* in: lock mode: LOCK_X or LOCK_S possibly
- ORed to either LOCK_GAP or LOCK_REC_NOT_GAP */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: index of record */
- que_thr_t* thr) /* in: query thread */
+ ibool impl, /*!< in: if TRUE, no lock is set
+ if no wait is necessary: we
+ assume that the caller will
+ set an implicit lock */
+ ulint mode, /*!< in: lock mode: LOCK_X or
+ LOCK_S possibly ORed to either
+ LOCK_GAP or LOCK_REC_NOT_GAP */
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+ ulint heap_no,/*!< in: heap number of record */
+ dict_index_t* index, /*!< in: index of record */
+ que_thr_t* thr) /*!< in: query thread */
{
trx_t* trx;
ulint err;
@@ -2035,24 +2037,25 @@ lock_rec_lock_slow(
trx = thr_get_trx(thr);
- if (lock_rec_has_expl(mode, rec, trx)) {
+ if (lock_rec_has_expl(mode, block, heap_no, trx)) {
/* The trx already has a strong enough lock on rec: do
nothing */
err = DB_SUCCESS;
- } else if (lock_rec_other_has_conflicting(mode, rec, trx)) {
+ } else if (lock_rec_other_has_conflicting(mode, block, heap_no, trx)) {
/* If another transaction has a non-gap conflicting request in
the queue, as this transaction does not have a lock strong
enough already granted on the record, we have to wait. */
- err = lock_rec_enqueue_waiting(mode, rec, index, thr);
+ err = lock_rec_enqueue_waiting(mode, block, heap_no,
+ index, thr);
} else {
if (!impl) {
/* Set the requested lock on the record */
- lock_rec_add_to_queue(LOCK_REC | mode, rec, index,
- trx);
+ lock_rec_add_to_queue(LOCK_REC | mode, block,
+ heap_no, index, trx);
}
err = DB_SUCCESS;
@@ -2061,26 +2064,29 @@ lock_rec_lock_slow(
return(err);
}
-/*************************************************************************
+/*********************************************************************//**
Tries to lock the specified record in the mode requested. If not immediately
possible, enqueues a waiting lock request. This is a low-level function
which does NOT look at implicit locks! Checks lock compatibility within
explicit locks. This function sets a normal next-key lock, or in the case
-of a page supremum record, a gap type lock. */
+of a page supremum record, a gap type lock.
+@return DB_SUCCESS, DB_LOCK_WAIT, or error code */
static
ulint
lock_rec_lock(
/*==========*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT, or error
- code */
- ibool impl, /* in: if TRUE, no lock is set if no wait is
- necessary: we assume that the caller will set
- an implicit lock */
- ulint mode, /* in: lock mode: LOCK_X or LOCK_S possibly
- ORed to either LOCK_GAP or LOCK_REC_NOT_GAP */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: index of record */
- que_thr_t* thr) /* in: query thread */
+ ibool impl, /*!< in: if TRUE, no lock is set
+ if no wait is necessary: we
+ assume that the caller will
+ set an implicit lock */
+ ulint mode, /*!< in: lock mode: LOCK_X or
+ LOCK_S possibly ORed to either
+ LOCK_GAP or LOCK_REC_NOT_GAP */
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+ ulint heap_no,/*!< in: heap number of record */
+ dict_index_t* index, /*!< in: index of record */
+ que_thr_t* thr) /*!< in: query thread */
{
ulint err;
@@ -2095,27 +2101,28 @@ lock_rec_lock(
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP
|| mode - (LOCK_MODE_MASK & mode) == 0);
- if (lock_rec_lock_fast(impl, mode, rec, index, thr)) {
+ if (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) {
/* We try a simplified and faster subroutine for the most
common cases */
err = DB_SUCCESS;
} else {
- err = lock_rec_lock_slow(impl, mode, rec, index, thr);
+ err = lock_rec_lock_slow(impl, mode, block,
+ heap_no, index, thr);
}
return(err);
}
-/*************************************************************************
-Checks if a waiting record lock request still has to wait in a queue. */
+/*********************************************************************//**
+Checks if a waiting record lock request still has to wait in a queue.
+@return TRUE if still has to wait */
static
ibool
lock_rec_has_to_wait_in_queue(
/*==========================*/
- /* out: TRUE if still has to wait */
- lock_t* wait_lock) /* in: waiting record lock */
+ lock_t* wait_lock) /*!< in: waiting record lock */
{
lock_t* lock;
ulint space;
@@ -2124,7 +2131,7 @@ lock_rec_has_to_wait_in_queue(
ut_ad(mutex_own(&kernel_mutex));
ut_ad(lock_get_wait(wait_lock));
- ut_ad(lock_get_type(wait_lock) == LOCK_REC);
+ ut_ad(lock_get_type_low(wait_lock) == LOCK_REC);
space = wait_lock->un_member.rec_lock.space;
page_no = wait_lock->un_member.rec_lock.page_no;
@@ -2146,31 +2153,32 @@ lock_rec_has_to_wait_in_queue(
return(FALSE);
}
-/*****************************************************************
+/*************************************************************//**
Grants a lock to a waiting lock request and releases the waiting
transaction. */
static
void
lock_grant(
/*=======*/
- lock_t* lock) /* in: waiting lock request */
+ lock_t* lock) /*!< in/out: waiting lock request */
{
ut_ad(mutex_own(&kernel_mutex));
lock_reset_lock_and_trx_wait(lock);
if (lock_get_mode(lock) == LOCK_AUTO_INC) {
+ trx_t* trx = lock->trx;
+ dict_table_t* table = lock->un_member.tab_lock.table;
- if (lock->trx->auto_inc_lock != NULL) {
+ if (table->autoinc_trx == trx) {
fprintf(stderr,
"InnoDB: Error: trx already had"
" an AUTO-INC lock!\n");
- }
-
- /* Store pointer to lock to trx so that we know to
- release it at the end of the SQL statement */
+ } else {
+ table->autoinc_trx = trx;
- lock->trx->auto_inc_lock = lock;
+ ib_vector_push(trx->autoinc_locks, lock);
+ }
}
#ifdef UNIV_DEBUG
@@ -2190,7 +2198,7 @@ lock_grant(
}
}
-/*****************************************************************
+/*************************************************************//**
Cancels a waiting record lock request and releases the waiting transaction
that requested it. NOTE: does NOT check if waiting lock requests behind this
one can now be granted! */
@@ -2198,10 +2206,10 @@ static
void
lock_rec_cancel(
/*============*/
- lock_t* lock) /* in: waiting record lock request */
+ lock_t* lock) /*!< in: waiting record lock request */
{
ut_ad(mutex_own(&kernel_mutex));
- ut_ad(lock_get_type(lock) == LOCK_REC);
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
/* Reset the bit (there can be only one set bit) in the lock bitmap */
lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock));
@@ -2215,7 +2223,7 @@ lock_rec_cancel(
trx_end_lock_wait(lock->trx);
}
-/*****************************************************************
+/*************************************************************//**
Removes a record lock request, waiting or granted, from the queue and
grants locks to other transactions in the queue if they now are entitled
to a lock. NOTE: all record locks contained in in_lock are removed. */
@@ -2223,7 +2231,7 @@ static
void
lock_rec_dequeue_from_page(
/*=======================*/
- lock_t* in_lock)/* in: record lock object: all record locks which
+ lock_t* in_lock)/*!< in: record lock object: all record locks which
are contained in this lock object are removed;
transactions waiting behind will get their lock
requests granted, if they are now qualified to it */
@@ -2234,7 +2242,7 @@ lock_rec_dequeue_from_page(
trx_t* trx;
ut_ad(mutex_own(&kernel_mutex));
- ut_ad(lock_get_type(in_lock) == LOCK_REC);
+ ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
trx = in_lock->trx;
@@ -2263,13 +2271,13 @@ lock_rec_dequeue_from_page(
}
}
-/*****************************************************************
+/*************************************************************//**
Removes a record lock request, waiting or granted, from the queue. */
static
void
lock_rec_discard(
/*=============*/
- lock_t* in_lock)/* in: record lock object: all record locks which
+ lock_t* in_lock)/*!< in: record lock object: all record locks which
are contained in this lock object are removed */
{
ulint space;
@@ -2277,7 +2285,7 @@ lock_rec_discard(
trx_t* trx;
ut_ad(mutex_own(&kernel_mutex));
- ut_ad(lock_get_type(in_lock) == LOCK_REC);
+ ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
trx = in_lock->trx;
@@ -2290,7 +2298,7 @@ lock_rec_discard(
UT_LIST_REMOVE(trx_locks, trx->trx_locks, in_lock);
}
-/*****************************************************************
+/*************************************************************//**
Removes record lock objects set on an index page which is discarded. This
function does not move locks, or check for waiting locks, therefore the
lock bitmaps must already be reset when this function is called. */
@@ -2298,7 +2306,7 @@ static
void
lock_rec_free_all_from_discard_page(
/*================================*/
- page_t* page) /* in: page to be discarded */
+ const buf_block_t* block) /*!< in: page to be discarded */
{
ulint space;
ulint page_no;
@@ -2307,8 +2315,8 @@ lock_rec_free_all_from_discard_page(
ut_ad(mutex_own(&kernel_mutex));
- space = buf_frame_get_space_id(page);
- page_no = buf_frame_get_page_no(page);
+ space = buf_block_get_space(block);
+ page_no = buf_block_get_page_no(block);
lock = lock_rec_get_first_on_page_addr(space, page_no);
@@ -2326,23 +2334,22 @@ lock_rec_free_all_from_discard_page(
/*============= RECORD LOCK MOVING AND INHERITING ===================*/
-/*****************************************************************
+/*************************************************************//**
Resets the lock bits for a single record. Releases transactions waiting for
lock requests here. */
static
void
lock_rec_reset_and_release_wait(
/*============================*/
- rec_t* rec) /* in: record whose locks bits should be reset */
+ const buf_block_t* block, /*!< in: buffer block containing
+ the record */
+ ulint heap_no)/*!< in: heap number of record */
{
lock_t* lock;
- ulint heap_no;
ut_ad(mutex_own(&kernel_mutex));
- heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
-
- lock = lock_rec_get_first(rec);
+ lock = lock_rec_get_first(block, heap_no);
while (lock != NULL) {
if (lock_get_wait(lock)) {
@@ -2351,28 +2358,35 @@ lock_rec_reset_and_release_wait(
lock_rec_reset_nth_bit(lock, heap_no);
}
- lock = lock_rec_get_next(rec, lock);
+ lock = lock_rec_get_next(heap_no, lock);
}
}
-/*****************************************************************
+/*************************************************************//**
Makes a record inherit the locks (except LOCK_INSERT_INTENTION type)
of another record as gap type locks, but does not reset the lock bits of
the other record. Also waiting lock requests on rec are inherited as
GRANTED gap locks. */
-
+static
void
lock_rec_inherit_to_gap(
/*====================*/
- rec_t* heir, /* in: record which inherits */
- rec_t* rec) /* in: record from which inherited; does NOT reset
- the locks on this record */
+ const buf_block_t* heir_block, /*!< in: block containing the
+ record which inherits */
+ const buf_block_t* block, /*!< in: block containing the
+ record from which inherited;
+ does NOT reset the locks on
+ this record */
+ ulint heir_heap_no, /*!< in: heap_no of the
+ inheriting record */
+ ulint heap_no) /*!< in: heap_no of the
+ donating record */
{
lock_t* lock;
ut_ad(mutex_own(&kernel_mutex));
- lock = lock_rec_get_first(rec);
+ lock = lock_rec_get_first(block, heap_no);
/* If srv_locks_unsafe_for_binlog is TRUE or session is using
READ COMMITTED isolation level, we do not want locks set
@@ -2387,16 +2401,17 @@ lock_rec_inherit_to_gap(
== TRX_ISO_READ_COMMITTED)
&& lock_get_mode(lock) == LOCK_X)) {
- lock_rec_add_to_queue(LOCK_REC | lock_get_mode(lock)
- | LOCK_GAP,
- heir, lock->index, lock->trx);
+ lock_rec_add_to_queue(LOCK_REC | LOCK_GAP
+ | lock_get_mode(lock),
+ heir_block, heir_heap_no,
+ lock->index, lock->trx);
}
- lock = lock_rec_get_next(rec, lock);
+ lock = lock_rec_get_next(heap_no, lock);
}
}
-/*****************************************************************
+/*************************************************************//**
Makes a record inherit the gap locks (except LOCK_INSERT_INTENTION type)
of another record as gap type locks, but does not reset the lock bits of the
other record. Also waiting lock requests are inherited as GRANTED gap locks. */
@@ -2404,99 +2419,103 @@ static
void
lock_rec_inherit_to_gap_if_gap_lock(
/*================================*/
- rec_t* heir, /* in: record which inherits */
- rec_t* rec) /* in: record from which inherited; does NOT reset
- the locks on this record */
+ const buf_block_t* block, /*!< in: buffer block */
+ ulint heir_heap_no, /*!< in: heap_no of
+ record which inherits */
+ ulint heap_no) /*!< in: heap_no of record
+ from which inherited;
+ does NOT reset the locks
+ on this record */
{
lock_t* lock;
ut_ad(mutex_own(&kernel_mutex));
- lock = lock_rec_get_first(rec);
+ lock = lock_rec_get_first(block, heap_no);
while (lock != NULL) {
if (!lock_rec_get_insert_intention(lock)
- && (page_rec_is_supremum(rec)
+ && (heap_no == PAGE_HEAP_NO_SUPREMUM
|| !lock_rec_get_rec_not_gap(lock))) {
- lock_rec_add_to_queue(LOCK_REC | lock_get_mode(lock)
- | LOCK_GAP,
- heir, lock->index, lock->trx);
+ lock_rec_add_to_queue(LOCK_REC | LOCK_GAP
+ | lock_get_mode(lock),
+ block, heir_heap_no,
+ lock->index, lock->trx);
}
- lock = lock_rec_get_next(rec, lock);
+ lock = lock_rec_get_next(heap_no, lock);
}
}
-/*****************************************************************
+/*************************************************************//**
Moves the locks of a record to another record and resets the lock bits of
the donating record. */
static
void
lock_rec_move(
/*==========*/
- rec_t* receiver, /* in: record which gets locks; this record
- must have no lock requests on it! */
- rec_t* donator, /* in: record which gives locks */
- ulint comp) /* in: nonzero=compact page format */
+ const buf_block_t* receiver, /*!< in: buffer block containing
+ the receiving record */
+ const buf_block_t* donator, /*!< in: buffer block containing
+ the donating record */
+ ulint receiver_heap_no,/*!< in: heap_no of the record
+ which gets the locks; there
+ must be no lock requests
+ on it! */
+ ulint donator_heap_no)/*!< in: heap_no of the record
+ which gives the locks */
{
lock_t* lock;
- ulint heap_no;
- ulint type_mode;
ut_ad(mutex_own(&kernel_mutex));
- heap_no = rec_get_heap_no(donator, comp);
-
- lock = lock_rec_get_first(donator);
+ lock = lock_rec_get_first(donator, donator_heap_no);
- ut_ad(lock_rec_get_first(receiver) == NULL);
+ ut_ad(lock_rec_get_first(receiver, receiver_heap_no) == NULL);
while (lock != NULL) {
- type_mode = lock->type_mode;
+ const ulint type_mode = lock->type_mode;
- lock_rec_reset_nth_bit(lock, heap_no);
+ lock_rec_reset_nth_bit(lock, donator_heap_no);
- if (lock_get_wait(lock)) {
+ if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
lock_reset_lock_and_trx_wait(lock);
}
/* Note that we FIRST reset the bit, and then set the lock:
the function works also if donator == receiver */
- lock_rec_add_to_queue(type_mode, receiver, lock->index,
- lock->trx);
- lock = lock_rec_get_next(donator, lock);
+ lock_rec_add_to_queue(type_mode, receiver, receiver_heap_no,
+ lock->index, lock->trx);
+ lock = lock_rec_get_next(donator_heap_no, lock);
}
- ut_ad(lock_rec_get_first(donator) == NULL);
+ ut_ad(lock_rec_get_first(donator, donator_heap_no) == NULL);
}
-/*****************************************************************
+/*************************************************************//**
Updates the lock table when we have reorganized a page. NOTE: we copy
also the locks set on the infimum of the page; the infimum may carry
locks if an update of a record is occurring on the page, and its locks
were temporarily stored on the infimum. */
-
+UNIV_INTERN
void
lock_move_reorganize_page(
/*======================*/
- page_t* page, /* in: old index page, now reorganized */
- page_t* old_page) /* in: copy of the old, not reorganized page */
+ const buf_block_t* block, /*!< in: old index page, now
+ reorganized */
+ const buf_block_t* oblock) /*!< in: copy of the old, not
+ reorganized page */
{
lock_t* lock;
- lock_t* old_lock;
- page_cur_t cur1;
- page_cur_t cur2;
- ulint old_heap_no;
UT_LIST_BASE_NODE_T(lock_t) old_locks;
mem_heap_t* heap = NULL;
- rec_t* sup;
ulint comp;
lock_mutex_enter_kernel();
- lock = lock_rec_get_first_on_page(page);
+ lock = lock_rec_get_first_on_page(block);
if (lock == NULL) {
lock_mutex_exit_kernel();
@@ -2512,10 +2531,9 @@ lock_move_reorganize_page(
UT_LIST_INIT(old_locks);
- while (lock != NULL) {
-
+ do {
/* Make a copy of the lock */
- old_lock = lock_rec_copy(lock, heap);
+ lock_t* old_lock = lock_rec_copy(lock, heap);
UT_LIST_ADD_LAST(trx_locks, old_locks, old_lock);
@@ -2527,44 +2545,59 @@ lock_move_reorganize_page(
}
lock = lock_rec_get_next_on_page(lock);
- }
-
- sup = page_get_supremum_rec(page);
+ } while (lock != NULL);
- lock = UT_LIST_GET_FIRST(old_locks);
+ comp = page_is_comp(block->frame);
+ ut_ad(comp == page_is_comp(oblock->frame));
- comp = page_is_comp(page);
- ut_ad(comp == page_is_comp(old_page));
-
- while (lock) {
+ for (lock = UT_LIST_GET_FIRST(old_locks); lock;
+ lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
/* NOTE: we copy also the locks set on the infimum and
supremum of the page; the infimum may carry locks if an
update of a record is occurring on the page, and its locks
were temporarily stored on the infimum */
+ page_cur_t cur1;
+ page_cur_t cur2;
- page_cur_set_before_first(page, &cur1);
- page_cur_set_before_first(old_page, &cur2);
+ page_cur_set_before_first(block, &cur1);
+ page_cur_set_before_first(oblock, &cur2);
/* Set locks according to old locks */
for (;;) {
+ ulint old_heap_no;
+ ulint new_heap_no;
+
ut_ad(comp || !memcmp(page_cur_get_rec(&cur1),
page_cur_get_rec(&cur2),
rec_get_data_size_old(
page_cur_get_rec(
&cur2))));
- old_heap_no = rec_get_heap_no(page_cur_get_rec(&cur2),
- comp);
+ if (UNIV_LIKELY(comp)) {
+ old_heap_no = rec_get_heap_no_new(
+ page_cur_get_rec(&cur2));
+ new_heap_no = rec_get_heap_no_new(
+ page_cur_get_rec(&cur1));
+ } else {
+ old_heap_no = rec_get_heap_no_old(
+ page_cur_get_rec(&cur2));
+ new_heap_no = rec_get_heap_no_old(
+ page_cur_get_rec(&cur1));
+ }
if (lock_rec_get_nth_bit(lock, old_heap_no)) {
+ /* Clear the bit in old_lock. */
+ ut_d(lock_rec_reset_nth_bit(lock,
+ old_heap_no));
+
/* NOTE that the old lock bitmap could be too
small for the new heap number! */
- lock_rec_add_to_queue(lock->type_mode,
- page_cur_get_rec(&cur1),
+ lock_rec_add_to_queue(lock->type_mode, block,
+ new_heap_no,
lock->index, lock->trx);
- /* if ((page_cur_get_rec(&cur1) == sup)
+ /* if (new_heap_no == PAGE_HEAP_NO_SUPREMUM
&& lock_get_wait(lock)) {
fprintf(stderr,
"---\n--\n!!!Lock reorg: supr type %lu\n",
@@ -2572,8 +2605,10 @@ lock_move_reorganize_page(
} */
}
- if (page_cur_get_rec(&cur1) == sup) {
+ if (UNIV_UNLIKELY
+ (new_heap_no == PAGE_HEAP_NO_SUPREMUM)) {
+ ut_ad(old_heap_no == PAGE_HEAP_NO_SUPREMUM);
break;
}
@@ -2581,41 +2616,47 @@ lock_move_reorganize_page(
page_cur_move_to_next(&cur2);
}
- /* Remember that we chained old locks on the trx_locks field */
+#ifdef UNIV_DEBUG
+ {
+ ulint i = lock_rec_find_set_bit(lock);
- lock = UT_LIST_GET_NEXT(trx_locks, lock);
+ /* Check that all locks were moved. */
+ if (UNIV_UNLIKELY(i != ULINT_UNDEFINED)) {
+ fprintf(stderr,
+ "lock_move_reorganize_page():"
+ " %lu not moved in %p\n",
+ (ulong) i, (void*) lock);
+ ut_error;
+ }
+ }
+#endif /* UNIV_DEBUG */
}
lock_mutex_exit_kernel();
mem_heap_free(heap);
-#if 0
- ut_ad(lock_rec_validate_page(buf_frame_get_space_id(page),
- buf_frame_get_page_no(page)));
+#ifdef UNIV_DEBUG_LOCK_VALIDATE
+ ut_ad(lock_rec_validate_page(buf_block_get_space(block),
+ buf_block_get_zip_size(block),
+ buf_block_get_page_no(block)));
#endif
}
-/*****************************************************************
+/*************************************************************//**
Moves the explicit locks on user records to another page if a record
list end is moved to another page. */
-
+UNIV_INTERN
void
lock_move_rec_list_end(
/*===================*/
- page_t* new_page, /* in: index page to move to */
- page_t* page, /* in: index page */
- rec_t* rec) /* in: record on page: this is the
- first record moved */
+ const buf_block_t* new_block, /*!< in: index page to move to */
+ const buf_block_t* block, /*!< in: index page */
+ const rec_t* rec) /*!< in: record on page: this
+ is the first record moved */
{
lock_t* lock;
- page_cur_t cur1;
- page_cur_t cur2;
- ulint heap_no;
- rec_t* sup;
- ulint type_mode;
- ulint comp;
- ut_ad(page == buf_frame_align(rec));
+ const ulint comp = page_rec_is_comp(rec);
lock_mutex_enter_kernel();
@@ -2625,127 +2666,150 @@ lock_move_rec_list_end(
table to the end of the hash chain, and lock_rec_add_to_queue
does not reuse locks if there are waiters in the queue. */
- sup = page_get_supremum_rec(page);
+ for (lock = lock_rec_get_first_on_page(block); lock;
+ lock = lock_rec_get_next_on_page(lock)) {
+ page_cur_t cur1;
+ page_cur_t cur2;
+ const ulint type_mode = lock->type_mode;
- lock = lock_rec_get_first_on_page(page);
-
- comp = page_is_comp(page);
-
- while (lock != NULL) {
-
- page_cur_position(rec, &cur1);
+ page_cur_position(rec, block, &cur1);
if (page_cur_is_before_first(&cur1)) {
page_cur_move_to_next(&cur1);
}
- page_cur_set_before_first(new_page, &cur2);
+ page_cur_set_before_first(new_block, &cur2);
page_cur_move_to_next(&cur2);
/* Copy lock requests on user records to new page and
reset the lock bits on the old */
- while (page_cur_get_rec(&cur1) != sup) {
- ut_ad(comp || !memcmp(page_cur_get_rec(&cur1),
- page_cur_get_rec(&cur2),
- rec_get_data_size_old(
- page_cur_get_rec(
- &cur2))));
- heap_no = rec_get_heap_no(page_cur_get_rec(&cur1),
- comp);
+ while (!page_cur_is_after_last(&cur1)) {
+ ulint heap_no;
- if (lock_rec_get_nth_bit(lock, heap_no)) {
- type_mode = lock->type_mode;
+ if (comp) {
+ heap_no = rec_get_heap_no_new(
+ page_cur_get_rec(&cur1));
+ } else {
+ heap_no = rec_get_heap_no_old(
+ page_cur_get_rec(&cur1));
+ ut_ad(!memcmp(page_cur_get_rec(&cur1),
+ page_cur_get_rec(&cur2),
+ rec_get_data_size_old(
+ page_cur_get_rec(&cur2))));
+ }
+ if (lock_rec_get_nth_bit(lock, heap_no)) {
lock_rec_reset_nth_bit(lock, heap_no);
- if (lock_get_wait(lock)) {
+ if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
lock_reset_lock_and_trx_wait(lock);
}
+ if (comp) {
+ heap_no = rec_get_heap_no_new(
+ page_cur_get_rec(&cur2));
+ } else {
+ heap_no = rec_get_heap_no_old(
+ page_cur_get_rec(&cur2));
+ }
+
lock_rec_add_to_queue(type_mode,
- page_cur_get_rec(&cur2),
+ new_block, heap_no,
lock->index, lock->trx);
}
page_cur_move_to_next(&cur1);
page_cur_move_to_next(&cur2);
}
-
- lock = lock_rec_get_next_on_page(lock);
}
lock_mutex_exit_kernel();
-#if 0
- ut_ad(lock_rec_validate_page(buf_frame_get_space_id(page),
- buf_frame_get_page_no(page)));
- ut_ad(lock_rec_validate_page(buf_frame_get_space_id(new_page),
- buf_frame_get_page_no(new_page)));
+#ifdef UNIV_DEBUG_LOCK_VALIDATE
+ ut_ad(lock_rec_validate_page(buf_block_get_space(block),
+ buf_block_get_zip_size(block),
+ buf_block_get_page_no(block)));
+ ut_ad(lock_rec_validate_page(buf_block_get_space(new_block),
+ buf_block_get_zip_size(block),
+ buf_block_get_page_no(new_block)));
#endif
}
-/*****************************************************************
+/*************************************************************//**
Moves the explicit locks on user records to another page if a record
list start is moved to another page. */
-
+UNIV_INTERN
void
lock_move_rec_list_start(
/*=====================*/
- page_t* new_page, /* in: index page to move to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page: this is the
- first record NOT copied */
- rec_t* old_end) /* in: old previous-to-last record on
- new_page before the records were copied */
+ const buf_block_t* new_block, /*!< in: index page to move to */
+ const buf_block_t* block, /*!< in: index page */
+ const rec_t* rec, /*!< in: record on page:
+ this is the first
+ record NOT copied */
+ const rec_t* old_end) /*!< in: old
+ previous-to-last
+ record on new_page
+ before the records
+ were copied */
{
lock_t* lock;
- page_cur_t cur1;
- page_cur_t cur2;
- ulint heap_no;
- ulint type_mode;
- ulint comp;
+ const ulint comp = page_rec_is_comp(rec);
- ut_a(new_page);
+ ut_ad(block->frame == page_align(rec));
+ ut_ad(new_block->frame == page_align(old_end));
lock_mutex_enter_kernel();
- lock = lock_rec_get_first_on_page(page);
- comp = page_is_comp(page);
- ut_ad(comp == page_is_comp(new_page));
- ut_ad(page == buf_frame_align(rec));
-
- while (lock != NULL) {
+ for (lock = lock_rec_get_first_on_page(block); lock;
+ lock = lock_rec_get_next_on_page(lock)) {
+ page_cur_t cur1;
+ page_cur_t cur2;
+ const ulint type_mode = lock->type_mode;
- page_cur_set_before_first(page, &cur1);
+ page_cur_set_before_first(block, &cur1);
page_cur_move_to_next(&cur1);
- page_cur_position(old_end, &cur2);
+ page_cur_position(old_end, new_block, &cur2);
page_cur_move_to_next(&cur2);
/* Copy lock requests on user records to new page and
reset the lock bits on the old */
while (page_cur_get_rec(&cur1) != rec) {
- ut_ad(comp || !memcmp(page_cur_get_rec(&cur1),
+ ulint heap_no;
+
+ if (comp) {
+ heap_no = rec_get_heap_no_new(
+ page_cur_get_rec(&cur1));
+ } else {
+ heap_no = rec_get_heap_no_old(
+ page_cur_get_rec(&cur1));
+ ut_ad(!memcmp(page_cur_get_rec(&cur1),
page_cur_get_rec(&cur2),
rec_get_data_size_old(
page_cur_get_rec(
&cur2))));
- heap_no = rec_get_heap_no(page_cur_get_rec(&cur1),
- comp);
+ }
if (lock_rec_get_nth_bit(lock, heap_no)) {
- type_mode = lock->type_mode;
-
lock_rec_reset_nth_bit(lock, heap_no);
- if (lock_get_wait(lock)) {
+ if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
lock_reset_lock_and_trx_wait(lock);
}
+ if (comp) {
+ heap_no = rec_get_heap_no_new(
+ page_cur_get_rec(&cur2));
+ } else {
+ heap_no = rec_get_heap_no_old(
+ page_cur_get_rec(&cur2));
+ }
+
lock_rec_add_to_queue(type_mode,
- page_cur_get_rec(&cur2),
+ new_block, heap_no,
lock->index, lock->trx);
}
@@ -2753,58 +2817,78 @@ lock_move_rec_list_start(
page_cur_move_to_next(&cur2);
}
- lock = lock_rec_get_next_on_page(lock);
+#ifdef UNIV_DEBUG
+ if (page_rec_is_supremum(rec)) {
+ ulint i;
+
+ for (i = PAGE_HEAP_NO_USER_LOW;
+ i < lock_rec_get_n_bits(lock); i++) {
+ if (UNIV_UNLIKELY
+ (lock_rec_get_nth_bit(lock, i))) {
+
+ fprintf(stderr,
+ "lock_move_rec_list_start():"
+ " %lu not moved in %p\n",
+ (ulong) i, (void*) lock);
+ ut_error;
+ }
+ }
+ }
+#endif /* UNIV_DEBUG */
}
lock_mutex_exit_kernel();
-#if 0
- ut_ad(lock_rec_validate_page(buf_frame_get_space_id(page),
- buf_frame_get_page_no(page)));
- ut_ad(lock_rec_validate_page(buf_frame_get_space_id(new_page),
- buf_frame_get_page_no(new_page)));
+
+#ifdef UNIV_DEBUG_LOCK_VALIDATE
+ ut_ad(lock_rec_validate_page(buf_block_get_space(block),
+ buf_block_get_zip_size(block),
+ buf_block_get_page_no(block)));
#endif
}
-/*****************************************************************
+/*************************************************************//**
Updates the lock table when a page is split to the right. */
-
+UNIV_INTERN
void
lock_update_split_right(
/*====================*/
- page_t* right_page, /* in: right page */
- page_t* left_page) /* in: left page */
+ const buf_block_t* right_block, /*!< in: right page */
+ const buf_block_t* left_block) /*!< in: left page */
{
- ulint comp;
+ ulint heap_no = lock_get_min_heap_no(right_block);
+
lock_mutex_enter_kernel();
- comp = page_is_comp(left_page);
- ut_ad(comp == page_is_comp(right_page));
/* Move the locks on the supremum of the left page to the supremum
of the right page */
- lock_rec_move(page_get_supremum_rec(right_page),
- page_get_supremum_rec(left_page), comp);
+ lock_rec_move(right_block, left_block,
+ PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
/* Inherit the locks to the supremum of left page from the successor
of the infimum on right page */
- lock_rec_inherit_to_gap(page_get_supremum_rec(left_page),
- page_rec_get_next(
- page_get_infimum_rec(right_page)));
+ lock_rec_inherit_to_gap(left_block, right_block,
+ PAGE_HEAP_NO_SUPREMUM, heap_no);
lock_mutex_exit_kernel();
}
-/*****************************************************************
+/*************************************************************//**
Updates the lock table when a page is merged to the right. */
-
+UNIV_INTERN
void
lock_update_merge_right(
/*====================*/
- rec_t* orig_succ, /* in: original successor of infimum
- on the right page before merge */
- page_t* left_page) /* in: merged index page which will be
- discarded */
+ const buf_block_t* right_block, /*!< in: right page to
+ which merged */
+ const rec_t* orig_succ, /*!< in: original
+ successor of infimum
+ on the right page
+ before merge */
+ const buf_block_t* left_block) /*!< in: merged index
+ page which will be
+ discarded */
{
lock_mutex_enter_kernel();
@@ -2812,170 +2896,187 @@ lock_update_merge_right(
original successor of infimum on the right page, to which the left
page was merged */
- lock_rec_inherit_to_gap(orig_succ, page_get_supremum_rec(left_page));
+ lock_rec_inherit_to_gap(right_block, left_block,
+ page_rec_get_heap_no(orig_succ),
+ PAGE_HEAP_NO_SUPREMUM);
/* Reset the locks on the supremum of the left page, releasing
waiting transactions */
- lock_rec_reset_and_release_wait(page_get_supremum_rec(left_page));
+ lock_rec_reset_and_release_wait(left_block,
+ PAGE_HEAP_NO_SUPREMUM);
- lock_rec_free_all_from_discard_page(left_page);
+ lock_rec_free_all_from_discard_page(left_block);
lock_mutex_exit_kernel();
}
-/*****************************************************************
+/*************************************************************//**
Updates the lock table when the root page is copied to another in
btr_root_raise_and_insert. Note that we leave lock structs on the
root page, even though they do not make sense on other than leaf
pages: the reason is that in a pessimistic update the infimum record
of the root page will act as a dummy carrier of the locks of the record
to be updated. */
-
+UNIV_INTERN
void
lock_update_root_raise(
/*===================*/
- page_t* new_page, /* in: index page to which copied */
- page_t* root) /* in: root page */
+ const buf_block_t* block, /*!< in: index page to which copied */
+ const buf_block_t* root) /*!< in: root page */
{
- ulint comp;
lock_mutex_enter_kernel();
- comp = page_is_comp(root);
- ut_ad(comp == page_is_comp(new_page));
/* Move the locks on the supremum of the root to the supremum
- of new_page */
+ of block */
- lock_rec_move(page_get_supremum_rec(new_page),
- page_get_supremum_rec(root), comp);
+ lock_rec_move(block, root,
+ PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
lock_mutex_exit_kernel();
}
-/*****************************************************************
+/*************************************************************//**
Updates the lock table when a page is copied to another and the original page
is removed from the chain of leaf pages, except if page is the root! */
-
+UNIV_INTERN
void
lock_update_copy_and_discard(
/*=========================*/
- page_t* new_page, /* in: index page to which copied */
- page_t* page) /* in: index page; NOT the root! */
+ const buf_block_t* new_block, /*!< in: index page to
+ which copied */
+ const buf_block_t* block) /*!< in: index page;
+ NOT the root! */
{
- ulint comp;
lock_mutex_enter_kernel();
- comp = page_is_comp(page);
- ut_ad(comp == page_is_comp(new_page));
/* Move the locks on the supremum of the old page to the supremum
of new_page */
- lock_rec_move(page_get_supremum_rec(new_page),
- page_get_supremum_rec(page), comp);
- lock_rec_free_all_from_discard_page(page);
+ lock_rec_move(new_block, block,
+ PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
+ lock_rec_free_all_from_discard_page(block);
lock_mutex_exit_kernel();
}
-/*****************************************************************
+/*************************************************************//**
Updates the lock table when a page is split to the left. */
-
+UNIV_INTERN
void
lock_update_split_left(
/*===================*/
- page_t* right_page, /* in: right page */
- page_t* left_page) /* in: left page */
+ const buf_block_t* right_block, /*!< in: right page */
+ const buf_block_t* left_block) /*!< in: left page */
{
+ ulint heap_no = lock_get_min_heap_no(right_block);
+
lock_mutex_enter_kernel();
/* Inherit the locks to the supremum of the left page from the
successor of the infimum on the right page */
- lock_rec_inherit_to_gap(page_get_supremum_rec(left_page),
- page_rec_get_next(
- page_get_infimum_rec(right_page)));
+ lock_rec_inherit_to_gap(left_block, right_block,
+ PAGE_HEAP_NO_SUPREMUM, heap_no);
lock_mutex_exit_kernel();
}
-/*****************************************************************
+/*************************************************************//**
Updates the lock table when a page is merged to the left. */
-
+UNIV_INTERN
void
lock_update_merge_left(
/*===================*/
- page_t* left_page, /* in: left page to which merged */
- rec_t* orig_pred, /* in: original predecessor of supremum
- on the left page before merge */
- page_t* right_page) /* in: merged index page which will be
- discarded */
+ const buf_block_t* left_block, /*!< in: left page to
+ which merged */
+ const rec_t* orig_pred, /*!< in: original predecessor
+ of supremum on the left page
+ before merge */
+ const buf_block_t* right_block) /*!< in: merged index page
+ which will be discarded */
{
- rec_t* left_next_rec;
- rec_t* left_supremum;
- ulint comp;
+ const rec_t* left_next_rec;
+
+ ut_ad(left_block->frame == page_align(orig_pred));
+
lock_mutex_enter_kernel();
- comp = page_is_comp(left_page);
- ut_ad(comp == page_is_comp(right_page));
- ut_ad(left_page == buf_frame_align(orig_pred));
- left_next_rec = page_rec_get_next(orig_pred);
- left_supremum = page_get_supremum_rec(left_page);
+ left_next_rec = page_rec_get_next_const(orig_pred);
- if (UNIV_LIKELY(left_next_rec != left_supremum)) {
+ if (!page_rec_is_supremum(left_next_rec)) {
/* Inherit the locks on the supremum of the left page to the
first record which was moved from the right page */
- lock_rec_inherit_to_gap(left_next_rec, left_supremum);
+ lock_rec_inherit_to_gap(left_block, left_block,
+ page_rec_get_heap_no(left_next_rec),
+ PAGE_HEAP_NO_SUPREMUM);
/* Reset the locks on the supremum of the left page,
releasing waiting transactions */
- lock_rec_reset_and_release_wait(left_supremum);
+ lock_rec_reset_and_release_wait(left_block,
+ PAGE_HEAP_NO_SUPREMUM);
}
/* Move the locks from the supremum of right page to the supremum
of the left page */
- lock_rec_move(left_supremum, page_get_supremum_rec(right_page), comp);
+ lock_rec_move(left_block, right_block,
+ PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
- lock_rec_free_all_from_discard_page(right_page);
+ lock_rec_free_all_from_discard_page(right_block);
lock_mutex_exit_kernel();
}
-/*****************************************************************
+/*************************************************************//**
Resets the original locks on heir and replaces them with gap type locks
inherited from rec. */
-
+UNIV_INTERN
void
lock_rec_reset_and_inherit_gap_locks(
/*=================================*/
- rec_t* heir, /* in: heir record */
- rec_t* rec) /* in: record */
+ const buf_block_t* heir_block, /*!< in: block containing the
+ record which inherits */
+ const buf_block_t* block, /*!< in: block containing the
+ record from which inherited;
+ does NOT reset the locks on
+ this record */
+ ulint heir_heap_no, /*!< in: heap_no of the
+ inheriting record */
+ ulint heap_no) /*!< in: heap_no of the
+ donating record */
{
mutex_enter(&kernel_mutex);
- lock_rec_reset_and_release_wait(heir);
+ lock_rec_reset_and_release_wait(heir_block, heir_heap_no);
- lock_rec_inherit_to_gap(heir, rec);
+ lock_rec_inherit_to_gap(heir_block, block, heir_heap_no, heap_no);
mutex_exit(&kernel_mutex);
}
-/*****************************************************************
+/*************************************************************//**
Updates the lock table when a page is discarded. */
-
+UNIV_INTERN
void
lock_update_discard(
/*================*/
- rec_t* heir, /* in: record which will inherit the locks */
- page_t* page) /* in: index page which will be discarded */
+ const buf_block_t* heir_block, /*!< in: index page
+ which will inherit the locks */
+ ulint heir_heap_no, /*!< in: heap_no of the record
+ which will inherit the locks */
+ const buf_block_t* block) /*!< in: index page
+ which will be discarded */
{
- rec_t* rec;
+ const page_t* page = block->frame;
+ const rec_t* rec;
+ ulint heap_no;
lock_mutex_enter_kernel();
- if (NULL == lock_rec_get_first_on_page(page)) {
+ if (!lock_rec_get_first_on_page(block)) {
/* No locks exist on page, nothing to do */
lock_mutex_exit_kernel();
@@ -2986,128 +3087,179 @@ lock_update_discard(
/* Inherit all the locks on the page to the record and reset all
the locks on the page */
- rec = page_get_infimum_rec(page);
+ if (page_is_comp(page)) {
+ rec = page + PAGE_NEW_INFIMUM;
- for (;;) {
- lock_rec_inherit_to_gap(heir, rec);
+ do {
+ heap_no = rec_get_heap_no_new(rec);
- /* Reset the locks on rec, releasing waiting transactions */
+ lock_rec_inherit_to_gap(heir_block, block,
+ heir_heap_no, heap_no);
- lock_rec_reset_and_release_wait(rec);
+ lock_rec_reset_and_release_wait(block, heap_no);
- if (page_rec_is_supremum(rec)) {
+ rec = page + rec_get_next_offs(rec, TRUE);
+ } while (heap_no != PAGE_HEAP_NO_SUPREMUM);
+ } else {
+ rec = page + PAGE_OLD_INFIMUM;
- break;
- }
+ do {
+ heap_no = rec_get_heap_no_old(rec);
+
+ lock_rec_inherit_to_gap(heir_block, block,
+ heir_heap_no, heap_no);
- rec = page_rec_get_next(rec);
+ lock_rec_reset_and_release_wait(block, heap_no);
+
+ rec = page + rec_get_next_offs(rec, FALSE);
+ } while (heap_no != PAGE_HEAP_NO_SUPREMUM);
}
- lock_rec_free_all_from_discard_page(page);
+ lock_rec_free_all_from_discard_page(block);
lock_mutex_exit_kernel();
}
-/*****************************************************************
+/*************************************************************//**
Updates the lock table when a new user record is inserted. */
-
+UNIV_INTERN
void
lock_update_insert(
/*===============*/
- rec_t* rec) /* in: the inserted record */
+ const buf_block_t* block, /*!< in: buffer block containing rec */
+ const rec_t* rec) /*!< in: the inserted record */
{
- lock_mutex_enter_kernel();
+ ulint receiver_heap_no;
+ ulint donator_heap_no;
+
+ ut_ad(block->frame == page_align(rec));
/* Inherit the gap-locking locks for rec, in gap mode, from the next
record */
- lock_rec_inherit_to_gap_if_gap_lock(rec, page_rec_get_next(rec));
+ if (page_rec_is_comp(rec)) {
+ receiver_heap_no = rec_get_heap_no_new(rec);
+ donator_heap_no = rec_get_heap_no_new(
+ page_rec_get_next_low(rec, TRUE));
+ } else {
+ receiver_heap_no = rec_get_heap_no_old(rec);
+ donator_heap_no = rec_get_heap_no_old(
+ page_rec_get_next_low(rec, FALSE));
+ }
+ lock_mutex_enter_kernel();
+ lock_rec_inherit_to_gap_if_gap_lock(block,
+ receiver_heap_no, donator_heap_no);
lock_mutex_exit_kernel();
}
-/*****************************************************************
+/*************************************************************//**
Updates the lock table when a record is removed. */
-
+UNIV_INTERN
void
lock_update_delete(
/*===============*/
- rec_t* rec) /* in: the record to be removed */
+ const buf_block_t* block, /*!< in: buffer block containing rec */
+ const rec_t* rec) /*!< in: the record to be removed */
{
+ const page_t* page = block->frame;
+ ulint heap_no;
+ ulint next_heap_no;
+
+ ut_ad(page == page_align(rec));
+
+ if (page_is_comp(page)) {
+ heap_no = rec_get_heap_no_new(rec);
+ next_heap_no = rec_get_heap_no_new(page
+ + rec_get_next_offs(rec,
+ TRUE));
+ } else {
+ heap_no = rec_get_heap_no_old(rec);
+ next_heap_no = rec_get_heap_no_old(page
+ + rec_get_next_offs(rec,
+ FALSE));
+ }
+
lock_mutex_enter_kernel();
/* Let the next record inherit the locks from rec, in gap mode */
- lock_rec_inherit_to_gap(page_rec_get_next(rec), rec);
+ lock_rec_inherit_to_gap(block, block, next_heap_no, heap_no);
/* Reset the lock bits on rec and release waiting transactions */
- lock_rec_reset_and_release_wait(rec);
+ lock_rec_reset_and_release_wait(block, heap_no);
lock_mutex_exit_kernel();
}
-/*************************************************************************
+/*********************************************************************//**
Stores on the page infimum record the explicit locks of another record.
This function is used to store the lock state of a record when it is
updated and the size of the record changes in the update. The record
is moved in such an update, perhaps to another page. The infimum record
acts as a dummy carrier record, taking care of lock releases while the
actual record is being moved. */
-
+UNIV_INTERN
void
lock_rec_store_on_page_infimum(
/*===========================*/
- page_t* page, /* in: page containing the record */
- rec_t* rec) /* in: record whose lock state is stored
- on the infimum record of the same page; lock
- bits are reset on the record */
+ const buf_block_t* block, /*!< in: buffer block containing rec */
+ const rec_t* rec) /*!< in: record whose lock state
+ is stored on the infimum
+ record of the same page; lock
+ bits are reset on the
+ record */
{
- ut_ad(page == buf_frame_align(rec));
+ ulint heap_no = page_rec_get_heap_no(rec);
+
+ ut_ad(block->frame == page_align(rec));
lock_mutex_enter_kernel();
- lock_rec_move(page_get_infimum_rec(page), rec, page_is_comp(page));
+ lock_rec_move(block, block, PAGE_HEAP_NO_INFIMUM, heap_no);
lock_mutex_exit_kernel();
}
-/*************************************************************************
+/*********************************************************************//**
Restores the state of explicit lock requests on a single record, where the
state was stored on the infimum of the page. */
-
+UNIV_INTERN
void
lock_rec_restore_from_page_infimum(
/*===============================*/
- rec_t* rec, /* in: record whose lock state is restored */
- page_t* page) /* in: page (rec is not necessarily on this page)
- whose infimum stored the lock state; lock bits are
- reset on the infimum */
+ const buf_block_t* block, /*!< in: buffer block containing rec */
+ const rec_t* rec, /*!< in: record whose lock state
+ is restored */
+ const buf_block_t* donator)/*!< in: page (rec is not
+ necessarily on this page)
+ whose infimum stored the lock
+ state; lock bits are reset on
+ the infimum */
{
- ulint comp;
+ ulint heap_no = page_rec_get_heap_no(rec);
+
lock_mutex_enter_kernel();
- comp = page_is_comp(page);
- ut_ad(!comp == !page_rec_is_comp(rec));
- lock_rec_move(rec, page_get_infimum_rec(page), comp);
+ lock_rec_move(block, donator, heap_no, PAGE_HEAP_NO_INFIMUM);
lock_mutex_exit_kernel();
}
/*=========== DEADLOCK CHECKING ======================================*/
-/************************************************************************
-Checks if a lock request results in a deadlock. */
+/********************************************************************//**
+Checks if a lock request results in a deadlock.
+@return TRUE if a deadlock was detected and we chose trx as a victim;
+FALSE if no deadlock, or there was a deadlock, but we chose other
+transaction(s) as victim(s) */
static
ibool
lock_deadlock_occurs(
/*=================*/
- /* out: TRUE if a deadlock was detected and we
- chose trx as a victim; FALSE if no deadlock, or
- there was a deadlock, but we chose other
- transaction(s) as victim(s) */
- lock_t* lock, /* in: lock the transaction is requesting */
- trx_t* trx) /* in: transaction */
+ lock_t* lock, /*!< in: lock the transaction is requesting */
+ trx_t* trx) /*!< in: transaction */
{
dict_table_t* table;
dict_index_t* index;
@@ -3139,8 +3291,8 @@ retry:
goto retry;
}
- if (ret == LOCK_VICTIM_IS_START) {
- if (lock_get_type(lock) & LOCK_TABLE) {
+ if (UNIV_UNLIKELY(ret == LOCK_VICTIM_IS_START)) {
+ if (lock_get_type_low(lock) & LOCK_TABLE) {
table = lock->un_member.tab_lock.table;
index = NULL;
} else {
@@ -3159,27 +3311,24 @@ retry:
return(FALSE);
}
-/************************************************************************
-Looks recursively for a deadlock. */
+/********************************************************************//**
+Looks recursively for a deadlock.
+@return 0 if no deadlock found, LOCK_VICTIM_IS_START if there was a
+deadlock and we chose 'start' as the victim, LOCK_VICTIM_IS_OTHER if a
+deadlock was found and we chose some other trx as a victim: we must do
+the search again in this last case because there may be another
+deadlock! */
static
ulint
lock_deadlock_recursive(
/*====================*/
- /* out: 0 if no deadlock found,
- LOCK_VICTIM_IS_START if there was a deadlock
- and we chose 'start' as the victim,
- LOCK_VICTIM_IS_OTHER if a deadlock
- was found and we chose some other trx as a
- victim: we must do the search again in this
- last case because there may be another
- deadlock! */
- trx_t* start, /* in: recursion starting point */
- trx_t* trx, /* in: a transaction waiting for a lock */
- lock_t* wait_lock, /* in: the lock trx is waiting to be granted */
- ulint* cost, /* in/out: number of calculation steps thus
+ trx_t* start, /*!< in: recursion starting point */
+ trx_t* trx, /*!< in: a transaction waiting for a lock */
+ lock_t* wait_lock, /*!< in: the lock trx is waiting to be granted */
+ ulint* cost, /*!< in/out: number of calculation steps thus
far: if this exceeds LOCK_MAX_N_STEPS_...
we return LOCK_VICTIM_IS_START */
- ulint depth) /* in: recursion depth: if this exceeds
+ ulint depth) /*!< in: recursion depth: if this exceeds
LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we
return LOCK_VICTIM_IS_START */
{
@@ -3204,7 +3353,7 @@ lock_deadlock_recursive(
lock = wait_lock;
- if (lock_get_type(wait_lock) == LOCK_REC) {
+ if (lock_get_type_low(wait_lock) == LOCK_REC) {
bit_no = lock_rec_find_set_bit(wait_lock);
@@ -3214,15 +3363,15 @@ lock_deadlock_recursive(
/* Look at the locks ahead of wait_lock in the lock queue */
for (;;) {
- if (lock_get_type(lock) & LOCK_TABLE) {
+ if (lock_get_type_low(lock) & LOCK_TABLE) {
lock = UT_LIST_GET_PREV(un_member.tab_lock.locks,
lock);
} else {
- ut_ad(lock_get_type(lock) == LOCK_REC);
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
ut_a(bit_no != ULINT_UNDEFINED);
- lock = lock_rec_get_prev(lock, bit_no);
+ lock = (lock_t*) lock_rec_get_prev(lock, bit_no);
}
if (lock == NULL) {
@@ -3258,7 +3407,7 @@ lock_deadlock_recursive(
fputs("*** (1) WAITING FOR THIS LOCK"
" TO BE GRANTED:\n", ef);
- if (lock_get_type(wait_lock) == LOCK_REC) {
+ if (lock_get_type_low(wait_lock) == LOCK_REC) {
lock_rec_print(ef, wait_lock);
} else {
lock_table_print(ef, wait_lock);
@@ -3270,7 +3419,7 @@ lock_deadlock_recursive(
fputs("*** (2) HOLDS THE LOCK(S):\n", ef);
- if (lock_get_type(lock) == LOCK_REC) {
+ if (lock_get_type_low(lock) == LOCK_REC) {
lock_rec_print(ef, lock);
} else {
lock_table_print(ef, lock);
@@ -3279,7 +3428,7 @@ lock_deadlock_recursive(
fputs("*** (2) WAITING FOR THIS LOCK"
" TO BE GRANTED:\n", ef);
- if (lock_get_type(start->wait_lock)
+ if (lock_get_type_low(start->wait_lock)
== LOCK_REC) {
lock_rec_print(ef, start->wait_lock);
} else {
@@ -3355,18 +3504,18 @@ lock_deadlock_recursive(
/*========================= TABLE LOCKS ==============================*/
-/*************************************************************************
+/*********************************************************************//**
Creates a table lock object and adds it as the last in the lock queue
-of the table. Does NOT check for deadlocks or lock compatibility. */
+of the table. Does NOT check for deadlocks or lock compatibility.
+@return own: new lock object */
UNIV_INLINE
lock_t*
lock_table_create(
/*==============*/
- /* out, own: new lock object */
- dict_table_t* table, /* in: database table in dictionary cache */
- ulint type_mode,/* in: lock mode possibly ORed with
+ dict_table_t* table, /*!< in: database table in dictionary cache */
+ ulint type_mode,/*!< in: lock mode possibly ORed with
LOCK_WAIT */
- trx_t* trx) /* in: trx */
+ trx_t* trx) /*!< in: trx */
{
lock_t* lock;
@@ -3377,15 +3526,16 @@ lock_table_create(
++table->n_waiting_or_granted_auto_inc_locks;
}
+ /* For AUTOINC locking we reuse the lock instance only if
+ there is no wait involved; otherwise we allocate the waiting
+ lock from the transaction lock heap. */
if (type_mode == LOCK_AUTO_INC) {
- /* Only one trx can have the lock on the table
- at a time: we may use the memory preallocated
- to the table object */
- lock = table->auto_inc_lock;
+ lock = table->autoinc_lock;
- ut_a(trx->auto_inc_lock == NULL);
- trx->auto_inc_lock = lock;
+ table->autoinc_trx = trx;
+
+ ib_vector_push(trx->autoinc_locks, lock);
} else {
lock = mem_heap_alloc(trx->lock_heap, sizeof(lock_t));
}
@@ -3399,7 +3549,7 @@ lock_table_create(
UT_LIST_ADD_LAST(un_member.tab_lock.locks, table->locks, lock);
- if (type_mode & LOCK_WAIT) {
+ if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
lock_set_lock_and_trx_wait(lock, trx);
}
@@ -3407,7 +3557,7 @@ lock_table_create(
return(lock);
}
-/*****************************************************************
+/*************************************************************//**
Removes a table lock request from the queue and the trx list of locks;
this is a low-level function which does NOT check if waiting requests
can now be granted. */
@@ -3415,18 +3565,42 @@ UNIV_INLINE
void
lock_table_remove_low(
/*==================*/
- lock_t* lock) /* in: table lock */
+ lock_t* lock) /*!< in: table lock */
{
- dict_table_t* table;
trx_t* trx;
+ dict_table_t* table;
ut_ad(mutex_own(&kernel_mutex));
- table = lock->un_member.tab_lock.table;
trx = lock->trx;
+ table = lock->un_member.tab_lock.table;
- if (lock == trx->auto_inc_lock) {
- trx->auto_inc_lock = NULL;
+ /* Remove the table from the transaction's AUTOINC vector, if
+ the lock that is being released is an AUTOINC lock. */
+ if (lock_get_mode(lock) == LOCK_AUTO_INC) {
+
+ /* The table's AUTOINC lock can get transferred to
+ another transaction before we get here. */
+ if (table->autoinc_trx == trx) {
+ table->autoinc_trx = NULL;
+ }
+
+ /* The locks must be freed in the reverse order from
+ the one in which they were acquired. This is to avoid
+ traversing the AUTOINC lock vector unnecessarily.
+
+ We only store locks that were granted in the
+ trx->autoinc_locks vector (see lock_table_create()
+ and lock_grant()). Therefore it can be empty and we
+ need to check for that. */
+
+ if (!lock_get_wait(lock)
+ && !ib_vector_is_empty(trx->autoinc_locks)) {
+ lock_t* autoinc_lock;
+
+ autoinc_lock = ib_vector_pop(trx->autoinc_locks);
+ ut_a(autoinc_lock == lock);
+ }
ut_a(table->n_waiting_or_granted_auto_inc_locks > 0);
--table->n_waiting_or_granted_auto_inc_locks;
@@ -3436,23 +3610,21 @@ lock_table_remove_low(
UT_LIST_REMOVE(un_member.tab_lock.locks, table->locks, lock);
}
-/*************************************************************************
+/*********************************************************************//**
Enqueues a waiting request for a table lock which cannot be granted
-immediately. Checks for deadlocks. */
+immediately. Checks for deadlocks.
+@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
+DB_SUCCESS; DB_SUCCESS means that there was a deadlock, but another
+transaction was chosen as a victim, and we got the lock immediately:
+no need to wait then */
static
ulint
lock_table_enqueue_waiting(
/*=======================*/
- /* out: DB_LOCK_WAIT, DB_DEADLOCK, or
- DB_QUE_THR_SUSPENDED, or DB_SUCCESS;
- DB_SUCCESS means that there was a deadlock,
- but another transaction was chosen as a
- victim, and we got the lock immediately:
- no need to wait then */
- ulint mode, /* in: lock mode this transaction is
+ ulint mode, /*!< in: lock mode this transaction is
requesting */
- dict_table_t* table, /* in: table */
- que_thr_t* thr) /* in: query thread */
+ dict_table_t* table, /*!< in: table */
+ que_thr_t* thr) /*!< in: query thread */
{
lock_t* lock;
trx_t* trx;
@@ -3471,7 +3643,11 @@ lock_table_enqueue_waiting(
trx = thr_get_trx(thr);
- if (trx->dict_operation) {
+ switch (trx_get_dict_operation(trx)) {
+ case TRX_DICT_OP_NONE:
+ break;
+ case TRX_DICT_OP_TABLE:
+ case TRX_DICT_OP_INDEX:
ut_print_timestamp(stderr);
fputs(" InnoDB: Error: a table lock wait happens"
" in a dictionary operation!\n"
@@ -3492,8 +3668,10 @@ lock_table_enqueue_waiting(
if (lock_deadlock_occurs(lock, trx)) {
- lock_reset_lock_and_trx_wait(lock);
+ /* The order here is important, we don't want to
+ lose the state of the lock before calling remove. */
lock_table_remove_low(lock);
+ lock_reset_lock_and_trx_wait(lock);
return(DB_DEADLOCK);
}
@@ -3514,19 +3692,19 @@ lock_table_enqueue_waiting(
return(DB_LOCK_WAIT);
}
-/*************************************************************************
+/*********************************************************************//**
Checks if other transactions have an incompatible mode lock request in
the lock queue. */
UNIV_INLINE
ibool
lock_table_other_has_incompatible(
/*==============================*/
- trx_t* trx, /* in: transaction, or NULL if all
+ trx_t* trx, /*!< in: transaction, or NULL if all
transactions should be included */
- ulint wait, /* in: LOCK_WAIT if also waiting locks are
+ ulint wait, /*!< in: LOCK_WAIT if also waiting locks are
taken into account, or 0 if not */
- dict_table_t* table, /* in: table */
- ulint mode) /* in: lock mode */
+ dict_table_t* table, /*!< in: table */
+ enum lock_mode mode) /*!< in: lock mode */
{
lock_t* lock;
@@ -3549,20 +3727,19 @@ lock_table_other_has_incompatible(
return(FALSE);
}
-/*************************************************************************
+/*********************************************************************//**
Locks the specified database table in the mode given. If the lock cannot
-be granted immediately, the query thread is put to wait. */
-
+be granted immediately, the query thread is put to wait.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
ulint
lock_table(
/*=======*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
+ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is set,
does nothing */
- dict_table_t* table, /* in: database table in dictionary cache */
- ulint mode, /* in: lock mode */
- que_thr_t* thr) /* in: query thread */
+ dict_table_t* table, /*!< in: database table in dictionary cache */
+ enum lock_mode mode, /*!< in: lock mode */
+ que_thr_t* thr) /*!< in: query thread */
{
trx_t* trx;
ulint err;
@@ -3613,40 +3790,14 @@ lock_table(
return(DB_SUCCESS);
}
-/*************************************************************************
-Checks if there are any locks set on the table. */
-
-ibool
-lock_is_on_table(
-/*=============*/
- /* out: TRUE if there are lock(s) */
- dict_table_t* table) /* in: database table in dictionary cache */
-{
- ibool ret;
-
- ut_ad(table);
-
- lock_mutex_enter_kernel();
-
- if (UT_LIST_GET_LAST(table->locks)) {
- ret = TRUE;
- } else {
- ret = FALSE;
- }
-
- lock_mutex_exit_kernel();
-
- return(ret);
-}
-
-/*************************************************************************
-Checks if a waiting table lock request still has to wait in a queue. */
+/*********************************************************************//**
+Checks if a waiting table lock request still has to wait in a queue.
+@return TRUE if still has to wait */
static
ibool
lock_table_has_to_wait_in_queue(
/*============================*/
- /* out: TRUE if still has to wait */
- lock_t* wait_lock) /* in: waiting table lock */
+ lock_t* wait_lock) /*!< in: waiting table lock */
{
dict_table_t* table;
lock_t* lock;
@@ -3671,7 +3822,7 @@ lock_table_has_to_wait_in_queue(
return(FALSE);
}
-/*****************************************************************
+/*************************************************************//**
Removes a table lock request, waiting or granted, from the queue and grants
locks to other transactions in the queue, if they now are entitled to a
lock. */
@@ -3679,14 +3830,14 @@ static
void
lock_table_dequeue(
/*===============*/
- lock_t* in_lock)/* in: table lock object; transactions waiting
+ lock_t* in_lock)/*!< in: table lock object; transactions waiting
behind will get their lock requests granted, if
they are now qualified to it */
{
lock_t* lock;
ut_ad(mutex_own(&kernel_mutex));
- ut_a(lock_get_type(in_lock) == LOCK_TABLE);
+ ut_a(lock_get_type_low(in_lock) == LOCK_TABLE);
lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, in_lock);
@@ -3710,30 +3861,32 @@ lock_table_dequeue(
/*=========================== LOCK RELEASE ==============================*/
-/*****************************************************************
+/*************************************************************//**
Removes a granted record lock of a transaction from the queue and grants
locks to other transactions waiting in the queue if they now are entitled
to a lock. */
-
+UNIV_INTERN
void
lock_rec_unlock(
/*============*/
- trx_t* trx, /* in: transaction that has set a record
- lock */
- rec_t* rec, /* in: record */
- ulint lock_mode) /* in: LOCK_S or LOCK_X */
+ trx_t* trx, /*!< in: transaction that has
+ set a record lock */
+ const buf_block_t* block, /*!< in: buffer block containing rec */
+ const rec_t* rec, /*!< in: record */
+ enum lock_mode lock_mode)/*!< in: LOCK_S or LOCK_X */
{
lock_t* lock;
lock_t* release_lock = NULL;
ulint heap_no;
ut_ad(trx && rec);
+ ut_ad(block->frame == page_align(rec));
- mutex_enter(&kernel_mutex);
+ heap_no = page_rec_get_heap_no(rec);
- heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
+ mutex_enter(&kernel_mutex);
- lock = lock_rec_get_first(rec);
+ lock = lock_rec_get_first(block, heap_no);
/* Find the last lock with the same lock_mode and transaction
from the record. */
@@ -3744,7 +3897,7 @@ lock_rec_unlock(
ut_a(!lock_get_wait(lock));
}
- lock = lock_rec_get_next(rec, lock);
+ lock = lock_rec_get_next(heap_no, lock);
}
/* If a record lock is found, release the record lock */
@@ -3764,7 +3917,7 @@ lock_rec_unlock(
/* Check if we can now grant waiting lock requests */
- lock = lock_rec_get_first(rec);
+ lock = lock_rec_get_first(block, heap_no);
while (lock != NULL) {
if (lock_get_wait(lock)
@@ -3774,54 +3927,20 @@ lock_rec_unlock(
lock_grant(lock);
}
- lock = lock_rec_get_next(rec, lock);
+ lock = lock_rec_get_next(heap_no, lock);
}
mutex_exit(&kernel_mutex);
}
-/*************************************************************************
-Releases a table lock.
-Releases possible other transactions waiting for this lock. */
-
-void
-lock_table_unlock(
-/*==============*/
- lock_t* lock) /* in: lock */
-{
- mutex_enter(&kernel_mutex);
-
- lock_table_dequeue(lock);
-
- mutex_exit(&kernel_mutex);
-}
-
-/*************************************************************************
-Releases an auto-inc lock a transaction possibly has on a table.
-Releases possible other transactions waiting for this lock. */
-
-void
-lock_table_unlock_auto_inc(
-/*=======================*/
- trx_t* trx) /* in: transaction */
-{
- if (trx->auto_inc_lock) {
- mutex_enter(&kernel_mutex);
-
- lock_table_dequeue(trx->auto_inc_lock);
-
- mutex_exit(&kernel_mutex);
- }
-}
-
-/*************************************************************************
+/*********************************************************************//**
Releases transaction locks, and releases possible other transactions waiting
because of these locks. */
-
+UNIV_INTERN
void
lock_release_off_kernel(
/*====================*/
- trx_t* trx) /* in: transaction */
+ trx_t* trx) /*!< in: transaction */
{
dict_table_t* table;
ulint count;
@@ -3837,15 +3956,14 @@ lock_release_off_kernel(
count++;
- if (lock_get_type(lock) == LOCK_REC) {
+ if (lock_get_type_low(lock) == LOCK_REC) {
lock_rec_dequeue_from_page(lock);
} else {
- ut_ad(lock_get_type(lock) & LOCK_TABLE);
+ ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
if (lock_get_mode(lock) != LOCK_IS
- && 0 != ut_dulint_cmp(trx->undo_no,
- ut_dulint_zero)) {
+ && !ut_dulint_is_zero(trx->undo_no)) {
/* The trx may have modified the table. We
block the use of the MySQL query cache for
@@ -3874,27 +3992,32 @@ lock_release_off_kernel(
lock = UT_LIST_GET_LAST(trx->trx_locks);
}
- mem_heap_empty(trx->lock_heap);
+ ut_a(ib_vector_size(trx->autoinc_locks) == 0);
- ut_a(trx->auto_inc_lock == NULL);
+ mem_heap_empty(trx->lock_heap);
}
-/*************************************************************************
+/*********************************************************************//**
Cancels a waiting lock request and releases possible other transactions
waiting behind it. */
-
+UNIV_INTERN
void
lock_cancel_waiting_and_release(
/*============================*/
- lock_t* lock) /* in: waiting lock request */
+ lock_t* lock) /*!< in: waiting lock request */
{
ut_ad(mutex_own(&kernel_mutex));
- if (lock_get_type(lock) == LOCK_REC) {
+ if (lock_get_type_low(lock) == LOCK_REC) {
lock_rec_dequeue_from_page(lock);
} else {
- ut_ad(lock_get_type(lock) & LOCK_TABLE);
+ ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
+
+ if (lock->trx->autoinc_locks != NULL) {
+ /* Release the transaction's AUTOINC locks. */
+ lock_release_autoinc_locks(lock->trx);
+ }
lock_table_dequeue(lock);
}
@@ -3914,7 +4037,7 @@ lock_cancel_waiting_and_release(
|| lock_get_mode(lock) == LOCK_X)
-/*************************************************************************
+/*********************************************************************//**
Removes locks of a transaction on a table to be dropped.
If remove_also_table_sx_locks is TRUE then table-level S and X locks are
also removed in addition to other table-level and record-level locks.
@@ -3923,9 +4046,9 @@ static
void
lock_remove_all_on_table_for_trx(
/*=============================*/
- dict_table_t* table, /* in: table to be dropped */
- trx_t* trx, /* in: a transaction */
- ibool remove_also_table_sx_locks)/* in: also removes
+ dict_table_t* table, /*!< in: table to be dropped */
+ trx_t* trx, /*!< in: a transaction */
+ ibool remove_also_table_sx_locks)/*!< in: also removes
table S and X locks */
{
lock_t* lock;
@@ -3938,12 +4061,12 @@ lock_remove_all_on_table_for_trx(
while (lock != NULL) {
prev_lock = UT_LIST_GET_PREV(trx_locks, lock);
- if (lock_get_type(lock) == LOCK_REC
+ if (lock_get_type_low(lock) == LOCK_REC
&& lock->index->table == table) {
ut_a(!lock_get_wait(lock));
lock_rec_discard(lock);
- } else if (lock_get_type(lock) & LOCK_TABLE
+ } else if (lock_get_type_low(lock) & LOCK_TABLE
&& lock->un_member.tab_lock.table == table
&& (remove_also_table_sx_locks
|| !IS_LOCK_S_OR_X(lock))) {
@@ -3957,18 +4080,18 @@ lock_remove_all_on_table_for_trx(
}
}
-/*************************************************************************
+/*********************************************************************//**
Removes locks on a table to be dropped or truncated.
If remove_also_table_sx_locks is TRUE then table-level S and X locks are
also removed in addition to other table-level and record-level locks.
No lock, that is going to be removed, is allowed to be a wait lock. */
-
+UNIV_INTERN
void
lock_remove_all_on_table(
/*=====================*/
- dict_table_t* table, /* in: table to be dropped
+ dict_table_t* table, /*!< in: table to be dropped
or truncated */
- ibool remove_also_table_sx_locks)/* in: also removes
+ ibool remove_also_table_sx_locks)/*!< in: also removes
table S and X locks */
{
lock_t* lock;
@@ -4024,23 +4147,23 @@ lock_remove_all_on_table(
/*===================== VALIDATION AND DEBUGGING ====================*/
-/*************************************************************************
+/*********************************************************************//**
Prints info of a table lock. */
-
+UNIV_INTERN
void
lock_table_print(
/*=============*/
- FILE* file, /* in: file where to print */
- lock_t* lock) /* in: table type lock */
+ FILE* file, /*!< in: file where to print */
+ const lock_t* lock) /*!< in: table type lock */
{
ut_ad(mutex_own(&kernel_mutex));
- ut_a(lock_get_type(lock) == LOCK_TABLE);
+ ut_a(lock_get_type_low(lock) == LOCK_TABLE);
fputs("TABLE LOCK table ", file);
ut_print_name(file, lock->trx, TRUE,
lock->un_member.tab_lock.table->name);
- fprintf(file, " trx id %lu %lu",
- (ulong) (lock->trx)->id.high, (ulong) (lock->trx)->id.low);
+ fprintf(file, " trx id " TRX_ID_FMT,
+ TRX_ID_PREP_PRINTF(lock->trx->id));
if (lock_get_mode(lock) == LOCK_S) {
fputs(" lock mode S", file);
@@ -4064,27 +4187,27 @@ lock_table_print(
putc('\n', file);
}
-/*************************************************************************
+/*********************************************************************//**
Prints info of a record lock. */
-
+UNIV_INTERN
void
lock_rec_print(
/*===========*/
- FILE* file, /* in: file where to print */
- lock_t* lock) /* in: record type lock */
+ FILE* file, /*!< in: file where to print */
+ const lock_t* lock) /*!< in: record type lock */
{
- page_t* page;
- ulint space;
- ulint page_no;
- ulint i;
- mtr_t mtr;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ const buf_block_t* block;
+ ulint space;
+ ulint page_no;
+ ulint i;
+ mtr_t mtr;
+ mem_heap_t* heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ rec_offs_init(offsets_);
ut_ad(mutex_own(&kernel_mutex));
- ut_a(lock_get_type(lock) == LOCK_REC);
+ ut_a(lock_get_type_low(lock) == LOCK_REC);
space = lock->un_member.rec_lock.space;
page_no = lock->un_member.rec_lock.page_no;
@@ -4093,9 +4216,8 @@ lock_rec_print(
(ulong) space, (ulong) page_no,
(ulong) lock_rec_get_n_bits(lock));
dict_index_name_print(file, lock->trx, lock->index);
- fprintf(file, " trx id %lu %lu",
- (ulong) (lock->trx)->id.high,
- (ulong) (lock->trx)->id.low);
+ fprintf(file, " trx id " TRX_ID_FMT,
+ TRX_ID_PREP_PRINTF(lock->trx->id));
if (lock_get_mode(lock) == LOCK_S) {
fputs(" lock mode S", file);
@@ -4125,48 +4247,29 @@ lock_rec_print(
putc('\n', file);
- /* If the page is not in the buffer pool, we cannot load it
- because we have the kernel mutex and ibuf operations would
- break the latching order */
-
- page = buf_page_get_gen(space, page_no, RW_NO_LATCH,
- NULL, BUF_GET_IF_IN_POOL,
- __FILE__, __LINE__, &mtr);
- if (page) {
- page = buf_page_get_nowait(space, page_no, RW_S_LATCH, &mtr);
-
- if (!page) {
- /* Let us try to get an X-latch. If the current thread
- is holding an X-latch on the page, we cannot get an
- S-latch. */
-
- page = buf_page_get_nowait(space, page_no, RW_X_LATCH,
- &mtr);
- }
- }
-
- if (page) {
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
- }
+ block = buf_page_try_get(space, page_no, &mtr);
- for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
+ if (block) {
+ for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
- if (lock_rec_get_nth_bit(lock, i)) {
+ if (lock_rec_get_nth_bit(lock, i)) {
- fprintf(file, "Record lock, heap no %lu ", (ulong) i);
-
- if (page) {
- rec_t* rec
- = page_find_rec_with_heap_no(page, i);
+ const rec_t* rec
+ = page_find_rec_with_heap_no(
+ buf_block_get_frame(block), i);
offsets = rec_get_offsets(
rec, lock->index, offsets,
ULINT_UNDEFINED, &heap);
+
+ fprintf(file, "Record lock, heap no %lu ",
+ (ulong) i);
rec_print_new(file, rec, offsets);
+ putc('\n', file);
}
-
- putc('\n', file);
+ }
+ } else {
+ for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
+ fprintf(file, "Record lock, heap no %lu\n", (ulong) i);
}
}
@@ -4176,8 +4279,6 @@ lock_rec_print(
}
}
-#ifndef UNIV_HOTBACKUP
-
#ifdef UNIV_DEBUG
/* Print the number of lock structs from lock_print_info_summary() only
in non-production builds for performance reasons, see
@@ -4186,8 +4287,9 @@ http://bugs.mysql.com/36942 */
#endif /* UNIV_DEBUG */
#ifdef PRINT_NUM_OF_LOCK_STRUCTS
-/*************************************************************************
-Calculates the number of record lock structs in the record lock hash table. */
+/*********************************************************************//**
+Calculates the number of record lock structs in the record lock hash table.
+@return number of record locks */
static
ulint
lock_get_n_rec_locks(void)
@@ -4214,13 +4316,13 @@ lock_get_n_rec_locks(void)
}
#endif /* PRINT_NUM_OF_LOCK_STRUCTS */
-/*************************************************************************
+/*********************************************************************//**
Prints info of locks for all transactions. */
-
+UNIV_INTERN
void
lock_print_info_summary(
/*====================*/
- FILE* file) /* in: file where to print */
+ FILE* file) /*!< in: file where to print */
{
/* We must protect the MySQL thd->query field with a MySQL mutex, and
because the MySQL mutex must be reserved before the kernel_mutex of
@@ -4241,16 +4343,14 @@ lock_print_info_summary(
"TRANSACTIONS\n"
"------------\n", file);
- fprintf(file, "Trx id counter %lu %lu\n",
- (ulong) ut_dulint_get_high(trx_sys->max_trx_id),
- (ulong) ut_dulint_get_low(trx_sys->max_trx_id));
+ fprintf(file, "Trx id counter " TRX_ID_FMT "\n",
+ TRX_ID_PREP_PRINTF(trx_sys->max_trx_id));
fprintf(file,
- "Purge done for trx's n:o < %lu %lu undo n:o < %lu %lu\n",
- (ulong) ut_dulint_get_high(purge_sys->purge_trx_no),
- (ulong) ut_dulint_get_low(purge_sys->purge_trx_no),
- (ulong) ut_dulint_get_high(purge_sys->purge_undo_no),
- (ulong) ut_dulint_get_low(purge_sys->purge_undo_no));
+ "Purge done for trx's n:o < " TRX_ID_FMT
+ " undo n:o < " TRX_ID_FMT "\n",
+ TRX_ID_PREP_PRINTF(purge_sys->purge_trx_no),
+ TRX_ID_PREP_PRINTF(purge_sys->purge_undo_no));
fprintf(file,
"History list length %lu\n",
@@ -4263,18 +4363,15 @@ lock_print_info_summary(
#endif /* PRINT_NUM_OF_LOCK_STRUCTS */
}
-/*************************************************************************
+/*********************************************************************//**
Prints info of locks for each transaction. */
-
+UNIV_INTERN
void
lock_print_info_all_transactions(
/*=============================*/
- FILE* file) /* in: file where to print */
+ FILE* file) /*!< in: file where to print */
{
lock_t* lock;
- ulint space;
- ulint page_no;
- page_t* page;
ibool load_page_first = TRUE;
ulint nth_trx = 0;
ulint nth_lock = 0;
@@ -4328,14 +4425,11 @@ loop:
if (trx->read_view) {
fprintf(file,
"Trx read view will not see trx with"
- " id >= %lu %lu, sees < %lu %lu\n",
- (ulong) ut_dulint_get_high(
+ " id >= " TRX_ID_FMT
+ ", sees < " TRX_ID_FMT "\n",
+ TRX_ID_PREP_PRINTF(
trx->read_view->low_limit_id),
- (ulong) ut_dulint_get_low(
- trx->read_view->low_limit_id),
- (ulong) ut_dulint_get_high(
- trx->read_view->up_limit_id),
- (ulong) ut_dulint_get_low(
+ TRX_ID_PREP_PRINTF(
trx->read_view->up_limit_id));
}
@@ -4346,7 +4440,7 @@ loop:
(ulong) difftime(time(NULL),
trx->wait_started));
- if (lock_get_type(trx->wait_lock) == LOCK_REC) {
+ if (lock_get_type_low(trx->wait_lock) == LOCK_REC) {
lock_rec_print(file, trx->wait_lock);
} else {
lock_table_print(file, trx->wait_lock);
@@ -4380,18 +4474,33 @@ loop:
goto loop;
}
- if (lock_get_type(lock) == LOCK_REC) {
- space = lock->un_member.rec_lock.space;
- page_no = lock->un_member.rec_lock.page_no;
-
+ if (lock_get_type_low(lock) == LOCK_REC) {
if (load_page_first) {
+ ulint space = lock->un_member.rec_lock.space;
+ ulint zip_size= fil_space_get_zip_size(space);
+ ulint page_no = lock->un_member.rec_lock.page_no;
+
+ if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
+
+ /* It is a single table tablespace and
+ the .ibd file is missing (TRUNCATE
+ TABLE probably stole the locks): just
+ print the lock without attempting to
+ load the page in the buffer pool. */
+
+ fprintf(file, "RECORD LOCKS on"
+ " non-existing space %lu\n",
+ (ulong) space);
+ goto print_rec;
+ }
+
lock_mutex_exit_kernel();
innobase_mysql_end_print_arbitrary_thd();
mtr_start(&mtr);
- page = buf_page_get_with_no_latch(
- space, page_no, &mtr);
+ buf_page_get_with_no_latch(space, zip_size,
+ page_no, &mtr);
mtr_commit(&mtr);
@@ -4403,9 +4512,10 @@ loop:
goto loop;
}
+print_rec:
lock_rec_print(file, lock);
} else {
- ut_ad(lock_get_type(lock) & LOCK_TABLE);
+ ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
lock_table_print(file, lock);
}
@@ -4428,14 +4538,15 @@ loop:
goto loop;
}
-/*************************************************************************
-Validates the lock queue on a table. */
-
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Validates the lock queue on a table.
+@return TRUE if ok */
+static
ibool
lock_table_queue_validate(
/*======================*/
- /* out: TRUE if ok */
- dict_table_t* table) /* in: table */
+ dict_table_t* table) /*!< in: table */
{
lock_t* lock;
@@ -4464,29 +4575,34 @@ lock_table_queue_validate(
return(TRUE);
}
-/*************************************************************************
-Validates the lock queue on a single record. */
-
+/*********************************************************************//**
+Validates the lock queue on a single record.
+@return TRUE if ok */
+static
ibool
lock_rec_queue_validate(
/*====================*/
- /* out: TRUE if ok */
- rec_t* rec, /* in: record to look at */
- dict_index_t* index, /* in: index, or NULL if not known */
- const ulint* offsets)/* in: rec_get_offsets(rec, index) */
+ const buf_block_t* block, /*!< in: buffer block containing rec */
+ const rec_t* rec, /*!< in: record to look at */
+ dict_index_t* index, /*!< in: index, or NULL if not known */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
{
trx_t* impl_trx;
lock_t* lock;
+ ulint heap_no;
ut_a(rec);
+ ut_a(block->frame == page_align(rec));
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
+ heap_no = page_rec_get_heap_no(rec);
+
lock_mutex_enter_kernel();
if (!page_rec_is_user_rec(rec)) {
- lock = lock_rec_get_first(rec);
+ lock = lock_rec_get_first(block, heap_no);
while (lock) {
switch(lock->trx->conc_state) {
@@ -4508,7 +4624,7 @@ lock_rec_queue_validate(
ut_a(lock->index == index);
}
- lock = lock_rec_get_next(rec, lock);
+ lock = lock_rec_get_next(heap_no, lock);
}
lock_mutex_exit_kernel();
@@ -4516,36 +4632,41 @@ lock_rec_queue_validate(
return(TRUE);
}
- if (index && (index->type & DICT_CLUSTERED)) {
+ if (!index);
+ else if (dict_index_is_clust(index)) {
impl_trx = lock_clust_rec_some_has_impl(rec, index, offsets);
- if (impl_trx && lock_rec_other_has_expl_req(
- LOCK_S, 0, LOCK_WAIT, rec, impl_trx)) {
+ if (impl_trx
+ && lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT,
+ block, heap_no, impl_trx)) {
ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
- rec, impl_trx));
+ block, heap_no, impl_trx));
}
- }
-
- if (index && !(index->type & DICT_CLUSTERED)) {
+ } else {
/* The kernel mutex may get released temporarily in the
next function call: we have to release lock table mutex
to obey the latching order */
+ /* If this thread is holding the file space latch
+ (fil_space_t::latch), the following check WILL break
+ latching order and may cause a deadlock of threads. */
+
impl_trx = lock_sec_rec_some_has_impl_off_kernel(
rec, index, offsets);
- if (impl_trx && lock_rec_other_has_expl_req(
- LOCK_S, 0, LOCK_WAIT, rec, impl_trx)) {
+ if (impl_trx
+ && lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT,
+ block, heap_no, impl_trx)) {
ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
- rec, impl_trx));
+ block, heap_no, impl_trx));
}
}
- lock = lock_rec_get_first(rec);
+ lock = lock_rec_get_first(block, heap_no);
while (lock) {
ut_a(lock->trx->conc_state == TRX_ACTIVE
@@ -4559,7 +4680,7 @@ lock_rec_queue_validate(
if (!lock_rec_get_gap(lock) && !lock_get_wait(lock)) {
- ulint mode;
+ enum lock_mode mode;
if (lock_get_mode(lock) == LOCK_S) {
mode = LOCK_X;
@@ -4567,14 +4688,14 @@ lock_rec_queue_validate(
mode = LOCK_S;
}
ut_a(!lock_rec_other_has_expl_req(
- mode, 0, 0, rec, lock->trx));
+ mode, 0, 0, block, heap_no, lock->trx));
} else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) {
ut_a(lock_rec_has_to_wait_in_queue(lock));
}
- lock = lock_rec_get_next(rec, lock);
+ lock = lock_rec_get_next(heap_no, lock);
}
lock_mutex_exit_kernel();
@@ -4582,37 +4703,41 @@ lock_rec_queue_validate(
return(TRUE);
}
-/*************************************************************************
-Validates the record lock queues on a page. */
-
+/*********************************************************************//**
+Validates the record lock queues on a page.
+@return TRUE if ok */
+static
ibool
lock_rec_validate_page(
/*===================*/
- /* out: TRUE if ok */
- ulint space, /* in: space id */
- ulint page_no)/* in: page number */
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page_no)/*!< in: page number */
{
dict_index_t* index;
- page_t* page;
- lock_t* lock;
- rec_t* rec;
- ulint nth_lock = 0;
- ulint nth_bit = 0;
- ulint i;
- mtr_t mtr;
+ buf_block_t* block;
+ const page_t* page;
+ lock_t* lock;
+ const rec_t* rec;
+ ulint nth_lock = 0;
+ ulint nth_bit = 0;
+ ulint i;
+ mtr_t mtr;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
ut_ad(!mutex_own(&kernel_mutex));
mtr_start(&mtr);
- page = buf_page_get(space, page_no, RW_X_LATCH, &mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(zip_size != ULINT_UNDEFINED);
+ block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, &mtr);
+ buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
+
+ page = block->frame;
lock_mutex_enter_kernel();
loop:
@@ -4642,6 +4767,7 @@ loop:
index = lock->index;
rec = page_find_rec_with_heap_no(page, i);
+ ut_a(rec);
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &heap);
@@ -4651,7 +4777,12 @@ loop:
lock_mutex_exit_kernel();
- lock_rec_queue_validate(rec, index, offsets);
+ /* If this thread is holding the file space
+ latch (fil_space_t::latch), the following
+ check WILL break the latching order and may
+ cause a deadlock of threads. */
+
+ lock_rec_queue_validate(block, rec, index, offsets);
lock_mutex_enter_kernel();
@@ -4677,13 +4808,13 @@ function_exit:
return(TRUE);
}
-/*************************************************************************
-Validates the lock system. */
-
+/*********************************************************************//**
+Validates the lock system.
+@return TRUE if ok */
+static
ibool
lock_validate(void)
/*===============*/
- /* out: TRUE if ok */
{
lock_t* lock;
trx_t* trx;
@@ -4700,7 +4831,7 @@ lock_validate(void)
lock = UT_LIST_GET_FIRST(trx->trx_locks);
while (lock) {
- if (lock_get_type(lock) & LOCK_TABLE) {
+ if (lock_get_type_low(lock) & LOCK_TABLE) {
lock_table_queue_validate(
lock->un_member.tab_lock.table);
@@ -4741,7 +4872,9 @@ lock_validate(void)
lock_mutex_exit_kernel();
- lock_rec_validate_page(space, page_no);
+ lock_rec_validate_page(space,
+ fil_space_get_zip_size(space),
+ page_no);
lock_mutex_enter_kernel();
@@ -4753,65 +4886,74 @@ lock_validate(void)
return(TRUE);
}
-#endif /* !UNIV_HOTBACKUP */
+#endif /* UNIV_DEBUG */
/*============ RECORD LOCK CHECKS FOR ROW OPERATIONS ====================*/
-/*************************************************************************
+/*********************************************************************//**
Checks if locks of other transactions prevent an immediate insert of
a record. If they do, first tests if the query thread should anyway
be suspended for some reason; if not, then puts the transaction and
the query thread to the lock wait state and inserts a waiting request
-for a gap x-lock to the lock queue. */
-
+for a gap x-lock to the lock queue.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
ulint
lock_rec_insert_check_and_lock(
/*===========================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- rec_t* rec, /* in: record after which to insert */
- dict_index_t* index, /* in: index */
- que_thr_t* thr, /* in: query thread */
- ibool* inherit)/* out: set to TRUE if the new inserted
- record maybe should inherit LOCK_GAP type
- locks from the successor record */
+ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is
+ set, does nothing */
+ const rec_t* rec, /*!< in: record after which to insert */
+ buf_block_t* block, /*!< in/out: buffer block of rec */
+ dict_index_t* index, /*!< in: index */
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr, /*!< in/out: mini-transaction */
+ ibool* inherit)/*!< out: set to TRUE if the newly
+ inserted record may need to inherit
+ LOCK_GAP type locks from the successor
+ record */
{
- rec_t* next_rec;
- trx_t* trx;
- lock_t* lock;
- ulint err;
+ const rec_t* next_rec;
+ trx_t* trx;
+ lock_t* lock;
+ ulint err;
+ ulint next_rec_heap_no;
+
+ ut_ad(block->frame == page_align(rec));
if (flags & BTR_NO_LOCKING_FLAG) {
return(DB_SUCCESS);
}
- ut_ad(rec);
-
trx = thr_get_trx(thr);
- next_rec = page_rec_get_next(rec);
-
- *inherit = FALSE;
+ next_rec = page_rec_get_next((rec_t*) rec);
+ next_rec_heap_no = page_rec_get_heap_no(next_rec);
lock_mutex_enter_kernel();
- ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
+ /* When inserting a record into an index, the table must be at
+ least IX-locked or we must be building an index, in which case
+ the table must be at least S-locked. */
+ ut_ad(lock_table_has(trx, index->table, LOCK_IX)
+ || (*index->name == TEMP_INDEX_PREFIX
+ && lock_table_has(trx, index->table, LOCK_S)));
- lock = lock_rec_get_first(next_rec);
+ lock = lock_rec_get_first(block, next_rec_heap_no);
- if (lock == NULL) {
+ if (UNIV_LIKELY(lock == NULL)) {
/* We optimize CPU time usage in the simplest case */
lock_mutex_exit_kernel();
- if (!(index->type & DICT_CLUSTERED)) {
-
+ if (!dict_index_is_clust(index)) {
/* Update the page max trx id field */
- page_update_max_trx_id(buf_frame_align(rec),
- thr_get_trx(thr)->id);
+ page_update_max_trx_id(block,
+ buf_block_get_page_zip(block),
+ trx->id, mtr);
}
+ *inherit = FALSE;
+
return(DB_SUCCESS);
}
@@ -4828,24 +4970,25 @@ lock_rec_insert_check_and_lock(
on the successor, which produced an unnecessary deadlock. */
if (lock_rec_other_has_conflicting(
- LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION, next_rec,
- trx)) {
+ LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION,
+ block, next_rec_heap_no, trx)) {
/* Note that we may get DB_SUCCESS also here! */
err = lock_rec_enqueue_waiting(LOCK_X | LOCK_GAP
| LOCK_INSERT_INTENTION,
- next_rec, index, thr);
+ block, next_rec_heap_no,
+ index, thr);
} else {
err = DB_SUCCESS;
}
lock_mutex_exit_kernel();
- if (!(index->type & DICT_CLUSTERED) && (err == DB_SUCCESS)) {
-
+ if ((err == DB_SUCCESS) && !dict_index_is_clust(index)) {
/* Update the page max trx id field */
- page_update_max_trx_id(buf_frame_align(rec),
- thr_get_trx(thr)->id);
+ page_update_max_trx_id(block,
+ buf_block_get_page_zip(block),
+ trx->id, mtr);
}
#ifdef UNIV_DEBUG
@@ -4853,11 +4996,12 @@ lock_rec_insert_check_and_lock(
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
const ulint* offsets;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
offsets = rec_get_offsets(next_rec, index, offsets_,
ULINT_UNDEFINED, &heap);
- ut_ad(lock_rec_queue_validate(next_rec, index, offsets));
+ ut_ad(lock_rec_queue_validate(block,
+ next_rec, index, offsets));
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
@@ -4867,7 +5011,7 @@ lock_rec_insert_check_and_lock(
return(err);
}
-/*************************************************************************
+/*********************************************************************//**
If a transaction has an implicit x-lock on a record, but no explicit x-lock
set on the record, sets one for it. NOTE that in the case of a secondary
index, the kernel mutex may get temporarily released. */
@@ -4875,9 +5019,10 @@ static
void
lock_rec_convert_impl_to_expl(
/*==========================*/
- rec_t* rec, /* in: user record on page */
- dict_index_t* index, /* in: index of record */
- const ulint* offsets)/* in: rec_get_offsets(rec, index) */
+ const buf_block_t* block, /*!< in: buffer block of rec */
+ const rec_t* rec, /*!< in: user record on page */
+ dict_index_t* index, /*!< in: index of record */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
{
trx_t* impl_trx;
@@ -4886,7 +5031,7 @@ lock_rec_convert_impl_to_expl(
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
- if (index->type & DICT_CLUSTERED) {
+ if (dict_index_is_clust(index)) {
impl_trx = lock_clust_rec_some_has_impl(rec, index, offsets);
} else {
impl_trx = lock_sec_rec_some_has_impl_off_kernel(
@@ -4894,49 +5039,58 @@ lock_rec_convert_impl_to_expl(
}
if (impl_trx) {
+ ulint heap_no = page_rec_get_heap_no(rec);
+
/* If the transaction has no explicit x-lock set on the
record, set one for it */
- if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, rec,
- impl_trx)) {
+ if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block,
+ heap_no, impl_trx)) {
lock_rec_add_to_queue(
LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP,
- rec, index, impl_trx);
+ block, heap_no, index, impl_trx);
}
}
}
-/*************************************************************************
+/*********************************************************************//**
Checks if locks of other transactions prevent an immediate modify (update,
delete mark, or delete unmark) of a clustered index record. If they do,
first tests if the query thread should anyway be suspended for some
reason; if not, then puts the transaction and the query thread to the
lock wait state and inserts a waiting request for a record x-lock to the
-lock queue. */
-
+lock queue.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
ulint
lock_clust_rec_modify_check_and_lock(
/*=================================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- rec_t* rec, /* in: record which should be modified */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- que_thr_t* thr) /* in: query thread */
+ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
+ bit is set, does nothing */
+ const buf_block_t* block, /*!< in: buffer block of rec */
+ const rec_t* rec, /*!< in: record which should be
+ modified */
+ dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ que_thr_t* thr) /*!< in: query thread */
{
ulint err;
+ ulint heap_no;
ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(block->frame == page_align(rec));
if (flags & BTR_NO_LOCKING_FLAG) {
return(DB_SUCCESS);
}
+ heap_no = rec_offs_comp(offsets)
+ ? rec_get_heap_no_new(rec)
+ : rec_get_heap_no_old(rec);
+
lock_mutex_enter_kernel();
ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
@@ -4944,43 +5098,50 @@ lock_clust_rec_modify_check_and_lock(
/* If a transaction has no explicit x-lock set on the record, set one
for it */
- lock_rec_convert_impl_to_expl(rec, index, offsets);
+ lock_rec_convert_impl_to_expl(block, rec, index, offsets);
- err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP, rec, index, thr);
+ err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
+ block, heap_no, index, thr);
lock_mutex_exit_kernel();
- ut_ad(lock_rec_queue_validate(rec, index, offsets));
+ ut_ad(lock_rec_queue_validate(block, rec, index, offsets));
return(err);
}
-/*************************************************************************
+/*********************************************************************//**
Checks if locks of other transactions prevent an immediate modify (delete
-mark or delete unmark) of a secondary index record. */
-
+mark or delete unmark) of a secondary index record.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
ulint
lock_sec_rec_modify_check_and_lock(
/*===============================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- rec_t* rec, /* in: record which should be modified;
- NOTE: as this is a secondary index, we
- always have to modify the clustered index
- record first: see the comment below */
- dict_index_t* index, /* in: secondary index */
- que_thr_t* thr) /* in: query thread */
+ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
+ bit is set, does nothing */
+ buf_block_t* block, /*!< in/out: buffer block of rec */
+ const rec_t* rec, /*!< in: record which should be
+ modified; NOTE: as this is a secondary
+ index, we always have to modify the
+ clustered index record first: see the
+ comment below */
+ dict_index_t* index, /*!< in: secondary index */
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
ulint err;
+ ulint heap_no;
+
+ ut_ad(!dict_index_is_clust(index));
+ ut_ad(block->frame == page_align(rec));
if (flags & BTR_NO_LOCKING_FLAG) {
return(DB_SUCCESS);
}
- ut_ad(!(index->type & DICT_CLUSTERED));
+ heap_no = page_rec_get_heap_no(rec);
/* Another transaction cannot have an implicit lock on the record,
because when we come here, we already have modified the clustered
@@ -4991,7 +5152,8 @@ lock_sec_rec_modify_check_and_lock(
ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
- err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP, rec, index, thr);
+ err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
+ block, heap_no, index, thr);
lock_mutex_exit_kernel();
@@ -5000,11 +5162,11 @@ lock_sec_rec_modify_check_and_lock(
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
const ulint* offsets;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
offsets = rec_get_offsets(rec, index, offsets_,
ULINT_UNDEFINED, &heap);
- ut_ad(lock_rec_queue_validate(rec, index, offsets));
+ ut_ad(lock_rec_queue_validate(block, rec, index, offsets));
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
@@ -5013,48 +5175,56 @@ lock_sec_rec_modify_check_and_lock(
if (err == DB_SUCCESS) {
/* Update the page max trx id field */
-
- page_update_max_trx_id(buf_frame_align(rec),
- thr_get_trx(thr)->id);
+ page_update_max_trx_id(block,
+ buf_block_get_page_zip(block),
+ thr_get_trx(thr)->id, mtr);
}
return(err);
}
-/*************************************************************************
+/*********************************************************************//**
Like the counterpart for a clustered index below, but now we read a
-secondary index record. */
-
+secondary index record.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
ulint
lock_sec_rec_read_check_and_lock(
/*=============================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- rec_t* rec, /* in: user record or page supremum record
- which should be read or passed over by a read
- cursor */
- dict_index_t* index, /* in: secondary index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- ulint mode, /* in: mode of the lock which the read cursor
- should set on records: LOCK_S or LOCK_X; the
- latter is possible in SELECT FOR UPDATE */
- ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP */
- que_thr_t* thr) /* in: query thread */
+ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
+ bit is set, does nothing */
+ const buf_block_t* block, /*!< in: buffer block of rec */
+ const rec_t* rec, /*!< in: user record or page
+ supremum record which should
+ be read or passed over by a
+ read cursor */
+ dict_index_t* index, /*!< in: secondary index */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ enum lock_mode mode, /*!< in: mode of the lock which
+ the read cursor should set on
+ records: LOCK_S or LOCK_X; the
+ latter is possible in
+ SELECT FOR UPDATE */
+ ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
+ LOCK_REC_NOT_GAP */
+ que_thr_t* thr) /*!< in: query thread */
{
ulint err;
+ ulint heap_no;
- ut_ad(!(index->type & DICT_CLUSTERED));
+ ut_ad(!dict_index_is_clust(index));
+ ut_ad(block->frame == page_align(rec));
ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
ut_ad(rec_offs_validate(rec, index, offsets));
+ ut_ad(mode == LOCK_X || mode == LOCK_S);
if (flags & BTR_NO_LOCKING_FLAG) {
return(DB_SUCCESS);
}
+ heap_no = page_rec_get_heap_no(rec);
+
lock_mutex_enter_kernel();
ut_ad(mode != LOCK_X
@@ -5066,53 +5236,59 @@ lock_sec_rec_read_check_and_lock(
if the max trx id for the page >= min trx id for the trx list or a
database recovery is running. */
- if (((ut_dulint_cmp(page_get_max_trx_id(buf_frame_align(rec)),
+ if (((ut_dulint_cmp(page_get_max_trx_id(block->frame),
trx_list_get_min_trx_id()) >= 0)
|| recv_recovery_is_on())
&& !page_rec_is_supremum(rec)) {
- lock_rec_convert_impl_to_expl(rec, index, offsets);
+ lock_rec_convert_impl_to_expl(block, rec, index, offsets);
}
- err = lock_rec_lock(FALSE, mode | gap_mode, rec, index, thr);
+ err = lock_rec_lock(FALSE, mode | gap_mode,
+ block, heap_no, index, thr);
lock_mutex_exit_kernel();
- ut_ad(lock_rec_queue_validate(rec, index, offsets));
+ ut_ad(lock_rec_queue_validate(block, rec, index, offsets));
return(err);
}
-/*************************************************************************
+/*********************************************************************//**
Checks if locks of other transactions prevent an immediate read, or passing
over by a read cursor, of a clustered index record. If they do, first tests
if the query thread should anyway be suspended for some reason; if not, then
puts the transaction and the query thread to the lock wait state and inserts a
waiting request for a record lock to the lock queue. Sets the requested mode
-lock on the record. */
-
+lock on the record.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
ulint
lock_clust_rec_read_check_and_lock(
/*===============================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- rec_t* rec, /* in: user record or page supremum record
- which should be read or passed over by a read
- cursor */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- ulint mode, /* in: mode of the lock which the read cursor
- should set on records: LOCK_S or LOCK_X; the
- latter is possible in SELECT FOR UPDATE */
- ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP */
- que_thr_t* thr) /* in: query thread */
+ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
+ bit is set, does nothing */
+ const buf_block_t* block, /*!< in: buffer block of rec */
+ const rec_t* rec, /*!< in: user record or page
+ supremum record which should
+ be read or passed over by a
+ read cursor */
+ dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ enum lock_mode mode, /*!< in: mode of the lock which
+ the read cursor should set on
+ records: LOCK_S or LOCK_X; the
+ latter is possible in
+ SELECT FOR UPDATE */
+ ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
+ LOCK_REC_NOT_GAP */
+ que_thr_t* thr) /*!< in: query thread */
{
ulint err;
+ ulint heap_no;
- ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(block->frame == page_align(rec));
ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP
|| gap_mode == LOCK_REC_NOT_GAP);
@@ -5123,6 +5299,8 @@ lock_clust_rec_read_check_and_lock(
return(DB_SUCCESS);
}
+ heap_no = page_rec_get_heap_no(rec);
+
lock_mutex_enter_kernel();
ut_ad(mode != LOCK_X
@@ -5130,20 +5308,21 @@ lock_clust_rec_read_check_and_lock(
ut_ad(mode != LOCK_S
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
- if (!page_rec_is_supremum(rec)) {
+ if (UNIV_LIKELY(heap_no != PAGE_HEAP_NO_SUPREMUM)) {
- lock_rec_convert_impl_to_expl(rec, index, offsets);
+ lock_rec_convert_impl_to_expl(block, rec, index, offsets);
}
- err = lock_rec_lock(FALSE, mode | gap_mode, rec, index, thr);
+ err = lock_rec_lock(FALSE, mode | gap_mode,
+ block, heap_no, index, thr);
lock_mutex_exit_kernel();
- ut_ad(lock_rec_queue_validate(rec, index, offsets));
+ ut_ad(lock_rec_queue_validate(block, rec, index, offsets));
return(err);
}
-/*************************************************************************
+/*********************************************************************//**
Checks if locks of other transactions prevent an immediate read, or passing
over by a read cursor, of a clustered index record. If they do, first tests
if the query thread should anyway be suspended for some reason; if not, then
@@ -5151,35 +5330,38 @@ puts the transaction and the query thread to the lock wait state and inserts a
waiting request for a record lock to the lock queue. Sets the requested mode
lock on the record. This is an alternative version of
lock_clust_rec_read_check_and_lock() that does not require the parameter
-"offsets". */
-
+"offsets".
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
ulint
lock_clust_rec_read_check_and_lock_alt(
/*===================================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- rec_t* rec, /* in: user record or page supremum record
- which should be read or passed over by a read
- cursor */
- dict_index_t* index, /* in: clustered index */
- ulint mode, /* in: mode of the lock which the read cursor
- should set on records: LOCK_S or LOCK_X; the
- latter is possible in SELECT FOR UPDATE */
- ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP */
- que_thr_t* thr) /* in: query thread */
+ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
+ bit is set, does nothing */
+ const buf_block_t* block, /*!< in: buffer block of rec */
+ const rec_t* rec, /*!< in: user record or page
+ supremum record which should
+ be read or passed over by a
+ read cursor */
+ dict_index_t* index, /*!< in: clustered index */
+ enum lock_mode mode, /*!< in: mode of the lock which
+ the read cursor should set on
+ records: LOCK_S or LOCK_X; the
+ latter is possible in
+ SELECT FOR UPDATE */
+ ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
+ LOCK_REC_NOT_GAP */
+ que_thr_t* thr) /*!< in: query thread */
{
mem_heap_t* tmp_heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
ulint ret;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &tmp_heap);
- ret = lock_clust_rec_read_check_and_lock(flags, rec, index,
+ ret = lock_clust_rec_read_check_and_lock(flags, block, rec, index,
offsets, mode, gap_mode, thr);
if (tmp_heap) {
mem_heap_free(tmp_heap);
@@ -5187,3 +5369,272 @@ lock_clust_rec_read_check_and_lock_alt(
return(ret);
}
+/*******************************************************************//**
+Releases the last lock in the transaction's vector of AUTOINC locks. The vector must be non-empty. */
+UNIV_INLINE
+void
+lock_release_autoinc_last_lock(
+/*===========================*/
+ ib_vector_t* autoinc_locks) /*!< in/out: vector of AUTOINC locks */
+{
+ ulint last;
+ lock_t* lock;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_a(!ib_vector_is_empty(autoinc_locks));
+
+ /* The lock to be released must be the last lock acquired. */
+ last = ib_vector_size(autoinc_locks) - 1;
+ lock = ib_vector_get(autoinc_locks, last);
+
+ /* The vector should contain only AUTOINC table locks. */
+ ut_a(lock_get_mode(lock) == LOCK_AUTO_INC);
+ ut_a(lock_get_type(lock) == LOCK_TABLE);
+
+ ut_a(lock->un_member.tab_lock.table != NULL);
+
+ /* This will remove the lock from the trx autoinc_locks too. */
+ lock_table_dequeue(lock);
+}
+
+/*******************************************************************//**
+Checks if a transaction holds any AUTOINC locks.
+@return TRUE if trx->autoinc_locks is non-empty, i.e. the transaction holds AUTOINC locks */
+UNIV_INTERN
+ibool
+lock_trx_holds_autoinc_locks(
+/*=========================*/
+ const trx_t* trx) /*!< in: transaction */
+{
+ ut_a(trx->autoinc_locks != NULL);
+
+ return(!ib_vector_is_empty(trx->autoinc_locks));
+}
+
+/*******************************************************************//**
+Releases all the transaction's AUTOINC locks. */
+UNIV_INTERN
+void
+lock_release_autoinc_locks(
+/*=======================*/
+ trx_t* trx) /*!< in/out: transaction */
+{
+ ut_ad(mutex_own(&kernel_mutex));
+
+ ut_a(trx->autoinc_locks != NULL);
+
+ /* We release the locks in the reverse order. This is to
+ avoid searching the vector for the element to delete at
+ the lower level. See lock_table_remove_low() for details. */
+ while (!ib_vector_is_empty(trx->autoinc_locks)) {
+
+ /* lock_table_remove_low() will also remove the lock from
+ the transaction's autoinc_locks vector. */
+ lock_release_autoinc_last_lock(trx->autoinc_locks);
+ }
+
+ /* All locks should have been released by now. */
+ ut_a(ib_vector_is_empty(trx->autoinc_locks));
+}
+
+/*******************************************************************//**
+Gets the type of a lock. Non-inline version for use outside of the
+lock module; simply forwards to lock_get_type_low().
+@return LOCK_TABLE or LOCK_REC */
+UNIV_INTERN
+ulint
+lock_get_type(
+/*==========*/
+ const lock_t* lock) /*!< in: lock */
+{
+ return(lock_get_type_low(lock));
+}
+
+/*******************************************************************//**
+Gets the id of the transaction owning a lock.
+@return id of the transaction lock->trx, as returned by trx_get_id() */
+UNIV_INTERN
+ullint
+lock_get_trx_id(
+/*============*/
+ const lock_t* lock) /*!< in: lock */
+{
+ return(trx_get_id(lock->trx));
+}
+
+/*******************************************************************//**
+Gets the mode of a lock as a human-readable string.
+The string should not be free()'d or modified.
+@return "S", "X", "IS" or "IX" (with a ",GAP" suffix for a record lock that has the gap flag set), "AUTO_INC", or "UNKNOWN" */
+UNIV_INTERN
+const char*
+lock_get_mode_str(
+/*==============*/
+ const lock_t* lock) /*!< in: lock */
+{
+ ibool is_gap_lock;
+
+ is_gap_lock = lock_get_type_low(lock) == LOCK_REC
+ && lock_rec_get_gap(lock);
+
+ switch (lock_get_mode(lock)) {
+ case LOCK_S:
+ if (is_gap_lock) {
+ return("S,GAP");
+ } else {
+ return("S");
+ }
+ case LOCK_X:
+ if (is_gap_lock) {
+ return("X,GAP");
+ } else {
+ return("X");
+ }
+ case LOCK_IS:
+ if (is_gap_lock) {
+ return("IS,GAP");
+ } else {
+ return("IS");
+ }
+ case LOCK_IX:
+ if (is_gap_lock) {
+ return("IX,GAP");
+ } else {
+ return("IX");
+ }
+ case LOCK_AUTO_INC:
+ return("AUTO_INC");
+ default:
+ return("UNKNOWN");
+ }
+}
+
+/*******************************************************************//**
+Gets the type of a lock as a human-readable string.
+The string should not be free()'d or modified.
+@return "RECORD" for LOCK_REC, "TABLE" for LOCK_TABLE, or "UNKNOWN" */
+UNIV_INTERN
+const char*
+lock_get_type_str(
+/*==============*/
+ const lock_t* lock) /*!< in: lock */
+{
+ switch (lock_get_type_low(lock)) {
+ case LOCK_REC:
+ return("RECORD");
+ case LOCK_TABLE:
+ return("TABLE");
+ default:
+ return("UNKNOWN");
+ }
+}
+
+/*******************************************************************//**
+Gets the table on which the lock is.
+@return the table of the locked index (LOCK_REC) or the locked table (LOCK_TABLE) */
+UNIV_INLINE
+dict_table_t*
+lock_get_table(
+/*===========*/
+ const lock_t* lock) /*!< in: lock */
+{
+ switch (lock_get_type_low(lock)) {
+ case LOCK_REC:
+ return(lock->index->table);
+ case LOCK_TABLE:
+ return(lock->un_member.tab_lock.table);
+ default:
+ ut_error;
+ return(NULL);
+ }
+}
+
+/*******************************************************************//**
+Gets the id of the table on which the lock is.
+@return id of the table, converted with ut_conv_dulint_to_longlong() */
+UNIV_INTERN
+ullint
+lock_get_table_id(
+/*==============*/
+ const lock_t* lock) /*!< in: lock */
+{
+ dict_table_t* table;
+
+ table = lock_get_table(lock);
+
+ return((ullint)ut_conv_dulint_to_longlong(table->id));
+}
+
+/*******************************************************************//**
+Gets the name of the table on which the lock is.
+The string should not be free()'d or modified.
+@return name of the table (table->name) */
+UNIV_INTERN
+const char*
+lock_get_table_name(
+/*================*/
+ const lock_t* lock) /*!< in: lock */
+{
+ dict_table_t* table;
+
+ table = lock_get_table(lock);
+
+ return(table->name);
+}
+
+/*******************************************************************//**
+Gets the index on which a record lock is. The lock must be of type LOCK_REC.
+@return index */
+UNIV_INTERN
+const dict_index_t*
+lock_rec_get_index(
+/*===============*/
+ const lock_t* lock) /*!< in: lock */
+{
+ ut_a(lock_get_type_low(lock) == LOCK_REC);
+
+ return(lock->index);
+}
+
+/*******************************************************************//**
+Gets the name of the index on which a record lock is. The lock must be of type LOCK_REC.
+The string should not be free()'d or modified.
+@return name of the index (lock->index->name) */
+UNIV_INTERN
+const char*
+lock_rec_get_index_name(
+/*====================*/
+ const lock_t* lock) /*!< in: lock */
+{
+ ut_a(lock_get_type_low(lock) == LOCK_REC);
+
+ return(lock->index->name);
+}
+
+/*******************************************************************//**
+Gets the tablespace number on which a record lock is. The lock must be of type LOCK_REC.
+@return tablespace number */
+UNIV_INTERN
+ulint
+lock_rec_get_space_id(
+/*==================*/
+ const lock_t* lock) /*!< in: lock */
+{
+ ut_a(lock_get_type_low(lock) == LOCK_REC);
+
+ return(lock->un_member.rec_lock.space);
+}
+
+/*******************************************************************//**
+Gets the page number on which a record lock is. The lock must be of type LOCK_REC.
+@return page number */
+UNIV_INTERN
+ulint
+lock_rec_get_page_no(
+/*=================*/
+ const lock_t* lock) /*!< in: lock */
+{
+ ut_a(lock_get_type_low(lock) == LOCK_REC);
+
+ return(lock->un_member.rec_lock.page_no);
+}
diff --git a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c
index b10c348b24d..d5b696074b3 100644
--- a/storage/innobase/log/log0log.c
+++ b/storage/innobase/log/log0log.c
@@ -1,7 +1,48 @@
-/******************************************************
-Database log
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2009, Google Inc.
-(c) 1995-1997 Innobase Oy
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file log/log0log.c
+Database log
Created 12/9/1995 Heikki Tuuri
*******************************************************/
@@ -12,6 +53,7 @@ Created 12/9/1995 Heikki Tuuri
#include "log0log.ic"
#endif
+#ifndef UNIV_HOTBACKUP
#include "mem0mem.h"
#include "buf0buf.h"
#include "buf0flu.h"
@@ -52,26 +94,24 @@ reduce the size of the log.
/* Current free limit of space 0; protected by the log sys mutex; 0 means
uninitialized */
-ulint log_fsp_current_free_limit = 0;
+UNIV_INTERN ulint log_fsp_current_free_limit = 0;
/* Global log system variable */
-log_t* log_sys = NULL;
+UNIV_INTERN log_t* log_sys = NULL;
#ifdef UNIV_DEBUG
-ibool log_do_write = TRUE;
-
-ibool log_debug_writes = FALSE;
+UNIV_INTERN ibool log_do_write = TRUE;
#endif /* UNIV_DEBUG */
/* These control how often we print warnings if the last checkpoint is too
old */
-ibool log_has_printed_chkp_warning = FALSE;
-time_t log_last_warning_time;
+UNIV_INTERN ibool log_has_printed_chkp_warning = FALSE;
+UNIV_INTERN time_t log_last_warning_time;
#ifdef UNIV_LOG_ARCHIVE
/* Pointer to this variable is used as the i/o-message when we do i/o to an
archive */
-byte log_archive_io;
+UNIV_INTERN byte log_archive_io;
#endif /* UNIV_LOG_ARCHIVE */
/* A margin for free space in the log buffer before a log entry is catenated */
@@ -113,14 +153,14 @@ the previous */
#define LOG_ARCHIVE_READ 1
#define LOG_ARCHIVE_WRITE 2
-/**********************************************************
+/******************************************************//**
Completes a checkpoint write i/o to a log file. */
static
void
log_io_complete_checkpoint(void);
/*============================*/
#ifdef UNIV_LOG_ARCHIVE
-/**********************************************************
+/******************************************************//**
Completes an archiving i/o. */
static
void
@@ -128,15 +168,15 @@ log_io_complete_archive(void);
/*=========================*/
#endif /* UNIV_LOG_ARCHIVE */
-/********************************************************************
+/****************************************************************//**
Sets the global variable log_fsp_current_free_limit. Also makes a checkpoint,
so that we know that the limit has been written to a log checkpoint field
on disk. */
-
+UNIV_INTERN
void
log_fsp_current_free_limit_set_and_checkpoint(
/*==========================================*/
- ulint limit) /* in: limit to set */
+ ulint limit) /*!< in: limit to set */
{
ibool success;
@@ -155,21 +195,22 @@ log_fsp_current_free_limit_set_and_checkpoint(
}
}
-/********************************************************************
+/****************************************************************//**
Returns the oldest modified block lsn in the pool, or log_sys->lsn if none
-exists. */
+exists.
+@return LSN of oldest modification */
static
-dulint
+ib_uint64_t
log_buf_pool_get_oldest_modification(void)
/*======================================*/
{
- dulint lsn;
+ ib_uint64_t lsn;
ut_ad(mutex_own(&(log_sys->mutex)));
lsn = buf_pool_get_oldest_modification();
- if (ut_dulint_is_zero(lsn)) {
+ if (!lsn) {
lsn = log_sys->lsn;
}
@@ -177,15 +218,15 @@ log_buf_pool_get_oldest_modification(void)
return(lsn);
}
-/****************************************************************
+/************************************************************//**
Opens the log for log_write_low. The log must be closed with log_close and
-released with log_release. */
-
-dulint
+released with log_release.
+@return start lsn of the log record */
+UNIV_INTERN
+ib_uint64_t
log_reserve_and_open(
/*=================*/
- /* out: start lsn of the log record */
- ulint len) /* in: length of data to be catenated */
+ ulint len) /*!< in: length of data to be catenated */
{
log_t* log = log_sys;
ulint len_upper_limit;
@@ -200,6 +241,7 @@ log_reserve_and_open(
ut_a(len < log->buf_size / 2);
loop:
mutex_enter(&(log->mutex));
+ ut_ad(!recv_no_log_write);
/* Calculate an upper limit for the space the string may take in the
log buffer */
@@ -225,8 +267,7 @@ loop:
#ifdef UNIV_LOG_ARCHIVE
if (log->archiving_state != LOG_ARCH_OFF) {
- archived_lsn_age = ut_dulint_minus(log->lsn,
- log->archived_lsn);
+ archived_lsn_age = log->lsn - log->archived_lsn;
if (archived_lsn_age + len_upper_limit
> log->max_archived_lsn_age) {
/* Not enough free archived space in log groups: do a
@@ -252,15 +293,15 @@ loop:
return(log->lsn);
}
-/****************************************************************
+/************************************************************//**
Writes to the log the string given. It is assumed that the caller holds the
log mutex. */
-
+UNIV_INTERN
void
log_write_low(
/*==========*/
- byte* str, /* in: string */
- ulint str_len) /* in: string length */
+ byte* str, /*!< in: string */
+ ulint str_len) /*!< in: string length */
{
log_t* log = log_sys;
ulint len;
@@ -269,6 +310,7 @@ log_write_low(
ut_ad(mutex_own(&(log->mutex)));
part_loop:
+ ut_ad(!recv_no_log_write);
/* Calculate a part length */
data_len = (log->buf_free % OS_FILE_LOG_BLOCK_SIZE) + str_len;
@@ -302,12 +344,12 @@ part_loop:
log_sys->next_checkpoint_no);
len += LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE;
- log->lsn = ut_dulint_add(log->lsn, len);
+ log->lsn += len;
/* Initialize the next block header */
log_block_init(log_block + OS_FILE_LOG_BLOCK_SIZE, log->lsn);
} else {
- log->lsn = ut_dulint_add(log->lsn, len);
+ log->lsn += len;
}
log->buf_free += len;
@@ -321,22 +363,23 @@ part_loop:
srv_log_write_requests++;
}
-/****************************************************************
-Closes the log. */
-
-dulint
+/************************************************************//**
+Closes the log.
+@return lsn */
+UNIV_INTERN
+ib_uint64_t
log_close(void)
/*===========*/
- /* out: lsn */
{
- byte* log_block;
- ulint first_rec_group;
- dulint oldest_lsn;
- dulint lsn;
- log_t* log = log_sys;
- ulint checkpoint_age;
+ byte* log_block;
+ ulint first_rec_group;
+ ib_uint64_t oldest_lsn;
+ ib_uint64_t lsn;
+ log_t* log = log_sys;
+ ib_uint64_t checkpoint_age;
ut_ad(mutex_own(&(log->mutex)));
+ ut_ad(!recv_no_log_write);
lsn = log->lsn;
@@ -358,7 +401,7 @@ log_close(void)
log->check_flush_or_checkpoint = TRUE;
}
- checkpoint_age = ut_dulint_minus(lsn, log->last_checkpoint_lsn);
+ checkpoint_age = lsn - log->last_checkpoint_lsn;
if (checkpoint_age >= log->log_group_capacity) {
/* TODO: split btr_store_big_rec_extern_fields() into small
@@ -396,9 +439,8 @@ log_close(void)
oldest_lsn = buf_pool_get_oldest_modification();
- if (ut_dulint_is_zero(oldest_lsn)
- || (ut_dulint_minus(lsn, oldest_lsn)
- > log->max_modified_age_async)
+ if (!oldest_lsn
+ || lsn - oldest_lsn > log->max_modified_age_async
|| checkpoint_age > log->max_checkpoint_age_async) {
log->check_flush_or_checkpoint = TRUE;
@@ -414,7 +456,7 @@ function_exit:
}
#ifdef UNIV_LOG_ARCHIVE
-/**********************************************************
+/******************************************************//**
Pads the current log block full with dummy log records. Used in producing
consistent archived log files. */
static
@@ -422,10 +464,10 @@ void
log_pad_current_log_block(void)
/*===========================*/
{
- byte b = MLOG_DUMMY_RECORD;
- ulint pad_length;
- ulint i;
- dulint lsn;
+ byte b = MLOG_DUMMY_RECORD;
+ ulint pad_length;
+ ulint i;
+ ib_uint64_t lsn;
/* We retrieve lsn only because otherwise gcc crashed on HP-UX */
lsn = log_reserve_and_open(OS_FILE_LOG_BLOCK_SIZE);
@@ -443,52 +485,53 @@ log_pad_current_log_block(void)
log_close();
log_release();
- ut_a((ut_dulint_get_low(lsn) % OS_FILE_LOG_BLOCK_SIZE)
- == LOG_BLOCK_HDR_SIZE);
+ ut_a(lsn % OS_FILE_LOG_BLOCK_SIZE == LOG_BLOCK_HDR_SIZE);
}
#endif /* UNIV_LOG_ARCHIVE */
-/**********************************************************
+/******************************************************//**
Calculates the data capacity of a log group, when the log file headers are not
-included. */
-
+included.
+@return capacity in bytes */
+UNIV_INTERN
ulint
log_group_get_capacity(
/*===================*/
- /* out: capacity in bytes */
- log_group_t* group) /* in: log group */
+ const log_group_t* group) /*!< in: log group */
{
ut_ad(mutex_own(&(log_sys->mutex)));
return((group->file_size - LOG_FILE_HDR_SIZE) * group->n_files);
}
-/**********************************************************
+/******************************************************//**
Calculates the offset within a log group, when the log file headers are not
-included. */
+included.
+@return size offset (<= offset) */
UNIV_INLINE
ulint
log_group_calc_size_offset(
/*=======================*/
- /* out: size offset (<= offset) */
- ulint offset, /* in: real offset within the log group */
- log_group_t* group) /* in: log group */
+ ulint offset, /*!< in: real offset within the
+ log group */
+ const log_group_t* group) /*!< in: log group */
{
ut_ad(mutex_own(&(log_sys->mutex)));
return(offset - LOG_FILE_HDR_SIZE * (1 + offset / group->file_size));
}
-/**********************************************************
+/******************************************************//**
Calculates the offset within a log group, when the log file headers are
-included. */
+included.
+@return real offset (>= offset) */
UNIV_INLINE
ulint
log_group_calc_real_offset(
/*=======================*/
- /* out: real offset (>= offset) */
- ulint offset, /* in: size offset within the log group */
- log_group_t* group) /* in: log group */
+ ulint offset, /*!< in: size offset within the
+ log group */
+ const log_group_t* group) /*!< in: log group */
{
ut_ad(mutex_own(&(log_sys->mutex)));
@@ -496,22 +539,22 @@ log_group_calc_real_offset(
* (1 + offset / (group->file_size - LOG_FILE_HDR_SIZE)));
}
-/**********************************************************
-Calculates the offset of an lsn within a log group. */
+/******************************************************//**
+Calculates the offset of an lsn within a log group.
+@return offset within the log group */
static
ulint
log_group_calc_lsn_offset(
/*======================*/
- /* out: offset within the log group */
- dulint lsn, /* in: lsn, must be within 4 GB of
- group->lsn */
- log_group_t* group) /* in: log group */
+ ib_uint64_t lsn, /*!< in: lsn, must be within 4 GB of
+ group->lsn */
+ const log_group_t* group) /*!< in: log group */
{
- dulint gr_lsn;
- ib_longlong gr_lsn_size_offset;
- ib_longlong difference;
- ib_longlong group_size;
- ib_longlong offset;
+ ib_uint64_t gr_lsn;
+ ib_int64_t gr_lsn_size_offset;
+ ib_int64_t difference;
+ ib_int64_t group_size;
+ ib_int64_t offset;
ut_ad(mutex_own(&(log_sys->mutex)));
@@ -520,16 +563,16 @@ log_group_calc_lsn_offset(
gr_lsn = group->lsn;
- gr_lsn_size_offset = (ib_longlong)
+ gr_lsn_size_offset = (ib_int64_t)
log_group_calc_size_offset(group->lsn_offset, group);
- group_size = (ib_longlong) log_group_get_capacity(group);
+ group_size = (ib_int64_t) log_group_get_capacity(group);
- if (ut_dulint_cmp(lsn, gr_lsn) >= 0) {
+ if (lsn >= gr_lsn) {
- difference = (ib_longlong) ut_dulint_minus(lsn, gr_lsn);
+ difference = (ib_int64_t) (lsn - gr_lsn);
} else {
- difference = (ib_longlong) ut_dulint_minus(gr_lsn, lsn);
+ difference = (ib_int64_t) (gr_lsn - lsn);
difference = difference % group_size;
@@ -538,7 +581,7 @@ log_group_calc_lsn_offset(
offset = (gr_lsn_size_offset + difference) % group_size;
- ut_a(offset < (((ib_longlong) 1) << 32)); /* offset must be < 4 GB */
+ ut_a(offset < (((ib_int64_t) 1) << 32)); /* offset must be < 4 GB */
/* fprintf(stderr,
"Offset is %lu gr_lsn_offset is %lu difference is %lu\n",
@@ -547,78 +590,78 @@ log_group_calc_lsn_offset(
return(log_group_calc_real_offset((ulint)offset, group));
}
+#endif /* !UNIV_HOTBACKUP */
-/***********************************************************************
-Calculates where in log files we find a specified lsn. */
+#ifdef UNIV_DEBUG
+UNIV_INTERN ibool log_debug_writes = FALSE;
+#endif /* UNIV_DEBUG */
+/*******************************************************************//**
+Calculates where in log files we find a specified lsn.
+@return log file number */
+UNIV_INTERN
ulint
log_calc_where_lsn_is(
/*==================*/
- /* out: log file number */
- ib_longlong* log_file_offset, /* out: offset in that file
+ ib_int64_t* log_file_offset, /*!< out: offset in that file
(including the header) */
- dulint first_header_lsn, /* in: first log file start
+ ib_uint64_t first_header_lsn, /*!< in: first log file start
lsn */
- dulint lsn, /* in: lsn whose position to
+ ib_uint64_t lsn, /*!< in: lsn whose position to
determine */
- ulint n_log_files, /* in: total number of log
+ ulint n_log_files, /*!< in: total number of log
files */
- ib_longlong log_file_size) /* in: log file size
+ ib_int64_t log_file_size) /*!< in: log file size
(including the header) */
{
- ib_longlong ib_lsn;
- ib_longlong ib_first_header_lsn;
- ib_longlong capacity = log_file_size - LOG_FILE_HDR_SIZE;
+ ib_int64_t capacity = log_file_size - LOG_FILE_HDR_SIZE;
ulint file_no;
- ib_longlong add_this_many;
-
- ib_lsn = ut_conv_dulint_to_longlong(lsn);
- ib_first_header_lsn = ut_conv_dulint_to_longlong(first_header_lsn);
+ ib_int64_t add_this_many;
- if (ib_lsn < ib_first_header_lsn) {
- add_this_many = 1 + (ib_first_header_lsn - ib_lsn)
- / (capacity * (ib_longlong)n_log_files);
- ib_lsn += add_this_many
- * capacity * (ib_longlong)n_log_files;
+ if (lsn < first_header_lsn) {
+ add_this_many = 1 + (first_header_lsn - lsn)
+ / (capacity * (ib_int64_t)n_log_files);
+ lsn += add_this_many
+ * capacity * (ib_int64_t)n_log_files;
}
- ut_a(ib_lsn >= ib_first_header_lsn);
+ ut_a(lsn >= first_header_lsn);
- file_no = ((ulint)((ib_lsn - ib_first_header_lsn) / capacity))
+ file_no = ((ulint)((lsn - first_header_lsn) / capacity))
% n_log_files;
- *log_file_offset = (ib_lsn - ib_first_header_lsn) % capacity;
+ *log_file_offset = (lsn - first_header_lsn) % capacity;
*log_file_offset = *log_file_offset + LOG_FILE_HDR_SIZE;
return(file_no);
}
-/************************************************************
+#ifndef UNIV_HOTBACKUP
+/********************************************************//**
Sets the field values in group to correspond to a given lsn. For this function
to work, the values must already be correctly initialized to correspond to
some lsn, for instance, a checkpoint lsn. */
-
+UNIV_INTERN
void
log_group_set_fields(
/*=================*/
- log_group_t* group, /* in: group */
- dulint lsn) /* in: lsn for which the values should be
+ log_group_t* group, /*!< in/out: group */
+ ib_uint64_t lsn) /*!< in: lsn for which the values should be
set */
{
group->lsn_offset = log_group_calc_lsn_offset(lsn, group);
group->lsn = lsn;
}
-/*********************************************************************
+/*****************************************************************//**
Calculates the recommended highest values for lsn - last_checkpoint_lsn,
-lsn - buf_get_oldest_modification(), and lsn - max_archive_lsn_age. */
+lsn - buf_get_oldest_modification(), and lsn - max_archive_lsn_age.
+@return error value FALSE if the smallest log group is too small to
+accommodate the number of OS threads in the database server */
static
ibool
log_calc_max_ages(void)
/*===================*/
- /* out: error value FALSE if the smallest log group is
- too small to accommodate the number of OS threads in
- the database server */
{
log_group_t* group;
ulint margin;
@@ -628,8 +671,6 @@ log_calc_max_ages(void)
ulint archive_margin;
ulint smallest_archive_margin;
- ut_ad(!mutex_own(&(log_sys->mutex)));
-
mutex_enter(&(log_sys->mutex));
group = UT_LIST_GET_FIRST(log_sys->log_groups);
@@ -712,8 +753,7 @@ failure:
" After an ERROR-FREE shutdown\n"
"InnoDB: of mysqld you can adjust the size of"
" ib_logfiles, as explained in\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "adding-and-removing.html\n"
+ "InnoDB: " REFMAN "adding-and-removing.html\n"
"InnoDB: Cannot continue operation."
" Calling exit(1).\n",
(ulong)srv_thread_concurrency);
@@ -724,15 +764,13 @@ failure:
return(success);
}
-/**********************************************************
+/******************************************************//**
Initializes the log. */
-
+UNIV_INTERN
void
log_init(void)
/*==========*/
{
- byte* buf;
-
log_sys = mem_alloc(sizeof(log_t));
mutex_create(&log_sys->mutex, SYNC_LOG);
@@ -747,8 +785,8 @@ log_init(void)
ut_a(LOG_BUFFER_SIZE >= 16 * OS_FILE_LOG_BLOCK_SIZE);
ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE);
- buf = ut_malloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE);
- log_sys->buf = ut_align(buf, OS_FILE_LOG_BLOCK_SIZE);
+ log_sys->buf_ptr = mem_alloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE);
+ log_sys->buf = ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE);
log_sys->buf_size = LOG_BUFFER_SIZE;
@@ -767,9 +805,9 @@ log_init(void)
log_sys->buf_next_to_write = 0;
- log_sys->write_lsn = ut_dulint_zero;
- log_sys->current_flush_lsn = ut_dulint_zero;
- log_sys->flushed_to_disk_lsn = ut_dulint_zero;
+ log_sys->write_lsn = 0;
+ log_sys->current_flush_lsn = 0;
+ log_sys->flushed_to_disk_lsn = 0;
log_sys->written_to_some_lsn = log_sys->lsn;
log_sys->written_to_all_lsn = log_sys->lsn;
@@ -787,15 +825,15 @@ log_init(void)
/*----------------------------*/
log_sys->adm_checkpoint_interval = ULINT_MAX;
- log_sys->next_checkpoint_no = ut_dulint_zero;
+ log_sys->next_checkpoint_no = 0;
log_sys->last_checkpoint_lsn = log_sys->lsn;
log_sys->n_pending_checkpoint_writes = 0;
rw_lock_create(&log_sys->checkpoint_lock, SYNC_NO_ORDER_CHECK);
- log_sys->checkpoint_buf
- = ut_align(mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE),
- OS_FILE_LOG_BLOCK_SIZE);
+ log_sys->checkpoint_buf_ptr = mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE);
+ log_sys->checkpoint_buf = ut_align(log_sys->checkpoint_buf_ptr,
+ OS_FILE_LOG_BLOCK_SIZE);
memset(log_sys->checkpoint_buf, '\0', OS_FILE_LOG_BLOCK_SIZE);
/*----------------------------*/
@@ -803,7 +841,7 @@ log_init(void)
/* Under MySQL, log archiving is always off */
log_sys->archiving_state = LOG_ARCH_OFF;
log_sys->archived_lsn = log_sys->lsn;
- log_sys->next_archived_lsn = ut_dulint_zero;
+ log_sys->next_archived_lsn = 0;
log_sys->n_pending_archive_ios = 0;
@@ -828,36 +866,36 @@ log_init(void)
log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
- log_sys->lsn = ut_dulint_add(LOG_START_LSN, LOG_BLOCK_HDR_SIZE);
+ log_sys->lsn = LOG_START_LSN + LOG_BLOCK_HDR_SIZE;
mutex_exit(&(log_sys->mutex));
#ifdef UNIV_LOG_DEBUG
recv_sys_create();
- recv_sys_init(FALSE, buf_pool_get_curr_size());
+ recv_sys_init(buf_pool_get_curr_size());
recv_sys->parse_start_lsn = log_sys->lsn;
recv_sys->scanned_lsn = log_sys->lsn;
recv_sys->scanned_checkpoint_no = 0;
recv_sys->recovered_lsn = log_sys->lsn;
- recv_sys->limit_lsn = ut_dulint_max;
+ recv_sys->limit_lsn = IB_ULONGLONG_MAX;
#endif
}
-/**********************************************************************
+/******************************************************************//**
Inits a log group to the log system. */
-
+UNIV_INTERN
void
log_group_init(
/*===========*/
- ulint id, /* in: group id */
- ulint n_files, /* in: number of log files */
- ulint file_size, /* in: log file size in bytes */
- ulint space_id, /* in: space id of the file space
+ ulint id, /*!< in: group id */
+ ulint n_files, /*!< in: number of log files */
+ ulint file_size, /*!< in: log file size in bytes */
+ ulint space_id, /*!< in: space id of the file space
which contains the log files of this
group */
ulint archive_space_id __attribute__((unused)))
- /* in: space id of the file space
+ /*!< in: space id of the file space
which contains some archived log
files for this group; currently, only
for the first log group this is
@@ -878,23 +916,33 @@ log_group_init(
group->lsn_offset = LOG_FILE_HDR_SIZE;
group->n_pending_writes = 0;
+ group->file_header_bufs_ptr = mem_alloc(sizeof(byte*) * n_files);
group->file_header_bufs = mem_alloc(sizeof(byte*) * n_files);
#ifdef UNIV_LOG_ARCHIVE
+ group->archive_file_header_bufs_ptr = mem_alloc(
+ sizeof(byte*) * n_files);
group->archive_file_header_bufs = mem_alloc(sizeof(byte*) * n_files);
#endif /* UNIV_LOG_ARCHIVE */
for (i = 0; i < n_files; i++) {
- *(group->file_header_bufs + i) = ut_align(
- mem_alloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE),
+ group->file_header_bufs_ptr[i] = mem_alloc(
+ LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
+
+ group->file_header_bufs[i] = ut_align(
+ group->file_header_bufs_ptr[i],
OS_FILE_LOG_BLOCK_SIZE);
memset(*(group->file_header_bufs + i), '\0',
LOG_FILE_HDR_SIZE);
#ifdef UNIV_LOG_ARCHIVE
- *(group->archive_file_header_bufs + i) = ut_align(
- mem_alloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE),
+ group->archive_file_header_bufs_ptr[i] = mem_alloc(
+ LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
+
+ group->archive_file_header_bufs[i] = ut_align(
+ group->archive_file_header_bufs_ptr[i],
OS_FILE_LOG_BLOCK_SIZE);
+
memset(*(group->archive_file_header_bufs + i), '\0',
LOG_FILE_HDR_SIZE);
#endif /* UNIV_LOG_ARCHIVE */
@@ -907,8 +955,9 @@ log_group_init(
group->archived_offset = 0;
#endif /* UNIV_LOG_ARCHIVE */
- group->checkpoint_buf = ut_align(
- mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE), OS_FILE_LOG_BLOCK_SIZE);
+ group->checkpoint_buf_ptr = mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE);
+ group->checkpoint_buf = ut_align(group->checkpoint_buf_ptr,
+ OS_FILE_LOG_BLOCK_SIZE);
memset(group->checkpoint_buf, '\0', OS_FILE_LOG_BLOCK_SIZE);
@@ -917,13 +966,13 @@ log_group_init(
ut_a(log_calc_max_ages());
}
-/**********************************************************************
+/******************************************************************//**
Does the unlockings needed in flush i/o completion. */
UNIV_INLINE
void
log_flush_do_unlocks(
/*=================*/
- ulint code) /* in: any ORed combination of LOG_UNLOCK_FLUSH_LOCK
+ ulint code) /*!< in: any ORed combination of LOG_UNLOCK_FLUSH_LOCK
and LOG_UNLOCK_NONE_FLUSHED_LOCK */
{
ut_ad(mutex_own(&(log_sys->mutex)));
@@ -948,15 +997,15 @@ log_flush_do_unlocks(
}
}
-/**********************************************************************
+/******************************************************************//**
Checks if a flush is completed for a log group and does the completion
-routine if yes. */
+routine if yes.
+@return LOG_UNLOCK_NONE_FLUSHED_LOCK or 0 */
UNIV_INLINE
ulint
log_group_check_flush_completion(
/*=============================*/
- /* out: LOG_UNLOCK_NONE_FLUSHED_LOCK or 0 */
- log_group_t* group) /* in: log group */
+ log_group_t* group) /*!< in: log group */
{
ut_ad(mutex_own(&(log_sys->mutex)));
@@ -984,13 +1033,13 @@ log_group_check_flush_completion(
return(0);
}
-/**********************************************************
-Checks if a flush is completed and does the completion routine if yes. */
+/******************************************************//**
+Checks if a flush is completed and does the completion routine if yes.
+@return LOG_UNLOCK_FLUSH_LOCK or 0 */
static
ulint
log_sys_check_flush_completion(void)
/*================================*/
- /* out: LOG_UNLOCK_FLUSH_LOCK or 0 */
{
ulint move_start;
ulint move_end;
@@ -1025,13 +1074,13 @@ log_sys_check_flush_completion(void)
return(0);
}
-/**********************************************************
+/******************************************************//**
Completes an i/o to a log file. */
-
+UNIV_INTERN
void
log_io_complete(
/*============*/
- log_group_t* group) /* in: log group or a dummy pointer */
+ log_group_t* group) /*!< in: log group or a dummy pointer */
{
ulint unlock;
@@ -1067,7 +1116,7 @@ log_io_complete(
return;
}
- ut_error; /* We currently use synchronous writing of the
+ ut_error; /*!< We currently use synchronous writing of the
logs and cannot end up here! */
if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
@@ -1078,6 +1127,7 @@ log_io_complete(
}
mutex_enter(&(log_sys->mutex));
+ ut_ad(!recv_no_log_write);
ut_a(group->n_pending_writes > 0);
ut_a(log_sys->n_pending_writes > 0);
@@ -1093,28 +1143,29 @@ log_io_complete(
mutex_exit(&(log_sys->mutex));
}
-/**********************************************************
+/******************************************************//**
Writes a log file header to a log file space. */
static
void
log_group_file_header_flush(
/*========================*/
- log_group_t* group, /* in: log group */
- ulint nth_file, /* in: header to the nth file in the
+ log_group_t* group, /*!< in: log group */
+ ulint nth_file, /*!< in: header to the nth file in the
log file space */
- dulint start_lsn) /* in: log file data starts at this
+ ib_uint64_t start_lsn) /*!< in: log file data starts at this
lsn */
{
byte* buf;
ulint dest_offset;
ut_ad(mutex_own(&(log_sys->mutex)));
+ ut_ad(!recv_no_log_write);
ut_a(nth_file < group->n_files);
buf = *(group->file_header_bufs + nth_file);
mach_write_to_4(buf + LOG_GROUP_ID, group->id);
- mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn);
+ mach_write_ull(buf + LOG_FILE_START_LSN, start_lsn);
/* Wipe over possible label of ibbackup --restore */
memcpy(buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, " ", 4);
@@ -1133,7 +1184,7 @@ log_group_file_header_flush(
srv_os_log_pending_writes++;
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id,
+ fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id, 0,
dest_offset / UNIV_PAGE_SIZE,
dest_offset % UNIV_PAGE_SIZE,
OS_FILE_LOG_BLOCK_SIZE,
@@ -1143,7 +1194,7 @@ log_group_file_header_flush(
}
}
-/**********************************************************
+/******************************************************//**
Stores a 4-byte checksum to the trailer checksum field of a log block
before writing it to a log file. This checksum is used in recovery to
check the consistency of a log block. */
@@ -1151,25 +1202,25 @@ static
void
log_block_store_checksum(
/*=====================*/
- byte* block) /* in/out: pointer to a log block */
+ byte* block) /*!< in/out: pointer to a log block */
{
log_block_set_checksum(block, log_block_calc_checksum(block));
}
-/**********************************************************
+/******************************************************//**
Writes a buffer to a log file group. */
-
+UNIV_INTERN
void
log_group_write_buf(
/*================*/
- log_group_t* group, /* in: log group */
- byte* buf, /* in: buffer */
- ulint len, /* in: buffer len; must be divisible
+ log_group_t* group, /*!< in: log group */
+ byte* buf, /*!< in: buffer */
+ ulint len, /*!< in: buffer len; must be divisible
by OS_FILE_LOG_BLOCK_SIZE */
- dulint start_lsn, /* in: start lsn of the buffer; must
+ ib_uint64_t start_lsn, /*!< in: start lsn of the buffer; must
be divisible by
OS_FILE_LOG_BLOCK_SIZE */
- ulint new_data_offset)/* in: start offset of new data in
+ ulint new_data_offset)/*!< in: start offset of new data in
buf: this parameter is used to decide
if we have to write a new log file
header */
@@ -1180,8 +1231,9 @@ log_group_write_buf(
ulint i;
ut_ad(mutex_own(&(log_sys->mutex)));
+ ut_ad(!recv_no_log_write);
ut_a(len % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_a(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
+ ut_a(((ulint) start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
if (new_data_offset == 0) {
write_header = TRUE;
@@ -1221,12 +1273,11 @@ loop:
fprintf(stderr,
"Writing log file segment to group %lu"
" offset %lu len %lu\n"
- "start lsn %lu %lu\n"
+ "start lsn %llu\n"
"First block n:o %lu last block n:o %lu\n",
(ulong) group->id, (ulong) next_offset,
(ulong) write_len,
- (ulong) ut_dulint_get_high(start_lsn),
- (ulong) ut_dulint_get_low(start_lsn),
+ start_lsn,
(ulong) log_block_get_hdr_no(buf),
(ulong) log_block_get_hdr_no(
buf + write_len - OS_FILE_LOG_BLOCK_SIZE));
@@ -1253,7 +1304,7 @@ loop:
srv_os_log_pending_writes++;
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id,
+ fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id, 0,
next_offset / UNIV_PAGE_SIZE,
next_offset % UNIV_PAGE_SIZE, write_len, buf, group);
@@ -1264,7 +1315,7 @@ loop:
}
if (write_len < len) {
- start_lsn = ut_dulint_add(start_lsn, write_len);
+ start_lsn += write_len;
len -= write_len;
buf += write_len;
@@ -1274,22 +1325,23 @@ loop:
}
}
-/**********************************************************
+/******************************************************//**
This function is called, e.g., when a transaction wants to commit. It checks
that the log has been written to the log file up to the last log entry written
by the transaction. If there is a flush running, it waits and checks if the
flush flushed enough. If not, starts a new flush. */
-
+UNIV_INTERN
void
log_write_up_to(
/*============*/
- dulint lsn, /* in: log sequence number up to which the log should
- be written, ut_dulint_max if not specified */
- ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
- or LOG_WAIT_ALL_GROUPS */
- ibool flush_to_disk)
- /* in: TRUE if we want the written log also to be
- flushed to disk */
+ ib_uint64_t lsn, /*!< in: log sequence number up to which
+ the log should be written,
+ IB_ULONGLONG_MAX if not specified */
+ ulint wait, /*!< in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
+ or LOG_WAIT_ALL_GROUPS */
+ ibool flush_to_disk)
+ /*!< in: TRUE if we want the written log
+ also to be flushed to disk */
{
log_group_t* group;
ulint start_offset;
@@ -1322,9 +1374,10 @@ loop:
#endif
mutex_enter(&(log_sys->mutex));
+ ut_ad(!recv_no_log_write);
if (flush_to_disk
- && ut_dulint_cmp(log_sys->flushed_to_disk_lsn, lsn) >= 0) {
+ && log_sys->flushed_to_disk_lsn >= lsn) {
mutex_exit(&(log_sys->mutex));
@@ -1332,9 +1385,8 @@ loop:
}
if (!flush_to_disk
- && (ut_dulint_cmp(log_sys->written_to_all_lsn, lsn) >= 0
- || (ut_dulint_cmp(log_sys->written_to_some_lsn, lsn)
- >= 0
+ && (log_sys->written_to_all_lsn >= lsn
+ || (log_sys->written_to_some_lsn >= lsn
&& wait != LOG_WAIT_ALL_GROUPS))) {
mutex_exit(&(log_sys->mutex));
@@ -1346,8 +1398,7 @@ loop:
/* A write (+ possibly flush to disk) is running */
if (flush_to_disk
- && ut_dulint_cmp(log_sys->current_flush_lsn, lsn)
- >= 0) {
+ && log_sys->current_flush_lsn >= lsn) {
/* The write + flush will write enough: wait for it to
complete */
@@ -1355,7 +1406,7 @@ loop:
}
if (!flush_to_disk
- && ut_dulint_cmp(log_sys->write_lsn, lsn) >= 0) {
+ && log_sys->write_lsn >= lsn) {
/* The write will write enough: wait for it to
complete */
@@ -1384,19 +1435,15 @@ loop:
#ifdef UNIV_DEBUG
if (log_debug_writes) {
fprintf(stderr,
- "Writing log from %lu %lu up to lsn %lu %lu\n",
- (ulong) ut_dulint_get_high(
- log_sys->written_to_all_lsn),
- (ulong) ut_dulint_get_low(
- log_sys->written_to_all_lsn),
- (ulong) ut_dulint_get_high(log_sys->lsn),
- (ulong) ut_dulint_get_low(log_sys->lsn));
+ "Writing log from %llu up to lsn %llu\n",
+ log_sys->written_to_all_lsn,
+ log_sys->lsn);
}
#endif /* UNIV_DEBUG */
log_sys->n_pending_writes++;
group = UT_LIST_GET_FIRST(log_sys->log_groups);
- group->n_pending_writes++; /* We assume here that we have only
+ group->n_pending_writes++; /*!< We assume here that we have only
one log group! */
os_event_reset(log_sys->no_flush_event);
@@ -1442,7 +1489,7 @@ loop:
log_group_write_buf(
group, log_sys->buf + area_start,
area_end - area_start,
- ut_dulint_align_down(log_sys->written_to_all_lsn,
+ ut_uint64_align_down(log_sys->written_to_all_lsn,
OS_FILE_LOG_BLOCK_SIZE),
start_offset - area_start);
@@ -1489,23 +1536,30 @@ loop:
do_waits:
mutex_exit(&(log_sys->mutex));
- if (wait == LOG_WAIT_ONE_GROUP) {
+ switch (wait) {
+ case LOG_WAIT_ONE_GROUP:
os_event_wait(log_sys->one_flushed_event);
- } else if (wait == LOG_WAIT_ALL_GROUPS) {
+ break;
+ case LOG_WAIT_ALL_GROUPS:
os_event_wait(log_sys->no_flush_event);
- } else {
- ut_ad(wait == LOG_NO_WAIT);
+ break;
+#ifdef UNIV_DEBUG
+ case LOG_NO_WAIT:
+ break;
+ default:
+ ut_error;
+#endif /* UNIV_DEBUG */
}
}
-/********************************************************************
+/****************************************************************//**
Does a syncronous flush of the log buffer to disk. */
-
+UNIV_INTERN
void
log_buffer_flush_to_disk(void)
/*==========================*/
{
- dulint lsn;
+ ib_uint64_t lsn;
mutex_enter(&(log_sys->mutex));
@@ -1516,7 +1570,30 @@ log_buffer_flush_to_disk(void)
log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE);
}
+/****************************************************************//**
+This function writes the log buffer to the log file and if 'flush'
+is set it forces a flush of the log file as well. This is meant to be
+called from background master thread only as it does not wait for
+the write (+ possible flush) to finish. */
+UNIV_INTERN
+void
+log_buffer_sync_in_background(
+/*==========================*/
+ ibool flush) /*!< in: flush the logs to disk */
+{
+ ib_uint64_t lsn;
+
+ mutex_enter(&(log_sys->mutex));
+
+ lsn = log_sys->lsn;
+
+ mutex_exit(&(log_sys->mutex));
+
+ log_write_up_to(lsn, LOG_NO_WAIT, flush);
+}
+
/********************************************************************
+
Tries to establish a big enough margin of free space in the log buffer, such
that a new log entry can be catenated without an immediate need for a flush. */
static
@@ -1524,9 +1601,8 @@ void
log_flush_margin(void)
/*==================*/
{
- ibool do_flush = FALSE;
- log_t* log = log_sys;
- dulint lsn;
+ log_t* log = log_sys;
+ ib_uint64_t lsn = 0;
mutex_enter(&(log->mutex));
@@ -1536,33 +1612,32 @@ log_flush_margin(void)
/* A flush is running: hope that it will provide enough
free space */
} else {
- do_flush = TRUE;
lsn = log->lsn;
}
}
mutex_exit(&(log->mutex));
- if (do_flush) {
+ if (lsn) {
log_write_up_to(lsn, LOG_NO_WAIT, FALSE);
}
}
-/********************************************************************
+/****************************************************************//**
Advances the smallest lsn for which there are unflushed dirty blocks in the
buffer pool. NOTE: this function may only be called if the calling thread owns
-no synchronization objects! */
-
+no synchronization objects!
+@return FALSE if there was a flush batch of the same type running,
+which means that we could not start this flush batch */
+UNIV_INTERN
ibool
log_preflush_pool_modified_pages(
/*=============================*/
- /* out: FALSE if there was a flush batch of
- the same type running, which means that we
- could not start this flush batch */
- dulint new_oldest, /* in: try to advance oldest_modified_lsn
- at least to this lsn */
- ibool sync) /* in: TRUE if synchronous operation is
- desired */
+ ib_uint64_t new_oldest, /*!< in: try to advance
+ oldest_modified_lsn at least
+ to this lsn */
+ ibool sync) /*!< in: TRUE if synchronous
+ operation is desired */
{
ulint n_pages;
@@ -1593,7 +1668,7 @@ log_preflush_pool_modified_pages(
return(TRUE);
}
-/**********************************************************
+/******************************************************//**
Completes a checkpoint. */
static
void
@@ -1603,15 +1678,14 @@ log_complete_checkpoint(void)
ut_ad(mutex_own(&(log_sys->mutex)));
ut_ad(log_sys->n_pending_checkpoint_writes == 0);
- log_sys->next_checkpoint_no
- = ut_dulint_add(log_sys->next_checkpoint_no, 1);
+ log_sys->next_checkpoint_no++;
log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn;
rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT);
}
-/**********************************************************
+/******************************************************//**
Completes an asynchronous checkpoint info write i/o to a log file. */
static
void
@@ -1631,16 +1705,16 @@ log_io_complete_checkpoint(void)
mutex_exit(&(log_sys->mutex));
}
-/***********************************************************************
+/*******************************************************************//**
Writes info to a checkpoint about a log group. */
static
void
log_checkpoint_set_nth_group_info(
/*==============================*/
- byte* buf, /* in: buffer for checkpoint info */
- ulint n, /* in: nth slot */
- ulint file_no,/* in: archived file number */
- ulint offset) /* in: archived file offset */
+ byte* buf, /*!< in: buffer for checkpoint info */
+ ulint n, /*!< in: nth slot */
+ ulint file_no,/*!< in: archived file number */
+ ulint offset) /*!< in: archived file offset */
{
ut_ad(n < LOG_MAX_N_GROUPS);
@@ -1650,16 +1724,16 @@ log_checkpoint_set_nth_group_info(
+ 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET, offset);
}
-/***********************************************************************
+/*******************************************************************//**
Gets info from a checkpoint about a log group. */
-
+UNIV_INTERN
void
log_checkpoint_get_nth_group_info(
/*==============================*/
- byte* buf, /* in: buffer containing checkpoint info */
- ulint n, /* in: nth slot */
- ulint* file_no,/* out: archived file number */
- ulint* offset) /* out: archived file offset */
+ const byte* buf, /*!< in: buffer containing checkpoint info */
+ ulint n, /*!< in: nth slot */
+ ulint* file_no,/*!< out: archived file number */
+ ulint* offset) /*!< out: archived file offset */
{
ut_ad(n < LOG_MAX_N_GROUPS);
@@ -1669,23 +1743,23 @@ log_checkpoint_get_nth_group_info(
+ 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET);
}
-/**********************************************************
+/******************************************************//**
Writes the checkpoint info to a log group header. */
static
void
log_group_checkpoint(
/*=================*/
- log_group_t* group) /* in: log group */
+ log_group_t* group) /*!< in: log group */
{
log_group_t* group2;
#ifdef UNIV_LOG_ARCHIVE
- dulint archived_lsn;
- dulint next_archived_lsn;
+ ib_uint64_t archived_lsn;
+ ib_uint64_t next_archived_lsn;
#endif /* UNIV_LOG_ARCHIVE */
- ulint write_offset;
- ulint fold;
- byte* buf;
- ulint i;
+ ulint write_offset;
+ ulint fold;
+ byte* buf;
+ ulint i;
ut_ad(mutex_own(&(log_sys->mutex)));
#if LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE
@@ -1694,9 +1768,8 @@ log_group_checkpoint(
buf = group->checkpoint_buf;
- mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no);
- mach_write_to_8(buf + LOG_CHECKPOINT_LSN,
- log_sys->next_checkpoint_lsn);
+ mach_write_ull(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no);
+ mach_write_ull(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn);
mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET,
log_group_calc_lsn_offset(
@@ -1706,20 +1779,19 @@ log_group_checkpoint(
#ifdef UNIV_LOG_ARCHIVE
if (log_sys->archiving_state == LOG_ARCH_OFF) {
- archived_lsn = ut_dulint_max;
+ archived_lsn = IB_ULONGLONG_MAX;
} else {
archived_lsn = log_sys->archived_lsn;
- if (0 != ut_dulint_cmp(archived_lsn,
- log_sys->next_archived_lsn)) {
+ if (archived_lsn != log_sys->next_archived_lsn) {
next_archived_lsn = log_sys->next_archived_lsn;
/* For debugging only */
}
}
- mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, archived_lsn);
+ mach_write_ull(buf + LOG_CHECKPOINT_ARCHIVED_LSN, archived_lsn);
#else /* UNIV_LOG_ARCHIVE */
- mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, ut_dulint_max);
+ mach_write_ull(buf + LOG_CHECKPOINT_ARCHIVED_LSN, IB_ULONGLONG_MAX);
#endif /* UNIV_LOG_ARCHIVE */
for (i = 0; i < LOG_MAX_N_GROUPS; i++) {
@@ -1760,7 +1832,7 @@ log_group_checkpoint(
/* We alternate the physical place of the checkpoint info in the first
log file */
- if (ut_dulint_get_low(log_sys->next_checkpoint_no) % 2 == 0) {
+ if ((log_sys->next_checkpoint_no & 1) == 0) {
write_offset = LOG_CHECKPOINT_1;
} else {
write_offset = LOG_CHECKPOINT_2;
@@ -1781,7 +1853,7 @@ log_group_checkpoint(
added with 1, as we want to distinguish between a normal log
file write and a checkpoint field write */
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->space_id,
+ fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->space_id, 0,
write_offset / UNIV_PAGE_SIZE,
write_offset % UNIV_PAGE_SIZE,
OS_FILE_LOG_BLOCK_SIZE,
@@ -1790,28 +1862,30 @@ log_group_checkpoint(
ut_ad(((ulint)group & 0x1UL) == 0);
}
}
+#endif /* !UNIV_HOTBACKUP */
-/**********************************************************
+#ifdef UNIV_HOTBACKUP
+/******************************************************//**
Writes info to a buffer of a log group when log files are created in
backup restoration. */
-
+UNIV_INTERN
void
log_reset_first_header_and_checkpoint(
/*==================================*/
- byte* hdr_buf,/* in: buffer which will be written to the start
- of the first log file */
- dulint start) /* in: lsn of the start of the first log file;
- we pretend that there is a checkpoint at
- start + LOG_BLOCK_HDR_SIZE */
+ byte* hdr_buf,/*!< in: buffer which will be written to the
+ start of the first log file */
+ ib_uint64_t start) /*!< in: lsn of the start of the first log file;
+ we pretend that there is a checkpoint at
+ start + LOG_BLOCK_HDR_SIZE */
{
- ulint fold;
- byte* buf;
- dulint lsn;
+ ulint fold;
+ byte* buf;
+ ib_uint64_t lsn;
mach_write_to_4(hdr_buf + LOG_GROUP_ID, 0);
- mach_write_to_8(hdr_buf + LOG_FILE_START_LSN, start);
+ mach_write_ull(hdr_buf + LOG_FILE_START_LSN, start);
- lsn = ut_dulint_add(start, LOG_BLOCK_HDR_SIZE);
+ lsn = start + LOG_BLOCK_HDR_SIZE;
/* Write the label of ibbackup --restore */
strcpy((char*) hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
@@ -1821,15 +1895,15 @@ log_reset_first_header_and_checkpoint(
+ (sizeof "ibbackup ") - 1));
buf = hdr_buf + LOG_CHECKPOINT_1;
- mach_write_to_8(buf + LOG_CHECKPOINT_NO, ut_dulint_zero);
- mach_write_to_8(buf + LOG_CHECKPOINT_LSN, lsn);
+ mach_write_ull(buf + LOG_CHECKPOINT_NO, 0);
+ mach_write_ull(buf + LOG_CHECKPOINT_LSN, lsn);
mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET,
LOG_FILE_HDR_SIZE + LOG_BLOCK_HDR_SIZE);
mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, 2 * 1024 * 1024);
- mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, ut_dulint_max);
+ mach_write_ull(buf + LOG_CHECKPOINT_ARCHIVED_LSN, IB_ULONGLONG_MAX);
fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
@@ -1842,28 +1916,30 @@ log_reset_first_header_and_checkpoint(
allocated size in the tablespace, but unfortunately we do not
know it here */
}
+#endif /* UNIV_HOTBACKUP */
-/**********************************************************
+#ifndef UNIV_HOTBACKUP
+/******************************************************//**
Reads a checkpoint info from a log group header to log_sys->checkpoint_buf. */
-
+UNIV_INTERN
void
log_group_read_checkpoint_info(
/*===========================*/
- log_group_t* group, /* in: log group */
- ulint field) /* in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
+ log_group_t* group, /*!< in: log group */
+ ulint field) /*!< in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
{
ut_ad(mutex_own(&(log_sys->mutex)));
log_sys->n_log_ios++;
- fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->space_id,
+ fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->space_id, 0,
field / UNIV_PAGE_SIZE, field % UNIV_PAGE_SIZE,
OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL);
}
-/**********************************************************
+/******************************************************//**
Writes checkpoint info to groups. */
-
+UNIV_INTERN
void
log_groups_write_checkpoint_info(void)
/*==================================*/
@@ -1881,27 +1957,26 @@ log_groups_write_checkpoint_info(void)
}
}
-/**********************************************************
+/******************************************************//**
Makes a checkpoint. Note that this function does not flush dirty
blocks from the buffer pool: it only checks what is lsn of the oldest
modification in the pool, and writes information about the lsn in
-log files. Use log_make_checkpoint_at to flush also the pool. */
-
+log files. Use log_make_checkpoint_at to also flush the pool.
+@return TRUE if success, FALSE if a checkpoint write was already running */
+UNIV_INTERN
ibool
log_checkpoint(
/*===========*/
- /* out: TRUE if success, FALSE if a checkpoint
- write was already running */
- ibool sync, /* in: TRUE if synchronous operation is
+ ibool sync, /*!< in: TRUE if synchronous operation is
desired */
- ibool write_always) /* in: the function normally checks if the
+ ibool write_always) /*!< in: the function normally checks if
the new checkpoint would have a greater
lsn than the previous one: if not, then no
physical write is done; by setting this
parameter TRUE, a physical write will always be
made to log files */
{
- dulint oldest_lsn;
+ ib_uint64_t oldest_lsn;
if (recv_recovery_is_on()) {
recv_apply_hashed_log_recs(TRUE);
@@ -1913,6 +1988,7 @@ log_checkpoint(
mutex_enter(&(log_sys->mutex));
+ ut_ad(!recv_no_log_write);
oldest_lsn = log_buf_pool_get_oldest_modification();
mutex_exit(&(log_sys->mutex));
@@ -1930,14 +2006,14 @@ log_checkpoint(
mutex_enter(&(log_sys->mutex));
if (!write_always
- && ut_dulint_cmp(log_sys->last_checkpoint_lsn, oldest_lsn) >= 0) {
+ && log_sys->last_checkpoint_lsn >= oldest_lsn) {
mutex_exit(&(log_sys->mutex));
return(TRUE);
}
- ut_ad(ut_dulint_cmp(log_sys->written_to_all_lsn, oldest_lsn) >= 0);
+ ut_ad(log_sys->written_to_all_lsn >= oldest_lsn);
if (log_sys->n_pending_checkpoint_writes > 0) {
/* A checkpoint write is running */
@@ -1957,10 +2033,9 @@ log_checkpoint(
#ifdef UNIV_DEBUG
if (log_debug_writes) {
- fprintf(stderr, "Making checkpoint no %lu at lsn %lu %lu\n",
- (ulong) ut_dulint_get_low(log_sys->next_checkpoint_no),
- (ulong) ut_dulint_get_high(oldest_lsn),
- (ulong) ut_dulint_get_low(oldest_lsn));
+ fprintf(stderr, "Making checkpoint no %lu at lsn %llu\n",
+ (ulong) log_sys->next_checkpoint_no,
+ oldest_lsn);
}
#endif /* UNIV_DEBUG */
@@ -1977,40 +2052,31 @@ log_checkpoint(
return(TRUE);
}
-/********************************************************************
+/****************************************************************//**
Makes a checkpoint at a given lsn or later. */
-
+UNIV_INTERN
void
log_make_checkpoint_at(
/*===================*/
- dulint lsn, /* in: make a checkpoint at this or a later
- lsn, if ut_dulint_max, makes a checkpoint at
- the latest lsn */
- ibool write_always) /* in: the function normally checks if the
- the new checkpoint would have a greater
- lsn than the previous one: if not, then no
- physical write is done; by setting this
- parameter TRUE, a physical write will always be
- made to log files */
+ ib_uint64_t lsn, /*!< in: make a checkpoint at this or a
+ later lsn, if IB_ULONGLONG_MAX, makes
+ a checkpoint at the latest lsn */
+ ibool write_always) /*!< in: the function normally checks if
+ the new checkpoint would have a
+ greater lsn than the previous one: if
+ not, then no physical write is done;
+ by setting this parameter TRUE, a
+ physical write will always be made to
+ log files */
{
- ibool success;
-
/* Preflush pages synchronously */
- success = FALSE;
-
- while (!success) {
- success = log_preflush_pool_modified_pages(lsn, TRUE);
- }
-
- success = FALSE;
+ while (!log_preflush_pool_modified_pages(lsn, TRUE));
- while (!success) {
- success = log_checkpoint(TRUE, write_always);
- }
+ while (!log_checkpoint(TRUE, write_always));
}
-/********************************************************************
+/****************************************************************//**
Tries to establish a big enough margin of free space in the log groups, such
that a new log entry can be catenated without an immediate need for a
checkpoint. NOTE: this function may only be called if the calling thread
@@ -2020,21 +2086,22 @@ void
log_checkpoint_margin(void)
/*=======================*/
{
- log_t* log = log_sys;
- ulint age;
- ulint checkpoint_age;
- ulint advance;
- dulint oldest_lsn;
- ibool sync;
- ibool checkpoint_sync;
- ibool do_checkpoint;
- ibool success;
+ log_t* log = log_sys;
+ ib_uint64_t age;
+ ib_uint64_t checkpoint_age;
+ ib_uint64_t advance;
+ ib_uint64_t oldest_lsn;
+ ibool sync;
+ ibool checkpoint_sync;
+ ibool do_checkpoint;
+ ibool success;
loop:
sync = FALSE;
checkpoint_sync = FALSE;
do_checkpoint = FALSE;
mutex_enter(&(log->mutex));
+ ut_ad(!recv_no_log_write);
if (log->check_flush_or_checkpoint == FALSE) {
mutex_exit(&(log->mutex));
@@ -2044,7 +2111,7 @@ loop:
oldest_lsn = log_buf_pool_get_oldest_modification();
- age = ut_dulint_minus(log->lsn, oldest_lsn);
+ age = log->lsn - oldest_lsn;
if (age > log->max_modified_age_sync) {
@@ -2060,7 +2127,7 @@ loop:
advance = 0;
}
- checkpoint_age = ut_dulint_minus(log->lsn, log->last_checkpoint_lsn);
+ checkpoint_age = log->lsn - log->last_checkpoint_lsn;
if (checkpoint_age > log->max_checkpoint_age) {
/* A checkpoint is urgent: we do it synchronously */
@@ -2082,7 +2149,7 @@ loop:
mutex_exit(&(log->mutex));
if (advance) {
- dulint new_oldest = ut_dulint_add(oldest_lsn, advance);
+ ib_uint64_t new_oldest = oldest_lsn + advance;
success = log_preflush_pool_modified_pages(new_oldest, sync);
@@ -2112,17 +2179,17 @@ loop:
}
}
-/**********************************************************
+/******************************************************//**
Reads a specified log segment to a buffer. */
-
+UNIV_INTERN
void
log_group_read_log_seg(
/*===================*/
- ulint type, /* in: LOG_ARCHIVE or LOG_RECOVER */
- byte* buf, /* in: buffer where to read */
- log_group_t* group, /* in: log group */
- dulint start_lsn, /* in: read area start */
- dulint end_lsn) /* in: read area end */
+ ulint type, /*!< in: LOG_ARCHIVE or LOG_RECOVER */
+ byte* buf, /*!< in: buffer where to read */
+ log_group_t* group, /*!< in: log group */
+ ib_uint64_t start_lsn, /*!< in: read area start */
+ ib_uint64_t end_lsn) /*!< in: read area end */
{
ulint len;
ulint source_offset;
@@ -2130,15 +2197,11 @@ log_group_read_log_seg(
ut_ad(mutex_own(&(log_sys->mutex)));
- sync = FALSE;
-
- if (type == LOG_RECOVER) {
- sync = TRUE;
- }
+ sync = (type == LOG_RECOVER);
loop:
source_offset = log_group_calc_lsn_offset(start_lsn, group);
- len = ut_dulint_minus(end_lsn, start_lsn);
+ len = (ulint) (end_lsn - start_lsn);
ut_ad(len != 0);
@@ -2156,46 +2219,46 @@ loop:
log_sys->n_log_ios++;
- fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id,
+ fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id, 0,
source_offset / UNIV_PAGE_SIZE, source_offset % UNIV_PAGE_SIZE,
len, buf, NULL);
- start_lsn = ut_dulint_add(start_lsn, len);
+ start_lsn += len;
buf += len;
- if (ut_dulint_cmp(start_lsn, end_lsn) != 0) {
+ if (start_lsn != end_lsn) {
goto loop;
}
}
#ifdef UNIV_LOG_ARCHIVE
-/**********************************************************
+/******************************************************//**
Generates an archived log file name. */
-
+UNIV_INTERN
void
log_archived_file_name_gen(
/*=======================*/
- char* buf, /* in: buffer where to write */
+ char* buf, /*!< in: buffer where to write */
ulint id __attribute__((unused)),
- /* in: group id;
+ /*!< in: group id;
currently we only archive the first group */
- ulint file_no)/* in: file number */
+ ulint file_no)/*!< in: file number */
{
sprintf(buf, "%sib_arch_log_%010lu", srv_arch_dir, (ulong) file_no);
}
-/**********************************************************
+/******************************************************//**
Writes a log file header to a log file space. */
static
void
log_group_archive_file_header_write(
/*================================*/
- log_group_t* group, /* in: log group */
- ulint nth_file, /* in: header to the nth file in the
+ log_group_t* group, /*!< in: log group */
+ ulint nth_file, /*!< in: header to the nth file in the
archive log file space */
- ulint file_no, /* in: archived file number */
- dulint start_lsn) /* in: log file data starts at this
+ ulint file_no, /*!< in: archived file number */
+ ib_uint64_t start_lsn) /*!< in: log file data starts at this
lsn */
{
byte* buf;
@@ -2208,7 +2271,7 @@ log_group_archive_file_header_write(
buf = *(group->archive_file_header_bufs + nth_file);
mach_write_to_4(buf + LOG_GROUP_ID, group->id);
- mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn);
+ mach_write_ull(buf + LOG_FILE_START_LSN, start_lsn);
mach_write_to_4(buf + LOG_FILE_NO, file_no);
mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, FALSE);
@@ -2224,16 +2287,16 @@ log_group_archive_file_header_write(
buf, &log_archive_io);
}
-/**********************************************************
+/******************************************************//**
Writes a log file header to a completed archived log file. */
static
void
log_group_archive_completed_header_write(
/*=====================================*/
- log_group_t* group, /* in: log group */
- ulint nth_file, /* in: header to the nth file in the
+ log_group_t* group, /*!< in: log group */
+ ulint nth_file, /*!< in: header to the nth file in the
archive log file space */
- dulint end_lsn) /* in: end lsn of the file */
+ ib_uint64_t end_lsn) /*!< in: end lsn of the file */
{
byte* buf;
ulint dest_offset;
@@ -2244,7 +2307,7 @@ log_group_archive_completed_header_write(
buf = *(group->archive_file_header_bufs + nth_file);
mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, TRUE);
- mach_write_to_8(buf + LOG_FILE_END_LSN, end_lsn);
+ mach_write_ull(buf + LOG_FILE_END_LSN, end_lsn);
dest_offset = nth_file * group->file_size + LOG_FILE_ARCH_COMPLETED;
@@ -2258,34 +2321,34 @@ log_group_archive_completed_header_write(
&log_archive_io);
}
-/**********************************************************
+/******************************************************//**
Does the archive writes for a single log group. */
static
void
log_group_archive(
/*==============*/
- log_group_t* group) /* in: log group */
+ log_group_t* group) /*!< in: log group */
{
- os_file_t file_handle;
- dulint start_lsn;
- dulint end_lsn;
- char name[1024];
- byte* buf;
- ulint len;
- ibool ret;
- ulint next_offset;
- ulint n_files;
- ulint open_mode;
+ os_file_t file_handle;
+ ib_uint64_t start_lsn;
+ ib_uint64_t end_lsn;
+ char name[1024];
+ byte* buf;
+ ulint len;
+ ibool ret;
+ ulint next_offset;
+ ulint n_files;
+ ulint open_mode;
ut_ad(mutex_own(&(log_sys->mutex)));
start_lsn = log_sys->archived_lsn;
- ut_a(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
+ ut_a(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
end_lsn = log_sys->next_archived_lsn;
- ut_a(ut_dulint_get_low(end_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
+ ut_a(end_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
buf = log_sys->archive_buf;
@@ -2354,7 +2417,7 @@ loop:
}
}
- len = ut_dulint_minus(end_lsn, start_lsn);
+ len = end_lsn - start_lsn;
if (group->file_size < (next_offset % group->file_size) + len) {
@@ -2364,10 +2427,9 @@ loop:
#ifdef UNIV_DEBUG
if (log_debug_writes) {
fprintf(stderr,
- "Archiving starting at lsn %lu %lu, len %lu"
+ "Archiving starting at lsn %llu, len %lu"
" to group %lu\n",
- (ulong) ut_dulint_get_high(start_lsn),
- (ulong) ut_dulint_get_low(start_lsn),
+ start_lsn,
(ulong) len, (ulong) group->id);
}
#endif /* UNIV_DEBUG */
@@ -2381,7 +2443,7 @@ loop:
ut_calc_align(len, OS_FILE_LOG_BLOCK_SIZE), buf,
&log_archive_io);
- start_lsn = ut_dulint_add(start_lsn, len);
+ start_lsn += len;
next_offset += len;
buf += len;
@@ -2389,7 +2451,7 @@ loop:
n_files++;
}
- if (ut_dulint_cmp(end_lsn, start_lsn) != 0) {
+ if (end_lsn != start_lsn) {
goto loop;
}
@@ -2400,7 +2462,7 @@ loop:
ut_a(group->next_archived_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
}
-/*********************************************************
+/*****************************************************//**
(Writes to the archive of each log group.) Currently, only the first
group is archived. */
static
@@ -2417,7 +2479,7 @@ log_archive_groups(void)
log_group_archive(group);
}
-/*********************************************************
+/*****************************************************//**
Completes the archiving write phase for (each log group), currently,
the first log group. */
static
@@ -2429,8 +2491,8 @@ log_archive_write_complete_groups(void)
ulint end_offset;
ulint trunc_files;
ulint n_files;
- dulint start_lsn;
- dulint end_lsn;
+ ib_uint64_t start_lsn;
+ ib_uint64_t end_lsn;
ulint i;
ut_ad(mutex_own(&(log_sys->mutex)));
@@ -2466,16 +2528,14 @@ log_archive_write_complete_groups(void)
#endif /* UNIV_DEBUG */
/* Calculate the archive file space start lsn */
- start_lsn = ut_dulint_subtract(
- log_sys->next_archived_lsn,
- end_offset - LOG_FILE_HDR_SIZE + trunc_files
- * (group->file_size - LOG_FILE_HDR_SIZE));
+ start_lsn = log_sys->next_archived_lsn
+ - (end_offset - LOG_FILE_HDR_SIZE + trunc_files
+ * (group->file_size - LOG_FILE_HDR_SIZE));
end_lsn = start_lsn;
for (i = 0; i < trunc_files; i++) {
- end_lsn = ut_dulint_add(end_lsn,
- group->file_size - LOG_FILE_HDR_SIZE);
+ end_lsn += group->file_size - LOG_FILE_HDR_SIZE;
/* Write a notice to the headers of archived log
files that the file write has been completed */
@@ -2493,7 +2553,7 @@ log_archive_write_complete_groups(void)
#endif /* UNIV_DEBUG */
}
-/**********************************************************
+/******************************************************//**
Completes an archiving i/o. */
static
void
@@ -2529,7 +2589,7 @@ log_archive_check_completion_low(void)
}
}
-/**********************************************************
+/******************************************************//**
Completes an archiving i/o. */
static
void
@@ -2557,42 +2617,39 @@ log_io_complete_archive(void)
mutex_exit(&(log_sys->mutex));
}
-/************************************************************************
-Starts an archiving operation. */
-
+/********************************************************************//**
+Starts an archiving operation.
+@return TRUE on success, FALSE if an archiving operation was already running */
+UNIV_INTERN
ibool
log_archive_do(
/*===========*/
- /* out: TRUE if succeed, FALSE if an archiving
- operation was already running */
- ibool sync, /* in: TRUE if synchronous operation is desired */
- ulint* n_bytes)/* out: archive log buffer size, 0 if nothing to
+ ibool sync, /*!< in: TRUE if synchronous operation is desired */
+ ulint* n_bytes)/*!< out: archive log buffer size, 0 if nothing to
archive */
{
- ibool calc_new_limit;
- dulint start_lsn;
- dulint limit_lsn;
+ ibool calc_new_limit;
+ ib_uint64_t start_lsn;
+ ib_uint64_t limit_lsn;
calc_new_limit = TRUE;
loop:
mutex_enter(&(log_sys->mutex));
- if (log_sys->archiving_state == LOG_ARCH_OFF) {
+ switch (log_sys->archiving_state) {
+ case LOG_ARCH_OFF:
+arch_none:
mutex_exit(&(log_sys->mutex));
*n_bytes = 0;
return(TRUE);
-
- } else if (log_sys->archiving_state == LOG_ARCH_STOPPED
- || log_sys->archiving_state == LOG_ARCH_STOPPING2) {
-
+ case LOG_ARCH_STOPPED:
+ case LOG_ARCH_STOPPING2:
mutex_exit(&(log_sys->mutex));
os_event_wait(log_sys->archiving_on);
- mutex_enter(&(log_sys->mutex));
-
goto loop;
}
@@ -2600,28 +2657,23 @@ loop:
if (calc_new_limit) {
ut_a(log_sys->archive_buf_size % OS_FILE_LOG_BLOCK_SIZE == 0);
- limit_lsn = ut_dulint_add(start_lsn,
- log_sys->archive_buf_size);
+ limit_lsn = start_lsn + log_sys->archive_buf_size;
*n_bytes = log_sys->archive_buf_size;
- if (ut_dulint_cmp(limit_lsn, log_sys->lsn) >= 0) {
+ if (limit_lsn >= log_sys->lsn) {
- limit_lsn = ut_dulint_align_down(
+ limit_lsn = ut_uint64_align_down(
log_sys->lsn, OS_FILE_LOG_BLOCK_SIZE);
}
}
- if (ut_dulint_cmp(log_sys->archived_lsn, limit_lsn) >= 0) {
-
- mutex_exit(&(log_sys->mutex));
+ if (log_sys->archived_lsn >= limit_lsn) {
- *n_bytes = 0;
-
- return(TRUE);
+ goto arch_none;
}
- if (ut_dulint_cmp(log_sys->written_to_all_lsn, limit_lsn) < 0) {
+ if (log_sys->written_to_all_lsn < limit_lsn) {
mutex_exit(&(log_sys->mutex));
@@ -2656,11 +2708,8 @@ loop:
#ifdef UNIV_DEBUG
if (log_debug_writes) {
fprintf(stderr,
- "Archiving from lsn %lu %lu to lsn %lu %lu\n",
- (ulong) ut_dulint_get_high(log_sys->archived_lsn),
- (ulong) ut_dulint_get_low(log_sys->archived_lsn),
- (ulong) ut_dulint_get_high(limit_lsn),
- (ulong) ut_dulint_get_low(limit_lsn));
+ "Archiving from lsn %llu to lsn %llu\n",
+ log_sys->archived_lsn, limit_lsn);
}
#endif /* UNIV_DEBUG */
@@ -2682,7 +2731,7 @@ loop:
return(TRUE);
}
-/********************************************************************
+/****************************************************************//**
Writes the log contents to the archive at least up to the lsn when this
function was called. */
static
@@ -2690,8 +2739,8 @@ void
log_archive_all(void)
/*=================*/
{
- dulint present_lsn;
- ulint dummy;
+ ib_uint64_t present_lsn;
+ ulint dummy;
mutex_enter(&(log_sys->mutex));
@@ -2710,7 +2759,7 @@ log_archive_all(void)
for (;;) {
mutex_enter(&(log_sys->mutex));
- if (ut_dulint_cmp(present_lsn, log_sys->archived_lsn) <= 0) {
+ if (present_lsn <= log_sys->archived_lsn) {
mutex_exit(&(log_sys->mutex));
@@ -2723,14 +2772,14 @@ log_archive_all(void)
}
}
-/*********************************************************
+/*****************************************************//**
Closes the possible open archive log file (for each group) the first group,
and if it was open, increments the group file count by 2, if desired. */
static
void
log_archive_close_groups(
/*=====================*/
- ibool increment_file_count) /* in: TRUE if we want to increment
+ ibool increment_file_count) /*!< in: TRUE if we want to increment
the file count */
{
log_group_t* group;
@@ -2775,16 +2824,16 @@ log_archive_close_groups(
}
}
-/********************************************************************
+/****************************************************************//**
Writes the log contents to the archive up to the lsn when this function was
called, and stops the archiving. When archiving is started again, the archived
log file numbers start from 2 higher, so that the archiving will not write
-again to the archived log files which exist when this function returns. */
-
+again to the archived log files which exist when this function returns.
+@return DB_SUCCESS or DB_ERROR */
+UNIV_INTERN
ulint
log_archive_stop(void)
/*==================*/
- /* out: DB_SUCCESS or DB_ERROR */
{
ibool success;
@@ -2842,13 +2891,13 @@ log_archive_stop(void)
return(DB_SUCCESS);
}
-/********************************************************************
-Starts again archiving which has been stopped. */
-
+/****************************************************************//**
+Starts again archiving which has been stopped.
+@return DB_SUCCESS or DB_ERROR */
+UNIV_INTERN
ulint
log_archive_start(void)
/*===================*/
- /* out: DB_SUCCESS or DB_ERROR */
{
mutex_enter(&(log_sys->mutex));
@@ -2868,13 +2917,13 @@ log_archive_start(void)
return(DB_SUCCESS);
}
-/********************************************************************
-Stop archiving the log so that a gap may occur in the archived log files. */
-
+/****************************************************************//**
+Stop archiving the log so that a gap may occur in the archived log files.
+@return DB_SUCCESS or DB_ERROR */
+UNIV_INTERN
ulint
log_archive_noarchivelog(void)
/*==========================*/
- /* out: DB_SUCCESS or DB_ERROR */
{
loop:
mutex_enter(&(log_sys->mutex));
@@ -2900,13 +2949,13 @@ loop:
goto loop;
}
-/********************************************************************
-Start archiving the log so that a gap may occur in the archived log files. */
-
+/****************************************************************//**
+Start archiving the log so that a gap may occur in the archived log files.
+@return DB_SUCCESS or DB_ERROR */
+UNIV_INTERN
ulint
log_archive_archivelog(void)
/*========================*/
- /* out: DB_SUCCESS or DB_ERROR */
{
mutex_enter(&(log_sys->mutex));
@@ -2915,7 +2964,7 @@ log_archive_archivelog(void)
log_sys->archiving_state = LOG_ARCH_ON;
log_sys->archived_lsn
- = ut_dulint_align_down(log_sys->lsn,
+ = ut_uint64_align_down(log_sys->lsn,
OS_FILE_LOG_BLOCK_SIZE);
mutex_exit(&(log_sys->mutex));
@@ -2927,7 +2976,7 @@ log_archive_archivelog(void)
return(DB_ERROR);
}
-/********************************************************************
+/****************************************************************//**
Tries to establish a big enough margin of free space in the log groups, such
that a new log entry can be catenated without an immediate need for
archiving. */
@@ -2949,7 +2998,7 @@ loop:
return;
}
- age = ut_dulint_minus(log->lsn, log->archived_lsn);
+ age = log->lsn - log->archived_lsn;
if (age > log->max_archived_lsn_age) {
@@ -2982,12 +3031,12 @@ loop:
}
#endif /* UNIV_LOG_ARCHIVE */
-/************************************************************************
+/********************************************************************//**
Checks that there is enough free space in the log to start a new query step.
Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
function may only be called if the calling thread owns no synchronization
objects! */
-
+UNIV_INTERN
void
log_check_margins(void)
/*===================*/
@@ -3002,6 +3051,7 @@ loop:
#endif /* UNIV_LOG_ARCHIVE */
mutex_enter(&(log_sys->mutex));
+ ut_ad(!recv_no_log_write);
if (log_sys->check_flush_or_checkpoint) {
@@ -3013,18 +3063,18 @@ loop:
mutex_exit(&(log_sys->mutex));
}
-/********************************************************************
+/****************************************************************//**
Makes a checkpoint at the latest lsn and writes it to first page of each
data file in the database, so that we know that the file spaces contain
all modifications up to that lsn. This can only be called at database
shutdown. This function also writes all log in log files to the log archive. */
-
+UNIV_INTERN
void
logs_empty_and_mark_files_at_shutdown(void)
/*=======================================*/
{
- dulint lsn;
- ulint arch_log_no;
+ ib_uint64_t lsn;
+ ulint arch_log_no;
if (srv_print_verbose_log) {
ut_print_timestamp(stderr);
@@ -3113,19 +3163,16 @@ loop:
log_archive_all();
#endif /* UNIV_LOG_ARCHIVE */
- log_make_checkpoint_at(ut_dulint_max, TRUE);
+ log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
mutex_enter(&(log_sys->mutex));
lsn = log_sys->lsn;
- if ((ut_dulint_cmp(lsn, log_sys->last_checkpoint_lsn) != 0)
+ if (lsn != log_sys->last_checkpoint_lsn
#ifdef UNIV_LOG_ARCHIVE
|| (srv_log_archive_on
- && ut_dulint_cmp(lsn,
- ut_dulint_add(log_sys->archived_lsn,
- LOG_BLOCK_HDR_SIZE))
- != 0)
+ && lsn != log_sys->archived_lsn + LOG_BLOCK_HDR_SIZE)
#endif /* UNIV_LOG_ARCHIVE */
) {
@@ -3180,17 +3227,14 @@ loop:
/* Make some checks that the server really is quiet */
ut_a(srv_n_threads_active[SRV_MASTER] == 0);
ut_a(buf_all_freed());
- ut_a(0 == ut_dulint_cmp(lsn, log_sys->lsn));
+ ut_a(lsn == log_sys->lsn);
- if (ut_dulint_cmp(lsn, srv_start_lsn) < 0) {
+ if (lsn < srv_start_lsn) {
fprintf(stderr,
"InnoDB: Error: log sequence number"
- " at shutdown %lu %lu\n"
- "InnoDB: is lower than at startup %lu %lu!\n",
- (ulong) ut_dulint_get_high(lsn),
- (ulong) ut_dulint_get_low(lsn),
- (ulong) ut_dulint_get_high(srv_start_lsn),
- (ulong) ut_dulint_get_low(srv_start_lsn));
+ " at shutdown %llu\n"
+ "InnoDB: is lower than at startup %llu!\n",
+ lsn, srv_start_lsn);
}
srv_shutdown_lsn = lsn;
@@ -3204,27 +3248,29 @@ loop:
/* Make some checks that the server really is quiet */
ut_a(srv_n_threads_active[SRV_MASTER] == 0);
ut_a(buf_all_freed());
- ut_a(0 == ut_dulint_cmp(lsn, log_sys->lsn));
+ ut_a(lsn == log_sys->lsn);
}
-/**********************************************************
+#ifdef UNIV_LOG_DEBUG
+/******************************************************//**
Checks by parsing that the catenated log segment for a single mtr is
consistent. */
-
+UNIV_INTERN
ibool
log_check_log_recs(
/*===============*/
- byte* buf, /* in: pointer to the start of the log segment
- in the log_sys->buf log buffer */
- ulint len, /* in: segment length in bytes */
- dulint buf_start_lsn) /* in: buffer start lsn */
-{
- dulint contiguous_lsn;
- dulint scanned_lsn;
- byte* start;
- byte* end;
- byte* buf1;
- byte* scan_buf;
+ const byte* buf, /*!< in: pointer to the start of
+ the log segment in the
+ log_sys->buf log buffer */
+ ulint len, /*!< in: segment length in bytes */
+ ib_uint64_t buf_start_lsn) /*!< in: buffer start lsn */
+{
+ ib_uint64_t contiguous_lsn;
+ ib_uint64_t scanned_lsn;
+ const byte* start;
+ const byte* end;
+ byte* buf1;
+ byte* scan_buf;
ut_ad(mutex_own(&(log_sys->mutex)));
@@ -3241,32 +3287,30 @@ log_check_log_recs(
ut_memcpy(scan_buf, start, end - start);
- recv_scan_log_recs(TRUE,
- (buf_pool->n_frames
+ recv_scan_log_recs((buf_pool->curr_size
- recv_n_pool_free_frames) * UNIV_PAGE_SIZE,
FALSE, scan_buf, end - start,
- ut_dulint_align_down(buf_start_lsn,
+ ut_uint64_align_down(buf_start_lsn,
OS_FILE_LOG_BLOCK_SIZE),
&contiguous_lsn, &scanned_lsn);
- ut_a(ut_dulint_cmp(scanned_lsn, ut_dulint_add(buf_start_lsn, len))
- == 0);
- ut_a(ut_dulint_cmp(recv_sys->recovered_lsn, scanned_lsn) == 0);
+ ut_a(scanned_lsn == buf_start_lsn + len);
+ ut_a(recv_sys->recovered_lsn == scanned_lsn);
mem_free(buf1);
return(TRUE);
}
+#endif /* UNIV_LOG_DEBUG */
-/**********************************************************
-Peeks the current lsn. */
-
+/******************************************************//**
+Peeks the current lsn.
+@return TRUE if success, FALSE if could not get the log system mutex */
+UNIV_INTERN
ibool
log_peek_lsn(
/*=========*/
- /* out: TRUE if success, FALSE if could not get the
- log system mutex */
- dulint* lsn) /* out: if returns TRUE, current lsn is here */
+ ib_uint64_t* lsn) /*!< out: if returns TRUE, current lsn is here */
{
if (0 == mutex_enter_nowait(&(log_sys->mutex))) {
*lsn = log_sys->lsn;
@@ -3279,13 +3323,13 @@ log_peek_lsn(
return(FALSE);
}
-/**********************************************************
+/******************************************************//**
Prints info of the log. */
-
+UNIV_INTERN
void
log_print(
/*======*/
- FILE* file) /* in: file where to print */
+ FILE* file) /*!< in: file where to print */
{
double time_elapsed;
time_t current_time;
@@ -3293,15 +3337,12 @@ log_print(
mutex_enter(&(log_sys->mutex));
fprintf(file,
- "Log sequence number %lu %lu\n"
- "Log flushed up to %lu %lu\n"
- "Last checkpoint at %lu %lu\n",
- (ulong) ut_dulint_get_high(log_sys->lsn),
- (ulong) ut_dulint_get_low(log_sys->lsn),
- (ulong) ut_dulint_get_high(log_sys->flushed_to_disk_lsn),
- (ulong) ut_dulint_get_low(log_sys->flushed_to_disk_lsn),
- (ulong) ut_dulint_get_high(log_sys->last_checkpoint_lsn),
- (ulong) ut_dulint_get_low(log_sys->last_checkpoint_lsn));
+ "Log sequence number %llu\n"
+ "Log flushed up to %llu\n"
+ "Last checkpoint at %llu\n",
+ log_sys->lsn,
+ log_sys->flushed_to_disk_lsn,
+ log_sys->last_checkpoint_lsn);
current_time = time(NULL);
@@ -3322,9 +3363,9 @@ log_print(
mutex_exit(&(log_sys->mutex));
}
-/**************************************************************************
+/**********************************************************************//**
Refreshes the statistics used to print per-second averages. */
-
+UNIV_INTERN
void
log_refresh_stats(void)
/*===================*/
@@ -3332,3 +3373,95 @@ log_refresh_stats(void)
log_sys->n_log_ios_old = log_sys->n_log_ios;
log_sys->last_printout_time = time(NULL);
}
+
+/**********************************************************************
+Releases every buffer owned by a log group and then the group itself. */
+static
+void
+log_group_close(
+/*===========*/
+	log_group_t*	group)	/* in,own: log group to close */
+{
+	ulint	file_no = 0;
+
+	/* Per-file header buffers go first; the pointer arrays that
+	hold them are released only after the loop. */
+	while (file_no < group->n_files) {
+		mem_free(group->file_header_bufs_ptr[file_no]);
+#ifdef UNIV_LOG_ARCHIVE
+		mem_free(group->archive_file_header_bufs_ptr[file_no]);
+#endif /* UNIV_LOG_ARCHIVE */
+		file_no++;
+	}
+
+	mem_free(group->file_header_bufs_ptr);
+	mem_free(group->file_header_bufs);
+
+#ifdef UNIV_LOG_ARCHIVE
+	mem_free(group->archive_file_header_bufs_ptr);
+	mem_free(group->archive_file_header_bufs);
+#endif /* UNIV_LOG_ARCHIVE */
+
+	mem_free(group->checkpoint_buf_ptr);
+
+	/* The group struct is released last: every free above reads
+	a field of it. */
+	mem_free(group);
+}
+
+/**********************************************************
+Shuts down the log system: closes every log group, frees the log and
+checkpoint buffers, and destroys the events, rw-locks and mutex owned
+by log_sys. The log_sys struct itself is not freed here; that is done
+by log_mem_free(). The recovery system is closed at the end. */
+UNIV_INTERN
+void
+log_shutdown(void)
+/*==============*/
+{
+	log_group_t*	group;
+
+	group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+	/* Fetch the successor before unlinking: log_group_close()
+	frees the node, so it must not be dereferenced afterwards. */
+	while (UT_LIST_GET_LEN(log_sys->log_groups) > 0) {
+		log_group_t*	prev_group = group;
+
+		group = UT_LIST_GET_NEXT(log_groups, group);
+		UT_LIST_REMOVE(log_groups, log_sys->log_groups, prev_group);
+
+		log_group_close(prev_group);
+	}
+
+	/* Clear both the allocation pointer and the pointer derived
+	from it, so that neither is left dangling. */
+	mem_free(log_sys->buf_ptr);
+	log_sys->buf_ptr = NULL;
+	log_sys->buf = NULL;
+	mem_free(log_sys->checkpoint_buf_ptr);
+	log_sys->checkpoint_buf_ptr = NULL;
+	log_sys->checkpoint_buf = NULL;
+
+	os_event_free(log_sys->no_flush_event);
+	os_event_free(log_sys->one_flushed_event);
+
+	rw_lock_free(&log_sys->checkpoint_lock);
+
+	mutex_free(&log_sys->mutex);
+
+#ifdef UNIV_LOG_ARCHIVE
+	rw_lock_free(&log_sys->archive_lock);
+	/* The archiving event must be destroyed on shutdown, like the
+	two events above; creating a new event here would leak the
+	existing one. */
+	os_event_free(log_sys->archiving_on);
+#endif /* UNIV_LOG_ARCHIVE */
+
+#ifdef UNIV_LOG_DEBUG
+	recv_sys_debug_free();
+#endif
+
+	recv_sys_close();
+}
+
+/**********************************************************
+Frees the log system struct, after first freeing any recovery system
+memory. Does nothing when log_sys is already NULL. */
+UNIV_INTERN
+void
+log_mem_free(void)
+/*==============*/
+{
+	if (log_sys == NULL) {
+
+		return;
+	}
+
+	recv_sys_mem_free();
+	mem_free(log_sys);
+
+	/* Reset the global so that a repeated call is a no-op. */
+	log_sys = NULL;
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c
index aef58b7b576..ddbc71d4b71 100644
--- a/storage/innobase/log/log0recv.c
+++ b/storage/innobase/log/log0recv.c
@@ -1,7 +1,24 @@
-/******************************************************
-Recovery
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1997 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file log/log0recv.c
+Recovery
Created 9/20/1997 Heikki Tuuri
*******************************************************/
@@ -15,49 +32,64 @@ Created 9/20/1997 Heikki Tuuri
#include "mem0mem.h"
#include "buf0buf.h"
#include "buf0flu.h"
-#include "buf0rea.h"
-#include "srv0srv.h"
-#include "srv0start.h"
#include "mtr0mtr.h"
#include "mtr0log.h"
-#include "page0page.h"
#include "page0cur.h"
+#include "page0zip.h"
#include "btr0btr.h"
#include "btr0cur.h"
#include "ibuf0ibuf.h"
#include "trx0undo.h"
#include "trx0rec.h"
-#include "trx0roll.h"
-#include "btr0cur.h"
-#include "btr0cur.h"
-#include "btr0cur.h"
-#include "dict0boot.h"
#include "fil0fil.h"
-#include "sync0sync.h"
-
-#ifdef UNIV_HOTBACKUP
-/* This is set to FALSE if the backup was originally taken with the
+#ifndef UNIV_HOTBACKUP
+# include "buf0rea.h"
+# include "srv0srv.h"
+# include "srv0start.h"
+# include "trx0roll.h"
+# include "row0merge.h"
+# include "sync0sync.h"
+#else /* !UNIV_HOTBACKUP */
+
+/** This is set to FALSE if the backup was originally taken with the
ibbackup --include regexp option: then we do not want to create tables in
directories which were not included */
-ibool recv_replay_file_ops = TRUE;
-#endif /* UNIV_HOTBACKUP */
+UNIV_INTERN ibool recv_replay_file_ops = TRUE;
+#endif /* !UNIV_HOTBACKUP */
-/* Log records are stored in the hash table in chunks at most of this size;
+/** Log records are stored in the hash table in chunks at most of this size;
this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */
#define RECV_DATA_BLOCK_SIZE (MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t))
-/* Read-ahead area in applying log records to file pages */
+/** Read-ahead area in applying log records to file pages */
#define RECV_READ_AHEAD_AREA 32
-recv_sys_t* recv_sys = NULL;
-ibool recv_recovery_on = FALSE;
-ibool recv_recovery_from_backup_on = FALSE;
-
-ibool recv_needed_recovery = FALSE;
-
-ibool recv_lsn_checks_on = FALSE;
+/** The recovery system */
+UNIV_INTERN recv_sys_t* recv_sys = NULL;
+/** TRUE when applying redo log records during crash recovery; FALSE
+otherwise. Note that this is FALSE while a background thread is
+rolling back incomplete transactions. */
+UNIV_INTERN ibool recv_recovery_on;
+#ifdef UNIV_LOG_ARCHIVE
+/** TRUE when applying redo log records from an archived log file */
+UNIV_INTERN ibool recv_recovery_from_backup_on;
+#endif /* UNIV_LOG_ARCHIVE */
-/* There are two conditions under which we scan the logs, the first
+#ifndef UNIV_HOTBACKUP
+/** TRUE when recv_init_crash_recovery() has been called. */
+UNIV_INTERN ibool recv_needed_recovery;
+# ifdef UNIV_DEBUG
+/** TRUE if writing to the redo log (mtr_commit) is forbidden.
+Protected by log_sys->mutex. */
+UNIV_INTERN ibool recv_no_log_write = FALSE;
+# endif /* UNIV_DEBUG */
+
+/** TRUE if buf_page_is_corrupted() should check if the log sequence
+number (FIL_PAGE_LSN) is in the future. Initially FALSE, and set by
+recv_recovery_from_checkpoint_start_func(). */
+UNIV_INTERN ibool recv_lsn_checks_on;
+
+/** There are two conditions under which we scan the logs, the first
is normal startup and the second is when we do a recovery from an
archive.
This flag is set if we are doing a scan from the last checkpoint during
@@ -65,63 +97,70 @@ startup. If we find log entries that were written after the last checkpoint
we know that the server was not cleanly shutdown. We must then initialize
the crash recovery environment before attempting to store these entries in
the log hash table. */
-ibool recv_log_scan_is_startup_type = FALSE;
+static ibool recv_log_scan_is_startup_type;
-/* If the following is TRUE, the buffer pool file pages must be invalidated
+/** If the following is TRUE, the buffer pool file pages must be invalidated
after recovery and no ibuf operations are allowed; this becomes TRUE if
the log record hash table becomes too full, and log records must be merged
to file pages already before the recovery is finished: in this case no
ibuf operations are allowed, as they could modify the pages read in the
-buffer pool before the pages have been recovered to the up-to-date state */
-
-/* Recovery is running and no operations on the log files are allowed
-yet: the variable name is misleading */
-
-ibool recv_no_ibuf_operations = FALSE;
-
-/* The following counter is used to decide when to print info on
+buffer pool before the pages have been recovered to the up-to-date state.
+
+TRUE means that recovery is running and no operations on the log files
+are allowed yet: the variable name is misleading. */
+UNIV_INTERN ibool recv_no_ibuf_operations;
+/** TRUE when the redo log is being backed up */
+# define recv_is_making_a_backup FALSE
+/** TRUE when recovering from a backed up redo log file */
+# define recv_is_from_backup FALSE
+#else /* !UNIV_HOTBACKUP */
+# define recv_needed_recovery FALSE
+/** TRUE when the redo log is being backed up */
+UNIV_INTERN ibool recv_is_making_a_backup = FALSE;
+/** TRUE when recovering from a backed up redo log file */
+UNIV_INTERN ibool recv_is_from_backup = FALSE;
+# define buf_pool_get_curr_size() (5 * 1024 * 1024)
+#endif /* !UNIV_HOTBACKUP */
+/** The following counter is used to decide when to print info on
log scan */
-ulint recv_scan_print_counter = 0;
+static ulint recv_scan_print_counter;
-ibool recv_is_from_backup = FALSE;
-#ifdef UNIV_HOTBACKUP
-ibool recv_is_making_a_backup = FALSE;
-#else
-# define recv_is_making_a_backup FALSE
-#endif /* UNIV_HOTBACKUP */
+/** The type of the previous parsed redo log record */
+static ulint recv_previous_parsed_rec_type;
+/** The offset of the previous parsed redo log record */
+static ulint recv_previous_parsed_rec_offset;
+/** The 'multi' flag of the previous parsed redo log record */
+static ulint recv_previous_parsed_rec_is_multi;
-ulint recv_previous_parsed_rec_type = 999999;
-ulint recv_previous_parsed_rec_offset = 0;
-ulint recv_previous_parsed_rec_is_multi = 0;
+/** Maximum page number encountered in the redo log */
+UNIV_INTERN ulint recv_max_parsed_page_no;
-ulint recv_max_parsed_page_no = 0;
-
-/* This many frames must be left free in the buffer pool when we scan
+/** This many frames must be left free in the buffer pool when we scan
the log and store the scanned log records in the buffer pool: we will
use these free frames to read in pages when we start applying the
log records to the database. */
+UNIV_INTERN ulint recv_n_pool_free_frames;
-ulint recv_n_pool_free_frames = 256;
-
-/* The maximum lsn we see for a page during the recovery process. If this
+/** The maximum lsn we see for a page during the recovery process. If this
is bigger than the lsn we are able to scan up to, that is an indication that
the recovery failed and the database may be corrupt. */
-
-dulint recv_max_page_lsn;
+UNIV_INTERN ib_uint64_t recv_max_page_lsn;
/* prototypes */
-/***********************************************************
+#ifndef UNIV_HOTBACKUP
+/*******************************************************//**
Initialize crash recovery environment. Can be called iff
recv_needed_recovery == FALSE. */
static
void
recv_init_crash_recovery(void);
/*===========================*/
+#endif /* !UNIV_HOTBACKUP */
-/************************************************************
+/********************************************************//**
Creates the recovery system. */
-
+UNIV_INTERN
void
recv_sys_create(void)
/*=================*/
@@ -131,7 +170,8 @@ recv_sys_create(void)
return;
}
- recv_sys = mem_alloc(sizeof(recv_sys_t));
+ recv_sys = mem_alloc(sizeof(*recv_sys));
+ memset(recv_sys, 0x0, sizeof(*recv_sys));
mutex_create(&recv_sys->mutex, SYNC_RECV);
@@ -139,15 +179,113 @@ recv_sys_create(void)
recv_sys->addr_hash = NULL;
}
+/********************************************************//**
+Closes the recovery system: frees the address hash table, the heap,
+recv_sys->buf and recv_sys->last_block_buf_start where they are
+allocated, destroys the recovery system mutex and frees recv_sys
+itself. Does nothing when recv_sys is NULL. */
+UNIV_INTERN
+void
+recv_sys_close(void)
+/*================*/
+{
+	if (recv_sys == NULL) {
+
+		return;
+	}
+
+	/* Each member is released only if it is currently set. */
+	if (recv_sys->addr_hash != NULL) {
+		hash_table_free(recv_sys->addr_hash);
+	}
+
+	if (recv_sys->heap != NULL) {
+		mem_heap_free(recv_sys->heap);
+	}
+
+	if (recv_sys->buf != NULL) {
+		ut_free(recv_sys->buf);
+	}
+
+	if (recv_sys->last_block_buf_start != NULL) {
+		mem_free(recv_sys->last_block_buf_start);
+	}
+
+	mutex_free(&recv_sys->mutex);
+
+	mem_free(recv_sys);
+	recv_sys = NULL;
+}
+
+/********************************************************//**
+Frees the recovery system memory: the address hash table, the heap,
+recv_sys->buf and recv_sys->last_block_buf_start where they are
+allocated, and then recv_sys itself. This function does not call
+mutex_free() on the recovery system mutex. Does nothing when recv_sys
+is NULL. */
+UNIV_INTERN
+void
+recv_sys_mem_free(void)
+/*===================*/
+{
+	if (recv_sys == NULL) {
+
+		return;
+	}
+
+	/* Each member is released only if it is currently set. */
+	if (recv_sys->addr_hash != NULL) {
+		hash_table_free(recv_sys->addr_hash);
+	}
+
+	if (recv_sys->heap != NULL) {
+		mem_heap_free(recv_sys->heap);
+	}
+
+	if (recv_sys->buf != NULL) {
+		ut_free(recv_sys->buf);
+	}
+
+	if (recv_sys->last_block_buf_start != NULL) {
+		mem_free(recv_sys->last_block_buf_start);
+	}
+
+	mem_free(recv_sys);
+	recv_sys = NULL;
+}
+
/************************************************************
-Inits the recovery system for a recovery operation. */
+Reset the state of the recovery system variables. */
+UNIV_INTERN
+void
+recv_sys_var_init(void)
+/*===================*/
+{
+	/* Every recovery state variable is reset exactly once. These
+	variables no longer carry static initializers, so this function
+	supplies their starting values. */
+
+	recv_lsn_checks_on = FALSE;
+
+	recv_recovery_on = FALSE;
+
+#ifdef UNIV_LOG_ARCHIVE
+	recv_recovery_from_backup_on = FALSE;
+#endif /* UNIV_LOG_ARCHIVE */
+
+	recv_needed_recovery = FALSE;
+
+	recv_log_scan_is_startup_type = FALSE;
+
+	recv_no_ibuf_operations = FALSE;
+
+	recv_scan_print_counter = 0;
+
+	/* 999999 is the value the former static initializer used. */
+	recv_previous_parsed_rec_type = 999999;
+
+	recv_previous_parsed_rec_offset = 0;
+	recv_previous_parsed_rec_is_multi = 0;
+
+	recv_max_parsed_page_no = 0;
+
+	/* 256 is the value the former static initializer used. */
+	recv_n_pool_free_frames = 256;
+
+	recv_max_page_lsn = 0;
+}
+
+/************************************************************
+Inits the recovery system for a recovery operation. */
+UNIV_INTERN
void
recv_sys_init(
/*==========*/
- ibool recover_from_backup, /* in: TRUE if this is called
- to recover from a hot backup */
- ulint available_memory) /* in: available memory in bytes */
+ ulint available_memory) /*!< in: available memory in bytes */
{
if (recv_sys->heap != NULL) {
@@ -156,12 +294,12 @@ recv_sys_init(
mutex_enter(&(recv_sys->mutex));
- if (!recover_from_backup) {
- recv_sys->heap = mem_heap_create_in_buffer(256);
- } else {
- recv_sys->heap = mem_heap_create(256);
- recv_is_from_backup = TRUE;
- }
+#ifndef UNIV_HOTBACKUP
+ recv_sys->heap = mem_heap_create_in_buffer(256);
+#else /* !UNIV_HOTBACKUP */
+ recv_sys->heap = mem_heap_create(256);
+ recv_is_from_backup = TRUE;
+#endif /* !UNIV_HOTBACKUP */
recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE);
recv_sys->len = 0;
@@ -179,12 +317,12 @@ recv_sys_init(
OS_FILE_LOG_BLOCK_SIZE);
recv_sys->found_corrupt_log = FALSE;
- recv_max_page_lsn = ut_dulint_zero;
+ recv_max_page_lsn = 0;
mutex_exit(&(recv_sys->mutex));
}
-/************************************************************
+/********************************************************//**
Empties the hash table when it has been fully processed. */
static
void
@@ -210,13 +348,14 @@ recv_sys_empty_hash(void)
recv_sys->addr_hash = hash_create(buf_pool_get_curr_size() / 256);
}
-#ifndef UNIV_LOG_DEBUG
-/************************************************************
+#ifndef UNIV_HOTBACKUP
+# ifndef UNIV_LOG_DEBUG
+/********************************************************//**
Frees the recovery system. */
static
void
-recv_sys_free(void)
-/*===============*/
+recv_sys_debug_free(void)
+/*=====================*/
{
mutex_enter(&(recv_sys->mutex));
@@ -225,60 +364,61 @@ recv_sys_free(void)
ut_free(recv_sys->buf);
mem_free(recv_sys->last_block_buf_start);
- recv_sys->addr_hash = NULL;
+ recv_sys->buf = NULL;
recv_sys->heap = NULL;
+ recv_sys->addr_hash = NULL;
+ recv_sys->last_block_buf_start = NULL;
mutex_exit(&(recv_sys->mutex));
}
-#endif /* UNIV_LOG_DEBUG */
+# endif /* UNIV_LOG_DEBUG */
-/************************************************************
+/********************************************************//**
Truncates possible corrupted or extra records from a log group. */
static
void
recv_truncate_group(
/*================*/
- log_group_t* group, /* in: log group */
- dulint recovered_lsn, /* in: recovery succeeded up to this
+ log_group_t* group, /*!< in: log group */
+ ib_uint64_t recovered_lsn, /*!< in: recovery succeeded up to this
lsn */
- dulint limit_lsn, /* in: this was the limit for
+ ib_uint64_t limit_lsn, /*!< in: this was the limit for
recovery */
- dulint checkpoint_lsn, /* in: recovery was started from this
+ ib_uint64_t checkpoint_lsn, /*!< in: recovery was started from this
checkpoint */
- dulint archived_lsn) /* in: the log has been archived up to
+ ib_uint64_t archived_lsn) /*!< in: the log has been archived up to
this lsn */
{
- dulint start_lsn;
- dulint end_lsn;
- dulint finish_lsn1;
- dulint finish_lsn2;
- dulint finish_lsn;
- ulint len;
- ulint i;
+ ib_uint64_t start_lsn;
+ ib_uint64_t end_lsn;
+ ib_uint64_t finish_lsn1;
+ ib_uint64_t finish_lsn2;
+ ib_uint64_t finish_lsn;
+ ulint len;
+ ulint i;
- if (ut_dulint_cmp(archived_lsn, ut_dulint_max) == 0) {
+ if (archived_lsn == IB_ULONGLONG_MAX) {
/* Checkpoint was taken in the NOARCHIVELOG mode */
archived_lsn = checkpoint_lsn;
}
- finish_lsn1 = ut_dulint_add(ut_dulint_align_down(
- archived_lsn,
- OS_FILE_LOG_BLOCK_SIZE),
- log_group_get_capacity(group));
+ finish_lsn1 = ut_uint64_align_down(archived_lsn,
+ OS_FILE_LOG_BLOCK_SIZE)
+ + log_group_get_capacity(group);
- finish_lsn2 = ut_dulint_add(ut_dulint_align_up(
- recovered_lsn,
- OS_FILE_LOG_BLOCK_SIZE),
- recv_sys->last_log_buf_size);
+ finish_lsn2 = ut_uint64_align_up(recovered_lsn,
+ OS_FILE_LOG_BLOCK_SIZE)
+ + recv_sys->last_log_buf_size;
- if (ut_dulint_cmp(limit_lsn, ut_dulint_max) != 0) {
+ if (limit_lsn != IB_ULONGLONG_MAX) {
/* We do not know how far we should erase log records: erase
as much as possible */
finish_lsn = finish_lsn1;
} else {
/* It is enough to erase the length of the log buffer */
- finish_lsn = ut_dulint_get_min(finish_lsn1, finish_lsn2);
+ finish_lsn = finish_lsn1 < finish_lsn2
+ ? finish_lsn1 : finish_lsn2;
}
ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
@@ -289,36 +429,36 @@ recv_truncate_group(
*(log_sys->buf + i) = '\0';
}
- start_lsn = ut_dulint_align_down(recovered_lsn,
+ start_lsn = ut_uint64_align_down(recovered_lsn,
OS_FILE_LOG_BLOCK_SIZE);
- if (ut_dulint_cmp(start_lsn, recovered_lsn) != 0) {
+ if (start_lsn != recovered_lsn) {
/* Copy the last incomplete log block to the log buffer and
edit its data length: */
ut_memcpy(log_sys->buf, recv_sys->last_block,
OS_FILE_LOG_BLOCK_SIZE);
- log_block_set_data_len(log_sys->buf, ut_dulint_minus(
- recovered_lsn, start_lsn));
+ log_block_set_data_len(log_sys->buf,
+ (ulint) (recovered_lsn - start_lsn));
}
- if (ut_dulint_cmp(start_lsn, finish_lsn) >= 0) {
+ if (start_lsn >= finish_lsn) {
return;
}
for (;;) {
- end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);
+ end_lsn = start_lsn + RECV_SCAN_SIZE;
- if (ut_dulint_cmp(end_lsn, finish_lsn) > 0) {
+ if (end_lsn > finish_lsn) {
end_lsn = finish_lsn;
}
- len = ut_dulint_minus(end_lsn, start_lsn);
+ len = (ulint) (end_lsn - start_lsn);
log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
- if (ut_dulint_cmp(end_lsn, finish_lsn) >= 0) {
+ if (end_lsn >= finish_lsn) {
return;
}
@@ -333,49 +473,49 @@ recv_truncate_group(
}
}
-/************************************************************
+/********************************************************//**
Copies the log segment between group->recovered_lsn and recovered_lsn from the
most up-to-date log group to group, so that it contains the latest log data. */
static
void
recv_copy_group(
/*============*/
- log_group_t* up_to_date_group, /* in: the most up-to-date log
+ log_group_t* up_to_date_group, /*!< in: the most up-to-date log
group */
- log_group_t* group, /* in: copy to this log
+ log_group_t* group, /*!< in: copy to this log
group */
- dulint recovered_lsn) /* in: recovery succeeded up
+ ib_uint64_t recovered_lsn) /*!< in: recovery succeeded up
to this lsn */
{
- dulint start_lsn;
- dulint end_lsn;
- ulint len;
+ ib_uint64_t start_lsn;
+ ib_uint64_t end_lsn;
+ ulint len;
- if (ut_dulint_cmp(group->scanned_lsn, recovered_lsn) >= 0) {
+ if (group->scanned_lsn >= recovered_lsn) {
return;
}
ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
- start_lsn = ut_dulint_align_down(group->scanned_lsn,
+ start_lsn = ut_uint64_align_down(group->scanned_lsn,
OS_FILE_LOG_BLOCK_SIZE);
for (;;) {
- end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);
+ end_lsn = start_lsn + RECV_SCAN_SIZE;
- if (ut_dulint_cmp(end_lsn, recovered_lsn) > 0) {
- end_lsn = ut_dulint_align_up(recovered_lsn,
+ if (end_lsn > recovered_lsn) {
+ end_lsn = ut_uint64_align_up(recovered_lsn,
OS_FILE_LOG_BLOCK_SIZE);
}
log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
up_to_date_group, start_lsn, end_lsn);
- len = ut_dulint_minus(end_lsn, start_lsn);
+ len = (ulint) (end_lsn - start_lsn);
log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
- if (ut_dulint_cmp(end_lsn, recovered_lsn) >= 0) {
+ if (end_lsn >= recovered_lsn) {
return;
}
@@ -384,7 +524,7 @@ recv_copy_group(
}
}
-/************************************************************
+/********************************************************//**
Copies a log segment from the most up-to-date log group to the other log
groups, so that they all contain the latest log data. Also writes the info
about the latest checkpoint to the groups, and inits the fields in the group
@@ -393,14 +533,14 @@ static
void
recv_synchronize_groups(
/*====================*/
- log_group_t* up_to_date_group) /* in: the most up-to-date
+ log_group_t* up_to_date_group) /*!< in: the most up-to-date
log group */
{
log_group_t* group;
- dulint start_lsn;
- dulint end_lsn;
- dulint recovered_lsn;
- dulint limit_lsn;
+ ib_uint64_t start_lsn;
+ ib_uint64_t end_lsn;
+ ib_uint64_t recovered_lsn;
+ ib_uint64_t limit_lsn;
recovered_lsn = recv_sys->recovered_lsn;
limit_lsn = recv_sys->limit_lsn;
@@ -408,11 +548,11 @@ recv_synchronize_groups(
/* Read the last recovered log block to the recovery system buffer:
the block is always incomplete */
- start_lsn = ut_dulint_align_down(recovered_lsn,
+ start_lsn = ut_uint64_align_down(recovered_lsn,
OS_FILE_LOG_BLOCK_SIZE);
- end_lsn = ut_dulint_align_up(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
+ end_lsn = ut_uint64_align_up(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
- ut_a(ut_dulint_cmp(start_lsn, end_lsn) != 0);
+ ut_a(start_lsn != end_lsn);
log_group_read_log_seg(LOG_RECOVER, recv_sys->last_block,
up_to_date_group, start_lsn, end_lsn);
@@ -451,15 +591,16 @@ recv_synchronize_groups(
mutex_enter(&(log_sys->mutex));
}
+#endif /* !UNIV_HOTBACKUP */
-/***************************************************************************
-Checks the consistency of the checkpoint info */
+/***********************************************************************//**
+Checks the consistency of the checkpoint info
+@return TRUE if ok */
static
ibool
recv_check_cp_is_consistent(
/*========================*/
- /* out: TRUE if ok */
- byte* buf) /* in: buffer containing checkpoint info */
+ const byte* buf) /*!< in: buffer containing checkpoint info */
{
ulint fold;
@@ -481,26 +622,27 @@ recv_check_cp_is_consistent(
return(TRUE);
}
-/************************************************************
-Looks for the maximum consistent checkpoint from the log groups. */
+#ifndef UNIV_HOTBACKUP
+/********************************************************//**
+Looks for the maximum consistent checkpoint from the log groups.
+@return error code or DB_SUCCESS */
static
ulint
recv_find_max_checkpoint(
/*=====================*/
- /* out: error code or DB_SUCCESS */
- log_group_t** max_group, /* out: max group */
- ulint* max_field) /* out: LOG_CHECKPOINT_1 or
+ log_group_t** max_group, /*!< out: max group */
+ ulint* max_field) /*!< out: LOG_CHECKPOINT_1 or
LOG_CHECKPOINT_2 */
{
log_group_t* group;
- dulint max_no;
- dulint checkpoint_no;
+ ib_uint64_t max_no;
+ ib_uint64_t checkpoint_no;
ulint field;
byte* buf;
group = UT_LIST_GET_FIRST(log_sys->log_groups);
- max_no = ut_dulint_zero;
+ max_no = 0;
*max_group = NULL;
*max_field = 0;
@@ -533,11 +675,11 @@ recv_find_max_checkpoint(
group->state = LOG_GROUP_OK;
- group->lsn = mach_read_from_8(
+ group->lsn = mach_read_ull(
buf + LOG_CHECKPOINT_LSN);
group->lsn_offset = mach_read_from_4(
buf + LOG_CHECKPOINT_OFFSET);
- checkpoint_no = mach_read_from_8(
+ checkpoint_no = mach_read_ull(
buf + LOG_CHECKPOINT_NO);
#ifdef UNIV_DEBUG
@@ -545,13 +687,12 @@ recv_find_max_checkpoint(
fprintf(stderr,
"InnoDB: Checkpoint number %lu"
" found in group %lu\n",
- (ulong) ut_dulint_get_low(
- checkpoint_no),
+ (ulong) checkpoint_no,
(ulong) group->id);
}
#endif /* UNIV_DEBUG */
- if (ut_dulint_cmp(checkpoint_no, max_no) >= 0) {
+ if (checkpoint_no >= max_no) {
*max_group = group;
*max_field = field;
max_no = checkpoint_no;
@@ -575,46 +716,47 @@ not_consistent:
"InnoDB: to create the InnoDB data files,"
" but log file creation failed.\n"
"InnoDB: If that is the case, please refer to\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "error-creating-innodb.html\n");
+ "InnoDB: " REFMAN "error-creating-innodb.html\n");
return(DB_ERROR);
}
return(DB_SUCCESS);
}
-
-/***********************************************************************
-Reads the checkpoint info needed in hot backup. */
-
+#else /* !UNIV_HOTBACKUP */
+/*******************************************************************//**
+Reads the checkpoint info needed in hot backup.
+@return TRUE if success */
+UNIV_INTERN
ibool
recv_read_cp_info_for_backup(
/*=========================*/
- /* out: TRUE if success */
- byte* hdr, /* in: buffer containing the log group header */
- dulint* lsn, /* out: checkpoint lsn */
- ulint* offset, /* out: checkpoint offset in the log group */
- ulint* fsp_limit,/* out: fsp limit of space 0, 1000000000 if the
- database is running with < version 3.23.50 of InnoDB */
- dulint* cp_no, /* out: checkpoint number */
- dulint* first_header_lsn)
- /* out: lsn of of the start of the first log file */
+ const byte* hdr, /*!< in: buffer containing the log group
+ header */
+ ib_uint64_t* lsn, /*!< out: checkpoint lsn */
+ ulint* offset, /*!< out: checkpoint offset in the log group */
+ ulint* fsp_limit,/*!< out: fsp limit of space 0,
+ 1000000000 if the database is running
+ with < version 3.23.50 of InnoDB */
+ ib_uint64_t* cp_no, /*!< out: checkpoint number */
+ ib_uint64_t* first_header_lsn)
+ /*!< out: lsn of the start of the
+ first log file */
{
- ulint max_cp = 0;
- dulint max_cp_no = ut_dulint_zero;
- byte* cp_buf;
+ ulint max_cp = 0;
+ ib_uint64_t max_cp_no = 0;
+ const byte* cp_buf;
cp_buf = hdr + LOG_CHECKPOINT_1;
if (recv_check_cp_is_consistent(cp_buf)) {
- max_cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO);
+ max_cp_no = mach_read_ull(cp_buf + LOG_CHECKPOINT_NO);
max_cp = LOG_CHECKPOINT_1;
}
cp_buf = hdr + LOG_CHECKPOINT_2;
if (recv_check_cp_is_consistent(cp_buf)) {
- if (ut_dulint_cmp(mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO),
- max_cp_no) > 0) {
+ if (mach_read_ull(cp_buf + LOG_CHECKPOINT_NO) > max_cp_no) {
max_cp = LOG_CHECKPOINT_2;
}
}
@@ -625,7 +767,7 @@ recv_read_cp_info_for_backup(
cp_buf = hdr + max_cp;
- *lsn = mach_read_from_8(cp_buf + LOG_CHECKPOINT_LSN);
+ *lsn = mach_read_ull(cp_buf + LOG_CHECKPOINT_LSN);
*offset = mach_read_from_4(cp_buf + LOG_CHECKPOINT_OFFSET);
/* If the user is running a pre-3.23.50 version of InnoDB, its
@@ -645,24 +787,25 @@ recv_read_cp_info_for_backup(
/* fprintf(stderr, "fsp limit %lu MB\n", *fsp_limit); */
- *cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO);
+ *cp_no = mach_read_ull(cp_buf + LOG_CHECKPOINT_NO);
- *first_header_lsn = mach_read_from_8(hdr + LOG_FILE_START_LSN);
+ *first_header_lsn = mach_read_ull(hdr + LOG_FILE_START_LSN);
return(TRUE);
}
-
-/**********************************************************
-Checks the 4-byte checksum to the trailer checksum field of a log block.
-We also accept a log block in the old format < InnoDB-3.23.52 where the
-checksum field contains the log block number. */
+#endif /* !UNIV_HOTBACKUP */
+
+/******************************************************//**
+Checks the 4-byte checksum to the trailer checksum field of a log
+block. We also accept a log block in the old format before
+InnoDB-3.23.52 where the checksum field contains the log block number.
+@return TRUE if ok, or if the log block may be in the format of InnoDB
+version predating 3.23.52 */
static
ibool
log_block_checksum_is_ok_or_old_format(
/*===================================*/
- /* out: TRUE if ok, or if the log block may be in the
- format of InnoDB version < 3.23.52 */
- byte* block) /* in: pointer to a log block */
+ const byte* block) /*!< in: pointer to a log block */
{
#ifdef UNIV_LOG_DEBUG
return(TRUE);
@@ -688,22 +831,23 @@ log_block_checksum_is_ok_or_old_format(
return(FALSE);
}
-/***********************************************************************
+#ifdef UNIV_HOTBACKUP
+/*******************************************************************//**
Scans the log segment and n_bytes_scanned is set to the length of valid
log scanned. */
-
+UNIV_INTERN
void
recv_scan_log_seg_for_backup(
/*=========================*/
- byte* buf, /* in: buffer containing log data */
- ulint buf_len, /* in: data length in that buffer */
- dulint* scanned_lsn, /* in/out: lsn of buffer start,
+ byte* buf, /*!< in: buffer containing log data */
+ ulint buf_len, /*!< in: data length in that buffer */
+ ib_uint64_t* scanned_lsn, /*!< in/out: lsn of buffer start,
we return scanned lsn */
ulint* scanned_checkpoint_no,
- /* in/out: 4 lowest bytes of the
+ /*!< in/out: 4 lowest bytes of the
highest scanned checkpoint number so
far */
- ulint* n_bytes_scanned)/* out: how much we were able to
+ ulint* n_bytes_scanned)/*!< out: how much we were able to
scan, smaller than buf_len if log
data ended here */
{
@@ -762,7 +906,7 @@ recv_scan_log_seg_for_backup(
*scanned_checkpoint_no
= log_block_get_checkpoint_no(log_block);
- *scanned_lsn = ut_dulint_add(*scanned_lsn, data_len);
+ *scanned_lsn += data_len;
*n_bytes_scanned += data_len;
@@ -777,32 +921,131 @@ recv_scan_log_seg_for_backup(
}
}
}
+#endif /* UNIV_HOTBACKUP */
-/***********************************************************************
+/*******************************************************************//**
Tries to parse a single log record body and also applies it to a page if
-specified. File ops are parsed, but not applied in this function. */
+specified. File ops are parsed, but not applied in this function.
+@return log record end, NULL if not a complete record */
static
byte*
recv_parse_or_apply_log_rec_body(
/*=============================*/
- /* out: log record end, NULL if not a complete
- record */
- byte type, /* in: type */
- byte* ptr, /* in: pointer to a buffer */
- byte* end_ptr,/* in: pointer to the buffer end */
- page_t* page, /* in: buffer page or NULL; if not NULL, then the log
- record is applied to the page, and the log record
- should be complete then */
- mtr_t* mtr) /* in: mtr or NULL; should be non-NULL if and only if
- page is non-NULL */
+ byte type, /*!< in: type */
+ byte* ptr, /*!< in: pointer to a buffer */
+ byte* end_ptr,/*!< in: pointer to the buffer end */
+ buf_block_t* block, /*!< in/out: buffer block or NULL; if
+ not NULL, then the log record is
+ applied to the page, and the log
+ record should be complete then */
+ mtr_t* mtr) /*!< in: mtr or NULL; should be non-NULL
+ if and only if block is non-NULL */
{
- dict_index_t* index = NULL;
+ dict_index_t* index = NULL;
+ page_t* page;
+ page_zip_des_t* page_zip;
+#ifdef UNIV_DEBUG
+ ulint page_type;
+#endif /* UNIV_DEBUG */
+
+ ut_ad(!block == !mtr);
+
+ if (block) {
+ page = block->frame;
+ page_zip = buf_block_get_page_zip(block);
+ ut_d(page_type = fil_page_get_type(page));
+ } else {
+ page = NULL;
+ page_zip = NULL;
+ ut_d(page_type = FIL_PAGE_TYPE_ALLOCATED);
+ }
switch (type) {
+#ifdef UNIV_LOG_LSN_DEBUG
+ case MLOG_LSN:
+ /* The LSN is checked in recv_parse_log_rec(). */
+ break;
+#endif /* UNIV_LOG_LSN_DEBUG */
case MLOG_1BYTE: case MLOG_2BYTES: case MLOG_4BYTES: case MLOG_8BYTES:
- ptr = mlog_parse_nbytes(type, ptr, end_ptr, page);
+#ifdef UNIV_DEBUG
+ if (page && page_type == FIL_PAGE_TYPE_ALLOCATED
+ && end_ptr >= ptr + 2) {
+ /* It is OK to set FIL_PAGE_TYPE and certain
+ list node fields on an empty page. Any other
+ write is not OK. */
+
+ /* NOTE: There may be bogus assertion failures for
+ dict_hdr_create(), trx_rseg_header_create(),
+ trx_sys_create_doublewrite_buf(), and
+ trx_sysf_create().
+ These are only called during database creation. */
+ ulint offs = mach_read_from_2(ptr);
+
+ switch (type) {
+ default:
+ ut_error;
+ case MLOG_2BYTES:
+ /* Note that this can fail when the
+ redo log has been written with something
+ older than InnoDB Plugin 1.0.4. */
+ ut_ad(offs == FIL_PAGE_TYPE
+ || offs == IBUF_TREE_SEG_HEADER
+ + IBUF_HEADER + FSEG_HDR_OFFSET
+ || offs == PAGE_BTR_IBUF_FREE_LIST
+ + PAGE_HEADER + FIL_ADDR_BYTE
+ || offs == PAGE_BTR_IBUF_FREE_LIST
+ + PAGE_HEADER + FIL_ADDR_BYTE
+ + FIL_ADDR_SIZE
+ || offs == PAGE_BTR_SEG_LEAF
+ + PAGE_HEADER + FSEG_HDR_OFFSET
+ || offs == PAGE_BTR_SEG_TOP
+ + PAGE_HEADER + FSEG_HDR_OFFSET
+ || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
+ + PAGE_HEADER + FIL_ADDR_BYTE
+ + 0 /*FLST_PREV*/
+ || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
+ + PAGE_HEADER + FIL_ADDR_BYTE
+ + FIL_ADDR_SIZE /*FLST_NEXT*/);
+ break;
+ case MLOG_4BYTES:
+ /* Note that this can fail when the
+ redo log has been written with something
+ older than InnoDB Plugin 1.0.4. */
+ ut_ad(0
+ || offs == IBUF_TREE_SEG_HEADER
+ + IBUF_HEADER + FSEG_HDR_SPACE
+ || offs == IBUF_TREE_SEG_HEADER
+ + IBUF_HEADER + FSEG_HDR_PAGE_NO
+ || offs == PAGE_BTR_IBUF_FREE_LIST
+ + PAGE_HEADER/* flst_init */
+ || offs == PAGE_BTR_IBUF_FREE_LIST
+ + PAGE_HEADER + FIL_ADDR_PAGE
+ || offs == PAGE_BTR_IBUF_FREE_LIST
+ + PAGE_HEADER + FIL_ADDR_PAGE
+ + FIL_ADDR_SIZE
+ || offs == PAGE_BTR_SEG_LEAF
+ + PAGE_HEADER + FSEG_HDR_PAGE_NO
+ || offs == PAGE_BTR_SEG_LEAF
+ + PAGE_HEADER + FSEG_HDR_SPACE
+ || offs == PAGE_BTR_SEG_TOP
+ + PAGE_HEADER + FSEG_HDR_PAGE_NO
+ || offs == PAGE_BTR_SEG_TOP
+ + PAGE_HEADER + FSEG_HDR_SPACE
+ || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
+ + PAGE_HEADER + FIL_ADDR_PAGE
+ + 0 /*FLST_PREV*/
+ || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
+ + PAGE_HEADER + FIL_ADDR_PAGE
+ + FIL_ADDR_SIZE /*FLST_NEXT*/);
+ break;
+ }
+ }
+#endif /* UNIV_DEBUG */
+ ptr = mlog_parse_nbytes(type, ptr, end_ptr, page, page_zip);
break;
case MLOG_REC_INSERT: case MLOG_COMP_REC_INSERT:
+ ut_ad(!page || page_type == FIL_PAGE_INDEX);
+
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr,
type == MLOG_COMP_REC_INSERT,
@@ -811,10 +1054,12 @@ recv_parse_or_apply_log_rec_body(
|| (ibool)!!page_is_comp(page)
== dict_table_is_comp(index->table));
ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr,
- index, page, mtr);
+ block, index, mtr);
}
break;
case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK:
+ ut_ad(!page || page_type == FIL_PAGE_INDEX);
+
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr,
type == MLOG_COMP_REC_CLUST_DELETE_MARK,
@@ -823,22 +1068,28 @@ recv_parse_or_apply_log_rec_body(
|| (ibool)!!page_is_comp(page)
== dict_table_is_comp(index->table));
ptr = btr_cur_parse_del_mark_set_clust_rec(
- ptr, end_ptr, index, page);
+ ptr, end_ptr, page, page_zip, index);
}
break;
case MLOG_COMP_REC_SEC_DELETE_MARK:
+ ut_ad(!page || page_type == FIL_PAGE_INDEX);
/* This log record type is obsolete, but we process it for
backward compatibility with MySQL 5.0.3 and 5.0.4. */
ut_a(!page || page_is_comp(page));
+ ut_a(!page_zip);
ptr = mlog_parse_index(ptr, end_ptr, TRUE, &index);
if (!ptr) {
break;
}
/* Fall through */
case MLOG_REC_SEC_DELETE_MARK:
- ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr, page);
+ ut_ad(!page || page_type == FIL_PAGE_INDEX);
+ ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr,
+ page, page_zip);
break;
case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE:
+ ut_ad(!page || page_type == FIL_PAGE_INDEX);
+
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr,
type == MLOG_COMP_REC_UPDATE_IN_PLACE,
@@ -846,12 +1097,14 @@ recv_parse_or_apply_log_rec_body(
ut_a(!page
|| (ibool)!!page_is_comp(page)
== dict_table_is_comp(index->table));
- ptr = btr_cur_parse_update_in_place(ptr, end_ptr,
- page, index);
+ ptr = btr_cur_parse_update_in_place(ptr, end_ptr, page,
+ page_zip, index);
}
break;
case MLOG_LIST_END_DELETE: case MLOG_COMP_LIST_END_DELETE:
case MLOG_LIST_START_DELETE: case MLOG_COMP_LIST_START_DELETE:
+ ut_ad(!page || page_type == FIL_PAGE_INDEX);
+
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr,
type == MLOG_COMP_LIST_END_DELETE
@@ -861,10 +1114,12 @@ recv_parse_or_apply_log_rec_body(
|| (ibool)!!page_is_comp(page)
== dict_table_is_comp(index->table));
ptr = page_parse_delete_rec_list(type, ptr, end_ptr,
- index, page, mtr);
+ block, index, mtr);
}
break;
case MLOG_LIST_END_COPY_CREATED: case MLOG_COMP_LIST_END_COPY_CREATED:
+ ut_ad(!page || page_type == FIL_PAGE_INDEX);
+
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr,
type == MLOG_COMP_LIST_END_COPY_CREATED,
@@ -873,10 +1128,12 @@ recv_parse_or_apply_log_rec_body(
|| (ibool)!!page_is_comp(page)
== dict_table_is_comp(index->table));
ptr = page_parse_copy_rec_list_to_created_page(
- ptr, end_ptr, index, page, mtr);
+ ptr, end_ptr, block, index, mtr);
}
break;
case MLOG_PAGE_REORGANIZE: case MLOG_COMP_PAGE_REORGANIZE:
+ ut_ad(!page || page_type == FIL_PAGE_INDEX);
+
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr,
type == MLOG_COMP_PAGE_REORGANIZE,
@@ -885,37 +1142,52 @@ recv_parse_or_apply_log_rec_body(
|| (ibool)!!page_is_comp(page)
== dict_table_is_comp(index->table));
ptr = btr_parse_page_reorganize(ptr, end_ptr, index,
- page, mtr);
+ block, mtr);
}
break;
case MLOG_PAGE_CREATE: case MLOG_COMP_PAGE_CREATE:
+ /* Allow anything in page_type when creating a page. */
+ ut_a(!page_zip);
ptr = page_parse_create(ptr, end_ptr,
type == MLOG_COMP_PAGE_CREATE,
- page, mtr);
+ block, mtr);
break;
case MLOG_UNDO_INSERT:
+ ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page);
break;
case MLOG_UNDO_ERASE_END:
+ ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
ptr = trx_undo_parse_erase_page_end(ptr, end_ptr, page, mtr);
break;
case MLOG_UNDO_INIT:
+ /* Allow anything in page_type when creating a page. */
ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr);
break;
case MLOG_UNDO_HDR_DISCARD:
+ ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page, mtr);
break;
case MLOG_UNDO_HDR_CREATE:
case MLOG_UNDO_HDR_REUSE:
+ ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
ptr = trx_undo_parse_page_header(type, ptr, end_ptr,
page, mtr);
break;
case MLOG_REC_MIN_MARK: case MLOG_COMP_REC_MIN_MARK:
+ ut_ad(!page || page_type == FIL_PAGE_INDEX);
+ /* On a compressed page, MLOG_COMP_REC_MIN_MARK
+ will be followed by MLOG_COMP_REC_DELETE
+ or MLOG_ZIP_WRITE_HEADER(FIL_PAGE_PREV, FIL_NULL)
+ in the same mini-transaction. */
+ ut_a(type == MLOG_COMP_REC_MIN_MARK || !page_zip);
ptr = btr_parse_set_min_rec_mark(
ptr, end_ptr, type == MLOG_COMP_REC_MIN_MARK,
page, mtr);
break;
case MLOG_REC_DELETE: case MLOG_COMP_REC_DELETE:
+ ut_ad(!page || page_type == FIL_PAGE_INDEX);
+
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr,
type == MLOG_COMP_REC_DELETE,
@@ -924,23 +1196,46 @@ recv_parse_or_apply_log_rec_body(
|| (ibool)!!page_is_comp(page)
== dict_table_is_comp(index->table));
ptr = page_cur_parse_delete_rec(ptr, end_ptr,
- index, page, mtr);
+ block, index, mtr);
}
break;
case MLOG_IBUF_BITMAP_INIT:
- ptr = ibuf_parse_bitmap_init(ptr, end_ptr, page, mtr);
+ /* Allow anything in page_type when creating a page. */
+ ptr = ibuf_parse_bitmap_init(ptr, end_ptr, block, mtr);
break;
case MLOG_INIT_FILE_PAGE:
- ptr = fsp_parse_init_file_page(ptr, end_ptr, page);
+ /* Allow anything in page_type when creating a page. */
+ ptr = fsp_parse_init_file_page(ptr, end_ptr, block);
break;
case MLOG_WRITE_STRING:
- ptr = mlog_parse_string(ptr, end_ptr, page);
+ ut_ad(!page || page_type != FIL_PAGE_TYPE_ALLOCATED);
+ ptr = mlog_parse_string(ptr, end_ptr, page, page_zip);
break;
case MLOG_FILE_CREATE:
case MLOG_FILE_RENAME:
case MLOG_FILE_DELETE:
- ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, FALSE,
- ULINT_UNDEFINED);
+ case MLOG_FILE_CREATE2:
+ ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, 0, 0);
+ break;
+ case MLOG_ZIP_WRITE_NODE_PTR:
+ ut_ad(!page || page_type == FIL_PAGE_INDEX);
+ ptr = page_zip_parse_write_node_ptr(ptr, end_ptr,
+ page, page_zip);
+ break;
+ case MLOG_ZIP_WRITE_BLOB_PTR:
+ ut_ad(!page || page_type == FIL_PAGE_INDEX);
+ ptr = page_zip_parse_write_blob_ptr(ptr, end_ptr,
+ page, page_zip);
+ break;
+ case MLOG_ZIP_WRITE_HEADER:
+ ut_ad(!page || page_type == FIL_PAGE_INDEX);
+ ptr = page_zip_parse_write_header(ptr, end_ptr,
+ page, page_zip);
+ break;
+ case MLOG_ZIP_PAGE_COMPRESS:
+ /* Allow anything in page_type when creating a page. */
+ ptr = page_zip_parse_compress(ptr, end_ptr,
+ page, page_zip);
break;
default:
ptr = NULL;
@@ -957,44 +1252,43 @@ recv_parse_or_apply_log_rec_body(
return(ptr);
}
-/*************************************************************************
+/*********************************************************************//**
Calculates the fold value of a page file address: used in inserting or
-searching for a log record in the hash table. */
+searching for a log record in the hash table.
+@return folded value */
UNIV_INLINE
ulint
recv_fold(
/*======*/
- /* out: folded value */
- ulint space, /* in: space */
- ulint page_no)/* in: page number */
+ ulint space, /*!< in: space */
+ ulint page_no)/*!< in: page number */
{
return(ut_fold_ulint_pair(space, page_no));
}
-/*************************************************************************
+/*********************************************************************//**
Calculates the hash value of a page file address: used in inserting or
-searching for a log record in the hash table. */
+searching for a log record in the hash table.
+@return hash value
UNIV_INLINE
ulint
recv_hash(
/*======*/
- /* out: folded value */
- ulint space, /* in: space */
- ulint page_no)/* in: page number */
+ ulint space, /*!< in: space */
+ ulint page_no)/*!< in: page number */
{
return(hash_calc_hash(recv_fold(space, page_no), recv_sys->addr_hash));
}
-/*************************************************************************
-Gets the hashed file address struct for a page. */
+/*********************************************************************//**
+Gets the hashed file address struct for a page.
+@return file address struct, NULL if not found from the hash table */
static
recv_addr_t*
recv_get_fil_addr_struct(
/*=====================*/
- /* out: file address struct, NULL if not found from
- the hash table */
- ulint space, /* in: space id */
- ulint page_no)/* in: page number */
+ ulint space, /*!< in: space id */
+ ulint page_no)/*!< in: page number */
{
recv_addr_t* recv_addr;
@@ -1013,19 +1307,19 @@ recv_get_fil_addr_struct(
return(recv_addr);
}
-/***********************************************************************
+/*******************************************************************//**
Adds a new log record to the hash table of log records. */
static
void
recv_add_to_hash_table(
/*===================*/
- byte type, /* in: log record type */
- ulint space, /* in: space id */
- ulint page_no, /* in: page number */
- byte* body, /* in: log record body */
- byte* rec_end, /* in: log record end */
- dulint start_lsn, /* in: start lsn of the mtr */
- dulint end_lsn) /* in: end lsn of the mtr */
+ byte type, /*!< in: log record type */
+ ulint space, /*!< in: space id */
+ ulint page_no, /*!< in: page number */
+ byte* body, /*!< in: log record body */
+ byte* rec_end, /*!< in: log record end */
+ ib_uint64_t start_lsn, /*!< in: start lsn of the mtr */
+ ib_uint64_t end_lsn) /*!< in: end lsn of the mtr */
{
recv_t* recv;
ulint len;
@@ -1088,7 +1382,7 @@ recv_add_to_hash_table(
sizeof(recv_data_t) + len);
*prev_field = recv_data;
- ut_memcpy(((byte*)recv_data) + sizeof(recv_data_t), body, len);
+ memcpy(recv_data + 1, body, len);
prev_field = &(recv_data->next);
@@ -1098,14 +1392,14 @@ recv_add_to_hash_table(
*prev_field = NULL;
}
-/*************************************************************************
+/*********************************************************************//**
Copies the log record body from recv to buf. */
static
void
recv_data_copy_to_buf(
/*==================*/
- byte* buf, /* in: buffer of length at least recv->len */
- recv_t* recv) /* in: log record */
+ byte* buf, /*!< in: buffer of length at least recv->len */
+ recv_t* recv) /*!< in: log record */
{
recv_data_t* recv_data;
ulint part_len;
@@ -1130,34 +1424,34 @@ recv_data_copy_to_buf(
}
}
-/****************************************************************************
+/************************************************************************//**
Applies the hashed log records to the page, if the page lsn is less than the
lsn of a log record. This can be called when a buffer page has just been
read in, or also for a page already in the buffer pool. */
-
+UNIV_INTERN
void
-recv_recover_page(
-/*==============*/
- ibool recover_backup, /* in: TRUE if we are recovering a backup
- page: then we do not acquire any latches
- since the page was read in outside the
- buffer pool */
- ibool just_read_in, /* in: TRUE if the i/o-handler calls this for
- a freshly read page */
- page_t* page, /* in: buffer page */
- ulint space, /* in: space id */
- ulint page_no) /* in: page number */
+recv_recover_page_func(
+/*===================*/
+#ifndef UNIV_HOTBACKUP
+ ibool just_read_in,
+ /*!< in: TRUE if the i/o handler calls
+ this for a freshly read page */
+#endif /* !UNIV_HOTBACKUP */
+ buf_block_t* block) /*!< in/out: buffer block */
{
- buf_block_t* block = NULL;
+ page_t* page;
+ page_zip_des_t* page_zip;
recv_addr_t* recv_addr;
recv_t* recv;
byte* buf;
- dulint start_lsn;
- dulint end_lsn;
- dulint page_lsn;
- dulint page_newest_lsn;
+ ib_uint64_t start_lsn;
+ ib_uint64_t end_lsn;
+ ib_uint64_t page_lsn;
+ ib_uint64_t page_newest_lsn;
ibool modification_to_page;
+#ifndef UNIV_HOTBACKUP
ibool success;
+#endif /* !UNIV_HOTBACKUP */
mtr_t mtr;
mutex_enter(&(recv_sys->mutex));
@@ -1171,7 +1465,8 @@ recv_recover_page(
return;
}
- recv_addr = recv_get_fil_addr_struct(space, page_no);
+ recv_addr = recv_get_fil_addr_struct(buf_block_get_space(block),
+ buf_block_get_page_no(block));
if ((recv_addr == NULL)
|| (recv_addr->state == RECV_BEING_PROCESSED)
@@ -1183,7 +1478,8 @@ recv_recover_page(
}
#if 0
- fprintf(stderr, "Recovering space %lu, page %lu\n", space, page_no);
+ fprintf(stderr, "Recovering space %lu, page %lu\n",
+ buf_block_get_space(block), buf_block_get_page_no(block));
#endif
recv_addr->state = RECV_BEING_PROCESSED;
@@ -1193,52 +1489,48 @@ recv_recover_page(
mtr_start(&mtr);
mtr_set_log_mode(&mtr, MTR_LOG_NONE);
- if (!recover_backup) {
- block = buf_block_align(page);
+ page = block->frame;
+ page_zip = buf_block_get_page_zip(block);
- if (just_read_in) {
- /* Move the ownership of the x-latch on the
- page to this OS thread, so that we can acquire
- a second x-latch on it. This is needed for the
- operations to the page to pass the debug
- checks. */
+#ifndef UNIV_HOTBACKUP
+ if (just_read_in) {
+ /* Move the ownership of the x-latch on the page to
+ this OS thread, so that we can acquire a second
+ x-latch on it. This is needed for the operations to
+ the page to pass the debug checks. */
- rw_lock_x_lock_move_ownership(&(block->lock));
- }
+ rw_lock_x_lock_move_ownership(&block->lock);
+ }
- success = buf_page_get_known_nowait(RW_X_LATCH, page,
- BUF_KEEP_OLD,
- __FILE__, __LINE__,
- &mtr);
- ut_a(success);
+ success = buf_page_get_known_nowait(RW_X_LATCH, block,
+ BUF_KEEP_OLD,
+ __FILE__, __LINE__,
+ &mtr);
+ ut_a(success);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
- }
+ buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
+#endif /* !UNIV_HOTBACKUP */
/* Read the newest modification lsn from the page */
- page_lsn = mach_read_from_8(page + FIL_PAGE_LSN);
+ page_lsn = mach_read_ull(page + FIL_PAGE_LSN);
- if (!recover_backup) {
- /* It may be that the page has been modified in the buffer
- pool: read the newest modification lsn there */
+#ifndef UNIV_HOTBACKUP
+ /* It may be that the page has been modified in the buffer
+ pool: read the newest modification lsn there */
- page_newest_lsn = buf_frame_get_newest_modification(page);
+ page_newest_lsn = buf_page_get_newest_modification(&block->page);
- if (!ut_dulint_is_zero(page_newest_lsn)) {
-
- page_lsn = page_newest_lsn;
- }
- } else {
- /* In recovery from a backup we do not really use the buffer
- pool */
+ if (page_newest_lsn) {
- page_newest_lsn = ut_dulint_zero;
+ page_lsn = page_newest_lsn;
}
+#else /* !UNIV_HOTBACKUP */
+ /* In recovery from a backup we do not really use the buffer pool */
+ page_newest_lsn = 0;
+#endif /* !UNIV_HOTBACKUP */
modification_to_page = FALSE;
- start_lsn = end_lsn = ut_dulint_zero;
+ start_lsn = end_lsn = 0;
recv = UT_LIST_GET_FIRST(recv_addr->rec_list);
@@ -1259,13 +1551,18 @@ recv_recover_page(
if (recv->type == MLOG_INIT_FILE_PAGE) {
page_lsn = page_newest_lsn;
- mach_write_to_8(page + UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM,
- ut_dulint_zero);
- mach_write_to_8(page + FIL_PAGE_LSN, ut_dulint_zero);
+ memset(FIL_PAGE_LSN + page, 0, 8);
+ memset(UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM
+ + page, 0, 8);
+
+ if (page_zip) {
+ memset(FIL_PAGE_LSN + page_zip->data, 0, 8);
+ }
}
- if (ut_dulint_cmp(recv->start_lsn, page_lsn) >= 0) {
+ if (recv->start_lsn >= page_lsn) {
+
+ ib_uint64_t end_lsn;
if (!modification_to_page) {
@@ -1287,14 +1584,18 @@ recv_recover_page(
recv_parse_or_apply_log_rec_body(recv->type, buf,
buf + recv->len,
- page, &mtr);
- mach_write_to_8(page + UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM,
- ut_dulint_add(recv->start_lsn,
- recv->len));
- mach_write_to_8(page + FIL_PAGE_LSN,
- ut_dulint_add(recv->start_lsn,
- recv->len));
+ block, &mtr);
+
+ end_lsn = recv->start_lsn + recv->len;
+ mach_write_ull(FIL_PAGE_LSN + page, end_lsn);
+ mach_write_ull(UNIV_PAGE_SIZE
+ - FIL_PAGE_END_LSN_OLD_CHKSUM
+ + page, end_lsn);
+
+ if (page_zip) {
+ mach_write_ull(FIL_PAGE_LSN
+ + page_zip->data, end_lsn);
+ }
}
if (recv->len > RECV_DATA_BLOCK_SIZE) {
@@ -1304,9 +1605,19 @@ recv_recover_page(
recv = UT_LIST_GET_NEXT(rec_list, recv);
}
+#ifdef UNIV_ZIP_DEBUG
+ if (fil_page_get_type(page) == FIL_PAGE_INDEX) {
+ page_zip_des_t* page_zip = buf_block_get_page_zip(block);
+
+ if (page_zip) {
+ ut_a(page_zip_validate_low(page_zip, page, FALSE));
+ }
+ }
+#endif /* UNIV_ZIP_DEBUG */
+
mutex_enter(&(recv_sys->mutex));
- if (ut_dulint_cmp(recv_max_page_lsn, page_lsn) < 0) {
+ if (recv_max_page_lsn < page_lsn) {
recv_max_page_lsn = page_lsn;
}
@@ -1317,11 +1628,13 @@ recv_recover_page(
mutex_exit(&(recv_sys->mutex));
- if (!recover_backup && modification_to_page) {
+#ifndef UNIV_HOTBACKUP
+ if (modification_to_page) {
ut_a(block);
buf_flush_recv_note_modification(block, start_lsn, end_lsn);
}
+#endif /* !UNIV_HOTBACKUP */
/* Make sure that committing mtr does not change the modification
lsn values of page */
@@ -1331,16 +1644,18 @@ recv_recover_page(
mtr_commit(&mtr);
}
-/***********************************************************************
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
Reads in pages which have hashed log records, from an area around a given
-page number. */
+page number.
+@return number of pages found */
static
ulint
recv_read_in_area(
/*==============*/
- /* out: number of pages found */
- ulint space, /* in: space */
- ulint page_no)/* in: page number */
+ ulint space, /*!< in: space */
+ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
+ ulint page_no)/*!< in: page number */
{
recv_addr_t* recv_addr;
ulint page_nos[RECV_READ_AHEAD_AREA];
@@ -1371,21 +1686,21 @@ recv_read_in_area(
}
}
- buf_read_recv_pages(FALSE, space, page_nos, n);
+ buf_read_recv_pages(FALSE, space, zip_size, page_nos, n);
/*
fprintf(stderr, "Recv pages at %lu n %lu\n", page_nos[0], n);
*/
return(n);
}
-/***********************************************************************
+/*******************************************************************//**
Empties the hash table of stored log records, applying them to appropriate
pages. */
-
+UNIV_INTERN
void
recv_apply_hashed_log_recs(
/*=======================*/
- ibool allow_ibuf) /* in: if TRUE, also ibuf operations are
+ ibool allow_ibuf) /*!< in: if TRUE, also ibuf operations are
allowed during the application; if FALSE,
no ibuf operations are allowed, and after
the application all file pages are flushed to
@@ -1396,10 +1711,7 @@ recv_apply_hashed_log_recs(
mutex */
{
recv_addr_t* recv_addr;
- page_t* page;
ulint i;
- ulint space;
- ulint page_no;
ulint n_pages;
ibool has_printed = FALSE;
mtr_t mtr;
@@ -1429,8 +1741,9 @@ loop:
recv_addr = HASH_GET_FIRST(recv_sys->addr_hash, i);
while (recv_addr) {
- space = recv_addr->space;
- page_no = recv_addr->page_no;
+ ulint space = recv_addr->space;
+ ulint zip_size = fil_space_get_zip_size(space);
+ ulint page_no = recv_addr->page_no;
if (recv_addr->state == RECV_NOT_PROCESSED) {
if (!has_printed) {
@@ -1446,21 +1759,21 @@ loop:
mutex_exit(&(recv_sys->mutex));
if (buf_page_peek(space, page_no)) {
+ buf_block_t* block;
mtr_start(&mtr);
- page = buf_page_get(space, page_no,
- RW_X_LATCH, &mtr);
+ block = buf_page_get(
+ space, zip_size, page_no,
+ RW_X_LATCH, &mtr);
+ buf_block_dbg_add_level(
+ block, SYNC_NO_ORDER_CHECK);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(
- page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
- recv_recover_page(FALSE, FALSE, page,
- space, page_no);
+ recv_recover_page(FALSE, block);
mtr_commit(&mtr);
} else {
- recv_read_in_area(space, page_no);
+ recv_read_in_area(space, zip_size,
+ page_no);
}
mutex_enter(&(recv_sys->mutex));
@@ -1500,11 +1813,12 @@ loop:
/* Flush all the file pages to disk and invalidate them in
the buffer pool */
+ ut_d(recv_no_log_write = TRUE);
mutex_exit(&(recv_sys->mutex));
mutex_exit(&(log_sys->mutex));
n_pages = buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX,
- ut_dulint_max);
+ IB_ULONGLONG_MAX);
ut_a(n_pages != ULINT_UNDEFINED);
buf_flush_wait_batch_end(BUF_FLUSH_LIST);
@@ -1513,6 +1827,7 @@ loop:
mutex_enter(&(log_sys->mutex));
mutex_enter(&(recv_sys->mutex));
+ ut_d(recv_no_log_write = FALSE);
recv_no_ibuf_operations = FALSE;
}
@@ -1528,21 +1843,17 @@ loop:
mutex_exit(&(recv_sys->mutex));
}
-
-/* This page is allocated from the buffer pool and used in the function
-below */
-static page_t* recv_backup_application_page = NULL;
-
-/***********************************************************************
+#else /* !UNIV_HOTBACKUP */
+/*******************************************************************//**
Applies log records in the hash table to a backup. */
-
+UNIV_INTERN
void
recv_apply_log_recs_for_backup(void)
/*================================*/
{
recv_addr_t* recv_addr;
ulint n_hash_cells;
- byte* page;
+ buf_block_t* block;
ulint actual_size;
ibool success;
ulint error;
@@ -1551,11 +1862,7 @@ recv_apply_log_recs_for_backup(void)
recv_sys->apply_log_recs = TRUE;
recv_sys->apply_batch_on = TRUE;
- if (recv_backup_application_page == NULL) {
- recv_backup_application_page = buf_frame_alloc();
- }
-
- page = recv_backup_application_page;
+ block = back_block1;
fputs("InnoDB: Starting an apply batch of log records"
" to the database...\n"
@@ -1569,7 +1876,10 @@ recv_apply_log_recs_for_backup(void)
while (recv_addr != NULL) {
- if (!fil_tablespace_exists_in_mem(recv_addr->space)) {
+ ulint zip_size
+ = fil_space_get_zip_size(recv_addr->space);
+
+ if (zip_size == ULINT_UNDEFINED) {
#if 0
fprintf(stderr,
"InnoDB: Warning: cannot apply"
@@ -1588,14 +1898,12 @@ recv_apply_log_recs_for_backup(void)
}
/* We simulate a page read made by the buffer pool, to
- make sure the recovery apparatus works ok, for
- example, the buf_frame_align() function. We must init
- the block corresponding to buf_pool->frame_zero
- (== page). */
+ make sure the recovery apparatus works ok. We must init
+ the block. */
buf_page_init_for_backup_restore(
recv_addr->space, recv_addr->page_no,
- buf_block_align(page));
+ zip_size, block);
/* Extend the tablespace's last file if the page_no
does not fall inside its bounds; we assume the last
@@ -1617,9 +1925,23 @@ recv_apply_log_recs_for_backup(void)
/* Read the page from the tablespace file using the
fil0fil.c routines */
- error = fil_io(OS_FILE_READ, TRUE, recv_addr->space,
- recv_addr->page_no, 0, UNIV_PAGE_SIZE,
- page, NULL);
+ if (zip_size) {
+ error = fil_io(OS_FILE_READ, TRUE,
+ recv_addr->space, zip_size,
+ recv_addr->page_no, 0, zip_size,
+ block->page.zip.data, NULL);
+ if (error == DB_SUCCESS
+ && !buf_zip_decompress(block, TRUE)) {
+ exit(1);
+ }
+ } else {
+ error = fil_io(OS_FILE_READ, TRUE,
+ recv_addr->space, 0,
+ recv_addr->page_no, 0,
+ UNIV_PAGE_SIZE,
+ block->frame, NULL);
+ }
+
if (error != DB_SUCCESS) {
fprintf(stderr,
"InnoDB: Fatal error: cannot read"
@@ -1632,19 +1954,28 @@ recv_apply_log_recs_for_backup(void)
}
/* Apply the log records to this page */
- recv_recover_page(TRUE, FALSE, page, recv_addr->space,
- recv_addr->page_no);
+ recv_recover_page(FALSE, block);
/* Write the page back to the tablespace file using the
fil0fil.c routines */
buf_flush_init_for_writing(
- page, mach_read_from_8(page + FIL_PAGE_LSN),
- recv_addr->space, recv_addr->page_no);
-
- error = fil_io(OS_FILE_WRITE, TRUE, recv_addr->space,
- recv_addr->page_no, 0, UNIV_PAGE_SIZE,
- page, NULL);
+ block->frame, buf_block_get_page_zip(block),
+ mach_read_ull(block->frame + FIL_PAGE_LSN));
+
+ if (zip_size) {
+ error = fil_io(OS_FILE_WRITE, TRUE,
+ recv_addr->space, zip_size,
+ recv_addr->page_no, 0,
+ zip_size,
+ block->page.zip.data, NULL);
+ } else {
+ error = fil_io(OS_FILE_WRITE, TRUE,
+ recv_addr->space, 0,
+ recv_addr->page_no, 0,
+ UNIV_PAGE_SIZE,
+ block->frame, NULL);
+ }
skip_this_recv_addr:
recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
}
@@ -1659,21 +1990,21 @@ skip_this_recv_addr:
recv_sys_empty_hash();
}
+#endif /* !UNIV_HOTBACKUP */
-/***********************************************************************
-Tries to parse a single log record and returns its length. */
+/*******************************************************************//**
+Tries to parse a single log record and returns its length.
+@return length of the record, or 0 if the record was not complete */
static
ulint
recv_parse_log_rec(
/*===============*/
- /* out: length of the record, or 0 if the record was
- not complete */
- byte* ptr, /* in: pointer to a buffer */
- byte* end_ptr,/* in: pointer to the buffer end */
- byte* type, /* out: type */
- ulint* space, /* out: space id */
- ulint* page_no,/* out: page number */
- byte** body) /* out: log record body start */
+ byte* ptr, /*!< in: pointer to a buffer */
+ byte* end_ptr,/*!< in: pointer to the buffer end */
+ byte* type, /*!< out: type */
+ ulint* space, /*!< out: space id */
+ ulint* page_no,/*!< out: page number */
+ byte** body) /*!< out: log record body start */
{
byte* new_ptr;
@@ -1708,6 +2039,17 @@ recv_parse_log_rec(
return(0);
}
+#ifdef UNIV_LOG_LSN_DEBUG
+ if (*type == MLOG_LSN) {
+ ib_uint64_t lsn = (ib_uint64_t) *space << 32 | *page_no;
+# ifdef UNIV_LOG_DEBUG
+ ut_a(lsn == log_sys->old_lsn);
+# else /* UNIV_LOG_DEBUG */
+ ut_a(lsn == recv_sys->recovered_lsn);
+# endif /* UNIV_LOG_DEBUG */
+ }
+#endif /* UNIV_LOG_LSN_DEBUG */
+
/* Check that page_no is sensible */
if (UNIV_UNLIKELY(*page_no > 0x8FFFFFFFUL)) {
@@ -1731,40 +2073,42 @@ recv_parse_log_rec(
return(new_ptr - ptr);
}
-/***********************************************************
+/*******************************************************//**
Calculates the new value for lsn when more data is added to the log. */
static
-dulint
+ib_uint64_t
recv_calc_lsn_on_data_add(
/*======================*/
- dulint lsn, /* in: old lsn */
- ulint len) /* in: this many bytes of data is added, log block
- headers not included */
+ ib_uint64_t lsn, /*!< in: old lsn */
+ ib_uint64_t len) /*!< in: this many bytes of data is
+ added, log block headers not included */
{
ulint frag_len;
ulint lsn_len;
- frag_len = (ut_dulint_get_low(lsn) % OS_FILE_LOG_BLOCK_SIZE)
+ frag_len = (((ulint) lsn) % OS_FILE_LOG_BLOCK_SIZE)
- LOG_BLOCK_HDR_SIZE;
ut_ad(frag_len < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
- LOG_BLOCK_TRL_SIZE);
- lsn_len = len + ((len + frag_len)
- / (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
- - LOG_BLOCK_TRL_SIZE))
+ lsn_len = (ulint) len;
+ lsn_len += (lsn_len + frag_len)
+ / (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
+ - LOG_BLOCK_TRL_SIZE)
* (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE);
- return(ut_dulint_add(lsn, lsn_len));
+ return(lsn + lsn_len);
}
-/***********************************************************
+#ifdef UNIV_LOG_DEBUG
+/*******************************************************//**
Checks that the parser recognizes incomplete initial segments of a log
record as incomplete. */
-
+static
void
recv_check_incomplete_log_recs(
/*===========================*/
- byte* ptr, /* in: pointer to a complete log record */
- ulint len) /* in: length of the log record */
+ byte* ptr, /*!< in: pointer to a complete log record */
+ ulint len) /*!< in: length of the log record */
{
ulint i;
byte type;
@@ -1777,27 +2121,27 @@ recv_check_incomplete_log_recs(
&page_no, &body));
}
}
+#endif /* UNIV_LOG_DEBUG */
-/***********************************************************
+/*******************************************************//**
Prints diagnostic info of corrupt log. */
static
void
recv_report_corrupt_log(
/*====================*/
- byte* ptr, /* in: pointer to corrupt log record */
- byte type, /* in: type of the record */
- ulint space, /* in: space id, this may also be garbage */
- ulint page_no)/* in: page number, this may also be garbage */
+ byte* ptr, /*!< in: pointer to corrupt log record */
+ byte type, /*!< in: type of the record */
+ ulint space, /*!< in: space id, this may also be garbage */
+ ulint page_no)/*!< in: page number, this may also be garbage */
{
fprintf(stderr,
"InnoDB: ############### CORRUPT LOG RECORD FOUND\n"
"InnoDB: Log record type %lu, space id %lu, page number %lu\n"
- "InnoDB: Log parsing proceeded successfully up to %lu %lu\n"
+ "InnoDB: Log parsing proceeded successfully up to %llu\n"
"InnoDB: Previous log record type %lu, is multi %lu\n"
"InnoDB: Recv offset %lu, prev %lu\n",
(ulong) type, (ulong) space, (ulong) page_no,
- (ulong) ut_dulint_get_high(recv_sys->recovered_lsn),
- (ulong) ut_dulint_get_low(recv_sys->recovered_lsn),
+ recv_sys->recovered_lsn,
(ulong) recv_previous_parsed_rec_type,
(ulong) recv_previous_parsed_rec_is_multi,
(ulong) (ptr - recv_sys->buf),
@@ -1828,40 +2172,39 @@ recv_report_corrupt_log(
"InnoDB: far enough in recovery! Please run CHECK TABLE\n"
"InnoDB: on your InnoDB tables to check that they are ok!\n"
"InnoDB: If mysqld crashes after this recovery, look at\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "forcing-recovery.html\n"
+ "InnoDB: " REFMAN "forcing-recovery.html\n"
"InnoDB: about forcing recovery.\n", stderr);
fflush(stderr);
}
-/***********************************************************
+/*******************************************************//**
Parses log records from a buffer and stores them to a hash table to wait
-merging to file pages. */
+merging to file pages.
+@return currently always returns FALSE */
static
ibool
recv_parse_log_recs(
/*================*/
- /* out: currently always returns FALSE */
- ibool store_to_hash) /* in: TRUE if the records should be stored
+ ibool store_to_hash) /*!< in: TRUE if the records should be stored
to the hash table; this is set to FALSE if just
debug checking is needed */
{
- byte* ptr;
- byte* end_ptr;
- ulint single_rec;
- ulint len;
- ulint total_len;
- dulint new_recovered_lsn;
- dulint old_lsn;
- byte type;
- ulint space;
- ulint page_no;
- byte* body;
- ulint n_recs;
+ byte* ptr;
+ byte* end_ptr;
+ ulint single_rec;
+ ulint len;
+ ulint total_len;
+ ib_uint64_t new_recovered_lsn;
+ ib_uint64_t old_lsn;
+ byte type;
+ ulint space;
+ ulint page_no;
+ byte* body;
+ ulint n_recs;
ut_ad(mutex_own(&(log_sys->mutex)));
- ut_ad(!ut_dulint_is_zero(recv_sys->parse_start_lsn));
+ ut_ad(recv_sys->parse_start_lsn != 0);
loop:
ptr = recv_sys->buf + recv_sys->recovered_offset;
@@ -1897,8 +2240,7 @@ loop:
new_recovered_lsn = recv_calc_lsn_on_data_add(old_lsn, len);
- if (ut_dulint_cmp(new_recovered_lsn, recv_sys->scanned_lsn)
- > 0) {
+ if (new_recovered_lsn > recv_sys->scanned_lsn) {
/* The log record filled a log block, and we require
that also the next log block should have been scanned
in */
@@ -1926,9 +2268,19 @@ loop:
if (type == MLOG_DUMMY_RECORD) {
/* Do nothing */
- } else if (store_to_hash && (type == MLOG_FILE_CREATE
- || type == MLOG_FILE_RENAME
- || type == MLOG_FILE_DELETE)) {
+ } else if (!store_to_hash) {
+ /* In debug checking, update a replicate page
+ according to the log record, and check that it
+ becomes identical with the original page */
+#ifdef UNIV_LOG_DEBUG
+ recv_check_incomplete_log_recs(ptr, len);
+#endif/* UNIV_LOG_DEBUG */
+
+ } else if (type == MLOG_FILE_CREATE
+ || type == MLOG_FILE_CREATE2
+ || type == MLOG_FILE_RENAME
+ || type == MLOG_FILE_DELETE) {
+ ut_a(space);
#ifdef UNIV_HOTBACKUP
if (recv_replay_file_ops) {
@@ -1938,8 +2290,8 @@ loop:
point to the datadir we should use there */
if (NULL == fil_op_log_parse_or_replay(
- body, end_ptr, type, TRUE,
- space)) {
+ body, end_ptr, type,
+ space, page_no)) {
fprintf(stderr,
"InnoDB: Error: file op"
" log record of type %lu"
@@ -1949,20 +2301,22 @@ loop:
(ulint)type, space,
(char*)(body + 2));
- ut_a(0);
+ ut_error;
}
}
#endif
/* In normal mysqld crash recovery we do not try to
replay file operations */
- } else if (store_to_hash) {
+#ifdef UNIV_LOG_LSN_DEBUG
+ } else if (type == MLOG_LSN) {
+ /* Do not add these records to the hash table.
+ The page number and space id fields are misused
+ for something else. */
+#endif /* UNIV_LOG_LSN_DEBUG */
+ } else {
recv_add_to_hash_table(type, space, page_no, body,
ptr + len, old_lsn,
recv_sys->recovered_lsn);
- } else {
-#ifdef UNIV_LOG_DEBUG
- recv_check_incomplete_log_recs(ptr, len);
-#endif/* UNIV_LOG_DEBUG */
}
} else {
/* Check that all the records associated with the single mtr
@@ -1990,11 +2344,11 @@ loop:
= recv_sys->recovered_offset + total_len;
recv_previous_parsed_rec_is_multi = 1;
- if ((!store_to_hash) && (type != MLOG_MULTI_REC_END)) {
#ifdef UNIV_LOG_DEBUG
+ if ((!store_to_hash) && (type != MLOG_MULTI_REC_END)) {
recv_check_incomplete_log_recs(ptr, len);
-#endif /* UNIV_LOG_DEBUG */
}
+#endif /* UNIV_LOG_DEBUG */
#ifdef UNIV_DEBUG
if (log_debug_writes) {
@@ -2023,8 +2377,7 @@ loop:
new_recovered_lsn = recv_calc_lsn_on_data_add(
recv_sys->recovered_lsn, total_len);
- if (ut_dulint_cmp(new_recovered_lsn, recv_sys->scanned_lsn)
- > 0) {
+ if (new_recovered_lsn > recv_sys->scanned_lsn) {
/* The log record filled a log block, and we require
that also the next log block should have been scanned
in */
@@ -2059,7 +2412,11 @@ loop:
break;
}
- if (store_to_hash) {
+ if (store_to_hash
+#ifdef UNIV_LOG_LSN_DEBUG
+ && type != MLOG_LSN
+#endif /* UNIV_LOG_LSN_DEBUG */
+ ) {
recv_add_to_hash_table(type, space, page_no,
body, ptr + len,
old_lsn,
@@ -2073,26 +2430,26 @@ loop:
goto loop;
}
-/***********************************************************
+/*******************************************************//**
Adds data from a new log block to the parsing buffer of recv_sys if
-recv_sys->parse_start_lsn is non-zero. */
+recv_sys->parse_start_lsn is non-zero.
+@return TRUE if more data added */
static
ibool
recv_sys_add_to_parsing_buf(
/*========================*/
- /* out: TRUE if more data added */
- byte* log_block, /* in: log block */
- dulint scanned_lsn) /* in: lsn of how far we were able to find
- data in this log block */
+ const byte* log_block, /*!< in: log block */
+ ib_uint64_t scanned_lsn) /*!< in: lsn of how far we were able
+ to find data in this log block */
{
ulint more_len;
ulint data_len;
ulint start_offset;
ulint end_offset;
- ut_ad(ut_dulint_cmp(scanned_lsn, recv_sys->scanned_lsn) >= 0);
+ ut_ad(scanned_lsn >= recv_sys->scanned_lsn);
- if (ut_dulint_is_zero(recv_sys->parse_start_lsn)) {
+ if (!recv_sys->parse_start_lsn) {
/* Cannot start parsing yet because no start point for
it found */
@@ -2101,20 +2458,18 @@ recv_sys_add_to_parsing_buf(
data_len = log_block_get_data_len(log_block);
- if (ut_dulint_cmp(recv_sys->parse_start_lsn, scanned_lsn) >= 0) {
+ if (recv_sys->parse_start_lsn >= scanned_lsn) {
return(FALSE);
- } else if (ut_dulint_cmp(recv_sys->scanned_lsn, scanned_lsn) >= 0) {
+ } else if (recv_sys->scanned_lsn >= scanned_lsn) {
return(FALSE);
- } else if (ut_dulint_cmp(recv_sys->parse_start_lsn,
- recv_sys->scanned_lsn) > 0) {
- more_len = ut_dulint_minus(scanned_lsn,
- recv_sys->parse_start_lsn);
+ } else if (recv_sys->parse_start_lsn > recv_sys->scanned_lsn) {
+ more_len = (ulint) (scanned_lsn - recv_sys->parse_start_lsn);
} else {
- more_len = ut_dulint_minus(scanned_lsn, recv_sys->scanned_lsn);
+ more_len = (ulint) (scanned_lsn - recv_sys->scanned_lsn);
}
if (more_len == 0) {
@@ -2150,7 +2505,7 @@ recv_sys_add_to_parsing_buf(
return(TRUE);
}
-/***********************************************************
+/*******************************************************//**
Moves the parsing buffer data left to the buffer start. */
static
void
@@ -2165,44 +2520,43 @@ recv_sys_justify_left_parsing_buf(void)
recv_sys->recovered_offset = 0;
}
-/***********************************************************
-Scans log from a buffer and stores new log data to the parsing buffer. Parses
-and hashes the log records if new data found. */
-
+/*******************************************************//**
+Scans log from a buffer and stores new log data to the parsing buffer.
+Parses and hashes the log records if new data found. Unless
+UNIV_HOTBACKUP is defined, this function will apply log records
+automatically when the hash table becomes full.
+@return TRUE if limit_lsn has been reached, or not able to scan any
+more in this log group */
+UNIV_INTERN
ibool
recv_scan_log_recs(
/*===============*/
- /* out: TRUE if limit_lsn has been reached, or
- not able to scan any more in this log group */
- ibool apply_automatically,/* in: TRUE if we want this function to
- apply log records automatically when the
- hash table becomes full; in the hot backup tool
- the tool does the applying, not this
- function */
- ulint available_memory,/* in: we let the hash table of recs to grow
- to this size, at the maximum */
- ibool store_to_hash, /* in: TRUE if the records should be stored
- to the hash table; this is set to FALSE if just
- debug checking is needed */
- byte* buf, /* in: buffer containing a log segment or
- garbage */
- ulint len, /* in: buffer length */
- dulint start_lsn, /* in: buffer start lsn */
- dulint* contiguous_lsn, /* in/out: it is known that all log groups
- contain contiguous log data up to this lsn */
- dulint* group_scanned_lsn)/* out: scanning succeeded up to this lsn */
+ ulint available_memory,/*!< in: we let the hash table of recs
+ to grow to this size, at the maximum */
+ ibool store_to_hash, /*!< in: TRUE if the records should be
+ stored to the hash table; this is set
+ to FALSE if just debug checking is
+ needed */
+ const byte* buf, /*!< in: buffer containing a log
+ segment or garbage */
+ ulint len, /*!< in: buffer length */
+ ib_uint64_t start_lsn, /*!< in: buffer start lsn */
+ ib_uint64_t* contiguous_lsn, /*!< in/out: it is known that all log
+ groups contain contiguous log data up
+ to this lsn */
+ ib_uint64_t* group_scanned_lsn)/*!< out: scanning succeeded up to
+ this lsn */
{
- byte* log_block;
- ulint no;
- dulint scanned_lsn;
- ibool finished;
- ulint data_len;
- ibool more_data;
-
- ut_ad(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
+ const byte* log_block;
+ ulint no;
+ ib_uint64_t scanned_lsn;
+ ibool finished;
+ ulint data_len;
+ ibool more_data;
+
+ ut_ad(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
ut_ad(len > 0);
- ut_a(apply_automatically <= TRUE);
ut_a(store_to_hash <= TRUE);
finished = FALSE;
@@ -2211,8 +2565,7 @@ recv_scan_log_recs(
scanned_lsn = start_lsn;
more_data = FALSE;
- while (log_block < buf + len && !finished) {
-
+ do {
no = log_block_get_hdr_no(log_block);
/*
fprintf(stderr, "Log block header no %lu\n", no);
@@ -2228,13 +2581,11 @@ recv_scan_log_recs(
log_block)) {
fprintf(stderr,
"InnoDB: Log block no %lu at"
- " lsn %lu %lu has\n"
+ " lsn %llu has\n"
"InnoDB: ok header, but checksum field"
" contains %lu, should be %lu\n",
(ulong) no,
- (ulong) ut_dulint_get_high(
- scanned_lsn),
- (ulong) ut_dulint_get_low(scanned_lsn),
+ scanned_lsn,
(ulong) log_block_get_checksum(
log_block),
(ulong) log_block_calc_checksum(
@@ -2256,7 +2607,7 @@ recv_scan_log_recs(
we know that log data is contiguous up to scanned_lsn
in all non-corrupt log groups. */
- if (ut_dulint_cmp(scanned_lsn, *contiguous_lsn) > 0) {
+ if (scanned_lsn > *contiguous_lsn) {
*contiguous_lsn = scanned_lsn;
}
}
@@ -2264,8 +2615,7 @@ recv_scan_log_recs(
data_len = log_block_get_data_len(log_block);
if ((store_to_hash || (data_len == OS_FILE_LOG_BLOCK_SIZE))
- && (ut_dulint_cmp(ut_dulint_add(scanned_lsn, data_len),
- recv_sys->scanned_lsn) > 0)
+ && scanned_lsn + data_len > recv_sys->scanned_lsn
&& (recv_sys->scanned_checkpoint_no > 0)
&& (log_block_get_checkpoint_no(log_block)
< recv_sys->scanned_checkpoint_no)
@@ -2286,40 +2636,37 @@ recv_scan_log_recs(
break;
}
- if (ut_dulint_is_zero(recv_sys->parse_start_lsn)
+ if (!recv_sys->parse_start_lsn
&& (log_block_get_first_rec_group(log_block) > 0)) {
/* We found a point from which to start the parsing
of log records */
- recv_sys->parse_start_lsn
- = ut_dulint_add(scanned_lsn,
- log_block_get_first_rec_group(
- log_block));
+ recv_sys->parse_start_lsn = scanned_lsn
+ + log_block_get_first_rec_group(log_block);
recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
}
- scanned_lsn = ut_dulint_add(scanned_lsn, data_len);
+ scanned_lsn += data_len;
- if (ut_dulint_cmp(scanned_lsn, recv_sys->scanned_lsn) > 0) {
+ if (scanned_lsn > recv_sys->scanned_lsn) {
/* We have found more entries. If this scan is
of startup type, we must initiate crash recovery
environment before parsing these log records. */
+#ifndef UNIV_HOTBACKUP
if (recv_log_scan_is_startup_type
&& !recv_needed_recovery) {
fprintf(stderr,
"InnoDB: Log scan progressed"
- " past the checkpoint lsn %lu %lu\n",
- (ulong) ut_dulint_get_high(
- recv_sys->scanned_lsn),
- (ulong) ut_dulint_get_low(
- recv_sys->scanned_lsn));
+ " past the checkpoint lsn %llu\n",
+ recv_sys->scanned_lsn);
recv_init_crash_recovery();
}
+#endif /* !UNIV_HOTBACKUP */
/* We were able to find more log data: add it to the
parsing buffer if parse_start_lsn is already
@@ -2348,10 +2695,11 @@ recv_scan_log_recs(
/* Log data for this group ends here */
finished = TRUE;
+ break;
} else {
log_block += OS_FILE_LOG_BLOCK_SIZE;
}
- }
+ } while (log_block < buf + len && !finished);
*group_scanned_lsn = scanned_lsn;
@@ -2363,9 +2711,8 @@ recv_scan_log_recs(
fprintf(stderr,
"InnoDB: Doing recovery: scanned up to"
- " log sequence number %lu %lu\n",
- (ulong) ut_dulint_get_high(*group_scanned_lsn),
- (ulong) ut_dulint_get_low(*group_scanned_lsn));
+ " log sequence number %llu\n",
+ *group_scanned_lsn);
}
}
@@ -2374,9 +2721,9 @@ recv_scan_log_recs(
recv_parse_log_recs(store_to_hash);
+#ifndef UNIV_HOTBACKUP
if (store_to_hash && mem_heap_get_size(recv_sys->heap)
- > available_memory
- && apply_automatically) {
+ > available_memory) {
/* Hash table of log records has grown too big:
empty it; FALSE means no ibuf operations
@@ -2386,6 +2733,7 @@ recv_scan_log_recs(
recv_apply_hashed_log_recs(FALSE);
}
+#endif /* !UNIV_HOTBACKUP */
if (recv_sys->recovered_offset > RECV_PARSING_BUF_SIZE / 4) {
/* Move parsing buffer data to the buffer start */
@@ -2397,34 +2745,37 @@ recv_scan_log_recs(
return(finished);
}
-/***********************************************************
+#ifndef UNIV_HOTBACKUP
+/*******************************************************//**
Scans log from a buffer and stores new log data to the parsing buffer. Parses
and hashes the log records if new data found. */
static
void
recv_group_scan_log_recs(
/*=====================*/
- log_group_t* group, /* in: log group */
- dulint* contiguous_lsn, /* in/out: it is known that all log groups
- contain contiguous log data up to this lsn */
- dulint* group_scanned_lsn)/* out: scanning succeeded up to this lsn */
+ log_group_t* group, /*!< in: log group */
+ ib_uint64_t* contiguous_lsn, /*!< in/out: it is known that all log
+ groups contain contiguous log data up
+ to this lsn */
+ ib_uint64_t* group_scanned_lsn)/*!< out: scanning succeeded up to
+ this lsn */
{
- ibool finished;
- dulint start_lsn;
- dulint end_lsn;
+ ibool finished;
+ ib_uint64_t start_lsn;
+ ib_uint64_t end_lsn;
finished = FALSE;
start_lsn = *contiguous_lsn;
while (!finished) {
- end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);
+ end_lsn = start_lsn + RECV_SCAN_SIZE;
log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
group, start_lsn, end_lsn);
finished = recv_scan_log_recs(
- TRUE, (buf_pool->n_frames - recv_n_pool_free_frames)
+ (buf_pool->curr_size - recv_n_pool_free_frames)
* UNIV_PAGE_SIZE, TRUE, log_sys->buf, RECV_SCAN_SIZE,
start_lsn, contiguous_lsn, group_scanned_lsn);
start_lsn = end_lsn;
@@ -2434,15 +2785,14 @@ recv_group_scan_log_recs(
if (log_debug_writes) {
fprintf(stderr,
"InnoDB: Scanned group %lu up to"
- " log sequence number %lu %lu\n",
+ " log sequence number %llu\n",
(ulong) group->id,
- (ulong) ut_dulint_get_high(*group_scanned_lsn),
- (ulong) ut_dulint_get_low(*group_scanned_lsn));
+ *group_scanned_lsn);
}
#endif /* UNIV_DEBUG */
}
-/***********************************************************
+/*******************************************************//**
Initialize crash recovery environment. Can be called iff
recv_needed_recovery == FALSE. */
static
@@ -2483,42 +2833,57 @@ recv_init_crash_recovery(void)
}
}
-/************************************************************
+/********************************************************//**
Recovers from a checkpoint. When this function returns, the database is able
to start processing of new user transactions, but the function
recv_recovery_from_checkpoint_finish should be called later to complete
-the recovery and free the resources used in it. */
-
+the recovery and free the resources used in it.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
ulint
-recv_recovery_from_checkpoint_start(
-/*================================*/
- /* out: error code or DB_SUCCESS */
- ulint type, /* in: LOG_CHECKPOINT or LOG_ARCHIVE */
- dulint limit_lsn, /* in: recover up to this lsn if possible */
- dulint min_flushed_lsn,/* in: min flushed lsn from data files */
- dulint max_flushed_lsn)/* in: max flushed lsn from data files */
+recv_recovery_from_checkpoint_start_func(
+/*=====================================*/
+#ifdef UNIV_LOG_ARCHIVE
+ ulint type, /*!< in: LOG_CHECKPOINT or
+ LOG_ARCHIVE */
+ ib_uint64_t limit_lsn, /*!< in: recover up to this lsn
+ if possible */
+#endif /* UNIV_LOG_ARCHIVE */
+ ib_uint64_t min_flushed_lsn,/*!< in: min flushed lsn from
+ data files */
+ ib_uint64_t max_flushed_lsn)/*!< in: max flushed lsn from
+ data files */
{
log_group_t* group;
log_group_t* max_cp_group;
log_group_t* up_to_date_group;
ulint max_cp_field;
- dulint checkpoint_lsn;
- dulint checkpoint_no;
- dulint old_scanned_lsn;
- dulint group_scanned_lsn;
- dulint contiguous_lsn;
- dulint archived_lsn;
- ulint capacity;
+ ib_uint64_t checkpoint_lsn;
+ ib_uint64_t checkpoint_no;
+ ib_uint64_t old_scanned_lsn;
+ ib_uint64_t group_scanned_lsn;
+ ib_uint64_t contiguous_lsn;
+ ib_uint64_t archived_lsn;
byte* buf;
byte log_hdr_buf[LOG_FILE_HDR_SIZE];
ulint err;
- ut_ad((type != LOG_CHECKPOINT)
- || (ut_dulint_cmp(limit_lsn, ut_dulint_max) == 0));
+#ifdef UNIV_LOG_ARCHIVE
+ ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX);
+/** TRUE when recovering from a checkpoint */
+# define TYPE_CHECKPOINT (type == LOG_CHECKPOINT)
+/** Recover up to this log sequence number */
+# define LIMIT_LSN limit_lsn
+#else /* UNIV_LOG_ARCHIVE */
+/** TRUE when recovering from a checkpoint */
+# define TYPE_CHECKPOINT 1
+/** Recover up to this log sequence number */
+# define LIMIT_LSN IB_ULONGLONG_MAX
+#endif /* UNIV_LOG_ARCHIVE */
- if (type == LOG_CHECKPOINT) {
+ if (TYPE_CHECKPOINT) {
recv_sys_create();
- recv_sys_init(FALSE, buf_pool_get_curr_size());
+ recv_sys_init(buf_pool_get_curr_size());
}
if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) {
@@ -2532,7 +2897,7 @@ recv_recovery_from_checkpoint_start(
recv_recovery_on = TRUE;
- recv_sys->limit_lsn = limit_lsn;
+ recv_sys->limit_lsn = LIMIT_LSN;
mutex_enter(&(log_sys->mutex));
@@ -2551,14 +2916,14 @@ recv_recovery_from_checkpoint_start(
buf = log_sys->checkpoint_buf;
- checkpoint_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_LSN);
- checkpoint_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO);
- archived_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN);
+ checkpoint_lsn = mach_read_ull(buf + LOG_CHECKPOINT_LSN);
+ checkpoint_no = mach_read_ull(buf + LOG_CHECKPOINT_NO);
+ archived_lsn = mach_read_ull(buf + LOG_CHECKPOINT_ARCHIVED_LSN);
/* Read the first log file header to print a note if this is
a recovery from a restored InnoDB Hot Backup */
- fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, max_cp_group->space_id,
+ fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, max_cp_group->space_id, 0,
0, 0, LOG_FILE_HDR_SIZE,
log_hdr_buf, max_cp_group);
@@ -2582,7 +2947,7 @@ recv_recovery_from_checkpoint_start(
' ', 4);
/* Write to the log file to wipe over the label */
fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE,
- max_cp_group->space_id,
+ max_cp_group->space_id, 0,
0, 0, OS_FILE_LOG_BLOCK_SIZE,
log_hdr_buf, max_cp_group);
}
@@ -2599,7 +2964,7 @@ recv_recovery_from_checkpoint_start(
}
#endif /* UNIV_LOG_ARCHIVE */
- if (type == LOG_CHECKPOINT) {
+ if (TYPE_CHECKPOINT) {
/* Start reading the log groups from the checkpoint lsn up. The
variable contiguous_lsn contains an lsn up to which the log is
known to be contiguously written to all log groups. */
@@ -2612,20 +2977,22 @@ recv_recovery_from_checkpoint_start(
srv_start_lsn = checkpoint_lsn;
}
- contiguous_lsn = ut_dulint_align_down(recv_sys->scanned_lsn,
+ contiguous_lsn = ut_uint64_align_down(recv_sys->scanned_lsn,
OS_FILE_LOG_BLOCK_SIZE);
- if (type == LOG_ARCHIVE) {
+ if (TYPE_CHECKPOINT) {
+ up_to_date_group = max_cp_group;
+#ifdef UNIV_LOG_ARCHIVE
+ } else {
+ ulint capacity;
+
/* Try to recover the remaining part from logs: first from
the logs of the archived group */
group = recv_sys->archive_group;
capacity = log_group_get_capacity(group);
- if ((ut_dulint_cmp(recv_sys->scanned_lsn, ut_dulint_add(
- checkpoint_lsn, capacity)) > 0)
- || (ut_dulint_cmp(checkpoint_lsn, ut_dulint_add(
- recv_sys->scanned_lsn, capacity))
- > 0)) {
+ if (recv_sys->scanned_lsn > checkpoint_lsn + capacity
+ || checkpoint_lsn > recv_sys->scanned_lsn + capacity) {
mutex_exit(&(log_sys->mutex));
@@ -2637,7 +3004,7 @@ recv_recovery_from_checkpoint_start(
recv_group_scan_log_recs(group, &contiguous_lsn,
&group_scanned_lsn);
- if (ut_dulint_cmp(recv_sys->scanned_lsn, checkpoint_lsn) < 0) {
+ if (recv_sys->scanned_lsn < checkpoint_lsn) {
mutex_exit(&(log_sys->mutex));
@@ -2650,20 +3017,21 @@ recv_recovery_from_checkpoint_start(
group->scanned_lsn = group_scanned_lsn;
up_to_date_group = group;
- } else {
- up_to_date_group = max_cp_group;
+#endif /* UNIV_LOG_ARCHIVE */
}
ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);
group = UT_LIST_GET_FIRST(log_sys->log_groups);
+#ifdef UNIV_LOG_ARCHIVE
if ((type == LOG_ARCHIVE) && (group == recv_sys->archive_group)) {
group = UT_LIST_GET_NEXT(log_groups, group);
}
+#endif /* UNIV_LOG_ARCHIVE */
/* Set the flag to publish that we are doing startup scan. */
- recv_log_scan_is_startup_type = (type == LOG_CHECKPOINT);
+ recv_log_scan_is_startup_type = TYPE_CHECKPOINT;
while (group) {
old_scanned_lsn = recv_sys->scanned_lsn;
@@ -2671,32 +3039,33 @@ recv_recovery_from_checkpoint_start(
&group_scanned_lsn);
group->scanned_lsn = group_scanned_lsn;
- if (ut_dulint_cmp(old_scanned_lsn, group_scanned_lsn) < 0) {
+ if (old_scanned_lsn < group_scanned_lsn) {
/* We found a more up-to-date group */
up_to_date_group = group;
}
+#ifdef UNIV_LOG_ARCHIVE
if ((type == LOG_ARCHIVE)
&& (group == recv_sys->archive_group)) {
group = UT_LIST_GET_NEXT(log_groups, group);
}
+#endif /* UNIV_LOG_ARCHIVE */
group = UT_LIST_GET_NEXT(log_groups, group);
}
/* Done with startup scan. Clear the flag. */
recv_log_scan_is_startup_type = FALSE;
- if (type == LOG_CHECKPOINT) {
+ if (TYPE_CHECKPOINT) {
/* NOTE: we always do a 'recovery' at startup, but only if
there is something wrong we will print a message to the
user about recovery: */
- if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn) != 0
- || ut_dulint_cmp(checkpoint_lsn, min_flushed_lsn) != 0) {
+ if (checkpoint_lsn != max_flushed_lsn
+ || checkpoint_lsn != min_flushed_lsn) {
- if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn)
- < 0) {
+ if (checkpoint_lsn < max_flushed_lsn) {
fprintf(stderr,
"InnoDB: #########################"
"#################################\n"
@@ -2710,26 +3079,15 @@ recv_recovery_from_checkpoint_start(
" ib_logfiles to start up"
" the database?\n"
"InnoDB: Log sequence number in"
- " ib_logfiles is %lu %lu, log\n"
+ " ib_logfiles is %llu, log\n"
"InnoDB: sequence numbers stamped"
" to ibdata file headers are between\n"
- "InnoDB: %lu %lu and %lu %lu.\n"
+ "InnoDB: %llu and %llu.\n"
"InnoDB: #########################"
"#################################\n",
- (ulong) ut_dulint_get_high(
- checkpoint_lsn),
- (ulong) ut_dulint_get_low(
- checkpoint_lsn),
- (ulong) ut_dulint_get_high(
- min_flushed_lsn),
- (ulong) ut_dulint_get_low(
- min_flushed_lsn),
- (ulong) ut_dulint_get_high(
- max_flushed_lsn),
- (ulong) ut_dulint_get_low(
- max_flushed_lsn));
-
-
+ checkpoint_lsn,
+ min_flushed_lsn,
+ max_flushed_lsn);
}
if (!recv_needed_recovery) {
@@ -2740,8 +3098,8 @@ recv_recovery_from_checkpoint_start(
" in the ib_logfiles!\n");
recv_init_crash_recovery();
}
-
}
+
if (!recv_needed_recovery) {
/* Init the doublewrite buffer memory structure */
trx_sys_doublewrite_init_or_restore_pages(FALSE);
@@ -2749,39 +3107,35 @@ recv_recovery_from_checkpoint_start(
}
/* We currently have only one log group */
- if (ut_dulint_cmp(group_scanned_lsn, checkpoint_lsn) < 0) {
+ if (group_scanned_lsn < checkpoint_lsn) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: ERROR: We were only able to scan the log"
" up to\n"
- "InnoDB: %lu %lu, but a checkpoint was at %lu %lu.\n"
+ "InnoDB: %llu, but a checkpoint was at %llu.\n"
"InnoDB: It is possible that"
" the database is now corrupt!\n",
- (ulong) ut_dulint_get_high(group_scanned_lsn),
- (ulong) ut_dulint_get_low(group_scanned_lsn),
- (ulong) ut_dulint_get_high(checkpoint_lsn),
- (ulong) ut_dulint_get_low(checkpoint_lsn));
+ group_scanned_lsn,
+ checkpoint_lsn);
}
- if (ut_dulint_cmp(group_scanned_lsn, recv_max_page_lsn) < 0) {
+ if (group_scanned_lsn < recv_max_page_lsn) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: ERROR: We were only able to scan the log"
- " up to %lu %lu\n"
- "InnoDB: but a database page a had an lsn %lu %lu."
+ " up to %llu\n"
+ "InnoDB: but a database page a had an lsn %llu."
" It is possible that the\n"
"InnoDB: database is now corrupt!\n",
- (ulong) ut_dulint_get_high(group_scanned_lsn),
- (ulong) ut_dulint_get_low(group_scanned_lsn),
- (ulong) ut_dulint_get_high(recv_max_page_lsn),
- (ulong) ut_dulint_get_low(recv_max_page_lsn));
+ group_scanned_lsn,
+ recv_max_page_lsn);
}
- if (ut_dulint_cmp(recv_sys->recovered_lsn, checkpoint_lsn) < 0) {
+ if (recv_sys->recovered_lsn < checkpoint_lsn) {
mutex_exit(&(log_sys->mutex));
- if (ut_dulint_cmp(recv_sys->recovered_lsn, limit_lsn) >= 0) {
+ if (recv_sys->recovered_lsn >= LIMIT_LSN) {
return(DB_SUCCESS);
}
@@ -2795,7 +3149,7 @@ recv_recovery_from_checkpoint_start(
group; we also copy checkpoint info to groups */
log_sys->next_checkpoint_lsn = checkpoint_lsn;
- log_sys->next_checkpoint_no = ut_dulint_add(checkpoint_no, 1);
+ log_sys->next_checkpoint_no = checkpoint_no + 1;
#ifdef UNIV_LOG_ARCHIVE
log_sys->archived_lsn = archived_lsn;
@@ -2804,9 +3158,7 @@ recv_recovery_from_checkpoint_start(
recv_synchronize_groups(up_to_date_group);
if (!recv_needed_recovery) {
- ut_a(ut_dulint_cmp(checkpoint_lsn,
- recv_sys->recovered_lsn) == 0);
-
+ ut_a(checkpoint_lsn == recv_sys->recovered_lsn);
} else {
srv_start_lsn = recv_sys->recovered_lsn;
}
@@ -2815,18 +3167,17 @@ recv_recovery_from_checkpoint_start(
ut_memcpy(log_sys->buf, recv_sys->last_block, OS_FILE_LOG_BLOCK_SIZE);
- log_sys->buf_free = ut_dulint_get_low(log_sys->lsn)
- % OS_FILE_LOG_BLOCK_SIZE;
+ log_sys->buf_free = (ulint) log_sys->lsn % OS_FILE_LOG_BLOCK_SIZE;
log_sys->buf_next_to_write = log_sys->buf_free;
log_sys->written_to_some_lsn = log_sys->lsn;
log_sys->written_to_all_lsn = log_sys->lsn;
log_sys->last_checkpoint_lsn = checkpoint_lsn;
- log_sys->next_checkpoint_no = ut_dulint_add(checkpoint_no, 1);
+ log_sys->next_checkpoint_no = checkpoint_no + 1;
#ifdef UNIV_LOG_ARCHIVE
- if (ut_dulint_cmp(archived_lsn, ut_dulint_max) == 0) {
+ if (archived_lsn == IB_ULONGLONG_MAX) {
log_sys->archiving_state = LOG_ARCH_OFF;
}
@@ -2847,11 +3198,14 @@ recv_recovery_from_checkpoint_start(
records in the hash table can be run in background. */
return(DB_SUCCESS);
+
+#undef TYPE_CHECKPOINT
+#undef LIMIT_LSN
}
-/************************************************************
+/********************************************************//**
Completes recovery from a checkpoint. */
-
+UNIV_INTERN
void
recv_recovery_from_checkpoint_finish(void)
/*======================================*/
@@ -2898,8 +3252,16 @@ recv_recovery_from_checkpoint_finish(void)
recv_recovery_on = FALSE;
#ifndef UNIV_LOG_DEBUG
- recv_sys_free();
+ recv_sys_debug_free();
#endif
+ /* Roll back any recovered data dictionary transactions, so
+ that the data dictionary tables will be free of any locks.
+ The data dictionary latch should guarantee that there is at
+ most one data dictionary transaction active at a time. */
+ trx_rollback_or_clean_recovered(FALSE);
+
+ /* Drop partially created indexes. */
+ row_merge_drop_temp_indexes();
#ifdef UNIV_SYNC_DEBUG
/* Wait for a while so that created threads have time to suspend
@@ -2913,32 +3275,35 @@ recv_recovery_from_checkpoint_finish(void)
/* Rollback the uncommitted transactions which have no user
session */
- os_thread_create(trx_rollback_or_clean_all_without_sess,
+ os_thread_create(trx_rollback_or_clean_all_recovered,
(void *)&i, NULL);
}
}
-/**********************************************************
+/******************************************************//**
Resets the logs. The contents of log files will be lost! */
-
+UNIV_INTERN
void
recv_reset_logs(
/*============*/
- dulint lsn, /* in: reset to this lsn rounded up to
- be divisible by OS_FILE_LOG_BLOCK_SIZE,
- after which we add LOG_BLOCK_HDR_SIZE */
+ ib_uint64_t lsn, /*!< in: reset to this lsn
+ rounded up to be divisible by
+ OS_FILE_LOG_BLOCK_SIZE, after
+ which we add
+ LOG_BLOCK_HDR_SIZE */
#ifdef UNIV_LOG_ARCHIVE
- ulint arch_log_no, /* in: next archived log file number */
+ ulint arch_log_no, /*!< in: next archived log file number */
#endif /* UNIV_LOG_ARCHIVE */
- ibool new_logs_created)/* in: TRUE if resetting logs is done
- at the log creation; FALSE if it is done
- after archive recovery */
+ ibool new_logs_created)/*!< in: TRUE if resetting logs
+ is done at the log creation;
+ FALSE if it is done after
+ archive recovery */
{
log_group_t* group;
ut_ad(mutex_own(&(log_sys->mutex)));
- log_sys->lsn = ut_dulint_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);
+ log_sys->lsn = ut_uint64_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);
group = UT_LIST_GET_FIRST(log_sys->log_groups);
@@ -2962,8 +3327,8 @@ recv_reset_logs(
log_sys->written_to_some_lsn = log_sys->lsn;
log_sys->written_to_all_lsn = log_sys->lsn;
- log_sys->next_checkpoint_no = ut_dulint_zero;
- log_sys->last_checkpoint_lsn = ut_dulint_zero;
+ log_sys->next_checkpoint_no = 0;
+ log_sys->last_checkpoint_lsn = 0;
#ifdef UNIV_LOG_ARCHIVE
log_sys->archived_lsn = log_sys->lsn;
@@ -2973,29 +3338,30 @@ recv_reset_logs(
log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
- log_sys->lsn = ut_dulint_add(log_sys->lsn, LOG_BLOCK_HDR_SIZE);
+ log_sys->lsn += LOG_BLOCK_HDR_SIZE;
mutex_exit(&(log_sys->mutex));
/* Reset the checkpoint fields in logs */
- log_make_checkpoint_at(ut_dulint_max, TRUE);
- log_make_checkpoint_at(ut_dulint_max, TRUE);
+ log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
+ log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
mutex_enter(&(log_sys->mutex));
}
+#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_HOTBACKUP
-/**********************************************************
+/******************************************************//**
Creates new log files after a backup has been restored. */
-
+UNIV_INTERN
void
recv_reset_log_files_for_backup(
/*============================*/
- const char* log_dir, /* in: log file directory path */
- ulint n_log_files, /* in: number of log files */
- ulint log_file_size, /* in: log file size */
- dulint lsn) /* in: new start lsn, must be
+ const char* log_dir, /*!< in: log file directory path */
+ ulint n_log_files, /*!< in: number of log files */
+ ulint log_file_size, /*!< in: log file size */
+ ib_uint64_t lsn) /*!< in: new start lsn, must be
divisible by OS_FILE_LOG_BLOCK_SIZE */
{
os_file_t log_file;
@@ -3078,29 +3444,28 @@ recv_reset_log_files_for_backup(
#endif /* UNIV_HOTBACKUP */
#ifdef UNIV_LOG_ARCHIVE
-/**********************************************************
-Reads from the archive of a log group and performs recovery. */
+/******************************************************//**
+Reads from the archive of a log group and performs recovery.
+@return TRUE if no more complete consistent archive files */
static
ibool
log_group_recover_from_archive_file(
/*================================*/
- /* out: TRUE if no more complete
- consistent archive files */
- log_group_t* group) /* in: log group */
+ log_group_t* group) /*!< in: log group */
{
- os_file_t file_handle;
- dulint start_lsn;
- dulint file_end_lsn;
- dulint dummy_lsn;
- dulint scanned_lsn;
- ulint len;
- ibool ret;
- byte* buf;
- ulint read_offset;
- ulint file_size;
- ulint file_size_high;
- int input_char;
- char name[10000];
+ os_file_t file_handle;
+ ib_uint64_t start_lsn;
+ ib_uint64_t file_end_lsn;
+ ib_uint64_t dummy_lsn;
+ ib_uint64_t scanned_lsn;
+ ulint len;
+ ibool ret;
+ byte* buf;
+ ulint read_offset;
+ ulint file_size;
+ ulint file_size_high;
+ int input_char;
+ char name[10000];
ut_a(0);
@@ -3188,12 +3553,12 @@ ask_again:
return(TRUE);
}
- start_lsn = mach_read_from_8(buf + LOG_FILE_START_LSN);
- file_end_lsn = mach_read_from_8(buf + LOG_FILE_END_LSN);
+ start_lsn = mach_read_ull(buf + LOG_FILE_START_LSN);
+ file_end_lsn = mach_read_ull(buf + LOG_FILE_END_LSN);
- if (ut_dulint_is_zero(recv_sys->scanned_lsn)) {
+ if (!recv_sys->scanned_lsn) {
- if (ut_dulint_cmp(recv_sys->parse_start_lsn, start_lsn) < 0) {
+ if (recv_sys->parse_start_lsn < start_lsn) {
fprintf(stderr,
"InnoDB: Archive log file %s"
" starts from too big a lsn\n",
@@ -3204,7 +3569,7 @@ ask_again:
recv_sys->scanned_lsn = start_lsn;
}
- if (ut_dulint_cmp(recv_sys->scanned_lsn, start_lsn) != 0) {
+ if (recv_sys->scanned_lsn != start_lsn) {
fprintf(stderr,
"InnoDB: Archive log file %s starts from"
@@ -3232,9 +3597,8 @@ ask_again:
if (log_debug_writes) {
fprintf(stderr,
"InnoDB: Archive read starting at"
- " lsn %lu %lu, len %lu from file %s\n",
- (ulong) ut_dulint_get_high(start_lsn),
- (ulong) ut_dulint_get_low(start_lsn),
+ " lsn %llu, len %lu from file %s\n",
+ start_lsn,
(ulong) len, name);
}
#endif /* UNIV_DEBUG */
@@ -3244,11 +3608,11 @@ ask_again:
read_offset % UNIV_PAGE_SIZE, len, buf, NULL);
ret = recv_scan_log_recs(
- TRUE, (buf_pool->n_frames - recv_n_pool_free_frames)
+ (buf_pool->n_frames - recv_n_pool_free_frames)
* UNIV_PAGE_SIZE, TRUE, buf, len, start_lsn,
&dummy_lsn, &scanned_lsn);
- if (ut_dulint_cmp(scanned_lsn, file_end_lsn) == 0) {
+ if (scanned_lsn == file_end_lsn) {
return(FALSE);
}
@@ -3262,28 +3626,30 @@ ask_again:
}
read_offset += len;
- start_lsn = ut_dulint_add(start_lsn, len);
+ start_lsn += len;
- ut_ad(ut_dulint_cmp(start_lsn, scanned_lsn) == 0);
+ ut_ad(start_lsn == scanned_lsn);
}
return(FALSE);
}
-/************************************************************
-Recovers from archived log files, and also from log files, if they exist. */
-
+/********************************************************//**
+Recovers from archived log files, and also from log files, if they exist.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
ulint
recv_recovery_from_archive_start(
/*=============================*/
- /* out: error code or DB_SUCCESS */
- dulint min_flushed_lsn,/* in: min flushed lsn field from the
- data files */
- dulint limit_lsn, /* in: recover up to this lsn if possible */
- ulint first_log_no) /* in: number of the first archived log file
- to use in the recovery; the file will be
- searched from INNOBASE_LOG_ARCH_DIR specified
- in server config file */
+ ib_uint64_t min_flushed_lsn,/*!< in: min flushed lsn field from the
+ data files */
+ ib_uint64_t limit_lsn, /*!< in: recover up to this lsn if
+ possible */
+ ulint first_log_no) /*!< in: number of the first archived
+ log file to use in the recovery; the
+ file will be searched from
+ INNOBASE_LOG_ARCH_DIR specified in
+ server config file */
{
log_group_t* group;
ulint group_id;
@@ -3294,7 +3660,7 @@ recv_recovery_from_archive_start(
ut_a(0);
recv_sys_create();
- recv_sys_init(FALSE, buf_pool_get_curr_size());
+ recv_sys_init(buf_pool_get_curr_size());
recv_recovery_on = TRUE;
recv_recovery_from_backup_on = TRUE;
@@ -3325,7 +3691,7 @@ recv_recovery_from_archive_start(
recv_sys->parse_start_lsn = min_flushed_lsn;
- recv_sys->scanned_lsn = ut_dulint_zero;
+ recv_sys->scanned_lsn = 0;
recv_sys->scanned_checkpoint_no = 0;
recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
@@ -3351,9 +3717,9 @@ recv_recovery_from_archive_start(
group->archived_file_no++;
}
- if (ut_dulint_cmp(recv_sys->recovered_lsn, limit_lsn) < 0) {
+ if (recv_sys->recovered_lsn < limit_lsn) {
- if (ut_dulint_is_zero(recv_sys->scanned_lsn)) {
+ if (!recv_sys->scanned_lsn) {
recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
}
@@ -3362,8 +3728,8 @@ recv_recovery_from_archive_start(
err = recv_recovery_from_checkpoint_start(LOG_ARCHIVE,
limit_lsn,
- ut_dulint_max,
- ut_dulint_max);
+ IB_ULONGLONG_MAX,
+ IB_ULONGLONG_MAX);
if (err != DB_SUCCESS) {
return(err);
@@ -3372,7 +3738,7 @@ recv_recovery_from_archive_start(
mutex_enter(&(log_sys->mutex));
}
- if (ut_dulint_cmp(limit_lsn, ut_dulint_max) != 0) {
+ if (limit_lsn != IB_ULONGLONG_MAX) {
recv_apply_hashed_log_recs(FALSE);
@@ -3384,9 +3750,9 @@ recv_recovery_from_archive_start(
return(DB_SUCCESS);
}
-/************************************************************
+/********************************************************//**
Completes recovery from archive. */
-
+UNIV_INTERN
void
recv_recovery_from_archive_finish(void)
/*===================================*/
diff --git a/storage/innobase/mach/mach0data.c b/storage/innobase/mach/mach0data.c
index b92293fd037..e030ce9aadf 100644
--- a/storage/innobase/mach/mach0data.c
+++ b/storage/innobase/mach/mach0data.c
@@ -1,9 +1,26 @@
-/**********************************************************************
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file mach/mach0data.c
Utilities for converting data from the database file
to the machine format.
-(c) 1995 Innobase Oy
-
Created 11/28/1995 Heikki Tuuri
***********************************************************************/
@@ -13,17 +30,16 @@ Created 11/28/1995 Heikki Tuuri
#include "mach0data.ic"
#endif
-/*************************************************************
-Reads a ulint in a compressed form if the log record fully contains it. */
-
+/*********************************************************//**
+Reads a ulint in a compressed form if the log record fully contains it.
+@return pointer to end of the stored field, NULL if not complete */
+UNIV_INTERN
byte*
mach_parse_compressed(
/*==================*/
- /* out: pointer to end of the stored field, NULL if
- not complete */
- byte* ptr, /* in: pointer to buffer from where to read */
- byte* end_ptr,/* in: pointer to end of the buffer */
- ulint* val) /* out: read value (< 2^32) */
+ byte* ptr, /*!< in: pointer to buffer from where to read */
+ byte* end_ptr,/*!< in: pointer to end of the buffer */
+ ulint* val) /*!< out: read value (< 2^32) */
{
ulint flag;
@@ -77,17 +93,16 @@ mach_parse_compressed(
}
}
-/*************************************************************
-Reads a dulint in a compressed form if the log record fully contains it. */
-
+/*********************************************************//**
+Reads a dulint in a compressed form if the log record fully contains it.
+@return pointer to end of the stored field, NULL if not complete */
+UNIV_INTERN
byte*
mach_dulint_parse_compressed(
/*=========================*/
- /* out: pointer to end of the stored field, NULL if
- not complete */
- byte* ptr, /* in: pointer to buffer from where to read */
- byte* end_ptr,/* in: pointer to end of the buffer */
- dulint* val) /* out: read value */
+ byte* ptr, /*!< in: pointer to buffer from where to read */
+ byte* end_ptr,/*!< in: pointer to end of the buffer */
+ dulint* val) /*!< out: read value */
{
ulint high;
ulint low;
diff --git a/storage/innobase/mem/mem0dbg.c b/storage/innobase/mem/mem0dbg.c
index 72452907c3f..01eda20ec45 100644
--- a/storage/innobase/mem/mem0dbg.c
+++ b/storage/innobase/mem/mem0dbg.c
@@ -1,28 +1,50 @@
-/************************************************************************
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file mem/mem0dbg.c
The memory management: the debug code. This is not a compilation module,
but is included in mem0mem.* !
-(c) 1994, 1995 Innobase Oy
-
Created 6/9/1994 Heikki Tuuri
*************************************************************************/
#ifdef UNIV_MEM_DEBUG
-mutex_t mem_hash_mutex; /* The mutex which protects in the
- debug version the hash table containing
- the list of live memory heaps, and
- also the global variables below. */
+# ifndef UNIV_HOTBACKUP
+/* The mutex which protects in the debug version the hash table
+containing the list of live memory heaps, and also the global
+variables below. */
+UNIV_INTERN mutex_t mem_hash_mutex;
+# endif /* !UNIV_HOTBACKUP */
/* The following variables contain information about the
extent of memory allocations. Only used in the debug version.
Protected by mem_hash_mutex above. */
-static ulint mem_n_created_heaps = 0;
-static ulint mem_n_allocations = 0;
-static ulint mem_total_allocated_memory = 0;
-ulint mem_current_allocated_memory = 0;
-static ulint mem_max_allocated_memory = 0;
-static ulint mem_last_print_info = 0;
+static ulint mem_n_created_heaps = 0;
+static ulint mem_n_allocations = 0;
+static ulint mem_total_allocated_memory = 0;
+UNIV_INTERN ulint mem_current_allocated_memory = 0;
+static ulint mem_max_allocated_memory = 0;
+# ifndef UNIV_HOTBACKUP
+static ulint mem_last_print_info = 0;
+static ibool mem_hash_initialized = FALSE;
+# endif /* !UNIV_HOTBACKUP */
/* Size of the hash table for memory management tracking */
#define MEM_HASH_SIZE 997
@@ -32,10 +54,10 @@ static ulint mem_last_print_info = 0;
typedef struct mem_hash_node_struct mem_hash_node_t;
struct mem_hash_node_struct {
UT_LIST_NODE_T(mem_hash_node_t)
- list; /* hash list node */
- mem_heap_t* heap; /* memory heap */
+ list; /*!< hash list node */
+ mem_heap_t* heap; /*!< memory heap */
const char* file_name;/* file where heap was created*/
- ulint line; /* file line of creation */
+ ulint line; /*!< file line of creation */
ulint nth_heap;/* this is the nth heap created */
UT_LIST_NODE_T(mem_hash_node_t)
all_list;/* list of all created heaps */
@@ -49,7 +71,6 @@ static mem_hash_cell_t mem_hash_table[MEM_HASH_SIZE];
/* The base node of the list of all allocated heaps */
static mem_hash_cell_t mem_all_list_base;
-static ibool mem_hash_initialized = FALSE;
UNIV_INLINE
@@ -68,37 +89,42 @@ mem_hash_get_nth_cell(ulint i)
}
/* Accessor functions for a memory field in the debug version */
-
+UNIV_INTERN
void
mem_field_header_set_len(byte* field, ulint len)
{
mach_write_to_4(field - 2 * sizeof(ulint), len);
}
+UNIV_INTERN
ulint
mem_field_header_get_len(byte* field)
{
return(mach_read_from_4(field - 2 * sizeof(ulint)));
}
+UNIV_INTERN
void
mem_field_header_set_check(byte* field, ulint check)
{
mach_write_to_4(field - sizeof(ulint), check);
}
+UNIV_INTERN
ulint
mem_field_header_get_check(byte* field)
{
return(mach_read_from_4(field - sizeof(ulint)));
}
+UNIV_INTERN
void
mem_field_trailer_set_check(byte* field, ulint check)
{
mach_write_to_4(field + mem_field_header_get_len(field), check);
}
+UNIV_INTERN
ulint
mem_field_trailer_get_check(byte* field)
{
@@ -107,13 +133,14 @@ mem_field_trailer_get_check(byte* field)
}
#endif /* UNIV_MEM_DEBUG */
-/**********************************************************************
+#ifndef UNIV_HOTBACKUP
+/******************************************************************//**
Initializes the memory system. */
-
+UNIV_INTERN
void
mem_init(
/*=====*/
- ulint size) /* in: common pool size in bytes */
+ ulint size) /*!< in: common pool size in bytes */
{
#ifdef UNIV_MEM_DEBUG
@@ -133,18 +160,38 @@ mem_init(
mem_hash_initialized = TRUE;
#endif
+ if (UNIV_LIKELY(srv_use_sys_malloc)) {
+ /* When innodb_use_sys_malloc is set, the
+ mem_comm_pool won't be used for any allocations. We
+ create a dummy mem_comm_pool, because some statistics
+ and debugging code relies on it being initialized. */
+ size = 1;
+ }
+
mem_comm_pool = mem_pool_create(size);
}
+/******************************************************************//**
+Closes the memory system. */
+UNIV_INTERN
+void
+mem_close(void)
+/*===========*/
+{
+ mem_pool_free(mem_comm_pool);
+ mem_comm_pool = NULL;
+}
+#endif /* !UNIV_HOTBACKUP */
+
#ifdef UNIV_MEM_DEBUG
-/**********************************************************************
+/******************************************************************//**
Initializes an allocated memory field in the debug version. */
-
+UNIV_INTERN
void
mem_field_init(
/*===========*/
- byte* buf, /* in: memory field */
- ulint n) /* in: how many bytes the user requested */
+ byte* buf, /*!< in: memory field */
+ ulint n) /*!< in: how many bytes the user requested */
{
ulint rnd;
byte* usr_buf;
@@ -184,15 +231,15 @@ mem_field_init(
mem_init_buf(usr_buf, n);
}
-/**********************************************************************
+/******************************************************************//**
Erases an allocated memory field in the debug version. */
-
+UNIV_INTERN
void
mem_field_erase(
/*============*/
- byte* buf, /* in: memory field */
+ byte* buf, /*!< in: memory field */
ulint n __attribute__((unused)))
- /* in: how many bytes the user requested */
+ /*!< in: how many bytes the user requested */
{
byte* usr_buf;
@@ -211,15 +258,15 @@ mem_field_erase(
mem_erase_buf(buf, MEM_SPACE_NEEDED(n));
}
-/*******************************************************************
+/***************************************************************//**
Initializes a buffer to a random combination of hex BA and BE.
Used to initialize allocated memory. */
-
+UNIV_INTERN
void
mem_init_buf(
/*=========*/
- byte* buf, /* in: pointer to buffer */
- ulint n) /* in: length of buffer */
+ byte* buf, /*!< in: pointer to buffer */
+ ulint n) /*!< in: length of buffer */
{
byte* ptr;
@@ -237,15 +284,15 @@ mem_init_buf(
UNIV_MEM_INVALID(buf, n);
}
-/*******************************************************************
+/***************************************************************//**
Initializes a buffer to a random combination of hex DE and AD.
-Used to erase freed memory.*/
-
+Used to erase freed memory. */
+UNIV_INTERN
void
mem_erase_buf(
/*==========*/
- byte* buf, /* in: pointer to buffer */
- ulint n) /* in: length of buffer */
+ byte* buf, /*!< in: pointer to buffer */
+ ulint n) /*!< in: length of buffer */
{
byte* ptr;
@@ -262,16 +309,16 @@ mem_erase_buf(
UNIV_MEM_FREE(buf, n);
}
-/*******************************************************************
+/***************************************************************//**
Inserts a created memory heap to the hash table of current allocated
memory heaps. */
-
+UNIV_INTERN
void
mem_hash_insert(
/*============*/
- mem_heap_t* heap, /* in: the created heap */
- const char* file_name, /* in: file name of creation */
- ulint line) /* in: line where created */
+ mem_heap_t* heap, /*!< in: the created heap */
+ const char* file_name, /*!< in: file name of creation */
+ ulint line) /*!< in: line where created */
{
mem_hash_node_t* new_node;
ulint cell_no ;
@@ -300,7 +347,7 @@ mem_hash_insert(
mutex_exit(&mem_hash_mutex);
}
-/*******************************************************************
+/***************************************************************//**
Removes a memory heap (which is going to be freed by the caller)
from the list of live memory heaps. Returns the size of the heap
in terms of how much memory in bytes was allocated for the user of
@@ -308,13 +355,13 @@ the heap (not the total space occupied by the heap).
Also validates the heap.
NOTE: This function does not free the storage occupied by the
heap itself, only the node in the list of heaps. */
-
+UNIV_INTERN
void
mem_hash_remove(
/*============*/
- mem_heap_t* heap, /* in: the heap to be freed */
- const char* file_name, /* in: file name of freeing */
- ulint line) /* in: line where freed */
+ mem_heap_t* heap, /*!< in: the heap to be freed */
+ const char* file_name, /*!< in: file name of freeing */
+ ulint line) /*!< in: line where freed */
{
mem_hash_node_t* node;
ulint cell_no;
@@ -381,34 +428,34 @@ mem_hash_remove(
#endif /* UNIV_MEM_DEBUG */
#if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG
-/*******************************************************************
+/***************************************************************//**
Checks a memory heap for consistency and prints the contents if requested.
Outputs the sum of sizes of buffers given to the user (only in
the debug version), the physical size of the heap and the number of
blocks in the heap. In case of error returns 0 as sizes and number
of blocks. */
-
+UNIV_INTERN
void
mem_heap_validate_or_print(
/*=======================*/
- mem_heap_t* heap, /* in: memory heap */
+ mem_heap_t* heap, /*!< in: memory heap */
byte* top __attribute__((unused)),
- /* in: calculate and validate only until
+ /*!< in: calculate and validate only until
this top pointer in the heap is reached,
if this pointer is NULL, ignored */
- ibool print, /* in: if TRUE, prints the contents
+ ibool print, /*!< in: if TRUE, prints the contents
of the heap; works only in
the debug version */
- ibool* error, /* out: TRUE if error */
- ulint* us_size,/* out: allocated memory
+ ibool* error, /*!< out: TRUE if error */
+ ulint* us_size,/*!< out: allocated memory
(for the user) in the heap,
if a NULL pointer is passed as this
argument, it is ignored; in the
non-debug version this is always -1 */
- ulint* ph_size,/* out: physical size of the heap,
+ ulint* ph_size,/*!< out: physical size of the heap,
if a NULL pointer is passed as this
argument, it is ignored */
- ulint* n_blocks) /* out: number of blocks in the heap,
+ ulint* n_blocks) /*!< out: number of blocks in the heap,
if a NULL pointer is passed as this
argument, it is ignored */
{
@@ -486,6 +533,7 @@ mem_heap_validate_or_print(
if (print) {
ut_print_buf(stderr, user_field, len);
+ putc('\n', stderr);
}
total_len += len;
@@ -555,13 +603,13 @@ completed:
*error = FALSE;
}
-/******************************************************************
+/**************************************************************//**
Prints the contents of a memory heap. */
static
void
mem_heap_print(
/*===========*/
- mem_heap_t* heap) /* in: memory heap */
+ mem_heap_t* heap) /*!< in: memory heap */
{
ibool error;
ulint us_size;
@@ -580,14 +628,14 @@ mem_heap_print(
ut_a(!error);
}
-/******************************************************************
-Validates the contents of a memory heap. */
-
+/**************************************************************//**
+Validates the contents of a memory heap.
+@return TRUE if ok */
+UNIV_INTERN
ibool
mem_heap_validate(
/*==============*/
- /* out: TRUE if ok */
- mem_heap_t* heap) /* in: memory heap */
+ mem_heap_t* heap) /*!< in: memory heap */
{
ibool error;
ulint us_size;
@@ -609,14 +657,14 @@ mem_heap_validate(
#endif /* UNIV_MEM_DEBUG || UNIV_DEBUG */
#ifdef UNIV_DEBUG
-/******************************************************************
-Checks that an object is a memory heap (or a block of it). */
-
+/**************************************************************//**
+Checks that an object is a memory heap (or a block of it).
+@return TRUE if ok */
+UNIV_INTERN
ibool
mem_heap_check(
/*===========*/
- /* out: TRUE if ok */
- mem_heap_t* heap) /* in: memory heap */
+ mem_heap_t* heap) /*!< in: memory heap */
{
ut_a(heap->magic_n == MEM_BLOCK_MAGIC_N);
@@ -625,13 +673,13 @@ mem_heap_check(
#endif /* UNIV_DEBUG */
#ifdef UNIV_MEM_DEBUG
-/*********************************************************************
-TRUE if no memory is currently allocated. */
-
+/*****************************************************************//**
+TRUE if no memory is currently allocated.
+@return TRUE if no heaps exist */
+UNIV_INTERN
ibool
mem_all_freed(void)
/*===============*/
- /* out: TRUE if no heaps exist */
{
mem_hash_node_t* node;
ulint heap_count = 0;
@@ -653,8 +701,9 @@ mem_all_freed(void)
mutex_exit(&mem_hash_mutex);
if (heap_count == 0) {
-
+# ifndef UNIV_HOTBACKUP
ut_a(mem_pool_get_reserved(mem_comm_pool) == 0);
+# endif /* !UNIV_HOTBACKUP */
return(TRUE);
} else {
@@ -662,13 +711,13 @@ mem_all_freed(void)
}
}
-/*********************************************************************
-Validates the dynamic memory allocation system. */
-
+/*****************************************************************//**
+Validates the dynamic memory allocation system.
+@return TRUE if error */
+UNIV_INTERN
ibool
mem_validate_no_assert(void)
/*========================*/
- /* out: TRUE if error */
{
mem_hash_node_t* node;
ulint n_heaps = 0;
@@ -679,7 +728,9 @@ mem_validate_no_assert(void)
ulint n_blocks;
ulint i;
+# ifndef UNIV_HOTBACKUP
mem_pool_validate(mem_comm_pool);
+# endif /* !UNIV_HOTBACKUP */
mutex_enter(&mem_hash_mutex);
@@ -735,13 +786,13 @@ mem_validate_no_assert(void)
return(error);
}
-/****************************************************************
-Validates the dynamic memory */
-
+/************************************************************//**
+Validates the dynamic memory
+@return TRUE if ok */
+UNIV_INTERN
ibool
mem_validate(void)
/*==============*/
- /* out: TRUE if ok */
{
ut_a(!mem_validate_no_assert());
@@ -749,14 +800,14 @@ mem_validate(void)
}
#endif /* UNIV_MEM_DEBUG */
-/****************************************************************
+/************************************************************//**
Tries to find neighboring memory allocation blocks and dumps to stderr
the neighborhood of a given pointer. */
-
+UNIV_INTERN
void
mem_analyze_corruption(
/*===================*/
- void* ptr) /* in: pointer to place of possible corruption */
+ void* ptr) /*!< in: pointer to place of possible corruption */
{
byte* p;
ulint i;
@@ -857,14 +908,15 @@ mem_analyze_corruption(
}
}
-/*********************************************************************
+#ifndef UNIV_HOTBACKUP
+/*****************************************************************//**
Prints information of dynamic memory usage and currently allocated
memory heaps or buffers. Can only be used in the debug version. */
static
void
mem_print_info_low(
/*===============*/
- ibool print_all) /* in: if TRUE, all heaps are printed,
+ ibool print_all) /*!< in: if TRUE, all heaps are printed,
else only the heaps allocated after the
previous call of this function */
{
@@ -961,10 +1013,10 @@ next_heap:
#endif
}
-/*********************************************************************
+/*****************************************************************//**
Prints information of dynamic memory usage and currently allocated memory
heaps or buffers. Can only be used in the debug version. */
-
+UNIV_INTERN
void
mem_print_info(void)
/*================*/
@@ -972,13 +1024,14 @@ mem_print_info(void)
mem_print_info_low(TRUE);
}
-/*********************************************************************
+/*****************************************************************//**
Prints information of dynamic memory usage and currently allocated memory
heaps or buffers since the last ..._print_info or ..._print_new_info. */
-
+UNIV_INTERN
void
mem_print_new_info(void)
/*====================*/
{
mem_print_info_low(FALSE);
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/mem/mem0mem.c b/storage/innobase/mem/mem0mem.c
index f4fd178a39c..ccb2fd8a7b4 100644
--- a/storage/innobase/mem/mem0mem.c
+++ b/storage/innobase/mem/mem0mem.c
@@ -1,20 +1,34 @@
-/************************************************************************
-The memory management
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1994, 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file mem/mem0mem.c
+The memory management
Created 6/9/1994 Heikki Tuuri
*************************************************************************/
-
#include "mem0mem.h"
#ifdef UNIV_NONINL
#include "mem0mem.ic"
#endif
-#include "mach0data.h"
#include "buf0buf.h"
-#include "btr0sea.h"
#include "srv0srv.h"
#include "mem0dbg.c"
#include <stdarg.h>
@@ -84,81 +98,43 @@ UT_LIST_BASE_NODE_T(mem_block_t) mem_block_list;
#endif
-/*******************************************************************
-NOTE: Use the corresponding macro instead of this function.
-Allocates a single buffer of memory from the dynamic memory of
-the C compiler. Is like malloc of C. The buffer must be freed
-with mem_free. */
-
-void*
-mem_alloc_func_noninline(
-/*=====================*/
- /* out, own: free storage */
- ulint n, /* in: desired number of bytes */
- const char* file_name, /* in: file name where created */
- ulint line) /* in: line where created */
-{
- return(mem_alloc_func(n, file_name, line));
-}
-
-/**************************************************************************
-Duplicates a NUL-terminated string, allocated from a memory heap. */
-
+/**********************************************************************//**
+Duplicates a NUL-terminated string, allocated from a memory heap.
+@return own: a copy of the string */
+UNIV_INTERN
char*
mem_heap_strdup(
/*============*/
- /* out, own: a copy of the string */
- mem_heap_t* heap, /* in: memory heap where string is allocated */
- const char* str) /* in: string to be copied */
+ mem_heap_t* heap, /*!< in: memory heap where string is allocated */
+ const char* str) /*!< in: string to be copied */
{
return(mem_heap_dup(heap, str, strlen(str) + 1));
}
-/**************************************************************************
-Duplicate a block of data, allocated from a memory heap. */
-
+/**********************************************************************//**
+Duplicate a block of data, allocated from a memory heap.
+@return own: a copy of the data */
+UNIV_INTERN
void*
mem_heap_dup(
/*=========*/
- /* out, own: a copy of the data */
- mem_heap_t* heap, /* in: memory heap where copy is allocated */
- const void* data, /* in: data to be copied */
- ulint len) /* in: length of data, in bytes */
+ mem_heap_t* heap, /*!< in: memory heap where copy is allocated */
+ const void* data, /*!< in: data to be copied */
+ ulint len) /*!< in: length of data, in bytes */
{
return(memcpy(mem_heap_alloc(heap, len), data, len));
}
-/**************************************************************************
-Concatenate two memory blocks and return the result, using a memory heap. */
-
-void*
-mem_heap_cat(
-/*=========*/
- /* out, own: the result */
- mem_heap_t* heap, /* in: memory heap where result is allocated */
- const void* b1, /* in: block 1 */
- ulint len1, /* in: length of b1, in bytes */
- const void* b2, /* in: block 2 */
- ulint len2) /* in: length of b2, in bytes */
-{
- void* res = mem_heap_alloc(heap, len1 + len2);
-
- memcpy(res, b1, len1);
- memcpy((char*)res + len1, b2, len2);
-
- return(res);
-}
-
-/**************************************************************************
-Concatenate two strings and return the result, using a memory heap. */
-
+/**********************************************************************//**
+Concatenate two strings and return the result, using a memory heap.
+@return own: the result */
+UNIV_INTERN
char*
mem_heap_strcat(
/*============*/
- /* out, own: the result */
- mem_heap_t* heap, /* in: memory heap where string is allocated */
- const char* s1, /* in: string 1 */
- const char* s2) /* in: string 2 */
+ mem_heap_t* heap, /*!< in: memory heap where string is allocated */
+ const char* s1, /*!< in: string 1 */
+ const char* s2) /*!< in: string 2 */
{
char* s;
ulint s1_len = strlen(s1);
@@ -175,18 +151,17 @@ mem_heap_strcat(
}
-/********************************************************************
-Helper function for mem_heap_printf. */
+/****************************************************************//**
+Helper function for mem_heap_printf.
+@return length of formatted string, including terminating NUL */
static
ulint
mem_heap_printf_low(
/*================*/
- /* out: length of formatted string,
- including terminating NUL */
- char* buf, /* in/out: buffer to store formatted string
+ char* buf, /*!< in/out: buffer to store formatted string
in, or NULL to just calculate length */
- const char* format, /* in: format string */
- va_list ap) /* in: arguments */
+ const char* format, /*!< in: format string */
+ va_list ap) /*!< in: arguments */
{
ulint len = 0;
@@ -285,18 +260,18 @@ mem_heap_printf_low(
return(len);
}
-/********************************************************************
+/****************************************************************//**
A simple (s)printf replacement that dynamically allocates the space for the
formatted string from the given heap. This supports a very limited set of
the printf syntax: types 's' and 'u' and length modifier 'l' (which is
-required for the 'u' type). */
-
+required for the 'u' type).
+@return heap-allocated formatted string */
+UNIV_INTERN
char*
mem_heap_printf(
/*============*/
- /* out: heap-allocated formatted string */
- mem_heap_t* heap, /* in: memory heap */
- const char* format, /* in: format string */
+ mem_heap_t* heap, /*!< in: memory heap */
+ const char* format, /*!< in: format string */
...)
{
va_list ap;
@@ -318,26 +293,25 @@ mem_heap_printf(
return(str);
}
-/*******************************************************************
-Creates a memory heap block where data can be allocated. */
-
+/***************************************************************//**
+Creates a memory heap block where data can be allocated.
+@return own: memory heap block, NULL if did not succeed (only possible
+for MEM_HEAP_BTR_SEARCH type heaps) */
+UNIV_INTERN
mem_block_t*
mem_heap_create_block(
/*==================*/
- /* out, own: memory heap block, NULL if
- did not succeed (only possible for
- MEM_HEAP_BTR_SEARCH type heaps) */
- mem_heap_t* heap, /* in: memory heap or NULL if first block
+ mem_heap_t* heap, /*!< in: memory heap or NULL if first block
should be created */
- ulint n, /* in: number of bytes needed for user data, or
- if init_block is not NULL, its size in bytes */
- void* init_block, /* in: init block in fast create,
- type must be MEM_HEAP_DYNAMIC */
- ulint type, /* in: type of heap: MEM_HEAP_DYNAMIC or
+ ulint n, /*!< in: number of bytes needed for user data */
+ ulint type, /*!< in: type of heap: MEM_HEAP_DYNAMIC or
MEM_HEAP_BUFFER */
- const char* file_name,/* in: file name where created */
- ulint line) /* in: line where created */
+ const char* file_name,/*!< in: file name where created */
+ ulint line) /*!< in: line where created */
{
+#ifndef UNIV_HOTBACKUP
+ buf_block_t* buf_block = NULL;
+#endif /* !UNIV_HOTBACKUP */
mem_block_t* block;
ulint len;
@@ -349,48 +323,45 @@ mem_heap_create_block(
}
/* In dynamic allocation, calculate the size: block header + data. */
+ len = MEM_BLOCK_HEADER_SIZE + MEM_SPACE_NEEDED(n);
- if (init_block != NULL) {
- ut_ad(type == MEM_HEAP_DYNAMIC);
- ut_ad(n > MEM_BLOCK_START_SIZE + MEM_BLOCK_HEADER_SIZE);
- len = n;
- block = init_block;
+#ifndef UNIV_HOTBACKUP
+ if (type == MEM_HEAP_DYNAMIC || len < UNIV_PAGE_SIZE / 2) {
- } else if (type == MEM_HEAP_DYNAMIC) {
+ ut_ad(type == MEM_HEAP_DYNAMIC || n <= MEM_MAX_ALLOC_IN_BUF);
- len = MEM_BLOCK_HEADER_SIZE + MEM_SPACE_NEEDED(n);
- block = mem_area_alloc(len, mem_comm_pool);
+ block = mem_area_alloc(&len, mem_comm_pool);
} else {
- ut_ad(n <= MEM_MAX_ALLOC_IN_BUF);
+ len = UNIV_PAGE_SIZE;
- len = MEM_BLOCK_HEADER_SIZE + MEM_SPACE_NEEDED(n);
+ if ((type & MEM_HEAP_BTR_SEARCH) && heap) {
+ /* We cannot allocate the block from the
+ buffer pool, but must get the free block from
+ the heap header free block field */
- if (len < UNIV_PAGE_SIZE / 2) {
+ buf_block = heap->free_block;
+ heap->free_block = NULL;
- block = mem_area_alloc(len, mem_comm_pool);
- } else {
- len = UNIV_PAGE_SIZE;
-
- if ((type & MEM_HEAP_BTR_SEARCH) && heap) {
- /* We cannot allocate the block from the
- buffer pool, but must get the free block from
- the heap header free block field */
+ if (UNIV_UNLIKELY(!buf_block)) {
- block = (mem_block_t*)heap->free_block;
- heap->free_block = NULL;
- } else {
- block = (mem_block_t*)buf_frame_alloc();
+ return(NULL);
}
+ } else {
+ buf_block = buf_block_alloc(0);
}
- }
- if (block == NULL) {
- /* Only MEM_HEAP_BTR_SEARCH allocation should ever fail. */
- ut_a(type & MEM_HEAP_BTR_SEARCH);
-
- return(NULL);
+ block = (mem_block_t*) buf_block->frame;
}
+ ut_ad(block);
+ block->buf_block = buf_block;
+ block->free_block = NULL;
+#else /* !UNIV_HOTBACKUP */
+ len = MEM_BLOCK_HEADER_SIZE + MEM_SPACE_NEEDED(n);
+ block = ut_malloc(len);
+ ut_ad(block);
+#endif /* !UNIV_HOTBACKUP */
+
block->magic_n = MEM_BLOCK_MAGIC_N;
ut_strlcpy_rev(block->file_name, file_name, sizeof(block->file_name));
block->line = line;
@@ -412,25 +383,21 @@ mem_heap_create_block(
mem_block_set_free(block, MEM_BLOCK_HEADER_SIZE);
mem_block_set_start(block, MEM_BLOCK_HEADER_SIZE);
- block->free_block = NULL;
- block->init_block = (init_block != NULL);
-
ut_ad((ulint)MEM_BLOCK_HEADER_SIZE < len);
return(block);
}
-/*******************************************************************
-Adds a new block to a memory heap. */
-
+/***************************************************************//**
+Adds a new block to a memory heap.
+@return created block, NULL if did not succeed (only possible for
+MEM_HEAP_BTR_SEARCH type heaps) */
+UNIV_INTERN
mem_block_t*
mem_heap_add_block(
/*===============*/
- /* out: created block, NULL if did not
- succeed (only possible for
- MEM_HEAP_BTR_SEARCH type heaps)*/
- mem_heap_t* heap, /* in: memory heap */
- ulint n) /* in: number of bytes user needs */
+ mem_heap_t* heap, /*!< in: memory heap */
+ ulint n) /*!< in: number of bytes user needs */
{
mem_block_t* block;
mem_block_t* new_block;
@@ -462,7 +429,7 @@ mem_heap_add_block(
new_size = n;
}
- new_block = mem_heap_create_block(heap, new_size, NULL, heap->type,
+ new_block = mem_heap_create_block(heap, new_size, heap->type,
heap->file_name, heap->line);
if (new_block == NULL) {
@@ -476,18 +443,20 @@ mem_heap_add_block(
return(new_block);
}
-/**********************************************************************
+/******************************************************************//**
Frees a block from a memory heap. */
-
+UNIV_INTERN
void
mem_heap_block_free(
/*================*/
- mem_heap_t* heap, /* in: heap */
- mem_block_t* block) /* in: block to free */
+ mem_heap_t* heap, /*!< in: heap */
+ mem_block_t* block) /*!< in: block to free */
{
- ulint type;
- ulint len;
- ibool init_block;
+ ulint type;
+ ulint len;
+#ifndef UNIV_HOTBACKUP
+ buf_block_t* buf_block = block->buf_block;
+#endif /* !UNIV_HOTBACKUP */
if (block->magic_n != MEM_BLOCK_MAGIC_N) {
mem_analyze_corruption(block);
@@ -504,56 +473,65 @@ mem_heap_block_free(
#endif
type = heap->type;
len = block->len;
- init_block = block->init_block;
block->magic_n = MEM_FREED_BLOCK_MAGIC_N;
+#ifndef UNIV_HOTBACKUP
+ if (!srv_use_sys_malloc) {
#ifdef UNIV_MEM_DEBUG
- /* In the debug version we set the memory to a random combination
- of hex 0xDE and 0xAD. */
+ /* In the debug version we set the memory to a random
+ combination of hex 0xDE and 0xAD. */
- mem_erase_buf((byte*)block, len);
+ mem_erase_buf((byte*)block, len);
#else /* UNIV_MEM_DEBUG */
- UNIV_MEM_ASSERT_AND_FREE(block, len);
+ UNIV_MEM_ASSERT_AND_FREE(block, len);
#endif /* UNIV_MEM_DEBUG */
- if (init_block) {
- /* Do not have to free: do nothing */
-
- } else if (type == MEM_HEAP_DYNAMIC) {
+ }
+ if (type == MEM_HEAP_DYNAMIC || len < UNIV_PAGE_SIZE / 2) {
+ ut_ad(!buf_block);
mem_area_free(block, mem_comm_pool);
} else {
ut_ad(type & MEM_HEAP_BUFFER);
- if (len >= UNIV_PAGE_SIZE / 2) {
- buf_frame_free((byte*)block);
- } else {
- mem_area_free(block, mem_comm_pool);
- }
+ buf_block_free(buf_block);
}
+#else /* !UNIV_HOTBACKUP */
+#ifdef UNIV_MEM_DEBUG
+ /* In the debug version we set the memory to a random
+ combination of hex 0xDE and 0xAD. */
+
+ mem_erase_buf((byte*)block, len);
+#else /* UNIV_MEM_DEBUG */
+ UNIV_MEM_ASSERT_AND_FREE(block, len);
+#endif /* UNIV_MEM_DEBUG */
+ ut_free(block);
+#endif /* !UNIV_HOTBACKUP */
}
-/**********************************************************************
+#ifndef UNIV_HOTBACKUP
+/******************************************************************//**
Frees the free_block field from a memory heap. */
-
+UNIV_INTERN
void
mem_heap_free_block_free(
/*=====================*/
- mem_heap_t* heap) /* in: heap */
+ mem_heap_t* heap) /*!< in: heap */
{
- if (heap->free_block) {
+ if (UNIV_LIKELY_NULL(heap->free_block)) {
- buf_frame_free(heap->free_block);
+ buf_block_free(heap->free_block);
heap->free_block = NULL;
}
}
+#endif /* !UNIV_HOTBACKUP */
#ifdef MEM_PERIODIC_CHECK
-/**********************************************************************
+/******************************************************************//**
Goes through the list of all allocated mem blocks, checks their magic
numbers, and reports possible corruption. */
-
+UNIV_INTERN
void
mem_validate_all_blocks(void)
/*=========================*/
diff --git a/storage/innobase/mem/mem0pool.c b/storage/innobase/mem/mem0pool.c
index 27da86a0309..c4f8af607e0 100644
--- a/storage/innobase/mem/mem0pool.c
+++ b/storage/innobase/mem/mem0pool.c
@@ -1,7 +1,24 @@
-/************************************************************************
-The lowest-level memory management
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1997 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file mem/mem0pool.c
+The lowest-level memory management
Created 5/12/1997 Heikki Tuuri
*************************************************************************/
@@ -11,6 +28,7 @@ Created 5/12/1997 Heikki Tuuri
#include "mem0pool.ic"
#endif
+#include "srv0srv.h"
#include "sync0sync.h"
#include "ut0mem.h"
#include "ut0lst.h"
@@ -72,39 +90,39 @@ and for the adaptive index. Thus, for each individual transaction, its locks
can occupy at most about the size of the buffer frame of memory in the common
pool, and after that its locks will grow into the buffer pool. */
-/* Mask used to extract the free bit from area->size */
+/** Mask used to extract the free bit from area->size */
#define MEM_AREA_FREE 1
-/* The smallest memory area total size */
+/** The smallest memory area total size */
#define MEM_AREA_MIN_SIZE (2 * MEM_AREA_EXTRA_SIZE)
-/* Data structure for a memory pool. The space is allocated using the buddy
+/** Data structure for a memory pool. The space is allocated using the buddy
algorithm, where free list i contains areas of size 2 to power i. */
struct mem_pool_struct{
- byte* buf; /* memory pool */
- ulint size; /* memory common pool size */
- ulint reserved; /* amount of currently allocated
+ byte* buf; /*!< memory pool */
+ ulint size; /*!< memory common pool size */
+ ulint reserved; /*!< amount of currently allocated
memory */
- mutex_t mutex; /* mutex protecting this struct */
+ mutex_t mutex; /*!< mutex protecting this struct */
UT_LIST_BASE_NODE_T(mem_area_t)
- free_list[64]; /* lists of free memory areas: an
+ free_list[64]; /*!< lists of free memory areas: an
area is put to the list whose number
is the 2-logarithm of the area size */
};
-/* The common memory pool */
-mem_pool_t* mem_comm_pool = NULL;
+/** The common memory pool */
+UNIV_INTERN mem_pool_t* mem_comm_pool = NULL;
/* We use this counter to check that the mem pool mutex does not leak;
this is to track a strange assertion failure reported at
mysql@lists.mysql.com */
-ulint mem_n_threads_inside = 0;
+UNIV_INTERN ulint mem_n_threads_inside = 0;
-/************************************************************************
+/********************************************************************//**
Reserves the mem pool mutex. */
-
+UNIV_INTERN
void
mem_pool_mutex_enter(void)
/*======================*/
@@ -112,9 +130,9 @@ mem_pool_mutex_enter(void)
mutex_enter(&(mem_comm_pool->mutex));
}
-/************************************************************************
+/********************************************************************//**
Releases the mem pool mutex. */
-
+UNIV_INTERN
void
mem_pool_mutex_exit(void)
/*=====================*/
@@ -122,39 +140,39 @@ mem_pool_mutex_exit(void)
mutex_exit(&(mem_comm_pool->mutex));
}
-/************************************************************************
-Returns memory area size. */
+/********************************************************************//**
+Returns memory area size.
+@return size */
UNIV_INLINE
ulint
mem_area_get_size(
/*==============*/
- /* out: size */
- mem_area_t* area) /* in: area */
+ mem_area_t* area) /*!< in: area */
{
return(area->size_and_free & ~MEM_AREA_FREE);
}
-/************************************************************************
+/********************************************************************//**
Sets memory area size. */
UNIV_INLINE
void
mem_area_set_size(
/*==============*/
- mem_area_t* area, /* in: area */
- ulint size) /* in: size */
+ mem_area_t* area, /*!< in: area */
+ ulint size) /*!< in: size */
{
area->size_and_free = (area->size_and_free & MEM_AREA_FREE)
| size;
}
-/************************************************************************
-Returns memory area free bit. */
+/********************************************************************//**
+Returns memory area free bit.
+@return TRUE if free */
UNIV_INLINE
ibool
mem_area_get_free(
/*==============*/
- /* out: TRUE if free */
- mem_area_t* area) /* in: area */
+ mem_area_t* area) /*!< in: area */
{
#if TRUE != MEM_AREA_FREE
# error "TRUE != MEM_AREA_FREE"
@@ -162,14 +180,14 @@ mem_area_get_free(
return(area->size_and_free & MEM_AREA_FREE);
}
-/************************************************************************
+/********************************************************************//**
Sets memory area free bit. */
UNIV_INLINE
void
mem_area_set_free(
/*==============*/
- mem_area_t* area, /* in: area */
- ibool free) /* in: free bit value */
+ mem_area_t* area, /*!< in: area */
+ ibool free) /*!< in: free bit value */
{
#if TRUE != MEM_AREA_FREE
# error "TRUE != MEM_AREA_FREE"
@@ -178,22 +196,20 @@ mem_area_set_free(
| free;
}
-/************************************************************************
-Creates a memory pool. */
-
+/********************************************************************//**
+Creates a memory pool.
+@return memory pool */
+UNIV_INTERN
mem_pool_t*
mem_pool_create(
/*============*/
- /* out: memory pool */
- ulint size) /* in: pool size in bytes */
+ ulint size) /*!< in: pool size in bytes */
{
mem_pool_t* pool;
mem_area_t* area;
ulint i;
ulint used;
- ut_a(size > 10000);
-
pool = ut_malloc(sizeof(mem_pool_t));
/* We do not set the memory to zero (FALSE) in the pool,
@@ -244,16 +260,27 @@ mem_pool_create(
return(pool);
}
-/************************************************************************
-Fills the specified free list. */
+/********************************************************************//**
+Frees a memory pool. */
+UNIV_INTERN
+void
+mem_pool_free(
+/*==========*/
+ mem_pool_t* pool) /*!< in, own: memory pool */
+{
+ ut_free(pool->buf);
+ ut_free(pool);
+}
+
+/********************************************************************//**
+Fills the specified free list.
+@return TRUE if we were able to insert a block to the free list */
static
ibool
mem_pool_fill_free_list(
/*====================*/
- /* out: TRUE if we were able to insert a
- block to the free list */
- ulint i, /* in: free list index */
- mem_pool_t* pool) /* in: memory pool */
+ ulint i, /*!< in: free list index */
+ mem_pool_t* pool) /*!< in: memory pool */
{
mem_area_t* area;
mem_area_t* area2;
@@ -261,7 +288,7 @@ mem_pool_fill_free_list(
ut_ad(mutex_own(&(pool->mutex)));
- if (i >= 63) {
+ if (UNIV_UNLIKELY(i >= 63)) {
/* We come here when we have run out of space in the
memory pool: */
@@ -293,7 +320,7 @@ mem_pool_fill_free_list(
area = UT_LIST_GET_FIRST(pool->free_list[i + 1]);
}
- if (UT_LIST_GET_LEN(pool->free_list[i + 1]) == 0) {
+ if (UNIV_UNLIKELY(UT_LIST_GET_LEN(pool->free_list[i + 1]) == 0)) {
mem_analyze_corruption(area);
ut_error;
@@ -316,23 +343,33 @@ mem_pool_fill_free_list(
return(TRUE);
}
-/************************************************************************
+/********************************************************************//**
Allocates memory from a pool. NOTE: This low-level function should only be
-used in mem0mem.*! */
-
+used in mem0mem.*!
+@return own: allocated memory buffer */
+UNIV_INTERN
void*
mem_area_alloc(
/*===========*/
- /* out, own: allocated memory buffer */
- ulint size, /* in: allocated size in bytes; for optimum
+ ulint* psize, /*!< in: requested size in bytes; for optimum
space usage, the size should be a power of 2
- minus MEM_AREA_EXTRA_SIZE */
- mem_pool_t* pool) /* in: memory pool */
+ minus MEM_AREA_EXTRA_SIZE;
+ out: allocated size in bytes (greater than
+ or equal to the requested size) */
+ mem_pool_t* pool) /*!< in: memory pool */
{
mem_area_t* area;
+ ulint size;
ulint n;
ibool ret;
+ /* If we are using os allocator just make a simple call
+ to malloc */
+ if (UNIV_LIKELY(srv_use_sys_malloc)) {
+ return(malloc(*psize));
+ }
+
+ size = *psize;
n = ut_2_log(ut_max(size + MEM_AREA_EXTRA_SIZE, MEM_AREA_MIN_SIZE));
mutex_enter(&(pool->mutex));
@@ -403,22 +440,23 @@ mem_area_alloc(
mutex_exit(&(pool->mutex));
ut_ad(mem_pool_validate(pool));
- UNIV_MEM_ALLOC(MEM_AREA_EXTRA_SIZE + (byte*)area,
- ut_2_exp(n) - MEM_AREA_EXTRA_SIZE);
+
+ *psize = ut_2_exp(n) - MEM_AREA_EXTRA_SIZE;
+ UNIV_MEM_ALLOC(MEM_AREA_EXTRA_SIZE + (byte*)area, *psize);
return((void*)(MEM_AREA_EXTRA_SIZE + ((byte*)area)));
}
-/************************************************************************
-Gets the buddy of an area, if it exists in pool. */
+/********************************************************************//**
+Gets the buddy of an area, if it exists in pool.
+@return the buddy, NULL if no buddy in pool */
UNIV_INLINE
mem_area_t*
mem_area_get_buddy(
/*===============*/
- /* out: the buddy, NULL if no buddy in pool */
- mem_area_t* area, /* in: memory area */
- ulint size, /* in: memory area size */
- mem_pool_t* pool) /* in: memory pool */
+ mem_area_t* area, /*!< in: memory area */
+ ulint size, /*!< in: memory area size */
+ mem_pool_t* pool) /*!< in: memory pool */
{
mem_area_t* buddy;
@@ -449,15 +487,15 @@ mem_area_get_buddy(
return(buddy);
}
-/************************************************************************
+/********************************************************************//**
Frees memory to a pool. */
-
+UNIV_INTERN
void
mem_area_free(
/*==========*/
- void* ptr, /* in, own: pointer to allocated memory
+ void* ptr, /*!< in, own: pointer to allocated memory
buffer */
- mem_pool_t* pool) /* in: memory pool */
+ mem_pool_t* pool) /*!< in: memory pool */
{
mem_area_t* area;
mem_area_t* buddy;
@@ -465,6 +503,12 @@ mem_area_free(
ulint size;
ulint n;
+ if (UNIV_LIKELY(srv_use_sys_malloc)) {
+ free(ptr);
+
+ return;
+ }
+
/* It may be that the area was really allocated from the OS with
regular malloc: check if ptr points within our memory pool */
@@ -506,7 +550,7 @@ mem_area_free(
next_size = mem_area_get_size(
(mem_area_t*)(((byte*)area) + size));
- if (ut_2_power_up(next_size) != next_size) {
+ if (UNIV_UNLIKELY(!next_size || !ut_is_2pow(next_size))) {
fprintf(stderr,
"InnoDB: Error: Memory area size %lu,"
" next area size %lu not a power of 2!\n"
@@ -572,14 +616,14 @@ mem_area_free(
ut_ad(mem_pool_validate(pool));
}
-/************************************************************************
-Validates a memory pool. */
-
+/********************************************************************//**
+Validates a memory pool.
+@return TRUE if ok */
+UNIV_INTERN
ibool
mem_pool_validate(
/*==============*/
- /* out: TRUE if ok */
- mem_pool_t* pool) /* in: memory pool */
+ mem_pool_t* pool) /*!< in: memory pool */
{
mem_area_t* area;
mem_area_t* buddy;
@@ -592,7 +636,8 @@ mem_pool_validate(
for (i = 0; i < 64; i++) {
- UT_LIST_VALIDATE(free_list, mem_area_t, pool->free_list[i]);
+ UT_LIST_VALIDATE(free_list, mem_area_t, pool->free_list[i],
+ (void) 0);
area = UT_LIST_GET_FIRST(pool->free_list[i]);
@@ -618,14 +663,14 @@ mem_pool_validate(
return(TRUE);
}
-/************************************************************************
+/********************************************************************//**
Prints info of a memory pool. */
-
+UNIV_INTERN
void
mem_pool_print_info(
/*================*/
- FILE* outfile,/* in: output file to write to */
- mem_pool_t* pool) /* in: memory pool */
+ FILE* outfile,/*!< in: output file to write to */
+ mem_pool_t* pool) /*!< in: memory pool */
{
ulint i;
@@ -651,14 +696,14 @@ mem_pool_print_info(
mutex_exit(&(pool->mutex));
}
-/************************************************************************
-Returns the amount of reserved memory. */
-
+/********************************************************************//**
+Returns the amount of reserved memory.
+@return reserved memory in bytes */
+UNIV_INTERN
ulint
mem_pool_get_reserved(
/*==================*/
- /* out: reserved memory in bytes */
- mem_pool_t* pool) /* in: memory pool */
+ mem_pool_t* pool) /*!< in: memory pool */
{
ulint reserved;
diff --git a/storage/innobase/mtr/mtr0log.c b/storage/innobase/mtr/mtr0log.c
index e5d572bbfa7..3f3dab36b76 100644
--- a/storage/innobase/mtr/mtr0log.c
+++ b/storage/innobase/mtr/mtr0log.c
@@ -1,7 +1,24 @@
-/******************************************************
-Mini-transaction log routines
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1995 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file mtr/mtr0log.c
+Mini-transaction log routines
Created 12/7/1995 Heikki Tuuri
*******************************************************/
@@ -13,19 +30,22 @@ Created 12/7/1995 Heikki Tuuri
#endif
#include "buf0buf.h"
-#include "dict0boot.h"
+#include "dict0dict.h"
#include "log0recv.h"
#include "page0page.h"
-/************************************************************
-Catenates n bytes to the mtr log. */
+#ifndef UNIV_HOTBACKUP
+# include "dict0boot.h"
+/********************************************************//**
+Catenates n bytes to the mtr log. */
+UNIV_INTERN
void
mlog_catenate_string(
/*=================*/
- mtr_t* mtr, /* in: mtr */
- const byte* str, /* in: string to write */
- ulint len) /* in: string length */
+ mtr_t* mtr, /*!< in: mtr */
+ const byte* str, /*!< in: string to write */
+ ulint len) /*!< in: string length */
{
dyn_array_t* mlog;
@@ -39,31 +59,25 @@ mlog_catenate_string(
dyn_push_string(mlog, str, len);
}
-/************************************************************
+/********************************************************//**
Writes the initial part of a log record consisting of one-byte item
type and four-byte space and page numbers. Also pushes info
to the mtr memo that a buffer page has been modified. */
-
+UNIV_INTERN
void
mlog_write_initial_log_record(
/*==========================*/
- byte* ptr, /* in: pointer to (inside) a buffer frame holding the
- file page where modification is made */
- byte type, /* in: log item type: MLOG_1BYTE, ... */
- mtr_t* mtr) /* in: mini-transaction handle */
+ const byte* ptr, /*!< in: pointer to (inside) a buffer
+ frame holding the file page where
+ modification is made */
+ byte type, /*!< in: log item type: MLOG_1BYTE, ... */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
byte* log_ptr;
ut_ad(type <= MLOG_BIGGEST_TYPE);
ut_ad(type > MLOG_8BYTES);
- if (ptr < buf_pool->frame_zero || ptr >= buf_pool->high_end) {
- fprintf(stderr,
- "InnoDB: Error: trying to write to"
- " a stray memory location %p\n", (void*) ptr);
- ut_error;
- }
-
log_ptr = mlog_open(mtr, 11);
/* If no logging is requested, we may return now */
@@ -76,20 +90,20 @@ mlog_write_initial_log_record(
mlog_close(mtr, log_ptr);
}
+#endif /* !UNIV_HOTBACKUP */
-/************************************************************
-Parses an initial log record written by mlog_write_initial_log_record. */
-
+/********************************************************//**
+Parses an initial log record written by mlog_write_initial_log_record.
+@return parsed record end, NULL if not a complete record */
+UNIV_INTERN
byte*
mlog_parse_initial_log_record(
/*==========================*/
- /* out: parsed record end, NULL if not a complete
- record */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- byte* type, /* out: log record type: MLOG_1BYTE, ... */
- ulint* space, /* out: space id */
- ulint* page_no)/* out: page number */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ byte* type, /*!< out: log record type: MLOG_1BYTE, ... */
+ ulint* space, /*!< out: space id */
+ ulint* page_no)/*!< out: page number */
{
if (end_ptr < ptr + 1) {
@@ -118,24 +132,25 @@ mlog_parse_initial_log_record(
return(ptr);
}
-/************************************************************
-Parses a log record written by mlog_write_ulint or mlog_write_dulint. */
-
+/********************************************************//**
+Parses a log record written by mlog_write_ulint or mlog_write_dulint.
+@return parsed record end, NULL if not a complete record or a corrupt record */
+UNIV_INTERN
byte*
mlog_parse_nbytes(
/*==============*/
- /* out: parsed record end, NULL if not a complete
- record or a corrupt record */
- ulint type, /* in: log record type: MLOG_1BYTE, ... */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- byte* page) /* in: page where to apply the log record, or NULL */
+ ulint type, /*!< in: log record type: MLOG_1BYTE, ... */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ byte* page, /*!< in: page where to apply the log record, or NULL */
+ void* page_zip)/*!< in/out: compressed page, or NULL */
{
ulint offset;
ulint val;
dulint dval;
ut_a(type <= MLOG_8BYTES);
+ ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX);
if (end_ptr < ptr + 2) {
@@ -160,6 +175,11 @@ mlog_parse_nbytes(
}
if (page) {
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ mach_write_to_8
+ (((page_zip_des_t*) page_zip)->data
+ + offset, dval);
+ }
mach_write_to_8(page + offset, dval);
}
@@ -173,68 +193,78 @@ mlog_parse_nbytes(
return(NULL);
}
- if (type == MLOG_1BYTE) {
- if (val > 0xFFUL) {
- recv_sys->found_corrupt_log = TRUE;
-
- return(NULL);
+ switch (type) {
+ case MLOG_1BYTE:
+ if (UNIV_UNLIKELY(val > 0xFFUL)) {
+ goto corrupt;
}
- } else if (type == MLOG_2BYTES) {
- if (val > 0xFFFFUL) {
- recv_sys->found_corrupt_log = TRUE;
-
- return(NULL);
+ if (page) {
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ mach_write_to_1
+ (((page_zip_des_t*) page_zip)->data
+ + offset, val);
+ }
+ mach_write_to_1(page + offset, val);
}
- } else {
- if (type != MLOG_4BYTES) {
- recv_sys->found_corrupt_log = TRUE;
-
- return(NULL);
+ break;
+ case MLOG_2BYTES:
+ if (UNIV_UNLIKELY(val > 0xFFFFUL)) {
+ goto corrupt;
}
- }
-
- if (page) {
- if (type == MLOG_1BYTE) {
- mach_write_to_1(page + offset, val);
- } else if (type == MLOG_2BYTES) {
+ if (page) {
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ mach_write_to_2
+ (((page_zip_des_t*) page_zip)->data
+ + offset, val);
+ }
mach_write_to_2(page + offset, val);
- } else {
- ut_a(type == MLOG_4BYTES);
+ }
+ break;
+ case MLOG_4BYTES:
+ if (page) {
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ mach_write_to_4
+ (((page_zip_des_t*) page_zip)->data
+ + offset, val);
+ }
mach_write_to_4(page + offset, val);
}
+ break;
+ default:
+ corrupt:
+ recv_sys->found_corrupt_log = TRUE;
+ ptr = NULL;
}
return(ptr);
}
-/************************************************************
+/********************************************************//**
Writes 1 - 4 bytes to a file page buffered in the buffer pool.
Writes the corresponding log record to the mini-transaction log. */
-
+UNIV_INTERN
void
mlog_write_ulint(
/*=============*/
- byte* ptr, /* in: pointer where to write */
- ulint val, /* in: value to write */
- byte type, /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
- mtr_t* mtr) /* in: mini-transaction handle */
+ byte* ptr, /*!< in: pointer where to write */
+ ulint val, /*!< in: value to write */
+ byte type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
byte* log_ptr;
- if (ptr < buf_pool->frame_zero || ptr >= buf_pool->high_end) {
- fprintf(stderr,
- "InnoDB: Error: trying to write to"
- " a stray memory location %p\n", (void*) ptr);
- ut_error;
- }
-
- if (type == MLOG_1BYTE) {
+ switch (type) {
+ case MLOG_1BYTE:
mach_write_to_1(ptr, val);
- } else if (type == MLOG_2BYTES) {
+ break;
+ case MLOG_2BYTES:
mach_write_to_2(ptr, val);
- } else {
- ut_ad(type == MLOG_4BYTES);
+ break;
+ case MLOG_4BYTES:
mach_write_to_4(ptr, val);
+ break;
+ default:
+ ut_error;
}
log_ptr = mlog_open(mtr, 11 + 2 + 5);
@@ -247,7 +277,7 @@ mlog_write_ulint(
log_ptr = mlog_write_initial_log_record_fast(ptr, type, log_ptr, mtr);
- mach_write_to_2(log_ptr, ptr - buf_frame_align(ptr));
+ mach_write_to_2(log_ptr, page_offset(ptr));
log_ptr += 2;
log_ptr += mach_write_compressed(log_ptr, val);
@@ -255,27 +285,19 @@ mlog_write_ulint(
mlog_close(mtr, log_ptr);
}
-/************************************************************
+/********************************************************//**
Writes 8 bytes to a file page buffered in the buffer pool.
Writes the corresponding log record to the mini-transaction log. */
-
+UNIV_INTERN
void
mlog_write_dulint(
/*==============*/
- byte* ptr, /* in: pointer where to write */
- dulint val, /* in: value to write */
- mtr_t* mtr) /* in: mini-transaction handle */
+ byte* ptr, /*!< in: pointer where to write */
+ dulint val, /*!< in: value to write */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
byte* log_ptr;
- if (UNIV_UNLIKELY(ptr < buf_pool->frame_zero)
- || UNIV_UNLIKELY(ptr >= buf_pool->high_end)) {
- fprintf(stderr,
- "InnoDB: Error: trying to write to"
- " a stray memory location %p\n", (void*) ptr);
- ut_error;
- }
-
ut_ad(ptr && mtr);
mach_write_to_8(ptr, val);
@@ -291,7 +313,7 @@ mlog_write_dulint(
log_ptr = mlog_write_initial_log_record_fast(ptr, MLOG_8BYTES,
log_ptr, mtr);
- mach_write_to_2(log_ptr, ptr - buf_frame_align(ptr));
+ mach_write_to_2(log_ptr, page_offset(ptr));
log_ptr += 2;
log_ptr += mach_dulint_write_compressed(log_ptr, val);
@@ -299,31 +321,42 @@ mlog_write_dulint(
mlog_close(mtr, log_ptr);
}
-/************************************************************
+#ifndef UNIV_HOTBACKUP
+/********************************************************//**
Writes a string to a file page buffered in the buffer pool. Writes the
corresponding log record to the mini-transaction log. */
-
+UNIV_INTERN
void
mlog_write_string(
/*==============*/
- byte* ptr, /* in: pointer where to write */
- const byte* str, /* in: string to write */
- ulint len, /* in: string length */
- mtr_t* mtr) /* in: mini-transaction handle */
+ byte* ptr, /*!< in: pointer where to write */
+ const byte* str, /*!< in: string to write */
+ ulint len, /*!< in: string length */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
- byte* log_ptr;
-
- if (UNIV_UNLIKELY(ptr < buf_pool->frame_zero)
- || UNIV_UNLIKELY(ptr >= buf_pool->high_end)) {
- fprintf(stderr,
- "InnoDB: Error: trying to write to"
- " a stray memory location %p\n", (void*) ptr);
- ut_error;
- }
ut_ad(ptr && mtr);
ut_a(len < UNIV_PAGE_SIZE);
- ut_memcpy(ptr, str, len);
+ memcpy(ptr, str, len);
+
+ mlog_log_string(ptr, len, mtr);
+}
+
+/********************************************************//**
+Logs a write of a string to a file page buffered in the buffer pool.
+Writes the corresponding log record to the mini-transaction log. */
+UNIV_INTERN
+void
+mlog_log_string(
+/*============*/
+ byte* ptr, /*!< in: pointer written to */
+ ulint len, /*!< in: string length */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
+{
+ byte* log_ptr;
+
+ ut_ad(ptr && mtr);
+ ut_ad(len <= UNIV_PAGE_SIZE);
log_ptr = mlog_open(mtr, 30);
@@ -335,7 +368,7 @@ mlog_write_string(
log_ptr = mlog_write_initial_log_record_fast(ptr, MLOG_WRITE_STRING,
log_ptr, mtr);
- mach_write_to_2(log_ptr, ptr - buf_frame_align(ptr));
+ mach_write_to_2(log_ptr, page_offset(ptr));
log_ptr += 2;
mach_write_to_2(log_ptr, len);
@@ -343,24 +376,27 @@ mlog_write_string(
mlog_close(mtr, log_ptr);
- mlog_catenate_string(mtr, str, len);
+ mlog_catenate_string(mtr, ptr, len);
}
+#endif /* !UNIV_HOTBACKUP */
-/************************************************************
-Parses a log record written by mlog_write_string. */
-
+/********************************************************//**
+Parses a log record written by mlog_write_string.
+@return parsed record end, NULL if not a complete record */
+UNIV_INTERN
byte*
mlog_parse_string(
/*==============*/
- /* out: parsed record end, NULL if not a complete
- record */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- byte* page) /* in: page where to apply the log record, or NULL */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ byte* page, /*!< in: page where to apply the log record, or NULL */
+ void* page_zip)/*!< in/out: compressed page, or NULL */
{
ulint offset;
ulint len;
+ ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX);
+
if (end_ptr < ptr + 4) {
return(NULL);
@@ -368,44 +404,46 @@ mlog_parse_string(
offset = mach_read_from_2(ptr);
ptr += 2;
+ len = mach_read_from_2(ptr);
+ ptr += 2;
- if (offset >= UNIV_PAGE_SIZE) {
+ if (UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
+ || UNIV_UNLIKELY(len + offset > UNIV_PAGE_SIZE)) {
recv_sys->found_corrupt_log = TRUE;
return(NULL);
}
- len = mach_read_from_2(ptr);
- ptr += 2;
-
- ut_a(len + offset < UNIV_PAGE_SIZE);
-
if (end_ptr < ptr + len) {
return(NULL);
}
if (page) {
- ut_memcpy(page + offset, ptr, len);
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ memcpy(((page_zip_des_t*) page_zip)->data
+ + offset, ptr, len);
+ }
+ memcpy(page + offset, ptr, len);
}
return(ptr + len);
}
-/************************************************************
+#ifndef UNIV_HOTBACKUP
+/********************************************************//**
Opens a buffer for mlog, writes the initial log record and,
-if needed, the field lengths of an index. */
-
+if needed, the field lengths of an index.
+@return buffer, NULL if log mode MTR_LOG_NONE */
+UNIV_INTERN
byte*
mlog_open_and_write_index(
/*======================*/
- /* out: buffer, NULL if log mode
- MTR_LOG_NONE */
- mtr_t* mtr, /* in: mtr */
- byte* rec, /* in: index record or page */
- dict_index_t* index, /* in: record descriptor */
- byte type, /* in: log item type */
- ulint size) /* in: requested buffer size in bytes
+ mtr_t* mtr, /*!< in: mtr */
+ const byte* rec, /*!< in: index record or page */
+ dict_index_t* index, /*!< in: record descriptor */
+ byte type, /*!< in: log item type */
+ ulint size) /*!< in: requested buffer size in bytes
(if 0, calls mlog_close() and returns NULL) */
{
byte* log_ptr;
@@ -489,20 +527,19 @@ mlog_open_and_write_index(
}
return(log_ptr);
}
+#endif /* !UNIV_HOTBACKUP */
-/************************************************************
-Parses a log record written by mlog_open_and_write_index. */
-
+/********************************************************//**
+Parses a log record written by mlog_open_and_write_index.
+@return parsed record end, NULL if not a complete record */
+UNIV_INTERN
byte*
mlog_parse_index(
/*=============*/
- /* out: parsed record end,
- NULL if not a complete record */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- /* out: new value of log_ptr */
- ibool comp, /* in: TRUE=compact record format */
- dict_index_t** index) /* out, own: dummy index */
+ byte* ptr, /*!< in: buffer */
+ const byte* end_ptr,/*!< in: buffer end */
+ ibool comp, /*!< in: TRUE=compact record format */
+ dict_index_t** index) /*!< out, own: dummy index */
{
ulint i, n, n_uniq;
dict_table_t* table;
@@ -549,7 +586,7 @@ mlog_parse_index(
len & 0x8000 ? DATA_NOT_NULL : 0,
len & 0x7fff);
- dict_index_add_col(ind, table, (dict_col_t*)
+ dict_index_add_col(ind, table,
dict_table_get_nth_col(table, i),
0);
}
diff --git a/storage/innobase/mtr/mtr0mtr.c b/storage/innobase/mtr/mtr0mtr.c
index 365fa15878a..417e97732bb 100644
--- a/storage/innobase/mtr/mtr0mtr.c
+++ b/storage/innobase/mtr/mtr0mtr.c
@@ -1,7 +1,24 @@
-/******************************************************
-Mini-transaction buffer
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file mtr/mtr0mtr.c
+Mini-transaction buffer
Created 11/26/1995 Heikki Tuuri
*******************************************************/
@@ -17,28 +34,16 @@ Created 11/26/1995 Heikki Tuuri
#include "mtr0log.h"
#include "log0log.h"
-/*******************************************************************
-Starts a mini-transaction and creates a mini-transaction handle
-and buffer in the memory buffer given by the caller. */
-
-mtr_t*
-mtr_start_noninline(
-/*================*/
- /* out: mtr buffer which also acts as
- the mtr handle */
- mtr_t* mtr) /* in: memory buffer for the mtr buffer */
-{
- return(mtr_start(mtr));
-}
-
-/*********************************************************************
+#ifndef UNIV_HOTBACKUP
+# include "log0recv.h"
+/*****************************************************************//**
Releases the item in the slot given. */
UNIV_INLINE
void
mtr_memo_slot_release(
/*==================*/
- mtr_t* mtr, /* in: mtr */
- mtr_memo_slot_t* slot) /* in: memo slot */
+ mtr_t* mtr, /*!< in: mtr */
+ mtr_memo_slot_t* slot) /*!< in: memo slot */
{
void* object;
ulint type;
@@ -54,23 +59,20 @@ mtr_memo_slot_release(
} else if (type == MTR_MEMO_S_LOCK) {
rw_lock_s_unlock((rw_lock_t*)object);
#ifdef UNIV_DEBUG
- } else if (type == MTR_MEMO_X_LOCK) {
- rw_lock_x_unlock((rw_lock_t*)object);
- } else {
+ } else if (type != MTR_MEMO_X_LOCK) {
ut_ad(type == MTR_MEMO_MODIFY);
ut_ad(mtr_memo_contains(mtr, object,
MTR_MEMO_PAGE_X_FIX));
-#else
+#endif /* UNIV_DEBUG */
} else {
rw_lock_x_unlock((rw_lock_t*)object);
-#endif
}
}
slot->object = NULL;
}
-/**************************************************************
+/**********************************************************//**
Releases the mlocks and other objects stored in an mtr memo. They are released
in the order opposite to which they were pushed to the memo. NOTE! It is
essential that the x-rw-lock on a modified buffer page is not released before
@@ -81,7 +83,7 @@ UNIV_INLINE
void
mtr_memo_pop_all(
/*=============*/
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
mtr_memo_slot_t* slot;
dyn_array_t* memo;
@@ -103,18 +105,17 @@ mtr_memo_pop_all(
}
}
-/****************************************************************
+/************************************************************//**
Writes the contents of a mini-transaction log, if any, to the database log. */
static
void
mtr_log_reserve_and_write(
/*======================*/
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
dyn_array_t* mlog;
dyn_block_t* block;
ulint data_size;
- ibool success;
byte* first_data;
ut_ad(mtr);
@@ -133,8 +134,8 @@ mtr_log_reserve_and_write(
if (mlog->heap == NULL) {
mtr->end_lsn = log_reserve_and_write_fast(
first_data, dyn_block_get_used(mlog),
- &(mtr->start_lsn), &success);
- if (success) {
+ &mtr->start_lsn);
+ if (mtr->end_lsn) {
return;
}
@@ -161,22 +162,31 @@ mtr_log_reserve_and_write(
mtr->end_lsn = log_close();
}
+#endif /* !UNIV_HOTBACKUP */
-/*******************************************************************
+/***************************************************************//**
Commits a mini-transaction. */
-
+UNIV_INTERN
void
mtr_commit(
/*=======*/
- mtr_t* mtr) /* in: mini-transaction */
+ mtr_t* mtr) /*!< in: mini-transaction */
{
+#ifndef UNIV_HOTBACKUP
+ ibool write_log;
+#endif /* !UNIV_HOTBACKUP */
+
ut_ad(mtr);
ut_ad(mtr->magic_n == MTR_MAGIC_N);
ut_ad(mtr->state == MTR_ACTIVE);
-#ifdef UNIV_DEBUG
- mtr->state = MTR_COMMITTING;
-#endif
- if (mtr->modifications) {
+ ut_d(mtr->state = MTR_COMMITTING);
+
+#ifndef UNIV_HOTBACKUP
+ /* This is a dirty read, for debugging. */
+ ut_ad(!recv_no_log_write);
+ write_log = mtr->modifications && mtr->n_log_recs;
+
+ if (write_log) {
mtr_log_reserve_and_write(mtr);
}
@@ -190,27 +200,27 @@ mtr_commit(
mtr_memo_pop_all(mtr);
- if (mtr->modifications) {
+ if (write_log) {
log_release();
}
+#endif /* !UNIV_HOTBACKUP */
-#ifdef UNIV_DEBUG
- mtr->state = MTR_COMMITTED;
-#endif
+ ut_d(mtr->state = MTR_COMMITTED);
dyn_array_free(&(mtr->memo));
dyn_array_free(&(mtr->log));
}
-/**************************************************************
+#ifndef UNIV_HOTBACKUP
+/**********************************************************//**
Releases the latches stored in an mtr memo down to a savepoint.
NOTE! The mtr must not have made changes to buffer pages after the
savepoint, as these can be handled only by mtr_commit. */
-
+UNIV_INTERN
void
mtr_rollback_to_savepoint(
/*======================*/
- mtr_t* mtr, /* in: mtr */
- ulint savepoint) /* in: savepoint */
+ mtr_t* mtr, /*!< in: mtr */
+ ulint savepoint) /*!< in: savepoint */
{
mtr_memo_slot_t* slot;
dyn_array_t* memo;
@@ -235,15 +245,15 @@ mtr_rollback_to_savepoint(
}
}
-/*******************************************************
+/***************************************************//**
Releases an object in the memo stack. */
-
+UNIV_INTERN
void
mtr_memo_release(
/*=============*/
- mtr_t* mtr, /* in: mtr */
- void* object, /* in: object */
- ulint type) /* in: object type: MTR_MEMO_S_LOCK, ... */
+ mtr_t* mtr, /*!< in: mtr */
+ void* object, /*!< in: object */
+ ulint type) /*!< in: object type: MTR_MEMO_S_LOCK, ... */
{
mtr_memo_slot_t* slot;
dyn_array_t* memo;
@@ -270,24 +280,23 @@ mtr_memo_release(
}
}
}
+#endif /* !UNIV_HOTBACKUP */
-/************************************************************
-Reads 1 - 4 bytes from a file page buffered in the buffer pool. */
-
+/********************************************************//**
+Reads 1 - 4 bytes from a file page buffered in the buffer pool.
+@return value read */
+UNIV_INTERN
ulint
mtr_read_ulint(
/*===========*/
- /* out: value read */
- byte* ptr, /* in: pointer from where to read */
- ulint type, /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+ const byte* ptr, /*!< in: pointer from where to read */
+ ulint type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
mtr_t* mtr __attribute__((unused)))
- /* in: mini-transaction handle */
+ /*!< in: mini-transaction handle */
{
ut_ad(mtr->state == MTR_ACTIVE);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(ptr),
- MTR_MEMO_PAGE_S_FIX)
- || mtr_memo_contains(mtr, buf_block_align(ptr),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_S_FIX)
+ || mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_X_FIX));
if (type == MLOG_1BYTE) {
return(mach_read_from_1(ptr));
} else if (type == MLOG_2BYTES) {
@@ -298,34 +307,46 @@ mtr_read_ulint(
}
}
-/************************************************************
-Reads 8 bytes from a file page buffered in the buffer pool. */
-
+/********************************************************//**
+Reads 8 bytes from a file page buffered in the buffer pool.
+@return value read */
+UNIV_INTERN
dulint
mtr_read_dulint(
/*============*/
- /* out: value read */
- byte* ptr, /* in: pointer from where to read */
+ const byte* ptr, /*!< in: pointer from where to read */
mtr_t* mtr __attribute__((unused)))
- /* in: mini-transaction handle */
+ /*!< in: mini-transaction handle */
{
ut_ad(mtr->state == MTR_ACTIVE);
- ut_ad(ptr && mtr);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(ptr),
- MTR_MEMO_PAGE_S_FIX)
- || mtr_memo_contains(mtr, buf_block_align(ptr),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_S_FIX)
+ || mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_X_FIX));
return(mach_read_from_8(ptr));
}
#ifdef UNIV_DEBUG
-/*************************************************************
-Prints info of an mtr handle. */
+# ifndef UNIV_HOTBACKUP
+/**********************************************************//**
+Checks if memo contains the given page.
+@return TRUE if contains */
+UNIV_INTERN
+ibool
+mtr_memo_contains_page(
+/*===================*/
+ mtr_t* mtr, /*!< in: mtr */
+ const byte* ptr, /*!< in: pointer to buffer frame */
+ ulint type) /*!< in: type of object */
+{
+ return(mtr_memo_contains(mtr, buf_block_align(ptr), type));
+}
+/*********************************************************//**
+Prints info of an mtr handle. */
+UNIV_INTERN
void
mtr_print(
/*======*/
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
fprintf(stderr,
"Mini-transaction handle: memo size %lu bytes"
@@ -333,4 +354,5 @@ mtr_print(
(ulong) dyn_array_get_data_size(&(mtr->memo)),
(ulong) dyn_array_get_data_size(&(mtr->log)));
}
+# endif /* !UNIV_HOTBACKUP */
#endif /* UNIV_DEBUG */
diff --git a/storage/innodb_plugin/mysql-test/ctype_innodb_like.inc b/storage/innobase/mysql-test/ctype_innodb_like.inc
index ae43342885a..ae43342885a 100644
--- a/storage/innodb_plugin/mysql-test/ctype_innodb_like.inc
+++ b/storage/innobase/mysql-test/ctype_innodb_like.inc
diff --git a/storage/innodb_plugin/mysql-test/have_innodb.inc b/storage/innobase/mysql-test/have_innodb.inc
index 8944cc46f3e..8944cc46f3e 100644
--- a/storage/innodb_plugin/mysql-test/have_innodb.inc
+++ b/storage/innobase/mysql-test/have_innodb.inc
diff --git a/storage/innodb_plugin/mysql-test/innodb-analyze.result b/storage/innobase/mysql-test/innodb-analyze.result
index 2aee004a2d6..2aee004a2d6 100644
--- a/storage/innodb_plugin/mysql-test/innodb-analyze.result
+++ b/storage/innobase/mysql-test/innodb-analyze.result
diff --git a/storage/innodb_plugin/mysql-test/innodb-analyze.test b/storage/innobase/mysql-test/innodb-analyze.test
index 9bdb9db697c..9bdb9db697c 100644
--- a/storage/innodb_plugin/mysql-test/innodb-analyze.test
+++ b/storage/innobase/mysql-test/innodb-analyze.test
diff --git a/storage/innodb_plugin/mysql-test/innodb-autoinc.result b/storage/innobase/mysql-test/innodb-autoinc.result
index d2e8eb19e0c..d2e8eb19e0c 100644
--- a/storage/innodb_plugin/mysql-test/innodb-autoinc.result
+++ b/storage/innobase/mysql-test/innodb-autoinc.result
diff --git a/storage/innodb_plugin/mysql-test/innodb-autoinc.test b/storage/innobase/mysql-test/innodb-autoinc.test
index 61c42f45733..61c42f45733 100644
--- a/storage/innodb_plugin/mysql-test/innodb-autoinc.test
+++ b/storage/innobase/mysql-test/innodb-autoinc.test
diff --git a/storage/innodb_plugin/mysql-test/innodb-consistent-master.opt b/storage/innobase/mysql-test/innodb-consistent-master.opt
index 8cca44767da..8cca44767da 100644
--- a/storage/innodb_plugin/mysql-test/innodb-consistent-master.opt
+++ b/storage/innobase/mysql-test/innodb-consistent-master.opt
diff --git a/storage/innodb_plugin/mysql-test/innodb-consistent.result b/storage/innobase/mysql-test/innodb-consistent.result
index 9115791b99c..9115791b99c 100644
--- a/storage/innodb_plugin/mysql-test/innodb-consistent.result
+++ b/storage/innobase/mysql-test/innodb-consistent.result
diff --git a/storage/innodb_plugin/mysql-test/innodb-consistent.test b/storage/innobase/mysql-test/innodb-consistent.test
index 791600fc8a7..791600fc8a7 100644
--- a/storage/innodb_plugin/mysql-test/innodb-consistent.test
+++ b/storage/innobase/mysql-test/innodb-consistent.test
diff --git a/storage/innodb_plugin/mysql-test/innodb-index.inc b/storage/innobase/mysql-test/innodb-index.inc
index 37de3162abe..37de3162abe 100644
--- a/storage/innodb_plugin/mysql-test/innodb-index.inc
+++ b/storage/innobase/mysql-test/innodb-index.inc
diff --git a/storage/innodb_plugin/mysql-test/innodb-index.result b/storage/innobase/mysql-test/innodb-index.result
index a7d66b15300..a7d66b15300 100644
--- a/storage/innodb_plugin/mysql-test/innodb-index.result
+++ b/storage/innobase/mysql-test/innodb-index.result
diff --git a/storage/innodb_plugin/mysql-test/innodb-index.test b/storage/innobase/mysql-test/innodb-index.test
index 42888ff3686..42888ff3686 100644
--- a/storage/innodb_plugin/mysql-test/innodb-index.test
+++ b/storage/innobase/mysql-test/innodb-index.test
diff --git a/storage/innodb_plugin/mysql-test/innodb-index_ucs2.result b/storage/innobase/mysql-test/innodb-index_ucs2.result
index c8a1e8c7da1..c8a1e8c7da1 100644
--- a/storage/innodb_plugin/mysql-test/innodb-index_ucs2.result
+++ b/storage/innobase/mysql-test/innodb-index_ucs2.result
diff --git a/storage/innodb_plugin/mysql-test/innodb-index_ucs2.test b/storage/innobase/mysql-test/innodb-index_ucs2.test
index fff9a4da1a8..fff9a4da1a8 100644
--- a/storage/innodb_plugin/mysql-test/innodb-index_ucs2.test
+++ b/storage/innobase/mysql-test/innodb-index_ucs2.test
diff --git a/storage/innodb_plugin/mysql-test/innodb-lock.result b/storage/innobase/mysql-test/innodb-lock.result
index 4ace4065c34..4ace4065c34 100644
--- a/storage/innodb_plugin/mysql-test/innodb-lock.result
+++ b/storage/innobase/mysql-test/innodb-lock.result
diff --git a/storage/innodb_plugin/mysql-test/innodb-lock.test b/storage/innobase/mysql-test/innodb-lock.test
index eacf7e562be..eacf7e562be 100644
--- a/storage/innodb_plugin/mysql-test/innodb-lock.test
+++ b/storage/innobase/mysql-test/innodb-lock.test
diff --git a/storage/innodb_plugin/mysql-test/innodb-master.opt b/storage/innobase/mysql-test/innodb-master.opt
index 4901efb416c..4901efb416c 100644
--- a/storage/innodb_plugin/mysql-test/innodb-master.opt
+++ b/storage/innobase/mysql-test/innodb-master.opt
diff --git a/storage/innodb_plugin/mysql-test/innodb-replace.result b/storage/innobase/mysql-test/innodb-replace.result
index c926bb89a2e..c926bb89a2e 100644
--- a/storage/innodb_plugin/mysql-test/innodb-replace.result
+++ b/storage/innobase/mysql-test/innodb-replace.result
diff --git a/storage/innodb_plugin/mysql-test/innodb-replace.test b/storage/innobase/mysql-test/innodb-replace.test
index 8c3aacde5e8..8c3aacde5e8 100644
--- a/storage/innodb_plugin/mysql-test/innodb-replace.test
+++ b/storage/innobase/mysql-test/innodb-replace.test
diff --git a/storage/innodb_plugin/mysql-test/innodb-semi-consistent-master.opt b/storage/innobase/mysql-test/innodb-semi-consistent-master.opt
index e76299453d3..e76299453d3 100644
--- a/storage/innodb_plugin/mysql-test/innodb-semi-consistent-master.opt
+++ b/storage/innobase/mysql-test/innodb-semi-consistent-master.opt
diff --git a/storage/innodb_plugin/mysql-test/innodb-semi-consistent.result b/storage/innobase/mysql-test/innodb-semi-consistent.result
index ca0e362ef80..ca0e362ef80 100644
--- a/storage/innodb_plugin/mysql-test/innodb-semi-consistent.result
+++ b/storage/innobase/mysql-test/innodb-semi-consistent.result
diff --git a/storage/innodb_plugin/mysql-test/innodb-semi-consistent.test b/storage/innobase/mysql-test/innodb-semi-consistent.test
index 61ad7815ca9..61ad7815ca9 100644
--- a/storage/innodb_plugin/mysql-test/innodb-semi-consistent.test
+++ b/storage/innobase/mysql-test/innodb-semi-consistent.test
diff --git a/storage/innodb_plugin/mysql-test/innodb-timeout.result b/storage/innobase/mysql-test/innodb-timeout.result
index be9a688cd72..be9a688cd72 100644
--- a/storage/innodb_plugin/mysql-test/innodb-timeout.result
+++ b/storage/innobase/mysql-test/innodb-timeout.result
diff --git a/storage/innodb_plugin/mysql-test/innodb-timeout.test b/storage/innobase/mysql-test/innodb-timeout.test
index f23fe3cff2d..f23fe3cff2d 100644
--- a/storage/innodb_plugin/mysql-test/innodb-timeout.test
+++ b/storage/innobase/mysql-test/innodb-timeout.test
diff --git a/storage/innodb_plugin/mysql-test/innodb-use-sys-malloc-master.opt b/storage/innobase/mysql-test/innodb-use-sys-malloc-master.opt
index 889834add01..889834add01 100644
--- a/storage/innodb_plugin/mysql-test/innodb-use-sys-malloc-master.opt
+++ b/storage/innobase/mysql-test/innodb-use-sys-malloc-master.opt
diff --git a/storage/innodb_plugin/mysql-test/innodb-use-sys-malloc.result b/storage/innobase/mysql-test/innodb-use-sys-malloc.result
index 2ec4c7c8130..2ec4c7c8130 100644
--- a/storage/innodb_plugin/mysql-test/innodb-use-sys-malloc.result
+++ b/storage/innobase/mysql-test/innodb-use-sys-malloc.result
diff --git a/storage/innodb_plugin/mysql-test/innodb-use-sys-malloc.test b/storage/innobase/mysql-test/innodb-use-sys-malloc.test
index 325dd19d086..325dd19d086 100644
--- a/storage/innodb_plugin/mysql-test/innodb-use-sys-malloc.test
+++ b/storage/innobase/mysql-test/innodb-use-sys-malloc.test
diff --git a/storage/innodb_plugin/mysql-test/innodb-zip.result b/storage/innobase/mysql-test/innodb-zip.result
index b26c4112826..b26c4112826 100644
--- a/storage/innodb_plugin/mysql-test/innodb-zip.result
+++ b/storage/innobase/mysql-test/innodb-zip.result
diff --git a/storage/innodb_plugin/mysql-test/innodb-zip.test b/storage/innobase/mysql-test/innodb-zip.test
index 5bcd0e3c824..5bcd0e3c824 100644
--- a/storage/innodb_plugin/mysql-test/innodb-zip.test
+++ b/storage/innobase/mysql-test/innodb-zip.test
diff --git a/storage/innodb_plugin/mysql-test/innodb.result b/storage/innobase/mysql-test/innodb.result
index bdae7633fd1..bdae7633fd1 100644
--- a/storage/innodb_plugin/mysql-test/innodb.result
+++ b/storage/innobase/mysql-test/innodb.result
diff --git a/storage/innodb_plugin/mysql-test/innodb.test b/storage/innobase/mysql-test/innodb.test
index f46a3a70b56..f46a3a70b56 100644
--- a/storage/innodb_plugin/mysql-test/innodb.test
+++ b/storage/innobase/mysql-test/innodb.test
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug21704.result b/storage/innobase/mysql-test/innodb_bug21704.result
index b8e0b15d50d..b8e0b15d50d 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug21704.result
+++ b/storage/innobase/mysql-test/innodb_bug21704.result
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug21704.test b/storage/innobase/mysql-test/innodb_bug21704.test
index c649b61034c..c649b61034c 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug21704.test
+++ b/storage/innobase/mysql-test/innodb_bug21704.test
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug34053.result b/storage/innobase/mysql-test/innodb_bug34053.result
index 195775f74c8..195775f74c8 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug34053.result
+++ b/storage/innobase/mysql-test/innodb_bug34053.result
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug34053.test b/storage/innobase/mysql-test/innodb_bug34053.test
index b935e45c06d..b935e45c06d 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug34053.test
+++ b/storage/innobase/mysql-test/innodb_bug34053.test
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug34300.result b/storage/innobase/mysql-test/innodb_bug34300.result
index ae9fee81ad7..ae9fee81ad7 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug34300.result
+++ b/storage/innobase/mysql-test/innodb_bug34300.result
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug34300.test b/storage/innobase/mysql-test/innodb_bug34300.test
index 68c385fd72a..68c385fd72a 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug34300.test
+++ b/storage/innobase/mysql-test/innodb_bug34300.test
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug35220.result b/storage/innobase/mysql-test/innodb_bug35220.result
index 195775f74c8..195775f74c8 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug35220.result
+++ b/storage/innobase/mysql-test/innodb_bug35220.result
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug35220.test b/storage/innobase/mysql-test/innodb_bug35220.test
index 26f7d6b1ddd..26f7d6b1ddd 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug35220.test
+++ b/storage/innobase/mysql-test/innodb_bug35220.test
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug36169.result b/storage/innobase/mysql-test/innodb_bug36169.result
index aa80e4d7aa4..aa80e4d7aa4 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug36169.result
+++ b/storage/innobase/mysql-test/innodb_bug36169.result
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug36169.test b/storage/innobase/mysql-test/innodb_bug36169.test
index 5bf55193b5c..5bf55193b5c 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug36169.test
+++ b/storage/innobase/mysql-test/innodb_bug36169.test
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug36172.result b/storage/innobase/mysql-test/innodb_bug36172.result
index 195775f74c8..195775f74c8 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug36172.result
+++ b/storage/innobase/mysql-test/innodb_bug36172.result
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug36172.test b/storage/innobase/mysql-test/innodb_bug36172.test
index c6c4e6fae47..c6c4e6fae47 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug36172.test
+++ b/storage/innobase/mysql-test/innodb_bug36172.test
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug40360.result b/storage/innobase/mysql-test/innodb_bug40360.result
index ef4cf463903..ef4cf463903 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug40360.result
+++ b/storage/innobase/mysql-test/innodb_bug40360.result
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug40360.test b/storage/innobase/mysql-test/innodb_bug40360.test
index e88837aab4f..e88837aab4f 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug40360.test
+++ b/storage/innobase/mysql-test/innodb_bug40360.test
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug40565.result b/storage/innobase/mysql-test/innodb_bug40565.result
index 21e923d9336..21e923d9336 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug40565.result
+++ b/storage/innobase/mysql-test/innodb_bug40565.result
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug40565.test b/storage/innobase/mysql-test/innodb_bug40565.test
index d7aa0fd514a..d7aa0fd514a 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug40565.test
+++ b/storage/innobase/mysql-test/innodb_bug40565.test
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug41904.result b/storage/innobase/mysql-test/innodb_bug41904.result
index 6070d32d181..6070d32d181 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug41904.result
+++ b/storage/innobase/mysql-test/innodb_bug41904.result
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug41904.test b/storage/innobase/mysql-test/innodb_bug41904.test
index 365c5229adc..365c5229adc 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug41904.test
+++ b/storage/innobase/mysql-test/innodb_bug41904.test
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug42101-nonzero-master.opt b/storage/innobase/mysql-test/innodb_bug42101-nonzero-master.opt
index d71dbe17d5b..d71dbe17d5b 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug42101-nonzero-master.opt
+++ b/storage/innobase/mysql-test/innodb_bug42101-nonzero-master.opt
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug42101-nonzero.result b/storage/innobase/mysql-test/innodb_bug42101-nonzero.result
index 277dfffdd35..277dfffdd35 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug42101-nonzero.result
+++ b/storage/innobase/mysql-test/innodb_bug42101-nonzero.result
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug42101-nonzero.test b/storage/innobase/mysql-test/innodb_bug42101-nonzero.test
index 685fdf20489..685fdf20489 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug42101-nonzero.test
+++ b/storage/innobase/mysql-test/innodb_bug42101-nonzero.test
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug42101.result b/storage/innobase/mysql-test/innodb_bug42101.result
index 805097ffe9d..805097ffe9d 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug42101.result
+++ b/storage/innobase/mysql-test/innodb_bug42101.result
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug42101.test b/storage/innobase/mysql-test/innodb_bug42101.test
index b6536490d48..b6536490d48 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug42101.test
+++ b/storage/innobase/mysql-test/innodb_bug42101.test
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug44032.result b/storage/innobase/mysql-test/innodb_bug44032.result
index da2a000b06e..da2a000b06e 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug44032.result
+++ b/storage/innobase/mysql-test/innodb_bug44032.result
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug44032.test b/storage/innobase/mysql-test/innodb_bug44032.test
index a963cb8b68f..a963cb8b68f 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug44032.test
+++ b/storage/innobase/mysql-test/innodb_bug44032.test
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug44369.result b/storage/innobase/mysql-test/innodb_bug44369.result
index e4b84ecac19..e4b84ecac19 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug44369.result
+++ b/storage/innobase/mysql-test/innodb_bug44369.result
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug44369.test b/storage/innobase/mysql-test/innodb_bug44369.test
index 495059eb5e6..495059eb5e6 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug44369.test
+++ b/storage/innobase/mysql-test/innodb_bug44369.test
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug44571.result b/storage/innobase/mysql-test/innodb_bug44571.result
index 36374edcb3e..36374edcb3e 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug44571.result
+++ b/storage/innobase/mysql-test/innodb_bug44571.result
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug44571.test b/storage/innobase/mysql-test/innodb_bug44571.test
index 685463ceff9..685463ceff9 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug44571.test
+++ b/storage/innobase/mysql-test/innodb_bug44571.test
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug45357.result b/storage/innobase/mysql-test/innodb_bug45357.result
index 7adeff2062f..7adeff2062f 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug45357.result
+++ b/storage/innobase/mysql-test/innodb_bug45357.result
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug45357.test b/storage/innobase/mysql-test/innodb_bug45357.test
index 81727f352dd..81727f352dd 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug45357.test
+++ b/storage/innobase/mysql-test/innodb_bug45357.test
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug46000.result b/storage/innobase/mysql-test/innodb_bug46000.result
index ccff888a48d..ccff888a48d 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug46000.result
+++ b/storage/innobase/mysql-test/innodb_bug46000.result
diff --git a/storage/innodb_plugin/mysql-test/innodb_bug46000.test b/storage/innobase/mysql-test/innodb_bug46000.test
index 80c18c58ef0..80c18c58ef0 100644
--- a/storage/innodb_plugin/mysql-test/innodb_bug46000.test
+++ b/storage/innobase/mysql-test/innodb_bug46000.test
diff --git a/storage/innodb_plugin/mysql-test/innodb_file_format.result b/storage/innobase/mysql-test/innodb_file_format.result
index 8e9a317308b..8e9a317308b 100644
--- a/storage/innodb_plugin/mysql-test/innodb_file_format.result
+++ b/storage/innobase/mysql-test/innodb_file_format.result
diff --git a/storage/innodb_plugin/mysql-test/innodb_file_format.test b/storage/innobase/mysql-test/innodb_file_format.test
index d63c9b0228f..d63c9b0228f 100644
--- a/storage/innodb_plugin/mysql-test/innodb_file_format.test
+++ b/storage/innobase/mysql-test/innodb_file_format.test
diff --git a/storage/innodb_plugin/mysql-test/innodb_information_schema.result b/storage/innobase/mysql-test/innodb_information_schema.result
index 396cae579ce..396cae579ce 100644
--- a/storage/innodb_plugin/mysql-test/innodb_information_schema.result
+++ b/storage/innobase/mysql-test/innodb_information_schema.result
diff --git a/storage/innodb_plugin/mysql-test/innodb_information_schema.test b/storage/innobase/mysql-test/innodb_information_schema.test
index eaed653854a..eaed653854a 100644
--- a/storage/innodb_plugin/mysql-test/innodb_information_schema.test
+++ b/storage/innobase/mysql-test/innodb_information_schema.test
diff --git a/storage/innodb_plugin/mysql-test/innodb_trx_weight.inc b/storage/innobase/mysql-test/innodb_trx_weight.inc
index 56d3d47da36..56d3d47da36 100644
--- a/storage/innodb_plugin/mysql-test/innodb_trx_weight.inc
+++ b/storage/innobase/mysql-test/innodb_trx_weight.inc
diff --git a/storage/innodb_plugin/mysql-test/innodb_trx_weight.result b/storage/innobase/mysql-test/innodb_trx_weight.result
index 195775f74c8..195775f74c8 100644
--- a/storage/innodb_plugin/mysql-test/innodb_trx_weight.result
+++ b/storage/innobase/mysql-test/innodb_trx_weight.result
diff --git a/storage/innodb_plugin/mysql-test/innodb_trx_weight.test b/storage/innobase/mysql-test/innodb_trx_weight.test
index b72eaad345f..b72eaad345f 100644
--- a/storage/innodb_plugin/mysql-test/innodb_trx_weight.test
+++ b/storage/innobase/mysql-test/innodb_trx_weight.test
diff --git a/storage/innodb_plugin/mysql-test/patches/README b/storage/innobase/mysql-test/patches/README
index 122d756e9e3..122d756e9e3 100644
--- a/storage/innodb_plugin/mysql-test/patches/README
+++ b/storage/innobase/mysql-test/patches/README
diff --git a/storage/innodb_plugin/mysql-test/patches/index_merge_innodb-explain.diff b/storage/innobase/mysql-test/patches/index_merge_innodb-explain.diff
index d1ed8afc778..d1ed8afc778 100644
--- a/storage/innodb_plugin/mysql-test/patches/index_merge_innodb-explain.diff
+++ b/storage/innobase/mysql-test/patches/index_merge_innodb-explain.diff
diff --git a/storage/innodb_plugin/mysql-test/patches/information_schema.diff b/storage/innobase/mysql-test/patches/information_schema.diff
index a3a21f7a08d..a3a21f7a08d 100644
--- a/storage/innodb_plugin/mysql-test/patches/information_schema.diff
+++ b/storage/innobase/mysql-test/patches/information_schema.diff
diff --git a/storage/innodb_plugin/mysql-test/patches/innodb-index.diff b/storage/innobase/mysql-test/patches/innodb-index.diff
index 0b008c96f25..0b008c96f25 100644
--- a/storage/innodb_plugin/mysql-test/patches/innodb-index.diff
+++ b/storage/innobase/mysql-test/patches/innodb-index.diff
diff --git a/storage/innodb_plugin/mysql-test/patches/innodb_file_per_table.diff b/storage/innobase/mysql-test/patches/innodb_file_per_table.diff
index 8b7ae2036c9..8b7ae2036c9 100644
--- a/storage/innodb_plugin/mysql-test/patches/innodb_file_per_table.diff
+++ b/storage/innobase/mysql-test/patches/innodb_file_per_table.diff
diff --git a/storage/innodb_plugin/mysql-test/patches/innodb_lock_wait_timeout.diff b/storage/innobase/mysql-test/patches/innodb_lock_wait_timeout.diff
index bc61a0f5841..bc61a0f5841 100644
--- a/storage/innodb_plugin/mysql-test/patches/innodb_lock_wait_timeout.diff
+++ b/storage/innobase/mysql-test/patches/innodb_lock_wait_timeout.diff
diff --git a/storage/innodb_plugin/mysql-test/patches/innodb_thread_concurrency_basic.diff b/storage/innobase/mysql-test/patches/innodb_thread_concurrency_basic.diff
index 72e5457905f..72e5457905f 100644
--- a/storage/innodb_plugin/mysql-test/patches/innodb_thread_concurrency_basic.diff
+++ b/storage/innobase/mysql-test/patches/innodb_thread_concurrency_basic.diff
diff --git a/storage/innodb_plugin/mysql-test/patches/partition_innodb.diff b/storage/innobase/mysql-test/patches/partition_innodb.diff
index 01bc073008e..01bc073008e 100644
--- a/storage/innodb_plugin/mysql-test/patches/partition_innodb.diff
+++ b/storage/innobase/mysql-test/patches/partition_innodb.diff
diff --git a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c
index 085f62daacc..37edad442db 100644
--- a/storage/innobase/os/os0file.c
+++ b/storage/innobase/os/os0file.c
@@ -1,177 +1,238 @@
-/******************************************************
-The interface to the operating system file i/o primitives
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+/***********************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2009, Percona Inc.
+
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
-(c) 1995 Innobase Oy
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+***********************************************************************/
+
+/**************************************************//**
+@file os/os0file.c
+The interface to the operating system file i/o primitives
Created 10/21/1995 Heikki Tuuri
*******************************************************/
#include "os0file.h"
-#include "os0sync.h"
-#include "os0thread.h"
#include "ut0mem.h"
#include "srv0srv.h"
#include "srv0start.h"
#include "fil0fil.h"
#include "buf0buf.h"
-
-#if defined(UNIV_HOTBACKUP) && defined(__WIN__)
+#ifndef UNIV_HOTBACKUP
+# include "os0sync.h"
+# include "os0thread.h"
+#else /* !UNIV_HOTBACKUP */
+# ifdef __WIN__
/* Add includes for the _stat() call to compile on Windows */
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <errno.h>
-#endif /* UNIV_HOTBACKUP */
-
-#ifdef POSIX_ASYNC_IO
-/* We assume in this case that the OS has standard Posix aio (at least SunOS
-2.6, HP-UX 11i and AIX 4.3 have) */
-
-#endif
+# include <sys/types.h>
+# include <sys/stat.h>
+# include <errno.h>
+# endif /* __WIN__ */
+#endif /* !UNIV_HOTBACKUP */
/* This specifies the file permissions InnoDB uses when it creates files in
Unix; the value of os_innodb_umask is initialized in ha_innodb.cc to
my_umask */
#ifndef __WIN__
-ulint os_innodb_umask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
+/** Umask for creating files */
+UNIV_INTERN ulint os_innodb_umask
+ = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
#else
-ulint os_innodb_umask = 0;
+/** Umask for creating files */
+UNIV_INTERN ulint os_innodb_umask = 0;
#endif
#ifdef UNIV_DO_FLUSH
/* If the following is set to TRUE, we do not call os_file_flush in every
os_file_write. We can set this TRUE when the doublewrite buffer is used. */
-ibool os_do_not_call_flush_at_each_write = FALSE;
+UNIV_INTERN ibool os_do_not_call_flush_at_each_write = FALSE;
#else
/* We do not call os_file_flush in every os_file_write. */
#endif /* UNIV_DO_FLUSH */
+#ifdef UNIV_HOTBACKUP
+# define os_aio_use_native_aio FALSE
+#else /* UNIV_HOTBACKUP */
/* We use these mutexes to protect lseek + file i/o operation, if the
OS does not provide an atomic pread or pwrite, or similar */
#define OS_FILE_N_SEEK_MUTEXES 16
-os_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
+UNIV_INTERN os_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
/* In simulated aio, merge at most this many consecutive i/os */
#define OS_AIO_MERGE_N_CONSECUTIVE 64
-/* If this flag is TRUE, then we will use the native aio of the
+/** If this flag is TRUE, then we will use the native aio of the
OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio we build below with threads */
-ibool os_aio_use_native_aio = FALSE;
+UNIV_INTERN ibool os_aio_use_native_aio = FALSE;
-ibool os_aio_print_debug = FALSE;
+/** Flag: enable debug printout for asynchronous i/o */
+UNIV_INTERN ibool os_aio_print_debug = FALSE;
-/* The aio array slot structure */
+/** The asynchronous i/o array slot structure */
typedef struct os_aio_slot_struct os_aio_slot_t;
+/** The asynchronous i/o array slot structure */
struct os_aio_slot_struct{
- ibool is_read; /* TRUE if a read operation */
- ulint pos; /* index of the slot in the aio
+ ibool is_read; /*!< TRUE if a read operation */
+ ulint pos; /*!< index of the slot in the aio
array */
- ibool reserved; /* TRUE if this slot is reserved */
- time_t reservation_time;/* time when reserved */
- ulint len; /* length of the block to read or
+ ibool reserved; /*!< TRUE if this slot is reserved */
+ time_t reservation_time;/*!< time when reserved */
+ ulint len; /*!< length of the block to read or
write */
- byte* buf; /* buffer used in i/o */
- ulint type; /* OS_FILE_READ or OS_FILE_WRITE */
- ulint offset; /* 32 low bits of file offset in
+ byte* buf; /*!< buffer used in i/o */
+ ulint type; /*!< OS_FILE_READ or OS_FILE_WRITE */
+ ulint offset; /*!< 32 low bits of file offset in
bytes */
- ulint offset_high; /* 32 high bits of file offset */
- os_file_t file; /* file where to read or write */
- const char* name; /* file name or path */
- ibool io_already_done;/* used only in simulated aio:
+ ulint offset_high; /*!< 32 high bits of file offset */
+ os_file_t file; /*!< file where to read or write */
+ const char* name; /*!< file name or path */
+ ibool io_already_done;/*!< used only in simulated aio:
TRUE if the physical i/o already
made and only the slot message
needs to be passed to the caller
of os_aio_simulated_handle */
- fil_node_t* message1; /* message which is given by the */
- void* message2; /* the requester of an aio operation
+ fil_node_t* message1; /*!< message which is given by the */
+ void* message2; /*!< the requester of an aio operation
and which can be used to identify
which pending aio operation was
completed */
#ifdef WIN_ASYNC_IO
- os_event_t event; /* event object we need in the
+ os_event_t event; /*!< event object we need in the
OVERLAPPED struct */
- OVERLAPPED control; /* Windows control block for the
+ OVERLAPPED control; /*!< Windows control block for the
aio request */
-#elif defined(POSIX_ASYNC_IO)
- struct aiocb control; /* Posix control block for aio
- request */
#endif
};
-/* The aio array structure */
+/** The asynchronous i/o array structure */
typedef struct os_aio_array_struct os_aio_array_t;
+/** The asynchronous i/o array structure */
struct os_aio_array_struct{
- os_mutex_t mutex; /* the mutex protecting the aio array */
- os_event_t not_full; /* The event which is set to the signaled
- state when there is space in the aio
- outside the ibuf segment */
- os_event_t is_empty; /* The event which is set to the signaled
- state when there are no pending i/os
- in this array */
- ulint n_slots; /* Total number of slots in the aio array.
- This must be divisible by n_threads. */
- ulint n_segments;/* Number of segments in the aio array of
- pending aio requests. A thread can wait
- separately for any one of the segments. */
- ulint n_reserved;/* Number of reserved slots in the
- aio array outside the ibuf segment */
- os_aio_slot_t* slots; /* Pointer to the slots in the array */
+ os_mutex_t mutex; /*!< the mutex protecting the aio array */
+ os_event_t not_full;
+ /*!< The event which is set to the
+ signaled state when there is space in
+ the aio outside the ibuf segment */
+ os_event_t is_empty;
+ /*!< The event which is set to the
+ signaled state when there are no
+ pending i/os in this array */
+ ulint n_slots;/*!< Total number of slots in the aio
+ array. This must be divisible by
+ n_threads. */
+ ulint n_segments;
+ /*!< Number of segments in the aio
+ array of pending aio requests. A
+ thread can wait separately for any one
+ of the segments. */
+ ulint n_reserved;
+ /*!< Number of reserved slots in the
+ aio array outside the ibuf segment */
+ os_aio_slot_t* slots; /*!< Pointer to the slots in the array */
#ifdef __WIN__
os_native_event_t* native_events;
- /* Pointer to an array of OS native event
- handles where we copied the handles from
- slots, in the same order. This can be used
- in WaitForMultipleObjects; used only in
- Windows */
+ /*!< Pointer to an array of OS native
+ event handles where we copied the
+ handles from slots, in the same
+ order. This can be used in
+ WaitForMultipleObjects; used only in
+ Windows */
#endif
};
-/* Array of events used in simulated aio */
-os_event_t* os_aio_segment_wait_events = NULL;
+/** Array of events used in simulated aio */
+static os_event_t* os_aio_segment_wait_events = NULL;
-/* The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These
-are NULL when the module has not yet been initialized. */
-static os_aio_array_t* os_aio_read_array = NULL;
-static os_aio_array_t* os_aio_write_array = NULL;
-static os_aio_array_t* os_aio_ibuf_array = NULL;
-static os_aio_array_t* os_aio_log_array = NULL;
-static os_aio_array_t* os_aio_sync_array = NULL;
+/** The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These
+are NULL when the module has not yet been initialized. @{ */
+static os_aio_array_t* os_aio_read_array = NULL; /*!< Reads */
+static os_aio_array_t* os_aio_write_array = NULL; /*!< Writes */
+static os_aio_array_t* os_aio_ibuf_array = NULL; /*!< Insert buffer */
+static os_aio_array_t* os_aio_log_array = NULL; /*!< Redo log */
+static os_aio_array_t* os_aio_sync_array = NULL; /*!< Synchronous I/O */
+/* @} */
+/** Number of asynchronous I/O segments. Set by os_aio_init(). */
static ulint os_aio_n_segments = ULINT_UNDEFINED;
-/* If the following is TRUE, read i/o handler threads try to
+/** If the following is TRUE, read i/o handler threads try to
wait until a batch of new read requests have been posted */
static ibool os_aio_recommend_sleep_for_read_threads = FALSE;
+#endif /* UNIV_HOTBACKUP */
-ulint os_n_file_reads = 0;
-ulint os_bytes_read_since_printout = 0;
-ulint os_n_file_writes = 0;
-ulint os_n_fsyncs = 0;
-ulint os_n_file_reads_old = 0;
-ulint os_n_file_writes_old = 0;
-ulint os_n_fsyncs_old = 0;
-time_t os_last_printout;
-
-ibool os_has_said_disk_full = FALSE;
-
-/* The mutex protecting the following counts of pending I/O operations */
-static os_mutex_t os_file_count_mutex;
-ulint os_file_n_pending_preads = 0;
-ulint os_file_n_pending_pwrites = 0;
-ulint os_n_pending_writes = 0;
-ulint os_n_pending_reads = 0;
-
-/***************************************************************************
-Gets the operating system version. Currently works only on Windows. */
-
+UNIV_INTERN ulint os_n_file_reads = 0;
+UNIV_INTERN ulint os_bytes_read_since_printout = 0;
+UNIV_INTERN ulint os_n_file_writes = 0;
+UNIV_INTERN ulint os_n_fsyncs = 0;
+UNIV_INTERN ulint os_n_file_reads_old = 0;
+UNIV_INTERN ulint os_n_file_writes_old = 0;
+UNIV_INTERN ulint os_n_fsyncs_old = 0;
+UNIV_INTERN time_t os_last_printout;
+
+UNIV_INTERN ibool os_has_said_disk_full = FALSE;
+
+#ifndef UNIV_HOTBACKUP
+/** The mutex protecting the following counts of pending I/O operations */
+static os_mutex_t os_file_count_mutex;
+#endif /* !UNIV_HOTBACKUP */
+/** Number of pending os_file_pread() operations */
+UNIV_INTERN ulint os_file_n_pending_preads = 0;
+/** Number of pending os_file_pwrite() operations */
+UNIV_INTERN ulint os_file_n_pending_pwrites = 0;
+/** Number of pending write operations */
+UNIV_INTERN ulint os_n_pending_writes = 0;
+/** Number of pending read operations */
+UNIV_INTERN ulint os_n_pending_reads = 0;
+
+/***********************************************************************//**
+Gets the operating system version. Currently works only on Windows.
+@return OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000 */
+UNIV_INTERN
ulint
os_get_os_version(void)
/*===================*/
- /* out: OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000 */
{
#ifdef __WIN__
OSVERSIONINFO os_info;
@@ -201,18 +262,17 @@ os_get_os_version(void)
#endif
}
-/***************************************************************************
+/***********************************************************************//**
Retrieves the last error number if an error occurs in a file io function.
The number should be retrieved before any other OS calls (because they may
overwrite the error number). If the number is not known to this program,
-the OS error number + 100 is returned. */
-
+the OS error number + 100 is returned.
+@return error number, or OS error number + 100 */
+UNIV_INTERN
ulint
os_file_get_last_error(
/*===================*/
- /* out: error number, or OS error
- number + 100 */
- ibool report_all_errors) /* in: TRUE if we want an error message
+ ibool report_all_errors) /*!< in: TRUE if we want an error message
printed of all errors */
{
ulint err;
@@ -257,6 +317,12 @@ os_file_get_last_error(
" software or another instance\n"
"InnoDB: of MySQL."
" Please close it to get rid of this error.\n");
+ } else if (err == ERROR_WORKING_SET_QUOTA
+ || err == ERROR_NO_SYSTEM_RESOURCES) {
+ fprintf(stderr,
+ "InnoDB: The error means that there are no"
+ " sufficient system resources or quota to"
+ " complete the operation.\n");
} else if (err == ERROR_OPERATION_ABORTED) {
fprintf(stderr,
"InnoDB: The error means that the I/O"
@@ -269,7 +335,7 @@ os_file_get_last_error(
"InnoDB: Some operating system error numbers"
" are described at\n"
"InnoDB: "
- "http://dev.mysql.com/doc/refman/5.1/en/"
+ REFMAN
"operating-system-error-codes.html\n");
}
}
@@ -285,6 +351,9 @@ os_file_get_last_error(
} else if (err == ERROR_SHARING_VIOLATION
|| err == ERROR_LOCK_VIOLATION) {
return(OS_FILE_SHARING_VIOLATION);
+ } else if (err == ERROR_WORKING_SET_QUOTA
+ || err == ERROR_NO_SYSTEM_RESOURCES) {
+ return(OS_FILE_INSUFFICIENT_RESOURCE);
} else if (err == ERROR_OPERATION_ABORTED) {
return(OS_FILE_OPERATION_ABORTED);
} else {
@@ -330,7 +399,7 @@ os_file_get_last_error(
"InnoDB: Some operating system"
" error numbers are described at\n"
"InnoDB: "
- "http://dev.mysql.com/doc/refman/5.1/en/"
+ REFMAN
"operating-system-error-codes.html\n");
}
}
@@ -339,10 +408,6 @@ os_file_get_last_error(
if (err == ENOSPC) {
return(OS_FILE_DISK_FULL);
-#ifdef POSIX_ASYNC_IO
- } else if (err == EAGAIN) {
- return(OS_FILE_AIO_RESOURCES_RESERVED);
-#endif
} else if (err == ENOENT) {
return(OS_FILE_NOT_FOUND);
} else if (err == EEXIST) {
@@ -355,20 +420,18 @@ os_file_get_last_error(
#endif
}
-/********************************************************************
+/****************************************************************//**
Does error handling when a file operation fails.
Conditionally exits (calling exit(3)) based on should_exit value and the
-error type */
-
+error type
+@return TRUE if we should retry the operation */
static
ibool
os_file_handle_error_cond_exit(
/*===========================*/
- /* out: TRUE if we should retry the
- operation */
- const char* name, /* in: name of a file or NULL */
- const char* operation, /* in: operation */
- ibool should_exit) /* in: call exit(3) if unknown error
+ const char* name, /*!< in: name of a file or NULL */
+ const char* operation, /*!< in: operation */
+ ibool should_exit) /*!< in: call exit(3) if unknown error
and this parameter is TRUE */
{
ulint err;
@@ -411,6 +474,10 @@ os_file_handle_error_cond_exit(
os_thread_sleep(10000000); /* 10 sec */
return(TRUE);
+ } else if (err == OS_FILE_INSUFFICIENT_RESOURCE) {
+
+ os_thread_sleep(100000); /* 100 ms */
+ return(TRUE);
} else if (err == OS_FILE_OPERATION_ABORTED) {
os_thread_sleep(100000); /* 100 ms */
@@ -435,31 +502,29 @@ os_file_handle_error_cond_exit(
return(FALSE);
}
-/********************************************************************
-Does error handling when a file operation fails. */
+/****************************************************************//**
+Does error handling when a file operation fails.
+@return TRUE if we should retry the operation */
static
ibool
os_file_handle_error(
/*=================*/
- /* out: TRUE if we should retry the
- operation */
- const char* name, /* in: name of a file or NULL */
- const char* operation)/* in: operation */
+ const char* name, /*!< in: name of a file or NULL */
+ const char* operation)/*!< in: operation */
{
/* exit in case of unknown error */
return(os_file_handle_error_cond_exit(name, operation, TRUE));
}
-/********************************************************************
-Does error handling when a file operation fails. */
+/****************************************************************//**
+Does error handling when a file operation fails.
+@return TRUE if we should retry the operation */
static
ibool
os_file_handle_error_no_exit(
/*=========================*/
- /* out: TRUE if we should retry the
- operation */
- const char* name, /* in: name of a file or NULL */
- const char* operation)/* in: operation */
+ const char* name, /*!< in: name of a file or NULL */
+ const char* operation)/*!< in: operation */
{
/* don't exit in case of unknown error */
return(os_file_handle_error_cond_exit(name, operation, FALSE));
@@ -474,15 +539,15 @@ os_file_handle_error_no_exit(
# undef USE_FILE_LOCK
#endif
#ifdef USE_FILE_LOCK
-/********************************************************************
-Obtain an exclusive lock on a file. */
+/****************************************************************//**
+Obtain an exclusive lock on a file.
+@return 0 on success */
static
int
os_file_lock(
/*=========*/
- /* out: 0 on success */
- int fd, /* in: file descriptor */
- const char* name) /* in: file name */
+ int fd, /*!< in: file descriptor */
+ const char* name) /*!< in: file name */
{
struct flock lk;
lk.l_type = F_WRLCK;
@@ -507,9 +572,10 @@ os_file_lock(
}
#endif /* USE_FILE_LOCK */
-/********************************************************************
+#ifndef UNIV_HOTBACKUP
+/****************************************************************//**
Creates the seek mutexes used in positioned reads and writes. */
-
+UNIV_INTERN
void
os_io_init_simple(void)
/*===================*/
@@ -523,74 +589,57 @@ os_io_init_simple(void)
}
}
-#if !defined(UNIV_HOTBACKUP) && !defined(__NETWARE__)
-/*************************************************************************
-Creates a temporary file that will be deleted on close.
-This function is defined in ha_innodb.cc. */
-
-int
-innobase_mysql_tmpfile(void);
-/*========================*/
- /* out: temporary file descriptor, or < 0 on error */
-#endif /* !UNIV_HOTBACKUP && !__NETWARE__ */
-
-/***************************************************************************
+/***********************************************************************//**
Creates a temporary file. This function is like tmpfile(3), but
the temporary file is created in the MySQL temporary directory.
On Netware, this function is like tmpfile(3), because the C run-time
-library of Netware does not expose the delete-on-close flag. */
-
+library of Netware does not expose the delete-on-close flag.
+@return temporary file handle, or NULL on error */
+UNIV_INTERN
FILE*
os_file_create_tmpfile(void)
/*========================*/
- /* out: temporary file handle, or NULL on error */
{
-#ifdef UNIV_HOTBACKUP
- ut_error;
-
- return(NULL);
-#else
-# ifdef __NETWARE__
+#ifdef __NETWARE__
FILE* file = tmpfile();
-# else /* __NETWARE__ */
+#else /* __NETWARE__ */
FILE* file = NULL;
int fd = innobase_mysql_tmpfile();
if (fd >= 0) {
file = fdopen(fd, "w+b");
}
-# endif /* __NETWARE__ */
+#endif /* __NETWARE__ */
if (!file) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: unable to create temporary file;"
" errno: %d\n", errno);
-# ifndef __NETWARE__
+#ifndef __NETWARE__
if (fd >= 0) {
close(fd);
}
-# endif /* !__NETWARE__ */
+#endif /* !__NETWARE__ */
}
return(file);
-#endif /* UNIV_HOTBACKUP */
}
+#endif /* !UNIV_HOTBACKUP */
-/***************************************************************************
+/***********************************************************************//**
The os_file_opendir() function opens a directory stream corresponding to the
directory named by the dirname argument. The directory stream is positioned
at the first entry. In both Unix and Windows we automatically skip the '.'
-and '..' items at the start of the directory listing. */
-
+and '..' items at the start of the directory listing.
+@return directory stream, NULL if error */
+UNIV_INTERN
os_file_dir_t
os_file_opendir(
/*============*/
- /* out: directory stream, NULL if
- error */
- const char* dirname, /* in: directory name; it must not
+ const char* dirname, /*!< in: directory name; it must not
contain a trailing '\' or '/' */
- ibool error_is_fatal) /* in: TRUE if we should treat an
+ ibool error_is_fatal) /*!< in: TRUE if we should treat an
error as a fatal error; if we try to
open symlinks then we do not wish a
fatal error if it happens not to be
@@ -637,14 +686,14 @@ os_file_opendir(
#endif
}
-/***************************************************************************
-Closes a directory stream. */
-
+/***********************************************************************//**
+Closes a directory stream.
+@return 0 if success, -1 if failure */
+UNIV_INTERN
int
os_file_closedir(
/*=============*/
- /* out: 0 if success, -1 if failure */
- os_file_dir_t dir) /* in: directory stream */
+ os_file_dir_t dir) /*!< in: directory stream */
{
#ifdef __WIN__
BOOL ret;
@@ -671,18 +720,17 @@ os_file_closedir(
#endif
}
-/***************************************************************************
+/***********************************************************************//**
This function returns information of the next file in the directory. We jump
-over the '.' and '..' entries in the directory. */
-
+over the '.' and '..' entries in the directory.
+@return 0 if ok, -1 if error, 1 if at the end of the directory */
+UNIV_INTERN
int
os_file_readdir_next_file(
/*======================*/
- /* out: 0 if ok, -1 if error, 1 if at the end
- of the directory */
- const char* dirname,/* in: directory name or path */
- os_file_dir_t dir, /* in: directory stream */
- os_file_stat_t* info) /* in/out: buffer where the info is returned */
+ const char* dirname,/*!< in: directory name or path */
+ os_file_dir_t dir, /*!< in: directory stream */
+ os_file_stat_t* info) /*!< in/out: buffer where the info is returned */
{
#ifdef __WIN__
LPWIN32_FIND_DATA lpFindFileData;
@@ -704,8 +752,8 @@ next_file:
strcpy(info->name, (char *) lpFindFileData->cFileName);
- info->size = (ib_longlong)(lpFindFileData->nFileSizeLow)
- + (((ib_longlong)(lpFindFileData->nFileSizeHigh))
+ info->size = (ib_int64_t)(lpFindFileData->nFileSizeLow)
+ + (((ib_int64_t)(lpFindFileData->nFileSizeHigh))
<< 32);
if (lpFindFileData->dwFileAttributes
@@ -714,8 +762,7 @@ next_file:
/* TODO: MySQL has apparently its own symlink
implementation in Windows, dbname.sym can
redirect a database directory:
- http://dev.mysql.com/doc/refman/5.1/en/
- windows-symbolic-links.html */
+ REFMAN "windows-symbolic-links.html" */
info->type = OS_FILE_TYPE_LINK;
} else if (lpFindFileData->dwFileAttributes
& FILE_ATTRIBUTE_DIRECTORY) {
@@ -798,6 +845,23 @@ next_file:
ret = stat(full_path, &statinfo);
if (ret) {
+
+ if (errno == ENOENT) {
+ /* readdir() returned a file that does not exist,
+ it must have been deleted in the meantime. Do what
+ would have happened if the file was deleted before
+ readdir() - ignore and go to the next entry.
+ If this is the last entry then info->name will still
+ contain the name of the deleted file when this
+ function returns, but this is not an issue since the
+ caller shouldn't be looking at info when end of
+ directory is returned. */
+
+ ut_free(full_path);
+
+ goto next_file;
+ }
+
os_file_handle_error_no_exit(full_path, "stat");
ut_free(full_path);
@@ -805,7 +869,7 @@ next_file:
return(-1);
}
- info->size = (ib_longlong)statinfo.st_size;
+ info->size = (ib_int64_t)statinfo.st_size;
if (S_ISDIR(statinfo.st_mode)) {
info->type = OS_FILE_TYPE_DIR;
@@ -823,20 +887,19 @@ next_file:
#endif
}
-/*********************************************************************
+/*****************************************************************//**
This function attempts to create a directory named pathname. The new directory
gets default permissions. On Unix the permissions are (0770 & ~umask). If the
directory exists already, nothing is done and the call succeeds, unless the
-fail_if_exists arguments is true. */
-
+fail_if_exists argument is true.
+@return TRUE if call succeeds, FALSE on error */
+UNIV_INTERN
ibool
os_file_create_directory(
/*=====================*/
- /* out: TRUE if call succeeds,
- FALSE on error */
- const char* pathname, /* in: directory name as
+ const char* pathname, /*!< in: directory name as
null-terminated string */
- ibool fail_if_exists) /* in: if TRUE, pre-existing directory
+ ibool fail_if_exists) /*!< in: if TRUE, pre-existing directory
is treated as an error. */
{
#ifdef __WIN__
@@ -869,27 +932,26 @@ os_file_create_directory(
#endif
}
-/********************************************************************
-A simple function to open or create a file. */
-
+/****************************************************************//**
+A simple function to open or create a file.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INTERN
os_file_t
os_file_create_simple(
/*==================*/
- /* out, own: handle to the file, not defined
- if error, error number can be retrieved with
- os_file_get_last_error */
- const char* name, /* in: name of the file or path as a
+ const char* name, /*!< in: name of the file or path as a
null-terminated string */
- ulint create_mode,/* in: OS_FILE_OPEN if an existing file is
+ ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file is
opened (if does not exist, error), or
OS_FILE_CREATE if a new file is created
(if exists, error), or
OS_FILE_CREATE_PATH if new file
(if exists, error) and subdirectories along
its path are created (if needed)*/
- ulint access_type,/* in: OS_FILE_READ_ONLY or
+ ulint access_type,/*!< in: OS_FILE_READ_ONLY or
OS_FILE_READ_WRITE */
- ibool* success)/* out: TRUE if succeed, FALSE if error */
+ ibool* success)/*!< out: TRUE if succeed, FALSE if error */
{
#ifdef __WIN__
os_file_t file;
@@ -935,7 +997,7 @@ try_again:
NULL, /* default security attributes */
create_flag,
attributes,
- NULL); /* no template file */
+ NULL); /*!< no template file */
if (file == INVALID_HANDLE_VALUE) {
*success = FALSE;
@@ -1011,26 +1073,25 @@ try_again:
#endif /* __WIN__ */
}
-/********************************************************************
-A simple function to open or create a file. */
-
+/****************************************************************//**
+A simple function to open or create a file.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INTERN
os_file_t
os_file_create_simple_no_error_handling(
/*====================================*/
- /* out, own: handle to the file, not defined
- if error, error number can be retrieved with
- os_file_get_last_error */
- const char* name, /* in: name of the file or path as a
+ const char* name, /*!< in: name of the file or path as a
null-terminated string */
- ulint create_mode,/* in: OS_FILE_OPEN if an existing file
+ ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
is opened (if does not exist, error), or
OS_FILE_CREATE if a new file is created
(if exists, error) */
- ulint access_type,/* in: OS_FILE_READ_ONLY,
+ ulint access_type,/*!< in: OS_FILE_READ_ONLY,
OS_FILE_READ_WRITE, or
OS_FILE_READ_ALLOW_DELETE; the last option is
used by a backup program reading the file */
- ibool* success)/* out: TRUE if succeed, FALSE if error */
+ ibool* success)/*!< out: TRUE if succeed, FALSE if error */
{
#ifdef __WIN__
os_file_t file;
@@ -1057,7 +1118,7 @@ os_file_create_simple_no_error_handling(
} else if (access_type == OS_FILE_READ_ALLOW_DELETE) {
access = GENERIC_READ;
share_mode = FILE_SHARE_DELETE | FILE_SHARE_READ
- | FILE_SHARE_WRITE; /* A backup program has to give
+ | FILE_SHARE_WRITE; /*!< A backup program has to give
mysqld the maximum freedom to
do what it likes with the
file */
@@ -1072,7 +1133,7 @@ os_file_create_simple_no_error_handling(
NULL, /* default security attributes */
create_flag,
attributes,
- NULL); /* no template file */
+ NULL); /*!< no template file */
if (file == INVALID_HANDLE_VALUE) {
*success = FALSE;
@@ -1124,19 +1185,17 @@ os_file_create_simple_no_error_handling(
#endif /* __WIN__ */
}
-/********************************************************************
+/****************************************************************//**
Tries to disable OS caching on an opened file descriptor. */
-
+UNIV_INTERN
void
os_file_set_nocache(
/*================*/
- int fd, /* in: file descriptor to alter */
- const char* file_name, /* in: used in the diagnostic message */
- const char* operation_name) /* in: used in the diagnostic message,
- we call os_file_set_nocache()
- immediately after opening or creating
- a file, so this is either "open" or
- "create" */
+ int fd, /*!< in: file descriptor to alter */
+ const char* file_name, /*!< in: file name, used in the
+ diagnostic message */
+ const char* operation_name) /*!< in: "open" or "create"; used in the
+ diagnostic message */
{
/* some versions of Solaris may not have DIRECTIO_ON */
#if defined(UNIV_SOLARIS) && defined(DIRECTIO_ON)
@@ -1169,18 +1228,17 @@ os_file_set_nocache(
#endif
}
-/********************************************************************
-Opens an existing file or creates a new. */
-
+/****************************************************************//**
+Opens an existing file or creates a new one.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INTERN
os_file_t
os_file_create(
/*===========*/
- /* out, own: handle to the file, not defined
- if error, error number can be retrieved with
- os_file_get_last_error */
- const char* name, /* in: name of the file or path as a
+ const char* name, /*!< in: name of the file or path as a
null-terminated string */
- ulint create_mode,/* in: OS_FILE_OPEN if an existing file
+ ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
is opened (if does not exist, error), or
OS_FILE_CREATE if a new file is created
(if exists, error),
@@ -1188,15 +1246,15 @@ os_file_create(
or an old overwritten;
OS_FILE_OPEN_RAW, if a raw device or disk
partition should be opened */
- ulint purpose,/* in: OS_FILE_AIO, if asynchronous,
+ ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous,
non-buffered i/o is desired,
OS_FILE_NORMAL, if any normal file;
NOTE that it also depends on type, os_aio_..
and srv_.. variables whether we really use
async i/o or unbuffered i/o: look in the
function source code for the exact rules */
- ulint type, /* in: OS_DATA_FILE or OS_LOG_FILE */
- ibool* success)/* out: TRUE if succeed, FALSE if error */
+ ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
+ ibool* success)/*!< out: TRUE if succeed, FALSE if error */
{
#ifdef __WIN__
os_file_t file;
@@ -1232,6 +1290,7 @@ try_again:
}
#endif
#ifdef UNIV_NON_BUFFERED_IO
+# ifndef UNIV_HOTBACKUP
if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
/* Do not use unbuffered i/o to log files because
value 2 denotes that we do not flush the log at every
@@ -1240,10 +1299,14 @@ try_again:
== SRV_WIN_IO_UNBUFFERED) {
attributes = attributes | FILE_FLAG_NO_BUFFERING;
}
-#endif
+# else /* !UNIV_HOTBACKUP */
+ attributes = attributes | FILE_FLAG_NO_BUFFERING;
+# endif /* !UNIV_HOTBACKUP */
+#endif /* UNIV_NON_BUFFERED_IO */
} else if (purpose == OS_FILE_NORMAL) {
attributes = 0;
#ifdef UNIV_NON_BUFFERED_IO
+# ifndef UNIV_HOTBACKUP
if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
/* Do not use unbuffered i/o to log files because
value 2 denotes that we do not flush the log at every
@@ -1252,7 +1315,10 @@ try_again:
== SRV_WIN_IO_UNBUFFERED) {
attributes = attributes | FILE_FLAG_NO_BUFFERING;
}
-#endif
+# else /* !UNIV_HOTBACKUP */
+ attributes = attributes | FILE_FLAG_NO_BUFFERING;
+# endif /* !UNIV_HOTBACKUP */
+#endif /* UNIV_NON_BUFFERED_IO */
} else {
attributes = 0;
ut_error;
@@ -1275,7 +1341,7 @@ try_again:
NULL, /* default security attributes */
create_flag,
attributes,
- NULL); /* no template file */
+ NULL); /*!< no template file */
if (file == INVALID_HANDLE_VALUE) {
*success = FALSE;
@@ -1428,14 +1494,14 @@ try_again:
#endif /* __WIN__ */
}
-/***************************************************************************
-Deletes a file if it exists. The file has to be closed before calling this. */
-
+/***********************************************************************//**
+Deletes a file if it exists. The file has to be closed before calling this.
+@return TRUE if success */
+UNIV_INTERN
ibool
os_file_delete_if_exists(
/*=====================*/
- /* out: TRUE if success */
- const char* name) /* in: file path as a null-terminated string */
+ const char* name) /*!< in: file path as a null-terminated string */
{
#ifdef __WIN__
BOOL ret;
@@ -1478,7 +1544,7 @@ loop:
#else
int ret;
- ret = unlink((const char*)name);
+ ret = unlink(name);
if (ret != 0 && errno != ENOENT) {
os_file_handle_error_no_exit(name, "delete");
@@ -1490,14 +1556,14 @@ loop:
#endif
}
-/***************************************************************************
-Deletes a file. The file has to be closed before calling this. */
-
+/***********************************************************************//**
+Deletes a file. The file has to be closed before calling this.
+@return TRUE if success */
+UNIV_INTERN
ibool
os_file_delete(
/*===========*/
- /* out: TRUE if success */
- const char* name) /* in: file path as a null-terminated string */
+ const char* name) /*!< in: file path as a null-terminated string */
{
#ifdef __WIN__
BOOL ret;
@@ -1541,7 +1607,7 @@ loop:
#else
int ret;
- ret = unlink((const char*)name);
+ ret = unlink(name);
if (ret != 0) {
os_file_handle_error_no_exit(name, "delete");
@@ -1553,17 +1619,17 @@ loop:
#endif
}
-/***************************************************************************
+/***********************************************************************//**
Renames a file (can also move it to another directory). It is safest that the
-file is closed before calling this function. */
-
+file is closed before calling this function.
+@return TRUE if success */
+UNIV_INTERN
ibool
os_file_rename(
/*===========*/
- /* out: TRUE if success */
- const char* oldpath,/* in: old file path as a null-terminated
+ const char* oldpath,/*!< in: old file path as a null-terminated
string */
- const char* newpath)/* in: new file path */
+ const char* newpath)/*!< in: new file path */
{
#ifdef __WIN__
BOOL ret;
@@ -1580,7 +1646,7 @@ os_file_rename(
#else
int ret;
- ret = rename((const char*)oldpath, (const char*)newpath);
+ ret = rename(oldpath, newpath);
if (ret != 0) {
os_file_handle_error_no_exit(oldpath, "rename");
@@ -1592,15 +1658,15 @@ os_file_rename(
#endif
}
-/***************************************************************************
+/***********************************************************************//**
Closes a file handle. In case of error, error number can be retrieved with
-os_file_get_last_error. */
-
+os_file_get_last_error.
+@return TRUE if success */
+UNIV_INTERN
ibool
os_file_close(
/*==========*/
- /* out: TRUE if success */
- os_file_t file) /* in, own: handle to a file */
+ os_file_t file) /*!< in, own: handle to a file */
{
#ifdef __WIN__
BOOL ret;
@@ -1631,14 +1697,15 @@ os_file_close(
#endif
}
-/***************************************************************************
-Closes a file handle. */
-
+#ifdef UNIV_HOTBACKUP
+/***********************************************************************//**
+Closes a file handle.
+@return TRUE if success */
+UNIV_INTERN
ibool
os_file_close_no_error_handling(
/*============================*/
- /* out: TRUE if success */
- os_file_t file) /* in, own: handle to a file */
+ os_file_t file) /*!< in, own: handle to a file */
{
#ifdef __WIN__
BOOL ret;
@@ -1665,18 +1732,19 @@ os_file_close_no_error_handling(
return(TRUE);
#endif
}
+#endif /* UNIV_HOTBACKUP */
-/***************************************************************************
-Gets a file size. */
-
+/***********************************************************************//**
+Gets a file size.
+@return TRUE if success */
+UNIV_INTERN
ibool
os_file_get_size(
/*=============*/
- /* out: TRUE if success */
- os_file_t file, /* in: handle to a file */
- ulint* size, /* out: least significant 32 bits of file
+ os_file_t file, /*!< in: handle to a file */
+ ulint* size, /*!< out: least significant 32 bits of file
size */
- ulint* size_high)/* out: most significant 32 bits of size */
+ ulint* size_high)/*!< out: most significant 32 bits of size */
{
#ifdef __WIN__
DWORD high;
@@ -1714,14 +1782,14 @@ os_file_get_size(
#endif
}
-/***************************************************************************
-Gets file size as a 64-bit integer ib_longlong. */
-
-ib_longlong
+/***********************************************************************//**
+Gets file size as a 64-bit integer ib_int64_t.
+@return size in bytes, -1 if error */
+UNIV_INTERN
+ib_int64_t
os_file_get_size_as_iblonglong(
/*===========================*/
- /* out: size in bytes, -1 if error */
- os_file_t file) /* in: handle to a file */
+ os_file_t file) /*!< in: handle to a file */
{
ulint size;
ulint size_high;
@@ -1734,25 +1802,25 @@ os_file_get_size_as_iblonglong(
return(-1);
}
- return((((ib_longlong)size_high) << 32) + (ib_longlong)size);
+ return((((ib_int64_t)size_high) << 32) + (ib_int64_t)size);
}
-/***************************************************************************
-Write the specified number of zeros to a newly created file. */
-
+/***********************************************************************//**
+Write the specified number of zeros to a newly created file.
+@return TRUE if success */
+UNIV_INTERN
ibool
os_file_set_size(
/*=============*/
- /* out: TRUE if success */
- const char* name, /* in: name of the file or path as a
+ const char* name, /*!< in: name of the file or path as a
null-terminated string */
- os_file_t file, /* in: handle to a file */
- ulint size, /* in: least significant 32 bits of file
+ os_file_t file, /*!< in: handle to a file */
+ ulint size, /*!< in: least significant 32 bits of file
size */
- ulint size_high)/* in: most significant 32 bits of size */
+ ulint size_high)/*!< in: most significant 32 bits of size */
{
- ib_longlong current_size;
- ib_longlong desired_size;
+ ib_int64_t current_size;
+ ib_int64_t desired_size;
ibool ret;
byte* buf;
byte* buf2;
@@ -1761,7 +1829,7 @@ os_file_set_size(
ut_a(size == (size & 0xFFFFFFFF));
current_size = 0;
- desired_size = (ib_longlong)size + (((ib_longlong)size_high) << 32);
+ desired_size = (ib_int64_t)size + (((ib_int64_t)size_high) << 32);
/* Write up to 1 megabyte at a time. */
buf_size = ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE))
@@ -1774,7 +1842,7 @@ os_file_set_size(
/* Write buffer full of zeros */
memset(buf, 0, buf_size);
- if (desired_size >= (ib_longlong)(100 * 1024 * 1024)) {
+ if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) {
fprintf(stderr, "InnoDB: Progress in MB:");
}
@@ -1782,7 +1850,7 @@ os_file_set_size(
while (current_size < desired_size) {
ulint n_bytes;
- if (desired_size - current_size < (ib_longlong) buf_size) {
+ if (desired_size - current_size < (ib_int64_t) buf_size) {
n_bytes = (ulint) (desired_size - current_size);
} else {
n_bytes = buf_size;
@@ -1798,18 +1866,18 @@ os_file_set_size(
}
/* Print about progress for each 100 MB written */
- if ((ib_longlong) (current_size + n_bytes) / (ib_longlong)(100 * 1024 * 1024)
- != current_size / (ib_longlong)(100 * 1024 * 1024)) {
+ if ((ib_int64_t) (current_size + n_bytes) / (ib_int64_t)(100 * 1024 * 1024)
+ != current_size / (ib_int64_t)(100 * 1024 * 1024)) {
fprintf(stderr, " %lu00",
(ulong) ((current_size + n_bytes)
- / (ib_longlong)(100 * 1024 * 1024)));
+ / (ib_int64_t)(100 * 1024 * 1024)));
}
current_size += n_bytes;
}
- if (desired_size >= (ib_longlong)(100 * 1024 * 1024)) {
+ if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) {
fprintf(stderr, "\n");
}
@@ -1826,14 +1894,14 @@ error_handling:
return(FALSE);
}
-/***************************************************************************
-Truncates a file at its current position. */
-
+/***********************************************************************//**
+Truncates a file at its current position.
+@return TRUE if success */
+UNIV_INTERN
ibool
os_file_set_eof(
/*============*/
- /* out: TRUE if success */
- FILE* file) /* in: file to be truncated */
+ FILE* file) /*!< in: file to be truncated */
{
#ifdef __WIN__
HANDLE h = (HANDLE) _get_osfhandle(fileno(file));
@@ -1844,17 +1912,17 @@ os_file_set_eof(
}
#ifndef __WIN__
-/***************************************************************************
+/***********************************************************************//**
Wrapper to fsync(2) that retries the call on some errors.
Returns the value 0 if successful; otherwise the value -1 is returned and
-the global variable errno is set to indicate the error. */
+the global variable errno is set to indicate the error.
+@return 0 if success, -1 otherwise */
static
int
os_file_fsync(
/*==========*/
- /* out: 0 if success, -1 otherwise */
- os_file_t file) /* in: handle to a file */
+ os_file_t file) /*!< in: handle to a file */
{
int ret;
int failures;
@@ -1892,14 +1960,14 @@ os_file_fsync(
}
#endif /* !__WIN__ */
-/***************************************************************************
-Flushes the write buffers of a given file to the disk. */
-
+/***********************************************************************//**
+Flushes the write buffers of a given file to the disk.
+@return TRUE if success */
+UNIV_INTERN
ibool
os_file_flush(
/*==========*/
- /* out: TRUE if success */
- os_file_t file) /* in, own: handle to a file */
+ os_file_t file) /*!< in, own: handle to a file */
{
#ifdef __WIN__
BOOL ret;
@@ -1991,23 +2059,25 @@ os_file_flush(
}
#ifndef __WIN__
-/***********************************************************************
-Does a synchronous read operation in Posix. */
+/*******************************************************************//**
+Does a synchronous read operation in Posix.
+@return number of bytes read, -1 if error */
static
ssize_t
os_file_pread(
/*==========*/
- /* out: number of bytes read, -1 if error */
- os_file_t file, /* in: handle to a file */
- void* buf, /* in: buffer where to read */
- ulint n, /* in: number of bytes to read */
- ulint offset, /* in: least significant 32 bits of file
+ os_file_t file, /*!< in: handle to a file */
+ void* buf, /*!< in: buffer where to read */
+ ulint n, /*!< in: number of bytes to read */
+ ulint offset, /*!< in: least significant 32 bits of file
offset from where to read */
- ulint offset_high) /* in: most significant 32 bits of
+ ulint offset_high) /*!< in: most significant 32 bits of
offset */
{
off_t offs;
+#if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD)
ssize_t n_bytes;
+#endif /* HAVE_PREAD && !HAVE_BROKEN_PREAD */
ut_a((offset & 0xFFFFFFFFUL) == offset);
@@ -2046,16 +2116,20 @@ os_file_pread(
{
off_t ret_offset;
ssize_t ret;
+#ifndef UNIV_HOTBACKUP
ulint i;
+#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads++;
os_mutex_exit(os_file_count_mutex);
+#ifndef UNIV_HOTBACKUP
/* Protect the seek / read operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
+#endif /* !UNIV_HOTBACKUP */
ret_offset = lseek(file, offs, SEEK_SET);
@@ -2065,7 +2139,9 @@ os_file_pread(
ret = read(file, buf, (ssize_t)n);
}
+#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
+#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
@@ -2076,19 +2152,19 @@ os_file_pread(
#endif
}
-/***********************************************************************
-Does a synchronous write operation in Posix. */
+/*******************************************************************//**
+Does a synchronous write operation in Posix.
+@return number of bytes written, -1 if error */
static
ssize_t
os_file_pwrite(
/*===========*/
- /* out: number of bytes written, -1 if error */
- os_file_t file, /* in: handle to a file */
- const void* buf, /* in: buffer from where to write */
- ulint n, /* in: number of bytes to write */
- ulint offset, /* in: least significant 32 bits of file
+ os_file_t file, /*!< in: handle to a file */
+ const void* buf, /*!< in: buffer from where to write */
+ ulint n, /*!< in: number of bytes to write */
+ ulint offset, /*!< in: least significant 32 bits of file
offset where to write */
- ulint offset_high) /* in: most significant 32 bits of
+ ulint offset_high) /*!< in: most significant 32 bits of
offset */
{
ssize_t ret;
@@ -2143,16 +2219,20 @@ os_file_pwrite(
#else
{
off_t ret_offset;
+# ifndef UNIV_HOTBACKUP
ulint i;
+# endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_writes++;
os_mutex_exit(os_file_count_mutex);
+# ifndef UNIV_HOTBACKUP
/* Protect the seek / write operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
+# endif /* !UNIV_HOTBACKUP */
ret_offset = lseek(file, offs, SEEK_SET);
@@ -2178,7 +2258,9 @@ os_file_pwrite(
# endif /* UNIV_DO_FLUSH */
func_exit:
+# ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
+# endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_writes--;
@@ -2190,21 +2272,20 @@ func_exit:
}
#endif
-/***********************************************************************
-Requests a synchronous positioned read operation. */
-
+/*******************************************************************//**
+Requests a synchronous positioned read operation.
+@return TRUE if request was successful, FALSE if fail */
+UNIV_INTERN
ibool
os_file_read(
/*=========*/
- /* out: TRUE if request was
- successful, FALSE if fail */
- os_file_t file, /* in: handle to a file */
- void* buf, /* in: buffer where to read */
- ulint offset, /* in: least significant 32 bits of file
+ os_file_t file, /*!< in: handle to a file */
+ void* buf, /*!< in: buffer where to read */
+ ulint offset, /*!< in: least significant 32 bits of file
offset where to read */
- ulint offset_high, /* in: most significant 32 bits of
+ ulint offset_high, /*!< in: most significant 32 bits of
offset */
- ulint n) /* in: number of bytes to read */
+ ulint n) /*!< in: number of bytes to read */
{
#ifdef __WIN__
BOOL ret;
@@ -2213,7 +2294,9 @@ os_file_read(
DWORD low;
DWORD high;
ibool retry;
+#ifndef UNIV_HOTBACKUP
ulint i;
+#endif /* !UNIV_HOTBACKUP */
ut_a((offset & 0xFFFFFFFFUL) == offset);
@@ -2232,16 +2315,20 @@ try_again:
os_n_pending_reads++;
os_mutex_exit(os_file_count_mutex);
+#ifndef UNIV_HOTBACKUP
/* Protect the seek / read operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
+#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
+#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
+#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
@@ -2252,7 +2339,9 @@ try_again:
ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
+#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
+#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
@@ -2261,7 +2350,7 @@ try_again:
if (ret && len == n) {
return(TRUE);
}
-#else
+#else /* __WIN__ */
ibool retry;
ssize_t ret;
@@ -2280,7 +2369,7 @@ try_again:
"InnoDB: Was only able to read %ld.\n",
(ulong)n, (ulong)offset_high,
(ulong)offset, (long)ret);
-#endif
+#endif /* __WIN__ */
#ifdef __WIN__
error_handling:
#endif
@@ -2306,22 +2395,21 @@ error_handling:
return(FALSE);
}
-/***********************************************************************
+/*******************************************************************//**
Requests a synchronous positioned read operation. This function does not do
-any error handling. In case of error it returns FALSE. */
-
+any error handling. In case of error it returns FALSE.
+@return TRUE if request was successful, FALSE if fail */
+UNIV_INTERN
ibool
os_file_read_no_error_handling(
/*===========================*/
- /* out: TRUE if request was
- successful, FALSE if fail */
- os_file_t file, /* in: handle to a file */
- void* buf, /* in: buffer where to read */
- ulint offset, /* in: least significant 32 bits of file
+ os_file_t file, /*!< in: handle to a file */
+ void* buf, /*!< in: buffer where to read */
+ ulint offset, /*!< in: least significant 32 bits of file
offset where to read */
- ulint offset_high, /* in: most significant 32 bits of
+ ulint offset_high, /*!< in: most significant 32 bits of
offset */
- ulint n) /* in: number of bytes to read */
+ ulint n) /*!< in: number of bytes to read */
{
#ifdef __WIN__
BOOL ret;
@@ -2330,7 +2418,9 @@ os_file_read_no_error_handling(
DWORD low;
DWORD high;
ibool retry;
+#ifndef UNIV_HOTBACKUP
ulint i;
+#endif /* !UNIV_HOTBACKUP */
ut_a((offset & 0xFFFFFFFFUL) == offset);
@@ -2349,16 +2439,20 @@ try_again:
os_n_pending_reads++;
os_mutex_exit(os_file_count_mutex);
+#ifndef UNIV_HOTBACKUP
/* Protect the seek / read operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
+#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
+#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
+#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
@@ -2369,7 +2463,9 @@ try_again:
ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
+#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
+#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_reads--;
@@ -2378,7 +2474,7 @@ try_again:
if (ret && len == n) {
return(TRUE);
}
-#else
+#else /* __WIN__ */
ibool retry;
ssize_t ret;
@@ -2391,7 +2487,7 @@ try_again:
return(TRUE);
}
-#endif
+#endif /* __WIN__ */
#ifdef __WIN__
error_handling:
#endif
@@ -2404,17 +2500,17 @@ error_handling:
return(FALSE);
}
-/***********************************************************************
+/*******************************************************************//**
Rewind file to its start, read at most size - 1 bytes from it to str, and
NUL-terminate str. All errors are silently ignored. This function is
mostly meant to be used with temporary files. */
-
+UNIV_INTERN
void
os_file_read_string(
/*================*/
- FILE* file, /* in: file to read from */
- char* str, /* in: buffer where to read */
- ulint size) /* in: size of buffer */
+ FILE* file, /*!< in: file to read from */
+ char* str, /*!< in: buffer where to read */
+ ulint size) /*!< in: size of buffer */
{
size_t flen;
@@ -2427,23 +2523,22 @@ os_file_read_string(
str[flen] = '\0';
}
-/***********************************************************************
-Requests a synchronous write operation. */
-
+/*******************************************************************//**
+Requests a synchronous write operation.
+@return TRUE if request was successful, FALSE if fail */
+UNIV_INTERN
ibool
os_file_write(
/*==========*/
- /* out: TRUE if request was
- successful, FALSE if fail */
- const char* name, /* in: name of the file or path as a
+ const char* name, /*!< in: name of the file or path as a
null-terminated string */
- os_file_t file, /* in: handle to a file */
- const void* buf, /* in: buffer from which to write */
- ulint offset, /* in: least significant 32 bits of file
+ os_file_t file, /*!< in: handle to a file */
+ const void* buf, /*!< in: buffer from which to write */
+ ulint offset, /*!< in: least significant 32 bits of file
offset where to write */
- ulint offset_high, /* in: most significant 32 bits of
+ ulint offset_high, /*!< in: most significant 32 bits of
offset */
- ulint n) /* in: number of bytes to write */
+ ulint n) /*!< in: number of bytes to write */
{
#ifdef __WIN__
BOOL ret;
@@ -2451,9 +2546,11 @@ os_file_write(
DWORD ret2;
DWORD low;
DWORD high;
- ulint i;
ulint n_retries = 0;
ulint err;
+#ifndef UNIV_HOTBACKUP
+ ulint i;
+#endif /* !UNIV_HOTBACKUP */
ut_a((offset & 0xFFFFFFFF) == offset);
@@ -2470,16 +2567,20 @@ retry:
os_n_pending_writes++;
os_mutex_exit(os_file_count_mutex);
+#ifndef UNIV_HOTBACKUP
/* Protect the seek / write operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
os_mutex_enter(os_file_seek_mutexes[i]);
+#endif /* !UNIV_HOTBACKUP */
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
+#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
+#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_writes--;
@@ -2495,8 +2596,7 @@ retry:
"InnoDB: Some operating system error numbers"
" are described at\n"
"InnoDB: "
- "http://dev.mysql.com/doc/refman/5.1/en/"
- "operating-system-error-codes.html\n",
+ REFMAN "operating-system-error-codes.html\n",
name, (ulong) offset_high, (ulong) offset,
(ulong) GetLastError());
@@ -2514,7 +2614,9 @@ retry:
}
# endif /* UNIV_DO_FLUSH */
+#ifndef UNIV_HOTBACKUP
os_mutex_exit(os_file_seek_mutexes[i]);
+#endif /* !UNIV_HOTBACKUP */
os_mutex_enter(os_file_count_mutex);
os_n_pending_writes--;
@@ -2567,8 +2669,7 @@ retry:
"InnoDB: Some operating system error numbers"
" are described at\n"
"InnoDB: "
- "http://dev.mysql.com/doc/refman/5.1/en/"
- "operating-system-error-codes.html\n");
+ REFMAN "operating-system-error-codes.html\n");
os_has_said_disk_full = TRUE;
}
@@ -2610,8 +2711,7 @@ retry:
"InnoDB: Some operating system error numbers"
" are described at\n"
"InnoDB: "
- "http://dev.mysql.com/doc/refman/5.1/en/"
- "operating-system-error-codes.html\n");
+ REFMAN "operating-system-error-codes.html\n");
os_has_said_disk_full = TRUE;
}
@@ -2620,16 +2720,16 @@ retry:
#endif
}
-/***********************************************************************
-Check the existence and type of the given file. */
-
+/*******************************************************************//**
+Check the existence and type of the given file.
+@return TRUE if call succeeded */
+UNIV_INTERN
ibool
os_file_status(
/*===========*/
- /* out: TRUE if call succeeded */
- const char* path, /* in: pathname of the file */
- ibool* exists, /* out: TRUE if file exists */
- os_file_type_t* type) /* out: type of the file (if it exists) */
+ const char* path, /*!< in: pathname of the file */
+ ibool* exists, /*!< out: TRUE if file exists */
+ os_file_type_t* type) /*!< out: type of the file (if it exists) */
{
#ifdef __WIN__
int ret;
@@ -2692,16 +2792,15 @@ os_file_status(
#endif
}
-/***********************************************************************
-This function returns information about the specified file */
-
+/*******************************************************************//**
+This function returns information about the specified file.
+@return TRUE if stat information found */
+UNIV_INTERN
ibool
os_file_get_status(
/*===============*/
- /* out: TRUE if stat
- information found */
- const char* path, /* in: pathname of the file */
- os_file_stat_t* stat_info) /* information of a file in a
+ const char* path, /*!< in: pathname of the file */
+ os_file_stat_t* stat_info) /*!< information of a file in a
directory */
{
#ifdef __WIN__
@@ -2778,7 +2877,7 @@ os_file_get_status(
# define OS_FILE_PATH_SEPARATOR '/'
#endif
-/********************************************************************
+/****************************************************************//**
The function os_file_dirname returns a directory component of a
null-terminated pathname string. In the usual case, dirname returns
the string up to, but not including, the final '/', and basename
@@ -2804,14 +2903,13 @@ returned by dirname and basename for different paths:
"/" "/" "/"
"." "." "."
".." "." ".."
-*/
+@return own: directory component of the pathname */
+UNIV_INTERN
char*
os_file_dirname(
/*============*/
- /* out, own: directory component of the
- pathname */
- const char* path) /* in: pathname */
+ const char* path) /*!< in: pathname */
{
/* Find the offset of the last slash */
const char* last_slash = strrchr(path, OS_FILE_PATH_SEPARATOR);
@@ -2834,15 +2932,14 @@ os_file_dirname(
return(mem_strdupl(path, last_slash - path));
}
-/********************************************************************
-Creates all missing subdirectories along the given path. */
-
+/****************************************************************//**
+Creates all missing subdirectories along the given path.
+@return TRUE if call succeeded, FALSE otherwise */
+UNIV_INTERN
ibool
os_file_create_subdirs_if_needed(
/*=============================*/
- /* out: TRUE if call succeeded
- FALSE otherwise */
- const char* path) /* in: path name */
+ const char* path) /*!< in: path name */
{
char* subdir;
ibool success, subdir_exists;
@@ -2875,31 +2972,32 @@ os_file_create_subdirs_if_needed(
return(success);
}
-/********************************************************************
-Returns a pointer to the nth slot in the aio array. */
+#ifndef UNIV_HOTBACKUP
+/****************************************************************//**
+Returns a pointer to the nth slot in the aio array.
+@return pointer to slot */
static
os_aio_slot_t*
os_aio_array_get_nth_slot(
/*======================*/
- /* out: pointer to slot */
- os_aio_array_t* array, /* in: aio array */
- ulint index) /* in: index of the slot */
+ os_aio_array_t* array, /*!< in: aio array */
+ ulint index) /*!< in: index of the slot */
{
ut_a(index < array->n_slots);
return((array->slots) + index);
}
-/****************************************************************************
-Creates an aio wait array. */
+/************************************************************************//**
+Creates an aio wait array.
+@return own: aio array */
static
os_aio_array_t*
os_aio_array_create(
/*================*/
- /* out, own: aio array */
- ulint n, /* in: maximum number of pending aio operations
+ ulint n, /*!< in: maximum number of pending aio operations
allowed; n must be divisible by n_segments */
- ulint n_segments) /* in: number of segments in the aio array */
+ ulint n_segments) /*!< in: number of segments in the aio array */
{
os_aio_array_t* array;
ulint i;
@@ -2944,33 +3042,55 @@ os_aio_array_create(
return(array);
}
-/****************************************************************************
-Initializes the asynchronous io system. Calls also os_io_init_simple.
-Creates a separate aio array for
-non-ibuf read and write, a third aio array for the ibuf i/o, with just one
-segment, two aio arrays for log reads and writes with one segment, and a
-synchronous aio array of the specified size. The combined number of segments
-in the three first aio arrays is the parameter n_segments given to the
-function. The caller must create an i/o handler thread for each segment in
-the four first arrays, but not for the sync aio array. */
+/************************************************************************//**
+Frees an aio wait array. */
+static
+void
+os_aio_array_free(
+/*==============*/
+ os_aio_array_t* array) /*!< in, own: array to free */
+{
+#ifdef WIN_ASYNC_IO
+ ulint i;
+
+ for (i = 0; i < array->n_slots; i++) {
+ os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i);
+ os_event_free(slot->event);
+ }
+#endif /* WIN_ASYNC_IO */
+
+#ifdef __WIN__
+ ut_free(array->native_events);
+#endif /* __WIN__ */
+ os_mutex_free(array->mutex);
+ os_event_free(array->not_full);
+ os_event_free(array->is_empty);
+ ut_free(array->slots);
+ ut_free(array);
+}
+
+/*******************************************************************//**
+Initializes the asynchronous io system. Creates one array each for ibuf
+and log i/o. Also creates one array each for read and write where each
+array is divided logically into n_read_segs and n_write_segs
+respectively. The caller must create an i/o handler thread for each
+segment in these arrays. This function also creates the sync array.
+No i/o handler thread needs to be created for that */
+UNIV_INTERN
void
os_aio_init(
/*========*/
- ulint n, /* in: maximum number of pending aio operations
- allowed; n must be divisible by n_segments */
- ulint n_segments, /* in: combined number of segments in the four
- first aio arrays; must be >= 4 */
- ulint n_slots_sync) /* in: number of slots in the sync aio array */
+ ulint n_per_seg, /*!< in: maximum number of pending aio
+ operations allowed per segment */
+ ulint n_read_segs, /*!< in: number of reader threads */
+ ulint n_write_segs, /*!< in: number of writer threads */
+ ulint n_slots_sync) /*!< in: number of slots in the sync aio
+ array */
{
- ulint n_read_segs;
- ulint n_write_segs;
- ulint n_per_seg;
ulint i;
-#ifdef POSIX_ASYNC_IO
- sigset_t sigset;
-#endif
- ut_ad(n % n_segments == 0);
+ ulint n_segments = 2 + n_read_segs + n_write_segs;
+
ut_ad(n_segments >= 4);
os_io_init_simple();
@@ -2979,9 +3099,6 @@ os_aio_init(
srv_set_io_thread_op_info(i, "not started yet");
}
- n_per_seg = n / n_segments;
- n_write_segs = (n_segments - 2) / 2;
- n_read_segs = n_segments - 2 - n_write_segs;
/* fprintf(stderr, "Array n per seg %lu\n", n_per_seg); */
@@ -3021,33 +3138,46 @@ os_aio_init(
os_last_printout = time(NULL);
-#ifdef POSIX_ASYNC_IO
- /* Block aio signals from the current thread and its children:
- for this to work, the current thread must be the first created
- in the database, so that all its children will inherit its
- signal mask */
-
- /* TODO: to work MySQL needs the SIGALARM signal; the following
- will not work yet! */
- sigemptyset(&sigset);
- sigaddset(&sigset, SIGRTMIN + 1 + 0);
- sigaddset(&sigset, SIGRTMIN + 1 + 1);
- sigaddset(&sigset, SIGRTMIN + 1 + 2);
- sigaddset(&sigset, SIGRTMIN + 1 + 3);
-
- pthread_sigmask(SIG_BLOCK, &sigset, NULL); */
-#endif
- }
+}
+
+/*******************************************************************//**
+Frees the asynchronous io system. */
+UNIV_INTERN
+void
+os_aio_free(void)
+/*=============*/
+{
+ ulint i;
+
+ os_aio_array_free(os_aio_ibuf_array);
+ os_aio_ibuf_array = NULL;
+ os_aio_array_free(os_aio_log_array);
+ os_aio_log_array = NULL;
+ os_aio_array_free(os_aio_read_array);
+ os_aio_read_array = NULL;
+ os_aio_array_free(os_aio_write_array);
+ os_aio_write_array = NULL;
+ os_aio_array_free(os_aio_sync_array);
+ os_aio_sync_array = NULL;
+
+ for (i = 0; i < os_aio_n_segments; i++) {
+ os_event_free(os_aio_segment_wait_events[i]);
+ }
+
+ ut_free(os_aio_segment_wait_events);
+ os_aio_segment_wait_events = 0;
+ os_aio_n_segments = 0;
+}
#ifdef WIN_ASYNC_IO
-/****************************************************************************
+/************************************************************************//**
Wakes up all async i/o threads in the array in Windows async i/o at
shutdown. */
static
void
os_aio_array_wake_win_aio_at_shutdown(
/*==================================*/
- os_aio_array_t* array) /* in: aio array */
+ os_aio_array_t* array) /*!< in: aio array */
{
ulint i;
@@ -3058,10 +3188,10 @@ os_aio_array_wake_win_aio_at_shutdown(
}
#endif
-/****************************************************************************
+/************************************************************************//**
Wakes up all async i/o threads so that they know to exit themselves in
shutdown. */
-
+UNIV_INTERN
void
os_aio_wake_all_threads_at_shutdown(void)
/*=====================================*/
@@ -3083,10 +3213,10 @@ os_aio_wake_all_threads_at_shutdown(void)
}
}
-/****************************************************************************
+/************************************************************************//**
Waits until there are no pending writes in os_aio_write_array. There can
be other, synchronous, pending writes. */
-
+UNIV_INTERN
void
os_aio_wait_until_no_pending_writes(void)
/*=====================================*/
@@ -3094,16 +3224,16 @@ os_aio_wait_until_no_pending_writes(void)
os_event_wait(os_aio_write_array->is_empty);
}
-/**************************************************************************
-Calculates segment number for a slot. */
+/**********************************************************************//**
+Calculates segment number for a slot.
+@return segment number (which is the number used by, for example,
+i/o-handler threads) */
static
ulint
os_aio_get_segment_no_from_slot(
/*============================*/
- /* out: segment number (which is the number
- used by, for example, i/o-handler threads) */
- os_aio_array_t* array, /* in: aio wait array */
- os_aio_slot_t* slot) /* in: slot in this array */
+ os_aio_array_t* array, /*!< in: aio wait array */
+ os_aio_slot_t* slot) /*!< in: slot in this array */
{
ulint segment;
ulint seg_len;
@@ -3131,16 +3261,15 @@ os_aio_get_segment_no_from_slot(
return(segment);
}
-/**************************************************************************
-Calculates local segment number and aio array from global segment number. */
+/**********************************************************************//**
+Calculates local segment number and aio array from global segment number.
+@return local segment number within the aio array */
static
ulint
os_aio_get_array_and_local_segment(
/*===============================*/
- /* out: local segment number within
- the aio array */
- os_aio_array_t** array, /* out: aio wait array */
- ulint global_segment)/* in: global segment number */
+ os_aio_array_t** array, /*!< out: aio wait array */
+ ulint global_segment)/*!< in: global segment number */
{
ulint segment;
@@ -3167,100 +3296,48 @@ os_aio_get_array_and_local_segment(
return(segment);
}
-/***********************************************************************
-Gets an integer value designating a specified aio array. This is used
-to give numbers to signals in Posix aio. */
-
-#if !defined(WIN_ASYNC_IO) && defined(POSIX_ASYNC_IO)
-static
-ulint
-os_aio_get_array_no(
-/*================*/
- os_aio_array_t* array) /* in: aio array */
-{
- if (array == os_aio_ibuf_array) {
-
- return(0);
-
- } else if (array == os_aio_log_array) {
-
- return(1);
-
- } else if (array == os_aio_read_array) {
-
- return(2);
- } else if (array == os_aio_write_array) {
-
- return(3);
- } else {
- ut_error;
-
- return(0);
- }
-}
-
-/***********************************************************************
-Gets the aio array for its number. */
-static
-os_aio_array_t*
-os_aio_get_array_from_no(
-/*=====================*/
- /* out: aio array */
- ulint n) /* in: array number */
-{
- if (n == 0) {
- return(os_aio_ibuf_array);
- } else if (n == 1) {
-
- return(os_aio_log_array);
- } else if (n == 2) {
-
- return(os_aio_read_array);
- } else if (n == 3) {
-
- return(os_aio_write_array);
- } else {
- ut_error;
-
- return(NULL);
- }
-}
-#endif /* if !defined(WIN_ASYNC_IO) && defined(POSIX_ASYNC_IO) */
-
-/***********************************************************************
+/*******************************************************************//**
Requests for a slot in the aio array. If no slot is available, waits until
-not_full-event becomes signaled. */
+not_full-event becomes signaled.
+@return pointer to slot */
static
os_aio_slot_t*
os_aio_array_reserve_slot(
/*======================*/
- /* out: pointer to slot */
- ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */
- os_aio_array_t* array, /* in: aio array */
- fil_node_t* message1,/* in: message to be passed along with
+ ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
+ os_aio_array_t* array, /*!< in: aio array */
+ fil_node_t* message1,/*!< in: message to be passed along with
the aio operation */
- void* message2,/* in: message to be passed along with
+ void* message2,/*!< in: message to be passed along with
the aio operation */
- os_file_t file, /* in: file handle */
- const char* name, /* in: name of the file or path as a
+ os_file_t file, /*!< in: file handle */
+ const char* name, /*!< in: name of the file or path as a
null-terminated string */
- void* buf, /* in: buffer where to read or from which
+ void* buf, /*!< in: buffer where to read or from which
to write */
- ulint offset, /* in: least significant 32 bits of file
+ ulint offset, /*!< in: least significant 32 bits of file
offset */
- ulint offset_high, /* in: most significant 32 bits of
+ ulint offset_high, /*!< in: most significant 32 bits of
offset */
- ulint len) /* in: length of the block to read or write */
+ ulint len) /*!< in: length of the block to read or write */
{
os_aio_slot_t* slot;
#ifdef WIN_ASYNC_IO
OVERLAPPED* control;
-
-#elif defined(POSIX_ASYNC_IO)
-
- struct aiocb* control;
#endif
ulint i;
+ ulint slots_per_seg;
+ ulint local_seg;
+
+ /* No need of a mutex. Only reading constant fields */
+ slots_per_seg = array->n_slots / array->n_segments;
+
+ /* We attempt to keep adjacent blocks in the same local
+ segment. This can help in merging IO requests when we are
+ doing simulated AIO */
+ local_seg = (offset >> (UNIV_PAGE_SIZE_SHIFT + 6))
+ % array->n_segments;
+
loop:
os_mutex_enter(array->mutex);
@@ -3279,14 +3356,26 @@ loop:
goto loop;
}
+ /* First try to find a slot in the preferred local segment */
+ for (i = local_seg * slots_per_seg; i < array->n_slots; i++) {
+ slot = os_aio_array_get_nth_slot(array, i);
+
+ if (slot->reserved == FALSE) {
+ goto found;
+ }
+ }
+
+ /* Fall back to a full scan. We are guaranteed to find a slot */
for (i = 0;; i++) {
slot = os_aio_array_get_nth_slot(array, i);
if (slot->reserved == FALSE) {
- break;
+ goto found;
}
}
+found:
+ ut_a(slot->reserved == FALSE);
array->n_reserved++;
if (array->n_reserved == 1) {
@@ -3315,43 +3404,21 @@ loop:
control->Offset = (DWORD)offset;
control->OffsetHigh = (DWORD)offset_high;
os_event_reset(slot->event);
-
-#elif defined(POSIX_ASYNC_IO)
-
-#if (UNIV_WORD_SIZE == 8)
- offset = offset + (offset_high << 32);
-#else
- ut_a(offset_high == 0);
-#endif
- control = &(slot->control);
- control->aio_fildes = file;
- control->aio_buf = buf;
- control->aio_nbytes = len;
- control->aio_offset = offset;
- control->aio_reqprio = 0;
- control->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
- control->aio_sigevent.sigev_signo
- = SIGRTMIN + 1 + os_aio_get_array_no(array);
- /* TODO: How to choose the signal numbers? */
- /*
- fprintf(stderr, "AIO signal number %lu\n",
- (ulint) control->aio_sigevent.sigev_signo);
- */
- control->aio_sigevent.sigev_value.sival_ptr = slot;
#endif
+
os_mutex_exit(array->mutex);
return(slot);
}
-/***********************************************************************
+/*******************************************************************//**
Frees a slot in the aio array. */
static
void
os_aio_array_free_slot(
/*===================*/
- os_aio_array_t* array, /* in: aio array */
- os_aio_slot_t* slot) /* in: pointer to slot */
+ os_aio_array_t* array, /*!< in: aio array */
+ os_aio_slot_t* slot) /*!< in: pointer to slot */
{
ut_ad(array);
ut_ad(slot);
@@ -3378,13 +3445,13 @@ os_aio_array_free_slot(
os_mutex_exit(array->mutex);
}
-/**************************************************************************
+/**********************************************************************//**
Wakes up a simulated aio i/o-handler thread if it has something to do. */
static
void
os_aio_simulated_wake_handler_thread(
/*=================================*/
- ulint global_segment) /* in: the number of the segment in the aio
+ ulint global_segment) /*!< in: the number of the segment in the aio
arrays */
{
os_aio_array_t* array;
@@ -3420,9 +3487,9 @@ os_aio_simulated_wake_handler_thread(
}
}
-/**************************************************************************
+/**********************************************************************//**
Wakes up simulated aio i/o-handler threads if they have something to do. */
-
+UNIV_INTERN
void
os_aio_simulated_wake_handler_threads(void)
/*=======================================*/
@@ -3442,19 +3509,31 @@ os_aio_simulated_wake_handler_threads(void)
}
}
-/**************************************************************************
+/**********************************************************************//**
This function can be called if one wants to post a batch of reads and
prefers an i/o-handler thread to handle them all at once later. You must
call os_aio_simulated_wake_handler_threads later to ensure the threads
are not left sleeping! */
-
+UNIV_INTERN
void
os_aio_simulated_put_read_threads_to_sleep(void)
/*============================================*/
{
+
+/* The idea of putting background IO threads to sleep is only for
+Windows when using simulated AIO. Windows XP seems to schedule
+background threads too eagerly to allow for coalescing during
+readahead requests. */
+#ifdef __WIN__
os_aio_array_t* array;
ulint g;
+ if (os_aio_use_native_aio) {
+ /* We do not use simulated aio: do nothing */
+
+ return;
+ }
+
os_aio_recommend_sleep_for_read_threads = TRUE;
for (g = 0; g < os_aio_n_segments; g++) {
@@ -3465,18 +3544,18 @@ os_aio_simulated_put_read_threads_to_sleep(void)
os_event_reset(os_aio_segment_wait_events[g]);
}
}
+#endif /* __WIN__ */
}
-/***********************************************************************
-Requests an asynchronous i/o operation. */
-
+/*******************************************************************//**
+Requests an asynchronous i/o operation.
+@return TRUE if request was queued successfully, FALSE if fail */
+UNIV_INTERN
ibool
os_aio(
/*===*/
- /* out: TRUE if request was queued
- successfully, FALSE if fail */
- ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */
- ulint mode, /* in: OS_AIO_NORMAL, ..., possibly ORed
+ ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
+ ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed
to OS_AIO_SIMULATED_WAKE_LATER: the
last flag advises this function not to wake
i/o-handler threads, but the caller will
@@ -3489,21 +3568,24 @@ os_aio(
because i/os are not actually handled until
all have been posted: use with great
caution! */
- const char* name, /* in: name of the file or path as a
+ const char* name, /*!< in: name of the file or path as a
null-terminated string */
- os_file_t file, /* in: handle to a file */
- void* buf, /* in: buffer where to read or from which
+ os_file_t file, /*!< in: handle to a file */
+ void* buf, /*!< in: buffer where to read or from which
to write */
- ulint offset, /* in: least significant 32 bits of file
+ ulint offset, /*!< in: least significant 32 bits of file
offset where to read or write */
- ulint offset_high, /* in: most significant 32 bits of
+ ulint offset_high, /*!< in: most significant 32 bits of
offset */
- ulint n, /* in: number of bytes to read or write */
- fil_node_t* message1,/* in: messages for the aio handler (these
- can be used to identify a completed aio
- operation); if mode is OS_AIO_SYNC, these
- are ignored */
- void* message2)
+ ulint n, /*!< in: number of bytes to read or write */
+ fil_node_t* message1,/*!< in: message for the aio handler
+ (can be used to identify a completed
+ aio operation); ignored if mode is
+ OS_AIO_SYNC */
+ void* message2)/*!< in: message for the aio handler
+ (can be used to identify a completed
+ aio operation); ignored if mode is
+ OS_AIO_SYNC */
{
os_aio_array_t* array;
os_aio_slot_t* slot;
@@ -3586,10 +3668,6 @@ try_again:
ret = ReadFile(file, buf, (DWORD)n, &len,
&(slot->control));
-#elif defined(POSIX_ASYNC_IO)
- slot->control.aio_lio_opcode = LIO_READ;
- err = (ulint) aio_read(&(slot->control));
- fprintf(stderr, "Starting POSIX aio read %lu\n", err);
#endif
} else {
if (!wake_later) {
@@ -3604,10 +3682,6 @@ try_again:
os_n_file_writes++;
ret = WriteFile(file, buf, (DWORD)n, &len,
&(slot->control));
-#elif defined(POSIX_ASYNC_IO)
- slot->control.aio_lio_opcode = LIO_WRITE;
- err = (ulint) aio_write(&(slot->control));
- fprintf(stderr, "Starting POSIX aio write %lu\n", err);
#endif
} else {
if (!wake_later) {
@@ -3667,19 +3741,19 @@ try_again:
}
#ifdef WIN_ASYNC_IO
-/**************************************************************************
+/**********************************************************************//**
This function is only used in Windows asynchronous i/o.
Waits for an aio operation to complete. This function is used to wait the
for completed requests. The aio array of pending requests is divided
into segments. The thread specifies which segment or slot it wants to wait
for. NOTE: this function will also take care of freeing the aio slot,
-therefore no other thread is allowed to do the freeing! */
-
+therefore no other thread is allowed to do the freeing!
+@return TRUE if the aio operation succeeded */
+UNIV_INTERN
ibool
os_aio_windows_handle(
/*==================*/
- /* out: TRUE if the aio operation succeeded */
- ulint segment, /* in: the number of the segment in the aio
+ ulint segment, /*!< in: the number of the segment in the aio
arrays to wait for; segment 0 is the ibuf
i/o thread, segment 1 the log i/o thread,
then follow the non-ibuf read threads, and as
@@ -3687,15 +3761,15 @@ os_aio_windows_handle(
this is ULINT_UNDEFINED, then it means that
sync aio is used, and this parameter is
ignored */
- ulint pos, /* this parameter is used only in sync aio:
+ ulint pos, /*!< this parameter is used only in sync aio:
wait for the aio slot at this position */
- fil_node_t**message1, /* out: the messages passed with the aio
+ fil_node_t**message1, /*!< out: the messages passed with the aio
request; note that also in the case where
the aio operation failed, these output
parameters are valid and can be used to
restart the operation, for example */
void** message2,
- ulint* type) /* out: OS_FILE_WRITE or ..._READ */
+ ulint* type) /*!< out: OS_FILE_WRITE or ..._READ */
{
ulint orig_seg = segment;
os_aio_array_t* array;
@@ -3753,12 +3827,12 @@ os_aio_windows_handle(
if (ret && len == slot->len) {
ret_val = TRUE;
-# ifdef UNIV_DO_FLUSH
+#ifdef UNIV_DO_FLUSH
if (slot->type == OS_FILE_WRITE
&& !os_do_not_call_flush_at_each_write) {
ut_a(TRUE == os_file_flush(slot->file));
}
-# endif /* UNIV_DO_FLUSH */
+#endif /* UNIV_DO_FLUSH */
} else if (os_file_handle_error(slot->name, "Windows aio")) {
retry = TRUE;
@@ -3811,143 +3885,26 @@ os_aio_windows_handle(
}
#endif
-#ifdef POSIX_ASYNC_IO
-
-/**************************************************************************
-This function is only used in Posix asynchronous i/o. Waits for an aio
-operation to complete. */
-
-ibool
-os_aio_posix_handle(
-/*================*/
- /* out: TRUE if the aio operation succeeded */
- ulint array_no, /* in: array number 0 - 3 */
- fil_node_t**message1, /* out: the messages passed with the aio
- request; note that also in the case where
- the aio operation failed, these output
- parameters are valid and can be used to
- restart the operation, for example */
- void** message2)
-{
- os_aio_array_t* array;
- os_aio_slot_t* slot;
- siginfo_t info;
- sigset_t sigset;
- sigset_t proc_sigset;
- sigset_t thr_sigset;
- int ret;
- int i;
- int sig;
-
- sigemptyset(&sigset);
- sigaddset(&sigset, SIGRTMIN + 1 + array_no);
-
- pthread_sigmask(SIG_UNBLOCK, &sigset, NULL);
-
-#if 0
- sigprocmask(0, NULL, &proc_sigset);
- pthread_sigmask(0, NULL, &thr_sigset);
-
- for (i = 32 ; i < 40; i++) {
- fprintf(stderr, "%lu : %lu %lu\n", (ulint)i,
- (ulint) sigismember(&proc_sigset, i),
- (ulint) sigismember(&thr_sigset, i));
- }
-#endif
-
- ret = sigwaitinfo(&sigset, &info);
-
- if (sig != SIGRTMIN + 1 + array_no) {
-
- ut_error;
-
- return(FALSE);
- }
-
- fputs("Handling POSIX aio\n", stderr);
-
- array = os_aio_get_array_from_no(array_no);
-
- os_mutex_enter(array->mutex);
-
- slot = info.si_value.sival_ptr;
-
- ut_a(slot->reserved);
-
- *message1 = slot->message1;
- *message2 = slot->message2;
-
-# ifdef UNIV_DO_FLUSH
- if (slot->type == OS_FILE_WRITE
- && !os_do_not_call_flush_at_each_write) {
- ut_a(TRUE == os_file_flush(slot->file));
- }
-# endif /* UNIV_DO_FLUSH */
-
- os_mutex_exit(array->mutex);
-
- os_aio_array_free_slot(array, slot);
-
- return(TRUE);
-}
-#endif
-
-/**************************************************************************
-Do a 'last millisecond' check that the page end is sensible;
-reported page checksum errors from Linux seem to wipe over the page end. */
-static
-void
-os_file_check_page_trailers(
-/*========================*/
- byte* combined_buf, /* in: combined write buffer */
- ulint total_len) /* in: size of combined_buf, in bytes
- (a multiple of UNIV_PAGE_SIZE) */
-{
- ulint len;
-
- for (len = 0; len + UNIV_PAGE_SIZE <= total_len;
- len += UNIV_PAGE_SIZE) {
- byte* buf = combined_buf + len;
-
- if (UNIV_UNLIKELY
- (memcmp(buf + (FIL_PAGE_LSN + 4),
- buf + (UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM + 4), 4))) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ERROR: The page to be written"
- " seems corrupt!\n"
- "InnoDB: Writing a block of %lu bytes,"
- " currently at offset %lu\n",
- (ulong)total_len, (ulong)len);
- buf_page_print(buf);
- fprintf(stderr,
- "InnoDB: ERROR: The page to be written"
- " seems corrupt!\n");
- }
- }
-}
-
-/**************************************************************************
+/**********************************************************************//**
Does simulated aio. This function should be called by an i/o-handler
-thread. */
-
+thread.
+@return TRUE if the aio operation succeeded */
+UNIV_INTERN
ibool
os_aio_simulated_handle(
/*====================*/
- /* out: TRUE if the aio operation succeeded */
- ulint global_segment, /* in: the number of the segment in the aio
+ ulint global_segment, /*!< in: the number of the segment in the aio
arrays to wait for; segment 0 is the ibuf
i/o thread, segment 1 the log i/o thread,
then follow the non-ibuf read threads, and as
the last are the non-ibuf write threads */
- fil_node_t**message1, /* out: the messages passed with the aio
+ fil_node_t**message1, /*!< out: the messages passed with the aio
request; note that also in the case where
the aio operation failed, these output
parameters are valid and can be used to
restart the operation, for example */
void** message2,
- ulint* type) /* out: OS_FILE_WRITE or ..._READ */
+ ulint* type) /*!< out: OS_FILE_WRITE or ..._READ */
{
os_aio_array_t* array;
ulint segment;
@@ -4164,28 +4121,9 @@ consecutive_loop:
/* Do the i/o with ordinary, synchronous i/o functions: */
if (slot->type == OS_FILE_WRITE) {
- if (array == os_aio_write_array) {
- if ((total_len % UNIV_PAGE_SIZE != 0)
- || (slot->offset % UNIV_PAGE_SIZE != 0)) {
- fprintf(stderr,
- "InnoDB: Error: trying a displaced"
- " write to %s %lu %lu, len %lu\n",
- slot->name, (ulong) slot->offset_high,
- (ulong) slot->offset,
- (ulong) total_len);
- ut_error;
- }
-
- os_file_check_page_trailers(combined_buf, total_len);
- }
-
ret = os_file_write(slot->name, slot->file, combined_buf,
slot->offset, slot->offset_high,
total_len);
-
- if (array == os_aio_write_array) {
- os_file_check_page_trailers(combined_buf, total_len);
- }
} else {
ret = os_file_read(slot->file, combined_buf,
slot->offset, slot->offset_high, total_len);
@@ -4268,14 +4206,14 @@ recommended_sleep:
goto restart;
}
-/**************************************************************************
-Validates the consistency of an aio array. */
+/**********************************************************************//**
+Validates the consistency of an aio array.
+@return TRUE if ok */
static
ibool
os_aio_array_validate(
/*==================*/
- /* out: TRUE if ok */
- os_aio_array_t* array) /* in: aio wait array */
+ os_aio_array_t* array) /*!< in: aio wait array */
{
os_aio_slot_t* slot;
ulint n_reserved = 0;
@@ -4304,13 +4242,13 @@ os_aio_array_validate(
return(TRUE);
}
-/**************************************************************************
-Validates the consistency the aio system. */
-
+/**********************************************************************//**
+Validates the consistency of the aio system.
+@return TRUE if ok */
+UNIV_INTERN
ibool
os_aio_validate(void)
/*=================*/
- /* out: TRUE if ok */
{
os_aio_array_validate(os_aio_read_array);
os_aio_array_validate(os_aio_write_array);
@@ -4321,13 +4259,13 @@ os_aio_validate(void)
return(TRUE);
}
-/**************************************************************************
+/**********************************************************************//**
Prints info of the aio arrays. */
-
+UNIV_INTERN
void
os_aio_print(
/*=========*/
- FILE* file) /* in: file where to print */
+ FILE* file) /*!< in: file where to print */
{
os_aio_array_t* array;
os_aio_slot_t* slot;
@@ -4458,9 +4396,9 @@ loop:
os_last_printout = current_time;
}
-/**************************************************************************
+/**********************************************************************//**
Refreshes the statistics used to print per-second averages. */
-
+UNIV_INTERN
void
os_aio_refresh_stats(void)
/*======================*/
@@ -4474,14 +4412,14 @@ os_aio_refresh_stats(void)
}
#ifdef UNIV_DEBUG
-/**************************************************************************
+/**********************************************************************//**
Checks that all slots in the system have been freed, that is, there are
-no pending io operations. */
-
+no pending io operations.
+@return TRUE if all free */
+UNIV_INTERN
ibool
os_aio_all_slots_free(void)
/*=======================*/
- /* out: TRUE if all free */
{
os_aio_array_t* array;
ulint n_res = 0;
@@ -4534,3 +4472,5 @@ os_aio_all_slots_free(void)
return(FALSE);
}
#endif /* UNIV_DEBUG */
+
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/os/os0proc.c b/storage/innobase/os/os0proc.c
index f00475fc528..48922886f23 100644
--- a/storage/innobase/os/os0proc.c
+++ b/storage/innobase/os/os0proc.c
@@ -1,9 +1,26 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file os/os0proc.c
The interface to the operating system
process control primitives
-(c) 1995 Innobase Oy
-
Created 9/30/1995 Heikki Tuuri
*******************************************************/
@@ -15,511 +32,25 @@ Created 9/30/1995 Heikki Tuuri
#include "ut0mem.h"
#include "ut0byte.h"
-
-/*
-How to get AWE to compile on Windows?
--------------------------------------
-
-In the project settings of the innobase project the Visual C++ source,
-__WIN2000__ has to be defined.
-
-The Visual C++ has to be relatively recent and _WIN32_WINNT has to be
-defined to a value >= 0x0500 when windows.h is included.
-
-#define _WIN32_WINNT 0x0500
-
-Where does AWE work?
--------------------
-
-See the error message in os_awe_allocate_physical_mem().
-
-How to assign privileges for mysqld to use AWE?
------------------------------------------------
-
-See the error message in os_awe_enable_lock_pages_in_mem().
-
-Use Windows AWE functions in this order
----------------------------------------
-
-(1) os_awe_enable_lock_pages_in_mem();
-(2) os_awe_allocate_physical_mem();
-(3) os_awe_allocate_virtual_mem_window();
-(4) os_awe_map_physical_mem_to_window().
-
-To test 'AWE' in a computer which does not have the AWE API,
-you can compile with UNIV_SIMULATE_AWE defined in this file.
-*/
-
-#ifdef UNIV_SIMULATE_AWE
-/* If we simulate AWE, we allocate the 'physical memory' here */
-byte* os_awe_simulate_mem;
-ulint os_awe_simulate_mem_size;
-os_awe_t* os_awe_simulate_page_info;
-byte* os_awe_simulate_window;
-ulint os_awe_simulate_window_size;
-/* In simulated AWE the following contains a NULL pointer or a pointer
-to a mapped 'physical page' for each 4 kB page in the AWE window */
-byte** os_awe_simulate_map;
+/* FreeBSD for example has only MAP_ANON, Linux has MAP_ANONYMOUS and
+MAP_ANON but MAP_ANON is marked as deprecated */
+#if defined(MAP_ANONYMOUS)
+#define OS_MAP_ANON MAP_ANONYMOUS
+#elif defined(MAP_ANON)
+#define OS_MAP_ANON MAP_ANON
#endif
-#ifdef __WIN2000__
-os_awe_t* os_awe_page_info;
-ulint os_awe_n_pages;
-byte* os_awe_window;
-ulint os_awe_window_size;
-#endif
-
-ibool os_use_large_pages;
+UNIV_INTERN ibool os_use_large_pages;
/* Large page size. This may be a boot-time option on some platforms */
-ulint os_large_page_size;
-
-/********************************************************************
-Windows AWE support. Tries to enable the "lock pages in memory" privilege for
-the current process so that the current process can allocate memory-locked
-virtual address space to act as the window where AWE maps physical memory. */
-
-ibool
-os_awe_enable_lock_pages_in_mem(void)
-/*=================================*/
- /* out: TRUE if success, FALSE if error;
- prints error info to stderr if no success */
-{
-#ifdef UNIV_SIMULATE_AWE
-
- return(TRUE);
-
-#elif defined(__WIN2000__)
- struct {
- DWORD Count;
- LUID_AND_ATTRIBUTES Privilege[1];
- } Info;
- HANDLE hProcess;
- HANDLE Token;
- BOOL Result;
-
- hProcess = GetCurrentProcess();
-
- /* Open the token of the current process */
-
- Result = OpenProcessToken(hProcess,
- TOKEN_ADJUST_PRIVILEGES, &Token);
- if (Result != TRUE) {
- fprintf(stderr,
- "InnoDB: AWE: Cannot open process token, error %lu\n",
- (ulint)GetLastError());
- return(FALSE);
- }
-
- Info.Count = 1;
-
- Info.Privilege[0].Attributes = SE_PRIVILEGE_ENABLED;
-
- /* Get the local unique identifier (LUID) of the SE_LOCK_MEMORY
- privilege */
-
- Result = LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME,
- &(Info.Privilege[0].Luid));
- if (Result != TRUE) {
- fprintf(stderr,
- "InnoDB: AWE: Cannot get local privilege"
- " value for %s, error %lu.\n",
- SE_LOCK_MEMORY_NAME, (ulint)GetLastError());
-
- return(FALSE);
- }
-
- /* Try to adjust the privilege */
-
- Result = AdjustTokenPrivileges(Token, FALSE,
- (PTOKEN_PRIVILEGES)&Info,
- 0, NULL, NULL);
- /* Check the result */
-
- if (Result != TRUE) {
- fprintf(stderr,
- "InnoDB: AWE: Cannot adjust process token privileges,"
- " error %u.\n",
- GetLastError());
- return(FALSE);
- } else if (GetLastError() != ERROR_SUCCESS) {
- fprintf(stderr,
- "InnoDB: AWE: Cannot enable SE_LOCK_MEMORY privilege,"
- " error %lu.\n"
- "InnoDB: In Windows XP Home you cannot use AWE."
- " In Windows 2000 and XP\n"
- "InnoDB: Professional you must go to the"
- " Control Panel, to\n"
- "InnoDB: Security Settings, to Local Policies,"
- " and enable\n"
- "InnoDB: the 'lock pages in memory' privilege"
- " for the user who runs\n"
- "InnoDB: the MySQL server.\n", GetLastError());
-
- return(FALSE);
- }
-
- CloseHandle(Token);
-
- return(TRUE);
-#else
-#ifdef __WIN__
- fprintf(stderr,
- "InnoDB: AWE: Error: to use AWE you must use"
- " a ...-nt MySQL executable.\n");
-#endif
- return(FALSE);
-#endif
-}
-
-/********************************************************************
-Allocates physical RAM memory up to 64 GB in an Intel 32-bit x86
-processor. */
-
-ibool
-os_awe_allocate_physical_mem(
-/*=========================*/
- /* out: TRUE if success */
- os_awe_t** page_info, /* out, own: array of opaque data containing
- the info for allocated physical memory pages;
- each allocated 4 kB physical memory page has
- one slot of type os_awe_t in the array */
- ulint n_megabytes) /* in: number of megabytes to allocate */
-{
-#ifdef UNIV_SIMULATE_AWE
- os_awe_simulate_page_info = ut_malloc
- (sizeof(os_awe_t) * n_megabytes
- * ((1024 * 1024) / OS_AWE_X86_PAGE_SIZE));
-
- os_awe_simulate_mem
- = ut_align(ut_malloc(4096 + 1024 * 1024 * n_megabytes), 4096);
- os_awe_simulate_mem_size = n_megabytes * 1024 * 1024;
-
- *page_info = os_awe_simulate_page_info;
-
- return(TRUE);
-
-#elif defined(__WIN2000__)
- BOOL bResult;
- os_awe_t NumberOfPages; /* Question: why does Windows
- use the name ULONG_PTR for
- a scalar integer type? Maybe
- because we may also refer to
- &NumberOfPages? */
- os_awe_t NumberOfPagesInitial;
- SYSTEM_INFO sSysInfo;
- int PFNArraySize;
-
- if (n_megabytes > 64 * 1024) {
-
- fprintf(stderr,
- "InnoDB: AWE: Error: tried to allocate %lu MB.\n"
- "InnoDB: AWE cannot allocate more than"
- " 64 GB in any computer.\n", n_megabytes);
-
- return(FALSE);
- }
-
- GetSystemInfo(&sSysInfo); /* fill the system information structure */
-
- if ((ulint)OS_AWE_X86_PAGE_SIZE != (ulint)sSysInfo.dwPageSize) {
- fprintf(stderr,
- "InnoDB: AWE: Error: this computer has a page size"
- " of %lu.\n"
- "InnoDB: Should be 4096 bytes for"
- " InnoDB AWE support to work.\n",
- (ulint)sSysInfo.dwPageSize);
-
- return(FALSE);
- }
-
- /* Calculate the number of pages of memory to request */
-
- NumberOfPages = n_megabytes * ((1024 * 1024) / OS_AWE_X86_PAGE_SIZE);
-
- /* Calculate the size of page_info for allocated physical pages */
-
- PFNArraySize = NumberOfPages * sizeof(os_awe_t);
-
- *page_info = (os_awe_t*)HeapAlloc(GetProcessHeap(), 0, PFNArraySize);
-
- if (*page_info == NULL) {
- fprintf(stderr,
- "InnoDB: AWE: Failed to allocate page info"
- " array from process heap, error %lu\n",
- (ulint)GetLastError());
-
- return(FALSE);
- }
-
- ut_total_allocated_memory += PFNArraySize;
-
- /* Enable this process' privilege to lock pages to physical memory */
-
- if (!os_awe_enable_lock_pages_in_mem()) {
-
- return(FALSE);
- }
-
- /* Allocate the physical memory */
-
- NumberOfPagesInitial = NumberOfPages;
-
- os_awe_page_info = *page_info;
- os_awe_n_pages = (ulint)NumberOfPages;
-
- /* Compilation note: if the compiler complains the function is not
- defined, see the note at the start of this file */
-
- bResult = AllocateUserPhysicalPages(GetCurrentProcess(),
- &NumberOfPages, *page_info);
- if (bResult != TRUE) {
- fprintf(stderr,
- "InnoDB: AWE: Cannot allocate physical pages,"
- " error %lu.\n",
- (ulint)GetLastError());
-
- return(FALSE);
- }
-
- if (NumberOfPagesInitial != NumberOfPages) {
- fprintf(stderr,
- "InnoDB: AWE: Error: allocated only %lu pages"
- " of %lu requested.\n"
- "InnoDB: Check that you have enough free RAM.\n"
- "InnoDB: In Windows XP Professional and"
- " 2000 Professional\n"
- "InnoDB: Windows PAE size is max 4 GB."
- " In 2000 and .NET\n"
- "InnoDB: Advanced Servers and 2000 Datacenter Server"
- " it is 32 GB,\n"
- "InnoDB: and in .NET Datacenter Server it is 64 GB.\n"
- "InnoDB: A Microsoft web page said that"
- " the processor must be an Intel\n"
- "InnoDB: processor.\n",
- (ulint)NumberOfPages,
- (ulint)NumberOfPagesInitial);
-
- return(FALSE);
- }
-
- fprintf(stderr,
- "InnoDB: Using Address Windowing Extensions (AWE);"
- " allocated %lu MB\n",
- n_megabytes);
-
- return(TRUE);
-#else
- UT_NOT_USED(n_megabytes);
- UT_NOT_USED(page_info);
-
- return(FALSE);
-#endif
-}
-
-/********************************************************************
-Allocates a window in the virtual address space where we can map then
-pages of physical memory. */
-
-byte*
-os_awe_allocate_virtual_mem_window(
-/*===============================*/
- /* out, own: allocated memory, or NULL if did not
- succeed */
- ulint size) /* in: virtual memory allocation size in bytes, must
- be < 2 GB */
-{
-#ifdef UNIV_SIMULATE_AWE
- ulint i;
-
- os_awe_simulate_window = ut_align(ut_malloc(4096 + size), 4096);
- os_awe_simulate_window_size = size;
-
- os_awe_simulate_map = ut_malloc(sizeof(byte*) * (size / 4096));
-
- for (i = 0; i < (size / 4096); i++) {
- *(os_awe_simulate_map + i) = NULL;
- }
-
- return(os_awe_simulate_window);
-
-#elif defined(__WIN2000__)
- byte* ptr;
-
- if (size > (ulint)0x7FFFFFFFUL) {
- fprintf(stderr,
- "InnoDB: AWE: Cannot allocate %lu bytes"
- " of virtual memory\n", size);
-
- return(NULL);
- }
-
- ptr = VirtualAlloc(NULL, (SIZE_T)size, MEM_RESERVE | MEM_PHYSICAL,
- PAGE_READWRITE);
- if (ptr == NULL) {
- fprintf(stderr,
- "InnoDB: AWE: Cannot allocate %lu bytes"
- " of virtual memory, error %lu\n",
- size, (ulint)GetLastError());
-
- return(NULL);
- }
-
- os_awe_window = ptr;
- os_awe_window_size = size;
-
- ut_total_allocated_memory += size;
-
- return(ptr);
-#else
- UT_NOT_USED(size);
-
- return(NULL);
-#endif
-}
-
-/********************************************************************
-With this function you can map parts of physical memory allocated with
-the ..._allocate_physical_mem to the virtual address space allocated with
-the previous function. Intel implements this so that the process page
-tables are updated accordingly. A test on a 1.5 GHz AMD processor and XP
-showed that this takes < 1 microsecond, much better than the estimated 80 us
-for copying a 16 kB page memory to memory. But, the operation will at least
-partially invalidate the translation lookaside buffer (TLB) of all
-processors. Under a real-world load the performance hit may be bigger. */
-
-ibool
-os_awe_map_physical_mem_to_window(
-/*==============================*/
- /* out: TRUE if success; the function
- calls exit(1) in case of an error */
- byte* ptr, /* in: a page-aligned pointer to
- somewhere in the virtual address
- space window; we map the physical mem
- pages here */
- ulint n_mem_pages, /* in: number of 4 kB mem pages to
- map */
- os_awe_t* page_info) /* in: array of page infos for those
- pages; each page has one slot in the
- array */
-{
-#ifdef UNIV_SIMULATE_AWE
- ulint i;
- byte** map;
- byte* page;
- byte* phys_page;
-
- ut_a(ptr >= os_awe_simulate_window);
- ut_a(ptr < os_awe_simulate_window + os_awe_simulate_window_size);
- ut_a(page_info >= os_awe_simulate_page_info);
- ut_a(page_info < os_awe_simulate_page_info
- + (os_awe_simulate_mem_size / 4096));
-
- /* First look if some other 'physical pages' are mapped at ptr,
- and copy them back to where they were if yes */
-
- map = os_awe_simulate_map
- + ((ulint)(ptr - os_awe_simulate_window)) / 4096;
- page = ptr;
-
- for (i = 0; i < n_mem_pages; i++) {
- if (*map != NULL) {
- ut_memcpy(*map, page, 4096);
- }
- map++;
- page += 4096;
- }
-
- /* Then copy to ptr the 'physical pages' determined by page_info; we
- assume page_info is a segment of the array we created at the start */
-
- phys_page = os_awe_simulate_mem
- + (ulint)(page_info - os_awe_simulate_page_info)
- * 4096;
-
- ut_memcpy(ptr, phys_page, n_mem_pages * 4096);
-
- /* Update the map */
-
- map = os_awe_simulate_map
- + ((ulint)(ptr - os_awe_simulate_window)) / 4096;
-
- for (i = 0; i < n_mem_pages; i++) {
- *map = phys_page;
-
- map++;
- phys_page += 4096;
- }
-
- return(TRUE);
-
-#elif defined(__WIN2000__)
- BOOL bResult;
- os_awe_t n_pages;
-
- n_pages = (os_awe_t)n_mem_pages;
-
- if (!(ptr >= os_awe_window)) {
- fprintf(stderr,
- "InnoDB: AWE: Error: trying to map to address %lx"
- " but AWE window start %lx\n",
- (ulint)ptr, (ulint)os_awe_window);
- ut_a(0);
- }
-
- if (!(ptr <= os_awe_window + os_awe_window_size - UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: AWE: Error: trying to map to address %lx"
- " but AWE window end %lx\n",
- (ulint)ptr, (ulint)os_awe_window + os_awe_window_size);
- ut_a(0);
- }
-
- if (!(page_info >= os_awe_page_info)) {
- fprintf(stderr,
- "InnoDB: AWE: Error: trying to map page info"
- " at %lx but array start %lx\n",
- (ulint)page_info, (ulint)os_awe_page_info);
- ut_a(0);
- }
-
- if (!(page_info <= os_awe_page_info + (os_awe_n_pages - 4))) {
- fprintf(stderr,
- "InnoDB: AWE: Error: trying to map page info"
- " at %lx but array end %lx\n",
- (ulint)page_info,
- (ulint)(os_awe_page_info + os_awe_n_pages));
- ut_a(0);
- }
-
- bResult = MapUserPhysicalPages((PVOID)ptr, n_pages, page_info);
-
- if (bResult != TRUE) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: AWE: Mapping of %lu physical pages"
- " to address %lx failed,\n"
- "InnoDB: error %lu.\n"
- "InnoDB: Cannot continue operation.\n",
- n_mem_pages, (ulint)ptr, (ulint)GetLastError());
- exit(1);
- }
+UNIV_INTERN ulint os_large_page_size;
- return(TRUE);
-#else
- UT_NOT_USED(ptr);
- UT_NOT_USED(n_mem_pages);
- UT_NOT_USED(page_info);
-
- return(FALSE);
-#endif
-}
-
-/********************************************************************
+/****************************************************************//**
Converts the current process id to a number. It is not guaranteed that the
number is unique. In Linux returns the 'process number' of the current
thread. That number is the same as one sees in 'top', for example. In Linux
-the thread id is not the same as one sees in 'top'. */
-
+the thread id is not the same as one sees in 'top'.
+@return process id as a number */
+UNIV_INTERN
ulint
os_proc_get_number(void)
/*====================*/
@@ -531,60 +62,35 @@ os_proc_get_number(void)
#endif
}
-/********************************************************************
-Allocates non-cacheable memory. */
-
-void*
-os_mem_alloc_nocache(
-/*=================*/
- /* out: allocated memory */
- ulint n) /* in: number of bytes */
-{
-#ifdef __WIN__
- void* ptr;
-
- ptr = VirtualAlloc(NULL, n, MEM_COMMIT,
- PAGE_READWRITE | PAGE_NOCACHE);
- ut_a(ptr);
-
- return(ptr);
-#else
- return(ut_malloc(n));
-#endif
-}
-
-/********************************************************************
-Allocates large pages memory. */
-
+/****************************************************************//**
+Allocates large pages memory.
+@return allocated memory */
+UNIV_INTERN
void*
os_mem_alloc_large(
/*===============*/
- /* out: allocated memory */
- ulint n, /* in: number of bytes */
- ibool set_to_zero, /* in: TRUE if allocated memory
- should be set to zero if
- UNIV_SET_MEM_TO_ZERO is defined */
- ibool assert_on_error)/* in: if TRUE, we crash mysqld if
- the memory cannot be allocated */
+ ulint* n) /*!< in/out: number of bytes */
{
-#ifdef HAVE_LARGE_PAGES
- ulint size;
+ void* ptr;
+ ulint size;
+#if defined HAVE_LARGE_PAGES && defined UNIV_LINUX
int shmid;
- void *ptr = NULL;
struct shmid_ds buf;
if (!os_use_large_pages || !os_large_page_size) {
goto skip;
}
-#ifdef UNIV_LINUX
/* Align block size to os_large_page_size */
- size = ((n - 1) & ~(os_large_page_size - 1)) + os_large_page_size;
+ ut_ad(ut_is_2pow(os_large_page_size));
+ size = ut_2pow_round(*n + (os_large_page_size - 1),
+ os_large_page_size);
shmid = shmget(IPC_PRIVATE, (size_t)size, SHM_HUGETLB | SHM_R | SHM_W);
if (shmid < 0) {
fprintf(stderr, "InnoDB: HugeTLB: Warning: Failed to allocate"
- " %lu bytes. errno %d\n", n, errno);
+ " %lu bytes. errno %d\n", size, errno);
+ ptr = NULL;
} else {
ptr = shmat(shmid, NULL, 0);
if (ptr == (void *)-1) {
@@ -599,77 +105,127 @@ os_mem_alloc_large(
process exits */
shmctl(shmid, IPC_RMID, &buf);
}
-#endif
if (ptr) {
- if (set_to_zero) {
-#ifdef UNIV_SET_MEM_TO_ZERO
- memset(ptr, '\0', size);
-#endif
- }
-
+ *n = size;
+ os_fast_mutex_lock(&ut_list_mutex);
+ ut_total_allocated_memory += size;
+ os_fast_mutex_unlock(&ut_list_mutex);
+# ifdef UNIV_SET_MEM_TO_ZERO
+ memset(ptr, '\0', size);
+# endif
+ UNIV_MEM_ALLOC(ptr, size);
return(ptr);
}
fprintf(stderr, "InnoDB HugeTLB: Warning: Using conventional"
" memory pool\n");
skip:
-#endif /* HAVE_LARGE_PAGES */
+#endif /* HAVE_LARGE_PAGES && UNIV_LINUX */
- return(ut_malloc_low(n, set_to_zero, assert_on_error));
+#ifdef __WIN__
+ SYSTEM_INFO system_info;
+ GetSystemInfo(&system_info);
+
+ /* Align block size to system page size */
+ ut_ad(ut_is_2pow(system_info.dwPageSize));
+ /* system_info.dwPageSize is only 32-bit. Casting to ulint is required
+ on 64-bit Windows. */
+ size = *n = ut_2pow_round(*n + (system_info.dwPageSize - 1),
+ (ulint) system_info.dwPageSize);
+ ptr = VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE,
+ PAGE_READWRITE);
+ if (!ptr) {
+ fprintf(stderr, "InnoDB: VirtualAlloc(%lu bytes) failed;"
+ " Windows error %lu\n",
+ (ulong) size, (ulong) GetLastError());
+ } else {
+ os_fast_mutex_lock(&ut_list_mutex);
+ ut_total_allocated_memory += size;
+ os_fast_mutex_unlock(&ut_list_mutex);
+ UNIV_MEM_ALLOC(ptr, size);
+ }
+#elif defined __NETWARE__ || !defined OS_MAP_ANON
+ size = *n;
+ ptr = ut_malloc_low(size, TRUE, FALSE);
+#else
+# ifdef HAVE_GETPAGESIZE
+ size = getpagesize();
+# else
+ size = UNIV_PAGE_SIZE;
+# endif
+ /* Align block size to system page size */
+ ut_ad(ut_is_2pow(size));
+ size = *n = ut_2pow_round(*n + (size - 1), size);
+ ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | OS_MAP_ANON, -1, 0);
+ if (UNIV_UNLIKELY(ptr == (void*) -1)) {
+ fprintf(stderr, "InnoDB: mmap(%lu bytes) failed;"
+ " errno %lu\n",
+ (ulong) size, (ulong) errno);
+ ptr = NULL;
+ } else {
+ os_fast_mutex_lock(&ut_list_mutex);
+ ut_total_allocated_memory += size;
+ os_fast_mutex_unlock(&ut_list_mutex);
+ UNIV_MEM_ALLOC(ptr, size);
+ }
+#endif
+ return(ptr);
}
-/********************************************************************
+/****************************************************************//**
Frees large pages memory. */
-
+UNIV_INTERN
void
os_mem_free_large(
/*==============*/
- void *ptr) /* in: number of bytes */
+ void *ptr, /*!< in: pointer returned by
+ os_mem_alloc_large() */
+ ulint size) /*!< in: size returned by
+ os_mem_alloc_large() */
{
-#ifdef HAVE_LARGE_PAGES
- if (os_use_large_pages && os_large_page_size
-#ifdef UNIV_LINUX
- && !shmdt(ptr)
-#endif
- ) {
+ os_fast_mutex_lock(&ut_list_mutex);
+ ut_a(ut_total_allocated_memory >= size);
+ os_fast_mutex_unlock(&ut_list_mutex);
+
+#if defined HAVE_LARGE_PAGES && defined UNIV_LINUX
+ if (os_use_large_pages && os_large_page_size && !shmdt(ptr)) {
+ os_fast_mutex_lock(&ut_list_mutex);
+ ut_a(ut_total_allocated_memory >= size);
+ ut_total_allocated_memory -= size;
+ os_fast_mutex_unlock(&ut_list_mutex);
+ UNIV_MEM_FREE(ptr, size);
return;
}
-#endif
-
- ut_free(ptr);
-}
-
-/********************************************************************
-Sets the priority boost for threads released from waiting within the current
-process. */
-
-void
-os_process_set_priority_boost(
-/*==========================*/
- ibool do_boost) /* in: TRUE if priority boost should be done,
- FALSE if not */
-{
+#endif /* HAVE_LARGE_PAGES && UNIV_LINUX */
#ifdef __WIN__
- ibool no_boost;
-
- if (do_boost) {
- no_boost = FALSE;
+ /* When releasing memory with MEM_RELEASE, the size parameter must be 0.
+ Do not combine MEM_RELEASE with MEM_DECOMMIT. */
+ if (!VirtualFree(ptr, 0, MEM_RELEASE)) {
+ fprintf(stderr, "InnoDB: VirtualFree(%p, %lu) failed;"
+ " Windows error %lu\n",
+ ptr, (ulong) size, (ulong) GetLastError());
} else {
- no_boost = TRUE;
+ os_fast_mutex_lock(&ut_list_mutex);
+ ut_a(ut_total_allocated_memory >= size);
+ ut_total_allocated_memory -= size;
+ os_fast_mutex_unlock(&ut_list_mutex);
+ UNIV_MEM_FREE(ptr, size);
}
-
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
-
- /* Does not do anything currently!
- SetProcessPriorityBoost(GetCurrentProcess(), no_boost);
- */
- fputs("Warning: process priority boost setting"
- " currently not functional!\n",
- stderr);
+#elif defined __NETWARE__ || !defined OS_MAP_ANON
+ ut_free(ptr);
#else
- UT_NOT_USED(do_boost);
+ if (munmap(ptr, size)) {
+ fprintf(stderr, "InnoDB: munmap(%p, %lu) failed;"
+ " errno %lu\n",
+ ptr, (ulong) size, (ulong) errno);
+ } else {
+ os_fast_mutex_lock(&ut_list_mutex);
+ ut_a(ut_total_allocated_memory >= size);
+ ut_total_allocated_memory -= size;
+ os_fast_mutex_unlock(&ut_list_mutex);
+ UNIV_MEM_FREE(ptr, size);
+ }
#endif
}
diff --git a/storage/innobase/os/os0sync.c b/storage/innobase/os/os0sync.c
index 18fd38f3f9b..60467242e14 100644
--- a/storage/innobase/os/os0sync.c
+++ b/storage/innobase/os/os0sync.c
@@ -1,9 +1,26 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file os/os0sync.c
The interface to the operating system
synchronization primitives.
-(c) 1995 Innobase Oy
-
Created 9/6/1995 Heikki Tuuri
*******************************************************/
@@ -21,9 +38,9 @@ Created 9/6/1995 Heikki Tuuri
/* Type definition for an operating system mutex struct */
struct os_mutex_struct{
- os_event_t event; /* Used by sync0arr.c for queing threads */
- void* handle; /* OS handle to mutex */
- ulint count; /* we use this counter to check
+ os_event_t event; /*!< Used by sync0arr.c for queueing threads */
+ void* handle; /*!< OS handle to mutex */
+ ulint count; /*!< we use this counter to check
that the same thread does not
recursively lock the mutex: we
do not assume that the OS mutex
@@ -33,33 +50,35 @@ struct os_mutex_struct{
/* list of all 'slow' OS mutexes created */
};
-/* Mutex protecting counts and the lists of OS mutexes and events */
-os_mutex_t os_sync_mutex;
-ibool os_sync_mutex_inited = FALSE;
-ibool os_sync_free_called = FALSE;
+/** Mutex protecting counts and the lists of OS mutexes and events */
+UNIV_INTERN os_mutex_t os_sync_mutex;
+/** TRUE if os_sync_mutex has been initialized */
+static ibool os_sync_mutex_inited = FALSE;
+/** TRUE when os_sync_free() is being executed */
+static ibool os_sync_free_called = FALSE;
-/* This is incremented by 1 in os_thread_create and decremented by 1 in
+/** This is incremented by 1 in os_thread_create and decremented by 1 in
os_thread_exit */
-ulint os_thread_count = 0;
+UNIV_INTERN ulint os_thread_count = 0;
-/* The list of all events created */
-UT_LIST_BASE_NODE_T(os_event_struct_t) os_event_list;
+/** The list of all events created */
+static UT_LIST_BASE_NODE_T(os_event_struct_t) os_event_list;
-/* The list of all OS 'slow' mutexes */
-UT_LIST_BASE_NODE_T(os_mutex_str_t) os_mutex_list;
+/** The list of all OS 'slow' mutexes */
+static UT_LIST_BASE_NODE_T(os_mutex_str_t) os_mutex_list;
-ulint os_event_count = 0;
-ulint os_mutex_count = 0;
-ulint os_fast_mutex_count = 0;
+UNIV_INTERN ulint os_event_count = 0;
+UNIV_INTERN ulint os_mutex_count = 0;
+UNIV_INTERN ulint os_fast_mutex_count = 0;
/* Because a mutex is embedded inside an event and there is an
event embedded inside a mutex, on free, this generates a recursive call.
This version of the free event function doesn't acquire the global lock */
static void os_event_free_internal(os_event_t event);
-/*************************************************************
+/*********************************************************//**
Initializes global event and OS 'slow' mutex lists. */
-
+UNIV_INTERN
void
os_sync_init(void)
/*==============*/
@@ -67,14 +86,17 @@ os_sync_init(void)
UT_LIST_INIT(os_event_list);
UT_LIST_INIT(os_mutex_list);
+ os_sync_mutex = NULL;
+ os_sync_mutex_inited = FALSE;
+
os_sync_mutex = os_mutex_create(NULL);
os_sync_mutex_inited = TRUE;
}
-/*************************************************************
+/*********************************************************//**
Frees created events and OS 'slow' mutexes. */
-
+UNIV_INTERN
void
os_sync_free(void)
/*==============*/
@@ -109,16 +131,16 @@ os_sync_free(void)
os_sync_free_called = FALSE;
}
-/*************************************************************
+/*********************************************************//**
Creates an event semaphore, i.e., a semaphore which may just have two
states: signaled and nonsignaled. The created event is manual reset: it
-must be reset explicitly by calling sync_os_reset_event. */
-
+must be reset explicitly by calling os_event_reset().
+@return the event handle */
+UNIV_INTERN
os_event_t
os_event_create(
/*============*/
- /* out: the event handle */
- const char* name) /* in: the name of the event, if NULL
+ const char* name) /*!< in: the name of the event, if NULL
the event is created without a name */
{
#ifdef __WIN__
@@ -145,12 +167,8 @@ os_event_create(
os_fast_mutex_init(&(event->os_mutex));
-#if defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10)
- ut_a(0 == pthread_cond_init(&(event->cond_var),
- pthread_condattr_default));
-#else
ut_a(0 == pthread_cond_init(&(event->cond_var), NULL));
-#endif
+
event->is_set = FALSE;
/* We return this value in os_event_reset(), which can then be
@@ -181,55 +199,14 @@ os_event_create(
return(event);
}
-#ifdef __WIN__
-/*************************************************************
-Creates an auto-reset event semaphore, i.e., an event which is automatically
-reset when a single thread is released. Works only in Windows. */
-
-os_event_t
-os_event_create_auto(
-/*=================*/
- /* out: the event handle */
- const char* name) /* in: the name of the event, if NULL
- the event is created without a name */
-{
- os_event_t event;
-
- event = ut_malloc(sizeof(struct os_event_struct));
-
- event->handle = CreateEvent(NULL, /* No security attributes */
- FALSE, /* Auto-reset */
- FALSE, /* Initial state nonsignaled */
- (LPCTSTR) name);
-
- if (!event->handle) {
- fprintf(stderr,
- "InnoDB: Could not create a Windows auto"
- " event semaphore; Windows error %lu\n",
- (ulong) GetLastError());
- }
-
- /* Put to the list of events */
- os_mutex_enter(os_sync_mutex);
-
- UT_LIST_ADD_FIRST(os_event_list, os_event_list, event);
-
- os_event_count++;
-
- os_mutex_exit(os_sync_mutex);
-
- return(event);
-}
-#endif
-
-/**************************************************************
+/**********************************************************//**
Sets an event semaphore to the signaled state: lets waiting threads
proceed. */
-
+UNIV_INTERN
void
os_event_set(
/*=========*/
- os_event_t event) /* in: event to set */
+ os_event_t event) /*!< in: event to set */
{
#ifdef __WIN__
ut_a(event);
@@ -251,21 +228,21 @@ os_event_set(
#endif
}
-/**************************************************************
+/**********************************************************//**
Resets an event semaphore to the nonsignaled state. Waiting threads will
stop to wait for the event.
The return value should be passed to os_event_wait_low() if it is desired
that this thread should not wait in case of an intervening call to
os_event_set() between this os_event_reset() and the
-os_event_wait_low() call. See comments for os_event_wait_low(). */
-
-ib_longlong
+os_event_wait_low() call. See comments for os_event_wait_low().
+@return current signal_count. */
+UNIV_INTERN
+ib_int64_t
os_event_reset(
/*===========*/
- /* out: current signal_count. */
- os_event_t event) /* in: event to reset */
+ os_event_t event) /*!< in: event to reset */
{
- ib_longlong ret = 0;
+ ib_int64_t ret = 0;
#ifdef __WIN__
ut_a(event);
@@ -288,13 +265,13 @@ os_event_reset(
return(ret);
}
-/**************************************************************
+/**********************************************************//**
Frees an event object, without acquiring the global lock. */
static
void
os_event_free_internal(
/*===================*/
- os_event_t event) /* in: event to free */
+ os_event_t event) /*!< in: event to free */
{
#ifdef __WIN__
ut_a(event);
@@ -317,13 +294,13 @@ os_event_free_internal(
ut_free(event);
}
-/**************************************************************
+/**********************************************************//**
Frees an event object. */
-
+UNIV_INTERN
void
os_event_free(
/*==========*/
- os_event_t event) /* in: event to free */
+ os_event_t event) /*!< in: event to free */
{
#ifdef __WIN__
@@ -349,7 +326,7 @@ os_event_free(
ut_free(event);
}
-/**************************************************************
+/**********************************************************//**
Waits for an event object until it is in the signaled state. If
srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the
waiting thread when the event becomes signaled (or immediately if the
@@ -369,12 +346,12 @@ thread C calls os_event_wait() [infinite wait!]
Where such a scenario is possible, to avoid infinite wait, the
value returned by os_event_reset() should be passed in as
reset_sig_count. */
-
+UNIV_INTERN
void
os_event_wait_low(
/*==============*/
- os_event_t event, /* in: event to wait */
- ib_longlong reset_sig_count)/* in: zero or the value
+ os_event_t event, /*!< in: event to wait */
+ ib_int64_t reset_sig_count)/*!< in: zero or the value
returned by previous call of
os_event_reset(). */
{
@@ -394,7 +371,7 @@ os_event_wait_low(
os_thread_exit(NULL);
}
#else
- ib_longlong old_signal_count;
+ ib_int64_t old_signal_count;
os_fast_mutex_lock(&(event->os_mutex));
@@ -428,17 +405,16 @@ os_event_wait_low(
#endif
}
-/**************************************************************
+/**********************************************************//**
Waits for an event object until it is in the signaled state or
-a timeout is exceeded. In Unix the timeout is always infinite. */
-
+a timeout is exceeded. In Unix the timeout is always infinite.
+@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
+UNIV_INTERN
ulint
os_event_wait_time(
/*===============*/
- /* out: 0 if success, OS_SYNC_TIME_EXCEEDED if
- timeout was exceeded */
- os_event_t event, /* in: event to wait */
- ulint time) /* in: timeout in microseconds, or
+ os_event_t event, /*!< in: event to wait */
+ ulint time) /*!< in: timeout in microseconds, or
OS_SYNC_INFINITE_TIME */
{
#ifdef __WIN__
@@ -474,19 +450,18 @@ os_event_wait_time(
}
#ifdef __WIN__
-/**************************************************************
+/**********************************************************//**
Waits for any event in an OS native event array. Returns if even a single
-one is signaled or becomes signaled. */
-
+one is signaled or becomes signaled.
+@return index of the event which was signaled */
+UNIV_INTERN
ulint
os_event_wait_multiple(
/*===================*/
- /* out: index of the event
- which was signaled */
- ulint n, /* in: number of events in the
+ ulint n, /*!< in: number of events in the
array */
os_native_event_t* native_event_array)
- /* in: pointer to an array of event
+ /*!< in: pointer to an array of event
handles */
{
DWORD index;
@@ -498,7 +473,7 @@ os_event_wait_multiple(
FALSE, /* Wait for any 1 event */
INFINITE); /* Infinite wait time
limit */
- ut_a(index >= WAIT_OBJECT_0); /* NOTE: Pointless comparision */
+ ut_a(index >= WAIT_OBJECT_0); /* NOTE: Pointless comparison */
ut_a(index < WAIT_OBJECT_0 + n);
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
@@ -509,15 +484,15 @@ os_event_wait_multiple(
}
#endif
-/*************************************************************
+/*********************************************************//**
Creates an operating system mutex semaphore. Because these are slow, the
-mutex semaphore of InnoDB itself (mutex_t) should be used where possible. */
-
+mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
+@return the mutex handle */
+UNIV_INTERN
os_mutex_t
os_mutex_create(
/*============*/
- /* out: the mutex handle */
- const char* name) /* in: the name of the mutex, if NULL
+ const char* name) /*!< in: the name of the mutex, if NULL
the mutex is created without a name */
{
#ifdef __WIN__
@@ -544,7 +519,7 @@ os_mutex_create(
mutex_str->count = 0;
mutex_str->event = os_event_create(NULL);
- if (os_sync_mutex_inited) {
+ if (UNIV_LIKELY(os_sync_mutex_inited)) {
/* When creating os_sync_mutex itself we cannot reserve it */
os_mutex_enter(os_sync_mutex);
}
@@ -553,20 +528,20 @@ os_mutex_create(
os_mutex_count++;
- if (os_sync_mutex_inited) {
+ if (UNIV_LIKELY(os_sync_mutex_inited)) {
os_mutex_exit(os_sync_mutex);
}
return(mutex_str);
}
-/**************************************************************
+/**********************************************************//**
Acquires ownership of a mutex semaphore. */
-
+UNIV_INTERN
void
os_mutex_enter(
/*===========*/
- os_mutex_t mutex) /* in: mutex to acquire */
+ os_mutex_t mutex) /*!< in: mutex to acquire */
{
#ifdef __WIN__
DWORD err;
@@ -589,13 +564,13 @@ os_mutex_enter(
#endif
}
-/**************************************************************
+/**********************************************************//**
Releases ownership of a mutex. */
-
+UNIV_INTERN
void
os_mutex_exit(
/*==========*/
- os_mutex_t mutex) /* in: mutex to release */
+ os_mutex_t mutex) /*!< in: mutex to release */
{
ut_a(mutex);
@@ -609,21 +584,21 @@ os_mutex_exit(
#endif
}
-/**************************************************************
+/**********************************************************//**
Frees a mutex object. */
-
+UNIV_INTERN
void
os_mutex_free(
/*==========*/
- os_mutex_t mutex) /* in: mutex to free */
+ os_mutex_t mutex) /*!< in: mutex to free */
{
ut_a(mutex);
- if (!os_sync_free_called) {
+ if (UNIV_LIKELY(!os_sync_free_called)) {
os_event_free_internal(mutex->event);
}
- if (os_sync_mutex_inited) {
+ if (UNIV_LIKELY(os_sync_mutex_inited)) {
os_mutex_enter(os_sync_mutex);
}
@@ -631,7 +606,7 @@ os_mutex_free(
os_mutex_count--;
- if (os_sync_mutex_inited) {
+ if (UNIV_LIKELY(os_sync_mutex_inited)) {
os_mutex_exit(os_sync_mutex);
}
@@ -646,26 +621,22 @@ os_mutex_free(
#endif
}
-/*************************************************************
+/*********************************************************//**
Initializes an operating system fast mutex semaphore. */
-
+UNIV_INTERN
void
os_fast_mutex_init(
/*===============*/
- os_fast_mutex_t* fast_mutex) /* in: fast mutex */
+ os_fast_mutex_t* fast_mutex) /*!< in: fast mutex */
{
#ifdef __WIN__
ut_a(fast_mutex);
InitializeCriticalSection((LPCRITICAL_SECTION) fast_mutex);
#else
-#if defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10)
- ut_a(0 == pthread_mutex_init(fast_mutex, pthread_mutexattr_default));
-#else
ut_a(0 == pthread_mutex_init(fast_mutex, MY_MUTEX_INIT_FAST));
#endif
-#endif
- if (os_sync_mutex_inited) {
+ if (UNIV_LIKELY(os_sync_mutex_inited)) {
/* When creating os_sync_mutex itself (in Unix) we cannot
reserve it */
@@ -674,18 +645,18 @@ os_fast_mutex_init(
os_fast_mutex_count++;
- if (os_sync_mutex_inited) {
+ if (UNIV_LIKELY(os_sync_mutex_inited)) {
os_mutex_exit(os_sync_mutex);
}
}
-/**************************************************************
+/**********************************************************//**
Acquires ownership of a fast mutex. */
-
+UNIV_INTERN
void
os_fast_mutex_lock(
/*===============*/
- os_fast_mutex_t* fast_mutex) /* in: mutex to acquire */
+ os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */
{
#ifdef __WIN__
EnterCriticalSection((LPCRITICAL_SECTION) fast_mutex);
@@ -694,13 +665,13 @@ os_fast_mutex_lock(
#endif
}
-/**************************************************************
+/**********************************************************//**
Releases ownership of a fast mutex. */
-
+UNIV_INTERN
void
os_fast_mutex_unlock(
/*=================*/
- os_fast_mutex_t* fast_mutex) /* in: mutex to release */
+ os_fast_mutex_t* fast_mutex) /*!< in: mutex to release */
{
#ifdef __WIN__
LeaveCriticalSection(fast_mutex);
@@ -709,13 +680,13 @@ os_fast_mutex_unlock(
#endif
}
-/**************************************************************
+/**********************************************************//**
Frees a mutex object. */
-
+UNIV_INTERN
void
os_fast_mutex_free(
/*===============*/
- os_fast_mutex_t* fast_mutex) /* in: mutex to free */
+ os_fast_mutex_t* fast_mutex) /*!< in: mutex to free */
{
#ifdef __WIN__
ut_a(fast_mutex);
@@ -726,7 +697,7 @@ os_fast_mutex_free(
ret = pthread_mutex_destroy(fast_mutex);
- if (ret != 0) {
+ if (UNIV_UNLIKELY(ret != 0)) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: error: return value %lu when calling\n"
@@ -735,19 +706,20 @@ os_fast_mutex_free(
"InnoDB: Byte contents of the pthread mutex at %p:\n",
(void*) fast_mutex);
ut_print_buf(stderr, fast_mutex, sizeof(os_fast_mutex_t));
- fprintf(stderr, "\n");
+ putc('\n', stderr);
}
#endif
- if (os_sync_mutex_inited) {
+ if (UNIV_LIKELY(os_sync_mutex_inited)) {
/* When freeing the last mutexes, we have
already freed os_sync_mutex */
os_mutex_enter(os_sync_mutex);
}
+ ut_ad(os_fast_mutex_count > 0);
os_fast_mutex_count--;
- if (os_sync_mutex_inited) {
+ if (UNIV_LIKELY(os_sync_mutex_inited)) {
os_mutex_exit(os_sync_mutex);
}
}
diff --git a/storage/innobase/os/os0thread.c b/storage/innobase/os/os0thread.c
index a0b1e51d359..34818ada804 100644
--- a/storage/innobase/os/os0thread.c
+++ b/storage/innobase/os/os0thread.c
@@ -1,7 +1,24 @@
-/******************************************************
-The interface to the operating system thread control primitives
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
-(c) 1995 Innobase Oy
+/**************************************************//**
+@file os/os0thread.c
+The interface to the operating system thread control primitives
Created 9/8/1995 Heikki Tuuri
*******************************************************/
@@ -15,18 +32,19 @@ Created 9/8/1995 Heikki Tuuri
#include <windows.h>
#endif
+#ifndef UNIV_HOTBACKUP
#include "srv0srv.h"
#include "os0sync.h"
-/*******************************************************************
-Compares two thread ids for equality. */
-
+/***************************************************************//**
+Compares two thread ids for equality.
+@return TRUE if equal */
+UNIV_INTERN
ibool
os_thread_eq(
/*=========*/
- /* out: TRUE if equal */
- os_thread_id_t a, /* in: OS thread or thread id */
- os_thread_id_t b) /* in: OS thread or thread id */
+ os_thread_id_t a, /*!< in: OS thread or thread id */
+ os_thread_id_t b) /*!< in: OS thread or thread id */
{
#ifdef __WIN__
if (a == b) {
@@ -43,14 +61,15 @@ os_thread_eq(
#endif
}
-/********************************************************************
+/****************************************************************//**
Converts an OS thread id to a ulint. It is NOT guaranteed that the ulint is
-unique for the thread though! */
-
+unique for the thread though!
+@return thread identifier as a number */
+UNIV_INTERN
ulint
os_thread_pf(
/*=========*/
- os_thread_id_t a)
+ os_thread_id_t a) /*!< in: OS thread identifier */
{
#ifdef UNIV_HPUX10
/* In HP-UX-10.20 a pthread_t is a struct of 3 fields: field1, field2,
@@ -62,11 +81,12 @@ os_thread_pf(
#endif
}
-/*********************************************************************
+/*****************************************************************//**
Returns the thread identifier of current thread. Currently the thread
identifier in Unix is the thread handle itself. Note that in HP-UX
-pthread_t is a struct of 3 fields. */
-
+pthread_t is a struct of 3 fields.
+@return current thread identifier */
+UNIV_INTERN
os_thread_id_t
os_thread_get_curr_id(void)
/*=======================*/
@@ -78,24 +98,24 @@ os_thread_get_curr_id(void)
#endif
}
-/********************************************************************
+/****************************************************************//**
Creates a new thread of execution. The execution starts from
the function given. The start function takes a void* parameter
-and returns an ulint. */
-
+and returns an ulint.
+@return handle to the thread */
+UNIV_INTERN
os_thread_t
os_thread_create(
/*=============*/
- /* out: handle to the thread */
#ifndef __WIN__
os_posix_f_t start_f,
#else
- ulint (*start_f)(void*), /* in: pointer to function
+ ulint (*start_f)(void*), /*!< in: pointer to function
from which to start */
#endif
- void* arg, /* in: argument to start
+ void* arg, /*!< in: argument to start
function */
- os_thread_id_t* thread_id) /* out: id of the created
+ os_thread_id_t* thread_id) /*!< out: id of the created
thread, or NULL */
{
#ifdef __WIN__
@@ -132,7 +152,7 @@ os_thread_create(
os_thread_t pthread;
pthread_attr_t attr;
-#if !(defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10))
+#ifndef UNIV_HPUX10
pthread_attr_init(&attr);
#endif
@@ -166,7 +186,7 @@ os_thread_create(
os_thread_count++;
os_mutex_exit(os_sync_mutex);
-#if defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10)
+#ifdef UNIV_HPUX10
ret = pthread_create(&pthread, pthread_attr_default, start_f, arg);
#else
ret = pthread_create(&pthread, &attr, start_f, arg);
@@ -177,7 +197,7 @@ os_thread_create(
exit(1);
}
-#if !(defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10))
+#ifndef UNIV_HPUX10
pthread_attr_destroy(&attr);
#endif
if (srv_set_thread_priorities) {
@@ -193,13 +213,13 @@ os_thread_create(
#endif
}
-/*********************************************************************
+/*****************************************************************//**
Exits the current thread. */
-
+UNIV_INTERN
void
os_thread_exit(
/*===========*/
- void* exit_value) /* in: exit value; in Windows this void*
+ void* exit_value) /*!< in: exit value; in Windows this void*
is cast as a DWORD */
{
#ifdef UNIV_DEBUG_THREAD_CREATION
@@ -213,22 +233,15 @@ os_thread_exit(
#ifdef __WIN__
ExitThread((DWORD)exit_value);
#else
+ pthread_detach(pthread_self());
pthread_exit(exit_value);
#endif
}
-#ifdef HAVE_PTHREAD_JOIN
-int
-os_thread_join(
-/*===========*/
- os_thread_id_t thread_id) /* in: id of the thread to join */
-{
- return(pthread_join(thread_id, NULL));
-}
-#endif
-/*********************************************************************
-Returns handle to the current thread. */
-
+/*****************************************************************//**
+Returns handle to the current thread.
+@return current thread handle */
+UNIV_INTERN
os_thread_t
os_thread_get_curr(void)
/*====================*/
@@ -240,9 +253,9 @@ os_thread_get_curr(void)
#endif
}
-/*********************************************************************
+/*****************************************************************//**
Advises the os to give up remainder of the thread's time slice. */
-
+UNIV_INTERN
void
os_thread_yield(void)
/*=================*/
@@ -259,14 +272,15 @@ os_thread_yield(void)
os_thread_sleep(0);
#endif
}
+#endif /* !UNIV_HOTBACKUP */
-/*********************************************************************
+/*****************************************************************//**
The thread sleeps at least the time given in microseconds. */
-
+UNIV_INTERN
void
os_thread_sleep(
/*============*/
- ulint tm) /* in: time in microseconds */
+ ulint tm) /*!< in: time in microseconds */
{
#ifdef __WIN__
Sleep((DWORD) tm / 1000);
@@ -282,14 +296,15 @@ os_thread_sleep(
#endif
}
-/**********************************************************************
+#ifndef UNIV_HOTBACKUP
+/******************************************************************//**
Sets a thread priority. */
-
+UNIV_INTERN
void
os_thread_set_priority(
/*===================*/
- os_thread_t handle, /* in: OS handle to the thread */
- ulint pri) /* in: priority */
+ os_thread_t handle, /*!< in: OS handle to the thread */
+ ulint pri) /*!< in: priority */
{
#ifdef __WIN__
int os_pri;
@@ -311,15 +326,15 @@ os_thread_set_priority(
#endif
}
-/**********************************************************************
-Gets a thread priority. */
-
+/******************************************************************//**
+Gets a thread priority.
+@return priority */
+UNIV_INTERN
ulint
os_thread_get_priority(
/*===================*/
- /* out: priority */
os_thread_t handle __attribute__((unused)))
- /* in: OS handle to the thread */
+ /*!< in: OS handle to the thread */
{
#ifdef __WIN__
int os_pri;
@@ -343,9 +358,10 @@ os_thread_get_priority(
#endif
}
-/**********************************************************************
-Gets the last operating system error code for the calling thread. */
-
+/******************************************************************//**
+Gets the last operating system error code for the calling thread.
+@return last error on Windows, 0 otherwise */
+UNIV_INTERN
ulint
os_thread_get_last_error(void)
/*==========================*/
@@ -356,3 +372,4 @@ os_thread_get_last_error(void)
return(0);
#endif
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/page/page0cur.c b/storage/innobase/page/page0cur.c
index ea011843890..f10f16a7dd9 100644
--- a/storage/innobase/page/page0cur.c
+++ b/storage/innobase/page/page0cur.c
@@ -1,7 +1,24 @@
-/************************************************************************
-The page cursor
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1994-1996 Innobase Oy
+*****************************************************************************/
+
+/********************************************************************//**
+@file page/page0cur.c
+The page cursor
Created 10/4/1994 Heikki Tuuri
*************************************************************************/
@@ -11,48 +28,42 @@ Created 10/4/1994 Heikki Tuuri
#include "page0cur.ic"
#endif
-#include "rem0cmp.h"
+#include "page0zip.h"
#include "mtr0log.h"
#include "log0recv.h"
-#include "rem0cmp.h"
-#include "srv0srv.h"
#include "ut0ut.h"
-
-static ulint page_rnd = 976722341;
+#ifndef UNIV_HOTBACKUP
+#include "rem0cmp.h"
#ifdef PAGE_CUR_ADAPT
# ifdef UNIV_SEARCH_PERF_STAT
-ulint page_cur_short_succ = 0;
+static ulint page_cur_short_succ = 0;
# endif /* UNIV_SEARCH_PERF_STAT */
-/***********************************************************************
+/*******************************************************************//**
This is a linear congruential generator PRNG. Returns a pseudo random
number between 0 and 2^64-1 inclusive. The formula and the constants
being used are:
X[n+1] = (a * X[n] + c) mod m
where:
-X[0] = ut_usectime()
+X[0] = ut_time_us(NULL)
a = 1103515245 (3^5 * 5 * 7 * 129749)
c = 12345 (3 * 5 * 823)
m = 18446744073709551616 (2^64)
-*/
-#define LCG_a 1103515245
-#define LCG_c 12345
+
+@return number between 0 and 2^64-1 */
static
-unsigned long long
-page_cur_lcg_prng()
-/*===============*/
- /* out: number between 0 and 2^64-1 */
+ib_uint64_t
+page_cur_lcg_prng(void)
+/*===================*/
{
- static unsigned long long lcg_current = 0;
+#define LCG_a 1103515245
+#define LCG_c 12345
+ static ib_uint64_t lcg_current = 0;
static ibool initialized = FALSE;
- ulint time_sec;
- ulint time_ms;
if (!initialized) {
- ut_usectime(&time_sec, &time_ms);
- lcg_current = (unsigned long long) (time_sec * 1000000
- + time_ms);
+ lcg_current = (ib_uint64_t) ut_time_us(NULL);
initialized = TRUE;
}
@@ -63,44 +74,47 @@ page_cur_lcg_prng()
return(lcg_current);
}
-/********************************************************************
-Tries a search shortcut based on the last insert. */
+/****************************************************************//**
+Tries a search shortcut based on the last insert.
+@return TRUE on success */
UNIV_INLINE
ibool
page_cur_try_search_shortcut(
/*=========================*/
- /* out: TRUE on success */
- page_t* page, /* in: index page */
- dict_index_t* index, /* in: record descriptor */
- dtuple_t* tuple, /* in: data tuple */
- ulint* iup_matched_fields,
- /* in/out: already matched fields in upper
- limit record */
- ulint* iup_matched_bytes,
- /* in/out: already matched bytes in a field
- not yet completely matched */
- ulint* ilow_matched_fields,
- /* in/out: already matched fields in lower
- limit record */
- ulint* ilow_matched_bytes,
- /* in/out: already matched bytes in a field
- not yet completely matched */
- page_cur_t* cursor) /* out: page cursor */
+ const buf_block_t* block, /*!< in: index page */
+ const dict_index_t* index, /*!< in: record descriptor */
+ const dtuple_t* tuple, /*!< in: data tuple */
+ ulint* iup_matched_fields,
+ /*!< in/out: already matched
+ fields in upper limit record */
+ ulint* iup_matched_bytes,
+ /*!< in/out: already matched
+ bytes in a field not yet
+ completely matched */
+ ulint* ilow_matched_fields,
+ /*!< in/out: already matched
+ fields in lower limit record */
+ ulint* ilow_matched_bytes,
+ /*!< in/out: already matched
+ bytes in a field not yet
+ completely matched */
+ page_cur_t* cursor) /*!< out: page cursor */
{
- rec_t* rec;
- rec_t* next_rec;
- ulint low_match;
- ulint low_bytes;
- ulint up_match;
- ulint up_bytes;
+ const rec_t* rec;
+ const rec_t* next_rec;
+ ulint low_match;
+ ulint low_bytes;
+ ulint up_match;
+ ulint up_bytes;
#ifdef UNIV_SEARCH_DEBUG
- page_cur_t cursor2;
+ page_cur_t cursor2;
#endif
ibool success = FALSE;
+ const page_t* page = buf_block_get_frame(block);
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
ut_ad(dtuple_check_typed(tuple));
@@ -123,7 +137,7 @@ page_cur_try_search_shortcut(
goto exit_func;
}
- next_rec = page_rec_get_next(rec);
+ next_rec = page_rec_get_next_const(rec);
offsets = rec_get_offsets(next_rec, index, offsets,
dtuple_get_n_fields(tuple), &heap);
@@ -132,10 +146,10 @@ page_cur_try_search_shortcut(
goto exit_func;
}
- cursor->rec = rec;
+ page_cur_position(rec, block, cursor);
#ifdef UNIV_SEARCH_DEBUG
- page_cur_search_with_match(page, index, tuple, PAGE_CUR_DBG,
+ page_cur_search_with_match(block, index, tuple, PAGE_CUR_DBG,
iup_matched_fields,
iup_matched_bytes,
ilow_matched_fields,
@@ -143,7 +157,7 @@ page_cur_try_search_shortcut(
&cursor2);
ut_a(cursor2.rec == cursor->rec);
- if (next_rec != page_get_supremum_rec(page)) {
+ if (!page_rec_is_supremum(next_rec)) {
ut_a(*iup_matched_fields == up_match);
ut_a(*iup_matched_bytes == up_bytes);
@@ -175,25 +189,24 @@ exit_func:
#endif
#ifdef PAGE_CUR_LE_OR_EXTENDS
-/********************************************************************
+/****************************************************************//**
Checks if the nth field in a record is a character type field which extends
the nth field in tuple, i.e., the field is longer or equal in length and has
-common first characters. */
+common first characters.
+@return TRUE if rec field extends tuple field */
static
ibool
page_cur_rec_field_extends(
/*=======================*/
- /* out: TRUE if rec field
- extends tuple field */
- dtuple_t* tuple, /* in: data tuple */
- rec_t* rec, /* in: record */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint n) /* in: compare nth field */
+ const dtuple_t* tuple, /*!< in: data tuple */
+ const rec_t* rec, /*!< in: record */
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint n) /*!< in: compare nth field */
{
- dtype_t* type;
- dfield_t* dfield;
- byte* rec_f;
- ulint rec_f_len;
+ const dtype_t* type;
+ const dfield_t* dfield;
+ const byte* rec_f;
+ ulint rec_f_len;
ut_ad(rec_offs_validate(rec, NULL, offsets));
dfield = dtuple_get_nth_field(tuple, n);
@@ -213,7 +226,7 @@ page_cur_rec_field_extends(
if (dfield_get_len(dfield) != UNIV_SQL_NULL
&& rec_f_len != UNIV_SQL_NULL
&& rec_f_len >= dfield_get_len(dfield)
- && !cmp_data_data_slow(type,
+ && !cmp_data_data_slow(type->mtype, type->prtype,
dfield_get_data(dfield),
dfield_get_len(dfield),
rec_f, dfield_get_len(dfield))) {
@@ -226,59 +239,65 @@ page_cur_rec_field_extends(
}
#endif /* PAGE_CUR_LE_OR_EXTENDS */
-/********************************************************************
+/****************************************************************//**
Searches the right position for a page cursor. */
-
+UNIV_INTERN
void
page_cur_search_with_match(
/*=======================*/
- page_t* page, /* in: index page */
- dict_index_t* index, /* in: record descriptor */
- dtuple_t* tuple, /* in: data tuple */
- ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
- or PAGE_CUR_GE */
- ulint* iup_matched_fields,
- /* in/out: already matched fields in upper
- limit record */
- ulint* iup_matched_bytes,
- /* in/out: already matched bytes in a field
- not yet completely matched */
- ulint* ilow_matched_fields,
- /* in/out: already matched fields in lower
- limit record */
- ulint* ilow_matched_bytes,
- /* in/out: already matched bytes in a field
- not yet completely matched */
- page_cur_t* cursor) /* out: page cursor */
+ const buf_block_t* block, /*!< in: buffer block */
+ const dict_index_t* index, /*!< in: record descriptor */
+ const dtuple_t* tuple, /*!< in: data tuple */
+ ulint mode, /*!< in: PAGE_CUR_L,
+ PAGE_CUR_LE, PAGE_CUR_G, or
+ PAGE_CUR_GE */
+ ulint* iup_matched_fields,
+ /*!< in/out: already matched
+ fields in upper limit record */
+ ulint* iup_matched_bytes,
+ /*!< in/out: already matched
+ bytes in a field not yet
+ completely matched */
+ ulint* ilow_matched_fields,
+ /*!< in/out: already matched
+ fields in lower limit record */
+ ulint* ilow_matched_bytes,
+ /*!< in/out: already matched
+ bytes in a field not yet
+ completely matched */
+ page_cur_t* cursor) /*!< out: page cursor */
{
- ulint up;
- ulint low;
- ulint mid;
- page_dir_slot_t* slot;
- rec_t* up_rec;
- rec_t* low_rec;
- rec_t* mid_rec;
- ulint up_matched_fields;
- ulint up_matched_bytes;
- ulint low_matched_fields;
- ulint low_matched_bytes;
- ulint cur_matched_fields;
- ulint cur_matched_bytes;
- int cmp;
+ ulint up;
+ ulint low;
+ ulint mid;
+ const page_t* page;
+ const page_dir_slot_t* slot;
+ const rec_t* up_rec;
+ const rec_t* low_rec;
+ const rec_t* mid_rec;
+ ulint up_matched_fields;
+ ulint up_matched_bytes;
+ ulint low_matched_fields;
+ ulint low_matched_bytes;
+ ulint cur_matched_fields;
+ ulint cur_matched_bytes;
+ int cmp;
#ifdef UNIV_SEARCH_DEBUG
- int dbg_cmp;
- ulint dbg_matched_fields;
- ulint dbg_matched_bytes;
+ int dbg_cmp;
+ ulint dbg_matched_fields;
+ ulint dbg_matched_bytes;
#endif
+#ifdef UNIV_ZIP_DEBUG
+ const page_zip_des_t* page_zip = buf_block_get_page_zip(block);
+#endif /* UNIV_ZIP_DEBUG */
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
- ut_ad(page && tuple && iup_matched_fields && iup_matched_bytes
+ ut_ad(block && tuple && iup_matched_fields && iup_matched_bytes
&& ilow_matched_fields && ilow_matched_bytes && cursor);
ut_ad(dtuple_validate(tuple));
- ut_ad(dtuple_check_typed(tuple));
#ifdef UNIV_DEBUG
# ifdef PAGE_CUR_DBG
if (mode != PAGE_CUR_DBG)
@@ -289,18 +308,22 @@ page_cur_search_with_match(
ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
|| mode == PAGE_CUR_G || mode == PAGE_CUR_GE);
#endif /* UNIV_DEBUG */
+ page = buf_block_get_frame(block);
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
page_check_dir(page);
#ifdef PAGE_CUR_ADAPT
- if ((page_header_get_field(page, PAGE_LEVEL) == 0)
+ if (page_is_leaf(page)
&& (mode == PAGE_CUR_LE)
&& (page_header_get_field(page, PAGE_N_DIRECTION) > 3)
&& (page_header_get_ptr(page, PAGE_LAST_INSERT))
&& (page_header_get_field(page, PAGE_DIRECTION) == PAGE_RIGHT)) {
if (page_cur_try_search_shortcut(
- page, index, tuple,
+ block, index, tuple,
iup_matched_fields, iup_matched_bytes,
ilow_matched_fields, ilow_matched_bytes,
cursor)) {
@@ -399,9 +422,9 @@ up_slot_match:
/* Perform linear search until the upper and lower records come to
distance 1 of each other. */
- while (page_rec_get_next(low_rec) != up_rec) {
+ while (page_rec_get_next_const(low_rec) != up_rec) {
- mid_rec = page_rec_get_next(low_rec);
+ mid_rec = page_rec_get_next_const(low_rec);
ut_pair_min(&cur_matched_fields, &cur_matched_bytes,
low_matched_fields, low_matched_bytes,
@@ -469,7 +492,7 @@ up_rec_match:
ut_a(dbg_cmp >= 0);
}
- if (low_rec != page_get_infimum_rec(page)) {
+ if (!page_rec_is_infimum(low_rec)) {
ut_a(low_matched_fields == dbg_matched_fields);
ut_a(low_matched_bytes == dbg_matched_bytes);
@@ -493,16 +516,16 @@ up_rec_match:
ut_a(dbg_cmp == -1);
}
- if (up_rec != page_get_supremum_rec(page)) {
+ if (!page_rec_is_supremum(up_rec)) {
ut_a(up_matched_fields == dbg_matched_fields);
ut_a(up_matched_bytes == dbg_matched_bytes);
}
#endif
if (mode <= PAGE_CUR_GE) {
- cursor->rec = up_rec;
+ page_cur_position(up_rec, block, cursor);
} else {
- cursor->rec = low_rec;
+ page_cur_position(low_rec, block, cursor);
}
*iup_matched_fields = up_matched_fields;
@@ -514,76 +537,58 @@ up_rec_match:
}
}
-/***************************************************************
+/***********************************************************//**
Positions a page cursor on a randomly chosen user record on a page. If there
are no user records, sets the cursor on the infimum record. */
-
+UNIV_INTERN
void
page_cur_open_on_rnd_user_rec(
/*==========================*/
- page_t* page, /* in: page */
- page_cur_t* cursor) /* in/out: page cursor */
+ buf_block_t* block, /*!< in: page */
+ page_cur_t* cursor) /*!< out: page cursor */
{
ulint rnd;
- rec_t* rec;
+ ulint n_recs = page_get_n_recs(buf_block_get_frame(block));
- if (page_get_n_recs(page) == 0) {
- page_cur_position(page_get_infimum_rec(page), cursor);
+ page_cur_set_before_first(block, cursor);
- return;
- }
+ if (UNIV_UNLIKELY(n_recs == 0)) {
- if (srv_use_legacy_cardinality_algorithm) {
- page_rnd += 87584577;
-
- rnd = page_rnd % page_get_n_recs(page);
- } else {
- rnd = (ulint) (page_cur_lcg_prng() % page_get_n_recs(page));
+ return;
}
- rec = page_get_infimum_rec(page);
-
- rec = page_rec_get_next(rec);
-
- while (rnd > 0) {
- rec = page_rec_get_next(rec);
-
- rnd--;
- }
+ rnd = (ulint) (page_cur_lcg_prng() % n_recs);
- page_cur_position(rec, cursor);
+ do {
+ page_cur_move_to_next(cursor);
+ } while (rnd--);
}
-/***************************************************************
+/***********************************************************//**
Writes the log record of a record insert on a page. */
static
void
page_cur_insert_rec_write_log(
/*==========================*/
- rec_t* insert_rec, /* in: inserted physical record */
- ulint rec_size, /* in: insert_rec size */
- rec_t* cursor_rec, /* in: record the
+ rec_t* insert_rec, /*!< in: inserted physical record */
+ ulint rec_size, /*!< in: insert_rec size */
+ rec_t* cursor_rec, /*!< in: record the
cursor is pointing to */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr) /* in: mini-transaction handle */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
ulint cur_rec_size;
ulint extra_size;
ulint cur_extra_size;
- ulint min_rec_size;
- byte* ins_ptr;
- byte* cur_ptr;
- ulint extra_info_yes;
+ const byte* ins_ptr;
byte* log_ptr;
- byte* log_end;
+ const byte* log_end;
ulint i;
- ulint comp;
ut_a(rec_size < UNIV_PAGE_SIZE);
- ut_ad(buf_frame_align(insert_rec) == buf_frame_align(cursor_rec));
+ ut_ad(page_align(insert_rec) == page_align(cursor_rec));
ut_ad(!page_rec_is_comp(insert_rec)
== !dict_table_is_comp(index->table));
- comp = page_rec_is_comp(insert_rec);
{
mem_heap_t* heap = NULL;
@@ -593,8 +598,8 @@ page_cur_insert_rec_write_log(
ulint* cur_offs;
ulint* ins_offs;
- *cur_offs_ = (sizeof cur_offs_) / sizeof *cur_offs_;
- *ins_offs_ = (sizeof ins_offs_) / sizeof *ins_offs_;
+ rec_offs_init(cur_offs_);
+ rec_offs_init(ins_offs_);
cur_offs = rec_get_offsets(cursor_rec, index, cur_offs_,
ULINT_UNDEFINED, &heap);
@@ -616,54 +621,61 @@ page_cur_insert_rec_write_log(
i = 0;
if (cur_extra_size == extra_size) {
- min_rec_size = ut_min(cur_rec_size, rec_size);
+ ulint min_rec_size = ut_min(cur_rec_size, rec_size);
- cur_ptr = cursor_rec - cur_extra_size;
+ const byte* cur_ptr = cursor_rec - cur_extra_size;
/* Find out the first byte in insert_rec which differs from
cursor_rec; skip the bytes in the record info */
- for (;;) {
- if (i >= min_rec_size) {
-
- break;
- } else if (*ins_ptr == *cur_ptr) {
+ do {
+ if (*ins_ptr == *cur_ptr) {
i++;
ins_ptr++;
cur_ptr++;
} else if ((i < extra_size)
&& (i >= extra_size
- - (comp
- ? REC_N_NEW_EXTRA_BYTES
- : REC_N_OLD_EXTRA_BYTES))) {
+ - page_rec_get_base_extra_size
+ (insert_rec))) {
i = extra_size;
ins_ptr = insert_rec;
cur_ptr = cursor_rec;
} else {
break;
}
- }
+ } while (i < min_rec_size);
}
if (mtr_get_log_mode(mtr) != MTR_LOG_SHORT_INSERTS) {
- log_ptr = mlog_open_and_write_index(mtr, insert_rec, index,
- comp
- ? MLOG_COMP_REC_INSERT
- : MLOG_REC_INSERT,
- 2 + 5 + 1 + 5 + 5
- + MLOG_BUF_MARGIN);
+ if (page_rec_is_comp(insert_rec)) {
+ log_ptr = mlog_open_and_write_index(
+ mtr, insert_rec, index, MLOG_COMP_REC_INSERT,
+ 2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN);
+ if (UNIV_UNLIKELY(!log_ptr)) {
+ /* Logging in mtr is switched off
+ during crash recovery: in that case
+ mlog_open returns NULL */
+ return;
+ }
+ } else {
+ log_ptr = mlog_open(mtr, 11
+ + 2 + 5 + 1 + 5 + 5
+ + MLOG_BUF_MARGIN);
+ if (UNIV_UNLIKELY(!log_ptr)) {
+ /* Logging in mtr is switched off
+ during crash recovery: in that case
+ mlog_open returns NULL */
+ return;
+ }
- if (!log_ptr) {
- /* Logging in mtr is switched off during crash
- recovery: in that case mlog_open returns NULL */
- return;
+ log_ptr = mlog_write_initial_log_record_fast(
+ insert_rec, MLOG_REC_INSERT, log_ptr, mtr);
}
log_end = &log_ptr[2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN];
/* Write the cursor rec offset as a 2-byte ulint */
- mach_write_to_2(log_ptr, cursor_rec
- - buf_frame_align(cursor_rec));
+ mach_write_to_2(log_ptr, page_offset(cursor_rec));
log_ptr += 2;
} else {
log_ptr = mlog_open(mtr, 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN);
@@ -675,25 +687,34 @@ page_cur_insert_rec_write_log(
log_end = &log_ptr[5 + 1 + 5 + 5 + MLOG_BUF_MARGIN];
}
- if ((rec_get_info_and_status_bits(insert_rec, comp)
- != rec_get_info_and_status_bits(cursor_rec, comp))
- || (extra_size != cur_extra_size)
- || (rec_size != cur_rec_size)) {
+ if (page_rec_is_comp(insert_rec)) {
+ if (UNIV_UNLIKELY
+ (rec_get_info_and_status_bits(insert_rec, TRUE)
+ != rec_get_info_and_status_bits(cursor_rec, TRUE))) {
- extra_info_yes = 1;
+ goto need_extra_info;
+ }
} else {
- extra_info_yes = 0;
+ if (UNIV_UNLIKELY
+ (rec_get_info_and_status_bits(insert_rec, FALSE)
+ != rec_get_info_and_status_bits(cursor_rec, FALSE))) {
+
+ goto need_extra_info;
+ }
}
- /* Write the record end segment length and the extra info storage
- flag */
- log_ptr += mach_write_compressed(log_ptr, 2 * (rec_size - i)
- + extra_info_yes);
- if (extra_info_yes) {
+ if (extra_size != cur_extra_size || rec_size != cur_rec_size) {
+need_extra_info:
+ /* Write the record end segment length
+ and the extra info storage flag */
+ log_ptr += mach_write_compressed(log_ptr,
+ 2 * (rec_size - i) + 1);
+
/* Write the info bits */
mach_write_to_1(log_ptr,
- rec_get_info_and_status_bits(insert_rec,
- comp));
+ rec_get_info_and_status_bits(
+ insert_rec,
+ page_rec_is_comp(insert_rec)));
log_ptr++;
/* Write the record origin offset */
@@ -704,6 +725,10 @@ page_cur_insert_rec_write_log(
ut_a(i < UNIV_PAGE_SIZE);
ut_a(extra_size < UNIV_PAGE_SIZE);
+ } else {
+ /* Write the record end segment length
+ and the extra info storage flag */
+ log_ptr += mach_write_compressed(log_ptr, 2 * (rec_size - i));
}
/* Write to the log the inserted index record end segment which
@@ -720,55 +745,64 @@ page_cur_insert_rec_write_log(
mlog_catenate_string(mtr, ins_ptr, rec_size);
}
}
-
-/***************************************************************
-Parses a log record of a record insert on a page. */
-
+#else /* !UNIV_HOTBACKUP */
+# define page_cur_insert_rec_write_log(ins_rec,size,cur,index,mtr) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+
+/***********************************************************//**
+Parses a log record of a record insert on a page.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
page_cur_parse_insert_rec(
/*======================*/
- /* out: end of log record or NULL */
- ibool is_short,/* in: TRUE if short inserts */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- dict_index_t* index, /* in: record descriptor */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr) /* in: mtr or NULL */
+ ibool is_short,/*!< in: TRUE if short inserts */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ buf_block_t* block, /*!< in: page or NULL */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr or NULL */
{
- ulint extra_info_yes;
- ulint offset = 0; /* remove warning */
ulint origin_offset;
ulint end_seg_len;
ulint mismatch_index;
+ page_t* page;
rec_t* cursor_rec;
byte buf1[1024];
byte* buf;
- byte* ptr2 = ptr;
+ byte* ptr2 = ptr;
ulint info_and_status_bits = 0; /* remove warning */
page_cur_t cursor;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
+
+ page = block ? buf_block_get_frame(block) : NULL;
+
+ if (is_short) {
+ cursor_rec = page_rec_get_prev(page_get_supremum_rec(page));
+ } else {
+ ulint offset;
- if (!is_short) {
/* Read the cursor rec offset as a 2-byte ulint */
- if (end_ptr < ptr + 2) {
+ if (UNIV_UNLIKELY(end_ptr < ptr + 2)) {
return(NULL);
}
offset = mach_read_from_2(ptr);
+ ptr += 2;
- if (offset >= UNIV_PAGE_SIZE) {
+ cursor_rec = page + offset;
+
+ if (UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)) {
recv_sys->found_corrupt_log = TRUE;
return(NULL);
}
-
- ptr += 2;
}
ptr = mach_parse_compressed(ptr, end_ptr, &end_seg_len);
@@ -778,16 +812,13 @@ page_cur_parse_insert_rec(
return(NULL);
}
- extra_info_yes = end_seg_len & 0x1UL;
- end_seg_len >>= 1;
-
- if (end_seg_len >= UNIV_PAGE_SIZE) {
+ if (UNIV_UNLIKELY(end_seg_len >= UNIV_PAGE_SIZE << 1)) {
recv_sys->found_corrupt_log = TRUE;
return(NULL);
}
- if (extra_info_yes) {
+ if (end_seg_len & 0x1UL) {
/* Read the info bits */
if (end_ptr < ptr + 1) {
@@ -817,37 +848,34 @@ page_cur_parse_insert_rec(
ut_a(mismatch_index < UNIV_PAGE_SIZE);
}
- if (end_ptr < ptr + end_seg_len) {
+ if (UNIV_UNLIKELY(end_ptr < ptr + (end_seg_len >> 1))) {
return(NULL);
}
- if (page == NULL) {
+ if (!block) {
- return(ptr + end_seg_len);
+ return(ptr + (end_seg_len >> 1));
}
ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
+ ut_ad(!buf_block_get_page_zip(block) || page_is_comp(page));
/* Read from the log the inserted index record end segment which
differs from the cursor record */
- if (is_short) {
- cursor_rec = page_rec_get_prev(page_get_supremum_rec(page));
- } else {
- cursor_rec = page + offset;
- }
-
offsets = rec_get_offsets(cursor_rec, index, offsets,
ULINT_UNDEFINED, &heap);
- if (extra_info_yes == 0) {
+ if (!(end_seg_len & 0x1UL)) {
info_and_status_bits = rec_get_info_and_status_bits(
cursor_rec, page_is_comp(page));
origin_offset = rec_offs_extra_size(offsets);
- mismatch_index = rec_offs_size(offsets) - end_seg_len;
+ mismatch_index = rec_offs_size(offsets) - (end_seg_len >> 1);
}
+ end_seg_len >>= 1;
+
if (mismatch_index + end_seg_len < sizeof buf1) {
buf = buf1;
} else {
@@ -856,22 +884,23 @@ page_cur_parse_insert_rec(
/* Build the inserted record to buf */
- if (mismatch_index >= UNIV_PAGE_SIZE) {
+ if (UNIV_UNLIKELY(mismatch_index >= UNIV_PAGE_SIZE)) {
fprintf(stderr,
"Is short %lu, info_and_status_bits %lu, offset %lu, "
"o_offset %lu\n"
"mismatch index %lu, end_seg_len %lu\n"
"parsed len %lu\n",
(ulong) is_short, (ulong) info_and_status_bits,
- (ulong) offset,
+ (ulong) page_offset(cursor_rec),
(ulong) origin_offset,
(ulong) mismatch_index, (ulong) end_seg_len,
(ulong) (ptr - ptr2));
fputs("Dump of 300 bytes of log:\n", stderr);
ut_print_buf(stderr, ptr2, 300);
+ putc('\n', stderr);
- buf_page_print(page);
+ buf_page_print(page, 0);
ut_error;
}
@@ -879,14 +908,25 @@ page_cur_parse_insert_rec(
ut_memcpy(buf, rec_get_start(cursor_rec, offsets), mismatch_index);
ut_memcpy(buf + mismatch_index, ptr, end_seg_len);
- rec_set_info_and_status_bits(buf + origin_offset, page_is_comp(page),
+ if (page_is_comp(page)) {
+ rec_set_info_and_status_bits(buf + origin_offset,
info_and_status_bits);
+ } else {
+ rec_set_info_bits_old(buf + origin_offset,
+ info_and_status_bits);
+ }
- page_cur_position(cursor_rec, &cursor);
+ page_cur_position(cursor_rec, block, &cursor);
offsets = rec_get_offsets(buf + origin_offset, index, offsets,
ULINT_UNDEFINED, &heap);
- page_cur_rec_insert(&cursor, buf + origin_offset, index, offsets, mtr);
+ if (UNIV_UNLIKELY(!page_cur_rec_insert(&cursor,
+ buf + origin_offset,
+ index, offsets, mtr))) {
+ /* The redo log record should only have been written
+ after the write was successful. */
+ ut_error;
+ }
if (buf != buf1) {
@@ -900,127 +940,515 @@ page_cur_parse_insert_rec(
return(ptr + end_seg_len);
}
-/***************************************************************
-Inserts a record next to page cursor. Returns pointer to inserted record if
-succeed, i.e., enough space available, NULL otherwise. The record to be
-inserted can be in a data tuple or as a physical record. The other parameter
-must then be NULL. The cursor stays at the same position. */
-
+/***********************************************************//**
+Inserts a record next to page cursor on an uncompressed page.
+Returns pointer to inserted record if succeed, i.e., enough
+space available, NULL otherwise. The cursor stays at the same position.
+@return pointer to record if succeed, NULL otherwise */
+UNIV_INTERN
rec_t*
page_cur_insert_rec_low(
/*====================*/
- /* out: pointer to record if succeed, NULL
- otherwise */
- page_cur_t* cursor, /* in: a page cursor */
- dtuple_t* tuple, /* in: pointer to a data tuple or NULL */
- dict_index_t* index, /* in: record descriptor */
- rec_t* rec, /* in: pointer to a physical record or NULL */
- ulint* offsets,/* in: rec_get_offsets(rec, index) or NULL */
- mtr_t* mtr) /* in: mini-transaction handle */
+ rec_t* current_rec,/*!< in: pointer to current record after
+ which the new record is inserted */
+ dict_index_t* index, /*!< in: record descriptor */
+ const rec_t* rec, /*!< in: pointer to a physical record */
+ ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
+ mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
{
- byte* insert_buf = NULL;
+ byte* insert_buf;
ulint rec_size;
- byte* page; /* the relevant page */
- rec_t* last_insert; /* cursor position at previous
+ page_t* page; /*!< the relevant page */
+ rec_t* last_insert; /*!< cursor position at previous
insert */
- rec_t* insert_rec; /* inserted record */
- ulint heap_no; /* heap number of the inserted
+ rec_t* free_rec; /*!< a free record that was reused,
+ or NULL */
+ rec_t* insert_rec; /*!< inserted record */
+ ulint heap_no; /*!< heap number of the inserted
record */
- rec_t* current_rec; /* current record after which the
- new record is inserted */
- rec_t* next_rec; /* next record after current before
- the insertion */
- ulint owner_slot; /* the slot which owns the
- inserted record */
- rec_t* owner_rec;
- ulint n_owned;
- mem_heap_t* heap = NULL;
- ulint comp;
- ut_ad(cursor && mtr);
- ut_ad(tuple || rec);
- ut_ad(!(tuple && rec));
- ut_ad(rec || dtuple_check_typed(tuple));
+ ut_ad(rec_offs_validate(rec, index, offsets));
- page = page_cur_get_page(cursor);
- comp = page_is_comp(page);
- ut_ad(dict_table_is_comp(index->table) == !!comp);
+ page = page_align(current_rec);
+ ut_ad(dict_table_is_comp(index->table)
+ == (ibool) !!page_is_comp(page));
- ut_ad(cursor->rec != page_get_supremum_rec(page));
+ ut_ad(!page_rec_is_supremum(current_rec));
/* 1. Get the size of the physical record in the page */
- if (tuple != NULL) {
- rec_size = rec_get_converted_size(index, tuple);
- } else {
- if (!offsets) {
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- }
- ut_ad(rec_offs_validate(rec, index, offsets));
- rec_size = rec_offs_size(offsets);
+ rec_size = rec_offs_size(offsets);
+
+#ifdef UNIV_DEBUG_VALGRIND
+ {
+ const void* rec_start
+ = rec - rec_offs_extra_size(offsets);
+ ulint extra_size
+ = rec_offs_extra_size(offsets)
+ - (rec_offs_comp(offsets)
+ ? REC_N_NEW_EXTRA_BYTES
+ : REC_N_OLD_EXTRA_BYTES);
+
+ /* All data bytes of the record must be valid. */
+ UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
+ /* The variable-length header must be valid. */
+ UNIV_MEM_ASSERT_RW(rec_start, extra_size);
}
+#endif /* UNIV_DEBUG_VALGRIND */
/* 2. Try to find suitable space from page memory management */
- insert_buf = page_mem_alloc(page, rec_size, index, &heap_no);
- if (insert_buf == NULL) {
+ free_rec = page_header_get_ptr(page, PAGE_FREE);
+ if (UNIV_LIKELY_NULL(free_rec)) {
+ /* Try to allocate from the head of the free list. */
+ ulint foffsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* foffsets = foffsets_;
+ mem_heap_t* heap = NULL;
+
+ rec_offs_init(foffsets_);
+
+ foffsets = rec_get_offsets(free_rec, index, foffsets,
+ ULINT_UNDEFINED, &heap);
+ if (rec_offs_size(foffsets) < rec_size) {
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+
+ goto use_heap;
+ }
+
+ insert_buf = free_rec - rec_offs_extra_size(foffsets);
+
+ if (page_is_comp(page)) {
+ heap_no = rec_get_heap_no_new(free_rec);
+ page_mem_alloc_free(page, NULL,
+ rec_get_next_ptr(free_rec, TRUE),
+ rec_size);
+ } else {
+ heap_no = rec_get_heap_no_old(free_rec);
+ page_mem_alloc_free(page, NULL,
+ rec_get_next_ptr(free_rec, FALSE),
+ rec_size);
+ }
+
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
- return(NULL);
+ } else {
+use_heap:
+ free_rec = NULL;
+ insert_buf = page_mem_alloc_heap(page, NULL,
+ rec_size, &heap_no);
+
+ if (UNIV_UNLIKELY(insert_buf == NULL)) {
+ return(NULL);
+ }
}
/* 3. Create the record */
- if (tuple != NULL) {
- insert_rec = rec_convert_dtuple_to_rec(insert_buf,
- index, tuple);
- offsets = rec_get_offsets(insert_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
+ insert_rec = rec_copy(insert_buf, rec, offsets);
+ rec_offs_make_valid(insert_rec, index, offsets);
+
+ /* 4. Insert the record in the linked list of records */
+ ut_ad(current_rec != insert_rec);
+
+ {
+ /* next record after current before the insertion */
+ rec_t* next_rec = page_rec_get_next(current_rec);
+#ifdef UNIV_DEBUG
+ if (page_is_comp(page)) {
+ ut_ad(rec_get_status(current_rec)
+ <= REC_STATUS_INFIMUM);
+ ut_ad(rec_get_status(insert_rec) < REC_STATUS_INFIMUM);
+ ut_ad(rec_get_status(next_rec) != REC_STATUS_INFIMUM);
+ }
+#endif
+ page_rec_set_next(insert_rec, next_rec);
+ page_rec_set_next(current_rec, insert_rec);
+ }
+
+ page_header_set_field(page, NULL, PAGE_N_RECS,
+ 1 + page_get_n_recs(page));
+
+ /* 5. Set the n_owned field in the inserted record to zero,
+ and set the heap_no field */
+ if (page_is_comp(page)) {
+ rec_set_n_owned_new(insert_rec, NULL, 0);
+ rec_set_heap_no_new(insert_rec, heap_no);
} else {
- insert_rec = rec_copy(insert_buf, rec, offsets);
- ut_ad(rec_offs_validate(rec, index, offsets));
- rec_offs_make_valid(insert_rec, index, offsets);
+ rec_set_n_owned_old(insert_rec, 0);
+ rec_set_heap_no_old(insert_rec, heap_no);
}
- ut_ad(insert_rec);
- ut_ad(rec_size == rec_offs_size(offsets));
+ UNIV_MEM_ASSERT_RW(rec_get_start(insert_rec, offsets),
+ rec_offs_size(offsets));
+ /* 6. Update the last insertion info in page header */
- /* 4. Insert the record in the linked list of records */
- current_rec = cursor->rec;
+ last_insert = page_header_get_ptr(page, PAGE_LAST_INSERT);
+ ut_ad(!last_insert || !page_is_comp(page)
+ || rec_get_node_ptr_flag(last_insert)
+ == rec_get_node_ptr_flag(insert_rec));
+
+ if (UNIV_UNLIKELY(last_insert == NULL)) {
+ page_header_set_field(page, NULL, PAGE_DIRECTION,
+ PAGE_NO_DIRECTION);
+ page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);
+
+ } else if ((last_insert == current_rec)
+ && (page_header_get_field(page, PAGE_DIRECTION)
+ != PAGE_LEFT)) {
+
+ page_header_set_field(page, NULL, PAGE_DIRECTION,
+ PAGE_RIGHT);
+ page_header_set_field(page, NULL, PAGE_N_DIRECTION,
+ page_header_get_field(
+ page, PAGE_N_DIRECTION) + 1);
+
+ } else if ((page_rec_get_next(insert_rec) == last_insert)
+ && (page_header_get_field(page, PAGE_DIRECTION)
+ != PAGE_RIGHT)) {
+
+ page_header_set_field(page, NULL, PAGE_DIRECTION,
+ PAGE_LEFT);
+ page_header_set_field(page, NULL, PAGE_N_DIRECTION,
+ page_header_get_field(
+ page, PAGE_N_DIRECTION) + 1);
+ } else {
+ page_header_set_field(page, NULL, PAGE_DIRECTION,
+ PAGE_NO_DIRECTION);
+ page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);
+ }
+
+ page_header_set_ptr(page, NULL, PAGE_LAST_INSERT, insert_rec);
+
+ /* 7. It remains to update the owner record. */
+ {
+ rec_t* owner_rec = page_rec_find_owner_rec(insert_rec);
+ ulint n_owned;
+ if (page_is_comp(page)) {
+ n_owned = rec_get_n_owned_new(owner_rec);
+ rec_set_n_owned_new(owner_rec, NULL, n_owned + 1);
+ } else {
+ n_owned = rec_get_n_owned_old(owner_rec);
+ rec_set_n_owned_old(owner_rec, n_owned + 1);
+ }
+
+ /* 8. Now we have incremented the n_owned field of the owner
+ record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED,
+ we have to split the corresponding directory slot in two. */
+
+ if (UNIV_UNLIKELY(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED)) {
+ page_dir_split_slot(
+ page, NULL,
+ page_dir_find_owner_slot(owner_rec));
+ }
+ }
+
+ /* 9. Write log record of the insert */
+ if (UNIV_LIKELY(mtr != NULL)) {
+ page_cur_insert_rec_write_log(insert_rec, rec_size,
+ current_rec, index, mtr);
+ }
+
+ return(insert_rec);
+}
+
+/***********************************************************//**
+Compresses or reorganizes a page after an optimistic insert.
+@return rec if succeed, NULL otherwise */
+static
+rec_t*
+page_cur_insert_rec_zip_reorg(
+/*==========================*/
+ rec_t** current_rec,/*!< in/out: pointer to current record after
+ which the new record is inserted */
+ buf_block_t* block, /*!< in: buffer block */
+ dict_index_t* index, /*!< in: record descriptor */
+ rec_t* rec, /*!< in: inserted record */
+ page_t* page, /*!< in: uncompressed page */
+ page_zip_des_t* page_zip,/*!< in: compressed page */
+ mtr_t* mtr) /*!< in: mini-transaction, or NULL */
+{
+ ulint pos;
+
+ /* Recompress or reorganize and recompress the page. */
+ if (UNIV_LIKELY(page_zip_compress(page_zip, page, index, mtr))) {
+ return(rec);
+ }
+
+ /* Before trying to reorganize the page,
+ store the number of preceding records on the page. */
+ pos = page_rec_get_n_recs_before(rec);
+
+ if (page_zip_reorganize(block, index, mtr)) {
+ /* The page was reorganized: Find rec by seeking to pos,
+ and update *current_rec. */
+ rec = page + PAGE_NEW_INFIMUM;
+
+ while (--pos) {
+ rec = page + rec_get_next_offs(rec, TRUE);
+ }
+
+ *current_rec = rec;
+ rec = page + rec_get_next_offs(rec, TRUE);
+
+ return(rec);
+ }
+
+ /* Out of space: restore the page */
+ if (!page_zip_decompress(page_zip, page, FALSE)) {
+ ut_error; /* Memory corrupted? */
+ }
+ ut_ad(page_validate(page, index));
+ return(NULL);
+}
+
+/***********************************************************//**
+Inserts a record next to page cursor on a compressed and uncompressed
+page. Returns pointer to inserted record if succeed, i.e.,
+enough space available, NULL otherwise.
+The cursor stays at the same position.
+@return pointer to record if succeed, NULL otherwise */
+UNIV_INTERN
+rec_t*
+page_cur_insert_rec_zip(
+/*====================*/
+ rec_t** current_rec,/*!< in/out: pointer to current record after
+ which the new record is inserted */
+ buf_block_t* block, /*!< in: buffer block of *current_rec */
+ dict_index_t* index, /*!< in: record descriptor */
+ const rec_t* rec, /*!< in: pointer to a physical record */
+ ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
+ mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
+{
+ byte* insert_buf;
+ ulint rec_size;
+ page_t* page; /*!< the relevant page */
+ rec_t* last_insert; /*!< cursor position at previous
+ insert */
+ rec_t* free_rec; /*!< a free record that was reused,
+ or NULL */
+ rec_t* insert_rec; /*!< inserted record */
+ ulint heap_no; /*!< heap number of the inserted
+ record */
+ page_zip_des_t* page_zip;
- ut_ad(!comp || rec_get_status(current_rec) <= REC_STATUS_INFIMUM);
- ut_ad(!comp || rec_get_status(insert_rec) < REC_STATUS_INFIMUM);
+ page_zip = buf_block_get_page_zip(block);
+ ut_ad(page_zip);
+
+ ut_ad(rec_offs_validate(rec, index, offsets));
+
+ page = page_align(*current_rec);
+ ut_ad(dict_table_is_comp(index->table));
+ ut_ad(page_is_comp(page));
+
+ ut_ad(!page_rec_is_supremum(*current_rec));
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+
+ /* 1. Get the size of the physical record in the page */
+ rec_size = rec_offs_size(offsets);
+
+#ifdef UNIV_DEBUG_VALGRIND
+ {
+ const void* rec_start
+ = rec - rec_offs_extra_size(offsets);
+ ulint extra_size
+ = rec_offs_extra_size(offsets)
+ - (rec_offs_comp(offsets)
+ ? REC_N_NEW_EXTRA_BYTES
+ : REC_N_OLD_EXTRA_BYTES);
+
+ /* All data bytes of the record must be valid. */
+ UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
+ /* The variable-length header must be valid. */
+ UNIV_MEM_ASSERT_RW(rec_start, extra_size);
+ }
+#endif /* UNIV_DEBUG_VALGRIND */
- next_rec = page_rec_get_next(current_rec);
- ut_ad(!comp || rec_get_status(next_rec) != REC_STATUS_INFIMUM);
- page_rec_set_next(insert_rec, next_rec);
- page_rec_set_next(current_rec, insert_rec);
+ /* 2. Try to find suitable space from page memory management */
+ if (!page_zip_available(page_zip, dict_index_is_clust(index),
+ rec_size, 1)) {
+
+ /* Try compressing the whole page afterwards. */
+ insert_rec = page_cur_insert_rec_low(*current_rec,
+ index, rec, offsets,
+ NULL);
+
+ if (UNIV_LIKELY(insert_rec != NULL)) {
+ insert_rec = page_cur_insert_rec_zip_reorg(
+ current_rec, block, index, insert_rec,
+ page, page_zip, mtr);
+ }
+
+ return(insert_rec);
+ }
+
+ free_rec = page_header_get_ptr(page, PAGE_FREE);
+ if (UNIV_LIKELY_NULL(free_rec)) {
+ /* Try to allocate from the head of the free list. */
+ lint extra_size_diff;
+ ulint foffsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* foffsets = foffsets_;
+ mem_heap_t* heap = NULL;
+
+ rec_offs_init(foffsets_);
+
+ foffsets = rec_get_offsets(free_rec, index, foffsets,
+ ULINT_UNDEFINED, &heap);
+ if (rec_offs_size(foffsets) < rec_size) {
+too_small:
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+
+ goto use_heap;
+ }
+
+ insert_buf = free_rec - rec_offs_extra_size(foffsets);
+
+ /* On compressed pages, do not relocate records from
+ the free list. If extra_size would grow, use the heap. */
+ extra_size_diff
+ = rec_offs_extra_size(offsets)
+ - rec_offs_extra_size(foffsets);
+
+ if (UNIV_UNLIKELY(extra_size_diff < 0)) {
+ /* Add an offset to the extra_size. */
+ if (rec_offs_size(foffsets)
+ < rec_size - extra_size_diff) {
+
+ goto too_small;
+ }
- page_header_set_field(page, PAGE_N_RECS, 1 + page_get_n_recs(page));
+ insert_buf -= extra_size_diff;
+ } else if (UNIV_UNLIKELY(extra_size_diff)) {
+ /* Do not allow extra_size to grow */
+
+ goto too_small;
+ }
+
+ heap_no = rec_get_heap_no_new(free_rec);
+ page_mem_alloc_free(page, page_zip,
+ rec_get_next_ptr(free_rec, TRUE),
+ rec_size);
+
+ if (!page_is_leaf(page)) {
+ /* Zero out the node pointer of free_rec,
+ in case it will not be overwritten by
+ insert_rec. */
+
+ ut_ad(rec_size > REC_NODE_PTR_SIZE);
+
+ if (rec_offs_extra_size(foffsets)
+ + rec_offs_data_size(foffsets) > rec_size) {
+
+ memset(rec_get_end(free_rec, foffsets)
+ - REC_NODE_PTR_SIZE, 0,
+ REC_NODE_PTR_SIZE);
+ }
+ } else if (dict_index_is_clust(index)) {
+ /* Zero out the DB_TRX_ID and DB_ROLL_PTR
+ columns of free_rec, in case it will not be
+ overwritten by insert_rec. */
+
+ ulint trx_id_col;
+ ulint trx_id_offs;
+ ulint len;
+
+ trx_id_col = dict_index_get_sys_col_pos(index,
+ DATA_TRX_ID);
+ ut_ad(trx_id_col > 0);
+ ut_ad(trx_id_col != ULINT_UNDEFINED);
+
+ trx_id_offs = rec_get_nth_field_offs(foffsets,
+ trx_id_col, &len);
+ ut_ad(len == DATA_TRX_ID_LEN);
+
+ if (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN + trx_id_offs
+ + rec_offs_extra_size(foffsets) > rec_size) {
+ /* We will have to zero out the
+ DB_TRX_ID and DB_ROLL_PTR, because
+ they will not be fully overwritten by
+ insert_rec. */
+
+ memset(free_rec + trx_id_offs, 0,
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+ }
+
+ ut_ad(free_rec + trx_id_offs + DATA_TRX_ID_LEN
+ == rec_get_nth_field(free_rec, foffsets,
+ trx_id_col + 1, &len));
+ ut_ad(len == DATA_ROLL_PTR_LEN);
+ }
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+ } else {
+use_heap:
+ free_rec = NULL;
+ insert_buf = page_mem_alloc_heap(page, page_zip,
+ rec_size, &heap_no);
+
+ if (UNIV_UNLIKELY(insert_buf == NULL)) {
+ return(NULL);
+ }
+
+ page_zip_dir_add_slot(page_zip, dict_index_is_clust(index));
+ }
+
+ /* 3. Create the record */
+ insert_rec = rec_copy(insert_buf, rec, offsets);
+ rec_offs_make_valid(insert_rec, index, offsets);
+
+ /* 4. Insert the record in the linked list of records */
+ ut_ad(*current_rec != insert_rec);
+
+ {
+ /* next record after current before the insertion */
+ rec_t* next_rec = page_rec_get_next(*current_rec);
+ ut_ad(rec_get_status(*current_rec)
+ <= REC_STATUS_INFIMUM);
+ ut_ad(rec_get_status(insert_rec) < REC_STATUS_INFIMUM);
+ ut_ad(rec_get_status(next_rec) != REC_STATUS_INFIMUM);
+
+ page_rec_set_next(insert_rec, next_rec);
+ page_rec_set_next(*current_rec, insert_rec);
+ }
+
+ page_header_set_field(page, page_zip, PAGE_N_RECS,
+ 1 + page_get_n_recs(page));
/* 5. Set the n_owned field in the inserted record to zero,
and set the heap_no field */
+ rec_set_n_owned_new(insert_rec, NULL, 0);
+ rec_set_heap_no_new(insert_rec, heap_no);
- rec_set_n_owned(insert_rec, comp, 0);
- rec_set_heap_no(insert_rec, comp, heap_no);
+ UNIV_MEM_ASSERT_RW(rec_get_start(insert_rec, offsets),
+ rec_offs_size(offsets));
+
+ page_zip_dir_insert(page_zip, *current_rec, free_rec, insert_rec);
/* 6. Update the last insertion info in page header */
last_insert = page_header_get_ptr(page, PAGE_LAST_INSERT);
- ut_ad(!last_insert || !comp
+ ut_ad(!last_insert
|| rec_get_node_ptr_flag(last_insert)
== rec_get_node_ptr_flag(insert_rec));
- if (last_insert == NULL) {
- page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
- page_header_set_field(page, PAGE_N_DIRECTION, 0);
+ if (UNIV_UNLIKELY(last_insert == NULL)) {
+ page_header_set_field(page, page_zip, PAGE_DIRECTION,
+ PAGE_NO_DIRECTION);
+ page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
- } else if ((last_insert == current_rec)
+ } else if ((last_insert == *current_rec)
&& (page_header_get_field(page, PAGE_DIRECTION)
!= PAGE_LEFT)) {
- page_header_set_field(page, PAGE_DIRECTION, PAGE_RIGHT);
- page_header_set_field(page, PAGE_N_DIRECTION,
+ page_header_set_field(page, page_zip, PAGE_DIRECTION,
+ PAGE_RIGHT);
+ page_header_set_field(page, page_zip, PAGE_N_DIRECTION,
page_header_get_field(
page, PAGE_N_DIRECTION) + 1);
@@ -1028,53 +1456,61 @@ page_cur_insert_rec_low(
&& (page_header_get_field(page, PAGE_DIRECTION)
!= PAGE_RIGHT)) {
- page_header_set_field(page, PAGE_DIRECTION, PAGE_LEFT);
- page_header_set_field(page, PAGE_N_DIRECTION,
+ page_header_set_field(page, page_zip, PAGE_DIRECTION,
+ PAGE_LEFT);
+ page_header_set_field(page, page_zip, PAGE_N_DIRECTION,
page_header_get_field(
page, PAGE_N_DIRECTION) + 1);
} else {
- page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
- page_header_set_field(page, PAGE_N_DIRECTION, 0);
+ page_header_set_field(page, page_zip, PAGE_DIRECTION,
+ PAGE_NO_DIRECTION);
+ page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
}
- page_header_set_ptr(page, PAGE_LAST_INSERT, insert_rec);
+ page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, insert_rec);
/* 7. It remains to update the owner record. */
+ {
+ rec_t* owner_rec = page_rec_find_owner_rec(insert_rec);
+ ulint n_owned;
- owner_rec = page_rec_find_owner_rec(insert_rec);
- n_owned = rec_get_n_owned(owner_rec, comp);
- rec_set_n_owned(owner_rec, comp, n_owned + 1);
+ n_owned = rec_get_n_owned_new(owner_rec);
+ rec_set_n_owned_new(owner_rec, page_zip, n_owned + 1);
- /* 8. Now we have incremented the n_owned field of the owner
- record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED,
- we have to split the corresponding directory slot in two. */
+ /* 8. Now we have incremented the n_owned field of the owner
+ record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED,
+ we have to split the corresponding directory slot in two. */
- if (n_owned == PAGE_DIR_SLOT_MAX_N_OWNED) {
- owner_slot = page_dir_find_owner_slot(owner_rec);
- page_dir_split_slot(page, owner_slot);
+ if (UNIV_UNLIKELY(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED)) {
+ page_dir_split_slot(
+ page, page_zip,
+ page_dir_find_owner_slot(owner_rec));
+ }
}
- /* 9. Write log record of the insert */
- page_cur_insert_rec_write_log(insert_rec, rec_size, current_rec,
- index, mtr);
+ page_zip_write_rec(page_zip, insert_rec, index, offsets, 1);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
+ /* 9. Write log record of the insert */
+ if (UNIV_LIKELY(mtr != NULL)) {
+ page_cur_insert_rec_write_log(insert_rec, rec_size,
+ *current_rec, index, mtr);
}
+
return(insert_rec);
}
-/**************************************************************
-Writes a log record of copying a record list end to a new created page. */
+#ifndef UNIV_HOTBACKUP
+/**********************************************************//**
+Writes a log record of copying a record list end to a new created page.
+@return 4-byte field where to write the log data length, or NULL if
+logging is disabled */
UNIV_INLINE
byte*
page_copy_rec_list_to_created_page_write_log(
/*=========================================*/
- /* out: 4-byte field where to
- write the log data length */
- page_t* page, /* in: index page */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr) /* in: mtr */
+ page_t* page, /*!< in: index page */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
{
byte* log_ptr;
@@ -1084,27 +1520,31 @@ page_copy_rec_list_to_created_page_write_log(
page_is_comp(page)
? MLOG_COMP_LIST_END_COPY_CREATED
: MLOG_LIST_END_COPY_CREATED, 4);
- ut_a(log_ptr);
- mlog_close(mtr, log_ptr + 4);
+ if (UNIV_LIKELY(log_ptr != NULL)) {
+ mlog_close(mtr, log_ptr + 4);
+ }
return(log_ptr);
}
+#endif /* !UNIV_HOTBACKUP */
-/**************************************************************
-Parses a log record of copying a record list end to a new created page. */
-
+/**********************************************************//**
+Parses a log record of copying a record list end to a new created page.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
page_parse_copy_rec_list_to_created_page(
/*=====================================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- dict_index_t* index, /* in: record descriptor */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr) /* in: mtr or NULL */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ buf_block_t* block, /*!< in: page or NULL */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr or NULL */
{
- byte* rec_end;
- ulint log_data_len;
+ byte* rec_end;
+ ulint log_data_len;
+ page_t* page;
+ page_zip_des_t* page_zip;
if (ptr + 4 > end_ptr) {
@@ -1121,37 +1561,41 @@ page_parse_copy_rec_list_to_created_page(
return(NULL);
}
- if (!page) {
+ if (!block) {
return(rec_end);
}
while (ptr < rec_end) {
ptr = page_cur_parse_insert_rec(TRUE, ptr, end_ptr,
- index, page, mtr);
+ block, index, mtr);
}
ut_a(ptr == rec_end);
- page_header_set_ptr(page, PAGE_LAST_INSERT, NULL);
- page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
- page_header_set_field(page, PAGE_N_DIRECTION, 0);
+ page = buf_block_get_frame(block);
+ page_zip = buf_block_get_page_zip(block);
+
+ page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
+ page_header_set_field(page, page_zip, PAGE_DIRECTION,
+ PAGE_NO_DIRECTION);
+ page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
return(rec_end);
}
-/*****************************************************************
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
Copies records from page to a newly created page, from a given record onward,
including that record. Infimum and supremum records are not copied. */
-
+UNIV_INTERN
void
page_copy_rec_list_end_to_created_page(
/*===================================*/
- page_t* new_page, /* in: index page to copy to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: first record to copy */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr) /* in: mtr */
+ page_t* new_page, /*!< in/out: index page to copy to */
+ rec_t* rec, /*!< in: first record to copy */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
{
page_dir_slot_t* slot = 0; /* remove warning */
byte* heap_top;
@@ -1164,22 +1608,21 @@ page_copy_rec_list_end_to_created_page(
ulint log_mode;
byte* log_ptr;
ulint log_data_len;
- ulint comp = page_is_comp(page);
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
- ut_ad(page_dir_get_n_heap(new_page) == 2);
- ut_ad(page != new_page);
- ut_ad(comp == page_is_comp(new_page));
+ ut_ad(page_dir_get_n_heap(new_page) == PAGE_HEAP_NO_USER_LOW);
+ ut_ad(page_align(rec) != new_page);
+ ut_ad(page_rec_is_comp(rec) == page_is_comp(new_page));
- if (rec == page_get_infimum_rec(page)) {
+ if (page_rec_is_infimum(rec)) {
rec = page_rec_get_next(rec);
}
- if (rec == page_get_supremum_rec(page)) {
+ if (page_rec_is_supremum(rec)) {
return;
}
@@ -1187,8 +1630,8 @@ page_copy_rec_list_end_to_created_page(
#ifdef UNIV_DEBUG
/* To pass the debug tests we have to set these dummy values
in the debug version */
- page_dir_set_n_slots(new_page, UNIV_PAGE_SIZE / 2);
- page_header_set_ptr(new_page, PAGE_HEAP_TOP,
+ page_dir_set_n_slots(new_page, NULL, UNIV_PAGE_SIZE / 2);
+ page_header_set_ptr(new_page, NULL, PAGE_HEAP_TOP,
new_page + UNIV_PAGE_SIZE - 1);
#endif
@@ -1202,7 +1645,7 @@ page_copy_rec_list_end_to_created_page(
log_mode = mtr_set_log_mode(mtr, MTR_LOG_SHORT_INSERTS);
prev_rec = page_get_infimum_rec(new_page);
- if (comp) {
+ if (page_is_comp(new_page)) {
heap_top = new_page + PAGE_NEW_SUPREMUM_END;
} else {
heap_top = new_page + PAGE_OLD_SUPREMUM_END;
@@ -1211,43 +1654,54 @@ page_copy_rec_list_end_to_created_page(
slot_index = 0;
n_recs = 0;
- /* should be do ... until, comment by Jani */
- while (rec != page_get_supremum_rec(page)) {
+ do {
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &heap);
insert_rec = rec_copy(heap_top, rec, offsets);
- rec_set_next_offs(prev_rec, comp, insert_rec - new_page);
-
- rec_set_n_owned(insert_rec, comp, 0);
- rec_set_heap_no(insert_rec, comp, 2 + n_recs);
+ if (page_is_comp(new_page)) {
+ rec_set_next_offs_new(prev_rec,
+ page_offset(insert_rec));
- rec_size = rec_offs_size(offsets);
-
- heap_top = heap_top + rec_size;
+ rec_set_n_owned_new(insert_rec, NULL, 0);
+ rec_set_heap_no_new(insert_rec,
+ PAGE_HEAP_NO_USER_LOW + n_recs);
+ } else {
+ rec_set_next_offs_old(prev_rec,
+ page_offset(insert_rec));
- ut_ad(heap_top < new_page + UNIV_PAGE_SIZE);
+ rec_set_n_owned_old(insert_rec, 0);
+ rec_set_heap_no_old(insert_rec,
+ PAGE_HEAP_NO_USER_LOW + n_recs);
+ }
count++;
n_recs++;
- if (count == (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2) {
+ if (UNIV_UNLIKELY
+ (count == (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2)) {
slot_index++;
slot = page_dir_get_nth_slot(new_page, slot_index);
page_dir_slot_set_rec(slot, insert_rec);
- page_dir_slot_set_n_owned(slot, count);
+ page_dir_slot_set_n_owned(slot, NULL, count);
count = 0;
}
+ rec_size = rec_offs_size(offsets);
+
+ ut_ad(heap_top < new_page + UNIV_PAGE_SIZE);
+
+ heap_top += rec_size;
+
page_cur_insert_rec_write_log(insert_rec, rec_size, prev_rec,
index, mtr);
prev_rec = insert_rec;
rec = page_rec_get_next(rec);
- }
+ } while (!page_rec_is_supremum(rec));
if ((slot_index > 0) && (count + 1
+ (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2
@@ -1261,7 +1715,7 @@ page_copy_rec_list_end_to_created_page(
count += (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2;
- page_dir_slot_set_n_owned(slot, 0);
+ page_dir_slot_set_n_owned(slot, NULL, 0);
slot_index--;
}
@@ -1274,39 +1728,45 @@ page_copy_rec_list_end_to_created_page(
ut_a(log_data_len < 100 * UNIV_PAGE_SIZE);
- mach_write_to_4(log_ptr, log_data_len);
+ if (UNIV_LIKELY(log_ptr != NULL)) {
+ mach_write_to_4(log_ptr, log_data_len);
+ }
- rec_set_next_offs(insert_rec, comp,
- comp ? PAGE_NEW_SUPREMUM : PAGE_OLD_SUPREMUM);
+ if (page_is_comp(new_page)) {
+ rec_set_next_offs_new(insert_rec, PAGE_NEW_SUPREMUM);
+ } else {
+ rec_set_next_offs_old(insert_rec, PAGE_OLD_SUPREMUM);
+ }
slot = page_dir_get_nth_slot(new_page, 1 + slot_index);
page_dir_slot_set_rec(slot, page_get_supremum_rec(new_page));
- page_dir_slot_set_n_owned(slot, count + 1);
+ page_dir_slot_set_n_owned(slot, NULL, count + 1);
- page_dir_set_n_slots(new_page, 2 + slot_index);
- page_header_set_ptr(new_page, PAGE_HEAP_TOP, heap_top);
- page_dir_set_n_heap(new_page, 2 + n_recs);
- page_header_set_field(new_page, PAGE_N_RECS, n_recs);
+ page_dir_set_n_slots(new_page, NULL, 2 + slot_index);
+ page_header_set_ptr(new_page, NULL, PAGE_HEAP_TOP, heap_top);
+ page_dir_set_n_heap(new_page, NULL, PAGE_HEAP_NO_USER_LOW + n_recs);
+ page_header_set_field(new_page, NULL, PAGE_N_RECS, n_recs);
- page_header_set_ptr(new_page, PAGE_LAST_INSERT, NULL);
- page_header_set_field(new_page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
- page_header_set_field(new_page, PAGE_N_DIRECTION, 0);
+ page_header_set_ptr(new_page, NULL, PAGE_LAST_INSERT, NULL);
+ page_header_set_field(new_page, NULL, PAGE_DIRECTION,
+ PAGE_NO_DIRECTION);
+ page_header_set_field(new_page, NULL, PAGE_N_DIRECTION, 0);
/* Restore the log mode */
mtr_set_log_mode(mtr, log_mode);
}
-/***************************************************************
+/***********************************************************//**
Writes log record of a record delete on a page. */
UNIV_INLINE
void
page_cur_delete_rec_write_log(
/*==========================*/
- rec_t* rec, /* in: record to be deleted */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr) /* in: mini-transaction handle */
+ rec_t* rec, /*!< in: record to be deleted */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
byte* log_ptr;
@@ -1328,19 +1788,22 @@ page_cur_delete_rec_write_log(
mlog_close(mtr, log_ptr + 2);
}
-
-/***************************************************************
-Parses log record of a record delete on a page. */
-
+#else /* !UNIV_HOTBACKUP */
+# define page_cur_delete_rec_write_log(rec,index,mtr) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+
+/***********************************************************//**
+Parses log record of a record delete on a page.
+@return pointer to record end or NULL */
+UNIV_INTERN
byte*
page_cur_parse_delete_rec(
/*======================*/
- /* out: pointer to record end or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- dict_index_t* index, /* in: record descriptor */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr) /* in: mtr or NULL */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ buf_block_t* block, /*!< in: block of the page, or NULL */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr or NULL */
{
ulint offset;
page_cur_t cursor;
@@ -1356,13 +1819,15 @@ page_cur_parse_delete_rec(
ut_a(offset <= UNIV_PAGE_SIZE);
- if (page) {
+ if (block) {
+ page_t* page = buf_block_get_frame(block);
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
rec_t* rec = page + offset;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
- page_cur_position(rec, &cursor);
+ page_cur_position(rec, block, &cursor);
+ ut_ad(!buf_block_get_page_zip(block) || page_is_comp(page));
page_cur_delete_rec(&cursor, index,
rec_get_offsets(rec, index, offsets_,
@@ -1376,21 +1841,22 @@ page_cur_parse_delete_rec(
return(ptr);
}
-/***************************************************************
+/***********************************************************//**
Deletes a record at the page cursor. The cursor is moved to the next
record after the deleted one. */
-
+UNIV_INTERN
void
page_cur_delete_rec(
/*================*/
- page_cur_t* cursor, /* in: a page cursor */
- dict_index_t* index, /* in: record descriptor */
- const ulint* offsets,/* in: rec_get_offsets(cursor->rec, index) */
- mtr_t* mtr) /* in: mini-transaction handle */
+ page_cur_t* cursor, /*!< in/out: a page cursor */
+ dict_index_t* index, /*!< in: record descriptor */
+ const ulint* offsets,/*!< in: rec_get_offsets(cursor->rec, index) */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
page_dir_slot_t* cur_dir_slot;
page_dir_slot_t* prev_slot;
page_t* page;
+ page_zip_des_t* page_zip;
rec_t* current_rec;
rec_t* prev_rec = NULL;
rec_t* next_rec;
@@ -1401,13 +1867,22 @@ page_cur_delete_rec(
ut_ad(cursor && mtr);
page = page_cur_get_page(cursor);
+ page_zip = page_cur_get_page_zip(cursor);
+
+ /* page_zip_validate() will fail here when
+ btr_cur_pessimistic_delete() invokes btr_set_min_rec_mark().
+ Then, both "page_zip" and "page" would have the min-rec-mark
+ set on the smallest user record, but "page" would additionally
+ have it set on the smallest-but-one record. Because sloppy
+ page_zip_validate_low() only ignores min-rec-flag differences
+ in the smallest user record, it cannot be used here either. */
+
current_rec = cursor->rec;
ut_ad(rec_offs_validate(current_rec, index, offsets));
ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
/* The record must not be the supremum or infimum record. */
- ut_ad(current_rec != page_get_supremum_rec(page));
- ut_ad(current_rec != page_get_infimum_rec(page));
+ ut_ad(page_rec_is_user_rec(current_rec));
/* Save to local variables some data associated with current_rec */
cur_slot_no = page_dir_find_owner_slot(current_rec);
@@ -1420,12 +1895,12 @@ page_cur_delete_rec(
/* 1. Reset the last insert info in the page header and increment
the modify clock for the frame */
- page_header_set_ptr(page, PAGE_LAST_INSERT, NULL);
+ page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
/* The page gets invalid for optimistic searches: increment the
frame modify clock */
- buf_frame_modify_clock_inc(page);
+ buf_block_modify_clock_inc(page_cur_get_block(cursor));
/* 2. Find the next and the previous record. Note that the cursor is
left at the next record. */
@@ -1433,7 +1908,7 @@ page_cur_delete_rec(
ut_ad(cur_slot_no > 0);
prev_slot = page_dir_get_nth_slot(page, cur_slot_no - 1);
- rec = page_dir_slot_get_rec(prev_slot);
+ rec = (rec_t*) page_dir_slot_get_rec(prev_slot);
/* rec now points to the record of the previous directory slot. Look
for the immediate predecessor of current_rec in a loop. */
@@ -1449,8 +1924,6 @@ page_cur_delete_rec(
/* 3. Remove the record from the linked list of records */
page_rec_set_next(prev_rec, next_rec);
- page_header_set_field(page, PAGE_N_RECS,
- (ulint)(page_get_n_recs(page) - 1));
/* 4. If the deleted record is pointed to by a dir slot, update the
record pointer in slot. In the following if-clause we assume that
@@ -1468,29 +1941,33 @@ page_cur_delete_rec(
/* 5. Update the number of owned records of the slot */
- page_dir_slot_set_n_owned(cur_dir_slot, cur_n_owned - 1);
+ page_dir_slot_set_n_owned(cur_dir_slot, page_zip, cur_n_owned - 1);
/* 6. Free the memory occupied by the record */
- page_mem_free(page, current_rec, offsets);
+ page_mem_free(page, page_zip, current_rec, index, offsets);
/* 7. Now we have decremented the number of owned records of the slot.
If the number drops below PAGE_DIR_SLOT_MIN_N_OWNED, we balance the
slots. */
- if (cur_n_owned <= PAGE_DIR_SLOT_MIN_N_OWNED) {
- page_dir_balance_slot(page, cur_slot_no);
+ if (UNIV_UNLIKELY(cur_n_owned <= PAGE_DIR_SLOT_MIN_N_OWNED)) {
+ page_dir_balance_slot(page, page_zip, cur_slot_no);
}
+
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
}
#ifdef UNIV_COMPILE_TEST_FUNCS
-/***********************************************************************
+/*******************************************************************//**
Print the first n numbers, generated by page_cur_lcg_prng() to make sure
(visually) that it works properly. */
void
test_page_cur_lcg_prng(
/*===================*/
- int n) /* in: print first n numbers */
+ int n) /*!< in: print first n numbers */
{
int i;
unsigned long long rnd;
diff --git a/storage/innobase/page/page0page.c b/storage/innobase/page/page0page.c
index 543cf9e34eb..ab2ba60570e 100644
--- a/storage/innobase/page/page0page.c
+++ b/storage/innobase/page/page0page.c
@@ -1,7 +1,24 @@
-/******************************************************
-Index page routines
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1994-1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file page/page0page.c
+Index page routines
Created 2/2/1994 Heikki Tuuri
*******************************************************/
@@ -14,18 +31,21 @@ Created 2/2/1994 Heikki Tuuri
#undef THIS_MODULE
#include "page0cur.h"
-#include "lock0lock.h"
-#include "fut0lst.h"
-#include "btr0sea.h"
+#include "page0zip.h"
#include "buf0buf.h"
-#include "srv0srv.h"
#include "btr0btr.h"
+#ifndef UNIV_HOTBACKUP
+# include "srv0srv.h"
+# include "lock0lock.h"
+# include "fut0lst.h"
+# include "btr0sea.h"
+#endif /* !UNIV_HOTBACKUP */
/* THE INDEX PAGE
==============
The index page consists of a page header which contains the page's
-id and other information. On top of it are the the index records
+id and other information. On top of it are the index records
in a heap linked into a one way linear list according to alphabetic order.
Just below page end is an array of pointers which we call page directory,
@@ -63,36 +83,36 @@ Assuming a page size of 8 kB, a typical index page of a secondary
index contains 300 index entries, and the size of the page directory
is 50 x 4 bytes = 200 bytes. */
-/*******************************************************************
-Looks for the directory slot which owns the given record. */
-
+/***************************************************************//**
+Looks for the directory slot which owns the given record.
+@return the directory slot number */
+UNIV_INTERN
ulint
page_dir_find_owner_slot(
/*=====================*/
- /* out: the directory slot number */
- rec_t* rec) /* in: the physical record */
+ const rec_t* rec) /*!< in: the physical record */
{
- page_t* page;
+ const page_t* page;
register uint16 rec_offs_bytes;
- register page_dir_slot_t* slot;
+ register const page_dir_slot_t* slot;
register const page_dir_slot_t* first_slot;
- register rec_t* r = rec;
+ register const rec_t* r = rec;
ut_ad(page_rec_check(rec));
- page = buf_frame_align(rec);
+ page = page_align(rec);
first_slot = page_dir_get_nth_slot(page, 0);
slot = page_dir_get_nth_slot(page, page_dir_get_n_slots(page) - 1);
if (page_is_comp(page)) {
- while (rec_get_n_owned(r, TRUE) == 0) {
- r = page + rec_get_next_offs(r, TRUE);
+ while (rec_get_n_owned_new(r) == 0) {
+ r = rec_get_next_ptr_const(r, TRUE);
ut_ad(r >= page + PAGE_NEW_SUPREMUM);
ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR));
}
} else {
- while (rec_get_n_owned(r, FALSE) == 0) {
- r = page + rec_get_next_offs(r, FALSE);
+ while (rec_get_n_owned_old(r) == 0) {
+ r = rec_get_next_ptr_const(r, FALSE);
ut_ad(r >= page + PAGE_OLD_SUPREMUM);
ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR));
}
@@ -107,7 +127,7 @@ page_dir_find_owner_slot(
"InnoDB: Probable data corruption on"
" page %lu\n"
"InnoDB: Original record ",
- (ulong) buf_frame_get_page_no(page));
+ (ulong) page_get_page_no(page));
if (page_is_comp(page)) {
fputs("(compact record)", stderr);
@@ -128,7 +148,7 @@ page_dir_find_owner_slot(
fputs("\n"
"InnoDB: on that page!\n", stderr);
- buf_page_print(page);
+ buf_page_print(page, 0);
ut_error;
}
@@ -139,14 +159,14 @@ page_dir_find_owner_slot(
return(((ulint) (first_slot - slot)) / PAGE_DIR_SLOT_SIZE);
}
-/******************************************************************
-Used to check the consistency of a directory slot. */
+/**************************************************************//**
+Used to check the consistency of a directory slot.
+@return TRUE if the check succeeds */
static
ibool
page_dir_slot_check(
/*================*/
- /* out: TRUE if succeed */
- page_dir_slot_t* slot) /* in: slot */
+ page_dir_slot_t* slot) /*!< in: slot */
{
page_t* page;
ulint n_slots;
@@ -154,7 +174,7 @@ page_dir_slot_check(
ut_a(slot);
- page = buf_frame_align(slot);
+ page = page_align(slot);
n_slots = page_dir_get_n_slots(page);
@@ -163,8 +183,11 @@ page_dir_slot_check(
ut_a(page_rec_check(page_dir_slot_get_rec(slot)));
- n_owned = rec_get_n_owned(page_dir_slot_get_rec(slot),
- page_is_comp(page));
+ if (page_is_comp(page)) {
+ n_owned = rec_get_n_owned_new(page_dir_slot_get_rec(slot));
+ } else {
+ n_owned = rec_get_n_owned_old(page_dir_slot_get_rec(slot));
+ }
if (slot == page_dir_get_nth_slot(page, 0)) {
ut_a(n_owned == 1);
@@ -179,119 +202,84 @@ page_dir_slot_check(
return(TRUE);
}
-/*****************************************************************
+/*************************************************************//**
Sets the max trx id field value. */
-
+UNIV_INTERN
void
page_set_max_trx_id(
/*================*/
- page_t* page, /* in: page */
- dulint trx_id) /* in: transaction id */
+ buf_block_t* block, /*!< in/out: buffer block of the page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ trx_id_t trx_id, /*!< in: transaction id */
+ mtr_t* mtr) /*!< in/out: mini-transaction, or NULL */
{
- buf_block_t* block;
-
- ut_ad(page);
-
- block = buf_block_align(page);
+ page_t* page = buf_block_get_frame(block);
+#ifndef UNIV_HOTBACKUP
+ const ibool is_hashed = block->is_hashed;
- if (block->is_hashed) {
+ if (is_hashed) {
rw_lock_x_lock(&btr_search_latch);
}
+ ut_ad(!mtr || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+#endif /* !UNIV_HOTBACKUP */
+
/* It is not necessary to write this change to the redo log, as
during a database recovery we assume that the max trx id of every
page is the maximum trx id assigned before the crash. */
- mach_write_to_8(page + PAGE_HEADER + PAGE_MAX_TRX_ID, trx_id);
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ mach_write_to_8(page + (PAGE_HEADER + PAGE_MAX_TRX_ID), trx_id);
+ page_zip_write_header(page_zip,
+ page + (PAGE_HEADER + PAGE_MAX_TRX_ID),
+ 8, mtr);
+#ifndef UNIV_HOTBACKUP
+ } else if (mtr) {
+ mlog_write_dulint(page + (PAGE_HEADER + PAGE_MAX_TRX_ID),
+ trx_id, mtr);
+#endif /* !UNIV_HOTBACKUP */
+ } else {
+ mach_write_to_8(page + (PAGE_HEADER + PAGE_MAX_TRX_ID), trx_id);
+ }
- if (block->is_hashed) {
+#ifndef UNIV_HOTBACKUP
+ if (is_hashed) {
rw_lock_x_unlock(&btr_search_latch);
}
+#endif /* !UNIV_HOTBACKUP */
}
-/*****************************************************************
-Calculates free space if a page is emptied. */
-
-ulint
-page_get_free_space_of_empty_noninline(
-/*===================================*/
- /* out: free space */
- ulint comp) /* in: nonzero=compact page format */
-{
- return(page_get_free_space_of_empty(comp));
-}
-
-/****************************************************************
-Allocates a block of memory from an index page. */
-
+/************************************************************//**
+Allocates a block of memory from the heap of an index page.
+@return pointer to start of allocated buffer, or NULL if allocation fails */
+UNIV_INTERN
byte*
-page_mem_alloc(
-/*===========*/
- /* out: pointer to start of allocated
- buffer, or NULL if allocation fails */
- page_t* page, /* in: index page */
- ulint need, /* in: number of bytes needed */
- dict_index_t* index, /* in: record descriptor */
- ulint* heap_no)/* out: this contains the heap number
+page_mem_alloc_heap(
+/*================*/
+ page_t* page, /*!< in/out: index page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page with enough
+ space available for inserting the record,
+ or NULL */
+ ulint need, /*!< in: total number of bytes needed */
+ ulint* heap_no)/*!< out: this contains the heap number
of the allocated record
if allocation succeeds */
{
- rec_t* rec;
byte* block;
ulint avl_space;
- ulint garbage;
ut_ad(page && heap_no);
- /* If there are records in the free list, look if the first is
- big enough */
-
- rec = page_header_get_ptr(page, PAGE_FREE);
-
- if (rec) {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- if (rec_offs_size(offsets) >= need) {
- page_header_set_ptr(page, PAGE_FREE,
- page_rec_get_next(rec));
-
- garbage = page_header_get_field(page, PAGE_GARBAGE);
- ut_ad(garbage >= need);
-
- page_header_set_field(page, PAGE_GARBAGE,
- garbage - need);
-
- *heap_no = rec_get_heap_no(rec, page_is_comp(page));
-
- block = rec_get_start(rec, offsets);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(block);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-
- /* Could not find space from the free list, try top of heap */
-
avl_space = page_get_max_insert_size(page, 1);
if (avl_space >= need) {
block = page_header_get_ptr(page, PAGE_HEAP_TOP);
- page_header_set_ptr(page, PAGE_HEAP_TOP, block + need);
+ page_header_set_ptr(page, page_zip, PAGE_HEAP_TOP,
+ block + need);
*heap_no = page_dir_get_n_heap(page);
- page_dir_set_n_heap(page, 1 + *heap_no);
+ page_dir_set_n_heap(page, page_zip, 1 + *heap_no);
return(block);
}
@@ -299,57 +287,60 @@ page_mem_alloc(
return(NULL);
}
-/**************************************************************
+#ifndef UNIV_HOTBACKUP
+/**********************************************************//**
Writes a log record of page creation. */
UNIV_INLINE
void
page_create_write_log(
/*==================*/
- buf_frame_t* frame, /* in: a buffer frame where the page is
+ buf_frame_t* frame, /*!< in: a buffer frame where the page is
created */
- mtr_t* mtr, /* in: mini-transaction handle */
- ulint comp) /* in: nonzero=compact page format */
+ mtr_t* mtr, /*!< in: mini-transaction handle */
+ ibool comp) /*!< in: TRUE=compact page format */
{
mlog_write_initial_log_record(frame, comp
? MLOG_COMP_PAGE_CREATE
: MLOG_PAGE_CREATE, mtr);
}
-
-/***************************************************************
-Parses a redo log record of creating a page. */
-
+#else /* !UNIV_HOTBACKUP */
+# define page_create_write_log(frame,mtr,comp) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+
+/***********************************************************//**
+Parses a redo log record of creating a page.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
page_parse_create(
/*==============*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr __attribute__((unused)), /* in: buffer end */
- ulint comp, /* in: nonzero=compact page format */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr) /* in: mtr or NULL */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr __attribute__((unused)), /*!< in: buffer end */
+ ulint comp, /*!< in: nonzero=compact page format */
+ buf_block_t* block, /*!< in: block or NULL */
+ mtr_t* mtr) /*!< in: mtr or NULL */
{
ut_ad(ptr && end_ptr);
/* The record is empty, except for the record initial part */
- if (page) {
- page_create(page, mtr, comp);
+ if (block) {
+ page_create(block, mtr, comp);
}
return(ptr);
}
-/**************************************************************
-The index page creation function. */
-
+/**********************************************************//**
+The index page creation function.
+@return pointer to the page */
+static
page_t*
-page_create(
-/*========*/
- /* out: pointer to the page */
- buf_frame_t* frame, /* in: a buffer frame where the page is
- created */
- mtr_t* mtr, /* in: mini-transaction handle */
- ulint comp) /* in: nonzero=compact page format */
+page_create_low(
+/*============*/
+ buf_block_t* block, /*!< in: a buffer block where the
+ page is created */
+ ulint comp) /*!< in: nonzero=compact page format */
{
page_dir_slot_t* slot;
mem_heap_t* heap;
@@ -362,9 +353,7 @@ page_create(
dict_index_t* index;
ulint* offsets;
- index = comp ? srv_sys->dummy_ind2 : srv_sys->dummy_ind1;
-
- ut_ad(frame && mtr);
+ ut_ad(block);
#if PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA
# error "PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA"
#endif
@@ -372,13 +361,17 @@ page_create(
# error "PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA"
#endif
- /* 1. INCREMENT MODIFY CLOCK */
- buf_frame_modify_clock_inc(frame);
+ /* The infimum and supremum records use a dummy index. */
+ if (UNIV_LIKELY(comp)) {
+ index = dict_ind_compact;
+ } else {
+ index = dict_ind_redundant;
+ }
- /* 2. WRITE LOG INFORMATION */
- page_create_write_log(frame, mtr, comp);
+ /* 1. INCREMENT MODIFY CLOCK */
+ buf_block_modify_clock_inc(block);
- page = frame;
+ page = buf_block_get_frame(block);
fil_page_set_type(page, FIL_PAGE_INDEX);
@@ -399,13 +392,20 @@ page_create(
heap_top = page + PAGE_DATA;
- infimum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple);
+ infimum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple, 0);
- ut_a(infimum_rec == page
- + (comp ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM));
+ if (UNIV_LIKELY(comp)) {
+ ut_a(infimum_rec == page + PAGE_NEW_INFIMUM);
+
+ rec_set_n_owned_new(infimum_rec, NULL, 1);
+ rec_set_heap_no_new(infimum_rec, 0);
+ } else {
+ ut_a(infimum_rec == page + PAGE_OLD_INFIMUM);
+
+ rec_set_n_owned_old(infimum_rec, 1);
+ rec_set_heap_no_old(infimum_rec, 0);
+ }
- rec_set_n_owned(infimum_rec, comp, 1);
- rec_set_heap_no(infimum_rec, comp, 0);
offsets = rec_get_offsets(infimum_rec, index, NULL,
ULINT_UNDEFINED, &heap);
@@ -421,13 +421,19 @@ page_create(
dtype_set(dfield_get_type(field),
DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, comp ? 8 : 9);
- supremum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple);
+ supremum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple, 0);
+
+ if (UNIV_LIKELY(comp)) {
+ ut_a(supremum_rec == page + PAGE_NEW_SUPREMUM);
- ut_a(supremum_rec == page
- + (comp ? PAGE_NEW_SUPREMUM : PAGE_OLD_SUPREMUM));
+ rec_set_n_owned_new(supremum_rec, NULL, 1);
+ rec_set_heap_no_new(supremum_rec, 1);
+ } else {
+ ut_a(supremum_rec == page + PAGE_OLD_SUPREMUM);
- rec_set_n_owned(supremum_rec, comp, 1);
- rec_set_heap_no(supremum_rec, comp, 1);
+ rec_set_n_owned_old(supremum_rec, 1);
+ rec_set_heap_no_old(supremum_rec, 1);
+ }
offsets = rec_get_offsets(supremum_rec, index, offsets,
ULINT_UNDEFINED, &heap);
@@ -440,18 +446,20 @@ page_create(
/* 4. INITIALIZE THE PAGE */
- page_header_set_field(page, PAGE_N_DIR_SLOTS, 2);
- page_header_set_ptr(page, PAGE_HEAP_TOP, heap_top);
- page_header_set_field(page, PAGE_N_HEAP, comp ? 0x8002 : 2);
- page_header_set_ptr(page, PAGE_FREE, NULL);
- page_header_set_field(page, PAGE_GARBAGE, 0);
- page_header_set_ptr(page, PAGE_LAST_INSERT, NULL);
- page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
- page_header_set_field(page, PAGE_N_DIRECTION, 0);
- page_header_set_field(page, PAGE_N_RECS, 0);
- page_set_max_trx_id(page, ut_dulint_zero);
+ page_header_set_field(page, NULL, PAGE_N_DIR_SLOTS, 2);
+ page_header_set_ptr(page, NULL, PAGE_HEAP_TOP, heap_top);
+ page_header_set_field(page, NULL, PAGE_N_HEAP, comp
+ ? 0x8000 | PAGE_HEAP_NO_USER_LOW
+ : PAGE_HEAP_NO_USER_LOW);
+ page_header_set_ptr(page, NULL, PAGE_FREE, NULL);
+ page_header_set_field(page, NULL, PAGE_GARBAGE, 0);
+ page_header_set_ptr(page, NULL, PAGE_LAST_INSERT, NULL);
+ page_header_set_field(page, NULL, PAGE_DIRECTION, PAGE_NO_DIRECTION);
+ page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);
+ page_header_set_field(page, NULL, PAGE_N_RECS, 0);
+ page_set_max_trx_id(block, NULL, ut_dulint_zero, NULL);
memset(heap_top, 0, UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START
- - (heap_top - page));
+ - page_offset(heap_top));
/* 5. SET POINTERS IN RECORDS AND DIR SLOTS */
@@ -465,34 +473,88 @@ page_create(
/* Set the next pointers in infimum and supremum */
- rec_set_next_offs(infimum_rec, comp, (ulint)(supremum_rec - page));
- rec_set_next_offs(supremum_rec, comp, 0);
+ if (UNIV_LIKELY(comp)) {
+ rec_set_next_offs_new(infimum_rec, PAGE_NEW_SUPREMUM);
+ rec_set_next_offs_new(supremum_rec, 0);
+ } else {
+ rec_set_next_offs_old(infimum_rec, PAGE_OLD_SUPREMUM);
+ rec_set_next_offs_old(supremum_rec, 0);
+ }
return(page);
}
-/*****************************************************************
-Differs from page_copy_rec_list_end, because this function does not
-touch the lock table and max trx id on page. */
+/**********************************************************//**
+Create an uncompressed B-tree index page.
+@return pointer to the page */
+UNIV_INTERN
+page_t*
+page_create(
+/*========*/
+ buf_block_t* block, /*!< in: a buffer block where the
+ page is created */
+ mtr_t* mtr, /*!< in: mini-transaction handle */
+ ulint comp) /*!< in: nonzero=compact page format */
+{
+ page_create_write_log(buf_block_get_frame(block), mtr, comp);
+ return(page_create_low(block, comp));
+}
+
+/**********************************************************//**
+Create a compressed B-tree index page.
+@return pointer to the page */
+UNIV_INTERN
+page_t*
+page_create_zip(
+/*============*/
+ buf_block_t* block, /*!< in/out: a buffer block where the
+ page is created */
+ dict_index_t* index, /*!< in: the index of the page */
+ ulint level, /*!< in: the B-tree level of the page */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
+{
+ page_t* page;
+ page_zip_des_t* page_zip = buf_block_get_page_zip(block);
+
+ ut_ad(block);
+ ut_ad(page_zip);
+ ut_ad(index);
+ ut_ad(dict_table_is_comp(index->table));
+
+ page = page_create_low(block, TRUE);
+ mach_write_to_2(page + PAGE_HEADER + PAGE_LEVEL, level);
+
+ if (UNIV_UNLIKELY(!page_zip_compress(page_zip, page, index, mtr))) {
+ /* The compression of a newly created page
+ should always succeed. */
+ ut_error;
+ }
+ return(page);
+}
+
+/*************************************************************//**
+Differs from page_copy_rec_list_end, because this function does not
+touch the lock table and max trx id on page or compress the page. */
+UNIV_INTERN
void
page_copy_rec_list_end_no_locks(
/*============================*/
- page_t* new_page, /* in: index page to copy to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr) /* in: mtr */
+ buf_block_t* new_block, /*!< in: index page to copy to */
+ buf_block_t* block, /*!< in: index page of rec */
+ rec_t* rec, /*!< in: record on page */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
{
+ page_t* new_page = buf_block_get_frame(new_block);
page_cur_t cur1;
- page_cur_t cur2;
- rec_t* sup;
+ rec_t* cur2;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
- page_cur_position(rec, &cur1);
+ page_cur_position(rec, block, &cur1);
if (page_cur_is_before_first(&cur1)) {
@@ -501,45 +563,40 @@ page_copy_rec_list_end_no_locks(
ut_a((ibool)!!page_is_comp(new_page)
== dict_table_is_comp(index->table));
- ut_a(page_is_comp(new_page) == page_is_comp(page));
+ ut_a(page_is_comp(new_page) == page_rec_is_comp(rec));
ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == (ulint)
- (page_is_comp(new_page)
- ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM));
+ (page_is_comp(new_page) ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM));
- page_cur_set_before_first(new_page, &cur2);
+ cur2 = page_get_infimum_rec(buf_block_get_frame(new_block));
/* Copy records from the original page to the new page */
- sup = page_get_supremum_rec(page);
-
- for (;;) {
+ while (!page_cur_is_after_last(&cur1)) {
rec_t* cur1_rec = page_cur_get_rec(&cur1);
- if (cur1_rec == sup) {
- break;
- }
+ rec_t* ins_rec;
offsets = rec_get_offsets(cur1_rec, index, offsets,
ULINT_UNDEFINED, &heap);
- if (UNIV_UNLIKELY(!page_cur_rec_insert(&cur2, cur1_rec, index,
- offsets, mtr))) {
+ ins_rec = page_cur_insert_rec_low(cur2, index,
+ cur1_rec, offsets, mtr);
+ if (UNIV_UNLIKELY(!ins_rec)) {
/* Track an assertion failure reported on the mailing
list on June 18th, 2003 */
- buf_page_print(new_page);
- buf_page_print(page);
+ buf_page_print(new_page, 0);
+ buf_page_print(page_align(rec), 0);
ut_print_timestamp(stderr);
fprintf(stderr,
"InnoDB: rec offset %lu, cur1 offset %lu,"
" cur2 offset %lu\n",
- (ulong)(rec - page),
- (ulong)(page_cur_get_rec(&cur1) - page),
- (ulong)(page_cur_get_rec(&cur2) - new_page));
-
+ (ulong) page_offset(rec),
+ (ulong) page_offset(page_cur_get_rec(&cur1)),
+ (ulong) page_offset(cur2));
ut_error;
}
page_cur_move_to_next(&cur1);
- page_cur_move_to_next(&cur2);
+ cur2 = ins_rec;
}
if (UNIV_LIKELY_NULL(heap)) {
@@ -547,111 +604,238 @@ page_copy_rec_list_end_no_locks(
}
}
-/*****************************************************************
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
Copies records from page to new_page, from a given record onward,
including that record. Infimum and supremum records are not copied.
-The records are copied to the start of the record list on new_page. */
-
-void
+The records are copied to the start of the record list on new_page.
+@return pointer to the original successor of the infimum record on
+new_page, or NULL on zip overflow (new_block will be decompressed) */
+UNIV_INTERN
+rec_t*
page_copy_rec_list_end(
/*===================*/
- page_t* new_page, /* in: index page to copy to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr) /* in: mtr */
+ buf_block_t* new_block, /*!< in/out: index page to copy to */
+ buf_block_t* block, /*!< in: index page containing rec */
+ rec_t* rec, /*!< in: record on page */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
{
- if (page_dir_get_n_heap(new_page) == 2) {
- page_copy_rec_list_end_to_created_page(new_page, page, rec,
+ page_t* new_page = buf_block_get_frame(new_block);
+ page_zip_des_t* new_page_zip = buf_block_get_page_zip(new_block);
+ page_t* page = page_align(rec);
+ rec_t* ret = page_rec_get_next(
+ page_get_infimum_rec(new_page));
+ ulint log_mode = 0; /* remove warning */
+
+#ifdef UNIV_ZIP_DEBUG
+ if (new_page_zip) {
+ page_zip_des_t* page_zip = buf_block_get_page_zip(block);
+ ut_a(page_zip);
+
+ /* Strict page_zip_validate() may fail here.
+ Furthermore, btr_compress() may set FIL_PAGE_PREV to
+ FIL_NULL on new_page while leaving it intact on
+ new_page_zip. So, we cannot validate new_page_zip. */
+ ut_a(page_zip_validate_low(page_zip, page, TRUE));
+ }
+#endif /* UNIV_ZIP_DEBUG */
+ ut_ad(buf_block_get_frame(block) == page);
+ ut_ad(page_is_leaf(page) == page_is_leaf(new_page));
+ ut_ad(page_is_comp(page) == page_is_comp(new_page));
+ /* Here, "ret" may be pointing to a user record or the
+ predefined supremum record. */
+
+ if (UNIV_LIKELY_NULL(new_page_zip)) {
+ log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
+ }
+
+ if (page_dir_get_n_heap(new_page) == PAGE_HEAP_NO_USER_LOW) {
+ page_copy_rec_list_end_to_created_page(new_page, rec,
index, mtr);
} else {
- page_copy_rec_list_end_no_locks(new_page, page, rec,
+ page_copy_rec_list_end_no_locks(new_block, block, rec,
index, mtr);
}
+ if (UNIV_LIKELY_NULL(new_page_zip)) {
+ mtr_set_log_mode(mtr, log_mode);
+
+ if (UNIV_UNLIKELY
+ (!page_zip_compress(new_page_zip, new_page, index, mtr))) {
+ /* Before trying to reorganize the page,
+ store the number of preceding records on the page. */
+ ulint ret_pos
+ = page_rec_get_n_recs_before(ret);
+ /* Before copying, "ret" was the successor of
+ the predefined infimum record. It must still
+ have at least one predecessor (the predefined
+ infimum record, or a freshly copied record
+ that is smaller than "ret"). */
+ ut_a(ret_pos > 0);
+
+ if (UNIV_UNLIKELY
+ (!page_zip_reorganize(new_block, index, mtr))) {
+
+ if (UNIV_UNLIKELY
+ (!page_zip_decompress(new_page_zip,
+ new_page, FALSE))) {
+ ut_error;
+ }
+ ut_ad(page_validate(new_page, index));
+ return(NULL);
+ } else {
+ /* The page was reorganized:
+ Seek to ret_pos. */
+ ret = new_page + PAGE_NEW_INFIMUM;
+
+ do {
+ ret = rec_get_next_ptr(ret, TRUE);
+ } while (--ret_pos);
+ }
+ }
+ }
+
/* Update the lock table, MAX_TRX_ID, and possible hash index */
- lock_move_rec_list_end(new_page, page, rec);
+ lock_move_rec_list_end(new_block, block, rec);
+
+ if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
+ page_update_max_trx_id(new_block, new_page_zip,
+ page_get_max_trx_id(page), mtr);
+ }
- page_update_max_trx_id(new_page, page_get_max_trx_id(page));
+ btr_search_move_or_delete_hash_entries(new_block, block, index);
- btr_search_move_or_delete_hash_entries(new_page, page, index);
+ return(ret);
}
-/*****************************************************************
+/*************************************************************//**
Copies records from page to new_page, up to the given record,
NOT including that record. Infimum and supremum records are not copied.
-The records are copied to the end of the record list on new_page. */
-
-void
+The records are copied to the end of the record list on new_page.
+@return pointer to the original predecessor of the supremum record on
+new_page, or NULL on zip overflow (new_block will be decompressed) */
+UNIV_INTERN
+rec_t*
page_copy_rec_list_start(
/*=====================*/
- page_t* new_page, /* in: index page to copy to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr) /* in: mtr */
+ buf_block_t* new_block, /*!< in/out: index page to copy to */
+ buf_block_t* block, /*!< in: index page containing rec */
+ rec_t* rec, /*!< in: record on page */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
{
+ page_t* new_page = buf_block_get_frame(new_block);
+ page_zip_des_t* new_page_zip = buf_block_get_page_zip(new_block);
page_cur_t cur1;
- page_cur_t cur2;
- rec_t* old_end;
+ rec_t* cur2;
+ ulint log_mode = 0 /* remove warning */;
mem_heap_t* heap = NULL;
+ rec_t* ret
+ = page_rec_get_prev(page_get_supremum_rec(new_page));
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
- page_cur_set_before_first(page, &cur1);
+ /* Here, "ret" may be pointing to a user record or the
+ predefined infimum record. */
- if (rec == page_cur_get_rec(&cur1)) {
+ if (page_rec_is_infimum(rec)) {
- return;
+ return(ret);
+ }
+
+ if (UNIV_LIKELY_NULL(new_page_zip)) {
+ log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
}
+ page_cur_set_before_first(block, &cur1);
page_cur_move_to_next(&cur1);
- page_cur_set_after_last(new_page, &cur2);
- page_cur_move_to_prev(&cur2);
- old_end = page_cur_get_rec(&cur2);
+ cur2 = ret;
/* Copy records from the original page to the new page */
while (page_cur_get_rec(&cur1) != rec) {
- rec_t* ins_rec;
rec_t* cur1_rec = page_cur_get_rec(&cur1);
offsets = rec_get_offsets(cur1_rec, index, offsets,
ULINT_UNDEFINED, &heap);
- ins_rec = page_cur_rec_insert(&cur2, cur1_rec, index,
- offsets, mtr);
- ut_a(ins_rec);
+ cur2 = page_cur_insert_rec_low(cur2, index,
+ cur1_rec, offsets, mtr);
+ ut_a(cur2);
page_cur_move_to_next(&cur1);
- page_cur_move_to_next(&cur2);
}
- /* Update the lock table, MAX_TRX_ID, and possible hash index */
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
- lock_move_rec_list_start(new_page, page, rec, old_end);
+ if (UNIV_LIKELY_NULL(new_page_zip)) {
+ mtr_set_log_mode(mtr, log_mode);
+
+ if (UNIV_UNLIKELY
+ (!page_zip_compress(new_page_zip, new_page, index, mtr))) {
+ /* Before trying to reorganize the page,
+ store the number of preceding records on the page. */
+ ulint ret_pos
+ = page_rec_get_n_recs_before(ret);
+ /* Before copying, "ret" was the predecessor
+ of the predefined supremum record. If it was
+ the predefined infimum record, then it would
+ still be the infimum, and ret_pos would be 0. Thus,
+ we cannot assert ut_a(ret_pos > 0) here. */
+
+ if (UNIV_UNLIKELY
+ (!page_zip_reorganize(new_block, index, mtr))) {
+
+ if (UNIV_UNLIKELY
+ (!page_zip_decompress(new_page_zip,
+ new_page, FALSE))) {
+ ut_error;
+ }
+ ut_ad(page_validate(new_page, index));
+ return(NULL);
+ } else {
+ /* The page was reorganized:
+ Seek to ret_pos. */
+ ret = new_page + PAGE_NEW_INFIMUM;
- page_update_max_trx_id(new_page, page_get_max_trx_id(page));
+ do {
+ ret = rec_get_next_ptr(ret, TRUE);
+ } while (--ret_pos);
+ }
+ }
+ }
- btr_search_move_or_delete_hash_entries(new_page, page, index);
+ /* Update MAX_TRX_ID, the lock table, and possible hash index */
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
+ if (dict_index_is_sec_or_ibuf(index)
+ && page_is_leaf(page_align(rec))) {
+ page_update_max_trx_id(new_block, new_page_zip,
+ page_get_max_trx_id(page_align(rec)),
+ mtr);
}
+
+ lock_move_rec_list_start(new_block, block, rec, ret);
+
+ btr_search_move_or_delete_hash_entries(new_block, block, index);
+
+ return(ret);
}
-/**************************************************************
+/**********************************************************//**
Writes a log record of a record list end or start deletion. */
UNIV_INLINE
void
page_delete_rec_list_write_log(
/*===========================*/
- rec_t* rec, /* in: record on page */
- dict_index_t* index, /* in: record descriptor */
- byte type, /* in: operation type:
+ rec_t* rec, /*!< in: record on page */
+ dict_index_t* index, /*!< in: record descriptor */
+ byte type, /*!< in: operation type:
MLOG_LIST_END_DELETE, ... */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
byte* log_ptr;
ut_ad(type == MLOG_LIST_END_DELETE
@@ -666,24 +850,28 @@ page_delete_rec_list_write_log(
mlog_close(mtr, log_ptr + 2);
}
}
-
-/**************************************************************
-Parses a log record of a record list end or start deletion. */
-
+#else /* !UNIV_HOTBACKUP */
+# define page_delete_rec_list_write_log(rec,index,type,mtr) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+
+/**********************************************************//**
+Parses a log record of a record list end or start deletion.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
page_parse_delete_rec_list(
/*=======================*/
- /* out: end of log record or NULL */
- byte type, /* in: MLOG_LIST_END_DELETE,
+ byte type, /*!< in: MLOG_LIST_END_DELETE,
MLOG_LIST_START_DELETE,
MLOG_COMP_LIST_END_DELETE or
MLOG_COMP_LIST_START_DELETE */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- dict_index_t* index, /* in: record descriptor */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr) /* in: mtr or NULL */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ buf_block_t* block, /*!< in/out: buffer block or NULL */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr or NULL */
{
+ page_t* page;
ulint offset;
ut_ad(type == MLOG_LIST_END_DELETE
@@ -701,95 +889,127 @@ page_parse_delete_rec_list(
offset = mach_read_from_2(ptr);
ptr += 2;
- if (!page) {
+ if (!block) {
return(ptr);
}
+ page = buf_block_get_frame(block);
+
ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
if (type == MLOG_LIST_END_DELETE
|| type == MLOG_COMP_LIST_END_DELETE) {
- page_delete_rec_list_end(page, page + offset, index,
- ULINT_UNDEFINED,
- ULINT_UNDEFINED, mtr);
+ page_delete_rec_list_end(page + offset, block, index,
+ ULINT_UNDEFINED, ULINT_UNDEFINED,
+ mtr);
} else {
- page_delete_rec_list_start(page, page + offset, index, mtr);
+ page_delete_rec_list_start(page + offset, block, index, mtr);
}
return(ptr);
}
-/*****************************************************************
+/*************************************************************//**
Deletes records from a page from a given record onward, including that record.
The infimum and supremum records are not deleted. */
-
+UNIV_INTERN
void
page_delete_rec_list_end(
/*=====================*/
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- dict_index_t* index, /* in: record descriptor */
- ulint n_recs, /* in: number of records to delete,
+ rec_t* rec, /*!< in: pointer to record on page */
+ buf_block_t* block, /*!< in: buffer block of the page */
+ dict_index_t* index, /*!< in: record descriptor */
+ ulint n_recs, /*!< in: number of records to delete,
or ULINT_UNDEFINED if not known */
- ulint size, /* in: the sum of the sizes of the
+ ulint size, /*!< in: the sum of the sizes of the
records in the end of the chain to
delete, or ULINT_UNDEFINED if not known */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
- page_dir_slot_t* slot;
- ulint slot_index;
- rec_t* last_rec;
- rec_t* prev_rec;
- rec_t* free;
- rec_t* rec2;
- ulint count;
- ulint n_owned;
- rec_t* sup;
- ulint comp;
+ page_dir_slot_t*slot;
+ ulint slot_index;
+ rec_t* last_rec;
+ rec_t* prev_rec;
+ ulint n_owned;
+ page_zip_des_t* page_zip = buf_block_get_page_zip(block);
+ page_t* page = page_align(rec);
+ mem_heap_t* heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ rec_offs_init(offsets_);
+
+ ut_ad(size == ULINT_UNDEFINED || size < UNIV_PAGE_SIZE);
+ ut_ad(!page_zip || page_rec_is_comp(rec));
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+
+ if (page_rec_is_infimum(rec)) {
+ rec = page_rec_get_next(rec);
+ }
+
+ if (page_rec_is_supremum(rec)) {
+
+ return;
+ }
/* Reset the last insert info in the page header and increment
the modify clock for the frame */
- ut_ad(size == ULINT_UNDEFINED || size < UNIV_PAGE_SIZE);
- page_header_set_ptr(page, PAGE_LAST_INSERT, NULL);
+ page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
/* The page gets invalid for optimistic searches: increment the
frame modify clock */
- buf_frame_modify_clock_inc(page);
-
- sup = page_get_supremum_rec(page);
+ buf_block_modify_clock_inc(block);
- comp = page_is_comp(page);
- if (page_rec_is_infimum_low(rec - page)) {
- rec = page_rec_get_next(rec);
- }
-
- page_delete_rec_list_write_log(rec, index, comp
+ page_delete_rec_list_write_log(rec, index, page_is_comp(page)
? MLOG_COMP_LIST_END_DELETE
: MLOG_LIST_END_DELETE, mtr);
- if (rec == sup) {
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ ulint log_mode;
+
+ ut_a(page_is_comp(page));
+ /* Individual deletes are not logged */
+
+ log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
+ do {
+ page_cur_t cur;
+ page_cur_position(rec, block, &cur);
+
+ offsets = rec_get_offsets(rec, index, offsets,
+ ULINT_UNDEFINED, &heap);
+ rec = rec_get_next_ptr(rec, TRUE);
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(page_zip_validate(page_zip, page));
+#endif /* UNIV_ZIP_DEBUG */
+ page_cur_delete_rec(&cur, index, offsets, mtr);
+ } while (page_offset(rec) != PAGE_NEW_SUPREMUM);
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+
+ /* Restore log mode */
+
+ mtr_set_log_mode(mtr, log_mode);
return;
}
prev_rec = page_rec_get_prev(rec);
- last_rec = page_rec_get_prev(sup);
+ last_rec = page_rec_get_prev(page_get_supremum_rec(page));
if ((size == ULINT_UNDEFINED) || (n_recs == ULINT_UNDEFINED)) {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_t* rec2 = rec;
/* Calculate the sum of sizes and the number of records */
size = 0;
n_recs = 0;
- rec2 = rec;
- while (rec2 != sup) {
+ do {
ulint s;
offsets = rec_get_offsets(rec2, index, offsets,
ULINT_UNDEFINED, &heap);
@@ -801,7 +1021,7 @@ page_delete_rec_list_end(
n_recs++;
rec2 = page_rec_get_next(rec2);
- }
+ } while (!page_rec_is_supremum(rec2));
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
@@ -814,55 +1034,69 @@ page_delete_rec_list_end(
of the records owned by the supremum record, as it is allowed to be
less than PAGE_DIR_SLOT_MIN_N_OWNED */
- rec2 = rec;
- count = 0;
+ if (page_is_comp(page)) {
+ rec_t* rec2 = rec;
+ ulint count = 0;
- while (rec_get_n_owned(rec2, comp) == 0) {
- count++;
+ while (rec_get_n_owned_new(rec2) == 0) {
+ count++;
- rec2 = page_rec_get_next(rec2);
- }
+ rec2 = rec_get_next_ptr(rec2, TRUE);
+ }
+
+ ut_ad(rec_get_n_owned_new(rec2) > count);
+
+ n_owned = rec_get_n_owned_new(rec2) - count;
+ slot_index = page_dir_find_owner_slot(rec2);
+ slot = page_dir_get_nth_slot(page, slot_index);
+ } else {
+ rec_t* rec2 = rec;
+ ulint count = 0;
+
+ while (rec_get_n_owned_old(rec2) == 0) {
+ count++;
- ut_ad(rec_get_n_owned(rec2, comp) - count > 0);
+ rec2 = rec_get_next_ptr(rec2, FALSE);
+ }
- n_owned = rec_get_n_owned(rec2, comp) - count;
+ ut_ad(rec_get_n_owned_old(rec2) > count);
- slot_index = page_dir_find_owner_slot(rec2);
- slot = page_dir_get_nth_slot(page, slot_index);
+ n_owned = rec_get_n_owned_old(rec2) - count;
+ slot_index = page_dir_find_owner_slot(rec2);
+ slot = page_dir_get_nth_slot(page, slot_index);
+ }
- page_dir_slot_set_rec(slot, sup);
- page_dir_slot_set_n_owned(slot, n_owned);
+ page_dir_slot_set_rec(slot, page_get_supremum_rec(page));
+ page_dir_slot_set_n_owned(slot, NULL, n_owned);
- page_dir_set_n_slots(page, slot_index + 1);
+ page_dir_set_n_slots(page, NULL, slot_index + 1);
/* Remove the record chain segment from the record chain */
page_rec_set_next(prev_rec, page_get_supremum_rec(page));
/* Catenate the deleted chain segment to the page free list */
- free = page_header_get_ptr(page, PAGE_FREE);
+ page_rec_set_next(last_rec, page_header_get_ptr(page, PAGE_FREE));
+ page_header_set_ptr(page, NULL, PAGE_FREE, rec);
- page_rec_set_next(last_rec, free);
- page_header_set_ptr(page, PAGE_FREE, rec);
-
- page_header_set_field(page, PAGE_GARBAGE, size
+ page_header_set_field(page, NULL, PAGE_GARBAGE, size
+ page_header_get_field(page, PAGE_GARBAGE));
- page_header_set_field(page, PAGE_N_RECS,
+ page_header_set_field(page, NULL, PAGE_N_RECS,
(ulint)(page_get_n_recs(page) - n_recs));
}
-/*****************************************************************
+/*************************************************************//**
Deletes records from page, up to the given record, NOT including
that record. Infimum and supremum records are not deleted. */
-
+UNIV_INTERN
void
page_delete_rec_list_start(
/*=======================*/
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr) /* in: mtr */
+ rec_t* rec, /*!< in: record on page */
+ buf_block_t* block, /*!< in: buffer block of the page */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
{
page_cur_t cur1;
ulint log_mode;
@@ -870,11 +1104,31 @@ page_delete_rec_list_start(
ulint* offsets = offsets_;
mem_heap_t* heap = NULL;
byte type;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
- ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
+ rec_offs_init(offsets_);
- if (page_is_comp(page)) {
+ ut_ad((ibool) !!page_rec_is_comp(rec)
+ == dict_table_is_comp(index->table));
+#ifdef UNIV_ZIP_DEBUG
+ {
+ page_zip_des_t* page_zip= buf_block_get_page_zip(block);
+ page_t* page = buf_block_get_frame(block);
+
+ /* page_zip_validate() would detect a min_rec_mark mismatch
+ in btr_page_split_and_insert()
+ between btr_attach_half_pages() and insert_page = ...
+ when btr_page_get_split_rec_to_left() holds
+ (direction == FSP_DOWN). */
+ ut_a(!page_zip || page_zip_validate_low(page_zip, page, TRUE));
+ }
+#endif /* UNIV_ZIP_DEBUG */
+
+ if (page_rec_is_infimum(rec)) {
+
+ return;
+ }
+
+ if (page_rec_is_comp(rec)) {
type = MLOG_COMP_LIST_START_DELETE;
} else {
type = MLOG_LIST_START_DELETE;
@@ -882,13 +1136,7 @@ page_delete_rec_list_start(
page_delete_rec_list_write_log(rec, index, type, mtr);
- page_cur_set_before_first(page, &cur1);
-
- if (rec == page_cur_get_rec(&cur1)) {
-
- return;
- }
-
+ page_cur_set_before_first(block, &cur1);
page_cur_move_to_next(&cur1);
/* Individual deletes are not logged */
@@ -910,68 +1158,96 @@ page_delete_rec_list_start(
mtr_set_log_mode(mtr, log_mode);
}
-/*****************************************************************
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
Moves record list end to another page. Moved records include
-split_rec. */
-
-void
+split_rec.
+@return TRUE on success; FALSE on compression failure (new_block will
+be decompressed) */
+UNIV_INTERN
+ibool
page_move_rec_list_end(
/*===================*/
- page_t* new_page, /* in: index page where to move */
- page_t* page, /* in: index page */
- rec_t* split_rec, /* in: first record to move */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr) /* in: mtr */
+ buf_block_t* new_block, /*!< in/out: index page where to move */
+ buf_block_t* block, /*!< in: index page from where to move */
+ rec_t* split_rec, /*!< in: first record to move */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
{
- ulint old_data_size;
- ulint new_data_size;
- ulint old_n_recs;
- ulint new_n_recs;
+ page_t* new_page = buf_block_get_frame(new_block);
+ ulint old_data_size;
+ ulint new_data_size;
+ ulint old_n_recs;
+ ulint new_n_recs;
old_data_size = page_get_data_size(new_page);
old_n_recs = page_get_n_recs(new_page);
-
- page_copy_rec_list_end(new_page, page, split_rec, index, mtr);
+#ifdef UNIV_ZIP_DEBUG
+ {
+ page_zip_des_t* new_page_zip
+ = buf_block_get_page_zip(new_block);
+ page_zip_des_t* page_zip
+ = buf_block_get_page_zip(block);
+ ut_a(!new_page_zip == !page_zip);
+ ut_a(!new_page_zip
+ || page_zip_validate(new_page_zip, new_page));
+ ut_a(!page_zip
+ || page_zip_validate(page_zip, page_align(split_rec)));
+ }
+#endif /* UNIV_ZIP_DEBUG */
+
+ if (UNIV_UNLIKELY(!page_copy_rec_list_end(new_block, block,
+ split_rec, index, mtr))) {
+ return(FALSE);
+ }
new_data_size = page_get_data_size(new_page);
new_n_recs = page_get_n_recs(new_page);
ut_ad(new_data_size >= old_data_size);
- page_delete_rec_list_end(page, split_rec, index,
+ page_delete_rec_list_end(split_rec, block, index,
new_n_recs - old_n_recs,
new_data_size - old_data_size, mtr);
+
+ return(TRUE);
}
-/*****************************************************************
+/*************************************************************//**
Moves record list start to another page. Moved records do not include
-split_rec. */
-
-void
+split_rec.
+@return TRUE on success; FALSE on compression failure */
+UNIV_INTERN
+ibool
page_move_rec_list_start(
/*=====================*/
- page_t* new_page, /* in: index page where to move */
- page_t* page, /* in: index page */
- rec_t* split_rec, /* in: first record not to move */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr) /* in: mtr */
+ buf_block_t* new_block, /*!< in/out: index page where to move */
+ buf_block_t* block, /*!< in/out: page containing split_rec */
+ rec_t* split_rec, /*!< in: first record not to move */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
{
- page_copy_rec_list_start(new_page, page, split_rec, index, mtr);
+ if (UNIV_UNLIKELY(!page_copy_rec_list_start(new_block, block,
+ split_rec, index, mtr))) {
+ return(FALSE);
+ }
+
+ page_delete_rec_list_start(split_rec, block, index, mtr);
- page_delete_rec_list_start(page, split_rec, index, mtr);
+ return(TRUE);
}
-/***************************************************************************
+/***********************************************************************//**
This is a low-level operation which is used in a database index creation
to update the page number of a created B-tree to a data dictionary record. */
-
+UNIV_INTERN
void
page_rec_write_index_page_no(
/*=========================*/
- rec_t* rec, /* in: record to update */
- ulint i, /* in: index of the field to update */
- ulint page_no,/* in: value to write */
- mtr_t* mtr) /* in: mtr */
+ rec_t* rec, /*!< in: record to update */
+ ulint i, /*!< in: index of the field to update */
+ ulint page_no,/*!< in: value to write */
+ mtr_t* mtr) /*!< in: mtr */
{
byte* data;
ulint len;
@@ -982,106 +1258,96 @@ page_rec_write_index_page_no(
mlog_write_ulint(data, page_no, MLOG_4BYTES, mtr);
}
+#endif /* !UNIV_HOTBACKUP */
-/******************************************************************
+/**************************************************************//**
Used to delete n slots from the directory. This function updates
also n_owned fields in the records, so that the first slot after
the deleted ones inherits the records of the deleted slots. */
UNIV_INLINE
void
-page_dir_delete_slots(
-/*==================*/
- page_t* page, /* in: the index page */
- ulint start, /* in: first slot to be deleted */
- ulint n) /* in: number of slots to delete (currently
- only n == 1 allowed) */
+page_dir_delete_slot(
+/*=================*/
+ page_t* page, /*!< in/out: the index page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ ulint slot_no)/*!< in: slot to be deleted */
{
page_dir_slot_t* slot;
+ ulint n_owned;
ulint i;
- ulint sum_owned = 0;
ulint n_slots;
- rec_t* rec;
- ut_ad(n == 1);
- ut_ad(start > 0);
- ut_ad(start + n < page_dir_get_n_slots(page));
+ ut_ad(!page_zip || page_is_comp(page));
+ ut_ad(slot_no > 0);
+ ut_ad(slot_no + 1 < page_dir_get_n_slots(page));
n_slots = page_dir_get_n_slots(page);
/* 1. Reset the n_owned fields of the slots to be
deleted */
- for (i = start; i < start + n; i++) {
- slot = page_dir_get_nth_slot(page, i);
- sum_owned += page_dir_slot_get_n_owned(slot);
- page_dir_slot_set_n_owned(slot, 0);
- }
+ slot = page_dir_get_nth_slot(page, slot_no);
+ n_owned = page_dir_slot_get_n_owned(slot);
+ page_dir_slot_set_n_owned(slot, page_zip, 0);
/* 2. Update the n_owned value of the first non-deleted slot */
- slot = page_dir_get_nth_slot(page, start + n);
- page_dir_slot_set_n_owned(slot,
- sum_owned + page_dir_slot_get_n_owned(slot));
-
- /* 3. Destroy start and other slots by copying slots */
- for (i = start + n; i < n_slots; i++) {
- slot = page_dir_get_nth_slot(page, i);
- rec = page_dir_slot_get_rec(slot);
+ slot = page_dir_get_nth_slot(page, slot_no + 1);
+ page_dir_slot_set_n_owned(slot, page_zip,
+ n_owned + page_dir_slot_get_n_owned(slot));
- slot = page_dir_get_nth_slot(page, i - n);
- page_dir_slot_set_rec(slot, rec);
+ /* 3. Destroy the slot by copying slots */
+ for (i = slot_no + 1; i < n_slots; i++) {
+ rec_t* rec = (rec_t*)
+ page_dir_slot_get_rec(page_dir_get_nth_slot(page, i));
+ page_dir_slot_set_rec(page_dir_get_nth_slot(page, i - 1), rec);
}
- /* 4. Update the page header */
- page_header_set_field(page, PAGE_N_DIR_SLOTS, n_slots - n);
+ /* 4. Zero out the last slot, which will be removed */
+ mach_write_to_2(page_dir_get_nth_slot(page, n_slots - 1), 0);
+
+ /* 5. Update the page header */
+ page_header_set_field(page, page_zip, PAGE_N_DIR_SLOTS, n_slots - 1);
}
-/******************************************************************
+/**************************************************************//**
Used to add n slots to the directory. Does not set the record pointers
in the added slots or update n_owned values: this is the responsibility
of the caller. */
UNIV_INLINE
void
-page_dir_add_slots(
-/*===============*/
- page_t* page, /* in: the index page */
- ulint start, /* in: the slot above which the new slots are added */
- ulint n) /* in: number of slots to add (currently only n == 1
- allowed) */
+page_dir_add_slot(
+/*==============*/
+ page_t* page, /*!< in/out: the index page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ ulint start) /*!< in: the slot above which the new slot
+ is added */
{
page_dir_slot_t* slot;
ulint n_slots;
- ulint i;
- rec_t* rec;
-
- ut_ad(n == 1);
n_slots = page_dir_get_n_slots(page);
ut_ad(start < n_slots - 1);
/* Update the page header */
- page_dir_set_n_slots(page, n_slots + n);
+ page_dir_set_n_slots(page, page_zip, n_slots + 1);
/* Move slots up */
-
- for (i = n_slots - 1; i > start; i--) {
-
- slot = page_dir_get_nth_slot(page, i);
- rec = page_dir_slot_get_rec(slot);
-
- slot = page_dir_get_nth_slot(page, i + n);
- page_dir_slot_set_rec(slot, rec);
- }
+ slot = page_dir_get_nth_slot(page, n_slots);
+ memmove(slot, slot + PAGE_DIR_SLOT_SIZE,
+ (n_slots - 1 - start) * PAGE_DIR_SLOT_SIZE);
}
-/********************************************************************
+/****************************************************************//**
Splits a directory slot which owns too many records. */
-
+UNIV_INTERN
void
page_dir_split_slot(
/*================*/
- page_t* page, /* in: the index page in question */
- ulint slot_no) /* in: the directory slot */
+ page_t* page, /*!< in/out: index page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page whose
+ uncompressed part will be written, or NULL */
+ ulint slot_no)/*!< in: the directory slot */
{
rec_t* rec;
page_dir_slot_t* new_slot;
@@ -1091,6 +1357,7 @@ page_dir_split_slot(
ulint n_owned;
ut_ad(page);
+ ut_ad(!page_zip || page_is_comp(page));
ut_ad(slot_no > 0);
slot = page_dir_get_nth_slot(page, slot_no);
@@ -1102,7 +1369,7 @@ page_dir_split_slot(
records owned by the slot. */
prev_slot = page_dir_get_nth_slot(page, slot_no - 1);
- rec = page_dir_slot_get_rec(prev_slot);
+ rec = (rec_t*) page_dir_slot_get_rec(prev_slot);
for (i = 0; i < n_owned / 2; i++) {
rec = page_rec_get_next(rec);
@@ -1113,7 +1380,7 @@ page_dir_split_slot(
/* 2. We add one directory slot immediately below the slot to be
split. */
- page_dir_add_slots(page, slot_no - 1, 1);
+ page_dir_add_slot(page, page_zip, slot_no - 1);
/* The added slot is now number slot_no, and the old slot is
now number slot_no + 1 */
@@ -1124,24 +1391,25 @@ page_dir_split_slot(
/* 3. We store the appropriate values to the new slot. */
page_dir_slot_set_rec(new_slot, rec);
- page_dir_slot_set_n_owned(new_slot, n_owned / 2);
+ page_dir_slot_set_n_owned(new_slot, page_zip, n_owned / 2);
/* 4. Finally, we update the number of records field of the
original slot */
- page_dir_slot_set_n_owned(slot, n_owned - (n_owned / 2));
+ page_dir_slot_set_n_owned(slot, page_zip, n_owned - (n_owned / 2));
}
-/*****************************************************************
+/*************************************************************//**
Tries to balance the given directory slot with too few records with the upper
neighbor, so that there are at least the minimum number of records owned by
the slot; this may result in the merging of two slots. */
-
+UNIV_INTERN
void
page_dir_balance_slot(
/*==================*/
- page_t* page, /* in: index page */
- ulint slot_no) /* in: the directory slot */
+ page_t* page, /*!< in/out: index page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ ulint slot_no)/*!< in: the directory slot */
{
page_dir_slot_t* slot;
page_dir_slot_t* up_slot;
@@ -1151,6 +1419,7 @@ page_dir_balance_slot(
rec_t* new_rec;
ut_ad(page);
+ ut_ad(!page_zip || page_is_comp(page));
ut_ad(slot_no > 0);
slot = page_dir_get_nth_slot(page, slot_no);
@@ -1158,7 +1427,7 @@ page_dir_balance_slot(
/* The last directory slot cannot be balanced with the upper
neighbor, as there is none. */
- if (slot_no == page_dir_get_n_slots(page) - 1) {
+ if (UNIV_UNLIKELY(slot_no == page_dir_get_n_slots(page) - 1)) {
return;
}
@@ -1178,30 +1447,39 @@ page_dir_balance_slot(
/* In this case we can just transfer one record owned
by the upper slot to the property of the lower slot */
- old_rec = page_dir_slot_get_rec(slot);
- new_rec = page_rec_get_next(old_rec);
+ old_rec = (rec_t*) page_dir_slot_get_rec(slot);
+
+ if (page_is_comp(page)) {
+ new_rec = rec_get_next_ptr(old_rec, TRUE);
- rec_set_n_owned(old_rec, page_is_comp(page), 0);
- rec_set_n_owned(new_rec, page_is_comp(page), n_owned + 1);
+ rec_set_n_owned_new(old_rec, page_zip, 0);
+ rec_set_n_owned_new(new_rec, page_zip, n_owned + 1);
+ } else {
+ new_rec = rec_get_next_ptr(old_rec, FALSE);
+
+ rec_set_n_owned_old(old_rec, 0);
+ rec_set_n_owned_old(new_rec, n_owned + 1);
+ }
page_dir_slot_set_rec(slot, new_rec);
- page_dir_slot_set_n_owned(up_slot, up_n_owned -1);
+ page_dir_slot_set_n_owned(up_slot, page_zip, up_n_owned -1);
} else {
/* In this case we may merge the two slots */
- page_dir_delete_slots(page, slot_no, 1);
+ page_dir_delete_slot(page, page_zip, slot_no);
}
}
-/****************************************************************
+#ifndef UNIV_HOTBACKUP
+/************************************************************//**
Returns the middle record of the record list. If there are an even number
-of records in the list, returns the first record of the upper half-list. */
-
+of records in the list, returns the first record of the upper half-list.
+@return middle record */
+UNIV_INTERN
rec_t*
page_get_middle_rec(
/*================*/
- /* out: middle record */
- page_t* page) /* in: page */
+ page_t* page) /*!< in: page */
{
page_dir_slot_t* slot;
ulint middle;
@@ -1211,7 +1489,7 @@ page_get_middle_rec(
rec_t* rec;
/* This many records we must leave behind */
- middle = (page_get_n_recs(page) + 2) / 2;
+ middle = (page_get_n_recs(page) + PAGE_HEAP_NO_USER_LOW) / 2;
count = 0;
@@ -1229,7 +1507,7 @@ page_get_middle_rec(
ut_ad(i > 0);
slot = page_dir_get_nth_slot(page, i - 1);
- rec = page_dir_slot_get_rec(slot);
+ rec = (rec_t*) page_dir_slot_get_rec(slot);
rec = page_rec_get_next(rec);
/* There are now count records behind rec */
@@ -1240,44 +1518,62 @@ page_get_middle_rec(
return(rec);
}
+#endif /* !UNIV_HOTBACKUP */
-/*******************************************************************
+/***************************************************************//**
Returns the number of records before the given record in chain.
-The number includes infimum and supremum records. */
-
+The number includes infimum and supremum records.
+@return number of records */
+UNIV_INTERN
ulint
page_rec_get_n_recs_before(
/*=======================*/
- /* out: number of records */
- rec_t* rec) /* in: the physical record */
+ const rec_t* rec) /*!< in: the physical record */
{
- page_dir_slot_t* slot;
- rec_t* slot_rec;
- page_t* page;
+ const page_dir_slot_t* slot;
+ const rec_t* slot_rec;
+ const page_t* page;
ulint i;
- ulint comp;
lint n = 0;
ut_ad(page_rec_check(rec));
- page = buf_frame_align(rec);
- comp = page_is_comp(page);
+ page = page_align(rec);
+ if (page_is_comp(page)) {
+ while (rec_get_n_owned_new(rec) == 0) {
- while (rec_get_n_owned(rec, comp) == 0) {
+ rec = rec_get_next_ptr_const(rec, TRUE);
+ n--;
+ }
- rec = page_rec_get_next(rec);
- n--;
- }
+ for (i = 0; ; i++) {
+ slot = page_dir_get_nth_slot(page, i);
+ slot_rec = page_dir_slot_get_rec(slot);
- for (i = 0; ; i++) {
- slot = page_dir_get_nth_slot(page, i);
- slot_rec = page_dir_slot_get_rec(slot);
+ n += rec_get_n_owned_new(slot_rec);
- n += rec_get_n_owned(slot_rec, comp);
+ if (rec == slot_rec) {
- if (rec == slot_rec) {
+ break;
+ }
+ }
+ } else {
+ while (rec_get_n_owned_old(rec) == 0) {
- break;
+ rec = rec_get_next_ptr_const(rec, FALSE);
+ n--;
+ }
+
+ for (i = 0; ; i++) {
+ slot = page_dir_get_nth_slot(page, i);
+ slot_rec = page_dir_slot_get_rec(slot);
+
+ n += rec_get_n_owned_old(slot_rec);
+
+ if (rec == slot_rec) {
+
+ break;
+ }
}
}
@@ -1288,39 +1584,46 @@ page_rec_get_n_recs_before(
return((ulint) n);
}
-/****************************************************************
+#ifndef UNIV_HOTBACKUP
+/************************************************************//**
Prints record contents including the data relevant only in
the index page context. */
-
+UNIV_INTERN
void
page_rec_print(
/*===========*/
- rec_t* rec, /* in: physical record */
- const ulint* offsets)/* in: record descriptor */
+ const rec_t* rec, /*!< in: physical record */
+ const ulint* offsets)/*!< in: record descriptor */
{
- ulint comp = page_is_comp(buf_frame_align(rec));
-
- ut_a(!comp == !rec_offs_comp(offsets));
+ ut_a(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
rec_print_new(stderr, rec, offsets);
- fprintf(stderr,
- " n_owned: %lu; heap_no: %lu; next rec: %lu\n",
- (ulong) rec_get_n_owned(rec, comp),
- (ulong) rec_get_heap_no(rec, comp),
- (ulong) rec_get_next_offs(rec, comp));
+ if (page_rec_is_comp(rec)) {
+ fprintf(stderr,
+ " n_owned: %lu; heap_no: %lu; next rec: %lu\n",
+ (ulong) rec_get_n_owned_new(rec),
+ (ulong) rec_get_heap_no_new(rec),
+ (ulong) rec_get_next_offs(rec, TRUE));
+ } else {
+ fprintf(stderr,
+ " n_owned: %lu; heap_no: %lu; next rec: %lu\n",
+ (ulong) rec_get_n_owned_old(rec),
+ (ulong) rec_get_heap_no_old(rec),
+ (ulong) rec_get_next_offs(rec, TRUE));
+ }
page_rec_check(rec);
rec_validate(rec, offsets);
}
-/*******************************************************************
+/***************************************************************//**
This is used to print the contents of the directory for
debugging purposes. */
-
+UNIV_INTERN
void
page_dir_print(
/*===========*/
- page_t* page, /* in: index page */
- ulint pr_n) /* in: print n first and n last entries */
+ page_t* page, /*!< in: index page */
+ ulint pr_n) /*!< in: print n first and n last entries */
{
ulint n;
ulint i;
@@ -1332,7 +1635,7 @@ page_dir_print(
"PAGE DIRECTORY\n"
"Page address %p\n"
"Directory stack top at offs: %lu; number of slots: %lu\n",
- page, (ulong)(page_dir_get_nth_slot(page, n - 1) - page),
+ page, (ulong) page_offset(page_dir_get_nth_slot(page, n - 1)),
(ulong) n);
for (i = 0; i < n; i++) {
slot = page_dir_get_nth_slot(page, i);
@@ -1345,32 +1648,34 @@ page_dir_print(
" rec offs: %lu\n",
(ulong) i,
(ulong) page_dir_slot_get_n_owned(slot),
- (ulong)(page_dir_slot_get_rec(slot) - page));
+ (ulong)
+ page_offset(page_dir_slot_get_rec(slot)));
}
}
fprintf(stderr, "Total of %lu records\n"
"--------------------------------\n",
- (ulong) (2 + page_get_n_recs(page)));
+ (ulong) (PAGE_HEAP_NO_USER_LOW + page_get_n_recs(page)));
}
-/*******************************************************************
+/***************************************************************//**
This is used to print the contents of the page record list for
debugging purposes. */
-
+UNIV_INTERN
void
page_print_list(
/*============*/
- page_t* page, /* in: index page */
- dict_index_t* index, /* in: dictionary index of the page */
- ulint pr_n) /* in: print n first and n last entries */
+ buf_block_t* block, /*!< in: index page */
+ dict_index_t* index, /*!< in: dictionary index of the page */
+ ulint pr_n) /*!< in: print n first and n last entries */
{
+ page_t* page = block->frame;
page_cur_t cur;
ulint count;
ulint n_recs;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table));
@@ -1381,7 +1686,7 @@ page_print_list(
n_recs = page_get_n_recs(page);
- page_cur_set_before_first(page, &cur);
+ page_cur_set_before_first(block, &cur);
count = 0;
for (;;) {
offsets = rec_get_offsets(cur.rec, index, offsets,
@@ -1423,13 +1728,13 @@ page_print_list(
}
}
-/*******************************************************************
+/***************************************************************//**
Prints the info in a page header. */
-
+UNIV_INTERN
void
page_header_print(
/*==============*/
- page_t* page)
+ const page_t* page)
{
fprintf(stderr,
"--------------------------------\n"
@@ -1450,63 +1755,69 @@ page_header_print(
(ulong) page_header_get_field(page, PAGE_N_DIRECTION));
}
-/*******************************************************************
+/***************************************************************//**
This is used to print the contents of the page for
debugging purposes. */
-
+UNIV_INTERN
void
page_print(
/*=======*/
- page_t* page, /* in: index page */
- dict_index_t* index, /* in: dictionary index of the page */
- ulint dn, /* in: print dn first and last entries
+ buf_block_t* block, /*!< in: index page */
+ dict_index_t* index, /*!< in: dictionary index of the page */
+ ulint dn, /*!< in: print dn first and last entries
in directory */
- ulint rn) /* in: print rn first and last records
+ ulint rn) /*!< in: print rn first and last records
in directory */
{
+ page_t* page = block->frame;
+
page_header_print(page);
page_dir_print(page, dn);
- page_print_list(page, index, rn);
+ page_print_list(block, index, rn);
}
+#endif /* !UNIV_HOTBACKUP */
-/*******************************************************************
+/***************************************************************//**
The following is used to validate a record on a page. This function
differs from rec_validate as it can also check the n_owned field and
-the heap_no field. */
-
+the heap_no field.
+@return TRUE if ok */
+UNIV_INTERN
ibool
page_rec_validate(
/*==============*/
- /* out: TRUE if ok */
- rec_t* rec, /* in: physical record */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
+ rec_t* rec, /*!< in: physical record */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
ulint n_owned;
ulint heap_no;
page_t* page;
- ulint comp;
- page = buf_frame_align(rec);
- comp = page_is_comp(page);
- ut_a(!comp == !rec_offs_comp(offsets));
+ page = page_align(rec);
+ ut_a(!page_is_comp(page) == !rec_offs_comp(offsets));
page_rec_check(rec);
rec_validate(rec, offsets);
- n_owned = rec_get_n_owned(rec, comp);
- heap_no = rec_get_heap_no(rec, comp);
+ if (page_rec_is_comp(rec)) {
+ n_owned = rec_get_n_owned_new(rec);
+ heap_no = rec_get_heap_no_new(rec);
+ } else {
+ n_owned = rec_get_n_owned_old(rec);
+ heap_no = rec_get_heap_no_old(rec);
+ }
- if (!(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED)) {
+ if (UNIV_UNLIKELY(!(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED))) {
fprintf(stderr,
"InnoDB: Dir slot of rec %lu, n owned too big %lu\n",
- (ulong)(rec - page), (ulong) n_owned);
+ (ulong) page_offset(rec), (ulong) n_owned);
return(FALSE);
}
- if (!(heap_no < page_dir_get_n_heap(page))) {
+ if (UNIV_UNLIKELY(!(heap_no < page_dir_get_n_heap(page)))) {
fprintf(stderr,
"InnoDB: Heap no of rec %lu too big %lu %lu\n",
- (ulong)(rec - page), (ulong) heap_no,
+ (ulong) page_offset(rec), (ulong) heap_no,
(ulong) page_dir_get_n_heap(page));
return(FALSE);
}
@@ -1514,51 +1825,55 @@ page_rec_validate(
return(TRUE);
}
-/*******************************************************************
+#ifndef UNIV_HOTBACKUP
+/***************************************************************//**
Checks that the first directory slot points to the infimum record and
the last to the supremum. This function is intended to track if the
bug fixed in 4.0.14 has caused corruption to users' databases. */
-
+UNIV_INTERN
void
page_check_dir(
/*===========*/
- page_t* page) /* in: index page */
+ const page_t* page) /*!< in: index page */
{
ulint n_slots;
+ ulint infimum_offs;
+ ulint supremum_offs;
n_slots = page_dir_get_n_slots(page);
+ infimum_offs = mach_read_from_2(page_dir_get_nth_slot(page, 0));
+ supremum_offs = mach_read_from_2(page_dir_get_nth_slot(page,
+ n_slots - 1));
- if (page_dir_slot_get_rec(page_dir_get_nth_slot(page, 0))
- != page_get_infimum_rec(page)) {
+ if (UNIV_UNLIKELY(!page_rec_is_infimum_low(infimum_offs))) {
fprintf(stderr,
"InnoDB: Page directory corruption:"
" infimum not pointed to\n");
- buf_page_print(page);
+ buf_page_print(page, 0);
}
- if (page_dir_slot_get_rec(page_dir_get_nth_slot(page, n_slots - 1))
- != page_get_supremum_rec(page)) {
+ if (UNIV_UNLIKELY(!page_rec_is_supremum_low(supremum_offs))) {
fprintf(stderr,
"InnoDB: Page directory corruption:"
" supremum not pointed to\n");
- buf_page_print(page);
+ buf_page_print(page, 0);
}
}
+#endif /* !UNIV_HOTBACKUP */
-/*******************************************************************
+/***************************************************************//**
This function checks the consistency of an index page when we do not
know the index. This is also resilient so that this should never crash
-even if the page is total garbage. */
-
+even if the page is total garbage.
+@return TRUE if ok */
+UNIV_INTERN
ibool
-page_simple_validate(
-/*=================*/
- /* out: TRUE if ok */
- page_t* page) /* in: index page */
+page_simple_validate_old(
+/*=====================*/
+ page_t* page) /*!< in: old-style index page */
{
- page_cur_t cur;
page_dir_slot_t* slot;
ulint slot_no;
ulint n_slots;
@@ -1567,14 +1882,15 @@ page_simple_validate(
ulint count;
ulint own_count;
ibool ret = FALSE;
- ulint comp = page_is_comp(page);
+
+ ut_a(!page_is_comp(page));
/* Check first that the record heap and the directory do not
overlap. */
n_slots = page_dir_get_n_slots(page);
- if (n_slots > UNIV_PAGE_SIZE / 4) {
+ if (UNIV_UNLIKELY(n_slots > UNIV_PAGE_SIZE / 4)) {
fprintf(stderr,
"InnoDB: Nonsensical number %lu of page dir slots\n",
(ulong) n_slots);
@@ -1584,15 +1900,15 @@ page_simple_validate(
rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP);
- if (rec_heap_top > page_dir_get_nth_slot(page, n_slots - 1)) {
+ if (UNIV_UNLIKELY(rec_heap_top
+ > page_dir_get_nth_slot(page, n_slots - 1))) {
fprintf(stderr,
"InnoDB: Record heap and dir overlap on a page,"
" heap top %lu, dir %lu\n",
+ (ulong) page_header_get_field(page, PAGE_HEAP_TOP),
(ulong)
- (page_header_get_ptr(page, PAGE_HEAP_TOP) - page),
- (ulong)
- (page_dir_get_nth_slot(page, n_slots - 1) - page));
+ page_offset(page_dir_get_nth_slot(page, n_slots - 1)));
goto func_exit;
}
@@ -1605,12 +1921,10 @@ page_simple_validate(
slot_no = 0;
slot = page_dir_get_nth_slot(page, slot_no);
- page_cur_set_before_first(page, &cur);
+ rec = page_get_infimum_rec(page);
for (;;) {
- rec = (&cur)->rec;
-
- if (rec > rec_heap_top) {
+ if (UNIV_UNLIKELY(rec > rec_heap_top)) {
fprintf(stderr,
"InnoDB: Record %lu is above"
" rec heap top %lu\n",
@@ -1620,21 +1934,23 @@ page_simple_validate(
goto func_exit;
}
- if (rec_get_n_owned(rec, comp) != 0) {
+ if (UNIV_UNLIKELY(rec_get_n_owned_old(rec))) {
/* This is a record pointed to by a dir slot */
- if (rec_get_n_owned(rec, comp) != own_count) {
+ if (UNIV_UNLIKELY(rec_get_n_owned_old(rec)
+ != own_count)) {
fprintf(stderr,
"InnoDB: Wrong owned count %lu, %lu,"
" rec %lu\n",
- (ulong) rec_get_n_owned(rec, comp),
+ (ulong) rec_get_n_owned_old(rec),
(ulong) own_count,
(ulong)(rec - page));
goto func_exit;
}
- if (page_dir_slot_get_rec(slot) != rec) {
+ if (UNIV_UNLIKELY
+ (page_dir_slot_get_rec(slot) != rec)) {
fprintf(stderr,
"InnoDB: Dir slot does not point"
" to right rec %lu\n",
@@ -1645,31 +1961,32 @@ page_simple_validate(
own_count = 0;
- if (!page_cur_is_after_last(&cur)) {
+ if (!page_rec_is_supremum(rec)) {
slot_no++;
slot = page_dir_get_nth_slot(page, slot_no);
}
}
- if (page_cur_is_after_last(&cur)) {
+ if (page_rec_is_supremum(rec)) {
break;
}
- if (rec_get_next_offs(rec, comp) < FIL_PAGE_DATA
- || rec_get_next_offs(rec, comp) >= UNIV_PAGE_SIZE) {
+ if (UNIV_UNLIKELY
+ (rec_get_next_offs(rec, FALSE) < FIL_PAGE_DATA
+ || rec_get_next_offs(rec, FALSE) >= UNIV_PAGE_SIZE)) {
fprintf(stderr,
"InnoDB: Next record offset"
" nonsensical %lu for rec %lu\n",
- (ulong) rec_get_next_offs(rec, comp),
- (ulong)(rec - page));
+ (ulong) rec_get_next_offs(rec, FALSE),
+ (ulong) (rec - page));
goto func_exit;
}
count++;
- if (count > UNIV_PAGE_SIZE) {
+ if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
fprintf(stderr,
"InnoDB: Page record list appears"
" to be circular %lu\n",
@@ -1677,25 +1994,28 @@ page_simple_validate(
goto func_exit;
}
- page_cur_move_to_next(&cur);
+ rec = page_rec_get_next(rec);
own_count++;
}
- if (rec_get_n_owned(rec, comp) == 0) {
+ if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) == 0)) {
fprintf(stderr, "InnoDB: n owned is zero in a supremum rec\n");
goto func_exit;
}
- if (slot_no != n_slots - 1) {
+ if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
fprintf(stderr, "InnoDB: n slots wrong %lu, %lu\n",
(ulong) slot_no, (ulong) (n_slots - 1));
goto func_exit;
}
- if (page_header_get_field(page, PAGE_N_RECS) + 2 != count + 1) {
+ if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS)
+ + PAGE_HEAP_NO_USER_LOW
+ != count + 1)) {
fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
- (ulong) page_header_get_field(page, PAGE_N_RECS) + 2,
+ (ulong) page_header_get_field(page, PAGE_N_RECS)
+ + PAGE_HEAP_NO_USER_LOW,
(ulong) (count + 1));
goto func_exit;
@@ -1705,8 +2025,8 @@ page_simple_validate(
rec = page_header_get_ptr(page, PAGE_FREE);
while (rec != NULL) {
- if (rec < page + FIL_PAGE_DATA
- || rec >= page + UNIV_PAGE_SIZE) {
+ if (UNIV_UNLIKELY(rec < page + FIL_PAGE_DATA
+ || rec >= page + UNIV_PAGE_SIZE)) {
fprintf(stderr,
"InnoDB: Free list record has"
" a nonsensical offset %lu\n",
@@ -1715,7 +2035,7 @@ page_simple_validate(
goto func_exit;
}
- if (rec > rec_heap_top) {
+ if (UNIV_UNLIKELY(rec > rec_heap_top)) {
fprintf(stderr,
"InnoDB: Free list record %lu"
" is above rec heap top %lu\n",
@@ -1727,7 +2047,7 @@ page_simple_validate(
count++;
- if (count > UNIV_PAGE_SIZE) {
+ if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
fprintf(stderr,
"InnoDB: Page free list appears"
" to be circular %lu\n",
@@ -1738,7 +2058,7 @@ page_simple_validate(
rec = page_rec_get_next(rec);
}
- if (page_dir_get_n_heap(page) != count + 1) {
+ if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n",
(ulong) page_dir_get_n_heap(page),
@@ -1753,23 +2073,234 @@ func_exit:
return(ret);
}
-/*******************************************************************
-This function checks the consistency of an index page. */
+/***************************************************************//**
+This function checks the consistency of an index page when we do not
+know the index. This is also resilient so that this should never crash
+even if the page is total garbage.
+@return TRUE if ok */
+UNIV_INTERN
+ibool
+page_simple_validate_new(
+/*=====================*/
+ page_t* page) /*!< in: new-style index page */
+{
+ page_dir_slot_t* slot;
+ ulint slot_no;
+ ulint n_slots;
+ rec_t* rec;
+ byte* rec_heap_top;
+ ulint count;
+ ulint own_count;
+ ibool ret = FALSE;
+
+ ut_a(page_is_comp(page));
+
+ /* Check first that the record heap and the directory do not
+ overlap. */
+
+ n_slots = page_dir_get_n_slots(page);
+
+ if (UNIV_UNLIKELY(n_slots > UNIV_PAGE_SIZE / 4)) {
+ fprintf(stderr,
+ "InnoDB: Nonsensical number %lu"
+ " of page dir slots\n", (ulong) n_slots);
+
+ goto func_exit;
+ }
+
+ rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP);
+
+ if (UNIV_UNLIKELY(rec_heap_top
+ > page_dir_get_nth_slot(page, n_slots - 1))) {
+
+ fprintf(stderr,
+ "InnoDB: Record heap and dir overlap on a page,"
+ " heap top %lu, dir %lu\n",
+ (ulong) page_header_get_field(page, PAGE_HEAP_TOP),
+ (ulong)
+ page_offset(page_dir_get_nth_slot(page, n_slots - 1)));
+
+ goto func_exit;
+ }
+
+ /* Validate the record list in a loop checking also that it is
+ consistent with the page record directory. */
+
+ count = 0;
+ own_count = 1;
+ slot_no = 0;
+ slot = page_dir_get_nth_slot(page, slot_no);
+
+ rec = page_get_infimum_rec(page);
+
+ for (;;) {
+ if (UNIV_UNLIKELY(rec > rec_heap_top)) {
+ fprintf(stderr,
+ "InnoDB: Record %lu is above rec"
+ " heap top %lu\n",
+ (ulong) page_offset(rec),
+ (ulong) page_offset(rec_heap_top));
+
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) {
+ /* This is a record pointed to by a dir slot */
+ if (UNIV_UNLIKELY(rec_get_n_owned_new(rec)
+ != own_count)) {
+
+ fprintf(stderr,
+ "InnoDB: Wrong owned count %lu, %lu,"
+ " rec %lu\n",
+ (ulong) rec_get_n_owned_new(rec),
+ (ulong) own_count,
+ (ulong) page_offset(rec));
+
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY
+ (page_dir_slot_get_rec(slot) != rec)) {
+ fprintf(stderr,
+ "InnoDB: Dir slot does not point"
+ " to right rec %lu\n",
+ (ulong) page_offset(rec));
+
+ goto func_exit;
+ }
+
+ own_count = 0;
+
+ if (!page_rec_is_supremum(rec)) {
+ slot_no++;
+ slot = page_dir_get_nth_slot(page, slot_no);
+ }
+ }
+
+ if (page_rec_is_supremum(rec)) {
+
+ break;
+ }
+
+ if (UNIV_UNLIKELY
+ (rec_get_next_offs(rec, TRUE) < FIL_PAGE_DATA
+ || rec_get_next_offs(rec, TRUE) >= UNIV_PAGE_SIZE)) {
+ fprintf(stderr,
+ "InnoDB: Next record offset nonsensical %lu"
+ " for rec %lu\n",
+ (ulong) rec_get_next_offs(rec, TRUE),
+ (ulong) page_offset(rec));
+
+ goto func_exit;
+ }
+
+ count++;
+
+ if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
+ fprintf(stderr,
+ "InnoDB: Page record list appears"
+ " to be circular %lu\n",
+ (ulong) count);
+ goto func_exit;
+ }
+
+ rec = page_rec_get_next(rec);
+ own_count++;
+ }
+
+ if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) == 0)) {
+ fprintf(stderr, "InnoDB: n owned is zero"
+ " in a supremum rec\n");
+
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
+ fprintf(stderr, "InnoDB: n slots wrong %lu, %lu\n",
+ (ulong) slot_no, (ulong) (n_slots - 1));
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS)
+ + PAGE_HEAP_NO_USER_LOW
+ != count + 1)) {
+ fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
+ (ulong) page_header_get_field(page, PAGE_N_RECS)
+ + PAGE_HEAP_NO_USER_LOW,
+ (ulong) (count + 1));
+
+ goto func_exit;
+ }
+
+ /* Check then the free list */
+ rec = page_header_get_ptr(page, PAGE_FREE);
+
+ while (rec != NULL) {
+ if (UNIV_UNLIKELY(rec < page + FIL_PAGE_DATA
+ || rec >= page + UNIV_PAGE_SIZE)) {
+ fprintf(stderr,
+ "InnoDB: Free list record has"
+ " a nonsensical offset %lu\n",
+ (ulong) page_offset(rec));
+
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(rec > rec_heap_top)) {
+ fprintf(stderr,
+ "InnoDB: Free list record %lu"
+ " is above rec heap top %lu\n",
+ (ulong) page_offset(rec),
+ (ulong) page_offset(rec_heap_top));
+
+ goto func_exit;
+ }
+
+ count++;
+
+ if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
+ fprintf(stderr,
+ "InnoDB: Page free list appears"
+ " to be circular %lu\n",
+ (ulong) count);
+ goto func_exit;
+ }
+
+ rec = page_rec_get_next(rec);
+ }
+
+ if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
+
+ fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n",
+ (ulong) page_dir_get_n_heap(page),
+ (ulong) (count + 1));
+
+ goto func_exit;
+ }
+
+ ret = TRUE;
+
+func_exit:
+ return(ret);
+}
+/***************************************************************//**
+This function checks the consistency of an index page.
+@return TRUE if ok */
+UNIV_INTERN
ibool
page_validate(
/*==========*/
- /* out: TRUE if ok */
- page_t* page, /* in: index page */
- dict_index_t* index) /* in: data dictionary index containing
+ page_t* page, /*!< in: index page */
+ dict_index_t* index) /*!< in: data dictionary index containing
the page record type definition */
{
- page_dir_slot_t* slot;
+ page_dir_slot_t*slot;
mem_heap_t* heap;
- page_cur_t cur;
byte* buf;
ulint count;
ulint own_count;
+ ulint rec_own_count;
ulint slot_no;
ulint data_size;
rec_t* rec;
@@ -1778,16 +2309,22 @@ page_validate(
ulint n_slots;
ibool ret = FALSE;
ulint i;
- ulint comp = page_is_comp(page);
ulint* offsets = NULL;
ulint* old_offsets = NULL;
- if ((ibool)!!comp != dict_table_is_comp(index->table)) {
+ if (UNIV_UNLIKELY((ibool) !!page_is_comp(page)
+ != dict_table_is_comp(index->table))) {
fputs("InnoDB: 'compact format' flag mismatch\n", stderr);
goto func_exit2;
}
- if (!page_simple_validate(page)) {
- goto func_exit2;
+ if (page_is_comp(page)) {
+ if (UNIV_UNLIKELY(!page_simple_validate_new(page))) {
+ goto func_exit2;
+ }
+ } else {
+ if (UNIV_UNLIKELY(!page_simple_validate_old(page))) {
+ goto func_exit2;
+ }
}
heap = mem_heap_create(UNIV_PAGE_SIZE + 200);
@@ -1795,21 +2332,21 @@ page_validate(
/* The following buffer is used to check that the
records in the page record heap do not overlap */
- buf = mem_heap_alloc(heap, UNIV_PAGE_SIZE);
- memset(buf, 0, UNIV_PAGE_SIZE);
+ buf = mem_heap_zalloc(heap, UNIV_PAGE_SIZE);
/* Check first that the record heap and the directory do not
overlap. */
n_slots = page_dir_get_n_slots(page);
- if (!(page_header_get_ptr(page, PAGE_HEAP_TOP)
- <= page_dir_get_nth_slot(page, n_slots - 1))) {
+ if (UNIV_UNLIKELY(!(page_header_get_ptr(page, PAGE_HEAP_TOP)
+ <= page_dir_get_nth_slot(page, n_slots - 1)))) {
- fputs("InnoDB: Record heap and dir overlap on a page ",
- stderr);
- dict_index_name_print(stderr, NULL, index);
- fprintf(stderr, ", %p, %p\n",
+ fprintf(stderr,
+ "InnoDB: Record heap and dir overlap"
+ " on space %lu page %lu index %s, %p, %p\n",
+ (ulong) page_get_space_id(page),
+ (ulong) page_get_page_no(page), index->name,
page_header_get_ptr(page, PAGE_HEAP_TOP),
page_dir_get_nth_slot(page, n_slots - 1));
@@ -1824,34 +2361,36 @@ page_validate(
slot_no = 0;
slot = page_dir_get_nth_slot(page, slot_no);
- page_cur_set_before_first(page, &cur);
+ rec = page_get_infimum_rec(page);
for (;;) {
- rec = cur.rec;
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &heap);
- if (comp && page_rec_is_user_rec(rec)
- && rec_get_node_ptr_flag(rec)
- != (ibool)
- (btr_page_get_level_low(page) != 0)) {
+ if (page_is_comp(page) && page_rec_is_user_rec(rec)
+ && UNIV_UNLIKELY(rec_get_node_ptr_flag(rec)
+ == page_is_leaf(page))) {
fputs("InnoDB: node_ptr flag mismatch\n", stderr);
goto func_exit;
}
- if (!page_rec_validate(rec, offsets)) {
+ if (UNIV_UNLIKELY(!page_rec_validate(rec, offsets))) {
goto func_exit;
}
+#ifndef UNIV_HOTBACKUP
/* Check that the records are in the ascending order */
- if ((count >= 2) && (!page_cur_is_after_last(&cur))) {
- if (!(1 == cmp_rec_rec(rec, old_rec,
- offsets, old_offsets, index))) {
- fprintf(stderr,
+ if (UNIV_LIKELY(count >= PAGE_HEAP_NO_USER_LOW)
+ && !page_rec_is_supremum(rec)) {
+ if (UNIV_UNLIKELY
+ (1 != cmp_rec_rec(rec, old_rec,
+ offsets, old_offsets, index))) {
+ fprintf(stderr,
"InnoDB: Records in wrong order"
- " on page %lu ",
- (ulong) buf_frame_get_page_no(page));
- dict_index_name_print(stderr, NULL, index);
+ " on space %lu page %lu index %s\n",
+ (ulong) page_get_space_id(page),
+ (ulong) page_get_page_no(page),
+ index->name);
fputs("\nInnoDB: previous record ", stderr);
rec_print_new(stderr, old_rec, old_offsets);
fputs("\nInnoDB: record ", stderr);
@@ -1861,16 +2400,17 @@ page_validate(
goto func_exit;
}
}
+#endif /* !UNIV_HOTBACKUP */
if (page_rec_is_user_rec(rec)) {
data_size += rec_offs_size(offsets);
}
- offs = rec_get_start(rec, offsets) - page;
+ offs = page_offset(rec_get_start(rec, offsets));
- for (i = 0; i < rec_offs_size(offsets); i++) {
- if (!buf[offs + i] == 0) {
+ for (i = rec_offs_size(offsets); i--; ) {
+ if (UNIV_UNLIKELY(buf[offs + i])) {
/* No other record may overlap this */
fputs("InnoDB: Record overlaps another\n",
@@ -1881,12 +2421,18 @@ page_validate(
buf[offs + i] = 1;
}
- if (rec_get_n_owned(rec, comp) != 0) {
+ if (page_is_comp(page)) {
+ rec_own_count = rec_get_n_owned_new(rec);
+ } else {
+ rec_own_count = rec_get_n_owned_old(rec);
+ }
+
+ if (UNIV_UNLIKELY(rec_own_count)) {
/* This is a record pointed to by a dir slot */
- if (rec_get_n_owned(rec, comp) != own_count) {
+ if (UNIV_UNLIKELY(rec_own_count != own_count)) {
fprintf(stderr,
"InnoDB: Wrong owned count %lu, %lu\n",
- (ulong) rec_get_n_owned(rec, comp),
+ (ulong) rec_own_count,
(ulong) own_count);
goto func_exit;
}
@@ -1901,28 +2447,21 @@ page_validate(
page_dir_slot_check(slot);
own_count = 0;
- if (!page_cur_is_after_last(&cur)) {
+ if (!page_rec_is_supremum(rec)) {
slot_no++;
slot = page_dir_get_nth_slot(page, slot_no);
}
}
- if (page_cur_is_after_last(&cur)) {
+ if (page_rec_is_supremum(rec)) {
break;
}
- if (rec_get_next_offs(rec, comp) < FIL_PAGE_DATA
- || rec_get_next_offs(rec, comp) >= UNIV_PAGE_SIZE) {
- fprintf(stderr,
- "InnoDB: Next record offset wrong %lu\n",
- (ulong) rec_get_next_offs(rec, comp));
- goto func_exit;
- }
-
count++;
- page_cur_move_to_next(&cur);
own_count++;
old_rec = rec;
+ rec = page_rec_get_next(rec);
+
/* set old_offsets to offsets; recycle offsets */
{
ulint* offs = old_offsets;
@@ -1931,25 +2470,34 @@ page_validate(
}
}
- if (rec_get_n_owned(rec, comp) == 0) {
+ if (page_is_comp(page)) {
+ if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) == 0)) {
+
+ goto n_owned_zero;
+ }
+ } else if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) == 0)) {
+n_owned_zero:
fputs("InnoDB: n owned is zero\n", stderr);
goto func_exit;
}
- if (slot_no != n_slots - 1) {
+ if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
fprintf(stderr, "InnoDB: n slots wrong %lu %lu\n",
(ulong) slot_no, (ulong) (n_slots - 1));
goto func_exit;
}
- if (page_header_get_field(page, PAGE_N_RECS) + 2 != count + 1) {
+ if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS)
+ + PAGE_HEAP_NO_USER_LOW
+ != count + 1)) {
fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
- (ulong) page_header_get_field(page, PAGE_N_RECS) + 2,
+ (ulong) page_header_get_field(page, PAGE_N_RECS)
+ + PAGE_HEAP_NO_USER_LOW,
(ulong) (count + 1));
goto func_exit;
}
- if (data_size != page_get_data_size(page)) {
+ if (UNIV_UNLIKELY(data_size != page_get_data_size(page))) {
fprintf(stderr,
"InnoDB: Summed data size %lu, returned by func %lu\n",
(ulong) data_size, (ulong) page_get_data_size(page));
@@ -1962,17 +2510,17 @@ page_validate(
while (rec != NULL) {
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &heap);
- if (!page_rec_validate(rec, offsets)) {
+ if (UNIV_UNLIKELY(!page_rec_validate(rec, offsets))) {
goto func_exit;
}
count++;
- offs = rec_get_start(rec, offsets) - page;
+ offs = page_offset(rec_get_start(rec, offsets));
- for (i = 0; i < rec_offs_size(offsets); i++) {
+ for (i = rec_offs_size(offsets); i--; ) {
- if (buf[offs + i] != 0) {
+ if (UNIV_UNLIKELY(buf[offs + i])) {
fputs("InnoDB: Record overlaps another"
" in free list\n", stderr);
goto func_exit;
@@ -1984,7 +2532,7 @@ page_validate(
rec = page_rec_get_next(rec);
}
- if (page_dir_get_n_heap(page) != count + 1) {
+ if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
fprintf(stderr, "InnoDB: N heap is wrong %lu %lu\n",
(ulong) page_dir_get_n_heap(page),
(ulong) count + 1);
@@ -1996,43 +2544,65 @@ page_validate(
func_exit:
mem_heap_free(heap);
- if (ret == FALSE) {
+ if (UNIV_UNLIKELY(ret == FALSE)) {
func_exit2:
- fprintf(stderr, "InnoDB: Apparent corruption in page %lu in ",
- (ulong) buf_frame_get_page_no(page));
- dict_index_name_print(stderr, NULL, index);
- putc('\n', stderr);
- buf_page_print(page);
+ fprintf(stderr,
+ "InnoDB: Apparent corruption"
+ " in space %lu page %lu index %s\n",
+ (ulong) page_get_space_id(page),
+ (ulong) page_get_page_no(page),
+ index->name);
+ buf_page_print(page, 0);
}
return(ret);
}
-/*******************************************************************
-Looks in the page record list for a record with the given heap number. */
-
-rec_t*
+#ifndef UNIV_HOTBACKUP
+/***************************************************************//**
+Looks in the page record list for a record with the given heap number.
+@return record, NULL if not found */
+UNIV_INTERN
+const rec_t*
page_find_rec_with_heap_no(
/*=======================*/
- /* out: record, NULL if not found */
- page_t* page, /* in: index page */
- ulint heap_no)/* in: heap number */
+ const page_t* page, /*!< in: index page */
+ ulint heap_no)/*!< in: heap number */
{
- page_cur_t cur;
+ const rec_t* rec;
- page_cur_set_before_first(page, &cur);
+ if (page_is_comp(page)) {
+ rec = page + PAGE_NEW_INFIMUM;
- for (;;) {
- if (rec_get_heap_no(cur.rec, page_is_comp(page)) == heap_no) {
+ for(;;) {
+ ulint rec_heap_no = rec_get_heap_no_new(rec);
- return(cur.rec);
- }
+ if (rec_heap_no == heap_no) {
- if (page_cur_is_after_last(&cur)) {
+ return(rec);
+ } else if (rec_heap_no == PAGE_HEAP_NO_SUPREMUM) {
- return(NULL);
+ return(NULL);
+ }
+
+ rec = page + rec_get_next_offs(rec, TRUE);
}
+ } else {
+ rec = page + PAGE_OLD_INFIMUM;
- page_cur_move_to_next(&cur);
+ for (;;) {
+ ulint rec_heap_no = rec_get_heap_no_old(rec);
+
+ if (rec_heap_no == heap_no) {
+
+ return(rec);
+ } else if (rec_heap_no == PAGE_HEAP_NO_SUPREMUM) {
+
+ return(NULL);
+ }
+
+ rec = page + rec_get_next_offs(rec, FALSE);
+ }
}
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/page/page0zip.c b/storage/innobase/page/page0zip.c
index aa5e39ff04a..aa5e39ff04a 100644
--- a/storage/innodb_plugin/page/page0zip.c
+++ b/storage/innobase/page/page0zip.c
diff --git a/storage/innobase/pars/lexyy.c b/storage/innobase/pars/lexyy.c
index b65de138573..815395ea316 100644
--- a/storage/innobase/pars/lexyy.c
+++ b/storage/innobase/pars/lexyy.c
@@ -1,7 +1,25 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
#include "univ.i"
-#line 2 "_flex_tmp.c"
+#line 2 "lexyy.c"
-#line 4 "_flex_tmp.c"
+#line 4 "lexyy.c"
#define YY_INT_ALIGNED short int
@@ -141,9 +159,9 @@ typedef unsigned int flex_uint32_t;
typedef struct yy_buffer_state *YY_BUFFER_STATE;
#endif
-extern int yyleng;
+static int yyleng;
-extern FILE *yyin, *yyout;
+static FILE *yyin, *yyout;
#define EOB_ACT_CONTINUE_SCAN 0
#define EOB_ACT_END_OF_FILE 1
@@ -265,7 +283,7 @@ static YY_BUFFER_STATE * yy_buffer_stack = 0; /**< Stack as an array. */
/* yy_hold_char holds the character lost when yytext is formed. */
static char yy_hold_char;
static int yy_n_chars; /* number of characters read into yy_ch_buf */
-int yyleng;
+static int yyleng;
/* Points to current character in buffer. */
static char *yy_c_buf_p = (char *) 0;
@@ -277,13 +295,13 @@ static int yy_start = 0; /* start state number */
*/
static int yy_did_buffer_switch_on_eof;
-void yyrestart (FILE *input_file );
-void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer );
-YY_BUFFER_STATE yy_create_buffer (FILE *file,int size );
-void yy_delete_buffer (YY_BUFFER_STATE b );
-void yy_flush_buffer (YY_BUFFER_STATE b );
-void yypush_buffer_state (YY_BUFFER_STATE new_buffer );
-void yypop_buffer_state (void );
+static void yyrestart (FILE *input_file );
+__attribute__((unused)) static void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer );
+static YY_BUFFER_STATE yy_create_buffer (FILE *file,int size );
+static void yy_delete_buffer (YY_BUFFER_STATE b );
+static void yy_flush_buffer (YY_BUFFER_STATE b );
+__attribute__((unused)) static void yypush_buffer_state (YY_BUFFER_STATE new_buffer );
+__attribute__((unused)) static void yypop_buffer_state (void );
static void yyensure_buffer_stack (void );
static void yy_load_buffer_state (void );
@@ -295,9 +313,9 @@ YY_BUFFER_STATE yy_scan_buffer (char *base,yy_size_t size );
YY_BUFFER_STATE yy_scan_string (yyconst char *yy_str );
YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,int len );
-void *yyalloc (yy_size_t );
-void *yyrealloc (void *,yy_size_t );
-void yyfree (void * );
+static void *yyalloc (yy_size_t );
+static void *yyrealloc (void *,yy_size_t );
+static void yyfree (void * );
#define yy_new_buffer yy_create_buffer
@@ -330,15 +348,15 @@ void yyfree (void * );
typedef unsigned char YY_CHAR;
-FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0;
+static FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0;
typedef int yy_state_type;
-extern int yylineno;
+static int yylineno;
-int yylineno = 1;
+static int yylineno = 1;
-extern char *yytext;
+static char *yytext;
#define yytext_ptr yytext
static yy_state_type yy_get_previous_state (void );
@@ -673,8 +691,8 @@ static yyconst flex_int16_t yy_chk[499] =
static yy_state_type yy_last_accepting_state;
static char *yy_last_accepting_cpos;
-extern int yy_flex_debug;
-int yy_flex_debug = 0;
+static int yy_flex_debug;
+static int yy_flex_debug = 0;
/* The intent behind this definition is that it'll catch
* any uses of REJECT which flex missed.
@@ -683,9 +701,9 @@ int yy_flex_debug = 0;
#define yymore() yymore_used_but_not_detected
#define YY_MORE_ADJ 0
#define YY_RESTORE_YY_MORE_OFFSET
-char *yytext;
+static char *yytext;
#line 1 "pars0lex.l"
-/******************************************************
+/**************************************************//**
SQL parser lexical analyzer: input file for the GNU Flex lexer generator
(c) 1997 Innobase Oy
@@ -729,13 +747,13 @@ Linux.
static ulint stringbuf_len_alloc = 0; /* Allocated length */
static ulint stringbuf_len = 0; /* Current length */
static char* stringbuf; /* Start of buffer */
-/* Appends a string to the buffer. */
+/** Appends a string to the buffer. */
static
void
string_append(
/*==========*/
- const char* str, /* in: string to be appended */
- ulint len) /* in: length of the string */
+ const char* str, /*!< in: string to be appended */
+ ulint len) /*!< in: length of the string */
{
if (stringbuf == NULL) {
stringbuf = malloc(1);
@@ -756,7 +774,7 @@ string_append(
-#line 759 "_flex_tmp.c"
+#line 759 "lexyy.c"
#define INITIAL 0
#define comment 1
@@ -880,9 +898,9 @@ static int input (void );
#ifndef YY_DECL
#define YY_DECL_IS_OURS 1
-extern int yylex (void);
+UNIV_INTERN int yylex (void);
-#define YY_DECL int yylex (void)
+#define YY_DECL UNIV_INTERN int yylex (void)
#endif /* !YY_DECL */
/* Code executed at the beginning of each rule, after yytext and yyleng
@@ -911,7 +929,7 @@ YY_DECL
#line 92 "pars0lex.l"
-#line 914 "_flex_tmp.c"
+#line 914 "lexyy.c"
if ( (yy_init) )
{
@@ -1913,7 +1931,7 @@ YY_RULE_SETUP
#line 648 "pars0lex.l"
YY_FATAL_ERROR( "flex scanner jammed" );
YY_BREAK
-#line 1916 "_flex_tmp.c"
+#line 1916 "lexyy.c"
case YY_STATE_EOF(INITIAL):
case YY_STATE_EOF(comment):
case YY_STATE_EOF(quoted):
@@ -2317,7 +2335,7 @@ static int yy_get_next_buffer (void)
*
* @note This function does not reset the start condition to @c INITIAL .
*/
- void yyrestart (FILE * input_file )
+ static void yyrestart (FILE * input_file )
{
if ( ! YY_CURRENT_BUFFER ){
@@ -2334,7 +2352,7 @@ static int yy_get_next_buffer (void)
* @param new_buffer The new input buffer.
*
*/
- void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer )
+ __attribute__((unused)) static void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer )
{
/* TODO. We should be able to replace this entire function body
@@ -2379,7 +2397,7 @@ static void yy_load_buffer_state (void)
*
* @return the allocated buffer state.
*/
- YY_BUFFER_STATE yy_create_buffer (FILE * file, int size )
+ static YY_BUFFER_STATE yy_create_buffer (FILE * file, int size )
{
YY_BUFFER_STATE b;
@@ -2407,7 +2425,7 @@ static void yy_load_buffer_state (void)
* @param b a buffer created with yy_create_buffer()
*
*/
- void yy_delete_buffer (YY_BUFFER_STATE b )
+ static void yy_delete_buffer (YY_BUFFER_STATE b )
{
if ( ! b )
@@ -2454,7 +2472,7 @@ static void yy_load_buffer_state (void)
* @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER.
*
*/
- void yy_flush_buffer (YY_BUFFER_STATE b )
+ static void yy_flush_buffer (YY_BUFFER_STATE b )
{
if ( ! b )
return;
@@ -2483,7 +2501,7 @@ static void yy_load_buffer_state (void)
* @param new_buffer The new state.
*
*/
-void yypush_buffer_state (YY_BUFFER_STATE new_buffer )
+__attribute__((unused)) static void yypush_buffer_state (YY_BUFFER_STATE new_buffer )
{
if (new_buffer == NULL)
return;
@@ -2513,7 +2531,7 @@ void yypush_buffer_state (YY_BUFFER_STATE new_buffer )
* The next element becomes the new top.
*
*/
-void yypop_buffer_state (void)
+__attribute__((unused)) static void yypop_buffer_state (void)
{
if (!YY_CURRENT_BUFFER)
return;
@@ -2603,7 +2621,7 @@ static void yy_fatal_error (yyconst char* msg )
/** Get the current line number.
*
*/
-int yyget_lineno (void)
+__attribute__((unused)) static int yyget_lineno (void)
{
return yylineno;
@@ -2612,7 +2630,7 @@ int yyget_lineno (void)
/** Get the input stream.
*
*/
-FILE *yyget_in (void)
+__attribute__((unused)) static FILE *yyget_in (void)
{
return yyin;
}
@@ -2620,7 +2638,7 @@ FILE *yyget_in (void)
/** Get the output stream.
*
*/
-FILE *yyget_out (void)
+__attribute__((unused)) static FILE *yyget_out (void)
{
return yyout;
}
@@ -2628,7 +2646,7 @@ FILE *yyget_out (void)
/** Get the length of the current token.
*
*/
-int yyget_leng (void)
+__attribute__((unused)) static int yyget_leng (void)
{
return yyleng;
}
@@ -2637,7 +2655,7 @@ int yyget_leng (void)
*
*/
-char *yyget_text (void)
+__attribute__((unused)) static char *yyget_text (void)
{
return yytext;
}
@@ -2646,7 +2664,7 @@ char *yyget_text (void)
* @param line_number
*
*/
-void yyset_lineno (int line_number )
+__attribute__((unused)) static void yyset_lineno (int line_number )
{
yylineno = line_number;
@@ -2658,28 +2676,28 @@ void yyset_lineno (int line_number )
*
* @see yy_switch_to_buffer
*/
-void yyset_in (FILE * in_str )
+__attribute__((unused)) static void yyset_in (FILE * in_str )
{
yyin = in_str ;
}
-void yyset_out (FILE * out_str )
+__attribute__((unused)) static void yyset_out (FILE * out_str )
{
yyout = out_str ;
}
-int yyget_debug (void)
+__attribute__((unused)) static int yyget_debug (void)
{
return yy_flex_debug;
}
-void yyset_debug (int bdebug )
+__attribute__((unused)) static void yyset_debug (int bdebug )
{
yy_flex_debug = bdebug ;
}
/* yylex_destroy is for both reentrant and non-reentrant scanners. */
-int yylex_destroy (void)
+__attribute__((unused)) static int yylex_destroy (void)
{
/* Pop the buffer stack, destroying each element. */
@@ -2720,12 +2738,12 @@ static int yy_flex_strlen (yyconst char * s )
}
#endif
-void *yyalloc (yy_size_t size )
+static void *yyalloc (yy_size_t size )
{
return (void *) malloc( size );
}
-void *yyrealloc (void * ptr, yy_size_t size )
+static void *yyrealloc (void * ptr, yy_size_t size )
{
/* The cast to (char *) in the following accommodates both
* implementations that use char* generic pointers, and those
@@ -2737,7 +2755,7 @@ void *yyrealloc (void * ptr, yy_size_t size )
return (void *) realloc( (char *) ptr, size );
}
-void yyfree (void * ptr )
+static void yyfree (void * ptr )
{
free( (char *) ptr ); /* see yyrealloc() for (char *) cast */
}
@@ -2760,3 +2778,16 @@ void yyfree (void * ptr )
+
+/**********************************************************************
+Release any resources used by the lexer. */
+UNIV_INTERN
+void
+pars_lexer_close(void)
+/*==================*/
+{
+ yylex_destroy();
+ free(stringbuf);
+ stringbuf = NULL;
+ stringbuf_len_alloc = stringbuf_len = 0;
+}
diff --git a/storage/innobase/pars/make_bison.sh b/storage/innobase/pars/make_bison.sh
index c11456230c4..09bb86e3106 100755
--- a/storage/innobase/pars/make_bison.sh
+++ b/storage/innobase/pars/make_bison.sh
@@ -1,10 +1,32 @@
#!/bin/bash
#
+# Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+# Place, Suite 330, Boston, MA 02111-1307 USA
+#
# generate parser files from bison input files.
set -eu
+TMPFILE=pars0grm.tab.c
+OUTFILE=pars0grm.c
bison -d pars0grm.y
-mv pars0grm.tab.c pars0grm.c
-mv pars0grm.tab.h pars0grm.h
-cp pars0grm.h ../include
+mv pars0grm.tab.h ../include/pars0grm.h
+
+sed -e '
+s/'"$TMPFILE"'/'"$OUTFILE"'/;
+s/^\(\(YYSTYPE\|int\) yy\(char\|nerrs\)\)/static \1/;
+s/\(\(YYSTYPE\|int\) yy\(lval\|parse\)\)/UNIV_INTERN \1/;
+' < "$TMPFILE" > "$OUTFILE"
+
+rm "$TMPFILE"
diff --git a/storage/innobase/pars/make_flex.sh b/storage/innobase/pars/make_flex.sh
index c015327bf8c..89308a6636f 100755
--- a/storage/innobase/pars/make_flex.sh
+++ b/storage/innobase/pars/make_flex.sh
@@ -1,5 +1,19 @@
#!/bin/bash
#
+# Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+# Place, Suite 330, Boston, MA 02111-1307 USA
+#
# generate lexer files from flex input files.
set -eu
@@ -14,7 +28,21 @@ flex -o $TMPFILE pars0lex.l
echo '#include "univ.i"' > $OUTFILE
# flex assigns a pointer to an int in one place without a cast, resulting in
-# a warning on Win64. this adds the cast.
-sed -e 's/int offset = (yy_c_buf_p) - (yytext_ptr);/int offset = (int)((yy_c_buf_p) - (yytext_ptr));/;' < $TMPFILE >> $OUTFILE
+# a warning on Win64. Add the cast. Also define some symbols as static.
+sed -e '
+s/'"$TMPFILE"'/'"$OUTFILE"'/;
+s/\(int offset = \)\((yy_c_buf_p) - (yytext_ptr)\);/\1(int)(\2);/;
+s/\(void yy\(restart\|_\(delete\|flush\)_buffer\)\)/static \1/;
+s/\(void yy_switch_to_buffer\)/__attribute__((unused)) static \1/;
+s/\(void yy\(push\|pop\)_buffer_state\)/__attribute__((unused)) static \1/;
+s/\(YY_BUFFER_STATE yy_create_buffer\)/static \1/;
+s/\(\(int\|void\) yy[gs]et_\)/__attribute__((unused)) static \1/;
+s/\(void \*\?yy\(\(re\)\?alloc\|free\)\)/static \1/;
+s/\(extern \)\?\(int yy\(leng\|lineno\|_flex_debug\)\)/static \2/;
+s/\(int yylex_destroy\)/__attribute__((unused)) static \1/;
+s/\(extern \)\?\(int yylex \)/UNIV_INTERN \2/;
+s/^\(\(FILE\|char\) *\* *yyget\)/__attribute__((unused)) static \1/;
+s/^\(extern \)\?\(\(FILE\|char\) *\* *yy\)/static \2/;
+' < $TMPFILE >> $OUTFILE
rm $TMPFILE
diff --git a/storage/innobase/pars/pars0grm.c b/storage/innobase/pars/pars0grm.c
index 2e39b05bada..d667970735e 100644
--- a/storage/innobase/pars/pars0grm.c
+++ b/storage/innobase/pars/pars0grm.c
@@ -1,27 +1,29 @@
-/* A Bison parser, made by GNU Bison 1.875d. */
+/*****************************************************************************
-/* Skeleton parser for Yacc-like parsing with Bison,
- Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software
+Foundation, Inc.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
+As a special exception, when this file is copied by Bison into a
+Bison output file, you may use that output file without restriction.
+This special exception was added by the Free Software Foundation
+in version 1.24 of Bison.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-/* As a special exception, when this file is copied by Bison into a
- Bison output file, you may use that output file without restriction.
- This special exception was added by the Free Software Foundation
- in version 1.24 of Bison. */
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/* A Bison parser, made by GNU Bison 2.0. */
/* Written by Richard Stallman by simplifying the original so called
``semantic'' parser. */
@@ -292,8 +294,8 @@ typedef int YYSTYPE;
/* Copy the second part of user declarations. */
-/* Line 214 of yacc.c. */
-#line 297 "pars0grm.tab.c"
+/* Line 213 of yacc.c. */
+#line 297 "pars0grm.c"
#if ! defined (yyoverflow) || YYERROR_VERBOSE
@@ -308,14 +310,10 @@ typedef int YYSTYPE;
# ifdef YYSTACK_USE_ALLOCA
# if YYSTACK_USE_ALLOCA
-# define YYSTACK_ALLOC alloca
-# endif
-# else
-# if defined (alloca) || defined (_ALLOCA_H)
-# define YYSTACK_ALLOC alloca
-# else
# ifdef __GNUC__
# define YYSTACK_ALLOC __builtin_alloca
+# else
+# define YYSTACK_ALLOC alloca
# endif
# endif
# endif
@@ -1059,20 +1057,53 @@ do \
} \
while (0)
+
#define YYTERROR 1
#define YYERRCODE 256
-/* YYLLOC_DEFAULT -- Compute the default location (before the actions
- are run). */
+/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N].
+ If N is 0, then set CURRENT to the empty location which ends
+ the previous symbol: RHS[0] (always defined). */
+
+#define YYRHSLOC(Rhs, K) ((Rhs)[K])
#ifndef YYLLOC_DEFAULT
-# define YYLLOC_DEFAULT(Current, Rhs, N) \
- ((Current).first_line = (Rhs)[1].first_line, \
- (Current).first_column = (Rhs)[1].first_column, \
- (Current).last_line = (Rhs)[N].last_line, \
- (Current).last_column = (Rhs)[N].last_column)
+# define YYLLOC_DEFAULT(Current, Rhs, N) \
+ do \
+ if (N) \
+ { \
+ (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \
+ (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \
+ (Current).last_line = YYRHSLOC (Rhs, N).last_line; \
+ (Current).last_column = YYRHSLOC (Rhs, N).last_column; \
+ } \
+ else \
+ { \
+ (Current).first_line = (Current).last_line = \
+ YYRHSLOC (Rhs, 0).last_line; \
+ (Current).first_column = (Current).last_column = \
+ YYRHSLOC (Rhs, 0).last_column; \
+ } \
+ while (0)
#endif
+
+/* YY_LOCATION_PRINT -- Print the location on the stream.
+ This macro was not mandated originally: define only if we know
+ we won't break user code: when these are the locations we know. */
+
+#ifndef YY_LOCATION_PRINT
+# if YYLTYPE_IS_TRIVIAL
+# define YY_LOCATION_PRINT(File, Loc) \
+ fprintf (File, "%d.%d-%d.%d", \
+ (Loc).first_line, (Loc).first_column, \
+ (Loc).last_line, (Loc).last_column)
+# else
+# define YY_LOCATION_PRINT(File, Loc) ((void) 0)
+# endif
+#endif
+
+
/* YYLEX -- calling `yylex' with the right arguments. */
#ifdef YYLEX_PARAM
@@ -1095,19 +1126,13 @@ do { \
YYFPRINTF Args; \
} while (0)
-# define YYDSYMPRINT(Args) \
-do { \
- if (yydebug) \
- yysymprint Args; \
-} while (0)
-
-# define YYDSYMPRINTF(Title, Token, Value, Location) \
+# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \
do { \
if (yydebug) \
{ \
YYFPRINTF (stderr, "%s ", Title); \
yysymprint (stderr, \
- Token, Value); \
+ Type, Value); \
YYFPRINTF (stderr, "\n"); \
} \
} while (0)
@@ -1174,8 +1199,7 @@ do { \
int yydebug;
#else /* !YYDEBUG */
# define YYDPRINTF(Args)
-# define YYDSYMPRINT(Args)
-# define YYDSYMPRINTF(Title, Token, Value, Location)
+# define YY_SYMBOL_PRINT(Title, Type, Value, Location)
# define YY_STACK_PRINT(Bottom, Top)
# define YY_REDUCE_PRINT(Rule)
#endif /* !YYDEBUG */
@@ -1193,10 +1217,6 @@ int yydebug;
SIZE_MAX < YYSTACK_BYTES (YYMAXDEPTH)
evaluated with infinite-precision integer arithmetic. */
-#if defined (YYMAXDEPTH) && YYMAXDEPTH == 0
-# undef YYMAXDEPTH
-#endif
-
#ifndef YYMAXDEPTH
# define YYMAXDEPTH 10000
#endif
@@ -1278,15 +1298,15 @@ yysymprint (yyoutput, yytype, yyvaluep)
(void) yyvaluep;
if (yytype < YYNTOKENS)
- {
- YYFPRINTF (yyoutput, "token %s (", yytname[yytype]);
-# ifdef YYPRINT
- YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep);
-# endif
- }
+ YYFPRINTF (yyoutput, "token %s (", yytname[yytype]);
else
YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]);
+
+# ifdef YYPRINT
+ if (yytype < YYNTOKENS)
+ YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep);
+# endif
switch (yytype)
{
default:
@@ -1302,10 +1322,11 @@ yysymprint (yyoutput, yytype, yyvaluep)
#if defined (__STDC__) || defined (__cplusplus)
static void
-yydestruct (int yytype, YYSTYPE *yyvaluep)
+yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep)
#else
static void
-yydestruct (yytype, yyvaluep)
+yydestruct (yymsg, yytype, yyvaluep)
+ const char *yymsg;
int yytype;
YYSTYPE *yyvaluep;
#endif
@@ -1313,6 +1334,10 @@ yydestruct (yytype, yyvaluep)
/* Pacify ``unused variable'' warnings. */
(void) yyvaluep;
+ if (!yymsg)
+ yymsg = "Deleting";
+ YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp);
+
switch (yytype)
{
@@ -1326,28 +1351,28 @@ yydestruct (yytype, yyvaluep)
#ifdef YYPARSE_PARAM
# if defined (__STDC__) || defined (__cplusplus)
-int yyparse (void *YYPARSE_PARAM);
+UNIV_INTERN int yyparse (void *YYPARSE_PARAM);
# else
-int yyparse ();
+UNIV_INTERN int yyparse ();
# endif
#else /* ! YYPARSE_PARAM */
#if defined (__STDC__) || defined (__cplusplus)
-int yyparse (void);
+UNIV_INTERN int yyparse (void);
#else
-int yyparse ();
+UNIV_INTERN int yyparse ();
#endif
#endif /* ! YYPARSE_PARAM */
-/* The lookahead symbol. */
-int yychar;
+/* The look-ahead symbol. */
+static int yychar;
-/* The semantic value of the lookahead symbol. */
-YYSTYPE yylval;
+/* The semantic value of the look-ahead symbol. */
+UNIV_INTERN YYSTYPE yylval;
/* Number of syntax errors so far. */
-int yynerrs;
+static int yynerrs;
@@ -1357,9 +1382,9 @@ int yynerrs;
#ifdef YYPARSE_PARAM
# if defined (__STDC__) || defined (__cplusplus)
-int yyparse (void *YYPARSE_PARAM)
+UNIV_INTERN int yyparse (void *YYPARSE_PARAM)
# else
-int yyparse (YYPARSE_PARAM)
+UNIV_INTERN int yyparse (YYPARSE_PARAM)
void *YYPARSE_PARAM;
# endif
#else /* ! YYPARSE_PARAM */
@@ -1379,7 +1404,7 @@ yyparse ()
int yyresult;
/* Number of tokens to shift before error messages enabled. */
int yyerrstatus;
- /* Lookahead token as an internal (translated) token number. */
+ /* Look-ahead token as an internal (translated) token number. */
int yytoken = 0;
/* Three stacks and their tools:
@@ -1431,6 +1456,8 @@ yyparse ()
yyvsp = yyvs;
+ yyvsp[0] = yylval;
+
goto yysetstate;
/*------------------------------------------------------------.
@@ -1520,18 +1547,18 @@ yyparse ()
yybackup:
/* Do appropriate processing given the current state. */
-/* Read a lookahead token if we need one and don't already have one. */
+/* Read a look-ahead token if we need one and don't already have one. */
/* yyresume: */
- /* First try to decide what to do without reference to lookahead token. */
+ /* First try to decide what to do without reference to look-ahead token. */
yyn = yypact[yystate];
if (yyn == YYPACT_NINF)
goto yydefault;
- /* Not known => get a lookahead token if don't already have one. */
+ /* Not known => get a look-ahead token if don't already have one. */
- /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. */
+ /* YYCHAR is either YYEMPTY or YYEOF or a valid look-ahead symbol. */
if (yychar == YYEMPTY)
{
YYDPRINTF ((stderr, "Reading a token: "));
@@ -1546,7 +1573,7 @@ yybackup:
else
{
yytoken = YYTRANSLATE (yychar);
- YYDSYMPRINTF ("Next token is", yytoken, &yylval, &yylloc);
+ YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc);
}
/* If the proper action on seeing token YYTOKEN is to reduce or to
@@ -1566,8 +1593,8 @@ yybackup:
if (yyn == YYFINAL)
YYACCEPT;
- /* Shift the lookahead token. */
- YYDPRINTF ((stderr, "Shifting token %s, ", yytname[yytoken]));
+ /* Shift the look-ahead token. */
+ YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc);
/* Discard the token being shifted unless it is eof. */
if (yychar != YYEOF)
@@ -1618,277 +1645,277 @@ yyreduce:
{
case 25:
#line 166 "pars0grm.y"
- { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
+ { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
break;
case 26:
#line 168 "pars0grm.y"
- { yyval = que_node_list_add_last(yyvsp[-1], yyvsp[0]); ;}
+ { (yyval) = que_node_list_add_last((yyvsp[-1]), (yyvsp[0])); ;}
break;
case 27:
#line 172 "pars0grm.y"
- { yyval = yyvsp[0];;}
+ { (yyval) = (yyvsp[0]);;}
break;
case 28:
#line 174 "pars0grm.y"
- { yyval = pars_func(yyvsp[-3], yyvsp[-1]); ;}
+ { (yyval) = pars_func((yyvsp[-3]), (yyvsp[-1])); ;}
break;
case 29:
#line 175 "pars0grm.y"
- { yyval = yyvsp[0];;}
+ { (yyval) = (yyvsp[0]);;}
break;
case 30:
#line 176 "pars0grm.y"
- { yyval = yyvsp[0];;}
+ { (yyval) = (yyvsp[0]);;}
break;
case 31:
#line 177 "pars0grm.y"
- { yyval = yyvsp[0];;}
+ { (yyval) = (yyvsp[0]);;}
break;
case 32:
#line 178 "pars0grm.y"
- { yyval = yyvsp[0];;}
+ { (yyval) = (yyvsp[0]);;}
break;
case 33:
#line 179 "pars0grm.y"
- { yyval = yyvsp[0];;}
+ { (yyval) = (yyvsp[0]);;}
break;
case 34:
#line 180 "pars0grm.y"
- { yyval = yyvsp[0];;}
+ { (yyval) = (yyvsp[0]);;}
break;
case 35:
#line 181 "pars0grm.y"
- { yyval = yyvsp[0];;}
+ { (yyval) = (yyvsp[0]);;}
break;
case 36:
#line 182 "pars0grm.y"
- { yyval = pars_op('+', yyvsp[-2], yyvsp[0]); ;}
+ { (yyval) = pars_op('+', (yyvsp[-2]), (yyvsp[0])); ;}
break;
case 37:
#line 183 "pars0grm.y"
- { yyval = pars_op('-', yyvsp[-2], yyvsp[0]); ;}
+ { (yyval) = pars_op('-', (yyvsp[-2]), (yyvsp[0])); ;}
break;
case 38:
#line 184 "pars0grm.y"
- { yyval = pars_op('*', yyvsp[-2], yyvsp[0]); ;}
+ { (yyval) = pars_op('*', (yyvsp[-2]), (yyvsp[0])); ;}
break;
case 39:
#line 185 "pars0grm.y"
- { yyval = pars_op('/', yyvsp[-2], yyvsp[0]); ;}
+ { (yyval) = pars_op('/', (yyvsp[-2]), (yyvsp[0])); ;}
break;
case 40:
#line 186 "pars0grm.y"
- { yyval = pars_op('-', yyvsp[0], NULL); ;}
+ { (yyval) = pars_op('-', (yyvsp[0]), NULL); ;}
break;
case 41:
#line 187 "pars0grm.y"
- { yyval = yyvsp[-1]; ;}
+ { (yyval) = (yyvsp[-1]); ;}
break;
case 42:
#line 188 "pars0grm.y"
- { yyval = pars_op('=', yyvsp[-2], yyvsp[0]); ;}
+ { (yyval) = pars_op('=', (yyvsp[-2]), (yyvsp[0])); ;}
break;
case 43:
#line 189 "pars0grm.y"
- { yyval = pars_op('<', yyvsp[-2], yyvsp[0]); ;}
+ { (yyval) = pars_op('<', (yyvsp[-2]), (yyvsp[0])); ;}
break;
case 44:
#line 190 "pars0grm.y"
- { yyval = pars_op('>', yyvsp[-2], yyvsp[0]); ;}
+ { (yyval) = pars_op('>', (yyvsp[-2]), (yyvsp[0])); ;}
break;
case 45:
#line 191 "pars0grm.y"
- { yyval = pars_op(PARS_GE_TOKEN, yyvsp[-2], yyvsp[0]); ;}
+ { (yyval) = pars_op(PARS_GE_TOKEN, (yyvsp[-2]), (yyvsp[0])); ;}
break;
case 46:
#line 192 "pars0grm.y"
- { yyval = pars_op(PARS_LE_TOKEN, yyvsp[-2], yyvsp[0]); ;}
+ { (yyval) = pars_op(PARS_LE_TOKEN, (yyvsp[-2]), (yyvsp[0])); ;}
break;
case 47:
#line 193 "pars0grm.y"
- { yyval = pars_op(PARS_NE_TOKEN, yyvsp[-2], yyvsp[0]); ;}
+ { (yyval) = pars_op(PARS_NE_TOKEN, (yyvsp[-2]), (yyvsp[0])); ;}
break;
case 48:
#line 194 "pars0grm.y"
- { yyval = pars_op(PARS_AND_TOKEN, yyvsp[-2], yyvsp[0]); ;}
+ { (yyval) = pars_op(PARS_AND_TOKEN, (yyvsp[-2]), (yyvsp[0])); ;}
break;
case 49:
#line 195 "pars0grm.y"
- { yyval = pars_op(PARS_OR_TOKEN, yyvsp[-2], yyvsp[0]); ;}
+ { (yyval) = pars_op(PARS_OR_TOKEN, (yyvsp[-2]), (yyvsp[0])); ;}
break;
case 50:
#line 196 "pars0grm.y"
- { yyval = pars_op(PARS_NOT_TOKEN, yyvsp[0], NULL); ;}
+ { (yyval) = pars_op(PARS_NOT_TOKEN, (yyvsp[0]), NULL); ;}
break;
case 51:
#line 198 "pars0grm.y"
- { yyval = pars_op(PARS_NOTFOUND_TOKEN, yyvsp[-2], NULL); ;}
+ { (yyval) = pars_op(PARS_NOTFOUND_TOKEN, (yyvsp[-2]), NULL); ;}
break;
case 52:
#line 200 "pars0grm.y"
- { yyval = pars_op(PARS_NOTFOUND_TOKEN, yyvsp[-2], NULL); ;}
+ { (yyval) = pars_op(PARS_NOTFOUND_TOKEN, (yyvsp[-2]), NULL); ;}
break;
case 53:
#line 204 "pars0grm.y"
- { yyval = &pars_to_char_token; ;}
+ { (yyval) = &pars_to_char_token; ;}
break;
case 54:
#line 205 "pars0grm.y"
- { yyval = &pars_to_number_token; ;}
+ { (yyval) = &pars_to_number_token; ;}
break;
case 55:
#line 206 "pars0grm.y"
- { yyval = &pars_to_binary_token; ;}
+ { (yyval) = &pars_to_binary_token; ;}
break;
case 56:
#line 208 "pars0grm.y"
- { yyval = &pars_binary_to_number_token; ;}
+ { (yyval) = &pars_binary_to_number_token; ;}
break;
case 57:
#line 209 "pars0grm.y"
- { yyval = &pars_substr_token; ;}
+ { (yyval) = &pars_substr_token; ;}
break;
case 58:
#line 210 "pars0grm.y"
- { yyval = &pars_concat_token; ;}
+ { (yyval) = &pars_concat_token; ;}
break;
case 59:
#line 211 "pars0grm.y"
- { yyval = &pars_instr_token; ;}
+ { (yyval) = &pars_instr_token; ;}
break;
case 60:
#line 212 "pars0grm.y"
- { yyval = &pars_length_token; ;}
+ { (yyval) = &pars_length_token; ;}
break;
case 61:
#line 213 "pars0grm.y"
- { yyval = &pars_sysdate_token; ;}
+ { (yyval) = &pars_sysdate_token; ;}
break;
case 62:
#line 214 "pars0grm.y"
- { yyval = &pars_rnd_token; ;}
+ { (yyval) = &pars_rnd_token; ;}
break;
case 63:
#line 215 "pars0grm.y"
- { yyval = &pars_rnd_str_token; ;}
+ { (yyval) = &pars_rnd_str_token; ;}
break;
case 67:
#line 226 "pars0grm.y"
- { yyval = pars_stored_procedure_call(yyvsp[-4]); ;}
+ { (yyval) = pars_stored_procedure_call((yyvsp[-4])); ;}
break;
case 68:
#line 231 "pars0grm.y"
- { yyval = pars_procedure_call(yyvsp[-3], yyvsp[-1]); ;}
+ { (yyval) = pars_procedure_call((yyvsp[-3]), (yyvsp[-1])); ;}
break;
case 69:
#line 235 "pars0grm.y"
- { yyval = &pars_replstr_token; ;}
+ { (yyval) = &pars_replstr_token; ;}
break;
case 70:
#line 236 "pars0grm.y"
- { yyval = &pars_printf_token; ;}
+ { (yyval) = &pars_printf_token; ;}
break;
case 71:
#line 237 "pars0grm.y"
- { yyval = &pars_assert_token; ;}
+ { (yyval) = &pars_assert_token; ;}
break;
case 72:
#line 241 "pars0grm.y"
- { yyval = yyvsp[-2]; ;}
+ { (yyval) = (yyvsp[-2]); ;}
break;
case 73:
#line 245 "pars0grm.y"
- { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
+ { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
break;
case 74:
#line 247 "pars0grm.y"
- { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;}
+ { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;}
break;
case 75:
#line 251 "pars0grm.y"
- { yyval = NULL; ;}
+ { (yyval) = NULL; ;}
break;
case 76:
#line 252 "pars0grm.y"
- { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
+ { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
break;
case 77:
#line 254 "pars0grm.y"
- { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;}
+ { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;}
break;
case 78:
#line 258 "pars0grm.y"
- { yyval = NULL; ;}
+ { (yyval) = NULL; ;}
break;
case 79:
#line 259 "pars0grm.y"
- { yyval = que_node_list_add_last(NULL, yyvsp[0]);;}
+ { (yyval) = que_node_list_add_last(NULL, (yyvsp[0]));;}
break;
case 80:
#line 260 "pars0grm.y"
- { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;}
+ { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;}
break;
case 81:
#line 264 "pars0grm.y"
- { yyval = yyvsp[0]; ;}
+ { (yyval) = (yyvsp[0]); ;}
break;
case 82:
#line 266 "pars0grm.y"
- { yyval = pars_func(&pars_count_token,
+ { (yyval) = pars_func(&pars_count_token,
que_node_list_add_last(NULL,
sym_tab_add_int_lit(
pars_sym_tab_global, 1))); ;}
@@ -1896,74 +1923,74 @@ yyreduce:
case 83:
#line 271 "pars0grm.y"
- { yyval = pars_func(&pars_count_token,
+ { (yyval) = pars_func(&pars_count_token,
que_node_list_add_last(NULL,
pars_func(&pars_distinct_token,
que_node_list_add_last(
- NULL, yyvsp[-1])))); ;}
+ NULL, (yyvsp[-1]))))); ;}
break;
case 84:
#line 277 "pars0grm.y"
- { yyval = pars_func(&pars_sum_token,
+ { (yyval) = pars_func(&pars_sum_token,
que_node_list_add_last(NULL,
- yyvsp[-1])); ;}
+ (yyvsp[-1]))); ;}
break;
case 85:
#line 283 "pars0grm.y"
- { yyval = NULL; ;}
+ { (yyval) = NULL; ;}
break;
case 86:
#line 284 "pars0grm.y"
- { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
+ { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
break;
case 87:
#line 286 "pars0grm.y"
- { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;}
+ { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;}
break;
case 88:
#line 290 "pars0grm.y"
- { yyval = pars_select_list(&pars_star_denoter,
+ { (yyval) = pars_select_list(&pars_star_denoter,
NULL); ;}
break;
case 89:
#line 293 "pars0grm.y"
- { yyval = pars_select_list(yyvsp[-2], yyvsp[0]); ;}
+ { (yyval) = pars_select_list((yyvsp[-2]), (yyvsp[0])); ;}
break;
case 90:
#line 294 "pars0grm.y"
- { yyval = pars_select_list(yyvsp[0], NULL); ;}
+ { (yyval) = pars_select_list((yyvsp[0]), NULL); ;}
break;
case 91:
#line 298 "pars0grm.y"
- { yyval = NULL; ;}
+ { (yyval) = NULL; ;}
break;
case 92:
#line 299 "pars0grm.y"
- { yyval = yyvsp[0]; ;}
+ { (yyval) = (yyvsp[0]); ;}
break;
case 93:
#line 303 "pars0grm.y"
- { yyval = NULL; ;}
+ { (yyval) = NULL; ;}
break;
case 94:
#line 305 "pars0grm.y"
- { yyval = &pars_update_token; ;}
+ { (yyval) = &pars_update_token; ;}
break;
case 95:
#line 309 "pars0grm.y"
- { yyval = NULL; ;}
+ { (yyval) = NULL; ;}
break;
case 96:
@@ -1973,375 +2000,375 @@ yyreduce:
case 97:
#line 315 "pars0grm.y"
- { yyval = &pars_asc_token; ;}
+ { (yyval) = &pars_asc_token; ;}
break;
case 98:
#line 316 "pars0grm.y"
- { yyval = &pars_asc_token; ;}
+ { (yyval) = &pars_asc_token; ;}
break;
case 99:
#line 317 "pars0grm.y"
- { yyval = &pars_desc_token; ;}
+ { (yyval) = &pars_desc_token; ;}
break;
case 100:
#line 321 "pars0grm.y"
- { yyval = NULL; ;}
+ { (yyval) = NULL; ;}
break;
case 101:
#line 323 "pars0grm.y"
- { yyval = pars_order_by(yyvsp[-1], yyvsp[0]); ;}
+ { (yyval) = pars_order_by((yyvsp[-1]), (yyvsp[0])); ;}
break;
case 102:
#line 332 "pars0grm.y"
- { yyval = pars_select_statement(yyvsp[-6], yyvsp[-4], yyvsp[-3],
- yyvsp[-2], yyvsp[-1], yyvsp[0]); ;}
+ { (yyval) = pars_select_statement((yyvsp[-6]), (yyvsp[-4]), (yyvsp[-3]),
+ (yyvsp[-2]), (yyvsp[-1]), (yyvsp[0])); ;}
break;
case 103:
#line 338 "pars0grm.y"
- { yyval = yyvsp[0]; ;}
+ { (yyval) = (yyvsp[0]); ;}
break;
case 104:
#line 343 "pars0grm.y"
- { yyval = pars_insert_statement(yyvsp[-4], yyvsp[-1], NULL); ;}
+ { (yyval) = pars_insert_statement((yyvsp[-4]), (yyvsp[-1]), NULL); ;}
break;
case 105:
#line 345 "pars0grm.y"
- { yyval = pars_insert_statement(yyvsp[-1], NULL, yyvsp[0]); ;}
+ { (yyval) = pars_insert_statement((yyvsp[-1]), NULL, (yyvsp[0])); ;}
break;
case 106:
#line 349 "pars0grm.y"
- { yyval = pars_column_assignment(yyvsp[-2], yyvsp[0]); ;}
+ { (yyval) = pars_column_assignment((yyvsp[-2]), (yyvsp[0])); ;}
break;
case 107:
#line 353 "pars0grm.y"
- { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
+ { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
break;
case 108:
#line 355 "pars0grm.y"
- { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;}
+ { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;}
break;
case 109:
#line 361 "pars0grm.y"
- { yyval = yyvsp[0]; ;}
+ { (yyval) = (yyvsp[0]); ;}
break;
case 110:
#line 367 "pars0grm.y"
- { yyval = pars_update_statement_start(FALSE,
- yyvsp[-2], yyvsp[0]); ;}
+ { (yyval) = pars_update_statement_start(FALSE,
+ (yyvsp[-2]), (yyvsp[0])); ;}
break;
case 111:
#line 373 "pars0grm.y"
- { yyval = pars_update_statement(yyvsp[-1], NULL, yyvsp[0]); ;}
+ { (yyval) = pars_update_statement((yyvsp[-1]), NULL, (yyvsp[0])); ;}
break;
case 112:
#line 378 "pars0grm.y"
- { yyval = pars_update_statement(yyvsp[-1], yyvsp[0], NULL); ;}
+ { (yyval) = pars_update_statement((yyvsp[-1]), (yyvsp[0]), NULL); ;}
break;
case 113:
#line 383 "pars0grm.y"
- { yyval = pars_update_statement_start(TRUE,
- yyvsp[0], NULL); ;}
+ { (yyval) = pars_update_statement_start(TRUE,
+ (yyvsp[0]), NULL); ;}
break;
case 114:
#line 389 "pars0grm.y"
- { yyval = pars_update_statement(yyvsp[-1], NULL, yyvsp[0]); ;}
+ { (yyval) = pars_update_statement((yyvsp[-1]), NULL, (yyvsp[0])); ;}
break;
case 115:
#line 394 "pars0grm.y"
- { yyval = pars_update_statement(yyvsp[-1], yyvsp[0], NULL); ;}
+ { (yyval) = pars_update_statement((yyvsp[-1]), (yyvsp[0]), NULL); ;}
break;
case 116:
#line 399 "pars0grm.y"
- { yyval = pars_row_printf_statement(yyvsp[0]); ;}
+ { (yyval) = pars_row_printf_statement((yyvsp[0])); ;}
break;
case 117:
#line 404 "pars0grm.y"
- { yyval = pars_assignment_statement(yyvsp[-2], yyvsp[0]); ;}
+ { (yyval) = pars_assignment_statement((yyvsp[-2]), (yyvsp[0])); ;}
break;
case 118:
#line 410 "pars0grm.y"
- { yyval = pars_elsif_element(yyvsp[-2], yyvsp[0]); ;}
+ { (yyval) = pars_elsif_element((yyvsp[-2]), (yyvsp[0])); ;}
break;
case 119:
#line 414 "pars0grm.y"
- { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
+ { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
break;
case 120:
#line 416 "pars0grm.y"
- { yyval = que_node_list_add_last(yyvsp[-1], yyvsp[0]); ;}
+ { (yyval) = que_node_list_add_last((yyvsp[-1]), (yyvsp[0])); ;}
break;
case 121:
#line 420 "pars0grm.y"
- { yyval = NULL; ;}
+ { (yyval) = NULL; ;}
break;
case 122:
#line 422 "pars0grm.y"
- { yyval = yyvsp[0]; ;}
+ { (yyval) = (yyvsp[0]); ;}
break;
case 123:
#line 423 "pars0grm.y"
- { yyval = yyvsp[0]; ;}
+ { (yyval) = (yyvsp[0]); ;}
break;
case 124:
#line 430 "pars0grm.y"
- { yyval = pars_if_statement(yyvsp[-5], yyvsp[-3], yyvsp[-2]); ;}
+ { (yyval) = pars_if_statement((yyvsp[-5]), (yyvsp[-3]), (yyvsp[-2])); ;}
break;
case 125:
#line 436 "pars0grm.y"
- { yyval = pars_while_statement(yyvsp[-4], yyvsp[-2]); ;}
+ { (yyval) = pars_while_statement((yyvsp[-4]), (yyvsp[-2])); ;}
break;
case 126:
#line 444 "pars0grm.y"
- { yyval = pars_for_statement(yyvsp[-8], yyvsp[-6], yyvsp[-4], yyvsp[-2]); ;}
+ { (yyval) = pars_for_statement((yyvsp[-8]), (yyvsp[-6]), (yyvsp[-4]), (yyvsp[-2])); ;}
break;
case 127:
#line 448 "pars0grm.y"
- { yyval = pars_exit_statement(); ;}
+ { (yyval) = pars_exit_statement(); ;}
break;
case 128:
#line 452 "pars0grm.y"
- { yyval = pars_return_statement(); ;}
+ { (yyval) = pars_return_statement(); ;}
break;
case 129:
#line 457 "pars0grm.y"
- { yyval = pars_open_statement(
- ROW_SEL_OPEN_CURSOR, yyvsp[0]); ;}
+ { (yyval) = pars_open_statement(
+ ROW_SEL_OPEN_CURSOR, (yyvsp[0])); ;}
break;
case 130:
#line 463 "pars0grm.y"
- { yyval = pars_open_statement(
- ROW_SEL_CLOSE_CURSOR, yyvsp[0]); ;}
+ { (yyval) = pars_open_statement(
+ ROW_SEL_CLOSE_CURSOR, (yyvsp[0])); ;}
break;
case 131:
#line 469 "pars0grm.y"
- { yyval = pars_fetch_statement(yyvsp[-2], yyvsp[0], NULL); ;}
+ { (yyval) = pars_fetch_statement((yyvsp[-2]), (yyvsp[0]), NULL); ;}
break;
case 132:
#line 471 "pars0grm.y"
- { yyval = pars_fetch_statement(yyvsp[-2], NULL, yyvsp[0]); ;}
+ { (yyval) = pars_fetch_statement((yyvsp[-2]), NULL, (yyvsp[0])); ;}
break;
case 133:
#line 476 "pars0grm.y"
- { yyval = pars_column_def(yyvsp[-4], yyvsp[-3], yyvsp[-2], yyvsp[-1], yyvsp[0]); ;}
+ { (yyval) = pars_column_def((yyvsp[-4]), (yyvsp[-3]), (yyvsp[-2]), (yyvsp[-1]), (yyvsp[0])); ;}
break;
case 134:
#line 480 "pars0grm.y"
- { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
+ { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
break;
case 135:
#line 482 "pars0grm.y"
- { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;}
+ { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;}
break;
case 136:
#line 486 "pars0grm.y"
- { yyval = NULL; ;}
+ { (yyval) = NULL; ;}
break;
case 137:
#line 488 "pars0grm.y"
- { yyval = yyvsp[-1]; ;}
+ { (yyval) = (yyvsp[-1]); ;}
break;
case 138:
#line 492 "pars0grm.y"
- { yyval = NULL; ;}
+ { (yyval) = NULL; ;}
break;
case 139:
#line 494 "pars0grm.y"
- { yyval = &pars_int_token;
+ { (yyval) = &pars_int_token;
/* pass any non-NULL pointer */ ;}
break;
case 140:
#line 499 "pars0grm.y"
- { yyval = NULL; ;}
+ { (yyval) = NULL; ;}
break;
case 141:
#line 501 "pars0grm.y"
- { yyval = &pars_int_token;
+ { (yyval) = &pars_int_token;
/* pass any non-NULL pointer */ ;}
break;
case 142:
#line 506 "pars0grm.y"
- { yyval = NULL; ;}
+ { (yyval) = NULL; ;}
break;
case 143:
#line 508 "pars0grm.y"
- { yyval = &pars_int_token;
+ { (yyval) = &pars_int_token;
/* pass any non-NULL pointer */ ;}
break;
case 144:
#line 515 "pars0grm.y"
- { yyval = pars_create_table(yyvsp[-4], yyvsp[-2], yyvsp[0]); ;}
+ { (yyval) = pars_create_table((yyvsp[-4]), (yyvsp[-2]), (yyvsp[0])); ;}
break;
case 145:
#line 519 "pars0grm.y"
- { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
+ { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
break;
case 146:
#line 521 "pars0grm.y"
- { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;}
+ { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;}
break;
case 147:
#line 525 "pars0grm.y"
- { yyval = NULL; ;}
+ { (yyval) = NULL; ;}
break;
case 148:
#line 526 "pars0grm.y"
- { yyval = &pars_unique_token; ;}
+ { (yyval) = &pars_unique_token; ;}
break;
case 149:
#line 530 "pars0grm.y"
- { yyval = NULL; ;}
+ { (yyval) = NULL; ;}
break;
case 150:
#line 531 "pars0grm.y"
- { yyval = &pars_clustered_token; ;}
+ { (yyval) = &pars_clustered_token; ;}
break;
case 151:
#line 539 "pars0grm.y"
- { yyval = pars_create_index(yyvsp[-8], yyvsp[-7], yyvsp[-5], yyvsp[-3], yyvsp[-1]); ;}
+ { (yyval) = pars_create_index((yyvsp[-8]), (yyvsp[-7]), (yyvsp[-5]), (yyvsp[-3]), (yyvsp[-1])); ;}
break;
case 152:
#line 544 "pars0grm.y"
- { yyval = pars_commit_statement(); ;}
+ { (yyval) = pars_commit_statement(); ;}
break;
case 153:
#line 549 "pars0grm.y"
- { yyval = pars_rollback_statement(); ;}
+ { (yyval) = pars_rollback_statement(); ;}
break;
case 154:
#line 553 "pars0grm.y"
- { yyval = &pars_int_token; ;}
+ { (yyval) = &pars_int_token; ;}
break;
case 155:
#line 554 "pars0grm.y"
- { yyval = &pars_int_token; ;}
+ { (yyval) = &pars_int_token; ;}
break;
case 156:
#line 555 "pars0grm.y"
- { yyval = &pars_char_token; ;}
+ { (yyval) = &pars_char_token; ;}
break;
case 157:
#line 556 "pars0grm.y"
- { yyval = &pars_binary_token; ;}
+ { (yyval) = &pars_binary_token; ;}
break;
case 158:
#line 557 "pars0grm.y"
- { yyval = &pars_blob_token; ;}
+ { (yyval) = &pars_blob_token; ;}
break;
case 159:
#line 562 "pars0grm.y"
- { yyval = pars_parameter_declaration(yyvsp[-2],
- PARS_INPUT, yyvsp[0]); ;}
+ { (yyval) = pars_parameter_declaration((yyvsp[-2]),
+ PARS_INPUT, (yyvsp[0])); ;}
break;
case 160:
#line 565 "pars0grm.y"
- { yyval = pars_parameter_declaration(yyvsp[-2],
- PARS_OUTPUT, yyvsp[0]); ;}
+ { (yyval) = pars_parameter_declaration((yyvsp[-2]),
+ PARS_OUTPUT, (yyvsp[0])); ;}
break;
case 161:
#line 570 "pars0grm.y"
- { yyval = NULL; ;}
+ { (yyval) = NULL; ;}
break;
case 162:
#line 571 "pars0grm.y"
- { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
+ { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
break;
case 163:
#line 573 "pars0grm.y"
- { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;}
+ { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;}
break;
case 164:
#line 578 "pars0grm.y"
- { yyval = pars_variable_declaration(yyvsp[-2], yyvsp[-1]); ;}
+ { (yyval) = pars_variable_declaration((yyvsp[-2]), (yyvsp[-1])); ;}
break;
case 168:
#line 590 "pars0grm.y"
- { yyval = pars_cursor_declaration(yyvsp[-3], yyvsp[-1]); ;}
+ { (yyval) = pars_cursor_declaration((yyvsp[-3]), (yyvsp[-1])); ;}
break;
case 169:
#line 595 "pars0grm.y"
- { yyval = pars_function_declaration(yyvsp[-1]); ;}
+ { (yyval) = pars_function_declaration((yyvsp[-1])); ;}
break;
case 175:
#line 616 "pars0grm.y"
- { yyval = pars_procedure_definition(yyvsp[-9], yyvsp[-7],
- yyvsp[-1]); ;}
+ { (yyval) = pars_procedure_definition((yyvsp[-9]), (yyvsp[-7]),
+ (yyvsp[-1])); ;}
break;
}
/* Line 1010 of yacc.c. */
-#line 2345 "pars0grm.tab.c"
+#line 2345 "pars0grm.c"
yyvsp -= yylen;
yyssp -= yylen;
@@ -2441,7 +2468,7 @@ yyerrlab:
if (yyerrstatus == 3)
{
- /* If just tried and failed to reuse lookahead token after an
+ /* If just tried and failed to reuse look-ahead token after an
error, discard it. */
if (yychar <= YYEOF)
@@ -2451,23 +2478,22 @@ yyerrlab:
if (yychar == YYEOF)
for (;;)
{
+
YYPOPSTACK;
if (yyssp == yyss)
YYABORT;
- YYDSYMPRINTF ("Error: popping", yystos[*yyssp], yyvsp, yylsp);
- yydestruct (yystos[*yyssp], yyvsp);
+ yydestruct ("Error: popping",
+ yystos[*yyssp], yyvsp);
}
}
else
{
- YYDSYMPRINTF ("Error: discarding", yytoken, &yylval, &yylloc);
- yydestruct (yytoken, &yylval);
+ yydestruct ("Error: discarding", yytoken, &yylval);
yychar = YYEMPTY;
-
}
}
- /* Else will try to reuse lookahead token after shifting the error
+ /* Else will try to reuse look-ahead token after shifting the error
token. */
goto yyerrlab1;
@@ -2484,7 +2510,7 @@ yyerrorlab:
goto yyerrorlab;
#endif
- yyvsp -= yylen;
+yyvsp -= yylen;
yyssp -= yylen;
yystate = *yyssp;
goto yyerrlab1;
@@ -2514,8 +2540,8 @@ yyerrlab1:
if (yyssp == yyss)
YYABORT;
- YYDSYMPRINTF ("Error: popping", yystos[*yyssp], yyvsp, yylsp);
- yydestruct (yystos[yystate], yyvsp);
+
+ yydestruct ("Error: popping", yystos[yystate], yyvsp);
YYPOPSTACK;
yystate = *yyssp;
YY_STACK_PRINT (yyss, yyssp);
@@ -2524,11 +2550,12 @@ yyerrlab1:
if (yyn == YYFINAL)
YYACCEPT;
- YYDPRINTF ((stderr, "Shifting error token, "));
-
*++yyvsp = yylval;
+ /* Shift the error token. */
+ YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp);
+
yystate = yyn;
goto yynewstate;
@@ -2544,6 +2571,9 @@ yyacceptlab:
| yyabortlab -- YYABORT comes here. |
`-----------------------------------*/
yyabortlab:
+ yydestruct ("Error: discarding lookahead",
+ yytoken, &yylval);
+ yychar = YYEMPTY;
yyresult = 1;
goto yyreturn;
diff --git a/storage/innobase/pars/pars0grm.h b/storage/innobase/pars/pars0grm.h
deleted file mode 100644
index 0062b8314ee..00000000000
--- a/storage/innobase/pars/pars0grm.h
+++ /dev/null
@@ -1,234 +0,0 @@
-/* A Bison parser, made by GNU Bison 1.875d. */
-
-/* Skeleton parser for Yacc-like parsing with Bison,
- Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-/* As a special exception, when this file is copied by Bison into a
- Bison output file, you may use that output file without restriction.
- This special exception was added by the Free Software Foundation
- in version 1.24 of Bison. */
-
-/* Tokens. */
-#ifndef YYTOKENTYPE
-# define YYTOKENTYPE
- /* Put the tokens into the symbol table, so that GDB and other debuggers
- know about them. */
- enum yytokentype {
- PARS_INT_LIT = 258,
- PARS_FLOAT_LIT = 259,
- PARS_STR_LIT = 260,
- PARS_FIXBINARY_LIT = 261,
- PARS_BLOB_LIT = 262,
- PARS_NULL_LIT = 263,
- PARS_ID_TOKEN = 264,
- PARS_AND_TOKEN = 265,
- PARS_OR_TOKEN = 266,
- PARS_NOT_TOKEN = 267,
- PARS_GE_TOKEN = 268,
- PARS_LE_TOKEN = 269,
- PARS_NE_TOKEN = 270,
- PARS_PROCEDURE_TOKEN = 271,
- PARS_IN_TOKEN = 272,
- PARS_OUT_TOKEN = 273,
- PARS_BINARY_TOKEN = 274,
- PARS_BLOB_TOKEN = 275,
- PARS_INT_TOKEN = 276,
- PARS_INTEGER_TOKEN = 277,
- PARS_FLOAT_TOKEN = 278,
- PARS_CHAR_TOKEN = 279,
- PARS_IS_TOKEN = 280,
- PARS_BEGIN_TOKEN = 281,
- PARS_END_TOKEN = 282,
- PARS_IF_TOKEN = 283,
- PARS_THEN_TOKEN = 284,
- PARS_ELSE_TOKEN = 285,
- PARS_ELSIF_TOKEN = 286,
- PARS_LOOP_TOKEN = 287,
- PARS_WHILE_TOKEN = 288,
- PARS_RETURN_TOKEN = 289,
- PARS_SELECT_TOKEN = 290,
- PARS_SUM_TOKEN = 291,
- PARS_COUNT_TOKEN = 292,
- PARS_DISTINCT_TOKEN = 293,
- PARS_FROM_TOKEN = 294,
- PARS_WHERE_TOKEN = 295,
- PARS_FOR_TOKEN = 296,
- PARS_DDOT_TOKEN = 297,
- PARS_READ_TOKEN = 298,
- PARS_ORDER_TOKEN = 299,
- PARS_BY_TOKEN = 300,
- PARS_ASC_TOKEN = 301,
- PARS_DESC_TOKEN = 302,
- PARS_INSERT_TOKEN = 303,
- PARS_INTO_TOKEN = 304,
- PARS_VALUES_TOKEN = 305,
- PARS_UPDATE_TOKEN = 306,
- PARS_SET_TOKEN = 307,
- PARS_DELETE_TOKEN = 308,
- PARS_CURRENT_TOKEN = 309,
- PARS_OF_TOKEN = 310,
- PARS_CREATE_TOKEN = 311,
- PARS_TABLE_TOKEN = 312,
- PARS_INDEX_TOKEN = 313,
- PARS_UNIQUE_TOKEN = 314,
- PARS_CLUSTERED_TOKEN = 315,
- PARS_DOES_NOT_FIT_IN_MEM_TOKEN = 316,
- PARS_ON_TOKEN = 317,
- PARS_ASSIGN_TOKEN = 318,
- PARS_DECLARE_TOKEN = 319,
- PARS_CURSOR_TOKEN = 320,
- PARS_SQL_TOKEN = 321,
- PARS_OPEN_TOKEN = 322,
- PARS_FETCH_TOKEN = 323,
- PARS_CLOSE_TOKEN = 324,
- PARS_NOTFOUND_TOKEN = 325,
- PARS_TO_CHAR_TOKEN = 326,
- PARS_TO_NUMBER_TOKEN = 327,
- PARS_TO_BINARY_TOKEN = 328,
- PARS_BINARY_TO_NUMBER_TOKEN = 329,
- PARS_SUBSTR_TOKEN = 330,
- PARS_REPLSTR_TOKEN = 331,
- PARS_CONCAT_TOKEN = 332,
- PARS_INSTR_TOKEN = 333,
- PARS_LENGTH_TOKEN = 334,
- PARS_SYSDATE_TOKEN = 335,
- PARS_PRINTF_TOKEN = 336,
- PARS_ASSERT_TOKEN = 337,
- PARS_RND_TOKEN = 338,
- PARS_RND_STR_TOKEN = 339,
- PARS_ROW_PRINTF_TOKEN = 340,
- PARS_COMMIT_TOKEN = 341,
- PARS_ROLLBACK_TOKEN = 342,
- PARS_WORK_TOKEN = 343,
- PARS_UNSIGNED_TOKEN = 344,
- PARS_EXIT_TOKEN = 345,
- PARS_FUNCTION_TOKEN = 346,
- PARS_LOCK_TOKEN = 347,
- PARS_SHARE_TOKEN = 348,
- PARS_MODE_TOKEN = 349,
- NEG = 350
- };
-#endif
-#define PARS_INT_LIT 258
-#define PARS_FLOAT_LIT 259
-#define PARS_STR_LIT 260
-#define PARS_FIXBINARY_LIT 261
-#define PARS_BLOB_LIT 262
-#define PARS_NULL_LIT 263
-#define PARS_ID_TOKEN 264
-#define PARS_AND_TOKEN 265
-#define PARS_OR_TOKEN 266
-#define PARS_NOT_TOKEN 267
-#define PARS_GE_TOKEN 268
-#define PARS_LE_TOKEN 269
-#define PARS_NE_TOKEN 270
-#define PARS_PROCEDURE_TOKEN 271
-#define PARS_IN_TOKEN 272
-#define PARS_OUT_TOKEN 273
-#define PARS_BINARY_TOKEN 274
-#define PARS_BLOB_TOKEN 275
-#define PARS_INT_TOKEN 276
-#define PARS_INTEGER_TOKEN 277
-#define PARS_FLOAT_TOKEN 278
-#define PARS_CHAR_TOKEN 279
-#define PARS_IS_TOKEN 280
-#define PARS_BEGIN_TOKEN 281
-#define PARS_END_TOKEN 282
-#define PARS_IF_TOKEN 283
-#define PARS_THEN_TOKEN 284
-#define PARS_ELSE_TOKEN 285
-#define PARS_ELSIF_TOKEN 286
-#define PARS_LOOP_TOKEN 287
-#define PARS_WHILE_TOKEN 288
-#define PARS_RETURN_TOKEN 289
-#define PARS_SELECT_TOKEN 290
-#define PARS_SUM_TOKEN 291
-#define PARS_COUNT_TOKEN 292
-#define PARS_DISTINCT_TOKEN 293
-#define PARS_FROM_TOKEN 294
-#define PARS_WHERE_TOKEN 295
-#define PARS_FOR_TOKEN 296
-#define PARS_DDOT_TOKEN 297
-#define PARS_READ_TOKEN 298
-#define PARS_ORDER_TOKEN 299
-#define PARS_BY_TOKEN 300
-#define PARS_ASC_TOKEN 301
-#define PARS_DESC_TOKEN 302
-#define PARS_INSERT_TOKEN 303
-#define PARS_INTO_TOKEN 304
-#define PARS_VALUES_TOKEN 305
-#define PARS_UPDATE_TOKEN 306
-#define PARS_SET_TOKEN 307
-#define PARS_DELETE_TOKEN 308
-#define PARS_CURRENT_TOKEN 309
-#define PARS_OF_TOKEN 310
-#define PARS_CREATE_TOKEN 311
-#define PARS_TABLE_TOKEN 312
-#define PARS_INDEX_TOKEN 313
-#define PARS_UNIQUE_TOKEN 314
-#define PARS_CLUSTERED_TOKEN 315
-#define PARS_DOES_NOT_FIT_IN_MEM_TOKEN 316
-#define PARS_ON_TOKEN 317
-#define PARS_ASSIGN_TOKEN 318
-#define PARS_DECLARE_TOKEN 319
-#define PARS_CURSOR_TOKEN 320
-#define PARS_SQL_TOKEN 321
-#define PARS_OPEN_TOKEN 322
-#define PARS_FETCH_TOKEN 323
-#define PARS_CLOSE_TOKEN 324
-#define PARS_NOTFOUND_TOKEN 325
-#define PARS_TO_CHAR_TOKEN 326
-#define PARS_TO_NUMBER_TOKEN 327
-#define PARS_TO_BINARY_TOKEN 328
-#define PARS_BINARY_TO_NUMBER_TOKEN 329
-#define PARS_SUBSTR_TOKEN 330
-#define PARS_REPLSTR_TOKEN 331
-#define PARS_CONCAT_TOKEN 332
-#define PARS_INSTR_TOKEN 333
-#define PARS_LENGTH_TOKEN 334
-#define PARS_SYSDATE_TOKEN 335
-#define PARS_PRINTF_TOKEN 336
-#define PARS_ASSERT_TOKEN 337
-#define PARS_RND_TOKEN 338
-#define PARS_RND_STR_TOKEN 339
-#define PARS_ROW_PRINTF_TOKEN 340
-#define PARS_COMMIT_TOKEN 341
-#define PARS_ROLLBACK_TOKEN 342
-#define PARS_WORK_TOKEN 343
-#define PARS_UNSIGNED_TOKEN 344
-#define PARS_EXIT_TOKEN 345
-#define PARS_FUNCTION_TOKEN 346
-#define PARS_LOCK_TOKEN 347
-#define PARS_SHARE_TOKEN 348
-#define PARS_MODE_TOKEN 349
-#define NEG 350
-
-
-
-
-#if ! defined (YYSTYPE) && ! defined (YYSTYPE_IS_DECLARED)
-typedef int YYSTYPE;
-# define yystype YYSTYPE /* obsolescent; will be withdrawn */
-# define YYSTYPE_IS_DECLARED 1
-# define YYSTYPE_IS_TRIVIAL 1
-#endif
-
-extern YYSTYPE yylval;
-
-
-
diff --git a/storage/innobase/pars/pars0grm.y b/storage/innobase/pars/pars0grm.y
index a07be9975a1..14d64f1826f 100644
--- a/storage/innobase/pars/pars0grm.y
+++ b/storage/innobase/pars/pars0grm.y
@@ -1,13 +1,28 @@
-/******************************************************
-SQL parser: input file for the GNU Bison parser generator
+/*****************************************************************************
-(c) 1997 Innobase Oy
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-Created 12/14/1997 Heikki Tuuri
-Published under the GPL version 2
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************
+SQL parser: input file for the GNU Bison parser generator
Look from pars0lex.l for instructions how to generate the C files for
the InnoDB parser.
+
+Created 12/14/1997 Heikki Tuuri
*******************************************************/
%{
diff --git a/storage/innobase/pars/pars0lex.l b/storage/innobase/pars/pars0lex.l
index ad65034fab0..55ed17f82e1 100644
--- a/storage/innobase/pars/pars0lex.l
+++ b/storage/innobase/pars/pars0lex.l
@@ -1,10 +1,23 @@
-/******************************************************
-SQL parser lexical analyzer: input file for the GNU Flex lexer generator
+/*****************************************************************************
-(c) 1997 Innobase Oy
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-Created 12/14/1997 Heikki Tuuri
-Published under the GPL version 2
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************
+SQL parser lexical analyzer: input file for the GNU Flex lexer generator
The InnoDB parser is frozen because MySQL takes care of SQL parsing.
Therefore we normally keep the InnoDB parser C files as they are, and do
@@ -18,6 +31,8 @@ How to make the InnoDB parser and lexer C files:
These instructions seem to work at least with bison-1.875d and flex-2.5.31 on
Linux.
+
+Created 12/14/1997 Heikki Tuuri
*******************************************************/
%option nostdinit
@@ -55,13 +70,13 @@ Linux.
static ulint stringbuf_len_alloc = 0; /* Allocated length */
static ulint stringbuf_len = 0; /* Current length */
static char* stringbuf; /* Start of buffer */
-/* Appends a string to the buffer. */
+/** Appends a string to the buffer. */
static
void
string_append(
/*==========*/
- const char* str, /* in: string to be appended */
- ulint len) /* in: length of the string */
+ const char* str, /*!< in: string to be appended */
+ ulint len) /*!< in: length of the string */
{
if (stringbuf == NULL) {
stringbuf = malloc(1);
@@ -646,3 +661,16 @@ In the state 'id', only two actions are possible (defined below). */
}
%%
+
+/**********************************************************************
+Release any resources used by the lexer. */
+UNIV_INTERN
+void
+pars_lexer_close(void)
+/*==================*/
+{
+ yylex_destroy();
+ free(stringbuf);
+ stringbuf = NULL;
+ stringbuf_len_alloc = stringbuf_len = 0;
+}
diff --git a/storage/innobase/pars/pars0opt.c b/storage/innobase/pars/pars0opt.c
index 2abe6720235..2e392ba4836 100644
--- a/storage/innobase/pars/pars0opt.c
+++ b/storage/innobase/pars/pars0opt.c
@@ -1,7 +1,24 @@
-/******************************************************
-Simple SQL optimizer
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1997 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file pars/pars0opt.c
+Simple SQL optimizer
Created 12/21/1997 Heikki Tuuri
*******************************************************/
@@ -31,15 +48,14 @@ Created 12/21/1997 Heikki Tuuri
#define OPT_SCROLL_COND 4
-/***********************************************************************
-Inverts a comparison operator. */
+/*******************************************************************//**
+Inverts a comparison operator.
+@return the equivalent operator when the order of the arguments is switched */
static
int
opt_invert_cmp_op(
/*==============*/
- /* out: the equivalent operator when the order of
- the arguments is switched */
- int op) /* in: operator */
+ int op) /*!< in: operator */
{
if (op == '<') {
return('>');
@@ -58,18 +74,18 @@ opt_invert_cmp_op(
return(0);
}
-/***********************************************************************
+/*******************************************************************//**
Checks if the value of an expression can be calculated BEFORE the nth table
in a join is accessed. If this is the case, it can possibly be used in an
-index search for the nth table. */
+index search for the nth table.
+@return TRUE if already determined */
static
ibool
opt_check_exp_determined_before(
/*============================*/
- /* out: TRUE if already determined */
- que_node_t* exp, /* in: expression */
- sel_node_t* sel_node, /* in: select node */
- ulint nth_table) /* in: nth table will be accessed */
+ que_node_t* exp, /*!< in: expression */
+ sel_node_t* sel_node, /*!< in: select node */
+ ulint nth_table) /*!< in: nth table will be accessed */
{
func_node_t* func_node;
sym_node_t* sym_node;
@@ -118,24 +134,22 @@ opt_check_exp_determined_before(
return(FALSE);
}
-/***********************************************************************
+/*******************************************************************//**
Looks in a comparison condition if a column value is already restricted by
-it BEFORE the nth table is accessed. */
+it BEFORE the nth table is accessed.
+@return expression restricting the value of the column, or NULL if not known */
static
que_node_t*
opt_look_for_col_in_comparison_before(
/*==================================*/
- /* out: expression restricting the
- value of the column, or NULL if not
- known */
- ulint cmp_type, /* in: OPT_EQUAL, OPT_COMPARISON */
- ulint col_no, /* in: column number */
- func_node_t* search_cond, /* in: comparison condition */
- sel_node_t* sel_node, /* in: select node */
- ulint nth_table, /* in: nth table in a join (a query
+ ulint cmp_type, /*!< in: OPT_EQUAL, OPT_COMPARISON */
+ ulint col_no, /*!< in: column number */
+ func_node_t* search_cond, /*!< in: comparison condition */
+ sel_node_t* sel_node, /*!< in: select node */
+ ulint nth_table, /*!< in: nth table in a join (a query
from a single table is considered a
join of 1 table) */
- ulint* op) /* out: comparison operator ('=',
+ ulint* op) /*!< out: comparison operator ('=',
PARS_GE_TOKEN, ... ); this is inverted
if the column appears on the right
side */
@@ -215,26 +229,24 @@ opt_look_for_col_in_comparison_before(
return(NULL);
}
-/***********************************************************************
+/*******************************************************************//**
Looks in a search condition if a column value is already restricted by the
search condition BEFORE the nth table is accessed. Takes into account that
if we will fetch in an ascending order, we cannot utilize an upper limit for
-a column value; in a descending order, respectively, a lower limit. */
+a column value; in a descending order, respectively, a lower limit.
+@return expression restricting the value of the column, or NULL if not known */
static
que_node_t*
opt_look_for_col_in_cond_before(
/*============================*/
- /* out: expression restricting the
- value of the column, or NULL if not
- known */
- ulint cmp_type, /* in: OPT_EQUAL, OPT_COMPARISON */
- ulint col_no, /* in: column number */
- func_node_t* search_cond, /* in: search condition or NULL */
- sel_node_t* sel_node, /* in: select node */
- ulint nth_table, /* in: nth table in a join (a query
+ ulint cmp_type, /*!< in: OPT_EQUAL, OPT_COMPARISON */
+ ulint col_no, /*!< in: column number */
+ func_node_t* search_cond, /*!< in: search condition or NULL */
+ sel_node_t* sel_node, /*!< in: select node */
+ ulint nth_table, /*!< in: nth table in a join (a query
from a single table is considered a
join of 1 table) */
- ulint* op) /* out: comparison operator ('=',
+ ulint* op) /*!< out: comparison operator ('=',
PARS_GE_TOKEN, ... ) */
{
func_node_t* new_cond;
@@ -293,24 +305,24 @@ opt_look_for_col_in_cond_before(
return(exp);
}
-/***********************************************************************
+/*******************************************************************//**
Calculates the goodness for an index according to a select node. The
goodness is 4 times the number of first fields in index whose values we
already know exactly in the query. If we have a comparison condition for
an additional field, 2 points are added. If the index is unique, and we know
all the unique fields for the index we add 1024 points. For a clustered index
-we add 1 point. */
+we add 1 point.
+@return goodness */
static
ulint
opt_calc_index_goodness(
/*====================*/
- /* out: goodness */
- dict_index_t* index, /* in: index */
- sel_node_t* sel_node, /* in: parsed select node */
- ulint nth_table, /* in: nth table in a join */
- que_node_t** index_plan, /* in/out: comparison expressions for
+ dict_index_t* index, /*!< in: index */
+ sel_node_t* sel_node, /*!< in: parsed select node */
+ ulint nth_table, /*!< in: nth table in a join */
+ que_node_t** index_plan, /*!< in/out: comparison expressions for
this index */
- ulint* last_op) /* out: last comparison operator, if
+ ulint* last_op) /*!< out: last comparison operator, if
goodness > 1 */
{
que_node_t* exp;
@@ -362,14 +374,14 @@ opt_calc_index_goodness(
if (goodness >= 4 * dict_index_get_n_unique(index)) {
goodness += 1024;
- if (index->type & DICT_CLUSTERED) {
+ if (dict_index_is_clust(index)) {
goodness += 1024;
}
}
/* We have to test for goodness here, as last_op may not be set */
- if (goodness && index->type & DICT_CLUSTERED) {
+ if (goodness && dict_index_is_clust(index)) {
goodness++;
}
@@ -377,30 +389,29 @@ opt_calc_index_goodness(
return(goodness);
}
-/***********************************************************************
-Calculates the number of matched fields based on an index goodness. */
+/*******************************************************************//**
+Calculates the number of matched fields based on an index goodness.
+@return number of exactly or partially matched fields */
UNIV_INLINE
ulint
opt_calc_n_fields_from_goodness(
/*============================*/
- /* out: number of excatly or partially matched
- fields */
- ulint goodness) /* in: goodness */
+ ulint goodness) /*!< in: goodness */
{
return(((goodness % 1024) + 2) / 4);
}
-/***********************************************************************
+/*******************************************************************//**
Converts a comparison operator to the corresponding search mode PAGE_CUR_GE,
-... */
+...
+@return search mode */
UNIV_INLINE
ulint
opt_op_to_search_mode(
/*==================*/
- /* out: search mode */
- ibool asc, /* in: TRUE if the rows should be fetched in an
+ ibool asc, /*!< in: TRUE if the rows should be fetched in an
ascending order */
- ulint op) /* in: operator '=', PARS_GE_TOKEN, ... */
+ ulint op) /*!< in: operator '=', PARS_GE_TOKEN, ... */
{
if (op == '=') {
if (asc) {
@@ -427,15 +438,15 @@ opt_op_to_search_mode(
return(0);
}
-/***********************************************************************
-Determines if a node is an argument node of a function node. */
+/*******************************************************************//**
+Determines if a node is an argument node of a function node.
+@return TRUE if is an argument */
static
ibool
opt_is_arg(
/*=======*/
- /* out: TRUE if is an argument */
- que_node_t* arg_node, /* in: possible argument node */
- func_node_t* func_node) /* in: function node */
+ que_node_t* arg_node, /*!< in: possible argument node */
+ func_node_t* func_node) /*!< in: function node */
{
que_node_t* arg;
@@ -453,7 +464,7 @@ opt_is_arg(
return(FALSE);
}
-/***********************************************************************
+/*******************************************************************//**
Decides if the fetching of rows should be made in a descending order, and
also checks that the chosen query plan produces a result which satisfies
the order-by. */
@@ -461,7 +472,7 @@ static
void
opt_check_order_by(
/*===============*/
- sel_node_t* sel_node) /* in: select node; asserts an error
+ sel_node_t* sel_node) /*!< in: select node; asserts an error
if the plan does not agree with the
order-by */
{
@@ -505,7 +516,7 @@ opt_check_order_by(
}
}
-/***********************************************************************
+/*******************************************************************//**
Optimizes a select. Decides which indexes to tables to use. The tables
are accessed in the order that they were written to the FROM part in the
select statement. */
@@ -513,9 +524,9 @@ static
void
opt_search_plan_for_table(
/*======================*/
- sel_node_t* sel_node, /* in: parsed select node */
- ulint i, /* in: this is the ith table */
- dict_table_t* table) /* in: table */
+ sel_node_t* sel_node, /*!< in: parsed select node */
+ ulint i, /*!< in: this is the ith table */
+ dict_table_t* table) /*!< in: table */
{
plan_t* plan;
dict_index_t* index;
@@ -587,7 +598,7 @@ opt_search_plan_for_table(
best_last_op);
}
- if ((best_index->type & DICT_CLUSTERED)
+ if (dict_index_is_clust(best_index)
&& (plan->n_exact_match >= dict_index_get_n_unique(best_index))) {
plan->unique_search = TRUE;
@@ -601,22 +612,19 @@ opt_search_plan_for_table(
btr_pcur_init(&(plan->clust_pcur));
}
-/***********************************************************************
+/*******************************************************************//**
Looks at a comparison condition and decides if it can, and need, be tested for
-a table AFTER the table has been accessed. */
+a table AFTER the table has been accessed.
+@return OPT_NOT_COND if not for this table, else OPT_END_COND,
+OPT_TEST_COND, or OPT_SCROLL_COND, where the last means that the
+condition need not be tested, except when scroll cursors are used */
static
ulint
opt_classify_comparison(
/*====================*/
- /* out: OPT_NOT_COND if not for this
- table, else OPT_END_COND,
- OPT_TEST_COND, or OPT_SCROLL_COND,
- where the last means that the
- condition need not be tested, except
- when scroll cursors are used */
- sel_node_t* sel_node, /* in: select node */
- ulint i, /* in: ith table in the join */
- func_node_t* cond) /* in: comparison condition */
+ sel_node_t* sel_node, /*!< in: select node */
+ ulint i, /*!< in: ith table in the join */
+ func_node_t* cond) /*!< in: comparison condition */
{
plan_t* plan;
ulint n_fields;
@@ -697,15 +705,15 @@ opt_classify_comparison(
return(OPT_TEST_COND);
}
-/***********************************************************************
+/*******************************************************************//**
Recursively looks for test conditions for a table in a join. */
static
void
opt_find_test_conds(
/*================*/
- sel_node_t* sel_node, /* in: select node */
- ulint i, /* in: ith table in the join */
- func_node_t* cond) /* in: conjunction of search
+ sel_node_t* sel_node, /*!< in: select node */
+ ulint i, /*!< in: ith table in the join */
+ func_node_t* cond) /*!< in: conjunction of search
conditions or NULL */
{
func_node_t* new_cond;
@@ -742,7 +750,7 @@ opt_find_test_conds(
}
}
-/***********************************************************************
+/*******************************************************************//**
Normalizes a list of comparison conditions so that a column of the table
appears on the left side of the comparison if possible. This is accomplished
by switching the arguments of the operator. */
@@ -750,9 +758,9 @@ static
void
opt_normalize_cmp_conds(
/*====================*/
- func_node_t* cond, /* in: first in a list of comparison
+ func_node_t* cond, /*!< in: first in a list of comparison
conditions, or NULL */
- dict_table_t* table) /* in: table */
+ dict_table_t* table) /*!< in: table */
{
que_node_t* arg1;
que_node_t* arg2;
@@ -784,7 +792,7 @@ opt_normalize_cmp_conds(
}
}
-/***********************************************************************
+/*******************************************************************//**
Finds out the search condition conjuncts we can, and need, to test as the ith
table in a join is accessed. The search tuple can eliminate the need to test
some conjuncts. */
@@ -792,8 +800,8 @@ static
void
opt_determine_and_normalize_test_conds(
/*===================================*/
- sel_node_t* sel_node, /* in: select node */
- ulint i) /* in: ith table in the join */
+ sel_node_t* sel_node, /*!< in: select node */
+ ulint i) /*!< in: ith table in the join */
{
plan_t* plan;
@@ -812,24 +820,24 @@ opt_determine_and_normalize_test_conds(
ut_a(UT_LIST_GET_LEN(plan->end_conds) >= plan->n_exact_match);
}
-/***********************************************************************
+/*******************************************************************//**
Looks for occurrences of the columns of the table in the query subgraph and
adds them to the list of columns if an occurrence of the same column does not
already exist in the list. If the column is already in the list, puts a value
indirection to point to the occurrence in the column list, except if the
column occurrence we are looking at is in the column list, in which case
nothing is done. */
-
+UNIV_INTERN
void
opt_find_all_cols(
/*==============*/
- ibool copy_val, /* in: if TRUE, new found columns are
+ ibool copy_val, /*!< in: if TRUE, new found columns are
added as columns to copy */
- dict_index_t* index, /* in: index of the table to use */
- sym_node_list_t* col_list, /* in: base node of a list where
+ dict_index_t* index, /*!< in: index of the table to use */
+ sym_node_list_t* col_list, /*!< in: base node of a list where
to add new found columns */
- plan_t* plan, /* in: plan or NULL */
- que_node_t* exp) /* in: expression or condition or
+ plan_t* plan, /*!< in: plan or NULL */
+ que_node_t* exp) /*!< in: expression or condition or
NULL */
{
func_node_t* func_node;
@@ -906,7 +914,7 @@ opt_find_all_cols(
sym_node->field_nos[SYM_CLUST_FIELD_NO] = dict_index_get_nth_col_pos(
dict_table_get_first_index(index->table), sym_node->col_no);
- if (!(index->type & DICT_CLUSTERED)) {
+ if (!dict_index_is_clust(index)) {
ut_a(plan);
@@ -921,7 +929,7 @@ opt_find_all_cols(
}
}
-/***********************************************************************
+/*******************************************************************//**
Looks for occurrences of the columns of the table in conditions which are
not yet determined AFTER the join operation has fetched a row in the ith
table. The values for these column must be copied to dynamic memory for
@@ -930,9 +938,9 @@ static
void
opt_find_copy_cols(
/*===============*/
- sel_node_t* sel_node, /* in: select node */
- ulint i, /* in: ith table in the join */
- func_node_t* search_cond) /* in: search condition or NULL */
+ sel_node_t* sel_node, /*!< in: select node */
+ ulint i, /*!< in: ith table in the join */
+ func_node_t* search_cond) /*!< in: search condition or NULL */
{
func_node_t* new_cond;
plan_t* plan;
@@ -969,7 +977,7 @@ opt_find_copy_cols(
}
}
-/***********************************************************************
+/*******************************************************************//**
Classifies the table columns according to whether we use the column only while
holding the latch on the page, or whether we have to copy the column value to
dynamic memory. Puts the first occurrence of a column to either list in the
@@ -978,8 +986,8 @@ static
void
opt_classify_cols(
/*==============*/
- sel_node_t* sel_node, /* in: select node */
- ulint i) /* in: ith table in the join */
+ sel_node_t* sel_node, /*!< in: select node */
+ ulint i) /*!< in: ith table in the join */
{
plan_t* plan;
que_node_t* exp;
@@ -1013,15 +1021,15 @@ opt_classify_cols(
sel_node->search_cond);
}
-/***********************************************************************
+/*******************************************************************//**
Fills in the info in plan which is used in accessing a clustered index
record. The columns must already be classified for the plan node. */
static
void
opt_clust_access(
/*=============*/
- sel_node_t* sel_node, /* in: select node */
- ulint n) /* in: nth table in select */
+ sel_node_t* sel_node, /*!< in: select node */
+ ulint n) /*!< in: nth table in select */
{
plan_t* plan;
dict_table_t* table;
@@ -1041,7 +1049,7 @@ opt_clust_access(
plan->no_prefetch = FALSE;
- if (index->type & DICT_CLUSTERED) {
+ if (dict_index_is_clust(index)) {
plan->clust_map = NULL;
plan->clust_ref = NULL;
@@ -1085,15 +1093,15 @@ opt_clust_access(
}
}
-/***********************************************************************
+/*******************************************************************//**
Optimizes a select. Decides which indexes to tables to use. The tables
are accessed in the order that they were written to the FROM part in the
select statement. */
-
+UNIV_INTERN
void
opt_search_plan(
/*============*/
- sel_node_t* sel_node) /* in: parsed select node */
+ sel_node_t* sel_node) /*!< in: parsed select node */
{
sym_node_t* table_node;
dict_table_t* table;
@@ -1160,13 +1168,13 @@ opt_search_plan(
#endif
}
-/************************************************************************
+/********************************************************************//**
Prints info of a query plan. */
-
+UNIV_INTERN
void
opt_print_query_plan(
/*=================*/
- sel_node_t* sel_node) /* in: select node */
+ sel_node_t* sel_node) /*!< in: select node */
{
plan_t* plan;
ulint n_fields;
diff --git a/storage/innobase/pars/pars0pars.c b/storage/innobase/pars/pars0pars.c
index 89f6f862995..9faf36d00a8 100644
--- a/storage/innobase/pars/pars0pars.c
+++ b/storage/innobase/pars/pars0pars.c
@@ -1,7 +1,24 @@
-/******************************************************
-SQL parser
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
-(c) 1996 Innobase Oy
+/**************************************************//**
+@file pars/pars0pars.c
+SQL parser
Created 11/19/1996 Heikki Tuuri
*******************************************************/
@@ -32,114 +49,107 @@ on 1/27/1998 */
#include "eval0eval.h"
#ifdef UNIV_SQL_DEBUG
-/* If the following is set TRUE, the lexer will print the SQL string
+/** If the following is set TRUE, the lexer will print the SQL string
as it tokenizes it */
-
-ibool pars_print_lexed = FALSE;
+UNIV_INTERN ibool pars_print_lexed = FALSE;
#endif /* UNIV_SQL_DEBUG */
/* Global variable used while parsing a single procedure or query : the code is
NOT re-entrant */
-sym_tab_t* pars_sym_tab_global;
+UNIV_INTERN sym_tab_t* pars_sym_tab_global;
/* Global variables used to denote certain reserved words, used in
constructing the parsing tree */
-pars_res_word_t pars_to_char_token = {PARS_TO_CHAR_TOKEN};
-pars_res_word_t pars_to_number_token = {PARS_TO_NUMBER_TOKEN};
-pars_res_word_t pars_to_binary_token = {PARS_TO_BINARY_TOKEN};
-pars_res_word_t pars_binary_to_number_token = {PARS_BINARY_TO_NUMBER_TOKEN};
-pars_res_word_t pars_substr_token = {PARS_SUBSTR_TOKEN};
-pars_res_word_t pars_replstr_token = {PARS_REPLSTR_TOKEN};
-pars_res_word_t pars_concat_token = {PARS_CONCAT_TOKEN};
-pars_res_word_t pars_instr_token = {PARS_INSTR_TOKEN};
-pars_res_word_t pars_length_token = {PARS_LENGTH_TOKEN};
-pars_res_word_t pars_sysdate_token = {PARS_SYSDATE_TOKEN};
-pars_res_word_t pars_printf_token = {PARS_PRINTF_TOKEN};
-pars_res_word_t pars_assert_token = {PARS_ASSERT_TOKEN};
-pars_res_word_t pars_rnd_token = {PARS_RND_TOKEN};
-pars_res_word_t pars_rnd_str_token = {PARS_RND_STR_TOKEN};
-pars_res_word_t pars_count_token = {PARS_COUNT_TOKEN};
-pars_res_word_t pars_sum_token = {PARS_SUM_TOKEN};
-pars_res_word_t pars_distinct_token = {PARS_DISTINCT_TOKEN};
-pars_res_word_t pars_binary_token = {PARS_BINARY_TOKEN};
-pars_res_word_t pars_blob_token = {PARS_BLOB_TOKEN};
-pars_res_word_t pars_int_token = {PARS_INT_TOKEN};
-pars_res_word_t pars_char_token = {PARS_CHAR_TOKEN};
-pars_res_word_t pars_float_token = {PARS_FLOAT_TOKEN};
-pars_res_word_t pars_update_token = {PARS_UPDATE_TOKEN};
-pars_res_word_t pars_asc_token = {PARS_ASC_TOKEN};
-pars_res_word_t pars_desc_token = {PARS_DESC_TOKEN};
-pars_res_word_t pars_open_token = {PARS_OPEN_TOKEN};
-pars_res_word_t pars_close_token = {PARS_CLOSE_TOKEN};
-pars_res_word_t pars_share_token = {PARS_SHARE_TOKEN};
-pars_res_word_t pars_unique_token = {PARS_UNIQUE_TOKEN};
-pars_res_word_t pars_clustered_token = {PARS_CLUSTERED_TOKEN};
-
-/* Global variable used to denote the '*' in SELECT * FROM.. */
-#define PARS_STAR_DENOTER 12345678
-ulint pars_star_denoter = PARS_STAR_DENOTER;
-
-
-/*************************************************************************
-Determines the class of a function code. */
+UNIV_INTERN pars_res_word_t pars_to_char_token = {PARS_TO_CHAR_TOKEN};
+UNIV_INTERN pars_res_word_t pars_to_number_token = {PARS_TO_NUMBER_TOKEN};
+UNIV_INTERN pars_res_word_t pars_to_binary_token = {PARS_TO_BINARY_TOKEN};
+UNIV_INTERN pars_res_word_t pars_binary_to_number_token = {PARS_BINARY_TO_NUMBER_TOKEN};
+UNIV_INTERN pars_res_word_t pars_substr_token = {PARS_SUBSTR_TOKEN};
+UNIV_INTERN pars_res_word_t pars_replstr_token = {PARS_REPLSTR_TOKEN};
+UNIV_INTERN pars_res_word_t pars_concat_token = {PARS_CONCAT_TOKEN};
+UNIV_INTERN pars_res_word_t pars_instr_token = {PARS_INSTR_TOKEN};
+UNIV_INTERN pars_res_word_t pars_length_token = {PARS_LENGTH_TOKEN};
+UNIV_INTERN pars_res_word_t pars_sysdate_token = {PARS_SYSDATE_TOKEN};
+UNIV_INTERN pars_res_word_t pars_printf_token = {PARS_PRINTF_TOKEN};
+UNIV_INTERN pars_res_word_t pars_assert_token = {PARS_ASSERT_TOKEN};
+UNIV_INTERN pars_res_word_t pars_rnd_token = {PARS_RND_TOKEN};
+UNIV_INTERN pars_res_word_t pars_rnd_str_token = {PARS_RND_STR_TOKEN};
+UNIV_INTERN pars_res_word_t pars_count_token = {PARS_COUNT_TOKEN};
+UNIV_INTERN pars_res_word_t pars_sum_token = {PARS_SUM_TOKEN};
+UNIV_INTERN pars_res_word_t pars_distinct_token = {PARS_DISTINCT_TOKEN};
+UNIV_INTERN pars_res_word_t pars_binary_token = {PARS_BINARY_TOKEN};
+UNIV_INTERN pars_res_word_t pars_blob_token = {PARS_BLOB_TOKEN};
+UNIV_INTERN pars_res_word_t pars_int_token = {PARS_INT_TOKEN};
+UNIV_INTERN pars_res_word_t pars_char_token = {PARS_CHAR_TOKEN};
+UNIV_INTERN pars_res_word_t pars_float_token = {PARS_FLOAT_TOKEN};
+UNIV_INTERN pars_res_word_t pars_update_token = {PARS_UPDATE_TOKEN};
+UNIV_INTERN pars_res_word_t pars_asc_token = {PARS_ASC_TOKEN};
+UNIV_INTERN pars_res_word_t pars_desc_token = {PARS_DESC_TOKEN};
+UNIV_INTERN pars_res_word_t pars_open_token = {PARS_OPEN_TOKEN};
+UNIV_INTERN pars_res_word_t pars_close_token = {PARS_CLOSE_TOKEN};
+UNIV_INTERN pars_res_word_t pars_share_token = {PARS_SHARE_TOKEN};
+UNIV_INTERN pars_res_word_t pars_unique_token = {PARS_UNIQUE_TOKEN};
+UNIV_INTERN pars_res_word_t pars_clustered_token = {PARS_CLUSTERED_TOKEN};
+
+/** Global variable used to denote the '*' in SELECT * FROM.. */
+UNIV_INTERN ulint pars_star_denoter = 12345678;
+
+
+/*********************************************************************//**
+Determines the class of a function code.
+@return function class: PARS_FUNC_ARITH, ... */
static
ulint
pars_func_get_class(
/*================*/
- /* out: function class: PARS_FUNC_ARITH, ... */
- int func) /* in: function code: '=', PARS_GE_TOKEN, ... */
+ int func) /*!< in: function code: '=', PARS_GE_TOKEN, ... */
{
- if ((func == '+') || (func == '-') || (func == '*') || (func == '/')) {
-
+ switch (func) {
+ case '+': case '-': case '*': case '/':
return(PARS_FUNC_ARITH);
- } else if ((func == '=') || (func == '<') || (func == '>')
- || (func == PARS_GE_TOKEN) || (func == PARS_LE_TOKEN)
- || (func == PARS_NE_TOKEN)) {
-
+ case '=': case '<': case '>':
+ case PARS_GE_TOKEN: case PARS_LE_TOKEN: case PARS_NE_TOKEN:
return(PARS_FUNC_CMP);
- } else if ((func == PARS_AND_TOKEN) || (func == PARS_OR_TOKEN)
- || (func == PARS_NOT_TOKEN)) {
-
+ case PARS_AND_TOKEN: case PARS_OR_TOKEN: case PARS_NOT_TOKEN:
return(PARS_FUNC_LOGICAL);
- } else if ((func == PARS_COUNT_TOKEN) || (func == PARS_SUM_TOKEN)) {
-
+ case PARS_COUNT_TOKEN: case PARS_SUM_TOKEN:
return(PARS_FUNC_AGGREGATE);
- } else if ((func == PARS_TO_CHAR_TOKEN)
- || (func == PARS_TO_NUMBER_TOKEN)
- || (func == PARS_TO_BINARY_TOKEN)
- || (func == PARS_BINARY_TO_NUMBER_TOKEN)
- || (func == PARS_SUBSTR_TOKEN)
- || (func == PARS_CONCAT_TOKEN)
- || (func == PARS_LENGTH_TOKEN)
- || (func == PARS_INSTR_TOKEN)
- || (func == PARS_SYSDATE_TOKEN)
- || (func == PARS_NOTFOUND_TOKEN)
- || (func == PARS_PRINTF_TOKEN)
- || (func == PARS_ASSERT_TOKEN)
- || (func == PARS_RND_TOKEN)
- || (func == PARS_RND_STR_TOKEN)
- || (func == PARS_REPLSTR_TOKEN)) {
-
+ case PARS_TO_CHAR_TOKEN:
+ case PARS_TO_NUMBER_TOKEN:
+ case PARS_TO_BINARY_TOKEN:
+ case PARS_BINARY_TO_NUMBER_TOKEN:
+ case PARS_SUBSTR_TOKEN:
+ case PARS_CONCAT_TOKEN:
+ case PARS_LENGTH_TOKEN:
+ case PARS_INSTR_TOKEN:
+ case PARS_SYSDATE_TOKEN:
+ case PARS_NOTFOUND_TOKEN:
+ case PARS_PRINTF_TOKEN:
+ case PARS_ASSERT_TOKEN:
+ case PARS_RND_TOKEN:
+ case PARS_RND_STR_TOKEN:
+ case PARS_REPLSTR_TOKEN:
return(PARS_FUNC_PREDEFINED);
- } else {
+
+ default:
return(PARS_FUNC_OTHER);
}
}
-/*************************************************************************
-Parses an operator or predefined function expression. */
+/*********************************************************************//**
+Parses an operator or predefined function expression.
+@return own: function node in a query tree */
static
func_node_t*
pars_func_low(
/*==========*/
- /* out, own: function node in a query tree */
- int func, /* in: function token code */
- que_node_t* arg) /* in: first argument in the argument list */
+ int func, /*!< in: function token code */
+ que_node_t* arg) /*!< in: first argument in the argument list */
{
func_node_t* node;
@@ -160,29 +170,29 @@ pars_func_low(
return(node);
}
-/*************************************************************************
-Parses a function expression. */
-
+/*********************************************************************//**
+Parses a function expression.
+@return own: function node in a query tree */
+UNIV_INTERN
func_node_t*
pars_func(
/*======*/
- /* out, own: function node in a query tree */
- que_node_t* res_word,/* in: function name reserved word */
- que_node_t* arg) /* in: first argument in the argument list */
+ que_node_t* res_word,/*!< in: function name reserved word */
+ que_node_t* arg) /*!< in: first argument in the argument list */
{
return(pars_func_low(((pars_res_word_t*)res_word)->code, arg));
}
-/*************************************************************************
-Parses an operator expression. */
-
+/*********************************************************************//**
+Parses an operator expression.
+@return own: function node in a query tree */
+UNIV_INTERN
func_node_t*
pars_op(
/*====*/
- /* out, own: function node in a query tree */
- int func, /* in: operator token code */
- que_node_t* arg1, /* in: first argument */
- que_node_t* arg2) /* in: second argument or NULL for an unary
+ int func, /*!< in: operator token code */
+ que_node_t* arg1, /*!< in: first argument */
+ que_node_t* arg2) /*!< in: second argument or NULL for an unary
operator */
{
que_node_list_add_last(NULL, arg1);
@@ -194,15 +204,15 @@ pars_op(
return(pars_func_low(func, arg1));
}
-/*************************************************************************
-Parses an ORDER BY clause. Order by a single column only is supported. */
-
+/*********************************************************************//**
+Parses an ORDER BY clause. Order by a single column only is supported.
+@return own: order-by node in a query tree */
+UNIV_INTERN
order_node_t*
pars_order_by(
/*==========*/
- /* out, own: order-by node in a query tree */
- sym_node_t* column, /* in: column name */
- pars_res_word_t* asc) /* in: &pars_asc_token or pars_desc_token */
+ sym_node_t* column, /*!< in: column name */
+ pars_res_word_t* asc) /*!< in: &pars_asc_token or &pars_desc_token */
{
order_node_t* node;
@@ -222,28 +232,43 @@ pars_order_by(
return(node);
}
-/*************************************************************************
+/*********************************************************************//**
+Determine if a data type is a built-in string data type of the InnoDB
+SQL parser.
+@return TRUE if string data type */
+static
+ibool
+pars_is_string_type(
+/*================*/
+ ulint mtype) /*!< in: main data type */
+{
+ switch (mtype) {
+ case DATA_VARCHAR: case DATA_CHAR:
+ case DATA_FIXBINARY: case DATA_BINARY:
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*********************************************************************//**
Resolves the data type of a function in an expression. The argument data
types must already be resolved. */
static
void
pars_resolve_func_data_type(
/*========================*/
- func_node_t* node) /* in: function node */
+ func_node_t* node) /*!< in: function node */
{
que_node_t* arg;
- ulint func;
ut_a(que_node_get_type(node) == QUE_NODE_FUNC);
arg = node->args;
- func = node->func;
-
- if ((func == PARS_SUM_TOKEN)
- || (func == '+') || (func == '-') || (func == '*')
- || (func == '/') || (func == '+')) {
-
+ switch (node->func) {
+ case PARS_SUM_TOKEN:
+ case '+': case '-': case '*': case '/':
/* Inherit the data type from the first argument (which must
not be the SQL null literal whose type is DATA_ERROR) */
@@ -252,15 +277,21 @@ pars_resolve_func_data_type(
ut_a(dtype_get_mtype(que_node_get_data_type(node))
== DATA_INT);
- } else if (func == PARS_COUNT_TOKEN) {
+ break;
+
+ case PARS_COUNT_TOKEN:
ut_a(arg);
dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
+ break;
- } else if (func == PARS_TO_CHAR_TOKEN) {
+ case PARS_TO_CHAR_TOKEN:
+ case PARS_RND_STR_TOKEN:
ut_a(dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT);
dtype_set(que_node_get_data_type(node), DATA_VARCHAR,
DATA_ENGLISH, 0);
- } else if (func == PARS_TO_BINARY_TOKEN) {
+ break;
+
+ case PARS_TO_BINARY_TOKEN:
if (dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT) {
dtype_set(que_node_get_data_type(node), DATA_VARCHAR,
DATA_ENGLISH, 0);
@@ -268,77 +299,63 @@ pars_resolve_func_data_type(
dtype_set(que_node_get_data_type(node), DATA_BINARY,
0, 0);
}
- } else if (func == PARS_TO_NUMBER_TOKEN) {
- ut_a(dtype_get_mtype(que_node_get_data_type(arg))
- == DATA_VARCHAR);
- dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
-
- } else if (func == PARS_BINARY_TO_NUMBER_TOKEN) {
- ut_a(dtype_get_mtype(que_node_get_data_type(arg))
- == DATA_VARCHAR);
- dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
-
- } else if (func == PARS_LENGTH_TOKEN) {
- ut_a(dtype_get_mtype(que_node_get_data_type(arg))
- == DATA_VARCHAR);
- dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
+ break;
- } else if (func == PARS_INSTR_TOKEN) {
- ut_a(dtype_get_mtype(que_node_get_data_type(arg))
- == DATA_VARCHAR);
+ case PARS_TO_NUMBER_TOKEN:
+ case PARS_BINARY_TO_NUMBER_TOKEN:
+ case PARS_LENGTH_TOKEN:
+ case PARS_INSTR_TOKEN:
+ ut_a(pars_is_string_type(que_node_get_data_type(arg)->mtype));
dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
+ break;
- } else if (func == PARS_SYSDATE_TOKEN) {
+ case PARS_SYSDATE_TOKEN:
ut_a(arg == NULL);
dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
+ break;
- } else if ((func == PARS_SUBSTR_TOKEN)
- || (func == PARS_CONCAT_TOKEN)) {
-
- ut_a(dtype_get_mtype(que_node_get_data_type(arg))
- == DATA_VARCHAR);
+ case PARS_SUBSTR_TOKEN:
+ case PARS_CONCAT_TOKEN:
+ ut_a(pars_is_string_type(que_node_get_data_type(arg)->mtype));
dtype_set(que_node_get_data_type(node), DATA_VARCHAR,
DATA_ENGLISH, 0);
+ break;
- } else if ((func == '>') || (func == '<') || (func == '=')
- || (func == PARS_GE_TOKEN)
- || (func == PARS_LE_TOKEN)
- || (func == PARS_NE_TOKEN)
- || (func == PARS_AND_TOKEN)
- || (func == PARS_OR_TOKEN)
- || (func == PARS_NOT_TOKEN)
- || (func == PARS_NOTFOUND_TOKEN)) {
+ case '>': case '<': case '=':
+ case PARS_GE_TOKEN:
+ case PARS_LE_TOKEN:
+ case PARS_NE_TOKEN:
+ case PARS_AND_TOKEN:
+ case PARS_OR_TOKEN:
+ case PARS_NOT_TOKEN:
+ case PARS_NOTFOUND_TOKEN:
/* We currently have no iboolean type: use integer type */
dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
+ break;
- } else if (func == PARS_RND_TOKEN) {
+ case PARS_RND_TOKEN:
ut_a(dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT);
-
dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
+ break;
- } else if (func == PARS_RND_STR_TOKEN) {
- ut_a(dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT);
-
- dtype_set(que_node_get_data_type(node), DATA_VARCHAR,
- DATA_ENGLISH, 0);
- } else {
+ default:
ut_error;
}
}
-/*************************************************************************
+/*********************************************************************//**
Resolves the meaning of variables in an expression and the data types of
functions. It is an error if some identifier cannot be resolved here. */
static
void
pars_resolve_exp_variables_and_types(
/*=================================*/
- sel_node_t* select_node, /* in: select node or NULL; if
+ sel_node_t* select_node, /*!< in: select node or NULL; if
this is not NULL then the variable
sym nodes are added to the
copy_variables list of select_node */
- que_node_t* exp_node) /* in: expression */
+ que_node_t* exp_node) /*!< in: expression */
{
func_node_t* func_node;
que_node_t* arg;
@@ -417,7 +434,7 @@ pars_resolve_exp_variables_and_types(
que_node_get_data_type(node));
}
-/*************************************************************************
+/*********************************************************************//**
Resolves the meaning of variables in an expression list. It is an error if
some identifier cannot be resolved here. Resolves also the data types of
functions. */
@@ -425,8 +442,8 @@ static
void
pars_resolve_exp_list_variables_and_types(
/*======================================*/
- sel_node_t* select_node, /* in: select node or NULL */
- que_node_t* exp_node) /* in: expression list first node, or
+ sel_node_t* select_node, /*!< in: select node or NULL */
+ que_node_t* exp_node) /*!< in: expression list first node, or
NULL */
{
while (exp_node) {
@@ -436,14 +453,14 @@ pars_resolve_exp_list_variables_and_types(
}
}
-/*************************************************************************
+/*********************************************************************//**
Resolves the columns in an expression. */
static
void
pars_resolve_exp_columns(
/*=====================*/
- sym_node_t* table_node, /* in: first node in a table list */
- que_node_t* exp_node) /* in: expression */
+ sym_node_t* table_node, /*!< in: first node in a table list */
+ que_node_t* exp_node) /*!< in: expression */
{
func_node_t* func_node;
que_node_t* arg;
@@ -517,14 +534,14 @@ pars_resolve_exp_columns(
}
}
-/*************************************************************************
+/*********************************************************************//**
Resolves the meaning of columns in an expression list. */
static
void
pars_resolve_exp_list_columns(
/*==========================*/
- sym_node_t* table_node, /* in: first node in a table list */
- que_node_t* exp_node) /* in: expression list first node, or
+ sym_node_t* table_node, /*!< in: first node in a table list */
+ que_node_t* exp_node) /*!< in: expression list first node, or
NULL */
{
while (exp_node) {
@@ -534,13 +551,13 @@ pars_resolve_exp_list_columns(
}
}
-/*************************************************************************
+/*********************************************************************//**
Retrieves the table definition for a table name id. */
static
void
pars_retrieve_table_def(
/*====================*/
- sym_node_t* sym_node) /* in: table node */
+ sym_node_t* sym_node) /*!< in: table node */
{
const char* table_name;
@@ -557,14 +574,14 @@ pars_retrieve_table_def(
ut_a(sym_node->table);
}
-/*************************************************************************
-Retrieves the table definitions for a list of table name ids. */
+/*********************************************************************//**
+Retrieves the table definitions for a list of table name ids.
+@return number of tables */
static
ulint
pars_retrieve_table_list_defs(
/*==========================*/
- /* out: number of tables */
- sym_node_t* sym_node) /* in: first table node in list */
+ sym_node_t* sym_node) /*!< in: first table node in list */
{
ulint count = 0;
@@ -584,13 +601,13 @@ pars_retrieve_table_list_defs(
return(count);
}
-/*************************************************************************
+/*********************************************************************//**
Adds all columns to the select list if the query is SELECT * FROM ... */
static
void
pars_select_all_columns(
/*====================*/
- sel_node_t* select_node) /* in: select node already containing
+ sel_node_t* select_node) /*!< in: select node already containing
the table list */
{
sym_node_t* col_node;
@@ -621,17 +638,16 @@ pars_select_all_columns(
}
}
-/*************************************************************************
+/*********************************************************************//**
Parses a select list; creates a query graph node for the whole SELECT
-statement. */
-
+statement.
+@return own: select node in a query tree */
+UNIV_INTERN
sel_node_t*
pars_select_list(
/*=============*/
- /* out, own: select node in a query
- tree */
- que_node_t* select_list, /* in: select list */
- sym_node_t* into_list) /* in: variables list or NULL */
+ que_node_t* select_list, /*!< in: select list */
+ sym_node_t* into_list) /*!< in: variables list or NULL */
{
sel_node_t* node;
@@ -645,14 +661,14 @@ pars_select_list(
return(node);
}
-/*************************************************************************
+/*********************************************************************//**
Checks if the query is an aggregate query, in which case the selct list must
contain only aggregate function items. */
static
void
pars_check_aggregate(
/*=================*/
- sel_node_t* select_node) /* in: select node already containing
+ sel_node_t* select_node) /*!< in: select node already containing
the select list */
{
que_node_t* exp_node;
@@ -688,21 +704,20 @@ pars_check_aggregate(
}
}
-/*************************************************************************
-Parses a select statement. */
-
+/*********************************************************************//**
+Parses a select statement.
+@return own: select node in a query tree */
+UNIV_INTERN
sel_node_t*
pars_select_statement(
/*==================*/
- /* out, own: select node in a query
- tree */
- sel_node_t* select_node, /* in: select node already containing
+ sel_node_t* select_node, /*!< in: select node already containing
the select list */
- sym_node_t* table_list, /* in: table list */
- que_node_t* search_cond, /* in: search condition or NULL */
- pars_res_word_t* for_update, /* in: NULL or &pars_update_token */
- pars_res_word_t* lock_shared, /* in: NULL or &pars_share_token */
- order_node_t* order_by) /* in: NULL or an order-by node */
+ sym_node_t* table_list, /*!< in: table list */
+ que_node_t* search_cond, /*!< in: search condition or NULL */
+ pars_res_word_t* for_update, /*!< in: NULL or &pars_update_token */
+ pars_res_word_t* lock_shared, /*!< in: NULL or &pars_share_token */
+ order_node_t* order_by) /*!< in: NULL or an order-by node */
{
select_node->state = SEL_NODE_OPEN;
@@ -772,16 +787,16 @@ pars_select_statement(
return(select_node);
}
-/*************************************************************************
-Parses a cursor declaration. */
-
+/*********************************************************************//**
+Parses a cursor declaration.
+@return sym_node */
+UNIV_INTERN
que_node_t*
pars_cursor_declaration(
/*====================*/
- /* out: sym_node */
- sym_node_t* sym_node, /* in: cursor id node in the symbol
+ sym_node_t* sym_node, /*!< in: cursor id node in the symbol
table */
- sel_node_t* select_node) /* in: select node */
+ sel_node_t* select_node) /*!< in: select node */
{
sym_node->resolved = TRUE;
sym_node->token_type = SYM_CURSOR;
@@ -793,14 +808,14 @@ pars_cursor_declaration(
return(sym_node);
}
-/*************************************************************************
-Parses a function declaration. */
-
+/*********************************************************************//**
+Parses a function declaration.
+@return sym_node */
+UNIV_INTERN
que_node_t*
pars_function_declaration(
/*======================*/
- /* out: sym_node */
- sym_node_t* sym_node) /* in: function id node in the symbol
+ sym_node_t* sym_node) /*!< in: function id node in the symbol
table */
{
sym_node->resolved = TRUE;
@@ -813,17 +828,16 @@ pars_function_declaration(
return(sym_node);
}
-/*************************************************************************
-Parses a delete or update statement start. */
-
+/*********************************************************************//**
+Parses a delete or update statement start.
+@return own: update node in a query tree */
+UNIV_INTERN
upd_node_t*
pars_update_statement_start(
/*========================*/
- /* out, own: update node in a query
- tree */
- ibool is_delete, /* in: TRUE if delete */
- sym_node_t* table_sym, /* in: table name node */
- col_assign_node_t* col_assign_list)/* in: column assignment list, NULL
+ ibool is_delete, /*!< in: TRUE if delete */
+ sym_node_t* table_sym, /*!< in: table name node */
+ col_assign_node_t* col_assign_list)/*!< in: column assignment list, NULL
if delete */
{
upd_node_t* node;
@@ -838,15 +852,15 @@ pars_update_statement_start(
return(node);
}
-/*************************************************************************
-Parses a column assignment in an update. */
-
+/*********************************************************************//**
+Parses a column assignment in an update.
+@return column assignment node */
+UNIV_INTERN
col_assign_node_t*
pars_column_assignment(
/*===================*/
- /* out: column assignment node */
- sym_node_t* column, /* in: column to assign */
- que_node_t* exp) /* in: value to assign */
+ sym_node_t* column, /*!< in: column to assign */
+ que_node_t* exp) /*!< in: value to assign */
{
col_assign_node_t* node;
@@ -860,13 +874,13 @@ pars_column_assignment(
return(node);
}
-/*************************************************************************
+/*********************************************************************//**
Processes an update node assignment list. */
static
void
pars_process_assign_list(
/*=====================*/
- upd_node_t* node) /* in: update node */
+ upd_node_t* node) /*!< in: update node */
{
col_assign_node_t* col_assign_list;
sym_node_t* table_sym;
@@ -927,7 +941,8 @@ pars_process_assign_list(
if (!dict_col_get_fixed_size(
dict_index_get_nth_col(clust_index,
- upd_field->field_no))) {
+ upd_field->field_no),
+ dict_table_is_comp(node->table))) {
changes_field_size = 0;
}
@@ -946,18 +961,17 @@ pars_process_assign_list(
node->cmpl_info = changes_ord_field | changes_field_size;
}
-/*************************************************************************
-Parses an update or delete statement. */
-
+/*********************************************************************//**
+Parses an update or delete statement.
+@return own: update node in a query tree */
+UNIV_INTERN
upd_node_t*
pars_update_statement(
/*==================*/
- /* out, own: update node in a query
- tree */
- upd_node_t* node, /* in: update node */
- sym_node_t* cursor_sym, /* in: pointer to a cursor entry in
+ upd_node_t* node, /*!< in: update node */
+ sym_node_t* cursor_sym, /*!< in: pointer to a cursor entry in
the symbol table or NULL */
- que_node_t* search_cond) /* in: search condition or NULL */
+ que_node_t* search_cond) /*!< in: search condition or NULL */
{
sym_node_t* table_sym;
sel_node_t* sel_node;
@@ -1021,7 +1035,7 @@ pars_update_statement(
plan->no_prefetch = TRUE;
- if (!((plan->index)->type & DICT_CLUSTERED)) {
+ if (!dict_index_is_clust(plan->index)) {
plan->must_get_clust = TRUE;
@@ -1030,33 +1044,19 @@ pars_update_statement(
node->pcur = &(plan->pcur);
}
- if (!node->is_delete && node->searched_update
- && (node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE)
- && (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
-
- /* The select node can perform the update in-place */
-
- ut_a(plan->asc);
-
- node->select_will_do_update = TRUE;
- sel_node->select_will_do_update = TRUE;
- sel_node->latch_mode = BTR_MODIFY_LEAF;
- }
-
return(node);
}
-/*************************************************************************
-Parses an insert statement. */
-
+/*********************************************************************//**
+Parses an insert statement.
+@return own: insert node in a query tree */
+UNIV_INTERN
ins_node_t*
pars_insert_statement(
/*==================*/
- /* out, own: update node in a query
- tree */
- sym_node_t* table_sym, /* in: table name node */
- que_node_t* values_list, /* in: value expression list or NULL */
- sel_node_t* select) /* in: select condition or NULL */
+ sym_node_t* table_sym, /*!< in: table name node */
+ que_node_t* values_list, /*!< in: value expression list or NULL */
+ sel_node_t* select) /*!< in: select condition or NULL */
{
ins_node_t* node;
dtuple_t* row;
@@ -1104,19 +1104,19 @@ pars_insert_statement(
return(node);
}
-/*************************************************************************
+/*********************************************************************//**
Set the type of a dfield. */
static
void
pars_set_dfield_type(
/*=================*/
- dfield_t* dfield, /* in: dfield */
- pars_res_word_t* type, /* in: pointer to a type
+ dfield_t* dfield, /*!< in: dfield */
+ pars_res_word_t* type, /*!< in: pointer to a type
token */
- ulint len, /* in: length, or 0 */
- ibool is_unsigned, /* in: if TRUE, column is
+ ulint len, /*!< in: length, or 0 */
+ ibool is_unsigned, /*!< in: if TRUE, column is
UNSIGNED. */
- ibool is_not_null) /* in: if TRUE, column is
+ ibool is_not_null) /*!< in: if TRUE, column is
NOT NULL. */
{
ulint flags = 0;
@@ -1154,17 +1154,16 @@ pars_set_dfield_type(
}
}
-/*************************************************************************
-Parses a variable declaration. */
-
+/*********************************************************************//**
+Parses a variable declaration.
+@return own: symbol table node of type SYM_VAR */
+UNIV_INTERN
sym_node_t*
pars_variable_declaration(
/*======================*/
- /* out, own: symbol table node of type
- SYM_VAR */
- sym_node_t* node, /* in: symbol table node allocated for the
+ sym_node_t* node, /*!< in: symbol table node allocated for the
id of the variable */
- pars_res_word_t* type) /* in: pointer to a type token */
+ pars_res_word_t* type) /*!< in: pointer to a type token */
{
node->resolved = TRUE;
node->token_type = SYM_VAR;
@@ -1176,19 +1175,18 @@ pars_variable_declaration(
return(node);
}
-/*************************************************************************
-Parses a procedure parameter declaration. */
-
+/*********************************************************************//**
+Parses a procedure parameter declaration.
+@return own: symbol table node of type SYM_VAR */
+UNIV_INTERN
sym_node_t*
pars_parameter_declaration(
/*=======================*/
- /* out, own: symbol table node of type
- SYM_VAR */
- sym_node_t* node, /* in: symbol table node allocated for the
+ sym_node_t* node, /*!< in: symbol table node allocated for the
id of the parameter */
ulint param_type,
- /* in: PARS_INPUT or PARS_OUTPUT */
- pars_res_word_t* type) /* in: pointer to a type token */
+ /*!< in: PARS_INPUT or PARS_OUTPUT */
+ pars_res_word_t* type) /*!< in: pointer to a type token */
{
ut_a((param_type == PARS_INPUT) || (param_type == PARS_OUTPUT));
@@ -1199,14 +1197,14 @@ pars_parameter_declaration(
return(node);
}
-/*************************************************************************
+/*********************************************************************//**
Sets the parent field in a query node list. */
static
void
pars_set_parent_in_list(
/*====================*/
- que_node_t* node_list, /* in: first node in a list */
- que_node_t* parent) /* in: parent value to set in all
+ que_node_t* node_list, /*!< in: first node in a list */
+ que_node_t* parent) /*!< in: parent value to set in all
nodes of the list */
{
que_common_t* common;
@@ -1220,15 +1218,15 @@ pars_set_parent_in_list(
}
}
-/*************************************************************************
-Parses an elsif element. */
-
+/*********************************************************************//**
+Parses an elsif element.
+@return elsif node */
+UNIV_INTERN
elsif_node_t*
pars_elsif_element(
/*===============*/
- /* out: elsif node */
- que_node_t* cond, /* in: if-condition */
- que_node_t* stat_list) /* in: statement list */
+ que_node_t* cond, /*!< in: if-condition */
+ que_node_t* stat_list) /*!< in: statement list */
{
elsif_node_t* node;
@@ -1245,16 +1243,16 @@ pars_elsif_element(
return(node);
}
-/*************************************************************************
-Parses an if-statement. */
-
+/*********************************************************************//**
+Parses an if-statement.
+@return if-statement node */
+UNIV_INTERN
if_node_t*
pars_if_statement(
/*==============*/
- /* out: if-statement node */
- que_node_t* cond, /* in: if-condition */
- que_node_t* stat_list, /* in: statement list */
- que_node_t* else_part) /* in: else-part statement list
+ que_node_t* cond, /*!< in: if-condition */
+ que_node_t* stat_list, /*!< in: statement list */
+ que_node_t* else_part) /*!< in: else-part statement list
or elsif element list */
{
if_node_t* node;
@@ -1296,15 +1294,15 @@ pars_if_statement(
return(node);
}
-/*************************************************************************
-Parses a while-statement. */
-
+/*********************************************************************//**
+Parses a while-statement.
+@return while-statement node */
+UNIV_INTERN
while_node_t*
pars_while_statement(
/*=================*/
- /* out: while-statement node */
- que_node_t* cond, /* in: while-condition */
- que_node_t* stat_list) /* in: statement list */
+ que_node_t* cond, /*!< in: while-condition */
+ que_node_t* stat_list) /*!< in: statement list */
{
while_node_t* node;
@@ -1323,17 +1321,17 @@ pars_while_statement(
return(node);
}
-/*************************************************************************
-Parses a for-loop-statement. */
-
+/*********************************************************************//**
+Parses a for-loop-statement.
+@return for-statement node */
+UNIV_INTERN
for_node_t*
pars_for_statement(
/*===============*/
- /* out: for-statement node */
- sym_node_t* loop_var, /* in: loop variable */
- que_node_t* loop_start_limit,/* in: loop start expression */
- que_node_t* loop_end_limit, /* in: loop end expression */
- que_node_t* stat_list) /* in: statement list */
+ sym_node_t* loop_var, /*!< in: loop variable */
+ que_node_t* loop_start_limit,/*!< in: loop start expression */
+ que_node_t* loop_end_limit, /*!< in: loop end expression */
+ que_node_t* stat_list) /*!< in: statement list */
{
for_node_t* node;
@@ -1359,13 +1357,13 @@ pars_for_statement(
return(node);
}
-/*************************************************************************
-Parses an exit statement. */
-
+/*********************************************************************//**
+Parses an exit statement.
+@return exit statement node */
+UNIV_INTERN
exit_node_t*
pars_exit_statement(void)
/*=====================*/
- /* out: exit statement node */
{
exit_node_t* node;
@@ -1375,13 +1373,13 @@ pars_exit_statement(void)
return(node);
}
-/*************************************************************************
-Parses a return-statement. */
-
+/*********************************************************************//**
+Parses a return-statement.
+@return return-statement node */
+UNIV_INTERN
return_node_t*
pars_return_statement(void)
/*=======================*/
- /* out: return-statement node */
{
return_node_t* node;
@@ -1392,15 +1390,15 @@ pars_return_statement(void)
return(node);
}
-/*************************************************************************
-Parses an assignment statement. */
-
+/*********************************************************************//**
+Parses an assignment statement.
+@return assignment statement node */
+UNIV_INTERN
assign_node_t*
pars_assignment_statement(
/*======================*/
- /* out: assignment statement node */
- sym_node_t* var, /* in: variable to assign */
- que_node_t* val) /* in: value to assign */
+ sym_node_t* var, /*!< in: variable to assign */
+ que_node_t* val) /*!< in: value to assign */
{
assign_node_t* node;
@@ -1420,15 +1418,15 @@ pars_assignment_statement(
return(node);
}
-/*************************************************************************
-Parses a procedure call. */
-
+/*********************************************************************//**
+Parses a procedure call.
+@return function node */
+UNIV_INTERN
func_node_t*
pars_procedure_call(
/*================*/
- /* out: function node */
- que_node_t* res_word,/* in: procedure name reserved word */
- que_node_t* args) /* in: argument list */
+ que_node_t* res_word,/*!< in: procedure name reserved word */
+ que_node_t* args) /*!< in: argument list */
{
func_node_t* node;
@@ -1439,17 +1437,17 @@ pars_procedure_call(
return(node);
}
-/*************************************************************************
+/*********************************************************************//**
Parses a fetch statement. into_list or user_func (but not both) must be
-non-NULL. */
-
+non-NULL.
+@return fetch statement node */
+UNIV_INTERN
fetch_node_t*
pars_fetch_statement(
/*=================*/
- /* out: fetch statement node */
- sym_node_t* cursor, /* in: cursor node */
- sym_node_t* into_list, /* in: variables to set, or NULL */
- sym_node_t* user_func) /* in: user function name, or NULL */
+ sym_node_t* cursor, /*!< in: cursor node */
+ sym_node_t* into_list, /*!< in: variables to set, or NULL */
+ sym_node_t* user_func) /*!< in: user function name, or NULL */
{
sym_node_t* cursor_decl;
fetch_node_t* node;
@@ -1491,16 +1489,16 @@ pars_fetch_statement(
return(node);
}
-/*************************************************************************
-Parses an open or close cursor statement. */
-
+/*********************************************************************//**
+Parses an open or close cursor statement.
+@return open or close cursor statement node */
+UNIV_INTERN
open_node_t*
pars_open_statement(
/*================*/
- /* out: fetch statement node */
- ulint type, /* in: ROW_SEL_OPEN_CURSOR
+ ulint type, /*!< in: ROW_SEL_OPEN_CURSOR
or ROW_SEL_CLOSE_CURSOR */
- sym_node_t* cursor) /* in: cursor node */
+ sym_node_t* cursor) /*!< in: cursor node */
{
sym_node_t* cursor_decl;
open_node_t* node;
@@ -1521,14 +1519,14 @@ pars_open_statement(
return(node);
}
-/*************************************************************************
-Parses a row_printf-statement. */
-
+/*********************************************************************//**
+Parses a row_printf-statement.
+@return row_printf-statement node */
+UNIV_INTERN
row_printf_node_t*
pars_row_printf_statement(
/*======================*/
- /* out: row_printf-statement node */
- sel_node_t* sel_node) /* in: select node */
+ sel_node_t* sel_node) /*!< in: select node */
{
row_printf_node_t* node;
@@ -1543,9 +1541,10 @@ pars_row_printf_statement(
return(node);
}
-/*************************************************************************
-Parses a commit statement. */
-
+/*********************************************************************//**
+Parses a commit statement.
+@return own: commit node struct */
+UNIV_INTERN
commit_node_t*
pars_commit_statement(void)
/*=======================*/
@@ -1553,9 +1552,10 @@ pars_commit_statement(void)
return(commit_node_create(pars_sym_tab_global->heap));
}
-/*************************************************************************
-Parses a rollback statement. */
-
+/*********************************************************************//**
+Parses a rollback statement.
+@return own: rollback node struct */
+UNIV_INTERN
roll_node_t*
pars_rollback_statement(void)
/*=========================*/
@@ -1563,22 +1563,21 @@ pars_rollback_statement(void)
return(roll_node_create(pars_sym_tab_global->heap));
}
-/*************************************************************************
-Parses a column definition at a table creation. */
-
+/*********************************************************************//**
+Parses a column definition at a table creation.
+@return column sym table node */
+UNIV_INTERN
sym_node_t*
pars_column_def(
/*============*/
- /* out: column sym table
- node */
- sym_node_t* sym_node, /* in: column node in the
+ sym_node_t* sym_node, /*!< in: column node in the
symbol table */
- pars_res_word_t* type, /* in: data type */
- sym_node_t* len, /* in: length of column, or
+ pars_res_word_t* type, /*!< in: data type */
+ sym_node_t* len, /*!< in: length of column, or
NULL */
- void* is_unsigned, /* in: if not NULL, column
+ void* is_unsigned, /*!< in: if not NULL, column
is of type UNSIGNED. */
- void* is_not_null) /* in: if not NULL, column
+ void* is_not_null) /*!< in: if not NULL, column
is of type NOT NULL. */
{
ulint len2;
@@ -1595,18 +1594,18 @@ pars_column_def(
return(sym_node);
}
-/*************************************************************************
-Parses a table creation operation. */
-
+/*********************************************************************//**
+Parses a table creation operation.
+@return table create subgraph */
+UNIV_INTERN
tab_node_t*
pars_create_table(
/*==============*/
- /* out: table create subgraph */
- sym_node_t* table_sym, /* in: table name node in the symbol
+ sym_node_t* table_sym, /*!< in: table name node in the symbol
table */
- sym_node_t* column_defs, /* in: list of column names */
+ sym_node_t* column_defs, /*!< in: list of column names */
void* not_fit_in_memory __attribute__((unused)))
- /* in: a non-NULL pointer means that
+ /*!< in: a non-NULL pointer means that
this is a table which in simulations
should be simulated as not fitting
in memory; thread is put to sleep
@@ -1620,7 +1619,7 @@ pars_create_table(
dict_table_t* table;
sym_node_t* column;
tab_node_t* node;
- dtype_t* dtype;
+ const dtype_t* dtype;
ulint n_cols;
n_cols = que_node_list_get_len(column_defs);
@@ -1657,20 +1656,20 @@ pars_create_table(
return(node);
}
-/*************************************************************************
-Parses an index creation operation. */
-
+/*********************************************************************//**
+Parses an index creation operation.
+@return index create subgraph */
+UNIV_INTERN
ind_node_t*
pars_create_index(
/*==============*/
- /* out: index create subgraph */
- pars_res_word_t* unique_def, /* in: not NULL if a unique index */
- pars_res_word_t* clustered_def, /* in: not NULL if a clustered index */
- sym_node_t* index_sym, /* in: index name node in the symbol
+ pars_res_word_t* unique_def, /*!< in: not NULL if a unique index */
+ pars_res_word_t* clustered_def, /*!< in: not NULL if a clustered index */
+ sym_node_t* index_sym, /*!< in: index name node in the symbol
table */
- sym_node_t* table_sym, /* in: table name node in the symbol
+ sym_node_t* table_sym, /*!< in: table name node in the symbol
table */
- sym_node_t* column_list) /* in: list of column names */
+ sym_node_t* column_list) /*!< in: list of column names */
{
dict_index_t* index;
sym_node_t* column;
@@ -1714,17 +1713,17 @@ pars_create_index(
return(node);
}
-/*************************************************************************
-Parses a procedure definition. */
-
+/*********************************************************************//**
+Parses a procedure definition.
+@return query fork node */
+UNIV_INTERN
que_fork_t*
pars_procedure_definition(
/*======================*/
- /* out: query fork node */
- sym_node_t* sym_node, /* in: procedure id node in the symbol
+ sym_node_t* sym_node, /*!< in: procedure id node in the symbol
table */
- sym_node_t* param_list, /* in: parameter declaration list */
- que_node_t* stat_list) /* in: statement list */
+ sym_node_t* param_list, /*!< in: parameter declaration list */
+ que_node_t* stat_list) /*!< in: statement list */
{
proc_node_t* node;
que_fork_t* fork;
@@ -1761,32 +1760,32 @@ pars_procedure_definition(
return(fork);
}
-/*****************************************************************
+/*************************************************************//**
Parses a stored procedure call, when this is not within another stored
procedure, that is, the client issues a procedure call directly.
In MySQL/InnoDB, stored InnoDB procedures are invoked via the
-parsed procedure tree, not via InnoDB SQL, so this function is not used. */
-
+parsed procedure tree, not via InnoDB SQL, so this function is not used.
+@return query graph */
+UNIV_INTERN
que_fork_t*
pars_stored_procedure_call(
/*=======================*/
- /* out: query graph */
sym_node_t* sym_node __attribute__((unused)))
- /* in: stored procedure name */
+ /*!< in: stored procedure name */
{
ut_error;
return(NULL);
}
-/*****************************************************************
+/*************************************************************//**
Retrieves characters to the lexical analyzer. */
-
+UNIV_INTERN
void
pars_get_lex_chars(
/*===============*/
- char* buf, /* in/out: buffer where to copy */
- int* result, /* out: number of characters copied or EOF */
- int max_size) /* in: maximum number of characters which fit
+ char* buf, /*!< in/out: buffer where to copy */
+ int* result, /*!< out: number of characters copied or EOF */
+ int max_size) /*!< in: maximum number of characters which fit
in the buffer */
{
int len;
@@ -1826,14 +1825,14 @@ pars_get_lex_chars(
pars_sym_tab_global->next_char_pos += len;
}
-/*****************************************************************
+/*************************************************************//**
Called by yyparse on error. */
-
+UNIV_INTERN
void
yyerror(
/*====*/
const char* s __attribute__((unused)))
- /* in: error message string */
+ /*!< in: error message string */
{
ut_ad(s);
@@ -1842,15 +1841,15 @@ yyerror(
ut_error;
}
-/*****************************************************************
-Parses an SQL string returning the query graph. */
-
+/*************************************************************//**
+Parses an SQL string returning the query graph.
+@return own: the query graph */
+UNIV_INTERN
que_t*
pars_sql(
/*=====*/
- /* out, own: the query graph */
- pars_info_t* info, /* in: extra information, or NULL */
- const char* str) /* in: SQL string */
+ pars_info_t* info, /*!< in: extra information, or NULL */
+ const char* str) /*!< in: SQL string */
{
sym_node_t* sym_node;
mem_heap_t* heap;
@@ -1891,19 +1890,19 @@ pars_sql(
return(graph);
}
-/**********************************************************************
+/******************************************************************//**
Completes a query graph by adding query thread and fork nodes
above it and prepares the graph for running. The fork created is of
-type QUE_FORK_MYSQL_INTERFACE. */
-
+type QUE_FORK_MYSQL_INTERFACE.
+@return query thread node to run */
+UNIV_INTERN
que_thr_t*
pars_complete_graph_for_exec(
/*=========================*/
- /* out: query thread node to run */
- que_node_t* node, /* in: root node for an incomplete
+ que_node_t* node, /*!< in: root node for an incomplete
query graph */
- trx_t* trx, /* in: transaction handle */
- mem_heap_t* heap) /* in: memory heap from which allocated */
+ trx_t* trx, /*!< in: transaction handle */
+ mem_heap_t* heap) /*!< in: memory heap from which allocated */
{
que_fork_t* fork;
que_thr_t* thr;
@@ -1922,13 +1921,13 @@ pars_complete_graph_for_exec(
return(thr);
}
-/********************************************************************
-Create parser info struct.*/
-
+/****************************************************************//**
+Create parser info struct.
+@return own: info struct */
+UNIV_INTERN
pars_info_t*
pars_info_create(void)
/*==================*/
- /* out, own: info struct */
{
pars_info_t* info;
mem_heap_t* heap;
@@ -1946,29 +1945,29 @@ pars_info_create(void)
return(info);
}
-/********************************************************************
-Free info struct and everything it contains.*/
-
+/****************************************************************//**
+Free info struct and everything it contains. */
+UNIV_INTERN
void
pars_info_free(
/*===========*/
- pars_info_t* info) /* in: info struct */
+ pars_info_t* info) /*!< in, own: info struct */
{
mem_heap_free(info->heap);
}
-/********************************************************************
+/****************************************************************//**
Add bound literal. */
-
+UNIV_INTERN
void
pars_info_add_literal(
/*==================*/
- pars_info_t* info, /* in: info struct */
- const char* name, /* in: name */
- const void* address, /* in: address */
- ulint length, /* in: length of data */
- ulint type, /* in: type, e.g. DATA_FIXBINARY */
- ulint prtype) /* in: precise type, e.g.
+ pars_info_t* info, /*!< in: info struct */
+ const char* name, /*!< in: name */
+ const void* address, /*!< in: address */
+ ulint length, /*!< in: length of data */
+ ulint type, /*!< in: type, e.g. DATA_FIXBINARY */
+ ulint prtype) /*!< in: precise type, e.g.
DATA_UNSIGNED */
{
pars_bound_lit_t* pbl;
@@ -1990,22 +1989,22 @@ pars_info_add_literal(
ib_vector_push(info->bound_lits, pbl);
}
-/********************************************************************
+/****************************************************************//**
Equivalent to pars_info_add_literal(info, name, str, strlen(str),
DATA_VARCHAR, DATA_ENGLISH). */
-
+UNIV_INTERN
void
pars_info_add_str_literal(
/*======================*/
- pars_info_t* info, /* in: info struct */
- const char* name, /* in: name */
- const char* str) /* in: string */
+ pars_info_t* info, /*!< in: info struct */
+ const char* name, /*!< in: name */
+ const char* str) /*!< in: string */
{
pars_info_add_literal(info, name, str, strlen(str),
DATA_VARCHAR, DATA_ENGLISH);
}
-/********************************************************************
+/****************************************************************//**
Equivalent to:
char buf[4];
@@ -2014,13 +2013,13 @@ pars_info_add_literal(info, name, buf, 4, DATA_INT, 0);
except that the buffer is dynamically allocated from the info struct's
heap. */
-
+UNIV_INTERN
void
pars_info_add_int4_literal(
/*=======================*/
- pars_info_t* info, /* in: info struct */
- const char* name, /* in: name */
- lint val) /* in: value */
+ pars_info_t* info, /*!< in: info struct */
+ const char* name, /*!< in: name */
+ lint val) /*!< in: value */
{
byte* buf = mem_heap_alloc(info->heap, 4);
@@ -2028,7 +2027,7 @@ pars_info_add_int4_literal(
pars_info_add_literal(info, name, buf, 4, DATA_INT, 0);
}
-/********************************************************************
+/****************************************************************//**
Equivalent to:
char buf[8];
@@ -2037,13 +2036,13 @@ pars_info_add_literal(info, name, buf, 8, DATA_FIXBINARY, 0);
except that the buffer is dynamically allocated from the info struct's
heap. */
-
+UNIV_INTERN
void
pars_info_add_dulint_literal(
/*=========================*/
- pars_info_t* info, /* in: info struct */
- const char* name, /* in: name */
- dulint val) /* in: value */
+ pars_info_t* info, /*!< in: info struct */
+ const char* name, /*!< in: name */
+ dulint val) /*!< in: value */
{
byte* buf = mem_heap_alloc(info->heap, 8);
@@ -2052,16 +2051,16 @@ pars_info_add_dulint_literal(
pars_info_add_literal(info, name, buf, 8, DATA_FIXBINARY, 0);
}
-/********************************************************************
+/****************************************************************//**
Add user function. */
-
+UNIV_INTERN
void
pars_info_add_function(
/*===================*/
- pars_info_t* info, /* in: info struct */
- const char* name, /* in: function name */
- pars_user_func_cb_t func, /* in: function address */
- void* arg) /* in: user-supplied argument */
+ pars_info_t* info, /*!< in: info struct */
+ const char* name, /*!< in: function name */
+ pars_user_func_cb_t func, /*!< in: function address */
+ void* arg) /*!< in: user-supplied argument */
{
pars_user_func_t* puf;
@@ -2080,15 +2079,15 @@ pars_info_add_function(
ib_vector_push(info->funcs, puf);
}
-/********************************************************************
+/****************************************************************//**
Add bound id. */
-
+UNIV_INTERN
void
pars_info_add_id(
/*=============*/
- pars_info_t* info, /* in: info struct */
- const char* name, /* in: name */
- const char* id) /* in: id */
+ pars_info_t* info, /*!< in: info struct */
+ const char* name, /*!< in: name */
+ const char* id) /*!< in: id */
{
pars_bound_id_t* bid;
@@ -2106,16 +2105,15 @@ pars_info_add_id(
ib_vector_push(info->bound_ids, bid);
}
-/********************************************************************
-Get user function with the given name.*/
-
+/****************************************************************//**
+Get user function with the given name.
+@return user func, or NULL if not found */
+UNIV_INTERN
pars_user_func_t*
pars_info_get_user_func(
/*====================*/
- /* out: user func, or NULL if not
- found */
- pars_info_t* info, /* in: info struct */
- const char* name) /* in: function name to find*/
+ pars_info_t* info, /*!< in: info struct */
+ const char* name) /*!< in: function name to find*/
{
ulint i;
ib_vector_t* vec;
@@ -2137,16 +2135,15 @@ pars_info_get_user_func(
return(NULL);
}
-/********************************************************************
-Get bound literal with the given name.*/
-
+/****************************************************************//**
+Get bound literal with the given name.
+@return bound literal, or NULL if not found */
+UNIV_INTERN
pars_bound_lit_t*
pars_info_get_bound_lit(
/*====================*/
- /* out: bound literal, or NULL if
- not found */
- pars_info_t* info, /* in: info struct */
- const char* name) /* in: bound literal name to find */
+ pars_info_t* info, /*!< in: info struct */
+ const char* name) /*!< in: bound literal name to find */
{
ulint i;
ib_vector_t* vec;
@@ -2168,16 +2165,15 @@ pars_info_get_bound_lit(
return(NULL);
}
-/********************************************************************
-Get bound id with the given name.*/
-
+/****************************************************************//**
+Get bound id with the given name.
+@return bound id, or NULL if not found */
+UNIV_INTERN
pars_bound_id_t*
pars_info_get_bound_id(
/*===================*/
- /* out: bound id, or NULL if not
- found */
- pars_info_t* info, /* in: info struct */
- const char* name) /* in: bound id name to find */
+ pars_info_t* info, /*!< in: info struct */
+ const char* name) /*!< in: bound id name to find */
{
ulint i;
ib_vector_t* vec;
diff --git a/storage/innobase/pars/pars0sym.c b/storage/innobase/pars/pars0sym.c
index 2d56fff2d42..b56350116bb 100644
--- a/storage/innobase/pars/pars0sym.c
+++ b/storage/innobase/pars/pars0sym.c
@@ -1,7 +1,24 @@
-/******************************************************
-SQL parser symbol table
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1997 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file pars/pars0sym.c
+SQL parser symbol table
Created 12/15/1997 Heikki Tuuri
*******************************************************/
@@ -21,14 +38,14 @@ Created 12/15/1997 Heikki Tuuri
#include "eval0eval.h"
#include "row0sel.h"
-/**********************************************************************
-Creates a symbol table for a single stored procedure or query. */
-
+/******************************************************************//**
+Creates a symbol table for a single stored procedure or query.
+@return own: symbol table */
+UNIV_INTERN
sym_tab_t*
sym_tab_create(
/*===========*/
- /* out, own: symbol table */
- mem_heap_t* heap) /* in: memory heap where to create */
+ mem_heap_t* heap) /*!< in: memory heap where to create */
{
sym_tab_t* sym_tab;
@@ -42,15 +59,15 @@ sym_tab_create(
return(sym_tab);
}
-/**********************************************************************
+/******************************************************************//**
Frees the memory allocated dynamically AFTER parsing phase for variables
etc. in the symbol table. Does not free the mem heap where the table was
originally created. Frees also SQL explicit cursor definitions. */
-
+UNIV_INTERN
void
sym_tab_free_private(
/*=================*/
- sym_tab_t* sym_tab) /* in, own: symbol table */
+ sym_tab_t* sym_tab) /*!< in, own: symbol table */
{
sym_node_t* sym;
func_node_t* func;
@@ -80,15 +97,15 @@ sym_tab_free_private(
}
}
-/**********************************************************************
-Adds an integer literal to a symbol table. */
-
+/******************************************************************//**
+Adds an integer literal to a symbol table.
+@return symbol table node */
+UNIV_INTERN
sym_node_t*
sym_tab_add_int_lit(
/*================*/
- /* out: symbol table node */
- sym_tab_t* sym_tab, /* in: symbol table */
- ulint val) /* in: integer value */
+ sym_tab_t* sym_tab, /*!< in: symbol table */
+ ulint val) /*!< in: integer value */
{
sym_node_t* node;
byte* data;
@@ -102,7 +119,7 @@ sym_tab_add_int_lit(
node->indirection = NULL;
- dtype_set(&(node->common.val.type), DATA_INT, 0, 4);
+ dtype_set(dfield_get_type(&node->common.val), DATA_INT, 0, 4);
data = mem_heap_alloc(sym_tab->heap, 4);
mach_write_to_4(data, val);
@@ -120,17 +137,17 @@ sym_tab_add_int_lit(
return(node);
}
-/**********************************************************************
-Adds a string literal to a symbol table. */
-
+/******************************************************************//**
+Adds a string literal to a symbol table.
+@return symbol table node */
+UNIV_INTERN
sym_node_t*
sym_tab_add_str_lit(
/*================*/
- /* out: symbol table node */
- sym_tab_t* sym_tab, /* in: symbol table */
- byte* str, /* in: string with no quotes around
+ sym_tab_t* sym_tab, /*!< in: symbol table */
+ byte* str, /*!< in: string with no quotes around
it */
- ulint len) /* in: string length */
+ ulint len) /*!< in: string length */
{
sym_node_t* node;
byte* data;
@@ -144,7 +161,8 @@ sym_tab_add_str_lit(
node->indirection = NULL;
- dtype_set(&(node->common.val.type), DATA_VARCHAR, DATA_ENGLISH, 0);
+ dtype_set(dfield_get_type(&node->common.val),
+ DATA_VARCHAR, DATA_ENGLISH, 0);
if (len) {
data = mem_heap_alloc(sym_tab->heap, len);
@@ -166,16 +184,16 @@ sym_tab_add_str_lit(
return(node);
}
-/**********************************************************************
-Add a bound literal to a symbol table. */
-
+/******************************************************************//**
+Add a bound literal to a symbol table.
+@return symbol table node */
+UNIV_INTERN
sym_node_t*
sym_tab_add_bound_lit(
/*==================*/
- /* out: symbol table node */
- sym_tab_t* sym_tab, /* in: symbol table */
- const char* name, /* in: name of bound literal */
- ulint* lit_type) /* out: type of literal (PARS_*_LIT) */
+ sym_tab_t* sym_tab, /*!< in: symbol table */
+ const char* name, /*!< in: name of bound literal */
+ ulint* lit_type) /*!< out: type of literal (PARS_*_LIT) */
{
sym_node_t* node;
pars_bound_lit_t* blit;
@@ -226,7 +244,8 @@ sym_tab_add_bound_lit(
ut_error;
}
- dtype_set(&(node->common.val.type), blit->type, blit->prtype, len);
+ dtype_set(dfield_get_type(&node->common.val),
+ blit->type, blit->prtype, len);
dfield_set_data(&(node->common.val), blit->address, blit->length);
@@ -241,14 +260,14 @@ sym_tab_add_bound_lit(
return(node);
}
-/**********************************************************************
-Adds an SQL null literal to a symbol table. */
-
+/******************************************************************//**
+Adds an SQL null literal to a symbol table.
+@return symbol table node */
+UNIV_INTERN
sym_node_t*
sym_tab_add_null_lit(
/*=================*/
- /* out: symbol table node */
- sym_tab_t* sym_tab) /* in: symbol table */
+ sym_tab_t* sym_tab) /*!< in: symbol table */
{
sym_node_t* node;
@@ -261,9 +280,9 @@ sym_tab_add_null_lit(
node->indirection = NULL;
- node->common.val.type.mtype = DATA_ERROR;
+ dfield_get_type(&node->common.val)->mtype = DATA_ERROR;
- dfield_set_data(&(node->common.val), NULL, UNIV_SQL_NULL);
+ dfield_set_null(&node->common.val);
node->common.val_buf_size = 0;
node->prefetch_buf = NULL;
@@ -276,16 +295,16 @@ sym_tab_add_null_lit(
return(node);
}
-/**********************************************************************
-Adds an identifier to a symbol table. */
-
+/******************************************************************//**
+Adds an identifier to a symbol table.
+@return symbol table node */
+UNIV_INTERN
sym_node_t*
sym_tab_add_id(
/*===========*/
- /* out: symbol table node */
- sym_tab_t* sym_tab, /* in: symbol table */
- byte* name, /* in: identifier name */
- ulint len) /* in: identifier length */
+ sym_tab_t* sym_tab, /*!< in: symbol table */
+ byte* name, /*!< in: identifier name */
+ ulint len) /*!< in: identifier length */
{
sym_node_t* node;
@@ -301,7 +320,7 @@ sym_tab_add_id(
UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
- dfield_set_data(&(node->common.val), NULL, UNIV_SQL_NULL);
+ dfield_set_null(&node->common.val);
node->common.val_buf_size = 0;
node->prefetch_buf = NULL;
@@ -312,15 +331,15 @@ sym_tab_add_id(
return(node);
}
-/**********************************************************************
-Add a bound identifier to a symbol table. */
-
+/******************************************************************//**
+Add a bound identifier to a symbol table.
+@return symbol table node */
+UNIV_INTERN
sym_node_t*
sym_tab_add_bound_id(
/*===========*/
- /* out: symbol table node */
- sym_tab_t* sym_tab, /* in: symbol table */
- const char* name) /* in: name of bound id */
+ sym_tab_t* sym_tab, /*!< in: symbol table */
+ const char* name) /*!< in: name of bound id */
{
sym_node_t* node;
pars_bound_id_t* bid;
@@ -340,7 +359,7 @@ sym_tab_add_bound_id(
UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
- dfield_set_data(&(node->common.val), NULL, UNIV_SQL_NULL);
+ dfield_set_null(&node->common.val);
node->common.val_buf_size = 0;
node->prefetch_buf = NULL;
diff --git a/storage/innobase/plug.in b/storage/innobase/plug.in
index f7d2abed751..09a95ecc157 100644
--- a/storage/innobase/plug.in
+++ b/storage/innobase/plug.in
@@ -1,18 +1,30 @@
+#
+# Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+# Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
MYSQL_STORAGE_ENGINE(innobase, innodb, [InnoDB Storage Engine],
[Transactional Tables using InnoDB], [max,max-no-ndb])
MYSQL_PLUGIN_DIRECTORY(innobase, [storage/innobase])
MYSQL_PLUGIN_STATIC(innobase, [libinnobase.a])
MYSQL_PLUGIN_DYNAMIC(innobase, [ha_innodb.la])
MYSQL_PLUGIN_ACTIONS(innobase, [
- AC_CHECK_LIB(rt, aio_read, [innodb_system_libs="-lrt"])
- AC_SUBST(innodb_system_libs)
- AC_CHECK_HEADERS(aio.h sched.h)
+ AC_CHECK_HEADERS(sched.h)
AC_CHECK_SIZEOF(int, 4)
AC_CHECK_SIZEOF(long, 4)
AC_CHECK_SIZEOF(void*, 4)
- AC_CHECK_FUNCS(sched_yield)
- AC_CHECK_FUNCS(fdatasync)
- AC_CHECK_FUNCS(localtime_r)
+ AC_CHECK_FUNCS(sched_yield fdatasync localtime_r)
AC_C_BIGENDIAN
case "$target_os" in
lin*)
@@ -28,7 +40,9 @@ MYSQL_PLUGIN_ACTIONS(innobase, [
*solaris*|*SunOS*)
CFLAGS="$CFLAGS -DUNIV_SOLARIS";;
esac
+
INNODB_DYNAMIC_CFLAGS="-DMYSQL_DYNAMIC_PLUGIN"
+
case "$target_cpu" in
x86_64)
# The AMD64 ABI forbids absolute addresses in shared libraries
@@ -39,6 +53,173 @@ MYSQL_PLUGIN_ACTIONS(innobase, [
;;
esac
AC_SUBST(INNODB_DYNAMIC_CFLAGS)
+
+ AC_MSG_CHECKING(whether GCC atomic builtins are available)
+ # either define HAVE_IB_GCC_ATOMIC_BUILTINS or not
+ AC_TRY_RUN(
+ [
+ int main()
+ {
+ long x;
+ long y;
+ long res;
+ char c;
+
+ x = 10;
+ y = 123;
+ res = __sync_bool_compare_and_swap(&x, x, y);
+ if (!res || x != y) {
+ return(1);
+ }
+
+ x = 10;
+ y = 123;
+ res = __sync_bool_compare_and_swap(&x, x + 1, y);
+ if (res || x != 10) {
+ return(1);
+ }
+
+ x = 10;
+ y = 123;
+ res = __sync_add_and_fetch(&x, y);
+ if (res != 123 + 10 || x != 123 + 10) {
+ return(1);
+ }
+
+ c = 10;
+ res = __sync_lock_test_and_set(&c, 123);
+ if (res != 10 || c != 123) {
+ return(1);
+ }
+
+ return(0);
+ }
+ ],
+ [
+ AC_DEFINE([HAVE_IB_GCC_ATOMIC_BUILTINS], [1],
+ [GCC atomic builtins are available])
+ AC_MSG_RESULT(yes)
+ ],
+ [
+ AC_MSG_RESULT(no)
+ ]
+ )
+
+ AC_MSG_CHECKING(whether pthread_t can be used by GCC atomic builtins)
+ # either define HAVE_IB_ATOMIC_PTHREAD_T_GCC or not
+ AC_TRY_RUN(
+ [
+ #include <pthread.h>
+ #include <string.h>
+
+ int main(int argc, char** argv) {
+ pthread_t x1;
+ pthread_t x2;
+ pthread_t x3;
+
+ memset(&x1, 0x0, sizeof(x1));
+ memset(&x2, 0x0, sizeof(x2));
+ memset(&x3, 0x0, sizeof(x3));
+
+ __sync_bool_compare_and_swap(&x1, x2, x3);
+
+ return(0);
+ }
+ ],
+ [
+ AC_DEFINE([HAVE_IB_ATOMIC_PTHREAD_T_GCC], [1],
+ [pthread_t can be used by GCC atomic builtins])
+ AC_MSG_RESULT(yes)
+ ],
+ [
+ AC_MSG_RESULT(no)
+ ]
+ )
+
+ AC_MSG_CHECKING(whether Solaris libc atomic functions are available)
+ # either define HAVE_IB_SOLARIS_ATOMICS or not
+ AC_CHECK_FUNCS(atomic_add_long \
+ atomic_cas_32 \
+ atomic_cas_64 \
+ atomic_cas_ulong,
+
+ AC_DEFINE([HAVE_IB_SOLARIS_ATOMICS], [1],
+ [Define to 1 if Solaris libc atomic functions \
+ are available])
+ )
+
+ AC_MSG_CHECKING(whether pthread_t can be used by Solaris libc atomic functions)
+ # either define HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS or not
+ AC_TRY_RUN(
+ [
+ #include <pthread.h>
+ #include <string.h>
+
+ int main(int argc, char** argv) {
+ pthread_t x1;
+ pthread_t x2;
+ pthread_t x3;
+
+ memset(&x1, 0x0, sizeof(x1));
+ memset(&x2, 0x0, sizeof(x2));
+ memset(&x3, 0x0, sizeof(x3));
+
+ if (sizeof(pthread_t) == 4) {
+
+ atomic_cas_32(&x1, x2, x3);
+
+ } else if (sizeof(pthread_t) == 8) {
+
+ atomic_cas_64(&x1, x2, x3);
+
+ } else {
+
+ return(1);
+ }
+
+ return(0);
+ }
+ ],
+ [
+ AC_DEFINE([HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS], [1],
+ [pthread_t can be used by solaris atomics])
+ AC_MSG_RESULT(yes)
+ ],
+ [
+ AC_MSG_RESULT(no)
+ ]
+ )
+
+ # this is needed to know which one of atomic_cas_32() or atomic_cas_64()
+ # to use in the source
+ AC_CHECK_SIZEOF([pthread_t], [], [#include <pthread.h>])
+
+ # Check for x86 PAUSE instruction
+ AC_MSG_CHECKING(for x86 PAUSE instruction)
+ # We have to actually try running the test program, because of a bug
+ # in Solaris on x86_64, where it wrongly reports that PAUSE is not
+ # supported when trying to run an application. See
+ # http://bugs.opensolaris.org/bugdatabase/printableBug.do?bug_id=6478684
+ # We use ib_ prefix to avoid collisions if this code is added to
+ # mysql's configure.in.
+ AC_TRY_RUN(
+ [
+ int main() {
+ __asm__ __volatile__ ("pause");
+ return(0);
+ }
+ ],
+ [
+ AC_DEFINE([HAVE_IB_PAUSE_INSTRUCTION], [1], [Does x86 PAUSE instruction exist])
+ AC_MSG_RESULT(yes)
+ ],
+ [
+ AC_MSG_RESULT(no)
+ ],
+ [
+ AC_MSG_RESULT(no)
+ ]
+ )
])
# vim: set ft=config:
diff --git a/storage/innobase/que/que0que.c b/storage/innobase/que/que0que.c
index bf83f28f04e..2fe046fa9b8 100644
--- a/storage/innobase/que/que0que.c
+++ b/storage/innobase/que/que0que.c
@@ -1,7 +1,24 @@
-/******************************************************
-Query graph
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file que/que0que.c
+Query graph
Created 5/27/1996 Heikki Tuuri
*******************************************************/
@@ -31,11 +48,11 @@ Created 5/27/1996 Heikki Tuuri
#define QUE_ROUND_ROBIN_LIMIT (64 * 256 * 256 * 256)
#define QUE_MAX_LOOPS_WITHOUT_CHECK 16
+#ifdef UNIV_DEBUG
/* If the following flag is set TRUE, the module will print trace info
of SQL execution in the UNIV_SQL_DEBUG version */
-ibool que_trace_on = FALSE;
-
-ibool que_always_false = FALSE;
+UNIV_INTERN ibool que_trace_on = FALSE;
+#endif /* UNIV_DEBUG */
/* Short introduction to query graphs
==================================
@@ -107,7 +124,7 @@ When the execution of the graph completes, it is like returning
from a subprocedure: the query thread which requested the operation
starts running again. */
-/**************************************************************************
+/**********************************************************************//**
Moves a thread from another state to the QUE_THR_RUNNING state. Increments
the n_active_thrs counters of the query graph and transaction.
***NOTE***: This is the only function in which such a transition is allowed
@@ -116,35 +133,35 @@ static
void
que_thr_move_to_run_state(
/*======================*/
- que_thr_t* thr); /* in: an query thread */
+ que_thr_t* thr); /*!< in: a query thread */
-/***************************************************************************
+/***********************************************************************//**
Adds a query graph to the session's list of graphs. */
-
+UNIV_INTERN
void
que_graph_publish(
/*==============*/
- que_t* graph, /* in: graph */
- sess_t* sess) /* in: session */
+ que_t* graph, /*!< in: graph */
+ sess_t* sess) /*!< in: session */
{
ut_ad(mutex_own(&kernel_mutex));
UT_LIST_ADD_LAST(graphs, sess->graphs, graph);
}
-/***************************************************************************
-Creates a query graph fork node. */
-
+/***********************************************************************//**
+Creates a query graph fork node.
+@return own: fork node */
+UNIV_INTERN
que_fork_t*
que_fork_create(
/*============*/
- /* out, own: fork node */
- que_t* graph, /* in: graph, if NULL then this
+ que_t* graph, /*!< in: graph, if NULL then this
fork node is assumed to be the
graph root */
- que_node_t* parent, /* in: parent node */
- ulint fork_type, /* in: fork type */
- mem_heap_t* heap) /* in: memory heap where created */
+ que_node_t* parent, /*!< in: parent node */
+ ulint fork_type, /*!< in: fork type */
+ mem_heap_t* heap) /*!< in: memory heap where created */
{
que_fork_t* fork;
@@ -178,15 +195,15 @@ que_fork_create(
return(fork);
}
-/***************************************************************************
-Creates a query graph thread node. */
-
+/***********************************************************************//**
+Creates a query graph thread node.
+@return own: query thread node */
+UNIV_INTERN
que_thr_t*
que_thr_create(
/*===========*/
- /* out, own: query thread node */
- que_fork_t* parent, /* in: parent node, i.e., a fork node */
- mem_heap_t* heap) /* in: memory heap where created */
+ que_fork_t* parent, /*!< in: parent node, i.e., a fork node */
+ mem_heap_t* heap) /*!< in: memory heap where created */
{
que_thr_t* thr;
@@ -214,20 +231,20 @@ que_thr_create(
return(thr);
}
-/**************************************************************************
+/**********************************************************************//**
Moves a suspended query thread to the QUE_THR_RUNNING state and may release
a single worker thread to execute it. This function should be used to end
the wait state of a query thread waiting for a lock or a stored procedure
completion. */
-
+UNIV_INTERN
void
que_thr_end_wait(
/*=============*/
- que_thr_t* thr, /* in: query thread in the
+ que_thr_t* thr, /*!< in: query thread in the
QUE_THR_LOCK_WAIT,
or QUE_THR_PROCEDURE_WAIT, or
QUE_THR_SIG_REPLY_WAIT state */
- que_thr_t** next_thr) /* in/out: next query thread to run;
+ que_thr_t** next_thr) /*!< in/out: next query thread to run;
if the value which is passed in is
a pointer to a NULL pointer, then the
calling function can start running
@@ -262,13 +279,13 @@ que_thr_end_wait(
}
}
-/**************************************************************************
+/**********************************************************************//**
Same as que_thr_end_wait, but no parameter next_thr available. */
-
+UNIV_INTERN
void
que_thr_end_wait_no_next_thr(
/*=========================*/
- que_thr_t* thr) /* in: query thread in the QUE_THR_LOCK_WAIT,
+ que_thr_t* thr) /*!< in: query thread in the QUE_THR_LOCK_WAIT,
or QUE_THR_PROCEDURE_WAIT, or
QUE_THR_SIG_REPLY_WAIT state */
{
@@ -299,13 +316,13 @@ que_thr_end_wait_no_next_thr(
/* srv_que_task_enqueue_low(thr); */
}
-/**************************************************************************
+/**********************************************************************//**
Inits a query thread for a command. */
UNIV_INLINE
void
que_thr_init_command(
/*=================*/
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
thr->run_node = thr;
thr->prev_node = thr->common.parent;
@@ -313,20 +330,19 @@ que_thr_init_command(
que_thr_move_to_run_state(thr);
}
-/**************************************************************************
+/**********************************************************************//**
Starts execution of a command in a query fork. Picks a query thread which
is not in the QUE_THR_RUNNING state and moves it to that state. If none
can be chosen, a situation which may arise in parallelized fetches, NULL
-is returned. */
-
+is returned.
+@return a query thread of the graph moved to QUE_THR_RUNNING state, or
+NULL; the query thread should be executed by que_run_threads by the
+caller */
+UNIV_INTERN
que_thr_t*
que_fork_start_command(
/*===================*/
- /* out: a query thread of the graph moved to
- QUE_THR_RUNNING state, or NULL; the query
- thread should be executed by que_run_threads
- by the caller */
- que_fork_t* fork) /* in: a query fork */
+ que_fork_t* fork) /*!< in: a query fork */
{
que_thr_t* thr;
que_thr_t* suspended_thr = NULL;
@@ -336,6 +352,9 @@ que_fork_start_command(
fork->last_sel_node = NULL;
+ suspended_thr = NULL;
+ completed_thr = NULL;
+
/* Choose the query thread to run: usually there is just one thread,
but in a parallelized select, which necessarily is non-scrollable,
there may be several to choose from */
@@ -399,16 +418,16 @@ que_fork_start_command(
return(thr);
}
-/**************************************************************************
+/**********************************************************************//**
After signal handling is finished, returns control to a query graph error
handling routine. (Currently, just returns the control to the root of the
graph so that the graph can communicate an error message to the client.) */
-
+UNIV_INTERN
void
que_fork_error_handle(
/*==================*/
- trx_t* trx __attribute__((unused)), /* in: trx */
- que_t* fork) /* in: query graph which was run before signal
+ trx_t* trx __attribute__((unused)), /*!< in: trx */
+ que_t* fork) /*!< in: query graph which was run before signal
handling started, NULL not allowed */
{
que_thr_t* thr;
@@ -440,16 +459,16 @@ que_fork_error_handle(
srv_que_task_enqueue_low(thr);
}
-/********************************************************************
-Tests if all the query threads in the same fork have a given state. */
+/****************************************************************//**
+Tests if all the query threads in the same fork have a given state.
+@return TRUE if all the query threads in the same fork were in the
+given state */
UNIV_INLINE
ibool
que_fork_all_thrs_in_state(
/*=======================*/
- /* out: TRUE if all the query threads in the
- same fork were in the given state */
- que_fork_t* fork, /* in: query fork */
- ulint state) /* in: state */
+ que_fork_t* fork, /*!< in: query fork */
+ ulint state) /*!< in: state */
{
que_thr_t* thr_node;
@@ -467,13 +486,13 @@ que_fork_all_thrs_in_state(
return(TRUE);
}
-/**************************************************************************
+/**********************************************************************//**
Calls que_graph_free_recursive for statements in a statement list. */
static
void
que_graph_free_stat_list(
/*=====================*/
- que_node_t* node) /* in: first query graph node in the list */
+ que_node_t* node) /*!< in: first query graph node in the list */
{
while (node) {
que_graph_free_recursive(node);
@@ -482,14 +501,14 @@ que_graph_free_stat_list(
}
}
-/**************************************************************************
+/**********************************************************************//**
Frees a query graph, but not the heap where it was created. Does not free
explicit cursor declarations, they are freed in que_graph_free. */
-
+UNIV_INTERN
void
que_graph_free_recursive(
/*=====================*/
- que_node_t* node) /* in: query graph node */
+ que_node_t* node) /*!< in: query graph node */
{
que_fork_t* fork;
que_thr_t* thr;
@@ -499,6 +518,7 @@ que_graph_free_recursive(
upd_node_t* upd;
tab_node_t* cre_tab;
ind_node_t* cre_ind;
+ purge_node_t* purge;
if (node == NULL) {
@@ -560,6 +580,13 @@ que_graph_free_recursive(
mem_heap_free(ins->entry_sys_heap);
break;
+ case QUE_NODE_PURGE:
+ purge = node;
+
+ mem_heap_free(purge->heap);
+
+ break;
+
case QUE_NODE_UPDATE:
upd = node;
@@ -646,13 +673,13 @@ que_graph_free_recursive(
}
}
-/**************************************************************************
+/**********************************************************************//**
Frees a query graph. */
-
+UNIV_INTERN
void
que_graph_free(
/*===========*/
- que_t* graph) /* in: query graph; we assume that the memory
+ que_t* graph) /*!< in: query graph; we assume that the memory
heap where this graph was created is private
to this graph: if not, then use
que_graph_free_recursive and free the heap
@@ -677,46 +704,14 @@ que_graph_free(
mem_heap_free(graph->heap);
}
-/**************************************************************************
-Checks if the query graph is in a state where it should be freed, and
-frees it in that case. If the session is in a state where it should be
-closed, also this is done. */
-
-ibool
-que_graph_try_free(
-/*===============*/
- /* out: TRUE if freed */
- que_t* graph) /* in: query graph */
-{
- sess_t* sess;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- sess = (graph->trx)->sess;
-
- if ((graph->state == QUE_FORK_BEING_FREED)
- && (graph->n_active_thrs == 0)) {
-
- UT_LIST_REMOVE(graphs, sess->graphs, graph);
- que_graph_free(graph);
-
- sess_try_close(sess);
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/********************************************************************
-Performs an execution step on a thr node. */
+/****************************************************************//**
+Performs an execution step on a thr node.
+@return query thread to run next, or NULL if none */
static
que_thr_t*
que_thr_node_step(
/*==============*/
- /* out: query thread to run next, or NULL
- if none */
- que_thr_t* thr) /* in: query thread where run_node must
+ que_thr_t* thr) /*!< in: query thread where run_node must
be the thread node itself */
{
ut_ad(thr->run_node == thr);
@@ -748,7 +743,7 @@ que_thr_node_step(
return(NULL);
}
-/**************************************************************************
+/**********************************************************************//**
Moves a thread from another state to the QUE_THR_RUNNING state. Increments
the n_active_thrs counters of the query graph and transaction if thr was
not active.
@@ -758,7 +753,7 @@ static
void
que_thr_move_to_run_state(
/*======================*/
- que_thr_t* thr) /* in: an query thread */
+ que_thr_t* thr) /*!< in: a query thread */
{
trx_t* trx;
@@ -781,7 +776,7 @@ que_thr_move_to_run_state(
thr->state = QUE_THR_RUNNING;
}
-/**************************************************************************
+/**********************************************************************//**
Decrements the query thread reference counts in the query graph and the
transaction. May start signal handling, e.g., a rollback.
*** NOTE ***:
@@ -793,8 +788,8 @@ static
void
que_thr_dec_refer_count(
/*====================*/
- que_thr_t* thr, /* in: query thread */
- que_thr_t** next_thr) /* in/out: next query thread to run;
+ que_thr_t* thr, /*!< in: query thread */
+ que_thr_t** next_thr) /*!< in/out: next query thread to run;
if the value which is passed in is
a pointer to a NULL pointer, then the
calling function can start running
@@ -802,13 +797,11 @@ que_thr_dec_refer_count(
{
que_fork_t* fork;
trx_t* trx;
- sess_t* sess;
ulint fork_type;
ibool stopped;
fork = thr->common.parent;
trx = thr_get_trx(thr);
- sess = trx->sess;
mutex_enter(&kernel_mutex);
@@ -835,7 +828,7 @@ que_thr_dec_refer_count(
*next_thr = thr;
} else {
- ut_a(0);
+ ut_error;
srv_que_task_enqueue_low(thr);
}
@@ -866,7 +859,8 @@ que_thr_dec_refer_count(
if (que_fork_all_thrs_in_state(fork, QUE_THR_COMPLETED)) {
- if (fork_type == QUE_FORK_ROLLBACK) {
+ switch (fork_type) {
+ case QUE_FORK_ROLLBACK:
/* This is really the undo graph used in rollback,
no roll_node in this graph */
@@ -874,18 +868,17 @@ que_thr_dec_refer_count(
ut_ad(trx->handling_signals == TRUE);
trx_finish_rollback_off_kernel(fork, trx, next_thr);
+ break;
- } else if (fork_type == QUE_FORK_PURGE) {
-
- /* Do nothing */
- } else if (fork_type == QUE_FORK_RECOVERY) {
+ case QUE_FORK_PURGE:
+ case QUE_FORK_RECOVERY:
+ case QUE_FORK_MYSQL_INTERFACE:
/* Do nothing */
- } else if (fork_type == QUE_FORK_MYSQL_INTERFACE) {
+ break;
- /* Do nothing */
- } else {
- ut_error; /* not used in MySQL */
+ default:
+ ut_error; /*!< not used in MySQL */
}
}
@@ -906,16 +899,16 @@ que_thr_dec_refer_count(
mutex_exit(&kernel_mutex);
}
-/**************************************************************************
+/**********************************************************************//**
Stops a query thread if graph or trx is in a state requiring it. The
conditions are tested in the order (1) graph, (2) trx. The kernel mutex has
-to be reserved. */
-
+to be reserved.
+@return TRUE if stopped */
+UNIV_INTERN
ibool
que_thr_stop(
/*=========*/
- /* out: TRUE if stopped */
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
trx_t* trx;
que_t* graph;
@@ -953,16 +946,16 @@ que_thr_stop(
return(ret);
}
-/**************************************************************************
+/**********************************************************************//**
A patch for MySQL used to 'stop' a dummy query thread used in MySQL. The
query thread is stopped and made inactive, except in the case where
it was put to the lock wait state in lock0lock.c, but the lock has already
been granted or the transaction chosen as a victim in deadlock resolution. */
-
+UNIV_INTERN
void
que_thr_stop_for_mysql(
/*===================*/
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
trx_t* trx;
@@ -1000,16 +993,16 @@ que_thr_stop_for_mysql(
mutex_exit(&kernel_mutex);
}
-/**************************************************************************
+/**********************************************************************//**
Moves a thread from another state to the QUE_THR_RUNNING state. Increments
the n_active_thrs counters of the query graph and transaction if thr was
not active. */
-
+UNIV_INTERN
void
que_thr_move_to_run_state_for_mysql(
/*================================*/
+ que_thr_t* thr, /*!< in: a query thread */
- trx_t* trx) /* in: transaction */
+ que_thr_t* thr, /*!< in: an query thread */
+ trx_t* trx) /*!< in: transaction */
{
if (thr->magic_n != QUE_THR_MAGIC_N) {
fprintf(stderr,
@@ -1033,15 +1026,15 @@ que_thr_move_to_run_state_for_mysql(
thr->state = QUE_THR_RUNNING;
}
-/**************************************************************************
+/**********************************************************************//**
A patch for MySQL used to 'stop' a dummy query thread used in MySQL
select, when there is no error or lock wait. */
-
+UNIV_INTERN
void
que_thr_stop_for_mysql_no_error(
/*============================*/
- que_thr_t* thr, /* in: query thread */
- trx_t* trx) /* in: transaction */
+ que_thr_t* thr, /*!< in: query thread */
+ trx_t* trx) /*!< in: transaction */
{
ut_ad(thr->state == QUE_THR_RUNNING);
ut_ad(thr->is_active == TRUE);
@@ -1066,15 +1059,15 @@ que_thr_stop_for_mysql_no_error(
trx->n_active_thrs--;
}
-/********************************************************************
+/****************************************************************//**
Get the first containing loop node (e.g. while_node_t or for_node_t) for the
-given node, or NULL if the node is not within a loop. */
-
+given node, or NULL if the node is not within a loop.
+@return containing loop node, or NULL. */
+UNIV_INTERN
que_node_t*
que_node_get_containing_loop_node(
/*==============================*/
- /* out: containing loop node, or NULL. */
- que_node_t* node) /* in: node */
+ que_node_t* node) /*!< in: node */
{
ut_ad(node);
@@ -1097,13 +1090,13 @@ que_node_get_containing_loop_node(
return(node);
}
-/**************************************************************************
+/**********************************************************************//**
Prints info of an SQL query graph node. */
-
+UNIV_INTERN
void
que_node_print_info(
/*================*/
- que_node_t* node) /* in: query graph node */
+ que_node_t* node) /*!< in: query graph node */
{
ulint type;
const char* str;
@@ -1160,16 +1153,15 @@ que_node_print_info(
(ulong) type, str, (void*) node);
}
-/**************************************************************************
-Performs an execution step on a query thread. */
+/**********************************************************************//**
+Performs an execution step on a query thread.
+@return query thread to run next: it may differ from the input
+parameter if, e.g., a subprocedure call is made */
UNIV_INLINE
que_thr_t*
que_thr_step(
/*=========*/
- /* out: query thread to run next: it may
- differ from the input parameter if, e.g., a
- subprocedure call is made */
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
que_node_t* node;
que_thr_t* old_thr;
@@ -1283,13 +1275,13 @@ que_thr_step(
return(thr);
}
-/**************************************************************************
+/**********************************************************************//**
Run a query thread until it finishes or encounters e.g. a lock wait. */
static
void
que_run_threads_low(
/*================*/
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
que_thr_t* next_thr;
ulint cumul_resource;
@@ -1343,12 +1335,13 @@ loop:
goto loop;
}
-/**************************************************************************
+/**********************************************************************//**
Run a query thread. Handles lock waits. */
+UNIV_INTERN
void
que_run_threads(
/*============*/
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
loop:
ut_a(thr_get_trx(thr)->error_state == DB_SUCCESS);
@@ -1397,19 +1390,19 @@ loop:
mutex_exit(&kernel_mutex);
}
-/*************************************************************************
-Evaluate the given SQL. */
-
+/*********************************************************************//**
+Evaluate the given SQL.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
ulint
que_eval_sql(
/*=========*/
- /* out: error code or DB_SUCCESS */
- pars_info_t* info, /* in: info struct, or NULL */
- const char* sql, /* in: SQL string */
+ pars_info_t* info, /*!< in: info struct, or NULL */
+ const char* sql, /*!< in: SQL string */
ibool reserve_dict_mutex,
- /* in: if TRUE, acquire/release
+ /*!< in: if TRUE, acquire/release
dict_sys->mutex around call to pars_sql. */
- trx_t* trx) /* in: trx */
+ trx_t* trx) /*!< in: trx */
{
que_thr_t* thr;
que_t* graph;
diff --git a/storage/innobase/read/read0read.c b/storage/innobase/read/read0read.c
index 4068cf4fa69..85adae4ddff 100644
--- a/storage/innobase/read/read0read.c
+++ b/storage/innobase/read/read0read.c
@@ -1,7 +1,24 @@
-/******************************************************
-Cursor read
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1997 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file read/read0read.c
+Cursor read
Created 2/16/1997 Heikki Tuuri
*******************************************************/
@@ -120,39 +137,40 @@ TODO: proof this
*/
-/*************************************************************************
-Creates a read view object. */
+/*********************************************************************//**
+Creates a read view object.
+@return own: read view struct */
UNIV_INLINE
read_view_t*
read_view_create_low(
/*=================*/
- /* out, own: read view struct */
- ulint n, /* in: number of cells in the trx_ids array */
- mem_heap_t* heap) /* in: memory heap from which allocated */
+ ulint n, /*!< in: number of cells in the trx_ids array */
+ mem_heap_t* heap) /*!< in: memory heap from which allocated */
{
read_view_t* view;
view = mem_heap_alloc(heap, sizeof(read_view_t));
view->n_trx_ids = n;
- view->trx_ids = mem_heap_alloc(heap, n * sizeof(dulint));
+ view->trx_ids = mem_heap_alloc(heap, n * sizeof *view->trx_ids);
return(view);
}
-/*************************************************************************
+/*********************************************************************//**
Makes a copy of the oldest existing read view, with the exception that also
the creating trx of the oldest view is set as not visible in the 'copied'
view. Opens a new view if no views currently exist. The view must be closed
-with ..._close. This is used in purge. */
-
+with ..._close. This is used in purge.
+@return own: read view struct */
+UNIV_INTERN
read_view_t*
read_view_oldest_copy_or_open_new(
/*==============================*/
- /* out, own: read view struct */
- dulint cr_trx_id, /* in: trx_id of creating
- transaction, or (0, 0) used in purge*/
- mem_heap_t* heap) /* in: memory heap from which
+ trx_id_t cr_trx_id, /*!< in: trx_id of creating
+ transaction, or ut_dulint_zero
+ used in purge */
+ mem_heap_t* heap) /*!< in: memory heap from which
allocated */
{
read_view_t* old_view;
@@ -173,8 +191,7 @@ read_view_oldest_copy_or_open_new(
n = old_view->n_trx_ids;
- if (ut_dulint_cmp(old_view->creator_trx_id,
- ut_dulint_create(0,0)) != 0) {
+ if (!ut_dulint_is_zero(old_view->creator_trx_id)) {
n++;
} else {
needs_insert = FALSE;
@@ -226,18 +243,18 @@ read_view_oldest_copy_or_open_new(
return(view_copy);
}
-/*************************************************************************
+/*********************************************************************//**
Opens a read view where exactly the transactions serialized before this
-point in time are seen in the view. */
-
+point in time are seen in the view.
+@return own: read view struct */
+UNIV_INTERN
read_view_t*
read_view_open_now(
/*===============*/
- /* out, own: read view struct */
- dulint cr_trx_id, /* in: trx_id of creating
- transaction, or (0, 0) used in
- purge */
- mem_heap_t* heap) /* in: memory heap from which
+ trx_id_t cr_trx_id, /*!< in: trx_id of creating
+ transaction, or ut_dulint_zero
+ used in purge */
+ mem_heap_t* heap) /*!< in: memory heap from which
allocated */
{
read_view_t* view;
@@ -250,7 +267,7 @@ read_view_open_now(
view->creator_trx_id = cr_trx_id;
view->type = VIEW_NORMAL;
- view->undo_no = ut_dulint_create(0, 0);
+ view->undo_no = ut_dulint_zero;
/* No future transactions should be visible in the view */
@@ -301,27 +318,27 @@ read_view_open_now(
return(view);
}
-/*************************************************************************
+/*********************************************************************//**
Closes a read view. */
-
+UNIV_INTERN
void
read_view_close(
/*============*/
- read_view_t* view) /* in: read view */
+ read_view_t* view) /*!< in: read view */
{
ut_ad(mutex_own(&kernel_mutex));
UT_LIST_REMOVE(view_list, trx_sys->view_list, view);
}
-/*************************************************************************
+/*********************************************************************//**
Closes a consistent read view for MySQL. This function is called at an SQL
statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */
-
+UNIV_INTERN
void
read_view_close_for_mysql(
/*======================*/
- trx_t* trx) /* in: trx which has a read view */
+ trx_t* trx) /*!< in: trx which has a read view */
{
ut_a(trx->global_read_view);
@@ -337,13 +354,13 @@ read_view_close_for_mysql(
mutex_exit(&kernel_mutex);
}
-/*************************************************************************
+/*********************************************************************//**
Prints a read view to stderr. */
-
+UNIV_INTERN
void
read_view_print(
/*============*/
- read_view_t* view) /* in: read view */
+ const read_view_t* view) /*!< in: read view */
{
ulint n_ids;
ulint i;
@@ -361,37 +378,33 @@ read_view_print(
(ulong) ut_dulint_get_high(view->low_limit_no),
(ulong) ut_dulint_get_low(view->low_limit_no));
- fprintf(stderr, "Read view up limit trx id %lu %lu\n",
- (ulong) ut_dulint_get_high(view->up_limit_id),
- (ulong) ut_dulint_get_low(view->up_limit_id));
+ fprintf(stderr, "Read view up limit trx id " TRX_ID_FMT "\n",
+ TRX_ID_PREP_PRINTF(view->up_limit_id));
- fprintf(stderr, "Read view low limit trx id %lu %lu\n",
- (ulong) ut_dulint_get_high(view->low_limit_id),
- (ulong) ut_dulint_get_low(view->low_limit_id));
+ fprintf(stderr, "Read view low limit trx id " TRX_ID_FMT "\n",
+ TRX_ID_PREP_PRINTF(view->low_limit_id));
fprintf(stderr, "Read view individually stored trx ids:\n");
n_ids = view->n_trx_ids;
for (i = 0; i < n_ids; i++) {
- fprintf(stderr, "Read view trx id %lu %lu\n",
- (ulong) ut_dulint_get_high(
- read_view_get_nth_trx_id(view, i)),
- (ulong) ut_dulint_get_low(
+ fprintf(stderr, "Read view trx id " TRX_ID_FMT "\n",
+ TRX_ID_PREP_PRINTF(
read_view_get_nth_trx_id(view, i)));
}
}
-/*************************************************************************
+/*********************************************************************//**
Create a high-granularity consistent cursor view for mysql to be used
in cursors. In this consistent read view modifications done by the
creating transaction after the cursor is created or future transactions
are not visible. */
-
+UNIV_INTERN
cursor_view_t*
read_cursor_view_create_for_mysql(
/*==============================*/
- trx_t* cr_trx) /* in: trx where cursor view is created */
+ trx_t* cr_trx) /*!< in: trx where cursor view is created */
{
cursor_view_t* curview;
read_view_t* view;
@@ -474,15 +487,15 @@ read_cursor_view_create_for_mysql(
return(curview);
}
-/*************************************************************************
+/*********************************************************************//**
Close a given consistent cursor view for mysql and restore global read view
back to a transaction read view. */
-
+UNIV_INTERN
void
read_cursor_view_close_for_mysql(
/*=============================*/
- trx_t* trx, /* in: trx */
- cursor_view_t* curview)/* in: cursor view to be closed */
+ trx_t* trx, /*!< in: trx */
+ cursor_view_t* curview)/*!< in: cursor view to be closed */
{
ut_a(curview);
ut_a(curview->read_view);
@@ -502,16 +515,16 @@ read_cursor_view_close_for_mysql(
mem_heap_free(curview->heap);
}
-/*************************************************************************
+/*********************************************************************//**
This function sets a given consistent cursor view to a transaction
read view if given consistent cursor view is not NULL. Otherwise, function
restores a global read view to a transaction read view. */
-
+UNIV_INTERN
void
read_cursor_set_for_mysql(
/*======================*/
- trx_t* trx, /* in: transaction where cursor is set */
- cursor_view_t* curview)/* in: consistent cursor view to be set */
+ trx_t* trx, /*!< in: transaction where cursor is set */
+ cursor_view_t* curview)/*!< in: consistent cursor view to be set */
{
ut_a(trx);
diff --git a/storage/innobase/rem/rem0cmp.c b/storage/innobase/rem/rem0cmp.c
index ca0ec663548..e6dab0bc66b 100644
--- a/storage/innobase/rem/rem0cmp.c
+++ b/storage/innobase/rem/rem0cmp.c
@@ -1,7 +1,24 @@
-/***********************************************************************
-Comparison services for records
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1994-1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file rem/rem0cmp.c
+Comparison services for records
Created 7/1/1994 Heikki Tuuri
************************************************************************/
@@ -19,7 +36,7 @@ Created 7/1/1994 Heikki Tuuri
The records are put into alphabetical order in the following
way: let F be the first field where two records disagree.
-If there is a character in some position n where the the
+If there is a character in some position n where the
records disagree, the order is determined by comparison of
the characters at position n, possibly after
collating transformation. If there is no such character,
@@ -34,75 +51,70 @@ where two records disagree only in the way that one
has more fields than the other. */
#ifdef UNIV_DEBUG
-/*****************************************************************
+/*************************************************************//**
Used in debug checking of cmp_dtuple_... .
This function is used to compare a data tuple to a physical record. If
dtuple has n fields then rec must have either m >= n fields, or it must
-differ from dtuple in some of the m fields rec has. */
+differ from dtuple in some of the m fields rec has.
+@return 1, 0, -1, if dtuple is greater, equal, less than rec,
+respectively, when only the common first fields are compared */
static
int
cmp_debug_dtuple_rec_with_match(
/*============================*/
- /* out: 1, 0, -1, if dtuple is greater, equal,
- less than rec, respectively, when only the
- common first fields are compared */
- dtuple_t* dtuple, /* in: data tuple */
- rec_t* rec, /* in: physical record which differs from
+ const dtuple_t* dtuple, /*!< in: data tuple */
+ const rec_t* rec, /*!< in: physical record which differs from
dtuple in some of the common fields, or which
has an equal number or more fields than
dtuple */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint* matched_fields);/* in/out: number of already
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint* matched_fields);/*!< in/out: number of already
completely matched fields; when function
returns, contains the value for current
comparison */
#endif /* UNIV_DEBUG */
-#ifndef UNIV_HOTBACKUP
-/*****************************************************************
+/*************************************************************//**
This function is used to compare two data fields for which the data type
is such that we must use MySQL code to compare them. The prototype here
-must be a copy of the the one in ha_innobase.cc! */
+must be a copy of the one in ha_innobase.cc!
+@return 1, 0, -1, if a is greater, equal, less than b, respectively */
extern
int
innobase_mysql_cmp(
/*===============*/
- /* out: 1, 0, -1, if a is greater,
- equal, less than b, respectively */
- int mysql_type, /* in: MySQL type */
- uint charset_number, /* in: number of the charset */
- unsigned char* a, /* in: data field */
- unsigned int a_length, /* in: data field length,
+ int mysql_type, /*!< in: MySQL type */
+ uint charset_number, /*!< in: number of the charset */
+ const unsigned char* a, /*!< in: data field */
+ unsigned int a_length, /*!< in: data field length,
not UNIV_SQL_NULL */
- unsigned char* b, /* in: data field */
- unsigned int b_length); /* in: data field length,
+ const unsigned char* b, /*!< in: data field */
+ unsigned int b_length); /*!< in: data field length,
not UNIV_SQL_NULL */
-#endif /* !UNIV_HOTBACKUP */
-/*************************************************************************
+/*********************************************************************//**
Transforms the character code so that it is ordered appropriately for the
language. This is only used for the latin1 char set. MySQL does the
-comparisons for other char sets. */
+comparisons for other char sets.
+@return collation order position */
UNIV_INLINE
ulint
cmp_collate(
/*========*/
- /* out: collation order position */
- ulint code) /* in: code of a character stored in database record */
+ ulint code) /*!< in: code of a character stored in database record */
{
return((ulint) srv_latin1_ordering[code]);
}
-/*****************************************************************
-Returns TRUE if two columns are equal for comparison purposes. */
-
+/*************************************************************//**
+Returns TRUE if two columns are equal for comparison purposes.
+@return TRUE if the columns are considered equal in comparisons */
+UNIV_INTERN
ibool
cmp_cols_are_equal(
/*===============*/
- /* out: TRUE if the columns are
- considered equal in comparisons */
- const dict_col_t* col1, /* in: column 1 */
- const dict_col_t* col2, /* in: column 2 */
+ const dict_col_t* col1, /*!< in: column 1 */
+ const dict_col_t* col2, /*!< in: column 2 */
ibool check_charsets)
- /* in: whether to check charsets */
+ /*!< in: whether to check charsets */
{
if (dtype_is_non_binary_string_type(col1->mtype, col1->prtype)
&& dtype_is_non_binary_string_type(col2->mtype, col2->prtype)) {
@@ -145,23 +157,21 @@ cmp_cols_are_equal(
return(col1->mtype != DATA_INT || col1->len == col2->len);
}
-#ifndef UNIV_HOTBACKUP
-/*****************************************************************
+/*************************************************************//**
Innobase uses this function to compare two data fields for which the data type
-is such that we must compare whole fields or call MySQL to do the comparison */
+is such that we must compare whole fields or call MySQL to do the comparison
+@return 1, 0, -1, if a is greater, equal, less than b, respectively */
static
int
cmp_whole_field(
/*============*/
- /* out: 1, 0, -1, if a is greater,
- equal, less than b, respectively */
- ulint mtype, /* in: main type */
- ulint prtype, /* in: precise type */
- unsigned char* a, /* in: data field */
- unsigned int a_length, /* in: data field length,
+ ulint mtype, /*!< in: main type */
+ ulint prtype, /*!< in: precise type */
+ const byte* a, /*!< in: data field */
+ unsigned int a_length, /*!< in: data field length,
not UNIV_SQL_NULL */
- unsigned char* b, /* in: data field */
- unsigned int b_length) /* in: data field length,
+ const byte* b, /*!< in: data field */
+ unsigned int b_length) /*!< in: data field length,
not UNIV_SQL_NULL */
{
float f_1;
@@ -272,27 +282,24 @@ cmp_whole_field(
return(0);
}
-#endif /* !UNIV_HOTBACKUP */
-/*****************************************************************
+/*************************************************************//**
This function is used to compare two data fields for which we know the
-data type. */
-
+data type.
+@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */
+UNIV_INTERN
int
cmp_data_data_slow(
/*===============*/
- /* out: 1, 0, -1, if data1 is greater, equal,
- less than data2, respectively */
- ulint mtype, /* in: main type */
- ulint prtype, /* in: precise type */
- byte* data1, /* in: data field (== a pointer to a memory
+ ulint mtype, /*!< in: main type */
+ ulint prtype, /*!< in: precise type */
+ const byte* data1, /*!< in: data field (== a pointer to a memory
buffer) */
- ulint len1, /* in: data field length or UNIV_SQL_NULL */
- byte* data2, /* in: data field (== a pointer to a memory
+ ulint len1, /*!< in: data field length or UNIV_SQL_NULL */
+ const byte* data2, /*!< in: data field (== a pointer to a memory
buffer) */
- ulint len2) /* in: data field length or UNIV_SQL_NULL */
+ ulint len2) /*!< in: data field length or UNIV_SQL_NULL */
{
-#ifndef UNIV_HOTBACKUP
ulint data1_byte;
ulint data2_byte;
ulint cur_bytes;
@@ -385,57 +392,48 @@ next_byte:
data1++;
data2++;
}
-#else /* !UNIV_HOTBACKUP */
- /* This function depends on MySQL code that is not included in
- InnoDB Hot Backup builds. Besides, this function should never
- be called in InnoDB Hot Backup. */
- ut_error;
-#endif /* !UNIV_HOTBACKUP */
return(0); /* Not reached */
}
-/*****************************************************************
+/*************************************************************//**
This function is used to compare a data tuple to a physical record.
Only dtuple->n_fields_cmp first fields are taken into account for
-the the data tuple! If we denote by n = n_fields_cmp, then rec must
+the data tuple! If we denote by n = n_fields_cmp, then rec must
have either m >= n fields, or it must differ from dtuple in some of
the m fields rec has. If rec has an externally stored field we do not
compare it but return with value 0 if such a comparison should be
-made. */
-
+made.
+@return 1, 0, -1, if dtuple is greater, equal, less than rec,
+respectively, when only the common first fields are compared, or until
+the first externally stored field in rec */
+UNIV_INTERN
int
cmp_dtuple_rec_with_match(
/*======================*/
- /* out: 1, 0, -1, if dtuple is greater, equal,
- less than rec, respectively, when only the
- common first fields are compared, or
- until the first externally stored field in
- rec */
- dtuple_t* dtuple, /* in: data tuple */
- rec_t* rec, /* in: physical record which differs from
+ const dtuple_t* dtuple, /*!< in: data tuple */
+ const rec_t* rec, /*!< in: physical record which differs from
dtuple in some of the common fields, or which
has an equal number or more fields than
dtuple */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint* matched_fields, /* in/out: number of already completely
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint* matched_fields, /*!< in/out: number of already completely
matched fields; when function returns,
contains the value for current comparison */
- ulint* matched_bytes) /* in/out: number of already matched
+ ulint* matched_bytes) /*!< in/out: number of already matched
bytes within the first field not completely
matched; when function returns, contains the
value for current comparison */
{
-#ifndef UNIV_HOTBACKUP
- dfield_t* dtuple_field; /* current field in logical record */
+ const dfield_t* dtuple_field; /* current field in logical record */
ulint dtuple_f_len; /* the length of the current field
in the logical record */
- byte* dtuple_b_ptr; /* pointer to the current byte in
+ const byte* dtuple_b_ptr; /* pointer to the current byte in
logical field data */
ulint dtuple_byte; /* value of current byte to be compared
in dtuple*/
ulint rec_f_len; /* length of current field in rec */
- byte* rec_b_ptr; /* pointer to the current byte in
+ const byte* rec_b_ptr; /* pointer to the current byte in
rec field */
ulint rec_byte; /* value of current byte to be
compared in rec */
@@ -459,10 +457,10 @@ cmp_dtuple_rec_with_match(
rec_offs_comp(offsets));
ulint tup_info = dtuple_get_info_bits(dtuple);
- if (rec_info & REC_INFO_MIN_REC_FLAG) {
+ if (UNIV_UNLIKELY(rec_info & REC_INFO_MIN_REC_FLAG)) {
ret = !(tup_info & REC_INFO_MIN_REC_FLAG);
goto order_resolved;
- } else if (tup_info & REC_INFO_MIN_REC_FLAG) {
+ } else if (UNIV_UNLIKELY(tup_info & REC_INFO_MIN_REC_FLAG)) {
ret = -1;
goto order_resolved;
}
@@ -598,7 +596,7 @@ cmp_dtuple_rec_with_match(
}
ret = (int) (dtuple_byte - rec_byte);
- if (UNIV_UNLIKELY(ret)) {
+ if (UNIV_LIKELY(ret)) {
if (ret < 0) {
ret = -1;
goto order_resolved;
@@ -634,27 +632,19 @@ order_resolved:
*matched_bytes = cur_bytes;
return(ret);
-#else /* !UNIV_HOTBACKUP */
- /* This function depends on MySQL code that is not included in
- InnoDB Hot Backup builds. Besides, this function should never
- be called in InnoDB Hot Backup. */
- ut_error;
- return(0);
-#endif /* !UNIV_HOTBACKUP */
}
-/******************************************************************
-Compares a data tuple to a physical record. */
-
+/**************************************************************//**
+Compares a data tuple to a physical record.
+@see cmp_dtuple_rec_with_match
+@return 1, 0, -1, if dtuple is greater, equal, less than rec, respectively */
+UNIV_INTERN
int
cmp_dtuple_rec(
/*===========*/
- /* out: 1, 0, -1, if dtuple is greater, equal,
- less than rec, respectively; see the comments
- for cmp_dtuple_rec_with_match */
- dtuple_t* dtuple, /* in: data tuple */
- rec_t* rec, /* in: physical record */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
+ const dtuple_t* dtuple, /*!< in: data tuple */
+ const rec_t* rec, /*!< in: physical record */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
ulint matched_fields = 0;
ulint matched_bytes = 0;
@@ -664,17 +654,17 @@ cmp_dtuple_rec(
&matched_fields, &matched_bytes));
}
-/******************************************************************
+/**************************************************************//**
Checks if a dtuple is a prefix of a record. The last field in dtuple
-is allowed to be a prefix of the corresponding field in the record. */
-
+is allowed to be a prefix of the corresponding field in the record.
+@return TRUE if prefix */
+UNIV_INTERN
ibool
cmp_dtuple_is_prefix_of_rec(
/*========================*/
- /* out: TRUE if prefix */
- dtuple_t* dtuple, /* in: data tuple */
- rec_t* rec, /* in: physical record */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
+ const dtuple_t* dtuple, /*!< in: data tuple */
+ const rec_t* rec, /*!< in: physical record */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
ulint n_fields;
ulint matched_fields = 0;
@@ -704,47 +694,195 @@ cmp_dtuple_is_prefix_of_rec(
return(FALSE);
}
-/*****************************************************************
+/*************************************************************//**
+Compare two physical records that contain the same number of columns, none of
+which are stored externally, on their first dict_index_get_n_unique() fields.
+@return 1, 0, -1 if rec1 is greater, equal, less, respectively, than rec2 */
+UNIV_INTERN
+int
+cmp_rec_rec_simple(
+/*===============*/
+ const rec_t* rec1, /*!< in: physical record */
+ const rec_t* rec2, /*!< in: physical record */
+ const ulint* offsets1,/*!< in: rec_get_offsets(rec1, ...) */
+ const ulint* offsets2,/*!< in: rec_get_offsets(rec2, ...) */
+ const dict_index_t* index) /*!< in: data dictionary index */
+{
+ ulint rec1_f_len; /*!< length of current field in rec1 */
+ const byte* rec1_b_ptr; /*!< pointer to the current byte
+ in rec1 field */
+ ulint rec1_byte; /*!< value of current byte to be
+ compared in rec1 */
+ ulint rec2_f_len; /*!< length of current field in rec2 */
+ const byte* rec2_b_ptr; /*!< pointer to the current byte
+ in rec2 field */
+ ulint rec2_byte; /*!< value of current byte to be
+ compared in rec2 */
+ ulint cur_field; /*!< current field number */
+ ulint n_uniq; /*!< number of leading fields to compare */
+
+ n_uniq = dict_index_get_n_unique(index);
+ ut_ad(rec_offs_n_fields(offsets1) >= n_uniq);
+ ut_ad(rec_offs_n_fields(offsets2) >= n_uniq);
+
+ ut_ad(rec_offs_comp(offsets1) == rec_offs_comp(offsets2));
+
+ for (cur_field = 0; cur_field < n_uniq; cur_field++) {
+
+ ulint cur_bytes;
+ ulint mtype;
+ ulint prtype;
+ /* Look up the main and precise type of this index column. */
+ {
+ const dict_col_t* col
+ = dict_index_get_nth_col(index, cur_field);
+
+ mtype = col->mtype;
+ prtype = col->prtype;
+ }
+ /* Precondition: neither field is stored externally. */
+ ut_ad(!rec_offs_nth_extern(offsets1, cur_field));
+ ut_ad(!rec_offs_nth_extern(offsets2, cur_field));
+
+ rec1_b_ptr = rec_get_nth_field(rec1, offsets1,
+ cur_field, &rec1_f_len);
+ rec2_b_ptr = rec_get_nth_field(rec2, offsets2,
+ cur_field, &rec2_f_len);
+ /* SQL NULL: two NULLs are equal; otherwise the NULL one is less. */
+ if (rec1_f_len == UNIV_SQL_NULL
+ || rec2_f_len == UNIV_SQL_NULL) {
+
+ if (rec1_f_len == rec2_f_len) {
+
+ goto next_field;
+
+ } else if (rec2_f_len == UNIV_SQL_NULL) {
+
+ /* We define the SQL null to be the
+ smallest possible value of a field
+ in the alphabetical order */
+
+ return(1);
+ } else {
+ return(-1);
+ }
+ }
+ /* For these types, whole fields are compared by cmp_whole_field(). */
+ if (mtype >= DATA_FLOAT
+ || (mtype == DATA_BLOB
+ && 0 == (prtype & DATA_BINARY_TYPE)
+ && dtype_get_charset_coll(prtype)
+ != DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) {
+ int ret = cmp_whole_field(mtype, prtype,
+ rec1_b_ptr,
+ (unsigned) rec1_f_len,
+ rec2_b_ptr,
+ (unsigned) rec2_f_len);
+ if (ret) {
+ return(ret);
+ }
+
+ goto next_field;
+ }
+
+ /* Compare the fields byte by byte */
+ for (cur_bytes = 0;; cur_bytes++, rec1_b_ptr++, rec2_b_ptr++) {
+ if (rec2_f_len <= cur_bytes) {
+ /* No more bytes in the rec2 field */
+ if (rec1_f_len <= cur_bytes) {
+ /* nor in the rec1 field: the fields are equal */
+ goto next_field;
+ }
+ /* rec1 is longer: substitute the pad character for rec2 */
+ rec2_byte = dtype_get_pad_char(mtype, prtype);
+ /* ULINT_UNDEFINED pad character: the longer rec1 is greater */
+ if (rec2_byte == ULINT_UNDEFINED) {
+ return(1);
+ }
+ } else {
+ rec2_byte = *rec2_b_ptr;
+ }
+
+ if (rec1_f_len <= cur_bytes) {
+ rec1_byte = dtype_get_pad_char(mtype, prtype);
+ /* ULINT_UNDEFINED pad character: the longer rec2 is greater */
+ if (rec1_byte == ULINT_UNDEFINED) {
+ return(-1);
+ }
+ } else {
+ rec1_byte = *rec1_b_ptr;
+ }
+
+ if (rec1_byte == rec2_byte) {
+ /* If the bytes are equal, they will remain
+ such even after the collation transformation
+ below */
+
+ continue;
+ }
+
+ if (mtype <= DATA_CHAR
+ || (mtype == DATA_BLOB
+ && !(prtype & DATA_BINARY_TYPE))) {
+ /* Map both bytes to their collation order positions. */
+ rec1_byte = cmp_collate(rec1_byte);
+ rec2_byte = cmp_collate(rec2_byte);
+ }
+
+ if (rec1_byte < rec2_byte) {
+ return(-1);
+ } else if (rec1_byte > rec2_byte) {
+ return(1);
+ }
+ }
+next_field:
+ continue;
+ }
+
+ /* If we ran out of fields, rec1 was equal to rec2. */
+ return(0);
+}
+
+/*************************************************************//**
This function is used to compare two physical records. Only the common
first fields are compared, and if an externally stored field is
-encountered, then 0 is returned. */
-
+encountered, then 0 is returned.
+@return 1, 0, -1 if rec1 is greater, equal, less, respectively, than rec2 */
+UNIV_INTERN
int
cmp_rec_rec_with_match(
/*===================*/
- /* out: 1, 0 , -1 if rec1 is greater, equal,
- less, respectively, than rec2; only the common
- first fields are compared */
- rec_t* rec1, /* in: physical record */
- rec_t* rec2, /* in: physical record */
- const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */
- const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */
- dict_index_t* index, /* in: data dictionary index */
- ulint* matched_fields, /* in/out: number of already completely
+ const rec_t* rec1, /*!< in: physical record */
+ const rec_t* rec2, /*!< in: physical record */
+ const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */
+ const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */
+ dict_index_t* index, /*!< in: data dictionary index */
+ ulint* matched_fields, /*!< in/out: number of already completely
matched fields; when the function returns,
contains the value the for current
comparison */
- ulint* matched_bytes) /* in/out: number of already matched
+ ulint* matched_bytes) /*!< in/out: number of already matched
bytes within the first field not completely
matched; when the function returns, contains
the value for the current comparison */
{
-#ifndef UNIV_HOTBACKUP
- ulint rec1_n_fields; /* the number of fields in rec */
- ulint rec1_f_len; /* length of current field in rec */
- byte* rec1_b_ptr; /* pointer to the current byte in rec field */
- ulint rec1_byte; /* value of current byte to be compared in
- rec */
- ulint rec2_n_fields; /* the number of fields in rec */
- ulint rec2_f_len; /* length of current field in rec */
- byte* rec2_b_ptr; /* pointer to the current byte in rec field */
- ulint rec2_byte; /* value of current byte to be compared in
- rec */
- ulint cur_field; /* current field number */
- ulint cur_bytes; /* number of already matched bytes in current
- field */
- int ret = 3333; /* return value */
- ulint comp;
+ ulint rec1_n_fields; /* the number of fields in rec1 */
+ ulint rec1_f_len; /* length of current field in rec1 */
+ const byte* rec1_b_ptr; /* pointer to the current byte
+ in rec1 field */
+ ulint rec1_byte; /* value of current byte to be
+ compared in rec1 */
+ ulint rec2_n_fields; /* the number of fields in rec2 */
+ ulint rec2_f_len; /* length of current field in rec2 */
+ const byte* rec2_b_ptr; /* pointer to the current byte
+ in rec2 field */
+ ulint rec2_byte; /* value of current byte to be
+ compared in rec2 */
+ ulint cur_field; /* current field number */
+ ulint cur_bytes; /* number of already matched
+ bytes in current field */
+ int ret = 0; /* return value */
+ ulint comp;
ut_ad(rec1 && rec2 && index);
ut_ad(rec_offs_validate(rec1, index, offsets1));
@@ -786,20 +924,19 @@ cmp_rec_rec_with_match(
if (cur_field == 0) {
/* Test if rec is the predefined minimum
record */
- if (rec_get_info_bits(rec1, comp)
- & REC_INFO_MIN_REC_FLAG) {
+ if (UNIV_UNLIKELY(rec_get_info_bits(rec1, comp)
+ & REC_INFO_MIN_REC_FLAG)) {
- if (rec_get_info_bits(rec2, comp)
- & REC_INFO_MIN_REC_FLAG) {
- ret = 0;
- } else {
+ if (!(rec_get_info_bits(rec2, comp)
+ & REC_INFO_MIN_REC_FLAG)) {
ret = -1;
}
goto order_resolved;
- } else if (rec_get_info_bits(rec2, comp)
- & REC_INFO_MIN_REC_FLAG) {
+ } else if (UNIV_UNLIKELY
+ (rec_get_info_bits(rec2, comp)
+ & REC_INFO_MIN_REC_FLAG)) {
ret = 1;
@@ -812,8 +949,6 @@ cmp_rec_rec_with_match(
/* We do not compare to an externally
stored field */
- ret = 0;
-
goto order_resolved;
}
@@ -933,8 +1068,9 @@ next_field:
ut_ad(cur_bytes == 0);
- ret = 0; /* If we ran out of fields, rec1 was equal to rec2 up
- to the common fields */
+ /* If we ran out of fields, rec1 was equal to rec2 up
+ to the common fields */
+ ut_ad(ret == 0);
order_resolved:
ut_ad((ret >= - 1) && (ret <= 1));
@@ -943,47 +1079,39 @@ order_resolved:
*matched_bytes = cur_bytes;
return(ret);
-#else /* !UNIV_HOTBACKUP */
- /* This function depends on MySQL code that is not included in
- InnoDB Hot Backup builds. Besides, this function should never
- be called in InnoDB Hot Backup. */
- ut_error;
- return(0);
-#endif /* !UNIV_HOTBACKUP */
}
#ifdef UNIV_DEBUG
-/*****************************************************************
+/*************************************************************//**
Used in debug checking of cmp_dtuple_... .
This function is used to compare a data tuple to a physical record. If
dtuple has n fields then rec must have either m >= n fields, or it must
differ from dtuple in some of the m fields rec has. If it encounters an
-externally stored field, returns 0. */
+externally stored field, returns 0.
+@return 1, 0, -1, if dtuple is greater, equal, less than rec,
+respectively, when only the common first fields are compared */
static
int
cmp_debug_dtuple_rec_with_match(
/*============================*/
- /* out: 1, 0, -1, if dtuple is greater, equal,
- less than rec, respectively, when only the
- common first fields are compared */
- dtuple_t* dtuple, /* in: data tuple */
- rec_t* rec, /* in: physical record which differs from
+ const dtuple_t* dtuple, /*!< in: data tuple */
+ const rec_t* rec, /*!< in: physical record which differs from
dtuple in some of the common fields, or which
has an equal number or more fields than
dtuple */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint* matched_fields) /* in/out: number of already
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint* matched_fields) /*!< in/out: number of already
completely matched fields; when function
returns, contains the value for current
comparison */
{
- dfield_t* dtuple_field; /* current field in logical record */
+ const dfield_t* dtuple_field; /* current field in logical record */
ulint dtuple_f_len; /* the length of the current field
in the logical record */
- byte* dtuple_f_data; /* pointer to the current logical
+ const byte* dtuple_f_data; /* pointer to the current logical
field data */
ulint rec_f_len; /* length of current field in rec */
- byte* rec_f_data; /* pointer to the current rec field */
+ const byte* rec_f_data; /* pointer to the current rec field */
int ret = 3333; /* return value */
ulint cur_field; /* current field number */
@@ -997,8 +1125,9 @@ cmp_debug_dtuple_rec_with_match(
cur_field = *matched_fields;
if (cur_field == 0) {
- if (rec_get_info_bits(rec, rec_offs_comp(offsets))
- & REC_INFO_MIN_REC_FLAG) {
+ if (UNIV_UNLIKELY
+ (rec_get_info_bits(rec, rec_offs_comp(offsets))
+ & REC_INFO_MIN_REC_FLAG)) {
ret = !(dtuple_get_info_bits(dtuple)
& REC_INFO_MIN_REC_FLAG);
@@ -1006,7 +1135,8 @@ cmp_debug_dtuple_rec_with_match(
goto order_resolved;
}
- if (dtuple_get_info_bits(dtuple) & REC_INFO_MIN_REC_FLAG) {
+ if (UNIV_UNLIKELY
+ (dtuple_get_info_bits(dtuple) & REC_INFO_MIN_REC_FLAG)) {
ret = -1;
goto order_resolved;
diff --git a/storage/innobase/rem/rem0rec.c b/storage/innobase/rem/rem0rec.c
index 64f8e2d319c..1c8b3fd8c1e 100644
--- a/storage/innobase/rem/rem0rec.c
+++ b/storage/innobase/rem/rem0rec.c
@@ -1,7 +1,24 @@
-/************************************************************************
-Record manager
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1994-2001 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file rem/rem0rec.c
+Record manager
Created 5/30/1994 Heikki Tuuri
*************************************************************************/
@@ -124,26 +141,204 @@ end of some field (containing also <FIELD-END>).
A record is a complete-field prefix of another record, if
the corresponding canonical strings have the same property. */
-ulint rec_dummy; /* this is used to fool compiler in
- rec_validate */
+/* this is used to fool compiler in rec_validate */
+UNIV_INTERN ulint rec_dummy;
-/*******************************************************************
-Validates the consistency of an old-style physical record. */
+/***************************************************************//**
+Validates the consistency of an old-style physical record.
+@return TRUE if ok */
static
ibool
rec_validate_old(
/*=============*/
- /* out: TRUE if ok */
- rec_t* rec); /* in: physical record */
+ const rec_t* rec); /*!< in: physical record */
+
+/******************************************************//**
+Count how many of the first n columns of an ordinary compact physical
+record have the extern flag set in their stored 2-byte length.
+@return number of externally stored columns */
+UNIV_INTERN
+ulint
+rec_get_n_extern_new(
+/*=================*/
+ const rec_t* rec, /*!< in: compact physical record */
+ dict_index_t* index, /*!< in: record descriptor */
+ ulint n) /*!< in: number of columns to scan, or ULINT_UNDEFINED for all index fields */
+{
+ const byte* nulls;
+ const byte* lens;
+ dict_field_t* field;
+ ulint null_mask;
+ ulint n_extern;
+ ulint i;
+
+ ut_ad(dict_table_is_comp(index->table));
+ ut_ad(rec_get_status(rec) == REC_STATUS_ORDINARY);
+ ut_ad(n == ULINT_UNDEFINED || n <= dict_index_get_n_fields(index));
+
+ if (n == ULINT_UNDEFINED) {
+ n = dict_index_get_n_fields(index);
+ }
+
+ nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
+ lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
+ null_mask = 1;
+ n_extern = 0;
+ i = 0;
+
+ /* scan fields 0..n-1; field 0 is always examined (do-while) */
+ do {
+ ulint len;
+
+ field = dict_index_get_nth_field(index, i);
+ if (!(dict_field_get_col(field)->prtype & DATA_NOT_NULL)) {
+ /* nullable field => read the null flag (after 8 flags the mask wraps and nulls moves down one byte) */
+
+ if (UNIV_UNLIKELY(!(byte) null_mask)) {
+ nulls--;
+ null_mask = 1;
+ }
+
+ if (*nulls & null_mask) {
+ null_mask <<= 1;
+ /* No length is stored for NULL fields. */
+ continue;
+ }
+ null_mask <<= 1;
+ }
+
+ if (UNIV_UNLIKELY(!field->fixed_len)) {
+ /* Variable-length field: read the first length byte */
+ const dict_col_t* col
+ = dict_field_get_col(field);
+ len = *lens--;
+ if (UNIV_UNLIKELY(col->len > 255)
+ || UNIV_UNLIKELY(col->mtype == DATA_BLOB)) {
+ if (len & 0x80) {
+ /* 1exxxxxx xxxxxxxx: 2-byte length; e (0x40) is the extern flag */
+ if (len & 0x40) {
+ n_extern++;
+ }
+ lens--;
+ }
+ }
+ }
+ } while (++i < n);
+
+ return(n_extern);
+}
+
+/******************************************************//**
+Determine the end offset of each field in a leaf-page record in
+ROW_FORMAT=COMPACT, and the extra size and flags in rec_offs_base(offsets)[0].
+This is a special case of rec_init_offsets() and rec_get_offsets_func(). */
+UNIV_INTERN
+void
+rec_init_offsets_comp_ordinary(
+/*===========================*/
+ const rec_t* rec, /*!< in: physical record in
+ ROW_FORMAT=COMPACT */
+ ulint extra, /*!< in: number of bytes to reserve
+ between the record header and
+ the data payload
+ (usually REC_N_NEW_EXTRA_BYTES) */
+ const dict_index_t* index, /*!< in: record descriptor */
+ ulint* offsets)/*!< in/out: array of offsets;
+ in: n=rec_offs_n_fields(offsets) */
+{
+ ulint i = 0;
+ ulint offs = 0;
+ ulint any_ext = 0;
+ const byte* nulls = rec - (extra + 1);
+ const byte* lens = nulls
+ - UT_BITS_IN_BYTES(index->n_nullable);
+ dict_field_t* field;
+ ulint null_mask = 1;
-/**********************************************************
+#ifdef UNIV_DEBUG
+ /* We cannot invoke rec_offs_make_valid() here, because it can hold
+ that extra != REC_N_NEW_EXTRA_BYTES. Similarly, rec_offs_validate()
+ will fail in that case, because it invokes rec_get_status(). */
+ offsets[2] = (ulint) rec;
+ offsets[3] = (ulint) index;
+#endif /* UNIV_DEBUG */
+
+ /* read the lengths of fields 0..n-1, n=rec_offs_n_fields(offsets) */
+ do {
+ ulint len;
+
+ field = dict_index_get_nth_field(index, i);
+ if (!(dict_field_get_col(field)->prtype
+ & DATA_NOT_NULL)) {
+ /* nullable field => read the null flag (after 8 flags the mask wraps and nulls moves down one byte) */
+
+ if (UNIV_UNLIKELY(!(byte) null_mask)) {
+ nulls--;
+ null_mask = 1;
+ }
+
+ if (*nulls & null_mask) {
+ null_mask <<= 1;
+ /* No length is stored for NULL fields.
+ We do not advance offs, and we set
+ the length to zero and enable the
+ SQL NULL flag in offsets[]. */
+ len = offs | REC_OFFS_SQL_NULL;
+ goto resolved;
+ }
+ null_mask <<= 1;
+ }
+
+ if (UNIV_UNLIKELY(!field->fixed_len)) {
+ /* Variable-length field: read the first length byte */
+ const dict_col_t* col
+ = dict_field_get_col(field);
+ len = *lens--;
+ if (UNIV_UNLIKELY(col->len > 255)
+ || UNIV_UNLIKELY(col->mtype
+ == DATA_BLOB)) {
+ if (len & 0x80) {
+ /* 1exxxxxx xxxxxxxx: 2-byte length; e (0x4000 once combined) is the extern flag */
+ len <<= 8;
+ len |= *lens--;
+
+ offs += len & 0x3fff;
+ if (UNIV_UNLIKELY(len
+ & 0x4000)) {
+ ut_ad(dict_index_is_clust
+ (index));
+ any_ext = REC_OFFS_EXTERNAL;
+ len = offs
+ | REC_OFFS_EXTERNAL;
+ } else {
+ len = offs;
+ }
+
+ goto resolved;
+ }
+ }
+
+ len = offs += len;
+ } else {
+ len = offs += field->fixed_len;
+ }
+resolved:
+ rec_offs_base(offsets)[i + 1] = len;
+ } while (++i < rec_offs_n_fields(offsets));
+
+ *rec_offs_base(offsets)
+ = (rec - (lens + 1)) | REC_OFFS_COMPACT | any_ext;
+}
+
+/******************************************************//**
The following function determines the offsets to each field in the
record. The offsets are written to a previously allocated array of
ulint, where rec_offs_n_fields(offsets) has been initialized to the
number of fields in the record. The rest of the array will be
initialized by this function. rec_offs_base(offsets)[0] will be set
to the extra size (if REC_OFFS_COMPACT is set, the record is in the
-new format), and rec_offs_base(offsets)[1..n_fields] will be set to
+new format; if REC_OFFS_EXTERNAL is set, the record contains externally
+stored columns), and rec_offs_base(offsets)[1..n_fields] will be set to
offsets past the end of fields 0..n_fields, or to the beginning of
fields 1..n_fields+1. When the high-order bit of the offset at [i+1]
is set (REC_OFFS_SQL_NULL), the field i is NULL. When the second
@@ -153,10 +348,10 @@ static
void
rec_init_offsets(
/*=============*/
- rec_t* rec, /* in: physical record */
- dict_index_t* index, /* in: record descriptor */
- ulint* offsets)/* in/out: array of offsets;
- in: n=rec_offs_n_fields(offsets) */
+ const rec_t* rec, /*!< in: physical record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ ulint* offsets)/*!< in/out: array of offsets;
+ in: n=rec_offs_n_fields(offsets) */
{
ulint i = 0;
ulint offs;
@@ -184,7 +379,10 @@ rec_init_offsets(
= dict_index_get_n_unique_in_tree(index);
break;
case REC_STATUS_ORDINARY:
- break;
+ rec_init_offsets_comp_ordinary(rec,
+ REC_N_NEW_EXTRA_BYTES,
+ index, offsets);
+ return;
}
nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
@@ -232,17 +430,17 @@ rec_init_offsets(
== DATA_BLOB)) {
if (len & 0x80) {
/* 1exxxxxx xxxxxxxx */
+
len <<= 8;
len |= *lens--;
+ /* B-tree node pointers
+ must not contain externally
+ stored columns. Thus
+ the "e" flag must be 0. */
+ ut_a(!(len & 0x4000));
offs += len & 0x3fff;
- if (UNIV_UNLIKELY(len
- & 0x4000)) {
- len = offs
- | REC_OFFS_EXTERNAL;
- } else {
- len = offs;
- }
+ len = offs;
goto resolved;
}
@@ -286,6 +484,7 @@ resolved:
if (offs & REC_2BYTE_EXTERN_MASK) {
offs &= ~REC_2BYTE_EXTERN_MASK;
offs |= REC_OFFS_EXTERNAL;
+ *rec_offs_base(offsets) |= REC_OFFS_EXTERNAL;
}
rec_offs_base(offsets)[1 + i] = offs;
} while (++i < rec_offs_n_fields(offsets));
@@ -293,24 +492,26 @@ resolved:
}
}
-/**********************************************************
+/******************************************************//**
The following function determines the offsets to each field
-in the record. It can reuse a previously returned array. */
-
+in the record. It can reuse a previously returned array.
+@return the new offsets */
+UNIV_INTERN
ulint*
rec_get_offsets_func(
/*=================*/
- /* out: the new offsets */
- rec_t* rec, /* in: physical record */
- dict_index_t* index, /* in: record descriptor */
- ulint* offsets,/* in/out: array consisting of offsets[0]
- allocated elements, or an array from
- rec_get_offsets(), or NULL */
- ulint n_fields,/* in: maximum number of initialized fields
- (ULINT_UNDEFINED if all fields) */
- mem_heap_t** heap, /* in/out: memory heap */
- const char* file, /* in: file name where called */
- ulint line) /* in: line number where called */
+ const rec_t* rec, /*!< in: physical record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ ulint* offsets,/*!< in/out: array consisting of
+ offsets[0] allocated elements,
+ or an array from rec_get_offsets(),
+ or NULL */
+ ulint n_fields,/*!< in: maximum number of
+ initialized fields
+ (ULINT_UNDEFINED if all fields) */
+ mem_heap_t** heap, /*!< in/out: memory heap */
+ const char* file, /*!< in: file name where called */
+ ulint line) /*!< in: line number where called */
{
ulint n;
ulint size;
@@ -349,9 +550,9 @@ rec_get_offsets_func(
if (UNIV_UNLIKELY(!offsets)
|| UNIV_UNLIKELY(rec_offs_get_n_alloc(offsets) < size)) {
- if (!*heap) {
+ if (UNIV_UNLIKELY(!*heap)) {
*heap = mem_heap_create_func(size * sizeof(ulint),
- NULL, MEM_HEAP_DYNAMIC,
+ MEM_HEAP_DYNAMIC,
file, line);
}
offsets = mem_heap_alloc(*heap, size * sizeof(ulint));
@@ -363,18 +564,133 @@ rec_get_offsets_func(
return(offsets);
}
-/****************************************************************
-The following function is used to get a pointer to the nth
-data field in an old-style record. */
-
-byte*
-rec_get_nth_field_old(
-/*==================*/
- /* out: pointer to the field */
- rec_t* rec, /* in: record */
- ulint n, /* in: index of the field */
- ulint* len) /* out: length of the field; UNIV_SQL_NULL if SQL
- null */
+/******************************************************//**
+Determines the offsets to each field of a compact record whose null flags
+and lengths are given in reverse order. offsets must be preallocated. */
+UNIV_INTERN
+void
+rec_get_offsets_reverse(
+/*====================*/
+ const byte* extra, /*!< in: the extra bytes of a
+ compact record in reverse order,
+ excluding the fixed-size
+ REC_N_NEW_EXTRA_BYTES */
+ const dict_index_t* index, /*!< in: record descriptor */
+ ulint node_ptr,/*!< in: nonzero=node pointer,
+ 0=leaf node */
+ ulint* offsets)/*!< in/out: array consisting of
+ offsets[0] allocated elements */
+{
+ ulint n;
+ ulint i;
+ ulint offs;
+ ulint any_ext;
+ const byte* nulls;
+ const byte* lens;
+ dict_field_t* field;
+ ulint null_mask;
+ ulint n_node_ptr_field;
+
+ ut_ad(extra);
+ ut_ad(index);
+ ut_ad(offsets);
+ ut_ad(dict_table_is_comp(index->table));
+
+ if (UNIV_UNLIKELY(node_ptr)) {
+ n_node_ptr_field = dict_index_get_n_unique_in_tree(index);
+ n = n_node_ptr_field + 1;
+ } else {
+ n_node_ptr_field = ULINT_UNDEFINED;
+ n = dict_index_get_n_fields(index);
+ }
+
+ ut_a(rec_offs_get_n_alloc(offsets) >= n + (1 + REC_OFFS_HEADER_SIZE));
+ rec_offs_set_n_fields(offsets, n);
+
+ nulls = extra;
+ lens = nulls + UT_BITS_IN_BYTES(index->n_nullable);
+ i = offs = 0;
+ null_mask = 1;
+ any_ext = 0;
+
+ /* read the lengths of fields 0..n-1; a node pointer field takes 4 bytes */
+ do {
+ ulint len;
+ if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
+ len = offs += 4;
+ goto resolved;
+ }
+
+ field = dict_index_get_nth_field(index, i);
+ if (!(dict_field_get_col(field)->prtype & DATA_NOT_NULL)) {
+ /* nullable field => read the null flag (after 8 flags the mask wraps and nulls moves up one byte) */
+
+ if (UNIV_UNLIKELY(!(byte) null_mask)) {
+ nulls++;
+ null_mask = 1;
+ }
+
+ if (*nulls & null_mask) {
+ null_mask <<= 1;
+ /* No length is stored for NULL fields.
+ We do not advance offs, and we set
+ the length to zero and enable the
+ SQL NULL flag in offsets[]. */
+ len = offs | REC_OFFS_SQL_NULL;
+ goto resolved;
+ }
+ null_mask <<= 1;
+ }
+
+ if (UNIV_UNLIKELY(!field->fixed_len)) {
+ /* Variable-length field: read the first length byte */
+ const dict_col_t* col
+ = dict_field_get_col(field);
+ len = *lens++;
+ if (UNIV_UNLIKELY(col->len > 255)
+ || UNIV_UNLIKELY(col->mtype == DATA_BLOB)) {
+ if (len & 0x80) {
+ /* 1exxxxxx xxxxxxxx: 2-byte length; e (0x4000 once combined) is the extern flag */
+ len <<= 8;
+ len |= *lens++;
+
+ offs += len & 0x3fff;
+ if (UNIV_UNLIKELY(len & 0x4000)) {
+ any_ext = REC_OFFS_EXTERNAL;
+ len = offs | REC_OFFS_EXTERNAL;
+ } else {
+ len = offs;
+ }
+
+ goto resolved;
+ }
+ }
+
+ len = offs += len;
+ } else {
+ len = offs += field->fixed_len;
+ }
+resolved:
+ rec_offs_base(offsets)[i + 1] = len;
+ } while (++i < rec_offs_n_fields(offsets));
+
+ ut_ad(lens >= extra);
+ *rec_offs_base(offsets) = (lens - extra + REC_N_NEW_EXTRA_BYTES)
+ | REC_OFFS_COMPACT | any_ext;
+}
+
+/************************************************************//**
+The following function is used to get the offset to the nth
+data field in an old-style record.
+@return offset to the field */
+UNIV_INTERN
+ulint
+rec_get_nth_field_offs_old(
+/*=======================*/
+ const rec_t* rec, /*!< in: record */
+ ulint n, /*!< in: index of the field */
+ ulint* len) /*!< out: length of the field;
+ UNIV_SQL_NULL if SQL null */
{
ulint os;
ulint next_os;
@@ -382,13 +698,13 @@ rec_get_nth_field_old(
ut_ad(rec && len);
ut_ad(n < rec_get_n_fields_old(rec));
- if (n > REC_MAX_N_FIELDS) {
+ if (UNIV_UNLIKELY(n > REC_MAX_N_FIELDS)) {
fprintf(stderr, "Error: trying to access field %lu in rec\n",
(ulong) n);
ut_error;
}
- if (rec == NULL) {
+ if (UNIV_UNLIKELY(rec == NULL)) {
fputs("Error: rec is NULL pointer\n", stderr);
ut_error;
}
@@ -401,7 +717,7 @@ rec_get_nth_field_old(
if (next_os & REC_1BYTE_SQL_NULL_MASK) {
*len = UNIV_SQL_NULL;
- return(rec + os);
+ return(os);
}
next_os = next_os & ~REC_1BYTE_SQL_NULL_MASK;
@@ -413,7 +729,7 @@ rec_get_nth_field_old(
if (next_os & REC_2BYTE_SQL_NULL_MASK) {
*len = UNIV_SQL_NULL;
- return(rec + os);
+ return(os);
}
next_os = next_os & ~(REC_2BYTE_SQL_NULL_MASK
@@ -424,62 +740,50 @@ rec_get_nth_field_old(
ut_ad(*len < UNIV_PAGE_SIZE);
- return(rec + os);
+ return(os);
}
-/**************************************************************
-The following function returns the size of a data tuple when converted to
-a new-style physical record. */
-
+/**********************************************************//**
+Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
+@return total size */
+UNIV_INTERN
ulint
-rec_get_converted_size_new(
-/*=======================*/
- /* out: size */
- dict_index_t* index, /* in: record descriptor */
- dtuple_t* dtuple) /* in: data tuple */
+rec_get_converted_size_comp_prefix(
+/*===============================*/
+ const dict_index_t* index, /*!< in: record descriptor;
+ dict_table_is_comp() is
+ assumed to hold, even if
+ it does not */
+ const dfield_t* fields, /*!< in: array of data fields */
+ ulint n_fields,/*!< in: number of data fields */
+ ulint* extra) /*!< out: extra size */
{
- ulint size = REC_N_NEW_EXTRA_BYTES
- + UT_BITS_IN_BYTES(index->n_nullable);
- ulint i;
- ulint n_fields;
- ut_ad(index && dtuple);
- ut_ad(dict_table_is_comp(index->table));
+ ulint extra_size;
+ ulint data_size;
+ ulint i;
+ ut_ad(index);
+ ut_ad(fields);
+ ut_ad(n_fields > 0);
+ ut_ad(n_fields <= dict_index_get_n_fields(index));
- switch (dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK) {
- case REC_STATUS_ORDINARY:
- n_fields = dict_index_get_n_fields(index);
- ut_ad(n_fields == dtuple_get_n_fields(dtuple));
- break;
- case REC_STATUS_NODE_PTR:
- n_fields = dict_index_get_n_unique_in_tree(index);
- ut_ad(n_fields + 1 == dtuple_get_n_fields(dtuple));
- ut_ad(dtuple_get_nth_field(dtuple, n_fields)->len == 4);
- size += 4; /* child page number */
- break;
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- /* infimum or supremum record, 8 data bytes */
- return(REC_N_NEW_EXTRA_BYTES + 8);
- default:
- ut_error;
- return(ULINT_UNDEFINED);
- }
+ extra_size = REC_N_NEW_EXTRA_BYTES
+ + UT_BITS_IN_BYTES(index->n_nullable);
+ data_size = 0;
/* read the lengths of fields 0..n */
for (i = 0; i < n_fields; i++) {
- dict_field_t* field;
+ const dict_field_t* field;
ulint len;
const dict_col_t* col;
field = dict_index_get_nth_field(index, i);
- len = dtuple_get_nth_field(dtuple, i)->len;
+ len = dfield_get_len(&fields[i]);
col = dict_field_get_col(field);
- ut_ad(dict_col_type_assert_equal(
- col, dfield_get_type(dtuple_get_nth_field(
- dtuple, i))));
+ ut_ad(dict_col_type_assert_equal(col,
+ dfield_get_type(&fields[i])));
- if (len == UNIV_SQL_NULL) {
+ if (dfield_is_null(&fields[i])) {
/* No length is stored for NULL fields. */
ut_ad(!(col->prtype & DATA_NOT_NULL));
continue;
@@ -492,31 +796,85 @@ rec_get_converted_size_new(
/* dict_index_add_col() should guarantee this */
ut_ad(!field->prefix_len
|| field->fixed_len == field->prefix_len);
+ } else if (dfield_is_ext(&fields[i])) {
+ extra_size += 2;
} else if (len < 128
|| (col->len < 256 && col->mtype != DATA_BLOB)) {
- size++;
+ extra_size++;
} else {
/* For variable-length columns, we look up the
maximum length from the column itself. If this
is a prefix index column shorter than 256 bytes,
this will waste one byte. */
- size += 2;
+ extra_size += 2;
}
- size += len;
+ data_size += len;
}
- return(size);
+ if (UNIV_LIKELY_NULL(extra)) {
+ *extra = extra_size;
+ }
+
+ return(extra_size + data_size);
}
-/***************************************************************
-Sets the value of the ith field SQL null bit of an old-style record. */
+/**********************************************************//**
+Determines the size of a data tuple in ROW_FORMAT=COMPACT.
+@return total size */
+UNIV_INTERN
+ulint
+rec_get_converted_size_comp(
+/*========================*/
+ const dict_index_t* index, /*!< in: record descriptor;
+ dict_table_is_comp() is
+ assumed to hold, even if
+ it does not */
+ ulint status, /*!< in: status bits of the record (one of REC_STATUS_*) */
+ const dfield_t* fields, /*!< in: array of data fields */
+ ulint n_fields,/*!< in: number of data fields (including the node pointer field for REC_STATUS_NODE_PTR) */
+ ulint* extra) /*!< out: extra size; may be NULL */
+{
+ ulint size;
+ ut_ad(index);
+ ut_ad(fields);
+ ut_ad(n_fields > 0);
+ switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
+ case REC_STATUS_ORDINARY:
+ ut_ad(n_fields == dict_index_get_n_fields(index));
+ size = 0;
+ break;
+ case REC_STATUS_NODE_PTR:
+ n_fields--;
+ ut_ad(n_fields == dict_index_get_n_unique_in_tree(index));
+ ut_ad(dfield_get_len(&fields[n_fields]) == REC_NODE_PTR_SIZE);
+ size = REC_NODE_PTR_SIZE; /* child page number, added on top of the prefix size */
+ break;
+ case REC_STATUS_INFIMUM:
+ case REC_STATUS_SUPREMUM:
+ /* infimum or supremum record, 8 data bytes; index and fields are not consulted */
+ if (UNIV_LIKELY_NULL(extra)) {
+ *extra = REC_N_NEW_EXTRA_BYTES;
+ }
+ return(REC_N_NEW_EXTRA_BYTES + 8);
+ default:
+ ut_error;
+ return(ULINT_UNDEFINED);
+ }
+
+ return(size + rec_get_converted_size_comp_prefix(index, fields,
+ n_fields, extra));
+}
+
+/***********************************************************//**
+Sets the value of the ith field SQL null bit of an old-style record. */
+UNIV_INTERN
void
rec_set_nth_field_null_bit(
/*=======================*/
- rec_t* rec, /* in: record */
- ulint i, /* in: ith field */
- ibool val) /* in: value to set */
+ rec_t* rec, /*!< in: record */
+ ulint i, /*!< in: ith field */
+ ibool val) /*!< in: value to set */
{
ulint info;
@@ -546,169 +904,15 @@ rec_set_nth_field_null_bit(
rec_2_set_field_end_info(rec, i, info);
}
-/***************************************************************
-Sets the value of the ith field extern storage bit of an old-style record. */
-
-void
-rec_set_nth_field_extern_bit_old(
-/*=============================*/
- rec_t* rec, /* in: old-style record */
- ulint i, /* in: ith field */
- ibool val, /* in: value to set */
- mtr_t* mtr) /* in: mtr holding an X-latch to the page where
- rec is, or NULL; in the NULL case we do not
- write to log about the change */
-{
- ulint info;
-
- ut_a(!rec_get_1byte_offs_flag(rec));
- ut_a(i < rec_get_n_fields_old(rec));
-
- info = rec_2_get_field_end_info(rec, i);
-
- if (val) {
- info = info | REC_2BYTE_EXTERN_MASK;
- } else {
- info = info & ~REC_2BYTE_EXTERN_MASK;
- }
-
- if (mtr) {
- mlog_write_ulint(rec - REC_N_OLD_EXTRA_BYTES - 2 * (i + 1),
- info, MLOG_2BYTES, mtr);
- } else {
- rec_2_set_field_end_info(rec, i, info);
- }
-}
-
-/***************************************************************
-Sets the value of the ith field extern storage bit of a new-style record. */
-
-void
-rec_set_nth_field_extern_bit_new(
-/*=============================*/
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: record descriptor */
- ulint ith, /* in: ith field */
- ibool val, /* in: value to set */
- mtr_t* mtr) /* in: mtr holding an X-latch to the page
- where rec is, or NULL; in the NULL case
- we do not write to log about the change */
-{
- byte* nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
- byte* lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
- ulint i;
- ulint n_fields;
- ulint null_mask = 1;
- ut_ad(rec && index);
- ut_ad(dict_table_is_comp(index->table));
- ut_ad(rec_get_status(rec) == REC_STATUS_ORDINARY);
-
- n_fields = dict_index_get_n_fields(index);
-
- ut_ad(ith < n_fields);
-
- /* read the lengths of fields 0..n */
- for (i = 0; i < n_fields; i++) {
- const dict_field_t* field;
- const dict_col_t* col;
-
- field = dict_index_get_nth_field(index, i);
- col = dict_field_get_col(field);
-
- if (!(col->prtype & DATA_NOT_NULL)) {
- if (UNIV_UNLIKELY(!(byte) null_mask)) {
- nulls--;
- null_mask = 1;
- }
-
- if (*nulls & null_mask) {
- null_mask <<= 1;
- /* NULL fields cannot be external. */
- ut_ad(i != ith);
- continue;
- }
-
- null_mask <<= 1;
- }
- if (field->fixed_len) {
- /* fixed-length fields cannot be external
- (Fixed-length fields longer than
- DICT_MAX_INDEX_COL_LEN will be treated as
- variable-length ones in dict_index_add_col().) */
- ut_ad(i != ith);
- continue;
- }
- lens--;
- if (col->len > 255 || col->mtype == DATA_BLOB) {
- ulint len = lens[1];
- if (len & 0x80) { /* 1exxxxxx: 2-byte length */
- if (i == ith) {
- if (!val == !(len & 0x40)) {
- return; /* no change */
- }
- /* toggle the extern bit */
- len ^= 0x40;
- if (mtr) {
- mlog_write_ulint(lens + 1,
- len,
- MLOG_1BYTE,
- mtr);
- } else {
- lens[1] = (byte) len;
- }
- return;
- }
- lens--;
- } else {
- /* short fields cannot be external */
- ut_ad(i != ith);
- }
- } else {
- /* short fields cannot be external */
- ut_ad(i != ith);
- }
- }
-}
-
-/***************************************************************
-Sets TRUE the extern storage bits of fields mentioned in an array. */
-
-void
-rec_set_field_extern_bits(
-/*======================*/
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: record descriptor */
- const ulint* vec, /* in: array of field numbers */
- ulint n_fields,/* in: number of fields numbers */
- mtr_t* mtr) /* in: mtr holding an X-latch to the
- page where rec is, or NULL;
- in the NULL case we do not write
- to log about the change */
-{
- ulint i;
-
- if (dict_table_is_comp(index->table)) {
- for (i = 0; i < n_fields; i++) {
- rec_set_nth_field_extern_bit_new(rec, index, vec[i],
- TRUE, mtr);
- }
- } else {
- for (i = 0; i < n_fields; i++) {
- rec_set_nth_field_extern_bit_old(rec, vec[i],
- TRUE, mtr);
- }
- }
-}
-
-/***************************************************************
+/***********************************************************//**
Sets an old-style record field to SQL null.
The physical size of the field is not changed. */
-
+UNIV_INTERN
void
rec_set_nth_field_sql_null(
/*=======================*/
- rec_t* rec, /* in: record */
- ulint n) /* in: index of the field */
+ rec_t* rec, /*!< in: record */
+ ulint n) /*!< in: index of the field */
{
ulint offset;
@@ -719,25 +923,24 @@ rec_set_nth_field_sql_null(
rec_set_nth_field_null_bit(rec, n, TRUE);
}
-/*************************************************************
+/*********************************************************//**
Builds an old-style physical record out of a data tuple and
-stores it beginning from the start of the given buffer. */
+stores it beginning from the start of the given buffer.
+@return pointer to the origin of physical record */
static
rec_t*
rec_convert_dtuple_to_rec_old(
/*==========================*/
- /* out: pointer to the origin of
- physical record */
- byte* buf, /* in: start address of the physical record */
- dtuple_t* dtuple)/* in: data tuple */
+ byte* buf, /*!< in: start address of the physical record */
+ const dtuple_t* dtuple, /*!< in: data tuple */
+ ulint n_ext) /*!< in: number of externally stored columns */
{
- dfield_t* field;
+ const dfield_t* field;
ulint n_fields;
ulint data_size;
rec_t* rec;
ulint end_offset;
ulint ored_offset;
- byte* data;
ulint len;
ulint i;
@@ -746,13 +949,13 @@ rec_convert_dtuple_to_rec_old(
ut_ad(dtuple_check_typed(dtuple));
n_fields = dtuple_get_n_fields(dtuple);
- data_size = dtuple_get_data_size(dtuple);
+ data_size = dtuple_get_data_size(dtuple, 0);
ut_ad(n_fields > 0);
/* Calculate the offset of the origin in the physical record */
- rec = buf + rec_get_converted_extra_size(data_size, n_fields);
+ rec = buf + rec_get_converted_extra_size(data_size, n_fields, n_ext);
#ifdef UNIV_DEBUG
/* Suppress Valgrind warnings of ut_ad()
in mach_write_to_1(), mach_write_to_2() et al. */
@@ -762,14 +965,14 @@ rec_convert_dtuple_to_rec_old(
rec_set_n_fields_old(rec, n_fields);
/* Set the info bits of the record */
- rec_set_info_bits(rec, FALSE,
- dtuple_get_info_bits(dtuple) & REC_INFO_BITS_MASK);
+ rec_set_info_bits_old(rec, dtuple_get_info_bits(dtuple)
+ & REC_INFO_BITS_MASK);
/* Store the data and the offsets */
end_offset = 0;
- if (data_size <= REC_1BYTE_OFFS_LIMIT) {
+ if (!n_ext && data_size <= REC_1BYTE_OFFS_LIMIT) {
rec_set_1byte_offs_flag(rec, TRUE);
@@ -777,12 +980,9 @@ rec_convert_dtuple_to_rec_old(
field = dtuple_get_nth_field(dtuple, i);
- data = dfield_get_data(field);
- len = dfield_get_len(field);
-
- if (len == UNIV_SQL_NULL) {
+ if (dfield_is_null(field)) {
len = dtype_get_sql_null_size(
- dfield_get_type(field));
+ dfield_get_type(field), 0);
data_write_sql_null(rec + end_offset, len);
end_offset += len;
@@ -790,7 +990,10 @@ rec_convert_dtuple_to_rec_old(
| REC_1BYTE_SQL_NULL_MASK;
} else {
/* If the data is not SQL null, store it */
- ut_memcpy(rec + end_offset, data, len);
+ len = dfield_get_len(field);
+
+ memcpy(rec + end_offset,
+ dfield_get_data(field), len);
end_offset += len;
ored_offset = end_offset;
@@ -805,12 +1008,9 @@ rec_convert_dtuple_to_rec_old(
field = dtuple_get_nth_field(dtuple, i);
- data = dfield_get_data(field);
- len = dfield_get_len(field);
-
- if (len == UNIV_SQL_NULL) {
+ if (dfield_is_null(field)) {
len = dtype_get_sql_null_size(
- dfield_get_type(field));
+ dfield_get_type(field), 0);
data_write_sql_null(rec + end_offset, len);
end_offset += len;
@@ -818,10 +1018,17 @@ rec_convert_dtuple_to_rec_old(
| REC_2BYTE_SQL_NULL_MASK;
} else {
/* If the data is not SQL null, store it */
- ut_memcpy(rec + end_offset, data, len);
+ len = dfield_get_len(field);
+
+ memcpy(rec + end_offset,
+ dfield_get_data(field), len);
end_offset += len;
ored_offset = end_offset;
+
+ if (dfield_is_ext(field)) {
+ ored_offset |= REC_2BYTE_EXTERN_MASK;
+ }
}
rec_2_set_field_end_info(rec, i, ored_offset);
@@ -831,22 +1038,24 @@ rec_convert_dtuple_to_rec_old(
return(rec);
}
-/*************************************************************
-Builds a new-style physical record out of a data tuple and
-stores it beginning from the start of the given buffer. */
-static
-rec_t*
-rec_convert_dtuple_to_rec_new(
-/*==========================*/
- /* out: pointer to the origin
- of physical record */
- byte* buf, /* in: start address of the physical record */
- dict_index_t* index, /* in: record descriptor */
- dtuple_t* dtuple) /* in: data tuple */
+/*********************************************************//**
+Builds a ROW_FORMAT=COMPACT record out of a data tuple. */
+UNIV_INTERN
+void
+rec_convert_dtuple_to_rec_comp(
+/*===========================*/
+ rec_t* rec, /*!< in: origin of record */
+ ulint extra, /*!< in: number of bytes to
+ reserve between the record
+ header and the data payload
+ (normally REC_N_NEW_EXTRA_BYTES) */
+ const dict_index_t* index, /*!< in: record descriptor */
+ ulint status, /*!< in: status bits of the record */
+ const dfield_t* fields, /*!< in: array of data fields */
+ ulint n_fields)/*!< in: number of data fields */
{
- dfield_t* field;
- dtype_t* type;
- rec_t* rec = buf + REC_N_NEW_EXTRA_BYTES;
+ const dfield_t* field;
+ const dtype_t* type;
byte* end;
byte* nulls;
byte* lens;
@@ -855,18 +1064,10 @@ rec_convert_dtuple_to_rec_new(
ulint n_node_ptr_field;
ulint fixed_len;
ulint null_mask = 1;
- const ulint n_fields = dtuple_get_n_fields(dtuple);
- const ulint status = dtuple_get_info_bits(dtuple)
- & REC_NEW_STATUS_MASK;
- ut_ad(dict_table_is_comp(index->table));
+ ut_ad(extra == 0 || dict_table_is_comp(index->table));
+ ut_ad(extra == 0 || extra == REC_N_NEW_EXTRA_BYTES);
ut_ad(n_fields > 0);
- /* Try to ensure that the memset() between the for() loops
- completes fast. The address is not exact, but UNIV_PREFETCH
- should never generate a memory fault. */
- UNIV_PREFETCH_RW(rec - REC_N_NEW_EXTRA_BYTES - n_fields);
- UNIV_PREFETCH_RW(rec);
-
switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
case REC_STATUS_ORDINARY:
ut_ad(n_fields <= dict_index_get_n_fields(index));
@@ -880,73 +1081,21 @@ rec_convert_dtuple_to_rec_new(
case REC_STATUS_SUPREMUM:
ut_ad(n_fields == 1);
n_node_ptr_field = ULINT_UNDEFINED;
- goto init;
+ break;
default:
- ut_a(0);
- return(0);
- }
-
- /* Calculate the offset of the origin in the physical record.
- We must loop over all fields to do this. */
- rec += UT_BITS_IN_BYTES(index->n_nullable);
-
- for (i = 0; i < n_fields; i++) {
- if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
-#ifdef UNIV_DEBUG
- field = dtuple_get_nth_field(dtuple, i);
- type = dfield_get_type(field);
- ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL);
- ut_ad(dfield_get_len(field) == 4);
-#endif /* UNIV_DEBUG */
- goto init;
- }
- field = dtuple_get_nth_field(dtuple, i);
- type = dfield_get_type(field);
- len = dfield_get_len(field);
- fixed_len = dict_index_get_nth_field(index, i)->fixed_len;
-
- ut_ad(dict_col_type_assert_equal(
- dict_field_get_col(dict_index_get_nth_field(
- index, i)),
- dfield_get_type(field)));
-
- if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) {
- if (len == UNIV_SQL_NULL)
- continue;
- }
- /* only nullable fields can be null */
- ut_ad(len != UNIV_SQL_NULL);
- if (fixed_len) {
- ut_ad(len == fixed_len);
- } else {
- ut_ad(len <= dtype_get_len(type)
- || dtype_get_mtype(type) == DATA_BLOB);
- rec++;
- if (len >= 128
- && (dtype_get_len(type) >= 256
- || dtype_get_mtype(type) == DATA_BLOB)) {
- rec++;
- }
- }
+ ut_error;
+ return;
}
-init:
end = rec;
- nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
+ nulls = rec - (extra + 1);
lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
/* clear the SQL-null flags */
- memset (lens + 1, 0, nulls - lens);
-
- /* Set the info bits of the record */
- rec_set_status(rec, status);
-
- rec_set_info_bits(rec, TRUE,
- dtuple_get_info_bits(dtuple) & REC_INFO_BITS_MASK);
+ memset(lens + 1, 0, nulls - lens);
/* Store the data and the offsets */
- for (i = 0; i < n_fields; i++) {
- field = dtuple_get_nth_field(dtuple, i);
+ for (i = 0, field = fields; i < n_fields; i++, field++) {
type = dfield_get_type(field);
len = dfield_get_len(field);
@@ -954,9 +1103,9 @@ init:
ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL);
ut_ad(len == 4);
memcpy(end, dfield_get_data(field), len);
+ end += 4;
break;
}
- fixed_len = dict_index_get_nth_field(index, i)->fixed_len;
if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) {
/* nullable field */
@@ -970,7 +1119,7 @@ init:
ut_ad(*nulls < null_mask);
/* set the null flag if necessary */
- if (len == UNIV_SQL_NULL) {
+ if (dfield_is_null(field)) {
*nulls |= null_mask;
null_mask <<= 1;
continue;
@@ -979,9 +1128,18 @@ init:
null_mask <<= 1;
}
/* only nullable fields can be null */
- ut_ad(len != UNIV_SQL_NULL);
+ ut_ad(!dfield_is_null(field));
+
+ fixed_len = dict_index_get_nth_field(index, i)->fixed_len;
+
if (fixed_len) {
ut_ad(len == fixed_len);
+ ut_ad(!dfield_is_ext(field));
+ } else if (dfield_is_ext(field)) {
+ ut_ad(len <= REC_MAX_INDEX_COL_LEN
+ + BTR_EXTERN_FIELD_REF_SIZE);
+ *lens-- = (byte) (len >> 8) | 0xc0;
+ *lens-- = (byte) len;
} else {
ut_ad(len <= dtype_get_len(type)
|| dtype_get_mtype(type) == DATA_BLOB);
@@ -991,7 +1149,6 @@ init:
*lens-- = (byte) len;
} else {
- /* the extern bits will be set later */
ut_ad(len < 16384);
*lens-- = (byte) (len >> 8) | 0x80;
*lens-- = (byte) len;
@@ -1001,23 +1158,55 @@ init:
memcpy(end, dfield_get_data(field), len);
end += len;
}
+}
+
+/*********************************************************//**
+Builds a new-style physical record out of a data tuple and
+stores it beginning from the start of the given buffer.
+@return pointer to the origin of physical record */
+static
+rec_t*
+rec_convert_dtuple_to_rec_new(
+/*==========================*/
+ byte* buf, /*!< in: start address of
+ the physical record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ const dtuple_t* dtuple) /*!< in: data tuple */
+{
+ ulint extra_size;
+ ulint status;
+ rec_t* rec;
+
+ status = dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK;
+ rec_get_converted_size_comp(index, status,
+ dtuple->fields, dtuple->n_fields,
+ &extra_size);
+ rec = buf + extra_size;
+
+ rec_convert_dtuple_to_rec_comp(
+ rec, REC_N_NEW_EXTRA_BYTES, index, status,
+ dtuple->fields, dtuple->n_fields);
+
+ /* Set the info bits of the record */
+ rec_set_info_and_status_bits(rec, dtuple_get_info_bits(dtuple));
return(rec);
}
-/*************************************************************
+/*********************************************************//**
Builds a physical record out of a data tuple and
-stores it beginning from the start of the given buffer. */
-
+stores it beginning from the start of the given buffer.
+@return pointer to the origin of physical record */
+UNIV_INTERN
rec_t*
rec_convert_dtuple_to_rec(
/*======================*/
- /* out: pointer to the origin
- of physical record */
- byte* buf, /* in: start address of the
+ byte* buf, /*!< in: start address of the
physical record */
- dict_index_t* index, /* in: record descriptor */
- dtuple_t* dtuple) /* in: data tuple */
+ const dict_index_t* index, /*!< in: record descriptor */
+ const dtuple_t* dtuple, /*!< in: data tuple */
+ ulint n_ext) /*!< in: number of
+ externally stored columns */
{
rec_t* rec;
@@ -1028,7 +1217,7 @@ rec_convert_dtuple_to_rec(
if (dict_table_is_comp(index->table)) {
rec = rec_convert_dtuple_to_rec_new(buf, index, dtuple);
} else {
- rec = rec_convert_dtuple_to_rec_old(buf, dtuple);
+ rec = rec_convert_dtuple_to_rec_old(buf, dtuple, n_ext);
}
#ifdef UNIV_DEBUG
@@ -1036,7 +1225,7 @@ rec_convert_dtuple_to_rec(
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
const ulint* offsets;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
offsets = rec_get_offsets(rec, index,
offsets_, ULINT_UNDEFINED, &heap);
@@ -1049,27 +1238,24 @@ rec_convert_dtuple_to_rec(
return(rec);
}
-/******************************************************************
+/**************************************************************//**
Copies the first n fields of a physical record to a data tuple. The fields
are copied to the memory heap. */
-
+UNIV_INTERN
void
rec_copy_prefix_to_dtuple(
/*======================*/
- dtuple_t* tuple, /* in: data tuple */
- rec_t* rec, /* in: physical record */
- dict_index_t* index, /* in: record descriptor */
- ulint n_fields, /* in: number of fields to copy */
- mem_heap_t* heap) /* in: memory heap */
+ dtuple_t* tuple, /*!< out: data tuple */
+ const rec_t* rec, /*!< in: physical record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ ulint n_fields, /*!< in: number of fields
+ to copy */
+ mem_heap_t* heap) /*!< in: memory heap */
{
- dfield_t* field;
- byte* data;
- ulint len;
- byte* buf = NULL;
- ulint i;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ ulint i;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ rec_offs_init(offsets_);
offsets = rec_get_offsets(rec, index, offsets, n_fields, &heap);
@@ -1080,34 +1266,37 @@ rec_copy_prefix_to_dtuple(
rec, dict_table_is_comp(index->table)));
for (i = 0; i < n_fields; i++) {
+ dfield_t* field;
+ const byte* data;
+ ulint len;
field = dtuple_get_nth_field(tuple, i);
data = rec_get_nth_field(rec, offsets, i, &len);
if (len != UNIV_SQL_NULL) {
- buf = mem_heap_alloc(heap, len);
-
- ut_memcpy(buf, data, len);
+ dfield_set_data(field,
+ mem_heap_dup(heap, data, len), len);
+ ut_ad(!rec_offs_nth_extern(offsets, i));
+ } else {
+ dfield_set_null(field);
}
-
- dfield_set_data(field, buf, len);
}
}
-/******************************************************************
+/**************************************************************//**
Copies the first n fields of an old-style physical record
-to a new physical record in a buffer. */
+to a new physical record in a buffer.
+@return own: copied record */
static
rec_t*
rec_copy_prefix_to_buf_old(
/*=======================*/
- /* out, own: copied record */
- rec_t* rec, /* in: physical record */
- ulint n_fields, /* in: number of fields to copy */
- ulint area_end, /* in: end of the prefix data */
- byte** buf, /* in/out: memory buffer for the copied prefix,
- or NULL */
- ulint* buf_size) /* in/out: buffer size */
+ const rec_t* rec, /*!< in: physical record */
+ ulint n_fields, /*!< in: number of fields to copy */
+ ulint area_end, /*!< in: end of the prefix data */
+ byte** buf, /*!< in/out: memory buffer for
+ the copied prefix, or NULL */
+ ulint* buf_size) /*!< in/out: buffer size */
{
rec_t* copy_rec;
ulint area_start;
@@ -1126,8 +1315,7 @@ rec_copy_prefix_to_buf_old(
mem_free(*buf);
}
- *buf = mem_alloc(prefix_len);
- *buf_size = prefix_len;
+ *buf = mem_alloc2(prefix_len, buf_size);
}
ut_memcpy(*buf, rec - area_start, prefix_len);
@@ -1139,23 +1327,25 @@ rec_copy_prefix_to_buf_old(
return(copy_rec);
}
-/******************************************************************
+/**************************************************************//**
Copies the first n fields of a physical record to a new physical record in
-a buffer. */
-
+a buffer.
+@return own: copied record */
+UNIV_INTERN
rec_t*
rec_copy_prefix_to_buf(
/*===================*/
- /* out, own: copied record */
- rec_t* rec, /* in: physical record */
- dict_index_t* index, /* in: record descriptor */
- ulint n_fields, /* in: number of fields to copy */
- byte** buf, /* in/out: memory buffer
- for the copied prefix, or NULL */
- ulint* buf_size) /* in/out: buffer size */
+ const rec_t* rec, /*!< in: physical record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ ulint n_fields, /*!< in: number of fields
+ to copy */
+ byte** buf, /*!< in/out: memory buffer
+ for the copied prefix,
+ or NULL */
+ ulint* buf_size) /*!< in/out: buffer size */
{
- byte* nulls;
- byte* lens;
+ const byte* nulls;
+ const byte* lens;
ulint i;
ulint prefix_len;
ulint null_mask;
@@ -1244,8 +1434,7 @@ rec_copy_prefix_to_buf(
mem_free(*buf);
}
- *buf = mem_alloc(prefix_len);
- *buf_size = prefix_len;
+ *buf = mem_alloc2(prefix_len, buf_size);
}
memcpy(*buf, lens + 1, prefix_len);
@@ -1253,21 +1442,21 @@ rec_copy_prefix_to_buf(
return(*buf + (rec - (lens + 1)));
}
-/*******************************************************************
-Validates the consistency of an old-style physical record. */
+/***************************************************************//**
+Validates the consistency of an old-style physical record.
+@return TRUE if ok */
static
ibool
rec_validate_old(
/*=============*/
- /* out: TRUE if ok */
- rec_t* rec) /* in: physical record */
+ const rec_t* rec) /*!< in: physical record */
{
- byte* data;
- ulint len;
- ulint n_fields;
- ulint len_sum = 0;
- ulint sum = 0;
- ulint i;
+ const byte* data;
+ ulint len;
+ ulint n_fields;
+ ulint len_sum = 0;
+ ulint sum = 0;
+ ulint i;
ut_a(rec);
n_fields = rec_get_n_fields_old(rec);
@@ -1313,15 +1502,15 @@ rec_validate_old(
return(TRUE);
}
-/*******************************************************************
-Validates the consistency of a physical record. */
-
+/***************************************************************//**
+Validates the consistency of a physical record.
+@return TRUE if ok */
+UNIV_INTERN
ibool
rec_validate(
/*=========*/
- /* out: TRUE if ok */
- rec_t* rec, /* in: physical record */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
+ const rec_t* rec, /*!< in: physical record */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
const byte* data;
ulint len;
@@ -1363,11 +1552,11 @@ rec_validate(
}
}
- if (len_sum != (ulint)(rec_get_end(rec, offsets) - rec)) {
+ if (len_sum != rec_offs_data_size(offsets)) {
fprintf(stderr,
"InnoDB: Error: record len should be %lu, len %lu\n",
(ulong) len_sum,
- (ulong) (rec_get_end(rec, offsets) - rec));
+ (ulong) rec_offs_data_size(offsets));
return(FALSE);
}
@@ -1380,14 +1569,14 @@ rec_validate(
return(TRUE);
}
-/*******************************************************************
+/***************************************************************//**
Prints an old-style physical record. */
-
+UNIV_INTERN
void
rec_print_old(
/*==========*/
- FILE* file, /* in: file where to print */
- rec_t* rec) /* in: physical record */
+ FILE* file, /*!< in: file where to print */
+ const rec_t* rec) /*!< in: physical record */
{
const byte* data;
ulint len;
@@ -1417,49 +1606,38 @@ rec_print_old(
} else {
ut_print_buf(file, data, 30);
- fputs("...(truncated)", file);
+ fprintf(file, " (total %lu bytes)",
+ (ulong) len);
}
} else {
fprintf(file, " SQL NULL, size %lu ",
rec_get_nth_field_size(rec, i));
}
+
putc(';', file);
+ putc('\n', file);
}
- putc('\n', file);
-
rec_validate_old(rec);
}
-/*******************************************************************
-Prints a physical record. */
-
+#ifndef UNIV_HOTBACKUP
+/***************************************************************//**
+Prints a physical record in ROW_FORMAT=COMPACT. Ignores the
+record header. */
+UNIV_INTERN
void
-rec_print_new(
-/*==========*/
- FILE* file, /* in: file where to print */
- rec_t* rec, /* in: physical record */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
+rec_print_comp(
+/*===========*/
+ FILE* file, /*!< in: file where to print */
+ const rec_t* rec, /*!< in: physical record */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
- const byte* data;
- ulint len;
- ulint i;
-
- ut_ad(rec_offs_validate(rec, NULL, offsets));
-
- if (!rec_offs_comp(offsets)) {
- rec_print_old(file, rec);
- return;
- }
-
- ut_ad(rec);
-
- fprintf(file, "PHYSICAL RECORD: n_fields %lu;"
- " compact format; info bits %lu\n",
- (ulong) rec_offs_n_fields(offsets),
- (ulong) rec_get_info_bits(rec, TRUE));
+ ulint i;
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
+ const byte* data;
+ ulint len;
data = rec_get_nth_field(rec, offsets, i, &len);
@@ -1472,28 +1650,54 @@ rec_print_new(
} else {
ut_print_buf(file, data, 30);
- fputs("...(truncated)", file);
+ fprintf(file, " (total %lu bytes)",
+ (ulong) len);
}
} else {
fputs(" SQL NULL", file);
}
putc(';', file);
+ putc('\n', file);
}
+}
- putc('\n', file);
+/***************************************************************//**
+Prints a physical record. */
+UNIV_INTERN
+void
+rec_print_new(
+/*==========*/
+ FILE* file, /*!< in: file where to print */
+ const rec_t* rec, /*!< in: physical record */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+{
+ ut_ad(rec);
+ ut_ad(offsets);
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
+
+ if (!rec_offs_comp(offsets)) {
+ rec_print_old(file, rec);
+ return;
+ }
+ fprintf(file, "PHYSICAL RECORD: n_fields %lu;"
+ " compact format; info bits %lu\n",
+ (ulong) rec_offs_n_fields(offsets),
+ (ulong) rec_get_info_bits(rec, TRUE));
+
+ rec_print_comp(file, rec, offsets);
rec_validate(rec, offsets);
}
-/*******************************************************************
+/***************************************************************//**
Prints a physical record. */
-
+UNIV_INTERN
void
rec_print(
/*======*/
- FILE* file, /* in: file where to print */
- rec_t* rec, /* in: physical record */
- dict_index_t* index) /* in: record descriptor */
+ FILE* file, /*!< in: file where to print */
+ const rec_t* rec, /*!< in: physical record */
+ dict_index_t* index) /*!< in: record descriptor */
{
ut_ad(index);
@@ -1503,7 +1707,7 @@ rec_print(
} else {
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
rec_print_new(file, rec,
rec_get_offsets(rec, index, offsets_,
@@ -1513,3 +1717,4 @@ rec_print(
}
}
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/revert_gen.sh b/storage/innobase/revert_gen.sh
index 231e05a21e0..231e05a21e0 100755
--- a/storage/innodb_plugin/revert_gen.sh
+++ b/storage/innobase/revert_gen.sh
diff --git a/storage/innodb_plugin/row/row0ext.c b/storage/innobase/row/row0ext.c
index 7320f5b1dca..7320f5b1dca 100644
--- a/storage/innodb_plugin/row/row0ext.c
+++ b/storage/innobase/row/row0ext.c
diff --git a/storage/innobase/row/row0ins.c b/storage/innobase/row/row0ins.c
index ad14b927170..fe51fce82c4 100644
--- a/storage/innobase/row/row0ins.c
+++ b/storage/innobase/row/row0ins.c
@@ -1,7 +1,24 @@
-/******************************************************
-Insert into a table
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1996 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0ins.c
+Insert into a table
Created 4/20/1996 Heikki Tuuri
*******************************************************/
@@ -12,6 +29,7 @@ Created 4/20/1996 Heikki Tuuri
#include "row0ins.ic"
#endif
+#include "ha_prototypes.h"
#include "dict0dict.h"
#include "dict0boot.h"
#include "trx0undo.h"
@@ -34,33 +52,16 @@ Created 4/20/1996 Heikki Tuuri
#define ROW_INS_NEXT 2
-/*********************************************************************
-This prototype is copied from /mysql/sql/ha_innodb.cc.
-Invalidates the MySQL query cache for the table.
-NOTE that the exact prototype of this function has to be in
-/innobase/row/row0ins.c! */
-extern
-void
-innobase_invalidate_query_cache(
-/*============================*/
- trx_t* trx, /* in: transaction which modifies the table */
- char* full_name, /* in: concatenation of database name, null
- char '\0', table name, null char'\0';
- NOTE that in Windows this is always
- in LOWER CASE! */
- ulint full_name_len); /* in: full name length where also the null
- chars count */
-
-/*************************************************************************
-Creates an insert node struct. */
-
+/*********************************************************************//**
+Creates an insert node struct.
+@return own: insert node struct */
+UNIV_INTERN
ins_node_t*
ins_node_create(
/*============*/
- /* out, own: insert node struct */
- ulint ins_type, /* in: INS_VALUES, ... */
- dict_table_t* table, /* in: table where to insert */
- mem_heap_t* heap) /* in: mem heap where created */
+ ulint ins_type, /*!< in: INS_VALUES, ... */
+ dict_table_t* table, /*!< in: table where to insert */
+ mem_heap_t* heap) /*!< in: mem heap where created */
{
ins_node_t* node;
@@ -86,13 +87,13 @@ ins_node_create(
return(node);
}
-/***************************************************************
+/***********************************************************//**
Creates an entry template for each index of a table. */
-static
+UNIV_INTERN
void
ins_node_create_entry_list(
/*=======================*/
- ins_node_t* node) /* in: row insert node */
+ ins_node_t* node) /*!< in: row insert node */
{
dict_index_t* index;
dtuple_t* entry;
@@ -104,7 +105,7 @@ ins_node_create_entry_list(
index = dict_table_get_first_index(node->table);
while (index != NULL) {
- entry = row_build_index_entry(node->row, index,
+ entry = row_build_index_entry(node->row, NULL, index,
node->entry_sys_heap);
UT_LIST_ADD_LAST(tuple_list, node->entry_list, entry);
@@ -112,13 +113,13 @@ ins_node_create_entry_list(
}
}
-/*********************************************************************
+/*****************************************************************//**
Adds system field buffers to a row. */
static
void
row_ins_alloc_sys_fields(
/*=====================*/
- ins_node_t* node) /* in: insert node */
+ ins_node_t* node) /*!< in: insert node */
{
dtuple_t* row;
dict_table_t* table;
@@ -140,7 +141,7 @@ row_ins_alloc_sys_fields(
dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
- ptr = mem_heap_alloc(heap, DATA_ROW_ID_LEN);
+ ptr = mem_heap_zalloc(heap, DATA_ROW_ID_LEN);
dfield_set_data(dfield, ptr, DATA_ROW_ID_LEN);
@@ -151,7 +152,7 @@ row_ins_alloc_sys_fields(
col = dict_table_get_sys_col(table, DATA_TRX_ID);
dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
- ptr = mem_heap_alloc(heap, DATA_TRX_ID_LEN);
+ ptr = mem_heap_zalloc(heap, DATA_TRX_ID_LEN);
dfield_set_data(dfield, ptr, DATA_TRX_ID_LEN);
@@ -162,21 +163,21 @@ row_ins_alloc_sys_fields(
col = dict_table_get_sys_col(table, DATA_ROLL_PTR);
dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
- ptr = mem_heap_alloc(heap, DATA_ROLL_PTR_LEN);
+ ptr = mem_heap_zalloc(heap, DATA_ROLL_PTR_LEN);
dfield_set_data(dfield, ptr, DATA_ROLL_PTR_LEN);
}
-/*************************************************************************
+/*********************************************************************//**
Sets a new row to insert for an INS_DIRECT node. This function is only used
if we have constructed the row separately, which is a rare case; this
function is quite slow. */
-
+UNIV_INTERN
void
ins_node_set_new_row(
/*=================*/
- ins_node_t* node, /* in: insert node */
- dtuple_t* row) /* in: new row (or first row) for the node */
+ ins_node_t* node, /*!< in: insert node */
+ dtuple_t* row) /*!< in: new row (or first row) for the node */
{
node->state = INS_NODE_SET_IX_LOCK;
node->index = NULL;
@@ -200,22 +201,23 @@ ins_node_set_new_row(
node->trx_id = ut_dulint_zero;
}
-/***********************************************************************
+/*******************************************************************//**
Does an insert operation by updating a delete-marked existing record
in the index. This situation can occur if the delete-marked record is
-kept in the index for consistent reads. */
+kept in the index for consistent reads.
+@return DB_SUCCESS or error code */
static
ulint
row_ins_sec_index_entry_by_modify(
/*==============================*/
- /* out: DB_SUCCESS or error code */
- ulint mode, /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+ ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
depending on whether mtr holds just a leaf
latch or also a tree latch */
- btr_cur_t* cursor, /* in: B-tree cursor */
- dtuple_t* entry, /* in: index entry to insert */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr) /* in: mtr */
+ btr_cur_t* cursor, /*!< in: B-tree cursor */
+ const dtuple_t* entry, /*!< in: index entry to insert */
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr) /*!< in: mtr; must be committed before
+ latching any further pages */
{
big_rec_t* dummy_big_rec;
mem_heap_t* heap;
@@ -225,7 +227,7 @@ row_ins_sec_index_entry_by_modify(
rec = btr_cur_get_rec(cursor);
- ut_ad((cursor->index->type & DICT_CLUSTERED) == 0);
+ ut_ad(!dict_index_is_clust(cursor->index));
ut_ad(rec_get_deleted_flag(rec,
dict_table_is_comp(cursor->index->table)));
@@ -244,7 +246,10 @@ row_ins_sec_index_entry_by_modify(
err = btr_cur_optimistic_update(BTR_KEEP_SYS_FLAG, cursor,
update, 0, thr, mtr);
- if (err == DB_OVERFLOW || err == DB_UNDERFLOW) {
+ switch (err) {
+ case DB_OVERFLOW:
+ case DB_UNDERFLOW:
+ case DB_ZIP_OVERFLOW:
err = DB_FAIL;
}
} else {
@@ -257,8 +262,9 @@ row_ins_sec_index_entry_by_modify(
}
err = btr_cur_pessimistic_update(BTR_KEEP_SYS_FLAG, cursor,
- &dummy_big_rec, update,
+ &heap, &dummy_big_rec, update,
0, thr, mtr);
+ ut_ad(!dummy_big_rec);
}
func_exit:
mem_heap_free(heap);
@@ -266,35 +272,33 @@ func_exit:
return(err);
}
-/***********************************************************************
+/*******************************************************************//**
Does an insert operation by delete unmarking and updating a delete marked
existing record in the index. This situation can occur if the delete marked
-record is kept in the index for consistent reads. */
+record is kept in the index for consistent reads.
+@return DB_SUCCESS, DB_FAIL, or error code */
static
ulint
row_ins_clust_index_entry_by_modify(
/*================================*/
- /* out: DB_SUCCESS, DB_FAIL, or error code */
- ulint mode, /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+ ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
depending on whether mtr holds just a leaf
latch or also a tree latch */
- btr_cur_t* cursor, /* in: B-tree cursor */
- big_rec_t** big_rec,/* out: possible big rec vector of fields
+ btr_cur_t* cursor, /*!< in: B-tree cursor */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
+ big_rec_t** big_rec,/*!< out: possible big rec vector of fields
which have to be stored externally by the
caller */
- dtuple_t* entry, /* in: index entry to insert */
- ulint* ext_vec,/* in: array containing field numbers of
- externally stored fields in entry, or NULL */
- ulint n_ext_vec,/* in: number of fields in ext_vec */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr) /* in: mtr */
+ const dtuple_t* entry, /*!< in: index entry to insert */
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr) /*!< in: mtr; must be committed before
+ latching any further pages */
{
- mem_heap_t* heap;
rec_t* rec;
upd_t* update;
ulint err;
- ut_ad(cursor->index->type & DICT_CLUSTERED);
+ ut_ad(dict_index_is_clust(cursor->index));
*big_rec = NULL;
@@ -303,51 +307,53 @@ row_ins_clust_index_entry_by_modify(
ut_ad(rec_get_deleted_flag(rec,
dict_table_is_comp(cursor->index->table)));
- heap = mem_heap_create(1024);
+ if (!*heap) {
+ *heap = mem_heap_create(1024);
+ }
/* Build an update vector containing all the fields to be modified;
NOTE that this vector may NOT contain system columns trx_id or
roll_ptr */
- update = row_upd_build_difference_binary(cursor->index, entry, ext_vec,
- n_ext_vec, rec,
- thr_get_trx(thr), heap);
+ update = row_upd_build_difference_binary(cursor->index, entry, rec,
+ thr_get_trx(thr), *heap);
if (mode == BTR_MODIFY_LEAF) {
/* Try optimistic updating of the record, keeping changes
within the page */
err = btr_cur_optimistic_update(0, cursor, update, 0, thr,
mtr);
- if (err == DB_OVERFLOW || err == DB_UNDERFLOW) {
+ switch (err) {
+ case DB_OVERFLOW:
+ case DB_UNDERFLOW:
+ case DB_ZIP_OVERFLOW:
err = DB_FAIL;
}
} else {
ut_a(mode == BTR_MODIFY_TREE);
if (buf_LRU_buf_pool_running_out()) {
- err = DB_LOCK_TABLE_FULL;
+ return(DB_LOCK_TABLE_FULL);
- goto func_exit;
}
- err = btr_cur_pessimistic_update(0, cursor, big_rec, update,
+ err = btr_cur_pessimistic_update(0, cursor,
+ heap, big_rec, update,
0, thr, mtr);
}
-func_exit:
- mem_heap_free(heap);
return(err);
}
-/*************************************************************************
+/*********************************************************************//**
Returns TRUE if in a cascaded update/delete an ancestor node of node
-updates (not DELETE, but UPDATE) table. */
+updates (not DELETE, but UPDATE) table.
+@return TRUE if an ancestor updates table */
static
ibool
row_ins_cascade_ancestor_updates_table(
/*===================================*/
- /* out: TRUE if an ancestor updates table */
- que_node_t* node, /* in: node in a query graph */
- dict_table_t* table) /* in: table */
+ que_node_t* node, /*!< in: node in a query graph */
+ dict_table_t* table) /*!< in: table */
{
que_node_t* parent;
upd_node_t* upd_node;
@@ -371,15 +377,15 @@ row_ins_cascade_ancestor_updates_table(
return(FALSE);
}
-/*************************************************************************
+/*********************************************************************//**
Returns the number of ancestor UPDATE or DELETE nodes of a
-cascaded update/delete node. */
+cascaded update/delete node.
+@return number of ancestors */
static
ulint
row_ins_cascade_n_ancestors(
/*========================*/
- /* out: number of ancestors */
- que_node_t* node) /* in: node in a query graph */
+ que_node_t* node) /*!< in: node in a query graph */
{
que_node_t* parent;
ulint n_ancestors = 0;
@@ -397,26 +403,22 @@ row_ins_cascade_n_ancestors(
return(n_ancestors);
}
-/**********************************************************************
+/******************************************************************//**
Calculates the update vector node->cascade->update for a child table in
-a cascaded update. */
+a cascaded update.
+@return number of fields in the calculated update vector; the value
+can also be 0 if no foreign key fields changed; the returned value is
+ULINT_UNDEFINED if the column type in the child table is too short to
+fit the new value in the parent table: that means the update fails */
static
ulint
row_ins_cascade_calc_update_vec(
/*============================*/
- /* out: number of fields in the
- calculated update vector; the value
- can also be 0 if no foreign key
- fields changed; the returned value
- is ULINT_UNDEFINED if the column
- type in the child table is too short
- to fit the new value in the parent
- table: that means the update fails */
- upd_node_t* node, /* in: update node of the parent
+ upd_node_t* node, /*!< in: update node of the parent
table */
- dict_foreign_t* foreign, /* in: foreign key constraint whose
+ dict_foreign_t* foreign, /*!< in: foreign key constraint whose
type is != 0 */
- mem_heap_t* heap) /* in: memory heap to use as
+ mem_heap_t* heap) /*!< in: memory heap to use as
temporary storage */
{
upd_node_t* cascade = node->cascade_node;
@@ -469,6 +471,7 @@ row_ins_cascade_calc_update_vec(
ulint min_size;
const dict_col_t* col;
+ ulint ufield_len;
col = dict_index_get_nth_col(index, i);
@@ -484,11 +487,15 @@ row_ins_cascade_calc_update_vec(
ufield->exp = NULL;
ufield->new_val = parent_ufield->new_val;
+ ufield_len = dfield_get_len(&ufield->new_val);
+
+ /* Clear the "external storage" flag */
+ dfield_set_len(&ufield->new_val, ufield_len);
/* Do not allow a NOT NULL column to be
updated as NULL */
- if (ufield->new_val.len == UNIV_SQL_NULL
+ if (dfield_is_null(&ufield->new_val)
&& (col->prtype & DATA_NOT_NULL)) {
return(ULINT_UNDEFINED);
@@ -497,14 +504,14 @@ row_ins_cascade_calc_update_vec(
/* If the new value would not fit in the
column, do not allow the update */
- if (ufield->new_val.len != UNIV_SQL_NULL
+ if (!dfield_is_null(&ufield->new_val)
&& dtype_get_at_most_n_mbchars(
col->prtype,
col->mbminlen, col->mbmaxlen,
col->len,
- ufield->new_val.len,
- ufield->new_val.data)
- < ufield->new_val.len) {
+ ufield_len,
+ dfield_get_data(&ufield->new_val))
+ < ufield_len) {
return(ULINT_UNDEFINED);
}
@@ -516,28 +523,31 @@ row_ins_cascade_calc_update_vec(
min_size = dict_col_get_min_size(col);
- if (min_size
- && ufield->new_val.len != UNIV_SQL_NULL
- && ufield->new_val.len < min_size) {
+ /* Because UNIV_SQL_NULL (the marker
+ of SQL NULL values) exceeds all possible
+ values of min_size, the test below will
+ not hold for SQL NULL columns. */
+
+ if (min_size > ufield_len) {
char* pad_start;
const char* pad_end;
- ufield->new_val.data = mem_heap_alloc(
- heap, min_size);
- pad_start = ((char*) ufield
- ->new_val.data)
- + ufield->new_val.len;
- pad_end = ((char*) ufield
- ->new_val.data)
- + min_size;
- ufield->new_val.len = min_size;
- ut_memcpy(ufield->new_val.data,
- parent_ufield->new_val.data,
- parent_ufield->new_val.len);
+ char* padded_data
+ = mem_heap_alloc(
+ heap, min_size);
+ pad_start = padded_data + ufield_len;
+ pad_end = padded_data + min_size;
+
+ memcpy(padded_data,
+ dfield_get_data(&ufield
+ ->new_val),
+ dfield_get_len(&ufield
+ ->new_val));
switch (UNIV_EXPECT(col->mbminlen,1)) {
default:
ut_error;
+ return(ULINT_UNDEFINED);
case 1:
if (UNIV_UNLIKELY
(dtype_get_charset_coll(
@@ -554,8 +564,7 @@ row_ins_cascade_calc_update_vec(
break;
case 2:
/* space=0x0020 */
- ut_a(!(ufield->new_val.len
- % 2));
+ ut_a(!(ufield_len % 2));
ut_a(!(min_size % 2));
do {
*pad_start++ = 0x00;
@@ -563,9 +572,10 @@ row_ins_cascade_calc_update_vec(
} while (pad_start < pad_end);
break;
}
- }
- ufield->extern_storage = FALSE;
+ dfield_set_data(&ufield->new_val,
+ padded_data, min_size);
+ }
n_fields_updated++;
}
@@ -577,15 +587,15 @@ row_ins_cascade_calc_update_vec(
return(n_fields_updated);
}
-/*************************************************************************
+/*********************************************************************//**
Set detailed error message associated with foreign key errors for
the given transaction. */
static
void
row_ins_set_detailed(
/*=================*/
- trx_t* trx, /* in: transaction */
- dict_foreign_t* foreign) /* in: foreign key constraint */
+ trx_t* trx, /*!< in: transaction */
+ dict_foreign_t* foreign) /*!< in: foreign key constraint */
{
mutex_enter(&srv_misc_tmpfile_mutex);
rewind(srv_misc_tmpfile);
@@ -603,21 +613,21 @@ row_ins_set_detailed(
mutex_exit(&srv_misc_tmpfile_mutex);
}
-/*************************************************************************
+/*********************************************************************//**
Reports a foreign key error associated with an update or a delete of a
parent table index entry. */
static
void
row_ins_foreign_report_err(
/*=======================*/
- const char* errstr, /* in: error string from the viewpoint
+ const char* errstr, /*!< in: error string from the viewpoint
of the parent table */
- que_thr_t* thr, /* in: query thread whose run_node
+ que_thr_t* thr, /*!< in: query thread whose run_node
is an update node */
- dict_foreign_t* foreign, /* in: foreign key constraint */
- rec_t* rec, /* in: a matching index record in the
+ dict_foreign_t* foreign, /*!< in: foreign key constraint */
+ const rec_t* rec, /*!< in: a matching index record in the
child table */
- dtuple_t* entry) /* in: index entry in the parent
+ const dtuple_t* entry) /*!< in: index entry in the parent
table */
{
FILE* ef = dict_foreign_err_file;
@@ -659,7 +669,7 @@ row_ins_foreign_report_err(
mutex_exit(&dict_foreign_err_mutex);
}
-/*************************************************************************
+/*********************************************************************//**
Reports a foreign key error to dict_foreign_err_file when we are trying
to add an index entry to a child table. Note that the adding may be the result
of an update, too. */
@@ -667,12 +677,12 @@ static
void
row_ins_foreign_report_add_err(
/*===========================*/
- trx_t* trx, /* in: transaction */
- dict_foreign_t* foreign, /* in: foreign key constraint */
- rec_t* rec, /* in: a record in the parent table:
+ trx_t* trx, /*!< in: transaction */
+ dict_foreign_t* foreign, /*!< in: foreign key constraint */
+ const rec_t* rec, /*!< in: a record in the parent table:
it does not match entry because we
have an error! */
- dtuple_t* entry) /* in: index entry to insert in the
+ const dtuple_t* entry) /*!< in: index entry to insert in the
child table */
{
FILE* ef = dict_foreign_err_file;
@@ -693,6 +703,8 @@ row_ins_foreign_report_add_err(
ut_print_name(ef, trx, FALSE, foreign->foreign_index->name);
if (entry) {
fputs(" tuple:\n", ef);
+ /* TODO: DB_TRX_ID and DB_ROLL_PTR may be uninitialized.
+ It would be better to only display the user columns. */
dtuple_print(ef, entry);
}
fputs("\nBut in parent table ", ef);
@@ -704,7 +716,7 @@ row_ins_foreign_report_add_err(
/* If the cursor ended on a supremum record, it is better
to report the previous record in the error message, so that
the user gets a more descriptive error message. */
- rec = page_rec_get_prev(rec);
+ rec = page_rec_get_prev_const(rec);
}
if (rec) {
@@ -715,15 +727,15 @@ row_ins_foreign_report_add_err(
mutex_exit(&dict_foreign_err_mutex);
}
-/*************************************************************************
+/*********************************************************************//**
Invalidate the query cache for the given table. */
static
void
row_ins_invalidate_query_cache(
/*===========================*/
- que_thr_t* thr, /* in: query thread whose run_node
+ que_thr_t* thr, /*!< in: query thread whose run_node
is an update node */
- const char* name) /* in: table name prefixed with
+ const char* name) /*!< in: table name prefixed with
database name and a '/' character */
{
char* buf;
@@ -736,32 +748,28 @@ row_ins_invalidate_query_cache(
ut_a(ptr);
*ptr = '\0';
- /* We call a function in ha_innodb.cc */
-#ifndef UNIV_HOTBACKUP
innobase_invalidate_query_cache(thr_get_trx(thr), buf, len);
-#endif
mem_free(buf);
}
-/*************************************************************************
+/*********************************************************************//**
Perform referential actions or checks when a parent row is deleted or updated
and the constraint had an ON DELETE or ON UPDATE condition which was not
-RESTRICT. */
+RESTRICT.
+@return DB_SUCCESS, DB_LOCK_WAIT, or error code */
static
ulint
row_ins_foreign_check_on_constraint(
/*================================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- or error code */
- que_thr_t* thr, /* in: query thread whose run_node
+ que_thr_t* thr, /*!< in: query thread whose run_node
is an update node */
- dict_foreign_t* foreign, /* in: foreign key constraint whose
+ dict_foreign_t* foreign, /*!< in: foreign key constraint whose
type is != 0 */
- btr_pcur_t* pcur, /* in: cursor placed on a matching
+ btr_pcur_t* pcur, /*!< in: cursor placed on a matching
index record in the child table */
- dtuple_t* entry, /* in: index entry in the parent
+ dtuple_t* entry, /*!< in: index entry in the parent
table */
- mtr_t* mtr) /* in: mtr holding the latch of pcur
+ mtr_t* mtr) /*!< in: mtr holding the latch of pcur
page */
{
upd_node_t* node;
@@ -771,8 +779,9 @@ row_ins_foreign_check_on_constraint(
dict_index_t* clust_index;
dtuple_t* ref;
mem_heap_t* upd_vec_heap = NULL;
- rec_t* rec;
- rec_t* clust_rec;
+ const rec_t* rec;
+ const rec_t* clust_rec;
+ const buf_block_t* clust_block;
upd_t* update;
ulint n_to_update;
ulint err;
@@ -899,12 +908,13 @@ row_ins_foreign_check_on_constraint(
rec = btr_pcur_get_rec(pcur);
- if (index->type & DICT_CLUSTERED) {
+ if (dict_index_is_clust(index)) {
/* pcur is already positioned in the clustered index of
the child table */
clust_index = index;
clust_rec = rec;
+ clust_block = btr_pcur_get_block(pcur);
} else {
/* We have to look for the record in the clustered index
in the child table */
@@ -920,6 +930,7 @@ row_ins_foreign_check_on_constraint(
cascade->pcur, 0, mtr);
clust_rec = btr_pcur_get_rec(cascade->pcur);
+ clust_block = btr_pcur_get_block(cascade->pcur);
if (!page_rec_is_user_rec(clust_rec)
|| btr_pcur_get_low_match(cascade->pcur)
@@ -955,8 +966,8 @@ row_ins_foreign_check_on_constraint(
gap if the search criterion was not unique */
err = lock_clust_rec_read_check_and_lock_alt(
- 0, clust_rec, clust_index, LOCK_X, LOCK_REC_NOT_GAP,
- thr);
+ 0, clust_block, clust_rec, clust_index,
+ LOCK_X, LOCK_REC_NOT_GAP, thr);
}
if (err != DB_SUCCESS) {
@@ -987,14 +998,14 @@ row_ins_foreign_check_on_constraint(
update->n_fields = foreign->n_fields;
for (i = 0; i < foreign->n_fields; i++) {
- (update->fields + i)->field_no
- = dict_table_get_nth_col_pos(
- table,
- dict_index_get_nth_col_no(index, i));
- (update->fields + i)->exp = NULL;
- (update->fields + i)->new_val.len = UNIV_SQL_NULL;
- (update->fields + i)->new_val.data = NULL;
- (update->fields + i)->extern_storage = FALSE;
+ upd_field_t* ufield = &update->fields[i];
+
+ ufield->field_no = dict_table_get_nth_col_pos(
+ table,
+ dict_index_get_nth_col_no(index, i));
+ ufield->orig_len = 0;
+ ufield->exp = NULL;
+ dfield_set_null(&ufield->new_val);
}
}
@@ -1107,95 +1118,92 @@ nonstandard_exit_func:
return(err);
}
-/*************************************************************************
+/*********************************************************************//**
Sets a shared lock on a record. Used in locking possible duplicate key
-records and also in checking foreign key constraints. */
+records and also in checking foreign key constraints.
+@return DB_SUCCESS or error code */
static
ulint
row_ins_set_shared_rec_lock(
/*========================*/
- /* out: DB_SUCCESS or error code */
- ulint type, /* in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP type lock */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- que_thr_t* thr) /* in: query thread */
+ ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or
+ LOCK_REC_NOT_GAP type lock */
+ const buf_block_t* block, /*!< in: buffer block of rec */
+ const rec_t* rec, /*!< in: record */
+ dict_index_t* index, /*!< in: index */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ que_thr_t* thr) /*!< in: query thread */
{
ulint err;
ut_ad(rec_offs_validate(rec, index, offsets));
- if (index->type & DICT_CLUSTERED) {
+ if (dict_index_is_clust(index)) {
err = lock_clust_rec_read_check_and_lock(
- 0, rec, index, offsets, LOCK_S, type, thr);
+ 0, block, rec, index, offsets, LOCK_S, type, thr);
} else {
err = lock_sec_rec_read_check_and_lock(
- 0, rec, index, offsets, LOCK_S, type, thr);
+ 0, block, rec, index, offsets, LOCK_S, type, thr);
}
return(err);
}
-#ifndef UNIV_HOTBACKUP
-/*************************************************************************
+/*********************************************************************//**
Sets a exclusive lock on a record. Used in locking possible duplicate key
-records */
+records
+@return DB_SUCCESS or error code */
static
ulint
row_ins_set_exclusive_rec_lock(
/*===========================*/
- /* out: DB_SUCCESS or error code */
- ulint type, /* in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP type lock */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- que_thr_t* thr) /* in: query thread */
+ ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or
+ LOCK_REC_NOT_GAP type lock */
+ const buf_block_t* block, /*!< in: buffer block of rec */
+ const rec_t* rec, /*!< in: record */
+ dict_index_t* index, /*!< in: index */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ que_thr_t* thr) /*!< in: query thread */
{
ulint err;
ut_ad(rec_offs_validate(rec, index, offsets));
- if (index->type & DICT_CLUSTERED) {
+ if (dict_index_is_clust(index)) {
err = lock_clust_rec_read_check_and_lock(
- 0, rec, index, offsets, LOCK_X, type, thr);
+ 0, block, rec, index, offsets, LOCK_X, type, thr);
} else {
err = lock_sec_rec_read_check_and_lock(
- 0, rec, index, offsets, LOCK_X, type, thr);
+ 0, block, rec, index, offsets, LOCK_X, type, thr);
}
return(err);
}
-#endif /* !UNIV_HOTBACKUP */
-/*******************************************************************
+/***************************************************************//**
Checks if foreign key constraint fails for an index entry. Sets shared locks
which lock either the success or the failure of the constraint. NOTE that
-the caller must have a shared latch on dict_operation_lock. */
-
+the caller must have a shared latch on dict_operation_lock.
+@return DB_SUCCESS, DB_NO_REFERENCED_ROW, or DB_ROW_IS_REFERENCED */
+UNIV_INTERN
ulint
row_ins_check_foreign_constraint(
/*=============================*/
- /* out: DB_SUCCESS,
- DB_NO_REFERENCED_ROW,
- or DB_ROW_IS_REFERENCED */
- ibool check_ref,/* in: TRUE if we want to check that
+ ibool check_ref,/*!< in: TRUE if we want to check that
the referenced table is ok, FALSE if we
- want to to check the foreign key table */
- dict_foreign_t* foreign,/* in: foreign constraint; NOTE that the
+ want to check the foreign key table */
+ dict_foreign_t* foreign,/*!< in: foreign constraint; NOTE that the
tables mentioned in it must be in the
dictionary cache if they exist at all */
- dict_table_t* table, /* in: if check_ref is TRUE, then the foreign
+ dict_table_t* table, /*!< in: if check_ref is TRUE, then the foreign
table, else the referenced table */
- dtuple_t* entry, /* in: index entry for index */
- que_thr_t* thr) /* in: query thread */
+ dtuple_t* entry, /*!< in: index entry for index */
+ que_thr_t* thr) /*!< in: query thread */
{
upd_node_t* upd_node;
dict_table_t* check_table;
dict_index_t* check_index;
ulint n_fields_cmp;
- rec_t* rec;
btr_pcur_t pcur;
ibool moved;
int cmp;
@@ -1206,7 +1214,7 @@ row_ins_check_foreign_constraint(
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
run_again:
#ifdef UNIV_SYNC_DEBUG
@@ -1329,7 +1337,8 @@ run_again:
/* Scan index records and check if there is a matching record */
for (;;) {
- rec = btr_pcur_get_rec(&pcur);
+ const rec_t* rec = btr_pcur_get_rec(&pcur);
+ const buf_block_t* block = btr_pcur_get_block(&pcur);
if (page_rec_is_infimum(rec)) {
@@ -1341,8 +1350,9 @@ run_again:
if (page_rec_is_supremum(rec)) {
- err = row_ins_set_shared_rec_lock(
- LOCK_ORDINARY, rec, check_index, offsets, thr);
+ err = row_ins_set_shared_rec_lock(LOCK_ORDINARY, block,
+ rec, check_index,
+ offsets, thr);
if (err != DB_SUCCESS) {
break;
@@ -1357,8 +1367,8 @@ run_again:
if (rec_get_deleted_flag(rec,
rec_offs_comp(offsets))) {
err = row_ins_set_shared_rec_lock(
- LOCK_ORDINARY, rec, check_index,
- offsets, thr);
+ LOCK_ORDINARY, block,
+ rec, check_index, offsets, thr);
if (err != DB_SUCCESS) {
break;
@@ -1369,8 +1379,8 @@ run_again:
into gaps */
err = row_ins_set_shared_rec_lock(
- LOCK_REC_NOT_GAP, rec, check_index,
- offsets, thr);
+ LOCK_REC_NOT_GAP, block,
+ rec, check_index, offsets, thr);
if (err != DB_SUCCESS) {
@@ -1408,6 +1418,11 @@ run_again:
break;
}
+
+ /* row_ins_foreign_check_on_constraint
+ may have repositioned pcur on a
+ different block */
+ block = btr_pcur_get_block(&pcur);
} else {
row_ins_foreign_report_err(
"Trying to delete or update",
@@ -1421,7 +1436,8 @@ run_again:
if (cmp < 0) {
err = row_ins_set_shared_rec_lock(
- LOCK_GAP, rec, check_index, offsets, thr);
+ LOCK_GAP, block,
+ rec, check_index, offsets, thr);
if (err != DB_SUCCESS) {
break;
@@ -1486,21 +1502,21 @@ exit_func:
return(err);
}
-/*******************************************************************
+/***************************************************************//**
Checks if foreign key constraints fail for an index entry. If index
is not mentioned in any constraint, this function does nothing,
Otherwise does searches to the indexes of referenced tables and
sets shared locks which lock either the success or the failure of
-a constraint. */
+a constraint.
+@return DB_SUCCESS or error code */
static
ulint
row_ins_check_foreign_constraints(
/*==============================*/
- /* out: DB_SUCCESS or error code */
- dict_table_t* table, /* in: table */
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: index entry for index */
- que_thr_t* thr) /* in: query thread */
+ dict_table_t* table, /*!< in: table */
+ dict_index_t* index, /*!< in: index */
+ dtuple_t* entry, /*!< in: index entry for index */
+ que_thr_t* thr) /*!< in: query thread */
{
dict_foreign_t* foreign;
ulint err;
@@ -1568,21 +1584,20 @@ row_ins_check_foreign_constraints(
return(DB_SUCCESS);
}
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************
+/***************************************************************//**
Checks if a unique key violation to rec would occur at the index entry
-insert. */
+insert.
+@return TRUE if error */
static
ibool
row_ins_dupl_error_with_rec(
/*========================*/
- /* out: TRUE if error */
- rec_t* rec, /* in: user record; NOTE that we assume
+ const rec_t* rec, /*!< in: user record; NOTE that we assume
that the caller already has a record lock on
the record! */
- dtuple_t* entry, /* in: entry to insert */
- dict_index_t* index, /* in: index */
- const ulint* offsets)/* in: rec_get_offsets(rec, index) */
+ const dtuple_t* entry, /*!< in: entry to insert */
+ dict_index_t* index, /*!< in: index */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
{
ulint matched_fields;
ulint matched_bytes;
@@ -1607,7 +1622,7 @@ row_ins_dupl_error_with_rec(
/* In a unique secondary index we allow equal key values if they
contain SQL NULLs */
- if (!(index->type & DICT_CLUSTERED)) {
+ if (!dict_index_is_clust(index)) {
for (i = 0; i < n_unique; i++) {
if (UNIV_SQL_NULL == dfield_get_len(
@@ -1620,37 +1635,32 @@ row_ins_dupl_error_with_rec(
return(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
}
-#endif /* !UNIV_HOTBACKUP */
-/*******************************************************************
+/***************************************************************//**
Scans a unique non-clustered index at a given index entry to determine
whether a uniqueness violation has occurred for the key value of the entry.
-Set shared locks on possible duplicate records. */
+Set shared locks on possible duplicate records.
+@return DB_SUCCESS, DB_DUPLICATE_KEY, or DB_LOCK_WAIT */
static
ulint
row_ins_scan_sec_index_for_duplicate(
/*=================================*/
- /* out: DB_SUCCESS, DB_DUPLICATE_KEY, or
- DB_LOCK_WAIT */
- dict_index_t* index, /* in: non-clustered unique index */
- dtuple_t* entry, /* in: index entry */
- que_thr_t* thr) /* in: query thread */
+ dict_index_t* index, /*!< in: non-clustered unique index */
+ dtuple_t* entry, /*!< in: index entry */
+ que_thr_t* thr) /*!< in: query thread */
{
-#ifndef UNIV_HOTBACKUP
ulint n_unique;
ulint i;
int cmp;
ulint n_fields_cmp;
- rec_t* rec;
btr_pcur_t pcur;
ulint err = DB_SUCCESS;
- ibool moved;
unsigned allow_duplicates;
mtr_t mtr;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
n_unique = dict_index_get_n_unique(index);
@@ -1680,12 +1690,13 @@ row_ins_scan_sec_index_for_duplicate(
/* Scan index records and check if there is a duplicate */
- for (;;) {
- rec = btr_pcur_get_rec(&pcur);
+ do {
+ const rec_t* rec = btr_pcur_get_rec(&pcur);
+ const buf_block_t* block = btr_pcur_get_block(&pcur);
if (page_rec_is_infimum(rec)) {
- goto next_rec;
+ continue;
}
offsets = rec_get_offsets(rec, index, offsets,
@@ -1699,11 +1710,13 @@ row_ins_scan_sec_index_for_duplicate(
INSERT ON DUPLICATE KEY UPDATE). */
err = row_ins_set_exclusive_rec_lock(
- LOCK_ORDINARY, rec, index, offsets, thr);
+ LOCK_ORDINARY, block,
+ rec, index, offsets, thr);
} else {
err = row_ins_set_shared_rec_lock(
- LOCK_ORDINARY, rec, index, offsets, thr);
+ LOCK_ORDINARY, block,
+ rec, index, offsets, thr);
}
if (err != DB_SUCCESS) {
@@ -1713,7 +1726,7 @@ row_ins_scan_sec_index_for_duplicate(
if (page_rec_is_supremum(rec)) {
- goto next_rec;
+ continue;
}
cmp = cmp_dtuple_rec(entry, rec, offsets);
@@ -1734,13 +1747,7 @@ row_ins_scan_sec_index_for_duplicate(
}
ut_a(cmp == 0);
-next_rec:
- moved = btr_pcur_move_to_next(&pcur, &mtr);
-
- if (!moved) {
- break;
- }
- }
+ } while (btr_pcur_move_to_next(&pcur, &mtr));
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
@@ -1751,33 +1758,24 @@ next_rec:
dtuple_set_n_fields_cmp(entry, n_fields_cmp);
return(err);
-#else /* UNIV_HOTBACKUP */
- /* This function depends on MySQL code that is not included in
- InnoDB Hot Backup builds. Besides, this function should never
- be called in InnoDB Hot Backup. */
- ut_error;
- return(DB_FAIL);
-#endif /* UNIV_HOTBACKUP */
}
-/*******************************************************************
+/***************************************************************//**
Checks if a unique key violation error would occur at an index entry
insert. Sets shared locks on possible duplicate records. Works only
-for a clustered index! */
+for a clustered index!
+@return DB_SUCCESS if no error, DB_DUPLICATE_KEY if error,
+DB_LOCK_WAIT if we have to wait for a lock on a possible duplicate
+record */
static
ulint
row_ins_duplicate_error_in_clust(
/*=============================*/
- /* out: DB_SUCCESS if no error,
- DB_DUPLICATE_KEY if error, DB_LOCK_WAIT if we
- have to wait for a lock on a possible
- duplicate record */
- btr_cur_t* cursor, /* in: B-tree cursor */
- dtuple_t* entry, /* in: entry to insert */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr) /* in: mtr */
+ btr_cur_t* cursor, /*!< in: B-tree cursor */
+ dtuple_t* entry, /*!< in: entry to insert */
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr) /*!< in: mtr */
{
-#ifndef UNIV_HOTBACKUP
ulint err;
rec_t* rec;
ulint n_unique;
@@ -1785,12 +1783,12 @@ row_ins_duplicate_error_in_clust(
mem_heap_t*heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
UT_NOT_USED(mtr);
- ut_a(cursor->index->type & DICT_CLUSTERED);
- ut_ad(cursor->index->type & DICT_UNIQUE);
+ ut_a(dict_index_is_clust(cursor->index));
+ ut_ad(dict_index_is_unique(cursor->index));
/* NOTE: For unique non-clustered indexes there may be any number
of delete marked records with the same value for the non-clustered
@@ -1828,12 +1826,14 @@ row_ins_duplicate_error_in_clust(
INSERT ON DUPLICATE KEY UPDATE). */
err = row_ins_set_exclusive_rec_lock(
- LOCK_REC_NOT_GAP, rec,
- cursor->index, offsets, thr);
+ LOCK_REC_NOT_GAP,
+ btr_cur_get_block(cursor),
+ rec, cursor->index, offsets, thr);
} else {
err = row_ins_set_shared_rec_lock(
- LOCK_REC_NOT_GAP, rec,
+ LOCK_REC_NOT_GAP,
+ btr_cur_get_block(cursor), rec,
cursor->index, offsets, thr);
}
@@ -1866,13 +1866,15 @@ row_ins_duplicate_error_in_clust(
INSERT ON DUPLICATE KEY UPDATE). */
err = row_ins_set_exclusive_rec_lock(
- LOCK_REC_NOT_GAP, rec,
- cursor->index, offsets, thr);
+ LOCK_REC_NOT_GAP,
+ btr_cur_get_block(cursor),
+ rec, cursor->index, offsets, thr);
} else {
err = row_ins_set_shared_rec_lock(
- LOCK_REC_NOT_GAP, rec,
- cursor->index, offsets, thr);
+ LOCK_REC_NOT_GAP,
+ btr_cur_get_block(cursor),
+ rec, cursor->index, offsets, thr);
}
if (err != DB_SUCCESS) {
@@ -1887,7 +1889,7 @@ row_ins_duplicate_error_in_clust(
}
}
- ut_a(!(cursor->index->type & DICT_CLUSTERED));
+ ut_a(!dict_index_is_clust(cursor->index));
/* This should never happen */
}
@@ -1897,31 +1899,22 @@ func_exit:
mem_heap_free(heap);
}
return(err);
-#else /* UNIV_HOTBACKUP */
- /* This function depends on MySQL code that is not included in
- InnoDB Hot Backup builds. Besides, this function should never
- be called in InnoDB Hot Backup. */
- ut_error;
- return(DB_FAIL);
-#endif /* UNIV_HOTBACKUP */
}
-/*******************************************************************
+/***************************************************************//**
Checks if an index entry has long enough common prefix with an existing
record so that the intended insert of the entry must be changed to a modify of
the existing record. In the case of a clustered index, the prefix must be
n_unique fields long, and in the case of a secondary index, all fields must be
-equal. */
+equal.
+@return 0 if no update, ROW_INS_PREV if previous should be updated;
+currently we do the search so that only the low_match record can match
+enough to the search tuple, not the next record */
UNIV_INLINE
ulint
row_ins_must_modify(
/*================*/
- /* out: 0 if no update, ROW_INS_PREV if
- previous should be updated; currently we
- do the search so that only the low_match
- record can match enough to the search tuple,
- not the next record */
- btr_cur_t* cursor) /* in: B-tree cursor */
+ btr_cur_t* cursor) /*!< in: B-tree cursor */
{
ulint enough_match;
rec_t* rec;
@@ -1948,7 +1941,7 @@ row_ins_must_modify(
return(0);
}
-/*******************************************************************
+/***************************************************************//**
Tries to insert an index entry to an index. If the index is clustered
and a record with the same unique key is found, the other record is
necessarily marked deleted by a committed transaction, or a unique key
@@ -1956,22 +1949,20 @@ violation error occurs. The delete marked record is then updated to an
existing record, and we must write an undo log record on the delete
marked record. If the index is secondary, and a record with exactly the
same fields is found, the other record is necessarily marked deleted.
-It is then unmarked. Otherwise, the entry is just inserted to the index. */
-
+It is then unmarked. Otherwise, the entry is just inserted to the index.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_FAIL if pessimistic retry needed,
+or error code */
+static
ulint
row_ins_index_entry_low(
/*====================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT, DB_FAIL
- if pessimistic retry needed, or error code */
- ulint mode, /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+ ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
depending on whether we wish optimistic or
pessimistic descent down the index tree */
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: index entry to insert */
- ulint* ext_vec,/* in: array containing field numbers of
- externally stored fields in entry, or NULL */
- ulint n_ext_vec,/* in: number of fields in ext_vec */
- que_thr_t* thr) /* in: query thread */
+ dict_index_t* index, /*!< in: index */
+ dtuple_t* entry, /*!< in: index entry to insert */
+ ulint n_ext, /*!< in: number of externally stored columns */
+ que_thr_t* thr) /*!< in: query thread */
{
btr_cur_t cursor;
ulint ignore_sec_unique = 0;
@@ -1983,9 +1974,6 @@ row_ins_index_entry_low(
big_rec_t* big_rec = NULL;
mtr_t mtr;
mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
log_free_check();
@@ -2020,19 +2008,18 @@ row_ins_index_entry_low(
rec_t* first_rec = page_rec_get_next(
page_get_infimum_rec(page));
- if (UNIV_LIKELY(first_rec != page_get_supremum_rec(page))) {
- ut_a(rec_get_n_fields(first_rec, index)
- == dtuple_get_n_fields(entry));
- }
+ ut_ad(page_rec_is_supremum(first_rec)
+ || rec_get_n_fields(first_rec, index)
+ == dtuple_get_n_fields(entry));
}
#endif
n_unique = dict_index_get_n_unique(index);
- if (index->type & DICT_UNIQUE && (cursor.up_match >= n_unique
- || cursor.low_match >= n_unique)) {
+ if (dict_index_is_unique(index) && (cursor.up_match >= n_unique
+ || cursor.low_match >= n_unique)) {
- if (index->type & DICT_CLUSTERED) {
+ if (dict_index_is_clust(index)) {
/* Note that the following may return also
DB_LOCK_WAIT */
@@ -2076,23 +2063,24 @@ row_ins_index_entry_low(
if (modify == ROW_INS_NEXT) {
rec = page_rec_get_next(btr_cur_get_rec(&cursor));
- btr_cur_position(index, rec, &cursor);
+ btr_cur_position(index, rec,
+ btr_cur_get_block(&cursor),&cursor);
}
- if (index->type & DICT_CLUSTERED) {
+ if (dict_index_is_clust(index)) {
err = row_ins_clust_index_entry_by_modify(
- mode, &cursor, &big_rec, entry,
- ext_vec, n_ext_vec, thr, &mtr);
+ mode, &cursor, &heap, &big_rec, entry,
+ thr, &mtr);
} else {
+ ut_ad(!n_ext);
err = row_ins_sec_index_entry_by_modify(
mode, &cursor, entry, thr, &mtr);
}
-
} else {
if (mode == BTR_MODIFY_LEAF) {
err = btr_cur_optimistic_insert(
0, &cursor, entry, &insert_rec, &big_rec,
- thr, &mtr);
+ n_ext, thr, &mtr);
} else {
ut_a(mode == BTR_MODIFY_TREE);
if (buf_LRU_buf_pool_running_out()) {
@@ -2103,33 +2091,27 @@ row_ins_index_entry_low(
}
err = btr_cur_pessimistic_insert(
0, &cursor, entry, &insert_rec, &big_rec,
- thr, &mtr);
- }
-
- if (err == DB_SUCCESS) {
- if (ext_vec) {
- rec_set_field_extern_bits(insert_rec, index,
- ext_vec, n_ext_vec,
- &mtr);
- }
+ n_ext, thr, &mtr);
}
}
function_exit:
mtr_commit(&mtr);
- if (big_rec) {
- rec_t* rec;
+ if (UNIV_LIKELY_NULL(big_rec)) {
+ rec_t* rec;
+ ulint* offsets;
mtr_start(&mtr);
btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
BTR_MODIFY_TREE, &cursor, 0, &mtr);
rec = btr_cur_get_rec(&cursor);
- offsets = rec_get_offsets(rec, index, offsets,
+ offsets = rec_get_offsets(rec, index, NULL,
ULINT_UNDEFINED, &heap);
- err = btr_store_big_rec_extern_fields(index, rec,
- offsets, big_rec, &mtr);
+ err = btr_store_big_rec_extern_fields(
+ index, btr_cur_get_block(&cursor),
+ rec, offsets, big_rec, &mtr);
if (modify) {
dtuple_big_rec_free(big_rec);
@@ -2146,27 +2128,25 @@ function_exit:
return(err);
}
-/*******************************************************************
+/***************************************************************//**
Inserts an index entry to index. Tries first optimistic, then pessimistic
descent down the tree. If the entry matches enough to a delete marked record,
performs the insert by updating or delete unmarking the delete marked
-record. */
-
+record.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
+UNIV_INTERN
ulint
row_ins_index_entry(
/*================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DUPLICATE_KEY, or some other error code */
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: index entry to insert */
- ulint* ext_vec,/* in: array containing field numbers of
- externally stored fields in entry, or NULL */
- ulint n_ext_vec,/* in: number of fields in ext_vec */
- que_thr_t* thr) /* in: query thread */
+ dict_index_t* index, /*!< in: index */
+ dtuple_t* entry, /*!< in: index entry to insert */
+ ulint n_ext, /*!< in: number of externally stored columns */
+ ibool foreign,/*!< in: TRUE=check foreign key constraints */
+ que_thr_t* thr) /*!< in: query thread */
{
ulint err;
- if (UT_LIST_GET_FIRST(index->table->foreign_list)) {
+ if (foreign && UT_LIST_GET_FIRST(index->table->foreign_list)) {
err = row_ins_check_foreign_constraints(index->table, index,
entry, thr);
if (err != DB_SUCCESS) {
@@ -2178,7 +2158,7 @@ row_ins_index_entry(
/* Try first optimistic descent to the B-tree */
err = row_ins_index_entry_low(BTR_MODIFY_LEAF, index, entry,
- ext_vec, n_ext_vec, thr);
+ n_ext, thr);
if (err != DB_FAIL) {
return(err);
@@ -2187,36 +2167,38 @@ row_ins_index_entry(
/* Try then pessimistic descent to the B-tree */
err = row_ins_index_entry_low(BTR_MODIFY_TREE, index, entry,
- ext_vec, n_ext_vec, thr);
+ n_ext, thr);
return(err);
}
-/***************************************************************
+/***********************************************************//**
Sets the values of the dtuple fields in entry from the values of appropriate
columns in row. */
static
void
row_ins_index_entry_set_vals(
/*=========================*/
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: index entry to make */
- dtuple_t* row) /* in: row */
+ dict_index_t* index, /*!< in: index */
+ dtuple_t* entry, /*!< in: index entry to make */
+ const dtuple_t* row) /*!< in: row */
{
- dict_field_t* ind_field;
- dfield_t* field;
- dfield_t* row_field;
- ulint n_fields;
- ulint i;
+ ulint n_fields;
+ ulint i;
ut_ad(entry && row);
n_fields = dtuple_get_n_fields(entry);
for (i = 0; i < n_fields; i++) {
+ dict_field_t* ind_field;
+ dfield_t* field;
+ const dfield_t* row_field;
+ ulint len;
+
field = dtuple_get_nth_field(entry, i);
ind_field = dict_index_get_nth_field(index, i);
-
row_field = dtuple_get_nth_field(row, ind_field->col->ind);
+ len = dfield_get_len(row_field);
/* Check column prefix indexes */
if (ind_field->prefix_len > 0
@@ -2225,28 +2207,32 @@ row_ins_index_entry_set_vals(
const dict_col_t* col
= dict_field_get_col(ind_field);
- field->len = dtype_get_at_most_n_mbchars(
+ len = dtype_get_at_most_n_mbchars(
col->prtype, col->mbminlen, col->mbmaxlen,
ind_field->prefix_len,
- row_field->len, row_field->data);
- } else {
- field->len = row_field->len;
+ len, dfield_get_data(row_field));
+
+ ut_ad(!dfield_is_ext(row_field));
}
- field->data = row_field->data;
+ dfield_set_data(field, dfield_get_data(row_field), len);
+ if (dfield_is_ext(row_field)) {
+ ut_ad(dict_index_is_clust(index));
+ dfield_set_ext(field);
+ }
}
}
-/***************************************************************
-Inserts a single index entry to the table. */
+/***********************************************************//**
+Inserts a single index entry to the table.
+@return DB_SUCCESS if operation successfully completed, else error
+code or DB_LOCK_WAIT */
static
ulint
row_ins_index_entry_step(
/*=====================*/
- /* out: DB_SUCCESS if operation successfully
- completed, else error code or DB_LOCK_WAIT */
- ins_node_t* node, /* in: row insert node */
- que_thr_t* thr) /* in: query thread */
+ ins_node_t* node, /*!< in: row insert node */
+ que_thr_t* thr) /*!< in: query thread */
{
ulint err;
@@ -2256,24 +2242,24 @@ row_ins_index_entry_step(
ut_ad(dtuple_check_typed(node->entry));
- err = row_ins_index_entry(node->index, node->entry, NULL, 0, thr);
+ err = row_ins_index_entry(node->index, node->entry, 0, TRUE, thr);
return(err);
}
-/***************************************************************
+/***********************************************************//**
Allocates a row id for row and inits the node->index field. */
UNIV_INLINE
void
row_ins_alloc_row_id_step(
/*======================*/
- ins_node_t* node) /* in: row insert node */
+ ins_node_t* node) /*!< in: row insert node */
{
dulint row_id;
ut_ad(node->state == INS_NODE_ALLOC_ROW_ID);
- if (dict_table_get_first_index(node->table)->type & DICT_UNIQUE) {
+ if (dict_index_is_unique(dict_table_get_first_index(node->table))) {
/* No row id is stored if the clustered index is unique */
@@ -2287,13 +2273,13 @@ row_ins_alloc_row_id_step(
dict_sys_write_row_id(node->row_id_buf, row_id);
}
-/***************************************************************
+/***********************************************************//**
Gets a row to insert from the values list. */
UNIV_INLINE
void
row_ins_get_row_from_values(
/*========================*/
- ins_node_t* node) /* in: row insert node */
+ ins_node_t* node) /*!< in: row insert node */
{
que_node_t* list_node;
dfield_t* dfield;
@@ -2320,13 +2306,13 @@ row_ins_get_row_from_values(
}
}
-/***************************************************************
+/***********************************************************//**
Gets a row to insert from the select list. */
UNIV_INLINE
void
row_ins_get_row_from_select(
/*========================*/
- ins_node_t* node) /* in: row insert node */
+ ins_node_t* node) /*!< in: row insert node */
{
que_node_t* list_node;
dfield_t* dfield;
@@ -2351,16 +2337,16 @@ row_ins_get_row_from_select(
}
}
-/***************************************************************
-Inserts a row to a table. */
-
+/***********************************************************//**
+Inserts a row to a table.
+@return DB_SUCCESS if operation successfully completed, else error
+code or DB_LOCK_WAIT */
+static
ulint
row_ins(
/*====*/
- /* out: DB_SUCCESS if operation successfully
- completed, else error code or DB_LOCK_WAIT */
- ins_node_t* node, /* in: row insert node */
- que_thr_t* thr) /* in: query thread */
+ ins_node_t* node, /*!< in: row insert node */
+ que_thr_t* thr) /*!< in: query thread */
{
ulint err;
@@ -2406,15 +2392,15 @@ row_ins(
return(DB_SUCCESS);
}
-/***************************************************************
+/***********************************************************//**
Inserts a row to a table. This is a high-level function used in SQL execution
-graphs. */
-
+graphs.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
row_ins_step(
/*=========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
ins_node_t* node;
que_node_t* parent;
diff --git a/storage/innodb_plugin/row/row0merge.c b/storage/innobase/row/row0merge.c
index 25f041c0885..25f041c0885 100644
--- a/storage/innodb_plugin/row/row0merge.c
+++ b/storage/innobase/row/row0merge.c
diff --git a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c
index 4fcb1fbf9f2..181c39de881 100644
--- a/storage/innobase/row/row0mysql.c
+++ b/storage/innobase/row/row0mysql.c
@@ -1,9 +1,26 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 2000, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0mysql.c
Interface between Innobase row operations and MySQL.
Contains also create table and other data dictionary operations.
-(c) 2000 Innobase Oy
-
Created 9/17/2000 Heikki Tuuri
*******************************************************/
@@ -14,6 +31,7 @@ Created 9/17/2000 Heikki Tuuri
#endif
#include "row0ins.h"
+#include "row0merge.h"
#include "row0sel.h"
#include "row0upd.h"
#include "row0row.h"
@@ -25,6 +43,8 @@ Created 9/17/2000 Heikki Tuuri
#include "dict0boot.h"
#include "trx0roll.h"
#include "trx0purge.h"
+#include "trx0rec.h"
+#include "trx0undo.h"
#include "lock0lock.h"
#include "rem0cmp.h"
#include "log0log.h"
@@ -32,45 +52,54 @@ Created 9/17/2000 Heikki Tuuri
#include "fil0fil.h"
#include "ibuf0ibuf.h"
-/* A dummy variable used to fool the compiler */
-ibool row_mysql_identically_false = FALSE;
+/** Provide optional 4.x backwards compatibility for 5.0 and above */
+UNIV_INTERN ibool row_rollback_on_timeout = FALSE;
-/* Provide optional 4.x backwards compatibility for 5.0 and above */
-ibool row_rollback_on_timeout = FALSE;
-
-/* List of tables we should drop in background. ALTER TABLE in MySQL requires
-that the table handler can drop the table in background when there are no
-queries to it any more. Protected by the kernel mutex. */
+/** Chain node of the list of tables to drop in the background. */
typedef struct row_mysql_drop_struct row_mysql_drop_t;
+
+/** Chain node of the list of tables to drop in the background. */
struct row_mysql_drop_struct{
- char* table_name;
- UT_LIST_NODE_T(row_mysql_drop_t) row_mysql_drop_list;
+ char* table_name; /*!< table name */
+ UT_LIST_NODE_T(row_mysql_drop_t)row_mysql_drop_list;
+ /*!< list chain node */
};
-UT_LIST_BASE_NODE_T(row_mysql_drop_t) row_mysql_drop_list;
-ibool row_mysql_drop_list_inited = FALSE;
+/** @brief List of tables we should drop in background.
-/* Magic table names for invoking various monitor threads */
+ALTER TABLE in MySQL requires that the table handler can drop the
+table in background when there are no queries to it any
+more. Protected by kernel_mutex. */
+static UT_LIST_BASE_NODE_T(row_mysql_drop_t) row_mysql_drop_list;
+/** Flag: has row_mysql_drop_list been initialized? */
+static ibool row_mysql_drop_list_inited = FALSE;
+
+/** Magic table names for invoking various monitor threads */
+/* @{ */
static const char S_innodb_monitor[] = "innodb_monitor";
static const char S_innodb_lock_monitor[] = "innodb_lock_monitor";
static const char S_innodb_tablespace_monitor[] = "innodb_tablespace_monitor";
static const char S_innodb_table_monitor[] = "innodb_table_monitor";
static const char S_innodb_mem_validate[] = "innodb_mem_validate";
-
-/* Evaluates to true if str1 equals str2_onstack, used for comparing
-the above strings. */
+/* @} */
+
+/** Evaluates to true if str1 equals str2_onstack, used for comparing
+the magic table names.
+@param str1 in: string to compare
+@param str1_len in: length of str1, in bytes, including terminating NUL
+@param str2_onstack in: char[] array containing a NUL terminated string
+@return TRUE if str1 equals str2_onstack */
#define STR_EQ(str1, str1_len, str2_onstack) \
((str1_len) == sizeof(str2_onstack) \
&& memcmp(str1, str2_onstack, sizeof(str2_onstack)) == 0)
-/***********************************************************************
-Determine if the given name is a name reserved for MySQL system tables. */
+/*******************************************************************//**
+Determine if the given name is a name reserved for MySQL system tables.
+@return TRUE if name is a MySQL system table name */
static
ibool
row_mysql_is_system_table(
/*======================*/
- /* out: TRUE if name is a MySQL
- system table name */
const char* name)
{
if (strncmp(name, "mysql/", 6) != 0) {
@@ -83,7 +112,20 @@ row_mysql_is_system_table(
|| 0 == strcmp(name + 6, "db"));
}
-/***********************************************************************
+/*********************************************************************//**
+If a table is not yet in the drop list, adds the table to the list of tables
+which the master thread drops in background. We need this on Unix because in
+ALTER TABLE MySQL may call drop table even if the table has running queries on
+it. Also, if there are running foreign key checks on the table, we drop the
+table lazily.
+@return TRUE if the table was not yet in the drop list, and was added there */
+static
+ibool
+row_add_table_to_background_drop_list(
+/*==================================*/
+ const char* name); /*!< in: table name */
+
+/*******************************************************************//**
Delays an INSERT, DELETE or UPDATE operation if the purge is lagging. */
static
void
@@ -95,31 +137,31 @@ row_mysql_delay_if_needed(void)
}
}
-/***********************************************************************
+/*******************************************************************//**
Frees the blob heap in prebuilt when no longer needed. */
-
+UNIV_INTERN
void
row_mysql_prebuilt_free_blob_heap(
/*==============================*/
- row_prebuilt_t* prebuilt) /* in: prebuilt struct of a
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct of a
ha_innobase:: table handle */
{
mem_heap_free(prebuilt->blob_heap);
prebuilt->blob_heap = NULL;
}
-/***********************************************************************
+/*******************************************************************//**
Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row
-format. */
-
+format.
+@return pointer to the data; the 1 or 2 bytes at the start that are
+used to store the length are skipped
+UNIV_INTERN
byte*
row_mysql_store_true_var_len(
/*=========================*/
- /* out: pointer to the data, we skip the 1 or 2 bytes
- at the start that are used to store the len */
- byte* dest, /* in: where to store */
- ulint len, /* in: length, must fit in two bytes */
- ulint lenlen) /* in: storage length of len: either 1 or 2 bytes */
+ byte* dest, /*!< in: where to store */
+ ulint len, /*!< in: length, must fit in two bytes */
+ ulint lenlen) /*!< in: storage length of len: either 1 or 2 bytes */
{
if (lenlen == 2) {
ut_a(len < 256 * 256);
@@ -137,18 +179,19 @@ row_mysql_store_true_var_len(
return(dest + 1);
}
-/***********************************************************************
+/*******************************************************************//**
Reads a >= 5.0.3 format true VARCHAR length, in the MySQL row format, and
-returns a pointer to the data. */
-
-byte*
+returns a pointer to the data.
+@return pointer to the data; the 1 or 2 bytes at the start that are
+used to store the length are skipped
+UNIV_INTERN
+const byte*
row_mysql_read_true_varchar(
/*========================*/
- /* out: pointer to the data, we skip the 1 or 2 bytes
- at the start that are used to store the len */
- ulint* len, /* out: variable-length field length */
- byte* field, /* in: field in the MySQL format */
- ulint lenlen) /* in: storage length of len: either 1 or 2 bytes */
+ ulint* len, /*!< out: variable-length field length */
+ const byte* field, /*!< in: field in the MySQL format */
+ ulint lenlen) /*!< in: storage length of len: either 1
+ or 2 bytes */
{
if (lenlen == 2) {
*len = mach_read_from_2_little_endian(field);
@@ -163,20 +206,20 @@ row_mysql_read_true_varchar(
return(field + 1);
}
-/***********************************************************************
+/*******************************************************************//**
Stores a reference to a BLOB in the MySQL format. */
-
+UNIV_INTERN
void
row_mysql_store_blob_ref(
/*=====================*/
- byte* dest, /* in: where to store */
- ulint col_len, /* in: dest buffer size: determines into
+ byte* dest, /*!< in: where to store */
+ ulint col_len,/*!< in: dest buffer size: determines into
how many bytes the BLOB length is stored,
the space for the length may vary from 1
to 4 bytes */
- byte* data, /* in: BLOB data; if the value to store
+ const void* data, /*!< in: BLOB data; if the value to store
is SQL NULL this should be NULL pointer */
- ulint len) /* in: BLOB length; if the value to store
+ ulint len) /*!< in: BLOB length; if the value to store
is SQL NULL this should be 0; remember
also to set the NULL bit in the MySQL record
header! */
@@ -197,68 +240,68 @@ row_mysql_store_blob_ref(
mach_write_to_n_little_endian(dest, col_len - 8, len);
- ut_memcpy(dest + col_len - 8, &data, sizeof(byte*));
+ memcpy(dest + col_len - 8, &data, sizeof data);
}
-/***********************************************************************
-Reads a reference to a BLOB in the MySQL format. */
-
-byte*
+/*******************************************************************//**
+Reads a reference to a BLOB in the MySQL format.
+@return pointer to BLOB data */
+UNIV_INTERN
+const byte*
row_mysql_read_blob_ref(
/*====================*/
- /* out: pointer to BLOB data */
- ulint* len, /* out: BLOB length */
- byte* ref, /* in: BLOB reference in the MySQL format */
- ulint col_len) /* in: BLOB reference length (not BLOB
- length) */
+ ulint* len, /*!< out: BLOB length */
+ const byte* ref, /*!< in: BLOB reference in the
+ MySQL format */
+ ulint col_len) /*!< in: BLOB reference length
+ (not BLOB length) */
{
byte* data;
*len = mach_read_from_n_little_endian(ref, col_len - 8);
- ut_memcpy(&data, ref + col_len - 8, sizeof(byte*));
+ memcpy(&data, ref + col_len - 8, sizeof data);
return(data);
}
-/******************************************************************
+/**************************************************************//**
Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format.
The counterpart of this function is row_sel_field_store_in_mysql_format() in
-row0sel.c. */
-
+row0sel.c.
+@return up to which byte we used buf in the conversion */
+UNIV_INTERN
byte*
row_mysql_store_col_in_innobase_format(
/*===================================*/
- /* out: up to which byte we used
- buf in the conversion */
- dfield_t* dfield, /* in/out: dfield where dtype
+ dfield_t* dfield, /*!< in/out: dfield where dtype
information must be already set when
this function is called! */
- byte* buf, /* in/out: buffer for a converted
+ byte* buf, /*!< in/out: buffer for a converted
integer value; this must be at least
col_len long then! */
- ibool row_format_col, /* TRUE if the mysql_data is from
+ ibool row_format_col, /*!< TRUE if the mysql_data is from
a MySQL row, FALSE if from a MySQL
key value;
in MySQL, a true VARCHAR storage
format differs in a row and in a
key value: in a key value the length
is always stored in 2 bytes! */
- byte* mysql_data, /* in: MySQL column value, not
+ const byte* mysql_data, /*!< in: MySQL column value, not
SQL NULL; NOTE that dfield may also
get a pointer to mysql_data,
therefore do not discard this as long
as dfield is used! */
- ulint col_len, /* in: MySQL column length; NOTE that
+ ulint col_len, /*!< in: MySQL column length; NOTE that
this is the storage length of the
column in the MySQL format row, not
necessarily the length of the actual
payload data; if the column is a true
VARCHAR then this is irrelevant */
- ulint comp) /* in: nonzero=compact format */
+ ulint comp) /*!< in: nonzero=compact format */
{
- byte* ptr = mysql_data;
- dtype_t* dtype;
+ const byte* ptr = mysql_data;
+ const dtype_t* dtype;
ulint type;
ulint lenlen;
@@ -271,12 +314,12 @@ row_mysql_store_col_in_innobase_format(
sign bit negated if the data is a signed integer. In MySQL,
integers are stored in a little-endian format. */
- ptr = buf + col_len;
+ byte* p = buf + col_len;
for (;;) {
- ptr--;
- *ptr = *mysql_data;
- if (ptr == buf) {
+ p--;
+ *p = *mysql_data;
+ if (p == buf) {
break;
}
mysql_data++;
@@ -284,9 +327,10 @@ row_mysql_store_col_in_innobase_format(
if (!(dtype->prtype & DATA_UNSIGNED)) {
- *ptr = (byte) (*ptr ^ 128);
+ *buf ^= 128;
}
+ ptr = buf;
buf += col_len;
} else if ((type == DATA_VARCHAR
|| type == DATA_VARMYSQL
@@ -382,7 +426,7 @@ row_mysql_store_col_in_innobase_format(
return(buf);
}
-/******************************************************************
+/**************************************************************//**
Convert a row in the MySQL format to a row in the Innobase format. Note that
the function to convert a MySQL format key value to an InnoDB dtuple is
row_sel_convert_mysql_key_to_innobase() in row0sel.c. */
@@ -390,12 +434,12 @@ static
void
row_mysql_convert_row_to_innobase(
/*==============================*/
- dtuple_t* row, /* in/out: Innobase row where the
+ dtuple_t* row, /*!< in/out: Innobase row where the
field type information is already
copied there! */
- row_prebuilt_t* prebuilt, /* in: prebuilt struct where template
+ row_prebuilt_t* prebuilt, /*!< in: prebuilt struct where template
must be of type ROW_MYSQL_WHOLE_ROW */
- byte* mysql_rec) /* in: row in the MySQL format;
+ byte* mysql_rec) /*!< in: row in the MySQL format;
NOTE: do not discard as long as
row is used, as row may contain
pointers to this record! */
@@ -420,7 +464,7 @@ row_mysql_convert_row_to_innobase(
/* It is SQL NULL */
- dfield_set_data(dfield, NULL, UNIV_SQL_NULL);
+ dfield_set_null(dfield);
goto next_column;
}
@@ -438,23 +482,22 @@ next_column:
}
}
-/********************************************************************
-Handles user errors and lock waits detected by the database engine. */
-
+/****************************************************************//**
+Handles user errors and lock waits detected by the database engine.
+@return TRUE if it was a lock wait and we should continue running the
+query thread */
+UNIV_INTERN
ibool
row_mysql_handle_errors(
/*====================*/
- /* out: TRUE if it was a lock wait and
- we should continue running the query thread */
- ulint* new_err,/* out: possible new error encountered in
+ ulint* new_err,/*!< out: possible new error encountered in
lock wait, or if no new error, the value
of trx->error_state at the entry of this
function */
- trx_t* trx, /* in: transaction */
- que_thr_t* thr, /* in: query thread */
- trx_savept_t* savept) /* in: savepoint or NULL */
+ trx_t* trx, /*!< in: transaction */
+ que_thr_t* thr, /*!< in: query thread */
+ trx_savept_t* savept) /*!< in: savepoint or NULL */
{
-#ifndef UNIV_HOTBACKUP
ulint err;
handle_new_error:
@@ -464,35 +507,30 @@ handle_new_error:
trx->error_state = DB_SUCCESS;
- if ((err == DB_DUPLICATE_KEY)
- || (err == DB_FOREIGN_DUPLICATE_KEY)) {
- if (savept) {
- /* Roll back the latest, possibly incomplete
- insertion or update */
-
- trx_general_rollback_for_mysql(trx, TRUE, savept);
- }
- } else if (err == DB_TOO_BIG_RECORD) {
- if (savept) {
- /* Roll back the latest, possibly incomplete
- insertion or update */
-
- trx_general_rollback_for_mysql(trx, TRUE, savept);
+ switch (err) {
+ case DB_LOCK_WAIT_TIMEOUT:
+ if (row_rollback_on_timeout) {
+ trx_general_rollback_for_mysql(trx, NULL);
+ break;
}
- /* MySQL will roll back the latest SQL statement */
- } else if (err == DB_ROW_IS_REFERENCED
- || err == DB_NO_REFERENCED_ROW
- || err == DB_CANNOT_ADD_CONSTRAINT
- || err == DB_TOO_MANY_CONCURRENT_TRXS) {
+ /* fall through */
+ case DB_DUPLICATE_KEY:
+ case DB_FOREIGN_DUPLICATE_KEY:
+ case DB_TOO_BIG_RECORD:
+ case DB_ROW_IS_REFERENCED:
+ case DB_NO_REFERENCED_ROW:
+ case DB_CANNOT_ADD_CONSTRAINT:
+ case DB_TOO_MANY_CONCURRENT_TRXS:
+ case DB_OUT_OF_FILE_SPACE:
if (savept) {
/* Roll back the latest, possibly incomplete
insertion or update */
- trx_general_rollback_for_mysql(trx, TRUE, savept);
+ trx_general_rollback_for_mysql(trx, savept);
}
/* MySQL will roll back the latest SQL statement */
- } else if (err == DB_LOCK_WAIT) {
-
+ break;
+ case DB_LOCK_WAIT:
srv_suspend_mysql_thread(thr);
if (trx->error_state != DB_SUCCESS) {
@@ -505,31 +543,15 @@ handle_new_error:
return(TRUE);
- } else if (err == DB_DEADLOCK
- || err == DB_LOCK_TABLE_FULL
- || (err == DB_LOCK_WAIT_TIMEOUT
- && row_rollback_on_timeout)) {
+ case DB_DEADLOCK:
+ case DB_LOCK_TABLE_FULL:
/* Roll back the whole transaction; this resolution was added
to version 3.23.43 */
- trx_general_rollback_for_mysql(trx, FALSE, NULL);
-
- } else if (err == DB_OUT_OF_FILE_SPACE
- || err == DB_LOCK_WAIT_TIMEOUT) {
-
- ut_ad(!(err == DB_LOCK_WAIT_TIMEOUT
- && row_rollback_on_timeout));
-
- if (savept) {
- /* Roll back the latest, possibly incomplete
- insertion or update */
-
- trx_general_rollback_for_mysql(trx, TRUE, savept);
- }
- /* MySQL will roll back the latest SQL statement */
-
- } else if (err == DB_MUST_GET_MORE_FILE_SPACE) {
+ trx_general_rollback_for_mysql(trx, NULL);
+ break;
+ case DB_MUST_GET_MORE_FILE_SPACE:
fputs("InnoDB: The database cannot continue"
" operation because of\n"
"InnoDB: lack of space. You must add"
@@ -537,8 +559,8 @@ handle_new_error:
"InnoDB: my.cnf and restart the database.\n", stderr);
exit(1);
- } else if (err == DB_CORRUPTION) {
+ case DB_CORRUPTION:
fputs("InnoDB: We detected index corruption"
" in an InnoDB type table.\n"
"InnoDB: You have to dump + drop + reimport"
@@ -550,11 +572,10 @@ handle_new_error:
"InnoDB: If the mysqld server crashes"
" after the startup or when\n"
"InnoDB: you dump the tables, look at\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "forcing-recovery.html"
+ "InnoDB: " REFMAN "forcing-recovery.html"
" for help.\n", stderr);
-
- } else {
+ break;
+ default:
fprintf(stderr, "InnoDB: unknown error code %lu\n",
(ulong) err);
ut_error;
@@ -569,62 +590,34 @@ handle_new_error:
trx->error_state = DB_SUCCESS;
return(FALSE);
-#else /* UNIV_HOTBACKUP */
- /* This function depends on MySQL code that is not included in
- InnoDB Hot Backup builds. Besides, this function should never
- be called in InnoDB Hot Backup. */
- ut_error;
- return(FALSE);
-#endif /* UNIV_HOTBACKUP */
}
-/************************************************************************
-Create a prebuilt struct for a MySQL table handle. */
-
+/********************************************************************//**
+Create a prebuilt struct for a MySQL table handle.
+@return own: a prebuilt struct */
+UNIV_INTERN
row_prebuilt_t*
row_create_prebuilt(
/*================*/
- /* out, own: a prebuilt struct */
- dict_table_t* table) /* in: Innobase table handle */
+ dict_table_t* table) /*!< in: Innobase table handle */
{
row_prebuilt_t* prebuilt;
mem_heap_t* heap;
dict_index_t* clust_index;
dtuple_t* ref;
ulint ref_len;
- ulint i;
- heap = mem_heap_create(128);
+ heap = mem_heap_create(sizeof *prebuilt + 128);
- prebuilt = mem_heap_alloc(heap, sizeof(row_prebuilt_t));
+ prebuilt = mem_heap_zalloc(heap, sizeof *prebuilt);
prebuilt->magic_n = ROW_PREBUILT_ALLOCATED;
prebuilt->magic_n2 = ROW_PREBUILT_ALLOCATED;
prebuilt->table = table;
- prebuilt->trx = NULL;
-
prebuilt->sql_stat_start = TRUE;
-
- prebuilt->mysql_has_locked = FALSE;
-
- prebuilt->index = NULL;
-
- prebuilt->used_in_HANDLER = FALSE;
-
- prebuilt->n_template = 0;
- prebuilt->mysql_template = NULL;
-
prebuilt->heap = heap;
- prebuilt->ins_node = NULL;
-
- prebuilt->ins_upd_rec_buff = NULL;
- prebuilt->default_rec = NULL;
-
- prebuilt->upd_node = NULL;
- prebuilt->ins_graph = NULL;
- prebuilt->upd_graph = NULL;
prebuilt->pcur = btr_pcur_create_for_mysql();
prebuilt->clust_pcur = btr_pcur_create_for_mysql();
@@ -632,10 +625,6 @@ row_create_prebuilt(
prebuilt->select_lock_type = LOCK_NONE;
prebuilt->stored_select_lock_type = 99999999;
- prebuilt->row_read_type = ROW_READ_WITH_LOCKS;
-
- prebuilt->sel_graph = NULL;
-
prebuilt->search_tuple = dtuple_create(
heap, 2 * dict_table_get_n_cols(table));
@@ -652,16 +641,6 @@ row_create_prebuilt(
prebuilt->clust_ref = ref;
- for (i = 0; i < MYSQL_FETCH_CACHE_SIZE; i++) {
- prebuilt->fetch_cache[i] = NULL;
- }
-
- prebuilt->n_fetch_cached = 0;
-
- prebuilt->blob_heap = NULL;
-
- prebuilt->old_vers_heap = NULL;
-
prebuilt->autoinc_error = 0;
prebuilt->autoinc_offset = 0;
@@ -674,22 +653,25 @@ row_create_prebuilt(
return(prebuilt);
}
-/************************************************************************
+/********************************************************************//**
Free a prebuilt struct for a MySQL table handle. */
-
+UNIV_INTERN
void
row_prebuilt_free(
/*==============*/
- row_prebuilt_t* prebuilt) /* in, own: prebuilt struct */
+ row_prebuilt_t* prebuilt, /*!< in, own: prebuilt struct */
+ ibool dict_locked) /*!< in: TRUE=data dictionary locked */
{
ulint i;
- if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED
- || prebuilt->magic_n2 != ROW_PREBUILT_ALLOCATED) {
+ if (UNIV_UNLIKELY
+ (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED
+ || prebuilt->magic_n2 != ROW_PREBUILT_ALLOCATED)) {
+
fprintf(stderr,
"InnoDB: Error: trying to free a corrupt\n"
"InnoDB: table handle. Magic n %lu,"
- " magic n2 %lu, table name",
+ " magic n2 %lu, table name ",
(ulong) prebuilt->magic_n,
(ulong) prebuilt->magic_n2);
ut_print_name(stderr, NULL, TRUE, prebuilt->table->name);
@@ -751,22 +733,21 @@ row_prebuilt_free(
}
}
- dict_table_decrement_handle_count(prebuilt->table);
+ dict_table_decrement_handle_count(prebuilt->table, dict_locked);
mem_heap_free(prebuilt->heap);
}
-/*************************************************************************
+/*********************************************************************//**
Updates the transaction pointers in query graphs stored in the prebuilt
struct. */
-
+UNIV_INTERN
void
row_update_prebuilt_trx(
/*====================*/
- /* out: prebuilt dtuple */
- row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL
- handle */
- trx_t* trx) /* in: transaction handle */
+ row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct
+ in MySQL handle */
+ trx_t* trx) /*!< in: transaction handle */
{
if (trx->magic_n != TRX_MAGIC_N) {
fprintf(stderr,
@@ -782,9 +763,9 @@ row_update_prebuilt_trx(
if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) {
fprintf(stderr,
"InnoDB: Error: trying to use a corrupt\n"
- "InnoDB: table handle. Magic n %lu, table name",
+ "InnoDB: table handle. Magic n %lu, table name ",
(ulong) prebuilt->magic_n);
- ut_print_name(stderr, NULL, TRUE, prebuilt->table->name);
+ ut_print_name(stderr, trx, TRUE, prebuilt->table->name);
putc('\n', stderr);
mem_analyze_corruption(prebuilt);
@@ -807,23 +788,21 @@ row_update_prebuilt_trx(
}
}
-/*************************************************************************
+/*********************************************************************//**
Gets pointer to a prebuilt dtuple used in insertions. If the insert graph
has not yet been built in the prebuilt struct, then this function first
-builds it. */
+builds it.
+@return prebuilt dtuple; the column type information is also set in it */
static
dtuple_t*
row_get_prebuilt_insert_row(
/*========================*/
- /* out: prebuilt dtuple; the column
- type information is also set in it */
- row_prebuilt_t* prebuilt) /* in: prebuilt struct in MySQL
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
handle */
{
ins_node_t* node;
dtuple_t* row;
dict_table_t* table = prebuilt->table;
- ulint i;
ut_ad(prebuilt && table && prebuilt->trx);
@@ -846,14 +825,6 @@ row_get_prebuilt_insert_row(
dict_table_copy_types(row, table);
- /* We init the value of every field to the SQL NULL to avoid
- a debug assertion from failing */
-
- for (i = 0; i < dtuple_get_n_fields(row); i++) {
-
- dtuple_get_nth_field(row, i)->len = UNIV_SQL_NULL;
- }
-
ins_node_set_new_row(node, row);
prebuilt->ins_graph = que_node_get_parent(
@@ -866,14 +837,14 @@ row_get_prebuilt_insert_row(
return(prebuilt->ins_node->row);
}
-/*************************************************************************
+/*********************************************************************//**
Updates the table modification counter and calculates new estimates
for table and index statistics if necessary. */
UNIV_INLINE
void
row_update_statistics_if_needed(
/*============================*/
- dict_table_t* table) /* in: table */
+ dict_table_t* table) /*!< in: table */
{
ulint counter;
@@ -888,52 +859,59 @@ row_update_statistics_if_needed(
a counter table which is very small and updated very often. */
if (counter > 2000000000
- || ((ib_longlong)counter > 16 + table->stat_n_rows / 16)) {
+ || ((ib_int64_t)counter > 16 + table->stat_n_rows / 16)) {
dict_update_statistics(table);
}
}
-/*************************************************************************
-Unlocks an AUTO_INC type lock possibly reserved by trx. */
-
+/*********************************************************************//**
+Unlocks AUTO_INC type locks that were possibly reserved by a trx. This
+function should be called at the end of an SQL statement, by the
+connection thread that owns the transaction (trx->mysql_thd). */
+UNIV_INTERN
void
row_unlock_table_autoinc_for_mysql(
/*===============================*/
- trx_t* trx) /* in: transaction */
+ trx_t* trx) /*!< in/out: transaction */
{
- if (!trx->auto_inc_lock) {
+ if (lock_trx_holds_autoinc_locks(trx)) {
+ mutex_enter(&kernel_mutex);
- return;
- }
+ lock_release_autoinc_locks(trx);
- lock_table_unlock_auto_inc(trx);
+ mutex_exit(&kernel_mutex);
+ }
}
-/*************************************************************************
+/*********************************************************************//**
Sets an AUTO_INC type lock on the table mentioned in prebuilt. The
AUTO_INC lock gives exclusive access to the auto-inc counter of the
table. The lock is reserved only for the duration of an SQL statement.
It is not compatible with another AUTO_INC or exclusive lock on the
-table. */
-
+table.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
row_lock_table_autoinc_for_mysql(
/*=============================*/
- /* out: error code or DB_SUCCESS */
- row_prebuilt_t* prebuilt) /* in: prebuilt struct in the MySQL
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in the MySQL
table handle */
{
- trx_t* trx = prebuilt->trx;
- ins_node_t* node = prebuilt->ins_node;
- que_thr_t* thr;
- ulint err;
- ibool was_lock_wait;
+ trx_t* trx = prebuilt->trx;
+ ins_node_t* node = prebuilt->ins_node;
+ const dict_table_t* table = prebuilt->table;
+ que_thr_t* thr;
+ ulint err;
+ ibool was_lock_wait;
ut_ad(trx);
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
- if (trx->auto_inc_lock) {
+ /* If we already hold an AUTOINC lock on the table then do nothing.
+ Note: We peek at the value of the current owner without acquiring
+ the kernel mutex. **/
+ if (trx == table->autoinc_trx) {
return(DB_SUCCESS);
}
@@ -986,20 +964,20 @@ run_again:
return((int) err);
}
-/*************************************************************************
-Sets a table lock on the table mentioned in prebuilt. */
-
+/*********************************************************************//**
+Sets a table lock on the table mentioned in prebuilt.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
row_lock_table_for_mysql(
/*=====================*/
- /* out: error code or DB_SUCCESS */
- row_prebuilt_t* prebuilt, /* in: prebuilt struct in the MySQL
+ row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in the MySQL
table handle */
- dict_table_t* table, /* in: table to lock, or NULL
+ dict_table_t* table, /*!< in: table to lock, or NULL
if prebuilt->table should be
locked as
prebuilt->select_lock_type */
- ulint mode) /* in: lock mode of table
+ ulint mode) /*!< in: lock mode of table
(ignored if table==NULL) */
{
trx_t* trx = prebuilt->trx;
@@ -1063,15 +1041,15 @@ run_again:
return((int) err);
}
-/*************************************************************************
-Does an insert for MySQL. */
-
+/*********************************************************************//**
+Does an insert for MySQL.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
row_insert_for_mysql(
/*=================*/
- /* out: error code or DB_SUCCESS */
- byte* mysql_rec, /* in: row in the MySQL format */
- row_prebuilt_t* prebuilt) /* in: prebuilt struct in MySQL
+ byte* mysql_rec, /*!< in: row in the MySQL format */
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
handle */
{
trx_savept_t savept;
@@ -1095,20 +1073,18 @@ row_insert_for_mysql(
"InnoDB: the MySQL datadir, or have you"
" used DISCARD TABLESPACE?\n"
"InnoDB: Look from\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "innodb-troubleshooting.html\n"
+ "InnoDB: " REFMAN "innodb-troubleshooting.html\n"
"InnoDB: how you can resolve the problem.\n",
prebuilt->table->name);
return(DB_ERROR);
}
- if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) {
+ if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) {
fprintf(stderr,
"InnoDB: Error: trying to free a corrupt\n"
- "InnoDB: table handle. Magic n %lu, table name",
+ "InnoDB: table handle. Magic n %lu, table name ",
(ulong) prebuilt->magic_n);
- ut_print_name(stderr, prebuilt->trx, TRUE,
- prebuilt->table->name);
+ ut_print_name(stderr, trx, TRUE, prebuilt->table->name);
putc('\n', stderr);
mem_analyze_corruption(prebuilt);
@@ -1116,7 +1092,7 @@ row_insert_for_mysql(
ut_error;
}
- if (srv_created_new_raw || srv_force_recovery) {
+ if (UNIV_UNLIKELY(srv_created_new_raw || srv_force_recovery)) {
fputs("InnoDB: A new raw disk partition was initialized or\n"
"InnoDB: innodb_force_recovery is on: we do not allow\n"
"InnoDB: database modifications by the user. Shut down\n"
@@ -1197,13 +1173,13 @@ run_again:
return((int) err);
}
-/*************************************************************************
+/*********************************************************************//**
Builds a dummy query graph used in selects. */
-
+UNIV_INTERN
void
row_prebuild_sel_graph(
/*===================*/
- row_prebuilt_t* prebuilt) /* in: prebuilt struct in MySQL
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
handle */
{
sel_node_t* node;
@@ -1223,16 +1199,16 @@ row_prebuild_sel_graph(
}
}
-/*************************************************************************
+/*********************************************************************//**
Creates a query graph node of 'update' type to be used in the MySQL
-interface. */
-
+interface.
+@return own: update node */
+UNIV_INTERN
upd_node_t*
row_create_update_node_for_mysql(
/*=============================*/
- /* out, own: update node */
- dict_table_t* table, /* in: table to update */
- mem_heap_t* heap) /* in: mem heap from which allocated */
+ dict_table_t* table, /*!< in: table to update */
+ mem_heap_t* heap) /*!< in: mem heap from which allocated */
{
upd_node_t* node;
@@ -1241,7 +1217,6 @@ row_create_update_node_for_mysql(
node->in_mysql_interface = TRUE;
node->is_delete = FALSE;
node->searched_update = FALSE;
- node->select_will_do_update = FALSE;
node->select = NULL;
node->pcur = btr_pcur_create_for_mysql();
node->table = table;
@@ -1260,16 +1235,16 @@ row_create_update_node_for_mysql(
return(node);
}
-/*************************************************************************
+/*********************************************************************//**
Gets pointer to a prebuilt update vector used in updates. If the update
graph has not yet been built in the prebuilt struct, then this function
-first builds it. */
-
+first builds it.
+@return prebuilt update vector */
+UNIV_INTERN
upd_t*
row_get_prebuilt_update_vector(
/*===========================*/
- /* out: prebuilt update vector */
- row_prebuilt_t* prebuilt) /* in: prebuilt struct in MySQL
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
handle */
{
dict_table_t* table = prebuilt->table;
@@ -1296,16 +1271,16 @@ row_get_prebuilt_update_vector(
return(prebuilt->upd_node->update);
}
-/*************************************************************************
-Does an update or delete of a row for MySQL. */
-
+/*********************************************************************//**
+Does an update or delete of a row for MySQL.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
row_update_for_mysql(
/*=================*/
- /* out: error code or DB_SUCCESS */
- byte* mysql_rec, /* in: the row to be updated, in
+ byte* mysql_rec, /*!< in: the row to be updated, in
the MySQL format */
- row_prebuilt_t* prebuilt) /* in: prebuilt struct in MySQL
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
handle */
{
trx_savept_t savept;
@@ -1333,20 +1308,18 @@ row_update_for_mysql(
"InnoDB: the MySQL datadir, or have you"
" used DISCARD TABLESPACE?\n"
"InnoDB: Look from\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "innodb-troubleshooting.html\n"
+ "InnoDB: " REFMAN "innodb-troubleshooting.html\n"
"InnoDB: how you can resolve the problem.\n",
prebuilt->table->name);
return(DB_ERROR);
}
- if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) {
+ if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) {
fprintf(stderr,
"InnoDB: Error: trying to free a corrupt\n"
- "InnoDB: table handle. Magic n %lu, table name",
+ "InnoDB: table handle. Magic n %lu, table name ",
(ulong) prebuilt->magic_n);
- ut_print_name(stderr, prebuilt->trx, TRUE,
- prebuilt->table->name);
+ ut_print_name(stderr, trx, TRUE, prebuilt->table->name);
putc('\n', stderr);
mem_analyze_corruption(prebuilt);
@@ -1354,7 +1327,7 @@ row_update_for_mysql(
ut_error;
}
- if (srv_created_new_raw || srv_force_recovery) {
+ if (UNIV_UNLIKELY(srv_created_new_raw || srv_force_recovery)) {
fputs("InnoDB: A new raw disk partition was initialized or\n"
"InnoDB: innodb_force_recovery is on: we do not allow\n"
"InnoDB: database modifications by the user. Shut down\n"
@@ -1453,7 +1426,7 @@ run_again:
return((int) err);
}
-/*************************************************************************
+/*********************************************************************//**
This can only be used when srv_locks_unsafe_for_binlog is TRUE or
this session is using a READ COMMITTED isolation level. Before
calling this function we must use trx_reset_new_rec_lock_info() and
@@ -1463,15 +1436,15 @@ and also under prebuilt->clust_pcur. Currently, this is only used and tested
in the case of an UPDATE or a DELETE statement, where the row lock is of the
LOCK_X type.
Thus, this implements a 'mini-rollback' that releases the latest record
-locks we set. */
-
+locks we set.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
row_unlock_for_mysql(
/*=================*/
- /* out: error code or DB_SUCCESS */
- row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL
+ row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in MySQL
handle */
- ibool has_latches_on_recs)/* TRUE if called so that we have
+ ibool has_latches_on_recs)/*!< TRUE if called so that we have
the latches on the records under pcur
and clust_pcur, and we do not need to
reposition the cursors. */
@@ -1500,9 +1473,9 @@ row_unlock_for_mysql(
if (prebuilt->new_rec_locks >= 1) {
- rec_t* rec;
+ const rec_t* rec;
dict_index_t* index;
- dulint rec_trx_id;
+ trx_id_t rec_trx_id;
mtr_t mtr;
mtr_start(&mtr);
@@ -1529,7 +1502,7 @@ row_unlock_for_mysql(
index = btr_pcur_get_btr_cur(clust_pcur)->index;
}
- if (UNIV_UNLIKELY(!(index->type & DICT_CLUSTERED))) {
+ if (UNIV_UNLIKELY(!dict_index_is_clust(index))) {
/* This is not a clustered index record. We
do not know how to unlock the record. */
goto no_unlock;
@@ -1546,7 +1519,7 @@ row_unlock_for_mysql(
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &heap);
@@ -1563,13 +1536,16 @@ row_unlock_for_mysql(
rec = btr_pcur_get_rec(pcur);
index = btr_pcur_get_btr_cur(pcur)->index;
- lock_rec_unlock(trx, rec, prebuilt->select_lock_type);
+ lock_rec_unlock(trx, btr_pcur_get_block(pcur),
+ rec, prebuilt->select_lock_type);
if (prebuilt->new_rec_locks >= 2) {
rec = btr_pcur_get_rec(clust_pcur);
index = btr_pcur_get_btr_cur(clust_pcur)->index;
- lock_rec_unlock(trx, rec,
+ lock_rec_unlock(trx,
+ btr_pcur_get_block(clust_pcur),
+ rec,
prebuilt->select_lock_type);
}
}
@@ -1582,17 +1558,17 @@ no_unlock:
return(DB_SUCCESS);
}
-/**************************************************************************
-Does a cascaded delete or set null in a foreign key operation. */
-
+/**********************************************************************//**
+Does a cascaded delete or set null in a foreign key operation.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
ulint
row_update_cascade_for_mysql(
/*=========================*/
- /* out: error code or DB_SUCCESS */
- que_thr_t* thr, /* in: query thread */
- upd_node_t* node, /* in: update node used in the cascade
+ que_thr_t* thr, /*!< in: query thread */
+ upd_node_t* node, /*!< in: update node used in the cascade
or set null operation */
- dict_table_t* table) /* in: table where we do the operation */
+ dict_table_t* table) /*!< in: table where we do the operation */
{
ulint err;
trx_t* trx;
@@ -1651,14 +1627,15 @@ run_again:
return(err);
}
-/*************************************************************************
+/*********************************************************************//**
Checks if a table is such that we automatically created a clustered
-index on it (on row id). */
-
+index on it (on row id).
+@return TRUE if the clustered index was generated automatically */
+UNIV_INTERN
ibool
row_table_got_default_clust_index(
/*==============================*/
- dict_table_t* table)
+ const dict_table_t* table) /*!< in: table */
{
const dict_index_t* clust_index;
@@ -1667,17 +1644,18 @@ row_table_got_default_clust_index(
return(dict_index_get_nth_col(clust_index, 0)->mtype == DATA_SYS);
}
-/*************************************************************************
+/*********************************************************************//**
Calculates the key number used inside MySQL for an Innobase index. We have
-to take into account if we generated a default clustered index for the table */
-
+to take into account if we generated a default clustered index for the table
+@return the key number used inside MySQL */
+UNIV_INTERN
ulint
row_get_mysql_key_number_for_index(
/*===============================*/
- dict_index_t* index)
+ const dict_index_t* index) /*!< in: index */
{
- dict_index_t* ind;
- ulint i;
+ const dict_index_t* ind;
+ ulint i;
ut_a(index);
@@ -1697,29 +1675,31 @@ row_get_mysql_key_number_for_index(
return(i);
}
-/*************************************************************************
+/*********************************************************************//**
Locks the data dictionary in shared mode from modifications, for performing
foreign key check, rollback, or other operation invisible to MySQL. */
-
+UNIV_INTERN
void
-row_mysql_freeze_data_dictionary(
-/*=============================*/
- trx_t* trx) /* in: transaction */
+row_mysql_freeze_data_dictionary_func(
+/*==================================*/
+ trx_t* trx, /*!< in/out: transaction */
+ const char* file, /*!< in: file name */
+ ulint line) /*!< in: line number */
{
ut_a(trx->dict_operation_lock_mode == 0);
- rw_lock_s_lock(&dict_operation_lock);
+ rw_lock_s_lock_func(&dict_operation_lock, 0, file, line);
trx->dict_operation_lock_mode = RW_S_LATCH;
}
-/*************************************************************************
+/*********************************************************************//**
Unlocks the data dictionary shared lock. */
-
+UNIV_INTERN
void
row_mysql_unfreeze_data_dictionary(
/*===============================*/
- trx_t* trx) /* in: transaction */
+ trx_t* trx) /*!< in/out: transaction */
{
ut_a(trx->dict_operation_lock_mode == RW_S_LATCH);
@@ -1728,14 +1708,16 @@ row_mysql_unfreeze_data_dictionary(
trx->dict_operation_lock_mode = 0;
}
-/*************************************************************************
+/*********************************************************************//**
Locks the data dictionary exclusively for performing a table create or other
data dictionary modification operation. */
-
+UNIV_INTERN
void
-row_mysql_lock_data_dictionary(
-/*===========================*/
- trx_t* trx) /* in: transaction */
+row_mysql_lock_data_dictionary_func(
+/*================================*/
+ trx_t* trx, /*!< in/out: transaction */
+ const char* file, /*!< in: file name */
+ ulint line) /*!< in: line number */
{
ut_a(trx->dict_operation_lock_mode == 0
|| trx->dict_operation_lock_mode == RW_X_LATCH);
@@ -1743,19 +1725,19 @@ row_mysql_lock_data_dictionary(
/* Serialize data dictionary operations with dictionary mutex:
no deadlocks or lock waits can occur then in these operations */
- rw_lock_x_lock(&dict_operation_lock);
+ rw_lock_x_lock_func(&dict_operation_lock, 0, file, line);
trx->dict_operation_lock_mode = RW_X_LATCH;
mutex_enter(&(dict_sys->mutex));
}
-/*************************************************************************
+/*********************************************************************//**
Unlocks the data dictionary exclusive lock. */
-
+UNIV_INTERN
void
row_mysql_unlock_data_dictionary(
/*=============================*/
- trx_t* trx) /* in: transaction */
+ trx_t* trx) /*!< in/out: transaction */
{
ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
@@ -1768,19 +1750,20 @@ row_mysql_unlock_data_dictionary(
trx->dict_operation_lock_mode = 0;
}
-/*************************************************************************
+/*********************************************************************//**
Creates a table for MySQL. If the name of the table ends in
one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
"innodb_table_monitor", then this will also start the printing of monitor
output by the master thread. If the table name ends in "innodb_mem_validate",
-InnoDB will try to invoke mem_validate(). */
-
+InnoDB will try to invoke mem_validate().
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
row_create_table_for_mysql(
/*=======================*/
- /* out: error code or DB_SUCCESS */
- dict_table_t* table, /* in: table definition */
- trx_t* trx) /* in: transaction handle */
+ dict_table_t* table, /*!< in, own: table definition
+ (will be freed) */
+ trx_t* trx) /*!< in: transaction handle */
{
tab_node_t* node;
mem_heap_t* heap;
@@ -1802,7 +1785,7 @@ row_create_table_for_mysql(
" by the user.\n"
"InnoDB: Shut down mysqld and edit my.cnf so that newraw"
" is replaced with raw.\n", stderr);
-
+err_exit:
dict_mem_table_free(table);
trx_commit_for_mysql(trx);
@@ -1819,11 +1802,7 @@ row_create_table_for_mysql(
"InnoDB: MySQL system tables must be"
" of the MyISAM type!\n",
table->name);
-
- dict_mem_table_free(table);
- trx_commit_for_mysql(trx);
-
- return(DB_ERROR);
+ goto err_exit;
}
trx_start_if_not_started(trx);
@@ -1887,7 +1866,7 @@ row_create_table_for_mysql(
heap = mem_heap_create(512);
- trx->dict_operation = TRUE;
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
node = tab_create_graph_create(table, heap);
@@ -1898,60 +1877,33 @@ row_create_table_for_mysql(
err = trx->error_state;
- if (err != DB_SUCCESS) {
- /* We have special error handling here */
-
+ if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
trx->error_state = DB_SUCCESS;
+ trx_general_rollback_for_mysql(trx, NULL);
+ /* TO DO: free table? The code below will dereference
+ table->name, though. */
+ }
- trx_general_rollback_for_mysql(trx, FALSE, NULL);
-
- if (err == DB_OUT_OF_FILE_SPACE) {
- ut_print_timestamp(stderr);
-
- fputs(" InnoDB: Warning: cannot create table ",
- stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs(" because tablespace full\n", stderr);
-
- if (dict_table_get_low(table->name)) {
+ switch (err) {
+ case DB_OUT_OF_FILE_SPACE:
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: Warning: cannot create table ",
+ stderr);
+ ut_print_name(stderr, trx, TRUE, table->name);
+ fputs(" because tablespace full\n", stderr);
- row_drop_table_for_mysql(table->name, trx,
- FALSE);
- }
+ if (dict_table_get_low(table->name)) {
- } else if (err == DB_DUPLICATE_KEY) {
- ut_print_timestamp(stderr);
-
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs(" already exists in InnoDB internal\n"
- "InnoDB: data dictionary. Have you deleted"
- " the .frm file\n"
- "InnoDB: and not used DROP TABLE?"
- " Have you used DROP DATABASE\n"
- "InnoDB: for InnoDB tables in"
- " MySQL version <= 3.23.43?\n"
- "InnoDB: See the Restrictions section"
- " of the InnoDB manual.\n"
- "InnoDB: You can drop the orphaned table"
- " inside InnoDB by\n"
- "InnoDB: creating an InnoDB table with"
- " the same name in another\n"
- "InnoDB: database and copying the .frm file"
- " to the current database.\n"
- "InnoDB: Then MySQL thinks the table exists,"
- " and DROP TABLE will\n"
- "InnoDB: succeed.\n"
- "InnoDB: You can look for further help from\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "innodb-troubleshooting.html\n",
- stderr);
+ row_drop_table_for_mysql(table->name, trx, FALSE);
+ trx_commit_for_mysql(trx);
}
+ break;
+ case DB_DUPLICATE_KEY:
/* We may also get err == DB_ERROR if the .ibd file for the
table already exists */
- trx->error_state = DB_SUCCESS;
+ break;
}
que_graph_free((que_t*) que_node_get_parent(thr));
@@ -1961,18 +1913,19 @@ row_create_table_for_mysql(
return((int) err);
}
-/*************************************************************************
+/*********************************************************************//**
Does an index creation operation for MySQL. TODO: currently failure
to create an index results in dropping the whole table! This is no problem
-currently as all indexes must be created at the same time as the table. */
-
+currently as all indexes must be created at the same time as the table.
+@return error number or DB_SUCCESS */
+UNIV_INTERN
int
row_create_index_for_mysql(
/*=======================*/
- /* out: error number or DB_SUCCESS */
- dict_index_t* index, /* in: index definition */
- trx_t* trx, /* in: transaction handle */
- const ulint* field_lengths) /* in: if not NULL, must contain
+ dict_index_t* index, /*!< in, own: index definition
+ (will be freed) */
+ trx_t* trx, /*!< in: transaction handle */
+ const ulint* field_lengths) /*!< in: if not NULL, must contain
dict_index_get_n_fields(index)
actual field lengths for the
index columns, which are
@@ -1983,7 +1936,7 @@ row_create_index_for_mysql(
mem_heap_t* heap;
que_thr_t* thr;
ulint err;
- ulint i, j;
+ ulint i;
ulint len;
char* table_name;
@@ -2007,11 +1960,12 @@ row_create_index_for_mysql(
safer not to allow them. */
for (i = 0; i < dict_index_get_n_fields(index); i++) {
+ ulint j;
+
for (j = 0; j < i; j++) {
if (0 == ut_strcmp(
dict_index_get_nth_field(index, j)->name,
dict_index_get_nth_field(index, i)->name)) {
-
ut_print_timestamp(stderr);
fputs(" InnoDB: Error: column ", stderr);
@@ -2048,7 +2002,7 @@ row_create_index_for_mysql(
heap = mem_heap_create(512);
- trx->dict_operation = TRUE;
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
/* Note that the space id where we store the index is inherited from
the table in dict_build_index_def_step() in dict0crea.c. */
@@ -2070,10 +2024,12 @@ error_handling:
trx->error_state = DB_SUCCESS;
- trx_general_rollback_for_mysql(trx, FALSE, NULL);
+ trx_general_rollback_for_mysql(trx, NULL);
row_drop_table_for_mysql(table_name, trx, FALSE);
+ trx_commit_for_mysql(trx);
+
trx->error_state = DB_SUCCESS;
}
@@ -2084,29 +2040,29 @@ error_handling:
return((int) err);
}
-/*************************************************************************
+/*********************************************************************//**
Scans a table create SQL string and adds to the data dictionary
the foreign key constraints declared in the string. This function
should be called after the indexes for a table have been created.
Each foreign key constraint must be accompanied with indexes in
both participating tables. The indexes are allowed to contain more
fields than mentioned in the constraint. Check also that foreign key
-constraints which reference this table are ok. */
-
+constraints which reference this table are ok.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
row_table_add_foreign_constraints(
/*==============================*/
- /* out: error code or DB_SUCCESS */
- trx_t* trx, /* in: transaction */
- const char* sql_string, /* in: table create statement where
+ trx_t* trx, /*!< in: transaction */
+ const char* sql_string, /*!< in: table create statement where
foreign keys are declared like:
FOREIGN KEY (a, b) REFERENCES table2(c, d),
table2 can be written also with the
database name before it: test.table2 */
- const char* name, /* in: table full name in the
+ const char* name, /*!< in: table full name in the
normalized form
database_name/table_name */
- ibool reject_fks) /* in: if TRUE, fail with error
+ ibool reject_fks) /*!< in: if TRUE, fail with error
code DB_CANNOT_ADD_CONSTRAINT if
any foreign keys are found. */
{
@@ -2122,11 +2078,10 @@ row_table_add_foreign_constraints(
trx_start_if_not_started(trx);
- trx->dict_operation = TRUE;
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
err = dict_create_foreign_constraints(trx, sql_string, name,
reject_fks);
-
if (err == DB_SUCCESS) {
/* Check that also referencing constraints are ok */
err = dict_load_foreigns(name, TRUE);
@@ -2137,29 +2092,31 @@ row_table_add_foreign_constraints(
trx->error_state = DB_SUCCESS;
- trx_general_rollback_for_mysql(trx, FALSE, NULL);
+ trx_general_rollback_for_mysql(trx, NULL);
row_drop_table_for_mysql(name, trx, FALSE);
+ trx_commit_for_mysql(trx);
+
trx->error_state = DB_SUCCESS;
}
return((int) err);
}
-/*************************************************************************
+/*********************************************************************//**
Drops a table for MySQL as a background operation. MySQL relies on Unix
in ALTER TABLE to the fact that the table handler does not remove the
table before all handles to it have been removed. Furthermore, MySQL's
call to drop table must be non-blocking. Therefore we do the drop table
as a background operation, which is taken care of by the master thread
-in srv0srv.c. */
+in srv0srv.c.
+@return error code or DB_SUCCESS */
static
int
row_drop_table_for_mysql_in_background(
/*===================================*/
- /* out: error code or DB_SUCCESS */
- const char* name) /* in: table name */
+ const char* name) /*!< in: table name */
{
ulint error;
trx_t* trx;
@@ -2193,16 +2150,15 @@ row_drop_table_for_mysql_in_background(
return((int) error);
}
-/*************************************************************************
+/*********************************************************************//**
The master thread in srv0srv.c calls this regularly to drop tables which
we must drop in background after queries to them have ended. Such lazy
-dropping of tables is needed in ALTER TABLE on Unix. */
-
+dropping of tables is needed in ALTER TABLE on Unix.
+@return how many tables dropped + remaining tables in list */
+UNIV_INTERN
ulint
row_drop_tables_for_mysql_in_background(void)
/*=========================================*/
- /* out: how many tables dropped
- + remaining tables in list */
{
row_mysql_drop_t* drop;
dict_table_t* table;
@@ -2256,9 +2212,9 @@ already_dropped:
UT_LIST_REMOVE(row_mysql_drop_list, row_mysql_drop_list, drop);
ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Dropped table %s in background drop queue.\n",
- drop->table_name);
+ fputs(" InnoDB: Dropped table ", stderr);
+ ut_print_name(stderr, NULL, TRUE, drop->table_name);
+ fputs(" in background drop queue.\n", stderr);
mem_free(drop->table_name);
@@ -2269,14 +2225,14 @@ already_dropped:
goto loop;
}
-/*************************************************************************
+/*********************************************************************//**
Get the background drop list length. NOTE: the caller must own the kernel
-mutex! */
-
+mutex!
+@return how many tables in list */
+UNIV_INTERN
ulint
row_get_background_drop_list_len_low(void)
/*======================================*/
- /* out: how many tables in list */
{
ut_ad(mutex_own(&kernel_mutex));
@@ -2289,19 +2245,18 @@ row_get_background_drop_list_len_low(void)
return(UT_LIST_GET_LEN(row_mysql_drop_list));
}
-/*************************************************************************
+/*********************************************************************//**
If a table is not yet in the drop list, adds the table to the list of tables
which the master thread drops in background. We need this on Unix because in
ALTER TABLE MySQL may call drop table even if the table has running queries on
it. Also, if there are running foreign key checks on the table, we drop the
-table lazily. */
+table lazily.
+@return TRUE if the table was not yet in the drop list, and was added there */
static
ibool
row_add_table_to_background_drop_list(
/*==================================*/
- /* out: TRUE if the table was not yet in the
- drop list, and was added there */
- dict_table_t* table) /* in: table */
+ const char* name) /*!< in: table name */
{
row_mysql_drop_t* drop;
@@ -2317,7 +2272,7 @@ row_add_table_to_background_drop_list(
drop = UT_LIST_GET_FIRST(row_mysql_drop_list);
while (drop != NULL) {
- if (strcmp(drop->table_name, table->name) == 0) {
+ if (strcmp(drop->table_name, name) == 0) {
/* Already in the list */
mutex_exit(&kernel_mutex);
@@ -2330,7 +2285,7 @@ row_add_table_to_background_drop_list(
drop = mem_alloc(sizeof(row_mysql_drop_t));
- drop->table_name = mem_strdup(table->name);
+ drop->table_name = mem_strdup(name);
UT_LIST_ADD_LAST(row_mysql_drop_list, row_mysql_drop_list, drop);
@@ -2343,18 +2298,17 @@ row_add_table_to_background_drop_list(
return(TRUE);
}
-#ifndef UNIV_HOTBACKUP
-/*************************************************************************
+/*********************************************************************//**
Discards the tablespace of a table which is stored in an .ibd file. Discarding
means that this function deletes the .ibd file and assigns a new table id for
-the table. Also the flag table->ibd_file_missing is set TRUE. */
-
+the table. Also the flag table->ibd_file_missing is set TRUE.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
row_discard_tablespace_for_mysql(
/*=============================*/
- /* out: error code or DB_SUCCESS */
- const char* name, /* in: table name */
- trx_t* trx) /* in: transaction handle */
+ const char* name, /*!< in: table name */
+ trx_t* trx) /*!< in: transaction handle */
{
dict_foreign_t* foreign;
dulint new_id;
@@ -2461,10 +2415,10 @@ row_discard_tablespace_for_mysql(
ut_print_timestamp(ef);
fputs(" Cannot DISCARD table ", ef);
- ut_print_name(ef, trx, TRUE, name);
+ ut_print_name(stderr, trx, TRUE, name);
fputs("\n"
"because it is referenced by ", ef);
- ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
+ ut_print_name(stderr, trx, TRUE, foreign->foreign_table_name);
putc('\n', ef);
mutex_exit(&dict_foreign_err_mutex);
@@ -2505,7 +2459,7 @@ row_discard_tablespace_for_mysql(
if (err != DB_SUCCESS) {
trx->error_state = DB_SUCCESS;
- trx_general_rollback_for_mysql(trx, FALSE, NULL);
+ trx_general_rollback_for_mysql(trx, NULL);
trx->error_state = DB_SUCCESS;
} else {
dict_table_change_id_in_cache(table, new_id);
@@ -2514,7 +2468,7 @@ row_discard_tablespace_for_mysql(
if (!success) {
trx->error_state = DB_SUCCESS;
- trx_general_rollback_for_mysql(trx, FALSE, NULL);
+ trx_general_rollback_for_mysql(trx, NULL);
trx->error_state = DB_SUCCESS;
err = DB_ERROR;
@@ -2536,20 +2490,20 @@ funct_exit:
return((int) err);
}
-/*********************************************************************
+/*****************************************************************//**
Imports a tablespace. The space id in the .ibd file must match the space id
-of the table in the data dictionary. */
-
+of the table in the data dictionary.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
row_import_tablespace_for_mysql(
/*============================*/
- /* out: error code or DB_SUCCESS */
- const char* name, /* in: table name */
- trx_t* trx) /* in: transaction handle */
+ const char* name, /*!< in: table name */
+ trx_t* trx) /*!< in: transaction handle */
{
dict_table_t* table;
ibool success;
- dulint current_lsn;
+ ib_uint64_t current_lsn;
ulint err = DB_SUCCESS;
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
@@ -2642,8 +2596,10 @@ row_import_tablespace_for_mysql(
ibuf_delete_for_discarded_space(table->space);
- success = fil_open_single_table_tablespace(TRUE, table->space,
- table->name);
+ success = fil_open_single_table_tablespace(
+ TRUE, table->space,
+ table->flags == DICT_TF_COMPACT ? 0 : table->flags,
+ table->name);
if (success) {
table->ibd_file_missing = FALSE;
table->tablespace_discarded = FALSE;
@@ -2672,15 +2628,15 @@ funct_exit:
return((int) err);
}
-/*************************************************************************
-Truncates a table for MySQL. */
-
+/*********************************************************************//**
+Truncates a table for MySQL.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
row_truncate_table_for_mysql(
/*=========================*/
- /* out: error code or DB_SUCCESS */
- dict_table_t* table, /* in: table handle */
- trx_t* trx) /* in: transaction handle */
+ dict_table_t* table, /*!< in: table handle */
+ trx_t* trx) /*!< in: transaction handle */
{
dict_foreign_t* foreign;
ulint err;
@@ -2692,6 +2648,7 @@ row_truncate_table_for_mysql(
btr_pcur_t pcur;
mtr_t mtr;
dulint new_id;
+ ulint recreate_space = 0;
pars_info_t* info = NULL;
/* How do we prevent crashes caused by ongoing operations on
@@ -2716,17 +2673,23 @@ row_truncate_table_for_mysql(
reallocated, the allocator will remove the ibuf entries for
it.
- TODO: when we truncate *.ibd files (analogous to DISCARD
- TABLESPACE), we will have to remove we remove all entries for
- the table in the insert buffer tree!
+ When we truncate *.ibd files by recreating them (analogous to
+ DISCARD TABLESPACE), we remove all entries for the table in the
+ insert buffer tree. This is not strictly necessary, because
+ in 6) we will assign a new tablespace identifier, but we can
+ free up some space in the system tablespace.
4) Linear readahead and random readahead: we use the same
- method as in 3) to discard ongoing operations. (This will only
- be relevant for TRUNCATE TABLE by DISCARD TABLESPACE.)
+ method as in 3) to discard ongoing operations. (This is only
+ relevant for TRUNCATE TABLE by DISCARD TABLESPACE.)
5) FOREIGN KEY operations: if
table->n_foreign_key_checks_running > 0, we do not allow the
- TRUNCATE. We also reserve the data dictionary latch. */
+ TRUNCATE. We also reserve the data dictionary latch.
+
+ 6) Crash recovery: To prevent the application of pre-truncation
+ redo log records on the truncated tablespace, we will assign
+ a new tablespace identifier to the truncated tablespace. */
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
ut_ad(table);
@@ -2814,6 +2777,51 @@ row_truncate_table_for_mysql(
trx->table_id = table->id;
+ if (table->space && !table->dir_path_of_temp_table) {
+ /* Discard and create the single-table tablespace. */
+ ulint space = table->space;
+ ulint flags = fil_space_get_flags(space);
+
+ if (flags != ULINT_UNDEFINED
+ && fil_discard_tablespace(space)) {
+
+ dict_index_t* index;
+
+ space = 0;
+
+ if (fil_create_new_single_table_tablespace(
+ &space, table->name, FALSE, flags,
+ FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: TRUNCATE TABLE %s failed to"
+ " create a new tablespace\n",
+ table->name);
+ table->ibd_file_missing = 1;
+ err = DB_ERROR;
+ goto funct_exit;
+ }
+
+ recreate_space = space;
+
+ /* Replace the space_id in the data dictionary cache.
+ The persistent data dictionary (SYS_TABLES.SPACE
+ and SYS_INDEXES.SPACE) are updated later in this
+ function. */
+ table->space = space;
+ index = dict_table_get_first_index(table);
+ do {
+ index->space = space;
+ index = dict_table_get_next_index(index);
+ } while (index);
+
+ mtr_start(&mtr);
+ fsp_header_init(space,
+ FIL_IBD_FILE_INITIAL_SIZE, &mtr);
+ mtr_commit(&mtr);
+ }
+ }
+
/* scan SYS_INDEXES for all indexes of the table */
heap = mem_heap_create(800);
@@ -2836,7 +2844,7 @@ row_truncate_table_for_mysql(
ulint len;
ulint root_page_no;
- if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
+ if (!btr_pcur_is_on_user_rec(&pcur)) {
/* The end of SYS_INDEXES has been reached. */
break;
}
@@ -2858,7 +2866,8 @@ row_truncate_table_for_mysql(
/* This call may commit and restart mtr
and reposition pcur. */
- root_page_no = dict_truncate_index_tree(table, &pcur, &mtr);
+ root_page_no = dict_truncate_index_tree(table, recreate_space,
+ &pcur, &mtr);
rec = btr_pcur_get_rec(&pcur);
@@ -2890,17 +2899,20 @@ next_rec:
info = pars_info_create();
+ pars_info_add_int4_literal(info, "space", (lint) table->space);
pars_info_add_dulint_literal(info, "old_id", table->id);
pars_info_add_dulint_literal(info, "new_id", new_id);
err = que_eval_sql(info,
"PROCEDURE RENUMBER_TABLESPACE_PROC () IS\n"
"BEGIN\n"
- "UPDATE SYS_TABLES SET ID = :new_id\n"
+ "UPDATE SYS_TABLES"
+ " SET ID = :new_id, SPACE = :space\n"
" WHERE ID = :old_id;\n"
"UPDATE SYS_COLUMNS SET TABLE_ID = :new_id\n"
" WHERE TABLE_ID = :old_id;\n"
- "UPDATE SYS_INDEXES SET TABLE_ID = :new_id\n"
+ "UPDATE SYS_INDEXES"
+ " SET TABLE_ID = :new_id, SPACE = :space\n"
" WHERE TABLE_ID = :old_id;\n"
"COMMIT WORK;\n"
"END;\n"
@@ -2908,7 +2920,7 @@ next_rec:
if (err != DB_SUCCESS) {
trx->error_state = DB_SUCCESS;
- trx_general_rollback_for_mysql(trx, FALSE, NULL);
+ trx_general_rollback_for_mysql(trx, NULL);
trx->error_state = DB_SUCCESS;
ut_print_timestamp(stderr);
fputs(" InnoDB: Unable to assign a new identifier to table ",
@@ -2941,21 +2953,22 @@ funct_exit:
return((int) err);
}
-#endif /* !UNIV_HOTBACKUP */
-/*************************************************************************
-Drops a table for MySQL. If the name of the dropped table ends in
+/*********************************************************************//**
+Drops a table for MySQL. If the name of the dropped table ends in
one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
"innodb_table_monitor", then this will also stop the printing of monitor
-output by the master thread. */
-
+output by the master thread. If the data dictionary was not already locked
+by the transaction, the transaction will be committed. Otherwise, the
+data dictionary will remain locked.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
row_drop_table_for_mysql(
/*=====================*/
- /* out: error code or DB_SUCCESS */
- const char* name, /* in: table name */
- trx_t* trx, /* in: transaction handle */
- ibool drop_db)/* in: TRUE=dropping whole database */
+ const char* name, /*!< in: table name */
+ trx_t* trx, /*!< in: transaction handle */
+ ibool drop_db)/*!< in: TRUE=dropping whole database */
{
dict_foreign_t* foreign;
dict_table_t* table;
@@ -3050,8 +3063,7 @@ row_drop_table_for_mysql(
"InnoDB: MySQL database directory"
" from another database?\n"
"InnoDB: You can look for further help from\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "innodb-troubleshooting.html\n",
+ "InnoDB: " REFMAN "innodb-troubleshooting.html\n",
stderr);
goto funct_exit;
}
@@ -3098,7 +3110,7 @@ check_next_foreign:
if (table->n_mysql_handles_opened > 0) {
ibool added;
- added = row_add_table_to_background_drop_list(table);
+ added = row_add_table_to_background_drop_list(table->name);
if (added) {
ut_print_timestamp(stderr);
@@ -3114,7 +3126,6 @@ check_next_foreign:
/* We return DB_SUCCESS to MySQL though the drop will
happen lazily later */
-
err = DB_SUCCESS;
} else {
/* The table is already in the background drop list */
@@ -3132,15 +3143,16 @@ check_next_foreign:
if (table->n_foreign_key_checks_running > 0) {
- ibool added;
+ const char* table_name = table->name;
+ ibool added;
- added = row_add_table_to_background_drop_list(table);
+ added = row_add_table_to_background_drop_list(table_name);
if (added) {
ut_print_timestamp(stderr);
fputs(" InnoDB: You are trying to drop table ",
stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
+ ut_print_name(stderr, trx, TRUE, table_name);
fputs("\n"
"InnoDB: though there is a"
" foreign key check running on it.\n"
@@ -3163,7 +3175,7 @@ check_next_foreign:
/* Remove all locks there are on the table or its records */
lock_remove_all_on_table(table, TRUE);
- trx->dict_operation = TRUE;
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
trx->table_id = table->id;
/* We use the private SQL parser of Innobase to generate the
@@ -3188,7 +3200,6 @@ check_next_foreign:
"WHERE NAME = :table_name\n"
"LOCK IN SHARE MODE;\n"
"IF (SQL % NOTFOUND) THEN\n"
- " COMMIT WORK;\n"
" RETURN;\n"
"END IF;\n"
"found := 1;\n"
@@ -3241,7 +3252,6 @@ check_next_foreign:
"WHERE TABLE_ID = table_id;\n"
"DELETE FROM SYS_TABLES\n"
"WHERE ID = table_id;\n"
- "COMMIT WORK;\n"
"END;\n"
, FALSE, trx);
@@ -3325,31 +3335,29 @@ check_next_foreign:
}
funct_exit:
- trx_commit_for_mysql(trx);
-
if (locked_dictionary) {
+ trx_commit_for_mysql(trx);
+
row_mysql_unlock_data_dictionary(trx);
}
trx->op_info = "";
-#ifndef UNIV_HOTBACKUP
srv_wake_master_thread();
-#endif /* !UNIV_HOTBACKUP */
return((int) err);
}
-/***********************************************************************
+/*******************************************************************//**
Drop all foreign keys in a database, see Bug#18942.
-Called at the end of row_drop_database_for_mysql(). */
+Called at the end of row_drop_database_for_mysql().
+@return error code or DB_SUCCESS */
static
ulint
drop_all_foreign_keys_in_db(
/*========================*/
- /* out: error code or DB_SUCCESS */
- const char* name, /* in: database name which ends to '/' */
- trx_t* trx) /* in: transaction handle */
+ const char* name, /*!< in: database name which ends in '/' */
+ trx_t* trx) /*!< in: transaction handle */
{
pars_info_t* pinfo;
ulint err;
@@ -3360,7 +3368,7 @@ drop_all_foreign_keys_in_db(
pars_info_add_str_literal(pinfo, "dbname", name);
-/* true if for_name is not prefixed with dbname */
+/** true if for_name is not prefixed with dbname */
#define TABLE_NOT_IN_THIS_DB \
"SUBSTR(for_name, 0, LENGTH(:dbname)) <> :dbname"
@@ -3400,15 +3408,15 @@ drop_all_foreign_keys_in_db(
return(err);
}
-/*************************************************************************
-Drops a database for MySQL. */
-
+/*********************************************************************//**
+Drops a database for MySQL.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
row_drop_database_for_mysql(
/*========================*/
- /* out: error code or DB_SUCCESS */
- const char* name, /* in: database name which ends to '/' */
- trx_t* trx) /* in: transaction handle */
+ const char* name, /*!< in: database name which ends in '/' */
+ trx_t* trx) /*!< in: transaction handle */
{
dict_table_t* table;
char* table_name;
@@ -3456,6 +3464,7 @@ loop:
}
err = row_drop_table_for_mysql(table_name, trx, TRUE);
+ trx_commit_for_mysql(trx);
if (err != DB_SUCCESS) {
fputs("InnoDB: DROP DATABASE ", stderr);
@@ -3493,30 +3502,30 @@ loop:
return(err);
}
-/*************************************************************************
+/*********************************************************************//**
Checks if a table name contains the string "/#sql" which denotes temporary
-tables in MySQL. */
+tables in MySQL.
+@return TRUE if temporary table */
static
ibool
row_is_mysql_tmp_table_name(
/*========================*/
- /* out: TRUE if temporary table */
- const char* name) /* in: table name in the form
+ const char* name) /*!< in: table name in the form
'database/tablename' */
{
return(strstr(name, "/#sql") != NULL);
/* return(strstr(name, "/@0023sql") != NULL); */
}
-/********************************************************************
-Delete a single constraint. */
+/****************************************************************//**
+Delete a single constraint.
+@return error code or DB_SUCCESS */
static
int
row_delete_constraint_low(
/*======================*/
- /* out: error code or DB_SUCCESS */
- const char* id, /* in: constraint id */
- trx_t* trx) /* in: transaction handle */
+ const char* id, /*!< in: constraint id */
+ trx_t* trx) /*!< in: transaction handle */
{
pars_info_t* info = pars_info_create();
@@ -3531,18 +3540,18 @@ row_delete_constraint_low(
, FALSE, trx));
}
-/********************************************************************
-Delete a single constraint. */
+/****************************************************************//**
+Delete a single constraint.
+@return error code or DB_SUCCESS */
static
int
row_delete_constraint(
/*==================*/
- /* out: error code or DB_SUCCESS */
- const char* id, /* in: constraint id */
- const char* database_name, /* in: database name, with the
+ const char* id, /*!< in: constraint id */
+ const char* database_name, /*!< in: database name, with the
trailing '/' */
- mem_heap_t* heap, /* in: memory heap */
- trx_t* trx) /* in: transaction handle */
+ mem_heap_t* heap, /*!< in: memory heap */
+ trx_t* trx) /*!< in: transaction handle */
{
ulint err;
@@ -3552,7 +3561,7 @@ row_delete_constraint(
if ((err == DB_SUCCESS) && !strchr(id, '/')) {
/* Old format < 4.0.18 constraints have constraint ids
- <number>_<number>. We only try deleting them if the
+ NUMBER_NUMBER. We only try deleting them if the
constraint name does not contain a '/' character, otherwise
deleting a new format constraint named 'foo/bar' from
database 'baz' would remove constraint 'bar' from database
@@ -3564,19 +3573,20 @@ row_delete_constraint(
return((int) err);
}
-/*************************************************************************
-Renames a table for MySQL. */
-
-int
+/*********************************************************************//**
+Renames a table for MySQL.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+ulint
row_rename_table_for_mysql(
/*=======================*/
- /* out: error code or DB_SUCCESS */
- const char* old_name, /* in: old table name */
- const char* new_name, /* in: new table name */
- trx_t* trx) /* in: transaction handle */
+ const char* old_name, /*!< in: old table name */
+ const char* new_name, /*!< in: new table name */
+ trx_t* trx, /*!< in: transaction handle */
+ ibool commit) /*!< in: if TRUE then commit trx */
{
dict_table_t* table;
- ulint err;
+ ulint err = DB_ERROR;
mem_heap_t* heap = NULL;
const char** constraints_to_drop = NULL;
ulint n_constraints_to_drop = 0;
@@ -3596,11 +3606,8 @@ row_rename_table_for_mysql(
"InnoDB: with raw, and innodb_force_... is removed.\n",
stderr);
- trx_commit_for_mysql(trx);
- return(DB_ERROR);
- }
-
- if (row_mysql_is_system_table(new_name)) {
+ goto funct_exit;
+ } else if (row_mysql_is_system_table(new_name)) {
fprintf(stderr,
"InnoDB: Error: trying to create a MySQL"
@@ -3609,8 +3616,7 @@ row_rename_table_for_mysql(
" of the MyISAM type!\n",
new_name);
- trx_commit_for_mysql(trx);
- return(DB_ERROR);
+ goto funct_exit;
}
trx->op_info = "renaming table";
@@ -3619,11 +3625,6 @@ row_rename_table_for_mysql(
old_is_tmp = row_is_mysql_tmp_table_name(old_name);
new_is_tmp = row_is_mysql_tmp_table_name(new_name);
- /* Serialize data dictionary operations with dictionary mutex:
- no deadlocks can occur then in these operations */
-
- row_mysql_lock_data_dictionary(trx);
-
table = dict_table_get_low(old_name);
if (!table) {
@@ -3640,13 +3641,10 @@ row_rename_table_for_mysql(
"InnoDB: MySQL database directory"
" from another database?\n"
"InnoDB: You can look for further help from\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "innodb-troubleshooting.html\n",
+ "InnoDB: " REFMAN "innodb-troubleshooting.html\n",
stderr);
goto funct_exit;
- }
-
- if (table->ibd_file_missing) {
+ } else if (table->ibd_file_missing) {
err = DB_TABLE_NOT_FOUND;
ut_print_timestamp(stderr);
@@ -3655,13 +3653,10 @@ row_rename_table_for_mysql(
fputs(" does not have an .ibd file"
" in the database directory.\n"
"InnoDB: You can look for further help from\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "innodb-troubleshooting.html\n",
+ "InnoDB: " REFMAN "innodb-troubleshooting.html\n",
stderr);
goto funct_exit;
- }
-
- if (new_is_tmp) {
+ } else if (new_is_tmp) {
/* MySQL is doing an ALTER TABLE command and it renames the
original table to a temporary table name. We want to preserve
the original foreign key constraint definitions despite the
@@ -3681,9 +3676,7 @@ row_rename_table_for_mysql(
}
/* We use the private SQL parser of Innobase to generate the query
- graphs needed in deleting the dictionary data from system tables in
- Innobase. Deleting a row from SYS_INDEXES table also frees the file
- segments of the B-tree associated with the index. */
+ graphs needed in updating the dictionary data from system tables. */
info = pars_info_create();
@@ -3701,9 +3694,7 @@ row_rename_table_for_mysql(
if (err != DB_SUCCESS) {
goto end;
- }
-
- if (!new_is_tmp) {
+ } else if (!new_is_tmp) {
/* Rename all constraints. */
info = pars_info_create();
@@ -3814,8 +3805,7 @@ end:
"InnoDB: Have you deleted the .frm file"
" and not used DROP TABLE?\n"
"InnoDB: You can look for further help from\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "innodb-troubleshooting.html\n"
+ "InnoDB: " REFMAN "innodb-troubleshooting.html\n"
"InnoDB: If table ", stderr);
ut_print_name(stderr, trx, TRUE, new_name);
fputs(" is a temporary table #sql..., then"
@@ -3835,28 +3825,17 @@ end:
"InnoDB: succeed.\n", stderr);
}
trx->error_state = DB_SUCCESS;
- trx_general_rollback_for_mysql(trx, FALSE, NULL);
+ trx_general_rollback_for_mysql(trx, NULL);
trx->error_state = DB_SUCCESS;
} else {
/* The following call will also rename the .ibd data file if
the table is stored in a single-table tablespace */
- ibool success = dict_table_rename_in_cache(table, new_name,
- !new_is_tmp);
-
- if (!success) {
+ if (!dict_table_rename_in_cache(table, new_name,
+ !new_is_tmp)) {
trx->error_state = DB_SUCCESS;
- trx_general_rollback_for_mysql(trx, FALSE, NULL);
+ trx_general_rollback_for_mysql(trx, NULL);
trx->error_state = DB_SUCCESS;
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error in table rename,"
- " cannot rename ", stderr);
- ut_print_name(stderr, trx, TRUE, old_name);
- fputs(" to ", stderr);
- ut_print_name(stderr, trx, TRUE, new_name);
- putc('\n', stderr);
- err = DB_ERROR;
-
goto funct_exit;
}
@@ -3864,7 +3843,7 @@ end:
an ALTER, not in a RENAME. */
err = dict_load_foreigns(
- new_name, old_is_tmp ? trx->check_foreigns : TRUE);
+ new_name, !old_is_tmp || trx->check_foreigns);
if (err != DB_SUCCESS) {
ut_print_timestamp(stderr);
@@ -3895,14 +3874,16 @@ end:
ut_a(dict_table_rename_in_cache(table,
old_name, FALSE));
trx->error_state = DB_SUCCESS;
- trx_general_rollback_for_mysql(trx, FALSE, NULL);
+ trx_general_rollback_for_mysql(trx, NULL);
trx->error_state = DB_SUCCESS;
}
}
funct_exit:
- trx_commit_for_mysql(trx);
- row_mysql_unlock_data_dictionary(trx);
+
+ if (commit) {
+ trx_commit_for_mysql(trx);
+ }
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
@@ -3910,21 +3891,21 @@ funct_exit:
trx->op_info = "";
- return((int) err);
+ return(err);
}
-/*************************************************************************
+/*********************************************************************//**
Checks that the index contains entries in an ascending order, unique
constraint is not broken, and calculates the number of index entries
-in the read view of the current transaction. */
+in the read view of the current transaction.
+@return TRUE if ok */
static
ibool
row_scan_and_check_index(
/*=====================*/
- /* out: TRUE if ok */
- row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL */
- dict_index_t* index, /* in: index */
- ulint* n_rows) /* out: number of entries seen in the
+ row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in MySQL */
+ dict_index_t* index, /*!< in: index */
+ ulint* n_rows) /*!< out: number of entries seen in the
current consistent read */
{
dtuple_t* prev_entry = NULL;
@@ -3939,12 +3920,21 @@ row_scan_and_check_index(
ulint i;
ulint cnt;
mem_heap_t* heap = NULL;
+ ulint n_ext;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ ulint* offsets;
+ rec_offs_init(offsets_);
*n_rows = 0;
+ if (!row_merge_is_index_usable(prebuilt->trx, index)) {
+ /* A newly created index may lack some delete-marked
+ records that may exist in the read view of
+ prebuilt->trx. Thus, such indexes must not be
+ accessed by consistent read. */
+ return(is_ok);
+ }
+
buf = mem_alloc(UNIV_PAGE_SIZE);
heap = mem_heap_create(100);
@@ -3952,6 +3942,8 @@ row_scan_and_check_index(
in scanning the index entries */
prebuilt->index = index;
+ /* row_merge_is_index_usable() was already checked above. */
+ prebuilt->index_usable = TRUE;
prebuilt->sql_stat_start = TRUE;
prebuilt->template_type = ROW_MYSQL_DUMMY_TEMPLATE;
prebuilt->n_template = 0;
@@ -3971,7 +3963,17 @@ loop:
}
cnt = 1000;
}
- if (ret != DB_SUCCESS) {
+
+ switch (ret) {
+ case DB_SUCCESS:
+ break;
+ default:
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: Warning: CHECK TABLE on ", stderr);
+ dict_index_name_print(stderr, prebuilt->trx, index);
+ fprintf(stderr, " returned %lu\n", ret);
+ /* fall through (this error is ignored by CHECK TABLE) */
+ case DB_END_OF_INDEX:
func_exit:
mem_free(buf);
mem_heap_free(heap);
@@ -3987,12 +3989,13 @@ func_exit:
rec = buf + mach_read_from_4(buf);
+ offsets = rec_get_offsets(rec, index, offsets_,
+ ULINT_UNDEFINED, &heap);
+
if (prev_entry != NULL) {
matched_fields = 0;
matched_bytes = 0;
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
cmp = cmp_dtuple_rec_with_match(prev_entry, rec, offsets,
&matched_fields,
&matched_bytes);
@@ -4025,7 +4028,7 @@ not_ok:
rec_print_new(stderr, rec, offsets);
putc('\n', stderr);
is_ok = FALSE;
- } else if ((index->type & DICT_UNIQUE)
+ } else if (dict_index_is_unique(index)
&& !contains_null
&& matched_fields
>= dict_index_get_n_ordering_defined_by_user(
@@ -4036,24 +4039,44 @@ not_ok:
}
}
- mem_heap_empty(heap);
- offsets = offsets_;
+ {
+ mem_heap_t* tmp_heap = NULL;
+
+ /* Empty the heap on each round. But preserve offsets[]
+ for the row_rec_to_index_entry() call, by copying them
+ into a separate memory heap when needed. */
+ if (UNIV_UNLIKELY(offsets != offsets_)) {
+ ulint size = rec_offs_get_n_alloc(offsets)
+ * sizeof *offsets;
+
+ tmp_heap = mem_heap_create(size);
+ offsets = mem_heap_dup(tmp_heap, offsets, size);
+ }
+
+ mem_heap_empty(heap);
+
+ prev_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec,
+ index, offsets,
+ &n_ext, heap);
- prev_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);
+ if (UNIV_LIKELY_NULL(tmp_heap)) {
+ mem_heap_free(tmp_heap);
+ }
+ }
ret = row_search_for_mysql(buf, PAGE_CUR_G, prebuilt, 0, ROW_SEL_NEXT);
goto loop;
}
-/*************************************************************************
-Checks a table for corruption. */
-
+/*********************************************************************//**
+Checks a table for corruption.
+@return DB_ERROR or DB_SUCCESS */
+UNIV_INTERN
ulint
row_check_table_for_mysql(
/*======================*/
- /* out: DB_ERROR or DB_SUCCESS */
- row_prebuilt_t* prebuilt) /* in: prebuilt struct in MySQL
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
handle */
{
dict_table_t* table = prebuilt->table;
@@ -4063,7 +4086,7 @@ row_check_table_for_mysql(
ulint ret = DB_SUCCESS;
ulint old_isolation_level;
- if (prebuilt->table->ibd_file_missing) {
+ if (table->ibd_file_missing) {
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB: Error:\n"
"InnoDB: MySQL is trying to use a table handle"
@@ -4074,10 +4097,9 @@ row_check_table_for_mysql(
"InnoDB: the MySQL datadir, or have you"
" used DISCARD TABLESPACE?\n"
"InnoDB: Look from\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "innodb-troubleshooting.html\n"
+ "InnoDB: " REFMAN "innodb-troubleshooting.html\n"
"InnoDB: how you can resolve the problem.\n",
- prebuilt->table->name);
+ table->name);
return(DB_ERROR);
}
@@ -4107,12 +4129,12 @@ row_check_table_for_mysql(
if (!btr_validate_index(index, prebuilt->trx)) {
ret = DB_ERROR;
} else {
- if (!row_scan_and_check_index(prebuilt,
- index, &n_rows)) {
+ if (!row_scan_and_check_index(prebuilt,index, &n_rows)){
ret = DB_ERROR;
}
if (trx_is_interrupted(prebuilt->trx)) {
+ ret = DB_INTERRUPTED;
break;
}
@@ -4160,14 +4182,14 @@ row_check_table_for_mysql(
return(ret);
}
-/*************************************************************************
-Determines if a table is a magic monitor table. */
-
+/*********************************************************************//**
+Determines if a table is a magic monitor table.
+@return TRUE if monitor table */
+UNIV_INTERN
ibool
row_is_magic_monitor_table(
/*=======================*/
- /* out: TRUE if monitor table */
- const char* table_name) /* in: name of the table, in the
+ const char* table_name) /*!< in: name of the table, in the
form database/table_name */
{
const char* name; /* table_name without database/ */
diff --git a/storage/innobase/row/row0purge.c b/storage/innobase/row/row0purge.c
index 1fef47da13f..500ebe571ab 100644
--- a/storage/innobase/row/row0purge.c
+++ b/storage/innobase/row/row0purge.c
@@ -1,7 +1,24 @@
-/******************************************************
-Purge obsolete records
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1997 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0purge.c
+Purge obsolete records
Created 3/14/1997 Heikki Tuuri
*******************************************************/
@@ -27,15 +44,15 @@ Created 3/14/1997 Heikki Tuuri
#include "row0mysql.h"
#include "log0log.h"
-/************************************************************************
-Creates a purge node to a query graph. */
-
+/********************************************************************//**
+Creates a purge node to a query graph.
+@return own: purge node */
+UNIV_INTERN
purge_node_t*
row_purge_node_create(
/*==================*/
- /* out, own: purge node */
- que_thr_t* parent, /* in: parent node, i.e., a thr node */
- mem_heap_t* heap) /* in: memory heap where created */
+ que_thr_t* parent, /*!< in: parent node, i.e., a thr node */
+ mem_heap_t* heap) /*!< in: memory heap where created */
{
purge_node_t* node;
@@ -51,17 +68,17 @@ row_purge_node_create(
return(node);
}
-/***************************************************************
+/***********************************************************//**
Repositions the pcur in the purge node on the clustered index record,
-if found. */
+if found.
+@return TRUE if the record was found */
static
ibool
row_purge_reposition_pcur(
/*======================*/
- /* out: TRUE if the record was found */
- ulint mode, /* in: latching mode */
- purge_node_t* node, /* in: row purge node */
- mtr_t* mtr) /* in: mtr */
+ ulint mode, /*!< in: latching mode */
+ purge_node_t* node, /*!< in: row purge node */
+ mtr_t* mtr) /*!< in: mtr */
{
ibool found;
@@ -82,16 +99,16 @@ row_purge_reposition_pcur(
return(found);
}
-/***************************************************************
-Removes a delete marked clustered index record if possible. */
+/***********************************************************//**
+Removes a delete marked clustered index record if possible.
+@return TRUE if success, or if not found, or if modified after the
+delete marking */
static
ibool
row_purge_remove_clust_if_poss_low(
/*===============================*/
- /* out: TRUE if success, or if not found, or
- if modified after the delete marking */
- purge_node_t* node, /* in: row purge node */
- ulint mode) /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
+ purge_node_t* node, /*!< in: row purge node */
+ ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
{
dict_index_t* index;
btr_pcur_t* pcur;
@@ -102,7 +119,7 @@ row_purge_remove_clust_if_poss_low(
rec_t* rec;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
index = dict_table_get_first_index(node->table);
@@ -144,7 +161,8 @@ row_purge_remove_clust_if_poss_low(
success = btr_cur_optimistic_delete(btr_cur, &mtr);
} else {
ut_ad(mode == BTR_MODIFY_TREE);
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur, FALSE, &mtr);
+ btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
+ RB_NONE, &mtr);
if (err == DB_SUCCESS) {
success = TRUE;
@@ -160,14 +178,14 @@ row_purge_remove_clust_if_poss_low(
return(success);
}
-/***************************************************************
+/***********************************************************//**
Removes a clustered index record if it has not been modified after the delete
marking. */
static
void
row_purge_remove_clust_if_poss(
/*===========================*/
- purge_node_t* node) /* in: row purge node */
+ purge_node_t* node) /*!< in: row purge node */
{
ibool success;
ulint n_tries = 0;
@@ -196,17 +214,17 @@ retry:
ut_a(success);
}
-/***************************************************************
-Removes a secondary index entry if possible. */
+/***********************************************************//**
+Removes a secondary index entry if possible.
+@return TRUE if success or if not found */
static
ibool
row_purge_remove_sec_if_poss_low(
/*=============================*/
- /* out: TRUE if success or if not found */
- purge_node_t* node, /* in: row purge node */
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: index entry */
- ulint mode) /* in: latch mode BTR_MODIFY_LEAF or
+ purge_node_t* node, /*!< in: row purge node */
+ dict_index_t* index, /*!< in: index */
+ const dtuple_t* entry, /*!< in: index entry */
+ ulint mode) /*!< in: latch mode BTR_MODIFY_LEAF or
BTR_MODIFY_TREE */
{
btr_pcur_t pcur;
@@ -216,7 +234,7 @@ row_purge_remove_sec_if_poss_low(
ibool found;
ulint err;
mtr_t mtr;
- mtr_t* mtr_vers;
+ mtr_t mtr_vers;
log_free_check();
mtr_start(&mtr);
@@ -224,10 +242,18 @@ row_purge_remove_sec_if_poss_low(
found = row_search_index_entry(index, entry, mode, &pcur, &mtr);
if (!found) {
- /* Not found */
+ /* Not found. This is a legitimate condition. In a
+ rollback, InnoDB will remove secondary recs that would
+ be purged anyway. Then the actual purge will not find
+ the secondary index record. Also, the purge itself is
+ eager: if it comes to consider a secondary index
+ record, and notices it does not need to exist in the
+ index, it will remove it. Then if/when the purge
+ comes to consider the secondary index record a second
+ time, it will not exist any more in the index. */
/* fputs("PURGE:........sec entry not found\n", stderr); */
- /* dtuple_print(entry); */
+ /* dtuple_print(stderr, entry); */
btr_pcur_close(&pcur);
mtr_commit(&mtr);
@@ -241,21 +267,17 @@ row_purge_remove_sec_if_poss_low(
which cannot be purged yet, requires its existence. If some requires,
we should do nothing. */
- mtr_vers = mem_alloc(sizeof(mtr_t));
+ mtr_start(&mtr_vers);
- mtr_start(mtr_vers);
-
- success = row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, mtr_vers);
+ success = row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, &mtr_vers);
if (success) {
old_has = row_vers_old_has_index_entry(
TRUE, btr_pcur_get_rec(&(node->pcur)),
- mtr_vers, index, entry);
+ &mtr_vers, index, entry);
}
- btr_pcur_commit_specify_mtr(&(node->pcur), mtr_vers);
-
- mem_free(mtr_vers);
+ btr_pcur_commit_specify_mtr(&(node->pcur), &mtr_vers);
if (!success || !old_has) {
/* Remove the index record */
@@ -265,14 +287,9 @@ row_purge_remove_sec_if_poss_low(
} else {
ut_ad(mode == BTR_MODIFY_TREE);
btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
- FALSE, &mtr);
- if (err == DB_SUCCESS) {
- success = TRUE;
- } else if (err == DB_OUT_OF_FILE_SPACE) {
- success = FALSE;
- } else {
- ut_error;
- }
+ RB_NONE, &mtr);
+ success = err == DB_SUCCESS;
+ ut_a(success || err == DB_OUT_OF_FILE_SPACE);
}
}
@@ -282,15 +299,15 @@ row_purge_remove_sec_if_poss_low(
return(success);
}
-/***************************************************************
+/***********************************************************//**
Removes a secondary index entry if possible. */
UNIV_INLINE
void
row_purge_remove_sec_if_poss(
/*=========================*/
- purge_node_t* node, /* in: row purge node */
- dict_index_t* index, /* in: index */
- dtuple_t* entry) /* in: index entry */
+ purge_node_t* node, /*!< in: row purge node */
+ dict_index_t* index, /*!< in: index */
+ dtuple_t* entry) /*!< in: index entry */
{
ibool success;
ulint n_tries = 0;
@@ -322,13 +339,13 @@ retry:
ut_a(success);
}
-/***************************************************************
+/***********************************************************//**
Purges a delete marking of a record. */
static
void
row_purge_del_mark(
/*===============*/
- purge_node_t* node) /* in: row purge node */
+ purge_node_t* node) /*!< in: row purge node */
{
mem_heap_t* heap;
dtuple_t* entry;
@@ -342,8 +359,8 @@ row_purge_del_mark(
index = node->index;
/* Build the index entry */
- entry = row_build_index_entry(node->row, index, heap);
-
+ entry = row_build_index_entry(node->row, NULL, index, heap);
+ ut_a(entry);
row_purge_remove_sec_if_poss(node, index, entry);
node->index = dict_table_get_next_index(node->index);
@@ -354,26 +371,22 @@ row_purge_del_mark(
row_purge_remove_clust_if_poss(node);
}
-/***************************************************************
+/***********************************************************//**
Purges an update of an existing record. Also purges an update of a delete
marked record if that record contained an externally stored field. */
static
void
row_purge_upd_exist_or_extern(
/*==========================*/
- purge_node_t* node) /* in: row purge node */
+ purge_node_t* node) /*!< in: row purge node */
{
mem_heap_t* heap;
dtuple_t* entry;
dict_index_t* index;
- upd_field_t* ufield;
ibool is_insert;
ulint rseg_id;
ulint page_no;
ulint offset;
- ulint internal_offset;
- byte* data_field;
- ulint data_field_len;
ulint i;
mtr_t mtr;
@@ -392,8 +405,9 @@ row_purge_upd_exist_or_extern(
if (row_upd_changes_ord_field_binary(NULL, node->index,
node->update)) {
/* Build the older version of the index entry */
- entry = row_build_index_entry(node->row, index, heap);
-
+ entry = row_build_index_entry(node->row, NULL,
+ index, heap);
+ ut_a(entry);
row_purge_remove_sec_if_poss(node, index, entry);
}
@@ -406,16 +420,23 @@ skip_secondaries:
/* Free possible externally stored fields */
for (i = 0; i < upd_get_n_fields(node->update); i++) {
- ufield = upd_get_nth_field(node->update, i);
+ const upd_field_t* ufield
+ = upd_get_nth_field(node->update, i);
+
+ if (dfield_is_ext(&ufield->new_val)) {
+ buf_block_t* block;
+ ulint internal_offset;
+ byte* data_field;
- if (ufield->extern_storage) {
/* We use the fact that new_val points to
node->undo_rec and get thus the offset of
- dfield data inside the unod record. Then we
+ dfield data inside the undo record. Then we
can calculate from node->roll_ptr the file
address of the new_val data */
- internal_offset = ((byte*)ufield->new_val.data)
+ internal_offset
+ = ((const byte*)
+ dfield_get_data(&ufield->new_val))
- node->undo_rec;
ut_a(internal_offset < UNIV_PAGE_SIZE);
@@ -446,46 +467,45 @@ skip_secondaries:
/* We assume in purge of externally stored fields
that the space id of the undo log record is 0! */
- data_field = buf_page_get(0, page_no, RW_X_LATCH, &mtr)
- + offset + internal_offset;
-
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(buf_frame_align(data_field),
- SYNC_TRX_UNDO_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
+ block = buf_page_get(0, 0, page_no, RW_X_LATCH, &mtr);
+ buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
- data_field_len = ufield->new_val.len;
+ data_field = buf_block_get_frame(block)
+ + offset + internal_offset;
- btr_free_externally_stored_field(index, data_field,
- data_field_len,
- FALSE, &mtr);
+ ut_a(dfield_get_len(&ufield->new_val)
+ >= BTR_EXTERN_FIELD_REF_SIZE);
+ btr_free_externally_stored_field(
+ index,
+ data_field + dfield_get_len(&ufield->new_val)
+ - BTR_EXTERN_FIELD_REF_SIZE,
+ NULL, NULL, NULL, 0, RB_NONE, &mtr);
mtr_commit(&mtr);
}
}
}
-/***************************************************************
-Parses the row reference and other info in a modify undo log record. */
+/***********************************************************//**
+Parses the row reference and other info in a modify undo log record.
+@return TRUE if purge operation required: NOTE that then the CALLER
+must unfreeze data dictionary! */
static
ibool
row_purge_parse_undo_rec(
/*=====================*/
- /* out: TRUE if purge operation required:
- NOTE that then the CALLER must unfreeze
- data dictionary! */
- purge_node_t* node, /* in: row undo node */
+ purge_node_t* node, /*!< in: row undo node */
ibool* updated_extern,
- /* out: TRUE if an externally stored field
+ /*!< out: TRUE if an externally stored field
was updated */
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
dict_index_t* clust_index;
byte* ptr;
trx_t* trx;
- dulint undo_no;
+ undo_no_t undo_no;
dulint table_id;
- dulint trx_id;
- dulint roll_ptr;
+ trx_id_t trx_id;
+ roll_ptr_t roll_ptr;
ulint info_bits;
ulint type;
ulint cmpl_info;
@@ -528,9 +548,8 @@ row_purge_parse_undo_rec(
if (node->table == NULL) {
/* The table has been dropped: no need to do purge */
-
+err_exit:
row_mysql_unfreeze_data_dictionary(trx);
-
return(FALSE);
}
@@ -539,9 +558,7 @@ row_purge_parse_undo_rec(
node->table = NULL;
- row_mysql_unfreeze_data_dictionary(trx);
-
- return(FALSE);
+ goto err_exit;
}
clust_index = dict_table_get_first_index(node->table);
@@ -549,9 +566,7 @@ row_purge_parse_undo_rec(
if (clust_index == NULL) {
/* The table was corrupt in the data dictionary */
- row_mysql_unfreeze_data_dictionary(trx);
-
- return(FALSE);
+ goto err_exit;
}
ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
@@ -564,30 +579,31 @@ row_purge_parse_undo_rec(
/* Read to the partial row the fields that occur in indexes */
if (!(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
- ptr = trx_undo_rec_get_partial_row(ptr, clust_index,
- &(node->row), node->heap);
+ ptr = trx_undo_rec_get_partial_row(
+ ptr, clust_index, &node->row,
+ type == TRX_UNDO_UPD_DEL_REC,
+ node->heap);
}
return(TRUE);
}
-/***************************************************************
+/***********************************************************//**
Fetches an undo log record and does the purge for the recorded operation.
If none left, or the current purge completed, returns the control to the
-parent node, which is always a query thread node. */
+parent node, which is always a query thread node.
+@return DB_SUCCESS if operation successfully completed, else error code */
static
ulint
row_purge(
/*======*/
- /* out: DB_SUCCESS if operation successfully
- completed, else error code */
- purge_node_t* node, /* in: row purge node */
- que_thr_t* thr) /* in: query thread */
+ purge_node_t* node, /*!< in: row purge node */
+ que_thr_t* thr) /*!< in: query thread */
{
- dulint roll_ptr;
- ibool purge_needed;
- ibool updated_extern;
- trx_t* trx;
+ roll_ptr_t roll_ptr;
+ ibool purge_needed;
+ ibool updated_extern;
+ trx_t* trx;
ut_ad(node && thr);
@@ -646,15 +662,15 @@ row_purge(
return(DB_SUCCESS);
}
-/***************************************************************
+/***********************************************************//**
Does the purge operation for a single undo log record. This is a high-level
-function used in an SQL execution graph. */
-
+function used in an SQL execution graph.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
row_purge_step(
/*===========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
purge_node_t* node;
ulint err;
diff --git a/storage/innobase/row/row0row.c b/storage/innobase/row/row0row.c
index 08e50817db9..128ac3ba3e8 100644
--- a/storage/innobase/row/row0row.c
+++ b/storage/innobase/row/row0row.c
@@ -1,7 +1,24 @@
-/******************************************************
-General row routines
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1996 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0row.c
+General row routines
Created 4/20/1996 Heikki Tuuri
*******************************************************/
@@ -12,8 +29,10 @@ Created 4/20/1996 Heikki Tuuri
#include "row0row.ic"
#endif
+#include "data0type.h"
#include "dict0dict.h"
#include "btr0btr.h"
+#include "ha_prototypes.h"
#include "mach0data.h"
#include "trx0rseg.h"
#include "trx0trx.h"
@@ -22,100 +41,62 @@ Created 4/20/1996 Heikki Tuuri
#include "trx0purge.h"
#include "trx0rec.h"
#include "que0que.h"
-#include "row0row.h"
+#include "row0ext.h"
#include "row0upd.h"
#include "rem0cmp.h"
#include "read0read.h"
-
-/*************************************************************************
-Reads the trx id or roll ptr field from a clustered index record: this function
-is slower than the specialized inline functions. */
-
-dulint
-row_get_rec_sys_field(
+#include "ut0mem.h"
+
+/*********************************************************************//**
+Gets the offset of trx id field, in bytes relative to the origin of
+a clustered index record.
+@return offset of DATA_TRX_ID */
+UNIV_INTERN
+ulint
+row_get_trx_id_offset(
/*==================*/
- /* out: value of the field */
- ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets)/* in: rec_get_offsets(rec, index) */
-{
- ulint pos;
- byte* field;
- ulint len;
-
- ut_ad(index->type & DICT_CLUSTERED);
-
- pos = dict_index_get_sys_col_pos(index, type);
-
- field = rec_get_nth_field(rec, offsets, pos, &len);
-
- if (type == DATA_TRX_ID) {
-
- return(trx_read_trx_id(field));
- } else {
- ut_ad(type == DATA_ROLL_PTR);
-
- return(trx_read_roll_ptr(field));
- }
-}
-
-/*************************************************************************
-Sets the trx id or roll ptr field in a clustered index record: this function
-is slower than the specialized inline functions. */
-
-void
-row_set_rec_sys_field(
-/*==================*/
- /* out: value of the field */
- ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- dulint val) /* in: value to set */
+ const rec_t* rec __attribute__((unused)),
+ /*!< in: record */
+ dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
{
ulint pos;
- byte* field;
+ ulint offset;
ulint len;
- ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(dict_index_is_clust(index));
ut_ad(rec_offs_validate(rec, index, offsets));
- pos = dict_index_get_sys_col_pos(index, type);
+ pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
- field = rec_get_nth_field(rec, offsets, pos, &len);
+ offset = rec_get_nth_field_offs(offsets, pos, &len);
- if (type == DATA_TRX_ID) {
-
- trx_write_trx_id(field, val);
- } else {
- ut_ad(type == DATA_ROLL_PTR);
+ ut_ad(len == DATA_TRX_ID_LEN);
- trx_write_roll_ptr(field, val);
- }
+ return(offset);
}
-/*********************************************************************
-When an insert to a table is performed, this function builds the entry which
-has to be inserted to an index on the table. */
-
+/*****************************************************************//**
+When an insert or purge to a table is performed, this function builds
+the entry to be inserted into or purged from an index on the table.
+@return index entry which should be inserted or purged, or NULL if the
+externally stored columns in the clustered index record are
+unavailable and ext != NULL */
+UNIV_INTERN
dtuple_t*
row_build_index_entry(
/*==================*/
- /* out: index entry which should be inserted */
- dtuple_t* row, /* in: row which should be inserted to the
- table */
- dict_index_t* index, /* in: index on the table */
- mem_heap_t* heap) /* in: memory heap from which the memory for
+ const dtuple_t* row, /*!< in: row which should be
+ inserted or purged */
+ row_ext_t* ext, /*!< in: externally stored column prefixes,
+ or NULL */
+ dict_index_t* index, /*!< in: index on the table */
+ mem_heap_t* heap) /*!< in: memory heap from which the memory for
the index entry is allocated */
{
dtuple_t* entry;
ulint entry_len;
- dict_field_t* ind_field;
- dfield_t* dfield;
- dfield_t* dfield2;
ulint i;
- ulint storage_len;
ut_ad(row && index && heap);
ut_ad(dtuple_check_typed(row));
@@ -123,35 +104,61 @@ row_build_index_entry(
entry_len = dict_index_get_n_fields(index);
entry = dtuple_create(heap, entry_len);
- if (index->type & DICT_UNIVERSAL) {
+ if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
dtuple_set_n_fields_cmp(entry, entry_len);
+ /* There may only be externally stored columns
+ in a clustered index B-tree of a user table. */
+ ut_a(!ext);
} else {
dtuple_set_n_fields_cmp(
entry, dict_index_get_n_unique_in_tree(index));
}
for (i = 0; i < entry_len; i++) {
- const dict_col_t* col;
- ind_field = dict_index_get_nth_field(index, i);
- col = ind_field->col;
-
- dfield = dtuple_get_nth_field(entry, i);
-
- dfield2 = dtuple_get_nth_field(row, dict_col_get_no(col));
+ const dict_field_t* ind_field
+ = dict_index_get_nth_field(index, i);
+ const dict_col_t* col
+ = ind_field->col;
+ ulint col_no
+ = dict_col_get_no(col);
+ dfield_t* dfield
+ = dtuple_get_nth_field(entry, i);
+ const dfield_t* dfield2
+ = dtuple_get_nth_field(row, col_no);
+ ulint len
+ = dfield_get_len(dfield2);
dfield_copy(dfield, dfield2);
- /* If a column prefix index, take only the prefix */
- if (ind_field->prefix_len > 0
- && dfield_get_len(dfield2) != UNIV_SQL_NULL) {
-
- storage_len = dtype_get_at_most_n_mbchars(
- col->prtype, col->mbminlen, col->mbmaxlen,
- ind_field->prefix_len,
- dfield_get_len(dfield2), dfield2->data);
+ if (dfield_is_null(dfield) || ind_field->prefix_len == 0) {
+ continue;
+ }
- dfield_set_len(dfield, storage_len);
+ /* If a column prefix index, take only the prefix.
+ Prefix-indexed columns may be externally stored. */
+ ut_ad(col->ord_part);
+
+ if (UNIV_LIKELY_NULL(ext)) {
+ /* See if the column is stored externally. */
+ const byte* buf = row_ext_lookup(ext, col_no,
+ &len);
+ if (UNIV_LIKELY_NULL(buf)) {
+ if (UNIV_UNLIKELY(buf == field_ref_zero)) {
+ return(NULL);
+ }
+ dfield_set_data(dfield, buf, len);
+ }
+ } else if (dfield_is_ext(dfield)) {
+ ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
+ len -= BTR_EXTERN_FIELD_REF_SIZE;
+ ut_a(ind_field->prefix_len <= len
+ || dict_index_is_clust(index));
}
+
+ len = dtype_get_at_most_n_mbchars(
+ col->prtype, col->mbminlen, col->mbmaxlen,
+ ind_field->prefix_len, len, dfield_get_data(dfield));
+ dfield_set_len(dfield, len);
}
ut_ad(dtuple_check_typed(entry));
@@ -159,49 +166,63 @@ row_build_index_entry(
return(entry);
}
-/***********************************************************************
-An inverse function to dict_row_build_index_entry. Builds a row from a
-record in a clustered index. */
-
+/*******************************************************************//**
+An inverse function to row_build_index_entry. Builds a row from a
+record in a clustered index.
+@return own: row built; see the NOTE below! */
+UNIV_INTERN
dtuple_t*
row_build(
/*======*/
- /* out, own: row built; see the NOTE below! */
- ulint type, /* in: ROW_COPY_POINTERS or ROW_COPY_DATA;
- the latter copies also the data fields to
- heap while the first only places pointers to
- data fields on the index page, and thus is
- more efficient */
- dict_index_t* index, /* in: clustered index */
- rec_t* rec, /* in: record in the clustered index;
- NOTE: in the case ROW_COPY_POINTERS
- the data fields in the row will point
- directly into this record, therefore,
- the buffer page of this record must be
- at least s-latched and the latch held
- as long as the row dtuple is used! */
- const ulint* offsets,/* in: rec_get_offsets(rec, index)
- or NULL, in which case this function
- will invoke rec_get_offsets() */
- mem_heap_t* heap) /* in: memory heap from which the memory
- needed is allocated */
+ ulint type, /*!< in: ROW_COPY_POINTERS or
+ ROW_COPY_DATA; the latter
+ copies also the data fields to
+ heap while the first only
+ places pointers to data fields
+ on the index page, and thus is
+ more efficient */
+ const dict_index_t* index, /*!< in: clustered index */
+ const rec_t* rec, /*!< in: record in the clustered
+ index; NOTE: in the case
+ ROW_COPY_POINTERS the data
+ fields in the row will point
+ directly into this record,
+ therefore, the buffer page of
+ this record must be at least
+ s-latched and the latch held
+ as long as the row dtuple is used! */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec,index)
+ or NULL, in which case this function
+ will invoke rec_get_offsets() */
+ const dict_table_t* col_table,
+ /*!< in: table, to check which
+ externally stored columns
+ occur in the ordering columns
+ of an index, or NULL if
+ index->table should be
+ consulted instead */
+ row_ext_t** ext, /*!< out, own: cache of
+ externally stored column
+ prefixes, or NULL */
+ mem_heap_t* heap) /*!< in: memory heap from which
+ the memory needed is allocated */
{
- dtuple_t* row;
- dict_table_t* table;
- dict_field_t* ind_field;
- dfield_t* dfield;
- ulint n_fields;
- byte* field;
- ulint len;
- ulint row_len;
- byte* buf;
- ulint i;
- mem_heap_t* tmp_heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ dtuple_t* row;
+ const dict_table_t* table;
+ ulint n_fields;
+ ulint n_ext_cols;
+ ulint* ext_cols = NULL; /* remove warning */
+ ulint len;
+ ulint row_len;
+ byte* buf;
+ ulint i;
+ ulint j;
+ mem_heap_t* tmp_heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ rec_offs_init(offsets_);
ut_ad(index && rec && heap);
- ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(dict_index_is_clust(index));
if (!offsets) {
offsets = rec_get_offsets(rec, index, offsets_,
@@ -223,31 +244,64 @@ row_build(
row = dtuple_create(heap, row_len);
+ dict_table_copy_types(row, table);
+
dtuple_set_info_bits(row, rec_get_info_bits(
rec, dict_table_is_comp(table)));
n_fields = rec_offs_n_fields(offsets);
+ n_ext_cols = rec_offs_n_extern(offsets);
+ if (n_ext_cols) {
+ ext_cols = mem_heap_alloc(heap, n_ext_cols * sizeof *ext_cols);
+ }
- dict_table_copy_types(row, table);
-
- for (i = 0; i < n_fields; i++) {
- ind_field = dict_index_get_nth_field(index, i);
+ for (i = j = 0; i < n_fields; i++) {
+ dict_field_t* ind_field
+ = dict_index_get_nth_field(index, i);
+ const dict_col_t* col
+ = dict_field_get_col(ind_field);
+ ulint col_no
+ = dict_col_get_no(col);
+ dfield_t* dfield
+ = dtuple_get_nth_field(row, col_no);
if (ind_field->prefix_len == 0) {
- const dict_col_t* col
- = dict_field_get_col(ind_field);
-
- dfield = dtuple_get_nth_field(row,
- dict_col_get_no(col));
- field = rec_get_nth_field(rec, offsets, i, &len);
+ const byte* field = rec_get_nth_field(
+ rec, offsets, i, &len);
dfield_set_data(dfield, field, len);
}
+
+ if (rec_offs_nth_extern(offsets, i)) {
+ dfield_set_ext(dfield);
+
+ if (UNIV_LIKELY_NULL(col_table)) {
+ ut_a(col_no
+ < dict_table_get_n_cols(col_table));
+ col = dict_table_get_nth_col(
+ col_table, col_no);
+ }
+
+ if (col->ord_part) {
+ /* We will have to fetch prefixes of
+ externally stored columns that are
+ referenced by column prefixes. */
+ ext_cols[j++] = col_no;
+ }
+ }
}
ut_ad(dtuple_check_typed(row));
+ if (j) {
+ *ext = row_ext_create(j, ext_cols, row,
+ dict_table_zip_size(index->table),
+ heap);
+ } else {
+ *ext = NULL;
+ }
+
if (tmp_heap) {
mem_heap_free(tmp_heap);
}
@@ -255,54 +309,35 @@ row_build(
return(row);
}
-/***********************************************************************
-Converts an index record to a typed data tuple. NOTE that externally
-stored (often big) fields are NOT copied to heap. */
-
+/*******************************************************************//**
+Converts an index record to a typed data tuple.
+@return index entry built; does not set info_bits, and the data fields
+in the entry will point directly to rec */
+UNIV_INTERN
dtuple_t*
-row_rec_to_index_entry(
-/*===================*/
- /* out, own: index entry built; see the
- NOTE below! */
- ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
- the former copies also the data fields to
- heap as the latter only places pointers to
- data fields on the index page */
- dict_index_t* index, /* in: index */
- rec_t* rec, /* in: record in the index;
- NOTE: in the case ROW_COPY_POINTERS
- the data fields in the row will point
- directly into this record, therefore,
- the buffer page of this record must be
- at least s-latched and the latch held
- as long as the dtuple is used! */
- mem_heap_t* heap) /* in: memory heap from which the memory
- needed is allocated */
+row_rec_to_index_entry_low(
+/*=======================*/
+ const rec_t* rec, /*!< in: record in the index */
+ const dict_index_t* index, /*!< in: index */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ ulint* n_ext, /*!< out: number of externally
+ stored columns */
+ mem_heap_t* heap) /*!< in: memory heap from which
+ the memory needed is allocated */
{
dtuple_t* entry;
dfield_t* dfield;
ulint i;
- byte* field;
+ const byte* field;
ulint len;
ulint rec_len;
- byte* buf;
- mem_heap_t* tmp_heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
ut_ad(rec && heap && index);
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &tmp_heap);
-
- if (type == ROW_COPY_DATA) {
- /* Take a copy of rec to heap */
- buf = mem_heap_alloc(heap, rec_offs_size(offsets));
- rec = rec_copy(buf, rec, offsets);
- /* Avoid a debug assertion in rec_offs_validate(). */
- rec_offs_make_valid(rec, index, offsets);
- }
+ /* Because this function may be invoked by row0merge.c
+ on a record whose header is in different format, the check
+ rec_offs_validate(rec, index, offsets) must be avoided here. */
+ ut_ad(n_ext);
+ *n_ext = 0;
rec_len = rec_offs_n_fields(offsets);
@@ -314,54 +349,104 @@ row_rec_to_index_entry(
dict_index_copy_types(entry, index, rec_len);
- dtuple_set_info_bits(entry,
- rec_get_info_bits(rec, rec_offs_comp(offsets)));
-
for (i = 0; i < rec_len; i++) {
dfield = dtuple_get_nth_field(entry, i);
field = rec_get_nth_field(rec, offsets, i, &len);
dfield_set_data(dfield, field, len);
+
+ if (rec_offs_nth_extern(offsets, i)) {
+ dfield_set_ext(dfield);
+ (*n_ext)++;
+ }
}
ut_ad(dtuple_check_typed(entry));
- if (tmp_heap) {
- mem_heap_free(tmp_heap);
+
+ return(entry);
+}
+
+/*******************************************************************//**
+Converts an index record to a typed data tuple. NOTE that externally
+stored (often big) fields are NOT copied to heap.
+@return own: index entry built; see the NOTE below! */
+UNIV_INTERN
+dtuple_t*
+row_rec_to_index_entry(
+/*===================*/
+ ulint type, /*!< in: ROW_COPY_DATA, or
+ ROW_COPY_POINTERS: the former
+ copies also the data fields to
+ heap as the latter only places
+ pointers to data fields on the
+ index page */
+ const rec_t* rec, /*!< in: record in the index;
+ NOTE: in the case
+ ROW_COPY_POINTERS the data
+ fields in the row will point
+ directly into this record,
+ therefore, the buffer page of
+ this record must be at least
+ s-latched and the latch held
+ as long as the dtuple is used! */
+ const dict_index_t* index, /*!< in: index */
+ ulint* offsets,/*!< in/out: rec_get_offsets(rec) */
+ ulint* n_ext, /*!< out: number of externally
+ stored columns */
+ mem_heap_t* heap) /*!< in: memory heap from which
+ the memory needed is allocated */
+{
+ dtuple_t* entry;
+ byte* buf;
+
+ ut_ad(rec && heap && index);
+ ut_ad(rec_offs_validate(rec, index, offsets));
+
+ if (type == ROW_COPY_DATA) {
+ /* Take a copy of rec to heap */
+ buf = mem_heap_alloc(heap, rec_offs_size(offsets));
+ rec = rec_copy(buf, rec, offsets);
+ /* Avoid a debug assertion in rec_offs_validate(). */
+ rec_offs_make_valid(rec, index, offsets);
}
+ entry = row_rec_to_index_entry_low(rec, index, offsets, n_ext, heap);
+
+ dtuple_set_info_bits(entry,
+ rec_get_info_bits(rec, rec_offs_comp(offsets)));
+
return(entry);
}
-/***********************************************************************
+/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
-search the clustered index record. */
-
+search the clustered index record.
+@return own: row reference built; see the NOTE below! */
+UNIV_INTERN
dtuple_t*
row_build_row_ref(
/*==============*/
- /* out, own: row reference built; see the
- NOTE below! */
- ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
+ ulint type, /*!< in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
the former copies also the data fields to
heap, whereas the latter only places pointers
to data fields on the index page */
- dict_index_t* index, /* in: index */
- rec_t* rec, /* in: record in the index;
+ dict_index_t* index, /*!< in: secondary index */
+ const rec_t* rec, /*!< in: record in the index;
NOTE: in the case ROW_COPY_POINTERS
the data fields in the row will point
directly into this record, therefore,
the buffer page of this record must be
at least s-latched and the latch held
as long as the row reference is used! */
- mem_heap_t* heap) /* in: memory heap from which the memory
+ mem_heap_t* heap) /*!< in: memory heap from which the memory
needed is allocated */
{
dict_table_t* table;
dict_index_t* clust_index;
dfield_t* dfield;
dtuple_t* ref;
- byte* field;
+ const byte* field;
ulint len;
ulint ref_len;
ulint pos;
@@ -371,12 +456,15 @@ row_build_row_ref(
mem_heap_t* tmp_heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
ut_ad(index && rec && heap);
+ ut_ad(!dict_index_is_clust(index));
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &tmp_heap);
+ /* Secondary indexes must not contain externally stored columns. */
+ ut_ad(!rec_offs_any_extern(offsets));
if (type == ROW_COPY_DATA) {
/* Take a copy of rec to heap */
@@ -442,40 +530,44 @@ row_build_row_ref(
return(ref);
}
-/***********************************************************************
+/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
search the clustered index record. */
-
+UNIV_INTERN
void
row_build_row_ref_in_tuple(
/*=======================*/
- dtuple_t* ref, /* in/out: row reference built; see the
- NOTE below! */
- dict_index_t* index, /* in: index */
- rec_t* rec, /* in: record in the index;
- NOTE: the data fields in ref will point
- directly into this record, therefore,
- the buffer page of this record must be
- at least s-latched and the latch held
- as long as the row reference is used! */
- trx_t* trx) /* in: transaction */
+ dtuple_t* ref, /*!< in/out: row reference built;
+ see the NOTE below! */
+ const rec_t* rec, /*!< in: record in the index;
+ NOTE: the data fields in ref
+ will point directly into this
+ record, therefore, the buffer
+ page of this record must be at
+ least s-latched and the latch
+ held as long as the row
+ reference is used! */
+ const dict_index_t* index, /*!< in: secondary index */
+ ulint* offsets,/*!< in: rec_get_offsets(rec, index)
+ or NULL */
+ trx_t* trx) /*!< in: transaction */
{
- dict_index_t* clust_index;
- dfield_t* dfield;
- byte* field;
- ulint len;
- ulint ref_len;
- ulint pos;
- ulint clust_col_prefix_len;
- ulint i;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ const dict_index_t* clust_index;
+ dfield_t* dfield;
+ const byte* field;
+ ulint len;
+ ulint ref_len;
+ ulint pos;
+ ulint clust_col_prefix_len;
+ ulint i;
+ mem_heap_t* heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ rec_offs_init(offsets_);
ut_a(ref);
ut_a(index);
ut_a(rec);
+ ut_ad(!dict_index_is_clust(index));
if (UNIV_UNLIKELY(!index->table)) {
fputs("InnoDB: table ", stderr);
@@ -489,13 +581,20 @@ notfound:
clust_index = dict_table_get_first_index(index->table);
- if (!clust_index) {
+ if (UNIV_UNLIKELY(!clust_index)) {
fputs("InnoDB: clust index for table ", stderr);
goto notfound;
}
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
+ if (!offsets) {
+ offsets = rec_get_offsets(rec, index, offsets_,
+ ULINT_UNDEFINED, &heap);
+ } else {
+ ut_ad(rec_offs_validate(rec, index, offsets));
+ }
+ /* Secondary indexes must not contain externally stored columns. */
+ ut_ad(!rec_offs_any_extern(offsets));
ref_len = dict_index_get_n_unique(clust_index);
ut_ad(ref_len == dtuple_get_n_fields(ref));
@@ -544,74 +643,19 @@ notfound:
}
}
-/***********************************************************************
-From a row build a row reference with which we can search the clustered
-index record. */
-
-void
-row_build_row_ref_from_row(
-/*=======================*/
- dtuple_t* ref, /* in/out: row reference built; see the
- NOTE below! ref must have the right number
- of fields! */
- dict_table_t* table, /* in: table */
- dtuple_t* row) /* in: row
- NOTE: the data fields in ref will point
- directly into data of this row */
-{
- dict_index_t* clust_index;
- ulint ref_len;
- ulint i;
-
- ut_ad(ref && table && row);
-
- clust_index = dict_table_get_first_index(table);
-
- ref_len = dict_index_get_n_unique(clust_index);
-
- ut_ad(ref_len == dtuple_get_n_fields(ref));
-
- for (i = 0; i < ref_len; i++) {
- const dict_col_t* col;
- dict_field_t* field;
- dfield_t* dfield;
- dfield_t* dfield2;
-
- dfield = dtuple_get_nth_field(ref, i);
-
- field = dict_index_get_nth_field(clust_index, i);
-
- col = dict_field_get_col(field);
-
- dfield2 = dtuple_get_nth_field(row, dict_col_get_no(col));
-
- dfield_copy(dfield, dfield2);
-
- if (field->prefix_len > 0
- && dfield->len != UNIV_SQL_NULL) {
-
- dfield->len = dtype_get_at_most_n_mbchars(
- col->prtype, col->mbminlen, col->mbmaxlen,
- field->prefix_len, dfield->len, dfield->data);
- }
- }
-
- ut_ad(dtuple_check_typed(ref));
-}
-
-/*******************************************************************
-Searches the clustered index record for a row, if we have the row reference. */
-
+/***************************************************************//**
+Searches the clustered index record for a row, if we have the row reference.
+@return TRUE if found */
+UNIV_INTERN
ibool
row_search_on_row_ref(
/*==================*/
- /* out: TRUE if found */
- btr_pcur_t* pcur, /* in/out: persistent cursor, which must
- be closed by the caller */
- ulint mode, /* in: BTR_MODIFY_LEAF, ... */
- dict_table_t* table, /* in: table */
- dtuple_t* ref, /* in: row reference */
- mtr_t* mtr) /* in: mtr */
+ btr_pcur_t* pcur, /*!< out: persistent cursor, which must
+ be closed by the caller */
+ ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
+ const dict_table_t* table, /*!< in: table */
+ const dtuple_t* ref, /*!< in: row reference */
+ mtr_t* mtr) /*!< in/out: mtr */
{
ulint low_match;
rec_t* rec;
@@ -642,19 +686,19 @@ row_search_on_row_ref(
return(TRUE);
}
-/*************************************************************************
+/*********************************************************************//**
Fetches the clustered index record for a secondary index record. The latches
-on the secondary index record are preserved. */
-
+on the secondary index record are preserved.
+@return record or NULL, if no record found */
+UNIV_INTERN
rec_t*
row_get_clust_rec(
/*==============*/
- /* out: record or NULL, if no record found */
- ulint mode, /* in: BTR_MODIFY_LEAF, ... */
- rec_t* rec, /* in: record in a secondary index */
- dict_index_t* index, /* in: secondary index */
- dict_index_t** clust_index,/* out: clustered index */
- mtr_t* mtr) /* in: mtr */
+ ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
+ const rec_t* rec, /*!< in: record in a secondary index */
+ dict_index_t* index, /*!< in: secondary index */
+ dict_index_t** clust_index,/*!< out: clustered index */
+ mtr_t* mtr) /*!< in: mtr */
{
mem_heap_t* heap;
dtuple_t* ref;
@@ -663,7 +707,7 @@ row_get_clust_rec(
ibool found;
rec_t* clust_rec;
- ut_ad((index->type & DICT_CLUSTERED) == 0);
+ ut_ad(!dict_index_is_clust(index));
table = index->table;
@@ -684,19 +728,19 @@ row_get_clust_rec(
return(clust_rec);
}
-/*******************************************************************
-Searches an index record. */
-
+/***************************************************************//**
+Searches an index record.
+@return TRUE if found */
+UNIV_INTERN
ibool
row_search_index_entry(
/*===================*/
- /* out: TRUE if found */
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: index entry */
- ulint mode, /* in: BTR_MODIFY_LEAF, ... */
- btr_pcur_t* pcur, /* in/out: persistent cursor, which must
+ dict_index_t* index, /*!< in: index */
+ const dtuple_t* entry, /*!< in: index entry */
+ ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
+ btr_pcur_t* pcur, /*!< in/out: persistent cursor, which must
be closed by the caller */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint n_fields;
ulint low_match;
@@ -711,16 +755,414 @@ row_search_index_entry(
n_fields = dtuple_get_n_fields(entry);
- if (page_rec_is_infimum(rec)) {
+ return(!page_rec_is_infimum(rec) && low_match == n_fields);
+}
- return(FALSE);
+#include <my_sys.h>
+
+/*******************************************************************//**
+Formats the raw data in "data" (in InnoDB on-disk format) that is of
+type DATA_INT using "prtype" and writes the result to "buf".
+If the data is in unknown format, then nothing is written to "buf",
+0 is returned and "format_in_hex" is set to TRUE, otherwise
+"format_in_hex" is left untouched.
+Not more than "buf_size" bytes are written to "buf".
+The result is always '\0'-terminated (provided buf_size > 0) and the
+number of bytes that were written to "buf" is returned (including the
+terminating '\0').
+@return number of bytes that were written */
+static
+ulint
+row_raw_format_int(
+/*===============*/
+ const char* data, /*!< in: raw data */
+ ulint data_len, /*!< in: raw data length
+ in bytes */
+ ulint prtype, /*!< in: precise type */
+ char* buf, /*!< out: output buffer */
+ ulint buf_size, /*!< in: output buffer size
+ in bytes */
+ ibool* format_in_hex) /*!< out: should the data be
+ formatted in hex */
+{
+ ulint ret;
+
+ if (data_len <= sizeof(ullint)) {
+
+ ullint value;
+ ibool unsigned_type = prtype & DATA_UNSIGNED;
+
+ value = mach_read_int_type((const byte*) data,
+ data_len, unsigned_type);
+
+ if (unsigned_type) {
+
+ ret = ut_snprintf(buf, buf_size, "%llu",
+ value) + 1;
+ } else {
+
+ ret = ut_snprintf(buf, buf_size, "%lld",
+ (long long) value) + 1;
+ }
+
+ } else {
+
+ *format_in_hex = TRUE;
+ ret = 0;
}
- if (low_match != n_fields) {
- /* Not found */
+ return(ut_min(ret, buf_size));
+}
- return(FALSE);
+/*******************************************************************//**
+Formats the raw data in "data" (in InnoDB on-disk format) that is of
+type DATA_(CHAR|VARCHAR|MYSQL|VARMYSQL) using "prtype" and writes the
+result to "buf".
+If the data is in binary format, then nothing is written to "buf",
+0 is returned and "format_in_hex" is set to TRUE, otherwise
+"format_in_hex" is left untouched.
+Not more than "buf_size" bytes are written to "buf".
+The result is always '\0'-terminated (provided buf_size > 0) and the
+number of bytes that were written to "buf" is returned (including the
+terminating '\0').
+@return number of bytes that were written */
+static
+ulint
+row_raw_format_str(
+/*===============*/
+ const char* data, /*!< in: raw data */
+ ulint data_len, /*!< in: raw data length
+ in bytes */
+ ulint prtype, /*!< in: precise type */
+ char* buf, /*!< out: output buffer */
+ ulint buf_size, /*!< in: output buffer size
+ in bytes */
+ ibool* format_in_hex) /*!< out: should the data be
+ formatted in hex */
+{
+ ulint charset_coll;
+
+ if (buf_size == 0) {
+
+ return(0);
}
- return(TRUE);
+ /* we assume system_charset_info is UTF-8 */
+
+ charset_coll = dtype_get_charset_coll(prtype);
+
+ if (UNIV_LIKELY(dtype_is_utf8(prtype))) {
+
+ return(ut_str_sql_format(data, data_len, buf, buf_size));
+ }
+ /* else */
+
+ if (charset_coll == DATA_MYSQL_BINARY_CHARSET_COLL) {
+
+ *format_in_hex = TRUE;
+ return(0);
+ }
+ /* else */
+
+ return(innobase_raw_format(data, data_len, charset_coll,
+ buf, buf_size));
}
+
+/*******************************************************************//**
+Formats the raw data in "data" (in InnoDB on-disk format) using
+"dict_field" and writes the result to "buf".
+Not more than "buf_size" bytes are written to "buf".
+The result is always NUL-terminated (provided buf_size is positive) and the
+number of bytes that were written to "buf" is returned (including the
+terminating NUL).
+@return number of bytes that were written */
+UNIV_INTERN
+ulint
+row_raw_format(
+/*===========*/
+ const char* data, /*!< in: raw data */
+ ulint data_len, /*!< in: raw data length
+ in bytes */
+ const dict_field_t* dict_field, /*!< in: index field */
+ char* buf, /*!< out: output buffer */
+ ulint buf_size) /*!< in: output buffer size
+ in bytes */
+{
+ ulint mtype;
+ ulint prtype;
+ ulint ret;
+ ibool format_in_hex;
+
+ if (buf_size == 0) {
+
+ return(0);
+ }
+
+ if (data_len == UNIV_SQL_NULL) {
+
+ ret = ut_snprintf((char*) buf, buf_size, "NULL") + 1;
+
+ return(ut_min(ret, buf_size));
+ }
+
+ mtype = dict_field->col->mtype;
+ prtype = dict_field->col->prtype;
+
+ format_in_hex = FALSE;
+
+ switch (mtype) {
+ case DATA_INT:
+
+ ret = row_raw_format_int(data, data_len, prtype,
+ buf, buf_size, &format_in_hex);
+ break;
+ case DATA_CHAR:
+ case DATA_VARCHAR:
+ case DATA_MYSQL:
+ case DATA_VARMYSQL:
+
+ ret = row_raw_format_str(data, data_len, prtype,
+ buf, buf_size, &format_in_hex);
+ break;
+ /* XXX support more data types */
+ default:
+
+ format_in_hex = TRUE;
+ }
+
+ if (format_in_hex) {
+
+ if (UNIV_LIKELY(buf_size > 2)) {
+
+ memcpy(buf, "0x", 2);
+ buf += 2;
+ buf_size -= 2;
+ ret = 2 + ut_raw_to_hex(data, data_len,
+ buf, buf_size);
+ } else {
+
+ buf[0] = '\0';
+ ret = 1;
+ }
+ }
+
+ return(ret);
+}
+
+#ifdef UNIV_COMPILE_TEST_FUNCS
+
+#include "ut0dbg.h"
+
+void
+test_row_raw_format_int()
+{
+ ulint ret;
+ char buf[128];
+ ibool format_in_hex;
+
+#define CALL_AND_TEST(data, data_len, prtype, buf, buf_size,\
+ ret_expected, buf_expected, format_in_hex_expected)\
+ do {\
+ ibool ok = TRUE;\
+ ulint i;\
+ memset(buf, 'x', 10);\
+ buf[10] = '\0';\
+ format_in_hex = FALSE;\
+ fprintf(stderr, "TESTING \"\\x");\
+ for (i = 0; i < data_len; i++) {\
+ fprintf(stderr, "%02hhX", data[i]);\
+ }\
+ fprintf(stderr, "\", %lu, %lu, %lu\n",\
+ (ulint) data_len, (ulint) prtype,\
+ (ulint) buf_size);\
+ ret = row_raw_format_int(data, data_len, prtype,\
+ buf, buf_size, &format_in_hex);\
+ if (ret != ret_expected) {\
+ fprintf(stderr, "expected ret %lu, got %lu\n",\
+ (ulint) ret_expected, ret);\
+ ok = FALSE;\
+ }\
+ if (strcmp((char*) buf, buf_expected) != 0) {\
+ fprintf(stderr, "expected buf \"%s\", got \"%s\"\n",\
+ buf_expected, buf);\
+ ok = FALSE;\
+ }\
+ if (format_in_hex != format_in_hex_expected) {\
+ fprintf(stderr, "expected format_in_hex %d, got %d\n",\
+ (int) format_in_hex_expected,\
+ (int) format_in_hex);\
+ ok = FALSE;\
+ }\
+ if (ok) {\
+ fprintf(stderr, "OK: %lu, \"%s\" %d\n\n",\
+ (ulint) ret, buf, (int) format_in_hex);\
+ } else {\
+ return;\
+ }\
+ } while (0)
+
+#if 1
+ /* min values for signed 1-8 byte integers */
+
+ CALL_AND_TEST("\x00", 1, 0,
+ buf, sizeof(buf), 5, "-128", 0);
+
+ CALL_AND_TEST("\x00\x00", 2, 0,
+ buf, sizeof(buf), 7, "-32768", 0);
+
+ CALL_AND_TEST("\x00\x00\x00", 3, 0,
+ buf, sizeof(buf), 9, "-8388608", 0);
+
+ CALL_AND_TEST("\x00\x00\x00\x00", 4, 0,
+ buf, sizeof(buf), 12, "-2147483648", 0);
+
+ CALL_AND_TEST("\x00\x00\x00\x00\x00", 5, 0,
+ buf, sizeof(buf), 14, "-549755813888", 0);
+
+ CALL_AND_TEST("\x00\x00\x00\x00\x00\x00", 6, 0,
+ buf, sizeof(buf), 17, "-140737488355328", 0);
+
+ CALL_AND_TEST("\x00\x00\x00\x00\x00\x00\x00", 7, 0,
+ buf, sizeof(buf), 19, "-36028797018963968", 0);
+
+ CALL_AND_TEST("\x00\x00\x00\x00\x00\x00\x00\x00", 8, 0,
+ buf, sizeof(buf), 21, "-9223372036854775808", 0);
+
+ /* min values for unsigned 1-8 byte integers */
+
+ CALL_AND_TEST("\x00", 1, DATA_UNSIGNED,
+ buf, sizeof(buf), 2, "0", 0);
+
+ CALL_AND_TEST("\x00\x00", 2, DATA_UNSIGNED,
+ buf, sizeof(buf), 2, "0", 0);
+
+ CALL_AND_TEST("\x00\x00\x00", 3, DATA_UNSIGNED,
+ buf, sizeof(buf), 2, "0", 0);
+
+ CALL_AND_TEST("\x00\x00\x00\x00", 4, DATA_UNSIGNED,
+ buf, sizeof(buf), 2, "0", 0);
+
+ CALL_AND_TEST("\x00\x00\x00\x00\x00", 5, DATA_UNSIGNED,
+ buf, sizeof(buf), 2, "0", 0);
+
+ CALL_AND_TEST("\x00\x00\x00\x00\x00\x00", 6, DATA_UNSIGNED,
+ buf, sizeof(buf), 2, "0", 0);
+
+ CALL_AND_TEST("\x00\x00\x00\x00\x00\x00\x00", 7, DATA_UNSIGNED,
+ buf, sizeof(buf), 2, "0", 0);
+
+ CALL_AND_TEST("\x00\x00\x00\x00\x00\x00\x00\x00", 8, DATA_UNSIGNED,
+ buf, sizeof(buf), 2, "0", 0);
+
+ /* max values for signed 1-8 byte integers */
+
+ CALL_AND_TEST("\xFF", 1, 0,
+ buf, sizeof(buf), 4, "127", 0);
+
+ CALL_AND_TEST("\xFF\xFF", 2, 0,
+ buf, sizeof(buf), 6, "32767", 0);
+
+ CALL_AND_TEST("\xFF\xFF\xFF", 3, 0,
+ buf, sizeof(buf), 8, "8388607", 0);
+
+ CALL_AND_TEST("\xFF\xFF\xFF\xFF", 4, 0,
+ buf, sizeof(buf), 11, "2147483647", 0);
+
+ CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF", 5, 0,
+ buf, sizeof(buf), 13, "549755813887", 0);
+
+ CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF", 6, 0,
+ buf, sizeof(buf), 16, "140737488355327", 0);
+
+ CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 7, 0,
+ buf, sizeof(buf), 18, "36028797018963967", 0);
+
+ CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 8, 0,
+ buf, sizeof(buf), 20, "9223372036854775807", 0);
+
+ /* max values for unsigned 1-8 byte integers */
+
+ CALL_AND_TEST("\xFF", 1, DATA_UNSIGNED,
+ buf, sizeof(buf), 4, "255", 0);
+
+ CALL_AND_TEST("\xFF\xFF", 2, DATA_UNSIGNED,
+ buf, sizeof(buf), 6, "65535", 0);
+
+ CALL_AND_TEST("\xFF\xFF\xFF", 3, DATA_UNSIGNED,
+ buf, sizeof(buf), 9, "16777215", 0);
+
+ CALL_AND_TEST("\xFF\xFF\xFF\xFF", 4, DATA_UNSIGNED,
+ buf, sizeof(buf), 11, "4294967295", 0);
+
+ CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF", 5, DATA_UNSIGNED,
+ buf, sizeof(buf), 14, "1099511627775", 0);
+
+ CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF", 6, DATA_UNSIGNED,
+ buf, sizeof(buf), 16, "281474976710655", 0);
+
+ CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 7, DATA_UNSIGNED,
+ buf, sizeof(buf), 18, "72057594037927935", 0);
+
+ CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 8, DATA_UNSIGNED,
+ buf, sizeof(buf), 21, "18446744073709551615", 0);
+
+ /* some random values */
+
+ CALL_AND_TEST("\x52", 1, 0,
+ buf, sizeof(buf), 4, "-46", 0);
+
+ CALL_AND_TEST("\x0E", 1, DATA_UNSIGNED,
+ buf, sizeof(buf), 3, "14", 0);
+
+ CALL_AND_TEST("\x62\xCE", 2, 0,
+ buf, sizeof(buf), 6, "-7474", 0);
+
+ CALL_AND_TEST("\x29\xD6", 2, DATA_UNSIGNED,
+ buf, sizeof(buf), 6, "10710", 0);
+
+ CALL_AND_TEST("\x7F\xFF\x90", 3, 0,
+ buf, sizeof(buf), 5, "-112", 0);
+
+ CALL_AND_TEST("\x00\xA1\x16", 3, DATA_UNSIGNED,
+ buf, sizeof(buf), 6, "41238", 0);
+
+ CALL_AND_TEST("\x7F\xFF\xFF\xF7", 4, 0,
+ buf, sizeof(buf), 3, "-9", 0);
+
+ CALL_AND_TEST("\x00\x00\x00\x5C", 4, DATA_UNSIGNED,
+ buf, sizeof(buf), 3, "92", 0);
+
+ CALL_AND_TEST("\x7F\xFF\xFF\xFF\xFF\xFF\xDC\x63", 8, 0,
+ buf, sizeof(buf), 6, "-9117", 0);
+
+ CALL_AND_TEST("\x00\x00\x00\x00\x00\x01\x64\x62", 8, DATA_UNSIGNED,
+ buf, sizeof(buf), 6, "91234", 0);
+#endif
+
+ /* speed test */
+
+ speedo_t speedo;
+ ulint i;
+
+ speedo_reset(&speedo);
+
+ for (i = 0; i < 1000000; i++) {
+ row_raw_format_int("\x23", 1,
+ 0, buf, sizeof(buf),
+ &format_in_hex);
+ row_raw_format_int("\x23", 1,
+ DATA_UNSIGNED, buf, sizeof(buf),
+ &format_in_hex);
+
+ row_raw_format_int("\x00\x00\x00\x00\x00\x01\x64\x62", 8,
+ 0, buf, sizeof(buf),
+ &format_in_hex);
+ row_raw_format_int("\x00\x00\x00\x00\x00\x01\x64\x62", 8,
+ DATA_UNSIGNED, buf, sizeof(buf),
+ &format_in_hex);
+ }
+
+ speedo_show(&speedo);
+}
+
+#endif /* UNIV_COMPILE_TEST_FUNCS */
diff --git a/storage/innobase/row/row0sel.c b/storage/innobase/row/row0sel.c
index 38902bca905..3ef9726588e 100644
--- a/storage/innobase/row/row0sel.c
+++ b/storage/innobase/row/row0sel.c
@@ -1,7 +1,31 @@
-/*******************************************************
-Select
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1997 Innobase Oy
+*****************************************************************************/
+
+/***************************************************//**
+@file row/row0sel.c
+Select
Created 12/19/1997 Heikki Tuuri
*******************************************************/
@@ -51,29 +75,79 @@ to que_run_threads: this is to allow canceling runaway queries */
#define SEL_EXHAUSTED 1
#define SEL_RETRY 2
-/************************************************************************
+/********************************************************************//**
+Returns TRUE if the user-defined column in a secondary index record
+is alphabetically the same as the corresponding BLOB column in the clustered
+index record.
+NOTE: the comparison is NOT done as a binary comparison, but character
+fields are compared with collation!
+@return TRUE if the columns are equal */
+static
+ibool
+row_sel_sec_rec_is_for_blob(
+/*========================*/
+ ulint mtype, /*!< in: main type */
+ ulint prtype, /*!< in: precise type */
+ ulint mbminlen, /*!< in: minimum length of a
+ multi-byte character */
+ ulint mbmaxlen, /*!< in: maximum length of a
+ multi-byte character */
+ const byte* clust_field, /*!< in: the locally stored part of
+ the clustered index column, including
+ the BLOB pointer; the clustered
+ index record must be covered by
+ a lock or a page latch to protect it
+ against deletion (rollback or purge) */
+ ulint clust_len, /*!< in: length of clust_field */
+ const byte* sec_field, /*!< in: column in secondary index */
+ ulint sec_len, /*!< in: length of sec_field */
+ ulint zip_size) /*!< in: compressed page size, or 0 */
+{
+ ulint len;
+ byte buf[DICT_MAX_INDEX_COL_LEN];
+
+ len = btr_copy_externally_stored_field_prefix(buf, sizeof buf,
+ zip_size,
+ clust_field, clust_len);
+
+ if (UNIV_UNLIKELY(len == 0)) {
+ /* The BLOB was being deleted as the server crashed.
+ There should not be any secondary index records
+ referring to this clustered index record, because
+ btr_free_externally_stored_field() is called after all
+ secondary index entries of the row have been purged. */
+ return(FALSE);
+ }
+
+ len = dtype_get_at_most_n_mbchars(prtype, mbminlen, mbmaxlen,
+ sec_len, len, (const char*) buf);
+
+ return(!cmp_data_data(mtype, prtype, buf, len, sec_field, sec_len));
+}
+
+/********************************************************************//**
Returns TRUE if the user-defined column values in a secondary index record
are alphabetically the same as the corresponding columns in the clustered
index record.
NOTE: the comparison is NOT done as a binary comparison, but character
-fields are compared with collation! */
+fields are compared with collation!
+@return TRUE if the secondary record is equal to the corresponding
+fields in the clustered record, when compared with collation */
static
ibool
row_sel_sec_rec_is_for_clust_rec(
/*=============================*/
- /* out: TRUE if the secondary
- record is equal to the corresponding
- fields in the clustered record,
- when compared with collation */
- rec_t* sec_rec, /* in: secondary index record */
- dict_index_t* sec_index, /* in: secondary index */
- rec_t* clust_rec, /* in: clustered index record */
- dict_index_t* clust_index) /* in: clustered index */
+ const rec_t* sec_rec, /*!< in: secondary index record */
+ dict_index_t* sec_index, /*!< in: secondary index */
+ const rec_t* clust_rec, /*!< in: clustered index record;
+ must be protected by a lock or
+ a page latch against deletion
+ in rollback or purge */
+ dict_index_t* clust_index) /*!< in: clustered index */
{
- byte* sec_field;
+ const byte* sec_field;
ulint sec_len;
- byte* clust_field;
- ulint clust_len;
+ const byte* clust_field;
ulint n;
ulint i;
mem_heap_t* heap = NULL;
@@ -83,8 +157,18 @@ row_sel_sec_rec_is_for_clust_rec(
ulint* sec_offs = sec_offsets_;
ibool is_equal = TRUE;
- *clust_offsets_ = (sizeof clust_offsets_) / sizeof *clust_offsets_;
- *sec_offsets_ = (sizeof sec_offsets_) / sizeof *sec_offsets_;
+ rec_offs_init(clust_offsets_);
+ rec_offs_init(sec_offsets_);
+
+ if (rec_get_deleted_flag(clust_rec,
+ dict_table_is_comp(clust_index->table))) {
+
+ /* The clustered index record is delete-marked;
+ it is not visible in the read view. Besides,
+ if there are any externally stored columns,
+ some of them may have already been purged. */
+ return(FALSE);
+ }
clust_offs = rec_get_offsets(clust_rec, clust_index, clust_offs,
ULINT_UNDEFINED, &heap);
@@ -96,26 +180,50 @@ row_sel_sec_rec_is_for_clust_rec(
for (i = 0; i < n; i++) {
const dict_field_t* ifield;
const dict_col_t* col;
+ ulint clust_pos;
+ ulint clust_len;
+ ulint len;
ifield = dict_index_get_nth_field(sec_index, i);
col = dict_field_get_col(ifield);
+ clust_pos = dict_col_get_clust_pos(col, clust_index);
clust_field = rec_get_nth_field(
- clust_rec, clust_offs,
- dict_col_get_clust_pos(col, clust_index), &clust_len);
+ clust_rec, clust_offs, clust_pos, &clust_len);
sec_field = rec_get_nth_field(sec_rec, sec_offs, i, &sec_len);
- if (ifield->prefix_len > 0 && clust_len != UNIV_SQL_NULL) {
+ len = clust_len;
+
+ if (ifield->prefix_len > 0 && len != UNIV_SQL_NULL) {
- clust_len = dtype_get_at_most_n_mbchars(
+ if (rec_offs_nth_extern(clust_offs, clust_pos)) {
+ len -= BTR_EXTERN_FIELD_REF_SIZE;
+ }
+
+ len = dtype_get_at_most_n_mbchars(
col->prtype, col->mbminlen, col->mbmaxlen,
- ifield->prefix_len,
- clust_len, (char*) clust_field);
+ ifield->prefix_len, len, (char*) clust_field);
+
+ if (rec_offs_nth_extern(clust_offs, clust_pos)
+ && len < sec_len) {
+ if (!row_sel_sec_rec_is_for_blob(
+ col->mtype, col->prtype,
+ col->mbminlen, col->mbmaxlen,
+ clust_field, clust_len,
+ sec_field, sec_len,
+ dict_table_zip_size(
+ clust_index->table))) {
+ goto inequal;
+ }
+
+ continue;
+ }
}
if (0 != cmp_data_data(col->mtype, col->prtype,
- clust_field, clust_len,
+ clust_field, len,
sec_field, sec_len)) {
+inequal:
is_equal = FALSE;
goto func_exit;
}
@@ -128,14 +236,14 @@ func_exit:
return(is_equal);
}
-/*************************************************************************
-Creates a select node struct. */
-
+/*********************************************************************//**
+Creates a select node struct.
+@return own: select node struct */
+UNIV_INTERN
sel_node_t*
sel_node_create(
/*============*/
- /* out, own: select node struct */
- mem_heap_t* heap) /* in: memory heap where created */
+ mem_heap_t* heap) /*!< in: memory heap where created */
{
sel_node_t* node;
@@ -143,22 +251,19 @@ sel_node_create(
node->common.type = QUE_NODE_SELECT;
node->state = SEL_NODE_OPEN;
- node->select_will_do_update = FALSE;
- node->latch_mode = BTR_SEARCH_LEAF;
-
node->plans = NULL;
return(node);
}
-/*************************************************************************
+/*********************************************************************//**
Frees the memory private to a select node when a query graph is freed,
does not free the heap where the node was originally created. */
-
+UNIV_INTERN
void
sel_node_free_private(
/*==================*/
- sel_node_t* node) /* in: select node struct */
+ sel_node_t* node) /*!< in: select node struct */
{
ulint i;
plan_t* plan;
@@ -177,14 +282,14 @@ sel_node_free_private(
}
}
-/*************************************************************************
+/*********************************************************************//**
Evaluates the values in a select list. If there are aggregate functions,
their argument value is added to the aggregate total. */
UNIV_INLINE
void
sel_eval_select_list(
/*=================*/
- sel_node_t* node) /* in: select node */
+ sel_node_t* node) /*!< in: select node */
{
que_node_t* exp;
@@ -197,15 +302,15 @@ sel_eval_select_list(
}
}
-/*************************************************************************
+/*********************************************************************//**
Assigns the values in the select list to the possible into-variables in
SELECT ... INTO ... */
UNIV_INLINE
void
sel_assign_into_var_values(
/*=======================*/
- sym_node_t* var, /* in: first variable in a list of variables */
- sel_node_t* node) /* in: select node */
+ sym_node_t* var, /*!< in: first variable in a list of variables */
+ sel_node_t* node) /*!< in: select node */
{
que_node_t* exp;
@@ -226,14 +331,14 @@ sel_assign_into_var_values(
}
}
-/*************************************************************************
+/*********************************************************************//**
Resets the aggregate value totals in the select list of an aggregate type
query. */
UNIV_INLINE
void
sel_reset_aggregate_vals(
/*=====================*/
- sel_node_t* node) /* in: select node */
+ sel_node_t* node) /*!< in: select node */
{
func_node_t* func_node;
@@ -250,13 +355,13 @@ sel_reset_aggregate_vals(
node->aggregate_already_fetched = FALSE;
}
-/*************************************************************************
+/*********************************************************************//**
Copies the input variable values when an explicit cursor is opened. */
UNIV_INLINE
void
row_sel_copy_input_variable_vals(
/*=============================*/
- sel_node_t* node) /* in: select node */
+ sel_node_t* node) /*!< in: select node */
{
sym_node_t* var;
@@ -271,28 +376,28 @@ row_sel_copy_input_variable_vals(
}
}
-/*************************************************************************
+/*********************************************************************//**
Fetches the column values from a record. */
static
void
row_sel_fetch_columns(
/*==================*/
- dict_index_t* index, /* in: record index */
- rec_t* rec, /* in: record in a clustered or non-clustered
- index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- sym_node_t* column) /* in: first column in a column list, or
+ dict_index_t* index, /*!< in: record index */
+ const rec_t* rec, /*!< in: record in a clustered or non-clustered
+ index; must be protected by a page latch */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ sym_node_t* column) /*!< in: first column in a column list, or
NULL */
{
dfield_t* val;
ulint index_type;
ulint field_no;
- byte* data;
+ const byte* data;
ulint len;
ut_ad(rec_offs_validate(rec, index, offsets));
- if (index->type & DICT_CLUSTERED) {
+ if (dict_index_is_clust(index)) {
index_type = SYM_CLUST_FIELD_NO;
} else {
index_type = SYM_SEC_FIELD_NO;
@@ -315,7 +420,9 @@ row_sel_fetch_columns(
heap = mem_heap_create(1);
data = btr_rec_copy_externally_stored_field(
- rec, offsets, field_no, &len, heap);
+ rec, offsets,
+ dict_table_zip_size(index->table),
+ field_no, &len, heap);
ut_a(len != UNIV_SQL_NULL);
@@ -324,6 +431,10 @@ row_sel_fetch_columns(
data = rec_get_nth_field(rec, offsets,
field_no, &len);
+ if (len == UNIV_SQL_NULL) {
+ len = UNIV_SQL_NULL;
+ }
+
needs_copy = column->copy_val;
}
@@ -344,13 +455,13 @@ row_sel_fetch_columns(
}
}
-/*************************************************************************
+/*********************************************************************//**
Allocates a prefetch buffer for a column when prefetch is first time done. */
static
void
sel_col_prefetch_buf_alloc(
/*=======================*/
- sym_node_t* column) /* in: symbol table node for a column */
+ sym_node_t* column) /*!< in: symbol table node for a column */
{
sel_buf_t* sel_buf;
ulint i;
@@ -368,14 +479,14 @@ sel_col_prefetch_buf_alloc(
}
}
-/*************************************************************************
+/*********************************************************************//**
Frees a prefetch buffer for a column, including the dynamically allocated
memory for data stored there. */
-
+UNIV_INTERN
void
sel_col_prefetch_buf_free(
/*======================*/
- sel_buf_t* prefetch_buf) /* in, own: prefetch buffer */
+ sel_buf_t* prefetch_buf) /*!< in, own: prefetch buffer */
{
sel_buf_t* sel_buf;
ulint i;
@@ -390,14 +501,14 @@ sel_col_prefetch_buf_free(
}
}
-/*************************************************************************
+/*********************************************************************//**
Pops the column values for a prefetched, cached row from the column prefetch
buffers and places them to the val fields in the column nodes. */
static
void
sel_pop_prefetched_row(
/*===================*/
- plan_t* plan) /* in: plan node for a table */
+ plan_t* plan) /*!< in: plan node for a table */
{
sym_node_t* column;
sel_buf_t* sel_buf;
@@ -419,13 +530,13 @@ sel_pop_prefetched_row(
ut_ad(!column->prefetch_buf);
ut_ad(que_node_get_val_buf_size(column) == 0);
-#ifdef UNIV_DEBUG
- dfield_set_data(val, NULL, 0);
-#endif
+ ut_d(dfield_set_null(val));
+
goto next_col;
}
ut_ad(column->prefetch_buf);
+ ut_ad(!dfield_is_ext(val));
sel_buf = column->prefetch_buf + plan->first_prefetched;
@@ -452,14 +563,14 @@ next_col:
plan->first_prefetched++;
}
-/*************************************************************************
+/*********************************************************************//**
Pushes the column values for a prefetched, cached row to the column prefetch
buffers from the val fields in the column nodes. */
UNIV_INLINE
void
sel_push_prefetched_row(
/*====================*/
- plan_t* plan) /* in: plan node for a table */
+ plan_t* plan) /*!< in: plan node for a table */
{
sym_node_t* column;
sel_buf_t* sel_buf;
@@ -524,26 +635,26 @@ next_col:
}
}
-/*************************************************************************
-Builds a previous version of a clustered index record for a consistent read */
+/*********************************************************************//**
+Builds a previous version of a clustered index record for a consistent read
+@return DB_SUCCESS or error code */
static
ulint
row_sel_build_prev_vers(
/*====================*/
- /* out: DB_SUCCESS or error code */
- read_view_t* read_view, /* in: read view */
- dict_index_t* index, /* in: plan node for table */
- rec_t* rec, /* in: record in a clustered index */
- ulint** offsets, /* in/out: offsets returned by
+ read_view_t* read_view, /*!< in: read view */
+ dict_index_t* index, /*!< in: clustered index of rec */
+ rec_t* rec, /*!< in: record in a clustered index */
+ ulint** offsets, /*!< in/out: offsets returned by
rec_get_offsets(rec, plan->index) */
- mem_heap_t** offset_heap, /* in/out: memory heap from which
+ mem_heap_t** offset_heap, /*!< in/out: memory heap from which
the offsets are allocated */
- mem_heap_t** old_vers_heap, /* out: old version heap to use */
- rec_t** old_vers, /* out: old version, or NULL if the
+ mem_heap_t** old_vers_heap, /*!< out: old version heap to use */
+ rec_t** old_vers, /*!< out: old version, or NULL if the
record does not exist in the view:
i.e., it was freshly inserted
afterwards */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint err;
@@ -559,26 +670,26 @@ row_sel_build_prev_vers(
return(err);
}
-/*************************************************************************
+/*********************************************************************//**
Builds the last committed version of a clustered index record for a
-semi-consistent read. */
+semi-consistent read.
+@return DB_SUCCESS or error code */
static
ulint
row_sel_build_committed_vers_for_mysql(
/*===================================*/
- /* out: DB_SUCCESS or error code */
- dict_index_t* clust_index, /* in: clustered index */
- row_prebuilt_t* prebuilt, /* in: prebuilt struct */
- rec_t* rec, /* in: record in a clustered index */
- ulint** offsets, /* in/out: offsets returned by
+ dict_index_t* clust_index, /*!< in: clustered index */
+ row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */
+ const rec_t* rec, /*!< in: record in a clustered index */
+ ulint** offsets, /*!< in/out: offsets returned by
rec_get_offsets(rec, clust_index) */
- mem_heap_t** offset_heap, /* in/out: memory heap from which
+ mem_heap_t** offset_heap, /*!< in/out: memory heap from which
the offsets are allocated */
- rec_t** old_vers, /* out: old version, or NULL if the
+ const rec_t** old_vers, /*!< out: old version, or NULL if the
record does not exist in the view:
i.e., it was freshly inserted
afterwards */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint err;
@@ -594,15 +705,15 @@ row_sel_build_committed_vers_for_mysql(
return(err);
}
-/*************************************************************************
+/*********************************************************************//**
Tests the conditions which determine when the index segment we are searching
-through has been exhausted. */
+through has been exhausted.
+@return TRUE if row passed the tests */
UNIV_INLINE
ibool
row_sel_test_end_conds(
/*===================*/
- /* out: TRUE if row passed the tests */
- plan_t* plan) /* in: plan for the table; the column values must
+ plan_t* plan) /*!< in: plan for the table; the column values must
already have been retrieved and the right sides of
comparisons evaluated */
{
@@ -632,14 +743,14 @@ row_sel_test_end_conds(
return(TRUE);
}
-/*************************************************************************
-Tests the other conditions. */
+/*********************************************************************//**
+Tests the other conditions.
+@return TRUE if row passed the tests */
UNIV_INLINE
ibool
row_sel_test_other_conds(
/*=====================*/
- /* out: TRUE if row passed the tests */
- plan_t* plan) /* in: plan for the table; the column values must
+ plan_t* plan) /*!< in: plan for the table; the column values must
already have been retrieved */
{
func_node_t* cond;
@@ -660,23 +771,23 @@ row_sel_test_other_conds(
return(TRUE);
}
-/*************************************************************************
+/*********************************************************************//**
Retrieves the clustered index record corresponding to a record in a
-non-clustered index. Does the necessary locking. */
+non-clustered index. Does the necessary locking.
+@return DB_SUCCESS or error code */
static
ulint
row_sel_get_clust_rec(
/*==================*/
- /* out: DB_SUCCESS or error code */
- sel_node_t* node, /* in: select_node */
- plan_t* plan, /* in: plan node for table */
- rec_t* rec, /* in: record in a non-clustered index */
- que_thr_t* thr, /* in: query thread */
- rec_t** out_rec,/* out: clustered record or an old version of
+ sel_node_t* node, /*!< in: select_node */
+ plan_t* plan, /*!< in: plan node for table */
+ rec_t* rec, /*!< in: record in a non-clustered index */
+ que_thr_t* thr, /*!< in: query thread */
+ rec_t** out_rec,/*!< out: clustered record or an old version of
it, NULL if the old version did not exist
in the read view, i.e., it was a fresh
inserted version */
- mtr_t* mtr) /* in: mtr used to get access to the
+ mtr_t* mtr) /*!< in: mtr used to get access to the
non-clustered record; the same mtr is used to
access the clustered index */
{
@@ -687,7 +798,7 @@ row_sel_get_clust_rec(
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
*out_rec = NULL;
@@ -700,7 +811,7 @@ row_sel_get_clust_rec(
index = dict_table_get_first_index(plan->table);
btr_pcur_open_with_no_init(index, plan->clust_ref, PAGE_CUR_LE,
- node->latch_mode, &(plan->clust_pcur),
+ BTR_SEARCH_LEAF, &plan->clust_pcur,
0, mtr);
clust_rec = btr_pcur_get_rec(&(plan->clust_pcur));
@@ -751,7 +862,8 @@ row_sel_get_clust_rec(
}
err = lock_clust_rec_read_check_and_lock(
- 0, clust_rec, index, offsets,
+ 0, btr_pcur_get_block(&plan->clust_pcur),
+ clust_rec, index, offsets,
node->row_lock_mode, lock_type, thr);
if (err != DB_SUCCESS) {
@@ -806,7 +918,10 @@ row_sel_get_clust_rec(
}
}
- /* Fetch the columns needed in test conditions */
+ /* Fetch the columns needed in test conditions. The clustered
+ index record is protected by a page latch that was acquired
+ when plan->clust_pcur was positioned. The latch will not be
+ released until mtr_commit(mtr). */
row_sel_fetch_columns(index, clust_rec, offsets,
UT_LIST_GET_FIRST(plan->columns));
@@ -820,20 +935,21 @@ err_exit:
return(err);
}
-/*************************************************************************
-Sets a lock on a record. */
+/*********************************************************************//**
+Sets a lock on a record.
+@return DB_SUCCESS or error code */
UNIV_INLINE
ulint
sel_set_rec_lock(
/*=============*/
- /* out: DB_SUCCESS or error code */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- ulint mode, /* in: lock mode */
- ulint type, /* in: LOCK_ORDINARY, LOCK_GAP, or
- LOC_REC_NOT_GAP */
- que_thr_t* thr) /* in: query thread */
+ const buf_block_t* block, /*!< in: buffer block of rec */
+ const rec_t* rec, /*!< in: record */
+ dict_index_t* index, /*!< in: index */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ ulint mode, /*!< in: lock mode */
+ ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or
+ LOC_REC_NOT_GAP */
+ que_thr_t* thr) /*!< in: query thread */
{
trx_t* trx;
ulint err;
@@ -847,30 +963,29 @@ sel_set_rec_lock(
}
}
- if (index->type & DICT_CLUSTERED) {
+ if (dict_index_is_clust(index)) {
err = lock_clust_rec_read_check_and_lock(
- 0, rec, index, offsets, mode, type, thr);
+ 0, block, rec, index, offsets, mode, type, thr);
} else {
err = lock_sec_rec_read_check_and_lock(
- 0, rec, index, offsets, mode, type, thr);
+ 0, block, rec, index, offsets, mode, type, thr);
}
return(err);
}
-/*************************************************************************
+/*********************************************************************//**
Opens a pcur to a table index. */
static
void
row_sel_open_pcur(
/*==============*/
- sel_node_t* node, /* in: select node */
- plan_t* plan, /* in: table plan */
+ plan_t* plan, /*!< in: table plan */
ibool search_latch_locked,
- /* in: TRUE if the thread currently
+ /*!< in: TRUE if the thread currently
has the search latch locked in
s-mode */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
dict_index_t* index;
func_node_t* cond;
@@ -917,13 +1032,13 @@ row_sel_open_pcur(
/* Open pcur to the index */
btr_pcur_open_with_no_init(index, plan->tuple, plan->mode,
- node->latch_mode, &(plan->pcur),
+ BTR_SEARCH_LEAF, &plan->pcur,
has_search_latch, mtr);
} else {
/* Open the cursor to the start or the end of the index
(FALSE: no init) */
- btr_pcur_open_at_index_side(plan->asc, index, node->latch_mode,
+ btr_pcur_open_at_index_side(plan->asc, index, BTR_SEARCH_LEAF,
&(plan->pcur), FALSE, mtr);
}
@@ -934,20 +1049,18 @@ row_sel_open_pcur(
plan->pcur_is_open = TRUE;
}
-/*************************************************************************
-Restores a stored pcur position to a table index. */
+/*********************************************************************//**
+Restores a stored pcur position to a table index.
+@return TRUE if the cursor should be moved to the next record after we
+return from this function (moved to the previous, in the case of a
+descending cursor) without processing again the current cursor
+record */
static
ibool
row_sel_restore_pcur_pos(
/*=====================*/
- /* out: TRUE if the cursor should be moved to
- the next record after we return from this
- function (moved to the previous, in the case
- of a descending cursor) without processing
- again the current cursor record */
- sel_node_t* node, /* in: select node */
- plan_t* plan, /* in: table plan */
- mtr_t* mtr) /* in: mtr */
+ plan_t* plan, /*!< in: table plan */
+ mtr_t* mtr) /*!< in: mtr */
{
ibool equal_position;
ulint relative_position;
@@ -956,7 +1069,7 @@ row_sel_restore_pcur_pos(
relative_position = btr_pcur_get_rel_pos(&(plan->pcur));
- equal_position = btr_pcur_restore_position(node->latch_mode,
+ equal_position = btr_pcur_restore_position(BTR_SEARCH_LEAF,
&(plan->pcur), mtr);
/* If the cursor is traveling upwards, and relative_position is
@@ -1031,13 +1144,13 @@ row_sel_restore_pcur_pos(
return(TRUE);
}
-/*************************************************************************
+/*********************************************************************//**
Resets a plan cursor to a closed state. */
UNIV_INLINE
void
plan_reset_cursor(
/*==============*/
- plan_t* plan) /* in: plan */
+ plan_t* plan) /*!< in: plan */
{
plan->pcur_is_open = FALSE;
plan->cursor_at_end = FALSE;
@@ -1045,18 +1158,18 @@ plan_reset_cursor(
plan->n_rows_prefetched = 0;
}
-/*************************************************************************
+/*********************************************************************//**
Tries to do a shortcut to fetch a clustered index record with a unique key,
-using the hash index if possible (not always). */
+using the hash index if possible (not always).
+@return SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
static
ulint
row_sel_try_search_shortcut(
/*========================*/
- /* out: SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
- sel_node_t* node, /* in: select node for a consistent read */
- plan_t* plan, /* in: plan for a unique search in clustered
+ sel_node_t* node, /*!< in: select node for a consistent read */
+ plan_t* plan, /*!< in: plan for a unique search in clustered
index */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
dict_index_t* index;
rec_t* rec;
@@ -1064,7 +1177,7 @@ row_sel_try_search_shortcut(
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
ulint ret;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
index = plan->index;
@@ -1075,7 +1188,7 @@ row_sel_try_search_shortcut(
ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */
- row_sel_open_pcur(node, plan, TRUE, mtr);
+ row_sel_open_pcur(plan, TRUE, mtr);
rec = btr_pcur_get_rec(&(plan->pcur));
@@ -1100,22 +1213,19 @@ row_sel_try_search_shortcut(
offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
- if (index->type & DICT_CLUSTERED) {
+ if (dict_index_is_clust(index)) {
if (!lock_clust_rec_cons_read_sees(rec, index, offsets,
node->read_view)) {
ret = SEL_RETRY;
goto func_exit;
}
- } else if (!lock_sec_rec_cons_read_sees(rec, index, node->read_view)) {
+ } else if (!lock_sec_rec_cons_read_sees(rec, node->read_view)) {
ret = SEL_RETRY;
goto func_exit;
}
- /* Test deleted flag. Fetch the columns needed in test conditions. */
-
- row_sel_fetch_columns(index, rec, offsets,
- UT_LIST_GET_FIRST(plan->columns));
+ /* Test the deleted flag. */
if (rec_get_deleted_flag(rec, dict_table_is_comp(plan->table))) {
@@ -1123,6 +1233,14 @@ row_sel_try_search_shortcut(
goto func_exit;
}
+ /* Fetch the columns needed in test conditions. The index
+ record is protected by a page latch that was acquired when
+ plan->pcur was positioned. The latch will not be released
+ until mtr_commit(mtr). */
+
+ row_sel_fetch_columns(index, rec, offsets,
+ UT_LIST_GET_FIRST(plan->columns));
+
/* Test the rest of search conditions */
if (!row_sel_test_other_conds(plan)) {
@@ -1131,7 +1249,7 @@ row_sel_try_search_shortcut(
goto func_exit;
}
- ut_ad(plan->pcur.latch_mode == node->latch_mode);
+ ut_ad(plan->pcur.latch_mode == BTR_SEARCH_LEAF);
plan->n_rows_fetched++;
ret = SEL_FOUND;
@@ -1142,15 +1260,15 @@ func_exit:
return(ret);
}
-/*************************************************************************
-Performs a select step. */
+/*********************************************************************//**
+Performs a select step.
+@return DB_SUCCESS or error code */
static
ulint
row_sel(
/*====*/
- /* out: DB_SUCCESS or error code */
- sel_node_t* node, /* in: select node */
- que_thr_t* thr) /* in: query thread */
+ sel_node_t* node, /*!< in: select node */
+ que_thr_t* thr) /*!< in: query thread */
{
dict_index_t* index;
plan_t* plan;
@@ -1171,13 +1289,6 @@ row_sel(
ulint cost_counter = 0;
ibool cursor_just_opened;
ibool must_go_to_next;
- ibool leaf_contains_updates = FALSE;
- /* TRUE if select_will_do_update is
- TRUE and the current clustered index
- leaf page has been updated during
- the current mtr: mtr must be committed
- at the same time as the leaf x-latch
- is released */
ibool mtr_has_extra_clust_latch = FALSE;
/* TRUE if the search was made using
a non-clustered index, and we had to
@@ -1190,7 +1301,7 @@ row_sel(
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
ut_ad(thr->run_node == node);
@@ -1216,7 +1327,6 @@ table_loop:
node->fetch_table changes, and after adding a row to aggregate totals
and, of course, when this function is called. */
- ut_ad(leaf_contains_updates == FALSE);
ut_ad(mtr_has_extra_clust_latch == FALSE);
plan = sel_node_get_nth_plan(node, node->fetch_table);
@@ -1249,7 +1359,7 @@ table_loop:
rw_lock_s_lock(&btr_search_latch);
search_latch_locked = TRUE;
- } else if (btr_search_latch.writer_is_wait_ex) {
+ } else if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_WAIT_EX) {
/* There is an x-latch request waiting: release the
s-latch for a moment; as an s-latch here is often
@@ -1291,7 +1401,7 @@ table_loop:
/* Evaluate the expressions to build the search tuple and
open the cursor */
- row_sel_open_pcur(node, plan, search_latch_locked, &mtr);
+ row_sel_open_pcur(plan, search_latch_locked, &mtr);
cursor_just_opened = TRUE;
@@ -1300,7 +1410,7 @@ table_loop:
} else {
/* Restore pcur position to the index */
- must_go_to_next = row_sel_restore_pcur_pos(node, plan, &mtr);
+ must_go_to_next = row_sel_restore_pcur_pos(plan, &mtr);
cursor_just_opened = FALSE;
@@ -1370,7 +1480,8 @@ rec_loop:
lock_type = LOCK_ORDINARY;
}
- err = sel_set_rec_lock(next_rec, index, offsets,
+ err = sel_set_rec_lock(btr_pcur_get_block(&plan->pcur),
+ next_rec, index, offsets,
node->row_lock_mode,
lock_type, thr);
@@ -1426,7 +1537,8 @@ skip_lock:
lock_type = LOCK_ORDINARY;
}
- err = sel_set_rec_lock(rec, index, offsets,
+ err = sel_set_rec_lock(btr_pcur_get_block(&plan->pcur),
+ rec, index, offsets,
node->row_lock_mode, lock_type, thr);
if (err != DB_SUCCESS) {
@@ -1489,7 +1601,7 @@ skip_lock:
/* This is a non-locking consistent read: if necessary, fetch
a previous version of the record */
- if (index->type & DICT_CLUSTERED) {
+ if (dict_index_is_clust(index)) {
if (!lock_clust_rec_cons_read_sees(rec, index, offsets,
node->read_view)) {
@@ -1508,6 +1620,16 @@ skip_lock:
offsets = rec_get_offsets(
rec, index, offsets,
ULINT_UNDEFINED, &heap);
+
+ /* Fetch the columns needed in
+ test conditions. The clustered
+ index record is protected by a
+ page latch that was acquired
+ by row_sel_open_pcur() or
+ row_sel_restore_pcur_pos().
+ The latch will not be released
+ until mtr_commit(mtr). */
+
row_sel_fetch_columns(
index, rec, offsets,
UT_LIST_GET_FIRST(
@@ -1523,7 +1645,7 @@ skip_lock:
rec = old_vers;
}
- } else if (!lock_sec_rec_cons_read_sees(rec, index,
+ } else if (!lock_sec_rec_cons_read_sees(rec,
node->read_view)) {
cons_read_requires_clust_rec = TRUE;
}
@@ -1531,7 +1653,10 @@ skip_lock:
/* PHASE 4: Test search end conditions and deleted flag */
- /* Fetch the columns needed in test conditions */
+ /* Fetch the columns needed in test conditions. The record is
+ protected by a page latch that was acquired by
+ row_sel_open_pcur() or row_sel_restore_pcur_pos(). The latch
+ will not be released until mtr_commit(mtr). */
row_sel_fetch_columns(index, rec, offsets,
UT_LIST_GET_FIRST(plan->columns));
@@ -1624,29 +1749,7 @@ skip_lock:
plan->n_rows_fetched++;
- ut_ad(plan->pcur.latch_mode == node->latch_mode);
-
- if (node->select_will_do_update) {
- /* This is a searched update and we can do the update in-place,
- saving CPU time */
-
- row_upd_in_place_in_select(node, thr, &mtr);
-
- leaf_contains_updates = TRUE;
-
- /* When the database is in the online backup mode, the number
- of log records for a single mtr should be small: increment the
- cost counter to ensure it */
-
- cost_counter += 1 + (SEL_COST_LIMIT / 8);
-
- if (plan->unique_search) {
-
- goto table_exhausted;
- }
-
- goto next_rec;
- }
+ ut_ad(plan->pcur.latch_mode == BTR_SEARCH_LEAF);
if ((plan->n_rows_fetched <= SEL_PREFETCH_LIMIT)
|| plan->unique_search || plan->no_prefetch
@@ -1681,19 +1784,6 @@ next_rec:
goto commit_mtr_for_a_while;
}
- if (leaf_contains_updates
- && btr_pcur_is_after_last_on_page(&(plan->pcur), &mtr)) {
-
- /* We must commit &mtr if we are moving to a different page,
- because we have done updates to the x-latched leaf page, and
- the latch would be released in btr_pcur_move_to_next, without
- &mtr getting committed there */
-
- ut_ad(node->asc);
-
- goto commit_mtr_for_a_while;
- }
-
if (node->asc) {
moved = btr_pcur_move_to_next(&(plan->pcur), &mtr);
} else {
@@ -1715,7 +1805,7 @@ next_table:
/* We found a record which satisfies the conditions: we can move to
the next table or return a row in the result set */
- ut_ad(btr_pcur_is_on_user_rec(&(plan->pcur), &mtr));
+ ut_ad(btr_pcur_is_on_user_rec(&plan->pcur));
if (plan->unique_search && !node->can_get_updated) {
@@ -1730,7 +1820,6 @@ next_table:
mtr_commit(&mtr);
- leaf_contains_updates = FALSE;
mtr_has_extra_clust_latch = FALSE;
next_table_no_mtr:
@@ -1750,10 +1839,6 @@ next_table_no_mtr:
thr->run_node = que_node_get_parent(node);
- if (search_latch_locked) {
- rw_lock_s_unlock(&btr_search_latch);
- }
-
err = DB_SUCCESS;
goto func_exit;
}
@@ -1775,7 +1860,6 @@ table_exhausted:
mtr_commit(&mtr);
- leaf_contains_updates = FALSE;
mtr_has_extra_clust_latch = FALSE;
if (plan->n_rows_prefetched > 0) {
@@ -1797,20 +1881,10 @@ table_exhausted_no_mtr:
sel_assign_into_var_values(node->into_list, node);
thr->run_node = que_node_get_parent(node);
+ } else {
+ node->state = SEL_NODE_NO_MORE_ROWS;
- if (search_latch_locked) {
- rw_lock_s_unlock(&btr_search_latch);
- }
-
- goto func_exit;
- }
-
- node->state = SEL_NODE_NO_MORE_ROWS;
-
- thr->run_node = que_node_get_parent(node);
-
- if (search_latch_locked) {
- rw_lock_s_unlock(&btr_search_latch);
+ thr->run_node = que_node_get_parent(node);
}
goto func_exit;
@@ -1854,7 +1928,6 @@ commit_mtr_for_a_while:
mtr_commit(&mtr);
- leaf_contains_updates = FALSE;
mtr_has_extra_clust_latch = FALSE;
#ifdef UNIV_SYNC_DEBUG
@@ -1866,8 +1939,7 @@ commit_mtr_for_a_while:
lock_wait_or_error:
/* See the note at stop_for_a_while: the same holds for this case */
- ut_ad(!btr_pcur_is_before_first_on_page(&(plan->pcur), &mtr)
- || !node->asc);
+ ut_ad(!btr_pcur_is_before_first_on_page(&plan->pcur) || !node->asc);
ut_ad(!search_latch_locked);
plan->stored_cursor_rec_processed = FALSE;
@@ -1880,21 +1952,24 @@ lock_wait_or_error:
#endif /* UNIV_SYNC_DEBUG */
func_exit:
+ if (search_latch_locked) {
+ rw_lock_s_unlock(&btr_search_latch);
+ }
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
return(err);
}
-/**************************************************************************
+/**********************************************************************//**
Performs a select step. This is a high-level function used in SQL execution
-graphs. */
-
+graphs.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
row_sel_step(
/*=========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
ulint i_lock_mode;
sym_node_t* table_node;
@@ -1988,14 +2063,14 @@ row_sel_step(
return(thr);
}
-/**************************************************************************
-Performs a fetch for a cursor. */
-
+/**********************************************************************//**
+Performs a fetch for a cursor.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
fetch_step(
/*=======*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
sel_node_t* sel_node;
fetch_node_t* node;
@@ -2051,15 +2126,15 @@ fetch_step(
return(thr);
}
-/********************************************************************
-Sample callback function for fetch that prints each row.*/
-
+/****************************************************************//**
+Sample callback function for fetch that prints each row.
+@return always returns non-NULL */
+UNIV_INTERN
void*
row_fetch_print(
/*============*/
- /* out: always returns non-NULL */
- void* row, /* in: sel_node_t* */
- void* user_arg) /* in: not used */
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg) /*!< in: not used */
{
sel_node_t* node = row;
que_node_t* exp;
@@ -2073,22 +2148,21 @@ row_fetch_print(
while (exp) {
dfield_t* dfield = que_node_get_val(exp);
- dtype_t* type = dfield_get_type(dfield);
+ const dtype_t* type = dfield_get_type(dfield);
fprintf(stderr, " column %lu:\n", (ulong)i);
dtype_print(type);
- fprintf(stderr, "\n");
+ putc('\n', stderr);
if (dfield_get_len(dfield) != UNIV_SQL_NULL) {
ut_print_buf(stderr, dfield_get_data(dfield),
dfield_get_len(dfield));
+ putc('\n', stderr);
} else {
- fprintf(stderr, " <NULL>;");
+ fputs(" <NULL>;\n", stderr);
}
- fprintf(stderr, "\n");
-
exp = que_node_get_next(exp);
i++;
}
@@ -2096,24 +2170,24 @@ row_fetch_print(
return((void*)42);
}
-/********************************************************************
+/****************************************************************//**
Callback function for fetch that stores an unsigned 4 byte integer to the
location pointed. The column's type must be DATA_INT, DATA_UNSIGNED, length
-= 4. */
-
+= 4.
+@return always returns NULL */
+UNIV_INTERN
void*
row_fetch_store_uint4(
/*==================*/
- /* out: always returns NULL */
- void* row, /* in: sel_node_t* */
- void* user_arg) /* in: data pointer */
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg) /*!< in: data pointer */
{
sel_node_t* node = row;
ib_uint32_t* val = user_arg;
ulint tmp;
dfield_t* dfield = que_node_get_val(node->select_list);
- dtype_t* type = dfield_get_type(dfield);
+ const dtype_t* type = dfield_get_type(dfield);
ulint len = dfield_get_len(dfield);
ut_a(dtype_get_mtype(type) == DATA_INT);
@@ -2126,14 +2200,14 @@ row_fetch_store_uint4(
return(NULL);
}
-/***************************************************************
-Prints a row in a select result. */
-
+/***********************************************************//**
+Prints a row in a select result.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
row_printf_step(
/*============*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
row_printf_node_t* node;
sel_node_t* sel_node;
@@ -2189,40 +2263,39 @@ row_printf_step(
return(thr);
}
-/********************************************************************
+/****************************************************************//**
Converts a key value stored in MySQL format to an Innobase dtuple. The last
field of the key value may be just a prefix of a fixed length field: hence
the parameter key_len. But currently we do not allow search keys where the
last field is only a prefix of the full key field len and print a warning if
such appears. A counterpart of this function is
ha_innobase::store_key_val_for_row() in ha_innodb.cc. */
-
+UNIV_INTERN
void
row_sel_convert_mysql_key_to_innobase(
/*==================================*/
- dtuple_t* tuple, /* in: tuple where to build;
+ dtuple_t* tuple, /*!< in/out: tuple where to build;
NOTE: we assume that the type info
in the tuple is already according
to index! */
- byte* buf, /* in: buffer to use in field
+ byte* buf, /*!< in: buffer to use in field
conversions */
- ulint buf_len, /* in: buffer length */
- dict_index_t* index, /* in: index of the key value */
- byte* key_ptr, /* in: MySQL key value */
- ulint key_len, /* in: MySQL key value length */
- trx_t* trx) /* in: transaction */
+ ulint buf_len, /*!< in: buffer length */
+ dict_index_t* index, /*!< in: index of the key value */
+ const byte* key_ptr, /*!< in: MySQL key value */
+ ulint key_len, /*!< in: MySQL key value length */
+ trx_t* trx) /*!< in: transaction */
{
byte* original_buf = buf;
- byte* original_key_ptr = key_ptr;
+ const byte* original_key_ptr = key_ptr;
dict_field_t* field;
dfield_t* dfield;
ulint data_offset;
ulint data_len;
ulint data_field_len;
ibool is_null;
- byte* key_end;
+ const byte* key_end;
ulint n_fields = 0;
- ulint type;
/* For documentation of the key value storage format in MySQL, see
ha_innobase::store_key_val_for_row() in ha_innodb.cc. */
@@ -2236,7 +2309,7 @@ row_sel_convert_mysql_key_to_innobase(
dfield = dtuple_get_nth_field(tuple, 0);
field = dict_index_get_nth_field(index, 0);
- if (dfield_get_type(dfield)->mtype == DATA_SYS) {
+ if (UNIV_UNLIKELY(dfield_get_type(dfield)->mtype == DATA_SYS)) {
/* A special case: we are looking for a position in the
generated clustered index which InnoDB automatically added
to a table with no primary key: the first and the only
@@ -2254,7 +2327,8 @@ row_sel_convert_mysql_key_to_innobase(
while (key_ptr < key_end) {
- ut_a(field->col->mtype == dfield_get_type(dfield)->mtype);
+ ulint type = dfield_get_type(dfield)->mtype;
+ ut_a(field->col->mtype == type);
data_offset = 0;
is_null = FALSE;
@@ -2266,14 +2340,12 @@ row_sel_convert_mysql_key_to_innobase(
data_offset = 1;
if (*key_ptr != 0) {
- dfield_set_data(dfield, NULL, UNIV_SQL_NULL);
+ dfield_set_null(dfield);
is_null = TRUE;
}
}
- type = dfield_get_type(dfield)->mtype;
-
/* Calculate data length and data field total length */
if (type == DATA_BLOB) {
@@ -2319,9 +2391,10 @@ row_sel_convert_mysql_key_to_innobase(
data_field_len = data_offset + data_len;
}
- if (dtype_get_mysql_type(dfield_get_type(dfield))
- == DATA_MYSQL_TRUE_VARCHAR
- && dfield_get_type(dfield)->mtype != DATA_INT) {
+ if (UNIV_UNLIKELY
+ (dtype_get_mysql_type(dfield_get_type(dfield))
+ == DATA_MYSQL_TRUE_VARCHAR)
+ && UNIV_LIKELY(type != DATA_INT)) {
/* In a MySQL key value format, a true VARCHAR is
always preceded by 2 bytes of a length field.
dfield_get_type(dfield)->len returns the maximum
@@ -2337,7 +2410,7 @@ row_sel_convert_mysql_key_to_innobase(
/* Storing may use at most data_len bytes of buf */
- if (!is_null) {
+ if (UNIV_LIKELY(!is_null)) {
row_mysql_store_col_in_innobase_format(
dfield, buf,
FALSE, /* MySQL key value format col */
@@ -2348,7 +2421,7 @@ row_sel_convert_mysql_key_to_innobase(
key_ptr += data_field_len;
- if (key_ptr > key_end) {
+ if (UNIV_UNLIKELY(key_ptr > key_end)) {
/* The last field in key was not a complete key field
but a prefix of it.
@@ -2372,10 +2445,12 @@ row_sel_convert_mysql_key_to_innobase(
(ulong) (key_ptr - key_end));
fflush(stderr);
ut_print_buf(stderr, original_key_ptr, key_len);
- fprintf(stderr, "\n");
+ putc('\n', stderr);
if (!is_null) {
- dfield->len -= (ulint)(key_ptr - key_end);
+ ulint len = dfield_get_len(dfield);
+ dfield_set_len(dfield, len
+ - (ulint) (key_ptr - key_end));
}
}
@@ -2392,20 +2467,20 @@ row_sel_convert_mysql_key_to_innobase(
dtuple_set_n_fields(tuple, n_fields);
}
-/******************************************************************
+/**************************************************************//**
Stores the row id to the prebuilt struct. */
static
void
row_sel_store_row_id_to_prebuilt(
/*=============================*/
- row_prebuilt_t* prebuilt, /* in: prebuilt */
- rec_t* index_rec, /* in: record */
- dict_index_t* index, /* in: index of the record */
- const ulint* offsets) /* in: rec_get_offsets
- (index_rec, index) */
+ row_prebuilt_t* prebuilt, /*!< in/out: prebuilt */
+ const rec_t* index_rec, /*!< in: record */
+ const dict_index_t* index, /*!< in: index of the record */
+ const ulint* offsets) /*!< in: rec_get_offsets
+ (index_rec, index) */
{
- byte* data;
- ulint len;
+ const byte* data;
+ ulint len;
ut_ad(rec_offs_validate(index_rec, index, offsets));
@@ -2413,7 +2488,7 @@ row_sel_store_row_id_to_prebuilt(
index_rec, offsets,
dict_index_get_sys_col_pos(index, DATA_ROW_ID), &len);
- if (len != DATA_ROW_ID_LEN) {
+ if (UNIV_UNLIKELY(len != DATA_ROW_ID_LEN)) {
fprintf(stderr,
"InnoDB: Error: Row id field is"
" wrong length %lu in ", (ulong) len);
@@ -2430,22 +2505,26 @@ row_sel_store_row_id_to_prebuilt(
ut_memcpy(prebuilt->row_id, data, len);
}
-/******************************************************************
+/**************************************************************//**
Stores a non-SQL-NULL field in the MySQL format. The counterpart of this
function is row_mysql_store_col_in_innobase_format() in row0mysql.c. */
static
void
row_sel_field_store_in_mysql_format(
/*================================*/
- byte* dest, /* in/out: buffer where to store; NOTE that BLOBs
- are not in themselves stored here: the caller must
- allocate and copy the BLOB into buffer before, and pass
- the pointer to the BLOB in 'data' */
- const mysql_row_templ_t* templ, /* in: MySQL column template.
- Its following fields are referenced:
- type, is_unsigned, mysql_col_len, mbminlen, mbmaxlen */
- byte* data, /* in: data to store */
- ulint len) /* in: length of the data */
+ byte* dest, /*!< in/out: buffer where to store; NOTE
+ that BLOBs are not in themselves
+ stored here: the caller must allocate
+ and copy the BLOB into buffer before,
+ and pass the pointer to the BLOB in
+ 'data' */
+ const mysql_row_templ_t* templ,
+ /*!< in: MySQL column template.
+ Its following fields are referenced:
+ type, is_unsigned, mysql_col_len,
+ mbminlen, mbmaxlen */
+ const byte* data, /*!< in: data to store */
+ ulint len) /*!< in: length of the data */
{
byte* ptr;
byte* field_end;
@@ -2453,7 +2532,8 @@ row_sel_field_store_in_mysql_format(
ut_ad(len != UNIV_SQL_NULL);
- if (templ->type == DATA_INT) {
+ switch (templ->type) {
+ case DATA_INT:
/* Convert integer data from Innobase to a little-endian
format, sign bit restored to normal */
@@ -2473,10 +2553,11 @@ row_sel_field_store_in_mysql_format(
}
ut_ad(templ->mysql_col_len == len);
- } else if (templ->type == DATA_VARCHAR
- || templ->type == DATA_VARMYSQL
- || templ->type == DATA_BINARY) {
+ break;
+ case DATA_VARCHAR:
+ case DATA_VARMYSQL:
+ case DATA_BINARY:
field_end = dest + templ->mysql_col_len;
if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
@@ -2527,13 +2608,17 @@ row_sel_field_store_in_mysql_format(
memset(pad_ptr, 0x20, field_end - pad_ptr);
}
- } else if (templ->type == DATA_BLOB) {
+ break;
+
+ case DATA_BLOB:
/* Store a pointer to the BLOB buffer to dest: the BLOB was
already copied to the buffer in row_sel_store_mysql_rec */
row_mysql_store_blob_ref(dest, templ->mysql_col_len, data,
len);
- } else if (templ->type == DATA_MYSQL) {
+ break;
+
+ case DATA_MYSQL:
memcpy(dest, data, len);
ut_ad(templ->mysql_col_len >= len);
@@ -2554,45 +2639,51 @@ row_sel_field_store_in_mysql_format(
memset(dest + len, 0x20, templ->mysql_col_len - len);
}
- } else {
- ut_ad(templ->type == DATA_CHAR
- || templ->type == DATA_FIXBINARY
- /*|| templ->type == DATA_SYS_CHILD
- || templ->type == DATA_SYS*/
- || templ->type == DATA_FLOAT
- || templ->type == DATA_DOUBLE
- || templ->type == DATA_DECIMAL);
- ut_ad(templ->mysql_col_len == len);
+ break;
+
+ default:
+#ifdef UNIV_DEBUG
+ case DATA_SYS_CHILD:
+ case DATA_SYS:
+ /* These column types should never be shipped to MySQL. */
+ ut_ad(0);
+ case DATA_CHAR:
+ case DATA_FIXBINARY:
+ case DATA_FLOAT:
+ case DATA_DOUBLE:
+ case DATA_DECIMAL:
+ /* Above are the valid column types for MySQL data. */
+#endif /* UNIV_DEBUG */
+ ut_ad(templ->mysql_col_len == len);
memcpy(dest, data, len);
}
}
-/******************************************************************
+/**************************************************************//**
Convert a row in the Innobase format to a row in the MySQL format.
Note that the template in prebuilt may advise us to copy only a few
columns to mysql_rec, other columns are left blank. All columns may not
-be needed in the query. */
+be needed in the query.
+@return TRUE if success, FALSE if could not allocate memory for a BLOB
+(though we may also assert in that case) */
static
ibool
row_sel_store_mysql_rec(
/*====================*/
- /* out: TRUE if success, FALSE if
- could not allocate memory for a BLOB
- (though we may also assert in that
- case) */
- byte* mysql_rec, /* out: row in the MySQL format */
- row_prebuilt_t* prebuilt, /* in: prebuilt struct */
- rec_t* rec, /* in: Innobase record in the index
+ byte* mysql_rec, /*!< out: row in the MySQL format */
+ row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */
+ const rec_t* rec, /*!< in: Innobase record in the index
which was described in prebuilt's
- template */
- const ulint* offsets) /* in: array returned by
+ template; must be protected by
+ a page latch */
+ const ulint* offsets) /*!< in: array returned by
rec_get_offsets() */
{
mysql_row_templ_t* templ;
mem_heap_t* extern_field_heap = NULL;
mem_heap_t* heap;
- byte* data;
+ const byte* data;
ulint len;
ulint i;
@@ -2636,8 +2727,9 @@ row_sel_store_mysql_rec(
causes an assert */
data = btr_rec_copy_externally_stored_field(
- rec, offsets, templ->rec_field_no,
- &len, heap);
+ rec, offsets,
+ dict_table_zip_size(prebuilt->table),
+ templ->rec_field_no, &len, heap);
ut_a(len != UNIV_SQL_NULL);
} else {
@@ -2690,7 +2782,8 @@ row_sel_store_mysql_rec(
mysql_rec[templ->mysql_null_byte_offset]
|= (byte) templ->mysql_null_bit_mask;
memcpy(mysql_rec + templ->mysql_col_offset,
- prebuilt->default_rec + templ->mysql_col_offset,
+ (const byte*) prebuilt->default_rec
+ + templ->mysql_col_offset,
templ->mysql_col_len);
}
}
@@ -2698,26 +2791,26 @@ row_sel_store_mysql_rec(
return(TRUE);
}
-/*************************************************************************
-Builds a previous version of a clustered index record for a consistent read */
+/*********************************************************************//**
+Builds a previous version of a clustered index record for a consistent read
+@return DB_SUCCESS or error code */
static
ulint
row_sel_build_prev_vers_for_mysql(
/*==============================*/
- /* out: DB_SUCCESS or error code */
- read_view_t* read_view, /* in: read view */
- dict_index_t* clust_index, /* in: clustered index */
- row_prebuilt_t* prebuilt, /* in: prebuilt struct */
- rec_t* rec, /* in: record in a clustered index */
- ulint** offsets, /* in/out: offsets returned by
+ read_view_t* read_view, /*!< in: read view */
+ dict_index_t* clust_index, /*!< in: clustered index */
+ row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */
+ const rec_t* rec, /*!< in: record in a clustered index */
+ ulint** offsets, /*!< in/out: offsets returned by
rec_get_offsets(rec, clust_index) */
- mem_heap_t** offset_heap, /* in/out: memory heap from which
+ mem_heap_t** offset_heap, /*!< in/out: memory heap from which
the offsets are allocated */
- rec_t** old_vers, /* out: old version, or NULL if the
+ rec_t** old_vers, /*!< out: old version, or NULL if the
record does not exist in the view:
i.e., it was freshly inserted
afterwards */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint err;
@@ -2733,36 +2826,38 @@ row_sel_build_prev_vers_for_mysql(
return(err);
}
-/*************************************************************************
+/*********************************************************************//**
Retrieves the clustered index record corresponding to a record in a
non-clustered index. Does the necessary locking. Used in the MySQL
-interface. */
+interface.
+@return DB_SUCCESS or error code */
static
ulint
row_sel_get_clust_rec_for_mysql(
/*============================*/
- /* out: DB_SUCCESS or error code */
- row_prebuilt_t* prebuilt,/* in: prebuilt struct in the handle */
- dict_index_t* sec_index,/* in: secondary index where rec resides */
- rec_t* rec, /* in: record in a non-clustered index; if
+ row_prebuilt_t* prebuilt,/*!< in: prebuilt struct in the handle */
+ dict_index_t* sec_index,/*!< in: secondary index where rec resides */
+ const rec_t* rec, /*!< in: record in a non-clustered index; if
this is a locking read, then rec is not
allowed to be delete-marked, and that would
not make sense either */
- que_thr_t* thr, /* in: query thread */
- rec_t** out_rec,/* out: clustered record or an old version of
+ que_thr_t* thr, /*!< in: query thread */
+ const rec_t** out_rec,/*!< out: clustered record or an old version of
it, NULL if the old version did not exist
in the read view, i.e., it was a fresh
inserted version */
- ulint** offsets,/* out: offsets returned by
+ ulint** offsets,/*!< in: offsets returned by
+ rec_get_offsets(rec, sec_index);
+ out: offsets returned by
rec_get_offsets(out_rec, clust_index) */
- mem_heap_t** offset_heap,/* in/out: memory heap from which
+ mem_heap_t** offset_heap,/*!< in/out: memory heap from which
the offsets are allocated */
- mtr_t* mtr) /* in: mtr used to get access to the
+ mtr_t* mtr) /*!< in: mtr used to get access to the
non-clustered record; the same mtr is used to
access the clustered index */
{
dict_index_t* clust_index;
- rec_t* clust_rec;
+ const rec_t* clust_rec;
rec_t* old_vers;
ulint err;
trx_t* trx;
@@ -2770,7 +2865,8 @@ row_sel_get_clust_rec_for_mysql(
*out_rec = NULL;
trx = thr_get_trx(thr);
- row_build_row_ref_in_tuple(prebuilt->clust_ref, sec_index, rec, trx);
+ row_build_row_ref_in_tuple(prebuilt->clust_ref, rec,
+ sec_index, *offsets, trx);
clust_index = dict_table_get_first_index(sec_index->table);
@@ -2834,7 +2930,8 @@ row_sel_get_clust_rec_for_mysql(
we set a LOCK_REC_NOT_GAP type lock */
err = lock_clust_rec_read_check_and_lock(
- 0, clust_rec, clust_index, *offsets,
+ 0, btr_pcur_get_block(prebuilt->clust_pcur),
+ clust_rec, clust_index, *offsets,
prebuilt->select_lock_type, LOCK_REC_NOT_GAP, thr);
if (err != DB_SUCCESS) {
@@ -2861,7 +2958,7 @@ row_sel_get_clust_rec_for_mysql(
clust_rec, offsets, offset_heap, &old_vers,
mtr);
- if (err != DB_SUCCESS) {
+ if (err != DB_SUCCESS || old_vers == NULL) {
goto err_exit;
}
@@ -2882,15 +2979,15 @@ row_sel_get_clust_rec_for_mysql(
visit through secondary index records that would not really
exist in our snapshot. */
- if (clust_rec && (old_vers || rec_get_deleted_flag(
- rec,
- dict_table_is_comp(
- sec_index->table)))
+ if (clust_rec
+ && (old_vers
+ || rec_get_deleted_flag(rec, dict_table_is_comp(
+ sec_index->table)))
&& !row_sel_sec_rec_is_for_clust_rec(
rec, sec_index, clust_rec, clust_index)) {
clust_rec = NULL;
- } else {
#ifdef UNIV_SEARCH_DEBUG
+ } else {
ut_a(clust_rec == NULL
|| row_sel_sec_rec_is_for_clust_rec(
rec, sec_index, clust_rec, clust_index));
@@ -2913,29 +3010,27 @@ err_exit:
return(err);
}
-/************************************************************************
+/********************************************************************//**
Restores cursor position after it has been stored. We have to take into
account that the record the cursor was positioned on may have been deleted.
-Then we may have to move the cursor one step up or down. */
+Then we may have to move the cursor one step up or down.
+@return TRUE if we may need to process the record the cursor is now
+positioned on (i.e. we should not go to the next record yet) */
static
ibool
sel_restore_position_for_mysql(
/*===========================*/
- /* out: TRUE if we may need to
- process the record the cursor is
- now positioned on (i.e. we should
- not go to the next record yet) */
- ibool* same_user_rec, /* out: TRUE if we were able to restore
+ ibool* same_user_rec, /*!< out: TRUE if we were able to restore
the cursor on a user record with the
same ordering prefix in the
B-tree index */
- ulint latch_mode, /* in: latch mode wished in
+ ulint latch_mode, /*!< in: latch mode wished in
restoration */
- btr_pcur_t* pcur, /* in: cursor whose position
+ btr_pcur_t* pcur, /*!< in: cursor whose position
has been stored */
- ibool moves_up, /* in: TRUE if the cursor moves up
+ ibool moves_up, /*!< in: TRUE if the cursor moves up
in the index */
- mtr_t* mtr) /* in: mtr; CAUTION: may commit
+ mtr_t* mtr) /*!< in: mtr; CAUTION: may commit
mtr temporarily! */
{
ibool success;
@@ -2966,7 +3061,7 @@ sel_restore_position_for_mysql(
return(TRUE);
}
- if (btr_pcur_is_on_user_rec(pcur, mtr)) {
+ if (btr_pcur_is_on_user_rec(pcur)) {
btr_pcur_move_to_prev(pcur, mtr);
}
@@ -2976,22 +3071,22 @@ sel_restore_position_for_mysql(
ut_ad(relative_position == BTR_PCUR_BEFORE
|| relative_position == BTR_PCUR_BEFORE_FIRST_IN_TREE);
- if (moves_up && btr_pcur_is_on_user_rec(pcur, mtr)) {
+ if (moves_up && btr_pcur_is_on_user_rec(pcur)) {
btr_pcur_move_to_next(pcur, mtr);
}
return(TRUE);
}
-/************************************************************************
+/********************************************************************//**
Pops a cached row for MySQL from the fetch cache. */
UNIV_INLINE
void
row_sel_pop_cached_row_for_mysql(
/*=============================*/
- byte* buf, /* in/out: buffer where to copy the
+ byte* buf, /*!< in/out: buffer where to copy the
row */
- row_prebuilt_t* prebuilt) /* in: prebuilt struct */
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct */
{
ulint i;
mysql_row_templ_t* templ;
@@ -3033,15 +3128,16 @@ row_sel_pop_cached_row_for_mysql(
}
}
-/************************************************************************
+/********************************************************************//**
Pushes a row for MySQL to the fetch cache. */
UNIV_INLINE
void
row_sel_push_cache_row_for_mysql(
/*=============================*/
- row_prebuilt_t* prebuilt, /* in: prebuilt struct */
- rec_t* rec, /* in: record to push */
- const ulint* offsets) /* in: rec_get_offsets() */
+ row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */
+ const rec_t* rec, /*!< in: record to push; must
+ be protected by a page latch */
+ const ulint* offsets) /*!< in: rec_get_offsets() */
{
byte* buf;
ulint i;
@@ -3081,29 +3177,29 @@ row_sel_push_cache_row_for_mysql(
prebuilt->n_fetch_cached++;
}
-/*************************************************************************
+/*********************************************************************//**
Tries to do a shortcut to fetch a clustered index record with a unique key,
using the hash index if possible (not always). We assume that the search
mode is PAGE_CUR_GE, it is a consistent read, there is a read view in trx,
-btr search latch has been locked in S-mode. */
+btr search latch has been locked in S-mode.
+@return SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
static
ulint
row_sel_try_search_shortcut_for_mysql(
/*==================================*/
- /* out: SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
- rec_t** out_rec,/* out: record if found */
- row_prebuilt_t* prebuilt,/* in: prebuilt struct */
- ulint** offsets,/* in/out: for rec_get_offsets(*out_rec) */
- mem_heap_t** heap, /* in/out: heap for rec_get_offsets() */
- mtr_t* mtr) /* in: started mtr */
+ const rec_t** out_rec,/*!< out: record if found */
+ row_prebuilt_t* prebuilt,/*!< in: prebuilt struct */
+ ulint** offsets,/*!< in/out: for rec_get_offsets(*out_rec) */
+ mem_heap_t** heap, /*!< in/out: heap for rec_get_offsets() */
+ mtr_t* mtr) /*!< in: started mtr */
{
dict_index_t* index = prebuilt->index;
- dtuple_t* search_tuple = prebuilt->search_tuple;
+ const dtuple_t* search_tuple = prebuilt->search_tuple;
btr_pcur_t* pcur = prebuilt->pcur;
trx_t* trx = prebuilt->trx;
- rec_t* rec;
+ const rec_t* rec;
- ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(dict_index_is_clust(index));
ut_ad(!prebuilt->templ_contains_blob);
btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE,
@@ -3152,34 +3248,31 @@ row_sel_try_search_shortcut_for_mysql(
return(SEL_FOUND);
}
-/************************************************************************
+/********************************************************************//**
Searches for rows in the database. This is used in the interface to
MySQL. This function opens a cursor, and also implements fetch next
and fetch prev. NOTE that if we do a search with a full key value
from a unique index (ROW_SEL_EXACT), then we will not store the cursor
-position and fetch next or fetch prev must not be tried to the cursor! */
-
+position and fetch next or fetch prev must not be tried on the cursor!
+@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK,
+DB_LOCK_TABLE_FULL, DB_CORRUPTION, or DB_TOO_BIG_RECORD */
+UNIV_INTERN
ulint
row_search_for_mysql(
/*=================*/
- /* out: DB_SUCCESS,
- DB_RECORD_NOT_FOUND,
- DB_END_OF_INDEX, DB_DEADLOCK,
- DB_LOCK_TABLE_FULL, DB_CORRUPTION,
- or DB_TOO_BIG_RECORD */
- byte* buf, /* in/out: buffer for the fetched
+ byte* buf, /*!< in/out: buffer for the fetched
row in the MySQL format */
- ulint mode, /* in: search mode PAGE_CUR_L, ... */
- row_prebuilt_t* prebuilt, /* in: prebuilt struct for the
+ ulint mode, /*!< in: search mode PAGE_CUR_L, ... */
+ row_prebuilt_t* prebuilt, /*!< in: prebuilt struct for the
table handle; this contains the info
of search_tuple, index; if search
tuple contains 0 fields then we
position the cursor at the start or
the end of the index, depending on
'mode' */
- ulint match_mode, /* in: 0 or ROW_SEL_EXACT or
+ ulint match_mode, /*!< in: 0 or ROW_SEL_EXACT or
ROW_SEL_EXACT_PREFIX */
- ulint direction) /* in: 0 or ROW_SEL_NEXT or
+ ulint direction) /*!< in: 0 or ROW_SEL_NEXT or
ROW_SEL_PREV; NOTE: if this is != 0,
then prebuilt must have a pcur
with stored position! In opening of a
@@ -3187,14 +3280,14 @@ row_search_for_mysql(
{
dict_index_t* index = prebuilt->index;
ibool comp = dict_table_is_comp(index->table);
- dtuple_t* search_tuple = prebuilt->search_tuple;
+ const dtuple_t* search_tuple = prebuilt->search_tuple;
btr_pcur_t* pcur = prebuilt->pcur;
trx_t* trx = prebuilt->trx;
dict_index_t* clust_index;
que_thr_t* thr;
- rec_t* rec;
- rec_t* result_rec;
- rec_t* clust_rec;
+ const rec_t* rec;
+ const rec_t* result_rec;
+ const rec_t* clust_rec;
ulint err = DB_SUCCESS;
ibool unique_search = FALSE;
ibool unique_search_from_clust_index = FALSE;
@@ -3217,7 +3310,7 @@ row_search_for_mysql(
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
ut_ad(index && pcur && search_tuple);
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
@@ -3233,14 +3326,18 @@ row_search_for_mysql(
"InnoDB: the MySQL datadir, or have you used"
" DISCARD TABLESPACE?\n"
"InnoDB: Look from\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "innodb-troubleshooting.html\n"
+ "InnoDB: " REFMAN "innodb-troubleshooting.html\n"
"InnoDB: how you can resolve the problem.\n",
prebuilt->table->name);
return(DB_ERROR);
}
+ if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
+
+ return(DB_MISSING_HISTORY);
+ }
+
if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) {
fprintf(stderr,
"InnoDB: Error: trying to free a corrupt\n"
@@ -3286,7 +3383,7 @@ row_search_for_mysql(
/* PHASE 0: Release a possible s-latch we are holding on the
adaptive hash index latch if there is someone waiting behind */
- if (UNIV_UNLIKELY(btr_search_latch.writer != RW_LOCK_NOT_LOCKED)
+ if (UNIV_UNLIKELY(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_NOT_LOCKED)
&& trx->has_search_latch) {
/* There is an x-latch request on the adaptive hash index:
@@ -3381,10 +3478,10 @@ row_search_for_mysql(
locks when locking delete-marked records. */
if (match_mode == ROW_SEL_EXACT
- && index->type & DICT_UNIQUE
+ && dict_index_is_unique(index)
&& dtuple_get_n_fields(search_tuple)
== dict_index_get_n_unique(index)
- && (index->type & DICT_CLUSTERED
+ && (dict_index_is_clust(index)
|| !dtuple_contains_null(search_tuple))) {
/* Note above that a UNIQUE secondary index can contain many
@@ -3421,7 +3518,7 @@ row_search_for_mysql(
if (UNIV_UNLIKELY(direction == 0)
&& unique_search
- && index->type & DICT_CLUSTERED
+ && dict_index_is_clust(index)
&& !prebuilt->templ_contains_blob
&& !prebuilt->used_in_HANDLER
&& (prebuilt->mysql_row_len < UNIV_PAGE_SIZE / 8)) {
@@ -3461,6 +3558,12 @@ row_search_for_mysql(
ut_a(0 == cmp_dtuple_rec(search_tuple,
rec, offsets));
#endif
+ /* At this point, rec is protected by
+ a page latch that was acquired by
+ row_sel_try_search_shortcut_for_mysql().
+ The latch will not be released until
+ mtr_commit(&mtr). */
+
if (!row_sel_store_mysql_rec(buf, prebuilt,
rec, offsets)) {
err = DB_TOO_BIG_RECORD;
@@ -3477,19 +3580,8 @@ row_search_for_mysql(
srv_n_rows_read++;
- if (trx->search_latch_timeout > 0
- && trx->has_search_latch) {
-
- trx->search_latch_timeout--;
-
- rw_lock_s_unlock(&btr_search_latch);
- trx->has_search_latch = FALSE;
- }
-
- /* NOTE that we do NOT store the cursor
- position */
err = DB_SUCCESS;
- goto func_exit;
+ goto release_search_latch_if_needed;
case SEL_EXHAUSTED:
mtr_commit(&mtr);
@@ -3497,6 +3589,8 @@ row_search_for_mysql(
/* ut_print_name(stderr, index->name);
fputs(" record not found 2\n", stderr); */
+ err = DB_RECORD_NOT_FOUND;
+release_search_latch_if_needed:
if (trx->search_latch_timeout > 0
&& trx->has_search_latch) {
@@ -3508,9 +3602,13 @@ row_search_for_mysql(
/* NOTE that we do NOT store the cursor
position */
-
- err = DB_RECORD_NOT_FOUND;
goto func_exit;
+
+ case SEL_RETRY:
+ break;
+
+ default:
+ ut_ad(0);
}
shortcut_fails_too_big_rec:
mtr_commit(&mtr);
@@ -3601,12 +3699,12 @@ shortcut_fails_too_big_rec:
/* Try to place a gap lock on the next index record
to prevent phantoms in ORDER BY ... DESC queries */
+ const rec_t* next = page_rec_get_next_const(rec);
- offsets = rec_get_offsets(page_rec_get_next(rec),
- index, offsets,
+ offsets = rec_get_offsets(next, index, offsets,
ULINT_UNDEFINED, &heap);
- err = sel_set_rec_lock(page_rec_get_next(rec),
- index, offsets,
+ err = sel_set_rec_lock(btr_pcur_get_block(pcur),
+ next, index, offsets,
prebuilt->select_lock_type,
LOCK_GAP, thr);
@@ -3674,7 +3772,7 @@ rec_loop:
fputs("Using ", stderr);
dict_index_name_print(stderr, index);
fprintf(stderr, " cnt %lu ; Page no %lu\n", cnt,
- buf_frame_get_page_no(buf_frame_align(rec)));
+ page_get_page_no(page_align(rec)));
rec_print(rec);
*/
#endif /* UNIV_SEARCH_DEBUG */
@@ -3704,7 +3802,8 @@ rec_loop:
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &heap);
- err = sel_set_rec_lock(rec, index, offsets,
+ err = sel_set_rec_lock(btr_pcur_get_block(pcur),
+ rec, index, offsets,
prebuilt->select_lock_type,
LOCK_ORDINARY, thr);
@@ -3742,22 +3841,20 @@ rec_loop:
wrong_offs:
if (srv_force_recovery == 0 || moves_up == FALSE) {
ut_print_timestamp(stderr);
- buf_page_print(buf_frame_align(rec));
+ buf_page_print(page_align(rec), 0);
fprintf(stderr,
- "\nInnoDB: rec address %p, first"
- " buffer frame %p\n"
- "InnoDB: buffer pool high end %p,"
+ "\nInnoDB: rec address %p,"
" buf block fix count %lu\n",
- (void*) rec, (void*) buf_pool->frame_zero,
- (void*) buf_pool->high_end,
- (ulong)buf_block_align(rec)->buf_fix_count);
+ (void*) rec, (ulong)
+ btr_cur_get_block(btr_pcur_get_btr_cur(pcur))
+ ->page.buf_fix_count);
fprintf(stderr,
"InnoDB: Index corruption: rec offs %lu"
" next offs %lu, page no %lu,\n"
"InnoDB: ",
(ulong) page_offset(rec),
(ulong) next_offs,
- (ulong) buf_frame_get_page_no(rec));
+ (ulong) page_get_page_no(page_align(rec)));
dict_index_name_print(stderr, trx, index);
fputs(". Run CHECK TABLE. You may need to\n"
"InnoDB: restore from a backup, or"
@@ -3777,7 +3874,7 @@ wrong_offs:
"InnoDB: ",
(ulong) page_offset(rec),
(ulong) next_offs,
- (ulong) buf_frame_get_page_no(rec));
+ (ulong) page_get_page_no(page_align(rec)));
dict_index_name_print(stderr, trx, index);
fputs(". We try to skip the rest of the page.\n",
stderr);
@@ -3802,7 +3899,7 @@ wrong_offs:
"InnoDB: ",
(ulong) page_offset(rec),
(ulong) next_offs,
- (ulong) buf_frame_get_page_no(rec));
+ (ulong) page_get_page_no(page_align(rec)));
dict_index_name_print(stderr, trx, index);
fputs(". We try to skip the record.\n",
stderr);
@@ -3836,6 +3933,7 @@ wrong_offs:
using a READ COMMITTED isolation level. */
err = sel_set_rec_lock(
+ btr_pcur_get_block(pcur),
rec, index, offsets,
prebuilt->select_lock_type, LOCK_GAP,
thr);
@@ -3871,6 +3969,7 @@ wrong_offs:
using a READ COMMITTED isolation level. */
err = sel_set_rec_lock(
+ btr_pcur_get_block(pcur),
rec, index, offsets,
prebuilt->select_lock_type, LOCK_GAP,
thr);
@@ -3939,12 +4038,13 @@ no_gap_lock:
lock_type = LOCK_REC_NOT_GAP;
}
- err = sel_set_rec_lock(rec, index, offsets,
+ err = sel_set_rec_lock(btr_pcur_get_block(pcur),
+ rec, index, offsets,
prebuilt->select_lock_type,
lock_type, thr);
switch (err) {
- rec_t* old_vers;
+ const rec_t* old_vers;
case DB_SUCCESS:
if (srv_locks_unsafe_for_binlog
|| trx->isolation_level == TRX_ISO_READ_COMMITTED) {
@@ -4056,8 +4156,7 @@ no_gap_lock:
rec = old_vers;
}
- } else if (!lock_sec_rec_cons_read_sees(rec, index,
- trx->read_view)) {
+ } else if (!lock_sec_rec_cons_read_sees(rec, trx->read_view)) {
/* We are looking into a non-clustered index,
and to get the right version of the record we
have to look also into the clustered index: this
@@ -4197,6 +4296,10 @@ requires_clust_rec:
result_rec != rec ? clust_index : index,
offsets));
+ /* At this point, the clustered index record is protected
+ by a page latch that was acquired when pcur was positioned.
+ The latch will not be released until mtr_commit(&mtr). */
+
if ((match_mode == ROW_SEL_EXACT
|| prebuilt->n_rows_fetched >= MYSQL_FETCH_CACHE_THRESHOLD)
&& prebuilt->select_lock_type == LOCK_NONE
@@ -4446,17 +4549,16 @@ func_exit:
return(err);
}
-/***********************************************************************
+/*******************************************************************//**
Checks if MySQL at the moment is allowed for this table to retrieve a
-consistent read result, or store it to the query cache. */
-
+consistent read result, or store it to the query cache.
+@return TRUE if storing or retrieving from the query cache is permitted */
+UNIV_INTERN
ibool
row_search_check_if_query_cache_permitted(
/*======================================*/
- /* out: TRUE if storing or retrieving
- from the query cache is permitted */
- trx_t* trx, /* in: transaction object */
- const char* norm_name) /* in: concatenation of database name,
+ trx_t* trx, /*!< in: transaction object */
+ const char* norm_name) /*!< in: concatenation of database name,
'/' char, table name */
{
dict_table_t* table;
@@ -4503,81 +4605,58 @@ row_search_check_if_query_cache_permitted(
return(ret);
}
-/***********************************************************************
+/*******************************************************************//**
Read the AUTOINC column from the current row. If the value is less than
-0 and the type is not unsigned then we reset the value to 0. */
+0 and the type is not unsigned then we reset the value to 0.
+@return value read from the column */
static
-ib_ulonglong
+ib_uint64_t
row_search_autoinc_read_column(
/*===========================*/
- /* out: value read from the column */
- dict_index_t* index, /* in: index to read from */
- const rec_t* rec, /* in: current rec */
- ulint col_no, /* in: column number */
- ulint mtype, /*!< in: column main type */
- ibool unsigned_type) /* in: signed or unsigned flag */
+ dict_index_t* index, /*!< in: index to read from */
+ const rec_t* rec, /*!< in: current rec */
+ ulint col_no, /*!< in: column number */
+ ibool unsigned_type) /*!< in: signed or unsigned flag */
{
ulint len;
const byte* data;
- ib_ulonglong value;
+ ib_uint64_t value;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = sizeof offsets_ / sizeof *offsets_;
+ rec_offs_init(offsets_);
- /* TODO: We have to cast away the const of rec for now. This needs
- to be fixed later.*/
- offsets = rec_get_offsets(
- (rec_t*) rec, index, offsets, ULINT_UNDEFINED, &heap);
+ offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
- /* TODO: We have to cast away the const of rec for now. This needs
- to be fixed later.*/
- data = rec_get_nth_field((rec_t*)rec, offsets, col_no, &len);
+ data = rec_get_nth_field(rec, offsets, col_no, &len);
ut_a(len != UNIV_SQL_NULL);
+ ut_a(len <= sizeof value);
- switch (mtype) {
- case DATA_INT:
- ut_a(len <= sizeof value);
- value = mach_read_int_type(data, len, unsigned_type);
- break;
-
- case DATA_FLOAT:
- ut_a(len == sizeof(float));
- value = mach_float_read(data);
- break;
-
- case DATA_DOUBLE:
- ut_a(len == sizeof(double));
- value = mach_double_read(data);
- break;
-
- default:
- ut_error;
- }
+ /* we assume AUTOINC value cannot be negative */
+ value = mach_read_int_type(data, len, unsigned_type);
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
- /* We assume that the autoinc counter can't be negative. */
- if (!unsigned_type && (ib_longlong) value < 0) {
+ if (!unsigned_type && (ib_int64_t) value < 0) {
value = 0;
}
return(value);
}
-/***********************************************************************
-Get the last row. */
+/*******************************************************************//**
+Get the last row.
+@return current rec or NULL */
static
const rec_t*
row_search_autoinc_get_rec(
/*=======================*/
- /* out: current rec or NULL */
- btr_pcur_t* pcur, /* in: the current cursor */
- mtr_t* mtr) /* in: mini transaction */
+ btr_pcur_t* pcur, /*!< in: the current cursor */
+ mtr_t* mtr) /*!< in: mini transaction */
{
do {
const rec_t* rec = btr_pcur_get_rec(pcur);
@@ -4590,18 +4669,17 @@ row_search_autoinc_get_rec(
return(NULL);
}
-/***********************************************************************
-Read the max AUTOINC value from an index. */
-
+/*******************************************************************//**
+Read the max AUTOINC value from an index.
+@return DB_SUCCESS if all OK else error code, DB_RECORD_NOT_FOUND if
+column name can't be found in index */
+UNIV_INTERN
ulint
row_search_max_autoinc(
/*===================*/
- /* out: DB_SUCCESS if all OK else
- error code, DB_RECORD_NOT_FOUND if
- column name can't be found in index */
- dict_index_t* index, /* in: index to search */
- const char* col_name, /* in: name of autoinc column */
- ib_ulonglong* value) /* out: AUTOINC value read */
+ dict_index_t* index, /*!< in: index to search */
+ const char* col_name, /*!< in: name of autoinc column */
+ ib_uint64_t* value) /*!< out: AUTOINC value read */
{
ulint i;
ulint n_cols;
@@ -4643,8 +4721,7 @@ row_search_max_autoinc(
dfield->col->prtype & DATA_UNSIGNED);
*value = row_search_autoinc_read_column(
- index, rec, i,
- dfield->col->mtype, unsigned_type);
+ index, rec, i, unsigned_type);
}
}
diff --git a/storage/innobase/row/row0uins.c b/storage/innobase/row/row0uins.c
index ce9ab792204..9f9c814f1a5 100644
--- a/storage/innobase/row/row0uins.c
+++ b/storage/innobase/row/row0uins.c
@@ -1,7 +1,24 @@
-/******************************************************
-Fresh insert undo
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0uins.c
+Fresh insert undo
Created 2/25/1997 Heikki Tuuri
*******************************************************/
@@ -29,15 +46,15 @@ Created 2/25/1997 Heikki Tuuri
#include "ibuf0ibuf.h"
#include "log0log.h"
-/*******************************************************************
+/***************************************************************//**
Removes a clustered index record. The pcur in node was positioned on the
-record, now it is detached. */
+record, now it is detached.
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static
ulint
row_undo_ins_remove_clust_rec(
/*==========================*/
- /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
- undo_node_t* node) /* in: undo node */
+ undo_node_t* node) /*!< in: undo node */
{
btr_cur_t* btr_cur;
ibool success;
@@ -52,6 +69,7 @@ row_undo_ins_remove_clust_rec(
ut_a(success);
if (ut_dulint_cmp(node->table->id, DICT_INDEXES_ID) == 0) {
+ ut_ad(node->trx->dict_operation_lock_mode == RW_X_LATCH);
/* Drop the index tree associated with the row in
SYS_INDEXES table: */
@@ -86,7 +104,10 @@ retry:
&(node->pcur), &mtr);
ut_a(success);
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur, TRUE, &mtr);
+ btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
+ trx_is_recv(node->trx)
+ ? RB_RECOVERY
+ : RB_NORMAL, &mtr);
/* The delete operation may fail if we have little
file space left: TODO: easiest to crash the database
@@ -111,19 +132,18 @@ retry:
return(err);
}
-/*******************************************************************
-Removes a secondary index entry if found. */
+/***************************************************************//**
+Removes a secondary index entry if found.
+@return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
static
ulint
row_undo_ins_remove_sec_low(
/*========================*/
- /* out: DB_SUCCESS, DB_FAIL, or
- DB_OUT_OF_FILE_SPACE */
- ulint mode, /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+ ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
depending on whether we wish optimistic or
pessimistic descent down the index tree */
- dict_index_t* index, /* in: index */
- dtuple_t* entry) /* in: index entry to remove */
+ dict_index_t* index, /*!< in: index */
+ dtuple_t* entry) /*!< in: index entry to remove */
{
btr_pcur_t pcur;
btr_cur_t* btr_cur;
@@ -159,7 +179,14 @@ row_undo_ins_remove_sec_low(
} else {
ut_ad(mode == BTR_MODIFY_TREE);
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur, TRUE, &mtr);
+ /* No need to distinguish RB_RECOVERY here, because we
+ are deleting a secondary index record: the distinction
+ between RB_NORMAL and RB_RECOVERY only matters when
+ deleting a record that contains externally stored
+ columns. */
+ ut_ad(!dict_index_is_clust(index));
+ btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
+ RB_NORMAL, &mtr);
}
btr_pcur_close(&pcur);
@@ -168,16 +195,16 @@ row_undo_ins_remove_sec_low(
return(err);
}
-/*******************************************************************
+/***************************************************************//**
Removes a secondary index entry from the index if found. Tries first
-optimistic, then pessimistic descent down the tree. */
+optimistic, then pessimistic descent down the tree.
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static
ulint
row_undo_ins_remove_sec(
/*====================*/
- /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
- dict_index_t* index, /* in: index */
- dtuple_t* entry) /* in: index entry to insert */
+ dict_index_t* index, /*!< in: index */
+ dtuple_t* entry) /*!< in: index entry to remove */
{
ulint err;
ulint n_tries = 0;
@@ -211,17 +238,17 @@ retry:
return(err);
}
-/***************************************************************
+/***********************************************************//**
Parses the row reference and other info in a fresh insert undo record. */
static
void
row_undo_ins_parse_undo_rec(
/*========================*/
- undo_node_t* node) /* in: row undo node */
+ undo_node_t* node) /*!< in/out: row undo node */
{
dict_index_t* clust_index;
byte* ptr;
- dulint undo_no;
+ undo_no_t undo_no;
dulint table_id;
ulint type;
ulint dummy;
@@ -234,75 +261,90 @@ row_undo_ins_parse_undo_rec(
ut_ad(type == TRX_UNDO_INSERT_REC);
node->rec_type = type;
+ node->update = NULL;
node->table = dict_table_get_on_id(table_id, node->trx);
- if (node->table == NULL) {
-
- return;
- }
-
- if (node->table->ibd_file_missing) {
- /* We skip undo operations to missing .ibd files */
+ /* Skip the UNDO if we can't find the table or the .ibd file. */
+ if (UNIV_UNLIKELY(node->table == NULL)) {
+ } else if (UNIV_UNLIKELY(node->table->ibd_file_missing)) {
node->table = NULL;
+ } else {
+ clust_index = dict_table_get_first_index(node->table);
- return;
+ if (clust_index != NULL) {
+ ptr = trx_undo_rec_get_row_ref(
+ ptr, clust_index, &node->ref, node->heap);
+ } else {
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: table ");
+ ut_print_name(stderr, node->trx, TRUE,
+ node->table->name);
+ fprintf(stderr, " has no indexes, "
+ "ignoring the table\n");
+
+ node->table = NULL;
+ }
}
-
- clust_index = dict_table_get_first_index(node->table);
-
- ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
- node->heap);
}
-/***************************************************************
+/***********************************************************//**
Undoes a fresh insert of a row to a table. A fresh insert means that
the same clustered index unique key did not have any record, even delete
-marked, at the time of the insert. */
-
+marked, at the time of the insert. InnoDB is eager in a rollback:
+if it figures out that an index record will be removed in the purge
+anyway, it will remove it in the rollback.
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+UNIV_INTERN
ulint
row_undo_ins(
/*=========*/
- /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
- undo_node_t* node) /* in: row undo node */
+ undo_node_t* node) /*!< in: row undo node */
{
- dtuple_t* entry;
- ibool found;
- ulint err;
-
ut_ad(node);
ut_ad(node->state == UNDO_NODE_INSERT);
row_undo_ins_parse_undo_rec(node);
- if (node->table == NULL) {
- found = FALSE;
- } else {
- found = row_undo_search_clust_to_pcur(node);
- }
-
- if (!found) {
+ if (!node->table || !row_undo_search_clust_to_pcur(node)) {
trx_undo_rec_release(node->trx, node->undo_no);
return(DB_SUCCESS);
}
+ /* Iterate over all the indexes and undo the insert. */
+
+ /* Skip the clustered index (the first index) */
node->index = dict_table_get_next_index(
dict_table_get_first_index(node->table));
while (node->index != NULL) {
- entry = row_build_index_entry(node->row, node->index,
- node->heap);
- err = row_undo_ins_remove_sec(node->index, entry);
+ dtuple_t* entry;
+ ulint err;
+
+ entry = row_build_index_entry(node->row, node->ext,
+ node->index, node->heap);
+ if (UNIV_UNLIKELY(!entry)) {
+ /* The database must have crashed after
+ inserting a clustered index record but before
+ writing all the externally stored columns of
+ that record. Because secondary index entries
+ are inserted after the clustered index record,
+ we may assume that the secondary index record
+ does not exist. However, this situation may
+ only occur during the rollback of incomplete
+ transactions. */
+ ut_a(trx_is_recv(node->trx));
+ } else {
+ err = row_undo_ins_remove_sec(node->index, entry);
- if (err != DB_SUCCESS) {
+ if (err != DB_SUCCESS) {
- return(err);
+ return(err);
+ }
}
node->index = dict_table_get_next_index(node->index);
}
- err = row_undo_ins_remove_clust_rec(node);
-
- return(err);
+ return(row_undo_ins_remove_clust_rec(node));
}
diff --git a/storage/innobase/row/row0umod.c b/storage/innobase/row/row0umod.c
index 68139da116e..6be475d8c78 100644
--- a/storage/innobase/row/row0umod.c
+++ b/storage/innobase/row/row0umod.c
@@ -1,7 +1,24 @@
-/******************************************************
-Undo modify of a row
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
-(c) 1997 Innobase Oy
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0umod.c
+Undo modify of a row
Created 2/27/1997 Heikki Tuuri
*******************************************************/
@@ -41,18 +58,17 @@ delete marked clustered index record was delete unmarked and possibly also
some of its fields were changed. Now, it is possible that the delete marked
version has become obsolete at the time the undo is started. */
-/***************************************************************
+/***********************************************************//**
Checks if also the previous version of the clustered index record was
modified or inserted by the same transaction, and its undo number is such
-that it should be undone in the same rollback. */
+that it should be undone in the same rollback.
+@return TRUE if also previous modify or insert of this row should be undone */
UNIV_INLINE
ibool
row_undo_mod_undo_also_prev_vers(
/*=============================*/
- /* out: TRUE if also previous modify or
- insert of this row should be undone */
- undo_node_t* node, /* in: row undo node */
- dulint* undo_no)/* out: the undo number */
+ undo_node_t* node, /*!< in: row undo node */
+ undo_no_t* undo_no)/*!< out: the undo number */
{
trx_undo_rec_t* undo_rec;
trx_t* trx;
@@ -72,20 +88,19 @@ row_undo_mod_undo_also_prev_vers(
return(ut_dulint_cmp(trx->roll_limit, *undo_no) <= 0);
}
-/***************************************************************
-Undoes a modify in a clustered index record. */
+/***********************************************************//**
+Undoes a modify in a clustered index record.
+@return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
static
ulint
row_undo_mod_clust_low(
/*===================*/
- /* out: DB_SUCCESS, DB_FAIL, or error code:
- we may run out of file space */
- undo_node_t* node, /* in: row undo node */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr, /* in: mtr */
- ulint mode) /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
+ undo_node_t* node, /*!< in: row undo node */
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr, /*!< in: mtr; must be committed before
+ latching any further pages */
+ ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
{
- big_rec_t* dummy_big_rec;
btr_pcur_t* pcur;
btr_cur_t* btr_cur;
ulint err;
@@ -106,31 +121,38 @@ row_undo_mod_clust_low(
btr_cur, node->update,
node->cmpl_info, thr, mtr);
} else {
+ mem_heap_t* heap = NULL;
+ big_rec_t* dummy_big_rec;
+
ut_ad(mode == BTR_MODIFY_TREE);
err = btr_cur_pessimistic_update(
BTR_NO_LOCKING_FLAG
| BTR_NO_UNDO_LOG_FLAG
| BTR_KEEP_SYS_FLAG,
- btr_cur, &dummy_big_rec, node->update,
+ btr_cur, &heap, &dummy_big_rec, node->update,
node->cmpl_info, thr, mtr);
+
+ ut_a(!dummy_big_rec);
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
}
return(err);
}
-/***************************************************************
-Removes a clustered index record after undo if possible. */
+/***********************************************************//**
+Removes a clustered index record after undo if possible.
+@return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
static
ulint
row_undo_mod_remove_clust_low(
/*==========================*/
- /* out: DB_SUCCESS, DB_FAIL, or error code:
- we may run out of file space */
- undo_node_t* node, /* in: row undo node */
- que_thr_t* thr __attribute__((unused)), /* in: query thread */
- mtr_t* mtr, /* in: mtr */
- ulint mode) /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
+ undo_node_t* node, /*!< in: row undo node */
+ que_thr_t* thr __attribute__((unused)), /*!< in: query thread */
+ mtr_t* mtr, /*!< in: mtr */
+ ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
{
btr_pcur_t* pcur;
btr_cur_t* btr_cur;
@@ -170,9 +192,9 @@ row_undo_mod_remove_clust_low(
/* Note that since this operation is analogous to purge,
we can free also inherited externally stored fields:
- hence the last FALSE in the call below */
+ hence the RB_NONE in the call below */
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur, FALSE, mtr);
+ btr_cur_pessimistic_delete(&err, FALSE, btr_cur, RB_NONE, mtr);
/* The delete operation may fail if we have little
file space left: TODO: easiest to crash the database
@@ -182,24 +204,23 @@ row_undo_mod_remove_clust_low(
return(err);
}
-/***************************************************************
+/***********************************************************//**
Undoes a modify in a clustered index record. Sets also the node state for the
-next round of undo. */
+next round of undo.
+@return DB_SUCCESS or error code: we may run out of file space */
static
ulint
row_undo_mod_clust(
/*===============*/
- /* out: DB_SUCCESS or error code: we may run
- out of file space */
- undo_node_t* node, /* in: row undo node */
- que_thr_t* thr) /* in: query thread */
+ undo_node_t* node, /*!< in: row undo node */
+ que_thr_t* thr) /*!< in: query thread */
{
btr_pcur_t* pcur;
mtr_t mtr;
ulint err;
ibool success;
ibool more_vers;
- dulint new_undo_no;
+ undo_no_t new_undo_no;
ut_ad(node && thr);
@@ -272,19 +293,18 @@ row_undo_mod_clust(
return(err);
}
-/***************************************************************
-Delete marks or removes a secondary index entry if found. */
+/***********************************************************//**
+Delete marks or removes a secondary index entry if found.
+@return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
static
ulint
row_undo_mod_del_mark_or_remove_sec_low(
/*====================================*/
- /* out: DB_SUCCESS, DB_FAIL, or
- DB_OUT_OF_FILE_SPACE */
- undo_node_t* node, /* in: row undo node */
- que_thr_t* thr, /* in: query thread */
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: index entry */
- ulint mode) /* in: latch mode BTR_MODIFY_LEAF or
+ undo_node_t* node, /*!< in: row undo node */
+ que_thr_t* thr, /*!< in: query thread */
+ dict_index_t* index, /*!< in: index */
+ dtuple_t* entry, /*!< in: index entry */
+ ulint mode) /*!< in: latch mode BTR_MODIFY_LEAF or
BTR_MODIFY_TREE */
{
ibool found;
@@ -304,7 +324,15 @@ row_undo_mod_del_mark_or_remove_sec_low(
btr_cur = btr_pcur_get_btr_cur(&pcur);
if (!found) {
- /* Not found */
+ /* In crash recovery, the secondary index record may
+ be missing if the UPDATE did not have time to insert
+ the secondary index records before the crash. When we
+ are undoing that UPDATE in crash recovery, the record
+ may be missing.
+
+ In normal processing, if an update ends in a deadlock
+ before it has inserted all updated secondary index
+ records, then the undo will not find those records. */
btr_pcur_close(&pcur);
mtr_commit(&mtr);
@@ -342,8 +370,14 @@ row_undo_mod_del_mark_or_remove_sec_low(
} else {
ut_ad(mode == BTR_MODIFY_TREE);
+ /* No need to distinguish RB_RECOVERY here, because we
+ are deleting a secondary index record: the distinction
+ between RB_NORMAL and RB_RECOVERY only matters when
+ deleting a record that contains externally stored
+ columns. */
+ ut_ad(!dict_index_is_clust(index));
btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
- TRUE, &mtr);
+ RB_NORMAL, &mtr);
/* The delete operation may fail if we have little
file space left: TODO: easiest to crash the database
@@ -358,23 +392,23 @@ row_undo_mod_del_mark_or_remove_sec_low(
return(err);
}
-/***************************************************************
+/***********************************************************//**
Delete marks or removes a secondary index entry if found.
NOTE that if we updated the fields of a delete-marked secondary index record
so that alphabetically they stayed the same, e.g., 'abc' -> 'aBc', we cannot
return to the original values because we do not know them. But this should
not cause problems because in row0sel.c, in queries we always retrieve the
clustered index record or an earlier version of it, if the secondary index
-record through which we do the search is delete-marked. */
+record through which we do the search is delete-marked.
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static
ulint
row_undo_mod_del_mark_or_remove_sec(
/*================================*/
- /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
- undo_node_t* node, /* in: row undo node */
- que_thr_t* thr, /* in: query thread */
- dict_index_t* index, /* in: index */
- dtuple_t* entry) /* in: index entry */
+ undo_node_t* node, /*!< in: row undo node */
+ que_thr_t* thr, /*!< in: query thread */
+ dict_index_t* index, /*!< in: index */
+ dtuple_t* entry) /*!< in: index entry */
{
ulint err;
@@ -390,38 +424,41 @@ row_undo_mod_del_mark_or_remove_sec(
return(err);
}
-/***************************************************************
+/***********************************************************//**
Delete unmarks a secondary index entry which must be found. It might not be
delete-marked at the moment, but it does not harm to unmark it anyway. We also
need to update the fields of the secondary index record if we updated its
-fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'. */
+fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'.
+@return DB_FAIL or DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static
ulint
row_undo_mod_del_unmark_sec_and_undo_update(
/*========================================*/
- /* out: DB_FAIL or DB_SUCCESS or
- DB_OUT_OF_FILE_SPACE */
- ulint mode, /* in: search mode: BTR_MODIFY_LEAF or
+ ulint mode, /*!< in: search mode: BTR_MODIFY_LEAF or
BTR_MODIFY_TREE */
- que_thr_t* thr, /* in: query thread */
- dict_index_t* index, /* in: index */
- dtuple_t* entry) /* in: index entry */
+ que_thr_t* thr, /*!< in: query thread */
+ dict_index_t* index, /*!< in: index */
+ dtuple_t* entry) /*!< in: index entry */
{
mem_heap_t* heap;
btr_pcur_t pcur;
upd_t* update;
ulint err = DB_SUCCESS;
- ibool found;
big_rec_t* dummy_big_rec;
mtr_t mtr;
trx_t* trx = thr_get_trx(thr);
+ /* Ignore indexes that are being created. */
+ if (UNIV_UNLIKELY(*index->name == TEMP_INDEX_PREFIX)) {
+
+ return(DB_SUCCESS);
+ }
+
log_free_check();
mtr_start(&mtr);
- found = row_search_index_entry(index, entry, mode, &pcur, &mtr);
-
- if (!found) {
+ if (UNIV_UNLIKELY(!row_search_index_entry(index, entry,
+ mode, &pcur, &mtr))) {
fputs("InnoDB: error in sec index entry del undo in\n"
"InnoDB: ", stderr);
dict_index_name_print(stderr, trx, index);
@@ -457,15 +494,19 @@ row_undo_mod_del_unmark_sec_and_undo_update(
err = btr_cur_optimistic_update(
BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG,
btr_cur, update, 0, thr, &mtr);
- if (err == DB_OVERFLOW || err == DB_UNDERFLOW) {
+ switch (err) {
+ case DB_OVERFLOW:
+ case DB_UNDERFLOW:
+ case DB_ZIP_OVERFLOW:
err = DB_FAIL;
}
} else {
ut_a(mode == BTR_MODIFY_TREE);
err = btr_cur_pessimistic_update(
BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG,
- btr_cur, &dummy_big_rec,
+ btr_cur, &heap, &dummy_big_rec,
update, 0, thr, &mtr);
+ ut_a(!dummy_big_rec);
}
mem_heap_free(heap);
@@ -477,54 +518,68 @@ row_undo_mod_del_unmark_sec_and_undo_update(
return(err);
}
-/***************************************************************
-Undoes a modify in secondary indexes when undo record type is UPD_DEL. */
+/***********************************************************//**
+Undoes a modify in secondary indexes when undo record type is UPD_DEL.
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static
ulint
row_undo_mod_upd_del_sec(
/*=====================*/
- /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
- undo_node_t* node, /* in: row undo node */
- que_thr_t* thr) /* in: query thread */
+ undo_node_t* node, /*!< in: row undo node */
+ que_thr_t* thr) /*!< in: query thread */
{
mem_heap_t* heap;
dtuple_t* entry;
dict_index_t* index;
- ulint err;
+ ulint err = DB_SUCCESS;
heap = mem_heap_create(1024);
while (node->index != NULL) {
index = node->index;
- entry = row_build_index_entry(node->row, index, heap);
-
- err = row_undo_mod_del_mark_or_remove_sec(node, thr, index,
- entry);
- if (err != DB_SUCCESS) {
+ entry = row_build_index_entry(node->row, node->ext,
+ index, heap);
+ if (UNIV_UNLIKELY(!entry)) {
+ /* The database must have crashed after
+ inserting a clustered index record but before
+ writing all the externally stored columns of
+ that record. Because secondary index entries
+ are inserted after the clustered index record,
+ we may assume that the secondary index record
+ does not exist. However, this situation may
+ only occur during the rollback of incomplete
+ transactions. */
+ ut_a(trx_is_recv(thr_get_trx(thr)));
+ } else {
+ err = row_undo_mod_del_mark_or_remove_sec(
+ node, thr, index, entry);
- mem_heap_free(heap);
+ if (err != DB_SUCCESS) {
- return(err);
+ break;
+ }
}
+ mem_heap_empty(heap);
+
node->index = dict_table_get_next_index(node->index);
}
mem_heap_free(heap);
- return(DB_SUCCESS);
+ return(err);
}
-/***************************************************************
-Undoes a modify in secondary indexes when undo record type is DEL_MARK. */
+/***********************************************************//**
+Undoes a modify in secondary indexes when undo record type is DEL_MARK.
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static
ulint
row_undo_mod_del_mark_sec(
/*======================*/
- /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
- undo_node_t* node, /* in: row undo node */
- que_thr_t* thr) /* in: query thread */
+ undo_node_t* node, /*!< in: row undo node */
+ que_thr_t* thr) /*!< in: query thread */
{
mem_heap_t* heap;
dtuple_t* entry;
@@ -536,8 +591,9 @@ row_undo_mod_del_mark_sec(
while (node->index != NULL) {
index = node->index;
- entry = row_build_index_entry(node->row, index, heap);
-
+ entry = row_build_index_entry(node->row, node->ext,
+ index, heap);
+ ut_a(entry);
err = row_undo_mod_del_unmark_sec_and_undo_update(
BTR_MODIFY_LEAF, thr, index, entry);
if (err == DB_FAIL) {
@@ -560,15 +616,15 @@ row_undo_mod_del_mark_sec(
return(DB_SUCCESS);
}
-/***************************************************************
-Undoes a modify in secondary indexes when undo record type is UPD_EXIST. */
+/***********************************************************//**
+Undoes a modify in secondary indexes when undo record type is UPD_EXIST.
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static
ulint
row_undo_mod_upd_exist_sec(
/*=======================*/
- /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
- undo_node_t* node, /* in: row undo node */
- que_thr_t* thr) /* in: query thread */
+ undo_node_t* node, /*!< in: row undo node */
+ que_thr_t* thr) /*!< in: query thread */
{
mem_heap_t* heap;
dtuple_t* entry;
@@ -590,8 +646,9 @@ row_undo_mod_upd_exist_sec(
node->update)) {
/* Build the newest version of the index entry */
- entry = row_build_index_entry(node->row, index, heap);
-
+ entry = row_build_index_entry(node->row, node->ext,
+ index, heap);
+ ut_a(entry);
/* NOTE that if we updated the fields of a
delete-marked secondary index record so that
alphabetically they stayed the same, e.g.,
@@ -617,9 +674,12 @@ row_undo_mod_upd_exist_sec(
the secondary index record if we updated its fields
but alphabetically they stayed the same, e.g.,
'abc' -> 'aBc'. */
+ mem_heap_empty(heap);
+ entry = row_build_index_entry(node->undo_row,
+ node->undo_ext,
+ index, heap);
+ ut_a(entry);
- row_upd_index_replace_new_col_vals(entry, index,
- node->update, NULL);
err = row_undo_mod_del_unmark_sec_and_undo_update(
BTR_MODIFY_LEAF, thr, index, entry);
if (err == DB_FAIL) {
@@ -642,21 +702,21 @@ row_undo_mod_upd_exist_sec(
return(DB_SUCCESS);
}
-/***************************************************************
+/***********************************************************//**
Parses the row reference and other info in a modify undo log record. */
static
void
row_undo_mod_parse_undo_rec(
/*========================*/
- undo_node_t* node, /* in: row undo node */
- que_thr_t* thr) /* in: query thread */
+ undo_node_t* node, /*!< in: row undo node */
+ que_thr_t* thr) /*!< in: query thread */
{
dict_index_t* clust_index;
byte* ptr;
- dulint undo_no;
+ undo_no_t undo_no;
dulint table_id;
- dulint trx_id;
- dulint roll_ptr;
+ trx_id_t trx_id;
+ roll_ptr_t roll_ptr;
ulint info_bits;
ulint type;
ulint cmpl_info;
@@ -702,17 +762,16 @@ row_undo_mod_parse_undo_rec(
node->cmpl_info = cmpl_info;
}
-/***************************************************************
-Undoes a modify operation on a row of a table. */
-
+/***********************************************************//**
+Undoes a modify operation on a row of a table.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
ulint
row_undo_mod(
/*=========*/
- /* out: DB_SUCCESS or error code */
- undo_node_t* node, /* in: row undo node */
- que_thr_t* thr) /* in: query thread */
+ undo_node_t* node, /*!< in: row undo node */
+ que_thr_t* thr) /*!< in: query thread */
{
- ibool found;
ulint err;
ut_ad(node && thr);
@@ -720,13 +779,7 @@ row_undo_mod(
row_undo_mod_parse_undo_rec(node, thr);
- if (node->table == NULL) {
- found = FALSE;
- } else {
- found = row_undo_search_clust_to_pcur(node);
- }
-
- if (!found) {
+ if (!node->table || !row_undo_search_clust_to_pcur(node)) {
/* It is already undone, or will be undone by another query
thread, or table was dropped */
diff --git a/storage/innobase/row/row0undo.c b/storage/innobase/row/row0undo.c
index f03f84ed1b0..3d739c9689a 100644
--- a/storage/innobase/row/row0undo.c
+++ b/storage/innobase/row/row0undo.c
@@ -1,7 +1,24 @@
-/******************************************************
-Row undo
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1997 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0undo.c
+Row undo
Created 1/8/1997 Heikki Tuuri
*******************************************************/
@@ -24,6 +41,7 @@ Created 1/8/1997 Heikki Tuuri
#include "row0row.h"
#include "row0uins.h"
#include "row0umod.h"
+#include "row0upd.h"
#include "row0mysql.h"
#include "srv0srv.h"
@@ -102,16 +120,16 @@ doing the purge. Similarly, during a rollback, a record can be removed
if the stored roll ptr in the undo log points to a trx already (being) purged,
or if the roll ptr is NULL, i.e., it was a fresh insert. */
-/************************************************************************
-Creates a row undo node to a query graph. */
-
+/********************************************************************//**
+Creates a row undo node to a query graph.
+@return own: undo node */
+UNIV_INTERN
undo_node_t*
row_undo_node_create(
/*=================*/
- /* out, own: undo node */
- trx_t* trx, /* in: transaction */
- que_thr_t* parent, /* in: parent node, i.e., a thr node */
- mem_heap_t* heap) /* in: memory heap where created */
+ trx_t* trx, /*!< in: transaction */
+ que_thr_t* parent, /*!< in: parent node, i.e., a thr node */
+ mem_heap_t* heap) /*!< in: memory heap where created */
{
undo_node_t* undo;
@@ -132,19 +150,18 @@ row_undo_node_create(
return(undo);
}
-/***************************************************************
+/***********************************************************//**
Looks for the clustered index record when node has the row reference.
The pcur in node is used in the search. If found, stores the row to node,
and stores the position of pcur, and detaches it. The pcur must be closed
-by the caller in any case. */
-
+by the caller in any case.
+@return TRUE if found; NOTE the node->pcur must be closed by the
+caller, regardless of the return value */
+UNIV_INTERN
ibool
row_undo_search_clust_to_pcur(
/*==========================*/
- /* out: TRUE if found; NOTE the node->pcur
- must be closed by the caller, regardless of
- the return value */
- undo_node_t* node) /* in: row undo node */
+ undo_node_t* node) /*!< in: row undo node */
{
dict_index_t* clust_index;
ibool found;
@@ -154,7 +171,7 @@ row_undo_search_clust_to_pcur(
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
mtr_start(&mtr);
@@ -183,7 +200,16 @@ row_undo_search_clust_to_pcur(
ret = FALSE;
} else {
node->row = row_build(ROW_COPY_DATA, clust_index, rec,
- offsets, node->heap);
+ offsets, NULL, &node->ext, node->heap);
+ if (node->update) {
+ node->undo_row = dtuple_copy(node->row, node->heap);
+ row_upd_replace(node->undo_row, &node->undo_ext,
+ clust_index, node->update, node->heap);
+ } else {
+ node->undo_row = NULL;
+ node->undo_ext = NULL;
+ }
+
btr_pcur_store_position(&(node->pcur), &mtr);
ret = TRUE;
@@ -197,23 +223,22 @@ row_undo_search_clust_to_pcur(
return(ret);
}
-/***************************************************************
+/***********************************************************//**
Fetches an undo log record and does the undo for the recorded operation.
If none left, or a partial rollback completed, returns control to the
-parent node, which is always a query thread node. */
+parent node, which is always a query thread node.
+@return DB_SUCCESS if operation successfully completed, else error code */
static
ulint
row_undo(
/*=====*/
- /* out: DB_SUCCESS if operation successfully
- completed, else error code */
- undo_node_t* node, /* in: row undo node */
- que_thr_t* thr) /* in: query thread */
+ undo_node_t* node, /*!< in: row undo node */
+ que_thr_t* thr) /*!< in: query thread */
{
- ulint err;
- trx_t* trx;
- dulint roll_ptr;
- ibool locked_data_dict;
+ ulint err;
+ trx_t* trx;
+ roll_ptr_t roll_ptr;
+ ibool locked_data_dict;
ut_ad(node && thr);
@@ -300,15 +325,15 @@ row_undo(
return(err);
}
-/***************************************************************
+/***********************************************************//**
Undoes a row operation in a table. This is a high-level function used
-in SQL execution graphs. */
-
+in SQL execution graphs.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
row_undo_step(
/*==========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
ulint err;
undo_node_t* node;
diff --git a/storage/innobase/row/row0upd.c b/storage/innobase/row/row0upd.c
index c91cc449b96..58dfd43ead9 100644
--- a/storage/innobase/row/row0upd.c
+++ b/storage/innobase/row/row0upd.c
@@ -1,7 +1,24 @@
-/******************************************************
-Update of a row
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0upd.c
+Update of a row
Created 12/27/1996 Heikki Tuuri
*******************************************************/
@@ -13,13 +30,16 @@ Created 12/27/1996 Heikki Tuuri
#endif
#include "dict0dict.h"
+#include "trx0undo.h"
+#include "rem0rec.h"
+#ifndef UNIV_HOTBACKUP
#include "dict0boot.h"
#include "dict0crea.h"
#include "mach0data.h"
-#include "trx0undo.h"
#include "btr0btr.h"
#include "btr0cur.h"
#include "que0que.h"
+#include "row0ext.h"
#include "row0ins.h"
#include "row0sel.h"
#include "row0row.h"
@@ -72,40 +92,41 @@ the x-latch freed? The most efficient way for performing a
searched delete is obviously to keep the x-latch for several
steps of query graph execution. */
-/***************************************************************
+/***********************************************************//**
Checks if an update vector changes some of the first ordering fields of an
index record. This is only used in foreign key checks and we can assume
-that index does not contain column prefixes. */
+that index does not contain column prefixes.
+@return TRUE if changes */
static
ibool
row_upd_changes_first_fields_binary(
/*================================*/
- /* out: TRUE if changes */
- dtuple_t* entry, /* in: old value of index entry */
- dict_index_t* index, /* in: index of entry */
- upd_t* update, /* in: update vector for the row */
- ulint n); /* in: how many first fields to check */
+ dtuple_t* entry, /*!< in: old value of index entry */
+ dict_index_t* index, /*!< in: index of entry */
+ const upd_t* update, /*!< in: update vector for the row */
+ ulint n); /*!< in: how many first fields to check */
-/*************************************************************************
+/*********************************************************************//**
Checks if index currently is mentioned as a referenced index in a foreign
-key constraint. */
+key constraint.
+
+NOTE that since we do not hold dict_operation_lock when leaving the
+function, it may be that the referencing table has been dropped when
+we leave this function: this function is only for heuristic use!
+
+@return TRUE if referenced */
static
ibool
row_upd_index_is_referenced(
/*========================*/
- /* out: TRUE if referenced; NOTE that since
- we do not hold dict_operation_lock
- when leaving the function, it may be that
- the referencing table has been dropped when
- we leave this function: this function is only
- for heuristic use! */
- dict_index_t* index, /* in: index */
- trx_t* trx) /* in: transaction */
+ dict_index_t* index, /*!< in: index */
+ trx_t* trx) /*!< in: transaction */
{
dict_table_t* table = index->table;
dict_foreign_t* foreign;
ibool froze_data_dict = FALSE;
+ ibool is_referenced = FALSE;
if (!UT_LIST_GET_FIRST(table->referenced_list)) {
@@ -122,45 +143,48 @@ row_upd_index_is_referenced(
while (foreign) {
if (foreign->referenced_index == index) {
- if (froze_data_dict) {
- row_mysql_unfreeze_data_dictionary(trx);
- }
-
- return(TRUE);
+ is_referenced = TRUE;
+ goto func_exit;
}
foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
}
+func_exit:
if (froze_data_dict) {
row_mysql_unfreeze_data_dictionary(trx);
}
- return(FALSE);
+ return(is_referenced);
}
-/*************************************************************************
+/*********************************************************************//**
Checks if possible foreign key constraints hold after a delete of the record
-under pcur. NOTE that this function will temporarily commit mtr and lose the
-pcur position! */
+under pcur.
+
+NOTE that this function will temporarily commit mtr and lose the
+pcur position!
+
+@return DB_SUCCESS or an error code */
static
ulint
row_upd_check_references_constraints(
/*=================================*/
- /* out: DB_SUCCESS or an error code */
- upd_node_t* node, /* in: row update node */
- btr_pcur_t* pcur, /* in: cursor positioned on a record; NOTE: the
+ upd_node_t* node, /*!< in: row update node */
+ btr_pcur_t* pcur, /*!< in: cursor positioned on a record; NOTE: the
cursor position is lost in this function! */
- dict_table_t* table, /* in: table in question */
- dict_index_t* index, /* in: index of the cursor */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr) /* in: mtr */
+ dict_table_t* table, /*!< in: table in question */
+ dict_index_t* index, /*!< in: index of the cursor */
+ ulint* offsets,/*!< in/out: rec_get_offsets(pcur.rec, index) */
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr) /*!< in: mtr */
{
dict_foreign_t* foreign;
mem_heap_t* heap;
dtuple_t* entry;
trx_t* trx;
- rec_t* rec;
+ const rec_t* rec;
+ ulint n_ext;
ulint err;
ibool got_s_lock = FALSE;
@@ -172,10 +196,12 @@ row_upd_check_references_constraints(
trx = thr_get_trx(thr);
rec = btr_pcur_get_rec(pcur);
+ ut_ad(rec_offs_validate(rec, index, offsets));
heap = mem_heap_create(500);
- entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);
+ entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets,
+ &n_ext, heap);
mtr_commit(mtr);
@@ -236,37 +262,34 @@ row_upd_check_references_constraints(
}
if (err != DB_SUCCESS) {
- if (got_s_lock) {
- row_mysql_unfreeze_data_dictionary(
- trx);
- }
-
- mem_heap_free(heap);
- return(err);
+ goto func_exit;
}
}
foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
}
+ err = DB_SUCCESS;
+
+func_exit:
if (got_s_lock) {
row_mysql_unfreeze_data_dictionary(trx);
}
mem_heap_free(heap);
- return(DB_SUCCESS);
+ return(err);
}
-/*************************************************************************
-Creates an update node for a query graph. */
-
+/*********************************************************************//**
+Creates an update node for a query graph.
+@return own: update node */
+UNIV_INTERN
upd_node_t*
upd_node_create(
/*============*/
- /* out, own: update node */
- mem_heap_t* heap) /* in: mem heap where created */
+ mem_heap_t* heap) /*!< in: mem heap where created */
{
upd_node_t* node;
@@ -274,11 +297,12 @@ upd_node_create(
node->common.type = QUE_NODE_UPDATE;
node->state = UPD_NODE_UPDATE_CLUSTERED;
- node->select_will_do_update = FALSE;
node->in_mysql_interface = FALSE;
node->row = NULL;
- node->ext_vec = NULL;
+ node->ext = NULL;
+ node->upd_row = NULL;
+ node->upd_ext = NULL;
node->index = NULL;
node->update = NULL;
@@ -295,51 +319,61 @@ upd_node_create(
return(node);
}
+#endif /* !UNIV_HOTBACKUP */
-/*************************************************************************
+/*********************************************************************//**
Updates the trx id and roll ptr field in a clustered index record in database
recovery. */
-
+UNIV_INTERN
void
row_upd_rec_sys_fields_in_recovery(
/*===============================*/
- rec_t* rec, /* in: record */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint pos, /* in: TRX_ID position in rec */
- dulint trx_id, /* in: transaction id */
- dulint roll_ptr)/* in: roll ptr of the undo log record */
+ rec_t* rec, /*!< in/out: record */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint pos, /*!< in: TRX_ID position in rec */
+ trx_id_t trx_id, /*!< in: transaction id */
+ roll_ptr_t roll_ptr)/*!< in: roll ptr of the undo log record */
{
- byte* field;
- ulint len;
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
- field = rec_get_nth_field(rec, offsets, pos, &len);
- ut_ad(len == DATA_TRX_ID_LEN);
- trx_write_trx_id(field, trx_id);
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ page_zip_write_trx_id_and_roll_ptr(
+ page_zip, rec, offsets, pos, trx_id, roll_ptr);
+ } else {
+ byte* field;
+ ulint len;
- field = rec_get_nth_field(rec, offsets, pos + 1, &len);
- ut_ad(len == DATA_ROLL_PTR_LEN);
- trx_write_roll_ptr(field, roll_ptr);
+ field = rec_get_nth_field(rec, offsets, pos, &len);
+ ut_ad(len == DATA_TRX_ID_LEN);
+#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
+# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
+#endif
+ trx_write_trx_id(field, trx_id);
+ trx_write_roll_ptr(field + DATA_TRX_ID_LEN, roll_ptr);
+ }
}
-/*************************************************************************
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
Sets the trx id or roll ptr field of a clustered index entry. */
-
+UNIV_INTERN
void
row_upd_index_entry_sys_field(
/*==========================*/
- dtuple_t* entry, /* in: index entry, where the memory buffers
+ const dtuple_t* entry, /*!< in: index entry, where the memory buffers
for sys fields are already allocated:
the function just copies the new values to
them */
- dict_index_t* index, /* in: clustered index */
- ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
- dulint val) /* in: value to write */
+ dict_index_t* index, /*!< in: clustered index */
+ ulint type, /*!< in: DATA_TRX_ID or DATA_ROLL_PTR */
+ dulint val) /*!< in: value to write */
{
dfield_t* dfield;
byte* field;
ulint pos;
- ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(dict_index_is_clust(index));
pos = dict_index_get_sys_col_pos(index, type);
@@ -354,26 +388,25 @@ row_upd_index_entry_sys_field(
}
}
-/***************************************************************
+/***********************************************************//**
Returns TRUE if row update changes size of some field in index or if some
-field to be updated is stored externally in rec or update. */
-
+field to be updated is stored externally in rec or update.
+@return TRUE if the update changes the size of some field in index or
+the field is external in rec or update */
+UNIV_INTERN
ibool
row_upd_changes_field_size_or_external(
/*===================================*/
- /* out: TRUE if the update changes the size of
- some field in index or the field is external
- in rec or update */
- dict_index_t* index, /* in: index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- upd_t* update) /* in: update vector */
+ dict_index_t* index, /*!< in: index */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ const upd_t* update) /*!< in: update vector */
{
- upd_field_t* upd_field;
- dfield_t* new_val;
- ulint old_len;
- ulint new_len;
- ulint n_fields;
- ulint i;
+ const upd_field_t* upd_field;
+ const dfield_t* new_val;
+ ulint old_len;
+ ulint new_len;
+ ulint n_fields;
+ ulint i;
ut_ad(rec_offs_validate(NULL, index, offsets));
n_fields = upd_get_n_fields(update);
@@ -382,9 +415,9 @@ row_upd_changes_field_size_or_external(
upd_field = upd_get_nth_field(update, i);
new_val = &(upd_field->new_val);
- new_len = new_val->len;
+ new_len = dfield_get_len(new_val);
- if (new_len == UNIV_SQL_NULL && !rec_offs_comp(offsets)) {
+ if (dfield_is_null(new_val) && !rec_offs_comp(offsets)) {
/* A bug fixed on Dec 31st, 2004: we looked at the
SQL NULL size from the wrong field! We may backport
this fix also to 4.0. The merge to 5.0 will be made
@@ -392,7 +425,8 @@ row_upd_changes_field_size_or_external(
new_len = dict_col_get_sql_null_size(
dict_index_get_nth_col(index,
- upd_field->field_no));
+ upd_field->field_no),
+ 0);
}
old_len = rec_offs_nth_size(offsets, upd_field->field_no);
@@ -410,17 +444,8 @@ row_upd_changes_field_size_or_external(
old_len = UNIV_SQL_NULL;
}
- if (old_len != new_len) {
-
- return(TRUE);
- }
-
- if (rec_offs_nth_extern(offsets, upd_field->field_no)) {
-
- return(TRUE);
- }
-
- if (upd_field->extern_storage) {
+ if (dfield_is_ext(new_val) || old_len != new_len
+ || rec_offs_nth_extern(offsets, upd_field->field_no)) {
return(TRUE);
}
@@ -428,56 +453,70 @@ row_upd_changes_field_size_or_external(
return(FALSE);
}
+#endif /* !UNIV_HOTBACKUP */
-/***************************************************************
+/***********************************************************//**
Replaces the new column values stored in the update vector to the record
-given. No field size changes are allowed. This function is used only for
-a clustered index */
-
+given. No field size changes are allowed. */
+UNIV_INTERN
void
row_upd_rec_in_place(
/*=================*/
- rec_t* rec, /* in/out: record where replaced */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- upd_t* update) /* in: update vector */
+ rec_t* rec, /*!< in/out: record where replaced */
+ dict_index_t* index, /*!< in: the index the record belongs to */
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ const upd_t* update, /*!< in: update vector */
+ page_zip_des_t* page_zip)/*!< in: compressed page with enough space
+ available, or NULL */
{
- upd_field_t* upd_field;
- dfield_t* new_val;
- ulint n_fields;
- ulint i;
+ const upd_field_t* upd_field;
+ const dfield_t* new_val;
+ ulint n_fields;
+ ulint i;
- ut_ad(rec_offs_validate(rec, NULL, offsets));
+ ut_ad(rec_offs_validate(rec, index, offsets));
- rec_set_info_bits(rec, rec_offs_comp(offsets), update->info_bits);
+ if (rec_offs_comp(offsets)) {
+ rec_set_info_bits_new(rec, update->info_bits);
+ } else {
+ rec_set_info_bits_old(rec, update->info_bits);
+ }
n_fields = upd_get_n_fields(update);
for (i = 0; i < n_fields; i++) {
upd_field = upd_get_nth_field(update, i);
new_val = &(upd_field->new_val);
+ ut_ad(!dfield_is_ext(new_val) ==
+ !rec_offs_nth_extern(offsets, upd_field->field_no));
rec_set_nth_field(rec, offsets, upd_field->field_no,
dfield_get_data(new_val),
dfield_get_len(new_val));
}
+
+ if (UNIV_LIKELY_NULL(page_zip)) {
+ page_zip_write_rec(page_zip, rec, index, offsets, 0);
+ }
}
-/*************************************************************************
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
Writes into the redo log the values of trx id and roll ptr and enough info
-to determine their positions within a clustered index record. */
-
+to determine their positions within a clustered index record.
+@return new pointer to mlog */
+UNIV_INTERN
byte*
row_upd_write_sys_vals_to_log(
/*==========================*/
- /* out: new pointer to mlog */
- dict_index_t* index, /* in: clustered index */
- trx_t* trx, /* in: transaction */
- dulint roll_ptr,/* in: roll ptr of the undo log record */
- byte* log_ptr,/* pointer to a buffer of size > 20 opened
+ dict_index_t* index, /*!< in: clustered index */
+ trx_t* trx, /*!< in: transaction */
+ roll_ptr_t roll_ptr,/*!< in: roll ptr of the undo log record */
+ byte* log_ptr,/*!< pointer to a buffer of size > 20 opened
in mlog */
- mtr_t* mtr __attribute__((unused))) /* in: mtr */
+ mtr_t* mtr __attribute__((unused))) /*!< in: mtr */
{
- ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(dict_index_is_clust(index));
ut_ad(mtr);
log_ptr += mach_write_compressed(log_ptr,
@@ -491,19 +530,20 @@ row_upd_write_sys_vals_to_log(
return(log_ptr);
}
+#endif /* !UNIV_HOTBACKUP */
-/*************************************************************************
-Parses the log data of system field values. */
-
+/*********************************************************************//**
+Parses the log data of system field values.
+@return log data end or NULL */
+UNIV_INTERN
byte*
row_upd_parse_sys_vals(
/*===================*/
- /* out: log data end or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- ulint* pos, /* out: TRX_ID position in record */
- dulint* trx_id, /* out: trx id */
- dulint* roll_ptr)/* out: roll ptr */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ ulint* pos, /*!< out: TRX_ID position in record */
+ trx_id_t* trx_id, /*!< out: trx id */
+ roll_ptr_t* roll_ptr)/*!< out: roll ptr */
{
ptr = mach_parse_compressed(ptr, end_ptr, pos);
@@ -525,24 +565,26 @@ row_upd_parse_sys_vals(
return(ptr);
}
-/***************************************************************
+#ifndef UNIV_HOTBACKUP
+/***********************************************************//**
Writes to the redo log the new values of the fields occurring in the index. */
-
+UNIV_INTERN
void
row_upd_index_write_log(
/*====================*/
- upd_t* update, /* in: update vector */
- byte* log_ptr,/* in: pointer to mlog buffer: must contain at least
- MLOG_BUF_MARGIN bytes of free space; the buffer is
- closed within this function */
- mtr_t* mtr) /* in: mtr into whose log to write */
+ const upd_t* update, /*!< in: update vector */
+ byte* log_ptr,/*!< in: pointer to mlog buffer: must
+ contain at least MLOG_BUF_MARGIN bytes
+ of free space; the buffer is closed
+ within this function */
+ mtr_t* mtr) /*!< in: mtr into whose log to write */
{
- upd_field_t* upd_field;
- dfield_t* new_val;
- ulint len;
- ulint n_fields;
- byte* buf_end;
- ulint i;
+ const upd_field_t* upd_field;
+ const dfield_t* new_val;
+ ulint len;
+ ulint n_fields;
+ byte* buf_end;
+ ulint i;
n_fields = upd_get_n_fields(update);
@@ -569,20 +611,22 @@ row_upd_index_write_log(
new_val = &(upd_field->new_val);
- len = new_val->len;
+ len = dfield_get_len(new_val);
log_ptr += mach_write_compressed(log_ptr, upd_field->field_no);
log_ptr += mach_write_compressed(log_ptr, len);
if (len != UNIV_SQL_NULL) {
if (log_ptr + len < buf_end) {
- ut_memcpy(log_ptr, new_val->data, len);
+ memcpy(log_ptr, dfield_get_data(new_val), len);
log_ptr += len;
} else {
mlog_close(mtr, log_ptr);
- mlog_catenate_string(mtr, new_val->data, len);
+ mlog_catenate_string(mtr,
+ dfield_get_data(new_val),
+ len);
log_ptr = mlog_open(mtr, MLOG_BUF_MARGIN);
buf_end = log_ptr + MLOG_BUF_MARGIN;
@@ -592,26 +636,26 @@ row_upd_index_write_log(
mlog_close(mtr, log_ptr);
}
+#endif /* !UNIV_HOTBACKUP */
-/*************************************************************************
-Parses the log data written by row_upd_index_write_log. */
-
+/*********************************************************************//**
+Parses the log data written by row_upd_index_write_log.
+@return log data end or NULL */
+UNIV_INTERN
byte*
row_upd_index_parse(
/*================*/
- /* out: log data end or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- mem_heap_t* heap, /* in: memory heap where update vector is
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ mem_heap_t* heap, /*!< in: memory heap where update vector is
built */
- upd_t** update_out)/* out: update vector */
+ upd_t** update_out)/*!< out: update vector */
{
upd_t* update;
upd_field_t* upd_field;
dfield_t* new_val;
ulint len;
ulint n_fields;
- byte* buf;
ulint info_bits;
ulint i;
@@ -633,16 +677,19 @@ row_upd_index_parse(
update->info_bits = info_bits;
for (i = 0; i < n_fields; i++) {
+ ulint field_no;
upd_field = upd_get_nth_field(update, i);
new_val = &(upd_field->new_val);
- ptr = mach_parse_compressed(ptr, end_ptr,
- &(upd_field->field_no));
+ ptr = mach_parse_compressed(ptr, end_ptr, &field_no);
+
if (ptr == NULL) {
return(NULL);
}
+ upd_field->field_no = field_no;
+
ptr = mach_parse_compressed(ptr, end_ptr, &len);
if (ptr == NULL) {
@@ -650,21 +697,18 @@ row_upd_index_parse(
return(NULL);
}
- new_val->len = len;
-
if (len != UNIV_SQL_NULL) {
if (end_ptr < ptr + len) {
return(NULL);
- } else {
- buf = mem_heap_alloc(heap, len);
- ut_memcpy(buf, ptr, len);
-
- ptr += len;
-
- new_val->data = buf;
}
+
+ dfield_set_data(new_val,
+ mem_heap_dup(heap, ptr, len), len);
+ ptr += len;
+ } else {
+ dfield_set_null(new_val);
}
}
@@ -673,63 +717,35 @@ row_upd_index_parse(
return(ptr);
}
-/*******************************************************************
-Returns TRUE if ext_vec contains i. */
-static
-ibool
-upd_ext_vec_contains(
-/*=================*/
- /* out: TRUE if i is in ext_vec */
- ulint* ext_vec, /* in: array of indexes or NULL */
- ulint n_ext_vec, /* in: number of numbers in ext_vec */
- ulint i) /* in: a number */
-{
- ulint j;
-
- if (ext_vec == NULL) {
-
- return(FALSE);
- }
-
- for (j = 0; j < n_ext_vec; j++) {
- if (ext_vec[j] == i) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/*******************************************************************
+#ifndef UNIV_HOTBACKUP
+/***************************************************************//**
Builds an update vector from those fields which in a secondary index entry
differ from a record that has the equal ordering fields. NOTE: we compare
-the fields as binary strings! */
-
+the fields as binary strings!
+@return own: update vector of differing fields */
+UNIV_INTERN
upd_t*
row_upd_build_sec_rec_difference_binary(
/*====================================*/
- /* out, own: update vector of differing
- fields */
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: entry to insert */
- rec_t* rec, /* in: secondary index record */
- trx_t* trx, /* in: transaction */
- mem_heap_t* heap) /* in: memory heap from which allocated */
+ dict_index_t* index, /*!< in: index */
+ const dtuple_t* entry, /*!< in: entry to insert */
+ const rec_t* rec, /*!< in: secondary index record */
+ trx_t* trx, /*!< in: transaction */
+ mem_heap_t* heap) /*!< in: memory heap from which allocated */
{
upd_field_t* upd_field;
- dfield_t* dfield;
- byte* data;
+ const dfield_t* dfield;
+ const byte* data;
ulint len;
upd_t* update;
ulint n_diff;
ulint i;
ulint offsets_[REC_OFFS_SMALL_SIZE];
const ulint* offsets;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
/* This function is used only for a secondary index */
- ut_a(0 == (index->type & DICT_CLUSTERED));
+ ut_a(!dict_index_is_clust(index));
update = upd_create(dtuple_get_n_fields(entry), heap);
@@ -762,8 +778,6 @@ row_upd_build_sec_rec_difference_binary(
upd_field_set_field_no(upd_field, i, index, trx);
- upd_field->extern_storage = FALSE;
-
n_diff++;
}
}
@@ -773,41 +787,37 @@ row_upd_build_sec_rec_difference_binary(
return(update);
}
-/*******************************************************************
+/***************************************************************//**
Builds an update vector from those fields, excluding the roll ptr and
trx id fields, which in an index entry differ from a record that has
-the equal ordering fields. NOTE: we compare the fields as binary strings! */
-
+the equal ordering fields. NOTE: we compare the fields as binary strings!
+@return own: update vector of differing fields, excluding roll ptr and
+trx id */
+UNIV_INTERN
upd_t*
row_upd_build_difference_binary(
/*============================*/
- /* out, own: update vector of differing
- fields, excluding roll ptr and trx id */
- dict_index_t* index, /* in: clustered index */
- dtuple_t* entry, /* in: entry to insert */
- ulint* ext_vec,/* in: array containing field numbers of
- externally stored fields in entry, or NULL */
- ulint n_ext_vec,/* in: number of fields in ext_vec */
- rec_t* rec, /* in: clustered index record */
- trx_t* trx, /* in: transaction */
- mem_heap_t* heap) /* in: memory heap from which allocated */
+ dict_index_t* index, /*!< in: clustered index */
+ const dtuple_t* entry, /*!< in: entry to insert */
+ const rec_t* rec, /*!< in: clustered index record */
+ trx_t* trx, /*!< in: transaction */
+ mem_heap_t* heap) /*!< in: memory heap from which allocated */
{
upd_field_t* upd_field;
- dfield_t* dfield;
- byte* data;
+ const dfield_t* dfield;
+ const byte* data;
ulint len;
upd_t* update;
ulint n_diff;
ulint roll_ptr_pos;
ulint trx_id_pos;
- ibool extern_bit;
ulint i;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
const ulint* offsets;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
/* This function is used only for a clustered index */
- ut_a(index->type & DICT_CLUSTERED);
+ ut_a(dict_index_is_clust(index));
update = upd_create(dtuple_get_n_fields(entry), heap);
@@ -833,10 +843,8 @@ row_upd_build_difference_binary(
goto skip_compare;
}
- extern_bit = upd_ext_vec_contains(ext_vec, n_ext_vec, i);
-
- if (UNIV_UNLIKELY(extern_bit
- == (ibool)!rec_offs_nth_extern(offsets, i))
+ if (UNIV_UNLIKELY(!dfield_is_ext(dfield)
+ != !rec_offs_nth_extern(offsets, i))
|| !dfield_data_is_binary_equal(dfield, len, data)) {
upd_field = upd_get_nth_field(update, n_diff);
@@ -845,8 +853,6 @@ row_upd_build_difference_binary(
upd_field_set_field_no(upd_field, i, index, trx);
- upd_field->extern_storage = extern_bit;
-
n_diff++;
}
skip_compare:
@@ -858,34 +864,156 @@ skip_compare:
return(update);
}
-/***************************************************************
+/***********************************************************//**
+Fetch a prefix of an externally stored column. This is similar
+to row_ext_lookup(), but the row_ext_t holds the old values
+of the column and must not be poisoned with the new values.
+@return BLOB prefix */
+static
+byte*
+row_upd_ext_fetch(
+/*==============*/
+ const byte* data, /*!< in: 'internally' stored part of the
+ field containing also the reference to
+ the external part */
+ ulint local_len, /*!< in: length of data, in bytes */
+ ulint zip_size, /*!< in: nonzero=compressed BLOB
+ page size, zero for uncompressed
+ BLOBs */
+ ulint* len, /*!< in: length of prefix to fetch;
+ out: fetched length of the prefix */
+ mem_heap_t* heap) /*!< in: heap where to allocate */
+{
+ byte* buf = mem_heap_alloc(heap, *len);
+
+ *len = btr_copy_externally_stored_field_prefix(buf, *len,
+ zip_size,
+ data, local_len);
+ /* We should never update records containing a half-deleted BLOB. */
+ ut_a(*len);
+
+ return(buf);
+}
+
+/***********************************************************//**
+Replaces the new column value stored in the update vector in
+the given index entry field. */
+static
+void
+row_upd_index_replace_new_col_val(
+/*==============================*/
+ dfield_t* dfield, /*!< in/out: data field
+ of the index entry */
+ const dict_field_t* field, /*!< in: index field */
+ const dict_col_t* col, /*!< in: field->col */
+ const upd_field_t* uf, /*!< in: update field */
+ mem_heap_t* heap, /*!< in: memory heap for allocating
+ and copying the new value */
+ ulint zip_size)/*!< in: compressed page
+ size of the table, or 0 */
+{
+ ulint len;
+ const byte* data;
+
+ dfield_copy_data(dfield, &uf->new_val);
+
+ if (dfield_is_null(dfield)) {
+ return;
+ }
+
+ len = dfield_get_len(dfield);
+ data = dfield_get_data(dfield);
+
+ if (field->prefix_len > 0) {
+ ibool fetch_ext = dfield_is_ext(dfield)
+ && len < (ulint) field->prefix_len
+ + BTR_EXTERN_FIELD_REF_SIZE;
+
+ if (fetch_ext) {
+ ulint l = len;
+
+ len = field->prefix_len;
+
+ data = row_upd_ext_fetch(data, l, zip_size,
+ &len, heap);
+ }
+
+ len = dtype_get_at_most_n_mbchars(col->prtype,
+ col->mbminlen, col->mbmaxlen,
+ field->prefix_len, len,
+ (const char*) data);
+
+ dfield_set_data(dfield, data, len);
+
+ if (!fetch_ext) {
+ dfield_dup(dfield, heap);
+ }
+
+ return;
+ }
+
+ switch (uf->orig_len) {
+ byte* buf;
+ case BTR_EXTERN_FIELD_REF_SIZE:
+ /* Restore the original locally stored
+ part of the column. In the undo log,
+ InnoDB writes a longer prefix of externally
+ stored columns, so that column prefixes
+ in secondary indexes can be reconstructed. */
+ dfield_set_data(dfield,
+ data + len - BTR_EXTERN_FIELD_REF_SIZE,
+ BTR_EXTERN_FIELD_REF_SIZE);
+ dfield_set_ext(dfield);
+ /* fall through */
+ case 0:
+ dfield_dup(dfield, heap);
+ break;
+ default:
+ /* Reconstruct the original locally
+ stored part of the column. The data
+ will have to be copied. */
+ ut_a(uf->orig_len > BTR_EXTERN_FIELD_REF_SIZE);
+ buf = mem_heap_alloc(heap, uf->orig_len);
+ /* Copy the locally stored prefix. */
+ memcpy(buf, data,
+ uf->orig_len - BTR_EXTERN_FIELD_REF_SIZE);
+ /* Copy the BLOB pointer. */
+ memcpy(buf + uf->orig_len - BTR_EXTERN_FIELD_REF_SIZE,
+ data + len - BTR_EXTERN_FIELD_REF_SIZE,
+ BTR_EXTERN_FIELD_REF_SIZE);
+
+ dfield_set_data(dfield, buf, uf->orig_len);
+ dfield_set_ext(dfield);
+ break;
+ }
+}
+
+/***********************************************************//**
Replaces the new column values stored in the update vector to the index entry
given. */
-
+UNIV_INTERN
void
row_upd_index_replace_new_col_vals_index_pos(
/*=========================================*/
- dtuple_t* entry, /* in/out: index entry where replaced */
- dict_index_t* index, /* in: index; NOTE that this may also be a
+ dtuple_t* entry, /*!< in/out: index entry where replaced;
+ the clustered index record must be
+ covered by a lock or a page latch to
+ prevent deletion (rollback or purge) */
+ dict_index_t* index, /*!< in: index; NOTE that this may also be a
non-clustered index */
- upd_t* update, /* in: an update vector built for the index so
+ const upd_t* update, /*!< in: an update vector built for the index so
that the field number in an upd_field is the
index position */
ibool order_only,
- /* in: if TRUE, limit the replacement to
+ /*!< in: if TRUE, limit the replacement to
ordering fields of index; note that this
does not work for non-clustered indexes. */
- mem_heap_t* heap) /* in: memory heap to which we allocate and
- copy the new values, set this as NULL if you
- do not want allocation */
+ mem_heap_t* heap) /*!< in: memory heap for allocating and
+ copying the new values */
{
- dict_field_t* field;
- upd_field_t* upd_field;
- dfield_t* dfield;
- dfield_t* new_val;
- ulint j;
ulint i;
ulint n_fields;
+ const ulint zip_size = dict_table_zip_size(index->table);
ut_ad(index);
@@ -897,144 +1025,168 @@ row_upd_index_replace_new_col_vals_index_pos(
n_fields = dict_index_get_n_fields(index);
}
- for (j = 0; j < n_fields; j++) {
-
- field = dict_index_get_nth_field(index, j);
-
- for (i = 0; i < upd_get_n_fields(update); i++) {
-
- upd_field = upd_get_nth_field(update, i);
-
- if (upd_field->field_no == j) {
-
- dfield = dtuple_get_nth_field(entry, j);
-
- new_val = &(upd_field->new_val);
-
- dfield_set_data(dfield, new_val->data,
- new_val->len);
- if (heap && new_val->len != UNIV_SQL_NULL) {
- dfield->data = mem_heap_alloc(
- heap, new_val->len);
- ut_memcpy(dfield->data, new_val->data,
- new_val->len);
- }
-
- if (field->prefix_len > 0
- && new_val->len != UNIV_SQL_NULL) {
+ for (i = 0; i < n_fields; i++) {
+ const dict_field_t* field;
+ const dict_col_t* col;
+ const upd_field_t* uf;
- const dict_col_t* col
- = dict_field_get_col(field);
+ field = dict_index_get_nth_field(index, i);
+ col = dict_field_get_col(field);
+ uf = upd_get_field_by_field_no(update, i);
- dfield->len
- = dtype_get_at_most_n_mbchars(
- col->prtype,
- col->mbminlen,
- col->mbmaxlen,
- field->prefix_len,
- new_val->len,
- new_val->data);
- }
- }
+ if (uf) {
+ row_upd_index_replace_new_col_val(
+ dtuple_get_nth_field(entry, i),
+ field, col, uf, heap, zip_size);
}
}
}
-/***************************************************************
+/***********************************************************//**
Replaces the new column values stored in the update vector to the index entry
given. */
-
+UNIV_INTERN
void
row_upd_index_replace_new_col_vals(
/*===============================*/
- dtuple_t* entry, /* in/out: index entry where replaced */
- dict_index_t* index, /* in: index; NOTE that this may also be a
+ dtuple_t* entry, /*!< in/out: index entry where replaced;
+ the clustered index record must be
+ covered by a lock or a page latch to
+ prevent deletion (rollback or purge) */
+ dict_index_t* index, /*!< in: index; NOTE that this may also be a
non-clustered index */
- upd_t* update, /* in: an update vector built for the
+ const upd_t* update, /*!< in: an update vector built for the
CLUSTERED index so that the field number in
an upd_field is the clustered index position */
- mem_heap_t* heap) /* in: memory heap to which we allocate and
- copy the new values, set this as NULL if you
- do not want allocation */
+ mem_heap_t* heap) /*!< in: memory heap for allocating and
+ copying the new values */
{
- upd_field_t* upd_field;
- dfield_t* dfield;
- dfield_t* new_val;
- ulint j;
- ulint i;
- dict_index_t* clust_index;
+ ulint i;
+ const dict_index_t* clust_index
+ = dict_table_get_first_index(index->table);
+ const ulint zip_size
+ = dict_table_zip_size(index->table);
- ut_ad(index);
+ dtuple_set_info_bits(entry, update->info_bits);
- clust_index = dict_table_get_first_index(index->table);
+ for (i = 0; i < dict_index_get_n_fields(index); i++) {
+ const dict_field_t* field;
+ const dict_col_t* col;
+ const upd_field_t* uf;
- dtuple_set_info_bits(entry, update->info_bits);
+ field = dict_index_get_nth_field(index, i);
+ col = dict_field_get_col(field);
+ uf = upd_get_field_by_field_no(
+ update, dict_col_get_clust_pos(col, clust_index));
- for (j = 0; j < dict_index_get_n_fields(index); j++) {
+ if (uf) {
+ row_upd_index_replace_new_col_val(
+ dtuple_get_nth_field(entry, i),
+ field, col, uf, heap, zip_size);
+ }
+ }
+}
- ulint clust_pos;
- dict_field_t* field = dict_index_get_nth_field(index, j);
+/***********************************************************//**
+Replaces the new column values stored in the update vector. */
+UNIV_INTERN
+void
+row_upd_replace(
+/*============*/
+ dtuple_t* row, /*!< in/out: row where replaced,
+ indexed by col_no;
+ the clustered index record must be
+ covered by a lock or a page latch to
+ prevent deletion (rollback or purge) */
+ row_ext_t** ext, /*!< out, own: NULL, or externally
+ stored column prefixes */
+ const dict_index_t* index, /*!< in: clustered index */
+ const upd_t* update, /*!< in: an update vector built for the
+ clustered index */
+ mem_heap_t* heap) /*!< in: memory heap */
+{
+ ulint col_no;
+ ulint i;
+ ulint n_cols;
+ ulint n_ext_cols;
+ ulint* ext_cols;
+ const dict_table_t* table;
+
+ ut_ad(row);
+ ut_ad(ext);
+ ut_ad(index);
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(update);
+ ut_ad(heap);
- clust_pos = dict_col_get_clust_pos(field->col, clust_index);
+ n_cols = dtuple_get_n_fields(row);
+ table = index->table;
+ ut_ad(n_cols == dict_table_get_n_cols(table));
- for (i = 0; i < upd_get_n_fields(update); i++) {
+ ext_cols = mem_heap_alloc(heap, n_cols * sizeof *ext_cols);
+ n_ext_cols = 0;
- upd_field = upd_get_nth_field(update, i);
+ dtuple_set_info_bits(row, update->info_bits);
- if (upd_field->field_no == clust_pos) {
+ for (col_no = 0; col_no < n_cols; col_no++) {
- dfield = dtuple_get_nth_field(entry, j);
+ const dict_col_t* col
+ = dict_table_get_nth_col(table, col_no);
+ const ulint clust_pos
+ = dict_col_get_clust_pos(col, index);
+ dfield_t* dfield;
- new_val = &(upd_field->new_val);
+ if (UNIV_UNLIKELY(clust_pos == ULINT_UNDEFINED)) {
- dfield_set_data(dfield, new_val->data,
- new_val->len);
- if (heap && new_val->len != UNIV_SQL_NULL) {
- dfield->data = mem_heap_alloc(
- heap, new_val->len);
- ut_memcpy(dfield->data, new_val->data,
- new_val->len);
- }
+ continue;
+ }
- if (field->prefix_len > 0
- && new_val->len != UNIV_SQL_NULL) {
+ dfield = dtuple_get_nth_field(row, col_no);
- const dict_col_t* col
- = dict_field_get_col(field);
+ for (i = 0; i < upd_get_n_fields(update); i++) {
- dfield->len
- = dtype_get_at_most_n_mbchars(
- col->prtype,
- col->mbminlen,
- col->mbmaxlen,
- field->prefix_len,
- new_val->len,
- new_val->data);
- }
+ const upd_field_t* upd_field
+ = upd_get_nth_field(update, i);
+
+ if (upd_field->field_no != clust_pos) {
+
+ continue;
}
+
+ dfield_copy_data(dfield, &upd_field->new_val);
+ break;
+ }
+
+ if (dfield_is_ext(dfield) && col->ord_part) {
+ ext_cols[n_ext_cols++] = col_no;
}
}
+
+ if (n_ext_cols) {
+ *ext = row_ext_create(n_ext_cols, ext_cols, row,
+ dict_table_zip_size(table), heap);
+ } else {
+ *ext = NULL;
+ }
}
-/***************************************************************
+/***********************************************************//**
Checks if an update vector changes an ordering field of an index record.
+
This function is fast if the update vector is short or the number of ordering
fields in the index is small. Otherwise, this can be quadratic.
-NOTE: we compare the fields as binary strings! */
-
+NOTE: we compare the fields as binary strings!
+@return TRUE if update vector changes an ordering field in the index record */
+UNIV_INTERN
ibool
row_upd_changes_ord_field_binary(
/*=============================*/
- /* out: TRUE if update vector changes
- an ordering field in the index record;
- NOTE: the fields are compared as binary
- strings */
- dtuple_t* row, /* in: old value of row, or NULL if the
+ const dtuple_t* row, /*!< in: old value of row, or NULL if the
row and the data values in update are not
known when this function is called, e.g., at
compile time */
- dict_index_t* index, /* in: index of the record */
- upd_t* update) /* in: update vector for the row; NOTE: the
+ dict_index_t* index, /*!< in: index of the record */
+ const upd_t* update) /*!< in: update vector for the row; NOTE: the
field numbers in this MUST be clustered index
positions! */
{
@@ -1064,7 +1216,7 @@ row_upd_changes_ord_field_binary(
for (j = 0; j < n_upd_fields; j++) {
- upd_field_t* upd_field
+ const upd_field_t* upd_field
= upd_get_nth_field(update, j);
/* Note that if the index field is a column prefix
@@ -1087,17 +1239,17 @@ row_upd_changes_ord_field_binary(
return(FALSE);
}
-/***************************************************************
+/***********************************************************//**
Checks if an update vector changes an ordering field of an index record.
-NOTE: we compare the fields as binary strings! */
-
+NOTE: we compare the fields as binary strings!
+@return TRUE if update vector may change an ordering field in an index
+record */
+UNIV_INTERN
ibool
row_upd_changes_some_index_ord_field_binary(
/*========================================*/
- /* out: TRUE if update vector may change
- an ordering field in an index record */
- dict_table_t* table, /* in: table */
- upd_t* update) /* in: update vector for the row */
+ const dict_table_t* table, /*!< in: table */
+ const upd_t* update) /*!< in: update vector for the row */
{
upd_field_t* upd_field;
dict_index_t* index;
@@ -1120,19 +1272,19 @@ row_upd_changes_some_index_ord_field_binary(
return(FALSE);
}
-/***************************************************************
+/***********************************************************//**
Checks if an update vector changes some of the first ordering fields of an
index record. This is only used in foreign key checks and we can assume
-that index does not contain column prefixes. */
+that index does not contain column prefixes.
+@return TRUE if changes */
static
ibool
row_upd_changes_first_fields_binary(
/*================================*/
- /* out: TRUE if changes */
- dtuple_t* entry, /* in: index entry */
- dict_index_t* index, /* in: index of entry */
- upd_t* update, /* in: update vector for the row */
- ulint n) /* in: how many first fields to check */
+ dtuple_t* entry, /*!< in: index entry */
+ dict_index_t* index, /*!< in: index of entry */
+ const upd_t* update, /*!< in: update vector for the row */
+ ulint n) /*!< in: how many first fields to check */
{
ulint n_upd_fields;
ulint i, j;
@@ -1174,15 +1326,15 @@ row_upd_changes_first_fields_binary(
return(FALSE);
}
-/*************************************************************************
+/*********************************************************************//**
Copies the column values from a record. */
UNIV_INLINE
void
row_upd_copy_columns(
/*=================*/
- rec_t* rec, /* in: record in a clustered index */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- sym_node_t* column) /* in: first column in a column list, or
+ rec_t* rec, /*!< in: record in a clustered index */
+ const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ sym_node_t* column) /*!< in: first column in a column list, or
NULL */
{
byte* data;
@@ -1192,20 +1344,23 @@ row_upd_copy_columns(
data = rec_get_nth_field(rec, offsets,
column->field_nos[SYM_CLUST_FIELD_NO],
&len);
+ if (len == UNIV_SQL_NULL) {
+ len = UNIV_SQL_NULL;
+ }
eval_node_copy_and_alloc_val(column, data, len);
column = UT_LIST_GET_NEXT(col_var_list, column);
}
}
-/*************************************************************************
+/*********************************************************************//**
Calculates the new values for fields to update. Note that row_upd_copy_columns
must have been called first. */
UNIV_INLINE
void
row_upd_eval_new_vals(
/*==================*/
- upd_t* update) /* in: update vector */
+ upd_t* update) /*!< in/out: update vector */
{
que_node_t* exp;
upd_field_t* upd_field;
@@ -1225,27 +1380,25 @@ row_upd_eval_new_vals(
}
}
-/***************************************************************
+/***********************************************************//**
Stores to the heap the row on which the node->pcur is positioned. */
static
void
row_upd_store_row(
/*==============*/
- upd_node_t* node) /* in: row update node */
+ upd_node_t* node) /*!< in: row update node */
{
dict_index_t* clust_index;
- upd_t* update;
rec_t* rec;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
const ulint* offsets;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
ut_ad(node->pcur->latch_mode != BTR_NO_LATCHES);
if (node->row != NULL) {
mem_heap_empty(node->heap);
- node->row = NULL;
}
clust_index = dict_table_get_first_index(node->table);
@@ -1255,32 +1408,31 @@ row_upd_store_row(
offsets = rec_get_offsets(rec, clust_index, offsets_,
ULINT_UNDEFINED, &heap);
node->row = row_build(ROW_COPY_DATA, clust_index, rec, offsets,
- node->heap);
- node->ext_vec = mem_heap_alloc(node->heap, sizeof(ulint)
- * rec_offs_n_fields(offsets));
+ NULL, &node->ext, node->heap);
if (node->is_delete) {
- update = NULL;
+ node->upd_row = NULL;
+ node->upd_ext = NULL;
} else {
- update = node->update;
+ node->upd_row = dtuple_copy(node->row, node->heap);
+ row_upd_replace(node->upd_row, &node->upd_ext,
+ clust_index, node->update, node->heap);
}
- node->n_ext_vec = btr_push_update_extern_fields(node->ext_vec,
- offsets, update);
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
}
-/***************************************************************
-Updates a secondary index entry of a row. */
+/***********************************************************//**
+Updates a secondary index entry of a row.
+@return DB_SUCCESS if operation successfully completed, else error
+code or DB_LOCK_WAIT */
static
ulint
row_upd_sec_index_entry(
/*====================*/
- /* out: DB_SUCCESS if operation successfully
- completed, else error code or DB_LOCK_WAIT */
- upd_node_t* node, /* in: row update node */
- que_thr_t* thr) /* in: query thread */
+ upd_node_t* node, /*!< in: row update node */
+ que_thr_t* thr) /*!< in: query thread */
{
ibool check_ref;
ibool found;
@@ -1301,7 +1453,8 @@ row_upd_sec_index_entry(
heap = mem_heap_create(1024);
/* Build old index entry */
- entry = row_build_index_entry(node->row, index, heap);
+ entry = row_build_index_entry(node->row, node->ext, index, heap);
+ ut_a(entry);
log_free_check();
mtr_start(&mtr);
@@ -1340,87 +1493,82 @@ row_upd_sec_index_entry(
thr, &mtr);
if (err == DB_SUCCESS && check_ref) {
+ ulint* offsets = rec_get_offsets(
+ rec, index, NULL,
+ ULINT_UNDEFINED, &heap);
/* NOTE that the following call loses
the position of pcur ! */
err = row_upd_check_references_constraints(
node, &pcur, index->table,
- index, thr, &mtr);
- if (err != DB_SUCCESS) {
-
- goto close_cur;
- }
+ index, offsets, thr, &mtr);
}
-
}
}
-close_cur:
+
btr_pcur_close(&pcur);
mtr_commit(&mtr);
if (node->is_delete || err != DB_SUCCESS) {
- mem_heap_free(heap);
-
- return(err);
+ goto func_exit;
}
/* Build a new index entry */
- row_upd_index_replace_new_col_vals(entry, index, node->update, NULL);
+ entry = row_build_index_entry(node->upd_row, node->upd_ext,
+ index, heap);
+ ut_a(entry);
/* Insert new index entry */
- err = row_ins_index_entry(index, entry, NULL, 0, thr);
+ err = row_ins_index_entry(index, entry, 0, TRUE, thr);
+func_exit:
mem_heap_free(heap);
return(err);
}
-/***************************************************************
+/***********************************************************//**
Updates the secondary index record if it is changed in the row update or
-deletes it if this is a delete. */
+deletes it if this is a delete.
+@return DB_SUCCESS if operation successfully completed, else error
+code or DB_LOCK_WAIT */
UNIV_INLINE
ulint
row_upd_sec_step(
/*=============*/
- /* out: DB_SUCCESS if operation successfully
- completed, else error code or DB_LOCK_WAIT */
- upd_node_t* node, /* in: row update node */
- que_thr_t* thr) /* in: query thread */
+ upd_node_t* node, /*!< in: row update node */
+ que_thr_t* thr) /*!< in: query thread */
{
- ulint err;
-
ut_ad((node->state == UPD_NODE_UPDATE_ALL_SEC)
|| (node->state == UPD_NODE_UPDATE_SOME_SEC));
- ut_ad(!(node->index->type & DICT_CLUSTERED));
+ ut_ad(!dict_index_is_clust(node->index));
if (node->state == UPD_NODE_UPDATE_ALL_SEC
|| row_upd_changes_ord_field_binary(node->row, node->index,
node->update)) {
- err = row_upd_sec_index_entry(node, thr);
-
- return(err);
+ return(row_upd_sec_index_entry(node, thr));
}
return(DB_SUCCESS);
}
-/***************************************************************
+/***********************************************************//**
Marks the clustered index record deleted and inserts the updated version
of the record to the index. This function should be used when the ordering
fields of the clustered index record change. This should be quite rare in
-database applications. */
+database applications.
+@return DB_SUCCESS if operation successfully completed, else error
+code or DB_LOCK_WAIT */
static
ulint
row_upd_clust_rec_by_insert(
/*========================*/
- /* out: DB_SUCCESS if operation successfully
- completed, else error code or DB_LOCK_WAIT */
- upd_node_t* node, /* in: row update node */
- dict_index_t* index, /* in: clustered index of the record */
- que_thr_t* thr, /* in: query thread */
- ibool check_ref,/* in: TRUE if index may be referenced in
+ upd_node_t* node, /*!< in: row update node */
+ dict_index_t* index, /*!< in: clustered index of the record */
+ que_thr_t* thr, /*!< in: query thread */
+ ibool check_ref,/*!< in: TRUE if index may be referenced in
a foreign key constraint */
- mtr_t* mtr) /* in: mtr; gets committed here */
+ mtr_t* mtr) /*!< in: mtr; gets committed here */
{
mem_heap_t* heap = NULL;
btr_pcur_t* pcur;
@@ -1431,7 +1579,7 @@ row_upd_clust_rec_by_insert(
ulint err;
ut_ad(node);
- ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(dict_index_is_clust(index));
trx = thr_get_trx(thr);
table = node->table;
@@ -1439,8 +1587,11 @@ row_upd_clust_rec_by_insert(
btr_cur = btr_pcur_get_btr_cur(pcur);
if (node->state != UPD_NODE_INSERT_CLUSTERED) {
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_t* rec;
+ dict_index_t* index;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets;
+ rec_offs_init(offsets_);
err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG,
btr_cur, TRUE, thr, mtr);
@@ -1454,17 +1605,18 @@ row_upd_clust_rec_by_insert(
free those externally stored fields even if the delete marked
record is removed from the index tree, or updated. */
+ rec = btr_cur_get_rec(btr_cur);
+ index = dict_table_get_first_index(table);
+ offsets = rec_get_offsets(rec, index, offsets_,
+ ULINT_UNDEFINED, &heap);
btr_cur_mark_extern_inherited_fields(
- btr_cur_get_rec(btr_cur),
- rec_get_offsets(btr_cur_get_rec(btr_cur),
- dict_table_get_first_index(table),
- offsets_, ULINT_UNDEFINED, &heap),
- node->update, mtr);
+ btr_cur_get_page_zip(btr_cur),
+ rec, index, offsets, node->update, mtr);
if (check_ref) {
/* NOTE that the following call loses
the position of pcur ! */
err = row_upd_check_references_constraints(
- node, pcur, table, index, thr, mtr);
+ node, pcur, table, index, offsets, thr, mtr);
if (err != DB_SUCCESS) {
mtr_commit(mtr);
if (UNIV_LIKELY_NULL(heap)) {
@@ -1473,7 +1625,6 @@ row_upd_clust_rec_by_insert(
return(err);
}
}
-
}
mtr_commit(mtr);
@@ -1483,53 +1634,55 @@ row_upd_clust_rec_by_insert(
}
node->state = UPD_NODE_INSERT_CLUSTERED;
- entry = row_build_index_entry(node->row, index, heap);
-
- row_upd_index_replace_new_col_vals(entry, index, node->update, NULL);
+ entry = row_build_index_entry(node->upd_row, node->upd_ext,
+ index, heap);
+ ut_a(entry);
row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id);
- /* If we return from a lock wait, for example, we may have
- extern fields marked as not-owned in entry (marked in the
- if-branch above). We must unmark them. */
+ if (node->upd_ext) {
+ /* If we return from a lock wait, for example, we may have
+ extern fields marked as not-owned in entry (marked in the
+ if-branch above). We must unmark them. */
- btr_cur_unmark_dtuple_extern_fields(entry, node->ext_vec,
- node->n_ext_vec);
- /* We must mark non-updated extern fields in entry as inherited,
- so that a possible rollback will not free them */
+ btr_cur_unmark_dtuple_extern_fields(entry);
- btr_cur_mark_dtuple_inherited_extern(entry, node->ext_vec,
- node->n_ext_vec,
- node->update);
+ /* We must mark non-updated extern fields in entry as
+ inherited, so that a possible rollback will not free them. */
+
+ btr_cur_mark_dtuple_inherited_extern(entry, node->update);
+ }
- err = row_ins_index_entry(index, entry, node->ext_vec,
- node->n_ext_vec, thr);
+ err = row_ins_index_entry(index, entry,
+ node->upd_ext ? node->upd_ext->n_ext : 0,
+ TRUE, thr);
mem_heap_free(heap);
return(err);
}
-/***************************************************************
+/***********************************************************//**
Updates a clustered index record of a row when the ordering fields do
-not change. */
+not change.
+@return DB_SUCCESS if operation successfully completed, else error
+code or DB_LOCK_WAIT */
static
ulint
row_upd_clust_rec(
/*==============*/
- /* out: DB_SUCCESS if operation successfully
- completed, else error code or DB_LOCK_WAIT */
- upd_node_t* node, /* in: row update node */
- dict_index_t* index, /* in: clustered index */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr) /* in: mtr; gets committed here */
+ upd_node_t* node, /*!< in: row update node */
+ dict_index_t* index, /*!< in: clustered index */
+ que_thr_t* thr, /*!< in: query thread */
+ mtr_t* mtr) /*!< in: mtr; gets committed here */
{
+ mem_heap_t* heap = NULL;
big_rec_t* big_rec = NULL;
btr_pcur_t* pcur;
btr_cur_t* btr_cur;
ulint err;
ut_ad(node);
- ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(dict_index_is_clust(index));
pcur = node->pcur;
btr_cur = btr_pcur_get_btr_cur(pcur);
@@ -1553,9 +1706,9 @@ row_upd_clust_rec(
mtr_commit(mtr);
- if (err == DB_SUCCESS) {
+ if (UNIV_LIKELY(err == DB_SUCCESS)) {
- return(err);
+ return(DB_SUCCESS);
}
if (buf_LRU_buf_pool_running_out()) {
@@ -1579,31 +1732,31 @@ row_upd_clust_rec(
dict_table_is_comp(index->table)));
err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG, btr_cur,
- &big_rec, node->update,
+ &heap, &big_rec, node->update,
node->cmpl_info, thr, mtr);
mtr_commit(mtr);
if (err == DB_SUCCESS && big_rec) {
- mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
rec_t* rec;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
mtr_start(mtr);
ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr));
rec = btr_cur_get_rec(btr_cur);
err = btr_store_big_rec_extern_fields(
- index, rec,
+ index, btr_cur_get_block(btr_cur), rec,
rec_get_offsets(rec, index, offsets_,
ULINT_UNDEFINED, &heap),
- big_rec, mtr);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
+ big_rec, mtr);
mtr_commit(mtr);
}
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+
if (big_rec) {
dtuple_big_rec_free(big_rec);
}
@@ -1611,27 +1764,28 @@ row_upd_clust_rec(
return(err);
}
-/***************************************************************
-Delete marks a clustered index record. */
+/***********************************************************//**
+Delete marks a clustered index record.
+@return DB_SUCCESS if operation successfully completed, else error code */
static
ulint
row_upd_del_mark_clust_rec(
/*=======================*/
- /* out: DB_SUCCESS if operation successfully
- completed, else error code */
- upd_node_t* node, /* in: row update node */
- dict_index_t* index, /* in: clustered index */
- que_thr_t* thr, /* in: query thread */
- ibool check_ref,/* in: TRUE if index may be referenced in
+ upd_node_t* node, /*!< in: row update node */
+ dict_index_t* index, /*!< in: clustered index */
+ ulint* offsets,/*!< in/out: rec_get_offsets() for the
+ record under the cursor */
+ que_thr_t* thr, /*!< in: query thread */
+ ibool check_ref,/*!< in: TRUE if index may be referenced in
a foreign key constraint */
- mtr_t* mtr) /* in: mtr; gets committed here */
+ mtr_t* mtr) /*!< in: mtr; gets committed here */
{
btr_pcur_t* pcur;
btr_cur_t* btr_cur;
ulint err;
ut_ad(node);
- ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(dict_index_is_clust(index));
ut_ad(node->is_delete);
pcur = node->pcur;
@@ -1652,12 +1806,8 @@ row_upd_del_mark_clust_rec(
err = row_upd_check_references_constraints(node,
pcur, index->table,
- index, thr, mtr);
- if (err != DB_SUCCESS) {
- mtr_commit(mtr);
-
- return(err);
- }
+ index, offsets,
+ thr, mtr);
}
mtr_commit(mtr);
@@ -1665,17 +1815,16 @@ row_upd_del_mark_clust_rec(
return(err);
}
-/***************************************************************
-Updates the clustered index record. */
+/***********************************************************//**
+Updates the clustered index record.
+@return DB_SUCCESS if operation successfully completed, DB_LOCK_WAIT
+in case of a lock wait, else error code */
static
ulint
row_upd_clust_step(
/*===============*/
- /* out: DB_SUCCESS if operation successfully
- completed, DB_LOCK_WAIT in case of a lock wait,
- else error code */
- upd_node_t* node, /* in: row update node */
- que_thr_t* thr) /* in: query thread */
+ upd_node_t* node, /*!< in: row update node */
+ que_thr_t* thr) /*!< in: query thread */
{
dict_index_t* index;
btr_pcur_t* pcur;
@@ -1687,8 +1836,8 @@ row_upd_clust_step(
rec_t* rec;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
- const ulint* offsets;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ ulint* offsets;
+ rec_offs_init(offsets_);
index = dict_table_get_first_index(node->table);
@@ -1751,7 +1900,8 @@ row_upd_clust_step(
if (!node->has_clust_rec_x_lock) {
err = lock_clust_rec_modify_check_and_lock(
- 0, rec, index, offsets, thr);
+ 0, btr_pcur_get_block(pcur),
+ rec, index, offsets, thr);
if (err != DB_SUCCESS) {
mtr_commit(mtr);
goto exit_func;
@@ -1761,8 +1911,8 @@ row_upd_clust_step(
/* NOTE: the following function calls will also commit mtr */
if (node->is_delete) {
- err = row_upd_del_mark_clust_rec(node, index, thr, check_ref,
- mtr);
+ err = row_upd_del_mark_clust_rec(node, index, offsets,
+ thr, check_ref, mtr);
if (err == DB_SUCCESS) {
node->state = UPD_NODE_UPDATE_ALL_SEC;
node->index = dict_table_get_next_index(index);
@@ -1777,7 +1927,7 @@ exit_func:
/* If the update is made for MySQL, we already have the update vector
ready, else we have to do some evaluation: */
- if (!node->in_mysql_interface) {
+ if (UNIV_UNLIKELY(!node->in_mysql_interface)) {
/* Copy the necessary columns from clust_rec and calculate the
new values to set */
row_upd_copy_columns(rec, offsets,
@@ -1834,18 +1984,18 @@ exit_func:
return(err);
}
-/***************************************************************
+/***********************************************************//**
Updates the affected index records of a row. When the control is transferred
to this node, we assume that we have a persistent cursor which was on a
-record, and the position of the cursor is stored in the cursor. */
+record, and the position of the cursor is stored in the cursor.
+@return DB_SUCCESS if operation successfully completed, else error
+code or DB_LOCK_WAIT */
static
ulint
row_upd(
/*====*/
- /* out: DB_SUCCESS if operation successfully
- completed, else error code or DB_LOCK_WAIT */
- upd_node_t* node, /* in: row update node */
- que_thr_t* thr) /* in: query thread */
+ upd_node_t* node, /*!< in: row update node */
+ que_thr_t* thr) /*!< in: query thread */
{
ulint err = DB_SUCCESS;
@@ -1898,7 +2048,9 @@ function_exit:
if (node->row != NULL) {
node->row = NULL;
- node->n_ext_vec = 0;
+ node->ext = NULL;
+ node->upd_row = NULL;
+ node->upd_ext = NULL;
mem_heap_empty(node->heap);
}
@@ -1908,15 +2060,15 @@ function_exit:
return(err);
}
-/***************************************************************
+/***********************************************************//**
Updates a row in a table. This is a high-level function used in SQL execution
-graphs. */
-
+graphs.
+@return query thread to run next or NULL */
+UNIV_INTERN
que_thr_t*
row_upd_step(
/*=========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
upd_node_t* node;
sel_node_t* sel_node;
@@ -2022,60 +2174,4 @@ error_handling:
return(thr);
}
-
-/*************************************************************************
-Performs an in-place update for the current clustered index record in
-select. */
-
-void
-row_upd_in_place_in_select(
-/*=======================*/
- sel_node_t* sel_node, /* in: select node */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr) /* in: mtr */
-{
- upd_node_t* node;
- btr_pcur_t* pcur;
- btr_cur_t* btr_cur;
- ulint err;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- ut_ad(sel_node->select_will_do_update);
- ut_ad(sel_node->latch_mode == BTR_MODIFY_LEAF);
- ut_ad(sel_node->asc);
-
- node = que_node_get_parent(sel_node);
-
- ut_ad(que_node_get_type(node) == QUE_NODE_UPDATE);
-
- pcur = node->pcur;
- btr_cur = btr_pcur_get_btr_cur(pcur);
-
- /* Copy the necessary columns from clust_rec and calculate the new
- values to set */
-
- row_upd_copy_columns(btr_pcur_get_rec(pcur),
- rec_get_offsets(btr_pcur_get_rec(pcur),
- btr_cur->index, offsets_,
- ULINT_UNDEFINED, &heap),
- UT_LIST_GET_FIRST(node->columns));
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- row_upd_eval_new_vals(node->update);
-
- ut_ad(!rec_get_deleted_flag(
- btr_pcur_get_rec(pcur),
- dict_table_is_comp(btr_cur->index->table)));
-
- ut_ad(node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE);
- ut_ad(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE);
- ut_ad(node->select_will_do_update);
-
- err = btr_cur_update_in_place(BTR_NO_LOCKING_FLAG, btr_cur,
- node->update, node->cmpl_info,
- thr, mtr);
- ut_ad(err == DB_SUCCESS);
-}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/row/row0vers.c b/storage/innobase/row/row0vers.c
index 03d9a2f1203..a4fbb5289aa 100644
--- a/storage/innobase/row/row0vers.c
+++ b/storage/innobase/row/row0vers.c
@@ -1,7 +1,24 @@
-/******************************************************
-Row versions
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1997 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0vers.c
+Row versions
Created 2/6/1997 Heikki Tuuri
*******************************************************/
@@ -29,35 +46,30 @@ Created 2/6/1997 Heikki Tuuri
#include "read0read.h"
#include "lock0lock.h"
-/*********************************************************************
+/*****************************************************************//**
Finds out if an active transaction has inserted or modified a secondary
index record. NOTE: the kernel mutex is temporarily released in this
-function! */
-
+function!
+@return NULL if committed, else the active transaction */
+UNIV_INTERN
trx_t*
row_vers_impl_x_locked_off_kernel(
/*==============================*/
- /* out: NULL if committed, else the active
- transaction; NOTE that the kernel mutex is
- temporarily released! */
- rec_t* rec, /* in: record in a secondary index */
- dict_index_t* index, /* in: the secondary index */
- const ulint* offsets)/* in: rec_get_offsets(rec, index) */
+ const rec_t* rec, /*!< in: record in a secondary index */
+ dict_index_t* index, /*!< in: the secondary index */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
{
dict_index_t* clust_index;
rec_t* clust_rec;
ulint* clust_offsets;
rec_t* version;
- rec_t* prev_version;
- dulint trx_id;
- dulint prev_trx_id;
+ trx_id_t trx_id;
mem_heap_t* heap;
mem_heap_t* heap2;
dtuple_t* row;
dtuple_t* entry = NULL; /* assignment to eliminate compiler
warning */
trx_t* trx;
- ulint vers_del;
ulint rec_del;
ulint err;
mtr_t mtr;
@@ -141,6 +153,11 @@ row_vers_impl_x_locked_off_kernel(
version = clust_rec;
for (;;) {
+ rec_t* prev_version;
+ ulint vers_del;
+ row_ext_t* ext;
+ trx_id_t prev_trx_id;
+
mutex_exit(&kernel_mutex);
/* While we retrieve an earlier version of clust_rec, we
@@ -157,34 +174,63 @@ row_vers_impl_x_locked_off_kernel(
heap, &prev_version);
mem_heap_free(heap2); /* free version and clust_offsets */
- if (prev_version) {
- clust_offsets = rec_get_offsets(
- prev_version, clust_index, NULL,
- ULINT_UNDEFINED, &heap);
- row = row_build(ROW_COPY_POINTERS, clust_index,
- prev_version, clust_offsets, heap);
- entry = row_build_index_entry(row, index, heap);
- }
+ if (prev_version == NULL) {
+ mutex_enter(&kernel_mutex);
- mutex_enter(&kernel_mutex);
+ if (!trx_is_active(trx_id)) {
+ /* Transaction no longer active: no
+ implicit x-lock */
- if (!trx_is_active(trx_id)) {
- /* Transaction no longer active: no implicit x-lock */
+ break;
+ }
+
+ /* If the transaction is still active,
+ clust_rec must be a fresh insert, because no
+ previous version was found. */
+ ut_ad(err == DB_SUCCESS);
+
+ /* It was a freshly inserted version: there is an
+ implicit x-lock on rec */
+
+ trx = trx_get_on_id(trx_id);
break;
}
- /* If the transaction is still active, the previous version
- of clust_rec must be accessible if not a fresh insert; we
- may assert the following: */
+ clust_offsets = rec_get_offsets(prev_version, clust_index,
+ NULL, ULINT_UNDEFINED, &heap);
- ut_ad(err == DB_SUCCESS);
+ vers_del = rec_get_deleted_flag(prev_version, comp);
+ prev_trx_id = row_get_rec_trx_id(prev_version, clust_index,
+ clust_offsets);
- if (prev_version == NULL) {
- /* It was a freshly inserted version: there is an
- implicit x-lock on rec */
+ /* If the trx_id and prev_trx_id are different and if
+ the prev_version is marked deleted then the
+ prev_trx_id must have already committed for the trx_id
+ to be able to modify the row. Therefore, prev_trx_id
+ cannot hold any implicit lock. */
+ if (vers_del && 0 != ut_dulint_cmp(trx_id, prev_trx_id)) {
- trx = trx_get_on_id(trx_id);
+ mutex_enter(&kernel_mutex);
+ break;
+ }
+
+ /* The stack of versions is locked by mtr. Thus, it
+ is safe to fetch the prefixes for externally stored
+ columns. */
+ row = row_build(ROW_COPY_POINTERS, clust_index, prev_version,
+ clust_offsets, NULL, &ext, heap);
+ entry = row_build_index_entry(row, ext, index, heap);
+ /* entry may be NULL if a record was inserted in place
+ of a deleted record, and the BLOB pointers of the new
+ record were not initialized yet. But in that case,
+ prev_version should be NULL. */
+ ut_a(entry);
+
+ mutex_enter(&kernel_mutex);
+
+ if (!trx_is_active(trx_id)) {
+ /* Transaction no longer active: no implicit x-lock */
break;
}
@@ -194,7 +240,10 @@ row_vers_impl_x_locked_off_kernel(
if prev_version would require rec to be in a different
state. */
- vers_del = rec_get_deleted_flag(prev_version, comp);
+ /* The previous version of clust_rec must be
+ accessible, because the transaction is still active
+ and clust_rec was not a fresh insert. */
+ ut_ad(err == DB_SUCCESS);
/* We check if entry and rec are identified in the alphabetical
ordering */
@@ -231,9 +280,6 @@ row_vers_impl_x_locked_off_kernel(
break;
}
- prev_trx_id = row_get_rec_trx_id(prev_version, clust_index,
- clust_offsets);
-
if (0 != ut_dulint_cmp(trx_id, prev_trx_id)) {
/* The versions modified by the trx_id transaction end
to prev_version: no implicit x-lock */
@@ -251,17 +297,18 @@ exit_func:
return(trx);
}
-/*********************************************************************
+/*****************************************************************//**
Finds out if we must preserve a delete marked earlier version of a clustered
-index record, because it is >= the purge view. */
-
+index record, because it is >= the purge view.
+@return TRUE if earlier version should be preserved */
+UNIV_INTERN
ibool
row_vers_must_preserve_del_marked(
/*==============================*/
- /* out: TRUE if earlier version should be preserved */
- dulint trx_id, /* in: transaction id in the version */
- mtr_t* mtr) /* in: mtr holding the latch on the clustered index
- record; it will also hold the latch on purge_view */
+ trx_id_t trx_id, /*!< in: transaction id in the version */
+ mtr_t* mtr) /*!< in: mtr holding the latch on the
+ clustered index record; it will also
+ hold the latch on purge_view */
{
#ifdef UNIV_SYNC_DEBUG
ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
@@ -280,41 +327,40 @@ row_vers_must_preserve_del_marked(
return(FALSE);
}
-/*********************************************************************
+/*****************************************************************//**
Finds out if a version of the record, where the version >= the current
purge view, should have ientry as its secondary index entry. We check
if there is any not delete marked version of the record where the trx
id >= purge view, and the secondary index entry and ientry are identified in
-the alphabetical ordering; exactly in this case we return TRUE. */
-
+the alphabetical ordering; exactly in this case we return TRUE.
+@return TRUE if an earlier version should have ientry as its index entry */
+UNIV_INTERN
ibool
row_vers_old_has_index_entry(
/*=========================*/
- /* out: TRUE if earlier version should have */
- ibool also_curr,/* in: TRUE if also rec is included in the
+ ibool also_curr,/*!< in: TRUE if also rec is included in the
versions to search; otherwise only versions
prior to it are searched */
- rec_t* rec, /* in: record in the clustered index; the
+ const rec_t* rec, /*!< in: record in the clustered index; the
caller must have a latch on the page */
- mtr_t* mtr, /* in: mtr holding the latch on rec; it will
+ mtr_t* mtr, /*!< in: mtr holding the latch on rec; it will
also hold the latch on purge_view */
- dict_index_t* index, /* in: the secondary index */
- dtuple_t* ientry) /* in: the secondary index entry */
+ dict_index_t* index, /*!< in: the secondary index */
+ const dtuple_t* ientry) /*!< in: the secondary index entry */
{
- rec_t* version;
+ const rec_t* version;
rec_t* prev_version;
dict_index_t* clust_index;
ulint* clust_offsets;
mem_heap_t* heap;
mem_heap_t* heap2;
- dtuple_t* row;
- dtuple_t* entry;
+ const dtuple_t* row;
+ const dtuple_t* entry;
ulint err;
ulint comp;
- ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains(mtr, buf_block_align(rec),
- MTR_MEMO_PAGE_S_FIX));
+ ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
+ || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
#ifdef UNIV_SYNC_DEBUG
ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */
@@ -329,17 +375,37 @@ row_vers_old_has_index_entry(
ULINT_UNDEFINED, &heap);
if (also_curr && !rec_get_deleted_flag(rec, comp)) {
+ row_ext_t* ext;
+
+ /* The stack of versions is locked by mtr.
+ Thus, it is safe to fetch the prefixes for
+ externally stored columns. */
row = row_build(ROW_COPY_POINTERS, clust_index,
- rec, clust_offsets, heap);
- entry = row_build_index_entry(row, index, heap);
+ rec, clust_offsets, NULL, &ext, heap);
+ entry = row_build_index_entry(row, ext, index, heap);
+
+ /* If entry == NULL, the record contains unset BLOB
+ pointers. This must be a freshly inserted record. If
+ this is called from
+ row_purge_remove_sec_if_poss_low(), the thread will
+ hold latches on the clustered index and the secondary
+ index. Because the insert works in three steps:
+
+ (1) insert the record to clustered index
+ (2) store the BLOBs and update BLOB pointers
+ (3) insert records to secondary indexes
+
+ the purge thread can safely ignore freshly inserted
+ records and delete the secondary index record. The
+ thread that inserted the new record will be inserting
+ the secondary index records. */
/* NOTE that we cannot do the comparison as binary
fields because the row is maybe being modified so that
- the clustered index record has already been updated
- to a different binary value in a char field, but the
+ the clustered index record has already been updated to
+ a different binary value in a char field, but the
collation identifies the old and new value anyway! */
-
- if (dtuple_datas_are_ordering_equal(ientry, entry)) {
+ if (entry && !dtuple_coll_cmp(ientry, entry)) {
mem_heap_free(heap);
@@ -369,9 +435,21 @@ row_vers_old_has_index_entry(
NULL, ULINT_UNDEFINED, &heap);
if (!rec_get_deleted_flag(prev_version, comp)) {
+ row_ext_t* ext;
+
+ /* The stack of versions is locked by mtr.
+ Thus, it is safe to fetch the prefixes for
+ externally stored columns. */
row = row_build(ROW_COPY_POINTERS, clust_index,
- prev_version, clust_offsets, heap);
- entry = row_build_index_entry(row, index, heap);
+ prev_version, clust_offsets,
+ NULL, &ext, heap);
+ entry = row_build_index_entry(row, ext, index, heap);
+
+ /* If entry == NULL, the record contains unset
+ BLOB pointers. This must be a freshly
+ inserted record that we can safely ignore.
+ For the justification, see the comments after
+ the previous row_build_index_entry() call. */
/* NOTE that we cannot do the comparison as binary
fields because maybe the secondary index record has
@@ -379,7 +457,7 @@ row_vers_old_has_index_entry(
a char field, but the collation identifies the old
and new value anyway! */
- if (dtuple_datas_are_ordering_equal(ientry, entry)) {
+ if (entry && !dtuple_coll_cmp(ientry, entry)) {
mem_heap_free(heap);
@@ -391,45 +469,44 @@ row_vers_old_has_index_entry(
}
}
-/*********************************************************************
+/*****************************************************************//**
Constructs the version of a clustered index record which a consistent
read should see. We assume that the trx id stored in rec is such that
-the consistent read should not see rec in its present version. */
-
+the consistent read should not see rec in its present version.
+@return DB_SUCCESS or DB_MISSING_HISTORY */
+UNIV_INTERN
ulint
row_vers_build_for_consistent_read(
/*===============================*/
- /* out: DB_SUCCESS or DB_MISSING_HISTORY */
- rec_t* rec, /* in: record in a clustered index; the
+ const rec_t* rec, /*!< in: record in a clustered index; the
caller must have a latch on the page; this
latch locks the top of the stack of versions
of this records */
- mtr_t* mtr, /* in: mtr holding the latch on rec */
- dict_index_t* index, /* in: the clustered index */
- ulint** offsets,/* in/out: offsets returned by
+ mtr_t* mtr, /*!< in: mtr holding the latch on rec */
+ dict_index_t* index, /*!< in: the clustered index */
+ ulint** offsets,/*!< in/out: offsets returned by
rec_get_offsets(rec, index) */
- read_view_t* view, /* in: the consistent read view */
- mem_heap_t** offset_heap,/* in/out: memory heap from which
+ read_view_t* view, /*!< in: the consistent read view */
+ mem_heap_t** offset_heap,/*!< in/out: memory heap from which
the offsets are allocated */
- mem_heap_t* in_heap,/* in: memory heap from which the memory for
- old_vers is allocated; memory for possible
+ mem_heap_t* in_heap,/*!< in: memory heap from which the memory for
+ *old_vers is allocated; memory for possible
intermediate versions is allocated and freed
locally within the function */
- rec_t** old_vers)/* out, own: old version, or NULL if the
+ rec_t** old_vers)/*!< out, own: old version, or NULL if the
record does not exist in the view, that is,
it was freshly inserted afterwards */
{
- rec_t* version;
+ const rec_t* version;
rec_t* prev_version;
- dulint trx_id;
+ trx_id_t trx_id;
mem_heap_t* heap = NULL;
byte* buf;
ulint err;
- ut_ad(index->type & DICT_CLUSTERED);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains(mtr, buf_block_align(rec),
- MTR_MEMO_PAGE_S_FIX));
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
+ || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
#ifdef UNIV_SYNC_DEBUG
ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */
@@ -446,8 +523,8 @@ row_vers_build_for_consistent_read(
for (;;) {
mem_heap_t* heap2 = heap;
trx_undo_rec_t* undo_rec;
- dulint roll_ptr;
- dulint undo_no;
+ roll_ptr_t roll_ptr;
+ undo_no_t undo_no;
heap = mem_heap_create(1024);
/* If we have high-granularity consistent read view and
@@ -525,42 +602,41 @@ row_vers_build_for_consistent_read(
return(err);
}
-/*********************************************************************
+/*****************************************************************//**
Constructs the last committed version of a clustered index record,
-which should be seen by a semi-consistent read. */
-
+which should be seen by a semi-consistent read.
+@return DB_SUCCESS or DB_MISSING_HISTORY */
+UNIV_INTERN
ulint
row_vers_build_for_semi_consistent_read(
/*====================================*/
- /* out: DB_SUCCESS or DB_MISSING_HISTORY */
- rec_t* rec, /* in: record in a clustered index; the
+ const rec_t* rec, /*!< in: record in a clustered index; the
caller must have a latch on the page; this
latch locks the top of the stack of versions
of this records */
- mtr_t* mtr, /* in: mtr holding the latch on rec */
- dict_index_t* index, /* in: the clustered index */
- ulint** offsets,/* in/out: offsets returned by
+ mtr_t* mtr, /*!< in: mtr holding the latch on rec */
+ dict_index_t* index, /*!< in: the clustered index */
+ ulint** offsets,/*!< in/out: offsets returned by
rec_get_offsets(rec, index) */
- mem_heap_t** offset_heap,/* in/out: memory heap from which
+ mem_heap_t** offset_heap,/*!< in/out: memory heap from which
the offsets are allocated */
- mem_heap_t* in_heap,/* in: memory heap from which the memory for
- old_vers is allocated; memory for possible
+ mem_heap_t* in_heap,/*!< in: memory heap from which the memory for
+ *old_vers is allocated; memory for possible
intermediate versions is allocated and freed
locally within the function */
- rec_t** old_vers)/* out, own: rec, old version, or NULL if the
+ const rec_t** old_vers)/*!< out: rec, old version, or NULL if the
record does not exist in the view, that is,
it was freshly inserted afterwards */
{
- rec_t* version;
+ const rec_t* version;
mem_heap_t* heap = NULL;
byte* buf;
ulint err;
- dulint rec_trx_id = ut_dulint_create(0, 0);
+ trx_id_t rec_trx_id = ut_dulint_zero;
- ut_ad(index->type & DICT_CLUSTERED);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains(mtr, buf_block_align(rec),
- MTR_MEMO_PAGE_S_FIX));
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
+ || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
#ifdef UNIV_SYNC_DEBUG
ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */
@@ -579,7 +655,7 @@ row_vers_build_for_semi_consistent_read(
trx_t* version_trx;
mem_heap_t* heap2;
rec_t* prev_version;
- dulint version_trx_id;
+ trx_id_t version_trx_id;
version_trx_id = row_get_rec_trx_id(version, index, *offsets);
if (rec == version) {
diff --git a/storage/innodb_plugin/scripts/export.sh b/storage/innobase/scripts/export.sh
index 2a4355c1e43..2a4355c1e43 100755
--- a/storage/innodb_plugin/scripts/export.sh
+++ b/storage/innobase/scripts/export.sh
diff --git a/storage/innodb_plugin/scripts/install_innodb_plugins.sql b/storage/innobase/scripts/install_innodb_plugins.sql
index 3fdb8f11e22..3fdb8f11e22 100644
--- a/storage/innodb_plugin/scripts/install_innodb_plugins.sql
+++ b/storage/innobase/scripts/install_innodb_plugins.sql
diff --git a/storage/innodb_plugin/scripts/install_innodb_plugins_win.sql b/storage/innobase/scripts/install_innodb_plugins_win.sql
index 8c94b4e240d..8c94b4e240d 100644
--- a/storage/innodb_plugin/scripts/install_innodb_plugins_win.sql
+++ b/storage/innobase/scripts/install_innodb_plugins_win.sql
diff --git a/storage/innodb_plugin/setup.sh b/storage/innobase/setup.sh
index 23fe729a406..23fe729a406 100755
--- a/storage/innodb_plugin/setup.sh
+++ b/storage/innobase/setup.sh
diff --git a/storage/innobase/srv/srv0que.c b/storage/innobase/srv/srv0que.c
index e2b4e217980..fc50a86a55c 100644
--- a/storage/innobase/srv/srv0que.c
+++ b/storage/innobase/srv/srv0que.c
@@ -1,7 +1,24 @@
-/******************************************************
-Server query execution
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
-(c) 1996 Innobase Oy
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file srv/srv0que.c
+Server query execution
Created 6/5/1996 Heikki Tuuri
*******************************************************/
@@ -14,72 +31,14 @@ Created 6/5/1996 Heikki Tuuri
#include "usr0sess.h"
#include "que0que.h"
-/**************************************************************************
-Checks if there is work to do in the server task queue. If there is, the
-thread starts processing a task. Before leaving, it again checks the task
-queue and picks a new task if any exists. This is called by a SRV_WORKER
-thread. */
-
-void
-srv_que_task_queue_check(void)
-/*==========================*/
-{
- que_thr_t* thr;
-
- for (;;) {
- mutex_enter(&kernel_mutex);
-
- thr = UT_LIST_GET_FIRST(srv_sys->tasks);
-
- if (thr == NULL) {
- mutex_exit(&kernel_mutex);
-
- return;
- }
-
- UT_LIST_REMOVE(queue, srv_sys->tasks, thr);
-
- mutex_exit(&kernel_mutex);
-
- que_run_threads(thr);
- }
-}
-
-/**************************************************************************
-Performs round-robin on the server tasks. This is called by a SRV_WORKER
-thread every second or so. */
-
-que_thr_t*
-srv_que_round_robin(
-/*================*/
- /* out: the new (may be == thr) query thread
- to run */
- que_thr_t* thr) /* in: query thread */
-{
- que_thr_t* new_thr;
-
- ut_ad(thr);
- ut_ad(thr->state == QUE_THR_RUNNING);
-
- mutex_enter(&kernel_mutex);
-
- UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);
-
- new_thr = UT_LIST_GET_FIRST(srv_sys->tasks);
-
- mutex_exit(&kernel_mutex);
-
- return(new_thr);
-}
-
-/**************************************************************************
+/**********************************************************************//**
Enqueues a task to server task queue and releases a worker thread, if there
is a suspended one. */
-
+UNIV_INTERN
void
srv_que_task_enqueue_low(
/*=====================*/
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
ut_ad(thr);
ut_ad(mutex_own(&kernel_mutex));
@@ -88,23 +47,3 @@ srv_que_task_enqueue_low(
srv_release_threads(SRV_WORKER, 1);
}
-
-/**************************************************************************
-Enqueues a task to server task queue and releases a worker thread, if there
-is a suspended one. */
-
-void
-srv_que_task_enqueue(
-/*=================*/
- que_thr_t* thr) /* in: query thread */
-{
- ut_ad(thr);
-
- ut_a(0); /* Under MySQL this is never called */
-
- mutex_enter(&kernel_mutex);
-
- srv_que_task_enqueue_low(thr);
-
- mutex_exit(&kernel_mutex);
-}
diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
index 71e74ab848b..639da1ed2f3 100644
--- a/storage/innobase/srv/srv0srv.c
+++ b/storage/innobase/srv/srv0srv.c
@@ -1,4 +1,56 @@
-/******************************************************
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, 2009 Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+/***********************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2009, Percona Inc.
+
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+***********************************************************************/
+
+/**************************************************//**
+@file srv/srv0srv.c
The database server main program
NOTE: SQL Server 7 uses something which the documentation
@@ -20,14 +72,14 @@ Windows 2000 will have something called thread pooling
Another possibility could be to use some very fast user space
thread library. This might confuse NT though.
-(c) 1995 Innobase Oy
-
Created 10/8/1995 Heikki Tuuri
*******************************************************/
+
/* Dummy comment */
#include "srv0srv.h"
#include "ut0mem.h"
+#include "ut0ut.h"
#include "os0proc.h"
#include "mem0mem.h"
#include "mem0pool.h"
@@ -42,145 +94,133 @@ Created 10/8/1995 Heikki Tuuri
#include "trx0purge.h"
#include "ibuf0ibuf.h"
#include "buf0flu.h"
+#include "buf0lru.h"
#include "btr0sea.h"
#include "dict0load.h"
#include "dict0boot.h"
#include "srv0start.h"
#include "row0mysql.h"
#include "ha_prototypes.h"
+#include "trx0i_s.h"
+#include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
/* This is set to TRUE if the MySQL user has set it in MySQL; currently
affects only FOREIGN KEY definition parsing */
-ibool srv_lower_case_table_names = FALSE;
+UNIV_INTERN ibool srv_lower_case_table_names = FALSE;
/* The following counter is incremented whenever there is some user activity
in the server */
-ulint srv_activity_count = 0;
+UNIV_INTERN ulint srv_activity_count = 0;
/* The following is the maximum allowed duration of a lock wait. */
-ulint srv_fatal_semaphore_wait_threshold = 600;
+UNIV_INTERN ulint srv_fatal_semaphore_wait_threshold = 600;
/* How much data manipulation language (DML) statements need to be delayed,
in microseconds, in order to reduce the lagging of the purge thread. */
-ulint srv_dml_needed_delay = 0;
+UNIV_INTERN ulint srv_dml_needed_delay = 0;
-ibool srv_lock_timeout_and_monitor_active = FALSE;
-ibool srv_error_monitor_active = FALSE;
+UNIV_INTERN ibool srv_lock_timeout_and_monitor_active = FALSE;
+UNIV_INTERN ibool srv_error_monitor_active = FALSE;
-const char* srv_main_thread_op_info = "";
+UNIV_INTERN const char* srv_main_thread_op_info = "";
-/* Prefix used by MySQL to indicate pre-5.1 table name encoding */
-const char srv_mysql50_table_name_prefix[9] = "#mysql50#";
+/** Prefix used by MySQL to indicate pre-5.1 table name encoding */
+UNIV_INTERN const char srv_mysql50_table_name_prefix[9] = "#mysql50#";
/* Server parameters which are read from the initfile */
/* The following three are dir paths which are catenated before file
names, where the file name itself may also contain a path */
-char* srv_data_home = NULL;
+UNIV_INTERN char* srv_data_home = NULL;
#ifdef UNIV_LOG_ARCHIVE
-char* srv_arch_dir = NULL;
+UNIV_INTERN char* srv_arch_dir = NULL;
#endif /* UNIV_LOG_ARCHIVE */
-ibool srv_file_per_table = FALSE; /* store to its own file each table
- created by an user; data dictionary
- tables are in the system tablespace
- 0 */
-ibool srv_locks_unsafe_for_binlog = FALSE; /* Place locks to
- records only i.e. do
- not use next-key
- locking except on
- duplicate key checking
- and foreign key
- checking */
-ulint srv_n_data_files = 0;
-char** srv_data_file_names = NULL;
-ulint* srv_data_file_sizes = NULL; /* size in database pages */
-
-ibool srv_auto_extend_last_data_file = FALSE; /* if TRUE, then we
- auto-extend the last data
- file */
-ulint srv_last_file_size_max = 0; /* if != 0, this tells
- the max size auto-extending
- may increase the last data
- file size */
-ulong srv_auto_extend_increment = 8; /* If the last data file is
- auto-extended, we add this
- many pages to it at a time */
-ulint* srv_data_file_is_raw_partition = NULL;
+/** store to its own file each table created by a user; data
+dictionary tables are in the system tablespace 0 */
+UNIV_INTERN my_bool srv_file_per_table;
+/** The file format to use on new *.ibd files. */
+UNIV_INTERN ulint srv_file_format = 0;
+/** Whether to check file format during startup. A value of
+DICT_TF_FORMAT_MAX + 1 means no checking, i.e. FALSE. The default is to
+set it to the highest format we support. */
+UNIV_INTERN ulint srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX;
+
+#if DICT_TF_FORMAT_51
+# error "DICT_TF_FORMAT_51 must be 0!"
+#endif
+/** Place locks to records only i.e. do not use next-key locking except
+on duplicate key checking and foreign key checking */
+UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
+
+UNIV_INTERN ulint srv_n_data_files = 0;
+UNIV_INTERN char** srv_data_file_names = NULL;
+/* size in database pages */
+UNIV_INTERN ulint* srv_data_file_sizes = NULL;
+
+/* if TRUE, then we auto-extend the last data file */
+UNIV_INTERN ibool srv_auto_extend_last_data_file = FALSE;
+/* if != 0, this tells the max size auto-extending may increase the
+last data file size */
+UNIV_INTERN ulint srv_last_file_size_max = 0;
+/* If the last data file is auto-extended, we add this
+many pages to it at a time */
+UNIV_INTERN ulong srv_auto_extend_increment = 8;
+UNIV_INTERN ulint* srv_data_file_is_raw_partition = NULL;
/* If the following is TRUE we do not allow inserts etc. This protects
the user from forgetting the 'newraw' keyword to my.cnf */
-ibool srv_created_new_raw = FALSE;
-
-char** srv_log_group_home_dirs = NULL;
-
-ulint srv_n_log_groups = ULINT_MAX;
-ulint srv_n_log_files = ULINT_MAX;
-ulint srv_log_file_size = ULINT_MAX; /* size in database pages */
-ulint srv_log_buffer_size = ULINT_MAX; /* size in database pages */
-ulong srv_flush_log_at_trx_commit = 1;
-
-byte srv_latin1_ordering[256] /* The sort order table of the latin1
- character set. The following table is
- the MySQL order as of Feb 10th, 2002 */
-= {
- 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
-, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
-, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
-, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F
-, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27
-, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F
-, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37
-, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F
-, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47
-, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F
-, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57
-, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F
-, 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47
-, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F
-, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57
-, 0x58, 0x59, 0x5A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
-, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
-, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F
-, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97
-, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F
-, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7
-, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF
-, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7
-, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF
-, 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43
-, 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49
-, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xD7
-, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xDF
-, 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43
-, 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49
-, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xF7
-, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xFF
-};
-
-ulint srv_pool_size = ULINT_MAX; /* size in pages; MySQL inits
- this to size in kilobytes but
- we normalize this to pages in
- srv_boot() */
-ulint srv_awe_window_size = 0; /* size in pages; MySQL inits
- this to bytes, but we
- normalize it to pages in
- srv_boot() */
-ulint srv_mem_pool_size = ULINT_MAX; /* size in bytes */
-ulint srv_lock_table_size = ULINT_MAX;
-
-ulint srv_n_file_io_threads = ULINT_MAX;
+UNIV_INTERN ibool srv_created_new_raw = FALSE;
+
+UNIV_INTERN char** srv_log_group_home_dirs = NULL;
+
+UNIV_INTERN ulint srv_n_log_groups = ULINT_MAX;
+UNIV_INTERN ulint srv_n_log_files = ULINT_MAX;
+/* size in database pages */
+UNIV_INTERN ulint srv_log_file_size = ULINT_MAX;
+/* size in database pages */
+UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX;
+UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
+
+/* Try to flush dirty pages so as to avoid IO bursts at
+the checkpoints. */
+UNIV_INTERN char srv_adaptive_flushing = TRUE;
+
+/* The sort order table of the MySQL latin1_swedish_ci character set
+collation */
+UNIV_INTERN const byte* srv_latin1_ordering;
+
+/* use os/external memory allocator */
+UNIV_INTERN my_bool srv_use_sys_malloc = TRUE;
+/* requested size in kilobytes */
+UNIV_INTERN ulint srv_buf_pool_size = ULINT_MAX;
+/* previously requested size */
+UNIV_INTERN ulint srv_buf_pool_old_size;
+/* current size in kilobytes */
+UNIV_INTERN ulint srv_buf_pool_curr_size = 0;
+/* size in bytes */
+UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX;
+UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX;
+
+/* This parameter is deprecated. Use srv_n_[read|write]_io_threads
+instead. */
+UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX;
+UNIV_INTERN ulint srv_n_read_io_threads = ULINT_MAX;
+UNIV_INTERN ulint srv_n_write_io_threads = ULINT_MAX;
+
+/* User settable value of the number of pages that must be present
+in the buffer cache and accessed sequentially for InnoDB to trigger a
+readahead request. */
+UNIV_INTERN ulong srv_read_ahead_threshold = 56;
#ifdef UNIV_LOG_ARCHIVE
-ibool srv_log_archive_on = FALSE;
-ibool srv_archive_recovery = 0;
-dulint srv_archive_recovery_limit_lsn;
+UNIV_INTERN ibool srv_log_archive_on = FALSE;
+UNIV_INTERN ibool srv_archive_recovery = 0;
+UNIV_INTERN ib_uint64_t srv_archive_recovery_limit_lsn;
#endif /* UNIV_LOG_ARCHIVE */
-ulint srv_lock_wait_timeout = 1024 * 1024 * 1024;
-
/* This parameter is used to throttle the number of insert buffers that are
merged in a batch. By increasing this parameter on a faster disk you can
possibly reduce the number of I/O operations performed to complete the
@@ -189,92 +229,83 @@ background loop when the system is idle (low load), on a busy system
the parameter is scaled down by a factor of 4, this is to avoid putting
a heavier load on the I/O sub system. */
-ulong srv_insert_buffer_batch_size = 20;
+UNIV_INTERN ulong srv_insert_buffer_batch_size = 20;
+
+UNIV_INTERN char* srv_file_flush_method_str = NULL;
+UNIV_INTERN ulint srv_unix_file_flush_method = SRV_UNIX_FSYNC;
+UNIV_INTERN ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
-char* srv_file_flush_method_str = NULL;
-ulint srv_unix_file_flush_method = SRV_UNIX_FSYNC;
-ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
+UNIV_INTERN ulint srv_max_n_open_files = 300;
-ulint srv_max_n_open_files = 300;
+/* Number of IO operations per second the server can do */
+UNIV_INTERN ulong srv_io_capacity = 200;
/* The InnoDB main thread tries to keep the ratio of modified pages
in the buffer pool to all database pages in the buffer pool smaller than
the following number. But it is not guaranteed that the value stays below
that during a time of heavy update/insert activity. */
-ulong srv_max_buf_pool_modified_pct = 90;
+UNIV_INTERN ulong srv_max_buf_pool_modified_pct = 75;
/* variable counts amount of data read in total (in bytes) */
-ulint srv_data_read = 0;
+UNIV_INTERN ulint srv_data_read = 0;
/* here we count the amount of data written in total (in bytes) */
-ulint srv_data_written = 0;
+UNIV_INTERN ulint srv_data_written = 0;
/* the number of the log write requests done */
-ulint srv_log_write_requests = 0;
+UNIV_INTERN ulint srv_log_write_requests = 0;
/* the number of physical writes to the log performed */
-ulint srv_log_writes = 0;
+UNIV_INTERN ulint srv_log_writes = 0;
/* amount of data written to the log files in bytes */
-ulint srv_os_log_written = 0;
+UNIV_INTERN ulint srv_os_log_written = 0;
/* amount of writes being done to the log files */
-ulint srv_os_log_pending_writes = 0;
+UNIV_INTERN ulint srv_os_log_pending_writes = 0;
/* we increase this counter, when there we don't have enough space in the
log buffer and have to flush it */
-ulint srv_log_waits = 0;
+UNIV_INTERN ulint srv_log_waits = 0;
/* this variable counts the amount of times, when the doublewrite buffer
was flushed */
-ulint srv_dblwr_writes = 0;
+UNIV_INTERN ulint srv_dblwr_writes = 0;
/* here we store the number of pages that have been flushed to the
doublewrite buffer */
-ulint srv_dblwr_pages_written = 0;
+UNIV_INTERN ulint srv_dblwr_pages_written = 0;
/* in this variable we store the number of write requests issued */
-ulint srv_buf_pool_write_requests = 0;
+UNIV_INTERN ulint srv_buf_pool_write_requests = 0;
/* here we store the number of times when we had to wait for a free page
in the buffer pool. It happens when the buffer pool is full and we need
to make a flush, in order to be able to read or create a page. */
-ulint srv_buf_pool_wait_free = 0;
+UNIV_INTERN ulint srv_buf_pool_wait_free = 0;
/* variable to count the number of pages that were written from buffer
pool to the disk */
-ulint srv_buf_pool_flushed = 0;
+UNIV_INTERN ulint srv_buf_pool_flushed = 0;
-/* variable to count the number of buffer pool reads that led to the
+/** Number of buffer pool reads that led to the
reading of a disk page */
-ulint srv_buf_pool_reads = 0;
-
-/* variable to count the number of sequential read-aheads */
-ulint srv_read_ahead_seq = 0;
-
-/* variable to count the number of random read-aheads */
-ulint srv_read_ahead_rnd = 0;
-
-/* An option to enable the fix for "Bug#43660 SHOW INDEXES/ANALYZE does
-NOT update cardinality for indexes of InnoDB table". By default we are
-running with the fix disabled because MySQL 5.1 is frozen for such
-behavioral changes. */
-char srv_use_legacy_cardinality_algorithm = TRUE;
+UNIV_INTERN ulint srv_buf_pool_reads = 0;
/* structure to pass status variables to MySQL */
-export_struc export_vars;
+UNIV_INTERN export_struc export_vars;
/* If the following is != 0 we do not allow inserts etc. This protects
the user from forgetting the innodb_force_recovery keyword to my.cnf */
-ulint srv_force_recovery = 0;
+UNIV_INTERN ulint srv_force_recovery = 0;
/*-----------------------*/
/* We are prepared for a situation that we have this many threads waiting for
a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
value. */
-ulint srv_max_n_threads = 0;
+UNIV_INTERN ulint srv_max_n_threads = 0;
/* The following controls how many threads we let inside InnoDB concurrently:
threads waiting for locks are not counted into the number because otherwise
@@ -284,43 +315,38 @@ Value 10 should be good if there are less than 4 processors + 4 disks in the
computer. Bigger computers need bigger values. Value 0 will disable the
concurrency check. */
-ulong srv_thread_concurrency = 0;
-
-os_fast_mutex_t srv_conc_mutex; /* this mutex protects srv_conc data
- structures */
-lint srv_conc_n_threads = 0; /* number of transactions that
- have declared_to_be_inside_innodb
- set. It used to be a non-error
- for this value to drop below
- zero temporarily. This is no
- longer true. We'll, however,
- keep the lint datatype to add
- assertions to catch any corner
- cases that we may have
- missed. */
-ulint srv_conc_n_waiting_threads = 0; /* number of OS threads waiting in the
- FIFO for a permission to enter InnoDB
- */
+UNIV_INTERN ulong srv_thread_concurrency = 0;
+
+/* this mutex protects srv_conc data structures */
+UNIV_INTERN os_fast_mutex_t srv_conc_mutex;
+/* number of transactions that have declared_to_be_inside_innodb set.
+It used to be a non-error for this value to drop below zero temporarily.
+This is no longer true. We'll, however, keep the lint datatype to add
+assertions to catch any corner cases that we may have missed. */
+UNIV_INTERN lint srv_conc_n_threads = 0;
+/* number of OS threads waiting in the FIFO for a permission to enter
+InnoDB */
+UNIV_INTERN ulint srv_conc_n_waiting_threads = 0;
typedef struct srv_conc_slot_struct srv_conc_slot_t;
struct srv_conc_slot_struct{
- os_event_t event; /* event to wait */
- ibool reserved; /* TRUE if slot
+ os_event_t event; /*!< event to wait */
+ ibool reserved; /*!< TRUE if slot
reserved */
- ibool wait_ended; /* TRUE when another
+ ibool wait_ended; /*!< TRUE when another
thread has already set
the event and the
thread in this slot is
free to proceed; but
reserved may still be
TRUE at that point */
- UT_LIST_NODE_T(srv_conc_slot_t) srv_conc_queue; /* queue node */
+ UT_LIST_NODE_T(srv_conc_slot_t) srv_conc_queue; /*!< queue node */
};
-UT_LIST_BASE_NODE_T(srv_conc_slot_t) srv_conc_queue; /* queue of threads
- waiting to get in */
-srv_conc_slot_t* srv_conc_slots; /* array of wait
- slots */
+/* queue of threads waiting to get in */
+UNIV_INTERN UT_LIST_BASE_NODE_T(srv_conc_slot_t) srv_conc_queue;
+/* array of wait slots */
+UNIV_INTERN srv_conc_slot_t* srv_conc_slots;
/* Number of times a thread is allowed to enter InnoDB within the same
SQL query after it has once got the ticket at srv_conc_enter_innodb */
@@ -331,91 +357,121 @@ SQL query after it has once got the ticket at srv_conc_enter_innodb */
merge to completion before shutdown. If it is set to 2, do not even flush the
buffer pool to data files at the shutdown: we effectively 'crash'
InnoDB (but lose no committed transactions). */
-ulint srv_fast_shutdown = 0;
+UNIV_INTERN ulint srv_fast_shutdown = 0;
/* Generate a innodb_status.<pid> file */
-ibool srv_innodb_status = FALSE;
+UNIV_INTERN ibool srv_innodb_status = FALSE;
-ibool srv_use_doublewrite_buf = TRUE;
-ibool srv_use_checksums = TRUE;
+/* When estimating number of different key values in an index, sample
+this many index pages */
+UNIV_INTERN unsigned long long srv_stats_sample_pages = 8;
-ibool srv_set_thread_priorities = TRUE;
-int srv_query_thread_priority = 0;
+UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE;
+UNIV_INTERN ibool srv_use_checksums = TRUE;
-/* TRUE if the Address Windowing Extensions of Windows are used; then we must
-disable adaptive hash indexes */
-ibool srv_use_awe = FALSE;
-ibool srv_use_adaptive_hash_indexes = TRUE;
+UNIV_INTERN ibool srv_set_thread_priorities = TRUE;
+UNIV_INTERN int srv_query_thread_priority = 0;
+
+UNIV_INTERN ulong srv_replication_delay = 0;
/*-------------------------------------------*/
-ulong srv_n_spin_wait_rounds = 20;
-ulong srv_n_free_tickets_to_enter = 500;
-ulong srv_thread_sleep_delay = 10000;
-ulint srv_spin_wait_delay = 5;
-ibool srv_priority_boost = TRUE;
-
-ibool srv_print_thread_releases = FALSE;
-ibool srv_print_lock_waits = FALSE;
-ibool srv_print_buf_io = FALSE;
-ibool srv_print_log_io = FALSE;
-ibool srv_print_latch_waits = FALSE;
-
-ulint srv_n_rows_inserted = 0;
-ulint srv_n_rows_updated = 0;
-ulint srv_n_rows_deleted = 0;
-ulint srv_n_rows_read = 0;
-#ifndef UNIV_HOTBACKUP
+UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;
+UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500;
+UNIV_INTERN ulong srv_thread_sleep_delay = 10000;
+UNIV_INTERN ulong srv_spin_wait_delay = 6;
+UNIV_INTERN ibool srv_priority_boost = TRUE;
+
+#ifdef UNIV_DEBUG
+UNIV_INTERN ibool srv_print_thread_releases = FALSE;
+UNIV_INTERN ibool srv_print_lock_waits = FALSE;
+UNIV_INTERN ibool srv_print_buf_io = FALSE;
+UNIV_INTERN ibool srv_print_log_io = FALSE;
+UNIV_INTERN ibool srv_print_latch_waits = FALSE;
+#endif /* UNIV_DEBUG */
+
+UNIV_INTERN ulint srv_n_rows_inserted = 0;
+UNIV_INTERN ulint srv_n_rows_updated = 0;
+UNIV_INTERN ulint srv_n_rows_deleted = 0;
+UNIV_INTERN ulint srv_n_rows_read = 0;
+
static ulint srv_n_rows_inserted_old = 0;
static ulint srv_n_rows_updated_old = 0;
static ulint srv_n_rows_deleted_old = 0;
static ulint srv_n_rows_read_old = 0;
-#endif /* !UNIV_HOTBACKUP */
-ulint srv_n_lock_wait_count = 0;
-ulint srv_n_lock_wait_current_count = 0;
-ib_longlong srv_n_lock_wait_time = 0;
-ulint srv_n_lock_max_wait_time = 0;
+UNIV_INTERN ulint srv_n_lock_wait_count = 0;
+UNIV_INTERN ulint srv_n_lock_wait_current_count = 0;
+UNIV_INTERN ib_int64_t srv_n_lock_wait_time = 0;
+UNIV_INTERN ulint srv_n_lock_max_wait_time = 0;
/*
Set the following to 0 if you want InnoDB to write messages on
stderr on startup/shutdown
*/
-ibool srv_print_verbose_log = TRUE;
-ibool srv_print_innodb_monitor = FALSE;
-ibool srv_print_innodb_lock_monitor = FALSE;
-ibool srv_print_innodb_tablespace_monitor = FALSE;
-ibool srv_print_innodb_table_monitor = FALSE;
+UNIV_INTERN ibool srv_print_verbose_log = TRUE;
+UNIV_INTERN ibool srv_print_innodb_monitor = FALSE;
+UNIV_INTERN ibool srv_print_innodb_lock_monitor = FALSE;
+UNIV_INTERN ibool srv_print_innodb_tablespace_monitor = FALSE;
+UNIV_INTERN ibool srv_print_innodb_table_monitor = FALSE;
/* Array of English strings describing the current state of an
i/o handler thread */
-const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
-const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];
+UNIV_INTERN const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
+UNIV_INTERN const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];
-time_t srv_last_monitor_time;
+UNIV_INTERN time_t srv_last_monitor_time;
-mutex_t srv_innodb_monitor_mutex;
+UNIV_INTERN mutex_t srv_innodb_monitor_mutex;
/* Mutex for locking srv_monitor_file */
-mutex_t srv_monitor_file_mutex;
+UNIV_INTERN mutex_t srv_monitor_file_mutex;
/* Temporary file for innodb monitor output */
-FILE* srv_monitor_file;
+UNIV_INTERN FILE* srv_monitor_file;
/* Mutex for locking srv_dict_tmpfile.
This mutex has a very high rank; threads reserving it should not
be holding any InnoDB latches. */
-mutex_t srv_dict_tmpfile_mutex;
+UNIV_INTERN mutex_t srv_dict_tmpfile_mutex;
/* Temporary file for output from the data dictionary */
-FILE* srv_dict_tmpfile;
+UNIV_INTERN FILE* srv_dict_tmpfile;
/* Mutex for locking srv_misc_tmpfile.
This mutex has a very low rank; threads reserving it should not
acquire any further latches or sleep before releasing this one. */
-mutex_t srv_misc_tmpfile_mutex;
+UNIV_INTERN mutex_t srv_misc_tmpfile_mutex;
/* Temporary file for miscellanous diagnostic output */
-FILE* srv_misc_tmpfile;
-
-ulint srv_main_thread_process_no = 0;
-ulint srv_main_thread_id = 0;
+UNIV_INTERN FILE* srv_misc_tmpfile;
+
+UNIV_INTERN ulint srv_main_thread_process_no = 0;
+UNIV_INTERN ulint srv_main_thread_id = 0;
+
+/* The following count work done by srv_master_thread. */
+
+/* Iterations by the 'once per second' loop. */
+static ulint srv_main_1_second_loops = 0;
+/* Calls to sleep by the 'once per second' loop. */
+static ulint srv_main_sleeps = 0;
+/* Iterations by the 'once per 10 seconds' loop. */
+static ulint srv_main_10_second_loops = 0;
+/* Iterations of the loop bounded by the 'background_loop' label. */
+static ulint srv_main_background_loops = 0;
+/* Iterations of the loop bounded by the 'flush_loop' label. */
+static ulint srv_main_flush_loops = 0;
+/* Log writes involving flush. */
+static ulint srv_log_writes_and_flush = 0;
+
+/* This is only ever touched by the master thread. It records the
+time when the last flush of the log files happened. The master
+thread ensures that we flush the log files at least once per
+second. */
+static time_t srv_last_log_flush_time;
+
+/* The master thread performs various tasks based on the current
+state of IO activity and the level of IO utilization in past
+intervals. The following macros define thresholds for these conditions. */
+#define SRV_PEND_IO_THRESHOLD (PCT_IO(3))
+#define SRV_RECENT_IO_ACTIVITY (PCT_IO(5))
+#define SRV_PAST_IO_ACTIVITY (PCT_IO(200))
/*
IMPLEMENTATION OF THE SERVER MAIN PROGRAM
@@ -551,7 +607,7 @@ future, but at the moment we plan to implement a more coarse solution,
which could be called a global priority inheritance. If a thread
has to wait for a long time, say 300 milliseconds, for a resource,
we just guess that it may be waiting for a resource owned by a background
-thread, and boost the the priority of all runnable background threads
+thread, and boost the priority of all runnable background threads
to the normal level. The background threads then themselves adjust
their fixed priority back to background after releasing all resources
they had (or, at some fixed points in their program code).
@@ -587,63 +643,82 @@ Unix.*/
/* Thread slot in the thread table */
struct srv_slot_struct{
- os_thread_id_t id; /* thread id */
- os_thread_t handle; /* thread handle */
- ulint type; /* thread type: user, utility etc. */
- ibool in_use; /* TRUE if this slot is in use */
- ibool suspended; /* TRUE if the thread is waiting
+ os_thread_id_t id; /*!< thread id */
+ os_thread_t handle; /*!< thread handle */
+ unsigned type:3; /*!< thread type: user, utility etc. */
+ unsigned in_use:1; /*!< TRUE if this slot is in use */
+ unsigned suspended:1; /*!< TRUE if the thread is waiting
for the event of this slot */
- ib_time_t suspend_time; /* time when the thread was
+ ib_time_t suspend_time; /*!< time when the thread was
suspended */
- os_event_t event; /* event used in suspending the
+ os_event_t event; /*!< event used in suspending the
thread when it has nothing to do */
- que_thr_t* thr; /* suspended query thread (only
+ que_thr_t* thr; /*!< suspended query thread (only
used for MySQL threads) */
};
/* Table for MySQL threads where they will be suspended to wait for locks */
-srv_slot_t* srv_mysql_table = NULL;
+UNIV_INTERN srv_slot_t* srv_mysql_table = NULL;
-os_event_t srv_lock_timeout_thread_event;
+UNIV_INTERN os_event_t srv_lock_timeout_thread_event;
-srv_sys_t* srv_sys = NULL;
+UNIV_INTERN srv_sys_t* srv_sys = NULL;
-byte srv_pad1[64]; /* padding to prevent other memory update
- hotspots from residing on the same memory
- cache line */
-mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs,
- query threads, and lock table */
-byte srv_pad2[64]; /* padding to prevent other memory update
- hotspots from residing on the same memory
- cache line */
+/* padding to prevent other memory update hotspots from residing on
+the same memory cache line */
+UNIV_INTERN byte srv_pad1[64];
+/* mutex protecting the server, trx structs, query threads, and lock table */
+UNIV_INTERN mutex_t* kernel_mutex_temp;
+/* padding to prevent other memory update hotspots from residing on
+the same memory cache line */
+UNIV_INTERN byte srv_pad2[64];
+#if 0
/* The following three values measure the urgency of the jobs of
buffer, version, and insert threads. They may vary from 0 - 1000.
The server mutex protects all these variables. The low-water values
tell that the server can acquiesce the utility when the value
drops below this low-water mark. */
-ulint srv_meter[SRV_MASTER + 1];
-ulint srv_meter_low_water[SRV_MASTER + 1];
-ulint srv_meter_high_water[SRV_MASTER + 1];
-ulint srv_meter_high_water2[SRV_MASTER + 1];
-ulint srv_meter_foreground[SRV_MASTER + 1];
+static ulint srv_meter[SRV_MASTER + 1];
+static ulint srv_meter_low_water[SRV_MASTER + 1];
+static ulint srv_meter_high_water[SRV_MASTER + 1];
+static ulint srv_meter_high_water2[SRV_MASTER + 1];
+static ulint srv_meter_foreground[SRV_MASTER + 1];
+#endif
/* The following values give info about the activity going on in
the database. They are protected by the server mutex. The arrays
are indexed by the type of the thread. */
-ulint srv_n_threads_active[SRV_MASTER + 1];
-ulint srv_n_threads[SRV_MASTER + 1];
+UNIV_INTERN ulint srv_n_threads_active[SRV_MASTER + 1];
+UNIV_INTERN ulint srv_n_threads[SRV_MASTER + 1];
-/*************************************************************************
-Sets the info describing an i/o thread current state. */
+/*********************************************************************//**
+Prints the loop and log-flush counters kept by srv_master_thread. */
+static
+void
+srv_print_master_thread_info(
+/*=========================*/
+ FILE *file) /*!< in: output stream */
+{
+ fprintf(file, "srv_master_thread loops: %lu 1_second, %lu sleeps, "
+ "%lu 10_second, %lu background, %lu flush\n",
+ (ulong) srv_main_1_second_loops, (ulong) srv_main_sleeps,
+ (ulong) srv_main_10_second_loops, (ulong) srv_main_background_loops,
+ (ulong) srv_main_flush_loops); /* cast: %lu expects unsigned long */
+ fprintf(file, "srv_master_thread log flush and writes: %lu\n",
+ (ulong) srv_log_writes_and_flush);
+}
+/*********************************************************************//**
+Sets the info describing an i/o thread current state. */
+UNIV_INTERN
void
srv_set_io_thread_op_info(
/*======================*/
- ulint i, /* in: the 'segment' of the i/o thread */
- const char* str) /* in: constant char string describing the
+ ulint i, /*!< in: the 'segment' of the i/o thread */
+ const char* str) /*!< in: constant char string describing the
state */
{
ut_a(i < SRV_MAX_N_IO_THREADS);
@@ -651,25 +726,25 @@ srv_set_io_thread_op_info(
srv_io_thread_op_info[i] = str;
}
-/*************************************************************************
+/*********************************************************************//**
Accessor function to get pointer to n'th slot in the server thread
-table. */
+table.
+@return pointer to the slot */
static
srv_slot_t*
srv_table_get_nth_slot(
/*===================*/
- /* out: pointer to the slot */
- ulint index) /* in: index of the slot */
+ ulint index) /*!< in: index of the slot */
{
ut_a(index < OS_THREAD_MAX_N);
return(srv_sys->threads + index);
}
-#ifndef UNIV_HOTBACKUP
-/*************************************************************************
-Gets the number of threads in the system. */
-
+/*********************************************************************//**
+Gets the number of threads in the system.
+@return sum of srv_n_threads[] */
+UNIV_INTERN
ulint
srv_get_n_threads(void)
/*===================*/
@@ -689,16 +764,16 @@ srv_get_n_threads(void)
return(n_threads);
}
-/*************************************************************************
+/*********************************************************************//**
Reserves a slot in the thread table for the current thread. Also creates the
thread local storage struct for the current thread. NOTE! The server mutex
-has to be reserved by the caller! */
+has to be reserved by the caller!
+@return reserved slot index */
static
ulint
srv_table_reserve_slot(
/*===================*/
- /* out: reserved slot index */
- ulint type) /* in: type of the thread: one of SRV_COM, ... */
+ enum srv_thread_type type) /*!< in: type of the thread */
{
srv_slot_t* slot;
ulint i;
@@ -718,9 +793,9 @@ srv_table_reserve_slot(
slot->in_use = TRUE;
slot->suspended = FALSE;
+ slot->type = type;
slot->id = os_thread_get_curr_id();
slot->handle = os_thread_get_curr();
- slot->type = type;
thr_local_create();
@@ -729,19 +804,19 @@ srv_table_reserve_slot(
return(i);
}
-/*************************************************************************
+/*********************************************************************//**
Suspends the calling thread to wait for the event in its thread slot.
-NOTE! The server mutex has to be reserved by the caller! */
+NOTE! The server mutex has to be reserved by the caller!
+@return event for the calling thread to wait */
static
os_event_t
srv_suspend_thread(void)
/*====================*/
- /* out: event for the calling thread to wait */
{
- srv_slot_t* slot;
- os_event_t event;
- ulint slot_no;
- ulint type;
+ srv_slot_t* slot;
+ os_event_t event;
+ ulint slot_no;
+ enum srv_thread_type type;
ut_ad(mutex_own(&kernel_mutex));
@@ -749,9 +824,8 @@ srv_suspend_thread(void)
if (srv_print_thread_releases) {
fprintf(stderr,
- "Suspending thread %lu to slot %lu meter %lu\n",
- (ulong) os_thread_get_curr_id(), (ulong) slot_no,
- (ulong) srv_meter[SRV_RECOVERY]);
+ "Suspending thread %lu to slot %lu\n",
+ (ulong) os_thread_get_curr_id(), (ulong) slot_no);
}
slot = srv_table_get_nth_slot(slot_no);
@@ -773,20 +847,18 @@ srv_suspend_thread(void)
return(event);
}
-#endif /* !UNIV_HOTBACKUP */
-/*************************************************************************
+/*********************************************************************//**
Releases threads of the type given from suspension in the thread table.
-NOTE! The server mutex has to be reserved by the caller! */
-
+NOTE! The server mutex has to be reserved by the caller!
+@return number of threads released: this may be less than n if not
+enough threads were suspended at the moment */
+UNIV_INTERN
ulint
srv_release_threads(
/*================*/
- /* out: number of threads released: this may be
- < n if not enough threads were suspended at the
- moment */
- ulint type, /* in: thread type */
- ulint n) /* in: number of threads to release */
+ enum srv_thread_type type, /*!< in: thread type */
+ ulint n) /*!< in: number of threads to release */
{
srv_slot_t* slot;
ulint i;
@@ -812,10 +884,9 @@ srv_release_threads(
if (srv_print_thread_releases) {
fprintf(stderr,
"Releasing thread %lu type %lu"
- " from slot %lu meter %lu\n",
+ " from slot %lu\n",
(ulong) slot->id, (ulong) type,
- (ulong) i,
- (ulong) srv_meter[SRV_RECOVERY]);
+ (ulong) i);
}
count++;
@@ -829,17 +900,17 @@ srv_release_threads(
return(count);
}
-/*************************************************************************
-Returns the calling thread type. */
-
-ulint
+/*********************************************************************//**
+Returns the calling thread type.
+@return SRV_COM, ... */
+UNIV_INTERN
+enum srv_thread_type
srv_get_thread_type(void)
/*=====================*/
- /* out: SRV_COM, ... */
{
- ulint slot_no;
- srv_slot_t* slot;
- ulint type;
+ ulint slot_no;
+ srv_slot_t* slot;
+ enum srv_thread_type type;
mutex_enter(&kernel_mutex);
@@ -857,16 +928,15 @@ srv_get_thread_type(void)
return(type);
}
-/*************************************************************************
+/*********************************************************************//**
Initializes the server. */
-
+UNIV_INTERN
void
srv_init(void)
/*==========*/
{
srv_conc_slot_t* conc_slot;
srv_slot_t* slot;
- dict_table_t* table;
ulint i;
srv_sys = mem_alloc(sizeof(srv_sys_t));
@@ -901,39 +971,20 @@ srv_init(void)
for (i = 0; i < SRV_MASTER + 1; i++) {
srv_n_threads_active[i] = 0;
srv_n_threads[i] = 0;
+#if 0
srv_meter[i] = 30;
srv_meter_low_water[i] = 50;
srv_meter_high_water[i] = 100;
srv_meter_high_water2[i] = 200;
srv_meter_foreground[i] = 250;
+#endif
}
UT_LIST_INIT(srv_sys->tasks);
- /* create dummy table and index for old-style infimum and supremum */
- table = dict_mem_table_create("SYS_DUMMY1",
- DICT_HDR_SPACE, 1, 0);
- dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR,
- DATA_ENGLISH | DATA_NOT_NULL, 8);
-
- srv_sys->dummy_ind1 = dict_mem_index_create(
- "SYS_DUMMY1", "SYS_DUMMY1", DICT_HDR_SPACE, 0, 1);
- dict_index_add_col(srv_sys->dummy_ind1, table, (dict_col_t*)
- dict_table_get_nth_col(table, 0), 0);
- srv_sys->dummy_ind1->table = table;
- /* create dummy table and index for new-style infimum and supremum */
- table = dict_mem_table_create("SYS_DUMMY2",
- DICT_HDR_SPACE, 1, DICT_TF_COMPACT);
- dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR,
- DATA_ENGLISH | DATA_NOT_NULL, 8);
- srv_sys->dummy_ind2 = dict_mem_index_create(
- "SYS_DUMMY2", "SYS_DUMMY2", DICT_HDR_SPACE, 0, 1);
- dict_index_add_col(srv_sys->dummy_ind2, table, (dict_col_t*)
- dict_table_get_nth_col(table, 0), 0);
- srv_sys->dummy_ind2->table = table;
-
- /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
- srv_sys->dummy_ind1->cached = srv_sys->dummy_ind2->cached = TRUE;
+ /* Create dummy indexes for infimum and supremum records */
+
+ dict_ind_init();
/* Init the server concurrency restriction data structures */
@@ -949,26 +1000,45 @@ srv_init(void)
conc_slot->event = os_event_create(NULL);
ut_a(conc_slot->event);
}
-}
-/*************************************************************************
-Frees the OS fast mutex created in srv_init(). */
+ /* Initialize some INFORMATION SCHEMA internal structures */
+ trx_i_s_cache_init(trx_i_s_cache);
+}
+/*********************************************************************//**
+Frees the data structures created in srv_init(). */
+UNIV_INTERN
void
srv_free(void)
/*==========*/
{
os_fast_mutex_free(&srv_conc_mutex);
+ mem_free(srv_conc_slots);
+ srv_conc_slots = NULL;
+
+ mem_free(srv_sys->threads);
+ mem_free(srv_sys);
+ srv_sys = NULL;
+
+ mem_free(kernel_mutex_temp);
+ kernel_mutex_temp = NULL;
+ mem_free(srv_mysql_table);
+ srv_mysql_table = NULL;
+
+ trx_i_s_cache_free(trx_i_s_cache);
}
-/*************************************************************************
+/*********************************************************************//**
Initializes the synchronization primitives, memory system, and the thread
local storage. */
-
+UNIV_INTERN
void
srv_general_init(void)
/*==================*/
{
+ ut_mem_init();
+ /* Reset the system variables in the recovery module. */
+ recv_sys_var_init();
os_sync_init();
sync_init();
mem_init(srv_mem_pool_size);
@@ -978,16 +1048,16 @@ srv_general_init(void)
/*======================= InnoDB Server FIFO queue =======================*/
/* Maximum allowable purge history length. <=0 means 'infinite'. */
-ulong srv_max_purge_lag = 0;
+UNIV_INTERN ulong srv_max_purge_lag = 0;
-/*************************************************************************
+/*********************************************************************//**
Puts an OS thread to wait if there are too many concurrent threads
(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
-
+UNIV_INTERN
void
srv_conc_enter_innodb(
/*==================*/
- trx_t* trx) /* in: transaction object associated with the
+ trx_t* trx) /*!< in: transaction object associated with the
thread */
{
ibool has_slept = FALSE;
@@ -997,11 +1067,10 @@ srv_conc_enter_innodb(
if (trx->mysql_thd != NULL
&& thd_is_replication_slave_thread(trx->mysql_thd)) {
- /* TODO Do something more interesting (based on a config
- parameter). Some users what to give the replication
- thread very low priority, see http://bugs.mysql.com/25078
- This can be done by introducing
- innodb_replication_delay(ms) config parameter */
+ UT_WAIT_FOR(srv_conc_n_threads
+ < (lint)srv_thread_concurrency,
+ srv_replication_delay * 1000);
+
return;
}
@@ -1141,14 +1210,14 @@ retry:
os_fast_mutex_unlock(&srv_conc_mutex);
}
-/*************************************************************************
+/*********************************************************************//**
This lets a thread enter InnoDB regardless of the number of threads inside
InnoDB. This must be called when a thread ends a lock wait. */
-
+UNIV_INTERN
void
srv_conc_force_enter_innodb(
/*========================*/
- trx_t* trx) /* in: transaction object associated with the
+ trx_t* trx) /*!< in: transaction object associated with the
thread */
{
if (UNIV_LIKELY(!srv_thread_concurrency)) {
@@ -1167,14 +1236,14 @@ srv_conc_force_enter_innodb(
os_fast_mutex_unlock(&srv_conc_mutex);
}
-/*************************************************************************
+/*********************************************************************//**
This must be called when a thread exits InnoDB in a lock wait or at the
end of an SQL statement. */
-
+UNIV_INTERN
void
srv_conc_force_exit_innodb(
/*=======================*/
- trx_t* trx) /* in: transaction object associated with the
+ trx_t* trx) /*!< in: transaction object associated with the
thread */
{
srv_conc_slot_t* slot = NULL;
@@ -1224,13 +1293,13 @@ srv_conc_force_exit_innodb(
}
}
-/*************************************************************************
+/*********************************************************************//**
This must be called when a thread exits InnoDB. */
-
+UNIV_INTERN
void
srv_conc_exit_innodb(
/*=================*/
- trx_t* trx) /* in: transaction object associated with the
+ trx_t* trx) /*!< in: transaction object associated with the
thread */
{
if (trx->n_tickets_to_enter_innodb > 0) {
@@ -1249,13 +1318,13 @@ srv_conc_exit_innodb(
/*========================================================================*/
-/*************************************************************************
-Normalizes init parameter values to use units we use inside InnoDB. */
+/*********************************************************************//**
+Normalizes init parameter values to use units we use inside InnoDB.
+@return DB_SUCCESS or error code */
static
ulint
srv_normalize_init_values(void)
/*===========================*/
- /* out: DB_SUCCESS or error code */
{
ulint n;
ulint i;
@@ -1274,30 +1343,18 @@ srv_normalize_init_values(void)
srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;
- srv_pool_size = srv_pool_size / (UNIV_PAGE_SIZE / 1024);
-
- srv_awe_window_size = srv_awe_window_size / UNIV_PAGE_SIZE;
-
- if (srv_use_awe) {
- /* If we are using AWE we must save memory in the 32-bit
- address space of the process, and cannot bind the lock
- table size to the real buffer pool size. */
-
- srv_lock_table_size = 20 * srv_awe_window_size;
- } else {
- srv_lock_table_size = 5 * srv_pool_size;
- }
+ srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE);
return(DB_SUCCESS);
}
-/*************************************************************************
-Boots the InnoDB server. */
-
+/*********************************************************************//**
+Boots the InnoDB server.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
ulint
srv_boot(void)
/*==========*/
- /* out: DB_SUCCESS or error code */
{
ulint err;
@@ -1322,15 +1379,14 @@ srv_boot(void)
return(DB_SUCCESS);
}
-#ifndef UNIV_HOTBACKUP
-/*************************************************************************
+/*********************************************************************//**
Reserves a slot in the thread table for the current MySQL OS thread.
-NOTE! The kernel mutex has to be reserved by the caller! */
+NOTE! The kernel mutex has to be reserved by the caller!
+@return reserved slot */
static
srv_slot_t*
srv_table_reserve_slot_for_mysql(void)
/*==================================*/
- /* out: reserved slot */
{
srv_slot_t* slot;
ulint i;
@@ -1387,33 +1443,32 @@ srv_table_reserve_slot_for_mysql(void)
return(slot);
}
-#endif /* !UNIV_HOTBACKUP */
-/*******************************************************************
+/***************************************************************//**
Puts a MySQL OS thread to wait for a lock to be released. If an error
occurs during the wait trx->error_state associated with thr is
!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
are possible errors. DB_DEADLOCK is returned if selective deadlock
resolution chose this transaction as a victim. */
-
+UNIV_INTERN
void
srv_suspend_mysql_thread(
/*=====================*/
- que_thr_t* thr) /* in: query thread associated with the MySQL
+ que_thr_t* thr) /*!< in: query thread associated with the MySQL
OS thread */
{
-#ifndef UNIV_HOTBACKUP
srv_slot_t* slot;
os_event_t event;
double wait_time;
trx_t* trx;
- ibool had_dict_lock = FALSE;
+ ulint had_dict_lock;
ibool was_declared_inside_innodb = FALSE;
- ib_longlong start_time = 0;
- ib_longlong finish_time;
+ ib_int64_t start_time = 0;
+ ib_int64_t finish_time;
ulint diff_time;
ulint sec;
ulint ms;
+ ulong lock_wait_timeout;
ut_ad(!mutex_own(&kernel_mutex));
@@ -1462,7 +1517,7 @@ srv_suspend_mysql_thread(
if (ut_usectime(&sec, &ms) == -1) {
start_time = -1;
} else {
- start_time = (ib_longlong)sec * 1000000 + ms;
+ start_time = (ib_int64_t) sec * 1000000 + ms;
}
}
/* Wake the lock timeout monitor thread, if it is suspended */
@@ -1482,23 +1537,35 @@ srv_suspend_mysql_thread(
srv_conc_force_exit_innodb(trx);
}
- /* Release possible foreign key check latch */
- if (trx->dict_operation_lock_mode == RW_S_LATCH) {
-
- had_dict_lock = TRUE;
+ had_dict_lock = trx->dict_operation_lock_mode;
+ switch (had_dict_lock) {
+ case RW_S_LATCH:
+ /* Release foreign key check latch */
row_mysql_unfreeze_data_dictionary(trx);
+ break;
+ case RW_X_LATCH:
+ /* Release fast index creation latch */
+ row_mysql_unlock_data_dictionary(trx);
+ break;
}
ut_a(trx->dict_operation_lock_mode == 0);
- /* Wait for the release */
+ /* Suspend this thread and wait for the event. */
os_event_wait(event);
- if (had_dict_lock) {
+ /* After resuming, reacquire the data dictionary latch if
+ necessary. */
+ switch (had_dict_lock) {
+ case RW_S_LATCH:
row_mysql_freeze_data_dictionary(trx);
+ break;
+ case RW_X_LATCH:
+ row_mysql_lock_data_dictionary(trx);
+ break;
}
if (was_declared_inside_innodb) {
@@ -1520,7 +1587,7 @@ srv_suspend_mysql_thread(
if (ut_usectime(&sec, &ms) == -1) {
finish_time = -1;
} else {
- finish_time = (ib_longlong)sec * 1000000 + ms;
+ finish_time = (ib_int64_t) sec * 1000000 + ms;
}
diff_time = (ulint) (finish_time - start_time);
@@ -1543,30 +1610,29 @@ srv_suspend_mysql_thread(
mutex_exit(&kernel_mutex);
- if (srv_lock_wait_timeout < 100000000
- && wait_time > (double)srv_lock_wait_timeout) {
+ /* InnoDB system transactions (such as the purge, and
+ incomplete transactions that are being rolled back after crash
+ recovery) will use the global value of
+ innodb_lock_wait_timeout, because trx->mysql_thd == NULL. */
+ lock_wait_timeout = thd_lock_wait_timeout(trx->mysql_thd);
+
+ if (lock_wait_timeout < 100000000
+ && wait_time > (double) lock_wait_timeout) {
trx->error_state = DB_LOCK_WAIT_TIMEOUT;
}
-#else /* UNIV_HOTBACKUP */
- /* This function depends on MySQL code that is not included in
- InnoDB Hot Backup builds. Besides, this function should never
- be called in InnoDB Hot Backup. */
- ut_error;
-#endif /* UNIV_HOTBACKUP */
}
-/************************************************************************
+/********************************************************************//**
Releases a MySQL OS thread waiting for a lock to be released, if the
thread is already suspended. */
-
+UNIV_INTERN
void
srv_release_mysql_thread_if_suspended(
/*==================================*/
- que_thr_t* thr) /* in: query thread associated with the
+ que_thr_t* thr) /*!< in: query thread associated with the
MySQL OS thread */
{
-#ifndef UNIV_HOTBACKUP
srv_slot_t* slot;
ulint i;
@@ -1586,16 +1652,9 @@ srv_release_mysql_thread_if_suspended(
}
/* not found */
-#else /* UNIV_HOTBACKUP */
- /* This function depends on MySQL code that is not included in
- InnoDB Hot Backup builds. Besides, this function should never
- be called in InnoDB Hot Backup. */
- ut_error;
-#endif /* UNIV_HOTBACKUP */
}
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************
+/******************************************************************//**
Refreshes the values used to calculate per-second averages. */
static
void
@@ -1623,16 +1682,16 @@ srv_refresh_innodb_monitor_stats(void)
mutex_exit(&srv_innodb_monitor_mutex);
}
-/**********************************************************************
+/******************************************************************//**
Outputs to a file the output of the InnoDB Monitor. */
-
+UNIV_INTERN
void
srv_printf_innodb_monitor(
/*======================*/
- FILE* file, /* in: output stream */
- ulint* trx_start, /* out: file position of the start of
+ FILE* file, /*!< in: output stream */
+ ulint* trx_start, /*!< out: file position of the start of
the list of active transactions */
- ulint* trx_end) /* out: file position of the end of
+ ulint* trx_end) /*!< out: file position of the end of
the list of active transactions */
{
double time_elapsed;
@@ -1662,6 +1721,11 @@ srv_printf_innodb_monitor(
(ulong)time_elapsed);
fputs("----------\n"
+ "BACKGROUND THREAD\n"
+ "----------\n", file);
+ srv_print_master_thread_info(file);
+
+ fputs("----------\n"
"SEMAPHORES\n"
"----------\n", file);
sync_print(file);
@@ -1737,13 +1801,6 @@ srv_printf_innodb_monitor(
fprintf(file, "Dictionary memory allocated " ULINTPF "\n",
dict_sys->size);
- if (srv_use_awe) {
- fprintf(file,
- "In addition to that %lu MB of AWE memory allocated\n",
- (ulong) (srv_pool_size
- / ((1024 * 1024) / UNIV_PAGE_SIZE)));
- }
-
buf_print_io(file);
fputs("--------------\n"
@@ -1806,11 +1863,12 @@ srv_printf_innodb_monitor(
fflush(file);
}
-/**********************************************************************
+/******************************************************************//**
Function to pass InnoDB status variables to MySQL */
-
+UNIV_INTERN
void
srv_export_innodb_status(void)
+/*==========================*/
{
mutex_enter(&srv_innodb_monitor_mutex);
@@ -1826,14 +1884,16 @@ srv_export_innodb_status(void)
export_vars.innodb_data_reads = os_n_file_reads;
export_vars.innodb_data_writes = os_n_file_writes;
export_vars.innodb_data_written = srv_data_written;
- export_vars.innodb_buffer_pool_read_requests = buf_pool->n_page_gets;
+ export_vars.innodb_buffer_pool_read_requests = buf_pool->stat.n_page_gets;
export_vars.innodb_buffer_pool_write_requests
= srv_buf_pool_write_requests;
export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free;
export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed;
export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads;
- export_vars.innodb_buffer_pool_read_ahead_rnd = srv_read_ahead_rnd;
- export_vars.innodb_buffer_pool_read_ahead_seq = srv_read_ahead_seq;
+ export_vars.innodb_buffer_pool_read_ahead
+ = buf_pool->stat.n_ra_pages_read;
+ export_vars.innodb_buffer_pool_read_ahead_evicted
+ = buf_pool->stat.n_ra_pages_evicted;
export_vars.innodb_buffer_pool_pages_data
= UT_LIST_GET_LEN(buf_pool->LRU);
export_vars.innodb_buffer_pool_pages_dirty
@@ -1846,9 +1906,14 @@ srv_export_innodb_status(void)
#endif /* UNIV_DEBUG */
export_vars.innodb_buffer_pool_pages_total = buf_pool->curr_size;
- export_vars.innodb_buffer_pool_pages_misc = buf_pool->max_size
+ export_vars.innodb_buffer_pool_pages_misc = buf_pool->curr_size
- UT_LIST_GET_LEN(buf_pool->LRU)
- UT_LIST_GET_LEN(buf_pool->free);
+#ifdef HAVE_ATOMIC_BUILTINS
+ export_vars.innodb_have_atomic_builtins = 1;
+#else
+ export_vars.innodb_have_atomic_builtins = 0;
+#endif
export_vars.innodb_page_size = UNIV_PAGE_SIZE;
export_vars.innodb_log_waits = srv_log_waits;
export_vars.innodb_os_log_written = srv_os_log_written;
@@ -1859,9 +1924,9 @@ srv_export_innodb_status(void)
export_vars.innodb_log_writes = srv_log_writes;
export_vars.innodb_dblwr_pages_written = srv_dblwr_pages_written;
export_vars.innodb_dblwr_writes = srv_dblwr_writes;
- export_vars.innodb_pages_created = buf_pool->n_pages_created;
- export_vars.innodb_pages_read = buf_pool->n_pages_read;
- export_vars.innodb_pages_written = buf_pool->n_pages_written;
+ export_vars.innodb_pages_created = buf_pool->stat.n_pages_created;
+ export_vars.innodb_pages_read = buf_pool->stat.n_pages_read;
+ export_vars.innodb_pages_written = buf_pool->stat.n_pages_written;
export_vars.innodb_row_lock_waits = srv_n_lock_wait_count;
export_vars.innodb_row_lock_current_waits
= srv_n_lock_wait_current_count;
@@ -1882,16 +1947,16 @@ srv_export_innodb_status(void)
mutex_exit(&srv_innodb_monitor_mutex);
}
-/*************************************************************************
+/*********************************************************************//**
A thread which wakes up threads whose lock wait may have lasted too long.
-This also prints the info output by various InnoDB monitors. */
-
+This also prints the info output by various InnoDB monitors.
+@return a dummy parameter */
+UNIV_INTERN
os_thread_ret_t
srv_lock_timeout_and_monitor_thread(
/*================================*/
- /* out: a dummy parameter */
void* arg __attribute__((unused)))
- /* in: a dummy parameter required by
+ /*!< in: a dummy parameter required by
os_thread_create */
{
srv_slot_t* slot;
@@ -2001,12 +2066,19 @@ loop:
slot = srv_mysql_table + i;
if (slot->in_use) {
+ trx_t* trx;
+ ulong lock_wait_timeout;
+
some_waits = TRUE;
wait_time = ut_difftime(ut_time(), slot->suspend_time);
- if (srv_lock_wait_timeout < 100000000
- && (wait_time > (double) srv_lock_wait_timeout
+ trx = thr_get_trx(slot->thr);
+ lock_wait_timeout = thd_lock_wait_timeout(
+ trx->mysql_thd);
+
+ if (lock_wait_timeout < 100000000
+ && (wait_time > (double) lock_wait_timeout
|| wait_time < 0)) {
/* Timeout exceeded or a wrap-around in system
@@ -2016,10 +2088,9 @@ loop:
possible that the lock has already been
granted: in that case do nothing */
- if (thr_get_trx(slot->thr)->wait_lock) {
+ if (trx->wait_lock) {
lock_cancel_waiting_and_release(
- thr_get_trx(slot->thr)
- ->wait_lock);
+ trx->wait_lock);
}
}
}
@@ -2063,22 +2134,22 @@ exit_func:
OS_THREAD_DUMMY_RETURN;
}
-/*************************************************************************
+/*********************************************************************//**
A thread which prints warnings about semaphore waits which have lasted
-too long. These can be used to track bugs which cause hangs. */
-
+too long. These can be used to track bugs which cause hangs.
+@return a dummy parameter */
+UNIV_INTERN
os_thread_ret_t
srv_error_monitor_thread(
/*=====================*/
- /* out: a dummy parameter */
void* arg __attribute__((unused)))
- /* in: a dummy parameter required by
+ /*!< in: a dummy parameter required by
os_thread_create */
{
/* number of successive fatal timeouts observed */
- ulint fatal_cnt = 0;
- dulint old_lsn;
- dulint new_lsn;
+ ulint fatal_cnt = 0;
+ ib_uint64_t old_lsn;
+ ib_uint64_t new_lsn;
old_lsn = srv_start_lsn;
@@ -2094,18 +2165,15 @@ loop:
new_lsn = log_get_lsn();
- if (ut_dulint_cmp(new_lsn, old_lsn) < 0) {
+ if (new_lsn < old_lsn) {
ut_print_timestamp(stderr);
fprintf(stderr,
- " InnoDB: Error: old log sequence number %lu %lu"
+ " InnoDB: Error: old log sequence number %llu"
" was greater\n"
- "InnoDB: than the new log sequence number %lu %lu!\n"
+ "InnoDB: than the new log sequence number %llu!\n"
"InnoDB: Please submit a bug report"
" to http://bugs.mysql.com\n",
- (ulong) ut_dulint_get_high(old_lsn),
- (ulong) ut_dulint_get_low(old_lsn),
- (ulong) ut_dulint_get_high(new_lsn),
- (ulong) ut_dulint_get_low(new_lsn));
+ old_lsn, new_lsn);
}
old_lsn = new_lsn;
@@ -2117,10 +2185,17 @@ loop:
srv_refresh_innodb_monitor_stats();
}
+ /* Update the statistics collected for deciding LRU
+ eviction policy. */
+ buf_LRU_stat_update();
+
+ /* Update the statistics collected for flush rate policy. */
+ buf_flush_stat_update();
+
/* In case mutex_exit is not a memory barrier, it is
theoretically possible some threads are left waiting though
the semaphore is already released. Wake up those threads: */
-
+
sync_arr_wake_threads_if_sema_free();
if (sync_array_print_long_waits()) {
@@ -2162,13 +2237,13 @@ loop:
OS_THREAD_DUMMY_RETURN;
}
-/***********************************************************************
+/*******************************************************************//**
Tells the InnoDB server that there has been activity in the database
and wakes up the master thread if it is suspended (not sleeping). Used
in the MySQL interface. Note that there is a small chance that the master
thread stays suspended (we do not protect our operation with the kernel
mutex, for performance reasons). */
-
+UNIV_INTERN
void
srv_active_wake_master_thread(void)
/*===============================*/
@@ -2185,9 +2260,9 @@ srv_active_wake_master_thread(void)
}
}
-/***********************************************************************
+/*******************************************************************//**
Wakes up the master thread if it is suspended or being suspended. */
-
+UNIV_INTERN
void
srv_wake_master_thread(void)
/*========================*/
@@ -2201,22 +2276,40 @@ srv_wake_master_thread(void)
mutex_exit(&kernel_mutex);
}
-/*************************************************************************
-The master thread controlling the server. */
+/******************************************************************//**
+Called by the master thread to sync the log buffer in the background, at
+most once per second as tracked by srv_last_log_flush_time. This ensures
+that not more than about one second of trxs is lost in case of a crash
+when innodb_flush_log_at_trx_commit != 1 */
+static
+void
+srv_sync_log_buffer_in_background(void)
+/*===================================*/
+{
+ time_t current_time = time(NULL);
+ srv_main_thread_op_info = "flushing log";
+ if (difftime(current_time, srv_last_log_flush_time) >= 1) {
+ log_buffer_sync_in_background(TRUE);
+ srv_last_log_flush_time = current_time;
+ srv_log_writes_and_flush++;
+ }
+}
+
+/*********************************************************************//**
+The master thread controlling the server.
+@return a dummy parameter */
+UNIV_INTERN
os_thread_ret_t
srv_master_thread(
/*==============*/
- /* out: a dummy parameter */
void* arg __attribute__((unused)))
- /* in: a dummy parameter required by
+ /*!< in: a dummy parameter required by
os_thread_create */
{
os_event_t event;
- time_t last_flush_time;
- time_t current_time;
ulint old_activity_count;
- ulint n_pages_purged;
+ ulint n_pages_purged = 0;
ulint n_bytes_merged;
ulint n_pages_flushed;
ulint n_bytes_archived;
@@ -2250,8 +2343,8 @@ loop:
srv_main_thread_op_info = "reserving kernel mutex";
- n_ios_very_old = log_sys->n_log_ios + buf_pool->n_pages_read
- + buf_pool->n_pages_written;
+ n_ios_very_old = log_sys->n_log_ios + buf_pool->stat.n_pages_read
+ + buf_pool->stat.n_pages_written;
mutex_enter(&kernel_mutex);
/* Store the user activity counter at the start of this loop */
@@ -2267,16 +2360,19 @@ loop:
/* ---- We run the following loop approximately once per second
when there is database activity */
+ srv_last_log_flush_time = time(NULL);
skip_sleep = FALSE;
for (i = 0; i < 10; i++) {
- n_ios_old = log_sys->n_log_ios + buf_pool->n_pages_read
- + buf_pool->n_pages_written;
+ n_ios_old = log_sys->n_log_ios + buf_pool->stat.n_pages_read
+ + buf_pool->stat.n_pages_written;
srv_main_thread_op_info = "sleeping";
+ srv_main_1_second_loops++;
if (!skip_sleep) {
os_thread_sleep(1000000);
+ srv_main_sleeps++;
}
skip_sleep = FALSE;
@@ -2296,33 +2392,27 @@ loop:
goto background_loop;
}
- /* We flush the log once in a second even if no commit
- is issued or the we have specified in my.cnf no flush
- at transaction commit */
-
- srv_main_thread_op_info = "flushing log";
- log_buffer_flush_to_disk();
+ /* Flush logs if needed */
+ srv_sync_log_buffer_in_background();
srv_main_thread_op_info = "making checkpoint";
log_free_check();
- /* If there were less than 5 i/os during the
- one second sleep, we assume that there is free
- disk i/o capacity available, and it makes sense to
- do an insert buffer merge. */
+ /* If i/os during one second sleep were less than 5% of
+ capacity, we assume that there is free disk i/o capacity
+ available, and it makes sense to do an insert buffer merge. */
n_pend_ios = buf_get_n_pending_ios()
+ log_sys->n_pending_writes;
- n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
- + buf_pool->n_pages_written;
- if (n_pend_ios < 3 && (n_ios - n_ios_old < 5)) {
+ n_ios = log_sys->n_log_ios + buf_pool->stat.n_pages_read
+ + buf_pool->stat.n_pages_written;
+ if (n_pend_ios < SRV_PEND_IO_THRESHOLD
+ && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) {
srv_main_thread_op_info = "doing insert buffer merge";
- ibuf_contract_for_n_pages(
- TRUE, srv_insert_buffer_batch_size / 4);
-
- srv_main_thread_op_info = "flushing log";
+ ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
- log_buffer_flush_to_disk();
+ /* Flush logs if needed */
+ srv_sync_log_buffer_in_background();
}
if (UNIV_UNLIKELY(buf_get_modified_ratio_pct()
@@ -2331,8 +2421,11 @@ loop:
/* Try to keep the number of modified pages in the
buffer pool under the limit wished by the user */
- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
- ut_dulint_max);
+ srv_main_thread_op_info =
+ "flushing buffer pool pages";
+ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
+ PCT_IO(100),
+ IB_ULONGLONG_MAX);
/* If we had to do the flush, it may have taken
even more than 1 second, and also, there may be more
@@ -2340,6 +2433,24 @@ loop:
iteration of this loop. */
skip_sleep = TRUE;
+ } else if (srv_adaptive_flushing) {
+
+ /* Try to keep the rate of flushing of dirty
+ pages such that redo log generation does not
+ produce bursts of IO at checkpoint time. */
+ ulint n_flush = buf_flush_get_desired_flush_rate();
+
+ if (n_flush) {
+ srv_main_thread_op_info =
+ "flushing buffer pool pages";
+ n_flush = ut_min(PCT_IO(100), n_flush);
+ n_pages_flushed =
+ buf_flush_batch(
+ BUF_FLUSH_LIST,
+ n_flush,
+ IB_ULONGLONG_MAX);
+ skip_sleep = TRUE;
+ }
}
if (srv_activity_count == old_activity_count) {
@@ -2359,39 +2470,43 @@ loop:
seconds */
mem_validate_all_blocks();
#endif
- /* If there were less than 200 i/os during the 10 second period,
- we assume that there is free disk i/o capacity available, and it
- makes sense to flush 100 pages. */
+ /* If i/os during the 10 second period were less than 200% of
+ capacity, we assume that there is free disk i/o capacity
+ available, and it makes sense to flush srv_io_capacity pages.
+
+ Note that this is done regardless of the fraction of dirty
+ pages relative to the max requested by the user. The one second
+ loop above requests writes for that case. The writes done here
+ are not required, and may be disabled. */
n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
- n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
- + buf_pool->n_pages_written;
- if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) {
+ n_ios = log_sys->n_log_ios + buf_pool->stat.n_pages_read
+ + buf_pool->stat.n_pages_written;
+
+ srv_main_10_second_loops++;
+ if (n_pend_ios < SRV_PEND_IO_THRESHOLD
+ && (n_ios - n_ios_very_old < SRV_PAST_IO_ACTIVITY)) {
srv_main_thread_op_info = "flushing buffer pool pages";
- buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
+ buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
+ IB_ULONGLONG_MAX);
- srv_main_thread_op_info = "flushing log";
- log_buffer_flush_to_disk();
+ /* Flush logs if needed */
+ srv_sync_log_buffer_in_background();
}
/* We run a batch of insert buffer merge every 10 seconds,
even if the server were active */
srv_main_thread_op_info = "doing insert buffer merge";
- ibuf_contract_for_n_pages(TRUE, srv_insert_buffer_batch_size / 4);
+ ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
- srv_main_thread_op_info = "flushing log";
- log_buffer_flush_to_disk();
+ /* Flush logs if needed */
+ srv_sync_log_buffer_in_background();
/* We run a full purge every 10 seconds, even if the server
were active */
-
- n_pages_purged = 1;
-
- last_flush_time = time(NULL);
-
- while (n_pages_purged) {
+ do {
if (srv_fast_shutdown && srv_shutdown_state > 0) {
@@ -2401,15 +2516,10 @@ loop:
srv_main_thread_op_info = "purging";
n_pages_purged = trx_purge();
- current_time = time(NULL);
-
- if (difftime(current_time, last_flush_time) > 1) {
- srv_main_thread_op_info = "flushing log";
+ /* Flush logs if needed */
+ srv_sync_log_buffer_in_background();
- log_buffer_flush_to_disk();
- last_flush_time = current_time;
- }
- }
+ } while (n_pages_purged);
srv_main_thread_op_info = "flushing buffer pool pages";
@@ -2421,15 +2531,17 @@ loop:
(> 70 %), we assume we can afford reserving the disk(s) for
the time it requires to flush 100 pages */
- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
- ut_dulint_max);
+ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
+ PCT_IO(100),
+ IB_ULONGLONG_MAX);
} else {
/* Otherwise, we only flush a small number of pages so that
we do not unnecessarily use much disk i/o capacity from
other work */
- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10,
- ut_dulint_max);
+ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
+ PCT_IO(10),
+ IB_ULONGLONG_MAX);
}
srv_main_thread_op_info = "making checkpoint";
@@ -2462,7 +2574,7 @@ background_loop:
/* The server has been quiet for a while: start running background
operations */
-
+ srv_main_background_loops++;
srv_main_thread_op_info = "doing background drop tables";
n_tables_to_drop = row_drop_tables_for_mysql_in_background();
@@ -2479,12 +2591,7 @@ background_loop:
srv_main_thread_op_info = "purging";
/* Run a full purge */
-
- n_pages_purged = 1;
-
- last_flush_time = time(NULL);
-
- while (n_pages_purged) {
+ do {
if (srv_fast_shutdown && srv_shutdown_state > 0) {
break;
@@ -2493,15 +2600,10 @@ background_loop:
srv_main_thread_op_info = "purging";
n_pages_purged = trx_purge();
- current_time = time(NULL);
+ /* Flush logs if needed */
+ srv_sync_log_buffer_in_background();
- if (difftime(current_time, last_flush_time) > 1) {
- srv_main_thread_op_info = "flushing log";
-
- log_buffer_flush_to_disk();
- last_flush_time = current_time;
- }
- }
+ } while (n_pages_purged);
srv_main_thread_op_info = "reserving kernel mutex";
@@ -2517,8 +2619,12 @@ background_loop:
if (srv_fast_shutdown && srv_shutdown_state > 0) {
n_bytes_merged = 0;
} else {
- n_bytes_merged = ibuf_contract_for_n_pages(
- TRUE, srv_insert_buffer_batch_size);
+ /* This should do an amount of IO similar to the number of
+ dirty pages that will be flushed in the call to
+ buf_flush_batch below. Otherwise, the system favors
+ clean pages over cleanup throughput. */
+ n_bytes_merged = ibuf_contract_for_n_pages(FALSE,
+ PCT_IO(100));
}
srv_main_thread_op_info = "reserving kernel mutex";
@@ -2532,10 +2638,11 @@ background_loop:
flush_loop:
srv_main_thread_op_info = "flushing buffer pool pages";
-
+ srv_main_flush_loops++;
if (srv_fast_shutdown < 2) {
- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
- ut_dulint_max);
+ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
+ PCT_IO(100),
+ IB_ULONGLONG_MAX);
} else {
/* In the fastest shutdown we do not flush the buffer pool
to data files: we set n_pages_flushed to 0 artificially. */
@@ -2555,9 +2662,8 @@ flush_loop:
srv_main_thread_op_info = "waiting for buffer pool flush to end";
buf_flush_wait_batch_end(BUF_FLUSH_LIST);
- srv_main_thread_op_info = "flushing log";
-
- log_buffer_flush_to_disk();
+ /* Flush logs if needed */
+ srv_sync_log_buffer_in_background();
srv_main_thread_op_info = "making checkpoint";
@@ -2650,4 +2756,3 @@ suspend_thread:
OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
index ea88039f3dd..d5f6120ca31 100644
--- a/storage/innobase/srv/srv0start.c
+++ b/storage/innobase/srv/srv0start.c
@@ -1,128 +1,175 @@
-/************************************************************************
-Starts the InnoDB database server
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+/***********************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2009, Percona Inc.
-(c) 1996-2000 Innobase Oy
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+***********************************************************************/
+
+/********************************************************************//**
+@file srv/srv0start.c
+Starts the InnoDB database server
Created 2/16/1996 Heikki Tuuri
*************************************************************************/
-#include "os0proc.h"
-#include "sync0sync.h"
#include "ut0mem.h"
#include "mem0mem.h"
-#include "mem0pool.h"
#include "data0data.h"
#include "data0type.h"
#include "dict0dict.h"
#include "buf0buf.h"
-#include "buf0flu.h"
-#include "buf0rea.h"
#include "os0file.h"
#include "os0thread.h"
#include "fil0fil.h"
#include "fsp0fsp.h"
#include "rem0rec.h"
-#include "rem0cmp.h"
#include "mtr0mtr.h"
#include "log0log.h"
#include "log0recv.h"
#include "page0page.h"
#include "page0cur.h"
#include "trx0trx.h"
-#include "dict0boot.h"
-#include "dict0load.h"
#include "trx0sys.h"
-#include "dict0crea.h"
#include "btr0btr.h"
-#include "btr0pcur.h"
#include "btr0cur.h"
-#include "btr0sea.h"
#include "rem0rec.h"
-#include "srv0srv.h"
-#include "que0que.h"
-#include "usr0sess.h"
-#include "lock0lock.h"
-#include "trx0roll.h"
-#include "trx0purge.h"
-#include "row0ins.h"
-#include "row0sel.h"
-#include "row0upd.h"
-#include "row0row.h"
-#include "row0mysql.h"
-#include "lock0lock.h"
#include "ibuf0ibuf.h"
-#include "pars0pars.h"
-#include "btr0sea.h"
#include "srv0start.h"
-#include "que0que.h"
-
-/* Log sequence number immediately after startup */
-dulint srv_start_lsn;
-/* Log sequence number at shutdown */
-dulint srv_shutdown_lsn;
+#include "srv0srv.h"
+#ifndef UNIV_HOTBACKUP
+# include "os0proc.h"
+# include "sync0sync.h"
+# include "buf0flu.h"
+# include "buf0rea.h"
+# include "dict0boot.h"
+# include "dict0load.h"
+# include "que0que.h"
+# include "usr0sess.h"
+# include "lock0lock.h"
+# include "trx0roll.h"
+# include "trx0purge.h"
+# include "lock0lock.h"
+# include "pars0pars.h"
+# include "btr0sea.h"
+# include "rem0cmp.h"
+# include "dict0crea.h"
+# include "row0ins.h"
+# include "row0sel.h"
+# include "row0upd.h"
+# include "row0row.h"
+# include "row0mysql.h"
+# include "btr0pcur.h"
+# include "thr0loc.h"
+# include "os0sync.h" /* for INNODB_RW_LOCKS_USE_ATOMICS */
+
+/** Log sequence number immediately after startup */
+UNIV_INTERN ib_uint64_t srv_start_lsn;
+/** Log sequence number at shutdown */
+UNIV_INTERN ib_uint64_t srv_shutdown_lsn;
#ifdef HAVE_DARWIN_THREADS
# include <sys/utsname.h>
-ibool srv_have_fullfsync = FALSE;
+/** TRUE if the F_FULLFSYNC option is available */
+UNIV_INTERN ibool srv_have_fullfsync = FALSE;
#endif
-ibool srv_start_raw_disk_in_use = FALSE;
-
-ulint srv_sizeof_trx_t_in_ha_innodb_cc;
-
-ibool srv_startup_is_before_trx_rollback_phase = FALSE;
-ibool srv_is_being_started = FALSE;
-#ifndef UNIV_HOTBACKUP
+/** TRUE if a raw partition is in use */
+UNIV_INTERN ibool srv_start_raw_disk_in_use = FALSE;
+
+/** TRUE if the server is being started, before rolling back any
+incomplete transactions */
+UNIV_INTERN ibool srv_startup_is_before_trx_rollback_phase = FALSE;
+/** TRUE if the server is being started */
+UNIV_INTERN ibool srv_is_being_started = FALSE;
+/** TRUE if the server was successfully started */
+UNIV_INTERN ibool srv_was_started = FALSE;
+/** TRUE if innobase_start_or_create_for_mysql() has been called */
static ibool srv_start_has_been_called = FALSE;
-static ibool srv_was_started = FALSE;
-#endif /* !UNIV_HOTBACKUP */
-/* At a shutdown the value first climbs to SRV_SHUTDOWN_CLEANUP
-and then to SRV_SHUTDOWN_LAST_PHASE */
-ulint srv_shutdown_state = 0;
+/** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to
+SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
+UNIV_INTERN enum srv_shutdown_state srv_shutdown_state = SRV_SHUTDOWN_NONE;
-#ifndef UNIV_HOTBACKUP
+/** Files comprising the system tablespace */
static os_file_t files[1000];
+/** Mutex protecting the ios count */
static mutex_t ios_mutex;
+/** Count of I/O operations in io_handler_thread() */
static ulint ios;
+/** io_handler_thread parameters for thread identification */
static ulint n[SRV_MAX_N_IO_THREADS + 5];
+/** io_handler_thread identifiers */
static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 5];
-/* We use this mutex to test the return value of pthread_mutex_trylock
+/** We use this mutex to test the return value of pthread_mutex_trylock
on successful locking. HP-UX does NOT return 0, though Linux et al do. */
static os_fast_mutex_t srv_os_test_mutex;
-/* Name of srv_monitor_file */
+/** Name of srv_monitor_file */
static char* srv_monitor_file_name;
#endif /* !UNIV_HOTBACKUP */
+/** Alias for OS_AIO_N_PENDING_IOS_PER_THREAD */
#define SRV_N_PENDING_IOS_PER_THREAD OS_AIO_N_PENDING_IOS_PER_THREAD
#define SRV_MAX_N_PENDING_SYNC_IOS 100
-/* Avoid warnings when using purify */
-
-#ifdef HAVE_purify
-static int inno_bcmp(register const char *s1, register const char *s2,
- register uint len)
-{
- while ((len-- != 0) && (*s1++ == *s2++))
- ;
-
- return(len + 1);
-}
-#define memcmp(A,B,C) inno_bcmp((A),(B),(C))
-#endif
-
+/*********************************************************************//**
+Convert a numeric string that optionally ends in G or M, to a number
+containing megabytes.
+@return next character in string */
static
char*
srv_parse_megabytes(
/*================*/
- /* out: next character in string */
- char* str, /* in: string containing a quantity in bytes */
- ulint* megs) /* out: the number in megabytes */
+ char* str, /*!< in: string containing a quantity in bytes */
+ ulint* megs) /*!< out: the number in megabytes */
{
char* endp;
ulint size;
@@ -147,36 +194,26 @@ srv_parse_megabytes(
return(str);
}
-/*************************************************************************
+/*********************************************************************//**
Reads the data files and their sizes from a character string given in
-the .cnf file. */
-
+the .cnf file.
+@return TRUE if ok, FALSE on parse error */
+UNIV_INTERN
ibool
srv_parse_data_file_paths_and_sizes(
/*================================*/
- /* out: TRUE if ok, FALSE if parsing
- error */
- char* str, /* in: the data file path string */
- char*** data_file_names, /* out, own: array of data file
- names */
- ulint** data_file_sizes, /* out, own: array of data file sizes
- in megabytes */
- ulint** data_file_is_raw_partition,/* out, own: array of flags
- showing which data files are raw
- partitions */
- ulint* n_data_files, /* out: number of data files */
- ibool* is_auto_extending, /* out: TRUE if the last data file is
- auto-extending */
- ulint* max_auto_extend_size) /* out: max auto extend size for the
- last file if specified, 0 if not */
+ char* str) /*!< in/out: the data file path string */
{
char* input_str;
char* path;
ulint size;
ulint i = 0;
- *is_auto_extending = FALSE;
- *max_auto_extend_size = 0;
+ srv_auto_extend_last_data_file = FALSE;
+ srv_last_file_size_max = 0;
+ srv_data_file_names = NULL;
+ srv_data_file_sizes = NULL;
+ srv_data_file_is_raw_partition = NULL;
input_str = str;
@@ -253,11 +290,12 @@ srv_parse_data_file_paths_and_sizes(
return(FALSE);
}
- *data_file_names = (char**)ut_malloc(i * sizeof(void*));
- *data_file_sizes = (ulint*)ut_malloc(i * sizeof(ulint));
- *data_file_is_raw_partition = (ulint*)ut_malloc(i * sizeof(ulint));
+ srv_data_file_names = malloc(i * sizeof *srv_data_file_names);
+ srv_data_file_sizes = malloc(i * sizeof *srv_data_file_sizes);
+ srv_data_file_is_raw_partition = malloc(
+ i * sizeof *srv_data_file_is_raw_partition);
- *n_data_files = i;
+ srv_n_data_files = i;
/* Then store the actual values to our arrays */
@@ -287,13 +325,13 @@ srv_parse_data_file_paths_and_sizes(
str = srv_parse_megabytes(str, &size);
- (*data_file_names)[i] = path;
- (*data_file_sizes)[i] = size;
+ srv_data_file_names[i] = path;
+ srv_data_file_sizes[i] = size;
if (0 == strncmp(str, ":autoextend",
(sizeof ":autoextend") - 1)) {
- *is_auto_extending = TRUE;
+ srv_auto_extend_last_data_file = TRUE;
str += (sizeof ":autoextend") - 1;
@@ -303,7 +341,7 @@ srv_parse_data_file_paths_and_sizes(
str += (sizeof ":max:") - 1;
str = srv_parse_megabytes(
- str, max_auto_extend_size);
+ str, &srv_last_file_size_max);
}
if (*str != '\0') {
@@ -312,21 +350,21 @@ srv_parse_data_file_paths_and_sizes(
}
}
- (*data_file_is_raw_partition)[i] = 0;
+ (srv_data_file_is_raw_partition)[i] = 0;
if (strlen(str) >= 6
&& *str == 'n'
&& *(str + 1) == 'e'
&& *(str + 2) == 'w') {
str += 3;
- (*data_file_is_raw_partition)[i] = SRV_NEW_RAW;
+ (srv_data_file_is_raw_partition)[i] = SRV_NEW_RAW;
}
if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
str += 3;
- if ((*data_file_is_raw_partition)[i] == 0) {
- (*data_file_is_raw_partition)[i] = SRV_OLD_RAW;
+ if ((srv_data_file_is_raw_partition)[i] == 0) {
+ (srv_data_file_is_raw_partition)[i] = SRV_OLD_RAW;
}
}
@@ -340,22 +378,22 @@ srv_parse_data_file_paths_and_sizes(
return(TRUE);
}
-/*************************************************************************
+/*********************************************************************//**
Reads log group home directories from a character string given in
-the .cnf file. */
-
+the .cnf file.
+@return TRUE if ok, FALSE on parse error */
+UNIV_INTERN
ibool
srv_parse_log_group_home_dirs(
/*==========================*/
- /* out: TRUE if ok, FALSE if parsing
- error */
- char* str, /* in: character string */
- char*** log_group_home_dirs) /* out, own: log group home dirs */
+ char* str) /*!< in/out: character string */
{
char* input_str;
char* path;
ulint i = 0;
+ srv_log_group_home_dirs = NULL;
+
input_str = str;
/* First calculate the number of directories and check syntax:
@@ -385,7 +423,7 @@ srv_parse_log_group_home_dirs(
return(FALSE);
}
- *log_group_home_dirs = (char**) ut_malloc(i * sizeof(void*));
+ srv_log_group_home_dirs = malloc(i * sizeof *srv_log_group_home_dirs);
/* Then store the actual values to our array */
@@ -404,7 +442,7 @@ srv_parse_log_group_home_dirs(
str++;
}
- (*log_group_home_dirs)[i] = path;
+ srv_log_group_home_dirs[i] = path;
i++;
}
@@ -412,15 +450,34 @@ srv_parse_log_group_home_dirs(
return(TRUE);
}
+/*********************************************************************//**
+Frees the arrays allocated by srv_parse_data_file_paths_and_sizes() and
+srv_parse_log_group_home_dirs(), and resets each pointer to NULL. */
+UNIV_INTERN
+void
+srv_free_paths_and_sizes(void)
+/*==========================*/
+{
+ free(srv_data_file_names);
+ srv_data_file_names = NULL;
+ free(srv_data_file_sizes);
+ srv_data_file_sizes = NULL;
+ free(srv_data_file_is_raw_partition);
+ srv_data_file_is_raw_partition = NULL;
+ free(srv_log_group_home_dirs);
+ srv_log_group_home_dirs = NULL;
+}
+
#ifndef UNIV_HOTBACKUP
-/************************************************************************
-I/o-handler thread function. */
+/********************************************************************//**
+I/o-handler thread function.
+@return OS_THREAD_DUMMY_RETURN */
static
-
os_thread_ret_t
io_handler_thread(
/*==============*/
- void* arg)
+ void* arg) /*!< in: pointer to the number of the segment in
+ the aio array */
{
ulint segment;
ulint i;
@@ -439,6 +496,8 @@ io_handler_thread(
mutex_exit(&ios_mutex);
}
+ thr_local_free(os_thread_get_curr_id());
+
/* We count the number of threads in os_thread_exit(). A created
thread should always use that to exit and not use return() to exit.
The thread actually never comes here because it is exited in an
@@ -456,13 +515,13 @@ io_handler_thread(
#define SRV_PATH_SEPARATOR '/'
#endif
-/*************************************************************************
+/*********************************************************************//**
Normalizes a directory path for Windows: converts slashes to backslashes. */
-
+UNIV_INTERN
void
srv_normalize_path_for_win(
/*=======================*/
- char* str __attribute__((unused))) /* in/out: null-terminated
+ char* str __attribute__((unused))) /*!< in/out: null-terminated
character string */
{
#ifdef __WIN__
@@ -475,96 +534,72 @@ srv_normalize_path_for_win(
#endif
}
-/*************************************************************************
-Adds a slash or a backslash to the end of a string if it is missing
-and the string is not empty. */
-
-char*
-srv_add_path_separator_if_needed(
-/*=============================*/
- /* out: string which has the separator if the
- string is not empty */
- char* str) /* in: null-terminated character string */
-{
- char* out_str;
- ulint len = ut_strlen(str);
-
- if (len == 0 || str[len - 1] == SRV_PATH_SEPARATOR) {
-
- return(str);
- }
-
- out_str = ut_malloc(len + 2);
- memcpy(out_str, str, len);
- out_str[len] = SRV_PATH_SEPARATOR;
- out_str[len + 1] = 0;
-
- return(out_str);
-}
-
#ifndef UNIV_HOTBACKUP
-/*************************************************************************
+/*********************************************************************//**
Calculates the low 32 bits when a file size which is given as a number
-database pages is converted to the number of bytes. */
+of database pages is converted to the number of bytes.
+@return low 32 bits of file size when expressed in bytes */
static
ulint
srv_calc_low32(
/*===========*/
- /* out: low 32 bytes of file size when
- expressed in bytes */
- ulint file_size) /* in: file size in database pages */
+ ulint file_size) /*!< in: file size in database pages */
{
return(0xFFFFFFFFUL & (file_size << UNIV_PAGE_SIZE_SHIFT));
}
-/*************************************************************************
+/*********************************************************************//**
Calculates the high 32 bits when a file size which is given as a number
-database pages is converted to the number of bytes. */
+of database pages is converted to the number of bytes.
+@return high 32 bits of file size when expressed in bytes */
static
ulint
srv_calc_high32(
/*============*/
- /* out: high 32 bytes of file size when
- expressed in bytes */
- ulint file_size) /* in: file size in database pages */
+ ulint file_size) /*!< in: file size in database pages */
{
return(file_size >> (32 - UNIV_PAGE_SIZE_SHIFT));
}
-/*************************************************************************
-Creates or opens the log files and closes them. */
+/*********************************************************************//**
+Creates or opens the log files and closes them.
+@return DB_SUCCESS or error code */
static
ulint
open_or_create_log_file(
/*====================*/
- /* out: DB_SUCCESS or error code */
- ibool create_new_db, /* in: TRUE if we should create a
+ ibool create_new_db, /*!< in: TRUE if we should create a
new database */
- ibool* log_file_created, /* out: TRUE if new log file
+ ibool* log_file_created, /*!< out: TRUE if new log file
created */
- ibool log_file_has_been_opened,/* in: TRUE if a log file has been
+ ibool log_file_has_been_opened,/*!< in: TRUE if a log file has been
opened before: then it is an error
to try to create another log file */
- ulint k, /* in: log group number */
- ulint i) /* in: log file number in group */
+ ulint k, /*!< in: log group number */
+ ulint i) /*!< in: log file number in group */
{
ibool ret;
ulint size;
ulint size_high;
char name[10000];
+ ulint dirnamelen;
UT_NOT_USED(create_new_db);
*log_file_created = FALSE;
srv_normalize_path_for_win(srv_log_group_home_dirs[k]);
- srv_log_group_home_dirs[k] = srv_add_path_separator_if_needed(
- srv_log_group_home_dirs[k]);
- ut_a(strlen(srv_log_group_home_dirs[k])
- < (sizeof name) - 10 - sizeof "ib_logfile");
- sprintf(name, "%s%s%lu", srv_log_group_home_dirs[k],
- "ib_logfile", (ulong) i);
+ dirnamelen = strlen(srv_log_group_home_dirs[k]);
+ ut_a(dirnamelen < (sizeof name) - 10 - sizeof "ib_logfile");
+ memcpy(name, srv_log_group_home_dirs[k], dirnamelen);
+
+ /* Add a path separator if needed. */
+ if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
+ name[dirnamelen++] = SRV_PATH_SEPARATOR;
+ }
+
+ sprintf(name + dirnamelen, "%s%lu", "ib_logfile", (ulong) i);
files[i] = os_file_create(name, OS_FILE_CREATE, OS_FILE_NORMAL,
OS_LOG_FILE, &ret);
@@ -653,7 +688,7 @@ open_or_create_log_file(
which is for this log group */
fil_space_create(name,
- 2 * k + SRV_LOG_SPACE_FIRST_ID, FIL_LOG);
+ 2 * k + SRV_LOG_SPACE_FIRST_ID, 0, FIL_LOG);
}
ut_a(fil_validate());
@@ -668,7 +703,7 @@ open_or_create_log_file(
if (k == 0 && i == 0) {
arch_space_id = 2 * k + 1 + SRV_LOG_SPACE_FIRST_ID;
- fil_space_create("arch_log_space", arch_space_id, FIL_LOG);
+ fil_space_create("arch_log_space", arch_space_id, 0, FIL_LOG);
} else {
arch_space_id = ULINT_UNDEFINED;
}
@@ -684,24 +719,27 @@ open_or_create_log_file(
return(DB_SUCCESS);
}
-/*************************************************************************
-Creates or opens database data files and closes them. */
+/*********************************************************************//**
+Creates or opens database data files and closes them.
+@return DB_SUCCESS or error code */
static
ulint
open_or_create_data_files(
/*======================*/
- /* out: DB_SUCCESS or error code */
- ibool* create_new_db, /* out: TRUE if new database should be
- created */
+ ibool* create_new_db, /*!< out: TRUE if new database should be
+ created */
#ifdef UNIV_LOG_ARCHIVE
- ulint* min_arch_log_no,/* out: min of archived log numbers in data
- files */
- ulint* max_arch_log_no,/* out: */
+ ulint* min_arch_log_no,/*!< out: min of archived log
+ numbers in data files */
+ ulint* max_arch_log_no,/*!< out: max of archived log
+ numbers in data files */
#endif /* UNIV_LOG_ARCHIVE */
- dulint* min_flushed_lsn,/* out: min of flushed lsn values in data
- files */
- dulint* max_flushed_lsn,/* out: */
- ulint* sum_of_new_sizes)/* out: sum of sizes of the new files added */
+ ib_uint64_t* min_flushed_lsn,/*!< out: min of flushed lsn
+ values in data files */
+ ib_uint64_t* max_flushed_lsn,/*!< out: max of flushed lsn
+ values in data files */
+ ulint* sum_of_new_sizes)/*!< out: sum of sizes of the
+ new files added */
{
ibool ret;
ulint i;
@@ -724,14 +762,22 @@ open_or_create_data_files(
*create_new_db = FALSE;
srv_normalize_path_for_win(srv_data_home);
- srv_data_home = srv_add_path_separator_if_needed(srv_data_home);
for (i = 0; i < srv_n_data_files; i++) {
+ ulint dirnamelen;
+
srv_normalize_path_for_win(srv_data_file_names[i]);
+ dirnamelen = strlen(srv_data_home);
- ut_a(strlen(srv_data_home) + strlen(srv_data_file_names[i])
+ ut_a(dirnamelen + strlen(srv_data_file_names[i])
< (sizeof name) - 1);
- sprintf(name, "%s%s", srv_data_home, srv_data_file_names[i]);
+ memcpy(name, srv_data_home, dirnamelen);
+ /* Add a path separator if needed. */
+ if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
+ name[dirnamelen++] = SRV_PATH_SEPARATOR;
+ }
+
+ strcpy(name + dirnamelen, srv_data_file_names[i]);
if (srv_data_file_is_raw_partition[i] == 0) {
@@ -937,18 +983,13 @@ skip_size_check:
ut_a(ret);
if (i == 0) {
- fil_space_create(name, 0, FIL_TABLESPACE);
+ fil_space_create(name, 0, 0, FIL_TABLESPACE);
}
ut_a(fil_validate());
- if (srv_data_file_is_raw_partition[i]) {
-
- fil_node_create(name, srv_data_file_sizes[i], 0, TRUE);
- } else {
- fil_node_create(name, srv_data_file_sizes[i], 0,
- FALSE);
- }
+ fil_node_create(name, srv_data_file_sizes[i], 0,
+ srv_data_file_is_raw_partition[i] != 0);
}
ios = 0;
@@ -960,32 +1001,33 @@ skip_size_check:
/********************************************************************
Starts InnoDB and creates a new database if database files
-are not found and the user wants. Server parameters are
-read from a file of name "srv_init" in the ib_home directory. */
-
+are not found and the user wants.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
int
innobase_start_or_create_for_mysql(void)
/*====================================*/
- /* out: DB_SUCCESS or error code */
{
buf_pool_t* ret;
- ibool create_new_db;
- ibool log_file_created;
- ibool log_created = FALSE;
- ibool log_opened = FALSE;
- dulint min_flushed_lsn;
- dulint max_flushed_lsn;
+ ibool create_new_db;
+ ibool log_file_created;
+ ibool log_created = FALSE;
+ ibool log_opened = FALSE;
+ ib_uint64_t min_flushed_lsn;
+ ib_uint64_t max_flushed_lsn;
#ifdef UNIV_LOG_ARCHIVE
- ulint min_arch_log_no;
- ulint max_arch_log_no;
+ ulint min_arch_log_no;
+ ulint max_arch_log_no;
#endif /* UNIV_LOG_ARCHIVE */
- ulint sum_of_new_sizes;
- ulint sum_of_data_file_sizes;
- ulint tablespace_size_in_header;
- ulint err;
- ulint i;
- ibool srv_file_per_table_original_value = srv_file_per_table;
- mtr_t mtr;
+ ulint sum_of_new_sizes;
+ ulint sum_of_data_file_sizes;
+ ulint tablespace_size_in_header;
+ ulint err;
+ ulint i;
+ ulint io_limit;
+ my_bool srv_file_per_table_original_value
+ = srv_file_per_table;
+ mtr_t mtr;
#ifdef HAVE_DARWIN_THREADS
# ifdef F_FULLFSYNC
/* This executable has been compiled on Mac OS X 10.3 or later.
@@ -1019,8 +1061,11 @@ innobase_start_or_create_for_mysql(void)
(ulong)sizeof(ulint), (ulong)sizeof(void*));
}
- srv_file_per_table = FALSE; /* system tables are created in tablespace
- 0 */
+ /* System tables are created in tablespace 0. Thus, we must
+ temporarily clear srv_file_per_table. This is ok, because the
+ server will not accept connections (which could modify
+ innodb_file_per_table) until this function has returned. */
+ srv_file_per_table = FALSE;
#ifdef UNIV_DEBUG
fprintf(stderr,
"InnoDB: !!!!!!!! UNIV_DEBUG switched on !!!!!!!!!\n");
@@ -1042,27 +1087,22 @@ innobase_start_or_create_for_mysql(void)
"InnoDB: !!!!!!!! UNIV_SEARCH_DEBUG switched on !!!!!!!!!\n");
#endif
+#ifdef UNIV_LOG_LSN_DEBUG
+ fprintf(stderr,
+ "InnoDB: !!!!!!!! UNIV_LOG_LSN_DEBUG switched on !!!!!!!!!\n");
+#endif /* UNIV_LOG_LSN_DEBUG */
#ifdef UNIV_MEM_DEBUG
fprintf(stderr,
"InnoDB: !!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!\n");
#endif
-#ifdef UNIV_SIMULATE_AWE
- fprintf(stderr,
- "InnoDB: !!!!!!!! UNIV_SIMULATE_AWE switched on !!!!!!!!!\n");
-#endif
- if (srv_sizeof_trx_t_in_ha_innodb_cc != (ulint)sizeof(trx_t)) {
+ if (UNIV_LIKELY(srv_use_sys_malloc)) {
fprintf(stderr,
- "InnoDB: Error: trx_t size is %lu in ha_innodb.cc"
- " but %lu in srv0start.c\n"
- "InnoDB: Check that pthread_mutex_t is defined"
- " in the same way in these\n"
- "InnoDB: compilation modules. Cannot continue.\n",
- (ulong) srv_sizeof_trx_t_in_ha_innodb_cc,
- (ulong) sizeof(trx_t));
- return(DB_ERROR);
+ "InnoDB: The InnoDB memory heap is disabled\n");
}
+ fprintf(stderr, "InnoDB: %s\n", IB_ATOMICS_STARTUP_MSG);
+
/* Since InnoDB does not currently clean up all its internal data
structures in MySQL Embedded Server Library server_end(), we
print an error message if someone tries to start up InnoDB a
@@ -1070,7 +1110,7 @@ innobase_start_or_create_for_mysql(void)
if (srv_start_has_been_called) {
fprintf(stderr,
- "InnoDB: Error:startup called second time"
+ "InnoDB: Error: startup called second time"
" during the process lifetime.\n"
"InnoDB: In the MySQL Embedded Server Library"
" you cannot call server_init()\n"
@@ -1089,35 +1129,22 @@ innobase_start_or_create_for_mysql(void)
srv_startup_is_before_trx_rollback_phase = TRUE;
os_aio_use_native_aio = FALSE;
-#if !defined(__WIN2000__) && !defined(UNIV_SIMULATE_AWE)
- if (srv_use_awe) {
-
- fprintf(stderr,
- "InnoDB: Error: You have specified"
- " innodb_buffer_pool_awe_mem_mb\n"
- "InnoDB: in my.cnf, but AWE can only"
- " be used in Windows 2000 and later.\n"
- "InnoDB: To use AWE, InnoDB must"
- " be compiled with __WIN2000__ defined.\n");
-
- return(DB_ERROR);
- }
-#endif
-
#ifdef __WIN__
- if (os_get_os_version() == OS_WIN95
- || os_get_os_version() == OS_WIN31
- || os_get_os_version() == OS_WINNT) {
-
+ switch (os_get_os_version()) {
+ case OS_WIN95:
+ case OS_WIN31:
+ case OS_WINNT:
/* On Win 95, 98, ME, Win32 subsystem for Windows 3.1,
and NT use simulated aio. In NT Windows provides async i/o,
but when run in conjunction with InnoDB Hot Backup, it seemed
to corrupt the data files. */
os_aio_use_native_aio = FALSE;
- } else {
+ break;
+ default:
/* On Win 2000 and XP use async i/o */
os_aio_use_native_aio = TRUE;
+ break;
}
#endif
if (srv_file_flush_method_str == NULL) {
@@ -1163,7 +1190,7 @@ innobase_start_or_create_for_mysql(void)
}
/* Note that the call srv_boot() also changes the values of
- srv_pool_size etc. to the units used by InnoDB internally */
+ some variables to the units used by InnoDB internally */
/* Set the maximum number of threads which can wait for a semaphore
inside InnoDB: this is the 'sync wait array' size, as well as the
@@ -1178,15 +1205,12 @@ innobase_start_or_create_for_mysql(void)
NetWare. */
srv_max_n_threads = 1000;
#else
- if (srv_pool_size >= 1000 * 1024) {
- /* Here we still have srv_pool_size counted
- in kilobytes (in 4.0 this was in bytes)
- srv_boot() converts the value to
- pages; if buffer pool is less than 1000 MB,
+ if (srv_buf_pool_size >= 1000 * 1024 * 1024) {
+ /* If buffer pool is less than 1000 MB,
assume fewer threads. */
srv_max_n_threads = 50000;
- } else if (srv_pool_size >= 8 * 1024) {
+ } else if (srv_buf_pool_size >= 8 * 1024 * 1024) {
srv_max_n_threads = 10000;
} else {
@@ -1195,7 +1219,7 @@ innobase_start_or_create_for_mysql(void)
computers */
}
#endif
- err = srv_boot(); /* This changes srv_pool_size to units of a page */
+ err = srv_boot();
if (err != DB_SUCCESS) {
@@ -1238,48 +1262,39 @@ innobase_start_or_create_for_mysql(void)
return(DB_ERROR);
}
- /* Restrict the maximum number of file i/o threads */
- if (srv_n_file_io_threads > SRV_MAX_N_IO_THREADS) {
-
- srv_n_file_io_threads = SRV_MAX_N_IO_THREADS;
+ /* If user has set the value of innodb_file_io_threads then
+ we'll emit a message telling the user that this parameter
+ is now deprecated. */
+ if (srv_n_file_io_threads != 4) {
+ fprintf(stderr, "InnoDB: Warning:"
+ " innodb_file_io_threads is deprecated."
+ " Please use innodb_read_io_threads and"
+ " innodb_write_io_threads instead\n");
}
- if (!os_aio_use_native_aio) {
- /* In simulated aio we currently have use only for 4 threads */
- srv_n_file_io_threads = 4;
+ /* Now overwrite the value of srv_n_file_io_threads */
+ srv_n_file_io_threads = 2 + srv_n_read_io_threads
+ + srv_n_write_io_threads;
+
+ ut_a(srv_n_file_io_threads <= SRV_MAX_N_IO_THREADS);
- os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD
- * srv_n_file_io_threads,
- srv_n_file_io_threads,
- SRV_MAX_N_PENDING_SYNC_IOS);
+ /* TODO: Investigate if SRV_N_PENDING_IOS_PER_THREAD (32) limit
+ still applies to Windows. */
+ if (!os_aio_use_native_aio) {
+ io_limit = 8 * SRV_N_PENDING_IOS_PER_THREAD;
} else {
- os_aio_init(SRV_N_PENDING_IOS_PER_THREAD
- * srv_n_file_io_threads,
- srv_n_file_io_threads,
- SRV_MAX_N_PENDING_SYNC_IOS);
+ io_limit = SRV_N_PENDING_IOS_PER_THREAD;
}
- fil_init(srv_max_n_open_files);
+ os_aio_init(io_limit,
+ srv_n_read_io_threads,
+ srv_n_write_io_threads,
+ SRV_MAX_N_PENDING_SYNC_IOS);
- if (srv_use_awe) {
- fprintf(stderr,
- "InnoDB: Using AWE: Memory window is %lu MB"
- " and AWE memory is %lu MB\n",
- (ulong) (srv_awe_window_size / ((1024 * 1024)
- / UNIV_PAGE_SIZE)),
- (ulong) (srv_pool_size / ((1024 * 1024)
- / UNIV_PAGE_SIZE)));
-
- /* We must disable adaptive hash indexes because they do not
- tolerate remapping of pages in AWE */
-
- srv_use_adaptive_hash_indexes = FALSE;
- ret = buf_pool_init(srv_pool_size, srv_pool_size,
- srv_awe_window_size);
- } else {
- ret = buf_pool_init(srv_pool_size, srv_pool_size,
- srv_pool_size);
- }
+ fil_init(srv_file_per_table ? 50000 : 5000,
+ srv_max_n_open_files);
+
+ ret = buf_pool_init();
if (ret == NULL) {
fprintf(stderr,
@@ -1289,6 +1304,19 @@ innobase_start_or_create_for_mysql(void)
return(DB_ERROR);
}
+#ifdef UNIV_DEBUG
+ /* We have observed deadlocks with a 5MB buffer pool but
+ the actual lower limit could very well be a little higher. */
+
+ if (srv_buf_pool_size <= 5 * 1024 * 1024) {
+
+ fprintf(stderr, "InnoDB: Warning: Small buffer pool size "
+ "(%luM), the flst_validate() debug function "
+ "can cause a deadlock if the buffer pool fills up.\n",
+ srv_buf_pool_size / 1024 / 1024);
+ }
+#endif
+
fsp_init();
log_init();
@@ -1338,7 +1366,7 @@ innobase_start_or_create_for_mysql(void)
sum_of_new_sizes += srv_data_file_sizes[i];
}
- if (sum_of_new_sizes < 640) {
+ if (sum_of_new_sizes < 10485760 / UNIV_PAGE_SIZE) {
fprintf(stderr,
"InnoDB: Error: tablespace size must be"
" at least 10 MB\n");
@@ -1418,7 +1446,7 @@ innobase_start_or_create_for_mysql(void)
&& !srv_archive_recovery
#endif /* UNIV_LOG_ARCHIVE */
) {
- if (ut_dulint_cmp(max_flushed_lsn, min_flushed_lsn) != 0
+ if (max_flushed_lsn != min_flushed_lsn
#ifdef UNIV_LOG_ARCHIVE
|| max_arch_log_no != min_arch_log_no
#endif /* UNIV_LOG_ARCHIVE */
@@ -1433,8 +1461,7 @@ innobase_start_or_create_for_mysql(void)
return(DB_ERROR);
}
- if (ut_dulint_cmp(max_flushed_lsn, ut_dulint_create(0, 1000))
- < 0) {
+ if (max_flushed_lsn < (ib_uint64_t) 1000) {
fprintf(stderr,
"InnoDB: Cannot initialize created"
" log files because\n"
@@ -1462,9 +1489,10 @@ innobase_start_or_create_for_mysql(void)
mutex_exit(&(log_sys->mutex));
}
+ trx_sys_file_format_init();
+
if (create_new_db) {
mtr_start(&mtr);
-
fsp_header_init(0, sum_of_new_sizes, &mtr);
mtr_commit(&mtr);
@@ -1494,16 +1522,43 @@ innobase_start_or_create_for_mysql(void)
/* Initialize the fsp free limit global variable in the log
system */
- fsp_header_get_free_limit(0);
+ fsp_header_get_free_limit();
recv_recovery_from_archive_finish();
#endif /* UNIV_LOG_ARCHIVE */
} else {
+
+ /* Check if we support the max format that is stamped
+ on the system tablespace.
+ Note: We are NOT allowed to make any modifications to
+ the TRX_SYS_PAGE_NO page before recovery because this
+ page also contains the max_trx_id etc. important system
+ variables that are required for recovery. We need to
+ ensure that we return the system to a state where normal
+ recovery is guaranteed to work. We do this by
+ invalidating the buffer cache; this will force the
+ reread of the page and restoration to its last known
+ consistent state, which is REQUIRED for the recovery
+ process to work. */
+ err = trx_sys_file_format_max_check(
+ srv_check_file_format_at_startup);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ /* Invalidate the buffer pool to ensure that we reread
+ the page that we read above, during recovery.
+ Note that this is not as heavy weight as it seems. At
+ this point there will be only ONE page in the buf_LRU
+ and there must be no page in the buf_flush list. */
+ buf_pool_invalidate();
+
/* We always try to do a recovery, even if the database had
been shut down normally: this is the normal startup path */
err = recv_recovery_from_checkpoint_start(LOG_CHECKPOINT,
- ut_dulint_max,
+ IB_ULONGLONG_MAX,
min_flushed_lsn,
max_flushed_lsn);
if (err != DB_SUCCESS) {
@@ -1549,12 +1604,19 @@ innobase_start_or_create_for_mysql(void)
/* Initialize the fsp free limit global variable in the log
system */
- fsp_header_get_free_limit(0);
+ fsp_header_get_free_limit();
/* recv_recovery_from_checkpoint_finish needs trx lists which
are initialized in trx_sys_init_at_db_start(). */
recv_recovery_from_checkpoint_finish();
+
+ /* It is possible that file_format tag has never
+ been set. In this case we initialize it to minimum
+ value. Important to note that we can do it ONLY after
+ we have finished the recovery process so that the
+ image of TRX_SYS_PAGE_NO is not stale. */
+ trx_sys_file_format_tag_init();
}
if (!create_new_db && sum_of_new_sizes > 0) {
@@ -1605,7 +1667,6 @@ innobase_start_or_create_for_mysql(void)
/* Create the thread which warns of long semaphore waits */
os_thread_create(&srv_error_monitor_thread, NULL,
thread_ids + 3 + SRV_MAX_N_IO_THREADS);
- srv_was_started = TRUE;
srv_is_being_started = FALSE;
if (trx_doublewrite == NULL) {
@@ -1634,7 +1695,7 @@ innobase_start_or_create_for_mysql(void)
sum_of_data_file_sizes += srv_data_file_sizes[i];
}
- tablespace_size_in_header = fsp_header_get_tablespace_size(0);
+ tablespace_size_in_header = fsp_header_get_tablespace_size();
if (!srv_auto_extend_last_data_file
&& sum_of_data_file_sizes != tablespace_size_in_header) {
@@ -1718,9 +1779,9 @@ innobase_start_or_create_for_mysql(void)
if (srv_print_verbose_log) {
ut_print_timestamp(stderr);
fprintf(stderr,
- " InnoDB: Started; log sequence number %lu %lu\n",
- (ulong) ut_dulint_get_high(srv_start_lsn),
- (ulong) ut_dulint_get_low(srv_start_lsn));
+ " InnoDB Plugin %s started; "
+ "log sequence number %llu\n",
+ INNODB_VERSION_STR, srv_start_lsn);
}
if (srv_force_recovery > 0) {
@@ -1736,7 +1797,7 @@ innobase_start_or_create_for_mysql(void)
/* Actually, we did not change the undo log format between
4.0 and 4.1.1, and we would not need to run purge to
completion. Note also that the purge algorithm in 4.1.1
- can process the the history list again even after a full
+ can process the history list again even after a full
purge, because our algorithm does not cut the end of the
history list in all cases so that it would become empty
after a full purge. That mean that we may purge 4.0 type
@@ -1776,8 +1837,7 @@ innobase_start_or_create_for_mysql(void)
" to an earlier version of\n"
"InnoDB: InnoDB! But if you absolutely need to"
" downgrade, see\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "multiple-tablespaces.html\n"
+ "InnoDB: " REFMAN "multiple-tablespaces.html\n"
"InnoDB: for instructions.\n");
}
@@ -1792,16 +1852,18 @@ innobase_start_or_create_for_mysql(void)
srv_file_per_table = srv_file_per_table_original_value;
+ srv_was_started = TRUE;
+
return((int) DB_SUCCESS);
}
-/********************************************************************
-Shuts down the InnoDB database. */
-
+/****************************************************************//**
+Shuts down the InnoDB database.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
int
innobase_shutdown_for_mysql(void)
/*=============================*/
- /* out: DB_SUCCESS or error code */
{
ulint i;
#ifdef __NETWARE__
@@ -1836,7 +1898,7 @@ innobase_shutdown_for_mysql(void)
}
#ifdef __NETWARE__
- if(!panic_shutdown)
+ if (!panic_shutdown)
#endif
logs_empty_and_mark_files_at_shutdown();
@@ -1887,8 +1949,10 @@ innobase_shutdown_for_mysql(void)
/* All the threads have exited or are just exiting;
NOTE that the threads may not have completed their
exit yet. Should we use pthread_join() to make sure
- they have exited? Now we just sleep 0.1 seconds and
- hope that is enough! */
+ they have exited? If we did, we would have to
+ remove the pthread_detach() from
+ os_thread_exit(). Now we just sleep 0.1
+ seconds and hope that is enough! */
os_mutex_exit(os_sync_mutex);
@@ -1927,34 +1991,41 @@ innobase_shutdown_for_mysql(void)
srv_misc_tmpfile = 0;
}
+ /* This must be disabled before closing the buffer pool
+ and closing the data dictionary. */
+ btr_search_disable();
+
+ ibuf_close();
+ log_shutdown();
+ lock_sys_close();
+ thr_local_close();
+ trx_sys_file_format_close();
+ trx_sys_close();
+
mutex_free(&srv_monitor_file_mutex);
mutex_free(&srv_dict_tmpfile_mutex);
mutex_free(&srv_misc_tmpfile_mutex);
+ dict_close();
+ btr_search_sys_free();
/* 3. Free all InnoDB's own mutexes and the os_fast_mutexes inside
them */
+ os_aio_free();
sync_close();
+ srv_free();
+ fil_close();
/* 4. Free the os_conc_mutex and all os_events and os_mutexes */
- srv_free();
os_sync_free();
- /* Check that all read views are closed except read view owned
- by a purge. */
-
- if (UT_LIST_GET_LEN(trx_sys->view_list) > 1) {
- fprintf(stderr,
- "InnoDB: Error: all read views were not closed"
- " before shutdown:\n"
- "InnoDB: %lu read views open \n",
- UT_LIST_GET_LEN(trx_sys->view_list) - 1);
- }
-
- /* 5. Free all allocated memory and the os_fast_mutex created in
- ut0mem.c */
+ /* 5. Free all allocated memory */
+ pars_lexer_close();
+ log_mem_free();
+ buf_pool_free();
ut_free_all_mem();
+ mem_close();
if (os_thread_count != 0
|| os_event_count != 0
@@ -1980,11 +2051,13 @@ innobase_shutdown_for_mysql(void)
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Shutdown completed;"
- " log sequence number %lu %lu\n",
- (ulong) ut_dulint_get_high(srv_shutdown_lsn),
- (ulong) ut_dulint_get_low(srv_shutdown_lsn));
+ " log sequence number %llu\n",
+ srv_shutdown_lsn);
}
+ srv_was_started = FALSE;
+ srv_start_has_been_called = FALSE;
+
return((int) DB_SUCCESS);
}
diff --git a/storage/innobase/sync/sync0arr.c b/storage/innobase/sync/sync0arr.c
index 154593a9035..ed9e25bf2f2 100644
--- a/storage/innobase/sync/sync0arr.c
+++ b/storage/innobase/sync/sync0arr.c
@@ -1,7 +1,31 @@
-/******************************************************
-The wait array used in synchronization primitives
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
-(c) 1995 Innobase Oy
+/**************************************************//**
+@file sync/sync0arr.c
+The wait array used in synchronization primitives
Created 9/5/1995 Heikki Tuuri
*******************************************************/
@@ -50,27 +74,29 @@ wait array for the sake of diagnostics and also to avoid infinite
wait The error_monitor thread scans the global wait array to signal
any waiting threads who have missed the signal. */
-/* A cell where an individual thread may wait suspended
+/** A cell where an individual thread may wait suspended
until a resource is released. The suspending is implemented
using an operating system event semaphore. */
struct sync_cell_struct {
- void* wait_object; /* pointer to the object the
+ void* wait_object; /*!< pointer to the object the
thread is waiting for; if NULL
the cell is free for use */
- mutex_t* old_wait_mutex; /* the latest wait mutex in cell */
- rw_lock_t* old_wait_rw_lock;/* the latest wait rw-lock in cell */
- ulint request_type; /* lock type requested on the
+ mutex_t* old_wait_mutex; /*!< the latest wait mutex in cell */
+ rw_lock_t* old_wait_rw_lock;
+ /*!< the latest wait rw-lock
+ in cell */
+ ulint request_type; /*!< lock type requested on the
object */
- const char* file; /* in debug version file where
+ const char* file; /*!< in debug version file where
requested */
- ulint line; /* in debug version line where
+ ulint line; /*!< in debug version line where
requested */
- os_thread_id_t thread; /* thread id of this waiting
+ os_thread_id_t thread; /*!< thread id of this waiting
thread */
- ibool waiting; /* TRUE if the thread has already
+ ibool waiting; /*!< TRUE if the thread has already
called sync_array_event_wait
on this cell */
- ib_longlong signal_count; /* We capture the signal_count
+ ib_int64_t signal_count; /*!< We capture the signal_count
of the wait_object when we
reset the event. This value is
then passed on to os_event_wait
@@ -78,7 +104,7 @@ struct sync_cell_struct {
has not been signalled in the
period between the reset and
wait call. */
- time_t reservation_time;/* time when the thread reserved
+ time_t reservation_time;/*!< time when the thread reserved
the wait cell */
};
@@ -87,54 +113,56 @@ for an event allocated for the array without owning the
protecting mutex (depending on the case: OS or database mutex), but
all changes (set or reset) to the state of the event must be made
while owning the mutex. */
+
+/** Synchronization array */
struct sync_array_struct {
- ulint n_reserved; /* number of currently reserved
+ ulint n_reserved; /*!< number of currently reserved
cells in the wait array */
- ulint n_cells; /* number of cells in the
+ ulint n_cells; /*!< number of cells in the
wait array */
- sync_cell_t* array; /* pointer to wait array */
- ulint protection; /* this flag tells which
+ sync_cell_t* array; /*!< pointer to wait array */
+ ulint protection; /*!< this flag tells which
mutex protects the data */
- mutex_t mutex; /* possible database mutex
+ mutex_t mutex; /*!< possible database mutex
protecting this data structure */
- os_mutex_t os_mutex; /* Possible operating system mutex
+ os_mutex_t os_mutex; /*!< Possible operating system mutex
protecting the data structure.
As this data structure is used in
constructing the database mutex,
to prevent infinite recursion
in implementation, we fall back to
an OS mutex. */
- ulint sg_count; /* count of how many times an
+ ulint sg_count; /*!< count of how many times an
object has been signalled */
- ulint res_count; /* count of cell reservations
+ ulint res_count; /*!< count of cell reservations
since creation of the array */
};
#ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
+/******************************************************************//**
This function is called only in the debug version. Detects a deadlock
-of one or more threads because of waits of semaphores. */
+of one or more threads because of waits of semaphores.
+@return TRUE if deadlock detected */
static
ibool
sync_array_detect_deadlock(
/*=======================*/
- /* out: TRUE if deadlock detected */
- sync_array_t* arr, /* in: wait array; NOTE! the caller must
+ sync_array_t* arr, /*!< in: wait array; NOTE! the caller must
own the mutex to array */
- sync_cell_t* start, /* in: cell where recursive search started */
- sync_cell_t* cell, /* in: cell to search */
- ulint depth); /* in: recursion depth */
+ sync_cell_t* start, /*!< in: cell where recursive search started */
+ sync_cell_t* cell, /*!< in: cell to search */
+ ulint depth); /*!< in: recursion depth */
#endif /* UNIV_SYNC_DEBUG */
-/*********************************************************************
-Gets the nth cell in array. */
+/*****************************************************************//**
+Gets the nth cell in array.
+@return cell */
static
sync_cell_t*
sync_array_get_nth_cell(
/*====================*/
- /* out: cell */
- sync_array_t* arr, /* in: sync array */
- ulint n) /* in: index */
+ sync_array_t* arr, /*!< in: sync array */
+ ulint n) /*!< in: index */
{
ut_a(arr);
ut_a(n < arr->n_cells);
@@ -142,13 +170,13 @@ sync_array_get_nth_cell(
return(arr->array + n);
}
-/**********************************************************************
+/******************************************************************//**
Reserves the mutex semaphore protecting a sync array. */
static
void
sync_array_enter(
/*=============*/
- sync_array_t* arr) /* in: sync wait array */
+ sync_array_t* arr) /*!< in: sync wait array */
{
ulint protection;
@@ -163,13 +191,13 @@ sync_array_enter(
}
}
-/**********************************************************************
+/******************************************************************//**
Releases the mutex semaphore protecting a sync array. */
static
void
sync_array_exit(
/*============*/
- sync_array_t* arr) /* in: sync wait array */
+ sync_array_t* arr) /*!< in: sync wait array */
{
ulint protection;
@@ -184,39 +212,36 @@ sync_array_exit(
}
}
-/***********************************************************************
+/*******************************************************************//**
Creates a synchronization wait array. It is protected by a mutex
which is automatically reserved when the functions operating on it
-are called. */
-
+are called.
+@return own: created wait array */
+UNIV_INTERN
sync_array_t*
sync_array_create(
/*==============*/
- /* out, own: created wait array */
- ulint n_cells, /* in: number of cells in the array
+ ulint n_cells, /*!< in: number of cells in the array
to create */
- ulint protection) /* in: either SYNC_ARRAY_OS_MUTEX or
+ ulint protection) /*!< in: either SYNC_ARRAY_OS_MUTEX or
SYNC_ARRAY_MUTEX: determines the type
of mutex protecting the data structure */
{
+ ulint sz;
sync_array_t* arr;
- sync_cell_t* cell_array;
- sync_cell_t* cell;
- ulint i;
ut_a(n_cells > 0);
/* Allocate memory for the data structures */
arr = ut_malloc(sizeof(sync_array_t));
+ memset(arr, 0x0, sizeof(*arr));
- cell_array = ut_malloc(sizeof(sync_cell_t) * n_cells);
+ sz = sizeof(sync_cell_t) * n_cells;
+ arr->array = ut_malloc(sz);
+ memset(arr->array, 0x0, sz);
arr->n_cells = n_cells;
- arr->n_reserved = 0;
- arr->array = cell_array;
arr->protection = protection;
- arr->sg_count = 0;
- arr->res_count = 0;
/* Then create the mutex to protect the wait array complex */
if (protection == SYNC_ARRAY_OS_MUTEX) {
@@ -227,23 +252,16 @@ sync_array_create(
ut_error;
}
- for (i = 0; i < n_cells; i++) {
- cell = sync_array_get_nth_cell(arr, i);
- cell->wait_object = NULL;
- cell->waiting = FALSE;
- cell->signal_count = 0;
- }
-
return(arr);
}
-/**********************************************************************
+/******************************************************************//**
Frees the resources in a wait array. */
-
+UNIV_INTERN
void
sync_array_free(
/*============*/
- sync_array_t* arr) /* in, own: sync wait array */
+ sync_array_t* arr) /*!< in, own: sync wait array */
{
ulint protection;
@@ -267,14 +285,14 @@ sync_array_free(
ut_free(arr);
}
-/************************************************************************
+/********************************************************************//**
Validates the integrity of the wait array. Checks
that the number of reserved cells equals the count variable. */
-
+UNIV_INTERN
void
sync_array_validate(
/*================*/
- sync_array_t* arr) /* in: sync wait array */
+ sync_array_t* arr) /*!< in: sync wait array */
{
ulint i;
sync_cell_t* cell;
@@ -294,44 +312,41 @@ sync_array_validate(
sync_array_exit(arr);
}
-/***********************************************************************
-Puts the cell event in reset state. */
+/*******************************************************************//**
+Returns the event that the thread owning the cell waits for. */
static
-ib_longlong
-sync_cell_event_reset(
-/*==================*/
- /* out: value of signal_count
- at the time of reset. */
- ulint type, /* in: lock type mutex/rw_lock */
- void* object) /* in: the rw_lock/mutex object */
+os_event_t
+sync_cell_get_event(
+/*================*/
+ sync_cell_t* cell) /*!< in: non-empty sync array cell */
{
+ ulint type = cell->request_type;
+
if (type == SYNC_MUTEX) {
- return(os_event_reset(((mutex_t *) object)->event));
-#ifdef __WIN__
+ return(((mutex_t *) cell->wait_object)->event);
} else if (type == RW_LOCK_WAIT_EX) {
- return(os_event_reset(
- ((rw_lock_t *) object)->wait_ex_event));
-#endif
- } else {
- return(os_event_reset(((rw_lock_t *) object)->event));
+ return(((rw_lock_t *) cell->wait_object)->wait_ex_event);
+ } else { /* RW_LOCK_SHARED and RW_LOCK_EX wait on the same event */
+ return(((rw_lock_t *) cell->wait_object)->event);
}
}
-/**********************************************************************
+/******************************************************************//**
Reserves a wait array cell for waiting for an object.
The event of the cell is reset to nonsignalled state. */
-
+UNIV_INTERN
void
sync_array_reserve_cell(
/*====================*/
- sync_array_t* arr, /* in: wait array */
- void* object, /* in: pointer to the object to wait for */
- ulint type, /* in: lock request type */
- const char* file, /* in: file where requested */
- ulint line, /* in: line where requested */
- ulint* index) /* out: index of the reserved cell */
+ sync_array_t* arr, /*!< in: wait array */
+ void* object, /*!< in: pointer to the object to wait for */
+ ulint type, /*!< in: lock request type */
+ const char* file, /*!< in: file where requested */
+ ulint line, /*!< in: line where requested */
+ ulint* index) /*!< out: index of the reserved cell */
{
sync_cell_t* cell;
+ os_event_t event;
ulint i;
ut_a(object);
@@ -370,8 +385,8 @@ sync_array_reserve_cell(
/* Make sure the event is reset and also store
the value of signal_count at which the event
was reset. */
- cell->signal_count = sync_cell_event_reset(type,
- object);
+ event = sync_cell_get_event(cell);
+ cell->signal_count = os_event_reset(event);
cell->reservation_time = time(NULL);
@@ -386,17 +401,17 @@ sync_array_reserve_cell(
return;
}
-/**********************************************************************
+/******************************************************************//**
This function should be called when a thread starts to wait on
a wait array cell. In the debug version this function checks
if the wait for a semaphore will result in a deadlock, in which
case prints info and asserts. */
-
+UNIV_INTERN
void
sync_array_wait_event(
/*==================*/
- sync_array_t* arr, /* in: wait array */
- ulint index) /* in: index of the reserved cell */
+ sync_array_t* arr, /*!< in: wait array */
+ ulint index) /*!< in: index of the reserved cell */
{
sync_cell_t* cell;
os_event_t event;
@@ -411,19 +426,7 @@ sync_array_wait_event(
ut_a(!cell->waiting);
ut_ad(os_thread_get_curr_id() == cell->thread);
- if (cell->request_type == SYNC_MUTEX) {
- event = ((mutex_t*) cell->wait_object)->event;
-#ifdef __WIN__
- /* On windows if the thread about to wait is the one which
- has set the state of the rw_lock to RW_LOCK_WAIT_EX, then
- it waits on a special event i.e.: wait_ex_event. */
- } else if (cell->request_type == RW_LOCK_WAIT_EX) {
- event = ((rw_lock_t*) cell->wait_object)->wait_ex_event;
-#endif
- } else {
- event = ((rw_lock_t*) cell->wait_object)->event;
- }
-
+ event = sync_cell_get_event(cell);
cell->waiting = TRUE;
#ifdef UNIV_SYNC_DEBUG
@@ -450,18 +453,19 @@ sync_array_wait_event(
sync_array_free_cell(arr, index);
}
-/**********************************************************************
+/******************************************************************//**
Reports info of a wait array cell. */
static
void
sync_array_cell_print(
/*==================*/
- FILE* file, /* in: file where to print */
- sync_cell_t* cell) /* in: sync cell */
+ FILE* file, /*!< in: file where to print */
+ sync_cell_t* cell) /*!< in: sync cell */
{
mutex_t* mutex;
rw_lock_t* rwlock;
ulint type;
+ ulint writer;
type = cell->request_type;
@@ -491,9 +495,7 @@ sync_array_cell_print(
(ulong) mutex->waiters);
} else if (type == RW_LOCK_EX
-#ifdef __WIN__
|| type == RW_LOCK_WAIT_EX
-#endif
|| type == RW_LOCK_SHARED) {
fputs(type == RW_LOCK_EX ? "X-lock on" : "S-lock on", file);
@@ -504,22 +506,25 @@ sync_array_cell_print(
" RW-latch at %p created in file %s line %lu\n",
(void*) rwlock, rwlock->cfile_name,
(ulong) rwlock->cline);
- if (rwlock->writer != RW_LOCK_NOT_LOCKED) {
+ writer = rw_lock_get_writer(rwlock);
+ if (writer != RW_LOCK_NOT_LOCKED) {
fprintf(file,
"a writer (thread id %lu) has"
" reserved it in mode %s",
(ulong) os_thread_pf(rwlock->writer_thread),
- rwlock->writer == RW_LOCK_EX
+ writer == RW_LOCK_EX
? " exclusive\n"
: " wait exclusive\n");
}
fprintf(file,
- "number of readers %lu, waiters flag %lu\n"
+ "number of readers %lu, waiters flag %lu, "
+ "lock_word: %lx\n"
"Last time read locked in file %s line %lu\n"
"Last time write locked in file %s line %lu\n",
- (ulong) rwlock->reader_count,
+ (ulong) rw_lock_get_reader_count(rwlock),
(ulong) rwlock->waiters,
+ rwlock->lock_word,
rwlock->last_s_file_name,
(ulong) rwlock->last_s_line,
rwlock->last_x_file_name,
@@ -534,16 +539,15 @@ sync_array_cell_print(
}
#ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
-Looks for a cell with the given thread id. */
+/******************************************************************//**
+Looks for a cell with the given thread id.
+@return pointer to cell or NULL if not found */
static
sync_cell_t*
sync_array_find_thread(
/*===================*/
- /* out: pointer to cell or NULL
- if not found */
- sync_array_t* arr, /* in: wait array */
- os_thread_id_t thread) /* in: thread id */
+ sync_array_t* arr, /*!< in: wait array */
+ os_thread_id_t thread) /*!< in: thread id */
{
ulint i;
sync_cell_t* cell;
@@ -562,20 +566,20 @@ sync_array_find_thread(
return(NULL); /* Not found */
}
-/**********************************************************************
-Recursion step for deadlock detection. */
+/******************************************************************//**
+Recursion step for deadlock detection.
+@return TRUE if deadlock detected */
static
ibool
sync_array_deadlock_step(
/*=====================*/
- /* out: TRUE if deadlock detected */
- sync_array_t* arr, /* in: wait array; NOTE! the caller must
+ sync_array_t* arr, /*!< in: wait array; NOTE! the caller must
own the mutex to array */
- sync_cell_t* start, /* in: cell where recursive search
+ sync_cell_t* start, /*!< in: cell where recursive search
started */
- os_thread_id_t thread, /* in: thread to look at */
- ulint pass, /* in: pass value */
- ulint depth) /* in: recursion depth */
+ os_thread_id_t thread, /*!< in: thread to look at */
+ ulint pass, /*!< in: pass value */
+ ulint depth) /*!< in: recursion depth */
{
sync_cell_t* new;
ibool ret;
@@ -613,19 +617,19 @@ sync_array_deadlock_step(
return(FALSE);
}
-/**********************************************************************
+/******************************************************************//**
This function is called only in the debug version. Detects a deadlock
-of one or more threads because of waits of semaphores. */
+of one or more threads because of waits of semaphores.
+@return TRUE if deadlock detected */
static
ibool
sync_array_detect_deadlock(
/*=======================*/
- /* out: TRUE if deadlock detected */
- sync_array_t* arr, /* in: wait array; NOTE! the caller must
+ sync_array_t* arr, /*!< in: wait array; NOTE! the caller must
own the mutex to array */
- sync_cell_t* start, /* in: cell where recursive search started */
- sync_cell_t* cell, /* in: cell to search */
- ulint depth) /* in: recursion depth */
+ sync_cell_t* start, /*!< in: cell where recursive search started */
+ sync_cell_t* cell, /*!< in: cell to search */
+ ulint depth) /*!< in: recursion depth */
{
mutex_t* mutex;
rw_lock_t* lock;
@@ -758,13 +762,13 @@ print:
}
#endif /* UNIV_SYNC_DEBUG */
-/**********************************************************************
+/******************************************************************//**
Determines if we can wake up the thread waiting for a sempahore. */
static
ibool
sync_arr_cell_can_wake_up(
/*======================*/
- sync_cell_t* cell) /* in: cell to search */
+ sync_cell_t* cell) /*!< in: cell to search */
{
mutex_t* mutex;
rw_lock_t* lock;
@@ -778,28 +782,30 @@ sync_arr_cell_can_wake_up(
return(TRUE);
}
- } else if (cell->request_type == RW_LOCK_EX
- || cell->request_type == RW_LOCK_WAIT_EX) {
+ } else if (cell->request_type == RW_LOCK_EX) {
lock = cell->wait_object;
- if (rw_lock_get_reader_count(lock) == 0
- && rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) {
+ if (lock->lock_word > 0) {
+ /* Either unlocked or only read locked. */
return(TRUE);
}
- if (rw_lock_get_reader_count(lock) == 0
- && rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX
- && os_thread_eq(lock->writer_thread, cell->thread)) {
+ } else if (cell->request_type == RW_LOCK_WAIT_EX) {
+
+ lock = cell->wait_object;
+
+ /* lock_word == 0 means all readers have left */
+ if (lock->lock_word == 0) {
return(TRUE);
}
-
} else if (cell->request_type == RW_LOCK_SHARED) {
lock = cell->wait_object;
- if (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) {
+ /* lock_word > 0 means no writer or reserved writer */
+ if (lock->lock_word > 0) {
return(TRUE);
}
@@ -808,15 +814,15 @@ sync_arr_cell_can_wake_up(
return(FALSE);
}
-/**********************************************************************
+/******************************************************************//**
Frees the cell. NOTE! sync_array_wait_event frees the cell
automatically! */
-
+UNIV_INTERN
void
sync_array_free_cell(
/*=================*/
- sync_array_t* arr, /* in: wait array */
- ulint index) /* in: index of the cell in array */
+ sync_array_t* arr, /*!< in: wait array */
+ ulint index) /*!< in: index of the cell in array */
{
sync_cell_t* cell;
@@ -836,22 +842,26 @@ sync_array_free_cell(
sync_array_exit(arr);
}
-/**************************************************************************
+/**********************************************************************//**
Increments the signalled count. */
-
+UNIV_INTERN
void
sync_array_object_signalled(
/*========================*/
- sync_array_t* arr) /* in: wait array */
+ sync_array_t* arr) /*!< in: wait array */
{
+#ifdef HAVE_ATOMIC_BUILTINS
+ (void) os_atomic_increment_ulint(&arr->sg_count, 1);
+#else
sync_array_enter(arr);
arr->sg_count++;
sync_array_exit(arr);
+#endif
}
-/**************************************************************************
+/**********************************************************************//**
If the wakeup algorithm does not work perfectly at semaphore relases,
this function will do the waking (see the comment in mutex_exit). This
function should be called about every 1 second in the server.
@@ -859,7 +869,7 @@ function should be called about every 1 second in the server.
Note that there's a race condition between this thread and mutex_exit
changing the lock_word and calling signal_object, so sometimes this finds
threads to wake up even when nothing has gone wrong. */
-
+UNIV_INTERN
void
sync_arr_wake_threads_if_sema_free(void)
/*====================================*/
@@ -868,6 +878,7 @@ sync_arr_wake_threads_if_sema_free(void)
sync_cell_t* cell;
ulint count;
ulint i;
+ os_event_t event;
sync_array_enter(arr);
@@ -877,49 +888,32 @@ sync_arr_wake_threads_if_sema_free(void)
while (count < arr->n_reserved) {
cell = sync_array_get_nth_cell(arr, i);
+ i++;
- if (cell->wait_object != NULL) {
-
+ if (cell->wait_object == NULL) {
+ continue;
+ }
count++;
if (sync_arr_cell_can_wake_up(cell)) {
- if (cell->request_type == SYNC_MUTEX) {
- mutex_t* mutex;
-
- mutex = cell->wait_object;
- os_event_set(mutex->event);
-#ifdef __WIN__
- } else if (cell->request_type
- == RW_LOCK_WAIT_EX) {
- rw_lock_t* lock;
-
- lock = cell->wait_object;
- os_event_set(lock->wait_ex_event);
-#endif
- } else {
- rw_lock_t* lock;
+ event = sync_cell_get_event(cell);
- lock = cell->wait_object;
- os_event_set(lock->event);
- }
- }
+ os_event_set(event);
}
- i++;
}
sync_array_exit(arr);
}
-/**************************************************************************
-Prints warnings of long semaphore waits to stderr. */
-
+/**********************************************************************//**
+Prints warnings of long semaphore waits to stderr.
+@return TRUE if fatal semaphore wait threshold was exceeded */
+UNIV_INTERN
ibool
sync_array_print_long_waits(void)
/*=============================*/
- /* out: TRUE if fatal semaphore wait threshold
- was exceeded */
{
sync_cell_t* cell;
ibool old_val;
@@ -978,14 +972,14 @@ sync_array_print_long_waits(void)
return(fatal);
}
-/**************************************************************************
+/**********************************************************************//**
Prints info of the wait array. */
static
void
sync_array_output_info(
/*===================*/
- FILE* file, /* in: file where to print */
- sync_array_t* arr) /* in: wait array; NOTE! caller must own the
+ FILE* file, /*!< in: file where to print */
+ sync_array_t* arr) /*!< in: wait array; NOTE! caller must own the
mutex */
{
sync_cell_t* cell;
@@ -1011,14 +1005,14 @@ sync_array_output_info(
}
}
-/**************************************************************************
+/**********************************************************************//**
Prints info of the wait array. */
-
+UNIV_INTERN
void
sync_array_print_info(
/*==================*/
- FILE* file, /* in: file where to print */
- sync_array_t* arr) /* in: wait array */
+ FILE* file, /*!< in: file where to print */
+ sync_array_t* arr) /*!< in: wait array */
{
sync_array_enter(arr);
@@ -1026,4 +1020,3 @@ sync_array_print_info(
sync_array_exit(arr);
}
-
diff --git a/storage/innobase/sync/sync0rw.c b/storage/innobase/sync/sync0rw.c
index 367f019ce55..d231b6acdf7 100644
--- a/storage/innobase/sync/sync0rw.c
+++ b/storage/innobase/sync/sync0rw.c
@@ -1,7 +1,31 @@
-/******************************************************
-The read-write lock (for thread synchronization)
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file sync/sync0rw.c
+The read-write lock (for thread synchronization)
Created 9/11/1995 Heikki Tuuri
*******************************************************/
@@ -14,60 +38,155 @@ Created 9/11/1995 Heikki Tuuri
#include "os0thread.h"
#include "mem0mem.h"
#include "srv0srv.h"
-
-/* number of system calls made during shared latching */
-ulint rw_s_system_call_count = 0;
-
-/* number of spin waits on rw-latches,
+#include "os0sync.h" /* for INNODB_RW_LOCKS_USE_ATOMICS */
+
+/*
+ IMPLEMENTATION OF THE RW_LOCK
+ =============================
+The status of a rw_lock is held in lock_word. The initial value of lock_word is
+X_LOCK_DECR. lock_word is decremented by 1 for each s-lock and by X_LOCK_DECR
+for each x-lock. This describes the lock state for each value of lock_word:
+
+lock_word == X_LOCK_DECR: Unlocked.
+0 < lock_word < X_LOCK_DECR: Read locked, no waiting writers.
+ (X_LOCK_DECR - lock_word) is the
+ number of readers that hold the lock.
+lock_word == 0: Write locked
+-X_LOCK_DECR < lock_word < 0: Read locked, with a waiting writer.
+ (-lock_word) is the number of readers
+ that hold the lock.
+lock_word <= -X_LOCK_DECR: Recursively write locked. lock_word has been
+ decremented by X_LOCK_DECR once for each lock,
+ so the number of locks is:
+ ((-lock_word) / X_LOCK_DECR) + 1
+When lock_word <= -X_LOCK_DECR, we also know that lock_word % X_LOCK_DECR == 0:
+other values of lock_word are invalid.
+
+The lock_word is always read and updated atomically and consistently, so that
+it always represents the state of the lock, and the state of the lock changes
+with a single atomic operation. This lock_word holds all of the information
+that a thread needs in order to determine if it is eligible to gain the lock
+or if it must spin or sleep. The one exception to this is that writer_thread
+must be verified before recursive write locks: to solve this scenario, we make
+writer_thread readable by all threads, but only writeable by the x-lock holder.
+
+The other members of the lock obey the following rules to remain consistent:
+
+recursive: This and the writer_thread field together control the
+ behaviour of recursive x-locking.
+ lock->recursive must be FALSE in following states:
+ 1) The writer_thread contains garbage i.e.: the
+ lock has just been initialized.
+ 2) The lock is not x-held and there is no
+ x-waiter waiting on WAIT_EX event.
+ 3) The lock is x-held or there is an x-waiter
+ waiting on WAIT_EX event but the 'pass' value
+ is non-zero.
+ lock->recursive is TRUE iff:
+ 1) The lock is x-held or there is an x-waiter
+ waiting on WAIT_EX event and the 'pass' value
+ is zero.
+ This flag must be set after the writer_thread field
+ has been updated with a memory ordering barrier.
+ It is unset before the lock_word has been incremented.
+writer_thread: Is used only in recursive x-locking. Can only be safely
+ read iff lock->recursive flag is TRUE.
+ This field is uninitialized at lock creation time and
+ is updated atomically when x-lock is acquired or when
+ move_ownership is called. A thread is only allowed to
+ set the value of this field to its thread_id i.e.: a
+ thread cannot set writer_thread to some other thread's
+ id.
+waiters: May be set to 1 anytime, but to avoid unnecessary wake-up
+ signals, it should only be set to 1 when there are threads
+ waiting on event. Must be 1 when a writer starts waiting to
+ ensure the current x-locking thread sends a wake-up signal
+ during unlock. May only be reset to 0 immediately before a
+ wake-up signal is sent to event. On most platforms, a
+ memory barrier is required after waiters is set, and before
+ verifying lock_word is still held, to ensure some unlocker
+ really does see the flag's new value.
+event: Threads wait on event for read or write lock when another
+ thread has an x-lock or an x-lock reservation (wait_ex). A
+ thread may only wait on event after performing the following
+ actions in order:
+ (1) Record the counter value of event (with os_event_reset).
+ (2) Set waiters to 1.
+ (3) Verify lock_word <= 0.
+ (1) must come before (2) to ensure signal is not missed.
+ (2) must come before (3) to ensure a signal is sent.
+ These restrictions force the above ordering.
+ Immediately before sending the wake-up signal, we should:
+ (1) Verify lock_word == X_LOCK_DECR (unlocked)
+ (2) Reset waiters to 0.
+wait_ex_event: A thread may only wait on the wait_ex_event after it has
+ performed the following actions in order:
+ (1) Decrement lock_word by X_LOCK_DECR.
+ (2) Record counter value of wait_ex_event (os_event_reset,
+ called from sync_array_reserve_cell).
+ (3) Verify that lock_word < 0.
+ (1) must come first to ensure no other threads become reader
+ or next writer, and notifies unlocker that signal must be sent.
+ (2) must come before (3) to ensure the signal is not missed.
+ These restrictions force the above ordering.
+ Immediately before sending the wake-up signal, we should:
+ Verify lock_word == 0 (waiting thread holds x_lock)
+*/
+
+
+/** number of spin waits on rw-latches,
+resulted during shared (read) locks */
+UNIV_INTERN ib_int64_t rw_s_spin_wait_count = 0;
+/** number of spin loop rounds on rw-latches,
resulted during shared (read) locks */
-ulint rw_s_spin_wait_count = 0;
+UNIV_INTERN ib_int64_t rw_s_spin_round_count = 0;
-/* number of OS waits on rw-latches,
+/** number of OS waits on rw-latches,
resulted during shared (read) locks */
-ulint rw_s_os_wait_count = 0;
+UNIV_INTERN ib_int64_t rw_s_os_wait_count = 0;
-/* number of unlocks (that unlock shared locks),
+/** number of unlocks (that unlock shared locks),
set only when UNIV_SYNC_PERF_STAT is defined */
-ulint rw_s_exit_count = 0;
+UNIV_INTERN ib_int64_t rw_s_exit_count = 0;
-/* number of system calls made during exclusive latching */
-ulint rw_x_system_call_count = 0;
-
-/* number of spin waits on rw-latches,
+/** number of spin waits on rw-latches,
+resulted during exclusive (write) locks */
+UNIV_INTERN ib_int64_t rw_x_spin_wait_count = 0;
+/** number of spin loop rounds on rw-latches,
resulted during exclusive (write) locks */
-ulint rw_x_spin_wait_count = 0;
+UNIV_INTERN ib_int64_t rw_x_spin_round_count = 0;
-/* number of OS waits on rw-latches,
+/** number of OS waits on rw-latches,
resulted during exclusive (write) locks */
-ulint rw_x_os_wait_count = 0;
+UNIV_INTERN ib_int64_t rw_x_os_wait_count = 0;
-/* number of unlocks (that unlock exclusive locks),
+/** number of unlocks (that unlock exclusive locks),
set only when UNIV_SYNC_PERF_STAT is defined */
-ulint rw_x_exit_count = 0;
+UNIV_INTERN ib_int64_t rw_x_exit_count = 0;
/* The global list of rw-locks */
-rw_lock_list_t rw_lock_list;
-mutex_t rw_lock_list_mutex;
+UNIV_INTERN rw_lock_list_t rw_lock_list;
+UNIV_INTERN mutex_t rw_lock_list_mutex;
#ifdef UNIV_SYNC_DEBUG
/* The global mutex which protects debug info lists of all rw-locks.
To modify the debug info list of an rw-lock, this mutex has to be
acquired in addition to the mutex protecting the lock. */
-mutex_t rw_lock_debug_mutex;
-os_event_t rw_lock_debug_event; /* If deadlock detection does not
- get immediately the mutex, it may
- wait for this event */
-ibool rw_lock_debug_waiters; /* This is set to TRUE, if there may
- be waiters for the event */
+UNIV_INTERN mutex_t rw_lock_debug_mutex;
+/* If deadlock detection does not get immediately the mutex,
+it may wait for this event */
+UNIV_INTERN os_event_t rw_lock_debug_event;
+/* This is set to TRUE, if there may be waiters for the event */
+UNIV_INTERN ibool rw_lock_debug_waiters;
-/**********************************************************************
+/******************************************************************//**
Creates a debug info struct. */
static
rw_lock_debug_t*
rw_lock_debug_create(void);
/*======================*/
-/**********************************************************************
+/******************************************************************//**
Frees a debug info struct. */
static
void
@@ -75,8 +194,9 @@ rw_lock_debug_free(
/*===============*/
rw_lock_debug_t* info);
-/**********************************************************************
-Creates a debug info struct. */
+/******************************************************************//**
+Creates a debug info struct.
+@return own: debug info struct */
static
rw_lock_debug_t*
rw_lock_debug_create(void)
@@ -85,7 +205,7 @@ rw_lock_debug_create(void)
return((rw_lock_debug_t*) mem_alloc(sizeof(rw_lock_debug_t)));
}
-/**********************************************************************
+/******************************************************************//**
Frees a debug info struct. */
static
void
@@ -97,44 +217,49 @@ rw_lock_debug_free(
}
#endif /* UNIV_SYNC_DEBUG */
-/**********************************************************************
+/******************************************************************//**
Creates, or rather, initializes an rw-lock object in a specified memory
location (which must be appropriately aligned). The rw-lock is initialized
to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
is necessary only if the memory block containing it is freed. */
-
+UNIV_INTERN
void
rw_lock_create_func(
/*================*/
- rw_lock_t* lock, /* in: pointer to memory */
+ rw_lock_t* lock, /*!< in: pointer to memory */
#ifdef UNIV_DEBUG
# ifdef UNIV_SYNC_DEBUG
- ulint level, /* in: level */
+ ulint level, /*!< in: level */
# endif /* UNIV_SYNC_DEBUG */
- const char* cmutex_name, /* in: mutex name */
+ const char* cmutex_name, /*!< in: mutex name */
#endif /* UNIV_DEBUG */
- const char* cfile_name, /* in: file name where created */
- ulint cline) /* in: file line where created */
+ const char* cfile_name, /*!< in: file name where created */
+ ulint cline) /*!< in: file line where created */
{
/* If this is the very first time a synchronization object is
created, then the following call initializes the sync system. */
+#ifndef INNODB_RW_LOCKS_USE_ATOMICS
mutex_create(rw_lock_get_mutex(lock), SYNC_NO_ORDER_CHECK);
lock->mutex.cfile_name = cfile_name;
lock->mutex.cline = cline;
-#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
- lock->mutex.cmutex_name = cmutex_name;
- lock->mutex.mutex_type = 1;
-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+ ut_d(lock->mutex.cmutex_name = cmutex_name);
+ ut_d(lock->mutex.mutex_type = 1);
+#else /* INNODB_RW_LOCKS_USE_ATOMICS */
+# ifdef UNIV_DEBUG
+ UT_NOT_USED(cmutex_name);
+# endif
+#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
- rw_lock_set_waiters(lock, 0);
- rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
- lock->writer_count = 0;
- rw_lock_set_reader_count(lock, 0);
+ lock->lock_word = X_LOCK_DECR;
+ lock->waiters = 0;
- lock->writer_is_wait_ex = FALSE;
+ /* We set this value to signify that lock->writer_thread
+ contains garbage at initialization and cannot be used for
+ recursive x-locking. */
+ lock->recursive = FALSE;
#ifdef UNIV_SYNC_DEBUG
UT_LIST_INIT(lock->debug_list);
@@ -147,15 +272,13 @@ rw_lock_create_func(
lock->cfile_name = cfile_name;
lock->cline = (unsigned int) cline;
+ lock->count_os_wait = 0;
lock->last_s_file_name = "not yet reserved";
lock->last_x_file_name = "not yet reserved";
lock->last_s_line = 0;
lock->last_x_line = 0;
lock->event = os_event_create(NULL);
-
-#ifdef __WIN__
lock->wait_ex_event = os_event_create(NULL);
-#endif
mutex_enter(&rw_lock_list_mutex);
@@ -169,31 +292,29 @@ rw_lock_create_func(
mutex_exit(&rw_lock_list_mutex);
}
-/**********************************************************************
+/******************************************************************//**
Calling this function is obligatory only if the memory buffer containing
the rw-lock is freed. Removes an rw-lock object from the global list. The
rw-lock is checked to be in the non-locked state. */
-
+UNIV_INTERN
void
rw_lock_free(
/*=========*/
- rw_lock_t* lock) /* in: rw-lock */
+ rw_lock_t* lock) /*!< in: rw-lock */
{
ut_ad(rw_lock_validate(lock));
- ut_a(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
- ut_a(rw_lock_get_waiters(lock) == 0);
- ut_a(rw_lock_get_reader_count(lock) == 0);
+ ut_a(lock->lock_word == X_LOCK_DECR);
lock->magic_n = 0;
+#ifndef INNODB_RW_LOCKS_USE_ATOMICS
mutex_free(rw_lock_get_mutex(lock));
+#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
mutex_enter(&rw_lock_list_mutex);
os_event_free(lock->event);
-#ifdef __WIN__
os_event_free(lock->wait_ex_event);
-#endif
if (UT_LIST_GET_PREV(list, lock)) {
ut_a(UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N);
@@ -208,63 +329,54 @@ rw_lock_free(
}
#ifdef UNIV_DEBUG
-/**********************************************************************
+/******************************************************************//**
Checks that the rw-lock has been initialized and that there are no
-simultaneous shared and exclusive locks. */
-
+simultaneous shared and exclusive locks.
+@return TRUE */
+UNIV_INTERN
ibool
rw_lock_validate(
/*=============*/
- rw_lock_t* lock)
+ rw_lock_t* lock) /*!< in: rw-lock */
{
ut_a(lock);
- mutex_enter(rw_lock_get_mutex(lock));
+ ulint waiters = rw_lock_get_waiters(lock);
+ lint lock_word = lock->lock_word;
ut_a(lock->magic_n == RW_LOCK_MAGIC_N);
- ut_a((rw_lock_get_reader_count(lock) == 0)
- || (rw_lock_get_writer(lock) != RW_LOCK_EX));
- ut_a((rw_lock_get_writer(lock) == RW_LOCK_EX)
- || (rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX)
- || (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED));
- ut_a((rw_lock_get_waiters(lock) == 0)
- || (rw_lock_get_waiters(lock) == 1));
- ut_a((lock->writer != RW_LOCK_EX) || (lock->writer_count > 0));
-
- mutex_exit(rw_lock_get_mutex(lock));
+ ut_a(waiters == 0 || waiters == 1);
+ ut_a(lock_word > -X_LOCK_DECR ||(-lock_word) % X_LOCK_DECR == 0);
return(TRUE);
}
#endif /* UNIV_DEBUG */
-/**********************************************************************
+/******************************************************************//**
Lock an rw-lock in shared mode for the current thread. If the rw-lock is
locked in exclusive mode, or there is an exclusive lock request waiting,
the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
for the lock, before suspending the thread. */
-
+UNIV_INTERN
void
rw_lock_s_lock_spin(
/*================*/
- rw_lock_t* lock, /* in: pointer to rw-lock */
- ulint pass, /* in: pass value; != 0, if the lock
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock
will be passed to another thread to unlock */
- const char* file_name, /* in: file name where lock requested */
- ulint line) /* in: line where requested */
+ const char* file_name, /*!< in: file name where lock requested */
+ ulint line) /*!< in: line where requested */
{
ulint index; /* index of the reserved wait cell */
- ulint i; /* spin round count */
+ ulint i = 0; /* spin round count */
ut_ad(rw_lock_validate(lock));
+ rw_s_spin_wait_count++; /*!< Count calls to this function */
lock_loop:
- rw_s_spin_wait_count++;
/* Spin waiting for the writer field to become free */
- i = 0;
-
- while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED
- && i < SYNC_SPIN_ROUNDS) {
+ while (i < SYNC_SPIN_ROUNDS && lock->lock_word <= 0) {
if (srv_spin_wait_delay) {
ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
}
@@ -285,28 +397,32 @@ lock_loop:
lock->cfile_name, (ulong) lock->cline, (ulong) i);
}
- mutex_enter(rw_lock_get_mutex(lock));
-
/* We try once again to obtain the lock */
-
if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
- mutex_exit(rw_lock_get_mutex(lock));
+ rw_s_spin_round_count += i;
return; /* Success */
} else {
- /* If we get here, locking did not succeed, we may
- suspend the thread to wait in the wait array */
- rw_s_system_call_count++;
+ if (i < SYNC_SPIN_ROUNDS) {
+ goto lock_loop;
+ }
+
+ rw_s_spin_round_count += i;
sync_array_reserve_cell(sync_primary_wait_array,
lock, RW_LOCK_SHARED,
file_name, line,
&index);
- rw_lock_set_waiters(lock, 1);
+ /* Set waiters before checking lock_word to ensure wake-up
+ signal is sent. This may lead to some unnecessary signals. */
+ rw_lock_set_waiter_flag(lock);
- mutex_exit(rw_lock_get_mutex(lock));
+ if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
+ sync_array_free_cell(sync_primary_wait_array, index);
+ return; /* Success */
+ }
if (srv_print_latch_waits) {
fprintf(stderr,
@@ -317,16 +433,18 @@ lock_loop:
(ulong) lock->cline);
}
- rw_s_system_call_count++;
+ /* these stats may not be accurate */
+ lock->count_os_wait++;
rw_s_os_wait_count++;
sync_array_wait_event(sync_primary_wait_array, index);
+ i = 0;
goto lock_loop;
}
}
-/**********************************************************************
+/******************************************************************//**
This function is used in the insert buffer to move the ownership of an
x-latch on a buffer frame to the current thread. The x-latch was set by
the buffer read operation and it protected the buffer frame while the
@@ -334,125 +452,141 @@ read was done. The ownership is moved because we want that the current
thread is able to acquire a second x-latch which is stored in an mtr.
This, in turn, is needed to pass the debug checks of index page
operations. */
-
+UNIV_INTERN
void
rw_lock_x_lock_move_ownership(
/*==========================*/
- rw_lock_t* lock) /* in: lock which was x-locked in the
+ rw_lock_t* lock) /*!< in: lock which was x-locked in the
buffer read */
{
ut_ad(rw_lock_is_locked(lock, RW_LOCK_EX));
- mutex_enter(&(lock->mutex));
-
- lock->writer_thread = os_thread_get_curr_id();
-
- lock->pass = 0;
-
- mutex_exit(&(lock->mutex));
+ rw_lock_set_writer_id_and_recursion_flag(lock, TRUE);
}
-/**********************************************************************
-Low-level function for acquiring an exclusive lock. */
+/******************************************************************//**
+Function for the next writer to call. Waits for readers to exit.
+The caller must have already decremented lock_word by X_LOCK_DECR. */
UNIV_INLINE
-ulint
-rw_lock_x_lock_low(
-/*===============*/
- /* out: RW_LOCK_NOT_LOCKED if did
- not succeed, RW_LOCK_EX if success,
- RW_LOCK_WAIT_EX, if got wait reservation */
- rw_lock_t* lock, /* in: pointer to rw-lock */
- ulint pass, /* in: pass value; != 0, if the lock will
+void
+rw_lock_x_lock_wait(
+/*================*/
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+#ifdef UNIV_SYNC_DEBUG
+ ulint pass, /*!< in: pass value; != 0, if the lock will
be passed to another thread to unlock */
- const char* file_name,/* in: file name where lock requested */
- ulint line) /* in: line where requested */
+#endif
+ const char* file_name,/*!< in: file name where lock requested */
+ ulint line) /*!< in: line where requested */
{
- ut_ad(mutex_own(rw_lock_get_mutex(lock)));
+ ulint index;
+ ulint i = 0;
- if (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) {
+ ut_ad(lock->lock_word <= 0);
- if (rw_lock_get_reader_count(lock) == 0) {
-
- rw_lock_set_writer(lock, RW_LOCK_EX);
- lock->writer_thread = os_thread_get_curr_id();
- lock->writer_count++;
- lock->pass = pass;
+ while (lock->lock_word < 0) {
+ if (srv_spin_wait_delay) {
+ ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
+ }
+ if(i < SYNC_SPIN_ROUNDS) {
+ i++;
+ continue;
+ }
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
- file_name, line);
-#endif
- lock->last_x_file_name = file_name;
- lock->last_x_line = (unsigned int) line;
+ /* If there is still a reader, then go to sleep.*/
+ rw_x_spin_round_count += i;
+ i = 0;
+ sync_array_reserve_cell(sync_primary_wait_array,
+ lock,
+ RW_LOCK_WAIT_EX,
+ file_name, line,
+ &index);
+ /* Check lock_word to ensure wake-up isn't missed.*/
+ if(lock->lock_word < 0) {
- /* Locking succeeded, we may return */
- return(RW_LOCK_EX);
- } else {
- /* There are readers, we have to wait */
- rw_lock_set_writer(lock, RW_LOCK_WAIT_EX);
- lock->writer_thread = os_thread_get_curr_id();
- lock->pass = pass;
- lock->writer_is_wait_ex = TRUE;
+ /* these stats may not be accurate */
+ lock->count_os_wait++;
+ rw_x_os_wait_count++;
+ /* Add debug info as it is needed to detect possible
+ deadlock. We must add info for WAIT_EX thread for
+ deadlock detection to work properly. */
#ifdef UNIV_SYNC_DEBUG
rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX,
file_name, line);
#endif
- return(RW_LOCK_WAIT_EX);
- }
-
- } else if ((rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX)
- && os_thread_eq(lock->writer_thread,
- os_thread_get_curr_id())) {
-
- if (rw_lock_get_reader_count(lock) == 0) {
-
- rw_lock_set_writer(lock, RW_LOCK_EX);
- lock->writer_count++;
- lock->pass = pass;
- lock->writer_is_wait_ex = FALSE;
-
+ sync_array_wait_event(sync_primary_wait_array,
+ index);
#ifdef UNIV_SYNC_DEBUG
- rw_lock_remove_debug_info(lock, pass, RW_LOCK_WAIT_EX);
- rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
- file_name, line);
+ rw_lock_remove_debug_info(lock, pass,
+ RW_LOCK_WAIT_EX);
#endif
-
- lock->last_x_file_name = file_name;
- lock->last_x_line = (unsigned int) line;
-
- /* Locking succeeded, we may return */
- return(RW_LOCK_EX);
+ /* It is possible to wake when lock_word < 0.
+ We must pass the while-loop check to proceed.*/
+ } else {
+ sync_array_free_cell(sync_primary_wait_array,
+ index);
}
+ }
+ rw_x_spin_round_count += i;
+}
+
+/******************************************************************//**
+Low-level function for acquiring an exclusive lock.
+@return FALSE if did not succeed, TRUE if success. */
+UNIV_INLINE
+ibool
+rw_lock_x_lock_low(
+/*===============*/
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ const char* file_name,/*!< in: file name where lock requested */
+ ulint line) /*!< in: line where requested */
+{
+ os_thread_id_t curr_thread = os_thread_get_curr_id();
- return(RW_LOCK_WAIT_EX);
+ if (rw_lock_lock_word_decr(lock, X_LOCK_DECR)) {
- } else if ((rw_lock_get_writer(lock) == RW_LOCK_EX)
- && os_thread_eq(lock->writer_thread,
- os_thread_get_curr_id())
- && (lock->pass == 0)
- && (pass == 0)) {
+ /* lock->recursive also tells us if the writer_thread
+ field is stale or active. As we are going to write
+ our own thread id in that field it must be that the
+ current writer_thread value is not active. */
+ ut_a(!lock->recursive);
- lock->writer_count++;
+ /* Decrement occurred: we are writer or next-writer. */
+ rw_lock_set_writer_id_and_recursion_flag(lock,
+ pass ? FALSE : TRUE);
+ rw_lock_x_lock_wait(lock,
#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, file_name,
- line);
+ pass,
#endif
+ file_name, line);
- lock->last_x_file_name = file_name;
- lock->last_x_line = (unsigned int) line;
-
- /* Locking succeeded, we may return */
- return(RW_LOCK_EX);
+ } else {
+ /* Decrement failed: relock or failed lock */
+ if (!pass && lock->recursive
+ && os_thread_eq(lock->writer_thread, curr_thread)) {
+ /* Relock */
+ lock->lock_word -= X_LOCK_DECR;
+ } else {
+ /* Another thread locked before us */
+ return(FALSE);
+ }
}
+#ifdef UNIV_SYNC_DEBUG
+ rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
+ file_name, line);
+#endif
+ lock->last_x_file_name = file_name;
+ lock->last_x_line = (unsigned int) line;
- /* Locking did not succeed */
- return(RW_LOCK_NOT_LOCKED);
+ return(TRUE);
}
-/**********************************************************************
+/******************************************************************//**
NOTE! Use the corresponding macro, not directly this function! Lock an
rw-lock in exclusive mode for the current thread. If the rw-lock is locked
in shared or exclusive mode, or there is an exclusive lock request waiting,
@@ -461,58 +595,41 @@ for the lock before suspending the thread. If the same thread has an x-lock
on the rw-lock, locking succeed, with the following exception: if pass != 0,
only a single x-lock may be taken on the lock. NOTE: If the same thread has
an s-lock, locking does not succeed! */
-
+UNIV_INTERN
void
rw_lock_x_lock_func(
/*================*/
- rw_lock_t* lock, /* in: pointer to rw-lock */
- ulint pass, /* in: pass value; != 0, if the lock will
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
be passed to another thread to unlock */
- const char* file_name,/* in: file name where lock requested */
- ulint line) /* in: line where requested */
+ const char* file_name,/*!< in: file name where lock requested */
+ ulint line) /*!< in: line where requested */
{
- ulint index; /* index of the reserved wait cell */
- ulint state; /* lock state acquired */
- ulint i; /* spin round count */
+ ulint index; /*!< index of the reserved wait cell */
+ ulint i; /*!< spin round count */
+ ibool spinning = FALSE;
ut_ad(rw_lock_validate(lock));
-lock_loop:
- /* Acquire the mutex protecting the rw-lock fields */
- mutex_enter_fast(&(lock->mutex));
-
- state = rw_lock_x_lock_low(lock, pass, file_name, line);
+ i = 0;
- mutex_exit(&(lock->mutex));
+lock_loop:
- if (state == RW_LOCK_EX) {
+ if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
+ rw_x_spin_round_count += i;
return; /* Locking succeeded */
- } else if (state == RW_LOCK_NOT_LOCKED) {
-
- /* Spin waiting for the writer field to become free */
- i = 0;
-
- while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED
- && i < SYNC_SPIN_ROUNDS) {
- if (srv_spin_wait_delay) {
- ut_delay(ut_rnd_interval(0,
- srv_spin_wait_delay));
- }
+ } else {
- i++;
+ if (!spinning) {
+ spinning = TRUE;
+ rw_x_spin_wait_count++;
}
- if (i == SYNC_SPIN_ROUNDS) {
- os_thread_yield();
- }
- } else if (state == RW_LOCK_WAIT_EX) {
-
- /* Spin waiting for the reader count field to become zero */
- i = 0;
- while (rw_lock_get_reader_count(lock) != 0
- && i < SYNC_SPIN_ROUNDS) {
+ /* Spin waiting for the lock_word to become free */
+ while (i < SYNC_SPIN_ROUNDS
+ && lock->lock_word <= 0) {
if (srv_spin_wait_delay) {
ut_delay(ut_rnd_interval(0,
srv_spin_wait_delay));
@@ -522,12 +639,13 @@ lock_loop:
}
if (i == SYNC_SPIN_ROUNDS) {
os_thread_yield();
+ } else {
+ goto lock_loop;
}
- } else {
- i = 0; /* Eliminate a compiler warning */
- ut_error;
}
+ rw_x_spin_round_count += i;
+
if (srv_print_latch_waits) {
fprintf(stderr,
"Thread %lu spin wait rw-x-lock at %p"
@@ -536,39 +654,20 @@ lock_loop:
lock->cfile_name, (ulong) lock->cline, (ulong) i);
}
- rw_x_spin_wait_count++;
-
- /* We try once again to obtain the lock. Acquire the mutex protecting
- the rw-lock fields */
-
- mutex_enter(rw_lock_get_mutex(lock));
-
- state = rw_lock_x_lock_low(lock, pass, file_name, line);
-
- if (state == RW_LOCK_EX) {
- mutex_exit(rw_lock_get_mutex(lock));
-
- return; /* Locking succeeded */
- }
-
- rw_x_system_call_count++;
-
sync_array_reserve_cell(sync_primary_wait_array,
lock,
-#ifdef __WIN__
- /* On windows RW_LOCK_WAIT_EX signifies
- that this thread should wait on the
- special wait_ex_event. */
- (state == RW_LOCK_WAIT_EX)
- ? RW_LOCK_WAIT_EX :
-#endif
RW_LOCK_EX,
file_name, line,
&index);
- rw_lock_set_waiters(lock, 1);
+ /* Waiters must be set before checking lock_word, to ensure signal
+ is sent. This could lead to a few unnecessary wake-up signals. */
+ rw_lock_set_waiter_flag(lock);
- mutex_exit(rw_lock_get_mutex(lock));
+ if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
+ sync_array_free_cell(sync_primary_wait_array, index);
+ return; /* Locking succeeded */
+ }
if (srv_print_latch_waits) {
fprintf(stderr,
@@ -578,22 +677,24 @@ lock_loop:
lock->cfile_name, (ulong) lock->cline);
}
- rw_x_system_call_count++;
+ /* these stats may not be accurate */
+ lock->count_os_wait++;
rw_x_os_wait_count++;
sync_array_wait_event(sync_primary_wait_array, index);
+ i = 0;
goto lock_loop;
}
#ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
+/******************************************************************//**
Acquires the debug mutex. We cannot use the mutex defined in sync0sync,
because the debug mutex is also acquired in sync0arr while holding the OS
mutex protecting the sync array, and the ordinary mutex_enter might
recursively call routines in sync0arr, leading to a deadlock on the OS
mutex. */
-
+UNIV_INTERN
void
rw_lock_debug_mutex_enter(void)
/*==========================*/
@@ -616,9 +717,9 @@ loop:
goto loop;
}
-/**********************************************************************
+/******************************************************************//**
Releases the debug mutex. */
-
+UNIV_INTERN
void
rw_lock_debug_mutex_exit(void)
/*==========================*/
@@ -631,17 +732,17 @@ rw_lock_debug_mutex_exit(void)
}
}
-/**********************************************************************
+/******************************************************************//**
Inserts the debug information for an rw-lock. */
-
+UNIV_INTERN
void
rw_lock_add_debug_info(
/*===================*/
- rw_lock_t* lock, /* in: rw-lock */
- ulint pass, /* in: pass value */
- ulint lock_type, /* in: lock type */
- const char* file_name, /* in: file where requested */
- ulint line) /* in: line where requested */
+ rw_lock_t* lock, /*!< in: rw-lock */
+ ulint pass, /*!< in: pass value */
+ ulint lock_type, /*!< in: lock type */
+ const char* file_name, /*!< in: file where requested */
+ ulint line) /*!< in: line where requested */
{
rw_lock_debug_t* info;
@@ -667,15 +768,15 @@ rw_lock_add_debug_info(
}
}
-/**********************************************************************
+/******************************************************************//**
Removes a debug information struct for an rw-lock. */
-
+UNIV_INTERN
void
rw_lock_remove_debug_info(
/*======================*/
- rw_lock_t* lock, /* in: rw-lock */
- ulint pass, /* in: pass value */
- ulint lock_type) /* in: lock type */
+ rw_lock_t* lock, /*!< in: rw-lock */
+ ulint pass, /*!< in: pass value */
+ ulint lock_type) /*!< in: lock type */
{
rw_lock_debug_t* info;
@@ -713,16 +814,16 @@ rw_lock_remove_debug_info(
#endif /* UNIV_SYNC_DEBUG */
#ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
+/******************************************************************//**
Checks if the thread has locked the rw-lock in the specified mode, with
-the pass value == 0. */
-
+the pass value == 0.
+@return TRUE if locked */
+UNIV_INTERN
ibool
rw_lock_own(
/*========*/
- /* out: TRUE if locked */
- rw_lock_t* lock, /* in: rw-lock */
- ulint lock_type) /* in: lock type: RW_LOCK_SHARED,
+ rw_lock_t* lock, /*!< in: rw-lock */
+ ulint lock_type) /*!< in: lock type: RW_LOCK_SHARED,
RW_LOCK_EX */
{
rw_lock_debug_t* info;
@@ -730,7 +831,7 @@ rw_lock_own(
ut_ad(lock);
ut_ad(rw_lock_validate(lock));
- mutex_enter(&(lock->mutex));
+ rw_lock_debug_mutex_enter();
info = UT_LIST_GET_FIRST(lock->debug_list);
@@ -740,7 +841,7 @@ rw_lock_own(
&& (info->pass == 0)
&& (info->lock_type == lock_type)) {
- mutex_exit(&(lock->mutex));
+ rw_lock_debug_mutex_exit();
/* Found! */
return(TRUE);
@@ -748,21 +849,21 @@ rw_lock_own(
info = UT_LIST_GET_NEXT(list, info);
}
- mutex_exit(&(lock->mutex));
+ rw_lock_debug_mutex_exit();
return(FALSE);
}
#endif /* UNIV_SYNC_DEBUG */
-/**********************************************************************
-Checks if somebody has locked the rw-lock in the specified mode. */
-
+/******************************************************************//**
+Checks if somebody has locked the rw-lock in the specified mode.
+@return TRUE if locked */
+UNIV_INTERN
ibool
rw_lock_is_locked(
/*==============*/
- /* out: TRUE if locked */
- rw_lock_t* lock, /* in: rw-lock */
- ulint lock_type) /* in: lock type: RW_LOCK_SHARED,
+ rw_lock_t* lock, /*!< in: rw-lock */
+ ulint lock_type) /*!< in: lock type: RW_LOCK_SHARED,
RW_LOCK_EX */
{
ibool ret = FALSE;
@@ -770,33 +871,29 @@ rw_lock_is_locked(
ut_ad(lock);
ut_ad(rw_lock_validate(lock));
- mutex_enter(&(lock->mutex));
-
if (lock_type == RW_LOCK_SHARED) {
- if (lock->reader_count > 0) {
+ if (rw_lock_get_reader_count(lock) > 0) {
ret = TRUE;
}
} else if (lock_type == RW_LOCK_EX) {
- if (lock->writer == RW_LOCK_EX) {
+ if (rw_lock_get_writer(lock) == RW_LOCK_EX) {
ret = TRUE;
}
} else {
ut_error;
}
- mutex_exit(&(lock->mutex));
-
return(ret);
}
#ifdef UNIV_SYNC_DEBUG
-/*******************************************************************
+/***************************************************************//**
Prints debug info of currently locked rw-locks. */
-
+UNIV_INTERN
void
rw_lock_list_print_info(
/*====================*/
- FILE* file) /* in: file where to print */
+ FILE* file) /*!< in: file where to print */
{
rw_lock_t* lock;
ulint count = 0;
@@ -814,11 +911,10 @@ rw_lock_list_print_info(
count++;
+#ifndef INNODB_RW_LOCKS_USE_ATOMICS
mutex_enter(&(lock->mutex));
-
- if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
- || (rw_lock_get_reader_count(lock) != 0)
- || (rw_lock_get_waiters(lock) != 0)) {
+#endif
+ if (lock->lock_word != X_LOCK_DECR) {
fprintf(file, "RW-LOCK: %p ", (void*) lock);
@@ -834,8 +930,10 @@ rw_lock_list_print_info(
info = UT_LIST_GET_NEXT(list, info);
}
}
-
+#ifndef INNODB_RW_LOCKS_USE_ATOMICS
mutex_exit(&(lock->mutex));
+#endif
+
lock = UT_LIST_GET_NEXT(list, lock);
}
@@ -843,13 +941,13 @@ rw_lock_list_print_info(
mutex_exit(&rw_lock_list_mutex);
}
-/*******************************************************************
+/***************************************************************//**
Prints debug info of an rw-lock. */
-
+UNIV_INTERN
void
rw_lock_print(
/*==========*/
- rw_lock_t* lock) /* in: rw-lock */
+ rw_lock_t* lock) /*!< in: rw-lock */
{
rw_lock_debug_t* info;
@@ -858,9 +956,15 @@ rw_lock_print(
"RW-LATCH INFO\n"
"RW-LATCH: %p ", (void*) lock);
- if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
- || (rw_lock_get_reader_count(lock) != 0)
- || (rw_lock_get_waiters(lock) != 0)) {
+#ifndef INNODB_RW_LOCKS_USE_ATOMICS
+ /* We used to acquire lock->mutex here, but it would cause a
+ recursive call to sync_thread_add_level() if UNIV_SYNC_DEBUG
+ is defined. Since this function is only invoked from
+ sync_thread_levels_g(), let us choose the smaller evil:
+ performing dirty reads instead of causing bogus deadlocks or
+ assertion failures. */
+#endif
+ if (lock->lock_word != X_LOCK_DECR) {
if (rw_lock_get_waiters(lock)) {
fputs(" Waiters for the lock exist\n", stderr);
@@ -876,13 +980,13 @@ rw_lock_print(
}
}
-/*************************************************************************
+/*********************************************************************//**
Prints info of a debug struct. */
-
+UNIV_INTERN
void
rw_lock_debug_print(
/*================*/
- rw_lock_debug_t* info) /* in: debug struct */
+ rw_lock_debug_t* info) /*!< in: debug struct */
{
ulint rwt;
@@ -906,10 +1010,11 @@ rw_lock_debug_print(
putc('\n', stderr);
}
-/*******************************************************************
+/***************************************************************//**
Returns the number of currently locked rw-locks. Works only in the debug
-version. */
-
+version.
+@return number of locked rw-locks */
+UNIV_INTERN
ulint
rw_lock_n_locked(void)
/*==================*/
@@ -922,14 +1027,11 @@ rw_lock_n_locked(void)
lock = UT_LIST_GET_FIRST(rw_lock_list);
while (lock != NULL) {
- mutex_enter(rw_lock_get_mutex(lock));
- if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
- || (rw_lock_get_reader_count(lock) != 0)) {
+ if (lock->lock_word != X_LOCK_DECR) {
count++;
}
- mutex_exit(rw_lock_get_mutex(lock));
lock = UT_LIST_GET_NEXT(list, lock);
}
diff --git a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
index 944fd2a97fc..569fc6328c4 100644
--- a/storage/innobase/sync/sync0sync.c
+++ b/storage/innobase/sync/sync0sync.c
@@ -1,7 +1,31 @@
-/******************************************************
-Mutex, the basic synchronization primitive
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
-(c) 1995 Innobase Oy
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file sync/sync0sync.c
+Mutex, the basic synchronization primitive
Created 9/5/1995 Heikki Tuuri
*******************************************************/
@@ -15,6 +39,7 @@ Created 9/5/1995 Heikki Tuuri
#include "buf0buf.h"
#include "srv0srv.h"
#include "buf0types.h"
+#include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
/*
REASONS FOR IMPLEMENTING THE SPIN LOCK MUTEX
@@ -138,110 +163,91 @@ Therefore, this thread is guaranteed to catch the os_set_event()
signalled unconditionally at the release of the lock.
Q.E.D. */
-/* The number of system calls made in this module. Intended for performance
-monitoring. */
-
-ulint mutex_system_call_count = 0;
-
/* Number of spin waits on mutexes: for performance monitoring */
-/* round=one iteration of a spin loop */
-ulint mutex_spin_round_count = 0;
-ulint mutex_spin_wait_count = 0;
-ulint mutex_os_wait_count = 0;
-ulint mutex_exit_count = 0;
+/** The number of iterations in the mutex_spin_wait() spin loop.
+Intended for performance monitoring. */
+static ib_int64_t mutex_spin_round_count = 0;
+/** The number of mutex_spin_wait() calls. Intended for
+performance monitoring. */
+static ib_int64_t mutex_spin_wait_count = 0;
+/** The number of OS waits in mutex_spin_wait(). Intended for
+performance monitoring. */
+static ib_int64_t mutex_os_wait_count = 0;
+/** The number of mutex_exit() calls. Intended for performance
+monitoring. */
+UNIV_INTERN ib_int64_t mutex_exit_count = 0;
-/* The global array of wait cells for implementation of the database's own
+/** The global array of wait cells for implementation of the database's own
mutexes and read-write locks */
-sync_array_t* sync_primary_wait_array;
-
-/* This variable is set to TRUE when sync_init is called */
-ibool sync_initialized = FALSE;
+UNIV_INTERN sync_array_t* sync_primary_wait_array;
+/** This variable is set to TRUE when sync_init is called */
+UNIV_INTERN ibool sync_initialized = FALSE;
+/** An acquired mutex or rw-lock and its level in the latching order */
typedef struct sync_level_struct sync_level_t;
+/** Mutexes or rw-locks held by a thread */
typedef struct sync_thread_struct sync_thread_t;
#ifdef UNIV_SYNC_DEBUG
-/* The latch levels currently owned by threads are stored in this data
+/** The latch levels currently owned by threads are stored in this data
structure; the size of this array is OS_THREAD_MAX_N */
-sync_thread_t* sync_thread_level_arrays;
+UNIV_INTERN sync_thread_t* sync_thread_level_arrays;
-/* Mutex protecting sync_thread_level_arrays */
-mutex_t sync_thread_mutex;
+/** Mutex protecting sync_thread_level_arrays */
+UNIV_INTERN mutex_t sync_thread_mutex;
#endif /* UNIV_SYNC_DEBUG */
-/* Global list of database mutexes (not OS mutexes) created. */
-ut_list_base_node_t mutex_list;
+/** Global list of database mutexes (not OS mutexes) created. */
+UNIV_INTERN ut_list_base_node_t mutex_list;
-/* Mutex protecting the mutex_list variable */
-mutex_t mutex_list_mutex;
+/** Mutex protecting the mutex_list variable */
+UNIV_INTERN mutex_t mutex_list_mutex;
#ifdef UNIV_SYNC_DEBUG
-/* Latching order checks start when this is set TRUE */
-ibool sync_order_checks_on = FALSE;
+/** Latching order checks start when this is set TRUE */
+UNIV_INTERN ibool sync_order_checks_on = FALSE;
#endif /* UNIV_SYNC_DEBUG */
+/** Mutexes or rw-locks held by a thread */
struct sync_thread_struct{
- os_thread_id_t id; /* OS thread id */
- sync_level_t* levels; /* level array for this thread; if this is NULL
- this slot is unused */
+ os_thread_id_t id; /*!< OS thread id */
+ sync_level_t* levels; /*!< level array for this thread; if
+ this is NULL this slot is unused */
};
-/* Number of slots reserved for each OS thread in the sync level array */
+/** Number of slots reserved for each OS thread in the sync level array */
#define SYNC_THREAD_N_LEVELS 10000
+/** An acquired mutex or rw-lock and its level in the latching order */
struct sync_level_struct{
- void* latch; /* pointer to a mutex or an rw-lock; NULL means that
+ void* latch; /*!< pointer to a mutex or an rw-lock; NULL means that
the slot is empty */
- ulint level; /* level of the latch in the latching order */
+ ulint level; /*!< level of the latch in the latching order */
};
-/**********************************************************************
-A noninlined function that reserves a mutex. In ha_innodb.cc we have disabled
-inlining of InnoDB functions, and no inlined functions should be called from
-there. That is why we need to duplicate the inlined function here. */
-
-void
-mutex_enter_noninline(
-/*==================*/
- mutex_t* mutex) /* in: mutex */
-{
- mutex_enter(mutex);
-}
-
-/**********************************************************************
-Releases a mutex. */
-
-void
-mutex_exit_noninline(
-/*=================*/
- mutex_t* mutex) /* in: mutex */
-{
- mutex_exit(mutex);
-}
-
-/**********************************************************************
+/******************************************************************//**
Creates, or rather, initializes a mutex object in a specified memory
location (which must be appropriately aligned). The mutex is initialized
in the reset state. Explicit freeing of the mutex with mutex_free is
necessary only if the memory block containing it is freed. */
-
+UNIV_INTERN
void
mutex_create_func(
/*==============*/
- mutex_t* mutex, /* in: pointer to memory */
+ mutex_t* mutex, /*!< in: pointer to memory */
#ifdef UNIV_DEBUG
- const char* cmutex_name, /* in: mutex name */
+ const char* cmutex_name, /*!< in: mutex name */
# ifdef UNIV_SYNC_DEBUG
- ulint level, /* in: level */
+ ulint level, /*!< in: level */
# endif /* UNIV_SYNC_DEBUG */
#endif /* UNIV_DEBUG */
- const char* cfile_name, /* in: file name where created */
- ulint cline) /* in: file line where created */
+ const char* cfile_name, /*!< in: file name where created */
+ ulint cline) /*!< in: file line where created */
{
-#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
+#if defined(HAVE_ATOMIC_BUILTINS)
mutex_reset_lock_word(mutex);
#else
os_fast_mutex_init(&(mutex->os_fast_mutex));
@@ -259,9 +265,8 @@ mutex_create_func(
#endif /* UNIV_SYNC_DEBUG */
mutex->cfile_name = cfile_name;
mutex->cline = cline;
-#ifndef UNIV_HOTBACKUP
mutex->count_os_wait = 0;
-# ifdef UNIV_DEBUG
+#ifdef UNIV_DEBUG
mutex->cmutex_name= cmutex_name;
mutex->count_using= 0;
mutex->mutex_type= 0;
@@ -270,8 +275,7 @@ mutex_create_func(
mutex->count_spin_loop= 0;
mutex->count_spin_rounds= 0;
mutex->count_os_yield= 0;
-# endif /* UNIV_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
+#endif /* UNIV_DEBUG */
/* Check that lock_word is aligned; this is important on Intel */
ut_ad(((ulint)(&(mutex->lock_word))) % 4 == 0);
@@ -297,15 +301,15 @@ mutex_create_func(
mutex_exit(&mutex_list_mutex);
}
-/**********************************************************************
+/******************************************************************//**
Calling this function is obligatory only if the memory buffer containing
the mutex is freed. Removes a mutex object from the mutex list. The mutex
is checked to be in the reset state. */
-
+UNIV_INTERN
void
mutex_free(
/*=======*/
- mutex_t* mutex) /* in: mutex */
+ mutex_t* mutex) /*!< in: mutex */
{
ut_ad(mutex_validate(mutex));
ut_a(mutex_get_lock_word(mutex) == 0);
@@ -333,7 +337,7 @@ mutex_free(
os_event_free(mutex->event);
-#if !defined(_WIN32) || !defined(UNIV_CAN_USE_X86_ASSEMBLER)
+#if !defined(HAVE_ATOMIC_BUILTINS)
os_fast_mutex_free(&(mutex->os_fast_mutex));
#endif
/* If we free the mutex protecting the mutex list (freeing is
@@ -344,21 +348,21 @@ mutex_free(
#endif /* UNIV_DEBUG */
}
-/************************************************************************
+/********************************************************************//**
NOTE! Use the corresponding macro in the header file, not this function
directly. Tries to lock the mutex for the current thread. If the lock is not
-acquired immediately, returns with return value 1. */
-
+acquired immediately, returns with return value 1.
+@return 0 if succeed, 1 if not */
+UNIV_INTERN
ulint
mutex_enter_nowait_func(
/*====================*/
- /* out: 0 if succeed, 1 if not */
- mutex_t* mutex, /* in: pointer to mutex */
+ mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name __attribute__((unused)),
- /* in: file name where mutex
+ /*!< in: file name where mutex
requested */
ulint line __attribute__((unused)))
- /* in: line where requested */
+ /*!< in: line where requested */
{
ut_ad(mutex_validate(mutex));
@@ -376,13 +380,14 @@ mutex_enter_nowait_func(
}
#ifdef UNIV_DEBUG
-/**********************************************************************
-Checks that the mutex has been initialized. */
-
+/******************************************************************//**
+Checks that the mutex has been initialized.
+@return TRUE */
+UNIV_INTERN
ibool
mutex_validate(
/*===========*/
- const mutex_t* mutex)
+ const mutex_t* mutex) /*!< in: mutex */
{
ut_a(mutex);
ut_a(mutex->magic_n == MUTEX_MAGIC_N);
@@ -390,15 +395,15 @@ mutex_validate(
return(TRUE);
}
-/**********************************************************************
+/******************************************************************//**
Checks that the current thread owns the mutex. Works only in the debug
-version. */
-
+version.
+@return TRUE if owns */
+UNIV_INTERN
ibool
mutex_own(
/*======*/
- /* out: TRUE if owns */
- const mutex_t* mutex) /* in: mutex */
+ const mutex_t* mutex) /*!< in: mutex */
{
ut_ad(mutex_validate(mutex));
@@ -407,14 +412,14 @@ mutex_own(
}
#endif /* UNIV_DEBUG */
-/**********************************************************************
+/******************************************************************//**
Sets the waiters field in a mutex. */
-
+UNIV_INTERN
void
mutex_set_waiters(
/*==============*/
- mutex_t* mutex, /* in: mutex */
- ulint n) /* in: value to set */
+ mutex_t* mutex, /*!< in: mutex */
+ ulint n) /*!< in: value to set */
{
volatile ulint* ptr; /* declared volatile to ensure that
the value is stored to memory */
@@ -426,30 +431,36 @@ mutex_set_waiters(
word in memory is atomic */
}
-/**********************************************************************
+/******************************************************************//**
Reserves a mutex for the current thread. If the mutex is reserved, the
function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
for the mutex before suspending the thread. */
-
+UNIV_INTERN
void
mutex_spin_wait(
/*============*/
- mutex_t* mutex, /* in: pointer to mutex */
- const char* file_name, /* in: file name where mutex
+ mutex_t* mutex, /*!< in: pointer to mutex */
+ const char* file_name, /*!< in: file name where mutex
requested */
- ulint line) /* in: line where requested */
+ ulint line) /*!< in: line where requested */
{
ulint index; /* index of the reserved wait cell */
ulint i; /* spin round count */
-#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
- ib_longlong lstart_time = 0, lfinish_time; /* for timing os_wait */
+#ifdef UNIV_DEBUG
+ ib_int64_t lstart_time = 0, lfinish_time; /* for timing os_wait */
ulint ltime_diff;
ulint sec;
ulint ms;
uint timer_started = 0;
-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+#endif /* UNIV_DEBUG */
ut_ad(mutex);
+ /* This update is not thread safe, but we don't mind if the count
+ isn't exact. Moved out of the ifdef that follows because we are
+ willing to pay the cost of counting this, as the data is valuable.
+ Count the number of calls to mutex_spin_wait. */
+ mutex_spin_wait_count++;
+
mutex_loop:
i = 0;
@@ -461,10 +472,7 @@ mutex_loop:
a memory word. */
spin_loop:
-#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
- mutex_spin_wait_count++;
- mutex->count_spin_loop++;
-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+ ut_d(mutex->count_spin_loop++);
while (mutex_get_lock_word(mutex) != 0 && i < SYNC_SPIN_ROUNDS) {
if (srv_spin_wait_delay) {
@@ -475,14 +483,16 @@ spin_loop:
}
if (i == SYNC_SPIN_ROUNDS) {
-#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
+#ifdef UNIV_DEBUG
mutex->count_os_yield++;
- if (timed_mutexes == 1 && timer_started==0) {
+#ifndef UNIV_HOTBACKUP
+ if (timed_mutexes && timer_started == 0) {
ut_usectime(&sec, &ms);
- lstart_time= (ib_longlong)sec * 1000000 + ms;
+ lstart_time= (ib_int64_t)sec * 1000000 + ms;
timer_started = 1;
}
-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+#endif /* !UNIV_HOTBACKUP */
+#endif /* UNIV_DEBUG */
os_thread_yield();
}
@@ -496,9 +506,7 @@ spin_loop:
mutex_spin_round_count += i;
-#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
- mutex->count_spin_rounds += i;
-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+ ut_d(mutex->count_spin_rounds += i);
if (mutex_test_and_set(mutex) == 0) {
/* Succeeded! */
@@ -527,8 +535,6 @@ spin_loop:
sync_array_reserve_cell(sync_primary_wait_array, mutex,
SYNC_MUTEX, file_name, line, &index);
- mutex_system_call_count++;
-
/* The memory order of the array reservation and the change in the
waiters field is important: when we suspend a thread, we first
reserve the cell and then set waiters field to 1. When threads are
@@ -575,30 +581,28 @@ spin_loop:
mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
#endif
- mutex_system_call_count++;
mutex_os_wait_count++;
-#ifndef UNIV_HOTBACKUP
mutex->count_os_wait++;
-# ifdef UNIV_DEBUG
+#ifdef UNIV_DEBUG
/* !!!!! Sometimes os_wait can be called without os_thread_yield */
-
- if (timed_mutexes == 1 && timer_started==0) {
+#ifndef UNIV_HOTBACKUP
+ if (timed_mutexes == 1 && timer_started == 0) {
ut_usectime(&sec, &ms);
- lstart_time= (ib_longlong)sec * 1000000 + ms;
+ lstart_time= (ib_int64_t)sec * 1000000 + ms;
timer_started = 1;
}
-# endif /* UNIV_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
+#endif /* !UNIV_HOTBACKUP */
+#endif /* UNIV_DEBUG */
sync_array_wait_event(sync_primary_wait_array, index);
goto mutex_loop;
finish_timing:
-#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
+#ifdef UNIV_DEBUG
if (timed_mutexes == 1 && timer_started==1) {
ut_usectime(&sec, &ms);
- lfinish_time= (ib_longlong)sec * 1000000 + ms;
+ lfinish_time= (ib_int64_t)sec * 1000000 + ms;
ltime_diff= (ulint) (lfinish_time - lstart_time);
mutex->lspent_time += ltime_diff;
@@ -607,17 +611,17 @@ finish_timing:
mutex->lmax_spent_time= ltime_diff;
}
}
-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+#endif /* UNIV_DEBUG */
return;
}
-/**********************************************************************
+/******************************************************************//**
Releases the threads waiting in the primary wait array for this mutex. */
-
+UNIV_INTERN
void
mutex_signal_object(
/*================*/
- mutex_t* mutex) /* in: mutex */
+ mutex_t* mutex) /*!< in: mutex */
{
mutex_set_waiters(mutex, 0);
@@ -628,15 +632,15 @@ mutex_signal_object(
}
#ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
+/******************************************************************//**
Sets the debug information for a reserved mutex. */
-
+UNIV_INTERN
void
mutex_set_debug_info(
/*=================*/
- mutex_t* mutex, /* in: mutex */
- const char* file_name, /* in: file where requested */
- ulint line) /* in: line where requested */
+ mutex_t* mutex, /*!< in: mutex */
+ const char* file_name, /*!< in: file where requested */
+ ulint line) /*!< in: line where requested */
{
ut_ad(mutex);
ut_ad(file_name);
@@ -647,16 +651,16 @@ mutex_set_debug_info(
mutex->line = line;
}
-/**********************************************************************
+/******************************************************************//**
Gets the debug information for a reserved mutex. */
-
+UNIV_INTERN
void
mutex_get_debug_info(
/*=================*/
- mutex_t* mutex, /* in: mutex */
- const char** file_name, /* out: file where requested */
- ulint* line, /* out: line where requested */
- os_thread_id_t* thread_id) /* out: id of the thread which owns
+ mutex_t* mutex, /*!< in: mutex */
+ const char** file_name, /*!< out: file where requested */
+ ulint* line, /*!< out: line where requested */
+ os_thread_id_t* thread_id) /*!< out: id of the thread which owns
the mutex */
{
ut_ad(mutex);
@@ -666,13 +670,13 @@ mutex_get_debug_info(
*thread_id = mutex->thread_id;
}
-/**********************************************************************
+/******************************************************************//**
Prints debug info of currently reserved mutexes. */
static
void
mutex_list_print_info(
/*==================*/
- FILE* file) /* in: file where to print */
+ FILE* file) /*!< in: file where to print */
{
mutex_t* mutex;
const char* file_name;
@@ -709,9 +713,10 @@ mutex_list_print_info(
mutex_exit(&mutex_list_mutex);
}
-/**********************************************************************
-Counts currently reserved mutexes. Works only in the debug version. */
-
+/******************************************************************//**
+Counts currently reserved mutexes. Works only in the debug version.
+@return number of reserved mutexes */
+UNIV_INTERN
ulint
mutex_n_reserved(void)
/*==================*/
@@ -740,10 +745,11 @@ mutex_n_reserved(void)
was holding one mutex (mutex_list_mutex) */
}
-/**********************************************************************
+/******************************************************************//**
Returns TRUE if no mutex or rw-lock is currently locked. Works only in
-the debug version. */
-
+the debug version.
+@return TRUE if no mutexes and rw-locks reserved */
+UNIV_INTERN
ibool
sync_all_freed(void)
/*================*/
@@ -751,27 +757,27 @@ sync_all_freed(void)
return(mutex_n_reserved() + rw_lock_n_locked() == 0);
}
-/**********************************************************************
-Gets the value in the nth slot in the thread level arrays. */
+/******************************************************************//**
+Gets the value in the nth slot in the thread level arrays.
+@return pointer to thread slot */
static
sync_thread_t*
sync_thread_level_arrays_get_nth(
/*=============================*/
- /* out: pointer to thread slot */
- ulint n) /* in: slot number */
+ ulint n) /*!< in: slot number */
{
ut_ad(n < OS_THREAD_MAX_N);
return(sync_thread_level_arrays + n);
}
-/**********************************************************************
-Looks for the thread slot for the calling thread. */
+/******************************************************************//**
+Looks for the thread slot for the calling thread.
+@return pointer to thread slot, NULL if not found */
static
sync_thread_t*
sync_thread_level_arrays_find_slot(void)
/*====================================*/
- /* out: pointer to thread slot, NULL if not found */
{
sync_thread_t* slot;
@@ -793,13 +799,13 @@ sync_thread_level_arrays_find_slot(void)
return(NULL);
}
-/**********************************************************************
-Looks for an unused thread slot. */
+/******************************************************************//**
+Looks for an unused thread slot.
+@return pointer to thread slot */
static
sync_thread_t*
sync_thread_level_arrays_find_free(void)
/*====================================*/
- /* out: pointer to thread slot */
{
sync_thread_t* slot;
@@ -818,33 +824,34 @@ sync_thread_level_arrays_find_free(void)
return(NULL);
}
-/**********************************************************************
-Gets the value in the nth slot in the thread level array. */
+/******************************************************************//**
+Gets the value in the nth slot in the thread level array.
+@return pointer to level slot */
static
sync_level_t*
sync_thread_levels_get_nth(
/*=======================*/
- /* out: pointer to level slot */
- sync_level_t* arr, /* in: pointer to level array for an OS
+ sync_level_t* arr, /*!< in: pointer to level array for an OS
thread */
- ulint n) /* in: slot number */
+ ulint n) /*!< in: slot number */
{
ut_ad(n < SYNC_THREAD_N_LEVELS);
return(arr + n);
}
-/**********************************************************************
+/******************************************************************//**
Checks if all the level values stored in the level array are greater than
-the given limit. */
+the given limit.
+@return TRUE if all greater */
static
ibool
sync_thread_levels_g(
/*=================*/
- /* out: TRUE if all greater */
- sync_level_t* arr, /* in: pointer to level array for an OS
+ sync_level_t* arr, /*!< in: pointer to level array for an OS
thread */
- ulint limit) /* in: level limit */
+ ulint limit, /*!< in: level limit */
+ ulint warn) /*!< in: TRUE=display a diagnostic message */
{
sync_level_t* slot;
rw_lock_t* lock;
@@ -858,6 +865,11 @@ sync_thread_levels_g(
if (slot->latch != NULL) {
if (slot->level <= limit) {
+ if (!warn) {
+
+ return(FALSE);
+ }
+
lock = slot->latch;
mutex = slot->latch;
@@ -905,16 +917,16 @@ sync_thread_levels_g(
return(TRUE);
}
-/**********************************************************************
-Checks if the level value is stored in the level array. */
+/******************************************************************//**
+Checks if the level value is stored in the level array.
+@return TRUE if stored */
static
ibool
sync_thread_levels_contain(
/*=======================*/
- /* out: TRUE if stored */
- sync_level_t* arr, /* in: pointer to level array for an OS
+ sync_level_t* arr, /*!< in: pointer to level array for an OS
thread */
- ulint level) /* in: level */
+ ulint level) /*!< in: level */
{
sync_level_t* slot;
ulint i;
@@ -934,15 +946,14 @@ sync_thread_levels_contain(
return(FALSE);
}
-/**********************************************************************
-Checks that the level array for the current thread is empty. */
-
+/******************************************************************//**
+Checks that the level array for the current thread is empty.
+@return TRUE if empty except the exceptions specified below */
+UNIV_INTERN
ibool
sync_thread_levels_empty_gen(
/*=========================*/
- /* out: TRUE if empty except the
- exceptions specified below */
- ibool dict_mutex_allowed) /* in: TRUE if dictionary mutex is
+ ibool dict_mutex_allowed) /*!< in: TRUE if dictionary mutex is
allowed to be owned by the thread,
also purge_is_running mutex is
allowed */
@@ -991,27 +1002,27 @@ sync_thread_levels_empty_gen(
return(TRUE);
}
-/**********************************************************************
-Checks that the level array for the current thread is empty. */
-
+/******************************************************************//**
+Checks that the level array for the current thread is empty.
+@return TRUE if empty */
+UNIV_INTERN
ibool
sync_thread_levels_empty(void)
/*==========================*/
- /* out: TRUE if empty */
{
return(sync_thread_levels_empty_gen(FALSE));
}
-/**********************************************************************
+/******************************************************************//**
Adds a latch and its level in the thread level array. Allocates the memory
for the array if called first time for this OS thread. Makes the checks
against other latch levels stored in the array for this thread. */
-
+UNIV_INTERN
void
sync_thread_add_level(
/*==================*/
- void* latch, /* in: pointer to a mutex or an rw-lock */
- ulint level) /* in: level in the latching order; if
+ void* latch, /*!< in: pointer to a mutex or an rw-lock */
+ ulint level) /*!< in: level in the latching order; if
SYNC_LEVEL_VARYING, nothing is done */
{
sync_level_t* array;
@@ -1072,74 +1083,79 @@ sync_thread_add_level(
/* Do no order checking */
break;
case SYNC_MEM_POOL:
- ut_a(sync_thread_levels_g(array, SYNC_MEM_POOL));
- break;
case SYNC_MEM_HASH:
- ut_a(sync_thread_levels_g(array, SYNC_MEM_HASH));
- break;
case SYNC_RECV:
- ut_a(sync_thread_levels_g(array, SYNC_RECV));
- break;
case SYNC_WORK_QUEUE:
- ut_a(sync_thread_levels_g(array, SYNC_WORK_QUEUE));
- break;
case SYNC_LOG:
- ut_a(sync_thread_levels_g(array, SYNC_LOG));
- break;
case SYNC_THR_LOCAL:
- ut_a(sync_thread_levels_g(array, SYNC_THR_LOCAL));
- break;
case SYNC_ANY_LATCH:
- ut_a(sync_thread_levels_g(array, SYNC_ANY_LATCH));
- break;
case SYNC_TRX_SYS_HEADER:
- ut_a(sync_thread_levels_g(array, SYNC_TRX_SYS_HEADER));
- break;
+ case SYNC_FILE_FORMAT_TAG:
case SYNC_DOUBLEWRITE:
- ut_a(sync_thread_levels_g(array, SYNC_DOUBLEWRITE));
- break;
- case SYNC_BUF_BLOCK:
- ut_a((sync_thread_levels_contain(array, SYNC_BUF_POOL)
- && sync_thread_levels_g(array, SYNC_BUF_BLOCK - 1))
- || sync_thread_levels_g(array, SYNC_BUF_BLOCK));
- break;
case SYNC_BUF_POOL:
- ut_a(sync_thread_levels_g(array, SYNC_BUF_POOL));
- break;
case SYNC_SEARCH_SYS:
- ut_a(sync_thread_levels_g(array, SYNC_SEARCH_SYS));
- break;
+ case SYNC_SEARCH_SYS_CONF:
case SYNC_TRX_LOCK_HEAP:
- ut_a(sync_thread_levels_g(array, SYNC_TRX_LOCK_HEAP));
+ case SYNC_KERNEL:
+ case SYNC_IBUF_BITMAP_MUTEX:
+ case SYNC_RSEG:
+ case SYNC_TRX_UNDO:
+ case SYNC_PURGE_LATCH:
+ case SYNC_PURGE_SYS:
+ case SYNC_DICT_AUTOINC_MUTEX:
+ case SYNC_DICT_OPERATION:
+ case SYNC_DICT_HEADER:
+ case SYNC_TRX_I_S_RWLOCK:
+ case SYNC_TRX_I_S_LAST_READ:
+ if (!sync_thread_levels_g(array, level, TRUE)) {
+ fprintf(stderr,
+ "InnoDB: sync_thread_levels_g(array, %lu)"
+ " does not hold!\n", level);
+ ut_error;
+ }
break;
- case SYNC_REC_LOCK:
- ut_a((sync_thread_levels_contain(array, SYNC_KERNEL)
- && sync_thread_levels_g(array, SYNC_REC_LOCK - 1))
- || sync_thread_levels_g(array, SYNC_REC_LOCK));
+ case SYNC_BUF_BLOCK:
+ /* Either the thread must own the buffer pool mutex
+ (buf_pool_mutex), or it is allowed to latch only ONE
+ buffer block (block->mutex or buf_pool_zip_mutex). */
+ if (!sync_thread_levels_g(array, level, FALSE)) {
+ ut_a(sync_thread_levels_g(array, level - 1, TRUE));
+ ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
+ }
break;
- case SYNC_KERNEL:
- ut_a(sync_thread_levels_g(array, SYNC_KERNEL));
+ case SYNC_REC_LOCK:
+ if (sync_thread_levels_contain(array, SYNC_KERNEL)) {
+ ut_a(sync_thread_levels_g(array, SYNC_REC_LOCK - 1,
+ TRUE));
+ } else {
+ ut_a(sync_thread_levels_g(array, SYNC_REC_LOCK, TRUE));
+ }
break;
case SYNC_IBUF_BITMAP:
- ut_a((sync_thread_levels_contain(array, SYNC_IBUF_BITMAP_MUTEX)
- && sync_thread_levels_g(array, SYNC_IBUF_BITMAP - 1))
- || sync_thread_levels_g(array, SYNC_IBUF_BITMAP));
- break;
- case SYNC_IBUF_BITMAP_MUTEX:
- ut_a(sync_thread_levels_g(array, SYNC_IBUF_BITMAP_MUTEX));
+ /* Either the thread must own the master mutex to all
+ the bitmap pages, or it is allowed to latch only ONE
+ bitmap page. */
+ if (sync_thread_levels_contain(array,
+ SYNC_IBUF_BITMAP_MUTEX)) {
+ ut_a(sync_thread_levels_g(array, SYNC_IBUF_BITMAP - 1,
+ TRUE));
+ } else {
+ ut_a(sync_thread_levels_g(array, SYNC_IBUF_BITMAP,
+ TRUE));
+ }
break;
case SYNC_FSP_PAGE:
ut_a(sync_thread_levels_contain(array, SYNC_FSP));
break;
case SYNC_FSP:
ut_a(sync_thread_levels_contain(array, SYNC_FSP)
- || sync_thread_levels_g(array, SYNC_FSP));
+ || sync_thread_levels_g(array, SYNC_FSP, TRUE));
break;
case SYNC_TRX_UNDO_PAGE:
ut_a(sync_thread_levels_contain(array, SYNC_TRX_UNDO)
|| sync_thread_levels_contain(array, SYNC_RSEG)
|| sync_thread_levels_contain(array, SYNC_PURGE_SYS)
- || sync_thread_levels_g(array, SYNC_TRX_UNDO_PAGE));
+ || sync_thread_levels_g(array, SYNC_TRX_UNDO_PAGE, TRUE));
break;
case SYNC_RSEG_HEADER:
ut_a(sync_thread_levels_contain(array, SYNC_RSEG));
@@ -1148,61 +1164,44 @@ sync_thread_add_level(
ut_a(sync_thread_levels_contain(array, SYNC_KERNEL)
&& sync_thread_levels_contain(array, SYNC_FSP_PAGE));
break;
- case SYNC_RSEG:
- ut_a(sync_thread_levels_g(array, SYNC_RSEG));
- break;
- case SYNC_TRX_UNDO:
- ut_a(sync_thread_levels_g(array, SYNC_TRX_UNDO));
- break;
- case SYNC_PURGE_LATCH:
- ut_a(sync_thread_levels_g(array, SYNC_PURGE_LATCH));
- break;
- case SYNC_PURGE_SYS:
- ut_a(sync_thread_levels_g(array, SYNC_PURGE_SYS));
- break;
case SYNC_TREE_NODE:
ut_a(sync_thread_levels_contain(array, SYNC_INDEX_TREE)
|| sync_thread_levels_contain(array, SYNC_DICT_OPERATION)
- || sync_thread_levels_g(array, SYNC_TREE_NODE - 1));
+ || sync_thread_levels_g(array, SYNC_TREE_NODE - 1, TRUE));
break;
case SYNC_TREE_NODE_NEW:
ut_a(sync_thread_levels_contain(array, SYNC_FSP_PAGE)
|| sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
break;
case SYNC_INDEX_TREE:
- ut_a((sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)
- && sync_thread_levels_contain(array, SYNC_FSP)
- && sync_thread_levels_g(array, SYNC_FSP_PAGE - 1))
- || sync_thread_levels_g(array, SYNC_TREE_NODE - 1));
+ if (sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)
+ && sync_thread_levels_contain(array, SYNC_FSP)) {
+ ut_a(sync_thread_levels_g(array, SYNC_FSP_PAGE - 1,
+ TRUE));
+ } else {
+ ut_a(sync_thread_levels_g(array, SYNC_TREE_NODE - 1,
+ TRUE));
+ }
break;
case SYNC_IBUF_MUTEX:
- ut_a(sync_thread_levels_g(array, SYNC_FSP_PAGE - 1));
+ ut_a(sync_thread_levels_g(array, SYNC_FSP_PAGE - 1, TRUE));
break;
case SYNC_IBUF_PESS_INSERT_MUTEX:
- ut_a(sync_thread_levels_g(array, SYNC_FSP - 1)
- && !sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
+ ut_a(sync_thread_levels_g(array, SYNC_FSP - 1, TRUE));
+ ut_a(!sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
break;
case SYNC_IBUF_HEADER:
- ut_a(sync_thread_levels_g(array, SYNC_FSP - 1)
- && !sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)
- && !sync_thread_levels_contain(
- array, SYNC_IBUF_PESS_INSERT_MUTEX));
- break;
- case SYNC_DICT_AUTOINC_MUTEX:
- ut_a(sync_thread_levels_g(array, SYNC_DICT_AUTOINC_MUTEX));
- break;
- case SYNC_DICT_OPERATION:
- ut_a(sync_thread_levels_g(array, SYNC_DICT_OPERATION));
- break;
- case SYNC_DICT_HEADER:
- ut_a(sync_thread_levels_g(array, SYNC_DICT_HEADER));
+ ut_a(sync_thread_levels_g(array, SYNC_FSP - 1, TRUE));
+ ut_a(!sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
+ ut_a(!sync_thread_levels_contain(array,
+ SYNC_IBUF_PESS_INSERT_MUTEX));
break;
case SYNC_DICT:
#ifdef UNIV_DEBUG
ut_a(buf_debug_prints
- || sync_thread_levels_g(array, SYNC_DICT));
+ || sync_thread_levels_g(array, SYNC_DICT, TRUE));
#else /* UNIV_DEBUG */
- ut_a(sync_thread_levels_g(array, SYNC_DICT));
+ ut_a(sync_thread_levels_g(array, SYNC_DICT, TRUE));
#endif /* UNIV_DEBUG */
break;
default:
@@ -1226,15 +1225,16 @@ sync_thread_add_level(
mutex_exit(&sync_thread_mutex);
}
-/**********************************************************************
-Removes a latch from the thread level array if it is found there. */
-
+/******************************************************************//**
+Removes a latch from the thread level array if it is found there.
+@return TRUE if found in the array; it is no error if the latch is
+not found, as we presently are not able to determine the level for
+every latch reservation the program does */
+UNIV_INTERN
ibool
sync_thread_reset_level(
/*====================*/
- /* out: TRUE if found from the array; it is an error
- if the latch is not found */
- void* latch) /* in: pointer to a mutex or an rw-lock */
+ void* latch) /*!< in: pointer to a mutex or an rw-lock */
{
sync_level_t* array;
sync_level_t* slot;
@@ -1281,6 +1281,18 @@ sync_thread_reset_level(
}
}
+ if (((mutex_t*) latch)->magic_n != MUTEX_MAGIC_N) {
+ rw_lock_t* rw_lock;
+
+ rw_lock = (rw_lock_t*) latch;
+
+ if (rw_lock->level == SYNC_LEVEL_VARYING) {
+ mutex_exit(&sync_thread_mutex);
+
+ return(TRUE);
+ }
+ }
+
ut_error;
mutex_exit(&sync_thread_mutex);
@@ -1289,9 +1301,9 @@ sync_thread_reset_level(
}
#endif /* UNIV_SYNC_DEBUG */
-/**********************************************************************
+/******************************************************************//**
Initializes the synchronization data structures. */
-
+UNIV_INTERN
void
sync_init(void)
/*===========*/
@@ -1343,10 +1355,10 @@ sync_init(void)
#endif /* UNIV_SYNC_DEBUG */
}
-/**********************************************************************
+/******************************************************************//**
Frees the resources in InnoDB's own synchronization data structures. Use
os_sync_free() after calling this. */
-
+UNIV_INTERN
void
sync_close(void)
/*===========*/
@@ -1365,42 +1377,57 @@ sync_close(void)
mutex_free(&mutex_list_mutex);
#ifdef UNIV_SYNC_DEBUG
mutex_free(&sync_thread_mutex);
+
+ /* Switch latching order checks off in sync0sync.c */
+ sync_order_checks_on = FALSE;
#endif /* UNIV_SYNC_DEBUG */
+
+ sync_initialized = FALSE;
}
-/***********************************************************************
+/*******************************************************************//**
Prints wait info of the sync system. */
-
+UNIV_INTERN
void
sync_print_wait_info(
/*=================*/
- FILE* file) /* in: file where to print */
+ FILE* file) /*!< in: file where to print */
{
#ifdef UNIV_SYNC_DEBUG
- fprintf(file, "Mutex exits %lu, rws exits %lu, rwx exits %lu\n",
+ fprintf(file, "Mutex exits %llu, rws exits %llu, rwx exits %llu\n",
mutex_exit_count, rw_s_exit_count, rw_x_exit_count);
#endif
fprintf(file,
- "Mutex spin waits %lu, rounds %lu, OS waits %lu\n"
- "RW-shared spins %lu, OS waits %lu;"
- " RW-excl spins %lu, OS waits %lu\n",
- (ulong) mutex_spin_wait_count,
- (ulong) mutex_spin_round_count,
- (ulong) mutex_os_wait_count,
- (ulong) rw_s_spin_wait_count,
- (ulong) rw_s_os_wait_count,
- (ulong) rw_x_spin_wait_count,
- (ulong) rw_x_os_wait_count);
+ "Mutex spin waits %llu, rounds %llu, OS waits %llu\n"
+ "RW-shared spins %llu, OS waits %llu;"
+ " RW-excl spins %llu, OS waits %llu\n",
+ mutex_spin_wait_count,
+ mutex_spin_round_count,
+ mutex_os_wait_count,
+ rw_s_spin_wait_count,
+ rw_s_os_wait_count,
+ rw_x_spin_wait_count,
+ rw_x_os_wait_count);
+
+ fprintf(file,
+ "Spin rounds per wait: %.2f mutex, %.2f RW-shared, "
+ "%.2f RW-excl\n",
+ (double) mutex_spin_round_count /
+ (mutex_spin_wait_count ? mutex_spin_wait_count : 1),
+ (double) rw_s_spin_round_count /
+ (rw_s_spin_wait_count ? rw_s_spin_wait_count : 1),
+ (double) rw_x_spin_round_count /
+ (rw_x_spin_wait_count ? rw_x_spin_wait_count : 1));
}
-/***********************************************************************
+/*******************************************************************//**
Prints info of the sync system. */
-
+UNIV_INTERN
void
sync_print(
/*=======*/
- FILE* file) /* in: file where to print */
+ FILE* file) /*!< in: file where to print */
{
#ifdef UNIV_SYNC_DEBUG
mutex_list_print_info(file);
diff --git a/storage/innobase/thr/thr0loc.c b/storage/innobase/thr/thr0loc.c
index b803bd53101..59a234a6b72 100644
--- a/storage/innobase/thr/thr0loc.c
+++ b/storage/innobase/thr/thr0loc.c
@@ -1,7 +1,24 @@
-/******************************************************
-The thread local storage
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file thr/thr0loc.c
+The thread local storage
Created 10/5/1995 Heikki Tuuri
*******************************************************/
@@ -27,38 +44,41 @@ is protected by a mutex. If you need modify the program and put new data to
the thread local storage, just add it to struct thr_local_struct in the
header file. */
-/* Mutex protecting the local storage hash table */
-mutex_t thr_local_mutex;
+/** Mutex protecting thr_local_hash */
+static mutex_t thr_local_mutex;
-/* The hash table. The module is not yet initialized when it is NULL. */
-hash_table_t* thr_local_hash = NULL;
+/** The hash table. The module is not yet initialized when it is NULL. */
+static hash_table_t* thr_local_hash = NULL;
-/* The private data for each thread should be put to
-the structure below and the accessor functions written
-for the field. */
+/** Thread local data */
typedef struct thr_local_struct thr_local_t;
+/** @brief Thread local data.
+The private data for each thread should be put to
+the structure below and the accessor functions written
+for the field. */
struct thr_local_struct{
- os_thread_id_t id; /* id of the thread which owns this struct */
- os_thread_t handle; /* operating system handle to the thread */
- ulint slot_no;/* the index of the slot in the thread table
+ os_thread_id_t id; /*!< id of the thread which owns this struct */
+ os_thread_t handle; /*!< operating system handle to the thread */
+ ulint slot_no;/*!< the index of the slot in the thread table
for this thread */
- ibool in_ibuf;/* TRUE if the the thread is doing an ibuf
+ ibool in_ibuf;/*!< TRUE if the thread is doing an ibuf
operation */
- hash_node_t hash; /* hash chain node */
- ulint magic_n;
+ hash_node_t hash; /*!< hash chain node */
+ ulint magic_n;/*!< magic number (THR_LOCAL_MAGIC_N) */
};
+/** The value of thr_local_struct::magic_n */
#define THR_LOCAL_MAGIC_N 1231234
-/***********************************************************************
-Returns the local storage struct for a thread. */
+/*******************************************************************//**
+Returns the local storage struct for a thread.
+@return local storage */
static
thr_local_t*
thr_local_get(
/*==========*/
- /* out: local storage */
- os_thread_id_t id) /* in: thread id of the thread */
+ os_thread_id_t id) /*!< in: thread id of the thread */
{
thr_local_t* local;
@@ -71,7 +91,7 @@ try_again:
local = NULL;
HASH_SEARCH(hash, thr_local_hash, os_thread_pf(id),
- local, os_thread_eq(local->id, id));
+ thr_local_t*, local,, os_thread_eq(local->id, id));
if (local == NULL) {
mutex_exit(&thr_local_mutex);
@@ -87,14 +107,14 @@ try_again:
return(local);
}
-/***********************************************************************
-Gets the slot number in the thread table of a thread. */
-
+/*******************************************************************//**
+Gets the slot number in the thread table of a thread.
+@return slot number */
+UNIV_INTERN
ulint
thr_local_get_slot_no(
/*==================*/
- /* out: slot number */
- os_thread_id_t id) /* in: thread id of the thread */
+ os_thread_id_t id) /*!< in: thread id of the thread */
{
ulint slot_no;
thr_local_t* local;
@@ -110,14 +130,14 @@ thr_local_get_slot_no(
return(slot_no);
}
-/***********************************************************************
+/*******************************************************************//**
Sets the slot number in the thread table of a thread. */
-
+UNIV_INTERN
void
thr_local_set_slot_no(
/*==================*/
- os_thread_id_t id, /* in: thread id of the thread */
- ulint slot_no)/* in: slot number */
+ os_thread_id_t id, /*!< in: thread id of the thread */
+ ulint slot_no)/*!< in: slot number */
{
thr_local_t* local;
@@ -130,14 +150,14 @@ thr_local_set_slot_no(
mutex_exit(&thr_local_mutex);
}
-/***********************************************************************
+/*******************************************************************//**
Returns pointer to the 'in_ibuf' field within the current thread local
-storage. */
-
+storage.
+@return pointer to the in_ibuf field */
+UNIV_INTERN
ibool*
thr_local_get_in_ibuf_field(void)
/*=============================*/
- /* out: pointer to the in_ibuf field */
{
thr_local_t* local;
@@ -150,9 +170,9 @@ thr_local_get_in_ibuf_field(void)
return(&(local->in_ibuf));
}
-/***********************************************************************
+/*******************************************************************//**
Creates a local storage struct for the calling new thread. */
-
+UNIV_INTERN
void
thr_local_create(void)
/*==================*/
@@ -180,13 +200,13 @@ thr_local_create(void)
mutex_exit(&thr_local_mutex);
}
-/***********************************************************************
+/*******************************************************************//**
Frees the local storage struct for the specified thread. */
-
+UNIV_INTERN
void
thr_local_free(
/*===========*/
- os_thread_id_t id) /* in: thread id */
+ os_thread_id_t id) /*!< in: thread id */
{
thr_local_t* local;
@@ -195,7 +215,7 @@ thr_local_free(
/* Look for the local struct in the hash table */
HASH_SEARCH(hash, thr_local_hash, os_thread_pf(id),
- local, os_thread_eq(local->id, id));
+ thr_local_t*, local,, os_thread_eq(local->id, id));
if (local == NULL) {
mutex_exit(&thr_local_mutex);
@@ -212,9 +232,9 @@ thr_local_free(
mem_free(local);
}
-/********************************************************************
+/****************************************************************//**
Initializes the thread local storage module. */
-
+UNIV_INTERN
void
thr_local_init(void)
/*================*/
@@ -226,3 +246,34 @@ thr_local_init(void)
mutex_create(&thr_local_mutex, SYNC_THR_LOCAL);
}
+
+/****************************************************************//**
+Close the thread local storage module. */
+UNIV_INTERN
+void
+thr_local_close(void)
+/*=================*/
+{
+ ulint i;
+
+ ut_a(thr_local_hash != NULL);
+
+ /* Free the hash elements. We don't remove them from the table
+ because we are going to destroy the table anyway. */
+ for (i = 0; i < hash_get_n_cells(thr_local_hash); i++) {
+ thr_local_t* local;
+
+ local = HASH_GET_FIRST(thr_local_hash, i);
+
+ while (local) {
+ thr_local_t* prev_local = local;
+
+ local = HASH_GET_NEXT(hash, prev_local);
+ ut_a(prev_local->magic_n == THR_LOCAL_MAGIC_N);
+ mem_free(prev_local);
+ }
+ }
+
+ hash_table_free(thr_local_hash);
+ thr_local_hash = NULL;
+}
diff --git a/storage/innodb_plugin/trx/trx0i_s.c b/storage/innobase/trx/trx0i_s.c
index ef2e6850d6c..ef2e6850d6c 100644
--- a/storage/innodb_plugin/trx/trx0i_s.c
+++ b/storage/innobase/trx/trx0i_s.c
diff --git a/storage/innobase/trx/trx0purge.c b/storage/innobase/trx/trx0purge.c
index f0f300d918e..abbfa3d7f81 100644
--- a/storage/innobase/trx/trx0purge.c
+++ b/storage/innobase/trx/trx0purge.c
@@ -1,7 +1,24 @@
-/******************************************************
-Purge old versions
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1996 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file trx/trx0purge.c
+Purge old versions
Created 3/26/1996 Heikki Tuuri
*******************************************************/
@@ -27,24 +44,24 @@ Created 3/26/1996 Heikki Tuuri
#include "srv0que.h"
#include "os0thread.h"
-/* The global data structure coordinating a purge */
-trx_purge_t* purge_sys = NULL;
+/** The global data structure coordinating a purge */
+UNIV_INTERN trx_purge_t* purge_sys = NULL;
-/* A dummy undo record used as a return value when we have a whole undo log
+/** A dummy undo record used as a return value when we have a whole undo log
which needs no purge */
-trx_undo_rec_t trx_purge_dummy_rec;
+UNIV_INTERN trx_undo_rec_t trx_purge_dummy_rec;
-/*********************************************************************
+/*****************************************************************//**
Checks if trx_id is >= purge_view: then it is guaranteed that its update
-undo log still exists in the system. */
-
+undo log still exists in the system.
+@return TRUE if the undo log is certain to be preserved; note that even
+if the function returns FALSE, the undo log may still exist in the
+system */
+UNIV_INTERN
ibool
trx_purge_update_undo_must_exist(
/*=============================*/
- /* out: TRUE if is sure that it is preserved, also
- if the function returns FALSE, it is possible that
- the undo log still exists in the system */
- dulint trx_id) /* in: transaction id */
+ trx_id_t trx_id) /*!< in: transaction id */
{
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
@@ -60,15 +77,15 @@ trx_purge_update_undo_must_exist(
/*=================== PURGE RECORD ARRAY =============================*/
-/***********************************************************************
-Stores info of an undo log record during a purge. */
+/*******************************************************************//**
+Stores info of an undo log record during a purge.
+@return pointer to the storage cell */
static
trx_undo_inf_t*
trx_purge_arr_store_info(
/*=====================*/
- /* out: pointer to the storage cell */
- dulint trx_no, /* in: transaction number */
- dulint undo_no)/* in: undo number */
+ trx_id_t trx_no, /*!< in: transaction number */
+ undo_no_t undo_no)/*!< in: undo number */
{
trx_undo_inf_t* cell;
trx_undo_arr_t* arr;
@@ -92,13 +109,13 @@ trx_purge_arr_store_info(
}
}
-/***********************************************************************
+/*******************************************************************//**
Removes info of an undo log record during a purge. */
UNIV_INLINE
void
trx_purge_arr_remove_info(
/*======================*/
- trx_undo_inf_t* cell) /* in: pointer to the storage cell */
+ trx_undo_inf_t* cell) /*!< in: pointer to the storage cell */
{
trx_undo_arr_t* arr;
@@ -111,20 +128,20 @@ trx_purge_arr_remove_info(
arr->n_used--;
}
-/***********************************************************************
+/*******************************************************************//**
Gets the biggest pair of a trx number and an undo number in a purge array. */
static
void
trx_purge_arr_get_biggest(
/*======================*/
- trx_undo_arr_t* arr, /* in: purge array */
- dulint* trx_no, /* out: transaction number: ut_dulint_zero
+ trx_undo_arr_t* arr, /*!< in: purge array */
+ trx_id_t* trx_no, /*!< out: transaction number: ut_dulint_zero
if array is empty */
- dulint* undo_no)/* out: undo number */
+ undo_no_t* undo_no)/*!< out: undo number */
{
trx_undo_inf_t* cell;
- dulint pair_trx_no;
- dulint pair_undo_no;
+ trx_id_t pair_trx_no;
+ undo_no_t pair_undo_no;
int trx_cmp;
ulint n_used;
ulint i;
@@ -161,14 +178,14 @@ trx_purge_arr_get_biggest(
}
}
-/********************************************************************
+/****************************************************************//**
Builds a purge 'query' graph. The actual purge is performed by executing
-this query graph. */
+this query graph.
+@return own: the query graph */
static
que_t*
trx_purge_graph_build(void)
/*=======================*/
- /* out, own: the query graph */
{
mem_heap_t* heap;
que_fork_t* fork;
@@ -190,10 +207,10 @@ trx_purge_graph_build(void)
return(fork);
}
-/************************************************************************
+/********************************************************************//**
Creates the global purge system control structure and inits the history
mutex. */
-
+UNIV_INTERN
void
trx_purge_sys_create(void)
/*======================*/
@@ -232,19 +249,57 @@ trx_purge_sys_create(void)
purge_sys->heap);
}
+/********************************************************************//**
+Frees the global purge system control structure and the resources it holds. */
+UNIV_INTERN
+void
+trx_purge_sys_close(void)
+/*======================*/
+{
+ ut_ad(!mutex_own(&kernel_mutex));
+
+ que_graph_free(purge_sys->query);
+
+ ut_a(purge_sys->sess->trx->is_purge);
+ purge_sys->sess->trx->conc_state = TRX_NOT_STARTED;
+ sess_close(purge_sys->sess);
+ purge_sys->sess = NULL;
+
+ if (purge_sys->view != NULL) {
+ /* read_view_close() requires the caller to hold the kernel
+ mutex; we acquire it only to satisfy that precondition. */
+ mutex_enter(&kernel_mutex);
+
+ read_view_close(purge_sys->view);
+ purge_sys->view = NULL;
+
+ mutex_exit(&kernel_mutex);
+ }
+
+ trx_undo_arr_free(purge_sys->arr);
+
+ rw_lock_free(&purge_sys->latch);
+ mutex_free(&purge_sys->mutex);
+
+ mem_heap_free(purge_sys->heap);
+ mem_free(purge_sys);
+
+ purge_sys = NULL;
+}
+
/*================ UNDO LOG HISTORY LIST =============================*/
-/************************************************************************
+/********************************************************************//**
Adds the update undo log as the first log in the history list. Removes the
update undo log segment from the rseg slot if it is too big for reuse. */
-
+UNIV_INTERN
void
trx_purge_add_update_undo_to_history(
/*=================================*/
- trx_t* trx, /* in: transaction */
- page_t* undo_page, /* in: update undo log header page,
+ trx_t* trx, /*!< in: transaction */
+ page_t* undo_page, /*!< in: update undo log header page,
x-latched */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
trx_undo_t* undo;
trx_rseg_t* rseg;
@@ -262,7 +317,8 @@ trx_purge_add_update_undo_to_history(
ut_ad(mutex_own(&(rseg->mutex)));
- rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, mtr);
+ rseg_header = trx_rsegf_get(rseg->space, rseg->zip_size,
+ rseg->page_no, mtr);
undo_header = undo_page + undo->hdr_offset;
seg_header = undo_page + TRX_UNDO_SEG_HDR;
@@ -314,16 +370,16 @@ trx_purge_add_update_undo_to_history(
}
}
-/**************************************************************************
+/**********************************************************************//**
Frees an undo log segment which is in the history list. Cuts the end of the
history list at the youngest undo log in this segment. */
static
void
trx_purge_free_segment(
/*===================*/
- trx_rseg_t* rseg, /* in: rollback segment */
- fil_addr_t hdr_addr, /* in: the file address of log_hdr */
- ulint n_removed_logs) /* in: count of how many undo logs we
+ trx_rseg_t* rseg, /*!< in: rollback segment */
+ fil_addr_t hdr_addr, /*!< in: the file address of log_hdr */
+ ulint n_removed_logs) /*!< in: count of how many undo logs we
will cut off from the end of the
history list */
{
@@ -344,9 +400,11 @@ loop:
mtr_start(&mtr);
mutex_enter(&(rseg->mutex));
- rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
+ rseg_hdr = trx_rsegf_get(rseg->space, rseg->zip_size,
+ rseg->page_no, &mtr);
- undo_page = trx_undo_page_get(rseg->space, hdr_addr.page, &mtr);
+ undo_page = trx_undo_page_get(rseg->space, rseg->zip_size,
+ hdr_addr.page, &mtr);
seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
log_hdr = undo_page + hdr_addr.boffset;
@@ -418,16 +476,16 @@ loop:
mtr_commit(&mtr);
}
-/************************************************************************
+/********************************************************************//**
Removes unnecessary history data from a rollback segment. */
static
void
trx_purge_truncate_rseg_history(
/*============================*/
- trx_rseg_t* rseg, /* in: rollback segment */
- dulint limit_trx_no, /* in: remove update undo logs whose
+ trx_rseg_t* rseg, /*!< in: rollback segment */
+ trx_id_t limit_trx_no, /*!< in: remove update undo logs whose
trx number is < limit_trx_no */
- dulint limit_undo_no) /* in: if transaction number is equal
+ undo_no_t limit_undo_no) /*!< in: if transaction number is equal
to limit_trx_no, truncate undo records
with undo number < limit_undo_no */
{
@@ -446,7 +504,8 @@ trx_purge_truncate_rseg_history(
mtr_start(&mtr);
mutex_enter(&(rseg->mutex));
- rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
+ rseg_hdr = trx_rsegf_get(rseg->space, rseg->zip_size,
+ rseg->page_no, &mtr);
hdr_addr = trx_purge_get_log_from_hist(
flst_get_last(rseg_hdr + TRX_RSEG_HISTORY, &mtr));
@@ -460,7 +519,8 @@ loop:
return;
}
- undo_page = trx_undo_page_get(rseg->space, hdr_addr.page, &mtr);
+ undo_page = trx_undo_page_get(rseg->space, rseg->zip_size,
+ hdr_addr.page, &mtr);
log_hdr = undo_page + hdr_addr.boffset;
@@ -512,14 +572,15 @@ loop:
mtr_start(&mtr);
mutex_enter(&(rseg->mutex));
- rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
+ rseg_hdr = trx_rsegf_get(rseg->space, rseg->zip_size,
+ rseg->page_no, &mtr);
hdr_addr = prev_hdr_addr;
goto loop;
}
-/************************************************************************
+/********************************************************************//**
Removes unnecessary history data from rollback segments. NOTE that when this
function is called, the caller must not have any latches on undo log pages! */
static
@@ -528,15 +589,15 @@ trx_purge_truncate_history(void)
/*============================*/
{
trx_rseg_t* rseg;
- dulint limit_trx_no;
- dulint limit_undo_no;
+ trx_id_t limit_trx_no;
+ undo_no_t limit_undo_no;
ut_ad(mutex_own(&(purge_sys->mutex)));
trx_purge_arr_get_biggest(purge_sys->arr, &limit_trx_no,
&limit_undo_no);
- if (ut_dulint_cmp(limit_trx_no, ut_dulint_zero) == 0) {
+ if (ut_dulint_is_zero(limit_trx_no)) {
limit_trx_no = purge_sys->purge_trx_no;
limit_undo_no = purge_sys->purge_undo_no;
@@ -562,14 +623,14 @@ trx_purge_truncate_history(void)
}
}
-/************************************************************************
+/********************************************************************//**
Does a truncate if the purge array is empty. NOTE that when this function is
-called, the caller must not have any latches on undo log pages! */
+called, the caller must not have any latches on undo log pages!
+@return TRUE if array empty */
UNIV_INLINE
ibool
trx_purge_truncate_if_arr_empty(void)
/*=================================*/
- /* out: TRUE if array empty */
{
ut_ad(mutex_own(&(purge_sys->mutex)));
@@ -583,20 +644,20 @@ trx_purge_truncate_if_arr_empty(void)
return(FALSE);
}
-/***************************************************************************
+/***********************************************************************//**
Updates the last not yet purged history log info in rseg when we have purged
a whole undo log. Advances also purge_sys->purge_trx_no past the purged log. */
static
void
trx_purge_rseg_get_next_history_log(
/*================================*/
- trx_rseg_t* rseg) /* in: rollback segment */
+ trx_rseg_t* rseg) /*!< in: rollback segment */
{
page_t* undo_page;
trx_ulogf_t* log_hdr;
trx_usegf_t* seg_hdr;
fil_addr_t prev_log_addr;
- dulint trx_no;
+ trx_id_t trx_no;
ibool del_marks;
mtr_t mtr;
@@ -612,7 +673,7 @@ trx_purge_rseg_get_next_history_log(
mtr_start(&mtr);
- undo_page = trx_undo_page_get_s_latched(rseg->space,
+ undo_page = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size,
rseg->last_page_no, &mtr);
log_hdr = undo_page + rseg->last_offset;
seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
@@ -664,7 +725,7 @@ trx_purge_rseg_get_next_history_log(
/* Read the trx number and del marks from the previous log header */
mtr_start(&mtr);
- log_hdr = trx_undo_page_get_s_latched(rseg->space,
+ log_hdr = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size,
prev_log_addr.page, &mtr)
+ prev_log_addr.boffset;
@@ -684,7 +745,7 @@ trx_purge_rseg_get_next_history_log(
mutex_exit(&(rseg->mutex));
}
-/***************************************************************************
+/***********************************************************************//**
Chooses the next undo log to purge and updates the info in purge_sys. This
function is used to initialize purge_sys when the next record to purge is
not known, and also to update the purge system info on the next record when
@@ -697,8 +758,9 @@ trx_purge_choose_next_log(void)
trx_undo_rec_t* rec;
trx_rseg_t* rseg;
trx_rseg_t* min_rseg;
- dulint min_trx_no;
+ trx_id_t min_trx_no;
ulint space = 0; /* remove warning (??? bug ???) */
+ ulint zip_size = 0;
ulint page_no = 0; /* remove warning (??? bug ???) */
ulint offset = 0; /* remove warning (??? bug ???) */
mtr_t mtr;
@@ -724,6 +786,7 @@ trx_purge_choose_next_log(void)
min_rseg = rseg;
min_trx_no = rseg->last_trx_no;
space = rseg->space;
+ zip_size = rseg->zip_size;
ut_a(space == 0); /* We assume in purge of
externally stored fields
that space id == 0 */
@@ -749,7 +812,7 @@ trx_purge_choose_next_log(void)
rec = &trx_purge_dummy_rec;
} else {
- rec = trx_undo_get_first_rec(space, page_no, offset,
+ rec = trx_undo_get_first_rec(space, zip_size, page_no, offset,
RW_S_LATCH, &mtr);
if (rec == NULL) {
/* Undo log empty */
@@ -774,22 +837,21 @@ trx_purge_choose_next_log(void)
} else {
purge_sys->purge_undo_no = trx_undo_rec_get_undo_no(rec);
- purge_sys->page_no = buf_frame_get_page_no(rec);
- purge_sys->offset = rec - buf_frame_align(rec);
+ purge_sys->page_no = page_get_page_no(page_align(rec));
+ purge_sys->offset = page_offset(rec);
}
mtr_commit(&mtr);
}
-/***************************************************************************
-Gets the next record to purge and updates the info in the purge system. */
+/***********************************************************************//**
+Gets the next record to purge and updates the info in the purge system.
+@return copy of an undo log record or pointer to the dummy undo log record */
static
trx_undo_rec_t*
trx_purge_get_next_rec(
/*===================*/
- /* out: copy of an undo log record or
- pointer to the dummy undo log record */
- mem_heap_t* heap) /* in: memory heap where copied */
+ mem_heap_t* heap) /*!< in: memory heap where copied */
{
trx_undo_rec_t* rec;
trx_undo_rec_t* rec_copy;
@@ -800,6 +862,7 @@ trx_purge_get_next_rec(
ulint offset;
ulint page_no;
ulint space;
+ ulint zip_size;
ulint type;
ulint cmpl_info;
mtr_t mtr;
@@ -808,6 +871,7 @@ trx_purge_get_next_rec(
ut_ad(purge_sys->next_stored);
space = purge_sys->rseg->space;
+ zip_size = purge_sys->rseg->zip_size;
page_no = purge_sys->page_no;
offset = purge_sys->offset;
@@ -826,7 +890,8 @@ trx_purge_get_next_rec(
mtr_start(&mtr);
- undo_page = trx_undo_page_get_s_latched(space, page_no, &mtr);
+ undo_page = trx_undo_page_get_s_latched(space, zip_size,
+ page_no, &mtr);
rec = undo_page + offset;
rec2 = rec;
@@ -877,14 +942,15 @@ trx_purge_get_next_rec(
mtr_start(&mtr);
- undo_page = trx_undo_page_get_s_latched(space, page_no, &mtr);
+ undo_page = trx_undo_page_get_s_latched(space, zip_size,
+ page_no, &mtr);
rec = undo_page + offset;
} else {
- page = buf_frame_align(rec2);
+ page = page_align(rec2);
purge_sys->purge_undo_no = trx_undo_rec_get_undo_no(rec2);
- purge_sys->page_no = buf_frame_get_page_no(page);
+ purge_sys->page_no = page_get_page_no(page);
purge_sys->offset = rec2 - page;
if (undo_page != page) {
@@ -900,21 +966,19 @@ trx_purge_get_next_rec(
return(rec_copy);
}
-/************************************************************************
+/********************************************************************//**
Fetches the next undo log record from the history list to purge. It must be
-released with the corresponding release function. */
-
+released with the corresponding release function.
+@return copy of an undo log record or pointer to trx_purge_dummy_rec,
+if the whole undo log can be skipped in purge; NULL if none left */
+UNIV_INTERN
trx_undo_rec_t*
trx_purge_fetch_next_rec(
/*=====================*/
- /* out: copy of an undo log record or
- pointer to the dummy undo log record
- &trx_purge_dummy_rec, if the whole undo log
- can skipped in purge; NULL if none left */
- dulint* roll_ptr,/* out: roll pointer to undo record */
- trx_undo_inf_t** cell, /* out: storage cell for the record in the
+ roll_ptr_t* roll_ptr,/*!< out: roll pointer to undo record */
+ trx_undo_inf_t** cell, /*!< out: storage cell for the record in the
purge array */
- mem_heap_t* heap) /* in: memory heap where copied */
+ mem_heap_t* heap) /*!< in: memory heap where copied */
{
trx_undo_rec_t* undo_rec;
@@ -996,13 +1060,13 @@ trx_purge_fetch_next_rec(
return(undo_rec);
}
-/***********************************************************************
+/*******************************************************************//**
Releases a reserved purge undo record. */
-
+UNIV_INTERN
void
trx_purge_rec_release(
/*==================*/
- trx_undo_inf_t* cell) /* in: storage cell */
+ trx_undo_inf_t* cell) /*!< in: storage cell */
{
trx_undo_arr_t* arr;
@@ -1015,14 +1079,13 @@ trx_purge_rec_release(
mutex_exit(&(purge_sys->mutex));
}
-/***********************************************************************
-This function runs a purge batch. */
-
+/*******************************************************************//**
+This function runs a purge batch.
+@return number of undo log pages handled in the batch */
+UNIV_INTERN
ulint
trx_purge(void)
/*===========*/
- /* out: number of undo log pages handled in
- the batch */
{
que_thr_t* thr;
/* que_thr_t* thr2; */
@@ -1123,9 +1186,9 @@ trx_purge(void)
return(purge_sys->n_pages_handled - old_pages_handled);
}
-/**********************************************************************
+/******************************************************************//**
Prints information of the purge system to stderr. */
-
+UNIV_INTERN
void
trx_purge_sys_print(void)
/*=====================*/
@@ -1133,11 +1196,10 @@ trx_purge_sys_print(void)
fprintf(stderr, "InnoDB: Purge system view:\n");
read_view_print(purge_sys->view);
- fprintf(stderr, "InnoDB: Purge trx n:o %lu %lu, undo n_o %lu %lu\n",
- (ulong) ut_dulint_get_high(purge_sys->purge_trx_no),
- (ulong) ut_dulint_get_low(purge_sys->purge_trx_no),
- (ulong) ut_dulint_get_high(purge_sys->purge_undo_no),
- (ulong) ut_dulint_get_low(purge_sys->purge_undo_no));
+ fprintf(stderr, "InnoDB: Purge trx n:o " TRX_ID_FMT
+ ", undo n:o " TRX_ID_FMT "\n",
+ TRX_ID_PREP_PRINTF(purge_sys->purge_trx_no),
+ TRX_ID_PREP_PRINTF(purge_sys->purge_undo_no));
fprintf(stderr,
"InnoDB: Purge next stored %lu, page_no %lu, offset %lu,\n"
"InnoDB: Purge hdr_page_no %lu, hdr_offset %lu\n",
diff --git a/storage/innobase/trx/trx0rec.c b/storage/innobase/trx/trx0rec.c
index 38ad53fcfb0..5097cf18dcd 100644
--- a/storage/innobase/trx/trx0rec.c
+++ b/storage/innobase/trx/trx0rec.c
@@ -1,7 +1,24 @@
-/******************************************************
-Transaction undo log record
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
-(c) 1996 Innobase Oy
+/**************************************************//**
+@file trx/trx0rec.c
+Transaction undo log record
Created 3/26/1996 Heikki Tuuri
*******************************************************/
@@ -14,30 +31,31 @@ Created 3/26/1996 Heikki Tuuri
#include "fsp0fsp.h"
#include "mach0data.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
#include "trx0undo.h"
+#include "mtr0log.h"
+#ifndef UNIV_HOTBACKUP
#include "dict0dict.h"
#include "ut0mem.h"
+#include "row0ext.h"
#include "row0upd.h"
#include "que0que.h"
#include "trx0purge.h"
+#include "trx0rseg.h"
#include "row0row.h"
-#include "mtr0log.h"
/*=========== UNDO LOG RECORD CREATION AND DECODING ====================*/
-/**************************************************************************
+/**********************************************************************//**
Writes the mtr log entry of the inserted undo log record on the undo log
page. */
UNIV_INLINE
void
trx_undof_page_add_undo_rec_log(
/*============================*/
- page_t* undo_page, /* in: undo log page */
- ulint old_free, /* in: start offset of the inserted entry */
- ulint new_free, /* in: end offset of the entry */
- mtr_t* mtr) /* in: mtr */
+ page_t* undo_page, /*!< in: undo log page */
+ ulint old_free, /*!< in: start offset of the inserted entry */
+ ulint new_free, /*!< in: end offset of the entry */
+ mtr_t* mtr) /*!< in: mtr */
{
byte* log_ptr;
const byte* log_end;
@@ -66,17 +84,18 @@ trx_undof_page_add_undo_rec_log(
mlog_catenate_string(mtr, undo_page + old_free + 2, len);
}
}
+#endif /* !UNIV_HOTBACKUP */
-/***************************************************************
-Parses a redo log record of adding an undo log record. */
-
+/***********************************************************//**
+Parses a redo log record of adding an undo log record.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
trx_undo_parse_add_undo_rec(
/*========================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page) /* in: page or NULL */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ page_t* page) /*!< in: page or NULL */
{
ulint len;
byte* rec;
@@ -114,15 +133,16 @@ trx_undo_parse_add_undo_rec(
return(ptr + len);
}
-/**************************************************************************
-Calculates the free space left for extending an undo log record. */
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Calculates the free space left for extending an undo log record.
+@return bytes left */
UNIV_INLINE
ulint
trx_undo_left(
/*==========*/
- /* out: bytes left */
- page_t* page, /* in: undo log page */
- byte* ptr) /* in: pointer to page */
+ const page_t* page, /*!< in: undo log page */
+ const byte* ptr) /*!< in: pointer to page */
{
/* The '- 10' is a safety margin, in case we have some small
calculation error below */
@@ -130,28 +150,77 @@ trx_undo_left(
return(UNIV_PAGE_SIZE - (ptr - page) - 10 - FIL_PAGE_DATA_END);
}
-/**************************************************************************
-Reports in the undo log of an insert of a clustered index record. */
+/**********************************************************************//**
+Set the next and previous pointers in the undo page for the undo record
+that was written to ptr. Update the first free value by the number of bytes
+written for this undo record.
+@return offset of the inserted entry on the page if succeeded, 0 if fail */
+static
+ulint
+trx_undo_page_set_next_prev_and_add(
+/*================================*/
+ page_t* undo_page, /*!< in/out: undo log page */
+ byte* ptr, /*!< in: ptr up to where data has been
+ written on this undo page. */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ ulint first_free; /*!< offset within undo_page */
+ ulint end_of_rec; /*!< offset within undo_page */
+ byte* ptr_to_first_free;
+ /* pointer within undo_page
+ that points to the next free
+ offset value within undo_page.*/
+
+ ut_ad(ptr > undo_page);
+ ut_ad(ptr < undo_page + UNIV_PAGE_SIZE);
+
+ if (UNIV_UNLIKELY(trx_undo_left(undo_page, ptr) < 2)) {
+
+ return(0);
+ }
+
+ ptr_to_first_free = undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE;
+
+ first_free = mach_read_from_2(ptr_to_first_free);
+
+ /* After the record data, write the start offset of this record */
+ mach_write_to_2(ptr, first_free);
+ ptr += 2;
+
+ end_of_rec = ptr - undo_page;
+
+ /* At the start of this record, write where the next record begins */
+ mach_write_to_2(undo_page + first_free, end_of_rec);
+
+ /* Update the offset to first free undo record */
+ mach_write_to_2(ptr_to_first_free, end_of_rec);
+
+ /* Write the mtr log entry of the inserted undo log record */
+ trx_undof_page_add_undo_rec_log(undo_page, first_free,
+ end_of_rec, mtr);
+
+ return(first_free);
+}
+
+/**********************************************************************//**
+Reports in the undo log of an insert of a clustered index record.
+@return offset of the inserted entry on the page if succeed, 0 if fail */
static
ulint
trx_undo_page_report_insert(
/*========================*/
- /* out: offset of the inserted entry
- on the page if succeed, 0 if fail */
- page_t* undo_page, /* in: undo log page */
- trx_t* trx, /* in: transaction */
- dict_index_t* index, /* in: clustered index */
- dtuple_t* clust_entry, /* in: index entry which will be
+ page_t* undo_page, /*!< in: undo log page */
+ trx_t* trx, /*!< in: transaction */
+ dict_index_t* index, /*!< in: clustered index */
+ const dtuple_t* clust_entry, /*!< in: index entry which will be
inserted to the clustered index */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint first_free;
byte* ptr;
- ulint len;
- dfield_t* field;
- ulint flen;
ulint i;
+ ut_ad(dict_index_is_clust(index));
ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
+ TRX_UNDO_PAGE_TYPE) == TRX_UNDO_INSERT);
@@ -161,10 +230,9 @@ trx_undo_page_report_insert(
ut_ad(first_free <= UNIV_PAGE_SIZE);
- if (trx_undo_left(undo_page, ptr) < 30) {
+ if (trx_undo_left(undo_page, ptr) < 2 + 1 + 11 + 11) {
- /* NOTE: the value 30 must be big enough such that the general
- fields written below fit on the undo log page */
+ /* Not enough space for writing the general parameters */
return(0);
}
@@ -173,31 +241,24 @@ trx_undo_page_report_insert(
ptr += 2;
/* Store first some general parameters to the undo log */
- mach_write_to_1(ptr, TRX_UNDO_INSERT_REC);
- ptr++;
-
- len = mach_dulint_write_much_compressed(ptr, trx->undo_no);
- ptr += len;
-
- len = mach_dulint_write_much_compressed(ptr, (index->table)->id);
- ptr += len;
+ *ptr++ = TRX_UNDO_INSERT_REC;
+ ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no);
+ ptr += mach_dulint_write_much_compressed(ptr, index->table->id);
/*----------------------------------------*/
/* Store then the fields required to uniquely determine the record
to be inserted in the clustered index */
for (i = 0; i < dict_index_get_n_unique(index); i++) {
- field = dtuple_get_nth_field(clust_entry, i);
-
- flen = dfield_get_len(field);
+ const dfield_t* field = dtuple_get_nth_field(clust_entry, i);
+ ulint flen = dfield_get_len(field);
if (trx_undo_left(undo_page, ptr) < 5) {
return(0);
}
- len = mach_write_compressed(ptr, flen);
- ptr += len;
+ ptr += mach_write_compressed(ptr, flen);
if (flen != UNIV_SQL_NULL) {
if (trx_undo_left(undo_page, ptr) < flen) {
@@ -210,54 +271,27 @@ trx_undo_page_report_insert(
}
}
- if (trx_undo_left(undo_page, ptr) < 2) {
-
- return(0);
- }
-
- /*----------------------------------------*/
- /* Write pointers to the previous and the next undo log records */
-
- if (trx_undo_left(undo_page, ptr) < 2) {
-
- return(0);
- }
-
- mach_write_to_2(ptr, first_free);
- ptr += 2;
-
- mach_write_to_2(undo_page + first_free, ptr - undo_page);
-
- mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
- ptr - undo_page);
-
- /* Write the log entry to the REDO log of this change in the UNDO
- log */
- trx_undof_page_add_undo_rec_log(undo_page, first_free,
- ptr - undo_page, mtr);
- return(first_free);
+ return(trx_undo_page_set_next_prev_and_add(undo_page, ptr, mtr));
}
-/**************************************************************************
-Reads from an undo log record the general parameters. */
-
+/**********************************************************************//**
+Reads from an undo log record the general parameters.
+@return remaining part of undo log record after reading these values */
+UNIV_INTERN
byte*
trx_undo_rec_get_pars(
/*==================*/
- /* out: remaining part of undo log
- record after reading these values */
- trx_undo_rec_t* undo_rec, /* in: undo log record */
- ulint* type, /* out: undo record type:
+ trx_undo_rec_t* undo_rec, /*!< in: undo log record */
+ ulint* type, /*!< out: undo record type:
TRX_UNDO_INSERT_REC, ... */
- ulint* cmpl_info, /* out: compiler info, relevant only
+ ulint* cmpl_info, /*!< out: compiler info, relevant only
for update type records */
- ibool* updated_extern, /* out: TRUE if we updated an
+ ibool* updated_extern, /*!< out: TRUE if we updated an
externally stored fild */
- dulint* undo_no, /* out: undo log record number */
- dulint* table_id) /* out: table id */
+ undo_no_t* undo_no, /*!< out: undo log record number */
+ dulint* table_id) /*!< out: table id */
{
byte* ptr;
- ulint len;
ulint type_cmpl;
ptr = undo_rec + 2;
@@ -276,36 +310,55 @@ trx_undo_rec_get_pars(
*cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT;
*undo_no = mach_dulint_read_much_compressed(ptr);
- len = mach_dulint_get_much_compressed_size(*undo_no);
- ptr += len;
+ ptr += mach_dulint_get_much_compressed_size(*undo_no);
*table_id = mach_dulint_read_much_compressed(ptr);
- len = mach_dulint_get_much_compressed_size(*table_id);
- ptr += len;
+ ptr += mach_dulint_get_much_compressed_size(*table_id);
return(ptr);
}
-/**************************************************************************
-Reads from an undo log record a stored column value. */
+/**********************************************************************//**
+Reads from an undo log record a stored column value.
+@return remaining part of undo log record after reading these values */
static
byte*
trx_undo_rec_get_col_val(
/*=====================*/
- /* out: remaining part of undo log record after
- reading these values */
- byte* ptr, /* in: pointer to remaining part of undo log record */
- byte** field, /* out: pointer to stored field */
- ulint* len) /* out: length of the field, or UNIV_SQL_NULL */
+ byte* ptr, /*!< in: pointer to remaining part of undo log record */
+ byte** field, /*!< out: pointer to stored field */
+ ulint* len, /*!< out: length of the field, or UNIV_SQL_NULL */
+ ulint* orig_len)/*!< out: original length of the locally
+ stored part of an externally stored column, or 0 */
{
*len = mach_read_compressed(ptr);
ptr += mach_get_compressed_size(*len);
- *field = ptr;
-
- if (*len != UNIV_SQL_NULL) {
+ *orig_len = 0;
+
+ switch (*len) {
+ case UNIV_SQL_NULL:
+ *field = NULL;
+ break;
+ case UNIV_EXTERN_STORAGE_FIELD:
+ *orig_len = mach_read_compressed(ptr);
+ ptr += mach_get_compressed_size(*orig_len);
+ *len = mach_read_compressed(ptr);
+ ptr += mach_get_compressed_size(*len);
+ *field = ptr;
+ ptr += *len;
+
+ ut_ad(*orig_len >= BTR_EXTERN_FIELD_REF_SIZE);
+ ut_ad(*len > *orig_len);
+ ut_ad(*len >= REC_MAX_INDEX_COL_LEN
+ + BTR_EXTERN_FIELD_REF_SIZE);
+
+ *len += UNIV_EXTERN_STORAGE_FIELD;
+ break;
+ default:
+ *field = ptr;
if (*len >= UNIV_EXTERN_STORAGE_FIELD) {
- ptr += (*len - UNIV_EXTERN_STORAGE_FIELD);
+ ptr += *len - UNIV_EXTERN_STORAGE_FIELD;
} else {
ptr += *len;
}
@@ -314,33 +367,29 @@ trx_undo_rec_get_col_val(
return(ptr);
}
-/***********************************************************************
-Builds a row reference from an undo log record. */
-
+/*******************************************************************//**
+Builds a row reference from an undo log record.
+@return pointer to remaining part of undo record */
+UNIV_INTERN
byte*
trx_undo_rec_get_row_ref(
/*=====================*/
- /* out: pointer to remaining part of undo
- record */
- byte* ptr, /* in: remaining part of a copy of an undo log
+ byte* ptr, /*!< in: remaining part of a copy of an undo log
record, at the start of the row reference;
NOTE that this copy of the undo log record must
be preserved as long as the row reference is
used, as we do NOT copy the data in the
record! */
- dict_index_t* index, /* in: clustered index */
- dtuple_t** ref, /* out, own: row reference */
- mem_heap_t* heap) /* in: memory heap from which the memory
+ dict_index_t* index, /*!< in: clustered index */
+ dtuple_t** ref, /*!< out, own: row reference */
+ mem_heap_t* heap) /*!< in: memory heap from which the memory
needed is allocated */
{
- dfield_t* dfield;
- byte* field;
- ulint len;
ulint ref_len;
ulint i;
ut_ad(index && ptr && ref && heap);
- ut_a(index->type & DICT_CLUSTERED);
+ ut_a(dict_index_is_clust(index));
ref_len = dict_index_get_n_unique(index);
@@ -349,9 +398,14 @@ trx_undo_rec_get_row_ref(
dict_index_copy_types(*ref, index, ref_len);
for (i = 0; i < ref_len; i++) {
+ dfield_t* dfield;
+ byte* field;
+ ulint len;
+ ulint orig_len;
+
dfield = dtuple_get_nth_field(*ref, i);
- ptr = trx_undo_rec_get_col_val(ptr, &field, &len);
+ ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
dfield_set_data(dfield, field, len);
}
@@ -359,77 +413,145 @@ trx_undo_rec_get_row_ref(
return(ptr);
}
-/***********************************************************************
-Skips a row reference from an undo log record. */
-
+/*******************************************************************//**
+Skips a row reference from an undo log record.
+@return pointer to remaining part of undo record */
+UNIV_INTERN
byte*
trx_undo_rec_skip_row_ref(
/*======================*/
- /* out: pointer to remaining part of undo
- record */
- byte* ptr, /* in: remaining part in update undo log
+ byte* ptr, /*!< in: remaining part in update undo log
record, at the start of the row reference */
- dict_index_t* index) /* in: clustered index */
+ dict_index_t* index) /*!< in: clustered index */
{
- byte* field;
- ulint len;
ulint ref_len;
ulint i;
ut_ad(index && ptr);
- ut_a(index->type & DICT_CLUSTERED);
+ ut_a(dict_index_is_clust(index));
ref_len = dict_index_get_n_unique(index);
for (i = 0; i < ref_len; i++) {
- ptr = trx_undo_rec_get_col_val(ptr, &field, &len);
+ byte* field;
+ ulint len;
+ ulint orig_len;
+
+ ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
}
return(ptr);
}
-/**************************************************************************
+/**********************************************************************//**
+Fetch a prefix of an externally stored column, for writing to the undo log
+of an update or delete marking of a clustered index record.
+@return ext_buf */
+static
+byte*
+trx_undo_page_fetch_ext(
+/*====================*/
+ byte* ext_buf, /*!< in: a buffer of
+ REC_MAX_INDEX_COL_LEN
+ + BTR_EXTERN_FIELD_REF_SIZE */
+ ulint zip_size, /*!< compressed page size in bytes,
+ or 0 for uncompressed BLOB */
+ const byte* field, /*!< in: an externally stored column */
+ ulint* len) /*!< in: length of field;
+ out: used length of ext_buf */
+{
+ /* Fetch the BLOB. */
+ ulint ext_len = btr_copy_externally_stored_field_prefix(
+ ext_buf, REC_MAX_INDEX_COL_LEN, zip_size, field, *len);
+ /* BLOBs should always be nonempty. */
+ ut_a(ext_len);
+ /* Append the BLOB pointer to the prefix. */
+ memcpy(ext_buf + ext_len,
+ field + *len - BTR_EXTERN_FIELD_REF_SIZE,
+ BTR_EXTERN_FIELD_REF_SIZE);
+ *len = ext_len + BTR_EXTERN_FIELD_REF_SIZE;
+ return(ext_buf);
+}
+
+/**********************************************************************//**
+Writes to the undo log a prefix of an externally stored column.
+@return undo log position */
+static
+byte*
+trx_undo_page_report_modify_ext(
+/*============================*/
+ byte* ptr, /*!< in: undo log position,
+ at least 15 bytes must be available */
+ byte* ext_buf, /*!< in: a buffer of
+ REC_MAX_INDEX_COL_LEN
+ + BTR_EXTERN_FIELD_REF_SIZE,
+ or NULL if a longer prefix
+ should not be fetched */
+ ulint zip_size, /*!< compressed page size in bytes,
+ or 0 for uncompressed BLOB */
+ const byte** field, /*!< in/out: the locally stored part of
+ the externally stored column */
+ ulint* len) /*!< in/out: length of field, in bytes */
+{
+ if (ext_buf) {
+ /* If an ordering column is externally stored, we will
+ have to store a longer prefix of the field. In this
+ case, write to the log a marker followed by the
+ original length and the real length of the field. */
+ ptr += mach_write_compressed(ptr, UNIV_EXTERN_STORAGE_FIELD);
+
+ ptr += mach_write_compressed(ptr, *len);
+
+ *field = trx_undo_page_fetch_ext(ext_buf, zip_size,
+ *field, len);
+
+ ptr += mach_write_compressed(ptr, *len);
+ } else {
+ ptr += mach_write_compressed(ptr, UNIV_EXTERN_STORAGE_FIELD
+ + *len);
+ }
+
+ return(ptr);
+}
+
+/**********************************************************************//**
Reports in the undo log of an update or delete marking of a clustered index
-record. */
+record.
+@return byte offset of the inserted undo log entry on the page on
+success, 0 on failure */
static
ulint
trx_undo_page_report_modify(
/*========================*/
- /* out: byte offset of the inserted
- undo log entry on the page if succeed,
- 0 if fail */
- page_t* undo_page, /* in: undo log page */
- trx_t* trx, /* in: transaction */
- dict_index_t* index, /* in: clustered index where update or
+ page_t* undo_page, /*!< in: undo log page */
+ trx_t* trx, /*!< in: transaction */
+ dict_index_t* index, /*!< in: clustered index where update or
delete marking is done */
- rec_t* rec, /* in: clustered index record which
+ const rec_t* rec, /*!< in: clustered index record which
has NOT yet been modified */
- const ulint* offsets, /* in: rec_get_offsets(rec, index) */
- upd_t* update, /* in: update vector which tells the
+ const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
+ const upd_t* update, /*!< in: update vector which tells the
columns to be updated; in the case of
a delete, this should be set to NULL */
- ulint cmpl_info, /* in: compiler info on secondary
+ ulint cmpl_info, /*!< in: compiler info on secondary
index updates */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
dict_table_t* table;
- upd_field_t* upd_field;
ulint first_free;
byte* ptr;
- ulint len;
- byte* field;
+ const byte* field;
ulint flen;
- ulint pos;
- dulint roll_ptr;
- dulint trx_id;
- ulint bits;
ulint col_no;
- byte* old_ptr;
ulint type_cmpl;
byte* type_cmpl_ptr;
ulint i;
+ trx_id_t trx_id;
+ ibool ignore_prefix = FALSE;
+ byte ext_buf[REC_MAX_INDEX_COL_LEN
+ + BTR_EXTERN_FIELD_REF_SIZE];
- ut_a(index->type & DICT_CLUSTERED);
+ ut_a(dict_index_is_clust(index));
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
+ TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE);
@@ -454,53 +576,55 @@ trx_undo_page_report_modify(
/* Store first some general parameters to the undo log */
- if (update) {
- if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
- type_cmpl = TRX_UNDO_UPD_DEL_REC;
- } else {
- type_cmpl = TRX_UNDO_UPD_EXIST_REC;
- }
- } else {
+ if (!update) {
type_cmpl = TRX_UNDO_DEL_MARK_REC;
+ } else if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
+ type_cmpl = TRX_UNDO_UPD_DEL_REC;
+ /* We are about to update a delete marked record.
+ We don't typically need the prefix in this case unless
+ the delete marking is done by the same transaction
+ (which we check below). */
+ ignore_prefix = TRUE;
+ } else {
+ type_cmpl = TRX_UNDO_UPD_EXIST_REC;
}
- type_cmpl = type_cmpl | (cmpl_info * TRX_UNDO_CMPL_INFO_MULT);
-
- mach_write_to_1(ptr, type_cmpl);
-
+ type_cmpl |= cmpl_info * TRX_UNDO_CMPL_INFO_MULT;
type_cmpl_ptr = ptr;
- ptr++;
- len = mach_dulint_write_much_compressed(ptr, trx->undo_no);
- ptr += len;
+ *ptr++ = (byte) type_cmpl;
+ ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no);
- len = mach_dulint_write_much_compressed(ptr, table->id);
- ptr += len;
+ ptr += mach_dulint_write_much_compressed(ptr, table->id);
/*----------------------------------------*/
/* Store the state of the info bits */
- bits = rec_get_info_bits(rec, dict_table_is_comp(table));
- mach_write_to_1(ptr, bits);
- ptr += 1;
+ *ptr++ = (byte) rec_get_info_bits(rec, dict_table_is_comp(table));
/* Store the values of the system columns */
field = rec_get_nth_field(rec, offsets,
dict_index_get_sys_col_pos(
- index, DATA_TRX_ID), &len);
- ut_ad(len == DATA_TRX_ID_LEN);
+ index, DATA_TRX_ID), &flen);
+ ut_ad(flen == DATA_TRX_ID_LEN);
+
trx_id = trx_read_trx_id(field);
+
+ /* If it is an update of a delete marked record, then we are
+ allowed to ignore blob prefixes if the delete marking was done
+ by some other trx as it must have committed by now for us to
+ allow an over-write. */
+ if (ignore_prefix) {
+ ignore_prefix = ut_dulint_cmp(trx_id, trx->id) != 0;
+ }
+ ptr += mach_dulint_write_compressed(ptr, trx_id);
+
field = rec_get_nth_field(rec, offsets,
dict_index_get_sys_col_pos(
- index, DATA_ROLL_PTR), &len);
- ut_ad(len == DATA_ROLL_PTR_LEN);
- roll_ptr = trx_read_roll_ptr(field);
-
- len = mach_dulint_write_compressed(ptr, trx_id);
- ptr += len;
+ index, DATA_ROLL_PTR), &flen);
+ ut_ad(flen == DATA_ROLL_PTR_LEN);
- len = mach_dulint_write_compressed(ptr, roll_ptr);
- ptr += len;
+ ptr += mach_dulint_write_compressed(ptr, trx_read_roll_ptr(field));
/*----------------------------------------*/
/* Store then the fields required to uniquely determine the
@@ -510,13 +634,16 @@ trx_undo_page_report_modify(
field = rec_get_nth_field(rec, offsets, i, &flen);
- if (trx_undo_left(undo_page, ptr) < 4) {
+ /* The ordering columns must not be stored externally. */
+ ut_ad(!rec_offs_nth_extern(offsets, i));
+ ut_ad(dict_index_get_nth_col(index, i)->ord_part);
+
+ if (trx_undo_left(undo_page, ptr) < 5) {
return(0);
}
- len = mach_write_compressed(ptr, flen);
- ptr += len;
+ ptr += mach_write_compressed(ptr, flen);
if (flen != UNIV_SQL_NULL) {
if (trx_undo_left(undo_page, ptr) < flen) {
@@ -538,13 +665,11 @@ trx_undo_page_report_modify(
return(0);
}
- len = mach_write_compressed(ptr, upd_get_n_fields(update));
- ptr += len;
+ ptr += mach_write_compressed(ptr, upd_get_n_fields(update));
for (i = 0; i < upd_get_n_fields(update); i++) {
- upd_field = upd_get_nth_field(update, i);
- pos = upd_field->field_no;
+ ulint pos = upd_get_nth_field(update, i)->field_no;
/* Write field number to undo log */
if (trx_undo_left(undo_page, ptr) < 5) {
@@ -552,38 +677,37 @@ trx_undo_page_report_modify(
return(0);
}
- len = mach_write_compressed(ptr, pos);
- ptr += len;
+ ptr += mach_write_compressed(ptr, pos);
/* Save the old value of field */
field = rec_get_nth_field(rec, offsets, pos, &flen);
- if (trx_undo_left(undo_page, ptr) < 5) {
+ if (trx_undo_left(undo_page, ptr) < 15) {
return(0);
}
if (rec_offs_nth_extern(offsets, pos)) {
- /* If a field has external storage, we add
- to flen the flag */
-
- len = mach_write_compressed(
+ ptr = trx_undo_page_report_modify_ext(
ptr,
- UNIV_EXTERN_STORAGE_FIELD + flen);
+ dict_index_get_nth_col(index, pos)
+ ->ord_part
+ && !ignore_prefix
+ && flen < REC_MAX_INDEX_COL_LEN
+ ? ext_buf : NULL,
+ dict_table_zip_size(table),
+ &field, &flen);
/* Notify purge that it eventually has to
free the old externally stored field */
trx->update_undo->del_marks = TRUE;
- *type_cmpl_ptr = *type_cmpl_ptr
- | TRX_UNDO_UPD_EXTERN;
+ *type_cmpl_ptr |= TRX_UNDO_UPD_EXTERN;
} else {
- len = mach_write_compressed(ptr, flen);
+ ptr += mach_write_compressed(ptr, flen);
}
- ptr += len;
-
if (flen != UNIV_SQL_NULL) {
if (trx_undo_left(undo_page, ptr) < flen) {
@@ -603,12 +727,15 @@ trx_undo_page_report_modify(
in the purge of old versions where we use it to build and search the
delete marked index records, to look if we can remove them from the
index tree. Note that starting from 4.0.14 also externally stored
- fields can be ordering in some index. But we always store at least
- 384 first bytes locally to the clustered index record, which means
- we can construct the column prefix fields in the index from the
- stored data. */
+ fields can be ordering in some index. Starting from 5.2, we no longer
+ store REC_MAX_INDEX_COL_LEN first bytes to the undo log record,
+ but we can construct the column prefix fields in the index by
+ fetching the first page of the BLOB that is pointed to by the
+ clustered index. This works also in crash recovery, because all pages
+ (including BLOBs) are recovered before anything is rolled back. */
if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
+ byte* old_ptr = ptr;
trx->update_undo->del_marks = TRUE;
@@ -617,8 +744,6 @@ trx_undo_page_report_modify(
return(0);
}
- old_ptr = ptr;
-
/* Reserve 2 bytes to write the number of bytes the stored
fields take in this undo record */
@@ -630,32 +755,36 @@ trx_undo_page_report_modify(
const dict_col_t* col
= dict_table_get_nth_col(table, col_no);
- if (col->ord_part > 0) {
-
- pos = dict_index_get_nth_col_pos(index,
- col_no);
+ if (col->ord_part) {
+ ulint pos;
/* Write field number to undo log */
- if (trx_undo_left(undo_page, ptr) < 5) {
+ if (trx_undo_left(undo_page, ptr) < 5 + 15) {
return(0);
}
- len = mach_write_compressed(ptr, pos);
- ptr += len;
+ pos = dict_index_get_nth_col_pos(index,
+ col_no);
+ ptr += mach_write_compressed(ptr, pos);
/* Save the old value of field */
field = rec_get_nth_field(rec, offsets, pos,
&flen);
- if (trx_undo_left(undo_page, ptr) < 5) {
-
- return(0);
+ if (rec_offs_nth_extern(offsets, pos)) {
+ ptr = trx_undo_page_report_modify_ext(
+ ptr,
+ flen < REC_MAX_INDEX_COL_LEN
+ && !ignore_prefix
+ ? ext_buf : NULL,
+ dict_table_zip_size(table),
+ &field, &flen);
+ } else {
+ ptr += mach_write_compressed(
+ ptr, flen);
}
- len = mach_write_compressed(ptr, flen);
- ptr += len;
-
if (flen != UNIV_SQL_NULL) {
if (trx_undo_left(undo_page, ptr)
< flen) {
@@ -693,24 +822,21 @@ trx_undo_page_report_modify(
return(first_free);
}
-/**************************************************************************
+/**********************************************************************//**
Reads from an undo log update record the system field values of the old
-version. */
-
+version.
+@return remaining part of undo log record after reading these values */
+UNIV_INTERN
byte*
trx_undo_update_rec_get_sys_cols(
/*=============================*/
- /* out: remaining part of undo log
- record after reading these values */
- byte* ptr, /* in: remaining part of undo log
- record after reading general
- parameters */
- dulint* trx_id, /* out: trx id */
- dulint* roll_ptr, /* out: roll ptr */
- ulint* info_bits) /* out: info bits state */
+ byte* ptr, /*!< in: remaining part of undo
+ log record after reading
+ general parameters */
+ trx_id_t* trx_id, /*!< out: trx id */
+ roll_ptr_t* roll_ptr, /*!< out: roll ptr */
+ ulint* info_bits) /*!< out: info bits state */
{
- ulint len;
-
/* Read the state of the info bits */
*info_bits = mach_read_from_1(ptr);
ptr += 1;
@@ -718,26 +844,23 @@ trx_undo_update_rec_get_sys_cols(
/* Read the values of the system columns */
*trx_id = mach_dulint_read_compressed(ptr);
- len = mach_dulint_get_compressed_size(*trx_id);
- ptr += len;
+ ptr += mach_dulint_get_compressed_size(*trx_id);
*roll_ptr = mach_dulint_read_compressed(ptr);
- len = mach_dulint_get_compressed_size(*roll_ptr);
- ptr += len;
+ ptr += mach_dulint_get_compressed_size(*roll_ptr);
return(ptr);
}
-/**************************************************************************
-Reads from an update undo log record the number of updated fields. */
+/**********************************************************************//**
+Reads from an update undo log record the number of updated fields.
+@return remaining part of undo log record after reading this value */
UNIV_INLINE
byte*
trx_undo_update_rec_get_n_upd_fields(
/*=================================*/
- /* out: remaining part of undo log record after
- reading this value */
- byte* ptr, /* in: pointer to remaining part of undo log record */
- ulint* n) /* out: number of fields */
+ byte* ptr, /*!< in: pointer to remaining part of undo log record */
+ ulint* n) /*!< out: number of fields */
{
*n = mach_read_compressed(ptr);
ptr += mach_get_compressed_size(*n);
@@ -745,16 +868,15 @@ trx_undo_update_rec_get_n_upd_fields(
return(ptr);
}
-/**************************************************************************
-Reads from an update undo log record a stored field number. */
+/**********************************************************************//**
+Reads from an update undo log record a stored field number.
+@return remaining part of undo log record after reading this value */
UNIV_INLINE
byte*
trx_undo_update_rec_get_field_no(
/*=============================*/
- /* out: remaining part of undo log record after
- reading this value */
- byte* ptr, /* in: pointer to remaining part of undo log record */
- ulint* field_no)/* out: field number */
+ byte* ptr, /*!< in: pointer to remaining part of undo log record */
+ ulint* field_no)/*!< out: field number */
{
*field_no = mach_read_compressed(ptr);
ptr += mach_get_compressed_size(*field_no);
@@ -762,45 +884,41 @@ trx_undo_update_rec_get_field_no(
return(ptr);
}
-/***********************************************************************
-Builds an update vector based on a remaining part of an undo log record. */
-
+/*******************************************************************//**
+Builds an update vector based on a remaining part of an undo log record.
+@return remaining part of the record, NULL if an error is detected, which
+means that the record is corrupted */
+UNIV_INTERN
byte*
trx_undo_update_rec_get_update(
/*===========================*/
- /* out: remaining part of the record,
- NULL if an error detected, which means that
- the record is corrupted */
- byte* ptr, /* in: remaining part in update undo log
+ byte* ptr, /*!< in: remaining part in update undo log
record, after reading the row reference
NOTE that this copy of the undo log record must
be preserved as long as the update vector is
used, as we do NOT copy the data in the
record! */
- dict_index_t* index, /* in: clustered index */
- ulint type, /* in: TRX_UNDO_UPD_EXIST_REC,
+ dict_index_t* index, /*!< in: clustered index */
+ ulint type, /*!< in: TRX_UNDO_UPD_EXIST_REC,
TRX_UNDO_UPD_DEL_REC, or
TRX_UNDO_DEL_MARK_REC; in the last case,
only trx id and roll ptr fields are added to
the update vector */
- dulint trx_id, /* in: transaction id from this undo record */
- dulint roll_ptr,/* in: roll pointer from this undo record */
- ulint info_bits,/* in: info bits from this undo record */
- trx_t* trx, /* in: transaction */
- mem_heap_t* heap, /* in: memory heap from which the memory
+ trx_id_t trx_id, /*!< in: transaction id from this undo record */
+ roll_ptr_t roll_ptr,/*!< in: roll pointer from this undo record */
+ ulint info_bits,/*!< in: info bits from this undo record */
+ trx_t* trx, /*!< in: transaction */
+ mem_heap_t* heap, /*!< in: memory heap from which the memory
needed is allocated */
- upd_t** upd) /* out, own: update vector */
+ upd_t** upd) /*!< out, own: update vector */
{
upd_field_t* upd_field;
upd_t* update;
ulint n_fields;
byte* buf;
- byte* field;
- ulint len;
- ulint field_no;
ulint i;
- ut_a(index->type & DICT_CLUSTERED);
+ ut_a(dict_index_is_clust(index));
if (type != TRX_UNDO_DEL_MARK_REC) {
ptr = trx_undo_update_rec_get_n_upd_fields(ptr, &n_fields);
@@ -836,6 +954,11 @@ trx_undo_update_rec_get_update(
for (i = 0; i < n_fields; i++) {
+ byte* field;
+ ulint len;
+ ulint field_no;
+ ulint orig_len;
+
ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);
if (field_no >= dict_index_get_n_fields(index)) {
@@ -857,20 +980,24 @@ trx_undo_update_rec_get_update(
return(NULL);
}
- ptr = trx_undo_rec_get_col_val(ptr, &field, &len);
-
upd_field = upd_get_nth_field(update, i);
upd_field_set_field_no(upd_field, field_no, index, trx);
- if (len != UNIV_SQL_NULL && len >= UNIV_EXTERN_STORAGE_FIELD) {
+ ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
- upd_field->extern_storage = TRUE;
+ upd_field->orig_len = orig_len;
+ if (len == UNIV_SQL_NULL) {
+ dfield_set_null(&upd_field->new_val);
+ } else if (len < UNIV_EXTERN_STORAGE_FIELD) {
+ dfield_set_data(&upd_field->new_val, field, len);
+ } else {
len -= UNIV_EXTERN_STORAGE_FIELD;
- }
- dfield_set_data(&(upd_field->new_val), field, len);
+ dfield_set_data(&upd_field->new_val, field, len);
+ dfield_set_ext(&upd_field->new_val);
+ }
}
*upd = update;
@@ -878,38 +1005,37 @@ trx_undo_update_rec_get_update(
return(ptr);
}
-/***********************************************************************
+/*******************************************************************//**
Builds a partial row from an update undo log record. It contains the
-columns which occur as ordering in any index of the table. */
-
+columns which occur as ordering in any index of the table.
+@return pointer to remaining part of undo record */
+UNIV_INTERN
byte*
trx_undo_rec_get_partial_row(
/*=========================*/
- /* out: pointer to remaining part of undo
- record */
- byte* ptr, /* in: remaining part in update undo log
+ byte* ptr, /*!< in: remaining part in update undo log
record of a suitable type, at the start of
the stored index columns;
NOTE that this copy of the undo log record must
be preserved as long as the partial row is
used, as we do NOT copy the data in the
record! */
- dict_index_t* index, /* in: clustered index */
- dtuple_t** row, /* out, own: partial row */
- mem_heap_t* heap) /* in: memory heap from which the memory
+ dict_index_t* index, /*!< in: clustered index */
+ dtuple_t** row, /*!< out, own: partial row */
+ ibool ignore_prefix, /*!< in: TRUE if blob prefixes may be
+ missing from the undo record. Used
+ only in the assertion. */
+ mem_heap_t* heap) /*!< in: memory heap from which the memory
needed is allocated */
{
- dfield_t* dfield;
- byte* field;
- ulint len;
- ulint field_no;
- ulint col_no;
+ const byte* end_ptr;
ulint row_len;
- ulint total_len;
- byte* start_ptr;
- ulint i;
- ut_ad(index && ptr && row && heap);
+ ut_ad(index);
+ ut_ad(ptr);
+ ut_ad(row);
+ ut_ad(heap);
+ ut_ad(dict_index_is_clust(index));
row_len = dict_table_get_n_cols(index->table);
@@ -917,40 +1043,57 @@ trx_undo_rec_get_partial_row(
dict_table_copy_types(*row, index->table);
- start_ptr = ptr;
-
- total_len = mach_read_from_2(ptr);
+ end_ptr = ptr + mach_read_from_2(ptr);
ptr += 2;
- for (i = 0;; i++) {
-
- if (ptr == start_ptr + total_len) {
-
- break;
- }
+ while (ptr != end_ptr) {
+ dfield_t* dfield;
+ byte* field;
+ ulint field_no;
+ const dict_col_t* col;
+ ulint col_no;
+ ulint len;
+ ulint orig_len;
ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);
- col_no = dict_index_get_nth_col_no(index, field_no);
+ col = dict_index_get_nth_col(index, field_no);
+ col_no = dict_col_get_no(col);
- ptr = trx_undo_rec_get_col_val(ptr, &field, &len);
+ ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
dfield = dtuple_get_nth_field(*row, col_no);
dfield_set_data(dfield, field, len);
+
+ if (len != UNIV_SQL_NULL
+ && len >= UNIV_EXTERN_STORAGE_FIELD) {
+ dfield_set_len(dfield,
+ len - UNIV_EXTERN_STORAGE_FIELD);
+ dfield_set_ext(dfield);
+ /* If the prefix of this column is indexed,
+ ensure that enough prefix is stored in the
+ undo log record. */
+ ut_a(ignore_prefix
+ || !col->ord_part
+ || dfield_get_len(dfield)
+ >= REC_MAX_INDEX_COL_LEN
+ + BTR_EXTERN_FIELD_REF_SIZE);
+ }
}
return(ptr);
}
+#endif /* !UNIV_HOTBACKUP */
-/***************************************************************************
+/***********************************************************************//**
Erases the unused undo log page end. */
static
void
trx_undo_erase_page_end(
/*====================*/
- page_t* undo_page, /* in: undo page whose end to erase */
- mtr_t* mtr) /* in: mtr */
+ page_t* undo_page, /*!< in: undo page whose end to erase */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint first_free;
@@ -962,17 +1105,17 @@ trx_undo_erase_page_end(
mlog_write_initial_log_record(undo_page, MLOG_UNDO_ERASE_END, mtr);
}
-/***************************************************************
-Parses a redo log record of erasing of an undo page end. */
-
+/***********************************************************//**
+Parses a redo log record of erasing of an undo page end.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
trx_undo_parse_erase_page_end(
/*==========================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr __attribute__((unused)), /* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr) /* in: mtr or NULL */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr __attribute__((unused)), /*!< in: buffer end */
+ page_t* page, /*!< in: page or NULL */
+ mtr_t* mtr) /*!< in: mtr or NULL */
{
ut_ad(ptr && end_ptr);
@@ -986,58 +1129,56 @@ trx_undo_parse_erase_page_end(
return(ptr);
}
-/***************************************************************************
+#ifndef UNIV_HOTBACKUP
+/***********************************************************************//**
Writes information to an undo log about an insert, update, or a delete marking
of a clustered index record. This information is used in a rollback of the
transaction and in consistent reads that must look to the history of this
-transaction. */
-
+transaction.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
ulint
trx_undo_report_row_operation(
/*==========================*/
- /* out: DB_SUCCESS or error code */
- ulint flags, /* in: if BTR_NO_UNDO_LOG_FLAG bit is
+ ulint flags, /*!< in: if BTR_NO_UNDO_LOG_FLAG bit is
set, does nothing */
- ulint op_type, /* in: TRX_UNDO_INSERT_OP or
+ ulint op_type, /*!< in: TRX_UNDO_INSERT_OP or
TRX_UNDO_MODIFY_OP */
- que_thr_t* thr, /* in: query thread */
- dict_index_t* index, /* in: clustered index */
- dtuple_t* clust_entry, /* in: in the case of an insert,
+ que_thr_t* thr, /*!< in: query thread */
+ dict_index_t* index, /*!< in: clustered index */
+ const dtuple_t* clust_entry, /*!< in: in the case of an insert,
index entry to insert into the
clustered index, otherwise NULL */
- upd_t* update, /* in: in the case of an update,
+ const upd_t* update, /*!< in: in the case of an update,
the update vector, otherwise NULL */
- ulint cmpl_info, /* in: compiler info on secondary
+ ulint cmpl_info, /*!< in: compiler info on secondary
index updates */
- rec_t* rec, /* in: in case of an update or delete
+ const rec_t* rec, /*!< in: in case of an update or delete
marking, the record in the clustered
index, otherwise NULL */
- dulint* roll_ptr) /* out: rollback pointer to the
+ roll_ptr_t* roll_ptr) /*!< out: rollback pointer to the
inserted undo log record,
ut_dulint_zero if BTR_NO_UNDO_LOG
flag was specified */
{
trx_t* trx;
trx_undo_t* undo;
- page_t* undo_page;
- ulint offset;
ulint page_no;
- ibool is_insert;
trx_rseg_t* rseg;
mtr_t mtr;
ulint err = DB_SUCCESS;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
+ rec_offs_init(offsets_);
- ut_a(index->type & DICT_CLUSTERED);
+ ut_a(dict_index_is_clust(index));
if (flags & BTR_NO_UNDO_LOG_FLAG) {
*roll_ptr = ut_dulint_zero;
- return(err);
+ return(DB_SUCCESS);
}
ut_ad(thr);
@@ -1059,7 +1200,13 @@ trx_undo_report_row_operation(
}
undo = trx->insert_undo;
- is_insert = TRUE;
+
+ if (UNIV_UNLIKELY(!undo)) {
+ /* Did not succeed */
+ mutex_exit(&(trx->undo_mutex));
+
+ return(err);
+ }
} else {
ut_ad(op_type == TRX_UNDO_MODIFY_OP);
@@ -1070,14 +1217,15 @@ trx_undo_report_row_operation(
}
undo = trx->update_undo;
- is_insert = FALSE;
- }
- if (err != DB_SUCCESS) {
- /* Did not succeed: return the error encountered */
- mutex_exit(&(trx->undo_mutex));
+ if (UNIV_UNLIKELY(!undo)) {
+ /* Did not succeed */
+ mutex_exit(&(trx->undo_mutex));
+ return(err);
+ }
- return(err);
+ offsets = rec_get_offsets(rec, index, offsets,
+ ULINT_UNDEFINED, &heap);
}
page_no = undo->last_page_no;
@@ -1085,28 +1233,28 @@ trx_undo_report_row_operation(
mtr_start(&mtr);
for (;;) {
- undo_page = buf_page_get_gen(undo->space, page_no,
- RW_X_LATCH, undo->guess_page,
- BUF_GET,
- __FILE__, __LINE__,
- &mtr);
+ buf_block_t* undo_block;
+ page_t* undo_page;
+ ulint offset;
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(undo_page, SYNC_TRX_UNDO_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
+ undo_block = buf_page_get_gen(undo->space, undo->zip_size,
+ page_no, RW_X_LATCH,
+ undo->guess_block, BUF_GET,
+ __FILE__, __LINE__, &mtr);
+ buf_block_dbg_add_level(undo_block, SYNC_TRX_UNDO_PAGE);
+
+ undo_page = buf_block_get_frame(undo_block);
if (op_type == TRX_UNDO_INSERT_OP) {
offset = trx_undo_page_report_insert(
undo_page, trx, index, clust_entry, &mtr);
} else {
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
offset = trx_undo_page_report_modify(
undo_page, trx, index, rec, offsets, update,
cmpl_info, &mtr);
}
- if (offset == 0) {
+ if (UNIV_UNLIKELY(offset == 0)) {
/* The record did not fit on the page. We erase the
end segment of the undo log page and write a log
record of it: this is to ensure that in the debug
@@ -1114,14 +1262,29 @@ trx_undo_report_row_operation(
records stays identical to the original page */
trx_undo_erase_page_end(undo_page, &mtr);
- }
+ mtr_commit(&mtr);
+ } else {
+ /* Success */
- mtr_commit(&mtr);
+ mtr_commit(&mtr);
- if (offset != 0) {
- /* Success */
+ undo->empty = FALSE;
+ undo->top_page_no = page_no;
+ undo->top_offset = offset;
+ undo->top_undo_no = trx->undo_no;
+ undo->guess_block = undo_block;
+
+ UT_DULINT_INC(trx->undo_no);
- break;
+ mutex_exit(&trx->undo_mutex);
+
+ *roll_ptr = trx_undo_build_roll_ptr(
+ op_type == TRX_UNDO_INSERT_OP,
+ rseg->id, page_no, offset);
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+ return(DB_SUCCESS);
}
ut_ad(page_no == undo->last_page_no);
@@ -1140,7 +1303,7 @@ trx_undo_report_row_operation(
mutex_exit(&(rseg->mutex));
- if (page_no == FIL_NULL) {
+ if (UNIV_UNLIKELY(page_no == FIL_NULL)) {
/* Did not succeed: out of space */
mutex_exit(&(trx->undo_mutex));
@@ -1151,43 +1314,26 @@ trx_undo_report_row_operation(
return(DB_OUT_OF_FILE_SPACE);
}
}
-
- undo->empty = FALSE;
- undo->top_page_no = page_no;
- undo->top_offset = offset;
- undo->top_undo_no = trx->undo_no;
- undo->guess_page = undo_page;
-
- UT_DULINT_INC(trx->undo_no);
-
- mutex_exit(&(trx->undo_mutex));
-
- *roll_ptr = trx_undo_build_roll_ptr(is_insert, rseg->id, page_no,
- offset);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(err);
}
/*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/
-/**********************************************************************
+/******************************************************************//**
Copies an undo record to heap. This function can be called if we know that
-the undo log record exists. */
-
+the undo log record exists.
+@return own: copy of the record */
+UNIV_INTERN
trx_undo_rec_t*
trx_undo_get_undo_rec_low(
/*======================*/
- /* out, own: copy of the record */
- dulint roll_ptr, /* in: roll pointer to record */
- mem_heap_t* heap) /* in: memory heap where copied */
+ roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
+ mem_heap_t* heap) /*!< in: memory heap where copied */
{
trx_undo_rec_t* undo_rec;
ulint rseg_id;
ulint page_no;
ulint offset;
- page_t* undo_page;
+ const page_t* undo_page;
trx_rseg_t* rseg;
ibool is_insert;
mtr_t mtr;
@@ -1198,7 +1344,8 @@ trx_undo_get_undo_rec_low(
mtr_start(&mtr);
- undo_page = trx_undo_page_get_s_latched(rseg->space, page_no, &mtr);
+ undo_page = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size,
+ page_no, &mtr);
undo_rec = trx_undo_rec_copy(undo_page + offset, heap);
@@ -1207,24 +1354,24 @@ trx_undo_get_undo_rec_low(
return(undo_rec);
}
-/**********************************************************************
-Copies an undo record to heap. */
+/******************************************************************//**
+Copies an undo record to heap.
+
+NOTE: the caller must have latches on the clustered index page and
+purge_view.
+@return DB_SUCCESS, or DB_MISSING_HISTORY if the undo log has been
+truncated and we cannot fetch the old version */
+UNIV_INTERN
ulint
trx_undo_get_undo_rec(
/*==================*/
- /* out: DB_SUCCESS, or
- DB_MISSING_HISTORY if the undo log
- has been truncated and we cannot
- fetch the old version; NOTE: the
- caller must have latches on the
- clustered index page and purge_view */
- dulint roll_ptr, /* in: roll pointer to record */
- dulint trx_id, /* in: id of the trx that generated
+ roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
+ trx_id_t trx_id, /*!< in: id of the trx that generated
the roll pointer: it points to an
undo log of this transaction */
- trx_undo_rec_t** undo_rec, /* out, own: copy of the record */
- mem_heap_t* heap) /* in: memory heap where copied */
+ trx_undo_rec_t** undo_rec, /*!< out, own: copy of the record */
+ mem_heap_t* heap) /*!< in: memory heap where copied */
{
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
@@ -1243,42 +1390,43 @@ trx_undo_get_undo_rec(
return(DB_SUCCESS);
}
-/***********************************************************************
+/*******************************************************************//**
Build a previous version of a clustered index record. This function checks
that the caller has a latch on the index page of the clustered index record
and an s-latch on the purge_view. This guarantees that the stack of versions
-is locked. */
-
+is locked all the way down to the purge_view.
+@return DB_SUCCESS, or DB_MISSING_HISTORY if the previous version is
+earlier than purge_view, which means that it may have been removed,
+DB_ERROR if corrupted record */
+UNIV_INTERN
ulint
trx_undo_prev_version_build(
/*========================*/
- /* out: DB_SUCCESS, or DB_MISSING_HISTORY if
- the previous version is not >= purge_view,
- which means that it may have been removed,
- DB_ERROR if corrupted record */
- rec_t* index_rec,/* in: clustered index record in the
+ const rec_t* index_rec,/*!< in: clustered index record in the
index tree */
mtr_t* index_mtr __attribute__((unused)),
- /* in: mtr which contains the latch to
+ /*!< in: mtr which contains the latch to
index_rec page and purge_view */
- rec_t* rec, /* in: version of a clustered index record */
- dict_index_t* index, /* in: clustered index */
- ulint* offsets,/* in: rec_get_offsets(rec, index) */
- mem_heap_t* heap, /* in: memory heap from which the memory
+ const rec_t* rec, /*!< in: version of a clustered index record */
+ dict_index_t* index, /*!< in: clustered index */
+ ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ mem_heap_t* heap, /*!< in: memory heap from which the memory
needed is allocated */
- rec_t** old_vers)/* out, own: previous version, or NULL if
+ rec_t** old_vers)/*!< out, own: previous version, or NULL if
rec is the first inserted version, or if
- history data has been deleted */
+ history data has been deleted (an error),
+ or if the purge COULD have removed the version
+ though it has not yet done so */
{
- trx_undo_rec_t* undo_rec;
+ trx_undo_rec_t* undo_rec = NULL;
dtuple_t* entry;
- dulint rec_trx_id;
+ trx_id_t rec_trx_id;
ulint type;
- dulint undo_no;
+ undo_no_t undo_no;
dulint table_id;
- dulint trx_id;
- dulint roll_ptr;
- dulint old_roll_ptr;
+ trx_id_t trx_id;
+ roll_ptr_t roll_ptr;
+ roll_ptr_t old_roll_ptr;
upd_t* update;
byte* ptr;
ulint info_bits;
@@ -1289,13 +1437,12 @@ trx_undo_prev_version_build(
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */
- ut_ad(mtr_memo_contains(index_mtr, buf_block_align(index_rec),
- MTR_MEMO_PAGE_S_FIX)
- || mtr_memo_contains(index_mtr, buf_block_align(index_rec),
- MTR_MEMO_PAGE_X_FIX));
+ ut_ad(mtr_memo_contains_page(index_mtr, index_rec, MTR_MEMO_PAGE_S_FIX)
+ || mtr_memo_contains_page(index_mtr, index_rec,
+ MTR_MEMO_PAGE_X_FIX));
ut_ad(rec_offs_validate(rec, index, offsets));
- if (!(index->type & DICT_CLUSTERED)) {
+ if (!dict_index_is_clust(index)) {
fprintf(stderr, "InnoDB: Error: trying to access"
" update undo rec for non-clustered index %s\n"
"InnoDB: Submit a detailed bug report to"
@@ -1325,7 +1472,9 @@ trx_undo_prev_version_build(
err = trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap);
- if (err != DB_SUCCESS) {
+ if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
+ /* The undo record may already have been purged.
+ This should never happen in InnoDB. */
return(err);
}
@@ -1335,6 +1484,29 @@ trx_undo_prev_version_build(
ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
&info_bits);
+
+ /* (a) If a clustered index record version is such that the
+ trx id stamp in it is bigger than purge_sys->view, then the
+ BLOBs in that version are known to exist (the purge has not
+ progressed that far);
+
+ (b) if the version is the first version such that trx id in it
+ is less than purge_sys->view, and it is not delete-marked,
+ then the BLOBs in that version are known to exist (the purge
+ cannot have purged the BLOBs referenced by that version
+ yet).
+
+ This function does not fetch any BLOBs. The callers might, by
+ possibly invoking row_ext_create() via row_build(). However,
+ they should have all needed information in the *old_vers
+ returned by this function. This is because *old_vers is based
+ on the transaction undo log records. The function
+ trx_undo_page_fetch_ext() will write BLOB prefixes to the
+ transaction undo log that are at least as long as the longest
+ possible column prefix in a secondary index. Thus, secondary
+ index entries for *old_vers can be constructed without
+ dereferencing any BLOB pointers. */
+
ptr = trx_undo_rec_skip_row_ref(ptr, index);
ptr = trx_undo_update_rec_get_update(ptr, index, type, trx_id,
@@ -1380,14 +1552,12 @@ trx_undo_prev_version_build(
"InnoDB: record version ", stderr);
rec_print_new(stderr, rec, offsets);
fprintf(stderr, "\n"
- "InnoDB: Record trx id %lu %lu, update rec"
- " trx id %lu %lu\n"
+ "InnoDB: Record trx id " TRX_ID_FMT
+ ", update rec trx id " TRX_ID_FMT "\n"
"InnoDB: Roll ptr in rec %lu %lu, in update rec"
" %lu %lu\n",
- (ulong) ut_dulint_get_high(rec_trx_id),
- (ulong) ut_dulint_get_low(rec_trx_id),
- (ulong) ut_dulint_get_high(trx_id),
- (ulong) ut_dulint_get_low(trx_id),
+ TRX_ID_PREP_PRINTF(rec_trx_id),
+ TRX_ID_PREP_PRINTF(trx_id),
(ulong) ut_dulint_get_high(old_roll_ptr),
(ulong) ut_dulint_get_low(old_roll_ptr),
(ulong) ut_dulint_get_high(roll_ptr),
@@ -1398,38 +1568,34 @@ trx_undo_prev_version_build(
}
if (row_upd_changes_field_size_or_external(index, offsets, update)) {
- ulint* ext_vect;
- ulint n_ext_vect;
+ ulint n_ext;
/* We have to set the appropriate extern storage bits in the
old version of the record: the extern bits in rec for those
- fields that update does NOT update, as well as the the bits for
+ fields that update does NOT update, as well as the bits for
those fields that update updates to become externally stored
- fields. Store the info to ext_vect: */
-
- ext_vect = mem_alloc(sizeof(ulint)
- * rec_offs_n_fields(offsets));
- n_ext_vect = btr_push_update_extern_fields(ext_vect, offsets,
- update);
- entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec,
- heap);
+ fields. Store the info: */
+
+ entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index,
+ offsets, &n_ext, heap);
+ n_ext += btr_push_update_extern_fields(entry, update, heap);
+ /* The page containing the clustered index record
+ corresponding to entry is latched in mtr. Thus the
+ following call is safe. */
row_upd_index_replace_new_col_vals(entry, index, update, heap);
- buf = mem_heap_alloc(heap,
- rec_get_converted_size(index, entry));
-
- *old_vers = rec_convert_dtuple_to_rec(buf, index, entry);
+ buf = mem_heap_alloc(heap, rec_get_converted_size(index, entry,
+ n_ext));
- /* Now set the extern bits in the old version of the record */
- rec_set_field_extern_bits(*old_vers, index,
- ext_vect, n_ext_vect, NULL);
- mem_free(ext_vect);
+ *old_vers = rec_convert_dtuple_to_rec(buf, index,
+ entry, n_ext);
} else {
buf = mem_heap_alloc(heap, rec_offs_size(offsets));
*old_vers = rec_copy(buf, rec, offsets);
rec_offs_make_valid(*old_vers, index, offsets);
- row_upd_rec_in_place(*old_vers, offsets, update);
+ row_upd_rec_in_place(*old_vers, index, offsets, update, NULL);
}
return(DB_SUCCESS);
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/trx/trx0roll.c b/storage/innobase/trx/trx0roll.c
index 8934fe87c7e..c925478cdf4 100644
--- a/storage/innobase/trx/trx0roll.c
+++ b/storage/innobase/trx/trx0roll.c
@@ -1,7 +1,24 @@
-/******************************************************
-Transaction rollback
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file trx/trx0roll.c
+Transaction rollback
Created 3/26/1996 Heikki Tuuri
*******************************************************/
@@ -27,32 +44,32 @@ Created 3/26/1996 Heikki Tuuri
#include "lock0lock.h"
#include "pars0pars.h"
-/* This many pages must be undone before a truncate is tried within rollback */
+/** This many pages must be undone before a truncate is tried within
+rollback */
#define TRX_ROLL_TRUNC_THRESHOLD 1
-/* In crash recovery, the current trx to be rolled back */
-trx_t* trx_roll_crash_recv_trx = NULL;
+/** In crash recovery, the current trx to be rolled back */
+static trx_t* trx_roll_crash_recv_trx = NULL;
-/* In crash recovery we set this to the undo n:o of the current trx to be
+/** In crash recovery we set this to the undo n:o of the current trx to be
rolled back. Then we can print how many % the rollback has progressed. */
-ib_longlong trx_roll_max_undo_no;
-
-/* Auxiliary variable which tells the previous progress % we printed */
-ulint trx_roll_progress_printed_pct;
+static ib_int64_t trx_roll_max_undo_no;
-/***********************************************************************
-Rollback a transaction used in MySQL. */
+/** Auxiliary variable which tells the previous progress % we printed */
+static ulint trx_roll_progress_printed_pct;
+/*******************************************************************//**
+Rollback a transaction used in MySQL.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
trx_general_rollback_for_mysql(
/*===========================*/
- /* out: error code or DB_SUCCESS */
- trx_t* trx, /* in: transaction handle */
- ibool partial,/* in: TRUE if partial rollback requested */
- trx_savept_t* savept) /* in: pointer to savepoint undo number, if
- partial rollback requested */
+ trx_t* trx, /*!< in: transaction handle */
+ trx_savept_t* savept) /*!< in: pointer to savepoint undo number, if
+ partial rollback requested, or NULL for
+ complete rollback */
{
-#ifndef UNIV_HOTBACKUP
mem_heap_t* heap;
que_thr_t* thr;
roll_node_t* roll_node;
@@ -68,9 +85,8 @@ trx_general_rollback_for_mysql(
roll_node = roll_node_create(heap);
- roll_node->partial = partial;
-
- if (partial) {
+ if (savept) {
+ roll_node->partial = TRUE;
roll_node->savept = *savept;
}
@@ -104,23 +120,16 @@ trx_general_rollback_for_mysql(
srv_active_wake_master_thread();
return((int) trx->error_state);
-#else /* UNIV_HOTBACKUP */
- /* This function depends on MySQL code that is not included in
- InnoDB Hot Backup builds. Besides, this function should never
- be called in InnoDB Hot Backup. */
- ut_error;
- return(DB_FAIL);
-#endif /* UNIV_HOTBACKUP */
}
-/***********************************************************************
-Rollback a transaction used in MySQL. */
-
+/*******************************************************************//**
+Rollback a transaction used in MySQL.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
trx_rollback_for_mysql(
/*===================*/
- /* out: error code or DB_SUCCESS */
- trx_t* trx) /* in: transaction handle */
+ trx_t* trx) /*!< in: transaction handle */
{
int err;
@@ -135,35 +144,21 @@ trx_rollback_for_mysql(
the transaction object does not have an InnoDB session object, and we
set a dummy session that we use for all MySQL transactions. */
- mutex_enter(&kernel_mutex);
-
- if (trx->sess == NULL) {
- /* Open a dummy session */
-
- if (!trx_dummy_sess) {
- trx_dummy_sess = sess_open();
- }
-
- trx->sess = trx_dummy_sess;
- }
-
- mutex_exit(&kernel_mutex);
+ err = trx_general_rollback_for_mysql(trx, NULL);
- err = trx_general_rollback_for_mysql(trx, FALSE, NULL);
-
trx->op_info = "";
return(err);
}
-/***********************************************************************
-Rollback the latest SQL statement for MySQL. */
-
+/*******************************************************************//**
+Rollback the latest SQL statement for MySQL.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
int
trx_rollback_last_sql_stat_for_mysql(
/*=================================*/
- /* out: error code or DB_SUCCESS */
- trx_t* trx) /* in: transaction handle */
+ trx_t* trx) /*!< in: transaction handle */
{
int err;
@@ -174,8 +169,7 @@ trx_rollback_last_sql_stat_for_mysql(
trx->op_info = "rollback of SQL statement";
- err = trx_general_rollback_for_mysql(trx, TRUE,
- &(trx->last_sql_stat_start));
+ err = trx_general_rollback_for_mysql(trx, &trx->last_sql_stat_start);
/* The following call should not be needed, but we play safe: */
trx_mark_sql_stat_end(trx);
@@ -184,14 +178,14 @@ trx_rollback_last_sql_stat_for_mysql(
return(err);
}
-/***********************************************************************
+/*******************************************************************//**
Frees a single savepoint struct. */
-
+UNIV_INTERN
void
trx_roll_savepoint_free(
/*=====================*/
- trx_t* trx, /* in: transaction handle */
- trx_named_savept_t* savep) /* in: savepoint to free */
+ trx_t* trx, /*!< in: transaction handle */
+ trx_named_savept_t* savep) /*!< in: savepoint to free */
{
ut_a(savep != NULL);
ut_a(UT_LIST_GET_LEN(trx->trx_savepoints) > 0);
@@ -201,15 +195,15 @@ trx_roll_savepoint_free(
mem_free(savep);
}
-/***********************************************************************
+/*******************************************************************//**
Frees savepoint structs starting from savep, if savep == NULL then
free all savepoints. */
-
+UNIV_INTERN
void
trx_roll_savepoints_free(
/*=====================*/
- trx_t* trx, /* in: transaction handle */
- trx_named_savept_t* savep) /* in: free all savepoints > this one;
+ trx_t* trx, /*!< in: transaction handle */
+ trx_named_savept_t* savep) /*!< in: free all savepoints > this one;
if this is NULL, free all savepoints
of trx */
{
@@ -230,24 +224,22 @@ trx_roll_savepoints_free(
}
}
-/***********************************************************************
+/*******************************************************************//**
Rolls back a transaction back to a named savepoint. Modifications after the
savepoint are undone but InnoDB does NOT release the corresponding locks
which are stored in memory. If a lock is 'implicit', that is, a new inserted
row holds a lock where the lock information is carried by the trx id stored in
the row, these locks are naturally released in the rollback. Savepoints which
-were set after this savepoint are deleted. */
-
+were set after this savepoint are deleted.
+@return if no savepoint of the name found then DB_NO_SAVEPOINT,
+otherwise DB_SUCCESS */
+UNIV_INTERN
ulint
trx_rollback_to_savepoint_for_mysql(
/*================================*/
- /* out: if no savepoint
- of the name found then
- DB_NO_SAVEPOINT,
- otherwise DB_SUCCESS */
- trx_t* trx, /* in: transaction handle */
- const char* savepoint_name, /* in: savepoint name */
- ib_longlong* mysql_binlog_cache_pos) /* out: the MySQL binlog cache
+ trx_t* trx, /*!< in: transaction handle */
+ const char* savepoint_name, /*!< in: savepoint name */
+ ib_int64_t* mysql_binlog_cache_pos) /*!< out: the MySQL binlog cache
position corresponding to this
savepoint; MySQL needs this
information to remove the
@@ -288,7 +280,7 @@ trx_rollback_to_savepoint_for_mysql(
trx->op_info = "rollback to a savepoint";
- err = trx_general_rollback_for_mysql(trx, TRUE, &(savep->savept));
+ err = trx_general_rollback_for_mysql(trx, &savep->savept);
/* Store the current undo_no of the transaction so that we know where
to roll back if we have to roll back the next SQL statement: */
@@ -300,19 +292,19 @@ trx_rollback_to_savepoint_for_mysql(
return(err);
}
-/***********************************************************************
+/*******************************************************************//**
Creates a named savepoint. If the transaction is not yet started, starts it.
If there is already a savepoint of the same name, this call erases that old
savepoint and replaces it with a new. Savepoints are deleted in a transaction
-commit or rollback. */
-
+commit or rollback.
+@return always DB_SUCCESS */
+UNIV_INTERN
ulint
trx_savepoint_for_mysql(
/*====================*/
- /* out: always DB_SUCCESS */
- trx_t* trx, /* in: transaction handle */
- const char* savepoint_name, /* in: savepoint name */
- ib_longlong binlog_cache_pos) /* in: MySQL binlog cache
+ trx_t* trx, /*!< in: transaction handle */
+ const char* savepoint_name, /*!< in: savepoint name */
+ ib_int64_t binlog_cache_pos) /*!< in: MySQL binlog cache
position corresponding to this
connection at the time of the
savepoint */
@@ -358,19 +350,17 @@ trx_savepoint_for_mysql(
return(DB_SUCCESS);
}
-/***********************************************************************
+/*******************************************************************//**
Releases only the named savepoint. Savepoints which were set after this
-savepoint are left as is. */
-
+savepoint are left as is.
+@return if no savepoint of the name found then DB_NO_SAVEPOINT,
+otherwise DB_SUCCESS */
+UNIV_INTERN
ulint
trx_release_savepoint_for_mysql(
/*============================*/
- /* out: if no savepoint
- of the name found then
- DB_NO_SAVEPOINT,
- otherwise DB_SUCCESS */
- trx_t* trx, /* in: transaction handle */
- const char* savepoint_name) /* in: savepoint name */
+ trx_t* trx, /*!< in: transaction handle */
+ const char* savepoint_name) /*!< in: savepoint name */
{
trx_named_savept_t* savep;
@@ -388,14 +378,28 @@ trx_release_savepoint_for_mysql(
return(DB_NO_SAVEPOINT);
}
-/***********************************************************************
-Returns a transaction savepoint taken at this point in time. */
+/*******************************************************************//**
+Determines if this transaction is rolling back an incomplete transaction
+in crash recovery.
+@return TRUE if trx is an incomplete transaction that is being rolled
+back in crash recovery */
+UNIV_INTERN
+ibool
+trx_is_recv(
+/*========*/
+ const trx_t* trx) /*!< in: transaction */
+{
+ return(trx == trx_roll_crash_recv_trx);
+}
+/*******************************************************************//**
+Returns a transaction savepoint taken at this point in time.
+@return savepoint */
+UNIV_INTERN
trx_savept_t
trx_savept_take(
/*============*/
- /* out: savepoint */
- trx_t* trx) /* in: transaction */
+ trx_t* trx) /*!< in: transaction */
{
trx_savept_t savept;
@@ -404,94 +408,25 @@ trx_savept_take(
return(savept);
}
-/***********************************************************************
-Rollback or clean up transactions which have no user session. If the
-transaction already was committed, then we clean up a possible insert
-undo log. If the transaction was not yet committed, then we roll it back.
-Note: this is done in a background thread. */
-
-os_thread_ret_t
-trx_rollback_or_clean_all_without_sess(
-/*===================================*/
- /* out: a dummy parameter */
- void* arg __attribute__((unused)))
- /* in: a dummy parameter required by
- os_thread_create */
+/*******************************************************************//**
+Roll back an active transaction. */
+static
+void
+trx_rollback_active(
+/*================*/
+ trx_t* trx) /*!< in/out: transaction */
{
mem_heap_t* heap;
que_fork_t* fork;
que_thr_t* thr;
roll_node_t* roll_node;
- trx_t* trx;
dict_table_t* table;
- ib_longlong rows_to_undo;
+ ib_int64_t rows_to_undo;
const char* unit = "";
- int err;
-
- mutex_enter(&kernel_mutex);
-
- /* Open a dummy session */
-
- if (!trx_dummy_sess) {
- trx_dummy_sess = sess_open();
- }
-
- mutex_exit(&kernel_mutex);
-
- if (UT_LIST_GET_FIRST(trx_sys->trx_list)) {
+ ibool dictionary_locked = FALSE;
- fprintf(stderr,
- "InnoDB: Starting in background the rollback"
- " of uncommitted transactions\n");
- } else {
- goto leave_function;
- }
-loop:
heap = mem_heap_create(512);
- mutex_enter(&kernel_mutex);
-
- trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-
- while (trx) {
- if ((trx->sess || (trx->conc_state == TRX_NOT_STARTED))) {
- trx = UT_LIST_GET_NEXT(trx_list, trx);
- } else if (trx->conc_state == TRX_PREPARED) {
-
- trx->sess = trx_dummy_sess;
- trx = UT_LIST_GET_NEXT(trx_list, trx);
- } else {
- break;
- }
- }
-
- mutex_exit(&kernel_mutex);
-
- if (trx == NULL) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Rollback of non-prepared transactions"
- " completed\n");
-
- mem_heap_free(heap);
-
- goto leave_function;
- }
-
- trx->sess = trx_dummy_sess;
-
- if (trx->conc_state == TRX_COMMITTED_IN_MEMORY) {
- fprintf(stderr, "InnoDB: Cleaning up trx with id %lu %lu\n",
- (ulong) ut_dulint_get_high(trx->id),
- (ulong) ut_dulint_get_low(trx->id));
-
- trx_cleanup_at_db_startup(trx);
-
- mem_heap_free(heap);
-
- goto loop;
- }
-
fork = que_fork_create(NULL, NULL, QUE_FORK_RECOVERY, heap);
fork->trx = trx;
@@ -520,10 +455,9 @@ loop:
ut_print_timestamp(stderr);
fprintf(stderr,
- " InnoDB: Rolling back trx with id %lu %lu, %lu%s"
+ " InnoDB: Rolling back trx with id " TRX_ID_FMT ", %lu%s"
" rows to undo\n",
- (ulong) ut_dulint_get_high(trx->id),
- (ulong) ut_dulint_get_low(trx->id),
+ TRX_ID_PREP_PRINTF(trx->id),
(ulong) rows_to_undo, unit);
mutex_exit(&kernel_mutex);
@@ -531,8 +465,9 @@ loop:
trx->mysql_process_no = os_proc_get_number();
- if (trx->dict_operation) {
+ if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
row_mysql_lock_data_dictionary(trx);
+ dictionary_locked = TRUE;
}
que_run_threads(thr);
@@ -553,7 +488,9 @@ loop:
mutex_exit(&kernel_mutex);
- if (trx->dict_operation) {
+ if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE
+ && !ut_dulint_is_zero(trx->table_id)) {
+
/* If the transaction was for a dictionary operation, we
drop the relevant table, if it still exists */
@@ -566,30 +503,120 @@ loop:
table = dict_table_get_on_id_low(trx->table_id);
if (table) {
+ ulint err;
+
fputs("InnoDB: Table found: dropping table ", stderr);
ut_print_name(stderr, trx, TRUE, table->name);
fputs(" in recovery\n", stderr);
err = row_drop_table_for_mysql(table->name, trx, TRUE);
+ trx_commit_for_mysql(trx);
ut_a(err == (int) DB_SUCCESS);
}
}
- if (trx->dict_operation) {
+ if (dictionary_locked) {
row_mysql_unlock_data_dictionary(trx);
}
- fprintf(stderr, "\nInnoDB: Rolling back of trx id %lu %lu completed\n",
- (ulong) ut_dulint_get_high(trx->id),
- (ulong) ut_dulint_get_low(trx->id));
+ fprintf(stderr, "\nInnoDB: Rolling back of trx id " TRX_ID_FMT
+ " completed\n",
+ TRX_ID_PREP_PRINTF(trx->id));
mem_heap_free(heap);
trx_roll_crash_recv_trx = NULL;
+}
+
+/*******************************************************************//**
+Rollback or clean up any incomplete transactions which were
+encountered in crash recovery. If the transaction already was
+committed, then we clean up a possible insert undo log. If the
+transaction was not yet committed, then we roll it back. */
+UNIV_INTERN
+void
+trx_rollback_or_clean_recovered(
+/*============================*/
+ ibool all) /*!< in: FALSE=roll back dictionary transactions;
+ TRUE=roll back all non-PREPARED transactions */
+{
+ trx_t* trx;
+
+ mutex_enter(&kernel_mutex);
+
+ if (!UT_LIST_GET_FIRST(trx_sys->trx_list)) {
+ goto leave_function;
+ }
+
+ if (all) {
+ fprintf(stderr,
+ "InnoDB: Starting in background the rollback"
+ " of uncommitted transactions\n");
+ }
+
+ mutex_exit(&kernel_mutex);
+
+loop:
+ mutex_enter(&kernel_mutex);
+
+ for (trx = UT_LIST_GET_FIRST(trx_sys->trx_list); trx;
+ trx = UT_LIST_GET_NEXT(trx_list, trx)) {
+ if (!trx->is_recovered) {
+ continue;
+ }
+
+ switch (trx->conc_state) {
+ case TRX_NOT_STARTED:
+ case TRX_PREPARED:
+ continue;
+
+ case TRX_COMMITTED_IN_MEMORY:
+ mutex_exit(&kernel_mutex);
+ fprintf(stderr,
+ "InnoDB: Cleaning up trx with id "
+ TRX_ID_FMT "\n",
+ TRX_ID_PREP_PRINTF(trx->id));
+ trx_cleanup_at_db_startup(trx);
+ goto loop;
+
+ case TRX_ACTIVE:
+ if (all || trx_get_dict_operation(trx)
+ != TRX_DICT_OP_NONE) {
+ mutex_exit(&kernel_mutex);
+ trx_rollback_active(trx);
+ goto loop;
+ }
+ }
+ }
- goto loop;
+ if (all) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Rollback of non-prepared"
+ " transactions completed\n");
+ }
leave_function:
+ mutex_exit(&kernel_mutex);
+}
+
+/*******************************************************************//**
+Rollback or clean up any incomplete transactions which were
+encountered in crash recovery. If the transaction already was
+committed, then we clean up a possible insert undo log. If the
+transaction was not yet committed, then we roll it back.
+Note: this is done in a background thread.
+@return a dummy parameter */
+UNIV_INTERN
+os_thread_ret_t
+trx_rollback_or_clean_all_recovered(
+/*================================*/
+ void* arg __attribute__((unused)))
+ /*!< in: a dummy parameter required by
+ os_thread_create */
+{
+ trx_rollback_or_clean_recovered(TRUE);
+
/* We count the number of threads in os_thread_exit(). A created
thread should always use that to exit and not use return() to exit. */
@@ -598,9 +625,10 @@ leave_function:
OS_THREAD_DUMMY_RETURN;
}
-/***********************************************************************
-Creates an undo number array. */
-
+/*******************************************************************//**
+Creates an undo number array.
+@return own: undo number array */
+UNIV_INTERN
trx_undo_arr_t*
trx_undo_arr_create(void)
/*=====================*/
@@ -628,29 +656,28 @@ trx_undo_arr_create(void)
return(arr);
}
-/***********************************************************************
+/*******************************************************************//**
Frees an undo number array. */
-
+UNIV_INTERN
void
trx_undo_arr_free(
/*==============*/
- trx_undo_arr_t* arr) /* in: undo number array */
+ trx_undo_arr_t* arr) /*!< in: undo number array */
{
ut_ad(arr->n_used == 0);
mem_heap_free(arr->heap);
}
-/***********************************************************************
-Stores info of an undo log record to the array if it is not stored yet. */
+/*******************************************************************//**
+Stores info of an undo log record to the array if it is not stored yet.
+@return FALSE if the record already existed in the array */
static
ibool
trx_undo_arr_store_info(
/*====================*/
- /* out: FALSE if the record already existed in the
- array */
- trx_t* trx, /* in: transaction */
- dulint undo_no)/* in: undo number */
+ trx_t* trx, /*!< in: transaction */
+ undo_no_t undo_no)/*!< in: undo number */
{
trx_undo_inf_t* cell;
trx_undo_inf_t* stored_here;
@@ -703,14 +730,14 @@ trx_undo_arr_store_info(
}
}
-/***********************************************************************
+/*******************************************************************//**
Removes an undo number from the array. */
static
void
trx_undo_arr_remove_info(
/*=====================*/
- trx_undo_arr_t* arr, /* in: undo number array */
- dulint undo_no)/* in: undo number */
+ trx_undo_arr_t* arr, /*!< in: undo number array */
+ undo_no_t undo_no)/*!< in: undo number */
{
trx_undo_inf_t* cell;
ulint n_used;
@@ -737,19 +764,18 @@ trx_undo_arr_remove_info(
}
}
-/***********************************************************************
-Gets the biggest undo number in an array. */
+/*******************************************************************//**
+Gets the biggest undo number in an array.
+@return biggest value, ut_dulint_zero if the array is empty */
static
-dulint
+undo_no_t
trx_undo_arr_get_biggest(
/*=====================*/
- /* out: biggest value, ut_dulint_zero if
- the array is empty */
- trx_undo_arr_t* arr) /* in: undo number array */
+ trx_undo_arr_t* arr) /*!< in: undo number array */
{
trx_undo_inf_t* cell;
ulint n_used;
- dulint biggest;
+ undo_no_t biggest;
ulint n;
ulint i;
@@ -774,17 +800,17 @@ trx_undo_arr_get_biggest(
}
}
-/***************************************************************************
+/***********************************************************************//**
Tries truncate the undo logs. */
-
+UNIV_INTERN
void
trx_roll_try_truncate(
/*==================*/
- trx_t* trx) /* in: transaction */
+ trx_t* trx) /*!< in/out: transaction */
{
trx_undo_arr_t* arr;
- dulint limit;
- dulint biggest;
+ undo_no_t limit;
+ undo_no_t biggest;
ut_ad(mutex_own(&(trx->undo_mutex)));
ut_ad(mutex_own(&((trx->rseg)->mutex)));
@@ -813,17 +839,17 @@ trx_roll_try_truncate(
}
}
-/***************************************************************************
+/***********************************************************************//**
Pops the topmost undo log record in a single undo log and updates the info
-about the topmost record in the undo log memory struct. */
+about the topmost record in the undo log memory struct.
+@return undo log record, the page s-latched */
static
trx_undo_rec_t*
trx_roll_pop_top_rec(
/*=================*/
- /* out: undo log record, the page s-latched */
- trx_t* trx, /* in: transaction */
- trx_undo_t* undo, /* in: undo log */
- mtr_t* mtr) /* in: mtr */
+ trx_t* trx, /*!< in: transaction */
+ trx_undo_t* undo, /*!< in: undo log */
+ mtr_t* mtr) /*!< in: mtr */
{
page_t* undo_page;
ulint offset;
@@ -832,7 +858,7 @@ trx_roll_pop_top_rec(
ut_ad(mutex_own(&(trx->undo_mutex)));
- undo_page = trx_undo_page_get_s_latched(undo->space,
+ undo_page = trx_undo_page_get_s_latched(undo->space, undo->zip_size,
undo->top_page_no, mtr);
offset = undo->top_offset;
@@ -847,14 +873,14 @@ trx_roll_pop_top_rec(
undo->empty = TRUE;
} else {
- prev_rec_page = buf_frame_align(prev_rec);
+ prev_rec_page = page_align(prev_rec);
if (prev_rec_page != undo_page) {
trx->pages_undone++;
}
- undo->top_page_no = buf_frame_get_page_no(prev_rec_page);
+ undo->top_page_no = page_get_page_no(prev_rec_page);
undo->top_offset = prev_rec - prev_rec_page;
undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec);
}
@@ -862,30 +888,29 @@ trx_roll_pop_top_rec(
return(undo_page + offset);
}
-/************************************************************************
+/********************************************************************//**
Pops the topmost record when the two undo logs of a transaction are seen
as a single stack of records ordered by their undo numbers. Inserts the
undo number of the popped undo record to the array of currently processed
undo numbers in the transaction. When the query thread finishes processing
-of this undo record, it must be released with trx_undo_rec_release. */
-
+of this undo record, it must be released with trx_undo_rec_release.
+@return undo log record copied to heap, NULL if none left, or if the
+undo number of the top record would be less than the limit */
+UNIV_INTERN
trx_undo_rec_t*
trx_roll_pop_top_rec_of_trx(
/*========================*/
- /* out: undo log record copied to heap, NULL
- if none left, or if the undo number of the
- top record would be less than the limit */
- trx_t* trx, /* in: transaction */
- dulint limit, /* in: least undo number we need */
- dulint* roll_ptr,/* out: roll pointer to undo record */
- mem_heap_t* heap) /* in: memory heap where copied */
+ trx_t* trx, /*!< in: transaction */
+ undo_no_t limit, /*!< in: least undo number we need */
+ roll_ptr_t* roll_ptr,/*!< out: roll pointer to undo record */
+ mem_heap_t* heap) /*!< in: memory heap where copied */
{
trx_undo_t* undo;
trx_undo_t* ins_undo;
trx_undo_t* upd_undo;
trx_undo_rec_t* undo_rec;
trx_undo_rec_t* undo_rec_copy;
- dulint undo_no;
+ undo_no_t undo_no;
ibool is_insert;
trx_rseg_t* rseg;
ulint progress_pct;
@@ -995,17 +1020,17 @@ try_again:
return(undo_rec_copy);
}
-/************************************************************************
+/********************************************************************//**
Reserves an undo log record for a query thread to undo. This should be
called if the query thread gets the undo log record not using the pop
-function above. */
-
+function above.
+@return TRUE if succeeded */
+UNIV_INTERN
ibool
trx_undo_rec_reserve(
/*=================*/
- /* out: TRUE if succeeded */
- trx_t* trx, /* in: transaction */
- dulint undo_no)/* in: undo number of the record */
+ trx_t* trx, /*!< in/out: transaction */
+ undo_no_t undo_no)/*!< in: undo number of the record */
{
ibool ret;
@@ -1018,14 +1043,14 @@ trx_undo_rec_reserve(
return(ret);
}
-/***********************************************************************
+/*******************************************************************//**
Releases a reserved undo record. */
-
+UNIV_INTERN
void
trx_undo_rec_release(
/*=================*/
- trx_t* trx, /* in: transaction */
- dulint undo_no)/* in: undo number */
+ trx_t* trx, /*!< in/out: transaction */
+ undo_no_t undo_no)/*!< in: undo number */
{
trx_undo_arr_t* arr;
@@ -1038,15 +1063,15 @@ trx_undo_rec_release(
mutex_exit(&(trx->undo_mutex));
}
-/*************************************************************************
+/*********************************************************************//**
Starts a rollback operation. */
-
+UNIV_INTERN
void
trx_rollback(
/*=========*/
- trx_t* trx, /* in: transaction */
- trx_sig_t* sig, /* in: signal starting the rollback */
- que_thr_t** next_thr)/* in/out: next query thread to run;
+ trx_t* trx, /*!< in: transaction */
+ trx_sig_t* sig, /*!< in: signal starting the rollback */
+ que_thr_t** next_thr)/*!< in/out: next query thread to run;
if the value which is passed in is
a pointer to a NULL pointer, then the
calling function can start running
@@ -1109,17 +1134,17 @@ trx_rollback(
}
}
-/********************************************************************
+/****************************************************************//**
Builds an undo 'query' graph for a transaction. The actual rollback is
performed by executing this query graph like a query subprocedure call.
The reply about the completion of the rollback will be sent by this
-graph. */
-
+graph.
+@return own: the query graph */
+UNIV_INTERN
que_t*
trx_roll_graph_build(
/*=================*/
- /* out, own: the query graph */
- trx_t* trx) /* in: trx handle */
+ trx_t* trx) /*!< in: trx handle */
{
mem_heap_t* heap;
que_fork_t* fork;
@@ -1141,14 +1166,14 @@ trx_roll_graph_build(
return(fork);
}
-/*************************************************************************
+/*********************************************************************//**
Finishes error processing after the necessary partial rollback has been
done. */
static
void
trx_finish_error_processing(
/*========================*/
- trx_t* trx) /* in: transaction */
+ trx_t* trx) /*!< in: transaction */
{
trx_sig_t* sig;
trx_sig_t* next_sig;
@@ -1171,14 +1196,14 @@ trx_finish_error_processing(
trx->que_state = TRX_QUE_RUNNING;
}
-/*************************************************************************
+/*********************************************************************//**
Finishes a partial rollback operation. */
static
void
trx_finish_partial_rollback_off_kernel(
/*===================================*/
- trx_t* trx, /* in: transaction */
- que_thr_t** next_thr)/* in/out: next query thread to run;
+ trx_t* trx, /*!< in: transaction */
+ que_thr_t** next_thr)/*!< in/out: next query thread to run;
if the value which is passed in is a pointer
to a NULL pointer, then the calling function
can start running a new query thread; if this
@@ -1199,15 +1224,15 @@ trx_finish_partial_rollback_off_kernel(
trx->que_state = TRX_QUE_RUNNING;
}
-/********************************************************************
+/****************************************************************//**
Finishes a transaction rollback. */
-
+UNIV_INTERN
void
trx_finish_rollback_off_kernel(
/*===========================*/
- que_t* graph, /* in: undo graph which can now be freed */
- trx_t* trx, /* in: transaction */
- que_thr_t** next_thr)/* in/out: next query thread to run;
+ que_t* graph, /*!< in: undo graph which can now be freed */
+ trx_t* trx, /*!< in: transaction */
+ que_thr_t** next_thr)/*!< in/out: next query thread to run;
if the value which is passed in is
a pointer to a NULL pointer, then the
calling function can start running
@@ -1267,14 +1292,14 @@ trx_finish_rollback_off_kernel(
}
}
-/*************************************************************************
-Creates a rollback command node struct. */
-
+/*********************************************************************//**
+Creates a rollback command node struct.
+@return own: rollback node struct */
+UNIV_INTERN
roll_node_t*
roll_node_create(
/*=============*/
- /* out, own: rollback node struct */
- mem_heap_t* heap) /* in: mem heap where created */
+ mem_heap_t* heap) /*!< in: mem heap where created */
{
roll_node_t* node;
@@ -1287,14 +1312,14 @@ roll_node_create(
return(node);
}
-/***************************************************************
-Performs an execution step for a rollback command node in a query graph. */
-
+/***********************************************************//**
+Performs an execution step for a rollback command node in a query graph.
+@return query thread to run next, or NULL */
+UNIV_INTERN
que_thr_t*
trx_rollback_step(
/*==============*/
- /* out: query thread to run next, or NULL */
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
roll_node_t* node;
ulint sig_no;
diff --git a/storage/innobase/trx/trx0rseg.c b/storage/innobase/trx/trx0rseg.c
index 020f217c90b..8d754788e2a 100644
--- a/storage/innobase/trx/trx0rseg.c
+++ b/storage/innobase/trx/trx0rseg.c
@@ -1,7 +1,24 @@
-/******************************************************
-Rollback segment
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1996 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file trx/trx0rseg.c
+Rollback segment
Created 3/26/1996 Heikki Tuuri
*******************************************************/
@@ -17,14 +34,14 @@ Created 3/26/1996 Heikki Tuuri
#include "srv0srv.h"
#include "trx0purge.h"
-/**********************************************************************
-Looks for a rollback segment, based on the rollback segment id. */
-
+/******************************************************************//**
+Looks for a rollback segment, based on the rollback segment id.
+@return rollback segment */
+UNIV_INTERN
trx_rseg_t*
trx_rseg_get_on_id(
/*===============*/
- /* out: rollback segment */
- ulint id) /* in: rollback segment id */
+ ulint id) /*!< in: rollback segment id */
{
trx_rseg_t* rseg;
@@ -39,29 +56,30 @@ trx_rseg_get_on_id(
return(rseg);
}
-/********************************************************************
+/****************************************************************//**
Creates a rollback segment header. This function is called only when
-a new rollback segment is created in the database. */
-
+a new rollback segment is created in the database.
+@return page number of the created segment, FIL_NULL if fail */
+UNIV_INTERN
ulint
trx_rseg_header_create(
/*===================*/
- /* out: page number of the created segment,
- FIL_NULL if fail */
- ulint space, /* in: space id */
- ulint max_size, /* in: max size in pages */
- ulint* slot_no, /* out: rseg id == slot number in trx sys */
- mtr_t* mtr) /* in: mtr */
+ ulint space, /*!< in: space id */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint max_size, /*!< in: max size in pages */
+ ulint* slot_no, /*!< out: rseg id == slot number in trx sys */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint page_no;
trx_rsegf_t* rsegf;
trx_sysf_t* sys_header;
ulint i;
- page_t* page;
+ buf_block_t* block;
ut_ad(mtr);
ut_ad(mutex_own(&kernel_mutex));
- ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space),
+ ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL),
MTR_MEMO_X_LOCK));
sys_header = trx_sysf_get(mtr);
@@ -73,22 +91,21 @@ trx_rseg_header_create(
}
/* Allocate a new file segment for the rollback segment */
- page = fseg_create(space, 0, TRX_RSEG + TRX_RSEG_FSEG_HEADER, mtr);
+ block = fseg_create(space, 0,
+ TRX_RSEG + TRX_RSEG_FSEG_HEADER, mtr);
- if (page == NULL) {
+ if (block == NULL) {
/* No space left */
return(FIL_NULL);
}
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_RSEG_HEADER_NEW);
-#endif /* UNIV_SYNC_DEBUG */
+ buf_block_dbg_add_level(block, SYNC_RSEG_HEADER_NEW);
- page_no = buf_frame_get_page_no(page);
+ page_no = buf_block_get_page_no(block);
/* Get the rollback segment file page */
- rsegf = trx_rsegf_get_new(space, page_no, mtr);
+ rsegf = trx_rsegf_get_new(space, zip_size, page_no, mtr);
/* Initialize max size field */
mlog_write_ulint(rsegf + TRX_RSEG_MAX_SIZE, max_size,
@@ -114,20 +131,65 @@ trx_rseg_header_create(
return(page_no);
}
+/***********************************************************************//**
+Frees an instance of the rollback segment in memory. Frees the rseg
+mutex, asserts that the update and insert undo lists are empty, removes
+and frees every undo object on the cached update and insert undo lists,
+clears the segment's slot in trx_sys, and finally frees the object
+itself. */
+UNIV_INTERN
+void
+trx_rseg_mem_free(
+/*==============*/
+ trx_rseg_t* rseg) /*!< in, own: instance to free */
+{
+ trx_undo_t* undo;
+
+ mutex_free(&rseg->mutex);
+
+ /* There can't be any active transactions: the update and insert
+ undo lists must both be empty, which is asserted below. */
+ ut_a(UT_LIST_GET_LEN(rseg->update_undo_list) == 0);
+ ut_a(UT_LIST_GET_LEN(rseg->insert_undo_list) == 0);
+
+ undo = UT_LIST_GET_FIRST(rseg->update_undo_cached);
+
+ while (undo != NULL) {
+ trx_undo_t* prev_undo = undo;
+
+ undo = UT_LIST_GET_NEXT(undo_list, undo); /* step first: prev_undo is freed below */
+ UT_LIST_REMOVE(undo_list, rseg->update_undo_cached, prev_undo);
+
+ trx_undo_mem_free(prev_undo);
+ }
+
+ undo = UT_LIST_GET_FIRST(rseg->insert_undo_cached);
+
+ while (undo != NULL) {
+ trx_undo_t* prev_undo = undo;
+
+ undo = UT_LIST_GET_NEXT(undo_list, undo); /* step first: prev_undo is freed below */
+ UT_LIST_REMOVE(undo_list, rseg->insert_undo_cached, prev_undo);
+
+ trx_undo_mem_free(prev_undo);
+ }
+
+ trx_sys_set_nth_rseg(trx_sys, rseg->id, NULL); /* clear this segment's slot in trx_sys */
+
+ mem_free(rseg);
+}
+
/***************************************************************************
Creates and initializes a rollback segment object. The values for the
fields are read from the header. The object is inserted to the rseg
list of the trx system object and a pointer is inserted in the rseg
-array in the trx system object. */
+array in the trx system object.
+@return own: rollback segment object */
static
trx_rseg_t*
trx_rseg_mem_create(
/*================*/
- /* out, own: rollback segment object */
- ulint id, /* in: rollback segment id */
- ulint space, /* in: space where the segment placed */
- ulint page_no, /* in: page number of the segment header */
- mtr_t* mtr) /* in: mtr */
+ ulint id, /*!< in: rollback segment id */
+ ulint space, /*!< in: space where the segment placed */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page_no, /*!< in: page number of the segment header */
+ mtr_t* mtr) /*!< in: mtr */
{
trx_rsegf_t* rseg_header;
trx_rseg_t* rseg;
@@ -142,6 +204,7 @@ trx_rseg_mem_create(
rseg->id = id;
rseg->space = space;
+ rseg->zip_size = zip_size;
rseg->page_no = page_no;
mutex_create(&rseg->mutex, SYNC_RSEG);
@@ -150,7 +213,7 @@ trx_rseg_mem_create(
trx_sys_set_nth_rseg(trx_sys, id, rseg);
- rseg_header = trx_rsegf_get_new(space, page_no, mtr);
+ rseg_header = trx_rsegf_get_new(space, zip_size, page_no, mtr);
rseg->max_size = mtr_read_ulint(rseg_header + TRX_RSEG_MAX_SIZE,
MLOG_4BYTES, mtr);
@@ -172,7 +235,8 @@ trx_rseg_mem_create(
rseg->last_page_no = node_addr.page;
rseg->last_offset = node_addr.boffset;
- undo_log_hdr = trx_undo_page_get(rseg->space, node_addr.page,
+ undo_log_hdr = trx_undo_page_get(rseg->space, rseg->zip_size,
+ node_addr.page,
mtr) + node_addr.boffset;
rseg->last_trx_no = mtr_read_dulint(
@@ -186,15 +250,15 @@ trx_rseg_mem_create(
return(rseg);
}
-/*************************************************************************
+/*********************************************************************//**
Creates the memory copies for rollback segments and initializes the
rseg list and array in trx_sys at a database startup. */
-
+UNIV_INTERN
void
trx_rseg_list_and_array_init(
/*=========================*/
- trx_sysf_t* sys_header, /* in: trx system header */
- mtr_t* mtr) /* in: mtr */
+ trx_sysf_t* sys_header, /*!< in: trx system header */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint i;
ulint page_no;
@@ -212,33 +276,39 @@ trx_rseg_list_and_array_init(
trx_sys_set_nth_rseg(trx_sys, i, NULL);
} else {
+ ulint zip_size;
+
space = trx_sysf_rseg_get_space(sys_header, i, mtr);
- trx_rseg_mem_create(i, space, page_no, mtr);
+ zip_size = space ? fil_space_get_zip_size(space) : 0;
+
+ trx_rseg_mem_create(i, space, zip_size, page_no, mtr);
}
}
}
-/********************************************************************
-Creates a new rollback segment to the database. */
-
+/****************************************************************//**
+Creates a new rollback segment to the database.
+@return the created segment object, NULL if fail */
+UNIV_INTERN
trx_rseg_t*
trx_rseg_create(
/*============*/
- /* out: the created segment object, NULL if
- fail */
- ulint space, /* in: space id */
- ulint max_size, /* in: max size in pages */
- ulint* id, /* out: rseg id */
- mtr_t* mtr) /* in: mtr */
+ ulint space, /*!< in: space id */
+ ulint max_size, /*!< in: max size in pages */
+ ulint* id, /*!< out: rseg id */
+ mtr_t* mtr) /*!< in: mtr */
{
+ ulint flags;
+ ulint zip_size;
ulint page_no;
trx_rseg_t* rseg;
- mtr_x_lock(fil_space_get_latch(space), mtr);
+ mtr_x_lock(fil_space_get_latch(space, &flags), mtr);
+ zip_size = dict_table_flags_to_zip_size(flags);
mutex_enter(&kernel_mutex);
- page_no = trx_rseg_header_create(space, max_size, id, mtr);
+ page_no = trx_rseg_header_create(space, zip_size, max_size, id, mtr);
if (page_no == FIL_NULL) {
@@ -246,7 +316,7 @@ trx_rseg_create(
return(NULL);
}
- rseg = trx_rseg_mem_create(*id, space, page_no, mtr);
+ rseg = trx_rseg_mem_create(*id, space, zip_size, page_no, mtr);
mutex_exit(&kernel_mutex);
diff --git a/storage/innobase/trx/trx0sys.c b/storage/innobase/trx/trx0sys.c
index 19c5159e15f..79e5af1c677 100644
--- a/storage/innobase/trx/trx0sys.c
+++ b/storage/innobase/trx/trx0sys.c
@@ -1,7 +1,24 @@
-/******************************************************
-Transaction system
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1996 Innobase Oy
+*****************************************************************************/
+
+/**************************************************//**
+@file trx/trx0sys.c
+Transaction system
Created 3/26/1996 Heikki Tuuri
*******************************************************/
@@ -12,8 +29,10 @@ Created 3/26/1996 Heikki Tuuri
#include "trx0sys.ic"
#endif
+#ifndef UNIV_HOTBACKUP
#include "fsp0fsp.h"
#include "mtr0log.h"
+#include "mtr0log.h"
#include "trx0trx.h"
#include "trx0rseg.h"
#include "trx0undo.h"
@@ -21,49 +40,107 @@ Created 3/26/1996 Heikki Tuuri
#include "trx0purge.h"
#include "log0log.h"
#include "os0file.h"
-
-/* The transaction system */
-trx_sys_t* trx_sys = NULL;
-trx_doublewrite_t* trx_doublewrite = NULL;
-ibool trx_doublewrite_buf_is_being_created = FALSE;
-
-/* The following is set to TRUE when we are upgrading from the old format data
-files to the new >= 4.1.x format multiple tablespaces format data files */
-
-ibool trx_doublewrite_must_reset_space_ids = FALSE;
-
-/* The following is TRUE when we are using the database in the new format,
-i.e., we have successfully upgraded, or have created a new database
-installation */
-
-ibool trx_sys_multiple_tablespace_format = FALSE;
-
-/* In a MySQL replication slave, in crash recovery we store the master log
-file name and position here. We have successfully got the updates to InnoDB
-up to this position. If .._pos is -1, it means no crash recovery was needed,
-or there was no master log position info inside InnoDB. */
-
-char trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
-ib_longlong trx_sys_mysql_master_log_pos = -1;
-
-/* If this MySQL server uses binary logging, after InnoDB has been inited
+#include "read0read.h"
+
+/** The file format tag structure with id and name. */
+struct file_format_struct {
+ ulint id; /*!< id of the file format */
+ const char* name; /*!< text representation of the
+ file format */
+ mutex_t mutex; /*!< covers changes to the above
+ fields */
+};
+
+/** The file format tag */
+typedef struct file_format_struct file_format_t;
+
+/** The transaction system */
+UNIV_INTERN trx_sys_t* trx_sys = NULL;
+/** The doublewrite buffer */
+UNIV_INTERN trx_doublewrite_t* trx_doublewrite = NULL;
+
+/** The following is set to TRUE when we are upgrading from pre-4.1
+format data files to the multiple tablespaces format data files */
+UNIV_INTERN ibool trx_doublewrite_must_reset_space_ids = FALSE;
+/** Set to TRUE when the doublewrite buffer is being created */
+UNIV_INTERN ibool trx_doublewrite_buf_is_being_created = FALSE;
+
+/** The following is TRUE when we are using the database in the
+post-4.1 format, i.e., we have successfully upgraded, or have created
+a new database installation */
+UNIV_INTERN ibool trx_sys_multiple_tablespace_format = FALSE;
+
+/** In a MySQL replication slave, in crash recovery we store the master log
+file name and position here. */
+/* @{ */
+/** Master binlog file name */
+UNIV_INTERN char trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
+/** Master binlog file position. We have successfully got the updates
+up to this position. -1 means that no crash recovery was needed, or
+there was no master log position info inside InnoDB.*/
+UNIV_INTERN ib_int64_t trx_sys_mysql_master_log_pos = -1;
+/* @} */
+
+/** If this MySQL server uses binary logging, after InnoDB has been inited
and if it has done a crash recovery, we store the binlog file name and position
-here. If .._pos is -1, it means there was no binlog position info inside
-InnoDB. */
-
-char trx_sys_mysql_bin_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
-ib_longlong trx_sys_mysql_bin_log_pos = -1;
-
-
-/********************************************************************
-Determines if a page number is located inside the doublewrite buffer. */
-
+here. */
+/* @{ */
+/** Binlog file name */
+UNIV_INTERN char trx_sys_mysql_bin_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
+/** Binlog file position, or -1 if unknown */
+UNIV_INTERN ib_int64_t trx_sys_mysql_bin_log_pos = -1;
+/* @} */
+#endif /* !UNIV_HOTBACKUP */
+
+/** List of animal names representing file format. The names are in
+alphabetical order, one for each initial letter from A to Z.
+NOTE(review): presumably indexed by the file format id -- confirm
+against the users of this table. */
+static const char* file_format_name_map[] = {
+ "Antelope",
+ "Barracuda",
+ "Cheetah",
+ "Dragon",
+ "Elk",
+ "Fox",
+ "Gazelle",
+ "Hornet",
+ "Impala",
+ "Jaguar",
+ "Kangaroo",
+ "Leopard",
+ "Moose",
+ "Nautilus",
+ "Ocelot",
+ "Porpoise",
+ "Quail",
+ "Rabbit",
+ "Shark",
+ "Tiger",
+ "Urchin",
+ "Viper",
+ "Whale",
+ "Xenops",
+ "Yak",
+ "Zebra"
+};
+
+/** The number of elements in the file format name array, computed from
+the array itself so that it follows the table above. */
+static const ulint FILE_FORMAT_NAME_N
+ = sizeof(file_format_name_map) / sizeof(file_format_name_map[0]);
+
+#ifndef UNIV_HOTBACKUP
+/** This is used to track the maximum file format id known to InnoDB. It's
+updated via SET GLOBAL innodb_file_format_check = 'x' or when we open
+or create a table. */
+static file_format_t file_format_max;
+
+/****************************************************************//**
+Determines if a page number is located inside the doublewrite buffer.
+@return TRUE if the location is inside the two blocks of the
+doublewrite buffer */
+UNIV_INTERN
ibool
trx_doublewrite_page_inside(
/*========================*/
- /* out: TRUE if the location is inside
- the two blocks of the doublewrite buffer */
- ulint page_no) /* in: page number */
+ ulint page_no) /*!< in: page number */
{
if (trx_doublewrite == NULL) {
@@ -85,13 +162,13 @@ trx_doublewrite_page_inside(
return(FALSE);
}
-/********************************************************************
+/****************************************************************//**
Creates or initializes the doublewrite buffer at a database start. */
static
void
trx_doublewrite_init(
/*=================*/
- byte* doublewrite) /* in: pointer to the doublewrite buf
+ byte* doublewrite) /*!< in: pointer to the doublewrite buf
header on trx sys page */
{
trx_doublewrite = mem_alloc(sizeof(trx_doublewrite_t));
@@ -119,17 +196,17 @@ trx_doublewrite_init(
2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * sizeof(void*));
}
-/********************************************************************
+/****************************************************************//**
Marks the trx sys header when we have successfully upgraded to the >= 4.1.x
multiple tablespace format. */
-
+UNIV_INTERN
void
trx_sys_mark_upgraded_to_multiple_tablespaces(void)
/*===============================================*/
{
- page_t* page;
- byte* doublewrite;
- mtr_t mtr;
+ buf_block_t* block;
+ byte* doublewrite;
+ mtr_t mtr;
/* We upgraded to 4.1.x and reset the space id fields in the
doublewrite buffer. Let us mark to the trx_sys header that the upgrade
@@ -137,12 +214,11 @@ trx_sys_mark_upgraded_to_multiple_tablespaces(void)
mtr_start(&mtr);
- page = buf_page_get(TRX_SYS_SPACE, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
+ block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
+ RW_X_LATCH, &mtr);
+ buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
- doublewrite = page + TRX_SYS_DOUBLEWRITE;
+ doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE;
mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
@@ -150,22 +226,22 @@ trx_sys_mark_upgraded_to_multiple_tablespaces(void)
mtr_commit(&mtr);
/* Flush the modified pages to disk and make a checkpoint */
- log_make_checkpoint_at(ut_dulint_max, TRUE);
+ log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
trx_sys_multiple_tablespace_format = TRUE;
}
-/********************************************************************
+/****************************************************************//**
Creates the doublewrite buffer to a new InnoDB installation. The header of the
doublewrite buffer is placed on the trx system header page. */
-
+UNIV_INTERN
void
trx_sys_create_doublewrite_buf(void)
/*================================*/
{
- page_t* page;
- page_t* page2;
- page_t* new_page;
+ buf_block_t* block;
+ buf_block_t* block2;
+ buf_block_t* new_block;
byte* doublewrite;
byte* fseg_header;
ulint page_no;
@@ -183,12 +259,11 @@ start_again:
mtr_start(&mtr);
trx_doublewrite_buf_is_being_created = TRUE;
- page = buf_page_get(TRX_SYS_SPACE, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
+ block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
+ RW_X_LATCH, &mtr);
+ buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
- doublewrite = page + TRX_SYS_DOUBLEWRITE;
+ doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE;
if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
== TRX_SYS_DOUBLEWRITE_MAGIC_N) {
@@ -217,18 +292,16 @@ start_again:
exit(1);
}
- page2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
- TRX_SYS_DOUBLEWRITE
- + TRX_SYS_DOUBLEWRITE_FSEG, &mtr);
+ block2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
+ TRX_SYS_DOUBLEWRITE
+ + TRX_SYS_DOUBLEWRITE_FSEG, &mtr);
/* fseg_create acquires a second latch on the page,
therefore we must declare it: */
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page2, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
+ buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK);
- if (page2 == NULL) {
+ if (block2 == NULL) {
fprintf(stderr,
"InnoDB: Cannot create doublewrite buffer:"
" you must\n"
@@ -241,8 +314,8 @@ start_again:
exit(1);
}
- fseg_header = page + TRX_SYS_DOUBLEWRITE
- + TRX_SYS_DOUBLEWRITE_FSEG;
+ fseg_header = buf_block_get_frame(block)
+ + TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG;
prev_page_no = 0;
for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
@@ -271,11 +344,10 @@ start_again:
the page position in the tablespace, then the page
has not been written to in doublewrite. */
- new_page = buf_page_get(TRX_SYS_SPACE, page_no,
- RW_X_LATCH, &mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(new_page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
+ new_block = buf_page_get(TRX_SYS_SPACE, 0, page_no,
+ RW_X_LATCH, &mtr);
+ buf_block_dbg_add_level(new_block,
+ SYNC_NO_ORDER_CHECK);
if (i == FSP_EXTENT_SIZE / 2) {
ut_a(page_no == FSP_EXTENT_SIZE);
@@ -318,7 +390,7 @@ start_again:
mtr_commit(&mtr);
/* Flush the modified pages to disk and make a checkpoint */
- log_make_checkpoint_at(ut_dulint_max, TRUE);
+ log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
fprintf(stderr, "InnoDB: Doublewrite buffer created\n");
@@ -328,18 +400,18 @@ start_again:
}
}
-/********************************************************************
+/****************************************************************//**
At a database startup initializes the doublewrite buffer memory structure if
we already have a doublewrite buffer created in the data files. If we are
upgrading to an InnoDB version which supports multiple tablespaces, then this
function performs the necessary update operations. If we are in a crash
recovery, this function uses a possible doublewrite buffer to restore
half-written pages in the data files. */
-
+UNIV_INTERN
void
trx_sys_doublewrite_init_or_restore_pages(
/*======================================*/
- ibool restore_corrupt_pages)
+ ibool restore_corrupt_pages) /*!< in: TRUE=restore pages */
{
byte* buf;
byte* read_buf;
@@ -361,7 +433,7 @@ trx_sys_doublewrite_init_or_restore_pages(
/* Read the trx sys header to check if we are using the doublewrite
buffer */
- fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, TRX_SYS_PAGE_NO, 0,
+ fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, 0,
UNIV_PAGE_SIZE, read_buf, NULL);
doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
@@ -399,10 +471,10 @@ trx_sys_doublewrite_init_or_restore_pages(
/* Read the pages from the doublewrite buffer to memory */
- fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, block1, 0,
+ fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block1, 0,
TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
buf, NULL);
- fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, block2, 0,
+ fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block2, 0,
TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
NULL);
@@ -431,7 +503,7 @@ trx_sys_doublewrite_init_or_restore_pages(
+ i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
}
- fil_io(OS_FILE_WRITE, TRUE, 0, source_page_no, 0,
+ fil_io(OS_FILE_WRITE, TRUE, 0, 0, source_page_no, 0,
UNIV_PAGE_SIZE, page, NULL);
/* printf("Resetting space id in page %lu\n",
source_page_no); */
@@ -470,32 +542,37 @@ trx_sys_doublewrite_init_or_restore_pages(
/* It is an unwritten doublewrite buffer page:
do nothing */
} else {
- /* Read in the actual page from the data files */
+ ulint zip_size = fil_space_get_zip_size(space_id);
+
+ /* Read in the actual page from the file */
+ fil_io(OS_FILE_READ, TRUE, space_id, zip_size,
+ page_no, 0,
+ zip_size ? zip_size : UNIV_PAGE_SIZE,
+ read_buf, NULL);
- fil_io(OS_FILE_READ, TRUE, space_id, page_no, 0,
- UNIV_PAGE_SIZE, read_buf, NULL);
/* Check if the page is corrupt */
- if (buf_page_is_corrupted(read_buf)) {
+ if (UNIV_UNLIKELY
+ (buf_page_is_corrupted(read_buf, zip_size))) {
fprintf(stderr,
"InnoDB: Warning: database page"
" corruption or a failed\n"
- "InnoDB: file read of page %lu.\n",
- (ulong) page_no);
- fprintf(stderr,
+ "InnoDB: file read of"
+ " space %lu page %lu.\n"
"InnoDB: Trying to recover it from"
- " the doublewrite buffer.\n");
+ " the doublewrite buffer.\n",
+ (ulong) space_id, (ulong) page_no);
- if (buf_page_is_corrupted(page)) {
+ if (buf_page_is_corrupted(page, zip_size)) {
fprintf(stderr,
"InnoDB: Dump of the page:\n");
- buf_page_print(read_buf);
+ buf_page_print(read_buf, zip_size);
fprintf(stderr,
"InnoDB: Dump of"
" corresponding page"
" in doublewrite buffer:\n");
- buf_page_print(page);
+ buf_page_print(page, zip_size);
fprintf(stderr,
"InnoDB: Also the page in the"
@@ -517,8 +594,9 @@ trx_sys_doublewrite_init_or_restore_pages(
position */
fil_io(OS_FILE_WRITE, TRUE, space_id,
- page_no, 0,
- UNIV_PAGE_SIZE, page, NULL);
+ zip_size, page_no, 0,
+ zip_size ? zip_size : UNIV_PAGE_SIZE,
+ page, NULL);
fprintf(stderr,
"InnoDB: Recovered the page from"
" the doublewrite buffer.\n");
@@ -534,14 +612,14 @@ leave_func:
ut_free(unaligned_read_buf);
}
-/********************************************************************
-Checks that trx is in the trx list. */
-
+/****************************************************************//**
+Checks that trx is in the trx list.
+@return TRUE if is in */
+UNIV_INTERN
ibool
trx_in_trx_list(
/*============*/
- /* out: TRUE if is in */
- trx_t* in_trx) /* in: trx */
+ trx_t* in_trx) /*!< in: trx */
{
trx_t* trx;
@@ -562,9 +640,9 @@ trx_in_trx_list(
return(FALSE);
}
-/*********************************************************************
+/*****************************************************************//**
Writes the value of max_trx_id to the file based trx system header. */
-
+UNIV_INTERN
void
trx_sys_flush_max_trx_id(void)
/*==========================*/
@@ -583,20 +661,20 @@ trx_sys_flush_max_trx_id(void)
mtr_commit(&mtr);
}
-/*********************************************************************
+/*****************************************************************//**
Updates the offset information about the end of the MySQL binlog entry
which corresponds to the transaction just being committed. In a MySQL
replication slave updates the latest master binlog position up to which
replication has proceeded. */
-
+UNIV_INTERN
void
trx_sys_update_mysql_binlog_offset(
/*===============================*/
- const char* file_name,/* in: MySQL log file name */
- ib_longlong offset, /* in: position in that log file */
- ulint field, /* in: offset of the MySQL log info field in
+ const char* file_name,/*!< in: MySQL log file name */
+ ib_int64_t offset, /*!< in: position in that log file */
+ ulint field, /*!< in: offset of the MySQL log info field in
the trx sys header */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
trx_sysf_t* sys_header;
@@ -644,44 +722,10 @@ trx_sys_update_mysql_binlog_offset(
MLOG_4BYTES, mtr);
}
-#ifdef UNIV_HOTBACKUP
-/*********************************************************************
-Prints to stderr the MySQL binlog info in the system header if the
-magic number shows it valid. */
-
-void
-trx_sys_print_mysql_binlog_offset_from_page(
-/*========================================*/
- byte* page) /* in: buffer containing the trx system header page,
- i.e., page number TRX_SYS_PAGE_NO in the tablespace */
-{
- trx_sysf_t* sys_header;
-
- sys_header = page + TRX_SYS;
-
- if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
- == TRX_SYS_MYSQL_LOG_MAGIC_N) {
-
- fprintf(stderr,
- "ibbackup: Last MySQL binlog file position %lu %lu,"
- " file name %s\n",
- (ulong) mach_read_from_4(
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
- (ulong) mach_read_from_4(
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_LOW),
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_NAME);
- }
-}
-#endif /* UNIV_HOTBACKUP */
-
-/*********************************************************************
+/*****************************************************************//**
Stores the MySQL binlog offset info in the trx system header if
the magic number shows it valid, and print the info to stderr */
-
+UNIV_INTERN
void
trx_sys_print_mysql_binlog_offset(void)
/*===================================*/
@@ -712,8 +756,8 @@ trx_sys_print_mysql_binlog_offset(void)
+ TRX_SYS_MYSQL_LOG_OFFSET_LOW);
trx_sys_mysql_bin_log_pos
- = (((ib_longlong)trx_sys_mysql_bin_log_pos_high) << 32)
- + (ib_longlong)trx_sys_mysql_bin_log_pos_low;
+ = (((ib_int64_t)trx_sys_mysql_bin_log_pos_high) << 32)
+ + (ib_int64_t)trx_sys_mysql_bin_log_pos_low;
ut_memcpy(trx_sys_mysql_bin_log_name,
sys_header + TRX_SYS_MYSQL_LOG_INFO
@@ -728,10 +772,10 @@ trx_sys_print_mysql_binlog_offset(void)
mtr_commit(&mtr);
}
-/*********************************************************************
+/*****************************************************************//**
Prints to stderr the MySQL master log offset info in the trx system header if
the magic number shows it valid. */
-
+UNIV_INTERN
void
trx_sys_print_mysql_master_log_pos(void)
/*====================================*/
@@ -773,23 +817,23 @@ trx_sys_print_mysql_master_log_pos(void)
TRX_SYS_MYSQL_LOG_NAME_LEN);
trx_sys_mysql_master_log_pos
- = (((ib_longlong) mach_read_from_4(
+ = (((ib_int64_t) mach_read_from_4(
sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH)) << 32)
- + ((ib_longlong) mach_read_from_4(
+ + ((ib_int64_t) mach_read_from_4(
sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
+ TRX_SYS_MYSQL_LOG_OFFSET_LOW));
mtr_commit(&mtr);
}
-/********************************************************************
-Looks for a free slot for a rollback segment in the trx system file copy. */
-
+/****************************************************************//**
+Looks for a free slot for a rollback segment in the trx system file copy.
+@return slot index or ULINT_UNDEFINED if not found */
+UNIV_INTERN
ulint
trx_sysf_rseg_find_free(
/*====================*/
- /* out: slot index or ULINT_UNDEFINED if not found */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
trx_sysf_t* sys_header;
ulint page_no;
@@ -812,17 +856,18 @@ trx_sysf_rseg_find_free(
return(ULINT_UNDEFINED);
}
-/*********************************************************************
+/*****************************************************************//**
Creates the file page for the transaction system. This function is called only
at the database creation, before trx_sys_init. */
static
void
trx_sysf_create(
/*============*/
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
trx_sysf_t* sys_header;
ulint slot_no;
+ buf_block_t* block;
page_t* page;
ulint page_no;
ulint i;
@@ -833,17 +878,17 @@ trx_sysf_create(
then enter the kernel: we must do it in this order to conform
to the latching order rules. */
- mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE), mtr);
+ mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE, NULL), mtr);
mutex_enter(&kernel_mutex);
/* Create the trx sys file block in a new allocated file segment */
- page = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
- mtr);
- ut_a(buf_frame_get_page_no(page) == TRX_SYS_PAGE_NO);
+ block = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
+ mtr);
+ buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
+
+ ut_a(buf_block_get_page_no(block) == TRX_SYS_PAGE_NO);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_TRX_SYS_HEADER);
-#endif /* UNIV_SYNC_DEBUG */
+ page = buf_block_get_frame(block);
mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS,
MLOG_2BYTES, mtr);
@@ -880,7 +925,7 @@ trx_sysf_create(
+ page - sys_header);
/* Create the first rollback segment in the SYSTEM tablespace */
- page_no = trx_rseg_header_create(TRX_SYS_SPACE, ULINT_MAX, &slot_no,
+ page_no = trx_rseg_header_create(TRX_SYS_SPACE, 0, ULINT_MAX, &slot_no,
mtr);
ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
ut_a(page_no != FIL_NULL);
@@ -888,16 +933,16 @@ trx_sysf_create(
mutex_exit(&kernel_mutex);
}
-/*********************************************************************
+/*****************************************************************//**
Creates and initializes the central memory structures for the transaction
system. This is called when the database is started. */
-
+UNIV_INTERN
void
trx_sys_init_at_db_start(void)
/*==========================*/
{
trx_sysf_t* sys_header;
- ib_longlong rows_to_undo = 0;
+ ib_int64_t rows_to_undo = 0;
const char* unit = "";
trx_t* trx;
mtr_t mtr;
@@ -931,6 +976,7 @@ trx_sys_init_at_db_start(void)
2 * TRX_SYS_TRX_ID_WRITE_MARGIN);
UT_LIST_INIT(trx_sys->mysql_trx_list);
+ trx_dummy_sess = sess_open();
trx_lists_init_at_db_start();
if (UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
@@ -962,9 +1008,8 @@ trx_sys_init_at_db_start(void)
(ulong) UT_LIST_GET_LEN(trx_sys->trx_list),
(ulong) rows_to_undo, unit);
- fprintf(stderr, "InnoDB: Trx id counter is %lu %lu\n",
- (ulong) ut_dulint_get_high(trx_sys->max_trx_id),
- (ulong) ut_dulint_get_low(trx_sys->max_trx_id));
+ fprintf(stderr, "InnoDB: Trx id counter is " TRX_ID_FMT "\n",
+ TRX_ID_PREP_PRINTF(trx_sys->max_trx_id));
}
UT_LIST_INIT(trx_sys->view_list);
@@ -976,9 +1021,9 @@ trx_sys_init_at_db_start(void)
mtr_commit(&mtr);
}
-/*********************************************************************
+/*****************************************************************//**
Creates and initializes the transaction system at the database creation. */
-
+UNIV_INTERN
void
trx_sys_create(void)
/*================*/
@@ -993,3 +1038,576 @@ trx_sys_create(void)
trx_sys_init_at_db_start();
}
+
+/*****************************************************************//**
+Update the file format tag.
+@return always TRUE */
+static
+ibool
+trx_sys_file_format_max_write(
+/*==========================*/
+ ulint format_id, /*!< in: file format id */
+ const char** name) /*!< out: max file format name, can
+ be NULL */
+{
+ mtr_t mtr;
+ byte* ptr;
+ buf_block_t* block;
+ ulint tag_value_low;
+
+ mtr_start(&mtr);
+
+ block = buf_page_get(
+ TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
+
+ file_format_max.id = format_id;
+ file_format_max.name = trx_sys_file_format_id_to_name(format_id);
+
+ ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
+ tag_value_low = format_id + TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW;
+
+ if (name) {
+ *name = file_format_max.name;
+ }
+
+ mlog_write_dulint(
+ ptr,
+ ut_dulint_create(TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH,
+ tag_value_low),
+ &mtr);
+
+ mtr_commit(&mtr);
+
+ return(TRUE);
+}
+
+/*****************************************************************//**
+Read the file format tag.
+@return the file format or ULINT_UNDEFINED if not set. */
+static
+ulint
+trx_sys_file_format_max_read(void)
+/*==============================*/
+{
+ mtr_t mtr;
+ const byte* ptr;
+ const buf_block_t* block;
+ ulint format_id;
+ dulint file_format_id;
+
+ /* Since this is called during the startup phase it's safe to
+ read the value without a covering mutex. */
+ mtr_start(&mtr);
+
+ block = buf_page_get(
+ TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
+
+ ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
+ file_format_id = mach_read_from_8(ptr);
+
+ mtr_commit(&mtr);
+
+ format_id = file_format_id.low - TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW;
+
+ if (file_format_id.high != TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH
+ || format_id >= FILE_FORMAT_NAME_N) {
+
+ /* Either it has never been tagged, or garbage in it. */
+ return(ULINT_UNDEFINED);
+ }
+
+ return(format_id);
+}
+
+/*****************************************************************//**
+Get the name representation of the file format from its id.
+@return pointer to the name */
+UNIV_INTERN
+const char*
+trx_sys_file_format_id_to_name(
+/*===========================*/
+ const ulint id) /*!< in: id of the file format */
+{
+ ut_a(id < FILE_FORMAT_NAME_N);
+
+ return(file_format_name_map[id]);
+}
+
+/*****************************************************************//**
+Check for the max file format tag stored on disk. Note: If max_format_id
+is == DICT_TF_FORMAT_MAX + 1 then we only print a warning.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+ulint
+trx_sys_file_format_max_check(
+/*==========================*/
+ ulint max_format_id) /*!< in: max format id to check */
+{
+ ulint format_id;
+
+ /* Check the file format in the tablespace. Do not try to
+ recover if the file format is not supported by the engine
+ unless forced by the user. */
+ format_id = trx_sys_file_format_max_read();
+ if (format_id == ULINT_UNDEFINED) {
+ /* Format ID was not set. Set it to minimum possible
+ value. */
+ format_id = DICT_TF_FORMAT_51;
+ }
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: highest supported file format is %s.\n",
+ trx_sys_file_format_id_to_name(DICT_TF_FORMAT_MAX));
+
+ if (format_id > DICT_TF_FORMAT_MAX) {
+
+ ut_a(format_id < FILE_FORMAT_NAME_N);
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: %s: the system tablespace is in a file "
+ "format that this version doesn't support - %s\n",
+ ((max_format_id <= DICT_TF_FORMAT_MAX)
+ ? "Error" : "Warning"),
+ trx_sys_file_format_id_to_name(format_id));
+
+ if (max_format_id <= DICT_TF_FORMAT_MAX) {
+ return(DB_ERROR);
+ }
+ }
+
+ format_id = (format_id > max_format_id) ? format_id : max_format_id;
+
+ /* We don't need a mutex here, as this function should only
+ be called once at start up. */
+ file_format_max.id = format_id;
+ file_format_max.name = trx_sys_file_format_id_to_name(format_id);
+
+ return(DB_SUCCESS);
+}
+
+/*****************************************************************//**
+Set the file format id unconditionally except if it's already the
+same value.
+@return TRUE if value updated */
+UNIV_INTERN
+ibool
+trx_sys_file_format_max_set(
+/*========================*/
+ ulint format_id, /*!< in: file format id */
+ const char** name) /*!< out: max file format name or
+ NULL if not needed. */
+{
+ ibool ret = FALSE;
+
+ ut_a(format_id <= DICT_TF_FORMAT_MAX);
+
+ mutex_enter(&file_format_max.mutex);
+
+ /* Only update if not already same value. */
+ if (format_id != file_format_max.id) {
+
+ ret = trx_sys_file_format_max_write(format_id, name);
+ }
+
+ mutex_exit(&file_format_max.mutex);
+
+ return(ret);
+}
+
+/********************************************************************//**
+Tags the system table space with minimum format id if it has not been
+tagged yet.
+WARNING: This function is only called during the startup and AFTER the
+redo log application during recovery has finished. */
+UNIV_INTERN
+void
+trx_sys_file_format_tag_init(void)
+/*==============================*/
+{
+ ulint format_id;
+
+ format_id = trx_sys_file_format_max_read();
+
+ /* If format_id is not set then set it to the minimum. */
+ if (format_id == ULINT_UNDEFINED) {
+ trx_sys_file_format_max_set(DICT_TF_FORMAT_51, NULL);
+ }
+}
+
+/********************************************************************//**
+Update the file format tag in the system tablespace only if the given
+format id is greater than the known max id.
+@return TRUE if format_id was bigger than the known max id */
+UNIV_INTERN
+ibool
+trx_sys_file_format_max_upgrade(
+/*============================*/
+ const char** name, /*!< out: max file format name */
+ ulint format_id) /*!< in: file format identifier */
+{
+ ibool ret = FALSE;
+
+ ut_a(name);
+ ut_a(file_format_max.name != NULL);
+ ut_a(format_id <= DICT_TF_FORMAT_MAX);
+
+ mutex_enter(&file_format_max.mutex);
+
+ if (format_id > file_format_max.id) {
+
+ ret = trx_sys_file_format_max_write(format_id, name);
+ }
+
+ mutex_exit(&file_format_max.mutex);
+
+ return(ret);
+}
+
+/*****************************************************************//**
+Get the name of the max file format currently recorded in memory.
+@return pointer to the max format name */
+UNIV_INTERN
+const char*
+trx_sys_file_format_max_get(void)
+/*=============================*/
+{
+ return(file_format_max.name);
+}
+
+/*****************************************************************//**
+Initializes the tablespace tag system. */
+UNIV_INTERN
+void
+trx_sys_file_format_init(void)
+/*==========================*/
+{
+ mutex_create(&file_format_max.mutex, SYNC_FILE_FORMAT_TAG);
+
+ /* We don't need a mutex here, as this function should only
+ be called once at start up. */
+ file_format_max.id = DICT_TF_FORMAT_51;
+
+ file_format_max.name = trx_sys_file_format_id_to_name(
+ file_format_max.id);
+}
+
+/*****************************************************************//**
+Closes the tablespace tag system. */
+UNIV_INTERN
+void
+trx_sys_file_format_close(void)
+/*===========================*/
+{
+ /* Does nothing at the moment */
+}
+#else /* !UNIV_HOTBACKUP */
+/*****************************************************************//**
+Prints to stderr the MySQL binlog info in the system header if the
+magic number shows it valid. */
+UNIV_INTERN
+void
+trx_sys_print_mysql_binlog_offset_from_page(
+/*========================================*/
+ const byte* page) /*!< in: buffer containing the trx
+ system header page, i.e., page number
+ TRX_SYS_PAGE_NO in the tablespace */
+{
+ const trx_sysf_t* sys_header;
+
+ sys_header = page + TRX_SYS;
+
+ if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
+ + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
+ == TRX_SYS_MYSQL_LOG_MAGIC_N) {
+
+ fprintf(stderr,
+ "ibbackup: Last MySQL binlog file position %lu %lu,"
+ " file name %s\n",
+ (ulong) mach_read_from_4(
+ sys_header + TRX_SYS_MYSQL_LOG_INFO
+ + TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
+ (ulong) mach_read_from_4(
+ sys_header + TRX_SYS_MYSQL_LOG_INFO
+ + TRX_SYS_MYSQL_LOG_OFFSET_LOW),
+ sys_header + TRX_SYS_MYSQL_LOG_INFO
+ + TRX_SYS_MYSQL_LOG_NAME);
+ }
+}
+
+
+/* THESE ARE COPIED FROM NON-HOTBACKUP PART OF THE INNODB SOURCE TREE
+ (This code duplication should be fixed at some point!)
+*/
+
+#define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */
+/* The offset of the file format tag on the trx system header page */
+#define TRX_SYS_FILE_FORMAT_TAG (UNIV_PAGE_SIZE - 16)
+/* We use these random constants to reduce the probability of reading
+garbage (from previous versions) that maps to an actual format id. The
+HIGH constant is stored as is; the LOW one is added to the format id. */
+#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW 3645922177UL
+#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH 2745987765UL
+
+/* END OF COPIED DEFINITIONS */
+
+
+/*****************************************************************//**
+Reads the file format id from the first system table space file.
+Even if the call succeeds and returns TRUE, the returned format id
+may be ULINT_UNDEFINED signalling that the format id was not present
+in the data file.
+@return TRUE if call succeeds */
+UNIV_INTERN
+ibool
+trx_sys_read_file_format_id(
+/*========================*/
+ const char *pathname, /*!< in: pathname of the first system
+ table space file */
+ ulint *format_id) /*!< out: file format of the system table
+ space */
+{
+ os_file_t file;
+ ibool success;
+ byte buf[UNIV_PAGE_SIZE * 2];
+ page_t* page = ut_align(buf, UNIV_PAGE_SIZE);
+ const byte* ptr;
+ dulint file_format_id;
+
+ *format_id = ULINT_UNDEFINED;
+
+ file = os_file_create_simple_no_error_handling(
+ pathname,
+ OS_FILE_OPEN,
+ OS_FILE_READ_ONLY,
+ &success
+ );
+ if (!success) {
+ /* The following call prints an error message */
+ os_file_get_last_error(TRUE);
+
+ ut_print_timestamp(stderr);
+
+ fprintf(stderr,
+" ibbackup: Error: trying to read system tablespace file format,\n"
+" ibbackup: but could not open the tablespace file %s!\n",
+ pathname
+ );
+ return(FALSE);
+ }
+
+ /* Read the page on which file format is stored */
+
+ success = os_file_read_no_error_handling(
+ file, page, TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE, 0, UNIV_PAGE_SIZE
+ );
+ if (!success) {
+ /* The following call prints an error message */
+ os_file_get_last_error(TRUE);
+
+ ut_print_timestamp(stderr);
+
+ fprintf(stderr,
+" ibbackup: Error: trying to read system table space file format,\n"
+" ibbackup: but failed to read the tablespace file %s!\n",
+ pathname
+ );
+ os_file_close(file);
+ return(FALSE);
+ }
+ os_file_close(file);
+
+ /* get the file format from the page */
+ ptr = page + TRX_SYS_FILE_FORMAT_TAG;
+ file_format_id = mach_read_from_8(ptr);
+
+ *format_id = file_format_id.low - TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW;
+
+ if (file_format_id.high != TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH
+ || *format_id >= FILE_FORMAT_NAME_N) {
+
+ /* Either it has never been tagged, or garbage in it. */
+ *format_id = ULINT_UNDEFINED;
+ return(TRUE);
+ }
+
+ return(TRUE);
+}
+
+
+/*****************************************************************//**
+Reads the file format id from the given per-table data file.
+@return TRUE if call succeeds */
+UNIV_INTERN
+ibool
+trx_sys_read_pertable_file_format_id(
+/*=================================*/
+ const char *pathname, /*!< in: pathname of a per-table
+ datafile */
+ ulint *format_id) /*!< out: file format of the per-table
+ data file */
+{
+ os_file_t file;
+ ibool success;
+ byte buf[UNIV_PAGE_SIZE * 2];
+ page_t* page = ut_align(buf, UNIV_PAGE_SIZE);
+ const byte* ptr;
+ ib_uint32_t flags;
+
+ *format_id = ULINT_UNDEFINED;
+
+ file = os_file_create_simple_no_error_handling(
+ pathname,
+ OS_FILE_OPEN,
+ OS_FILE_READ_ONLY,
+ &success
+ );
+ if (!success) {
+ /* The following call prints an error message */
+ os_file_get_last_error(TRUE);
+
+ ut_print_timestamp(stderr);
+
+ fprintf(stderr,
+" ibbackup: Error: trying to read per-table tablespace format,\n"
+" ibbackup: but could not open the tablespace file %s!\n",
+ pathname
+ );
+ return(FALSE);
+ }
+
+ /* Read the first page of the per-table datafile */
+
+ success = os_file_read_no_error_handling(
+ file, page, 0, 0, UNIV_PAGE_SIZE
+ );
+ if (!success) {
+ /* The following call prints an error message */
+ os_file_get_last_error(TRUE);
+
+ ut_print_timestamp(stderr);
+
+ fprintf(stderr,
+" ibbackup: Error: trying to per-table data file format,\n"
+" ibbackup: but failed to read the tablespace file %s!\n",
+ pathname
+ );
+ os_file_close(file);
+ return(FALSE);
+ }
+ os_file_close(file);
+
+ /* get the file format from the page */
+ ptr = page + 54;
+ flags = mach_read_from_4(ptr);
+ if (flags == 0) {
+ /* file format is Antelope */
+ *format_id = 0;
+ return (TRUE);
+ } else if (flags & 1) {
+ /* tablespace flags are ok */
+ *format_id = (flags / 32) % 128;
+ return (TRUE);
+ } else {
+ /* bad tablespace flags */
+ return(FALSE);
+ }
+}
+
+
+/*****************************************************************//**
+Get the name representation of the file format from its id.
+@return pointer to the name, or "Unknown" for an unrecognized id */
+UNIV_INTERN
+const char*
+trx_sys_file_format_id_to_name(
+/*===========================*/
+ const ulint id) /*!< in: id of the file format */
+{
+ if (!(id < FILE_FORMAT_NAME_N)) {
+ /* unknown id */
+ return ("Unknown");
+ }
+
+ return(file_format_name_map[id]);
+}
+
+#endif /* !UNIV_HOTBACKUP */
+
+/*****************************************************************//**
+Shutdown/Close the transaction system. */
+UNIV_INTERN
+void
+trx_sys_close(void)
+/*===============*/
+{
+ trx_rseg_t* rseg;
+ read_view_t* view;
+
+ ut_ad(trx_sys != NULL);
+
+ /* Check that all read views are closed except read view owned
+ by a purge. */
+
+ if (UT_LIST_GET_LEN(trx_sys->view_list) > 1) {
+ fprintf(stderr,
+ "InnoDB: Error: all read views were not closed"
+ " before shutdown:\n"
+ "InnoDB: %lu read views open \n",
+ UT_LIST_GET_LEN(trx_sys->view_list) - 1);
+ }
+
+ sess_close(trx_dummy_sess);
+ trx_dummy_sess = NULL;
+
+ trx_purge_sys_close();
+
+ mutex_enter(&kernel_mutex);
+
+ /* Free the double write data structures. */
+ ut_a(trx_doublewrite != NULL);
+ ut_free(trx_doublewrite->write_buf_unaligned);
+ trx_doublewrite->write_buf_unaligned = NULL;
+
+ mem_free(trx_doublewrite->buf_block_arr);
+ trx_doublewrite->buf_block_arr = NULL;
+
+ mutex_free(&trx_doublewrite->mutex);
+ mem_free(trx_doublewrite);
+ trx_doublewrite = NULL;
+
+ /* There can't be any active transactions. */
+ rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
+
+ while (rseg != NULL) {
+ trx_rseg_t* prev_rseg = rseg;
+
+ rseg = UT_LIST_GET_NEXT(rseg_list, prev_rseg);
+ UT_LIST_REMOVE(rseg_list, trx_sys->rseg_list, prev_rseg);
+
+ trx_rseg_mem_free(prev_rseg);
+ }
+
+ view = UT_LIST_GET_FIRST(trx_sys->view_list);
+
+ while (view != NULL) {
+ read_view_t* prev_view = view;
+
+ view = UT_LIST_GET_NEXT(view_list, prev_view);
+
+ /* Views are allocated from the trx_sys->global_read_view_heap.
+ So, we simply remove the element here. */
+ UT_LIST_REMOVE(view_list, trx_sys->view_list, prev_view);
+ }
+
+ ut_a(UT_LIST_GET_LEN(trx_sys->trx_list) == 0);
+ ut_a(UT_LIST_GET_LEN(trx_sys->rseg_list) == 0);
+ ut_a(UT_LIST_GET_LEN(trx_sys->view_list) == 0);
+ ut_a(UT_LIST_GET_LEN(trx_sys->mysql_trx_list) == 0);
+
+ mem_free(trx_sys);
+
+ trx_sys = NULL;
+ mutex_exit(&kernel_mutex);
+}
diff --git a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
index 8ada38845c5..21ba6e481a7 100644
--- a/storage/innobase/trx/trx0trx.c
+++ b/storage/innobase/trx/trx0trx.c
@@ -1,7 +1,24 @@
-/******************************************************
-The transaction
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
-(c) 1996 Innobase Oy
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file trx/trx0trx.c
+The transaction
Created 3/26/1996 Heikki Tuuri
*******************************************************/
@@ -27,82 +44,52 @@ Created 3/26/1996 Heikki Tuuri
#include "trx0xa.h"
#include "ha_prototypes.h"
-/* Copy of the prototype for innobase_mysql_print_thd: this
-copy MUST be equal to the one in mysql/sql/ha_innodb.cc ! */
-
-void innobase_mysql_print_thd(
- FILE* f,
- void* thd,
- ulint max_query_len);
-
-/* Dummy session used currently in MySQL interface */
-sess_t* trx_dummy_sess = NULL;
+/** Dummy session used currently in MySQL interface */
+UNIV_INTERN sess_t* trx_dummy_sess = NULL;
-/* Number of transactions currently allocated for MySQL: protected by
+/** Number of transactions currently allocated for MySQL: protected by
the kernel mutex */
-ulint trx_n_mysql_transactions = 0;
-
-/*****************************************************************
-Starts the transaction if it is not yet started. */
-
-void
-trx_start_if_not_started_noninline(
-/*===============================*/
- trx_t* trx) /* in: transaction */
-{
- trx_start_if_not_started(trx);
-}
+UNIV_INTERN ulint trx_n_mysql_transactions = 0;
-/*****************************************************************
+/*************************************************************//**
Set detailed error message for the transaction. */
-
+UNIV_INTERN
void
trx_set_detailed_error(
/*===================*/
- trx_t* trx, /* in: transaction struct */
- const char* msg) /* in: detailed error message */
+ trx_t* trx, /*!< in: transaction struct */
+ const char* msg) /*!< in: detailed error message */
{
ut_strlcpy(trx->detailed_error, msg, sizeof(trx->detailed_error));
}
-/*****************************************************************
+/*************************************************************//**
Set detailed error message for the transaction from a file. Note that the
file is rewinded before reading from it. */
-
+UNIV_INTERN
void
trx_set_detailed_error_from_file(
/*=============================*/
- trx_t* trx, /* in: transaction struct */
- FILE* file) /* in: file to read message from */
+ trx_t* trx, /*!< in: transaction struct */
+ FILE* file) /*!< in: file to read message from */
{
os_file_read_string(file, trx->detailed_error,
sizeof(trx->detailed_error));
}
-/********************************************************************
-Retrieves the error_info field from a trx. */
-
-void*
-trx_get_error_info(
-/*===============*/
- /* out: the error info */
- trx_t* trx) /* in: trx object */
-{
- return(trx->error_info);
-}
-
-/********************************************************************
-Creates and initializes a transaction object. */
-
+/****************************************************************//**
+Creates and initializes a transaction object.
+@return own: the transaction */
+UNIV_INTERN
trx_t*
trx_create(
/*=======*/
- /* out, own: the transaction */
- sess_t* sess) /* in: session or NULL */
+ sess_t* sess) /*!< in: session */
{
trx_t* trx;
ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(sess);
trx = mem_alloc(sizeof(trx_t));
@@ -111,6 +98,7 @@ trx_create(
trx->op_info = "";
trx->is_purge = 0;
+ trx->is_recovered = 0;
trx->conc_state = TRX_NOT_STARTED;
trx->start_time = time(NULL);
@@ -127,7 +115,8 @@ trx_create(
trx->flush_log_later = FALSE;
trx->must_flush_log_later = FALSE;
- trx->dict_operation = FALSE;
+ trx->dict_operation = TRX_DICT_OP_NONE;
+ trx->table_id = ut_dulint_zero;
trx->mysql_thd = NULL;
trx->mysql_query_str = NULL;
@@ -151,6 +140,7 @@ trx_create(
trx->undo_no_arr = NULL;
trx->error_state = DB_SUCCESS;
+ trx->error_key_num = 0;
trx->detailed_error[0] = '\0';
trx->sess = sess;
@@ -180,8 +170,6 @@ trx_create(
trx->declared_to_be_inside_innodb = FALSE;
trx->n_tickets_to_enter_innodb = 0;
- trx->auto_inc_lock = NULL;
-
trx->global_read_view_heap = mem_heap_create(256);
trx->global_read_view = NULL;
trx->read_view = NULL;
@@ -192,27 +180,25 @@ trx_create(
trx->n_autoinc_rows = 0;
+ /* Remember to free the vector explicitly. */
+ trx->autoinc_locks = ib_vector_create(
+ mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 4), 4);
+
return(trx);
}
-/************************************************************************
-Creates a transaction object for MySQL. */
-
+/********************************************************************//**
+Creates a transaction object for MySQL.
+@return own: transaction object */
+UNIV_INTERN
trx_t*
trx_allocate_for_mysql(void)
/*========================*/
- /* out, own: transaction object */
{
trx_t* trx;
mutex_enter(&kernel_mutex);
- /* Open a dummy session */
-
- if (!trx_dummy_sess) {
- trx_dummy_sess = sess_open();
- }
-
trx = trx_create(trx_dummy_sess);
trx_n_mysql_transactions++;
@@ -228,24 +214,18 @@ trx_allocate_for_mysql(void)
return(trx);
}
-/************************************************************************
-Creates a transaction object for background operations by the master thread. */
-
+/********************************************************************//**
+Creates a transaction object for background operations by the master thread.
+@return own: transaction object */
+UNIV_INTERN
trx_t*
trx_allocate_for_background(void)
/*=============================*/
- /* out, own: transaction object */
{
trx_t* trx;
mutex_enter(&kernel_mutex);
- /* Open a dummy session */
-
- if (!trx_dummy_sess) {
- trx_dummy_sess = sess_open();
- }
-
trx = trx_create(trx_dummy_sess);
mutex_exit(&kernel_mutex);
@@ -253,13 +233,13 @@ trx_allocate_for_background(void)
return(trx);
}
-/************************************************************************
+/********************************************************************//**
Releases the search latch if trx has reserved it. */
-
+UNIV_INTERN
void
trx_search_latch_release_if_reserved(
/*=================================*/
- trx_t* trx) /* in: transaction */
+ trx_t* trx) /*!< in: transaction */
{
if (trx->has_search_latch) {
rw_lock_s_unlock(&btr_search_latch);
@@ -268,13 +248,13 @@ trx_search_latch_release_if_reserved(
}
}
-/************************************************************************
+/********************************************************************//**
Frees a transaction object. */
-
+UNIV_INTERN
void
trx_free(
/*=====*/
- trx_t* trx) /* in, own: trx object */
+ trx_t* trx) /*!< in, own: trx object */
{
ut_ad(mutex_own(&kernel_mutex));
@@ -305,6 +285,7 @@ trx_free(
trx_print(stderr, trx, 600);
ut_print_buf(stderr, trx, sizeof(trx_t));
+ putc('\n', stderr);
}
ut_a(trx->magic_n == TRX_MAGIC_N);
@@ -329,7 +310,6 @@ trx_free(
ut_a(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
ut_a(!trx->has_search_latch);
- ut_a(!trx->auto_inc_lock);
ut_a(trx->dict_operation_lock_mode == 0);
@@ -347,16 +327,20 @@ trx_free(
ut_a(trx->read_view == NULL);
+ ut_a(ib_vector_is_empty(trx->autoinc_locks));
+ /* We allocated a dedicated heap for the vector. */
+ ib_vector_free(trx->autoinc_locks);
+
mem_free(trx);
}
-/************************************************************************
+/********************************************************************//**
Frees a transaction object for MySQL. */
-
+UNIV_INTERN
void
trx_free_for_mysql(
/*===============*/
- trx_t* trx) /* in, own: trx object */
+ trx_t* trx) /*!< in, own: trx object */
{
mutex_enter(&kernel_mutex);
@@ -371,13 +355,13 @@ trx_free_for_mysql(
mutex_exit(&kernel_mutex);
}
-/************************************************************************
+/********************************************************************//**
Frees a transaction object of a background operation of the master thread. */
-
+UNIV_INTERN
void
trx_free_for_background(
/*====================*/
- trx_t* trx) /* in, own: trx object */
+ trx_t* trx) /*!< in, own: trx object */
{
mutex_enter(&kernel_mutex);
@@ -386,7 +370,7 @@ trx_free_for_background(
mutex_exit(&kernel_mutex);
}
-/********************************************************************
+/****************************************************************//**
Inserts the trx handle in the trx system trx list in the right position.
The list is sorted on the trx id so that the biggest id is at the list
start. This function is used at the database startup to insert incomplete
@@ -395,7 +379,7 @@ static
void
trx_list_insert_ordered(
/*====================*/
- trx_t* trx) /* in: trx handle */
+ trx_t* trx) /*!< in: trx handle */
{
trx_t* trx2;
@@ -426,13 +410,13 @@ trx_list_insert_ordered(
}
}
-/********************************************************************
+/****************************************************************//**
Creates trx objects for transactions and initializes the trx list of
trx_sys at database start. Rollback segment and undo log lists must
already exist when this function is called, because the lists of
transactions to be rolled back or cleaned up are built based on the
undo log lists. */
-
+UNIV_INTERN
void
trx_lists_init_at_db_start(void)
/*============================*/
@@ -453,8 +437,9 @@ trx_lists_init_at_db_start(void)
while (undo != NULL) {
- trx = trx_create(NULL);
+ trx = trx_create(trx_dummy_sess);
+ trx->is_recovered = TRUE;
trx->id = undo->trx_id;
trx->xid = undo->xid;
trx->insert_undo = undo;
@@ -469,11 +454,11 @@ trx_lists_init_at_db_start(void)
if (undo->state == TRX_UNDO_PREPARED) {
fprintf(stderr,
- "InnoDB: Transaction %lu %lu"
+ "InnoDB: Transaction "
+ TRX_ID_FMT
" was in the"
" XA prepared state.\n",
- ut_dulint_get_high(trx->id),
- ut_dulint_get_low(trx->id));
+ TRX_ID_PREP_PRINTF(trx->id));
if (srv_force_recovery == 0) {
@@ -511,7 +496,8 @@ trx_lists_init_at_db_start(void)
}
if (undo->dict_operation) {
- trx->dict_operation = undo->dict_operation;
+ trx_set_dict_operation(
+ trx, TRX_DICT_OP_TABLE);
trx->table_id = undo->table_id;
}
@@ -531,8 +517,9 @@ trx_lists_init_at_db_start(void)
trx = trx_get_on_id(undo->trx_id);
if (NULL == trx) {
- trx = trx_create(NULL);
+ trx = trx_create(trx_dummy_sess);
+ trx->is_recovered = TRUE;
trx->id = undo->trx_id;
trx->xid = undo->xid;
@@ -544,12 +531,10 @@ trx_lists_init_at_db_start(void)
if (undo->state == TRX_UNDO_PREPARED) {
fprintf(stderr,
- "InnoDB: Transaction"
- " %lu %lu was in the"
+ "InnoDB: Transaction "
+ TRX_ID_FMT " was in the"
" XA prepared state.\n",
- ut_dulint_get_high(
- trx->id),
- ut_dulint_get_low(
+ TRX_ID_PREP_PRINTF(
trx->id));
if (srv_force_recovery == 0) {
@@ -590,8 +575,8 @@ trx_lists_init_at_db_start(void)
trx_list_insert_ordered(trx);
if (undo->dict_operation) {
- trx->dict_operation
- = undo->dict_operation;
+ trx_set_dict_operation(
+ trx, TRX_DICT_OP_TABLE);
trx->table_id = undo->table_id;
}
}
@@ -613,14 +598,14 @@ trx_lists_init_at_db_start(void)
}
}
-/**********************************************************************
+/******************************************************************//**
Assigns a rollback segment to a transaction in a round-robin fashion.
-Skips the SYSTEM rollback segment if another is available. */
+Skips the SYSTEM rollback segment if another is available.
+@return assigned rollback segment id */
UNIV_INLINE
ulint
trx_assign_rseg(void)
/*=================*/
- /* out: assigned rollback segment id */
{
trx_rseg_t* rseg = trx_sys->latest_rseg;
@@ -647,15 +632,15 @@ loop:
return(rseg->id);
}
-/********************************************************************
-Starts a new transaction. */
-
+/****************************************************************//**
+Starts a new transaction.
+@return TRUE */
+UNIV_INTERN
ibool
trx_start_low(
/*==========*/
- /* out: TRUE */
- trx_t* trx, /* in: transaction */
- ulint rseg_id)/* in: rollback segment id; if ULINT_UNDEFINED
+ trx_t* trx, /*!< in: transaction */
+ ulint rseg_id)/*!< in: rollback segment id; if ULINT_UNDEFINED
is passed, the system chooses the rollback segment
automatically in a round-robin fashion */
{
@@ -698,20 +683,28 @@ trx_start_low(
return(TRUE);
}
-/********************************************************************
-Starts a new transaction. */
-
+/****************************************************************//**
+Starts a new transaction.
+@return TRUE */
+UNIV_INTERN
ibool
trx_start(
/*======*/
- /* out: TRUE */
- trx_t* trx, /* in: transaction */
- ulint rseg_id)/* in: rollback segment id; if ULINT_UNDEFINED
+ trx_t* trx, /*!< in: transaction */
+ ulint rseg_id)/*!< in: rollback segment id; if ULINT_UNDEFINED
is passed, the system chooses the rollback segment
automatically in a round-robin fashion */
{
ibool ret;
+ /* Update the info whether we should skip XA steps that eat CPU time.
+ For the duration of the transaction trx->support_xa is not reread
+ from thd so any changes in the value take effect in the next
+ transaction. This is to avoid a scenario where some undo
+ generated by a transaction, has XA stuff, and other undo,
+ generated by the same transaction, doesn't. */
+ trx->support_xa = thd_supports_xa(trx->mysql_thd);
+
mutex_enter(&kernel_mutex);
ret = trx_start_low(trx, rseg_id);
@@ -721,19 +714,18 @@ trx_start(
return(ret);
}
-/********************************************************************
+/****************************************************************//**
Commits a transaction. */
-
+UNIV_INTERN
void
trx_commit_off_kernel(
/*==================*/
- trx_t* trx) /* in: transaction */
+ trx_t* trx) /*!< in: transaction */
{
page_t* update_hdr_page;
- dulint lsn;
+ ib_uint64_t lsn = 0;
trx_rseg_t* rseg;
trx_undo_t* undo;
- ibool must_flush_log = FALSE;
mtr_t mtr;
ut_ad(mutex_own(&kernel_mutex));
@@ -748,8 +740,6 @@ trx_commit_off_kernel(
mtr_start(&mtr);
- must_flush_log = TRUE;
-
/* Change the undo log segment states from TRX_UNDO_ACTIVE
to some other state: these modifications to the file data
structure define the transaction as committed in the file
@@ -813,7 +803,7 @@ trx_commit_off_kernel(
in exactly the same order as commit lsn's, if the transactions
have different rollback segments. To get exactly the same
order we should hold the kernel mutex up to this point,
- adding to to the contention of the kernel mutex. However, if
+ adding to the contention of the kernel mutex. However, if
a transaction T2 is able to see modifications made by
a transaction T1, T2 will always get a bigger transaction
number and a bigger commit lsn than T1. */
@@ -848,6 +838,20 @@ trx_commit_off_kernel(
trx->conc_state = TRX_COMMITTED_IN_MEMORY;
/*--------------------------------------*/
+ /* If we release kernel_mutex below and we are still doing
+ recovery i.e.: background rollback thread is still active
+ then there is a chance that the rollback thread may see
+ this trx as COMMITTED_IN_MEMORY and goes ahead to clean it
+ up calling trx_cleanup_at_db_startup(). This can happen
+ in the case we are committing a trx here that is left in
+ PREPARED state during the crash. Note that commit of the
+ rollback of a PREPARED trx happens in the recovery thread
+ while the rollback of other transactions happen in the
+ background thread. To avoid this race we unconditionally
+ unset the is_recovered flag from the trx. */
+
+ trx->is_recovered = FALSE;
+
lock_release_off_kernel(trx);
if (trx->global_read_view) {
@@ -858,7 +862,7 @@ trx_commit_off_kernel(
trx->read_view = NULL;
- if (must_flush_log) {
+ if (lsn) {
mutex_exit(&kernel_mutex);
@@ -887,11 +891,11 @@ trx_commit_off_kernel(
there are > 2 users in the database. Then at least 2 users can
gather behind one doing the physical log write to disk.
- If we are calling trx_commit() under MySQL's binlog mutex, we
+ If we are calling trx_commit() under prepare_commit_mutex, we
will delay possible log write and flush to a separate function
trx_commit_complete_for_mysql(), which is only called when the
- thread has released the binlog mutex. This is to make the
- group commit algorithm to work. Otherwise, the MySQL binlog
+ thread has released the mutex. This is to make the
+ group commit algorithm work. Otherwise, the prepare_commit
mutex would serialize all commits and prevent a group of
transactions from gathering. */
@@ -943,15 +947,15 @@ trx_commit_off_kernel(
UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
}
-/********************************************************************
+/****************************************************************//**
Cleans up a transaction at database startup. The cleanup is needed if
the transaction already got to the middle of a commit when the database
-crashed, andf we cannot roll it back. */
-
+crashed, and we cannot roll it back. */
+UNIV_INTERN
void
trx_cleanup_at_db_startup(
/*======================*/
- trx_t* trx) /* in: transaction */
+ trx_t* trx) /*!< in: transaction */
{
if (trx->insert_undo != NULL) {
@@ -966,16 +970,16 @@ trx_cleanup_at_db_startup(
UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
}
-/************************************************************************
+/********************************************************************//**
Assigns a read view for a consistent read query. All the consistent reads
within the same transaction will get the same read view, which is created
-when this function is first called for a new started transaction. */
-
+when this function is first called for a newly started transaction.
+@return consistent read view */
+UNIV_INTERN
read_view_t*
trx_assign_read_view(
/*=================*/
- /* out: consistent read view */
- trx_t* trx) /* in: active transaction */
+ trx_t* trx) /*!< in: active transaction */
{
ut_ad(trx->conc_state == TRX_ACTIVE);
@@ -996,14 +1000,14 @@ trx_assign_read_view(
return(trx->read_view);
}
-/********************************************************************
+/****************************************************************//**
Commits a transaction. NOTE that the kernel mutex is temporarily released. */
static
void
trx_handle_commit_sig_off_kernel(
/*=============================*/
- trx_t* trx, /* in: transaction */
- que_thr_t** next_thr) /* in/out: next query thread to run;
+ trx_t* trx, /*!< in: transaction */
+ que_thr_t** next_thr) /*!< in/out: next query thread to run;
if the value which is passed in is
a pointer to a NULL pointer, then the
calling function can start running
@@ -1040,15 +1044,15 @@ trx_handle_commit_sig_off_kernel(
trx->que_state = TRX_QUE_RUNNING;
}
-/***************************************************************
+/***********************************************************//**
The transaction must be in the TRX_QUE_LOCK_WAIT state. Puts it to
the TRX_QUE_RUNNING state and releases query threads which were
waiting for a lock in the wait_thrs list. */
-
+UNIV_INTERN
void
trx_end_lock_wait(
/*==============*/
- trx_t* trx) /* in: transaction */
+ trx_t* trx) /*!< in: transaction */
{
que_thr_t* thr;
@@ -1068,14 +1072,14 @@ trx_end_lock_wait(
trx->que_state = TRX_QUE_RUNNING;
}
-/***************************************************************
+/***********************************************************//**
Moves the query threads in the lock wait list to the SUSPENDED state and puts
the transaction to the TRX_QUE_RUNNING state. */
static
void
trx_lock_wait_to_suspended(
/*=======================*/
- trx_t* trx) /* in: transaction in the TRX_QUE_LOCK_WAIT state */
+ trx_t* trx) /*!< in: transaction in the TRX_QUE_LOCK_WAIT state */
{
que_thr_t* thr;
@@ -1095,14 +1099,14 @@ trx_lock_wait_to_suspended(
trx->que_state = TRX_QUE_RUNNING;
}
-/***************************************************************
+/***********************************************************//**
Moves the query threads in the sig reply wait list of trx to the SUSPENDED
state. */
static
void
trx_sig_reply_wait_to_suspended(
/*============================*/
- trx_t* trx) /* in: transaction */
+ trx_t* trx) /*!< in: transaction */
{
trx_sig_t* sig;
que_thr_t* thr;
@@ -1126,17 +1130,17 @@ trx_sig_reply_wait_to_suspended(
}
}
-/*********************************************************************
+/*****************************************************************//**
Checks the compatibility of a new signal with the other signals in the
-queue. */
+queue.
+@return TRUE if the signal can be queued */
static
ibool
trx_sig_is_compatible(
/*==================*/
- /* out: TRUE if the signal can be queued */
- trx_t* trx, /* in: trx handle */
- ulint type, /* in: signal type */
- ulint sender) /* in: TRX_SIG_SELF or TRX_SIG_OTHER_SESS */
+ trx_t* trx, /*!< in: trx handle */
+ ulint type, /*!< in: signal type */
+ ulint sender) /*!< in: TRX_SIG_SELF or TRX_SIG_OTHER_SESS */
{
trx_sig_t* sig;
@@ -1200,22 +1204,22 @@ trx_sig_is_compatible(
}
}
-/********************************************************************
+/****************************************************************//**
Sends a signal to a trx object. */
-
+UNIV_INTERN
void
trx_sig_send(
/*=========*/
- trx_t* trx, /* in: trx handle */
- ulint type, /* in: signal type */
- ulint sender, /* in: TRX_SIG_SELF or
+ trx_t* trx, /*!< in: trx handle */
+ ulint type, /*!< in: signal type */
+ ulint sender, /*!< in: TRX_SIG_SELF or
TRX_SIG_OTHER_SESS */
- que_thr_t* receiver_thr, /* in: query thread which wants the
+ que_thr_t* receiver_thr, /*!< in: query thread which wants the
reply, or NULL; if type is
TRX_SIG_END_WAIT, this must be NULL */
- trx_savept_t* savept, /* in: possible rollback savepoint, or
+ trx_savept_t* savept, /*!< in: possible rollback savepoint, or
NULL */
- que_thr_t** next_thr) /* in/out: next query thread to run;
+ que_thr_t** next_thr) /*!< in/out: next query thread to run;
if the value which is passed in is
a pointer to a NULL pointer, then the
calling function can start running
@@ -1284,16 +1288,16 @@ trx_sig_send(
}
}
-/********************************************************************
+/****************************************************************//**
Ends signal handling. If the session is in the error state, and
trx->graph_before_signal_handling != NULL, then returns control to the error
handling routine of the graph (currently just returns the control to the
graph root which then will send an error message to the client). */
-
+UNIV_INTERN
void
trx_end_signal_handling(
/*====================*/
- trx_t* trx) /* in: trx */
+ trx_t* trx) /*!< in: trx */
{
ut_ad(mutex_own(&kernel_mutex));
ut_ad(trx->handling_signals == TRUE);
@@ -1308,14 +1312,14 @@ trx_end_signal_handling(
}
}
-/********************************************************************
+/****************************************************************//**
Starts handling of a trx signal. */
-
+UNIV_INTERN
void
trx_sig_start_handle(
/*=================*/
- trx_t* trx, /* in: trx handle */
- que_thr_t** next_thr) /* in/out: next query thread to run;
+ trx_t* trx, /*!< in: trx handle */
+ que_thr_t** next_thr) /*!< in/out: next query thread to run;
if the value which is passed in is
a pointer to a NULL pointer, then the
calling function can start running
@@ -1413,15 +1417,15 @@ loop:
goto loop;
}
-/********************************************************************
+/****************************************************************//**
Send the reply message when a signal in the queue of the trx has been
handled. */
-
+UNIV_INTERN
void
trx_sig_reply(
/*==========*/
- trx_sig_t* sig, /* in: signal */
- que_thr_t** next_thr) /* in/out: next query thread to run;
+ trx_sig_t* sig, /*!< in: signal */
+ que_thr_t** next_thr) /*!< in/out: next query thread to run;
if the value which is passed in is
a pointer to a NULL pointer, then the
calling function can start running
@@ -1448,14 +1452,14 @@ trx_sig_reply(
}
}
-/********************************************************************
+/****************************************************************//**
Removes a signal object from the trx signal queue. */
-
+UNIV_INTERN
void
trx_sig_remove(
/*===========*/
- trx_t* trx, /* in: trx handle */
- trx_sig_t* sig) /* in, own: signal */
+ trx_t* trx, /*!< in: trx handle */
+ trx_sig_t* sig) /*!< in, own: signal */
{
ut_ad(trx && sig);
ut_ad(mutex_own(&kernel_mutex));
@@ -1470,14 +1474,14 @@ trx_sig_remove(
}
}
-/*************************************************************************
-Creates a commit command node struct. */
-
+/*********************************************************************//**
+Creates a commit command node struct.
+@return own: commit node struct */
+UNIV_INTERN
commit_node_t*
commit_node_create(
/*===============*/
- /* out, own: commit node struct */
- mem_heap_t* heap) /* in: mem heap where created */
+ mem_heap_t* heap) /*!< in: mem heap where created */
{
commit_node_t* node;
@@ -1488,14 +1492,14 @@ commit_node_create(
return(node);
}
-/***************************************************************
-Performs an execution step for a commit type node in a query graph. */
-
+/***********************************************************//**
+Performs an execution step for a commit type node in a query graph.
+@return query thread to run next, or NULL */
+UNIV_INTERN
que_thr_t*
trx_commit_step(
/*============*/
- /* out: query thread to run next, or NULL */
- que_thr_t* thr) /* in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
{
commit_node_t* node;
que_thr_t* next_thr;
@@ -1536,14 +1540,14 @@ trx_commit_step(
return(thr);
}
-/**************************************************************************
-Does the transaction commit for MySQL. */
-
+/**********************************************************************//**
+Does the transaction commit for MySQL.
+@return DB_SUCCESS or error number */
+UNIV_INTERN
ulint
trx_commit_for_mysql(
/*=================*/
- /* out: 0 or error number */
- trx_t* trx) /* in: trx handle */
+ trx_t* trx) /*!< in: trx handle */
{
/* Because we do not do the commit by sending an Innobase
sig to the transaction, we must here make sure that trx has been
@@ -1551,30 +1555,10 @@ trx_commit_for_mysql(
ut_a(trx);
- trx->op_info = "committing";
-
- /* If we are doing the XA recovery of prepared transactions, then
- the transaction object does not have an InnoDB session object, and we
- set the dummy session that we use for all MySQL transactions. */
-
- if (trx->sess == NULL) {
- /* Open a dummy session */
-
- if (!trx_dummy_sess) {
- mutex_enter(&kernel_mutex);
-
- if (!trx_dummy_sess) {
- trx_dummy_sess = sess_open();
- }
-
- mutex_exit(&kernel_mutex);
- }
-
- trx->sess = trx_dummy_sess;
- }
-
trx_start_if_not_started(trx);
+ trx->op_info = "committing";
+
mutex_enter(&kernel_mutex);
trx_commit_off_kernel(trx);
@@ -1583,20 +1567,20 @@ trx_commit_for_mysql(
trx->op_info = "";
- return(0);
+ return(DB_SUCCESS);
}
-/**************************************************************************
+/**********************************************************************//**
If required, flushes the log to disk if we called trx_commit_for_mysql()
-with trx->flush_log_later == TRUE. */
-
+with trx->flush_log_later == TRUE.
+@return 0 or error number */
+UNIV_INTERN
ulint
trx_commit_complete_for_mysql(
/*==========================*/
- /* out: 0 or error number */
- trx_t* trx) /* in: trx handle */
+ trx_t* trx) /*!< in: trx handle */
{
- dulint lsn = trx->commit_lsn;
+ ib_uint64_t lsn = trx->commit_lsn;
ut_a(trx);
@@ -1633,13 +1617,13 @@ trx_commit_complete_for_mysql(
return(0);
}
-/**************************************************************************
+/**********************************************************************//**
Marks the latest SQL statement ended. */
-
+UNIV_INTERN
void
trx_mark_sql_stat_end(
/*==================*/
- trx_t* trx) /* in: trx handle */
+ trx_t* trx) /*!< in: trx handle */
{
ut_a(trx);
@@ -1650,25 +1634,23 @@ trx_mark_sql_stat_end(
trx->last_sql_stat_start.least_undo_no = trx->undo_no;
}
-/**************************************************************************
+/**********************************************************************//**
Prints info about a transaction to the given file. The caller must own the
kernel mutex and must have called
innobase_mysql_prepare_print_arbitrary_thd(), unless he knows that MySQL
or InnoDB cannot meanwhile change the info printed here. */
-
+UNIV_INTERN
void
trx_print(
/*======*/
- FILE* f, /* in: output stream */
- trx_t* trx, /* in: transaction */
- ulint max_query_len) /* in: max query length to print, or 0 to
+ FILE* f, /*!< in: output stream */
+ trx_t* trx, /*!< in: transaction */
+ ulint max_query_len) /*!< in: max query length to print, or 0 to
use the default max length */
{
ibool newline;
- fprintf(f, "TRANSACTION %lu %lu",
- (ulong) ut_dulint_get_high(trx->id),
- (ulong) ut_dulint_get_low(trx->id));
+ fprintf(f, "TRANSACTION " TRX_ID_FMT, TRX_ID_PREP_PRINTF(trx->id));
switch (trx->conc_state) {
case TRX_NOT_STARTED:
@@ -1700,6 +1682,10 @@ trx_print(
fputs(trx->op_info, f);
}
+ if (trx->is_recovered) {
+ fputs(" recovered trx", f);
+ }
+
if (trx->is_purge) {
fputs(" purge trx", f);
}
@@ -1748,7 +1734,7 @@ trx_print(
fputs(", holds adaptive hash latch", f);
}
- if (ut_dulint_cmp(trx->undo_no, ut_dulint_zero) != 0) {
+ if (!ut_dulint_is_zero(trx->undo_no)) {
newline = TRUE;
fprintf(f, ", undo log entries %lu",
(ulong) ut_dulint_get_low(trx->undo_no));
@@ -1763,18 +1749,17 @@ trx_print(
}
}
-/***********************************************************************
-Compares the "weight" (or size) of two transactions. The weight of one
-transaction is estimated as the number of altered rows + the number of
-locked rows. Transactions that have edited non-transactional tables are
-considered heavier than ones that have not. */
-
+/*******************************************************************//**
+Compares the "weight" (or size) of two transactions. Transactions that
+have edited non-transactional tables are considered heavier than ones
+that have not.
+@return <0, 0 or >0; similar to strcmp(3) */
+UNIV_INTERN
int
trx_weight_cmp(
/*===========*/
- /* out: <0, 0 or >0; similar to strcmp(3) */
- trx_t* a, /* in: the first transaction to be compared */
- trx_t* b) /* in: the second transaction to be compared */
+ const trx_t* a, /*!< in: the first transaction to be compared */
+ const trx_t* b) /*!< in: the second transaction to be compared */
{
ibool a_notrans_edit;
ibool b_notrans_edit;
@@ -1812,24 +1797,20 @@ trx_weight_cmp(
UT_LIST_GET_LEN(b->trx_locks));
#endif
-#define TRX_WEIGHT(t) \
- ut_dulint_add((t)->undo_no, UT_LIST_GET_LEN((t)->trx_locks))
-
return(ut_dulint_cmp(TRX_WEIGHT(a), TRX_WEIGHT(b)));
}
-/********************************************************************
+/****************************************************************//**
Prepares a transaction. */
-
+UNIV_INTERN
void
trx_prepare_off_kernel(
/*===================*/
- trx_t* trx) /* in: transaction */
+ trx_t* trx) /*!< in: transaction */
{
page_t* update_hdr_page;
trx_rseg_t* rseg;
- ibool must_flush_log = FALSE;
- dulint lsn;
+ ib_uint64_t lsn = 0;
mtr_t mtr;
ut_ad(mutex_own(&kernel_mutex));
@@ -1842,8 +1823,6 @@ trx_prepare_off_kernel(
mtr_start(&mtr);
- must_flush_log = TRUE;
-
/* Change the undo log segment states from TRX_UNDO_ACTIVE
to TRX_UNDO_PREPARED: these modifications to the file data
structure define the transaction as prepared in the
@@ -1884,7 +1863,7 @@ trx_prepare_off_kernel(
trx->conc_state = TRX_PREPARED;
/*--------------------------------------*/
- if (must_flush_log) {
+ if (lsn) {
/* Depending on the my.cnf options, we may now write the log
buffer to the log files, making the prepared state of the
transaction durable if the OS does not crash. We may also
@@ -1931,14 +1910,14 @@ trx_prepare_off_kernel(
}
}
-/**************************************************************************
-Does the transaction prepare for MySQL. */
-
+/**********************************************************************//**
+Does the transaction prepare for MySQL.
+@return 0 or error number */
+UNIV_INTERN
ulint
trx_prepare_for_mysql(
/*==================*/
- /* out: 0 or error number */
- trx_t* trx) /* in: trx handle */
+ trx_t* trx) /*!< in: trx handle */
{
/* Because we do not do the prepare by sending an Innobase
sig to the transaction, we must here make sure that trx has been
@@ -1961,17 +1940,16 @@ trx_prepare_for_mysql(
return(0);
}
-/**************************************************************************
+/**********************************************************************//**
This function is used to find number of prepared transactions and
-their transaction objects for a recovery. */
-
+their transaction objects for a recovery.
+@return number of prepared transactions stored in xid_list */
+UNIV_INTERN
int
trx_recover_for_mysql(
/*==================*/
- /* out: number of prepared transactions
- stored in xid_list */
- XID* xid_list, /* in/out: prepared transactions */
- ulint len) /* in: number of slots in xid_list */
+ XID* xid_list, /*!< in/out: prepared transactions */
+ ulint len) /*!< in: number of slots in xid_list */
{
trx_t* trx;
ulint count = 0;
@@ -1999,10 +1977,9 @@ trx_recover_for_mysql(
ut_print_timestamp(stderr);
fprintf(stderr,
- " InnoDB: Transaction %lu %lu in"
+ " InnoDB: Transaction " TRX_ID_FMT " in"
" prepared state after recovery\n",
- (ulong) ut_dulint_get_high(trx->id),
- (ulong) ut_dulint_get_low(trx->id));
+ TRX_ID_PREP_PRINTF(trx->id));
ut_print_timestamp(stderr);
fprintf(stderr,
@@ -2034,15 +2011,15 @@ trx_recover_for_mysql(
return ((int) count);
}
-/***********************************************************************
+/*******************************************************************//**
This function is used to find one X/Open XA distributed transaction
-which is in the prepared state */
-
+which is in the prepared state.
+@return trx or NULL */
+UNIV_INTERN
trx_t*
trx_get_trx_by_xid(
/*===============*/
- /* out: trx or NULL */
- XID* xid) /* in: X/Open XA transaction identification */
+ XID* xid) /*!< in: X/Open XA transaction identification */
{
trx_t* trx;
diff --git a/storage/innobase/trx/trx0undo.c b/storage/innobase/trx/trx0undo.c
index deb6c85e6e3..3bb1b1cdf6c 100644
--- a/storage/innobase/trx/trx0undo.c
+++ b/storage/innobase/trx/trx0undo.c
@@ -1,7 +1,24 @@
-/******************************************************
-Transaction undo log
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
-(c) 1996 Innobase Oy
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file trx/trx0undo.c
+Transaction undo log
Created 3/26/1996 Heikki Tuuri
*******************************************************/
@@ -13,6 +30,7 @@ Created 3/26/1996 Heikki Tuuri
#endif
#include "fsp0fsp.h"
+#ifndef UNIV_HOTBACKUP
#include "mach0data.h"
#include "mtr0log.h"
#include "trx0rseg.h"
@@ -20,7 +38,6 @@ Created 3/26/1996 Heikki Tuuri
#include "srv0srv.h"
#include "trx0rec.h"
#include "trx0purge.h"
-#include "trx0xa.h"
/* How should the old versions in the history list be managed?
----------------------------------------------------------
@@ -76,74 +93,79 @@ it until a truncate operation occurs, which can remove undo logs from the end
of the list and release undo log segments. In stepping through the list,
s-latches on the undo log pages are enough, but in a truncate, x-latches must
be obtained on the rollback segment and individual pages. */
+#endif /* !UNIV_HOTBACKUP */
-/************************************************************************
+/********************************************************************//**
Initializes the fields in an undo log segment page. */
static
void
trx_undo_page_init(
/*===============*/
- page_t* undo_page, /* in: undo log segment page */
- ulint type, /* in: undo log segment type */
- mtr_t* mtr); /* in: mtr */
-/************************************************************************
-Creates and initializes an undo log memory object. */
+ page_t* undo_page, /*!< in: undo log segment page */
+ ulint type, /*!< in: undo log segment type */
+ mtr_t* mtr); /*!< in: mtr */
+
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Creates and initializes an undo log memory object.
+@return own: the undo log memory object */
static
trx_undo_t*
trx_undo_mem_create(
/*================*/
- /* out, own: the undo log memory object */
- trx_rseg_t* rseg, /* in: rollback segment memory object */
- ulint id, /* in: slot index within rseg */
- ulint type, /* in: type of the log: TRX_UNDO_INSERT or
+ trx_rseg_t* rseg, /*!< in: rollback segment memory object */
+ ulint id, /*!< in: slot index within rseg */
+ ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or
TRX_UNDO_UPDATE */
- dulint trx_id, /* in: id of the trx for which the undo log
+ trx_id_t trx_id, /*!< in: id of the trx for which the undo log
is created */
- XID* xid, /* in: X/Open XA transaction identification*/
- ulint page_no,/* in: undo log header page number */
- ulint offset);/* in: undo log header byte offset on page */
-/*******************************************************************
+ const XID* xid, /*!< in: X/Open XA transaction identification*/
+ ulint page_no,/*!< in: undo log header page number */
+ ulint offset);/*!< in: undo log header byte offset on page */
+#endif /* !UNIV_HOTBACKUP */
+/***************************************************************//**
Initializes a cached insert undo log header page for new use. NOTE that this
function has its own log record type MLOG_UNDO_HDR_REUSE. You must NOT change
-the operation of this function! */
+the operation of this function!
+@return undo log header byte offset on page */
static
ulint
trx_undo_insert_header_reuse(
/*=========================*/
- /* out: undo log header byte offset on page */
- page_t* undo_page, /* in: insert undo log segment header page,
- x-latched */
- dulint trx_id, /* in: transaction id */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
+ page_t* undo_page, /*!< in/out: insert undo log segment
+ header page, x-latched */
+ trx_id_t trx_id, /*!< in: transaction id */
+ mtr_t* mtr); /*!< in: mtr */
+/**********************************************************************//**
If an update undo log can be discarded immediately, this function frees the
space, resetting the page to the proper state for caching. */
static
void
trx_undo_discard_latest_update_undo(
/*================================*/
- page_t* undo_page, /* in: header page of an undo log of size 1 */
- mtr_t* mtr); /* in: mtr */
-
+ page_t* undo_page, /*!< in: header page of an undo log of size 1 */
+ mtr_t* mtr); /*!< in: mtr */
-/***************************************************************************
-Gets the previous record in an undo log from the previous page. */
+#ifndef UNIV_HOTBACKUP
+/***********************************************************************//**
+Gets the previous record in an undo log from the previous page.
+@return undo log record, the page s-latched, NULL if none */
static
trx_undo_rec_t*
trx_undo_get_prev_rec_from_prev_page(
/*=================================*/
- /* out: undo log record, the page s-latched,
- NULL if none */
- trx_undo_rec_t* rec, /* in: undo record */
- ulint page_no,/* in: undo log header page number */
- ulint offset, /* in: undo log header offset on page */
- mtr_t* mtr) /* in: mtr */
+ trx_undo_rec_t* rec, /*!< in: undo record */
+ ulint page_no,/*!< in: undo log header page number */
+ ulint offset, /*!< in: undo log header offset on page */
+ mtr_t* mtr) /*!< in: mtr */
{
+ ulint space;
+ ulint zip_size;
ulint prev_page_no;
page_t* prev_page;
page_t* undo_page;
- undo_page = buf_frame_align(rec);
+ undo_page = page_align(rec);
prev_page_no = flst_get_prev_addr(undo_page + TRX_UNDO_PAGE_HDR
+ TRX_UNDO_PAGE_NODE, mtr)
@@ -154,24 +176,26 @@ trx_undo_get_prev_rec_from_prev_page(
return(NULL);
}
- prev_page = trx_undo_page_get_s_latched(
- buf_frame_get_space_id(undo_page), prev_page_no, mtr);
+ space = page_get_space_id(undo_page);
+ zip_size = fil_space_get_zip_size(space);
+
+ prev_page = trx_undo_page_get_s_latched(space, zip_size,
+ prev_page_no, mtr);
return(trx_undo_page_get_last_rec(prev_page, page_no, offset));
}
-/***************************************************************************
-Gets the previous record in an undo log. */
-
+/***********************************************************************//**
+Gets the previous record in an undo log.
+@return undo log record, the page s-latched, NULL if none */
+UNIV_INTERN
trx_undo_rec_t*
trx_undo_get_prev_rec(
/*==================*/
- /* out: undo log record, the page s-latched,
- NULL if none */
- trx_undo_rec_t* rec, /* in: undo record */
- ulint page_no,/* in: undo log header page number */
- ulint offset, /* in: undo log header offset on page */
- mtr_t* mtr) /* in: mtr */
+ trx_undo_rec_t* rec, /*!< in: undo record */
+ ulint page_no,/*!< in: undo log header page number */
+ ulint offset, /*!< in: undo log header offset on page */
+ mtr_t* mtr) /*!< in: mtr */
{
trx_undo_rec_t* prev_rec;
@@ -189,27 +213,28 @@ trx_undo_get_prev_rec(
mtr));
}
-/***************************************************************************
-Gets the next record in an undo log from the next page. */
+/***********************************************************************//**
+Gets the next record in an undo log from the next page.
+@return undo log record, the page latched, NULL if none */
static
trx_undo_rec_t*
trx_undo_get_next_rec_from_next_page(
/*=================================*/
- /* out: undo log record, the page latched, NULL if
- none */
- page_t* undo_page, /* in: undo log page */
- ulint page_no,/* in: undo log header page number */
- ulint offset, /* in: undo log header offset on page */
- ulint mode, /* in: latch mode: RW_S_LATCH or RW_X_LATCH */
- mtr_t* mtr) /* in: mtr */
+ ulint space, /*!< in: undo log header space */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ page_t* undo_page, /*!< in: undo log page */
+ ulint page_no,/*!< in: undo log header page number */
+ ulint offset, /*!< in: undo log header offset on page */
+ ulint mode, /*!< in: latch mode: RW_S_LATCH or RW_X_LATCH */
+ mtr_t* mtr) /*!< in: mtr */
{
trx_ulogf_t* log_hdr;
ulint next_page_no;
page_t* next_page;
- ulint space;
ulint next;
- if (page_no == buf_frame_get_page_no(undo_page)) {
+ if (page_no == page_get_page_no(undo_page)) {
log_hdr = undo_page + offset;
next = mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG);
@@ -220,8 +245,6 @@ trx_undo_get_next_rec_from_next_page(
}
}
- space = buf_frame_get_space_id(undo_page);
-
next_page_no = flst_get_next_addr(undo_page + TRX_UNDO_PAGE_HDR
+ TRX_UNDO_PAGE_NODE, mtr)
.page;
@@ -231,29 +254,31 @@ trx_undo_get_next_rec_from_next_page(
}
if (mode == RW_S_LATCH) {
- next_page = trx_undo_page_get_s_latched(space, next_page_no,
- mtr);
+ next_page = trx_undo_page_get_s_latched(space, zip_size,
+ next_page_no, mtr);
} else {
ut_ad(mode == RW_X_LATCH);
- next_page = trx_undo_page_get(space, next_page_no, mtr);
+ next_page = trx_undo_page_get(space, zip_size,
+ next_page_no, mtr);
}
return(trx_undo_page_get_first_rec(next_page, page_no, offset));
}
-/***************************************************************************
-Gets the next record in an undo log. */
-
+/***********************************************************************//**
+Gets the next record in an undo log.
+@return undo log record, the page s-latched, NULL if none */
+UNIV_INTERN
trx_undo_rec_t*
trx_undo_get_next_rec(
/*==================*/
- /* out: undo log record, the page s-latched,
- NULL if none */
- trx_undo_rec_t* rec, /* in: undo record */
- ulint page_no,/* in: undo log header page number */
- ulint offset, /* in: undo log header offset on page */
- mtr_t* mtr) /* in: mtr */
+ trx_undo_rec_t* rec, /*!< in: undo record */
+ ulint page_no,/*!< in: undo log header page number */
+ ulint offset, /*!< in: undo log header offset on page */
+ mtr_t* mtr) /*!< in: mtr */
{
+ ulint space;
+ ulint zip_size;
trx_undo_rec_t* next_rec;
next_rec = trx_undo_page_get_next_rec(rec, page_no, offset);
@@ -262,32 +287,38 @@ trx_undo_get_next_rec(
return(next_rec);
}
- return(trx_undo_get_next_rec_from_next_page(buf_frame_align(rec),
+ space = page_get_space_id(page_align(rec));
+ zip_size = fil_space_get_zip_size(space);
+
+ return(trx_undo_get_next_rec_from_next_page(space, zip_size,
+ page_align(rec),
page_no, offset,
RW_S_LATCH, mtr));
}
-/***************************************************************************
-Gets the first record in an undo log. */
-
+/***********************************************************************//**
+Gets the first record in an undo log.
+@return undo log record, the page latched, NULL if none */
+UNIV_INTERN
trx_undo_rec_t*
trx_undo_get_first_rec(
/*===================*/
- /* out: undo log record, the page latched, NULL if
- none */
- ulint space, /* in: undo log header space */
- ulint page_no,/* in: undo log header page number */
- ulint offset, /* in: undo log header offset on page */
- ulint mode, /* in: latching mode: RW_S_LATCH or RW_X_LATCH */
- mtr_t* mtr) /* in: mtr */
+ ulint space, /*!< in: undo log header space */
+ ulint zip_size,/*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page_no,/*!< in: undo log header page number */
+ ulint offset, /*!< in: undo log header offset on page */
+ ulint mode, /*!< in: latching mode: RW_S_LATCH or RW_X_LATCH */
+ mtr_t* mtr) /*!< in: mtr */
{
page_t* undo_page;
trx_undo_rec_t* rec;
if (mode == RW_S_LATCH) {
- undo_page = trx_undo_page_get_s_latched(space, page_no, mtr);
+ undo_page = trx_undo_page_get_s_latched(space, zip_size,
+ page_no, mtr);
} else {
- undo_page = trx_undo_page_get(space, page_no, mtr);
+ undo_page = trx_undo_page_get(space, zip_size, page_no, mtr);
}
rec = trx_undo_page_get_first_rec(undo_page, page_no, offset);
@@ -296,38 +327,42 @@ trx_undo_get_first_rec(
return(rec);
}
- return(trx_undo_get_next_rec_from_next_page(undo_page, page_no, offset,
+ return(trx_undo_get_next_rec_from_next_page(space, zip_size,
+ undo_page, page_no, offset,
mode, mtr));
}
/*============== UNDO LOG FILE COPY CREATION AND FREEING ==================*/
-/**************************************************************************
+/**********************************************************************//**
Writes the mtr log entry of an undo log page initialization. */
UNIV_INLINE
void
trx_undo_page_init_log(
/*===================*/
- page_t* undo_page, /* in: undo log page */
- ulint type, /* in: undo log type */
- mtr_t* mtr) /* in: mtr */
+ page_t* undo_page, /*!< in: undo log page */
+ ulint type, /*!< in: undo log type */
+ mtr_t* mtr) /*!< in: mtr */
{
mlog_write_initial_log_record(undo_page, MLOG_UNDO_INIT, mtr);
mlog_catenate_ulint_compressed(mtr, type);
}
-
-/***************************************************************
-Parses the redo log entry of an undo log page initialization. */
-
+#else /* !UNIV_HOTBACKUP */
+# define trx_undo_page_init_log(undo_page,type,mtr) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+
+/***********************************************************//**
+Parses the redo log entry of an undo log page initialization.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
trx_undo_parse_page_init(
/*=====================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr) /* in: mtr or NULL */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ page_t* page, /*!< in: page or NULL */
+ mtr_t* mtr) /*!< in: mtr or NULL */
{
ulint type;
@@ -345,15 +380,15 @@ trx_undo_parse_page_init(
return(ptr);
}
-/************************************************************************
+/********************************************************************//**
Initializes the fields in an undo log segment page. */
static
void
trx_undo_page_init(
/*===============*/
- page_t* undo_page, /* in: undo log segment page */
- ulint type, /* in: undo log segment type */
- mtr_t* mtr) /* in: mtr */
+ page_t* undo_page, /*!< in: undo log segment page */
+ ulint type, /*!< in: undo log segment type */
+ mtr_t* mtr) /*!< in: mtr */
{
trx_upagef_t* page_hdr;
@@ -371,29 +406,29 @@ trx_undo_page_init(
trx_undo_page_init_log(undo_page, type, mtr);
}
-/*******************************************************************
-Creates a new undo log segment in file. */
+#ifndef UNIV_HOTBACKUP
+/***************************************************************//**
+Creates a new undo log segment in file.
+@return DB_SUCCESS if page creation OK; possible error codes are
+DB_TOO_MANY_CONCURRENT_TRXS and DB_OUT_OF_FILE_SPACE */
static
ulint
trx_undo_seg_create(
/*================*/
- /* out: DB_SUCCESS if page creation OK
- possible error codes are:
- DB_TOO_MANY_CONCURRENT_TRXS
- DB_OUT_OF_FILE_SPACE */
- trx_rseg_t* rseg __attribute__((unused)),/* in: rollback segment */
- trx_rsegf_t* rseg_hdr,/* in: rollback segment header, page
+ trx_rseg_t* rseg __attribute__((unused)),/*!< in: rollback segment */
+ trx_rsegf_t* rseg_hdr,/*!< in: rollback segment header, page
x-latched */
- ulint type, /* in: type of the segment: TRX_UNDO_INSERT or
+ ulint type, /*!< in: type of the segment: TRX_UNDO_INSERT or
TRX_UNDO_UPDATE */
- ulint* id, /* out: slot index within rseg header */
+ ulint* id, /*!< out: slot index within rseg header */
page_t** undo_page,
- /* out: segment header page x-latched, NULL
+ /*!< out: segment header page x-latched, NULL
if there was an error */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint slot_no;
ulint space;
+ buf_block_t* block;
trx_upagef_t* page_hdr;
trx_usegf_t* seg_hdr;
ulint n_reserved;
@@ -419,7 +454,7 @@ trx_undo_seg_create(
return(DB_TOO_MANY_CONCURRENT_TRXS);
}
- space = buf_frame_get_space_id(rseg_hdr);
+ space = page_get_space_id(page_align(rseg_hdr));
success = fsp_reserve_free_extents(&n_reserved, space, 2, FSP_UNDO,
mtr);
@@ -429,21 +464,21 @@ trx_undo_seg_create(
}
/* Allocate a new file segment for the undo log */
- *undo_page = fseg_create_general(space, 0,
- TRX_UNDO_SEG_HDR
- + TRX_UNDO_FSEG_HEADER, TRUE, mtr);
+ block = fseg_create_general(space, 0,
+ TRX_UNDO_SEG_HDR
+ + TRX_UNDO_FSEG_HEADER, TRUE, mtr);
fil_space_release_free_extents(space, n_reserved);
- if (*undo_page == NULL) {
+ if (block == NULL) {
/* No space left */
return(DB_OUT_OF_FILE_SPACE);
}
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(*undo_page, SYNC_TRX_UNDO_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
+ buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
+
+ *undo_page = buf_block_get_frame(block);
page_hdr = *undo_page + TRX_UNDO_PAGE_HDR;
seg_hdr = *undo_page + TRX_UNDO_SEG_HDR;
@@ -462,43 +497,46 @@ trx_undo_seg_create(
page_hdr + TRX_UNDO_PAGE_NODE, mtr);
trx_rsegf_set_nth_undo(rseg_hdr, slot_no,
- buf_frame_get_page_no(*undo_page), mtr);
-
+ page_get_page_no(*undo_page), mtr);
*id = slot_no;
return(err);
}
-/**************************************************************************
+/**********************************************************************//**
Writes the mtr log entry of an undo log header initialization. */
UNIV_INLINE
void
trx_undo_header_create_log(
/*=======================*/
- page_t* undo_page, /* in: undo log header page */
- dulint trx_id, /* in: transaction id */
- mtr_t* mtr) /* in: mtr */
+ const page_t* undo_page, /*!< in: undo log header page */
+ trx_id_t trx_id, /*!< in: transaction id */
+ mtr_t* mtr) /*!< in: mtr */
{
mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_CREATE, mtr);
mlog_catenate_dulint_compressed(mtr, trx_id);
}
+#else /* !UNIV_HOTBACKUP */
+# define trx_undo_header_create_log(undo_page,trx_id,mtr) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
-/*******************************************************************
+/***************************************************************//**
Creates a new undo log header in file. NOTE that this function has its own
log record type MLOG_UNDO_HDR_CREATE. You must NOT change the operation of
-this function! */
+this function!
+@return header byte offset on page */
static
ulint
trx_undo_header_create(
/*===================*/
- /* out: header byte offset on page */
- page_t* undo_page, /* in: undo log segment header page,
- x-latched; it is assumed that there are
- TRX_UNDO_LOG_XA_HDR_SIZE bytes free space
- on it */
- dulint trx_id, /* in: transaction id */
- mtr_t* mtr) /* in: mtr */
+ page_t* undo_page, /*!< in/out: undo log segment
+ header page, x-latched; it is
+ assumed that there are
+ TRX_UNDO_LOG_XA_HDR_SIZE bytes
+ of free space on it */
+ trx_id_t trx_id, /*!< in: transaction id */
+ mtr_t* mtr) /*!< in: mtr */
{
trx_upagef_t* page_hdr;
trx_usegf_t* seg_hdr;
@@ -556,15 +594,16 @@ trx_undo_header_create(
return(free);
}
-/************************************************************************
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
Write X/Open XA Transaction Identification (XID) to undo log header */
static
void
trx_undo_write_xid(
/*===============*/
- trx_ulogf_t* log_hdr,/* in: undo log header */
- const XID* xid, /* in: X/Open XA Transaction Identification */
- mtr_t* mtr) /* in: mtr */
+ trx_ulogf_t* log_hdr,/*!< in: undo log header */
+ const XID* xid, /*!< in: X/Open XA Transaction Identification */
+ mtr_t* mtr) /*!< in: mtr */
{
mlog_write_ulint(log_hdr + TRX_UNDO_XA_FORMAT,
(ulint)xid->formatID, MLOG_4BYTES, mtr);
@@ -579,14 +618,14 @@ trx_undo_write_xid(
XIDDATASIZE, mtr);
}
-/************************************************************************
+/********************************************************************//**
Read X/Open XA Transaction Identification (XID) from undo log header */
static
void
trx_undo_read_xid(
/*==============*/
- trx_ulogf_t* log_hdr,/* in: undo log header */
- XID* xid) /* out: X/Open XA Transaction Identification */
+ trx_ulogf_t* log_hdr,/*!< in: undo log header */
+ XID* xid) /*!< out: X/Open XA Transaction Identification */
{
xid->formatID = (long)mach_read_from_4(log_hdr + TRX_UNDO_XA_FORMAT);
@@ -598,15 +637,15 @@ trx_undo_read_xid(
memcpy(xid->data, log_hdr + TRX_UNDO_XA_XID, XIDDATASIZE);
}
-/*******************************************************************
+/***************************************************************//**
Adds space for the XA XID after an undo log old-style header. */
static
void
trx_undo_header_add_space_for_xid(
/*==============================*/
- page_t* undo_page,/* in: undo log segment header page */
- trx_ulogf_t* log_hdr,/* in: undo log header */
- mtr_t* mtr) /* in: mtr */
+ page_t* undo_page,/*!< in: undo log segment header page */
+ trx_ulogf_t* log_hdr,/*!< in: undo log header */
+ mtr_t* mtr) /*!< in: mtr */
{
trx_upagef_t* page_hdr;
ulint free;
@@ -636,35 +675,38 @@ trx_undo_header_add_space_for_xid(
MLOG_2BYTES, mtr);
}
-/**************************************************************************
+/**********************************************************************//**
Writes the mtr log entry of an undo log header reuse. */
UNIV_INLINE
void
trx_undo_insert_header_reuse_log(
/*=============================*/
- page_t* undo_page, /* in: undo log header page */
- dulint trx_id, /* in: transaction id */
- mtr_t* mtr) /* in: mtr */
+ const page_t* undo_page, /*!< in: undo log header page */
+ trx_id_t trx_id, /*!< in: transaction id */
+ mtr_t* mtr) /*!< in: mtr */
{
mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_REUSE, mtr);
mlog_catenate_dulint_compressed(mtr, trx_id);
}
-
-/***************************************************************
-Parses the redo log entry of an undo log page header create or reuse. */
-
+#else /* !UNIV_HOTBACKUP */
+# define trx_undo_insert_header_reuse_log(undo_page,trx_id,mtr) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+
+/***********************************************************//**
+Parses the redo log entry of an undo log page header create or reuse.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
trx_undo_parse_page_header(
/*=======================*/
- /* out: end of log record or NULL */
- ulint type, /* in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr) /* in: mtr or NULL */
+ ulint type, /*!< in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr,/*!< in: buffer end */
+ page_t* page, /*!< in: page or NULL */
+ mtr_t* mtr) /*!< in: mtr or NULL */
{
- dulint trx_id;
+ trx_id_t trx_id;
ptr = mach_dulint_parse_compressed(ptr, end_ptr, &trx_id);
@@ -685,19 +727,19 @@ trx_undo_parse_page_header(
return(ptr);
}
-/*******************************************************************
+/***************************************************************//**
Initializes a cached insert undo log header page for new use. NOTE that this
function has its own log record type MLOG_UNDO_HDR_REUSE. You must NOT change
-the operation of this function! */
+the operation of this function!
+@return undo log header byte offset on page */
static
ulint
trx_undo_insert_header_reuse(
/*=========================*/
- /* out: undo log header byte offset on page */
- page_t* undo_page, /* in: insert undo log segment header page,
- x-latched */
- dulint trx_id, /* in: transaction id */
- mtr_t* mtr) /* in: mtr */
+ page_t* undo_page, /*!< in/out: insert undo log segment
+ header page, x-latched */
+ trx_id_t trx_id, /*!< in: transaction id */
+ mtr_t* mtr) /*!< in: mtr */
{
trx_upagef_t* page_hdr;
trx_usegf_t* seg_hdr;
@@ -745,29 +787,33 @@ trx_undo_insert_header_reuse(
return(free);
}
-/**************************************************************************
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
Writes the redo log entry of an update undo log header discard. */
UNIV_INLINE
void
trx_undo_discard_latest_log(
/*========================*/
- page_t* undo_page, /* in: undo log header page */
- mtr_t* mtr) /* in: mtr */
+ page_t* undo_page, /*!< in: undo log header page */
+ mtr_t* mtr) /*!< in: mtr */
{
mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_DISCARD, mtr);
}
-
-/***************************************************************
-Parses the redo log entry of an undo log page header discard. */
-
+#else /* !UNIV_HOTBACKUP */
+# define trx_undo_discard_latest_log(undo_page, mtr) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+
+/***********************************************************//**
+Parses the redo log entry of an undo log page header discard.
+@return end of log record or NULL */
+UNIV_INTERN
byte*
trx_undo_parse_discard_latest(
/*==========================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr __attribute__((unused)), /* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr) /* in: mtr or NULL */
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr __attribute__((unused)), /*!< in: buffer end */
+ page_t* page, /*!< in: page or NULL */
+ mtr_t* mtr) /*!< in: mtr or NULL */
{
ut_ad(end_ptr);
@@ -778,15 +824,15 @@ trx_undo_parse_discard_latest(
return(ptr);
}
-/**************************************************************************
+/**********************************************************************//**
If an update undo log can be discarded immediately, this function frees the
space, resetting the page to the proper state for caching. */
static
void
trx_undo_discard_latest_update_undo(
/*================================*/
- page_t* undo_page, /* in: header page of an undo log of size 1 */
- mtr_t* mtr) /* in: mtr */
+ page_t* undo_page, /*!< in: header page of an undo log of size 1 */
+ mtr_t* mtr) /*!< in: mtr */
{
trx_usegf_t* seg_hdr;
trx_upagef_t* page_hdr;
@@ -820,17 +866,17 @@ trx_undo_discard_latest_update_undo(
trx_undo_discard_latest_log(undo_page, mtr);
}
-/************************************************************************
-Tries to add a page to the undo log segment where the undo log is placed. */
-
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Tries to add a page to the undo log segment where the undo log is placed.
+@return page number if success, else FIL_NULL */
+UNIV_INTERN
ulint
trx_undo_add_page(
/*==============*/
- /* out: page number if success, else
- FIL_NULL */
- trx_t* trx, /* in: transaction */
- trx_undo_t* undo, /* in: undo log memory object */
- mtr_t* mtr) /* in: mtr which does not have a latch to any
+ trx_t* trx, /*!< in: transaction */
+ trx_undo_t* undo, /*!< in: undo log memory object */
+ mtr_t* mtr) /*!< in: mtr which does not have a latch to any
undo log page; the caller must have reserved
the rollback segment mutex */
{
@@ -852,7 +898,8 @@ trx_undo_add_page(
return(FIL_NULL);
}
- header_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
+ header_page = trx_undo_page_get(undo->space, undo->zip_size,
+ undo->hdr_page_no, mtr);
success = fsp_reserve_free_extents(&n_reserved, undo->space, 1,
FSP_UNDO, mtr);
@@ -877,7 +924,8 @@ trx_undo_add_page(
undo->last_page_no = page_no;
- new_page = trx_undo_page_get(undo->space, page_no, mtr);
+ new_page = trx_undo_page_get(undo->space, undo->zip_size,
+ page_no, mtr);
trx_undo_page_init(new_page, undo->type, mtr);
@@ -889,21 +937,21 @@ trx_undo_add_page(
return(page_no);
}
-/************************************************************************
-Frees an undo log page that is not the header page. */
+/********************************************************************//**
+Frees an undo log page that is not the header page.
+@return last page number in remaining log */
static
ulint
trx_undo_free_page(
/*===============*/
- /* out: last page number in remaining log */
- trx_rseg_t* rseg, /* in: rollback segment */
- ibool in_history, /* in: TRUE if the undo log is in the history
+ trx_rseg_t* rseg, /*!< in: rollback segment */
+ ibool in_history, /*!< in: TRUE if the undo log is in the history
list */
- ulint space, /* in: space */
- ulint hdr_page_no, /* in: header page number */
- ulint page_no, /* in: page number to free: must not be the
+ ulint space, /*!< in: space */
+ ulint hdr_page_no, /*!< in: header page number */
+ ulint page_no, /*!< in: page number to free: must not be the
header page */
- mtr_t* mtr) /* in: mtr which does not have a latch to any
+ mtr_t* mtr) /*!< in: mtr which does not have a latch to any
undo log page; the caller must have reserved
the rollback segment mutex */
{
@@ -912,14 +960,17 @@ trx_undo_free_page(
fil_addr_t last_addr;
trx_rsegf_t* rseg_header;
ulint hist_size;
+ ulint zip_size;
ut_a(hdr_page_no != page_no);
ut_ad(!mutex_own(&kernel_mutex));
ut_ad(mutex_own(&(rseg->mutex)));
- undo_page = trx_undo_page_get(space, page_no, mtr);
+ zip_size = rseg->zip_size;
+
+ undo_page = trx_undo_page_get(space, zip_size, page_no, mtr);
- header_page = trx_undo_page_get(space, hdr_page_no, mtr);
+ header_page = trx_undo_page_get(space, zip_size, hdr_page_no, mtr);
flst_remove(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST,
undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr);
@@ -932,7 +983,8 @@ trx_undo_free_page(
rseg->curr_size--;
if (in_history) {
- rseg_header = trx_rsegf_get(space, rseg->page_no, mtr);
+ rseg_header = trx_rsegf_get(space, zip_size,
+ rseg->page_no, mtr);
hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
MLOG_4BYTES, mtr);
@@ -944,18 +996,18 @@ trx_undo_free_page(
return(last_addr.page);
}
-/************************************************************************
+/********************************************************************//**
Frees an undo log page when there is also the memory object for the undo
log. */
static
void
trx_undo_free_page_in_rollback(
/*===========================*/
- trx_t* trx __attribute__((unused)), /* in: transaction */
- trx_undo_t* undo, /* in: undo log memory copy */
- ulint page_no,/* in: page number to free: must not be the
+ trx_t* trx __attribute__((unused)), /*!< in: transaction */
+ trx_undo_t* undo, /*!< in: undo log memory copy */
+ ulint page_no,/*!< in: page number to free: must not be the
header page */
- mtr_t* mtr) /* in: mtr which does not have a latch to any
+ mtr_t* mtr) /*!< in: mtr which does not have a latch to any
undo log page; the caller must have reserved
the rollback segment mutex */
{
@@ -971,23 +1023,25 @@ trx_undo_free_page_in_rollback(
undo->size--;
}
-/************************************************************************
+/********************************************************************//**
Empties an undo log header page of undo records for that undo log. Other
undo logs may still have records on that page, if it is an update undo log. */
static
void
trx_undo_empty_header_page(
/*=======================*/
- ulint space, /* in: space */
- ulint hdr_page_no, /* in: header page number */
- ulint hdr_offset, /* in: header offset */
- mtr_t* mtr) /* in: mtr */
+ ulint space, /*!< in: space */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint hdr_page_no, /*!< in: header page number */
+ ulint hdr_offset, /*!< in: header offset */
+ mtr_t* mtr) /*!< in: mtr */
{
page_t* header_page;
trx_ulogf_t* log_hdr;
ulint end;
- header_page = trx_undo_page_get(space, hdr_page_no, mtr);
+ header_page = trx_undo_page_get(space, zip_size, hdr_page_no, mtr);
log_hdr = header_page + hdr_offset;
@@ -996,16 +1050,16 @@ trx_undo_empty_header_page(
mlog_write_ulint(log_hdr + TRX_UNDO_LOG_START, end, MLOG_2BYTES, mtr);
}
-/***************************************************************************
+/***********************************************************************//**
Truncates an undo log from the end. This function is used during a rollback
to free space from an undo log. */
-
+UNIV_INTERN
void
trx_undo_truncate_end(
/*==================*/
- trx_t* trx, /* in: transaction whose undo log it is */
- trx_undo_t* undo, /* in: undo log */
- dulint limit) /* in: all undo records with undo number
+ trx_t* trx, /*!< in: transaction whose undo log it is */
+ trx_undo_t* undo, /*!< in: undo log */
+ undo_no_t limit) /*!< in: all undo records with undo number
>= this value should be truncated */
{
page_t* undo_page;
@@ -1027,7 +1081,8 @@ trx_undo_truncate_end(
last_page_no = undo->last_page_no;
- undo_page = trx_undo_page_get(undo->space, last_page_no, &mtr);
+ undo_page = trx_undo_page_get(undo->space, undo->zip_size,
+ last_page_no, &mtr);
rec = trx_undo_page_get_last_rec(undo_page, undo->hdr_page_no,
undo->hdr_offset);
@@ -1070,22 +1125,24 @@ function_exit:
mtr_commit(&mtr);
}
-/***************************************************************************
+/***********************************************************************//**
Truncates an undo log from the start. This function is used during a purge
operation. */
-
+UNIV_INTERN
void
trx_undo_truncate_start(
/*====================*/
- trx_rseg_t* rseg, /* in: rollback segment */
- ulint space, /* in: space id of the log */
- ulint hdr_page_no, /* in: header page number */
- ulint hdr_offset, /* in: header offset on the page */
- dulint limit) /* in: all undo pages with undo numbers <
- this value should be truncated; NOTE that
- the function only frees whole pages; the
- header page is not freed, but emptied, if
- all the records there are < limit */
+ trx_rseg_t* rseg, /*!< in: rollback segment */
+ ulint space, /*!< in: space id of the log */
+ ulint hdr_page_no, /*!< in: header page number */
+ ulint hdr_offset, /*!< in: header offset on the page */
+ undo_no_t limit) /*!< in: all undo pages with
+ undo numbers < this value
+ should be truncated; NOTE that
+ the function only frees whole
+ pages; the header page is not
+ freed, but emptied, if all the
+ records there are < limit */
{
page_t* undo_page;
trx_undo_rec_t* rec;
@@ -1095,14 +1152,15 @@ trx_undo_truncate_start(
ut_ad(mutex_own(&(rseg->mutex)));
- if (0 == ut_dulint_cmp(limit, ut_dulint_zero)) {
+ if (ut_dulint_is_zero(limit)) {
return;
}
loop:
mtr_start(&mtr);
- rec = trx_undo_get_first_rec(space, hdr_page_no, hdr_offset,
+ rec = trx_undo_get_first_rec(space, rseg->zip_size,
+ hdr_page_no, hdr_offset,
RW_X_LATCH, &mtr);
if (rec == NULL) {
/* Already empty */
@@ -1112,7 +1170,7 @@ loop:
return;
}
- undo_page = buf_frame_align(rec);
+ undo_page = page_align(rec);
last_rec = trx_undo_page_get_last_rec(undo_page, hdr_page_no,
hdr_offset);
@@ -1123,10 +1181,11 @@ loop:
return;
}
- page_no = buf_frame_get_page_no(undo_page);
+ page_no = page_get_page_no(undo_page);
if (page_no == hdr_page_no) {
- trx_undo_empty_header_page(space, hdr_page_no, hdr_offset,
+ trx_undo_empty_header_page(space, rseg->zip_size,
+ hdr_page_no, hdr_offset,
&mtr);
} else {
trx_undo_free_page(rseg, TRUE, space, hdr_page_no,
@@ -1138,13 +1197,13 @@ loop:
goto loop;
}
-/**************************************************************************
+/**********************************************************************//**
Frees an undo log segment which is not in the history list. */
static
void
trx_undo_seg_free(
/*==============*/
- trx_undo_t* undo) /* in: undo log */
+ trx_undo_t* undo) /*!< in: undo log */
{
trx_rseg_t* rseg;
fseg_header_t* file_seg;
@@ -1153,10 +1212,9 @@ trx_undo_seg_free(
ibool finished;
mtr_t mtr;
- finished = FALSE;
rseg = undo->rseg;
- while (!finished) {
+ do {
mtr_start(&mtr);
@@ -1164,7 +1222,8 @@ trx_undo_seg_free(
mutex_enter(&(rseg->mutex));
- seg_header = trx_undo_page_get(undo->space, undo->hdr_page_no,
+ seg_header = trx_undo_page_get(undo->space, undo->zip_size,
+ undo->hdr_page_no,
&mtr) + TRX_UNDO_SEG_HDR;
file_seg = seg_header + TRX_UNDO_FSEG_HEADER;
@@ -1173,32 +1232,33 @@ trx_undo_seg_free(
if (finished) {
/* Update the rseg header */
- rseg_header = trx_rsegf_get(rseg->space, rseg->page_no,
- &mtr);
+ rseg_header = trx_rsegf_get(
+ rseg->space, rseg->zip_size, rseg->page_no,
+ &mtr);
trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL,
&mtr);
}
mutex_exit(&(rseg->mutex));
mtr_commit(&mtr);
- }
+ } while (!finished);
}
/*========== UNDO LOG MEMORY COPY INITIALIZATION =====================*/
-/************************************************************************
+/********************************************************************//**
Creates and initializes an undo log memory object according to the values
in the header in file, when the database is started. The memory object is
-inserted in the appropriate list of rseg. */
+inserted in the appropriate list of rseg.
+@return own: the undo log memory object */
static
trx_undo_t*
trx_undo_mem_create_at_db_start(
/*============================*/
- /* out, own: the undo log memory object */
- trx_rseg_t* rseg, /* in: rollback segment memory object */
- ulint id, /* in: slot index within rseg */
- ulint page_no,/* in: undo log segment page number */
- mtr_t* mtr) /* in: mtr */
+ trx_rseg_t* rseg, /*!< in: rollback segment memory object */
+ ulint id, /*!< in: slot index within rseg */
+ ulint page_no,/*!< in: undo log segment page number */
+ mtr_t* mtr) /*!< in: mtr */
{
page_t* undo_page;
trx_upagef_t* page_header;
@@ -1207,7 +1267,7 @@ trx_undo_mem_create_at_db_start(
trx_undo_t* undo;
ulint type;
ulint state;
- dulint trx_id;
+ trx_id_t trx_id;
ulint offset;
fil_addr_t last_addr;
page_t* last_page;
@@ -1221,7 +1281,8 @@ trx_undo_mem_create_at_db_start(
ut_error;
}
- undo_page = trx_undo_page_get(rseg->space, page_no, mtr);
+ undo_page = trx_undo_page_get(rseg->space, rseg->zip_size,
+ page_no, mtr);
page_header = undo_page + TRX_UNDO_PAGE_HDR;
@@ -1274,7 +1335,8 @@ trx_undo_mem_create_at_db_start(
undo->last_page_no = last_addr.page;
undo->top_page_no = last_addr.page;
- last_page = trx_undo_page_get(rseg->space, undo->last_page_no, mtr);
+ last_page = trx_undo_page_get(rseg->space, rseg->zip_size,
+ undo->last_page_no, mtr);
rec = trx_undo_page_get_last_rec(last_page, page_no, offset);
@@ -1308,17 +1370,16 @@ add_to_list:
return(undo);
}
-/************************************************************************
+/********************************************************************//**
Initializes the undo log lists for a rollback segment memory copy. This
function is only called when the database is started or a new rollback
-segment is created. */
-
+segment is created.
+@return the combined size of undo log segments in pages */
+UNIV_INTERN
ulint
trx_undo_lists_init(
/*================*/
- /* out: the combined size of undo log segments
- in pages */
- trx_rseg_t* rseg) /* in: rollback segment memory object */
+ trx_rseg_t* rseg) /*!< in: rollback segment memory object */
{
ulint page_no;
trx_undo_t* undo;
@@ -1334,7 +1395,8 @@ trx_undo_lists_init(
mtr_start(&mtr);
- rseg_header = trx_rsegf_get_new(rseg->space, rseg->page_no, &mtr);
+ rseg_header = trx_rsegf_get_new(rseg->space, rseg->zip_size,
+ rseg->page_no, &mtr);
for (i = 0; i < TRX_RSEG_N_SLOTS; i++) {
page_no = trx_rsegf_get_nth_undo(rseg_header, i, &mtr);
@@ -1355,8 +1417,9 @@ trx_undo_lists_init(
mtr_start(&mtr);
- rseg_header = trx_rsegf_get(rseg->space,
- rseg->page_no, &mtr);
+ rseg_header = trx_rsegf_get(
+ rseg->space, rseg->zip_size, rseg->page_no,
+ &mtr);
}
}
@@ -1365,22 +1428,22 @@ trx_undo_lists_init(
return(size);
}
-/************************************************************************
-Creates and initializes an undo log memory object. */
+/********************************************************************//**
+Creates and initializes an undo log memory object.
+@return own: the undo log memory object */
static
trx_undo_t*
trx_undo_mem_create(
/*================*/
- /* out, own: the undo log memory object */
- trx_rseg_t* rseg, /* in: rollback segment memory object */
- ulint id, /* in: slot index within rseg */
- ulint type, /* in: type of the log: TRX_UNDO_INSERT or
+ trx_rseg_t* rseg, /*!< in: rollback segment memory object */
+ ulint id, /*!< in: slot index within rseg */
+ ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or
TRX_UNDO_UPDATE */
- dulint trx_id, /* in: id of the trx for which the undo log
+ trx_id_t trx_id, /*!< in: id of the trx for which the undo log
is created */
- XID* xid, /* in: X/Open transaction identification */
- ulint page_no,/* in: undo log header page number */
- ulint offset) /* in: undo log header byte offset on page */
+ const XID* xid, /*!< in: X/Open transaction identification */
+ ulint page_no,/*!< in: undo log header page number */
+ ulint offset) /*!< in: undo log header byte offset on page */
{
trx_undo_t* undo;
@@ -1411,6 +1474,7 @@ trx_undo_mem_create(
undo->rseg = rseg;
undo->space = rseg->space;
+ undo->zip_size = rseg->zip_size;
undo->hdr_page_no = page_no;
undo->hdr_offset = offset;
undo->last_page_no = page_no;
@@ -1418,22 +1482,22 @@ trx_undo_mem_create(
undo->empty = TRUE;
undo->top_page_no = page_no;
- undo->guess_page = NULL;
+ undo->guess_block = NULL;
return(undo);
}
-/************************************************************************
+/********************************************************************//**
Initializes a cached undo log object for new use. */
static
void
trx_undo_mem_init_for_reuse(
/*========================*/
- trx_undo_t* undo, /* in: undo log to init */
- dulint trx_id, /* in: id of the trx for which the undo log
+ trx_undo_t* undo, /*!< in: undo log to init */
+ trx_id_t trx_id, /*!< in: id of the trx for which the undo log
is created */
- XID* xid, /* in: X/Open XA transaction identification*/
- ulint offset) /* in: undo log header byte offset on page */
+ const XID* xid, /*!< in: X/Open XA transaction identification*/
+ ulint offset) /*!< in: undo log header byte offset on page */
{
ut_ad(mutex_own(&((undo->rseg)->mutex)));
@@ -1456,13 +1520,13 @@ trx_undo_mem_init_for_reuse(
undo->empty = TRUE;
}
-/************************************************************************
+/********************************************************************//**
Frees an undo log memory copy. */
-static
+UNIV_INTERN
void
trx_undo_mem_free(
/*==============*/
- trx_undo_t* undo) /* in: the undo object to be freed */
+ trx_undo_t* undo) /*!< in: the undo object to be freed */
{
if (undo->id >= TRX_RSEG_N_SLOTS) {
fprintf(stderr,
@@ -1473,28 +1537,25 @@ trx_undo_mem_free(
mem_free(undo);
}
-/**************************************************************************
-Creates a new undo log. */
+/**********************************************************************//**
+Creates a new undo log.
+@return DB_SUCCESS if successful in creating the new undo log object;
+possible error codes are: DB_TOO_MANY_CONCURRENT_TRXS,
+DB_OUT_OF_FILE_SPACE, DB_OUT_OF_MEMORY */
static
ulint
trx_undo_create(
/*============*/
- /* out: DB_SUCCESS if successful in creating
- the new undo lob object, possible error
- codes are:
- DB_TOO_MANY_CONCURRENT_TRXS
- DB_OUT_OF_FILE_SPACE
- DB_OUT_OF_MEMORY*/
- trx_t* trx, /* in: transaction */
- trx_rseg_t* rseg, /* in: rollback segment memory copy */
- ulint type, /* in: type of the log: TRX_UNDO_INSERT or
+ trx_t* trx, /*!< in: transaction */
+ trx_rseg_t* rseg, /*!< in: rollback segment memory copy */
+ ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or
TRX_UNDO_UPDATE */
- dulint trx_id, /* in: id of the trx for which the undo log
+ trx_id_t trx_id, /*!< in: id of the trx for which the undo log
is created */
- XID* xid, /* in: X/Open transaction identification*/
- trx_undo_t** undo, /* out: the new undo log object, undefined
+ const XID* xid, /*!< in: X/Open transaction identification*/
+ trx_undo_t** undo, /*!< out: the new undo log object, undefined
* if did not succeed */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
trx_rsegf_t* rseg_header;
ulint page_no;
@@ -1512,10 +1573,11 @@ trx_undo_create(
rseg->curr_size++;
- rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, mtr);
+ rseg_header = trx_rsegf_get(rseg->space, rseg->zip_size, rseg->page_no,
+ mtr);
err = trx_undo_seg_create(rseg, rseg_header, type, &id,
- &undo_page, mtr);
+ &undo_page, mtr);
if (err != DB_SUCCESS) {
/* Did not succeed */
@@ -1525,7 +1587,7 @@ trx_undo_create(
return(err);
}
- page_no = buf_frame_get_page_no(undo_page);
+ page_no = page_get_page_no(undo_page);
offset = trx_undo_header_create(undo_page, trx_id, mtr);
@@ -1546,22 +1608,21 @@ trx_undo_create(
/*================ UNDO LOG ASSIGNMENT AND CLEANUP =====================*/
-/************************************************************************
-Reuses a cached undo log. */
+/********************************************************************//**
+Reuses a cached undo log.
+@return the undo log memory object, NULL if none cached */
static
trx_undo_t*
trx_undo_reuse_cached(
/*==================*/
- /* out: the undo log memory object, NULL if
- none cached */
- trx_t* trx, /* in: transaction */
- trx_rseg_t* rseg, /* in: rollback segment memory object */
- ulint type, /* in: type of the log: TRX_UNDO_INSERT or
+ trx_t* trx, /*!< in: transaction */
+ trx_rseg_t* rseg, /*!< in: rollback segment memory object */
+ ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or
TRX_UNDO_UPDATE */
- dulint trx_id, /* in: id of the trx for which the undo log
+ trx_id_t trx_id, /*!< in: id of the trx for which the undo log
is used */
- XID* xid, /* in: X/Open XA transaction identification */
- mtr_t* mtr) /* in: mtr */
+ const XID* xid, /*!< in: X/Open XA transaction identification */
+ mtr_t* mtr) /*!< in: mtr */
{
trx_undo_t* undo;
page_t* undo_page;
@@ -1599,7 +1660,8 @@ trx_undo_reuse_cached(
ut_error;
}
- undo_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
+ undo_page = trx_undo_page_get(undo->space, undo->zip_size,
+ undo->hdr_page_no, mtr);
if (type == TRX_UNDO_INSERT) {
offset = trx_undo_insert_header_reuse(undo_page, trx_id, mtr);
@@ -1626,47 +1688,56 @@ trx_undo_reuse_cached(
return(undo);
}
-/**************************************************************************
+/**********************************************************************//**
Marks an undo log header as a header of a data dictionary operation
transaction. */
static
void
trx_undo_mark_as_dict_operation(
/*============================*/
- trx_t* trx, /* in: dict op transaction */
- trx_undo_t* undo, /* in: assigned undo log */
- mtr_t* mtr) /* in: mtr */
+ trx_t* trx, /*!< in: dict op transaction */
+ trx_undo_t* undo, /*!< in: assigned undo log */
+ mtr_t* mtr) /*!< in: mtr */
{
page_t* hdr_page;
- ut_a(trx->dict_operation);
+ hdr_page = trx_undo_page_get(undo->space, undo->zip_size,
+ undo->hdr_page_no, mtr);
- hdr_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
+ switch (trx_get_dict_operation(trx)) {
+ case TRX_DICT_OP_NONE:
+ ut_error;
+ case TRX_DICT_OP_INDEX:
+ /* Do not discard the table on recovery. */
+ undo->table_id = ut_dulint_zero;
+ break;
+ case TRX_DICT_OP_TABLE:
+ undo->table_id = trx->table_id;
+ break;
+ }
mlog_write_ulint(hdr_page + undo->hdr_offset
+ TRX_UNDO_DICT_TRANS,
- trx->dict_operation, MLOG_1BYTE, mtr);
+ TRUE, MLOG_1BYTE, mtr);
mlog_write_dulint(hdr_page + undo->hdr_offset + TRX_UNDO_TABLE_ID,
- trx->table_id, mtr);
+ undo->table_id, mtr);
- undo->dict_operation = trx->dict_operation;
- undo->table_id = trx->table_id;
+ undo->dict_operation = TRUE;
}
-/**************************************************************************
+/**********************************************************************//**
Assigns an undo log for a transaction. A new undo log is created or a cached
-undo log reused. */
-
+undo log reused.
+@return DB_SUCCESS if the undo log was assigned successfully; possible
+error codes are: DB_TOO_MANY_CONCURRENT_TRXS, DB_OUT_OF_FILE_SPACE,
+DB_OUT_OF_MEMORY */
+UNIV_INTERN
ulint
trx_undo_assign_undo(
/*=================*/
- /* out: DB_SUCCESS if undo log assign
- successful, possible error codes are:
- DD_TOO_MANY_CONCURRENT_TRXS
- DB_OUT_OF_FILE_SPACE DB_OUT_OF_MEMORY*/
- trx_t* trx, /* in: transaction */
- ulint type) /* in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
+ trx_t* trx, /*!< in: transaction */
+ ulint type) /*!< in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
{
trx_rseg_t* rseg;
trx_undo_t* undo;
@@ -1707,7 +1778,7 @@ trx_undo_assign_undo(
trx->update_undo = undo;
}
- if (trx->dict_operation) {
+ if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
trx_undo_mark_as_dict_operation(trx, undo, &mtr);
}
@@ -1718,18 +1789,17 @@ func_exit:
return err;
}
-/**********************************************************************
-Sets the state of the undo log segment at a transaction finish. */
-
+/******************************************************************//**
+Sets the state of the undo log segment at a transaction finish.
+@return undo log segment header page, x-latched */
+UNIV_INTERN
page_t*
trx_undo_set_state_at_finish(
/*=========================*/
- /* out: undo log segment header page,
- x-latched */
- trx_rseg_t* rseg, /* in: rollback segment memory object */
- trx_t* trx __attribute__((unused)), /* in: transaction */
- trx_undo_t* undo, /* in: undo log memory copy */
- mtr_t* mtr) /* in: mtr */
+ trx_rseg_t* rseg, /*!< in: rollback segment memory object */
+ trx_t* trx __attribute__((unused)), /*!< in: transaction */
+ trx_undo_t* undo, /*!< in: undo log memory copy */
+ mtr_t* mtr) /*!< in: mtr */
{
trx_usegf_t* seg_hdr;
trx_upagef_t* page_hdr;
@@ -1748,7 +1818,8 @@ trx_undo_set_state_at_finish(
ut_error;
}
- undo_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
+ undo_page = trx_undo_page_get(undo->space, undo->zip_size,
+ undo->hdr_page_no, mtr);
seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
@@ -1785,17 +1856,16 @@ trx_undo_set_state_at_finish(
return(undo_page);
}
-/**********************************************************************
-Sets the state of the undo log segment at a transaction prepare. */
-
+/******************************************************************//**
+Sets the state of the undo log segment at a transaction prepare.
+@return undo log segment header page, x-latched */
+UNIV_INTERN
page_t*
trx_undo_set_state_at_prepare(
/*==========================*/
- /* out: undo log segment header page,
- x-latched */
- trx_t* trx, /* in: transaction */
- trx_undo_t* undo, /* in: undo log memory copy */
- mtr_t* mtr) /* in: mtr */
+ trx_t* trx, /*!< in: transaction */
+ trx_undo_t* undo, /*!< in: undo log memory copy */
+ mtr_t* mtr) /*!< in: mtr */
{
trx_usegf_t* seg_hdr;
trx_upagef_t* page_hdr;
@@ -1812,7 +1882,8 @@ trx_undo_set_state_at_prepare(
ut_error;
}
- undo_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
+ undo_page = trx_undo_page_get(undo->space, undo->zip_size,
+ undo->hdr_page_no, mtr);
seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
@@ -1836,18 +1907,18 @@ trx_undo_set_state_at_prepare(
return(undo_page);
}
-/**************************************************************************
+/**********************************************************************//**
Adds the update undo log header as the first in the history list, and
frees the memory object, or puts it to the list of cached update undo log
segments. */
-
+UNIV_INTERN
void
trx_undo_update_cleanup(
/*====================*/
- trx_t* trx, /* in: trx owning the update undo log */
- page_t* undo_page, /* in: update undo log header page,
+ trx_t* trx, /*!< in: trx owning the update undo log */
+ page_t* undo_page, /*!< in: update undo log header page,
x-latched */
- mtr_t* mtr) /* in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
{
trx_rseg_t* rseg;
trx_undo_t* undo;
@@ -1873,15 +1944,15 @@ trx_undo_update_cleanup(
}
}
-/**********************************************************************
+/******************************************************************//**
Frees or caches an insert undo log after a transaction commit or rollback.
Knowledge of inserts is not needed after a commit or rollback, therefore
the data can be discarded. */
-
+UNIV_INTERN
void
trx_undo_insert_cleanup(
/*====================*/
- trx_t* trx) /* in: transaction handle */
+ trx_t* trx) /*!< in: transaction handle */
{
trx_undo_t* undo;
trx_rseg_t* rseg;
@@ -1919,3 +1990,4 @@ trx_undo_insert_cleanup(
mutex_exit(&(rseg->mutex));
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/usr/usr0sess.c b/storage/innobase/usr/usr0sess.c
index 3740c05eaab..8087dcb4170 100644
--- a/storage/innobase/usr/usr0sess.c
+++ b/storage/innobase/usr/usr0sess.c
@@ -1,7 +1,24 @@
-/******************************************************
-Sessions
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
-(c) 1996 Innobase Oy
+/**************************************************//**
+@file usr/usr0sess.c
+Sessions
Created 6/25/1996 Heikki Tuuri
*******************************************************/
@@ -14,21 +31,13 @@ Created 6/25/1996 Heikki Tuuri
#include "trx0trx.h"
-/*************************************************************************
-Closes a session, freeing the memory occupied by it. */
-static
-void
-sess_close(
-/*=======*/
- sess_t* sess); /* in, own: session object */
-
-/*************************************************************************
-Opens a session. */
-
+/*********************************************************************//**
+Opens a session.
+@return own: session object */
+UNIV_INTERN
sess_t*
sess_open(void)
/*===========*/
- /* out, own: session object */
{
sess_t* sess;
@@ -45,37 +54,18 @@ sess_open(void)
return(sess);
}
-/*************************************************************************
+/*********************************************************************//**
Closes a session, freeing the memory occupied by it. */
-static
+UNIV_INTERN
void
sess_close(
/*=======*/
- sess_t* sess) /* in, own: session object */
-{
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(sess->trx == NULL);
-
- mem_free(sess);
-}
-
-/*************************************************************************
-Closes a session, freeing the memory occupied by it, if it is in a state
-where it should be closed. */
-
-ibool
-sess_try_close(
-/*===========*/
- /* out: TRUE if closed */
- sess_t* sess) /* in, own: session object */
+ sess_t* sess) /*!< in, own: session object */
{
- ut_ad(mutex_own(&kernel_mutex));
-
- if (UT_LIST_GET_LEN(sess->graphs) == 0) {
- sess_close(sess);
+ ut_ad(!mutex_own(&kernel_mutex));
- return(TRUE);
- }
+ ut_a(UT_LIST_GET_LEN(sess->graphs) == 0);
- return(FALSE);
+ trx_free_for_background(sess->trx);
+ mem_free(sess);
}
diff --git a/storage/innodb_plugin/ut/ut0auxconf_atomic_pthread_t_gcc.c b/storage/innobase/ut/ut0auxconf_atomic_pthread_t_gcc.c
index 30de5aa6f17..30de5aa6f17 100644
--- a/storage/innodb_plugin/ut/ut0auxconf_atomic_pthread_t_gcc.c
+++ b/storage/innobase/ut/ut0auxconf_atomic_pthread_t_gcc.c
diff --git a/storage/innodb_plugin/ut/ut0auxconf_atomic_pthread_t_solaris.c b/storage/innobase/ut/ut0auxconf_atomic_pthread_t_solaris.c
index 310603c7503..310603c7503 100644
--- a/storage/innodb_plugin/ut/ut0auxconf_atomic_pthread_t_solaris.c
+++ b/storage/innobase/ut/ut0auxconf_atomic_pthread_t_solaris.c
diff --git a/storage/innodb_plugin/ut/ut0auxconf_have_gcc_atomics.c b/storage/innobase/ut/ut0auxconf_have_gcc_atomics.c
index da5c13d7d79..da5c13d7d79 100644
--- a/storage/innodb_plugin/ut/ut0auxconf_have_gcc_atomics.c
+++ b/storage/innobase/ut/ut0auxconf_have_gcc_atomics.c
diff --git a/storage/innodb_plugin/ut/ut0auxconf_have_solaris_atomics.c b/storage/innobase/ut/ut0auxconf_have_solaris_atomics.c
index 7eb704edd4b..7eb704edd4b 100644
--- a/storage/innodb_plugin/ut/ut0auxconf_have_solaris_atomics.c
+++ b/storage/innobase/ut/ut0auxconf_have_solaris_atomics.c
diff --git a/storage/innodb_plugin/ut/ut0auxconf_pause.c b/storage/innobase/ut/ut0auxconf_pause.c
index 54d63bdd9bc..54d63bdd9bc 100644
--- a/storage/innodb_plugin/ut/ut0auxconf_pause.c
+++ b/storage/innobase/ut/ut0auxconf_pause.c
diff --git a/storage/innodb_plugin/ut/ut0auxconf_sizeof_pthread_t.c b/storage/innobase/ut/ut0auxconf_sizeof_pthread_t.c
index 96add4526ef..96add4526ef 100644
--- a/storage/innodb_plugin/ut/ut0auxconf_sizeof_pthread_t.c
+++ b/storage/innobase/ut/ut0auxconf_sizeof_pthread_t.c
diff --git a/storage/innobase/ut/ut0byte.c b/storage/innobase/ut/ut0byte.c
index b5467fde601..4e093f72ce2 100644
--- a/storage/innobase/ut/ut0byte.c
+++ b/storage/innobase/ut/ut0byte.c
@@ -1,7 +1,24 @@
-/*******************************************************************
-Byte utilities
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1994, 1995 Innobase Oy
+*****************************************************************************/
+
+/***************************************************************//**
+@file ut/ut0byte.c
+Byte utilities
Created 5/11/1994 Heikki Tuuri
********************************************************************/
@@ -12,20 +29,27 @@ Created 5/11/1994 Heikki Tuuri
#include "ut0byte.ic"
#endif
-#include "ut0sort.h"
+/** Zero value for a dulint */
+UNIV_INTERN const dulint ut_dulint_zero = {0, 0};
-/* Zero value for a dulint */
-dulint ut_dulint_zero = {0, 0};
+/** Maximum value for a dulint */
+UNIV_INTERN const dulint ut_dulint_max = {0xFFFFFFFFUL, 0xFFFFFFFFUL};
-/* Maximum value for a dulint */
-dulint ut_dulint_max = {0xFFFFFFFFUL, 0xFFFFFFFFUL};
+#ifdef notdefined /* unused code */
+#include "ut0sort.h"
-/****************************************************************
+/************************************************************//**
Sort function for dulint arrays. */
+UNIV_INTERN
void
-ut_dulint_sort(dulint* arr, dulint* aux_arr, ulint low, ulint high)
-/*===============================================================*/
+ut_dulint_sort(
+/*===========*/
+ dulint* arr, /*!< in/out: array to be sorted */
+ dulint* aux_arr,/*!< in/out: auxiliary array (same size as arr) */
+ ulint low, /*!< in: low bound of sort interval, inclusive */
+ ulint high) /*!< in: high bound of sort interval, noninclusive */
{
UT_SORT_FUNCTION_BODY(ut_dulint_sort, arr, aux_arr, low, high,
ut_dulint_cmp);
}
+#endif /* notdefined */
diff --git a/storage/innobase/ut/ut0dbg.c b/storage/innobase/ut/ut0dbg.c
index 8c4be190d77..4484e6c36de 100644
--- a/storage/innobase/ut/ut0dbg.c
+++ b/storage/innobase/ut/ut0dbg.c
@@ -1,50 +1,72 @@
-/*********************************************************************
-Debug utilities for Innobase.
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-(c) 1994, 1995 Innobase Oy
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*****************************************************************//**
+@file ut/ut0dbg.c
+Debug utilities for Innobase.
Created 1/30/1994 Heikki Tuuri
**********************************************************************/
#include "univ.i"
+#include "ut0dbg.h"
#if defined(__GNUC__) && (__GNUC__ > 2)
#else
-/* This is used to eliminate compiler warnings */
-ulint ut_dbg_zero = 0;
+/** This is used to eliminate compiler warnings */
+UNIV_INTERN ulint ut_dbg_zero = 0;
#endif
#if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT)
-/* If this is set to TRUE all threads will stop into the next assertion
-and assert */
-ibool ut_dbg_stop_threads = FALSE;
+/** If this is set to TRUE by ut_dbg_assertion_failed(), all threads
+will stop at the next ut_a() or ut_ad(). */
+UNIV_INTERN ibool ut_dbg_stop_threads = FALSE;
#endif
#ifdef __NETWARE__
-ibool panic_shutdown = FALSE; /* This is set to TRUE when on NetWare there
- happens an InnoDB assertion failure or other
- fatal error condition that requires an
- immediate shutdown. */
+/** Flag for ignoring further assertion failures. On NetWare, this is
+set to TRUE when an InnoDB assertion failure or another fatal error
+condition occurs that requires an immediate shutdown. */
+UNIV_INTERN ibool panic_shutdown = FALSE;
#elif !defined(UT_DBG_USE_ABORT)
-/* Null pointer used to generate memory trap */
-
-ulint* ut_dbg_null_ptr = NULL;
+/** A null pointer that will be dereferenced to trigger a memory trap */
+UNIV_INTERN ulint* ut_dbg_null_ptr = NULL;
#endif
-/*****************************************************************
+/*************************************************************//**
Report a failed assertion. */
-
+UNIV_INTERN
void
ut_dbg_assertion_failed(
/*====================*/
- const char* expr, /* in: the failed assertion (optional) */
- const char* file, /* in: source file containing the assertion */
- ulint line) /* in: line number of the assertion */
+ const char* expr, /*!< in: the failed assertion (optional) */
+ const char* file, /*!< in: source file containing the assertion */
+ ulint line) /*!< in: line number of the assertion */
{
ut_print_timestamp(stderr);
+#ifdef UNIV_HOTBACKUP
+ fprintf(stderr, " InnoDB: Assertion failure in file %s line %lu\n",
+ file, line);
+#else /* UNIV_HOTBACKUP */
fprintf(stderr,
" InnoDB: Assertion failure in thread %lu"
" in file %s line %lu\n",
os_thread_pf(os_thread_get_curr_id()), file, line);
+#endif /* UNIV_HOTBACKUP */
if (expr) {
fprintf(stderr,
"InnoDB: Failing assertion: %s\n", expr);
@@ -57,8 +79,7 @@ ut_dbg_assertion_failed(
" or crashes, even\n"
"InnoDB: immediately after the mysqld startup, there may be\n"
"InnoDB: corruption in the InnoDB tablespace. Please refer to\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "forcing-recovery.html\n"
+ "InnoDB: " REFMAN "forcing-recovery.html\n"
"InnoDB: about forcing recovery.\n", stderr);
#if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT)
ut_dbg_stop_threads = TRUE;
@@ -66,9 +87,9 @@ ut_dbg_assertion_failed(
}
#ifdef __NETWARE__
-/*****************************************************************
+/*************************************************************//**
Shut down MySQL/InnoDB after assertion failure. */
-
+UNIV_INTERN
void
ut_dbg_panic(void)
/*==============*/
@@ -81,18 +102,86 @@ ut_dbg_panic(void)
}
#else /* __NETWARE__ */
# if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT)
-/*****************************************************************
+/*************************************************************//**
Stop a thread after assertion failure. */
-
+UNIV_INTERN
void
ut_dbg_stop_thread(
/*===============*/
const char* file,
ulint line)
{
+#ifndef UNIV_HOTBACKUP
fprintf(stderr, "InnoDB: Thread %lu stopped in file %s line %lu\n",
os_thread_pf(os_thread_get_curr_id()), file, line);
os_thread_sleep(1000000000);
+#endif /* !UNIV_HOTBACKUP */
}
# endif
#endif /* __NETWARE__ */
+
+#ifdef UNIV_COMPILE_TEST_FUNCS
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include <unistd.h>
+
+#ifndef timersub
+#define timersub(a, b, r) \
+ do { \
+ (r)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
+ (r)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
+ if ((r)->tv_usec < 0) { \
+ (r)->tv_sec--; \
+ (r)->tv_usec += 1000000; \
+ } \
+ } while (0)
+#endif /* timersub */
+
+/*******************************************************************//**
+Resets a speedo (records the current time in it). */
+UNIV_INTERN
+void
+speedo_reset(
+/*=========*/
+ speedo_t* speedo) /*!< out: speedo */
+{
+ gettimeofday(&speedo->tv, NULL);
+
+ getrusage(RUSAGE_SELF, &speedo->ru);
+}
+
+/*******************************************************************//**
+Shows the time elapsed and usage statistics since the last reset of a
+speedo. */
+UNIV_INTERN
+void
+speedo_show(
+/*========*/
+ const speedo_t* speedo) /*!< in: speedo */
+{
+ struct rusage ru_now;
+ struct timeval tv_now;
+ struct timeval tv_diff;
+
+ getrusage(RUSAGE_SELF, &ru_now);
+
+ gettimeofday(&tv_now, NULL);
+
+#define PRINT_TIMEVAL(prefix, tvp) \
+ fprintf(stderr, "%s% 5ld.%06ld sec\n", \
+ prefix, (tvp)->tv_sec, (tvp)->tv_usec)
+
+ timersub(&tv_now, &speedo->tv, &tv_diff);
+ PRINT_TIMEVAL("real", &tv_diff);
+
+ timersub(&ru_now.ru_utime, &speedo->ru.ru_utime, &tv_diff);
+ PRINT_TIMEVAL("user", &tv_diff);
+
+ timersub(&ru_now.ru_stime, &speedo->ru.ru_stime, &tv_diff);
+ PRINT_TIMEVAL("sys ", &tv_diff);
+}
+
+#endif /* UNIV_COMPILE_TEST_FUNCS */
diff --git a/storage/innobase/ut/ut0list.c b/storage/innobase/ut/ut0list.c
index a0db7ff7b55..895a575c535 100644
--- a/storage/innobase/ut/ut0list.c
+++ b/storage/innobase/ut/ut0list.c
@@ -1,15 +1,40 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file ut/ut0list.c
+A doubly-linked list
+
+Created 4/26/2006 Osku Salerma
+************************************************************************/
+
#include "ut0list.h"
#ifdef UNIV_NONINL
#include "ut0list.ic"
#endif
-/********************************************************************
-Create a new list. */
-
+/****************************************************************//**
+Create a new list.
+@return list */
+UNIV_INTERN
ib_list_t*
ib_list_create(void)
/*=================*/
- /* out: list */
{
ib_list_t* list = mem_alloc(sizeof(ib_list_t));
@@ -20,15 +45,15 @@ ib_list_create(void)
return(list);
}
-/********************************************************************
+/****************************************************************//**
Create a new list using the given heap. ib_list_free MUST NOT BE CALLED for
-lists created with this function. */
-
+lists created with this function.
+@return list */
+UNIV_INTERN
ib_list_t*
ib_list_create_heap(
/*================*/
- /* out: list */
- mem_heap_t* heap) /* in: memory heap to use */
+ mem_heap_t* heap) /*!< in: memory heap to use */
{
ib_list_t* list = mem_heap_alloc(heap, sizeof(ib_list_t));
@@ -39,13 +64,13 @@ ib_list_create_heap(
return(list);
}
-/********************************************************************
+/****************************************************************//**
Free a list. */
-
+UNIV_INTERN
void
ib_list_free(
/*=========*/
- ib_list_t* list) /* in: list */
+ ib_list_t* list) /*!< in: list */
{
ut_a(!list->is_heap_list);
@@ -56,46 +81,46 @@ ib_list_free(
mem_free(list);
}
-/********************************************************************
-Add the data to the start of the list. */
-
+/****************************************************************//**
+Add the data to the start of the list.
+@return new list node */
+UNIV_INTERN
ib_list_node_t*
ib_list_add_first(
/*==============*/
- /* out: new list node*/
- ib_list_t* list, /* in: list */
- void* data, /* in: data */
- mem_heap_t* heap) /* in: memory heap to use */
+ ib_list_t* list, /*!< in: list */
+ void* data, /*!< in: data */
+ mem_heap_t* heap) /*!< in: memory heap to use */
{
return(ib_list_add_after(list, ib_list_get_first(list), data, heap));
}
-/********************************************************************
-Add the data to the end of the list. */
-
+/****************************************************************//**
+Add the data to the end of the list.
+@return new list node */
+UNIV_INTERN
ib_list_node_t*
ib_list_add_last(
/*=============*/
- /* out: new list node*/
- ib_list_t* list, /* in: list */
- void* data, /* in: data */
- mem_heap_t* heap) /* in: memory heap to use */
+ ib_list_t* list, /*!< in: list */
+ void* data, /*!< in: data */
+ mem_heap_t* heap) /*!< in: memory heap to use */
{
return(ib_list_add_after(list, ib_list_get_last(list), data, heap));
}
-/********************************************************************
-Add the data after the indicated node. */
-
+/****************************************************************//**
+Add the data after the indicated node.
+@return new list node */
+UNIV_INTERN
ib_list_node_t*
ib_list_add_after(
/*==============*/
- /* out: new list node*/
- ib_list_t* list, /* in: list */
- ib_list_node_t* prev_node, /* in: node preceding new node (can
+ ib_list_t* list, /*!< in: list */
+ ib_list_node_t* prev_node, /*!< in: node preceding new node (can
be NULL) */
- void* data, /* in: data */
- mem_heap_t* heap) /* in: memory heap to use */
+ void* data, /*!< in: data */
+ mem_heap_t* heap) /*!< in: memory heap to use */
{
ib_list_node_t* node = mem_heap_alloc(heap, sizeof(ib_list_node_t));
@@ -138,14 +163,14 @@ ib_list_add_after(
return(node);
}
-/********************************************************************
+/****************************************************************//**
Remove the node from the list. */
-
+UNIV_INTERN
void
ib_list_remove(
/*===========*/
- ib_list_t* list, /* in: list */
- ib_list_node_t* node) /* in: node to remove */
+ ib_list_t* list, /*!< in: list */
+ ib_list_node_t* node) /*!< in: node to remove */
{
if (node->prev) {
node->prev->next = node->next;
diff --git a/storage/innobase/ut/ut0mem.c b/storage/innobase/ut/ut0mem.c
index b466a5f6872..35a325b9ccd 100644
--- a/storage/innobase/ut/ut0mem.c
+++ b/storage/innobase/ut/ut0mem.c
@@ -1,7 +1,24 @@
-/************************************************************************
-Memory primitives
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
-(c) 1994, 1995 Innobase Oy
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file ut/ut0mem.c
+Memory primitives
Created 5/11/1994 Heikki Tuuri
*************************************************************************/
@@ -12,70 +29,96 @@ Created 5/11/1994 Heikki Tuuri
#include "ut0mem.ic"
#endif
-#include "mem0mem.h"
-#include "os0sync.h"
-#include "os0thread.h"
+#ifndef UNIV_HOTBACKUP
+# include "os0thread.h"
+# include "srv0srv.h"
+
+#include <stdlib.h>
-/* This struct is placed first in every allocated memory block */
+/** This struct is placed first in every allocated memory block */
typedef struct ut_mem_block_struct ut_mem_block_t;
-/* The total amount of memory currently allocated from the OS with malloc */
-ulint ut_total_allocated_memory = 0;
+/** The total amount of memory currently allocated from the operating
+system with os_mem_alloc_large() or malloc(). Does not count malloc()
+if srv_use_sys_malloc is set. Protected by ut_list_mutex. */
+UNIV_INTERN ulint ut_total_allocated_memory = 0;
+
+/** Mutex protecting ut_total_allocated_memory and ut_mem_block_list */
+UNIV_INTERN os_fast_mutex_t ut_list_mutex;
+/** Dynamically allocated memory block */
struct ut_mem_block_struct{
UT_LIST_NODE_T(ut_mem_block_t) mem_block_list;
- /* mem block list node */
- ulint size; /* size of allocated memory */
- ulint magic_n;
+ /*!< mem block list node */
+ ulint size; /*!< size of allocated memory */
+ ulint magic_n;/*!< magic number (UT_MEM_MAGIC_N) */
};
+/** The value of ut_mem_block_struct::magic_n. Used in detecting
+memory corruption. */
#define UT_MEM_MAGIC_N 1601650166
-/* List of all memory blocks allocated from the operating system
-with malloc */
-UT_LIST_BASE_NODE_T(ut_mem_block_t) ut_mem_block_list;
+/** List of all memory blocks allocated from the operating system
+with malloc. Protected by ut_list_mutex. */
+static UT_LIST_BASE_NODE_T(ut_mem_block_t) ut_mem_block_list;
-os_fast_mutex_t ut_list_mutex; /* this protects the list */
+/** Flag: has ut_mem_block_list been initialized? */
+static ibool ut_mem_block_list_inited = FALSE;
-ibool ut_mem_block_list_inited = FALSE;
+/** A dummy pointer for generating a null pointer exception in
+ut_malloc_low() */
+static ulint* ut_mem_null_ptr = NULL;
-ulint* ut_mem_null_ptr = NULL;
-
-/**************************************************************************
+/**********************************************************************//**
Initializes the mem block list at database startup. */
-static
+UNIV_INTERN
void
-ut_mem_block_list_init(void)
-/*========================*/
+ut_mem_init(void)
+/*=============*/
{
+ ut_a(!ut_mem_block_list_inited);
os_fast_mutex_init(&ut_list_mutex);
UT_LIST_INIT(ut_mem_block_list);
ut_mem_block_list_inited = TRUE;
}
+#endif /* !UNIV_HOTBACKUP */
-/**************************************************************************
+/**********************************************************************//**
Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is
-defined and set_to_zero is TRUE. */
-
+defined and set_to_zero is TRUE.
+@return own: allocated memory; can be NULL when assert_on_error is FALSE */
+UNIV_INTERN
void*
ut_malloc_low(
/*==========*/
- /* out, own: allocated memory */
- ulint n, /* in: number of bytes to allocate */
- ibool set_to_zero, /* in: TRUE if allocated memory should be
+ ulint n, /*!< in: number of bytes to allocate */
+ ibool set_to_zero, /*!< in: TRUE if allocated memory should be
set to zero if UNIV_SET_MEM_TO_ZERO is
defined */
- ibool assert_on_error)/* in: if TRUE, we crash mysqld if the
+ ibool assert_on_error)/*!< in: if TRUE, we crash mysqld if the
memory cannot be allocated */
{
- ulint retry_count = 0;
+#ifndef UNIV_HOTBACKUP
+ ulint retry_count;
void* ret;
- ut_ad((sizeof(ut_mem_block_t) % 8) == 0); /* check alignment ok */
+ if (UNIV_LIKELY(srv_use_sys_malloc)) {
+ ret = malloc(n);
+ ut_a(ret || !assert_on_error);
- if (!ut_mem_block_list_inited) {
- ut_mem_block_list_init();
+#ifdef UNIV_SET_MEM_TO_ZERO
+ if (set_to_zero && ret) { /* ret is NULL if malloc() failed */
+ memset(ret, '\0', n);
+ UNIV_MEM_ALLOC(ret, n);
+ }
+#endif
+ return(ret);
}
+
+ ut_ad((sizeof(ut_mem_block_t) % 8) == 0); /* check alignment ok */
+ ut_a(ut_mem_block_list_inited);
+
+ retry_count = 0;
retry:
os_fast_mutex_lock(&ut_list_mutex);
@@ -174,31 +217,47 @@ retry:
os_fast_mutex_unlock(&ut_list_mutex);
return((void*)((byte*)ret + sizeof(ut_mem_block_t)));
+#else /* !UNIV_HOTBACKUP */
+ void* ret = malloc(n);
+ ut_a(ret || !assert_on_error);
+
+# ifdef UNIV_SET_MEM_TO_ZERO
+ if (set_to_zero && ret) { /* ret is NULL if malloc() failed */
+ memset(ret, '\0', n);
+ }
+# endif
+ return(ret);
+#endif /* !UNIV_HOTBACKUP */
}
-/**************************************************************************
+/**********************************************************************//**
Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is
-defined. */
-
+defined.
+@return own: allocated memory */
+UNIV_INTERN
void*
ut_malloc(
/*======*/
- /* out, own: allocated memory */
- ulint n) /* in: number of bytes to allocate */
+ ulint n) /*!< in: number of bytes to allocate */
{
+#ifndef UNIV_HOTBACKUP
return(ut_malloc_low(n, TRUE, TRUE));
+#else /* !UNIV_HOTBACKUP */
+ return(malloc(n));
+#endif /* !UNIV_HOTBACKUP */
}
-/**************************************************************************
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
Tests if malloc of n bytes would succeed. ut_malloc() asserts if memory runs
out. It cannot be used if we want to return an error message. Prints to
-stderr a message if fails. */
-
+stderr a message if fails.
+@return TRUE if succeeded */
+UNIV_INTERN
ibool
ut_test_malloc(
/*===========*/
- /* out: TRUE if succeeded */
- ulint n) /* in: try to allocate this many bytes */
+ ulint n) /*!< in: try to allocate this many bytes */
{
void* ret;
@@ -228,17 +287,24 @@ ut_test_malloc(
return(TRUE);
}
+#endif /* !UNIV_HOTBACKUP */
-/**************************************************************************
+/**********************************************************************//**
Frees a memory block allocated with ut_malloc. */
-
+UNIV_INTERN
void
ut_free(
/*====*/
- void* ptr) /* in, own: memory block */
+ void* ptr) /*!< in, own: memory block */
{
+#ifndef UNIV_HOTBACKUP
ut_mem_block_t* block;
+ if (UNIV_LIKELY(srv_use_sys_malloc)) {
+ free(ptr);
+ return;
+ }
+
block = (ut_mem_block_t*)((byte*)ptr - sizeof(ut_mem_block_t));
os_fast_mutex_lock(&ut_list_mutex);
@@ -252,9 +318,13 @@ ut_free(
free(block);
os_fast_mutex_unlock(&ut_list_mutex);
+#else /* !UNIV_HOTBACKUP */
+ free(ptr);
+#endif /* !UNIV_HOTBACKUP */
}
-/**************************************************************************
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
Implements realloc. This is needed by /pars/lexyy.c. Otherwise, you should not
use this function because the allocation functions in mem0mem.h are the
recommended ones in InnoDB.
@@ -263,7 +333,7 @@ man realloc in Linux, 2004:
realloc() changes the size of the memory block pointed to
by ptr to size bytes. The contents will be unchanged to
- the minimum of the old and new sizes; newly allocated mem­
+ the minimum of the old and new sizes; newly allocated mem-
ory will be uninitialized. If ptr is NULL, the call is
equivalent to malloc(size); if size is equal to zero, the
call is equivalent to free(ptr). Unless ptr is NULL, it
@@ -277,20 +347,24 @@ RETURN VALUE
size was equal to 0, either NULL or a pointer suitable to
be passed to free() is returned. If realloc() fails the
original block is left untouched - it is not freed or
- moved. */
-
+ moved.
+@return own: pointer to new mem block or NULL */
+UNIV_INTERN
void*
ut_realloc(
/*=======*/
- /* out, own: pointer to new mem block or NULL */
- void* ptr, /* in: pointer to old block or NULL */
- ulint size) /* in: desired size */
+ void* ptr, /*!< in: pointer to old block or NULL */
+ ulint size) /*!< in: desired size */
{
ut_mem_block_t* block;
ulint old_size;
ulint min_size;
void* new_ptr;
+ if (UNIV_LIKELY(srv_use_sys_malloc)) {
+ return(realloc(ptr, size));
+ }
+
if (ptr == NULL) {
return(ut_malloc(size));
@@ -329,15 +403,17 @@ ut_realloc(
return(new_ptr);
}
-/**************************************************************************
+/**********************************************************************//**
Frees in shutdown all allocated memory not freed yet. */
-
+UNIV_INTERN
void
ut_free_all_mem(void)
/*=================*/
{
ut_mem_block_t* block;
+ ut_a(ut_mem_block_list_inited);
+ ut_mem_block_list_inited = FALSE;
os_fast_mutex_free(&ut_list_mutex);
while ((block = UT_LIST_GET_FIRST(ut_mem_block_list))) {
@@ -357,20 +433,23 @@ ut_free_all_mem(void)
" total allocated memory is %lu\n",
(ulong) ut_total_allocated_memory);
}
+
+ ut_mem_block_list_inited = FALSE;
}
+#endif /* !UNIV_HOTBACKUP */
-/**************************************************************************
+/**********************************************************************//**
Copies up to size - 1 characters from the NUL-terminated string src to
dst, NUL-terminating the result. Returns strlen(src), so truncation
-occurred if the return value >= size. */
-
+occurred if the return value >= size.
+@return strlen(src) */
+UNIV_INTERN
ulint
ut_strlcpy(
/*=======*/
- /* out: strlen(src) */
- char* dst, /* in: destination buffer */
- const char* src, /* in: source buffer */
- ulint size) /* in: size of destination buffer */
+ char* dst, /*!< in: destination buffer */
+ const char* src, /*!< in: source buffer */
+ ulint size) /*!< in: size of destination buffer */
{
ulint src_size = strlen(src);
@@ -384,17 +463,17 @@ ut_strlcpy(
return(src_size);
}
-/**************************************************************************
+/**********************************************************************//**
Like ut_strlcpy, but if src doesn't fit in dst completely, copies the last
-(size - 1) bytes of src, not the first. */
-
+(size - 1) bytes of src, not the first.
+@return strlen(src) */
+UNIV_INTERN
ulint
ut_strlcpy_rev(
/*===========*/
- /* out: strlen(src) */
- char* dst, /* in: destination buffer */
- const char* src, /* in: source buffer */
- ulint size) /* in: size of destination buffer */
+ char* dst, /*!< in: destination buffer */
+ const char* src, /*!< in: source buffer */
+ ulint size) /*!< in: size of destination buffer */
{
ulint src_size = strlen(src);
@@ -407,18 +486,18 @@ ut_strlcpy_rev(
return(src_size);
}
-/**************************************************************************
+/**********************************************************************//**
Make a quoted copy of a NUL-terminated string. Leading and trailing
quotes will not be included; only embedded quotes will be escaped.
-See also ut_strlenq() and ut_memcpyq(). */
-
+See also ut_strlenq() and ut_memcpyq().
+@return pointer to end of dest */
+UNIV_INTERN
char*
ut_strcpyq(
/*=======*/
- /* out: pointer to end of dest */
- char* dest, /* in: output buffer */
- char q, /* in: the quote character */
- const char* src) /* in: null-terminated string */
+ char* dest, /*!< in: output buffer */
+ char q, /*!< in: the quote character */
+ const char* src) /*!< in: null-terminated string */
{
while (*src) {
if ((*dest++ = *src++) == q) {
@@ -429,19 +508,19 @@ ut_strcpyq(
return(dest);
}
-/**************************************************************************
+/**********************************************************************//**
Make a quoted copy of a fixed-length string. Leading and trailing
quotes will not be included; only embedded quotes will be escaped.
-See also ut_strlenq() and ut_strcpyq(). */
-
+See also ut_strlenq() and ut_strcpyq().
+@return pointer to end of dest */
+UNIV_INTERN
char*
ut_memcpyq(
/*=======*/
- /* out: pointer to end of dest */
- char* dest, /* in: output buffer */
- char q, /* in: the quote character */
- const char* src, /* in: string to be quoted */
- ulint len) /* in: length of src */
+ char* dest, /*!< in: output buffer */
+ char q, /*!< in: the quote character */
+ const char* src, /*!< in: string to be quoted */
+ ulint len) /*!< in: length of src */
{
const char* srcend = src + len;
@@ -454,16 +533,17 @@ ut_memcpyq(
return(dest);
}
-/**************************************************************************
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
Return the number of times s2 occurs in s1. Overlapping instances of s2
-are only counted once. */
-
+are only counted once.
+@return the number of times s2 occurs in s1 */
+UNIV_INTERN
ulint
ut_strcount(
/*========*/
- /* out: the number of times s2 occurs in s1 */
- const char* s1, /* in: string to search in */
- const char* s2) /* in: string to search for */
+ const char* s1, /*!< in: string to search in */
+ const char* s2) /*!< in: string to search for */
{
ulint count = 0;
ulint len = strlen(s2);
@@ -488,18 +568,17 @@ ut_strcount(
return(count);
}
-/**************************************************************************
+/**********************************************************************//**
Replace every occurrence of s1 in str with s2. Overlapping instances of s1
-are only replaced once. */
-
-char *
+are only replaced once.
+@return own: modified string, must be freed with mem_free() */
+UNIV_INTERN
+char*
ut_strreplace(
/*==========*/
- /* out, own: modified string, must be
- freed with mem_free() */
- const char* str, /* in: string to operate on */
- const char* s1, /* in: string to replace */
- const char* s2) /* in: string to replace s1 with */
+ const char* str, /*!< in: string to operate on */
+ const char* s1, /*!< in: string to replace */
+ const char* s2) /*!< in: string to replace s1 with */
{
char* new_str;
char* ptr;
@@ -546,3 +625,84 @@ ut_strreplace(
return(new_str);
}
+
+#ifdef UNIV_COMPILE_TEST_FUNCS
+
+void
+test_ut_str_sql_format()
+{
+ char buf[128];
+ ulint ret;
+
+#define CALL_AND_TEST(str, str_len, buf, buf_size, ret_expected, buf_expected)\
+ do {\
+ ibool ok = TRUE;\
+ memset(buf, 'x', 10);\
+ buf[10] = '\0';\
+ fprintf(stderr, "TESTING \"%s\", %lu, %lu\n",\
+ str, (ulint) str_len, (ulint) buf_size);\
+ ret = ut_str_sql_format(str, str_len, buf, buf_size);\
+ if (ret != ret_expected) {\
+ fprintf(stderr, "expected ret %lu, got %lu\n",\
+ (ulint) ret_expected, ret);\
+ ok = FALSE;\
+ }\
+ if (strcmp((char*) buf, buf_expected) != 0) {\
+ fprintf(stderr, "expected buf \"%s\", got \"%s\"\n",\
+ buf_expected, buf);\
+ ok = FALSE;\
+ }\
+ if (ok) {\
+ fprintf(stderr, "OK: %lu, \"%s\"\n\n",\
+ (ulint) ret, buf);\
+ } else {\
+ return;\
+ }\
+ } while (0)
+
+ CALL_AND_TEST("abcd", 4, buf, 0, 0, "xxxxxxxxxx");
+
+ CALL_AND_TEST("abcd", 4, buf, 1, 1, "");
+
+ CALL_AND_TEST("abcd", 4, buf, 2, 1, "");
+
+ CALL_AND_TEST("abcd", 0, buf, 3, 3, "''");
+ CALL_AND_TEST("abcd", 1, buf, 3, 1, "");
+ CALL_AND_TEST("abcd", 2, buf, 3, 1, "");
+ CALL_AND_TEST("abcd", 3, buf, 3, 1, "");
+ CALL_AND_TEST("abcd", 4, buf, 3, 1, "");
+
+ CALL_AND_TEST("abcd", 0, buf, 4, 3, "''");
+ CALL_AND_TEST("abcd", 1, buf, 4, 4, "'a'");
+ CALL_AND_TEST("abcd", 2, buf, 4, 4, "'a'");
+ CALL_AND_TEST("abcd", 3, buf, 4, 4, "'a'");
+ CALL_AND_TEST("abcd", 4, buf, 4, 4, "'a'");
+ CALL_AND_TEST("abcde", 5, buf, 4, 4, "'a'");
+ CALL_AND_TEST("'", 1, buf, 4, 3, "''");
+ CALL_AND_TEST("''", 2, buf, 4, 3, "''");
+ CALL_AND_TEST("a'", 2, buf, 4, 4, "'a'");
+ CALL_AND_TEST("'a", 2, buf, 4, 3, "''");
+ CALL_AND_TEST("ab", 2, buf, 4, 4, "'a'");
+
+ CALL_AND_TEST("abcdef", 0, buf, 5, 3, "''");
+ CALL_AND_TEST("abcdef", 1, buf, 5, 4, "'a'");
+ CALL_AND_TEST("abcdef", 2, buf, 5, 5, "'ab'");
+ CALL_AND_TEST("abcdef", 3, buf, 5, 5, "'ab'");
+ CALL_AND_TEST("abcdef", 4, buf, 5, 5, "'ab'");
+ CALL_AND_TEST("abcdef", 5, buf, 5, 5, "'ab'");
+ CALL_AND_TEST("abcdef", 6, buf, 5, 5, "'ab'");
+ CALL_AND_TEST("'", 1, buf, 5, 5, "''''");
+ CALL_AND_TEST("''", 2, buf, 5, 5, "''''");
+ CALL_AND_TEST("a'", 2, buf, 5, 4, "'a'");
+ CALL_AND_TEST("'a", 2, buf, 5, 5, "''''");
+ CALL_AND_TEST("ab", 2, buf, 5, 5, "'ab'");
+ CALL_AND_TEST("abc", 3, buf, 5, 5, "'ab'");
+
+ CALL_AND_TEST("ab", 2, buf, 6, 5, "'ab'");
+
+ CALL_AND_TEST("a'b'c", 5, buf, 32, 10, "'a''b''c'");
+ CALL_AND_TEST("a'b'c'", 6, buf, 32, 12, "'a''b''c'''");
+}
+
+#endif /* UNIV_COMPILE_TEST_FUNCS */
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/ut/ut0rnd.c b/storage/innobase/ut/ut0rnd.c
index 016809e0474..cefd0990ecc 100644
--- a/storage/innobase/ut/ut0rnd.c
+++ b/storage/innobase/ut/ut0rnd.c
@@ -1,7 +1,24 @@
-/*******************************************************************
-Random numbers and hashing
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
-(c) 1994, 1995 Innobase Oy
+*****************************************************************************/
+
+/***************************************************************//**
+@file ut/ut0rnd.c
+Random numbers and hashing
Created 5/11/1994 Heikki Tuuri
********************************************************************/
@@ -12,23 +29,25 @@ Created 5/11/1994 Heikki Tuuri
#include "ut0rnd.ic"
#endif
-/* These random numbers are used in ut_find_prime */
+/** These random numbers are used in ut_find_prime */
+/*@{*/
#define UT_RANDOM_1 1.0412321
#define UT_RANDOM_2 1.1131347
#define UT_RANDOM_3 1.0132677
+/*@}*/
+/** Seed value of ut_rnd_gen_ulint(). */
+UNIV_INTERN ulint ut_rnd_ulint_counter = 65654363;
-ulint ut_rnd_ulint_counter = 65654363;
-
-/***************************************************************
+/***********************************************************//**
Looks for a prime number slightly greater than the given argument.
-The prime is chosen so that it is not near any power of 2. */
-
+The prime is chosen so that it is not near any power of 2.
+@return prime */
+UNIV_INTERN
ulint
ut_find_prime(
/*==========*/
- /* out: prime */
- ulint n) /* in: positive number > 100 */
+ ulint n) /*!< in: positive number > 100 */
{
ulint pow2;
ulint i;
diff --git a/storage/innobase/ut/ut0ut.c b/storage/innobase/ut/ut0ut.c
index 3b26d83bbb9..498873e290a 100644
--- a/storage/innobase/ut/ut0ut.c
+++ b/storage/innobase/ut/ut0ut.c
@@ -1,7 +1,31 @@
-/*******************************************************************
-Various utilities for Innobase.
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2009, Sun Microsystems, Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Sun Microsystems, Inc. Those modifications are gratefully acknowledged and
+are described briefly in the InnoDB documentation. The contributions by
+Sun Microsystems are incorporated with their permission, and subject to the
+conditions contained in the file COPYING.Sun_Microsystems.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
-(c) 1994, 1995 Innobase Oy
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/***************************************************************//**
+@file ut/ut0ut.c
+Various utilities for Innobase.
Created 5/11/1994 Heikki Tuuri
********************************************************************/
@@ -16,33 +40,35 @@ Created 5/11/1994 Heikki Tuuri
#include <string.h>
#include <ctype.h>
-#include "ut0sort.h"
-#include "trx0trx.h"
-#include "ha_prototypes.h"
-#include "mysql_com.h" /* NAME_LEN */
+#ifndef UNIV_HOTBACKUP
+# include "trx0trx.h"
+# include "ha_prototypes.h"
+# include "mysql_com.h" /* NAME_LEN */
+#endif /* UNIV_HOTBACKUP */
-ibool ut_always_false = FALSE;
+/** A constant to prevent the compiler from optimizing ut_delay() away. */
+UNIV_INTERN ibool ut_always_false = FALSE;
#ifdef __WIN__
-/*********************************************************************
+/*****************************************************************//**
NOTE: The Windows epoch starts from 1601/01/01 whereas the Unix
epoch starts from 1970/1/1. For selection of constant see:
http://support.microsoft.com/kb/167296/ */
-#define WIN_TO_UNIX_DELTA_USEC ((ib_longlong) 11644473600000000ULL)
+#define WIN_TO_UNIX_DELTA_USEC ((ib_int64_t) 11644473600000000ULL)
-/*********************************************************************
-This is the Windows version of gettimeofday(2).*/
+/*****************************************************************//**
+This is the Windows version of gettimeofday(2).
+@return 0 if all OK else -1 */
static
int
ut_gettimeofday(
/*============*/
- /* out: 0 if all OK else -1 */
- struct timeval* tv, /* out: Values are relative to Unix epoch */
- void* tz) /* in: not used */
+ struct timeval* tv, /*!< out: Values are relative to Unix epoch */
+ void* tz) /*!< in: not used */
{
FILETIME ft;
- ib_longlong tm;
+ ib_int64_t tm;
if (!tv) {
errno = EINVAL;
@@ -51,7 +77,7 @@ ut_gettimeofday(
GetSystemTimeAsFileTime(&ft);
- tm = (ib_longlong) ft.dwHighDateTime << 32;
+ tm = (ib_int64_t) ft.dwHighDateTime << 32;
tm |= ft.dwLowDateTime;
ut_a(tm >= 0); /* If tm wraps over to negative, the quotient / 10
@@ -69,42 +95,36 @@ ut_gettimeofday(
return(0);
}
#else
+/** An alias for gettimeofday(2). On Microsoft Windows, we have to
+reimplement this function. */
#define ut_gettimeofday gettimeofday
#endif
-/************************************************************
+/********************************************************//**
Gets the high 32 bits in a ulint. That is makes a shift >> 32,
but since there seem to be compiler bugs in both gcc and Visual C++,
-we do this by a special conversion. */
-
+we do this by a special conversion through an unsigned 64-bit integer.
+@return a >> 32, zero-filled from the left */
+UNIV_INTERN
ulint
ut_get_high32(
/*==========*/
- /* out: a >> 32 */
- ulint a) /* in: ulint */
+ ulint a) /*!< in: ulint */
{
- ib_longlong i;
+ ullint i; /* unsigned, so that >> cannot sign-extend */
- i = (ib_longlong)a;
+ i = (ullint) a;
i = i >> 32;
return((ulint)i);
}
-/************************************************************
-The following function returns elapsed CPU time in milliseconds. */
-
-ulint
-ut_clock(void)
-{
- return((clock() * 1000) / CLOCKS_PER_SEC);
-}
-
-/**************************************************************
+/**********************************************************//**
Returns system time. We do not specify the format of the time returned:
-the only way to manipulate it is to use the function ut_difftime. */
-
+the only way to manipulate it is to use the function ut_difftime.
+@return system time */
+UNIV_INTERN
ib_time_t
ut_time(void)
/*=========*/
@@ -112,18 +132,19 @@ ut_time(void)
return(time(NULL));
}
-/**************************************************************
+#ifndef UNIV_HOTBACKUP
+/**********************************************************//**
Returns system time.
Upon successful completion, the value 0 is returned; otherwise the
value -1 is returned and the global variable errno is set to indicate the
-error. */
-
+error.
+@return 0 on success, -1 otherwise */
+UNIV_INTERN
int
ut_usectime(
/*========*/
- /* out: 0 on success, -1 otherwise */
- ulint* sec, /* out: seconds since the Epoch */
- ulint* ms) /* out: microseconds since the Epoch+*sec */
+ ulint* sec, /*!< out: seconds since the Epoch */
+ ulint* ms) /*!< out: microseconds since the Epoch+*sec */
{
struct timeval tv;
int ret;
@@ -154,26 +175,69 @@ ut_usectime(
return(ret);
}
-/**************************************************************
-Returns the difference of two times in seconds. */
+/**********************************************************//**
+Reads the current time and converts it to microseconds elapsed
+since the epoch. Like time(3), the result is additionally
+written to *tloc whenever tloc is non-NULL.
+@return us since epoch */
+UNIV_INTERN
+ullint
+ut_time_us(
+/*=======*/
+ ullint* tloc) /*!< out: us since epoch, if non-NULL */
+{
+ struct timeval now;
+ ullint usec;
+
+ ut_gettimeofday(&now, NULL);
+
+ usec = (ullint) now.tv_sec * 1000000;
+ usec += now.tv_usec;
+ if (tloc) {
+ *tloc = usec;
+ }
+
+ return(usec);
+}
+
+/**********************************************************//**
+Gives a millisecond-resolution reading of the current time. The
+result is computed in ulint and may therefore wrap around, so it
+should only be used for heuristic purposes.
+@return ms since epoch */
+UNIV_INTERN
+ulint
+ut_time_ms(void)
+/*============*/
+{
+ struct timeval now;
+
+ ut_gettimeofday(&now, NULL);
+ return(now.tv_usec / 1000 + (ulint) now.tv_sec * 1000);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/**********************************************************//**
+Returns the difference of two times in seconds.
+@return time2 - time1 expressed in seconds */
+UNIV_INTERN
double
ut_difftime(
/*========*/
- /* out: time2 - time1 expressed in seconds */
- ib_time_t time2, /* in: time */
- ib_time_t time1) /* in: time */
+ ib_time_t time2, /*!< in: time */
+ ib_time_t time1) /*!< in: time */
{
return(difftime(time2, time1));
}
-/**************************************************************
+/**********************************************************//**
Prints a timestamp to a file. */
-
+UNIV_INTERN
void
ut_print_timestamp(
/*===============*/
- FILE* file) /* in: file where to print */
+ FILE* file) /*!< in: file where to print */
{
#ifdef __WIN__
SYSTEMTIME cal_tm;
@@ -210,13 +274,13 @@ ut_print_timestamp(
#endif
}
-/**************************************************************
+/**********************************************************//**
Sprintfs a timestamp to a buffer, 13..14 chars plus terminating NUL. */
-
+UNIV_INTERN
void
ut_sprintf_timestamp(
/*=================*/
- char* buf) /* in: buffer where to sprintf */
+ char* buf) /*!< in: buffer where to sprintf */
{
#ifdef __WIN__
SYSTEMTIME cal_tm;
@@ -253,14 +317,15 @@ ut_sprintf_timestamp(
#endif
}
-/**************************************************************
+#ifdef UNIV_HOTBACKUP
+/**********************************************************//**
Sprintfs a timestamp to a buffer with no spaces and with ':' characters
replaced by '_'. */
-
+UNIV_INTERN
void
ut_sprintf_timestamp_without_extra_chars(
/*=====================================*/
- char* buf) /* in: buffer where to sprintf */
+ char* buf) /*!< in: buffer where to sprintf */
{
#ifdef __WIN__
SYSTEMTIME cal_tm;
@@ -297,15 +362,15 @@ ut_sprintf_timestamp_without_extra_chars(
#endif
}
-/**************************************************************
+/**********************************************************//**
Returns current year, month, day. */
-
+UNIV_INTERN
void
ut_get_year_month_day(
/*==================*/
- ulint* year, /* out: current year */
- ulint* month, /* out: month */
- ulint* day) /* out: day */
+ ulint* year, /*!< out: current year */
+ ulint* month, /*!< out: month */
+ ulint* day) /*!< out: day */
{
#ifdef __WIN__
SYSTEMTIME cal_tm;
@@ -333,16 +398,18 @@ ut_get_year_month_day(
*day = (ulint)cal_tm_ptr->tm_mday;
#endif
}
+#endif /* UNIV_HOTBACKUP */
-/*****************************************************************
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
Runs an idle loop on CPU. The argument gives the desired delay
-in microseconds on 100 MHz Pentium + Visual C++. */
-
+in microseconds on 100 MHz Pentium + Visual C++.
+@return dummy value */
+UNIV_INTERN
ulint
ut_delay(
/*=====*/
- /* out: dummy value */
- ulint delay) /* in: delay in microseconds on 100 MHz Pentium */
+ ulint delay) /*!< in: delay in microseconds on 100 MHz Pentium */
{
ulint i, j;
@@ -350,6 +417,7 @@ ut_delay(
for (i = 0; i < delay * 50; i++) {
j += i;
+ UT_RELAX_CPU();
}
if (ut_always_false) {
@@ -358,16 +426,17 @@ ut_delay(
return(j);
}
+#endif /* !UNIV_HOTBACKUP */
-/*****************************************************************
+/*************************************************************//**
Prints the contents of a memory buffer in hex and ascii. */
-
+UNIV_INTERN
void
ut_print_buf(
/*=========*/
- FILE* file, /* in: file where to print */
- const void* buf, /* in: memory buffer */
- ulint len) /* in: length of the buffer */
+ FILE* file, /*!< in: file where to print */
+ const void* buf, /*!< in: memory buffer */
+ ulint len) /*!< in: length of the buffer */
{
const byte* data;
ulint i;
@@ -392,25 +461,14 @@ ut_print_buf(
putc(';', file);
}
-/****************************************************************
-Sort function for ulint arrays. */
-
-void
-ut_ulint_sort(ulint* arr, ulint* aux_arr, ulint low, ulint high)
-/*============================================================*/
-{
- UT_SORT_FUNCTION_BODY(ut_ulint_sort, arr, aux_arr, low, high,
- ut_ulint_cmp);
-}
-
-/*****************************************************************
-Calculates fast the number rounded up to the nearest power of 2. */
-
+/*************************************************************//**
+Calculates fast the number rounded up to the nearest power of 2.
+@return first power of 2 which is >= n */
+UNIV_INTERN
ulint
ut_2_power_up(
/*==========*/
- /* out: first power of 2 which is >= n */
- ulint n) /* in: number != 0 */
+ ulint n) /*!< in: number != 0 */
{
ulint res;
@@ -425,14 +483,14 @@ ut_2_power_up(
return(res);
}
-/**************************************************************************
+/**********************************************************************//**
Outputs a NUL-terminated file name, quoted with apostrophes. */
-
+UNIV_INTERN
void
ut_print_filename(
/*==============*/
- FILE* f, /* in: output stream */
- const char* name) /* in: name to print */
+ FILE* f, /*!< in: output stream */
+ const char* name) /*!< in: name to print */
{
putc('\'', f);
for (;;) {
@@ -450,40 +508,40 @@ ut_print_filename(
done:
putc('\'', f);
}
-
-/**************************************************************************
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
Outputs a fixed-length string, quoted as an SQL identifier.
If the string contains a slash '/', the string will be
output as two identifiers separated by a period (.),
as in SQL database_name.identifier. */
-
+UNIV_INTERN
void
ut_print_name(
/*==========*/
- FILE* f, /* in: output stream */
- trx_t* trx, /* in: transaction */
- ibool table_id,/* in: TRUE=print a table name,
+ FILE* f, /*!< in: output stream */
+ trx_t* trx, /*!< in: transaction */
+ ibool table_id,/*!< in: TRUE=print a table name,
FALSE=print other identifier */
- const char* name) /* in: name to print */
+ const char* name) /*!< in: name to print */
{
ut_print_namel(f, trx, table_id, name, strlen(name));
}
-/**************************************************************************
+/**********************************************************************//**
Outputs a fixed-length string, quoted as an SQL identifier.
If the string contains a slash '/', the string will be
output as two identifiers separated by a period (.),
as in SQL database_name.identifier. */
-
+UNIV_INTERN
void
ut_print_namel(
/*===========*/
- FILE* f, /* in: output stream */
- trx_t* trx, /* in: transaction (NULL=no quotes) */
- ibool table_id,/* in: TRUE=print a table name,
+ FILE* f, /*!< in: output stream */
+ trx_t* trx, /*!< in: transaction (NULL=no quotes) */
+ ibool table_id,/*!< in: TRUE=print a table name,
FALSE=print other identifier */
- const char* name, /* in: name to print */
- ulint namelen)/* in: length of name */
+ const char* name, /*!< in: name to print */
+ ulint namelen)/*!< in: length of name */
{
/* 2 * NAME_LEN for database and table name,
and some slack for the #mysql50# prefix and quotes */
@@ -498,14 +556,14 @@ ut_print_namel(
fwrite(buf, 1, bufend - buf, f);
}
-/**************************************************************************
+/**********************************************************************//**
Catenate files. */
-
+UNIV_INTERN
void
ut_copy_file(
/*=========*/
- FILE* dest, /* in: output file */
- FILE* src) /* in: input file to be appended to output */
+ FILE* dest, /*!< in: output file */
+ FILE* src) /*!< in: input file to be appended to output */
{
long len = ftell(src);
char buf[4096];
@@ -523,22 +581,23 @@ ut_copy_file(
}
} while (len > 0);
}
-
-/**************************************************************************
-snprintf(). */
+#endif /* !UNIV_HOTBACKUP */
#ifdef __WIN__
-#include <stdarg.h>
+# include <stdarg.h>
+/**********************************************************************//**
+A substitute for snprintf(3), formatted output conversion into
+a limited buffer.
+@return number of characters that would have been printed if the size
+were unlimited, not including the terminating '\0'. */
+UNIV_INTERN
int
ut_snprintf(
- /* out: number of characters that would
- have been printed if the size were
- unlimited, not including the terminating
- '\0'. */
- char* str, /* out: string */
- size_t size, /* in: str size */
- const char* fmt, /* in: format */
- ...) /* in: format values */
+/*========*/
+ char* str, /*!< out: string */
+ size_t size, /*!< in: str size */
+ const char* fmt, /*!< in: format */
+ ...) /*!< in: format values */
{
int res;
va_list ap1;
diff --git a/storage/innobase/ut/ut0vec.c b/storage/innobase/ut/ut0vec.c
index e0d3e84d4a2..45f2bc9771f 100644
--- a/storage/innobase/ut/ut0vec.c
+++ b/storage/innobase/ut/ut0vec.c
@@ -1,18 +1,43 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file ut/ut0vec.c
+A vector of pointers to data items
+
+Created 4/6/2006 Osku Salerma
+************************************************************************/
+
#include "ut0vec.h"
#ifdef UNIV_NONINL
#include "ut0vec.ic"
#endif
#include <string.h>
-/********************************************************************
-Create a new vector with the given initial size. */
-
+/****************************************************************//**
+Create a new vector with the given initial size.
+@return vector */
+UNIV_INTERN
ib_vector_t*
ib_vector_create(
/*=============*/
- /* out: vector */
- mem_heap_t* heap, /* in: heap */
- ulint size) /* in: initial size */
+ mem_heap_t* heap, /*!< in: heap */
+ ulint size) /*!< in: initial size */
{
ib_vector_t* vec;
@@ -28,14 +53,14 @@ ib_vector_create(
return(vec);
}
-/********************************************************************
+/****************************************************************//**
Push a new element to the vector, increasing its size if necessary. */
-
+UNIV_INTERN
void
ib_vector_push(
/*===========*/
- ib_vector_t* vec, /* in: vector */
- void* elem) /* in: data element */
+ ib_vector_t* vec, /*!< in: vector */
+ void* elem) /*!< in: data element */
{
if (vec->used >= vec->total) {
void** new_data;
diff --git a/storage/innobase/ut/ut0wqueue.c b/storage/innobase/ut/ut0wqueue.c
index 7e090e89a4f..5220d1e17f4 100644
--- a/storage/innobase/ut/ut0wqueue.c
+++ b/storage/innobase/ut/ut0wqueue.c
@@ -1,12 +1,37 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
#include "ut0wqueue.h"
-/********************************************************************
-Create a new work queue. */
+/*******************************************************************//**
+@file ut/ut0wqueue.c
+A work queue
+Created 4/26/2006 Osku Salerma
+************************************************************************/
+
+/****************************************************************//**
+Create a new work queue.
+@return work queue */
+UNIV_INTERN
ib_wqueue_t*
ib_wqueue_create(void)
/*===================*/
- /* out: work queue */
{
ib_wqueue_t* wq = mem_alloc(sizeof(ib_wqueue_t));
@@ -18,13 +43,13 @@ ib_wqueue_create(void)
return(wq);
}
-/********************************************************************
+/****************************************************************//**
Free a work queue. */
-
+UNIV_INTERN
void
ib_wqueue_free(
/*===========*/
- ib_wqueue_t* wq) /* in: work queue */
+ ib_wqueue_t* wq) /*!< in: work queue */
{
ut_a(!ib_list_get_first(wq->items));
@@ -35,15 +60,15 @@ ib_wqueue_free(
mem_free(wq);
}
-/********************************************************************
+/****************************************************************//**
Add a work item to the queue. */
-
+UNIV_INTERN
void
ib_wqueue_add(
/*==========*/
- ib_wqueue_t* wq, /* in: work queue */
- void* item, /* in: work item */
- mem_heap_t* heap) /* in: memory heap to use for allocating the
+ ib_wqueue_t* wq, /*!< in: work queue */
+ void* item, /*!< in: work item */
+ mem_heap_t* heap) /*!< in: memory heap to use for allocating the
list node */
{
mutex_enter(&wq->mutex);
@@ -54,13 +79,14 @@ ib_wqueue_add(
mutex_exit(&wq->mutex);
}
-/********************************************************************
-Wait for a work item to appear in the queue. */
-
+/****************************************************************//**
+Wait for a work item to appear in the queue.
+@return work item */
+UNIV_INTERN
void*
ib_wqueue_wait(
- /* out: work item */
- ib_wqueue_t* wq) /* in: work queue */
+/*===========*/
+ ib_wqueue_t* wq) /*!< in: work queue */
{
ib_list_node_t* node;
diff --git a/storage/innodb_plugin/CMakeLists.txt b/storage/innodb_plugin/CMakeLists.txt
deleted file mode 100644
index 21d83ea2b5f..00000000000
--- a/storage/innodb_plugin/CMakeLists.txt
+++ /dev/null
@@ -1,94 +0,0 @@
-# Copyright (C) 2009 Oracle/Innobase Oy
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; version 2 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-# This is the CMakeLists for InnoDB Plugin
-
-
-SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
-SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
-
-# Starting at 5.1.38, MySQL CMake files are simplified. But the plugin
-# CMakeLists.txt still needs to work with previous versions of MySQL.
-IF (MYSQL_VERSION_ID GREATER "50137")
- INCLUDE("${PROJECT_SOURCE_DIR}/storage/mysql_storage_engine.cmake")
-ENDIF (MYSQL_VERSION_ID GREATER "50137")
-
-IF (CMAKE_SIZEOF_VOID_P MATCHES 8)
- SET(WIN64 TRUE)
-ENDIF (CMAKE_SIZEOF_VOID_P MATCHES 8)
-
-# Include directories under innodb_plugin
-INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/innodb_plugin/include
- ${CMAKE_SOURCE_DIR}/storage/innodb_plugin/handler)
-
-# Include directories under mysql
-INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include
- ${CMAKE_SOURCE_DIR}/sql
- ${CMAKE_SOURCE_DIR}/regex
- ${CMAKE_SOURCE_DIR}/zlib
- ${CMAKE_SOURCE_DIR}/extra/yassl/include)
-
-# Removing compiler optimizations for innodb/mem/* files on 64-bit Windows
-# due to 64-bit compiler error, See MySQL Bug #19424, #36366, #34297
-IF (MSVC AND $(WIN64))
- SET_SOURCE_FILES_PROPERTIES(mem/mem0mem.c mem/mem0pool.c
- PROPERTIES COMPILE_FLAGS -Od)
-ENDIF (MSVC AND $(WIN64))
-
-SET(INNODB_PLUGIN_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c
- buf/buf0buddy.c buf/buf0buf.c buf/buf0flu.c buf/buf0lru.c buf/buf0rea.c
- data/data0data.c data/data0type.c
- dict/dict0boot.c dict/dict0crea.c dict/dict0dict.c dict/dict0load.c dict/dict0mem.c
- dyn/dyn0dyn.c
- eval/eval0eval.c eval/eval0proc.c
- fil/fil0fil.c
- fsp/fsp0fsp.c
- fut/fut0fut.c fut/fut0lst.c
- ha/ha0ha.c ha/hash0hash.c ha/ha0storage.c
- ibuf/ibuf0ibuf.c
- pars/lexyy.c pars/pars0grm.c pars/pars0opt.c pars/pars0pars.c pars/pars0sym.c
- lock/lock0lock.c lock/lock0iter.c
- log/log0log.c log/log0recv.c
- mach/mach0data.c
- mem/mem0mem.c mem/mem0pool.c
- mtr/mtr0log.c mtr/mtr0mtr.c
- os/os0file.c os/os0proc.c os/os0sync.c os/os0thread.c
- page/page0cur.c page/page0page.c page/page0zip.c
- que/que0que.c
- handler/ha_innodb.cc handler/handler0alter.cc handler/i_s.cc handler/mysql_addons.cc
- read/read0read.c
- rem/rem0cmp.c rem/rem0rec.c
- row/row0ext.c row/row0ins.c row/row0merge.c row/row0mysql.c row/row0purge.c row/row0row.c
- row/row0sel.c row/row0uins.c row/row0umod.c row/row0undo.c row/row0upd.c row/row0vers.c
- srv/srv0que.c srv/srv0srv.c srv/srv0start.c
- sync/sync0arr.c sync/sync0rw.c sync/sync0sync.c
- thr/thr0loc.c
- trx/trx0i_s.c trx/trx0purge.c trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c
- trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c
- usr/usr0sess.c
- ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c
- ut/ut0list.c ut/ut0wqueue.c)
-ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS -DIB_HAVE_PAUSE_INSTRUCTION)
-
-IF (MYSQL_VERSION_ID GREATER "50137")
- MYSQL_STORAGE_ENGINE(INNOBASE)
- # Use ha_innodb for plugin name, if plugin is built
- GET_TARGET_PROPERTY(LIB_LOCATION ha_innobase LOCATION)
- IF(LIB_LOCATION)
- SET_TARGET_PROPERTIES(ha_innobase PROPERTIES OUTPUT_NAME ha_innodb)
- ENDIF(LIB_LOCATION)
-ELSE (MYSQL_VERSION_ID GREATER "50137")
- IF (NOT SOURCE_SUBLIBS)
- ADD_DEFINITIONS(-D_WIN32 -DMYSQL_SERVER)
diff --git a/storage/innodb_plugin/Makefile.am b/storage/innodb_plugin/Makefile.am
deleted file mode 100644
index 5c71fe18d14..00000000000
--- a/storage/innodb_plugin/Makefile.am
+++ /dev/null
@@ -1,342 +0,0 @@
-# Copyright (C) 2001, 2004, 2006 MySQL AB & Innobase Oy
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; version 2 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-# Process this file with automake to create Makefile.in
-
-MYSQLDATAdir= $(localstatedir)
-MYSQLSHAREdir= $(pkgdatadir)
-MYSQLBASEdir= $(prefix)
-MYSQLLIBdir= $(pkglibdir)
-pkgplugindir= $(pkglibdir)/plugin
-INCLUDES= -I$(top_srcdir)/include -I$(top_builddir)/include \
- -I$(top_srcdir)/regex \
- -I$(srcdir)/include \
- -I$(top_srcdir)/sql \
- -I$(srcdir) @ZLIB_INCLUDES@
-
-DEFS= @DEFS@
-
-
-noinst_HEADERS= \
- handler/ha_innodb.h \
- handler/i_s.h \
- include/btr0btr.h \
- include/btr0btr.ic \
- include/btr0cur.h \
- include/btr0cur.ic \
- include/btr0pcur.h \
- include/btr0pcur.ic \
- include/btr0sea.h \
- include/btr0sea.ic \
- include/btr0types.h \
- include/buf0buddy.h \
- include/buf0buddy.ic \
- include/buf0buf.h \
- include/buf0buf.ic \
- include/buf0flu.h \
- include/buf0flu.ic \
- include/buf0lru.h \
- include/buf0lru.ic \
- include/buf0rea.h \
- include/buf0types.h \
- include/data0data.h \
- include/data0data.ic \
- include/data0type.h \
- include/data0type.ic \
- include/data0types.h \
- include/db0err.h \
- include/dict0boot.h \
- include/dict0boot.ic \
- include/dict0crea.h \
- include/dict0crea.ic \
- include/dict0dict.h \
- include/dict0dict.ic \
- include/dict0load.h \
- include/dict0load.ic \
- include/dict0mem.h \
- include/dict0mem.ic \
- include/dict0types.h \
- include/dyn0dyn.h \
- include/dyn0dyn.ic \
- include/eval0eval.h \
- include/eval0eval.ic \
- include/eval0proc.h \
- include/eval0proc.ic \
- include/fil0fil.h \
- include/fsp0fsp.h \
- include/fsp0fsp.ic \
- include/fsp0types.h \
- include/fut0fut.h \
- include/fut0fut.ic \
- include/fut0lst.h \
- include/fut0lst.ic \
- include/ha0ha.h \
- include/ha0ha.ic \
- include/ha0storage.h \
- include/ha0storage.ic \
- include/ha_prototypes.h \
- include/handler0alter.h \
- include/hash0hash.h \
- include/hash0hash.ic \
- include/ibuf0ibuf.h \
- include/ibuf0ibuf.ic \
- include/ibuf0types.h \
- include/lock0iter.h \
- include/lock0lock.h \
- include/lock0lock.ic \
- include/lock0priv.h \
- include/lock0priv.ic \
- include/lock0types.h \
- include/log0log.h \
- include/log0log.ic \
- include/log0recv.h \
- include/log0recv.ic \
- include/mach0data.h \
- include/mach0data.ic \
- include/mem0dbg.h \
- include/mem0dbg.ic \
- include/mem0mem.h \
- include/mem0mem.ic \
- include/mem0pool.h \
- include/mem0pool.ic \
- include/mtr0log.h \
- include/mtr0log.ic \
- include/mtr0mtr.h \
- include/mtr0mtr.ic \
- include/mtr0types.h \
- include/mysql_addons.h \
- include/os0file.h \
- include/os0proc.h \
- include/os0proc.ic \
- include/os0sync.h \
- include/os0sync.ic \
- include/os0thread.h \
- include/os0thread.ic \
- include/page0cur.h \
- include/page0cur.ic \
- include/page0page.h \
- include/page0page.ic \
- include/page0types.h \
- include/page0zip.h \
- include/page0zip.ic \
- include/pars0grm.h \
- include/pars0opt.h \
- include/pars0opt.ic \
- include/pars0pars.h \
- include/pars0pars.ic \
- include/pars0sym.h \
- include/pars0sym.ic \
- include/pars0types.h \
- include/que0que.h \
- include/que0que.ic \
- include/que0types.h \
- include/read0read.h \
- include/read0read.ic \
- include/read0types.h \
- include/rem0cmp.h \
- include/rem0cmp.ic \
- include/rem0rec.h \
- include/rem0rec.ic \
- include/rem0types.h \
- include/row0ext.h \
- include/row0ext.ic \
- include/row0ins.h \
- include/row0ins.ic \
- include/row0merge.h \
- include/row0mysql.h \
- include/row0mysql.ic \
- include/row0purge.h \
- include/row0purge.ic \
- include/row0row.h \
- include/row0row.ic \
- include/row0sel.h \
- include/row0sel.ic \
- include/row0types.h \
- include/row0uins.h \
- include/row0uins.ic \
- include/row0umod.h \
- include/row0umod.ic \
- include/row0undo.h \
- include/row0undo.ic \
- include/row0upd.h \
- include/row0upd.ic \
- include/row0vers.h \
- include/row0vers.ic \
- include/srv0que.h \
- include/srv0srv.h \
- include/srv0srv.ic \
- include/srv0start.h \
- include/sync0arr.h \
- include/sync0arr.ic \
- include/sync0rw.h \
- include/sync0rw.ic \
- include/sync0sync.h \
- include/sync0sync.ic \
- include/sync0types.h \
- include/thr0loc.h \
- include/thr0loc.ic \
- include/trx0i_s.h \
- include/trx0purge.h \
- include/trx0purge.ic \
- include/trx0rec.h \
- include/trx0rec.ic \
- include/trx0roll.h \
- include/trx0roll.ic \
- include/trx0rseg.h \
- include/trx0rseg.ic \
- include/trx0sys.h \
- include/trx0sys.ic \
- include/trx0trx.h \
- include/trx0trx.ic \
- include/trx0types.h \
- include/trx0undo.h \
- include/trx0undo.ic \
- include/trx0xa.h \
- include/univ.i \
- include/usr0sess.h \
- include/usr0sess.ic \
- include/usr0types.h \
- include/ut0auxconf.h \
- include/ut0byte.h \
- include/ut0byte.ic \
- include/ut0dbg.h \
- include/ut0list.h \
- include/ut0list.ic \
- include/ut0lst.h \
- include/ut0mem.h \
- include/ut0mem.ic \
- include/ut0rnd.h \
- include/ut0rnd.ic \
- include/ut0sort.h \
- include/ut0ut.h \
- include/ut0ut.ic \
- include/ut0vec.h \
- include/ut0vec.ic \
- include/ut0wqueue.h \
- mem/mem0dbg.c
-
-EXTRA_LIBRARIES= libinnobase.a
-noinst_LIBRARIES= @plugin_innodb_plugin_static_target@
-libinnobase_a_SOURCES= \
- btr/btr0btr.c \
- btr/btr0cur.c \
- btr/btr0pcur.c \
- btr/btr0sea.c \
- buf/buf0buddy.c \
- buf/buf0buf.c \
- buf/buf0flu.c \
- buf/buf0lru.c \
- buf/buf0rea.c \
- data/data0data.c \
- data/data0type.c \
- dict/dict0boot.c \
- dict/dict0crea.c \
- dict/dict0dict.c \
- dict/dict0load.c \
- dict/dict0mem.c \
- dyn/dyn0dyn.c \
- eval/eval0eval.c \
- eval/eval0proc.c \
- fil/fil0fil.c \
- fsp/fsp0fsp.c \
- fut/fut0fut.c \
- fut/fut0lst.c \
- ha/ha0ha.c \
- ha/ha0storage.c \
- ha/hash0hash.c \
- handler/ha_innodb.cc \
- handler/handler0alter.cc \
- handler/i_s.cc \
- handler/mysql_addons.cc \
- ibuf/ibuf0ibuf.c \
- lock/lock0iter.c \
- lock/lock0lock.c \
- log/log0log.c \
- log/log0recv.c \
- mach/mach0data.c \
- mem/mem0mem.c \
- mem/mem0pool.c \
- mtr/mtr0log.c \
- mtr/mtr0mtr.c \
- os/os0file.c \
- os/os0proc.c \
- os/os0sync.c \
- os/os0thread.c \
- page/page0cur.c \
- page/page0page.c \
- page/page0zip.c \
- pars/lexyy.c \
- pars/pars0grm.c \
- pars/pars0opt.c \
- pars/pars0pars.c \
- pars/pars0sym.c \
- que/que0que.c \
- read/read0read.c \
- rem/rem0cmp.c \
- rem/rem0rec.c \
- row/row0ext.c \
- row/row0ins.c \
- row/row0merge.c \
- row/row0mysql.c \
- row/row0purge.c \
- row/row0row.c \
- row/row0sel.c \
- row/row0uins.c \
- row/row0umod.c \
- row/row0undo.c \
- row/row0upd.c \
- row/row0vers.c \
- srv/srv0que.c \
- srv/srv0srv.c \
- srv/srv0start.c \
- sync/sync0arr.c \
- sync/sync0rw.c \
- sync/sync0sync.c \
- thr/thr0loc.c \
- trx/trx0i_s.c \
- trx/trx0purge.c \
- trx/trx0rec.c \
- trx/trx0roll.c \
- trx/trx0rseg.c \
- trx/trx0sys.c \
- trx/trx0trx.c \
- trx/trx0undo.c \
- usr/usr0sess.c \
- ut/ut0byte.c \
- ut/ut0dbg.c \
- ut/ut0list.c \
- ut/ut0mem.c \
- ut/ut0rnd.c \
- ut/ut0ut.c \
- ut/ut0vec.c \
- ut/ut0wqueue.c
-
-libinnobase_a_CXXFLAGS= $(AM_CFLAGS)
-libinnobase_a_CFLAGS= $(AM_CFLAGS)
-
-EXTRA_LTLIBRARIES= ha_innodb_plugin.la
-pkgplugin_LTLIBRARIES= @plugin_innodb_plugin_shared_target@
-
-ha_innodb_plugin_la_LDFLAGS= -module -rpath $(pkgplugindir)
-ha_innodb_plugin_la_CXXFLAGS= $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS)
-ha_innodb_plugin_la_CFLAGS= $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS)
-ha_innodb_plugin_la_SOURCES= $(libinnobase_a_SOURCES)
-
-EXTRA_DIST= CMakeLists.txt plug.in \
- pars/make_bison.sh pars/make_flex.sh \
- pars/pars0grm.y pars/pars0lex.l
-
-# Don't update the files from bitkeeper
-%::SCCS/s.%
diff --git a/storage/innodb_plugin/btr/btr0btr.c b/storage/innodb_plugin/btr/btr0btr.c
deleted file mode 100644
index 086b3a0a599..00000000000
--- a/storage/innodb_plugin/btr/btr0btr.c
+++ /dev/null
@@ -1,3719 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file btr/btr0btr.c
-The B-tree
-
-Created 6/2/1994 Heikki Tuuri
-*******************************************************/
-
-#include "btr0btr.h"
-
-#ifdef UNIV_NONINL
-#include "btr0btr.ic"
-#endif
-
-#include "fsp0fsp.h"
-#include "page0page.h"
-#include "page0zip.h"
-
-#ifndef UNIV_HOTBACKUP
-#include "btr0cur.h"
-#include "btr0sea.h"
-#include "btr0pcur.h"
-#include "rem0cmp.h"
-#include "lock0lock.h"
-#include "ibuf0ibuf.h"
-#include "trx0trx.h"
-
-/*
-Latching strategy of the InnoDB B-tree
---------------------------------------
-A tree latch protects all non-leaf nodes of the tree. Each node of a tree
-also has a latch of its own.
-
-A B-tree operation normally first acquires an S-latch on the tree. It
-searches down the tree and releases the tree latch when it has the
-leaf node latch. To save CPU time we do not acquire any latch on
-non-leaf nodes of the tree during a search, those pages are only bufferfixed.
-
-If an operation needs to restructure the tree, it acquires an X-latch on
-the tree before searching to a leaf node. If it needs, for example, to
-split a leaf,
-(1) InnoDB decides the split point in the leaf,
-(2) allocates a new page,
-(3) inserts the appropriate node pointer to the first non-leaf level,
-(4) releases the tree X-latch,
-(5) and then moves records from the leaf to the new allocated page.
-
-Node pointers
--------------
-Leaf pages of a B-tree contain the index records stored in the
-tree. On levels n > 0 we store 'node pointers' to pages on level
-n - 1. For each page there is exactly one node pointer stored:
-thus the our tree is an ordinary B-tree, not a B-link tree.
-
-A node pointer contains a prefix P of an index record. The prefix
-is long enough so that it determines an index record uniquely.
-The file page number of the child page is added as the last
-field. To the child page we can store node pointers or index records
-which are >= P in the alphabetical order, but < P1 if there is
-a next node pointer on the level, and P1 is its prefix.
-
-If a node pointer with a prefix P points to a non-leaf child,
-then the leftmost record in the child must have the same
-prefix P. If it points to a leaf node, the child is not required
-to contain any record with a prefix equal to P. The leaf case
-is decided this way to allow arbitrary deletions in a leaf node
-without touching upper levels of the tree.
-
-We have predefined a special minimum record which we
-define as the smallest record in any alphabetical order.
-A minimum record is denoted by setting a bit in the record
-header. A minimum record acts as the prefix of a node pointer
-which points to a leftmost node on any level of the tree.
-
-File page allocation
---------------------
-In the root node of a B-tree there are two file segment headers.
-The leaf pages of a tree are allocated from one file segment, to
-make them consecutive on disk if possible. From the other file segment
-we allocate pages for the non-leaf levels of the tree.
-*/
-
-#ifdef UNIV_BTR_DEBUG
-/**************************************************************//**
-Checks a file segment header within a B-tree root page.
-@return TRUE if valid */
-static
-ibool
-btr_root_fseg_validate(
-/*===================*/
- const fseg_header_t* seg_header, /*!< in: segment header */
- ulint space) /*!< in: tablespace identifier */
-{
- ulint offset = mach_read_from_2(seg_header + FSEG_HDR_OFFSET);
-
- ut_a(mach_read_from_4(seg_header + FSEG_HDR_SPACE) == space);
- ut_a(offset >= FIL_PAGE_DATA);
- ut_a(offset <= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END);
- return(TRUE);
-}
-#endif /* UNIV_BTR_DEBUG */
-
-/**************************************************************//**
-Gets the root node of a tree and x-latches it.
-@return root page, x-latched */
-static
-buf_block_t*
-btr_root_block_get(
-/*===============*/
- dict_index_t* index, /*!< in: index tree */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint space;
- ulint zip_size;
- ulint root_page_no;
- buf_block_t* block;
-
- space = dict_index_get_space(index);
- zip_size = dict_table_zip_size(index->table);
- root_page_no = dict_index_get_page(index);
-
- block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, mtr);
- ut_a((ibool)!!page_is_comp(buf_block_get_frame(block))
- == dict_table_is_comp(index->table));
-#ifdef UNIV_BTR_DEBUG
- if (!dict_index_is_ibuf(index)) {
- const page_t* root = buf_block_get_frame(block);
-
- ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
- + root, space));
- ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
- + root, space));
- }
-#endif /* UNIV_BTR_DEBUG */
-
- return(block);
-}
-
-/**************************************************************//**
-Gets the root node of a tree and x-latches it.
-@return root page, x-latched */
-UNIV_INTERN
-page_t*
-btr_root_get(
-/*=========*/
- dict_index_t* index, /*!< in: index tree */
- mtr_t* mtr) /*!< in: mtr */
-{
- return(buf_block_get_frame(btr_root_block_get(index, mtr)));
-}
-
-/*************************************************************//**
-Gets pointer to the previous user record in the tree. It is assumed that
-the caller has appropriate latches on the page and its neighbor.
-@return previous user record, NULL if there is none */
-UNIV_INTERN
-rec_t*
-btr_get_prev_user_rec(
-/*==================*/
- rec_t* rec, /*!< in: record on leaf level */
- mtr_t* mtr) /*!< in: mtr holding a latch on the page, and if
- needed, also to the previous page */
-{
- page_t* page;
- page_t* prev_page;
- ulint prev_page_no;
-
- if (!page_rec_is_infimum(rec)) {
-
- rec_t* prev_rec = page_rec_get_prev(rec);
-
- if (!page_rec_is_infimum(prev_rec)) {
-
- return(prev_rec);
- }
- }
-
- page = page_align(rec);
- prev_page_no = btr_page_get_prev(page, mtr);
-
- if (prev_page_no != FIL_NULL) {
-
- ulint space;
- ulint zip_size;
- buf_block_t* prev_block;
-
- space = page_get_space_id(page);
- zip_size = fil_space_get_zip_size(space);
-
- prev_block = buf_page_get_with_no_latch(space, zip_size,
- prev_page_no, mtr);
- prev_page = buf_block_get_frame(prev_block);
- /* The caller must already have a latch to the brother */
- ut_ad(mtr_memo_contains(mtr, prev_block,
- MTR_MEMO_PAGE_S_FIX)
- || mtr_memo_contains(mtr, prev_block,
- MTR_MEMO_PAGE_X_FIX));
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(prev_page) == page_is_comp(page));
- ut_a(btr_page_get_next(prev_page, mtr)
- == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
- return(page_rec_get_prev(page_get_supremum_rec(prev_page)));
- }
-
- return(NULL);
-}
-
-/*************************************************************//**
-Gets pointer to the next user record in the tree. It is assumed that the
-caller has appropriate latches on the page and its neighbor.
-@return next user record, NULL if there is none */
-UNIV_INTERN
-rec_t*
-btr_get_next_user_rec(
-/*==================*/
- rec_t* rec, /*!< in: record on leaf level */
- mtr_t* mtr) /*!< in: mtr holding a latch on the page, and if
- needed, also to the next page */
-{
- page_t* page;
- page_t* next_page;
- ulint next_page_no;
-
- if (!page_rec_is_supremum(rec)) {
-
- rec_t* next_rec = page_rec_get_next(rec);
-
- if (!page_rec_is_supremum(next_rec)) {
-
- return(next_rec);
- }
- }
-
- page = page_align(rec);
- next_page_no = btr_page_get_next(page, mtr);
-
- if (next_page_no != FIL_NULL) {
- ulint space;
- ulint zip_size;
- buf_block_t* next_block;
-
- space = page_get_space_id(page);
- zip_size = fil_space_get_zip_size(space);
-
- next_block = buf_page_get_with_no_latch(space, zip_size,
- next_page_no, mtr);
- next_page = buf_block_get_frame(next_block);
- /* The caller must already have a latch to the brother */
- ut_ad(mtr_memo_contains(mtr, next_block, MTR_MEMO_PAGE_S_FIX)
- || mtr_memo_contains(mtr, next_block,
- MTR_MEMO_PAGE_X_FIX));
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(next_page) == page_is_comp(page));
- ut_a(btr_page_get_prev(next_page, mtr)
- == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
- return(page_rec_get_next(page_get_infimum_rec(next_page)));
- }
-
- return(NULL);
-}
-
-/**************************************************************//**
-Creates a new index page (not the root, and also not
-used in page reorganization). @see btr_page_empty(). */
-static
-void
-btr_page_create(
-/*============*/
- buf_block_t* block, /*!< in/out: page to be created */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- dict_index_t* index, /*!< in: index */
- ulint level, /*!< in: the B-tree level of the page */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* page = buf_block_get_frame(block);
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-
- if (UNIV_LIKELY_NULL(page_zip)) {
- page_create_zip(block, index, level, mtr);
- } else {
- page_create(block, mtr, dict_table_is_comp(index->table));
- /* Set the level of the new index page */
- btr_page_set_level(page, NULL, level, mtr);
- }
-
- block->check_index_page_at_flush = TRUE;
-
- btr_page_set_index_id(page, page_zip, index->id, mtr);
-}
-
-/**************************************************************//**
-Allocates a new file page to be used in an ibuf tree. Takes the page from
-the free list of the tree, which must contain pages!
-@return new allocated block, x-latched */
-static
-buf_block_t*
-btr_page_alloc_for_ibuf(
-/*====================*/
- dict_index_t* index, /*!< in: index tree */
- mtr_t* mtr) /*!< in: mtr */
-{
- fil_addr_t node_addr;
- page_t* root;
- page_t* new_page;
- buf_block_t* new_block;
-
- root = btr_root_get(index, mtr);
-
- node_addr = flst_get_first(root + PAGE_HEADER
- + PAGE_BTR_IBUF_FREE_LIST, mtr);
- ut_a(node_addr.page != FIL_NULL);
-
- new_block = buf_page_get(dict_index_get_space(index),
- dict_table_zip_size(index->table),
- node_addr.page, RW_X_LATCH, mtr);
- new_page = buf_block_get_frame(new_block);
- buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW);
-
- flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- new_page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE,
- mtr);
- ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- mtr));
-
- return(new_block);
-}
-
-/**************************************************************//**
-Allocates a new file page to be used in an index tree. NOTE: we assume
-that the caller has made the reservation for free extents!
-@return new allocated block, x-latched; NULL if out of space */
-UNIV_INTERN
-buf_block_t*
-btr_page_alloc(
-/*===========*/
- dict_index_t* index, /*!< in: index */
- ulint hint_page_no, /*!< in: hint of a good page */
- byte file_direction, /*!< in: direction where a possible
- page split is made */
- ulint level, /*!< in: level where the page is placed
- in the tree */
- mtr_t* mtr) /*!< in: mtr */
-{
- fseg_header_t* seg_header;
- page_t* root;
- buf_block_t* new_block;
- ulint new_page_no;
-
- if (dict_index_is_ibuf(index)) {
-
- return(btr_page_alloc_for_ibuf(index, mtr));
- }
-
- root = btr_root_get(index, mtr);
-
- if (level == 0) {
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
- } else {
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
- }
-
- /* Parameter TRUE below states that the caller has made the
- reservation for free extents, and thus we know that a page can
- be allocated: */
-
- new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no,
- file_direction, TRUE, mtr);
- if (new_page_no == FIL_NULL) {
-
- return(NULL);
- }
-
- new_block = buf_page_get(dict_index_get_space(index),
- dict_table_zip_size(index->table),
- new_page_no, RW_X_LATCH, mtr);
- buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW);
-
- return(new_block);
-}
-
-/**************************************************************//**
-Gets the number of pages in a B-tree.
-@return number of pages */
-UNIV_INTERN
-ulint
-btr_get_size(
-/*=========*/
- dict_index_t* index, /*!< in: index */
- ulint flag) /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
-{
- fseg_header_t* seg_header;
- page_t* root;
- ulint n;
- ulint dummy;
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- mtr_s_lock(dict_index_get_lock(index), &mtr);
-
- root = btr_root_get(index, &mtr);
-
- if (flag == BTR_N_LEAF_PAGES) {
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
-
- fseg_n_reserved_pages(seg_header, &n, &mtr);
-
- } else if (flag == BTR_TOTAL_SIZE) {
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
-
- n = fseg_n_reserved_pages(seg_header, &dummy, &mtr);
-
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
-
- n += fseg_n_reserved_pages(seg_header, &dummy, &mtr);
- } else {
- ut_error;
- }
-
- mtr_commit(&mtr);
-
- return(n);
-}
-
-/**************************************************************//**
-Frees a page used in an ibuf tree. Puts the page to the free list of the
-ibuf tree. */
-static
-void
-btr_page_free_for_ibuf(
-/*===================*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: block to be freed, x-latched */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* root;
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- root = btr_root_get(index, mtr);
-
- flst_add_first(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- buf_block_get_frame(block)
- + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);
-
- ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- mtr));
-}
-
-/**************************************************************//**
-Frees a file page used in an index tree. Can be used also to (BLOB)
-external storage pages, because the page level 0 can be given as an
-argument. */
-UNIV_INTERN
-void
-btr_page_free_low(
-/*==============*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: block to be freed, x-latched */
- ulint level, /*!< in: page level */
- mtr_t* mtr) /*!< in: mtr */
-{
- fseg_header_t* seg_header;
- page_t* root;
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- /* The page gets invalid for optimistic searches: increment the frame
- modify clock */
-
- buf_block_modify_clock_inc(block);
-
- if (dict_index_is_ibuf(index)) {
-
- btr_page_free_for_ibuf(index, block, mtr);
-
- return;
- }
-
- root = btr_root_get(index, mtr);
-
- if (level == 0) {
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
- } else {
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
- }
-
- fseg_free_page(seg_header,
- buf_block_get_space(block),
- buf_block_get_page_no(block), mtr);
-}
-
-/**************************************************************//**
-Frees a file page used in an index tree. NOTE: cannot free field external
-storage pages because the page must contain info on its level. */
-UNIV_INTERN
-void
-btr_page_free(
-/*==========*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: block to be freed, x-latched */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint level;
-
- level = btr_page_get_level(buf_block_get_frame(block), mtr);
-
- btr_page_free_low(index, block, level, mtr);
-}
-
-/**************************************************************//**
-Sets the child node file address in a node pointer. */
-UNIV_INLINE
-void
-btr_node_ptr_set_child_page_no(
-/*===========================*/
- rec_t* rec, /*!< in: node pointer record */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed
- part will be updated, or NULL */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint page_no,/*!< in: child node address */
- mtr_t* mtr) /*!< in: mtr */
-{
- byte* field;
- ulint len;
-
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- ut_ad(!page_is_leaf(page_align(rec)));
- ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));
-
- /* The child address is in the last field */
- field = rec_get_nth_field(rec, offsets,
- rec_offs_n_fields(offsets) - 1, &len);
-
- ut_ad(len == REC_NODE_PTR_SIZE);
-
- if (UNIV_LIKELY_NULL(page_zip)) {
- page_zip_write_node_ptr(page_zip, rec,
- rec_offs_data_size(offsets),
- page_no, mtr);
- } else {
- mlog_write_ulint(field, page_no, MLOG_4BYTES, mtr);
- }
-}
-
-/************************************************************//**
-Returns the child page of a node pointer and x-latches it.
-@return child page, x-latched */
-static
-buf_block_t*
-btr_node_ptr_get_child(
-/*===================*/
- const rec_t* node_ptr,/*!< in: node pointer */
- dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint page_no;
- ulint space;
-
- ut_ad(rec_offs_validate(node_ptr, index, offsets));
- space = page_get_space_id(page_align(node_ptr));
- page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
-
- return(btr_block_get(space, dict_table_zip_size(index->table),
- page_no, RW_X_LATCH, mtr));
-}
-
-/************************************************************//**
-Returns the upper level node pointer to a page. It is assumed that mtr holds
-an x-latch on the tree.
-@return rec_get_offsets() of the node pointer record */
-static
-ulint*
-btr_page_get_father_node_ptr(
-/*=========================*/
- ulint* offsets,/*!< in: work area for the return value */
- mem_heap_t* heap, /*!< in: memory heap to use */
- btr_cur_t* cursor, /*!< in: cursor pointing to user record,
- out: cursor on node pointer record,
- its page x-latched */
- mtr_t* mtr) /*!< in: mtr */
-{
- dtuple_t* tuple;
- rec_t* user_rec;
- rec_t* node_ptr;
- ulint level;
- ulint page_no;
- dict_index_t* index;
-
- page_no = buf_block_get_page_no(btr_cur_get_block(cursor));
- index = btr_cur_get_index(cursor);
-
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
-
- ut_ad(dict_index_get_page(index) != page_no);
-
- level = btr_page_get_level(btr_cur_get_page(cursor), mtr);
- user_rec = btr_cur_get_rec(cursor);
- ut_a(page_rec_is_user_rec(user_rec));
- tuple = dict_index_build_node_ptr(index, user_rec, 0, heap, level);
-
- btr_cur_search_to_nth_level(index, level + 1, tuple, PAGE_CUR_LE,
- BTR_CONT_MODIFY_TREE, cursor, 0, mtr);
-
- node_ptr = btr_cur_get_rec(cursor);
- ut_ad(!page_rec_is_comp(node_ptr)
- || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR);
- offsets = rec_get_offsets(node_ptr, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- if (UNIV_UNLIKELY(btr_node_ptr_get_child_page_no(node_ptr, offsets)
- != page_no)) {
- rec_t* print_rec;
- fputs("InnoDB: Dump of the child page:\n", stderr);
- buf_page_print(page_align(user_rec), 0);
- fputs("InnoDB: Dump of the parent page:\n", stderr);
- buf_page_print(page_align(node_ptr), 0);
-
- fputs("InnoDB: Corruption of an index tree: table ", stderr);
- ut_print_name(stderr, NULL, TRUE, index->table_name);
- fputs(", index ", stderr);
- ut_print_name(stderr, NULL, FALSE, index->name);
- fprintf(stderr, ",\n"
- "InnoDB: father ptr page no %lu, child page no %lu\n",
- (ulong)
- btr_node_ptr_get_child_page_no(node_ptr, offsets),
- (ulong) page_no);
- print_rec = page_rec_get_next(
- page_get_infimum_rec(page_align(user_rec)));
- offsets = rec_get_offsets(print_rec, index,
- offsets, ULINT_UNDEFINED, &heap);
- page_rec_print(print_rec, offsets);
- offsets = rec_get_offsets(node_ptr, index, offsets,
- ULINT_UNDEFINED, &heap);
- page_rec_print(node_ptr, offsets);
-
- fputs("InnoDB: You should dump + drop + reimport the table"
- " to fix the\n"
- "InnoDB: corruption. If the crash happens at "
- "the database startup, see\n"
- "InnoDB: " REFMAN "forcing-recovery.html about\n"
- "InnoDB: forcing recovery. "
- "Then dump + drop + reimport.\n", stderr);
-
- ut_error;
- }
-
- return(offsets);
-}
-
-/************************************************************//**
-Returns the upper level node pointer to a page. It is assumed that mtr holds
-an x-latch on the tree.
-@return rec_get_offsets() of the node pointer record */
-static
-ulint*
-btr_page_get_father_block(
-/*======================*/
- ulint* offsets,/*!< in: work area for the return value */
- mem_heap_t* heap, /*!< in: memory heap to use */
- dict_index_t* index, /*!< in: b-tree index */
- buf_block_t* block, /*!< in: child page in the index */
- mtr_t* mtr, /*!< in: mtr */
- btr_cur_t* cursor) /*!< out: cursor on node pointer record,
- its page x-latched */
-{
- rec_t* rec
- = page_rec_get_next(page_get_infimum_rec(buf_block_get_frame(
- block)));
- btr_cur_position(index, rec, block, cursor);
- return(btr_page_get_father_node_ptr(offsets, heap, cursor, mtr));
-}
-
-/************************************************************//**
-Seeks to the upper level node pointer to a page.
-It is assumed that mtr holds an x-latch on the tree. */
-static
-void
-btr_page_get_father(
-/*================*/
- dict_index_t* index, /*!< in: b-tree index */
- buf_block_t* block, /*!< in: child page in the index */
- mtr_t* mtr, /*!< in: mtr */
- btr_cur_t* cursor) /*!< out: cursor on node pointer record,
- its page x-latched */
-{
- mem_heap_t* heap;
- rec_t* rec
- = page_rec_get_next(page_get_infimum_rec(buf_block_get_frame(
- block)));
- btr_cur_position(index, rec, block, cursor);
-
- heap = mem_heap_create(100);
- btr_page_get_father_node_ptr(NULL, heap, cursor, mtr);
- mem_heap_free(heap);
-}
-
-/************************************************************//**
-Creates the root node for a new index tree.
-@return page number of the created root, FIL_NULL if did not succeed */
-UNIV_INTERN
-ulint
-btr_create(
-/*=======*/
- ulint type, /*!< in: type of the index */
- ulint space, /*!< in: space where created */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- dulint index_id,/*!< in: index id */
- dict_index_t* index, /*!< in: index */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ulint page_no;
- buf_block_t* block;
- buf_frame_t* frame;
- page_t* page;
- page_zip_des_t* page_zip;
-
- /* Create the two new segments (one, in the case of an ibuf tree) for
- the index tree; the segment headers are put on the allocated root page
- (for an ibuf tree, not in the root, but on a separate ibuf header
- page) */
-
- if (type & DICT_IBUF) {
- /* Allocate first the ibuf header page */
- buf_block_t* ibuf_hdr_block = fseg_create(
- space, 0,
- IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr);
-
- buf_block_dbg_add_level(ibuf_hdr_block, SYNC_TREE_NODE_NEW);
-
- ut_ad(buf_block_get_page_no(ibuf_hdr_block)
- == IBUF_HEADER_PAGE_NO);
- /* Allocate then the next page to the segment: it will be the
- tree root page */
-
- page_no = fseg_alloc_free_page(buf_block_get_frame(
- ibuf_hdr_block)
- + IBUF_HEADER
- + IBUF_TREE_SEG_HEADER,
- IBUF_TREE_ROOT_PAGE_NO,
- FSP_UP, mtr);
- ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO);
-
- block = buf_page_get(space, zip_size, page_no,
- RW_X_LATCH, mtr);
- } else {
- block = fseg_create(space, 0,
- PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr);
- }
-
- if (block == NULL) {
-
- return(FIL_NULL);
- }
-
- page_no = buf_block_get_page_no(block);
- frame = buf_block_get_frame(block);
-
- buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
-
- if (type & DICT_IBUF) {
- /* It is an insert buffer tree: initialize the free list */
-
- ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO);
-
- flst_init(frame + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr);
- } else {
- /* It is a non-ibuf tree: create a file segment for leaf
- pages */
- if (!fseg_create(space, page_no,
- PAGE_HEADER + PAGE_BTR_SEG_LEAF, mtr)) {
- /* Not enough space for new segment, free root
- segment before return. */
- btr_free_root(space, zip_size, page_no, mtr);
-
- return(FIL_NULL);
- }
-
- /* The fseg create acquires a second latch on the page,
- therefore we must declare it: */
- buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
- }
-
- /* Create a new index page on the allocated segment page */
- page_zip = buf_block_get_page_zip(block);
-
- if (UNIV_LIKELY_NULL(page_zip)) {
- page = page_create_zip(block, index, 0, mtr);
- } else {
- page = page_create(block, mtr,
- dict_table_is_comp(index->table));
- /* Set the level of the new index page */
- btr_page_set_level(page, NULL, 0, mtr);
- }
-
- block->check_index_page_at_flush = TRUE;
-
- /* Set the index id of the page */
- btr_page_set_index_id(page, page_zip, index_id, mtr);
-
- /* Set the next node and previous node fields */
- btr_page_set_next(page, page_zip, FIL_NULL, mtr);
- btr_page_set_prev(page, page_zip, FIL_NULL, mtr);
-
- /* We reset the free bits for the page to allow creation of several
- trees in the same mtr, otherwise the latch on a bitmap page would
- prevent it because of the latching order */
-
- if (!(type & DICT_CLUSTERED)) {
- ibuf_reset_free_bits(block);
- }
-
- /* In the following assertion we test that two records of maximum
- allowed size fit on the root page: this fact is needed to ensure
- correctness of split algorithms */
-
- ut_ad(page_get_max_insert_size(page, 2) > 2 * BTR_PAGE_MAX_REC_SIZE);
-
- return(page_no);
-}
-
-/************************************************************//**
-Frees a B-tree except the root page, which MUST be freed after this
-by calling btr_free_root. */
-UNIV_INTERN
-void
-btr_free_but_not_root(
-/*==================*/
- ulint space, /*!< in: space where created */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint root_page_no) /*!< in: root page number */
-{
- ibool finished;
- page_t* root;
- mtr_t mtr;
-
-leaf_loop:
- mtr_start(&mtr);
-
- root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, &mtr);
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
- + root, space));
- ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
- + root, space));
-#endif /* UNIV_BTR_DEBUG */
-
- /* NOTE: page hash indexes are dropped when a page is freed inside
- fsp0fsp. */
-
- finished = fseg_free_step(root + PAGE_HEADER + PAGE_BTR_SEG_LEAF,
- &mtr);
- mtr_commit(&mtr);
-
- if (!finished) {
-
- goto leaf_loop;
- }
-top_loop:
- mtr_start(&mtr);
-
- root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, &mtr);
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
- + root, space));
-#endif /* UNIV_BTR_DEBUG */
-
- finished = fseg_free_step_not_header(
- root + PAGE_HEADER + PAGE_BTR_SEG_TOP, &mtr);
- mtr_commit(&mtr);
-
- if (!finished) {
-
- goto top_loop;
- }
-}
-
-/************************************************************//**
-Frees the B-tree root page. Other tree MUST already have been freed. */
-UNIV_INTERN
-void
-btr_free_root(
-/*==========*/
- ulint space, /*!< in: space where created */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint root_page_no, /*!< in: root page number */
- mtr_t* mtr) /*!< in: a mini-transaction which has already
- been started */
-{
- buf_block_t* block;
- fseg_header_t* header;
-
- block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, mtr);
-
- btr_search_drop_page_hash_index(block);
-
- header = buf_block_get_frame(block) + PAGE_HEADER + PAGE_BTR_SEG_TOP;
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_root_fseg_validate(header, space));
-#endif /* UNIV_BTR_DEBUG */
-
- while (!fseg_free_step(header, mtr));
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*************************************************************//**
-Reorganizes an index page. */
-static
-ibool
-btr_page_reorganize_low(
-/*====================*/
- ibool recovery,/*!< in: TRUE if called in recovery:
- locks should not be updated, i.e.,
- there cannot exist locks on the
- page, and a hash index should not be
- dropped: it cannot exist */
- buf_block_t* block, /*!< in: page to be reorganized */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* page = buf_block_get_frame(block);
- page_zip_des_t* page_zip = buf_block_get_page_zip(block);
- buf_block_t* temp_block;
- page_t* temp_page;
- ulint log_mode;
- ulint data_size1;
- ulint data_size2;
- ulint max_ins_size1;
- ulint max_ins_size2;
- ibool success = FALSE;
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
-#endif /* UNIV_ZIP_DEBUG */
- data_size1 = page_get_data_size(page);
- max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);
-
-#ifndef UNIV_HOTBACKUP
- /* Write the log record */
- mlog_open_and_write_index(mtr, page, index, page_is_comp(page)
- ? MLOG_COMP_PAGE_REORGANIZE
- : MLOG_PAGE_REORGANIZE, 0);
-#endif /* !UNIV_HOTBACKUP */
-
- /* Turn logging off */
- log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
-
-#ifndef UNIV_HOTBACKUP
- temp_block = buf_block_alloc(0);
-#else /* !UNIV_HOTBACKUP */
- ut_ad(block == back_block1);
- temp_block = back_block2;
-#endif /* !UNIV_HOTBACKUP */
- temp_page = temp_block->frame;
-
- /* Copy the old page to temporary space */
- buf_frame_copy(temp_page, page);
-
-#ifndef UNIV_HOTBACKUP
- if (UNIV_LIKELY(!recovery)) {
- btr_search_drop_page_hash_index(block);
- }
-
- block->check_index_page_at_flush = TRUE;
-#endif /* !UNIV_HOTBACKUP */
-
- /* Recreate the page: note that global data on page (possible
- segment headers, next page-field, etc.) is preserved intact */
-
- page_create(block, mtr, dict_table_is_comp(index->table));
-
- /* Copy the records from the temporary space to the recreated page;
- do not copy the lock bits yet */
-
- page_copy_rec_list_end_no_locks(block, temp_block,
- page_get_infimum_rec(temp_page),
- index, mtr);
-
- if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
- /* Copy max trx id to recreated page */
- trx_id_t max_trx_id = page_get_max_trx_id(temp_page);
- page_set_max_trx_id(block, NULL, max_trx_id, mtr);
- /* In crash recovery, dict_index_is_sec_or_ibuf() always
- returns TRUE, even for clustered indexes. max_trx_id is
- unused in clustered index pages. */
- ut_ad(!ut_dulint_is_zero(max_trx_id) || recovery);
- }
-
- if (UNIV_LIKELY_NULL(page_zip)
- && UNIV_UNLIKELY
- (!page_zip_compress(page_zip, page, index, NULL))) {
-
- /* Restore the old page and exit. */
-
-#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
- /* Check that the bytes that we skip are identical. */
- ut_a(!memcmp(page, temp_page, PAGE_HEADER));
- ut_a(!memcmp(PAGE_HEADER + PAGE_N_RECS + page,
- PAGE_HEADER + PAGE_N_RECS + temp_page,
- PAGE_DATA - (PAGE_HEADER + PAGE_N_RECS)));
- ut_a(!memcmp(UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + page,
- UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + temp_page,
- FIL_PAGE_DATA_END));
-#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
-
- memcpy(PAGE_HEADER + page, PAGE_HEADER + temp_page,
- PAGE_N_RECS - PAGE_N_DIR_SLOTS);
- memcpy(PAGE_DATA + page, PAGE_DATA + temp_page,
- UNIV_PAGE_SIZE - PAGE_DATA - FIL_PAGE_DATA_END);
-
-#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
- ut_a(!memcmp(page, temp_page, UNIV_PAGE_SIZE));
-#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
-
- goto func_exit;
- }
-
-#ifndef UNIV_HOTBACKUP
- if (UNIV_LIKELY(!recovery)) {
- /* Update the record lock bitmaps */
- lock_move_reorganize_page(block, temp_block);
- }
-#endif /* !UNIV_HOTBACKUP */
-
- data_size2 = page_get_data_size(page);
- max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1);
-
- if (UNIV_UNLIKELY(data_size1 != data_size2)
- || UNIV_UNLIKELY(max_ins_size1 != max_ins_size2)) {
- buf_page_print(page, 0);
- buf_page_print(temp_page, 0);
- fprintf(stderr,
- "InnoDB: Error: page old data size %lu"
- " new data size %lu\n"
- "InnoDB: Error: page old max ins size %lu"
- " new max ins size %lu\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n",
- (unsigned long) data_size1, (unsigned long) data_size2,
- (unsigned long) max_ins_size1,
- (unsigned long) max_ins_size2);
- } else {
- success = TRUE;
- }
-
-func_exit:
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
-#endif /* UNIV_ZIP_DEBUG */
-#ifndef UNIV_HOTBACKUP
- buf_block_free(temp_block);
-#endif /* !UNIV_HOTBACKUP */
-
- /* Restore logging mode */
- mtr_set_log_mode(mtr, log_mode);
-
- return(success);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Reorganizes an index page.
-This is a thin wrapper: it forwards block, index and mtr to
-btr_page_reorganize_low() with FALSE as the first argument (redo log
-parsing in btr_parse_page_reorganize() passes TRUE instead) and
-returns the result of that call unchanged.
-IMPORTANT: if btr_page_reorganize() is invoked on a compressed leaf
-page of a non-clustered index, the caller must update the insert
-buffer free bits in the same mini-transaction in such a way that the
-modification will be redo-logged.
-@return TRUE on success, FALSE on failure */
-UNIV_INTERN
-ibool
-btr_page_reorganize(
-/*================*/
- buf_block_t* block, /*!< in: page to be reorganized */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr */
-{
- return(btr_page_reorganize_low(FALSE, block, index, mtr));
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Parses a redo log record of reorganizing a page.
-The record carries nothing after its initial part, so ptr is returned
-exactly as it was passed in. When block is not NULL the page is
-reorganized by calling btr_page_reorganize_low() with TRUE as the
-first argument; the ibool result of that call is not checked here.
-@return end of log record or NULL (this implementation always
-returns ptr) */
-UNIV_INTERN
-byte*
-btr_parse_page_reorganize(
-/*======================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr __attribute__((unused)),
- /*!< in: buffer end; only read by the
- debug assertion below */
- dict_index_t* index, /*!< in: record descriptor */
- buf_block_t* block, /*!< in: page to be reorganized, or NULL */
- mtr_t* mtr) /*!< in: mtr or NULL */
-{
- ut_ad(ptr && end_ptr);
-
- /* The record is empty, except for the record initial part */
-
- if (UNIV_LIKELY(block != NULL)) {
- btr_page_reorganize_low(TRUE, block, index, mtr);
- }
-
- return(ptr);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Empties an index page. @see btr_page_create().
-The page hash index of the block is dropped first with
-btr_search_drop_page_hash_index(). The page is then recreated at the
-given level: page_create_zip() is used when page_zip is not NULL,
-otherwise page_create() followed by btr_page_set_level(). Finally
-block->check_index_page_at_flush is set to TRUE. */
-static
-void
-btr_page_empty(
-/*===========*/
- buf_block_t* block, /*!< in: page to be emptied */
- page_zip_des_t* page_zip,/*!< out: compressed page, or NULL;
- asserted in debug builds to be equal to
- buf_block_get_page_zip(block) */
- dict_index_t* index, /*!< in: index of the page */
- ulint level, /*!< in: the B-tree level of the page */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* page = buf_block_get_frame(block);
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- ut_ad(page_zip == buf_block_get_page_zip(block));
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
-#endif /* UNIV_ZIP_DEBUG */
-
- btr_search_drop_page_hash_index(block);
-
- /* Recreate the page: note that global data on page (possible
- segment headers, next page-field, etc.) is preserved intact */
-
- if (UNIV_LIKELY_NULL(page_zip)) {
- page_create_zip(block, index, level, mtr);
- } else {
- page_create(block, mtr, dict_table_is_comp(index->table));
- btr_page_set_level(page, NULL, level, mtr);
- }
-
- block->check_index_page_at_flush = TRUE;
-}
-
-/*************************************************************//**
-Makes tree one level higher by splitting the root, and inserts
-the tuple. It is assumed that mtr contains an x-latch on the tree.
-NOTE that the operation of this function must always succeed,
-we cannot reverse it: therefore enough free disk space must be
-guaranteed to be available before this function is called.
-Outline of the steps performed below:
-1. allocate and create a new page on the same level as the root;
-2. copy all root records to the new page, falling back to a byte for
-byte copy of a compressed page if the record-wise copy fails;
-3. empty the root, recreating it one level higher;
-4. insert into the root a node pointer to the new page, flagged with
-REC_INFO_MIN_REC_FLAG;
-5. reposition the cursor on the new page and let
-btr_page_split_and_insert() split it and insert the tuple.
-@return inserted record */
-UNIV_INTERN
-rec_t*
-btr_root_raise_and_insert(
-/*======================*/
- btr_cur_t* cursor, /*!< in: cursor at which to insert: must be
- on the root page; when the function returns,
- the cursor is positioned on the predecessor
- of the inserted record */
- const dtuple_t* tuple, /*!< in: tuple to insert */
- ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr) /*!< in: mtr */
-{
- dict_index_t* index;
- page_t* root;
- page_t* new_page;
- ulint new_page_no;
- rec_t* rec;
- mem_heap_t* heap;
- dtuple_t* node_ptr;
- ulint level;
- rec_t* node_ptr_rec;
- page_cur_t* page_cursor;
- page_zip_des_t* root_page_zip;
- page_zip_des_t* new_page_zip;
- buf_block_t* root_block;
- buf_block_t* new_block;
-
- root = btr_cur_get_page(cursor);
- root_block = btr_cur_get_block(cursor);
- root_page_zip = buf_block_get_page_zip(root_block);
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!root_page_zip || page_zip_validate(root_page_zip, root));
-#endif /* UNIV_ZIP_DEBUG */
- index = btr_cur_get_index(cursor);
-#ifdef UNIV_BTR_DEBUG
- if (!dict_index_is_ibuf(index)) {
- ulint space = dict_index_get_space(index);
-
- ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
- + root, space));
- ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
- + root, space));
- }
-
- ut_a(dict_index_get_page(index) == page_get_page_no(root));
-#endif /* UNIV_BTR_DEBUG */
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, root_block, MTR_MEMO_PAGE_X_FIX));
-
- /* Allocate a new page to the tree. Root splitting is done by first
- moving the root records to the new page, emptying the root, putting
- a node pointer to the new page, and then splitting the new page. */
-
- level = btr_page_get_level(root, mtr);
-
- new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr);
- new_page = buf_block_get_frame(new_block);
- new_page_zip = buf_block_get_page_zip(new_block);
- ut_a(!new_page_zip == !root_page_zip);
- ut_a(!new_page_zip
- || page_zip_get_size(new_page_zip)
- == page_zip_get_size(root_page_zip));
-
- btr_page_create(new_block, new_page_zip, index, level, mtr);
-
- /* Set the next node and previous node fields of new page */
- btr_page_set_next(new_page, new_page_zip, FIL_NULL, mtr);
- btr_page_set_prev(new_page, new_page_zip, FIL_NULL, mtr);
-
- /* Copy the records from root to the new page one by one.
- When UNIV_ZIP_COPY is defined and the page is compressed, the
- record-wise copy is not attempted and the branch below is taken
- directly. */
-
- if (0
-#ifdef UNIV_ZIP_COPY
- || new_page_zip
-#endif /* UNIV_ZIP_COPY */
- || UNIV_UNLIKELY
- (!page_copy_rec_list_end(new_block, root_block,
- page_get_infimum_rec(root),
- index, mtr))) {
- ut_a(new_page_zip);
-
- /* Copy the page byte for byte. The assertion above allows
- this path only when the new page is compressed. */
- page_zip_copy_recs(new_page_zip, new_page,
- root_page_zip, root, index, mtr);
-
- /* Update the lock table and possible hash index. */
-
- lock_move_rec_list_end(new_block, root_block,
- page_get_infimum_rec(root));
-
- btr_search_move_or_delete_hash_entries(new_block, root_block,
- index);
- }
-
- /* If this is a pessimistic insert which is actually done to
- perform a pessimistic update then we have stored the lock
- information of the record to be inserted on the infimum of the
- root page: we cannot discard the lock structs on the root page */
-
- lock_update_root_raise(new_block, root_block);
-
- /* Create a memory heap where the node pointer is stored */
- heap = mem_heap_create(100);
-
- rec = page_rec_get_next(page_get_infimum_rec(new_page));
- new_page_no = buf_block_get_page_no(new_block);
-
- /* Build the node pointer (= node key and page address) for the
- child; rec is the record following the infimum of new_page */
-
- node_ptr = dict_index_build_node_ptr(index, rec, new_page_no, heap,
- level);
- /* The node pointer must be marked as the predefined minimum record,
- as there is no lower alphabetical limit to records in the leftmost
- node of a level: */
- dtuple_set_info_bits(node_ptr,
- dtuple_get_info_bits(node_ptr)
- | REC_INFO_MIN_REC_FLAG);
-
- /* Rebuild the root page to get free space; the root is recreated
- one level above the level it had on entry */
- btr_page_empty(root_block, root_page_zip, index, level + 1, mtr);
-
- /* Set the next node and previous node fields, although
- they should already have been set. The previous node field
- must be FIL_NULL if root_page_zip != NULL, because the
- REC_INFO_MIN_REC_FLAG (of the first user record) will be
- set if and only if btr_page_get_prev() == FIL_NULL. */
- btr_page_set_next(root, root_page_zip, FIL_NULL, mtr);
- btr_page_set_prev(root, root_page_zip, FIL_NULL, mtr);
-
- page_cursor = btr_cur_get_page_cur(cursor);
-
- /* Insert node pointer to the root */
-
- page_cur_set_before_first(root_block, page_cursor);
-
- node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr,
- index, 0, mtr);
-
- /* The root page should only contain the node pointer
- to new_page at this point. Thus, the data should fit. */
- ut_a(node_ptr_rec);
-
- /* Free the memory heap; node_ptr, which was built with it, is not
- used after the insert above */
- mem_heap_free(heap);
-
- /* We play safe and reset the free bits for the new page */
-
-#if 0
- fprintf(stderr, "Root raise new page no %lu\n", new_page_no);
-#endif
-
- if (!dict_index_is_clust(index)) {
- ibuf_reset_free_bits(new_block);
- }
-
- /* Reposition the cursor to the child node */
- page_cur_search(new_block, index, tuple,
- PAGE_CUR_LE, page_cursor);
-
- /* Split the child and insert tuple */
- return(btr_page_split_and_insert(cursor, tuple, n_ext, mtr));
-}
-
-/*************************************************************//**
-Decides if the page should be split at the convergence point of inserts
-converging to the left.
-The pattern is recognized when the PAGE_LAST_INSERT record of the page
-is the successor of the cursor record. *split_rec is written only when
-TRUE is returned; it is then set either to the cursor record or to its
-successor, as explained in the body.
-@return TRUE if split recommended */
-UNIV_INTERN
-ibool
-btr_page_get_split_rec_to_left(
-/*===========================*/
- btr_cur_t* cursor, /*!< in: cursor at which to insert */
- rec_t** split_rec) /*!< out: if split recommended,
- the first record on upper half page,
- or NULL if tuple to be inserted should
- be first */
-{
- page_t* page;
- rec_t* insert_point;
- rec_t* infimum;
-
- page = btr_cur_get_page(cursor);
- insert_point = btr_cur_get_rec(cursor);
-
- if (page_header_get_ptr(page, PAGE_LAST_INSERT)
- == page_rec_get_next(insert_point)) {
-
- infimum = page_get_infimum_rec(page);
-
- /* If the convergence is in the middle of a page, include also
- the record immediately before the new insert to the upper
- page. Otherwise, we could repeatedly move from page to page
- lots of records smaller than the convergence point.
- "In the middle" means here that the insert point is neither
- the infimum nor the record right after the infimum. */
-
- if (infimum != insert_point
- && page_rec_get_next(infimum) != insert_point) {
-
- *split_rec = insert_point;
- } else {
- *split_rec = page_rec_get_next(insert_point);
- }
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*************************************************************//**
-Decides if the page should be split at the convergence point of inserts
-converging to the right.
-The pattern is recognized when the PAGE_LAST_INSERT record of the page
-is the cursor record itself. *split_rec is written only when TRUE is
-returned: it is set to NULL when the record after the insert point, or
-the one after that, is the supremum; otherwise it is set to the second
-record after the insert point.
-@return TRUE if split recommended */
-UNIV_INTERN
-ibool
-btr_page_get_split_rec_to_right(
-/*============================*/
- btr_cur_t* cursor, /*!< in: cursor at which to insert */
- rec_t** split_rec) /*!< out: if split recommended,
- the first record on upper half page,
- or NULL if tuple to be inserted should
- be first */
-{
- page_t* page;
- rec_t* insert_point;
-
- page = btr_cur_get_page(cursor);
- insert_point = btr_cur_get_rec(cursor);
-
- /* We use eager heuristics: if the new insert would be right after
- the previous insert on the same page, we assume that there is a
- pattern of sequential inserts here. */
-
- if (UNIV_LIKELY(page_header_get_ptr(page, PAGE_LAST_INSERT)
- == insert_point)) {
-
- rec_t* next_rec;
-
- next_rec = page_rec_get_next(insert_point);
-
- if (page_rec_is_supremum(next_rec)) {
-split_at_new:
- /* Split at the new record to insert; this label is also
- reached by the goto below when only one user record
- follows the insert point */
- *split_rec = NULL;
- } else {
- rec_t* next_next_rec = page_rec_get_next(next_rec);
- if (page_rec_is_supremum(next_next_rec)) {
-
- goto split_at_new;
- }
-
- /* If there are >= 2 user records up from the insert
- point, split all but 1 off. We want to keep one because
- then sequential inserts can use the adaptive hash
- index, as they can do the necessary checks of the right
- search position just by looking at the records on this
- page. */
-
- *split_rec = next_next_rec;
- }
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*************************************************************//**
-Calculates a split record such that the tuple will certainly fit on
-its half-page when the split is performed. We assume in this function
-only that the cursor page has at least one user record.
-The records are visited in page order starting after the infimum, with
-the tuple taking its place right after the cursor record; inside the
-loop rec == NULL stands for the tuple itself. The offsets array is
-obtained from rec_get_offsets(), which is given &heap; if heap is not
-NULL at func_exit it is freed there.
-@return split record, or NULL if tuple will be the first record on
-upper half-page */
-static
-rec_t*
-btr_page_get_sure_split_rec(
-/*========================*/
- btr_cur_t* cursor, /*!< in: cursor at which insert should be made */
- const dtuple_t* tuple, /*!< in: tuple to insert */
- ulint n_ext) /*!< in: number of externally stored columns */
-{
- page_t* page;
- page_zip_des_t* page_zip;
- ulint insert_size;
- ulint free_space;
- ulint total_data;
- ulint total_n_recs;
- ulint total_space;
- ulint incl_data;
- rec_t* ins_rec;
- rec_t* rec;
- rec_t* next_rec;
- ulint n;
- mem_heap_t* heap;
- ulint* offsets;
-
- page = btr_cur_get_page(cursor);
-
- insert_size = rec_get_converted_size(cursor->index, tuple, n_ext);
- free_space = page_get_free_space_of_empty(page_is_comp(page));
-
- page_zip = btr_cur_get_page_zip(cursor);
- if (UNIV_LIKELY_NULL(page_zip)) {
- /* Estimate the free space of an empty compressed page; the
- smaller of the two estimates is kept in free_space. */
- ulint free_space_zip = page_zip_empty_size(
- cursor->index->n_fields,
- page_zip_get_size(page_zip));
-
- if (UNIV_LIKELY(free_space > (ulint) free_space_zip)) {
- free_space = (ulint) free_space_zip;
- }
- }
-
- /* free_space is now the free space of a created new page */
-
- total_data = page_get_data_size(page) + insert_size;
- total_n_recs = page_get_n_recs(page) + 1;
- ut_ad(total_n_recs >= 2);
- total_space = total_data + page_dir_calc_reserved_space(total_n_recs);
-
- n = 0;
- incl_data = 0;
- ins_rec = btr_cur_get_rec(cursor);
- rec = page_get_infimum_rec(page);
-
- heap = NULL;
- offsets = NULL;
-
- /* We start to include records to the left half, and when the
- space reserved by them exceeds half of total_space, then if
- the included records fit on the left page, they will be put there
- if something was left over also for the right page,
- otherwise the last included record will be the first on the right
- half page */
-
- do {
- /* Decide the next record to include */
- if (rec == ins_rec) {
- rec = NULL; /* NULL denotes that tuple is
- now included */
- } else if (rec == NULL) {
- rec = page_rec_get_next(ins_rec);
- } else {
- rec = page_rec_get_next(rec);
- }
-
- if (rec == NULL) {
- /* Include tuple */
- incl_data += insert_size;
- } else {
- offsets = rec_get_offsets(rec, cursor->index,
- offsets, ULINT_UNDEFINED,
- &heap);
- incl_data += rec_offs_size(offsets);
- }
-
- n++;
- } while (incl_data + page_dir_calc_reserved_space(n)
- < total_space / 2);
-
- if (incl_data + page_dir_calc_reserved_space(n) <= free_space) {
- /* The next record will be the first on
- the right half page if it is not the
- supremum record of page */
-
- if (rec == ins_rec) {
- rec = NULL; /* the tuple is the next record */
-
- goto func_exit;
- } else if (rec == NULL) {
- next_rec = page_rec_get_next(ins_rec);
- } else {
- next_rec = page_rec_get_next(rec);
- }
- ut_ad(next_rec);
- if (!page_rec_is_supremum(next_rec)) {
- rec = next_rec;
- }
- }
-
-func_exit:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(rec);
-}
-
-/*************************************************************//**
-Returns TRUE if the insert fits on the appropriate half-page with the
-chosen split_rec.
-The check starts from the data size and record count of the whole page
-plus the tuple, and subtracts one by one the records that will end up
-on the other half-page; TRUE is returned as soon as the remainder fits
-in the free space of an empty page. Only
-page_get_free_space_of_empty() is consulted here; the caller
-btr_page_split_and_insert() does not call this function when the new
-page is compressed.
-@return TRUE if fits */
-static
-ibool
-btr_page_insert_fits(
-/*=================*/
- btr_cur_t* cursor, /*!< in: cursor at which insert
- should be made */
- const rec_t* split_rec,/*!< in: suggestion for first record
- on upper half-page, or NULL if
- tuple to be inserted should be first */
- const ulint* offsets,/*!< in: rec_get_offsets(
- split_rec, cursor->index); NULL exactly
- when split_rec is NULL (debug assertion) */
- const dtuple_t* tuple, /*!< in: tuple to insert */
- ulint n_ext, /*!< in: number of externally stored columns */
- mem_heap_t* heap) /*!< in: temporary memory heap; its address
- is passed on to rec_get_offsets() */
-{
- page_t* page;
- ulint insert_size;
- ulint free_space;
- ulint total_data;
- ulint total_n_recs;
- const rec_t* rec;
- const rec_t* end_rec;
- ulint* offs;
-
- page = btr_cur_get_page(cursor);
-
- ut_ad(!split_rec == !offsets);
- ut_ad(!offsets
- || !page_is_comp(page) == !rec_offs_comp(offsets));
- ut_ad(!offsets
- || rec_offs_validate(split_rec, cursor->index, offsets));
-
- insert_size = rec_get_converted_size(cursor->index, tuple, n_ext);
- free_space = page_get_free_space_of_empty(page_is_comp(page));
-
- /* free_space is now the free space of a created new page */
-
- total_data = page_get_data_size(page) + insert_size;
- total_n_recs = page_get_n_recs(page) + 1;
-
- /* We determine which records (from rec to end_rec, not including
- end_rec) will end up on the other half page from tuple when it is
- inserted. */
-
- if (split_rec == NULL) {
- rec = page_rec_get_next(page_get_infimum_rec(page));
- end_rec = page_rec_get_next(btr_cur_get_rec(cursor));
-
- } else if (cmp_dtuple_rec(tuple, split_rec, offsets) >= 0) {
-
- rec = page_rec_get_next(page_get_infimum_rec(page));
- end_rec = split_rec;
- } else {
- rec = split_rec;
- end_rec = page_get_supremum_rec(page);
- }
-
- if (total_data + page_dir_calc_reserved_space(total_n_recs)
- <= free_space) {
-
- /* Ok, there will be enough available space on the
- half page where the tuple is inserted; this holds even
- before any record is removed */
-
- return(TRUE);
- }
-
- offs = NULL;
-
- while (rec != end_rec) {
- /* In this loop we calculate the amount of reserved
- space after rec is removed from page. */
-
- offs = rec_get_offsets(rec, cursor->index, offs,
- ULINT_UNDEFINED, &heap);
-
- total_data -= rec_offs_size(offs);
- total_n_recs--;
-
- if (total_data + page_dir_calc_reserved_space(total_n_recs)
- <= free_space) {
-
- /* Ok, there will be enough available space on the
- half page where the tuple is inserted */
-
- return(TRUE);
- }
-
- rec = page_rec_get_next_const(rec);
- }
-
- return(FALSE);
-}
-
-/*******************************************************//**
-Inserts a data tuple to a tree on a non-leaf level. It is assumed
-that mtr holds an x-latch on the tree.
-The cursor is positioned on the given level with
-btr_cur_search_to_nth_level() (PAGE_CUR_LE, BTR_CONT_MODIFY_TREE) and
-the tuple is inserted with btr_cur_pessimistic_insert(), passing
-BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG | BTR_NO_UNDO_LOG_FLAG. Any
-result other than DB_SUCCESS fails the ut_a() assertion. The rec and
-dummy_big_rec locals only receive outputs of the insert call and are
-not used afterwards. */
-UNIV_INTERN
-void
-btr_insert_on_non_leaf_level(
-/*=========================*/
- dict_index_t* index, /*!< in: index */
- ulint level, /*!< in: level, must be > 0 */
- dtuple_t* tuple, /*!< in: the record to be inserted */
- mtr_t* mtr) /*!< in: mtr */
-{
- big_rec_t* dummy_big_rec;
- btr_cur_t cursor;
- ulint err;
- rec_t* rec;
-
- ut_ad(level > 0);
-
- btr_cur_search_to_nth_level(index, level, tuple, PAGE_CUR_LE,
- BTR_CONT_MODIFY_TREE,
- &cursor, 0, mtr);
-
- err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
- | BTR_KEEP_SYS_FLAG
- | BTR_NO_UNDO_LOG_FLAG,
- &cursor, tuple, &rec,
- &dummy_big_rec, 0, NULL, mtr);
- ut_a(err == DB_SUCCESS);
-}
-
-/**************************************************************//**
-Attaches the halves of an index page on the appropriate level in an
-index tree.
-Outline of the steps performed below:
-1. decide from direction which of block and new_block is the lower
-and which the upper half; for FSP_DOWN the existing node pointer of
-block is redirected to the new lower half;
-2. build a node pointer for the upper half from split_rec and insert
-it on level + 1 with btr_insert_on_non_leaf_level();
-3. relink the doubly linked page list of the level: the old neighbours
-of block (if any) and the two halves themselves. */
-static
-void
-btr_attach_half_pages(
-/*==================*/
- dict_index_t* index, /*!< in: the index tree */
- buf_block_t* block, /*!< in/out: page to be split */
- rec_t* split_rec, /*!< in: first record on upper
- half page */
- buf_block_t* new_block, /*!< in/out: the new half page */
- ulint direction, /*!< in: FSP_UP or FSP_DOWN */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint space;
- ulint zip_size;
- ulint prev_page_no;
- ulint next_page_no;
- ulint level;
- page_t* page = buf_block_get_frame(block);
- page_t* lower_page;
- page_t* upper_page;
- ulint lower_page_no;
- ulint upper_page_no;
- page_zip_des_t* lower_page_zip;
- page_zip_des_t* upper_page_zip;
- dtuple_t* node_ptr_upper;
- mem_heap_t* heap;
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains(mtr, new_block, MTR_MEMO_PAGE_X_FIX));
-
- /* Create a memory heap where the data tuple is stored */
- heap = mem_heap_create(1024);
-
- /* Based on split direction, decide upper and lower pages */
- if (direction == FSP_DOWN) {
-
- btr_cur_t cursor;
- ulint* offsets;
-
- lower_page = buf_block_get_frame(new_block);
- lower_page_no = buf_block_get_page_no(new_block);
- lower_page_zip = buf_block_get_page_zip(new_block);
- upper_page = buf_block_get_frame(block);
- upper_page_no = buf_block_get_page_no(block);
- upper_page_zip = buf_block_get_page_zip(block);
-
- /* Look up the index for the node pointer to page */
- offsets = btr_page_get_father_block(NULL, heap, index,
- block, mtr, &cursor);
-
- /* Replace the address of the old child node (= page) with the
- address of the new lower half */
-
- btr_node_ptr_set_child_page_no(
- btr_cur_get_rec(&cursor),
- btr_cur_get_page_zip(&cursor),
- offsets, lower_page_no, mtr);
- mem_heap_empty(heap); /* offsets is not used after this point */
- } else {
- lower_page = buf_block_get_frame(block);
- lower_page_no = buf_block_get_page_no(block);
- lower_page_zip = buf_block_get_page_zip(block);
- upper_page = buf_block_get_frame(new_block);
- upper_page_no = buf_block_get_page_no(new_block);
- upper_page_zip = buf_block_get_page_zip(new_block);
- }
-
- /* Get the level of the split pages */
- level = btr_page_get_level(buf_block_get_frame(block), mtr);
- ut_ad(level
- == btr_page_get_level(buf_block_get_frame(new_block), mtr));
-
- /* Build the node pointer (= node key and page address) for the upper
- half */
-
- node_ptr_upper = dict_index_build_node_ptr(index, split_rec,
- upper_page_no, heap, level);
-
- /* Insert it next to the pointer to the lower half. Note that this
- may generate recursion leading to a split on the higher level. */
-
- btr_insert_on_non_leaf_level(index, level + 1, node_ptr_upper, mtr);
-
- /* Free the memory heap; node_ptr_upper is not used below */
- mem_heap_free(heap);
-
- /* Get the previous and next pages of page */
-
- prev_page_no = btr_page_get_prev(page, mtr);
- next_page_no = btr_page_get_next(page, mtr);
- space = buf_block_get_space(block);
- zip_size = buf_block_get_zip_size(block);
-
- /* Update page links of the level: the old left neighbour must point
- forward to the lower half and the old right neighbour must point
- back to the upper half */
-
- if (prev_page_no != FIL_NULL) {
- buf_block_t* prev_block = btr_block_get(space, zip_size,
- prev_page_no,
- RW_X_LATCH, mtr);
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(prev_block->frame) == page_is_comp(page));
- ut_a(btr_page_get_next(prev_block->frame, mtr)
- == buf_block_get_page_no(block));
-#endif /* UNIV_BTR_DEBUG */
-
- btr_page_set_next(buf_block_get_frame(prev_block),
- buf_block_get_page_zip(prev_block),
- lower_page_no, mtr);
- }
-
- if (next_page_no != FIL_NULL) {
- buf_block_t* next_block = btr_block_get(space, zip_size,
- next_page_no,
- RW_X_LATCH, mtr);
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(next_block->frame) == page_is_comp(page));
- ut_a(btr_page_get_prev(next_block->frame, mtr)
- == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
- btr_page_set_prev(buf_block_get_frame(next_block),
- buf_block_get_page_zip(next_block),
- upper_page_no, mtr);
- }
-
- btr_page_set_prev(lower_page, lower_page_zip, prev_page_no, mtr);
- btr_page_set_next(lower_page, lower_page_zip, upper_page_no, mtr);
-
- btr_page_set_prev(upper_page, upper_page_zip, lower_page_no, mtr);
- btr_page_set_next(upper_page, upper_page_zip, next_page_no, mtr);
-}
-
-/*************************************************************//**
-Splits an index page to halves and inserts the tuple. It is assumed
-that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is
-released within this function! NOTE that the operation of this
-function must always succeed, we cannot reverse it: therefore enough
-free disk space (2 pages) must be guaranteed to be available before
-this function is called.
-
-Implementation notes: the body is a retry loop entered at func_start.
-n_iterations counts the rounds in which the insert still did not fit
-after the split; from the second round on the split record is chosen
-with btr_page_get_sure_split_rec() instead of the sequential-insert
-heuristics. heap is emptied at the start of every round and freed
-before returning.
-
-@return inserted record */
-UNIV_INTERN
-rec_t*
-btr_page_split_and_insert(
-/*======================*/
- btr_cur_t* cursor, /*!< in: cursor at which to insert; when the
- function returns, the cursor is positioned
- on the predecessor of the inserted record */
- const dtuple_t* tuple, /*!< in: tuple to insert */
- ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_block_t* block;
- page_t* page;
- page_zip_des_t* page_zip;
- ulint page_no;
- byte direction;
- ulint hint_page_no;
- buf_block_t* new_block;
- page_t* new_page;
- page_zip_des_t* new_page_zip;
- rec_t* split_rec;
- buf_block_t* left_block;
- buf_block_t* right_block;
- buf_block_t* insert_block;
- page_t* insert_page;
- page_cur_t* page_cursor;
- rec_t* first_rec;
- byte* buf = 0; /* remove warning */
- rec_t* move_limit;
- ibool insert_will_fit;
- ibool insert_left;
- ulint n_iterations = 0;
- rec_t* rec;
- mem_heap_t* heap;
- ulint n_uniq;
- ulint* offsets;
-
- heap = mem_heap_create(1024);
- n_uniq = dict_index_get_n_unique_in_tree(cursor->index);
-func_start:
- mem_heap_empty(heap);
- offsets = NULL;
-
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
- MTR_MEMO_X_LOCK));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(dict_index_get_lock(cursor->index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- block = btr_cur_get_block(cursor);
- page = buf_block_get_frame(block);
- page_zip = buf_block_get_page_zip(block);
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- ut_ad(page_get_n_recs(page) >= 1);
-
- page_no = buf_block_get_page_no(block);
-
- /* 1. Decide the split record; split_rec == NULL means that the
- tuple to be inserted should be the first record on the upper
- half-page. The direction and the page number hint for the
- allocation in step 2 are chosen at the same time. */
-
- if (n_iterations > 0) {
- direction = FSP_UP;
- hint_page_no = page_no + 1;
- split_rec = btr_page_get_sure_split_rec(cursor, tuple, n_ext);
-
- } else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
- direction = FSP_UP;
- hint_page_no = page_no + 1;
-
- } else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) {
- direction = FSP_DOWN;
- hint_page_no = page_no - 1;
- } else {
- direction = FSP_UP;
- hint_page_no = page_no + 1;
-
- if (page_get_n_recs(page) == 1) {
- page_cur_t pcur;
-
- /* There is only one record in the index page
- therefore we can't split the node in the middle
- by default. We need to determine whether the
- new record will be inserted to the left or right. */
-
- /* Read the first (and only) record in the page. */
- page_cur_set_before_first(block, &pcur);
- page_cur_move_to_next(&pcur);
- first_rec = page_cur_get_rec(&pcur);
-
- offsets = rec_get_offsets(
- first_rec, cursor->index, offsets,
- n_uniq, &heap);
-
- /* If the new record is less than the existing record
- the split in the middle will copy the existing
- record to the new node. */
- if (cmp_dtuple_rec(tuple, first_rec, offsets) < 0) {
- split_rec = page_get_middle_rec(page);
- } else {
- split_rec = NULL;
- }
- } else {
- split_rec = page_get_middle_rec(page);
- }
- }
-
- /* 2. Allocate a new page to the index */
- new_block = btr_page_alloc(cursor->index, hint_page_no, direction,
- btr_page_get_level(page, mtr), mtr);
- new_page = buf_block_get_frame(new_block);
- new_page_zip = buf_block_get_page_zip(new_block);
- btr_page_create(new_block, new_page_zip, cursor->index,
- btr_page_get_level(page, mtr), mtr);
-
- /* 3. Calculate the first record on the upper half-page, and the
- first record (move_limit) on original page which ends up on the
- upper half */
-
- if (split_rec) {
- first_rec = move_limit = split_rec;
-
- offsets = rec_get_offsets(split_rec, cursor->index, offsets,
- n_uniq, &heap);
-
- insert_left = cmp_dtuple_rec(tuple, split_rec, offsets) < 0;
-
- if (UNIV_UNLIKELY(!insert_left && new_page_zip
- && n_iterations > 0)) {
- /* If a compressed page has already been split,
- avoid further splits by inserting the record
- to an empty page. */
- split_rec = NULL;
- goto insert_right;
- }
- } else {
-insert_right:
- insert_left = FALSE;
- buf = mem_alloc(rec_get_converted_size(cursor->index,
- tuple, n_ext));
-
- first_rec = rec_convert_dtuple_to_rec(buf, cursor->index,
- tuple, n_ext);
- move_limit = page_rec_get_next(btr_cur_get_rec(cursor));
- }
-
- /* 4. Do first the modifications in the tree structure */
-
- btr_attach_half_pages(cursor->index, block,
- first_rec, new_block, direction, mtr);
-
- /* If the split is made on the leaf level and the insert will fit
- on the appropriate half-page, we may release the tree x-latch.
- We can then move the records after releasing the tree latch,
- thus reducing the tree latch contention.
- For a compressed new page insert_will_fit is always FALSE, because
- btr_page_insert_fits() is not consulted in that case. */
-
- if (split_rec) {
- insert_will_fit = !new_page_zip
- && btr_page_insert_fits(cursor, split_rec,
- offsets, tuple, n_ext, heap);
- } else {
- mem_free(buf); /* buf was handed to rec_convert_dtuple_to_rec()
- to build first_rec, which is not used after
- btr_attach_half_pages() */
- insert_will_fit = !new_page_zip
- && btr_page_insert_fits(cursor, NULL,
- NULL, tuple, n_ext, heap);
- }
-
- if (insert_will_fit && page_is_leaf(page)) {
-
- mtr_memo_release(mtr, dict_index_get_lock(cursor->index),
- MTR_MEMO_X_LOCK);
- }
-
- /* 5. Move then the records to the new page */
- if (direction == FSP_DOWN) {
- /* fputs("Split left\n", stderr); */
-
- if (0
-#ifdef UNIV_ZIP_COPY
- || page_zip
-#endif /* UNIV_ZIP_COPY */
- || UNIV_UNLIKELY
- (!page_move_rec_list_start(new_block, block, move_limit,
- cursor->index, mtr))) {
- /* For some reason, compressing new_page failed,
- even though it should contain fewer records than
- the original page. Copy the page byte for byte
- and then delete the records from both pages
- as appropriate. Deleting will always succeed.
- move_limit - page + new_page is the record at the
- same byte offset on the copied page. */
- ut_a(new_page_zip);
-
- page_zip_copy_recs(new_page_zip, new_page,
- page_zip, page, cursor->index, mtr);
- page_delete_rec_list_end(move_limit - page + new_page,
- new_block, cursor->index,
- ULINT_UNDEFINED,
- ULINT_UNDEFINED, mtr);
-
- /* Update the lock table and possible hash index. */
-
- lock_move_rec_list_start(
- new_block, block, move_limit,
- new_page + PAGE_NEW_INFIMUM);
-
- btr_search_move_or_delete_hash_entries(
- new_block, block, cursor->index);
-
- /* Delete the records from the source page. */
-
- page_delete_rec_list_start(move_limit, block,
- cursor->index, mtr);
- }
-
- left_block = new_block;
- right_block = block;
-
- lock_update_split_left(right_block, left_block);
- } else {
- /* fputs("Split right\n", stderr); */
-
- if (0
-#ifdef UNIV_ZIP_COPY
- || page_zip
-#endif /* UNIV_ZIP_COPY */
- || UNIV_UNLIKELY
- (!page_move_rec_list_end(new_block, block, move_limit,
- cursor->index, mtr))) {
- /* For some reason, compressing new_page failed,
- even though it should contain fewer records than
- the original page. Copy the page byte for byte
- and then delete the records from both pages
- as appropriate. Deleting will always succeed.
- move_limit - page + new_page is the record at the
- same byte offset on the copied page. */
- ut_a(new_page_zip);
-
- page_zip_copy_recs(new_page_zip, new_page,
- page_zip, page, cursor->index, mtr);
- page_delete_rec_list_start(move_limit - page
- + new_page, new_block,
- cursor->index, mtr);
-
- /* Update the lock table and possible hash index. */
-
- lock_move_rec_list_end(new_block, block, move_limit);
-
- btr_search_move_or_delete_hash_entries(
- new_block, block, cursor->index);
-
- /* Delete the records from the source page. */
-
- page_delete_rec_list_end(move_limit, block,
- cursor->index,
- ULINT_UNDEFINED,
- ULINT_UNDEFINED, mtr);
- }
-
- left_block = block;
- right_block = new_block;
-
- lock_update_split_right(right_block, left_block);
- }
-
-#ifdef UNIV_ZIP_DEBUG
- if (UNIV_LIKELY_NULL(page_zip)) {
- ut_a(page_zip_validate(page_zip, page));
- ut_a(page_zip_validate(new_page_zip, new_page));
- }
-#endif /* UNIV_ZIP_DEBUG */
-
- /* At this point, split_rec, move_limit and first_rec may point
- to garbage on the old page. */
-
- /* 6. The split and the tree modification is now completed. Decide the
- page where the tuple should be inserted */
-
- if (insert_left) {
- insert_block = left_block;
- } else {
- insert_block = right_block;
- }
-
- insert_page = buf_block_get_frame(insert_block);
-
- /* 7. Reposition the cursor for insert and try insertion */
- page_cursor = btr_cur_get_page_cur(cursor);
-
- page_cur_search(insert_block, cursor->index, tuple,
- PAGE_CUR_LE, page_cursor);
-
- rec = page_cur_tuple_insert(page_cursor, tuple,
- cursor->index, n_ext, mtr);
-
-#ifdef UNIV_ZIP_DEBUG
- {
- page_zip_des_t* insert_page_zip
- = buf_block_get_page_zip(insert_block);
- ut_a(!insert_page_zip
- || page_zip_validate(insert_page_zip, insert_page));
- }
-#endif /* UNIV_ZIP_DEBUG */
-
- if (UNIV_LIKELY(rec != NULL)) {
-
- goto func_exit;
- }
-
- /* 8. If insert did not fit, try page reorganization */
-
- if (UNIV_UNLIKELY
- (!btr_page_reorganize(insert_block, cursor->index, mtr))) {
-
- goto insert_failed;
- }
-
- page_cur_search(insert_block, cursor->index, tuple,
- PAGE_CUR_LE, page_cursor);
- rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
- n_ext, mtr);
-
- if (UNIV_UNLIKELY(rec == NULL)) {
- /* The insert did not fit on the page: loop back to the
- start of the function for a new split */
-insert_failed:
- /* We play safe and reset the free bits for new_page */
- if (!dict_index_is_clust(cursor->index)) {
- ibuf_reset_free_bits(new_block);
- }
-
- /* fprintf(stderr, "Split second round %lu\n",
- page_get_page_no(page)); */
- n_iterations++;
- ut_ad(n_iterations < 2
- || buf_block_get_page_zip(insert_block));
- ut_ad(!insert_will_fit);
-
- goto func_start;
- }
-
-func_exit:
- /* Insert fit on the page: update the free bits for the
- left and right pages in the same mtr */
-
- if (!dict_index_is_clust(cursor->index) && page_is_leaf(page)) {
- ibuf_update_free_bits_for_two_pages_low(
- buf_block_get_zip_size(left_block),
- left_block, right_block, mtr);
- }
-
-#if 0
- fprintf(stderr, "Split and insert done %lu %lu\n",
- buf_block_get_page_no(left_block),
- buf_block_get_page_no(right_block));
-#endif
-
- ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index));
- ut_ad(page_validate(buf_block_get_frame(right_block), cursor->index));
-
- mem_heap_free(heap);
- return(rec);
-}
-
-/*************************************************************//**
-Removes a page from the level list of pages. */
-static
-void
-btr_level_list_remove(
-/*==================*/
- ulint space, /*!< in: space where removed */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- page_t* page, /*!< in: page to remove */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint prev_page_no;
- ulint next_page_no;
-
- ut_ad(page && mtr);
- ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
- ut_ad(space == page_get_space_id(page));
- /* Get the previous and next page numbers of page */
-
- prev_page_no = btr_page_get_prev(page, mtr);
- next_page_no = btr_page_get_next(page, mtr);
-
- /* Update page links of the level */
-
- if (prev_page_no != FIL_NULL) {
- buf_block_t* prev_block
- = btr_block_get(space, zip_size, prev_page_no,
- RW_X_LATCH, mtr);
- page_t* prev_page
- = buf_block_get_frame(prev_block);
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(prev_page) == page_is_comp(page));
- ut_a(btr_page_get_next(prev_page, mtr)
- == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
- btr_page_set_next(prev_page,
- buf_block_get_page_zip(prev_block),
- next_page_no, mtr);
- }
-
- if (next_page_no != FIL_NULL) {
- buf_block_t* next_block
- = btr_block_get(space, zip_size, next_page_no,
- RW_X_LATCH, mtr);
- page_t* next_page
- = buf_block_get_frame(next_block);
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(next_page) == page_is_comp(page));
- ut_a(btr_page_get_prev(next_page, mtr)
- == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
- btr_page_set_prev(next_page,
- buf_block_get_page_zip(next_block),
- prev_page_no, mtr);
- }
-}
-
-/****************************************************************//**
-Writes the redo log record for setting an index record as the predefined
-minimum record. */
-UNIV_INLINE
-void
-btr_set_min_rec_mark_log(
-/*=====================*/
- rec_t* rec, /*!< in: record */
- byte type, /*!< in: MLOG_COMP_REC_MIN_MARK or MLOG_REC_MIN_MARK */
- mtr_t* mtr) /*!< in: mtr */
-{
- mlog_write_initial_log_record(rec, type, mtr);
-
- /* Write rec offset as a 2-byte ulint */
- mlog_catenate_ulint(mtr, page_offset(rec), MLOG_2BYTES);
-}
-#else /* !UNIV_HOTBACKUP */
-# define btr_set_min_rec_mark_log(rec,comp,mtr) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-/****************************************************************//**
-Parses the redo log record for setting an index record as the predefined
-minimum record.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-btr_parse_set_min_rec_mark(
-/*=======================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- ulint comp, /*!< in: nonzero=compact page format */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr) /*!< in: mtr or NULL */
-{
- rec_t* rec;
-
- if (end_ptr < ptr + 2) {
-
- return(NULL);
- }
-
- if (page) {
- ut_a(!page_is_comp(page) == !comp);
-
- rec = page + mach_read_from_2(ptr);
-
- btr_set_min_rec_mark(rec, mtr);
- }
-
- return(ptr + 2);
-}
-
-/****************************************************************//**
-Sets a record as the predefined minimum record. */
-UNIV_INTERN
-void
-btr_set_min_rec_mark(
-/*=================*/
- rec_t* rec, /*!< in: record */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint info_bits;
-
- if (UNIV_LIKELY(page_rec_is_comp(rec))) {
- info_bits = rec_get_info_bits(rec, TRUE);
-
- rec_set_info_bits_new(rec, info_bits | REC_INFO_MIN_REC_FLAG);
-
- btr_set_min_rec_mark_log(rec, MLOG_COMP_REC_MIN_MARK, mtr);
- } else {
- info_bits = rec_get_info_bits(rec, FALSE);
-
- rec_set_info_bits_old(rec, info_bits | REC_INFO_MIN_REC_FLAG);
-
- btr_set_min_rec_mark_log(rec, MLOG_REC_MIN_MARK, mtr);
- }
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Deletes on the upper level the node pointer to a page. */
-UNIV_INTERN
-void
-btr_node_ptr_delete(
-/*================*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: page whose node pointer is deleted */
- mtr_t* mtr) /*!< in: mtr */
-{
- btr_cur_t cursor;
- ibool compressed;
- ulint err;
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-
- /* Delete node pointer on father page */
- btr_page_get_father(index, block, mtr, &cursor);
-
- compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, RB_NONE,
- mtr);
- ut_a(err == DB_SUCCESS);
-
- if (!compressed) {
- btr_cur_compress_if_useful(&cursor, mtr);
- }
-}
-
-/*************************************************************//**
-If page is the only on its level, this function moves its records to the
-father page, thus reducing the tree height. */
-static
-void
-btr_lift_page_up(
-/*=============*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: page which is the only on its level;
- must not be empty: use
- btr_discard_only_page_on_level if the last
- record from the page should be removed */
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_block_t* father_block;
- page_t* father_page;
- ulint page_level;
- page_zip_des_t* father_page_zip;
- page_t* page = buf_block_get_frame(block);
- ulint root_page_no;
- buf_block_t* blocks[BTR_MAX_LEVELS];
- ulint n_blocks; /*!< last used index in blocks[] */
- ulint i;
-
- ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
- ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-
- page_level = btr_page_get_level(page, mtr);
- root_page_no = dict_index_get_page(index);
-
- {
- btr_cur_t cursor;
- mem_heap_t* heap = mem_heap_create(100);
- ulint* offsets;
- buf_block_t* b;
-
- offsets = btr_page_get_father_block(NULL, heap, index,
- block, mtr, &cursor);
- father_block = btr_cur_get_block(&cursor);
- father_page_zip = buf_block_get_page_zip(father_block);
- father_page = buf_block_get_frame(father_block);
-
- n_blocks = 0;
-
- /* Store all ancestor pages so we can reset their
- levels later on. We have to do all the searches on
- the tree now because later on, after we've replaced
- the first level, the tree is in an inconsistent state
- and can not be searched. */
- for (b = father_block;
- buf_block_get_page_no(b) != root_page_no; ) {
- ut_a(n_blocks < BTR_MAX_LEVELS);
-
- offsets = btr_page_get_father_block(offsets, heap,
- index, b,
- mtr, &cursor);
-
- blocks[n_blocks++] = b = btr_cur_get_block(&cursor);
- }
-
- mem_heap_free(heap);
- }
-
- btr_search_drop_page_hash_index(block);
-
- /* Make the father empty */
- btr_page_empty(father_block, father_page_zip, index, page_level, mtr);
-
- /* Copy the records to the father page one by one. */
- if (0
-#ifdef UNIV_ZIP_COPY
- || father_page_zip
-#endif /* UNIV_ZIP_COPY */
- || UNIV_UNLIKELY
- (!page_copy_rec_list_end(father_block, block,
- page_get_infimum_rec(page),
- index, mtr))) {
- const page_zip_des_t* page_zip
- = buf_block_get_page_zip(block);
- ut_a(father_page_zip);
- ut_a(page_zip);
-
- /* Copy the page byte for byte. */
- page_zip_copy_recs(father_page_zip, father_page,
- page_zip, page, index, mtr);
-
- /* Update the lock table and possible hash index. */
-
- lock_move_rec_list_end(father_block, block,
- page_get_infimum_rec(page));
-
- btr_search_move_or_delete_hash_entries(father_block, block,
- index);
- }
-
- lock_update_copy_and_discard(father_block, block);
-
- /* Go upward to root page, decrementing levels by one. */
- for (i = 0; i < n_blocks; i++, page_level++) {
- page_t* page = buf_block_get_frame(blocks[i]);
- page_zip_des_t* page_zip= buf_block_get_page_zip(blocks[i]);
-
- ut_ad(btr_page_get_level(page, mtr) == page_level + 1);
-
- btr_page_set_level(page, page_zip, page_level, mtr);
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
-#endif /* UNIV_ZIP_DEBUG */
- }
-
- /* Free the file page */
- btr_page_free(index, block, mtr);
-
- /* We play it safe and reset the free bits for the father */
- if (!dict_index_is_clust(index)) {
- ibuf_reset_free_bits(father_block);
- }
- ut_ad(page_validate(father_page, index));
- ut_ad(btr_check_node_ptr(index, father_block, mtr));
-}
-
-/*************************************************************//**
-Tries to merge the page first to the left immediate brother if such a
-brother exists, and the node pointers to the current page and to the brother
-reside on the same page. If the left brother does not satisfy these
-conditions, looks at the right brother. If the page is the only one on that
-level lifts the records of the page to the father page, thus reducing the
-tree height. It is assumed that mtr holds an x-latch on the tree and on the
-page. If cursor is on the leaf level, mtr must also hold x-latches to the
-brothers, if they exist.
-@return TRUE on success */
-UNIV_INTERN
-ibool
-btr_compress(
-/*=========*/
- btr_cur_t* cursor, /*!< in: cursor on the page to merge or lift;
- the page must not be empty: in record delete
- use btr_discard_page if the page would become
- empty */
- mtr_t* mtr) /*!< in: mtr */
-{
- dict_index_t* index;
- ulint space;
- ulint zip_size;
- ulint left_page_no;
- ulint right_page_no;
- buf_block_t* merge_block;
- page_t* merge_page;
- page_zip_des_t* merge_page_zip;
- ibool is_left;
- buf_block_t* block;
- page_t* page;
- btr_cur_t father_cursor;
- mem_heap_t* heap;
- ulint* offsets;
- ulint data_size;
- ulint n_recs;
- ulint max_ins_size;
- ulint max_ins_size_reorg;
- ulint level;
-
- block = btr_cur_get_block(cursor);
- page = btr_cur_get_page(cursor);
- index = btr_cur_get_index(cursor);
- ut_a((ibool) !!page_is_comp(page) == dict_table_is_comp(index->table));
-
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- level = btr_page_get_level(page, mtr);
- space = dict_index_get_space(index);
- zip_size = dict_table_zip_size(index->table);
-
- left_page_no = btr_page_get_prev(page, mtr);
- right_page_no = btr_page_get_next(page, mtr);
-
-#if 0
- fprintf(stderr, "Merge left page %lu right %lu \n",
- left_page_no, right_page_no);
-#endif
-
- heap = mem_heap_create(100);
- offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
- &father_cursor);
-
- /* Decide the page to which we try to merge and which will inherit
- the locks */
-
- is_left = left_page_no != FIL_NULL;
-
- if (is_left) {
-
- merge_block = btr_block_get(space, zip_size, left_page_no,
- RW_X_LATCH, mtr);
- merge_page = buf_block_get_frame(merge_block);
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_next(merge_page, mtr)
- == buf_block_get_page_no(block));
-#endif /* UNIV_BTR_DEBUG */
- } else if (right_page_no != FIL_NULL) {
-
- merge_block = btr_block_get(space, zip_size, right_page_no,
- RW_X_LATCH, mtr);
- merge_page = buf_block_get_frame(merge_block);
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_prev(merge_page, mtr)
- == buf_block_get_page_no(block));
-#endif /* UNIV_BTR_DEBUG */
- } else {
- /* The page is the only one on the level, lift the records
- to the father */
- btr_lift_page_up(index, block, mtr);
- mem_heap_free(heap);
- return(TRUE);
- }
-
- n_recs = page_get_n_recs(page);
- data_size = page_get_data_size(page);
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(merge_page) == page_is_comp(page));
-#endif /* UNIV_BTR_DEBUG */
-
- max_ins_size_reorg = page_get_max_insert_size_after_reorganize(
- merge_page, n_recs);
- if (data_size > max_ins_size_reorg) {
-
- /* No space for merge */
-err_exit:
- /* We play it safe and reset the free bits. */
- if (zip_size
- && page_is_leaf(merge_page)
- && !dict_index_is_clust(index)) {
- ibuf_reset_free_bits(merge_block);
- }
-
- mem_heap_free(heap);
- return(FALSE);
- }
-
- ut_ad(page_validate(merge_page, index));
-
- max_ins_size = page_get_max_insert_size(merge_page, n_recs);
-
- if (UNIV_UNLIKELY(data_size > max_ins_size)) {
-
- /* We have to reorganize merge_page */
-
- if (UNIV_UNLIKELY(!btr_page_reorganize(merge_block,
- index, mtr))) {
-
- goto err_exit;
- }
-
- max_ins_size = page_get_max_insert_size(merge_page, n_recs);
-
- ut_ad(page_validate(merge_page, index));
- ut_ad(max_ins_size == max_ins_size_reorg);
-
- if (UNIV_UNLIKELY(data_size > max_ins_size)) {
-
- /* Add fault tolerance, though this should
- never happen */
-
- goto err_exit;
- }
- }
-
- merge_page_zip = buf_block_get_page_zip(merge_block);
-#ifdef UNIV_ZIP_DEBUG
- if (UNIV_LIKELY_NULL(merge_page_zip)) {
- const page_zip_des_t* page_zip
- = buf_block_get_page_zip(block);
- ut_a(page_zip);
- ut_a(page_zip_validate(merge_page_zip, merge_page));
- ut_a(page_zip_validate(page_zip, page));
- }
-#endif /* UNIV_ZIP_DEBUG */
-
- /* Move records to the merge page */
- if (is_left) {
- rec_t* orig_pred = page_copy_rec_list_start(
- merge_block, block, page_get_supremum_rec(page),
- index, mtr);
-
- if (UNIV_UNLIKELY(!orig_pred)) {
- goto err_exit;
- }
-
- btr_search_drop_page_hash_index(block);
-
- /* Remove the page from the level list */
- btr_level_list_remove(space, zip_size, page, mtr);
-
- btr_node_ptr_delete(index, block, mtr);
- lock_update_merge_left(merge_block, orig_pred, block);
- } else {
- rec_t* orig_succ;
-#ifdef UNIV_BTR_DEBUG
- byte fil_page_prev[4];
-#endif /* UNIV_BTR_DEBUG */
-
- if (UNIV_LIKELY_NULL(merge_page_zip)) {
- /* The function page_zip_compress(), which will be
- invoked by page_copy_rec_list_end() below,
- requires that FIL_PAGE_PREV be FIL_NULL.
- Clear the field, but prepare to restore it. */
-#ifdef UNIV_BTR_DEBUG
- memcpy(fil_page_prev, merge_page + FIL_PAGE_PREV, 4);
-#endif /* UNIV_BTR_DEBUG */
-#if FIL_NULL != 0xffffffff
-# error "FIL_NULL != 0xffffffff"
-#endif
- memset(merge_page + FIL_PAGE_PREV, 0xff, 4);
- }
-
- orig_succ = page_copy_rec_list_end(merge_block, block,
- page_get_infimum_rec(page),
- cursor->index, mtr);
-
- if (UNIV_UNLIKELY(!orig_succ)) {
- ut_a(merge_page_zip);
-#ifdef UNIV_BTR_DEBUG
- /* FIL_PAGE_PREV was restored from merge_page_zip. */
- ut_a(!memcmp(fil_page_prev,
- merge_page + FIL_PAGE_PREV, 4));
-#endif /* UNIV_BTR_DEBUG */
- goto err_exit;
- }
-
- btr_search_drop_page_hash_index(block);
-
-#ifdef UNIV_BTR_DEBUG
- if (UNIV_LIKELY_NULL(merge_page_zip)) {
- /* Restore FIL_PAGE_PREV in order to avoid an assertion
- failure in btr_level_list_remove(), which will set
- the field again to FIL_NULL. Even though this makes
- merge_page and merge_page_zip inconsistent for a
- split second, it is harmless, because the pages
- are X-latched. */
- memcpy(merge_page + FIL_PAGE_PREV, fil_page_prev, 4);
- }
-#endif /* UNIV_BTR_DEBUG */
-
- /* Remove the page from the level list */
- btr_level_list_remove(space, zip_size, page, mtr);
-
- /* Replace the address of the old child node (= page) with the
- address of the merge page to the right */
-
- btr_node_ptr_set_child_page_no(
- btr_cur_get_rec(&father_cursor),
- btr_cur_get_page_zip(&father_cursor),
- offsets, right_page_no, mtr);
- btr_node_ptr_delete(index, merge_block, mtr);
-
- lock_update_merge_right(merge_block, orig_succ, block);
- }
-
- mem_heap_free(heap);
-
- if (!dict_index_is_clust(index) && page_is_leaf(merge_page)) {
- /* Update the free bits of the B-tree page in the
- insert buffer bitmap. This has to be done in a
- separate mini-transaction that is committed before the
- main mini-transaction. We cannot update the insert
- buffer bitmap in this mini-transaction, because
- btr_compress() can be invoked recursively without
- committing the mini-transaction in between. Since
- insert buffer bitmap pages have a lower rank than
- B-tree pages, we must not access other pages in the
- same mini-transaction after accessing an insert buffer
- bitmap page. */
-
- /* The free bits in the insert buffer bitmap must
- never exceed the free space on a page. It is safe to
- decrement or reset the bits in the bitmap in a
- mini-transaction that is committed before the
- mini-transaction that affects the free space. */
-
- /* It is unsafe to increment the bits in a separately
- committed mini-transaction, because in crash recovery,
- the free bits could momentarily be set too high. */
-
- if (zip_size) {
- /* Because the free bits may be incremented
- and we cannot update the insert buffer bitmap
- in the same mini-transaction, the only safe
- thing we can do here is the pessimistic
- approach: reset the free bits. */
- ibuf_reset_free_bits(merge_block);
- } else {
- /* On uncompressed pages, the free bits will
- never increase here. Thus, it is safe to
- write the bits accurately in a separate
- mini-transaction. */
- ibuf_update_free_bits_if_full(merge_block,
- UNIV_PAGE_SIZE,
- ULINT_UNDEFINED);
- }
- }
-
- ut_ad(page_validate(merge_page, index));
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!merge_page_zip || page_zip_validate(merge_page_zip, merge_page));
-#endif /* UNIV_ZIP_DEBUG */
-
- /* Free the file page */
- btr_page_free(index, block, mtr);
-
- ut_ad(btr_check_node_ptr(index, merge_block, mtr));
- return(TRUE);
-}
-
-/*************************************************************//**
-Discards a page that is the only page on its level. This will empty
-the whole B-tree, leaving just an empty root page. This function
-should never be reached, because btr_compress(), which is invoked in
-delete operations, calls btr_lift_page_up() to flatten the B-tree. */
-static
-void
-btr_discard_only_page_on_level(
-/*===========================*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: page which is the only on its level */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint page_level = 0;
- trx_id_t max_trx_id;
-
- /* Save the PAGE_MAX_TRX_ID from the leaf page. */
- max_trx_id = page_get_max_trx_id(buf_block_get_frame(block));
-
- while (buf_block_get_page_no(block) != dict_index_get_page(index)) {
- btr_cur_t cursor;
- buf_block_t* father;
- const page_t* page = buf_block_get_frame(block);
-
- ut_a(page_get_n_recs(page) == 1);
- ut_a(page_level == btr_page_get_level(page, mtr));
- ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
- ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- btr_search_drop_page_hash_index(block);
-
- btr_page_get_father(index, block, mtr, &cursor);
- father = btr_cur_get_block(&cursor);
-
- lock_update_discard(father, PAGE_HEAP_NO_SUPREMUM, block);
-
- /* Free the file page */
- btr_page_free(index, block, mtr);
-
- block = father;
- page_level++;
- }
-
- /* block is the root page, which must be empty, except
- for the node pointer to the (now discarded) block(s). */
-
-#ifdef UNIV_BTR_DEBUG
- if (!dict_index_is_ibuf(index)) {
- const page_t* root = buf_block_get_frame(block);
- const ulint space = dict_index_get_space(index);
- ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
- + root, space));
- ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
- + root, space));
- }
-#endif /* UNIV_BTR_DEBUG */
-
- btr_page_empty(block, buf_block_get_page_zip(block), index, 0, mtr);
-
- if (!dict_index_is_clust(index)) {
- /* We play it safe and reset the free bits for the root */
- ibuf_reset_free_bits(block);
-
- if (page_is_leaf(buf_block_get_frame(block))) {
- ut_a(!ut_dulint_is_zero(max_trx_id));
- page_set_max_trx_id(block,
- buf_block_get_page_zip(block),
- max_trx_id, mtr);
- }
- }
-}
-
-/*************************************************************//**
-Discards a page from a B-tree. This is used to remove the last record from
-a B-tree page: the whole page must be removed at the same time. This cannot
-be used for the root page, which is allowed to be empty. */
-UNIV_INTERN
-void
-btr_discard_page(
-/*=============*/
- btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on
- the root page */
- mtr_t* mtr) /*!< in: mtr */
-{
- dict_index_t* index;
- ulint space;
- ulint zip_size;
- ulint left_page_no;
- ulint right_page_no;
- buf_block_t* merge_block;
- page_t* merge_page;
- buf_block_t* block;
- page_t* page;
- rec_t* node_ptr;
-
- block = btr_cur_get_block(cursor);
- index = btr_cur_get_index(cursor);
-
- ut_ad(dict_index_get_page(index) != buf_block_get_page_no(block));
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- space = dict_index_get_space(index);
- zip_size = dict_table_zip_size(index->table);
-
- /* Decide the page which will inherit the locks */
-
- left_page_no = btr_page_get_prev(buf_block_get_frame(block), mtr);
- right_page_no = btr_page_get_next(buf_block_get_frame(block), mtr);
-
- if (left_page_no != FIL_NULL) {
- merge_block = btr_block_get(space, zip_size, left_page_no,
- RW_X_LATCH, mtr);
- merge_page = buf_block_get_frame(merge_block);
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_next(merge_page, mtr)
- == buf_block_get_page_no(block));
-#endif /* UNIV_BTR_DEBUG */
- } else if (right_page_no != FIL_NULL) {
- merge_block = btr_block_get(space, zip_size, right_page_no,
- RW_X_LATCH, mtr);
- merge_page = buf_block_get_frame(merge_block);
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_prev(merge_page, mtr)
- == buf_block_get_page_no(block));
-#endif /* UNIV_BTR_DEBUG */
- } else {
- btr_discard_only_page_on_level(index, block, mtr);
-
- return;
- }
-
- page = buf_block_get_frame(block);
- ut_a(page_is_comp(merge_page) == page_is_comp(page));
- btr_search_drop_page_hash_index(block);
-
- if (left_page_no == FIL_NULL && !page_is_leaf(page)) {
-
- /* We have to mark the leftmost node pointer on the right
- side page as the predefined minimum record */
- node_ptr = page_rec_get_next(page_get_infimum_rec(merge_page));
-
- ut_ad(page_rec_is_user_rec(node_ptr));
-
- /* This will make page_zip_validate() fail on merge_page
- until btr_level_list_remove() completes. This is harmless,
- because everything will take place within a single
- mini-transaction and because writing to the redo log
- is an atomic operation (performed by mtr_commit()). */
- btr_set_min_rec_mark(node_ptr, mtr);
- }
-
- btr_node_ptr_delete(index, block, mtr);
-
- /* Remove the page from the level list */
- btr_level_list_remove(space, zip_size, page, mtr);
-#ifdef UNIV_ZIP_DEBUG
- {
- page_zip_des_t* merge_page_zip
- = buf_block_get_page_zip(merge_block);
- ut_a(!merge_page_zip
- || page_zip_validate(merge_page_zip, merge_page));
- }
-#endif /* UNIV_ZIP_DEBUG */
-
- if (left_page_no != FIL_NULL) {
- lock_update_discard(merge_block, PAGE_HEAP_NO_SUPREMUM,
- block);
- } else {
- lock_update_discard(merge_block,
- lock_get_min_heap_no(merge_block),
- block);
- }
-
- /* Free the file page */
- btr_page_free(index, block, mtr);
-
- ut_ad(btr_check_node_ptr(index, merge_block, mtr));
-}
-
-#ifdef UNIV_BTR_PRINT
-/*************************************************************//**
-Prints size info of a B-tree. */
-UNIV_INTERN
-void
-btr_print_size(
-/*===========*/
- dict_index_t* index) /*!< in: index tree */
-{
- page_t* root;
- fseg_header_t* seg;
- mtr_t mtr;
-
- if (dict_index_is_ibuf(index)) {
- fputs("Sorry, cannot print info of an ibuf tree:"
- " use ibuf functions\n", stderr);
-
- return;
- }
-
- mtr_start(&mtr);
-
- root = btr_root_get(index, &mtr);
-
- seg = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
-
- fputs("INFO OF THE NON-LEAF PAGE SEGMENT\n", stderr);
- fseg_print(seg, &mtr);
-
- if (!(index->type & DICT_UNIVERSAL)) {
-
- seg = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
-
- fputs("INFO OF THE LEAF PAGE SEGMENT\n", stderr);
- fseg_print(seg, &mtr);
- }
-
- mtr_commit(&mtr);
-}
-
-/************************************************************//**
-Prints recursively index tree pages. */
-static
-void
-btr_print_recursive(
-/*================*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: index page */
- ulint width, /*!< in: print this many entries from start
- and end */
- mem_heap_t** heap, /*!< in/out: heap for rec_get_offsets() */
- ulint** offsets,/*!< in/out: buffer for rec_get_offsets() */
- mtr_t* mtr) /*!< in: mtr */
-{
- const page_t* page = buf_block_get_frame(block);
- page_cur_t cursor;
- ulint n_recs;
- ulint i = 0;
- mtr_t mtr2;
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- fprintf(stderr, "NODE ON LEVEL %lu page number %lu\n",
- (ulong) btr_page_get_level(page, mtr),
- (ulong) buf_block_get_page_no(block));
-
- page_print(block, index, width, width);
-
- n_recs = page_get_n_recs(page);
-
- page_cur_set_before_first(block, &cursor);
- page_cur_move_to_next(&cursor);
-
- while (!page_cur_is_after_last(&cursor)) {
-
- if (page_is_leaf(page)) {
-
- /* If this is the leaf level, do nothing */
-
- } else if ((i <= width) || (i >= n_recs - width)) {
-
- const rec_t* node_ptr;
-
- mtr_start(&mtr2);
-
- node_ptr = page_cur_get_rec(&cursor);
-
- *offsets = rec_get_offsets(node_ptr, index, *offsets,
- ULINT_UNDEFINED, heap);
- btr_print_recursive(index,
- btr_node_ptr_get_child(node_ptr,
- index,
- *offsets,
- &mtr2),
- width, heap, offsets, &mtr2);
- mtr_commit(&mtr2);
- }
-
- page_cur_move_to_next(&cursor);
- i++;
- }
-}
-
-/**************************************************************//**
-Prints directories and other info of all nodes in the tree. */
-UNIV_INTERN
-void
-btr_print_index(
-/*============*/
- dict_index_t* index, /*!< in: index */
- ulint width) /*!< in: print this many entries from start
- and end */
-{
- mtr_t mtr;
- buf_block_t* root;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- fputs("--------------------------\n"
- "INDEX TREE PRINT\n", stderr);
-
- mtr_start(&mtr);
-
- root = btr_root_block_get(index, &mtr);
-
- btr_print_recursive(index, root, width, &heap, &offsets, &mtr);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- mtr_commit(&mtr);
-
- btr_validate_index(index, NULL);
-}
-#endif /* UNIV_BTR_PRINT */
-
-#ifdef UNIV_DEBUG
-/************************************************************//**
-Checks that the node pointer to a page is appropriate.
-@return TRUE */
-UNIV_INTERN
-ibool
-btr_check_node_ptr(
-/*===============*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: index page */
- mtr_t* mtr) /*!< in: mtr */
-{
- mem_heap_t* heap;
- dtuple_t* tuple;
- ulint* offsets;
- btr_cur_t cursor;
- page_t* page = buf_block_get_frame(block);
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- if (dict_index_get_page(index) == buf_block_get_page_no(block)) {
-
- return(TRUE);
- }
-
- heap = mem_heap_create(256);
- offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
- &cursor);
-
- if (page_is_leaf(page)) {
-
- goto func_exit;
- }
-
- tuple = dict_index_build_node_ptr(
- index, page_rec_get_next(page_get_infimum_rec(page)), 0, heap,
- btr_page_get_level(page, mtr));
-
- ut_a(!cmp_dtuple_rec(tuple, btr_cur_get_rec(&cursor), offsets));
-func_exit:
- mem_heap_free(heap);
-
- return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
-/************************************************************//**
-Display identification information for a record. */
-static
-void
-btr_index_rec_validate_report(
-/*==========================*/
- const page_t* page, /*!< in: index page */
- const rec_t* rec, /*!< in: index record */
- const dict_index_t* index) /*!< in: index */
-{
- fputs("InnoDB: Record in ", stderr);
- dict_index_name_print(stderr, NULL, index);
- fprintf(stderr, ", page %lu, at offset %lu\n",
- page_get_page_no(page), (ulint) page_offset(rec));
-}
-
-/************************************************************//**
-Checks the size and number of fields in a record based on the definition of
-the index.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-btr_index_rec_validate(
-/*===================*/
- const rec_t* rec, /*!< in: index record */
- const dict_index_t* index, /*!< in: index */
- ibool dump_on_error) /*!< in: TRUE if the function
- should print hex dump of record
- and page on error */
-{
- ulint len;
- ulint n;
- ulint i;
- const page_t* page;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- page = page_align(rec);
-
- if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
- /* The insert buffer index tree can contain records from any
- other index: we cannot check the number of fields or
- their length */
-
- return(TRUE);
- }
-
- if (UNIV_UNLIKELY((ibool)!!page_is_comp(page)
- != dict_table_is_comp(index->table))) {
- btr_index_rec_validate_report(page, rec, index);
- fprintf(stderr, "InnoDB: compact flag=%lu, should be %lu\n",
- (ulong) !!page_is_comp(page),
- (ulong) dict_table_is_comp(index->table));
-
- return(FALSE);
- }
-
- n = dict_index_get_n_fields(index);
-
- if (!page_is_comp(page)
- && UNIV_UNLIKELY(rec_get_n_fields_old(rec) != n)) {
- btr_index_rec_validate_report(page, rec, index);
- fprintf(stderr, "InnoDB: has %lu fields, should have %lu\n",
- (ulong) rec_get_n_fields_old(rec), (ulong) n);
-
- if (dump_on_error) {
- buf_page_print(page, 0);
-
- fputs("InnoDB: corrupt record ", stderr);
- rec_print_old(stderr, rec);
- putc('\n', stderr);
- }
- return(FALSE);
- }
-
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
-
- for (i = 0; i < n; i++) {
- ulint fixed_size = dict_col_get_fixed_size(
- dict_index_get_nth_col(index, i), page_is_comp(page));
-
- rec_get_nth_field_offs(offsets, i, &len);
-
- /* Note that if fixed_size != 0, it equals the
- length of a fixed-size column in the clustered index.
- A prefix index of the column is of fixed, but different
- length. When fixed_size == 0, prefix_len is the maximum
- length of the prefix index column. */
-
- if ((dict_index_get_nth_field(index, i)->prefix_len == 0
- && len != UNIV_SQL_NULL && fixed_size
- && len != fixed_size)
- || (dict_index_get_nth_field(index, i)->prefix_len > 0
- && len != UNIV_SQL_NULL
- && len
- > dict_index_get_nth_field(index, i)->prefix_len)) {
-
- btr_index_rec_validate_report(page, rec, index);
- fprintf(stderr,
- "InnoDB: field %lu len is %lu,"
- " should be %lu\n",
- (ulong) i, (ulong) len, (ulong) fixed_size);
-
- if (dump_on_error) {
- buf_page_print(page, 0);
-
- fputs("InnoDB: corrupt record ", stderr);
- rec_print_new(stderr, rec, offsets);
- putc('\n', stderr);
- }
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(FALSE);
- }
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(TRUE);
-}
-
-/************************************************************//**
-Checks the size and number of fields in records based on the definition of
-the index.
-@return TRUE if ok */
-static
-ibool
-btr_index_page_validate(
-/*====================*/
- buf_block_t* block, /*!< in: index page */
- dict_index_t* index) /*!< in: index */
-{
- page_cur_t cur;
- ibool ret = TRUE;
-
- page_cur_set_before_first(block, &cur);
- page_cur_move_to_next(&cur);
-
- for (;;) {
- if (page_cur_is_after_last(&cur)) {
-
- break;
- }
-
- if (!btr_index_rec_validate(cur.rec, index, TRUE)) {
-
- return(FALSE);
- }
-
- page_cur_move_to_next(&cur);
- }
-
- return(ret);
-}
-
-/************************************************************//**
-Report an error on one page of an index tree. */
-static
-void
-btr_validate_report1(
-/*=================*/
- dict_index_t* index, /*!< in: index */
- ulint level, /*!< in: B-tree level */
- const buf_block_t* block) /*!< in: index page */
-{
- fprintf(stderr, "InnoDB: Error in page %lu of ",
- buf_block_get_page_no(block));
- dict_index_name_print(stderr, NULL, index);
- if (level) {
- fprintf(stderr, ", index tree level %lu", level);
- }
- putc('\n', stderr);
-}
-
-/************************************************************//**
-Report an error on two pages of an index tree. */
-static
-void
-btr_validate_report2(
-/*=================*/
- const dict_index_t* index, /*!< in: index */
- ulint level, /*!< in: B-tree level */
- const buf_block_t* block1, /*!< in: first index page */
- const buf_block_t* block2) /*!< in: second index page */
-{
- fprintf(stderr, "InnoDB: Error in pages %lu and %lu of ",
- buf_block_get_page_no(block1),
- buf_block_get_page_no(block2));
- dict_index_name_print(stderr, NULL, index);
- if (level) {
- fprintf(stderr, ", index tree level %lu", level);
- }
- putc('\n', stderr);
-}
-
-/************************************************************//**
-Validates index tree level.
-@return TRUE if ok */
-static
-ibool
-btr_validate_level(
-/*===============*/
- dict_index_t* index, /*!< in: index tree */
- trx_t* trx, /*!< in: transaction or NULL */
- ulint level) /*!< in: level number */
-{
- ulint space;
- ulint zip_size;
- buf_block_t* block;
- page_t* page;
- buf_block_t* right_block = 0; /* remove warning */
- page_t* right_page = 0; /* remove warning */
- page_t* father_page;
- btr_cur_t node_cur;
- btr_cur_t right_node_cur;
- rec_t* rec;
- ulint right_page_no;
- ulint left_page_no;
- page_cur_t cursor;
- dtuple_t* node_ptr_tuple;
- ibool ret = TRUE;
- mtr_t mtr;
- mem_heap_t* heap = mem_heap_create(256);
- ulint* offsets = NULL;
- ulint* offsets2= NULL;
-#ifdef UNIV_ZIP_DEBUG
- page_zip_des_t* page_zip;
-#endif /* UNIV_ZIP_DEBUG */
-
- mtr_start(&mtr);
-
- mtr_x_lock(dict_index_get_lock(index), &mtr);
-
- block = btr_root_block_get(index, &mtr);
- page = buf_block_get_frame(block);
-
- space = dict_index_get_space(index);
- zip_size = dict_table_zip_size(index->table);
-
- while (level != btr_page_get_level(page, &mtr)) {
- const rec_t* node_ptr;
-
- ut_a(space == buf_block_get_space(block));
- ut_a(space == page_get_space_id(page));
-#ifdef UNIV_ZIP_DEBUG
- page_zip = buf_block_get_page_zip(block);
- ut_a(!page_zip || page_zip_validate(page_zip, page));
-#endif /* UNIV_ZIP_DEBUG */
- ut_a(!page_is_leaf(page));
-
- page_cur_set_before_first(block, &cursor);
- page_cur_move_to_next(&cursor);
-
- node_ptr = page_cur_get_rec(&cursor);
- offsets = rec_get_offsets(node_ptr, index, offsets,
- ULINT_UNDEFINED, &heap);
- block = btr_node_ptr_get_child(node_ptr, index, offsets, &mtr);
- page = buf_block_get_frame(block);
- }
-
- /* Now we are on the desired level. Loop through the pages on that
- level. */
-loop:
- if (trx_is_interrupted(trx)) {
- mtr_commit(&mtr);
- mem_heap_free(heap);
- return(ret);
- }
- mem_heap_empty(heap);
- offsets = offsets2 = NULL;
- mtr_x_lock(dict_index_get_lock(index), &mtr);
-
-#ifdef UNIV_ZIP_DEBUG
- page_zip = buf_block_get_page_zip(block);
- ut_a(!page_zip || page_zip_validate(page_zip, page));
-#endif /* UNIV_ZIP_DEBUG */
-
- /* Check ordering etc. of records */
-
- if (!page_validate(page, index)) {
- btr_validate_report1(index, level, block);
-
- ret = FALSE;
- } else if (level == 0) {
- /* We are on level 0. Check that the records have the right
- number of fields, and field lengths are right. */
-
- if (!btr_index_page_validate(block, index)) {
-
- ret = FALSE;
- }
- }
-
- ut_a(btr_page_get_level(page, &mtr) == level);
-
- right_page_no = btr_page_get_next(page, &mtr);
- left_page_no = btr_page_get_prev(page, &mtr);
-
- ut_a(page_get_n_recs(page) > 0 || (level == 0
- && page_get_page_no(page)
- == dict_index_get_page(index)));
-
- if (right_page_no != FIL_NULL) {
- const rec_t* right_rec;
- right_block = btr_block_get(space, zip_size, right_page_no,
- RW_X_LATCH, &mtr);
- right_page = buf_block_get_frame(right_block);
- if (UNIV_UNLIKELY(btr_page_get_prev(right_page, &mtr)
- != page_get_page_no(page))) {
- btr_validate_report2(index, level, block, right_block);
- fputs("InnoDB: broken FIL_PAGE_NEXT"
- " or FIL_PAGE_PREV links\n", stderr);
- buf_page_print(page, 0);
- buf_page_print(right_page, 0);
-
- ret = FALSE;
- }
-
- if (UNIV_UNLIKELY(page_is_comp(right_page)
- != page_is_comp(page))) {
- btr_validate_report2(index, level, block, right_block);
- fputs("InnoDB: 'compact' flag mismatch\n", stderr);
- buf_page_print(page, 0);
- buf_page_print(right_page, 0);
-
- ret = FALSE;
-
- goto node_ptr_fails;
- }
-
- rec = page_rec_get_prev(page_get_supremum_rec(page));
- right_rec = page_rec_get_next(page_get_infimum_rec(
- right_page));
- offsets = rec_get_offsets(rec, index,
- offsets, ULINT_UNDEFINED, &heap);
- offsets2 = rec_get_offsets(right_rec, index,
- offsets2, ULINT_UNDEFINED, &heap);
- if (UNIV_UNLIKELY(cmp_rec_rec(rec, right_rec,
- offsets, offsets2,
- index) >= 0)) {
-
- btr_validate_report2(index, level, block, right_block);
-
- fputs("InnoDB: records in wrong order"
- " on adjacent pages\n", stderr);
-
- buf_page_print(page, 0);
- buf_page_print(right_page, 0);
-
- fputs("InnoDB: record ", stderr);
- rec = page_rec_get_prev(page_get_supremum_rec(page));
- rec_print(stderr, rec, index);
- putc('\n', stderr);
- fputs("InnoDB: record ", stderr);
- rec = page_rec_get_next(
- page_get_infimum_rec(right_page));
- rec_print(stderr, rec, index);
- putc('\n', stderr);
-
- ret = FALSE;
- }
- }
-
- if (level > 0 && left_page_no == FIL_NULL) {
- ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
- page_rec_get_next(page_get_infimum_rec(page)),
- page_is_comp(page)));
- }
-
- if (buf_block_get_page_no(block) != dict_index_get_page(index)) {
-
- /* Check father node pointers */
-
- rec_t* node_ptr;
-
- offsets = btr_page_get_father_block(offsets, heap, index,
- block, &mtr, &node_cur);
- father_page = btr_cur_get_page(&node_cur);
- node_ptr = btr_cur_get_rec(&node_cur);
-
- btr_cur_position(
- index, page_rec_get_prev(page_get_supremum_rec(page)),
- block, &node_cur);
- offsets = btr_page_get_father_node_ptr(offsets, heap,
- &node_cur, &mtr);
-
- if (UNIV_UNLIKELY(node_ptr != btr_cur_get_rec(&node_cur))
- || UNIV_UNLIKELY(btr_node_ptr_get_child_page_no(node_ptr,
- offsets)
- != buf_block_get_page_no(block))) {
-
- btr_validate_report1(index, level, block);
-
- fputs("InnoDB: node pointer to the page is wrong\n",
- stderr);
-
- buf_page_print(father_page, 0);
- buf_page_print(page, 0);
-
- fputs("InnoDB: node ptr ", stderr);
- rec_print(stderr, node_ptr, index);
-
- rec = btr_cur_get_rec(&node_cur);
- fprintf(stderr, "\n"
- "InnoDB: node ptr child page n:o %lu\n",
- (ulong) btr_node_ptr_get_child_page_no(
- rec, offsets));
-
- fputs("InnoDB: record on page ", stderr);
- rec_print_new(stderr, rec, offsets);
- putc('\n', stderr);
- ret = FALSE;
-
- goto node_ptr_fails;
- }
-
- if (!page_is_leaf(page)) {
- node_ptr_tuple = dict_index_build_node_ptr(
- index,
- page_rec_get_next(page_get_infimum_rec(page)),
- 0, heap, btr_page_get_level(page, &mtr));
-
- if (cmp_dtuple_rec(node_ptr_tuple, node_ptr,
- offsets)) {
- const rec_t* first_rec = page_rec_get_next(
- page_get_infimum_rec(page));
-
- btr_validate_report1(index, level, block);
-
- buf_page_print(father_page, 0);
- buf_page_print(page, 0);
-
- fputs("InnoDB: Error: node ptrs differ"
- " on levels > 0\n"
- "InnoDB: node ptr ", stderr);
- rec_print_new(stderr, node_ptr, offsets);
- fputs("InnoDB: first rec ", stderr);
- rec_print(stderr, first_rec, index);
- putc('\n', stderr);
- ret = FALSE;
-
- goto node_ptr_fails;
- }
- }
-
- if (left_page_no == FIL_NULL) {
- ut_a(node_ptr == page_rec_get_next(
- page_get_infimum_rec(father_page)));
- ut_a(btr_page_get_prev(father_page, &mtr) == FIL_NULL);
- }
-
- if (right_page_no == FIL_NULL) {
- ut_a(node_ptr == page_rec_get_prev(
- page_get_supremum_rec(father_page)));
- ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL);
- } else {
- const rec_t* right_node_ptr
- = page_rec_get_next(node_ptr);
-
- offsets = btr_page_get_father_block(
- offsets, heap, index, right_block,
- &mtr, &right_node_cur);
- if (right_node_ptr
- != page_get_supremum_rec(father_page)) {
-
- if (btr_cur_get_rec(&right_node_cur)
- != right_node_ptr) {
- ret = FALSE;
- fputs("InnoDB: node pointer to"
- " the right page is wrong\n",
- stderr);
-
- btr_validate_report1(index, level,
- block);
-
- buf_page_print(father_page, 0);
- buf_page_print(page, 0);
- buf_page_print(right_page, 0);
- }
- } else {
- page_t* right_father_page
- = btr_cur_get_page(&right_node_cur);
-
- if (btr_cur_get_rec(&right_node_cur)
- != page_rec_get_next(
- page_get_infimum_rec(
- right_father_page))) {
- ret = FALSE;
- fputs("InnoDB: node pointer 2 to"
- " the right page is wrong\n",
- stderr);
-
- btr_validate_report1(index, level,
- block);
-
- buf_page_print(father_page, 0);
- buf_page_print(right_father_page, 0);
- buf_page_print(page, 0);
- buf_page_print(right_page, 0);
- }
-
- if (page_get_page_no(right_father_page)
- != btr_page_get_next(father_page, &mtr)) {
-
- ret = FALSE;
- fputs("InnoDB: node pointer 3 to"
- " the right page is wrong\n",
- stderr);
-
- btr_validate_report1(index, level,
- block);
-
- buf_page_print(father_page, 0);
- buf_page_print(right_father_page, 0);
- buf_page_print(page, 0);
- buf_page_print(right_page, 0);
- }
- }
- }
- }
-
-node_ptr_fails:
- /* Commit the mini-transaction to release the latch on 'page'.
- Re-acquire the latch on right_page, which will become 'page'
- on the next loop. The page has already been checked. */
- mtr_commit(&mtr);
-
- if (right_page_no != FIL_NULL) {
- mtr_start(&mtr);
-
- block = btr_block_get(space, zip_size, right_page_no,
- RW_X_LATCH, &mtr);
- page = buf_block_get_frame(block);
-
- goto loop;
- }
-
- mem_heap_free(heap);
- return(ret);
-}
-
-/**************************************************************//**
-Checks the consistency of an index tree.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-btr_validate_index(
-/*===============*/
- dict_index_t* index, /*!< in: index */
- trx_t* trx) /*!< in: transaction or NULL */
-{
- mtr_t mtr;
- page_t* root;
- ulint i;
- ulint n;
-
- mtr_start(&mtr);
- mtr_x_lock(dict_index_get_lock(index), &mtr);
-
- root = btr_root_get(index, &mtr);
- n = btr_page_get_level(root, &mtr);
-
- for (i = 0; i <= n && !trx_is_interrupted(trx); i++) {
- if (!btr_validate_level(index, trx, n - i)) {
-
- mtr_commit(&mtr);
-
- return(FALSE);
- }
- }
-
- mtr_commit(&mtr);
-
- return(TRUE);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/btr/btr0cur.c b/storage/innodb_plugin/btr/btr0cur.c
deleted file mode 100644
index 46dfb5d1a46..00000000000
--- a/storage/innodb_plugin/btr/btr0cur.c
+++ /dev/null
@@ -1,4847 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file btr/btr0cur.c
-The index tree cursor
-
-All changes that row operations make to a B-tree or the records
-there must go through this module! Undo log records are written here
-of every modify or insert of a clustered index record.
-
- NOTE!!!
-To make sure we do not run out of disk space during a pessimistic
-insert or update, we have to reserve 2 x the height of the index tree
-many pages in the tablespace before we start the operation, because
-if leaf splitting has been started, it is difficult to undo, except
-by crashing the database and doing a roll-forward.
-
-Created 10/16/1994 Heikki Tuuri
-*******************************************************/
-
-#include "btr0cur.h"
-
-#ifdef UNIV_NONINL
-#include "btr0cur.ic"
-#endif
-
-#include "row0upd.h"
-#ifndef UNIV_HOTBACKUP
-#include "mtr0log.h"
-#include "page0page.h"
-#include "page0zip.h"
-#include "rem0rec.h"
-#include "rem0cmp.h"
-#include "buf0lru.h"
-#include "btr0btr.h"
-#include "btr0sea.h"
-#include "trx0rec.h"
-#include "trx0roll.h" /* trx_is_recv() */
-#include "que0que.h"
-#include "row0row.h"
-#include "srv0srv.h"
-#include "ibuf0ibuf.h"
-#include "lock0lock.h"
-#include "zlib.h"
-
-#ifdef UNIV_DEBUG
-/** If the following is set to TRUE, this module prints a lot of
-trace information of individual record operations */
-UNIV_INTERN ibool btr_cur_print_record_ops = FALSE;
-#endif /* UNIV_DEBUG */
-
-/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */
-UNIV_INTERN ulint btr_cur_n_non_sea = 0;
-/** Number of successful adaptive hash index lookups in
-btr_cur_search_to_nth_level(). */
-UNIV_INTERN ulint btr_cur_n_sea = 0;
-/** Old value of btr_cur_n_non_sea. Copied by
-srv_refresh_innodb_monitor_stats(). Referenced by
-srv_printf_innodb_monitor(). */
-UNIV_INTERN ulint btr_cur_n_non_sea_old = 0;
-/** Old value of btr_cur_n_sea. Copied by
-srv_refresh_innodb_monitor_stats(). Referenced by
-srv_printf_innodb_monitor(). */
-UNIV_INTERN ulint btr_cur_n_sea_old = 0;
-
-/** In the optimistic insert, if the insert does not fit, but this much space
-can be released by page reorganize, then it is reorganized */
-#define BTR_CUR_PAGE_REORGANIZE_LIMIT (UNIV_PAGE_SIZE / 32)
-
-/** The structure of a BLOB part header */
-/* @{ */
-/*--------------------------------------*/
-#define BTR_BLOB_HDR_PART_LEN 0 /*!< BLOB part len on this
- page */
-#define BTR_BLOB_HDR_NEXT_PAGE_NO 4 /*!< next BLOB part page no,
- FIL_NULL if none */
-/*--------------------------------------*/
-#define BTR_BLOB_HDR_SIZE 8 /*!< Size of a BLOB
- part header, in bytes */
-/* @} */
-#endif /* !UNIV_HOTBACKUP */
-
-/** A BLOB field reference full of zero, for use in assertions and tests.
-Initially, BLOB field references are set to zero, in
-dtuple_convert_big_rec(). */
-UNIV_INTERN const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE];
-
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Marks all extern fields in a record as owned by the record. This function
-should be called if the delete mark of a record is removed: a not delete
-marked record always owns all its extern fields. */
-static
-void
-btr_cur_unmark_extern_fields(
-/*=========================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed
- part will be updated, or NULL */
- rec_t* rec, /*!< in/out: record in a clustered index */
- dict_index_t* index, /*!< in: index of the page */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- mtr_t* mtr); /*!< in: mtr, or NULL if not logged */
-/*******************************************************************//**
-Adds path information to the cursor for the current page, for which
-the binary search has been performed. */
-static
-void
-btr_cur_add_path_info(
-/*==================*/
- btr_cur_t* cursor, /*!< in: cursor positioned on a page */
- ulint height, /*!< in: height of the page in tree;
- 0 means leaf node */
- ulint root_height); /*!< in: root node height in tree */
-/***********************************************************//**
-Frees the externally stored fields for a record, if the field is mentioned
-in the update vector. */
-static
-void
-btr_rec_free_updated_extern_fields(
-/*===============================*/
- dict_index_t* index, /*!< in: index of rec; the index tree MUST be
- X-latched */
- rec_t* rec, /*!< in: record */
- page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
- part will be updated, or NULL */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- const upd_t* update, /*!< in: update vector */
- enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
- mtr_t* mtr); /*!< in: mini-transaction handle which contains
- an X-latch to record page and to the tree */
-/***********************************************************//**
-Frees the externally stored fields for a record. */
-static
-void
-btr_rec_free_externally_stored_fields(
-/*==================================*/
- dict_index_t* index, /*!< in: index of the data, the index
- tree MUST be X-latched */
- rec_t* rec, /*!< in: record */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
- part will be updated, or NULL */
- enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
- mtr_t* mtr); /*!< in: mini-transaction handle which contains
- an X-latch to record page and to the index
- tree */
-/***********************************************************//**
-Gets the externally stored size of a record, in units of a database page.
-@return externally stored part, in units of a database page */
-static
-ulint
-btr_rec_get_externally_stored_len(
-/*==============================*/
- rec_t* rec, /*!< in: record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-#endif /* !UNIV_HOTBACKUP */
-
-/******************************************************//**
-The following function is used to set the deleted bit of a record. */
-UNIV_INLINE
-void
-btr_rec_set_deleted_flag(
-/*=====================*/
- rec_t* rec, /*!< in/out: physical record */
- page_zip_des_t* page_zip,/*!< in/out: compressed page (or NULL) */
- ulint flag) /*!< in: nonzero if delete marked */
-{
- if (page_rec_is_comp(rec)) {
- rec_set_deleted_flag_new(rec, page_zip, flag);
- } else {
- ut_ad(!page_zip);
- rec_set_deleted_flag_old(rec, flag);
- }
-}
-
-#ifndef UNIV_HOTBACKUP
-/*==================== B-TREE SEARCH =========================*/
-
-/********************************************************************//**
-Latches the leaf page or pages requested. */
-static
-void
-btr_cur_latch_leaves(
-/*=================*/
- page_t* page, /*!< in: leaf page where the search
- converged */
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number of the leaf */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
- btr_cur_t* cursor, /*!< in: cursor */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint mode;
- ulint left_page_no;
- ulint right_page_no;
- buf_block_t* get_block;
-
- ut_ad(page && mtr);
-
- switch (latch_mode) {
- case BTR_SEARCH_LEAF:
- case BTR_MODIFY_LEAF:
- mode = latch_mode == BTR_SEARCH_LEAF ? RW_S_LATCH : RW_X_LATCH;
- get_block = btr_block_get(space, zip_size, page_no, mode, mtr);
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
-#endif /* UNIV_BTR_DEBUG */
- get_block->check_index_page_at_flush = TRUE;
- return;
- case BTR_MODIFY_TREE:
- /* x-latch also brothers from left to right */
- left_page_no = btr_page_get_prev(page, mtr);
-
- if (left_page_no != FIL_NULL) {
- get_block = btr_block_get(space, zip_size,
- left_page_no,
- RW_X_LATCH, mtr);
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(get_block->frame)
- == page_is_comp(page));
- ut_a(btr_page_get_next(get_block->frame, mtr)
- == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
- get_block->check_index_page_at_flush = TRUE;
- }
-
- get_block = btr_block_get(space, zip_size, page_no,
- RW_X_LATCH, mtr);
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
-#endif /* UNIV_BTR_DEBUG */
- get_block->check_index_page_at_flush = TRUE;
-
- right_page_no = btr_page_get_next(page, mtr);
-
- if (right_page_no != FIL_NULL) {
- get_block = btr_block_get(space, zip_size,
- right_page_no,
- RW_X_LATCH, mtr);
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(get_block->frame)
- == page_is_comp(page));
- ut_a(btr_page_get_prev(get_block->frame, mtr)
- == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
- get_block->check_index_page_at_flush = TRUE;
- }
-
- return;
-
- case BTR_SEARCH_PREV:
- case BTR_MODIFY_PREV:
- mode = latch_mode == BTR_SEARCH_PREV ? RW_S_LATCH : RW_X_LATCH;
- /* latch also left brother */
- left_page_no = btr_page_get_prev(page, mtr);
-
- if (left_page_no != FIL_NULL) {
- get_block = btr_block_get(space, zip_size,
- left_page_no, mode, mtr);
- cursor->left_block = get_block;
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(get_block->frame)
- == page_is_comp(page));
- ut_a(btr_page_get_next(get_block->frame, mtr)
- == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
- get_block->check_index_page_at_flush = TRUE;
- }
-
- get_block = btr_block_get(space, zip_size, page_no, mode, mtr);
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
-#endif /* UNIV_BTR_DEBUG */
- get_block->check_index_page_at_flush = TRUE;
- return;
- }
-
- ut_error;
-}
-
-/********************************************************************//**
-Searches an index tree and positions a tree cursor on a given level.
-NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
-to node pointer page number fields on the upper levels of the tree!
-Note that if mode is PAGE_CUR_LE, which is used in inserts, then
-cursor->up_match and cursor->low_match both will have sensible values.
-If mode is PAGE_CUR_GE, then up_match will a have a sensible value.
-
-If mode is PAGE_CUR_LE , cursor is left at the place where an insert of the
-search tuple should be performed in the B-tree. InnoDB does an insert
-immediately after the cursor. Thus, the cursor may end up on a user record,
-or on a page infimum record. */
-UNIV_INTERN
-void
-btr_cur_search_to_nth_level(
-/*========================*/
- dict_index_t* index, /*!< in: index */
- ulint level, /*!< in: the tree level of search */
- const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in
- tuple must be set so that it cannot get
- compared to the node ptr page number field! */
- ulint mode, /*!< in: PAGE_CUR_L, ...;
- Inserts should always be made using
- PAGE_CUR_LE to search the position! */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with
- BTR_INSERT and BTR_ESTIMATE;
- cursor->left_block is used to store a pointer
- to the left neighbor page, in the cases
- BTR_SEARCH_PREV and BTR_MODIFY_PREV;
- NOTE that if has_search_latch
- is != 0, we maybe do not have a latch set
- on the cursor page, we assume
- the caller uses his search latch
- to protect the record! */
- btr_cur_t* cursor, /*!< in/out: tree cursor; the cursor page is
- s- or x-latched, but see also above! */
- ulint has_search_latch,/*!< in: info on the latch mode the
- caller currently has on btr_search_latch:
- RW_S_LATCH, or 0 */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_cur_t* page_cursor;
- page_t* page;
- buf_block_t* guess;
- rec_t* node_ptr;
- ulint page_no;
- ulint space;
- ulint up_match;
- ulint up_bytes;
- ulint low_match;
- ulint low_bytes;
- ulint height;
- ulint savepoint;
- ulint page_mode;
- ulint insert_planned;
- ulint estimate;
- ulint ignore_sec_unique;
- ulint root_height = 0; /* remove warning */
-#ifdef BTR_CUR_ADAPT
- btr_search_t* info;
-#endif
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
- /* Currently, PAGE_CUR_LE is the only search mode used for searches
- ending to upper levels */
-
- ut_ad(level == 0 || mode == PAGE_CUR_LE);
- ut_ad(dict_index_check_search_tuple(index, tuple));
- ut_ad(!dict_index_is_ibuf(index) || ibuf_inside());
- ut_ad(dtuple_check_typed(tuple));
-
-#ifdef UNIV_DEBUG
- cursor->up_match = ULINT_UNDEFINED;
- cursor->low_match = ULINT_UNDEFINED;
-#endif
- insert_planned = latch_mode & BTR_INSERT;
- estimate = latch_mode & BTR_ESTIMATE;
- ignore_sec_unique = latch_mode & BTR_IGNORE_SEC_UNIQUE;
- latch_mode = latch_mode & ~(BTR_INSERT | BTR_ESTIMATE
- | BTR_IGNORE_SEC_UNIQUE);
-
- ut_ad(!insert_planned || (mode == PAGE_CUR_LE));
-
- cursor->flag = BTR_CUR_BINARY;
- cursor->index = index;
-
-#ifndef BTR_CUR_ADAPT
- guess = NULL;
-#else
- info = btr_search_get_info(index);
-
- guess = info->root_guess;
-
-#ifdef BTR_CUR_HASH_ADAPT
-
-#ifdef UNIV_SEARCH_PERF_STAT
- info->n_searches++;
-#endif
- if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_NOT_LOCKED
- && latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ
- && !estimate
-#ifdef PAGE_CUR_LE_OR_EXTENDS
- && mode != PAGE_CUR_LE_OR_EXTENDS
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
- /* If !has_search_latch, we do a dirty read of
- btr_search_enabled below, and btr_search_guess_on_hash()
- will have to check it again. */
- && UNIV_LIKELY(btr_search_enabled)
- && btr_search_guess_on_hash(index, info, tuple, mode,
- latch_mode, cursor,
- has_search_latch, mtr)) {
-
- /* Search using the hash index succeeded */
-
- ut_ad(cursor->up_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_GE);
- ut_ad(cursor->up_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_LE);
- ut_ad(cursor->low_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_LE);
- btr_cur_n_sea++;
-
- return;
- }
-#endif /* BTR_CUR_HASH_ADAPT */
-#endif /* BTR_CUR_ADAPT */
- btr_cur_n_non_sea++;
-
- /* If the hash search did not succeed, do binary search down the
- tree */
-
- if (has_search_latch) {
- /* Release possible search latch to obey latching order */
- rw_lock_s_unlock(&btr_search_latch);
- }
-
- /* Store the position of the tree latch we push to mtr so that we
- know how to release it when we have latched leaf node(s) */
-
- savepoint = mtr_set_savepoint(mtr);
-
- if (latch_mode == BTR_MODIFY_TREE) {
- mtr_x_lock(dict_index_get_lock(index), mtr);
-
- } else if (latch_mode == BTR_CONT_MODIFY_TREE) {
- /* Do nothing */
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- } else {
- mtr_s_lock(dict_index_get_lock(index), mtr);
- }
-
- page_cursor = btr_cur_get_page_cur(cursor);
-
- space = dict_index_get_space(index);
- page_no = dict_index_get_page(index);
-
- up_match = 0;
- up_bytes = 0;
- low_match = 0;
- low_bytes = 0;
-
- height = ULINT_UNDEFINED;
-
- /* We use these modified search modes on non-leaf levels of the
- B-tree. These let us end up in the right B-tree leaf. In that leaf
- we use the original search mode. */
-
- switch (mode) {
- case PAGE_CUR_GE:
- page_mode = PAGE_CUR_L;
- break;
- case PAGE_CUR_G:
- page_mode = PAGE_CUR_LE;
- break;
- default:
-#ifdef PAGE_CUR_LE_OR_EXTENDS
- ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
- || mode == PAGE_CUR_LE_OR_EXTENDS);
-#else /* PAGE_CUR_LE_OR_EXTENDS */
- ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE);
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
- page_mode = mode;
- break;
- }
-
- /* Loop and search until we arrive at the desired level */
-
- for (;;) {
- ulint zip_size;
- buf_block_t* block;
- ulint rw_latch;
- ulint buf_mode;
-
- zip_size = dict_table_zip_size(index->table);
- rw_latch = RW_NO_LATCH;
- buf_mode = BUF_GET;
-
- if (height == 0 && latch_mode <= BTR_MODIFY_LEAF) {
-
- rw_latch = latch_mode;
-
- if (insert_planned
- && ibuf_should_try(index, ignore_sec_unique)) {
-
- /* Try insert to the insert buffer if the
- page is not in the buffer pool */
-
- buf_mode = BUF_GET_IF_IN_POOL;
- }
- }
-
-retry_page_get:
- block = buf_page_get_gen(space, zip_size, page_no,
- rw_latch, guess, buf_mode,
- __FILE__, __LINE__, mtr);
- if (block == NULL) {
- /* This must be a search to perform an insert;
- try insert to the insert buffer */
-
- ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
- ut_ad(insert_planned);
- ut_ad(cursor->thr);
-
- if (ibuf_insert(tuple, index, space, zip_size,
- page_no, cursor->thr)) {
- /* Insertion to the insert buffer succeeded */
- cursor->flag = BTR_CUR_INSERT_TO_IBUF;
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- goto func_exit;
- }
-
- /* Insert to the insert buffer did not succeed:
- retry page get */
-
- buf_mode = BUF_GET;
-
- goto retry_page_get;
- }
-
- page = buf_block_get_frame(block);
-
- block->check_index_page_at_flush = TRUE;
-
- if (rw_latch != RW_NO_LATCH) {
-#ifdef UNIV_ZIP_DEBUG
- const page_zip_des_t* page_zip
- = buf_block_get_page_zip(block);
- ut_a(!page_zip || page_zip_validate(page_zip, page));
-#endif /* UNIV_ZIP_DEBUG */
-
- buf_block_dbg_add_level(block, SYNC_TREE_NODE);
- }
-
- ut_ad(0 == ut_dulint_cmp(index->id,
- btr_page_get_index_id(page)));
-
- if (UNIV_UNLIKELY(height == ULINT_UNDEFINED)) {
- /* We are in the root node */
-
- height = btr_page_get_level(page, mtr);
- root_height = height;
- cursor->tree_height = root_height + 1;
-#ifdef BTR_CUR_ADAPT
- if (block != guess) {
- info->root_guess = block;
- }
-#endif
- }
-
- if (height == 0) {
- if (rw_latch == RW_NO_LATCH) {
-
- btr_cur_latch_leaves(page, space, zip_size,
- page_no, latch_mode,
- cursor, mtr);
- }
-
- if ((latch_mode != BTR_MODIFY_TREE)
- && (latch_mode != BTR_CONT_MODIFY_TREE)) {
-
- /* Release the tree s-latch */
-
- mtr_release_s_latch_at_savepoint(
- mtr, savepoint,
- dict_index_get_lock(index));
- }
-
- page_mode = mode;
- }
-
- page_cur_search_with_match(block, index, tuple, page_mode,
- &up_match, &up_bytes,
- &low_match, &low_bytes,
- page_cursor);
-
- if (estimate) {
- btr_cur_add_path_info(cursor, height, root_height);
- }
-
- /* If this is the desired level, leave the loop */
-
- ut_ad(height == btr_page_get_level(
- page_cur_get_page(page_cursor), mtr));
-
- if (level == height) {
-
- if (level > 0) {
- /* x-latch the page */
- page = btr_page_get(space, zip_size,
- page_no, RW_X_LATCH, mtr);
- ut_a((ibool)!!page_is_comp(page)
- == dict_table_is_comp(index->table));
- }
-
- break;
- }
-
- ut_ad(height > 0);
-
- height--;
-
- guess = NULL;
-
- node_ptr = page_cur_get_rec(page_cursor);
- offsets = rec_get_offsets(node_ptr, cursor->index, offsets,
- ULINT_UNDEFINED, &heap);
- /* Go to the child node */
- page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- if (level == 0) {
- cursor->low_match = low_match;
- cursor->low_bytes = low_bytes;
- cursor->up_match = up_match;
- cursor->up_bytes = up_bytes;
-
-#ifdef BTR_CUR_ADAPT
- /* We do a dirty read of btr_search_enabled here. We
- will properly check btr_search_enabled again in
- btr_search_build_page_hash_index() before building a
- page hash index, while holding btr_search_latch. */
- if (UNIV_LIKELY(btr_search_enabled)) {
-
- btr_search_info_update(index, cursor);
- }
-#endif
- ut_ad(cursor->up_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_GE);
- ut_ad(cursor->up_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_LE);
- ut_ad(cursor->low_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_LE);
- }
-
-func_exit:
- if (has_search_latch) {
-
- rw_lock_s_lock(&btr_search_latch);
- }
-}
-
-/*****************************************************************//**
-Opens a cursor at either end of an index. */
-UNIV_INTERN
-void
-btr_cur_open_at_index_side(
-/*=======================*/
- ibool from_left, /*!< in: TRUE if open to the low end,
- FALSE if to the high end */
- dict_index_t* index, /*!< in: index */
- ulint latch_mode, /*!< in: latch mode */
- btr_cur_t* cursor, /*!< in: cursor */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_cur_t* page_cursor;
- ulint page_no;
- ulint space;
- ulint zip_size;
- ulint height;
- ulint root_height = 0; /* remove warning */
- rec_t* node_ptr;
- ulint estimate;
- ulint savepoint;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- estimate = latch_mode & BTR_ESTIMATE;
- latch_mode = latch_mode & ~BTR_ESTIMATE;
-
- /* Store the position of the tree latch we push to mtr so that we
- know how to release it when we have latched the leaf node */
-
- savepoint = mtr_set_savepoint(mtr);
-
- if (latch_mode == BTR_MODIFY_TREE) {
- mtr_x_lock(dict_index_get_lock(index), mtr);
- } else {
- mtr_s_lock(dict_index_get_lock(index), mtr);
- }
-
- page_cursor = btr_cur_get_page_cur(cursor);
- cursor->index = index;
-
- space = dict_index_get_space(index);
- zip_size = dict_table_zip_size(index->table);
- page_no = dict_index_get_page(index);
-
- height = ULINT_UNDEFINED;
-
- for (;;) {
- buf_block_t* block;
- page_t* page;
- block = buf_page_get_gen(space, zip_size, page_no,
- RW_NO_LATCH, NULL, BUF_GET,
- __FILE__, __LINE__, mtr);
- page = buf_block_get_frame(block);
- ut_ad(0 == ut_dulint_cmp(index->id,
- btr_page_get_index_id(page)));
-
- block->check_index_page_at_flush = TRUE;
-
- if (height == ULINT_UNDEFINED) {
- /* We are in the root node */
-
- height = btr_page_get_level(page, mtr);
- root_height = height;
- }
-
- if (height == 0) {
- btr_cur_latch_leaves(page, space, zip_size, page_no,
- latch_mode, cursor, mtr);
-
- /* In versions <= 3.23.52 we had forgotten to
- release the tree latch here. If in an index scan
- we had to scan far to find a record visible to the
- current transaction, that could starve others
- waiting for the tree latch. */
-
- if ((latch_mode != BTR_MODIFY_TREE)
- && (latch_mode != BTR_CONT_MODIFY_TREE)) {
-
- /* Release the tree s-latch */
-
- mtr_release_s_latch_at_savepoint(
- mtr, savepoint,
- dict_index_get_lock(index));
- }
- }
-
- if (from_left) {
- page_cur_set_before_first(block, page_cursor);
- } else {
- page_cur_set_after_last(block, page_cursor);
- }
-
- if (height == 0) {
- if (estimate) {
- btr_cur_add_path_info(cursor, height,
- root_height);
- }
-
- break;
- }
-
- ut_ad(height > 0);
-
- if (from_left) {
- page_cur_move_to_next(page_cursor);
- } else {
- page_cur_move_to_prev(page_cursor);
- }
-
- if (estimate) {
- btr_cur_add_path_info(cursor, height, root_height);
- }
-
- height--;
-
- node_ptr = page_cur_get_rec(page_cursor);
- offsets = rec_get_offsets(node_ptr, cursor->index, offsets,
- ULINT_UNDEFINED, &heap);
- /* Go to the child node */
- page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-}
-
-/**********************************************************************//**
-Positions a cursor at a randomly chosen position within a B-tree. */
-UNIV_INTERN
-void
-btr_cur_open_at_rnd_pos(
-/*====================*/
- dict_index_t* index, /*!< in: index */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
- btr_cur_t* cursor, /*!< in/out: B-tree cursor */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_cur_t* page_cursor;
- ulint page_no;
- ulint space;
- ulint zip_size;
- ulint height;
- rec_t* node_ptr;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- if (latch_mode == BTR_MODIFY_TREE) {
- mtr_x_lock(dict_index_get_lock(index), mtr);
- } else {
- mtr_s_lock(dict_index_get_lock(index), mtr);
- }
-
- page_cursor = btr_cur_get_page_cur(cursor);
- cursor->index = index;
-
- space = dict_index_get_space(index);
- zip_size = dict_table_zip_size(index->table);
- page_no = dict_index_get_page(index);
-
- height = ULINT_UNDEFINED;
-
- for (;;) {
- buf_block_t* block;
- page_t* page;
-
- block = buf_page_get_gen(space, zip_size, page_no,
- RW_NO_LATCH, NULL, BUF_GET,
- __FILE__, __LINE__, mtr);
- page = buf_block_get_frame(block);
- ut_ad(0 == ut_dulint_cmp(index->id,
- btr_page_get_index_id(page)));
-
- if (height == ULINT_UNDEFINED) {
- /* We are in the root node */
-
- height = btr_page_get_level(page, mtr);
- }
-
- if (height == 0) {
- btr_cur_latch_leaves(page, space, zip_size, page_no,
- latch_mode, cursor, mtr);
- }
-
- page_cur_open_on_rnd_user_rec(block, page_cursor);
-
- if (height == 0) {
-
- break;
- }
-
- ut_ad(height > 0);
-
- height--;
-
- node_ptr = page_cur_get_rec(page_cursor);
- offsets = rec_get_offsets(node_ptr, cursor->index, offsets,
- ULINT_UNDEFINED, &heap);
- /* Go to the child node */
- page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-}
-
-/*==================== B-TREE INSERT =========================*/
-
-/*************************************************************//**
-Inserts a record if there is enough space, or if enough space can
-be freed by reorganizing. Differs from btr_cur_optimistic_insert because
-no heuristics is applied to whether it pays to use CPU time for
-reorganizing the page or not.
-@return pointer to inserted record if succeed, else NULL */
-static
-rec_t*
-btr_cur_insert_if_possible(
-/*=======================*/
- btr_cur_t* cursor, /*!< in: cursor on page after which to insert;
- cursor stays valid */
- const dtuple_t* tuple, /*!< in: tuple to insert; the size info need not
- have been stored to tuple */
- ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_cur_t* page_cursor;
- buf_block_t* block;
- rec_t* rec;
-
- ut_ad(dtuple_check_typed(tuple));
-
- block = btr_cur_get_block(cursor);
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- page_cursor = btr_cur_get_page_cur(cursor);
-
- /* Now, try the insert */
- rec = page_cur_tuple_insert(page_cursor, tuple,
- cursor->index, n_ext, mtr);
-
- if (UNIV_UNLIKELY(!rec)) {
- /* If record did not fit, reorganize */
-
- if (btr_page_reorganize(block, cursor->index, mtr)) {
-
- page_cur_search(block, cursor->index, tuple,
- PAGE_CUR_LE, page_cursor);
-
- rec = page_cur_tuple_insert(page_cursor, tuple,
- cursor->index, n_ext, mtr);
- }
- }
-
- return(rec);
-}
-
-/*************************************************************//**
-For an insert, checks the locks and does the undo logging if desired.
-@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
-UNIV_INLINE
-ulint
-btr_cur_ins_lock_and_undo(
-/*======================*/
- ulint flags, /*!< in: undo logging and locking flags: if
- not zero, the parameters index and thr
- should be specified */
- btr_cur_t* cursor, /*!< in: cursor on page after which to insert */
- const dtuple_t* entry, /*!< in: entry to insert */
- que_thr_t* thr, /*!< in: query thread or NULL */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- ibool* inherit)/*!< out: TRUE if the inserted new record maybe
- should inherit LOCK_GAP type locks from the
- successor record */
-{
- dict_index_t* index;
- ulint err;
- rec_t* rec;
- roll_ptr_t roll_ptr;
-
- /* Check if we have to wait for a lock: enqueue an explicit lock
- request if yes */
-
- rec = btr_cur_get_rec(cursor);
- index = cursor->index;
-
- err = lock_rec_insert_check_and_lock(flags, rec,
- btr_cur_get_block(cursor),
- index, thr, mtr, inherit);
-
- if (err != DB_SUCCESS) {
-
- return(err);
- }
-
- if (dict_index_is_clust(index) && !dict_index_is_ibuf(index)) {
-
- err = trx_undo_report_row_operation(flags, TRX_UNDO_INSERT_OP,
- thr, index, entry,
- NULL, 0, NULL,
- &roll_ptr);
- if (err != DB_SUCCESS) {
-
- return(err);
- }
-
- /* Now we can fill in the roll ptr field in entry */
-
- if (!(flags & BTR_KEEP_SYS_FLAG)) {
-
- row_upd_index_entry_sys_field(entry, index,
- DATA_ROLL_PTR, roll_ptr);
- }
- }
-
- return(DB_SUCCESS);
-}
-
-#ifdef UNIV_DEBUG
-/*************************************************************//**
-Report information about a transaction. */
-static
-void
-btr_cur_trx_report(
-/*===============*/
- trx_t* trx, /*!< in: transaction */
- const dict_index_t* index, /*!< in: index */
- const char* op) /*!< in: operation */
-{
- fprintf(stderr, "Trx with id " TRX_ID_FMT " going to ",
- TRX_ID_PREP_PRINTF(trx->id));
- fputs(op, stderr);
- dict_index_name_print(stderr, trx, index);
- putc('\n', stderr);
-}
-#endif /* UNIV_DEBUG */
-
-/*************************************************************//**
-Tries to perform an insert to a page in an index tree, next to cursor.
-It is assumed that mtr holds an x-latch on the page. The operation does
-not succeed if there is too little space on the page. If there is just
-one record on the page, the insert will always succeed; this is to
-prevent trying to split a page with just one record.
-@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
-UNIV_INTERN
-ulint
-btr_cur_optimistic_insert(
-/*======================*/
- ulint flags, /*!< in: undo logging and locking flags: if not
- zero, the parameters index and thr should be
- specified */
- btr_cur_t* cursor, /*!< in: cursor on page after which to insert;
- cursor stays valid */
- dtuple_t* entry, /*!< in/out: entry to insert */
- rec_t** rec, /*!< out: pointer to inserted record if
- succeed */
- big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
- be stored externally by the caller, or
- NULL */
- ulint n_ext, /*!< in: number of externally stored columns */
- que_thr_t* thr, /*!< in: query thread or NULL */
- mtr_t* mtr) /*!< in: mtr; if this function returns
- DB_SUCCESS on a leaf page of a secondary
- index in a compressed tablespace, the
- mtr must be committed before latching
- any further pages */
-{
- big_rec_t* big_rec_vec = NULL;
- dict_index_t* index;
- page_cur_t* page_cursor;
- buf_block_t* block;
- page_t* page;
- ulint max_size;
- rec_t* dummy_rec;
- ibool leaf;
- ibool reorg;
- ibool inherit;
- ulint zip_size;
- ulint rec_size;
- mem_heap_t* heap = NULL;
- ulint err;
-
- *big_rec = NULL;
-
- block = btr_cur_get_block(cursor);
- page = buf_block_get_frame(block);
- index = cursor->index;
- zip_size = buf_block_get_zip_size(block);
-#ifdef UNIV_DEBUG_VALGRIND
- if (zip_size) {
- UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
- UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
- }
-#endif /* UNIV_DEBUG_VALGRIND */
-
- if (!dtuple_check_typed_no_assert(entry)) {
- fputs("InnoDB: Error in a tuple to insert into ", stderr);
- dict_index_name_print(stderr, thr_get_trx(thr), index);
- }
-#ifdef UNIV_DEBUG
- if (btr_cur_print_record_ops && thr) {
- btr_cur_trx_report(thr_get_trx(thr), index, "insert into ");
- dtuple_print(stderr, entry);
- }
-#endif /* UNIV_DEBUG */
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- max_size = page_get_max_insert_size_after_reorganize(page, 1);
- leaf = page_is_leaf(page);
-
- /* Calculate the record size when entry is converted to a record */
- rec_size = rec_get_converted_size(index, entry, n_ext);
-
- if (page_zip_rec_needs_ext(rec_size, page_is_comp(page),
- dtuple_get_n_fields(entry), zip_size)) {
-
- /* The record is so big that we have to store some fields
- externally on separate database pages */
- big_rec_vec = dtuple_convert_big_rec(index, entry, &n_ext);
-
- if (UNIV_UNLIKELY(big_rec_vec == NULL)) {
-
- return(DB_TOO_BIG_RECORD);
- }
-
- rec_size = rec_get_converted_size(index, entry, n_ext);
- }
-
- if (UNIV_UNLIKELY(zip_size)) {
- /* Estimate the free space of an empty compressed page.
- Subtract one byte for the encoded heap_no in the
- modification log. */
- ulint free_space_zip = page_zip_empty_size(
- cursor->index->n_fields, zip_size) - 1;
- ulint n_uniq = dict_index_get_n_unique_in_tree(index);
-
- ut_ad(dict_table_is_comp(index->table));
-
- /* There should be enough room for two node pointer
- records on an empty non-leaf page. This prevents
- infinite page splits. */
-
- if (UNIV_LIKELY(entry->n_fields >= n_uniq)
- && UNIV_UNLIKELY(REC_NODE_PTR_SIZE
- + rec_get_converted_size_comp_prefix(
- index, entry->fields, n_uniq,
- NULL)
- /* On a compressed page, there is
- a two-byte entry in the dense
- page directory for every record.
- But there is no record header. */
- - (REC_N_NEW_EXTRA_BYTES - 2)
- > free_space_zip / 2)) {
-
- if (big_rec_vec) {
- dtuple_convert_back_big_rec(
- index, entry, big_rec_vec);
- }
-
- if (heap) {
- mem_heap_free(heap);
- }
-
- return(DB_TOO_BIG_RECORD);
- }
- }
-
- /* If there have been many consecutive inserts, and we are on the leaf
- level, check if we have to split the page to reserve enough free space
- for future updates of records. */
-
- if (dict_index_is_clust(index)
- && (page_get_n_recs(page) >= 2)
- && UNIV_LIKELY(leaf)
- && (dict_index_get_space_reserve() + rec_size > max_size)
- && (btr_page_get_split_rec_to_right(cursor, &dummy_rec)
- || btr_page_get_split_rec_to_left(cursor, &dummy_rec))) {
-fail:
- err = DB_FAIL;
-fail_err:
-
- if (big_rec_vec) {
- dtuple_convert_back_big_rec(index, entry, big_rec_vec);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- return(err);
- }
-
- if (UNIV_UNLIKELY(max_size < BTR_CUR_PAGE_REORGANIZE_LIMIT
- || max_size < rec_size)
- && UNIV_LIKELY(page_get_n_recs(page) > 1)
- && page_get_max_insert_size(page, 1) < rec_size) {
-
- goto fail;
- }
-
- /* Check locks and write to the undo log, if specified */
- err = btr_cur_ins_lock_and_undo(flags, cursor, entry,
- thr, mtr, &inherit);
-
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
-
- goto fail_err;
- }
-
- page_cursor = btr_cur_get_page_cur(cursor);
-
- /* Now, try the insert */
-
- {
- const rec_t* page_cursor_rec = page_cur_get_rec(page_cursor);
- *rec = page_cur_tuple_insert(page_cursor, entry, index,
- n_ext, mtr);
- reorg = page_cursor_rec != page_cur_get_rec(page_cursor);
-
- if (UNIV_UNLIKELY(reorg)) {
- ut_a(zip_size);
- ut_a(*rec);
- }
- }
-
- if (UNIV_UNLIKELY(!*rec) && UNIV_LIKELY(!reorg)) {
- /* If the record did not fit, reorganize */
- if (UNIV_UNLIKELY(!btr_page_reorganize(block, index, mtr))) {
- ut_a(zip_size);
-
- goto fail;
- }
-
- ut_ad(zip_size
- || page_get_max_insert_size(page, 1) == max_size);
-
- reorg = TRUE;
-
- page_cur_search(block, index, entry, PAGE_CUR_LE, page_cursor);
-
- *rec = page_cur_tuple_insert(page_cursor, entry, index,
- n_ext, mtr);
-
- if (UNIV_UNLIKELY(!*rec)) {
- if (UNIV_LIKELY(zip_size != 0)) {
-
- goto fail;
- }
-
- fputs("InnoDB: Error: cannot insert tuple ", stderr);
- dtuple_print(stderr, entry);
- fputs(" into ", stderr);
- dict_index_name_print(stderr, thr_get_trx(thr), index);
- fprintf(stderr, "\nInnoDB: max insert size %lu\n",
- (ulong) max_size);
- ut_error;
- }
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
-#ifdef BTR_CUR_HASH_ADAPT
- if (!reorg && leaf && (cursor->flag == BTR_CUR_HASH)) {
- btr_search_update_hash_node_on_insert(cursor);
- } else {
- btr_search_update_hash_on_insert(cursor);
- }
-#endif
-
- if (!(flags & BTR_NO_LOCKING_FLAG) && inherit) {
-
- lock_update_insert(block, *rec);
- }
-
-#if 0
- fprintf(stderr, "Insert into page %lu, max ins size %lu,"
- " rec %lu ind type %lu\n",
- buf_block_get_page_no(block), max_size,
- rec_size + PAGE_DIR_SLOT_SIZE, index->type);
-#endif
- if (leaf && !dict_index_is_clust(index)) {
- /* Update the free bits of the B-tree page in the
- insert buffer bitmap. */
-
- /* The free bits in the insert buffer bitmap must
- never exceed the free space on a page. It is safe to
- decrement or reset the bits in the bitmap in a
- mini-transaction that is committed before the
- mini-transaction that affects the free space. */
-
- /* It is unsafe to increment the bits in a separately
- committed mini-transaction, because in crash recovery,
- the free bits could momentarily be set too high. */
-
- if (zip_size) {
- /* Update the bits in the same mini-transaction. */
- ibuf_update_free_bits_zip(block, mtr);
- } else {
- /* Decrement the bits in a separate
- mini-transaction. */
- ibuf_update_free_bits_if_full(
- block, max_size,
- rec_size + PAGE_DIR_SLOT_SIZE);
- }
- }
-
- *big_rec = big_rec_vec;
-
- return(DB_SUCCESS);
-}
-
-/*************************************************************//**
-Performs an insert on a page of an index tree. It is assumed that mtr
-holds an x-latch on the tree and on the cursor page. If the insert is
-made on the leaf level, to avoid deadlocks, mtr must also own x-latches
-to brothers of page, if those brothers exist.
-@return DB_SUCCESS or error number */
-UNIV_INTERN
-ulint
-btr_cur_pessimistic_insert(
-/*=======================*/
- ulint flags, /*!< in: undo logging and locking flags: if not
- zero, the parameter thr should be
- specified; if no undo logging is specified,
- then the caller must have reserved enough
- free extents in the file space so that the
- insertion will certainly succeed */
- btr_cur_t* cursor, /*!< in: cursor after which to insert;
- cursor stays valid */
- dtuple_t* entry, /*!< in/out: entry to insert */
- rec_t** rec, /*!< out: pointer to inserted record if
- succeed */
- big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
- be stored externally by the caller, or
- NULL */
- ulint n_ext, /*!< in: number of externally stored columns */
- que_thr_t* thr, /*!< in: query thread or NULL */
- mtr_t* mtr) /*!< in: mtr */
-{
- dict_index_t* index = cursor->index;
- ulint zip_size = dict_table_zip_size(index->table);
- big_rec_t* big_rec_vec = NULL;
- mem_heap_t* heap = NULL;
- ulint err;
- ibool dummy_inh;
- ibool success;
- ulint n_extents = 0;
- ulint n_reserved;
-
- ut_ad(dtuple_check_typed(entry));
-
- *big_rec = NULL;
-
- ut_ad(mtr_memo_contains(mtr,
- dict_index_get_lock(btr_cur_get_index(cursor)),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
- MTR_MEMO_PAGE_X_FIX));
-
- /* Try first an optimistic insert; reset the cursor flag: we do not
- assume anything of how it was positioned */
-
- cursor->flag = BTR_CUR_BINARY;
-
- err = btr_cur_optimistic_insert(flags, cursor, entry, rec,
- big_rec, n_ext, thr, mtr);
- if (err != DB_FAIL) {
-
- return(err);
- }
-
- /* Retry with a pessimistic insert. Check locks and write to undo log,
- if specified */
-
- err = btr_cur_ins_lock_and_undo(flags, cursor, entry,
- thr, mtr, &dummy_inh);
-
- if (err != DB_SUCCESS) {
-
- return(err);
- }
-
- if (!(flags & BTR_NO_UNDO_LOG_FLAG)) {
- /* First reserve enough free space for the file segments
- of the index tree, so that the insert will not fail because
- of lack of space */
-
- n_extents = cursor->tree_height / 16 + 3;
-
- success = fsp_reserve_free_extents(&n_reserved, index->space,
- n_extents, FSP_NORMAL, mtr);
- if (!success) {
- return(DB_OUT_OF_FILE_SPACE);
- }
- }
-
- if (page_zip_rec_needs_ext(rec_get_converted_size(index, entry, n_ext),
- dict_table_is_comp(index->table),
- dict_index_get_n_fields(index),
- zip_size)) {
- /* The record is so big that we have to store some fields
- externally on separate database pages */
-
- if (UNIV_LIKELY_NULL(big_rec_vec)) {
- /* This should never happen, but we handle
- the situation in a robust manner. */
- ut_ad(0);
- dtuple_convert_back_big_rec(index, entry, big_rec_vec);
- }
-
- big_rec_vec = dtuple_convert_big_rec(index, entry, &n_ext);
-
- if (big_rec_vec == NULL) {
-
- if (n_extents > 0) {
- fil_space_release_free_extents(index->space,
- n_reserved);
- }
- return(DB_TOO_BIG_RECORD);
- }
- }
-
- if (dict_index_get_page(index)
- == buf_block_get_page_no(btr_cur_get_block(cursor))) {
-
- /* The page is the root page */
- *rec = btr_root_raise_and_insert(cursor, entry, n_ext, mtr);
- } else {
- *rec = btr_page_split_and_insert(cursor, entry, n_ext, mtr);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- ut_ad(page_rec_get_next(btr_cur_get_rec(cursor)) == *rec);
-
-#ifdef BTR_CUR_ADAPT
- btr_search_update_hash_on_insert(cursor);
-#endif
- if (!(flags & BTR_NO_LOCKING_FLAG)) {
-
- lock_update_insert(btr_cur_get_block(cursor), *rec);
- }
-
- if (n_extents > 0) {
- fil_space_release_free_extents(index->space, n_reserved);
- }
-
- *big_rec = big_rec_vec;
-
- return(DB_SUCCESS);
-}
-
-/*==================== B-TREE UPDATE =========================*/
-
-/*************************************************************//**
-For an update, checks the locks and does the undo logging.
-@return DB_SUCCESS, DB_WAIT_LOCK, or error number */
-UNIV_INLINE
-ulint
-btr_cur_upd_lock_and_undo(
-/*======================*/
- ulint flags, /*!< in: undo logging and locking flags */
- btr_cur_t* cursor, /*!< in: cursor on record to update */
- const upd_t* update, /*!< in: update vector */
- ulint cmpl_info,/*!< in: compiler info on secondary index
- updates */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- roll_ptr_t* roll_ptr)/*!< out: roll pointer */
-{
- dict_index_t* index;
- rec_t* rec;
- ulint err;
-
- ut_ad(cursor && update && thr && roll_ptr);
-
- rec = btr_cur_get_rec(cursor);
- index = cursor->index;
-
- if (!dict_index_is_clust(index)) {
- /* We do undo logging only when we update a clustered index
- record */
- return(lock_sec_rec_modify_check_and_lock(
- flags, btr_cur_get_block(cursor), rec,
- index, thr, mtr));
- }
-
- /* Check if we have to wait for a lock: enqueue an explicit lock
- request if yes */
-
- err = DB_SUCCESS;
-
- if (!(flags & BTR_NO_LOCKING_FLAG)) {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- rec_offs_init(offsets_);
-
- err = lock_clust_rec_modify_check_and_lock(
- flags, btr_cur_get_block(cursor), rec, index,
- rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap), thr);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- if (err != DB_SUCCESS) {
-
- return(err);
- }
- }
-
- /* Append the info about the update in the undo log */
-
- err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
- index, NULL, update,
- cmpl_info, rec, roll_ptr);
- return(err);
-}
-
-/***********************************************************//**
-Writes a redo log record of updating a record in-place. */
-UNIV_INLINE
-void
-btr_cur_update_in_place_log(
-/*========================*/
- ulint flags, /*!< in: flags */
- rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in: index where cursor positioned */
- const upd_t* update, /*!< in: update vector */
- trx_t* trx, /*!< in: transaction */
- roll_ptr_t roll_ptr, /*!< in: roll ptr */
- mtr_t* mtr) /*!< in: mtr */
-{
- byte* log_ptr;
- page_t* page = page_align(rec);
- ut_ad(flags < 256);
- ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
-
- log_ptr = mlog_open_and_write_index(mtr, rec, index, page_is_comp(page)
- ? MLOG_COMP_REC_UPDATE_IN_PLACE
- : MLOG_REC_UPDATE_IN_PLACE,
- 1 + DATA_ROLL_PTR_LEN + 14 + 2
- + MLOG_BUF_MARGIN);
-
- if (!log_ptr) {
- /* Logging in mtr is switched off during crash recovery */
- return;
- }
-
- /* The code below assumes index is a clustered index: change index to
- the clustered index if we are updating a secondary index record (or we
- could as well skip writing the sys col values to the log in this case
- because they are not needed for a secondary index record update) */
-
- index = dict_table_get_first_index(index->table);
-
- mach_write_to_1(log_ptr, flags);
- log_ptr++;
-
- log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr,
- mtr);
- mach_write_to_2(log_ptr, page_offset(rec));
- log_ptr += 2;
-
- row_upd_index_write_log(update, log_ptr, mtr);
-}
-#endif /* UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Parses a redo log record of updating a record in-place.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-btr_cur_parse_update_in_place(
-/*==========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in/out: page or NULL */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- dict_index_t* index) /*!< in: index corresponding to page */
-{
- ulint flags;
- rec_t* rec;
- upd_t* update;
- ulint pos;
- trx_id_t trx_id;
- roll_ptr_t roll_ptr;
- ulint rec_offset;
- mem_heap_t* heap;
- ulint* offsets;
-
- if (end_ptr < ptr + 1) {
-
- return(NULL);
- }
-
- flags = mach_read_from_1(ptr);
- ptr++;
-
- ptr = row_upd_parse_sys_vals(ptr, end_ptr, &pos, &trx_id, &roll_ptr);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- if (end_ptr < ptr + 2) {
-
- return(NULL);
- }
-
- rec_offset = mach_read_from_2(ptr);
- ptr += 2;
-
- ut_a(rec_offset <= UNIV_PAGE_SIZE);
-
- heap = mem_heap_create(256);
-
- ptr = row_upd_index_parse(ptr, end_ptr, heap, &update);
-
- if (!ptr || !page) {
-
- goto func_exit;
- }
-
- ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table));
- rec = page + rec_offset;
-
- /* We do not need to reserve btr_search_latch, as the page is only
- being recovered, and there cannot be a hash index to it. */
-
- offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
-
- if (!(flags & BTR_KEEP_SYS_FLAG)) {
- row_upd_rec_sys_fields_in_recovery(rec, page_zip, offsets,
- pos, trx_id, roll_ptr);
- }
-
- row_upd_rec_in_place(rec, index, offsets, update, page_zip);
-
-func_exit:
- mem_heap_free(heap);
-
- return(ptr);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-See if there is enough place in the page modification log to log
-an update-in-place.
-@return TRUE if enough place */
-static
-ibool
-btr_cur_update_alloc_zip(
-/*=====================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- buf_block_t* block, /*!< in/out: buffer page */
- dict_index_t* index, /*!< in: the index corresponding to the block */
- ulint length, /*!< in: size needed */
- ibool create, /*!< in: TRUE=delete-and-insert,
- FALSE=update-in-place */
- mtr_t* mtr) /*!< in: mini-transaction */
-{
- ut_a(page_zip == buf_block_get_page_zip(block));
- ut_ad(page_zip);
- ut_ad(!dict_index_is_ibuf(index));
-
- if (page_zip_available(page_zip, dict_index_is_clust(index),
- length, create)) {
- return(TRUE);
- }
-
- if (!page_zip->m_nonempty) {
- /* The page has been freshly compressed, so
- recompressing it will not help. */
- return(FALSE);
- }
-
- if (!page_zip_compress(page_zip, buf_block_get_frame(block),
- index, mtr)) {
- /* Unable to compress the page */
- return(FALSE);
- }
-
- /* After recompressing a page, we must make sure that the free
- bits in the insert buffer bitmap will not exceed the free
- space on the page. Because this function will not attempt
- recompression unless page_zip_available() fails above, it is
- safe to reset the free bits if page_zip_available() fails
- again, below. The free bits can safely be reset in a separate
- mini-transaction. If page_zip_available() succeeds below, we
- can be sure that the page_zip_compress() above did not reduce
- the free space available on the page. */
-
- if (!page_zip_available(page_zip, dict_index_is_clust(index),
- length, create)) {
- /* Out of space: reset the free bits. */
- if (!dict_index_is_clust(index)
- && page_is_leaf(buf_block_get_frame(block))) {
- ibuf_reset_free_bits(block);
- }
- return(FALSE);
- }
-
- return(TRUE);
-}
-
-/*************************************************************//**
-Updates a record when the update causes no size changes in its fields.
-We assume here that the ordering fields of the record do not change.
-@return DB_SUCCESS or error number */
-UNIV_INTERN
-ulint
-btr_cur_update_in_place(
-/*====================*/
- ulint flags, /*!< in: undo logging and locking flags */
- btr_cur_t* cursor, /*!< in: cursor on the record to update;
- cursor stays valid and positioned on the
- same record */
- const upd_t* update, /*!< in: update vector */
- ulint cmpl_info,/*!< in: compiler info on secondary index
- updates */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr) /*!< in: mtr; must be committed before
- latching any further pages */
-{
- dict_index_t* index;
- buf_block_t* block;
- page_zip_des_t* page_zip;
- ulint err;
- rec_t* rec;
- roll_ptr_t roll_ptr = ut_dulint_zero;
- trx_t* trx;
- ulint was_delete_marked;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- rec = btr_cur_get_rec(cursor);
- index = cursor->index;
- ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
- /* The insert buffer tree should never be updated in place. */
- ut_ad(!dict_index_is_ibuf(index));
-
- trx = thr_get_trx(thr);
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
-#ifdef UNIV_DEBUG
- if (btr_cur_print_record_ops && thr) {
- btr_cur_trx_report(trx, index, "update ");
- rec_print_new(stderr, rec, offsets);
- }
-#endif /* UNIV_DEBUG */
-
- block = btr_cur_get_block(cursor);
- page_zip = buf_block_get_page_zip(block);
-
- /* Check that enough space is available on the compressed page. */
- if (UNIV_LIKELY_NULL(page_zip)
- && !btr_cur_update_alloc_zip(page_zip, block, index,
- rec_offs_size(offsets), FALSE, mtr)) {
- return(DB_ZIP_OVERFLOW);
- }
-
- /* Do lock checking and undo logging */
- err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
- thr, mtr, &roll_ptr);
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(err);
- }
-
- if (block->is_hashed) {
- /* The function row_upd_changes_ord_field_binary works only
- if the update vector was built for a clustered index, we must
- NOT call it if index is secondary */
-
- if (!dict_index_is_clust(index)
- || row_upd_changes_ord_field_binary(NULL, index, update)) {
-
- /* Remove possible hash index pointer to this record */
- btr_search_update_hash_on_delete(cursor);
- }
-
- rw_lock_x_lock(&btr_search_latch);
- }
-
- if (!(flags & BTR_KEEP_SYS_FLAG)) {
- row_upd_rec_sys_fields(rec, NULL,
- index, offsets, trx, roll_ptr);
- }
-
- was_delete_marked = rec_get_deleted_flag(
- rec, page_is_comp(buf_block_get_frame(block)));
-
- row_upd_rec_in_place(rec, index, offsets, update, page_zip);
-
- if (block->is_hashed) {
- rw_lock_x_unlock(&btr_search_latch);
- }
-
- if (page_zip && !dict_index_is_clust(index)
- && page_is_leaf(buf_block_get_frame(block))) {
- /* Update the free bits in the insert buffer. */
- ibuf_update_free_bits_zip(block, mtr);
- }
-
- btr_cur_update_in_place_log(flags, rec, index, update,
- trx, roll_ptr, mtr);
-
- if (was_delete_marked
- && !rec_get_deleted_flag(rec, page_is_comp(
- buf_block_get_frame(block)))) {
- /* The new updated record owns its possible externally
- stored fields */
-
- btr_cur_unmark_extern_fields(page_zip,
- rec, index, offsets, mtr);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(DB_SUCCESS);
-}
-
-/*************************************************************//**
-Tries to update a record on a page in an index tree. It is assumed that mtr
-holds an x-latch on the page. The operation does not succeed if there is too
-little space on the page or if the update would result in too empty a page,
-so that tree compression is recommended. We assume here that the ordering
-fields of the record do not change.
-
-If the update neither changes the size of any field nor involves externally
-stored fields, the work is delegated to btr_cur_update_in_place(). Otherwise
-the record is converted to an index entry, the update is applied to that
-entry, the old record is deleted from the page and the entry is inserted
-in its place.
-@return DB_SUCCESS, or DB_OVERFLOW if the updated record does not fit,
-DB_UNDERFLOW if the page would become too empty, or DB_ZIP_OVERFLOW if
-there is not enough space left on the compressed page; an error returned
-by btr_cur_upd_lock_and_undo() is passed through unchanged */
-UNIV_INTERN
-ulint
-btr_cur_optimistic_update(
-/*======================*/
- ulint flags, /*!< in: undo logging and locking flags */
- btr_cur_t* cursor, /*!< in: cursor on the record to update;
- cursor stays valid and positioned on the
- same record */
- const upd_t* update, /*!< in: update vector; this must also
- contain trx id and roll ptr fields */
- ulint cmpl_info,/*!< in: compiler info on secondary index
- updates */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr) /*!< in: mtr; must be committed before
- latching any further pages */
-{
- dict_index_t* index;
- page_cur_t* page_cursor;
- ulint err;
- buf_block_t* block;
- page_t* page;
- page_zip_des_t* page_zip;
- rec_t* rec;
- rec_t* orig_rec;
- ulint max_size;
- ulint new_rec_size;
- ulint old_rec_size;
- dtuple_t* new_entry;
- roll_ptr_t roll_ptr;
- trx_t* trx;
- mem_heap_t* heap;
- ulint i;
- ulint n_ext;
- ulint* offsets;
-
- block = btr_cur_get_block(cursor);
- page = buf_block_get_frame(block);
- orig_rec = rec = btr_cur_get_rec(cursor);
- index = cursor->index;
- ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- /* The insert buffer tree should never be updated in place. */
- ut_ad(!dict_index_is_ibuf(index));
-
- /* Scratch heap for offsets and new_entry; it is freed on every
- return path of this function. */
- heap = mem_heap_create(1024);
- offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
-
-#ifdef UNIV_DEBUG
- if (btr_cur_print_record_ops && thr) {
- btr_cur_trx_report(thr_get_trx(thr), index, "update ");
- rec_print_new(stderr, rec, offsets);
- }
-#endif /* UNIV_DEBUG */
-
- if (!row_upd_changes_field_size_or_external(index, offsets, update)) {
-
- /* The simplest and the most common case: the update does not
- change the size of any field and none of the updated fields is
- externally stored in rec or update, and there is enough space
- on the compressed page to log the update. */
-
- mem_heap_free(heap);
- return(btr_cur_update_in_place(flags, cursor, update,
- cmpl_info, thr, mtr));
- }
-
- if (rec_offs_any_extern(offsets)) {
-any_extern:
- /* Externally stored fields are treated in pessimistic
- update */
-
- mem_heap_free(heap);
- return(DB_OVERFLOW);
- }
-
- /* Give up in the same way if any new value in the update vector is
- stored externally. */
- for (i = 0; i < upd_get_n_fields(update); i++) {
- if (dfield_is_ext(&upd_get_nth_field(update, i)->new_val)) {
-
- goto any_extern;
- }
- }
-
- page_cursor = btr_cur_get_page_cur(cursor);
-
- new_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets,
- &n_ext, heap);
- /* We checked above that there are no externally stored fields. */
- ut_a(!n_ext);
-
- /* The page containing the clustered index record
- corresponding to new_entry is latched in mtr.
- Thus the following call is safe. */
- row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
- FALSE, heap);
- old_rec_size = rec_offs_size(offsets);
- new_rec_size = rec_get_converted_size(index, new_entry, 0);
-
- page_zip = buf_block_get_page_zip(block);
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
-#endif /* UNIV_ZIP_DEBUG */
-
- /* For a compressed page, btr_cur_update_alloc_zip() must succeed;
- otherwise report DB_ZIP_OVERFLOW. */
- if (UNIV_LIKELY_NULL(page_zip)
- && !btr_cur_update_alloc_zip(page_zip, block, index,
- new_rec_size, TRUE, mtr)) {
- err = DB_ZIP_OVERFLOW;
- goto err_exit;
- }
-
- /* Refuse a new record that is at least half the free space of an
- empty page. */
- if (UNIV_UNLIKELY(new_rec_size
- >= (page_get_free_space_of_empty(page_is_comp(page))
- / 2))) {
-
- err = DB_OVERFLOW;
- goto err_exit;
- }
-
- if (UNIV_UNLIKELY(page_get_data_size(page)
- - old_rec_size + new_rec_size
- < BTR_CUR_PAGE_COMPRESS_LIMIT)) {
-
- /* The page would become too empty */
-
- err = DB_UNDERFLOW;
- goto err_exit;
- }
-
- /* Upper bound for the new record size: the size of the old record
- plus what page_get_max_insert_size_after_reorganize() reports. */
- max_size = old_rec_size
- + page_get_max_insert_size_after_reorganize(page, 1);
-
- if (!(((max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT)
- && (max_size >= new_rec_size))
- || (page_get_n_recs(page) <= 1))) {
-
- /* There was not enough space, or it did not pay to
- reorganize: for simplicity, we decide what to do assuming a
- reorganization is needed, though it might not be necessary */
-
- err = DB_OVERFLOW;
- goto err_exit;
- }
-
- /* Do lock checking and undo logging */
- err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
- thr, mtr, &roll_ptr);
- if (err != DB_SUCCESS) {
-err_exit:
- /* Common failure exit: nothing on the page has been replaced
- at this point; only the scratch heap needs to be released. */
- mem_heap_free(heap);
- return(err);
- }
-
- /* Ok, we may do the replacement. Store on the page infimum the
- explicit locks on rec, before deleting rec (see the comment in
- btr_cur_pessimistic_update). */
-
- lock_rec_store_on_page_infimum(block, rec);
-
- btr_search_update_hash_on_delete(cursor);
-
- /* The call to row_rec_to_index_entry(ROW_COPY_DATA, ...) above
- invokes rec_offs_make_valid() to point to the copied record that
- the fields of new_entry point to. We have to undo it here. */
- ut_ad(rec_offs_validate(NULL, index, offsets));
- rec_offs_make_valid(page_cur_get_rec(page_cursor), index, offsets);
-
- page_cur_delete_rec(page_cursor, index, offsets, mtr);
-
- /* Step the page cursor back before the insert below.
- NOTE(review): presumably page_cur_delete_rec() leaves the cursor on
- the successor of the deleted record - confirm. */
- page_cur_move_to_prev(page_cursor);
-
- trx = thr_get_trx(thr);
-
- /* Unless the caller asked to keep the system fields, stamp the new
- entry with the roll pointer obtained above and with the id of the
- updating transaction. */
- if (!(flags & BTR_KEEP_SYS_FLAG)) {
- row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
- roll_ptr);
- row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
- trx->id);
- }
-
- /* There are no externally stored columns in new_entry */
- rec = btr_cur_insert_if_possible(cursor, new_entry, 0/*n_ext*/, mtr);
- ut_a(rec); /* <- We calculated above the insert would fit */
-
- if (page_zip && !dict_index_is_clust(index)
- && page_is_leaf(page)) {
- /* Update the free bits in the insert buffer. */
- ibuf_update_free_bits_zip(block, mtr);
- }
-
- /* Restore the old explicit lock state on the record */
-
- lock_rec_restore_from_page_infimum(block, rec, block);
-
- /* Undo the page_cur_move_to_prev() done before the insert. */
- page_cur_move_to_next(page_cursor);
-
- mem_heap_free(heap);
-
- return(DB_SUCCESS);
-}
-
-/*************************************************************//**
-Repairs the lock state of a supremum record that a page split created as
-the predecessor of an updated record. In the split that supremum may have
-inherited locks from the successor of the updated record, which is not
-correct: it must inherit exactly the locks on the updated record itself.
-Nothing is done unless rec is the first user record on its page. */
-static
-void
-btr_cur_pess_upd_restore_supremum(
-/*==============================*/
-	buf_block_t*	block,	/*!< in: buffer block of rec */
-	const rec_t*	rec,	/*!< in: updated record */
-	mtr_t*		mtr)	/*!< in: mtr */
-{
-	page_t*		frame = buf_block_get_frame(block);
-
-	if (page_rec_get_next(page_get_infimum_rec(frame)) == rec) {
-		/* rec is the first user record on its page: the supremum
-		to fix is the one on the preceding page */
-		buf_block_t*	left_block;
-		ulint		space_id = buf_block_get_space(block);
-		ulint		zip_bytes = buf_block_get_zip_size(block);
-		ulint		left_page_no = btr_page_get_prev(frame, mtr);
-
-		ut_ad(left_page_no != FIL_NULL);
-		left_block = buf_page_get_with_no_latch(space_id, zip_bytes,
-							left_page_no, mtr);
-#ifdef UNIV_BTR_DEBUG
-		ut_a(btr_page_get_next(left_block->frame, mtr)
-		     == page_get_page_no(frame));
-#endif /* UNIV_BTR_DEBUG */
-
-		/* We must already have an x-latch on left_block! */
-		ut_ad(mtr_memo_contains(mtr, left_block,
-					MTR_MEMO_PAGE_X_FIX));
-
-		lock_rec_reset_and_inherit_gap_locks(
-			left_block, block, PAGE_HEAP_NO_SUPREMUM,
-			page_rec_get_heap_no(rec));
-	}
-}
-
-/*************************************************************//**
-Performs an update of a record on a page of a tree. It is assumed
-that mtr holds an x-latch on the tree and on the cursor page. If the
-update is made on the leaf level, to avoid deadlocks, mtr must also
-own x-latches to brothers of page, if those brothers exist. We assume
-here that the ordering fields of the record do not change.
-
-An optimistic update is attempted first; the pessimistic path is taken
-only if that attempt reports DB_UNDERFLOW, DB_OVERFLOW or DB_ZIP_OVERFLOW.
-@return DB_SUCCESS or error code; in addition to errors passed through
-from btr_cur_optimistic_update() and btr_cur_upd_lock_and_undo(), this
-function itself returns DB_OUT_OF_FILE_SPACE if free extents cannot be
-reserved and DB_TOO_BIG_RECORD if the new entry cannot be converted to
-a big record */
-UNIV_INTERN
-ulint
-btr_cur_pessimistic_update(
-/*=======================*/
- ulint flags, /*!< in: undo logging, locking, and rollback
- flags */
- btr_cur_t* cursor, /*!< in: cursor on the record to update */
- mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
- big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
- be stored externally by the caller, or NULL */
- const upd_t* update, /*!< in: update vector; this is allowed to also
- contain trx id and roll ptr fields, but
- the values in update vector have no effect */
- ulint cmpl_info,/*!< in: compiler info on secondary index
- updates */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr) /*!< in: mtr; must be committed before
- latching any further pages */
-{
- big_rec_t* big_rec_vec = NULL;
- big_rec_t* dummy_big_rec;
- dict_index_t* index;
- buf_block_t* block;
- page_t* page;
- page_zip_des_t* page_zip;
- rec_t* rec;
- page_cur_t* page_cursor;
- dtuple_t* new_entry;
- ulint err;
- ulint optim_err;
- roll_ptr_t roll_ptr;
- trx_t* trx;
- ibool was_first;
- ulint n_extents = 0;
- ulint n_reserved;
- ulint n_ext;
- ulint* offsets = NULL;
-
- *big_rec = NULL;
-
- block = btr_cur_get_block(cursor);
- page = buf_block_get_frame(block);
- page_zip = buf_block_get_page_zip(block);
- rec = btr_cur_get_rec(cursor);
- index = cursor->index;
-
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
-#endif /* UNIV_ZIP_DEBUG */
- /* The insert buffer tree should never be updated in place. */
- ut_ad(!dict_index_is_ibuf(index));
-
- optim_err = btr_cur_optimistic_update(flags, cursor, update,
- cmpl_info, thr, mtr);
-
- /* Only the three "did not fit / page too empty" outcomes continue
- on the pessimistic path; DB_SUCCESS and every other error code are
- returned to the caller as they are. */
- switch (optim_err) {
- case DB_UNDERFLOW:
- case DB_OVERFLOW:
- case DB_ZIP_OVERFLOW:
- break;
- default:
- return(optim_err);
- }
-
- /* Do lock checking and undo logging */
- err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
- thr, mtr, &roll_ptr);
- if (err != DB_SUCCESS) {
-
- return(err);
- }
-
- if (optim_err == DB_OVERFLOW) {
- ulint reserve_flag;
-
- /* First reserve enough free space for the file segments
- of the index tree, so that the update will not fail because
- of lack of space */
-
- n_extents = cursor->tree_height / 16 + 3;
-
- if (flags & BTR_NO_UNDO_LOG_FLAG) {
- reserve_flag = FSP_CLEANING;
- } else {
- reserve_flag = FSP_NORMAL;
- }
-
- if (!fsp_reserve_free_extents(&n_reserved, index->space,
- n_extents, reserve_flag, mtr)) {
- return(DB_OUT_OF_FILE_SPACE);
- }
- }
-
- /* The caller may pass *heap == NULL; the heap is then created here.
- This function never frees *heap. */
- if (!*heap) {
- *heap = mem_heap_create(1024);
- }
- offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, heap);
-
- trx = thr_get_trx(thr);
-
- new_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets,
- &n_ext, *heap);
- /* The call to row_rec_to_index_entry(ROW_COPY_DATA, ...) above
- invokes rec_offs_make_valid() to point to the copied record that
- the fields of new_entry point to. We have to undo it here. */
- ut_ad(rec_offs_validate(NULL, index, offsets));
- rec_offs_make_valid(rec, index, offsets);
-
- /* The page containing the clustered index record
- corresponding to new_entry is latched in mtr. If the
- clustered index record is delete-marked, then its externally
- stored fields cannot have been purged yet, because then the
- purge would also have removed the clustered index record
- itself. Thus the following call is safe. */
- row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
- FALSE, *heap);
- if (!(flags & BTR_KEEP_SYS_FLAG)) {
- row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
- roll_ptr);
- row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
- trx->id);
- }
-
- if ((flags & BTR_NO_UNDO_LOG_FLAG) && rec_offs_any_extern(offsets)) {
- /* We are in a transaction rollback undoing a row
- update: we must free possible externally stored fields
- which got new values in the update, if they are not
- inherited values. They can be inherited if we have
- updated the primary key to another value, and then
- update it back again. */
-
- ut_ad(big_rec_vec == NULL);
-
- btr_rec_free_updated_extern_fields(
- index, rec, page_zip, offsets, update,
- trx_is_recv(trx) ? RB_RECOVERY : RB_NORMAL, mtr);
- }
-
- /* We have to set appropriate extern storage bits in the new
- record to be inserted: we have to remember which fields were such */
-
- ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec));
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, heap);
- n_ext += btr_push_update_extern_fields(new_entry, update, *heap);
-
- /* Decide whether some fields of new_entry have to be moved to
- external storage. The compressed and uncompressed cases differ only
- in the arguments passed to page_zip_rec_needs_ext(); both share the
- make_external code below. */
- if (UNIV_LIKELY_NULL(page_zip)) {
- ut_ad(page_is_comp(page));
- if (page_zip_rec_needs_ext(
- rec_get_converted_size(index, new_entry, n_ext),
- TRUE,
- dict_index_get_n_fields(index),
- page_zip_get_size(page_zip))) {
-
- goto make_external;
- }
- } else if (page_zip_rec_needs_ext(
- rec_get_converted_size(index, new_entry, n_ext),
- page_is_comp(page), 0, 0)) {
-make_external:
- big_rec_vec = dtuple_convert_big_rec(index, new_entry, &n_ext);
- if (UNIV_UNLIKELY(big_rec_vec == NULL)) {
-
- err = DB_TOO_BIG_RECORD;
- goto return_after_reservations;
- }
- }
-
- /* Store state of explicit locks on rec on the page infimum record,
- before deleting rec. The page infimum acts as a dummy carrier of the
- locks, taking care also of lock releases, before we can move the locks
- back on the actual record. There is a special case: if we are
- inserting on the root page and the insert causes a call of
- btr_root_raise_and_insert. Therefore we cannot in the lock system
- delete the lock structs set on the root page even if the root
- page carries just node pointers. */
-
- lock_rec_store_on_page_infimum(block, rec);
-
- btr_search_update_hash_on_delete(cursor);
-
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
-#endif /* UNIV_ZIP_DEBUG */
- page_cursor = btr_cur_get_page_cur(cursor);
-
- page_cur_delete_rec(page_cursor, index, offsets, mtr);
-
- page_cur_move_to_prev(page_cursor);
-
- /* First try to put the new record on the same page. */
- rec = btr_cur_insert_if_possible(cursor, new_entry, n_ext, mtr);
-
- if (rec) {
- lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
- rec, block);
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, heap);
-
- if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
- /* The new inserted record owns its possible externally
- stored fields */
- btr_cur_unmark_extern_fields(page_zip,
- rec, index, offsets, mtr);
- }
-
- btr_cur_compress_if_useful(cursor, mtr);
-
- if (page_zip && !dict_index_is_clust(index)
- && page_is_leaf(page)) {
- /* Update the free bits in the insert buffer. */
- ibuf_update_free_bits_zip(block, mtr);
- }
-
- err = DB_SUCCESS;
- goto return_after_reservations;
- } else {
- ut_a(optim_err != DB_UNDERFLOW);
-
- /* Out of space: reset the free bits. */
- if (!dict_index_is_clust(index)
- && page_is_leaf(page)) {
- ibuf_reset_free_bits(block);
- }
- }
-
- /* Was the record to be updated positioned as the first user
- record on its page? */
- was_first = page_cur_is_before_first(page_cursor);
-
- /* The first parameter means that no lock checking and undo logging
- is made in the insert */
-
- err = btr_cur_pessimistic_insert(BTR_NO_UNDO_LOG_FLAG
- | BTR_NO_LOCKING_FLAG
- | BTR_KEEP_SYS_FLAG,
- cursor, new_entry, &rec,
- &dummy_big_rec, n_ext, NULL, mtr);
- ut_a(rec);
- ut_a(err == DB_SUCCESS);
- ut_a(dummy_big_rec == NULL);
-
- if (dict_index_is_sec_or_ibuf(index)) {
- /* Update PAGE_MAX_TRX_ID in the index page header.
- It was not updated by btr_cur_pessimistic_insert()
- because of BTR_NO_LOCKING_FLAG. */
- buf_block_t* rec_block;
-
- rec_block = btr_cur_get_block(cursor);
-
- page_update_max_trx_id(rec_block,
- buf_block_get_page_zip(rec_block),
- trx->id, mtr);
- }
-
- if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
- /* The new inserted record owns its possible externally
- stored fields */
- buf_block_t* rec_block = btr_cur_get_block(cursor);
-
- /* NOTE(review): page is re-read from rec_block only when
- UNIV_ZIP_DEBUG is defined; after this point page is read only
- inside UNIV_ZIP_DEBUG code, while page_zip is always
- refreshed. */
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
- page = buf_block_get_frame(rec_block);
-#endif /* UNIV_ZIP_DEBUG */
- page_zip = buf_block_get_page_zip(rec_block);
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, heap);
- btr_cur_unmark_extern_fields(page_zip,
- rec, index, offsets, mtr);
- }
-
- lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
- rec, block);
-
- /* If necessary, restore also the correct lock state for a new,
- preceding supremum record created in a page split. While the old
- record was nonexistent, the supremum might have inherited its locks
- from a wrong record. */
-
- if (!was_first) {
- btr_cur_pess_upd_restore_supremum(btr_cur_get_block(cursor),
- rec, mtr);
- }
-
-return_after_reservations:
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
-#endif /* UNIV_ZIP_DEBUG */
-
- /* n_extents is nonzero only if extents were reserved above, in
- which case fsp_reserve_free_extents() was given &n_reserved. */
- if (n_extents > 0) {
- fil_space_release_free_extents(index->space, n_reserved);
- }
-
- /* Hand the big record vector (NULL if none was created) to the
- caller. */
- *big_rec = big_rec_vec;
-
- return(err);
-}
-
-/*==================== B-TREE DELETE MARK AND UNMARK ===============*/
-
-/****************************************************************//**
-Emits the redo log record that describes setting or clearing the delete
-mark of a clustered index record. The record body consists of the flags
-(1 byte), the new mark value (1 byte), the system column values written
-by row_upd_write_sys_vals_to_log(), and the page offset of the record
-(2 bytes). */
-UNIV_INLINE
-void
-btr_cur_del_mark_set_clust_rec_log(
-/*===============================*/
-	ulint		flags,	/*!< in: flags */
-	rec_t*		rec,	/*!< in: record */
-	dict_index_t*	index,	/*!< in: index of the record */
-	ibool		val,	/*!< in: value to set */
-	trx_t*		trx,	/*!< in: deleting transaction */
-	roll_ptr_t	roll_ptr,/*!< in: roll ptr to the undo log record */
-	mtr_t*		mtr)	/*!< in: mtr */
-{
-	byte*	dst;
-
-	ut_ad(flags < 256);
-	ut_ad(val <= 1);
-	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
-
-	dst = mlog_open_and_write_index(mtr, rec, index,
-					page_rec_is_comp(rec)
-					? MLOG_COMP_REC_CLUST_DELETE_MARK
-					: MLOG_REC_CLUST_DELETE_MARK,
-					1 + 1 + DATA_ROLL_PTR_LEN
-					+ 14 + 2);
-
-	if (dst != NULL) {
-		/* One byte each for the flags and the mark value */
-		mach_write_to_1(dst, flags);
-		mach_write_to_1(dst + 1, val);
-
-		dst = row_upd_write_sys_vals_to_log(index, trx, roll_ptr,
-						    dst + 2, mtr);
-
-		/* Two bytes for the offset of the record within its page */
-		mach_write_to_2(dst, page_offset(rec));
-
-		mlog_close(mtr, dst + 2);
-	}
-
-	/* Otherwise: logging in mtr is switched off during crash
-	recovery, and there is nothing to write */
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/****************************************************************//**
-Parses a redo log record that sets or clears the delete mark of a
-clustered index record and, if a page is given, applies it to that page.
-@return end of log record, or NULL if the buffer does not contain the
-whole record */
-UNIV_INTERN
-byte*
-btr_cur_parse_del_mark_set_clust_rec(
-/*=================================*/
-	byte*		ptr,	/*!< in: buffer */
-	byte*		end_ptr,/*!< in: buffer end */
-	page_t*		page,	/*!< in/out: page or NULL */
-	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
-	dict_index_t*	index)	/*!< in: index corresponding to page */
-{
-	ulint		flags;
-	ulint		mark;
-	ulint		sys_pos;
-	trx_id_t	trx_id;
-	roll_ptr_t	roll_ptr;
-	ulint		rec_offset;
-
-	ut_ad(!page
-	      || !!page_is_comp(page) == dict_table_is_comp(index->table));
-
-	/* The record starts with one byte of flags and one byte holding
-	the new delete mark value */
-	if (end_ptr < ptr + 2) {
-
-		return(NULL);
-	}
-
-	flags = mach_read_from_1(ptr);
-	mark = mach_read_from_1(ptr + 1);
-
-	ptr = row_upd_parse_sys_vals(ptr + 2, end_ptr,
-				     &sys_pos, &trx_id, &roll_ptr);
-
-	/* Two more bytes are needed for the page offset of the record */
-	if (ptr == NULL || end_ptr < ptr + 2) {
-
-		return(NULL);
-	}
-
-	rec_offset = mach_read_from_2(ptr);
-	ptr += 2;
-
-	ut_a(rec_offset <= UNIV_PAGE_SIZE);
-
-	if (page) {
-		rec_t*	rec = page + rec_offset;
-
-		/* We do not need to reserve btr_search_latch, as the page
-		is only being recovered, and there cannot be a hash index to
-		it. */
-
-		btr_rec_set_deleted_flag(rec, page_zip, mark);
-
-		if (!(flags & BTR_KEEP_SYS_FLAG)) {
-			mem_heap_t*	heap = NULL;
-			ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-			ulint*		rec_offsets;
-			rec_offs_init(offsets_);
-
-			rec_offsets = rec_get_offsets(rec, index, offsets_,
-						      ULINT_UNDEFINED, &heap);
-
-			row_upd_rec_sys_fields_in_recovery(
-				rec, page_zip, rec_offsets,
-				sys_pos, trx_id, roll_ptr);
-
-			if (UNIV_LIKELY_NULL(heap)) {
-				mem_heap_free(heap);
-			}
-		}
-	}
-
-	return(ptr);
-}
-
-#ifndef UNIV_HOTBACKUP
-/***********************************************************//**
-Sets the delete mark of a clustered index record to val. After the lock
-check, an undo log record is written for the operation; then the record
-is modified and, unless BTR_KEEP_SYS_FLAG is given, its system fields
-are updated with the transaction and the roll pointer of the undo log
-record. Finally the change is written to the redo log.
-@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
-UNIV_INTERN
-ulint
-btr_cur_del_mark_set_clust_rec(
-/*===========================*/
-	ulint		flags,	/*!< in: undo logging and locking flags */
-	btr_cur_t*	cursor,	/*!< in: cursor */
-	ibool		val,	/*!< in: value to set */
-	que_thr_t*	thr,	/*!< in: query thread */
-	mtr_t*		mtr)	/*!< in: mtr */
-{
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	rec_t*		rec		= btr_cur_get_rec(cursor);
-	dict_index_t*	index		= cursor->index;
-	roll_ptr_t	roll_ptr;
-	ulint		err;
-	rec_offs_init(offsets_);
-
-	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
-	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
-
-#ifdef UNIV_DEBUG
-	if (btr_cur_print_record_ops && thr) {
-		btr_cur_trx_report(thr_get_trx(thr), index, "del mark ");
-		rec_print_new(stderr, rec, offsets);
-	}
-#endif /* UNIV_DEBUG */
-
-	ut_ad(dict_index_is_clust(index));
-	ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
-
-	/* Step 1: lock check */
-	err = lock_clust_rec_modify_check_and_lock(flags,
-						   btr_cur_get_block(cursor),
-						   rec, index, offsets, thr);
-
-	/* Step 2: undo logging, only if the lock check passed */
-	if (err == DB_SUCCESS) {
-		err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP,
-						    thr, index, NULL, NULL, 0,
-						    rec, &roll_ptr);
-	}
-
-	/* Step 3: modify the record and write the redo log */
-	if (err == DB_SUCCESS) {
-		buf_block_t*	block	 = btr_cur_get_block(cursor);
-		page_zip_des_t*	page_zip = buf_block_get_page_zip(block);
-		trx_t*		trx	 = thr_get_trx(thr);
-
-		if (block->is_hashed) {
-			rw_lock_x_lock(&btr_search_latch);
-		}
-
-		btr_rec_set_deleted_flag(rec, page_zip, val);
-
-		if (!(flags & BTR_KEEP_SYS_FLAG)) {
-			row_upd_rec_sys_fields(rec, page_zip,
-					       index, offsets, trx, roll_ptr);
-		}
-
-		if (block->is_hashed) {
-			rw_lock_x_unlock(&btr_search_latch);
-		}
-
-		btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val,
-						   trx, roll_ptr, mtr);
-	}
-
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-
-	return(err);
-}
-
-/****************************************************************//**
-Emits the redo log record that describes setting or clearing the delete
-mark of a secondary index record: the initial log record of type
-MLOG_REC_SEC_DELETE_MARK, the new mark value (1 byte) and the page
-offset of the record (2 bytes). */
-UNIV_INLINE
-void
-btr_cur_del_mark_set_sec_rec_log(
-/*=============================*/
-	rec_t*		rec,	/*!< in: record */
-	ibool		val,	/*!< in: value to set */
-	mtr_t*		mtr)	/*!< in: mtr */
-{
-	byte*	dst;
-
-	ut_ad(val <= 1);
-
-	dst = mlog_open(mtr, 11 + 1 + 2);
-
-	if (dst != NULL) {
-		dst = mlog_write_initial_log_record_fast(
-			rec, MLOG_REC_SEC_DELETE_MARK, dst, mtr);
-
-		mach_write_to_1(dst, val);
-		mach_write_to_2(dst + 1, page_offset(rec));
-
-		mlog_close(mtr, dst + 1 + 2);
-	}
-
-	/* Otherwise: logging in mtr is switched off during crash recovery;
-	in that case mlog_open returns NULL */
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/****************************************************************//**
-Parses a redo log record that sets or clears the delete mark of a
-secondary index record and, if a page is given, applies it to that page.
-@return end of log record, or NULL if the buffer does not contain the
-whole record */
-UNIV_INTERN
-byte*
-btr_cur_parse_del_mark_set_sec_rec(
-/*===============================*/
-	byte*		ptr,	/*!< in: buffer */
-	byte*		end_ptr,/*!< in: buffer end */
-	page_t*		page,	/*!< in/out: page or NULL */
-	page_zip_des_t*	page_zip)/*!< in/out: compressed page, or NULL */
-{
-	ulint	mark;
-	ulint	rec_offset;
-
-	/* The body is 1 byte of mark value followed by 2 bytes of page
-	offset */
-	if (end_ptr < ptr + 3) {
-
-		return(NULL);
-	}
-
-	mark = mach_read_from_1(ptr);
-	rec_offset = mach_read_from_2(ptr + 1);
-	ptr += 3;
-
-	ut_a(rec_offset <= UNIV_PAGE_SIZE);
-
-	if (page) {
-		/* We do not need to reserve btr_search_latch, as the page
-		is only being recovered, and there cannot be a hash index to
-		it. */
-
-		btr_rec_set_deleted_flag(page + rec_offset, page_zip, mark);
-	}
-
-	return(ptr);
-}
-
-#ifndef UNIV_HOTBACKUP
-/***********************************************************//**
-Sets the delete mark of a secondary index record to val (TRUE or FALSE)
-after a successful lock check, and writes the change to the redo log.
-@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
-UNIV_INTERN
-ulint
-btr_cur_del_mark_set_sec_rec(
-/*=========================*/
-	ulint		flags,	/*!< in: locking flag */
-	btr_cur_t*	cursor,	/*!< in: cursor */
-	ibool		val,	/*!< in: value to set */
-	que_thr_t*	thr,	/*!< in: query thread */
-	mtr_t*		mtr)	/*!< in: mtr */
-{
-	buf_block_t*	block	= btr_cur_get_block(cursor);
-	rec_t*		rec	= btr_cur_get_rec(cursor);
-	ulint		err;
-
-#ifdef UNIV_DEBUG
-	if (btr_cur_print_record_ops && thr) {
-		btr_cur_trx_report(thr_get_trx(thr), cursor->index,
-				   "del mark ");
-		rec_print(stderr, rec, cursor->index);
-	}
-#endif /* UNIV_DEBUG */
-
-	err = lock_sec_rec_modify_check_and_lock(flags, block, rec,
-						 cursor->index, thr, mtr);
-
-	if (err == DB_SUCCESS) {
-		ut_ad(!!page_rec_is_comp(rec)
-		      == dict_table_is_comp(cursor->index->table));
-
-		/* Hold btr_search_latch in x-mode around the modification
-		whenever the block is hashed */
-		if (block->is_hashed) {
-			rw_lock_x_lock(&btr_search_latch);
-		}
-
-		btr_rec_set_deleted_flag(rec, buf_block_get_page_zip(block),
-					 val);
-
-		if (block->is_hashed) {
-			rw_lock_x_unlock(&btr_search_latch);
-		}
-
-		btr_cur_del_mark_set_sec_rec_log(rec, val, mtr);
-	}
-
-	return(err);
-}
-
-/***********************************************************//**
-Resets the delete mark of a secondary index record and writes the
-matching redo log record. This function is only used by the insert
-buffer insert merge mechanism. */
-UNIV_INTERN
-void
-btr_cur_del_unmark_for_ibuf(
-/*========================*/
-	rec_t*		rec,		/*!< in/out: record to delete unmark */
-	page_zip_des_t*	page_zip,	/*!< in/out: compressed page
-					corresponding to rec, or NULL
-					when the tablespace is
-					uncompressed */
-	mtr_t*		mtr)		/*!< in: mtr */
-{
-	const ibool	new_mark = FALSE;
-
-	/* We do not need to reserve btr_search_latch, as the page has just
-	been read to the buffer pool and there cannot be a hash index to it. */
-	btr_rec_set_deleted_flag(rec, page_zip, new_mark);
-
-	btr_cur_del_mark_set_sec_rec_log(rec, new_mark, mtr);
-}
-
-/*==================== B-TREE RECORD REMOVE =========================*/
-
-/*************************************************************//**
-Compresses a page of the tree, but only if
-btr_cur_compress_recommendation() recommends it. It is assumed that mtr
-holds an x-latch on the tree and on the cursor page. To avoid deadlocks,
-mtr must also own x-latches to brothers of page, if those brothers
-exist. NOTE: it is assumed that the caller has reserved enough free
-extents so that the compression will always succeed if done!
-@return TRUE if compression occurred */
-UNIV_INTERN
-ibool
-btr_cur_compress_if_useful(
-/*=======================*/
-	btr_cur_t*	cursor,	/*!< in: cursor on the page to compress;
-				cursor does not stay valid if compression
-				occurs */
-	mtr_t*		mtr)	/*!< in: mtr */
-{
-	ut_ad(mtr_memo_contains(mtr,
-				dict_index_get_lock(btr_cur_get_index(cursor)),
-				MTR_MEMO_X_LOCK));
-	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
-				MTR_MEMO_PAGE_X_FIX));
-
-	if (!btr_cur_compress_recommendation(cursor, mtr)) {
-		/* Compression is not recommended: leave the page alone */
-		return(FALSE);
-	}
-
-	return(btr_compress(cursor, mtr) ? TRUE : FALSE);
-}
-
-/*******************************************************//**
-Deletes the record under the tree cursor from its leaf page, provided
-that the record has no externally stored fields and that
-btr_cur_can_delete_without_compress() allows it; otherwise the page is
-left untouched. It is assumed that the mtr has an x-latch on the page
-where the cursor is positioned, but no latch on the whole tree.
-@return TRUE if success, i.e., the page did not become too empty */
-UNIV_INTERN
-ibool
-btr_cur_optimistic_delete(
-/*======================*/
-	btr_cur_t*	cursor,	/*!< in: cursor on leaf page, on the record to
-				delete; cursor stays valid: if deletion
-				succeeds, on function exit it points to the
-				successor of the deleted record */
-	mtr_t*		mtr)	/*!< in: mtr; if this function returns
-				TRUE on a leaf page of a secondary
-				index, the mtr must be committed
-				before latching any further pages */
-{
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	buf_block_t*	block;
-	rec_t*		rec;
-	ibool		can_delete;
-	rec_offs_init(offsets_);
-
-	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
-				MTR_MEMO_PAGE_X_FIX));
-
-	block = btr_cur_get_block(cursor);
-
-	/* This is intended only for leaf page deletions */
-	ut_ad(page_is_leaf(buf_block_get_frame(block)));
-
-	rec = btr_cur_get_rec(cursor);
-	offsets = rec_get_offsets(rec, cursor->index, offsets,
-				  ULINT_UNDEFINED, &heap);
-
-	can_delete = !rec_offs_any_extern(offsets)
-		&& btr_cur_can_delete_without_compress(
-			cursor, rec_offs_size(offsets), mtr);
-
-	if (can_delete) {
-		page_t*		page	 = buf_block_get_frame(block);
-		page_zip_des_t*	page_zip = buf_block_get_page_zip(block);
-		ulint		max_ins	 = 0;
-
-		lock_update_delete(block, rec);
-
-		btr_search_update_hash_on_delete(cursor);
-
-		/* For an uncompressed page, the maximum insert size is
-		sampled before the record is removed */
-		if (!page_zip) {
-			max_ins = page_get_max_insert_size_after_reorganize(
-				page, 1);
-		}
-#ifdef UNIV_ZIP_DEBUG
-		ut_a(!page_zip || page_zip_validate(page_zip, page));
-#endif /* UNIV_ZIP_DEBUG */
-		page_cur_delete_rec(btr_cur_get_page_cur(cursor),
-				    cursor->index, offsets, mtr);
-#ifdef UNIV_ZIP_DEBUG
-		ut_a(!page_zip || page_zip_validate(page_zip, page));
-#endif /* UNIV_ZIP_DEBUG */
-
-		/* The insert buffer does not handle inserts to clustered
-		indexes, to non-leaf pages of secondary index B-trees,
-		or to the insert buffer; only in the remaining case are
-		the free bits updated. */
-		if (!dict_index_is_clust(cursor->index)
-		    && !dict_index_is_ibuf(cursor->index)
-		    && page_is_leaf(page)) {
-
-			if (page_zip) {
-				ibuf_update_free_bits_zip(block, mtr);
-			} else {
-				ibuf_update_free_bits_low(block, max_ins,
-							  mtr);
-			}
-		}
-	}
-
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-
-	return(can_delete);
-}
-
-/*************************************************************//**
-Removes the record on which the tree cursor is positioned. Tries
-to compress the page if its fillfactor drops below a threshold
-or if it is the only page on the level. It is assumed that mtr holds
-an x-latch on the tree and on the cursor page. To avoid deadlocks,
-mtr must also own x-latches to brothers of page, if those brothers
-exist.
-
-If the page holds fewer than two records and is not the root page, the
-whole page is discarded with btr_discard_page() instead and TRUE is
-returned.
-@return TRUE if compression occurred */
-UNIV_INTERN
-ibool
-btr_cur_pessimistic_delete(
-/*=======================*/
- ulint* err, /*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
- the latter may occur because we may have
- to update node pointers on upper levels,
- and in the case of variable length keys
- these may actually grow in size */
- ibool has_reserved_extents, /*!< in: TRUE if the
- caller has already reserved enough free
- extents so that he knows that the operation
- will succeed */
- btr_cur_t* cursor, /*!< in: cursor on the record to delete;
- if compression does not occur, the cursor
- stays valid: it points to successor of
- deleted record on function exit */
- enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_block_t* block;
- page_t* page;
- page_zip_des_t* page_zip;
- dict_index_t* index;
- rec_t* rec;
- dtuple_t* node_ptr;
- ulint n_extents = 0;
- ulint n_reserved;
- ibool success;
- ibool ret = FALSE;
- ulint level;
- mem_heap_t* heap;
- ulint* offsets;
-
- block = btr_cur_get_block(cursor);
- page = buf_block_get_frame(block);
- index = btr_cur_get_index(cursor);
-
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- if (!has_reserved_extents) {
- /* First reserve enough free space for the file segments
- of the index tree, so that the node pointer updates will
- not fail because of lack of space */
-
- n_extents = cursor->tree_height / 32 + 1;
-
- success = fsp_reserve_free_extents(&n_reserved,
- index->space,
- n_extents,
- FSP_CLEANING, mtr);
- if (!success) {
- /* Nothing has been allocated or modified yet, so a
- plain return is enough here. */
- *err = DB_OUT_OF_FILE_SPACE;
-
- return(FALSE);
- }
- }
-
- /* Scratch heap for offsets and for the node pointer built below;
- it is freed at return_after_reservations. */
- heap = mem_heap_create(1024);
- rec = btr_cur_get_rec(cursor);
- page_zip = buf_block_get_page_zip(block);
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
-#endif /* UNIV_ZIP_DEBUG */
-
- offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
-
- /* Free the externally stored fields of the record, if any, before
- the record itself is removed. */
- if (rec_offs_any_extern(offsets)) {
- btr_rec_free_externally_stored_fields(index,
- rec, offsets, page_zip,
- rb_ctx, mtr);
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
-#endif /* UNIV_ZIP_DEBUG */
- }
-
- if (UNIV_UNLIKELY(page_get_n_recs(page) < 2)
- && UNIV_UNLIKELY(dict_index_get_page(index)
- != buf_block_get_page_no(block))) {
-
- /* If there is only one record, drop the whole page in
- btr_discard_page, if this is not the root page */
-
- btr_discard_page(cursor, mtr);
-
- *err = DB_SUCCESS;
- ret = TRUE;
-
- goto return_after_reservations;
- }
-
- lock_update_delete(block, rec);
- level = btr_page_get_level(page, mtr);
-
- /* Special handling is needed when the first user record of a
- non-leaf page is deleted. */
- if (level > 0
- && UNIV_UNLIKELY(rec == page_rec_get_next(
- page_get_infimum_rec(page)))) {
-
- rec_t* next_rec = page_rec_get_next(rec);
-
- if (btr_page_get_prev(page, mtr) == FIL_NULL) {
-
- /* If we delete the leftmost node pointer on a
- non-leaf level, we must mark the new leftmost node
- pointer as the predefined minimum record */
-
- /* This will make page_zip_validate() fail until
- page_cur_delete_rec() completes. This is harmless,
- because everything will take place within a single
- mini-transaction and because writing to the redo log
- is an atomic operation (performed by mtr_commit()). */
- btr_set_min_rec_mark(next_rec, mtr);
- } else {
- /* Otherwise, if we delete the leftmost node pointer
- on a page, we have to change the father node pointer
- so that it is equal to the new leftmost node pointer
- on the page */
-
- btr_node_ptr_delete(index, block, mtr);
-
- node_ptr = dict_index_build_node_ptr(
- index, next_rec, buf_block_get_page_no(block),
- heap, level);
-
- btr_insert_on_non_leaf_level(index,
- level + 1, node_ptr, mtr);
- }
- }
-
- btr_search_update_hash_on_delete(cursor);
-
- page_cur_delete_rec(btr_cur_get_page_cur(cursor), index, offsets, mtr);
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
-#endif /* UNIV_ZIP_DEBUG */
-
- ut_ad(btr_check_node_ptr(index, block, mtr));
-
- *err = DB_SUCCESS;
-
-return_after_reservations:
- /* heap is always valid here: the only jump to this label is made
- after mem_heap_create() above. */
- mem_heap_free(heap);
-
- /* If the page was not discarded above, try to compress it. */
- if (ret == FALSE) {
- ret = btr_cur_compress_if_useful(cursor, mtr);
- }
-
- /* n_extents is nonzero only if this function reserved the extents
- itself, in which case n_reserved was passed to
- fsp_reserve_free_extents(). */
- if (n_extents > 0) {
- fil_space_release_free_extents(index->space, n_reserved);
- }
-
- return(ret);
-}
-
-/*******************************************************************//**
-Adds path information to the cursor for the current page, for which
-the binary search has been performed. */
-static
-void
-btr_cur_add_path_info(
-/*==================*/
- btr_cur_t* cursor, /*!< in: cursor positioned on a page */
- ulint height, /*!< in: height of the page in tree;
- 0 means leaf node */
- ulint root_height) /*!< in: root node height in tree */
-{
- btr_path_t* slot;
- rec_t* rec;
-
- ut_a(cursor->path_arr);
-
- if (root_height >= BTR_PATH_ARRAY_N_SLOTS - 1) {
- /* Do nothing; return empty path */
-
- slot = cursor->path_arr;
- slot->nth_rec = ULINT_UNDEFINED;
-
- return;
- }
-
- if (height == 0) {
- /* Mark end of slots for path */
- slot = cursor->path_arr + root_height + 1;
- slot->nth_rec = ULINT_UNDEFINED;
- }
-
- rec = btr_cur_get_rec(cursor);
-
- slot = cursor->path_arr + (root_height - height);
-
- slot->nth_rec = page_rec_get_n_recs_before(rec);
- slot->n_recs = page_get_n_recs(page_align(rec));
-}
-
-/*******************************************************************//**
-Estimates the number of rows in a given index range.
-@return estimated number of rows */
-UNIV_INTERN
-ib_int64_t
-btr_estimate_n_rows_in_range(
-/*=========================*/
- dict_index_t* index, /*!< in: index */
- const dtuple_t* tuple1, /*!< in: range start, may also be empty tuple */
- ulint mode1, /*!< in: search mode for range start */
- const dtuple_t* tuple2, /*!< in: range end, may also be empty tuple */
- ulint mode2) /*!< in: search mode for range end */
-{
- btr_path_t path1[BTR_PATH_ARRAY_N_SLOTS];
- btr_path_t path2[BTR_PATH_ARRAY_N_SLOTS];
- btr_cur_t cursor;
- btr_path_t* slot1;
- btr_path_t* slot2;
- ibool diverged;
- ibool diverged_lot;
- ulint divergence_level;
- ib_int64_t n_rows;
- ulint i;
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- cursor.path_arr = path1;
-
- if (dtuple_get_n_fields(tuple1) > 0) {
-
- btr_cur_search_to_nth_level(index, 0, tuple1, mode1,
- BTR_SEARCH_LEAF | BTR_ESTIMATE,
- &cursor, 0, &mtr);
- } else {
- btr_cur_open_at_index_side(TRUE, index,
- BTR_SEARCH_LEAF | BTR_ESTIMATE,
- &cursor, &mtr);
- }
-
- mtr_commit(&mtr);
-
- mtr_start(&mtr);
-
- cursor.path_arr = path2;
-
- if (dtuple_get_n_fields(tuple2) > 0) {
-
- btr_cur_search_to_nth_level(index, 0, tuple2, mode2,
- BTR_SEARCH_LEAF | BTR_ESTIMATE,
- &cursor, 0, &mtr);
- } else {
- btr_cur_open_at_index_side(FALSE, index,
- BTR_SEARCH_LEAF | BTR_ESTIMATE,
- &cursor, &mtr);
- }
-
- mtr_commit(&mtr);
-
- /* We have the path information for the range in path1 and path2 */
-
- n_rows = 1;
- diverged = FALSE; /* This becomes true when the path is not
- the same any more */
- diverged_lot = FALSE; /* This becomes true when the paths are
- not the same or adjacent any more */
- divergence_level = 1000000; /* This is the level where paths diverged
- a lot */
- for (i = 0; ; i++) {
- ut_ad(i < BTR_PATH_ARRAY_N_SLOTS);
-
- slot1 = path1 + i;
- slot2 = path2 + i;
-
- if (slot1->nth_rec == ULINT_UNDEFINED
- || slot2->nth_rec == ULINT_UNDEFINED) {
-
- if (i > divergence_level + 1) {
- /* In trees whose height is > 1 our algorithm
- tends to underestimate: multiply the estimate
- by 2: */
-
- n_rows = n_rows * 2;
- }
-
- /* Do not estimate the number of rows in the range
- to over 1 / 2 of the estimated rows in the whole
- table */
-
- if (n_rows > index->table->stat_n_rows / 2) {
- n_rows = index->table->stat_n_rows / 2;
-
- /* If there are just 0 or 1 rows in the table,
- then we estimate all rows are in the range */
-
- if (n_rows == 0) {
- n_rows = index->table->stat_n_rows;
- }
- }
-
- return(n_rows);
- }
-
- if (!diverged && slot1->nth_rec != slot2->nth_rec) {
-
- diverged = TRUE;
-
- if (slot1->nth_rec < slot2->nth_rec) {
- n_rows = slot2->nth_rec - slot1->nth_rec;
-
- if (n_rows > 1) {
- diverged_lot = TRUE;
- divergence_level = i;
- }
- } else {
- /* Maybe the tree has changed between
- searches */
-
- return(10);
- }
-
- } else if (diverged && !diverged_lot) {
-
- if (slot1->nth_rec < slot1->n_recs
- || slot2->nth_rec > 1) {
-
- diverged_lot = TRUE;
- divergence_level = i;
-
- n_rows = 0;
-
- if (slot1->nth_rec < slot1->n_recs) {
- n_rows += slot1->n_recs
- - slot1->nth_rec;
- }
-
- if (slot2->nth_rec > 1) {
- n_rows += slot2->nth_rec - 1;
- }
- }
- } else if (diverged_lot) {
-
- n_rows = (n_rows * (slot1->n_recs + slot2->n_recs))
- / 2;
- }
- }
-}
-
-/*******************************************************************//**
-Estimates the number of different key values in a given index, for
-each n-column prefix of the index where n <= dict_index_get_n_unique(index).
-The estimates are stored in the array index->stat_n_diff_key_vals. */
-UNIV_INTERN
-void
-btr_estimate_number_of_different_key_vals(
-/*======================================*/
- dict_index_t* index) /*!< in: index */
-{
- btr_cur_t cursor;
- page_t* page;
- rec_t* rec;
- ulint n_cols;
- ulint matched_fields;
- ulint matched_bytes;
- ib_int64_t* n_diff;
- ullint n_sample_pages; /* number of pages to sample */
- ulint not_empty_flag = 0;
- ulint total_external_size = 0;
- ulint i;
- ulint j;
- ullint add_on;
- mtr_t mtr;
- mem_heap_t* heap = NULL;
- ulint offsets_rec_[REC_OFFS_NORMAL_SIZE];
- ulint offsets_next_rec_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets_rec = offsets_rec_;
- ulint* offsets_next_rec= offsets_next_rec_;
- rec_offs_init(offsets_rec_);
- rec_offs_init(offsets_next_rec_);
-
- n_cols = dict_index_get_n_unique(index);
-
- n_diff = mem_zalloc((n_cols + 1) * sizeof(ib_int64_t));
-
- /* It makes no sense to test more pages than are contained
- in the index, thus we lower the number if it is too high */
- if (srv_stats_sample_pages > index->stat_index_size) {
- if (index->stat_index_size > 0) {
- n_sample_pages = index->stat_index_size;
- } else {
- n_sample_pages = 1;
- }
- } else {
- n_sample_pages = srv_stats_sample_pages;
- }
-
- /* We sample some pages in the index to get an estimate */
-
- for (i = 0; i < n_sample_pages; i++) {
- rec_t* supremum;
- mtr_start(&mtr);
-
- btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr);
-
- /* Count the number of different key values for each prefix of
- the key on this index page. If the prefix does not determine
- the index record uniquely in the B-tree, then we subtract one
- because otherwise our algorithm would give a wrong estimate
- for an index where there is just one key value. */
-
- page = btr_cur_get_page(&cursor);
-
- supremum = page_get_supremum_rec(page);
- rec = page_rec_get_next(page_get_infimum_rec(page));
-
- if (rec != supremum) {
- not_empty_flag = 1;
- offsets_rec = rec_get_offsets(rec, index, offsets_rec,
- ULINT_UNDEFINED, &heap);
- }
-
- while (rec != supremum) {
- rec_t* next_rec = page_rec_get_next(rec);
- if (next_rec == supremum) {
- break;
- }
-
- matched_fields = 0;
- matched_bytes = 0;
- offsets_next_rec = rec_get_offsets(next_rec, index,
- offsets_next_rec,
- n_cols, &heap);
-
- cmp_rec_rec_with_match(rec, next_rec,
- offsets_rec, offsets_next_rec,
- index, &matched_fields,
- &matched_bytes);
-
- for (j = matched_fields + 1; j <= n_cols; j++) {
- /* We add one if this index record has
- a different prefix from the previous */
-
- n_diff[j]++;
- }
-
- total_external_size
- += btr_rec_get_externally_stored_len(
- rec, offsets_rec);
-
- rec = next_rec;
- /* Initialize offsets_rec for the next round
- and assign the old offsets_rec buffer to
- offsets_next_rec. */
- {
- ulint* offsets_tmp = offsets_rec;
- offsets_rec = offsets_next_rec;
- offsets_next_rec = offsets_tmp;
- }
- }
-
-
- if (n_cols == dict_index_get_n_unique_in_tree(index)) {
-
- /* If there is more than one leaf page in the tree,
- we add one because we know that the first record
- on the page certainly had a different prefix than the
- last record on the previous index page in the
- alphabetical order. Before this fix, if there was
- just one big record on each clustered index page, the
- algorithm grossly underestimated the number of rows
- in the table. */
-
- if (btr_page_get_prev(page, &mtr) != FIL_NULL
- || btr_page_get_next(page, &mtr) != FIL_NULL) {
-
- n_diff[n_cols]++;
- }
- }
-
- offsets_rec = rec_get_offsets(rec, index, offsets_rec,
- ULINT_UNDEFINED, &heap);
- total_external_size += btr_rec_get_externally_stored_len(
- rec, offsets_rec);
- mtr_commit(&mtr);
- }
-
- /* If we saw k borders between different key values on
- n_sample_pages leaf pages, we can estimate how many
- there will be in index->stat_n_leaf_pages */
-
- /* We must take into account that our sample actually represents
- also the pages used for external storage of fields (those pages are
- included in index->stat_n_leaf_pages) */
-
- for (j = 0; j <= n_cols; j++) {
- index->stat_n_diff_key_vals[j]
- = ((n_diff[j]
- * (ib_int64_t)index->stat_n_leaf_pages
- + n_sample_pages - 1
- + total_external_size
- + not_empty_flag)
- / (n_sample_pages
- + total_external_size));
-
- /* If the tree is small, smaller than
- 10 * n_sample_pages + total_external_size, then
- the above estimate is ok. For bigger trees it is common that we
- do not see any borders between key values in the few pages
- we pick. But still there may be n_sample_pages
- different key values, or even more. Let us try to approximate
- that: */
-
- add_on = index->stat_n_leaf_pages
- / (10 * (n_sample_pages
- + total_external_size));
-
- if (add_on > n_sample_pages) {
- add_on = n_sample_pages;
- }
-
- index->stat_n_diff_key_vals[j] += add_on;
- }
-
- mem_free(n_diff);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-}
-
-/*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/
-
-/***********************************************************//**
-Gets the externally stored size of a record, in units of a database page.
-@return externally stored part, in units of a database page */
-static
-ulint
-btr_rec_get_externally_stored_len(
-/*==============================*/
- rec_t* rec, /*!< in: record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ulint n_fields;
- byte* data;
- ulint local_len;
- ulint extern_len;
- ulint total_extern_len = 0;
- ulint i;
-
- ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
- n_fields = rec_offs_n_fields(offsets);
-
- for (i = 0; i < n_fields; i++) {
- if (rec_offs_nth_extern(offsets, i)) {
-
- data = rec_get_nth_field(rec, offsets, i, &local_len);
-
- local_len -= BTR_EXTERN_FIELD_REF_SIZE;
-
- extern_len = mach_read_from_4(data + local_len
- + BTR_EXTERN_LEN + 4);
-
- total_extern_len += ut_calc_align(extern_len,
- UNIV_PAGE_SIZE);
- }
- }
-
- return(total_extern_len / UNIV_PAGE_SIZE);
-}
-
-/*******************************************************************//**
-Sets the ownership bit of an externally stored field in a record. */
-static
-void
-btr_cur_set_ownership_of_extern_field(
-/*==================================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed
- part will be updated, or NULL */
- rec_t* rec, /*!< in/out: clustered index record */
- dict_index_t* index, /*!< in: index of the page */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint i, /*!< in: field number */
- ibool val, /*!< in: value to set */
- mtr_t* mtr) /*!< in: mtr, or NULL if not logged */
-{
- byte* data;
- ulint local_len;
- ulint byte_val;
-
- data = rec_get_nth_field(rec, offsets, i, &local_len);
-
- ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
-
- local_len -= BTR_EXTERN_FIELD_REF_SIZE;
-
- byte_val = mach_read_from_1(data + local_len + BTR_EXTERN_LEN);
-
- if (val) {
- byte_val = byte_val & (~BTR_EXTERN_OWNER_FLAG);
- } else {
- byte_val = byte_val | BTR_EXTERN_OWNER_FLAG;
- }
-
- if (UNIV_LIKELY_NULL(page_zip)) {
- mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val);
- page_zip_write_blob_ptr(page_zip, rec, index, offsets, i, mtr);
- } else if (UNIV_LIKELY(mtr != NULL)) {
-
- mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, byte_val,
- MLOG_1BYTE, mtr);
- } else {
- mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val);
- }
-}
-
-/*******************************************************************//**
-Marks not updated extern fields as not-owned by this record. The ownership
-is transferred to the updated record which is inserted elsewhere in the
-index tree. In purge only the owner of externally stored field is allowed
-to free the field. */
-UNIV_INTERN
-void
-btr_cur_mark_extern_inherited_fields(
-/*=================================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed
- part will be updated, or NULL */
- rec_t* rec, /*!< in/out: record in a clustered index */
- dict_index_t* index, /*!< in: index of the page */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- const upd_t* update, /*!< in: update vector */
- mtr_t* mtr) /*!< in: mtr, or NULL if not logged */
-{
- ulint n;
- ulint j;
- ulint i;
-
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
-
- if (!rec_offs_any_extern(offsets)) {
-
- return;
- }
-
- n = rec_offs_n_fields(offsets);
-
- for (i = 0; i < n; i++) {
- if (rec_offs_nth_extern(offsets, i)) {
-
- /* Check it is not in updated fields */
-
- if (update) {
- for (j = 0; j < upd_get_n_fields(update);
- j++) {
- if (upd_get_nth_field(update, j)
- ->field_no == i) {
-
- goto updated;
- }
- }
- }
-
- btr_cur_set_ownership_of_extern_field(
- page_zip, rec, index, offsets, i, FALSE, mtr);
-updated:
- ;
- }
- }
-}
-
-/*******************************************************************//**
-The complement of the previous function: in an update entry may inherit
-some externally stored fields from a record. We must mark them as inherited
-in entry, so that they are not freed in a rollback. */
-UNIV_INTERN
-void
-btr_cur_mark_dtuple_inherited_extern(
-/*=================================*/
- dtuple_t* entry, /*!< in/out: updated entry to be
- inserted to clustered index */
- const upd_t* update) /*!< in: update vector */
-{
- ulint i;
-
- for (i = 0; i < dtuple_get_n_fields(entry); i++) {
-
- dfield_t* dfield = dtuple_get_nth_field(entry, i);
- byte* data;
- ulint len;
- ulint j;
-
- if (!dfield_is_ext(dfield)) {
- continue;
- }
-
- /* Check if it is in updated fields */
-
- for (j = 0; j < upd_get_n_fields(update); j++) {
- if (upd_get_nth_field(update, j)->field_no == i) {
-
- goto is_updated;
- }
- }
-
- data = dfield_get_data(dfield);
- len = dfield_get_len(dfield);
- data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN]
- |= BTR_EXTERN_INHERITED_FLAG;
-
-is_updated:
- ;
- }
-}
-
-/*******************************************************************//**
-Marks all extern fields in a record as owned by the record. This function
-should be called if the delete mark of a record is removed: a not delete
-marked record always owns all its extern fields. */
-static
-void
-btr_cur_unmark_extern_fields(
-/*=========================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed
- part will be updated, or NULL */
- rec_t* rec, /*!< in/out: record in a clustered index */
- dict_index_t* index, /*!< in: index of the page */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- mtr_t* mtr) /*!< in: mtr, or NULL if not logged */
-{
- ulint n;
- ulint i;
-
- ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
- n = rec_offs_n_fields(offsets);
-
- if (!rec_offs_any_extern(offsets)) {
-
- return;
- }
-
- for (i = 0; i < n; i++) {
- if (rec_offs_nth_extern(offsets, i)) {
-
- btr_cur_set_ownership_of_extern_field(
- page_zip, rec, index, offsets, i, TRUE, mtr);
- }
- }
-}
-
-/*******************************************************************//**
-Marks all extern fields in a dtuple as owned by the record. */
-UNIV_INTERN
-void
-btr_cur_unmark_dtuple_extern_fields(
-/*================================*/
- dtuple_t* entry) /*!< in/out: clustered index entry */
-{
- ulint i;
-
- for (i = 0; i < dtuple_get_n_fields(entry); i++) {
- dfield_t* dfield = dtuple_get_nth_field(entry, i);
-
- if (dfield_is_ext(dfield)) {
- byte* data = dfield_get_data(dfield);
- ulint len = dfield_get_len(dfield);
-
- data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN]
- &= ~BTR_EXTERN_OWNER_FLAG;
- }
- }
-}
-
-/*******************************************************************//**
-Flags the data tuple fields that are marked as extern storage in the
-update vector. We use this function to remember which fields we must
-mark as extern storage in a record inserted for an update.
-@return number of flagged external columns */
-UNIV_INTERN
-ulint
-btr_push_update_extern_fields(
-/*==========================*/
- dtuple_t* tuple, /*!< in/out: data tuple */
- const upd_t* update, /*!< in: update vector */
- mem_heap_t* heap) /*!< in: memory heap */
-{
- ulint n_pushed = 0;
- ulint n;
- const upd_field_t* uf;
-
- ut_ad(tuple);
- ut_ad(update);
-
- uf = update->fields;
- n = upd_get_n_fields(update);
-
- for (; n--; uf++) {
- if (dfield_is_ext(&uf->new_val)) {
- dfield_t* field
- = dtuple_get_nth_field(tuple, uf->field_no);
-
- if (!dfield_is_ext(field)) {
- dfield_set_ext(field);
- n_pushed++;
- }
-
- switch (uf->orig_len) {
- byte* data;
- ulint len;
- byte* buf;
- case 0:
- break;
- case BTR_EXTERN_FIELD_REF_SIZE:
- /* Restore the original locally stored
- part of the column. In the undo log,
- InnoDB writes a longer prefix of externally
- stored columns, so that column prefixes
- in secondary indexes can be reconstructed. */
- dfield_set_data(field, (byte*) dfield_get_data(field)
- + dfield_get_len(field)
- - BTR_EXTERN_FIELD_REF_SIZE,
- BTR_EXTERN_FIELD_REF_SIZE);
- dfield_set_ext(field);
- break;
- default:
- /* Reconstruct the original locally
- stored part of the column. The data
- will have to be copied. */
- ut_a(uf->orig_len > BTR_EXTERN_FIELD_REF_SIZE);
-
- data = dfield_get_data(field);
- len = dfield_get_len(field);
-
- buf = mem_heap_alloc(heap, uf->orig_len);
- /* Copy the locally stored prefix. */
- memcpy(buf, data,
- uf->orig_len
- - BTR_EXTERN_FIELD_REF_SIZE);
- /* Copy the BLOB pointer. */
- memcpy(buf + uf->orig_len
- - BTR_EXTERN_FIELD_REF_SIZE,
- data + len - BTR_EXTERN_FIELD_REF_SIZE,
- BTR_EXTERN_FIELD_REF_SIZE);
-
- dfield_set_data(field, buf, uf->orig_len);
- dfield_set_ext(field);
- }
- }
- }
-
- return(n_pushed);
-}
-
-/*******************************************************************//**
-Returns the length of a BLOB part stored on the header page.
-@return part length */
-static
-ulint
-btr_blob_get_part_len(
-/*==================*/
- const byte* blob_header) /*!< in: blob header */
-{
- return(mach_read_from_4(blob_header + BTR_BLOB_HDR_PART_LEN));
-}
-
-/*******************************************************************//**
-Returns the page number where the next BLOB part is stored.
-@return page number or FIL_NULL if no more pages */
-static
-ulint
-btr_blob_get_next_page_no(
-/*======================*/
- const byte* blob_header) /*!< in: blob header */
-{
- return(mach_read_from_4(blob_header + BTR_BLOB_HDR_NEXT_PAGE_NO));
-}
-
-/*******************************************************************//**
-Deallocate a buffer block that was reserved for a BLOB part. */
-static
-void
-btr_blob_free(
-/*==========*/
- buf_block_t* block, /*!< in: buffer block */
- ibool all, /*!< in: TRUE=remove also the compressed page
- if there is one */
- mtr_t* mtr) /*!< in: mini-transaction to commit */
-{
- ulint space = buf_block_get_space(block);
- ulint page_no = buf_block_get_page_no(block);
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-
- mtr_commit(mtr);
-
- buf_pool_mutex_enter();
- mutex_enter(&block->mutex);
-
- /* Only free the block if it is still allocated to
- the same file page. */
-
- if (buf_block_get_state(block)
- == BUF_BLOCK_FILE_PAGE
- && buf_block_get_space(block) == space
- && buf_block_get_page_no(block) == page_no) {
-
- if (buf_LRU_free_block(&block->page, all, NULL)
- != BUF_LRU_FREED
- && all && block->page.zip.data) {
- /* Attempt to deallocate the uncompressed page
- if the whole block cannot be deallocted. */
-
- buf_LRU_free_block(&block->page, FALSE, NULL);
- }
- }
-
- buf_pool_mutex_exit();
- mutex_exit(&block->mutex);
-}
-
-/*******************************************************************//**
-Stores the fields in big_rec_vec to the tablespace and puts pointers to
-them in rec. The extern flags in rec will have to be set beforehand.
-The fields are stored on pages allocated from leaf node
-file segment of the index tree.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ulint
-btr_store_big_rec_extern_fields(
-/*============================*/
- dict_index_t* index, /*!< in: index of rec; the index tree
- MUST be X-latched */
- buf_block_t* rec_block, /*!< in/out: block containing rec */
- rec_t* rec, /*!< in/out: record */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index);
- the "external storage" flags in offsets
- will not correspond to rec when
- this function returns */
- big_rec_t* big_rec_vec, /*!< in: vector containing fields
- to be stored externally */
- mtr_t* local_mtr __attribute__((unused))) /*!< in: mtr
- containing the latch to rec and to the
- tree */
-{
- ulint rec_page_no;
- byte* field_ref;
- ulint extern_len;
- ulint store_len;
- ulint page_no;
- ulint space_id;
- ulint zip_size;
- ulint prev_page_no;
- ulint hint_page_no;
- ulint i;
- mtr_t mtr;
- mem_heap_t* heap = NULL;
- page_zip_des_t* page_zip;
- z_stream c_stream;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(local_mtr, rec_block, MTR_MEMO_PAGE_X_FIX));
- ut_ad(buf_block_get_frame(rec_block) == page_align(rec));
- ut_a(dict_index_is_clust(index));
-
- page_zip = buf_block_get_page_zip(rec_block);
- ut_a(dict_table_zip_size(index->table)
- == buf_block_get_zip_size(rec_block));
-
- space_id = buf_block_get_space(rec_block);
- zip_size = buf_block_get_zip_size(rec_block);
- rec_page_no = buf_block_get_page_no(rec_block);
- ut_a(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX);
-
- if (UNIV_LIKELY_NULL(page_zip)) {
- int err;
-
- /* Zlib deflate needs 128 kilobytes for the default
- window size, plus 512 << memLevel, plus a few
- kilobytes for small objects. We use reduced memLevel
- to limit the memory consumption, and preallocate the
- heap, hoping to avoid memory fragmentation. */
- heap = mem_heap_create(250000);
- page_zip_set_alloc(&c_stream, heap);
-
- err = deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION,
- Z_DEFLATED, 15, 7, Z_DEFAULT_STRATEGY);
- ut_a(err == Z_OK);
- }
-
- /* We have to create a file segment to the tablespace
- for each field and put the pointer to the field in rec */
-
- for (i = 0; i < big_rec_vec->n_fields; i++) {
- ut_ad(rec_offs_nth_extern(offsets,
- big_rec_vec->fields[i].field_no));
- {
- ulint local_len;
- field_ref = rec_get_nth_field(
- rec, offsets, big_rec_vec->fields[i].field_no,
- &local_len);
- ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
- local_len -= BTR_EXTERN_FIELD_REF_SIZE;
- field_ref += local_len;
- }
- extern_len = big_rec_vec->fields[i].len;
-
- ut_a(extern_len > 0);
-
- prev_page_no = FIL_NULL;
-
- if (UNIV_LIKELY_NULL(page_zip)) {
- int err = deflateReset(&c_stream);
- ut_a(err == Z_OK);
-
- c_stream.next_in = (void*) big_rec_vec->fields[i].data;
- c_stream.avail_in = extern_len;
- }
-
- for (;;) {
- buf_block_t* block;
- page_t* page;
-
- mtr_start(&mtr);
-
- if (prev_page_no == FIL_NULL) {
- hint_page_no = 1 + rec_page_no;
- } else {
- hint_page_no = prev_page_no + 1;
- }
-
- block = btr_page_alloc(index, hint_page_no,
- FSP_NO_DIR, 0, &mtr);
- if (UNIV_UNLIKELY(block == NULL)) {
-
- mtr_commit(&mtr);
-
- if (UNIV_LIKELY_NULL(page_zip)) {
- deflateEnd(&c_stream);
- mem_heap_free(heap);
- }
-
- return(DB_OUT_OF_FILE_SPACE);
- }
-
- page_no = buf_block_get_page_no(block);
- page = buf_block_get_frame(block);
-
- if (prev_page_no != FIL_NULL) {
- buf_block_t* prev_block;
- page_t* prev_page;
-
- prev_block = buf_page_get(space_id, zip_size,
- prev_page_no,
- RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(prev_block,
- SYNC_EXTERN_STORAGE);
- prev_page = buf_block_get_frame(prev_block);
-
- if (UNIV_LIKELY_NULL(page_zip)) {
- mlog_write_ulint(
- prev_page + FIL_PAGE_NEXT,
- page_no, MLOG_4BYTES, &mtr);
- memcpy(buf_block_get_page_zip(
- prev_block)
- ->data + FIL_PAGE_NEXT,
- prev_page + FIL_PAGE_NEXT, 4);
- } else {
- mlog_write_ulint(
- prev_page + FIL_PAGE_DATA
- + BTR_BLOB_HDR_NEXT_PAGE_NO,
- page_no, MLOG_4BYTES, &mtr);
- }
-
- }
-
- if (UNIV_LIKELY_NULL(page_zip)) {
- int err;
- page_zip_des_t* blob_page_zip;
-
- /* Write FIL_PAGE_TYPE to the redo log
- separately, before logging any other
- changes to the page, so that the debug
- assertions in
- recv_parse_or_apply_log_rec_body() can
- be made simpler. Before InnoDB Plugin
- 1.0.4, the initialization of
- FIL_PAGE_TYPE was logged as part of
- the mlog_log_string() below. */
-
- mlog_write_ulint(page + FIL_PAGE_TYPE,
- prev_page_no == FIL_NULL
- ? FIL_PAGE_TYPE_ZBLOB
- : FIL_PAGE_TYPE_ZBLOB2,
- MLOG_2BYTES, &mtr);
-
- c_stream.next_out = page
- + FIL_PAGE_DATA;
- c_stream.avail_out
- = page_zip_get_size(page_zip)
- - FIL_PAGE_DATA;
-
- err = deflate(&c_stream, Z_FINISH);
- ut_a(err == Z_OK || err == Z_STREAM_END);
- ut_a(err == Z_STREAM_END
- || c_stream.avail_out == 0);
-
- /* Write the "next BLOB page" pointer */
- mlog_write_ulint(page + FIL_PAGE_NEXT,
- FIL_NULL, MLOG_4BYTES, &mtr);
- /* Initialize the unused "prev page" pointer */
- mlog_write_ulint(page + FIL_PAGE_PREV,
- FIL_NULL, MLOG_4BYTES, &mtr);
- /* Write a back pointer to the record
- into the otherwise unused area. This
- information could be useful in
- debugging. Later, we might want to
- implement the possibility to relocate
- BLOB pages. Then, we would need to be
- able to adjust the BLOB pointer in the
- record. We do not store the heap
- number of the record, because it can
- change in page_zip_reorganize() or
- btr_page_reorganize(). However, also
- the page number of the record may
- change when B-tree nodes are split or
- merged. */
- mlog_write_ulint(page
- + FIL_PAGE_FILE_FLUSH_LSN,
- space_id,
- MLOG_4BYTES, &mtr);
- mlog_write_ulint(page
- + FIL_PAGE_FILE_FLUSH_LSN + 4,
- rec_page_no,
- MLOG_4BYTES, &mtr);
-
- /* Zero out the unused part of the page. */
- memset(page + page_zip_get_size(page_zip)
- - c_stream.avail_out,
- 0, c_stream.avail_out);
- mlog_log_string(page + FIL_PAGE_FILE_FLUSH_LSN,
- page_zip_get_size(page_zip)
- - FIL_PAGE_FILE_FLUSH_LSN,
- &mtr);
- /* Copy the page to compressed storage,
- because it will be flushed to disk
- from there. */
- blob_page_zip = buf_block_get_page_zip(block);
- ut_ad(blob_page_zip);
- ut_ad(page_zip_get_size(blob_page_zip)
- == page_zip_get_size(page_zip));
- memcpy(blob_page_zip->data, page,
- page_zip_get_size(page_zip));
-
- if (err == Z_OK && prev_page_no != FIL_NULL) {
-
- goto next_zip_page;
- }
-
- rec_block = buf_page_get(space_id, zip_size,
- rec_page_no,
- RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(rec_block,
- SYNC_NO_ORDER_CHECK);
-
- if (err == Z_STREAM_END) {
- mach_write_to_4(field_ref
- + BTR_EXTERN_LEN, 0);
- mach_write_to_4(field_ref
- + BTR_EXTERN_LEN + 4,
- c_stream.total_in);
- } else {
- memset(field_ref + BTR_EXTERN_LEN,
- 0, 8);
- }
-
- if (prev_page_no == FIL_NULL) {
- mach_write_to_4(field_ref
- + BTR_EXTERN_SPACE_ID,
- space_id);
-
- mach_write_to_4(field_ref
- + BTR_EXTERN_PAGE_NO,
- page_no);
-
- mach_write_to_4(field_ref
- + BTR_EXTERN_OFFSET,
- FIL_PAGE_NEXT);
- }
-
- page_zip_write_blob_ptr(
- page_zip, rec, index, offsets,
- big_rec_vec->fields[i].field_no, &mtr);
-
-next_zip_page:
- prev_page_no = page_no;
-
- /* Commit mtr and release the
- uncompressed page frame to save memory. */
- btr_blob_free(block, FALSE, &mtr);
-
- if (err == Z_STREAM_END) {
- break;
- }
- } else {
- mlog_write_ulint(page + FIL_PAGE_TYPE,
- FIL_PAGE_TYPE_BLOB,
- MLOG_2BYTES, &mtr);
-
- if (extern_len > (UNIV_PAGE_SIZE
- - FIL_PAGE_DATA
- - BTR_BLOB_HDR_SIZE
- - FIL_PAGE_DATA_END)) {
- store_len = UNIV_PAGE_SIZE
- - FIL_PAGE_DATA
- - BTR_BLOB_HDR_SIZE
- - FIL_PAGE_DATA_END;
- } else {
- store_len = extern_len;
- }
-
- mlog_write_string(page + FIL_PAGE_DATA
- + BTR_BLOB_HDR_SIZE,
- (const byte*)
- big_rec_vec->fields[i].data
- + big_rec_vec->fields[i].len
- - extern_len,
- store_len, &mtr);
- mlog_write_ulint(page + FIL_PAGE_DATA
- + BTR_BLOB_HDR_PART_LEN,
- store_len, MLOG_4BYTES, &mtr);
- mlog_write_ulint(page + FIL_PAGE_DATA
- + BTR_BLOB_HDR_NEXT_PAGE_NO,
- FIL_NULL, MLOG_4BYTES, &mtr);
-
- extern_len -= store_len;
-
- rec_block = buf_page_get(space_id, zip_size,
- rec_page_no,
- RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(rec_block,
- SYNC_NO_ORDER_CHECK);
-
- mlog_write_ulint(field_ref + BTR_EXTERN_LEN, 0,
- MLOG_4BYTES, &mtr);
- mlog_write_ulint(field_ref
- + BTR_EXTERN_LEN + 4,
- big_rec_vec->fields[i].len
- - extern_len,
- MLOG_4BYTES, &mtr);
-
- if (prev_page_no == FIL_NULL) {
- mlog_write_ulint(field_ref
- + BTR_EXTERN_SPACE_ID,
- space_id,
- MLOG_4BYTES, &mtr);
-
- mlog_write_ulint(field_ref
- + BTR_EXTERN_PAGE_NO,
- page_no,
- MLOG_4BYTES, &mtr);
-
- mlog_write_ulint(field_ref
- + BTR_EXTERN_OFFSET,
- FIL_PAGE_DATA,
- MLOG_4BYTES, &mtr);
- }
-
- prev_page_no = page_no;
-
- mtr_commit(&mtr);
-
- if (extern_len == 0) {
- break;
- }
- }
- }
- }
-
- if (UNIV_LIKELY_NULL(page_zip)) {
- deflateEnd(&c_stream);
- mem_heap_free(heap);
- }
-
- return(DB_SUCCESS);
-}
-
-/*******************************************************************//**
-Check the FIL_PAGE_TYPE on an uncompressed BLOB page. */
-static
-void
-btr_check_blob_fil_page_type(
-/*=========================*/
- ulint space_id, /*!< in: space id */
- ulint page_no, /*!< in: page number */
- const page_t* page, /*!< in: page */
- ibool read) /*!< in: TRUE=read, FALSE=purge */
-{
- ulint type = fil_page_get_type(page);
-
- ut_a(space_id == page_get_space_id(page));
- ut_a(page_no == page_get_page_no(page));
-
- if (UNIV_UNLIKELY(type != FIL_PAGE_TYPE_BLOB)) {
- ulint flags = fil_space_get_flags(space_id);
-
- if (UNIV_LIKELY
- ((flags & DICT_TF_FORMAT_MASK) == DICT_TF_FORMAT_51)) {
- /* Old versions of InnoDB did not initialize
- FIL_PAGE_TYPE on BLOB pages. Do not print
- anything about the type mismatch when reading
- a BLOB page that is in Antelope format.*/
- return;
- }
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: FIL_PAGE_TYPE=%lu"
- " on BLOB %s space %lu page %lu flags %lx\n",
- (ulong) type, read ? "read" : "purge",
- (ulong) space_id, (ulong) page_no, (ulong) flags);
- ut_error;
- }
-}
-
-/*******************************************************************//**
-Frees the space in an externally stored field to the file space
-management if the field in data is owned by the externally stored field,
-in a rollback we may have the additional condition that the field must
-not be inherited. */
-UNIV_INTERN
-void
-btr_free_externally_stored_field(
-/*=============================*/
- dict_index_t* index, /*!< in: index of the data, the index
- tree MUST be X-latched; if the tree
- height is 1, then also the root page
- must be X-latched! (this is relevant
- in the case this function is called
- from purge where 'data' is located on
- an undo log page, not an index
- page) */
- byte* field_ref, /*!< in/out: field reference */
- const rec_t* rec, /*!< in: record containing field_ref, for
- page_zip_write_blob_ptr(), or NULL */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index),
- or NULL */
- page_zip_des_t* page_zip, /*!< in: compressed page corresponding
- to rec, or NULL if rec == NULL */
- ulint i, /*!< in: field number of field_ref;
- ignored if rec == NULL */
- enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
- mtr_t* local_mtr __attribute__((unused))) /*!< in: mtr
- containing the latch to data an an
- X-latch to the index tree */
-{
- page_t* page;
- ulint space_id;
- ulint rec_zip_size = dict_table_zip_size(index->table);
- ulint ext_zip_size;
- ulint page_no;
- ulint next_page_no;
- mtr_t mtr;
-#ifdef UNIV_DEBUG
- ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains_page(local_mtr, field_ref,
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(!rec || rec_offs_validate(rec, index, offsets));
-
- if (rec) {
- ulint local_len;
- const byte* f = rec_get_nth_field(rec, offsets,
- i, &local_len);
- ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
- local_len -= BTR_EXTERN_FIELD_REF_SIZE;
- f += local_len;
- ut_ad(f == field_ref);
- }
-#endif /* UNIV_DEBUG */
-
- if (UNIV_UNLIKELY(!memcmp(field_ref, field_ref_zero,
- BTR_EXTERN_FIELD_REF_SIZE))) {
- /* In the rollback of uncommitted transactions, we may
- encounter a clustered index record whose BLOBs have
- not been written. There is nothing to free then. */
- ut_a(rb_ctx == RB_RECOVERY);
- return;
- }
-
- space_id = mach_read_from_4(field_ref + BTR_EXTERN_SPACE_ID);
-
- if (UNIV_UNLIKELY(space_id != dict_index_get_space(index))) {
- ext_zip_size = fil_space_get_zip_size(space_id);
- /* This must be an undo log record in the system tablespace,
- that is, in row_purge_upd_exist_or_extern().
- Currently, externally stored records are stored in the
- same tablespace as the referring records. */
- ut_ad(!page_get_space_id(page_align(field_ref)));
- ut_ad(!rec);
- ut_ad(!page_zip);
- } else {
- ext_zip_size = rec_zip_size;
- }
-
- if (!rec) {
- /* This is a call from row_purge_upd_exist_or_extern(). */
- ut_ad(!page_zip);
- rec_zip_size = 0;
- }
-
- for (;;) {
- buf_block_t* rec_block;
- buf_block_t* ext_block;
-
- mtr_start(&mtr);
-
- rec_block = buf_page_get(page_get_space_id(
- page_align(field_ref)),
- rec_zip_size,
- page_get_page_no(
- page_align(field_ref)),
- RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(rec_block, SYNC_NO_ORDER_CHECK);
- page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO);
-
- if (/* There is no external storage data */
- page_no == FIL_NULL
- /* This field does not own the externally stored field */
- || (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
- & BTR_EXTERN_OWNER_FLAG)
- /* Rollback and inherited field */
- || (rb_ctx != RB_NONE
- && (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
- & BTR_EXTERN_INHERITED_FLAG))) {
-
- /* Do not free */
- mtr_commit(&mtr);
-
- return;
- }
-
- ext_block = buf_page_get(space_id, ext_zip_size, page_no,
- RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(ext_block, SYNC_EXTERN_STORAGE);
- page = buf_block_get_frame(ext_block);
-
- if (ext_zip_size) {
- /* Note that page_zip will be NULL
- in row_purge_upd_exist_or_extern(). */
- switch (fil_page_get_type(page)) {
- case FIL_PAGE_TYPE_ZBLOB:
- case FIL_PAGE_TYPE_ZBLOB2:
- break;
- default:
- ut_error;
- }
- next_page_no = mach_read_from_4(page + FIL_PAGE_NEXT);
-
- btr_page_free_low(index, ext_block, 0, &mtr);
-
- if (UNIV_LIKELY(page_zip != NULL)) {
- mach_write_to_4(field_ref + BTR_EXTERN_PAGE_NO,
- next_page_no);
- mach_write_to_4(field_ref + BTR_EXTERN_LEN + 4,
- 0);
- page_zip_write_blob_ptr(page_zip, rec, index,
- offsets, i, &mtr);
- } else {
- mlog_write_ulint(field_ref
- + BTR_EXTERN_PAGE_NO,
- next_page_no,
- MLOG_4BYTES, &mtr);
- mlog_write_ulint(field_ref
- + BTR_EXTERN_LEN + 4, 0,
- MLOG_4BYTES, &mtr);
- }
- } else {
- ut_a(!page_zip);
- btr_check_blob_fil_page_type(space_id, page_no, page,
- FALSE);
-
- next_page_no = mach_read_from_4(
- page + FIL_PAGE_DATA
- + BTR_BLOB_HDR_NEXT_PAGE_NO);
-
- /* We must supply the page level (= 0) as an argument
- because we did not store it on the page (we save the
- space overhead from an index page header. */
-
- btr_page_free_low(index, ext_block, 0, &mtr);
-
- mlog_write_ulint(field_ref + BTR_EXTERN_PAGE_NO,
- next_page_no,
- MLOG_4BYTES, &mtr);
- /* Zero out the BLOB length. If the server
- crashes during the execution of this function,
- trx_rollback_or_clean_all_recovered() could
- dereference the half-deleted BLOB, fetching a
- wrong prefix for the BLOB. */
- mlog_write_ulint(field_ref + BTR_EXTERN_LEN + 4,
- 0,
- MLOG_4BYTES, &mtr);
- }
-
- /* Commit mtr and release the BLOB block to save memory. */
- btr_blob_free(ext_block, TRUE, &mtr);
- }
-}
-
-/***********************************************************//**
-Frees the externally stored fields for a record. */
-static
-void
-btr_rec_free_externally_stored_fields(
-/*==================================*/
- dict_index_t* index, /*!< in: index of the data, the index
- tree MUST be X-latched */
- rec_t* rec, /*!< in/out: record */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
- part will be updated, or NULL */
- enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
- mtr_t* mtr) /*!< in: mini-transaction handle which contains
- an X-latch to record page and to the index
- tree */
-{
- ulint n_fields;
- ulint i;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
- /* Free possible externally stored fields in the record */
-
- ut_ad(dict_table_is_comp(index->table) == !!rec_offs_comp(offsets));
- n_fields = rec_offs_n_fields(offsets);
-
- for (i = 0; i < n_fields; i++) {
- if (rec_offs_nth_extern(offsets, i)) {
- ulint len;
- byte* data
- = rec_get_nth_field(rec, offsets, i, &len);
- ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
-
- btr_free_externally_stored_field(
- index, data + len - BTR_EXTERN_FIELD_REF_SIZE,
- rec, offsets, page_zip, i, rb_ctx, mtr);
- }
- }
-}
-
-/***********************************************************//**
-Frees the externally stored fields for a record, if the field is mentioned
-in the update vector. */
-static
-void
-btr_rec_free_updated_extern_fields(
-/*===============================*/
- dict_index_t* index, /*!< in: index of rec; the index tree MUST be
- X-latched */
- rec_t* rec, /*!< in/out: record */
- page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
- part will be updated, or NULL */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- const upd_t* update, /*!< in: update vector */
- enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
- mtr_t* mtr) /*!< in: mini-transaction handle which contains
- an X-latch to record page and to the tree */
-{
- ulint n_fields;
- ulint i;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
-
- /* Free possible externally stored fields in the record */
-
- n_fields = upd_get_n_fields(update);
-
- for (i = 0; i < n_fields; i++) {
- const upd_field_t* ufield = upd_get_nth_field(update, i);
-
- if (rec_offs_nth_extern(offsets, ufield->field_no)) {
- ulint len;
- byte* data = rec_get_nth_field(
- rec, offsets, ufield->field_no, &len);
- ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
-
- btr_free_externally_stored_field(
- index, data + len - BTR_EXTERN_FIELD_REF_SIZE,
- rec, offsets, page_zip,
- ufield->field_no, rb_ctx, mtr);
- }
- }
-}
-
-/*******************************************************************//**
-Copies the prefix of an uncompressed BLOB. The clustered index record
-that points to this BLOB must be protected by a lock or a page latch.
-@return number of bytes written to buf */
-static
-ulint
-btr_copy_blob_prefix(
-/*=================*/
- byte* buf, /*!< out: the externally stored part of
- the field, or a prefix of it */
- ulint len, /*!< in: length of buf, in bytes */
- ulint space_id,/*!< in: space id of the BLOB pages */
- ulint page_no,/*!< in: page number of the first BLOB page */
- ulint offset) /*!< in: offset on the first BLOB page */
-{
- ulint copied_len = 0;
-
- for (;;) {
- mtr_t mtr;
- buf_block_t* block;
- const page_t* page;
- const byte* blob_header;
- ulint part_len;
- ulint copy_len;
-
- mtr_start(&mtr);
-
- block = buf_page_get(space_id, 0, page_no, RW_S_LATCH, &mtr);
- buf_block_dbg_add_level(block, SYNC_EXTERN_STORAGE);
- page = buf_block_get_frame(block);
-
- btr_check_blob_fil_page_type(space_id, page_no, page, TRUE);
-
- blob_header = page + offset;
- part_len = btr_blob_get_part_len(blob_header);
- copy_len = ut_min(part_len, len - copied_len);
-
- memcpy(buf + copied_len,
- blob_header + BTR_BLOB_HDR_SIZE, copy_len);
- copied_len += copy_len;
-
- page_no = btr_blob_get_next_page_no(blob_header);
-
- mtr_commit(&mtr);
-
- if (page_no == FIL_NULL || copy_len != part_len) {
- return(copied_len);
- }
-
- /* On other BLOB pages except the first the BLOB header
- always is at the page data start: */
-
- offset = FIL_PAGE_DATA;
-
- ut_ad(copied_len <= len);
- }
-}
-
-/*******************************************************************//**
-Copies the prefix of a compressed BLOB. The clustered index record
-that points to this BLOB must be protected by a lock or a page latch. */
-static
-void
-btr_copy_zblob_prefix(
-/*==================*/
- z_stream* d_stream,/*!< in/out: the decompressing stream */
- ulint zip_size,/*!< in: compressed BLOB page size */
- ulint space_id,/*!< in: space id of the BLOB pages */
- ulint page_no,/*!< in: page number of the first BLOB page */
- ulint offset) /*!< in: offset on the first BLOB page */
-{
- ulint page_type = FIL_PAGE_TYPE_ZBLOB;
-
- ut_ad(ut_is_2pow(zip_size));
- ut_ad(zip_size >= PAGE_ZIP_MIN_SIZE);
- ut_ad(zip_size <= UNIV_PAGE_SIZE);
- ut_ad(space_id);
-
- for (;;) {
- buf_page_t* bpage;
- int err;
- ulint next_page_no;
-
- /* There is no latch on bpage directly. Instead,
- bpage is protected by the B-tree page latch that
- is being held on the clustered index record, or,
- in row_merge_copy_blobs(), by an exclusive table lock. */
- bpage = buf_page_get_zip(space_id, zip_size, page_no);
-
- if (UNIV_UNLIKELY(!bpage)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Cannot load"
- " compressed BLOB"
- " page %lu space %lu\n",
- (ulong) page_no, (ulong) space_id);
- return;
- }
-
- if (UNIV_UNLIKELY
- (fil_page_get_type(bpage->zip.data) != page_type)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Unexpected type %lu of"
- " compressed BLOB"
- " page %lu space %lu\n",
- (ulong) fil_page_get_type(bpage->zip.data),
- (ulong) page_no, (ulong) space_id);
- goto end_of_blob;
- }
-
- next_page_no = mach_read_from_4(bpage->zip.data + offset);
-
- if (UNIV_LIKELY(offset == FIL_PAGE_NEXT)) {
- /* When the BLOB begins at page header,
- the compressed data payload does not
- immediately follow the next page pointer. */
- offset = FIL_PAGE_DATA;
- } else {
- offset += 4;
- }
-
- d_stream->next_in = bpage->zip.data + offset;
- d_stream->avail_in = zip_size - offset;
-
- err = inflate(d_stream, Z_NO_FLUSH);
- switch (err) {
- case Z_OK:
- if (!d_stream->avail_out) {
- goto end_of_blob;
- }
- break;
- case Z_STREAM_END:
- if (next_page_no == FIL_NULL) {
- goto end_of_blob;
- }
- /* fall through */
- default:
-inflate_error:
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: inflate() of"
- " compressed BLOB"
- " page %lu space %lu returned %d (%s)\n",
- (ulong) page_no, (ulong) space_id,
- err, d_stream->msg);
- case Z_BUF_ERROR:
- goto end_of_blob;
- }
-
- if (next_page_no == FIL_NULL) {
- if (!d_stream->avail_in) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: unexpected end of"
- " compressed BLOB"
- " page %lu space %lu\n",
- (ulong) page_no,
- (ulong) space_id);
- } else {
- err = inflate(d_stream, Z_FINISH);
- switch (err) {
- case Z_STREAM_END:
- case Z_BUF_ERROR:
- break;
- default:
- goto inflate_error;
- }
- }
-
-end_of_blob:
- buf_page_release_zip(bpage);
- return;
- }
-
- buf_page_release_zip(bpage);
-
- /* On other BLOB pages except the first
- the BLOB header always is at the page header: */
-
- page_no = next_page_no;
- offset = FIL_PAGE_NEXT;
- page_type = FIL_PAGE_TYPE_ZBLOB2;
- }
-}
-
-/*******************************************************************//**
-Copies the prefix of an externally stored field of a record. The
-clustered index record that points to this BLOB must be protected by a
-lock or a page latch.
-@return number of bytes written to buf */
-static
-ulint
-btr_copy_externally_stored_field_prefix_low(
-/*========================================*/
- byte* buf, /*!< out: the externally stored part of
- the field, or a prefix of it */
- ulint len, /*!< in: length of buf, in bytes */
- ulint zip_size,/*!< in: nonzero=compressed BLOB page size,
- zero for uncompressed BLOBs */
- ulint space_id,/*!< in: space id of the first BLOB page */
- ulint page_no,/*!< in: page number of the first BLOB page */
- ulint offset) /*!< in: offset on the first BLOB page */
-{
- if (UNIV_UNLIKELY(len == 0)) {
- return(0);
- }
-
- if (UNIV_UNLIKELY(zip_size)) {
- int err;
- z_stream d_stream;
- mem_heap_t* heap;
-
- /* Zlib inflate needs 32 kilobytes for the default
- window size, plus a few kilobytes for small objects. */
- heap = mem_heap_create(40000);
- page_zip_set_alloc(&d_stream, heap);
-
- err = inflateInit(&d_stream);
- ut_a(err == Z_OK);
-
- d_stream.next_out = buf;
- d_stream.avail_out = len;
- d_stream.avail_in = 0;
-
- btr_copy_zblob_prefix(&d_stream, zip_size,
- space_id, page_no, offset);
- inflateEnd(&d_stream);
- mem_heap_free(heap);
- return(d_stream.total_out);
- } else {
- return(btr_copy_blob_prefix(buf, len, space_id,
- page_no, offset));
- }
-}
-
-/*******************************************************************//**
-Copies the prefix of an externally stored field of a record. The
-clustered index record must be protected by a lock or a page latch.
-@return the length of the copied field, or 0 if the column was being
-or has been deleted */
-UNIV_INTERN
-ulint
-btr_copy_externally_stored_field_prefix(
-/*====================================*/
- byte* buf, /*!< out: the field, or a prefix of it */
- ulint len, /*!< in: length of buf, in bytes */
- ulint zip_size,/*!< in: nonzero=compressed BLOB page size,
- zero for uncompressed BLOBs */
- const byte* data, /*!< in: 'internally' stored part of the
- field containing also the reference to
- the external part; must be protected by
- a lock or a page latch */
- ulint local_len)/*!< in: length of data, in bytes */
-{
- ulint space_id;
- ulint page_no;
- ulint offset;
-
- ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
-
- local_len -= BTR_EXTERN_FIELD_REF_SIZE;
-
- if (UNIV_UNLIKELY(local_len >= len)) {
- memcpy(buf, data, len);
- return(len);
- }
-
- memcpy(buf, data, local_len);
- data += local_len;
-
- ut_a(memcmp(data, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE));
-
- if (!mach_read_from_4(data + BTR_EXTERN_LEN + 4)) {
- /* The externally stored part of the column has been
- (partially) deleted. Signal the half-deleted BLOB
- to the caller. */
-
- return(0);
- }
-
- space_id = mach_read_from_4(data + BTR_EXTERN_SPACE_ID);
-
- page_no = mach_read_from_4(data + BTR_EXTERN_PAGE_NO);
-
- offset = mach_read_from_4(data + BTR_EXTERN_OFFSET);
-
- return(local_len
- + btr_copy_externally_stored_field_prefix_low(buf + local_len,
- len - local_len,
- zip_size,
- space_id, page_no,
- offset));
-}
-
-/*******************************************************************//**
-Copies an externally stored field of a record to mem heap. The
-clustered index record must be protected by a lock or a page latch.
-@return the whole field copied to heap */
-static
-byte*
-btr_copy_externally_stored_field(
-/*=============================*/
- ulint* len, /*!< out: length of the whole field */
- const byte* data, /*!< in: 'internally' stored part of the
- field containing also the reference to
- the external part; must be protected by
- a lock or a page latch */
- ulint zip_size,/*!< in: nonzero=compressed BLOB page size,
- zero for uncompressed BLOBs */
- ulint local_len,/*!< in: length of data */
- mem_heap_t* heap) /*!< in: mem heap */
-{
- ulint space_id;
- ulint page_no;
- ulint offset;
- ulint extern_len;
- byte* buf;
-
- ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
-
- local_len -= BTR_EXTERN_FIELD_REF_SIZE;
-
- space_id = mach_read_from_4(data + local_len + BTR_EXTERN_SPACE_ID);
-
- page_no = mach_read_from_4(data + local_len + BTR_EXTERN_PAGE_NO);
-
- offset = mach_read_from_4(data + local_len + BTR_EXTERN_OFFSET);
-
- /* Currently a BLOB cannot be bigger than 4 GB; we
- leave the 4 upper bytes in the length field unused */
-
- extern_len = mach_read_from_4(data + local_len + BTR_EXTERN_LEN + 4);
-
- buf = mem_heap_alloc(heap, local_len + extern_len);
-
- memcpy(buf, data, local_len);
- *len = local_len
- + btr_copy_externally_stored_field_prefix_low(buf + local_len,
- extern_len,
- zip_size,
- space_id,
- page_no, offset);
-
- return(buf);
-}
-
-/*******************************************************************//**
-Copies an externally stored field of a record to mem heap.
-@return the field copied to heap */
-UNIV_INTERN
-byte*
-btr_rec_copy_externally_stored_field(
-/*=================================*/
- const rec_t* rec, /*!< in: record in a clustered index;
- must be protected by a lock or a page latch */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint zip_size,/*!< in: nonzero=compressed BLOB page size,
- zero for uncompressed BLOBs */
- ulint no, /*!< in: field number */
- ulint* len, /*!< out: length of the field */
- mem_heap_t* heap) /*!< in: mem heap */
-{
- ulint local_len;
- const byte* data;
-
- ut_a(rec_offs_nth_extern(offsets, no));
-
- /* An externally stored field can contain some initial
- data from the field, and in the last 20 bytes it has the
- space id, page number, and offset where the rest of the
- field data is stored, and the data length in addition to
- the data stored locally. We may need to store some data
- locally to get the local record length above the 128 byte
- limit so that field offsets are stored in two bytes, and
- the extern bit is available in those two bytes. */
-
- data = rec_get_nth_field(rec, offsets, no, &local_len);
-
- return(btr_copy_externally_stored_field(len, data,
- zip_size, local_len, heap));
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/btr/btr0pcur.c b/storage/innodb_plugin/btr/btr0pcur.c
deleted file mode 100644
index ec98692c35b..00000000000
--- a/storage/innodb_plugin/btr/btr0pcur.c
+++ /dev/null
@@ -1,582 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file btr/btr0pcur.c
-The index tree persistent cursor
-
-Created 2/23/1996 Heikki Tuuri
-*******************************************************/
-
-#include "btr0pcur.h"
-
-#ifdef UNIV_NONINL
-#include "btr0pcur.ic"
-#endif
-
-#include "ut0byte.h"
-#include "rem0cmp.h"
-#include "trx0trx.h"
-
-/**************************************************************//**
-Allocates memory for a persistent cursor object and initializes the cursor.
-@return own: persistent cursor */
-UNIV_INTERN
-btr_pcur_t*
-btr_pcur_create_for_mysql(void)
-/*============================*/
-{
- btr_pcur_t* pcur;
-
- pcur = mem_alloc(sizeof(btr_pcur_t));
-
- pcur->btr_cur.index = NULL;
- btr_pcur_init(pcur);
-
- return(pcur);
-}
-
-/**************************************************************//**
-Frees the memory for a persistent cursor object. */
-UNIV_INTERN
-void
-btr_pcur_free_for_mysql(
-/*====================*/
- btr_pcur_t* cursor) /*!< in, own: persistent cursor */
-{
- if (cursor->old_rec_buf != NULL) {
-
- mem_free(cursor->old_rec_buf);
-
- cursor->old_rec_buf = NULL;
- }
-
- cursor->btr_cur.page_cur.rec = NULL;
- cursor->old_rec = NULL;
- cursor->old_n_fields = 0;
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-
- cursor->latch_mode = BTR_NO_LATCHES;
- cursor->pos_state = BTR_PCUR_NOT_POSITIONED;
-
- mem_free(cursor);
-}
-
-/**************************************************************//**
-The position of the cursor is stored by taking an initial segment of the
-record the cursor is positioned on, before, or after, and copying it to the
-cursor data structure, or just setting a flag if the cursor id before the
-first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the
-page where the cursor is positioned must not be empty if the index tree is
-not totally empty! */
-UNIV_INTERN
-void
-btr_pcur_store_position(
-/*====================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_cur_t* page_cursor;
- buf_block_t* block;
- rec_t* rec;
- dict_index_t* index;
- page_t* page;
- ulint offs;
-
- ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- block = btr_pcur_get_block(cursor);
- index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor));
-
- page_cursor = btr_pcur_get_page_cur(cursor);
-
- rec = page_cur_get_rec(page_cursor);
- page = page_align(rec);
- offs = page_offset(rec);
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_S_FIX)
- || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- ut_a(cursor->latch_mode != BTR_NO_LATCHES);
-
- if (UNIV_UNLIKELY(page_get_n_recs(page) == 0)) {
- /* It must be an empty index tree; NOTE that in this case
- we do not store the modify_clock, but always do a search
- if we restore the cursor position */
-
- ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
- ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
-
- cursor->old_stored = BTR_PCUR_OLD_STORED;
-
- if (page_rec_is_supremum_low(offs)) {
-
- cursor->rel_pos = BTR_PCUR_AFTER_LAST_IN_TREE;
- } else {
- cursor->rel_pos = BTR_PCUR_BEFORE_FIRST_IN_TREE;
- }
-
- return;
- }
-
- if (page_rec_is_supremum_low(offs)) {
-
- rec = page_rec_get_prev(rec);
-
- cursor->rel_pos = BTR_PCUR_AFTER;
-
- } else if (page_rec_is_infimum_low(offs)) {
-
- rec = page_rec_get_next(rec);
-
- cursor->rel_pos = BTR_PCUR_BEFORE;
- } else {
- cursor->rel_pos = BTR_PCUR_ON;
- }
-
- cursor->old_stored = BTR_PCUR_OLD_STORED;
- cursor->old_rec = dict_index_copy_rec_order_prefix(
- index, rec, &cursor->old_n_fields,
- &cursor->old_rec_buf, &cursor->buf_size);
-
- cursor->block_when_stored = block;
- cursor->modify_clock = buf_block_get_modify_clock(block);
-}
-
-/**************************************************************//**
-Copies the stored position of a pcur to another pcur. */
-UNIV_INTERN
-void
-btr_pcur_copy_stored_position(
-/*==========================*/
- btr_pcur_t* pcur_receive, /*!< in: pcur which will receive the
- position info */
- btr_pcur_t* pcur_donate) /*!< in: pcur from which the info is
- copied */
-{
- if (pcur_receive->old_rec_buf) {
- mem_free(pcur_receive->old_rec_buf);
- }
-
- ut_memcpy(pcur_receive, pcur_donate, sizeof(btr_pcur_t));
-
- if (pcur_donate->old_rec_buf) {
-
- pcur_receive->old_rec_buf = mem_alloc(pcur_donate->buf_size);
-
- ut_memcpy(pcur_receive->old_rec_buf, pcur_donate->old_rec_buf,
- pcur_donate->buf_size);
- pcur_receive->old_rec = pcur_receive->old_rec_buf
- + (pcur_donate->old_rec - pcur_donate->old_rec_buf);
- }
-
- pcur_receive->old_n_fields = pcur_donate->old_n_fields;
-}
-
-/**************************************************************//**
-Restores the stored position of a persistent cursor bufferfixing the page and
-obtaining the specified latches. If the cursor position was saved when the
-(1) cursor was positioned on a user record: this function restores the position
-to the last record LESS OR EQUAL to the stored record;
-(2) cursor was positioned on a page infimum record: restores the position to
-the last record LESS than the user record which was the successor of the page
-infimum;
-(3) cursor was positioned on the page supremum: restores to the first record
-GREATER than the user record which was the predecessor of the supremum.
-(4) cursor was positioned before the first or after the last in an empty tree:
-restores to before first or after the last in the tree.
-@return TRUE if the cursor position was stored when it was on a user
-record and it can be restored on a user record whose ordering fields
-are identical to the ones of the original user record */
-UNIV_INTERN
-ibool
-btr_pcur_restore_position(
-/*======================*/
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /*!< in: detached persistent cursor */
- mtr_t* mtr) /*!< in: mtr */
-{
- dict_index_t* index;
- dtuple_t* tuple;
- ulint mode;
- ulint old_mode;
- mem_heap_t* heap;
-
- index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor));
-
- if (UNIV_UNLIKELY(cursor->old_stored != BTR_PCUR_OLD_STORED)
- || UNIV_UNLIKELY(cursor->pos_state != BTR_PCUR_WAS_POSITIONED
- && cursor->pos_state != BTR_PCUR_IS_POSITIONED)) {
- ut_print_buf(stderr, cursor, sizeof(btr_pcur_t));
- putc('\n', stderr);
- if (cursor->trx_if_known) {
- trx_print(stderr, cursor->trx_if_known, 0);
- }
-
- ut_error;
- }
-
- if (UNIV_UNLIKELY
- (cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE
- || cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE)) {
-
- /* In these cases we do not try an optimistic restoration,
- but always do a search */
-
- btr_cur_open_at_index_side(
- cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE,
- index, latch_mode, btr_pcur_get_btr_cur(cursor), mtr);
-
- cursor->block_when_stored = btr_pcur_get_block(cursor);
-
- return(FALSE);
- }
-
- ut_a(cursor->old_rec);
- ut_a(cursor->old_n_fields);
-
- if (UNIV_LIKELY(latch_mode == BTR_SEARCH_LEAF)
- || UNIV_LIKELY(latch_mode == BTR_MODIFY_LEAF)) {
- /* Try optimistic restoration */
-
- if (UNIV_LIKELY(buf_page_optimistic_get(
- latch_mode,
- cursor->block_when_stored,
- cursor->modify_clock, mtr))) {
- cursor->pos_state = BTR_PCUR_IS_POSITIONED;
-
- buf_block_dbg_add_level(btr_pcur_get_block(cursor),
- SYNC_TREE_NODE);
-
- if (cursor->rel_pos == BTR_PCUR_ON) {
-#ifdef UNIV_DEBUG
- const rec_t* rec;
- const ulint* offsets1;
- const ulint* offsets2;
-#endif /* UNIV_DEBUG */
- cursor->latch_mode = latch_mode;
-#ifdef UNIV_DEBUG
- rec = btr_pcur_get_rec(cursor);
-
- heap = mem_heap_create(256);
- offsets1 = rec_get_offsets(
- cursor->old_rec, index, NULL,
- cursor->old_n_fields, &heap);
- offsets2 = rec_get_offsets(
- rec, index, NULL,
- cursor->old_n_fields, &heap);
-
- ut_ad(!cmp_rec_rec(cursor->old_rec,
- rec, offsets1, offsets2,
- index));
- mem_heap_free(heap);
-#endif /* UNIV_DEBUG */
- return(TRUE);
- }
-
- return(FALSE);
- }
- }
-
- /* If optimistic restoration did not succeed, open the cursor anew */
-
- heap = mem_heap_create(256);
-
- tuple = dict_index_build_data_tuple(index, cursor->old_rec,
- cursor->old_n_fields, heap);
-
- /* Save the old search mode of the cursor */
- old_mode = cursor->search_mode;
-
- if (UNIV_LIKELY(cursor->rel_pos == BTR_PCUR_ON)) {
- mode = PAGE_CUR_LE;
- } else if (cursor->rel_pos == BTR_PCUR_AFTER) {
- mode = PAGE_CUR_G;
- } else {
- ut_ad(cursor->rel_pos == BTR_PCUR_BEFORE);
- mode = PAGE_CUR_L;
- }
-
- btr_pcur_open_with_no_init(index, tuple, mode, latch_mode,
- cursor, 0, mtr);
-
- /* Restore the old search mode */
- cursor->search_mode = old_mode;
-
- if (cursor->rel_pos == BTR_PCUR_ON
- && btr_pcur_is_on_user_rec(cursor)
- && 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor),
- rec_get_offsets(
- btr_pcur_get_rec(cursor), index,
- NULL, ULINT_UNDEFINED, &heap))) {
-
- /* We have to store the NEW value for the modify clock, since
- the cursor can now be on a different page! But we can retain
- the value of old_rec */
-
- cursor->block_when_stored = btr_pcur_get_block(cursor);
- cursor->modify_clock = buf_block_get_modify_clock(
- cursor->block_when_stored);
- cursor->old_stored = BTR_PCUR_OLD_STORED;
-
- mem_heap_free(heap);
-
- return(TRUE);
- }
-
- mem_heap_free(heap);
-
- /* We have to store new position information, modify_clock etc.,
- to the cursor because it can now be on a different page, the record
- under it may have been removed, etc. */
-
- btr_pcur_store_position(cursor, mtr);
-
- return(FALSE);
-}
-
-/**************************************************************//**
-If the latch mode of the cursor is BTR_LEAF_SEARCH or BTR_LEAF_MODIFY,
-releases the page latch and bufferfix reserved by the cursor.
-NOTE! In the case of BTR_LEAF_MODIFY, there should not exist changes
-made by the current mini-transaction to the data protected by the
-cursor latch, as then the latch must not be released until mtr_commit. */
-UNIV_INTERN
-void
-btr_pcur_release_leaf(
-/*==================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_block_t* block;
-
- ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- block = btr_pcur_get_block(cursor);
-
- btr_leaf_page_release(block, cursor->latch_mode, mtr);
-
- cursor->latch_mode = BTR_NO_LATCHES;
-
- cursor->pos_state = BTR_PCUR_WAS_POSITIONED;
-}
-
-/*********************************************************//**
-Moves the persistent cursor to the first record on the next page. Releases the
-latch on the current page, and bufferunfixes it. Note that there must not be
-modifications on the current page, as then the x-latch can be released only in
-mtr_commit. */
-UNIV_INTERN
-void
-btr_pcur_move_to_next_page(
-/*=======================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor; must be on the
- last record of the current page */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint next_page_no;
- ulint space;
- ulint zip_size;
- page_t* page;
- buf_block_t* next_block;
- page_t* next_page;
-
- ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
- ut_ad(btr_pcur_is_after_last_on_page(cursor));
-
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-
- page = btr_pcur_get_page(cursor);
- next_page_no = btr_page_get_next(page, mtr);
- space = buf_block_get_space(btr_pcur_get_block(cursor));
- zip_size = buf_block_get_zip_size(btr_pcur_get_block(cursor));
-
- ut_ad(next_page_no != FIL_NULL);
-
- next_block = btr_block_get(space, zip_size, next_page_no,
- cursor->latch_mode, mtr);
- next_page = buf_block_get_frame(next_block);
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(next_page) == page_is_comp(page));
- ut_a(btr_page_get_prev(next_page, mtr)
- == buf_block_get_page_no(btr_pcur_get_block(cursor)));
-#endif /* UNIV_BTR_DEBUG */
- next_block->check_index_page_at_flush = TRUE;
-
- btr_leaf_page_release(btr_pcur_get_block(cursor),
- cursor->latch_mode, mtr);
-
- page_cur_set_before_first(next_block, btr_pcur_get_page_cur(cursor));
-
- page_check_dir(next_page);
-}
-
-/*********************************************************//**
-Moves the persistent cursor backward if it is on the first record of the page.
-Commits mtr. Note that to prevent a possible deadlock, the operation
-first stores the position of the cursor, commits mtr, acquires the necessary
-latches and restores the cursor position again before returning. The
-alphabetical position of the cursor is guaranteed to be sensible on
-return, but it may happen that the cursor is not positioned on the last
-record of any page, because the structure of the tree may have changed
-during the time when the cursor had no latches. */
-UNIV_INTERN
-void
-btr_pcur_move_backward_from_page(
-/*=============================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor, must be on the first
- record of the current page */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint prev_page_no;
- ulint space;
- page_t* page;
- buf_block_t* prev_block;
- ulint latch_mode;
- ulint latch_mode2;
-
- ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
- ut_ad(btr_pcur_is_before_first_on_page(cursor));
- ut_ad(!btr_pcur_is_before_first_in_tree(cursor, mtr));
-
- latch_mode = cursor->latch_mode;
-
- if (latch_mode == BTR_SEARCH_LEAF) {
-
- latch_mode2 = BTR_SEARCH_PREV;
-
- } else if (latch_mode == BTR_MODIFY_LEAF) {
-
- latch_mode2 = BTR_MODIFY_PREV;
- } else {
- latch_mode2 = 0; /* To eliminate compiler warning */
- ut_error;
- }
-
- btr_pcur_store_position(cursor, mtr);
-
- mtr_commit(mtr);
-
- mtr_start(mtr);
-
- btr_pcur_restore_position(latch_mode2, cursor, mtr);
-
- page = btr_pcur_get_page(cursor);
-
- prev_page_no = btr_page_get_prev(page, mtr);
- space = buf_block_get_space(btr_pcur_get_block(cursor));
-
- if (prev_page_no == FIL_NULL) {
- } else if (btr_pcur_is_before_first_on_page(cursor)) {
-
- prev_block = btr_pcur_get_btr_cur(cursor)->left_block;
-
- btr_leaf_page_release(btr_pcur_get_block(cursor),
- latch_mode, mtr);
-
- page_cur_set_after_last(prev_block,
- btr_pcur_get_page_cur(cursor));
- } else {
-
- /* The repositioned cursor did not end on an infimum record on
- a page. Cursor repositioning acquired a latch also on the
- previous page, but we do not need the latch: release it. */
-
- prev_block = btr_pcur_get_btr_cur(cursor)->left_block;
-
- btr_leaf_page_release(prev_block, latch_mode, mtr);
- }
-
- cursor->latch_mode = latch_mode;
-
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-}
-
-/*********************************************************//**
-Moves the persistent cursor to the previous record in the tree. If no records
-are left, the cursor stays 'before first in tree'.
-@return TRUE if the cursor was not before first in tree */
-UNIV_INTERN
-ibool
-btr_pcur_move_to_prev(
-/*==================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the
- function may release the page latch */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-
- if (btr_pcur_is_before_first_on_page(cursor)) {
-
- if (btr_pcur_is_before_first_in_tree(cursor, mtr)) {
-
- return(FALSE);
- }
-
- btr_pcur_move_backward_from_page(cursor, mtr);
-
- return(TRUE);
- }
-
- btr_pcur_move_to_prev_on_page(cursor);
-
- return(TRUE);
-}
-
-/**************************************************************//**
-If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first
-user record satisfying the search condition, in the case PAGE_CUR_L or
-PAGE_CUR_LE, on the last user record. If no such user record exists, then
-in the first case sets the cursor after last in tree, and in the latter case
-before first in tree. The latching mode must be BTR_SEARCH_LEAF or
-BTR_MODIFY_LEAF. */
-UNIV_INTERN
-void
-btr_pcur_open_on_user_rec(
-/*======================*/
- dict_index_t* index, /*!< in: index */
- const dtuple_t* tuple, /*!< in: tuple on which search done */
- ulint mode, /*!< in: PAGE_CUR_L, ... */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or
- BTR_MODIFY_LEAF */
- btr_pcur_t* cursor, /*!< in: memory buffer for persistent
- cursor */
- mtr_t* mtr) /*!< in: mtr */
-{
- btr_pcur_open(index, tuple, mode, latch_mode, cursor, mtr);
-
- if ((mode == PAGE_CUR_GE) || (mode == PAGE_CUR_G)) {
-
- if (btr_pcur_is_after_last_on_page(cursor)) {
-
- btr_pcur_move_to_next_user_rec(cursor, mtr);
- }
- } else {
- ut_ad((mode == PAGE_CUR_LE) || (mode == PAGE_CUR_L));
-
- /* Not implemented yet */
-
- ut_error;
- }
-}
diff --git a/storage/innodb_plugin/btr/btr0sea.c b/storage/innodb_plugin/btr/btr0sea.c
deleted file mode 100644
index ef7afeb1039..00000000000
--- a/storage/innodb_plugin/btr/btr0sea.c
+++ /dev/null
@@ -1,1889 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file btr/btr0sea.c
-The index tree adaptive search
-
-Created 2/17/1996 Heikki Tuuri
-*************************************************************************/
-
-#include "btr0sea.h"
-#ifdef UNIV_NONINL
-#include "btr0sea.ic"
-#endif
-
-#include "buf0buf.h"
-#include "page0page.h"
-#include "page0cur.h"
-#include "btr0cur.h"
-#include "btr0pcur.h"
-#include "btr0btr.h"
-#include "ha0ha.h"
-
-/** Flag: has the search system been enabled?
-Protected by btr_search_latch and btr_search_enabled_mutex. */
-UNIV_INTERN char btr_search_enabled = TRUE;
-
-/** Mutex protecting btr_search_enabled */
-static mutex_t btr_search_enabled_mutex;
-
-/** A dummy variable to fool the compiler */
-UNIV_INTERN ulint btr_search_this_is_zero = 0;
-
-#ifdef UNIV_SEARCH_PERF_STAT
-/** Number of successful adaptive hash index lookups */
-UNIV_INTERN ulint btr_search_n_succ = 0;
-/** Number of failed adaptive hash index lookups */
-UNIV_INTERN ulint btr_search_n_hash_fail = 0;
-#endif /* UNIV_SEARCH_PERF_STAT */
-
-/** padding to prevent other memory update
-hotspots from residing on the same memory
-cache line as btr_search_latch */
-UNIV_INTERN byte btr_sea_pad1[64];
-
-/** The latch protecting the adaptive search system: this latch protects the
-(1) positions of records on those pages where a hash index has been built.
-NOTE: It does not protect values of non-ordering fields within a record from
-being updated in-place! We can use fact (1) to perform unique searches to
-indexes. */
-
-/* We will allocate the latch from dynamic memory to get it to the
-same DRAM page as other hotspot semaphores */
-UNIV_INTERN rw_lock_t* btr_search_latch_temp;
-
-/** padding to prevent other memory update hotspots from residing on
-the same memory cache line */
-UNIV_INTERN byte btr_sea_pad2[64];
-
-/** The adaptive hash index */
-UNIV_INTERN btr_search_sys_t* btr_search_sys;
-
-/** If the number of records on the page divided by this parameter
-would have been successfully accessed using a hash index, the index
-is then built on the page, assuming the global limit has been reached */
-#define BTR_SEARCH_PAGE_BUILD_LIMIT 16
-
-/** The global limit for consecutive potentially successful hash searches,
-before hash index building is started */
-#define BTR_SEARCH_BUILD_LIMIT 100
-
-/********************************************************************//**
-Builds a hash index on a page with the given parameters. If the page already
-has a hash index with different parameters, the old hash index is removed.
-If index is non-NULL, this function checks if n_fields and n_bytes are
-sensible values, and does not build a hash index if not. */
-static
-void
-btr_search_build_page_hash_index(
-/*=============================*/
- dict_index_t* index, /*!< in: index for which to build, or NULL if
- not known */
- buf_block_t* block, /*!< in: index page, s- or x-latched */
- ulint n_fields,/*!< in: hash this many full fields */
- ulint n_bytes,/*!< in: hash this many bytes from the next
- field */
- ibool left_side);/*!< in: hash for searches from left side? */
-
-/*****************************************************************//**
-This function should be called before reserving any btr search mutex, if
-the intended operation might add nodes to the search system hash table.
-Because of the latching order, once we have reserved the btr search system
-latch, we cannot allocate a free frame from the buffer pool. Checks that
-there is a free buffer frame allocated for hash table heap in the btr search
-system. If not, allocates a free frames for the heap. This check makes it
-probable that, when have reserved the btr search system latch and we need to
-allocate a new node to the hash table, it will succeed. However, the check
-will not guarantee success. */
-static
-void
-btr_search_check_free_space_in_heap(void)
-/*=====================================*/
-{
- hash_table_t* table;
- mem_heap_t* heap;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- table = btr_search_sys->hash_index;
-
- heap = table->heap;
-
- /* Note that we peek the value of heap->free_block without reserving
- the latch: this is ok, because we will not guarantee that there will
- be enough free space in the hash table. */
-
- if (heap->free_block == NULL) {
- buf_block_t* block = buf_block_alloc(0);
-
- rw_lock_x_lock(&btr_search_latch);
-
- if (heap->free_block == NULL) {
- heap->free_block = block;
- } else {
- buf_block_free(block);
- }
-
- rw_lock_x_unlock(&btr_search_latch);
- }
-}
-
-/*****************************************************************//**
-Creates and initializes the adaptive search system at a database start. */
-UNIV_INTERN
-void
-btr_search_sys_create(
-/*==================*/
- ulint hash_size) /*!< in: hash index hash table size */
-{
- /* We allocate the search latch from dynamic memory:
- see above at the global variable definition */
-
- btr_search_latch_temp = mem_alloc(sizeof(rw_lock_t));
-
- rw_lock_create(&btr_search_latch, SYNC_SEARCH_SYS);
- mutex_create(&btr_search_enabled_mutex, SYNC_SEARCH_SYS_CONF);
-
- btr_search_sys = mem_alloc(sizeof(btr_search_sys_t));
-
- btr_search_sys->hash_index = ha_create(hash_size, 0, 0);
-}
-
-/*****************************************************************//**
-Frees the adaptive search system at a database shutdown. */
-UNIV_INTERN
-void
-btr_search_sys_free(void)
-/*=====================*/
-{
- mem_free(btr_search_latch_temp);
- btr_search_latch_temp = NULL;
- mem_heap_free(btr_search_sys->hash_index->heap);
- hash_table_free(btr_search_sys->hash_index);
- mem_free(btr_search_sys);
- btr_search_sys = NULL;
-}
-
-/********************************************************************//**
-Disable the adaptive hash search system and empty the index. */
-UNIV_INTERN
-void
-btr_search_disable(void)
-/*====================*/
-{
- mutex_enter(&btr_search_enabled_mutex);
- rw_lock_x_lock(&btr_search_latch);
-
- btr_search_enabled = FALSE;
-
- /* Clear all block->is_hashed flags and remove all entries
- from btr_search_sys->hash_index. */
- buf_pool_drop_hash_index();
-
- /* btr_search_enabled_mutex should guarantee this. */
- ut_ad(!btr_search_enabled);
-
- rw_lock_x_unlock(&btr_search_latch);
- mutex_exit(&btr_search_enabled_mutex);
-}
-
-/********************************************************************//**
-Enable the adaptive hash search system. */
-UNIV_INTERN
-void
-btr_search_enable(void)
-/*====================*/
-{
- mutex_enter(&btr_search_enabled_mutex);
- rw_lock_x_lock(&btr_search_latch);
-
- btr_search_enabled = TRUE;
-
- rw_lock_x_unlock(&btr_search_latch);
- mutex_exit(&btr_search_enabled_mutex);
-}
-
-/*****************************************************************//**
-Creates and initializes a search info struct.
-@return own: search info struct */
-UNIV_INTERN
-btr_search_t*
-btr_search_info_create(
-/*===================*/
- mem_heap_t* heap) /*!< in: heap where created */
-{
- btr_search_t* info;
-
- info = mem_heap_alloc(heap, sizeof(btr_search_t));
-
-#ifdef UNIV_DEBUG
- info->magic_n = BTR_SEARCH_MAGIC_N;
-#endif /* UNIV_DEBUG */
-
- info->ref_count = 0;
- info->root_guess = NULL;
-
- info->hash_analysis = 0;
- info->n_hash_potential = 0;
-
- info->last_hash_succ = FALSE;
-
-#ifdef UNIV_SEARCH_PERF_STAT
- info->n_hash_succ = 0;
- info->n_hash_fail = 0;
- info->n_patt_succ = 0;
- info->n_searches = 0;
-#endif /* UNIV_SEARCH_PERF_STAT */
-
- /* Set some sensible values */
- info->n_fields = 1;
- info->n_bytes = 0;
-
- info->left_side = TRUE;
-
- return(info);
-}
-
-/*****************************************************************//**
-Returns the value of ref_count. The value is protected by
-btr_search_latch.
-@return ref_count value. */
-UNIV_INTERN
-ulint
-btr_search_info_get_ref_count(
-/*==========================*/
- btr_search_t* info) /*!< in: search info. */
-{
- ulint ret;
-
- ut_ad(info);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- rw_lock_s_lock(&btr_search_latch);
- ret = info->ref_count;
- rw_lock_s_unlock(&btr_search_latch);
-
- return(ret);
-}
-
-/*********************************************************************//**
-Updates the search info of an index about hash successes. NOTE that info
-is NOT protected by any semaphore, to save CPU time! Do not assume its fields
-are consistent. */
-static
-void
-btr_search_info_update_hash(
-/*========================*/
- btr_search_t* info, /*!< in/out: search info */
- btr_cur_t* cursor) /*!< in: cursor which was just positioned */
-{
- dict_index_t* index;
- ulint n_unique;
- int cmp;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- index = cursor->index;
-
- if (dict_index_is_ibuf(index)) {
- /* So many deletes are performed on an insert buffer tree
- that we do not consider a hash index useful on it: */
-
- return;
- }
-
- n_unique = dict_index_get_n_unique_in_tree(index);
-
- if (info->n_hash_potential == 0) {
-
- goto set_new_recomm;
- }
-
- /* Test if the search would have succeeded using the recommended
- hash prefix */
-
- if (info->n_fields >= n_unique && cursor->up_match >= n_unique) {
-increment_potential:
- info->n_hash_potential++;
-
- return;
- }
-
- cmp = ut_pair_cmp(info->n_fields, info->n_bytes,
- cursor->low_match, cursor->low_bytes);
-
- if (info->left_side ? cmp <= 0 : cmp > 0) {
-
- goto set_new_recomm;
- }
-
- cmp = ut_pair_cmp(info->n_fields, info->n_bytes,
- cursor->up_match, cursor->up_bytes);
-
- if (info->left_side ? cmp <= 0 : cmp > 0) {
-
- goto increment_potential;
- }
-
-set_new_recomm:
- /* We have to set a new recommendation; skip the hash analysis
- for a while to avoid unnecessary CPU time usage when there is no
- chance for success */
-
- info->hash_analysis = 0;
-
- cmp = ut_pair_cmp(cursor->up_match, cursor->up_bytes,
- cursor->low_match, cursor->low_bytes);
- if (cmp == 0) {
- info->n_hash_potential = 0;
-
- /* For extra safety, we set some sensible values here */
-
- info->n_fields = 1;
- info->n_bytes = 0;
-
- info->left_side = TRUE;
-
- } else if (cmp > 0) {
- info->n_hash_potential = 1;
-
- if (cursor->up_match >= n_unique) {
-
- info->n_fields = n_unique;
- info->n_bytes = 0;
-
- } else if (cursor->low_match < cursor->up_match) {
-
- info->n_fields = cursor->low_match + 1;
- info->n_bytes = 0;
- } else {
- info->n_fields = cursor->low_match;
- info->n_bytes = cursor->low_bytes + 1;
- }
-
- info->left_side = TRUE;
- } else {
- info->n_hash_potential = 1;
-
- if (cursor->low_match >= n_unique) {
-
- info->n_fields = n_unique;
- info->n_bytes = 0;
-
- } else if (cursor->low_match > cursor->up_match) {
-
- info->n_fields = cursor->up_match + 1;
- info->n_bytes = 0;
- } else {
- info->n_fields = cursor->up_match;
- info->n_bytes = cursor->up_bytes + 1;
- }
-
- info->left_side = FALSE;
- }
-}
-
-/*********************************************************************//**
-Updates the block search info on hash successes. NOTE that info and
-block->n_hash_helps, n_fields, n_bytes, side are NOT protected by any
-semaphore, to save CPU time! Do not assume the fields are consistent.
-@return TRUE if building a (new) hash index on the block is recommended */
-static
-ibool
-btr_search_update_block_hash_info(
-/*==============================*/
- btr_search_t* info, /*!< in: search info */
- buf_block_t* block, /*!< in: buffer block */
- btr_cur_t* cursor __attribute__((unused)))
- /*!< in: cursor */
-{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
- ut_ad(rw_lock_own(&block->lock, RW_LOCK_SHARED)
- || rw_lock_own(&block->lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(cursor);
-
- info->last_hash_succ = FALSE;
-
- ut_a(buf_block_state_valid(block));
- ut_ad(info->magic_n == BTR_SEARCH_MAGIC_N);
-
- if ((block->n_hash_helps > 0)
- && (info->n_hash_potential > 0)
- && (block->n_fields == info->n_fields)
- && (block->n_bytes == info->n_bytes)
- && (block->left_side == info->left_side)) {
-
- if ((block->is_hashed)
- && (block->curr_n_fields == info->n_fields)
- && (block->curr_n_bytes == info->n_bytes)
- && (block->curr_left_side == info->left_side)) {
-
- /* The search would presumably have succeeded using
- the hash index */
-
- info->last_hash_succ = TRUE;
- }
-
- block->n_hash_helps++;
- } else {
- block->n_hash_helps = 1;
- block->n_fields = info->n_fields;
- block->n_bytes = info->n_bytes;
- block->left_side = info->left_side;
- }
-
-#ifdef UNIV_DEBUG
- if (cursor->index->table->does_not_fit_in_memory) {
- block->n_hash_helps = 0;
- }
-#endif /* UNIV_DEBUG */
-
- if ((block->n_hash_helps > page_get_n_recs(block->frame)
- / BTR_SEARCH_PAGE_BUILD_LIMIT)
- && (info->n_hash_potential >= BTR_SEARCH_BUILD_LIMIT)) {
-
- if ((!block->is_hashed)
- || (block->n_hash_helps
- > 2 * page_get_n_recs(block->frame))
- || (block->n_fields != block->curr_n_fields)
- || (block->n_bytes != block->curr_n_bytes)
- || (block->left_side != block->curr_left_side)) {
-
- /* Build a new hash index on the page */
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Updates a hash node reference when it has been unsuccessfully used in a
-search which could have succeeded with the used hash parameters. This can
-happen because when building a hash index for a page, we do not check
-what happens at page boundaries, and therefore there can be misleading
-hash nodes. Also, collisions in the fold value can lead to misleading
-references. This function lazily fixes these imperfections in the hash
-index. */
-static
-void
-btr_search_update_hash_ref(
-/*=======================*/
- btr_search_t* info, /*!< in: search info */
- buf_block_t* block, /*!< in: buffer block where cursor positioned */
- btr_cur_t* cursor) /*!< in: cursor */
-{
- ulint fold;
- rec_t* rec;
- dulint index_id;
-
- ut_ad(cursor->flag == BTR_CUR_HASH_FAIL);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
- || rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(page_align(btr_cur_get_rec(cursor))
- == buf_block_get_frame(block));
-
- if (!block->is_hashed) {
-
- return;
- }
-
- ut_a(block->index == cursor->index);
- ut_a(!dict_index_is_ibuf(cursor->index));
-
- if ((info->n_hash_potential > 0)
- && (block->curr_n_fields == info->n_fields)
- && (block->curr_n_bytes == info->n_bytes)
- && (block->curr_left_side == info->left_side)) {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- rec_offs_init(offsets_);
-
- rec = btr_cur_get_rec(cursor);
-
- if (!page_rec_is_user_rec(rec)) {
-
- return;
- }
-
- index_id = cursor->index->id;
- fold = rec_fold(rec,
- rec_get_offsets(rec, cursor->index, offsets_,
- ULINT_UNDEFINED, &heap),
- block->curr_n_fields,
- block->curr_n_bytes, index_id);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- ha_insert_for_fold(btr_search_sys->hash_index, fold,
- block, rec);
- }
-}
-
-/*********************************************************************//**
-Updates the search info. */
-UNIV_INTERN
-void
-btr_search_info_update_slow(
-/*========================*/
- btr_search_t* info, /*!< in/out: search info */
- btr_cur_t* cursor) /*!< in: cursor which was just positioned */
-{
- buf_block_t* block;
- ibool build_index;
- ulint* params;
- ulint* params2;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- block = btr_cur_get_block(cursor);
-
- /* NOTE that the following two function calls do NOT protect
- info or block->n_fields etc. with any semaphore, to save CPU time!
- We cannot assume the fields are consistent when we return from
- those functions! */
-
- btr_search_info_update_hash(info, cursor);
-
- build_index = btr_search_update_block_hash_info(info, block, cursor);
-
- if (build_index || (cursor->flag == BTR_CUR_HASH_FAIL)) {
-
- btr_search_check_free_space_in_heap();
- }
-
- if (cursor->flag == BTR_CUR_HASH_FAIL) {
- /* Update the hash node reference, if appropriate */
-
-#ifdef UNIV_SEARCH_PERF_STAT
- btr_search_n_hash_fail++;
-#endif /* UNIV_SEARCH_PERF_STAT */
-
- rw_lock_x_lock(&btr_search_latch);
-
- btr_search_update_hash_ref(info, block, cursor);
-
- rw_lock_x_unlock(&btr_search_latch);
- }
-
- if (build_index) {
- /* Note that since we did not protect block->n_fields etc.
- with any semaphore, the values can be inconsistent. We have
- to check inside the function call that they make sense. We
- also malloc an array and store the values there to make sure
- the compiler does not let the function call parameters change
- inside the called function. It might be that the compiler
- would optimize the call just to pass pointers to block. */
-
- params = mem_alloc(3 * sizeof(ulint));
- params[0] = block->n_fields;
- params[1] = block->n_bytes;
- params[2] = block->left_side;
-
- /* Make sure the compiler cannot deduce the values and do
- optimizations */
-
- params2 = params + btr_search_this_is_zero;
-
- btr_search_build_page_hash_index(cursor->index,
- block,
- params2[0],
- params2[1],
- params2[2]);
- mem_free(params);
- }
-}
-
-/******************************************************************//**
-Checks if a guessed position for a tree cursor is right. Note that if
-mode is PAGE_CUR_LE, which is used in inserts, and the function returns
-TRUE, then cursor->up_match and cursor->low_match both have sensible values.
-@return TRUE if success */
-static
-ibool
-btr_search_check_guess(
-/*===================*/
- btr_cur_t* cursor, /*!< in: guessed cursor position */
- ibool can_only_compare_to_cursor_rec,
- /*!< in: if we do not have a latch on the page
- of cursor, but only a latch on
- btr_search_latch, then ONLY the columns
- of the record UNDER the cursor are
- protected, not the next or previous record
- in the chain: we cannot look at the next or
- previous record to check our guess! */
- const dtuple_t* tuple, /*!< in: data tuple */
- ulint mode, /*!< in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
- or PAGE_CUR_GE */
- mtr_t* mtr) /*!< in: mtr */
-{
- rec_t* rec;
- ulint n_unique;
- ulint match;
- ulint bytes;
- int cmp;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- ibool success = FALSE;
- rec_offs_init(offsets_);
-
- n_unique = dict_index_get_n_unique_in_tree(cursor->index);
-
- rec = btr_cur_get_rec(cursor);
-
- ut_ad(page_rec_is_user_rec(rec));
-
- match = 0;
- bytes = 0;
-
- offsets = rec_get_offsets(rec, cursor->index, offsets,
- n_unique, &heap);
- cmp = page_cmp_dtuple_rec_with_match(tuple, rec,
- offsets, &match, &bytes);
-
- if (mode == PAGE_CUR_GE) {
- if (cmp == 1) {
- goto exit_func;
- }
-
- cursor->up_match = match;
-
- if (match >= n_unique) {
- success = TRUE;
- goto exit_func;
- }
- } else if (mode == PAGE_CUR_LE) {
- if (cmp == -1) {
- goto exit_func;
- }
-
- cursor->low_match = match;
-
- } else if (mode == PAGE_CUR_G) {
- if (cmp != -1) {
- goto exit_func;
- }
- } else if (mode == PAGE_CUR_L) {
- if (cmp != 1) {
- goto exit_func;
- }
- }
-
- if (can_only_compare_to_cursor_rec) {
- /* Since we could not determine if our guess is right just by
- looking at the record under the cursor, return FALSE */
- goto exit_func;
- }
-
- match = 0;
- bytes = 0;
-
- if ((mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE)) {
- rec_t* prev_rec;
-
- ut_ad(!page_rec_is_infimum(rec));
-
- prev_rec = page_rec_get_prev(rec);
-
- if (page_rec_is_infimum(prev_rec)) {
- success = btr_page_get_prev(page_align(prev_rec), mtr)
- == FIL_NULL;
-
- goto exit_func;
- }
-
- offsets = rec_get_offsets(prev_rec, cursor->index, offsets,
- n_unique, &heap);
- cmp = page_cmp_dtuple_rec_with_match(tuple, prev_rec,
- offsets, &match, &bytes);
- if (mode == PAGE_CUR_GE) {
- success = cmp == 1;
- } else {
- success = cmp != -1;
- }
-
- goto exit_func;
- } else {
- rec_t* next_rec;
-
- ut_ad(!page_rec_is_supremum(rec));
-
- next_rec = page_rec_get_next(rec);
-
- if (page_rec_is_supremum(next_rec)) {
- if (btr_page_get_next(page_align(next_rec), mtr)
- == FIL_NULL) {
-
- cursor->up_match = 0;
- success = TRUE;
- }
-
- goto exit_func;
- }
-
- offsets = rec_get_offsets(next_rec, cursor->index, offsets,
- n_unique, &heap);
- cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec,
- offsets, &match, &bytes);
- if (mode == PAGE_CUR_LE) {
- success = cmp == -1;
- cursor->up_match = match;
- } else {
- success = cmp != 1;
- }
- }
-exit_func:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(success);
-}
-
-/******************************************************************//**
-Tries to guess the right search position based on the hash search info
-of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
-and the function returns TRUE, then cursor->up_match and cursor->low_match
-both have sensible values.
-@return TRUE if succeeded */
-UNIV_INTERN
-ibool
-btr_search_guess_on_hash(
-/*=====================*/
- dict_index_t* index, /*!< in: index */
- btr_search_t* info, /*!< in: index search info */
- const dtuple_t* tuple, /*!< in: logical record */
- ulint mode, /*!< in: PAGE_CUR_L, ... */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ...;
- NOTE that only if has_search_latch
- is 0, we will have a latch set on
- the cursor page, otherwise we assume
- the caller uses his search latch
- to protect the record! */
- btr_cur_t* cursor, /*!< out: tree cursor */
- ulint has_search_latch,/*!< in: latch mode the caller
- currently has on btr_search_latch:
- RW_S_LATCH, RW_X_LATCH, or 0 */
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_block_t* block;
- rec_t* rec;
- ulint fold;
- dulint index_id;
-#ifdef notdefined
- btr_cur_t cursor2;
- btr_pcur_t pcur;
-#endif
- ut_ad(index && info && tuple && cursor && mtr);
- ut_ad((latch_mode == BTR_SEARCH_LEAF)
- || (latch_mode == BTR_MODIFY_LEAF));
-
- /* Note that, for efficiency, the struct info may not be protected by
- any latch here! */
-
- if (UNIV_UNLIKELY(info->n_hash_potential == 0)) {
-
- return(FALSE);
- }
-
- cursor->n_fields = info->n_fields;
- cursor->n_bytes = info->n_bytes;
-
- if (UNIV_UNLIKELY(dtuple_get_n_fields(tuple)
- < cursor->n_fields + (cursor->n_bytes > 0))) {
-
- return(FALSE);
- }
-
- index_id = index->id;
-
-#ifdef UNIV_SEARCH_PERF_STAT
- info->n_hash_succ++;
-#endif
- fold = dtuple_fold(tuple, cursor->n_fields, cursor->n_bytes, index_id);
-
- cursor->fold = fold;
- cursor->flag = BTR_CUR_HASH;
-
- if (UNIV_LIKELY(!has_search_latch)) {
- rw_lock_s_lock(&btr_search_latch);
-
- if (UNIV_UNLIKELY(!btr_search_enabled)) {
- goto failure_unlock;
- }
- }
-
- ut_ad(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_EX);
- ut_ad(rw_lock_get_reader_count(&btr_search_latch) > 0);
-
- rec = ha_search_and_get_data(btr_search_sys->hash_index, fold);
-
- if (UNIV_UNLIKELY(!rec)) {
- goto failure_unlock;
- }
-
- block = buf_block_align(rec);
-
- if (UNIV_LIKELY(!has_search_latch)) {
-
- if (UNIV_UNLIKELY(
- !buf_page_get_known_nowait(latch_mode, block,
- BUF_MAKE_YOUNG,
- __FILE__, __LINE__,
- mtr))) {
- goto failure_unlock;
- }
-
- rw_lock_s_unlock(&btr_search_latch);
-
- buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
- }
-
- if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {
- ut_ad(buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH);
-
- if (UNIV_LIKELY(!has_search_latch)) {
-
- btr_leaf_page_release(block, latch_mode, mtr);
- }
-
- goto failure;
- }
-
- ut_ad(page_rec_is_user_rec(rec));
-
- btr_cur_position(index, rec, block, cursor);
-
- /* Check the validity of the guess within the page */
-
- /* If we only have the latch on btr_search_latch, not on the
- page, it only protects the columns of the record the cursor
- is positioned on. We cannot look at the next of the previous
- record to determine if our guess for the cursor position is
- right. */
- if (UNIV_EXPECT
- (ut_dulint_cmp(index_id, btr_page_get_index_id(block->frame)), 0)
- || !btr_search_check_guess(cursor,
- has_search_latch,
- tuple, mode, mtr)) {
- if (UNIV_LIKELY(!has_search_latch)) {
- btr_leaf_page_release(block, latch_mode, mtr);
- }
-
- goto failure;
- }
-
- if (UNIV_LIKELY(info->n_hash_potential < BTR_SEARCH_BUILD_LIMIT + 5)) {
-
- info->n_hash_potential++;
- }
-
-#ifdef notdefined
- /* These lines of code can be used in a debug version to check
- the correctness of the searched cursor position: */
-
- info->last_hash_succ = FALSE;
-
- /* Currently, does not work if the following fails: */
- ut_ad(!has_search_latch);
-
- btr_leaf_page_release(block, latch_mode, mtr);
-
- btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
- &cursor2, 0, mtr);
- if (mode == PAGE_CUR_GE
- && page_rec_is_supremum(btr_cur_get_rec(&cursor2))) {
-
- /* If mode is PAGE_CUR_GE, then the binary search
- in the index tree may actually take us to the supremum
- of the previous page */
-
- info->last_hash_succ = FALSE;
-
- btr_pcur_open_on_user_rec(index, tuple, mode, latch_mode,
- &pcur, mtr);
- ut_ad(btr_pcur_get_rec(&pcur) == btr_cur_get_rec(cursor));
- } else {
- ut_ad(btr_cur_get_rec(&cursor2) == btr_cur_get_rec(cursor));
- }
-
- /* NOTE that it is theoretically possible that the above assertions
- fail if the page of the cursor gets removed from the buffer pool
- meanwhile! Thus it might not be a bug. */
-#endif
- info->last_hash_succ = TRUE;
-
-#ifdef UNIV_SEARCH_PERF_STAT
- btr_search_n_succ++;
-#endif
- if (UNIV_LIKELY(!has_search_latch)
- && buf_page_peek_if_too_old(&block->page)) {
-
- buf_page_make_young(&block->page);
- }
-
- /* Increment the page get statistics though we did not really
- fix the page: for user info only */
-
- buf_pool->stat.n_page_gets++;
-
- return(TRUE);
-
- /*-------------------------------------------*/
-failure_unlock:
- if (UNIV_LIKELY(!has_search_latch)) {
- rw_lock_s_unlock(&btr_search_latch);
- }
-failure:
- cursor->flag = BTR_CUR_HASH_FAIL;
-
-#ifdef UNIV_SEARCH_PERF_STAT
- info->n_hash_fail++;
-
- if (info->n_hash_succ > 0) {
- info->n_hash_succ--;
- }
-#endif
- info->last_hash_succ = FALSE;
-
- return(FALSE);
-}
-
-/********************************************************************//**
-Drops a page hash index. */
-UNIV_INTERN
-void
-btr_search_drop_page_hash_index(
-/*============================*/
- buf_block_t* block) /*!< in: block containing index page,
- s- or x-latched, or an index page
- for which we know that
- block->buf_fix_count == 0 */
-{
- hash_table_t* table;
- ulint n_fields;
- ulint n_bytes;
- const page_t* page;
- const rec_t* rec;
- ulint fold;
- ulint prev_fold;
- dulint index_id;
- ulint n_cached;
- ulint n_recs;
- ulint* folds;
- ulint i;
- mem_heap_t* heap;
- const dict_index_t* index;
- ulint* offsets;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
-retry:
- rw_lock_s_lock(&btr_search_latch);
- page = block->frame;
-
- if (UNIV_LIKELY(!block->is_hashed)) {
-
- rw_lock_s_unlock(&btr_search_latch);
-
- return;
- }
-
- table = btr_search_sys->hash_index;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
- || rw_lock_own(&(block->lock), RW_LOCK_EX)
- || (block->page.buf_fix_count == 0));
-#endif /* UNIV_SYNC_DEBUG */
-
- n_fields = block->curr_n_fields;
- n_bytes = block->curr_n_bytes;
- index = block->index;
- ut_a(!dict_index_is_ibuf(index));
-
- /* NOTE: The fields of block must not be accessed after
- releasing btr_search_latch, as the index page might only
- be s-latched! */
-
- rw_lock_s_unlock(&btr_search_latch);
-
- ut_a(n_fields + n_bytes > 0);
-
- n_recs = page_get_n_recs(page);
-
- /* Calculate and cache fold values into an array for fast deletion
- from the hash index */
-
- folds = mem_alloc(n_recs * sizeof(ulint));
-
- n_cached = 0;
-
- rec = page_get_infimum_rec(page);
- rec = page_rec_get_next_low(rec, page_is_comp(page));
-
- index_id = btr_page_get_index_id(page);
-
- ut_a(0 == ut_dulint_cmp(index_id, index->id));
-
- prev_fold = 0;
-
- heap = NULL;
- offsets = NULL;
-
- while (!page_rec_is_supremum(rec)) {
- offsets = rec_get_offsets(rec, index, offsets,
- n_fields + (n_bytes > 0), &heap);
- ut_a(rec_offs_n_fields(offsets) == n_fields + (n_bytes > 0));
- fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id);
-
- if (fold == prev_fold && prev_fold != 0) {
-
- goto next_rec;
- }
-
- /* Remove all hash nodes pointing to this page from the
- hash chain */
-
- folds[n_cached] = fold;
- n_cached++;
-next_rec:
- rec = page_rec_get_next_low(rec, page_rec_is_comp(rec));
- prev_fold = fold;
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- rw_lock_x_lock(&btr_search_latch);
-
- if (UNIV_UNLIKELY(!block->is_hashed)) {
- /* Someone else has meanwhile dropped the hash index */
-
- goto cleanup;
- }
-
- ut_a(block->index == index);
-
- if (UNIV_UNLIKELY(block->curr_n_fields != n_fields)
- || UNIV_UNLIKELY(block->curr_n_bytes != n_bytes)) {
-
- /* Someone else has meanwhile built a new hash index on the
- page, with different parameters */
-
- rw_lock_x_unlock(&btr_search_latch);
-
- mem_free(folds);
- goto retry;
- }
-
- for (i = 0; i < n_cached; i++) {
-
- ha_remove_all_nodes_to_page(table, folds[i], page);
- }
-
- ut_a(index->search_info->ref_count > 0);
- index->search_info->ref_count--;
-
- block->is_hashed = FALSE;
- block->index = NULL;
-
-cleanup:
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- if (UNIV_UNLIKELY(block->n_pointers)) {
- /* Corruption */
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Corruption of adaptive hash index."
- " After dropping\n"
- "InnoDB: the hash index to a page of %s,"
- " still %lu hash nodes remain.\n",
- index->name, (ulong) block->n_pointers);
- rw_lock_x_unlock(&btr_search_latch);
-
- btr_search_validate();
- } else {
- rw_lock_x_unlock(&btr_search_latch);
- }
-#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- rw_lock_x_unlock(&btr_search_latch);
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-
- mem_free(folds);
-}
-
-/********************************************************************//**
-Drops a page hash index when a page is freed from a fseg to the file system.
-Drops possible hash index if the page happens to be in the buffer pool. */
-UNIV_INTERN
-void
-btr_search_drop_page_hash_when_freed(
-/*=================================*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no) /*!< in: page number */
-{
- buf_block_t* block;
- mtr_t mtr;
-
- if (!buf_page_peek_if_search_hashed(space, page_no)) {
-
- return;
- }
-
- mtr_start(&mtr);
-
- /* We assume that if the caller has a latch on the page, then the
- caller has already dropped the hash index for the page, and we never
- get here. Therefore we can acquire the s-latch to the page without
- having to fear a deadlock. */
-
- block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH, NULL,
- BUF_GET_IF_IN_POOL, __FILE__, __LINE__,
- &mtr);
- /* Because the buffer pool mutex was released by
- buf_page_peek_if_search_hashed(), it is possible that the
- block was removed from the buffer pool by another thread
- before buf_page_get_gen() got a chance to acquire the buffer
- pool mutex again. Thus, we must check for a NULL return. */
-
- if (UNIV_LIKELY(block != NULL)) {
-
- buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
-
- btr_search_drop_page_hash_index(block);
- }
-
- mtr_commit(&mtr);
-}
-
-/********************************************************************//**
-Builds a hash index on a page with the given parameters. If the page already
-has a hash index with different parameters, the old hash index is removed.
-If index is non-NULL, this function checks if n_fields and n_bytes are
-sensible values, and does not build a hash index if not. */
-static
-void
-btr_search_build_page_hash_index(
-/*=============================*/
- dict_index_t* index, /*!< in: index for which to build */
- buf_block_t* block, /*!< in: index page, s- or x-latched */
- ulint n_fields,/*!< in: hash this many full fields */
- ulint n_bytes,/*!< in: hash this many bytes from the next
- field */
- ibool left_side)/*!< in: hash for searches from left side? */
-{
- hash_table_t* table;
- page_t* page;
- rec_t* rec;
- rec_t* next_rec;
- ulint fold;
- ulint next_fold;
- dulint index_id;
- ulint n_cached;
- ulint n_recs;
- ulint* folds;
- rec_t** recs;
- ulint i;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- ut_ad(index);
- ut_a(!dict_index_is_ibuf(index));
-
- table = btr_search_sys->hash_index;
- page = buf_block_get_frame(block);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
- || rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- rw_lock_s_lock(&btr_search_latch);
-
- if (block->is_hashed && ((block->curr_n_fields != n_fields)
- || (block->curr_n_bytes != n_bytes)
- || (block->curr_left_side != left_side))) {
-
- rw_lock_s_unlock(&btr_search_latch);
-
- btr_search_drop_page_hash_index(block);
- } else {
- rw_lock_s_unlock(&btr_search_latch);
- }
-
- n_recs = page_get_n_recs(page);
-
- if (n_recs == 0) {
-
- return;
- }
-
- /* Check that the values for hash index build are sensible */
-
- if (n_fields + n_bytes == 0) {
-
- return;
- }
-
- if (dict_index_get_n_unique_in_tree(index) < n_fields
- || (dict_index_get_n_unique_in_tree(index) == n_fields
- && n_bytes > 0)) {
- return;
- }
-
- /* Calculate and cache fold values and corresponding records into
- an array for fast insertion to the hash index */
-
- folds = mem_alloc(n_recs * sizeof(ulint));
- recs = mem_alloc(n_recs * sizeof(rec_t*));
-
- n_cached = 0;
-
- index_id = btr_page_get_index_id(page);
-
- rec = page_rec_get_next(page_get_infimum_rec(page));
-
- offsets = rec_get_offsets(rec, index, offsets,
- n_fields + (n_bytes > 0), &heap);
-
- if (!page_rec_is_supremum(rec)) {
- ut_a(n_fields <= rec_offs_n_fields(offsets));
-
- if (n_bytes > 0) {
- ut_a(n_fields < rec_offs_n_fields(offsets));
- }
- }
-
- fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id);
-
- if (left_side) {
-
- folds[n_cached] = fold;
- recs[n_cached] = rec;
- n_cached++;
- }
-
- for (;;) {
- next_rec = page_rec_get_next(rec);
-
- if (page_rec_is_supremum(next_rec)) {
-
- if (!left_side) {
-
- folds[n_cached] = fold;
- recs[n_cached] = rec;
- n_cached++;
- }
-
- break;
- }
-
- offsets = rec_get_offsets(next_rec, index, offsets,
- n_fields + (n_bytes > 0), &heap);
- next_fold = rec_fold(next_rec, offsets, n_fields,
- n_bytes, index_id);
-
- if (fold != next_fold) {
- /* Insert an entry into the hash index */
-
- if (left_side) {
-
- folds[n_cached] = next_fold;
- recs[n_cached] = next_rec;
- n_cached++;
- } else {
- folds[n_cached] = fold;
- recs[n_cached] = rec;
- n_cached++;
- }
- }
-
- rec = next_rec;
- fold = next_fold;
- }
-
- btr_search_check_free_space_in_heap();
-
- rw_lock_x_lock(&btr_search_latch);
-
- if (UNIV_UNLIKELY(!btr_search_enabled)) {
- goto exit_func;
- }
-
- if (block->is_hashed && ((block->curr_n_fields != n_fields)
- || (block->curr_n_bytes != n_bytes)
- || (block->curr_left_side != left_side))) {
- goto exit_func;
- }
-
- /* This counter is decremented every time we drop page
- hash index entries and is incremented here. Since we can
- rebuild hash index for a page that is already hashed, we
- have to take care not to increment the counter in that
- case. */
- if (!block->is_hashed) {
- index->search_info->ref_count++;
- }
-
- block->is_hashed = TRUE;
- block->n_hash_helps = 0;
-
- block->curr_n_fields = n_fields;
- block->curr_n_bytes = n_bytes;
- block->curr_left_side = left_side;
- block->index = index;
-
- for (i = 0; i < n_cached; i++) {
-
- ha_insert_for_fold(table, folds[i], block, recs[i]);
- }
-
-exit_func:
- rw_lock_x_unlock(&btr_search_latch);
-
- mem_free(folds);
- mem_free(recs);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-}
-
-/********************************************************************//**
-Moves or deletes hash entries for moved records. If new_page is already hashed,
-then the hash index for page, if any, is dropped. If new_page is not hashed,
-and page is hashed, then a new hash index is built to new_page with the same
-parameters as page (this often happens when a page is split). */
-UNIV_INTERN
-void
-btr_search_move_or_delete_hash_entries(
-/*===================================*/
- buf_block_t* new_block, /*!< in: records are copied
- to this page */
- buf_block_t* block, /*!< in: index page from which
- records were copied, and the
- copied records will be deleted
- from this page */
- dict_index_t* index) /*!< in: record descriptor */
-{
- ulint n_fields;
- ulint n_bytes;
- ibool left_side;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
- ut_ad(rw_lock_own(&(new_block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_a(!new_block->is_hashed || new_block->index == index);
- ut_a(!block->is_hashed || block->index == index);
- ut_a(!(new_block->is_hashed || block->is_hashed)
- || !dict_index_is_ibuf(index));
-
- rw_lock_s_lock(&btr_search_latch);
-
- if (new_block->is_hashed) {
-
- rw_lock_s_unlock(&btr_search_latch);
-
- btr_search_drop_page_hash_index(block);
-
- return;
- }
-
- if (block->is_hashed) {
-
- n_fields = block->curr_n_fields;
- n_bytes = block->curr_n_bytes;
- left_side = block->curr_left_side;
-
- new_block->n_fields = block->curr_n_fields;
- new_block->n_bytes = block->curr_n_bytes;
- new_block->left_side = left_side;
-
- rw_lock_s_unlock(&btr_search_latch);
-
- ut_a(n_fields + n_bytes > 0);
-
- btr_search_build_page_hash_index(index, new_block, n_fields,
- n_bytes, left_side);
- ut_ad(n_fields == block->curr_n_fields);
- ut_ad(n_bytes == block->curr_n_bytes);
- ut_ad(left_side == block->curr_left_side);
- return;
- }
-
- rw_lock_s_unlock(&btr_search_latch);
-}
-
-/********************************************************************//**
-Updates the page hash index when a single record is deleted from a page. */
-UNIV_INTERN
-void
-btr_search_update_hash_on_delete(
-/*=============================*/
- btr_cur_t* cursor) /*!< in: cursor which was positioned on the
- record to delete using btr_cur_search_...,
- the record is not yet deleted */
-{
- hash_table_t* table;
- buf_block_t* block;
- rec_t* rec;
- ulint fold;
- dulint index_id;
- ibool found;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- mem_heap_t* heap = NULL;
- rec_offs_init(offsets_);
-
- rec = btr_cur_get_rec(cursor);
-
- block = btr_cur_get_block(cursor);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- if (!block->is_hashed) {
-
- return;
- }
-
- ut_a(block->index == cursor->index);
- ut_a(block->curr_n_fields + block->curr_n_bytes > 0);
- ut_a(!dict_index_is_ibuf(cursor->index));
-
- table = btr_search_sys->hash_index;
-
- index_id = cursor->index->id;
- fold = rec_fold(rec, rec_get_offsets(rec, cursor->index, offsets_,
- ULINT_UNDEFINED, &heap),
- block->curr_n_fields, block->curr_n_bytes, index_id);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- rw_lock_x_lock(&btr_search_latch);
-
- found = ha_search_and_delete_if_found(table, fold, rec);
-
- rw_lock_x_unlock(&btr_search_latch);
-}
-
-/********************************************************************//**
-Updates the page hash index when a single record is inserted on a page. */
-UNIV_INTERN
-void
-btr_search_update_hash_node_on_insert(
-/*==================================*/
- btr_cur_t* cursor) /*!< in: cursor which was positioned to the
- place to insert using btr_cur_search_...,
- and the new record has been inserted next
- to the cursor */
-{
- hash_table_t* table;
- buf_block_t* block;
- rec_t* rec;
-
- rec = btr_cur_get_rec(cursor);
-
- block = btr_cur_get_block(cursor);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- if (!block->is_hashed) {
-
- return;
- }
-
- ut_a(block->index == cursor->index);
- ut_a(!dict_index_is_ibuf(cursor->index));
-
- rw_lock_x_lock(&btr_search_latch);
-
- if ((cursor->flag == BTR_CUR_HASH)
- && (cursor->n_fields == block->curr_n_fields)
- && (cursor->n_bytes == block->curr_n_bytes)
- && !block->curr_left_side) {
-
- table = btr_search_sys->hash_index;
-
- ha_search_and_update_if_found(table, cursor->fold, rec,
- block, page_rec_get_next(rec));
-
- rw_lock_x_unlock(&btr_search_latch);
- } else {
- rw_lock_x_unlock(&btr_search_latch);
-
- btr_search_update_hash_on_insert(cursor);
- }
-}
-
-/********************************************************************//**
-Updates the page hash index when a single record is inserted on a page. */
-UNIV_INTERN
-void
-btr_search_update_hash_on_insert(
-/*=============================*/
- btr_cur_t* cursor) /*!< in: cursor which was positioned to the
- place to insert using btr_cur_search_...,
- and the new record has been inserted next
- to the cursor */
-{
- hash_table_t* table;
- buf_block_t* block;
- rec_t* rec;
- rec_t* ins_rec;
- rec_t* next_rec;
- dulint index_id;
- ulint fold;
- ulint ins_fold;
- ulint next_fold = 0; /* remove warning (??? bug ???) */
- ulint n_fields;
- ulint n_bytes;
- ibool left_side;
- ibool locked = FALSE;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- table = btr_search_sys->hash_index;
-
- btr_search_check_free_space_in_heap();
-
- rec = btr_cur_get_rec(cursor);
-
- block = btr_cur_get_block(cursor);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- if (!block->is_hashed) {
-
- return;
- }
-
- ut_a(block->index == cursor->index);
- ut_a(!dict_index_is_ibuf(cursor->index));
-
- index_id = cursor->index->id;
-
- n_fields = block->curr_n_fields;
- n_bytes = block->curr_n_bytes;
- left_side = block->curr_left_side;
-
- ins_rec = page_rec_get_next(rec);
- next_rec = page_rec_get_next(ins_rec);
-
- offsets = rec_get_offsets(ins_rec, cursor->index, offsets,
- ULINT_UNDEFINED, &heap);
- ins_fold = rec_fold(ins_rec, offsets, n_fields, n_bytes, index_id);
-
- if (!page_rec_is_supremum(next_rec)) {
- offsets = rec_get_offsets(next_rec, cursor->index, offsets,
- n_fields + (n_bytes > 0), &heap);
- next_fold = rec_fold(next_rec, offsets, n_fields,
- n_bytes, index_id);
- }
-
- if (!page_rec_is_infimum(rec)) {
- offsets = rec_get_offsets(rec, cursor->index, offsets,
- n_fields + (n_bytes > 0), &heap);
- fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id);
- } else {
- if (left_side) {
-
- rw_lock_x_lock(&btr_search_latch);
-
- locked = TRUE;
-
- ha_insert_for_fold(table, ins_fold, block, ins_rec);
- }
-
- goto check_next_rec;
- }
-
- if (fold != ins_fold) {
-
- if (!locked) {
-
- rw_lock_x_lock(&btr_search_latch);
-
- locked = TRUE;
- }
-
- if (!left_side) {
- ha_insert_for_fold(table, fold, block, rec);
- } else {
- ha_insert_for_fold(table, ins_fold, block, ins_rec);
- }
- }
-
-check_next_rec:
- if (page_rec_is_supremum(next_rec)) {
-
- if (!left_side) {
-
- if (!locked) {
- rw_lock_x_lock(&btr_search_latch);
-
- locked = TRUE;
- }
-
- ha_insert_for_fold(table, ins_fold, block, ins_rec);
- }
-
- goto function_exit;
- }
-
- if (ins_fold != next_fold) {
-
- if (!locked) {
-
- rw_lock_x_lock(&btr_search_latch);
-
- locked = TRUE;
- }
-
- if (!left_side) {
-
- ha_insert_for_fold(table, ins_fold, block, ins_rec);
- /*
- fputs("Hash insert for ", stderr);
- dict_index_name_print(stderr, cursor->index);
- fprintf(stderr, " fold %lu\n", ins_fold);
- */
- } else {
- ha_insert_for_fold(table, next_fold, block, next_rec);
- }
- }
-
-function_exit:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- if (locked) {
- rw_lock_x_unlock(&btr_search_latch);
- }
-}
-
-/********************************************************************//**
-Validates the search system.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-btr_search_validate(void)
-/*=====================*/
-{
- ha_node_t* node;
- ulint n_page_dumps = 0;
- ibool ok = TRUE;
- ulint i;
- ulint cell_count;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
-
- /* How many cells to check before temporarily releasing
- btr_search_latch. */
- ulint chunk_size = 10000;
-
- rec_offs_init(offsets_);
-
- rw_lock_x_lock(&btr_search_latch);
- buf_pool_mutex_enter();
-
- cell_count = hash_get_n_cells(btr_search_sys->hash_index);
-
- for (i = 0; i < cell_count; i++) {
- /* We release btr_search_latch every once in a while to
- give other queries a chance to run. */
- if ((i != 0) && ((i % chunk_size) == 0)) {
- buf_pool_mutex_exit();
- rw_lock_x_unlock(&btr_search_latch);
- os_thread_yield();
- rw_lock_x_lock(&btr_search_latch);
- buf_pool_mutex_enter();
- }
-
- node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
-
- for (; node != NULL; node = node->next) {
- const buf_block_t* block
- = buf_block_align(node->data);
- const buf_block_t* hash_block;
-
- if (UNIV_LIKELY(buf_block_get_state(block)
- == BUF_BLOCK_FILE_PAGE)) {
-
- /* The space and offset are only valid
- for file blocks. It is possible that
- the block is being freed
- (BUF_BLOCK_REMOVE_HASH, see the
- assertion and the comment below) */
- hash_block = buf_block_hash_get(
- buf_block_get_space(block),
- buf_block_get_page_no(block));
- } else {
- hash_block = NULL;
- }
-
- if (hash_block) {
- ut_a(hash_block == block);
- } else {
- /* When a block is being freed,
- buf_LRU_search_and_free_block() first
- removes the block from
- buf_pool->page_hash by calling
- buf_LRU_block_remove_hashed_page().
- After that, it invokes
- btr_search_drop_page_hash_index() to
- remove the block from
- btr_search_sys->hash_index. */
-
- ut_a(buf_block_get_state(block)
- == BUF_BLOCK_REMOVE_HASH);
- }
-
- ut_a(!dict_index_is_ibuf(block->index));
-
- offsets = rec_get_offsets((const rec_t*) node->data,
- block->index, offsets,
- block->curr_n_fields
- + (block->curr_n_bytes > 0),
- &heap);
-
- if (!block->is_hashed || node->fold
- != rec_fold((rec_t*)(node->data),
- offsets,
- block->curr_n_fields,
- block->curr_n_bytes,
- btr_page_get_index_id(block->frame))) {
- const page_t* page = block->frame;
-
- ok = FALSE;
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error in an adaptive hash"
- " index pointer to page %lu\n"
- "InnoDB: ptr mem address %p"
- " index id %lu %lu,"
- " node fold %lu, rec fold %lu\n",
- (ulong) page_get_page_no(page),
- node->data,
- (ulong) ut_dulint_get_high(
- btr_page_get_index_id(page)),
- (ulong) ut_dulint_get_low(
- btr_page_get_index_id(page)),
- (ulong) node->fold,
- (ulong) rec_fold((rec_t*)(node->data),
- offsets,
- block->curr_n_fields,
- block->curr_n_bytes,
- btr_page_get_index_id(
- page)));
-
- fputs("InnoDB: Record ", stderr);
- rec_print_new(stderr, (rec_t*)node->data,
- offsets);
- fprintf(stderr, "\nInnoDB: on that page."
- " Page mem address %p, is hashed %lu,"
- " n fields %lu, n bytes %lu\n"
- "InnoDB: side %lu\n",
- (void*) page, (ulong) block->is_hashed,
- (ulong) block->curr_n_fields,
- (ulong) block->curr_n_bytes,
- (ulong) block->curr_left_side);
-
- if (n_page_dumps < 20) {
- buf_page_print(page, 0);
- n_page_dumps++;
- }
- }
- }
- }
-
- for (i = 0; i < cell_count; i += chunk_size) {
- ulint end_index = ut_min(i + chunk_size - 1, cell_count - 1);
-
- /* We release btr_search_latch every once in a while to
- give other queries a chance to run. */
- if (i != 0) {
- buf_pool_mutex_exit();
- rw_lock_x_unlock(&btr_search_latch);
- os_thread_yield();
- rw_lock_x_lock(&btr_search_latch);
- buf_pool_mutex_enter();
- }
-
- if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
- ok = FALSE;
- }
- }
-
- buf_pool_mutex_exit();
- rw_lock_x_unlock(&btr_search_latch);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- return(ok);
-}
diff --git a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0buf.c
deleted file mode 100644
index 111d396fbc5..00000000000
--- a/storage/innodb_plugin/buf/buf0buf.c
+++ /dev/null
@@ -1,4052 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file buf/buf0buf.c
-The database buffer buf_pool
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#include "buf0buf.h"
-
-#ifdef UNIV_NONINL
-#include "buf0buf.ic"
-#endif
-
-#include "mem0mem.h"
-#include "btr0btr.h"
-#include "fil0fil.h"
-#ifndef UNIV_HOTBACKUP
-#include "buf0buddy.h"
-#include "lock0lock.h"
-#include "btr0sea.h"
-#include "ibuf0ibuf.h"
-#include "trx0undo.h"
-#include "log0log.h"
-#endif /* !UNIV_HOTBACKUP */
-#include "srv0srv.h"
-#include "dict0dict.h"
-#include "log0recv.h"
-#include "page0zip.h"
-
-/*
- IMPLEMENTATION OF THE BUFFER POOL
- =================================
-
-Performance improvement:
-------------------------
-Thread scheduling in NT may be so slow that the OS wait mechanism should
-not be used even in waiting for disk reads to complete.
-Rather, we should put waiting query threads to the queue of
-waiting jobs, and let the OS thread do something useful while the i/o
-is processed. In this way we could remove most OS thread switches in
-an i/o-intensive benchmark like TPC-C.
-
-A possibility is to put a user space thread library between the database
-and NT. User space thread libraries might be very fast.
-
-SQL Server 7.0 can be configured to use 'fibers' which are lightweight
-threads in NT. These should be studied.
-
- Buffer frames and blocks
- ------------------------
-Following the terminology of Gray and Reuter, we call the memory
-blocks where file pages are loaded buffer frames. For each buffer
-frame there is a control block, or shortly, a block, in the buffer
-control array. The control info which does not need to be stored
-in the file along with the file page, resides in the control block.
-
- Buffer pool struct
- ------------------
-The buffer buf_pool contains a single mutex which protects all the
-control data structures of the buf_pool. The content of a buffer frame is
-protected by a separate read-write lock in its control block, though.
-These locks can be locked and unlocked without owning the buf_pool mutex.
-The OS events in the buf_pool struct can be waited for without owning the
-buf_pool mutex.
-
-The buf_pool mutex is a hot-spot in main memory, causing a lot of
-memory bus traffic on multiprocessor systems when processors
-alternately access the mutex. On our Pentium, the mutex is accessed
-maybe every 10 microseconds. We gave up the solution to have mutexes
-for each control block, for instance, because it seemed to be
-complicated.
-
-A solution to reduce mutex contention of the buf_pool mutex is to
-create a separate mutex for the page hash table. On Pentium,
-accessing the hash table takes 2 microseconds, about half
-of the total buf_pool mutex hold time.
-
- Control blocks
- --------------
-
-The control block contains, for instance, the bufferfix count
-which is incremented when a thread wants a file page to be fixed
-in a buffer frame. The bufferfix operation does not lock the
-contents of the frame, however. For this purpose, the control
-block contains a read-write lock.
-
-The buffer frames have to be aligned so that the start memory
-address of a frame is divisible by the universal page size, which
-is a power of two.
-
-We intend to make the buffer buf_pool size on-line reconfigurable,
-that is, the buf_pool size can be changed without closing the database.
-Then the database administarator may adjust it to be bigger
-at night, for example. The control block array must
-contain enough control blocks for the maximum buffer buf_pool size
-which is used in the particular database.
-If the buf_pool size is cut, we exploit the virtual memory mechanism of
-the OS, and just refrain from using frames at high addresses. Then the OS
-can swap them to disk.
-
-The control blocks containing file pages are put to a hash table
-according to the file address of the page.
-We could speed up the access to an individual page by using
-"pointer swizzling": we could replace the page references on
-non-leaf index pages by direct pointers to the page, if it exists
-in the buf_pool. We could make a separate hash table where we could
-chain all the page references in non-leaf pages residing in the buf_pool,
-using the page reference as the hash key,
-and at the time of reading of a page update the pointers accordingly.
-Drawbacks of this solution are added complexity and,
-possibly, extra space required on non-leaf pages for memory pointers.
-A simpler solution is just to speed up the hash table mechanism
-in the database, using tables whose size is a power of 2.
-
- Lists of blocks
- ---------------
-
-There are several lists of control blocks.
-
-The free list (buf_pool->free) contains blocks which are currently not
-used.
-
-The common LRU list contains all the blocks holding a file page
-except those for which the bufferfix count is non-zero.
-The pages are in the LRU list roughly in the order of the last
-access to the page, so that the oldest pages are at the end of the
-list. We also keep a pointer to near the end of the LRU list,
-which we can use when we want to artificially age a page in the
-buf_pool. This is used if we know that some page is not needed
-again for some time: we insert the block right after the pointer,
-causing it to be replaced sooner than would noramlly be the case.
-Currently this aging mechanism is used for read-ahead mechanism
-of pages, and it can also be used when there is a scan of a full
-table which cannot fit in the memory. Putting the pages near the
-of the LRU list, we make sure that most of the buf_pool stays in the
-main memory, undisturbed.
-
-The unzip_LRU list contains a subset of the common LRU list. The
-blocks on the unzip_LRU list hold a compressed file page and the
-corresponding uncompressed page frame. A block is in unzip_LRU if and
-only if the predicate buf_page_belongs_to_unzip_LRU(&block->page)
-holds. The blocks in unzip_LRU will be in same order as they are in
-the common LRU list. That is, each manipulation of the common LRU
-list will result in the same manipulation of the unzip_LRU list.
-
-The chain of modified blocks (buf_pool->flush_list) contains the blocks
-holding file pages that have been modified in the memory
-but not written to disk yet. The block with the oldest modification
-which has not yet been written to disk is at the end of the chain.
-
-The chain of unmodified compressed blocks (buf_pool->zip_clean)
-contains the control blocks (buf_page_t) of those compressed pages
-that are not in buf_pool->flush_list and for which no uncompressed
-page has been allocated in the buffer pool. The control blocks for
-uncompressed pages are accessible via buf_block_t objects that are
-reachable via buf_pool->chunks[].
-
-The chains of free memory blocks (buf_pool->zip_free[]) are used by
-the buddy allocator (buf0buddy.c) to keep track of currently unused
-memory blocks of size sizeof(buf_page_t)..UNIV_PAGE_SIZE / 2. These
-blocks are inside the UNIV_PAGE_SIZE-sized memory blocks of type
-BUF_BLOCK_MEMORY that the buddy allocator requests from the buffer
-pool. The buddy allocator is solely used for allocating control
-blocks for compressed pages (buf_page_t) and compressed page frames.
-
- Loading a file page
- -------------------
-
-First, a victim block for replacement has to be found in the
-buf_pool. It is taken from the free list or searched for from the
-end of the LRU-list. An exclusive lock is reserved for the frame,
-the io_fix field is set in the block fixing the block in buf_pool,
-and the io-operation for loading the page is queued. The io-handler thread
-releases the X-lock on the frame and resets the io_fix field
-when the io operation completes.
-
-A thread may request the above operation using the function
-buf_page_get(). It may then continue to request a lock on the frame.
-The lock is granted when the io-handler releases the x-lock.
-
- Read-ahead
- ----------
-
-The read-ahead mechanism is intended to be intelligent and
-isolated from the semantically higher levels of the database
-index management. From the higher level we only need the
-information if a file page has a natural successor or
-predecessor page. On the leaf level of a B-tree index,
-these are the next and previous pages in the natural
-order of the pages.
-
-Let us first explain the read-ahead mechanism when the leafs
-of a B-tree are scanned in an ascending or descending order.
-When a read page is the first time referenced in the buf_pool,
-the buffer manager checks if it is at the border of a so-called
-linear read-ahead area. The tablespace is divided into these
-areas of size 64 blocks, for example. So if the page is at the
-border of such an area, the read-ahead mechanism checks if
-all the other blocks in the area have been accessed in an
-ascending or descending order. If this is the case, the system
-looks at the natural successor or predecessor of the page,
-checks if that is at the border of another area, and in this case
-issues read-requests for all the pages in that area. Maybe
-we could relax the condition that all the pages in the area
-have to be accessed: if data is deleted from a table, there may
-appear holes of unused pages in the area.
-
-A different read-ahead mechanism is used when there appears
-to be a random access pattern to a file.
-If a new page is referenced in the buf_pool, and several pages
-of its random access area (for instance, 32 consecutive pages
-in a tablespace) have recently been referenced, we may predict
-that the whole area may be needed in the near future, and issue
-the read requests for the whole area.
-*/
-
-#ifndef UNIV_HOTBACKUP
-/** Value in microseconds */
-static const int WAIT_FOR_READ = 5000;
-
-/** The buffer buf_pool of the database */
-UNIV_INTERN buf_pool_t* buf_pool = NULL;
-
-/** mutex protecting the buffer pool struct and control blocks, except the
-read-write lock in them */
-UNIV_INTERN mutex_t buf_pool_mutex;
-/** mutex protecting the control blocks of compressed-only pages
-(of type buf_page_t, not buf_block_t) */
-UNIV_INTERN mutex_t buf_pool_zip_mutex;
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-static ulint buf_dbg_counter = 0; /*!< This is used to insert validation
- operations in excution in the
- debug version */
-/** Flag to forbid the release of the buffer pool mutex.
-Protected by buf_pool_mutex. */
-UNIV_INTERN ulint buf_pool_mutex_exit_forbidden = 0;
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-#ifdef UNIV_DEBUG
-/** If this is set TRUE, the program prints info whenever
-read-ahead or flush occurs */
-UNIV_INTERN ibool buf_debug_prints = FALSE;
-#endif /* UNIV_DEBUG */
-
-/** A chunk of buffers. The buffer pool is allocated in chunks. */
-struct buf_chunk_struct{
- ulint mem_size; /*!< allocated size of the chunk */
- ulint size; /*!< size of frames[] and blocks[] */
- void* mem; /*!< pointer to the memory area which
- was allocated for the frames */
- buf_block_t* blocks; /*!< array of buffer control blocks */
-};
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************************//**
-Calculates a page checksum which is stored to the page when it is written
-to a file. Note that we must be careful to calculate the same value on
-32-bit and 64-bit architectures.
-@return checksum */
-UNIV_INTERN
-ulint
-buf_calc_page_new_checksum(
-/*=======================*/
- const byte* page) /*!< in: buffer page */
-{
- ulint checksum;
-
- /* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x
- ..._ARCH_LOG_NO, are written outside the buffer pool to the first
- pages of data files, we have to skip them in the page checksum
- calculation.
- We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the
- checksum is stored, and also the last 8 bytes of page because
- there we store the old formula checksum. */
-
- checksum = ut_fold_binary(page + FIL_PAGE_OFFSET,
- FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET)
- + ut_fold_binary(page + FIL_PAGE_DATA,
- UNIV_PAGE_SIZE - FIL_PAGE_DATA
- - FIL_PAGE_END_LSN_OLD_CHKSUM);
- checksum = checksum & 0xFFFFFFFFUL;
-
- return(checksum);
-}
-
-/********************************************************************//**
-In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
-looked at the first few bytes of the page. This calculates that old
-checksum.
-NOTE: we must first store the new formula checksum to
-FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
-because this takes that field as an input!
-@return checksum */
-UNIV_INTERN
-ulint
-buf_calc_page_old_checksum(
-/*=======================*/
- const byte* page) /*!< in: buffer page */
-{
- ulint checksum;
-
- checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);
-
- checksum = checksum & 0xFFFFFFFFUL;
-
- return(checksum);
-}
-
-/********************************************************************//**
-Checks if a page is corrupt.
-@return TRUE if corrupted */
-UNIV_INTERN
-ibool
-buf_page_is_corrupted(
-/*==================*/
- const byte* read_buf, /*!< in: a database page */
- ulint zip_size) /*!< in: size of compressed page;
- 0 for uncompressed pages */
-{
- ulint checksum_field;
- ulint old_checksum_field;
-
- if (UNIV_LIKELY(!zip_size)
- && memcmp(read_buf + FIL_PAGE_LSN + 4,
- read_buf + UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
-
- /* Stored log sequence numbers at the start and the end
- of page do not match */
-
- return(TRUE);
- }
-
-#ifndef UNIV_HOTBACKUP
- if (recv_lsn_checks_on) {
- ib_uint64_t current_lsn;
-
- if (log_peek_lsn(&current_lsn)
- && current_lsn < mach_read_ull(read_buf + FIL_PAGE_LSN)) {
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error: page %lu log sequence number"
- " %llu\n"
- "InnoDB: is in the future! Current system "
- "log sequence number %llu.\n"
- "InnoDB: Your database may be corrupt or "
- "you may have copied the InnoDB\n"
- "InnoDB: tablespace but not the InnoDB "
- "log files. See\n"
- "InnoDB: " REFMAN "forcing-recovery.html\n"
- "InnoDB: for more information.\n",
- (ulong) mach_read_from_4(read_buf
- + FIL_PAGE_OFFSET),
- mach_read_ull(read_buf + FIL_PAGE_LSN),
- current_lsn);
- }
- }
-#endif
-
- /* If we use checksums validation, make additional check before
- returning TRUE to ensure that the checksum is not equal to
- BUF_NO_CHECKSUM_MAGIC which might be stored by InnoDB with checksums
- disabled. Otherwise, skip checksum calculation and return FALSE */
-
- if (UNIV_LIKELY(srv_use_checksums)) {
- checksum_field = mach_read_from_4(read_buf
- + FIL_PAGE_SPACE_OR_CHKSUM);
-
- if (UNIV_UNLIKELY(zip_size)) {
- return(checksum_field != BUF_NO_CHECKSUM_MAGIC
- && checksum_field
- != page_zip_calc_checksum(read_buf, zip_size));
- }
-
- old_checksum_field = mach_read_from_4(
- read_buf + UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM);
-
- /* There are 2 valid formulas for old_checksum_field:
-
- 1. Very old versions of InnoDB only stored 8 byte lsn to the
- start and the end of the page.
-
- 2. Newer InnoDB versions store the old formula checksum
- there. */
-
- if (old_checksum_field != mach_read_from_4(read_buf
- + FIL_PAGE_LSN)
- && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
- && old_checksum_field
- != buf_calc_page_old_checksum(read_buf)) {
-
- return(TRUE);
- }
-
- /* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
- (always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */
-
- if (checksum_field != 0
- && checksum_field != BUF_NO_CHECKSUM_MAGIC
- && checksum_field
- != buf_calc_page_new_checksum(read_buf)) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/********************************************************************//**
-Prints a page to stderr. */
-UNIV_INTERN
-void
-buf_page_print(
-/*===========*/
- const byte* read_buf, /*!< in: a database page */
- ulint zip_size) /*!< in: compressed page size, or
- 0 for uncompressed pages */
-{
-#ifndef UNIV_HOTBACKUP
- dict_index_t* index;
-#endif /* !UNIV_HOTBACKUP */
- ulint checksum;
- ulint old_checksum;
- ulint size = zip_size;
-
- if (!size) {
- size = UNIV_PAGE_SIZE;
- }
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Page dump in ascii and hex (%lu bytes):\n",
- (ulong) size);
- ut_print_buf(stderr, read_buf, size);
- fputs("\nInnoDB: End of page dump\n", stderr);
-
- if (zip_size) {
- /* Print compressed page. */
-
- switch (fil_page_get_type(read_buf)) {
- case FIL_PAGE_TYPE_ZBLOB:
- case FIL_PAGE_TYPE_ZBLOB2:
- checksum = srv_use_checksums
- ? page_zip_calc_checksum(read_buf, zip_size)
- : BUF_NO_CHECKSUM_MAGIC;
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Compressed BLOB page"
- " checksum %lu, stored %lu\n"
- "InnoDB: Page lsn %lu %lu\n"
- "InnoDB: Page number (if stored"
- " to page already) %lu,\n"
- "InnoDB: space id (if stored"
- " to page already) %lu\n",
- (ulong) checksum,
- (ulong) mach_read_from_4(
- read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
- (ulong) mach_read_from_4(
- read_buf + FIL_PAGE_LSN),
- (ulong) mach_read_from_4(
- read_buf + (FIL_PAGE_LSN + 4)),
- (ulong) mach_read_from_4(
- read_buf + FIL_PAGE_OFFSET),
- (ulong) mach_read_from_4(
- read_buf
- + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
- return;
- default:
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: unknown page type %lu,"
- " assuming FIL_PAGE_INDEX\n",
- fil_page_get_type(read_buf));
- /* fall through */
- case FIL_PAGE_INDEX:
- checksum = srv_use_checksums
- ? page_zip_calc_checksum(read_buf, zip_size)
- : BUF_NO_CHECKSUM_MAGIC;
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Compressed page checksum %lu,"
- " stored %lu\n"
- "InnoDB: Page lsn %lu %lu\n"
- "InnoDB: Page number (if stored"
- " to page already) %lu,\n"
- "InnoDB: space id (if stored"
- " to page already) %lu\n",
- (ulong) checksum,
- (ulong) mach_read_from_4(
- read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
- (ulong) mach_read_from_4(
- read_buf + FIL_PAGE_LSN),
- (ulong) mach_read_from_4(
- read_buf + (FIL_PAGE_LSN + 4)),
- (ulong) mach_read_from_4(
- read_buf + FIL_PAGE_OFFSET),
- (ulong) mach_read_from_4(
- read_buf
- + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
- return;
- case FIL_PAGE_TYPE_XDES:
- /* This is an uncompressed page. */
- break;
- }
- }
-
- checksum = srv_use_checksums
- ? buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
- old_checksum = srv_use_checksums
- ? buf_calc_page_old_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Page checksum %lu, prior-to-4.0.14-form"
- " checksum %lu\n"
- "InnoDB: stored checksum %lu, prior-to-4.0.14-form"
- " stored checksum %lu\n"
- "InnoDB: Page lsn %lu %lu, low 4 bytes of lsn"
- " at page end %lu\n"
- "InnoDB: Page number (if stored to page already) %lu,\n"
- "InnoDB: space id (if created with >= MySQL-4.1.1"
- " and stored already) %lu\n",
- (ulong) checksum, (ulong) old_checksum,
- (ulong) mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
- (ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM),
- (ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN),
- (ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
- (ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
- (ulong) mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
- (ulong) mach_read_from_4(read_buf
- + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
-
-#ifndef UNIV_HOTBACKUP
- if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE)
- == TRX_UNDO_INSERT) {
- fprintf(stderr,
- "InnoDB: Page may be an insert undo log page\n");
- } else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_TYPE)
- == TRX_UNDO_UPDATE) {
- fprintf(stderr,
- "InnoDB: Page may be an update undo log page\n");
- }
-#endif /* !UNIV_HOTBACKUP */
-
- switch (fil_page_get_type(read_buf)) {
- case FIL_PAGE_INDEX:
- fprintf(stderr,
- "InnoDB: Page may be an index page where"
- " index id is %lu %lu\n",
- (ulong) ut_dulint_get_high(
- btr_page_get_index_id(read_buf)),
- (ulong) ut_dulint_get_low(
- btr_page_get_index_id(read_buf)));
-#ifndef UNIV_HOTBACKUP
- index = dict_index_find_on_id_low(
- btr_page_get_index_id(read_buf));
- if (index) {
- fputs("InnoDB: (", stderr);
- dict_index_name_print(stderr, NULL, index);
- fputs(")\n", stderr);
- }
-#endif /* !UNIV_HOTBACKUP */
- break;
- case FIL_PAGE_INODE:
- fputs("InnoDB: Page may be an 'inode' page\n", stderr);
- break;
- case FIL_PAGE_IBUF_FREE_LIST:
- fputs("InnoDB: Page may be an insert buffer free list page\n",
- stderr);
- break;
- case FIL_PAGE_TYPE_ALLOCATED:
- fputs("InnoDB: Page may be a freshly allocated page\n",
- stderr);
- break;
- case FIL_PAGE_IBUF_BITMAP:
- fputs("InnoDB: Page may be an insert buffer bitmap page\n",
- stderr);
- break;
- case FIL_PAGE_TYPE_SYS:
- fputs("InnoDB: Page may be a system page\n",
- stderr);
- break;
- case FIL_PAGE_TYPE_TRX_SYS:
- fputs("InnoDB: Page may be a transaction system page\n",
- stderr);
- break;
- case FIL_PAGE_TYPE_FSP_HDR:
- fputs("InnoDB: Page may be a file space header page\n",
- stderr);
- break;
- case FIL_PAGE_TYPE_XDES:
- fputs("InnoDB: Page may be an extent descriptor page\n",
- stderr);
- break;
- case FIL_PAGE_TYPE_BLOB:
- fputs("InnoDB: Page may be a BLOB page\n",
- stderr);
- break;
- case FIL_PAGE_TYPE_ZBLOB:
- case FIL_PAGE_TYPE_ZBLOB2:
- fputs("InnoDB: Page may be a compressed BLOB page\n",
- stderr);
- break;
- }
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Initializes a buffer control block when the buf_pool is created. */
-static
-void
-buf_block_init(
-/*===========*/
- buf_block_t* block, /*!< in: pointer to control block */
- byte* frame) /*!< in: pointer to buffer frame */
-{
- UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE, block);
-
- block->frame = frame;
-
- block->page.state = BUF_BLOCK_NOT_USED;
- block->page.buf_fix_count = 0;
- block->page.io_fix = BUF_IO_NONE;
-
- block->modify_clock = 0;
-
-#ifdef UNIV_DEBUG_FILE_ACCESSES
- block->page.file_page_was_freed = FALSE;
-#endif /* UNIV_DEBUG_FILE_ACCESSES */
-
- block->check_index_page_at_flush = FALSE;
- block->index = NULL;
-
-#ifdef UNIV_DEBUG
- block->page.in_page_hash = FALSE;
- block->page.in_zip_hash = FALSE;
- block->page.in_flush_list = FALSE;
- block->page.in_free_list = FALSE;
- block->page.in_LRU_list = FALSE;
- block->in_unzip_LRU_list = FALSE;
-#endif /* UNIV_DEBUG */
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- block->n_pointers = 0;
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- page_zip_des_init(&block->page.zip);
-
- mutex_create(&block->mutex, SYNC_BUF_BLOCK);
-
- rw_lock_create(&block->lock, SYNC_LEVEL_VARYING);
- ut_ad(rw_lock_validate(&(block->lock)));
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_create(&block->debug_latch, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
-}
-
-/********************************************************************//**
-Allocates a chunk of buffer frames.
-@return chunk, or NULL on failure */
-static
-buf_chunk_t*
-buf_chunk_init(
-/*===========*/
- buf_chunk_t* chunk, /*!< out: chunk of buffers */
- ulint mem_size) /*!< in: requested size in bytes */
-{
- buf_block_t* block;
- byte* frame;
- ulint i;
-
- /* Round down to a multiple of page size,
- although it already should be. */
- mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
- /* Reserve space for the block descriptors. */
- mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
- + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
-
- chunk->mem_size = mem_size;
- chunk->mem = os_mem_alloc_large(&chunk->mem_size);
-
- if (UNIV_UNLIKELY(chunk->mem == NULL)) {
-
- return(NULL);
- }
-
- /* Allocate the block descriptors from
- the start of the memory block. */
- chunk->blocks = chunk->mem;
-
- /* Align a pointer to the first frame. Note that when
- os_large_page_size is smaller than UNIV_PAGE_SIZE,
- we may allocate one fewer block than requested. When
- it is bigger, we may allocate more blocks than requested. */
-
- frame = ut_align(chunk->mem, UNIV_PAGE_SIZE);
- chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
- - (frame != chunk->mem);
-
- /* Subtract the space needed for block descriptors. */
- {
- ulint size = chunk->size;
-
- while (frame < (byte*) (chunk->blocks + size)) {
- frame += UNIV_PAGE_SIZE;
- size--;
- }
-
- chunk->size = size;
- }
-
- /* Init block structs and assign frames for them. Then we
- assign the frames to the first blocks (we already mapped the
- memory above). */
-
- block = chunk->blocks;
-
- for (i = chunk->size; i--; ) {
-
- buf_block_init(block, frame);
-
-#ifdef HAVE_purify
- /* Wipe contents of frame to eliminate a Purify warning */
- memset(block->frame, '\0', UNIV_PAGE_SIZE);
-#endif
- /* Add the block to the free list */
- UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
- ut_d(block->page.in_free_list = TRUE);
-
- block++;
- frame += UNIV_PAGE_SIZE;
- }
-
- return(chunk);
-}
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Finds a block in the given buffer chunk that points to a
-given compressed page.
-@return buffer block pointing to the compressed page, or NULL */
-static
-buf_block_t*
-buf_chunk_contains_zip(
-/*===================*/
- buf_chunk_t* chunk, /*!< in: chunk being checked */
- const void* data) /*!< in: pointer to compressed page */
-{
- buf_block_t* block;
- ulint i;
-
- ut_ad(buf_pool);
- ut_ad(buf_pool_mutex_own());
-
- block = chunk->blocks;
-
- for (i = chunk->size; i--; block++) {
- if (block->page.zip.data == data) {
-
- return(block);
- }
- }
-
- return(NULL);
-}
-
-/*********************************************************************//**
-Finds a block in the buffer pool that points to a
-given compressed page.
-@return buffer block pointing to the compressed page, or NULL */
-UNIV_INTERN
-buf_block_t*
-buf_pool_contains_zip(
-/*==================*/
- const void* data) /*!< in: pointer to compressed page */
-{
- ulint n;
- buf_chunk_t* chunk = buf_pool->chunks;
-
- for (n = buf_pool->n_chunks; n--; chunk++) {
- buf_block_t* block = buf_chunk_contains_zip(chunk, data);
-
- if (block) {
- return(block);
- }
- }
-
- return(NULL);
-}
-#endif /* UNIV_DEBUG */
-
-/*********************************************************************//**
-Checks that all file pages in the buffer chunk are in a replaceable state.
-@return address of a non-free block, or NULL if all freed */
-static
-const buf_block_t*
-buf_chunk_not_freed(
-/*================*/
- buf_chunk_t* chunk) /*!< in: chunk being checked */
-{
- buf_block_t* block;
- ulint i;
-
- ut_ad(buf_pool);
- ut_ad(buf_pool_mutex_own());
-
- block = chunk->blocks;
-
- for (i = chunk->size; i--; block++) {
- ibool ready;
-
- switch (buf_block_get_state(block)) {
- case BUF_BLOCK_ZIP_FREE:
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_ZIP_DIRTY:
- /* The uncompressed buffer pool should never
- contain compressed block descriptors. */
- ut_error;
- break;
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- /* Skip blocks that are not being used for
- file pages. */
- break;
- case BUF_BLOCK_FILE_PAGE:
- mutex_enter(&block->mutex);
- ready = buf_flush_ready_for_replace(&block->page);
- mutex_exit(&block->mutex);
-
- if (!ready) {
-
- return(block);
- }
-
- break;
- }
- }
-
- return(NULL);
-}
-
-/*********************************************************************//**
-Checks that all blocks in the buffer chunk are in BUF_BLOCK_NOT_USED state.
-@return TRUE if all freed */
-static
-ibool
-buf_chunk_all_free(
-/*===============*/
- const buf_chunk_t* chunk) /*!< in: chunk being checked */
-{
- const buf_block_t* block;
- ulint i;
-
- ut_ad(buf_pool);
- ut_ad(buf_pool_mutex_own());
-
- block = chunk->blocks;
-
- for (i = chunk->size; i--; block++) {
-
- if (buf_block_get_state(block) != BUF_BLOCK_NOT_USED) {
-
- return(FALSE);
- }
- }
-
- return(TRUE);
-}
-
-/********************************************************************//**
-Frees a chunk of buffer frames. */
-static
-void
-buf_chunk_free(
-/*===========*/
- buf_chunk_t* chunk) /*!< out: chunk of buffers */
-{
- buf_block_t* block;
- const buf_block_t* block_end;
-
- ut_ad(buf_pool_mutex_own());
-
- block_end = chunk->blocks + chunk->size;
-
- for (block = chunk->blocks; block < block_end; block++) {
- ut_a(buf_block_get_state(block) == BUF_BLOCK_NOT_USED);
- ut_a(!block->page.zip.data);
-
- ut_ad(!block->page.in_LRU_list);
- ut_ad(!block->in_unzip_LRU_list);
- ut_ad(!block->page.in_flush_list);
- /* Remove the block from the free list. */
- ut_ad(block->page.in_free_list);
- UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
-
- /* Free the latches. */
- mutex_free(&block->mutex);
- rw_lock_free(&block->lock);
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_free(&block->debug_latch);
-#endif /* UNIV_SYNC_DEBUG */
- UNIV_MEM_UNDESC(block);
- }
-
- os_mem_free_large(chunk->mem, chunk->mem_size);
-}
-
-/********************************************************************//**
-Creates the buffer pool.
-@return own: buf_pool object, NULL if not enough memory or error */
-UNIV_INTERN
-buf_pool_t*
-buf_pool_init(void)
-/*===============*/
-{
- buf_chunk_t* chunk;
- ulint i;
-
- buf_pool = mem_zalloc(sizeof(buf_pool_t));
-
- /* 1. Initialize general fields
- ------------------------------- */
- mutex_create(&buf_pool_mutex, SYNC_BUF_POOL);
- mutex_create(&buf_pool_zip_mutex, SYNC_BUF_BLOCK);
-
- buf_pool_mutex_enter();
-
- buf_pool->n_chunks = 1;
- buf_pool->chunks = chunk = mem_alloc(sizeof *chunk);
-
- UT_LIST_INIT(buf_pool->free);
-
- if (!buf_chunk_init(chunk, srv_buf_pool_size)) {
- mem_free(chunk);
- mem_free(buf_pool);
- buf_pool = NULL;
- return(NULL);
- }
-
- srv_buf_pool_old_size = srv_buf_pool_size;
- buf_pool->curr_size = chunk->size;
- srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
-
- buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
- buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
-
- buf_pool->last_printout_time = time(NULL);
-
- /* 2. Initialize flushing fields
- -------------------------------- */
-
- for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
- buf_pool->no_flush[i] = os_event_create(NULL);
- }
-
- /* 3. Initialize LRU fields
- --------------------------- */
- /* All fields are initialized by mem_zalloc(). */
-
- buf_pool_mutex_exit();
-
- btr_search_sys_create(buf_pool->curr_size
- * UNIV_PAGE_SIZE / sizeof(void*) / 64);
-
- /* 4. Initialize the buddy allocator fields */
- /* All fields are initialized by mem_zalloc(). */
-
- return(buf_pool);
-}
-
-/********************************************************************//**
-Frees the buffer pool at shutdown. This must not be invoked before
-freeing all mutexes. */
-UNIV_INTERN
-void
-buf_pool_free(void)
-/*===============*/
-{
- buf_chunk_t* chunk;
- buf_chunk_t* chunks;
-
- chunks = buf_pool->chunks;
- chunk = chunks + buf_pool->n_chunks;
-
- while (--chunk >= chunks) {
- /* Bypass the checks of buf_chunk_free(), since they
- would fail at shutdown. */
- os_mem_free_large(chunk->mem, chunk->mem_size);
- }
-
- mem_free(buf_pool->chunks);
- hash_table_free(buf_pool->page_hash);
- hash_table_free(buf_pool->zip_hash);
- mem_free(buf_pool);
- buf_pool = NULL;
-}
-
-/********************************************************************//**
-Drops the adaptive hash index. To prevent a livelock, this function
-is only to be called while holding btr_search_latch and while
-btr_search_enabled == FALSE. */
-UNIV_INTERN
-void
-buf_pool_drop_hash_index(void)
-/*==========================*/
-{
- ibool released_search_latch;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(!btr_search_enabled);
-
- do {
- buf_chunk_t* chunks = buf_pool->chunks;
- buf_chunk_t* chunk = chunks + buf_pool->n_chunks;
-
- released_search_latch = FALSE;
-
- while (--chunk >= chunks) {
- buf_block_t* block = chunk->blocks;
- ulint i = chunk->size;
-
- for (; i--; block++) {
- /* block->is_hashed cannot be modified
- when we have an x-latch on btr_search_latch;
- see the comment in buf0buf.h */
-
- if (!block->is_hashed) {
- continue;
- }
-
- /* To follow the latching order, we
- have to release btr_search_latch
- before acquiring block->latch. */
- rw_lock_x_unlock(&btr_search_latch);
- /* When we release the search latch,
- we must rescan all blocks, because
- some may become hashed again. */
- released_search_latch = TRUE;
-
- rw_lock_x_lock(&block->lock);
-
- /* This should be guaranteed by the
- callers, which will be holding
- btr_search_enabled_mutex. */
- ut_ad(!btr_search_enabled);
-
- /* Because we did not buffer-fix the
- block by calling buf_block_get_gen(),
- it is possible that the block has been
- allocated for some other use after
- btr_search_latch was released above.
- We do not care which file page the
- block is mapped to. All we want to do
- is to drop any hash entries referring
- to the page. */
-
- /* It is possible that
- block->page.state != BUF_FILE_PAGE.
- Even that does not matter, because
- btr_search_drop_page_hash_index() will
- check block->is_hashed before doing
- anything. block->is_hashed can only
- be set on uncompressed file pages. */
-
- btr_search_drop_page_hash_index(block);
-
- rw_lock_x_unlock(&block->lock);
-
- rw_lock_x_lock(&btr_search_latch);
-
- ut_ad(!btr_search_enabled);
- }
- }
- } while (released_search_latch);
-}
-
-/********************************************************************//**
-Relocate a buffer control block. Relocates the block on the LRU list
-and in buf_pool->page_hash. Does not relocate bpage->list.
-The caller must take care of relocating bpage->list. */
-UNIV_INTERN
-void
-buf_relocate(
-/*=========*/
- buf_page_t* bpage, /*!< in/out: control block being relocated;
- buf_page_get_state(bpage) must be
- BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
- buf_page_t* dpage) /*!< in/out: destination control block */
-{
- buf_page_t* b;
- ulint fold;
-
- ut_ad(buf_pool_mutex_own());
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
- ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
- ut_a(bpage->buf_fix_count == 0);
- ut_ad(bpage->in_LRU_list);
- ut_ad(!bpage->in_zip_hash);
- ut_ad(bpage->in_page_hash);
- ut_ad(bpage == buf_page_hash_get(bpage->space, bpage->offset));
-#ifdef UNIV_DEBUG
- switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_ZIP_FREE:
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_FILE_PAGE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- ut_error;
- case BUF_BLOCK_ZIP_DIRTY:
- case BUF_BLOCK_ZIP_PAGE:
- break;
- }
-#endif /* UNIV_DEBUG */
-
- memcpy(dpage, bpage, sizeof *dpage);
-
- ut_d(bpage->in_LRU_list = FALSE);
- ut_d(bpage->in_page_hash = FALSE);
-
- /* relocate buf_pool->LRU */
- b = UT_LIST_GET_PREV(LRU, bpage);
- UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
-
- if (b) {
- UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, b, dpage);
- } else {
- UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, dpage);
- }
-
- if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) {
- buf_pool->LRU_old = dpage;
-#ifdef UNIV_LRU_DEBUG
- /* buf_pool->LRU_old must be the first item in the LRU list
- whose "old" flag is set. */
- ut_a(buf_pool->LRU_old->old);
- ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
- || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
- ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
- || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
- } else {
- /* Check that the "old" flag is consistent in
- the block and its neighbours. */
- buf_page_set_old(dpage, buf_page_is_old(dpage));
-#endif /* UNIV_LRU_DEBUG */
- }
-
- ut_d(UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU,
- ut_ad(ut_list_node_313->in_LRU_list)));
-
- /* relocate buf_pool->page_hash */
- fold = buf_page_address_fold(bpage->space, bpage->offset);
-
- HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
- HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
-
- UNIV_MEM_INVALID(bpage, sizeof *bpage);
-}
-
-/********************************************************************//**
-Shrinks the buffer pool. */
-static
-void
-buf_pool_shrink(
-/*============*/
- ulint chunk_size) /*!< in: number of pages to remove */
-{
- buf_chunk_t* chunks;
- buf_chunk_t* chunk;
- ulint max_size;
- ulint max_free_size;
- buf_chunk_t* max_chunk;
- buf_chunk_t* max_free_chunk;
-
- ut_ad(!buf_pool_mutex_own());
-
-try_again:
- btr_search_disable(); /* Empty the adaptive hash index again */
- buf_pool_mutex_enter();
-
-shrink_again:
- if (buf_pool->n_chunks <= 1) {
-
- /* Cannot shrink if there is only one chunk */
- goto func_done;
- }
-
- /* Search for the largest free chunk
- not larger than the size difference */
- chunks = buf_pool->chunks;
- chunk = chunks + buf_pool->n_chunks;
- max_size = max_free_size = 0;
- max_chunk = max_free_chunk = NULL;
-
- while (--chunk >= chunks) {
- if (chunk->size <= chunk_size
- && chunk->size > max_free_size) {
- if (chunk->size > max_size) {
- max_size = chunk->size;
- max_chunk = chunk;
- }
-
- if (buf_chunk_all_free(chunk)) {
- max_free_size = chunk->size;
- max_free_chunk = chunk;
- }
- }
- }
-
- if (!max_free_size) {
-
- ulint dirty = 0;
- ulint nonfree = 0;
- buf_block_t* block;
- buf_block_t* bend;
-
- /* Cannot shrink: try again later
- (do not assign srv_buf_pool_old_size) */
- if (!max_chunk) {
-
- goto func_exit;
- }
-
- block = max_chunk->blocks;
- bend = block + max_chunk->size;
-
- /* Move the blocks of chunk to the end of the
- LRU list and try to flush them. */
- for (; block < bend; block++) {
- switch (buf_block_get_state(block)) {
- case BUF_BLOCK_NOT_USED:
- continue;
- case BUF_BLOCK_FILE_PAGE:
- break;
- default:
- nonfree++;
- continue;
- }
-
- mutex_enter(&block->mutex);
- /* The following calls will temporarily
- release block->mutex and buf_pool_mutex.
- Therefore, we have to always retry,
- even if !dirty && !nonfree. */
-
- if (!buf_flush_ready_for_replace(&block->page)) {
-
- buf_LRU_make_block_old(&block->page);
- dirty++;
- } else if (buf_LRU_free_block(&block->page, TRUE, NULL)
- != BUF_LRU_FREED) {
- nonfree++;
- }
-
- mutex_exit(&block->mutex);
- }
-
- buf_pool_mutex_exit();
-
- /* Request for a flush of the chunk if it helps.
- Do not flush if there are non-free blocks, since
- flushing will not make the chunk freeable. */
- if (nonfree) {
- /* Avoid busy-waiting. */
- os_thread_sleep(100000);
- } else if (dirty
- && buf_flush_batch(BUF_FLUSH_LRU, dirty, 0)
- == ULINT_UNDEFINED) {
-
- buf_flush_wait_batch_end(BUF_FLUSH_LRU);
- }
-
- goto try_again;
- }
-
- max_size = max_free_size;
- max_chunk = max_free_chunk;
-
- srv_buf_pool_old_size = srv_buf_pool_size;
-
- /* Rewrite buf_pool->chunks. Copy everything but max_chunk. */
- chunks = mem_alloc((buf_pool->n_chunks - 1) * sizeof *chunks);
- memcpy(chunks, buf_pool->chunks,
- (max_chunk - buf_pool->chunks) * sizeof *chunks);
- memcpy(chunks + (max_chunk - buf_pool->chunks),
- max_chunk + 1,
- buf_pool->chunks + buf_pool->n_chunks
- - (max_chunk + 1));
- ut_a(buf_pool->curr_size > max_chunk->size);
- buf_pool->curr_size -= max_chunk->size;
- srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
- chunk_size -= max_chunk->size;
- buf_chunk_free(max_chunk);
- mem_free(buf_pool->chunks);
- buf_pool->chunks = chunks;
- buf_pool->n_chunks--;
-
- /* Allow a slack of one megabyte. */
- if (chunk_size > 1048576 / UNIV_PAGE_SIZE) {
-
- goto shrink_again;
- }
-
-func_done:
- srv_buf_pool_old_size = srv_buf_pool_size;
-func_exit:
- buf_pool_mutex_exit();
- btr_search_enable();
-}
-
-/********************************************************************//**
-Rebuild buf_pool->page_hash. */
-static
-void
-buf_pool_page_hash_rebuild(void)
-/*============================*/
-{
- ulint i;
- ulint n_chunks;
- buf_chunk_t* chunk;
- hash_table_t* page_hash;
- hash_table_t* zip_hash;
- buf_page_t* b;
-
- buf_pool_mutex_enter();
-
- /* Free, create, and populate the hash table. */
- hash_table_free(buf_pool->page_hash);
- buf_pool->page_hash = page_hash = hash_create(2 * buf_pool->curr_size);
- zip_hash = hash_create(2 * buf_pool->curr_size);
-
- HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
- BUF_POOL_ZIP_FOLD_BPAGE);
-
- hash_table_free(buf_pool->zip_hash);
- buf_pool->zip_hash = zip_hash;
-
- /* Insert the uncompressed file pages to buf_pool->page_hash. */
-
- chunk = buf_pool->chunks;
- n_chunks = buf_pool->n_chunks;
-
- for (i = 0; i < n_chunks; i++, chunk++) {
- ulint j;
- buf_block_t* block = chunk->blocks;
-
- for (j = 0; j < chunk->size; j++, block++) {
- if (buf_block_get_state(block)
- == BUF_BLOCK_FILE_PAGE) {
- ut_ad(!block->page.in_zip_hash);
- ut_ad(block->page.in_page_hash);
-
- HASH_INSERT(buf_page_t, hash, page_hash,
- buf_page_address_fold(
- block->page.space,
- block->page.offset),
- &block->page);
- }
- }
- }
-
- /* Insert the compressed-only pages to buf_pool->page_hash.
- All such blocks are either in buf_pool->zip_clean or
- in buf_pool->flush_list. */
-
- for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
- b = UT_LIST_GET_NEXT(list, b)) {
- ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
- ut_ad(!b->in_flush_list);
- ut_ad(b->in_LRU_list);
- ut_ad(b->in_page_hash);
- ut_ad(!b->in_zip_hash);
-
- HASH_INSERT(buf_page_t, hash, page_hash,
- buf_page_address_fold(b->space, b->offset), b);
- }
-
- for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
- b = UT_LIST_GET_NEXT(list, b)) {
- ut_ad(b->in_flush_list);
- ut_ad(b->in_LRU_list);
- ut_ad(b->in_page_hash);
- ut_ad(!b->in_zip_hash);
-
- switch (buf_page_get_state(b)) {
- case BUF_BLOCK_ZIP_DIRTY:
- HASH_INSERT(buf_page_t, hash, page_hash,
- buf_page_address_fold(b->space,
- b->offset), b);
- break;
- case BUF_BLOCK_FILE_PAGE:
- /* uncompressed page */
- break;
- case BUF_BLOCK_ZIP_FREE:
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- ut_error;
- break;
- }
- }
-
- buf_pool_mutex_exit();
-}
-
-/********************************************************************//**
-Resizes the buffer pool. */
-UNIV_INTERN
-void
-buf_pool_resize(void)
-/*=================*/
-{
- buf_pool_mutex_enter();
-
- if (srv_buf_pool_old_size == srv_buf_pool_size) {
-
- buf_pool_mutex_exit();
- return;
- }
-
- if (srv_buf_pool_curr_size + 1048576 > srv_buf_pool_size) {
-
- buf_pool_mutex_exit();
-
- /* Disable adaptive hash indexes and empty the index
- in order to free up memory in the buffer pool chunks. */
- buf_pool_shrink((srv_buf_pool_curr_size - srv_buf_pool_size)
- / UNIV_PAGE_SIZE);
- } else if (srv_buf_pool_curr_size + 1048576 < srv_buf_pool_size) {
-
- /* Enlarge the buffer pool by at least one megabyte */
-
- ulint mem_size
- = srv_buf_pool_size - srv_buf_pool_curr_size;
- buf_chunk_t* chunks;
- buf_chunk_t* chunk;
-
- chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
-
- memcpy(chunks, buf_pool->chunks, buf_pool->n_chunks
- * sizeof *chunks);
-
- chunk = &chunks[buf_pool->n_chunks];
-
- if (!buf_chunk_init(chunk, mem_size)) {
- mem_free(chunks);
- } else {
- buf_pool->curr_size += chunk->size;
- srv_buf_pool_curr_size = buf_pool->curr_size
- * UNIV_PAGE_SIZE;
- mem_free(buf_pool->chunks);
- buf_pool->chunks = chunks;
- buf_pool->n_chunks++;
- }
-
- srv_buf_pool_old_size = srv_buf_pool_size;
- buf_pool_mutex_exit();
- }
-
- buf_pool_page_hash_rebuild();
-}
-
-/********************************************************************//**
-Moves a page to the start of the buffer pool LRU list. This high-level
-function can be used to prevent an important page from slipping out of
-the buffer pool. */
-UNIV_INTERN
-void
-buf_page_make_young(
-/*================*/
- buf_page_t* bpage) /*!< in: buffer block of a file page */
-{
- buf_pool_mutex_enter();
-
- ut_a(buf_page_in_file(bpage));
-
- buf_LRU_make_block_young(bpage);
-
- buf_pool_mutex_exit();
-}
-
-/********************************************************************//**
-Sets the time of the first access of a page and moves a page to the
-start of the buffer pool LRU list if it is too old. This high-level
-function can be used to prevent an important page from slipping
-out of the buffer pool. */
-static
-void
-buf_page_set_accessed_make_young(
-/*=============================*/
- buf_page_t* bpage, /*!< in/out: buffer block of a
- file page */
- unsigned access_time) /*!< in: bpage->access_time
- read under mutex protection,
- or 0 if unknown */
-{
- ut_ad(!buf_pool_mutex_own());
- ut_a(buf_page_in_file(bpage));
-
- if (buf_page_peek_if_too_old(bpage)) {
- buf_pool_mutex_enter();
- buf_LRU_make_block_young(bpage);
- buf_pool_mutex_exit();
- } else if (!access_time) {
- ulint time_ms = ut_time_ms();
- buf_pool_mutex_enter();
- buf_page_set_accessed(bpage, time_ms);
- buf_pool_mutex_exit();
- }
-}
-
-/********************************************************************//**
-Resets the check_index_page_at_flush field of a page if found in the buffer
-pool. */
-UNIV_INTERN
-void
-buf_reset_check_index_page_at_flush(
-/*================================*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: page number */
-{
- buf_block_t* block;
-
- buf_pool_mutex_enter();
-
- block = (buf_block_t*) buf_page_hash_get(space, offset);
-
- if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
- block->check_index_page_at_flush = FALSE;
- }
-
- buf_pool_mutex_exit();
-}
-
-/********************************************************************//**
-Returns the current state of is_hashed of a page. FALSE if the page is
-not in the pool. NOTE that this operation does not fix the page in the
-pool if it is found there.
-@return TRUE if page hash index is built in search system */
-UNIV_INTERN
-ibool
-buf_page_peek_if_search_hashed(
-/*===========================*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: page number */
-{
- buf_block_t* block;
- ibool is_hashed;
-
- buf_pool_mutex_enter();
-
- block = (buf_block_t*) buf_page_hash_get(space, offset);
-
- if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
- is_hashed = FALSE;
- } else {
- is_hashed = block->is_hashed;
- }
-
- buf_pool_mutex_exit();
-
- return(is_hashed);
-}
-
-#ifdef UNIV_DEBUG_FILE_ACCESSES
-/********************************************************************//**
-Sets file_page_was_freed TRUE if the page is found in the buffer pool.
-This function should be called when we free a file page and want the
-debug version to check that it is not accessed any more unless
-reallocated.
-@return control block if found in page hash table, otherwise NULL */
-UNIV_INTERN
-buf_page_t*
-buf_page_set_file_page_was_freed(
-/*=============================*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: page number */
-{
- buf_page_t* bpage;
-
- buf_pool_mutex_enter();
-
- bpage = buf_page_hash_get(space, offset);
-
- if (bpage) {
- bpage->file_page_was_freed = TRUE;
- }
-
- buf_pool_mutex_exit();
-
- return(bpage);
-}
-
-/********************************************************************//**
-Sets file_page_was_freed FALSE if the page is found in the buffer pool.
-This function should be called when we free a file page and want the
-debug version to check that it is not accessed any more unless
-reallocated.
-@return control block if found in page hash table, otherwise NULL */
-UNIV_INTERN
-buf_page_t*
-buf_page_reset_file_page_was_freed(
-/*===============================*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: page number */
-{
- buf_page_t* bpage;
-
- buf_pool_mutex_enter();
-
- bpage = buf_page_hash_get(space, offset);
-
- if (bpage) {
- bpage->file_page_was_freed = FALSE;
- }
-
- buf_pool_mutex_exit();
-
- return(bpage);
-}
-#endif /* UNIV_DEBUG_FILE_ACCESSES */
-
-/********************************************************************//**
-Get read access to a compressed page (usually of type
-FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
-The page must be released with buf_page_release_zip().
-NOTE: the page is not protected by any latch. Mutual exclusion has to
-be implemented at a higher level. In other words, all possible
-accesses to a given page through this function must be protected by
-the same set of mutexes or latches.
-@return pointer to the block */
-UNIV_INTERN
-buf_page_t*
-buf_page_get_zip(
-/*=============*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size */
- ulint offset) /*!< in: page number */
-{
- buf_page_t* bpage;
- mutex_t* block_mutex;
- ibool must_read;
- unsigned access_time;
-
-#ifndef UNIV_LOG_DEBUG
- ut_ad(!ibuf_inside());
-#endif
- buf_pool->stat.n_page_gets++;
-
- for (;;) {
- buf_pool_mutex_enter();
-lookup:
- bpage = buf_page_hash_get(space, offset);
- if (bpage) {
- break;
- }
-
- /* Page not in buf_pool: needs to be read from file */
-
- buf_pool_mutex_exit();
-
- buf_read_page(space, zip_size, offset);
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(++buf_dbg_counter % 37 || buf_validate());
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
- }
-
- if (UNIV_UNLIKELY(!bpage->zip.data)) {
- /* There is no compressed page. */
-err_exit:
- buf_pool_mutex_exit();
- return(NULL);
- }
-
- switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- case BUF_BLOCK_ZIP_FREE:
- break;
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_ZIP_DIRTY:
- block_mutex = &buf_pool_zip_mutex;
- mutex_enter(block_mutex);
- bpage->buf_fix_count++;
- goto got_block;
- case BUF_BLOCK_FILE_PAGE:
- block_mutex = &((buf_block_t*) bpage)->mutex;
- mutex_enter(block_mutex);
-
- /* Discard the uncompressed page frame if possible. */
- if (buf_LRU_free_block(bpage, FALSE, NULL)
- == BUF_LRU_FREED) {
-
- mutex_exit(block_mutex);
- goto lookup;
- }
-
- buf_block_buf_fix_inc((buf_block_t*) bpage,
- __FILE__, __LINE__);
- goto got_block;
- }
-
- ut_error;
- goto err_exit;
-
-got_block:
- must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
- access_time = buf_page_is_accessed(bpage);
-
- buf_pool_mutex_exit();
-
- mutex_exit(block_mutex);
-
- buf_page_set_accessed_make_young(bpage, access_time);
-
-#ifdef UNIV_DEBUG_FILE_ACCESSES
- ut_a(!bpage->file_page_was_freed);
-#endif
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(++buf_dbg_counter % 5771 || buf_validate());
- ut_a(bpage->buf_fix_count > 0);
- ut_a(buf_page_in_file(bpage));
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
- if (must_read) {
- /* Let us wait until the read operation
- completes */
-
- for (;;) {
- enum buf_io_fix io_fix;
-
- mutex_enter(block_mutex);
- io_fix = buf_page_get_io_fix(bpage);
- mutex_exit(block_mutex);
-
- if (io_fix == BUF_IO_READ) {
-
- os_thread_sleep(WAIT_FOR_READ);
- } else {
- break;
- }
- }
- }
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(buf_page_get_space(bpage),
- buf_page_get_page_no(bpage)) == 0);
-#endif
- return(bpage);
-}
-
-/********************************************************************//**
-Initialize some fields of a control block. */
-UNIV_INLINE
-void
-buf_block_init_low(
-/*===============*/
- buf_block_t* block) /*!< in: block to init */
-{
- block->check_index_page_at_flush = FALSE;
- block->index = NULL;
-
- block->n_hash_helps = 0;
- block->is_hashed = FALSE;
- block->n_fields = 1;
- block->n_bytes = 0;
- block->left_side = TRUE;
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************************//**
-Decompress a block.
-@return TRUE if successful */
-UNIV_INTERN
-ibool
-buf_zip_decompress(
-/*===============*/
- buf_block_t* block, /*!< in/out: block */
- ibool check) /*!< in: TRUE=verify the page checksum */
-{
- const byte* frame = block->page.zip.data;
-
- ut_ad(buf_block_get_zip_size(block));
- ut_a(buf_block_get_space(block) != 0);
-
- if (UNIV_LIKELY(check)) {
- ulint stamp_checksum = mach_read_from_4(
- frame + FIL_PAGE_SPACE_OR_CHKSUM);
- ulint calc_checksum = page_zip_calc_checksum(
- frame, page_zip_get_size(&block->page.zip));
-
- if (UNIV_UNLIKELY(stamp_checksum != calc_checksum)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: compressed page checksum mismatch"
- " (space %u page %u): %lu != %lu\n",
- block->page.space, block->page.offset,
- stamp_checksum, calc_checksum);
- return(FALSE);
- }
- }
-
- switch (fil_page_get_type(frame)) {
- case FIL_PAGE_INDEX:
- if (page_zip_decompress(&block->page.zip,
- block->frame, TRUE)) {
- return(TRUE);
- }
-
- fprintf(stderr,
- "InnoDB: unable to decompress space %lu page %lu\n",
- (ulong) block->page.space,
- (ulong) block->page.offset);
- return(FALSE);
-
- case FIL_PAGE_TYPE_ALLOCATED:
- case FIL_PAGE_INODE:
- case FIL_PAGE_IBUF_BITMAP:
- case FIL_PAGE_TYPE_FSP_HDR:
- case FIL_PAGE_TYPE_XDES:
- case FIL_PAGE_TYPE_ZBLOB:
- case FIL_PAGE_TYPE_ZBLOB2:
- /* Copy to uncompressed storage. */
- memcpy(block->frame, frame,
- buf_block_get_zip_size(block));
- return(TRUE);
- }
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: unknown compressed page"
- " type %lu\n",
- fil_page_get_type(frame));
- return(FALSE);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Gets the block to whose frame the pointer is pointing to.
-@return pointer to block, never NULL */
-UNIV_INTERN
-buf_block_t*
-buf_block_align(
-/*============*/
- const byte* ptr) /*!< in: pointer to a frame */
-{
- buf_chunk_t* chunk;
- ulint i;
-
- /* TODO: protect buf_pool->chunks with a mutex (it will
- currently remain constant after buf_pool_init()) */
- for (chunk = buf_pool->chunks, i = buf_pool->n_chunks; i--; chunk++) {
- lint offs = ptr - chunk->blocks->frame;
-
- if (UNIV_UNLIKELY(offs < 0)) {
-
- continue;
- }
-
- offs >>= UNIV_PAGE_SIZE_SHIFT;
-
- if (UNIV_LIKELY((ulint) offs < chunk->size)) {
- buf_block_t* block = &chunk->blocks[offs];
-
- /* The function buf_chunk_init() invokes
- buf_block_init() so that block[n].frame ==
- block->frame + n * UNIV_PAGE_SIZE. Check it. */
- ut_ad(block->frame == page_align(ptr));
-#ifdef UNIV_DEBUG
- /* A thread that updates these fields must
- hold buf_pool_mutex and block->mutex. Acquire
- only the latter. */
- mutex_enter(&block->mutex);
-
- switch (buf_block_get_state(block)) {
- case BUF_BLOCK_ZIP_FREE:
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_ZIP_DIRTY:
- /* These types should only be used in
- the compressed buffer pool, whose
- memory is allocated from
- buf_pool->chunks, in UNIV_PAGE_SIZE
- blocks flagged as BUF_BLOCK_MEMORY. */
- ut_error;
- break;
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- /* Some data structures contain
- "guess" pointers to file pages. The
- file pages may have been freed and
- reused. Do not complain. */
- break;
- case BUF_BLOCK_REMOVE_HASH:
- /* buf_LRU_block_remove_hashed_page()
- will overwrite the FIL_PAGE_OFFSET and
- FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID with
- 0xff and set the state to
- BUF_BLOCK_REMOVE_HASH. */
- ut_ad(page_get_space_id(page_align(ptr))
- == 0xffffffff);
- ut_ad(page_get_page_no(page_align(ptr))
- == 0xffffffff);
- break;
- case BUF_BLOCK_FILE_PAGE:
- ut_ad(block->page.space
- == page_get_space_id(page_align(ptr)));
- ut_ad(block->page.offset
- == page_get_page_no(page_align(ptr)));
- break;
- }
-
- mutex_exit(&block->mutex);
-#endif /* UNIV_DEBUG */
-
- return(block);
- }
- }
-
- /* The block should always be found. */
- ut_error;
- return(NULL);
-}
-
-/********************************************************************//**
-Find out if a pointer belongs to a buf_block_t. It can be a pointer to
-the buf_block_t itself or a member of it
-@return TRUE if ptr belongs to a buf_block_t struct */
-UNIV_INTERN
-ibool
-buf_pointer_is_block_field(
-/*=======================*/
- const void* ptr) /*!< in: pointer not
- dereferenced */
-{
- const buf_chunk_t* chunk = buf_pool->chunks;
- const buf_chunk_t* const echunk = chunk + buf_pool->n_chunks;
-
- /* TODO: protect buf_pool->chunks with a mutex (it will
- currently remain constant after buf_pool_init()) */
- while (chunk < echunk) {
- if (ptr >= (void *)chunk->blocks
- && ptr < (void *)(chunk->blocks + chunk->size)) {
-
- return(TRUE);
- }
-
- chunk++;
- }
-
- return(FALSE);
-}
-
-/********************************************************************//**
-Find out if a buffer block was created by buf_chunk_init().
-@return TRUE if "block" has been added to buf_pool->free by buf_chunk_init() */
-static
-ibool
-buf_block_is_uncompressed(
-/*======================*/
- const buf_block_t* block) /*!< in: pointer to block,
- not dereferenced */
-{
- ut_ad(buf_pool_mutex_own());
-
- if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
- /* The pointer should be aligned. */
- return(FALSE);
- }
-
- return(buf_pointer_is_block_field((void *)block));
-}
-
-/********************************************************************//**
-This is the general function used to get access to a database page.
-@return pointer to the block or NULL */
-UNIV_INTERN
-buf_block_t*
-buf_page_get_gen(
-/*=============*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint offset, /*!< in: page number */
- ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
- buf_block_t* guess, /*!< in: guessed block or NULL */
- ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
- BUF_GET_NO_LATCH */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mini-transaction */
-{
- buf_block_t* block;
- unsigned access_time;
- ulint fix_type;
- ibool must_read;
-
- ut_ad(mtr);
- ut_ad((rw_latch == RW_S_LATCH)
- || (rw_latch == RW_X_LATCH)
- || (rw_latch == RW_NO_LATCH));
- ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
- ut_ad((mode == BUF_GET) || (mode == BUF_GET_IF_IN_POOL)
- || (mode == BUF_GET_NO_LATCH));
- ut_ad(zip_size == fil_space_get_zip_size(space));
- ut_ad(ut_is_2pow(zip_size));
-#ifndef UNIV_LOG_DEBUG
- ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL));
-#endif
- buf_pool->stat.n_page_gets++;
-loop:
- block = guess;
- buf_pool_mutex_enter();
-
- if (block) {
- /* If the guess is a compressed page descriptor that
- has been allocated by buf_buddy_alloc(), it may have
- been invalidated by buf_buddy_relocate(). In that
- case, block could point to something that happens to
- contain the expected bits in block->page. Similarly,
- the guess may be pointing to a buffer pool chunk that
- has been released when resizing the buffer pool. */
-
- if (!buf_block_is_uncompressed(block)
- || offset != block->page.offset
- || space != block->page.space
- || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
-
- block = guess = NULL;
- } else {
- ut_ad(!block->page.in_zip_hash);
- ut_ad(block->page.in_page_hash);
- }
- }
-
- if (block == NULL) {
- block = (buf_block_t*) buf_page_hash_get(space, offset);
- }
-
-loop2:
- if (block == NULL) {
- /* Page not in buf_pool: needs to be read from file */
-
- buf_pool_mutex_exit();
-
- if (mode == BUF_GET_IF_IN_POOL) {
-
- return(NULL);
- }
-
- buf_read_page(space, zip_size, offset);
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(++buf_dbg_counter % 37 || buf_validate());
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
- goto loop;
- }
-
- ut_ad(page_zip_get_size(&block->page.zip) == zip_size);
-
- must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
-
- if (must_read && mode == BUF_GET_IF_IN_POOL) {
- /* The page is only being read to buffer */
- buf_pool_mutex_exit();
-
- return(NULL);
- }
-
- switch (buf_block_get_state(block)) {
- buf_page_t* bpage;
- ibool success;
-
- case BUF_BLOCK_FILE_PAGE:
- break;
-
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_ZIP_DIRTY:
- bpage = &block->page;
- /* Protect bpage->buf_fix_count. */
- mutex_enter(&buf_pool_zip_mutex);
-
- if (bpage->buf_fix_count
- || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
- /* This condition often occurs when the buffer
- is not buffer-fixed, but I/O-fixed by
- buf_page_init_for_read(). */
- mutex_exit(&buf_pool_zip_mutex);
-wait_until_unfixed:
- /* The block is buffer-fixed or I/O-fixed.
- Try again later. */
- buf_pool_mutex_exit();
- os_thread_sleep(WAIT_FOR_READ);
-
- goto loop;
- }
-
- /* Allocate an uncompressed page. */
- buf_pool_mutex_exit();
- mutex_exit(&buf_pool_zip_mutex);
-
- block = buf_LRU_get_free_block(0);
- ut_a(block);
-
- buf_pool_mutex_enter();
- mutex_enter(&block->mutex);
-
- {
- buf_page_t* hash_bpage
- = buf_page_hash_get(space, offset);
-
- if (UNIV_UNLIKELY(bpage != hash_bpage)) {
- /* The buf_pool->page_hash was modified
- while buf_pool_mutex was released.
- Free the block that was allocated. */
-
- buf_LRU_block_free_non_file_page(block);
- mutex_exit(&block->mutex);
-
- block = (buf_block_t*) hash_bpage;
- goto loop2;
- }
- }
-
- if (UNIV_UNLIKELY
- (bpage->buf_fix_count
- || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
-
- /* The block was buffer-fixed or I/O-fixed
- while buf_pool_mutex was not held by this thread.
- Free the block that was allocated and try again.
- This should be extremely unlikely. */
-
- buf_LRU_block_free_non_file_page(block);
- mutex_exit(&block->mutex);
-
- goto wait_until_unfixed;
- }
-
- /* Move the compressed page from bpage to block,
- and uncompress it. */
-
- mutex_enter(&buf_pool_zip_mutex);
-
- buf_relocate(bpage, &block->page);
- buf_block_init_low(block);
- block->lock_hash_val = lock_rec_hash(space, offset);
-
- UNIV_MEM_DESC(&block->page.zip.data,
- page_zip_get_size(&block->page.zip), block);
-
- if (buf_page_get_state(&block->page)
- == BUF_BLOCK_ZIP_PAGE) {
- UT_LIST_REMOVE(list, buf_pool->zip_clean,
- &block->page);
- ut_ad(!block->page.in_flush_list);
- } else {
- /* Relocate buf_pool->flush_list. */
- buf_page_t* b;
-
- b = UT_LIST_GET_PREV(list, &block->page);
- ut_ad(block->page.in_flush_list);
- UT_LIST_REMOVE(list, buf_pool->flush_list,
- &block->page);
-
- if (b) {
- UT_LIST_INSERT_AFTER(
- list, buf_pool->flush_list, b,
- &block->page);
- } else {
- UT_LIST_ADD_FIRST(
- list, buf_pool->flush_list,
- &block->page);
- }
- }
-
- /* Buffer-fix, I/O-fix, and X-latch the block
- for the duration of the decompression.
- Also add the block to the unzip_LRU list. */
- block->page.state = BUF_BLOCK_FILE_PAGE;
-
- /* Insert at the front of unzip_LRU list */
- buf_unzip_LRU_add_block(block, FALSE);
-
- block->page.buf_fix_count = 1;
- buf_block_set_io_fix(block, BUF_IO_READ);
- rw_lock_x_lock(&block->lock);
- mutex_exit(&block->mutex);
- mutex_exit(&buf_pool_zip_mutex);
- buf_pool->n_pend_unzip++;
-
- buf_buddy_free(bpage, sizeof *bpage);
-
- buf_pool_mutex_exit();
-
- /* Decompress the page and apply buffered operations
- while not holding buf_pool_mutex or block->mutex. */
- success = buf_zip_decompress(block, srv_use_checksums);
-
- if (UNIV_LIKELY(success)) {
- ibuf_merge_or_delete_for_page(block, space, offset,
- zip_size, TRUE);
- }
-
- /* Unfix and unlatch the block. */
- buf_pool_mutex_enter();
- mutex_enter(&block->mutex);
- block->page.buf_fix_count--;
- buf_block_set_io_fix(block, BUF_IO_NONE);
- mutex_exit(&block->mutex);
- buf_pool->n_pend_unzip--;
- rw_lock_x_unlock(&block->lock);
-
- if (UNIV_UNLIKELY(!success)) {
-
- buf_pool_mutex_exit();
- return(NULL);
- }
-
- break;
-
- case BUF_BLOCK_ZIP_FREE:
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- ut_error;
- break;
- }
-
- ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-
- mutex_enter(&block->mutex);
- UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
-
- buf_block_buf_fix_inc(block, file, line);
-
- mutex_exit(&block->mutex);
-
- /* Check if this is the first access to the page */
-
- access_time = buf_page_is_accessed(&block->page);
-
- buf_pool_mutex_exit();
-
- buf_page_set_accessed_make_young(&block->page, access_time);
-
-#ifdef UNIV_DEBUG_FILE_ACCESSES
- ut_a(!block->page.file_page_was_freed);
-#endif
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(++buf_dbg_counter % 5771 || buf_validate());
- ut_a(block->page.buf_fix_count > 0);
- ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
- switch (rw_latch) {
- case RW_NO_LATCH:
- if (must_read) {
- /* Let us wait until the read operation
- completes */
-
- for (;;) {
- enum buf_io_fix io_fix;
-
- mutex_enter(&block->mutex);
- io_fix = buf_block_get_io_fix(block);
- mutex_exit(&block->mutex);
-
- if (io_fix == BUF_IO_READ) {
-
- os_thread_sleep(WAIT_FOR_READ);
- } else {
- break;
- }
- }
- }
-
- fix_type = MTR_MEMO_BUF_FIX;
- break;
-
- case RW_S_LATCH:
- rw_lock_s_lock_func(&(block->lock), 0, file, line);
-
- fix_type = MTR_MEMO_PAGE_S_FIX;
- break;
-
- default:
- ut_ad(rw_latch == RW_X_LATCH);
- rw_lock_x_lock_func(&(block->lock), 0, file, line);
-
- fix_type = MTR_MEMO_PAGE_X_FIX;
- break;
- }
-
- mtr_memo_push(mtr, block, fix_type);
-
- if (!access_time) {
- /* In the case of a first access, try to apply linear
- read-ahead */
-
- buf_read_ahead_linear(space, zip_size, offset);
- }
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(buf_block_get_space(block),
- buf_block_get_page_no(block)) == 0);
-#endif
- return(block);
-}
-
-/********************************************************************//**
-This is the general function used to get optimistic access to a database
-page. The caller passes a guessed block together with the modify clock
-value it observed earlier. The block is buffer-fixed under block->mutex,
-the requested latch is then requested without waiting, and the access
-succeeds only if
-(1) the block is in state BUF_BLOCK_FILE_PAGE,
-(2) the latch was granted immediately, and
-(3) block->modify_clock still equals modify_clock.
-On failure the buffer-fix (and the latch, if it was acquired) is released
-again and nothing is pushed to the mini-transaction memo. On success the
-block is pushed to the memo with the fix type matching rw_latch and
-buf_pool->stat.n_page_gets is incremented.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-buf_page_optimistic_get_func(
-/*=========================*/
- ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
- buf_block_t* block, /*!< in: guessed buffer block */
- ib_uint64_t modify_clock,/*!< in: modify clock value if mode is
- ..._GUESS_ON_CLOCK */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mini-transaction */
-{
- unsigned access_time;
- ibool success;
- ulint fix_type;
-
- ut_ad(mtr && block);
- ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
-
- mutex_enter(&block->mutex);
-
- if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {
-
- mutex_exit(&block->mutex);
-
- return(FALSE);
- }
-
- buf_block_buf_fix_inc(block, file, line);
-
- mutex_exit(&block->mutex);
-
- /* Check if this is the first access to the page.
- We do a dirty read on purpose, to avoid mutex contention.
- This field is only used for heuristic purposes; it does not
- affect correctness. Note that this bookkeeping is done before
- the latch is requested, so it also happens when the function
- later returns FALSE. */
-
- access_time = buf_page_is_accessed(&block->page);
- buf_page_set_accessed_make_young(&block->page, access_time);
-
- ut_ad(!ibuf_inside()
- || ibuf_page(buf_block_get_space(block),
- buf_block_get_zip_size(block),
- buf_block_get_page_no(block), NULL));
-
- if (rw_latch == RW_S_LATCH) {
- success = rw_lock_s_lock_nowait(&(block->lock),
- file, line);
- fix_type = MTR_MEMO_PAGE_S_FIX;
- } else {
- success = rw_lock_x_lock_func_nowait(&(block->lock),
- file, line);
- fix_type = MTR_MEMO_PAGE_X_FIX;
- }
-
- if (UNIV_UNLIKELY(!success)) {
- mutex_enter(&block->mutex);
- buf_block_buf_fix_dec(block);
- mutex_exit(&block->mutex);
-
- return(FALSE);
- }
-
- if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) {
- buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
-
- if (rw_latch == RW_S_LATCH) {
- rw_lock_s_unlock(&(block->lock));
- } else {
- rw_lock_x_unlock(&(block->lock));
- }
-
- mutex_enter(&block->mutex);
- buf_block_buf_fix_dec(block);
- mutex_exit(&block->mutex);
-
- return(FALSE);
- }
-
- mtr_memo_push(mtr, block, fix_type);
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(++buf_dbg_counter % 5771 || buf_validate());
- ut_a(block->page.buf_fix_count > 0);
- ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-#ifdef UNIV_DEBUG_FILE_ACCESSES
- ut_a(block->page.file_page_was_freed == FALSE);
-#endif
- if (UNIV_UNLIKELY(!access_time)) {
- /* In the case of a first access, try to apply linear
- read-ahead */
-
- buf_read_ahead_linear(buf_block_get_space(block),
- buf_block_get_zip_size(block),
- buf_block_get_page_no(block));
- }
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(buf_block_get_space(block),
- buf_block_get_page_no(block)) == 0);
-#endif
- buf_pool->stat.n_page_gets++;
-
- return(TRUE);
-}
-
-/********************************************************************//**
-This is used to get access to a known database page, when no waiting can be
-done. For example, if a search in an adaptive hash index leads us to this
-frame.
-The block is buffer-fixed under block->mutex and the requested latch is
-then requested without waiting. FALSE is returned, with the block left
-unfixed, if the block is in state BUF_BLOCK_REMOVE_HASH or if the latch is
-not granted immediately. Any state other than BUF_BLOCK_REMOVE_HASH or
-BUF_BLOCK_FILE_PAGE fails an assertion.
-Before the latch is requested, the LRU bookkeeping is updated: if mode is
-BUF_MAKE_YOUNG and buf_page_peek_if_too_old() holds,
-buf_LRU_make_block_young() is called; otherwise, if the page has not been
-accessed yet, its access time is set. On success the block is pushed to the
-mini-transaction memo and buf_pool->stat.n_page_gets is incremented.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-buf_page_get_known_nowait(
-/*======================*/
- ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
- buf_block_t* block, /*!< in: the known page */
- ulint mode, /*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mini-transaction */
-{
- ibool success;
- ulint fix_type;
-
- ut_ad(mtr);
- ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
-
- mutex_enter(&block->mutex);
-
- if (buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH) {
- /* Another thread is just freeing the block from the LRU list
- of the buffer pool: do not try to access this page; this
- attempt to access the page can only come through the hash
- index because when the buffer block state is ..._REMOVE_HASH,
- we have already removed it from the page address hash table
- of the buffer pool. */
-
- mutex_exit(&block->mutex);
-
- return(FALSE);
- }
-
- ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-
- buf_block_buf_fix_inc(block, file, line);
-
- mutex_exit(&block->mutex);
-
- if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
- buf_pool_mutex_enter();
- buf_LRU_make_block_young(&block->page);
- buf_pool_mutex_exit();
- } else if (!buf_page_is_accessed(&block->page)) {
- /* Above, we do a dirty read on purpose, to avoid
- mutex contention. The field buf_page_t::access_time
- is only used for heuristic purposes. Writes to the
- field must be protected by mutex, however. The time
- stamp is sampled before buf_pool_mutex is acquired. */
- ulint time_ms = ut_time_ms();
-
- buf_pool_mutex_enter();
- buf_page_set_accessed(&block->page, time_ms);
- buf_pool_mutex_exit();
- }
-
- ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
-
- if (rw_latch == RW_S_LATCH) {
- success = rw_lock_s_lock_nowait(&(block->lock),
- file, line);
- fix_type = MTR_MEMO_PAGE_S_FIX;
- } else {
- success = rw_lock_x_lock_func_nowait(&(block->lock),
- file, line);
- fix_type = MTR_MEMO_PAGE_X_FIX;
- }
-
- if (!success) {
- mutex_enter(&block->mutex);
- buf_block_buf_fix_dec(block);
- mutex_exit(&block->mutex);
-
- return(FALSE);
- }
-
- mtr_memo_push(mtr, block, fix_type);
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(++buf_dbg_counter % 5771 || buf_validate());
- ut_a(block->page.buf_fix_count > 0);
- ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-#ifdef UNIV_DEBUG_FILE_ACCESSES
- ut_a(block->page.file_page_was_freed == FALSE);
-#endif
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a((mode == BUF_KEEP_OLD)
- || (ibuf_count_get(buf_block_get_space(block),
- buf_block_get_page_no(block)) == 0));
-#endif
- buf_pool->stat.n_page_gets++;
-
- return(TRUE);
-}
-
-/*******************************************************************//**
-Given a tablespace id and page number tries to get that page. If the
-page is not in the buffer pool it is not loaded and NULL is returned.
-Suitable for using when holding the kernel mutex.
-The block is looked up with buf_block_hash_get() while holding
-buf_pool_mutex and is buffer-fixed under block->mutex. An S-latch is then
-requested without waiting; if that fails, an X-latch is requested without
-waiting. If neither latch is granted, the buffer-fix is released and NULL
-is returned. On success the block is pushed to the mini-transaction memo
-with the fix type of the latch that was obtained, and
-buf_pool->stat.n_page_gets is incremented.
-@return pointer to a page or NULL */
-UNIV_INTERN
-const buf_block_t*
-buf_page_try_get_func(
-/*==================*/
- ulint space_id,/*!< in: tablespace id */
- ulint page_no,/*!< in: page number */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mini-transaction */
-{
- buf_block_t* block;
- ibool success;
- ulint fix_type;
-
- buf_pool_mutex_enter();
- block = buf_block_hash_get(space_id, page_no);
-
- if (!block) {
- buf_pool_mutex_exit();
- return(NULL);
- }
-
- mutex_enter(&block->mutex);
- buf_pool_mutex_exit();
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- ut_a(buf_block_get_space(block) == space_id);
- ut_a(buf_block_get_page_no(block) == page_no);
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
- buf_block_buf_fix_inc(block, file, line);
- mutex_exit(&block->mutex);
-
- fix_type = MTR_MEMO_PAGE_S_FIX;
- success = rw_lock_s_lock_nowait(&block->lock, file, line);
-
- if (!success) {
- /* Let us try to get an X-latch. If the current thread
- is holding an X-latch on the page, we cannot get an
- S-latch. */
-
- fix_type = MTR_MEMO_PAGE_X_FIX;
- success = rw_lock_x_lock_func_nowait(&block->lock,
- file, line);
- }
-
- if (!success) {
- mutex_enter(&block->mutex);
- buf_block_buf_fix_dec(block);
- mutex_exit(&block->mutex);
-
- return(NULL);
- }
-
- mtr_memo_push(mtr, block, fix_type);
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(++buf_dbg_counter % 5771 || buf_validate());
- ut_a(block->page.buf_fix_count > 0);
- ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-#ifdef UNIV_DEBUG_FILE_ACCESSES
- ut_a(block->page.file_page_was_freed == FALSE);
-#endif /* UNIV_DEBUG_FILE_ACCESSES */
- buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
-
- buf_pool->stat.n_page_gets++;
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(buf_block_get_space(block),
- buf_block_get_page_no(block)) == 0);
-#endif
-
- return(block);
-}
-
-/********************************************************************//**
-Initialize some fields of a control block.
-Sets flush_type to BUF_FLUSH_LRU and io_fix to BUF_IO_NONE, zeroes
-buf_fix_count, freed_page_clock, access_time, newest_modification and
-oldest_modification, and invalidates the hash link with HASH_INVALIDATE.
-When UNIV_DEBUG_FILE_ACCESSES is defined, file_page_was_freed is also
-cleared. Other fields of the control block are not touched here. */
-UNIV_INLINE
-void
-buf_page_init_low(
-/*==============*/
- buf_page_t* bpage) /*!< in: block to init */
-{
- bpage->flush_type = BUF_FLUSH_LRU;
- bpage->io_fix = BUF_IO_NONE;
- bpage->buf_fix_count = 0;
- bpage->freed_page_clock = 0;
- bpage->access_time = 0;
- bpage->newest_modification = 0;
- bpage->oldest_modification = 0;
- HASH_INVALIDATE(bpage, hash);
-#ifdef UNIV_DEBUG_FILE_ACCESSES
- bpage->file_page_was_freed = FALSE;
-#endif /* UNIV_DEBUG_FILE_ACCESSES */
-}
-
-/********************************************************************//**
-Inits a page to the buffer buf_pool.
-The caller must hold buf_pool_mutex and block->mutex (asserted in debug
-builds), and the block must not already be in state BUF_BLOCK_FILE_PAGE.
-The block is marked as the file page (space, offset), its low-level fields
-and lock_hash_val are set, and its page descriptor is inserted into
-buf_pool->page_hash.
-If a page with the same (space, offset) is already in the hash table, an
-error message is written to stderr and ut_error is invoked; in UNIV_DEBUG
-or UNIV_BUF_DEBUG builds both mutexes are released first and the buffer
-pool and LRU list are printed and validated. */
-static
-void
-buf_page_init(
-/*==========*/
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: offset of the page within space
- in units of a page */
- buf_block_t* block) /*!< in: block to init */
-{
- buf_page_t* hash_page;
-
- ut_ad(buf_pool_mutex_own());
- ut_ad(mutex_own(&(block->mutex)));
- ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
-
- /* Set the state of the block */
- buf_block_set_file_page(block, space, offset);
-
-#ifdef UNIV_DEBUG_VALGRIND
- if (!space) {
- /* Silence valid Valgrind warnings about uninitialized
- data being written to data files. There are some unused
- bytes on some pages that InnoDB does not initialize. */
- UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
- }
-#endif /* UNIV_DEBUG_VALGRIND */
-
- buf_block_init_low(block);
-
- block->lock_hash_val = lock_rec_hash(space, offset);
-
- /* Insert into the hash table of file pages. First make sure
- that no page with the same address is already there. */
-
- hash_page = buf_page_hash_get(space, offset);
-
- if (UNIV_LIKELY_NULL(hash_page)) {
- fprintf(stderr,
- "InnoDB: Error: page %lu %lu already found"
- " in the hash table: %p, %p\n",
- (ulong) space,
- (ulong) offset,
- (const void*) hash_page, (const void*) block);
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- mutex_exit(&block->mutex);
- buf_pool_mutex_exit();
- buf_print();
- buf_LRU_print();
- buf_validate();
- buf_LRU_validate();
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
- ut_error;
- }
-
- buf_page_init_low(&block->page);
-
- ut_ad(!block->page.in_zip_hash);
- ut_ad(!block->page.in_page_hash);
- ut_d(block->page.in_page_hash = TRUE);
- HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
- buf_page_address_fold(space, offset), &block->page);
-}
-
-/********************************************************************//**
-Function which inits a page for read to the buffer buf_pool. If the page is
-(1) already in buf_pool, or
-(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
-(3) if the space is deleted or being deleted,
-then this function does nothing.
-Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
-on the buffer frame. The io-handler must take care that the flag is cleared
-and the lock released later.
-Two kinds of descriptors can be set up. If zip_size is nonzero, unzip is
-FALSE and recovery is not in progress, only a compressed-only control block
-(state BUF_BLOCK_ZIP_PAGE) is allocated with buf_buddy_alloc(); in that
-case only io_fix is set and no exclusive lock is taken. Otherwise a full
-block is obtained with buf_LRU_get_free_block() before buf_pool_mutex is
-acquired, and it is freed again if the page turns out not to be needed.
-In case (3) *err is set to DB_TABLESPACE_DELETED; in every other case it is
-DB_SUCCESS. When a descriptor is returned, buf_pool->n_pend_reads has been
-incremented and the descriptor has been added to buf_pool->page_hash and
-to the old end of the LRU list.
-@return pointer to the block or NULL */
-UNIV_INTERN
-buf_page_t*
-buf_page_init_for_read(
-/*===================*/
- ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
- ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size, or 0 */
- ibool unzip, /*!< in: TRUE=request uncompressed page */
- ib_int64_t tablespace_version,/*!< in: prevents reading from a wrong
- version of the tablespace in case we have done
- DISCARD + IMPORT */
- ulint offset) /*!< in: page number */
-{
- buf_block_t* block;
- buf_page_t* bpage;
- mtr_t mtr;
- ibool lru = FALSE;
- void* data;
-
- ut_ad(buf_pool);
-
- *err = DB_SUCCESS;
-
- if (mode == BUF_READ_IBUF_PAGES_ONLY) {
- /* It is a read-ahead within an ibuf routine. The
- mini-transaction started here stays open until func_exit,
- unless we return early below. */
-
- ut_ad(!ibuf_bitmap_page(zip_size, offset));
- ut_ad(ibuf_inside());
-
- mtr_start(&mtr);
-
- if (!recv_no_ibuf_operations
- && !ibuf_page(space, zip_size, offset, &mtr)) {
-
- mtr_commit(&mtr);
-
- return(NULL);
- }
- } else {
- ut_ad(mode == BUF_READ_ANY_PAGE);
- }
-
- if (zip_size && UNIV_LIKELY(!unzip)
- && UNIV_LIKELY(!recv_recovery_is_on())) {
- block = NULL;
- } else {
- block = buf_LRU_get_free_block(0);
- ut_ad(block);
- }
-
- buf_pool_mutex_enter();
-
- if (buf_page_hash_get(space, offset)) {
- /* The page is already in the buffer pool. Give back the
- block reserved above, if any. This label is also the
- target for the deleted-tablespace case below. */
-err_exit:
- if (block) {
- mutex_enter(&block->mutex);
- buf_LRU_block_free_non_file_page(block);
- mutex_exit(&block->mutex);
- }
-
- bpage = NULL;
- goto func_exit;
- }
-
- if (fil_tablespace_deleted_or_being_deleted_in_mem(
- space, tablespace_version)) {
- /* The page belongs to a space which has been
- deleted or is being deleted. */
- *err = DB_TABLESPACE_DELETED;
-
- goto err_exit;
- }
-
- if (block) {
- bpage = &block->page;
- mutex_enter(&block->mutex);
- buf_page_init(space, offset, block);
-
- /* The block must be put to the LRU list, to the old blocks */
- buf_LRU_add_block(bpage, TRUE/* to old blocks */);
-
- /* We set a pass-type x-lock on the frame because then
- the same thread which called for the read operation
- (and is running now at this point of code) can wait
- for the read to complete by waiting for the x-lock on
- the frame; if the x-lock were recursive, the same
- thread would illegally get the x-lock before the page
- read is completed. The x-lock is cleared by the
- io-handler thread. */
-
- rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
- buf_page_set_io_fix(bpage, BUF_IO_READ);
-
- if (UNIV_UNLIKELY(zip_size)) {
- page_zip_set_size(&block->page.zip, zip_size);
-
- /* buf_pool_mutex may be released and
- reacquired by buf_buddy_alloc(). Thus, we
- must release block->mutex in order not to
- break the latching order in the reacquisition
- of buf_pool_mutex. We also must defer this
- operation until after the block descriptor has
- been added to buf_pool->LRU and
- buf_pool->page_hash. */
- mutex_exit(&block->mutex);
- data = buf_buddy_alloc(zip_size, &lru);
- mutex_enter(&block->mutex);
- block->page.zip.data = data;
-
- /* To maintain the invariant
- block->in_unzip_LRU_list
- == buf_page_belongs_to_unzip_LRU(&block->page)
- we have to add this block to unzip_LRU
- after block->page.zip.data is set. */
- ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
- buf_unzip_LRU_add_block(block, TRUE);
- }
-
- mutex_exit(&block->mutex);
- } else {
- /* Defer buf_buddy_alloc() until after the block has
- been found not to exist. The buf_buddy_alloc() and
- buf_buddy_free() calls may be expensive because of
- buf_buddy_relocate(). */
-
- /* The compressed page must be allocated before the
- control block (bpage), in order to avoid the
- invocation of buf_buddy_relocate_block() on
- uninitialized data. */
- data = buf_buddy_alloc(zip_size, &lru);
- bpage = buf_buddy_alloc(sizeof *bpage, &lru);
-
- /* If buf_buddy_alloc() allocated storage from the LRU list,
- it released and reacquired buf_pool_mutex. Thus, we must
- check the page_hash again, as it may have been modified. */
- if (UNIV_UNLIKELY(lru)
- && UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {
-
- /* The block was added by some other thread. */
- buf_buddy_free(bpage, sizeof *bpage);
- buf_buddy_free(data, zip_size);
-
- bpage = NULL;
- goto func_exit;
- }
-
- page_zip_des_init(&bpage->zip);
- page_zip_set_size(&bpage->zip, zip_size);
- bpage->zip.data = data;
-
- mutex_enter(&buf_pool_zip_mutex);
- UNIV_MEM_DESC(bpage->zip.data,
- page_zip_get_size(&bpage->zip), bpage);
- buf_page_init_low(bpage);
- bpage->state = BUF_BLOCK_ZIP_PAGE;
- bpage->space = space;
- bpage->offset = offset;
-
-#ifdef UNIV_DEBUG
- bpage->in_page_hash = FALSE;
- bpage->in_zip_hash = FALSE;
- bpage->in_flush_list = FALSE;
- bpage->in_free_list = FALSE;
- bpage->in_LRU_list = FALSE;
-#endif /* UNIV_DEBUG */
-
- ut_d(bpage->in_page_hash = TRUE);
- HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
- buf_page_address_fold(space, offset), bpage);
-
- /* The block must be put to the LRU list, to the old blocks */
- buf_LRU_add_block(bpage, TRUE/* to old blocks */);
- buf_LRU_insert_zip_clean(bpage);
-
- buf_page_set_io_fix(bpage, BUF_IO_READ);
-
- mutex_exit(&buf_pool_zip_mutex);
- }
-
- buf_pool->n_pend_reads++;
-func_exit:
- buf_pool_mutex_exit();
-
- if (mode == BUF_READ_IBUF_PAGES_ONLY) {
-
- mtr_commit(&mtr);
- }
-
- ut_ad(!bpage || buf_page_in_file(bpage));
- return(bpage);
-}
-
-/********************************************************************//**
-Initializes a page to the buffer buf_pool. The page is usually not read
-from a file even if it cannot be found in the buffer buf_pool. This is one
-of the functions which perform to a block a state transition NOT_USED =>
-FILE_PAGE (the other is buf_page_get_gen).
-A free block is reserved with buf_LRU_get_free_block() before
-buf_pool_mutex is acquired. If the page is already in the page hash, the
-reserved block is given back with buf_block_free() and the existing page
-is fetched with buf_page_get_with_no_latch(). Otherwise the reserved block
-is initialized for (space, offset), added to the LRU list, buffer-fixed and
-pushed to the mini-transaction memo as MTR_MEMO_BUF_FIX. In that case the
-FIL_PAGE_PREV and FIL_PAGE_NEXT fields of the frame are filled with 0xff
-bytes, the page type is set to FIL_PAGE_TYPE_ALLOCATED and the file flush
-lsn field is zeroed; the rest of the frame is not written here.
-@return pointer to the block, page bufferfixed */
-UNIV_INTERN
-buf_block_t*
-buf_page_create(
-/*============*/
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: offset of the page within space in units of
- a page */
- ulint zip_size,/*!< in: compressed page size, or 0 */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- buf_frame_t* frame;
- buf_block_t* block;
- buf_block_t* free_block = NULL;
- ulint time_ms = ut_time_ms();
-
- ut_ad(mtr);
- ut_ad(space || !zip_size);
-
- free_block = buf_LRU_get_free_block(0);
-
- buf_pool_mutex_enter();
-
- block = (buf_block_t*) buf_page_hash_get(space, offset);
-
- if (block && buf_page_in_file(&block->page)) {
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(space, offset) == 0);
-#endif
-#ifdef UNIV_DEBUG_FILE_ACCESSES
- block->page.file_page_was_freed = FALSE;
-#endif /* UNIV_DEBUG_FILE_ACCESSES */
-
- /* Page can be found in buf_pool: the block reserved
- above is not needed. */
- buf_pool_mutex_exit();
-
- buf_block_free(free_block);
-
- return(buf_page_get_with_no_latch(space, zip_size,
- offset, mtr));
- }
-
- /* If we get here, the page was not in buf_pool: init it there */
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr, "Creating space %lu page %lu to buffer\n",
- (ulong) space, (ulong) offset);
- }
-#endif /* UNIV_DEBUG */
-
- block = free_block;
-
- mutex_enter(&block->mutex);
-
- buf_page_init(space, offset, block);
-
- /* The block must be put to the LRU list */
- buf_LRU_add_block(&block->page, FALSE);
-
- buf_block_buf_fix_inc(block, __FILE__, __LINE__);
- buf_pool->stat.n_pages_created++;
-
- if (zip_size) {
- void* data;
- ibool lru;
-
- /* Prevent race conditions during buf_buddy_alloc(),
- which may release and reacquire buf_pool_mutex,
- by IO-fixing and X-latching the block. */
-
- buf_page_set_io_fix(&block->page, BUF_IO_READ);
- rw_lock_x_lock(&block->lock);
-
- page_zip_set_size(&block->page.zip, zip_size);
- mutex_exit(&block->mutex);
- /* buf_pool_mutex may be released and reacquired by
- buf_buddy_alloc(). Thus, we must release block->mutex
- in order not to break the latching order in
- the reacquisition of buf_pool_mutex. We also must
- defer this operation until after the block descriptor
- has been added to buf_pool->LRU and buf_pool->page_hash. */
- data = buf_buddy_alloc(zip_size, &lru);
- mutex_enter(&block->mutex);
- block->page.zip.data = data;
-
- /* To maintain the invariant
- block->in_unzip_LRU_list
- == buf_page_belongs_to_unzip_LRU(&block->page)
- we have to add this block to unzip_LRU after
- block->page.zip.data is set. */
- ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
- buf_unzip_LRU_add_block(block, FALSE);
-
- buf_page_set_io_fix(&block->page, BUF_IO_NONE);
- rw_lock_x_unlock(&block->lock);
- }
-
- buf_page_set_accessed(&block->page, time_ms);
-
- buf_pool_mutex_exit();
-
- mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
-
- mutex_exit(&block->mutex);
-
- /* Delete possible entries for the page from the insert buffer:
- such can exist if the page belonged to an index which was dropped */
-
- ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);
-
- /* Flush pages from the end of the LRU list if necessary */
- buf_flush_free_margin();
-
- frame = block->frame;
-
- memset(frame + FIL_PAGE_PREV, 0xff, 4);
- memset(frame + FIL_PAGE_NEXT, 0xff, 4);
- mach_write_to_2(frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED);
-
- /* Reset to zero the file flush lsn field in the page; if the first
- page of an ibdata file is 'created' in this function into the buffer
- pool then we lose the original contents of the file flush lsn stamp.
- Then InnoDB could in a crash recovery print a big, false, corruption
- warning if the stamp contains an lsn bigger than the ib_logfile lsn. */
-
- memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(++buf_dbg_counter % 357 || buf_validate());
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(buf_block_get_space(block),
- buf_block_get_page_no(block)) == 0);
-#endif
- return(block);
-}
-
-/********************************************************************//**
-Completes an asynchronous read or write request of a file page to or from
-the buffer pool.
-The kind of request is taken from the io_fix field of bpage, which must be
-BUF_IO_READ or BUF_IO_WRITE.
-For a read: a compressed page that has an uncompressed frame is
-decompressed; the space id and page number stored in the page are compared
-with those of bpage and a mismatch is reported to stderr; the page is
-checked with buf_page_is_corrupted(), and if it is corrupt (or the
-decompression failed) a diagnostic is printed and the process exits unless
-srv_force_recovery >= SRV_FORCE_IGNORE_CORRUPT; during recovery
-recv_recover_page() is applied; and buffered insert buffer operations are
-merged for uncompressed pages unless recv_no_ibuf_operations is set.
-For both reads and writes, io_fix is then reset to BUF_IO_NONE under
-buf_pool_mutex and the page mutex, the statistics are updated, and for
-uncompressed pages the latch held for the I/O is released (x-latch for a
-read, s-latch for a write). A write additionally calls
-buf_flush_write_complete(). */
-UNIV_INTERN
-void
-buf_page_io_complete(
-/*=================*/
- buf_page_t* bpage) /*!< in: pointer to the block in question */
-{
- enum buf_io_fix io_type;
- const ibool uncompressed = (buf_page_get_state(bpage)
- == BUF_BLOCK_FILE_PAGE);
-
- ut_a(buf_page_in_file(bpage));
-
- /* We do not need protect io_fix here by mutex to read
- it because this is the only function where we can change the value
- from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code
- ensures that this is the only thread that handles the i/o for this
- block. */
-
- io_type = buf_page_get_io_fix(bpage);
- ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);
-
- if (io_type == BUF_IO_READ) {
- ulint read_page_no;
- ulint read_space_id;
- byte* frame;
-
- if (buf_page_get_zip_size(bpage)) {
- frame = bpage->zip.data;
- buf_pool->n_pend_unzip++;
- if (uncompressed
- && !buf_zip_decompress((buf_block_t*) bpage,
- FALSE)) {
-
- buf_pool->n_pend_unzip--;
- goto corrupt;
- }
- buf_pool->n_pend_unzip--;
- } else {
- ut_a(uncompressed);
- frame = ((buf_block_t*) bpage)->frame;
- }
-
- /* If this page is not uninitialized and not in the
- doublewrite buffer, then the page number and space id
- should be the same as in block. */
- read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
- read_space_id = mach_read_from_4(
- frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
-
- if (bpage->space == TRX_SYS_SPACE
- && trx_doublewrite_page_inside(bpage->offset)) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: reading page %lu\n"
- "InnoDB: which is in the"
- " doublewrite buffer!\n",
- (ulong) bpage->offset);
- } else if (!read_space_id && !read_page_no) {
- /* This is likely an uninitialized page. */
- } else if ((bpage->space
- && bpage->space != read_space_id)
- || bpage->offset != read_page_no) {
- /* We did not compare space_id to read_space_id
- if bpage->space == 0, because the field on the
- page may contain garbage in MySQL < 4.1.1,
- which only supported bpage->space == 0. */
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: space id and page n:o"
- " stored in the page\n"
- "InnoDB: read in are %lu:%lu,"
- " should be %lu:%lu!\n",
- (ulong) read_space_id, (ulong) read_page_no,
- (ulong) bpage->space,
- (ulong) bpage->offset);
- }
-
- /* From version 3.23.38 up we store the page checksum
- to the 4 first bytes of the page end lsn field. The
- corrupt label below is also reached directly when
- buf_zip_decompress() fails above. */
-
- if (buf_page_is_corrupted(frame,
- buf_page_get_zip_size(bpage))) {
-corrupt:
- fprintf(stderr,
- "InnoDB: Database page corruption on disk"
- " or a failed\n"
- "InnoDB: file read of page %lu.\n"
- "InnoDB: You may have to recover"
- " from a backup.\n",
- (ulong) bpage->offset);
- buf_page_print(frame, buf_page_get_zip_size(bpage));
- fprintf(stderr,
- "InnoDB: Database page corruption on disk"
- " or a failed\n"
- "InnoDB: file read of page %lu.\n"
- "InnoDB: You may have to recover"
- " from a backup.\n",
- (ulong) bpage->offset);
- fputs("InnoDB: It is also possible that"
- " your operating\n"
- "InnoDB: system has corrupted its"
- " own file cache\n"
- "InnoDB: and rebooting your computer"
- " removes the\n"
- "InnoDB: error.\n"
- "InnoDB: If the corrupt page is an index page\n"
- "InnoDB: you can also try to"
- " fix the corruption\n"
- "InnoDB: by dumping, dropping,"
- " and reimporting\n"
- "InnoDB: the corrupt table."
- " You can use CHECK\n"
- "InnoDB: TABLE to scan your"
- " table for corruption.\n"
- "InnoDB: See also "
- REFMAN "forcing-recovery.html\n"
- "InnoDB: about forcing recovery.\n", stderr);
-
- if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
- fputs("InnoDB: Ending processing because of"
- " a corrupt database page.\n",
- stderr);
- exit(1);
- }
- }
-
- if (recv_recovery_is_on()) {
- /* Pages must be uncompressed for crash recovery. */
- ut_a(uncompressed);
- recv_recover_page(TRUE, (buf_block_t*) bpage);
- }
-
- if (uncompressed && !recv_no_ibuf_operations) {
- ibuf_merge_or_delete_for_page(
- (buf_block_t*) bpage, bpage->space,
- bpage->offset, buf_page_get_zip_size(bpage),
- TRUE);
- }
- }
-
- buf_pool_mutex_enter();
- mutex_enter(buf_page_get_mutex(bpage));
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
- if (io_type == BUF_IO_WRITE || uncompressed) {
- /* For BUF_IO_READ of compressed-only blocks, the
- buffered operations will be merged by buf_page_get_gen()
- after the block has been uncompressed. */
- ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
- }
-#endif
- /* Because this thread which does the unlocking is not the same that
- did the locking, we use a pass value != 0 in unlock, which simply
- removes the newest lock debug record, without checking the thread
- id. */
-
- buf_page_set_io_fix(bpage, BUF_IO_NONE);
-
- switch (io_type) {
- case BUF_IO_READ:
- /* NOTE that the call to ibuf may have moved the ownership of
- the x-latch to this OS thread: do not let this confuse you in
- debugging! */
-
- ut_ad(buf_pool->n_pend_reads > 0);
- buf_pool->n_pend_reads--;
- buf_pool->stat.n_pages_read++;
-
- if (uncompressed) {
- rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,
- BUF_IO_READ);
- }
-
- break;
-
- case BUF_IO_WRITE:
- /* Write means a flush operation: call the completion
- routine in the flush system */
-
- buf_flush_write_complete(bpage);
-
- if (uncompressed) {
- rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
- BUF_IO_WRITE);
- }
-
- buf_pool->stat.n_pages_written++;
-
- break;
-
- default:
- ut_error;
- }
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr, "Has %s page space %lu page no %lu\n",
- io_type == BUF_IO_READ ? "read" : "written",
- (ulong) buf_page_get_space(bpage),
- (ulong) buf_page_get_page_no(bpage));
- }
-#endif /* UNIV_DEBUG */
-
- mutex_exit(buf_page_get_mutex(bpage));
- buf_pool_mutex_exit();
-}
-
-/*********************************************************************//**
-Invalidates the file pages in the buffer pool when an archive recovery is
-completed. All the file pages buffered must be in a replaceable state when
-this function is called: not latched and not modified.
-The function first waits, for each flush type, until any pending flush
-batch has ended. It then calls buf_LRU_search_and_free_block() repeatedly
-until that function reports that nothing was freed. Finally, under
-buf_pool_mutex, it resets freed_page_clock, LRU_old, LRU_old_len and
-LRU_flush_ended, zeroes buf_pool->stat and calls buf_refresh_io_stats(). */
-UNIV_INTERN
-void
-buf_pool_invalidate(void)
-/*=====================*/
-{
- ibool freed;
- enum buf_flush i;
-
- buf_pool_mutex_enter();
-
- for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
-
- /* As this function is called during startup and
- during redo application phase during recovery, InnoDB
- is single threaded (apart from IO helper threads) at
- this stage. No new write batch can be in initialization
- stage at this point. */
- ut_ad(buf_pool->init_flush[i] == FALSE);
-
- /* However, it is possible that a write batch that has
- been posted earlier is still not complete. For buffer
- pool invalidation to proceed we must ensure there is NO
- write activity happening. buf_pool_mutex is released
- for the duration of the wait. */
- if (buf_pool->n_flush[i] > 0) {
- buf_pool_mutex_exit();
- buf_flush_wait_batch_end(i);
- buf_pool_mutex_enter();
- }
- }
-
- buf_pool_mutex_exit();
-
- ut_ad(buf_all_freed());
-
- freed = TRUE;
-
- while (freed) {
- freed = buf_LRU_search_and_free_block(100);
- }
-
- buf_pool_mutex_enter();
-
- ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
- ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
-
- buf_pool->freed_page_clock = 0;
- buf_pool->LRU_old = NULL;
- buf_pool->LRU_old_len = 0;
- buf_pool->LRU_flush_ended = 0;
-
- memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
- buf_refresh_io_stats();
-
- buf_pool_mutex_exit();
-}
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/*********************************************************************//**
-Validates the buffer buf_pool data structure.
-
-Holding the buffer pool mutex, the function walks every block of every
-chunk (taking each block mutex in turn) and tallies the blocks by state
-and, for blocks with a pending write, by flush type. It then walks
-buf_pool->zip_clean and buf_pool->flush_list under buf_pool_zip_mutex to
-tally the compressed-only pages. The tallies are finally compared against
-the lengths of the LRU, free and flush lists and against
-buf_pool->n_flush[]. Every check is made with ut_a() or ut_error, and the
-only return statement returns TRUE. After the buffer pool mutex has been
-released, buf_LRU_validate() and buf_flush_validate() are asserted too.
-@return TRUE */
-UNIV_INTERN
-ibool
-buf_validate(void)
-/*==============*/
-{
- buf_page_t* b;
- buf_chunk_t* chunk;
- ulint i;
- ulint n_single_flush = 0;
- ulint n_lru_flush = 0;
- ulint n_list_flush = 0;
- ulint n_lru = 0;
- ulint n_flush = 0;
- ulint n_free = 0;
- ulint n_zip = 0;
-
- ut_ad(buf_pool);
-
- buf_pool_mutex_enter();
-
- chunk = buf_pool->chunks;
-
- /* Check the uncompressed blocks, one chunk at a time. */
-
- for (i = buf_pool->n_chunks; i--; chunk++) {
-
- ulint j;
- buf_block_t* block = chunk->blocks;
-
- for (j = chunk->size; j--; block++) {
-
- mutex_enter(&block->mutex);
-
- switch (buf_block_get_state(block)) {
- case BUF_BLOCK_ZIP_FREE:
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_ZIP_DIRTY:
- /* These should only occur on
- zip_clean, zip_free[], or flush_list. */
- ut_error;
- break;
-
- case BUF_BLOCK_FILE_PAGE:
- ut_a(buf_page_hash_get(buf_block_get_space(
- block),
- buf_block_get_page_no(
- block))
- == &block->page);
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(buf_page_get_io_fix(&block->page)
- == BUF_IO_READ
- || !ibuf_count_get(buf_block_get_space(
- block),
- buf_block_get_page_no(
- block)));
-#endif
- switch (buf_page_get_io_fix(&block->page)) {
- case BUF_IO_NONE:
- break;
-
- case BUF_IO_WRITE:
- switch (buf_page_get_flush_type(
- &block->page)) {
- case BUF_FLUSH_LRU:
- n_lru_flush++;
- ut_a(rw_lock_is_locked(
- &block->lock,
- RW_LOCK_SHARED));
- break;
- case BUF_FLUSH_LIST:
- n_list_flush++;
- break;
- case BUF_FLUSH_SINGLE_PAGE:
- n_single_flush++;
- break;
- default:
- ut_error;
- }
-
- break;
-
- case BUF_IO_READ:
-
- ut_a(rw_lock_is_locked(&block->lock,
- RW_LOCK_EX));
- break;
- }
-
- n_lru++;
-
- if (block->page.oldest_modification > 0) {
- n_flush++;
- }
-
- break;
-
- case BUF_BLOCK_NOT_USED:
- n_free++;
- break;
-
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- /* do nothing: blocks in these states are not tallied */
- break;
- }
-
- mutex_exit(&block->mutex);
- }
- }
-
- mutex_enter(&buf_pool_zip_mutex);
-
- /* Check clean compressed-only blocks. */
-
- for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
- b = UT_LIST_GET_NEXT(list, b)) {
- ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
- switch (buf_page_get_io_fix(b)) {
- case BUF_IO_NONE:
- /* All clean blocks should be I/O-unfixed. */
- break;
- case BUF_IO_READ:
- /* In buf_LRU_free_block(), we temporarily set
- b->io_fix = BUF_IO_READ for a newly allocated
- control block in order to prevent
- buf_page_get_gen() from decompressing the block. */
- break;
- default:
- ut_error;
- break;
- }
- ut_a(!b->oldest_modification);
- ut_a(buf_page_hash_get(b->space, b->offset) == b);
-
- n_lru++;
- n_zip++;
- }
-
- /* Check dirty compressed-only blocks. */
-
- for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
- b = UT_LIST_GET_NEXT(list, b)) {
- ut_ad(b->in_flush_list);
-
- switch (buf_page_get_state(b)) {
- case BUF_BLOCK_ZIP_DIRTY:
- ut_a(b->oldest_modification);
- n_lru++;
- n_flush++;
- n_zip++;
- switch (buf_page_get_io_fix(b)) {
- case BUF_IO_NONE:
- case BUF_IO_READ:
- break;
-
- case BUF_IO_WRITE:
- switch (buf_page_get_flush_type(b)) {
- case BUF_FLUSH_LRU:
- n_lru_flush++;
- break;
- case BUF_FLUSH_LIST:
- n_list_flush++;
- break;
- case BUF_FLUSH_SINGLE_PAGE:
- n_single_flush++;
- break;
- default:
- ut_error;
- }
- break;
- }
- break;
- case BUF_BLOCK_FILE_PAGE:
- /* uncompressed page: already tallied in the chunk scan above */
- break;
- case BUF_BLOCK_ZIP_FREE:
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- ut_error;
- break;
- }
- ut_a(buf_page_hash_get(b->space, b->offset) == b);
- }
-
- mutex_exit(&buf_pool_zip_mutex);
-
- if (n_lru + n_free > buf_pool->curr_size + n_zip) {
- fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n",
- (ulong) n_lru, (ulong) n_free,
- (ulong) buf_pool->curr_size, (ulong) n_zip);
- ut_error;
- }
-
- ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
- if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
- fprintf(stderr, "Free list len %lu, free blocks %lu\n",
- (ulong) UT_LIST_GET_LEN(buf_pool->free),
- (ulong) n_free);
- ut_error;
- }
- ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
-
- ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
- ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
- ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
-
- buf_pool_mutex_exit();
-
- ut_a(buf_LRU_validate());
- ut_a(buf_flush_validate());
-
- return(TRUE);
-}
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/*********************************************************************//**
-Prints info of the buffer buf_pool data structure to stderr.
-
-A summary of the pool-wide counters is printed first, while the buffer
-pool mutex is held. Still under the mutex, every block frame whose page
-type is FIL_PAGE_INDEX is attributed to its index id. After the mutex has
-been released, one line is printed per index id found, followed by the
-index name when dict_index_get_if_in_cache() returns the index. Finally
-buf_validate() is asserted. */
-UNIV_INTERN
-void
-buf_print(void)
-/*===========*/
-{
- dulint* index_ids;
- ulint* counts;
- ulint size;
- ulint i;
- ulint j;
- dulint id;
- ulint n_found;
- buf_chunk_t* chunk;
- dict_index_t* index;
-
- ut_ad(buf_pool);
-
- size = buf_pool->curr_size;
-
- index_ids = mem_alloc(sizeof(dulint) * size);
- counts = mem_alloc(sizeof(ulint) * size);
-
- buf_pool_mutex_enter();
-
- fprintf(stderr,
- "buf_pool size %lu\n"
- "database pages %lu\n"
- "free pages %lu\n"
- "modified database pages %lu\n"
- "n pending decompressions %lu\n"
- "n pending reads %lu\n"
- "n pending flush LRU %lu list %lu single page %lu\n"
- "pages made young %lu, not young %lu\n"
- "pages read %lu, created %lu, written %lu\n",
- (ulong) size,
- (ulong) UT_LIST_GET_LEN(buf_pool->LRU),
- (ulong) UT_LIST_GET_LEN(buf_pool->free),
- (ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
- (ulong) buf_pool->n_pend_unzip,
- (ulong) buf_pool->n_pend_reads,
- (ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
- (ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
- (ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
- (ulong) buf_pool->stat.n_pages_made_young,
- (ulong) buf_pool->stat.n_pages_not_made_young,
- (ulong) buf_pool->stat.n_pages_read,
- (ulong) buf_pool->stat.n_pages_created,
- (ulong) buf_pool->stat.n_pages_written);
-
- /* Count the number of blocks belonging to each index in the buffer.
- index_ids[0 .. n_found - 1] holds the distinct ids seen so far and
- counts[] the number of blocks seen for the id at the same position. */
-
- n_found = 0;
-
- chunk = buf_pool->chunks;
-
- for (i = buf_pool->n_chunks; i--; chunk++) {
- buf_block_t* block = chunk->blocks;
- ulint n_blocks = chunk->size;
-
- for (; n_blocks--; block++) {
- const buf_frame_t* frame = block->frame;
-
- if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {
-
- id = btr_page_get_index_id(frame);
-
- /* Look for the id in the index_ids array
- (linear search over the ids recorded so far) */
- j = 0;
-
- while (j < n_found) {
-
- if (ut_dulint_cmp(index_ids[j],
- id) == 0) {
- counts[j]++;
-
- break;
- }
- j++;
- }
-
- if (j == n_found) {
- n_found++;
- index_ids[j] = id;
- counts[j] = 1;
- }
- }
- }
- }
-
- buf_pool_mutex_exit();
-
- for (i = 0; i < n_found; i++) {
- index = dict_index_get_if_in_cache(index_ids[i]);
-
- fprintf(stderr,
- "Block count for index %lu in buffer is about %lu",
- (ulong) ut_dulint_get_low(index_ids[i]),
- (ulong) counts[i]);
-
- if (index) {
- putc(' ', stderr);
- dict_index_name_print(stderr, NULL, index);
- }
-
- putc('\n', stderr);
- }
-
- mem_free(index_ids);
- mem_free(counts);
-
- ut_a(buf_validate());
-}
-#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Counts the pages in the buffer pool that are buffer-fixed or I/O-fixed.
-Uncompressed file pages are found by scanning every block of every chunk;
-compressed-only pages are found on buf_pool->zip_clean and on
-buf_pool->flush_list. The buffer pool mutex is held for the whole scan.
-@return number of latched pages */
-UNIV_INTERN
-ulint
-buf_get_latched_pages_number(void)
-/*==============================*/
-{
-	ulint		n_latched	= 0;
-	ulint		chunk_no;
-	buf_page_t*	bpage;
-
-	buf_pool_mutex_enter();
-
-	/* Pass 1: uncompressed file pages in the chunks. */
-
-	for (chunk_no = 0; chunk_no < buf_pool->n_chunks; chunk_no++) {
-		buf_chunk_t*	chunk = &buf_pool->chunks[chunk_no];
-		ulint		block_no;
-
-		for (block_no = 0; block_no < chunk->size; block_no++) {
-			buf_block_t*	block = &chunk->blocks[block_no];
-
-			/* The state is tested before the block mutex is
-			taken; blocks in other states are passed over
-			without latching. */
-			if (buf_block_get_state(block)
-			    == BUF_BLOCK_FILE_PAGE) {
-
-				mutex_enter(&block->mutex);
-
-				if (block->page.buf_fix_count != 0
-				    || buf_page_get_io_fix(&block->page)
-				    != BUF_IO_NONE) {
-					n_latched++;
-				}
-
-				mutex_exit(&block->mutex);
-			}
-		}
-	}
-
-	mutex_enter(&buf_pool_zip_mutex);
-
-	/* Pass 2: clean compressed-only pages. */
-
-	bpage = UT_LIST_GET_FIRST(buf_pool->zip_clean);
-
-	while (bpage != NULL) {
-		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
-		ut_a(buf_page_get_io_fix(bpage) != BUF_IO_WRITE);
-
-		if (bpage->buf_fix_count != 0
-		    || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
-			n_latched++;
-		}
-
-		bpage = UT_LIST_GET_NEXT(list, bpage);
-	}
-
-	/* Pass 3: dirty compressed-only pages on the flush list. */
-
-	bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
-
-	while (bpage != NULL) {
-		ut_ad(bpage->in_flush_list);
-
-		switch (buf_page_get_state(bpage)) {
-		case BUF_BLOCK_FILE_PAGE:
-			/* uncompressed page: counted in pass 1 */
-			break;
-		case BUF_BLOCK_ZIP_DIRTY:
-			if (bpage->buf_fix_count != 0
-			    || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
-				n_latched++;
-			}
-			break;
-		case BUF_BLOCK_ZIP_FREE:
-		case BUF_BLOCK_ZIP_PAGE:
-		case BUF_BLOCK_NOT_USED:
-		case BUF_BLOCK_READY_FOR_USE:
-		case BUF_BLOCK_MEMORY:
-		case BUF_BLOCK_REMOVE_HASH:
-			/* none of these states belongs on the flush list */
-			ut_error;
-			break;
-		}
-
-		bpage = UT_LIST_GET_NEXT(list, bpage);
-	}
-
-	mutex_exit(&buf_pool_zip_mutex);
-	buf_pool_mutex_exit();
-
-	return(n_latched);
-}
-#endif /* UNIV_DEBUG */
-
-/*********************************************************************//**
-Adds up the pending reads and the pending writes of every flush type
-(LRU, flush list, single page). The counters are read without taking the
-buffer pool mutex.
-@return number of pending I/O operations */
-UNIV_INTERN
-ulint
-buf_get_n_pending_ios(void)
-/*=======================*/
-{
-	ulint	n_pending = buf_pool->n_pend_reads;
-
-	n_pending += buf_pool->n_flush[BUF_FLUSH_LRU];
-	n_pending += buf_pool->n_flush[BUF_FLUSH_LIST];
-	n_pending += buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE];
-
-	return(n_pending);
-}
-
-/*********************************************************************//**
-Computes the percentage of modified pages in the buffer pool: the length
-of the flush list relative to the combined length of the LRU and free
-lists. The list lengths are sampled under the buffer pool mutex.
-@return modified page percentage ratio */
-UNIV_INTERN
-ulint
-buf_get_modified_ratio_pct(void)
-/*============================*/
-{
-	ulint	n_dirty;
-	ulint	n_total;
-
-	buf_pool_mutex_enter();
-
-	n_dirty = UT_LIST_GET_LEN(buf_pool->flush_list);
-
-	/* The extra 1 keeps the divisor non-zero. */
-	n_total = 1 + UT_LIST_GET_LEN(buf_pool->LRU)
-		+ UT_LIST_GET_LEN(buf_pool->free);
-
-	buf_pool_mutex_exit();
-
-	return((100 * n_dirty) / n_total);
-}
-
-/*********************************************************************//**
-Prints info of the buffer i/o.
-
-The buffer pool mutex is held while the counters are read and printed.
-Per-second rates are computed against the snapshot kept in
-buf_pool->old_stat and buf_pool->last_printout_time. That snapshot is
-refreshed through buf_refresh_io_stats() before the mutex is released, so
-each call reports the activity since the previous refresh. */
-UNIV_INTERN
-void
-buf_print_io(
-/*=========*/
-	FILE*	file)	/*!< in/out: buffer where to print */
-{
-	time_t	current_time;
-	double	time_elapsed;
-	ulint	n_gets_diff;
-
-	ut_ad(buf_pool);
-
-	buf_pool_mutex_enter();
-
-	/* Every value handed to a %lu conversion is cast to ulong, as
-	printf requires an unsigned long there. For the pending-write
-	sums the cast has to wrap the whole sum: "(ulong) a + b" converts
-	only a, so the sum would be passed with the type of the addition
-	instead. NOTE(review): this matters where ulint is wider than
-	unsigned long - confirm for the supported platforms. */
-	fprintf(file,
-		"Buffer pool size %lu\n"
-		"Free buffers %lu\n"
-		"Database pages %lu\n"
-		"Old database pages %lu\n"
-		"Modified db pages %lu\n"
-		"Pending reads %lu\n"
-		"Pending writes: LRU %lu, flush list %lu, single page %lu\n",
-		(ulong) buf_pool->curr_size,
-		(ulong) UT_LIST_GET_LEN(buf_pool->free),
-		(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
-		(ulong) buf_pool->LRU_old_len,
-		(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
-		(ulong) buf_pool->n_pend_reads,
-		(ulong) (buf_pool->n_flush[BUF_FLUSH_LRU]
-			 + buf_pool->init_flush[BUF_FLUSH_LRU]),
-		(ulong) (buf_pool->n_flush[BUF_FLUSH_LIST]
-			 + buf_pool->init_flush[BUF_FLUSH_LIST]),
-		(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
-
-	/* The small constant keeps the divisor of the rate computations
-	below from being zero when no time has passed. */
-	current_time = time(NULL);
-	time_elapsed = 0.001 + difftime(current_time,
-					buf_pool->last_printout_time);
-
-	fprintf(file,
-		"Pages made young %lu, not young %lu\n"
-		"%.2f youngs/s, %.2f non-youngs/s\n"
-		"Pages read %lu, created %lu, written %lu\n"
-		"%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
-		(ulong) buf_pool->stat.n_pages_made_young,
-		(ulong) buf_pool->stat.n_pages_not_made_young,
-		(buf_pool->stat.n_pages_made_young
-		 - buf_pool->old_stat.n_pages_made_young)
-		/ time_elapsed,
-		(buf_pool->stat.n_pages_not_made_young
-		 - buf_pool->old_stat.n_pages_not_made_young)
-		/ time_elapsed,
-		(ulong) buf_pool->stat.n_pages_read,
-		(ulong) buf_pool->stat.n_pages_created,
-		(ulong) buf_pool->stat.n_pages_written,
-		(buf_pool->stat.n_pages_read
-		 - buf_pool->old_stat.n_pages_read)
-		/ time_elapsed,
-		(buf_pool->stat.n_pages_created
-		 - buf_pool->old_stat.n_pages_created)
-		/ time_elapsed,
-		(buf_pool->stat.n_pages_written
-		 - buf_pool->old_stat.n_pages_written)
-		/ time_elapsed);
-
-	n_gets_diff = buf_pool->stat.n_page_gets
-		- buf_pool->old_stat.n_page_gets;
-
-	if (n_gets_diff) {
-		/* All three figures are per 1000 page gets made since
-		the previous refresh of old_stat. */
-		fprintf(file,
-			"Buffer pool hit rate %lu / 1000,"
-			" young-making rate %lu / 1000 not %lu / 1000\n",
-			(ulong)
-			(1000 - ((1000 * (buf_pool->stat.n_pages_read
-					  - buf_pool->old_stat.n_pages_read))
-				 / n_gets_diff)),
-			(ulong)
-			(1000 * (buf_pool->stat.n_pages_made_young
-				 - buf_pool->old_stat.n_pages_made_young)
-			 / n_gets_diff),
-			(ulong)
-			(1000 * (buf_pool->stat.n_pages_not_made_young
-				 - buf_pool->old_stat.n_pages_not_made_young)
-			 / n_gets_diff));
-	} else {
-		fputs("No buffer pool page gets since the last printout\n",
-		      file);
-	}
-
-	/* Statistics about read ahead algorithm */
-	fprintf(file, "Pages read ahead %.2f/s,"
-		" evicted without access %.2f/s\n",
-		(buf_pool->stat.n_ra_pages_read
-		 - buf_pool->old_stat.n_ra_pages_read)
-		/ time_elapsed,
-		(buf_pool->stat.n_ra_pages_evicted
-		 - buf_pool->old_stat.n_ra_pages_evicted)
-		/ time_elapsed);
-
-	/* Print some values to help us with visualizing what is
-	happening with LRU eviction. */
-	fprintf(file,
-		"LRU len: %lu, unzip_LRU len: %lu\n"
-		"I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
-		(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
-		(ulong) UT_LIST_GET_LEN(buf_pool->unzip_LRU),
-		(ulong) buf_LRU_stat_sum.io,
-		(ulong) buf_LRU_stat_cur.io,
-		(ulong) buf_LRU_stat_sum.unzip,
-		(ulong) buf_LRU_stat_cur.unzip);
-
-	buf_refresh_io_stats();
-	buf_pool_mutex_exit();
-}
-
-/**********************************************************************//**
-Takes a new baseline for the per-second averages: copies the current
-statistics to buf_pool->old_stat and records the current time in
-buf_pool->last_printout_time. */
-UNIV_INTERN
-void
-buf_refresh_io_stats(void)
-/*======================*/
-{
-	buf_pool->old_stat = buf_pool->stat;
-	buf_pool->last_printout_time = time(NULL);
-}
-
-/*********************************************************************//**
-Asserts that all file pages in the buffer are in a replaceable state.
-Each chunk is checked with buf_chunk_not_freed() under the buffer pool
-mutex; the first offending block is reported on stderr and ut_error is
-raised.
-@return TRUE */
-UNIV_INTERN
-ibool
-buf_all_freed(void)
-/*===============*/
-{
-	ulint	chunk_no;
-
-	ut_ad(buf_pool);
-
-	buf_pool_mutex_enter();
-
-	for (chunk_no = 0; chunk_no < buf_pool->n_chunks; chunk_no++) {
-		const buf_block_t*	bad_block;
-
-		bad_block = buf_chunk_not_freed(
-			&buf_pool->chunks[chunk_no]);
-
-		if (UNIV_LIKELY_NULL(bad_block)) {
-			fprintf(stderr,
-				"Page %lu %lu still fixed or dirty\n",
-				(ulong) bad_block->page.space,
-				(ulong) bad_block->page.offset);
-			ut_error;
-		}
-	}
-
-	buf_pool_mutex_exit();
-
-	return(TRUE);
-}
-
-/*********************************************************************//**
-Tells whether the buffer pool is free of pending i/o-operations, i.e.
-whether the pending reads and the pending writes of every flush type add
-up to zero. The counters are sampled under the buffer pool mutex.
-@return TRUE if there is no pending i/o */
-UNIV_INTERN
-ibool
-buf_pool_check_no_pending_io(void)
-/*==============================*/
-{
-	ulint	n_pending;
-
-	buf_pool_mutex_enter();
-
-	n_pending = buf_pool->n_pend_reads
-		+ buf_pool->n_flush[BUF_FLUSH_LRU]
-		+ buf_pool->n_flush[BUF_FLUSH_LIST]
-		+ buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE];
-
-	buf_pool_mutex_exit();
-
-	return(n_pending ? FALSE : TRUE);
-}
-
-/*********************************************************************//**
-Samples the length of buf_pool->free, the free list of buffer blocks,
-under the buffer pool mutex.
-@return length of the free list */
-UNIV_INTERN
-ulint
-buf_get_free_list_len(void)
-/*=======================*/
-{
-	ulint	n_free;
-
-	buf_pool_mutex_enter();
-	n_free = UT_LIST_GET_LEN(buf_pool->free);
-	buf_pool_mutex_exit();
-
-	return(n_free);
-}
-#else /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Inits a page to the buffer buf_pool, for use in ibbackup --restore.
-The block is marked as a file page with the given space id and page
-offset, and its compressed page descriptor is initialized for zip_size. */
-UNIV_INTERN
-void
-buf_page_init_for_backup_restore(
-/*=============================*/
-	ulint		space,	/*!< in: space id */
-	ulint		offset,	/*!< in: offset of the page within space
-				in units of a page */
-	ulint		zip_size,/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	buf_block_t*	block)	/*!< in: block to init */
-{
-	buf_page_t*	bpage = &block->page;
-
-	/* We assume that block->page.data has been allocated
-	with zip_size == UNIV_PAGE_SIZE. */
-	ut_ad(zip_size <= UNIV_PAGE_SIZE);
-	ut_ad(ut_is_2pow(zip_size));
-
-	bpage->state = BUF_BLOCK_FILE_PAGE;
-	bpage->space = space;
-	bpage->offset = offset;
-
-	page_zip_des_init(&bpage->zip);
-	page_zip_set_size(&bpage->zip, zip_size);
-
-	if (zip_size != 0) {
-		/* The compressed data is placed right after the
-		uncompressed frame. */
-		bpage->zip.data = block->frame + UNIV_PAGE_SIZE;
-	}
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/buf/buf0flu.c b/storage/innodb_plugin/buf/buf0flu.c
deleted file mode 100644
index 8b614ce90e5..00000000000
--- a/storage/innodb_plugin/buf/buf0flu.c
+++ /dev/null
@@ -1,1410 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file buf/buf0flu.c
-The database buffer buf_pool flush algorithm
-
-Created 11/11/1995 Heikki Tuuri
-*******************************************************/
-
-#include "buf0flu.h"
-
-#ifdef UNIV_NONINL
-#include "buf0flu.ic"
-#endif
-
-#include "buf0buf.h"
-#include "srv0srv.h"
-#include "page0zip.h"
-#ifndef UNIV_HOTBACKUP
-#include "ut0byte.h"
-#include "ut0lst.h"
-#include "page0page.h"
-#include "fil0fil.h"
-#include "buf0lru.h"
-#include "buf0rea.h"
-#include "ibuf0ibuf.h"
-#include "log0log.h"
-#include "os0file.h"
-#include "trx0sys.h"
-
-/**********************************************************************
-These statistics are generated for heuristics used in estimating the
-rate at which we should flush the dirty blocks to avoid bursty IO
-activity. Note that the rate of flushing not only depends on how many
-dirty pages we have in the buffer pool but it is also a function of
-how much redo the workload is generating and at what rate. */
-/* @{ */
-
-/** Number of intervals for which we keep the history of these stats.
-Each interval is 1 second, defined by the rate at which
-srv_error_monitor_thread() calls buf_flush_stat_update(). */
-#define BUF_FLUSH_STAT_N_INTERVAL 20
-
-/** Sampled values buf_flush_stat_cur.
-Not protected by any mutex. Updated by buf_flush_stat_update(). */
-static buf_flush_stat_t buf_flush_stat_arr[BUF_FLUSH_STAT_N_INTERVAL];
-
-/** Cursor to buf_flush_stat_arr[]. Updated in a round-robin fashion. */
-static ulint buf_flush_stat_arr_ind;
-
-/** Values at start of the current interval. Reset by
-buf_flush_stat_update(). */
-static buf_flush_stat_t buf_flush_stat_cur;
-
-/** Running sum of past values of buf_flush_stat_cur.
-Updated by buf_flush_stat_update(). Not protected by any mutex. */
-static buf_flush_stat_t buf_flush_stat_sum;
-
-/** Number of pages flushed through non flush_list flushes. */
-static ulint buf_lru_flush_page_count = 0;
-
-/* @} */
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/******************************************************************//**
-Validates the flush list.
-@return TRUE if ok */
-static
-ibool
-buf_flush_validate_low(void);
-/*========================*/
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-/********************************************************************//**
-Puts a modified block at the head of the flush list. Debug builds assert
-that the block's oldest_modification is at least that of the current head
-of the list and that the block is a file page which is in the LRU list
-and the page hash but not yet in the flush list. */
-UNIV_INTERN
-void
-buf_flush_insert_into_flush_list(
-/*=============================*/
-	buf_block_t*	block)	/*!< in/out: block which is modified */
-{
-	ut_ad(buf_pool_mutex_own());
-	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-
-	/* The list is either empty or its head is not newer than
-	the block being added. */
-	ut_ad(!UT_LIST_GET_FIRST(buf_pool->flush_list)
-	      || (UT_LIST_GET_FIRST(buf_pool->flush_list)
-		  ->oldest_modification
-		  <= block->page.oldest_modification));
-
-	ut_ad(block->page.in_LRU_list);
-	ut_ad(block->page.in_page_hash);
-	ut_ad(!block->page.in_zip_hash);
-	ut_ad(!block->page.in_flush_list);
-	ut_d(block->page.in_flush_list = TRUE);
-
-	UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-	ut_a(buf_flush_validate_low());
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-}
-
-/********************************************************************//**
-Inserts a modified block into the flush list in the right sorted position.
-This function is used by recovery, because there the modifications do not
-necessarily come in the order of lsn's. The list is scanned from its head,
-and the block is placed after the last page whose oldest_modification is
-greater than that of the block. */
-UNIV_INTERN
-void
-buf_flush_insert_sorted_into_flush_list(
-/*====================================*/
-	buf_block_t*	block)	/*!< in/out: block which is modified */
-{
-	buf_page_t*	insert_after	= NULL;
-	buf_page_t*	cur;
-
-	ut_ad(buf_pool_mutex_own());
-	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-
-	ut_ad(block->page.in_LRU_list);
-	ut_ad(block->page.in_page_hash);
-	ut_ad(!block->page.in_zip_hash);
-	ut_ad(!block->page.in_flush_list);
-	ut_d(block->page.in_flush_list = TRUE);
-
-	/* Skip every page that is newer than the block. */
-	for (cur = UT_LIST_GET_FIRST(buf_pool->flush_list);
-	     cur != NULL
-	     && cur->oldest_modification > block->page.oldest_modification;
-	     cur = UT_LIST_GET_NEXT(list, cur)) {
-
-		ut_ad(cur->in_flush_list);
-		insert_after = cur;
-	}
-
-	if (insert_after != NULL) {
-		UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
-				     insert_after, &block->page);
-	} else {
-		/* Nothing was skipped: the block becomes the new head. */
-		UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
-	}
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-	ut_a(buf_flush_validate_low());
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-}
-
-/********************************************************************//**
-Tells whether a file page block can be replaced right away, i.e., whether
-the transition FILE_PAGE => NOT_USED is allowed: the page must be clean,
-not I/O-fixed and not buffer-fixed. A block that is not a file page is
-reported on stderr and FALSE is returned for it.
-@return TRUE if can replace immediately */
-UNIV_INTERN
-ibool
-buf_flush_ready_for_replace(
-/*========================*/
-	buf_page_t*	bpage)	/*!< in: buffer control block, must be
-				buf_page_in_file(bpage) and in the LRU list */
-{
-	ut_ad(buf_pool_mutex_own());
-	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-	ut_ad(bpage->in_LRU_list);
-
-	if (!UNIV_LIKELY(buf_page_in_file(bpage))) {
-		/* Unexpected state: dump the control block. */
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: Error: buffer block state %lu"
-			" in the LRU list!\n",
-			(ulong) buf_page_get_state(bpage));
-		ut_print_buf(stderr, bpage, sizeof(buf_page_t));
-		putc('\n', stderr);
-
-		return(FALSE);
-	}
-
-	return(bpage->oldest_modification == 0
-	       && buf_page_get_io_fix(bpage) == BUF_IO_NONE
-	       && bpage->buf_fix_count == 0);
-}
-
-/********************************************************************//**
-Returns TRUE if the block is modified and ready for flushing: it must
-have a nonzero oldest_modification and must not be I/O-fixed. For an LRU
-flush the block must in addition not be buffer-fixed.
-@return TRUE if can flush immediately */
-UNIV_INLINE
-ibool
-buf_flush_ready_for_flush(
-/*======================*/
-	buf_page_t*	bpage,	/*!< in: buffer control block, must be
-				buf_page_in_file(bpage) */
-	enum buf_flush	flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
-{
-	ut_a(buf_page_in_file(bpage));
-	ut_ad(buf_pool_mutex_own());
-	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-	/* Both alternatives must be compared against flush_type. The
-	earlier form "flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST"
-	tested the bare constant BUF_FLUSH_LIST, so it did not check
-	the documented parameter contract. */
-	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
-
-	if (bpage->oldest_modification != 0
-	    && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
-		ut_ad(bpage->in_flush_list);
-
-		if (flush_type != BUF_FLUSH_LRU) {
-
-			return(TRUE);
-
-		} else if (bpage->buf_fix_count == 0) {
-
-			/* If we are flushing the LRU list, to avoid deadlocks
-			we require the block not to be bufferfixed, and hence
-			not latched. */
-
-			return(TRUE);
-		}
-	}
-
-	return(FALSE);
-}
-
-/********************************************************************//**
-Takes a block off the flush list of modified blocks and resets its
-oldest_modification to 0. A dirty compressed-only page is in addition
-turned into a clean one (BUF_BLOCK_ZIP_PAGE) and handed to
-buf_LRU_insert_zip_clean(). */
-UNIV_INTERN
-void
-buf_flush_remove(
-/*=============*/
-	buf_page_t*	bpage)	/*!< in: pointer to the block in question */
-{
-	ut_ad(buf_pool_mutex_own());
-	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-	ut_ad(bpage->in_flush_list);
-	ut_d(bpage->in_flush_list = FALSE);
-
-	switch (buf_page_get_state(bpage)) {
-	case BUF_BLOCK_FILE_PAGE:
-		UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
-		break;
-	case BUF_BLOCK_ZIP_DIRTY:
-		/* The state is changed first, then the page moves
-		from the flush list to the clean compressed pages. */
-		buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
-		UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
-		buf_LRU_insert_zip_clean(bpage);
-		break;
-	case BUF_BLOCK_ZIP_PAGE:
-		/* clean compressed pages should not be on the flush list */
-	case BUF_BLOCK_ZIP_FREE:
-	case BUF_BLOCK_NOT_USED:
-	case BUF_BLOCK_READY_FOR_USE:
-	case BUF_BLOCK_MEMORY:
-	case BUF_BLOCK_REMOVE_HASH:
-		ut_error;
-		return;
-	}
-
-	bpage->oldest_modification = 0;
-
-	ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
-			      ut_ad(ut_list_node_313->in_flush_list)));
-}
-
-/********************************************************************//**
-Updates the flush system data structures when a write is completed: the
-page leaves the flush list, the pending-write counter of its flush type
-is decremented, and the no_flush event of that type is set once no write
-of the type is pending and no batch of the type is being initialized. */
-UNIV_INTERN
-void
-buf_flush_write_complete(
-/*=====================*/
-	buf_page_t*	bpage)	/*!< in: pointer to the block in question */
-{
-	enum buf_flush	type;
-
-	ut_ad(bpage);
-
-	buf_flush_remove(bpage);
-
-	type = buf_page_get_flush_type(bpage);
-	buf_pool->n_flush[type]--;
-
-	if (type == BUF_FLUSH_LRU) {
-		/* Put the block to the end of the LRU list to wait to be
-		moved to the free list */
-		buf_LRU_make_block_old(bpage);
-		buf_pool->LRU_flush_ended++;
-	}
-
-	if (buf_pool->n_flush[type] == 0 && !buf_pool->init_flush[type]) {
-		/* The running flush batch has ended */
-		os_event_set(buf_pool->no_flush[type]);
-	}
-}
-
-/********************************************************************//**
-Makes a batch of posted writes reach the data files: wakes the simulated
-aio handler threads, waits until no writes are pending, and then flushes
-the tablespace files with fil_flush_file_spaces(). */
-static
-void
-buf_flush_sync_datafiles(void)
-/*==========================*/
-{
-	/* Step 1: wake possible simulated aio thread so that the writes
-	are actually posted to the operating system. */
-	os_aio_simulated_wake_handler_threads();
-
-	/* Step 2: wait until all async writes to tablespaces have been
-	posted to the OS. */
-	os_aio_wait_until_no_pending_writes();
-
-	/* Step 3: flush the data to disk (for example, with fsync). */
-	fil_flush_file_spaces(FIL_TABLESPACE);
-}
-
-/********************************************************************//**
-Flushes possible buffered writes from the doublewrite memory buffer to disk,
-and also wakes up the aio thread if simulated aio is used. It is very
-important to call this function after a batch of writes has been posted,
-and also when we may have to wait for a page latch! Otherwise a deadlock
-of threads can occur.
-
-Outline of the steps taken below:
-1. If the doublewrite buffer is disabled or has not been created, only
-buf_flush_sync_datafiles() is called.
-2. Otherwise, under trx_doublewrite->mutex, and unless the buffer is empty,
-every buffered uncompressed page is sanity-checked (LSN fields and, when
-check_index_page_at_flush is set, a simple index page validation). A page
-that fails the index page validation is printed and ut_error is raised.
-3. The buffered pages are written with fil_io() to doublewrite block1 and,
-when more than TRX_SYS_DOUBLEWRITE_BLOCK_SIZE pages are buffered, to
-block2; then fil_flush(TRX_SYS_SPACE) is called.
-4. Each page is written to its own position in its tablespace,
-buf_flush_sync_datafiles() is called and first_free is reset to 0. */
-static
-void
-buf_flush_buffered_writes(void)
-/*===========================*/
-{
- byte* write_buf;
- ulint len;
- ulint len2;
- ulint i;
-
- if (!srv_use_doublewrite_buf || trx_doublewrite == NULL) {
- /* Sync the writes to the disk. */
- buf_flush_sync_datafiles();
- return;
- }
-
- mutex_enter(&(trx_doublewrite->mutex));
-
- /* Write first to doublewrite buffer blocks. We use synchronous
- aio and thus know that file write has been completed when the
- control returns. */
-
- if (trx_doublewrite->first_free == 0) {
-
- mutex_exit(&(trx_doublewrite->mutex));
-
- return;
- }
-
- for (i = 0; i < trx_doublewrite->first_free; i++) {
-
- const buf_block_t* block;
-
- block = (buf_block_t*) trx_doublewrite->buf_block_arr[i];
-
- if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE
- || block->page.zip.data) {
- /* No simple validate for compressed pages exists. */
- continue;
- }
-
- if (UNIV_UNLIKELY
- (memcmp(block->frame + (FIL_PAGE_LSN + 4),
- block->frame + (UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
- 4))) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ERROR: The page to be written"
- " seems corrupt!\n"
- "InnoDB: The lsn fields do not match!"
- " Noticed in the buffer pool\n"
- "InnoDB: before posting to the"
- " doublewrite buffer.\n");
- }
-
- if (!block->check_index_page_at_flush) {
- } else if (page_is_comp(block->frame)) {
- if (UNIV_UNLIKELY
- (!page_simple_validate_new(block->frame))) {
-corrupted_page:
- buf_page_print(block->frame, 0);
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Apparent corruption of an"
- " index page n:o %lu in space %lu\n"
- "InnoDB: to be written to data file."
- " We intentionally crash server\n"
- "InnoDB: to prevent corrupt data"
- " from ending up in data\n"
- "InnoDB: files.\n",
- (ulong) buf_block_get_page_no(block),
- (ulong) buf_block_get_space(block));
-
- ut_error;
- }
- } else if (UNIV_UNLIKELY
- (!page_simple_validate_old(block->frame))) {
-
- goto corrupted_page;
- }
- }
-
- /* increment the doublewrite flushed pages counter */
- srv_dblwr_pages_written+= trx_doublewrite->first_free;
- srv_dblwr_writes++;
-
- len = ut_min(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE,
- trx_doublewrite->first_free) * UNIV_PAGE_SIZE;
-
- write_buf = trx_doublewrite->write_buf;
- i = 0;
-
- fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
- trx_doublewrite->block1, 0, len,
- (void*) write_buf, NULL);
-
- for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
- len2 += UNIV_PAGE_SIZE, i++) {
- const buf_block_t* block = (buf_block_t*)
- trx_doublewrite->buf_block_arr[i];
-
- if (UNIV_LIKELY(!block->page.zip.data)
- && UNIV_LIKELY(buf_block_get_state(block)
- == BUF_BLOCK_FILE_PAGE)
- && UNIV_UNLIKELY
- (memcmp(write_buf + len2 + (FIL_PAGE_LSN + 4),
- write_buf + len2
- + (UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM + 4), 4))) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ERROR: The page to be written"
- " seems corrupt!\n"
- "InnoDB: The lsn fields do not match!"
- " Noticed in the doublewrite block1.\n");
- }
- }
-
- if (trx_doublewrite->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
- goto flush;
- }
-
- len = (trx_doublewrite->first_free - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
- * UNIV_PAGE_SIZE;
-
- write_buf = trx_doublewrite->write_buf
- + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
- ut_ad(i == TRX_SYS_DOUBLEWRITE_BLOCK_SIZE);
-
- fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
- trx_doublewrite->block2, 0, len,
- (void*) write_buf, NULL);
-
- for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
- len2 += UNIV_PAGE_SIZE, i++) {
- const buf_block_t* block = (buf_block_t*)
- trx_doublewrite->buf_block_arr[i];
-
- if (UNIV_LIKELY(!block->page.zip.data)
- && UNIV_LIKELY(buf_block_get_state(block)
- == BUF_BLOCK_FILE_PAGE)
- && UNIV_UNLIKELY
- (memcmp(write_buf + len2 + (FIL_PAGE_LSN + 4),
- write_buf + len2
- + (UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM + 4), 4))) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ERROR: The page to be"
- " written seems corrupt!\n"
- "InnoDB: The lsn fields do not match!"
- " Noticed in"
- " the doublewrite block2.\n");
- }
- }
-
-flush:
- /* Now flush the doublewrite buffer data to disk */
-
- fil_flush(TRX_SYS_SPACE);
-
- /* We know that the writes have been flushed to disk now
- and in recovery we will find them in the doublewrite buffer
- blocks. Next do the writes to the intended positions. */
-
- for (i = 0; i < trx_doublewrite->first_free; i++) {
- const buf_block_t* block = (buf_block_t*)
- trx_doublewrite->buf_block_arr[i];
-
- ut_a(buf_page_in_file(&block->page));
- if (UNIV_LIKELY_NULL(block->page.zip.data)) {
- fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
- FALSE, buf_page_get_space(&block->page),
- buf_page_get_zip_size(&block->page),
- buf_page_get_page_no(&block->page), 0,
- buf_page_get_zip_size(&block->page),
- (void*)block->page.zip.data,
- (void*)block);
-
- /* Increment the counter of I/O operations used
- for selecting LRU policy. */
- buf_LRU_stat_inc_io();
-
- continue;
- }
-
- ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-
- if (UNIV_UNLIKELY(memcmp(block->frame + (FIL_PAGE_LSN + 4),
- block->frame
- + (UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
- 4))) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ERROR: The page to be written"
- " seems corrupt!\n"
- "InnoDB: The lsn fields do not match!"
- " Noticed in the buffer pool\n"
- "InnoDB: after posting and flushing"
- " the doublewrite buffer.\n"
- "InnoDB: Page buf fix count %lu,"
- " io fix %lu, state %lu\n",
- (ulong)block->page.buf_fix_count,
- (ulong)buf_block_get_io_fix(block),
- (ulong)buf_block_get_state(block));
- }
-
- fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
- FALSE, buf_block_get_space(block), 0,
- buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE,
- (void*)block->frame, (void*)block);
-
- /* Increment the counter of I/O operations used
- for selecting LRU policy. */
- buf_LRU_stat_inc_io();
- }
-
- /* Sync the writes to the disk. */
- buf_flush_sync_datafiles();
-
- /* We can now reuse the doublewrite memory buffer: */
- trx_doublewrite->first_free = 0;
-
- mutex_exit(&(trx_doublewrite->mutex));
-}
-
-/********************************************************************//**
-Posts a buffer page for writing. If the doublewrite memory buffer is
-full, calls buf_flush_buffered_writes and waits for for free space to
-appear. */
-static
-void
-buf_flush_post_to_doublewrite_buf(
-/*==============================*/
- buf_page_t* bpage) /*!< in: buffer block to write */
-{
- ulint zip_size;
-try_again:
- mutex_enter(&(trx_doublewrite->mutex));
-
- ut_a(buf_page_in_file(bpage));
-
- if (trx_doublewrite->first_free
- >= 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
- mutex_exit(&(trx_doublewrite->mutex));
-
- buf_flush_buffered_writes();
-
- goto try_again;
- }
-
- zip_size = buf_page_get_zip_size(bpage);
-
- if (UNIV_UNLIKELY(zip_size)) {
- /* Copy the compressed page and clear the rest. */
- memcpy(trx_doublewrite->write_buf
- + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
- bpage->zip.data, zip_size);
- memset(trx_doublewrite->write_buf
- + UNIV_PAGE_SIZE * trx_doublewrite->first_free
- + zip_size, 0, UNIV_PAGE_SIZE - zip_size);
- } else {
- ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
-
- memcpy(trx_doublewrite->write_buf
- + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
- ((buf_block_t*) bpage)->frame, UNIV_PAGE_SIZE);
- }
-
- trx_doublewrite->buf_block_arr[trx_doublewrite->first_free] = bpage;
-
- trx_doublewrite->first_free++;
-
- if (trx_doublewrite->first_free
- >= 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
- mutex_exit(&(trx_doublewrite->mutex));
-
- buf_flush_buffered_writes();
-
- return;
- }
-
- mutex_exit(&(trx_doublewrite->mutex));
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************************//**
-Initializes a page for writing to the tablespace. */
-UNIV_INTERN
-void
-buf_flush_init_for_writing(
-/*=======================*/
- byte* page, /*!< in/out: page */
- void* page_zip_, /*!< in/out: compressed page, or NULL */
- ib_uint64_t newest_lsn) /*!< in: newest modification lsn
- to the page */
-{
- ut_ad(page);
-
- if (page_zip_) {
- page_zip_des_t* page_zip = page_zip_;
- ulint zip_size = page_zip_get_size(page_zip);
- ut_ad(zip_size);
- ut_ad(ut_is_2pow(zip_size));
- ut_ad(zip_size <= UNIV_PAGE_SIZE);
-
- switch (UNIV_EXPECT(fil_page_get_type(page), FIL_PAGE_INDEX)) {
- case FIL_PAGE_TYPE_ALLOCATED:
- case FIL_PAGE_INODE:
- case FIL_PAGE_IBUF_BITMAP:
- case FIL_PAGE_TYPE_FSP_HDR:
- case FIL_PAGE_TYPE_XDES:
- /* These are essentially uncompressed pages. */
- memcpy(page_zip->data, page, zip_size);
- /* fall through */
- case FIL_PAGE_TYPE_ZBLOB:
- case FIL_PAGE_TYPE_ZBLOB2:
- case FIL_PAGE_INDEX:
- mach_write_ull(page_zip->data
- + FIL_PAGE_LSN, newest_lsn);
- memset(page_zip->data + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
- mach_write_to_4(page_zip->data
- + FIL_PAGE_SPACE_OR_CHKSUM,
- srv_use_checksums
- ? page_zip_calc_checksum(
- page_zip->data, zip_size)
- : BUF_NO_CHECKSUM_MAGIC);
- return;
- }
-
- ut_print_timestamp(stderr);
- fputs(" InnoDB: ERROR: The compressed page to be written"
- " seems corrupt:", stderr);
- ut_print_buf(stderr, page, zip_size);
- fputs("\nInnoDB: Possibly older version of the page:", stderr);
- ut_print_buf(stderr, page_zip->data, zip_size);
- putc('\n', stderr);
- ut_error;
- }
-
- /* Write the newest modification lsn to the page header and trailer */
- mach_write_ull(page + FIL_PAGE_LSN, newest_lsn);
-
- mach_write_ull(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
- newest_lsn);
-
- /* Store the new formula checksum */
-
- mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
- srv_use_checksums
- ? buf_calc_page_new_checksum(page)
- : BUF_NO_CHECKSUM_MAGIC);
-
- /* We overwrite the first 4 bytes of the end lsn field to store
- the old formula checksum. Since it depends also on the field
- FIL_PAGE_SPACE_OR_CHKSUM, it has to be calculated after storing the
- new formula checksum. */
-
- mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
- srv_use_checksums
- ? buf_calc_page_old_checksum(page)
- : BUF_NO_CHECKSUM_MAGIC);
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Does an asynchronous write of a buffer page. NOTE: in simulated aio and
-also when the doublewrite buffer is used, we must call
-buf_flush_buffered_writes after we have posted a batch of writes! */
-static
-void
-buf_flush_write_block_low(
-/*======================*/
- buf_page_t* bpage) /*!< in: buffer block to write */
-{
- ulint zip_size = buf_page_get_zip_size(bpage);
- page_t* frame = NULL;
-#ifdef UNIV_LOG_DEBUG
- static ibool univ_log_debug_warned;
-#endif /* UNIV_LOG_DEBUG */
-
- ut_ad(buf_page_in_file(bpage));
-
- /* We are not holding buf_pool_mutex or block_mutex here.
- Nevertheless, it is safe to access bpage, because it is
- io_fixed and oldest_modification != 0. Thus, it cannot be
- relocated in the buffer pool or removed from flush_list or
- LRU_list. */
- ut_ad(!buf_pool_mutex_own());
- ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
- ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
- ut_ad(bpage->oldest_modification != 0);
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
-#endif
- ut_ad(bpage->newest_modification != 0);
-
-#ifdef UNIV_LOG_DEBUG
- if (!univ_log_debug_warned) {
- univ_log_debug_warned = TRUE;
- fputs("Warning: cannot force log to disk if"
- " UNIV_LOG_DEBUG is defined!\n"
- "Crash recovery will not work!\n",
- stderr);
- }
-#else
- /* Force the log to the disk before writing the modified block */
- log_write_up_to(bpage->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE);
-#endif
- switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_ZIP_FREE:
- case BUF_BLOCK_ZIP_PAGE: /* The page should be dirty. */
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- ut_error;
- break;
- case BUF_BLOCK_ZIP_DIRTY:
- frame = bpage->zip.data;
- if (UNIV_LIKELY(srv_use_checksums)) {
- ut_a(mach_read_from_4(frame + FIL_PAGE_SPACE_OR_CHKSUM)
- == page_zip_calc_checksum(frame, zip_size));
- }
- mach_write_ull(frame + FIL_PAGE_LSN,
- bpage->newest_modification);
- memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
- break;
- case BUF_BLOCK_FILE_PAGE:
- frame = bpage->zip.data;
- if (!frame) {
- frame = ((buf_block_t*) bpage)->frame;
- }
-
- buf_flush_init_for_writing(((buf_block_t*) bpage)->frame,
- bpage->zip.data
- ? &bpage->zip : NULL,
- bpage->newest_modification);
- break;
- }
-
- if (!srv_use_doublewrite_buf || !trx_doublewrite) {
- fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
- FALSE, buf_page_get_space(bpage), zip_size,
- buf_page_get_page_no(bpage), 0,
- zip_size ? zip_size : UNIV_PAGE_SIZE,
- frame, bpage);
- } else {
- buf_flush_post_to_doublewrite_buf(bpage);
- }
-}
-
-/********************************************************************//**
-Writes a flushable page asynchronously from the buffer pool to a file.
-NOTE: in simulated aio we must call
-os_aio_simulated_wake_handler_threads after we have posted a batch of
-writes! NOTE: buf_pool_mutex and buf_page_get_mutex(bpage) must be
-held upon entering this function, and they will be released by this
-function. */
-static
-void
-buf_flush_page(
-/*===========*/
- buf_page_t* bpage, /*!< in: buffer control block */
- enum buf_flush flush_type) /*!< in: BUF_FLUSH_LRU
- or BUF_FLUSH_LIST */
-{
- mutex_t* block_mutex;
- ibool is_uncompressed;
-
- ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
- ut_ad(buf_pool_mutex_own());
- ut_ad(buf_page_in_file(bpage));
-
- block_mutex = buf_page_get_mutex(bpage);
- ut_ad(mutex_own(block_mutex));
-
- ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
-
- buf_page_set_io_fix(bpage, BUF_IO_WRITE);
-
- buf_page_set_flush_type(bpage, flush_type);
-
- if (buf_pool->n_flush[flush_type] == 0) {
-
- os_event_reset(buf_pool->no_flush[flush_type]);
- }
-
- buf_pool->n_flush[flush_type]++;
-
- is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
- ut_ad(is_uncompressed == (block_mutex != &buf_pool_zip_mutex));
-
- switch (flush_type) {
- ibool is_s_latched;
- case BUF_FLUSH_LIST:
- /* If the simulated aio thread is not running, we must
- not wait for any latch, as we may end up in a deadlock:
- if buf_fix_count == 0, then we know we need not wait */
-
- is_s_latched = (bpage->buf_fix_count == 0);
- if (is_s_latched && is_uncompressed) {
- rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
- BUF_IO_WRITE);
- }
-
- mutex_exit(block_mutex);
- buf_pool_mutex_exit();
-
- /* Even though bpage is not protected by any mutex at
- this point, it is safe to access bpage, because it is
- io_fixed and oldest_modification != 0. Thus, it
- cannot be relocated in the buffer pool or removed from
- flush_list or LRU_list. */
-
- if (!is_s_latched) {
- buf_flush_buffered_writes();
-
- if (is_uncompressed) {
- rw_lock_s_lock_gen(&((buf_block_t*) bpage)
- ->lock, BUF_IO_WRITE);
- }
- }
-
- break;
-
- case BUF_FLUSH_LRU:
- /* VERY IMPORTANT:
- Because any thread may call the LRU flush, even when owning
- locks on pages, to avoid deadlocks, we must make sure that the
- s-lock is acquired on the page without waiting: this is
- accomplished because buf_flush_ready_for_flush() must hold,
- and that requires the page not to be bufferfixed. */
-
- if (is_uncompressed) {
- rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
- BUF_IO_WRITE);
- }
-
- /* Note that the s-latch is acquired before releasing the
- buf_pool mutex: this ensures that the latch is acquired
- immediately. */
-
- mutex_exit(block_mutex);
- buf_pool_mutex_exit();
- break;
-
- default:
- ut_error;
- }
-
- /* Even though bpage is not protected by any mutex at this
- point, it is safe to access bpage, because it is io_fixed and
- oldest_modification != 0. Thus, it cannot be relocated in the
- buffer pool or removed from flush_list or LRU_list. */
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr,
- "Flushing %u space %u page %u\n",
- flush_type, bpage->space, bpage->offset);
- }
-#endif /* UNIV_DEBUG */
- buf_flush_write_block_low(bpage);
-}
-
-/***********************************************************//**
-Flushes to disk all flushable pages within the flush area.
-@return number of pages flushed */
-static
-ulint
-buf_flush_try_neighbors(
-/*====================*/
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: page offset */
- enum buf_flush flush_type) /*!< in: BUF_FLUSH_LRU or
- BUF_FLUSH_LIST */
-{
- buf_page_t* bpage;
- ulint low, high;
- ulint count = 0;
- ulint i;
-
- ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
-
- if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
- /* If there is little space, it is better not to flush any
- block except from the end of the LRU list */
-
- low = offset;
- high = offset + 1;
- } else {
- /* When flushed, dirty blocks are searched in neighborhoods of
- this size, and flushed along with the original page. */
-
- ulint buf_flush_area = ut_min(BUF_READ_AHEAD_AREA,
- buf_pool->curr_size / 16);
-
- low = (offset / buf_flush_area) * buf_flush_area;
- high = (offset / buf_flush_area + 1) * buf_flush_area;
- }
-
- /* fprintf(stderr, "Flush area: low %lu high %lu\n", low, high); */
-
- if (high > fil_space_get_size(space)) {
- high = fil_space_get_size(space);
- }
-
- buf_pool_mutex_enter();
-
- for (i = low; i < high; i++) {
-
- bpage = buf_page_hash_get(space, i);
-
- if (!bpage) {
-
- continue;
- }
-
- ut_a(buf_page_in_file(bpage));
-
- /* We avoid flushing 'non-old' blocks in an LRU flush,
- because the flushed blocks are soon freed */
-
- if (flush_type != BUF_FLUSH_LRU
- || i == offset
- || buf_page_is_old(bpage)) {
- mutex_t* block_mutex = buf_page_get_mutex(bpage);
-
- mutex_enter(block_mutex);
-
- if (buf_flush_ready_for_flush(bpage, flush_type)
- && (i == offset || !bpage->buf_fix_count)) {
- /* We only try to flush those
- neighbors != offset where the buf fix count is
- zero, as we then know that we probably can
- latch the page without a semaphore wait.
- Semaphore waits are expensive because we must
- flush the doublewrite buffer before we start
- waiting. */
-
- buf_flush_page(bpage, flush_type);
- ut_ad(!mutex_own(block_mutex));
- count++;
-
- buf_pool_mutex_enter();
- } else {
- mutex_exit(block_mutex);
- }
- }
- }
-
- buf_pool_mutex_exit();
-
- return(count);
-}
-
-/*******************************************************************//**
-This utility flushes dirty blocks from the end of the LRU list or flush_list.
-NOTE 1: in the case of an LRU flush the calling thread may own latches to
-pages: to avoid deadlocks, this function must be written so that it cannot
-end up waiting for these latches! NOTE 2: in the case of a flush list flush,
-the calling thread is not allowed to own any latches on pages!
-@return number of blocks for which the write request was queued;
-ULINT_UNDEFINED if there was a flush of the same type already running */
-UNIV_INTERN
-ulint
-buf_flush_batch(
-/*============*/
- enum buf_flush flush_type, /*!< in: BUF_FLUSH_LRU or
- BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
- then the caller must not own any
- latches on pages */
- ulint min_n, /*!< in: wished minimum mumber of blocks
- flushed (it is not guaranteed that the
- actual number is that big, though) */
- ib_uint64_t lsn_limit) /*!< in the case BUF_FLUSH_LIST all
- blocks whose oldest_modification is
- smaller than this should be flushed
- (if their number does not exceed
- min_n), otherwise ignored */
-{
- buf_page_t* bpage;
- ulint page_count = 0;
- ulint old_page_count;
- ulint space;
- ulint offset;
-
- ut_ad((flush_type == BUF_FLUSH_LRU)
- || (flush_type == BUF_FLUSH_LIST));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad((flush_type != BUF_FLUSH_LIST)
- || sync_thread_levels_empty_gen(TRUE));
-#endif /* UNIV_SYNC_DEBUG */
- buf_pool_mutex_enter();
-
- if ((buf_pool->n_flush[flush_type] > 0)
- || (buf_pool->init_flush[flush_type] == TRUE)) {
-
- /* There is already a flush batch of the same type running */
-
- buf_pool_mutex_exit();
-
- return(ULINT_UNDEFINED);
- }
-
- buf_pool->init_flush[flush_type] = TRUE;
-
- for (;;) {
-flush_next:
- /* If we have flushed enough, leave the loop */
- if (page_count >= min_n) {
-
- break;
- }
-
- /* Start from the end of the list looking for a suitable
- block to be flushed. */
-
- if (flush_type == BUF_FLUSH_LRU) {
- bpage = UT_LIST_GET_LAST(buf_pool->LRU);
- } else {
- ut_ad(flush_type == BUF_FLUSH_LIST);
-
- bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
- if (!bpage
- || bpage->oldest_modification >= lsn_limit) {
- /* We have flushed enough */
-
- break;
- }
- ut_ad(bpage->in_flush_list);
- }
-
- /* Note that after finding a single flushable page, we try to
- flush also all its neighbors, and after that start from the
- END of the LRU list or flush list again: the list may change
- during the flushing and we cannot safely preserve within this
- function a pointer to a block in the list! */
-
- do {
- mutex_t*block_mutex = buf_page_get_mutex(bpage);
- ibool ready;
-
- ut_a(buf_page_in_file(bpage));
-
- mutex_enter(block_mutex);
- ready = buf_flush_ready_for_flush(bpage, flush_type);
- mutex_exit(block_mutex);
-
- if (ready) {
- space = buf_page_get_space(bpage);
- offset = buf_page_get_page_no(bpage);
-
- buf_pool_mutex_exit();
-
- old_page_count = page_count;
-
- /* Try to flush also all the neighbors */
- page_count += buf_flush_try_neighbors(
- space, offset, flush_type);
- /* fprintf(stderr,
- "Flush type %lu, page no %lu, neighb %lu\n",
- flush_type, offset,
- page_count - old_page_count); */
-
- buf_pool_mutex_enter();
- goto flush_next;
-
- } else if (flush_type == BUF_FLUSH_LRU) {
- bpage = UT_LIST_GET_PREV(LRU, bpage);
- } else {
- ut_ad(flush_type == BUF_FLUSH_LIST);
-
- bpage = UT_LIST_GET_PREV(list, bpage);
- ut_ad(!bpage || bpage->in_flush_list);
- }
- } while (bpage != NULL);
-
- /* If we could not find anything to flush, leave the loop */
-
- break;
- }
-
- buf_pool->init_flush[flush_type] = FALSE;
-
- if (buf_pool->n_flush[flush_type] == 0) {
-
- /* The running flush batch has ended */
-
- os_event_set(buf_pool->no_flush[flush_type]);
- }
-
- buf_pool_mutex_exit();
-
- buf_flush_buffered_writes();
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints && page_count > 0) {
- ut_a(flush_type == BUF_FLUSH_LRU
- || flush_type == BUF_FLUSH_LIST);
- fprintf(stderr, flush_type == BUF_FLUSH_LRU
- ? "Flushed %lu pages in LRU flush\n"
- : "Flushed %lu pages in flush list flush\n",
- (ulong) page_count);
- }
-#endif /* UNIV_DEBUG */
-
- srv_buf_pool_flushed += page_count;
-
- /* We keep track of all flushes happening as part of LRU
- flush. When estimating the desired rate at which flush_list
- should be flushed we factor in this value. */
- if (flush_type == BUF_FLUSH_LRU) {
- buf_lru_flush_page_count += page_count;
- }
-
- return(page_count);
-}
-
-/******************************************************************//**
-Waits until a flush batch of the given type ends */
-UNIV_INTERN
-void
-buf_flush_wait_batch_end(
-/*=====================*/
- enum buf_flush type) /*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
-{
- ut_ad((type == BUF_FLUSH_LRU) || (type == BUF_FLUSH_LIST));
-
- os_event_wait(buf_pool->no_flush[type]);
-}
-
-/******************************************************************//**
-Gives a recommendation of how many blocks should be flushed to establish
-a big enough margin of replaceable blocks near the end of the LRU list
-and in the free list.
-@return number of blocks which should be flushed from the end of the
-LRU list */
-static
-ulint
-buf_flush_LRU_recommendation(void)
-/*==============================*/
-{
- buf_page_t* bpage;
- ulint n_replaceable;
- ulint distance = 0;
-
- buf_pool_mutex_enter();
-
- n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
-
- bpage = UT_LIST_GET_LAST(buf_pool->LRU);
-
- while ((bpage != NULL)
- && (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN
- + BUF_FLUSH_EXTRA_MARGIN)
- && (distance < BUF_LRU_FREE_SEARCH_LEN)) {
-
- mutex_t* block_mutex = buf_page_get_mutex(bpage);
-
- mutex_enter(block_mutex);
-
- if (buf_flush_ready_for_replace(bpage)) {
- n_replaceable++;
- }
-
- mutex_exit(block_mutex);
-
- distance++;
-
- bpage = UT_LIST_GET_PREV(LRU, bpage);
- }
-
- buf_pool_mutex_exit();
-
- if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {
-
- return(0);
- }
-
- return(BUF_FLUSH_FREE_BLOCK_MARGIN + BUF_FLUSH_EXTRA_MARGIN
- - n_replaceable);
-}
-
-/*********************************************************************//**
-Flushes pages from the end of the LRU list if there is too small a margin
-of replaceable pages there or in the free list. VERY IMPORTANT: this function
-is called also by threads which have locks on pages. To avoid deadlocks, we
-flush only pages such that the s-lock required for flushing can be acquired
-immediately, without waiting. */
-UNIV_INTERN
-void
-buf_flush_free_margin(void)
-/*=======================*/
-{
- ulint n_to_flush;
- ulint n_flushed;
-
- n_to_flush = buf_flush_LRU_recommendation();
-
- if (n_to_flush > 0) {
- n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush, 0);
- if (n_flushed == ULINT_UNDEFINED) {
- /* There was an LRU type flush batch already running;
- let us wait for it to end */
-
- buf_flush_wait_batch_end(BUF_FLUSH_LRU);
- }
- }
-}
-
-/*********************************************************************
-Update the historical stats that we are collecting for flush rate
-heuristics at the end of each interval.
-Flush rate heuristic depends on (a) rate of redo log generation and
-(b) the rate at which LRU flush is happening. */
-UNIV_INTERN
-void
-buf_flush_stat_update(void)
-/*=======================*/
-{
- buf_flush_stat_t* item;
- ib_uint64_t lsn_diff;
- ib_uint64_t lsn;
- ulint n_flushed;
-
- lsn = log_get_lsn();
- if (buf_flush_stat_cur.redo == 0) {
- /* First time around. Just update the current LSN
- and return. */
- buf_flush_stat_cur.redo = lsn;
- return;
- }
-
- item = &buf_flush_stat_arr[buf_flush_stat_arr_ind];
-
- /* values for this interval */
- lsn_diff = lsn - buf_flush_stat_cur.redo;
- n_flushed = buf_lru_flush_page_count
- - buf_flush_stat_cur.n_flushed;
-
- /* add the current value and subtract the obsolete entry. */
- buf_flush_stat_sum.redo += lsn_diff - item->redo;
- buf_flush_stat_sum.n_flushed += n_flushed - item->n_flushed;
-
- /* put current entry in the array. */
- item->redo = lsn_diff;
- item->n_flushed = n_flushed;
-
- /* update the index */
- buf_flush_stat_arr_ind++;
- buf_flush_stat_arr_ind %= BUF_FLUSH_STAT_N_INTERVAL;
-
- /* reset the current entry. */
- buf_flush_stat_cur.redo = lsn;
- buf_flush_stat_cur.n_flushed = buf_lru_flush_page_count;
-}
-
-/*********************************************************************
-Determines the fraction of dirty pages that need to be flushed based
-on the speed at which we generate redo log. Note that if redo log
-is generated at a significant rate without corresponding increase
-in the number of dirty pages (for example, an in-memory workload)
-it can cause IO bursts of flushing. This function implements heuristics
-to avoid this burstiness.
-@return number of dirty pages to be flushed / second */
-UNIV_INTERN
-ulint
-buf_flush_get_desired_flush_rate(void)
-/*==================================*/
-{
- ulint redo_avg;
- ulint lru_flush_avg;
- ulint n_dirty;
- ulint n_flush_req;
- lint rate;
- ib_uint64_t lsn = log_get_lsn();
- ulint log_capacity = log_get_capacity();
-
- /* log_capacity should never be zero after the initialization
- of log subsystem. */
- ut_ad(log_capacity != 0);
-
- /* Get total number of dirty pages. It is OK to access
- flush_list without holding any mtex as we are using this
- only for heuristics. */
- n_dirty = UT_LIST_GET_LEN(buf_pool->flush_list);
-
- /* An overflow can happen if we generate more than 2^32 bytes
- of redo in this interval i.e.: 4G of redo in 1 second. We can
- safely consider this as infinity because if we ever come close
- to 4G we'll start a synchronous flush of dirty pages. */
- /* redo_avg below is average at which redo is generated in
- past BUF_FLUSH_STAT_N_INTERVAL + redo generated in the current
- interval. */
- redo_avg = (ulint) (buf_flush_stat_sum.redo
- / BUF_FLUSH_STAT_N_INTERVAL
- + (lsn - buf_flush_stat_cur.redo));
-
- /* An overflow can happen possibly if we flush more than 2^32
- pages in BUF_FLUSH_STAT_N_INTERVAL. This is a very very
- unlikely scenario. Even when this happens it means that our
- flush rate will be off the mark. It won't affect correctness
- of any subsystem. */
- /* lru_flush_avg below is rate at which pages are flushed as
- part of LRU flush in past BUF_FLUSH_STAT_N_INTERVAL + the
- number of pages flushed in the current interval. */
- lru_flush_avg = buf_flush_stat_sum.n_flushed
- / BUF_FLUSH_STAT_N_INTERVAL
- + (buf_lru_flush_page_count
- - buf_flush_stat_cur.n_flushed);
-
- n_flush_req = (n_dirty * redo_avg) / log_capacity;
-
- /* The number of pages that we want to flush from the flush
- list is the difference between the required rate and the
- number of pages that we are historically flushing from the
- LRU list */
- rate = n_flush_req - lru_flush_avg;
- return(rate > 0 ? (ulint) rate : 0);
-}
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/******************************************************************//**
-Validates the flush list.
-@return TRUE if ok */
-static
-ibool
-buf_flush_validate_low(void)
-/*========================*/
-{
- buf_page_t* bpage;
-
- UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
- ut_ad(ut_list_node_313->in_flush_list));
-
- bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
-
- while (bpage != NULL) {
- const ib_uint64_t om = bpage->oldest_modification;
- ut_ad(bpage->in_flush_list);
- ut_a(buf_page_in_file(bpage));
- ut_a(om > 0);
-
- bpage = UT_LIST_GET_NEXT(list, bpage);
-
- ut_a(!bpage || om >= bpage->oldest_modification);
- }
-
- return(TRUE);
-}
-
-/******************************************************************//**
-Validates the flush list.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-buf_flush_validate(void)
-/*====================*/
-{
- ibool ret;
-
- buf_pool_mutex_enter();
-
- ret = buf_flush_validate_low();
-
- buf_pool_mutex_exit();
-
- return(ret);
-}
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/buf/buf0lru.c b/storage/innodb_plugin/buf/buf0lru.c
deleted file mode 100644
index 4f19fd13fa5..00000000000
--- a/storage/innodb_plugin/buf/buf0lru.c
+++ /dev/null
@@ -1,2092 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file buf/buf0lru.c
-The database buffer replacement algorithm
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#include "buf0lru.h"
-
-#ifdef UNIV_NONINL
-#include "buf0lru.ic"
-#endif
-
-#include "ut0byte.h"
-#include "ut0lst.h"
-#include "ut0rnd.h"
-#include "sync0sync.h"
-#include "sync0rw.h"
-#include "hash0hash.h"
-#include "os0sync.h"
-#include "fil0fil.h"
-#include "btr0btr.h"
-#include "buf0buddy.h"
-#include "buf0buf.h"
-#include "buf0flu.h"
-#include "buf0rea.h"
-#include "btr0sea.h"
-#include "ibuf0ibuf.h"
-#include "os0file.h"
-#include "page0zip.h"
-#include "log0recv.h"
-#include "srv0srv.h"
-
-/** The number of blocks from the LRU_old pointer onward, including
-the block pointed to, must be buf_LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
-of the whole LRU list length, except that the tolerance defined below
-is allowed. Note that the tolerance must be small enough such that for
-even the BUF_LRU_OLD_MIN_LEN long LRU list, the LRU_old pointer is not
-allowed to point to either end of the LRU list. */
-
-#define BUF_LRU_OLD_TOLERANCE 20
-
-/** The minimum amount of non-old blocks when the LRU_old list exists
-(that is, when there are more than BUF_LRU_OLD_MIN_LEN blocks).
-@see buf_LRU_old_adjust_len */
-#define BUF_LRU_NON_OLD_MIN_LEN 5
-#if BUF_LRU_NON_OLD_MIN_LEN >= BUF_LRU_OLD_MIN_LEN
-# error "BUF_LRU_NON_OLD_MIN_LEN >= BUF_LRU_OLD_MIN_LEN"
-#endif
-
-/** When dropping the search hash index entries before deleting an ibd
-file, we build a local array of pages belonging to that tablespace
-in the buffer pool. Following is the size of that array. */
-#define BUF_LRU_DROP_SEARCH_HASH_SIZE 1024
-
-/** If we switch on the InnoDB monitor because there are too few available
-frames in the buffer pool, we set this to TRUE */
-static ibool buf_lru_switched_on_innodb_mon = FALSE;
-
-/******************************************************************//**
-These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O
-and page_zip_decompress() operations. Based on the statistics,
-buf_LRU_evict_from_unzip_LRU() decides if we want to evict from
-unzip_LRU or the regular LRU. From unzip_LRU, we will only evict the
-uncompressed frame (meaning we can evict dirty blocks as well). From
-the regular LRU, we will evict the entire block (i.e.: both the
-uncompressed and compressed data), which must be clean. */
-
-/* @{ */
-
-/** Number of intervals for which we keep the history of these stats.
-Each interval is 1 second, defined by the rate at which
-srv_error_monitor_thread() calls buf_LRU_stat_update(). */
-#define BUF_LRU_STAT_N_INTERVAL 50
-
-/** Co-efficient with which we multiply I/O operations to equate them
-with page_zip_decompress() operations. */
-#define BUF_LRU_IO_TO_UNZIP_FACTOR 50
-
-/** Sampled values buf_LRU_stat_cur.
-Protected by buf_pool_mutex. Updated by buf_LRU_stat_update(). */
-static buf_LRU_stat_t buf_LRU_stat_arr[BUF_LRU_STAT_N_INTERVAL];
-/** Cursor to buf_LRU_stat_arr[] that is updated in a round-robin fashion. */
-static ulint buf_LRU_stat_arr_ind;
-
-/** Current operation counters. Not protected by any mutex. Cleared
-by buf_LRU_stat_update(). */
-UNIV_INTERN buf_LRU_stat_t buf_LRU_stat_cur;
-
-/** Running sum of past values of buf_LRU_stat_cur.
-Updated by buf_LRU_stat_update(). Protected by buf_pool_mutex. */
-UNIV_INTERN buf_LRU_stat_t buf_LRU_stat_sum;
-
-/* @} */
-
-/** @name Heuristics for detecting index scan @{ */
-/** Reserve this much/BUF_LRU_OLD_RATIO_DIV of the buffer pool for
-"old" blocks. Protected by buf_pool_mutex. */
-UNIV_INTERN uint buf_LRU_old_ratio;
-/** Move blocks to "new" LRU list only if the first access was at
-least this many milliseconds ago. Not protected by any mutex or latch. */
-UNIV_INTERN uint buf_LRU_old_threshold_ms;
-/* @} */
-
-/******************************************************************//**
-Takes a block out of the LRU list and page hash table.
-If the block is compressed-only (BUF_BLOCK_ZIP_PAGE),
-the object will be freed and buf_pool_zip_mutex will be released.
-
-If a compressed page or a compressed-only block descriptor is freed,
-other compressed pages or compressed-only block descriptors may be
-relocated.
-@return the new state of the block (BUF_BLOCK_ZIP_FREE if the state
-was BUF_BLOCK_ZIP_PAGE, or BUF_BLOCK_REMOVE_HASH otherwise) */
-static
-enum buf_page_state
-buf_LRU_block_remove_hashed_page(
-/*=============================*/
- buf_page_t* bpage, /*!< in: block, must contain a file page and
- be in a state where it can be freed; there
- may or may not be a hash index to the page */
- ibool zip); /*!< in: TRUE if should remove also the
- compressed page of an uncompressed page */
-/******************************************************************//**
-Puts a file page whose has no hash index to the free list. */
-static
-void
-buf_LRU_block_free_hashed_page(
-/*===========================*/
- buf_block_t* block); /*!< in: block, must contain a file page and
- be in a state where it can be freed */
-
-/******************************************************************//**
-Determines if the unzip_LRU list should be used for evicting a victim
-instead of the general LRU list.
-@return TRUE if should use unzip_LRU */
-UNIV_INLINE
-ibool
-buf_LRU_evict_from_unzip_LRU(void)
-/*==============================*/
-{
- ulint io_avg;
- ulint unzip_avg;
-
- ut_ad(buf_pool_mutex_own());
-
- /* If the unzip_LRU list is empty, we can only use the LRU. */
- if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
- return(FALSE);
- }
-
- /* If unzip_LRU is at most 10% of the size of the LRU list,
- then use the LRU. This slack allows us to keep hot
- decompressed pages in the buffer pool. */
- if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
- <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
- return(FALSE);
- }
-
- /* If eviction hasn't started yet, we assume by default
- that a workload is disk bound. */
- if (buf_pool->freed_page_clock == 0) {
- return(TRUE);
- }
-
- /* Calculate the average over past intervals, and add the values
- of the current interval. */
- io_avg = buf_LRU_stat_sum.io / BUF_LRU_STAT_N_INTERVAL
- + buf_LRU_stat_cur.io;
- unzip_avg = buf_LRU_stat_sum.unzip / BUF_LRU_STAT_N_INTERVAL
- + buf_LRU_stat_cur.unzip;
-
- /* Decide based on our formula. If the load is I/O bound
- (unzip_avg is smaller than the weighted io_avg), evict an
- uncompressed frame from unzip_LRU. Otherwise we assume that
- the load is CPU bound and evict from the regular LRU. */
- return(unzip_avg <= io_avg * BUF_LRU_IO_TO_UNZIP_FACTOR);
-}
-
-/******************************************************************//**
-Attempts to drop page hash index on a batch of pages belonging to a
-particular space id. */
-static
-void
-buf_LRU_drop_page_hash_batch(
-/*=========================*/
- ulint space_id, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- const ulint* arr, /*!< in: array of page_no */
- ulint count) /*!< in: number of entries in array */
-{
- ulint i;
-
- ut_ad(arr != NULL);
- ut_ad(count <= BUF_LRU_DROP_SEARCH_HASH_SIZE);
-
- for (i = 0; i < count; ++i) {
- btr_search_drop_page_hash_when_freed(space_id, zip_size,
- arr[i]);
- }
-}
-
-/******************************************************************//**
-When doing a DROP TABLE/DISCARD TABLESPACE we have to drop all page
-hash index entries belonging to that table. This function tries to
-do that in batch. Note that this is a 'best effort' attempt and does
-not guarantee that ALL hash entries will be removed. */
-static
-void
-buf_LRU_drop_page_hash_for_tablespace(
-/*==================================*/
- ulint id) /*!< in: space id */
-{
- buf_page_t* bpage;
- ulint* page_arr;
- ulint num_entries;
- ulint zip_size;
-
- zip_size = fil_space_get_zip_size(id);
-
- if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
- /* Somehow, the tablespace does not exist. Nothing to drop. */
- ut_ad(0);
- return;
- }
-
- page_arr = ut_malloc(sizeof(ulint)
- * BUF_LRU_DROP_SEARCH_HASH_SIZE);
- buf_pool_mutex_enter();
-
-scan_again:
- num_entries = 0;
- bpage = UT_LIST_GET_LAST(buf_pool->LRU);
-
- while (bpage != NULL) {
- mutex_t* block_mutex = buf_page_get_mutex(bpage);
- buf_page_t* prev_bpage;
-
- mutex_enter(block_mutex);
- prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
-
- ut_a(buf_page_in_file(bpage));
-
- if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
- || bpage->space != id
- || bpage->buf_fix_count > 0
- || bpage->io_fix != BUF_IO_NONE) {
- /* We leave the fixed pages as is in this scan.
- To be dealt with later in the final scan. */
- mutex_exit(block_mutex);
- goto next_page;
- }
-
- if (((buf_block_t*) bpage)->is_hashed) {
-
- /* Store the offset(i.e.: page_no) in the array
- so that we can drop hash index in a batch
- later. */
- page_arr[num_entries] = bpage->offset;
- mutex_exit(block_mutex);
- ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE);
- ++num_entries;
-
- if (num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE) {
- goto next_page;
- }
- /* Array full. We release the buf_pool_mutex to
- obey the latching order. */
- buf_pool_mutex_exit();
-
- buf_LRU_drop_page_hash_batch(id, zip_size, page_arr,
- num_entries);
- num_entries = 0;
- buf_pool_mutex_enter();
- } else {
- mutex_exit(block_mutex);
- }
-
-next_page:
- /* Note that we may have released the buf_pool mutex
- above after reading the prev_bpage during processing
- of a page_hash_batch (i.e.: when the array was full).
- This means that prev_bpage can change in LRU list.
- This is OK because this function is a 'best effort'
- to drop as many search hash entries as possible and
- it does not guarantee that ALL such entries will be
- dropped. */
- bpage = prev_bpage;
-
- /* If, however, bpage has been removed from LRU list
- to the free list then we should restart the scan.
- bpage->state is protected by buf_pool mutex. */
- if (bpage && !buf_page_in_file(bpage)) {
- ut_a(num_entries == 0);
- goto scan_again;
- }
- }
-
- buf_pool_mutex_exit();
-
- /* Drop any remaining batch of search hashed pages. */
- buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
- ut_free(page_arr);
-}
-
-/******************************************************************//**
-Invalidates all pages belonging to a given tablespace when we are deleting
-the data file(s) of that tablespace. */
-UNIV_INTERN
-void
-buf_LRU_invalidate_tablespace(
-/*==========================*/
- ulint id) /*!< in: space id */
-{
- buf_page_t* bpage;
- ibool all_freed;
-
- /* Before we attempt to drop pages one by one we first
- attempt to drop page hash index entries in batches to make
- it more efficient. The batching attempt is a best effort
- attempt and does not guarantee that all pages hash entries
- will be dropped. We get rid of remaining page hash entries
- one by one below. */
- buf_LRU_drop_page_hash_for_tablespace(id);
-
-scan_again:
- buf_pool_mutex_enter();
-
- all_freed = TRUE;
-
- bpage = UT_LIST_GET_LAST(buf_pool->LRU);
-
- while (bpage != NULL) {
- mutex_t* block_mutex = buf_page_get_mutex(bpage);
- buf_page_t* prev_bpage;
-
- ut_a(buf_page_in_file(bpage));
-
- mutex_enter(block_mutex);
- prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
-
- if (buf_page_get_space(bpage) == id) {
- if (bpage->buf_fix_count > 0
- || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
-
- /* We cannot remove this page during
- this scan yet; maybe the system is
- currently reading it in, or flushing
- the modifications to the file */
-
- all_freed = FALSE;
-
- goto next_page;
- }
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr,
- "Dropping space %lu page %lu\n",
- (ulong) buf_page_get_space(bpage),
- (ulong) buf_page_get_page_no(bpage));
- }
-#endif
- if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE
- && ((buf_block_t*) bpage)->is_hashed) {
- ulint page_no;
- ulint zip_size;
-
- buf_pool_mutex_exit();
-
- zip_size = buf_page_get_zip_size(bpage);
- page_no = buf_page_get_page_no(bpage);
-
- mutex_exit(block_mutex);
-
- /* Note that the following call will acquire
- an S-latch on the page */
-
- btr_search_drop_page_hash_when_freed(
- id, zip_size, page_no);
- goto scan_again;
- }
-
- if (bpage->oldest_modification != 0) {
-
- buf_flush_remove(bpage);
- }
-
- /* Remove from the LRU list */
- if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
- != BUF_BLOCK_ZIP_FREE) {
- buf_LRU_block_free_hashed_page((buf_block_t*)
- bpage);
- } else {
- /* The block_mutex should have been
- released by buf_LRU_block_remove_hashed_page()
- when it returns BUF_BLOCK_ZIP_FREE. */
- ut_ad(block_mutex == &buf_pool_zip_mutex);
- ut_ad(!mutex_own(block_mutex));
-
- /* The compressed block descriptor
- (bpage) has been deallocated and
- block_mutex released. Also,
- buf_buddy_free() may have relocated
- prev_bpage. Rescan the LRU list. */
-
- bpage = UT_LIST_GET_LAST(buf_pool->LRU);
- continue;
- }
- }
-next_page:
- mutex_exit(block_mutex);
- bpage = prev_bpage;
- }
-
- buf_pool_mutex_exit();
-
- if (!all_freed) {
- os_thread_sleep(20000);
-
- goto scan_again;
- }
-}
-
-/********************************************************************//**
-Insert a compressed block into buf_pool->zip_clean in the LRU order. */
-UNIV_INTERN
-void
-buf_LRU_insert_zip_clean(
-/*=====================*/
- buf_page_t* bpage) /*!< in: pointer to the block in question */
-{
- buf_page_t* b;
-
- ut_ad(buf_pool_mutex_own());
- ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
-
- /* Find the first successor of bpage in the LRU list
- that is in the zip_clean list. */
- b = bpage;
- do {
- b = UT_LIST_GET_NEXT(LRU, b);
- } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
-
- /* Insert bpage before b, i.e., after the predecessor of b. */
- if (b) {
- b = UT_LIST_GET_PREV(list, b);
- }
-
- if (b) {
- UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
- } else {
- UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
- }
-}
-
-/******************************************************************//**
-Try to free an uncompressed page of a compressed block from the unzip
-LRU list. The compressed page is preserved, and it need not be clean.
-@return TRUE if freed */
-UNIV_INLINE
-ibool
-buf_LRU_free_from_unzip_LRU_list(
-/*=============================*/
- ulint n_iterations) /*!< in: how many times this has been called
- repeatedly without result: a high value means
- that we should search farther; we will search
- n_iterations / 5 of the unzip_LRU list,
- or nothing if n_iterations >= 5 */
-{
- buf_block_t* block;
- ulint distance;
-
- ut_ad(buf_pool_mutex_own());
-
- /* Theoratically it should be much easier to find a victim
- from unzip_LRU as we can choose even a dirty block (as we'll
- be evicting only the uncompressed frame). In a very unlikely
- eventuality that we are unable to find a victim from
- unzip_LRU, we fall back to the regular LRU list. We do this
- if we have done five iterations so far. */
-
- if (UNIV_UNLIKELY(n_iterations >= 5)
- || !buf_LRU_evict_from_unzip_LRU()) {
-
- return(FALSE);
- }
-
- distance = 100 + (n_iterations
- * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
-
- for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
- UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
- block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
-
- enum buf_lru_free_block_status freed;
-
- ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- ut_ad(block->in_unzip_LRU_list);
- ut_ad(block->page.in_LRU_list);
-
- mutex_enter(&block->mutex);
- freed = buf_LRU_free_block(&block->page, FALSE, NULL);
- mutex_exit(&block->mutex);
-
- switch (freed) {
- case BUF_LRU_FREED:
- return(TRUE);
-
- case BUF_LRU_CANNOT_RELOCATE:
- /* If we failed to relocate, try
- regular LRU eviction. */
- return(FALSE);
-
- case BUF_LRU_NOT_FREED:
- /* The block was buffer-fixed or I/O-fixed.
- Keep looking. */
- continue;
- }
-
- /* inappropriate return value from
- buf_LRU_free_block() */
- ut_error;
- }
-
- return(FALSE);
-}
-
-/******************************************************************//**
-Try to free a clean page from the common LRU list.
-@return TRUE if freed */
-UNIV_INLINE
-ibool
-buf_LRU_free_from_common_LRU_list(
-/*==============================*/
- ulint n_iterations) /*!< in: how many times this has been called
- repeatedly without result: a high value means
- that we should search farther; if
- n_iterations < 10, then we search
- n_iterations / 10 * buf_pool->curr_size
- pages from the end of the LRU list */
-{
- buf_page_t* bpage;
- ulint distance;
-
- ut_ad(buf_pool_mutex_own());
-
- distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
-
- for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
- UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
- bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
-
- enum buf_lru_free_block_status freed;
- unsigned accessed;
- mutex_t* block_mutex
- = buf_page_get_mutex(bpage);
-
- ut_ad(buf_page_in_file(bpage));
- ut_ad(bpage->in_LRU_list);
-
- mutex_enter(block_mutex);
- accessed = buf_page_is_accessed(bpage);
- freed = buf_LRU_free_block(bpage, TRUE, NULL);
- mutex_exit(block_mutex);
-
- switch (freed) {
- case BUF_LRU_FREED:
- /* Keep track of pages that are evicted without
- ever being accessed. This gives us a measure of
- the effectiveness of readahead */
- if (!accessed) {
- ++buf_pool->stat.n_ra_pages_evicted;
- }
- return(TRUE);
-
- case BUF_LRU_NOT_FREED:
- /* The block was dirty, buffer-fixed, or I/O-fixed.
- Keep looking. */
- continue;
-
- case BUF_LRU_CANNOT_RELOCATE:
- /* This should never occur, because we
- want to discard the compressed page too. */
- break;
- }
-
- /* inappropriate return value from
- buf_LRU_free_block() */
- ut_error;
- }
-
- return(FALSE);
-}
-
-/******************************************************************//**
-Try to free a replaceable block.
-@return TRUE if found and freed */
-UNIV_INTERN
-ibool
-buf_LRU_search_and_free_block(
-/*==========================*/
- ulint n_iterations) /*!< in: how many times this has been called
- repeatedly without result: a high value means
- that we should search farther; if
- n_iterations < 10, then we search
- n_iterations / 10 * buf_pool->curr_size
- pages from the end of the LRU list; if
- n_iterations < 5, then we will also search
- n_iterations / 5 of the unzip_LRU list. */
-{
- ibool freed = FALSE;
-
- buf_pool_mutex_enter();
-
- freed = buf_LRU_free_from_unzip_LRU_list(n_iterations);
-
- if (!freed) {
- freed = buf_LRU_free_from_common_LRU_list(n_iterations);
- }
-
- if (!freed) {
- buf_pool->LRU_flush_ended = 0;
- } else if (buf_pool->LRU_flush_ended > 0) {
- buf_pool->LRU_flush_ended--;
- }
-
- buf_pool_mutex_exit();
-
- return(freed);
-}
-
-/******************************************************************//**
-Tries to remove LRU flushed blocks from the end of the LRU list and put them
-to the free list. This is beneficial for the efficiency of the insert buffer
-operation, as flushed pages from non-unique non-clustered indexes are here
-taken out of the buffer pool, and their inserts redirected to the insert
-buffer. Otherwise, the flushed blocks could get modified again before read
-operations need new buffer blocks, and the i/o work done in flushing would be
-wasted. */
-UNIV_INTERN
-void
-buf_LRU_try_free_flushed_blocks(void)
-/*=================================*/
-{
- buf_pool_mutex_enter();
-
- while (buf_pool->LRU_flush_ended > 0) {
-
- buf_pool_mutex_exit();
-
- buf_LRU_search_and_free_block(1);
-
- buf_pool_mutex_enter();
- }
-
- buf_pool_mutex_exit();
-}
-
-/******************************************************************//**
-Returns TRUE if less than 25 % of the buffer pool is available. This can be
-used in heuristics to prevent huge transactions eating up the whole buffer
-pool for their locks.
-@return TRUE if less than 25 % of buffer pool left */
-UNIV_INTERN
-ibool
-buf_LRU_buf_pool_running_out(void)
-/*==============================*/
-{
- ibool ret = FALSE;
-
- buf_pool_mutex_enter();
-
- if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
- + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 4) {
-
- ret = TRUE;
- }
-
- buf_pool_mutex_exit();
-
- return(ret);
-}
-
-/******************************************************************//**
-Returns a free block from the buf_pool. The block is taken off the
-free list. If it is empty, returns NULL.
-@return a free control block, or NULL if the buf_block->free list is empty */
-UNIV_INTERN
-buf_block_t*
-buf_LRU_get_free_only(void)
-/*=======================*/
-{
- buf_block_t* block;
-
- ut_ad(buf_pool_mutex_own());
-
- block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
-
- if (block) {
- ut_ad(block->page.in_free_list);
- ut_d(block->page.in_free_list = FALSE);
- ut_ad(!block->page.in_flush_list);
- ut_ad(!block->page.in_LRU_list);
- ut_a(!buf_page_in_file(&block->page));
- UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
-
- mutex_enter(&block->mutex);
-
- buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
- UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
-
- mutex_exit(&block->mutex);
- }
-
- return(block);
-}
-
-/******************************************************************//**
-Returns a free block from the buf_pool. The block is taken off the
-free list. If it is empty, blocks are moved from the end of the
-LRU list to the free list.
-@return the free control block, in state BUF_BLOCK_READY_FOR_USE */
-UNIV_INTERN
-buf_block_t*
-buf_LRU_get_free_block(
-/*===================*/
- ulint zip_size) /*!< in: compressed page size in bytes,
- or 0 if uncompressed tablespace */
-{
- buf_block_t* block = NULL;
- ibool freed;
- ulint n_iterations = 1;
- ibool mon_value_was = FALSE;
- ibool started_monitor = FALSE;
-loop:
- buf_pool_mutex_enter();
-
- if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
- + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: ERROR: over 95 percent of the buffer pool"
- " is occupied by\n"
- "InnoDB: lock heaps or the adaptive hash index!"
- " Check that your\n"
- "InnoDB: transactions do not set too many row locks.\n"
- "InnoDB: Your buffer pool size is %lu MB."
- " Maybe you should make\n"
- "InnoDB: the buffer pool bigger?\n"
- "InnoDB: We intentionally generate a seg fault"
- " to print a stack trace\n"
- "InnoDB: on Linux!\n",
- (ulong) (buf_pool->curr_size
- / (1024 * 1024 / UNIV_PAGE_SIZE)));
-
- ut_error;
-
- } else if (!recv_recovery_on
- && (UT_LIST_GET_LEN(buf_pool->free)
- + UT_LIST_GET_LEN(buf_pool->LRU))
- < buf_pool->curr_size / 3) {
-
- if (!buf_lru_switched_on_innodb_mon) {
-
- /* Over 67 % of the buffer pool is occupied by lock
- heaps or the adaptive hash index. This may be a memory
- leak! */
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: WARNING: over 67 percent of"
- " the buffer pool is occupied by\n"
- "InnoDB: lock heaps or the adaptive"
- " hash index! Check that your\n"
- "InnoDB: transactions do not set too many"
- " row locks.\n"
- "InnoDB: Your buffer pool size is %lu MB."
- " Maybe you should make\n"
- "InnoDB: the buffer pool bigger?\n"
- "InnoDB: Starting the InnoDB Monitor to print"
- " diagnostics, including\n"
- "InnoDB: lock heap and hash index sizes.\n",
- (ulong) (buf_pool->curr_size
- / (1024 * 1024 / UNIV_PAGE_SIZE)));
-
- buf_lru_switched_on_innodb_mon = TRUE;
- srv_print_innodb_monitor = TRUE;
- os_event_set(srv_lock_timeout_thread_event);
- }
- } else if (buf_lru_switched_on_innodb_mon) {
-
- /* Switch off the InnoDB Monitor; this is a simple way
- to stop the monitor if the situation becomes less urgent,
- but may also surprise users if the user also switched on the
- monitor! */
-
- buf_lru_switched_on_innodb_mon = FALSE;
- srv_print_innodb_monitor = FALSE;
- }
-
- /* If there is a block in the free list, take it */
- block = buf_LRU_get_free_only();
- if (block) {
-
-#ifdef UNIV_DEBUG
- block->page.zip.m_start =
-#endif /* UNIV_DEBUG */
- block->page.zip.m_end =
- block->page.zip.m_nonempty =
- block->page.zip.n_blobs = 0;
-
- if (UNIV_UNLIKELY(zip_size)) {
- ibool lru;
- page_zip_set_size(&block->page.zip, zip_size);
- block->page.zip.data = buf_buddy_alloc(zip_size, &lru);
- UNIV_MEM_DESC(block->page.zip.data, zip_size, block);
- } else {
- page_zip_set_size(&block->page.zip, 0);
- block->page.zip.data = NULL;
- }
-
- buf_pool_mutex_exit();
-
- if (started_monitor) {
- srv_print_innodb_monitor = mon_value_was;
- }
-
- return(block);
- }
-
- /* If no block was in the free list, search from the end of the LRU
- list and try to free a block there */
-
- buf_pool_mutex_exit();
-
- freed = buf_LRU_search_and_free_block(n_iterations);
-
- if (freed > 0) {
- goto loop;
- }
-
- if (n_iterations > 30) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: difficult to find free blocks in\n"
- "InnoDB: the buffer pool (%lu search iterations)!"
- " Consider\n"
- "InnoDB: increasing the buffer pool size.\n"
- "InnoDB: It is also possible that"
- " in your Unix version\n"
- "InnoDB: fsync is very slow, or"
- " completely frozen inside\n"
- "InnoDB: the OS kernel. Then upgrading to"
- " a newer version\n"
- "InnoDB: of your operating system may help."
- " Look at the\n"
- "InnoDB: number of fsyncs in diagnostic info below.\n"
- "InnoDB: Pending flushes (fsync) log: %lu;"
- " buffer pool: %lu\n"
- "InnoDB: %lu OS file reads, %lu OS file writes,"
- " %lu OS fsyncs\n"
- "InnoDB: Starting InnoDB Monitor to print further\n"
- "InnoDB: diagnostics to the standard output.\n",
- (ulong) n_iterations,
- (ulong) fil_n_pending_log_flushes,
- (ulong) fil_n_pending_tablespace_flushes,
- (ulong) os_n_file_reads, (ulong) os_n_file_writes,
- (ulong) os_n_fsyncs);
-
- mon_value_was = srv_print_innodb_monitor;
- started_monitor = TRUE;
- srv_print_innodb_monitor = TRUE;
- os_event_set(srv_lock_timeout_thread_event);
- }
-
- /* No free block was found: try to flush the LRU list */
-
- buf_flush_free_margin();
- ++srv_buf_pool_wait_free;
-
- os_aio_simulated_wake_handler_threads();
-
- buf_pool_mutex_enter();
-
- if (buf_pool->LRU_flush_ended > 0) {
- /* We have written pages in an LRU flush. To make the insert
- buffer more efficient, we try to move these pages to the free
- list. */
-
- buf_pool_mutex_exit();
-
- buf_LRU_try_free_flushed_blocks();
- } else {
- buf_pool_mutex_exit();
- }
-
- if (n_iterations > 10) {
-
- os_thread_sleep(500000);
- }
-
- n_iterations++;
-
- goto loop;
-}
-
-/*******************************************************************//**
-Moves the LRU_old pointer so that the length of the old blocks list
-is inside the allowed limits. */
-UNIV_INLINE
-void
-buf_LRU_old_adjust_len(void)
-/*========================*/
-{
- ulint old_len;
- ulint new_len;
-
- ut_a(buf_pool->LRU_old);
- ut_ad(buf_pool_mutex_own());
- ut_ad(buf_LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
- ut_ad(buf_LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
-#if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
-# error "BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)"
-#endif
-#ifdef UNIV_LRU_DEBUG
- /* buf_pool->LRU_old must be the first item in the LRU list
- whose "old" flag is set. */
- ut_a(buf_pool->LRU_old->old);
- ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
- || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
- ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
- || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
-#endif /* UNIV_LRU_DEBUG */
-
- old_len = buf_pool->LRU_old_len;
- new_len = ut_min(UT_LIST_GET_LEN(buf_pool->LRU)
- * buf_LRU_old_ratio / BUF_LRU_OLD_RATIO_DIV,
- UT_LIST_GET_LEN(buf_pool->LRU)
- - (BUF_LRU_OLD_TOLERANCE
- + BUF_LRU_NON_OLD_MIN_LEN));
-
- for (;;) {
- buf_page_t* LRU_old = buf_pool->LRU_old;
-
- ut_a(LRU_old);
- ut_ad(LRU_old->in_LRU_list);
-#ifdef UNIV_LRU_DEBUG
- ut_a(LRU_old->old);
-#endif /* UNIV_LRU_DEBUG */
-
- /* Update the LRU_old pointer if necessary */
-
- if (old_len + BUF_LRU_OLD_TOLERANCE < new_len) {
-
- buf_pool->LRU_old = LRU_old = UT_LIST_GET_PREV(
- LRU, LRU_old);
-#ifdef UNIV_LRU_DEBUG
- ut_a(!LRU_old->old);
-#endif /* UNIV_LRU_DEBUG */
- old_len = ++buf_pool->LRU_old_len;
- buf_page_set_old(LRU_old, TRUE);
-
- } else if (old_len > new_len + BUF_LRU_OLD_TOLERANCE) {
-
- buf_pool->LRU_old = UT_LIST_GET_NEXT(LRU, LRU_old);
- old_len = --buf_pool->LRU_old_len;
- buf_page_set_old(LRU_old, FALSE);
- } else {
- return;
- }
- }
-}
-
-/*******************************************************************//**
-Initializes the old blocks pointer in the LRU list. This function should be
-called when the LRU list grows to BUF_LRU_OLD_MIN_LEN length. */
-static
-void
-buf_LRU_old_init(void)
-/*==================*/
-{
- buf_page_t* bpage;
-
- ut_ad(buf_pool_mutex_own());
- ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
-
- /* We first initialize all blocks in the LRU list as old and then use
- the adjust function to move the LRU_old pointer to the right
- position */
-
- for (bpage = UT_LIST_GET_LAST(buf_pool->LRU); bpage != NULL;
- bpage = UT_LIST_GET_PREV(LRU, bpage)) {
- ut_ad(bpage->in_LRU_list);
- ut_ad(buf_page_in_file(bpage));
- /* This loop temporarily violates the
- assertions of buf_page_set_old(). */
- bpage->old = TRUE;
- }
-
- buf_pool->LRU_old = UT_LIST_GET_FIRST(buf_pool->LRU);
- buf_pool->LRU_old_len = UT_LIST_GET_LEN(buf_pool->LRU);
-
- buf_LRU_old_adjust_len();
-}
-
-/******************************************************************//**
-Remove a block from the unzip_LRU list if it belonged to the list. */
-static
-void
-buf_unzip_LRU_remove_block_if_needed(
-/*=================================*/
- buf_page_t* bpage) /*!< in/out: control block */
-{
- ut_ad(buf_pool);
- ut_ad(bpage);
- ut_ad(buf_page_in_file(bpage));
- ut_ad(buf_pool_mutex_own());
-
- if (buf_page_belongs_to_unzip_LRU(bpage)) {
- buf_block_t* block = (buf_block_t*) bpage;
-
- ut_ad(block->in_unzip_LRU_list);
- ut_d(block->in_unzip_LRU_list = FALSE);
-
- UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
- }
-}
-
-/******************************************************************//**
-Removes a block from the LRU list. */
-UNIV_INLINE
-void
-buf_LRU_remove_block(
-/*=================*/
- buf_page_t* bpage) /*!< in: control block */
-{
- ut_ad(buf_pool);
- ut_ad(bpage);
- ut_ad(buf_pool_mutex_own());
-
- ut_a(buf_page_in_file(bpage));
-
- ut_ad(bpage->in_LRU_list);
-
- /* If the LRU_old pointer is defined and points to just this block,
- move it backward one step */
-
- if (UNIV_UNLIKELY(bpage == buf_pool->LRU_old)) {
-
- /* Below: the previous block is guaranteed to exist,
- because the LRU_old pointer is only allowed to differ
- by BUF_LRU_OLD_TOLERANCE from strict
- buf_LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV of the LRU
- list length. */
- buf_page_t* prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
-
- ut_a(prev_bpage);
-#ifdef UNIV_LRU_DEBUG
- ut_a(!prev_bpage->old);
-#endif /* UNIV_LRU_DEBUG */
- buf_pool->LRU_old = prev_bpage;
- buf_page_set_old(prev_bpage, TRUE);
-
- buf_pool->LRU_old_len++;
- }
-
- /* Remove the block from the LRU list */
- UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
- ut_d(bpage->in_LRU_list = FALSE);
-
- buf_unzip_LRU_remove_block_if_needed(bpage);
-
- /* If the LRU list is so short that LRU_old is not defined,
- clear the "old" flags and return */
- if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
-
- for (bpage = UT_LIST_GET_FIRST(buf_pool->LRU); bpage != NULL;
- bpage = UT_LIST_GET_NEXT(LRU, bpage)) {
- /* This loop temporarily violates the
- assertions of buf_page_set_old(). */
- bpage->old = FALSE;
- }
-
- buf_pool->LRU_old = NULL;
- buf_pool->LRU_old_len = 0;
-
- return;
- }
-
- ut_ad(buf_pool->LRU_old);
-
- /* Update the LRU_old_len field if necessary */
- if (buf_page_is_old(bpage)) {
-
- buf_pool->LRU_old_len--;
- }
-
- /* Adjust the length of the old block list if necessary */
- buf_LRU_old_adjust_len();
-}
-
-/******************************************************************//**
-Adds a block to the LRU list of decompressed zip pages. */
-UNIV_INTERN
-void
-buf_unzip_LRU_add_block(
-/*====================*/
- buf_block_t* block, /*!< in: control block */
- ibool old) /*!< in: TRUE if should be put to the end
- of the list, else put to the start */
-{
- ut_ad(buf_pool);
- ut_ad(block);
- ut_ad(buf_pool_mutex_own());
-
- ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
-
- ut_ad(!block->in_unzip_LRU_list);
- ut_d(block->in_unzip_LRU_list = TRUE);
-
- if (old) {
- UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
- } else {
- UT_LIST_ADD_FIRST(unzip_LRU, buf_pool->unzip_LRU, block);
- }
-}
-
-/******************************************************************//**
-Adds a block to the LRU list end. */
-UNIV_INLINE
-void
-buf_LRU_add_block_to_end_low(
-/*=========================*/
- buf_page_t* bpage) /*!< in: control block */
-{
- ut_ad(buf_pool);
- ut_ad(bpage);
- ut_ad(buf_pool_mutex_own());
-
- ut_a(buf_page_in_file(bpage));
-
- ut_ad(!bpage->in_LRU_list);
- UT_LIST_ADD_LAST(LRU, buf_pool->LRU, bpage);
- ut_d(bpage->in_LRU_list = TRUE);
-
- if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
-
- ut_ad(buf_pool->LRU_old);
-
- /* Adjust the length of the old block list if necessary */
-
- buf_page_set_old(bpage, TRUE);
- buf_pool->LRU_old_len++;
- buf_LRU_old_adjust_len();
-
- } else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {
-
- /* The LRU list is now long enough for LRU_old to become
- defined: init it */
-
- buf_LRU_old_init();
- } else {
- buf_page_set_old(bpage, buf_pool->LRU_old != NULL);
- }
-
- /* If this is a zipped block with decompressed frame as well
- then put it on the unzip_LRU list */
- if (buf_page_belongs_to_unzip_LRU(bpage)) {
- buf_unzip_LRU_add_block((buf_block_t*) bpage, TRUE);
- }
-}
-
-/******************************************************************//**
-Adds a block to the LRU list. */
-UNIV_INLINE
-void
-buf_LRU_add_block_low(
-/*==================*/
- buf_page_t* bpage, /*!< in: control block */
- ibool old) /*!< in: TRUE if should be put to the old blocks
- in the LRU list, else put to the start; if the
- LRU list is very short, the block is added to
- the start, regardless of this parameter */
-{
- ut_ad(buf_pool);
- ut_ad(bpage);
- ut_ad(buf_pool_mutex_own());
-
- ut_a(buf_page_in_file(bpage));
- ut_ad(!bpage->in_LRU_list);
-
- if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) {
-
- UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, bpage);
-
- bpage->freed_page_clock = buf_pool->freed_page_clock;
- } else {
-#ifdef UNIV_LRU_DEBUG
- /* buf_pool->LRU_old must be the first item in the LRU list
- whose "old" flag is set. */
- ut_a(buf_pool->LRU_old->old);
- ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
- || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
- ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
- || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
-#endif /* UNIV_LRU_DEBUG */
- UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, buf_pool->LRU_old,
- bpage);
- buf_pool->LRU_old_len++;
- }
-
- ut_d(bpage->in_LRU_list = TRUE);
-
- if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
-
- ut_ad(buf_pool->LRU_old);
-
- /* Adjust the length of the old block list if necessary */
-
- buf_page_set_old(bpage, old);
- buf_LRU_old_adjust_len();
-
- } else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {
-
- /* The LRU list is now long enough for LRU_old to become
- defined: init it */
-
- buf_LRU_old_init();
- } else {
- buf_page_set_old(bpage, buf_pool->LRU_old != NULL);
- }
-
- /* If this is a zipped block with decompressed frame as well
- then put it on the unzip_LRU list */
- if (buf_page_belongs_to_unzip_LRU(bpage)) {
- buf_unzip_LRU_add_block((buf_block_t*) bpage, old);
- }
-}
-
-/******************************************************************//**
-Adds a block to the LRU list. */
-UNIV_INTERN
-void
-buf_LRU_add_block(
-/*==============*/
- buf_page_t* bpage, /*!< in: control block */
- ibool old) /*!< in: TRUE if should be put to the old
- blocks in the LRU list, else put to the start;
- if the LRU list is very short, the block is
- added to the start, regardless of this
- parameter */
-{
- buf_LRU_add_block_low(bpage, old);
-}
-
-/******************************************************************//**
-Moves a block to the start of the LRU list. */
-UNIV_INTERN
-void
-buf_LRU_make_block_young(
-/*=====================*/
- buf_page_t* bpage) /*!< in: control block */
-{
- ut_ad(buf_pool_mutex_own());
-
- if (bpage->old) {
- buf_pool->stat.n_pages_made_young++;
- }
-
- buf_LRU_remove_block(bpage);
- buf_LRU_add_block_low(bpage, FALSE);
-}
-
-/******************************************************************//**
-Moves a block to the end of the LRU list. */
-UNIV_INTERN
-void
-buf_LRU_make_block_old(
-/*===================*/
- buf_page_t* bpage) /*!< in: control block */
-{
- buf_LRU_remove_block(bpage);
- buf_LRU_add_block_to_end_low(bpage);
-}
-
-/******************************************************************//**
-Try to free a block. If bpage is a descriptor of a compressed-only
-page, the descriptor object will be freed as well.
-
-NOTE: If this function returns BUF_LRU_FREED, it will not temporarily
-release buf_pool_mutex. Furthermore, the page frame will no longer be
-accessible via bpage.
-
-The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and
-release these two mutexes after the call. No other
-buf_page_get_mutex() may be held when calling this function.
-@return BUF_LRU_FREED if freed, BUF_LRU_CANNOT_RELOCATE or
-BUF_LRU_NOT_FREED otherwise. */
-UNIV_INTERN
-enum buf_lru_free_block_status
-buf_LRU_free_block(
-/*===============*/
- buf_page_t* bpage, /*!< in: block to be freed */
- ibool zip, /*!< in: TRUE if should remove also the
- compressed page of an uncompressed page */
- ibool* buf_pool_mutex_released)
- /*!< in: pointer to a variable that will
- be assigned TRUE if buf_pool_mutex
- was temporarily released, or NULL */
-{
- buf_page_t* b = NULL;
- mutex_t* block_mutex = buf_page_get_mutex(bpage);
-
- ut_ad(buf_pool_mutex_own());
- ut_ad(mutex_own(block_mutex));
- ut_ad(buf_page_in_file(bpage));
- ut_ad(bpage->in_LRU_list);
- ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
- UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
-
- if (!buf_page_can_relocate(bpage)) {
-
- /* Do not free buffer-fixed or I/O-fixed blocks. */
- return(BUF_LRU_NOT_FREED);
- }
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
-#endif /* UNIV_IBUF_COUNT_DEBUG */
-
- if (zip || !bpage->zip.data) {
- /* This would completely free the block. */
- /* Do not completely free dirty blocks. */
-
- if (bpage->oldest_modification) {
- return(BUF_LRU_NOT_FREED);
- }
- } else if (bpage->oldest_modification) {
- /* Do not completely free dirty blocks. */
-
- if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
- ut_ad(buf_page_get_state(bpage)
- == BUF_BLOCK_ZIP_DIRTY);
- return(BUF_LRU_NOT_FREED);
- }
-
- goto alloc;
- } else if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
- /* Allocate the control block for the compressed page.
- If it cannot be allocated (without freeing a block
- from the LRU list), refuse to free bpage. */
-alloc:
- buf_pool_mutex_exit_forbid();
- b = buf_buddy_alloc(sizeof *b, NULL);
- buf_pool_mutex_exit_allow();
-
- if (UNIV_UNLIKELY(!b)) {
- return(BUF_LRU_CANNOT_RELOCATE);
- }
-
- memcpy(b, bpage, sizeof *b);
- }
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr, "Putting space %lu page %lu to free list\n",
- (ulong) buf_page_get_space(bpage),
- (ulong) buf_page_get_page_no(bpage));
- }
-#endif /* UNIV_DEBUG */
-
- if (buf_LRU_block_remove_hashed_page(bpage, zip)
- != BUF_BLOCK_ZIP_FREE) {
- ut_a(bpage->buf_fix_count == 0);
-
- if (b) {
- buf_page_t* prev_b = UT_LIST_GET_PREV(LRU, b);
- const ulint fold = buf_page_address_fold(
- bpage->space, bpage->offset);
-
- ut_a(!buf_page_hash_get(bpage->space, bpage->offset));
-
- b->state = b->oldest_modification
- ? BUF_BLOCK_ZIP_DIRTY
- : BUF_BLOCK_ZIP_PAGE;
- UNIV_MEM_DESC(b->zip.data,
- page_zip_get_size(&b->zip), b);
-
- /* The fields in_page_hash and in_LRU_list of
- the to-be-freed block descriptor should have
- been cleared in
- buf_LRU_block_remove_hashed_page(), which
- invokes buf_LRU_remove_block(). */
- ut_ad(!bpage->in_page_hash);
- ut_ad(!bpage->in_LRU_list);
- /* bpage->state was BUF_BLOCK_FILE_PAGE because
- b != NULL. The type cast below is thus valid. */
- ut_ad(!((buf_block_t*) bpage)->in_unzip_LRU_list);
-
- /* The fields of bpage were copied to b before
- buf_LRU_block_remove_hashed_page() was invoked. */
- ut_ad(!b->in_zip_hash);
- ut_ad(b->in_page_hash);
- ut_ad(b->in_LRU_list);
-
- HASH_INSERT(buf_page_t, hash,
- buf_pool->page_hash, fold, b);
-
- /* Insert b where bpage was in the LRU list. */
- if (UNIV_LIKELY(prev_b != NULL)) {
- ulint lru_len;
-
- ut_ad(prev_b->in_LRU_list);
- ut_ad(buf_page_in_file(prev_b));
- UNIV_MEM_ASSERT_RW(prev_b, sizeof *prev_b);
-
- UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU,
- prev_b, b);
-
- if (buf_page_is_old(b)) {
- buf_pool->LRU_old_len++;
- if (UNIV_UNLIKELY
- (buf_pool->LRU_old
- == UT_LIST_GET_NEXT(LRU, b))) {
-
- buf_pool->LRU_old = b;
- }
- }
-
- lru_len = UT_LIST_GET_LEN(buf_pool->LRU);
-
- if (lru_len > BUF_LRU_OLD_MIN_LEN) {
- ut_ad(buf_pool->LRU_old);
- /* Adjust the length of the
- old block list if necessary */
- buf_LRU_old_adjust_len();
- } else if (lru_len == BUF_LRU_OLD_MIN_LEN) {
- /* The LRU list is now long
- enough for LRU_old to become
- defined: init it */
- buf_LRU_old_init();
- }
-#ifdef UNIV_LRU_DEBUG
- /* Check that the "old" flag is consistent
- in the block and its neighbours. */
- buf_page_set_old(b, buf_page_is_old(b));
-#endif /* UNIV_LRU_DEBUG */
- } else {
- ut_d(b->in_LRU_list = FALSE);
- buf_LRU_add_block_low(b, buf_page_is_old(b));
- }
-
- if (b->state == BUF_BLOCK_ZIP_PAGE) {
- buf_LRU_insert_zip_clean(b);
- } else {
- buf_page_t* prev;
-
- ut_ad(b->in_flush_list);
- ut_d(bpage->in_flush_list = FALSE);
-
- prev = UT_LIST_GET_PREV(list, b);
- UT_LIST_REMOVE(list, buf_pool->flush_list, b);
-
- if (prev) {
- ut_ad(prev->in_flush_list);
- UT_LIST_INSERT_AFTER(
- list,
- buf_pool->flush_list,
- prev, b);
- } else {
- UT_LIST_ADD_FIRST(
- list,
- buf_pool->flush_list,
- b);
- }
- }
-
- bpage->zip.data = NULL;
- page_zip_set_size(&bpage->zip, 0);
-
- /* Prevent buf_page_get_gen() from
- decompressing the block while we release
- buf_pool_mutex and block_mutex. */
- b->buf_fix_count++;
- b->io_fix = BUF_IO_READ;
- }
-
- if (buf_pool_mutex_released) {
- *buf_pool_mutex_released = TRUE;
- }
-
- buf_pool_mutex_exit();
- mutex_exit(block_mutex);
-
- /* Remove possible adaptive hash index on the page.
- The page was declared uninitialized by
- buf_LRU_block_remove_hashed_page(). We need to flag
- the contents of the page valid (which it still is) in
- order to avoid bogus Valgrind warnings.*/
-
- UNIV_MEM_VALID(((buf_block_t*) bpage)->frame,
- UNIV_PAGE_SIZE);
- btr_search_drop_page_hash_index((buf_block_t*) bpage);
- UNIV_MEM_INVALID(((buf_block_t*) bpage)->frame,
- UNIV_PAGE_SIZE);
-
- if (b) {
- /* Compute and stamp the compressed page
- checksum while not holding any mutex. The
- block is already half-freed
- (BUF_BLOCK_REMOVE_HASH) and removed from
- buf_pool->page_hash, thus inaccessible by any
- other thread. */
-
- mach_write_to_4(
- b->zip.data + FIL_PAGE_SPACE_OR_CHKSUM,
- UNIV_LIKELY(srv_use_checksums)
- ? page_zip_calc_checksum(
- b->zip.data,
- page_zip_get_size(&b->zip))
- : BUF_NO_CHECKSUM_MAGIC);
- }
-
- buf_pool_mutex_enter();
- mutex_enter(block_mutex);
-
- if (b) {
- mutex_enter(&buf_pool_zip_mutex);
- b->buf_fix_count--;
- buf_page_set_io_fix(b, BUF_IO_NONE);
- mutex_exit(&buf_pool_zip_mutex);
- }
-
- buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
- } else {
- /* The block_mutex should have been released by
- buf_LRU_block_remove_hashed_page() when it returns
- BUF_BLOCK_ZIP_FREE. */
- ut_ad(block_mutex == &buf_pool_zip_mutex);
- mutex_enter(block_mutex);
- }
-
- return(BUF_LRU_FREED);
-}
-
-/******************************************************************//**
-Puts a block back to the free list. */
-UNIV_INTERN
-void
-buf_LRU_block_free_non_file_page(
-/*=============================*/
- buf_block_t* block) /*!< in: block, must not contain a file page */
-{
- void* data;
-
- ut_ad(block);
- ut_ad(buf_pool_mutex_own());
- ut_ad(mutex_own(&block->mutex));
-
- switch (buf_block_get_state(block)) {
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_READY_FOR_USE:
- break;
- default:
- ut_error;
- }
-
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- ut_a(block->n_pointers == 0);
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- ut_ad(!block->page.in_free_list);
- ut_ad(!block->page.in_flush_list);
- ut_ad(!block->page.in_LRU_list);
-
- buf_block_set_state(block, BUF_BLOCK_NOT_USED);
-
- UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
-#ifdef UNIV_DEBUG
- /* Wipe contents of page to reveal possible stale pointers to it */
- memset(block->frame, '\0', UNIV_PAGE_SIZE);
-#else
- /* Wipe page_no and space_id */
- memset(block->frame + FIL_PAGE_OFFSET, 0xfe, 4);
- memset(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xfe, 4);
-#endif
- data = block->page.zip.data;
-
- if (data) {
- block->page.zip.data = NULL;
- mutex_exit(&block->mutex);
- buf_pool_mutex_exit_forbid();
- buf_buddy_free(data, page_zip_get_size(&block->page.zip));
- buf_pool_mutex_exit_allow();
- mutex_enter(&block->mutex);
- page_zip_set_size(&block->page.zip, 0);
- }
-
- UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
- ut_d(block->page.in_free_list = TRUE);
-
- UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
-}
-
-/******************************************************************//**
-Takes a block out of the LRU list and page hash table.
-If the block is compressed-only (BUF_BLOCK_ZIP_PAGE),
-the object will be freed and buf_pool_zip_mutex will be released.
-
-If a compressed page or a compressed-only block descriptor is freed,
-other compressed pages or compressed-only block descriptors may be
-relocated.
-@return the new state of the block (BUF_BLOCK_ZIP_FREE if the state
-was BUF_BLOCK_ZIP_PAGE, or BUF_BLOCK_REMOVE_HASH otherwise) */
-static
-enum buf_page_state
-buf_LRU_block_remove_hashed_page(
-/*=============================*/
- buf_page_t* bpage, /*!< in: block, must contain a file page and
- be in a state where it can be freed; there
- may or may not be a hash index to the page */
- ibool zip) /*!< in: TRUE if should remove also the
- compressed page of an uncompressed page */
-{
- const buf_page_t* hashed_bpage;
- ut_ad(bpage);
- ut_ad(buf_pool_mutex_own());
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-
- ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
- ut_a(bpage->buf_fix_count == 0);
-
- UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
-
- buf_LRU_remove_block(bpage);
-
- buf_pool->freed_page_clock += 1;
-
- switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_FILE_PAGE:
- UNIV_MEM_ASSERT_W(bpage, sizeof(buf_block_t));
- UNIV_MEM_ASSERT_W(((buf_block_t*) bpage)->frame,
- UNIV_PAGE_SIZE);
- buf_block_modify_clock_inc((buf_block_t*) bpage);
- if (bpage->zip.data) {
- const page_t* page = ((buf_block_t*) bpage)->frame;
- const ulint zip_size
- = page_zip_get_size(&bpage->zip);
-
- ut_a(!zip || bpage->oldest_modification == 0);
-
- switch (UNIV_EXPECT(fil_page_get_type(page),
- FIL_PAGE_INDEX)) {
- case FIL_PAGE_TYPE_ALLOCATED:
- case FIL_PAGE_INODE:
- case FIL_PAGE_IBUF_BITMAP:
- case FIL_PAGE_TYPE_FSP_HDR:
- case FIL_PAGE_TYPE_XDES:
- /* These are essentially uncompressed pages. */
- if (!zip) {
- /* InnoDB writes the data to the
- uncompressed page frame. Copy it
- to the compressed page, which will
- be preserved. */
- memcpy(bpage->zip.data, page,
- zip_size);
- }
- break;
- case FIL_PAGE_TYPE_ZBLOB:
- case FIL_PAGE_TYPE_ZBLOB2:
- break;
- case FIL_PAGE_INDEX:
-#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(&bpage->zip, page));
-#endif /* UNIV_ZIP_DEBUG */
- break;
- default:
- ut_print_timestamp(stderr);
- fputs(" InnoDB: ERROR: The compressed page"
- " to be evicted seems corrupt:", stderr);
- ut_print_buf(stderr, page, zip_size);
- fputs("\nInnoDB: Possibly older version"
- " of the page:", stderr);
- ut_print_buf(stderr, bpage->zip.data,
- zip_size);
- putc('\n', stderr);
- ut_error;
- }
-
- break;
- }
- /* fall through */
- case BUF_BLOCK_ZIP_PAGE:
- ut_a(bpage->oldest_modification == 0);
- UNIV_MEM_ASSERT_W(bpage->zip.data,
- page_zip_get_size(&bpage->zip));
- break;
- case BUF_BLOCK_ZIP_FREE:
- case BUF_BLOCK_ZIP_DIRTY:
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- ut_error;
- break;
- }
-
- hashed_bpage = buf_page_hash_get(bpage->space, bpage->offset);
-
- if (UNIV_UNLIKELY(bpage != hashed_bpage)) {
- fprintf(stderr,
- "InnoDB: Error: page %lu %lu not found"
- " in the hash table\n",
- (ulong) bpage->space,
- (ulong) bpage->offset);
- if (hashed_bpage) {
- fprintf(stderr,
- "InnoDB: In hash table we find block"
- " %p of %lu %lu which is not %p\n",
- (const void*) hashed_bpage,
- (ulong) hashed_bpage->space,
- (ulong) hashed_bpage->offset,
- (const void*) bpage);
- }
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- mutex_exit(buf_page_get_mutex(bpage));
- buf_pool_mutex_exit();
- buf_print();
- buf_LRU_print();
- buf_validate();
- buf_LRU_validate();
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
- ut_error;
- }
-
- ut_ad(!bpage->in_zip_hash);
- ut_ad(bpage->in_page_hash);
- ut_d(bpage->in_page_hash = FALSE);
- HASH_DELETE(buf_page_t, hash, buf_pool->page_hash,
- buf_page_address_fold(bpage->space, bpage->offset),
- bpage);
- switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_ZIP_PAGE:
- ut_ad(!bpage->in_free_list);
- ut_ad(!bpage->in_flush_list);
- ut_ad(!bpage->in_LRU_list);
- ut_a(bpage->zip.data);
- ut_a(buf_page_get_zip_size(bpage));
-
- UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
-
- mutex_exit(&buf_pool_zip_mutex);
- buf_pool_mutex_exit_forbid();
- buf_buddy_free(bpage->zip.data,
- page_zip_get_size(&bpage->zip));
- buf_buddy_free(bpage, sizeof(*bpage));
- buf_pool_mutex_exit_allow();
- UNIV_MEM_UNDESC(bpage);
- return(BUF_BLOCK_ZIP_FREE);
-
- case BUF_BLOCK_FILE_PAGE:
- memset(((buf_block_t*) bpage)->frame
- + FIL_PAGE_OFFSET, 0xff, 4);
- memset(((buf_block_t*) bpage)->frame
- + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4);
- UNIV_MEM_INVALID(((buf_block_t*) bpage)->frame,
- UNIV_PAGE_SIZE);
- buf_page_set_state(bpage, BUF_BLOCK_REMOVE_HASH);
-
- if (zip && bpage->zip.data) {
- /* Free the compressed page. */
- void* data = bpage->zip.data;
- bpage->zip.data = NULL;
-
- ut_ad(!bpage->in_free_list);
- ut_ad(!bpage->in_flush_list);
- ut_ad(!bpage->in_LRU_list);
- mutex_exit(&((buf_block_t*) bpage)->mutex);
- buf_pool_mutex_exit_forbid();
- buf_buddy_free(data, page_zip_get_size(&bpage->zip));
- buf_pool_mutex_exit_allow();
- mutex_enter(&((buf_block_t*) bpage)->mutex);
- page_zip_set_size(&bpage->zip, 0);
- }
-
- return(BUF_BLOCK_REMOVE_HASH);
-
- case BUF_BLOCK_ZIP_FREE:
- case BUF_BLOCK_ZIP_DIRTY:
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- break;
- }
-
- ut_error;
- return(BUF_BLOCK_ZIP_FREE);
-}
-
-/******************************************************************//**
-Puts a file page whose has no hash index to the free list. */
-static
-void
-buf_LRU_block_free_hashed_page(
-/*===========================*/
- buf_block_t* block) /*!< in: block, must contain a file page and
- be in a state where it can be freed */
-{
- ut_ad(buf_pool_mutex_own());
- ut_ad(mutex_own(&block->mutex));
-
- buf_block_set_state(block, BUF_BLOCK_MEMORY);
-
- buf_LRU_block_free_non_file_page(block);
-}
-
-/**********************************************************************//**
-Updates buf_LRU_old_ratio.
-@return updated old_pct */
-UNIV_INTERN
-uint
-buf_LRU_old_ratio_update(
-/*=====================*/
- uint old_pct,/*!< in: Reserve this percentage of
- the buffer pool for "old" blocks. */
- ibool adjust) /*!< in: TRUE=adjust the LRU list;
- FALSE=just assign buf_LRU_old_ratio
- during the initialization of InnoDB */
-{
- uint ratio;
-
- ratio = old_pct * BUF_LRU_OLD_RATIO_DIV / 100;
- if (ratio < BUF_LRU_OLD_RATIO_MIN) {
- ratio = BUF_LRU_OLD_RATIO_MIN;
- } else if (ratio > BUF_LRU_OLD_RATIO_MAX) {
- ratio = BUF_LRU_OLD_RATIO_MAX;
- }
-
- if (adjust) {
- buf_pool_mutex_enter();
-
- if (ratio != buf_LRU_old_ratio) {
- buf_LRU_old_ratio = ratio;
-
- if (UT_LIST_GET_LEN(buf_pool->LRU)
- >= BUF_LRU_OLD_MIN_LEN) {
- buf_LRU_old_adjust_len();
- }
- }
-
- buf_pool_mutex_exit();
- } else {
- buf_LRU_old_ratio = ratio;
- }
-
- /* the reverse of
- ratio = old_pct * BUF_LRU_OLD_RATIO_DIV / 100 */
- return((uint) (ratio * 100 / (double) BUF_LRU_OLD_RATIO_DIV + 0.5));
-}
-
-/********************************************************************//**
-Update the historical stats that we are collecting for LRU eviction
-policy at the end of each interval. */
-UNIV_INTERN
-void
-buf_LRU_stat_update(void)
-/*=====================*/
-{
- buf_LRU_stat_t* item;
-
- /* If we haven't started eviction yet then don't update stats. */
- if (buf_pool->freed_page_clock == 0) {
- goto func_exit;
- }
-
- buf_pool_mutex_enter();
-
- /* Update the index. */
- item = &buf_LRU_stat_arr[buf_LRU_stat_arr_ind];
- buf_LRU_stat_arr_ind++;
- buf_LRU_stat_arr_ind %= BUF_LRU_STAT_N_INTERVAL;
-
- /* Add the current value and subtract the obsolete entry. */
- buf_LRU_stat_sum.io += buf_LRU_stat_cur.io - item->io;
- buf_LRU_stat_sum.unzip += buf_LRU_stat_cur.unzip - item->unzip;
-
- /* Put current entry in the array. */
- memcpy(item, &buf_LRU_stat_cur, sizeof *item);
-
- buf_pool_mutex_exit();
-
-func_exit:
- /* Clear the current entry. */
- memset(&buf_LRU_stat_cur, 0, sizeof buf_LRU_stat_cur);
-}
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/**********************************************************************//**
-Validates the LRU list.
-@return TRUE */
-UNIV_INTERN
-ibool
-buf_LRU_validate(void)
-/*==================*/
-{
- buf_page_t* bpage;
- buf_block_t* block;
- ulint old_len;
- ulint new_len;
-
- ut_ad(buf_pool);
- buf_pool_mutex_enter();
-
- if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
-
- ut_a(buf_pool->LRU_old);
- old_len = buf_pool->LRU_old_len;
- new_len = ut_min(UT_LIST_GET_LEN(buf_pool->LRU)
- * buf_LRU_old_ratio / BUF_LRU_OLD_RATIO_DIV,
- UT_LIST_GET_LEN(buf_pool->LRU)
- - (BUF_LRU_OLD_TOLERANCE
- + BUF_LRU_NON_OLD_MIN_LEN));
- ut_a(old_len >= new_len - BUF_LRU_OLD_TOLERANCE);
- ut_a(old_len <= new_len + BUF_LRU_OLD_TOLERANCE);
- }
-
- UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU,
- ut_ad(ut_list_node_313->in_LRU_list));
-
- bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
-
- old_len = 0;
-
- while (bpage != NULL) {
-
- switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_ZIP_FREE:
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- ut_error;
- break;
- case BUF_BLOCK_FILE_PAGE:
- ut_ad(((buf_block_t*) bpage)->in_unzip_LRU_list
- == buf_page_belongs_to_unzip_LRU(bpage));
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_ZIP_DIRTY:
- break;
- }
-
- if (buf_page_is_old(bpage)) {
- const buf_page_t* prev
- = UT_LIST_GET_PREV(LRU, bpage);
- const buf_page_t* next
- = UT_LIST_GET_NEXT(LRU, bpage);
-
- if (!old_len++) {
- ut_a(buf_pool->LRU_old == bpage);
- } else {
- ut_a(!prev || buf_page_is_old(prev));
- }
-
- ut_a(!next || buf_page_is_old(next));
- }
-
- bpage = UT_LIST_GET_NEXT(LRU, bpage);
- }
-
- ut_a(buf_pool->LRU_old_len == old_len);
-
- UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free,
- ut_ad(ut_list_node_313->in_free_list));
-
- for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
- bpage != NULL;
- bpage = UT_LIST_GET_NEXT(list, bpage)) {
-
- ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
- }
-
- UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
- ut_ad(ut_list_node_313->in_unzip_LRU_list
- && ut_list_node_313->page.in_LRU_list));
-
- for (block = UT_LIST_GET_FIRST(buf_pool->unzip_LRU);
- block;
- block = UT_LIST_GET_NEXT(unzip_LRU, block)) {
-
- ut_ad(block->in_unzip_LRU_list);
- ut_ad(block->page.in_LRU_list);
- ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
- }
-
- buf_pool_mutex_exit();
- return(TRUE);
-}
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/**********************************************************************//**
-Prints the LRU list. */
-UNIV_INTERN
-void
-buf_LRU_print(void)
-/*===============*/
-{
- const buf_page_t* bpage;
-
- ut_ad(buf_pool);
- buf_pool_mutex_enter();
-
- bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
-
- while (bpage != NULL) {
-
- fprintf(stderr, "BLOCK space %lu page %lu ",
- (ulong) buf_page_get_space(bpage),
- (ulong) buf_page_get_page_no(bpage));
-
- if (buf_page_is_old(bpage)) {
- fputs("old ", stderr);
- }
-
- if (bpage->buf_fix_count) {
- fprintf(stderr, "buffix count %lu ",
- (ulong) bpage->buf_fix_count);
- }
-
- if (buf_page_get_io_fix(bpage)) {
- fprintf(stderr, "io_fix %lu ",
- (ulong) buf_page_get_io_fix(bpage));
- }
-
- if (bpage->oldest_modification) {
- fputs("modif. ", stderr);
- }
-
- switch (buf_page_get_state(bpage)) {
- const byte* frame;
- case BUF_BLOCK_FILE_PAGE:
- frame = buf_block_get_frame((buf_block_t*) bpage);
- fprintf(stderr, "\ntype %lu"
- " index id %lu\n",
- (ulong) fil_page_get_type(frame),
- (ulong) ut_dulint_get_low(
- btr_page_get_index_id(frame)));
- break;
- case BUF_BLOCK_ZIP_PAGE:
- frame = bpage->zip.data;
- fprintf(stderr, "\ntype %lu size %lu"
- " index id %lu\n",
- (ulong) fil_page_get_type(frame),
- (ulong) buf_page_get_zip_size(bpage),
- (ulong) ut_dulint_get_low(
- btr_page_get_index_id(frame)));
- break;
-
- default:
- fprintf(stderr, "\n!state %lu!\n",
- (ulong) buf_page_get_state(bpage));
- break;
- }
-
- bpage = UT_LIST_GET_NEXT(LRU, bpage);
- }
-
- buf_pool_mutex_exit();
-}
-#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
diff --git a/storage/innodb_plugin/buf/buf0rea.c b/storage/innodb_plugin/buf/buf0rea.c
deleted file mode 100644
index dd98ea17eb5..00000000000
--- a/storage/innodb_plugin/buf/buf0rea.c
+++ /dev/null
@@ -1,656 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file buf/buf0rea.c
-The database buffer read
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#include "buf0rea.h"
-
-#include "fil0fil.h"
-#include "mtr0mtr.h"
-
-#include "buf0buf.h"
-#include "buf0flu.h"
-#include "buf0lru.h"
-#include "ibuf0ibuf.h"
-#include "log0recv.h"
-#include "trx0sys.h"
-#include "os0file.h"
-#include "srv0start.h"
-#include "srv0srv.h"
-
-/** The linear read-ahead area size */
-#define BUF_READ_AHEAD_LINEAR_AREA BUF_READ_AHEAD_AREA
-
-/** If more than buf_pool->curr_size divided by this number of reads are
-pending, read-ahead is not done: this is to prevent flooding the buffer pool
-with i/o-fixed buffer blocks */
-#define BUF_READ_AHEAD_PEND_LIMIT 2
-
-/********************************************************************//**
-Low-level function which reads a page asynchronously from a file to the
-buffer buf_pool if it is not already there, in which case does nothing.
-Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
-flag is cleared and the x-lock released by an i/o-handler thread.
-If sync is TRUE, or the page is an ibuf bitmap page or the trx sys header
-page (for which sync is forced to TRUE), buf_page_io_complete() is called
-by this function itself after fil_io() has returned.
-A read of a page inside the doublewrite buffer blocks of the system
-tablespace is refused with a warning printed to stderr.
-@return 1 if a read request was queued, 0 if the page already resided
-in buf_pool, or if the page is in the doublewrite buffer blocks in
-which case it is never read into the pool, or if the tablespace does
-not exist or is being dropped */
-static
-ulint
-buf_read_page_low(
-/*==============*/
- ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
- trying to read from a non-existent tablespace, or a
- tablespace which is just now being dropped */
- ibool sync, /*!< in: TRUE if synchronous aio is desired */
- ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ...,
- ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
- at read-ahead functions) */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size, or 0 */
- ibool unzip, /*!< in: TRUE=request uncompressed page */
- ib_int64_t tablespace_version, /*!< in: if the space memory object has
- this timestamp different from what we are giving here,
- treat the tablespace as dropped; this is a timestamp we
- use to stop dangling page reads from a tablespace
- which we have DISCARDed + IMPORTed back */
- ulint offset) /*!< in: page number */
-{
- buf_page_t* bpage;
- ulint wake_later;
-
- *err = DB_SUCCESS;
-
- wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER; /* split the wake-later flag off mode */
- mode = mode & ~OS_AIO_SIMULATED_WAKE_LATER;
-
- if (trx_doublewrite && space == TRX_SYS_SPACE
- && ( (offset >= trx_doublewrite->block1
- && offset < trx_doublewrite->block1
- + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
- || (offset >= trx_doublewrite->block2
- && offset < trx_doublewrite->block2
- + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE))) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: trying to read"
- " doublewrite buffer page %lu\n",
- (ulong) offset);
-
- return(0);
- }
-
- if (ibuf_bitmap_page(zip_size, offset)
- || trx_sys_hdr_page(space, offset)) {
-
- /* Trx sys header is so low in the latching order that we play
- safe and do not leave the i/o-completion to an asynchronous
- i/o-thread. Ibuf bitmap pages must always be read with
- synchronous i/o, to make sure they do not get involved in
- thread deadlocks. */
-
- sync = TRUE;
- }
-
- /* The following call will also check if the tablespace does not exist
- or is being dropped; if we succeed in initing the page in the buffer
- pool for read, then DISCARD cannot proceed until the read has
- completed. A NULL return means no read is issued; err is passed
- through so that the callee can report the reason. */
- bpage = buf_page_init_for_read(err, mode, space, zip_size, unzip,
- tablespace_version, offset);
- if (bpage == NULL) {
-
- return(0);
- }
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr,
- "Posting read request for page %lu, sync %lu\n",
- (ulong) offset,
- (ulong) sync);
- }
-#endif
-
- ut_ad(buf_page_in_file(bpage));
-
- if (zip_size) {
- /* compressed tablespace: read zip_size bytes into zip.data */
- *err = fil_io(OS_FILE_READ | wake_later,
- sync, space, zip_size, offset, 0, zip_size,
- bpage->zip.data, bpage);
- } else {
- ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
-
- /* uncompressed: read UNIV_PAGE_SIZE bytes into the block frame */
- *err = fil_io(OS_FILE_READ | wake_later,
- sync, space, 0, offset, 0, UNIV_PAGE_SIZE,
- ((buf_block_t*) bpage)->frame, bpage);
- }
- ut_a(*err == DB_SUCCESS); /* asserts that fil_io() reported success */
-
- if (sync) {
- /* The i/o is already completed when we arrive from
- fil_read */
- buf_page_io_complete(bpage);
- }
-
- return(1);
-}
-
-/********************************************************************//**
-High-level function which reads a page from a file to the buffer buf_pool
-if it is not already there. Sets the io_fix flag and sets an exclusive lock
-on the buffer frame. The read is requested in the synchronous aio mode
-(sync == TRUE is passed to buf_read_page_low()). The number of issued
-reads is added to srv_buf_pool_reads, an error is printed to stderr if
-DB_TABLESPACE_DELETED is reported, and buf_flush_free_margin() and
-buf_LRU_stat_inc_io() are called before returning.
-@return TRUE if buf_read_page_low() issued a read request for the page,
-FALSE if it returned 0 (see its description for the possible reasons) */
-UNIV_INTERN
-ibool
-buf_read_page(
-/*==========*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
- ulint offset) /*!< in: page number */
-{
- ib_int64_t tablespace_version;
- ulint count;
- ulint err;
-
- tablespace_version = fil_space_get_version(space);
-
- /* We do the i/o in the synchronous aio mode to save thread
- switches: hence TRUE */
-
- count = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
- zip_size, FALSE,
- tablespace_version, offset);
- srv_buf_pool_reads += count; /* count is 0 or 1 */
- if (err == DB_TABLESPACE_DELETED) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: trying to access"
- " tablespace %lu page no. %lu,\n"
- "InnoDB: but the tablespace does not exist"
- " or is just being dropped.\n",
- (ulong) space, (ulong) offset);
- }
-
- /* Flush pages from the end of the LRU list if necessary */
- buf_flush_free_margin();
-
- /* Increment number of I/O operations used for LRU policy. */
- buf_LRU_stat_inc_io();
-
- return(count > 0);
-}
-
-/********************************************************************//**
-Applies linear read-ahead if in the buf_pool the page is a border page of
-a linear read-ahead area and all the pages in the area have been accessed.
-Does not read any page if the read-ahead mechanism is not activated. Note
-that the algorithm looks at the 'natural' adjacent successor and
-predecessor of the page, which on the leaf level of a B-tree are the next
-and previous page in the chain of leaves. To know these, the page specified
-in (space, offset) must already be present in the buf_pool. Thus, the
-natural way to use this function is to call it when a page in the buf_pool
-is accessed the first time, calling this function just after it has been
-bufferfixed.
-NOTE 1: as this function looks at the natural predecessor and successor
-fields on the page, what happens, if these are not initialized to any
-sensible value? No problem, before applying read-ahead we check that the
-area to read is within the span of the space, if not, read-ahead is not
-applied. An uninitialized value may result in a useless read operation, but
-only very improbably.
-NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
-function must be written such that it cannot end up waiting for these
-latches!
-NOTE 3: the calling thread must want access to the page given: this rule is
-set to prevent unintended read-aheads performed by ibuf routines, a situation
-which could result in a deadlock if the OS does not support asynchronous io.
-Outline of the implementation: (1) return early unless offset is the first
-or last page of its area, is not an ibuf bitmap or trx sys header page, the
-area lies within the tablespace and not too many reads are pending;
-(2) count pages of the area that are missing, unaccessed or accessed out of
-order and give up if the count exceeds the threshold; (3) read the
-predecessor/successor page numbers from the page and pick the neighbouring
-area to read; (4) post non-sync read requests for every page of that area
-except ibuf bitmap pages.
-@return number of page read requests issued */
-UNIV_INTERN
-ulint
-buf_read_ahead_linear(
-/*==================*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
- ulint offset) /*!< in: page number of a page; NOTE: the current thread
- must want access to this page (see NOTE 3 above) */
-{
- ib_int64_t tablespace_version;
- buf_page_t* bpage;
- buf_frame_t* frame;
- buf_page_t* pred_bpage = NULL;
- ulint pred_offset;
- ulint succ_offset;
- ulint count;
- int asc_or_desc;
- ulint new_offset;
- ulint fail_count;
- ulint ibuf_mode;
- ulint low, high;
- ulint err;
- ulint i;
- const ulint buf_read_ahead_linear_area
- = BUF_READ_AHEAD_LINEAR_AREA;
- ulint threshold;
-
- if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
- /* No read-ahead to avoid thread deadlocks */
- return(0);
- }
-
- /* [low, high) is the area-aligned range of pages containing offset */
- low = (offset / buf_read_ahead_linear_area)
- * buf_read_ahead_linear_area;
- high = (offset / buf_read_ahead_linear_area + 1)
- * buf_read_ahead_linear_area;
-
- if ((offset != low) && (offset != high - 1)) {
- /* This is not a border page of the area: return */
-
- return(0);
- }
-
- if (ibuf_bitmap_page(zip_size, offset)
- || trx_sys_hdr_page(space, offset)) {
-
- /* If it is an ibuf bitmap page or trx sys hdr, we do
- no read-ahead, as that could break the ibuf page access
- order */
-
- return(0);
- }
-
- /* Remember the tablespace version before we ask the tablespace size
- below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
- do not try to read outside the bounds of the tablespace! */
-
- tablespace_version = fil_space_get_version(space);
-
- buf_pool_mutex_enter();
-
- if (high > fil_space_get_size(space)) {
- buf_pool_mutex_exit();
- /* The area is not whole, return */
-
- return(0);
- }
-
- if (buf_pool->n_pend_reads
- > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
- buf_pool_mutex_exit();
-
- return(0);
- }
-
- /* Check that almost all pages in the area have been accessed; if
- offset == low, the accesses must be in a descending order, otherwise,
- in an ascending order. */
-
- asc_or_desc = 1;
-
- if (offset == low) {
- asc_or_desc = -1;
- }
-
- /* How many out of order accessed pages can we ignore
- when working out the access pattern for linear readahead.
- NOTE(review): the literal 64 presumably stands for the number of
- pages in a read-ahead area; confirm against BUF_READ_AHEAD_AREA. */
- threshold = ut_min((64 - srv_read_ahead_threshold),
- BUF_READ_AHEAD_AREA);
-
- fail_count = 0;
-
- for (i = low; i < high; i++) {
- bpage = buf_page_hash_get(space, i);
-
- if ((bpage == NULL) || !buf_page_is_accessed(bpage)) {
- /* Not accessed */
- fail_count++;
-
- } else if (pred_bpage) {
- /* Note that buf_page_is_accessed() returns
- the time of the first access. If some blocks
- of the extent existed in the buffer pool at
- the time of a linear access pattern, the first
- access times may be nonmonotonic, even though
- the latest access times were linear. The
- threshold (srv_read_ahead_threshold) should help
- a little against this. */
- int res = ut_ulint_cmp(
- buf_page_is_accessed(bpage),
- buf_page_is_accessed(pred_bpage));
- /* Accesses not in the right order */
- if (res != 0 && res != asc_or_desc) {
- fail_count++;
- }
- }
-
- if (fail_count > threshold) {
- /* Too many failures: return */
- buf_pool_mutex_exit();
- return(0);
- }
-
- if (bpage && buf_page_is_accessed(bpage)) {
- pred_bpage = bpage; /* last accessed page seen so far */
- }
- }
-
- /* If we got this far, we know that enough pages in the area have
- been accessed in the right order: linear read-ahead can be sensible */
-
- bpage = buf_page_hash_get(space, offset);
-
- if (bpage == NULL) {
- buf_pool_mutex_exit();
-
- return(0);
- }
-
- switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_ZIP_PAGE:
- frame = bpage->zip.data;
- break;
- case BUF_BLOCK_FILE_PAGE:
- frame = ((buf_block_t*) bpage)->frame;
- break;
- default:
- ut_error; /* frame is left unset on this path */
- break;
- }
-
- /* Read the natural predecessor and successor page addresses from
- the page; NOTE that because the calling thread may have an x-latch
- on the page, we do not acquire an s-latch on the page, this is to
- prevent deadlocks. Even if we read values which are nonsense, the
- algorithm will work. */
-
- pred_offset = fil_page_get_prev(frame);
- succ_offset = fil_page_get_next(frame);
-
- buf_pool_mutex_exit();
-
- if ((offset == low) && (succ_offset == offset + 1)) {
-
- /* This is ok, we can continue */
- new_offset = pred_offset; /* descending: continue at the predecessor */
-
- } else if ((offset == high - 1) && (pred_offset == offset - 1)) {
-
- /* This is ok, we can continue */
- new_offset = succ_offset; /* ascending: continue at the successor */
- } else {
- /* Successor or predecessor not in the right order */
-
- return(0);
- }
-
- /* From here on [low, high) is the area containing new_offset */
- low = (new_offset / buf_read_ahead_linear_area)
- * buf_read_ahead_linear_area;
- high = (new_offset / buf_read_ahead_linear_area + 1)
- * buf_read_ahead_linear_area;
-
- if ((new_offset != low) && (new_offset != high - 1)) {
- /* This is not a border page of the area: return */
-
- return(0);
- }
-
- if (high > fil_space_get_size(space)) {
- /* The area is not whole, return */
-
- return(0);
- }
-
- /* If we got this far, read-ahead can be sensible: do it */
-
- if (ibuf_inside()) {
- ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
- } else {
- ibuf_mode = BUF_READ_ANY_PAGE;
- }
-
- count = 0;
-
- /* Since Windows XP seems to schedule the i/o handler thread
- very eagerly, and consequently it does not wait for the
- full read batch to be posted, we use special heuristics here */
-
- os_aio_simulated_put_read_threads_to_sleep();
-
- for (i = low; i < high; i++) {
- /* It is only sensible to do read-ahead in the non-sync
- aio mode: hence FALSE as the first parameter */
-
- if (!ibuf_bitmap_page(zip_size, i)) {
- count += buf_read_page_low(
- &err, FALSE,
- ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
- space, zip_size, FALSE, tablespace_version, i);
- if (err == DB_TABLESPACE_DELETED) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: in"
- " linear readahead trying to access\n"
- "InnoDB: tablespace %lu page %lu,\n"
- "InnoDB: but the tablespace does not"
- " exist or is just being dropped.\n",
- (ulong) space, (ulong) i);
- }
- }
- }
-
- /* In simulated aio we wake the aio handler threads only after
- queuing all aio requests, in native aio the following call does
- nothing: */
-
- os_aio_simulated_wake_handler_threads();
-
- /* Flush pages from the end of the LRU list if necessary */
- buf_flush_free_margin();
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints && (count > 0)) {
- fprintf(stderr,
- "LINEAR read-ahead space %lu offset %lu pages %lu\n",
- (ulong) space, (ulong) offset, (ulong) count);
- }
-#endif /* UNIV_DEBUG */
-
- /* Read ahead is considered one I/O operation for the purpose of
- LRU policy decision. */
- buf_LRU_stat_inc_io();
-
- buf_pool->stat.n_ra_pages_read += count; /* NOTE(review): the buf pool mutex was released above; confirm an unprotected update is intended */
- return(count);
-}
-
-/********************************************************************//**
-Issues read requests for pages which the ibuf module wants to read in, in
-order to contract the insert buffer tree. Technically, this function is like
-a read-ahead function.
-Before issuing anything the function loops, sleeping in
-os_thread_sleep(500000) steps, while buf_pool->n_pend_reads exceeds
-buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT. For every array entry whose
-tablespace is missing (zip size ULINT_UNDEFINED) or reported as
-DB_TABLESPACE_DELETED, ibuf_merge_or_delete_for_page() is called with a
-NULL block to remove the buffered entries for that page.
-NOTE(review): n_pend_reads is read here without acquiring the buf pool
-mutex in this function; presumably a stale value is acceptable - confirm. */
-UNIV_INTERN
-void
-buf_read_ibuf_merge_pages(
-/*======================*/
- ibool sync, /*!< in: TRUE if the caller
- wants this function to wait
- for the highest address page
- to get read in, before this
- function returns */
- const ulint* space_ids, /*!< in: array of space ids */
- const ib_int64_t* space_versions,/*!< in: the spaces must have
- this version number
- (timestamp), otherwise we
- discard the read; we use this
- to cancel reads if DISCARD +
- IMPORT may have changed the
- tablespace size */
- const ulint* page_nos, /*!< in: array of page numbers
- to read, with the highest page
- number the last in the
- array */
- ulint n_stored) /*!< in: number of elements
- in the arrays */
-{
- ulint i;
-
- ut_ad(!ibuf_inside());
-#ifdef UNIV_IBUF_DEBUG
- ut_a(n_stored < UNIV_PAGE_SIZE);
-#endif
- while (buf_pool->n_pend_reads
- > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
- os_thread_sleep(500000);
- }
-
- for (i = 0; i < n_stored; i++) {
- ulint zip_size = fil_space_get_zip_size(space_ids[i]);
- ulint err;
-
- if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
-
- goto tablespace_deleted; /* zip_size stays ULINT_UNDEFINED on this path */
- }
-
- /* Only the last page of the batch is read synchronously, and
- only when the caller asked for it */
- buf_read_page_low(&err, sync && (i + 1 == n_stored),
- BUF_READ_ANY_PAGE, space_ids[i],
- zip_size, TRUE, space_versions[i],
- page_nos[i]);
-
- if (UNIV_UNLIKELY(err == DB_TABLESPACE_DELETED)) {
-tablespace_deleted:
- /* We have deleted or are deleting the single-table
- tablespace: remove the entries for that page.
- NOTE(review): when reached through the goto above,
- zip_size is ULINT_UNDEFINED; confirm the callee
- accepts that value. */
-
- ibuf_merge_or_delete_for_page(NULL, space_ids[i],
- page_nos[i],
- zip_size, FALSE);
- }
- }
-
- os_aio_simulated_wake_handler_threads();
-
- /* Flush pages from the end of the LRU list if necessary */
- buf_flush_free_margin();
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr,
- "Ibuf merge read-ahead space %lu pages %lu\n",
- (ulong) space_ids[0], (ulong) n_stored); /* NOTE(review): space_ids[0] is read even if n_stored is 0; confirm callers never pass an empty batch */
- }
-#endif /* UNIV_DEBUG */
-}
-
-/********************************************************************//**
-Issues read requests for pages which recovery wants to read in.
-Before each request the function waits, waking the simulated aio handler
-threads and sleeping in os_thread_sleep(500000) steps, until
-buf_pool->n_pend_reads is below recv_n_pool_free_frames / 2. Once more than
-100 such steps have been taken for one page, an error is printed to stderr
-on every further step and os_aio_print_debug is set to TRUE until the wait
-ends. All requests are posted with BUF_READ_ANY_PAGE and unzip == TRUE; only
-the last page is read synchronously, and only if sync is TRUE. The value
-stored in err by buf_read_page_low() is not examined.
-Does nothing if fil_space_get_zip_size() returns ULINT_UNDEFINED. */
-UNIV_INTERN
-void
-buf_read_recv_pages(
-/*================*/
- ibool sync, /*!< in: TRUE if the caller
- wants this function to wait
- for the highest address page
- to get read in, before this
- function returns */
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in
- bytes, or 0; NOTE: the passed
- value is overwritten below with
- fil_space_get_zip_size(space) */
- const ulint* page_nos, /*!< in: array of page numbers
- to read, with the highest page
- number the last in the
- array */
- ulint n_stored) /*!< in: number of page numbers
- in the array */
-{
- ib_int64_t tablespace_version;
- ulint count;
- ulint err;
- ulint i;
-
- zip_size = fil_space_get_zip_size(space);
-
- if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
- /* It is a single table tablespace and the .ibd file is
- missing: do nothing */
-
- return;
- }
-
- tablespace_version = fil_space_get_version(space);
-
- for (i = 0; i < n_stored; i++) {
-
- count = 0; /* number of wait steps taken for this page */
-
- os_aio_print_debug = FALSE;
-
- while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) {
-
- os_aio_simulated_wake_handler_threads();
- os_thread_sleep(500000);
-
- count++;
-
- if (count > 100) { /* true on every step after the 100th, so the message repeats */
- fprintf(stderr,
- "InnoDB: Error: InnoDB has waited for"
- " 50 seconds for pending\n"
- "InnoDB: reads to the buffer pool to"
- " be finished.\n"
- "InnoDB: Number of pending reads %lu,"
- " pending pread calls %lu\n",
- (ulong) buf_pool->n_pend_reads,
- (ulong)os_file_n_pending_preads);
-
- os_aio_print_debug = TRUE;
- }
- }
-
- os_aio_print_debug = FALSE;
-
- if ((i + 1 == n_stored) && sync) {
- buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
- zip_size, TRUE, tablespace_version,
- page_nos[i]);
- } else {
- buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
- | OS_AIO_SIMULATED_WAKE_LATER,
- space, zip_size, TRUE,
- tablespace_version, page_nos[i]);
- }
- }
-
- os_aio_simulated_wake_handler_threads();
-
- /* Flush pages from the end of the LRU list if necessary */
- buf_flush_free_margin();
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr,
- "Recovery applies read-ahead pages %lu\n",
- (ulong) n_stored);
- }
-#endif /* UNIV_DEBUG */
-}
diff --git a/storage/innodb_plugin/data/data0data.c b/storage/innodb_plugin/data/data0data.c
deleted file mode 100644
index e3c1f1b4f23..00000000000
--- a/storage/innodb_plugin/data/data0data.c
+++ /dev/null
@@ -1,764 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file data/data0data.c
-SQL data field and tuple
-
-Created 5/30/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "data0data.h"
-
-#ifdef UNIV_NONINL
-#include "data0data.ic"
-#endif
-
-#ifndef UNIV_HOTBACKUP
-#include "rem0rec.h"
-#include "rem0cmp.h"
-#include "page0page.h"
-#include "page0zip.h"
-#include "dict0dict.h"
-#include "btr0cur.h"
-
-#include <ctype.h>
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_DEBUG
-/** Dummy variable to catch access to uninitialized fields. In the
-debug version, dtuple_create() will make all fields of dtuple_t point
-to data_error. */
-UNIV_INTERN byte data_error;
-
-# ifndef UNIV_DEBUG_VALGRIND
-/** this is used to fool the compiler in dtuple_validate */
-UNIV_INTERN ulint data_dummy;
-# endif /* !UNIV_DEBUG_VALGRIND */
-#endif /* UNIV_DEBUG */
-
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Tests if dfield data length and content is equal to the given.
-The lengths are compared first; if both are UNIV_SQL_NULL the fields are
-considered equal without looking at the data, otherwise the contents are
-compared bytewise with memcmp().
-@return TRUE if equal */
-UNIV_INTERN
-ibool
-dfield_data_is_binary_equal(
-/*========================*/
- const dfield_t* field, /*!< in: field */
- ulint len, /*!< in: data length or UNIV_SQL_NULL */
- const byte* data) /*!< in: data */
-{
- if (len != dfield_get_len(field)) {
-
- return(FALSE);
- }
-
- if (len == UNIV_SQL_NULL) { /* both are SQL NULL: data is not dereferenced */
-
- return(TRUE);
- }
-
- if (0 != memcmp(dfield_get_data(field), data, len)) {
-
- return(FALSE);
- }
-
- return(TRUE);
-}
-
-/************************************************************//**
-Compare two data tuples, respecting the collation of character fields.
-Tuples with different numbers of fields are ordered by field count alone
-(the tuple with fewer fields is the smaller one); otherwise the fields are
-compared pairwise with cmp_dfield_dfield() and the first nonzero result
-is returned.
-@return 1, 0 , -1 if tuple1 is greater, equal, less, respectively,
-than tuple2 */
-UNIV_INTERN
-int
-dtuple_coll_cmp(
-/*============*/
- const dtuple_t* tuple1, /*!< in: tuple 1 */
- const dtuple_t* tuple2) /*!< in: tuple 2 */
-{
- ulint n_fields;
- ulint i;
-
- ut_ad(tuple1 && tuple2);
- ut_ad(tuple1->magic_n == DATA_TUPLE_MAGIC_N);
- ut_ad(tuple2->magic_n == DATA_TUPLE_MAGIC_N);
- ut_ad(dtuple_check_typed(tuple1));
- ut_ad(dtuple_check_typed(tuple2));
-
- n_fields = dtuple_get_n_fields(tuple1);
-
- if (n_fields != dtuple_get_n_fields(tuple2)) {
-
- return(n_fields < dtuple_get_n_fields(tuple2) ? -1 : 1);
- }
-
- for (i = 0; i < n_fields; i++) {
- int cmp;
- const dfield_t* field1 = dtuple_get_nth_field(tuple1, i);
- const dfield_t* field2 = dtuple_get_nth_field(tuple2, i);
-
- cmp = cmp_dfield_dfield(field1, field2);
-
- if (cmp) { /* first differing field decides */
- return(cmp);
- }
- }
-
- return(0);
-}
-
-/*********************************************************************//**
-Sets number of fields used in a tuple. Normally this is set in
-dtuple_create, but if you want later to set it smaller, you can use this.
-Both tuple->n_fields and tuple->n_fields_cmp are set to the new value. */
-UNIV_INTERN
-void
-dtuple_set_n_fields(
-/*================*/
- dtuple_t* tuple, /*!< in: tuple */
- ulint n_fields) /*!< in: number of fields */
-{
- ut_ad(tuple);
-
- tuple->n_fields = n_fields;
- tuple->n_fields_cmp = n_fields;
-}
-
-/**********************************************************//**
-Checks that a data field is typed, i.e. that its main type (mtype) lies
-in the range DATA_VARCHAR ... DATA_MYSQL, both inclusive. On failure the
-type and length of the field are printed to stderr; no assertion is raised.
-@return TRUE if ok */
-static
-ibool
-dfield_check_typed_no_assert(
-/*=========================*/
- const dfield_t* field) /*!< in: data field */
-{
- if (dfield_get_type(field)->mtype > DATA_MYSQL
- || dfield_get_type(field)->mtype < DATA_VARCHAR) {
-
- fprintf(stderr,
- "InnoDB: Error: data field type %lu, len %lu\n",
- (ulong) dfield_get_type(field)->mtype,
- (ulong) dfield_get_len(field));
- return(FALSE);
- }
-
- return(TRUE);
-}
-
-/**********************************************************//**
-Checks that a data tuple is typed: the number of fields must not exceed
-REC_MAX_N_FIELDS and every field must pass dfield_check_typed_no_assert().
-On failure the tuple contents are dumped to stderr with dtuple_print();
-no assertion is raised.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-dtuple_check_typed_no_assert(
-/*=========================*/
- const dtuple_t* tuple) /*!< in: tuple */
-{
- const dfield_t* field;
- ulint i;
-
- if (dtuple_get_n_fields(tuple) > REC_MAX_N_FIELDS) {
- fprintf(stderr,
- "InnoDB: Error: index entry has %lu fields\n",
- (ulong) dtuple_get_n_fields(tuple));
-dump: /* shared failure path, also entered by the goto in the loop below */
- fputs("InnoDB: Tuple contents: ", stderr);
- dtuple_print(stderr, tuple);
- putc('\n', stderr);
-
- return(FALSE);
- }
-
- for (i = 0; i < dtuple_get_n_fields(tuple); i++) {
-
- field = dtuple_get_nth_field(tuple, i);
-
- if (!dfield_check_typed_no_assert(field)) {
- goto dump;
- }
- }
-
- return(TRUE);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_DEBUG
-/**********************************************************//**
-Checks that a data field is typed. Asserts an error if not.
-The accepted range of the main type (mtype) is DATA_VARCHAR ... DATA_MYSQL,
-both inclusive; outside of it the type and length are printed to stderr
-and ut_error is invoked.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-dfield_check_typed(
-/*===============*/
- const dfield_t* field) /*!< in: data field */
-{
- if (dfield_get_type(field)->mtype > DATA_MYSQL
- || dfield_get_type(field)->mtype < DATA_VARCHAR) {
-
- fprintf(stderr,
- "InnoDB: Error: data field type %lu, len %lu\n",
- (ulong) dfield_get_type(field)->mtype,
- (ulong) dfield_get_len(field));
-
- ut_error;
- }
-
- return(TRUE);
-}
-
-/**********************************************************//**
-Checks that a data tuple is typed. Asserts an error if not.
-Every field of the tuple is passed to dfield_check_typed() inside ut_a().
-Unlike dtuple_check_typed_no_assert(), the number of fields is not
-compared against REC_MAX_N_FIELDS here.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-dtuple_check_typed(
-/*===============*/
- const dtuple_t* tuple) /*!< in: tuple */
-{
- const dfield_t* field;
- ulint i;
-
- for (i = 0; i < dtuple_get_n_fields(tuple); i++) {
-
- field = dtuple_get_nth_field(tuple, i);
-
- ut_a(dfield_check_typed(field));
- }
-
- return(TRUE);
-}
-
-/**********************************************************//**
-Validates the consistency of a tuple which must be complete, i.e,
-all fields must have been set.
-Checks the magic number, touches every byte of every non-NULL field
-(summing the bytes into data_dummy unless UNIV_DEBUG_VALGRIND is defined),
-applies UNIV_MEM_ASSERT_RW to the field and finally asserts
-dtuple_check_typed().
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-dtuple_validate(
-/*============*/
- const dtuple_t* tuple) /*!< in: tuple */
-{
- const dfield_t* field;
- ulint n_fields;
- ulint len;
- ulint i;
-
- ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
-
- n_fields = dtuple_get_n_fields(tuple);
-
- /* We dereference all the data of each field to test
- for memory traps */
-
- for (i = 0; i < n_fields; i++) {
-
- field = dtuple_get_nth_field(tuple, i);
- len = dfield_get_len(field);
-
- if (!dfield_is_null(field)) {
-
- const byte* data = dfield_get_data(field);
-#ifndef UNIV_DEBUG_VALGRIND
- ulint j;
-
- for (j = 0; j < len; j++) {
-
- data_dummy += *data; /* fool the compiler not
- to optimize out this
- code */
- data++;
- }
-#endif /* !UNIV_DEBUG_VALGRIND */
-
- UNIV_MEM_ASSERT_RW(data, len); /* NOTE(review): unless UNIV_DEBUG_VALGRIND is defined, the loop above has advanced data by len bytes; confirm the macro expands to nothing in that configuration */
- }
- }
-
- ut_a(dtuple_check_typed(tuple));
-
- return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Pretty prints a dfield value according to its data type.
-Output goes to stderr. An SQL NULL field is printed as "NULL". Only the
-main types DATA_CHAR, DATA_VARCHAR (non-printable characters are replaced
-by a space) and DATA_INT (length must be exactly 4) are supported; any
-other main type ends in ut_error. */
-UNIV_INTERN
-void
-dfield_print(
-/*=========*/
- const dfield_t* dfield) /*!< in: dfield */
-{
- const byte* data;
- ulint len;
- ulint i;
-
- len = dfield_get_len(dfield);
- data = dfield_get_data(dfield);
-
- if (dfield_is_null(dfield)) {
- fputs("NULL", stderr);
-
- return;
- }
-
- switch (dtype_get_mtype(dfield_get_type(dfield))) {
- case DATA_CHAR:
- case DATA_VARCHAR:
- for (i = 0; i < len; i++) {
- int c = *data++;
- putc(isprint(c) ? c : ' ', stderr);
- }
-
- if (dfield_is_ext(dfield)) {
- fputs("(external)", stderr);
- }
- break;
- case DATA_INT:
- ut_a(len == 4); /* only works for 32-bit integers */
- fprintf(stderr, "%d", (int)mach_read_from_4(data));
- break;
- default:
- ut_error;
- }
-}
-
-/*************************************************************//**
-Pretty prints a dfield value according to its data type. Also the hex string
-is printed if a string contains non-printable characters.
-Output goes to stderr. An SQL NULL field is printed as "NULL".
-DATA_INT: lengths 1 to 4 are printed as one number, lengths 6 to 8 as a
-{high low} pair, any other length as a hex dump.
-DATA_SYS: printed as trx_id, roll_ptr, row_id or mix_id depending on
-prtype & DATA_SYS_PRTYPE_MASK.
-DATA_CHAR, DATA_VARCHAR: printable characters are printed as such, others
-as \xNN; if any non-printable character was seen, a hex dump of the whole
-field follows.
-DATA_BINARY and all other main types: hex dump.
-NOTE(review): for signed integers of length 1 to 4 the code only clears the
-top bit of the stored value before printing; confirm that this yields the
-intended number for the stored integer format.
-NOTE(review): the {%lu %lu} conversions receive the results of
-ut_dulint_get_high() / ut_dulint_get_low() without a cast, and "%02lx"
-receives a ulint; confirm these types match unsigned long on all platforms.
-NOTE(review): when the DATA_CHAR / DATA_VARCHAR branch falls through to the
-hex dump for an externally stored field, "(external)" is printed twice. */
-UNIV_INTERN
-void
-dfield_print_also_hex(
-/*==================*/
- const dfield_t* dfield) /*!< in: dfield */
-{
- const byte* data;
- ulint len;
- ulint prtype;
- ulint i;
- ibool print_also_hex;
-
- len = dfield_get_len(dfield);
- data = dfield_get_data(dfield);
-
- if (dfield_is_null(dfield)) {
- fputs("NULL", stderr);
-
- return;
- }
-
- prtype = dtype_get_prtype(dfield_get_type(dfield));
-
- switch (dtype_get_mtype(dfield_get_type(dfield))) {
- dulint id;
- case DATA_INT:
- switch (len) {
- ulint val;
- case 1:
- val = mach_read_from_1(data);
-
- if (!(prtype & DATA_UNSIGNED)) {
- val &= ~0x80; /* clear the top bit of the 1-byte value */
- fprintf(stderr, "%ld", (long) val);
- } else {
- fprintf(stderr, "%lu", (ulong) val);
- }
- break;
-
- case 2:
- val = mach_read_from_2(data);
-
- if (!(prtype & DATA_UNSIGNED)) {
- val &= ~0x8000;
- fprintf(stderr, "%ld", (long) val);
- } else {
- fprintf(stderr, "%lu", (ulong) val);
- }
- break;
-
- case 3:
- val = mach_read_from_3(data);
-
- if (!(prtype & DATA_UNSIGNED)) {
- val &= ~0x800000;
- fprintf(stderr, "%ld", (long) val);
- } else {
- fprintf(stderr, "%lu", (ulong) val);
- }
- break;
-
- case 4:
- val = mach_read_from_4(data);
-
- if (!(prtype & DATA_UNSIGNED)) {
- val &= ~0x80000000;
- fprintf(stderr, "%ld", (long) val);
- } else {
- fprintf(stderr, "%lu", (ulong) val);
- }
- break;
-
- case 6:
- id = mach_read_from_6(data);
- fprintf(stderr, "{%lu %lu}",
- ut_dulint_get_high(id),
- ut_dulint_get_low(id));
- break;
-
- case 7:
- id = mach_read_from_7(data);
- fprintf(stderr, "{%lu %lu}",
- ut_dulint_get_high(id),
- ut_dulint_get_low(id));
- break;
- case 8:
- id = mach_read_from_8(data);
- fprintf(stderr, "{%lu %lu}",
- ut_dulint_get_high(id),
- ut_dulint_get_low(id));
- break;
- default:
- goto print_hex; /* lengths 0, 5 and above 8 are dumped as hex */
- }
- break;
-
- case DATA_SYS:
- switch (prtype & DATA_SYS_PRTYPE_MASK) {
- case DATA_TRX_ID:
- id = mach_read_from_6(data);
-
- fprintf(stderr, "trx_id " TRX_ID_FMT,
- TRX_ID_PREP_PRINTF(id));
- break;
-
- case DATA_ROLL_PTR:
- id = mach_read_from_7(data);
-
- fprintf(stderr, "roll_ptr {%lu %lu}",
- ut_dulint_get_high(id), ut_dulint_get_low(id));
- break;
-
- case DATA_ROW_ID:
- id = mach_read_from_6(data);
-
- fprintf(stderr, "row_id {%lu %lu}",
- ut_dulint_get_high(id), ut_dulint_get_low(id));
- break;
-
- default:
- id = mach_dulint_read_compressed(data);
-
- fprintf(stderr, "mix_id {%lu %lu}",
- ut_dulint_get_high(id), ut_dulint_get_low(id));
- }
- break;
-
- case DATA_CHAR:
- case DATA_VARCHAR:
- print_also_hex = FALSE;
-
- for (i = 0; i < len; i++) {
- int c = *data++;
-
- if (!isprint(c)) {
- print_also_hex = TRUE;
-
- fprintf(stderr, "\\x%02x", (unsigned char) c);
- } else {
- putc(c, stderr);
- }
- }
-
- if (dfield_is_ext(dfield)) {
- fputs("(external)", stderr);
- }
-
- if (!print_also_hex) {
- break;
- }
-
- data = dfield_get_data(dfield); /* rewind: the loop above consumed data */
- /* fall through */
-
- case DATA_BINARY:
- default:
-print_hex:
- fputs(" Hex: ",stderr);
-
- for (i = 0; i < len; i++) {
- fprintf(stderr, "%02lx", (ulint) *data++);
- }
-
- if (dfield_is_ext(dfield)) {
- fputs("(external)", stderr);
- }
- }
-}
-
-/*************************************************************//**
-Print a dfield value using ut_print_buf.
-At most the first 1000 bytes of a non-NULL field are printed; if the field
-is longer, the total length (and ", external" for an externally stored
-field) is appended. An SQL NULL field is printed as " SQL NULL". */
-static
-void
-dfield_print_raw(
-/*=============*/
- FILE* f, /*!< in: output stream */
- const dfield_t* dfield) /*!< in: dfield */
-{
- ulint len = dfield_get_len(dfield);
- if (!dfield_is_null(dfield)) {
- ulint print_len = ut_min(len, 1000); /* cap the dump at 1000 bytes */
- ut_print_buf(f, dfield_get_data(dfield), print_len);
- if (len != print_len) { /* output was truncated */
- fprintf(f, "(total %lu bytes%s)",
- (ulong) len,
- dfield_is_ext(dfield) ? ", external" : "");
- }
- } else {
- fputs(" SQL NULL", f);
- }
-}
-
-/**********************************************************//**
-The following function prints the contents of a tuple.
-A header line with the number of fields is written first, followed by one
-line per field of the form " <index>:<raw field dump>;". In debug builds
-the tuple is validated with dtuple_validate() after printing. */
-UNIV_INTERN
-void
-dtuple_print(
-/*=========*/
- FILE* f, /*!< in: output stream */
- const dtuple_t* tuple) /*!< in: tuple */
-{
- ulint n_fields;
- ulint i;
-
- n_fields = dtuple_get_n_fields(tuple);
-
- fprintf(f, "DATA TUPLE: %lu fields;\n", (ulong) n_fields);
-
- for (i = 0; i < n_fields; i++) {
- fprintf(f, " %lu:", (ulong) i);
-
- dfield_print_raw(f, dtuple_get_nth_field(tuple, i));
-
- putc(';', f);
- putc('\n', f);
- }
-
- ut_ad(dtuple_validate(tuple));
-}
-
-/**************************************************************//**
-Moves parts of long fields in entry to the big record vector so that
-the size of tuple drops below the maximum record size allowed in the
-database. Moves data only from those fields which are not necessary
-to determine uniquely the insertion place of the tuple in the index.
-@return own: created big record vector, NULL if we are not able to
-shorten the entry enough, i.e., if there are too many fixed-length or
-short fields in entry or the index is clustered */
-UNIV_INTERN
-big_rec_t*
-dtuple_convert_big_rec(
-/*===================*/
- dict_index_t* index, /*!< in: index */
- dtuple_t* entry, /*!< in/out: index entry */
- ulint* n_ext) /*!< in/out: number of
- externally stored columns */
-{
- mem_heap_t* heap;
- big_rec_t* vector;
- dfield_t* dfield;
- dict_field_t* ifield;
- ulint size;
- ulint n_fields;
- ulint local_len;
- ulint local_prefix_len;
-
- if (UNIV_UNLIKELY(!dict_index_is_clust(index))) {
- return(NULL);
- }
-
- if (dict_table_get_format(index->table) < DICT_TF_FORMAT_ZIP) {
- /* up to MySQL 5.1: store a 768-byte prefix locally */
- local_len = BTR_EXTERN_FIELD_REF_SIZE + DICT_MAX_INDEX_COL_LEN;
- } else {
- /* new-format table: do not store any BLOB prefix locally */
- local_len = BTR_EXTERN_FIELD_REF_SIZE;
- }
-
- ut_a(dtuple_check_typed_no_assert(entry));
-
- size = rec_get_converted_size(index, entry, *n_ext);
-
- if (UNIV_UNLIKELY(size > 1000000000)) {
- fprintf(stderr,
- "InnoDB: Warning: tuple size very big: %lu\n",
- (ulong) size);
- fputs("InnoDB: Tuple contents: ", stderr);
- dtuple_print(stderr, entry);
- putc('\n', stderr);
- }
-
- heap = mem_heap_create(size + dtuple_get_n_fields(entry)
- * sizeof(big_rec_field_t) + 1000);
-
- vector = mem_heap_alloc(heap, sizeof(big_rec_t));
-
- vector->heap = heap;
- vector->fields = mem_heap_alloc(heap, dtuple_get_n_fields(entry)
- * sizeof(big_rec_field_t));
-
- /* Decide which fields to shorten: the algorithm is to look for
- a variable-length field that yields the biggest savings when
- stored externally */
-
- n_fields = 0;
-
- while (page_zip_rec_needs_ext(rec_get_converted_size(index, entry,
- *n_ext),
- dict_table_is_comp(index->table),
- dict_index_get_n_fields(index),
- dict_table_zip_size(index->table))) {
- ulint i;
- ulint longest = 0;
- ulint longest_i = ULINT_MAX;
- byte* data;
- big_rec_field_t* b;
-
- for (i = dict_index_get_n_unique_in_tree(index);
- i < dtuple_get_n_fields(entry); i++) {
- ulint savings;
-
- dfield = dtuple_get_nth_field(entry, i);
- ifield = dict_index_get_nth_field(index, i);
-
- /* Skip fixed-length, NULL, externally stored,
- or short columns */
-
- if (ifield->fixed_len
- || dfield_is_null(dfield)
- || dfield_is_ext(dfield)
- || dfield_get_len(dfield) <= local_len
- || dfield_get_len(dfield)
- <= BTR_EXTERN_FIELD_REF_SIZE * 2) {
- goto skip_field;
- }
-
- savings = dfield_get_len(dfield) - local_len;
-
- /* Check that there would be savings */
- if (longest >= savings) {
- goto skip_field;
- }
-
- longest_i = i;
- longest = savings;
-
-skip_field:
- continue;
- }
-
- if (!longest) {
- /* Cannot shorten more */
-
- mem_heap_free(heap);
-
- return(NULL);
- }
-
- /* Move data from field longest_i to big rec vector.
-
- We store the first bytes locally to the record. Then
- we can calculate all ordering fields in all indexes
- from locally stored data. */
-
- dfield = dtuple_get_nth_field(entry, longest_i);
- ifield = dict_index_get_nth_field(index, longest_i);
- local_prefix_len = local_len - BTR_EXTERN_FIELD_REF_SIZE;
-
- b = &vector->fields[n_fields];
- b->field_no = longest_i;
- b->len = dfield_get_len(dfield) - local_prefix_len;
- b->data = (char*) dfield_get_data(dfield) + local_prefix_len;
-
- /* Allocate the locally stored part of the column. */
- data = mem_heap_alloc(heap, local_len);
-
- /* Copy the local prefix. */
- memcpy(data, dfield_get_data(dfield), local_prefix_len);
- /* Clear the extern field reference (BLOB pointer). */
- memset(data + local_prefix_len, 0, BTR_EXTERN_FIELD_REF_SIZE);
-#if 0
- /* The following would fail the Valgrind checks in
- page_cur_insert_rec_low() and page_cur_insert_rec_zip().
- The BLOB pointers in the record will be initialized after
- the record and the BLOBs have been written. */
- UNIV_MEM_ALLOC(data + local_prefix_len,
- BTR_EXTERN_FIELD_REF_SIZE);
-#endif
-
- dfield_set_data(dfield, data, local_len);
- dfield_set_ext(dfield);
-
- n_fields++;
- (*n_ext)++;
- ut_ad(n_fields < dtuple_get_n_fields(entry));
- }
-
- vector->n_fields = n_fields;
- return(vector);
-}
-
-/**************************************************************//**
-Puts back to entry the data stored in vector. Note that to ensure the
-fields in entry can accommodate the data, vector must have been created
-from entry with dtuple_convert_big_rec. */
-UNIV_INTERN
-void
-dtuple_convert_back_big_rec(
-/*========================*/
- dict_index_t* index __attribute__((unused)), /*!< in: index */
- dtuple_t* entry, /*!< in: entry whose data was put to vector */
- big_rec_t* vector) /*!< in, own: big rec vector; it is
- freed in this function */
-{
- big_rec_field_t* b = vector->fields;
- const big_rec_field_t* const end = b + vector->n_fields;
-
- for (; b < end; b++) {
- dfield_t* dfield;
- ulint local_len;
-
- dfield = dtuple_get_nth_field(entry, b->field_no);
- local_len = dfield_get_len(dfield);
-
- ut_ad(dfield_is_ext(dfield));
- ut_ad(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
-
- local_len -= BTR_EXTERN_FIELD_REF_SIZE;
-
- ut_ad(local_len <= DICT_MAX_INDEX_COL_LEN);
-
- dfield_set_data(dfield,
- (char*) b->data - local_len,
- b->len + local_len);
- }
-
- mem_heap_free(vector->heap);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/data/data0type.c b/storage/innodb_plugin/data/data0type.c
deleted file mode 100644
index e834fd2ec55..00000000000
--- a/storage/innodb_plugin/data/data0type.c
+++ /dev/null
@@ -1,297 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file data/data0type.c
-Data types
-
-Created 1/16/1996 Heikki Tuuri
-*******************************************************/
-
-#include "data0type.h"
-
-#ifdef UNIV_NONINL
-#include "data0type.ic"
-#endif
-
-#ifndef UNIV_HOTBACKUP
-# include "ha_prototypes.h"
-
-/* At the database startup we store the default-charset collation number of
-this MySQL installation to this global variable. If we have < 4.1.2 format
-column definitions, or records in the insert buffer, we use this
-charset-collation code for them. */
-
-UNIV_INTERN ulint data_mysql_default_charset_coll;
-
-/*********************************************************************//**
-Determine how many bytes the first n characters of the given string occupy.
-If the string is shorter than n characters, returns the number of bytes
-the characters in the string occupy.
-@return length of the prefix, in bytes */
-UNIV_INTERN
-ulint
-dtype_get_at_most_n_mbchars(
-/*========================*/
- ulint prtype, /*!< in: precise type */
- ulint mbminlen, /*!< in: minimum length of a
- multi-byte character */
- ulint mbmaxlen, /*!< in: maximum length of a
- multi-byte character */
- ulint prefix_len, /*!< in: length of the requested
- prefix, in characters, multiplied by
- dtype_get_mbmaxlen(dtype) */
- ulint data_len, /*!< in: length of str (in bytes) */
- const char* str) /*!< in: the string whose prefix
- length is being determined */
-{
- ut_a(data_len != UNIV_SQL_NULL);
- ut_ad(!mbmaxlen || !(prefix_len % mbmaxlen));
-
- if (mbminlen != mbmaxlen) {
- ut_a(!(prefix_len % mbmaxlen));
- return(innobase_get_at_most_n_mbchars(
- dtype_get_charset_coll(prtype),
- prefix_len, data_len, str));
- }
-
- if (prefix_len < data_len) {
-
- return(prefix_len);
-
- }
-
- return(data_len);
-}
-#endif /* UNIV_HOTBACKUP */
-
-/*********************************************************************//**
-Checks if a data main type is a string type. Also a BLOB is considered a
-string type.
-@return TRUE if string type */
-UNIV_INTERN
-ibool
-dtype_is_string_type(
-/*=================*/
- ulint mtype) /*!< in: InnoDB main data type code: DATA_CHAR, ... */
-{
- if (mtype <= DATA_BLOB
- || mtype == DATA_MYSQL
- || mtype == DATA_VARMYSQL) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Checks if a type is a binary string type. Note that for tables created with
-< 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For
-those DATA_BLOB columns this function currently returns FALSE.
-@return TRUE if binary string type */
-UNIV_INTERN
-ibool
-dtype_is_binary_string_type(
-/*========================*/
- ulint mtype, /*!< in: main data type */
- ulint prtype) /*!< in: precise type */
-{
- if ((mtype == DATA_FIXBINARY)
- || (mtype == DATA_BINARY)
- || (mtype == DATA_BLOB && (prtype & DATA_BINARY_TYPE))) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Checks if a type is a non-binary string type. That is, dtype_is_string_type is
-TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created
-with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column.
-For those DATA_BLOB columns this function currently returns TRUE.
-@return TRUE if non-binary string type */
-UNIV_INTERN
-ibool
-dtype_is_non_binary_string_type(
-/*============================*/
- ulint mtype, /*!< in: main data type */
- ulint prtype) /*!< in: precise type */
-{
- if (dtype_is_string_type(mtype) == TRUE
- && dtype_is_binary_string_type(mtype, prtype) == FALSE) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Forms a precise type from the < 4.1.2 format precise type plus the
-charset-collation code.
-@return precise type, including the charset-collation code */
-UNIV_INTERN
-ulint
-dtype_form_prtype(
-/*==============*/
- ulint old_prtype, /*!< in: the MySQL type code and the flags
- DATA_BINARY_TYPE etc. */
- ulint charset_coll) /*!< in: MySQL charset-collation code */
-{
- ut_a(old_prtype < 256 * 256);
- ut_a(charset_coll < 256);
-
- return(old_prtype + (charset_coll << 16));
-}
-
-/*********************************************************************//**
-Validates a data type structure.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-dtype_validate(
-/*===========*/
- const dtype_t* type) /*!< in: type struct to validate */
-{
- ut_a(type);
- ut_a(type->mtype >= DATA_VARCHAR);
- ut_a(type->mtype <= DATA_MYSQL);
-
- if (type->mtype == DATA_SYS) {
- ut_a((type->prtype & DATA_MYSQL_TYPE_MASK) < DATA_N_SYS_COLS);
- }
-
-#ifndef UNIV_HOTBACKUP
- ut_a(type->mbminlen <= type->mbmaxlen);
-#endif /* !UNIV_HOTBACKUP */
-
- return(TRUE);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Prints a data type structure. */
-UNIV_INTERN
-void
-dtype_print(
-/*========*/
- const dtype_t* type) /*!< in: type */
-{
- ulint mtype;
- ulint prtype;
- ulint len;
-
- ut_a(type);
-
- mtype = type->mtype;
- prtype = type->prtype;
-
- switch (mtype) {
- case DATA_VARCHAR:
- fputs("DATA_VARCHAR", stderr);
- break;
-
- case DATA_CHAR:
- fputs("DATA_CHAR", stderr);
- break;
-
- case DATA_BINARY:
- fputs("DATA_BINARY", stderr);
- break;
-
- case DATA_FIXBINARY:
- fputs("DATA_FIXBINARY", stderr);
- break;
-
- case DATA_BLOB:
- fputs("DATA_BLOB", stderr);
- break;
-
- case DATA_INT:
- fputs("DATA_INT", stderr);
- break;
-
- case DATA_MYSQL:
- fputs("DATA_MYSQL", stderr);
- break;
-
- case DATA_SYS:
- fputs("DATA_SYS", stderr);
- break;
-
- case DATA_FLOAT:
- fputs("DATA_FLOAT", stderr);
- break;
-
- case DATA_DOUBLE:
- fputs("DATA_DOUBLE", stderr);
- break;
-
- case DATA_DECIMAL:
- fputs("DATA_DECIMAL", stderr);
- break;
-
- case DATA_VARMYSQL:
- fputs("DATA_VARMYSQL", stderr);
- break;
-
- default:
- fprintf(stderr, "type %lu", (ulong) mtype);
- break;
- }
-
- len = type->len;
-
- if ((type->mtype == DATA_SYS)
- || (type->mtype == DATA_VARCHAR)
- || (type->mtype == DATA_CHAR)) {
- putc(' ', stderr);
- if (prtype == DATA_ROW_ID) {
- fputs("DATA_ROW_ID", stderr);
- len = DATA_ROW_ID_LEN;
- } else if (prtype == DATA_ROLL_PTR) {
- fputs("DATA_ROLL_PTR", stderr);
- len = DATA_ROLL_PTR_LEN;
- } else if (prtype == DATA_TRX_ID) {
- fputs("DATA_TRX_ID", stderr);
- len = DATA_TRX_ID_LEN;
- } else if (prtype == DATA_ENGLISH) {
- fputs("DATA_ENGLISH", stderr);
- } else {
- fprintf(stderr, "prtype %lu", (ulong) prtype);
- }
- } else {
- if (prtype & DATA_UNSIGNED) {
- fputs(" DATA_UNSIGNED", stderr);
- }
-
- if (prtype & DATA_BINARY_TYPE) {
- fputs(" DATA_BINARY_TYPE", stderr);
- }
-
- if (prtype & DATA_NOT_NULL) {
- fputs(" DATA_NOT_NULL", stderr);
- }
- }
-
- fprintf(stderr, " len %lu", (ulong) len);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/dict/dict0boot.c b/storage/innodb_plugin/dict/dict0boot.c
deleted file mode 100644
index e55de30481b..00000000000
--- a/storage/innodb_plugin/dict/dict0boot.c
+++ /dev/null
@@ -1,462 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file dict/dict0boot.c
-Data dictionary creation and booting
-
-Created 4/18/1996 Heikki Tuuri
-*******************************************************/
-
-#include "dict0boot.h"
-
-#ifdef UNIV_NONINL
-#include "dict0boot.ic"
-#endif
-
-#include "dict0crea.h"
-#include "btr0btr.h"
-#include "dict0load.h"
-#include "dict0load.h"
-#include "trx0trx.h"
-#include "srv0srv.h"
-#include "ibuf0ibuf.h"
-#include "buf0flu.h"
-#include "log0recv.h"
-#include "os0file.h"
-
-/**********************************************************************//**
-Gets a pointer to the dictionary header and x-latches its page.
-@return pointer to the dictionary header, page x-latched */
-UNIV_INTERN
-dict_hdr_t*
-dict_hdr_get(
-/*=========*/
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_block_t* block;
- dict_hdr_t* header;
-
- block = buf_page_get(DICT_HDR_SPACE, 0, DICT_HDR_PAGE_NO,
- RW_X_LATCH, mtr);
- header = DICT_HDR + buf_block_get_frame(block);
-
- buf_block_dbg_add_level(block, SYNC_DICT_HEADER);
-
- return(header);
-}
-
-/**********************************************************************//**
-Returns a new table, index, or tree id.
-@return the new id */
-UNIV_INTERN
-dulint
-dict_hdr_get_new_id(
-/*================*/
- ulint type) /*!< in: DICT_HDR_ROW_ID, ... */
-{
- dict_hdr_t* dict_hdr;
- dulint id;
- mtr_t mtr;
-
- ut_ad((type == DICT_HDR_TABLE_ID) || (type == DICT_HDR_INDEX_ID));
-
- mtr_start(&mtr);
-
- dict_hdr = dict_hdr_get(&mtr);
-
- id = mtr_read_dulint(dict_hdr + type, &mtr);
- id = ut_dulint_add(id, 1);
-
- mlog_write_dulint(dict_hdr + type, id, &mtr);
-
- mtr_commit(&mtr);
-
- return(id);
-}
-
-/**********************************************************************//**
-Writes the current value of the row id counter to the dictionary header file
-page. */
-UNIV_INTERN
-void
-dict_hdr_flush_row_id(void)
-/*=======================*/
-{
- dict_hdr_t* dict_hdr;
- dulint id;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- id = dict_sys->row_id;
-
- mtr_start(&mtr);
-
- dict_hdr = dict_hdr_get(&mtr);
-
- mlog_write_dulint(dict_hdr + DICT_HDR_ROW_ID, id, &mtr);
-
- mtr_commit(&mtr);
-}
-
-/*****************************************************************//**
-Creates the file page for the dictionary header. This function is
-called only at the database creation.
-@return TRUE if succeed */
-static
-ibool
-dict_hdr_create(
-/*============*/
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_block_t* block;
- dict_hdr_t* dict_header;
- ulint root_page_no;
-
- ut_ad(mtr);
-
- /* Create the dictionary header file block in a new, allocated file
- segment in the system tablespace */
- block = fseg_create(DICT_HDR_SPACE, 0,
- DICT_HDR + DICT_HDR_FSEG_HEADER, mtr);
-
- ut_a(DICT_HDR_PAGE_NO == buf_block_get_page_no(block));
-
- dict_header = dict_hdr_get(mtr);
-
- /* Start counting row, table, index, and tree ids from
- DICT_HDR_FIRST_ID */
- mlog_write_dulint(dict_header + DICT_HDR_ROW_ID,
- ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
-
- mlog_write_dulint(dict_header + DICT_HDR_TABLE_ID,
- ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
-
- mlog_write_dulint(dict_header + DICT_HDR_INDEX_ID,
- ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
-
- /* Obsolete, but we must initialize it to 0 anyway. */
- mlog_write_dulint(dict_header + DICT_HDR_MIX_ID,
- ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
-
- /* Create the B-tree roots for the clustered indexes of the basic
- system tables */
-
- /*--------------------------*/
- root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
- DICT_HDR_SPACE, 0, DICT_TABLES_ID,
- dict_ind_redundant, mtr);
- if (root_page_no == FIL_NULL) {
-
- return(FALSE);
- }
-
- mlog_write_ulint(dict_header + DICT_HDR_TABLES, root_page_no,
- MLOG_4BYTES, mtr);
- /*--------------------------*/
- root_page_no = btr_create(DICT_UNIQUE, DICT_HDR_SPACE, 0,
- DICT_TABLE_IDS_ID,
- dict_ind_redundant, mtr);
- if (root_page_no == FIL_NULL) {
-
- return(FALSE);
- }
-
- mlog_write_ulint(dict_header + DICT_HDR_TABLE_IDS, root_page_no,
- MLOG_4BYTES, mtr);
- /*--------------------------*/
- root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
- DICT_HDR_SPACE, 0, DICT_COLUMNS_ID,
- dict_ind_redundant, mtr);
- if (root_page_no == FIL_NULL) {
-
- return(FALSE);
- }
-
- mlog_write_ulint(dict_header + DICT_HDR_COLUMNS, root_page_no,
- MLOG_4BYTES, mtr);
- /*--------------------------*/
- root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
- DICT_HDR_SPACE, 0, DICT_INDEXES_ID,
- dict_ind_redundant, mtr);
- if (root_page_no == FIL_NULL) {
-
- return(FALSE);
- }
-
- mlog_write_ulint(dict_header + DICT_HDR_INDEXES, root_page_no,
- MLOG_4BYTES, mtr);
- /*--------------------------*/
- root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
- DICT_HDR_SPACE, 0, DICT_FIELDS_ID,
- dict_ind_redundant, mtr);
- if (root_page_no == FIL_NULL) {
-
- return(FALSE);
- }
-
- mlog_write_ulint(dict_header + DICT_HDR_FIELDS, root_page_no,
- MLOG_4BYTES, mtr);
- /*--------------------------*/
-
- return(TRUE);
-}
-
-/*****************************************************************//**
-Initializes the data dictionary memory structures when the database is
-started. This function is also called when the data dictionary is created. */
-UNIV_INTERN
-void
-dict_boot(void)
-/*===========*/
-{
- dict_table_t* table;
- dict_index_t* index;
- dict_hdr_t* dict_hdr;
- mem_heap_t* heap;
- mtr_t mtr;
- ulint error;
-
- mtr_start(&mtr);
-
- /* Create the hash tables etc. */
- dict_init();
-
- heap = mem_heap_create(450);
-
- mutex_enter(&(dict_sys->mutex));
-
- /* Get the dictionary header */
- dict_hdr = dict_hdr_get(&mtr);
-
- /* Because we only write new row ids to disk-based data structure
- (dictionary header) when it is divisible by
- DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover
- the latest value of the row id counter. Therefore we advance
- the counter at the database startup to avoid overlapping values.
- Note that when a user after database startup first time asks for
- a new row id, then because the counter is now divisible by
- ..._MARGIN, it will immediately be updated to the disk-based
- header. */
-
- dict_sys->row_id = ut_dulint_add(
- ut_dulint_align_up(mtr_read_dulint(dict_hdr + DICT_HDR_ROW_ID,
- &mtr),
- DICT_HDR_ROW_ID_WRITE_MARGIN),
- DICT_HDR_ROW_ID_WRITE_MARGIN);
-
- /* Insert into the dictionary cache the descriptions of the basic
- system tables */
- /*-------------------------*/
- table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8, 0);
-
- dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
- /* ROW_FORMAT = (N_COLS >> 31) ? COMPACT : REDUNDANT */
- dict_mem_table_add_col(table, heap, "N_COLS", DATA_INT, 0, 4);
- /* TYPE is either DICT_TABLE_ORDINARY, or (TYPE & DICT_TF_COMPACT)
- and (TYPE & DICT_TF_FORMAT_MASK) are nonzero and TYPE = table->flags */
- dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "MIX_ID", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "MIX_LEN", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "CLUSTER_NAME", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);
-
- table->id = DICT_TABLES_ID;
-
- dict_table_add_to_cache(table, heap);
- dict_sys->sys_tables = table;
- mem_heap_empty(heap);
-
- index = dict_mem_index_create("SYS_TABLES", "CLUST_IND",
- DICT_HDR_SPACE,
- DICT_UNIQUE | DICT_CLUSTERED, 1);
-
- dict_mem_index_add_field(index, "NAME", 0);
-
- index->id = DICT_TABLES_ID;
-
- error = dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr
- + DICT_HDR_TABLES,
- MLOG_4BYTES, &mtr),
- FALSE);
- ut_a(error == DB_SUCCESS);
-
- /*-------------------------*/
- index = dict_mem_index_create("SYS_TABLES", "ID_IND",
- DICT_HDR_SPACE, DICT_UNIQUE, 1);
- dict_mem_index_add_field(index, "ID", 0);
-
- index->id = DICT_TABLE_IDS_ID;
- error = dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr
- + DICT_HDR_TABLE_IDS,
- MLOG_4BYTES, &mtr),
- FALSE);
- ut_a(error == DB_SUCCESS);
-
- /*-------------------------*/
- table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, 7, 0);
-
- dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "MTYPE", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "PRTYPE", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "LEN", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "PREC", DATA_INT, 0, 4);
-
- table->id = DICT_COLUMNS_ID;
-
- dict_table_add_to_cache(table, heap);
- dict_sys->sys_columns = table;
- mem_heap_empty(heap);
-
- index = dict_mem_index_create("SYS_COLUMNS", "CLUST_IND",
- DICT_HDR_SPACE,
- DICT_UNIQUE | DICT_CLUSTERED, 2);
-
- dict_mem_index_add_field(index, "TABLE_ID", 0);
- dict_mem_index_add_field(index, "POS", 0);
-
- index->id = DICT_COLUMNS_ID;
- error = dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr
- + DICT_HDR_COLUMNS,
- MLOG_4BYTES, &mtr),
- FALSE);
- ut_a(error == DB_SUCCESS);
-
- /*-------------------------*/
- table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, 7, 0);
-
- dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "N_FIELDS", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "PAGE_NO", DATA_INT, 0, 4);
-
- /* The '+ 2' below comes from the 2 system fields */
-#if DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2
-#error "DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2"
-#endif
-#if DICT_SYS_INDEXES_SPACE_NO_FIELD != 5 + 2
-#error "DICT_SYS_INDEXES_SPACE_NO_FIELD != 5 + 2"
-#endif
-#if DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2
-#error "DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2"
-#endif
-
- table->id = DICT_INDEXES_ID;
- dict_table_add_to_cache(table, heap);
- dict_sys->sys_indexes = table;
- mem_heap_empty(heap);
-
- index = dict_mem_index_create("SYS_INDEXES", "CLUST_IND",
- DICT_HDR_SPACE,
- DICT_UNIQUE | DICT_CLUSTERED, 2);
-
- dict_mem_index_add_field(index, "TABLE_ID", 0);
- dict_mem_index_add_field(index, "ID", 0);
-
- index->id = DICT_INDEXES_ID;
- error = dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr
- + DICT_HDR_INDEXES,
- MLOG_4BYTES, &mtr),
- FALSE);
- ut_a(error == DB_SUCCESS);
-
- /*-------------------------*/
- table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, 0);
-
- dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "COL_NAME", DATA_BINARY, 0, 0);
-
- table->id = DICT_FIELDS_ID;
- dict_table_add_to_cache(table, heap);
- dict_sys->sys_fields = table;
- mem_heap_free(heap);
-
- index = dict_mem_index_create("SYS_FIELDS", "CLUST_IND",
- DICT_HDR_SPACE,
- DICT_UNIQUE | DICT_CLUSTERED, 2);
-
- dict_mem_index_add_field(index, "INDEX_ID", 0);
- dict_mem_index_add_field(index, "POS", 0);
-
- index->id = DICT_FIELDS_ID;
- error = dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr
- + DICT_HDR_FIELDS,
- MLOG_4BYTES, &mtr),
- FALSE);
- ut_a(error == DB_SUCCESS);
-
- mtr_commit(&mtr);
- /*-------------------------*/
-
- /* Initialize the insert buffer table and index for each tablespace */
-
- ibuf_init_at_db_start();
-
- /* Load definitions of other indexes on system tables */
-
- dict_load_sys_table(dict_sys->sys_tables);
- dict_load_sys_table(dict_sys->sys_columns);
- dict_load_sys_table(dict_sys->sys_indexes);
- dict_load_sys_table(dict_sys->sys_fields);
-
- mutex_exit(&(dict_sys->mutex));
-}
-
-/*****************************************************************//**
-Inserts the basic system table data into themselves in the database
-creation. */
-static
-void
-dict_insert_initial_data(void)
-/*==========================*/
-{
- /* Does nothing yet */
-}
-
-/*****************************************************************//**
-Creates and initializes the data dictionary at the database creation. */
-UNIV_INTERN
-void
-dict_create(void)
-/*=============*/
-{
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- dict_hdr_create(&mtr);
-
- mtr_commit(&mtr);
-
- dict_boot();
-
- dict_insert_initial_data();
-}
diff --git a/storage/innodb_plugin/dict/dict0crea.c b/storage/innodb_plugin/dict/dict0crea.c
deleted file mode 100644
index 96a9bd8152e..00000000000
--- a/storage/innodb_plugin/dict/dict0crea.c
+++ /dev/null
@@ -1,1499 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file dict/dict0crea.c
-Database object creation
-
-Created 1/8/1996 Heikki Tuuri
-*******************************************************/
-
-#include "dict0crea.h"
-
-#ifdef UNIV_NONINL
-#include "dict0crea.ic"
-#endif
-
-#include "btr0pcur.h"
-#include "btr0btr.h"
-#include "page0page.h"
-#include "mach0data.h"
-#include "dict0boot.h"
-#include "dict0dict.h"
-#include "que0que.h"
-#include "row0ins.h"
-#include "row0mysql.h"
-#include "pars0pars.h"
-#include "trx0roll.h"
-#include "usr0sess.h"
-#include "ut0vec.h"
-
-/*****************************************************************//**
-Based on a table object, this function builds the entry to be inserted
-in the SYS_TABLES system table.
-@return the tuple which should be inserted */
-static
-dtuple_t*
-dict_create_sys_tables_tuple(
-/*=========================*/
- dict_table_t* table, /*!< in: table */
- mem_heap_t* heap) /*!< in: memory heap from which the memory for
- the built tuple is allocated */
-{
- dict_table_t* sys_tables;
- dtuple_t* entry;
- dfield_t* dfield;
- byte* ptr;
-
- ut_ad(table && heap);
-
- sys_tables = dict_sys->sys_tables;
-
- entry = dtuple_create(heap, 8 + DATA_N_SYS_COLS);
-
- dict_table_copy_types(entry, sys_tables);
-
- /* 0: NAME -----------------------------*/
- dfield = dtuple_get_nth_field(entry, 0);
-
- dfield_set_data(dfield, table->name, ut_strlen(table->name));
- /* 3: ID -------------------------------*/
- dfield = dtuple_get_nth_field(entry, 1);
-
- ptr = mem_heap_alloc(heap, 8);
- mach_write_to_8(ptr, table->id);
-
- dfield_set_data(dfield, ptr, 8);
- /* 4: N_COLS ---------------------------*/
- dfield = dtuple_get_nth_field(entry, 2);
-
-#if DICT_TF_COMPACT != 1
-#error
-#endif
-
- ptr = mem_heap_alloc(heap, 4);
- mach_write_to_4(ptr, table->n_def
- | ((table->flags & DICT_TF_COMPACT) << 31));
- dfield_set_data(dfield, ptr, 4);
- /* 5: TYPE -----------------------------*/
- dfield = dtuple_get_nth_field(entry, 3);
-
- ptr = mem_heap_alloc(heap, 4);
- if (table->flags & ~DICT_TF_COMPACT) {
- ut_a(table->flags & DICT_TF_COMPACT);
- ut_a(dict_table_get_format(table) >= DICT_TF_FORMAT_ZIP);
- ut_a((table->flags & DICT_TF_ZSSIZE_MASK)
- <= (DICT_TF_ZSSIZE_MAX << DICT_TF_ZSSIZE_SHIFT));
- ut_a(!(table->flags & (~0 << DICT_TF_BITS)));
- mach_write_to_4(ptr, table->flags);
- } else {
- mach_write_to_4(ptr, DICT_TABLE_ORDINARY);
- }
-
- dfield_set_data(dfield, ptr, 4);
- /* 6: MIX_ID (obsolete) ---------------------------*/
- dfield = dtuple_get_nth_field(entry, 4);
-
- ptr = mem_heap_zalloc(heap, 8);
-
- dfield_set_data(dfield, ptr, 8);
- /* 7: MIX_LEN (obsolete) --------------------------*/
-
- dfield = dtuple_get_nth_field(entry, 5);
-
- ptr = mem_heap_zalloc(heap, 4);
-
- dfield_set_data(dfield, ptr, 4);
- /* 8: CLUSTER_NAME ---------------------*/
- dfield = dtuple_get_nth_field(entry, 6);
- dfield_set_null(dfield); /* not supported */
-
- /* 9: SPACE ----------------------------*/
- dfield = dtuple_get_nth_field(entry, 7);
-
- ptr = mem_heap_alloc(heap, 4);
- mach_write_to_4(ptr, table->space);
-
- dfield_set_data(dfield, ptr, 4);
- /*----------------------------------*/
-
- return(entry);
-}
-
-/*****************************************************************//**
-Based on a table object, this function builds the entry to be inserted
-in the SYS_COLUMNS system table.
-@return the tuple which should be inserted */
-static
-dtuple_t*
-dict_create_sys_columns_tuple(
-/*==========================*/
- dict_table_t* table, /*!< in: table */
- ulint i, /*!< in: column number */
- mem_heap_t* heap) /*!< in: memory heap from which the memory for
- the built tuple is allocated */
-{
- dict_table_t* sys_columns;
- dtuple_t* entry;
- const dict_col_t* column;
- dfield_t* dfield;
- byte* ptr;
- const char* col_name;
-
- ut_ad(table && heap);
-
- column = dict_table_get_nth_col(table, i);
-
- sys_columns = dict_sys->sys_columns;
-
- entry = dtuple_create(heap, 7 + DATA_N_SYS_COLS);
-
- dict_table_copy_types(entry, sys_columns);
-
- /* 0: TABLE_ID -----------------------*/
- dfield = dtuple_get_nth_field(entry, 0);
-
- ptr = mem_heap_alloc(heap, 8);
- mach_write_to_8(ptr, table->id);
-
- dfield_set_data(dfield, ptr, 8);
- /* 1: POS ----------------------------*/
- dfield = dtuple_get_nth_field(entry, 1);
-
- ptr = mem_heap_alloc(heap, 4);
- mach_write_to_4(ptr, i);
-
- dfield_set_data(dfield, ptr, 4);
- /* 4: NAME ---------------------------*/
- dfield = dtuple_get_nth_field(entry, 2);
-
- col_name = dict_table_get_col_name(table, i);
- dfield_set_data(dfield, col_name, ut_strlen(col_name));
- /* 5: MTYPE --------------------------*/
- dfield = dtuple_get_nth_field(entry, 3);
-
- ptr = mem_heap_alloc(heap, 4);
- mach_write_to_4(ptr, column->mtype);
-
- dfield_set_data(dfield, ptr, 4);
- /* 6: PRTYPE -------------------------*/
- dfield = dtuple_get_nth_field(entry, 4);
-
- ptr = mem_heap_alloc(heap, 4);
- mach_write_to_4(ptr, column->prtype);
-
- dfield_set_data(dfield, ptr, 4);
- /* 7: LEN ----------------------------*/
- dfield = dtuple_get_nth_field(entry, 5);
-
- ptr = mem_heap_alloc(heap, 4);
- mach_write_to_4(ptr, column->len);
-
- dfield_set_data(dfield, ptr, 4);
- /* 8: PREC ---------------------------*/
- dfield = dtuple_get_nth_field(entry, 6);
-
- ptr = mem_heap_alloc(heap, 4);
- mach_write_to_4(ptr, 0/* unused */);
-
- dfield_set_data(dfield, ptr, 4);
- /*---------------------------------*/
-
- return(entry);
-}
-
-/***************************************************************//**
-Builds a table definition to insert.
-@return DB_SUCCESS or error code */
-static
-ulint
-dict_build_table_def_step(
-/*======================*/
- que_thr_t* thr, /*!< in: query thread */
- tab_node_t* node) /*!< in: table create node */
-{
- dict_table_t* table;
- dtuple_t* row;
- ulint error;
- const char* path_or_name;
- ibool is_path;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- table = node->table;
-
- table->id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID);
-
- thr_get_trx(thr)->table_id = table->id;
-
- if (srv_file_per_table) {
- /* We create a new single-table tablespace for the table.
- We initially let it be 4 pages:
- - page 0 is the fsp header and an extent descriptor page,
- - page 1 is an ibuf bitmap page,
- - page 2 is the first inode page,
- - page 3 will contain the root of the clustered index of the
- table we create here. */
-
- ulint space = 0; /* reset to zero for the call below */
-
- if (table->dir_path_of_temp_table) {
- /* We place tables created with CREATE TEMPORARY
- TABLE in the tmp dir of mysqld server */
-
- path_or_name = table->dir_path_of_temp_table;
- is_path = TRUE;
- } else {
- path_or_name = table->name;
- is_path = FALSE;
- }
-
- ut_ad(dict_table_get_format(table) <= DICT_TF_FORMAT_MAX);
- ut_ad(!dict_table_zip_size(table)
- || dict_table_get_format(table) >= DICT_TF_FORMAT_ZIP);
-
- error = fil_create_new_single_table_tablespace(
- &space, path_or_name, is_path,
- table->flags == DICT_TF_COMPACT ? 0 : table->flags,
- FIL_IBD_FILE_INITIAL_SIZE);
- table->space = (unsigned int) space;
-
- if (error != DB_SUCCESS) {
-
- return(error);
- }
-
- mtr_start(&mtr);
-
- fsp_header_init(table->space, FIL_IBD_FILE_INITIAL_SIZE, &mtr);
-
- mtr_commit(&mtr);
- } else {
- /* Create in the system tablespace: disallow new features */
- table->flags &= DICT_TF_COMPACT;
- }
-
- row = dict_create_sys_tables_tuple(table, node->heap);
-
- ins_node_set_new_row(node->tab_def, row);
-
- return(DB_SUCCESS);
-}
-
-/***************************************************************//**
-Builds a column definition to insert.
-@return DB_SUCCESS */
-static
-ulint
-dict_build_col_def_step(
-/*====================*/
- tab_node_t* node) /*!< in: table create node */
-{
- dtuple_t* row;
-
- row = dict_create_sys_columns_tuple(node->table, node->col_no,
- node->heap);
- ins_node_set_new_row(node->col_def, row);
-
- return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Based on an index object, this function builds the entry to be inserted
-in the SYS_INDEXES system table.
-@return the tuple which should be inserted */
-static
-dtuple_t*
-dict_create_sys_indexes_tuple(
-/*==========================*/
- dict_index_t* index, /*!< in: index */
- mem_heap_t* heap) /*!< in: memory heap from which the memory for
- the built tuple is allocated */
-{
- dict_table_t* sys_indexes;
- dict_table_t* table;
- dtuple_t* entry;
- dfield_t* dfield;
- byte* ptr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_ad(index && heap);
-
- sys_indexes = dict_sys->sys_indexes;
-
- table = dict_table_get_low(index->table_name);
-
- entry = dtuple_create(heap, 7 + DATA_N_SYS_COLS);
-
- dict_table_copy_types(entry, sys_indexes);
-
- /* 0: TABLE_ID -----------------------*/
- dfield = dtuple_get_nth_field(entry, 0);
-
- ptr = mem_heap_alloc(heap, 8);
- mach_write_to_8(ptr, table->id);
-
- dfield_set_data(dfield, ptr, 8);
- /* 1: ID ----------------------------*/
- dfield = dtuple_get_nth_field(entry, 1);
-
- ptr = mem_heap_alloc(heap, 8);
- mach_write_to_8(ptr, index->id);
-
- dfield_set_data(dfield, ptr, 8);
- /* 4: NAME --------------------------*/
- dfield = dtuple_get_nth_field(entry, 2);
-
- dfield_set_data(dfield, index->name, ut_strlen(index->name));
- /* 5: N_FIELDS ----------------------*/
- dfield = dtuple_get_nth_field(entry, 3);
-
- ptr = mem_heap_alloc(heap, 4);
- mach_write_to_4(ptr, index->n_fields);
-
- dfield_set_data(dfield, ptr, 4);
- /* 6: TYPE --------------------------*/
- dfield = dtuple_get_nth_field(entry, 4);
-
- ptr = mem_heap_alloc(heap, 4);
- mach_write_to_4(ptr, index->type);
-
- dfield_set_data(dfield, ptr, 4);
- /* 7: SPACE --------------------------*/
-
-#if DICT_SYS_INDEXES_SPACE_NO_FIELD != 7
-#error "DICT_SYS_INDEXES_SPACE_NO_FIELD != 7"
-#endif
-
- dfield = dtuple_get_nth_field(entry, 5);
-
- ptr = mem_heap_alloc(heap, 4);
- mach_write_to_4(ptr, index->space);
-
- dfield_set_data(dfield, ptr, 4);
- /* 8: PAGE_NO --------------------------*/
-
-#if DICT_SYS_INDEXES_PAGE_NO_FIELD != 8
-#error "DICT_SYS_INDEXES_PAGE_NO_FIELD != 8"
-#endif
-
- dfield = dtuple_get_nth_field(entry, 6);
-
- ptr = mem_heap_alloc(heap, 4);
- mach_write_to_4(ptr, FIL_NULL);
-
- dfield_set_data(dfield, ptr, 4);
- /*--------------------------------*/
-
- return(entry);
-}
-
-/*****************************************************************//**
-Based on an index object, this function builds the entry to be inserted
-in the SYS_FIELDS system table.
-@return the tuple which should be inserted */
-static
-dtuple_t*
-dict_create_sys_fields_tuple(
-/*=========================*/
- dict_index_t* index, /*!< in: index */
- ulint i, /*!< in: field number */
- mem_heap_t* heap) /*!< in: memory heap from which the memory for
- the built tuple is allocated */
-{
- dict_table_t* sys_fields;
- dtuple_t* entry;
- dict_field_t* field;
- dfield_t* dfield;
- byte* ptr;
- ibool index_contains_column_prefix_field = FALSE;
- ulint j;
-
- ut_ad(index && heap);
-
- for (j = 0; j < index->n_fields; j++) {
- if (dict_index_get_nth_field(index, j)->prefix_len > 0) {
- index_contains_column_prefix_field = TRUE;
- break;
- }
- }
-
- field = dict_index_get_nth_field(index, i);
-
- sys_fields = dict_sys->sys_fields;
-
- entry = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
-
- dict_table_copy_types(entry, sys_fields);
-
- /* 0: INDEX_ID -----------------------*/
- dfield = dtuple_get_nth_field(entry, 0);
-
- ptr = mem_heap_alloc(heap, 8);
- mach_write_to_8(ptr, index->id);
-
- dfield_set_data(dfield, ptr, 8);
- /* 1: POS + PREFIX LENGTH ----------------------------*/
-
- dfield = dtuple_get_nth_field(entry, 1);
-
- ptr = mem_heap_alloc(heap, 4);
-
- if (index_contains_column_prefix_field) {
- /* If there are column prefix fields in the index, then
- we store the number of the field to the 2 HIGH bytes
- and the prefix length to the 2 low bytes, */
-
- mach_write_to_4(ptr, (i << 16) + field->prefix_len);
- } else {
- /* Else we store the number of the field to the 2 LOW bytes.
- This is to keep the storage format compatible with
- InnoDB versions < 4.0.14. */
-
- mach_write_to_4(ptr, i);
- }
-
- dfield_set_data(dfield, ptr, 4);
- /* 4: COL_NAME -------------------------*/
- dfield = dtuple_get_nth_field(entry, 2);
-
- dfield_set_data(dfield, field->name,
- ut_strlen(field->name));
- /*---------------------------------*/
-
- return(entry);
-}
-
-/*****************************************************************//**
-Creates the tuple with which the index entry is searched for writing the index
-tree root page number, if such a tree is created.
-@return the tuple for search */
-static
-dtuple_t*
-dict_create_search_tuple(
-/*=====================*/
- const dtuple_t* tuple, /*!< in: the tuple inserted in the SYS_INDEXES
- table */
- mem_heap_t* heap) /*!< in: memory heap from which the memory for
- the built tuple is allocated */
-{
- dtuple_t* search_tuple;
- const dfield_t* field1;
- dfield_t* field2;
-
- ut_ad(tuple && heap);
-
- search_tuple = dtuple_create(heap, 2);
-
- field1 = dtuple_get_nth_field(tuple, 0);
- field2 = dtuple_get_nth_field(search_tuple, 0);
-
- dfield_copy(field2, field1);
-
- field1 = dtuple_get_nth_field(tuple, 1);
- field2 = dtuple_get_nth_field(search_tuple, 1);
-
- dfield_copy(field2, field1);
-
- ut_ad(dtuple_validate(search_tuple));
-
- return(search_tuple);
-}
-
-/***************************************************************//**
-Builds an index definition row to insert.
-@return DB_SUCCESS or error code */
-static
-ulint
-dict_build_index_def_step(
-/*======================*/
- que_thr_t* thr, /*!< in: query thread */
- ind_node_t* node) /*!< in: index create node */
-{
- dict_table_t* table;
- dict_index_t* index;
- dtuple_t* row;
- trx_t* trx;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- trx = thr_get_trx(thr);
-
- index = node->index;
-
- table = dict_table_get_low(index->table_name);
-
- if (table == NULL) {
- return(DB_TABLE_NOT_FOUND);
- }
-
- trx->table_id = table->id;
-
- node->table = table;
-
- ut_ad((UT_LIST_GET_LEN(table->indexes) > 0)
- || dict_index_is_clust(index));
-
- index->id = dict_hdr_get_new_id(DICT_HDR_INDEX_ID);
-
- /* Inherit the space id from the table; we store all indexes of a
- table in the same tablespace */
-
- index->space = table->space;
- node->page_no = FIL_NULL;
- row = dict_create_sys_indexes_tuple(index, node->heap);
- node->ind_row = row;
-
- ins_node_set_new_row(node->ind_def, row);
-
- /* Note that the index was created by this transaction. */
- index->trx_id = (ib_uint64_t) ut_conv_dulint_to_longlong(trx->id);
-
- return(DB_SUCCESS);
-}
-
-/***************************************************************//**
-Builds a field definition row to insert.
-@return DB_SUCCESS */
-static
-ulint
-dict_build_field_def_step(
-/*======================*/
- ind_node_t* node) /*!< in: index create node */
-{
- dict_index_t* index;
- dtuple_t* row;
-
- index = node->index;
-
- row = dict_create_sys_fields_tuple(index, node->field_no, node->heap);
-
- ins_node_set_new_row(node->field_def, row);
-
- return(DB_SUCCESS);
-}
-
-/***************************************************************//**
-Creates an index tree for the index if it is not a member of a cluster.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static
-ulint
-dict_create_index_tree_step(
-/*========================*/
- ind_node_t* node) /*!< in: index create node */
-{
- dict_index_t* index;
- dict_table_t* sys_indexes;
- dict_table_t* table;
- dtuple_t* search_tuple;
- btr_pcur_t pcur;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- index = node->index;
- table = node->table;
-
- sys_indexes = dict_sys->sys_indexes;
-
- /* Run a mini-transaction in which the index tree is allocated for
- the index and its root address is written to the index entry in
- sys_indexes */
-
- mtr_start(&mtr);
-
- search_tuple = dict_create_search_tuple(node->ind_row, node->heap);
-
- btr_pcur_open(UT_LIST_GET_FIRST(sys_indexes->indexes),
- search_tuple, PAGE_CUR_L, BTR_MODIFY_LEAF,
- &pcur, &mtr);
-
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
-
- node->page_no = btr_create(index->type, index->space,
- dict_table_zip_size(index->table),
- index->id, index, &mtr);
- /* printf("Created a new index tree in space %lu root page %lu\n",
- index->space, index->page_no); */
-
- page_rec_write_index_page_no(btr_pcur_get_rec(&pcur),
- DICT_SYS_INDEXES_PAGE_NO_FIELD,
- node->page_no, &mtr);
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- if (node->page_no == FIL_NULL) {
-
- return(DB_OUT_OF_FILE_SPACE);
- }
-
- return(DB_SUCCESS);
-}
-
-/*******************************************************************//**
-Drops the index tree associated with a row in SYS_INDEXES table. */
-UNIV_INTERN
-void
-dict_drop_index_tree(
-/*=================*/
- rec_t* rec, /*!< in/out: record in the clustered index
- of SYS_INDEXES table */
- mtr_t* mtr) /*!< in: mtr having the latch on the record page */
-{
- ulint root_page_no;
- ulint space;
- ulint zip_size;
- const byte* ptr;
- ulint len;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_a(!dict_table_is_comp(dict_sys->sys_indexes));
- ptr = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD, &len);
-
- ut_ad(len == 4);
-
- root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
-
- if (root_page_no == FIL_NULL) {
- /* The tree has already been freed */
-
- return;
- }
-
- ptr = rec_get_nth_field_old(rec,
- DICT_SYS_INDEXES_SPACE_NO_FIELD, &len);
-
- ut_ad(len == 4);
-
- space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
- zip_size = fil_space_get_zip_size(space);
-
- if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
- /* It is a single table tablespace and the .ibd file is
- missing: do nothing */
-
- return;
- }
-
- /* We free all the pages but the root page first; this operation
- may span several mini-transactions */
-
- btr_free_but_not_root(space, zip_size, root_page_no);
-
- /* Then we free the root page in the same mini-transaction where
- we write FIL_NULL to the appropriate field in the SYS_INDEXES
- record: this mini-transaction marks the B-tree totally freed */
-
- /* printf("Dropping index tree in space %lu root page %lu\n", space,
- root_page_no); */
- btr_free_root(space, zip_size, root_page_no, mtr);
-
- page_rec_write_index_page_no(rec,
- DICT_SYS_INDEXES_PAGE_NO_FIELD,
- FIL_NULL, mtr);
-}
-
-/*******************************************************************//**
-Truncates the index tree associated with a row in SYS_INDEXES table.
-@return new root page number, or FIL_NULL on failure */
-UNIV_INTERN
-ulint
-dict_truncate_index_tree(
-/*=====================*/
- dict_table_t* table, /*!< in: the table the index belongs to */
- ulint space, /*!< in: 0=truncate,
- nonzero=create the index tree in the
- given tablespace */
- btr_pcur_t* pcur, /*!< in/out: persistent cursor pointing to
- record in the clustered index of
- SYS_INDEXES table. The cursor may be
- repositioned in this call. */
- mtr_t* mtr) /*!< in: mtr having the latch
- on the record page. The mtr may be
- committed and restarted in this call. */
-{
- ulint root_page_no;
- ibool drop = !space;
- ulint zip_size;
- ulint type;
- dulint index_id;
- rec_t* rec;
- const byte* ptr;
- ulint len;
- dict_index_t* index;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_a(!dict_table_is_comp(dict_sys->sys_indexes));
- rec = btr_pcur_get_rec(pcur);
- ptr = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD, &len);
-
- ut_ad(len == 4);
-
- root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
-
- if (drop && root_page_no == FIL_NULL) {
- /* The tree has been freed. */
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Trying to TRUNCATE"
- " a missing index of table %s!\n", table->name);
- drop = FALSE;
- }
-
- ptr = rec_get_nth_field_old(rec,
- DICT_SYS_INDEXES_SPACE_NO_FIELD, &len);
-
- ut_ad(len == 4);
-
- if (drop) {
- space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
- }
-
- zip_size = fil_space_get_zip_size(space);
-
- if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
- /* It is a single table tablespace and the .ibd file is
- missing: do nothing */
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Trying to TRUNCATE"
- " a missing .ibd file of table %s!\n", table->name);
- return(FIL_NULL);
- }
-
- ptr = rec_get_nth_field_old(rec,
- DICT_SYS_INDEXES_TYPE_FIELD, &len);
- ut_ad(len == 4);
- type = mach_read_from_4(ptr);
-
- ptr = rec_get_nth_field_old(rec, 1, &len);
- ut_ad(len == 8);
- index_id = mach_read_from_8(ptr);
-
- if (!drop) {
-
- goto create;
- }
-
- /* We free all the pages but the root page first; this operation
- may span several mini-transactions */
-
- btr_free_but_not_root(space, zip_size, root_page_no);
-
- /* Then we free the root page in the same mini-transaction where
- we create the b-tree and write its new root page number to the
- appropriate field in the SYS_INDEXES record: this mini-transaction
- marks the B-tree totally truncated */
-
- btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, mtr);
-
- btr_free_root(space, zip_size, root_page_no, mtr);
-create:
- /* We will temporarily write FIL_NULL to the PAGE_NO field
- in SYS_INDEXES, so that the database will not get into an
- inconsistent state in case it crashes between the mtr_commit()
- below and the following mtr_commit() call. */
- page_rec_write_index_page_no(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD,
- FIL_NULL, mtr);
-
- /* We will need to commit the mini-transaction in order to avoid
- deadlocks in the btr_create() call, because otherwise we would
- be freeing and allocating pages in the same mini-transaction. */
- btr_pcur_store_position(pcur, mtr);
- mtr_commit(mtr);
-
- mtr_start(mtr);
- btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);
-
- /* Find the index corresponding to this SYS_INDEXES record. */
- for (index = UT_LIST_GET_FIRST(table->indexes);
- index;
- index = UT_LIST_GET_NEXT(indexes, index)) {
- if (!ut_dulint_cmp(index->id, index_id)) {
- root_page_no = btr_create(type, space, zip_size,
- index_id, index, mtr);
- index->page = (unsigned int) root_page_no;
- return(root_page_no);
- }
- }
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Index %lu %lu of table %s is missing\n"
- "InnoDB: from the data dictionary during TRUNCATE!\n",
- ut_dulint_get_high(index_id),
- ut_dulint_get_low(index_id),
- table->name);
-
- return(FIL_NULL);
-}
-
-/*********************************************************************//**
-Creates a table create graph.
-@return own: table create node */
-UNIV_INTERN
-tab_node_t*
-tab_create_graph_create(
-/*====================*/
- dict_table_t* table, /*!< in: table to create, built as a memory data
- structure */
- mem_heap_t* heap) /*!< in: heap where created */
-{
- tab_node_t* node;
-
- node = mem_heap_alloc(heap, sizeof(tab_node_t));
-
- node->common.type = QUE_NODE_CREATE_TABLE;
-
- node->table = table;
-
- node->state = TABLE_BUILD_TABLE_DEF;
- node->heap = mem_heap_create(256);
-
- node->tab_def = ins_node_create(INS_DIRECT, dict_sys->sys_tables,
- heap);
- node->tab_def->common.parent = node;
-
- node->col_def = ins_node_create(INS_DIRECT, dict_sys->sys_columns,
- heap);
- node->col_def->common.parent = node;
-
- node->commit_node = commit_node_create(heap);
- node->commit_node->common.parent = node;
-
- return(node);
-}
-
-/*********************************************************************//**
-Creates an index create graph.
-@return own: index create node */
-UNIV_INTERN
-ind_node_t*
-ind_create_graph_create(
-/*====================*/
- dict_index_t* index, /*!< in: index to create, built as a memory data
- structure */
- mem_heap_t* heap) /*!< in: heap where created */
-{
- ind_node_t* node;
-
- node = mem_heap_alloc(heap, sizeof(ind_node_t));
-
- node->common.type = QUE_NODE_CREATE_INDEX;
-
- node->index = index;
-
- node->state = INDEX_BUILD_INDEX_DEF;
- node->page_no = FIL_NULL;
- node->heap = mem_heap_create(256);
-
- node->ind_def = ins_node_create(INS_DIRECT,
- dict_sys->sys_indexes, heap);
- node->ind_def->common.parent = node;
-
- node->field_def = ins_node_create(INS_DIRECT,
- dict_sys->sys_fields, heap);
- node->field_def->common.parent = node;
-
- node->commit_node = commit_node_create(heap);
- node->commit_node->common.parent = node;
-
- return(node);
-}
-
-/***********************************************************//**
-Creates a table. This is a high-level function used in SQL execution graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-dict_create_table_step(
-/*===================*/
- que_thr_t* thr) /*!< in: query thread */
-{
- tab_node_t* node;
- ulint err = DB_ERROR;
- trx_t* trx;
-
- ut_ad(thr);
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- trx = thr_get_trx(thr);
-
- node = thr->run_node;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_CREATE_TABLE);
-
- if (thr->prev_node == que_node_get_parent(node)) {
- node->state = TABLE_BUILD_TABLE_DEF;
- }
-
- if (node->state == TABLE_BUILD_TABLE_DEF) {
-
- /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
-
- err = dict_build_table_def_step(thr, node);
-
- if (err != DB_SUCCESS) {
-
- goto function_exit;
- }
-
- node->state = TABLE_BUILD_COL_DEF;
- node->col_no = 0;
-
- thr->run_node = node->tab_def;
-
- return(thr);
- }
-
- if (node->state == TABLE_BUILD_COL_DEF) {
-
- if (node->col_no < (node->table)->n_def) {
-
- err = dict_build_col_def_step(node);
-
- if (err != DB_SUCCESS) {
-
- goto function_exit;
- }
-
- node->col_no++;
-
- thr->run_node = node->col_def;
-
- return(thr);
- } else {
- node->state = TABLE_COMMIT_WORK;
- }
- }
-
- if (node->state == TABLE_COMMIT_WORK) {
-
- /* Table was correctly defined: do NOT commit the transaction
- (CREATE TABLE does NOT do an implicit commit of the current
- transaction) */
-
- node->state = TABLE_ADD_TO_CACHE;
-
- /* thr->run_node = node->commit_node;
-
- return(thr); */
- }
-
- if (node->state == TABLE_ADD_TO_CACHE) {
-
- dict_table_add_to_cache(node->table, node->heap);
-
- err = DB_SUCCESS;
- }
-
-function_exit:
- trx->error_state = err;
-
- if (err == DB_SUCCESS) {
- /* Ok: do nothing */
-
- } else if (err == DB_LOCK_WAIT) {
-
- return(NULL);
- } else {
- /* SQL error detected */
-
- return(NULL);
- }
-
- thr->run_node = que_node_get_parent(node);
-
- return(thr);
-}
-
-/***********************************************************//**
-Creates an index. This is a high-level function used in SQL execution
-graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-dict_create_index_step(
-/*===================*/
- que_thr_t* thr) /*!< in: query thread */
-{
- ind_node_t* node;
- ulint err = DB_ERROR;
- trx_t* trx;
-
- ut_ad(thr);
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- trx = thr_get_trx(thr);
-
- node = thr->run_node;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_CREATE_INDEX);
-
- if (thr->prev_node == que_node_get_parent(node)) {
- node->state = INDEX_BUILD_INDEX_DEF;
- }
-
- if (node->state == INDEX_BUILD_INDEX_DEF) {
- /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
- err = dict_build_index_def_step(thr, node);
-
- if (err != DB_SUCCESS) {
-
- goto function_exit;
- }
-
- node->state = INDEX_BUILD_FIELD_DEF;
- node->field_no = 0;
-
- thr->run_node = node->ind_def;
-
- return(thr);
- }
-
- if (node->state == INDEX_BUILD_FIELD_DEF) {
-
- if (node->field_no < (node->index)->n_fields) {
-
- err = dict_build_field_def_step(node);
-
- if (err != DB_SUCCESS) {
-
- goto function_exit;
- }
-
- node->field_no++;
-
- thr->run_node = node->field_def;
-
- return(thr);
- } else {
- node->state = INDEX_ADD_TO_CACHE;
- }
- }
-
- if (node->state == INDEX_ADD_TO_CACHE) {
-
- dulint index_id = node->index->id;
-
- err = dict_index_add_to_cache(node->table, node->index,
- FIL_NULL, TRUE);
-
- node->index = dict_index_get_if_in_cache_low(index_id);
- ut_a(!node->index == (err != DB_SUCCESS));
-
- if (err != DB_SUCCESS) {
-
- goto function_exit;
- }
-
- node->state = INDEX_CREATE_INDEX_TREE;
- }
-
- if (node->state == INDEX_CREATE_INDEX_TREE) {
-
- err = dict_create_index_tree_step(node);
-
- if (err != DB_SUCCESS) {
- dict_index_remove_from_cache(node->table, node->index);
- node->index = NULL;
-
- goto function_exit;
- }
-
- node->index->page = node->page_no;
- node->state = INDEX_COMMIT_WORK;
- }
-
- if (node->state == INDEX_COMMIT_WORK) {
-
- /* Index was correctly defined: do NOT commit the transaction
- (CREATE INDEX does NOT currently do an implicit commit of
- the current transaction) */
-
- node->state = INDEX_CREATE_INDEX_TREE;
-
- /* thr->run_node = node->commit_node;
-
- return(thr); */
- }
-
-function_exit:
- trx->error_state = err;
-
- if (err == DB_SUCCESS) {
- /* Ok: do nothing */
-
- } else if (err == DB_LOCK_WAIT) {
-
- return(NULL);
- } else {
- /* SQL error detected */
-
- return(NULL);
- }
-
- thr->run_node = que_node_get_parent(node);
-
- return(thr);
-}
-
-/****************************************************************//**
-Creates the foreign key constraints system tables inside InnoDB
-at database creation or database start if they are not found or are
-not of the right form.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ulint
-dict_create_or_check_foreign_constraint_tables(void)
-/*================================================*/
-{
- dict_table_t* table1;
- dict_table_t* table2;
- ulint error;
- trx_t* trx;
-
- mutex_enter(&(dict_sys->mutex));
-
- table1 = dict_table_get_low("SYS_FOREIGN");
- table2 = dict_table_get_low("SYS_FOREIGN_COLS");
-
- if (table1 && table2
- && UT_LIST_GET_LEN(table1->indexes) == 3
- && UT_LIST_GET_LEN(table2->indexes) == 1) {
-
- /* Foreign constraint system tables have already been
- created, and they are ok */
-
- mutex_exit(&(dict_sys->mutex));
-
- return(DB_SUCCESS);
- }
-
- mutex_exit(&(dict_sys->mutex));
-
- trx = trx_allocate_for_mysql();
-
- trx->op_info = "creating foreign key sys tables";
-
- row_mysql_lock_data_dictionary(trx);
-
- if (table1) {
- fprintf(stderr,
- "InnoDB: dropping incompletely created"
- " SYS_FOREIGN table\n");
- row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE);
- }
-
- if (table2) {
- fprintf(stderr,
- "InnoDB: dropping incompletely created"
- " SYS_FOREIGN_COLS table\n");
- row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE);
- }
-
- fprintf(stderr,
- "InnoDB: Creating foreign key constraint system tables\n");
-
- /* NOTE: in dict_load_foreigns we use the fact that
- there are 2 secondary indexes on SYS_FOREIGN, and they
- are defined just like below */
-
- /* NOTE: when designing InnoDB's foreign key support in 2001, we made
- an error and made the table names and the foreign key id of type
- 'CHAR' (internally, really a VARCHAR). We should have made the type
- VARBINARY, like in other InnoDB system tables, to get a clean
- design. */
-
- error = que_eval_sql(NULL,
- "PROCEDURE CREATE_FOREIGN_SYS_TABLES_PROC () IS\n"
- "BEGIN\n"
- "CREATE TABLE\n"
- "SYS_FOREIGN(ID CHAR, FOR_NAME CHAR,"
- " REF_NAME CHAR, N_COLS INT);\n"
- "CREATE UNIQUE CLUSTERED INDEX ID_IND"
- " ON SYS_FOREIGN (ID);\n"
- "CREATE INDEX FOR_IND"
- " ON SYS_FOREIGN (FOR_NAME);\n"
- "CREATE INDEX REF_IND"
- " ON SYS_FOREIGN (REF_NAME);\n"
- "CREATE TABLE\n"
- "SYS_FOREIGN_COLS(ID CHAR, POS INT,"
- " FOR_COL_NAME CHAR, REF_COL_NAME CHAR);\n"
- "CREATE UNIQUE CLUSTERED INDEX ID_IND"
- " ON SYS_FOREIGN_COLS (ID, POS);\n"
- "END;\n"
- , FALSE, trx);
-
- if (error != DB_SUCCESS) {
- fprintf(stderr, "InnoDB: error %lu in creation\n",
- (ulong) error);
-
- ut_a(error == DB_OUT_OF_FILE_SPACE
- || error == DB_TOO_MANY_CONCURRENT_TRXS);
-
- fprintf(stderr,
- "InnoDB: creation failed\n"
- "InnoDB: tablespace is full\n"
- "InnoDB: dropping incompletely created"
- " SYS_FOREIGN tables\n");
-
- row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE);
- row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE);
-
- error = DB_MUST_GET_MORE_FILE_SPACE;
- }
-
- trx_commit_for_mysql(trx);
-
- row_mysql_unlock_data_dictionary(trx);
-
- trx_free_for_mysql(trx);
-
- if (error == DB_SUCCESS) {
- fprintf(stderr,
- "InnoDB: Foreign key constraint system tables"
- " created\n");
- }
-
- return(error);
-}
-
-/****************************************************************//**
-Evaluate the given foreign key SQL statement.
-@return error code or DB_SUCCESS */
-static
-ulint
-dict_foreign_eval_sql(
-/*==================*/
- pars_info_t* info, /*!< in: info struct, or NULL */
- const char* sql, /*!< in: SQL string to evaluate */
- dict_table_t* table, /*!< in: table */
- dict_foreign_t* foreign,/*!< in: foreign */
- trx_t* trx) /*!< in: transaction */
-{
- ulint error;
- FILE* ef = dict_foreign_err_file;
-
- error = que_eval_sql(info, sql, FALSE, trx);
-
- if (error == DB_DUPLICATE_KEY) {
- mutex_enter(&dict_foreign_err_mutex);
- rewind(ef);
- ut_print_timestamp(ef);
- fputs(" Error in foreign key constraint creation for table ",
- ef);
- ut_print_name(ef, trx, TRUE, table->name);
- fputs(".\nA foreign key constraint of name ", ef);
- ut_print_name(ef, trx, TRUE, foreign->id);
- fputs("\nalready exists."
- " (Note that internally InnoDB adds 'databasename'\n"
- "in front of the user-defined constraint name.)\n"
- "Note that InnoDB's FOREIGN KEY system tables store\n"
- "constraint names as case-insensitive, with the\n"
- "MySQL standard latin1_swedish_ci collation. If you\n"
- "create tables or databases whose names differ only in\n"
- "the character case, then collisions in constraint\n"
- "names can occur. Workaround: name your constraints\n"
- "explicitly with unique names.\n",
- ef);
-
- mutex_exit(&dict_foreign_err_mutex);
-
- return(error);
- }
-
- if (error != DB_SUCCESS) {
- fprintf(stderr,
- "InnoDB: Foreign key constraint creation failed:\n"
- "InnoDB: internal error number %lu\n", (ulong) error);
-
- mutex_enter(&dict_foreign_err_mutex);
- ut_print_timestamp(ef);
- fputs(" Internal error in foreign key constraint creation"
- " for table ", ef);
- ut_print_name(ef, trx, TRUE, table->name);
- fputs(".\n"
- "See the MySQL .err log in the datadir"
- " for more information.\n", ef);
- mutex_exit(&dict_foreign_err_mutex);
-
- return(error);
- }
-
- return(DB_SUCCESS);
-}
-
-/********************************************************************//**
-Add a single foreign key field definition to the data dictionary tables in
-the database.
-@return error code or DB_SUCCESS */
-static
-ulint
-dict_create_add_foreign_field_to_dictionary(
-/*========================================*/
- ulint field_nr, /*!< in: foreign field number */
- dict_table_t* table, /*!< in: table */
- dict_foreign_t* foreign, /*!< in: foreign */
- trx_t* trx) /*!< in: transaction */
-{
- pars_info_t* info = pars_info_create();
-
- pars_info_add_str_literal(info, "id", foreign->id);
-
- pars_info_add_int4_literal(info, "pos", field_nr);
-
- pars_info_add_str_literal(info, "for_col_name",
- foreign->foreign_col_names[field_nr]);
-
- pars_info_add_str_literal(info, "ref_col_name",
- foreign->referenced_col_names[field_nr]);
-
- return(dict_foreign_eval_sql(
- info,
- "PROCEDURE P () IS\n"
- "BEGIN\n"
- "INSERT INTO SYS_FOREIGN_COLS VALUES"
- "(:id, :pos, :for_col_name, :ref_col_name);\n"
- "END;\n",
- table, foreign, trx));
-}
-
-/********************************************************************//**
-Add a single foreign key definition to the data dictionary tables in the
-database. We also generate names to constraints that were not named by the
-user. A generated constraint has a name of the format
-databasename/tablename_ibfk_NUMBER, where the numbers start from 1, and
-are given locally for this table, that is, the number is not global, as in
-the old format constraints < 4.0.18 it used to be.
-@return error code or DB_SUCCESS */
-static
-ulint
-dict_create_add_foreign_to_dictionary(
-/*==================================*/
- ulint* id_nr, /*!< in/out: number to use in id generation;
- incremented if used */
- dict_table_t* table, /*!< in: table */
- dict_foreign_t* foreign,/*!< in: foreign */
- trx_t* trx) /*!< in: transaction */
-{
- ulint error;
- ulint i;
-
- pars_info_t* info = pars_info_create();
-
- if (foreign->id == NULL) {
- /* Generate a new constraint id */
- ulint namelen = strlen(table->name);
- char* id = mem_heap_alloc(foreign->heap, namelen + 20);
- /* no overflow if number < 1e13 */
- sprintf(id, "%s_ibfk_%lu", table->name, (ulong) (*id_nr)++);
- foreign->id = id;
- }
-
- pars_info_add_str_literal(info, "id", foreign->id);
-
- pars_info_add_str_literal(info, "for_name", table->name);
-
- pars_info_add_str_literal(info, "ref_name",
- foreign->referenced_table_name);
-
- pars_info_add_int4_literal(info, "n_cols",
- foreign->n_fields + (foreign->type << 24));
-
- error = dict_foreign_eval_sql(info,
- "PROCEDURE P () IS\n"
- "BEGIN\n"
- "INSERT INTO SYS_FOREIGN VALUES"
- "(:id, :for_name, :ref_name, :n_cols);\n"
- "END;\n"
- , table, foreign, trx);
-
- if (error != DB_SUCCESS) {
-
- return(error);
- }
-
- for (i = 0; i < foreign->n_fields; i++) {
- error = dict_create_add_foreign_field_to_dictionary(
- i, table, foreign, trx);
-
- if (error != DB_SUCCESS) {
-
- return(error);
- }
- }
-
- error = dict_foreign_eval_sql(NULL,
- "PROCEDURE P () IS\n"
- "BEGIN\n"
- "COMMIT WORK;\n"
- "END;\n"
- , table, foreign, trx);
-
- return(error);
-}
-
-/********************************************************************//**
-Adds foreign key definitions to data dictionary tables in the database.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-ulint
-dict_create_add_foreigns_to_dictionary(
-/*===================================*/
- ulint start_id,/*!< in: if we are actually doing ALTER TABLE
- ADD CONSTRAINT, we want to generate constraint
- numbers which are bigger than in the table so
- far; we number the constraints from
- start_id + 1 up; start_id should be set to 0 if
- we are creating a new table, or if the table
- so far has no constraints for which the name
- was generated here */
- dict_table_t* table, /*!< in: table */
- trx_t* trx) /*!< in: transaction */
-{
- dict_foreign_t* foreign;
- ulint number = start_id + 1;
- ulint error;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- if (NULL == dict_table_get_low("SYS_FOREIGN")) {
- fprintf(stderr,
- "InnoDB: table SYS_FOREIGN not found"
- " in internal data dictionary\n");
-
- return(DB_ERROR);
- }
-
- for (foreign = UT_LIST_GET_FIRST(table->foreign_list);
- foreign;
- foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) {
-
- error = dict_create_add_foreign_to_dictionary(&number, table,
- foreign, trx);
-
- if (error != DB_SUCCESS) {
-
- return(error);
- }
- }
-
- return(DB_SUCCESS);
-}
diff --git a/storage/innodb_plugin/dict/dict0dict.c b/storage/innodb_plugin/dict/dict0dict.c
deleted file mode 100644
index 2e524a5a2e3..00000000000
--- a/storage/innodb_plugin/dict/dict0dict.c
+++ /dev/null
@@ -1,4851 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file dict/dict0dict.c
-Data dictionary system
-
-Created 1/8/1996 Heikki Tuuri
-***********************************************************************/
-
-#include "dict0dict.h"
-
-#ifdef UNIV_NONINL
-#include "dict0dict.ic"
-#endif
-
-/** dummy index for ROW_FORMAT=REDUNDANT supremum and infimum records */
-UNIV_INTERN dict_index_t* dict_ind_redundant;
-/** dummy index for ROW_FORMAT=COMPACT supremum and infimum records */
-UNIV_INTERN dict_index_t* dict_ind_compact;
-
-#ifndef UNIV_HOTBACKUP
-#include "buf0buf.h"
-#include "data0type.h"
-#include "mach0data.h"
-#include "dict0boot.h"
-#include "dict0mem.h"
-#include "dict0crea.h"
-#include "trx0undo.h"
-#include "btr0btr.h"
-#include "btr0cur.h"
-#include "btr0sea.h"
-#include "page0zip.h"
-#include "page0page.h"
-#include "pars0pars.h"
-#include "pars0sym.h"
-#include "que0que.h"
-#include "rem0cmp.h"
-#include "row0merge.h"
-#include "m_ctype.h" /* my_isspace() */
-#include "ha_prototypes.h" /* innobase_strcasecmp() */
-
-#include <ctype.h>
-
-/** the dictionary system */
-UNIV_INTERN dict_sys_t* dict_sys = NULL;
-
-/** @brief the data dictionary rw-latch protecting dict_sys
-
-table create, drop, etc. reserve this in X-mode; implicit or
-backround operations purge, rollback, foreign key checks reserve this
-in S-mode; we cannot trust that MySQL protects implicit or background
-operations a table drop since MySQL does not know of them; therefore
-we need this; NOTE: a transaction which reserves this must keep book
-on the mode in trx_struct::dict_operation_lock_mode */
-UNIV_INTERN rw_lock_t dict_operation_lock;
-
-#define DICT_HEAP_SIZE 100 /*!< initial memory heap size when
- creating a table or index object */
-#define DICT_POOL_PER_TABLE_HASH 512 /*!< buffer pool max size per table
- hash table fixed size in bytes */
-#define DICT_POOL_PER_VARYING 4 /*!< buffer pool max size per data
- dictionary varying size in bytes */
-
-/** Identifies generated InnoDB foreign key names */
-static char dict_ibfk[] = "_ibfk_";
-
-/*******************************************************************//**
-Tries to find column names for the index and sets the col field of the
-index.
-@return TRUE if the column names were found */
-static
-ibool
-dict_index_find_cols(
-/*=================*/
- dict_table_t* table, /*!< in: table */
- dict_index_t* index); /*!< in: index */
-/*******************************************************************//**
-Builds the internal dictionary cache representation for a clustered
-index, containing also system fields not defined by the user.
-@return own: the internal representation of the clustered index */
-static
-dict_index_t*
-dict_index_build_internal_clust(
-/*============================*/
- const dict_table_t* table, /*!< in: table */
- dict_index_t* index); /*!< in: user representation of
- a clustered index */
-/*******************************************************************//**
-Builds the internal dictionary cache representation for a non-clustered
-index, containing also system fields not defined by the user.
-@return own: the internal representation of the non-clustered index */
-static
-dict_index_t*
-dict_index_build_internal_non_clust(
-/*================================*/
- const dict_table_t* table, /*!< in: table */
- dict_index_t* index); /*!< in: user representation of
- a non-clustered index */
-/**********************************************************************//**
-Removes a foreign constraint struct from the dictionary cache. */
-static
-void
-dict_foreign_remove_from_cache(
-/*===========================*/
- dict_foreign_t* foreign); /*!< in, own: foreign constraint */
-/**********************************************************************//**
-Prints a column data. */
-static
-void
-dict_col_print_low(
-/*===============*/
- const dict_table_t* table, /*!< in: table */
- const dict_col_t* col); /*!< in: column */
-/**********************************************************************//**
-Prints an index data. */
-static
-void
-dict_index_print_low(
-/*=================*/
- dict_index_t* index); /*!< in: index */
-/**********************************************************************//**
-Prints a field data. */
-static
-void
-dict_field_print_low(
-/*=================*/
- dict_field_t* field); /*!< in: field */
-/*********************************************************************//**
-Frees a foreign key struct. */
-static
-void
-dict_foreign_free(
-/*==============*/
- dict_foreign_t* foreign); /*!< in, own: foreign key struct */
-
-/* Stream for storing detailed information about the latest foreign key
-and unique key errors */
-UNIV_INTERN FILE* dict_foreign_err_file = NULL;
-/* mutex protecting the foreign and unique error buffers */
-UNIV_INTERN mutex_t dict_foreign_err_mutex;
-
-/******************************************************************//**
-Makes all characters in a NUL-terminated UTF-8 string lower case. */
-UNIV_INTERN
-void
-dict_casedn_str(
-/*============*/
- char* a) /*!< in/out: string to put in lower case */
-{
- innobase_casedn_str(a);
-}
-
-/********************************************************************//**
-Checks if the database name in two table names is the same.
-@return TRUE if same db name */
-UNIV_INTERN
-ibool
-dict_tables_have_same_db(
-/*=====================*/
- const char* name1, /*!< in: table name in the form
- dbname '/' tablename */
- const char* name2) /*!< in: table name in the form
- dbname '/' tablename */
-{
- for (; *name1 == *name2; name1++, name2++) {
- if (*name1 == '/') {
- return(TRUE);
- }
- ut_a(*name1); /* the names must contain '/' */
- }
- return(FALSE);
-}
-
-/********************************************************************//**
-Return the end of table name where we have removed dbname and '/'.
-@return table name */
-UNIV_INTERN
-const char*
-dict_remove_db_name(
-/*================*/
- const char* name) /*!< in: table name in the form
- dbname '/' tablename */
-{
- const char* s = strchr(name, '/');
- ut_a(s);
-
- return(s + 1);
-}
-
-/********************************************************************//**
-Get the database name length in a table name.
-@return database name length */
-UNIV_INTERN
-ulint
-dict_get_db_name_len(
-/*=================*/
- const char* name) /*!< in: table name in the form
- dbname '/' tablename */
-{
- const char* s;
- s = strchr(name, '/');
- ut_a(s);
- return(s - name);
-}
-
-/********************************************************************//**
-Reserves the dictionary system mutex for MySQL. */
-UNIV_INTERN
-void
-dict_mutex_enter_for_mysql(void)
-/*============================*/
-{
- mutex_enter(&(dict_sys->mutex));
-}
-
-/********************************************************************//**
-Releases the dictionary system mutex for MySQL. */
-UNIV_INTERN
-void
-dict_mutex_exit_for_mysql(void)
-/*===========================*/
-{
- mutex_exit(&(dict_sys->mutex));
-}
-
-/********************************************************************//**
-Decrements the count of open MySQL handles to a table. */
-UNIV_INTERN
-void
-dict_table_decrement_handle_count(
-/*==============================*/
- dict_table_t* table, /*!< in/out: table */
- ibool dict_locked) /*!< in: TRUE=data dictionary locked */
-{
- if (!dict_locked) {
- mutex_enter(&dict_sys->mutex);
- }
-
- ut_ad(mutex_own(&dict_sys->mutex));
- ut_a(table->n_mysql_handles_opened > 0);
-
- table->n_mysql_handles_opened--;
-
- if (!dict_locked) {
- mutex_exit(&dict_sys->mutex);
- }
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Returns a column's name.
-@return column name. NOTE: not guaranteed to stay valid if table is
-modified in any way (columns added, etc.). */
-UNIV_INTERN
-const char*
-dict_table_get_col_name(
-/*====================*/
- const dict_table_t* table, /*!< in: table */
- ulint col_nr) /*!< in: column number */
-{
- ulint i;
- const char* s;
-
- ut_ad(table);
- ut_ad(col_nr < table->n_def);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- s = table->col_names;
- if (s) {
- for (i = 0; i < col_nr; i++) {
- s += strlen(s) + 1;
- }
- }
-
- return(s);
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Acquire the autoinc lock. */
-UNIV_INTERN
-void
-dict_table_autoinc_lock(
-/*====================*/
- dict_table_t* table) /*!< in/out: table */
-{
- mutex_enter(&table->autoinc_mutex);
-}
-
-/********************************************************************//**
-Unconditionally set the autoinc counter. */
-UNIV_INTERN
-void
-dict_table_autoinc_initialize(
-/*==========================*/
- dict_table_t* table, /*!< in/out: table */
- ib_uint64_t value) /*!< in: next value to assign to a row */
-{
- ut_ad(mutex_own(&table->autoinc_mutex));
-
- table->autoinc = value;
-}
-
-/********************************************************************//**
-Reads the next autoinc value (== autoinc counter value), 0 if not yet
-initialized.
-@return value for a new row, or 0 */
-UNIV_INTERN
-ib_uint64_t
-dict_table_autoinc_read(
-/*====================*/
- const dict_table_t* table) /*!< in: table */
-{
- ut_ad(mutex_own(&table->autoinc_mutex));
-
- return(table->autoinc);
-}
-
-/********************************************************************//**
-Updates the autoinc counter if the value supplied is greater than the
-current value. */
-UNIV_INTERN
-void
-dict_table_autoinc_update_if_greater(
-/*=================================*/
-
- dict_table_t* table, /*!< in/out: table */
- ib_uint64_t value) /*!< in: value which was assigned to a row */
-{
- ut_ad(mutex_own(&table->autoinc_mutex));
-
- if (value > table->autoinc) {
-
- table->autoinc = value;
- }
-}
-
-/********************************************************************//**
-Release the autoinc lock. */
-UNIV_INTERN
-void
-dict_table_autoinc_unlock(
-/*======================*/
- dict_table_t* table) /*!< in/out: table */
-{
- mutex_exit(&table->autoinc_mutex);
-}
-
-/**********************************************************************//**
-Looks for an index with the given table and index id.
-NOTE that we do not reserve the dictionary mutex.
-@return index or NULL if not found from cache */
-UNIV_INTERN
-dict_index_t*
-dict_index_get_on_id_low(
-/*=====================*/
- dict_table_t* table, /*!< in: table */
- dulint id) /*!< in: index id */
-{
- dict_index_t* index;
-
- index = dict_table_get_first_index(table);
-
- while (index) {
- if (0 == ut_dulint_cmp(id, index->id)) {
- /* Found */
-
- return(index);
- }
-
- index = dict_table_get_next_index(index);
- }
-
- return(NULL);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************************//**
-Looks for column n in an index.
-@return position in internal representation of the index;
-ULINT_UNDEFINED if not contained */
-UNIV_INTERN
-ulint
-dict_index_get_nth_col_pos(
-/*=======================*/
- const dict_index_t* index, /*!< in: index */
- ulint n) /*!< in: column number */
-{
- const dict_field_t* field;
- const dict_col_t* col;
- ulint pos;
- ulint n_fields;
-
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- col = dict_table_get_nth_col(index->table, n);
-
- if (dict_index_is_clust(index)) {
-
- return(dict_col_get_clust_pos(col, index));
- }
-
- n_fields = dict_index_get_n_fields(index);
-
- for (pos = 0; pos < n_fields; pos++) {
- field = dict_index_get_nth_field(index, pos);
-
- if (col == field->col && field->prefix_len == 0) {
-
- return(pos);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Returns TRUE if the index contains a column or a prefix of that column.
-@return TRUE if contains the column or its prefix */
-UNIV_INTERN
-ibool
-dict_index_contains_col_or_prefix(
-/*==============================*/
- const dict_index_t* index, /*!< in: index */
- ulint n) /*!< in: column number */
-{
- const dict_field_t* field;
- const dict_col_t* col;
- ulint pos;
- ulint n_fields;
-
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- if (dict_index_is_clust(index)) {
-
- return(TRUE);
- }
-
- col = dict_table_get_nth_col(index->table, n);
-
- n_fields = dict_index_get_n_fields(index);
-
- for (pos = 0; pos < n_fields; pos++) {
- field = dict_index_get_nth_field(index, pos);
-
- if (col == field->col) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/********************************************************************//**
-Looks for a matching field in an index. The column has to be the same. The
-column in index must be complete, or must contain a prefix longer than the
-column in index2. That is, we must be able to construct the prefix in index2
-from the prefix in index.
-@return position in internal representation of the index;
-ULINT_UNDEFINED if not contained */
-UNIV_INTERN
-ulint
-dict_index_get_nth_field_pos(
-/*=========================*/
- const dict_index_t* index, /*!< in: index from which to search */
- const dict_index_t* index2, /*!< in: index */
- ulint n) /*!< in: field number in index2 */
-{
- const dict_field_t* field;
- const dict_field_t* field2;
- ulint n_fields;
- ulint pos;
-
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- field2 = dict_index_get_nth_field(index2, n);
-
- n_fields = dict_index_get_n_fields(index);
-
- for (pos = 0; pos < n_fields; pos++) {
- field = dict_index_get_nth_field(index, pos);
-
- if (field->col == field2->col
- && (field->prefix_len == 0
- || (field->prefix_len >= field2->prefix_len
- && field2->prefix_len != 0))) {
-
- return(pos);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-/**********************************************************************//**
-Returns a table object based on table id.
-@return table, NULL if does not exist */
-UNIV_INTERN
-dict_table_t*
-dict_table_get_on_id(
-/*=================*/
- dulint table_id, /*!< in: table id */
- trx_t* trx) /*!< in: transaction handle */
-{
- dict_table_t* table;
-
- if (ut_dulint_cmp(table_id, DICT_FIELDS_ID) <= 0
- || trx->dict_operation_lock_mode == RW_X_LATCH) {
- /* It is a system table which will always exist in the table
- cache: we avoid acquiring the dictionary mutex, because
- if we are doing a rollback to handle an error in TABLE
- CREATE, for example, we already have the mutex! */
-
- ut_ad(mutex_own(&(dict_sys->mutex))
- || trx->dict_operation_lock_mode == RW_X_LATCH);
-
- return(dict_table_get_on_id_low(table_id));
- }
-
- mutex_enter(&(dict_sys->mutex));
-
- table = dict_table_get_on_id_low(table_id);
-
- mutex_exit(&(dict_sys->mutex));
-
- return(table);
-}
-
-/********************************************************************//**
-Looks for column n position in the clustered index.
-@return position in internal representation of the clustered index */
-UNIV_INTERN
-ulint
-dict_table_get_nth_col_pos(
-/*=======================*/
- const dict_table_t* table, /*!< in: table */
- ulint n) /*!< in: column number */
-{
- return(dict_index_get_nth_col_pos(dict_table_get_first_index(table),
- n));
-}
-
-/********************************************************************//**
-Checks if a column is in the ordering columns of the clustered index of a
-table. Column prefixes are treated like whole columns.
-@return TRUE if the column, or its prefix, is in the clustered key */
-UNIV_INTERN
-ibool
-dict_table_col_in_clustered_key(
-/*============================*/
- const dict_table_t* table, /*!< in: table */
- ulint n) /*!< in: column number */
-{
- const dict_index_t* index;
- const dict_field_t* field;
- const dict_col_t* col;
- ulint pos;
- ulint n_fields;
-
- ut_ad(table);
-
- col = dict_table_get_nth_col(table, n);
-
- index = dict_table_get_first_index(table);
-
- n_fields = dict_index_get_n_unique(index);
-
- for (pos = 0; pos < n_fields; pos++) {
- field = dict_index_get_nth_field(index, pos);
-
- if (col == field->col) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/**********************************************************************//**
-Inits the data dictionary module. */
-UNIV_INTERN
-void
-dict_init(void)
-/*===========*/
-{
- dict_sys = mem_alloc(sizeof(dict_sys_t));
-
- mutex_create(&dict_sys->mutex, SYNC_DICT);
-
- dict_sys->table_hash = hash_create(buf_pool_get_curr_size()
- / (DICT_POOL_PER_TABLE_HASH
- * UNIV_WORD_SIZE));
- dict_sys->table_id_hash = hash_create(buf_pool_get_curr_size()
- / (DICT_POOL_PER_TABLE_HASH
- * UNIV_WORD_SIZE));
- dict_sys->size = 0;
-
- UT_LIST_INIT(dict_sys->table_LRU);
-
- rw_lock_create(&dict_operation_lock, SYNC_DICT_OPERATION);
-
- dict_foreign_err_file = os_file_create_tmpfile();
- ut_a(dict_foreign_err_file);
-
- mutex_create(&dict_foreign_err_mutex, SYNC_ANY_LATCH);
-}
-
-/**********************************************************************//**
-Returns a table object and optionally increment its MySQL open handle count.
-NOTE! This is a high-level function to be used mainly from outside the
-'dict' directory. Inside this directory dict_table_get_low is usually the
-appropriate function.
-@return table, NULL if does not exist */
-UNIV_INTERN
-dict_table_t*
-dict_table_get(
-/*===========*/
- const char* table_name, /*!< in: table name */
- ibool inc_mysql_count)/*!< in: whether to increment the open
- handle count on the table */
-{
- dict_table_t* table;
-
- mutex_enter(&(dict_sys->mutex));
-
- table = dict_table_get_low(table_name);
-
- if (inc_mysql_count && table) {
- table->n_mysql_handles_opened++;
- }
-
- mutex_exit(&(dict_sys->mutex));
-
- if (table != NULL) {
- if (!table->stat_initialized) {
- /* If table->ibd_file_missing == TRUE, this will
- print an error message and return without doing
- anything. */
- dict_update_statistics(table);
- }
- }
-
- return(table);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Adds system columns to a table object. */
-UNIV_INTERN
-void
-dict_table_add_system_columns(
-/*==========================*/
- dict_table_t* table, /*!< in/out: table */
- mem_heap_t* heap) /*!< in: temporary heap */
-{
- ut_ad(table);
- ut_ad(table->n_def == table->n_cols - DATA_N_SYS_COLS);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
- ut_ad(!table->cached);
-
- /* NOTE: the system columns MUST be added in the following order
- (so that they can be indexed by the numerical value of DATA_ROW_ID,
- etc.) and as the last columns of the table memory object.
- The clustered index will not always physically contain all
- system columns. */
-
- dict_mem_table_add_col(table, heap, "DB_ROW_ID", DATA_SYS,
- DATA_ROW_ID | DATA_NOT_NULL,
- DATA_ROW_ID_LEN);
-#if DATA_ROW_ID != 0
-#error "DATA_ROW_ID != 0"
-#endif
- dict_mem_table_add_col(table, heap, "DB_TRX_ID", DATA_SYS,
- DATA_TRX_ID | DATA_NOT_NULL,
- DATA_TRX_ID_LEN);
-#if DATA_TRX_ID != 1
-#error "DATA_TRX_ID != 1"
-#endif
- dict_mem_table_add_col(table, heap, "DB_ROLL_PTR", DATA_SYS,
- DATA_ROLL_PTR | DATA_NOT_NULL,
- DATA_ROLL_PTR_LEN);
-#if DATA_ROLL_PTR != 2
-#error "DATA_ROLL_PTR != 2"
-#endif
-
- /* This check reminds that if a new system column is added to
- the program, it should be dealt with here */
-#if DATA_N_SYS_COLS != 3
-#error "DATA_N_SYS_COLS != 3"
-#endif
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Adds a table object to the dictionary cache. */
-UNIV_INTERN
-void
-dict_table_add_to_cache(
-/*====================*/
- dict_table_t* table, /*!< in: table */
- mem_heap_t* heap) /*!< in: temporary heap */
-{
- ulint fold;
- ulint id_fold;
- ulint i;
- ulint row_len;
-
- /* The lower limit for what we consider a "big" row */
-#define BIG_ROW_SIZE 1024
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- dict_table_add_system_columns(table, heap);
-
- table->cached = TRUE;
-
- fold = ut_fold_string(table->name);
- id_fold = ut_fold_dulint(table->id);
-
- row_len = 0;
- for (i = 0; i < table->n_def; i++) {
- ulint col_len = dict_col_get_max_size(
- dict_table_get_nth_col(table, i));
-
- row_len += col_len;
-
- /* If we have a single unbounded field, or several gigantic
- fields, mark the maximum row size as BIG_ROW_SIZE. */
- if (row_len >= BIG_ROW_SIZE || col_len >= BIG_ROW_SIZE) {
- row_len = BIG_ROW_SIZE;
-
- break;
- }
- }
-
- table->big_rows = row_len >= BIG_ROW_SIZE;
-
- /* Look for a table with the same name: error if such exists */
- {
- dict_table_t* table2;
- HASH_SEARCH(name_hash, dict_sys->table_hash, fold,
- dict_table_t*, table2, ut_ad(table2->cached),
- ut_strcmp(table2->name, table->name) == 0);
- ut_a(table2 == NULL);
-
-#ifdef UNIV_DEBUG
- /* Look for the same table pointer with a different name */
- HASH_SEARCH_ALL(name_hash, dict_sys->table_hash,
- dict_table_t*, table2, ut_ad(table2->cached),
- table2 == table);
- ut_ad(table2 == NULL);
-#endif /* UNIV_DEBUG */
- }
-
- /* Look for a table with the same id: error if such exists */
- {
- dict_table_t* table2;
- HASH_SEARCH(id_hash, dict_sys->table_id_hash, id_fold,
- dict_table_t*, table2, ut_ad(table2->cached),
- ut_dulint_cmp(table2->id, table->id) == 0);
- ut_a(table2 == NULL);
-
-#ifdef UNIV_DEBUG
- /* Look for the same table pointer with a different id */
- HASH_SEARCH_ALL(id_hash, dict_sys->table_id_hash,
- dict_table_t*, table2, ut_ad(table2->cached),
- table2 == table);
- ut_ad(table2 == NULL);
-#endif /* UNIV_DEBUG */
- }
-
- /* Add table to hash table of tables */
- HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold,
- table);
-
- /* Add table to hash table of tables based on table id */
- HASH_INSERT(dict_table_t, id_hash, dict_sys->table_id_hash, id_fold,
- table);
- /* Add table to LRU list of tables */
- UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table);
-
- dict_sys->size += mem_heap_get_size(table->heap);
-}
-
-/**********************************************************************//**
-Looks for an index with the given id. NOTE that we do not reserve
-the dictionary mutex: this function is for emergency purposes like
-printing info of a corrupt database page!
-@return index or NULL if not found from cache */
-UNIV_INTERN
-dict_index_t*
-dict_index_find_on_id_low(
-/*======================*/
- dulint id) /*!< in: index id */
-{
- dict_table_t* table;
- dict_index_t* index;
-
- table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
-
- while (table) {
- index = dict_table_get_first_index(table);
-
- while (index) {
- if (0 == ut_dulint_cmp(id, index->id)) {
- /* Found */
-
- return(index);
- }
-
- index = dict_table_get_next_index(index);
- }
-
- table = UT_LIST_GET_NEXT(table_LRU, table);
- }
-
- return(NULL);
-}
-
-/**********************************************************************//**
-Renames a table object.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-dict_table_rename_in_cache(
-/*=======================*/
- dict_table_t* table, /*!< in/out: table */
- const char* new_name, /*!< in: new name */
- ibool rename_also_foreigns)/*!< in: in ALTER TABLE we want
- to preserve the original table name
- in constraints which reference it */
-{
- dict_foreign_t* foreign;
- dict_index_t* index;
- ulint fold;
- ulint old_size;
- const char* old_name;
-
- ut_ad(table);
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- old_size = mem_heap_get_size(table->heap);
- old_name = table->name;
-
- fold = ut_fold_string(new_name);
-
- /* Look for a table with the same name: error if such exists */
- {
- dict_table_t* table2;
- HASH_SEARCH(name_hash, dict_sys->table_hash, fold,
- dict_table_t*, table2, ut_ad(table2->cached),
- (ut_strcmp(table2->name, new_name) == 0));
- if (UNIV_LIKELY_NULL(table2)) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: dictionary cache"
- " already contains a table ", stderr);
- ut_print_name(stderr, NULL, TRUE, new_name);
- fputs("\n"
- "InnoDB: cannot rename table ", stderr);
- ut_print_name(stderr, NULL, TRUE, old_name);
- putc('\n', stderr);
- return(FALSE);
- }
- }
-
- /* If the table is stored in a single-table tablespace, rename the
- .ibd file */
-
- if (table->space != 0) {
- if (table->dir_path_of_temp_table != NULL) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: trying to rename a"
- " TEMPORARY TABLE ", stderr);
- ut_print_name(stderr, NULL, TRUE, old_name);
- fputs(" (", stderr);
- ut_print_filename(stderr,
- table->dir_path_of_temp_table);
- fputs(" )\n", stderr);
- return(FALSE);
- } else if (!fil_rename_tablespace(old_name, table->space,
- new_name)) {
- return(FALSE);
- }
- }
-
- /* Remove table from the hash tables of tables */
- HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash,
- ut_fold_string(old_name), table);
- table->name = mem_heap_strdup(table->heap, new_name);
-
- /* Add table to hash table of tables */
- HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold,
- table);
- dict_sys->size += (mem_heap_get_size(table->heap) - old_size);
-
- /* Update the table_name field in indexes */
- index = dict_table_get_first_index(table);
-
- while (index != NULL) {
- index->table_name = table->name;
-
- index = dict_table_get_next_index(index);
- }
-
- if (!rename_also_foreigns) {
- /* In ALTER TABLE we think of the rename table operation
- in the direction table -> temporary table (#sql...)
- as dropping the table with the old name and creating
- a new with the new name. Thus we kind of drop the
- constraints from the dictionary cache here. The foreign key
- constraints will be inherited to the new table from the
- system tables through a call of dict_load_foreigns. */
-
- /* Remove the foreign constraints from the cache */
- foreign = UT_LIST_GET_LAST(table->foreign_list);
-
- while (foreign != NULL) {
- dict_foreign_remove_from_cache(foreign);
- foreign = UT_LIST_GET_LAST(table->foreign_list);
- }
-
- /* Reset table field in referencing constraints */
-
- foreign = UT_LIST_GET_FIRST(table->referenced_list);
-
- while (foreign != NULL) {
- foreign->referenced_table = NULL;
- foreign->referenced_index = NULL;
-
- foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
- }
-
- /* Make the list of referencing constraints empty */
-
- UT_LIST_INIT(table->referenced_list);
-
- return(TRUE);
- }
-
- /* Update the table name fields in foreign constraints, and update also
- the constraint id of new format >= 4.0.18 constraints. Note that at
- this point we have already changed table->name to the new name. */
-
- foreign = UT_LIST_GET_FIRST(table->foreign_list);
-
- while (foreign != NULL) {
- if (ut_strlen(foreign->foreign_table_name)
- < ut_strlen(table->name)) {
- /* Allocate a longer name buffer;
- TODO: store buf len to save memory */
-
- foreign->foreign_table_name
- = mem_heap_alloc(foreign->heap,
- ut_strlen(table->name) + 1);
- }
-
- strcpy(foreign->foreign_table_name, table->name);
-
- if (strchr(foreign->id, '/')) {
- ulint db_len;
- char* old_id;
-
- /* This is a >= 4.0.18 format id */
-
- old_id = mem_strdup(foreign->id);
-
- if (ut_strlen(foreign->id) > ut_strlen(old_name)
- + ((sizeof dict_ibfk) - 1)
- && !memcmp(foreign->id, old_name,
- ut_strlen(old_name))
- && !memcmp(foreign->id + ut_strlen(old_name),
- dict_ibfk, (sizeof dict_ibfk) - 1)) {
-
- /* This is a generated >= 4.0.18 format id */
-
- if (strlen(table->name) > strlen(old_name)) {
- foreign->id = mem_heap_alloc(
- foreign->heap,
- strlen(table->name)
- + strlen(old_id) + 1);
- }
-
- /* Replace the prefix 'databasename/tablename'
- with the new names */
- strcpy(foreign->id, table->name);
- strcat(foreign->id,
- old_id + ut_strlen(old_name));
- } else {
- /* This is a >= 4.0.18 format id where the user
- gave the id name */
- db_len = dict_get_db_name_len(table->name) + 1;
-
- if (dict_get_db_name_len(table->name)
- > dict_get_db_name_len(foreign->id)) {
-
- foreign->id = mem_heap_alloc(
- foreign->heap,
- db_len + strlen(old_id) + 1);
- }
-
- /* Replace the database prefix in id with the
- one from table->name */
-
- ut_memcpy(foreign->id, table->name, db_len);
-
- strcpy(foreign->id + db_len,
- dict_remove_db_name(old_id));
- }
-
- mem_free(old_id);
- }
-
- foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
- }
-
- foreign = UT_LIST_GET_FIRST(table->referenced_list);
-
- while (foreign != NULL) {
- if (ut_strlen(foreign->referenced_table_name)
- < ut_strlen(table->name)) {
- /* Allocate a longer name buffer;
- TODO: store buf len to save memory */
-
- foreign->referenced_table_name = mem_heap_alloc(
- foreign->heap, strlen(table->name) + 1);
- }
-
- strcpy(foreign->referenced_table_name, table->name);
-
- foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
- }
-
- return(TRUE);
-}
-
-/**********************************************************************//**
-Change the id of a table object in the dictionary cache. This is used in
-DISCARD TABLESPACE. */
-UNIV_INTERN
-void
-dict_table_change_id_in_cache(
-/*==========================*/
- dict_table_t* table, /*!< in/out: table object already in cache */
- dulint new_id) /*!< in: new id to set */
-{
- ut_ad(table);
- ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- /* Remove the table from the hash table of id's */
-
- HASH_DELETE(dict_table_t, id_hash, dict_sys->table_id_hash,
- ut_fold_dulint(table->id), table);
- table->id = new_id;
-
- /* Add the table back to the hash table */
- HASH_INSERT(dict_table_t, id_hash, dict_sys->table_id_hash,
- ut_fold_dulint(table->id), table);
-}
-
-/**********************************************************************//**
-Removes a table object from the dictionary cache. */
-UNIV_INTERN
-void
-dict_table_remove_from_cache(
-/*=========================*/
- dict_table_t* table) /*!< in, own: table */
-{
- dict_foreign_t* foreign;
- dict_index_t* index;
- ulint size;
-
- ut_ad(table);
- ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
-#if 0
- fputs("Removing table ", stderr);
- ut_print_name(stderr, table->name, ULINT_UNDEFINED);
- fputs(" from dictionary cache\n", stderr);
-#endif
-
- /* Remove the foreign constraints from the cache */
- foreign = UT_LIST_GET_LAST(table->foreign_list);
-
- while (foreign != NULL) {
- dict_foreign_remove_from_cache(foreign);
- foreign = UT_LIST_GET_LAST(table->foreign_list);
- }
-
- /* Reset table field in referencing constraints */
-
- foreign = UT_LIST_GET_FIRST(table->referenced_list);
-
- while (foreign != NULL) {
- foreign->referenced_table = NULL;
- foreign->referenced_index = NULL;
-
- foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
- }
-
- /* Remove the indexes from the cache */
- index = UT_LIST_GET_LAST(table->indexes);
-
- while (index != NULL) {
- dict_index_remove_from_cache(table, index);
- index = UT_LIST_GET_LAST(table->indexes);
- }
-
- /* Remove table from the hash tables of tables */
- HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash,
- ut_fold_string(table->name), table);
- HASH_DELETE(dict_table_t, id_hash, dict_sys->table_id_hash,
- ut_fold_dulint(table->id), table);
-
- /* Remove table from LRU list of tables */
- UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table);
-
- size = mem_heap_get_size(table->heap);
-
- ut_ad(dict_sys->size >= size);
-
- dict_sys->size -= size;
-
- dict_mem_table_free(table);
-}
-
-/****************************************************************//**
-If the given column name is reserved for InnoDB system columns, return
-TRUE.
-@return TRUE if name is reserved */
-UNIV_INTERN
-ibool
-dict_col_name_is_reserved(
-/*======================*/
- const char* name) /*!< in: column name */
-{
- /* This check reminds that if a new system column is added to
- the program, it should be dealt with here. */
-#if DATA_N_SYS_COLS != 3
-#error "DATA_N_SYS_COLS != 3"
-#endif
-
- static const char* reserved_names[] = {
- "DB_ROW_ID", "DB_TRX_ID", "DB_ROLL_PTR"
- };
-
- ulint i;
-
- for (i = 0; i < UT_ARR_SIZE(reserved_names); i++) {
- if (innobase_strcasecmp(name, reserved_names[i]) == 0) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/****************************************************************//**
-If an undo log record for this table might not fit on a single page,
-return TRUE.
-@return TRUE if the undo log record could become too big */
-static
-ibool
-dict_index_too_big_for_undo(
-/*========================*/
- const dict_table_t* table, /*!< in: table */
- const dict_index_t* new_index) /*!< in: index */
-{
- /* Make sure that all column prefixes will fit in the undo log record
- in trx_undo_page_report_modify() right after trx_undo_page_init(). */
-
- ulint i;
- const dict_index_t* clust_index
- = dict_table_get_first_index(table);
- ulint undo_page_len
- = TRX_UNDO_PAGE_HDR - TRX_UNDO_PAGE_HDR_SIZE
- + 2 /* next record pointer */
- + 1 /* type_cmpl */
- + 11 /* trx->undo_no */ + 11 /* table->id */
- + 1 /* rec_get_info_bits() */
- + 11 /* DB_TRX_ID */
- + 11 /* DB_ROLL_PTR */
- + 10 + FIL_PAGE_DATA_END /* trx_undo_left() */
- + 2/* pointer to previous undo log record */;
-
- if (UNIV_UNLIKELY(!clust_index)) {
- ut_a(dict_index_is_clust(new_index));
- clust_index = new_index;
- }
-
- /* Add the size of the ordering columns in the
- clustered index. */
- for (i = 0; i < clust_index->n_uniq; i++) {
- const dict_col_t* col
- = dict_index_get_nth_col(clust_index, i);
-
- /* Use the maximum output size of
- mach_write_compressed(), although the encoded
- length should always fit in 2 bytes. */
- undo_page_len += 5 + dict_col_get_max_size(col);
- }
-
- /* Add the old values of the columns to be updated.
- First, the amount and the numbers of the columns.
- These are written by mach_write_compressed() whose
- maximum output length is 5 bytes. However, given that
- the quantities are below REC_MAX_N_FIELDS (10 bits),
- the maximum length is 2 bytes per item. */
- undo_page_len += 2 * (dict_table_get_n_cols(table) + 1);
-
- for (i = 0; i < clust_index->n_def; i++) {
- const dict_col_t* col
- = dict_index_get_nth_col(clust_index, i);
- ulint max_size
- = dict_col_get_max_size(col);
- ulint fixed_size
- = dict_col_get_fixed_size(col,
- dict_table_is_comp(table));
-
- if (fixed_size) {
- /* Fixed-size columns are stored locally. */
- max_size = fixed_size;
- } else if (max_size <= BTR_EXTERN_FIELD_REF_SIZE * 2) {
- /* Short columns are stored locally. */
- } else if (!col->ord_part) {
- /* See if col->ord_part would be set
- because of new_index. */
- ulint j;
-
- for (j = 0; j < new_index->n_uniq; j++) {
- if (dict_index_get_nth_col(
- new_index, j) == col) {
-
- goto is_ord_part;
- }
- }
-
- /* This is not an ordering column in any index.
- Thus, it can be stored completely externally. */
- max_size = BTR_EXTERN_FIELD_REF_SIZE;
- } else {
-is_ord_part:
- /* This is an ordering column in some index.
- A long enough prefix must be written to the
- undo log. See trx_undo_page_fetch_ext(). */
-
- if (max_size > REC_MAX_INDEX_COL_LEN) {
- max_size = REC_MAX_INDEX_COL_LEN;
- }
-
- max_size += BTR_EXTERN_FIELD_REF_SIZE;
- }
-
- undo_page_len += 5 + max_size;
- }
-
- return(undo_page_len >= UNIV_PAGE_SIZE);
-}
-
-/****************************************************************//**
-If a record of this index might not fit on a single B-tree page,
-return TRUE.
-@return TRUE if the index record could become too big */
-static
-ibool
-dict_index_too_big_for_tree(
-/*========================*/
- const dict_table_t* table, /*!< in: table */
- const dict_index_t* new_index) /*!< in: index */
-{
- ulint zip_size;
- ulint comp;
- ulint i;
- /* maximum possible storage size of a record */
- ulint rec_max_size;
- /* maximum allowed size of a record on a leaf page */
- ulint page_rec_max;
- /* maximum allowed size of a node pointer record */
- ulint page_ptr_max;
-
- comp = dict_table_is_comp(table);
- zip_size = dict_table_zip_size(table);
-
- if (zip_size && zip_size < UNIV_PAGE_SIZE) {
- /* On a compressed page, two records must fit in the
- uncompressed page modification log. On compressed
- pages with zip_size == UNIV_PAGE_SIZE, this limit will
- never be reached. */
- ut_ad(comp);
- /* The maximum allowed record size is the size of
- an empty page, minus a byte for recoding the heap
- number in the page modification log. The maximum
- allowed node pointer size is half that. */
- page_rec_max = page_zip_empty_size(new_index->n_fields,
- zip_size) - 1;
- page_ptr_max = page_rec_max / 2;
- /* On a compressed page, there is a two-byte entry in
- the dense page directory for every record. But there
- is no record header. */
- rec_max_size = 2;
- } else {
- /* The maximum allowed record size is half a B-tree
- page. No additional sparse page directory entry will
- be generated for the first few user records. */
- page_rec_max = page_get_free_space_of_empty(comp) / 2;
- page_ptr_max = page_rec_max;
- /* Each record has a header. */
- rec_max_size = comp
- ? REC_N_NEW_EXTRA_BYTES
- : REC_N_OLD_EXTRA_BYTES;
- }
-
- if (comp) {
- /* Include the "null" flags in the
- maximum possible record size. */
- rec_max_size += UT_BITS_IN_BYTES(new_index->n_nullable);
- } else {
- /* For each column, include a 2-byte offset and a
- "null" flag. The 1-byte format is only used in short
- records that do not contain externally stored columns.
- Such records could never exceed the page limit, even
- when using the 2-byte format. */
- rec_max_size += 2 * new_index->n_fields;
- }
-
- /* Compute the maximum possible record size. */
- for (i = 0; i < new_index->n_fields; i++) {
- const dict_field_t* field
- = dict_index_get_nth_field(new_index, i);
- const dict_col_t* col
- = dict_field_get_col(field);
- ulint field_max_size;
- ulint field_ext_max_size;
-
- /* In dtuple_convert_big_rec(), variable-length columns
- that are longer than BTR_EXTERN_FIELD_REF_SIZE * 2
- may be chosen for external storage.
-
- Fixed-length columns, and all columns of secondary
- index records are always stored inline. */
-
- /* Determine the maximum length of the index field.
- The field_ext_max_size should be computed as the worst
- case in rec_get_converted_size_comp() for
- REC_STATUS_ORDINARY records. */
-
- field_max_size = dict_col_get_fixed_size(col, comp);
- if (field_max_size) {
- /* dict_index_add_col() should guarantee this */
- ut_ad(!field->prefix_len
- || field->fixed_len == field->prefix_len);
- /* Fixed lengths are not encoded
- in ROW_FORMAT=COMPACT. */
- field_ext_max_size = 0;
- goto add_field_size;
- }
-
- field_max_size = dict_col_get_max_size(col);
- field_ext_max_size = field_max_size < 256 ? 1 : 2;
-
- if (field->prefix_len) {
- if (field->prefix_len < field_max_size) {
- field_max_size = field->prefix_len;
- }
- } else if (field_max_size > BTR_EXTERN_FIELD_REF_SIZE * 2
- && dict_index_is_clust(new_index)) {
-
- /* In the worst case, we have a locally stored
- column of BTR_EXTERN_FIELD_REF_SIZE * 2 bytes.
- The length can be stored in one byte. If the
- column were stored externally, the lengths in
- the clustered index page would be
- BTR_EXTERN_FIELD_REF_SIZE and 2. */
- field_max_size = BTR_EXTERN_FIELD_REF_SIZE * 2;
- field_ext_max_size = 1;
- }
-
- if (comp) {
- /* Add the extra size for ROW_FORMAT=COMPACT.
- For ROW_FORMAT=REDUNDANT, these bytes were
- added to rec_max_size before this loop. */
- rec_max_size += field_ext_max_size;
- }
-add_field_size:
- rec_max_size += field_max_size;
-
- /* Check the size limit on leaf pages. */
- if (UNIV_UNLIKELY(rec_max_size >= page_rec_max)) {
-
- return(TRUE);
- }
-
- /* Check the size limit on non-leaf pages. Records
- stored in non-leaf B-tree pages consist of the unique
- columns of the record (the key columns of the B-tree)
- and a node pointer field. When we have processed the
- unique columns, rec_max_size equals the size of the
- node pointer record minus the node pointer column. */
- if (i + 1 == dict_index_get_n_unique_in_tree(new_index)
- && rec_max_size + REC_NODE_PTR_SIZE >= page_ptr_max) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/**********************************************************************//**
-Adds an index to the dictionary cache.
-@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
-UNIV_INTERN
-ulint
-dict_index_add_to_cache(
-/*====================*/
- dict_table_t* table, /*!< in: table on which the index is */
- dict_index_t* index, /*!< in, own: index; NOTE! The index memory
- object is freed in this function! */
- ulint page_no,/*!< in: root page number of the index */
- ibool strict) /*!< in: TRUE=refuse to create the index
- if records could be too big to fit in
- an B-tree page */
-{
- dict_index_t* new_index;
- ulint n_ord;
- ulint i;
-
- ut_ad(index);
- ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_ad(index->n_def == index->n_fields);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- ut_ad(mem_heap_validate(index->heap));
- ut_a(!dict_index_is_clust(index)
- || UT_LIST_GET_LEN(table->indexes) == 0);
-
- if (!dict_index_find_cols(table, index)) {
-
- return(DB_CORRUPTION);
- }
-
- /* Build the cache internal representation of the index,
- containing also the added system fields */
-
- if (dict_index_is_clust(index)) {
- new_index = dict_index_build_internal_clust(table, index);
- } else {
- new_index = dict_index_build_internal_non_clust(table, index);
- }
-
- /* Set the n_fields value in new_index to the actual defined
- number of fields in the cache internal representation */
-
- new_index->n_fields = new_index->n_def;
-
- if (strict && dict_index_too_big_for_tree(table, new_index)) {
-too_big:
- dict_mem_index_free(new_index);
- dict_mem_index_free(index);
- return(DB_TOO_BIG_RECORD);
- }
-
- if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
- n_ord = new_index->n_fields;
- } else {
- n_ord = new_index->n_uniq;
- }
-
- switch (dict_table_get_format(table)) {
- case DICT_TF_FORMAT_51:
- /* ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT store
- prefixes of externally stored columns locally within
- the record. There are no special considerations for
- the undo log record size. */
- goto undo_size_ok;
-
- case DICT_TF_FORMAT_ZIP:
- /* In ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPRESSED,
- column prefix indexes require that prefixes of
- externally stored columns are written to the undo log.
- This may make the undo log record bigger than the
- record on the B-tree page. The maximum size of an
- undo log record is the page size. That must be
- checked for below. */
- break;
-
-#if DICT_TF_FORMAT_ZIP != DICT_TF_FORMAT_MAX
-# error "DICT_TF_FORMAT_ZIP != DICT_TF_FORMAT_MAX"
-#endif
- }
-
- for (i = 0; i < n_ord; i++) {
- const dict_field_t* field
- = dict_index_get_nth_field(new_index, i);
- const dict_col_t* col
- = dict_field_get_col(field);
-
- /* In dtuple_convert_big_rec(), variable-length columns
- that are longer than BTR_EXTERN_FIELD_REF_SIZE * 2
- may be chosen for external storage. If the column appears
- in an ordering column of an index, a longer prefix of
- REC_MAX_INDEX_COL_LEN will be copied to the undo log
- by trx_undo_page_report_modify() and
- trx_undo_page_fetch_ext(). It suffices to check the
- capacity of the undo log whenever new_index includes
- a column prefix on a column that may be stored externally. */
-
- if (field->prefix_len /* prefix index */
- && !col->ord_part /* not yet ordering column */
- && !dict_col_get_fixed_size(col, TRUE) /* variable-length */
- && dict_col_get_max_size(col)
- > BTR_EXTERN_FIELD_REF_SIZE * 2 /* long enough */) {
-
- if (dict_index_too_big_for_undo(table, new_index)) {
- /* An undo log record might not fit in
- a single page. Refuse to create this index. */
-
- goto too_big;
- }
-
- break;
- }
- }
-
-undo_size_ok:
- /* Flag the ordering columns */
-
- for (i = 0; i < n_ord; i++) {
-
- dict_index_get_nth_field(new_index, i)->col->ord_part = 1;
- }
-
- /* Add the new index as the last index for the table */
-
- UT_LIST_ADD_LAST(indexes, table->indexes, new_index);
- new_index->table = table;
- new_index->table_name = table->name;
-
- new_index->search_info = btr_search_info_create(new_index->heap);
-
- new_index->stat_index_size = 1;
- new_index->stat_n_leaf_pages = 1;
-
- new_index->page = page_no;
- rw_lock_create(&new_index->lock, SYNC_INDEX_TREE);
-
- if (!UNIV_UNLIKELY(new_index->type & DICT_UNIVERSAL)) {
-
- new_index->stat_n_diff_key_vals = mem_heap_alloc(
- new_index->heap,
- (1 + dict_index_get_n_unique(new_index))
- * sizeof(ib_int64_t));
- /* Give some sensible values to stat_n_... in case we do
- not calculate statistics quickly enough */
-
- for (i = 0; i <= dict_index_get_n_unique(new_index); i++) {
-
- new_index->stat_n_diff_key_vals[i] = 100;
- }
- }
-
- dict_sys->size += mem_heap_get_size(new_index->heap);
-
- dict_mem_index_free(index);
-
- return(DB_SUCCESS);
-}
-
-/**********************************************************************//**
-Removes an index from the dictionary cache. */
-UNIV_INTERN
-void
-dict_index_remove_from_cache(
-/*=========================*/
- dict_table_t* table, /*!< in/out: table */
- dict_index_t* index) /*!< in, own: index */
-{
- ulint size;
- ulint retries = 0;
- btr_search_t* info;
-
- ut_ad(table && index);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- /* We always create search info whether or not adaptive
- hash index is enabled or not. */
- info = index->search_info;
- ut_ad(info);
-
- /* We are not allowed to free the in-memory index struct
- dict_index_t until all entries in the adaptive hash index
- that point to any of the page belonging to his b-tree index
- are dropped. This is so because dropping of these entries
- require access to dict_index_t struct. To avoid such scenario
- We keep a count of number of such pages in the search_info and
- only free the dict_index_t struct when this count drops to
- zero. */
-
- for (;;) {
- ulint ref_count = btr_search_info_get_ref_count(info);
- if (ref_count == 0) {
- break;
- }
-
- /* Sleep for 10ms before trying again. */
- os_thread_sleep(10000);
- ++retries;
-
- if (retries % 500 == 0) {
- /* No luck after 5 seconds of wait. */
- fprintf(stderr, "InnoDB: Error: Waited for"
- " %lu secs for hash index"
- " ref_count (%lu) to drop"
- " to 0.\n"
- "index: \"%s\""
- " table: \"%s\"\n",
- retries/100,
- ref_count,
- index->name,
- table->name);
- }
-
- /* To avoid a hang here we commit suicide if the
- ref_count doesn't drop to zero in 600 seconds. */
- if (retries >= 60000) {
- ut_error;
- }
- }
-
- rw_lock_free(&index->lock);
-
- /* Remove the index from the list of indexes of the table */
- UT_LIST_REMOVE(indexes, table->indexes, index);
-
- size = mem_heap_get_size(index->heap);
-
- ut_ad(dict_sys->size >= size);
-
- dict_sys->size -= size;
-
- dict_mem_index_free(index);
-}
-
-/*******************************************************************//**
-Tries to find column names for the index and sets the col field of the
-index.
-@return TRUE if the column names were found */
-static
-ibool
-dict_index_find_cols(
-/*=================*/
- dict_table_t* table, /*!< in: table */
- dict_index_t* index) /*!< in: index */
-{
- ulint i;
-
- ut_ad(table && index);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- for (i = 0; i < index->n_fields; i++) {
- ulint j;
- dict_field_t* field = dict_index_get_nth_field(index, i);
-
- for (j = 0; j < table->n_cols; j++) {
- if (!strcmp(dict_table_get_col_name(table, j),
- field->name)) {
- field->col = dict_table_get_nth_col(table, j);
-
- goto found;
- }
- }
-
-#ifdef UNIV_DEBUG
- /* It is an error not to find a matching column. */
- fputs("InnoDB: Error: no matching column for ", stderr);
- ut_print_name(stderr, NULL, FALSE, field->name);
- fputs(" in ", stderr);
- dict_index_name_print(stderr, NULL, index);
- fputs("!\n", stderr);
-#endif /* UNIV_DEBUG */
- return(FALSE);
-
-found:
- ;
- }
-
- return(TRUE);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*******************************************************************//**
-Adds a column to index. */
-UNIV_INTERN
-void
-dict_index_add_col(
-/*===============*/
- dict_index_t* index, /*!< in/out: index */
- const dict_table_t* table, /*!< in: table */
- dict_col_t* col, /*!< in: column */
- ulint prefix_len) /*!< in: column prefix length */
-{
- dict_field_t* field;
- const char* col_name;
-
- col_name = dict_table_get_col_name(table, dict_col_get_no(col));
-
- dict_mem_index_add_field(index, col_name, prefix_len);
-
- field = dict_index_get_nth_field(index, index->n_def - 1);
-
- field->col = col;
- field->fixed_len = (unsigned int) dict_col_get_fixed_size(
- col, dict_table_is_comp(table));
-
- if (prefix_len && field->fixed_len > prefix_len) {
- field->fixed_len = (unsigned int) prefix_len;
- }
-
- /* Long fixed-length fields that need external storage are treated as
- variable-length fields, so that the extern flag can be embedded in
- the length word. */
-
- if (field->fixed_len > DICT_MAX_INDEX_COL_LEN) {
- field->fixed_len = 0;
- }
-#if DICT_MAX_INDEX_COL_LEN != 768
- /* The comparison limit above must be constant. If it were
- changed, the disk format of some fixed-length columns would
- change, which would be a disaster. */
-# error "DICT_MAX_INDEX_COL_LEN != 768"
-#endif
-
- if (!(col->prtype & DATA_NOT_NULL)) {
- index->n_nullable++;
- }
-}
-
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Copies fields contained in index2 to index1. */
-static
-void
-dict_index_copy(
-/*============*/
- dict_index_t* index1, /*!< in: index to copy to */
- dict_index_t* index2, /*!< in: index to copy from */
- const dict_table_t* table, /*!< in: table */
- ulint start, /*!< in: first position to copy */
- ulint end) /*!< in: last position to copy */
-{
- dict_field_t* field;
- ulint i;
-
- /* Copy fields contained in index2 */
-
- for (i = start; i < end; i++) {
-
- field = dict_index_get_nth_field(index2, i);
- dict_index_add_col(index1, table, field->col,
- field->prefix_len);
- }
-}
-
-/*******************************************************************//**
-Copies types of fields contained in index to tuple. */
-UNIV_INTERN
-void
-dict_index_copy_types(
-/*==================*/
- dtuple_t* tuple, /*!< in/out: data tuple */
- const dict_index_t* index, /*!< in: index */
- ulint n_fields) /*!< in: number of
- field types to copy */
-{
- ulint i;
-
- if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
- dtuple_set_types_binary(tuple, n_fields);
-
- return;
- }
-
- for (i = 0; i < n_fields; i++) {
- const dict_field_t* ifield;
- dtype_t* dfield_type;
-
- ifield = dict_index_get_nth_field(index, i);
- dfield_type = dfield_get_type(dtuple_get_nth_field(tuple, i));
- dict_col_copy_type(dict_field_get_col(ifield), dfield_type);
- }
-}
-
-/*******************************************************************//**
-Copies types of columns contained in table to tuple and sets all
-fields of the tuple to the SQL NULL value. This function should
-be called right after dtuple_create(). */
-UNIV_INTERN
-void
-dict_table_copy_types(
-/*==================*/
- dtuple_t* tuple, /*!< in/out: data tuple */
- const dict_table_t* table) /*!< in: table */
-{
- ulint i;
-
- for (i = 0; i < dtuple_get_n_fields(tuple); i++) {
-
- dfield_t* dfield = dtuple_get_nth_field(tuple, i);
- dtype_t* dtype = dfield_get_type(dfield);
-
- dfield_set_null(dfield);
- dict_col_copy_type(dict_table_get_nth_col(table, i), dtype);
- }
-}
-
-/*******************************************************************//**
-Builds the internal dictionary cache representation for a clustered
-index, containing also system fields not defined by the user.
-@return own: the internal representation of the clustered index */
-static
-dict_index_t*
-dict_index_build_internal_clust(
-/*============================*/
- const dict_table_t* table, /*!< in: table */
- dict_index_t* index) /*!< in: user representation of
- a clustered index */
-{
- dict_index_t* new_index;
- dict_field_t* field;
- ulint fixed_size;
- ulint trx_id_pos;
- ulint i;
- ibool* indexed;
-
- ut_ad(table && index);
- ut_ad(dict_index_is_clust(index));
- ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- /* Create a new index object with certainly enough fields */
- new_index = dict_mem_index_create(table->name,
- index->name, table->space,
- index->type,
- index->n_fields + table->n_cols);
-
- /* Copy other relevant data from the old index struct to the new
- struct: it inherits the values */
-
- new_index->n_user_defined_cols = index->n_fields;
-
- new_index->id = index->id;
-
- /* Copy the fields of index */
- dict_index_copy(new_index, index, table, 0, index->n_fields);
-
- if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
- /* No fixed number of fields determines an entry uniquely */
-
- new_index->n_uniq = REC_MAX_N_FIELDS;
-
- } else if (dict_index_is_unique(index)) {
- /* Only the fields defined so far are needed to identify
- the index entry uniquely */
-
- new_index->n_uniq = new_index->n_def;
- } else {
- /* Also the row id is needed to identify the entry */
- new_index->n_uniq = 1 + new_index->n_def;
- }
-
- new_index->trx_id_offset = 0;
-
- if (!dict_index_is_ibuf(index)) {
- /* Add system columns, trx id first */
-
- trx_id_pos = new_index->n_def;
-
-#if DATA_ROW_ID != 0
-# error "DATA_ROW_ID != 0"
-#endif
-#if DATA_TRX_ID != 1
-# error "DATA_TRX_ID != 1"
-#endif
-#if DATA_ROLL_PTR != 2
-# error "DATA_ROLL_PTR != 2"
-#endif
-
- if (!dict_index_is_unique(index)) {
- dict_index_add_col(new_index, table,
- dict_table_get_sys_col(
- table, DATA_ROW_ID),
- 0);
- trx_id_pos++;
- }
-
- dict_index_add_col(new_index, table,
- dict_table_get_sys_col(table, DATA_TRX_ID),
- 0);
-
- dict_index_add_col(new_index, table,
- dict_table_get_sys_col(table,
- DATA_ROLL_PTR),
- 0);
-
- for (i = 0; i < trx_id_pos; i++) {
-
- fixed_size = dict_col_get_fixed_size(
- dict_index_get_nth_col(new_index, i),
- dict_table_is_comp(table));
-
- if (fixed_size == 0) {
- new_index->trx_id_offset = 0;
-
- break;
- }
-
- if (dict_index_get_nth_field(new_index, i)->prefix_len
- > 0) {
- new_index->trx_id_offset = 0;
-
- break;
- }
-
- new_index->trx_id_offset += (unsigned int) fixed_size;
- }
-
- }
-
- /* Remember the table columns already contained in new_index */
- indexed = mem_zalloc(table->n_cols * sizeof *indexed);
-
- /* Mark the table columns already contained in new_index */
- for (i = 0; i < new_index->n_def; i++) {
-
- field = dict_index_get_nth_field(new_index, i);
-
- /* If there is only a prefix of the column in the index
- field, do not mark the column as contained in the index */
-
- if (field->prefix_len == 0) {
-
- indexed[field->col->ind] = TRUE;
- }
- }
-
- /* Add to new_index non-system columns of table not yet included
- there */
- for (i = 0; i + DATA_N_SYS_COLS < (ulint) table->n_cols; i++) {
-
- dict_col_t* col = dict_table_get_nth_col(table, i);
- ut_ad(col->mtype != DATA_SYS);
-
- if (!indexed[col->ind]) {
- dict_index_add_col(new_index, table, col, 0);
- }
- }
-
- mem_free(indexed);
-
- ut_ad(dict_index_is_ibuf(index)
- || (UT_LIST_GET_LEN(table->indexes) == 0));
-
- new_index->cached = TRUE;
-
- return(new_index);
-}
-
-/*******************************************************************//**
-Constructs the dictionary cache form of a secondary (non-clustered) index.
-The result contains the user-defined fields of the index followed by those
-fields of the clustered index's unique part that are not already fully
-(i.e. without a column prefix) present among the user-defined fields.
-@return own: the internal representation of the non-clustered index */
-static
-dict_index_t*
-dict_index_build_internal_non_clust(
-/*================================*/
-	const dict_table_t*	table,	/*!< in: table */
-	dict_index_t*		index)	/*!< in: user representation of
-					a non-clustered index */
-{
-	dict_index_t*	primary;
-	dict_index_t*	secondary;
-	ibool*		covered;
-	ulint		pos;
-
-	ut_ad(table && index);
-	ut_ad(!dict_index_is_clust(index));
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
-	/* The head of the table's index list is expected to be the
-	clustered index */
-	primary = UT_LIST_GET_FIRST(table->indexes);
-
-	ut_ad(primary);
-	ut_ad(dict_index_is_clust(primary));
-	ut_ad(!(primary->type & DICT_UNIVERSAL));
-
-	/* Allocate the internal index object */
-	secondary = dict_mem_index_create(
-		table->name, index->name, index->space, index->type,
-		index->n_fields + 1 + primary->n_uniq);
-
-	/* Values inherited from the user representation */
-	secondary->n_user_defined_cols = index->n_fields;
-	secondary->id = index->id;
-
-	/* Start with the user-defined fields */
-	dict_index_copy(secondary, index, table, 0, index->n_fields);
-
-	/* covered[n] becomes TRUE when table column n is contained as
-	a whole in the new index */
-	covered = mem_zalloc(table->n_cols * sizeof *covered);
-
-	pos = 0;
-	while (pos < secondary->n_def) {
-		dict_field_t*	f = dict_index_get_nth_field(secondary, pos);
-
-		/* A field that indexes only a column prefix does not
-		count as containing the column */
-		if (f->prefix_len == 0) {
-			covered[f->col->ind] = TRUE;
-		}
-
-		pos++;
-	}
-
-	/* Append the clustered index fields that are needed to identify
-	the clustered index entry uniquely and are not yet covered */
-	pos = 0;
-	while (pos < primary->n_uniq) {
-		dict_field_t*	f = dict_index_get_nth_field(primary, pos);
-
-		if (!covered[f->col->ind]) {
-			dict_index_add_col(secondary, table, f->col,
-					   f->prefix_len);
-		}
-
-		pos++;
-	}
-
-	mem_free(covered);
-
-	/* A unique index is unique on the user-defined fields alone;
-	otherwise all defined fields are needed */
-	secondary->n_uniq = dict_index_is_unique(index)
-		? index->n_fields
-		: secondary->n_def;
-
-	/* n_fields reflects the number of fields actually defined */
-	secondary->n_fields = secondary->n_def;
-
-	secondary->cached = TRUE;
-
-	return(secondary);
-}
-
-/*====================== FOREIGN KEY PROCESSING ========================*/
-
-/*********************************************************************//**
-Tells whether any foreign key constraint refers to the given table, i.e.
-whether the table's referenced_list is non-empty.
-@return TRUE if table is referenced by a foreign key */
-UNIV_INTERN
-ibool
-dict_table_is_referenced_by_foreign_key(
-/*====================================*/
-	const dict_table_t*	table)	/*!< in: InnoDB table */
-{
-	if (UT_LIST_GET_LEN(table->referenced_list) > 0) {
-
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
-
-/*********************************************************************//**
-Looks up the foreign key constraint in which the given index acts as the
-referenced index. Only the table's referenced_list is searched.
-@return the first constraint whose referenced_index is index, or NULL
-if there is none */
-UNIV_INTERN
-dict_foreign_t*
-dict_table_get_referenced_constraint(
-/*=================================*/
-	dict_table_t*	table,	/*!< in: InnoDB table */
-	dict_index_t*	index)	/*!< in: InnoDB index */
-{
-	dict_foreign_t*	fk;
-
-	ut_ad(index != NULL);
-	ut_ad(table != NULL);
-
-	fk = UT_LIST_GET_FIRST(table->referenced_list);
-
-	/* Walk the list until it ends or a constraint using index as
-	its referenced index is reached */
-	while (fk != NULL && fk->referenced_index != index) {
-
-		fk = UT_LIST_GET_NEXT(referenced_list, fk);
-	}
-
-	return(fk);
-}
-
-/*********************************************************************//**
-Checks if an index is defined for a foreign key constraint. Only the
-table's foreign_list is searched; a constraint on that list matches when
-the index is either its foreign index or its referenced index.
-@return pointer to the first matching foreign key struct, or NULL if
-there is none */
-UNIV_INTERN
-dict_foreign_t*
-dict_table_get_foreign_constraint(
-/*==============================*/
-	dict_table_t*	table,	/*!< in: InnoDB table */
-	dict_index_t*	index)	/*!< in: InnoDB index */
-{
-	dict_foreign_t*	fk;
-
-	ut_ad(index != NULL);
-	ut_ad(table != NULL);
-
-	fk = UT_LIST_GET_FIRST(table->foreign_list);
-
-	/* Skip constraints that use the index in neither role */
-	while (fk != NULL
-	       && fk->foreign_index != index
-	       && fk->referenced_index != index) {
-
-		fk = UT_LIST_GET_NEXT(foreign_list, fk);
-	}
-
-	return(fk);
-}
-
-/*********************************************************************//**
-Releases a foreign key struct by freeing the memory heap it refers to
-through foreign->heap. */
-static
-void
-dict_foreign_free(
-/*==============*/
-	dict_foreign_t*	foreign)	/*!< in, own: foreign key struct */
-{
-	mem_heap_t*	fk_heap = foreign->heap;
-
-	mem_heap_free(fk_heap);
-}
-
-/**********************************************************************//**
-Detaches a foreign constraint struct from the lists of the tables it is
-linked to and then frees it. The caller must hold dict_sys->mutex. */
-static
-void
-dict_foreign_remove_from_cache(
-/*===========================*/
-	dict_foreign_t*	foreign)	/*!< in, own: foreign constraint */
-{
-	dict_table_t*	ref_table;
-	dict_table_t*	for_table;
-
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-	ut_a(foreign);
-
-	/* Unlink from the referenced table, if one is attached */
-	ref_table = foreign->referenced_table;
-
-	if (ref_table != NULL) {
-		UT_LIST_REMOVE(referenced_list,
-			       ref_table->referenced_list,
-			       foreign);
-	}
-
-	/* Unlink from the foreign (child) table, if one is attached */
-	for_table = foreign->foreign_table;
-
-	if (for_table != NULL) {
-		UT_LIST_REMOVE(foreign_list,
-			       for_table->foreign_list,
-			       foreign);
-	}
-
-	dict_foreign_free(foreign);
-}
-
-/**********************************************************************//**
-Searches a table for a foreign constraint with the given id. The table's
-foreign_list is searched first and its referenced_list second. The caller
-must hold dict_sys->mutex.
-@return foreign constraint, or NULL if no constraint has that id */
-static
-dict_foreign_t*
-dict_foreign_find(
-/*==============*/
-	dict_table_t*	table,	/*!< in: table object */
-	const char*	id)	/*!< in: foreign constraint id */
-{
-	dict_foreign_t*	fk;
-
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	for (fk = UT_LIST_GET_FIRST(table->foreign_list);
-	     fk != NULL;
-	     fk = UT_LIST_GET_NEXT(foreign_list, fk)) {
-
-		if (ut_strcmp(id, fk->id) == 0) {
-
-			return(fk);
-		}
-	}
-
-	for (fk = UT_LIST_GET_FIRST(table->referenced_list);
-	     fk != NULL;
-	     fk = UT_LIST_GET_NEXT(referenced_list, fk)) {
-
-		if (ut_strcmp(id, fk->id) == 0) {
-
-			return(fk);
-		}
-	}
-
-	return(NULL);
-}
-
-/*********************************************************************//**
-Searches the indexes of a table for one whose leading fields are exactly
-the given columns, in the given order. Indexes that are marked to be
-dropped, and the index types_idx itself, are never returned. A field that
-covers only a column prefix disqualifies its index.
-@return matching index, NULL if not found */
-static
-dict_index_t*
-dict_foreign_find_index(
-/*====================*/
-	dict_table_t*	table,	/*!< in: table */
-	const char**	columns,/*!< in: array of column names */
-	ulint		n_cols,	/*!< in: number of columns */
-	dict_index_t*	types_idx, /*!< in: NULL or an index to whose types the
-				column types must match */
-	ibool		check_charsets,
-				/*!< in: whether to check charsets.
-				only has an effect if types_idx != NULL */
-	ulint		check_null)
-				/*!< in: nonzero if none of the columns must
-				be declared NOT NULL */
-{
-	dict_index_t*	candidate;
-
-	for (candidate = dict_table_get_first_index(table);
-	     candidate != NULL;
-	     candidate = dict_table_get_next_index(candidate)) {
-
-		ulint	n_matched;
-
-		/* Never report the reference index itself or an index
-		that is about to be dropped */
-		if (candidate->to_be_dropped || types_idx == candidate) {
-
-			continue;
-		}
-
-		/* Too few fields to hold all the columns */
-		if (dict_index_get_n_fields(candidate) < n_cols) {
-
-			continue;
-		}
-
-		for (n_matched = 0; n_matched < n_cols; n_matched++) {
-			dict_field_t*	fld;
-			const char*	fld_name;
-
-			fld = dict_index_get_nth_field(candidate, n_matched);
-
-			fld_name = dict_table_get_col_name(
-				table, dict_col_get_no(fld->col));
-
-			if (fld->prefix_len != 0) {
-				/* Column prefix indexes are not
-				accepted here */
-
-				break;
-			}
-
-			if (innobase_strcasecmp(columns[n_matched],
-						fld_name) != 0) {
-				break;
-			}
-
-			/* A NOT NULL column ends the whole search when
-			check_null is set */
-			if (check_null
-			    && (fld->col->prtype & DATA_NOT_NULL)) {
-
-				return(NULL);
-			}
-
-			if (types_idx != NULL
-			    && !cmp_cols_are_equal(
-				    dict_index_get_nth_col(candidate,
-							   n_matched),
-				    dict_index_get_nth_col(types_idx,
-							   n_matched),
-				    check_charsets)) {
-
-				break;
-			}
-		}
-
-		if (n_matched == n_cols) {
-			/* Every column matched */
-
-			return(candidate);
-		}
-	}
-
-	return(NULL);
-}
-
-/**********************************************************************//**
-Looks in the foreign (child) table of a constraint for another index that
-could stand in for foreign->foreign_index: one that is not marked for
-deletion, starts with the constraint's foreign columns in the same order,
-and whose column types match those of foreign->foreign_index.
-@return index equivalent to foreign->foreign_index, or NULL */
-UNIV_INTERN
-dict_index_t*
-dict_foreign_find_equiv_index(
-/*==========================*/
-	dict_foreign_t*	foreign)/*!< in: foreign key */
-{
-	dict_index_t*	equiv;
-
-	ut_a(foreign != NULL);
-
-	/* The search excludes foreign->foreign_index itself, compares
-	the column types (including charsets) against it, and allows
-	the columns to be NULL */
-	equiv = dict_foreign_find_index(
-		foreign->foreign_table,
-		foreign->foreign_col_names, foreign->n_fields,
-		foreign->foreign_index, TRUE, FALSE);
-
-	return(equiv);
-}
-
-/**********************************************************************//**
-Finds an index of a table by index name and column names. The index must
-have exactly n_cols user-defined ordering fields and their column names
-must equal the given ones (compared case-insensitively) in order. When
-several indexes qualify, the one with the greatest id is chosen.
-@return matching index, NULL if not found */
-UNIV_INTERN
-dict_index_t*
-dict_table_get_index_by_max_id(
-/*===========================*/
-	dict_table_t*	table,	/*!< in: table */
-	const char*	name,	/*!< in: the index name to find */
-	const char**	columns,/*!< in: array of column names */
-	ulint		n_cols)	/*!< in: number of columns */
-{
-	dict_index_t*	best = NULL;
-	dict_index_t*	cur;
-
-	for (cur = dict_table_get_first_index(table);
-	     cur != NULL;
-	     cur = dict_table_get_next_index(cur)) {
-
-		ulint	n_matched;
-
-		if (ut_strcmp(cur->name, name) != 0
-		    || dict_index_get_n_ordering_defined_by_user(cur)
-		    != n_cols) {
-
-			continue;
-		}
-
-		for (n_matched = 0; n_matched < n_cols; n_matched++) {
-			dict_field_t*	fld;
-			const char*	fld_name;
-
-			fld = dict_index_get_nth_field(cur, n_matched);
-
-			fld_name = dict_table_get_col_name(
-				table, dict_col_get_no(fld->col));
-
-			if (innobase_strcasecmp(
-				    columns[n_matched], fld_name) != 0) {
-
-				break;
-			}
-		}
-
-		if (n_matched != n_cols) {
-			/* Some column name differed */
-
-			continue;
-		}
-
-		/* Keep the qualifying index with the higher id */
-		if (best == NULL
-		    || ut_dulint_cmp(cur->id, best->id) > 0) {
-
-			best = cur;
-		}
-	}
-
-	return(best);
-}
-
-/**********************************************************************//**
-Starts a foreign key error report: rewinds the output stream, then writes
-a timestamp and a heading that names the table. */
-static
-void
-dict_foreign_error_report_low(
-/*==========================*/
-	FILE*		file,	/*!< in: output stream */
-	const char*	name)	/*!< in: table name */
-{
-	/* Each report starts again from the beginning of the stream */
-	rewind(file);
-
-	ut_print_timestamp(file);
-
-	fprintf(file,
-		" Error in foreign key constraint of table %s:\n", name);
-}
-
-/**********************************************************************//**
-Writes a complete foreign key error report: the heading, the message, the
-constraint in CREATE format and, when the constraint has a foreign index,
-the name of that index plus a pointer to the manual. The whole report is
-written while holding dict_foreign_err_mutex. */
-static
-void
-dict_foreign_error_report(
-/*======================*/
-	FILE*		file,	/*!< in: output stream */
-	dict_foreign_t*	fk,	/*!< in: foreign key constraint */
-	const char*	msg)	/*!< in: the error message */
-{
-	dict_index_t*	fk_index;
-
-	mutex_enter(&dict_foreign_err_mutex);
-
-	dict_foreign_error_report_low(file, fk->foreign_table_name);
-
-	fputs(msg, file);
-	fputs(" Constraint:\n", file);
-	dict_print_info_on_foreign_key_in_create_format(file, NULL, fk, TRUE);
-	putc('\n', file);
-
-	fk_index = fk->foreign_index;
-
-	if (fk_index != NULL) {
-		fputs("The index in the foreign key in table is ", file);
-		ut_print_name(file, NULL, FALSE, fk_index->name);
-		fputs("\nSee " REFMAN "innodb-foreign-key-constraints.html\n"
-		      "for correct foreign key definition.\n", file);
-	}
-
-	mutex_exit(&dict_foreign_err_mutex);
-}
-
-/**********************************************************************//**
-Adds a foreign key constraint object to the dictionary cache. If the cache
-already contains an object with the same identifier, the passed-in object
-is freed and the cached object is used (and completed) instead.
-At least one of the foreign table and the referenced table must already
-be in the dictionary cache!
-On failure an error report is written to dict_foreign_err_file; if the
-passed-in object was the one being added, it is freed before returning.
-@return DB_SUCCESS, or DB_CANNOT_ADD_CONSTRAINT if no suitable index
-is found in the referenced table or in the foreign table */
-UNIV_INTERN
-ulint
-dict_foreign_add_to_cache(
-/*======================*/
- dict_foreign_t* foreign, /*!< in, own: foreign key constraint */
- ibool check_charsets) /*!< in: TRUE=check charset
- compatibility */
-{
- dict_table_t* for_table;
- dict_table_t* ref_table;
- dict_foreign_t* for_in_cache = NULL;
- dict_index_t* index;
- /* TRUE once for_in_cache has been linked into
- ref_table->referenced_list by this call, so that the link can be
- undone if the foreign table check fails afterwards */
- ibool added_to_referenced_list= FALSE;
- FILE* ef = dict_foreign_err_file;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- /* Look up both participating tables in the cache; either may be
- absent, but not both */
- for_table = dict_table_check_if_in_cache_low(
- foreign->foreign_table_name);
-
- ref_table = dict_table_check_if_in_cache_low(
- foreign->referenced_table_name);
- ut_a(for_table || ref_table);
-
- /* Check whether a constraint with this id is already cached via
- either table */
- if (for_table) {
- for_in_cache = dict_foreign_find(for_table, foreign->id);
- }
-
- if (!for_in_cache && ref_table) {
- for_in_cache = dict_foreign_find(ref_table, foreign->id);
- }
-
- if (for_in_cache) {
- /* Free the foreign object */
- mem_heap_free(foreign->heap);
- } else {
- for_in_cache = foreign;
- }
-
- /* Attach the referenced (parent) table if it is cached and not
- yet attached */
- if (for_in_cache->referenced_table == NULL && ref_table) {
- index = dict_foreign_find_index(
- ref_table,
- for_in_cache->referenced_col_names,
- for_in_cache->n_fields, for_in_cache->foreign_index,
- check_charsets, FALSE);
-
- if (index == NULL) {
- dict_foreign_error_report(
- ef, for_in_cache,
- "there is no index in referenced table"
- " which would contain\n"
- "the columns as the first columns,"
- " or the data types in the\n"
- "referenced table do not match"
- " the ones in table.");
-
- /* Only free the object if it is the one passed in;
- an object found in the cache stays there */
- if (for_in_cache == foreign) {
- mem_heap_free(foreign->heap);
- }
-
- return(DB_CANNOT_ADD_CONSTRAINT);
- }
-
- for_in_cache->referenced_table = ref_table;
- for_in_cache->referenced_index = index;
- UT_LIST_ADD_LAST(referenced_list,
- ref_table->referenced_list,
- for_in_cache);
- added_to_referenced_list = TRUE;
- }
-
- /* Attach the foreign (child) table if it is cached and not yet
- attached. The last argument is nonzero when the constraint has an
- ON DELETE SET NULL or ON UPDATE SET NULL action, in which case the
- index search rejects NOT NULL columns. */
- if (for_in_cache->foreign_table == NULL && for_table) {
- index = dict_foreign_find_index(
- for_table,
- for_in_cache->foreign_col_names,
- for_in_cache->n_fields,
- for_in_cache->referenced_index, check_charsets,
- for_in_cache->type
- & (DICT_FOREIGN_ON_DELETE_SET_NULL
- | DICT_FOREIGN_ON_UPDATE_SET_NULL));
-
- if (index == NULL) {
- dict_foreign_error_report(
- ef, for_in_cache,
- "there is no index in the table"
- " which would contain\n"
- "the columns as the first columns,"
- " or the data types in the\n"
- "table do not match"
- " the ones in the referenced table\n"
- "or one of the ON ... SET NULL columns"
- " is declared NOT NULL.");
-
- if (for_in_cache == foreign) {
- /* Undo the link made above before the object
- is freed, so that the referenced table's list
- does not keep a pointer to freed memory */
- if (added_to_referenced_list) {
- UT_LIST_REMOVE(
- referenced_list,
- ref_table->referenced_list,
- for_in_cache);
- }
-
- mem_heap_free(foreign->heap);
- }
-
- return(DB_CANNOT_ADD_CONSTRAINT);
- }
-
- for_in_cache->foreign_table = for_table;
- for_in_cache->foreign_index = index;
- UT_LIST_ADD_LAST(foreign_list,
- for_table->foreign_list,
- for_in_cache);
- }
-
- return(DB_SUCCESS);
-}
-
-/*********************************************************************//**
-Advances a pointer through a string until it stands at the start of an
-occurrence of 'string' that lies outside `` and "" quotes, or at the
-terminating NUL. The keyword comparison ignores case.
-@return scanned up to this */
-static
-const char*
-dict_scan_to(
-/*=========*/
-	const char*	ptr,	/*!< in: scan from */
-	const char*	string)	/*!< in: look for this */
-{
-	/* The quote character that is currently open, or NUL if none */
-	char	open_quote = '\0';
-
-	for (; *ptr; ptr++) {
-		ulint	k;
-
-		if (*ptr == open_quote) {
-			/* The open quote is closed by this character */
-			open_quote = '\0';
-			continue;
-		}
-
-		if (open_quote) {
-			/* Quoted text is skipped over */
-			continue;
-		}
-
-		if (*ptr == '`' || *ptr == '"') {
-			/* A quote is opened by this character */
-			open_quote = *ptr;
-			continue;
-		}
-
-		/* Unquoted text: count how many leading characters of
-		the keyword agree with the text at ptr */
-		k = 0;
-
-		while (string[k]
-		       && toupper((int)(unsigned char)(ptr[k]))
-		       == toupper((int)(unsigned char)(string[k]))) {
-			k++;
-		}
-
-		if (!string[k]) {
-			/* The whole keyword agreed: stop here */
-			break;
-		}
-	}
-
-	return(ptr);
-}
-
-/*********************************************************************//**
-Accepts a specified string as the next token after optional whitespace.
-Comparisons are case-insensitive.
-@return if string was accepted, a pointer to just after it, else the
-original ptr */
-static
-const char*
-dict_accept(
-/*========*/
-	struct charset_info_st*	cs,/*!< in: the character set of ptr */
-	const char*	ptr,	/*!< in: scan from this */
-	const char*	string,	/*!< in: accept only this string as the next
-				non-whitespace string */
-	ibool*		success)/*!< out: TRUE if accepted */
-{
-	const char*	token;
-	const char*	hit;
-
-	*success = FALSE;
-
-	/* Find the first non-whitespace character */
-	for (token = ptr; my_isspace(cs, *token); token++) {
-	}
-
-	hit = dict_scan_to(token, string);
-
-	/* The string is accepted only if the scan did not have to move
-	forward and did not stop at the end of the input */
-	if (hit == token && *hit != '\0') {
-		*success = TRUE;
-
-		return(hit + ut_strlen(string));
-	}
-
-	return(ptr);
-}
-
-/*********************************************************************//**
-Scans an id. For the lexical definition of an 'id', see the code below.
-Strips backquotes or double quotes from around the id. Leading whitespace
-is skipped first. If only the end of the string follows, or a quoted id is
-not terminated, *id is left NULL.
-@return scanned to */
-static
-const char*
-dict_scan_id(
-/*=========*/
- struct charset_info_st* cs,/*!< in: the character set of ptr */
- const char* ptr, /*!< in: scanned to */
- mem_heap_t* heap, /*!< in: heap where to allocate the id
- (NULL=id will not be allocated, but it
- will point to string near ptr) */
- const char** id, /*!< out,own: the id; NULL if no id was
- scannable */
- ibool table_id,/*!< in: TRUE=convert the allocated id
- as a table name; FALSE=convert to UTF-8 */
- ibool accept_also_dot)
- /*!< in: TRUE if also a dot can appear in a
- non-quoted id; in a quoted id it can appear
- always */
-{
- char quote = '\0';
- ulint len = 0;
- const char* s;
- char* str;
- char* dst;
-
- *id = NULL;
-
- while (my_isspace(cs, *ptr)) {
- ptr++;
- }
-
- if (*ptr == '\0') {
-
- return(ptr);
- }
-
- /* Remember and skip an opening quote character, if any */
- if (*ptr == '`' || *ptr == '"') {
- quote = *ptr++;
- }
-
- /* s marks the first character of the id itself */
- s = ptr;
-
- if (quote) {
- /* Quoted id: scan to the closing quote. A doubled quote
- character stands for one literal quote and is counted
- once in len. */
- for (;;) {
- if (!*ptr) {
- /* Syntax error */
- return(ptr);
- }
- if (*ptr == quote) {
- ptr++;
- if (*ptr != quote) {
- break;
- }
- }
- ptr++;
- len++;
- }
- } else {
- /* Unquoted id: ends at whitespace, '(', ')', ',', NUL,
- or at '.' unless accept_also_dot is set */
- while (!my_isspace(cs, *ptr) && *ptr != '(' && *ptr != ')'
- && (accept_also_dot || *ptr != '.')
- && *ptr != ',' && *ptr != '\0') {
-
- ptr++;
- }
-
- len = ptr - s;
- }
-
- if (UNIV_UNLIKELY(!heap)) {
- /* no heap given: id will point to source string */
- *id = s;
- return(ptr);
- }
-
- if (quote) {
- /* Copy the id, collapsing each doubled quote character
- into a single one */
- char* d;
- str = d = mem_heap_alloc(heap, len + 1);
- while (len--) {
- if ((*d++ = *s++) == quote) {
- s++;
- }
- }
- *d++ = 0;
- /* From here on len includes the terminating NUL */
- len = d - str;
- ut_ad(*s == quote);
- ut_ad(s + 1 == ptr);
- } else {
- str = mem_heap_strdupl(heap, s, len);
- }
-
- if (!table_id) {
-convert_id:
- /* Convert the identifier from connection character set
- to UTF-8. */
- /* The output buffer is sized at 3 * len + 1 bytes */
- len = 3 * len + 1;
- *id = dst = mem_heap_alloc(heap, len);
-
- innobase_convert_from_id(cs, dst, str, len);
- } else if (!strncmp(str, srv_mysql50_table_name_prefix,
- sizeof srv_mysql50_table_name_prefix)) {
- /* This is a pre-5.1 table name
- containing chars other than [A-Za-z0-9].
- Discard the prefix and use raw UTF-8 encoding. */
- /* NOTE(review): this compares and skips
- sizeof srv_mysql50_table_name_prefix bytes, which is the
- prefix length only if the array is declared without room
- for a terminating NUL -- confirm against its definition */
- str += sizeof srv_mysql50_table_name_prefix;
- len -= sizeof srv_mysql50_table_name_prefix;
- goto convert_id;
- } else {
- /* Encode using filename-safe characters. */
- /* The output buffer is sized at 5 * len + 1 bytes */
- len = 5 * len + 1;
- *id = dst = mem_heap_alloc(heap, len);
-
- innobase_convert_from_table_id(cs, dst, str, len);
- }
-
- return(ptr);
-}
-
-/*********************************************************************//**
-Tries to scan a column name. When a table is given, the name is looked up
-among its columns case-insensitively and, on a match, the scanned name is
-overwritten with the column name as it is stored for the table. When no
-table is given, any scannable name is accepted and *column is set to NULL.
-@return scanned to */
-static
-const char*
-dict_scan_col(
-/*==========*/
-	struct charset_info_st*	cs,	/*!< in: the character set of ptr */
-	const char*		ptr,	/*!< in: scanned to */
-	ibool*			success,/*!< out: TRUE if success */
-	dict_table_t*		table,	/*!< in: table in which the column is */
-	const dict_col_t**	column,	/*!< out: pointer to column if success */
-	mem_heap_t*		heap,	/*!< in: heap where to allocate */
-	const char**		name)	/*!< out,own: the column name;
-					NULL if no name was scannable */
-{
-	ulint	col_no;
-
-	*success = FALSE;
-
-	ptr = dict_scan_id(cs, ptr, heap, name, FALSE, TRUE);
-
-	if (*name == NULL) {
-		/* Syntax error */
-
-		return(ptr);
-	}
-
-	if (table == NULL) {
-		/* Nothing to resolve the name against */
-		*success = TRUE;
-		*column = NULL;
-
-		return(ptr);
-	}
-
-	col_no = 0;
-
-	while (col_no < dict_table_get_n_cols(table)) {
-
-		const char*	candidate = dict_table_get_col_name(
-			table, col_no);
-
-		if (innobase_strcasecmp(candidate, *name) == 0) {
-			/* Found: report the column and replace the
-			scanned spelling with the table's own */
-
-			*success = TRUE;
-			*column = dict_table_get_nth_col(table, col_no);
-			strcpy((char*) *name, candidate);
-
-			break;
-		}
-
-		col_no++;
-	}
-
-	return(ptr);
-}
-
-/*********************************************************************//**
-Scans a table name from an SQL string. The name may be qualified with a
-database name; if it is not, the database part of 'name' is used. The
-result is assembled in the form database_name/table_name and looked up in
-the dictionary.
-@return scanned to */
-static
-const char*
-dict_scan_table_name(
-/*=================*/
- struct charset_info_st* cs,/*!< in: the character set of ptr */
- const char* ptr, /*!< in: scanned to */
- dict_table_t** table, /*!< out: table object or NULL */
- const char* name, /*!< in: foreign key table name */
- ibool* success,/*!< out: TRUE if ok name found */
- mem_heap_t* heap, /*!< in: heap where to allocate the id */
- const char** ref_name)/*!< out,own: the table name;
- NULL if no name was scannable */
-{
- const char* database_name = NULL;
- ulint database_name_len = 0;
- const char* table_name = NULL;
- ulint table_name_len;
- const char* scan_name;
- char* ref;
-
- *success = FALSE;
- *table = NULL;
-
- /* Scan the first identifier; a dot ends an unquoted identifier
- here because accept_also_dot is FALSE */
- ptr = dict_scan_id(cs, ptr, heap, &scan_name, TRUE, FALSE);
-
- if (scan_name == NULL) {
-
- return(ptr); /* Syntax error */
- }
-
- if (*ptr == '.') {
- /* We scanned the database name; scan also the table name */
-
- ptr++;
-
- database_name = scan_name;
- database_name_len = strlen(database_name);
-
- ptr = dict_scan_id(cs, ptr, heap, &table_name, TRUE, FALSE);
-
- if (table_name == NULL) {
-
- return(ptr); /* Syntax error */
- }
- } else {
- /* To be able to read table dumps made with InnoDB-4.0.17 or
- earlier, we must allow the dot separator between the database
- name and the table name also to appear within a quoted
- identifier! InnoDB used to print a constraint as:
- ... REFERENCES `databasename.tablename` ...
- starting from 4.0.18 it is
- ... REFERENCES `databasename`.`tablename` ... */
- const char* s;
-
- /* Split the scanned identifier at its first dot, if any:
- the part before it is the database name, the rest the
- table name */
- for (s = scan_name; *s; s++) {
- if (*s == '.') {
- database_name = scan_name;
- database_name_len = s - scan_name;
- scan_name = ++s;
- break;/* to do: multiple dots? */
- }
- }
-
- table_name = scan_name;
- }
-
- if (database_name == NULL) {
- /* Use the database name of the foreign key table */
-
- database_name = name;
- database_name_len = dict_get_db_name_len(name);
- }
-
- table_name_len = strlen(table_name);
-
- /* Copy database_name, '/', table_name, '\0' */
- ref = mem_heap_alloc(heap, database_name_len + table_name_len + 2);
- memcpy(ref, database_name, database_name_len);
- ref[database_name_len] = '/';
- memcpy(ref + database_name_len + 1, table_name, table_name_len + 1);
- /* When __WIN__ is defined the lower-casing below is unconditional;
- otherwise it depends on srv_lower_case_table_names */
-#ifndef __WIN__
- if (srv_lower_case_table_names) {
-#endif /* !__WIN__ */
- /* The table name is always put to lower case on Windows. */
- innobase_casedn_str(ref);
-#ifndef __WIN__
- }
-#endif /* !__WIN__ */
-
- /* Success means that a name was scanned; *table may still be NULL
- if the lookup does not find the table */
- *success = TRUE;
- *ref_name = ref;
- *table = dict_table_get_low(ref);
-
- return(ptr);
-}
-
-/*********************************************************************//**
-Skips over one id without allocating a copy of it. The id is allowed to
-contain also '.'.
-@return scanned to */
-static
-const char*
-dict_skip_word(
-/*===========*/
-	struct charset_info_st*	cs,/*!< in: the character set of ptr */
-	const char*	ptr,	/*!< in: scanned to */
-	ibool*		success)/*!< out: TRUE if success, FALSE if just spaces
-				left in string or a syntax error */
-{
-	const char*	word;
-	const char*	end;
-
-	/* No heap is passed, so the id is only located, not copied */
-	end = dict_scan_id(cs, ptr, NULL, &word, FALSE, TRUE);
-
-	*success = (word != NULL) ? TRUE : FALSE;
-
-	return(end);
-}
-
-/*********************************************************************//**
-Removes MySQL comments from an SQL string. A comment is either
-(a) '#' to the end of the line,
-(b) '--[space]' to the end of the line, or
-(c) '[slash][asterisk]' till the next '[asterisk][slash]' (like the familiar
-C comment syntax).
-Comment markers inside "", `` or '' quotes are left alone. The line end
-that terminates a comment of kind (a) or (b) is kept in the output.
-@return own: SQL string stripped from comments; the caller must free
-this with mem_free()! */
-static
-char*
-dict_strip_comments(
-/*================*/
- const char* sql_string) /*!< in: SQL string */
-{
- char* str;
- const char* sptr;
- char* ptr;
- /* unclosed quote character (0 if none) */
- char quote = 0;
-
- /* The output can never be longer than the input */
- str = mem_alloc(strlen(sql_string) + 1);
-
- /* sptr reads from the input, ptr writes to the output */
- sptr = sql_string;
- ptr = str;
-
- for (;;) {
-scan_more:
- if (*sptr == '\0') {
- *ptr = '\0';
-
- ut_a(ptr <= str + strlen(sql_string));
-
- return(str);
- }
-
- /* Note: a backslash before a quote character is not treated
- specially here; any occurrence of the open quote character
- closes the quote */
- if (*sptr == quote) {
- /* Closing quote character: do not look for
- starting quote or comments. */
- quote = 0;
- } else if (quote) {
- /* Within quotes: do not look for
- starting quotes or comments. */
- } else if (*sptr == '"' || *sptr == '`' || *sptr == '\'') {
- /* Starting quote: remember the quote character. */
- quote = *sptr;
- } else if (*sptr == '#'
- || (sptr[0] == '-' && sptr[1] == '-'
- && sptr[2] == ' ')) {
- /* Line comment: skip up to, but not including, the
- line end or the end of the string */
- for (;;) {
- /* In Unix a newline is 0x0A while in Windows
- it is 0x0D followed by 0x0A */
-
- if (*sptr == (char)0x0A
- || *sptr == (char)0x0D
- || *sptr == '\0') {
-
- goto scan_more;
- }
-
- sptr++;
- }
- } else if (!quote && *sptr == '/' && *(sptr + 1) == '*') {
- /* Block comment: skip through the closing marker, or
- to the end of the string if it is never closed */
- for (;;) {
- if (*sptr == '*' && *(sptr + 1) == '/') {
-
- sptr += 2;
-
- goto scan_more;
- }
-
- if (*sptr == '\0') {
-
- goto scan_more;
- }
-
- sptr++;
- }
- }
-
- /* Not part of a comment: copy the character through */
- *ptr = *sptr;
-
- ptr++;
- sptr++;
- }
-}
-
-/*********************************************************************//**
-Finds the highest [number] among the foreign key constraint ids of the
-table. Only ids in the >= 4.0.18 format are considered, i.e. ids of the
-form databasename/tablename_ibfk_[number] where [number] does not start
-with '0' and extends to the end of the id.
-@return highest number, 0 if table has no new format foreign key constraints */
-static
-ulint
-dict_table_get_highest_foreign_id(
-/*==============================*/
-	dict_table_t*	table)	/*!< in: table in the dictionary memory cache */
-{
-	/* Length of the dict_ibfk tag without its terminating NUL */
-	const ulint	tag_len = (sizeof dict_ibfk) - 1;
-	dict_foreign_t*	fk;
-	ulint		name_len;
-	ulint		highest = 0;
-
-	ut_a(table);
-
-	name_len = ut_strlen(table->name);
-
-	for (fk = UT_LIST_GET_FIRST(table->foreign_list);
-	     fk != NULL;
-	     fk = UT_LIST_GET_NEXT(foreign_list, fk)) {
-
-		const char*	digits;
-		char*		end;
-		ulint		num;
-
-		/* The id must be the table name followed by the tag
-		and at least one more character */
-		if (ut_strlen(fk->id) <= tag_len + name_len
-		    || 0 != ut_memcmp(fk->id, table->name, name_len)
-		    || 0 != ut_memcmp(fk->id + name_len,
-				      dict_ibfk, tag_len)) {
-
-			continue;
-		}
-
-		digits = fk->id + name_len + tag_len;
-
-		if (*digits == '0') {
-			/* Not of the >= 4.0.18 format */
-
-			continue;
-		}
-
-		num = strtoul(digits, &end, 10);
-
-		if (*end != '\0') {
-			/* Something other than a number follows the tag */
-
-			continue;
-		}
-
-		ut_a(num != highest);
-
-		if (num > highest) {
-			highest = num;
-		}
-	}
-
-	return(highest);
-}
-
-/*********************************************************************//**
-Writes a simple syntax error report for a foreign key create clause to
-dict_foreign_err_file, showing the clause and the text at the point of
-the error. The report is written while holding dict_foreign_err_mutex. */
-static
-void
-dict_foreign_report_syntax_err(
-/*===========================*/
-	const char*	name,	/*!< in: table name */
-	const char*	start_of_latest_foreign,
-				/*!< in: start of the foreign key clause
-				in the SQL string */
-	const char*	ptr)	/*!< in: place of the syntax error */
-{
-	FILE*	err_file = dict_foreign_err_file;
-
-	mutex_enter(&dict_foreign_err_mutex);
-
-	dict_foreign_error_report_low(err_file, name);
-
-	fprintf(err_file,
-		"%s:\nSyntax error close to:\n%s\n",
-		start_of_latest_foreign,
-		ptr);
-
-	mutex_exit(&dict_foreign_err_mutex);
-}
-
-/*********************************************************************//**
-Scans a table create SQL string and adds to the data dictionary the foreign
-key constraints declared in the string. This function should be called after
-the indexes for a table have been created. Each foreign key constraint must
-be accompanied with indexes in both participating tables. The indexes are
-allowed to contain more fields than mentioned in the constraint.
-@return error code or DB_SUCCESS */
-static
-ulint
-dict_create_foreign_constraints_low(
-/*================================*/
- trx_t* trx, /*!< in: transaction */
- mem_heap_t* heap, /*!< in: memory heap */
- struct charset_info_st* cs,/*!< in: the character set of sql_string */
- const char* sql_string,
- /*!< in: CREATE TABLE or ALTER TABLE statement
- where foreign keys are declared like:
- FOREIGN KEY (a, b) REFERENCES table2(c, d),
- table2 can be written also with the database
- name before it: test.table2; the default
- database is the database of parameter name */
- const char* name, /*!< in: table full name in the normalized form
- database_name/table_name */
- ibool reject_fks)
- /*!< in: if TRUE, fail with error code
- DB_CANNOT_ADD_CONSTRAINT if any foreign
- keys are found. */
-{
- dict_table_t* table;
- dict_table_t* referenced_table;
- dict_table_t* table_to_alter;
- ulint highest_id_so_far = 0;
- dict_index_t* index;
- dict_foreign_t* foreign;
- const char* ptr = sql_string;
- const char* start_of_latest_foreign = sql_string;
- FILE* ef = dict_foreign_err_file;
- const char* constraint_name;
- ibool success;
- ulint error;
- const char* ptr1;
- const char* ptr2;
- ulint i;
- ulint j;
- ibool is_on_delete;
- ulint n_on_deletes;
- ulint n_on_updates;
- const dict_col_t*columns[500];
- const char* column_names[500];
- const char* referenced_table_name;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- table = dict_table_get_low(name);
-
- if (table == NULL) {
- mutex_enter(&dict_foreign_err_mutex);
- dict_foreign_error_report_low(ef, name);
- fprintf(ef,
- "Cannot find the table in the internal"
- " data dictionary of InnoDB.\n"
- "Create table statement:\n%s\n", sql_string);
- mutex_exit(&dict_foreign_err_mutex);
-
- return(DB_ERROR);
- }
-
- /* First check if we are actually doing an ALTER TABLE, and in that
- case look for the table being altered */
-
- ptr = dict_accept(cs, ptr, "ALTER", &success);
-
- if (!success) {
-
- goto loop;
- }
-
- ptr = dict_accept(cs, ptr, "TABLE", &success);
-
- if (!success) {
-
- goto loop;
- }
-
- /* We are doing an ALTER TABLE: scan the table name we are altering */
-
- ptr = dict_scan_table_name(cs, ptr, &table_to_alter, name,
- &success, heap, &referenced_table_name);
- if (!success) {
- fprintf(stderr,
- "InnoDB: Error: could not find"
- " the table being ALTERED in:\n%s\n",
- sql_string);
-
- return(DB_ERROR);
- }
-
- /* Starting from 4.0.18 and 4.1.2, we generate foreign key id's in the
- format databasename/tablename_ibfk_[number], where [number] is local
- to the table; look for the highest [number] for table_to_alter, so
- that we can assign to new constraints higher numbers. */
-
- /* If we are altering a temporary table, the table name after ALTER
- TABLE does not correspond to the internal table name, and
- table_to_alter is NULL. TODO: should we fix this somehow? */
-
- if (table_to_alter == NULL) {
- highest_id_so_far = 0;
- } else {
- highest_id_so_far = dict_table_get_highest_foreign_id(
- table_to_alter);
- }
-
- /* Scan for foreign key declarations in a loop */
-loop:
- /* Scan either to "CONSTRAINT" or "FOREIGN", whichever is closer */
-
- ptr1 = dict_scan_to(ptr, "CONSTRAINT");
- ptr2 = dict_scan_to(ptr, "FOREIGN");
-
- constraint_name = NULL;
-
- if (ptr1 < ptr2) {
- /* The user may have specified a constraint name. Pick it so
- that we can store 'databasename/constraintname' as the id of
- the constraint in the system tables. */
- ptr = ptr1;
-
- ptr = dict_accept(cs, ptr, "CONSTRAINT", &success);
-
- ut_a(success);
-
- if (!my_isspace(cs, *ptr) && *ptr != '"' && *ptr != '`') {
- goto loop;
- }
-
- while (my_isspace(cs, *ptr)) {
- ptr++;
- }
-
- /* read constraint name unless got "CONSTRAINT FOREIGN" */
- if (ptr != ptr2) {
- ptr = dict_scan_id(cs, ptr, heap,
- &constraint_name, FALSE, FALSE);
- }
- } else {
- ptr = ptr2;
- }
-
- if (*ptr == '\0') {
- /* The proper way to reject foreign keys for temporary
- tables would be to split the lexing and syntactical
- analysis of foreign key clauses from the actual adding
- of them, so that ha_innodb.cc could first parse the SQL
- command, determine if there are any foreign keys, and
- if so, immediately reject the command if the table is a
- temporary one. For now, this kludge will work. */
- if (reject_fks && (UT_LIST_GET_LEN(table->foreign_list) > 0)) {
-
- return(DB_CANNOT_ADD_CONSTRAINT);
- }
-
- /**********************************************************/
- /* The following call adds the foreign key constraints
- to the data dictionary system tables on disk */
-
- error = dict_create_add_foreigns_to_dictionary(
- highest_id_so_far, table, trx);
- return(error);
- }
-
- start_of_latest_foreign = ptr;
-
- ptr = dict_accept(cs, ptr, "FOREIGN", &success);
-
- if (!success) {
- goto loop;
- }
-
- if (!my_isspace(cs, *ptr)) {
- goto loop;
- }
-
- ptr = dict_accept(cs, ptr, "KEY", &success);
-
- if (!success) {
- goto loop;
- }
-
- ptr = dict_accept(cs, ptr, "(", &success);
-
- if (!success) {
- /* MySQL allows also an index id before the '('; we
- skip it */
- ptr = dict_skip_word(cs, ptr, &success);
-
- if (!success) {
- dict_foreign_report_syntax_err(
- name, start_of_latest_foreign, ptr);
-
- return(DB_CANNOT_ADD_CONSTRAINT);
- }
-
- ptr = dict_accept(cs, ptr, "(", &success);
-
- if (!success) {
- /* We do not flag a syntax error here because in an
- ALTER TABLE we may also have DROP FOREIGN KEY abc */
-
- goto loop;
- }
- }
-
- i = 0;
-
- /* Scan the columns in the first list */
-col_loop1:
- ut_a(i < (sizeof column_names) / sizeof *column_names);
- ptr = dict_scan_col(cs, ptr, &success, table, columns + i,
- heap, column_names + i);
- if (!success) {
- mutex_enter(&dict_foreign_err_mutex);
- dict_foreign_error_report_low(ef, name);
- fprintf(ef, "%s:\nCannot resolve column name close to:\n%s\n",
- start_of_latest_foreign, ptr);
- mutex_exit(&dict_foreign_err_mutex);
-
- return(DB_CANNOT_ADD_CONSTRAINT);
- }
-
- i++;
-
- ptr = dict_accept(cs, ptr, ",", &success);
-
- if (success) {
- goto col_loop1;
- }
-
- ptr = dict_accept(cs, ptr, ")", &success);
-
- if (!success) {
- dict_foreign_report_syntax_err(
- name, start_of_latest_foreign, ptr);
- return(DB_CANNOT_ADD_CONSTRAINT);
- }
-
- /* Try to find an index which contains the columns
- as the first fields and in the right order */
-
- index = dict_foreign_find_index(table, column_names, i,
- NULL, TRUE, FALSE);
-
- if (!index) {
- mutex_enter(&dict_foreign_err_mutex);
- dict_foreign_error_report_low(ef, name);
- fputs("There is no index in table ", ef);
- ut_print_name(ef, NULL, TRUE, name);
- fprintf(ef, " where the columns appear\n"
- "as the first columns. Constraint:\n%s\n"
- "See " REFMAN "innodb-foreign-key-constraints.html\n"
- "for correct foreign key definition.\n",
- start_of_latest_foreign);
- mutex_exit(&dict_foreign_err_mutex);
-
- return(DB_CANNOT_ADD_CONSTRAINT);
- }
- ptr = dict_accept(cs, ptr, "REFERENCES", &success);
-
- if (!success || !my_isspace(cs, *ptr)) {
- dict_foreign_report_syntax_err(
- name, start_of_latest_foreign, ptr);
- return(DB_CANNOT_ADD_CONSTRAINT);
- }
-
- /* Let us create a constraint struct */
-
- foreign = dict_mem_foreign_create();
-
- if (constraint_name) {
- ulint db_len;
-
- /* Catenate 'databasename/' to the constraint name specified
- by the user: we conceive the constraint as belonging to the
- same MySQL 'database' as the table itself. We store the name
- to foreign->id. */
-
- db_len = dict_get_db_name_len(table->name);
-
- foreign->id = mem_heap_alloc(
- foreign->heap, db_len + strlen(constraint_name) + 2);
-
- ut_memcpy(foreign->id, table->name, db_len);
- foreign->id[db_len] = '/';
- strcpy(foreign->id + db_len + 1, constraint_name);
- }
-
- foreign->foreign_table = table;
- foreign->foreign_table_name = mem_heap_strdup(foreign->heap,
- table->name);
- foreign->foreign_index = index;
- foreign->n_fields = (unsigned int) i;
- foreign->foreign_col_names = mem_heap_alloc(foreign->heap,
- i * sizeof(void*));
- for (i = 0; i < foreign->n_fields; i++) {
- foreign->foreign_col_names[i] = mem_heap_strdup(
- foreign->heap,
- dict_table_get_col_name(table,
- dict_col_get_no(columns[i])));
- }
-
- ptr = dict_scan_table_name(cs, ptr, &referenced_table, name,
- &success, heap, &referenced_table_name);
-
- /* Note that referenced_table can be NULL if the user has suppressed
- checking of foreign key constraints! */
-
- if (!success || (!referenced_table && trx->check_foreigns)) {
- dict_foreign_free(foreign);
-
- mutex_enter(&dict_foreign_err_mutex);
- dict_foreign_error_report_low(ef, name);
- fprintf(ef, "%s:\nCannot resolve table name close to:\n"
- "%s\n",
- start_of_latest_foreign, ptr);
- mutex_exit(&dict_foreign_err_mutex);
-
- return(DB_CANNOT_ADD_CONSTRAINT);
- }
-
- ptr = dict_accept(cs, ptr, "(", &success);
-
- if (!success) {
- dict_foreign_free(foreign);
- dict_foreign_report_syntax_err(name, start_of_latest_foreign,
- ptr);
- return(DB_CANNOT_ADD_CONSTRAINT);
- }
-
- /* Scan the columns in the second list */
- i = 0;
-
-col_loop2:
- ptr = dict_scan_col(cs, ptr, &success, referenced_table, columns + i,
- heap, column_names + i);
- i++;
-
- if (!success) {
- dict_foreign_free(foreign);
-
- mutex_enter(&dict_foreign_err_mutex);
- dict_foreign_error_report_low(ef, name);
- fprintf(ef, "%s:\nCannot resolve column name close to:\n"
- "%s\n",
- start_of_latest_foreign, ptr);
- mutex_exit(&dict_foreign_err_mutex);
-
- return(DB_CANNOT_ADD_CONSTRAINT);
- }
-
- ptr = dict_accept(cs, ptr, ",", &success);
-
- if (success) {
- goto col_loop2;
- }
-
- ptr = dict_accept(cs, ptr, ")", &success);
-
- if (!success || foreign->n_fields != i) {
- dict_foreign_free(foreign);
-
- dict_foreign_report_syntax_err(name, start_of_latest_foreign,
- ptr);
- return(DB_CANNOT_ADD_CONSTRAINT);
- }
-
- n_on_deletes = 0;
- n_on_updates = 0;
-
-scan_on_conditions:
- /* Loop here as long as we can find ON ... conditions */
-
- ptr = dict_accept(cs, ptr, "ON", &success);
-
- if (!success) {
-
- goto try_find_index;
- }
-
- ptr = dict_accept(cs, ptr, "DELETE", &success);
-
- if (!success) {
- ptr = dict_accept(cs, ptr, "UPDATE", &success);
-
- if (!success) {
- dict_foreign_free(foreign);
-
- dict_foreign_report_syntax_err(
- name, start_of_latest_foreign, ptr);
- return(DB_CANNOT_ADD_CONSTRAINT);
- }
-
- is_on_delete = FALSE;
- n_on_updates++;
- } else {
- is_on_delete = TRUE;
- n_on_deletes++;
- }
-
- ptr = dict_accept(cs, ptr, "RESTRICT", &success);
-
- if (success) {
- goto scan_on_conditions;
- }
-
- ptr = dict_accept(cs, ptr, "CASCADE", &success);
-
- if (success) {
- if (is_on_delete) {
- foreign->type |= DICT_FOREIGN_ON_DELETE_CASCADE;
- } else {
- foreign->type |= DICT_FOREIGN_ON_UPDATE_CASCADE;
- }
-
- goto scan_on_conditions;
- }
-
- ptr = dict_accept(cs, ptr, "NO", &success);
-
- if (success) {
- ptr = dict_accept(cs, ptr, "ACTION", &success);
-
- if (!success) {
- dict_foreign_free(foreign);
- dict_foreign_report_syntax_err(
- name, start_of_latest_foreign, ptr);
-
- return(DB_CANNOT_ADD_CONSTRAINT);
- }
-
- if (is_on_delete) {
- foreign->type |= DICT_FOREIGN_ON_DELETE_NO_ACTION;
- } else {
- foreign->type |= DICT_FOREIGN_ON_UPDATE_NO_ACTION;
- }
-
- goto scan_on_conditions;
- }
-
- ptr = dict_accept(cs, ptr, "SET", &success);
-
- if (!success) {
- dict_foreign_free(foreign);
- dict_foreign_report_syntax_err(name, start_of_latest_foreign,
- ptr);
- return(DB_CANNOT_ADD_CONSTRAINT);
- }
-
- ptr = dict_accept(cs, ptr, "NULL", &success);
-
- if (!success) {
- dict_foreign_free(foreign);
- dict_foreign_report_syntax_err(name, start_of_latest_foreign,
- ptr);
- return(DB_CANNOT_ADD_CONSTRAINT);
- }
-
- for (j = 0; j < foreign->n_fields; j++) {
- if ((dict_index_get_nth_col(foreign->foreign_index, j)->prtype)
- & DATA_NOT_NULL) {
-
- /* It is not sensible to define SET NULL
- if the column is not allowed to be NULL! */
-
- dict_foreign_free(foreign);
-
- mutex_enter(&dict_foreign_err_mutex);
- dict_foreign_error_report_low(ef, name);
- fprintf(ef, "%s:\n"
- "You have defined a SET NULL condition"
- " though some of the\n"
- "columns are defined as NOT NULL.\n",
- start_of_latest_foreign);
- mutex_exit(&dict_foreign_err_mutex);
-
- return(DB_CANNOT_ADD_CONSTRAINT);
- }
- }
-
- if (is_on_delete) {
- foreign->type |= DICT_FOREIGN_ON_DELETE_SET_NULL;
- } else {
- foreign->type |= DICT_FOREIGN_ON_UPDATE_SET_NULL;
- }
-
- goto scan_on_conditions;
-
-try_find_index:
- if (n_on_deletes > 1 || n_on_updates > 1) {
- /* It is an error to define more than 1 action */
-
- dict_foreign_free(foreign);
-
- mutex_enter(&dict_foreign_err_mutex);
- dict_foreign_error_report_low(ef, name);
- fprintf(ef, "%s:\n"
- "You have twice an ON DELETE clause"
- " or twice an ON UPDATE clause.\n",
- start_of_latest_foreign);
- mutex_exit(&dict_foreign_err_mutex);
-
- return(DB_CANNOT_ADD_CONSTRAINT);
- }
-
- /* Try to find an index which contains the columns as the first fields
- and in the right order, and the types are the same as in
- foreign->foreign_index */
-
- if (referenced_table) {
- index = dict_foreign_find_index(referenced_table,
- column_names, i,
- foreign->foreign_index,
- TRUE, FALSE);
- if (!index) {
- dict_foreign_free(foreign);
- mutex_enter(&dict_foreign_err_mutex);
- dict_foreign_error_report_low(ef, name);
- fprintf(ef, "%s:\n"
- "Cannot find an index in the"
- " referenced table where the\n"
- "referenced columns appear as the"
- " first columns, or column types\n"
- "in the table and the referenced table"
- " do not match for constraint.\n"
- "Note that the internal storage type of"
- " ENUM and SET changed in\n"
- "tables created with >= InnoDB-4.1.12,"
- " and such columns in old tables\n"
- "cannot be referenced by such columns"
- " in new tables.\n"
- "See " REFMAN
- "innodb-foreign-key-constraints.html\n"
- "for correct foreign key definition.\n",
- start_of_latest_foreign);
- mutex_exit(&dict_foreign_err_mutex);
-
- return(DB_CANNOT_ADD_CONSTRAINT);
- }
- } else {
- ut_a(trx->check_foreigns == FALSE);
- index = NULL;
- }
-
- foreign->referenced_index = index;
- foreign->referenced_table = referenced_table;
-
- foreign->referenced_table_name
- = mem_heap_strdup(foreign->heap, referenced_table_name);
-
- foreign->referenced_col_names = mem_heap_alloc(foreign->heap,
- i * sizeof(void*));
- for (i = 0; i < foreign->n_fields; i++) {
- foreign->referenced_col_names[i]
- = mem_heap_strdup(foreign->heap, column_names[i]);
- }
-
- /* We found an ok constraint definition: add to the lists */
-
- UT_LIST_ADD_LAST(foreign_list, table->foreign_list, foreign);
-
- if (referenced_table) {
- UT_LIST_ADD_LAST(referenced_list,
- referenced_table->referenced_list,
- foreign);
- }
-
- goto loop;
-}
-
-/*********************************************************************//**
-Entry point for registering the foreign key constraints declared in a table
-definition statement. The statement text is stripped of comments and then
-passed, together with a scratch heap and the character set of the session,
-to dict_create_foreign_constraints_low(). Call this only after the indexes
-of the table have been created: each constraint must be accompanied with
-indexes in both participating tables, and those indexes are allowed to
-contain more fields than the constraint mentions.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-ulint
-dict_create_foreign_constraints(
-/*============================*/
-	trx_t*		trx,		/*!< in: transaction */
-	const char*	sql_string,	/*!< in: table create statement where
-					foreign keys are declared like:
-					FOREIGN KEY (a, b) REFERENCES
-					table2(c, d); table2 can also be
-					written with the database name
-					before it: test.table2; the default
-					database is the database of
-					parameter name */
-	const char*	name,		/*!< in: table full name in the
-					normalized form
-					database_name/table_name */
-	ibool		reject_fks)	/*!< in: if TRUE, fail with error
-					code DB_CANNOT_ADD_CONSTRAINT if
-					any foreign keys are found. */
-{
-	struct charset_info_st*	charset;
-	mem_heap_t*		scratch;
-	char*			stripped_sql;
-	ulint			error;
-
-	ut_a(trx);
-	ut_a(trx->mysql_thd);
-
-	stripped_sql = dict_strip_comments(sql_string);
-	scratch = mem_heap_create(10000);
-	charset = innobase_get_charset(trx->mysql_thd);
-
-	error = dict_create_foreign_constraints_low(
-		trx, scratch, charset, stripped_sql, name, reject_fks);
-
-	/* Both temporaries were created above and are released here */
-	mem_heap_free(scratch);
-	mem_free(stripped_sql);
-
-	return(error);
-}
-
-/**********************************************************************//**
-Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement.
-The statement text is taken from *(trx->mysql_query_str), stripped of
-comments, and scanned for every occurrence of DROP FOREIGN KEY <id>.
-Each id found is appended to *constraints_to_drop and must match the id of
-a constraint in table->foreign_list, either as a whole or after the
-database name prefix of the stored id has been removed. Errors are reported
-to dict_foreign_err_file while holding dict_foreign_err_mutex. A debug
-assertion checks that the caller owns dict_sys->mutex.
-@return DB_SUCCESS or DB_CANNOT_DROP_CONSTRAINT if syntax error or the
-constraint id does not match */
-UNIV_INTERN
-ulint
-dict_foreign_parse_drop_constraints(
-/*================================*/
- mem_heap_t* heap, /*!< in: heap from which we can
- allocate memory; the id array and the
- scanned ids are allocated from it */
- trx_t* trx, /*!< in: transaction; must have a
- non-NULL mysql_thd */
- dict_table_t* table, /*!< in: table */
- ulint* n, /*!< out: number of constraints
- to drop */
- const char*** constraints_to_drop) /*!< out: id's of the
- constraints to drop; the array has room
- for 1000 entries */
-{
- dict_foreign_t* foreign;
- ibool success;
- char* str;
- const char* ptr;
- const char* id;
- FILE* ef = dict_foreign_err_file;
- struct charset_info_st* cs;
-
- ut_a(trx);
- ut_a(trx->mysql_thd);
-
- cs = innobase_get_charset(trx->mysql_thd);
-
- *n = 0;
-
- *constraints_to_drop = mem_heap_alloc(heap, 1000 * sizeof(char*)); /* capacity is enforced by ut_a(*n < 1000) below */
-
- str = dict_strip_comments(*(trx->mysql_query_str)); /* private copy; every return path below frees it */
- ptr = str;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-loop:
- ptr = dict_scan_to(ptr, "DROP");
-
- if (*ptr == '\0') { /* end of statement reached: all clauses were accepted */
- mem_free(str);
-
- return(DB_SUCCESS);
- }
-
- ptr = dict_accept(cs, ptr, "DROP", &success);
-
- if (!my_isspace(cs, *ptr)) { /* DROP must be followed by a space character; otherwise keep scanning */
-
- goto loop;
- }
-
- ptr = dict_accept(cs, ptr, "FOREIGN", &success);
-
- if (!success || !my_isspace(cs, *ptr)) { /* some other kind of DROP: not an error, keep scanning */
-
- goto loop;
- }
-
- ptr = dict_accept(cs, ptr, "KEY", &success);
-
- if (!success) {
-
- goto syntax_error;
- }
-
- ptr = dict_scan_id(cs, ptr, heap, &id, FALSE, TRUE);
-
- if (id == NULL) {
-
- goto syntax_error;
- }
-
- ut_a(*n < 1000);
- (*constraints_to_drop)[*n] = id;
- (*n)++;
-
- /* Look for the given constraint id: it matches either the full stored
- id or the stored id with its database name prefix removed */
-
- foreign = UT_LIST_GET_FIRST(table->foreign_list);
-
- while (foreign != NULL) {
- if (0 == strcmp(foreign->id, id)
- || (strchr(foreign->id, '/')
- && 0 == strcmp(id,
- dict_remove_db_name(foreign->id)))) {
- /* Found */
- break;
- }
-
- foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
- }
-
- if (foreign == NULL) { /* no constraint of the table carries this id */
- mutex_enter(&dict_foreign_err_mutex);
- rewind(ef); /* the report is written from the start of the error file */
- ut_print_timestamp(ef);
- fputs(" Error in dropping of a foreign key constraint"
- " of table ", ef);
- ut_print_name(ef, NULL, TRUE, table->name);
- fputs(",\n"
- "in SQL command\n", ef);
- fputs(str, ef);
- fputs("\nCannot find a constraint with the given id ", ef);
- ut_print_name(ef, NULL, FALSE, id);
- fputs(".\n", ef);
- mutex_exit(&dict_foreign_err_mutex);
-
- mem_free(str);
-
- return(DB_CANNOT_DROP_CONSTRAINT);
- }
-
- goto loop;
-
-syntax_error:
- mutex_enter(&dict_foreign_err_mutex);
- rewind(ef); /* the report is written from the start of the error file */
- ut_print_timestamp(ef);
- fputs(" Syntax error in dropping of a"
- " foreign key constraint of table ", ef);
- ut_print_name(ef, NULL, TRUE, table->name);
- fprintf(ef, ",\n"
- "close to:\n%s\n in SQL command\n%s\n", ptr, str);
- mutex_exit(&dict_foreign_err_mutex);
-
- mem_free(str);
-
- return(DB_CANNOT_DROP_CONSTRAINT);
-}
-
-/*==================== END OF FOREIGN KEY PROCESSING ====================*/
-
-/**********************************************************************//**
-Looks up an index object in the dictionary cache by its id.
-The caller must already hold dict_sys->mutex (checked by a debug assertion).
-@return index, NULL if not found */
-UNIV_INTERN
-dict_index_t*
-dict_index_get_if_in_cache_low(
-/*===========================*/
-	dulint	index_id)	/*!< in: index id */
-{
-	dict_index_t*	cached;
-
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	cached = dict_index_find_on_id_low(index_id);
-
-	return(cached);
-}
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/**********************************************************************//**
-Looks up an index object in the dictionary cache by its id, acquiring
-dict_sys->mutex around the lookup. When dict_sys is NULL no lookup is
-attempted and NULL is returned.
-@return index, NULL if not found */
-UNIV_INTERN
-dict_index_t*
-dict_index_get_if_in_cache(
-/*=======================*/
-	dulint	index_id)	/*!< in: index id */
-{
-	dict_index_t*	found = NULL;
-
-	if (dict_sys != NULL) {
-		mutex_enter(&(dict_sys->mutex));
-
-		found = dict_index_get_if_in_cache_low(index_id);
-
-		mutex_exit(&(dict_sys->mutex));
-	}
-
-	return(found);
-}
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-#ifdef UNIV_DEBUG
-/**********************************************************************//**
-Debug check for a search tuple: asserts that the number of fields it
-compares does not exceed dict_index_get_n_unique_in_tree(index), so that no
-comparison can occur with the page number field in a node pointer. A
-violation fails the ut_a() assertion instead of returning FALSE.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-dict_index_check_search_tuple(
-/*==========================*/
-	const dict_index_t*	index,	/*!< in: index tree */
-	const dtuple_t*		tuple)	/*!< in: tuple used in a search */
-{
-	ulint	n_unique_in_tree;
-
-	ut_a(index);
-
-	n_unique_in_tree = dict_index_get_n_unique_in_tree(index);
-
-	ut_a(dtuple_get_n_fields_cmp(tuple) <= n_unique_in_tree);
-
-	return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
-/**********************************************************************//**
-Builds a node pointer out of a physical record and a page number. The
-tuple consists of a prefix of rec followed by one additional 4-byte field
-that stores page_no.
-@return own: node pointer */
-UNIV_INTERN
-dtuple_t*
-dict_index_build_node_ptr(
-/*======================*/
-	const dict_index_t*	index,	/*!< in: index */
-	const rec_t*		rec,	/*!< in: record for which to build
-					node pointer */
-	ulint			page_no,/*!< in: page number to put in node
-					pointer */
-	mem_heap_t*		heap,	/*!< in: memory heap where pointer
-					created */
-	ulint			level)	/*!< in: level of rec in tree:
-					0 means leaf level */
-{
-	dtuple_t*	node_ptr;
-	dfield_t*	child_field;
-	byte*		page_no_buf;
-	ulint		n_key_fields;
-
-	if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
-		/* Universal index tree: on the leaf level the whole record
-		serves as the node pointer; on non-leaf levels the last
-		field, which contains the page number of the child page,
-		is left out */
-
-		ut_a(!dict_table_is_comp(index->table));
-
-		n_key_fields = rec_get_n_fields_old(rec);
-
-		if (level != 0) {
-			ut_a(n_key_fields > 1);
-			n_key_fields -= 1;
-		}
-	} else {
-		n_key_fields = dict_index_get_n_unique_in_tree(index);
-	}
-
-	/* One extra field for the child page number */
-	node_ptr = dtuple_create(heap, 1 + n_key_fields);
-
-	/* The page number field must be excluded from comparisons when
-	searching in the tree: on upper levels there may be identical node
-	pointers that differ only in the page number. Hence n_fields_cmp is
-	one less than the number of fields. */
-
-	dtuple_set_n_fields_cmp(node_ptr, n_key_fields);
-
-	dict_index_copy_types(node_ptr, index, n_key_fields);
-
-	page_no_buf = mem_heap_alloc(heap, 4);
-
-	mach_write_to_4(page_no_buf, page_no);
-
-	child_field = dtuple_get_nth_field(node_ptr, n_key_fields);
-	dfield_set_data(child_field, page_no_buf, 4);
-
-	dtype_set(dfield_get_type(child_field),
-		  DATA_SYS_CHILD, DATA_NOT_NULL, 4);
-
-	rec_copy_prefix_to_dtuple(node_ptr, rec, index, n_key_fields, heap);
-
-	dtuple_set_info_bits(node_ptr,
-			     dtuple_get_info_bits(node_ptr)
-			     | REC_STATUS_NODE_PTR);
-
-	ut_ad(dtuple_check_typed(node_ptr));
-
-	return(node_ptr);
-}
-
-/**********************************************************************//**
-Copies an initial segment of a physical record, long enough to specify an
-index entry uniquely. For a DICT_UNIVERSAL index all fields of the
-(old-style) record are copied; otherwise the count is
-dict_index_get_n_unique_in_tree(index).
-@return pointer to the prefix record */
-UNIV_INTERN
-rec_t*
-dict_index_copy_rec_order_prefix(
-/*=============================*/
-	const dict_index_t*	index,	/*!< in: index */
-	const rec_t*		rec,	/*!< in: record for which to
-					copy prefix */
-	ulint*			n_fields,/*!< out: number of fields copied */
-	byte**			buf,	/*!< in/out: memory buffer for the
-					copied prefix, or NULL */
-	ulint*			buf_size)/*!< in/out: buffer size */
-{
-	UNIV_PREFETCH_R(rec);
-
-	/* Store the field count straight into the out parameter */
-	if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
-		ut_a(!dict_table_is_comp(index->table));
-		*n_fields = rec_get_n_fields_old(rec);
-	} else {
-		*n_fields = dict_index_get_n_unique_in_tree(index);
-	}
-
-	return(rec_copy_prefix_to_buf(rec, index, *n_fields,
-				      buf, buf_size));
-}
-
-/**********************************************************************//**
-Builds a typed data tuple out of a physical record: creates a tuple of
-n_fields fields in heap, copies the field types from the index and then
-the first n_fields fields of rec.
-@return own: data tuple */
-UNIV_INTERN
-dtuple_t*
-dict_index_build_data_tuple(
-/*========================*/
-	dict_index_t*	index,	/*!< in: index tree */
-	rec_t*		rec,	/*!< in: record for which to build data
-				tuple */
-	ulint		n_fields,/*!< in: number of data fields */
-	mem_heap_t*	heap)	/*!< in: memory heap where tuple created */
-{
-	dtuple_t*	row;
-
-	/* For an old-style record the requested field count can be
-	checked against the record itself */
-	ut_ad(dict_table_is_comp(index->table)
-	      || n_fields <= rec_get_n_fields_old(rec));
-
-	row = dtuple_create(heap, n_fields);
-	dict_index_copy_types(row, index, n_fields);
-	rec_copy_prefix_to_dtuple(row, rec, index, n_fields, heap);
-
-	ut_ad(dtuple_check_typed(row));
-
-	return(row);
-}
-
-/*********************************************************************//**
-Calculates the minimum record length in an index. The two record formats
-(selected by dict_table_is_comp() on the table of the index) are handled
-separately.
-@return minimum record length */
-UNIV_INTERN
-ulint
-dict_index_calc_min_rec_len(
-/*========================*/
-	const dict_index_t*	index)	/*!< in: index */
-{
-	ulint	n_fields = dict_index_get_n_fields(index);
-	ulint	comp = dict_table_is_comp(index->table);
-	ulint	len = 0;
-	ulint	n_nullable = 0;
-	ulint	pos;
-
-	if (!comp) {
-		/* Old format: the fixed sizes of all columns ... */
-		for (pos = 0; pos < n_fields; pos++) {
-			len += dict_col_get_fixed_size(
-				dict_index_get_nth_col(index, pos), comp);
-		}
-
-		/* ... plus 1 or 2 bytes per field, depending on whether
-		the data length exceeds 127 ... */
-		len += (len > 127) ? 2 * n_fields : n_fields;
-
-		/* ... plus the fixed extra bytes */
-		return(len + REC_N_OLD_EXTRA_BYTES);
-	}
-
-	len = REC_N_NEW_EXTRA_BYTES;
-
-	for (pos = 0; pos < n_fields; pos++) {
-		const dict_col_t*	col
-			= dict_index_get_nth_col(index, pos);
-		ulint			fixed
-			= dict_col_get_fixed_size(col, comp);
-
-		if (fixed) {
-			len += fixed;
-		} else {
-			/* No fixed size: count 1 byte if col->len < 128,
-			otherwise 2 bytes */
-			len += (col->len < 128) ? 1 : 2;
-		}
-
-		if (!(col->prtype & DATA_NOT_NULL)) {
-			n_nullable++;
-		}
-	}
-
-	/* round the NULL flags up to full bytes */
-	return(len + UT_BITS_IN_BYTES(n_nullable));
-}
-
-/*********************************************************************//**
-Calculates new estimates for table and index statistics. The statistics
-are used in query optimization. Nothing is calculated if the .ibd file of
-the table is missing, if srv_force_recovery is at least
-SRV_FORCE_NO_IBUF_MERGE, or if the table has no index at all. */
-UNIV_INTERN
-void
-dict_update_statistics_low(
-/*=======================*/
-	dict_table_t*	table,		/*!< in/out: table */
-	ibool		has_dict_mutex __attribute__((unused)))
-					/*!< in: TRUE if the caller has the
-					dictionary mutex */
-{
-	dict_index_t*	index;
-	ulint		n_total;
-	ulint		n_leaf;
-	ulint		all_indexes_size = 0;
-
-	if (table->ibd_file_missing) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: cannot calculate statistics for table %s\n"
-			"InnoDB: because the .ibd file is missing. For help,"
-			" please refer to\n"
-			"InnoDB: " REFMAN "innodb-troubleshooting.html\n",
-			table->name);
-
-		return;
-	}
-
-	if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
-		/* With a high innodb_force_recovery level the statistics
-		are not calculated, as a badly corrupted index can cause a
-		crash in the calculation. */
-
-		return;
-	}
-
-	index = dict_table_get_first_index(table);
-
-	if (index == NULL) {
-		/* Table definition is corrupt */
-
-		return;
-	}
-
-	/* For each index, find out its size and approximately how many
-	different key values it has */
-
-	do {
-		n_total = btr_get_size(index, BTR_TOTAL_SIZE);
-
-		index->stat_index_size = n_total;
-		all_indexes_size += n_total;
-
-		n_leaf = btr_get_size(index, BTR_N_LEAF_PAGES);
-
-		/* Zero leaf pages means that the root node of the tree is
-		a leaf: count it as one page */
-		index->stat_n_leaf_pages = (n_leaf == 0) ? 1 : n_leaf;
-
-		btr_estimate_number_of_different_key_vals(index);
-
-		index = dict_table_get_next_index(index);
-	} while (index);
-
-	/* The table-level figures are derived from the first index */
-	index = dict_table_get_first_index(table);
-
-	table->stat_n_rows = index->stat_n_diff_key_vals[
-		dict_index_get_n_unique(index)];
-
-	table->stat_clustered_index_size = index->stat_index_size;
-
-	table->stat_sum_of_other_index_sizes
-		= all_indexes_size - index->stat_index_size;
-
-	table->stat_initialized = TRUE;
-
-	table->stat_modified_counter = 0;
-}
-
-/*********************************************************************//**
-Calculates new estimates for table and index statistics. The statistics
-are used in query optimization. This is a wrapper that forwards to
-dict_update_statistics_low() with has_dict_mutex = FALSE. */
-UNIV_INTERN
-void
-dict_update_statistics(
-/*===================*/
- dict_table_t* table) /*!< in/out: table */
-{
- dict_update_statistics_low(table, FALSE);
-}
-
-/**********************************************************************//**
-Prints info of a foreign key constraint to stderr: the constraint id, the
-foreign table with its column names, and the referenced table with its
-column names. A debug assertion checks that the caller owns
-dict_sys->mutex. */
-static
-void
-dict_foreign_print_low(
-/*===================*/
-	dict_foreign_t*	foreign)	/*!< in: foreign key constraint */
-{
-	ulint	col;
-
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	fprintf(stderr, " FOREIGN KEY CONSTRAINT %s: %s (",
-		foreign->id, foreign->foreign_table_name);
-
-	col = 0;
-
-	while (col < foreign->n_fields) {
-		fprintf(stderr, " %s", foreign->foreign_col_names[col]);
-		col++;
-	}
-
-	fprintf(stderr, " )\n"
-		" REFERENCES %s (",
-		foreign->referenced_table_name);
-
-	col = 0;
-
-	while (col < foreign->n_fields) {
-		fprintf(stderr, " %s", foreign->referenced_col_names[col]);
-		col++;
-	}
-
-	fputs(" )\n", stderr);
-}
-
-/**********************************************************************//**
-Prints a table data. Acquires dict_sys->mutex, calls
-dict_table_print_low() and releases the mutex again, so the caller must not
-already hold that mutex. */
-UNIV_INTERN
-void
-dict_table_print(
-/*=============*/
- dict_table_t* table) /*!< in: table */
-{
- mutex_enter(&(dict_sys->mutex));
- dict_table_print_low(table);
- mutex_exit(&(dict_sys->mutex));
-}
-
-/**********************************************************************//**
-Prints a table data when we know the table name. The table is looked up
-with dict_table_get_low() while holding dict_sys->mutex; the ut_a()
-assertion fails if the lookup returns NULL. */
-UNIV_INTERN
-void
-dict_table_print_by_name(
-/*=====================*/
- const char* name) /*!< in: table name */
-{
- dict_table_t* table;
-
- mutex_enter(&(dict_sys->mutex));
-
- table = dict_table_get_low(name);
-
- ut_a(table); /* an unknown table name is treated as a fatal error */
-
- dict_table_print_low(table);
- mutex_exit(&(dict_sys->mutex));
-}
-
-/**********************************************************************//**
-Prints a table data to stderr: a summary line, the columns, every index,
-and the foreign key constraints from both the foreign_list and the
-referenced_list of the table. The statistics of the table are recalculated
-before printing. A debug assertion checks that the caller owns
-dict_sys->mutex. */
-UNIV_INTERN
-void
-dict_table_print_low(
-/*=================*/
-	dict_table_t*	table)	/*!< in: table */
-{
-	dict_index_t*	index;
-	dict_foreign_t*	fk;
-	ulint		col_no;
-
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	dict_update_statistics_low(table, TRUE);
-
-	fprintf(stderr,
-		"--------------------------------------\n"
-		"TABLE: name %s, id %lu %lu, flags %lx, columns %lu,"
-		" indexes %lu, appr.rows %lu\n"
-		" COLUMNS: ",
-		table->name,
-		(ulong) ut_dulint_get_high(table->id),
-		(ulong) ut_dulint_get_low(table->id),
-		(ulong) table->flags,
-		(ulong) table->n_cols,
-		(ulong) UT_LIST_GET_LEN(table->indexes),
-		(ulong) table->stat_n_rows);
-
-	col_no = 0;
-
-	while (col_no < (ulint) table->n_cols) {
-		dict_col_print_low(table,
-				   dict_table_get_nth_col(table, col_no));
-		fputs("; ", stderr);
-		col_no++;
-	}
-
-	putc('\n', stderr);
-
-	for (index = UT_LIST_GET_FIRST(table->indexes);
-	     index != NULL;
-	     index = UT_LIST_GET_NEXT(indexes, index)) {
-
-		dict_index_print_low(index);
-	}
-
-	/* Constraints in the foreign_list of this table */
-	for (fk = UT_LIST_GET_FIRST(table->foreign_list);
-	     fk != NULL;
-	     fk = UT_LIST_GET_NEXT(foreign_list, fk)) {
-
-		dict_foreign_print_low(fk);
-	}
-
-	/* Constraints in the referenced_list of this table */
-	for (fk = UT_LIST_GET_FIRST(table->referenced_list);
-	     fk != NULL;
-	     fk = UT_LIST_GET_NEXT(referenced_list, fk)) {
-
-		dict_foreign_print_low(fk);
-	}
-}
-
-/**********************************************************************//**
-Prints a column data to stderr: the column name followed by its data type.
-A debug assertion checks that the caller owns dict_sys->mutex. */
-static
-void
-dict_col_print_low(
-/*===============*/
-	const dict_table_t*	table,	/*!< in: table */
-	const dict_col_t*	col)	/*!< in: column */
-{
-	dtype_t		col_type;
-	const char*	col_name;
-
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	/* dtype_print() takes a dtype_t, so copy the type out of the column */
-	dict_col_copy_type(col, &col_type);
-
-	col_name = dict_table_get_col_name(table, dict_col_get_no(col));
-
-	fprintf(stderr, "%s: ", col_name);
-
-	dtype_print(&col_type);
-}
-
-/**********************************************************************//**
-Prints an index data to stderr: name, id, field counts, type, root page,
-statistics and the list of fields. A debug assertion checks that the
-caller owns dict_sys->mutex. */
-static
-void
-dict_index_print_low(
-/*=================*/
-	dict_index_t*	index)	/*!< in: index */
-{
-	const char*	type_string;
-	ib_int64_t	n_vals;
-	ulint		stat_pos;
-	ulint		field_no;
-
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	/* Use the estimate for all user defined columns; if there are
-	none, fall back to the entry at position 1 */
-	stat_pos = (index->n_user_defined_cols > 0)
-		? index->n_user_defined_cols
-		: 1;
-
-	n_vals = index->stat_n_diff_key_vals[stat_pos];
-
-	/* NOTE(review): type_string is assigned here but not used by the
-	output below */
-	if (dict_index_is_clust(index)) {
-		type_string = "clustered index";
-	} else if (dict_index_is_unique(index)) {
-		type_string = "unique index";
-	} else {
-		type_string = "secondary index";
-	}
-
-	fprintf(stderr,
-		" INDEX: name %s, id %lu %lu, fields %lu/%lu,"
-		" uniq %lu, type %lu\n"
-		" root page %lu, appr.key vals %lu,"
-		" leaf pages %lu, size pages %lu\n"
-		" FIELDS: ",
-		index->name,
-		(ulong) ut_dulint_get_high(index->id),
-		(ulong) ut_dulint_get_low(index->id),
-		(ulong) index->n_user_defined_cols,
-		(ulong) index->n_fields,
-		(ulong) index->n_uniq,
-		(ulong) index->type,
-		(ulong) index->page,
-		(ulong) n_vals,
-		(ulong) index->stat_n_leaf_pages,
-		(ulong) index->stat_index_size);
-
-	field_no = 0;
-
-	while (field_no < index->n_fields) {
-		dict_field_print_low(
-			dict_index_get_nth_field(index, field_no));
-		field_no++;
-	}
-
-	putc('\n', stderr);
-
-#ifdef UNIV_BTR_PRINT
-	btr_print_size(index);
-
-	btr_print_index(index, 7);
-#endif /* UNIV_BTR_PRINT */
-}
-
-/**********************************************************************//**
-Prints a field data to stderr: the field name, followed by the prefix
-length in parentheses when the prefix length is nonzero. A debug assertion
-checks that the caller owns dict_sys->mutex. */
-static
-void
-dict_field_print_low(
-/*=================*/
-	dict_field_t*	field)	/*!< in: field */
-{
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	fprintf(stderr, " %s", field->name);
-
-	if (field->prefix_len == 0) {
-		/* No prefix length to report */
-		return;
-	}
-
-	fprintf(stderr, "(%lu)", (ulong) field->prefix_len);
-}
-
-/**********************************************************************//**
-Outputs info on a foreign key of a table in a format suitable for
-CREATE TABLE: a leading comma, the constraint id without its database name,
-the foreign columns, the referenced table and columns, and one clause for
-each ON DELETE / ON UPDATE action bit set in foreign->type. */
-UNIV_INTERN
-void
-dict_print_info_on_foreign_key_in_create_format(
-/*============================================*/
-	FILE*		file,		/*!< in: file where to print */
-	trx_t*		trx,		/*!< in: transaction */
-	dict_foreign_t*	foreign,	/*!< in: foreign key constraint */
-	ibool		add_newline)	/*!< in: whether to add a newline */
-{
-	/* Action bits and their clauses, in the order they are printed */
-	static const struct {
-		ulint		flag;
-		const char*	clause;
-	} actions[] = {
-		{DICT_FOREIGN_ON_DELETE_CASCADE, " ON DELETE CASCADE"},
-		{DICT_FOREIGN_ON_DELETE_SET_NULL, " ON DELETE SET NULL"},
-		{DICT_FOREIGN_ON_DELETE_NO_ACTION, " ON DELETE NO ACTION"},
-		{DICT_FOREIGN_ON_UPDATE_CASCADE, " ON UPDATE CASCADE"},
-		{DICT_FOREIGN_ON_UPDATE_SET_NULL, " ON UPDATE SET NULL"},
-		{DICT_FOREIGN_ON_UPDATE_NO_ACTION, " ON UPDATE NO ACTION"}
-	};
-	const char*	bare_id;
-	const char*	ref_name;
-	ulint		i;
-
-	/* Strip the preceding database name from the constraint id, if the
-	id contains one */
-	bare_id = strchr(foreign->id, '/')
-		? foreign->id + 1 + dict_get_db_name_len(foreign->id)
-		: foreign->id;
-
-	putc(',', file);
-
-	if (add_newline) {
-		/* SHOW CREATE TABLE wants constraints each printed nicely
-		on its own line, while error messages want no newlines
-		inserted. */
-		fputs("\n ", file);
-	}
-
-	fputs(" CONSTRAINT ", file);
-	ut_print_name(file, trx, FALSE, bare_id);
-	fputs(" FOREIGN KEY (", file);
-
-	/* Comma separated list; the first name is printed
-	unconditionally */
-	i = 0;
-
-	for (;;) {
-		ut_print_name(file, trx, FALSE,
-			      foreign->foreign_col_names[i]);
-		i++;
-
-		if (i >= foreign->n_fields) {
-			break;
-		}
-
-		fputs(", ", file);
-	}
-
-	fputs(") REFERENCES ", file);
-
-	ref_name = foreign->referenced_table_name;
-
-	if (dict_tables_have_same_db(foreign->foreign_table_name,
-				     ref_name)) {
-		/* Do not print the database name of the referenced table */
-		ref_name = dict_remove_db_name(ref_name);
-	}
-
-	ut_print_name(file, trx, TRUE, ref_name);
-
-	fputs(" (", file);
-
-	i = 0;
-
-	for (;;) {
-		ut_print_name(file, trx, FALSE,
-			      foreign->referenced_col_names[i]);
-		i++;
-
-		if (i >= foreign->n_fields) {
-			break;
-		}
-
-		fputs(", ", file);
-	}
-
-	putc(')', file);
-
-	for (i = 0; i < sizeof actions / sizeof *actions; i++) {
-		if (foreign->type & actions[i].flag) {
-			fputs(actions[i].clause, file);
-		}
-	}
-}
-
-/**********************************************************************//**
-Outputs info on foreign keys of a table. dict_sys->mutex is held while the
-foreign_list of the table is traversed.
-
-The referential actions of a constraint are stored in foreign->type as
-OR-ed bit flags, so each action is tested with a bit mask. Comparing the
-whole field for equality would omit ON DELETE CASCADE and ON DELETE
-SET NULL whenever another action bit is set on the same constraint. */
-UNIV_INTERN
-void
-dict_print_info_on_foreign_keys(
-/*============================*/
-	ibool		create_table_format, /*!< in: if TRUE then print in
-				a format suitable to be inserted into
-				a CREATE TABLE, otherwise in the format
-				of SHOW TABLE STATUS */
-	FILE*		file,	/*!< in: file where to print */
-	trx_t*		trx,	/*!< in: transaction */
-	dict_table_t*	table)	/*!< in: table */
-{
-	dict_foreign_t*	foreign;
-
-	mutex_enter(&(dict_sys->mutex));
-
-	/* An empty list simply skips the loop; no separate early return is
-	needed */
-	foreign = UT_LIST_GET_FIRST(table->foreign_list);
-
-	while (foreign != NULL) {
-		if (create_table_format) {
-			dict_print_info_on_foreign_key_in_create_format(
-				file, trx, foreign, TRUE);
-		} else {
-			ulint	i;
-			fputs("; (", file);
-
-			for (i = 0; i < foreign->n_fields; i++) {
-				if (i) {
-					putc(' ', file);
-				}
-
-				ut_print_name(file, trx, FALSE,
-					      foreign->foreign_col_names[i]);
-			}
-
-			fputs(") REFER ", file);
-			ut_print_name(file, trx, TRUE,
-				      foreign->referenced_table_name);
-			putc('(', file);
-
-			for (i = 0; i < foreign->n_fields; i++) {
-				if (i) {
-					putc(' ', file);
-				}
-				ut_print_name(
-					file, trx, FALSE,
-					foreign->referenced_col_names[i]);
-			}
-
-			putc(')', file);
-
-			/* Test each action bit separately: several of them
-			can be set at the same time */
-
-			if (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE) {
-				fputs(" ON DELETE CASCADE", file);
-			}
-
-			if (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL) {
-				fputs(" ON DELETE SET NULL", file);
-			}
-
-			if (foreign->type & DICT_FOREIGN_ON_DELETE_NO_ACTION) {
-				fputs(" ON DELETE NO ACTION", file);
-			}
-
-			if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE) {
-				fputs(" ON UPDATE CASCADE", file);
-			}
-
-			if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL) {
-				fputs(" ON UPDATE SET NULL", file);
-			}
-
-			if (foreign->type & DICT_FOREIGN_ON_UPDATE_NO_ACTION) {
-				fputs(" ON UPDATE NO ACTION", file);
-			}
-		}
-
-		foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
-	}
-
-	mutex_exit(&(dict_sys->mutex));
-}
-
-/********************************************************************//**
-Displays the names of the index and the table. */
-UNIV_INTERN
-void
-dict_index_name_print(
-/*==================*/
- FILE* file, /*!< in: output stream */
- trx_t* trx, /*!< in: transaction */
- const dict_index_t* index) /*!< in: index to print */
-{
- fputs("index ", file);
- ut_print_name(file, trx, FALSE, index->name);
- fputs(" of table ", file);
- ut_print_name(file, trx, TRUE, index->table_name);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Inits dict_ind_redundant and dict_ind_compact. */
-UNIV_INTERN
-void
-dict_ind_init(void)
-/*===============*/
-{
- dict_table_t* table;
-
- /* create dummy table and index for REDUNDANT infimum and supremum */
- table = dict_mem_table_create("SYS_DUMMY1", DICT_HDR_SPACE, 1, 0);
- dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR,
- DATA_ENGLISH | DATA_NOT_NULL, 8);
-
- dict_ind_redundant = dict_mem_index_create("SYS_DUMMY1", "SYS_DUMMY1",
- DICT_HDR_SPACE, 0, 1);
- dict_index_add_col(dict_ind_redundant, table,
- dict_table_get_nth_col(table, 0), 0);
- dict_ind_redundant->table = table;
- /* create dummy table and index for COMPACT infimum and supremum */
- table = dict_mem_table_create("SYS_DUMMY2",
- DICT_HDR_SPACE, 1, DICT_TF_COMPACT);
- dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR,
- DATA_ENGLISH | DATA_NOT_NULL, 8);
- dict_ind_compact = dict_mem_index_create("SYS_DUMMY2", "SYS_DUMMY2",
- DICT_HDR_SPACE, 0, 1);
- dict_index_add_col(dict_ind_compact, table,
- dict_table_get_nth_col(table, 0), 0);
- dict_ind_compact->table = table;
-
- /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
- dict_ind_redundant->cached = dict_ind_compact->cached = TRUE;
-}
-
-/**********************************************************************//**
-Frees dict_ind_redundant and dict_ind_compact. */
-static
-void
-dict_ind_free(void)
-/*===============*/
-{
- dict_table_t* table;
-
- table = dict_ind_compact->table;
- dict_mem_index_free(dict_ind_compact);
- dict_ind_compact = NULL;
- dict_mem_table_free(table);
-
- table = dict_ind_redundant->table;
- dict_mem_index_free(dict_ind_redundant);
- dict_ind_redundant = NULL;
- dict_mem_table_free(table);
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Get index by name
-@return index, NULL if does not exist */
-UNIV_INTERN
-dict_index_t*
-dict_table_get_index_on_name(
-/*=========================*/
- dict_table_t* table, /*!< in: table */
- const char* name) /*!< in: name of the index to find */
-{
- dict_index_t* index;
-
- index = dict_table_get_first_index(table);
-
- while (index != NULL) {
- if (ut_strcmp(index->name, name) == 0) {
-
- return(index);
- }
-
- index = dict_table_get_next_index(index);
- }
-
- return(NULL);
-
-}
-
-/**********************************************************************//**
-Replace the index passed in with another equivalent index in the tables
-foreign key list. */
-UNIV_INTERN
-void
-dict_table_replace_index_in_foreign_list(
-/*=====================================*/
- dict_table_t* table, /*!< in/out: table */
- dict_index_t* index) /*!< in: index to be replaced */
-{
- dict_foreign_t* foreign;
-
- for (foreign = UT_LIST_GET_FIRST(table->foreign_list);
- foreign;
- foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) {
-
- if (foreign->foreign_index == index) {
- dict_index_t* new_index
- = dict_foreign_find_equiv_index(foreign);
- ut_a(new_index);
-
- foreign->foreign_index = new_index;
- }
- }
-}
-
-/**********************************************************************//**
-In case there is more than one index with the same name return the index
-with the min(id).
-@return index, NULL if does not exist */
-UNIV_INTERN
-dict_index_t*
-dict_table_get_index_on_name_and_min_id(
-/*=====================================*/
- dict_table_t* table, /*!< in: table */
- const char* name) /*!< in: name of the index to find */
-{
- dict_index_t* index;
- dict_index_t* min_index; /* Index with matching name and min(id) */
-
- min_index = NULL;
- index = dict_table_get_first_index(table);
-
- while (index != NULL) {
- if (ut_strcmp(index->name, name) == 0) {
- if (!min_index
- || ut_dulint_cmp(index->id, min_index->id) < 0) {
-
- min_index = index;
- }
- }
-
- index = dict_table_get_next_index(index);
- }
-
- return(min_index);
-
-}
-
-#ifdef UNIV_DEBUG
-/**********************************************************************//**
-Check for duplicate index entries in a table [using the index name] */
-UNIV_INTERN
-void
-dict_table_check_for_dup_indexes(
-/*=============================*/
- const dict_table_t* table) /*!< in: Check for dup indexes
- in this table */
-{
- /* Check for duplicates, ignoring indexes that are marked
- as to be dropped */
-
- const dict_index_t* index1;
- const dict_index_t* index2;
-
- /* The primary index _must_ exist */
- ut_a(UT_LIST_GET_LEN(table->indexes) > 0);
-
- index1 = UT_LIST_GET_FIRST(table->indexes);
- index2 = UT_LIST_GET_NEXT(indexes, index1);
-
- while (index1 && index2) {
-
- while (index2) {
-
- if (!index2->to_be_dropped) {
- ut_ad(ut_strcmp(index1->name, index2->name));
- }
-
- index2 = UT_LIST_GET_NEXT(indexes, index2);
- }
-
- index1 = UT_LIST_GET_NEXT(indexes, index1);
- index2 = UT_LIST_GET_NEXT(indexes, index1);
- }
-}
-#endif /* UNIV_DEBUG */
-
-/**************************************************************************
-Closes the data dictionary module. */
-UNIV_INTERN
-void
-dict_close(void)
-/*============*/
-{
- ulint i;
-
- /* Free the hash elements. We don't remove them from the table
- because we are going to destroy the table anyway. */
- for (i = 0; i < hash_get_n_cells(dict_sys->table_hash); i++) {
- dict_table_t* table;
-
- table = HASH_GET_FIRST(dict_sys->table_hash, i);
-
- while (table) {
- dict_table_t* prev_table = table;
-
- table = HASH_GET_NEXT(name_hash, prev_table);
-#ifdef UNIV_DEBUG
- ut_a(prev_table->magic_n == DICT_TABLE_MAGIC_N);
-#endif
- /* Acquire only because it's a pre-condition. */
- mutex_enter(&dict_sys->mutex);
-
- dict_table_remove_from_cache(prev_table);
-
- mutex_exit(&dict_sys->mutex);
- }
- }
-
- hash_table_free(dict_sys->table_hash);
-
- /* The elements are the same instance as in dict_sys->table_hash,
- therefore we don't delete the individual elements. */
- hash_table_free(dict_sys->table_id_hash);
-
- dict_ind_free();
-
- mutex_free(&dict_sys->mutex);
-
- rw_lock_free(&dict_operation_lock);
- memset(&dict_operation_lock, 0x0, sizeof(dict_operation_lock));
-
- mutex_free(&dict_foreign_err_mutex);
-
- mem_free(dict_sys);
- dict_sys = NULL;
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/dict/dict0load.c b/storage/innodb_plugin/dict/dict0load.c
deleted file mode 100644
index 842a129c1a6..00000000000
--- a/storage/innodb_plugin/dict/dict0load.c
+++ /dev/null
@@ -1,1450 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file dict/dict0load.c
-Loads to the memory cache database object definitions
-from dictionary tables
-
-Created 4/24/1996 Heikki Tuuri
-*******************************************************/
-
-#include "dict0load.h"
-#include "mysql_version.h"
-
-#ifdef UNIV_NONINL
-#include "dict0load.ic"
-#endif
-
-#include "btr0pcur.h"
-#include "btr0btr.h"
-#include "page0page.h"
-#include "mach0data.h"
-#include "dict0dict.h"
-#include "dict0boot.h"
-#include "rem0cmp.h"
-#include "srv0start.h"
-#include "srv0srv.h"
-
-/****************************************************************//**
-Compare the name of an index column.
-@return TRUE if the i'th column of index is 'name'. */
-static
-ibool
-name_of_col_is(
-/*===========*/
- const dict_table_t* table, /*!< in: table */
- const dict_index_t* index, /*!< in: index */
- ulint i, /*!< in: index field offset */
- const char* name) /*!< in: name to compare to */
-{
- ulint tmp = dict_col_get_no(dict_field_get_col(
- dict_index_get_nth_field(
- index, i)));
-
- return(strcmp(name, dict_table_get_col_name(table, tmp)) == 0);
-}
-
-/********************************************************************//**
-Finds the first table name in the given database.
-@return own: table name, NULL if does not exist; the caller must free
-the memory in the string! */
-UNIV_INTERN
-char*
-dict_get_first_table_name_in_db(
-/*============================*/
- const char* name) /*!< in: database name which ends in '/' */
-{
- dict_table_t* sys_tables;
- btr_pcur_t pcur;
- dict_index_t* sys_index;
- dtuple_t* tuple;
- mem_heap_t* heap;
- dfield_t* dfield;
- const rec_t* rec;
- const byte* field;
- ulint len;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- heap = mem_heap_create(1000);
-
- mtr_start(&mtr);
-
- sys_tables = dict_table_get_low("SYS_TABLES");
- sys_index = UT_LIST_GET_FIRST(sys_tables->indexes);
- ut_a(!dict_table_is_comp(sys_tables));
-
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- dfield_set_data(dfield, name, ut_strlen(name));
- dict_index_copy_types(tuple, sys_index, 1);
-
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
-loop:
- rec = btr_pcur_get_rec(&pcur);
-
- if (!btr_pcur_is_on_user_rec(&pcur)) {
- /* Not found */
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(NULL);
- }
-
- field = rec_get_nth_field_old(rec, 0, &len);
-
- if (len < strlen(name)
- || ut_memcmp(name, field, strlen(name)) != 0) {
- /* Not found */
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(NULL);
- }
-
- if (!rec_get_deleted_flag(rec, 0)) {
-
- /* We found one */
-
- char* table_name = mem_strdupl((char*) field, len);
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(table_name);
- }
-
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
-
- goto loop;
-}
-
-/********************************************************************//**
-Prints to the standard output information on all tables found in the data
-dictionary system table. */
-UNIV_INTERN
-void
-dict_print(void)
-/*============*/
-{
- dict_table_t* sys_tables;
- dict_index_t* sys_index;
- dict_table_t* table;
- btr_pcur_t pcur;
- const rec_t* rec;
- const byte* field;
- ulint len;
- mtr_t mtr;
-
- /* Enlarge the fatal semaphore wait timeout during the InnoDB table
- monitor printout */
-
- mutex_enter(&kernel_mutex);
- srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */
- mutex_exit(&kernel_mutex);
-
- mutex_enter(&(dict_sys->mutex));
-
- mtr_start(&mtr);
-
- sys_tables = dict_table_get_low("SYS_TABLES");
- sys_index = UT_LIST_GET_FIRST(sys_tables->indexes);
-
- btr_pcur_open_at_index_side(TRUE, sys_index, BTR_SEARCH_LEAF, &pcur,
- TRUE, &mtr);
-loop:
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
-
- rec = btr_pcur_get_rec(&pcur);
-
- if (!btr_pcur_is_on_user_rec(&pcur)) {
- /* end of index */
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- mutex_exit(&(dict_sys->mutex));
-
- /* Restore the fatal semaphore wait timeout */
-
- mutex_enter(&kernel_mutex);
- srv_fatal_semaphore_wait_threshold -= 7200; /* 2 hours */
- mutex_exit(&kernel_mutex);
-
- return;
- }
-
- field = rec_get_nth_field_old(rec, 0, &len);
-
- if (!rec_get_deleted_flag(rec, 0)) {
-
- /* We found one */
-
- char* table_name = mem_strdupl((char*) field, len);
-
- btr_pcur_store_position(&pcur, &mtr);
-
- mtr_commit(&mtr);
-
- table = dict_table_get_low(table_name);
- mem_free(table_name);
-
- if (table == NULL) {
- fputs("InnoDB: Failed to load table ", stderr);
- ut_print_namel(stderr, NULL, TRUE, (char*) field, len);
- putc('\n', stderr);
- } else {
- /* The table definition was corrupt if there
- is no index */
-
- if (dict_table_get_first_index(table)) {
- dict_update_statistics_low(table, TRUE);
- }
-
- dict_table_print_low(table);
- }
-
- mtr_start(&mtr);
-
- btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr);
- }
-
- goto loop;
-}
-
-/********************************************************************//**
-Determine the flags of a table described in SYS_TABLES.
-@return compressed page size in kilobytes; or 0 if the tablespace is
-uncompressed, ULINT_UNDEFINED on error */
-static
-ulint
-dict_sys_tables_get_flags(
-/*======================*/
- const rec_t* rec) /*!< in: a record of SYS_TABLES */
-{
- const byte* field;
- ulint len;
- ulint n_cols;
- ulint flags;
-
- field = rec_get_nth_field_old(rec, 5, &len);
- ut_a(len == 4);
-
- flags = mach_read_from_4(field);
-
- if (UNIV_LIKELY(flags == DICT_TABLE_ORDINARY)) {
- return(0);
- }
-
- field = rec_get_nth_field_old(rec, 4, &len);
- n_cols = mach_read_from_4(field);
-
- if (UNIV_UNLIKELY(!(n_cols & 0x80000000UL))) {
- /* New file formats require ROW_FORMAT=COMPACT. */
- return(ULINT_UNDEFINED);
- }
-
- switch (flags & (DICT_TF_FORMAT_MASK | DICT_TF_COMPACT)) {
- default:
- case DICT_TF_FORMAT_51 << DICT_TF_FORMAT_SHIFT:
- case DICT_TF_FORMAT_51 << DICT_TF_FORMAT_SHIFT | DICT_TF_COMPACT:
- /* flags should be DICT_TABLE_ORDINARY,
- or DICT_TF_FORMAT_MASK should be nonzero. */
- return(ULINT_UNDEFINED);
-
- case DICT_TF_FORMAT_ZIP << DICT_TF_FORMAT_SHIFT | DICT_TF_COMPACT:
-#if DICT_TF_FORMAT_MAX > DICT_TF_FORMAT_ZIP
-# error "missing case labels for DICT_TF_FORMAT_ZIP .. DICT_TF_FORMAT_MAX"
-#endif
- /* We support this format. */
- break;
- }
-
- if (UNIV_UNLIKELY((flags & DICT_TF_ZSSIZE_MASK)
- > (DICT_TF_ZSSIZE_MAX << DICT_TF_ZSSIZE_SHIFT))) {
- /* Unsupported compressed page size. */
- return(ULINT_UNDEFINED);
- }
-
- if (UNIV_UNLIKELY(flags & (~0 << DICT_TF_BITS))) {
- /* Some unused bits are set. */
- return(ULINT_UNDEFINED);
- }
-
- return(flags);
-}
-
-/********************************************************************//**
-In a crash recovery we already have all the tablespace objects created.
-This function compares the space id information in the InnoDB data dictionary
-to what we already read with fil_load_single_table_tablespaces().
-
-In a normal startup, we create the tablespace objects for every table in
-InnoDB's data dictionary, if the corresponding .ibd file exists.
-We also scan the biggest space id, and store it to fil_system. */
-UNIV_INTERN
-void
-dict_check_tablespaces_and_store_max_id(
-/*====================================*/
- ibool in_crash_recovery) /*!< in: are we doing a crash recovery */
-{
- dict_table_t* sys_tables;
- dict_index_t* sys_index;
- btr_pcur_t pcur;
- const rec_t* rec;
- ulint max_space_id = 0;
- mtr_t mtr;
-
- mutex_enter(&(dict_sys->mutex));
-
- mtr_start(&mtr);
-
- sys_tables = dict_table_get_low("SYS_TABLES");
- sys_index = UT_LIST_GET_FIRST(sys_tables->indexes);
- ut_a(!dict_table_is_comp(sys_tables));
-
- btr_pcur_open_at_index_side(TRUE, sys_index, BTR_SEARCH_LEAF, &pcur,
- TRUE, &mtr);
-loop:
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
-
- rec = btr_pcur_get_rec(&pcur);
-
- if (!btr_pcur_is_on_user_rec(&pcur)) {
- /* end of index */
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- /* We must make the tablespace cache aware of the biggest
- known space id */
-
- /* printf("Biggest space id in data dictionary %lu\n",
- max_space_id); */
- fil_set_max_space_id_if_bigger(max_space_id);
-
- mutex_exit(&(dict_sys->mutex));
-
- return;
- }
-
- if (!rec_get_deleted_flag(rec, 0)) {
-
- /* We found one */
- const byte* field;
- ulint len;
- ulint space_id;
- ulint flags;
- char* name;
-
- field = rec_get_nth_field_old(rec, 0, &len);
- name = mem_strdupl((char*) field, len);
-
- flags = dict_sys_tables_get_flags(rec);
- if (UNIV_UNLIKELY(flags == ULINT_UNDEFINED)) {
-
- field = rec_get_nth_field_old(rec, 5, &len);
- flags = mach_read_from_4(field);
-
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_filename(stderr, name);
- fprintf(stderr, "\n"
- "InnoDB: in InnoDB data dictionary"
- " has unknown type %lx.\n",
- (ulong) flags);
-
- goto loop;
- }
-
- field = rec_get_nth_field_old(rec, 9, &len);
- ut_a(len == 4);
-
- space_id = mach_read_from_4(field);
-
- btr_pcur_store_position(&pcur, &mtr);
-
- mtr_commit(&mtr);
-
- if (space_id != 0 && in_crash_recovery) {
- /* Check that the tablespace (the .ibd file) really
- exists; print a warning to the .err log if not */
-
- fil_space_for_table_exists_in_mem(space_id, name,
- FALSE, TRUE, TRUE);
- }
-
- if (space_id != 0 && !in_crash_recovery) {
- /* It is a normal database startup: create the space
- object and check that the .ibd file exists. */
-
- fil_open_single_table_tablespace(FALSE, space_id,
- flags, name);
- }
-
- mem_free(name);
-
- if (space_id > max_space_id) {
- max_space_id = space_id;
- }
-
- mtr_start(&mtr);
-
- btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr);
- }
-
- goto loop;
-}
-
-/********************************************************************//**
-Loads definitions for table columns. */
-static
-void
-dict_load_columns(
-/*==============*/
- dict_table_t* table, /*!< in: table */
- mem_heap_t* heap) /*!< in: memory heap for temporary storage */
-{
- dict_table_t* sys_columns;
- dict_index_t* sys_index;
- btr_pcur_t pcur;
- dtuple_t* tuple;
- dfield_t* dfield;
- const rec_t* rec;
- const byte* field;
- ulint len;
- byte* buf;
- char* name;
- ulint mtype;
- ulint prtype;
- ulint col_len;
- ulint i;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- mtr_start(&mtr);
-
- sys_columns = dict_table_get_low("SYS_COLUMNS");
- sys_index = UT_LIST_GET_FIRST(sys_columns->indexes);
- ut_a(!dict_table_is_comp(sys_columns));
-
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- buf = mem_heap_alloc(heap, 8);
- mach_write_to_8(buf, table->id);
-
- dfield_set_data(dfield, buf, 8);
- dict_index_copy_types(tuple, sys_index, 1);
-
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
- for (i = 0; i + DATA_N_SYS_COLS < (ulint) table->n_cols; i++) {
-
- rec = btr_pcur_get_rec(&pcur);
-
- ut_a(btr_pcur_is_on_user_rec(&pcur));
-
- ut_a(!rec_get_deleted_flag(rec, 0));
-
- field = rec_get_nth_field_old(rec, 0, &len);
- ut_ad(len == 8);
- ut_a(ut_dulint_cmp(table->id, mach_read_from_8(field)) == 0);
-
- field = rec_get_nth_field_old(rec, 1, &len);
- ut_ad(len == 4);
- ut_a(i == mach_read_from_4(field));
-
- ut_a(name_of_col_is(sys_columns, sys_index, 4, "NAME"));
-
- field = rec_get_nth_field_old(rec, 4, &len);
- name = mem_heap_strdupl(heap, (char*) field, len);
-
- field = rec_get_nth_field_old(rec, 5, &len);
- mtype = mach_read_from_4(field);
-
- field = rec_get_nth_field_old(rec, 6, &len);
- prtype = mach_read_from_4(field);
-
- if (dtype_get_charset_coll(prtype) == 0
- && dtype_is_string_type(mtype)) {
- /* The table was created with < 4.1.2. */
-
- if (dtype_is_binary_string_type(mtype, prtype)) {
- /* Use the binary collation for
- string columns of binary type. */
-
- prtype = dtype_form_prtype(
- prtype,
- DATA_MYSQL_BINARY_CHARSET_COLL);
- } else {
- /* Use the default charset for
- other than binary columns. */
-
- prtype = dtype_form_prtype(
- prtype,
- data_mysql_default_charset_coll);
- }
- }
-
- field = rec_get_nth_field_old(rec, 7, &len);
- col_len = mach_read_from_4(field);
-
- ut_a(name_of_col_is(sys_columns, sys_index, 8, "PREC"));
-
- dict_mem_table_add_col(table, heap, name,
- mtype, prtype, col_len);
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-}
-
-/********************************************************************//**
-Loads definitions for index fields. */
-static
-void
-dict_load_fields(
-/*=============*/
- dict_index_t* index, /*!< in: index whose fields to load */
- mem_heap_t* heap) /*!< in: memory heap for temporary storage */
-{
- dict_table_t* sys_fields;
- dict_index_t* sys_index;
- btr_pcur_t pcur;
- dtuple_t* tuple;
- dfield_t* dfield;
- ulint pos_and_prefix_len;
- ulint prefix_len;
- const rec_t* rec;
- const byte* field;
- ulint len;
- byte* buf;
- ulint i;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- mtr_start(&mtr);
-
- sys_fields = dict_table_get_low("SYS_FIELDS");
- sys_index = UT_LIST_GET_FIRST(sys_fields->indexes);
- ut_a(!dict_table_is_comp(sys_fields));
-
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- buf = mem_heap_alloc(heap, 8);
- mach_write_to_8(buf, index->id);
-
- dfield_set_data(dfield, buf, 8);
- dict_index_copy_types(tuple, sys_index, 1);
-
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
- for (i = 0; i < index->n_fields; i++) {
-
- rec = btr_pcur_get_rec(&pcur);
-
- ut_a(btr_pcur_is_on_user_rec(&pcur));
-
- /* There could be delete marked records in SYS_FIELDS
- because SYS_FIELDS.INDEX_ID can be updated
- by ALTER TABLE ADD INDEX. */
-
- if (rec_get_deleted_flag(rec, 0)) {
-
- goto next_rec;
- }
-
- field = rec_get_nth_field_old(rec, 0, &len);
- ut_ad(len == 8);
-
- field = rec_get_nth_field_old(rec, 1, &len);
- ut_a(len == 4);
-
- /* The next field stores the field position in the index
- and a possible column prefix length if the index field
- does not contain the whole column. The storage format is
- like this: if there is at least one prefix field in the index,
- then the HIGH 2 bytes contain the field number (== i) and the
- low 2 bytes the prefix length for the field. Otherwise the
- field number (== i) is contained in the 2 LOW bytes. */
-
- pos_and_prefix_len = mach_read_from_4(field);
-
- ut_a((pos_and_prefix_len & 0xFFFFUL) == i
- || (pos_and_prefix_len & 0xFFFF0000UL) == (i << 16));
-
- if ((i == 0 && pos_and_prefix_len > 0)
- || (pos_and_prefix_len & 0xFFFF0000UL) > 0) {
-
- prefix_len = pos_and_prefix_len & 0xFFFFUL;
- } else {
- prefix_len = 0;
- }
-
- ut_a(name_of_col_is(sys_fields, sys_index, 4, "COL_NAME"));
-
- field = rec_get_nth_field_old(rec, 4, &len);
-
- dict_mem_index_add_field(index,
- mem_heap_strdupl(heap,
- (char*) field, len),
- prefix_len);
-
-next_rec:
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-}
-
-/********************************************************************//**
-Loads definitions for table indexes. Adds them to the data dictionary
-cache.
-@return DB_SUCCESS if ok, DB_CORRUPTION if corruption of dictionary
-table or DB_UNSUPPORTED if table has unknown index type */
-static
-ulint
-dict_load_indexes(
-/*==============*/
- dict_table_t* table, /*!< in: table */
- mem_heap_t* heap) /*!< in: memory heap for temporary storage */
-{
- dict_table_t* sys_indexes;
- dict_index_t* sys_index;
- dict_index_t* index;
- btr_pcur_t pcur;
- dtuple_t* tuple;
- dfield_t* dfield;
- const rec_t* rec;
- const byte* field;
- ulint len;
- ulint name_len;
- char* name_buf;
- ulint type;
- ulint space;
- ulint page_no;
- ulint n_fields;
- byte* buf;
- ibool is_sys_table;
- dulint id;
- mtr_t mtr;
- ulint error = DB_SUCCESS;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- if ((ut_dulint_get_high(table->id) == 0)
- && (ut_dulint_get_low(table->id) < DICT_HDR_FIRST_ID)) {
- is_sys_table = TRUE;
- } else {
- is_sys_table = FALSE;
- }
-
- mtr_start(&mtr);
-
- sys_indexes = dict_table_get_low("SYS_INDEXES");
- sys_index = UT_LIST_GET_FIRST(sys_indexes->indexes);
- ut_a(!dict_table_is_comp(sys_indexes));
-
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- buf = mem_heap_alloc(heap, 8);
- mach_write_to_8(buf, table->id);
-
- dfield_set_data(dfield, buf, 8);
- dict_index_copy_types(tuple, sys_index, 1);
-
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
- for (;;) {
- if (!btr_pcur_is_on_user_rec(&pcur)) {
-
- break;
- }
-
- rec = btr_pcur_get_rec(&pcur);
-
- field = rec_get_nth_field_old(rec, 0, &len);
- ut_ad(len == 8);
-
- if (ut_memcmp(buf, field, len) != 0) {
- break;
- } else if (rec_get_deleted_flag(rec, 0)) {
- /* Skip delete marked records */
- goto next_rec;
- }
-
- field = rec_get_nth_field_old(rec, 1, &len);
- ut_ad(len == 8);
- id = mach_read_from_8(field);
-
- ut_a(name_of_col_is(sys_indexes, sys_index, 4, "NAME"));
-
- field = rec_get_nth_field_old(rec, 4, &name_len);
- name_buf = mem_heap_strdupl(heap, (char*) field, name_len);
-
- field = rec_get_nth_field_old(rec, 5, &len);
- n_fields = mach_read_from_4(field);
-
- field = rec_get_nth_field_old(rec, 6, &len);
- type = mach_read_from_4(field);
-
- field = rec_get_nth_field_old(rec, 7, &len);
- space = mach_read_from_4(field);
-
- ut_a(name_of_col_is(sys_indexes, sys_index, 8, "PAGE_NO"));
-
- field = rec_get_nth_field_old(rec, 8, &len);
- page_no = mach_read_from_4(field);
-
- /* We check for unsupported types first, so that the
- subsequent checks are relevant for the supported types. */
- if (type & ~(DICT_CLUSTERED | DICT_UNIQUE)) {
-
- fprintf(stderr,
- "InnoDB: Error: unknown type %lu"
- " of index %s of table %s\n",
- (ulong) type, name_buf, table->name);
-
- error = DB_UNSUPPORTED;
- goto func_exit;
- } else if (page_no == FIL_NULL) {
-
- fprintf(stderr,
- "InnoDB: Error: trying to load index %s"
- " for table %s\n"
- "InnoDB: but the index tree has been freed!\n",
- name_buf, table->name);
-
- error = DB_CORRUPTION;
- goto func_exit;
- } else if ((type & DICT_CLUSTERED) == 0
- && NULL == dict_table_get_first_index(table)) {
-
- fputs("InnoDB: Error: trying to load index ",
- stderr);
- ut_print_name(stderr, NULL, FALSE, name_buf);
- fputs(" for table ", stderr);
- ut_print_name(stderr, NULL, TRUE, table->name);
- fputs("\nInnoDB: but the first index"
- " is not clustered!\n", stderr);
-
- error = DB_CORRUPTION;
- goto func_exit;
- } else if (is_sys_table
- && ((type & DICT_CLUSTERED)
- || ((table == dict_sys->sys_tables)
- && (name_len == (sizeof "ID_IND") - 1)
- && (0 == ut_memcmp(name_buf,
- "ID_IND", name_len))))) {
-
- /* The index was created in memory already at booting
- of the database server */
- } else {
- index = dict_mem_index_create(table->name, name_buf,
- space, type, n_fields);
- index->id = id;
-
- dict_load_fields(index, heap);
- error = dict_index_add_to_cache(table, index, page_no,
- FALSE);
- /* The data dictionary tables should never contain
- invalid index definitions. If we ignored this error
- and simply did not load this index definition, the
- .frm file would disagree with the index definitions
- inside InnoDB. */
- if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
-
- goto func_exit;
- }
- }
-
-next_rec:
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
- }
-
-func_exit:
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- return(error);
-}
-
-/********************************************************************//**
-Loads a table definition and also all its index definitions, and also
-the cluster definition if the table is a member in a cluster. Also loads
-all foreign key constraints where the foreign key is in the table or where
-a foreign key references columns in this table. Adds all these to the data
-dictionary cache.
-@return table, NULL if does not exist; if the table is stored in an
-.ibd file, but the file does not exist, then we set the
-ibd_file_missing flag TRUE in the table object we return */
-UNIV_INTERN
-dict_table_t*
-dict_load_table(
-/*============*/
- const char* name) /*!< in: table name in the
- databasename/tablename format */
-{
- ibool ibd_file_missing = FALSE;
- dict_table_t* table;
- dict_table_t* sys_tables;
- btr_pcur_t pcur;
- dict_index_t* sys_index;
- dtuple_t* tuple;
- mem_heap_t* heap;
- dfield_t* dfield;
- const rec_t* rec;
- const byte* field;
- ulint len;
- ulint space;
- ulint n_cols;
- ulint flags;
- ulint err;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- heap = mem_heap_create(32000);
-
- mtr_start(&mtr);
-
- sys_tables = dict_table_get_low("SYS_TABLES");
- sys_index = UT_LIST_GET_FIRST(sys_tables->indexes);
- ut_a(!dict_table_is_comp(sys_tables));
-
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- dfield_set_data(dfield, name, ut_strlen(name));
- dict_index_copy_types(tuple, sys_index, 1);
-
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
- rec = btr_pcur_get_rec(&pcur);
-
- if (!btr_pcur_is_on_user_rec(&pcur)
- || rec_get_deleted_flag(rec, 0)) {
- /* Not found */
-err_exit:
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(NULL);
- }
-
- field = rec_get_nth_field_old(rec, 0, &len);
-
- /* Check if the table name in record is the searched one */
- if (len != ut_strlen(name) || ut_memcmp(name, field, len) != 0) {
-
- goto err_exit;
- }
-
- ut_a(name_of_col_is(sys_tables, sys_index, 9, "SPACE"));
-
- field = rec_get_nth_field_old(rec, 9, &len);
- space = mach_read_from_4(field);
-
- /* Check if the tablespace exists and has the right name */
- if (space != 0) {
- flags = dict_sys_tables_get_flags(rec);
-
- if (UNIV_UNLIKELY(flags == ULINT_UNDEFINED)) {
- field = rec_get_nth_field_old(rec, 5, &len);
- flags = mach_read_from_4(field);
-
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_filename(stderr, name);
- fprintf(stderr, "\n"
- "InnoDB: in InnoDB data dictionary"
- " has unknown type %lx.\n",
- (ulong) flags);
- goto err_exit;
- }
-
- if (fil_space_for_table_exists_in_mem(space, name, FALSE,
- FALSE, FALSE)) {
- /* Ok; (if we did a crash recovery then the tablespace
- can already be in the memory cache) */
- } else {
- /* In >= 4.1.9, InnoDB scans the data dictionary also
- at a normal mysqld startup. It is an error if the
- space object does not exist in memory. */
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: error: space object of table %s,\n"
- "InnoDB: space id %lu did not exist in memory."
- " Retrying an open.\n",
- name, (ulong)space);
- /* Try to open the tablespace */
- if (!fil_open_single_table_tablespace(
- TRUE, space, flags, name)) {
- /* We failed to find a sensible tablespace
- file */
-
- ibd_file_missing = TRUE;
- }
- }
- } else {
- flags = 0;
- }
-
- ut_a(name_of_col_is(sys_tables, sys_index, 4, "N_COLS"));
-
- field = rec_get_nth_field_old(rec, 4, &len);
- n_cols = mach_read_from_4(field);
-
- /* The high-order bit of N_COLS is the "compact format" flag. */
- if (n_cols & 0x80000000UL) {
- flags |= DICT_TF_COMPACT;
- }
-
- table = dict_mem_table_create(name, space, n_cols & ~0x80000000UL,
- flags);
-
- table->ibd_file_missing = (unsigned int) ibd_file_missing;
-
- ut_a(name_of_col_is(sys_tables, sys_index, 3, "ID"));
-
- field = rec_get_nth_field_old(rec, 3, &len);
- table->id = mach_read_from_8(field);
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- dict_load_columns(table, heap);
-
- dict_table_add_to_cache(table, heap);
-
- mem_heap_empty(heap);
-
- err = dict_load_indexes(table, heap);
-
- /* If the force recovery flag is set, we open the table irrespective
- of the error condition, since the user may want to dump data from the
- clustered index. However we load the foreign key information only if
- all indexes were loaded. */
- if (err == DB_SUCCESS) {
- err = dict_load_foreigns(table->name, TRUE);
- } else if (!srv_force_recovery) {
- dict_table_remove_from_cache(table);
- table = NULL;
- }
-#if 0
- if (err != DB_SUCCESS && table != NULL) {
-
- mutex_enter(&dict_foreign_err_mutex);
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error: could not make a foreign key"
- " definition to match\n"
- "InnoDB: the foreign key table"
- " or the referenced table!\n"
- "InnoDB: The data dictionary of InnoDB is corrupt."
- " You may need to drop\n"
- "InnoDB: and recreate the foreign key table"
- " or the referenced table.\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n"
- "InnoDB: Latest foreign key error printout:\n%s\n",
- dict_foreign_err_buf);
-
- mutex_exit(&dict_foreign_err_mutex);
- }
-#endif /* 0 */
- mem_heap_free(heap);
-
- return(table);
-}
-
-/***********************************************************************//**
-Loads a table object based on the table id.
-@return table; NULL if table does not exist */
-UNIV_INTERN
-dict_table_t*
-dict_load_table_on_id(
-/*==================*/
- dulint table_id) /*!< in: table id */
-{
- byte id_buf[8];
- btr_pcur_t pcur;
- mem_heap_t* heap;
- dtuple_t* tuple;
- dfield_t* dfield;
- dict_index_t* sys_table_ids;
- dict_table_t* sys_tables;
- const rec_t* rec;
- const byte* field;
- ulint len;
- dict_table_t* table;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- /* NOTE that the operation of this function is protected by
- the dictionary mutex, and therefore no deadlocks can occur
- with other dictionary operations. */
-
- mtr_start(&mtr);
- /*---------------------------------------------------*/
- /* Get the secondary index based on ID for table SYS_TABLES */
- sys_tables = dict_sys->sys_tables;
- sys_table_ids = dict_table_get_next_index(
- dict_table_get_first_index(sys_tables));
- ut_a(!dict_table_is_comp(sys_tables));
- heap = mem_heap_create(256);
-
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- /* Write the table id in byte format to id_buf */
- mach_write_to_8(id_buf, table_id);
-
- dfield_set_data(dfield, id_buf, 8);
- dict_index_copy_types(tuple, sys_table_ids, 1);
-
- btr_pcur_open_on_user_rec(sys_table_ids, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
- rec = btr_pcur_get_rec(&pcur);
-
- if (!btr_pcur_is_on_user_rec(&pcur)
- || rec_get_deleted_flag(rec, 0)) {
- /* Not found */
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(NULL);
- }
-
- /*---------------------------------------------------*/
- /* Now we have the record in the secondary index containing the
- table ID and NAME */
-
- rec = btr_pcur_get_rec(&pcur);
- field = rec_get_nth_field_old(rec, 0, &len);
- ut_ad(len == 8);
-
- /* Check if the table id in record is the one searched for */
- if (ut_dulint_cmp(table_id, mach_read_from_8(field)) != 0) {
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(NULL);
- }
-
- /* Now we get the table name from the record */
- field = rec_get_nth_field_old(rec, 1, &len);
- /* Load the table definition to memory */
- table = dict_load_table(mem_heap_strdupl(heap, (char*) field, len));
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(table);
-}
-
-/********************************************************************//**
-This function is called when the database is booted. Loads system table
-index definitions except for the clustered index which is added to the
-dictionary cache at booting before calling this function. */
-UNIV_INTERN
-void
-dict_load_sys_table(
-/*================*/
- dict_table_t* table) /*!< in: system table */
-{
- mem_heap_t* heap;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- heap = mem_heap_create(1000);
-
- dict_load_indexes(table, heap);
-
- mem_heap_free(heap);
-}
-
-/********************************************************************//**
-Loads foreign key constraint col names (also for the referenced table). */
-static
-void
-dict_load_foreign_cols(
-/*===================*/
- const char* id, /*!< in: foreign constraint id as a
- null-terminated string */
- dict_foreign_t* foreign)/*!< in: foreign constraint object */
-{
- dict_table_t* sys_foreign_cols;
- dict_index_t* sys_index;
- btr_pcur_t pcur;
- dtuple_t* tuple;
- dfield_t* dfield;
- const rec_t* rec;
- const byte* field;
- ulint len;
- ulint i;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- foreign->foreign_col_names = mem_heap_alloc(
- foreign->heap, foreign->n_fields * sizeof(void*));
-
- foreign->referenced_col_names = mem_heap_alloc(
- foreign->heap, foreign->n_fields * sizeof(void*));
- mtr_start(&mtr);
-
- sys_foreign_cols = dict_table_get_low("SYS_FOREIGN_COLS");
- sys_index = UT_LIST_GET_FIRST(sys_foreign_cols->indexes);
- ut_a(!dict_table_is_comp(sys_foreign_cols));
-
- tuple = dtuple_create(foreign->heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- dfield_set_data(dfield, id, ut_strlen(id));
- dict_index_copy_types(tuple, sys_index, 1);
-
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
- for (i = 0; i < foreign->n_fields; i++) {
-
- rec = btr_pcur_get_rec(&pcur);
-
- ut_a(btr_pcur_is_on_user_rec(&pcur));
- ut_a(!rec_get_deleted_flag(rec, 0));
-
- field = rec_get_nth_field_old(rec, 0, &len);
- ut_a(len == ut_strlen(id));
- ut_a(ut_memcmp(id, field, len) == 0);
-
- field = rec_get_nth_field_old(rec, 1, &len);
- ut_a(len == 4);
- ut_a(i == mach_read_from_4(field));
-
- field = rec_get_nth_field_old(rec, 4, &len);
- foreign->foreign_col_names[i] = mem_heap_strdupl(
- foreign->heap, (char*) field, len);
-
- field = rec_get_nth_field_old(rec, 5, &len);
- foreign->referenced_col_names[i] = mem_heap_strdupl(
- foreign->heap, (char*) field, len);
-
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-}
-
-/***********************************************************************//**
-Loads a foreign key constraint to the dictionary cache.
-@return DB_SUCCESS or error code */
-static
-ulint
-dict_load_foreign(
-/*==============*/
- const char* id, /*!< in: foreign constraint id as a
- null-terminated string */
- ibool check_charsets)
- /*!< in: TRUE=check charset compatibility */
-{
- dict_foreign_t* foreign;
- dict_table_t* sys_foreign;
- btr_pcur_t pcur;
- dict_index_t* sys_index;
- dtuple_t* tuple;
- mem_heap_t* heap2;
- dfield_t* dfield;
- const rec_t* rec;
- const byte* field;
- ulint len;
- ulint n_fields_and_type;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- heap2 = mem_heap_create(1000);
-
- mtr_start(&mtr);
-
- sys_foreign = dict_table_get_low("SYS_FOREIGN");
- sys_index = UT_LIST_GET_FIRST(sys_foreign->indexes);
- ut_a(!dict_table_is_comp(sys_foreign));
-
- tuple = dtuple_create(heap2, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- dfield_set_data(dfield, id, ut_strlen(id));
- dict_index_copy_types(tuple, sys_index, 1);
-
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
- rec = btr_pcur_get_rec(&pcur);
-
- if (!btr_pcur_is_on_user_rec(&pcur)
- || rec_get_deleted_flag(rec, 0)) {
- /* Not found */
-
- fprintf(stderr,
- "InnoDB: Error A: cannot load foreign constraint %s\n",
- id);
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap2);
-
- return(DB_ERROR);
- }
-
- field = rec_get_nth_field_old(rec, 0, &len);
-
- /* Check if the id in record is the searched one */
- if (len != ut_strlen(id) || ut_memcmp(id, field, len) != 0) {
-
- fprintf(stderr,
- "InnoDB: Error B: cannot load foreign constraint %s\n",
- id);
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap2);
-
- return(DB_ERROR);
- }
-
- /* Read the table names and the number of columns associated
- with the constraint */
-
- mem_heap_free(heap2);
-
- foreign = dict_mem_foreign_create();
-
- n_fields_and_type = mach_read_from_4(
- rec_get_nth_field_old(rec, 5, &len));
-
- ut_a(len == 4);
-
- /* We store the type in the bits 24..29 of n_fields_and_type. */
-
- foreign->type = (unsigned int) (n_fields_and_type >> 24);
- foreign->n_fields = (unsigned int) (n_fields_and_type & 0x3FFUL);
-
- foreign->id = mem_heap_strdup(foreign->heap, id);
-
- field = rec_get_nth_field_old(rec, 3, &len);
- foreign->foreign_table_name = mem_heap_strdupl(
- foreign->heap, (char*) field, len);
-
- field = rec_get_nth_field_old(rec, 4, &len);
- foreign->referenced_table_name = mem_heap_strdupl(
- foreign->heap, (char*) field, len);
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- dict_load_foreign_cols(id, foreign);
-
- /* If the foreign table is not yet in the dictionary cache, we
- have to load it so that we are able to make type comparisons
- in the next function call. */
-
- dict_table_get_low(foreign->foreign_table_name);
-
- /* Note that there may already be a foreign constraint object in
- the dictionary cache for this constraint: then the following
- call only sets the pointers in it to point to the appropriate table
- and index objects and frees the newly created object foreign.
- Adding to the cache should always succeed since we are not creating
- a new foreign key constraint but loading one from the data
- dictionary. */
-
- return(dict_foreign_add_to_cache(foreign, check_charsets));
-}
-
-/***********************************************************************//**
-Loads foreign key constraints where the table is either the foreign key
-holder or where the table is referenced by a foreign key. Adds these
-constraints to the data dictionary. Note that we know that the dictionary
-cache already contains all constraints where the other relevant table is
-already in the dictionary cache.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ulint
-dict_load_foreigns(
-/*===============*/
- const char* table_name, /*!< in: table name */
- ibool check_charsets) /*!< in: TRUE=check charset
- compatibility */
-{
- btr_pcur_t pcur;
- mem_heap_t* heap;
- dtuple_t* tuple;
- dfield_t* dfield;
- dict_index_t* sec_index;
- dict_table_t* sys_foreign;
- const rec_t* rec;
- const byte* field;
- ulint len;
- char* id ;
- ulint err;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- sys_foreign = dict_table_get_low("SYS_FOREIGN");
-
- if (sys_foreign == NULL) {
- /* No foreign keys defined yet in this database */
-
- fprintf(stderr,
- "InnoDB: Error: no foreign key system tables"
- " in the database\n");
-
- return(DB_ERROR);
- }
-
- ut_a(!dict_table_is_comp(sys_foreign));
- mtr_start(&mtr);
-
- /* Get the secondary index based on FOR_NAME from table
- SYS_FOREIGN */
-
- sec_index = dict_table_get_next_index(
- dict_table_get_first_index(sys_foreign));
-start_load:
- heap = mem_heap_create(256);
-
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- dfield_set_data(dfield, table_name, ut_strlen(table_name));
- dict_index_copy_types(tuple, sec_index, 1);
-
- btr_pcur_open_on_user_rec(sec_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
-loop:
- rec = btr_pcur_get_rec(&pcur);
-
- if (!btr_pcur_is_on_user_rec(&pcur)) {
- /* End of index */
-
- goto load_next_index;
- }
-
- /* Now we have the record in the secondary index containing a table
- name and a foreign constraint ID */
-
- rec = btr_pcur_get_rec(&pcur);
- field = rec_get_nth_field_old(rec, 0, &len);
-
- /* Check if the table name in the record is the one searched for; the
- following call does the comparison in the latin1_swedish_ci
- charset-collation, in a case-insensitive way. */
-
- if (0 != cmp_data_data(dfield_get_type(dfield)->mtype,
- dfield_get_type(dfield)->prtype,
- dfield_get_data(dfield), dfield_get_len(dfield),
- field, len)) {
-
- goto load_next_index;
- }
-
- /* Since table names in SYS_FOREIGN are stored in a case-insensitive
- order, we have to check that the table name matches also in a binary
- string comparison. On Unix, MySQL allows table names that only differ
- in character case. */
-
- if (0 != ut_memcmp(field, table_name, len)) {
-
- goto next_rec;
- }
-
- if (rec_get_deleted_flag(rec, 0)) {
-
- goto next_rec;
- }
-
- /* Now we get a foreign key constraint id */
- field = rec_get_nth_field_old(rec, 1, &len);
- id = mem_heap_strdupl(heap, (char*) field, len);
-
- btr_pcur_store_position(&pcur, &mtr);
-
- mtr_commit(&mtr);
-
- /* Load the foreign constraint definition to the dictionary cache */
-
- err = dict_load_foreign(id, check_charsets);
-
- if (err != DB_SUCCESS) {
- btr_pcur_close(&pcur);
- mem_heap_free(heap);
-
- return(err);
- }
-
- mtr_start(&mtr);
-
- btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr);
-next_rec:
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
-
- goto loop;
-
-load_next_index:
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- sec_index = dict_table_get_next_index(sec_index);
-
- if (sec_index != NULL) {
-
- mtr_start(&mtr);
-
- goto start_load;
- }
-
- return(DB_SUCCESS);
-}
diff --git a/storage/innodb_plugin/dict/dict0mem.c b/storage/innodb_plugin/dict/dict0mem.c
deleted file mode 100644
index 6458cbab92d..00000000000
--- a/storage/innodb_plugin/dict/dict0mem.c
+++ /dev/null
@@ -1,319 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file dict/dict0mem.c
-Data dictionary memory object creation
-
-Created 1/8/1996 Heikki Tuuri
-***********************************************************************/
-
-#include "dict0mem.h"
-
-#ifdef UNIV_NONINL
-#include "dict0mem.ic"
-#endif
-
-#include "rem0rec.h"
-#include "data0type.h"
-#include "mach0data.h"
-#include "dict0dict.h"
-#ifndef UNIV_HOTBACKUP
-# include "lock0lock.h"
-#endif /* !UNIV_HOTBACKUP */
-
-#define DICT_HEAP_SIZE 100 /*!< initial memory heap size when
- creating a table or index object */
-
-/**********************************************************************//**
-Creates a table memory object.
-@return own: table object */
-UNIV_INTERN
-dict_table_t*
-dict_mem_table_create(
-/*==================*/
- const char* name, /*!< in: table name */
- ulint space, /*!< in: space where the clustered index of
- the table is placed; this parameter is
- ignored if the table is made a member of
- a cluster */
- ulint n_cols, /*!< in: number of columns */
- ulint flags) /*!< in: table flags */
-{
- dict_table_t* table;
- mem_heap_t* heap;
-
- ut_ad(name);
- ut_a(!(flags & (~0 << DICT_TF_BITS)));
-
- heap = mem_heap_create(DICT_HEAP_SIZE);
-
- table = mem_heap_zalloc(heap, sizeof(dict_table_t));
-
- table->heap = heap;
-
- table->flags = (unsigned int) flags;
- table->name = mem_heap_strdup(heap, name);
- table->space = (unsigned int) space;
- table->n_cols = (unsigned int) (n_cols + DATA_N_SYS_COLS);
-
- table->cols = mem_heap_alloc(heap, (n_cols + DATA_N_SYS_COLS)
- * sizeof(dict_col_t));
-
-#ifndef UNIV_HOTBACKUP
- table->autoinc_lock = mem_heap_alloc(heap, lock_get_size());
-
- mutex_create(&table->autoinc_mutex, SYNC_DICT_AUTOINC_MUTEX);
-
- table->autoinc = 0;
-
- /* The number of transactions that are either waiting on the
- AUTOINC lock or have been granted the lock. */
- table->n_waiting_or_granted_auto_inc_locks = 0;
-#endif /* !UNIV_HOTBACKUP */
-
- ut_d(table->magic_n = DICT_TABLE_MAGIC_N);
- return(table);
-}
-
-/****************************************************************//**
-Free a table memory object. */
-UNIV_INTERN
-void
-dict_mem_table_free(
-/*================*/
- dict_table_t* table) /*!< in: table */
-{
- ut_ad(table);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
- ut_d(table->cached = FALSE);
-
-#ifndef UNIV_HOTBACKUP
- mutex_free(&(table->autoinc_mutex));
-#endif /* UNIV_HOTBACKUP */
- mem_heap_free(table->heap);
-}
-
-/****************************************************************//**
-Append 'name' to 'col_names'. @see dict_table_t::col_names
-@return new column names array */
-static
-const char*
-dict_add_col_name(
-/*==============*/
- const char* col_names, /*!< in: existing column names, or
- NULL */
- ulint cols, /*!< in: number of existing columns */
- const char* name, /*!< in: new column name */
- mem_heap_t* heap) /*!< in: heap */
-{
- ulint old_len;
- ulint new_len;
- ulint total_len;
- char* res;
-
- ut_ad(!cols == !col_names);
-
- /* Find out length of existing array. */
- if (col_names) {
- const char* s = col_names;
- ulint i;
-
- for (i = 0; i < cols; i++) {
- s += strlen(s) + 1;
- }
-
- old_len = s - col_names;
- } else {
- old_len = 0;
- }
-
- new_len = strlen(name) + 1;
- total_len = old_len + new_len;
-
- res = mem_heap_alloc(heap, total_len);
-
- if (old_len > 0) {
- memcpy(res, col_names, old_len);
- }
-
- memcpy(res + old_len, name, new_len);
-
- return(res);
-}
-
-/**********************************************************************//**
-Adds a column definition to a table. */
-UNIV_INTERN
-void
-dict_mem_table_add_col(
-/*===================*/
- dict_table_t* table, /*!< in: table */
- mem_heap_t* heap, /*!< in: temporary memory heap, or NULL */
- const char* name, /*!< in: column name, or NULL */
- ulint mtype, /*!< in: main datatype */
- ulint prtype, /*!< in: precise type */
- ulint len) /*!< in: precision */
-{
- dict_col_t* col;
-#ifndef UNIV_HOTBACKUP
- ulint mbminlen;
- ulint mbmaxlen;
-#endif /* !UNIV_HOTBACKUP */
- ulint i;
-
- ut_ad(table);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
- ut_ad(!heap == !name);
-
- i = table->n_def++;
-
- if (name) {
- if (UNIV_UNLIKELY(table->n_def == table->n_cols)) {
- heap = table->heap;
- }
- if (UNIV_LIKELY(i) && UNIV_UNLIKELY(!table->col_names)) {
- /* All preceding column names are empty. */
- char* s = mem_heap_zalloc(heap, table->n_def);
- table->col_names = s;
- }
-
- table->col_names = dict_add_col_name(table->col_names,
- i, name, heap);
- }
-
- col = dict_table_get_nth_col(table, i);
-
- col->ind = (unsigned int) i;
- col->ord_part = 0;
-
- col->mtype = (unsigned int) mtype;
- col->prtype = (unsigned int) prtype;
- col->len = (unsigned int) len;
-
-#ifndef UNIV_HOTBACKUP
- dtype_get_mblen(mtype, prtype, &mbminlen, &mbmaxlen);
-
- col->mbminlen = (unsigned int) mbminlen;
- col->mbmaxlen = (unsigned int) mbmaxlen;
-#endif /* !UNIV_HOTBACKUP */
-}
-
-/**********************************************************************//**
-Creates an index memory object.
-@return own: index object */
-UNIV_INTERN
-dict_index_t*
-dict_mem_index_create(
-/*==================*/
- const char* table_name, /*!< in: table name */
- const char* index_name, /*!< in: index name */
- ulint space, /*!< in: space where the index tree is
- placed, ignored if the index is of
- the clustered type */
- ulint type, /*!< in: DICT_UNIQUE,
- DICT_CLUSTERED, ... ORed */
- ulint n_fields) /*!< in: number of fields */
-{
- dict_index_t* index;
- mem_heap_t* heap;
-
- ut_ad(table_name && index_name);
-
- heap = mem_heap_create(DICT_HEAP_SIZE);
- index = mem_heap_zalloc(heap, sizeof(dict_index_t));
-
- index->heap = heap;
-
- index->type = type;
-#ifndef UNIV_HOTBACKUP
- index->space = (unsigned int) space;
-#endif /* !UNIV_HOTBACKUP */
- index->name = mem_heap_strdup(heap, index_name);
- index->table_name = table_name;
- index->n_fields = (unsigned int) n_fields;
- index->fields = mem_heap_alloc(heap, 1 + n_fields
- * sizeof(dict_field_t));
- /* The '1 +' above prevents allocation
- of an empty mem block */
-#ifdef UNIV_DEBUG
- index->magic_n = DICT_INDEX_MAGIC_N;
-#endif /* UNIV_DEBUG */
- return(index);
-}
-
-/**********************************************************************//**
-Creates and initializes a foreign constraint memory object.
-@return own: foreign constraint struct */
-UNIV_INTERN
-dict_foreign_t*
-dict_mem_foreign_create(void)
-/*=========================*/
-{
- dict_foreign_t* foreign;
- mem_heap_t* heap;
-
- heap = mem_heap_create(100);
-
- foreign = mem_heap_zalloc(heap, sizeof(dict_foreign_t));
-
- foreign->heap = heap;
-
- return(foreign);
-}
-
-/**********************************************************************//**
-Adds a field definition to an index. NOTE: does not take a copy
-of the column name if the field is a column. The memory occupied
-by the column name may be released only after publishing the index. */
-UNIV_INTERN
-void
-dict_mem_index_add_field(
-/*=====================*/
- dict_index_t* index, /*!< in: index */
- const char* name, /*!< in: column name */
- ulint prefix_len) /*!< in: 0 or the column prefix length
- in a MySQL index like
- INDEX (textcol(25)) */
-{
- dict_field_t* field;
-
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- index->n_def++;
-
- field = dict_index_get_nth_field(index, index->n_def - 1);
-
- field->name = name;
- field->prefix_len = (unsigned int) prefix_len;
-}
-
-/**********************************************************************//**
-Frees an index memory object. */
-UNIV_INTERN
-void
-dict_mem_index_free(
-/*================*/
- dict_index_t* index) /*!< in: index */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- mem_heap_free(index->heap);
-}
diff --git a/storage/innodb_plugin/dyn/dyn0dyn.c b/storage/innodb_plugin/dyn/dyn0dyn.c
deleted file mode 100644
index e1275f040f3..00000000000
--- a/storage/innodb_plugin/dyn/dyn0dyn.c
+++ /dev/null
@@ -1,65 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file dyn/dyn0dyn.c
-The dynamically allocated array
-
-Created 2/5/1996 Heikki Tuuri
-*******************************************************/
-
-#include "dyn0dyn.h"
-#ifdef UNIV_NONINL
-#include "dyn0dyn.ic"
-#endif
-
-/************************************************************//**
-Adds a new block to a dyn array.
-@return created block */
-UNIV_INTERN
-dyn_block_t*
-dyn_array_add_block(
-/*================*/
- dyn_array_t* arr) /*!< in: dyn array */
-{
- mem_heap_t* heap;
- dyn_block_t* block;
-
- ut_ad(arr);
- ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
-
- if (arr->heap == NULL) {
- UT_LIST_INIT(arr->base);
- UT_LIST_ADD_FIRST(list, arr->base, arr);
-
- arr->heap = mem_heap_create(sizeof(dyn_block_t));
- }
-
- block = dyn_array_get_last_block(arr);
- block->used = block->used | DYN_BLOCK_FULL_FLAG;
-
- heap = arr->heap;
-
- block = mem_heap_alloc(heap, sizeof(dyn_block_t));
-
- block->used = 0;
-
- UT_LIST_ADD_LAST(list, arr->base, block);
-
- return(block);
-}
diff --git a/storage/innodb_plugin/eval/eval0eval.c b/storage/innodb_plugin/eval/eval0eval.c
deleted file mode 100644
index 589b0fa1576..00000000000
--- a/storage/innodb_plugin/eval/eval0eval.c
+++ /dev/null
@@ -1,852 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file eval/eval0eval.c
-SQL evaluator: evaluates simple data structures, like expressions, in
-a query graph
-
-Created 12/29/1997 Heikki Tuuri
-*******************************************************/
-
-#include "eval0eval.h"
-
-#ifdef UNIV_NONINL
-#include "eval0eval.ic"
-#endif
-
-#include "data0data.h"
-#include "row0sel.h"
-
-/** The RND function seed */
-static ulint eval_rnd = 128367121;
-
-/** Dummy adress used when we should allocate a buffer of size 0 in
-eval_node_alloc_val_buf */
-
-static byte eval_dummy;
-
-/*****************************************************************//**
-Allocate a buffer from global dynamic memory for a value of a que_node.
-NOTE that this memory must be explicitly freed when the query graph is
-freed. If the node already has an allocated buffer, that buffer is freed
-here. NOTE that this is the only function where dynamic memory should be
-allocated for a query node val field.
-@return pointer to allocated buffer */
-UNIV_INTERN
-byte*
-eval_node_alloc_val_buf(
-/*====================*/
- que_node_t* node, /*!< in: query graph node; sets the val field
- data field to point to the new buffer, and
- len field equal to size */
- ulint size) /*!< in: buffer size */
-{
- dfield_t* dfield;
- byte* data;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_SYMBOL
- || que_node_get_type(node) == QUE_NODE_FUNC);
-
- dfield = que_node_get_val(node);
-
- data = dfield_get_data(dfield);
-
- if (data && data != &eval_dummy) {
- mem_free(data);
- }
-
- if (size == 0) {
- data = &eval_dummy;
- } else {
- data = mem_alloc(size);
- }
-
- que_node_set_val_buf_size(node, size);
-
- dfield_set_data(dfield, data, size);
-
- return(data);
-}
-
-/*****************************************************************//**
-Free the buffer from global dynamic memory for a value of a que_node,
-if it has been allocated in the above function. The freeing for pushed
-column values is done in sel_col_prefetch_buf_free. */
-UNIV_INTERN
-void
-eval_node_free_val_buf(
-/*===================*/
- que_node_t* node) /*!< in: query graph node */
-{
- dfield_t* dfield;
- byte* data;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_SYMBOL
- || que_node_get_type(node) == QUE_NODE_FUNC);
-
- dfield = que_node_get_val(node);
-
- data = dfield_get_data(dfield);
-
- if (que_node_get_val_buf_size(node) > 0) {
- ut_a(data);
-
- mem_free(data);
- }
-}
-
-/*****************************************************************//**
-Evaluates a comparison node.
-@return the result of the comparison */
-UNIV_INTERN
-ibool
-eval_cmp(
-/*=====*/
- func_node_t* cmp_node) /*!< in: comparison node */
-{
- que_node_t* arg1;
- que_node_t* arg2;
- int res;
- ibool val;
- int func;
-
- ut_ad(que_node_get_type(cmp_node) == QUE_NODE_FUNC);
-
- arg1 = cmp_node->args;
- arg2 = que_node_get_next(arg1);
-
- res = cmp_dfield_dfield(que_node_get_val(arg1),
- que_node_get_val(arg2));
- val = TRUE;
-
- func = cmp_node->func;
-
- if (func == '=') {
- if (res != 0) {
- val = FALSE;
- }
- } else if (func == '<') {
- if (res != -1) {
- val = FALSE;
- }
- } else if (func == PARS_LE_TOKEN) {
- if (res == 1) {
- val = FALSE;
- }
- } else if (func == PARS_NE_TOKEN) {
- if (res == 0) {
- val = FALSE;
- }
- } else if (func == PARS_GE_TOKEN) {
- if (res == -1) {
- val = FALSE;
- }
- } else {
- ut_ad(func == '>');
-
- if (res != 1) {
- val = FALSE;
- }
- }
-
- eval_node_set_ibool_val(cmp_node, val);
-
- return(val);
-}
-
-/*****************************************************************//**
-Evaluates a logical operation node. */
-UNIV_INLINE
-void
-eval_logical(
-/*=========*/
- func_node_t* logical_node) /*!< in: logical operation node */
-{
- que_node_t* arg1;
- que_node_t* arg2;
- ibool val1;
- ibool val2 = 0; /* remove warning */
- ibool val = 0; /* remove warning */
- int func;
-
- ut_ad(que_node_get_type(logical_node) == QUE_NODE_FUNC);
-
- arg1 = logical_node->args;
- arg2 = que_node_get_next(arg1); /* arg2 is NULL if func is 'NOT' */
-
- val1 = eval_node_get_ibool_val(arg1);
-
- if (arg2) {
- val2 = eval_node_get_ibool_val(arg2);
- }
-
- func = logical_node->func;
-
- if (func == PARS_AND_TOKEN) {
- val = val1 & val2;
- } else if (func == PARS_OR_TOKEN) {
- val = val1 | val2;
- } else if (func == PARS_NOT_TOKEN) {
- val = TRUE - val1;
- } else {
- ut_error;
- }
-
- eval_node_set_ibool_val(logical_node, val);
-}
-
-/*****************************************************************//**
-Evaluates an arithmetic operation node. */
-UNIV_INLINE
-void
-eval_arith(
-/*=======*/
- func_node_t* arith_node) /*!< in: arithmetic operation node */
-{
- que_node_t* arg1;
- que_node_t* arg2;
- lint val1;
- lint val2 = 0; /* remove warning */
- lint val;
- int func;
-
- ut_ad(que_node_get_type(arith_node) == QUE_NODE_FUNC);
-
- arg1 = arith_node->args;
- arg2 = que_node_get_next(arg1); /* arg2 is NULL if func is unary '-' */
-
- val1 = eval_node_get_int_val(arg1);
-
- if (arg2) {
- val2 = eval_node_get_int_val(arg2);
- }
-
- func = arith_node->func;
-
- if (func == '+') {
- val = val1 + val2;
- } else if ((func == '-') && arg2) {
- val = val1 - val2;
- } else if (func == '-') {
- val = -val1;
- } else if (func == '*') {
- val = val1 * val2;
- } else {
- ut_ad(func == '/');
- val = val1 / val2;
- }
-
- eval_node_set_int_val(arith_node, val);
-}
-
-/*****************************************************************//**
-Evaluates an aggregate operation node. */
-UNIV_INLINE
-void
-eval_aggregate(
-/*===========*/
- func_node_t* node) /*!< in: aggregate operation node */
-{
- que_node_t* arg;
- lint val;
- lint arg_val;
- int func;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_FUNC);
-
- val = eval_node_get_int_val(node);
-
- func = node->func;
-
- if (func == PARS_COUNT_TOKEN) {
-
- val = val + 1;
- } else {
- ut_ad(func == PARS_SUM_TOKEN);
-
- arg = node->args;
- arg_val = eval_node_get_int_val(arg);
-
- val = val + arg_val;
- }
-
- eval_node_set_int_val(node, val);
-}
-
-/*****************************************************************//**
-Evaluates a predefined function node where the function is not relevant
-in benchmarks. */
-static
-void
-eval_predefined_2(
-/*==============*/
- func_node_t* func_node) /*!< in: predefined function node */
-{
- que_node_t* arg;
- que_node_t* arg1;
- que_node_t* arg2 = 0; /* remove warning (??? bug ???) */
- lint int_val;
- byte* data;
- ulint len1;
- ulint len2;
- int func;
- ulint i;
-
- ut_ad(que_node_get_type(func_node) == QUE_NODE_FUNC);
-
- arg1 = func_node->args;
-
- if (arg1) {
- arg2 = que_node_get_next(arg1);
- }
-
- func = func_node->func;
-
- if (func == PARS_PRINTF_TOKEN) {
-
- arg = arg1;
-
- while (arg) {
- dfield_print(que_node_get_val(arg));
-
- arg = que_node_get_next(arg);
- }
-
- putc('\n', stderr);
-
- } else if (func == PARS_ASSERT_TOKEN) {
-
- if (!eval_node_get_ibool_val(arg1)) {
- fputs("SQL assertion fails in a stored procedure!\n",
- stderr);
- }
-
- ut_a(eval_node_get_ibool_val(arg1));
-
- /* This function, or more precisely, a debug procedure,
- returns no value */
-
- } else if (func == PARS_RND_TOKEN) {
-
- len1 = (ulint)eval_node_get_int_val(arg1);
- len2 = (ulint)eval_node_get_int_val(arg2);
-
- ut_ad(len2 >= len1);
-
- if (len2 > len1) {
- int_val = (lint) (len1
- + (eval_rnd % (len2 - len1 + 1)));
- } else {
- int_val = (lint) len1;
- }
-
- eval_rnd = ut_rnd_gen_next_ulint(eval_rnd);
-
- eval_node_set_int_val(func_node, int_val);
-
- } else if (func == PARS_RND_STR_TOKEN) {
-
- len1 = (ulint)eval_node_get_int_val(arg1);
-
- data = eval_node_ensure_val_buf(func_node, len1);
-
- for (i = 0; i < len1; i++) {
- data[i] = (byte)(97 + (eval_rnd % 3));
-
- eval_rnd = ut_rnd_gen_next_ulint(eval_rnd);
- }
- } else {
- ut_error;
- }
-}
-
-/*****************************************************************//**
-Evaluates a notfound-function node. */
-UNIV_INLINE
-void
-eval_notfound(
-/*==========*/
- func_node_t* func_node) /*!< in: function node */
-{
- que_node_t* arg1;
- que_node_t* arg2;
- sym_node_t* cursor;
- sel_node_t* sel_node;
- ibool ibool_val;
-
- arg1 = func_node->args;
- arg2 = que_node_get_next(arg1);
-
- ut_ad(func_node->func == PARS_NOTFOUND_TOKEN);
-
- cursor = arg1;
-
- ut_ad(que_node_get_type(cursor) == QUE_NODE_SYMBOL);
-
- if (cursor->token_type == SYM_LIT) {
-
- ut_ad(ut_memcmp(dfield_get_data(que_node_get_val(cursor)),
- "SQL", 3) == 0);
-
- sel_node = cursor->sym_table->query_graph->last_sel_node;
- } else {
- sel_node = cursor->alias->cursor_def;
- }
-
- if (sel_node->state == SEL_NODE_NO_MORE_ROWS) {
- ibool_val = TRUE;
- } else {
- ibool_val = FALSE;
- }
-
- eval_node_set_ibool_val(func_node, ibool_val);
-}
-
-/*****************************************************************//**
-Evaluates a substr-function node. */
-UNIV_INLINE
-void
-eval_substr(
-/*========*/
- func_node_t* func_node) /*!< in: function node */
-{
- que_node_t* arg1;
- que_node_t* arg2;
- que_node_t* arg3;
- dfield_t* dfield;
- byte* str1;
- ulint len1;
- ulint len2;
-
- arg1 = func_node->args;
- arg2 = que_node_get_next(arg1);
-
- ut_ad(func_node->func == PARS_SUBSTR_TOKEN);
-
- arg3 = que_node_get_next(arg2);
-
- str1 = dfield_get_data(que_node_get_val(arg1));
-
- len1 = (ulint)eval_node_get_int_val(arg2);
- len2 = (ulint)eval_node_get_int_val(arg3);
-
- dfield = que_node_get_val(func_node);
-
- dfield_set_data(dfield, str1 + len1, len2);
-}
-
-/*****************************************************************//**
-Evaluates a replstr-procedure node. */
-static
-void
-eval_replstr(
-/*=========*/
- func_node_t* func_node) /*!< in: function node */
-{
- que_node_t* arg1;
- que_node_t* arg2;
- que_node_t* arg3;
- que_node_t* arg4;
- byte* str1;
- byte* str2;
- ulint len1;
- ulint len2;
-
- arg1 = func_node->args;
- arg2 = que_node_get_next(arg1);
-
- ut_ad(que_node_get_type(arg1) == QUE_NODE_SYMBOL);
-
- arg3 = que_node_get_next(arg2);
- arg4 = que_node_get_next(arg3);
-
- str1 = dfield_get_data(que_node_get_val(arg1));
- str2 = dfield_get_data(que_node_get_val(arg2));
-
- len1 = (ulint)eval_node_get_int_val(arg3);
- len2 = (ulint)eval_node_get_int_val(arg4);
-
- if ((dfield_get_len(que_node_get_val(arg1)) < len1 + len2)
- || (dfield_get_len(que_node_get_val(arg2)) < len2)) {
-
- ut_error;
- }
-
- ut_memcpy(str1 + len1, str2, len2);
-}
-
-/*****************************************************************//**
-Evaluates an instr-function node. */
-static
-void
-eval_instr(
-/*=======*/
- func_node_t* func_node) /*!< in: function node */
-{
- que_node_t* arg1;
- que_node_t* arg2;
- dfield_t* dfield1;
- dfield_t* dfield2;
- lint int_val;
- byte* str1;
- byte* str2;
- byte match_char;
- ulint len1;
- ulint len2;
- ulint i;
- ulint j;
-
- arg1 = func_node->args;
- arg2 = que_node_get_next(arg1);
-
- dfield1 = que_node_get_val(arg1);
- dfield2 = que_node_get_val(arg2);
-
- str1 = dfield_get_data(dfield1);
- str2 = dfield_get_data(dfield2);
-
- len1 = dfield_get_len(dfield1);
- len2 = dfield_get_len(dfield2);
-
- if (len2 == 0) {
- ut_error;
- }
-
- match_char = str2[0];
-
- for (i = 0; i < len1; i++) {
- /* In this outer loop, the number of matched characters is 0 */
-
- if (str1[i] == match_char) {
-
- if (i + len2 > len1) {
-
- break;
- }
-
- for (j = 1;; j++) {
- /* We have already matched j characters */
-
- if (j == len2) {
- int_val = i + 1;
-
- goto match_found;
- }
-
- if (str1[i + j] != str2[j]) {
-
- break;
- }
- }
- }
- }
-
- int_val = 0;
-
-match_found:
- eval_node_set_int_val(func_node, int_val);
-}
-
-/*****************************************************************//**
-Evaluates a predefined function node. */
-UNIV_INLINE
-void
-eval_binary_to_number(
-/*==================*/
- func_node_t* func_node) /*!< in: function node */
-{
- que_node_t* arg1;
- dfield_t* dfield;
- byte* str1;
- byte* str2;
- ulint len1;
- ulint int_val;
-
- arg1 = func_node->args;
-
- dfield = que_node_get_val(arg1);
-
- str1 = dfield_get_data(dfield);
- len1 = dfield_get_len(dfield);
-
- if (len1 > 4) {
- ut_error;
- }
-
- if (len1 == 4) {
- str2 = str1;
- } else {
- int_val = 0;
- str2 = (byte*)&int_val;
-
- ut_memcpy(str2 + (4 - len1), str1, len1);
- }
-
- eval_node_copy_and_alloc_val(func_node, str2, 4);
-}
-
-/*****************************************************************//**
-Evaluates a predefined function node. */
-static
-void
-eval_concat(
-/*========*/
- func_node_t* func_node) /*!< in: function node */
-{
- que_node_t* arg;
- dfield_t* dfield;
- byte* data;
- ulint len;
- ulint len1;
-
- arg = func_node->args;
- len = 0;
-
- while (arg) {
- len1 = dfield_get_len(que_node_get_val(arg));
-
- len += len1;
-
- arg = que_node_get_next(arg);
- }
-
- data = eval_node_ensure_val_buf(func_node, len);
-
- arg = func_node->args;
- len = 0;
-
- while (arg) {
- dfield = que_node_get_val(arg);
- len1 = dfield_get_len(dfield);
-
- ut_memcpy(data + len, dfield_get_data(dfield), len1);
-
- len += len1;
-
- arg = que_node_get_next(arg);
- }
-}
-
-/*****************************************************************//**
-Evaluates a predefined function node. If the first argument is an integer,
-this function looks at the second argument which is the integer length in
-bytes, and converts the integer to a VARCHAR.
-If the first argument is of some other type, this function converts it to
-BINARY. */
-UNIV_INLINE
-void
-eval_to_binary(
-/*===========*/
- func_node_t* func_node) /*!< in: function node */
-{
- que_node_t* arg1;
- que_node_t* arg2;
- dfield_t* dfield;
- byte* str1;
- ulint len;
- ulint len1;
-
- arg1 = func_node->args;
-
- str1 = dfield_get_data(que_node_get_val(arg1));
-
- if (dtype_get_mtype(que_node_get_data_type(arg1)) != DATA_INT) {
-
- len = dfield_get_len(que_node_get_val(arg1));
-
- dfield = que_node_get_val(func_node);
-
- dfield_set_data(dfield, str1, len);
-
- return;
- }
-
- arg2 = que_node_get_next(arg1);
-
- len1 = (ulint)eval_node_get_int_val(arg2);
-
- if (len1 > 4) {
-
- ut_error;
- }
-
- dfield = que_node_get_val(func_node);
-
- dfield_set_data(dfield, str1 + (4 - len1), len1);
-}
-
-/*****************************************************************//**
-Evaluates a predefined function node. */
-UNIV_INLINE
-void
-eval_predefined(
-/*============*/
- func_node_t* func_node) /*!< in: function node */
-{
- que_node_t* arg1;
- lint int_val;
- byte* data;
- int func;
-
- func = func_node->func;
-
- arg1 = func_node->args;
-
- if (func == PARS_LENGTH_TOKEN) {
-
- int_val = (lint)dfield_get_len(que_node_get_val(arg1));
-
- } else if (func == PARS_TO_CHAR_TOKEN) {
-
- /* Convert number to character string as a
- signed decimal integer. */
-
- ulint uint_val;
- int int_len;
-
- int_val = eval_node_get_int_val(arg1);
-
- /* Determine the length of the string. */
-
- if (int_val == 0) {
- int_len = 1; /* the number 0 occupies 1 byte */
- } else {
- int_len = 0;
- if (int_val < 0) {
- uint_val = ((ulint) -int_val - 1) + 1;
- int_len++; /* reserve space for minus sign */
- } else {
- uint_val = (ulint) int_val;
- }
- for (; uint_val > 0; int_len++) {
- uint_val /= 10;
- }
- }
-
- /* allocate the string */
- data = eval_node_ensure_val_buf(func_node, int_len + 1);
-
- /* add terminating NUL character */
- data[int_len] = 0;
-
- /* convert the number */
-
- if (int_val == 0) {
- data[0] = '0';
- } else {
- int tmp;
- if (int_val < 0) {
- data[0] = '-'; /* preceding minus sign */
- uint_val = ((ulint) -int_val - 1) + 1;
- } else {
- uint_val = (ulint) int_val;
- }
- for (tmp = int_len; uint_val > 0; uint_val /= 10) {
- data[--tmp] = (byte)
- ('0' + (byte)(uint_val % 10));
- }
- }
-
- dfield_set_len(que_node_get_val(func_node), int_len);
-
- return;
-
- } else if (func == PARS_TO_NUMBER_TOKEN) {
-
- int_val = atoi((char*)
- dfield_get_data(que_node_get_val(arg1)));
-
- } else if (func == PARS_SYSDATE_TOKEN) {
- int_val = (lint)ut_time();
- } else {
- eval_predefined_2(func_node);
-
- return;
- }
-
- eval_node_set_int_val(func_node, int_val);
-}
-
-/*****************************************************************//**
-Evaluates a function node. */
-UNIV_INTERN
-void
-eval_func(
-/*======*/
- func_node_t* func_node) /*!< in: function node */
-{
- que_node_t* arg;
- ulint class;
- ulint func;
-
- ut_ad(que_node_get_type(func_node) == QUE_NODE_FUNC);
-
- class = func_node->class;
- func = func_node->func;
-
- arg = func_node->args;
-
- /* Evaluate first the argument list */
- while (arg) {
- eval_exp(arg);
-
- /* The functions are not defined for SQL null argument
- values, except for eval_cmp and notfound */
-
- if (dfield_is_null(que_node_get_val(arg))
- && (class != PARS_FUNC_CMP)
- && (func != PARS_NOTFOUND_TOKEN)
- && (func != PARS_PRINTF_TOKEN)) {
- ut_error;
- }
-
- arg = que_node_get_next(arg);
- }
-
- if (class == PARS_FUNC_CMP) {
- eval_cmp(func_node);
- } else if (class == PARS_FUNC_ARITH) {
- eval_arith(func_node);
- } else if (class == PARS_FUNC_AGGREGATE) {
- eval_aggregate(func_node);
- } else if (class == PARS_FUNC_PREDEFINED) {
-
- if (func == PARS_NOTFOUND_TOKEN) {
- eval_notfound(func_node);
- } else if (func == PARS_SUBSTR_TOKEN) {
- eval_substr(func_node);
- } else if (func == PARS_REPLSTR_TOKEN) {
- eval_replstr(func_node);
- } else if (func == PARS_INSTR_TOKEN) {
- eval_instr(func_node);
- } else if (func == PARS_BINARY_TO_NUMBER_TOKEN) {
- eval_binary_to_number(func_node);
- } else if (func == PARS_CONCAT_TOKEN) {
- eval_concat(func_node);
- } else if (func == PARS_TO_BINARY_TOKEN) {
- eval_to_binary(func_node);
- } else {
- eval_predefined(func_node);
- }
- } else {
- ut_ad(class == PARS_FUNC_LOGICAL);
-
- eval_logical(func_node);
- }
-}
diff --git a/storage/innodb_plugin/eval/eval0proc.c b/storage/innodb_plugin/eval/eval0proc.c
deleted file mode 100644
index 3a4218d92bf..00000000000
--- a/storage/innodb_plugin/eval/eval0proc.c
+++ /dev/null
@@ -1,295 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1998, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file eval/eval0proc.c
-Executes SQL stored procedures and their control structures
-
-Created 1/20/1998 Heikki Tuuri
-*******************************************************/
-
-#include "eval0proc.h"
-
-#ifdef UNIV_NONINL
-#include "eval0proc.ic"
-#endif
-
-/**********************************************************************//**
-Performs an execution step of an if-statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-if_step(
-/*====*/
- que_thr_t* thr) /*!< in: query thread */
-{
- if_node_t* node;
- elsif_node_t* elsif_node;
-
- ut_ad(thr);
-
- node = thr->run_node;
- ut_ad(que_node_get_type(node) == QUE_NODE_IF);
-
- if (thr->prev_node == que_node_get_parent(node)) {
-
- /* Evaluate the condition */
-
- eval_exp(node->cond);
-
- if (eval_node_get_ibool_val(node->cond)) {
-
- /* The condition evaluated to TRUE: start execution
- from the first statement in the statement list */
-
- thr->run_node = node->stat_list;
-
- } else if (node->else_part) {
- thr->run_node = node->else_part;
-
- } else if (node->elsif_list) {
- elsif_node = node->elsif_list;
-
- for (;;) {
- eval_exp(elsif_node->cond);
-
- if (eval_node_get_ibool_val(
- elsif_node->cond)) {
-
- /* The condition evaluated to TRUE:
- start execution from the first
- statement in the statement list */
-
- thr->run_node = elsif_node->stat_list;
-
- break;
- }
-
- elsif_node = que_node_get_next(elsif_node);
-
- if (elsif_node == NULL) {
- thr->run_node = NULL;
-
- break;
- }
- }
- } else {
- thr->run_node = NULL;
- }
- } else {
- /* Move to the next statement */
- ut_ad(que_node_get_next(thr->prev_node) == NULL);
-
- thr->run_node = NULL;
- }
-
- if (thr->run_node == NULL) {
- thr->run_node = que_node_get_parent(node);
- }
-
- return(thr);
-}
-
-/**********************************************************************//**
-Performs an execution step of a while-statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-while_step(
-/*=======*/
- que_thr_t* thr) /*!< in: query thread */
-{
- while_node_t* node;
-
- ut_ad(thr);
-
- node = thr->run_node;
- ut_ad(que_node_get_type(node) == QUE_NODE_WHILE);
-
- ut_ad((thr->prev_node == que_node_get_parent(node))
- || (que_node_get_next(thr->prev_node) == NULL));
-
- /* Evaluate the condition */
-
- eval_exp(node->cond);
-
- if (eval_node_get_ibool_val(node->cond)) {
-
- /* The condition evaluated to TRUE: start execution
- from the first statement in the statement list */
-
- thr->run_node = node->stat_list;
- } else {
- thr->run_node = que_node_get_parent(node);
- }
-
- return(thr);
-}
-
-/**********************************************************************//**
-Performs an execution step of an assignment statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-assign_step(
-/*========*/
- que_thr_t* thr) /*!< in: query thread */
-{
- assign_node_t* node;
-
- ut_ad(thr);
-
- node = thr->run_node;
- ut_ad(que_node_get_type(node) == QUE_NODE_ASSIGNMENT);
-
- /* Evaluate the value to assign */
-
- eval_exp(node->val);
-
- eval_node_copy_val(node->var->alias, node->val);
-
- thr->run_node = que_node_get_parent(node);
-
- return(thr);
-}
-
-/**********************************************************************//**
-Performs an execution step of a for-loop node.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-for_step(
-/*=====*/
- que_thr_t* thr) /*!< in: query thread */
-{
- for_node_t* node;
- que_node_t* parent;
- lint loop_var_value;
-
- ut_ad(thr);
-
- node = thr->run_node;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_FOR);
-
- parent = que_node_get_parent(node);
-
- if (thr->prev_node != parent) {
-
- /* Move to the next statement */
- thr->run_node = que_node_get_next(thr->prev_node);
-
- if (thr->run_node != NULL) {
-
- return(thr);
- }
-
- /* Increment the value of loop_var */
-
- loop_var_value = 1 + eval_node_get_int_val(node->loop_var);
- } else {
- /* Initialize the loop */
-
- eval_exp(node->loop_start_limit);
- eval_exp(node->loop_end_limit);
-
- loop_var_value = eval_node_get_int_val(node->loop_start_limit);
-
- node->loop_end_value
- = (int) eval_node_get_int_val(node->loop_end_limit);
- }
-
- /* Check if we should do another loop */
-
- if (loop_var_value > node->loop_end_value) {
-
- /* Enough loops done */
-
- thr->run_node = parent;
- } else {
- eval_node_set_int_val(node->loop_var, loop_var_value);
-
- thr->run_node = node->stat_list;
- }
-
- return(thr);
-}
-
-/**********************************************************************//**
-Performs an execution step of an exit statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-exit_step(
-/*======*/
- que_thr_t* thr) /*!< in: query thread */
-{
- exit_node_t* node;
- que_node_t* loop_node;
-
- ut_ad(thr);
-
- node = thr->run_node;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_EXIT);
-
- /* Loops exit by setting thr->run_node as the loop node's parent, so
- find our containing loop node and get its parent. */
-
- loop_node = que_node_get_containing_loop_node(node);
-
- /* If someone uses an EXIT statement outside of a loop, this will
- trigger. */
- ut_a(loop_node);
-
- thr->run_node = que_node_get_parent(loop_node);
-
- return(thr);
-}
-
-/**********************************************************************//**
-Performs an execution step of a return-statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-return_step(
-/*========*/
- que_thr_t* thr) /*!< in: query thread */
-{
- return_node_t* node;
- que_node_t* parent;
-
- ut_ad(thr);
-
- node = thr->run_node;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_RETURN);
-
- parent = node;
-
- while (que_node_get_type(parent) != QUE_NODE_PROC) {
-
- parent = que_node_get_parent(parent);
- }
-
- ut_a(parent);
-
- thr->run_node = que_node_get_parent(parent);
-
- return(thr);
-}
diff --git a/storage/innodb_plugin/fil/fil0fil.c b/storage/innodb_plugin/fil/fil0fil.c
deleted file mode 100644
index 112a0e27d50..00000000000
--- a/storage/innodb_plugin/fil/fil0fil.c
+++ /dev/null
@@ -1,4798 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file fil/fil0fil.c
-The tablespace memory cache
-
-Created 10/25/1995 Heikki Tuuri
-*******************************************************/
-
-#include "fil0fil.h"
-
-#include "mem0mem.h"
-#include "hash0hash.h"
-#include "os0file.h"
-#include "mach0data.h"
-#include "buf0buf.h"
-#include "buf0flu.h"
-#include "log0recv.h"
-#include "fsp0fsp.h"
-#include "srv0srv.h"
-#include "srv0start.h"
-#include "mtr0mtr.h"
-#include "mtr0log.h"
-#include "dict0dict.h"
-#include "page0zip.h"
-#ifndef UNIV_HOTBACKUP
-# include "buf0lru.h"
-# include "ibuf0ibuf.h"
-# include "sync0sync.h"
-# include "os0sync.h"
-#else /* !UNIV_HOTBACKUP */
-static ulint srv_data_read, srv_data_written;
-#endif /* !UNIV_HOTBACKUP */
-
-/*
- IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE
- =============================================
-
-The tablespace cache is responsible for providing fast read/write access to
-tablespaces and logs of the database. File creation and deletion is done
-in other modules which know more of the logic of the operation, however.
-
-A tablespace consists of a chain of files. The size of the files does not
-have to be divisible by the database block size, because we may just leave
-the last incomplete block unused. When a new file is appended to the
-tablespace, the maximum size of the file is also specified. At the moment,
-we think that it is best to extend the file to its maximum size already at
-the creation of the file, because then we can avoid dynamically extending
-the file when more space is needed for the tablespace.
-
-A block's position in the tablespace is specified with a 32-bit unsigned
-integer. The files in the chain are thought to be catenated, and the block
-corresponding to an address n is the nth block in the catenated file (where
-the first block is named the 0th block, and the incomplete block fragments
-at the end of files are not taken into account). A tablespace can be extended
-by appending a new file at the end of the chain.
-
-Our tablespace concept is similar to the one of Oracle.
-
-To acquire more speed in disk transfers, a technique called disk striping is
-sometimes used. This means that logical block addresses are divided in a
-round-robin fashion across several disks. Windows NT supports disk striping,
-so there we do not need to support it in the database. Disk striping is
-implemented in hardware in RAID disks. We conclude that it is not necessary
-to implement it in the database. Oracle 7 does not support disk striping,
-either.
-
-Another trick used at some database sites is replacing tablespace files by
-raw disks, that is, the whole physical disk drive, or a partition of it, is
-opened as a single file, and it is accessed through byte offsets calculated
-from the start of the disk or the partition. This is recommended in some
-books on database tuning to achieve more speed in i/o. Using raw disk
-certainly prevents the OS from fragmenting disk space, but it is not clear
-if it really adds speed. We measured on the Pentium 100 MHz + NT + NTFS file
-system + EIDE Conner disk only a negligible difference in speed when reading
-from a file, versus reading from a raw disk.
-
-To have fast access to a tablespace or a log file, we put the data structures
-to a hash table. Each tablespace and log file is given an unique 32-bit
-identifier.
-
-Some operating systems do not support many open files at the same time,
-though NT seems to tolerate at least 900 open files. Therefore, we put the
-open files in an LRU-list. If we need to open another file, we may close the
-file at the end of the LRU-list. When an i/o-operation is pending on a file,
-the file cannot be closed. We take the file nodes with pending i/o-operations
-out of the LRU-list and keep a count of pending operations. When an operation
-completes, we decrement the count and return the file node to the LRU-list if
-the count drops to zero. */
-
-/** When mysqld is run, the default directory "." is the mysqld datadir,
-but in the MySQL Embedded Server Library and ibbackup it is not the default
-directory, and we must set the base file path explicitly */
-UNIV_INTERN const char* fil_path_to_mysql_datadir = ".";
-
-/** The number of fsyncs done to the log */
-UNIV_INTERN ulint fil_n_log_flushes = 0;
-
-/** Number of pending redo log flushes */
-UNIV_INTERN ulint fil_n_pending_log_flushes = 0;
-/** Number of pending tablespace flushes */
-UNIV_INTERN ulint fil_n_pending_tablespace_flushes = 0;
-
-/** The null file address */
-UNIV_INTERN fil_addr_t fil_addr_null = {FIL_NULL, 0};
-
-/** File node of a tablespace or the log data space */
-struct fil_node_struct {
- fil_space_t* space; /*!< backpointer to the space where this node
- belongs */
- char* name; /*!< path to the file */
- ibool open; /*!< TRUE if file open */
- os_file_t handle; /*!< OS handle to the file, if file open */
- ibool is_raw_disk;/*!< TRUE if the 'file' is actually a raw
- device or a raw disk partition */
- ulint size; /*!< size of the file in database pages, 0 if
- not known yet; the possible last incomplete
- megabyte may be ignored if space == 0 */
- ulint n_pending;
- /*!< count of pending i/o's on this file;
- closing of the file is not allowed if
- this is > 0 */
- ulint n_pending_flushes;
- /*!< count of pending flushes on this file;
- closing of the file is not allowed if
- this is > 0 */
- ib_int64_t modification_counter;/*!< when we write to the file we
- increment this by one */
- ib_int64_t flush_counter;/*!< up to what
- modification_counter value we have
- flushed the modifications to disk */
- UT_LIST_NODE_T(fil_node_t) chain;
- /*!< link field for the file chain */
- UT_LIST_NODE_T(fil_node_t) LRU;
- /*!< link field for the LRU list */
- ulint magic_n;/*!< FIL_NODE_MAGIC_N */
-};
-
-/** Value of fil_node_struct::magic_n */
-#define FIL_NODE_MAGIC_N 89389
-
-/** Tablespace or log data space: let us call them by a common name space */
-struct fil_space_struct {
- char* name; /*!< space name = the path to the first file in
- it */
- ulint id; /*!< space id */
- ib_int64_t tablespace_version;
- /*!< in DISCARD/IMPORT this timestamp
- is used to check if we should ignore
- an insert buffer merge request for a
- page because it actually was for the
- previous incarnation of the space */
- ibool mark; /*!< this is set to TRUE at database startup if
- the space corresponds to a table in the InnoDB
- data dictionary; so we can print a warning of
- orphaned tablespaces */
- ibool stop_ios;/*!< TRUE if we want to rename the
- .ibd file of tablespace and want to
- stop temporarily posting of new i/o
- requests on the file */
- ibool stop_ibuf_merges;
- /*!< we set this TRUE when we start
- deleting a single-table tablespace */
- ibool is_being_deleted;
- /*!< this is set to TRUE when we start
- deleting a single-table tablespace and its
- file; when this flag is set no further i/o
- or flush requests can be placed on this space,
- though there may be such requests still being
- processed on this space */
- ulint purpose;/*!< FIL_TABLESPACE, FIL_LOG, or
- FIL_ARCH_LOG */
- UT_LIST_BASE_NODE_T(fil_node_t) chain;
- /*!< base node for the file chain */
- ulint size; /*!< space size in pages; 0 if a single-table
- tablespace whose size we do not know yet;
- last incomplete megabytes in data files may be
- ignored if space == 0 */
- ulint flags; /*!< compressed page size and file format, or 0 */
- ulint n_reserved_extents;
- /*!< number of reserved free extents for
- ongoing operations like B-tree page split */
- ulint n_pending_flushes; /*!< this is positive when flushing
- the tablespace to disk; dropping of the
- tablespace is forbidden if this is positive */
- ulint n_pending_ibuf_merges;/*!< this is positive
- when merging insert buffer entries to
- a page so that we may need to access
- the ibuf bitmap page in the
- tablespade: dropping of the tablespace
- is forbidden if this is positive */
- hash_node_t hash; /*!< hash chain node */
- hash_node_t name_hash;/*!< hash chain the name_hash table */
-#ifndef UNIV_HOTBACKUP
- rw_lock_t latch; /*!< latch protecting the file space storage
- allocation */
-#endif /* !UNIV_HOTBACKUP */
- UT_LIST_NODE_T(fil_space_t) unflushed_spaces;
- /*!< list of spaces with at least one unflushed
- file we have written to */
- ibool is_in_unflushed_spaces; /*!< TRUE if this space is
- currently in unflushed_spaces */
- UT_LIST_NODE_T(fil_space_t) space_list;
- /*!< list of all spaces */
- ulint magic_n;/*!< FIL_SPACE_MAGIC_N */
-};
-
-/** Value of fil_space_struct::magic_n */
-#define FIL_SPACE_MAGIC_N 89472
-
-/** The tablespace memory cache */
-typedef struct fil_system_struct fil_system_t;
-
-/** The tablespace memory cache; also the totality of logs (the log
-data space) is stored here; below we talk about tablespaces, but also
-the ib_logfiles form a 'space' and it is handled here */
-
-struct fil_system_struct {
-#ifndef UNIV_HOTBACKUP
- mutex_t mutex; /*!< The mutex protecting the cache */
-#endif /* !UNIV_HOTBACKUP */
- hash_table_t* spaces; /*!< The hash table of spaces in the
- system; they are hashed on the space
- id */
- hash_table_t* name_hash; /*!< hash table based on the space
- name */
- UT_LIST_BASE_NODE_T(fil_node_t) LRU;
- /*!< base node for the LRU list of the
- most recently used open files with no
- pending i/o's; if we start an i/o on
- the file, we first remove it from this
- list, and return it to the start of
- the list when the i/o ends;
- log files and the system tablespace are
- not put to this list: they are opened
- after the startup, and kept open until
- shutdown */
- UT_LIST_BASE_NODE_T(fil_space_t) unflushed_spaces;
- /*!< base node for the list of those
- tablespaces whose files contain
- unflushed writes; those spaces have
- at least one file node where
- modification_counter > flush_counter */
- ulint n_open; /*!< number of files currently open */
- ulint max_n_open; /*!< n_open is not allowed to exceed
- this */
- ib_int64_t modification_counter;/*!< when we write to a file we
- increment this by one */
- ulint max_assigned_id;/*!< maximum space id in the existing
- tables, or assigned during the time
- mysqld has been up; at an InnoDB
- startup we scan the data dictionary
- and set here the maximum of the
- space id's of the tables there */
- ib_int64_t tablespace_version;
- /*!< a counter which is incremented for
- every space object memory creation;
- every space mem object gets a
- 'timestamp' from this; in DISCARD/
- IMPORT this is used to check if we
- should ignore an insert buffer merge
- request */
- UT_LIST_BASE_NODE_T(fil_space_t) space_list;
- /*!< list of all file spaces */
-};
-
-/** The tablespace memory cache. This variable is NULL before the module is
-initialized. */
-static fil_system_t* fil_system = NULL;
-
-
-/********************************************************************//**
-NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
-
-Prepares a file node for i/o. Opens the file if it is closed. Updates the
-pending i/o's field in the node and the system appropriately. Takes the node
-off the LRU list if it is in the LRU list. The caller must hold the fil_sys
-mutex. */
-static
-void
-fil_node_prepare_for_io(
-/*====================*/
- fil_node_t* node, /*!< in: file node */
- fil_system_t* system, /*!< in: tablespace memory cache */
- fil_space_t* space); /*!< in: space */
-/********************************************************************//**
-Updates the data structures when an i/o operation finishes. Updates the
-pending i/o's field in the node appropriately. */
-static
-void
-fil_node_complete_io(
-/*=================*/
- fil_node_t* node, /*!< in: file node */
- fil_system_t* system, /*!< in: tablespace memory cache */
- ulint type); /*!< in: OS_FILE_WRITE or OS_FILE_READ; marks
- the node as modified if
- type == OS_FILE_WRITE */
-/*******************************************************************//**
-Checks if a single-table tablespace for a given table name exists in the
-tablespace memory cache.
-@return space id, ULINT_UNDEFINED if not found */
-static
-ulint
-fil_get_space_id_for_table(
-/*=======================*/
- const char* name); /*!< in: table name in the standard
- 'databasename/tablename' format */
-/*******************************************************************//**
-Frees a space object from the tablespace memory cache. Closes the files in
-the chain but does not delete them. There must not be any pending i/o's or
-flushes on the files. */
-static
-ibool
-fil_space_free(
-/*===========*/
- /* out: TRUE if success */
- ulint id, /* in: space id */
- ibool own_mutex);/* in: TRUE if own system->mutex */
-/********************************************************************//**
-Reads data from a space to a buffer. Remember that the possible incomplete
-blocks at the end of file are ignored: they are not taken into account when
-calculating the byte offset within a space.
-@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
-i/o on a tablespace which does not exist */
-UNIV_INLINE
-ulint
-fil_read(
-/*=====*/
- ibool sync, /*!< in: TRUE if synchronous aio is desired */
- ulint space_id, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint block_offset, /*!< in: offset in number of blocks */
- ulint byte_offset, /*!< in: remainder of offset in bytes; in aio
- this must be divisible by the OS block size */
- ulint len, /*!< in: how many bytes to read; this must not
- cross a file boundary; in aio this must be a
- block size multiple */
- void* buf, /*!< in/out: buffer where to store data read;
- in aio this must be appropriately aligned */
- void* message) /*!< in: message for aio handler if non-sync
- aio used, else ignored */
-{
- return(fil_io(OS_FILE_READ, sync, space_id, zip_size, block_offset,
- byte_offset, len, buf, message));
-}
-
-/********************************************************************//**
-Writes data to a space from a buffer. Remember that the possible incomplete
-blocks at the end of file are ignored: they are not taken into account when
-calculating the byte offset within a space.
-@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
-i/o on a tablespace which does not exist */
-UNIV_INLINE
-ulint
-fil_write(
-/*======*/
- ibool sync, /*!< in: TRUE if synchronous aio is desired */
- ulint space_id, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint block_offset, /*!< in: offset in number of blocks */
- ulint byte_offset, /*!< in: remainder of offset in bytes; in aio
- this must be divisible by the OS block size */
- ulint len, /*!< in: how many bytes to write; this must
- not cross a file boundary; in aio this must
- be a block size multiple */
- void* buf, /*!< in: buffer from which to write; in aio
- this must be appropriately aligned */
- void* message) /*!< in: message for aio handler if non-sync
- aio used, else ignored */
-{
- return(fil_io(OS_FILE_WRITE, sync, space_id, zip_size, block_offset,
- byte_offset, len, buf, message));
-}
-
-/*******************************************************************//**
-Returns the table space by a given id, NULL if not found. */
-UNIV_INLINE
-fil_space_t*
-fil_space_get_by_id(
-/*================*/
- ulint id) /*!< in: space id */
-{
- fil_space_t* space;
-
- ut_ad(mutex_own(&fil_system->mutex));
-
- HASH_SEARCH(hash, fil_system->spaces, id,
- fil_space_t*, space,
- ut_ad(space->magic_n == FIL_SPACE_MAGIC_N),
- space->id == id);
-
- return(space);
-}
-
-/*******************************************************************//**
-Returns the table space by a given name, NULL if not found. */
-UNIV_INLINE
-fil_space_t*
-fil_space_get_by_name(
-/*==================*/
- const char* name) /*!< in: space name */
-{
- fil_space_t* space;
- ulint fold;
-
- ut_ad(mutex_own(&fil_system->mutex));
-
- fold = ut_fold_string(name);
-
- HASH_SEARCH(name_hash, fil_system->name_hash, fold,
- fil_space_t*, space,
- ut_ad(space->magic_n == FIL_SPACE_MAGIC_N),
- !strcmp(name, space->name));
-
- return(space);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Returns the version number of a tablespace, -1 if not found.
-@return version number, -1 if the tablespace does not exist in the
-memory cache */
-UNIV_INTERN
-ib_int64_t
-fil_space_get_version(
-/*==================*/
- ulint id) /*!< in: space id */
-{
- fil_space_t* space;
- ib_int64_t version = -1;
-
- ut_ad(fil_system);
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(id);
-
- if (space) {
- version = space->tablespace_version;
- }
-
- mutex_exit(&fil_system->mutex);
-
- return(version);
-}
-
-/*******************************************************************//**
-Returns the latch of a file space.
-@return latch protecting storage allocation */
-UNIV_INTERN
-rw_lock_t*
-fil_space_get_latch(
-/*================*/
- ulint id, /*!< in: space id */
- ulint* flags) /*!< out: tablespace flags */
-{
- fil_space_t* space;
-
- ut_ad(fil_system);
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(id);
-
- ut_a(space);
-
- if (flags) {
- *flags = space->flags;
- }
-
- mutex_exit(&fil_system->mutex);
-
- return(&(space->latch));
-}
-
-/*******************************************************************//**
-Returns the type of a file space.
-@return FIL_TABLESPACE or FIL_LOG */
-UNIV_INTERN
-ulint
-fil_space_get_type(
-/*===============*/
- ulint id) /*!< in: space id */
-{
- fil_space_t* space;
-
- ut_ad(fil_system);
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(id);
-
- ut_a(space);
-
- mutex_exit(&fil_system->mutex);
-
- return(space->purpose);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Checks if all the file nodes in a space are flushed. The caller must hold
-the fil_system mutex.
-@return TRUE if all are flushed */
-static
-ibool
-fil_space_is_flushed(
-/*=================*/
- fil_space_t* space) /*!< in: space */
-{
- fil_node_t* node;
-
- ut_ad(mutex_own(&fil_system->mutex));
-
- node = UT_LIST_GET_FIRST(space->chain);
-
- while (node) {
- if (node->modification_counter > node->flush_counter) {
-
- return(FALSE);
- }
-
- node = UT_LIST_GET_NEXT(chain, node);
- }
-
- return(TRUE);
-}
-
-/*******************************************************************//**
-Appends a new file to the chain of files of a space. File must be closed. */
-UNIV_INTERN
-void
-fil_node_create(
-/*============*/
- const char* name, /*!< in: file name (file must be closed) */
- ulint size, /*!< in: file size in database blocks, rounded
- downwards to an integer */
- ulint id, /*!< in: space id where to append */
- ibool is_raw) /*!< in: TRUE if a raw device or
- a raw disk partition */
-{
- fil_node_t* node;
- fil_space_t* space;
-
- ut_a(fil_system);
- ut_a(name);
-
- mutex_enter(&fil_system->mutex);
-
- node = mem_alloc(sizeof(fil_node_t));
-
- node->name = mem_strdup(name);
- node->open = FALSE;
-
- ut_a(!is_raw || srv_start_raw_disk_in_use);
-
- node->is_raw_disk = is_raw;
- node->size = size;
- node->magic_n = FIL_NODE_MAGIC_N;
- node->n_pending = 0;
- node->n_pending_flushes = 0;
-
- node->modification_counter = 0;
- node->flush_counter = 0;
-
- space = fil_space_get_by_id(id);
-
- if (!space) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: Could not find tablespace %lu for\n"
- "InnoDB: file ", (ulong) id);
- ut_print_filename(stderr, name);
- fputs(" in the tablespace memory cache.\n", stderr);
- mem_free(node->name);
-
- mem_free(node);
-
- mutex_exit(&fil_system->mutex);
-
- return;
- }
-
- space->size += size;
-
- node->space = space;
-
- UT_LIST_ADD_LAST(chain, space->chain, node);
-
- if (id < SRV_LOG_SPACE_FIRST_ID && fil_system->max_assigned_id < id) {
-
- fil_system->max_assigned_id = id;
- }
-
- mutex_exit(&fil_system->mutex);
-}
-
-/********************************************************************//**
-Opens a the file of a node of a tablespace. The caller must own the fil_system
-mutex. */
-static
-void
-fil_node_open_file(
-/*===============*/
- fil_node_t* node, /*!< in: file node */
- fil_system_t* system, /*!< in: tablespace memory cache */
- fil_space_t* space) /*!< in: space */
-{
- ib_int64_t size_bytes;
- ulint size_low;
- ulint size_high;
- ibool ret;
- ibool success;
- byte* buf2;
- byte* page;
- ulint space_id;
- ulint flags;
-
- ut_ad(mutex_own(&(system->mutex)));
- ut_a(node->n_pending == 0);
- ut_a(node->open == FALSE);
-
- if (node->size == 0) {
- /* It must be a single-table tablespace and we do not know the
- size of the file yet. First we open the file in the normal
- mode, no async I/O here, for simplicity. Then do some checks,
- and close the file again.
- NOTE that we could not use the simple file read function
- os_file_read() in Windows to read from a file opened for
- async I/O! */
-
- node->handle = os_file_create_simple_no_error_handling(
- node->name, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Fatal error: cannot open %s\n."
- "InnoDB: Have you deleted .ibd files"
- " under a running mysqld server?\n",
- node->name);
- ut_a(0);
- }
-
- os_file_get_size(node->handle, &size_low, &size_high);
-
- size_bytes = (((ib_int64_t)size_high) << 32)
- + (ib_int64_t)size_low;
-#ifdef UNIV_HOTBACKUP
- if (space->id == 0) {
- node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
- os_file_close(node->handle);
- goto add_size;
- }
-#endif /* UNIV_HOTBACKUP */
- ut_a(space->purpose != FIL_LOG);
- ut_a(space->id != 0);
-
- if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
- fprintf(stderr,
- "InnoDB: Error: the size of single-table"
- " tablespace file %s\n"
- "InnoDB: is only %lu %lu,"
- " should be at least %lu!\n",
- node->name,
- (ulong) size_high,
- (ulong) size_low,
- (ulong) (FIL_IBD_FILE_INITIAL_SIZE
- * UNIV_PAGE_SIZE));
-
- ut_a(0);
- }
-
- /* Read the first page of the tablespace */
-
- buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
- /* Align the memory for file i/o if we might have O_DIRECT
- set */
- page = ut_align(buf2, UNIV_PAGE_SIZE);
-
- success = os_file_read(node->handle, page, 0, 0,
- UNIV_PAGE_SIZE);
- space_id = fsp_header_get_space_id(page);
- flags = fsp_header_get_flags(page);
-
- ut_free(buf2);
-
- /* Close the file now that we have read the space id from it */
-
- os_file_close(node->handle);
-
- if (UNIV_UNLIKELY(space_id != space->id)) {
- fprintf(stderr,
- "InnoDB: Error: tablespace id is %lu"
- " in the data dictionary\n"
- "InnoDB: but in file %s it is %lu!\n",
- space->id, node->name, space_id);
-
- ut_error;
- }
-
- if (UNIV_UNLIKELY(space_id == ULINT_UNDEFINED
- || space_id == 0)) {
- fprintf(stderr,
- "InnoDB: Error: tablespace id %lu"
- " in file %s is not sensible\n",
- (ulong) space_id, node->name);
-
- ut_error;
- }
-
- if (UNIV_UNLIKELY(space->flags != flags)) {
- fprintf(stderr,
- "InnoDB: Error: table flags are %lx"
- " in the data dictionary\n"
- "InnoDB: but the flags in file %s are %lx!\n",
- space->flags, node->name, flags);
-
- ut_error;
- }
-
- if (size_bytes >= 1024 * 1024) {
- /* Truncate the size to whole megabytes. */
- size_bytes = ut_2pow_round(size_bytes, 1024 * 1024);
- }
-
- if (!(flags & DICT_TF_ZSSIZE_MASK)) {
- node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
- } else {
- node->size = (ulint)
- (size_bytes
- / dict_table_flags_to_zip_size(flags));
- }
-
-#ifdef UNIV_HOTBACKUP
-add_size:
-#endif /* UNIV_HOTBACKUP */
- space->size += node->size;
- }
-
- /* printf("Opening file %s\n", node->name); */
-
- /* Open the file for reading and writing, in Windows normally in the
- unbuffered async I/O mode, though global variables may make
- os_file_create() to fall back to the normal file I/O mode. */
-
- if (space->purpose == FIL_LOG) {
- node->handle = os_file_create(node->name, OS_FILE_OPEN,
- OS_FILE_AIO, OS_LOG_FILE, &ret);
- } else if (node->is_raw_disk) {
- node->handle = os_file_create(node->name,
- OS_FILE_OPEN_RAW,
- OS_FILE_AIO, OS_DATA_FILE, &ret);
- } else {
- node->handle = os_file_create(node->name, OS_FILE_OPEN,
- OS_FILE_AIO, OS_DATA_FILE, &ret);
- }
-
- ut_a(ret);
-
- node->open = TRUE;
-
- system->n_open++;
-
- if (space->purpose == FIL_TABLESPACE && space->id != 0) {
- /* Put the node to the LRU list */
- UT_LIST_ADD_FIRST(LRU, system->LRU, node);
- }
-}
-
-/**********************************************************************//**
-Closes a file. */
-static
-void
-fil_node_close_file(
-/*================*/
- fil_node_t* node, /*!< in: file node */
- fil_system_t* system) /*!< in: tablespace memory cache */
-{
- ibool ret;
-
- ut_ad(node && system);
- ut_ad(mutex_own(&(system->mutex)));
- ut_a(node->open);
- ut_a(node->n_pending == 0);
- ut_a(node->n_pending_flushes == 0);
- ut_a(node->modification_counter == node->flush_counter);
-
- ret = os_file_close(node->handle);
- ut_a(ret);
-
- /* printf("Closing file %s\n", node->name); */
-
- node->open = FALSE;
- ut_a(system->n_open > 0);
- system->n_open--;
-
- if (node->space->purpose == FIL_TABLESPACE && node->space->id != 0) {
- ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
-
- /* The node is in the LRU list, remove it */
- UT_LIST_REMOVE(LRU, system->LRU, node);
- }
-}
-
-/********************************************************************//**
-Tries to close a file in the LRU list. The caller must hold the fil_sys
-mutex.
-@return TRUE if success, FALSE if should retry later; since i/o's
-generally complete in < 100 ms, and as InnoDB writes at most 128 pages
-from the buffer pool in a batch, and then immediately flushes the
-files, there is a good chance that the next time we find a suitable
-node from the LRU list */
-static
-ibool
-fil_try_to_close_file_in_LRU(
-/*=========================*/
- ibool print_info) /*!< in: if TRUE, prints information why it
- cannot close a file */
-{
- fil_node_t* node;
-
- ut_ad(mutex_own(&fil_system->mutex));
-
- node = UT_LIST_GET_LAST(fil_system->LRU);
-
- if (print_info) {
- fprintf(stderr,
- "InnoDB: fil_sys open file LRU len %lu\n",
- (ulong) UT_LIST_GET_LEN(fil_system->LRU));
- }
-
- while (node != NULL) {
- if (node->modification_counter == node->flush_counter
- && node->n_pending_flushes == 0) {
-
- fil_node_close_file(node, fil_system);
-
- return(TRUE);
- }
-
- if (print_info && node->n_pending_flushes > 0) {
- fputs("InnoDB: cannot close file ", stderr);
- ut_print_filename(stderr, node->name);
- fprintf(stderr, ", because n_pending_flushes %lu\n",
- (ulong) node->n_pending_flushes);
- }
-
- if (print_info
- && node->modification_counter != node->flush_counter) {
- fputs("InnoDB: cannot close file ", stderr);
- ut_print_filename(stderr, node->name);
- fprintf(stderr,
- ", because mod_count %ld != fl_count %ld\n",
- (long) node->modification_counter,
- (long) node->flush_counter);
- }
-
- node = UT_LIST_GET_PREV(LRU, node);
- }
-
- return(FALSE);
-}
-
-/*******************************************************************//**
-Reserves the fil_system mutex and tries to make sure we can open at least one
-file while holding it. This should be called before calling
-fil_node_prepare_for_io(), because that function may need to open a file. */
-static
-void
-fil_mutex_enter_and_prepare_for_io(
-/*===============================*/
- ulint space_id) /*!< in: space id */
-{
- fil_space_t* space;
- ibool success;
- ibool print_info = FALSE;
- ulint count = 0;
- ulint count2 = 0;
-
-retry:
- mutex_enter(&fil_system->mutex);
-
- if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) {
- /* We keep log files and system tablespace files always open;
- this is important in preventing deadlocks in this module, as
- a page read completion often performs another read from the
- insert buffer. The insert buffer is in tablespace 0, and we
- cannot end up waiting in this function. */
-
- return;
- }
-
- if (fil_system->n_open < fil_system->max_n_open) {
-
- return;
- }
-
- space = fil_space_get_by_id(space_id);
-
- if (space != NULL && space->stop_ios) {
- /* We are going to do a rename file and want to stop new i/o's
- for a while */
-
- if (count2 > 20000) {
- fputs("InnoDB: Warning: tablespace ", stderr);
- ut_print_filename(stderr, space->name);
- fprintf(stderr,
- " has i/o ops stopped for a long time %lu\n",
- (ulong) count2);
- }
-
- mutex_exit(&fil_system->mutex);
-
- os_thread_sleep(20000);
-
- count2++;
-
- goto retry;
- }
-
- /* If the file is already open, no need to do anything; if the space
- does not exist, we handle the situation in the function which called
- this function */
-
- if (!space || UT_LIST_GET_FIRST(space->chain)->open) {
-
- return;
- }
-
- if (count > 1) {
- print_info = TRUE;
- }
-
- /* Too many files are open, try to close some */
-close_more:
- success = fil_try_to_close_file_in_LRU(print_info);
-
- if (success && fil_system->n_open >= fil_system->max_n_open) {
-
- goto close_more;
- }
-
- if (fil_system->n_open < fil_system->max_n_open) {
- /* Ok */
-
- return;
- }
-
- if (count >= 2) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: too many (%lu) files stay open"
- " while the maximum\n"
- "InnoDB: allowed value would be %lu.\n"
- "InnoDB: You may need to raise the value of"
- " innodb_open_files in\n"
- "InnoDB: my.cnf.\n",
- (ulong) fil_system->n_open,
- (ulong) fil_system->max_n_open);
-
- return;
- }
-
- mutex_exit(&fil_system->mutex);
-
-#ifndef UNIV_HOTBACKUP
- /* Wake the i/o-handler threads to make sure pending i/o's are
- performed */
- os_aio_simulated_wake_handler_threads();
-
- os_thread_sleep(20000);
-#endif
- /* Flush tablespaces so that we can close modified files in the LRU
- list */
-
- fil_flush_file_spaces(FIL_TABLESPACE);
-
- count++;
-
- goto retry;
-}
-
-/*******************************************************************//**
-Frees a file node object from a tablespace memory cache. */
-static
-void
-fil_node_free(
-/*==========*/
- fil_node_t* node, /*!< in, own: file node */
- fil_system_t* system, /*!< in: tablespace memory cache */
- fil_space_t* space) /*!< in: space where the file node is chained */
-{
- ut_ad(node && system && space);
- ut_ad(mutex_own(&(system->mutex)));
- ut_a(node->magic_n == FIL_NODE_MAGIC_N);
- ut_a(node->n_pending == 0);
-
- if (node->open) {
- /* We fool the assertion in fil_node_close_file() to think
- there are no unflushed modifications in the file */
-
- node->modification_counter = node->flush_counter;
-
- if (space->is_in_unflushed_spaces
- && fil_space_is_flushed(space)) {
-
- space->is_in_unflushed_spaces = FALSE;
-
- UT_LIST_REMOVE(unflushed_spaces,
- system->unflushed_spaces,
- space);
- }
-
- fil_node_close_file(node, system);
- }
-
- space->size -= node->size;
-
- UT_LIST_REMOVE(chain, space->chain, node);
-
- mem_free(node->name);
- mem_free(node);
-}
-
-#ifdef UNIV_LOG_ARCHIVE
-/****************************************************************//**
-Drops files from the start of a file space, so that its size is cut by
-the amount given. */
-UNIV_INTERN
-void
-fil_space_truncate_start(
-/*=====================*/
- ulint id, /*!< in: space id */
- ulint trunc_len) /*!< in: truncate by this much; it is an error
- if this does not equal to the combined size of
- some initial files in the space */
-{
- fil_node_t* node;
- fil_space_t* space;
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(id);
-
- ut_a(space);
-
- while (trunc_len > 0) {
- node = UT_LIST_GET_FIRST(space->chain);
-
- ut_a(node->size * UNIV_PAGE_SIZE <= trunc_len);
-
- trunc_len -= node->size * UNIV_PAGE_SIZE;
-
- fil_node_free(node, fil_system, space);
- }
-
- mutex_exit(&fil_system->mutex);
-}
-#endif /* UNIV_LOG_ARCHIVE */
-
-/*******************************************************************//**
-Creates a space memory object and puts it to the tablespace memory cache. If
-there is an error, prints an error message to the .err log.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_space_create(
-/*=============*/
- const char* name, /*!< in: space name */
- ulint id, /*!< in: space id */
- ulint flags, /*!< in: compressed page size
- and file format, or 0 */
- ulint purpose)/*!< in: FIL_TABLESPACE, or FIL_LOG if log */
-{
- fil_space_t* space;
-
- /* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
- ROW_FORMAT=COMPACT (table->flags == DICT_TF_COMPACT) and
- ROW_FORMAT=REDUNDANT (table->flags == 0). For any other
- format, the tablespace flags should equal table->flags. */
- ut_a(flags != DICT_TF_COMPACT);
-
-try_again:
- /*printf(
- "InnoDB: Adding tablespace %lu of name %s, purpose %lu\n", id, name,
- purpose);*/
-
- ut_a(fil_system);
- ut_a(name);
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_name(name);
-
- if (UNIV_LIKELY_NULL(space)) {
- ulint namesake_id;
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: trying to init to the"
- " tablespace memory cache\n"
- "InnoDB: a tablespace %lu of name ", (ulong) id);
- ut_print_filename(stderr, name);
- fprintf(stderr, ",\n"
- "InnoDB: but a tablespace %lu of the same name\n"
- "InnoDB: already exists in the"
- " tablespace memory cache!\n",
- (ulong) space->id);
-
- if (id == 0 || purpose != FIL_TABLESPACE) {
-
- mutex_exit(&fil_system->mutex);
-
- return(FALSE);
- }
-
- fprintf(stderr,
- "InnoDB: We assume that InnoDB did a crash recovery,"
- " and you had\n"
- "InnoDB: an .ibd file for which the table"
- " did not exist in the\n"
- "InnoDB: InnoDB internal data dictionary in the"
- " ibdata files.\n"
- "InnoDB: We assume that you later removed the"
- " .ibd and .frm files,\n"
- "InnoDB: and are now trying to recreate the table."
- " We now remove the\n"
- "InnoDB: conflicting tablespace object"
- " from the memory cache and try\n"
- "InnoDB: the init again.\n");
-
- namesake_id = space->id;
-
- mutex_exit(&fil_system->mutex);
-
- fil_space_free(namesake_id, FALSE);
-
- goto try_again;
- }
-
- space = fil_space_get_by_id(id);
-
- if (UNIV_LIKELY_NULL(space)) {
- fprintf(stderr,
- "InnoDB: Error: trying to add tablespace %lu"
- " of name ", (ulong) id);
- ut_print_filename(stderr, name);
- fprintf(stderr, "\n"
- "InnoDB: to the tablespace memory cache,"
- " but tablespace\n"
- "InnoDB: %lu of name ", (ulong) space->id);
- ut_print_filename(stderr, space->name);
- fputs(" already exists in the tablespace\n"
- "InnoDB: memory cache!\n", stderr);
-
- mutex_exit(&fil_system->mutex);
-
- return(FALSE);
- }
-
- space = mem_alloc(sizeof(fil_space_t));
-
- space->name = mem_strdup(name);
- space->id = id;
-
- fil_system->tablespace_version++;
- space->tablespace_version = fil_system->tablespace_version;
- space->mark = FALSE;
-
- if (purpose == FIL_TABLESPACE && id > fil_system->max_assigned_id) {
- fil_system->max_assigned_id = id;
- }
-
- space->stop_ios = FALSE;
- space->stop_ibuf_merges = FALSE;
- space->is_being_deleted = FALSE;
- space->purpose = purpose;
- space->size = 0;
- space->flags = flags;
-
- space->n_reserved_extents = 0;
-
- space->n_pending_flushes = 0;
- space->n_pending_ibuf_merges = 0;
-
- UT_LIST_INIT(space->chain);
- space->magic_n = FIL_SPACE_MAGIC_N;
-
- rw_lock_create(&space->latch, SYNC_FSP);
-
- HASH_INSERT(fil_space_t, hash, fil_system->spaces, id, space);
-
- HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
- ut_fold_string(name), space);
- space->is_in_unflushed_spaces = FALSE;
-
- UT_LIST_ADD_LAST(space_list, fil_system->space_list, space);
-
- mutex_exit(&fil_system->mutex);
-
- return(TRUE);
-}
-
-/*******************************************************************//**
-Assigns a new space id for a new single-table tablespace. This works simply by
-incrementing the global counter. If 4 billion id's is not enough, we may need
-to recycle id's.
-@return new tablespace id; ULINT_UNDEFINED if could not assign an id */
-static
-ulint
-fil_assign_new_space_id(void)
-/*=========================*/
-{
- ulint id;
-
- mutex_enter(&fil_system->mutex);
-
- fil_system->max_assigned_id++;
-
- id = fil_system->max_assigned_id;
-
- if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- "InnoDB: Warning: you are running out of new"
- " single-table tablespace id's.\n"
- "InnoDB: Current counter is %lu and it"
- " must not exceed %lu!\n"
- "InnoDB: To reset the counter to zero"
- " you have to dump all your tables and\n"
- "InnoDB: recreate the whole InnoDB installation.\n",
- (ulong) id,
- (ulong) SRV_LOG_SPACE_FIRST_ID);
- }
-
- if (id >= SRV_LOG_SPACE_FIRST_ID) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- "InnoDB: You have run out of single-table"
- " tablespace id's!\n"
- "InnoDB: Current counter is %lu.\n"
- "InnoDB: To reset the counter to zero you"
- " have to dump all your tables and\n"
- "InnoDB: recreate the whole InnoDB installation.\n",
- (ulong) id);
- fil_system->max_assigned_id--;
-
- id = ULINT_UNDEFINED;
- }
-
- mutex_exit(&fil_system->mutex);
-
- return(id);
-}
-
-/*******************************************************************//**
-Frees a space object from the tablespace memory cache. Closes the files in
-the chain but does not delete them. There must not be any pending i/o's or
-flushes on the files.
-@return TRUE if success */
-static
-ibool
-fil_space_free(
-/*===========*/
- /* out: TRUE if success */
- ulint id, /* in: space id */
- ibool own_mutex) /* in: TRUE if own system->mutex */
-{
- fil_space_t* space;
- fil_space_t* namespace;
- fil_node_t* fil_node;
-
- if (!own_mutex) {
- mutex_enter(&fil_system->mutex);
- }
-
- space = fil_space_get_by_id(id);
-
- if (!space) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: trying to remove tablespace %lu"
- " from the cache but\n"
- "InnoDB: it is not there.\n", (ulong) id);
-
- mutex_exit(&fil_system->mutex);
-
- return(FALSE);
- }
-
- HASH_DELETE(fil_space_t, hash, fil_system->spaces, id, space);
-
- namespace = fil_space_get_by_name(space->name);
- ut_a(namespace);
- ut_a(space == namespace);
-
- HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
- ut_fold_string(space->name), space);
-
- if (space->is_in_unflushed_spaces) {
- space->is_in_unflushed_spaces = FALSE;
-
- UT_LIST_REMOVE(unflushed_spaces, fil_system->unflushed_spaces,
- space);
- }
-
- UT_LIST_REMOVE(space_list, fil_system->space_list, space);
-
- ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
- ut_a(0 == space->n_pending_flushes);
-
- fil_node = UT_LIST_GET_FIRST(space->chain);
-
- while (fil_node != NULL) {
- fil_node_free(fil_node, fil_system, space);
-
- fil_node = UT_LIST_GET_FIRST(space->chain);
- }
-
- ut_a(0 == UT_LIST_GET_LEN(space->chain));
-
- if (!own_mutex) {
- mutex_exit(&fil_system->mutex);
- }
-
- rw_lock_free(&(space->latch));
-
- mem_free(space->name);
- mem_free(space);
-
- return(TRUE);
-}
-
-/*******************************************************************//**
-Returns the size of the space in pages. The tablespace must be cached in the
-memory cache.
-@return space size, 0 if space not found */
-UNIV_INTERN
-ulint
-fil_space_get_size(
-/*===============*/
- ulint id) /*!< in: space id */
-{
- fil_node_t* node;
- fil_space_t* space;
- ulint size;
-
- ut_ad(fil_system);
-
- fil_mutex_enter_and_prepare_for_io(id);
-
- space = fil_space_get_by_id(id);
-
- if (space == NULL) {
- mutex_exit(&fil_system->mutex);
-
- return(0);
- }
-
- if (space->size == 0 && space->purpose == FIL_TABLESPACE) {
- ut_a(id != 0);
-
- ut_a(1 == UT_LIST_GET_LEN(space->chain));
-
- node = UT_LIST_GET_FIRST(space->chain);
-
- /* It must be a single-table tablespace and we have not opened
- the file yet; the following calls will open it and update the
- size fields */
-
- fil_node_prepare_for_io(node, fil_system, space);
- fil_node_complete_io(node, fil_system, OS_FILE_READ);
- }
-
- size = space->size;
-
- mutex_exit(&fil_system->mutex);
-
- return(size);
-}
-
-/*******************************************************************//**
-Returns the flags of the space. The tablespace must be cached
-in the memory cache.
-@return flags, ULINT_UNDEFINED if space not found */
-UNIV_INTERN
-ulint
-fil_space_get_flags(
-/*================*/
- ulint id) /*!< in: space id */
-{
- fil_node_t* node;
- fil_space_t* space;
- ulint flags;
-
- ut_ad(fil_system);
-
- if (UNIV_UNLIKELY(!id)) {
- return(0);
- }
-
- fil_mutex_enter_and_prepare_for_io(id);
-
- space = fil_space_get_by_id(id);
-
- if (space == NULL) {
- mutex_exit(&fil_system->mutex);
-
- return(ULINT_UNDEFINED);
- }
-
- if (space->size == 0 && space->purpose == FIL_TABLESPACE) {
- ut_a(id != 0);
-
- ut_a(1 == UT_LIST_GET_LEN(space->chain));
-
- node = UT_LIST_GET_FIRST(space->chain);
-
- /* It must be a single-table tablespace and we have not opened
- the file yet; the following calls will open it and update the
- size fields */
-
- fil_node_prepare_for_io(node, fil_system, space);
- fil_node_complete_io(node, fil_system, OS_FILE_READ);
- }
-
- flags = space->flags;
-
- mutex_exit(&fil_system->mutex);
-
- return(flags);
-}
-
-/*******************************************************************//**
-Returns the compressed page size of the space, or 0 if the space
-is not compressed. The tablespace must be cached in the memory cache.
-@return compressed page size, ULINT_UNDEFINED if space not found */
-UNIV_INTERN
-ulint
-fil_space_get_zip_size(
-/*===================*/
- ulint id) /*!< in: space id */
-{
- ulint flags;
-
- flags = fil_space_get_flags(id);
-
- if (flags && flags != ULINT_UNDEFINED) {
-
- return(dict_table_flags_to_zip_size(flags));
- }
-
- return(flags);
-}
-
-/*******************************************************************//**
-Checks if the pair space, page_no refers to an existing page in a tablespace
-file space. The tablespace must be cached in the memory cache.
-@return TRUE if the address is meaningful */
-UNIV_INTERN
-ibool
-fil_check_adress_in_tablespace(
-/*===========================*/
- ulint id, /*!< in: space id */
- ulint page_no)/*!< in: page number */
-{
- if (fil_space_get_size(id) > page_no) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/****************************************************************//**
-Initializes the tablespace memory cache. */
-UNIV_INTERN
-void
-fil_init(
-/*=====*/
- ulint hash_size, /*!< in: hash table size */
- ulint max_n_open) /*!< in: max number of open files */
-{
- ut_a(fil_system == NULL);
-
- ut_a(hash_size > 0);
- ut_a(max_n_open > 0);
-
- fil_system = mem_alloc(sizeof(fil_system_t));
-
- mutex_create(&fil_system->mutex, SYNC_ANY_LATCH);
-
- fil_system->spaces = hash_create(hash_size);
- fil_system->name_hash = hash_create(hash_size);
-
- UT_LIST_INIT(fil_system->LRU);
-
- fil_system->n_open = 0;
- fil_system->max_n_open = max_n_open;
-
- fil_system->modification_counter = 0;
- fil_system->max_assigned_id = 0;
-
- fil_system->tablespace_version = 0;
-
- UT_LIST_INIT(fil_system->unflushed_spaces);
- UT_LIST_INIT(fil_system->space_list);
-}
-
-/*******************************************************************//**
-Opens all log files and system tablespace data files. They stay open until the
-database server shutdown. This should be called at a server startup after the
-space objects for the log and the system tablespace have been created. The
-purpose of this operation is to make sure we never run out of file descriptors
-if we need to read from the insert buffer or to write to the log.
-
-The function walks fil_system->space_list while holding fil_system->mutex. A
-space is processed if its purpose is not FIL_TABLESPACE or if its id is 0;
-every node in the chain of such a space that is not yet open is opened with
-fil_node_open_file(). After each node a warning is printed to stderr if
-fil_system->max_n_open is less than fil_system->n_open + 10. */
-UNIV_INTERN
-void
-fil_open_log_and_system_tablespace_files(void)
-/*==========================================*/
-{
- fil_space_t* space;
- fil_node_t* node;
-
- mutex_enter(&fil_system->mutex);
-
- space = UT_LIST_GET_FIRST(fil_system->space_list);
-
- while (space != NULL) {
- if (space->purpose != FIL_TABLESPACE || space->id == 0) {
- node = UT_LIST_GET_FIRST(space->chain);
-
- while (node != NULL) {
- if (!node->open) {
- fil_node_open_file(node, fil_system,
- space);
- }
- if (fil_system->max_n_open
- < 10 + fil_system->n_open) {
- fprintf(stderr,
- "InnoDB: Warning: you must"
- " raise the value of"
- " innodb_open_files in\n"
- "InnoDB: my.cnf! Remember that"
- " InnoDB keeps all log files"
- " and all system\n"
- "InnoDB: tablespace files open"
- " for the whole time mysqld is"
- " running, and\n"
- "InnoDB: needs to open also"
- " some .ibd files if the"
- " file-per-table storage\n"
- "InnoDB: model is used."
- " Current open files %lu,"
- " max allowed"
- " open files %lu.\n",
- (ulong) fil_system->n_open,
- (ulong) fil_system->max_n_open);
- }
- node = UT_LIST_GET_NEXT(chain, node);
- }
- }
- space = UT_LIST_GET_NEXT(space_list, space);
- }
-
- mutex_exit(&fil_system->mutex);
-}
-
-/*******************************************************************//**
-Closes all open files. There must not be any pending i/o's or not flushed
-modifications in the files.
-
-Besides closing the open nodes, every space in fil_system->space_list is also
-released with fil_space_free(). The pointer to the next space is fetched
-before the current space is freed, so the loop never reads the list links of
-a space that has already been passed to fil_space_free(). The whole operation
-runs while holding fil_system->mutex.
-NOTE(review): the TRUE argument of fil_space_free() presumably tells it that
-the caller already owns fil_system->mutex; confirm against its definition. */
-UNIV_INTERN
-void
-fil_close_all_files(void)
-/*=====================*/
-{
- fil_space_t* space;
- fil_node_t* node;
-
- mutex_enter(&fil_system->mutex);
-
- space = UT_LIST_GET_FIRST(fil_system->space_list);
-
- while (space != NULL) {
- fil_space_t* prev_space = space;
-
- node = UT_LIST_GET_FIRST(space->chain);
-
- while (node != NULL) {
- if (node->open) {
- fil_node_close_file(node, fil_system);
- }
- node = UT_LIST_GET_NEXT(chain, node);
- }
- space = UT_LIST_GET_NEXT(space_list, space);
- fil_space_free(prev_space->id, TRUE);
- }
-
- mutex_exit(&fil_system->mutex);
-}
-
-/*******************************************************************//**
-Sets the max tablespace id counter if the given number is bigger than the
-previous value.
-
-If max_id is greater than or equal to SRV_LOG_SPACE_FIRST_ID, an error
-message is printed to stderr and ut_error is invoked. Otherwise
-fil_system->max_assigned_id is compared and updated while holding
-fil_system->mutex. */
-UNIV_INTERN
-void
-fil_set_max_space_id_if_bigger(
-/*===========================*/
- ulint max_id) /*!< in: maximum known id */
-{
- if (max_id >= SRV_LOG_SPACE_FIRST_ID) {
- fprintf(stderr,
- "InnoDB: Fatal error: max tablespace id"
- " is too high, %lu\n", (ulong) max_id);
- ut_error;
- }
-
- mutex_enter(&fil_system->mutex);
-
- if (fil_system->max_assigned_id < max_id) {
-
- fil_system->max_assigned_id = max_id;
- }
-
- mutex_exit(&fil_system->mutex);
-}
-
-/****************************************************************//**
-Writes the flushed lsn and the latest archived log number to the page header
-of the first page of a data file of the system tablespace (space 0),
-which is uncompressed.
-
-As implemented here, only the lsn is stored (at offset
-FIL_PAGE_FILE_FLUSH_LSN of the page); arch_log_no is declared unused. The
-page is read into a UNIV_PAGE_SIZE-aligned buffer carved out of a
-2 * UNIV_PAGE_SIZE allocation, modified, and written back. The return values
-of fil_read() and fil_write() are not checked.
-@return always DB_SUCCESS */
-static
-ulint
-fil_write_lsn_and_arch_no_to_file(
-/*==============================*/
- ulint sum_of_sizes, /*!< in: combined size of previous files
- in space, in database pages; passed to
- fil_read() and fil_write() to address the
- page to update */
- ib_uint64_t lsn, /*!< in: lsn to write */
- ulint arch_log_no __attribute__((unused)))
- /*!< in: archived log number to write;
- not used by this implementation */
-{
- byte* buf1;
- byte* buf;
-
- buf1 = mem_alloc(2 * UNIV_PAGE_SIZE);
- buf = ut_align(buf1, UNIV_PAGE_SIZE);
-
- fil_read(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
-
- mach_write_ull(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
-
- fil_write(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
-
- mem_free(buf1);
-
- return(DB_SUCCESS);
-}
-
-/****************************************************************//**
-Writes the flushed lsn and the latest archived log number to the page
-header of the first page of each data file in the system tablespace.
-
-Only spaces with purpose FIL_TABLESPACE and id 0 are processed. For each node
-of such a space, fil_system->mutex is released around the call to
-fil_write_lsn_and_arch_no_to_file() and re-acquired afterwards; sum_of_sizes
-accumulates node->size so that each call addresses the first page of the
-next file. If a write reports an error, the function returns at a point where
-the mutex is not held.
-NOTE(review): node is dereferenced after the mutex has been released and
-re-acquired; this assumes the chain of space 0 does not change meanwhile.
-@return DB_SUCCESS or error number */
-UNIV_INTERN
-ulint
-fil_write_flushed_lsn_to_data_files(
-/*================================*/
- ib_uint64_t lsn, /*!< in: lsn to write */
- ulint arch_log_no) /*!< in: latest archived log
- file number */
-{
- fil_space_t* space;
- fil_node_t* node;
- ulint sum_of_sizes;
- ulint err;
-
- mutex_enter(&fil_system->mutex);
-
- space = UT_LIST_GET_FIRST(fil_system->space_list);
-
- while (space) {
- /* We only write the lsn to all existing data files which have
- been open during the lifetime of the mysqld process; they are
- represented by the space objects in the tablespace memory
- cache. Note that all data files in the system tablespace 0 are
- always open. */
-
- if (space->purpose == FIL_TABLESPACE
- && space->id == 0) {
- sum_of_sizes = 0;
-
- node = UT_LIST_GET_FIRST(space->chain);
- while (node) {
- mutex_exit(&fil_system->mutex);
-
- err = fil_write_lsn_and_arch_no_to_file(
- sum_of_sizes, lsn, arch_log_no);
- if (err != DB_SUCCESS) {
-
- return(err);
- }
-
- mutex_enter(&fil_system->mutex);
-
- sum_of_sizes += node->size;
- node = UT_LIST_GET_NEXT(chain, node);
- }
- }
- space = UT_LIST_GET_NEXT(space_list, space);
- }
-
- mutex_exit(&fil_system->mutex);
-
- return(DB_SUCCESS);
-}
-
-/*******************************************************************//**
-Reads the flushed lsn and arch no fields from a data file at database
-startup.
-
-The first UNIV_PAGE_SIZE bytes of the file are read into an aligned buffer
-and the value at FIL_PAGE_FILE_FLUSH_LSN is extracted. If one_read_already is
-FALSE, the min and max outputs are both set to that value; otherwise they are
-only widened so that they enclose it. The return value of os_file_read() is
-not checked.
-NOTE(review): the UNIV_LOG_ARCHIVE branches refer to arch_log_no, which is
-not declared anywhere in this function; as written they would only compile
-if that name is provided elsewhere. Confirm before enabling
-UNIV_LOG_ARCHIVE. */
-UNIV_INTERN
-void
-fil_read_flushed_lsn_and_arch_log_no(
-/*=================================*/
- os_file_t data_file, /*!< in: open data file */
- ibool one_read_already, /*!< in: TRUE if min and max
- parameters below already
- contain sensible data */
-#ifdef UNIV_LOG_ARCHIVE
- ulint* min_arch_log_no, /*!< in/out: smallest archived
- log number seen so far */
- ulint* max_arch_log_no, /*!< in/out: largest archived
- log number seen so far */
-#endif /* UNIV_LOG_ARCHIVE */
- ib_uint64_t* min_flushed_lsn, /*!< in/out: smallest flushed
- lsn seen so far */
- ib_uint64_t* max_flushed_lsn) /*!< in/out: largest flushed
- lsn seen so far */
-{
- byte* buf;
- byte* buf2;
- ib_uint64_t flushed_lsn;
-
- buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
- /* Align the memory for a possible read from a raw device */
- buf = ut_align(buf2, UNIV_PAGE_SIZE);
-
- os_file_read(data_file, buf, 0, 0, UNIV_PAGE_SIZE);
-
- flushed_lsn = mach_read_ull(buf + FIL_PAGE_FILE_FLUSH_LSN);
-
- ut_free(buf2);
-
- if (!one_read_already) {
- *min_flushed_lsn = flushed_lsn;
- *max_flushed_lsn = flushed_lsn;
-#ifdef UNIV_LOG_ARCHIVE
- *min_arch_log_no = arch_log_no;
- *max_arch_log_no = arch_log_no;
-#endif /* UNIV_LOG_ARCHIVE */
- return;
- }
-
- if (*min_flushed_lsn > flushed_lsn) {
- *min_flushed_lsn = flushed_lsn;
- }
- if (*max_flushed_lsn < flushed_lsn) {
- *max_flushed_lsn = flushed_lsn;
- }
-#ifdef UNIV_LOG_ARCHIVE
- if (*min_arch_log_no > arch_log_no) {
- *min_arch_log_no = arch_log_no;
- }
- if (*max_arch_log_no < arch_log_no) {
- *max_arch_log_no = arch_log_no;
- }
-#endif /* UNIV_LOG_ARCHIVE */
-}
-
-/*================ SINGLE-TABLE TABLESPACES ==========================*/
-
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Increments the count of pending insert buffer page merges, if space is not
-being deleted.
-
-The lookup and the increment of space->n_pending_ibuf_merges are done while
-holding fil_system->mutex. If the space id is not found, an error message is
-printed to stderr and TRUE is returned; TRUE is also returned, without
-incrementing, if space->stop_ibuf_merges is set.
-@return TRUE if being deleted, and ibuf merges should be skipped */
-UNIV_INTERN
-ibool
-fil_inc_pending_ibuf_merges(
-/*========================*/
- ulint id) /*!< in: space id */
-{
- fil_space_t* space;
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(id);
-
- if (space == NULL) {
- fprintf(stderr,
- "InnoDB: Error: trying to do ibuf merge to a"
- " dropped tablespace %lu\n",
- (ulong) id);
- }
-
- if (space == NULL || space->stop_ibuf_merges) {
- mutex_exit(&fil_system->mutex);
-
- return(TRUE);
- }
-
- space->n_pending_ibuf_merges++;
-
- mutex_exit(&fil_system->mutex);
-
- return(FALSE);
-}
-
-/*******************************************************************//**
-Decrements the count of pending insert buffer page merges.
-
-The lookup and the decrement of space->n_pending_ibuf_merges are done while
-holding fil_system->mutex. If the space id is not found, an error message is
-printed to stderr and nothing is decremented. The counter is decremented
-without checking whether it is already zero. */
-UNIV_INTERN
-void
-fil_decr_pending_ibuf_merges(
-/*=========================*/
- ulint id) /*!< in: space id */
-{
- fil_space_t* space;
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(id);
-
- if (space == NULL) {
- fprintf(stderr,
- "InnoDB: Error: decrementing ibuf merge of a"
- " dropped tablespace %lu\n",
- (ulong) id);
- }
-
- if (space != NULL) {
- space->n_pending_ibuf_merges--;
- }
-
- mutex_exit(&fil_system->mutex);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************//**
-Creates the database directory for a table if it does not exist yet.
-
-The directory path is built as fil_path_to_mysql_datadir, a '/', and the part
-of name that precedes its first '/'. The buffer is sized for those two parts
-plus the separator and the terminating NUL. The path is then passed through
-srv_normalize_path_for_win() before the directory is created. The function
-asserts that name contains a '/' and that os_file_create_directory()
-reports success.
-NOTE(review): the FALSE argument of os_file_create_directory() presumably
-means "do not fail if the directory already exists"; confirm against its
-definition. */
-static
-void
-fil_create_directory_for_tablename(
-/*===============================*/
- const char* name) /*!< in: name in the standard
- 'databasename/tablename' format */
-{
- const char* namend;
- char* path;
- ulint len;
-
- len = strlen(fil_path_to_mysql_datadir);
- namend = strchr(name, '/');
- ut_a(namend);
- path = mem_alloc(len + (namend - name) + 2);
-
- memcpy(path, fil_path_to_mysql_datadir, len);
- path[len] = '/';
- memcpy(path + len + 1, name, namend - name);
- path[len + (namend - name) + 1] = 0;
-
- srv_normalize_path_for_win(path);
-
- ut_a(os_file_create_directory(path, FALSE));
- mem_free(path);
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************//**
-Writes a log record about an .ibd file create/rename/delete.
-
-Layout of what is appended to the mini-transaction log, in order:
-1) the initial record written by mlog_write_initial_log_record_for_file_op();
-2) only for MLOG_FILE_CREATE2: flags as 4 bytes;
-3) strlen(name) + 1 as 2 bytes, followed by name including its NUL;
-4) only for MLOG_FILE_RENAME: strlen(new_name) + 1 as 2 bytes, followed by
-new_name including its NUL.
-If mlog_open() returns NULL, nothing is written.
-NOTE(review): the first mlog_open() reserves 11 + 2 + 1 bytes, while the
-MLOG_FILE_CREATE2 case writes 4 further bytes for flags before the 2-byte
-length; confirm that the reservation is sufficient for that record type.
-NOTE(review): each length is stored in 2 bytes, so names are assumed to be
-shorter than 65535 bytes; nothing here checks that. */
-static
-void
-fil_op_write_log(
-/*=============*/
- ulint type, /*!< in: MLOG_FILE_CREATE,
- MLOG_FILE_CREATE2,
- MLOG_FILE_DELETE, or
- MLOG_FILE_RENAME */
- ulint space_id, /*!< in: space id */
- ulint log_flags, /*!< in: redo log flags (stored
- in the page number field) */
- ulint flags, /*!< in: compressed page size
- and file format
- if type==MLOG_FILE_CREATE2, or 0 */
- const char* name, /*!< in: table name in the familiar
- 'databasename/tablename' format, or
- the file path in the case of
- MLOG_FILE_DELETE */
- const char* new_name, /*!< in: if type is MLOG_FILE_RENAME,
- the new table name in the
- 'databasename/tablename' format */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- byte* log_ptr;
- ulint len;
-
- log_ptr = mlog_open(mtr, 11 + 2 + 1);
-
- if (!log_ptr) {
- /* Logging in mtr is switched off during crash recovery:
- in that case mlog_open returns NULL */
- return;
- }
-
- log_ptr = mlog_write_initial_log_record_for_file_op(
- type, space_id, log_flags, log_ptr, mtr);
- if (type == MLOG_FILE_CREATE2) {
- mach_write_to_4(log_ptr, flags);
- log_ptr += 4;
- }
- /* Let us store the strings as null-terminated for easier readability
- and handling */
-
- len = strlen(name) + 1;
-
- mach_write_to_2(log_ptr, len);
- log_ptr += 2;
- mlog_close(mtr, log_ptr);
-
- mlog_catenate_string(mtr, (byte*) name, len);
-
- if (type == MLOG_FILE_RENAME) {
- len = strlen(new_name) + 1;
- log_ptr = mlog_open(mtr, 2 + len);
- ut_a(log_ptr);
- mach_write_to_2(log_ptr, len);
- log_ptr += 2;
- mlog_close(mtr, log_ptr);
-
- mlog_catenate_string(mtr, (byte*) new_name, len);
- }
-}
-#endif
-
-/*******************************************************************//**
-Parses the body of a log record written about an .ibd file operation. That is,
-the log record part after the standard (type, space id, page no) header of the
-log record.
-
-If desired, also replays the delete or rename operation if the .ibd file
-exists and the space id in it matches. Replays the create operation if a file
-at that path does not exist yet. If the database directory for the file to be
-created does not exist, then we create the directory, too.
-
-Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the
-datadir that we should use in replaying the file operations.
-
-The body is parsed in this order: 4 bytes of flags (only for
-MLOG_FILE_CREATE2), a 2-byte name length followed by the name, and, only for
-MLOG_FILE_RENAME, a 2-byte length followed by the new name. Before each field
-is read, its end is compared against end_ptr. Replay is attempted only when
-space_id is nonzero; an unknown record type or a failed replay step ends in
-ut_a()/ut_error.
-@return end of log record, or NULL if the record was not completely
-contained between ptr and end_ptr */
-UNIV_INTERN
-byte*
-fil_op_log_parse_or_replay(
-/*=======================*/
- byte* ptr, /*!< in: buffer containing the log record body,
- or an initial segment of it, if the record does
- not fit completely between ptr and end_ptr */
- byte* end_ptr, /*!< in: buffer end */
- ulint type, /*!< in: the type of this log record */
- ulint space_id, /*!< in: the space id of the tablespace in
- question, or 0 if the log record should
- only be parsed but not replayed */
- ulint log_flags) /*!< in: redo log flags
- (stored in the page number parameter) */
-{
- ulint name_len;
- ulint new_name_len;
- const char* name;
- const char* new_name = NULL;
- ulint flags = 0;
-
- if (type == MLOG_FILE_CREATE2) {
- if (end_ptr < ptr + 4) {
-
- return(NULL);
- }
-
- flags = mach_read_from_4(ptr);
- ptr += 4;
- }
-
- if (end_ptr < ptr + 2) {
-
- return(NULL);
- }
-
- name_len = mach_read_from_2(ptr);
-
- ptr += 2;
-
- if (end_ptr < ptr + name_len) {
-
- return(NULL);
- }
-
- name = (const char*) ptr;
-
- ptr += name_len;
-
- if (type == MLOG_FILE_RENAME) {
- if (end_ptr < ptr + 2) {
-
- return(NULL);
- }
-
- new_name_len = mach_read_from_2(ptr);
-
- ptr += 2;
-
- if (end_ptr < ptr + new_name_len) {
-
- return(NULL);
- }
-
- new_name = (const char*) ptr;
-
- ptr += new_name_len;
- }
-
- /* We managed to parse a full log record body */
- /*
- printf("Parsed log rec of type %lu space %lu\n"
- "name %s\n", type, space_id, name);
-
- if (type == MLOG_FILE_RENAME) {
- printf("new name %s\n", new_name);
- }
- */
- if (!space_id) {
-
- return(ptr);
- }
-
- /* Let us try to perform the file operation, if sensible. Note that
- ibbackup has at this stage already read in all space id info to the
- fil0fil.c data structures.
-
- NOTE that our algorithm is not guaranteed to work correctly if there
- were renames of tables during the backup. See ibbackup code for more
- on the problem. */
-
- switch (type) {
- case MLOG_FILE_DELETE:
- if (fil_tablespace_exists_in_mem(space_id)) {
- ut_a(fil_delete_tablespace(space_id));
- }
-
- break;
-
- case MLOG_FILE_RENAME:
- /* We do the rename based on space id, not old file name;
- this should guarantee that after the log replay each .ibd file
- has the correct name for the latest log sequence number; the
- proof is left as an exercise :) */
-
- if (fil_tablespace_exists_in_mem(space_id)) {
- /* Create the database directory for the new name, if
- it does not exist yet */
- fil_create_directory_for_tablename(new_name);
-
- /* Rename the table if there is not yet a tablespace
- with the same name */
-
- if (fil_get_space_id_for_table(new_name)
- == ULINT_UNDEFINED) {
- /* We do not care of the old name, that is
- why we pass NULL as the first argument */
- if (!fil_rename_tablespace(NULL, space_id,
- new_name)) {
- ut_error;
- }
- }
- }
-
- break;
-
- case MLOG_FILE_CREATE:
- case MLOG_FILE_CREATE2:
- if (fil_tablespace_exists_in_mem(space_id)) {
- /* Do nothing */
- } else if (fil_get_space_id_for_table(name)
- != ULINT_UNDEFINED) {
- /* Do nothing */
- } else if (log_flags & MLOG_FILE_FLAG_TEMP) {
- /* Temporary table, do nothing */
- } else {
- /* Create the database directory for name, if it does
- not exist yet */
- fil_create_directory_for_tablename(name);
-
- if (fil_create_new_single_table_tablespace(
- &space_id, name, FALSE, flags,
- FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) {
- ut_error;
- }
- }
-
- break;
-
- default:
- ut_error;
- }
-
- return(ptr);
-}
-
-/*******************************************************************//**
-Deletes a single-table tablespace. The tablespace must be cached in the
-memory cache.
-
-The function works in the following phases; id 0 is rejected by an assertion.
-1) Label stop_ibuf_merges: sets space->stop_ibuf_merges and polls, sleeping
-between attempts, until space->n_pending_ibuf_merges is 0. A warning is
-printed on every attempt once count exceeds 5000. If the space is not found,
-this phase is skipped and phase 2 reports the error.
-2) Label try_again: sets space->is_being_deleted, asserts that the space has
-exactly one node, and polls until there are neither pending flushes on the
-space nor pending i/o's on the node. A warning is printed on every attempt
-once count exceeds 1000.
-3) Copies space->name into path, releases the mutex, invalidates the pages of
-the tablespace in the buffer pool (unless UNIV_HOTBACKUP), frees the space
-with fil_space_free() and deletes the file, falling back to
-os_file_delete_if_exists() if os_file_delete() fails.
-4) On success, writes a MLOG_FILE_DELETE log record (unless UNIV_HOTBACKUP).
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_delete_tablespace(
-/*==================*/
- ulint id) /*!< in: space id */
-{
- ibool success;
- fil_space_t* space;
- fil_node_t* node;
- ulint count = 0;
- char* path;
-
- ut_a(id != 0);
-stop_ibuf_merges:
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(id);
-
- if (space != NULL) {
- space->stop_ibuf_merges = TRUE;
-
- if (space->n_pending_ibuf_merges == 0) {
- mutex_exit(&fil_system->mutex);
-
- count = 0;
-
- goto try_again;
- } else {
- if (count > 5000) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: trying to"
- " delete tablespace ", stderr);
- ut_print_filename(stderr, space->name);
- fprintf(stderr, ",\n"
- "InnoDB: but there are %lu pending"
- " ibuf merges on it.\n"
- "InnoDB: Loop %lu.\n",
- (ulong) space->n_pending_ibuf_merges,
- (ulong) count);
- }
-
- mutex_exit(&fil_system->mutex);
-
- os_thread_sleep(20000);
- count++;
-
- goto stop_ibuf_merges;
- }
- }
-
- mutex_exit(&fil_system->mutex);
- count = 0;
-
-try_again:
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(id);
-
- if (space == NULL) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: cannot delete tablespace %lu\n"
- "InnoDB: because it is not found in the"
- " tablespace memory cache.\n",
- (ulong) id);
-
- mutex_exit(&fil_system->mutex);
-
- return(FALSE);
- }
-
- ut_a(space);
- ut_a(space->n_pending_ibuf_merges == 0);
-
- space->is_being_deleted = TRUE;
-
- ut_a(UT_LIST_GET_LEN(space->chain) == 1);
- node = UT_LIST_GET_FIRST(space->chain);
-
- if (space->n_pending_flushes > 0 || node->n_pending > 0) {
- if (count > 1000) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: trying to"
- " delete tablespace ", stderr);
- ut_print_filename(stderr, space->name);
- fprintf(stderr, ",\n"
- "InnoDB: but there are %lu flushes"
- " and %lu pending i/o's on it\n"
- "InnoDB: Loop %lu.\n",
- (ulong) space->n_pending_flushes,
- (ulong) node->n_pending,
- (ulong) count);
- }
- mutex_exit(&fil_system->mutex);
- os_thread_sleep(20000);
-
- count++;
-
- goto try_again;
- }
-
- path = mem_strdup(space->name);
-
- mutex_exit(&fil_system->mutex);
-#ifndef UNIV_HOTBACKUP
- /* Invalidate in the buffer pool all pages belonging to the
- tablespace. Since we have set space->is_being_deleted = TRUE, readahead
- or ibuf merge can no longer read more pages of this tablespace to the
- buffer pool. Thus we can clean the tablespace out of the buffer pool
- completely and permanently. The flag is_being_deleted also prevents
- fil_flush() from being applied to this tablespace. */
-
- buf_LRU_invalidate_tablespace(id);
-#endif
- /* printf("Deleting tablespace %s id %lu\n", space->name, id); */
-
- success = fil_space_free(id, FALSE);
-
- if (success) {
- success = os_file_delete(path);
-
- if (!success) {
- success = os_file_delete_if_exists(path);
- }
- }
-
- if (success) {
-#ifndef UNIV_HOTBACKUP
- /* Write a log record about the deletion of the .ibd
- file, so that ibbackup can replay it in the
- --apply-log phase. We use a dummy mtr and the familiar
- log write mechanism. */
- mtr_t mtr;
-
- /* When replaying the operation in ibbackup, do not try
- to write any log record */
- mtr_start(&mtr);
-
- fil_op_write_log(MLOG_FILE_DELETE, id, 0, 0, path, NULL, &mtr);
- mtr_commit(&mtr);
-#endif
- mem_free(path);
-
- return(TRUE);
- }
-
- mem_free(path);
-
- return(FALSE);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Discards a single-table tablespace. The tablespace must be cached in the
-memory cache. Discarding is like deleting a tablespace, but
-1) we do not drop the table from the data dictionary;
-2) we remove all insert buffer entries for the tablespace immediately; in DROP
-TABLE they are only removed gradually in the background;
-3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
-as it originally had.
-
-The insert buffer entries are removed with ibuf_delete_for_discarded_space()
-even when fil_delete_tablespace() fails; in that case a warning is printed to
-stderr first and the failure is still reported to the caller.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_discard_tablespace(
-/*===================*/
- ulint id) /*!< in: space id */
-{
- ibool success;
-
- success = fil_delete_tablespace(id);
-
- if (!success) {
- fprintf(stderr,
- "InnoDB: Warning: cannot delete tablespace %lu"
- " in DISCARD TABLESPACE.\n"
- "InnoDB: But let us remove the"
- " insert buffer entries for this tablespace.\n",
- (ulong) id);
- }
-
- /* Remove all insert buffer entries for the tablespace */
-
- ibuf_delete_for_discarded_space(id);
-
- return(success);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*******************************************************************//**
-Renames the memory cache structures of a single-table tablespace.
-
-The caller must hold fil_system->mutex (checked by a debug assertion). The
-rename is refused, with a message on stderr, if looking up the current name
-of space does not return space itself, or if path is already present in the
-cache. Otherwise space is removed from fil_system->name_hash, space->name and
-node->name are freed and replaced by separate copies of path, and space is
-inserted into the hash again under the new name.
-@return TRUE if success */
-static
-ibool
-fil_rename_tablespace_in_mem(
-/*=========================*/
- fil_space_t* space, /*!< in: tablespace memory object */
- fil_node_t* node, /*!< in: file node of that tablespace */
- const char* path) /*!< in: new name; it is copied, so the
- caller keeps ownership of this string */
-{
- fil_space_t* space2;
- const char* old_name = space->name;
-
- ut_ad(mutex_own(&fil_system->mutex));
-
- space2 = fil_space_get_by_name(old_name);
- if (space != space2) {
- fputs("InnoDB: Error: cannot find ", stderr);
- ut_print_filename(stderr, old_name);
- fputs(" in tablespace memory cache\n", stderr);
-
- return(FALSE);
- }
-
- space2 = fil_space_get_by_name(path);
- if (space2 != NULL) {
- fputs("InnoDB: Error: ", stderr);
- ut_print_filename(stderr, path);
- fputs(" is already in tablespace memory cache\n", stderr);
-
- return(FALSE);
- }
-
- HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
- ut_fold_string(space->name), space);
- mem_free(space->name);
- mem_free(node->name);
-
- space->name = mem_strdup(path);
- node->name = mem_strdup(path);
-
- HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
- ut_fold_string(path), space);
- return(TRUE);
-}
-
-/*******************************************************************//**
-Allocates a file name for a single-table tablespace. The string must be freed
-by caller with mem_free().
-
-If is_temp is TRUE the result is name followed by ".ibd"; otherwise it is
-fil_path_to_mysql_datadir, a '/', name, and ".ibd". The buffer is always
-sized for the longer, non-temporary form. Because sizeof ".ibd" counts the
-terminating NUL, the final memcpy() also terminates the string. The result is
-passed through srv_normalize_path_for_win() before it is returned.
-@return own: file name */
-static
-char*
-fil_make_ibd_name(
-/*==============*/
- const char* name, /*!< in: table name or a dir path of a
- TEMPORARY table */
- ibool is_temp) /*!< in: TRUE if it is a dir path */
-{
- ulint namelen = strlen(name);
- ulint dirlen = strlen(fil_path_to_mysql_datadir);
- char* filename = mem_alloc(namelen + dirlen + sizeof "/.ibd");
-
- if (is_temp) {
- memcpy(filename, name, namelen);
- memcpy(filename + namelen, ".ibd", sizeof ".ibd");
- } else {
- memcpy(filename, fil_path_to_mysql_datadir, dirlen);
- filename[dirlen] = '/';
-
- memcpy(filename + dirlen + 1, name, namelen);
- memcpy(filename + dirlen + namelen + 1, ".ibd", sizeof ".ibd");
- }
-
- srv_normalize_path_for_win(filename);
-
- return(filename);
-}
-
-/*******************************************************************//**
-Renames a single-table tablespace. The tablespace must be cached in the
-tablespace memory cache.
-
-The function loops from the label retry until the single node of the space
-has no pending i/o's or flushes and no unflushed modifications; on each pass
-it sets space->stop_ios, and it sleeps (and calls fil_flush() when there are
-unflushed modifications) before retrying. A warning is printed on every pass
-once count exceeds 1000, and after more than 25000 passes the function
-clears stop_ios and gives up. When the node is idle it is closed if open, the
-memory cache is renamed first and then the file itself; if the file rename
-fails, the memory cache change is reverted. space->stop_ios is cleared before
-the mutex is released. On success a MLOG_FILE_RENAME log record is written
-(unless UNIV_HOTBACKUP).
-NOTE(review): when old_name is passed as NULL it is replaced by the
-placeholder string "(name not specified)", and that placeholder is what is
-passed to fil_op_write_log() as the old name; confirm that log consumers do
-not rely on the old name.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_rename_tablespace(
-/*==================*/
- const char* old_name, /*!< in: old table name in the standard
- databasename/tablename format of
- InnoDB, or NULL if we do the rename
- based on the space id only */
- ulint id, /*!< in: space id */
- const char* new_name) /*!< in: new table name in the standard
- databasename/tablename format
- of InnoDB */
-{
- ibool success;
- fil_space_t* space;
- fil_node_t* node;
- ulint count = 0;
- char* path;
- ibool old_name_was_specified = TRUE;
- char* old_path;
-
- ut_a(id != 0);
-
- if (old_name == NULL) {
- old_name = "(name not specified)";
- old_name_was_specified = FALSE;
- }
-retry:
- count++;
-
- if (count > 1000) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: problems renaming ", stderr);
- ut_print_filename(stderr, old_name);
- fputs(" to ", stderr);
- ut_print_filename(stderr, new_name);
- fprintf(stderr, ", %lu iterations\n", (ulong) count);
- }
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(id);
-
- if (space == NULL) {
- fprintf(stderr,
- "InnoDB: Error: cannot find space id %lu"
- " in the tablespace memory cache\n"
- "InnoDB: though the table ", (ulong) id);
- ut_print_filename(stderr, old_name);
- fputs(" in a rename operation should have that id\n", stderr);
- mutex_exit(&fil_system->mutex);
-
- return(FALSE);
- }
-
- if (count > 25000) {
- space->stop_ios = FALSE;
- mutex_exit(&fil_system->mutex);
-
- return(FALSE);
- }
-
- /* We temporarily close the .ibd file because we do not trust that
- operating systems can rename an open file. For the closing we have to
- wait until there are no pending i/o's or flushes on the file. */
-
- space->stop_ios = TRUE;
-
- ut_a(UT_LIST_GET_LEN(space->chain) == 1);
- node = UT_LIST_GET_FIRST(space->chain);
-
- if (node->n_pending > 0 || node->n_pending_flushes > 0) {
- /* There are pending i/o's or flushes, sleep for a while and
- retry */
-
- mutex_exit(&fil_system->mutex);
-
- os_thread_sleep(20000);
-
- goto retry;
-
- } else if (node->modification_counter > node->flush_counter) {
- /* Flush the space */
-
- mutex_exit(&fil_system->mutex);
-
- os_thread_sleep(20000);
-
- fil_flush(id);
-
- goto retry;
-
- } else if (node->open) {
- /* Close the file */
-
- fil_node_close_file(node, fil_system);
- }
-
- /* Check that the old name in the space is right */
-
- if (old_name_was_specified) {
- old_path = fil_make_ibd_name(old_name, FALSE);
-
- ut_a(strcmp(space->name, old_path) == 0);
- ut_a(strcmp(node->name, old_path) == 0);
- } else {
- old_path = mem_strdup(space->name);
- }
-
- /* Rename the tablespace and the node in the memory cache */
- path = fil_make_ibd_name(new_name, FALSE);
- success = fil_rename_tablespace_in_mem(space, node, path);
-
- if (success) {
- success = os_file_rename(old_path, path);
-
- if (!success) {
- /* We have to revert the changes we made
- to the tablespace memory cache */
-
- ut_a(fil_rename_tablespace_in_mem(space, node,
- old_path));
- }
- }
-
- mem_free(path);
- mem_free(old_path);
-
- space->stop_ios = FALSE;
-
- mutex_exit(&fil_system->mutex);
-
-#ifndef UNIV_HOTBACKUP
- if (success) {
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- fil_op_write_log(MLOG_FILE_RENAME, id, 0, 0, old_name, new_name,
- &mtr);
- mtr_commit(&mtr);
- }
-#endif
- return(success);
-}
-
-/*******************************************************************//**
-Creates a new single-table tablespace to a database directory of MySQL.
-Database directories are under the 'datadir' of MySQL. The datadir is the
-directory of a running mysqld program. We can refer to it by simply the
-path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
-dir of the mysqld server.
-
-Steps performed: the .ibd path is built with fil_make_ibd_name() and the file
-is created; the file is extended to size pages; a space id is assigned if
-*space_id is 0; the first page is initialized with the space id and flags and
-written (in compressed form when flags has bits of DICT_TF_ZSSIZE_MASK set);
-the file is flushed and closed; the space and its single node are added to
-the tablespace memory cache; finally a MLOG_FILE_CREATE or MLOG_FILE_CREATE2
-log record is written (unless UNIV_HOTBACKUP). On the error paths after the
-file has been created, the file is deleted again. path is freed on every
-return path.
-
-The return codes produced here are DB_TABLESPACE_ALREADY_EXISTS when file
-creation fails with OS_FILE_ALREADY_EXISTS, DB_OUT_OF_FILE_SPACE when it
-fails with OS_FILE_DISK_FULL or when os_file_set_size() fails, and DB_ERROR
-for the other failures.
-NOTE(review): the check of *space_id against ULINT_UNDEFINED after
-os_file_close() repeats the check made before the first page is written;
-*space_id is not assigned in between in this function.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ulint
-fil_create_new_single_table_tablespace(
-/*===================================*/
- ulint* space_id, /*!< in/out: space id; if this is != 0,
- then this is an input parameter,
- otherwise output */
- const char* tablename, /*!< in: the table name in the usual
- databasename/tablename format
- of InnoDB, or a dir path to a temp
- table */
- ibool is_temp, /*!< in: TRUE if a table created with
- CREATE TEMPORARY TABLE */
- ulint flags, /*!< in: tablespace flags */
- ulint size) /*!< in: the initial size of the
- tablespace file in pages,
- must be >= FIL_IBD_FILE_INITIAL_SIZE */
-{
- os_file_t file;
- ibool ret;
- ulint err;
- byte* buf2;
- byte* page;
- ibool success;
- char* path;
-
- ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE);
- /* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
- ROW_FORMAT=COMPACT (table->flags == DICT_TF_COMPACT) and
- ROW_FORMAT=REDUNDANT (table->flags == 0). For any other
- format, the tablespace flags should equal table->flags. */
- ut_a(flags != DICT_TF_COMPACT);
-
- path = fil_make_ibd_name(tablename, is_temp);
-
- file = os_file_create(path, OS_FILE_CREATE, OS_FILE_NORMAL,
- OS_DATA_FILE, &ret);
- if (ret == FALSE) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error creating file ", stderr);
- ut_print_filename(stderr, path);
- fputs(".\n", stderr);
-
- /* The following call will print an error message */
-
- err = os_file_get_last_error(TRUE);
-
- if (err == OS_FILE_ALREADY_EXISTS) {
- fputs("InnoDB: The file already exists though"
- " the corresponding table did not\n"
- "InnoDB: exist in the InnoDB data dictionary."
- " Have you moved InnoDB\n"
- "InnoDB: .ibd files around without using the"
- " SQL commands\n"
- "InnoDB: DISCARD TABLESPACE and"
- " IMPORT TABLESPACE, or did\n"
- "InnoDB: mysqld crash in the middle of"
- " CREATE TABLE? You can\n"
- "InnoDB: resolve the problem by"
- " removing the file ", stderr);
- ut_print_filename(stderr, path);
- fputs("\n"
- "InnoDB: under the 'datadir' of MySQL.\n",
- stderr);
-
- mem_free(path);
- return(DB_TABLESPACE_ALREADY_EXISTS);
- }
-
- if (err == OS_FILE_DISK_FULL) {
-
- mem_free(path);
- return(DB_OUT_OF_FILE_SPACE);
- }
-
- mem_free(path);
- return(DB_ERROR);
- }
-
- buf2 = ut_malloc(3 * UNIV_PAGE_SIZE);
- /* Align the memory for file i/o if we might have O_DIRECT set.
- Three pages are allocated so that, after alignment, there is room
- for one full page at page and for the compressed page image that
- is placed at page + UNIV_PAGE_SIZE below. */
- page = ut_align(buf2, UNIV_PAGE_SIZE);
-
- ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE, 0);
-
- if (!ret) {
- ut_free(buf2);
- os_file_close(file);
- os_file_delete(path);
-
- mem_free(path);
- return(DB_OUT_OF_FILE_SPACE);
- }
-
- if (*space_id == 0) {
- *space_id = fil_assign_new_space_id();
- }
-
- /* printf("Creating tablespace %s id %lu\n", path, *space_id); */
-
- if (*space_id == ULINT_UNDEFINED) {
- ut_free(buf2);
-error_exit:
- os_file_close(file);
-error_exit2:
- os_file_delete(path);
-
- mem_free(path);
- return(DB_ERROR);
- }
-
- /* We have to write the space id to the file immediately and flush the
- file to disk. This is because in crash recovery we must be aware what
- tablespaces exist and what are their space id's, so that we can apply
- the log records to the right file. It may take quite a while until
- buffer pool flush algorithms write anything to the file and flush it to
- disk. If we would not write here anything, the file would be filled
- with zeros from the call of os_file_set_size(), until a buffer pool
- flush would write to it. */
-
- memset(page, '\0', UNIV_PAGE_SIZE);
-
- fsp_header_init_fields(page, *space_id, flags);
- mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, *space_id);
-
- if (!(flags & DICT_TF_ZSSIZE_MASK)) {
- buf_flush_init_for_writing(page, NULL, 0);
- ret = os_file_write(path, file, page, 0, 0, UNIV_PAGE_SIZE);
- } else {
- page_zip_des_t page_zip;
- ulint zip_size;
-
- zip_size = ((PAGE_ZIP_MIN_SIZE >> 1)
- << ((flags & DICT_TF_ZSSIZE_MASK)
- >> DICT_TF_ZSSIZE_SHIFT));
-
- page_zip_set_size(&page_zip, zip_size);
- page_zip.data = page + UNIV_PAGE_SIZE;
-#ifdef UNIV_DEBUG
- page_zip.m_start =
-#endif /* UNIV_DEBUG */
- page_zip.m_end = page_zip.m_nonempty =
- page_zip.n_blobs = 0;
- buf_flush_init_for_writing(page, &page_zip, 0);
- ret = os_file_write(path, file, page_zip.data, 0, 0, zip_size);
- }
-
- ut_free(buf2);
-
- if (!ret) {
- fputs("InnoDB: Error: could not write the first page"
- " to tablespace ", stderr);
- ut_print_filename(stderr, path);
- putc('\n', stderr);
- goto error_exit;
- }
-
- ret = os_file_flush(file);
-
- if (!ret) {
- fputs("InnoDB: Error: file flush of tablespace ", stderr);
- ut_print_filename(stderr, path);
- fputs(" failed\n", stderr);
- goto error_exit;
- }
-
- os_file_close(file);
-
- if (*space_id == ULINT_UNDEFINED) {
- goto error_exit2;
- }
-
- success = fil_space_create(path, *space_id, flags, FIL_TABLESPACE);
-
- if (!success) {
- goto error_exit2;
- }
-
- fil_node_create(path, size, *space_id, FALSE);
-
-#ifndef UNIV_HOTBACKUP
- {
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- fil_op_write_log(flags
- ? MLOG_FILE_CREATE2
- : MLOG_FILE_CREATE,
- *space_id,
- is_temp ? MLOG_FILE_FLAG_TEMP : 0,
- flags,
- tablename, NULL, &mtr);
-
- mtr_commit(&mtr);
- }
-#endif
- mem_free(path);
- return(DB_SUCCESS);
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-It is possible, though very improbable, that the lsn's in the tablespace to be
-imported have risen above the current system lsn, if a lengthy purge, ibuf
-merge, or rollback was performed on a backup taken with ibbackup. If that is
-the case, reset page lsn's in the file. We assume that mysqld was shut down
-after it performed these cleanup operations on the .ibd file, so that it at
-the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the
-first page of the .ibd file, and we can determine whether we need to reset the
-lsn's just by looking at that flush lsn.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_reset_too_high_lsns(
-/*====================*/
- const char* name, /*!< in: table name in the
- databasename/tablename format */
- ib_uint64_t current_lsn) /*!< in: reset lsn's if the lsn stamped
- to FIL_PAGE_FILE_FLUSH_LSN in the
- first page is too high */
-{
- os_file_t file;
- char* filepath;
- byte* page;
- byte* buf2;
- ib_uint64_t flush_lsn;
- ulint space_id;
- ib_int64_t file_size;
- ib_int64_t offset;
- ulint zip_size;
- ibool success;
-
- filepath = fil_make_ibd_name(name, FALSE);
-
- file = os_file_create_simple_no_error_handling(
- filepath, OS_FILE_OPEN, OS_FILE_READ_WRITE, &success);
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
-
- ut_print_timestamp(stderr);
-
- fputs(" InnoDB: Error: trying to open a table,"
- " but could not\n"
- "InnoDB: open the tablespace file ", stderr);
- ut_print_filename(stderr, filepath);
- fputs("!\n", stderr);
- mem_free(filepath);
-
- return(FALSE);
- }
-
- /* Read the first page of the tablespace */
-
- buf2 = ut_malloc(3 * UNIV_PAGE_SIZE);
- /* Align the memory for file i/o if we might have O_DIRECT set */
- page = ut_align(buf2, UNIV_PAGE_SIZE);
-
- success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
- if (!success) {
-
- goto func_exit;
- }
-
- /* We have to read the file flush lsn from the header of the file */
-
- flush_lsn = mach_read_ull(page + FIL_PAGE_FILE_FLUSH_LSN);
-
- if (current_lsn >= flush_lsn) {
- /* Ok */
- success = TRUE;
-
- goto func_exit;
- }
-
- space_id = fsp_header_get_space_id(page);
- zip_size = fsp_header_get_zip_size(page);
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Flush lsn in the tablespace file %lu"
- " to be imported\n"
- "InnoDB: is %llu, which exceeds current"
- " system lsn %llu.\n"
- "InnoDB: We reset the lsn's in the file ",
- (ulong) space_id,
- flush_lsn, current_lsn);
- ut_print_filename(stderr, filepath);
- fputs(".\n", stderr);
-
- ut_a(ut_is_2pow(zip_size));
- ut_a(zip_size <= UNIV_PAGE_SIZE);
-
- /* Loop through all the pages in the tablespace and reset the lsn and
- the page checksum if necessary */
-
- file_size = os_file_get_size_as_iblonglong(file);
-
- for (offset = 0; offset < file_size;
- offset += zip_size ? zip_size : UNIV_PAGE_SIZE) {
- success = os_file_read(file, page,
- (ulint)(offset & 0xFFFFFFFFUL),
- (ulint)(offset >> 32),
- zip_size ? zip_size : UNIV_PAGE_SIZE);
- if (!success) {
-
- goto func_exit;
- }
- if (mach_read_ull(page + FIL_PAGE_LSN) > current_lsn) {
- /* We have to reset the lsn */
-
- if (zip_size) {
- memcpy(page + UNIV_PAGE_SIZE, page, zip_size);
- buf_flush_init_for_writing(
- page, page + UNIV_PAGE_SIZE,
- current_lsn);
- } else {
- buf_flush_init_for_writing(
- page, NULL, current_lsn);
- }
- success = os_file_write(filepath, file, page,
- (ulint)(offset & 0xFFFFFFFFUL),
- (ulint)(offset >> 32),
- zip_size
- ? zip_size
- : UNIV_PAGE_SIZE);
- if (!success) {
-
- goto func_exit;
- }
- }
- }
-
- success = os_file_flush(file);
- if (!success) {
-
- goto func_exit;
- }
-
- /* We now update the flush_lsn stamp at the start of the file */
- success = os_file_read(file, page, 0, 0,
- zip_size ? zip_size : UNIV_PAGE_SIZE);
- if (!success) {
-
- goto func_exit;
- }
-
- mach_write_ull(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn);
-
- success = os_file_write(filepath, file, page, 0, 0,
- zip_size ? zip_size : UNIV_PAGE_SIZE);
- if (!success) {
-
- goto func_exit;
- }
- success = os_file_flush(file);
-func_exit:
- os_file_close(file);
- ut_free(buf2);
- mem_free(filepath);
-
- return(success);
-}
-
-/********************************************************************//**
-Tries to open a single-table tablespace and optionally checks the space id is
-right in it. If does not succeed, prints an error message to the .err log. This
-function is used to open a tablespace when we start up mysqld, and also in
-IMPORT TABLESPACE.
-NOTE that we assume this operation is used either at the database startup
-or under the protection of the dictionary mutex, so that two users cannot
-race here. This operation does not leave the file associated with the
-tablespace open, but closes it after we have looked at the space id in it.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_open_single_table_tablespace(
-/*=============================*/
- ibool check_space_id, /*!< in: should we check that the space
- id in the file is right; we assume
- that this function runs much faster
- if no check is made, since accessing
- the file inode probably is much
- faster (the OS caches them) than
- accessing the first page of the file */
- ulint id, /*!< in: space id */
- ulint flags, /*!< in: tablespace flags */
- const char* name) /*!< in: table name in the
- databasename/tablename format */
-{
- os_file_t file;
- char* filepath;
- ibool success;
- byte* buf2;
- byte* page;
- ulint space_id;
- ulint space_flags;
-
- filepath = fil_make_ibd_name(name, FALSE);
-
- /* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
- ROW_FORMAT=COMPACT (table->flags == DICT_TF_COMPACT) and
- ROW_FORMAT=REDUNDANT (table->flags == 0). For any other
- format, the tablespace flags should equal table->flags. */
- ut_a(flags != DICT_TF_COMPACT);
-
- file = os_file_create_simple_no_error_handling(
- filepath, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
-
- ut_print_timestamp(stderr);
-
- fputs(" InnoDB: Error: trying to open a table,"
- " but could not\n"
- "InnoDB: open the tablespace file ", stderr);
- ut_print_filename(stderr, filepath);
- fputs("!\n"
- "InnoDB: Have you moved InnoDB .ibd files around"
- " without using the\n"
- "InnoDB: commands DISCARD TABLESPACE and"
- " IMPORT TABLESPACE?\n"
- "InnoDB: It is also possible that this is"
- " a temporary table #sql...,\n"
- "InnoDB: and MySQL removed the .ibd file for this.\n"
- "InnoDB: Please refer to\n"
- "InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n"
- "InnoDB: for how to resolve the issue.\n", stderr);
-
- mem_free(filepath);
-
- return(FALSE);
- }
-
- if (!check_space_id) {
- space_id = id;
-
- goto skip_check;
- }
-
- /* Read the first page of the tablespace */
-
- buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
- /* Align the memory for file i/o if we might have O_DIRECT set */
- page = ut_align(buf2, UNIV_PAGE_SIZE);
-
- success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
-
- /* We have to read the tablespace id and flags from the file. */
-
- space_id = fsp_header_get_space_id(page);
- space_flags = fsp_header_get_flags(page);
-
- ut_free(buf2);
-
- if (UNIV_UNLIKELY(space_id != id || space_flags != flags)) {
- ut_print_timestamp(stderr);
-
- fputs(" InnoDB: Error: tablespace id and flags in file ",
- stderr);
- ut_print_filename(stderr, filepath);
- fprintf(stderr, " are %lu and %lu, but in the InnoDB\n"
- "InnoDB: data dictionary they are %lu and %lu.\n"
- "InnoDB: Have you moved InnoDB .ibd files"
- " around without using the\n"
- "InnoDB: commands DISCARD TABLESPACE and"
- " IMPORT TABLESPACE?\n"
- "InnoDB: Please refer to\n"
- "InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n"
- "InnoDB: for how to resolve the issue.\n",
- (ulong) space_id, (ulong) space_flags,
- (ulong) id, (ulong) flags);
-
- success = FALSE;
-
- goto func_exit;
- }
-
-skip_check:
- success = fil_space_create(filepath, space_id, flags, FIL_TABLESPACE);
-
- if (!success) {
- goto func_exit;
- }
-
- /* We do not measure the size of the file, that is why we pass the 0
- below */
-
- fil_node_create(filepath, 0, space_id, FALSE);
-func_exit:
- os_file_close(file);
- mem_free(filepath);
-
- return(success);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_HOTBACKUP
-/*******************************************************************//**
-Allocates a file name for an old version of a single-table tablespace.
-The string must be freed by caller with mem_free()!
-@return own: file name */
-static
-char*
-fil_make_ibbackup_old_name(
-/*=======================*/
- const char* name) /*!< in: original file name */
-{
- static const char suffix[] = "_ibbackup_old_vers_";
- ulint len = strlen(name);
- char* path = mem_alloc(len + (15 + sizeof suffix));
-
- memcpy(path, name, len);
- memcpy(path + len, suffix, (sizeof suffix) - 1);
- ut_sprintf_timestamp_without_extra_chars(path + len + sizeof suffix);
- return(path);
-}
-#endif /* UNIV_HOTBACKUP */
-
-/********************************************************************//**
-Opens an .ibd file and adds the associated single-table tablespace to the
-InnoDB fil0fil.c data structures. */
-static
-void
-fil_load_single_table_tablespace(
-/*=============================*/
- const char* dbname, /*!< in: database name */
- const char* filename) /*!< in: file name (not a path),
- including the .ibd extension */
-{
- os_file_t file;
- char* filepath;
- ibool success;
- byte* buf2;
- byte* page;
- ulint space_id;
- ulint flags;
- ulint size_low;
- ulint size_high;
- ib_int64_t size;
-#ifdef UNIV_HOTBACKUP
- fil_space_t* space;
-#endif
- filepath = mem_alloc(strlen(dbname) + strlen(filename)
- + strlen(fil_path_to_mysql_datadir) + 3);
-
- sprintf(filepath, "%s/%s/%s", fil_path_to_mysql_datadir, dbname,
- filename);
- srv_normalize_path_for_win(filepath);
-#ifdef __WIN__
-# ifndef UNIV_HOTBACKUP
- /* If lower_case_table_names is 0 or 2, then MySQL allows database
- directory names with upper case letters. On Windows, all table and
- database names in InnoDB are internally always in lower case. Put the
- file path to lower case, so that we are consistent with InnoDB's
- internal data dictionary. */
-
- dict_casedn_str(filepath);
-# endif /* !UNIV_HOTBACKUP */
-#endif
- file = os_file_create_simple_no_error_handling(
- filepath, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
-
- fprintf(stderr,
- "InnoDB: Error: could not open single-table tablespace"
- " file\n"
- "InnoDB: %s!\n"
- "InnoDB: We do not continue the crash recovery,"
- " because the table may become\n"
- "InnoDB: corrupt if we cannot apply the log records"
- " in the InnoDB log to it.\n"
- "InnoDB: To fix the problem and start mysqld:\n"
- "InnoDB: 1) If there is a permission problem"
- " in the file and mysqld cannot\n"
- "InnoDB: open the file, you should"
- " modify the permissions.\n"
- "InnoDB: 2) If the table is not needed, or you can"
- " restore it from a backup,\n"
- "InnoDB: then you can remove the .ibd file,"
- " and InnoDB will do a normal\n"
- "InnoDB: crash recovery and ignore that table.\n"
- "InnoDB: 3) If the file system or the"
- " disk is broken, and you cannot remove\n"
- "InnoDB: the .ibd file, you can set"
- " innodb_force_recovery > 0 in my.cnf\n"
- "InnoDB: and force InnoDB to continue crash"
- " recovery here.\n", filepath);
-
- mem_free(filepath);
-
- if (srv_force_recovery > 0) {
- fprintf(stderr,
- "InnoDB: innodb_force_recovery"
- " was set to %lu. Continuing crash recovery\n"
- "InnoDB: even though we cannot access"
- " the .ibd file of this table.\n",
- srv_force_recovery);
- return;
- }
-
- exit(1);
- }
-
- success = os_file_get_size(file, &size_low, &size_high);
-
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
-
- fprintf(stderr,
- "InnoDB: Error: could not measure the size"
- " of single-table tablespace file\n"
- "InnoDB: %s!\n"
- "InnoDB: We do not continue crash recovery,"
- " because the table will become\n"
- "InnoDB: corrupt if we cannot apply the log records"
- " in the InnoDB log to it.\n"
- "InnoDB: To fix the problem and start mysqld:\n"
- "InnoDB: 1) If there is a permission problem"
- " in the file and mysqld cannot\n"
- "InnoDB: access the file, you should"
- " modify the permissions.\n"
- "InnoDB: 2) If the table is not needed,"
- " or you can restore it from a backup,\n"
- "InnoDB: then you can remove the .ibd file,"
- " and InnoDB will do a normal\n"
- "InnoDB: crash recovery and ignore that table.\n"
- "InnoDB: 3) If the file system or the disk is broken,"
- " and you cannot remove\n"
- "InnoDB: the .ibd file, you can set"
- " innodb_force_recovery > 0 in my.cnf\n"
- "InnoDB: and force InnoDB to continue"
- " crash recovery here.\n", filepath);
-
- os_file_close(file);
- mem_free(filepath);
-
- if (srv_force_recovery > 0) {
- fprintf(stderr,
- "InnoDB: innodb_force_recovery"
- " was set to %lu. Continuing crash recovery\n"
- "InnoDB: even though we cannot access"
- " the .ibd file of this table.\n",
- srv_force_recovery);
- return;
- }
-
- exit(1);
- }
-
- /* TODO: What to do in other cases where we cannot access an .ibd
- file during a crash recovery? */
-
- /* Every .ibd file is created >= 4 pages in size. Smaller files
- cannot be ok. */
-
- size = (((ib_int64_t)size_high) << 32) + (ib_int64_t)size_low;
-#ifndef UNIV_HOTBACKUP
- if (size < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
- fprintf(stderr,
- "InnoDB: Error: the size of single-table tablespace"
- " file %s\n"
- "InnoDB: is only %lu %lu, should be at least %lu!",
- filepath,
- (ulong) size_high,
- (ulong) size_low, (ulong) (4 * UNIV_PAGE_SIZE));
- os_file_close(file);
- mem_free(filepath);
-
- return;
- }
-#endif
- /* Read the first page of the tablespace if the size big enough */
-
- buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
- /* Align the memory for file i/o if we might have O_DIRECT set */
- page = ut_align(buf2, UNIV_PAGE_SIZE);
-
- if (size >= FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
- success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
-
- /* We have to read the tablespace id from the file */
-
- space_id = fsp_header_get_space_id(page);
- flags = fsp_header_get_flags(page);
- } else {
- space_id = ULINT_UNDEFINED;
- flags = 0;
- }
-
-#ifndef UNIV_HOTBACKUP
- if (space_id == ULINT_UNDEFINED || space_id == 0) {
- fprintf(stderr,
- "InnoDB: Error: tablespace id %lu in file %s"
- " is not sensible\n",
- (ulong) space_id,
- filepath);
- goto func_exit;
- }
-#else
- if (space_id == ULINT_UNDEFINED || space_id == 0) {
- char* new_path;
-
- fprintf(stderr,
- "InnoDB: Renaming tablespace %s of id %lu,\n"
- "InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
- "InnoDB: because its size %" PRId64 " is too small"
- " (< 4 pages 16 kB each),\n"
- "InnoDB: or the space id in the file header"
- " is not sensible.\n"
- "InnoDB: This can happen in an ibbackup run,"
- " and is not dangerous.\n",
- filepath, space_id, filepath, size);
- os_file_close(file);
-
- new_path = fil_make_ibbackup_old_name(filepath);
- ut_a(os_file_rename(filepath, new_path));
-
- ut_free(buf2);
- mem_free(filepath);
- mem_free(new_path);
-
- return;
- }
-
- /* A backup may contain the same space several times, if the space got
- renamed at a sensitive time. Since it is enough to have one version of
- the space, we rename the file if a space with the same space id
- already exists in the tablespace memory cache. We rather rename the
- file than delete it, because if there is a bug, we do not want to
- destroy valuable data. */
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(space_id);
-
- if (space) {
- char* new_path;
-
- fprintf(stderr,
- "InnoDB: Renaming tablespace %s of id %lu,\n"
- "InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
- "InnoDB: because space %s with the same id\n"
- "InnoDB: was scanned earlier. This can happen"
- " if you have renamed tables\n"
- "InnoDB: during an ibbackup run.\n",
- filepath, space_id, filepath,
- space->name);
- os_file_close(file);
-
- new_path = fil_make_ibbackup_old_name(filepath);
-
- mutex_exit(&fil_system->mutex);
-
- ut_a(os_file_rename(filepath, new_path));
-
- ut_free(buf2);
- mem_free(filepath);
- mem_free(new_path);
-
- return;
- }
- mutex_exit(&fil_system->mutex);
-#endif
- success = fil_space_create(filepath, space_id, flags, FIL_TABLESPACE);
-
- if (!success) {
-
- if (srv_force_recovery > 0) {
- fprintf(stderr,
- "InnoDB: innodb_force_recovery"
- " was set to %lu. Continuing crash recovery\n"
- "InnoDB: even though the tablespace creation"
- " of this table failed.\n",
- srv_force_recovery);
- goto func_exit;
- }
-
- exit(1);
- }
-
- /* We do not use the size information we have about the file, because
- the rounding formula for extents and pages is somewhat complex; we
- let fil_node_open() do that task. */
-
- fil_node_create(filepath, 0, space_id, FALSE);
-func_exit:
- os_file_close(file);
- ut_free(buf2);
- mem_free(filepath);
-}
-
-/***********************************************************************//**
-A fault-tolerant function that tries to read the next file name in the
-directory. We retry 100 times if os_file_readdir_next_file() returns -1. The
-idea is to read as much good data as we can and jump over bad data.
-@return 0 if ok, -1 if error even after the retries, 1 if at the end
-of the directory */
-static
-int
-fil_file_readdir_next_file(
-/*=======================*/
- ulint* err, /*!< out: this is set to DB_ERROR if an error
- was encountered, otherwise not changed */
- const char* dirname,/*!< in: directory name or path */
- os_file_dir_t dir, /*!< in: directory stream */
- os_file_stat_t* info) /*!< in/out: buffer where the info is returned */
-{
- ulint i;
- int ret;
-
- for (i = 0; i < 100; i++) {
- ret = os_file_readdir_next_file(dirname, dir, info);
-
- if (ret != -1) {
-
- return(ret);
- }
-
- fprintf(stderr,
- "InnoDB: Error: os_file_readdir_next_file()"
- " returned -1 in\n"
- "InnoDB: directory %s\n"
- "InnoDB: Crash recovery may have failed"
- " for some .ibd files!\n", dirname);
-
- *err = DB_ERROR;
- }
-
- return(-1);
-}
-
-/********************************************************************//**
-At the server startup, if we need crash recovery, scans the database
-directories under the MySQL datadir, looking for .ibd files. Those files are
-single-table tablespaces. We need to know the space id in each of them so that
-we know into which file we should look to check the contents of a page stored
-in the doublewrite buffer, also to know where to apply log records where the
-space id is != 0.
-@return DB_SUCCESS or error number */
-UNIV_INTERN
-ulint
-fil_load_single_table_tablespaces(void)
-/*===================================*/
-{
- int ret;
- char* dbpath = NULL;
- ulint dbpath_len = 100;
- os_file_dir_t dir;
- os_file_dir_t dbdir;
- os_file_stat_t dbinfo;
- os_file_stat_t fileinfo;
- ulint err = DB_SUCCESS;
-
- /* The datadir of MySQL is always the default directory of mysqld */
-
- dir = os_file_opendir(fil_path_to_mysql_datadir, TRUE);
-
- if (dir == NULL) {
-
- return(DB_ERROR);
- }
-
- dbpath = mem_alloc(dbpath_len);
-
- /* Scan all directories under the datadir. They are the database
- directories of MySQL. */
-
- ret = fil_file_readdir_next_file(&err, fil_path_to_mysql_datadir, dir,
- &dbinfo);
- while (ret == 0) {
- ulint len;
- /* printf("Looking at %s in datadir\n", dbinfo.name); */
-
- if (dbinfo.type == OS_FILE_TYPE_FILE
- || dbinfo.type == OS_FILE_TYPE_UNKNOWN) {
-
- goto next_datadir_item;
- }
-
- /* We found a symlink or a directory; try opening it to see
- if a symlink is a directory */
-
- len = strlen(fil_path_to_mysql_datadir)
- + strlen (dbinfo.name) + 2;
- if (len > dbpath_len) {
- dbpath_len = len;
-
- if (dbpath) {
- mem_free(dbpath);
- }
-
- dbpath = mem_alloc(dbpath_len);
- }
- sprintf(dbpath, "%s/%s", fil_path_to_mysql_datadir,
- dbinfo.name);
- srv_normalize_path_for_win(dbpath);
-
- dbdir = os_file_opendir(dbpath, FALSE);
-
- if (dbdir != NULL) {
- /* printf("Opened dir %s\n", dbinfo.name); */
-
- /* We found a database directory; loop through it,
- looking for possible .ibd files in it */
-
- ret = fil_file_readdir_next_file(&err, dbpath, dbdir,
- &fileinfo);
- while (ret == 0) {
- /* printf(
- " Looking at file %s\n", fileinfo.name); */
-
- if (fileinfo.type == OS_FILE_TYPE_DIR) {
-
- goto next_file_item;
- }
-
- /* We found a symlink or a file */
- if (strlen(fileinfo.name) > 4
- && 0 == strcmp(fileinfo.name
- + strlen(fileinfo.name) - 4,
- ".ibd")) {
- /* The name ends in .ibd; try opening
- the file */
- fil_load_single_table_tablespace(
- dbinfo.name, fileinfo.name);
- }
-next_file_item:
- ret = fil_file_readdir_next_file(&err,
- dbpath, dbdir,
- &fileinfo);
- }
-
- if (0 != os_file_closedir(dbdir)) {
- fputs("InnoDB: Warning: could not"
- " close database directory ", stderr);
- ut_print_filename(stderr, dbpath);
- putc('\n', stderr);
-
- err = DB_ERROR;
- }
- }
-
-next_datadir_item:
- ret = fil_file_readdir_next_file(&err,
- fil_path_to_mysql_datadir,
- dir, &dbinfo);
- }
-
- mem_free(dbpath);
-
- if (0 != os_file_closedir(dir)) {
- fprintf(stderr,
- "InnoDB: Error: could not close MySQL datadir\n");
-
- return(DB_ERROR);
- }
-
- return(err);
-}
-
-/********************************************************************//**
-If we need crash recovery, and we have called
-fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(),
-we can call this function to print an error message of orphaned .ibd files
-for which there is not a data dictionary entry with a matching table name
-and space id. */
-UNIV_INTERN
-void
-fil_print_orphaned_tablespaces(void)
-/*================================*/
-{
- fil_space_t* space;
-
- mutex_enter(&fil_system->mutex);
-
- space = UT_LIST_GET_FIRST(fil_system->space_list);
-
- while (space) {
- if (space->purpose == FIL_TABLESPACE && space->id != 0
- && !space->mark) {
- fputs("InnoDB: Warning: tablespace ", stderr);
- ut_print_filename(stderr, space->name);
- fprintf(stderr, " of id %lu has no matching table in\n"
- "InnoDB: the InnoDB data dictionary.\n",
- (ulong) space->id);
- }
-
- space = UT_LIST_GET_NEXT(space_list, space);
- }
-
- mutex_exit(&fil_system->mutex);
-}
-
-/*******************************************************************//**
-Returns TRUE if a single-table tablespace does not exist in the memory cache,
-or is being deleted there.
-@return TRUE if does not exist or is being\ deleted */
-UNIV_INTERN
-ibool
-fil_tablespace_deleted_or_being_deleted_in_mem(
-/*===========================================*/
- ulint id, /*!< in: space id */
- ib_int64_t version)/*!< in: tablespace_version should be this; if
- you pass -1 as the value of this, then this
- parameter is ignored */
-{
- fil_space_t* space;
-
- ut_ad(fil_system);
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(id);
-
- if (space == NULL || space->is_being_deleted) {
- mutex_exit(&fil_system->mutex);
-
- return(TRUE);
- }
-
- if (version != ((ib_int64_t)-1)
- && space->tablespace_version != version) {
- mutex_exit(&fil_system->mutex);
-
- return(TRUE);
- }
-
- mutex_exit(&fil_system->mutex);
-
- return(FALSE);
-}
-
-/*******************************************************************//**
-Returns TRUE if a single-table tablespace exists in the memory cache.
-@return TRUE if exists */
-UNIV_INTERN
-ibool
-fil_tablespace_exists_in_mem(
-/*=========================*/
- ulint id) /*!< in: space id */
-{
- fil_space_t* space;
-
- ut_ad(fil_system);
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(id);
-
- mutex_exit(&fil_system->mutex);
-
- return(space != NULL);
-}
-
-/*******************************************************************//**
-Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
-cache. Note that if we have not done a crash recovery at the database startup,
-there may be many tablespaces which are not yet in the memory cache.
-@return TRUE if a matching tablespace exists in the memory cache */
-UNIV_INTERN
-ibool
-fil_space_for_table_exists_in_mem(
-/*==============================*/
- ulint id, /*!< in: space id */
- const char* name, /*!< in: table name in the standard
- 'databasename/tablename' format or
- the dir path to a temp table */
- ibool is_temp, /*!< in: TRUE if created with CREATE
- TEMPORARY TABLE */
- ibool mark_space, /*!< in: in crash recovery, at database
- startup we mark all spaces which have
- an associated table in the InnoDB
- data dictionary, so that
- we can print a warning about orphaned
- tablespaces */
- ibool print_error_if_does_not_exist)
- /*!< in: print detailed error
- information to the .err log if a
- matching tablespace is not found from
- memory */
-{
- fil_space_t* namespace;
- fil_space_t* space;
- char* path;
-
- ut_ad(fil_system);
-
- mutex_enter(&fil_system->mutex);
-
- path = fil_make_ibd_name(name, is_temp);
-
- /* Look if there is a space with the same id */
-
- space = fil_space_get_by_id(id);
-
- /* Look if there is a space with the same name; the name is the
- directory path from the datadir to the file */
-
- namespace = fil_space_get_by_name(path);
- if (space && space == namespace) {
- /* Found */
-
- if (mark_space) {
- space->mark = TRUE;
- }
-
- mem_free(path);
- mutex_exit(&fil_system->mutex);
-
- return(TRUE);
- }
-
- if (!print_error_if_does_not_exist) {
-
- mem_free(path);
- mutex_exit(&fil_system->mutex);
-
- return(FALSE);
- }
-
- if (space == NULL) {
- if (namespace == NULL) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_filename(stderr, name);
- fprintf(stderr, "\n"
- "InnoDB: in InnoDB data dictionary"
- " has tablespace id %lu,\n"
- "InnoDB: but tablespace with that id"
- " or name does not exist. Have\n"
- "InnoDB: you deleted or moved .ibd files?\n"
- "InnoDB: This may also be a table created with"
- " CREATE TEMPORARY TABLE\n"
- "InnoDB: whose .ibd and .frm files"
- " MySQL automatically removed, but the\n"
- "InnoDB: table still exists in the"
- " InnoDB internal data dictionary.\n",
- (ulong) id);
- } else {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_filename(stderr, name);
- fprintf(stderr, "\n"
- "InnoDB: in InnoDB data dictionary has"
- " tablespace id %lu,\n"
- "InnoDB: but a tablespace with that id"
- " does not exist. There is\n"
- "InnoDB: a tablespace of name %s and id %lu,"
- " though. Have\n"
- "InnoDB: you deleted or moved .ibd files?\n",
- (ulong) id, namespace->name,
- (ulong) namespace->id);
- }
-error_exit:
- fputs("InnoDB: Please refer to\n"
- "InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n"
- "InnoDB: for how to resolve the issue.\n", stderr);
-
- mem_free(path);
- mutex_exit(&fil_system->mutex);
-
- return(FALSE);
- }
-
- if (0 != strcmp(space->name, path)) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_filename(stderr, name);
- fprintf(stderr, "\n"
- "InnoDB: in InnoDB data dictionary has"
- " tablespace id %lu,\n"
- "InnoDB: but the tablespace with that id"
- " has name %s.\n"
- "InnoDB: Have you deleted or moved .ibd files?\n",
- (ulong) id, space->name);
-
- if (namespace != NULL) {
- fputs("InnoDB: There is a tablespace"
- " with the right name\n"
- "InnoDB: ", stderr);
- ut_print_filename(stderr, namespace->name);
- fprintf(stderr, ", but its id is %lu.\n",
- (ulong) namespace->id);
- }
-
- goto error_exit;
- }
-
- mem_free(path);
- mutex_exit(&fil_system->mutex);
-
- return(FALSE);
-}
-
-/*******************************************************************//**
-Checks if a single-table tablespace for a given table name exists in the
-tablespace memory cache.
-@return space id, ULINT_UNDEFINED if not found */
-static
-ulint
-fil_get_space_id_for_table(
-/*=======================*/
- const char* name) /*!< in: table name in the standard
- 'databasename/tablename' format */
-{
- fil_space_t* namespace;
- ulint id = ULINT_UNDEFINED;
- char* path;
-
- ut_ad(fil_system);
-
- mutex_enter(&fil_system->mutex);
-
- path = fil_make_ibd_name(name, FALSE);
-
- /* Look if there is a space with the same name; the name is the
- directory path to the file */
-
- namespace = fil_space_get_by_name(path);
-
- if (namespace) {
- id = namespace->id;
- }
-
- mem_free(path);
-
- mutex_exit(&fil_system->mutex);
-
- return(id);
-}
-
-/**********************************************************************//**
-Tries to extend a data file so that it would accommodate the number of pages
-given. The tablespace must be cached in the memory cache. If the space is big
-enough already, does nothing.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_extend_space_to_desired_size(
-/*=============================*/
- ulint* actual_size, /*!< out: size of the space after extension;
- if we ran out of disk space this may be lower
- than the desired size */
- ulint space_id, /*!< in: space id */
- ulint size_after_extend)/*!< in: desired size in pages after the
- extension; if the current space size is bigger
- than this already, the function does nothing */
-{
- fil_node_t* node;
- fil_space_t* space;
- byte* buf2;
- byte* buf;
- ulint buf_size;
- ulint start_page_no;
- ulint file_start_page_no;
- ulint offset_high;
- ulint offset_low;
- ulint page_size;
- ibool success = TRUE;
-
- fil_mutex_enter_and_prepare_for_io(space_id);
-
- space = fil_space_get_by_id(space_id);
- ut_a(space);
-
- if (space->size >= size_after_extend) {
- /* Space already big enough */
-
- *actual_size = space->size;
-
- mutex_exit(&fil_system->mutex);
-
- return(TRUE);
- }
-
- page_size = dict_table_flags_to_zip_size(space->flags);
- if (!page_size) {
- page_size = UNIV_PAGE_SIZE;
- }
-
- node = UT_LIST_GET_LAST(space->chain);
-
- fil_node_prepare_for_io(node, fil_system, space);
-
- start_page_no = space->size;
- file_start_page_no = space->size - node->size;
-
- /* Extend at most 64 pages at a time */
- buf_size = ut_min(64, size_after_extend - start_page_no) * page_size;
- buf2 = mem_alloc(buf_size + page_size);
- buf = ut_align(buf2, page_size);
-
- memset(buf, 0, buf_size);
-
- while (start_page_no < size_after_extend) {
- ulint n_pages = ut_min(buf_size / page_size,
- size_after_extend - start_page_no);
-
- offset_high = (start_page_no - file_start_page_no)
- / (4096 * ((1024 * 1024) / page_size));
- offset_low = ((start_page_no - file_start_page_no)
- % (4096 * ((1024 * 1024) / page_size)))
- * page_size;
-#ifdef UNIV_HOTBACKUP
- success = os_file_write(node->name, node->handle, buf,
- offset_low, offset_high,
- page_size * n_pages);
-#else
- success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
- node->name, node->handle, buf,
- offset_low, offset_high,
- page_size * n_pages,
- NULL, NULL);
-#endif
- if (success) {
- node->size += n_pages;
- space->size += n_pages;
-
- os_has_said_disk_full = FALSE;
- } else {
- /* Let us measure the size of the file to determine
- how much we were able to extend it */
-
- n_pages = ((ulint)
- (os_file_get_size_as_iblonglong(
- node->handle)
- / page_size)) - node->size;
-
- node->size += n_pages;
- space->size += n_pages;
-
- break;
- }
-
- start_page_no += n_pages;
- }
-
- mem_free(buf2);
-
- fil_node_complete_io(node, fil_system, OS_FILE_WRITE);
-
- *actual_size = space->size;
-
-#ifndef UNIV_HOTBACKUP
- if (space_id == 0) {
- ulint pages_per_mb = (1024 * 1024) / page_size;
-
- /* Keep the last data file size info up to date, rounded to
- full megabytes */
-
- srv_data_file_sizes[srv_n_data_files - 1]
- = (node->size / pages_per_mb) * pages_per_mb;
- }
-#endif /* !UNIV_HOTBACKUP */
-
- /*
- printf("Extended %s to %lu, actual size %lu pages\n", space->name,
- size_after_extend, *actual_size); */
- mutex_exit(&fil_system->mutex);
-
- fil_flush(space_id);
-
- return(success);
-}
-
-#ifdef UNIV_HOTBACKUP
-/********************************************************************//**
-Extends all tablespaces to the size stored in the space header. During the
-ibbackup --apply-log phase we extended the spaces on-demand so that log records
-could be applied, but that may have left spaces still too small compared to
-the size stored in the space header. */
-UNIV_INTERN
-void
-fil_extend_tablespaces_to_stored_len(void)
-/*======================================*/
-{
- fil_space_t* space;
- byte* buf;
- ulint actual_size;
- ulint size_in_header;
- ulint error;
- ibool success;
-
- buf = mem_alloc(UNIV_PAGE_SIZE);
-
- mutex_enter(&fil_system->mutex);
-
- space = UT_LIST_GET_FIRST(fil_system->space_list);
-
- while (space) {
- ut_a(space->purpose == FIL_TABLESPACE);
-
- mutex_exit(&fil_system->mutex); /* no need to protect with a
- mutex, because this is a
- single-threaded operation */
- error = fil_read(TRUE, space->id,
- dict_table_flags_to_zip_size(space->flags),
- 0, 0, UNIV_PAGE_SIZE, buf, NULL);
- ut_a(error == DB_SUCCESS);
-
- size_in_header = fsp_get_size_low(buf);
-
- success = fil_extend_space_to_desired_size(
- &actual_size, space->id, size_in_header);
- if (!success) {
- fprintf(stderr,
- "InnoDB: Error: could not extend the"
- " tablespace of %s\n"
- "InnoDB: to the size stored in header,"
- " %lu pages;\n"
- "InnoDB: size after extension %lu pages\n"
- "InnoDB: Check that you have free disk space"
- " and retry!\n",
- space->name, size_in_header, actual_size);
- exit(1);
- }
-
- mutex_enter(&fil_system->mutex);
-
- space = UT_LIST_GET_NEXT(space_list, space);
- }
-
- mutex_exit(&fil_system->mutex);
-
- mem_free(buf);
-}
-#endif
-
-/*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/
-
-/*******************************************************************//**
-Tries to reserve free extents in a file space.
-@return TRUE if succeed */
-UNIV_INTERN
-ibool
-fil_space_reserve_free_extents(
-/*===========================*/
- ulint id, /*!< in: space id */
- ulint n_free_now, /*!< in: number of free extents now */
- ulint n_to_reserve) /*!< in: how many one wants to reserve */
-{
- fil_space_t* space;
- ibool success;
-
- ut_ad(fil_system);
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(id);
-
- ut_a(space);
-
- if (space->n_reserved_extents + n_to_reserve > n_free_now) {
- success = FALSE;
- } else {
- space->n_reserved_extents += n_to_reserve;
- success = TRUE;
- }
-
- mutex_exit(&fil_system->mutex);
-
- return(success);
-}
-
-/*******************************************************************//**
-Releases free extents in a file space. */
-UNIV_INTERN
-void
-fil_space_release_free_extents(
-/*===========================*/
- ulint id, /*!< in: space id */
- ulint n_reserved) /*!< in: how many one reserved */
-{
- fil_space_t* space;
-
- ut_ad(fil_system);
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(id);
-
- ut_a(space);
- ut_a(space->n_reserved_extents >= n_reserved);
-
- space->n_reserved_extents -= n_reserved;
-
- mutex_exit(&fil_system->mutex);
-}
-
-/*******************************************************************//**
-Gets the number of reserved extents. If the database is silent, this number
-should be zero. */
-UNIV_INTERN
-ulint
-fil_space_get_n_reserved_extents(
-/*=============================*/
- ulint id) /*!< in: space id */
-{
- fil_space_t* space;
- ulint n;
-
- ut_ad(fil_system);
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(id);
-
- ut_a(space);
-
- n = space->n_reserved_extents;
-
- mutex_exit(&fil_system->mutex);
-
- return(n);
-}
-
-/*============================ FILE I/O ================================*/
-
-/********************************************************************//**
-NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
-
-Prepares a file node for i/o. Opens the file if it is closed. Updates the
-pending i/o's field in the node and the system appropriately. Takes the node
-off the LRU list if it is in the LRU list. The caller must hold the fil_sys
-mutex. */
-static
-void
-fil_node_prepare_for_io(
-/*====================*/
- fil_node_t* node, /*!< in: file node */
- fil_system_t* system, /*!< in: tablespace memory cache */
- fil_space_t* space) /*!< in: space */
-{
- ut_ad(node && system && space);
- ut_ad(mutex_own(&(system->mutex)));
-
- if (system->n_open > system->max_n_open + 5) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: open files %lu"
- " exceeds the limit %lu\n",
- (ulong) system->n_open,
- (ulong) system->max_n_open);
- }
-
- if (node->open == FALSE) {
- /* File is closed: open it */
- ut_a(node->n_pending == 0);
-
- fil_node_open_file(node, system, space);
- }
-
- if (node->n_pending == 0 && space->purpose == FIL_TABLESPACE
- && space->id != 0) {
- /* The node is in the LRU list, remove it */
-
- ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
-
- UT_LIST_REMOVE(LRU, system->LRU, node);
- }
-
- node->n_pending++;
-}
-
-/********************************************************************//**
-Updates the data structures when an i/o operation finishes. Updates the
-pending i/o's field in the node appropriately. */
-static
-void
-fil_node_complete_io(
-/*=================*/
- fil_node_t* node, /*!< in: file node */
- fil_system_t* system, /*!< in: tablespace memory cache */
- ulint type) /*!< in: OS_FILE_WRITE or OS_FILE_READ; marks
- the node as modified if
- type == OS_FILE_WRITE */
-{
- ut_ad(node);
- ut_ad(system);
- ut_ad(mutex_own(&(system->mutex)));
-
- ut_a(node->n_pending > 0);
-
- node->n_pending--;
-
- if (type == OS_FILE_WRITE) {
- system->modification_counter++;
- node->modification_counter = system->modification_counter;
-
- if (!node->space->is_in_unflushed_spaces) {
-
- node->space->is_in_unflushed_spaces = TRUE;
- UT_LIST_ADD_FIRST(unflushed_spaces,
- system->unflushed_spaces,
- node->space);
- }
- }
-
- if (node->n_pending == 0 && node->space->purpose == FIL_TABLESPACE
- && node->space->id != 0) {
- /* The node must be put back to the LRU list */
- UT_LIST_ADD_FIRST(LRU, system->LRU, node);
- }
-}
-
-/********************************************************************//**
-Report information about an invalid page access. */
-static
-void
-fil_report_invalid_page_access(
-/*===========================*/
- ulint block_offset, /*!< in: block offset */
- ulint space_id, /*!< in: space id */
- const char* space_name, /*!< in: space name */
- ulint byte_offset, /*!< in: byte offset */
- ulint len, /*!< in: I/O length */
- ulint type) /*!< in: I/O type */
-{
- fprintf(stderr,
- "InnoDB: Error: trying to access page number %lu"
- " in space %lu,\n"
- "InnoDB: space name %s,\n"
- "InnoDB: which is outside the tablespace bounds.\n"
- "InnoDB: Byte offset %lu, len %lu, i/o type %lu.\n"
- "InnoDB: If you get this error at mysqld startup,"
- " please check that\n"
- "InnoDB: your my.cnf matches the ibdata files"
- " that you have in the\n"
- "InnoDB: MySQL server.\n",
- (ulong) block_offset, (ulong) space_id, space_name,
- (ulong) byte_offset, (ulong) len, (ulong) type);
-}
-
-/********************************************************************//**
-Reads or writes data. This operation is asynchronous (aio).
-@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
-i/o on a tablespace which does not exist */
-UNIV_INTERN
-ulint
-fil_io(
-/*===*/
- ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE,
- ORed to OS_FILE_LOG, if a log i/o
- and ORed to OS_AIO_SIMULATED_WAKE_LATER
- if simulated aio and we want to post a
- batch of i/os; NOTE that a simulated batch
- may introduce hidden chances of deadlocks,
- because i/os are not actually handled until
- all have been posted: use with great
- caution! */
- ibool sync, /*!< in: TRUE if synchronous aio is desired */
- ulint space_id, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint block_offset, /*!< in: offset in number of blocks */
- ulint byte_offset, /*!< in: remainder of offset in bytes; in
- aio this must be divisible by the OS block
- size */
- ulint len, /*!< in: how many bytes to read or write; this
- must not cross a file boundary; in aio this
- must be a block size multiple */
- void* buf, /*!< in/out: buffer where to store read data
- or from where to write; in aio this must be
- appropriately aligned */
- void* message) /*!< in: message for aio handler if non-sync
- aio used, else ignored */
-{
- ulint mode;
- fil_space_t* space;
- fil_node_t* node;
- ulint offset_high;
- ulint offset_low;
- ibool ret;
- ulint is_log;
- ulint wake_later;
-
- is_log = type & OS_FILE_LOG;
- type = type & ~OS_FILE_LOG;
-
- wake_later = type & OS_AIO_SIMULATED_WAKE_LATER;
- type = type & ~OS_AIO_SIMULATED_WAKE_LATER;
-
- ut_ad(byte_offset < UNIV_PAGE_SIZE);
- ut_ad(!zip_size || !byte_offset);
- ut_ad(ut_is_2pow(zip_size));
- ut_ad(buf);
- ut_ad(len > 0);
-#if (1 << UNIV_PAGE_SIZE_SHIFT) != UNIV_PAGE_SIZE
-# error "(1 << UNIV_PAGE_SIZE_SHIFT) != UNIV_PAGE_SIZE"
-#endif
- ut_ad(fil_validate());
-#ifndef UNIV_HOTBACKUP
-# ifndef UNIV_LOG_DEBUG
- /* ibuf bitmap pages must be read in the sync aio mode: */
- ut_ad(recv_no_ibuf_operations || (type == OS_FILE_WRITE)
- || !ibuf_bitmap_page(zip_size, block_offset)
- || sync || is_log);
- ut_ad(!ibuf_inside() || is_log || (type == OS_FILE_WRITE)
- || ibuf_page(space_id, zip_size, block_offset, NULL));
-# endif /* UNIV_LOG_DEBUG */
- if (sync) {
- mode = OS_AIO_SYNC;
- } else if (is_log) {
- mode = OS_AIO_LOG;
- } else if (type == OS_FILE_READ
- && !recv_no_ibuf_operations
- && ibuf_page(space_id, zip_size, block_offset, NULL)) {
- mode = OS_AIO_IBUF;
- } else {
- mode = OS_AIO_NORMAL;
- }
-#else /* !UNIV_HOTBACKUP */
- ut_a(sync);
- mode = OS_AIO_SYNC;
-#endif /* !UNIV_HOTBACKUP */
-
- if (type == OS_FILE_READ) {
- srv_data_read+= len;
- } else if (type == OS_FILE_WRITE) {
- srv_data_written+= len;
- }
-
- /* Reserve the fil_system mutex and make sure that we can open at
- least one file while holding it, if the file is not already open */
-
- fil_mutex_enter_and_prepare_for_io(space_id);
-
- space = fil_space_get_by_id(space_id);
-
- if (!space) {
- mutex_exit(&fil_system->mutex);
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: trying to do i/o"
- " to a tablespace which does not exist.\n"
- "InnoDB: i/o type %lu, space id %lu,"
- " page no. %lu, i/o length %lu bytes\n",
- (ulong) type, (ulong) space_id, (ulong) block_offset,
- (ulong) len);
-
- return(DB_TABLESPACE_DELETED);
- }
-
- ut_ad((mode != OS_AIO_IBUF) || (space->purpose == FIL_TABLESPACE));
-
- node = UT_LIST_GET_FIRST(space->chain);
-
- for (;;) {
- if (UNIV_UNLIKELY(node == NULL)) {
- fil_report_invalid_page_access(
- block_offset, space_id, space->name,
- byte_offset, len, type);
-
- ut_error;
- }
-
- if (space->id != 0 && node->size == 0) {
- /* We do not know the size of a single-table tablespace
- before we open the file */
-
- break;
- }
-
- if (node->size > block_offset) {
- /* Found! */
- break;
- } else {
- block_offset -= node->size;
- node = UT_LIST_GET_NEXT(chain, node);
- }
- }
-
- /* Open file if closed */
- fil_node_prepare_for_io(node, fil_system, space);
-
- /* Check that at least the start offset is within the bounds of a
- single-table tablespace */
- if (UNIV_UNLIKELY(node->size <= block_offset)
- && space->id != 0 && space->purpose == FIL_TABLESPACE) {
-
- fil_report_invalid_page_access(
- block_offset, space_id, space->name, byte_offset,
- len, type);
-
- ut_error;
- }
-
- /* Now we have made the changes in the data structures of fil_system */
- mutex_exit(&fil_system->mutex);
-
- /* Calculate the low 32 bits and the high 32 bits of the file offset */
-
- if (!zip_size) {
- offset_high = (block_offset >> (32 - UNIV_PAGE_SIZE_SHIFT));
- offset_low = ((block_offset << UNIV_PAGE_SIZE_SHIFT)
- & 0xFFFFFFFFUL) + byte_offset;
-
- ut_a(node->size - block_offset
- >= ((byte_offset + len + (UNIV_PAGE_SIZE - 1))
- / UNIV_PAGE_SIZE));
- } else {
- ulint zip_size_shift;
- switch (zip_size) {
- case 1024: zip_size_shift = 10; break;
- case 2048: zip_size_shift = 11; break;
- case 4096: zip_size_shift = 12; break;
- case 8192: zip_size_shift = 13; break;
- case 16384: zip_size_shift = 14; break;
- default: ut_error;
- }
- offset_high = block_offset >> (32 - zip_size_shift);
- offset_low = (block_offset << zip_size_shift & 0xFFFFFFFFUL)
- + byte_offset;
- ut_a(node->size - block_offset
- >= (len + (zip_size - 1)) / zip_size);
- }
-
- /* Do aio */
-
- ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
-
-#ifdef UNIV_HOTBACKUP
- /* In ibbackup do normal i/o, not aio */
- if (type == OS_FILE_READ) {
- ret = os_file_read(node->handle, buf, offset_low, offset_high,
- len);
- } else {
- ret = os_file_write(node->name, node->handle, buf,
- offset_low, offset_high, len);
- }
-#else
- /* Queue the aio request */
- ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
- offset_low, offset_high, len, node, message);
-#endif
- ut_a(ret);
-
- if (mode == OS_AIO_SYNC) {
- /* The i/o operation is already completed when we return from
- os_aio: */
-
- mutex_enter(&fil_system->mutex);
-
- fil_node_complete_io(node, fil_system, type);
-
- mutex_exit(&fil_system->mutex);
-
- ut_ad(fil_validate());
- }
-
- return(DB_SUCCESS);
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Waits for an aio operation to complete. This function is used to write the
-handler for completed requests. The aio array of pending requests is divided
-into segments (see os0file.c for more info). The thread specifies which
-segment it wants to wait for. */
-UNIV_INTERN
-void
-fil_aio_wait(
-/*=========*/
- ulint segment) /*!< in: the number of the segment in the aio
- array to wait for */
-{
- ibool ret;
- fil_node_t* fil_node;
- void* message;
- ulint type;
-
- ut_ad(fil_validate());
-
- if (os_aio_use_native_aio) {
- srv_set_io_thread_op_info(segment, "native aio handle");
-#ifdef WIN_ASYNC_IO
- ret = os_aio_windows_handle(segment, 0, &fil_node,
- &message, &type);
-#else
- ret = 0; /* Eliminate compiler warning */
- ut_error;
-#endif
- } else {
- srv_set_io_thread_op_info(segment, "simulated aio handle");
-
- ret = os_aio_simulated_handle(segment, &fil_node,
- &message, &type);
- }
-
- ut_a(ret);
-
- srv_set_io_thread_op_info(segment, "complete io for fil node");
-
- mutex_enter(&fil_system->mutex);
-
- fil_node_complete_io(fil_node, fil_system, type);
-
- mutex_exit(&fil_system->mutex);
-
- ut_ad(fil_validate());
-
- /* Do the i/o handling */
- /* IMPORTANT: since i/o handling for reads will read also the insert
- buffer in tablespace 0, you have to be very careful not to introduce
- deadlocks in the i/o system. We keep tablespace 0 data files always
- open, and use a special i/o thread to serve insert buffer requests. */
-
- if (fil_node->space->purpose == FIL_TABLESPACE) {
- srv_set_io_thread_op_info(segment, "complete io for buf page");
- buf_page_io_complete(message);
- } else {
- srv_set_io_thread_op_info(segment, "complete io for log");
- log_io_complete(message);
- }
-}
-#endif /* UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Flushes to disk possible writes cached by the OS. If the space does not exist
-or is being dropped, does not do anything. */
-UNIV_INTERN
-void
-fil_flush(
-/*======*/
- ulint space_id) /*!< in: file space id (this can be a group of
- log files or a tablespace of the database) */
-{
- fil_space_t* space;
- fil_node_t* node;
- os_file_t file;
- ib_int64_t old_mod_counter;
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(space_id);
-
- if (!space || space->is_being_deleted) {
- mutex_exit(&fil_system->mutex);
-
- return;
- }
-
- space->n_pending_flushes++; /*!< prevent dropping of the space while
- we are flushing */
- node = UT_LIST_GET_FIRST(space->chain);
-
- while (node) {
- if (node->modification_counter > node->flush_counter) {
- ut_a(node->open);
-
- /* We want to flush the changes at least up to
- old_mod_counter */
- old_mod_counter = node->modification_counter;
-
- if (space->purpose == FIL_TABLESPACE) {
- fil_n_pending_tablespace_flushes++;
- } else {
- fil_n_pending_log_flushes++;
- fil_n_log_flushes++;
- }
-#ifdef __WIN__
- if (node->is_raw_disk) {
-
- goto skip_flush;
- }
-#endif
-retry:
- if (node->n_pending_flushes > 0) {
- /* We want to avoid calling os_file_flush() on
- the file twice at the same time, because we do
- not know what bugs OS's may contain in file
- i/o; sleep for a while */
-
- mutex_exit(&fil_system->mutex);
-
- os_thread_sleep(20000);
-
- mutex_enter(&fil_system->mutex);
-
- if (node->flush_counter >= old_mod_counter) {
-
- goto skip_flush;
- }
-
- goto retry;
- }
-
- ut_a(node->open);
- file = node->handle;
- node->n_pending_flushes++;
-
- mutex_exit(&fil_system->mutex);
-
- /* fprintf(stderr, "Flushing to file %s\n",
- node->name); */
-
- os_file_flush(file);
-
- mutex_enter(&fil_system->mutex);
-
- node->n_pending_flushes--;
-skip_flush:
- if (node->flush_counter < old_mod_counter) {
- node->flush_counter = old_mod_counter;
-
- if (space->is_in_unflushed_spaces
- && fil_space_is_flushed(space)) {
-
- space->is_in_unflushed_spaces = FALSE;
-
- UT_LIST_REMOVE(
- unflushed_spaces,
- fil_system->unflushed_spaces,
- space);
- }
- }
-
- if (space->purpose == FIL_TABLESPACE) {
- fil_n_pending_tablespace_flushes--;
- } else {
- fil_n_pending_log_flushes--;
- }
- }
-
- node = UT_LIST_GET_NEXT(chain, node);
- }
-
- space->n_pending_flushes--;
-
- mutex_exit(&fil_system->mutex);
-}
-
-/**********************************************************************//**
-Flushes to disk the writes in file spaces of the given type possibly cached by
-the OS. */
-UNIV_INTERN
-void
-fil_flush_file_spaces(
-/*==================*/
- ulint purpose) /*!< in: FIL_TABLESPACE, FIL_LOG */
-{
- fil_space_t* space;
- ulint* space_ids;
- ulint n_space_ids;
- ulint i;
-
- mutex_enter(&fil_system->mutex);
-
- n_space_ids = UT_LIST_GET_LEN(fil_system->unflushed_spaces);
- if (n_space_ids == 0) {
-
- mutex_exit(&fil_system->mutex);
- return;
- }
-
- /* Assemble a list of space ids to flush. Previously, we
- traversed fil_system->unflushed_spaces and called UT_LIST_GET_NEXT()
- on a space that was just removed from the list by fil_flush().
- Thus, the space could be dropped and the memory overwritten. */
- space_ids = mem_alloc(n_space_ids * sizeof *space_ids);
-
- n_space_ids = 0;
-
- for (space = UT_LIST_GET_FIRST(fil_system->unflushed_spaces);
- space;
- space = UT_LIST_GET_NEXT(unflushed_spaces, space)) {
-
- if (space->purpose == purpose && !space->is_being_deleted) {
-
- space_ids[n_space_ids++] = space->id;
- }
- }
-
- mutex_exit(&fil_system->mutex);
-
- /* Flush the spaces. It will not hurt to call fil_flush() on
- a non-existing space id. */
- for (i = 0; i < n_space_ids; i++) {
-
- fil_flush(space_ids[i]);
- }
-
- mem_free(space_ids);
-}
-
-/******************************************************************//**
-Checks the consistency of the tablespace cache.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-fil_validate(void)
-/*==============*/
-{
- fil_space_t* space;
- fil_node_t* fil_node;
- ulint n_open = 0;
- ulint i;
-
- mutex_enter(&fil_system->mutex);
-
- /* Look for spaces in the hash table */
-
- for (i = 0; i < hash_get_n_cells(fil_system->spaces); i++) {
-
- space = HASH_GET_FIRST(fil_system->spaces, i);
-
- while (space != NULL) {
- UT_LIST_VALIDATE(chain, fil_node_t, space->chain,
- ut_a(ut_list_node_313->open
- || !ut_list_node_313->n_pending));
-
- fil_node = UT_LIST_GET_FIRST(space->chain);
-
- while (fil_node != NULL) {
- if (fil_node->n_pending > 0) {
- ut_a(fil_node->open);
- }
-
- if (fil_node->open) {
- n_open++;
- }
- fil_node = UT_LIST_GET_NEXT(chain, fil_node);
- }
- space = HASH_GET_NEXT(hash, space);
- }
- }
-
- ut_a(fil_system->n_open == n_open);
-
- UT_LIST_VALIDATE(LRU, fil_node_t, fil_system->LRU, (void) 0);
-
- fil_node = UT_LIST_GET_FIRST(fil_system->LRU);
-
- while (fil_node != NULL) {
- ut_a(fil_node->n_pending == 0);
- ut_a(fil_node->open);
- ut_a(fil_node->space->purpose == FIL_TABLESPACE);
- ut_a(fil_node->space->id != 0);
-
- fil_node = UT_LIST_GET_NEXT(LRU, fil_node);
- }
-
- mutex_exit(&fil_system->mutex);
-
- return(TRUE);
-}
-
-/********************************************************************//**
-Returns TRUE if file address is undefined.
-@return TRUE if undefined */
-UNIV_INTERN
-ibool
-fil_addr_is_null(
-/*=============*/
- fil_addr_t addr) /*!< in: address */
-{
- return(addr.page == FIL_NULL);
-}
-
-/********************************************************************//**
-Get the predecessor of a file page.
-@return FIL_PAGE_PREV */
-UNIV_INTERN
-ulint
-fil_page_get_prev(
-/*==============*/
- const byte* page) /*!< in: file page */
-{
- return(mach_read_from_4(page + FIL_PAGE_PREV));
-}
-
-/********************************************************************//**
-Get the successor of a file page.
-@return FIL_PAGE_NEXT */
-UNIV_INTERN
-ulint
-fil_page_get_next(
-/*==============*/
- const byte* page) /*!< in: file page */
-{
- return(mach_read_from_4(page + FIL_PAGE_NEXT));
-}
-
-/*********************************************************************//**
-Sets the file page type. */
-UNIV_INTERN
-void
-fil_page_set_type(
-/*==============*/
- byte* page, /*!< in/out: file page */
- ulint type) /*!< in: type */
-{
- ut_ad(page);
-
- mach_write_to_2(page + FIL_PAGE_TYPE, type);
-}
-
-/*********************************************************************//**
-Gets the file page type.
-@return type; NOTE that if the type has not been written to page, the
-return value not defined */
-UNIV_INTERN
-ulint
-fil_page_get_type(
-/*==============*/
- const byte* page) /*!< in: file page */
-{
- ut_ad(page);
-
- return(mach_read_from_2(page + FIL_PAGE_TYPE));
-}
-
-/********************************************************************
-Initializes the tablespace memory cache. */
-UNIV_INTERN
-void
-fil_close(void)
-/*===========*/
-{
- /* The mutex should already have been freed. */
- ut_ad(fil_system->mutex.magic_n == 0);
-
- hash_table_free(fil_system->spaces);
-
- hash_table_free(fil_system->name_hash);
-
- ut_a(UT_LIST_GET_LEN(fil_system->LRU) == 0);
- ut_a(UT_LIST_GET_LEN(fil_system->unflushed_spaces) == 0);
- ut_a(UT_LIST_GET_LEN(fil_system->space_list) == 0);
-
- mem_free(fil_system);
-
- fil_system = NULL;
-}
diff --git a/storage/innodb_plugin/fsp/fsp0fsp.c b/storage/innodb_plugin/fsp/fsp0fsp.c
deleted file mode 100644
index 3cc4318fc06..00000000000
--- a/storage/innodb_plugin/fsp/fsp0fsp.c
+++ /dev/null
@@ -1,4310 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file fsp/fsp0fsp.c
-File space management
-
-Created 11/29/1995 Heikki Tuuri
-***********************************************************************/
-
-#include "fsp0fsp.h"
-
-#ifdef UNIV_NONINL
-#include "fsp0fsp.ic"
-#endif
-
-#include "buf0buf.h"
-#include "fil0fil.h"
-#include "mtr0log.h"
-#include "ut0byte.h"
-#include "page0page.h"
-#include "page0zip.h"
-#ifdef UNIV_HOTBACKUP
-# include "fut0lst.h"
-#else /* UNIV_HOTBACKUP */
-# include "sync0sync.h"
-# include "fut0fut.h"
-# include "srv0srv.h"
-# include "ibuf0ibuf.h"
-# include "btr0btr.h"
-# include "btr0sea.h"
-# include "dict0boot.h"
-# include "log0log.h"
-#endif /* UNIV_HOTBACKUP */
-#include "dict0mem.h"
-
-
-#define FSP_HEADER_OFFSET FIL_PAGE_DATA /* Offset of the space header
- within a file page */
-
-/* The data structures in files are defined just as byte strings in C */
-typedef byte fsp_header_t;
-typedef byte xdes_t;
-
-/* SPACE HEADER
- ============
-
-File space header data structure: this data structure is contained in the
-first page of a space. The space for this header is reserved in every extent
-descriptor page, but used only in the first. */
-
-/*-------------------------------------*/
-#define FSP_SPACE_ID 0 /* space id */
-#define FSP_NOT_USED 4 /* this field contained a value up to
- which we know that the modifications
- in the database have been flushed to
- the file space; not used now */
-#define FSP_SIZE 8 /* Current size of the space in
- pages */
-#define FSP_FREE_LIMIT 12 /* Minimum page number for which the
- free list has not been initialized:
- the pages >= this limit are, by
- definition, free; note that in a
- single-table tablespace where size
- < 64 pages, this number is 64, i.e.,
- we have initialized the space
- about the first extent, but have not
- physically allocted those pages to the
- file */
-#define FSP_SPACE_FLAGS 16 /* table->flags & ~DICT_TF_COMPACT */
-#define FSP_FRAG_N_USED 20 /* number of used pages in the
- FSP_FREE_FRAG list */
-#define FSP_FREE 24 /* list of free extents */
-#define FSP_FREE_FRAG (24 + FLST_BASE_NODE_SIZE)
- /* list of partially free extents not
- belonging to any segment */
-#define FSP_FULL_FRAG (24 + 2 * FLST_BASE_NODE_SIZE)
- /* list of full extents not belonging
- to any segment */
-#define FSP_SEG_ID (24 + 3 * FLST_BASE_NODE_SIZE)
- /* 8 bytes which give the first unused
- segment id */
-#define FSP_SEG_INODES_FULL (32 + 3 * FLST_BASE_NODE_SIZE)
- /* list of pages containing segment
- headers, where all the segment inode
- slots are reserved */
-#define FSP_SEG_INODES_FREE (32 + 4 * FLST_BASE_NODE_SIZE)
- /* list of pages containing segment
- headers, where not all the segment
- header slots are reserved */
-/*-------------------------------------*/
-/* File space header size */
-#define FSP_HEADER_SIZE (32 + 5 * FLST_BASE_NODE_SIZE)
-
-#define FSP_FREE_ADD 4 /* this many free extents are added
- to the free list from above
- FSP_FREE_LIMIT at a time */
-
-/* FILE SEGMENT INODE
- ==================
-
-Segment inode which is created for each segment in a tablespace. NOTE: in
-purge we assume that a segment having only one currently used page can be
-freed in a few steps, so that the freeing cannot fill the file buffer with
-bufferfixed file pages. */
-
-typedef byte fseg_inode_t;
-
-#define FSEG_INODE_PAGE_NODE FSEG_PAGE_DATA
- /* the list node for linking
- segment inode pages */
-
-#define FSEG_ARR_OFFSET (FSEG_PAGE_DATA + FLST_NODE_SIZE)
-/*-------------------------------------*/
-#define FSEG_ID 0 /* 8 bytes of segment id: if this is
- ut_dulint_zero, it means that the
- header is unused */
-#define FSEG_NOT_FULL_N_USED 8
- /* number of used segment pages in
- the FSEG_NOT_FULL list */
-#define FSEG_FREE 12
- /* list of free extents of this
- segment */
-#define FSEG_NOT_FULL (12 + FLST_BASE_NODE_SIZE)
- /* list of partially free extents */
-#define FSEG_FULL (12 + 2 * FLST_BASE_NODE_SIZE)
- /* list of full extents */
-#define FSEG_MAGIC_N (12 + 3 * FLST_BASE_NODE_SIZE)
- /* magic number used in debugging */
-#define FSEG_FRAG_ARR (16 + 3 * FLST_BASE_NODE_SIZE)
- /* array of individual pages
- belonging to this segment in fsp
- fragment extent lists */
-#define FSEG_FRAG_ARR_N_SLOTS (FSP_EXTENT_SIZE / 2)
- /* number of slots in the array for
- the fragment pages */
-#define FSEG_FRAG_SLOT_SIZE 4 /* a fragment page slot contains its
- page number within space, FIL_NULL
- means that the slot is not in use */
-/*-------------------------------------*/
-#define FSEG_INODE_SIZE \
- (16 + 3 * FLST_BASE_NODE_SIZE \
- + FSEG_FRAG_ARR_N_SLOTS * FSEG_FRAG_SLOT_SIZE)
-
-#define FSP_SEG_INODES_PER_PAGE(zip_size) \
- (((zip_size ? zip_size : UNIV_PAGE_SIZE) \
- - FSEG_ARR_OFFSET - 10) / FSEG_INODE_SIZE)
- /* Number of segment inodes which fit on a
- single page */
-
-#define FSEG_MAGIC_N_VALUE 97937874
-
-#define FSEG_FILLFACTOR 8 /* If this value is x, then if
- the number of unused but reserved
- pages in a segment is less than
- reserved pages * 1/x, and there are
- at least FSEG_FRAG_LIMIT used pages,
- then we allow a new empty extent to
- be added to the segment in
- fseg_alloc_free_page. Otherwise, we
- use unused pages of the segment. */
-
-#define FSEG_FRAG_LIMIT FSEG_FRAG_ARR_N_SLOTS
- /* If the segment has >= this many
- used pages, it may be expanded by
- allocating extents to the segment;
- until that only individual fragment
- pages are allocated from the space */
-
-#define FSEG_FREE_LIST_LIMIT 40 /* If the reserved size of a segment
- is at least this many extents, we
- allow extents to be put to the free
- list of the extent: at most
- FSEG_FREE_LIST_MAX_LEN many */
-#define FSEG_FREE_LIST_MAX_LEN 4
-
-
-/* EXTENT DESCRIPTOR
- =================
-
-File extent descriptor data structure: contains bits to tell which pages in
-the extent are free and which contain old tuple version to clean. */
-
-/*-------------------------------------*/
-#define XDES_ID 0 /* The identifier of the segment
- to which this extent belongs */
-#define XDES_FLST_NODE 8 /* The list node data structure
- for the descriptors */
-#define XDES_STATE (FLST_NODE_SIZE + 8)
- /* contains state information
- of the extent */
-#define XDES_BITMAP (FLST_NODE_SIZE + 12)
- /* Descriptor bitmap of the pages
- in the extent */
-/*-------------------------------------*/
-
-#define XDES_BITS_PER_PAGE 2 /* How many bits are there per page */
-#define XDES_FREE_BIT 0 /* Index of the bit which tells if
- the page is free */
-#define XDES_CLEAN_BIT 1 /* NOTE: currently not used!
- Index of the bit which tells if
- there are old versions of tuples
- on the page */
-/* States of a descriptor */
-#define XDES_FREE 1 /* extent is in free list of space */
-#define XDES_FREE_FRAG 2 /* extent is in free fragment list of
- space */
-#define XDES_FULL_FRAG 3 /* extent is in full fragment list of
- space */
-#define XDES_FSEG 4 /* extent belongs to a segment */
-
-/* File extent data structure size in bytes. */
-#define XDES_SIZE \
- (XDES_BITMAP + UT_BITS_IN_BYTES(FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE))
-
-/* Offset of the descriptor array on a descriptor page */
-#define XDES_ARR_OFFSET (FSP_HEADER_OFFSET + FSP_HEADER_SIZE)
-
-#ifndef UNIV_HOTBACKUP
-/* Flag to indicate if we have printed the tablespace full error. */
-static ibool fsp_tbs_full_error_printed = FALSE;
-
-/**********************************************************************//**
-Returns an extent to the free list of a space. */
-static
-void
-fsp_free_extent(
-/*============*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page, /*!< in: page offset in the extent */
- mtr_t* mtr); /*!< in: mtr */
-/**********************************************************************//**
-Frees an extent of a segment to the space free list. */
-static
-void
-fseg_free_extent(
-/*=============*/
- fseg_inode_t* seg_inode, /*!< in: segment inode */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page, /*!< in: page offset in the extent */
- mtr_t* mtr); /*!< in: mtr handle */
-/**********************************************************************//**
-Calculates the number of pages reserved by a segment, and how
-many pages are currently used.
-@return number of reserved pages */
-static
-ulint
-fseg_n_reserved_pages_low(
-/*======================*/
- fseg_inode_t* header, /*!< in: segment inode */
- ulint* used, /*!< out: number of pages used (not
- more than reserved) */
- mtr_t* mtr); /*!< in: mtr handle */
-/********************************************************************//**
-Marks a page used. The page must reside within the extents of the given
-segment. */
-static
-void
-fseg_mark_page_used(
-/*================*/
- fseg_inode_t* seg_inode,/*!< in: segment inode */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page, /*!< in: page offset */
- mtr_t* mtr); /*!< in: mtr */
-/**********************************************************************//**
-Returns the first extent descriptor for a segment. We think of the extent
-lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL
--> FSEG_FREE.
-@return the first extent descriptor, or NULL if none */
-static
-xdes_t*
-fseg_get_first_extent(
-/*==================*/
- fseg_inode_t* inode, /*!< in: segment inode */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- mtr_t* mtr); /*!< in: mtr */
-/**********************************************************************//**
-Puts new extents to the free list if
-there are free extents above the free limit. If an extent happens
-to contain an extent descriptor page, the extent is put to
-the FSP_FREE_FRAG list with the page marked as used. */
-static
-void
-fsp_fill_free_list(
-/*===============*/
- ibool init_space, /*!< in: TRUE if this is a single-table
- tablespace and we are only initing
- the tablespace's first extent
- descriptor page and ibuf bitmap page;
- then we do not allocate more extents */
- ulint space, /*!< in: space */
- fsp_header_t* header, /*!< in: space header */
- mtr_t* mtr); /*!< in: mtr */
-/**********************************************************************//**
-Allocates a single free page from a segment. This function implements
-the intelligent allocation strategy which tries to minimize file space
-fragmentation.
-This is only a forward declaration of a file-local (static) function, so
-that code placed before the definition can call it.
-@return the allocated page number, FIL_NULL if no page could be allocated */
-static
-ulint
-fseg_alloc_free_page_low(
-/*=====================*/
- ulint space, /*!< in: space */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- fseg_inode_t* seg_inode, /*!< in: segment inode */
- ulint hint, /*!< in: hint of which page would be desirable */
- byte direction, /*!< in: if the new page is needed because
- of an index page split, and records are
- inserted there in order, into which
- direction they go alphabetically: FSP_DOWN,
- FSP_UP, FSP_NO_DIR */
- mtr_t* mtr); /*!< in: mtr handle */
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Fetches the FSP_SIZE field of the space header that is embedded in a
-tablespace header page.
-@return the 4-byte size value read from the space header */
-UNIV_INTERN
-ulint
-fsp_get_size_low(
-/*=============*/
-	page_t*	page)	/*!< in: header page (page 0 in the tablespace) */
-{
-	const page_t*	size_field = page + FSP_HEADER_OFFSET + FSP_SIZE;
-
-	return(mach_read_from_4(size_field));
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Latches page 0 of a tablespace in x-mode and locates the space header
-stored on it.
-@return pointer to the space header, page x-locked */
-UNIV_INLINE
-fsp_header_t*
-fsp_get_space_header(
-/*=================*/
-	ulint	id,	/*!< in: space id */
-	ulint	zip_size,/*!< in: compressed page size in bytes
-			or 0 for uncompressed pages */
-	mtr_t*	mtr)	/*!< in: mtr */
-{
-	buf_block_t*	hdr_block;
-	fsp_header_t*	hdr;
-
-	/* Argument checks: zip_size must pass ut_is_2pow(), must not
-	exceed UNIV_PAGE_SIZE, must be 0 or at least PAGE_ZIP_MIN_SIZE,
-	and must be 0 for space id 0 */
-	ut_ad(ut_is_2pow(zip_size));
-	ut_ad(zip_size <= UNIV_PAGE_SIZE);
-	ut_ad(!zip_size || zip_size >= PAGE_ZIP_MIN_SIZE);
-	ut_ad(id || !zip_size);
-
-	hdr_block = buf_page_get(id, zip_size, 0, RW_X_LATCH, mtr);
-	hdr = buf_block_get_frame(hdr_block) + FSP_HEADER_OFFSET;
-	buf_block_dbg_add_level(hdr_block, SYNC_FSP_PAGE);
-
-	/* The header contents must agree with what the caller passed */
-	ut_ad(mach_read_from_4(hdr + FSP_SPACE_ID) == id);
-	ut_ad(dict_table_flags_to_zip_size(
-		      mach_read_from_4(hdr + FSP_SPACE_FLAGS))
-	      == zip_size);
-
-	return(hdr);
-}
-
-/**********************************************************************//**
-Reads one bit of the per-page bitmap stored in an extent descriptor.
-@return value of the bit; for XDES_FREE_BIT, TRUE means the page is free */
-UNIV_INLINE
-ibool
-xdes_get_bit(
-/*=========*/
-	xdes_t*	descr,	/*!< in: descriptor */
-	ulint	bit,	/*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
-	ulint	offset,	/*!< in: page offset within extent:
-			0 ... FSP_EXTENT_SIZE - 1 */
-	mtr_t*	mtr)	/*!< in: mtr */
-{
-	ulint	bit_pos;
-	ulint	bitmap_byte;
-
-	ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
-	ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT));
-	ut_ad(offset < FSP_EXTENT_SIZE);
-
-	/* Position of the wanted bit, counted from the start of the
-	bitmap: each page owns XDES_BITS_PER_PAGE consecutive bits */
-	bit_pos = XDES_BITS_PER_PAGE * offset + bit;
-
-	bitmap_byte = mtr_read_ulint(descr + XDES_BITMAP + bit_pos / 8,
-				     MLOG_1BYTE, mtr);
-
-	return(ut_bit_get_nth(bitmap_byte, bit_pos % 8));
-}
-
-/**********************************************************************//**
-Writes one bit of the per-page bitmap stored in an extent descriptor.
-The containing bitmap byte is read, modified and written back through
-mlog_write_ulint(). */
-UNIV_INLINE
-void
-xdes_set_bit(
-/*=========*/
-	xdes_t*	descr,	/*!< in: descriptor */
-	ulint	bit,	/*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
-	ulint	offset,	/*!< in: page offset within extent:
-			0 ... FSP_EXTENT_SIZE - 1 */
-	ibool	val,	/*!< in: bit value */
-	mtr_t*	mtr)	/*!< in: mtr */
-{
-	ulint	bit_pos;
-	xdes_t*	bitmap_byte;
-	ulint	new_value;
-
-	ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
-	ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT));
-	ut_ad(offset < FSP_EXTENT_SIZE);
-
-	bit_pos = XDES_BITS_PER_PAGE * offset + bit;
-	bitmap_byte = descr + XDES_BITMAP + bit_pos / 8;
-
-	new_value = ut_bit_set_nth(
-		mtr_read_ulint(bitmap_byte, MLOG_1BYTE, mtr),
-		bit_pos % 8, val);
-
-	mlog_write_ulint(bitmap_byte, new_value, MLOG_1BYTE, mtr);
-}
-
-/**********************************************************************//**
-Searches an extent descriptor for a page whose descriptor bit has the
-wanted value. The scan begins at hint and moves upward; after the last
-page of the extent it continues from page 0 up to (but excluding) hint.
-@return bit index of the bit, ULINT_UNDEFINED if not found */
-UNIV_INLINE
-ulint
-xdes_find_bit(
-/*==========*/
-	xdes_t*	descr,	/*!< in: descriptor */
-	ulint	bit,	/*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
-	ibool	val,	/*!< in: desired bit value */
-	ulint	hint,	/*!< in: hint of which bit position would be
-			desirable */
-	mtr_t*	mtr)	/*!< in: mtr */
-{
-	ulint	pos;
-
-	ut_ad(descr && mtr);
-	ut_ad(val <= TRUE);
-	ut_ad(hint < FSP_EXTENT_SIZE);
-	ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
-
-	/* First pass: hint ... FSP_EXTENT_SIZE - 1 */
-	pos = hint;
-
-	while (pos < FSP_EXTENT_SIZE) {
-		if (xdes_get_bit(descr, bit, pos, mtr) == val) {
-
-			return(pos);
-		}
-
-		pos++;
-	}
-
-	/* Second pass (wrap-around): 0 ... hint - 1 */
-	pos = 0;
-
-	while (pos < hint) {
-		if (xdes_get_bit(descr, bit, pos, mtr) == val) {
-
-			return(pos);
-		}
-
-		pos++;
-	}
-
-	return(ULINT_UNDEFINED);
-}
-
-/**********************************************************************//**
-Searches an extent descriptor for a page whose descriptor bit has the
-wanted value, scanning in the direction opposite to xdes_find_bit: from
-hint down to page 0, then from the last page of the extent down to
-hint + 1.
-@return bit index of the bit, ULINT_UNDEFINED if not found */
-UNIV_INLINE
-ulint
-xdes_find_bit_downward(
-/*===================*/
-	xdes_t*	descr,	/*!< in: descriptor */
-	ulint	bit,	/*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
-	ibool	val,	/*!< in: desired bit value */
-	ulint	hint,	/*!< in: hint of which bit position would be
-			desirable */
-	mtr_t*	mtr)	/*!< in: mtr */
-{
-	ulint	pos;
-
-	ut_ad(descr && mtr);
-	ut_ad(val <= TRUE);
-	ut_ad(hint < FSP_EXTENT_SIZE);
-	ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
-
-	/* First pass: hint ... 0. The counter runs one ahead of the
-	examined position so that the unsigned loop can terminate. */
-	pos = hint + 1;
-
-	while (pos > 0) {
-		pos--;
-
-		if (xdes_get_bit(descr, bit, pos, mtr) == val) {
-
-			return(pos);
-		}
-	}
-
-	/* Second pass (wrap-around): FSP_EXTENT_SIZE - 1 ... hint + 1 */
-	pos = FSP_EXTENT_SIZE - 1;
-
-	while (pos > hint) {
-		if (xdes_get_bit(descr, bit, pos, mtr) == val) {
-
-			return(pos);
-		}
-
-		pos--;
-	}
-
-	return(ULINT_UNDEFINED);
-}
-
-/**********************************************************************//**
-Counts the pages of an extent whose XDES_FREE_BIT is not set.
-@return number of pages used */
-UNIV_INLINE
-ulint
-xdes_get_n_used(
-/*============*/
-	xdes_t*	descr,	/*!< in: descriptor */
-	mtr_t*	mtr)	/*!< in: mtr */
-{
-	ulint	n_used	= 0;
-	ulint	page_no;
-
-	ut_ad(descr && mtr);
-	ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
-
-	for (page_no = 0; page_no < FSP_EXTENT_SIZE; page_no++) {
-		if (!xdes_get_bit(descr, XDES_FREE_BIT, page_no, mtr)) {
-			n_used++;
-		}
-	}
-
-	return(n_used);
-}
-
-/**********************************************************************//**
-Tells whether none of the pages of an extent is in use.
-@return TRUE if totally free */
-UNIV_INLINE
-ibool
-xdes_is_free(
-/*=========*/
-	xdes_t*	descr,	/*!< in: descriptor */
-	mtr_t*	mtr)	/*!< in: mtr */
-{
-	ulint	n_used = xdes_get_n_used(descr, mtr);
-
-	return(n_used == 0 ? TRUE : FALSE);
-}
-
-/**********************************************************************//**
-Tells whether every page of an extent is in use.
-@return TRUE if full */
-UNIV_INLINE
-ibool
-xdes_is_full(
-/*=========*/
-	xdes_t*	descr,	/*!< in: descriptor */
-	mtr_t*	mtr)	/*!< in: mtr */
-{
-	ulint	n_used = xdes_get_n_used(descr, mtr);
-
-	return(n_used == FSP_EXTENT_SIZE ? TRUE : FALSE);
-}
-
-/**********************************************************************//**
-Stores a new state value in the XDES_STATE field of an extent
-descriptor. */
-UNIV_INLINE
-void
-xdes_set_state(
-/*===========*/
-	xdes_t*	descr,	/*!< in: descriptor */
-	ulint	state,	/*!< in: state to set */
-	mtr_t*	mtr)	/*!< in: mtr handle */
-{
-	xdes_t*	state_field;
-
-	ut_ad(descr && mtr);
-	/* Only values in the range XDES_FREE ... XDES_FSEG are accepted */
-	ut_ad(state >= XDES_FREE);
-	ut_ad(state <= XDES_FSEG);
-	ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
-
-	state_field = descr + XDES_STATE;
-
-	mlog_write_ulint(state_field, state, MLOG_4BYTES, mtr);
-}
-
-/**********************************************************************//**
-Reads the XDES_STATE field of an extent descriptor.
-@return state */
-UNIV_INLINE
-ulint
-xdes_get_state(
-/*===========*/
-	xdes_t*	descr,	/*!< in: descriptor */
-	mtr_t*	mtr)	/*!< in: mtr handle */
-{
-	ulint	cur_state;
-
-	ut_ad(descr && mtr);
-	ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
-
-	cur_state = mtr_read_ulint(descr + XDES_STATE, MLOG_4BYTES, mtr);
-
-	/* Unsigned arithmetic: this rejects 0 as well as values above
-	XDES_FSEG */
-	ut_ad(cur_state - 1 < XDES_FSEG);
-
-	return(cur_state);
-}
-
-/**********************************************************************//**
-Resets an extent descriptor: every bit of its bitmap is set to 1 and its
-state becomes XDES_FREE (the free and clean state). */
-UNIV_INLINE
-void
-xdes_init(
-/*======*/
-	xdes_t*	descr,	/*!< in: descriptor */
-	mtr_t*	mtr)	/*!< in: mtr */
-{
-	xdes_t*	word;
-	xdes_t*	bitmap_end;
-
-	ut_ad(descr && mtr);
-	ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
-	/* The bitmap is filled 4 bytes at a time below */
-	ut_ad((XDES_SIZE - XDES_BITMAP) % 4 == 0);
-
-	word = descr + XDES_BITMAP;
-	bitmap_end = descr + XDES_SIZE;
-
-	while (word < bitmap_end) {
-		mlog_write_ulint(word, 0xFFFFFFFFUL, MLOG_4BYTES, mtr);
-		word += 4;
-	}
-
-	xdes_set_state(descr, XDES_FREE, mtr);
-}
-
-/********************************************************************//**
-Computes the number of the page that holds the extent descriptor of a
-given page: the page offset rounded down to a multiple of zip_size, or of
-UNIV_PAGE_SIZE when the tablespace is not compressed.
-@return descriptor page offset */
-UNIV_INLINE
-ulint
-xdes_calc_descriptor_page(
-/*======================*/
-	ulint	zip_size,	/*!< in: compressed page size in bytes;
-				0 for uncompressed pages */
-	ulint	offset)		/*!< in: page offset */
-{
-	/* Compile-time checks that the descriptor array fits on a page */
-#ifndef DOXYGEN /* Doxygen gets confused by these */
-# if UNIV_PAGE_SIZE <= XDES_ARR_OFFSET \
-		+ (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE) * XDES_SIZE
-#  error
-# endif
-# if PAGE_ZIP_MIN_SIZE <= XDES_ARR_OFFSET \
-		+ (PAGE_ZIP_MIN_SIZE / FSP_EXTENT_SIZE) * XDES_SIZE
-#  error
-# endif
-#endif /* !DOXYGEN */
-	ut_ad(ut_is_2pow(zip_size));
-
-	if (zip_size) {
-		ut_ad(zip_size > XDES_ARR_OFFSET
-		      + (zip_size / FSP_EXTENT_SIZE) * XDES_SIZE);
-
-		return(ut_2pow_round(offset, zip_size));
-	}
-
-	return(ut_2pow_round(offset, UNIV_PAGE_SIZE));
-}
-
-/********************************************************************//**
-Computes which slot of the descriptor array on the descriptor page
-describes the extent that contains a given page.
-@return descriptor index */
-UNIV_INLINE
-ulint
-xdes_calc_descriptor_index(
-/*=======================*/
-	ulint	zip_size,	/*!< in: compressed page size in bytes;
-				0 for uncompressed pages */
-	ulint	offset)		/*!< in: page offset */
-{
-	ulint	offset_in_group;
-
-	ut_ad(ut_is_2pow(zip_size));
-
-	/* Distance of the page from the descriptor page that covers it */
-	if (zip_size) {
-		offset_in_group = ut_2pow_remainder(offset, zip_size);
-	} else {
-		offset_in_group = ut_2pow_remainder(offset, UNIV_PAGE_SIZE);
-	}
-
-	return(offset_in_group / FSP_EXTENT_SIZE);
-}
-
-/********************************************************************//**
-Gets a pointer to the extent descriptor of a page. The page where the extent
-descriptor resides is x-locked. If the page offset is equal to the free limit
-of the space, adds new extents from above the free limit to the space free
-list, if not free limit == space size. This adding is necessary to make the
-descriptor defined, as they are uninitialized above the free limit.
-The free limit, size and flags are read directly from the supplied space
-header; a descriptor that lives on page 0 is returned from the header page
-itself, without fetching the page a second time.
-@return pointer to the extent descriptor, NULL if the page does not
-exist in the space or if the offset exceeds the free limit */
-UNIV_INLINE
-xdes_t*
-xdes_get_descriptor_with_space_hdr(
-/*===============================*/
- fsp_header_t* sp_header,/*!< in: space header, x-latched */
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: page offset;
- if equal to the free limit,
- we try to add new extents to
- the space free list */
- mtr_t* mtr) /*!< in: mtr handle */
-{
- ulint limit;
- ulint size;
- ulint zip_size;
- ulint descr_page_no;
- page_t* descr_page;
-
- ut_ad(mtr);
- ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_S_FIX)
- || mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_X_FIX));
- ut_ad(page_offset(sp_header) == FSP_HEADER_OFFSET);
- /* Read free limit and space size, and derive the compressed page
- size from the space flags */
- limit = mach_read_from_4(sp_header + FSP_FREE_LIMIT);
- size = mach_read_from_4(sp_header + FSP_SIZE);
- zip_size = dict_table_flags_to_zip_size(
- mach_read_from_4(sp_header + FSP_SPACE_FLAGS));
-
- /* If offset is >= size or > limit, return NULL */
-
- if ((offset >= size) || (offset > limit)) {
-
- return(NULL);
- }
-
- /* If offset is == limit, fill free list of the space. This uses
- the limit value read above, before the free list is extended. */
-
- if (offset == limit) {
- fsp_fill_free_list(FALSE, space, sp_header, mtr);
- }
-
- descr_page_no = xdes_calc_descriptor_page(zip_size, offset);
-
- if (descr_page_no == 0) {
- /* It is on the space header page */
-
- descr_page = page_align(sp_header); /* start of the header page */
- } else {
- buf_block_t* block;
-
- block = buf_page_get(space, zip_size, descr_page_no,
- RW_X_LATCH, mtr); /* x-latch the descriptor page */
- buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
-
- descr_page = buf_block_get_frame(block);
- }
-
- return(descr_page + XDES_ARR_OFFSET
- + XDES_SIZE * xdes_calc_descriptor_index(zip_size, offset));
-}
-
-/********************************************************************//**
-Gets a pointer to the extent descriptor of a page. Page 0 of the space is
-x-latched to obtain the space header, and the lookup is then delegated to
-xdes_get_descriptor_with_space_hdr(). The page where the extent descriptor
-resides is x-locked. If the page offset is equal to the free limit of the
-space, new extents from above the free limit are added to the space free
-list (unless free limit == space size); this is necessary to make the
-descriptor defined, as descriptors are uninitialized above the free limit.
-@return pointer to the extent descriptor, NULL if the page does not
-exist in the space or if the offset exceeds the free limit */
-static
-xdes_t*
-xdes_get_descriptor(
-/*================*/
-	ulint	space,	/*!< in: space id */
-	ulint	zip_size,/*!< in: compressed page size in bytes
-			or 0 for uncompressed pages */
-	ulint	offset,	/*!< in: page offset; if equal to the free limit,
-			we try to add new extents to the space free list */
-	mtr_t*	mtr)	/*!< in: mtr handle */
-{
-	buf_block_t*	hdr_block;
-	fsp_header_t*	space_hdr;
-
-	hdr_block = buf_page_get(space, zip_size, 0, RW_X_LATCH, mtr);
-	buf_block_dbg_add_level(hdr_block, SYNC_FSP_PAGE);
-
-	space_hdr = buf_block_get_frame(hdr_block) + FSP_HEADER_OFFSET;
-
-	return(xdes_get_descriptor_with_space_hdr(space_hdr, space,
-						  offset, mtr));
-}
-
-/********************************************************************//**
-Gets a pointer to the extent descriptor when the file address of the
-list node embedded in the descriptor is known. The page where the extent
-descriptor resides is x-locked.
-@return pointer to the extent descriptor */
-UNIV_INLINE
-xdes_t*
-xdes_lst_get_descriptor(
-/*====================*/
-	ulint		space,	/*!< in: space id */
-	ulint		zip_size,/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	fil_addr_t	lst_node,/*!< in: file address of the list node
-				contained in the descriptor */
-	mtr_t*		mtr)	/*!< in: mtr handle */
-{
-	ut_ad(mtr);
-	ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL),
-				MTR_MEMO_X_LOCK));
-
-	/* The list node sits XDES_FLST_NODE bytes into the descriptor,
-	so step back from the node to the start of the descriptor */
-	return(fut_get_ptr(space, zip_size, lst_node, RW_X_LATCH, mtr)
-	       - XDES_FLST_NODE);
-}
-
-/********************************************************************//**
-Computes the page number of the first page of the extent that a
-descriptor describes, from the descriptor's own page number and its slot
-in the descriptor array.
-@return offset of the first page in extent */
-UNIV_INLINE
-ulint
-xdes_get_offset(
-/*============*/
-	xdes_t*	descr)	/*!< in: extent descriptor */
-{
-	ulint	descr_page_no;
-	ulint	slot;
-
-	ut_ad(descr);
-
-	descr_page_no = page_get_page_no(page_align(descr));
-	slot = (page_offset(descr) - XDES_ARR_OFFSET) / XDES_SIZE;
-
-	return(descr_page_no + slot * FSP_EXTENT_SIZE);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Inits a file page whose prior contents should be ignored.
-If the block has a compressed page descriptor, both the uncompressed frame
-and the compressed data are zero-filled, and the page number and space id
-are written to the frame and copied into the compressed data. Otherwise
-only the page number, the 8 bytes at FIL_PAGE_LSN, the space id and the
-8 bytes at UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM are written; the
-rest of the frame is left untouched unless UNIV_BASIC_LOG_DEBUG is defined,
-in which case the frame is first filled with 0xff. */
-static
-void
-fsp_init_file_page_low(
-/*===================*/
- buf_block_t* block) /*!< in: pointer to a page */
-{
- page_t* page = buf_block_get_frame(block);
- page_zip_des_t* page_zip= buf_block_get_page_zip(block);
-
-#ifndef UNIV_HOTBACKUP
- block->check_index_page_at_flush = FALSE;
-#endif /* !UNIV_HOTBACKUP */
-
- if (UNIV_LIKELY_NULL(page_zip)) {
- memset(page, 0, UNIV_PAGE_SIZE);
- memset(page_zip->data, 0, page_zip_get_size(page_zip));
- mach_write_to_4(page + FIL_PAGE_OFFSET,
- buf_block_get_page_no(block));
- mach_write_to_4(page
- + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
- buf_block_get_space(block));
- memcpy(page_zip->data + FIL_PAGE_OFFSET,
- page + FIL_PAGE_OFFSET, 4); /* mirror the page number */
- memcpy(page_zip->data + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
- page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 4); /* mirror the
- space id */
- return;
- }
-
-#ifdef UNIV_BASIC_LOG_DEBUG
- memset(page, 0xff, UNIV_PAGE_SIZE);
-#endif
- mach_write_to_4(page + FIL_PAGE_OFFSET, buf_block_get_page_no(block));
- memset(page + FIL_PAGE_LSN, 0, 8);
- mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
- buf_block_get_space(block));
- memset(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM, 0, 8);
-}
-
-#ifndef UNIV_HOTBACKUP
-/***********************************************************//**
-Inits a file page whose prior contents should be ignored, and writes an
-MLOG_INIT_FILE_PAGE initial log record for it to the mini-transaction. */
-static
-void
-fsp_init_file_page(
-/*===============*/
-	buf_block_t*	block,	/*!< in: pointer to a page */
-	mtr_t*		mtr)	/*!< in: mtr */
-{
-	page_t*	frame;
-
-	fsp_init_file_page_low(block);
-
-	frame = buf_block_get_frame(block);
-
-	mlog_write_initial_log_record(frame, MLOG_INIT_FILE_PAGE, mtr);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Parses a redo log record of a file page init. Nothing is consumed from
-the buffer; when a block is supplied, the page is re-initialized.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-fsp_parse_init_file_page(
-/*=====================*/
-	byte*		ptr,	/*!< in: buffer */
-	byte*		end_ptr __attribute__((unused)), /*!< in: buffer end */
-	buf_block_t*	block)	/*!< in: block or NULL */
-{
-	ut_ad(ptr && end_ptr);
-
-	if (block != NULL) {
-		fsp_init_file_page_low(block);
-	}
-
-	/* The returned pointer equals the input pointer */
-	return(ptr);
-}
-
-/**********************************************************************//**
-Initializes the fsp system. Currently a no-op. */
-UNIV_INTERN
-void
-fsp_init(void)
-/*==========*/
-{
-	/* Intentionally empty: there is nothing to set up at the moment */
-}
-
-/**********************************************************************//**
-Stores the space id and the tablespace flags in the space header of a
-tablespace header page. The page is written directly, bypassing the buffer
-pool; fil0fil.c uses this when it creates a new single-table tablespace. */
-UNIV_INTERN
-void
-fsp_header_init_fields(
-/*===================*/
-	page_t*	page,		/*!< in/out: first page in the space */
-	ulint	space_id,	/*!< in: space id */
-	ulint	flags)		/*!< in: tablespace flags (FSP_SPACE_FLAGS):
-				0, or table->flags if newer than COMPACT */
-{
-	fsp_header_t*	hdr = page + FSP_HEADER_OFFSET;
-
-	/* FSP_SPACE_FLAGS is 0 both for ROW_FORMAT=REDUNDANT
-	(table->flags == 0) and for ROW_FORMAT=COMPACT
-	(table->flags == DICT_TF_COMPACT); every other format stores
-	table->flags unchanged. DICT_TF_COMPACT itself must therefore
-	never be passed in. */
-	ut_a(flags != DICT_TF_COMPACT);
-
-	mach_write_to_4(hdr + FSP_SPACE_ID, space_id);
-	mach_write_to_4(hdr + FSP_SPACE_FLAGS, flags);
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Initializes the space header of a newly created space and also creates the
-insert buffer tree root if space == 0.
-The tablespace latch is x-locked, page 0 is created and x-latched, and the
-header fields, the five file-based lists and the first segment id are
-written through the mini-transaction. The free limit starts at 0 and is
-then advanced by fsp_fill_free_list(); for spaces other than 0 that call is
-made with init_space == TRUE. */
-UNIV_INTERN
-void
-fsp_header_init(
-/*============*/
- ulint space, /*!< in: space id */
- ulint size, /*!< in: current size in blocks */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- fsp_header_t* header;
- buf_block_t* block;
- page_t* page;
- ulint flags;
- ulint zip_size;
-
- ut_ad(mtr);
-
- mtr_x_lock(fil_space_get_latch(space, &flags), mtr); /* also fetches
- the tablespace flags into flags */
-
- zip_size = dict_table_flags_to_zip_size(flags);
- block = buf_page_create(space, 0, zip_size, mtr);
- buf_page_get(space, zip_size, 0, RW_X_LATCH, mtr);
- buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
-
- /* The prior contents of the file page should be ignored */
-
- fsp_init_file_page(block, mtr);
- page = buf_block_get_frame(block);
-
- mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_FSP_HDR,
- MLOG_2BYTES, mtr);
-
- header = FSP_HEADER_OFFSET + page;
-
- mlog_write_ulint(header + FSP_SPACE_ID, space, MLOG_4BYTES, mtr);
- mlog_write_ulint(header + FSP_NOT_USED, 0, MLOG_4BYTES, mtr);
-
- mlog_write_ulint(header + FSP_SIZE, size, MLOG_4BYTES, mtr);
- mlog_write_ulint(header + FSP_FREE_LIMIT, 0, MLOG_4BYTES, mtr);
- mlog_write_ulint(header + FSP_SPACE_FLAGS, flags,
- MLOG_4BYTES, mtr);
- mlog_write_ulint(header + FSP_FRAG_N_USED, 0, MLOG_4BYTES, mtr);
-
- flst_init(header + FSP_FREE, mtr);
- flst_init(header + FSP_FREE_FRAG, mtr);
- flst_init(header + FSP_FULL_FRAG, mtr);
- flst_init(header + FSP_SEG_INODES_FULL, mtr);
- flst_init(header + FSP_SEG_INODES_FREE, mtr);
-
- mlog_write_dulint(header + FSP_SEG_ID, ut_dulint_create(0, 1), mtr);
- if (space == 0) {
- fsp_fill_free_list(FALSE, space, header, mtr);
- btr_create(DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF,
- 0, 0, ut_dulint_add(DICT_IBUF_ID_MIN, space),
- dict_ind_redundant, mtr); /* insert buffer tree root */
- } else {
- fsp_fill_free_list(TRUE, space, header, mtr);
- }
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Reads the space id from the first page of a tablespace. The id is stored
-twice on that page, in the space header and in the page header; the two
-copies are compared and a mismatch is reported on stderr.
-@return space id, ULINT_UNDEFINED if the two stored ids differ */
-UNIV_INTERN
-ulint
-fsp_header_get_space_id(
-/*====================*/
-	const page_t*	page)	/*!< in: first page of a tablespace */
-{
-	ulint	hdr_id;
-	ulint	page_id;
-
-	hdr_id = mach_read_from_4(FSP_HEADER_OFFSET + page + FSP_SPACE_ID);
-	page_id = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
-
-	if (page_id == hdr_id) {
-
-		return(page_id);
-	}
-
-	fprintf(stderr,
-		"InnoDB: Error: space id in fsp header %lu,"
-		" but in the page header %lu\n",
-		(ulong) hdr_id, (ulong) page_id);
-
-	return(ULINT_UNDEFINED);
-}
-
-/**********************************************************************//**
-Reads the FSP_SPACE_FLAGS field from the first page of a tablespace.
-@return flags */
-UNIV_INTERN
-ulint
-fsp_header_get_flags(
-/*=================*/
-	const page_t*	page)	/*!< in: first page of a tablespace */
-{
-	const page_t*	flags_field;
-
-	/* The pointer must address the very start of a page frame */
-	ut_ad(!page_offset(page));
-
-	flags_field = FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page;
-
-	return(mach_read_from_4(flags_field));
-}
-
-/**********************************************************************//**
-Derives the compressed page size from the tablespace flags stored on the
-first page of a tablespace.
-@return compressed page size in bytes, or 0 if uncompressed */
-UNIV_INTERN
-ulint
-fsp_header_get_zip_size(
-/*====================*/
-	const page_t*	page)	/*!< in: first page of a tablespace */
-{
-	return(dict_table_flags_to_zip_size(fsp_header_get_flags(page)));
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Adds a number of pages to the FSP_SIZE field in the space header. The
-tablespace latch is x-locked and the header page x-latched within the
-given mini-transaction. */
-UNIV_INTERN
-void
-fsp_header_inc_size(
-/*================*/
-	ulint	space,	/*!< in: space id */
-	ulint	size_inc,/*!< in: size increment in pages */
-	mtr_t*	mtr)	/*!< in: mini-transaction handle */
-{
-	fsp_header_t*	hdr;
-	ulint		flags;
-	ulint		zip_size;
-	ulint		old_size;
-
-	ut_ad(mtr);
-
-	/* Locking the latch also hands back the tablespace flags */
-	mtr_x_lock(fil_space_get_latch(space, &flags), mtr);
-
-	zip_size = dict_table_flags_to_zip_size(flags);
-	hdr = fsp_get_space_header(space, zip_size, mtr);
-
-	old_size = mtr_read_ulint(hdr + FSP_SIZE, MLOG_4BYTES, mtr);
-
-	mlog_write_ulint(hdr + FSP_SIZE, old_size + size_inc,
-			 MLOG_4BYTES, mtr);
-}
-
-/**********************************************************************//**
-Gets the current free limit of the system tablespace. The free limit
-means the place of the first page which has never been put to the
-free list for allocation. The space above that address is initialized
-to zero. As a side effect the value, converted to megabytes, is passed to
-log_fsp_current_free_limit_set_and_checkpoint().
-@return free limit in megabytes */
-UNIV_INTERN
-ulint
-fsp_header_get_free_limit(void)
-/*===========================*/
-{
-	mtr_t		local_mtr;
-	fsp_header_t*	sys_hdr;
-	ulint		limit_mb;
-
-	mtr_start(&local_mtr);
-
-	mtr_x_lock(fil_space_get_latch(0, NULL), &local_mtr);
-
-	sys_hdr = fsp_get_space_header(0, 0, &local_mtr);
-
-	/* The header stores the limit in pages; convert to megabytes */
-	limit_mb = mtr_read_ulint(sys_hdr + FSP_FREE_LIMIT, MLOG_4BYTES,
-				  &local_mtr)
-		/ ((1024 * 1024) / UNIV_PAGE_SIZE);
-
-	log_fsp_current_free_limit_set_and_checkpoint(limit_mb);
-
-	mtr_commit(&local_mtr);
-
-	return(limit_mb);
-}
-
-/**********************************************************************//**
-Reads the size of the system tablespace from its space header, inside a
-mini-transaction of its own. If we do not have an auto-extending data
-file, this should be equal to the size of the data files. If there is an
-auto-extending data file, this can be smaller.
-@return size in pages */
-UNIV_INTERN
-ulint
-fsp_header_get_tablespace_size(void)
-/*================================*/
-{
-	mtr_t		local_mtr;
-	fsp_header_t*	sys_hdr;
-	ulint		n_pages;
-
-	mtr_start(&local_mtr);
-
-	mtr_x_lock(fil_space_get_latch(0, NULL), &local_mtr);
-
-	sys_hdr = fsp_get_space_header(0, 0, &local_mtr);
-	n_pages = mtr_read_ulint(sys_hdr + FSP_SIZE, MLOG_4BYTES,
-				 &local_mtr);
-
-	mtr_commit(&local_mtr);
-
-	return(n_pages);
-}
-
-/***********************************************************************//**
-Tries to extend a single-table tablespace so that a page would fit in the
-data file. Whatever size is actually reached is written to FSP_SIZE in the
-space header, also when the extension falls short.
-@return TRUE if success */
-static
-ibool
-fsp_try_extend_data_file_with_pages(
-/*================================*/
-	ulint		space,	/*!< in: space */
-	ulint		page_no,/*!< in: page number */
-	fsp_header_t*	header,	/*!< in: space header */
-	mtr_t*		mtr)	/*!< in: mtr */
-{
-	ulint	header_size;
-	ulint	reached_size;
-	ibool	extended;
-
-	/* Not meant for the system tablespace */
-	ut_a(space != 0);
-
-	header_size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
-
-	/* The page must lie beyond the current end of the space */
-	ut_a(page_no >= header_size);
-
-	extended = fil_extend_space_to_desired_size(&reached_size, space,
-						    page_no + 1);
-
-	/* reached_size is the space size in pages after the attempt; it
-	may be less than we wanted if we ran out of disk space */
-
-	mlog_write_ulint(header + FSP_SIZE, reached_size, MLOG_4BYTES, mtr);
-
-	return(extended);
-}
-
-/***********************************************************************//**
-Tries to extend the last data file of a tablespace if it is auto-extending.
-
-For the system tablespace (space 0) the increment is at most
-SRV_AUTO_EXTEND_INCREMENT and, when srv_last_file_size_max is set, at most
-the room left below that maximum. If the last data file is already larger
-than the configured maximum, an error is printed and the file is not
-extended any further: the unsigned difference (max - current) would wrap
-around to a huge value and, after clamping, permit one more full increment
-past the limit.
-
-A single-table tablespace is first grown to one extent, then by one extent
-at a time while it is smaller than 32 extents, and by FSP_FREE_ADD extents
-at a time after that.
-
-The size actually reached, rounded down to a full megabyte, is written to
-FSP_SIZE in the space header.
-@return FALSE if the system tablespace is not auto-extending, or if a
-single-table tablespace could not be grown to its first extent; TRUE
-otherwise (also when the file could be extended only partially or not
-at all) */
-static
-ibool
-fsp_try_extend_data_file(
-/*=====================*/
-	ulint*		actual_increase,/*!< out: actual increase in pages, where
-					we measure the tablespace size from
-					what the header field says; it may be
-					the actual file size rounded down to
-					megabyte */
-	ulint		space,		/*!< in: space */
-	fsp_header_t*	header,		/*!< in: space header */
-	mtr_t*		mtr)		/*!< in: mtr */
-{
-	ulint	size;
-	ulint	zip_size;
-	ulint	new_size;
-	ulint	old_size;
-	ulint	size_increase;
-	ulint	actual_size;
-	ibool	success;
-
-	*actual_increase = 0;
-
-	if (space == 0 && !srv_auto_extend_last_data_file) {
-
-		/* We print the error message only once to avoid
-		spamming the error log. Note that we don't need
-		to reset the flag to FALSE as dealing with this
-		error requires server restart. */
-		if (fsp_tbs_full_error_printed == FALSE) {
-			fprintf(stderr,
-				"InnoDB: Error: Data file(s) ran"
-				" out of space.\n"
-				"Please add another data file or"
-				" use \'autoextend\' for the last"
-				" data file.\n");
-			fsp_tbs_full_error_printed = TRUE;
-		}
-		return(FALSE);
-	}
-
-	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
-	zip_size = dict_table_flags_to_zip_size(
-		mach_read_from_4(header + FSP_SPACE_FLAGS));
-
-	old_size = size;
-
-	if (space == 0) {
-		if (!srv_last_file_size_max) {
-			/* No upper bound configured */
-			size_increase = SRV_AUTO_EXTEND_INCREMENT;
-		} else if (srv_last_file_size_max
-			   < srv_data_file_sizes[srv_n_data_files - 1]) {
-
-			fprintf(stderr,
-				"InnoDB: Error: Last data file size"
-				" is %lu, max size allowed %lu\n",
-				(ulong) srv_data_file_sizes[
-					srv_n_data_files - 1],
-				(ulong) srv_last_file_size_max);
-
-			/* The file already exceeds the configured
-			maximum. Do not compute max - current here:
-			both operands are unsigned, so the result
-			would wrap around and the file would keep
-			growing beyond the limit. */
-			size_increase = 0;
-		} else {
-			/* Grow by the standard increment, but never
-			past the configured maximum */
-			size_increase = srv_last_file_size_max
-				- srv_data_file_sizes[srv_n_data_files - 1];
-			if (size_increase > SRV_AUTO_EXTEND_INCREMENT) {
-				size_increase = SRV_AUTO_EXTEND_INCREMENT;
-			}
-		}
-	} else {
-		/* We extend single-table tablespaces first one extent
-		at a time, but for bigger tablespaces more. It is not
-		enough to extend always by one extent, because some
-		extents are frag page extents. */
-		ulint	extent_size;	/*!< one megabyte, in pages */
-
-		if (!zip_size) {
-			extent_size = FSP_EXTENT_SIZE;
-		} else {
-			extent_size = FSP_EXTENT_SIZE
-				* UNIV_PAGE_SIZE / zip_size;
-		}
-
-		if (size < extent_size) {
-			/* Let us first extend the file to extent_size */
-			success = fsp_try_extend_data_file_with_pages(
-				space, extent_size - 1, header, mtr);
-			if (!success) {
-				/* The helper has stored the size that
-				was reached in the header; report the
-				partial growth to the caller */
-				new_size = mtr_read_ulint(header + FSP_SIZE,
-							  MLOG_4BYTES, mtr);
-
-				*actual_increase = new_size - old_size;
-
-				return(FALSE);
-			}
-
-			size = extent_size;
-		}
-
-		if (size < 32 * extent_size) {
-			size_increase = extent_size;
-		} else {
-			/* Below in fsp_fill_free_list() we assume
-			that we add at most FSP_FREE_ADD extents at
-			a time */
-			size_increase = FSP_FREE_ADD * extent_size;
-		}
-	}
-
-	if (size_increase == 0) {
-		/* Nothing to add: the maximum size has been reached
-		(or exceeded) */
-
-		return(TRUE);
-	}
-
-	/* The return value is deliberately not propagated: a partial
-	extension is reported through actual_size and the header */
-	success = fil_extend_space_to_desired_size(&actual_size, space,
-						   size + size_increase);
-	/* We ignore any fragments of a full megabyte when storing the size
-	to the space header */
-
-	if (!zip_size) {
-		new_size = ut_calc_align_down(actual_size,
-					      (1024 * 1024) / UNIV_PAGE_SIZE);
-	} else {
-		new_size = ut_calc_align_down(actual_size,
-					      (1024 * 1024) / zip_size);
-	}
-	mlog_write_ulint(header + FSP_SIZE, new_size, MLOG_4BYTES, mtr);
-
-	*actual_increase = new_size - old_size;
-
-	return(TRUE);
-}
-
-/**********************************************************************//**
-Puts new extents to the free list if there are free extents above the free
-limit. If an extent happens to contain an extent descriptor page, the extent
-is put to the FSP_FREE_FRAG list with the page marked as used.
-
-Before the scan, the data file is extended when fewer than FSP_FREE_ADD
-extents lie between the free limit and the end of the space; for space 0
-this is done only if srv_auto_extend_last_data_file is set, for other
-spaces only if init_space is FALSE. The scan then walks extent by extent
-from the free limit, advancing FSP_FREE_LIMIT in the header as it goes,
-and stops after FSP_FREE_ADD extents have been added to FSP_FREE or when
-the next extent would not fit inside the space. Extents that go to
-FSP_FREE_FRAG are not counted against FSP_FREE_ADD. */
-static
-void
-fsp_fill_free_list(
-/*===============*/
- ibool init_space, /*!< in: TRUE if this is a single-table
- tablespace and we are only initing
- the tablespace's first extent
- descriptor page and ibuf bitmap page;
- then we do not allocate more extents */
- ulint space, /*!< in: space */
- fsp_header_t* header, /*!< in: space header */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint limit;
- ulint size;
- ulint zip_size;
- xdes_t* descr;
- ulint count = 0; /* extents added to FSP_FREE so far */
- ulint frag_n_used;
- ulint actual_increase;
- ulint i;
- mtr_t ibuf_mtr;
-
- ut_ad(header && mtr);
- ut_ad(page_offset(header) == FSP_HEADER_OFFSET);
-
- /* Check if we can fill free list from above the free list limit */
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
- limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES, mtr);
-
- zip_size = dict_table_flags_to_zip_size(
- mach_read_from_4(FSP_SPACE_FLAGS + header));
- ut_a(ut_is_2pow(zip_size));
- ut_a(zip_size <= UNIV_PAGE_SIZE);
- ut_a(!zip_size || zip_size >= PAGE_ZIP_MIN_SIZE);
-
- if (space == 0 && srv_auto_extend_last_data_file
- && size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) {
-
- /* Try to increase the last data file size */
- fsp_try_extend_data_file(&actual_increase, space, header, mtr);
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr); /* re-read:
- the extension may have changed it */
- }
-
- if (space != 0 && !init_space
- && size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) {
-
- /* Try to increase the .ibd file size */
- fsp_try_extend_data_file(&actual_increase, space, header, mtr);
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr); /* re-read:
- the extension may have changed it */
- }
-
- i = limit; /* first page of the next extent to process; with
- init_space the loop below handles the extent at page 0
- even if it does not fit inside the current size */
-
- while ((init_space && i < 1)
- || ((i + FSP_EXTENT_SIZE <= size) && (count < FSP_FREE_ADD))) {
-
- ibool init_xdes; /* TRUE if page i is itself a descriptor page */
- if (zip_size) {
- init_xdes = ut_2pow_remainder(i, zip_size) == 0;
- } else {
- init_xdes = ut_2pow_remainder(i, UNIV_PAGE_SIZE) == 0;
- }
-
- mlog_write_ulint(header + FSP_FREE_LIMIT, i + FSP_EXTENT_SIZE,
- MLOG_4BYTES, mtr);
-
- /* Update the free limit info in the log system and make
- a checkpoint */
- if (space == 0) {
- ut_a(!zip_size);
- log_fsp_current_free_limit_set_and_checkpoint(
- (i + FSP_EXTENT_SIZE)
- / ((1024 * 1024) / UNIV_PAGE_SIZE));
- }
-
- if (UNIV_UNLIKELY(init_xdes)) {
-
- buf_block_t* block;
-
- /* We are going to initialize a new descriptor page
- and a new ibuf bitmap page: the prior contents of the
- pages should be ignored. */
-
- if (i > 0) { /* page 0 is not re-created here */
- block = buf_page_create(
- space, i, zip_size, mtr);
- buf_page_get(space, zip_size, i,
- RW_X_LATCH, mtr);
- buf_block_dbg_add_level(block,
- SYNC_FSP_PAGE);
-
- fsp_init_file_page(block, mtr);
- mlog_write_ulint(buf_block_get_frame(block)
- + FIL_PAGE_TYPE,
- FIL_PAGE_TYPE_XDES,
- MLOG_2BYTES, mtr);
- }
-
- /* Initialize the ibuf bitmap page in a separate
- mini-transaction because it is low in the latching
- order, and we must be able to release its latch
- before returning from the fsp routine */
-
- mtr_start(&ibuf_mtr);
-
- block = buf_page_create(space,
- i + FSP_IBUF_BITMAP_OFFSET,
- zip_size, &ibuf_mtr);
- buf_page_get(space, zip_size,
- i + FSP_IBUF_BITMAP_OFFSET,
- RW_X_LATCH, &ibuf_mtr);
- buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
-
- fsp_init_file_page(block, &ibuf_mtr);
-
- ibuf_bitmap_page_init(block, &ibuf_mtr);
-
- mtr_commit(&ibuf_mtr);
- }
-
- descr = xdes_get_descriptor_with_space_hdr(header, space, i,
- mtr);
- xdes_init(descr, mtr);
-
-#if UNIV_PAGE_SIZE % FSP_EXTENT_SIZE
-# error "UNIV_PAGE_SIZE % FSP_EXTENT_SIZE != 0"
-#endif
-#if PAGE_ZIP_MIN_SIZE % FSP_EXTENT_SIZE
-# error "PAGE_ZIP_MIN_SIZE % FSP_EXTENT_SIZE != 0"
-#endif
-
- if (UNIV_UNLIKELY(init_xdes)) {
-
- /* The first page in the extent is a descriptor page
- and the second is an ibuf bitmap page: mark them
- used */
-
- xdes_set_bit(descr, XDES_FREE_BIT, 0, FALSE, mtr);
- xdes_set_bit(descr, XDES_FREE_BIT,
- FSP_IBUF_BITMAP_OFFSET, FALSE, mtr);
- xdes_set_state(descr, XDES_FREE_FRAG, mtr);
-
- flst_add_last(header + FSP_FREE_FRAG,
- descr + XDES_FLST_NODE, mtr);
- frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED,
- MLOG_4BYTES, mtr);
- mlog_write_ulint(header + FSP_FRAG_N_USED,
- frag_n_used + 2, MLOG_4BYTES, mtr); /* the two pages
- marked used above */
- } else {
- flst_add_last(header + FSP_FREE,
- descr + XDES_FLST_NODE, mtr);
- count++;
- }
-
- i += FSP_EXTENT_SIZE;
- }
-}
-
-/**********************************************************************//**
-Allocates a new free extent.
-@return extent descriptor, NULL if cannot be allocated */
-static
-xdes_t*
-fsp_alloc_free_extent(
-/*==================*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint hint, /*!< in: hint of which extent would be desirable: any
- page offset in the extent goes; the hint must not
- be > FSP_FREE_LIMIT */
- mtr_t* mtr) /*!< in: mtr */
-{
- fsp_header_t* header;
- fil_addr_t first;
- xdes_t* descr;
-
- ut_ad(mtr);
-
- header = fsp_get_space_header(space, zip_size, mtr);
-
- descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr);
-
- if (descr && (xdes_get_state(descr, mtr) == XDES_FREE)) {
- /* Ok, we can take this extent */
- } else {
- /* Take the first extent in the free list */
- first = flst_get_first(header + FSP_FREE, mtr);
-
- if (fil_addr_is_null(first)) {
- fsp_fill_free_list(FALSE, space, header, mtr);
-
- first = flst_get_first(header + FSP_FREE, mtr);
- }
-
- if (fil_addr_is_null(first)) {
-
- return(NULL); /* No free extents left */
- }
-
- descr = xdes_lst_get_descriptor(space, zip_size, first, mtr);
- }
-
- flst_remove(header + FSP_FREE, descr + XDES_FLST_NODE, mtr);
-
- return(descr);
-}
-
-/**********************************************************************//**
-Allocates a single free page from a space. The page is marked as used.
-@return the page offset, FIL_NULL if no page could be allocated */
-static
-ulint
-fsp_alloc_free_page(
-/*================*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint hint, /*!< in: hint of which page would be desirable */
- mtr_t* mtr) /*!< in: mtr handle */
-{
- fsp_header_t* header;
- fil_addr_t first;
- xdes_t* descr;
- buf_block_t* block;
- ulint free;
- ulint frag_n_used;
- ulint page_no;
- ulint space_size;
- ibool success;
-
- ut_ad(mtr);
-
- header = fsp_get_space_header(space, zip_size, mtr);
-
- /* Get the hinted descriptor */
- descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr);
-
- if (descr && (xdes_get_state(descr, mtr) == XDES_FREE_FRAG)) {
- /* Ok, we can take this extent */
- } else {
- /* Else take the first extent in free_frag list */
- first = flst_get_first(header + FSP_FREE_FRAG, mtr);
-
- if (fil_addr_is_null(first)) {
- /* There are no partially full fragments: allocate
- a free extent and add it to the FREE_FRAG list. NOTE
- that the allocation may have as a side-effect that an
- extent containing a descriptor page is added to the
- FREE_FRAG list. But we will allocate our page from the
- the free extent anyway. */
-
- descr = fsp_alloc_free_extent(space, zip_size,
- hint, mtr);
-
- if (descr == NULL) {
- /* No free space left */
-
- return(FIL_NULL);
- }
-
- xdes_set_state(descr, XDES_FREE_FRAG, mtr);
- flst_add_last(header + FSP_FREE_FRAG,
- descr + XDES_FLST_NODE, mtr);
- } else {
- descr = xdes_lst_get_descriptor(space, zip_size,
- first, mtr);
- }
-
- /* Reset the hint */
- hint = 0;
- }
-
- /* Now we have in descr an extent with at least one free page. Look
- for a free page in the extent. */
-
- free = xdes_find_bit(descr, XDES_FREE_BIT, TRUE,
- hint % FSP_EXTENT_SIZE, mtr);
- if (free == ULINT_UNDEFINED) {
-
- ut_print_buf(stderr, ((byte*)descr) - 500, 1000);
- putc('\n', stderr);
-
- ut_error;
- }
-
- page_no = xdes_get_offset(descr) + free;
-
- space_size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
-
- if (space_size <= page_no) {
- /* It must be that we are extending a single-table tablespace
- whose size is still < 64 pages */
-
- ut_a(space != 0);
- if (page_no >= FSP_EXTENT_SIZE) {
- fprintf(stderr,
- "InnoDB: Error: trying to extend a"
- " single-table tablespace %lu\n"
- "InnoDB: by single page(s) though the"
- " space size %lu. Page no %lu.\n",
- (ulong) space, (ulong) space_size,
- (ulong) page_no);
- return(FIL_NULL);
- }
- success = fsp_try_extend_data_file_with_pages(space, page_no,
- header, mtr);
- if (!success) {
- /* No disk space left */
- return(FIL_NULL);
- }
- }
-
- xdes_set_bit(descr, XDES_FREE_BIT, free, FALSE, mtr);
-
- /* Update the FRAG_N_USED field */
- frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
- mtr);
- frag_n_used++;
- mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES,
- mtr);
- if (xdes_is_full(descr, mtr)) {
- /* The fragment is full: move it to another list */
- flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
- mtr);
- xdes_set_state(descr, XDES_FULL_FRAG, mtr);
-
- flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE,
- mtr);
- mlog_write_ulint(header + FSP_FRAG_N_USED,
- frag_n_used - FSP_EXTENT_SIZE, MLOG_4BYTES,
- mtr);
- }
-
- /* Initialize the allocated page to the buffer pool, so that it can
- be obtained immediately with buf_page_get without need for a disk
- read. */
-
- buf_page_create(space, page_no, zip_size, mtr);
-
- block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr);
- buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
-
- /* Prior contents of the page should be ignored */
- fsp_init_file_page(block, mtr);
-
- return(page_no);
-}
-
-/**********************************************************************//**
-Frees a single page of a space. The page is marked as free and clean. */
-static
-void
-fsp_free_page(
-/*==========*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page, /*!< in: page offset */
- mtr_t* mtr) /*!< in: mtr handle */
-{
- fsp_header_t* header;
- xdes_t* descr;
- ulint state;
- ulint frag_n_used;
-
- ut_ad(mtr);
-
- /* fprintf(stderr, "Freeing page %lu in space %lu\n", page, space); */
-
- header = fsp_get_space_header(space, zip_size, mtr);
-
- descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr);
-
- state = xdes_get_state(descr, mtr);
-
- if (state != XDES_FREE_FRAG && state != XDES_FULL_FRAG) {
- fprintf(stderr,
- "InnoDB: Error: File space extent descriptor"
- " of page %lu has state %lu\n",
- (ulong) page,
- (ulong) state);
- fputs("InnoDB: Dump of descriptor: ", stderr);
- ut_print_buf(stderr, ((byte*)descr) - 50, 200);
- putc('\n', stderr);
-
- if (state == XDES_FREE) {
- /* We put here some fault tolerance: if the page
- is already free, return without doing anything! */
-
- return;
- }
-
- ut_error;
- }
-
- if (xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)) {
- fprintf(stderr,
- "InnoDB: Error: File space extent descriptor"
- " of page %lu says it is free\n"
- "InnoDB: Dump of descriptor: ", (ulong) page);
- ut_print_buf(stderr, ((byte*)descr) - 50, 200);
- putc('\n', stderr);
-
- /* We put here some fault tolerance: if the page
- is already free, return without doing anything! */
-
- return;
- }
-
- xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
- xdes_set_bit(descr, XDES_CLEAN_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
-
- frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
- mtr);
- if (state == XDES_FULL_FRAG) {
- /* The fragment was full: move it to another list */
- flst_remove(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE,
- mtr);
- xdes_set_state(descr, XDES_FREE_FRAG, mtr);
- flst_add_last(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
- mtr);
- mlog_write_ulint(header + FSP_FRAG_N_USED,
- frag_n_used + FSP_EXTENT_SIZE - 1,
- MLOG_4BYTES, mtr);
- } else {
- ut_a(frag_n_used > 0);
- mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used - 1,
- MLOG_4BYTES, mtr);
- }
-
- if (xdes_is_free(descr, mtr)) {
- /* The extent has become free: move it to another list */
- flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
- mtr);
- fsp_free_extent(space, zip_size, page, mtr);
- }
-}
-
-/**********************************************************************//**
-Returns an extent to the free list of a space. */
-static
-void
-fsp_free_extent(
-/*============*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page, /*!< in: page offset in the extent */
- mtr_t* mtr) /*!< in: mtr */
-{
- fsp_header_t* header;
- xdes_t* descr;
-
- ut_ad(mtr);
-
- header = fsp_get_space_header(space, zip_size, mtr);
-
- descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr);
-
- if (xdes_get_state(descr, mtr) == XDES_FREE) {
-
- ut_print_buf(stderr, (byte*)descr - 500, 1000);
- putc('\n', stderr);
-
- ut_error;
- }
-
- xdes_init(descr, mtr);
-
- flst_add_last(header + FSP_FREE, descr + XDES_FLST_NODE, mtr);
-}
-
-/**********************************************************************//**
-Returns the nth inode slot on an inode page.
-@return segment inode */
-UNIV_INLINE
-fseg_inode_t*
-fsp_seg_inode_page_get_nth_inode(
-/*=============================*/
- page_t* page, /*!< in: segment inode page */
- ulint i, /*!< in: inode index on page */
- ulint zip_size __attribute__((unused)),
- /*!< in: compressed page size, or 0 */
- mtr_t* mtr __attribute__((unused)))
- /*!< in: mini-transaction handle */
-{
- ut_ad(i < FSP_SEG_INODES_PER_PAGE(zip_size));
- ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
-
- return(page + FSEG_ARR_OFFSET + FSEG_INODE_SIZE * i);
-}
-
-/**********************************************************************//**
-Looks for a used segment inode on a segment inode page.
-@return segment inode index, or ULINT_UNDEFINED if not found */
-static
-ulint
-fsp_seg_inode_page_find_used(
-/*=========================*/
- page_t* page, /*!< in: segment inode page */
- ulint zip_size,/*!< in: compressed page size, or 0 */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ulint i;
- fseg_inode_t* inode;
-
- for (i = 0; i < FSP_SEG_INODES_PER_PAGE(zip_size); i++) {
-
- inode = fsp_seg_inode_page_get_nth_inode(
- page, i, zip_size, mtr);
-
- if (!ut_dulint_is_zero(mach_read_from_8(inode + FSEG_ID))) {
- /* This is used */
-
- ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N)
- == FSEG_MAGIC_N_VALUE);
- return(i);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-/**********************************************************************//**
-Looks for an unused segment inode on a segment inode page.
-@return segment inode index, or ULINT_UNDEFINED if not found */
-static
-ulint
-fsp_seg_inode_page_find_free(
-/*=========================*/
- page_t* page, /*!< in: segment inode page */
- ulint i, /*!< in: search forward starting from this index */
- ulint zip_size,/*!< in: compressed page size, or 0 */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- fseg_inode_t* inode;
-
- for (; i < FSP_SEG_INODES_PER_PAGE(zip_size); i++) {
-
- inode = fsp_seg_inode_page_get_nth_inode(
- page, i, zip_size, mtr);
-
- if (ut_dulint_is_zero(mach_read_from_8(inode + FSEG_ID))) {
- /* This is unused */
-
- return(i);
- }
-
- ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N)
- == FSEG_MAGIC_N_VALUE);
- }
-
- return(ULINT_UNDEFINED);
-}
-
-/**********************************************************************//**
-Allocates a new file segment inode page.
-@return TRUE if could be allocated */
-static
-ibool
-fsp_alloc_seg_inode_page(
-/*=====================*/
- fsp_header_t* space_header, /*!< in: space header */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- fseg_inode_t* inode;
- buf_block_t* block;
- page_t* page;
- ulint page_no;
- ulint space;
- ulint zip_size;
- ulint i;
-
- ut_ad(page_offset(space_header) == FSP_HEADER_OFFSET);
-
- space = page_get_space_id(page_align(space_header));
- zip_size = dict_table_flags_to_zip_size(
- mach_read_from_4(FSP_SPACE_FLAGS + space_header));
-
- page_no = fsp_alloc_free_page(space, zip_size, 0, mtr);
-
- if (page_no == FIL_NULL) {
-
- return(FALSE);
- }
-
- block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr);
- buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
-
- block->check_index_page_at_flush = FALSE;
-
- page = buf_block_get_frame(block);
-
- mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_INODE,
- MLOG_2BYTES, mtr);
-
- for (i = 0; i < FSP_SEG_INODES_PER_PAGE(zip_size); i++) {
-
- inode = fsp_seg_inode_page_get_nth_inode(page, i,
- zip_size, mtr);
-
- mlog_write_dulint(inode + FSEG_ID, ut_dulint_zero, mtr);
- }
-
- flst_add_last(space_header + FSP_SEG_INODES_FREE,
- page + FSEG_INODE_PAGE_NODE, mtr);
- return(TRUE);
-}
-
-/**********************************************************************//**
-Allocates a new file segment inode.
-@return segment inode, or NULL if not enough space */
-static
-fseg_inode_t*
-fsp_alloc_seg_inode(
-/*================*/
- fsp_header_t* space_header, /*!< in: space header */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ulint page_no;
- buf_block_t* block;
- page_t* page;
- fseg_inode_t* inode;
- ibool success;
- ulint zip_size;
- ulint n;
-
- ut_ad(page_offset(space_header) == FSP_HEADER_OFFSET);
-
- if (flst_get_len(space_header + FSP_SEG_INODES_FREE, mtr) == 0) {
- /* Allocate a new segment inode page */
-
- success = fsp_alloc_seg_inode_page(space_header, mtr);
-
- if (!success) {
-
- return(NULL);
- }
- }
-
- page_no = flst_get_first(space_header + FSP_SEG_INODES_FREE, mtr).page;
-
- zip_size = dict_table_flags_to_zip_size(
- mach_read_from_4(FSP_SPACE_FLAGS + space_header));
- block = buf_page_get(page_get_space_id(page_align(space_header)),
- zip_size, page_no, RW_X_LATCH, mtr);
- buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
-
- page = buf_block_get_frame(block);
-
- n = fsp_seg_inode_page_find_free(page, 0, zip_size, mtr);
-
- ut_a(n != ULINT_UNDEFINED);
-
- inode = fsp_seg_inode_page_get_nth_inode(page, n, zip_size, mtr);
-
- if (ULINT_UNDEFINED == fsp_seg_inode_page_find_free(page, n + 1,
- zip_size, mtr)) {
- /* There are no other unused headers left on the page: move it
- to another list */
-
- flst_remove(space_header + FSP_SEG_INODES_FREE,
- page + FSEG_INODE_PAGE_NODE, mtr);
-
- flst_add_last(space_header + FSP_SEG_INODES_FULL,
- page + FSEG_INODE_PAGE_NODE, mtr);
- }
-
- ut_ad(ut_dulint_is_zero(mach_read_from_8(inode + FSEG_ID))
- || mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
- return(inode);
-}
-
-/**********************************************************************//**
-Frees a file segment inode. */
-static
-void
-fsp_free_seg_inode(
-/*===============*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- fseg_inode_t* inode, /*!< in: segment inode */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- page_t* page;
- fsp_header_t* space_header;
-
- page = page_align(inode);
-
- space_header = fsp_get_space_header(space, zip_size, mtr);
-
- ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
-
- if (ULINT_UNDEFINED
- == fsp_seg_inode_page_find_free(page, 0, zip_size, mtr)) {
-
- /* Move the page to another list */
-
- flst_remove(space_header + FSP_SEG_INODES_FULL,
- page + FSEG_INODE_PAGE_NODE, mtr);
-
- flst_add_last(space_header + FSP_SEG_INODES_FREE,
- page + FSEG_INODE_PAGE_NODE, mtr);
- }
-
- mlog_write_dulint(inode + FSEG_ID, ut_dulint_zero, mtr);
- mlog_write_ulint(inode + FSEG_MAGIC_N, 0xfa051ce3, MLOG_4BYTES, mtr);
-
- if (ULINT_UNDEFINED
- == fsp_seg_inode_page_find_used(page, zip_size, mtr)) {
-
- /* There are no other used headers left on the page: free it */
-
- flst_remove(space_header + FSP_SEG_INODES_FREE,
- page + FSEG_INODE_PAGE_NODE, mtr);
-
- fsp_free_page(space, zip_size, page_get_page_no(page), mtr);
- }
-}
-
-/**********************************************************************//**
-Returns the file segment inode, page x-latched.
-@return segment inode, page x-latched; NULL if the inode is free */
-static
-fseg_inode_t*
-fseg_inode_try_get(
-/*===============*/
- fseg_header_t* header, /*!< in: segment header */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- mtr_t* mtr) /*!< in: mtr handle */
-{
- fil_addr_t inode_addr;
- fseg_inode_t* inode;
-
- inode_addr.page = mach_read_from_4(header + FSEG_HDR_PAGE_NO);
- inode_addr.boffset = mach_read_from_2(header + FSEG_HDR_OFFSET);
- ut_ad(space == mach_read_from_4(header + FSEG_HDR_SPACE));
-
- inode = fut_get_ptr(space, zip_size, inode_addr, RW_X_LATCH, mtr);
-
- if (UNIV_UNLIKELY
- (ut_dulint_is_zero(mach_read_from_8(inode + FSEG_ID)))) {
-
- inode = NULL;
- } else {
- ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N)
- == FSEG_MAGIC_N_VALUE);
- }
-
- return(inode);
-}
-
-/**********************************************************************//**
-Returns the file segment inode, page x-latched.
-@return segment inode, page x-latched */
-static
-fseg_inode_t*
-fseg_inode_get(
-/*===========*/
- fseg_header_t* header, /*!< in: segment header */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- mtr_t* mtr) /*!< in: mtr handle */
-{
- fseg_inode_t* inode
- = fseg_inode_try_get(header, space, zip_size, mtr);
- ut_a(inode);
- return(inode);
-}
-
-/**********************************************************************//**
-Gets the page number from the nth fragment page slot.
-@return page number, FIL_NULL if not in use */
-UNIV_INLINE
-ulint
-fseg_get_nth_frag_page_no(
-/*======================*/
- fseg_inode_t* inode, /*!< in: segment inode */
- ulint n, /*!< in: slot index */
- mtr_t* mtr __attribute__((unused))) /*!< in: mtr handle */
-{
- ut_ad(inode && mtr);
- ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
- ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
- return(mach_read_from_4(inode + FSEG_FRAG_ARR
- + n * FSEG_FRAG_SLOT_SIZE));
-}
-
-/**********************************************************************//**
-Sets the page number in the nth fragment page slot. */
-UNIV_INLINE
-void
-fseg_set_nth_frag_page_no(
-/*======================*/
- fseg_inode_t* inode, /*!< in: segment inode */
- ulint n, /*!< in: slot index */
- ulint page_no,/*!< in: page number to set */
- mtr_t* mtr) /*!< in: mtr handle */
-{
- ut_ad(inode && mtr);
- ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
- ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
-
- mlog_write_ulint(inode + FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE,
- page_no, MLOG_4BYTES, mtr);
-}
-
-/**********************************************************************//**
-Finds a fragment page slot which is free.
-@return slot index; ULINT_UNDEFINED if none found */
-static
-ulint
-fseg_find_free_frag_page_slot(
-/*==========================*/
- fseg_inode_t* inode, /*!< in: segment inode */
- mtr_t* mtr) /*!< in: mtr handle */
-{
- ulint i;
- ulint page_no;
-
- ut_ad(inode && mtr);
-
- for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) {
- page_no = fseg_get_nth_frag_page_no(inode, i, mtr);
-
- if (page_no == FIL_NULL) {
-
- return(i);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-/**********************************************************************//**
-Finds a fragment page slot which is used and last in the array.
-@return slot index; ULINT_UNDEFINED if none found */
-static
-ulint
-fseg_find_last_used_frag_page_slot(
-/*===============================*/
- fseg_inode_t* inode, /*!< in: segment inode */
- mtr_t* mtr) /*!< in: mtr handle */
-{
- ulint i;
- ulint page_no;
-
- ut_ad(inode && mtr);
-
- for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) {
- page_no = fseg_get_nth_frag_page_no(
- inode, FSEG_FRAG_ARR_N_SLOTS - i - 1, mtr);
-
- if (page_no != FIL_NULL) {
-
- return(FSEG_FRAG_ARR_N_SLOTS - i - 1);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-/**********************************************************************//**
-Calculates reserved fragment page slots.
-@return number of fragment pages */
-static
-ulint
-fseg_get_n_frag_pages(
-/*==================*/
- fseg_inode_t* inode, /*!< in: segment inode */
- mtr_t* mtr) /*!< in: mtr handle */
-{
- ulint i;
- ulint count = 0;
-
- ut_ad(inode && mtr);
-
- for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) {
- if (FIL_NULL != fseg_get_nth_frag_page_no(inode, i, mtr)) {
- count++;
- }
- }
-
- return(count);
-}
-
-/**********************************************************************//**
-Creates a new segment.
-@return the block where the segment header is placed, x-latched, NULL
-if could not create segment because of lack of space */
-UNIV_INTERN
-buf_block_t*
-fseg_create_general(
-/*================*/
- ulint space, /*!< in: space id */
- ulint page, /*!< in: page where the segment header is placed: if
- this is != 0, the page must belong to another segment,
- if this is 0, a new page will be allocated and it
- will belong to the created segment */
- ulint byte_offset, /*!< in: byte offset of the created segment header
- on the page */
- ibool has_done_reservation, /*!< in: TRUE if the caller has already
- done the reservation for the pages with
- fsp_reserve_free_extents (at least 2 extents: one for
- the inode and the other for the segment) then there is
- no need to do the check for this individual
- operation */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint flags;
- ulint zip_size;
- fsp_header_t* space_header;
- fseg_inode_t* inode;
- dulint seg_id;
- buf_block_t* block = 0; /* remove warning */
- fseg_header_t* header = 0; /* remove warning */
- rw_lock_t* latch;
- ibool success;
- ulint n_reserved;
- ulint i;
-
- ut_ad(mtr);
- ut_ad(byte_offset + FSEG_HEADER_SIZE
- <= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END);
-
- latch = fil_space_get_latch(space, &flags);
- zip_size = dict_table_flags_to_zip_size(flags);
-
- if (page != 0) {
- block = buf_page_get(space, zip_size, page, RW_X_LATCH, mtr);
- header = byte_offset + buf_block_get_frame(block);
- }
-
- ut_ad(!mutex_own(&kernel_mutex)
- || mtr_memo_contains(mtr, latch, MTR_MEMO_X_LOCK));
-
- mtr_x_lock(latch, mtr);
-
- if (rw_lock_get_x_lock_count(latch) == 1) {
- /* This thread did not own the latch before this call: free
- excess pages from the insert buffer free list */
-
- if (space == IBUF_SPACE_ID) {
- ibuf_free_excess_pages();
- }
- }
-
- if (!has_done_reservation) {
- success = fsp_reserve_free_extents(&n_reserved, space, 2,
- FSP_NORMAL, mtr);
- if (!success) {
- return(NULL);
- }
- }
-
- space_header = fsp_get_space_header(space, zip_size, mtr);
-
- inode = fsp_alloc_seg_inode(space_header, mtr);
-
- if (inode == NULL) {
-
- goto funct_exit;
- }
-
- /* Read the next segment id from space header and increment the
- value in space header */
-
- seg_id = mtr_read_dulint(space_header + FSP_SEG_ID, mtr);
-
- mlog_write_dulint(space_header + FSP_SEG_ID, ut_dulint_add(seg_id, 1),
- mtr);
-
- mlog_write_dulint(inode + FSEG_ID, seg_id, mtr);
- mlog_write_ulint(inode + FSEG_NOT_FULL_N_USED, 0, MLOG_4BYTES, mtr);
-
- flst_init(inode + FSEG_FREE, mtr);
- flst_init(inode + FSEG_NOT_FULL, mtr);
- flst_init(inode + FSEG_FULL, mtr);
-
- mlog_write_ulint(inode + FSEG_MAGIC_N, FSEG_MAGIC_N_VALUE,
- MLOG_4BYTES, mtr);
- for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) {
- fseg_set_nth_frag_page_no(inode, i, FIL_NULL, mtr);
- }
-
- if (page == 0) {
- page = fseg_alloc_free_page_low(space, zip_size,
- inode, 0, FSP_UP, mtr);
-
- if (page == FIL_NULL) {
-
- fsp_free_seg_inode(space, zip_size, inode, mtr);
-
- goto funct_exit;
- }
-
- block = buf_page_get(space, zip_size, page, RW_X_LATCH, mtr);
- header = byte_offset + buf_block_get_frame(block);
- mlog_write_ulint(header - byte_offset + FIL_PAGE_TYPE,
- FIL_PAGE_TYPE_SYS, MLOG_2BYTES, mtr);
- }
-
- mlog_write_ulint(header + FSEG_HDR_OFFSET,
- page_offset(inode), MLOG_2BYTES, mtr);
-
- mlog_write_ulint(header + FSEG_HDR_PAGE_NO,
- page_get_page_no(page_align(inode)),
- MLOG_4BYTES, mtr);
-
- mlog_write_ulint(header + FSEG_HDR_SPACE, space, MLOG_4BYTES, mtr);
-
-funct_exit:
- if (!has_done_reservation) {
-
- fil_space_release_free_extents(space, n_reserved);
- }
-
- return(block);
-}
-
-/**********************************************************************//**
-Creates a new segment.
-@return the block where the segment header is placed, x-latched, NULL
-if could not create segment because of lack of space */
-UNIV_INTERN
-buf_block_t*
-fseg_create(
-/*========*/
- ulint space, /*!< in: space id */
- ulint page, /*!< in: page where the segment header is placed: if
- this is != 0, the page must belong to another segment,
- if this is 0, a new page will be allocated and it
- will belong to the created segment */
- ulint byte_offset, /*!< in: byte offset of the created segment header
- on the page */
- mtr_t* mtr) /*!< in: mtr */
-{
- return(fseg_create_general(space, page, byte_offset, FALSE, mtr));
-}
-
-/**********************************************************************//**
-Calculates the number of pages reserved by a segment, and how many pages are
-currently used.
-@return number of reserved pages */
-static
-ulint
-fseg_n_reserved_pages_low(
-/*======================*/
- fseg_inode_t* inode, /*!< in: segment inode */
- ulint* used, /*!< out: number of pages used (not
- more than reserved) */
- mtr_t* mtr) /*!< in: mtr handle */
-{
- ulint ret;
-
- ut_ad(inode && used && mtr);
- ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX));
-
- *used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED, MLOG_4BYTES, mtr)
- + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FULL, mtr)
- + fseg_get_n_frag_pages(inode, mtr);
-
- ret = fseg_get_n_frag_pages(inode, mtr)
- + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FREE, mtr)
- + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_NOT_FULL, mtr)
- + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FULL, mtr);
-
- return(ret);
-}
-
-/**********************************************************************//**
-Calculates the number of pages reserved by a segment, and how many pages are
-currently used.
-@return number of reserved pages */
-UNIV_INTERN
-ulint
-fseg_n_reserved_pages(
-/*==================*/
- fseg_header_t* header, /*!< in: segment header */
- ulint* used, /*!< out: number of pages used (<= reserved) */
- mtr_t* mtr) /*!< in: mtr handle */
-{
- ulint ret;
- fseg_inode_t* inode;
- ulint space;
- ulint flags;
- ulint zip_size;
- rw_lock_t* latch;
-
- space = page_get_space_id(page_align(header));
- latch = fil_space_get_latch(space, &flags);
- zip_size = dict_table_flags_to_zip_size(flags);
-
- ut_ad(!mutex_own(&kernel_mutex)
- || mtr_memo_contains(mtr, latch, MTR_MEMO_X_LOCK));
-
- mtr_x_lock(latch, mtr);
-
- inode = fseg_inode_get(header, space, zip_size, mtr);
-
- ret = fseg_n_reserved_pages_low(inode, used, mtr);
-
- return(ret);
-}
-
-/*********************************************************************//**
-Tries to fill the free list of a segment with consecutive free extents.
-This happens if the segment is big enough to allow extents in the free list,
-the free list is empty, and the extents can be allocated consecutively from
-the hint onward. */
-static
-void
-fseg_fill_free_list(
-/*================*/
- fseg_inode_t* inode, /*!< in: segment inode */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint hint, /*!< in: hint which extent would be good as
- the first extent */
- mtr_t* mtr) /*!< in: mtr */
-{
- xdes_t* descr;
- ulint i;
- dulint seg_id;
- ulint reserved;
- ulint used;
-
- ut_ad(inode && mtr);
- ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
-
- reserved = fseg_n_reserved_pages_low(inode, &used, mtr);
-
- if (reserved < FSEG_FREE_LIST_LIMIT * FSP_EXTENT_SIZE) {
-
- /* The segment is too small to allow extents in free list */
-
- return;
- }
-
- if (flst_get_len(inode + FSEG_FREE, mtr) > 0) {
- /* Free list is not empty */
-
- return;
- }
-
- for (i = 0; i < FSEG_FREE_LIST_MAX_LEN; i++) {
- descr = xdes_get_descriptor(space, zip_size, hint, mtr);
-
- if ((descr == NULL)
- || (XDES_FREE != xdes_get_state(descr, mtr))) {
-
- /* We cannot allocate the desired extent: stop */
-
- return;
- }
-
- descr = fsp_alloc_free_extent(space, zip_size, hint, mtr);
-
- xdes_set_state(descr, XDES_FSEG, mtr);
-
- seg_id = mtr_read_dulint(inode + FSEG_ID, mtr);
- ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N)
- == FSEG_MAGIC_N_VALUE);
- mlog_write_dulint(descr + XDES_ID, seg_id, mtr);
-
- flst_add_last(inode + FSEG_FREE, descr + XDES_FLST_NODE, mtr);
- hint += FSP_EXTENT_SIZE;
- }
-}
-
-/*********************************************************************//**
-Allocates a free extent for the segment: looks first in the free list of the
-segment, then tries to allocate from the space free list. NOTE that the extent
-returned still resides in the segment free list, it is not yet taken off it!
-@return allocated extent, still placed in the segment free list, NULL
-if could not be allocated */
-static
-xdes_t*
-fseg_alloc_free_extent(
-/*===================*/
- fseg_inode_t* inode, /*!< in: segment inode */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- mtr_t* mtr) /*!< in: mtr */
-{
- xdes_t* descr;
- dulint seg_id;
- fil_addr_t first;
-
- ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
- ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
-
- if (flst_get_len(inode + FSEG_FREE, mtr) > 0) {
- /* Segment free list is not empty, allocate from it */
-
- first = flst_get_first(inode + FSEG_FREE, mtr);
-
- descr = xdes_lst_get_descriptor(space, zip_size, first, mtr);
- } else {
- /* Segment free list was empty, allocate from space */
- descr = fsp_alloc_free_extent(space, zip_size, 0, mtr);
-
- if (descr == NULL) {
-
- return(NULL);
- }
-
- seg_id = mtr_read_dulint(inode + FSEG_ID, mtr);
-
- xdes_set_state(descr, XDES_FSEG, mtr);
- mlog_write_dulint(descr + XDES_ID, seg_id, mtr);
- flst_add_last(inode + FSEG_FREE, descr + XDES_FLST_NODE, mtr);
-
- /* Try to fill the segment free list */
- fseg_fill_free_list(inode, space, zip_size,
- xdes_get_offset(descr) + FSP_EXTENT_SIZE,
- mtr);
- }
-
- return(descr);
-}
-
-/**********************************************************************//**
-Allocates a single free page from a segment. This function implements
-the intelligent allocation strategy which tries to minimize file space
-fragmentation.
-@return the allocated page number, FIL_NULL if no page could be allocated */
-static
-ulint
-fseg_alloc_free_page_low(
-/*=====================*/
- ulint space, /*!< in: space */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- fseg_inode_t* seg_inode, /*!< in: segment inode */
- ulint hint, /*!< in: hint of which page would be desirable */
- byte direction, /*!< in: if the new page is needed because
- of an index page split, and records are
- inserted there in order, into which
- direction they go alphabetically: FSP_DOWN,
- FSP_UP, FSP_NO_DIR */
- mtr_t* mtr) /*!< in: mtr handle */
-{
- fsp_header_t* space_header;
- ulint space_size;
- dulint seg_id;
- ulint used;
- ulint reserved;
- xdes_t* descr; /*!< extent of the hinted page */
- ulint ret_page; /*!< the allocated page offset, FIL_NULL
- if could not be allocated */
- xdes_t* ret_descr; /*!< the extent of the allocated page */
- ibool frag_page_allocated = FALSE;
- ibool success;
- ulint n;
-
- ut_ad(mtr);
- ut_ad((direction >= FSP_UP) && (direction <= FSP_NO_DIR));
- ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
- == FSEG_MAGIC_N_VALUE);
- ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
- seg_id = mtr_read_dulint(seg_inode + FSEG_ID, mtr);
-
- ut_ad(!ut_dulint_is_zero(seg_id));
-
- reserved = fseg_n_reserved_pages_low(seg_inode, &used, mtr);
-
- space_header = fsp_get_space_header(space, zip_size, mtr);
-
- descr = xdes_get_descriptor_with_space_hdr(space_header, space,
- hint, mtr);
- if (descr == NULL) {
- /* Hint outside space or too high above free limit: reset
- hint */
- hint = 0;
- descr = xdes_get_descriptor(space, zip_size, hint, mtr);
- }
-
- /* In the big if-else below we look for ret_page and ret_descr */
- /*-------------------------------------------------------------*/
- if ((xdes_get_state(descr, mtr) == XDES_FSEG)
- && (0 == ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID,
- mtr), seg_id))
- && (xdes_get_bit(descr, XDES_FREE_BIT,
- hint % FSP_EXTENT_SIZE, mtr) == TRUE)) {
-
- /* 1. We can take the hinted page
- =================================*/
- ret_descr = descr;
- ret_page = hint;
- /*-----------------------------------------------------------*/
- } else if ((xdes_get_state(descr, mtr) == XDES_FREE)
- && ((reserved - used) < reserved / FSEG_FILLFACTOR)
- && (used >= FSEG_FRAG_LIMIT)) {
-
- /* 2. We allocate the free extent from space and can take
- =========================================================
- the hinted page
- ===============*/
- ret_descr = fsp_alloc_free_extent(space, zip_size, hint, mtr);
-
- ut_a(ret_descr == descr);
-
- xdes_set_state(ret_descr, XDES_FSEG, mtr);
- mlog_write_dulint(ret_descr + XDES_ID, seg_id, mtr);
- flst_add_last(seg_inode + FSEG_FREE,
- ret_descr + XDES_FLST_NODE, mtr);
-
- /* Try to fill the segment free list */
- fseg_fill_free_list(seg_inode, space, zip_size,
- hint + FSP_EXTENT_SIZE, mtr);
- ret_page = hint;
- /*-----------------------------------------------------------*/
- } else if ((direction != FSP_NO_DIR)
- && ((reserved - used) < reserved / FSEG_FILLFACTOR)
- && (used >= FSEG_FRAG_LIMIT)
- && (!!(ret_descr
- = fseg_alloc_free_extent(seg_inode,
- space, zip_size, mtr)))) {
-
- /* 3. We take any free extent (which was already assigned above
- ===============================================================
- in the if-condition to ret_descr) and take the lowest or
- ========================================================
- highest page in it, depending on the direction
- ==============================================*/
- ret_page = xdes_get_offset(ret_descr);
-
- if (direction == FSP_DOWN) {
- ret_page += FSP_EXTENT_SIZE - 1;
- }
- /*-----------------------------------------------------------*/
- } else if ((xdes_get_state(descr, mtr) == XDES_FSEG)
- && (0 == ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID,
- mtr), seg_id))
- && (!xdes_is_full(descr, mtr))) {
-
- /* 4. We can take the page from the same extent as the
- ======================================================
- hinted page (and the extent already belongs to the
- ==================================================
- segment)
- ========*/
- ret_descr = descr;
- ret_page = xdes_get_offset(ret_descr)
- + xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE,
- hint % FSP_EXTENT_SIZE, mtr);
- /*-----------------------------------------------------------*/
- } else if (reserved - used > 0) {
- /* 5. We take any unused page from the segment
- ==============================================*/
- fil_addr_t first;
-
- if (flst_get_len(seg_inode + FSEG_NOT_FULL, mtr) > 0) {
- first = flst_get_first(seg_inode + FSEG_NOT_FULL,
- mtr);
- } else if (flst_get_len(seg_inode + FSEG_FREE, mtr) > 0) {
- first = flst_get_first(seg_inode + FSEG_FREE, mtr);
- } else {
- ut_error;
- return(FIL_NULL);
- }
-
- ret_descr = xdes_lst_get_descriptor(space, zip_size,
- first, mtr);
- ret_page = xdes_get_offset(ret_descr)
- + xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE,
- 0, mtr);
- /*-----------------------------------------------------------*/
- } else if (used < FSEG_FRAG_LIMIT) {
- /* 6. We allocate an individual page from the space
- ===================================================*/
- ret_page = fsp_alloc_free_page(space, zip_size, hint, mtr);
- ret_descr = NULL;
-
- frag_page_allocated = TRUE;
-
- if (ret_page != FIL_NULL) {
- /* Put the page in the fragment page array of the
- segment */
- n = fseg_find_free_frag_page_slot(seg_inode, mtr);
- ut_a(n != FIL_NULL);
-
- fseg_set_nth_frag_page_no(seg_inode, n, ret_page,
- mtr);
- }
- /*-----------------------------------------------------------*/
- } else {
- /* 7. We allocate a new extent and take its first page
- ======================================================*/
- ret_descr = fseg_alloc_free_extent(seg_inode,
- space, zip_size, mtr);
-
- if (ret_descr == NULL) {
- ret_page = FIL_NULL;
- } else {
- ret_page = xdes_get_offset(ret_descr);
- }
- }
-
- if (ret_page == FIL_NULL) {
- /* Page could not be allocated */
-
- return(FIL_NULL);
- }
-
- if (space != 0) {
- space_size = fil_space_get_size(space);
-
- if (space_size <= ret_page) {
- /* It must be that we are extending a single-table
- tablespace whose size is still < 64 pages */
-
- if (ret_page >= FSP_EXTENT_SIZE) {
- fprintf(stderr,
- "InnoDB: Error (2): trying to extend"
- " a single-table tablespace %lu\n"
- "InnoDB: by single page(s) though"
- " the space size %lu. Page no %lu.\n",
- (ulong) space, (ulong) space_size,
- (ulong) ret_page);
- return(FIL_NULL);
- }
-
- success = fsp_try_extend_data_file_with_pages(
- space, ret_page, space_header, mtr);
- if (!success) {
- /* No disk space left */
- return(FIL_NULL);
- }
- }
- }
-
- if (!frag_page_allocated) {
- /* Initialize the allocated page to buffer pool, so that it
- can be obtained immediately with buf_page_get without need
- for a disk read */
- buf_block_t* block;
- ulint zip_size = dict_table_flags_to_zip_size(
- mach_read_from_4(FSP_SPACE_FLAGS + space_header));
-
- block = buf_page_create(space, ret_page, zip_size, mtr);
- buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
-
- if (UNIV_UNLIKELY(block != buf_page_get(space, zip_size,
- ret_page, RW_X_LATCH,
- mtr))) {
- ut_error;
- }
-
- /* The prior contents of the page should be ignored */
- fsp_init_file_page(block, mtr);
-
- /* At this point we know the extent and the page offset.
- The extent is still in the appropriate list (FSEG_NOT_FULL
- or FSEG_FREE), and the page is not yet marked as used. */
-
- ut_ad(xdes_get_descriptor(space, zip_size, ret_page, mtr)
- == ret_descr);
- ut_ad(xdes_get_bit(ret_descr, XDES_FREE_BIT,
- ret_page % FSP_EXTENT_SIZE, mtr) == TRUE);
-
- fseg_mark_page_used(seg_inode, space, zip_size, ret_page, mtr);
- }
-
- buf_reset_check_index_page_at_flush(space, ret_page);
-
- return(ret_page);
-}
-
-/**********************************************************************//**
-Allocates a single free page from a segment. This function implements
-the intelligent allocation strategy which tries to minimize file space
-fragmentation.
-@return allocated page offset, FIL_NULL if no page could be allocated */
-UNIV_INTERN
-ulint
-fseg_alloc_free_page_general(
-/*=========================*/
- fseg_header_t* seg_header,/*!< in: segment header */
- ulint hint, /*!< in: hint of which page would be desirable */
- byte direction,/*!< in: if the new page is needed because
- of an index page split, and records are
- inserted there in order, into which
- direction they go alphabetically: FSP_DOWN,
- FSP_UP, FSP_NO_DIR */
- ibool has_done_reservation, /*!< in: TRUE if the caller has
- already done the reservation for the page
- with fsp_reserve_free_extents, then there
- is no need to do the check for this individual
- page */
- mtr_t* mtr) /*!< in: mtr handle */
-{
- fseg_inode_t* inode;
- ulint space;
- ulint flags;
- ulint zip_size;
- rw_lock_t* latch;
- ibool success;
- ulint page_no;
- ulint n_reserved;
-
- space = page_get_space_id(page_align(seg_header));
-
- latch = fil_space_get_latch(space, &flags);
-
- zip_size = dict_table_flags_to_zip_size(flags);
-
- ut_ad(!mutex_own(&kernel_mutex)
- || mtr_memo_contains(mtr, latch, MTR_MEMO_X_LOCK));
-
- mtr_x_lock(latch, mtr);
-
- if (rw_lock_get_x_lock_count(latch) == 1) {
- /* This thread did not own the latch before this call: free
- excess pages from the insert buffer free list */
-
- if (space == IBUF_SPACE_ID) {
- ibuf_free_excess_pages();
- }
- }
-
- inode = fseg_inode_get(seg_header, space, zip_size, mtr);
-
- if (!has_done_reservation) {
- success = fsp_reserve_free_extents(&n_reserved, space, 2,
- FSP_NORMAL, mtr);
- if (!success) {
- return(FIL_NULL);
- }
- }
-
- page_no = fseg_alloc_free_page_low(space, zip_size,
- inode, hint, direction, mtr);
- if (!has_done_reservation) {
- fil_space_release_free_extents(space, n_reserved);
- }
-
- return(page_no);
-}
-
-/**********************************************************************//**
-Allocates a single free page from a segment. This function implements
-the intelligent allocation strategy which tries to minimize file space
-fragmentation.
-@return allocated page offset, FIL_NULL if no page could be allocated */
-UNIV_INTERN
-ulint
-fseg_alloc_free_page(
-/*=================*/
- fseg_header_t* seg_header,/*!< in: segment header */
- ulint hint, /*!< in: hint of which page would be desirable */
- byte direction,/*!< in: if the new page is needed because
- of an index page split, and records are
- inserted there in order, into which
- direction they go alphabetically: FSP_DOWN,
- FSP_UP, FSP_NO_DIR */
- mtr_t* mtr) /*!< in: mtr handle */
-{
- return(fseg_alloc_free_page_general(seg_header, hint, direction,
- FALSE, mtr));
-}
-
-/**********************************************************************//**
-Checks that we have at least 2 frag pages free in the first extent of a
-single-table tablespace, and they are also physically initialized to the data
-file. That is we have already extended the data file so that those pages are
-inside the data file. If not, this function extends the tablespace with
-pages.
-@return TRUE if there were >= 3 free pages, or we were able to extend */
-static
-ibool
-fsp_reserve_free_pages(
-/*===================*/
- ulint space, /*!< in: space id, must be != 0 */
- fsp_header_t* space_header, /*!< in: header of that space,
- x-latched */
- ulint size, /*!< in: size of the tablespace in pages,
- must be < FSP_EXTENT_SIZE / 2 */
- mtr_t* mtr) /*!< in: mtr */
-{
- xdes_t* descr;
- ulint n_used;
-
- ut_a(space != 0);
- ut_a(size < FSP_EXTENT_SIZE / 2);
-
- descr = xdes_get_descriptor_with_space_hdr(space_header, space, 0,
- mtr);
- n_used = xdes_get_n_used(descr, mtr);
-
- ut_a(n_used <= size);
-
- if (size >= n_used + 2) {
-
- return(TRUE);
- }
-
- return(fsp_try_extend_data_file_with_pages(space, n_used + 1,
- space_header, mtr));
-}
-
-/**********************************************************************//**
-Reserves free pages from a tablespace. All mini-transactions which may
-use several pages from the tablespace should call this function beforehand
-and reserve enough free extents so that they certainly will be able
-to do their operation, like a B-tree page split, fully. Reservations
-must be released with function fil_space_release_free_extents!
-
-The alloc_type below has the following meaning: FSP_NORMAL means an
-operation which will probably result in more space usage, like an
-insert in a B-tree; FSP_UNDO means allocation to undo logs: if we are
-deleting rows, then this allocation will in the long run result in
-less space usage (after a purge); FSP_CLEANING means allocation done
-in a physical record delete (like in a purge) or other cleaning operation
-which will result in less space usage in the long run. We prefer the latter
-two types of allocation: when space is scarce, FSP_NORMAL allocations
-will not succeed, but the latter two allocations will succeed, if possible.
-The purpose is to avoid dead end where the database is full but the
-user cannot free any space because these freeing operations temporarily
-reserve some space.
-
-Single-table tablespaces whose size is < 32 pages are a special case. In this
-function we would liberally reserve several 64 page extents for every page
-split or merge in a B-tree. But we do not want to waste disk space if the table
-only occupies < 32 pages. That is why we apply different rules in that special
-case, just ensuring that there are 3 free pages available.
-@return TRUE if we were able to make the reservation */
-UNIV_INTERN
-ibool
-fsp_reserve_free_extents(
-/*=====================*/
- ulint* n_reserved,/*!< out: number of extents actually reserved; if we
- return TRUE and the tablespace size is < 64 pages,
- then this can be 0, otherwise it is n_ext */
- ulint space, /*!< in: space id */
- ulint n_ext, /*!< in: number of extents to reserve */
- ulint alloc_type,/*!< in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
- mtr_t* mtr) /*!< in: mtr */
-{
- fsp_header_t* space_header;
- rw_lock_t* latch;
- ulint n_free_list_ext;
- ulint free_limit;
- ulint size;
- ulint flags;
- ulint zip_size;
- ulint n_free;
- ulint n_free_up;
- ulint reserve;
- ibool success;
- ulint n_pages_added;
-
- ut_ad(mtr);
- *n_reserved = n_ext;
-
- latch = fil_space_get_latch(space, &flags);
- zip_size = dict_table_flags_to_zip_size(flags);
-
- ut_ad(!mutex_own(&kernel_mutex)
- || mtr_memo_contains(mtr, latch, MTR_MEMO_X_LOCK));
-
- mtr_x_lock(latch, mtr);
-
- space_header = fsp_get_space_header(space, zip_size, mtr);
-try_again:
- size = mtr_read_ulint(space_header + FSP_SIZE, MLOG_4BYTES, mtr);
-
- if (size < FSP_EXTENT_SIZE / 2) {
- /* Use different rules for small single-table tablespaces */
- *n_reserved = 0;
- return(fsp_reserve_free_pages(space, space_header, size, mtr));
- }
-
- n_free_list_ext = flst_get_len(space_header + FSP_FREE, mtr);
-
- free_limit = mtr_read_ulint(space_header + FSP_FREE_LIMIT,
- MLOG_4BYTES, mtr);
-
- /* Below we play safe when counting free extents above the free limit:
- some of them will contain extent descriptor pages, and therefore
- will not be free extents */
-
- n_free_up = (size - free_limit) / FSP_EXTENT_SIZE;
-
- if (n_free_up > 0) {
- n_free_up--;
- if (!zip_size) {
- n_free_up -= n_free_up
- / (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE);
- } else {
- n_free_up -= n_free_up
- / (zip_size / FSP_EXTENT_SIZE);
- }
- }
-
- n_free = n_free_list_ext + n_free_up;
-
- if (alloc_type == FSP_NORMAL) {
- /* We reserve 1 extent + 0.5 % of the space size to undo logs
- and 1 extent + 0.5 % to cleaning operations; NOTE: this source
- code is duplicated in the function below! */
-
- reserve = 2 + ((size / FSP_EXTENT_SIZE) * 2) / 200;
-
- if (n_free <= reserve + n_ext) {
-
- goto try_to_extend;
- }
- } else if (alloc_type == FSP_UNDO) {
- /* We reserve 0.5 % of the space size to cleaning operations */
-
- reserve = 1 + ((size / FSP_EXTENT_SIZE) * 1) / 200;
-
- if (n_free <= reserve + n_ext) {
-
- goto try_to_extend;
- }
- } else {
- ut_a(alloc_type == FSP_CLEANING);
- }
-
- success = fil_space_reserve_free_extents(space, n_free, n_ext);
-
- if (success) {
- return(TRUE);
- }
-try_to_extend:
- success = fsp_try_extend_data_file(&n_pages_added, space,
- space_header, mtr);
- if (success && n_pages_added > 0) {
-
- goto try_again;
- }
-
- return(FALSE);
-}
-
-/**********************************************************************//**
-This function should be used to get information on how much we still
-will be able to insert new data to the database without running out the
-tablespace. Only free extents are taken into account and we also subtract
-the safety margin required by the above function fsp_reserve_free_extents.
-@return available space in kB */
-UNIV_INTERN
-ullint
-fsp_get_available_space_in_free_extents(
-/*====================================*/
- ulint space) /*!< in: space id */
-{
- fsp_header_t* space_header;
- ulint n_free_list_ext;
- ulint free_limit;
- ulint size;
- ulint flags;
- ulint zip_size;
- ulint n_free;
- ulint n_free_up;
- ulint reserve;
- rw_lock_t* latch;
- mtr_t mtr;
-
- ut_ad(!mutex_own(&kernel_mutex));
-
- mtr_start(&mtr);
-
- latch = fil_space_get_latch(space, &flags);
- zip_size = dict_table_flags_to_zip_size(flags);
-
- mtr_x_lock(latch, &mtr);
-
- space_header = fsp_get_space_header(space, zip_size, &mtr);
-
- size = mtr_read_ulint(space_header + FSP_SIZE, MLOG_4BYTES, &mtr);
-
- n_free_list_ext = flst_get_len(space_header + FSP_FREE, &mtr);
-
- free_limit = mtr_read_ulint(space_header + FSP_FREE_LIMIT,
- MLOG_4BYTES, &mtr);
- mtr_commit(&mtr);
-
- if (size < FSP_EXTENT_SIZE) {
- ut_a(space != 0); /* This must be a single-table
- tablespace */
-
- return(0); /* TODO: count free frag pages and
- return a value based on that */
- }
-
- /* Below we play safe when counting free extents above the free limit:
- some of them will contain extent descriptor pages, and therefore
- will not be free extents */
-
- n_free_up = (size - free_limit) / FSP_EXTENT_SIZE;
-
- if (n_free_up > 0) {
- n_free_up--;
- if (!zip_size) {
- n_free_up -= n_free_up
- / (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE);
- } else {
- n_free_up -= n_free_up
- / (zip_size / FSP_EXTENT_SIZE);
- }
- }
-
- n_free = n_free_list_ext + n_free_up;
-
- /* We reserve 1 extent + 0.5 % of the space size to undo logs
- and 1 extent + 0.5 % to cleaning operations; NOTE: this source
- code is duplicated in the function above! */
-
- reserve = 2 + ((size / FSP_EXTENT_SIZE) * 2) / 200;
-
- if (reserve > n_free) {
- return(0);
- }
-
- if (!zip_size) {
- return((ullint) (n_free - reserve)
- * FSP_EXTENT_SIZE
- * (UNIV_PAGE_SIZE / 1024));
- } else {
- return((ullint) (n_free - reserve)
- * FSP_EXTENT_SIZE
- * (zip_size / 1024));
- }
-}
-
-/********************************************************************//**
-Marks a page used. The page must reside within the extents of the given
-segment. */
-static
-void
-fseg_mark_page_used(
-/*================*/
- fseg_inode_t* seg_inode,/*!< in: segment inode */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page, /*!< in: page offset */
- mtr_t* mtr) /*!< in: mtr */
-{
- xdes_t* descr;
- ulint not_full_n_used;
-
- ut_ad(seg_inode && mtr);
- ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
- ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
- == FSEG_MAGIC_N_VALUE);
-
- descr = xdes_get_descriptor(space, zip_size, page, mtr);
-
- ut_ad(mtr_read_ulint(seg_inode + FSEG_ID, MLOG_4BYTES, mtr)
- == mtr_read_ulint(descr + XDES_ID, MLOG_4BYTES, mtr));
-
- if (xdes_is_free(descr, mtr)) {
- /* We move the extent from the free list to the
- NOT_FULL list */
- flst_remove(seg_inode + FSEG_FREE, descr + XDES_FLST_NODE,
- mtr);
- flst_add_last(seg_inode + FSEG_NOT_FULL,
- descr + XDES_FLST_NODE, mtr);
- }
-
- ut_ad(xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)
- == TRUE);
- /* We mark the page as used */
- xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, FALSE, mtr);
-
- not_full_n_used = mtr_read_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
- MLOG_4BYTES, mtr);
- not_full_n_used++;
- mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED, not_full_n_used,
- MLOG_4BYTES, mtr);
- if (xdes_is_full(descr, mtr)) {
- /* We move the extent from the NOT_FULL list to the
- FULL list */
- flst_remove(seg_inode + FSEG_NOT_FULL,
- descr + XDES_FLST_NODE, mtr);
- flst_add_last(seg_inode + FSEG_FULL,
- descr + XDES_FLST_NODE, mtr);
-
- mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
- not_full_n_used - FSP_EXTENT_SIZE,
- MLOG_4BYTES, mtr);
- }
-}
-
-/**********************************************************************//**
-Frees a single page of a segment. */
-static
-void
-fseg_free_page_low(
-/*===============*/
- fseg_inode_t* seg_inode, /*!< in: segment inode */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page, /*!< in: page offset */
- mtr_t* mtr) /*!< in: mtr handle */
-{
- xdes_t* descr;
- ulint not_full_n_used;
- ulint state;
- dulint descr_id;
- dulint seg_id;
- ulint i;
-
- ut_ad(seg_inode && mtr);
- ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
- == FSEG_MAGIC_N_VALUE);
- ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
-
- /* Drop search system page hash index if the page is found in
- the pool and is hashed */
-
- btr_search_drop_page_hash_when_freed(space, zip_size, page);
-
- descr = xdes_get_descriptor(space, zip_size, page, mtr);
-
- ut_a(descr);
- if (xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)) {
- fputs("InnoDB: Dump of the tablespace extent descriptor: ",
- stderr);
- ut_print_buf(stderr, descr, 40);
-
- fprintf(stderr, "\n"
- "InnoDB: Serious error! InnoDB is trying to"
- " free page %lu\n"
- "InnoDB: though it is already marked as free"
- " in the tablespace!\n"
- "InnoDB: The tablespace free space info is corrupt.\n"
- "InnoDB: You may need to dump your"
- " InnoDB tables and recreate the whole\n"
- "InnoDB: database!\n", (ulong) page);
-crash:
- fputs("InnoDB: Please refer to\n"
- "InnoDB: " REFMAN "forcing-recovery.html\n"
- "InnoDB: about forcing recovery.\n", stderr);
- ut_error;
- }
-
- state = xdes_get_state(descr, mtr);
-
- if (state != XDES_FSEG) {
- /* The page is in the fragment pages of the segment */
-
- for (i = 0;; i++) {
- if (fseg_get_nth_frag_page_no(seg_inode, i, mtr)
- == page) {
-
- fseg_set_nth_frag_page_no(seg_inode, i,
- FIL_NULL, mtr);
- break;
- }
- }
-
- fsp_free_page(space, zip_size, page, mtr);
-
- return;
- }
-
- /* If we get here, the page is in some extent of the segment */
-
- descr_id = mtr_read_dulint(descr + XDES_ID, mtr);
- seg_id = mtr_read_dulint(seg_inode + FSEG_ID, mtr);
-#if 0
- fprintf(stderr,
- "InnoDB: InnoDB is freeing space %lu page %lu,\n"
- "InnoDB: which belongs to descr seg %lu %lu\n"
- "InnoDB: segment %lu %lu.\n",
- (ulong) space, (ulong) page,
- (ulong) ut_dulint_get_high(descr_id),
- (ulong) ut_dulint_get_low(descr_id),
- (ulong) ut_dulint_get_high(seg_id),
- (ulong) ut_dulint_get_low(seg_id));
-#endif /* 0 */
- if (0 != ut_dulint_cmp(descr_id, seg_id)) {
- fputs("InnoDB: Dump of the tablespace extent descriptor: ",
- stderr);
- ut_print_buf(stderr, descr, 40);
- fputs("\nInnoDB: Dump of the segment inode: ", stderr);
- ut_print_buf(stderr, seg_inode, 40);
- putc('\n', stderr);
-
- fprintf(stderr,
- "InnoDB: Serious error: InnoDB is trying to"
- " free space %lu page %lu,\n"
- "InnoDB: which does not belong to"
- " segment %lu %lu but belongs\n"
- "InnoDB: to segment %lu %lu.\n",
- (ulong) space, (ulong) page,
- (ulong) ut_dulint_get_high(descr_id),
- (ulong) ut_dulint_get_low(descr_id),
- (ulong) ut_dulint_get_high(seg_id),
- (ulong) ut_dulint_get_low(seg_id));
- goto crash;
- }
-
- not_full_n_used = mtr_read_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
- MLOG_4BYTES, mtr);
- if (xdes_is_full(descr, mtr)) {
- /* The fragment is full: move it to another list */
- flst_remove(seg_inode + FSEG_FULL,
- descr + XDES_FLST_NODE, mtr);
- flst_add_last(seg_inode + FSEG_NOT_FULL,
- descr + XDES_FLST_NODE, mtr);
- mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
- not_full_n_used + FSP_EXTENT_SIZE - 1,
- MLOG_4BYTES, mtr);
- } else {
- ut_a(not_full_n_used > 0);
- mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
- not_full_n_used - 1, MLOG_4BYTES, mtr);
- }
-
- xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
- xdes_set_bit(descr, XDES_CLEAN_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
-
- if (xdes_is_free(descr, mtr)) {
- /* The extent has become free: free it to space */
- flst_remove(seg_inode + FSEG_NOT_FULL,
- descr + XDES_FLST_NODE, mtr);
- fsp_free_extent(space, zip_size, page, mtr);
- }
-}
-
-/**********************************************************************//**
-Frees a single page of a segment. */
-UNIV_INTERN
-void
-fseg_free_page(
-/*===========*/
- fseg_header_t* seg_header, /*!< in: segment header */
- ulint space, /*!< in: space id */
- ulint page, /*!< in: page offset */
- mtr_t* mtr) /*!< in: mtr handle */
-{
- ulint flags;
- ulint zip_size;
- fseg_inode_t* seg_inode;
- rw_lock_t* latch;
-
- latch = fil_space_get_latch(space, &flags);
- zip_size = dict_table_flags_to_zip_size(flags);
-
- ut_ad(!mutex_own(&kernel_mutex)
- || mtr_memo_contains(mtr, latch, MTR_MEMO_X_LOCK));
-
- mtr_x_lock(latch, mtr);
-
- seg_inode = fseg_inode_get(seg_header, space, zip_size, mtr);
-
- fseg_free_page_low(seg_inode, space, zip_size, page, mtr);
-
-#ifdef UNIV_DEBUG_FILE_ACCESSES
- buf_page_set_file_page_was_freed(space, page);
-#endif
-}
-
-/**********************************************************************//**
-Frees an extent of a segment to the space free list. */
-static
-void
-fseg_free_extent(
-/*=============*/
- fseg_inode_t* seg_inode, /*!< in: segment inode */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page, /*!< in: a page in the extent */
- mtr_t* mtr) /*!< in: mtr handle */
-{
- ulint first_page_in_extent;
- xdes_t* descr;
- ulint not_full_n_used;
- ulint descr_n_used;
- ulint i;
-
- ut_ad(seg_inode && mtr);
-
- descr = xdes_get_descriptor(space, zip_size, page, mtr);
-
- ut_a(xdes_get_state(descr, mtr) == XDES_FSEG);
- ut_a(0 == ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID, mtr),
- mtr_read_dulint(seg_inode + FSEG_ID, mtr)));
- ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
- == FSEG_MAGIC_N_VALUE);
-
- first_page_in_extent = page - (page % FSP_EXTENT_SIZE);
-
- for (i = 0; i < FSP_EXTENT_SIZE; i++) {
- if (FALSE == xdes_get_bit(descr, XDES_FREE_BIT, i, mtr)) {
-
- /* Drop search system page hash index if the page is
- found in the pool and is hashed */
-
- btr_search_drop_page_hash_when_freed(
- space, zip_size, first_page_in_extent + i);
- }
- }
-
- if (xdes_is_full(descr, mtr)) {
- flst_remove(seg_inode + FSEG_FULL,
- descr + XDES_FLST_NODE, mtr);
- } else if (xdes_is_free(descr, mtr)) {
- flst_remove(seg_inode + FSEG_FREE,
- descr + XDES_FLST_NODE, mtr);
- } else {
- flst_remove(seg_inode + FSEG_NOT_FULL,
- descr + XDES_FLST_NODE, mtr);
-
- not_full_n_used = mtr_read_ulint(
- seg_inode + FSEG_NOT_FULL_N_USED, MLOG_4BYTES, mtr);
-
- descr_n_used = xdes_get_n_used(descr, mtr);
- ut_a(not_full_n_used >= descr_n_used);
- mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
- not_full_n_used - descr_n_used,
- MLOG_4BYTES, mtr);
- }
-
- fsp_free_extent(space, zip_size, page, mtr);
-
-#ifdef UNIV_DEBUG_FILE_ACCESSES
- for (i = 0; i < FSP_EXTENT_SIZE; i++) {
-
- buf_page_set_file_page_was_freed(space,
- first_page_in_extent + i);
- }
-#endif
-}
-
-/**********************************************************************//**
-Frees part of a segment. This function can be used to free a segment by
-repeatedly calling this function in different mini-transactions. Doing
-the freeing in a single mini-transaction might result in too big a
-mini-transaction.
-@return TRUE if freeing completed */
-UNIV_INTERN
-ibool
-fseg_free_step(
-/*===========*/
- fseg_header_t* header, /*!< in, own: segment header; NOTE: if the header
- resides on the first page of the frag list
- of the segment, this pointer becomes obsolete
- after the last freeing step */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint n;
- ulint page;
- xdes_t* descr;
- fseg_inode_t* inode;
- ulint space;
- ulint flags;
- ulint zip_size;
- ulint header_page;
- rw_lock_t* latch;
-
- space = page_get_space_id(page_align(header));
- header_page = page_get_page_no(page_align(header));
-
- latch = fil_space_get_latch(space, &flags);
- zip_size = dict_table_flags_to_zip_size(flags);
-
- ut_ad(!mutex_own(&kernel_mutex)
- || mtr_memo_contains(mtr, latch, MTR_MEMO_X_LOCK));
-
- mtr_x_lock(latch, mtr);
-
- descr = xdes_get_descriptor(space, zip_size, header_page, mtr);
-
- /* Check that the header resides on a page which has not been
- freed yet */
-
- ut_a(descr);
- ut_a(xdes_get_bit(descr, XDES_FREE_BIT,
- header_page % FSP_EXTENT_SIZE, mtr) == FALSE);
- inode = fseg_inode_try_get(header, space, zip_size, mtr);
-
- if (UNIV_UNLIKELY(inode == NULL)) {
- fprintf(stderr, "double free of inode from %u:%u\n",
- (unsigned) space, (unsigned) header_page);
- return(TRUE);
- }
-
- descr = fseg_get_first_extent(inode, space, zip_size, mtr);
-
- if (descr != NULL) {
- /* Free the extent held by the segment */
- page = xdes_get_offset(descr);
-
- fseg_free_extent(inode, space, zip_size, page, mtr);
-
- return(FALSE);
- }
-
- /* Free a frag page */
- n = fseg_find_last_used_frag_page_slot(inode, mtr);
-
- if (n == ULINT_UNDEFINED) {
- /* Freeing completed: free the segment inode */
- fsp_free_seg_inode(space, zip_size, inode, mtr);
-
- return(TRUE);
- }
-
- fseg_free_page_low(inode, space, zip_size,
- fseg_get_nth_frag_page_no(inode, n, mtr), mtr);
-
- n = fseg_find_last_used_frag_page_slot(inode, mtr);
-
- if (n == ULINT_UNDEFINED) {
- /* Freeing completed: free the segment inode */
- fsp_free_seg_inode(space, zip_size, inode, mtr);
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/**********************************************************************//**
-Frees part of a segment. Differs from fseg_free_step because this function
-leaves the header page unfreed.
-@return TRUE if freeing completed, except the header page */
-UNIV_INTERN
-ibool
-fseg_free_step_not_header(
-/*======================*/
- fseg_header_t* header, /*!< in: segment header which must reside on
- the first fragment page of the segment */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint n;
- ulint page;
- xdes_t* descr;
- fseg_inode_t* inode;
- ulint space;
- ulint flags;
- ulint zip_size;
- ulint page_no;
- rw_lock_t* latch;
-
- space = page_get_space_id(page_align(header));
-
- latch = fil_space_get_latch(space, &flags);
- zip_size = dict_table_flags_to_zip_size(flags);
-
- ut_ad(!mutex_own(&kernel_mutex)
- || mtr_memo_contains(mtr, latch, MTR_MEMO_X_LOCK));
-
- mtr_x_lock(latch, mtr);
-
- inode = fseg_inode_get(header, space, zip_size, mtr);
-
- descr = fseg_get_first_extent(inode, space, zip_size, mtr);
-
- if (descr != NULL) {
- /* Free the extent held by the segment */
- page = xdes_get_offset(descr);
-
- fseg_free_extent(inode, space, zip_size, page, mtr);
-
- return(FALSE);
- }
-
- /* Free a frag page */
-
- n = fseg_find_last_used_frag_page_slot(inode, mtr);
-
- if (n == ULINT_UNDEFINED) {
- ut_error;
- }
-
- page_no = fseg_get_nth_frag_page_no(inode, n, mtr);
-
- if (page_no == page_get_page_no(page_align(header))) {
-
- return(TRUE);
- }
-
- fseg_free_page_low(inode, space, zip_size, page_no, mtr);
-
- return(FALSE);
-}
-
-/**********************************************************************//**
-Returns the first extent descriptor for a segment. We think of the extent
-lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL
--> FSEG_FREE.
-@return the first extent descriptor, or NULL if none */
-static
-xdes_t*
-fseg_get_first_extent(
-/*==================*/
- fseg_inode_t* inode, /*!< in: segment inode */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- mtr_t* mtr) /*!< in: mtr */
-{
- fil_addr_t first;
- xdes_t* descr;
-
- ut_ad(inode && mtr);
-
- ut_ad(space == page_get_space_id(page_align(inode)));
- ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
-
- first = fil_addr_null;
-
- if (flst_get_len(inode + FSEG_FULL, mtr) > 0) {
-
- first = flst_get_first(inode + FSEG_FULL, mtr);
-
- } else if (flst_get_len(inode + FSEG_NOT_FULL, mtr) > 0) {
-
- first = flst_get_first(inode + FSEG_NOT_FULL, mtr);
-
- } else if (flst_get_len(inode + FSEG_FREE, mtr) > 0) {
-
- first = flst_get_first(inode + FSEG_FREE, mtr);
- }
-
- if (first.page == FIL_NULL) {
-
- return(NULL);
- }
- descr = xdes_lst_get_descriptor(space, zip_size, first, mtr);
-
- return(descr);
-}
-
-/*******************************************************************//**
-Validates a segment.
-@return TRUE if ok */
-static
-ibool
-fseg_validate_low(
-/*==============*/
- fseg_inode_t* inode, /*!< in: segment inode */
- mtr_t* mtr2) /*!< in: mtr */
-{
- ulint space;
- dulint seg_id;
- mtr_t mtr;
- xdes_t* descr;
- fil_addr_t node_addr;
- ulint n_used = 0;
- ulint n_used2 = 0;
-
- ut_ad(mtr_memo_contains_page(mtr2, inode, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
-
- space = page_get_space_id(page_align(inode));
-
- seg_id = mtr_read_dulint(inode + FSEG_ID, mtr2);
- n_used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED,
- MLOG_4BYTES, mtr2);
- flst_validate(inode + FSEG_FREE, mtr2);
- flst_validate(inode + FSEG_NOT_FULL, mtr2);
- flst_validate(inode + FSEG_FULL, mtr2);
-
- /* Validate FSEG_FREE list */
- node_addr = flst_get_first(inode + FSEG_FREE, mtr2);
-
- while (!fil_addr_is_null(node_addr)) {
- ulint flags;
- ulint zip_size;
-
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space, &flags), &mtr);
- zip_size = dict_table_flags_to_zip_size(flags);
-
- descr = xdes_lst_get_descriptor(space, zip_size,
- node_addr, &mtr);
-
- ut_a(xdes_get_n_used(descr, &mtr) == 0);
- ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
- ut_a(!ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID, &mtr),
- seg_id));
-
- node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
- mtr_commit(&mtr);
- }
-
- /* Validate FSEG_NOT_FULL list */
-
- node_addr = flst_get_first(inode + FSEG_NOT_FULL, mtr2);
-
- while (!fil_addr_is_null(node_addr)) {
- ulint flags;
- ulint zip_size;
-
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space, &flags), &mtr);
- zip_size = dict_table_flags_to_zip_size(flags);
-
- descr = xdes_lst_get_descriptor(space, zip_size,
- node_addr, &mtr);
-
- ut_a(xdes_get_n_used(descr, &mtr) > 0);
- ut_a(xdes_get_n_used(descr, &mtr) < FSP_EXTENT_SIZE);
- ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
- ut_a(!ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID, &mtr),
- seg_id));
-
- n_used2 += xdes_get_n_used(descr, &mtr);
-
- node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
- mtr_commit(&mtr);
- }
-
- /* Validate FSEG_FULL list */
-
- node_addr = flst_get_first(inode + FSEG_FULL, mtr2);
-
- while (!fil_addr_is_null(node_addr)) {
- ulint flags;
- ulint zip_size;
-
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space, &flags), &mtr);
- zip_size = dict_table_flags_to_zip_size(flags);
-
- descr = xdes_lst_get_descriptor(space, zip_size,
- node_addr, &mtr);
-
- ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE);
- ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
- ut_a(!ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID, &mtr),
- seg_id));
-
- node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
- mtr_commit(&mtr);
- }
-
- ut_a(n_used == n_used2);
-
- return(TRUE);
-}
-
-#ifdef UNIV_DEBUG
-/*******************************************************************//**
-Validates a segment.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-fseg_validate(
-/*==========*/
- fseg_header_t* header, /*!< in: segment header */
- mtr_t* mtr) /*!< in: mtr */
-{
- fseg_inode_t* inode;
- ibool ret;
- ulint space;
- ulint flags;
- ulint zip_size;
-
- space = page_get_space_id(page_align(header));
-
- mtr_x_lock(fil_space_get_latch(space, &flags), mtr);
- zip_size = dict_table_flags_to_zip_size(flags);
-
- inode = fseg_inode_get(header, space, zip_size, mtr);
-
- ret = fseg_validate_low(inode, mtr);
-
- return(ret);
-}
-#endif /* UNIV_DEBUG */
-
-/*******************************************************************//**
-Writes info of a segment. */
-static
-void
-fseg_print_low(
-/*===========*/
- fseg_inode_t* inode, /*!< in: segment inode */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint space;
- ulint seg_id_low;
- ulint seg_id_high;
- ulint n_used;
- ulint n_frag;
- ulint n_free;
- ulint n_not_full;
- ulint n_full;
- ulint reserved;
- ulint used;
- ulint page_no;
- dulint d_var;
-
- ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX));
- space = page_get_space_id(page_align(inode));
- page_no = page_get_page_no(page_align(inode));
-
- reserved = fseg_n_reserved_pages_low(inode, &used, mtr);
-
- d_var = mtr_read_dulint(inode + FSEG_ID, mtr);
-
- seg_id_low = ut_dulint_get_low(d_var);
- seg_id_high = ut_dulint_get_high(d_var);
-
- n_used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED,
- MLOG_4BYTES, mtr);
- n_frag = fseg_get_n_frag_pages(inode, mtr);
- n_free = flst_get_len(inode + FSEG_FREE, mtr);
- n_not_full = flst_get_len(inode + FSEG_NOT_FULL, mtr);
- n_full = flst_get_len(inode + FSEG_FULL, mtr);
-
- fprintf(stderr,
- "SEGMENT id %lu %lu space %lu; page %lu;"
- " res %lu used %lu; full ext %lu\n"
- "fragm pages %lu; free extents %lu;"
- " not full extents %lu: pages %lu\n",
- (ulong) seg_id_high, (ulong) seg_id_low,
- (ulong) space, (ulong) page_no,
- (ulong) reserved, (ulong) used, (ulong) n_full,
- (ulong) n_frag, (ulong) n_free, (ulong) n_not_full,
- (ulong) n_used);
- ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
-}
-
-#ifdef UNIV_BTR_PRINT
-/*******************************************************************//**
-Writes info of a segment. */
-UNIV_INTERN
-void
-fseg_print(
-/*=======*/
- fseg_header_t* header, /*!< in: segment header */
- mtr_t* mtr) /*!< in: mtr */
-{
- fseg_inode_t* inode;
- ulint space;
- ulint flags;
- ulint zip_size;
-
- space = page_get_space_id(page_align(header));
-
- mtr_x_lock(fil_space_get_latch(space, &flags), mtr);
- zip_size = dict_table_flags_to_zip_size(flags);
-
- inode = fseg_inode_get(header, space, zip_size, mtr);
-
- fseg_print_low(inode, mtr);
-}
-#endif /* UNIV_BTR_PRINT */
-
-/*******************************************************************//**
-Validates the file space system and its segments.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-fsp_validate(
-/*=========*/
- ulint space) /*!< in: space id */
-{
- fsp_header_t* header;
- fseg_inode_t* seg_inode;
- page_t* seg_inode_page;
- rw_lock_t* latch;
- ulint size;
- ulint flags;
- ulint zip_size;
- ulint free_limit;
- ulint frag_n_used;
- mtr_t mtr;
- mtr_t mtr2;
- xdes_t* descr;
- fil_addr_t node_addr;
- fil_addr_t next_node_addr;
- ulint descr_count = 0;
- ulint n_used = 0;
- ulint n_used2 = 0;
- ulint n_full_frag_pages;
- ulint n;
- ulint seg_inode_len_free;
- ulint seg_inode_len_full;
-
- latch = fil_space_get_latch(space, &flags);
- zip_size = dict_table_flags_to_zip_size(flags);
- ut_a(ut_is_2pow(zip_size));
- ut_a(zip_size <= UNIV_PAGE_SIZE);
- ut_a(!zip_size || zip_size >= PAGE_ZIP_MIN_SIZE);
-
- /* Start first a mini-transaction mtr2 to lock out all other threads
- from the fsp system */
- mtr_start(&mtr2);
- mtr_x_lock(latch, &mtr2);
-
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
-
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);
- free_limit = mtr_read_ulint(header + FSP_FREE_LIMIT,
- MLOG_4BYTES, &mtr);
- frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED,
- MLOG_4BYTES, &mtr);
-
- n_full_frag_pages = FSP_EXTENT_SIZE
- * flst_get_len(header + FSP_FULL_FRAG, &mtr);
-
- if (UNIV_UNLIKELY(free_limit > size)) {
-
- ut_a(space != 0);
- ut_a(size < FSP_EXTENT_SIZE);
- }
-
- flst_validate(header + FSP_FREE, &mtr);
- flst_validate(header + FSP_FREE_FRAG, &mtr);
- flst_validate(header + FSP_FULL_FRAG, &mtr);
-
- mtr_commit(&mtr);
-
- /* Validate FSP_FREE list */
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
- node_addr = flst_get_first(header + FSP_FREE, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- descr_count++;
- descr = xdes_lst_get_descriptor(space, zip_size,
- node_addr, &mtr);
-
- ut_a(xdes_get_n_used(descr, &mtr) == 0);
- ut_a(xdes_get_state(descr, &mtr) == XDES_FREE);
-
- node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
- mtr_commit(&mtr);
- }
-
- /* Validate FSP_FREE_FRAG list */
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
- node_addr = flst_get_first(header + FSP_FREE_FRAG, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- descr_count++;
- descr = xdes_lst_get_descriptor(space, zip_size,
- node_addr, &mtr);
-
- ut_a(xdes_get_n_used(descr, &mtr) > 0);
- ut_a(xdes_get_n_used(descr, &mtr) < FSP_EXTENT_SIZE);
- ut_a(xdes_get_state(descr, &mtr) == XDES_FREE_FRAG);
-
- n_used += xdes_get_n_used(descr, &mtr);
- node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
-
- mtr_commit(&mtr);
- }
-
- /* Validate FSP_FULL_FRAG list */
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
- node_addr = flst_get_first(header + FSP_FULL_FRAG, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- descr_count++;
- descr = xdes_lst_get_descriptor(space, zip_size,
- node_addr, &mtr);
-
- ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE);
- ut_a(xdes_get_state(descr, &mtr) == XDES_FULL_FRAG);
-
- node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
- mtr_commit(&mtr);
- }
-
- /* Validate segments */
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
-
- node_addr = flst_get_first(header + FSP_SEG_INODES_FULL, &mtr);
-
- seg_inode_len_full = flst_get_len(header + FSP_SEG_INODES_FULL, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
-
- n = 0;
- do {
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- seg_inode_page = fut_get_ptr(
- space, zip_size, node_addr, RW_X_LATCH, &mtr)
- - FSEG_INODE_PAGE_NODE;
-
- seg_inode = fsp_seg_inode_page_get_nth_inode(
- seg_inode_page, n, zip_size, &mtr);
- ut_a(!ut_dulint_is_zero(
- mach_read_from_8(seg_inode + FSEG_ID)));
- fseg_validate_low(seg_inode, &mtr);
-
- descr_count += flst_get_len(seg_inode + FSEG_FREE,
- &mtr);
- descr_count += flst_get_len(seg_inode + FSEG_FULL,
- &mtr);
- descr_count += flst_get_len(seg_inode + FSEG_NOT_FULL,
- &mtr);
-
- n_used2 += fseg_get_n_frag_pages(seg_inode, &mtr);
-
- next_node_addr = flst_get_next_addr(
- seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr);
- mtr_commit(&mtr);
- } while (++n < FSP_SEG_INODES_PER_PAGE(zip_size));
-
- node_addr = next_node_addr;
- }
-
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
-
- node_addr = flst_get_first(header + FSP_SEG_INODES_FREE, &mtr);
-
- seg_inode_len_free = flst_get_len(header + FSP_SEG_INODES_FREE, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
-
- n = 0;
-
- do {
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- seg_inode_page = fut_get_ptr(
- space, zip_size, node_addr, RW_X_LATCH, &mtr)
- - FSEG_INODE_PAGE_NODE;
-
- seg_inode = fsp_seg_inode_page_get_nth_inode(
- seg_inode_page, n, zip_size, &mtr);
- if (!ut_dulint_is_zero(
- mach_read_from_8(seg_inode + FSEG_ID))) {
- fseg_validate_low(seg_inode, &mtr);
-
- descr_count += flst_get_len(
- seg_inode + FSEG_FREE, &mtr);
- descr_count += flst_get_len(
- seg_inode + FSEG_FULL, &mtr);
- descr_count += flst_get_len(
- seg_inode + FSEG_NOT_FULL, &mtr);
- n_used2 += fseg_get_n_frag_pages(
- seg_inode, &mtr);
- }
-
- next_node_addr = flst_get_next_addr(
- seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr);
- mtr_commit(&mtr);
- } while (++n < FSP_SEG_INODES_PER_PAGE(zip_size));
-
- node_addr = next_node_addr;
- }
-
- ut_a(descr_count * FSP_EXTENT_SIZE == free_limit);
- if (!zip_size) {
- ut_a(n_used + n_full_frag_pages
- == n_used2 + 2 * ((free_limit + (UNIV_PAGE_SIZE - 1))
- / UNIV_PAGE_SIZE)
- + seg_inode_len_full + seg_inode_len_free);
- } else {
- ut_a(n_used + n_full_frag_pages
- == n_used2 + 2 * ((free_limit + (zip_size - 1))
- / zip_size)
- + seg_inode_len_full + seg_inode_len_free);
- }
- ut_a(frag_n_used == n_used);
-
- mtr_commit(&mtr2);
-
- return(TRUE);
-}
-
-/*******************************************************************//**
-Prints info of a file space. */
-UNIV_INTERN
-void
-fsp_print(
-/*======*/
- ulint space) /*!< in: space id */
-{
- fsp_header_t* header;
- fseg_inode_t* seg_inode;
- page_t* seg_inode_page;
- rw_lock_t* latch;
- ulint flags;
- ulint zip_size;
- ulint size;
- ulint free_limit;
- ulint frag_n_used;
- fil_addr_t node_addr;
- fil_addr_t next_node_addr;
- ulint n_free;
- ulint n_free_frag;
- ulint n_full_frag;
- ulint seg_id_low;
- ulint seg_id_high;
- ulint n;
- ulint n_segs = 0;
- dulint d_var;
- mtr_t mtr;
- mtr_t mtr2;
-
- latch = fil_space_get_latch(space, &flags);
- zip_size = dict_table_flags_to_zip_size(flags);
-
- /* Start first a mini-transaction mtr2 to lock out all other threads
- from the fsp system */
-
- mtr_start(&mtr2);
-
- mtr_x_lock(latch, &mtr2);
-
- mtr_start(&mtr);
-
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
-
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);
-
- free_limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES,
- &mtr);
- frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
- &mtr);
- n_free = flst_get_len(header + FSP_FREE, &mtr);
- n_free_frag = flst_get_len(header + FSP_FREE_FRAG, &mtr);
- n_full_frag = flst_get_len(header + FSP_FULL_FRAG, &mtr);
-
- d_var = mtr_read_dulint(header + FSP_SEG_ID, &mtr);
-
- seg_id_low = ut_dulint_get_low(d_var);
- seg_id_high = ut_dulint_get_high(d_var);
-
- fprintf(stderr,
- "FILE SPACE INFO: id %lu\n"
- "size %lu, free limit %lu, free extents %lu\n"
- "not full frag extents %lu: used pages %lu,"
- " full frag extents %lu\n"
- "first seg id not used %lu %lu\n",
- (ulong) space,
- (ulong) size, (ulong) free_limit, (ulong) n_free,
- (ulong) n_free_frag, (ulong) frag_n_used, (ulong) n_full_frag,
- (ulong) seg_id_high, (ulong) seg_id_low);
-
- mtr_commit(&mtr);
-
- /* Print segments */
-
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
-
- node_addr = flst_get_first(header + FSP_SEG_INODES_FULL, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
-
- n = 0;
-
- do {
-
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- seg_inode_page = fut_get_ptr(
- space, zip_size, node_addr, RW_X_LATCH, &mtr)
- - FSEG_INODE_PAGE_NODE;
-
- seg_inode = fsp_seg_inode_page_get_nth_inode(
- seg_inode_page, n, zip_size, &mtr);
- ut_a(!ut_dulint_is_zero(
- mach_read_from_8(seg_inode + FSEG_ID)));
- fseg_print_low(seg_inode, &mtr);
-
- n_segs++;
-
- next_node_addr = flst_get_next_addr(
- seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr);
- mtr_commit(&mtr);
- } while (++n < FSP_SEG_INODES_PER_PAGE(zip_size));
-
- node_addr = next_node_addr;
- }
-
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
-
- node_addr = flst_get_first(header + FSP_SEG_INODES_FREE, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
-
- n = 0;
-
- do {
-
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- seg_inode_page = fut_get_ptr(
- space, zip_size, node_addr, RW_X_LATCH, &mtr)
- - FSEG_INODE_PAGE_NODE;
-
- seg_inode = fsp_seg_inode_page_get_nth_inode(
- seg_inode_page, n, zip_size, &mtr);
- if (!ut_dulint_is_zero(
- mach_read_from_8(seg_inode + FSEG_ID))) {
-
- fseg_print_low(seg_inode, &mtr);
- n_segs++;
- }
-
- next_node_addr = flst_get_next_addr(
- seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr);
- mtr_commit(&mtr);
- } while (++n < FSP_SEG_INODES_PER_PAGE(zip_size));
-
- node_addr = next_node_addr;
- }
-
- mtr_commit(&mtr2);
-
- fprintf(stderr, "NUMBER of file segments: %lu\n", (ulong) n_segs);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/fut/fut0fut.c b/storage/innodb_plugin/fut/fut0fut.c
deleted file mode 100644
index 20b45a575e6..00000000000
--- a/storage/innodb_plugin/fut/fut0fut.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file fut/fut0fut.c
-File-based utilities
-
-Created 12/13/1995 Heikki Tuuri
-***********************************************************************/
-
-#include "fut0fut.h"
-
-#ifdef UNIV_NONINL
-#include "fut0fut.ic"
-#endif
-
diff --git a/storage/innodb_plugin/fut/fut0lst.c b/storage/innodb_plugin/fut/fut0lst.c
deleted file mode 100644
index a1e21c22725..00000000000
--- a/storage/innodb_plugin/fut/fut0lst.c
+++ /dev/null
@@ -1,530 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file fut/fut0lst.c
-File-based list utilities
-
-Created 11/28/1995 Heikki Tuuri
-***********************************************************************/
-
-#include "fut0lst.h"
-
-#ifdef UNIV_NONINL
-#include "fut0lst.ic"
-#endif
-
-#include "buf0buf.h"
-#include "page0page.h"
-
-/********************************************************************//**
-Adds a node to an empty list. */
-static
-void
-flst_add_to_empty(
-/*==============*/
- flst_base_node_t* base, /*!< in: pointer to base node of
- empty list */
- flst_node_t* node, /*!< in: node to add */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ulint space;
- fil_addr_t node_addr;
- ulint len;
-
- ut_ad(mtr && base && node);
- ut_ad(base != node);
- ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX));
- len = flst_get_len(base, mtr);
- ut_a(len == 0);
-
- buf_ptr_get_fsp_addr(node, &space, &node_addr);
-
- /* Update first and last fields of base node */
- flst_write_addr(base + FLST_FIRST, node_addr, mtr);
- flst_write_addr(base + FLST_LAST, node_addr, mtr);
-
- /* Set prev and next fields of node to add */
- flst_write_addr(node + FLST_PREV, fil_addr_null, mtr);
- flst_write_addr(node + FLST_NEXT, fil_addr_null, mtr);
-
- /* Update len of base node */
- mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
-}
-
-/********************************************************************//**
-Adds a node as the last node in a list. */
-UNIV_INTERN
-void
-flst_add_last(
-/*==========*/
- flst_base_node_t* base, /*!< in: pointer to base node of list */
- flst_node_t* node, /*!< in: node to add */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ulint space;
- fil_addr_t node_addr;
- ulint len;
- fil_addr_t last_addr;
- flst_node_t* last_node;
-
- ut_ad(mtr && base && node);
- ut_ad(base != node);
- ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX));
- len = flst_get_len(base, mtr);
- last_addr = flst_get_last(base, mtr);
-
- buf_ptr_get_fsp_addr(node, &space, &node_addr);
-
- /* If the list is not empty, call flst_insert_after */
- if (len != 0) {
- if (last_addr.page == node_addr.page) {
- last_node = page_align(node) + last_addr.boffset;
- } else {
- ulint zip_size = fil_space_get_zip_size(space);
-
- last_node = fut_get_ptr(space, zip_size, last_addr,
- RW_X_LATCH, mtr);
- }
-
- flst_insert_after(base, last_node, node, mtr);
- } else {
- /* else call flst_add_to_empty */
- flst_add_to_empty(base, node, mtr);
- }
-}
-
-/********************************************************************//**
-Adds a node as the first node in a list. */
-UNIV_INTERN
-void
-flst_add_first(
-/*===========*/
- flst_base_node_t* base, /*!< in: pointer to base node of list */
- flst_node_t* node, /*!< in: node to add */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ulint space;
- fil_addr_t node_addr;
- ulint len;
- fil_addr_t first_addr;
- flst_node_t* first_node;
-
- ut_ad(mtr && base && node);
- ut_ad(base != node);
- ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX));
- len = flst_get_len(base, mtr);
- first_addr = flst_get_first(base, mtr);
-
- buf_ptr_get_fsp_addr(node, &space, &node_addr);
-
- /* If the list is not empty, call flst_insert_before */
- if (len != 0) {
- if (first_addr.page == node_addr.page) {
- first_node = page_align(node) + first_addr.boffset;
- } else {
- ulint zip_size = fil_space_get_zip_size(space);
-
- first_node = fut_get_ptr(space, zip_size, first_addr,
- RW_X_LATCH, mtr);
- }
-
- flst_insert_before(base, node, first_node, mtr);
- } else {
- /* else call flst_add_to_empty */
- flst_add_to_empty(base, node, mtr);
- }
-}
-
-/********************************************************************//**
-Inserts a node after another in a list. */
-UNIV_INTERN
-void
-flst_insert_after(
-/*==============*/
- flst_base_node_t* base, /*!< in: pointer to base node of list */
- flst_node_t* node1, /*!< in: node to insert after */
- flst_node_t* node2, /*!< in: node to add */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ulint space;
- fil_addr_t node1_addr;
- fil_addr_t node2_addr;
- flst_node_t* node3;
- fil_addr_t node3_addr;
- ulint len;
-
- ut_ad(mtr && node1 && node2 && base);
- ut_ad(base != node1);
- ut_ad(base != node2);
- ut_ad(node2 != node1);
- ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, node1, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
-
- buf_ptr_get_fsp_addr(node1, &space, &node1_addr);
- buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
-
- node3_addr = flst_get_next_addr(node1, mtr);
-
- /* Set prev and next fields of node2 */
- flst_write_addr(node2 + FLST_PREV, node1_addr, mtr);
- flst_write_addr(node2 + FLST_NEXT, node3_addr, mtr);
-
- if (!fil_addr_is_null(node3_addr)) {
- /* Update prev field of node3 */
- ulint zip_size = fil_space_get_zip_size(space);
-
- node3 = fut_get_ptr(space, zip_size,
- node3_addr, RW_X_LATCH, mtr);
- flst_write_addr(node3 + FLST_PREV, node2_addr, mtr);
- } else {
- /* node1 was last in list: update last field in base */
- flst_write_addr(base + FLST_LAST, node2_addr, mtr);
- }
-
- /* Set next field of node1 */
- flst_write_addr(node1 + FLST_NEXT, node2_addr, mtr);
-
- /* Update len of base node */
- len = flst_get_len(base, mtr);
- mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
-}
-
-/********************************************************************//**
-Inserts a node before another in a list. */
-UNIV_INTERN
-void
-flst_insert_before(
-/*===============*/
- flst_base_node_t* base, /*!< in: pointer to base node of list */
- flst_node_t* node2, /*!< in: node to insert */
- flst_node_t* node3, /*!< in: node to insert before */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ulint space;
- flst_node_t* node1;
- fil_addr_t node1_addr;
- fil_addr_t node2_addr;
- fil_addr_t node3_addr;
- ulint len;
-
- ut_ad(mtr && node2 && node3 && base);
- ut_ad(base != node2);
- ut_ad(base != node3);
- ut_ad(node2 != node3);
- ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, node3, MTR_MEMO_PAGE_X_FIX));
-
- buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
- buf_ptr_get_fsp_addr(node3, &space, &node3_addr);
-
- node1_addr = flst_get_prev_addr(node3, mtr);
-
- /* Set prev and next fields of node2 */
- flst_write_addr(node2 + FLST_PREV, node1_addr, mtr);
- flst_write_addr(node2 + FLST_NEXT, node3_addr, mtr);
-
- if (!fil_addr_is_null(node1_addr)) {
- ulint zip_size = fil_space_get_zip_size(space);
- /* Update next field of node1 */
- node1 = fut_get_ptr(space, zip_size, node1_addr,
- RW_X_LATCH, mtr);
- flst_write_addr(node1 + FLST_NEXT, node2_addr, mtr);
- } else {
- /* node3 was first in list: update first field in base */
- flst_write_addr(base + FLST_FIRST, node2_addr, mtr);
- }
-
- /* Set prev field of node3 */
- flst_write_addr(node3 + FLST_PREV, node2_addr, mtr);
-
- /* Update len of base node */
- len = flst_get_len(base, mtr);
- mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
-}
-
-/********************************************************************//**
-Removes a node. */
-UNIV_INTERN
-void
-flst_remove(
-/*========*/
- flst_base_node_t* base, /*!< in: pointer to base node of list */
- flst_node_t* node2, /*!< in: node to remove */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ulint space;
- ulint zip_size;
- flst_node_t* node1;
- fil_addr_t node1_addr;
- fil_addr_t node2_addr;
- flst_node_t* node3;
- fil_addr_t node3_addr;
- ulint len;
-
- ut_ad(mtr && node2 && base);
- ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
-
- buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
- zip_size = fil_space_get_zip_size(space);
-
- node1_addr = flst_get_prev_addr(node2, mtr);
- node3_addr = flst_get_next_addr(node2, mtr);
-
- if (!fil_addr_is_null(node1_addr)) {
-
- /* Update next field of node1 */
-
- if (node1_addr.page == node2_addr.page) {
-
- node1 = page_align(node2) + node1_addr.boffset;
- } else {
- node1 = fut_get_ptr(space, zip_size,
- node1_addr, RW_X_LATCH, mtr);
- }
-
- ut_ad(node1 != node2);
-
- flst_write_addr(node1 + FLST_NEXT, node3_addr, mtr);
- } else {
- /* node2 was first in list: update first field in base */
- flst_write_addr(base + FLST_FIRST, node3_addr, mtr);
- }
-
- if (!fil_addr_is_null(node3_addr)) {
- /* Update prev field of node3 */
-
- if (node3_addr.page == node2_addr.page) {
-
- node3 = page_align(node2) + node3_addr.boffset;
- } else {
- node3 = fut_get_ptr(space, zip_size,
- node3_addr, RW_X_LATCH, mtr);
- }
-
- ut_ad(node2 != node3);
-
- flst_write_addr(node3 + FLST_PREV, node1_addr, mtr);
- } else {
- /* node2 was last in list: update last field in base */
- flst_write_addr(base + FLST_LAST, node1_addr, mtr);
- }
-
- /* Update len of base node */
- len = flst_get_len(base, mtr);
- ut_ad(len > 0);
-
- mlog_write_ulint(base + FLST_LEN, len - 1, MLOG_4BYTES, mtr);
-}
-
-/********************************************************************//**
-Cuts off the tail of the list, including the node given. The number of
-nodes which will be removed must be provided by the caller, as this function
-does not measure the length of the tail. */
-UNIV_INTERN
-void
-flst_cut_end(
-/*=========*/
- flst_base_node_t* base, /*!< in: pointer to base node of list */
- flst_node_t* node2, /*!< in: first node to remove */
- ulint n_nodes,/*!< in: number of nodes to remove,
- must be >= 1 */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ulint space;
- flst_node_t* node1;
- fil_addr_t node1_addr;
- fil_addr_t node2_addr;
- ulint len;
-
- ut_ad(mtr && node2 && base);
- ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
- ut_ad(n_nodes > 0);
-
- buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
-
- node1_addr = flst_get_prev_addr(node2, mtr);
-
- if (!fil_addr_is_null(node1_addr)) {
-
- /* Update next field of node1 */
-
- if (node1_addr.page == node2_addr.page) {
-
- node1 = page_align(node2) + node1_addr.boffset;
- } else {
- node1 = fut_get_ptr(space,
- fil_space_get_zip_size(space),
- node1_addr, RW_X_LATCH, mtr);
- }
-
- flst_write_addr(node1 + FLST_NEXT, fil_addr_null, mtr);
- } else {
- /* node2 was first in list: update the field in base */
- flst_write_addr(base + FLST_FIRST, fil_addr_null, mtr);
- }
-
- flst_write_addr(base + FLST_LAST, node1_addr, mtr);
-
- /* Update len of base node */
- len = flst_get_len(base, mtr);
- ut_ad(len >= n_nodes);
-
- mlog_write_ulint(base + FLST_LEN, len - n_nodes, MLOG_4BYTES, mtr);
-}
-
-/********************************************************************//**
-Cuts off the tail of the list, not including the given node. The number of
-nodes which will be removed must be provided by the caller, as this function
-does not measure the length of the tail. */
-UNIV_INTERN
-void
-flst_truncate_end(
-/*==============*/
- flst_base_node_t* base, /*!< in: pointer to base node of list */
- flst_node_t* node2, /*!< in: first node not to remove */
- ulint n_nodes,/*!< in: number of nodes to remove */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- fil_addr_t node2_addr;
- ulint len;
- ulint space;
-
- ut_ad(mtr && node2 && base);
- ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
- if (n_nodes == 0) {
-
- ut_ad(fil_addr_is_null(flst_get_next_addr(node2, mtr)));
-
- return;
- }
-
- buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
-
- /* Update next field of node2 */
- flst_write_addr(node2 + FLST_NEXT, fil_addr_null, mtr);
-
- flst_write_addr(base + FLST_LAST, node2_addr, mtr);
-
- /* Update len of base node */
- len = flst_get_len(base, mtr);
- ut_ad(len >= n_nodes);
-
- mlog_write_ulint(base + FLST_LEN, len - n_nodes, MLOG_4BYTES, mtr);
-}
-
-/********************************************************************//**
-Validates a file-based list.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-flst_validate(
-/*==========*/
- const flst_base_node_t* base, /*!< in: pointer to base node of list */
- mtr_t* mtr1) /*!< in: mtr */
-{
- ulint space;
- ulint zip_size;
- const flst_node_t* node;
- fil_addr_t node_addr;
- fil_addr_t base_addr;
- ulint len;
- ulint i;
- mtr_t mtr2;
-
- ut_ad(base);
- ut_ad(mtr_memo_contains_page(mtr1, base, MTR_MEMO_PAGE_X_FIX));
-
- /* We use two mini-transaction handles: the first is used to
- lock the base node, and prevent other threads from modifying the
- list. The second is used to traverse the list. We cannot run the
- second mtr without committing it at times, because if the list
- is long, then the x-locked pages could fill the buffer resulting
- in a deadlock. */
-
- /* Find out the space id */
- buf_ptr_get_fsp_addr(base, &space, &base_addr);
- zip_size = fil_space_get_zip_size(space);
-
- len = flst_get_len(base, mtr1);
- node_addr = flst_get_first(base, mtr1);
-
- for (i = 0; i < len; i++) {
- mtr_start(&mtr2);
-
- node = fut_get_ptr(space, zip_size,
- node_addr, RW_X_LATCH, &mtr2);
- node_addr = flst_get_next_addr(node, &mtr2);
-
- mtr_commit(&mtr2); /* Commit mtr2 each round to prevent buffer
- becoming full */
- }
-
- ut_a(fil_addr_is_null(node_addr));
-
- node_addr = flst_get_last(base, mtr1);
-
- for (i = 0; i < len; i++) {
- mtr_start(&mtr2);
-
- node = fut_get_ptr(space, zip_size,
- node_addr, RW_X_LATCH, &mtr2);
- node_addr = flst_get_prev_addr(node, &mtr2);
-
- mtr_commit(&mtr2); /* Commit mtr2 each round to prevent buffer
- becoming full */
- }
-
- ut_a(fil_addr_is_null(node_addr));
-
- return(TRUE);
-}
-
-/********************************************************************//**
-Prints info of a file-based list. */
-UNIV_INTERN
-void
-flst_print(
-/*=======*/
- const flst_base_node_t* base, /*!< in: pointer to base node of list */
- mtr_t* mtr) /*!< in: mtr */
-{
- const buf_frame_t* frame;
- ulint len;
-
- ut_ad(base && mtr);
- ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
- frame = page_align((byte*) base);
-
- len = flst_get_len(base, mtr);
-
- fprintf(stderr,
- "FILE-BASED LIST:\n"
- "Base node in space %lu page %lu byte offset %lu; len %lu\n",
- (ulong) page_get_space_id(frame),
- (ulong) page_get_page_no(frame),
- (ulong) page_offset(base), (ulong) len);
-}
diff --git a/storage/innodb_plugin/ha/ha0ha.c b/storage/innodb_plugin/ha/ha0ha.c
deleted file mode 100644
index cb5e541b55d..00000000000
--- a/storage/innodb_plugin/ha/ha0ha.c
+++ /dev/null
@@ -1,441 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file ha/ha0ha.c
-The hash table with external chains
-
-Created 8/22/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "ha0ha.h"
-#ifdef UNIV_NONINL
-#include "ha0ha.ic"
-#endif
-
-#ifdef UNIV_DEBUG
-# include "buf0buf.h"
-#endif /* UNIV_DEBUG */
-#ifdef UNIV_SYNC_DEBUG
-# include "btr0sea.h"
-#endif /* UNIV_SYNC_DEBUG */
-#include "page0page.h"
-
-/*************************************************************//**
-Creates a hash table with at least n array cells. The actual number
-of cells is chosen to be a prime number slightly bigger than n.
-The table itself comes from hash_create(n). With n_mutexes == 0 a single
-node heap is attached as table->heap and the function returns. Otherwise,
-in builds without UNIV_HOTBACKUP, n_mutexes mutexes are created and one
-node heap per mutex is stored in table->heaps.
-@return own: created table */
-UNIV_INTERN
-hash_table_t*
-ha_create_func(
-/*===========*/
- ulint n, /*!< in: number of array cells */
-#ifdef UNIV_SYNC_DEBUG
- ulint mutex_level, /*!< in: level of the mutexes in the latching
- order: this is used in the debug version */
-#endif /* UNIV_SYNC_DEBUG */
- ulint n_mutexes) /*!< in: number of mutexes to protect the
- hash table: must be a power of 2, or 0 */
-{
- hash_table_t* table;
-#ifndef UNIV_HOTBACKUP
- ulint i;
-#endif /* !UNIV_HOTBACKUP */
-
- ut_ad(ut_is_2pow(n_mutexes));
- table = hash_create(n);
-
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-# ifndef UNIV_HOTBACKUP
- /* Debug builds flag the table as adaptive; the other functions in
- this file test the flag before touching block->n_pointers. */
- table->adaptive = TRUE;
-# endif /* !UNIV_HOTBACKUP */
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- /* Creating MEM_HEAP_BTR_SEARCH type heaps can potentially fail,
- but in practice it never should in this case, hence the asserts. */
-
- if (n_mutexes == 0) {
- /* No mutexes requested: one shared heap, stored in table->heap. */
- table->heap = mem_heap_create_in_btr_search(
- ut_min(4096, MEM_MAX_ALLOC_IN_BUF));
- ut_a(table->heap);
-
- return(table);
- }
-
-#ifndef UNIV_HOTBACKUP
- /* mutex_level is a parameter only under UNIV_SYNC_DEBUG, yet it is
- named here unconditionally; hash_create_mutexes is presumably a macro
- that drops the argument in other builds -- confirm in hash0hash.h. */
- hash_create_mutexes(table, n_mutexes, mutex_level);
-
- table->heaps = mem_alloc(n_mutexes * sizeof(void*));
-
- /* One heap per mutex. */
- for (i = 0; i < n_mutexes; i++) {
- table->heaps[i] = mem_heap_create_in_btr_search(4096);
- ut_a(table->heaps[i]);
- }
-#endif /* !UNIV_HOTBACKUP */
-
- return(table);
-}
-
-/*************************************************************//**
-Resets a hash table to the empty state: every heap in table->heaps is
-freed (this step is compiled out in UNIV_HOTBACKUP builds) and then the
-chain head of each cell is set to NULL. */
-UNIV_INTERN
-void
-ha_clear(
-/*=====*/
-    hash_table_t* table) /*!< in, own: hash table */
-{
-    ulint   idx;
-    ulint   limit;
-
-#ifdef UNIV_SYNC_DEBUG
-    ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EXCLUSIVE));
-#endif /* UNIV_SYNC_DEBUG */
-
-#ifndef UNIV_HOTBACKUP
-    /* Release the per-mutex heaps; there are table->n_mutexes of them. */
-    limit = table->n_mutexes;
-    idx = 0;
-
-    while (idx < limit) {
-        mem_heap_free(table->heaps[idx]);
-        idx++;
-    }
-#endif /* !UNIV_HOTBACKUP */
-
-    /* Detach the chain from every cell of the array. */
-    limit = hash_get_n_cells(table);
-    idx = 0;
-
-    while (idx < limit) {
-        hash_get_nth_cell(table, idx)->node = NULL;
-        idx++;
-    }
-}
-
-/*************************************************************//**
-Inserts an entry into a hash table. If an entry with the same fold number
-is found, its node is updated to point to the new data, and no new node
-is inserted. Otherwise a new node is allocated from the heap returned by
-hash_get_heap(table, fold) and appended to the end of the cell's chain.
-@return TRUE if succeed, FALSE if no more memory could be allocated */
-UNIV_INTERN
-ibool
-ha_insert_for_fold_func(
-/*====================*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold, /*!< in: folded value of data; if a node with
- the same fold value already exists, it is
- updated to point to the given data, and no new
- node is created! */
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- buf_block_t* block, /*!< in: buffer block containing the data */
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- void* data) /*!< in: data, must not be NULL */
-{
- hash_cell_t* cell;
- ha_node_t* node;
- ha_node_t* prev_node;
- ulint hash;
-
- ut_ad(table && data);
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- ut_a(block->frame == page_align(data));
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- ASSERT_HASH_MUTEX_OWN(table, fold);
-
- hash = hash_calc_hash(fold, table);
-
- cell = hash_get_nth_cell(table, hash);
-
- prev_node = cell->node;
-
- /* First pass over the chain: look for a node that already carries
- this fold value and, if found, redirect it to the new data. */
- while (prev_node != NULL) {
- if (prev_node->fold == fold) {
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-# ifndef UNIV_HOTBACKUP
- if (table->adaptive) {
- /* Move one pointer count from the block the node
- used to reference to the new block. */
- buf_block_t* prev_block = prev_node->block;
- ut_a(prev_block->frame
- == page_align(prev_node->data));
- ut_a(prev_block->n_pointers > 0);
- prev_block->n_pointers--;
- block->n_pointers++;
- }
-# endif /* !UNIV_HOTBACKUP */
-
- prev_node->block = block;
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- prev_node->data = data;
-
- return(TRUE);
- }
-
- prev_node = prev_node->next;
- }
-
- /* We have to allocate a new chain node */
-
- node = mem_heap_alloc(hash_get_heap(table, fold), sizeof(ha_node_t));
-
- if (node == NULL) {
- /* It was a btr search type memory heap and at the moment
- no more memory could be allocated: return */
-
- ut_ad(hash_get_heap(table, fold)->type & MEM_HEAP_BTR_SEARCH);
-
- return(FALSE);
- }
-
- /* NOTE(review): block is a parameter only in UNIV_AHI_DEBUG/UNIV_DEBUG
- builds but is named here unconditionally; ha_node_set_data is
- presumably a macro that ignores it in other builds -- confirm in
- ha0ha.h. */
- ha_node_set_data(node, block, data);
-
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-# ifndef UNIV_HOTBACKUP
- if (table->adaptive) {
- block->n_pointers++;
- }
-# endif /* !UNIV_HOTBACKUP */
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-
- node->fold = fold;
-
- node->next = NULL;
-
- /* Second pass: link the new node in, either as the first node of an
- empty cell or after the current last node of the chain. */
- prev_node = cell->node;
-
- if (prev_node == NULL) {
-
- cell->node = node;
-
- return(TRUE);
- }
-
- while (prev_node->next != NULL) {
-
- prev_node = prev_node->next;
- }
-
- prev_node->next = node;
-
- return(TRUE);
-}
-
-/***********************************************************//**
-Deletes a hash node. In UNIV_AHI_DEBUG/UNIV_DEBUG builds without
-UNIV_HOTBACKUP, and only for tables flagged adaptive, the function first
-checks that the frame of the node's block is the page its data pointer
-lies on and decrements that block's n_pointers counter. The node is then
-removed with HASH_DELETE_AND_COMPACT. */
-UNIV_INTERN
-void
-ha_delete_hash_node(
-/*================*/
-    hash_table_t* table,    /*!< in: hash table */
-    ha_node_t* del_node)    /*!< in: node to be deleted */
-{
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-# ifndef UNIV_HOTBACKUP
-    if (table->adaptive) {
-        /* This has to be a comparison. An assignment here would
-        overwrite block->frame with the aligned data pointer and
-        let the assertion pass for any non-NULL page. */
-        ut_a(del_node->block->frame == page_align(del_node->data));
-        ut_a(del_node->block->n_pointers > 0);
-        del_node->block->n_pointers--;
-    }
-# endif /* !UNIV_HOTBACKUP */
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-
-    HASH_DELETE_AND_COMPACT(ha_node_t, next, table, del_node);
-}
-
-/*********************************************************//**
-Looks up the node that has the given fold value and data pointer and, when
-one exists, redirects it to new_data. Nothing happens if no such node is
-found. */
-UNIV_INTERN
-void
-ha_search_and_update_if_found_func(
-/*===============================*/
-    hash_table_t* table,    /*!< in/out: hash table */
-    ulint fold,             /*!< in: folded value of the searched data */
-    void* data,             /*!< in: pointer to the data */
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-    buf_block_t* new_block, /*!< in: block containing new_data */
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-    void* new_data)         /*!< in: new pointer to the data */
-{
-    ha_node_t* node;
-
-    ASSERT_HASH_MUTEX_OWN(table, fold);
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-    ut_a(new_block->frame == page_align(new_data));
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-
-    node = ha_search_with_data(table, fold, data);
-
-    if (node == NULL) {
-        /* No matching node: nothing to update. */
-        return;
-    }
-
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-# ifndef UNIV_HOTBACKUP
-    if (table->adaptive) {
-        /* Hand one pointer count over from the old block to the
-        new one. */
-        ut_a(node->block->n_pointers > 0);
-        node->block->n_pointers--;
-        new_block->n_pointers++;
-    }
-# endif /* !UNIV_HOTBACKUP */
-
-    node->block = new_block;
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-    node->data = new_data;
-}
-
-#ifndef UNIV_HOTBACKUP
-/*****************************************************************//**
-Deletes, from the chain selected by fold, every node whose data pointer
-lies on the given page. */
-UNIV_INTERN
-void
-ha_remove_all_nodes_to_page(
-/*========================*/
-    hash_table_t* table,    /*!< in: hash table */
-    ulint fold,             /*!< in: fold value */
-    const page_t* page)     /*!< in: buffer page */
-{
-    ha_node_t* node;
-
-    ASSERT_HASH_MUTEX_OWN(table, fold);
-
-    for (node = ha_chain_get_first(table, fold); node != NULL; ) {
-
-        if (page_align(ha_node_get_data(node)) != page) {
-            /* Not on this page: keep it and move on. */
-            node = ha_chain_get_next(node);
-
-            continue;
-        }
-
-        ha_delete_hash_node(table, node);
-
-        /* The deletion may compact the heap of nodes and move
-        other nodes, so the scan restarts from the first node of
-        the chain. */
-        node = ha_chain_get_first(table, fold);
-    }
-#ifdef UNIV_DEBUG
-    /* Walk the chain once more and assert that no node pointing to the
-    page is left. */
-    for (node = ha_chain_get_first(table, fold);
-         node != NULL;
-         node = ha_chain_get_next(node)) {
-
-        ut_a(page_align(ha_node_get_data(node)) != page);
-    }
-#endif
-}
-
-/*************************************************************//**
-Checks the cells start_index..end_index (both inclusive) of a hash table:
-every node in a cell's chain must hash to the number of that cell. Each
-mismatch is reported on stderr.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-ha_validate(
-/*========*/
-    hash_table_t* table,    /*!< in: hash table */
-    ulint start_index,      /*!< in: start index */
-    ulint end_index)        /*!< in: end index */
-{
-    ha_node_t*  node;
-    ulint       i;
-    ibool       ok = TRUE;
-
-    ut_a(start_index <= end_index);
-    ut_a(start_index < hash_get_n_cells(table));
-    ut_a(end_index < hash_get_n_cells(table));
-
-    for (i = start_index; i <= end_index; i++) {
-
-        for (node = hash_get_nth_cell(table, i)->node;
-             node != NULL;
-             node = node->next) {
-
-            if (hash_calc_hash(node->fold, table) == i) {
-                /* The node sits in the right cell. */
-                continue;
-            }
-
-            ut_print_timestamp(stderr);
-            fprintf(stderr,
-                "InnoDB: Error: hash table node"
-                " fold value %lu does not\n"
-                "InnoDB: match the cell number %lu.\n",
-                (ulong) node->fold, (ulong) i);
-
-            ok = FALSE;
-        }
-    }
-
-    return(ok);
-}
-
-/*************************************************************//**
-Prints info of a hash table: always the number of cells; in UNIV_DEBUG
-builds also the number of non-empty cells; and, for a table that has a
-single heap (table->heaps == NULL and table->heap != NULL), the number of
-buffers in that heap. Note that the terminating newline is written only
-in the single-heap case. */
-UNIV_INTERN
-void
-ha_print_info(
-/*==========*/
- FILE* file, /*!< in: file where to print */
- hash_table_t* table) /*!< in: hash table */
-{
-#ifdef UNIV_DEBUG
-/* Some of the code here is disabled for performance reasons in production
-builds, see http://bugs.mysql.com/36941 */
-#define PRINT_USED_CELLS
-#endif /* UNIV_DEBUG */
-
-#ifdef PRINT_USED_CELLS
- hash_cell_t* cell;
- ulint cells = 0;
- ulint i;
-#endif /* PRINT_USED_CELLS */
- ulint n_bufs;
-
-#ifdef PRINT_USED_CELLS
- /* Count the cells that have at least one node; this scans the whole
- cell array. */
- for (i = 0; i < hash_get_n_cells(table); i++) {
-
- cell = hash_get_nth_cell(table, i);
-
- if (cell->node) {
-
- cells++;
- }
- }
-#endif /* PRINT_USED_CELLS */
-
- fprintf(file, "Hash table size %lu",
- (ulong) hash_get_n_cells(table));
-
-#ifdef PRINT_USED_CELLS
- fprintf(file, ", used cells %lu", (ulong) cells);
-#endif /* PRINT_USED_CELLS */
-
- if (table->heaps == NULL && table->heap != NULL) {
-
- /* This calculation is intended for the adaptive hash
- index: how many buffer frames we have reserved? */
-
- /* Length of the heap's block list minus one, plus one more if
- the heap currently holds a free_block. */
- n_bufs = UT_LIST_GET_LEN(table->heap->base) - 1;
-
- if (table->heap->free_block) {
- n_bufs++;
- }
-
- fprintf(file, ", node heap has %lu buffer(s)\n",
- (ulong) n_bufs);
- }
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/ha/hash0hash.c b/storage/innodb_plugin/ha/hash0hash.c
deleted file mode 100644
index 2800d7793f8..00000000000
--- a/storage/innodb_plugin/ha/hash0hash.c
+++ /dev/null
@@ -1,174 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file ha/hash0hash.c
-The simple hash table utility
-
-Created 5/20/1997 Heikki Tuuri
-*******************************************************/
-
-#include "hash0hash.h"
-#ifdef UNIV_NONINL
-#include "hash0hash.ic"
-#endif
-
-#include "mem0mem.h"
-
-#ifndef UNIV_HOTBACKUP
-/************************************************************//**
-Reserves the mutex for a fold value in a hash table. The mutex is the one
-that hash_get_mutex(table, fold) returns. */
-UNIV_INTERN
-void
-hash_mutex_enter(
-/*=============*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: fold */
-{
- mutex_enter(hash_get_mutex(table, fold));
-}
-
-/************************************************************//**
-Releases the mutex for a fold value in a hash table. The mutex is the one
-that hash_get_mutex(table, fold) returns. */
-UNIV_INTERN
-void
-hash_mutex_exit(
-/*============*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: fold */
-{
- mutex_exit(hash_get_mutex(table, fold));
-}
-
-/************************************************************//**
-Acquires every mutex of a hash table, walking the mutex array from index 0
-up to table->n_mutexes - 1. */
-UNIV_INTERN
-void
-hash_mutex_enter_all(
-/*=================*/
-    hash_table_t* table) /*!< in: hash table */
-{
-    ulint n_entered = 0;
-
-    while (n_entered < table->n_mutexes) {
-
-        mutex_enter(table->mutexes + n_entered);
-
-        n_entered++;
-    }
-}
-
-/************************************************************//**
-Releases every mutex of a hash table, walking the mutex array from index 0
-up to table->n_mutexes - 1. */
-UNIV_INTERN
-void
-hash_mutex_exit_all(
-/*================*/
-    hash_table_t* table) /*!< in: hash table */
-{
-    ulint n_released = 0;
-
-    while (n_released < table->n_mutexes) {
-
-        mutex_exit(table->mutexes + n_released);
-
-        n_released++;
-    }
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*************************************************************//**
-Allocates a hash table whose cell array has ut_find_prime(n) cells. The
-new table has no mutexes and no heaps, and its cell array is initialized
-with hash_table_clear().
-@return own: created table */
-UNIV_INTERN
-hash_table_t*
-hash_create(
-/*========*/
-    ulint n) /*!< in: number of array cells */
-{
-    hash_table_t*   table;
-    ulint           n_cells;
-
-    n_cells = ut_find_prime(n);
-
-    /* The table struct is allocated first, then the cell array. */
-    table = mem_alloc(sizeof(hash_table_t));
-
-    table->array = ut_malloc(sizeof(hash_cell_t) * n_cells);
-    table->n_cells = n_cells;
-    table->heap = NULL;
-    table->magic_n = HASH_TABLE_MAGIC_N;
-#ifndef UNIV_HOTBACKUP
-# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-    table->adaptive = FALSE;
-# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-    table->mutexes = NULL;
-    table->n_mutexes = 0;
-    table->heaps = NULL;
-#endif /* !UNIV_HOTBACKUP */
-
-    /* Initialize the cell array */
-    hash_table_clear(table);
-
-    return(table);
-}
-
-/*************************************************************//**
-Releases a hash table: first its cell array, then the table struct itself.
-In builds without UNIV_HOTBACKUP the table must not have a mutex array
-(table->mutexes == NULL). */
-UNIV_INTERN
-void
-hash_table_free(
-/*============*/
-    hash_table_t* table) /*!< in, own: hash table */
-{
-    hash_cell_t* cells;
-
-#ifndef UNIV_HOTBACKUP
-    ut_a(table->mutexes == NULL);
-#endif /* !UNIV_HOTBACKUP */
-
-    cells = table->array;
-
-    ut_free(cells);
-    mem_free(table);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Creates a mutex array to protect a hash table: allocates n_mutexes mutexes
-into table->mutexes, creates each of them with mutex_create and records
-the count in table->n_mutexes. */
-UNIV_INTERN
-void
-hash_create_mutexes_func(
-/*=====================*/
- hash_table_t* table, /*!< in: hash table */
-#ifdef UNIV_SYNC_DEBUG
- ulint sync_level, /*!< in: latching order level of the
- mutexes: used in the debug version */
-#endif /* UNIV_SYNC_DEBUG */
- ulint n_mutexes) /*!< in: number of mutexes, must be a
- power of 2 */
-{
- ulint i;
-
- ut_a(n_mutexes > 0);
- ut_a(ut_is_2pow(n_mutexes));
-
- table->mutexes = mem_alloc(n_mutexes * sizeof(mutex_t));
-
- for (i = 0; i < n_mutexes; i++) {
- /* NOTE(review): sync_level exists only under UNIV_SYNC_DEBUG
- but is named here unconditionally; mutex_create is presumably
- a macro that drops it in other builds -- confirm in
- sync0sync.h. */
- mutex_create(table->mutexes + i, sync_level);
- }
-
- table->n_mutexes = n_mutexes;
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc
deleted file mode 100644
index 0dc845f83e0..00000000000
--- a/storage/innodb_plugin/handler/ha_innodb.cc
+++ /dev/null
@@ -1,10420 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2000, 2009, MySQL AB & Innobase Oy. All Rights Reserved.
-Copyright (c) 2008, 2009 Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-/***********************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2009, Percona Inc.
-
-Portions of this file contain modifications contributed and copyrighted
-by Percona Inc.. Those modifications are
-gratefully acknowledged and are described briefly in the InnoDB
-documentation. The contributions by Percona Inc. are incorporated with
-their permission, and subject to the conditions contained in the file
-COPYING.Percona.
-
-This program is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-***********************************************************************/
-
-/* TODO list for the InnoDB handler in 5.0:
- - Remove the flag trx->active_trans and look at trx->conc_state
- - fix savepoint functions to use savepoint storage area
- - Find out what kind of problems the OS X case-insensitivity causes to
- table and database names; should we 'normalize' the names like we do
- in Windows?
-*/
-
-#ifdef USE_PRAGMA_IMPLEMENTATION
-#pragma implementation // gcc: Class implementation
-#endif
-
-#include <mysql_priv.h>
-
-#include <m_ctype.h>
-#include <mysys_err.h>
-#include <mysql/plugin.h>
-
-/** @file ha_innodb.cc */
-
-/* Include necessary InnoDB headers */
-extern "C" {
-#include "univ.i"
-#include "buf0lru.h"
-#include "btr0sea.h"
-#include "os0file.h"
-#include "os0thread.h"
-#include "srv0start.h"
-#include "srv0srv.h"
-#include "trx0roll.h"
-#include "trx0trx.h"
-#include "trx0sys.h"
-#include "mtr0mtr.h"
-#include "row0ins.h"
-#include "row0mysql.h"
-#include "row0sel.h"
-#include "row0upd.h"
-#include "log0log.h"
-#include "lock0lock.h"
-#include "dict0crea.h"
-#include "btr0cur.h"
-#include "btr0btr.h"
-#include "fsp0fsp.h"
-#include "sync0sync.h"
-#include "fil0fil.h"
-#include "trx0xa.h"
-#include "row0merge.h"
-#include "thr0loc.h"
-#include "dict0boot.h"
-#include "ha_prototypes.h"
-#include "ut0mem.h"
-#include "ibuf0ibuf.h"
-}
-
-#include "ha_innodb.h"
-#include "i_s.h"
-
-#ifndef MYSQL_SERVER
-# ifndef MYSQL_PLUGIN_IMPORT
-# define MYSQL_PLUGIN_IMPORT /* nothing */
-# endif /* MYSQL_PLUGIN_IMPORT */
-/* This is needed because of Bug #3596. Let us hope that pthread_mutex_t
-is defined the same in both builds: the MySQL server and the InnoDB plugin. */
-extern MYSQL_PLUGIN_IMPORT pthread_mutex_t LOCK_thread_count;
-
-#if MYSQL_VERSION_ID < 50124
-/* this is defined in mysql_priv.h inside #ifdef MYSQL_SERVER
-but we need it here */
-bool check_global_access(THD *thd, ulong want_access);
-#endif /* MYSQL_VERSION_ID < 50124 */
-#endif /* MYSQL_SERVER */
-
-/** to protect innobase_open_files */
-static pthread_mutex_t innobase_share_mutex;
-/** to force correct commit order in binlog */
-static pthread_mutex_t prepare_commit_mutex;
-static ulong commit_threads = 0;
-static pthread_mutex_t commit_threads_m;
-static pthread_cond_t commit_cond;
-static pthread_mutex_t commit_cond_m;
-static pthread_mutex_t analyze_mutex;
-static bool innodb_inited = 0;
-
-#define INSIDE_HA_INNOBASE_CC
-
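-/* EQ_CURRENT_THD(thd) tests whether thd is the THD of the calling thread.
-In the Windows plugin, the return value of current_thd is undefined and
-would have to be mapped to NULL; no such mapping is defined here, so the
-macro always compares against current_thd. */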
-
-#define EQ_CURRENT_THD(thd) ((thd) == current_thd)
-
-
-static struct handlerton* innodb_hton_ptr;
-
-static const long AUTOINC_OLD_STYLE_LOCKING = 0;
-static const long AUTOINC_NEW_STYLE_LOCKING = 1;
-static const long AUTOINC_NO_LOCKING = 2;
-
-static long innobase_mirrored_log_groups, innobase_log_files_in_group,
- innobase_log_buffer_size,
- innobase_additional_mem_pool_size, innobase_file_io_threads,
- innobase_force_recovery, innobase_open_files,
- innobase_autoinc_lock_mode;
-static ulong innobase_commit_concurrency = 0;
-static ulong innobase_read_io_threads;
-static ulong innobase_write_io_threads;
-
-static long long innobase_buffer_pool_size, innobase_log_file_size;
-
-/** Percentage of the buffer pool to reserve for 'old' blocks.
-Connected to buf_LRU_old_ratio. */
-static uint innobase_old_blocks_pct;
-
-/* The default values for the following char* start-up parameters
-are determined in innobase_init below: */
-
-static char* innobase_data_home_dir = NULL;
-static char* innobase_data_file_path = NULL;
-static char* innobase_log_group_home_dir = NULL;
-static char* innobase_file_format_name = NULL;
-static char* innobase_change_buffering = NULL;
-
-/* Note: This variable can be set to on/off and any of the supported
-file formats in the configuration file, but can only be set to any
-of the supported file formats during runtime. */
-static char* innobase_file_format_check = NULL;
-
-static char* innobase_file_flush_method = NULL;
-
-/* Below we have boolean-valued start-up parameters, and their default
-values */
-
-static ulong innobase_fast_shutdown = 1;
-#ifdef UNIV_LOG_ARCHIVE
-static my_bool innobase_log_archive = FALSE;
-static char* innobase_log_arch_dir = NULL;
-#endif /* UNIV_LOG_ARCHIVE */
-static my_bool innobase_use_doublewrite = TRUE;
-static my_bool innobase_use_checksums = TRUE;
-static my_bool innobase_locks_unsafe_for_binlog = FALSE;
-static my_bool innobase_rollback_on_timeout = FALSE;
-static my_bool innobase_create_status_file = FALSE;
-static my_bool innobase_stats_on_metadata = TRUE;
-
-static char* internal_innobase_data_file_path = NULL;
-
-static char* innodb_version_str = (char*) INNODB_VERSION_STR;
-
-/* The following counter is used to convey information to InnoDB
-about server activity: in selects it is not sensible to call
-srv_active_wake_master_thread after each fetch or search, we only do
-it every INNOBASE_WAKE_INTERVAL'th step. */
-
-#define INNOBASE_WAKE_INTERVAL 32
-static ulong innobase_active_counter = 0;
-
-static hash_table_t* innobase_open_tables;
-
-#ifdef __NETWARE__ /* some special cleanup for NetWare */
-bool nw_panic = FALSE;
-#endif
-
-/** Allowed values of innodb_change_buffering */
-static const char* innobase_change_buffering_values[IBUF_USE_COUNT] = {
- "none", /* IBUF_USE_NONE */
- "inserts" /* IBUF_USE_INSERT */
-};
-
-static INNOBASE_SHARE *get_share(const char *table_name);
-static void free_share(INNOBASE_SHARE *share);
-static int innobase_close_connection(handlerton *hton, THD* thd);
-static int innobase_commit(handlerton *hton, THD* thd, bool all);
-static int innobase_rollback(handlerton *hton, THD* thd, bool all);
-static int innobase_rollback_to_savepoint(handlerton *hton, THD* thd,
- void *savepoint);
-static int innobase_savepoint(handlerton *hton, THD* thd, void *savepoint);
-static int innobase_release_savepoint(handlerton *hton, THD* thd,
- void *savepoint);
-static handler *innobase_create_handler(handlerton *hton,
- TABLE_SHARE *table,
- MEM_ROOT *mem_root);
-
-/* "GEN_CLUST_INDEX" is the name reserved for InnoDB's default
-system primary index. */
-static const char innobase_index_reserve_name[]= "GEN_CLUST_INDEX";
-
-/** @brief Initialize the default value of innodb_commit_concurrency.
-
-Once InnoDB is running, the innodb_commit_concurrency must not change
-from zero to nonzero. (Bug #42101)
-
-The initial default value is 0, and without this extra initialization,
-SET GLOBAL innodb_commit_concurrency=DEFAULT would set the parameter
-to 0, even if it was initially set to nonzero at the command line
-or configuration file. */
-static
-void
-innobase_commit_concurrency_init_default(void);
-/*==========================================*/
-
-/************************************************************//**
-Validate the file format name and return its corresponding id.
-@return valid file format id */
-static
-uint
-innobase_file_format_name_lookup(
-/*=============================*/
- const char* format_name); /*!< in: pointer to file format
- name */
-/************************************************************//**
-Validate the file format check config parameters, as a side effect it
-sets the srv_check_file_format_at_startup variable.
-@return true if one of "on" or "off" */
-static
-bool
-innobase_file_format_check_on_off(
-/*==============================*/
- const char* format_check); /*!< in: parameter value */
-/************************************************************//**
-Validate the file format check config parameters, as a side effect it
-sets the srv_check_file_format_at_startup variable.
-@return the format_id if valid config value, otherwise, return -1 */
-static
-int
-innobase_file_format_validate_and_set(
-/*================================*/
- const char* format_check); /*!< in: parameter value */
-/****************************************************************//**
-Return alter table flags supported in an InnoDB database. */
-static
-uint
-innobase_alter_table_flags(
-/*=======================*/
- uint flags);
-
-static const char innobase_hton_name[]= "InnoDB";
-
-/*************************************************************//**
-Check function for innodb_commit_concurrency. The requested value is
-always stored into *save; it is accepted only when it does not switch the
-setting between zero and nonzero.
-@return 0 for valid innodb_commit_concurrency */
-static
-int
-innobase_commit_concurrency_validate(
-/*=================================*/
-    THD* thd,                       /*!< in: thread handle */
-    struct st_mysql_sys_var* var,   /*!< in: pointer to system
-                                    variable */
-    void* save,                     /*!< out: immediate result
-                                    for update function */
-    struct st_mysql_value* value)   /*!< in: incoming string */
-{
-    long long intbuf;
-
-    DBUG_ENTER("innobase_commit_concurrency_validate");
-
-    if (value->val_int(value, &intbuf)) {
-        /* The value is NULL. That is invalid. */
-        DBUG_RETURN(1);
-    }
-
-    const ulong requested = static_cast<ulong>(intbuf);
-
-    *reinterpret_cast<ulong*>(save) = requested;
-
-    /* The update is allowed as long as zero stays zero and nonzero
-    stays nonzero; anything else is rejected with a nonzero return. */
-    const bool crosses_zero
-        = (requested == 0) != (innobase_commit_concurrency == 0);
-
-    DBUG_RETURN(crosses_zero);
-}
-
-/* Per-session variable: XA two-phase commit support, on by default. */
-static MYSQL_THDVAR_BOOL(support_xa, PLUGIN_VAR_OPCMDARG,
- "Enable InnoDB support for the XA two-phase commit",
- /* check_func */ NULL, /* update_func */ NULL,
- /* default */ TRUE);
-
-/* Per-session variable: InnoDB locking in LOCK TABLES, on by default. */
-static MYSQL_THDVAR_BOOL(table_locks, PLUGIN_VAR_OPCMDARG,
- "Enable InnoDB locking in LOCK TABLES",
- /* check_func */ NULL, /* update_func */ NULL,
- /* default */ TRUE);
-
-/* Per-session variable: strict checking of create options, off by
-default. */
-static MYSQL_THDVAR_BOOL(strict_mode, PLUGIN_VAR_OPCMDARG,
- "Use strict mode when evaluating create options.",
- /* check_func, update_func, default: */
- NULL, NULL, FALSE);
-
-/* Per-session variable: lock wait timeout in seconds. The trailing
-arguments are check_func, update_func, default, minimum, maximum and
-block size, in that order. */
-static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG,
- "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.",
- NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0);
-
-
-/* Constructs a new ha_innobase handler for the given table share, using
-placement new with mem_root as the placement argument. */
-static handler *innobase_create_handler(handlerton *hton,
- TABLE_SHARE *table,
- MEM_ROOT *mem_root)
-{
- return new (mem_root) ha_innobase(hton, table);
-}
-
-/*******************************************************************//**
-This function is used to prepare an X/Open XA distributed transaction.
-@return 0 or error number */
-static
-int
-innobase_xa_prepare(
-/*================*/
- handlerton* hton, /*!< in: InnoDB handlerton */
- THD* thd, /*!< in: handle to the MySQL thread of
- the user whose XA transaction should
- be prepared */
- bool all); /*!< in: TRUE - commit transaction
- FALSE - the current SQL statement
- ended */
-/*******************************************************************//**
-This function is used to recover X/Open XA distributed transactions.
-@return number of prepared transactions stored in xid_list */
-static
-int
-innobase_xa_recover(
-/*================*/
- handlerton* hton, /*!< in: InnoDB handlerton */
- XID* xid_list,/*!< in/out: prepared transactions */
- uint len); /*!< in: number of slots in xid_list */
-/*******************************************************************//**
-This function is used to commit one X/Open XA distributed transaction
-which is in the prepared state
-@return 0 or error number */
-static
-int
-innobase_commit_by_xid(
-/*===================*/
- handlerton* hton,
- XID* xid); /*!< in: X/Open XA transaction identification */
-/*******************************************************************//**
-This function is used to rollback one X/Open XA distributed transaction
-which is in the prepared state
-@return 0 or error number */
-static
-int
-innobase_rollback_by_xid(
-/*=====================*/
- handlerton* hton, /*!< in: InnoDB handlerton */
- XID* xid); /*!< in: X/Open XA transaction
- identification */
-/*******************************************************************//**
-Create a consistent view for a cursor based on current transaction
-which is created if the corresponding MySQL thread still lacks one.
-This consistent view is then used inside of MySQL when accessing records
-using a cursor.
-@return pointer to cursor view or NULL */
-static
-void*
-innobase_create_cursor_view(
-/*========================*/
- handlerton* hton, /*!< in: innobase hton */
- THD* thd); /*!< in: user thread handle */
-/*******************************************************************//**
-Set the given consistent cursor view to a transaction which is created
-if the corresponding MySQL thread still lacks one. If the given
-consistent cursor view is NULL global read view of a transaction is
-restored to a transaction read view. */
-static
-void
-innobase_set_cursor_view(
-/*=====================*/
- handlerton* hton,
- THD* thd, /*!< in: user thread handle */
- void* curview);/*!< in: Consistent cursor view to be set */
-/*******************************************************************//**
-Close the given consistent cursor view of a transaction and restore the
-global read view as the transaction read view. The transaction is created
-if the corresponding MySQL thread still lacks one. */
-static
-void
-innobase_close_cursor_view(
-/*=======================*/
- handlerton* hton, /*!< in: InnoDB handlerton */
- THD* thd, /*!< in: user thread handle */
- void* curview);/*!< in: Consistent read view to be closed */
-/*****************************************************************//**
-Removes all tables in the named database inside InnoDB. */
-static
-void
-innobase_drop_database(
-/*===================*/
- handlerton* hton, /*!< in: handlerton of Innodb */
- char* path); /*!< in: database path; inside InnoDB the name
- of the last directory in the path is used as
- the database name: for example, in 'mysql/data/test'
- the database name is 'test' */
-/*******************************************************************//**
-Closes an InnoDB database. */
-static
-int
-innobase_end(handlerton *hton, ha_panic_function type);
-
-/*****************************************************************//**
-Creates an InnoDB transaction struct for the thd if it does not yet have one.
-Starts a new InnoDB transaction if a transaction is not yet started, and
-assigns a new snapshot for a consistent read if the transaction does not yet
-have one.
-@return 0 */
-static
-int
-innobase_start_trx_and_assign_read_view(
-/*====================================*/
- handlerton* hton, /*!< in: InnoDB handlerton */
- THD* thd); /*!< in: MySQL thread handle of the user for whom
- the transaction should be started */
-/****************************************************************//**
-Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes
-the logs, and the name of this function should be innobase_checkpoint.
-@return TRUE if error */
-static
-bool
-innobase_flush_logs(
-/*================*/
- handlerton* hton); /*!< in: InnoDB handlerton */
-
-/************************************************************************//**
-Implements the SHOW INNODB STATUS command. Sends the output of the InnoDB
-Monitor to the client. */
-static
-bool
-innodb_show_status(
-/*===============*/
- handlerton* hton, /*!< in: the innodb handlerton */
- THD* thd, /*!< in: the MySQL query thread of the caller */
- stat_print_fn *stat_print);
-static
-bool innobase_show_status(handlerton *hton, THD* thd,
- stat_print_fn* stat_print,
- enum ha_stat_type stat_type);
-
-/*****************************************************************//**
-Commits a transaction in an InnoDB database. */
-static
-void
-innobase_commit_low(
-/*================*/
- trx_t* trx); /*!< in: transaction handle */
-
-static SHOW_VAR innodb_status_variables[]= {
- {"buffer_pool_pages_data",
- (char*) &export_vars.innodb_buffer_pool_pages_data, SHOW_LONG},
- {"buffer_pool_pages_dirty",
- (char*) &export_vars.innodb_buffer_pool_pages_dirty, SHOW_LONG},
- {"buffer_pool_pages_flushed",
- (char*) &export_vars.innodb_buffer_pool_pages_flushed, SHOW_LONG},
- {"buffer_pool_pages_free",
- (char*) &export_vars.innodb_buffer_pool_pages_free, SHOW_LONG},
-#ifdef UNIV_DEBUG
- {"buffer_pool_pages_latched",
- (char*) &export_vars.innodb_buffer_pool_pages_latched, SHOW_LONG},
-#endif /* UNIV_DEBUG */
- {"buffer_pool_pages_misc",
- (char*) &export_vars.innodb_buffer_pool_pages_misc, SHOW_LONG},
- {"buffer_pool_pages_total",
- (char*) &export_vars.innodb_buffer_pool_pages_total, SHOW_LONG},
- {"buffer_pool_read_ahead",
- (char*) &export_vars.innodb_buffer_pool_read_ahead, SHOW_LONG},
- {"buffer_pool_read_ahead_evicted",
- (char*) &export_vars.innodb_buffer_pool_read_ahead_evicted, SHOW_LONG},
- {"buffer_pool_read_requests",
- (char*) &export_vars.innodb_buffer_pool_read_requests, SHOW_LONG},
- {"buffer_pool_reads",
- (char*) &export_vars.innodb_buffer_pool_reads, SHOW_LONG},
- {"buffer_pool_wait_free",
- (char*) &export_vars.innodb_buffer_pool_wait_free, SHOW_LONG},
- {"buffer_pool_write_requests",
- (char*) &export_vars.innodb_buffer_pool_write_requests, SHOW_LONG},
- {"data_fsyncs",
- (char*) &export_vars.innodb_data_fsyncs, SHOW_LONG},
- {"data_pending_fsyncs",
- (char*) &export_vars.innodb_data_pending_fsyncs, SHOW_LONG},
- {"data_pending_reads",
- (char*) &export_vars.innodb_data_pending_reads, SHOW_LONG},
- {"data_pending_writes",
- (char*) &export_vars.innodb_data_pending_writes, SHOW_LONG},
- {"data_read",
- (char*) &export_vars.innodb_data_read, SHOW_LONG},
- {"data_reads",
- (char*) &export_vars.innodb_data_reads, SHOW_LONG},
- {"data_writes",
- (char*) &export_vars.innodb_data_writes, SHOW_LONG},
- {"data_written",
- (char*) &export_vars.innodb_data_written, SHOW_LONG},
- {"dblwr_pages_written",
- (char*) &export_vars.innodb_dblwr_pages_written, SHOW_LONG},
- {"dblwr_writes",
- (char*) &export_vars.innodb_dblwr_writes, SHOW_LONG},
- {"have_atomic_builtins",
- (char*) &export_vars.innodb_have_atomic_builtins, SHOW_BOOL},
- {"log_waits",
- (char*) &export_vars.innodb_log_waits, SHOW_LONG},
- {"log_write_requests",
- (char*) &export_vars.innodb_log_write_requests, SHOW_LONG},
- {"log_writes",
- (char*) &export_vars.innodb_log_writes, SHOW_LONG},
- {"os_log_fsyncs",
- (char*) &export_vars.innodb_os_log_fsyncs, SHOW_LONG},
- {"os_log_pending_fsyncs",
- (char*) &export_vars.innodb_os_log_pending_fsyncs, SHOW_LONG},
- {"os_log_pending_writes",
- (char*) &export_vars.innodb_os_log_pending_writes, SHOW_LONG},
- {"os_log_written",
- (char*) &export_vars.innodb_os_log_written, SHOW_LONG},
- {"page_size",
- (char*) &export_vars.innodb_page_size, SHOW_LONG},
- {"pages_created",
- (char*) &export_vars.innodb_pages_created, SHOW_LONG},
- {"pages_read",
- (char*) &export_vars.innodb_pages_read, SHOW_LONG},
- {"pages_written",
- (char*) &export_vars.innodb_pages_written, SHOW_LONG},
- {"row_lock_current_waits",
- (char*) &export_vars.innodb_row_lock_current_waits, SHOW_LONG},
- {"row_lock_time",
- (char*) &export_vars.innodb_row_lock_time, SHOW_LONGLONG},
- {"row_lock_time_avg",
- (char*) &export_vars.innodb_row_lock_time_avg, SHOW_LONG},
- {"row_lock_time_max",
- (char*) &export_vars.innodb_row_lock_time_max, SHOW_LONG},
- {"row_lock_waits",
- (char*) &export_vars.innodb_row_lock_waits, SHOW_LONG},
- {"rows_deleted",
- (char*) &export_vars.innodb_rows_deleted, SHOW_LONG},
- {"rows_inserted",
- (char*) &export_vars.innodb_rows_inserted, SHOW_LONG},
- {"rows_read",
- (char*) &export_vars.innodb_rows_read, SHOW_LONG},
- {"rows_updated",
- (char*) &export_vars.innodb_rows_updated, SHOW_LONG},
- {NullS, NullS, SHOW_LONG}
-};
-
-/* General functions */
-
-/******************************************************************//**
-Returns true if the thread is the replication thread on the slave
-server. Used in srv_conc_enter_innodb() to determine if the thread
-should be allowed to enter InnoDB - the replication thread is treated
-differently than other threads. Also used in
-srv_conc_force_exit_innodb().
-@return true if thd is the replication thread */
-extern "C" UNIV_INTERN
-ibool
-thd_is_replication_slave_thread(
-/*============================*/
- void* thd) /*!< in: thread handle (THD*) */
-{
- return((ibool) thd_slave_thread((THD*) thd));
-}
-
-/******************************************************************//**
-Save some CPU by testing the value of srv_thread_concurrency in inline
-functions. */
-static inline
-void
-innodb_srv_conc_enter_innodb(
-/*=========================*/
- trx_t* trx) /*!< in: transaction handle */
-{
- if (UNIV_LIKELY(!srv_thread_concurrency)) {
-
- return;
- }
-
- srv_conc_enter_innodb(trx);
-}
-
-/******************************************************************//**
-Save some CPU by testing trx->declared_to_be_inside_innodb in an inline
-function: srv_conc_exit_innodb() is called only when the flag is set. */
-static inline
-void
-innodb_srv_conc_exit_innodb(
-/*========================*/
- trx_t* trx) /*!< in: transaction handle */
-{
- if (UNIV_LIKELY(!trx->declared_to_be_inside_innodb)) {
-
- return;
- }
-
- srv_conc_exit_innodb(trx);
-}
-
-/******************************************************************//**
-Releases possible search latch and InnoDB thread FIFO ticket. These should
-be released at each SQL statement end, and also when mysqld passes the
-control to the client. It does no harm to release these also in the middle
-of an SQL statement. */
-static inline
-void
-innobase_release_stat_resources(
-/*============================*/
- trx_t* trx) /*!< in: transaction object */
-{
- if (trx->has_search_latch) {
- trx_search_latch_release_if_reserved(trx);
- }
-
- if (trx->declared_to_be_inside_innodb) {
- /* Release our possible ticket in the FIFO */
-
- srv_conc_force_exit_innodb(trx);
- }
-}
-
-/******************************************************************//**
-Returns true if the transaction this thread is processing has edited
-non-transactional tables. Used by the deadlock detector when deciding
-which transaction to rollback in case of a deadlock - we try to avoid
-rolling back transactions that have edited non-transactional tables.
-@return true if non-transactional tables have been edited */
-extern "C" UNIV_INTERN
-ibool
-thd_has_edited_nontrans_tables(
-/*===========================*/
- void* thd) /*!< in: thread handle (THD*) */
-{
- return((ibool) thd_non_transactional_update((THD*) thd));
-}
-
-/******************************************************************//**
-Returns true if the thread is executing a SELECT statement.
-@return true if thd is executing SELECT */
-extern "C" UNIV_INTERN
-ibool
-thd_is_select(
-/*==========*/
- const void* thd) /*!< in: thread handle (THD*) */
-{
- return(thd_sql_command((const THD*) thd) == SQLCOM_SELECT);
-}
-
-/******************************************************************//**
-Returns true if the thread supports XA, or the global value of
-innodb_supports_xa if thd is NULL.
-@return true if thd has XA support */
-extern "C" UNIV_INTERN
-ibool
-thd_supports_xa(
-/*============*/
- void* thd) /*!< in: thread handle (THD*), or NULL to query
- the global innodb_supports_xa */
-{
- return(THDVAR((THD*) thd, support_xa));
-}
-
-/******************************************************************//**
-Returns the lock wait timeout for the current connection.
-@return the lock wait timeout, in seconds */
-extern "C" UNIV_INTERN
-ulong
-thd_lock_wait_timeout(
-/*==================*/
- void* thd) /*!< in: thread handle (THD*), or NULL to query
- the global innodb_lock_wait_timeout */
-{
- /* According to <mysql/plugin.h>, passing thd == NULL
- returns the global value of the session variable. */
- return(THDVAR((THD*) thd, lock_wait_timeout));
-}
-
-/********************************************************************//**
-Obtain the InnoDB transaction of a MySQL thread.
-@return reference to transaction pointer */
-static inline
-trx_t*&
-thd_to_trx(
-/*=======*/
- THD* thd) /*!< in: MySQL thread */
-{
- return(*(trx_t**) thd_ha_data(thd, innodb_hton_ptr));
-}
-
-/********************************************************************//**
-Call this function when mysqld passes control to the client. That is to
-avoid deadlocks on the adaptive hash S-latch possibly held by thd. For more
-documentation, see handler.cc.
-@return 0 */
-static
-int
-innobase_release_temporary_latches(
-/*===============================*/
- handlerton* hton, /*!< in: handlerton */
- THD* thd) /*!< in: MySQL thread */
-{
- trx_t* trx;
-
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- if (!innodb_inited) {
-
- return(0);
- }
-
- trx = thd_to_trx(thd);
-
- if (trx) {
- innobase_release_stat_resources(trx);
- }
- return(0);
-}
-
-/********************************************************************//**
-Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth
-time calls srv_active_wake_master_thread. This function should be used
-when a single database operation may introduce a small need for
-server utility activity, like checkpointing. */
-static inline
-void
-innobase_active_small(void)
-/*=======================*/
-{
- innobase_active_counter++;
-
- if ((innobase_active_counter % INNOBASE_WAKE_INTERVAL) == 0) {
- srv_active_wake_master_thread();
- }
-}
-
-/********************************************************************//**
-Converts an InnoDB error code to a MySQL error code and also tells MySQL
-about a possible transaction rollback inside InnoDB caused by a lock wait
-timeout or a deadlock.
-@return MySQL error code */
-extern "C" UNIV_INTERN
-int
-convert_error_code_to_mysql(
-/*========================*/
- int error, /*!< in: InnoDB error code */
- ulint flags, /*!< in: InnoDB table flags, or 0 */
- THD* thd) /*!< in: user thread handle or NULL */
-{
- switch (error) {
- case DB_SUCCESS:
- return(0);
-
- case DB_INTERRUPTED:
- my_error(ER_QUERY_INTERRUPTED, MYF(0));
- /* fall through */
- case DB_ERROR:
- default:
- return(-1); /* unspecified error */
-
- case DB_DUPLICATE_KEY:
- /* Be cautious with returning this error, since
- mysql could re-enter the storage layer to get
- duplicated key info, the operation requires a
- valid table handle and/or transaction information,
- which might not always be available in the error
- handling stage. */
- return(HA_ERR_FOUND_DUPP_KEY);
-
- case DB_FOREIGN_DUPLICATE_KEY:
- return(HA_ERR_FOREIGN_DUPLICATE_KEY);
-
- case DB_MISSING_HISTORY:
- return(HA_ERR_TABLE_DEF_CHANGED);
-
- case DB_RECORD_NOT_FOUND:
- return(HA_ERR_NO_ACTIVE_RECORD);
-
- case DB_DEADLOCK:
- /* Since we rolled back the whole transaction, we must
- tell it also to MySQL so that MySQL knows to empty the
- cached binlog for this transaction */
-
- if (thd) {
- thd_mark_transaction_to_rollback(thd, TRUE);
- }
-
- return(HA_ERR_LOCK_DEADLOCK);
-
- case DB_LOCK_WAIT_TIMEOUT:
- /* Starting from 5.0.13, we let MySQL just roll back the
- latest SQL statement in a lock wait timeout. Previously, we
- rolled back the whole transaction. */
-
- if (thd) {
- thd_mark_transaction_to_rollback(
- thd, (bool)row_rollback_on_timeout);
- }
-
- return(HA_ERR_LOCK_WAIT_TIMEOUT);
-
- case DB_NO_REFERENCED_ROW:
- return(HA_ERR_NO_REFERENCED_ROW);
-
- case DB_ROW_IS_REFERENCED:
- return(HA_ERR_ROW_IS_REFERENCED);
-
- case DB_CANNOT_ADD_CONSTRAINT:
- return(HA_ERR_CANNOT_ADD_FOREIGN);
-
- case DB_CANNOT_DROP_CONSTRAINT:
-
- return(HA_ERR_ROW_IS_REFERENCED); /* TODO: This is a bit
- misleading, a new MySQL error
- code should be introduced */
-
- case DB_COL_APPEARS_TWICE_IN_INDEX:
- case DB_CORRUPTION:
- return(HA_ERR_CRASHED);
-
- case DB_OUT_OF_FILE_SPACE:
- return(HA_ERR_RECORD_FILE_FULL);
-
- case DB_TABLE_IS_BEING_USED:
- return(HA_ERR_WRONG_COMMAND);
-
- case DB_TABLE_NOT_FOUND:
- return(HA_ERR_NO_SUCH_TABLE);
-
- case DB_TOO_BIG_RECORD:
- my_error(ER_TOO_BIG_ROWSIZE, MYF(0),
- page_get_free_space_of_empty(flags
- & DICT_TF_COMPACT) / 2);
- return(HA_ERR_TO_BIG_ROW);
-
- case DB_NO_SAVEPOINT:
- return(HA_ERR_NO_SAVEPOINT);
-
- case DB_LOCK_TABLE_FULL:
- /* Since we rolled back the whole transaction, we must
- tell it also to MySQL so that MySQL knows to empty the
- cached binlog for this transaction */
-
- if (thd) {
- thd_mark_transaction_to_rollback(thd, TRUE);
- }
-
- return(HA_ERR_LOCK_TABLE_FULL);
-
- case DB_PRIMARY_KEY_IS_NULL:
- return(ER_PRIMARY_CANT_HAVE_NULL);
-
- case DB_TOO_MANY_CONCURRENT_TRXS:
- /* New error code HA_ERR_TOO_MANY_CONCURRENT_TRXS is only
- available in 5.1.38 and later, but the plugin should still
- work with previous versions of MySQL. */
-#ifdef HA_ERR_TOO_MANY_CONCURRENT_TRXS
- return(HA_ERR_TOO_MANY_CONCURRENT_TRXS);
-#else /* HA_ERR_TOO_MANY_CONCURRENT_TRXS */
- return(HA_ERR_RECORD_FILE_FULL);
-#endif /* HA_ERR_TOO_MANY_CONCURRENT_TRXS */
- case DB_UNSUPPORTED:
- return(HA_ERR_UNSUPPORTED);
- }
-}
-
-/*************************************************************//**
-If you want to print a thd that is not associated with the current thread,
-you must call this function before reserving the InnoDB kernel_mutex, to
-protect MySQL from setting thd->query NULL. If you print a thd of the current
-thread, we know that MySQL cannot modify thd->query, and it is not necessary
-to call this. Call innobase_mysql_end_print_arbitrary_thd() after you release
-the kernel_mutex. */
-extern "C" UNIV_INTERN
-void
-innobase_mysql_prepare_print_arbitrary_thd(void)
-/*============================================*/
-{
- ut_ad(!mutex_own(&kernel_mutex));
- VOID(pthread_mutex_lock(&LOCK_thread_count));
-}
-
-/*************************************************************//**
-Releases the mutex reserved by innobase_mysql_prepare_print_arbitrary_thd().
-In the InnoDB latching order, the mutex sits right above the
-kernel_mutex. In debug builds, we assert that the kernel_mutex is
-released before this function is invoked. */
-extern "C" UNIV_INTERN
-void
-innobase_mysql_end_print_arbitrary_thd(void)
-/*========================================*/
-{
- ut_ad(!mutex_own(&kernel_mutex));
- VOID(pthread_mutex_unlock(&LOCK_thread_count));
-}
-
-/*************************************************************//**
-Prints info of a THD object (== user session thread) to the given file. */
-extern "C" UNIV_INTERN
-void
-innobase_mysql_print_thd(
-/*=====================*/
- FILE* f, /*!< in: output stream */
- void* thd, /*!< in: pointer to a MySQL THD object */
- uint max_query_len) /*!< in: max query length to print, or 0 to
- use the default max length */
-{
- char buffer[1024];
-
- fputs(thd_security_context((THD*) thd, buffer, sizeof buffer,
- max_query_len), f);
- putc('\n', f);
-}
-
-/******************************************************************//**
-Get the variable length bounds of the given character set. */
-extern "C" UNIV_INTERN
-void
-innobase_get_cset_width(
-/*====================*/
- ulint cset, /*!< in: MySQL charset-collation code */
- ulint* mbminlen, /*!< out: minimum length of a char (in bytes) */
- ulint* mbmaxlen) /*!< out: maximum length of a char (in bytes) */
-{
- CHARSET_INFO* cs;
- ut_ad(cset < 256);
- ut_ad(mbminlen);
- ut_ad(mbmaxlen);
-
- cs = all_charsets[cset];
- if (cs) {
- *mbminlen = cs->mbminlen;
- *mbmaxlen = cs->mbmaxlen;
- } else {
- THD* thd = current_thd;
-
- if (thd && thd_sql_command(thd) == SQLCOM_DROP_TABLE) {
-
- /* Fix bug#46256: allow tables to be dropped if the
- collation is not found, but issue a warning. */
- if ((global_system_variables.log_warnings)
- && (cset != 0)){
-
- sql_print_warning(
- "Unknown collation #%lu.", cset);
- }
- } else {
-
- ut_a(cset == 0);
- }
-
- *mbminlen = *mbmaxlen = 0;
- }
-}
-
-/******************************************************************//**
-Converts an identifier to a table name. */
-extern "C" UNIV_INTERN
-void
-innobase_convert_from_table_id(
-/*===========================*/
- struct charset_info_st* cs, /*!< in: the 'from' character set */
- char* to, /*!< out: converted identifier */
- const char* from, /*!< in: identifier to convert */
- ulint len) /*!< in: length of 'to', in bytes */
-{
- uint errors;
-
- strconvert(cs, from, &my_charset_filename, to, (uint) len, &errors);
-}
-
-/******************************************************************//**
-Converts an identifier to UTF-8. */
-extern "C" UNIV_INTERN
-void
-innobase_convert_from_id(
-/*=====================*/
- struct charset_info_st* cs, /*!< in: the 'from' character set */
- char* to, /*!< out: converted identifier */
- const char* from, /*!< in: identifier to convert */
- ulint len) /*!< in: length of 'to', in bytes */
-{
- uint errors;
-
- strconvert(cs, from, system_charset_info, to, (uint) len, &errors);
-}
-
-/******************************************************************//**
-Compares NUL-terminated UTF-8 strings case insensitively.
-@return 0 if a=b, <0 if a<b, >0 if a>b */
-extern "C" UNIV_INTERN
-int
-innobase_strcasecmp(
-/*================*/
- const char* a, /*!< in: first string to compare */
- const char* b) /*!< in: second string to compare */
-{
- return(my_strcasecmp(system_charset_info, a, b));
-}
-
-/******************************************************************//**
-Makes all characters in a NUL-terminated UTF-8 string lower case. */
-extern "C" UNIV_INTERN
-void
-innobase_casedn_str(
-/*================*/
- char* a) /*!< in/out: string to put in lower case */
-{
- my_casedn_str(system_charset_info, a);
-}
-
-/**********************************************************************//**
-Determines the connection character set.
-@return connection character set */
-extern "C" UNIV_INTERN
-struct charset_info_st*
-innobase_get_charset(
-/*=================*/
- void* mysql_thd) /*!< in: MySQL thread handle */
-{
- return(thd_charset((THD*) mysql_thd));
-}
-
-#if defined (__WIN__) && defined (MYSQL_DYNAMIC_PLUGIN)
-extern MYSQL_PLUGIN_IMPORT MY_TMPDIR mysql_tmpdir_list;
-/*******************************************************************//**
-Map an OS error to an errno value. The OS error number is stored in
-_doserrno and the mapped value is stored in errno. */
-extern "C"
-void __cdecl
-_dosmaperr(
- unsigned long); /*!< in: OS error value */
-
-/*********************************************************************//**
-Creates a temporary file.
-@return temporary file descriptor, or < 0 on error */
-extern "C" UNIV_INTERN
-int
-innobase_mysql_tmpfile(void)
-/*========================*/
-{
- int fd; /* handle of opened file */
- HANDLE osfh; /* OS handle of opened file */
- char* tmpdir; /* point to the directory
- where to create file */
- TCHAR path_buf[MAX_PATH - 14]; /* buffer for tmp file path.
- The length cannot be longer
- than MAX_PATH - 14, or
- GetTempFileName will fail. */
- char filename[MAX_PATH]; /* name of the tmpfile */
- DWORD fileaccess = GENERIC_READ /* OS file access */
- | GENERIC_WRITE
- | DELETE;
- DWORD fileshare = FILE_SHARE_READ /* OS file sharing mode */
- | FILE_SHARE_WRITE
- | FILE_SHARE_DELETE;
- DWORD filecreate = CREATE_ALWAYS; /* OS method of open/create */
- DWORD fileattrib = /* OS file attribute flags */
- FILE_ATTRIBUTE_NORMAL
- | FILE_FLAG_DELETE_ON_CLOSE
- | FILE_ATTRIBUTE_TEMPORARY
- | FILE_FLAG_SEQUENTIAL_SCAN;
-
- DBUG_ENTER("innobase_mysql_tmpfile");
-
- tmpdir = my_tmpdir(&mysql_tmpdir_list);
-
- /* The tmpdir parameter can not be NULL for GetTempFileName. */
- if (!tmpdir) {
- uint ret;
-
- /* Use GetTempPath to determine path for temporary files. */
- ret = GetTempPath(sizeof(path_buf), path_buf);
- if (ret > sizeof(path_buf) || (ret == 0)) {
-
- _dosmaperr(GetLastError()); /* map error */
- DBUG_RETURN(-1);
- }
-
- tmpdir = path_buf;
- }
-
- /* Use GetTempFileName to generate a unique filename. */
- if (!GetTempFileName(tmpdir, "ib", 0, filename)) {
-
- _dosmaperr(GetLastError()); /* map error */
- DBUG_RETURN(-1);
- }
-
- DBUG_PRINT("info", ("filename: %s", filename));
-
- /* Open/Create the file. */
- osfh = CreateFile(filename, fileaccess, fileshare, NULL,
- filecreate, fileattrib, NULL);
- if (osfh == INVALID_HANDLE_VALUE) {
-
- /* open/create file failed! */
- _dosmaperr(GetLastError()); /* map error */
- DBUG_RETURN(-1);
- }
-
- do {
- /* Associates a CRT file descriptor with the OS file handle. */
- fd = _open_osfhandle((intptr_t) osfh, 0);
- } while (fd == -1 && errno == EINTR);
-
- if (fd == -1) {
- /* Open failed, close the file handle. */
-
- _dosmaperr(GetLastError()); /* map error */
- CloseHandle(osfh); /* no need to check if
- CloseHandle fails */
- }
-
- DBUG_RETURN(fd);
-}
-#else
-/*********************************************************************//**
-Creates a temporary file.
-@return temporary file descriptor, or < 0 on error */
-extern "C" UNIV_INTERN
-int
-innobase_mysql_tmpfile(void)
-/*========================*/
-{
- int fd2 = -1;
- File fd = mysql_tmpfile("ib");
- if (fd >= 0) {
- /* Copy the file descriptor, so that the additional resources
- allocated by create_temp_file() can be freed by invoking
- my_close().
-
- Because the file descriptor returned by this function
- will be passed to fdopen(), it will be closed by invoking
- fclose(), which in turn will invoke close() instead of
- my_close(). */
- fd2 = dup(fd);
- if (fd2 < 0) {
- DBUG_PRINT("error",("Got error %d on dup",fd2));
- my_errno=errno;
- my_error(EE_OUT_OF_FILERESOURCES,
- MYF(ME_BELL+ME_WAITTANG),
- "ib*", my_errno);
- }
- my_close(fd, MYF(MY_WME));
- }
- return(fd2);
-}
-#endif /* defined (__WIN__) && defined (MYSQL_DYNAMIC_PLUGIN) */
-
-/*********************************************************************//**
-Wrapper around MySQL's copy_and_convert function.
-@return number of bytes copied to 'to' */
-extern "C" UNIV_INTERN
-ulint
-innobase_convert_string(
-/*====================*/
- void* to, /*!< out: converted string */
- ulint to_length, /*!< in: number of bytes reserved
- for the converted string */
- CHARSET_INFO* to_cs, /*!< in: character set to convert to */
- const void* from, /*!< in: string to convert */
- ulint from_length, /*!< in: number of bytes to convert */
- CHARSET_INFO* from_cs, /*!< in: character set to convert from */
- uint* errors) /*!< out: number of errors encountered
- during the conversion */
-{
- return(copy_and_convert((char*)to, (uint32) to_length, to_cs,
- (const char*)from, (uint32) from_length, from_cs,
- errors));
-}
-
-/*******************************************************************//**
-Formats the raw data in "data" (in InnoDB on-disk format) that is of
-type DATA_(CHAR|VARCHAR|MYSQL|VARMYSQL) using "charset_coll" and writes
-the result to "buf". The result is converted to "system_charset_info".
-Not more than "buf_size" bytes are written to "buf".
-The result is always NUL-terminated (provided buf_size > 0) and the
-number of bytes that were written to "buf" is returned (including the
-terminating NUL).
-@return number of bytes that were written */
-extern "C" UNIV_INTERN
-ulint
-innobase_raw_format(
-/*================*/
- const char* data, /*!< in: raw data */
- ulint data_len, /*!< in: raw data length
- in bytes */
- ulint charset_coll, /*!< in: charset collation */
- char* buf, /*!< out: output buffer */
- ulint buf_size) /*!< in: output buffer size
- in bytes */
-{
- /* XXX we use a hard limit (sizeof(buf_tmp)) instead of allocating
- buf_size bytes from the heap */
- CHARSET_INFO* data_cs;
- char buf_tmp[8192];
- ulint buf_tmp_used;
- uint num_errors;
-
- data_cs = all_charsets[charset_coll];
-
- buf_tmp_used = innobase_convert_string(buf_tmp, sizeof(buf_tmp),
- system_charset_info,
- data, data_len, data_cs,
- &num_errors);
-
- return(ut_str_sql_format(buf_tmp, buf_tmp_used, buf, buf_size));
-}
-
-/*********************************************************************//**
-Compute the next autoinc value.
-
-For MySQL replication the autoincrement values can be partitioned among
-the nodes. The offset is the start or origin of the autoincrement value
-for a particular node. For n nodes the increment will be n and the offset
-will be in the interval [1, n]. The formula tries to allocate the next
-value for a particular node.
-
-Note: This function is also called with increment set to the number of
-values we want to reserve for multi-value inserts e.g.,
-
- INSERT INTO T VALUES(), (), ();
-
-innobase_next_autoinc() will be called with increment set to
-n * 3 where autoinc_lock_mode != TRADITIONAL because we want
-to reserve 3 values for the multi-value INSERT above.
-@return the next value */
-static
-ulonglong
-innobase_next_autoinc(
-/*==================*/
- ulonglong current, /*!< in: Current value */
- ulonglong increment, /*!< in: increment current by */
- ulonglong offset, /*!< in: AUTOINC offset */
- ulonglong max_value) /*!< in: max value for type */
-{
- ulonglong next_value;
-
- /* Should never be 0. */
- ut_a(increment > 0);
-
- /* According to MySQL documentation, if the offset is greater than
- the increment then the offset is ignored. */
- if (offset > increment) {
- offset = 0;
- }
-
- if (max_value <= current) {
- next_value = max_value;
- } else if (offset <= 1) {
- /* Offset 0 and 1 are the same, because there must be at
- least one node in the system. */
- if (max_value - current <= increment) {
- next_value = max_value;
- } else {
- next_value = current + increment;
- }
- } else if (max_value > current) {
- if (current > offset) {
- next_value = ((current - offset) / increment) + 1;
- } else {
- next_value = ((offset - current) / increment) + 1;
- }
-
- ut_a(increment > 0);
- ut_a(next_value > 0);
-
- /* Check for multiplication overflow. */
- if (increment > (max_value / next_value)) {
-
- next_value = max_value;
- } else {
- next_value *= increment;
-
- ut_a(max_value >= next_value);
-
- /* Check for overflow. */
- if (max_value - next_value <= offset) {
- next_value = max_value;
- } else {
- next_value += offset;
- }
- }
- } else {
- next_value = max_value;
- }
-
- ut_a(next_value <= max_value);
-
- return(next_value);
-}
-
-/*********************************************************************//**
-Initializes some fields in an InnoDB transaction object. */
-static
-void
-innobase_trx_init(
-/*==============*/
- THD* thd, /*!< in: user thread handle */
- trx_t* trx) /*!< in/out: InnoDB transaction handle */
-{
- DBUG_ENTER("innobase_trx_init");
- DBUG_ASSERT(EQ_CURRENT_THD(thd));
- DBUG_ASSERT(thd == trx->mysql_thd);
-
- trx->check_foreigns = !thd_test_options(
- thd, OPTION_NO_FOREIGN_KEY_CHECKS);
-
- trx->check_unique_secondary = !thd_test_options(
- thd, OPTION_RELAXED_UNIQUE_CHECKS);
-
- DBUG_VOID_RETURN;
-}
-
-/*********************************************************************//**
-Allocates an InnoDB transaction for a MySQL handler object.
-@return InnoDB transaction handle */
-extern "C" UNIV_INTERN
-trx_t*
-innobase_trx_allocate(
-/*==================*/
- THD* thd) /*!< in: user thread handle */
-{
- trx_t* trx;
-
- DBUG_ENTER("innobase_trx_allocate");
- DBUG_ASSERT(thd != NULL);
- DBUG_ASSERT(EQ_CURRENT_THD(thd));
-
- trx = trx_allocate_for_mysql();
-
- trx->mysql_thd = thd;
- trx->mysql_query_str = thd_query(thd);
-
- innobase_trx_init(thd, trx);
-
- DBUG_RETURN(trx);
-}
-
-/*********************************************************************//**
-Gets the InnoDB transaction handle for a MySQL handler object, creates
-an InnoDB transaction struct if the corresponding MySQL thread struct still
-lacks one.
-@return InnoDB transaction handle */
-static
-trx_t*
-check_trx_exists(
-/*=============*/
- THD* thd) /*!< in: user thread handle */
-{
- trx_t*& trx = thd_to_trx(thd);
-
- ut_ad(EQ_CURRENT_THD(thd));
-
- if (trx == NULL) {
- trx = innobase_trx_allocate(thd);
- } else if (UNIV_UNLIKELY(trx->magic_n != TRX_MAGIC_N)) {
- mem_analyze_corruption(trx);
- ut_error;
- }
-
- innobase_trx_init(thd, trx);
-
- return(trx);
-}
-
-
-/*********************************************************************//**
-Construct ha_innobase handler. */
-UNIV_INTERN
-ha_innobase::ha_innobase(handlerton *hton, TABLE_SHARE *table_arg)
- :handler(hton, table_arg),
- int_table_flags(HA_REC_NOT_IN_SEQ |
- HA_NULL_IN_KEY |
- HA_CAN_INDEX_BLOBS |
- HA_CAN_SQL_HANDLER |
- HA_PRIMARY_KEY_REQUIRED_FOR_POSITION |
- HA_PRIMARY_KEY_IN_READ_INDEX |
- HA_BINLOG_ROW_CAPABLE |
- HA_CAN_GEOMETRY | HA_PARTIAL_COLUMN_READ |
- HA_TABLE_SCAN_ON_INDEX),
- start_of_scan(0),
- num_write_row(0)
-{}
-
-/*********************************************************************//**
-Destruct ha_innobase handler. */
-UNIV_INTERN
-ha_innobase::~ha_innobase()
-{
-}
-
-/*********************************************************************//**
-Updates the user_thd field in a handle and also allocates a new InnoDB
-transaction handle if needed, and updates the transaction fields in the
-prebuilt struct. */
-UNIV_INTERN inline
-void
-ha_innobase::update_thd(
-/*====================*/
- THD* thd) /*!< in: thd to use the handle */
-{
- trx_t* trx;
-
- trx = check_trx_exists(thd);
-
- if (prebuilt->trx != trx) {
-
- row_update_prebuilt_trx(prebuilt, trx);
- }
-
- user_thd = thd;
-}
-
-/*********************************************************************//**
-Updates the user_thd field in a handle and also allocates a new InnoDB
-transaction handle if needed, and updates the transaction fields in the
-prebuilt struct. */
-UNIV_INTERN
-void
-ha_innobase::update_thd()
-/*=====================*/
-{
- THD* thd = ha_thd();
- ut_ad(EQ_CURRENT_THD(thd));
- update_thd(thd);
-}
-
-/*********************************************************************//**
-Registers that InnoDB takes part in an SQL statement, so that MySQL knows to
-roll back the statement if the statement results in an error. This MUST be
-called for every SQL statement that may be rolled back by MySQL. Calling this
-several times to register the same statement is allowed, too. */
-static inline
-void
-innobase_register_stmt(
-/*===================*/
- handlerton* hton, /*!< in: Innobase hton */
- THD* thd) /*!< in: MySQL thd (connection) object */
-{
- DBUG_ASSERT(hton == innodb_hton_ptr);
- /* Register the statement */
- trans_register_ha(thd, FALSE, hton);
-}
-
-/*********************************************************************//**
-Registers an InnoDB transaction in MySQL, so that the MySQL XA code knows
-to call the InnoDB prepare and commit, or rollback for the transaction. This
-MUST be called for every transaction for which the user may call commit or
-rollback. Calling this several times to register the same transaction is
-allowed, too.
-This function also registers the current SQL statement. */
-static inline
-void
-innobase_register_trx_and_stmt(
-/*===========================*/
- handlerton *hton, /*!< in: Innobase handlerton */
- THD* thd) /*!< in: MySQL thd (connection) object */
-{
- /* NOTE that actually innobase_register_stmt() registers also
- the transaction in the AUTOCOMMIT=1 mode. */
-
- innobase_register_stmt(hton, thd);
-
- if (thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
-
- /* No autocommit mode, register for a transaction */
- trans_register_ha(thd, TRUE, hton);
- }
-}
-
-/* BACKGROUND INFO: HOW THE MYSQL QUERY CACHE WORKS WITH INNODB
- ------------------------------------------------------------
-
-1) The use of the query cache for TBL is disabled when there is an
-uncommitted change to TBL.
-
-2) When a change to TBL commits, InnoDB stores the current value of
-its global trx id counter, let us denote it by INV_TRX_ID, to the table object
-in the InnoDB data dictionary, and does only allow such transactions whose
-id <= INV_TRX_ID to use the query cache.
-
-3) When InnoDB does an INSERT/DELETE/UPDATE to a table TBL, or an implicit
-modification because an ON DELETE CASCADE, we invalidate the MySQL query cache
-of TBL immediately.
-
-How this is implemented inside InnoDB:
-
-1) Since every modification always sets an IX type table lock on the InnoDB
-table, it is easy to check if there can be uncommitted modifications for a
-table: just check if there are locks in the lock list of the table.
-
-2) When a transaction inside InnoDB commits, it reads the global trx id
-counter and stores the value INV_TRX_ID to the tables on which it had a lock.
-
-3) If there is an implicit table change from ON DELETE CASCADE or SET NULL,
-InnoDB calls an invalidate method for the MySQL query cache for that table.
-
-How this is implemented inside sql_cache.cc:
-
-1) The query cache for an InnoDB table TBL is invalidated immediately at an
-INSERT/UPDATE/DELETE, just like in the case of MyISAM. No need to delay
-invalidation to the transaction commit.
-
-2) To store or retrieve a value from the query cache of an InnoDB table TBL,
-any query must first ask InnoDB's permission. We must pass the thd as a
-parameter because InnoDB will look at the trx id, if any, associated with
-that thd.
-
-3) Use of the query cache for InnoDB tables is now allowed also when
-AUTOCOMMIT==0 or we are inside BEGIN ... COMMIT. Thus transactions no longer
-put restrictions on the use of the query cache.
-*/
-
-/******************************************************************//**
-The MySQL query cache uses this to check from InnoDB if the query cache at
-the moment is allowed to operate on an InnoDB table. The SQL query must
-be a non-locking SELECT.
-
-The query cache is allowed to operate on certain query only if this function
-returns TRUE for all tables in the query.
-
-If thd is not in the autocommit state, this function also starts a new
-transaction for thd if there is no active trx yet, and assigns a consistent
-read view to it if there is no read view yet.
-
-Why a deadlock of threads is not possible: the query cache calls this function
-at the start of a SELECT processing. Then the calling thread cannot be
-holding any InnoDB semaphores. The calling thread is holding the
-query cache mutex, and this function will reserver the InnoDB kernel mutex.
-Thus, the 'rank' in sync0sync.h of the MySQL query cache mutex is above
-the InnoDB kernel mutex.
-@return TRUE if permitted, FALSE if not; note that the value FALSE
-does not mean we should invalidate the query cache: invalidation is
-called explicitly */
-static
-my_bool
-innobase_query_caching_of_table_permitted(
-/*======================================*/
- THD* thd, /*!< in: thd of the user who is trying to
- store a result to the query cache or
- retrieve it */
- char* full_name, /*!< in: concatenation of database name,
- the null character NUL, and the table
- name */
- uint full_name_len, /*!< in: length of the full name, i.e.
- len(dbname) + len(tablename) + 1 */
- ulonglong *unused) /*!< unused for this engine */
-{
- ibool is_autocommit;
- trx_t* trx;
- char norm_name[1000];
-
- ut_a(full_name_len < 999);
-
- trx = check_trx_exists(thd);
-
- if (trx->isolation_level == TRX_ISO_SERIALIZABLE) {
- /* In the SERIALIZABLE mode we add LOCK IN SHARE MODE to every
- plain SELECT if AUTOCOMMIT is not on. */
-
- return((my_bool)FALSE);
- }
-
- if (trx->has_search_latch) {
- sql_print_error("The calling thread is holding the adaptive "
- "search, latch though calling "
- "innobase_query_caching_of_table_permitted.");
-
- mutex_enter(&kernel_mutex);
- trx_print(stderr, trx, 1024);
- mutex_exit(&kernel_mutex);
- }
-
- innobase_release_stat_resources(trx);
-
- if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
-
- is_autocommit = TRUE;
- } else {
- is_autocommit = FALSE;
-
- }
-
- if (is_autocommit && trx->n_mysql_tables_in_use == 0) {
- /* We are going to retrieve the query result from the query
- cache. This cannot be a store operation to the query cache
- because then MySQL would have locks on tables already.
-
- TODO: if the user has used LOCK TABLES to lock the table,
- then we open a transaction in the call of row_.. below.
- That trx can stay open until UNLOCK TABLES. The same problem
- exists even if we do not use the query cache. MySQL should be
- modified so that it ALWAYS calls some cleanup function when
- the processing of a query ends!
-
- We can imagine we instantaneously serialize this consistent
- read trx to the current trx id counter. If trx2 would have
- changed the tables of a query result stored in the cache, and
- trx2 would have already committed, making the result obsolete,
- then trx2 would have already invalidated the cache. Thus we
- can trust the result in the cache is ok for this query. */
-
- return((my_bool)TRUE);
- }
-
- /* Normalize the table name to InnoDB format */
-
- memcpy(norm_name, full_name, full_name_len);
-
- norm_name[strlen(norm_name)] = '/'; /* InnoDB uses '/' as the
- separator between db and table */
- norm_name[full_name_len] = '\0';
-#ifdef __WIN__
- innobase_casedn_str(norm_name);
-#endif
- /* The call of row_search_.. will start a new transaction if it is
- not yet started */
-
- if (trx->active_trans == 0) {
-
- innobase_register_trx_and_stmt(innodb_hton_ptr, thd);
- trx->active_trans = 1;
- }
-
- if (row_search_check_if_query_cache_permitted(trx, norm_name)) {
-
- /* printf("Query cache for %s permitted\n", norm_name); */
-
- return((my_bool)TRUE);
- }
-
- /* printf("Query cache for %s NOT permitted\n", norm_name); */
-
- return((my_bool)FALSE);
-}
-
-/*****************************************************************//**
-Invalidates the MySQL query cache for the table. */
-extern "C" UNIV_INTERN
-void
-innobase_invalidate_query_cache(
-/*============================*/
- trx_t* trx, /*!< in: transaction which
- modifies the table */
- const char* full_name, /*!< in: concatenation of
- database name, null char NUL,
- table name, null char NUL;
- NOTE that in Windows this is
- always in LOWER CASE! */
- ulint full_name_len) /*!< in: full name length where
- also the null chars count */
-{
- /* Note that the sync0sync.h rank of the query cache mutex is just
- above the InnoDB kernel mutex. The caller of this function must not
- have latches of a lower rank. */
-
- /* Argument TRUE below means we are using transactions */
-#ifdef HAVE_QUERY_CACHE
- mysql_query_cache_invalidate4((THD*) trx->mysql_thd,
- full_name,
- (uint32) full_name_len,
- TRUE);
-#endif
-}
-
-/*****************************************************************//**
-Convert an SQL identifier to the MySQL system_charset_info (UTF-8)
-and quote it if needed.
-@return pointer to the end of buf */
-static
-char*
-innobase_convert_identifier(
-/*========================*/
- char* buf, /*!< out: buffer for converted identifier */
- ulint buflen, /*!< in: length of buf, in bytes */
- const char* id, /*!< in: identifier to convert */
- ulint idlen, /*!< in: length of id, in bytes */
- void* thd, /*!< in: MySQL connection thread, or NULL */
- ibool file_id)/*!< in: TRUE=id is a table or database name;
- FALSE=id is an UTF-8 string */
-{
- char nz[NAME_LEN + 1];
-#if MYSQL_VERSION_ID >= 50141
- char nz2[NAME_LEN + 1 + EXPLAIN_FILENAME_MAX_EXTRA_LENGTH];
-#else /* MYSQL_VERSION_ID >= 50141 */
- char nz2[NAME_LEN + 1 + sizeof srv_mysql50_table_name_prefix];
-#endif /* MYSQL_VERSION_ID >= 50141 */
-
- const char* s = id;
- int q;
-
- if (file_id) {
- /* Decode the table name. The MySQL function expects
- a NUL-terminated string. The input and output strings
- buffers must not be shared. */
-
- if (UNIV_UNLIKELY(idlen > (sizeof nz) - 1)) {
- idlen = (sizeof nz) - 1;
- }
-
- memcpy(nz, id, idlen);
- nz[idlen] = 0;
-
- s = nz2;
-#if MYSQL_VERSION_ID >= 50141
- idlen = explain_filename((THD*) thd, nz, nz2, sizeof nz2,
- EXPLAIN_PARTITIONS_AS_COMMENT);
- goto no_quote;
-#else /* MYSQL_VERSION_ID >= 50141 */
- idlen = filename_to_tablename(nz, nz2, sizeof nz2);
-#endif /* MYSQL_VERSION_ID >= 50141 */
- }
-
- /* See if the identifier needs to be quoted. */
- if (UNIV_UNLIKELY(!thd)) {
- q = '"';
- } else {
- q = get_quote_char_for_identifier((THD*) thd, s, (int) idlen);
- }
-
- if (q == EOF) {
-#if MYSQL_VERSION_ID >= 50141
-no_quote:
-#endif /* MYSQL_VERSION_ID >= 50141 */
- if (UNIV_UNLIKELY(idlen > buflen)) {
- idlen = buflen;
- }
- memcpy(buf, s, idlen);
- return(buf + idlen);
- }
-
- /* Quote the identifier. */
- if (buflen < 2) {
- return(buf);
- }
-
- *buf++ = q;
- buflen--;
-
- for (; idlen; idlen--) {
- int c = *s++;
- if (UNIV_UNLIKELY(c == q)) {
- if (UNIV_UNLIKELY(buflen < 3)) {
- break;
- }
-
- *buf++ = c;
- *buf++ = c;
- buflen -= 2;
- } else {
- if (UNIV_UNLIKELY(buflen < 2)) {
- break;
- }
-
- *buf++ = c;
- buflen--;
- }
- }
-
- *buf++ = q;
- return(buf);
-}
-
-/*****************************************************************//**
-Convert a table or index name to the MySQL system_charset_info (UTF-8)
-and quote it if needed.
-@return pointer to the end of buf */
-extern "C" UNIV_INTERN
-char*
-innobase_convert_name(
-/*==================*/
- char* buf, /*!< out: buffer for converted identifier */
- ulint buflen, /*!< in: length of buf, in bytes */
- const char* id, /*!< in: identifier to convert */
- ulint idlen, /*!< in: length of id, in bytes */
- void* thd, /*!< in: MySQL connection thread, or NULL */
- ibool table_id)/*!< in: TRUE=id is a table or database name;
- FALSE=id is an index name */
-{
- char* s = buf;
- const char* bufend = buf + buflen;
-
- if (table_id) {
- const char* slash = (const char*) memchr(id, '/', idlen);
- if (!slash) {
-
- goto no_db_name;
- }
-
- /* Print the database name and table name separately. */
- s = innobase_convert_identifier(s, bufend - s, id, slash - id,
- thd, TRUE);
- if (UNIV_LIKELY(s < bufend)) {
- *s++ = '.';
- s = innobase_convert_identifier(s, bufend - s,
- slash + 1, idlen
- - (slash - id) - 1,
- thd, TRUE);
- }
- } else if (UNIV_UNLIKELY(*id == TEMP_INDEX_PREFIX)) {
- /* Temporary index name (smart ALTER TABLE) */
- const char temp_index_suffix[]= "--temporary--";
-
- s = innobase_convert_identifier(buf, buflen, id + 1, idlen - 1,
- thd, FALSE);
- if (s - buf + (sizeof temp_index_suffix - 1) < buflen) {
- memcpy(s, temp_index_suffix,
- sizeof temp_index_suffix - 1);
- s += sizeof temp_index_suffix - 1;
- }
- } else {
-no_db_name:
- s = innobase_convert_identifier(buf, buflen, id, idlen,
- thd, table_id);
- }
-
- return(s);
-
-}
-
-/**********************************************************************//**
-Determines if the currently running transaction has been interrupted.
-@return TRUE if interrupted */
-extern "C" UNIV_INTERN
-ibool
-trx_is_interrupted(
-/*===============*/
- trx_t* trx) /*!< in: transaction */
-{
- return(trx && trx->mysql_thd && thd_killed((THD*) trx->mysql_thd));
-}
-
-/**************************************************************//**
-Resets some fields of a prebuilt struct. The template is used in fast
-retrieval of just those column values MySQL needs in its processing. */
-static
-void
-reset_template(
-/*===========*/
- row_prebuilt_t* prebuilt) /*!< in/out: prebuilt struct */
-{
- prebuilt->keep_other_fields_on_keyread = 0;
- prebuilt->read_just_key = 0;
-}
-
-/*****************************************************************//**
-Call this when you have opened a new table handle in HANDLER, before you
-call index_read_idx() etc. Actually, we can let the cursor stay open even
-over a transaction commit! Then you should call this before every operation,
-fetch next etc. This function inits the necessary things even after a
-transaction commit. */
-UNIV_INTERN
-void
-ha_innobase::init_table_handle_for_HANDLER(void)
-/*============================================*/
-{
- /* If current thd does not yet have a trx struct, create one.
- If the current handle does not yet have a prebuilt struct, create
- one. Update the trx pointers in the prebuilt struct. Normally
- this operation is done in external_lock. */
-
- update_thd(ha_thd());
-
- /* Initialize the prebuilt struct much like it would be inited in
- external_lock */
-
- innobase_release_stat_resources(prebuilt->trx);
-
- /* If the transaction is not started yet, start it */
-
- trx_start_if_not_started(prebuilt->trx);
-
- /* Assign a read view if the transaction does not have it yet */
-
- trx_assign_read_view(prebuilt->trx);
-
- /* Set the MySQL flag to mark that there is an active transaction */
-
- if (prebuilt->trx->active_trans == 0) {
-
- innobase_register_trx_and_stmt(ht, user_thd);
-
- prebuilt->trx->active_trans = 1;
- }
-
- /* We did the necessary inits in this function, no need to repeat them
- in row_search_for_mysql */
-
- prebuilt->sql_stat_start = FALSE;
-
- /* We let HANDLER always to do the reads as consistent reads, even
- if the trx isolation level would have been specified as SERIALIZABLE */
-
- prebuilt->select_lock_type = LOCK_NONE;
- prebuilt->stored_select_lock_type = LOCK_NONE;
-
- /* Always fetch all columns in the index record */
-
- prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS;
-
- /* We want always to fetch all columns in the whole row? Or do
- we???? */
-
- prebuilt->used_in_HANDLER = TRUE;
- reset_template(prebuilt);
-}
-
-/*********************************************************************//**
-Opens an InnoDB database.
-@return 0 on success, error code on failure */
-static
-int
-innobase_init(
-/*==========*/
- void *p) /*!< in: InnoDB handlerton */
-{
- static char current_dir[3]; /*!< Set if using current lib */
- int err;
- bool ret;
- char *default_path;
- uint format_id;
-
- DBUG_ENTER("innobase_init");
- handlerton *innobase_hton= (handlerton *)p;
- innodb_hton_ptr = innobase_hton;
-
- innobase_hton->state = SHOW_OPTION_YES;
- innobase_hton->db_type= DB_TYPE_INNODB;
- innobase_hton->savepoint_offset=sizeof(trx_named_savept_t);
- innobase_hton->close_connection=innobase_close_connection;
- innobase_hton->savepoint_set=innobase_savepoint;
- innobase_hton->savepoint_rollback=innobase_rollback_to_savepoint;
- innobase_hton->savepoint_release=innobase_release_savepoint;
- innobase_hton->commit=innobase_commit;
- innobase_hton->rollback=innobase_rollback;
- innobase_hton->prepare=innobase_xa_prepare;
- innobase_hton->recover=innobase_xa_recover;
- innobase_hton->commit_by_xid=innobase_commit_by_xid;
- innobase_hton->rollback_by_xid=innobase_rollback_by_xid;
- innobase_hton->create_cursor_read_view=innobase_create_cursor_view;
- innobase_hton->set_cursor_read_view=innobase_set_cursor_view;
- innobase_hton->close_cursor_read_view=innobase_close_cursor_view;
- innobase_hton->create=innobase_create_handler;
- innobase_hton->drop_database=innobase_drop_database;
- innobase_hton->panic=innobase_end;
- innobase_hton->start_consistent_snapshot=innobase_start_trx_and_assign_read_view;
- innobase_hton->flush_logs=innobase_flush_logs;
- innobase_hton->show_status=innobase_show_status;
- innobase_hton->flags=HTON_NO_FLAGS;
- innobase_hton->release_temporary_latches=innobase_release_temporary_latches;
- innobase_hton->alter_table_flags = innobase_alter_table_flags;
-
- ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);
-
-#ifdef UNIV_DEBUG
- static const char test_filename[] = "-@";
- char test_tablename[sizeof test_filename
- + sizeof srv_mysql50_table_name_prefix];
- if ((sizeof test_tablename) - 1
- != filename_to_tablename(test_filename, test_tablename,
- sizeof test_tablename)
- || strncmp(test_tablename,
- srv_mysql50_table_name_prefix,
- sizeof srv_mysql50_table_name_prefix)
- || strcmp(test_tablename
- + sizeof srv_mysql50_table_name_prefix,
- test_filename)) {
- sql_print_error("tablename encoding has been changed");
- goto error;
- }
-#endif /* UNIV_DEBUG */
-
- /* Check that values don't overflow on 32-bit systems. */
- if (sizeof(ulint) == 4) {
- if (innobase_buffer_pool_size > UINT_MAX32) {
- sql_print_error(
- "innobase_buffer_pool_size can't be over 4GB"
- " on 32-bit systems");
-
- goto error;
- }
-
- if (innobase_log_file_size > UINT_MAX32) {
- sql_print_error(
- "innobase_log_file_size can't be over 4GB"
- " on 32-bit systems");
-
- goto error;
- }
- }
-
- os_innodb_umask = (ulint)my_umask;
-
- /* First calculate the default path for innodb_data_home_dir etc.,
- in case the user has not given any value.
-
- Note that when using the embedded server, the datadirectory is not
- necessarily the current directory of this program. */
-
- if (mysqld_embedded) {
- default_path = mysql_real_data_home;
- fil_path_to_mysql_datadir = mysql_real_data_home;
- } else {
- /* It's better to use current lib, to keep paths short */
- current_dir[0] = FN_CURLIB;
- current_dir[1] = FN_LIBCHAR;
- current_dir[2] = 0;
- default_path = current_dir;
- }
-
- ut_a(default_path);
-
- if (specialflag & SPECIAL_NO_PRIOR) {
- srv_set_thread_priorities = FALSE;
- } else {
- srv_set_thread_priorities = TRUE;
- srv_query_thread_priority = QUERY_PRIOR;
- }
-
- /* Set InnoDB initialization parameters according to the values
- read from MySQL .cnf file */
-
- /*--------------- Data files -------------------------*/
-
- /* The default dir for data files is the datadir of MySQL */
-
- srv_data_home = (innobase_data_home_dir ? innobase_data_home_dir :
- default_path);
-
- /* Set default InnoDB data file size to 10 MB and let it be
- auto-extending. Thus users can use InnoDB in >= 4.0 without having
- to specify any startup options. */
-
- if (!innobase_data_file_path) {
- innobase_data_file_path = (char*) "ibdata1:10M:autoextend";
- }
-
- /* Since InnoDB edits the argument in the next call, we make another
- copy of it: */
-
- internal_innobase_data_file_path = my_strdup(innobase_data_file_path,
- MYF(MY_FAE));
-
- ret = (bool) srv_parse_data_file_paths_and_sizes(
- internal_innobase_data_file_path);
- if (ret == FALSE) {
- sql_print_error(
- "InnoDB: syntax error in innodb_data_file_path");
-mem_free_and_error:
- srv_free_paths_and_sizes();
- my_free(internal_innobase_data_file_path,
- MYF(MY_ALLOW_ZERO_PTR));
- goto error;
- }
-
- /* -------------- Log files ---------------------------*/
-
- /* The default dir for log files is the datadir of MySQL */
-
- if (!innobase_log_group_home_dir) {
- innobase_log_group_home_dir = default_path;
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- /* Since innodb_log_arch_dir has no relevance under MySQL,
- starting from 4.0.6 we always set it the same as
- innodb_log_group_home_dir: */
-
- innobase_log_arch_dir = innobase_log_group_home_dir;
-
- srv_arch_dir = innobase_log_arch_dir;
-#endif /* UNIG_LOG_ARCHIVE */
-
- ret = (bool)
- srv_parse_log_group_home_dirs(innobase_log_group_home_dir);
-
- if (ret == FALSE || innobase_mirrored_log_groups != 1) {
- sql_print_error("syntax error in innodb_log_group_home_dir, or a "
- "wrong number of mirrored log groups");
-
- goto mem_free_and_error;
- }
-
- /* Validate the file format by animal name */
- if (innobase_file_format_name != NULL) {
-
- format_id = innobase_file_format_name_lookup(
- innobase_file_format_name);
-
- if (format_id > DICT_TF_FORMAT_MAX) {
-
- sql_print_error("InnoDB: wrong innodb_file_format.");
-
- goto mem_free_and_error;
- }
- } else {
- /* Set it to the default file format id. Though this
- should never happen. */
- format_id = 0;
- }
-
- srv_file_format = format_id;
-
- /* Given the type of innobase_file_format_name we have little
- choice but to cast away the constness from the returned name.
- innobase_file_format_name is used in the MySQL set variable
- interface and so can't be const. */
-
- innobase_file_format_name =
- (char*) trx_sys_file_format_id_to_name(format_id);
-
- /* Process innobase_file_format_check variable */
- ut_a(innobase_file_format_check != NULL);
-
- /* As a side effect it will set srv_check_file_format_at_startup
- on valid input. First we check for "on"/"off". */
- if (!innobase_file_format_check_on_off(innobase_file_format_check)) {
-
- /* Did the user specify a format name that we support ?
- As a side effect it will update the variable
- srv_check_file_format_at_startup */
- if (innobase_file_format_validate_and_set(
- innobase_file_format_check) < 0) {
-
- sql_print_error("InnoDB: invalid "
- "innodb_file_format_check value: "
- "should be either 'on' or 'off' or "
- "any value up to %s or its "
- "equivalent numeric id",
- trx_sys_file_format_id_to_name(
- DICT_TF_FORMAT_MAX));
-
- goto mem_free_and_error;
- }
- }
-
- if (innobase_change_buffering) {
- ulint use;
-
- for (use = 0;
- use < UT_ARR_SIZE(innobase_change_buffering_values);
- use++) {
- if (!innobase_strcasecmp(
- innobase_change_buffering,
- innobase_change_buffering_values[use])) {
- ibuf_use = (ibuf_use_t) use;
- goto innobase_change_buffering_inited_ok;
- }
- }
-
- sql_print_error("InnoDB: invalid value "
- "innodb_file_format_check=%s",
- innobase_change_buffering);
- goto mem_free_and_error;
- }
-
-innobase_change_buffering_inited_ok:
- ut_a((ulint) ibuf_use < UT_ARR_SIZE(innobase_change_buffering_values));
- innobase_change_buffering = (char*)
- innobase_change_buffering_values[ibuf_use];
-
- /* --------------------------------------------------*/
-
- srv_file_flush_method_str = innobase_file_flush_method;
-
- srv_n_log_groups = (ulint) innobase_mirrored_log_groups;
- srv_n_log_files = (ulint) innobase_log_files_in_group;
- srv_log_file_size = (ulint) innobase_log_file_size;
-
-#ifdef UNIV_LOG_ARCHIVE
- srv_log_archive_on = (ulint) innobase_log_archive;
-#endif /* UNIV_LOG_ARCHIVE */
- srv_log_buffer_size = (ulint) innobase_log_buffer_size;
-
- srv_buf_pool_size = (ulint) innobase_buffer_pool_size;
-
- srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
-
- srv_n_file_io_threads = (ulint) innobase_file_io_threads;
- srv_n_read_io_threads = (ulint) innobase_read_io_threads;
- srv_n_write_io_threads = (ulint) innobase_write_io_threads;
-
- srv_force_recovery = (ulint) innobase_force_recovery;
-
- srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
- srv_use_checksums = (ibool) innobase_use_checksums;
-
-#ifdef HAVE_LARGE_PAGES
- if ((os_use_large_pages = (ibool) my_use_large_pages))
- os_large_page_size = (ulint) opt_large_page_size;
-#endif
-
- row_rollback_on_timeout = (ibool) innobase_rollback_on_timeout;
-
- srv_locks_unsafe_for_binlog = (ibool) innobase_locks_unsafe_for_binlog;
-
- srv_max_n_open_files = (ulint) innobase_open_files;
- srv_innodb_status = (ibool) innobase_create_status_file;
-
- srv_print_verbose_log = mysqld_embedded ? 0 : 1;
-
- /* Store the default charset-collation number of this MySQL
- installation */
-
- data_mysql_default_charset_coll = (ulint)default_charset_info->number;
-
- ut_a(DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL ==
- my_charset_latin1.number);
- ut_a(DATA_MYSQL_BINARY_CHARSET_COLL == my_charset_bin.number);
-
- /* Store the latin1_swedish_ci character ordering table to InnoDB. For
- non-latin1_swedish_ci charsets we use the MySQL comparison functions,
- and consequently we do not need to know the ordering internally in
- InnoDB. */
-
- ut_a(0 == strcmp(my_charset_latin1.name, "latin1_swedish_ci"));
- srv_latin1_ordering = my_charset_latin1.sort_order;
-
- innobase_old_blocks_pct = buf_LRU_old_ratio_update(
- innobase_old_blocks_pct, FALSE);
-
- innobase_commit_concurrency_init_default();
-
- /* Since we in this module access directly the fields of a trx
- struct, and due to different headers and flags it might happen that
- mutex_t has a different size in this module and in InnoDB
- modules, we check at run time that the size is the same in
- these compilation modules. */
-
- err = innobase_start_or_create_for_mysql();
-
- if (err != DB_SUCCESS) {
- goto mem_free_and_error;
- }
-
- innobase_open_tables = hash_create(200);
- pthread_mutex_init(&innobase_share_mutex, MY_MUTEX_INIT_FAST);
- pthread_mutex_init(&prepare_commit_mutex, MY_MUTEX_INIT_FAST);
- pthread_mutex_init(&commit_threads_m, MY_MUTEX_INIT_FAST);
- pthread_mutex_init(&commit_cond_m, MY_MUTEX_INIT_FAST);
- pthread_mutex_init(&analyze_mutex, MY_MUTEX_INIT_FAST);
- pthread_cond_init(&commit_cond, NULL);
- innodb_inited= 1;
-#ifdef MYSQL_DYNAMIC_PLUGIN
- if (innobase_hton != p) {
- innobase_hton = reinterpret_cast<handlerton*>(p);
- *innobase_hton = *innodb_hton_ptr;
- }
-#endif /* MYSQL_DYNAMIC_PLUGIN */
-
- /* Get the current high water mark format. */
- innobase_file_format_check = (char*) trx_sys_file_format_max_get();
-
- DBUG_RETURN(FALSE);
-error:
- DBUG_RETURN(TRUE);
-}
-
-/*******************************************************************//**
-Closes an InnoDB database.
-@return TRUE if error */
-static
-int
-innobase_end(
-/*=========*/
- handlerton* hton, /*!< in/out: InnoDB handlerton */
- ha_panic_function type __attribute__((unused)))
- /*!< in: ha_panic() parameter */
-{
- int err= 0;
-
- DBUG_ENTER("innobase_end");
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
-#ifdef __NETWARE__ /* some special cleanup for NetWare */
- if (nw_panic) {
- set_panic_flag_for_netware();
- }
-#endif
- if (innodb_inited) {
-
- srv_fast_shutdown = (ulint) innobase_fast_shutdown;
- innodb_inited = 0;
- hash_table_free(innobase_open_tables);
- innobase_open_tables = NULL;
- if (innobase_shutdown_for_mysql() != DB_SUCCESS) {
- err = 1;
- }
- srv_free_paths_and_sizes();
- my_free(internal_innobase_data_file_path,
- MYF(MY_ALLOW_ZERO_PTR));
- pthread_mutex_destroy(&innobase_share_mutex);
- pthread_mutex_destroy(&prepare_commit_mutex);
- pthread_mutex_destroy(&commit_threads_m);
- pthread_mutex_destroy(&commit_cond_m);
- pthread_mutex_destroy(&analyze_mutex);
- pthread_cond_destroy(&commit_cond);
- }
-
- DBUG_RETURN(err);
-}
-
-/****************************************************************//**
-Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes
-the logs, and the name of this function should be innobase_checkpoint.
-@return TRUE if error */
-static
-bool
-innobase_flush_logs(
-/*================*/
- handlerton* hton) /*!< in/out: InnoDB handlerton */
-{
- bool result = 0;
-
- DBUG_ENTER("innobase_flush_logs");
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- log_buffer_flush_to_disk();
-
- DBUG_RETURN(result);
-}
-
-/****************************************************************//**
-Return alter table flags supported in an InnoDB database. */
-static
-uint
-innobase_alter_table_flags(
-/*=======================*/
- uint flags)
-{
- return(HA_ONLINE_ADD_INDEX_NO_WRITES
- | HA_ONLINE_DROP_INDEX_NO_WRITES
- | HA_ONLINE_ADD_UNIQUE_INDEX_NO_WRITES
- | HA_ONLINE_DROP_UNIQUE_INDEX_NO_WRITES
- | HA_ONLINE_ADD_PK_INDEX_NO_WRITES);
-}
-
-/*****************************************************************//**
-Commits a transaction in an InnoDB database. */
-static
-void
-innobase_commit_low(
-/*================*/
- trx_t* trx) /*!< in: transaction handle */
-{
- if (trx->conc_state == TRX_NOT_STARTED) {
-
- return;
- }
-
- trx_commit_for_mysql(trx);
-}
-
-/*****************************************************************//**
-Creates an InnoDB transaction struct for the thd if it does not yet have one.
-Starts a new InnoDB transaction if a transaction is not yet started. And
-assigns a new snapshot for a consistent read if the transaction does not yet
-have one.
-@return 0 */
-static
-int
-innobase_start_trx_and_assign_read_view(
-/*====================================*/
- handlerton *hton, /*!< in: Innodb handlerton */
- THD* thd) /*!< in: MySQL thread handle of the user for whom
- the transaction should be committed */
-{
- trx_t* trx;
-
- DBUG_ENTER("innobase_start_trx_and_assign_read_view");
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- /* Create a new trx struct for thd, if it does not yet have one */
-
- trx = check_trx_exists(thd);
-
- /* This is just to play safe: release a possible FIFO ticket and
- search latch. Since we will reserve the kernel mutex, we have to
- release the search system latch first to obey the latching order. */
-
- innobase_release_stat_resources(trx);
-
- /* If the transaction is not started yet, start it */
-
- trx_start_if_not_started(trx);
-
- /* Assign a read view if the transaction does not have it yet */
-
- trx_assign_read_view(trx);
-
- /* Set the MySQL flag to mark that there is an active transaction */
-
- if (trx->active_trans == 0) {
- innobase_register_trx_and_stmt(hton, thd);
- trx->active_trans = 1;
- }
-
- DBUG_RETURN(0);
-}
-
-/*****************************************************************//**
-Commits a transaction in an InnoDB database or marks an SQL statement
-ended.
-
-A full commit is performed when 'all' is true, or when neither
-OPTION_NOT_AUTOCOMMIT nor OPTION_BEGIN is set for the connection. In that
-case the number of threads committing at the same time is limited to
-innobase_commit_concurrency (when it is > 0), the current binlog file name
-and position are stored in the trx, the commit is done with the log flush
-deferred, prepare_commit_mutex is released if trx->active_trans == 2, and
-only then is the log written and flushed. Otherwise only a possible
-AUTO-INC lock is released and the end of the SQL statement is marked.
-@return 0 */
-static
-int
-innobase_commit(
-/*============*/
- handlerton *hton, /*!< in: Innodb handlerton */
- THD* thd, /*!< in: MySQL thread handle of the user for whom
- the transaction should be committed */
- bool all) /*!< in: TRUE - commit transaction
- FALSE - the current SQL statement ended */
-{
- trx_t* trx;
-
- DBUG_ENTER("innobase_commit");
- DBUG_ASSERT(hton == innodb_hton_ptr);
- DBUG_PRINT("trans", ("ending transaction"));
-
- trx = check_trx_exists(thd);
-
- /* Since we will reserve the kernel mutex, we have to release
- the search system latch first to obey the latching order. */
-
- if (trx->has_search_latch) {
- trx_search_latch_release_if_reserved(trx);
- }
-
- /* The flag trx->active_trans is set to 1 in
-
- 1. ::external_lock(),
- 2. ::start_stmt(),
- 3. innobase_query_caching_of_table_permitted(),
- 4. innobase_savepoint(),
- 5. ::init_table_handle_for_HANDLER(),
- 6. innobase_start_trx_and_assign_read_view(),
- 7. ::transactional_table_lock()
-
- and it is only set to 0 in a commit or a rollback. If it is 0 we know
- there cannot be resources to be freed and we could return immediately.
- For the time being, we play safe and do the cleanup though there should
- be nothing to clean up. The check below only reports the inconsistent
- state in the error log; it does not change the control flow. */
-
- if (trx->active_trans == 0
- && trx->conc_state != TRX_NOT_STARTED) {
-
- sql_print_error("trx->active_trans == 0, but"
- " trx->conc_state != TRX_NOT_STARTED");
- }
- if (all
- || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
-
- /* We were instructed to commit the whole transaction, or
- this is an SQL statement end and autocommit is on */
-
- /* We need current binlog position for ibbackup to work.
- Note, the position is current because of
- prepare_commit_mutex.
-
- The retry loop below throttles concurrent commits: a thread
- that would push commit_threads above
- innobase_commit_concurrency undoes its increment, waits on
- commit_cond, releases commit_cond_m and starts over. */
-retry:
- if (innobase_commit_concurrency > 0) {
- pthread_mutex_lock(&commit_cond_m);
- commit_threads++;
-
- if (commit_threads > innobase_commit_concurrency) {
- commit_threads--;
- pthread_cond_wait(&commit_cond,
- &commit_cond_m);
- pthread_mutex_unlock(&commit_cond_m);
- goto retry;
- }
- else {
- pthread_mutex_unlock(&commit_cond_m);
- }
- }
-
- /* The following calls to read the MySQL binary log
- file name and the position return consistent results:
- 1) Other InnoDB transactions cannot intervene between
- these calls as we are holding prepare_commit_mutex.
- 2) Binary logging of other engines is not relevant
- to InnoDB as all InnoDB requires is that committing
- InnoDB transactions appear in the same order in the
- MySQL binary log as they appear in InnoDB logs.
- 3) A MySQL log file rotation cannot happen because
- MySQL protects against this by having a counter of
- transactions in prepared state and it only allows
- a rotation when the counter drops to zero. See
- LOCK_prep_xids and COND_prep_xids in log.cc. */
- trx->mysql_log_file_name = mysql_bin_log_file_name();
- trx->mysql_log_offset = (ib_int64_t) mysql_bin_log_file_pos();
-
- /* Don't do write + flush right now. For group commit
- to work we want to do the flush after releasing the
- prepare_commit_mutex. */
- trx->flush_log_later = TRUE;
- innobase_commit_low(trx);
- trx->flush_log_later = FALSE;
-
- if (innobase_commit_concurrency > 0) {
- pthread_mutex_lock(&commit_cond_m);
- commit_threads--;
- pthread_cond_signal(&commit_cond);
- pthread_mutex_unlock(&commit_cond_m);
- }
-
- if (trx->active_trans == 2) {
-
- pthread_mutex_unlock(&prepare_commit_mutex);
- }
-
- /* Now do a write + flush of logs. */
- trx_commit_complete_for_mysql(trx);
- trx->active_trans = 0;
-
- } else {
- /* We just mark the SQL statement ended and do not do a
- transaction commit */
-
- /* If we had reserved the auto-inc lock for some
- table in this SQL statement we release it now */
-
- row_unlock_table_autoinc_for_mysql(trx);
-
- /* Store the current undo_no of the transaction so that we
- know where to roll back if we have to roll back the next
- SQL statement */
-
- trx_mark_sql_stat_end(trx);
- }
-
- trx->n_autoinc_rows = 0; /* Reset the number of AUTO-INC rows required */
-
- if (trx->declared_to_be_inside_innodb) {
- /* Release our possible ticket in the FIFO */
-
- srv_conc_force_exit_innodb(trx);
- }
-
- /* Tell the InnoDB server that there might be work for utility
- threads: */
- srv_active_wake_master_thread();
-
- DBUG_RETURN(0);
-}
-
-/*****************************************************************//**
-Rolls back a transaction or the latest SQL statement.
-
-The whole transaction is rolled back (and trx->active_trans reset to 0)
-when 'all' is true, or when neither OPTION_NOT_AUTOCOMMIT nor OPTION_BEGIN
-is set for the connection; otherwise only the latest SQL statement is
-rolled back.
-@return 0 or error number (the InnoDB error converted with
-convert_error_code_to_mysql()) */
-static
-int
-innobase_rollback(
-/*==============*/
- handlerton *hton, /*!< in: Innodb handlerton */
- THD* thd, /*!< in: handle to the MySQL thread of the user
- whose transaction should be rolled back */
- bool all) /*!< in: TRUE - roll back the whole transaction
- FALSE - roll back the latest SQL statement */
-{
- int error = 0;
- trx_t* trx;
-
- DBUG_ENTER("innobase_rollback");
- DBUG_ASSERT(hton == innodb_hton_ptr);
- DBUG_PRINT("trans", ("aborting transaction"));
-
- trx = check_trx_exists(thd);
-
- /* Release a possible FIFO ticket and search latch. Since we will
- reserve the kernel mutex, we have to release the search system latch
- first to obey the latching order. */
-
- innobase_release_stat_resources(trx);
-
- trx->n_autoinc_rows = 0; /* Reset the number of AUTO-INC rows required */
-
- /* If we had reserved the auto-inc lock for some table (if
- we come here to roll back the latest SQL statement) we
- release it now before a possibly lengthy rollback */
-
- row_unlock_table_autoinc_for_mysql(trx);
-
- if (all
- || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
-
- error = trx_rollback_for_mysql(trx);
- trx->active_trans = 0;
- } else {
- error = trx_rollback_last_sql_stat_for_mysql(trx);
- }
-
- DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
-}
-
-/*****************************************************************//**
-Rolls back a whole transaction, given the InnoDB trx object directly
-instead of a MySQL thread handle. Unlike innobase_rollback() this does
-not touch trx->active_trans or trx->n_autoinc_rows.
-@return 0 or error number (the InnoDB error converted with
-convert_error_code_to_mysql()) */
-static
-int
-innobase_rollback_trx(
-/*==================*/
- trx_t* trx) /*!< in: transaction */
-{
- int error = 0;
-
- DBUG_ENTER("innobase_rollback_trx");
- DBUG_PRINT("trans", ("aborting transaction"));
-
- /* Release a possible FIFO ticket and search latch. Since we will
- reserve the kernel mutex, we have to release the search system latch
- first to obey the latching order. */
-
- innobase_release_stat_resources(trx);
-
- /* If we had reserved the auto-inc lock for some table we
- release it now before a possibly lengthy rollback */
-
- row_unlock_table_autoinc_for_mysql(trx);
-
- error = trx_rollback_for_mysql(trx);
-
- DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
-}
-
-/*****************************************************************//**
-Rolls back a transaction to a savepoint.
-
-The binlog cache position that trx_rollback_to_savepoint_for_mysql()
-stores in mysql_binlog_cache_pos is not used by this function.
-@return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
-given name */
-static
-int
-innobase_rollback_to_savepoint(
-/*===========================*/
- handlerton *hton, /*!< in: Innodb handlerton */
- THD* thd, /*!< in: handle to the MySQL thread of the user
- whose transaction should be rolled back */
- void* savepoint) /*!< in: savepoint data */
-{
- ib_int64_t mysql_binlog_cache_pos;
- int error = 0;
- trx_t* trx;
- char name[64];
-
- DBUG_ENTER("innobase_rollback_to_savepoint");
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- trx = check_trx_exists(thd);
-
- /* Release a possible FIFO ticket and search latch. Since we will
- reserve the kernel mutex, we have to release the search system latch
- first to obey the latching order. */
-
- innobase_release_stat_resources(trx);
-
- /* TODO: use provided savepoint data area to store savepoint data.
- For now the savepoint is identified by a name derived from the
- address of the data area, formatted by longlong2str() with 36 as
- the last argument. */
-
- longlong2str((ulint)savepoint, name, 36);
-
- error = (int) trx_rollback_to_savepoint_for_mysql(trx, name,
- &mysql_binlog_cache_pos);
- DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
-}
-
-/*****************************************************************//**
-Release transaction savepoint name.
-@return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
-given name */
-static
-int
-innobase_release_savepoint(
-/*=======================*/
- handlerton* hton, /*!< in: handlerton for Innodb */
- THD* thd, /*!< in: handle to the MySQL thread of the user
- whose savepoint should be released */
- void* savepoint) /*!< in: savepoint data */
-{
- int error = 0;
- trx_t* trx;
- char name[64];
-
- DBUG_ENTER("innobase_release_savepoint");
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- trx = check_trx_exists(thd);
-
- /* TODO: use provided savepoint data area to store savepoint data.
- For now the savepoint is identified by a name derived from the
- address of the data area, formatted by longlong2str() with 36 as
- the last argument. */
-
- longlong2str((ulint)savepoint, name, 36);
-
- error = (int) trx_release_savepoint_for_mysql(trx, name);
-
- DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
-}
-
-/*****************************************************************//**
-Sets a transaction savepoint.
-@return the result of trx_savepoint_for_mysql() converted with
-convert_error_code_to_mysql(); presumably 0 on success, as for the other
-savepoint functions */
-static
-int
-innobase_savepoint(
-/*===============*/
- handlerton* hton, /*!< in: handle to the Innodb handlerton */
- THD* thd, /*!< in: handle to the MySQL thread */
- void* savepoint) /*!< in: savepoint data */
-{
- int error = 0;
- trx_t* trx;
-
- DBUG_ENTER("innobase_savepoint");
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- /*
- In the autocommit mode there is no sense to set a savepoint
- (unless we are in sub-statement), so SQL layer ensures that
- this method is never called in such situation.
- */
-#ifdef MYSQL_SERVER /* plugins cannot access thd->in_sub_stmt */
- DBUG_ASSERT(thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN) ||
- thd->in_sub_stmt);
-#endif /* MYSQL_SERVER */
-
- trx = check_trx_exists(thd);
-
- /* Release a possible FIFO ticket and search latch. Since we will
- reserve the kernel mutex, we have to release the search system latch
- first to obey the latching order. */
-
- innobase_release_stat_resources(trx);
-
- /* cannot happen outside of transaction */
- DBUG_ASSERT(trx->active_trans);
-
- /* TODO: use provided savepoint data area to store savepoint data.
- For now the savepoint is identified by a name derived from the
- address of the data area, formatted by longlong2str() with 36 as
- the last argument. */
- char name[64];
- longlong2str((ulint)savepoint,name,36);
-
- error = (int) trx_savepoint_for_mysql(trx, name, (ib_int64_t)0);
-
- DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
-}
-
-/*****************************************************************//**
-Frees a possible InnoDB trx object associated with the current THD.
-
-Any transaction that is still open is rolled back first; the result of
-that rollback is not checked. The trx object must exist (ut_a).
-@return 0, always */
-static
-int
-innobase_close_connection(
-/*======================*/
- handlerton* hton, /*!< in: innobase handlerton */
- THD* thd) /*!< in: handle to the MySQL thread of the user
- whose resources should be free'd */
-{
- trx_t* trx;
-
- DBUG_ENTER("innobase_close_connection");
- DBUG_ASSERT(hton == innodb_hton_ptr);
- trx = thd_to_trx(thd);
-
- ut_a(trx);
-
- if (trx->active_trans == 0
- && trx->conc_state != TRX_NOT_STARTED) {
-
- sql_print_error("trx->active_trans == 0, but"
- " trx->conc_state != TRX_NOT_STARTED");
- }
-
-
- if (trx->conc_state != TRX_NOT_STARTED &&
- global_system_variables.log_warnings) {
- sql_print_warning(
- "MySQL is closing a connection that has an active "
- "InnoDB transaction. %lu row modifications will "
- "roll back.",
- (ulong) trx->undo_no.low);
- }
-
- innobase_rollback_trx(trx);
-
- thr_local_free(trx->mysql_thread_id);
- trx_free_for_mysql(trx);
-
- DBUG_RETURN(0);
-}
-
-
-/*************************************************************************//**
-** InnoDB database tables
-*****************************************************************************/
-
-/****************************************************************//**
-Reports the row format of the table behind this handle, derived from the
-flags of the InnoDB dictionary table object.
-@return ROW_TYPE_REDUNDANT, ROW_TYPE_COMPACT, ROW_TYPE_COMPRESSED or
-ROW_TYPE_DYNAMIC; ROW_TYPE_NOT_USED (after a debug assertion) if there is
-no table object or the format bits are not recognized */
-UNIV_INTERN
-enum row_type
-ha_innobase::get_row_type() const
-/*=============================*/
-{
-	if (!prebuilt || !prebuilt->table) {
-		/* Nothing to inspect */
-		ut_ad(0);
-		return(ROW_TYPE_NOT_USED);
-	}
-
-	const ulint	tbl_flags = prebuilt->table->flags;
-
-	if (UNIV_UNLIKELY(tbl_flags == 0)) {
-		/* No flag bits set at all */
-		return(ROW_TYPE_REDUNDANT);
-	}
-
-	ut_ad(tbl_flags & DICT_TF_COMPACT);
-
-#if DICT_TF_FORMAT_ZIP != DICT_TF_FORMAT_MAX
-# error "DICT_TF_FORMAT_ZIP != DICT_TF_FORMAT_MAX"
-#endif
-	switch (tbl_flags & DICT_TF_FORMAT_MASK) {
-	case DICT_TF_FORMAT_51 << DICT_TF_FORMAT_SHIFT:
-		return(ROW_TYPE_COMPACT);
-	case DICT_TF_FORMAT_ZIP << DICT_TF_FORMAT_SHIFT:
-		/* The ZSSIZE bits tell the two formats apart */
-		return((tbl_flags & DICT_TF_ZSSIZE_MASK)
-		       ? ROW_TYPE_COMPRESSED
-		       : ROW_TYPE_DYNAMIC);
-	}
-
-	ut_ad(0);
-	return(ROW_TYPE_NOT_USED);
-}
-
-
-
-/****************************************************************//**
-Computes the table flags that apply to the current statement.
-HA_BINLOG_STMT_CAPABLE is added to int_table_flags only when the
-transaction isolation level is above ISO_READ_COMMITTED.
-@return table flags */
-UNIV_INTERN
-handler::Table_flags
-ha_innobase::table_flags() const
-/*============================*/
-{
-	/* The isolation level is taken from the thread, because this
-	method is (also) called before prebuilt is inited. */
-	const ulong	isolation_level = thd_tx_isolation(ha_thd());
-
-	if (isolation_level > ISO_READ_COMMITTED) {
-		return int_table_flags | HA_BINLOG_STMT_CAPABLE;
-	}
-
-	return int_table_flags;
-}
-
-/****************************************************************//**
-File name extensions of an InnoDB single-table tablespace, as a
-NullS-terminated list. */
-static const char* ha_innobase_exts[] = { ".ibd", NullS };
-
-/****************************************************************//**
-Tells the name of the storage engine that owns the table.
-@return innobase_hton_name */
-UNIV_INTERN
-const char*
-ha_innobase::table_type() const
-/*===========================*/
-{
-	return innobase_hton_name;
-}
-
-/****************************************************************//**
-Tells the type of an index. The answer does not depend on the argument.
-@return the string "BTREE" */
-UNIV_INTERN
-const char*
-ha_innobase::index_type(
-/*====================*/
-	uint)	/*!< in: not used */
-{
-	return "BTREE";
-}
-
-/****************************************************************//**
-Gives the list of file name extensions used for table files.
-@return ha_innobase_exts, a NullS-terminated array of strings */
-UNIV_INTERN
-const char**
-ha_innobase::bas_ext() const
-/*========================*/
-{
-	return ha_innobase_exts;
-}
-
-/****************************************************************//**
-Tells which index operations are supported. The same set of flags is
-reported regardless of the arguments.
-@return flags of supported operations */
-UNIV_INTERN
-ulong
-ha_innobase::index_flags(
-/*=====================*/
-	uint,	/*!< in: not used */
-	uint,	/*!< in: not used */
-	bool)	/*!< in: not used */
-const
-{
-	return HA_READ_NEXT
-		| HA_READ_PREV
-		| HA_READ_ORDER
-		| HA_READ_RANGE
-		| HA_KEYREAD_ONLY;
-}
-
-/****************************************************************//**
-Tells how many keys a table may have at most.
-@return MAX_KEY */
-UNIV_INTERN
-uint
-ha_innobase::max_supported_keys() const
-/*===================================*/
-{
-	return MAX_KEY;
-}
-
-/****************************************************************//**
-Tells the maximum length of a key.
-@return maximum supported key length, in bytes */
-UNIV_INTERN
-uint
-ha_innobase::max_supported_key_length() const
-/*=========================================*/
-{
-	/* At least 2 keys have to fit on an InnoDB page, and a secondary
-	key record carries the primary key value as well. The limit is
-	therefore a little under 1 / 4 of the 16 kB page size. Note that
-	MySQL currently does not work with keys whose size is
-	> MAX_KEY_LENGTH. */
-	return 3500;
-}
-
-/****************************************************************//**
-Tells which keys can be used for scanning the table.
-@return pointer to key_map_full */
-UNIV_INTERN
-const key_map*
-ha_innobase::keys_to_use_for_scanning()
-{
-	return &key_map_full;
-}
-
-/****************************************************************//**
-Tells what kind of table caching is supported.
-@return HA_CACHE_TBL_ASKTRANSACT */
-UNIV_INTERN
-uint8
-ha_innobase::table_cache_type()
-{
-	return HA_CACHE_TBL_ASKTRANSACT;
-}
-
-/****************************************************************//**
-Tells whether the primary key is the clustered index.
-@return true, always */
-UNIV_INTERN
-bool
-ha_innobase::primary_key_is_clustered()
-{
-	return true;
-}
-
-/*****************************************************************//**
-Normalizes a table name string. A normalized name consists of the
-database name catenated to '/' and table name. An example:
-test/mytable. On Windows normalization puts both the database name and the
-table name always to lower case.
-
-The input is scanned backwards for the last two path separators ('/' or
-'\\'): what follows the last one is the table name, what lies between the
-two is the database name. Everything from the database name to the
-terminating null is copied, and the separator between the two parts is
-then overwritten with '/'. The function does not check the size of
-norm_name; the caller has to provide a buffer that can hold the copy. */
-static
-void
-normalize_table_name(
-/*=================*/
- char* norm_name, /*!< out: normalized name as a
- null-terminated string */
- const char* name) /*!< in: table name string */
-{
- char* name_ptr;
- char* db_ptr;
- char* ptr;
-
- /* Scan name from the end: first back to the separator that
- precedes the table name. The debug assertion below expects such a
- separator to exist somewhere after the first character. */
-
- ptr = strend(name)-1;
-
- while (ptr >= name && *ptr != '\\' && *ptr != '/') {
- ptr--;
- }
-
- name_ptr = ptr + 1;
-
- DBUG_ASSERT(ptr > name);
-
- ptr--;
-
- while (ptr >= name && *ptr != '\\' && *ptr != '/') {
- ptr--;
- }
-
- db_ptr = ptr + 1;
-
- memcpy(norm_name, db_ptr, strlen(name) + 1 - (db_ptr - name));
-
- norm_name[name_ptr - db_ptr - 1] = '/';
-
-#ifdef __WIN__
- innobase_casedn_str(norm_name);
-#endif
-}
-
-/********************************************************************//**
-Set the autoinc column max value. This should only be called once from
-ha_innobase::open(). Therefore there's no need for a covering lock.
-
-The counter is initialized to the result of row_search_max_autoinc()
-plus one. If that search reports DB_RECORD_NOT_FOUND, a diagnostic is
-printed to stderr and the counter is set to the maximum 64-bit value
-instead. On any other error the counter is left untouched.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ulint
-ha_innobase::innobase_initialize_autoinc()
-/*======================================*/
-{
- dict_index_t* index;
- ulonglong auto_inc;
- const char* col_name;
- ulint error;
-
- col_name = table->found_next_number_field->field_name;
- index = innobase_get_index(table->s->next_number_index);
-
- /* Execute SELECT MAX(col_name) FROM TABLE; */
- error = row_search_max_autoinc(index, col_name, &auto_inc);
-
- switch (error) {
- case DB_SUCCESS:
-
- /* At this stage we don't know the increment
- or the offset, so use the default increment of 1. */
- ++auto_inc;
- break;
-
- case DB_RECORD_NOT_FOUND:
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: MySQL and InnoDB data "
- "dictionaries are out of sync.\n"
- "InnoDB: Unable to find the AUTOINC column %s in the "
- "InnoDB table %s.\n"
- "InnoDB: We set the next AUTOINC column value to the "
- "maximum possible value,\n"
- "InnoDB: in effect disabling the AUTOINC next value "
- "generation.\n"
- "InnoDB: You can either set the next AUTOINC value "
- "explicitly using ALTER TABLE\n"
- "InnoDB: or fix the data dictionary by recreating "
- "the table.\n",
- col_name, index->table->name);
-
- auto_inc = 0xFFFFFFFFFFFFFFFFULL;
- break;
-
- default:
- return(error);
- }
-
- dict_table_autoinc_initialize(prebuilt->table, auto_inc);
-
- return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Creates and opens a handle to a table which already exists in an InnoDB
-database.
-
-The steps are: release temporary latches, normalize the table name, get
-the table share, allocate the update and key value buffers, look the
-table up in the InnoDB dictionary cache (retrying up to 10 times, 100000
-microseconds apart, for names containing "#P#"), build the prebuilt
-struct, work out ref_length from the primary key or the generated row id,
-and finally initialize the AUTOINC counter if the table has an AUTOINC
-column and the counter still reads 0.
-@return 0 if success, 1 if the share or the buffers could not be
-obtained, HA_ERR_NO_SUCH_TABLE if the table is not in the InnoDB data
-dictionary or its .ibd file is missing */
-UNIV_INTERN
-int
-ha_innobase::open(
-/*==============*/
- const char* name, /*!< in: table name */
- int mode, /*!< in: not used */
- uint test_if_locked) /*!< in: not used */
-{
- dict_table_t* ib_table;
- char norm_name[1000];
- THD* thd;
- ulint retries = 0;
- char* is_part = NULL;
-
- DBUG_ENTER("ha_innobase::open");
-
- UT_NOT_USED(mode);
- UT_NOT_USED(test_if_locked);
-
- thd = ha_thd();
-
- /* Under some cases MySQL seems to call this function while
- holding btr_search_latch. This breaks the latching order as
- we acquire dict_sys->mutex below and leads to a deadlock. */
- if (thd != NULL) {
- innobase_release_temporary_latches(ht, thd);
- }
-
- normalize_table_name(norm_name, name);
-
- user_thd = NULL;
-
- if (!(share=get_share(name))) {
-
- DBUG_RETURN(1);
- }
-
- /* Create buffers for packing the fields of a record. Why
- table->reclength did not work here? Obviously, because char
- fields when packed actually became 1 byte longer, when we also
- stored the string length as the first byte. */
-
- upd_and_key_val_buff_len =
- table->s->reclength + table->s->max_key_length
- + MAX_REF_PARTS * 3;
- if (!(uchar*) my_multi_malloc(MYF(MY_WME),
- &upd_buff, upd_and_key_val_buff_len,
- &key_val_buff, upd_and_key_val_buff_len,
- NullS)) {
- free_share(share);
-
- DBUG_RETURN(1);
- }
-
- /* We look for pattern #P# to see if the table is partitioned
- MySQL table. The retry logic for partitioned tables is a
- workaround for http://bugs.mysql.com/bug.php?id=33349. Look
- at support issue https://support.mysql.com/view.php?id=21080
- for more details. */
- is_part = strstr(norm_name, "#P#");
-retry:
- /* Get pointer to a table object in InnoDB dictionary cache */
- ib_table = dict_table_get(norm_name, TRUE);
-
- if (NULL == ib_table) {
- if (is_part && retries < 10) {
- ++retries;
- os_thread_sleep(100000);
- goto retry;
- }
-
- if (is_part) {
- /* retries is a ulint; cast it to match %lu, as is
- done for the other %lu arguments in this file */
- sql_print_error("Failed to open table %s after "
- "%lu attempts.\n", norm_name,
- (ulong) retries);
- }
-
- sql_print_error("Cannot find or open table %s from\n"
- "the internal data dictionary of InnoDB "
- "though the .frm file for the\n"
- "table exists. Maybe you have deleted and "
- "recreated InnoDB data\n"
- "files but have forgotten to delete the "
- "corresponding .frm files\n"
- "of InnoDB tables, or you have moved .frm "
- "files to another database?\n"
- "or, the table contains indexes that this "
- "version of the engine\n"
- "doesn't support.\n"
- "See " REFMAN "innodb-troubleshooting.html\n"
- "how you can resolve the problem.\n",
- norm_name);
- free_share(share);
- my_free(upd_buff, MYF(0));
- my_errno = ENOENT;
-
- DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
- }
-
- if (ib_table->ibd_file_missing && !thd_tablespace_op(thd)) {
- sql_print_error("MySQL is trying to open a table handle but "
- "the .ibd file for\ntable %s does not exist.\n"
- "Have you deleted the .ibd file from the "
- "database directory under\nthe MySQL datadir, "
- "or have you used DISCARD TABLESPACE?\n"
- "See " REFMAN "innodb-troubleshooting.html\n"
- "how you can resolve the problem.\n",
- norm_name);
- free_share(share);
- my_free(upd_buff, MYF(0));
- my_errno = ENOENT;
-
- dict_table_decrement_handle_count(ib_table, FALSE);
- DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
- }
-
- prebuilt = row_create_prebuilt(ib_table);
-
- prebuilt->mysql_row_len = table->s->reclength;
- prebuilt->default_rec = table->s->default_values;
- ut_ad(prebuilt->default_rec);
-
- /* Looks like MySQL-3.23 sometimes has primary key number != 0 */
-
- primary_key = table->s->primary_key;
- key_used_on_scan = primary_key;
-
- /* Allocate a buffer for a 'row reference'. A row reference is
- a string of bytes of length ref_length which uniquely specifies
- a row in our table. Note that MySQL may also compare two row
- references for equality by doing a simple memcmp on the strings
- of length ref_length! */
-
- if (!row_table_got_default_clust_index(ib_table)) {
- if (primary_key >= MAX_KEY) {
- sql_print_error("Table %s has a primary key in InnoDB data "
- "dictionary, but not in MySQL!", name);
- }
-
- prebuilt->clust_index_was_generated = FALSE;
-
- /* MySQL allocates the buffer for ref. key_info->key_length
- includes space for all key columns + one byte for each column
- that may be NULL. ref_length must be as exact as possible to
- save space, because all row reference buffers are allocated
- based on ref_length. */
-
- ref_length = table->key_info[primary_key].key_length;
- } else {
- if (primary_key != MAX_KEY) {
- sql_print_error("Table %s has no primary key in InnoDB data "
- "dictionary, but has one in MySQL! If you "
- "created the table with a MySQL version < "
- "3.23.54 and did not define a primary key, "
- "but defined a unique key with all non-NULL "
- "columns, then MySQL internally treats that "
- "key as the primary key. You can fix this "
- "error by dump + DROP + CREATE + reimport "
- "of the table.", name);
- }
-
- prebuilt->clust_index_was_generated = TRUE;
-
- ref_length = DATA_ROW_ID_LEN;
-
- /* If we automatically created the clustered index, then
- MySQL does not know about it, and MySQL must NOT be aware
- of the index used on scan, to make it avoid checking if we
- update the column of the index. That is why we print a
- warning below if key_used_on_scan is not the undefined value
- MAX_KEY. The column is the row id in the automatic
- generation case, and it will never be updated anyway. */
-
- if (key_used_on_scan != MAX_KEY) {
- sql_print_warning(
- "Table %s key_used_on_scan is %lu even "
- "though there is no primary key inside "
- "InnoDB.", name, (ulong) key_used_on_scan);
- }
- }
-
- /* Index block size in InnoDB: used by MySQL in query optimization */
- stats.block_size = 16 * 1024;
-
- /* Init table lock structure */
- thr_lock_data_init(&share->lock,&lock,(void*) 0);
-
- if (prebuilt->table) {
- /* We update the highest file format in the system table
- space, if this table has higher file format setting. */
-
- trx_sys_file_format_max_upgrade(
- (const char**) &innobase_file_format_check,
- dict_table_get_format(prebuilt->table));
- }
-
- info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
-
- /* Only if the table has an AUTOINC column. */
- if (prebuilt->table != NULL && table->found_next_number_field != NULL) {
- ulint error;
-
- dict_table_autoinc_lock(prebuilt->table);
-
- /* Since a table can already be "open" in InnoDB's internal
- data dictionary, we only init the autoinc counter once, the
- first time the table is loaded. We can safely reuse the
- autoinc value from a previous MySQL open. */
- if (dict_table_autoinc_read(prebuilt->table) == 0) {
-
- error = innobase_initialize_autoinc();
- ut_a(error == DB_SUCCESS);
- }
-
- dict_table_autoinc_unlock(prebuilt->table);
- }
-
- DBUG_RETURN(0);
-}
-
-UNIV_INTERN
-uint
-ha_innobase::max_supported_key_part_length() const
-{
-	/* One less than DICT_MAX_INDEX_COL_LEN */
-	return DICT_MAX_INDEX_COL_LEN - 1;
-}
-
-/******************************************************************//**
-Closes a handle to an InnoDB table: releases temporary latches, frees the
-prebuilt struct, the buffer allocated in open() and the table share, and
-wakes up the master thread.
-@return 0 */
-UNIV_INTERN
-int
-ha_innobase::close(void)
-/*====================*/
-{
-	DBUG_ENTER("ha_innobase::close");
-
-	THD* const	cur_thd = ha_thd();
-
-	if (cur_thd != NULL) {
-		innobase_release_temporary_latches(ht, cur_thd);
-	}
-
-	row_prebuilt_free(prebuilt, FALSE);
-
-	my_free(upd_buff, MYF(0));
-	free_share(share);
-
-	/* There may now be work for the InnoDB utility threads */
-	srv_active_wake_master_thread();
-
-	DBUG_RETURN(0);
-}
-
-/* The following accessor functions should really be inside MySQL code! */
-
-/**************************************************************//**
-Computes where a field starts within a row, as the distance of the
-field's data pointer from the start of table->record[0].
-@return offset */
-static inline
-uint
-get_field_offset(
-/*=============*/
-	TABLE*	table,	/*!< in: MySQL table object */
-	Field*	field)	/*!< in: MySQL field object */
-{
-	return static_cast<uint>(field->ptr - table->record[0]);
-}
-
-/**************************************************************//**
-Tests whether a field is SQL NULL in the given record. The position of
-the null bit is taken from the field's null_ptr, relative to
-table->record[0]. A field without a null_ptr is never NULL.
-@return 1 if NULL, 0 otherwise */
-static inline
-uint
-field_in_record_is_null(
-/*====================*/
-	TABLE*	table,	/*!< in: MySQL table object */
-	Field*	field,	/*!< in: MySQL field object */
-	char*	record)	/*!< in: a row in MySQL format */
-{
-	if (field->null_ptr == NULL) {
-		return(0);
-	}
-
-	const int	null_byte_pos = (uint) ((char*) field->null_ptr
-						- (char*) table->record[0]);
-
-	return((record[null_byte_pos] & field->null_bit) ? 1 : 0);
-}
-
-/**************************************************************//**
-Marks a field as SQL NULL in the given record by setting its null bit.
-The position of the null bit is taken from the field's null_ptr, relative
-to table->record[0]. */
-static inline
-void
-set_field_in_record_to_null(
-/*========================*/
-	TABLE*	table,	/*!< in: MySQL table object */
-	Field*	field,	/*!< in: MySQL field object */
-	char*	record)	/*!< in: a row in MySQL format */
-{
-	const int	null_byte_pos = (uint) ((char*) field->null_ptr
-						- (char*) table->record[0]);
-
-	record[null_byte_pos] |= field->null_bit;
-}
-
-/*************************************************************//**
-Compares two data fields whose data type is such that MySQL code must be
-used for the comparison. NOTE: the prototype of this function lives in
-rem0cmp.c in the InnoDB source code. If you change this function,
-remember to update the prototype there!
-@return 1, 0, -1, if a is greater, equal, less than b, respectively */
-extern "C" UNIV_INTERN
-int
-innobase_mysql_cmp(
-/*===============*/
-	int			mysql_type,	/*!< in: MySQL type */
-	uint			charset_number,	/*!< in: number of the charset */
-	const unsigned char*	a,		/*!< in: data field */
-	unsigned int		a_length,	/*!< in: data field length,
-						not UNIV_SQL_NULL */
-	const unsigned char*	b,		/*!< in: data field */
-	unsigned int		b_length)	/*!< in: data field length,
-						not UNIV_SQL_NULL */
-{
-	CHARSET_INFO*	cs;
-	int		cmp;
-
-	DBUG_ASSERT(a_length != UNIV_SQL_NULL);
-	DBUG_ASSERT(b_length != UNIV_SQL_NULL);
-
-	switch ((enum_field_types) mysql_type) {
-
-	case MYSQL_TYPE_BIT:
-	case MYSQL_TYPE_STRING:
-	case MYSQL_TYPE_VAR_STRING:
-	case MYSQL_TYPE_VARCHAR:
-	case MYSQL_TYPE_TINY_BLOB:
-	case MYSQL_TYPE_BLOB:
-	case MYSQL_TYPE_MEDIUM_BLOB:
-	case MYSQL_TYPE_LONG_BLOB:
-		/* Find the charset struct that belongs to the charset
-		number. The two common charsets are checked first, so
-		that the possibly slow get_charset() is only called for
-		the rest. */
-
-		if (charset_number == default_charset_info->number) {
-			cs = default_charset_info;
-		} else if (charset_number == my_charset_latin1.number) {
-			cs = &my_charset_latin1;
-		} else {
-			cs = get_charset(charset_number, MYF(MY_WME));
-
-			if (cs == NULL) {
-				sql_print_error("InnoDB needs charset %lu for doing "
-						"a comparison, but MySQL cannot "
-						"find that charset.",
-						(ulong) charset_number);
-				ut_a(0);
-			}
-		}
-
-		/* Since 4.1.3 strnncollsp() is used for comparing
-		non-latin1_swedish_ci strings. NOTE that this changes
-		the collation order: 'b\0\0...' is ordered BEFORE
-		'b ...'. Users having indexes on such data need to
-		rebuild their tables! */
-
-		cmp = cs->coll->strnncollsp(cs, a, a_length, b, b_length, 0);
-
-		/* Reduce the result to its sign */
-		return((cmp > 0) - (cmp < 0));
-	default:
-		ut_error;
-	}
-
-	return(0);
-}
-
-/**************************************************************//**
-Converts a MySQL type to an InnoDB type. Note that this function returns
-the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
-VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'.
-
-ENUM and SET (by real_type()) are mapped to DATA_INT and always flagged
-unsigned. For the string types the result depends on whether the field
-is binary and whether its charset is named "latin1_swedish_ci". An
-unknown field type hits ut_error.
-@return DATA_BINARY, DATA_VARCHAR, ... */
-extern "C" UNIV_INTERN
-ulint
-get_innobase_type_from_mysql_type(
-/*==============================*/
- ulint* unsigned_flag, /*!< out: DATA_UNSIGNED if an
- 'unsigned type';
- at least ENUM and SET,
- and unsigned integer
- types are 'unsigned types' */
- const void* f) /*!< in: MySQL Field */
-{
- const class Field* field = reinterpret_cast<const class Field*>(f);
-
- /* The following asserts try to check that the MySQL type code fits in
- 8 bits: this is used in ibuf and also when DATA_NOT_NULL is ORed to
- the type */
-
- DBUG_ASSERT((ulint)MYSQL_TYPE_STRING < 256);
- DBUG_ASSERT((ulint)MYSQL_TYPE_VAR_STRING < 256);
- DBUG_ASSERT((ulint)MYSQL_TYPE_DOUBLE < 256);
- DBUG_ASSERT((ulint)MYSQL_TYPE_FLOAT < 256);
- DBUG_ASSERT((ulint)MYSQL_TYPE_DECIMAL < 256);
-
- if (field->flags & UNSIGNED_FLAG) {
-
- *unsigned_flag = DATA_UNSIGNED;
- } else {
- *unsigned_flag = 0;
- }
-
- if (field->real_type() == MYSQL_TYPE_ENUM
- || field->real_type() == MYSQL_TYPE_SET) {
-
- /* MySQL has field->type() a string type for these, but the
- data is actually internally stored as an unsigned integer
- code! */
-
- *unsigned_flag = DATA_UNSIGNED; /* MySQL has its own unsigned
- flag set to zero, even though
- internally this is an unsigned
- integer type */
- return(DATA_INT);
- }
-
- switch (field->type()) {
- /* NOTE that we only allow string types in DATA_MYSQL and
- DATA_VARMYSQL */
- case MYSQL_TYPE_VAR_STRING: /* old <= 4.1 VARCHAR */
- case MYSQL_TYPE_VARCHAR: /* new >= 5.0.3 true VARCHAR */
- if (field->binary()) {
- return(DATA_BINARY);
- } else if (strcmp(
- field->charset()->name,
- "latin1_swedish_ci") == 0) {
- return(DATA_VARCHAR);
- } else {
- return(DATA_VARMYSQL);
- }
- case MYSQL_TYPE_BIT:
- case MYSQL_TYPE_STRING: if (field->binary()) {
-
- return(DATA_FIXBINARY);
- } else if (strcmp(
- field->charset()->name,
- "latin1_swedish_ci") == 0) {
- return(DATA_CHAR);
- } else {
- return(DATA_MYSQL);
- }
- case MYSQL_TYPE_NEWDECIMAL:
- return(DATA_FIXBINARY);
- case MYSQL_TYPE_LONG:
- case MYSQL_TYPE_LONGLONG:
- case MYSQL_TYPE_TINY:
- case MYSQL_TYPE_SHORT:
- case MYSQL_TYPE_INT24:
- case MYSQL_TYPE_DATE:
- case MYSQL_TYPE_DATETIME:
- case MYSQL_TYPE_YEAR:
- case MYSQL_TYPE_NEWDATE:
- case MYSQL_TYPE_TIME:
- case MYSQL_TYPE_TIMESTAMP:
- return(DATA_INT);
- case MYSQL_TYPE_FLOAT:
- return(DATA_FLOAT);
- case MYSQL_TYPE_DOUBLE:
- return(DATA_DOUBLE);
- case MYSQL_TYPE_DECIMAL:
- return(DATA_DECIMAL);
- case MYSQL_TYPE_GEOMETRY:
- case MYSQL_TYPE_TINY_BLOB:
- case MYSQL_TYPE_MEDIUM_BLOB:
- case MYSQL_TYPE_BLOB:
- case MYSQL_TYPE_LONG_BLOB:
- return(DATA_BLOB);
- default:
- ut_error;
- }
-
- return(0);
-}
-
-/*******************************************************************//**
-Stores an unsigned integer value < 64k in 2 bytes, least significant
-byte first. */
-static inline
-void
-innobase_write_to_2_little_endian(
-/*==============================*/
-	byte*	buf,	/*!< in: where to store */
-	ulint	val)	/*!< in: value to write, must be < 64k */
-{
-	ut_a(val < 256 * 256);
-
-	buf[0] = (byte) (val & 0xFF);	/* low byte */
-	buf[1] = (byte) (val >> 8);	/* high byte */
-}
-
-/*******************************************************************//**
-Reads an unsigned integer value < 64k that is stored in 2 bytes, least
-significant byte first.
-@return value */
-static inline
-uint
-innobase_read_from_2_little_endian(
-/*===============================*/
-	const uchar*	buf)	/*!< in: from where to read */
-{
-	const ulint	low_byte = (ulint) buf[0];
-	const ulint	high_byte = (ulint) buf[1];
-
-	return (uint) (low_byte | (high_byte << 8));
-}
-
-/*******************************************************************//**
-Stores a key value for a row to a buffer.
-
-The buffer is zero-filled first. Then, for every key part of the index
-keynr, an optional NULL-flag byte is written, followed by a payload whose
-layout depends on the MySQL type of the field (true VARCHAR, BLOB-like, or
-anything else); the layout is described in the comment at the start of the
-function body. After all key parts have been written, an assertion checks
-that the write position did not move past buff + buff_len.
-@return key value length as stored in buff */
-UNIV_INTERN
-uint
-ha_innobase::store_key_val_for_row(
-/*===============================*/
- uint keynr, /*!< in: key number */
- char* buff, /*!< in/out: buffer for the key value (in MySQL
- format) */
- uint buff_len,/*!< in: buffer length */
- const uchar* record)/*!< in: row in MySQL format */
-{
- KEY* key_info = table->key_info + keynr;
- KEY_PART_INFO* key_part = key_info->key_part;
- KEY_PART_INFO* end = key_part + key_info->key_parts;
- char* buff_start = buff;
- enum_field_types mysql_type;
- Field* field;
- ibool is_null;
-
- DBUG_ENTER("store_key_val_for_row");
-
- /* The format for storing a key field in MySQL is the following:
-
- 1. If the column can be NULL, then in the first byte we put 1 if the
- field value is NULL, 0 otherwise.
-
- 2. If the column is of a BLOB type (it must be a column prefix field
- in this case), then we put the length of the data in the field to the
- next 2 bytes, in the little-endian format. If the field is SQL NULL,
- then these 2 bytes are set to 0. Note that the length of data in the
- field is <= column prefix length.
-
- 3. In a column prefix field, prefix_len next bytes are reserved for
- data. In a normal field the max field length next bytes are reserved
- for data. For a VARCHAR(n) the max field length is n. If the stored
- value is the SQL NULL then these data bytes are set to 0.
-
- 4. We always use a 2 byte length for a true >= 5.0.3 VARCHAR. Note that
- in the MySQL row format, the length is stored in 1 or 2 bytes,
- depending on the maximum allowed length. But in the MySQL key value
- format, the length always takes 2 bytes.
-
- We have to zero-fill the buffer so that MySQL is able to use a
- simple memcmp to compare two key values to determine if they are
- equal. MySQL does this to compare contents of two 'ref' values. */
-
- bzero(buff, buff_len);
-
- for (; key_part != end; key_part++) {
- is_null = FALSE;
-
- if (key_part->null_bit) {
- if (record[key_part->null_offset]
- & key_part->null_bit) {
- *buff = 1;
- is_null = TRUE;
- } else {
- *buff = 0;
- }
- buff++;
- }
-
- field = key_part->field;
- mysql_type = field->type();
-
- if (mysql_type == MYSQL_TYPE_VARCHAR) {
- /* >= 5.0.3 true VARCHAR */
- ulint lenlen;
- ulint len;
- const byte* data;
- ulint key_len;
- ulint true_len;
- CHARSET_INFO* cs;
- int error=0; /* passed to well_formed_len(); not inspected afterwards */
-
- key_len = key_part->length;
-
- if (is_null) {
- buff += key_len + 2; /* skip the 2 length bytes and the reserved data area */
-
- continue;
- }
- cs = field->charset();
-
- lenlen = (ulint)
- (((Field_varstring*)field)->length_bytes);
-
- data = row_mysql_read_true_varchar(&len,
- (byte*) (record
- + (ulint)get_field_offset(table, field)),
- lenlen);
-
- true_len = len;
-
- /* For multi byte character sets we need to calculate
- the true length of the key */
-
- if (len > 0 && cs->mbmaxlen > 1) {
- true_len = (ulint) cs->cset->well_formed_len(cs,
- (const char *) data,
- (const char *) data + len,
- (uint) (key_len /
- cs->mbmaxlen),
- &error);
- }
-
- /* In a column prefix index, we may need to truncate
- the stored value: */
-
- if (true_len > key_len) {
- true_len = key_len;
- }
-
- /* The length in a key value is always stored in 2
- bytes */
-
- row_mysql_store_true_var_len((byte*)buff, true_len, 2);
- buff += 2;
-
- memcpy(buff, data, true_len);
-
- /* Note that we always reserve the maximum possible
- length of the true VARCHAR in the key value, though
- only true_len first bytes after the 2 length bytes
- contain actual data. The rest of the space was reset
- to zero in the bzero() call above. */
-
- buff += key_len;
-
- } else if (mysql_type == MYSQL_TYPE_TINY_BLOB
- || mysql_type == MYSQL_TYPE_MEDIUM_BLOB
- || mysql_type == MYSQL_TYPE_BLOB
- || mysql_type == MYSQL_TYPE_LONG_BLOB
- /* MYSQL_TYPE_GEOMETRY data is treated
- as BLOB data in innodb. */
- || mysql_type == MYSQL_TYPE_GEOMETRY) {
-
- CHARSET_INFO* cs;
- ulint key_len;
- ulint true_len;
- int error=0; /* passed to well_formed_len(); not inspected afterwards */
- ulint blob_len;
- const byte* blob_data;
-
- ut_a(key_part->key_part_flag & HA_PART_KEY_SEG);
-
- key_len = key_part->length;
-
- if (is_null) {
- buff += key_len + 2; /* skip the 2 length bytes and the reserved data area */
-
- continue;
- }
-
- cs = field->charset();
-
- blob_data = row_mysql_read_blob_ref(&blob_len,
- (byte*) (record
- + (ulint)get_field_offset(table, field)),
- (ulint) field->pack_length());
-
- true_len = blob_len;
-
- ut_a(get_field_offset(table, field)
- == key_part->offset);
-
- /* For multi byte character sets we need to calculate
- the true length of the key */
-
- if (blob_len > 0 && cs->mbmaxlen > 1) {
- true_len = (ulint) cs->cset->well_formed_len(cs,
- (const char *) blob_data,
- (const char *) blob_data
- + blob_len,
- (uint) (key_len /
- cs->mbmaxlen),
- &error);
- }
-
- /* All indexes on BLOB and TEXT are column prefix
- indexes, and we may need to truncate the data to be
- stored in the key value: */
-
- if (true_len > key_len) {
- true_len = key_len;
- }
-
- /* MySQL reserves 2 bytes for the length and the
- storage of the number is little-endian */
-
- innobase_write_to_2_little_endian(
- (byte*)buff, true_len);
- buff += 2;
-
- memcpy(buff, blob_data, true_len);
-
- /* Note that we always reserve the maximum possible
- length of the BLOB prefix in the key value. */
-
- buff += key_len;
- } else {
- /* Here we handle all other data types except the
- true VARCHAR, BLOB and TEXT. Note that the column
- value we store may be also in a column prefix
- index. */
-
- CHARSET_INFO* cs;
- ulint true_len;
- ulint key_len;
- const uchar* src_start;
- int error=0; /* passed to well_formed_len(); not inspected afterwards */
- enum_field_types real_type;
-
- key_len = key_part->length;
-
- if (is_null) {
- buff += key_len; /* no length bytes are stored for these types */
-
- continue;
- }
-
- src_start = record + key_part->offset;
- real_type = field->real_type();
- true_len = key_len;
-
- /* Character set for the field is defined only
- to fields whose type is string and real field
- type is not enum or set. For these fields check
- if character set is multi byte. */
-
- if (real_type != MYSQL_TYPE_ENUM
- && real_type != MYSQL_TYPE_SET
- && ( mysql_type == MYSQL_TYPE_VAR_STRING
- || mysql_type == MYSQL_TYPE_STRING)) {
-
- cs = field->charset();
-
- /* For multi byte character sets we need to
- calculate the true length of the key */
-
- if (key_len > 0 && cs->mbmaxlen > 1) {
-
- true_len = (ulint)
- cs->cset->well_formed_len(cs,
- (const char *)src_start,
- (const char *)src_start
- + key_len,
- (uint) (key_len /
- cs->mbmaxlen),
- &error);
- }
- }
-
- memcpy(buff, src_start, true_len);
- buff += true_len;
-
- /* Pad the unused space with spaces. Note that no
- padding is ever needed for UCS-2 because in MySQL,
- all UCS2 characters are 2 bytes, as MySQL does not
- support surrogate pairs, which are needed to represent
- characters in the range U+10000 to U+10FFFF. */
-
- if (true_len < key_len) {
- ulint pad_len = key_len - true_len;
- memset(buff, ' ', pad_len);
- buff += pad_len;
- }
- }
- }
-
- ut_a(buff <= buff_start + buff_len); /* checked only after all writes are done */
-
- DBUG_RETURN((uint)(buff - buff_start));
-}
-
-/**************************************************************//**
-Builds a 'template' to the prebuilt struct. The template is used in fast
-retrieval of just those column values MySQL needs in its processing.
-
-Outline of the steps performed below:
-1. The requested templ_type may be overridden: it becomes
-ROW_MYSQL_WHOLE_ROW when select_lock_type is LOCK_X, or when all columns
-must be retrieved and read_just_key is not set.
-2. The index used for field positions is prebuilt->index for
-ROW_MYSQL_REC_FIELDS and the clustered index otherwise.
-3. prebuilt->mysql_template is allocated on first use with one slot per
-table column, then filled with one entry per column that is included.
-4. If a secondary index was chosen but the clustered index must be
-accessed after all, every rec_field_no is recomputed as a position in the
-clustered index.
-
-The function writes prebuilt->need_to_access_clustered, template_type,
-null_bitmap_len, templ_contains_blob, n_template and mysql_prefix_len.
-Note that the thd parameter is not referenced in the function body. */
-static
-void
-build_template(
-/*===========*/
- row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct */
- THD* thd, /*!< in: current user thread, used
- only if templ_type is
- ROW_MYSQL_REC_FIELDS */
- TABLE* table, /*!< in: MySQL table */
- uint templ_type) /*!< in: ROW_MYSQL_WHOLE_ROW or
- ROW_MYSQL_REC_FIELDS */
-{
- dict_index_t* index;
- dict_index_t* clust_index;
- mysql_row_templ_t* templ;
- Field* field;
- ulint n_fields;
- ulint n_requested_fields = 0;
- ibool fetch_all_in_key = FALSE;
- ibool fetch_primary_key_cols = FALSE;
- ulint i;
- /* byte offset of the end of last requested column */
- ulint mysql_prefix_len = 0;
-
- if (prebuilt->select_lock_type == LOCK_X) {
- /* We always retrieve the whole clustered index record if we
- use exclusive row level locks, for example, if the read is
- done in an UPDATE statement. */
-
- templ_type = ROW_MYSQL_WHOLE_ROW;
- }
-
- if (templ_type == ROW_MYSQL_REC_FIELDS) {
- if (prebuilt->hint_need_to_fetch_extra_cols
- == ROW_RETRIEVE_ALL_COLS) {
-
- /* We know we must at least fetch all columns in the
- key, or all columns in the table */
-
- if (prebuilt->read_just_key) {
- /* MySQL has instructed us that it is enough
- to fetch the columns in the key; looks like
- MySQL can set this flag also when there is
- only a prefix of the column in the key: in
- that case we retrieve the whole column from
- the clustered index */
-
- fetch_all_in_key = TRUE;
- } else {
- templ_type = ROW_MYSQL_WHOLE_ROW;
- }
- } else if (prebuilt->hint_need_to_fetch_extra_cols
- == ROW_RETRIEVE_PRIMARY_KEY) {
- /* We must at least fetch all primary key cols. Note
- that if the clustered index was internally generated
- by InnoDB on the row id (no primary key was
- defined), then row_search_for_mysql() will always
- retrieve the row id to a special buffer in the
- prebuilt struct. */
-
- fetch_primary_key_cols = TRUE;
- }
- }
-
- clust_index = dict_table_get_first_index(prebuilt->table);
-
- if (templ_type == ROW_MYSQL_REC_FIELDS) {
- index = prebuilt->index;
- } else {
- index = clust_index;
- }
-
- if (index == clust_index) {
- prebuilt->need_to_access_clustered = TRUE;
- } else {
- prebuilt->need_to_access_clustered = FALSE;
- /* Below we check column by column if we need to access
- the clustered index */
- }
-
- n_fields = (ulint)table->s->fields; /* number of columns */
-
- if (!prebuilt->mysql_template) {
- prebuilt->mysql_template = (mysql_row_templ_t*)
- mem_alloc(n_fields * sizeof(mysql_row_templ_t)); /* one slot per table column; an existing array is reused */
- }
-
- prebuilt->template_type = templ_type;
- prebuilt->null_bitmap_len = table->s->null_bytes;
-
- prebuilt->templ_contains_blob = FALSE;
-
- /* Note that in InnoDB, i is the column number. MySQL calls columns
- 'fields'. */
- for (i = 0; i < n_fields; i++) {
- templ = prebuilt->mysql_template + n_requested_fields;
- field = table->field[i];
-
- if (UNIV_LIKELY(templ_type == ROW_MYSQL_REC_FIELDS)) {
- /* Decide which columns we should fetch
- and which we can skip. */
- register const ibool index_contains_field =
- dict_index_contains_col_or_prefix(index, i);
-
- if (!index_contains_field && prebuilt->read_just_key) {
- /* If this is a 'key read', we do not need
- columns that are not in the key */
-
- goto skip_field;
- }
-
- if (index_contains_field && fetch_all_in_key) {
- /* This field is needed in the query */
-
- goto include_field;
- }
-
- if (bitmap_is_set(table->read_set, i) ||
- bitmap_is_set(table->write_set, i)) {
- /* This field is needed in the query */
-
- goto include_field;
- }
-
- if (fetch_primary_key_cols
- && dict_table_col_in_clustered_key(
- index->table, i)) {
- /* This field is needed in the query */
-
- goto include_field;
- }
-
- /* This field is not needed in the query, skip it */
-
- goto skip_field;
- }
-include_field:
- n_requested_fields++; /* templ already points at the slot being filled */
-
- templ->col_no = i;
-
- if (index == clust_index) {
- templ->rec_field_no = dict_col_get_clust_pos(
- &index->table->cols[i], index);
- } else {
- templ->rec_field_no = dict_index_get_nth_col_pos(
- index, i);
- }
-
- if (templ->rec_field_no == ULINT_UNDEFINED) {
- prebuilt->need_to_access_clustered = TRUE;
- }
-
- if (field->null_ptr) {
- templ->mysql_null_byte_offset =
- (ulint) ((char*) field->null_ptr
- - (char*) table->record[0]);
-
- templ->mysql_null_bit_mask = (ulint) field->null_bit;
- } else {
- templ->mysql_null_bit_mask = 0;
- }
-
- templ->mysql_col_offset = (ulint)
- get_field_offset(table, field);
-
- templ->mysql_col_len = (ulint) field->pack_length();
- if (mysql_prefix_len < templ->mysql_col_offset
- + templ->mysql_col_len) {
- mysql_prefix_len = templ->mysql_col_offset
- + templ->mysql_col_len;
- }
- templ->type = index->table->cols[i].mtype;
- templ->mysql_type = (ulint)field->type();
-
- if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) { /* mysql_length_bytes is assigned only in this case */
- templ->mysql_length_bytes = (ulint)
- (((Field_varstring*)field)->length_bytes);
- }
-
- templ->charset = dtype_get_charset_coll(
- index->table->cols[i].prtype);
- templ->mbminlen = index->table->cols[i].mbminlen;
- templ->mbmaxlen = index->table->cols[i].mbmaxlen;
- templ->is_unsigned = index->table->cols[i].prtype
- & DATA_UNSIGNED;
- if (templ->type == DATA_BLOB) {
- prebuilt->templ_contains_blob = TRUE;
- }
-skip_field:
- ;
- }
-
- prebuilt->n_template = n_requested_fields;
- prebuilt->mysql_prefix_len = mysql_prefix_len;
-
- if (index != clust_index && prebuilt->need_to_access_clustered) {
- /* Change rec_field_no's to correspond to the clustered index
- record */
- for (i = 0; i < n_requested_fields; i++) {
- templ = prebuilt->mysql_template + i;
-
- templ->rec_field_no = dict_col_get_clust_pos(
- &index->table->cols[templ->col_no],
- clust_index);
- }
- }
-}
-
-/********************************************************************//**
-Returns the largest value that is considered storable in a column, chosen
-by the key type of the given MySQL integral or floating-point field.
-An unexpected key type triggers ut_error.
-@return upper limit for the key type of the field */
-UNIV_INTERN
-ulonglong
-ha_innobase::innobase_get_int_col_max_value(
-/*========================================*/
-    const Field*    field)  /*!< in: MySQL field whose key type is examined */
-{
-    switch (field->key_type()) {
-    /* TINY */
-    case HA_KEYTYPE_BINARY:
-        return(0xFFULL);
-    case HA_KEYTYPE_INT8:
-        return(0x7FULL);
-
-    /* SHORT */
-    case HA_KEYTYPE_USHORT_INT:
-        return(0xFFFFULL);
-    case HA_KEYTYPE_SHORT_INT:
-        return(0x7FFFULL);
-
-    /* MEDIUM */
-    case HA_KEYTYPE_UINT24:
-        return(0xFFFFFFULL);
-    case HA_KEYTYPE_INT24:
-        return(0x7FFFFFULL);
-
-    /* LONG */
-    case HA_KEYTYPE_ULONG_INT:
-        return(0xFFFFFFFFULL);
-    case HA_KEYTYPE_LONG_INT:
-        return(0x7FFFFFFFULL);
-
-    /* BIG */
-    case HA_KEYTYPE_ULONGLONG:
-        return(0xFFFFFFFFFFFFFFFFULL);
-    case HA_KEYTYPE_LONGLONG:
-        return(0x7FFFFFFFFFFFFFFFULL);
-
-    case HA_KEYTYPE_FLOAT:
-        /* We use the maximum as per IEEE754-2008 standard, 2^24 */
-        return(0x1000000ULL);
-    case HA_KEYTYPE_DOUBLE:
-        /* We use the maximum as per IEEE754-2008 standard, 2^53 */
-        return(0x20000000000000ULL);
-
-    default:
-        ut_error;
-    }
-
-    /* Reached only after the default branch. */
-    return(0);
-}
-
-/********************************************************************//**
-Acquires what is needed to work on the AUTOINC counter of prebuilt->table,
-according to innobase_autoinc_lock_mode.
-
-This special handling is really to overcome the limitations of MySQL's
-binlogging. We need to eliminate the non-determinism that will arise in
-INSERT ... SELECT type of statements, since MySQL binlog only stores the
-min value of the autoinc interval. Once that is fixed we can get rid of
-the special lock handling.
-@return DB_SUCCESS if all OK else error code */
-UNIV_INTERN
-ulint
-ha_innobase::innobase_lock_autoinc(void)
-/*====================================*/
-{
-    ulint   err = DB_SUCCESS;
-
-    switch (innobase_autoinc_lock_mode) {
-    case AUTOINC_NO_LOCKING:
-        /* Only the AUTOINC mutex is taken in this mode. */
-        dict_table_autoinc_lock(prebuilt->table);
-        break;
-
-    case AUTOINC_NEW_STYLE_LOCKING:
-        /* Simple (single/multi) row INSERTs and REPLACEs take just
-        the mutex, unless some transaction already holds or waits
-        for the AUTOINC lock on behalf of a LOAD FILE or
-        INSERT ... SELECT etc. type of statement. */
-        if (thd_sql_command(user_thd) == SQLCOM_INSERT
-            || thd_sql_command(user_thd) == SQLCOM_REPLACE) {
-            dict_table_t*   ib_table = prebuilt->table;
-
-            dict_table_autoinc_lock(ib_table);
-
-            if (!ib_table->n_waiting_or_granted_auto_inc_locks) {
-                /* Nobody holds or waits for the AUTOINC
-                lock: keep the mutex and finish. */
-                break;
-            }
-
-            /* Give the mutex back to avoid deadlocks, then
-            use the old style locking below. */
-            dict_table_autoinc_unlock(ib_table);
-        }
-        /* Fall through to old style locking. */
-
-    case AUTOINC_OLD_STYLE_LOCKING:
-        err = row_lock_table_autoinc_for_mysql(prebuilt);
-
-        if (err == DB_SUCCESS) {
-            /* The table lock was granted; now take the
-            AUTOINC mutex as well. */
-            dict_table_autoinc_lock(prebuilt->table);
-        }
-        break;
-
-    default:
-        ut_error;
-    }
-
-    return(ulong(err));
-}
-
-/********************************************************************//**
-Sets the autoinc counter of the table to the given value. The counter is
-touched only if innobase_lock_autoinc() succeeded, and the AUTOINC mutex
-is released afterwards.
-@return DB_SUCCESS if all went well else error code */
-UNIV_INTERN
-ulint
-ha_innobase::innobase_reset_autoinc(
-/*================================*/
-    ulonglong   autoinc)    /*!< in: value to store */
-{
-    const ulint lock_err = innobase_lock_autoinc();
-
-    if (lock_err != DB_SUCCESS) {
-        /* Locking failed: leave the counter alone. */
-        return(ulong(lock_err));
-    }
-
-    dict_table_autoinc_initialize(prebuilt->table, autoinc);
-    dict_table_autoinc_unlock(prebuilt->table);
-
-    return(ulong(lock_err));
-}
-
-/********************************************************************//**
-Raises the autoinc counter of the table to the given value. The value is
-only stored if it is greater than the counter already kept for the table;
-nothing is changed when innobase_lock_autoinc() fails.
-@return DB_SUCCESS if all went well else error code */
-UNIV_INTERN
-ulint
-ha_innobase::innobase_set_max_autoinc(
-/*==================================*/
-    ulonglong   auto_inc)   /*!< in: value to store */
-{
-    const ulint lock_err = innobase_lock_autoinc();
-
-    if (lock_err != DB_SUCCESS) {
-        /* Locking failed: leave the counter alone. */
-        return(ulong(lock_err));
-    }
-
-    dict_table_autoinc_update_if_greater(prebuilt->table, auto_inc);
-    dict_table_autoinc_unlock(prebuilt->table);
-
-    return(ulong(lock_err));
-}
-
-/********************************************************************//**
-Stores a row in an InnoDB database, to the table specified in this
-handle.
-
-Outline of the steps performed below:
-1. If prebuilt->trx is not the transaction of user_thd, diagnostics are
-printed and ut_error is invoked.
-2. For ALTER TABLE, OPTIMIZE, CREATE INDEX and DROP INDEX, once
-num_write_row has reached 10000 the counter is reset and an intermediate
-commit is attempted (it is skipped via the no_commit label when the
-situation is not recognized).
-3. If the table has an auto-increment column and record is
-table->record[0], update_auto_increment() is called; on failure the
-function leaves through report_error (InnoDB autoinc error) or func_exit
-(MySQL error passed back unconverted).
-4. The row template is (re)built for ROW_MYSQL_WHOLE_ROW if needed and the
-row is inserted with row_insert_for_mysql().
-5. If an auto-increment value was used, the table autoinc counter may be
-advanced depending on the insert result and the SQL command.
-6. The InnoDB error code is converted to a MySQL error code.
-@return error code */
-UNIV_INTERN
-int
-ha_innobase::write_row(
-/*===================*/
- uchar* record) /*!< in: a row in MySQL format */
-{
- ulint error = 0;
- int error_result= 0;
- ibool auto_inc_used= FALSE;
- ulint sql_command;
- trx_t* trx = thd_to_trx(user_thd);
-
- DBUG_ENTER("ha_innobase::write_row");
-
- if (prebuilt->trx != trx) {
- sql_print_error("The transaction object for the table handle is at "
- "%p, but for the current thread it is at %p",
- (const void*) prebuilt->trx, (const void*) trx);
-
- fputs("InnoDB: Dump of 200 bytes around prebuilt: ", stderr);
- ut_print_buf(stderr, ((const byte*)prebuilt) - 100, 200);
- fputs("\n"
- "InnoDB: Dump of 200 bytes around ha_data: ",
- stderr);
- ut_print_buf(stderr, ((const byte*) trx) - 100, 200);
- putc('\n', stderr);
- ut_error;
- }
-
- ha_statistic_increment(&SSV::ha_write_count);
-
- if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
- table->timestamp_field->set_time();
-
- sql_command = thd_sql_command(user_thd);
-
- if ((sql_command == SQLCOM_ALTER_TABLE
- || sql_command == SQLCOM_OPTIMIZE
- || sql_command == SQLCOM_CREATE_INDEX
- || sql_command == SQLCOM_DROP_INDEX)
- && num_write_row >= 10000) {
- /* ALTER TABLE is COMMITted at every 10000 copied rows.
- The IX table lock for the original table has to be re-issued.
- As this method will be called on a temporary table where the
- contents of the original table is being copied to, it is
- a bit tricky to determine the source table. The cursor
- position in the source table need not be adjusted after the
- intermediate COMMIT, since writes by other transactions are
- being blocked by a MySQL table lock TL_WRITE_ALLOW_READ. */
-
- dict_table_t* src_table;
- enum lock_mode mode;
-
- num_write_row = 0;
-
- /* Commit the transaction. This will release the table
- locks, so they have to be acquired again. */
-
- /* Altering an InnoDB table */
- /* Get the source table. */
- src_table = lock_get_src_table(
- prebuilt->trx, prebuilt->table, &mode);
- if (!src_table) {
-no_commit:
- /* Unknown situation: do not commit */
- /*
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ALTER TABLE is holding lock"
- " on %lu tables!\n",
- prebuilt->trx->mysql_n_tables_locked);
- */
- ;
- } else if (src_table == prebuilt->table) {
- /* Source table is not in InnoDB format:
- no need to re-acquire locks on it. */
-
- /* Altering to InnoDB format */
- innobase_commit(ht, user_thd, 1);
- /* Note that this transaction is still active. */
- prebuilt->trx->active_trans = 1;
- /* We will need an IX lock on the destination table. */
- prebuilt->sql_stat_start = TRUE;
- } else {
- /* Ensure that there are no other table locks than
- LOCK_IX and LOCK_AUTO_INC on the destination table. */
-
- if (!lock_is_table_exclusive(prebuilt->table,
- prebuilt->trx)) {
- goto no_commit;
- }
-
- /* Commit the transaction. This will release the table
- locks, so they have to be acquired again. */
- innobase_commit(ht, user_thd, 1);
- /* Note that this transaction is still active. */
- prebuilt->trx->active_trans = 1;
- /* Re-acquire the table lock on the source table. */
- row_lock_table_for_mysql(prebuilt, src_table, mode);
- /* We will need an IX lock on the destination table. */
- prebuilt->sql_stat_start = TRUE;
- }
- }
-
- num_write_row++;
-
- /* This is the case where the table has an auto-increment column */
- if (table->next_number_field && record == table->record[0]) {
-
- /* Reset the error code before calling
- innobase_get_auto_increment(). */
- prebuilt->autoinc_error = DB_SUCCESS;
-
- if ((error = update_auto_increment())) {
-
- /* We don't want to mask autoinc overflow errors. */
- if (prebuilt->autoinc_error != DB_SUCCESS) {
- error = (int) prebuilt->autoinc_error;
-
- goto report_error;
- }
-
- /* MySQL errors are passed straight back. */
- error_result = (int) error;
- goto func_exit;
- }
-
- auto_inc_used = TRUE;
- }
-
- if (prebuilt->mysql_template == NULL
- || prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) {
-
- /* Build the template used in converting quickly between
- the two database formats */
-
- build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
- }
-
- innodb_srv_conc_enter_innodb(prebuilt->trx);
-
- error = row_insert_for_mysql((byte*) record, prebuilt);
-
- /* Handle duplicate key errors */
- if (auto_inc_used) {
- ulint err;
- ulonglong auto_inc;
- ulonglong col_max_value;
-
- /* Note the number of rows processed for this statement, used
- by get_auto_increment() to determine the number of AUTO-INC
- values to reserve. This is only useful for a multi-value INSERT
- and is a statement level counter.*/
- if (trx->n_autoinc_rows > 0) {
- --trx->n_autoinc_rows;
- }
-
- /* We need the upper limit of the col type to check for
- whether we update the table autoinc counter or not. */
- col_max_value = innobase_get_int_col_max_value(
- table->next_number_field);
-
- /* Get the value that MySQL attempted to store in the table.*/
- auto_inc = table->next_number_field->val_int();
-
- switch (error) {
- case DB_DUPLICATE_KEY:
-
- /* A REPLACE command and LOAD DATA INFILE REPLACE
- handle a duplicate key error themselves, but we
- must update the autoinc counter if we are performing
- those statements. */
-
- switch (sql_command) {
- case SQLCOM_LOAD:
- if ((trx->duplicates
- & (TRX_DUP_IGNORE | TRX_DUP_REPLACE))) {
-
- goto set_max_autoinc;
- }
- break;
-
- case SQLCOM_REPLACE:
- case SQLCOM_INSERT_SELECT:
- case SQLCOM_REPLACE_SELECT:
- goto set_max_autoinc;
-
- default:
- break;
- }
-
- break;
-
- case DB_SUCCESS:
- /* If the actual value inserted is greater than
- the upper limit of the interval, then we try and
- update the table upper limit. Note: last_value
- will be 0 if get_auto_increment() was not called.*/
-
- if (auto_inc <= col_max_value
- && auto_inc >= prebuilt->autoinc_last_value) {
-set_max_autoinc:
- ut_a(prebuilt->autoinc_increment > 0);
-
- ulonglong need;
- ulonglong offset;
-
- offset = prebuilt->autoinc_offset;
- need = prebuilt->autoinc_increment;
-
- auto_inc = innobase_next_autoinc(
- auto_inc, need, offset, col_max_value);
-
- err = innobase_set_max_autoinc(auto_inc);
-
- if (err != DB_SUCCESS) {
- error = err;
- }
- }
- break;
- }
- }
-
- innodb_srv_conc_exit_innodb(prebuilt->trx);
-
-report_error:
- error_result = convert_error_code_to_mysql((int) error,
- prebuilt->table->flags,
- user_thd);
-
-func_exit:
- innobase_active_small();
-
- DBUG_RETURN(error_result);
-}
-
-/**********************************************************************//**
-Checks which fields have changed in a row and stores information
-of them to an update vector.
-
-For every column of the MySQL table the old and the new value are located
-(for DATA_BLOB and true VARCHAR columns the actual payload and its length
-are dug up first), SQL NULL is taken into account, and the values are
-compared by length and then with memcmp. For each column that differs, one
-entry is appended to uvect->fields; a non-NULL new value is converted to
-the InnoDB format using upd_buff as working space. On exit uvect->n_fields
-holds the number of changed columns and uvect->info_bits is set to 0.
-After the loop an assertion checks that the conversion did not write past
-upd_buff + buff_len. Note that the thd parameter is not referenced in the
-function body, and that the only value returned is 0.
-@return error number or 0 */
-static
-int
-calc_row_difference(
-/*================*/
- upd_t* uvect, /*!< in/out: update vector */
- uchar* old_row, /*!< in: old row in MySQL format */
- uchar* new_row, /*!< in: new row in MySQL format */
- struct st_table* table, /*!< in: table in MySQL data
- dictionary */
- uchar* upd_buff, /*!< in: buffer to use */
- ulint buff_len, /*!< in: buffer length */
- row_prebuilt_t* prebuilt, /*!< in: InnoDB prebuilt struct */
- THD* thd) /*!< in: user thread */
-{
- uchar* original_upd_buff = upd_buff;
- Field* field;
- enum_field_types field_mysql_type;
- uint n_fields;
- ulint o_len;
- ulint n_len;
- ulint col_pack_len;
- const byte* new_mysql_row_col;
- const byte* o_ptr;
- const byte* n_ptr;
- byte* buf;
- upd_field_t* ufield;
- ulint col_type;
- ulint n_changed = 0;
- dfield_t dfield;
- dict_index_t* clust_index;
- uint i;
-
- n_fields = table->s->fields;
- clust_index = dict_table_get_first_index(prebuilt->table);
-
- /* We use upd_buff to convert changed fields */
- buf = (byte*) upd_buff;
-
- for (i = 0; i < n_fields; i++) {
- field = table->field[i];
-
- o_ptr = (const byte*) old_row + get_field_offset(table, field);
- n_ptr = (const byte*) new_row + get_field_offset(table, field);
-
- /* Use new_mysql_row_col and col_pack_len to save the values:
- n_ptr and n_len may be redirected to the payload below, but
- the conversion needs the start of the column in the MySQL row
- and its pack length. */
-
- new_mysql_row_col = n_ptr;
- col_pack_len = field->pack_length();
-
- o_len = col_pack_len;
- n_len = col_pack_len;
-
- /* We use o_ptr and n_ptr to dig up the actual data for
- comparison. */
-
- field_mysql_type = field->type();
-
- col_type = prebuilt->table->cols[i].mtype;
-
- switch (col_type) {
-
- case DATA_BLOB:
- o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len);
- n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len);
-
- break;
-
- case DATA_VARCHAR:
- case DATA_BINARY:
- case DATA_VARMYSQL:
- if (field_mysql_type == MYSQL_TYPE_VARCHAR) {
- /* This is a >= 5.0.3 type true VARCHAR where
- the real payload data length is stored in
- 1 or 2 bytes */
-
- o_ptr = row_mysql_read_true_varchar(
- &o_len, o_ptr,
- (ulint)
- (((Field_varstring*)field)->length_bytes));
-
- n_ptr = row_mysql_read_true_varchar(
- &n_len, n_ptr,
- (ulint)
- (((Field_varstring*)field)->length_bytes));
- }
-
- break;
- default:
- ;
- }
-
- if (field->null_ptr) {
- if (field_in_record_is_null(table, field,
- (char*) old_row)) {
- o_len = UNIV_SQL_NULL;
- }
-
- if (field_in_record_is_null(table, field,
- (char*) new_row)) {
- n_len = UNIV_SQL_NULL;
- }
- }
-
- if (o_len != n_len || (o_len != UNIV_SQL_NULL &&
- 0 != memcmp(o_ptr, n_ptr, o_len))) {
- /* The field has changed */
-
- ufield = uvect->fields + n_changed;
-
- /* Let us use a dummy dfield to make the conversion
- from the MySQL column format to the InnoDB format */
-
- dict_col_copy_type(prebuilt->table->cols + i,
- dfield_get_type(&dfield));
-
- if (n_len != UNIV_SQL_NULL) {
- buf = row_mysql_store_col_in_innobase_format(
- &dfield,
- (byte*)buf,
- TRUE,
- new_mysql_row_col,
- col_pack_len,
- dict_table_is_comp(prebuilt->table));
- dfield_copy_data(&ufield->new_val, &dfield);
- } else {
- dfield_set_null(&ufield->new_val);
- }
-
- ufield->exp = NULL;
- ufield->orig_len = 0;
- ufield->field_no = dict_col_get_clust_pos(
- &prebuilt->table->cols[i], clust_index);
- n_changed++;
- }
- }
-
- uvect->n_fields = n_changed;
- uvect->info_bits = 0;
-
- ut_a(buf <= (byte*)original_upd_buff + buff_len); /* checked only after all conversions are done */
-
- return(0);
-}
-
-/**********************************************************************//**
-Updates a row given as a parameter to a new value. Note that we are given
-whole rows, not just the fields which are updated: this incurs some
-overhead for CPU when we check which fields are actually updated.
-TODO: currently InnoDB does not prevent the 'Halloween problem':
-in a searched update a single row can get updated several times
-if its index columns are updated!
-
-Outline of the steps performed below:
-1. The update vector is taken from prebuilt->upd_node if one exists,
-otherwise from row_get_prebuilt_update_vector().
-2. calc_row_difference() fills the vector; its return value is not
-checked here.
-3. row_update_for_mysql() performs the update.
-4. For an INSERT whose trx->duplicates flags equal TRX_DUP_IGNORE, and
-with new_row being table->record[0], the table autoinc counter may be
-advanced; the result of that step replaces the error code.
-5. The InnoDB error code is converted to a MySQL error code. A success
-with an empty update vector is reported as HA_ERR_RECORD_IS_THE_SAME.
-@return error number or 0 */
-UNIV_INTERN
-int
-ha_innobase::update_row(
-/*====================*/
- const uchar* old_row, /*!< in: old row in MySQL format */
- uchar* new_row) /*!< in: new row in MySQL format */
-{
- upd_t* uvect;
- int error = 0;
- trx_t* trx = thd_to_trx(user_thd);
-
- DBUG_ENTER("ha_innobase::update_row");
-
- ut_a(prebuilt->trx == trx);
-
- ha_statistic_increment(&SSV::ha_update_count);
-
- if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
- table->timestamp_field->set_time();
-
- if (prebuilt->upd_node) {
- uvect = prebuilt->upd_node->update;
- } else {
- uvect = row_get_prebuilt_update_vector(prebuilt);
- }
-
- /* Build an update vector from the modified fields in the rows
- (uses upd_buff of the handle) */
-
- calc_row_difference(uvect, (uchar*) old_row, new_row, table,
- upd_buff, (ulint)upd_and_key_val_buff_len,
- prebuilt, user_thd);
-
- /* This is not a delete */
- prebuilt->upd_node->is_delete = FALSE;
-
- ut_a(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
-
- innodb_srv_conc_enter_innodb(trx);
-
- error = row_update_for_mysql((byte*) old_row, prebuilt);
-
- /* We need to do some special AUTOINC handling for the following case:
-
- INSERT INTO t (c1,c2) VALUES(x,y) ON DUPLICATE KEY UPDATE ...
-
- We need to use the AUTOINC counter that was actually used by
- MySQL in the UPDATE statement, which can be different from the
- value used in the INSERT statement.*/
-
- if (error == DB_SUCCESS
- && table->next_number_field
- && new_row == table->record[0]
- && thd_sql_command(user_thd) == SQLCOM_INSERT
- && (trx->duplicates & (TRX_DUP_IGNORE | TRX_DUP_REPLACE))
- == TRX_DUP_IGNORE) {
-
- ulonglong auto_inc;
- ulonglong col_max_value;
-
- auto_inc = table->next_number_field->val_int();
-
- /* We need the upper limit of the col type to check for
- whether we update the table autoinc counter or not. */
- col_max_value = innobase_get_int_col_max_value(
- table->next_number_field);
-
- if (auto_inc <= col_max_value && auto_inc != 0) {
-
- ulonglong need;
- ulonglong offset;
-
- offset = prebuilt->autoinc_offset;
- need = prebuilt->autoinc_increment;
-
- auto_inc = innobase_next_autoinc(
- auto_inc, need, offset, col_max_value);
-
- error = innobase_set_max_autoinc(auto_inc);
- }
- }
-
- innodb_srv_conc_exit_innodb(trx);
-
- error = convert_error_code_to_mysql(error,
- prebuilt->table->flags, user_thd);
-
- if (error == 0 /* success */
- && uvect->n_fields == 0 /* no columns were updated */) {
-
- /* This is the same as success, but instructs
- MySQL that the row is not really updated and it
- should not increase the count of updated rows.
- This is fix for http://bugs.mysql.com/29157 */
- error = HA_ERR_RECORD_IS_THE_SAME;
- }
-
- /* Tell InnoDB server that there might be work for
- utility threads: */
-
- innobase_active_small();
-
- DBUG_RETURN(error);
-}
-
-/**********************************************************************//**
-Deletes the row given as the parameter. The prebuilt update node is
-created if it does not exist yet, flagged as a delete, and handed to
-row_update_for_mysql(); the InnoDB result is then converted to a MySQL
-error code.
-@return error number or 0 */
-UNIV_INTERN
-int
-ha_innobase::delete_row(
-/*====================*/
-    const uchar*    record) /*!< in: a row in MySQL format */
-{
-    int     err = 0;
-    trx_t*  cur_trx = thd_to_trx(user_thd);
-
-    DBUG_ENTER("ha_innobase::delete_row");
-
-    ut_a(prebuilt->trx == cur_trx);
-
-    ha_statistic_increment(&SSV::ha_delete_count);
-
-    if (prebuilt->upd_node == NULL) {
-        /* Called for its effect on prebuilt; the returned
-        vector itself is not needed for a delete. */
-        row_get_prebuilt_update_vector(prebuilt);
-    }
-
-    /* Mark the operation as a delete. */
-    prebuilt->upd_node->is_delete = TRUE;
-
-    innodb_srv_conc_enter_innodb(cur_trx);
-    err = row_update_for_mysql((byte*) record, prebuilt);
-    innodb_srv_conc_exit_innodb(cur_trx);
-
-    err = convert_error_code_to_mysql(
-        err, prebuilt->table->flags, user_thd);
-
-    /* Let the InnoDB server know that the utility threads
-    may have work to do. */
-    innobase_active_small();
-
-    DBUG_RETURN(err);
-}
-
-/**********************************************************************//**
-Removes a new lock set on a row, if it was not read optimistically. This can
-be called after a row has been read in the processing of an UPDATE or a DELETE
-query, if the option innodb_locks_unsafe_for_binlog is set.
-
-Nothing is done when select_lock_type is LOCK_NONE. Otherwise the action
-depends on prebuilt->row_read_type:
-- ROW_READ_DID_SEMI_CONSISTENT: the type is switched back to
-  ROW_READ_TRY_SEMI_CONSISTENT and no lock is released;
-- ROW_READ_TRY_SEMI_CONSISTENT: the row lock is released;
-- ROW_READ_WITH_LOCKS: the row lock is released only if
-  srv_locks_unsafe_for_binlog is set or the transaction isolation level is
-  TRX_ISO_READ_COMMITTED. */
-UNIV_INTERN
-void
-ha_innobase::unlock_row(void)
-/*=========================*/
-{
-    DBUG_ENTER("ha_innobase::unlock_row");
-
-    /* A consistent read takes no locks, so there is nothing
-    to release. */
-    if (prebuilt->select_lock_type == LOCK_NONE) {
-        DBUG_VOID_RETURN;
-    }
-
-    if (prebuilt->row_read_type == ROW_READ_DID_SEMI_CONSISTENT) {
-
-        prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
-
-    } else if (prebuilt->row_read_type == ROW_READ_TRY_SEMI_CONSISTENT
-           || (prebuilt->row_read_type == ROW_READ_WITH_LOCKS
-               && (srv_locks_unsafe_for_binlog
-                   || prebuilt->trx->isolation_level
-                   == TRX_ISO_READ_COMMITTED))) {
-
-        row_unlock_for_mysql(prebuilt, FALSE);
-    }
-
-    DBUG_VOID_RETURN;
-}
-
-/* Tells whether the latest row read was a semi-consistent read.
-See handler.h and row0mysql.h for docs on this function. */
-UNIV_INTERN
-bool
-ha_innobase::was_semi_consistent_read(void)
-/*=======================================*/
-{
-	const bool	did_semi_consistent
-		= (prebuilt->row_read_type == ROW_READ_DID_SEMI_CONSISTENT);
-
-	return(did_semi_consistent);
-}
-
-/* Enables or disables semi-consistent reads for this handle.
-See handler.h and row0mysql.h for docs on this function. */
-UNIV_INTERN
-void
-ha_innobase::try_semi_consistent_read(bool yes)
-/*===========================================*/
-{
-	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
-
-	/* A semi-consistent read is attempted only when MySQL asked for
-	it AND either the innodb_locks_unsafe_for_binlog option is in use
-	or the session runs at the READ COMMITTED isolation level. In
-	every other case rows are read with ordinary locks. */
-
-	if (!yes
-	    || (!srv_locks_unsafe_for_binlog
-		&& prebuilt->trx->isolation_level
-		!= TRX_ISO_READ_COMMITTED)) {
-		prebuilt->row_read_type = ROW_READ_WITH_LOCKS;
-	} else {
-		prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
-	}
-}
-
-/******************************************************************//**
-Prepares the handle for using the given index by making it the active
-index.
-@return 0 or error number */
-UNIV_INTERN
-int
-ha_innobase::index_init(
-/*====================*/
-	uint	keynr,	/*!< in: key (index) number */
-	bool	sorted)	/*!< in: 1 if result MUST be sorted according to
-			index; not used by this implementation */
-{
-	int	err;
-
-	DBUG_ENTER("index_init");
-
-	err = change_active_index(keynr);
-
-	DBUG_RETURN(err);
-}
-
-/******************************************************************//**
-Ends the use of an index: resets the active index of the handle to
-MAX_KEY.
-@return 0 */
-UNIV_INTERN
-int
-ha_innobase::index_end(void)
-/*========================*/
-{
-	DBUG_ENTER("index_end");
-
-	active_index = MAX_KEY;
-
-	DBUG_RETURN(0);
-}
-
-/*********************************************************************//**
-Maps a MySQL search mode flag to the corresponding InnoDB page cursor
-search mode.
-@return PAGE_CUR_GE, PAGE_CUR_LE, PAGE_CUR_G, PAGE_CUR_L, or
-PAGE_CUR_UNSUPP if the flag is not supported */
-static inline
-ulint
-convert_search_mode_to_innobase(
-/*============================*/
-	enum ha_rkey_function	find_flag)	/*!< in: MySQL search flag */
-{
-	switch (find_flag) {
-	case HA_READ_KEY_EXACT:
-		/* an exact match does not require the index to be
-		UNIQUE */
-	case HA_READ_KEY_OR_NEXT:
-	case HA_READ_PREFIX:
-		return(PAGE_CUR_GE);
-
-	case HA_READ_KEY_OR_PREV:
-	case HA_READ_PREFIX_LAST:
-	case HA_READ_PREFIX_LAST_OR_PREV:
-		/* In MySQL-4.0 HA_READ_PREFIX and HA_READ_PREFIX_LAST always
-		pass a complete-field prefix of a key value as the search
-		tuple. I.e., it is not allowed that the last field would
-		just contain n first bytes of the full field value.
-		MySQL uses a 'padding' trick to convert LIKE 'abc%'
-		type queries so that it can use as a search tuple
-		a complete-field-prefix of a key value. Thus, the InnoDB
-		search mode PAGE_CUR_LE_OR_EXTENDS is never used.
-		TODO: when/if MySQL starts to use also partial-field
-		prefixes, we have to deal with stripping of spaces
-		and comparison of non-latin1 char type fields in
-		innobase_mysql_cmp() to get PAGE_CUR_LE_OR_EXTENDS to
-		work correctly. */
-		return(PAGE_CUR_LE);
-
-	case HA_READ_AFTER_KEY:
-		return(PAGE_CUR_G);
-
-	case HA_READ_BEFORE_KEY:
-		return(PAGE_CUR_L);
-
-	case HA_READ_MBR_CONTAIN:
-	case HA_READ_MBR_INTERSECT:
-	case HA_READ_MBR_WITHIN:
-	case HA_READ_MBR_DISJOINT:
-	case HA_READ_MBR_EQUAL:
-		return(PAGE_CUR_UNSUPP);
-	/* "default:" is left out on purpose so that gcc warns
-		enumeration value '...' not handled in switch
-	(if -Wswitch or -Wall is used) when a new flag is added */
-	}
-
-	/* Reached only for a value outside the cases listed above. */
-	my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "this functionality");
-
-	return(PAGE_CUR_UNSUPP);
-}
-
-/*
- BACKGROUND INFO: HOW A SELECT SQL QUERY IS EXECUTED
- ---------------------------------------------------
-The following does not cover all the details, but explains how we determine
-the start of a new SQL statement, and what is associated with it.
-
-For each table in the database the MySQL interpreter may have several
-table handle instances in use, also in a single SQL query. For each table
-handle instance there is an InnoDB 'prebuilt' struct which contains most
-of the InnoDB data associated with this table handle instance.
-
- A) if the user has not explicitly set any MySQL table level locks:
-
- 1) MySQL calls ::external_lock to set an 'intention' table level lock on
-the table of the handle instance. There we set
-prebuilt->sql_stat_start = TRUE. The flag sql_stat_start should be set
-true if we are taking this table handle instance to use in a new SQL
-statement issued by the user. We also increment trx->n_mysql_tables_in_use.
-
- 2) If prebuilt->sql_stat_start == TRUE we 'pre-compile' the MySQL search
-instructions to prebuilt->template of the table handle instance in
-::index_read. The template is used to save CPU time in large joins.
-
- 3) In row_search_for_mysql, if prebuilt->sql_stat_start is true, we
-allocate a new consistent read view for the trx if it does not yet have one,
-or in the case of a locking read, set an InnoDB 'intention' table level
-lock on the table.
-
- 4) We do the SELECT. MySQL may repeatedly call ::index_read for the
-same table handle instance, if it is a join.
-
- 5) When the SELECT ends, MySQL removes its intention table level locks
-in ::external_lock. When trx->n_mysql_tables_in_use drops to zero,
- (a) we execute a COMMIT there if the autocommit is on,
- (b) we also release possible 'SQL statement level resources' InnoDB may
-have for this SQL statement. The MySQL interpreter does NOT execute
-autocommit for pure read transactions, though it should. That is why the
-table handler in that case has to execute the COMMIT in ::external_lock.
-
- B) If the user has explicitly set MySQL table level locks, then MySQL
-does NOT call ::external_lock at the start of the statement. To determine
-when we are at the start of a new SQL statement we at the start of
-::index_read also compare the query id to the latest query id where the
-table handle instance was used. If it has changed, we know we are at the
-start of a new SQL statement. Since the query id can theoretically
-wrap around, we use this test only as a secondary way of determining the
-start of a new SQL statement. */
-
-
-/**********************************************************************//**
-Positions the index cursor of the handle's current index according to the
-given key and search flag, and fetches the row found, if any.
-@return 0, HA_ERR_KEY_NOT_FOUND, or error number */
-UNIV_INTERN
-int
-ha_innobase::index_read(
-/*====================*/
-	uchar*		buf,		/*!< in/out: buffer for the returned
-					row */
-	const uchar*	key_ptr,	/*!< in: key value; if this is NULL
-					we position the cursor at the
-					start or end of index; this can
-					also contain an InnoDB row id, in
-					which case key_len is the InnoDB
-					row id length; the key value can
-					also be a prefix of a full key value,
-					and the last column can be a prefix
-					of a full column */
-	uint		key_len,	/*!< in: key value length */
-	enum ha_rkey_function find_flag)/*!< in: search flags from my_base.h */
-{
-	dict_index_t*	index;
-	ulint		search_mode;
-	ulint		match_mode;
-	ulint		ret;
-	int		error;
-
-	DBUG_ENTER("index_read");
-
-	ut_a(prebuilt->trx == thd_to_trx(user_thd));
-
-	ha_statistic_increment(&SSV::ha_read_key_count);
-
-	index = prebuilt->index;
-
-	if (UNIV_UNLIKELY(index == NULL)) {
-		prebuilt->index_usable = FALSE;
-		DBUG_RETURN(HA_ERR_CRASHED);
-	}
-
-	/* Note that the index for which the search template is built is
-	not necessarily prebuilt->index; it can also be the clustered
-	index */
-
-	if (prebuilt->sql_stat_start) {
-		build_template(prebuilt, user_thd, table,
-			       ROW_MYSQL_REC_FIELDS);
-	}
-
-	if (key_ptr == NULL) {
-		/* No key: the cursor goes to the first or the last
-		entry in the index */
-
-		dtuple_set_n_fields(prebuilt->search_tuple, 0);
-	} else {
-		/* Convert the search key value to InnoDB format into
-		prebuilt->search_tuple */
-
-		row_sel_convert_mysql_key_to_innobase(
-			prebuilt->search_tuple,
-			(byte*) key_val_buff,
-			(ulint)upd_and_key_val_buff_len,
-			index,
-			(byte*) key_ptr,
-			(ulint) key_len,
-			prebuilt->trx);
-	}
-
-	search_mode = convert_search_mode_to_innobase(find_flag);
-
-	switch (find_flag) {
-	case HA_READ_KEY_EXACT:
-		match_mode = ROW_SEL_EXACT;
-		break;
-	case HA_READ_PREFIX:
-	case HA_READ_PREFIX_LAST:
-		match_mode = ROW_SEL_EXACT_PREFIX;
-		break;
-	default:
-		match_mode = 0;
-		break;
-	}
-
-	/* Saved in the handle; index_next_same() passes it on. */
-	last_match_mode = (uint) match_mode;
-
-	if (search_mode == PAGE_CUR_UNSUPP) {
-
-		ret = DB_UNSUPPORTED;
-	} else {
-		innodb_srv_conc_enter_innodb(prebuilt->trx);
-
-		ret = row_search_for_mysql((byte*) buf, search_mode,
-					   prebuilt, match_mode, 0);
-
-		innodb_srv_conc_exit_innodb(prebuilt->trx);
-	}
-
-	if (ret == DB_SUCCESS) {
-		error = 0;
-		table->status = 0;
-	} else if (ret == DB_RECORD_NOT_FOUND || ret == DB_END_OF_INDEX) {
-		error = HA_ERR_KEY_NOT_FOUND;
-		table->status = STATUS_NOT_FOUND;
-	} else {
-		error = convert_error_code_to_mysql((int) ret,
-						    prebuilt->table->flags,
-						    user_thd);
-		table->status = STATUS_NOT_FOUND;
-	}
-
-	DBUG_RETURN(error);
-}
-
-/*******************************************************************//**
-Works like index_read, but finds the last row with the given key value
-or prefix. Implemented as index_read with HA_READ_PREFIX_LAST.
-@return 0, HA_ERR_KEY_NOT_FOUND, or an error code */
-UNIV_INTERN
-int
-ha_innobase::index_read_last(
-/*=========================*/
-	uchar*		buf,	/*!< out: fetched row */
-	const uchar*	key_ptr,/*!< in: key value, or a prefix of a full
-				key value */
-	uint		key_len)/*!< in: length of the key val or prefix
-				in bytes */
-{
-	const int	err = index_read(buf, key_ptr, key_len,
-					 HA_READ_PREFIX_LAST);
-
-	return(err);
-}
-
-/********************************************************************//**
-Looks up the InnoDB index that corresponds to a MySQL key number. Does
-not change the active index. A failed lookup is reported in the error
-log.
-@return NULL or index instance. */
-UNIV_INTERN
-dict_index_t*
-ha_innobase::innobase_get_index(
-/*============================*/
-	uint	keynr)	/*!< in: use this index; MAX_KEY means always
-			clustered index, even if it was internally
-			generated by InnoDB */
-{
-	dict_index_t*	found;
-	const char*	key_name = "NULL";
-
-	DBUG_ENTER("innobase_get_index");
-	ha_statistic_increment(&SSV::ha_read_key_count);
-
-	ut_ad(user_thd == ha_thd());
-	ut_a(prebuilt->trx == thd_to_trx(user_thd));
-
-	if (keynr == MAX_KEY || table->s->keys == 0) {
-		/* No MySQL key to look up by name: take the first
-		index of the InnoDB table. */
-		found = dict_table_get_first_index(prebuilt->table);
-	} else {
-		KEY*	mysql_key = &table->key_info[keynr];
-
-		key_name = mysql_key->name;
-
-		found = dict_table_get_index_on_name(prebuilt->table,
-						     mysql_key->name);
-	}
-
-	if (found == NULL) {
-		sql_print_error(
-			"Innodb could not find key n:o %u with name %s "
-			"from dict cache for table %s",
-			keynr, key_name,
-			prebuilt->table->name);
-	}
-
-	DBUG_RETURN(found);
-}
-
-/********************************************************************//**
-Makes the given key the active index of the handle and prepares the
-search tuple and the row template for it.
-@return 0 on success, 1 if the index was not found, 2 if the index is not
-usable by the current transaction */
-UNIV_INTERN
-int
-ha_innobase::change_active_index(
-/*=============================*/
-	uint	keynr)	/*!< in: use this index; MAX_KEY means always clustered
-			index, even if it was internally generated by
-			InnoDB */
-{
-	dict_index_t*	new_index;
-	ulint		n_fields;
-
-	DBUG_ENTER("change_active_index");
-
-	ut_ad(user_thd == ha_thd());
-	ut_a(prebuilt->trx == thd_to_trx(user_thd));
-
-	active_index = keynr;
-
-	new_index = innobase_get_index(keynr);
-	prebuilt->index = new_index;
-
-	if (UNIV_UNLIKELY(new_index == NULL)) {
-		sql_print_warning("InnoDB: change_active_index(%u) failed",
-				  keynr);
-		prebuilt->index_usable = FALSE;
-		DBUG_RETURN(1);
-	}
-
-	prebuilt->index_usable = row_merge_is_index_usable(prebuilt->trx,
-							   new_index);
-
-	if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
-		push_warning_printf(user_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-				    HA_ERR_TABLE_DEF_CHANGED,
-				    "InnoDB: insufficient history for index %u",
-				    keynr);
-		/* The caller seems to ignore this. Thus, we must check
-		this again in row_search_for_mysql(). */
-		DBUG_RETURN(2);
-	}
-
-	ut_a(prebuilt->search_tuple != 0);
-
-	/* Size and type the search tuple after the fields of the
-	newly selected index. */
-	n_fields = prebuilt->index->n_fields;
-
-	dtuple_set_n_fields(prebuilt->search_tuple, n_fields);
-
-	dict_index_copy_types(prebuilt->search_tuple, prebuilt->index,
-			      n_fields);
-
-	/* MySQL changes the active index for a handle also during some
-	queries, for example SELECT MAX(a), SUM(a) first retrieves the MAX()
-	and then calculates the sum. Previously we played safe and used
-	the flag ROW_MYSQL_WHOLE_ROW below, but that caused unnecessary
-	copying. Starting from MySQL-4.1 we use a more efficient flag here. */
-
-	build_template(prebuilt, user_thd, table, ROW_MYSQL_REC_FIELDS);
-
-	DBUG_RETURN(0);
-}
-
-/**********************************************************************//**
-Makes keynr the active index, positions an index cursor on it and fetches
-the row, if any.
-??? This is only used to read whole keys ???
-@return 0, 1 if the index could not be made active, or the error number
-from index_read */
-UNIV_INTERN
-int
-ha_innobase::index_read_idx(
-/*========================*/
-	uchar*		buf,		/*!< in/out: buffer for the returned
-					row */
-	uint		keynr,		/*!< in: use this index */
-	const uchar*	key,		/*!< in: key value; if this is NULL
-					we position the cursor at the
-					start or end of index */
-	uint		key_len,	/*!< in: key value length */
-	enum ha_rkey_function find_flag)/*!< in: search flags from my_base.h */
-{
-	const int	switch_err = change_active_index(keynr);
-
-	if (switch_err != 0) {
-		/* Any failure to switch indexes is reported as 1. */
-		return(1);
-	}
-
-	return(index_read(buf, key, key_len, find_flag));
-}
-
-/***********************************************************************//**
-Moves a cursor, which must already have been positioned with index_read,
-one row forward or backward and fetches that row.
-@return 0, HA_ERR_END_OF_FILE, or error number */
-UNIV_INTERN
-int
-ha_innobase::general_fetch(
-/*=======================*/
-	uchar*	buf,		/*!< in/out: buffer for next row in MySQL
-				format */
-	uint	direction,	/*!< in: ROW_SEL_NEXT or ROW_SEL_PREV */
-	uint	match_mode)	/*!< in: 0, ROW_SEL_EXACT, or
-				ROW_SEL_EXACT_PREFIX */
-{
-	ulint	ret;
-	int	error;
-
-	DBUG_ENTER("general_fetch");
-
-	ut_a(prebuilt->trx == thd_to_trx(user_thd));
-
-	innodb_srv_conc_enter_innodb(prebuilt->trx);
-	ret = row_search_for_mysql(
-		(byte*)buf, 0, prebuilt, match_mode, direction);
-	innodb_srv_conc_exit_innodb(prebuilt->trx);
-
-	if (ret == DB_SUCCESS) {
-		error = 0;
-		table->status = 0;
-	} else if (ret == DB_RECORD_NOT_FOUND || ret == DB_END_OF_INDEX) {
-		/* Running out of rows is the end of the scan here,
-		not a missing key. */
-		error = HA_ERR_END_OF_FILE;
-		table->status = STATUS_NOT_FOUND;
-	} else {
-		error = convert_error_code_to_mysql(
-			(int) ret, prebuilt->table->flags, user_thd);
-		table->status = STATUS_NOT_FOUND;
-	}
-
-	DBUG_RETURN(error);
-}
-
-/***********************************************************************//**
-Fetches the row following the current cursor position. The cursor must
-have been positioned with index_read beforehand.
-@return 0, HA_ERR_END_OF_FILE, or error number */
-UNIV_INTERN
-int
-ha_innobase::index_next(
-/*====================*/
-	uchar*	buf)	/*!< in/out: buffer for next row in MySQL
-			format */
-{
-	int	err;
-
-	ha_statistic_increment(&SSV::ha_read_next_count);
-
-	err = general_fetch(buf, ROW_SEL_NEXT, 0);
-
-	return(err);
-}
-
-/*******************************************************************//**
-Fetches the next row, using the match mode that the latest index_read
-saved in last_match_mode. The key and keylen arguments are not used.
-@return 0, HA_ERR_END_OF_FILE, or error number */
-UNIV_INTERN
-int
-ha_innobase::index_next_same(
-/*=========================*/
-	uchar*		buf,	/*!< in/out: buffer for the row */
-	const uchar*	key,	/*!< in: key value */
-	uint		keylen)	/*!< in: key value length */
-{
-	int	err;
-
-	ha_statistic_increment(&SSV::ha_read_next_count);
-
-	err = general_fetch(buf, ROW_SEL_NEXT, last_match_mode);
-
-	return(err);
-}
-
-/***********************************************************************//**
-Fetches the row preceding the current cursor position. The cursor must
-have been positioned with index_read beforehand.
-@return 0, HA_ERR_END_OF_FILE, or error number */
-UNIV_INTERN
-int
-ha_innobase::index_prev(
-/*====================*/
-	uchar*	buf)	/*!< in/out: buffer for previous row in MySQL format */
-{
-	int	err;
-
-	ha_statistic_increment(&SSV::ha_read_prev_count);
-
-	err = general_fetch(buf, ROW_SEL_PREV, 0);
-
-	return(err);
-}
-
-/********************************************************************//**
-Places the cursor on the first record of the index and reads that row
-into buf.
-@return 0, HA_ERR_END_OF_FILE, or error code */
-UNIV_INTERN
-int
-ha_innobase::index_first(
-/*=====================*/
-	uchar*	buf)	/*!< in/out: buffer for the row */
-{
-	int	rc;
-
-	DBUG_ENTER("index_first");
-	ha_statistic_increment(&SSV::ha_read_first_count);
-
-	/* A NULL key with HA_READ_AFTER_KEY positions the cursor at the
-	start of the index. */
-	rc = index_read(buf, NULL, 0, HA_READ_AFTER_KEY);
-
-	/* MySQL does not seem to allow this to return
-	HA_ERR_KEY_NOT_FOUND, so report an empty index as end of file */
-
-	DBUG_RETURN(rc == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : rc);
-}
-
-/********************************************************************//**
-Places the cursor on the last record of the index and reads that row
-into buf.
-@return 0, HA_ERR_END_OF_FILE, or error code */
-UNIV_INTERN
-int
-ha_innobase::index_last(
-/*====================*/
-	uchar*	buf)	/*!< in/out: buffer for the row */
-{
-	int	rc;
-
-	DBUG_ENTER("index_last");
-	ha_statistic_increment(&SSV::ha_read_last_count);
-
-	/* A NULL key with HA_READ_BEFORE_KEY positions the cursor at the
-	end of the index. */
-	rc = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY);
-
-	/* MySQL does not seem to allow this to return
-	HA_ERR_KEY_NOT_FOUND, so report an empty index as end of file */
-
-	DBUG_RETURN(rc == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : rc);
-}
-
-/****************************************************************//**
-Initializes a table scan: makes the clustered index the active index and
-flags the handle as being at the start of a scan.
-@return 0 or error number */
-UNIV_INTERN
-int
-ha_innobase::rnd_init(
-/*==================*/
-	bool	scan)	/*!< in: TRUE if table/index scan FALSE otherwise */
-{
-	/* The scan runs on the clustered index: MAX_KEY selects the one
-	InnoDB generated internally, otherwise the primary key is used. */
-	const uint	scan_keynr = prebuilt->clust_index_was_generated
-		? MAX_KEY
-		: primary_key;
-
-	const int	err = change_active_index(scan_keynr);
-
-	/* Don't use semi-consistent read in random row reads (by position).
-	This means we must disable semi_consistent_read if scan is false */
-
-	if (!scan) {
-		try_semi_consistent_read(0);
-	}
-
-	start_of_scan = 1;
-
-	return(err);
-}
-
-/*****************************************************************//**
-Ends a table scan. Delegates to index_end.
-@return 0 or error number */
-UNIV_INTERN
-int
-ha_innobase::rnd_end(void)
-/*======================*/
-{
-	const int	err = index_end();
-
-	return(err);
-}
-
-/*****************************************************************//**
-Reads the next row in a table scan. The first call after rnd_init reads
-the FIRST row of the scan.
-@return 0, HA_ERR_END_OF_FILE, or error number */
-UNIV_INTERN
-int
-ha_innobase::rnd_next(
-/*==================*/
-	uchar*	buf)	/*!< in/out: returns the row in this buffer,
-			in MySQL format */
-{
-	int	rc;
-
-	DBUG_ENTER("rnd_next");
-	ha_statistic_increment(&SSV::ha_read_rnd_next_count);
-
-	if (!start_of_scan) {
-		/* The scan is under way: step to the next row. */
-		rc = general_fetch(buf, ROW_SEL_NEXT, 0);
-	} else {
-		/* First call of the scan: start from the first row. */
-		rc = index_first(buf);
-
-		if (rc == HA_ERR_KEY_NOT_FOUND) {
-			rc = HA_ERR_END_OF_FILE;
-		}
-
-		start_of_scan = 0;
-	}
-
-	DBUG_RETURN(rc);
-}
-
-/**********************************************************************//**
-Fetches a row from the table based on a row reference. The lookup is done
-on the clustered index; afterwards the index that was active on entry is
-made active again.
-@return 0, HA_ERR_KEY_NOT_FOUND, or error code */
-UNIV_INTERN
-int
-ha_innobase::rnd_pos(
-/*=================*/
-	uchar*	buf,	/*!< in/out: buffer for the row */
-	uchar*	pos)	/*!< in: primary key value of the row in the
-			MySQL format, or the row id if the clustered
-			index was internally generated by InnoDB; the
-			length of data in pos has to be ref_length */
-{
-	int		rc;
-	const uint	saved_keynr = active_index;
-
-	DBUG_ENTER("rnd_pos");
-	DBUG_DUMP("key", pos, ref_length);
-
-	ha_statistic_increment(&SSV::ha_read_rnd_count);
-
-	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
-
-	/* If no primary key was defined for the table, the clustered
-	index was generated from the row id and the row reference is that
-	row id, not any key value that MySQL knows of; MAX_KEY selects
-	that index. Otherwise the reference is a primary key value. */
-
-	rc = change_active_index(prebuilt->clust_index_was_generated
-				 ? MAX_KEY
-				 : primary_key);
-
-	if (rc != 0) {
-		DBUG_PRINT("error", ("Got error: %d", rc));
-		DBUG_RETURN(rc);
-	}
-
-	/* Note that we assume the length of the row reference is fixed
-	for the table, and it is == ref_length */
-
-	rc = index_read(buf, pos, ref_length, HA_READ_KEY_EXACT);
-
-	if (rc != 0) {
-		DBUG_PRINT("error", ("Got error: %d", rc));
-	}
-
-	/* Switch back to the index that was active on entry; the result
-	of this switch is not checked. */
-	change_active_index(saved_keynr);
-
-	DBUG_RETURN(rc);
-}
-
-/*********************************************************************//**
-Stores a reference to the current row in the 'ref' field of the handle.
-When the clustered index was generated by InnoDB, the parameter is
-illogical: the row reference is then the internally generated row id,
-which 'record' does not contain, so we MUST ASSUME that 'record' is the
-current 'position' of the handle and use the row id of the record where
-the handle was positioned the last time. */
-UNIV_INTERN
-void
-ha_innobase::position(
-/*==================*/
-	const uchar*	record)	/*!< in: row in MySQL format */
-{
-	uint	stored_len;
-
-	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
-
-	if (!prebuilt->clust_index_was_generated) {
-		/* The table has a primary key: the row reference is
-		the primary key value built from the record. */
-
-		stored_len = store_key_val_for_row(primary_key, (char*)ref,
-						   ref_length, record);
-	} else {
-		/* No primary key was defined for the table and we
-		generated the clustered index from row id: the
-		row reference will be the row id, not any key value
-		that MySQL knows of */
-
-		stored_len = DATA_ROW_ID_LEN;
-
-		memcpy(ref, prebuilt->row_id, stored_len);
-	}
-
-	/* We assume that the 'ref' value len is always fixed for the same
-	table. A mismatch is only reported, not corrected. */
-
-	if (stored_len != ref_length) {
-		sql_print_error("Stored ref len is %lu, but table ref len is %lu",
-				(ulong) stored_len, (ulong) ref_length);
-	}
-}
-
-/* Limit InnoDB monitor access to users with the PROCESS privilege.
-See http://bugs.mysql.com/32710 for an explanation of why PROCESS was chosen.
-The macro is true when row_is_magic_monitor_table() accepts table_name and
-check_global_access(thd, PROCESS_ACL) returns nonzero (presumably meaning
-that the privilege is missing; confirm against check_global_access()).
-The privilege check is evaluated only for monitor table names. */
-#define IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(table_name, thd) \
- (row_is_magic_monitor_table(table_name) \
- && check_global_access(thd, PROCESS_ACL))
-
-/*****************************************************************//**
-Creates a table definition to an InnoDB database: builds an in-memory
-table object from the MySQL column definitions and hands it over to
-row_create_table_for_mysql().
-@return 0 or error number */
-static
-int
-create_table_def(
-/*=============*/
-	trx_t*		trx,		/*!< in: InnoDB transaction handle */
-	TABLE*		form,		/*!< in: information on table
-					columns and indexes */
-	const char*	table_name,	/*!< in: table name */
-	const char*	path_of_temp_table,/*!< in: if this is a table explicitly
-					created by the user with the
-					TEMPORARY keyword, then this
-					parameter is the dir path where the
-					table should be placed if we create
-					an .ibd file for it (no .ibd extension
-					in the path, though); otherwise this
-					is NULL */
-	ulint		flags)		/*!< in: table flags */
-{
-	Field*		field;
-	dict_table_t*	table;
-	ulint		n_cols;
-	int		error;
-	ulint		col_type;
-	ulint		col_len;
-	ulint		nulls_allowed;
-	ulint		unsigned_type;
-	ulint		binary_type;
-	ulint		long_true_varchar;
-	ulint		charset_no;
-	ulint		i;
-
-	DBUG_ENTER("create_table_def");
-	DBUG_PRINT("enter", ("table_name: %s", table_name));
-
-	ut_a(trx->mysql_thd != NULL);
-
-	/* Nothing has been allocated yet, so a plain return is safe. */
-	if (IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(table_name,
-						  (THD*) trx->mysql_thd)) {
-		DBUG_RETURN(HA_ERR_GENERIC);
-	}
-
-	n_cols = form->s->fields;
-
-	/* We pass 0 as the space id, and determine at a lower level the space
-	id where to store the table */
-
-	table = dict_mem_table_create(table_name, 0, n_cols, flags);
-
-	if (path_of_temp_table) {
-		table->dir_path_of_temp_table =
-			mem_heap_strdup(table->heap, path_of_temp_table);
-	}
-
-	for (i = 0; i < n_cols; i++) {
-		field = form->field[i];
-
-		col_type = get_innobase_type_from_mysql_type(&unsigned_type,
-							     field);
-		if (field->null_ptr) {
-			nulls_allowed = 0;
-		} else {
-			nulls_allowed = DATA_NOT_NULL;
-		}
-
-		if (field->binary()) {
-			binary_type = DATA_BINARY_TYPE;
-		} else {
-			binary_type = 0;
-		}
-
-		charset_no = 0;
-
-		if (dtype_is_string_type(col_type)) {
-
-			charset_no = (ulint)field->charset()->number;
-
-			if (UNIV_UNLIKELY(charset_no >= 256)) {
-				/* in data0type.h we assume that the
-				number fits in one byte in prtype */
-				push_warning_printf(
-					(THD*) trx->mysql_thd,
-					MYSQL_ERROR::WARN_LEVEL_WARN,
-					ER_CANT_CREATE_TABLE,
-					"In InnoDB, charset-collation codes"
-					" must be below 256."
-					" Unsupported code %lu.",
-					(ulong) charset_no);
-
-				/* The table object was never handed over
-				to row_create_table_for_mysql(), so it has
-				to be freed here or it would be leaked. */
-				dict_mem_table_free(table);
-
-				DBUG_RETURN(ER_CANT_CREATE_TABLE);
-			}
-		}
-
-		ut_a(field->type() < 256); /* we assume in dtype_form_prtype()
-					   that this fits in one byte */
-		col_len = field->pack_length();
-
-		/* The MySQL pack length contains 1 or 2 bytes length field
-		for a true VARCHAR. Let us subtract that, so that the InnoDB
-		column length in the InnoDB data dictionary is the real
-		maximum byte length of the actual data. */
-
-		long_true_varchar = 0;
-
-		if (field->type() == MYSQL_TYPE_VARCHAR) {
-			col_len -= ((Field_varstring*)field)->length_bytes;
-
-			if (((Field_varstring*)field)->length_bytes == 2) {
-				long_true_varchar = DATA_LONG_TRUE_VARCHAR;
-			}
-		}
-
-		/* First check whether the column to be added has a
-		system reserved name. */
-		if (dict_col_name_is_reserved(field->field_name)){
-			my_error(ER_WRONG_COLUMN_NAME, MYF(0),
-				 field->field_name);
-
-			dict_mem_table_free(table);
-			trx_commit_for_mysql(trx);
-
-			error = DB_ERROR;
-			goto error_ret;
-		}
-
-		dict_mem_table_add_col(table, table->heap,
-			(char*) field->field_name,
-			col_type,
-			dtype_form_prtype(
-				(ulint)field->type()
-				| nulls_allowed | unsigned_type
-				| binary_type | long_true_varchar,
-				charset_no),
-			col_len);
-	}
-
-	error = row_create_table_for_mysql(table, trx);
-
-	if (error == DB_DUPLICATE_KEY) {
-		char	buf[100];
-		char*	buf_end;
-
-		/* innobase_convert_identifier() returns a pointer to the
-		end of the converted name and does not NUL-terminate it,
-		while my_error() needs a C string: reserve one byte and
-		terminate the name explicitly. */
-		buf_end = innobase_convert_identifier(
-			buf, sizeof buf - 1,
-			table_name, strlen(table_name),
-			trx->mysql_thd, TRUE);
-
-		*buf_end = '\0';
-
-		my_error(ER_TABLE_EXISTS_ERROR, MYF(0), buf);
-	}
-
-error_ret:
-	error = convert_error_code_to_mysql(error, flags, NULL);
-
-	DBUG_RETURN(error);
-}
-
-/*****************************************************************//**
-Creates an index in an InnoDB database from the MySQL key definition
-number key_num of the table.
-@return 0 or error number */
-static
-int
-create_index(
-/*=========*/
-	trx_t*		trx,		/*!< in: InnoDB transaction handle */
-	TABLE*		form,		/*!< in: information on table
-					columns and indexes */
-	ulint		flags,		/*!< in: InnoDB table flags */
-	const char*	table_name,	/*!< in: table name */
-	uint		key_num)	/*!< in: index number */
-{
-	Field*		field;
-	dict_index_t*	index;
-	int		error;
-	ulint		n_fields;
-	KEY*		key;
-	KEY_PART_INFO*	key_part;
-	ulint		ind_type = 0;
-	ulint		col_type;
-	ulint		prefix_len;
-	ulint		is_unsigned;
-	ulint		i;
-	ulint		j;
-	ulint*		field_lengths;
-	bool		is_prefix;
-
-	DBUG_ENTER("create_index");
-
-	key = &form->key_info[key_num];
-
-	n_fields = key->key_parts;
-
-	/* Assert that "GEN_CLUST_INDEX" cannot be used as non-primary index */
-	ut_a(innobase_strcasecmp(key->name, innobase_index_reserve_name) != 0);
-
-	if (key_num == form->s->primary_key) {
-		ind_type |= DICT_CLUSTERED;
-	}
-
-	if (key->flags & HA_NOSAME ) {
-		ind_type |= DICT_UNIQUE;
-	}
-
-	/* We pass 0 as the space id, and determine at a lower level the space
-	id where to store the table */
-
-	index = dict_mem_index_create(table_name, key->name, 0,
-				      ind_type, n_fields);
-
-	field_lengths = (ulint*) my_malloc(sizeof(ulint) * n_fields,
-					   MYF(MY_FAE));
-
-	for (i = 0; i < n_fields; i++) {
-		key_part = &key->key_part[i];
-
-		/* (The flag HA_PART_KEY_SEG denotes in MySQL a column prefix
-		field in an index: we only store a specified number of first
-		bytes of the column to the index field.) The flag does not
-		seem to be properly set by MySQL. Let us fall back on testing
-		the length of the key part versus the column. */
-
-		/* Find the table column that the key part refers to,
-		matching on the column name without regard to case. */
-		field = NULL;
-
-		for (j = 0; j < form->s->fields; j++) {
-
-			field = form->field[j];
-
-			if (innobase_strcasecmp(
-				    field->field_name,
-				    key_part->field->field_name) == 0) {
-
-				break;
-			}
-		}
-
-		/* The column must have been found */
-		ut_a(j < form->s->fields);
-
-		col_type = get_innobase_type_from_mysql_type(
-			&is_unsigned, key_part->field);
-
-		/* The key part is a column prefix if the column is a
-		BLOB, or if the key part is shorter than the column data;
-		for a true VARCHAR the length bytes are not data. */
-		if (col_type == DATA_BLOB) {
-			is_prefix = true;
-		} else if (field->type() == MYSQL_TYPE_VARCHAR) {
-			is_prefix = key_part->length < field->pack_length()
-				- ((Field_varstring*)field)->length_bytes;
-		} else {
-			is_prefix = key_part->length < field->pack_length();
-		}
-
-		prefix_len = 0;
-
-		if (is_prefix) {
-			if (col_type == DATA_INT
-			    || col_type == DATA_FLOAT
-			    || col_type == DATA_DOUBLE
-			    || col_type == DATA_DECIMAL) {
-				/* A prefix makes no sense on a numeric
-				column: report it and index the full
-				column instead. */
-				sql_print_error(
-					"MySQL is trying to create a column "
-					"prefix index field, on an "
-					"inappropriate data type. Table "
-					"name %s, column name %s.",
-					table_name,
-					key_part->field->field_name);
-			} else {
-				prefix_len = key_part->length;
-			}
-		}
-
-		field_lengths[i] = key_part->length;
-
-		dict_mem_index_add_field(index,
-			(char*) key_part->field->field_name, prefix_len);
-	}
-
-	/* Even though we've defined max_supported_key_part_length, we
-	still do our own checking using field_lengths to be absolutely
-	sure we don't create too long indexes. */
-	error = row_create_index_for_mysql(index, trx, field_lengths);
-
-	error = convert_error_code_to_mysql(error, flags, NULL);
-
-	my_free(field_lengths, MYF(0));
-
-	DBUG_RETURN(error);
-}
-
-/*****************************************************************//**
-Creates the clustered index of an InnoDB table for which the user has
-defined no primary index. The index gets the reserved name
-innobase_index_reserve_name and has no user-defined fields.
-@return 0 or error number */
-static
-int
-create_clustered_index_when_no_primary(
-/*===================================*/
-	trx_t*		trx,		/*!< in: InnoDB transaction handle */
-	ulint		flags,		/*!< in: InnoDB table flags */
-	const char*	table_name)	/*!< in: table name */
-{
-	/* We pass 0 as the space id, and determine at a lower level the space
-	id where to store the table */
-	dict_index_t*	clust_index = dict_mem_index_create(
-		table_name, innobase_index_reserve_name,
-		0, DICT_CLUSTERED, 0);
-
-	int	innodb_err = row_create_index_for_mysql(clust_index, trx,
-							NULL);
-
-	return(convert_error_code_to_mysql(innodb_err, flags, NULL));
-}
-
-/*****************************************************************//**
-Checks the KEY_BLOCK_SIZE and ROW_FORMAT create options against each
-other and against the innodb_file_per_table and innodb_file_format
-settings. One warning is pushed to the connection for every problem
-found. The checks run only when innodb_strict_mode is set; without it
-this function accepts everything.
-@return TRUE if the options are acceptable, FALSE otherwise */
-static
-ibool
-create_options_are_valid(
-/*=====================*/
-	THD*		thd,		/*!< in: connection thread. */
-	TABLE*		form,		/*!< in: information on table
-					columns and indexes */
-	HA_CREATE_INFO*	create_info)	/*!< in: create info. */
-{
-	ibool		valid = TRUE;
-
-	ut_ad(thd != NULL);
-
-	/* Nothing is validated unless innodb_strict_mode is set. */
-	if (!(THDVAR(thd, strict_mode))) {
-		return(TRUE);
-	}
-
-	ut_ad(form != NULL);
-	ut_ad(create_info != NULL);
-
-	/* KEY_BLOCK_SIZE counts as specified when it is nonzero or when
-	the statement flagged it as used. */
-	const ibool	has_kbs
-		= (create_info->key_block_size
-		   || (create_info->used_fields
-		       & HA_CREATE_USED_KEY_BLOCK_SIZE))
-		? TRUE : FALSE;
-
-	if (has_kbs) {
-		/* Only the sizes 1, 2, 4, 8 and 16 are accepted. */
-		if (create_info->key_block_size != 1
-		    && create_info->key_block_size != 2
-		    && create_info->key_block_size != 4
-		    && create_info->key_block_size != 8
-		    && create_info->key_block_size != 16) {
-
-			push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-					    ER_ILLEGAL_HA_CREATE_OPTION,
-					    "InnoDB: invalid"
-					    " KEY_BLOCK_SIZE = %lu."
-					    " Valid values are"
-					    " [1, 2, 4, 8, 16]",
-					    create_info->key_block_size);
-			valid = FALSE;
-		}
-
-		/* Settings that KEY_BLOCK_SIZE depends on. */
-		if (!srv_file_per_table) {
-			push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-				     ER_ILLEGAL_HA_CREATE_OPTION,
-				     "InnoDB: KEY_BLOCK_SIZE"
-				     " requires innodb_file_per_table.");
-			valid = FALSE;
-		}
-
-		if (srv_file_format < DICT_TF_FORMAT_ZIP) {
-			push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-				     ER_ILLEGAL_HA_CREATE_OPTION,
-				     "InnoDB: KEY_BLOCK_SIZE"
-				     " requires innodb_file_format >"
-				     " Antelope.");
-			valid = FALSE;
-		}
-	}
-
-	/* The remaining checks apply only to an explicit ROW_FORMAT. */
-	if (!(create_info->used_fields & HA_CREATE_USED_ROW_FORMAT)) {
-		return(valid);
-	}
-
-	const char*	fmt_name;
-
-	switch (form->s->row_type) {
-	case ROW_TYPE_COMPRESSED:
-	case ROW_TYPE_DYNAMIC:
-		if (form->s->row_type == ROW_TYPE_COMPRESSED) {
-			fmt_name = "COMPRESSED";
-		} else {
-			fmt_name = "DYNAMIC";
-		}
-
-		/* Both of these formats need srv_file_per_table and
-		a file format of at least DICT_TF_FORMAT_ZIP. */
-		if (!srv_file_per_table) {
-			push_warning_printf(
-				thd,
-				MYSQL_ERROR::WARN_LEVEL_WARN,
-				ER_ILLEGAL_HA_CREATE_OPTION,
-				"InnoDB: ROW_FORMAT=%s"
-				" requires innodb_file_per_table.",
-				fmt_name);
-			valid = FALSE;
-		}
-
-		if (srv_file_format < DICT_TF_FORMAT_ZIP) {
-			push_warning_printf(
-				thd,
-				MYSQL_ERROR::WARN_LEVEL_WARN,
-				ER_ILLEGAL_HA_CREATE_OPTION,
-				"InnoDB: ROW_FORMAT=%s"
-				" requires innodb_file_format >"
-				" Antelope.",
-				fmt_name);
-			valid = FALSE;
-		}
-
-		/* KEY_BLOCK_SIZE may accompany COMPRESSED but
-		not DYNAMIC. */
-		if (has_kbs && form->s->row_type == ROW_TYPE_DYNAMIC) {
-			push_warning_printf(
-				thd,
-				MYSQL_ERROR::WARN_LEVEL_WARN,
-				ER_ILLEGAL_HA_CREATE_OPTION,
-				"InnoDB: cannot specify"
-				" ROW_FORMAT = DYNAMIC with"
-				" KEY_BLOCK_SIZE.");
-			valid = FALSE;
-		}
-
-		break;
-
-	case ROW_TYPE_REDUNDANT:
-	case ROW_TYPE_COMPACT:
-	case ROW_TYPE_DEFAULT:
-		/* The default format is reported as COMPACT. */
-		if (form->s->row_type == ROW_TYPE_REDUNDANT) {
-			fmt_name = "REDUNDANT";
-		} else {
-			fmt_name = "COMPACT";
-		}
-
-		/* None of these formats accepts KEY_BLOCK_SIZE. */
-		if (has_kbs) {
-			push_warning_printf(
-				thd,
-				MYSQL_ERROR::WARN_LEVEL_WARN,
-				ER_ILLEGAL_HA_CREATE_OPTION,
-				"InnoDB: cannot specify"
-				" ROW_FORMAT = %s with"
-				" KEY_BLOCK_SIZE.",
-				fmt_name);
-			valid = FALSE;
-		}
-
-		break;
-
-	default:
-		push_warning(thd,
-			     MYSQL_ERROR::WARN_LEVEL_WARN,
-			     ER_ILLEGAL_HA_CREATE_OPTION,
-			     "InnoDB: invalid ROW_FORMAT specifier.");
-		valid = FALSE;
-	}
-
-	return(valid);
-}
-
-/*****************************************************************//**
-Fills in create_info with the table's current auto-increment value
-unless the statement supplied one itself. Used in SHOW CREATE TABLE
-et al. */
-UNIV_INTERN
-void
-ha_innobase::update_create_info(
-/*============================*/
-	HA_CREATE_INFO*	create_info)	/*!< in/out: create info */
-{
-	if (create_info->used_fields & HA_CREATE_USED_AUTO) {
-		/* An AUTO_INCREMENT value was given explicitly:
-		leave it untouched. */
-		return;
-	}
-
-	/* Refresh the handler statistics for the auto-increment value
-	and hand that value over. */
-	ha_innobase::info(HA_STATUS_AUTO);
-
-	create_info->auto_increment_value = stats.auto_increment_value;
-}
-
-/*****************************************************************//**
-Creates a new table to an InnoDB database. The table definition, the
-clustered index, the remaining indexes and any foreign key constraints
-are created inside a separately allocated transaction while the InnoDB
-data dictionary is latched exclusively. On failure the function goes
-through the cleanup label, which commits that transaction, releases the
-dictionary latch and frees the transaction before returning the error.
-@return 0 on success, otherwise an error number */
-UNIV_INTERN
-int
-ha_innobase::create(
-/*================*/
- const char* name, /*!< in: table name */
- TABLE* form, /*!< in: information on table
- columns and indexes */
- HA_CREATE_INFO* create_info) /*!< in: more information of the
- created table, contains also the
- create statement string */
-{
- int error;
- dict_table_t* innobase_table;
- trx_t* parent_trx;
- trx_t* trx;
- int primary_key_no;
- uint i;
- char name2[FN_REFLEN];
- char norm_name[FN_REFLEN];
- THD* thd = ha_thd();
- ib_int64_t auto_inc_value;
- ulint flags;
- /* Cache the value of innodb_file_format, in case it is
- modified by another thread while the table is being created. */
- const ulint file_format = srv_file_format;
-
- DBUG_ENTER("ha_innobase::create");
-
- DBUG_ASSERT(thd != NULL);
- DBUG_ASSERT(create_info != NULL);
-
-#ifdef __WIN__
- /* Names passed in from server are in two formats:
- 1. <database_name>/<table_name>: for normal table creation
- 2. full path: for temp table creation, or sym link
-
- When srv_file_per_table is on and mysqld_embedded is off,
- check for full path pattern, i.e.
- X:\dir\..., X is a driver letter, or
- \\dir1\dir2\..., UNC path
- returns error if it is in full path format, but not creating a temp.
- table. Currently InnoDB does not support symbolic link on Windows. */
-
- /* The negation must apply to the result of the bit test. Written
- as (!options & FLAG), the '!' binds first and yields 0 or 1, so
- the test could never be true for a flag other than bit 0. */
- if (srv_file_per_table
- && !mysqld_embedded
- && !(create_info->options & HA_LEX_CREATE_TMP_TABLE)) {
-
- if ((name[1] == ':')
- || (name[0] == '\\' && name[1] == '\\')) {
- sql_print_error("Cannot create table %s\n", name);
- DBUG_RETURN(HA_ERR_GENERIC);
- }
- }
-#endif
-
- if (form->s->fields > 1000) {
- /* The limit probably should be REC_MAX_N_FIELDS - 3 = 1020,
- but we play safe here */
-
- DBUG_RETURN(HA_ERR_TO_BIG_ROW);
- }
-
- /* Get the transaction associated with the current thd, or create one
- if not yet created */
-
- parent_trx = check_trx_exists(thd);
-
- /* In case MySQL calls this in the middle of a SELECT query, release
- possible adaptive hash latch to avoid deadlocks of threads */
-
- trx_search_latch_release_if_reserved(parent_trx);
-
- trx = innobase_trx_allocate(thd);
-
- if (lower_case_table_names) {
- srv_lower_case_table_names = TRUE;
- } else {
- srv_lower_case_table_names = FALSE;
- }
-
- strcpy(name2, name);
-
- normalize_table_name(norm_name, name2);
-
- /* Latch the InnoDB data dictionary exclusively so that no deadlocks
- or lock waits can happen in it during a table create operation.
- Drop table etc. do this latching in row0mysql.c. */
-
- row_mysql_lock_data_dictionary(trx);
-
- /* Create the table definition in InnoDB */
-
- flags = 0;
-
- /* Validate create options if innodb_strict_mode is set. */
- if (!create_options_are_valid(thd, form, create_info)) {
- error = ER_ILLEGAL_HA_CREATE_OPTION;
- goto cleanup;
- }
-
- if (create_info->key_block_size
- || (create_info->used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE)) {
- /* Determine the page_zip.ssize corresponding to the
- requested page size (key_block_size) in kilobytes. */
-
- ulint ssize, ksize;
- ulint key_block_size = create_info->key_block_size;
-
- for (ssize = ksize = 1; ssize <= DICT_TF_ZSSIZE_MAX;
- ssize++, ksize <<= 1) {
- if (key_block_size == ksize) {
- flags = ssize << DICT_TF_ZSSIZE_SHIFT
- | DICT_TF_COMPACT
- | DICT_TF_FORMAT_ZIP
- << DICT_TF_FORMAT_SHIFT;
- break;
- }
- }
-
- if (!srv_file_per_table) {
- push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: KEY_BLOCK_SIZE"
- " requires innodb_file_per_table.");
- flags = 0;
- }
-
- if (file_format < DICT_TF_FORMAT_ZIP) {
- push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: KEY_BLOCK_SIZE"
- " requires innodb_file_format >"
- " Antelope.");
- flags = 0;
- }
-
- if (!flags) {
- push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: ignoring"
- " KEY_BLOCK_SIZE=%lu.",
- create_info->key_block_size);
- }
- }
-
- if (create_info->used_fields & HA_CREATE_USED_ROW_FORMAT) {
- if (flags) {
- /* KEY_BLOCK_SIZE was specified. */
- if (form->s->row_type != ROW_TYPE_COMPRESSED) {
- /* ROW_FORMAT other than COMPRESSED
- ignores KEY_BLOCK_SIZE. It does not
- make sense to reject conflicting
- KEY_BLOCK_SIZE and ROW_FORMAT, because
- such combinations can be obtained
- with ALTER TABLE anyway. */
- push_warning_printf(
- thd,
- MYSQL_ERROR::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: ignoring KEY_BLOCK_SIZE=%lu"
- " unless ROW_FORMAT=COMPRESSED.",
- create_info->key_block_size);
- flags = 0;
- }
- } else {
- /* No KEY_BLOCK_SIZE */
- if (form->s->row_type == ROW_TYPE_COMPRESSED) {
- /* ROW_FORMAT=COMPRESSED without
- KEY_BLOCK_SIZE implies half the
- maximum KEY_BLOCK_SIZE. */
- flags = (DICT_TF_ZSSIZE_MAX - 1)
- << DICT_TF_ZSSIZE_SHIFT
- | DICT_TF_COMPACT
- | DICT_TF_FORMAT_ZIP
- << DICT_TF_FORMAT_SHIFT;
-#if DICT_TF_ZSSIZE_MAX < 1
-# error "DICT_TF_ZSSIZE_MAX < 1"
-#endif
- }
- }
-
- switch (form->s->row_type) {
- const char* row_format_name;
- case ROW_TYPE_REDUNDANT:
- break;
- case ROW_TYPE_COMPRESSED:
- case ROW_TYPE_DYNAMIC:
- row_format_name
- = form->s->row_type == ROW_TYPE_COMPRESSED
- ? "COMPRESSED"
- : "DYNAMIC";
-
- if (!srv_file_per_table) {
- push_warning_printf(
- thd,
- MYSQL_ERROR::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: ROW_FORMAT=%s"
- " requires innodb_file_per_table.",
- row_format_name);
- } else if (file_format < DICT_TF_FORMAT_ZIP) {
- push_warning_printf(
- thd,
- MYSQL_ERROR::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: ROW_FORMAT=%s"
- " requires innodb_file_format >"
- " Antelope.",
- row_format_name);
- } else {
- flags |= DICT_TF_COMPACT
- | (DICT_TF_FORMAT_ZIP
- << DICT_TF_FORMAT_SHIFT);
- break;
- }
-
- /* fall through */
- case ROW_TYPE_NOT_USED:
- case ROW_TYPE_FIXED:
- default:
- push_warning(thd,
- MYSQL_ERROR::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: assuming ROW_FORMAT=COMPACT.");
- /* fall through */
- case ROW_TYPE_DEFAULT:
- case ROW_TYPE_COMPACT:
- flags = DICT_TF_COMPACT;
- break;
- }
- } else if (!flags) {
- /* No KEY_BLOCK_SIZE or ROW_FORMAT specified:
- use ROW_FORMAT=COMPACT by default. */
- flags = DICT_TF_COMPACT;
- }
-
- /* Look for a primary key */
-
- primary_key_no= (form->s->primary_key != MAX_KEY ?
- (int) form->s->primary_key :
- -1);
-
- /* Our function row_get_mysql_key_number_for_index assumes
- the primary key is always number 0, if it exists */
-
- ut_a(primary_key_no == -1 || primary_key_no == 0);
-
- /* Check for name conflicts (with reserved name) for
- any user indices to be created. */
- if (innobase_index_name_is_reserved(trx, form->key_info,
- form->s->keys)) {
- error = -1;
- goto cleanup;
- }
-
- error = create_table_def(trx, form, norm_name,
- create_info->options & HA_LEX_CREATE_TMP_TABLE ? name2 : NULL,
- flags);
-
- if (error) {
- goto cleanup;
- }
-
-
- /* Create the keys */
-
- if (form->s->keys == 0 || primary_key_no == -1) {
- /* Create an index which is used as the clustered index;
- order the rows by their row id which is internally generated
- by InnoDB */
-
- error = create_clustered_index_when_no_primary(
- trx, flags, norm_name);
- if (error) {
- goto cleanup;
- }
- }
-
- if (primary_key_no != -1) {
- /* In InnoDB the clustered index must always be created
- first */
- if ((error = create_index(trx, form, flags, norm_name,
- (uint) primary_key_no))) {
- goto cleanup;
- }
- }
-
- for (i = 0; i < form->s->keys; i++) {
-
- if (i != (uint) primary_key_no) {
-
- if ((error = create_index(trx, form, flags, norm_name,
- i))) {
- goto cleanup;
- }
- }
- }
-
- if (*trx->mysql_query_str) {
- error = row_table_add_foreign_constraints(trx,
- *trx->mysql_query_str, norm_name,
- create_info->options & HA_LEX_CREATE_TMP_TABLE);
-
- error = convert_error_code_to_mysql(error, flags, NULL);
-
- if (error) {
- goto cleanup;
- }
- }
-
- innobase_commit_low(trx);
-
- row_mysql_unlock_data_dictionary(trx);
-
- /* Flush the log to reduce probability that the .frm files and
- the InnoDB data dictionary get out-of-sync if the user runs
- with innodb_flush_log_at_trx_commit = 0 */
-
- log_buffer_flush_to_disk();
-
- innobase_table = dict_table_get(norm_name, FALSE);
-
- DBUG_ASSERT(innobase_table != 0);
-
- if (innobase_table) {
- /* We update the highest file format in the system table
- space, if this table has higher file format setting. */
-
- trx_sys_file_format_max_upgrade(
- (const char**) &innobase_file_format_check,
- dict_table_get_format(innobase_table));
- }
-
- /* Note: We can't call update_thd() as prebuilt will not be
- setup at this stage and so we use thd. */
-
- /* We need to copy the AUTOINC value from the old table if
- this is an ALTER TABLE or CREATE INDEX because CREATE INDEX
- does a table copy too. */
-
- if (((create_info->used_fields & HA_CREATE_USED_AUTO)
- || thd_sql_command(thd) == SQLCOM_ALTER_TABLE
- || thd_sql_command(thd) == SQLCOM_CREATE_INDEX)
- && create_info->auto_increment_value > 0) {
-
- /* Query was one of :
- CREATE TABLE ...AUTO_INCREMENT = x; or
- ALTER TABLE...AUTO_INCREMENT = x; or
- CREATE INDEX x on t(...);
- Find out a table definition from the dictionary and get
- the current value of the auto increment field. Set a new
- value to the auto increment field if the value is greater
- than the maximum value in the column. */
-
- auto_inc_value = create_info->auto_increment_value;
-
- dict_table_autoinc_lock(innobase_table);
- dict_table_autoinc_initialize(innobase_table, auto_inc_value);
- dict_table_autoinc_unlock(innobase_table);
- }
-
- /* Tell the InnoDB server that there might be work for
- utility threads: */
-
- srv_active_wake_master_thread();
-
- trx_free_for_mysql(trx);
-
- DBUG_RETURN(0);
-
-cleanup:
- /* Common failure exit: finish the creating transaction, release
- the dictionary latch taken above and free the transaction. */
- innobase_commit_low(trx);
-
- row_mysql_unlock_data_dictionary(trx);
-
- trx_free_for_mysql(trx);
-
- DBUG_RETURN(error);
-}
-
-/*****************************************************************//**
-Discards the tablespace of the table behind this handle, or imports
-one for it, depending on the argument.
-@return the result of the operation converted to a MySQL error code */
-UNIV_INTERN
-int
-ha_innobase::discard_or_import_tablespace(
-/*======================================*/
-	my_bool	discard)	/*!< in: TRUE if discard, else import */
-{
-	DBUG_ENTER("ha_innobase::discard_or_import_tablespace");
-
-	/* The prebuilt struct must carry a valid transaction, and it
-	must be the one that belongs to the current thread. */
-	ut_a(prebuilt->trx);
-	ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
-	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
-
-	dict_table_t* const	ib_table = prebuilt->table;
-	trx_t* const		cur_trx = prebuilt->trx;
-
-	int	rc = discard
-		? row_discard_tablespace_for_mysql(ib_table->name, cur_trx)
-		: row_import_tablespace_for_mysql(ib_table->name, cur_trx);
-
-	rc = convert_error_code_to_mysql(rc, ib_table->flags, NULL);
-
-	DBUG_RETURN(rc);
-}
-
-/*****************************************************************//**
-Deletes all rows of an InnoDB table. Only TRUNCATE TABLE is served
-here; for any other statement, or when InnoDB reports DB_ERROR for the
-truncate, HA_ERR_WRONG_COMMAND is returned.
-@return error number */
-UNIV_INTERN
-int
-ha_innobase::delete_all_rows(void)
-/*==============================*/
-{
-	int	error;
-
-	DBUG_ENTER("ha_innobase::delete_all_rows");
-
-	/* Get the transaction associated with the current thd, or create
-	one if not yet created, and update prebuilt->trx */
-
-	update_thd(ha_thd());
-
-	if (thd_sql_command(user_thd) == SQLCOM_TRUNCATE) {
-
-		/* Truncate the table in InnoDB */
-
-		error = row_truncate_table_for_mysql(prebuilt->table,
-						     prebuilt->trx);
-
-		if (error != DB_ERROR) {
-			error = convert_error_code_to_mysql(
-				error, prebuilt->table->flags, NULL);
-
-			DBUG_RETURN(error);
-		}
-
-		/* Cannot truncate; resort to ha_innobase::delete_row() */
-	}
-
-	/* We only handle TRUNCATE TABLE t as a special case.
-	DELETE FROM t will have to use ha_innobase::delete_row(),
-	because DELETE is transactional while TRUNCATE is not. */
-	DBUG_RETURN(my_errno=HA_ERR_WRONG_COMMAND);
-}
-
-/*****************************************************************//**
-Drops a table from an InnoDB database. Before calling this function,
-MySQL calls innobase_commit to commit the transaction of the current user.
-Then the current user cannot have locks set on the table. Drop table
-operation inside InnoDB will remove all locks any user has on the table
-inside InnoDB. The drop itself runs in a separately allocated
-transaction that is committed and freed before returning.
-@return error number */
-UNIV_INTERN
-int
-ha_innobase::delete_table(
-/*======================*/
-	const char*	name)	/*!< in: table name */
-{
-	int	error;
-	trx_t*	parent_trx;
-	trx_t*	trx;
-	THD	*thd = ha_thd();
-	char	norm_name[1000];
-
-	DBUG_ENTER("ha_innobase::delete_table");
-
-	/* normalize_table_name() is given no buffer size, so the length
-	of the incoming name has to be checked BEFORE norm_name is
-	written, not afterwards. */
-	ut_a(strlen(name) < sizeof(norm_name));
-
-	/* Strangely, MySQL passes the table name without the '.frm'
-	extension, in contrast to ::create */
-	normalize_table_name(norm_name, name);
-
-	if (IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(norm_name, thd)) {
-		DBUG_RETURN(HA_ERR_GENERIC);
-	}
-
-	/* Get the transaction associated with the current thd, or create one
-	if not yet created */
-
-	parent_trx = check_trx_exists(thd);
-
-	/* In case MySQL calls this in the middle of a SELECT query, release
-	possible adaptive hash latch to avoid deadlocks of threads */
-
-	trx_search_latch_release_if_reserved(parent_trx);
-
-	trx = innobase_trx_allocate(thd);
-
-	if (lower_case_table_names) {
-		srv_lower_case_table_names = TRUE;
-	} else {
-		srv_lower_case_table_names = FALSE;
-	}
-
-	/* Drop the table in InnoDB; the last argument tells whether
-	the statement being executed is DROP DATABASE. */
-
-	error = row_drop_table_for_mysql(norm_name, trx,
-					 thd_sql_command(thd)
-					 == SQLCOM_DROP_DB);
-
-	/* Flush the log to reduce probability that the .frm files and
-	the InnoDB data dictionary get out-of-sync if the user runs
-	with innodb_flush_log_at_trx_commit = 0 */
-
-	log_buffer_flush_to_disk();
-
-	/* Tell the InnoDB server that there might be work for
-	utility threads: */
-
-	srv_active_wake_master_thread();
-
-	innobase_commit_low(trx);
-
-	trx_free_for_mysql(trx);
-
-	error = convert_error_code_to_mysql(error, 0, NULL);
-
-	DBUG_RETURN(error);
-}
-
-/*****************************************************************//**
-Removes all tables in the named database inside InnoDB. The database
-name is taken from the last directory component of the given path and
-is passed on with a trailing '/' appended. The drop runs in a
-separately allocated transaction that is committed and freed before
-returning; the result of the drop is not reported to the caller. */
-static
-void
-innobase_drop_database(
-/*===================*/
- handlerton *hton, /*!< in: handlerton of Innodb */
- char* path) /*!< in: database path; inside InnoDB the name
- of the last directory in the path is used as
- the database name: for example, in 'mysql/data/test'
- the database name is 'test' */
-{
- ulint len = 0;
- trx_t* trx;
- char* ptr;
- int error;
- char* namebuf;
- THD* thd = current_thd;
-
- /* Get the transaction associated with the current thd, or create one
- if not yet created */
-
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- /* In the Windows plugin, thd = current_thd is always NULL */
- if (thd) {
- trx_t* parent_trx = check_trx_exists(thd);
-
- /* In case MySQL calls this in the middle of a SELECT
- query, release possible adaptive hash latch to avoid
- deadlocks of threads */
-
- trx_search_latch_release_if_reserved(parent_trx);
- }
-
- /* Start two characters before the position returned by strend();
- presumably this skips a trailing path separator -- TODO confirm
- against the callers. */
- ptr = strend(path) - 2;
-
- /* Walk backwards to the previous '\\' or '/' (or to the start of
- the path), counting the characters of the last component. */
- while (ptr >= path && *ptr != '\\' && *ptr != '/') {
- ptr--;
- len++;
- }
-
- /* Step forward onto the first character of the component. */
- ptr++;
- /* Room for the name, the appended '/' and the terminating NUL.
- NOTE(review): the result of my_malloc() is not checked here. */
- namebuf = (char*) my_malloc((uint) len + 2, MYF(0));
-
- memcpy(namebuf, ptr, len);
- namebuf[len] = '/';
- namebuf[len + 1] = '\0';
-#ifdef __WIN__
- innobase_casedn_str(namebuf);
-#endif
-#if defined __WIN__ && !defined MYSQL_SERVER
- /* In the Windows plugin, thd = current_thd is always NULL */
- trx = trx_allocate_for_mysql();
- trx->mysql_thd = NULL;
- trx->mysql_query_str = NULL;
-#else
- trx = innobase_trx_allocate(thd);
-#endif
- /* The value stored in error is not examined afterwards. */
- error = row_drop_database_for_mysql(namebuf, trx);
- my_free(namebuf, MYF(0));
-
- /* Flush the log to reduce probability that the .frm files and
- the InnoDB data dictionary get out-of-sync if the user runs
- with innodb_flush_log_at_trx_commit = 0 */
-
- log_buffer_flush_to_disk();
-
- /* Tell the InnoDB server that there might be work for
- utility threads: */
-
- srv_active_wake_master_thread();
-
- innobase_commit_low(trx);
- trx_free_for_mysql(trx);
-}
-/*********************************************************************//**
-Renames an InnoDB table. Both names are normalized into temporary
-buffers before the rename is attempted; a failed rename is reported in
-dict_foreign_err_file.
-@return 0 or error code */
-static
-int
-innobase_rename_table(
-/*==================*/
-	trx_t*		trx,	/*!< in: transaction */
-	const char*	from,	/*!< in: old name of the table */
-	const char*	to,	/*!< in: new name of the table */
-	ibool		lock_and_commit)
-				/*!< in: TRUE=lock data dictionary and commit */
-{
-	srv_lower_case_table_names = lower_case_table_names ? TRUE : FALSE;
-
-	/* The 64 extra bytes are an arbitrary safety margin. */
-	char*	to_norm = (char*) my_malloc(strlen(to) + 64, MYF(0));
-	char*	from_norm = (char*) my_malloc(strlen(from) + 64, MYF(0));
-
-	normalize_table_name(to_norm, to);
-	normalize_table_name(from_norm, from);
-
-	/* Serialize data dictionary operations with dictionary mutex:
-	no deadlocks can occur then in these operations */
-
-	if (lock_and_commit) {
-		row_mysql_lock_data_dictionary(trx);
-	}
-
-	const int	rc = row_rename_table_for_mysql(
-		from_norm, to_norm, trx, lock_and_commit);
-
-	if (rc != DB_SUCCESS) {
-		FILE*	err_file = dict_foreign_err_file;
-
-		fputs("InnoDB: Renaming table ", err_file);
-		ut_print_name(err_file, trx, TRUE, from_norm);
-		fputs(" to ", err_file);
-		ut_print_name(err_file, trx, TRUE, to_norm);
-		fputs(" failed!\n", err_file);
-	}
-
-	if (lock_and_commit) {
-		row_mysql_unlock_data_dictionary(trx);
-
-		/* Flush the log to reduce probability that the .frm
-		files and the InnoDB data dictionary get out-of-sync
-		if the user runs with innodb_flush_log_at_trx_commit = 0 */
-
-		log_buffer_flush_to_disk();
-	}
-
-	my_free(to_norm, MYF(0));
-	my_free(from_norm, MYF(0));
-
-	return rc;
-}
-/*********************************************************************//**
-Renames an InnoDB table on behalf of the server. The rename runs in a
-separately allocated transaction that is committed and freed here.
-@return 0 or error code */
-UNIV_INTERN
-int
-ha_innobase::rename_table(
-/*======================*/
-	const char*	from,	/*!< in: old name of the table */
-	const char*	to)	/*!< in: new name of the table */
-{
-	THD*	thd = ha_thd();
-
-	DBUG_ENTER("ha_innobase::rename_table");
-
-	/* Get the transaction associated with the current thd (or create
-	one if not yet created) and, in case MySQL calls this in the middle
-	of a SELECT query, release a possible adaptive hash latch to avoid
-	deadlocks of threads */
-
-	trx_search_latch_release_if_reserved(check_trx_exists(thd));
-
-	trx_t*	rename_trx = innobase_trx_allocate(thd);
-
-	int	rc = innobase_rename_table(rename_trx, from, to, TRUE);
-
-	/* Tell the InnoDB server that there might be work for
-	utility threads: */
-
-	srv_active_wake_master_thread();
-
-	innobase_commit_low(rename_trx);
-	trx_free_for_mysql(rename_trx);
-
-	/* A duplicate key error is special-cased and turned into
-	DB_ERROR. This avoids a possible SIGSEGV in the mysql error
-	handling code: mysql handles a duplicate key error by re-entering
-	the storage layer and calling get_dup_key(), which requires a
-	valid table handle ('row_prebuilt_t' structure) that could no
-	longer be available at this stage. Since the duplicate here is
-	due to an existing table whose name is the one we are trying to
-	rename to, a 'table exists' error message is reported and the
-	generic error code is returned. */
-	if (rc == (int) DB_DUPLICATE_KEY) {
-		my_error(ER_TABLE_EXISTS_ERROR, MYF(0), to);
-
-		rc = DB_ERROR;
-	}
-
-	rc = convert_error_code_to_mysql(rc, 0, NULL);
-
-	DBUG_RETURN(rc);
-}
-
-/*********************************************************************//**
-Estimates the number of index records in a range. The two MySQL range
-end points are converted to InnoDB tuples, and the estimate is then
-taken from the B-tree of the given index.
-@return estimated number of rows; never 0 */
-UNIV_INTERN
-ha_rows
-ha_innobase::records_in_range(
-/*==========================*/
-	uint			keynr,		/*!< in: index number */
-	key_range		*min_key,	/*!< in: start key value of the
-						range, may also be 0 */
-	key_range		*max_key)	/*!< in: range end key val, may
-						also be 0 */
-{
-	/* A scratch buffer for the converted end-of-range key; the
-	start-of-range key uses the handle's own key_val_buff. */
-	const ulint	end_buf_len = table->s->reclength
-				+ table->s->max_key_length + 100;
-	uchar*		end_buf = (uchar*) my_malloc(
-				table->s->reclength
-				+ table->s->max_key_length + 100,
-				MYF(MY_FAE));
-	ib_int64_t	n_rows;
-
-	DBUG_ENTER("records_in_range");
-
-	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
-
-	prebuilt->trx->op_info = (char*)"estimating records in index range";
-
-	/* In case MySQL calls this in the middle of a SELECT query, release
-	possible adaptive hash latch to avoid deadlocks of threads */
-
-	trx_search_latch_release_if_reserved(prebuilt->trx);
-
-	active_index = keynr;
-
-	KEY*		mysql_key = table->key_info + active_index;
-
-	dict_index_t*	ib_index = dict_table_get_index_on_name(
-		prebuilt->table, mysql_key->name);
-
-	/* MySQL knows about this index and so we must be able to find it.*/
-	ut_a(ib_index);
-
-	mem_heap_t*	tuple_heap = mem_heap_create(
-		2 * (mysql_key->key_parts * sizeof(dfield_t)
-		     + sizeof(dtuple_t)));
-
-	dtuple_t*	lower = dtuple_create(tuple_heap,
-					      mysql_key->key_parts);
-	dict_index_copy_types(lower, ib_index, mysql_key->key_parts);
-
-	dtuple_t*	upper = dtuple_create(tuple_heap,
-					      mysql_key->key_parts);
-	dict_index_copy_types(upper, ib_index, mysql_key->key_parts);
-
-	/* A missing end point is passed on as a null key of length 0. */
-	const uchar*	lower_key = min_key ? min_key->key
-					    : (const uchar*) 0;
-	const ulint	lower_len = (ulint) (min_key ? min_key->length : 0);
-	const uchar*	upper_key = max_key ? max_key->key
-					    : (const uchar*) 0;
-	const ulint	upper_len = (ulint) (max_key ? max_key->length : 0);
-
-	row_sel_convert_mysql_key_to_innobase(
-		lower, (byte*) key_val_buff,
-		(ulint)upd_and_key_val_buff_len,
-		ib_index,
-		(byte*) lower_key,
-		lower_len,
-		prebuilt->trx);
-
-	row_sel_convert_mysql_key_to_innobase(
-		upper, (byte*) end_buf,
-		end_buf_len, ib_index,
-		(byte*) upper_key,
-		upper_len,
-		prebuilt->trx);
-
-	/* A missing end point is searched with HA_READ_KEY_EXACT. */
-	const ulint	lower_mode = convert_search_mode_to_innobase(
-		min_key ? min_key->flag : HA_READ_KEY_EXACT);
-	const ulint	upper_mode = convert_search_mode_to_innobase(
-		max_key ? max_key->flag : HA_READ_KEY_EXACT);
-
-	if (lower_mode == PAGE_CUR_UNSUPP || upper_mode == PAGE_CUR_UNSUPP) {
-
-		n_rows = HA_POS_ERROR;
-	} else {
-
-		n_rows = btr_estimate_n_rows_in_range(ib_index, lower,
-						      lower_mode, upper,
-						      upper_mode);
-	}
-
-	mem_heap_free(tuple_heap);
-
-	my_free(end_buf, MYF(0));
-
-	prebuilt->trx->op_info = (char*)"";
-
-	/* The MySQL optimizer seems to believe an estimate of 0 rows is
-	always accurate and may return the result 'Empty set' based on that.
-	The accuracy is not guaranteed, and even if it were, for a locking
-	read we should anyway perform the search to set the next-key lock.
-	Report 1 instead of 0 so that MySQL does not make that assumption. */
-
-	if (n_rows == 0) {
-		n_rows = 1;
-	}
-
-	DBUG_RETURN((ha_rows) n_rows);
-}
-
-/*********************************************************************//**
-Gives an UPPER BOUND to the number of rows in a table, derived from
-the leaf page count of the table's first index and the minimum record
-length of that index. This is used in filesort.cc.
-@return upper bound of rows */
-UNIV_INTERN
-ha_rows
-ha_innobase::estimate_rows_upper_bound(void)
-/*======================================*/
-{
-	DBUG_ENTER("estimate_rows_upper_bound");
-
-	/* We do not know if MySQL can call this function before calling
-	external_lock(). To be safe, update the thd of the current table
-	handle. */
-
-	update_thd(ha_thd());
-
-	prebuilt->trx->op_info = (char*)
-		"calculating upper bound for table rows";
-
-	/* In case MySQL calls this in the middle of a SELECT query, release
-	possible adaptive hash latch to avoid deadlocks of threads */
-
-	trx_search_latch_release_if_reserved(prebuilt->trx);
-
-	dict_index_t*	first_index = dict_table_get_first_index(
-		prebuilt->table);
-
-	ut_a(first_index->stat_n_leaf_pages > 0);
-
-	const ulonglong	leaf_bytes =
-		((ulonglong) first_index->stat_n_leaf_pages) * UNIV_PAGE_SIZE;
-
-	/* Dividing the leaf size by the minimum record length bounds the
-	row count from above. Since new statistics are only calculated in
-	row0mysql.c when a table has grown by a threshold factor, a safety
-	factor of 2 is applied as well. */
-
-	const ulonglong	upper_bound = 2 * leaf_bytes /
-		dict_index_calc_min_rec_len(first_index);
-
-	prebuilt->trx->op_info = (char*)"";
-
-	DBUG_RETURN((ha_rows) upper_bound);
-}
-
-/*********************************************************************//**
-How many seeks it will take to read through the table. The figure is
-meant to be comparable to the number returned by records_in_range, so
-that a table scan can be weighed against using keys.
-@return estimated time measured in disk seeks */
-UNIV_INTERN
-double
-ha_innobase::scan_time()
-/*====================*/
-{
-	/* MySQL seems to favor table scans too much over index searches.
-	We therefore pretend that a sequential read costs as much as a
-	random disk read: the size below is NOT divided by 10, although
-	that would be physically realistic. */
-
-	const double	clustered_size
-		= (double) (prebuilt->table->stat_clustered_index_size);
-
-	return(clustered_size);
-}
-
-/******************************************************************//**
-Calculate the time it takes to read a set of ranges through an index.
-Reads through the primary key are costed as a fraction of a full scan;
-every other index is left to the generic handler estimate.
-@return estimated time measured in disk seeks */
-UNIV_INTERN
-double
-ha_innobase::read_time(
-/*===================*/
-	uint	index,	/*!< in: key number */
-	uint	ranges,	/*!< in: how many ranges */
-	ha_rows	rows)	/*!< in: estimated number of rows in the ranges */
-{
-	if (index != table->s->primary_key) {
-		/* Not clustered */
-		return(handler::read_time(index, ranges, rows));
-	}
-
-	if (rows <= 2) {
-
-		return((double) rows);
-	}
-
-	/* Assume that the read time is proportional to the scan time for
-	all rows + at most one seek per range. */
-
-	const double	full_scan_cost = scan_time();
-	const ha_rows	row_cap = estimate_rows_upper_bound();
-
-	if (row_cap < rows) {
-		/* More rows requested than the table is estimated to
-		hold at most: cost it as one full scan. */
-		return(full_scan_cost);
-	}
-
-	const double	fraction = (double) rows / (double) row_cap;
-
-	return(ranges + fraction * full_scan_cost);
-}
-
-/*********************************************************************//**
-Returns statistics information of the table to the MySQL interpreter,
-in various fields of the handle object. */
-UNIV_INTERN
-int
-ha_innobase::info(
-/*==============*/
- uint flag) /*!< in: what information MySQL requests */
-{
- dict_table_t* ib_table;
- dict_index_t* index;
- ha_rows rec_per_key;
- ib_int64_t n_rows;
- ulong j;
- ulong i;
- char path[FN_REFLEN];
- os_file_stat_t stat_info;
-
- DBUG_ENTER("info");
-
- /* If we are forcing recovery at a high level, we will suppress
- statistics calculation on tables, because that may crash the
- server if an index is badly corrupted. */
-
- if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
-
- /* We return success (0) instead of HA_ERR_CRASHED,
- because we want MySQL to process this query and not
- stop, like it would do if it received the error code
- HA_ERR_CRASHED. */
-
- DBUG_RETURN(0);
- }
-
- /* We do not know if MySQL can call this function before calling
- external_lock(). To be safe, update the thd of the current table
- handle. */
-
- update_thd(ha_thd());
-
- /* In case MySQL calls this in the middle of a SELECT query, release
- possible adaptive hash latch to avoid deadlocks of threads */
-
- prebuilt->trx->op_info = (char*)"returning various info to MySQL";
-
- trx_search_latch_release_if_reserved(prebuilt->trx);
-
- ib_table = prebuilt->table;
-
- if (flag & HA_STATUS_TIME) {
- if (innobase_stats_on_metadata) {
- /* In sql_show we call with this flag: update
- then statistics so that they are up-to-date */
-
- prebuilt->trx->op_info = "updating table statistics";
-
- dict_update_statistics(ib_table);
-
- prebuilt->trx->op_info = "returning various info to MySQL";
- }
-
- my_snprintf(path, sizeof(path), "%s/%s%s",
- mysql_data_home, ib_table->name, reg_ext);
-
- unpack_filename(path,path);
-
- /* Note that we do not know the access time of the table,
- nor the CHECK TABLE time, nor the UPDATE or INSERT time. */
-
- if (os_file_get_status(path,&stat_info)) {
- stats.create_time = (ulong) stat_info.ctime;
- }
- }
-
- if (flag & HA_STATUS_VARIABLE) {
- n_rows = ib_table->stat_n_rows;
-
- /* Because we do not protect stat_n_rows by any mutex in a
- delete, it is theoretically possible that the value can be
- smaller than zero! TODO: fix this race.
-
- The MySQL optimizer seems to assume in a left join that n_rows
- is an accurate estimate if it is zero. Of course, it is not,
- since we do not have any locks on the rows yet at this phase.
- Since SHOW TABLE STATUS seems to call this function with the
- HA_STATUS_TIME flag set, while the left join optimizer does not
- set that flag, we add one to a zero value if the flag is not
- set. That way SHOW TABLE STATUS will show the best estimate,
- while the optimizer never sees the table empty. */
-
- if (n_rows < 0) {
- n_rows = 0;
- }
-
- if (n_rows == 0 && !(flag & HA_STATUS_TIME)) {
- n_rows++;
- }
-
- /* Fix bug#40386: Not flushing query cache after truncate.
- n_rows can not be 0 unless the table is empty, set to 1
- instead. The original problem of bug#29507 is actually
- fixed in the server code. */
- if (thd_sql_command(user_thd) == SQLCOM_TRUNCATE) {
-
- n_rows = 1;
-
- /* We need to reset the prebuilt value too, otherwise
- checks for values greater than the last value written
- to the table will fail and the autoinc counter will
- not be updated. This will force write_row() into
- attempting an update of the table's AUTOINC counter. */
-
- prebuilt->autoinc_last_value = 0;
- }
-
- stats.records = (ha_rows)n_rows;
- stats.deleted = 0;
- stats.data_file_length = ((ulonglong)
- ib_table->stat_clustered_index_size)
- * UNIV_PAGE_SIZE;
- stats.index_file_length = ((ulonglong)
- ib_table->stat_sum_of_other_index_sizes)
- * UNIV_PAGE_SIZE;
-
- /* Since fsp_get_available_space_in_free_extents() is
- acquiring latches inside InnoDB, we do not call it if we
- are asked by MySQL to avoid locking. Another reason to
- avoid the call is that it uses quite a lot of CPU.
- See Bug#38185.
- We do not update delete_length if no locking is requested
- so the "old" value can remain. delete_length is initialized
- to 0 in the ha_statistics' constructor. */
- if (!(flag & HA_STATUS_NO_LOCK)) {
-
- /* lock the data dictionary to avoid races with
- ibd_file_missing and tablespace_discarded */
- row_mysql_lock_data_dictionary(prebuilt->trx);
-
- /* ib_table->space must be an existent tablespace */
- if (!ib_table->ibd_file_missing
- && !ib_table->tablespace_discarded) {
-
- stats.delete_length =
- fsp_get_available_space_in_free_extents(
- ib_table->space) * 1024;
- } else {
-
- THD* thd;
-
- thd = ha_thd();
-
- push_warning_printf(
- thd,
- MYSQL_ERROR::WARN_LEVEL_WARN,
- ER_CANT_GET_STAT,
- "InnoDB: Trying to get the free "
- "space for table %s but its "
- "tablespace has been discarded or "
- "the .ibd file is missing. Setting "
- "the free space to zero.",
- ib_table->name);
-
- stats.delete_length = 0;
- }
-
- row_mysql_unlock_data_dictionary(prebuilt->trx);
- }
-
- stats.check_time = 0;
-
- if (stats.records == 0) {
- stats.mean_rec_length = 0;
- } else {
- stats.mean_rec_length = (ulong) (stats.data_file_length / stats.records);
- }
- }
-
- if (flag & HA_STATUS_CONST) {
- index = dict_table_get_first_index(ib_table);
-
- if (prebuilt->clust_index_was_generated) {
- index = dict_table_get_next_index(index);
- }
-
- for (i = 0; i < table->s->keys; i++) {
- if (index == NULL) {
- sql_print_error("Table %s contains fewer "
- "indexes inside InnoDB than "
- "are defined in the MySQL "
- ".frm file. Have you mixed up "
- ".frm files from different "
- "installations? See "
- REFMAN
- "innodb-troubleshooting.html\n",
- ib_table->name);
- break;
- }
-
- for (j = 0; j < table->key_info[i].key_parts; j++) {
-
- if (j + 1 > index->n_uniq) {
- sql_print_error(
-"Index %s of %s has %lu columns unique inside InnoDB, but MySQL is asking "
-"statistics for %lu columns. Have you mixed up .frm files from different "
-"installations? "
-"See " REFMAN "innodb-troubleshooting.html\n",
- index->name,
- ib_table->name,
- (unsigned long)
- index->n_uniq, j + 1);
- break;
- }
-
- if (index->stat_n_diff_key_vals[j + 1] == 0) {
-
- rec_per_key = stats.records;
- } else {
- rec_per_key = (ha_rows)(stats.records /
- index->stat_n_diff_key_vals[j + 1]);
- }
-
- /* Since MySQL seems to favor table scans
- too much over index searches, we pretend
- index selectivity is 2 times better than
- our estimate: */
-
- rec_per_key = rec_per_key / 2;
-
- if (rec_per_key == 0) {
- rec_per_key = 1;
- }
-
- table->key_info[i].rec_per_key[j]=
- rec_per_key >= ~(ulong) 0 ? ~(ulong) 0 :
- (ulong) rec_per_key;
- }
-
- index = dict_table_get_next_index(index);
- }
- }
-
- if (flag & HA_STATUS_ERRKEY) {
- const dict_index_t* err_index;
-
- ut_a(prebuilt->trx);
- ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
-
- err_index = trx_get_error_info(prebuilt->trx);
-
- if (err_index) {
- errkey = (unsigned int)
- row_get_mysql_key_number_for_index(err_index);
- } else {
- errkey = (unsigned int) prebuilt->trx->error_key_num;
- }
- }
-
- if ((flag & HA_STATUS_AUTO) && table->found_next_number_field) {
- stats.auto_increment_value = innobase_peek_autoinc();
- }
-
- prebuilt->trx->op_info = (char*)"";
-
- DBUG_RETURN(0);
-}
-
-/**********************************************************************//**
-Updates index cardinalities of the table, based on 8 random dives into
-each index tree. This does NOT calculate exact statistics on the table.
-@return returns always 0 (success) */
-UNIV_INTERN
-int
-ha_innobase::analyze(
-/*=================*/
- THD* thd, /*!< in: connection thread handle */
- HA_CHECK_OPT* check_opt) /*!< in: currently ignored */
-{
- /* Serialize ANALYZE TABLE inside InnoDB, see
- Bug#38996 Race condition in ANALYZE TABLE */
- pthread_mutex_lock(&analyze_mutex);
-
- /* Simply call ::info() with all the flags */
- info(HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE);
-
- pthread_mutex_unlock(&analyze_mutex);
-
- return(0);
-}
-
-/**********************************************************************//**
-This is mapped to "ALTER TABLE tablename ENGINE=InnoDB", which rebuilds
-the table in MySQL. */
-UNIV_INTERN
-int
-ha_innobase::optimize(
-/*==================*/
- THD* thd, /*!< in: connection thread handle */
- HA_CHECK_OPT* check_opt) /*!< in: currently ignored */
-{
- return(HA_ADMIN_TRY_ALTER);
-}
-
-/*******************************************************************//**
-Tries to check that an InnoDB table is not corrupted. If corruption is
-noticed, prints to stderr information about it. In case of corruption
-may also assert a failure and crash the server.
-@return HA_ADMIN_CORRUPT or HA_ADMIN_OK */
-UNIV_INTERN
-int
-ha_innobase::check(
-/*===============*/
- THD* thd, /*!< in: user thread handle */
- HA_CHECK_OPT* check_opt) /*!< in: check options, currently
- ignored */
-{
- ulint ret;
-
- DBUG_ASSERT(thd == ha_thd());
- ut_a(prebuilt->trx);
- ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
- ut_a(prebuilt->trx == thd_to_trx(thd));
-
- if (prebuilt->mysql_template == NULL) {
- /* Build the template; we will use a dummy template
- in index scans done in checking */
-
- build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
- }
-
- ret = row_check_table_for_mysql(prebuilt);
-
- switch (ret) {
- case DB_SUCCESS:
- return(HA_ADMIN_OK);
- case DB_INTERRUPTED:
- my_error(ER_QUERY_INTERRUPTED, MYF(0));
- return(-1);
- default:
- return(HA_ADMIN_CORRUPT);
- }
-}
-
-/*************************************************************//**
-Adds information about free space in the InnoDB tablespace to a table comment
-which is printed out when a user calls SHOW TABLE STATUS. Adds also info on
-foreign keys.
-@return table comment + InnoDB free space + info on foreign keys */
-UNIV_INTERN
-char*
-ha_innobase::update_table_comment(
-/*==============================*/
- const char* comment)/*!< in: table comment defined by user */
-{
- uint length = (uint) strlen(comment);
- char* str;
- long flen;
-
- /* We do not know if MySQL can call this function before calling
- external_lock(). To be safe, update the thd of the current table
- handle. */
-
- if (length > 64000 - 3) {
- return((char*)comment); /* string too long */
- }
-
- update_thd(ha_thd());
-
- prebuilt->trx->op_info = (char*)"returning table comment";
-
- /* In case MySQL calls this in the middle of a SELECT query, release
- possible adaptive hash latch to avoid deadlocks of threads */
-
- trx_search_latch_release_if_reserved(prebuilt->trx);
- str = NULL;
-
- /* output the data to a temporary file */
-
- mutex_enter(&srv_dict_tmpfile_mutex);
- rewind(srv_dict_tmpfile);
-
- fprintf(srv_dict_tmpfile, "InnoDB free: %llu kB",
- fsp_get_available_space_in_free_extents(
- prebuilt->table->space));
-
- dict_print_info_on_foreign_keys(FALSE, srv_dict_tmpfile,
- prebuilt->trx, prebuilt->table);
- flen = ftell(srv_dict_tmpfile);
- if (flen < 0) {
- flen = 0;
- } else if (length + flen + 3 > 64000) {
- flen = 64000 - 3 - length;
- }
-
- /* allocate buffer for the full string, and
- read the contents of the temporary file */
-
- str = (char*) my_malloc(length + flen + 3, MYF(0));
-
- if (str) {
- char* pos = str + length;
- if (length) {
- memcpy(str, comment, length);
- *pos++ = ';';
- *pos++ = ' ';
- }
- rewind(srv_dict_tmpfile);
- flen = (uint) fread(pos, 1, flen, srv_dict_tmpfile);
- pos[flen] = 0;
- }
-
- mutex_exit(&srv_dict_tmpfile_mutex);
-
- prebuilt->trx->op_info = (char*)"";
-
- return(str ? str : (char*) comment);
-}
-
-/*******************************************************************//**
-Gets the foreign key create info for a table stored in InnoDB.
-@return own: character string in the form which can be inserted to the
-CREATE TABLE statement, MUST be freed with
-ha_innobase::free_foreign_key_create_info */
-UNIV_INTERN
-char*
-ha_innobase::get_foreign_key_create_info(void)
-/*==========================================*/
-{
- char* str = 0;
- long flen;
-
- ut_a(prebuilt != NULL);
-
- /* We do not know if MySQL can call this function before calling
- external_lock(). To be safe, update the thd of the current table
- handle. */
-
- update_thd(ha_thd());
-
- prebuilt->trx->op_info = (char*)"getting info on foreign keys";
-
- /* In case MySQL calls this in the middle of a SELECT query,
- release possible adaptive hash latch to avoid
- deadlocks of threads */
-
- trx_search_latch_release_if_reserved(prebuilt->trx);
-
- mutex_enter(&srv_dict_tmpfile_mutex);
- rewind(srv_dict_tmpfile);
-
- /* output the data to a temporary file */
- dict_print_info_on_foreign_keys(TRUE, srv_dict_tmpfile,
- prebuilt->trx, prebuilt->table);
- prebuilt->trx->op_info = (char*)"";
-
- flen = ftell(srv_dict_tmpfile);
- if (flen < 0) {
- flen = 0;
- } else if (flen > 64000 - 1) {
- flen = 64000 - 1;
- }
-
- /* allocate buffer for the string, and
- read the contents of the temporary file */
-
- str = (char*) my_malloc(flen + 1, MYF(0));
-
- if (str) {
- rewind(srv_dict_tmpfile);
- flen = (uint) fread(str, 1, flen, srv_dict_tmpfile);
- str[flen] = 0;
- }
-
- mutex_exit(&srv_dict_tmpfile_mutex);
-
- return(str);
-}
-
-
-UNIV_INTERN
-int
-ha_innobase::get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list)
-{
- dict_foreign_t* foreign;
-
- DBUG_ENTER("get_foreign_key_list");
- ut_a(prebuilt != NULL);
- update_thd(ha_thd());
- prebuilt->trx->op_info = (char*)"getting list of foreign keys";
- trx_search_latch_release_if_reserved(prebuilt->trx);
- mutex_enter(&(dict_sys->mutex));
- foreign = UT_LIST_GET_FIRST(prebuilt->table->foreign_list);
-
- while (foreign != NULL) {
- uint i;
- FOREIGN_KEY_INFO f_key_info;
- LEX_STRING *name= 0;
- uint ulen;
- char uname[NAME_LEN+1]; /* Unencoded name */
- char db_name[NAME_LEN+1];
- const char *tmp_buff;
-
- tmp_buff= foreign->id;
- i= 0;
- while (tmp_buff[i] != '/')
- i++;
- tmp_buff+= i + 1;
- f_key_info.forein_id = thd_make_lex_string(thd, 0,
- tmp_buff, (uint) strlen(tmp_buff), 1);
- tmp_buff= foreign->referenced_table_name;
-
- /* Database name */
- i= 0;
- while (tmp_buff[i] != '/')
- {
- db_name[i]= tmp_buff[i];
- i++;
- }
- db_name[i]= 0;
- ulen= filename_to_tablename(db_name, uname, sizeof(uname));
- f_key_info.referenced_db = thd_make_lex_string(thd, 0,
- uname, ulen, 1);
-
- /* Table name */
- tmp_buff+= i + 1;
- ulen= filename_to_tablename(tmp_buff, uname, sizeof(uname));
- f_key_info.referenced_table = thd_make_lex_string(thd, 0,
- uname, ulen, 1);
-
- for (i= 0;;) {
- tmp_buff= foreign->foreign_col_names[i];
- name = thd_make_lex_string(thd, name,
- tmp_buff, (uint) strlen(tmp_buff), 1);
- f_key_info.foreign_fields.push_back(name);
- tmp_buff= foreign->referenced_col_names[i];
- name = thd_make_lex_string(thd, name,
- tmp_buff, (uint) strlen(tmp_buff), 1);
- f_key_info.referenced_fields.push_back(name);
- if (++i >= foreign->n_fields)
- break;
- }
-
- ulong length;
- if (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE)
- {
- length=7;
- tmp_buff= "CASCADE";
- }
- else if (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL)
- {
- length=8;
- tmp_buff= "SET NULL";
- }
- else if (foreign->type & DICT_FOREIGN_ON_DELETE_NO_ACTION)
- {
- length=9;
- tmp_buff= "NO ACTION";
- }
- else
- {
- length=8;
- tmp_buff= "RESTRICT";
- }
- f_key_info.delete_method = thd_make_lex_string(
- thd, f_key_info.delete_method, tmp_buff, length, 1);
-
-
- if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE)
- {
- length=7;
- tmp_buff= "CASCADE";
- }
- else if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL)
- {
- length=8;
- tmp_buff= "SET NULL";
- }
- else if (foreign->type & DICT_FOREIGN_ON_UPDATE_NO_ACTION)
- {
- length=9;
- tmp_buff= "NO ACTION";
- }
- else
- {
- length=8;
- tmp_buff= "RESTRICT";
- }
- f_key_info.update_method = thd_make_lex_string(
- thd, f_key_info.update_method, tmp_buff, length, 1);
- if (foreign->referenced_index &&
- foreign->referenced_index->name)
- {
- f_key_info.referenced_key_name = thd_make_lex_string(
- thd, f_key_info.referenced_key_name,
- foreign->referenced_index->name,
- (uint) strlen(foreign->referenced_index->name), 1);
- }
- else
- f_key_info.referenced_key_name= 0;
-
- FOREIGN_KEY_INFO *pf_key_info = (FOREIGN_KEY_INFO *)
- thd_memdup(thd, &f_key_info, sizeof(FOREIGN_KEY_INFO));
- f_key_list->push_back(pf_key_info);
- foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
- }
- mutex_exit(&(dict_sys->mutex));
- prebuilt->trx->op_info = (char*)"";
-
- DBUG_RETURN(0);
-}
-
-/*****************************************************************//**
-Checks if ALTER TABLE may change the storage engine of the table.
-Changing storage engines is not allowed for tables for which there
-are foreign key constraints (parent or child tables).
-@return TRUE if can switch engines */
-UNIV_INTERN
-bool
-ha_innobase::can_switch_engines(void)
-/*=================================*/
-{
- bool can_switch;
-
- DBUG_ENTER("ha_innobase::can_switch_engines");
-
- ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
-
- prebuilt->trx->op_info =
- "determining if there are foreign key constraints";
- row_mysql_lock_data_dictionary(prebuilt->trx);
-
- can_switch = !UT_LIST_GET_FIRST(prebuilt->table->referenced_list)
- && !UT_LIST_GET_FIRST(prebuilt->table->foreign_list);
-
- row_mysql_unlock_data_dictionary(prebuilt->trx);
- prebuilt->trx->op_info = "";
-
- DBUG_RETURN(can_switch);
-}
-
-/*******************************************************************//**
-Checks if a table is referenced by a foreign key. The MySQL manual states that
-a REPLACE is either equivalent to an INSERT, or DELETE(s) + INSERT. Only a
-delete is then allowed internally to resolve a duplicate key conflict in
-REPLACE, not an update.
-@return > 0 if referenced by a FOREIGN KEY */
-UNIV_INTERN
-uint
-ha_innobase::referenced_by_foreign_key(void)
-/*========================================*/
-{
- if (dict_table_is_referenced_by_foreign_key(prebuilt->table)) {
-
- return(1);
- }
-
- return(0);
-}
-
-/*******************************************************************//**
-Frees the foreign key create info for a table stored in InnoDB, if it is
-non-NULL. */
-UNIV_INTERN
-void
-ha_innobase::free_foreign_key_create_info(
-/*======================================*/
- char* str) /*!< in, own: create info string to free */
-{
- if (str) {
- my_free(str, MYF(0));
- }
-}
-
-/*******************************************************************//**
-Tells something additional to the handler about how to do things.
-@return 0 or error number */
-UNIV_INTERN
-int
-ha_innobase::extra(
-/*===============*/
- enum ha_extra_function operation)
- /*!< in: HA_EXTRA_FLUSH or some other flag */
-{
- /* Warning: since it is not sure that MySQL calls external_lock
- before calling this function, the trx field in prebuilt can be
- obsolete! */
-
- switch (operation) {
- case HA_EXTRA_FLUSH:
- if (prebuilt->blob_heap) {
- row_mysql_prebuilt_free_blob_heap(prebuilt);
- }
- break;
- case HA_EXTRA_RESET_STATE:
- reset_template(prebuilt);
- break;
- case HA_EXTRA_NO_KEYREAD:
- prebuilt->read_just_key = 0;
- break;
- case HA_EXTRA_KEYREAD:
- prebuilt->read_just_key = 1;
- break;
- case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
- prebuilt->keep_other_fields_on_keyread = 1;
- break;
-
- /* IMPORTANT: prebuilt->trx can be obsolete in
- this method, because it is not sure that MySQL
- calls external_lock before this method with the
- parameters below. We must not invoke update_thd()
- either, because the calling threads may change.
- CAREFUL HERE, OR MEMORY CORRUPTION MAY OCCUR! */
- case HA_EXTRA_IGNORE_DUP_KEY:
- thd_to_trx(ha_thd())->duplicates |= TRX_DUP_IGNORE;
- break;
- case HA_EXTRA_WRITE_CAN_REPLACE:
- thd_to_trx(ha_thd())->duplicates |= TRX_DUP_REPLACE;
- break;
- case HA_EXTRA_WRITE_CANNOT_REPLACE:
- thd_to_trx(ha_thd())->duplicates &= ~TRX_DUP_REPLACE;
- break;
- case HA_EXTRA_NO_IGNORE_DUP_KEY:
- thd_to_trx(ha_thd())->duplicates &=
- ~(TRX_DUP_IGNORE | TRX_DUP_REPLACE);
- break;
- default:/* Do nothing */
- ;
- }
-
- return(0);
-}
-
-UNIV_INTERN
-int
-ha_innobase::reset()
-{
- if (prebuilt->blob_heap) {
- row_mysql_prebuilt_free_blob_heap(prebuilt);
- }
-
- reset_template(prebuilt);
-
- /* TODO: This should really be reset in reset_template() but for now
- it's safer to do it explicitly here. */
-
- /* This is a statement level counter. */
- prebuilt->autoinc_last_value = 0;
-
- return(0);
-}
-
-/******************************************************************//**
-MySQL calls this function at the start of each SQL statement inside LOCK
-TABLES. Inside LOCK TABLES the ::external_lock method does not work to
-mark SQL statement borders. Note also a special case: if a temporary table
-is created inside LOCK TABLES, MySQL has not called external_lock() at all
-on that table.
-MySQL-5.0 also calls this before each statement in an execution of a stored
-procedure. To make the execution more deterministic for binlogging, MySQL-5.0
-locks all tables involved in a stored procedure with full explicit table
-locks (thd_in_lock_tables(thd) holds in store_lock()) before executing the
-procedure.
-@return 0 or error code */
-UNIV_INTERN
-int
-ha_innobase::start_stmt(
-/*====================*/
- THD* thd, /*!< in: handle to the user thread */
- thr_lock_type lock_type)
-{
- trx_t* trx;
-
- update_thd(thd);
-
- trx = prebuilt->trx;
-
- /* Here we release the search latch and the InnoDB thread FIFO ticket
- if they were reserved. They should have been released already at the
- end of the previous statement, but because inside LOCK TABLES the
- lock count method does not work to mark the end of a SELECT statement,
- that may not be the case. We MUST release the search latch before an
- INSERT, for example. */
-
- innobase_release_stat_resources(trx);
-
- /* Reset the AUTOINC statement level counter for multi-row INSERTs. */
- trx->n_autoinc_rows = 0;
-
- prebuilt->sql_stat_start = TRUE;
- prebuilt->hint_need_to_fetch_extra_cols = 0;
- reset_template(prebuilt);
-
- if (!prebuilt->mysql_has_locked) {
- /* This handle is for a temporary table created inside
- this same LOCK TABLES; since MySQL does NOT call external_lock
- in this case, we must use x-row locks inside InnoDB to be
- prepared for an update of a row */
-
- prebuilt->select_lock_type = LOCK_X;
- } else {
- if (trx->isolation_level != TRX_ISO_SERIALIZABLE
- && thd_sql_command(thd) == SQLCOM_SELECT
- && lock_type == TL_READ) {
-
- /* For other than temporary tables, we obtain
- no lock for consistent read (plain SELECT). */
-
- prebuilt->select_lock_type = LOCK_NONE;
- } else {
- /* Not a consistent read: restore the
- select_lock_type value. The value of
- stored_select_lock_type was decided in:
- 1) ::store_lock(),
- 2) ::external_lock(),
- 3) ::init_table_handle_for_HANDLER(), and
- 4) ::transactional_table_lock(). */
-
- prebuilt->select_lock_type =
- prebuilt->stored_select_lock_type;
- }
- }
-
- trx->detailed_error[0] = '\0';
-
- /* Set the MySQL flag to mark that there is an active transaction */
- if (trx->active_trans == 0) {
-
- innobase_register_trx_and_stmt(ht, thd);
- trx->active_trans = 1;
- } else {
- innobase_register_stmt(ht, thd);
- }
-
- return(0);
-}
-
-/******************************************************************//**
-Maps a MySQL trx isolation level code to the InnoDB isolation level code
-@return InnoDB isolation level */
-static inline
-ulint
-innobase_map_isolation_level(
-/*=========================*/
- enum_tx_isolation iso) /*!< in: MySQL isolation level code */
-{
- switch(iso) {
- case ISO_REPEATABLE_READ: return(TRX_ISO_REPEATABLE_READ);
- case ISO_READ_COMMITTED: return(TRX_ISO_READ_COMMITTED);
- case ISO_SERIALIZABLE: return(TRX_ISO_SERIALIZABLE);
- case ISO_READ_UNCOMMITTED: return(TRX_ISO_READ_UNCOMMITTED);
- default: ut_a(0); return(0);
- }
-}
-
-/******************************************************************//**
-As MySQL will execute an external lock for every new table it uses when it
-starts to process an SQL statement (an exception is when MySQL calls
-start_stmt for the handle) we can use this function to store the pointer to
-the THD in the handle. We will also use this function to communicate
-to InnoDB that a new SQL statement has started and that we must store a
-savepoint to our transaction handle, so that we are able to roll back
-the SQL statement in case of an error.
-@return 0 */
-UNIV_INTERN
-int
-ha_innobase::external_lock(
-/*=======================*/
- THD* thd, /*!< in: handle to the user thread */
- int lock_type) /*!< in: lock type */
-{
- trx_t* trx;
-
- DBUG_ENTER("ha_innobase::external_lock");
- DBUG_PRINT("enter",("lock_type: %d", lock_type));
-
- update_thd(thd);
-
- /* Statement based binlogging does not work in isolation level
- READ UNCOMMITTED and READ COMMITTED since the necessary
- locks cannot be taken. In this case, we print an
- informative error message and return with an error. */
- if (lock_type == F_WRLCK)
- {
- ulong const binlog_format= thd_binlog_format(thd);
- ulong const tx_isolation = thd_tx_isolation(ha_thd());
- if (tx_isolation <= ISO_READ_COMMITTED
- && binlog_format == BINLOG_FORMAT_STMT
-#if MYSQL_VERSION_ID > 50140
- && thd_binlog_filter_ok(thd)
-#endif /* MYSQL_VERSION_ID > 50140 */
- )
- {
- char buf[256];
- my_snprintf(buf, sizeof(buf),
- "Transaction level '%s' in"
- " InnoDB is not safe for binlog mode '%s'",
- tx_isolation_names[tx_isolation],
- binlog_format_names[binlog_format]);
- my_error(ER_BINLOG_LOGGING_IMPOSSIBLE, MYF(0), buf);
- DBUG_RETURN(HA_ERR_LOGGING_IMPOSSIBLE);
- }
- }
-
-
- trx = prebuilt->trx;
-
- prebuilt->sql_stat_start = TRUE;
- prebuilt->hint_need_to_fetch_extra_cols = 0;
-
- reset_template(prebuilt);
-
- if (lock_type == F_WRLCK) {
-
- /* If this is a SELECT, then it is in UPDATE TABLE ...
- or SELECT ... FOR UPDATE */
- prebuilt->select_lock_type = LOCK_X;
- prebuilt->stored_select_lock_type = LOCK_X;
- }
-
- if (lock_type != F_UNLCK) {
- /* MySQL is setting a new table lock */
-
- trx->detailed_error[0] = '\0';
-
- /* Set the MySQL flag to mark that there is an active
- transaction */
- if (trx->active_trans == 0) {
-
- innobase_register_trx_and_stmt(ht, thd);
- trx->active_trans = 1;
- } else if (trx->n_mysql_tables_in_use == 0) {
- innobase_register_stmt(ht, thd);
- }
-
- if (trx->isolation_level == TRX_ISO_SERIALIZABLE
- && prebuilt->select_lock_type == LOCK_NONE
- && thd_test_options(thd,
- OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
-
- /* To get serializable execution, we let InnoDB
- conceptually add 'LOCK IN SHARE MODE' to all SELECTs
- which otherwise would have been consistent reads. An
- exception is consistent reads in the AUTOCOMMIT=1 mode:
- we know that they are read-only transactions, and they
- can be serialized also if performed as consistent
- reads. */
-
- prebuilt->select_lock_type = LOCK_S;
- prebuilt->stored_select_lock_type = LOCK_S;
- }
-
- /* Starting from 4.1.9, no InnoDB table lock is taken in LOCK
- TABLES if AUTOCOMMIT=1. It does not make much sense to acquire
- an InnoDB table lock if it is released immediately at the end
- of LOCK TABLES, and InnoDB's table locks in that case cause
- VERY easily deadlocks.
-
- We do not set InnoDB table locks if user has not explicitly
- requested a table lock. Note that thd_in_lock_tables(thd)
- can hold in some cases, e.g., at the start of a stored
- procedure call (SQLCOM_CALL). */
-
- if (prebuilt->select_lock_type != LOCK_NONE) {
-
- if (thd_sql_command(thd) == SQLCOM_LOCK_TABLES
- && THDVAR(thd, table_locks)
- && thd_test_options(thd, OPTION_NOT_AUTOCOMMIT)
- && thd_in_lock_tables(thd)) {
-
- ulint error = row_lock_table_for_mysql(
- prebuilt, NULL, 0);
-
- if (error != DB_SUCCESS) {
- error = convert_error_code_to_mysql(
- (int) error, 0, thd);
- DBUG_RETURN((int) error);
- }
- }
-
- trx->mysql_n_tables_locked++;
- }
-
- trx->n_mysql_tables_in_use++;
- prebuilt->mysql_has_locked = TRUE;
-
- DBUG_RETURN(0);
- }
-
- /* MySQL is releasing a table lock */
-
- trx->n_mysql_tables_in_use--;
- prebuilt->mysql_has_locked = FALSE;
-
- /* Release a possible FIFO ticket and search latch. Since we
- may reserve the kernel mutex, we have to release the search
- system latch first to obey the latching order. */
-
- innobase_release_stat_resources(trx);
-
- /* If the MySQL lock count drops to zero we know that the current SQL
- statement has ended */
-
- if (trx->n_mysql_tables_in_use == 0) {
-
- trx->mysql_n_tables_locked = 0;
- prebuilt->used_in_HANDLER = FALSE;
-
- if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
- if (trx->active_trans != 0) {
- innobase_commit(ht, thd, TRUE);
- }
- } else {
- if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
- && trx->global_read_view) {
-
- /* At low transaction isolation levels we let
- each consistent read set its own snapshot */
-
- read_view_close_for_mysql(trx);
- }
- }
- }
-
- DBUG_RETURN(0);
-}
-
-/******************************************************************//**
-With this function MySQL request a transactional lock to a table when
-user issued query LOCK TABLES..WHERE ENGINE = InnoDB.
-@return error code */
-UNIV_INTERN
-int
-ha_innobase::transactional_table_lock(
-/*==================================*/
- THD* thd, /*!< in: handle to the user thread */
- int lock_type) /*!< in: lock type */
-{
- trx_t* trx;
-
- DBUG_ENTER("ha_innobase::transactional_table_lock");
- DBUG_PRINT("enter",("lock_type: %d", lock_type));
-
- /* We do not know if MySQL can call this function before calling
- external_lock(). To be safe, update the thd of the current table
- handle. */
-
- update_thd(thd);
-
- if (prebuilt->table->ibd_file_missing && !thd_tablespace_op(thd)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: MySQL is trying to use a table handle"
- " but the .ibd file for\n"
- "InnoDB: table %s does not exist.\n"
- "InnoDB: Have you deleted the .ibd file"
- " from the database directory under\n"
- "InnoDB: the MySQL datadir?"
- "InnoDB: See " REFMAN
- "innodb-troubleshooting.html\n"
- "InnoDB: how you can resolve the problem.\n",
- prebuilt->table->name);
- DBUG_RETURN(HA_ERR_CRASHED);
- }
-
- trx = prebuilt->trx;
-
- prebuilt->sql_stat_start = TRUE;
- prebuilt->hint_need_to_fetch_extra_cols = 0;
-
- reset_template(prebuilt);
-
- if (lock_type == F_WRLCK) {
- prebuilt->select_lock_type = LOCK_X;
- prebuilt->stored_select_lock_type = LOCK_X;
- } else if (lock_type == F_RDLCK) {
- prebuilt->select_lock_type = LOCK_S;
- prebuilt->stored_select_lock_type = LOCK_S;
- } else {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB error:\n"
-"MySQL is trying to set transactional table lock with corrupted lock type\n"
-"to table %s, lock type %d does not exist.\n",
- prebuilt->table->name, lock_type);
- DBUG_RETURN(HA_ERR_CRASHED);
- }
-
- /* MySQL is setting a new transactional table lock */
-
- /* Set the MySQL flag to mark that there is an active transaction */
- if (trx->active_trans == 0) {
-
- innobase_register_trx_and_stmt(ht, thd);
- trx->active_trans = 1;
- }
-
- if (THDVAR(thd, table_locks) && thd_in_lock_tables(thd)) {
- ulint error = DB_SUCCESS;
-
- error = row_lock_table_for_mysql(prebuilt, NULL, 0);
-
- if (error != DB_SUCCESS) {
- error = convert_error_code_to_mysql(
- (int) error, prebuilt->table->flags, thd);
- DBUG_RETURN((int) error);
- }
-
- if (thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
-
- /* Store the current undo_no of the transaction
- so that we know where to roll back if we have
- to roll back the next SQL statement */
-
- trx_mark_sql_stat_end(trx);
- }
- }
-
- DBUG_RETURN(0);
-}
-
-/************************************************************************//**
-Here we export InnoDB status variables to MySQL. */
-static
-void
-innodb_export_status(void)
-/*======================*/
-{
- if (innodb_inited) {
- srv_export_innodb_status();
- }
-}
-
-/************************************************************************//**
-Implements the SHOW INNODB STATUS command. Sends the output of the InnoDB
-Monitor to the client. */
-static
-bool
-innodb_show_status(
-/*===============*/
- handlerton* hton, /*!< in: the innodb handlerton */
- THD* thd, /*!< in: the MySQL query thread of the caller */
- stat_print_fn *stat_print)
-{
- trx_t* trx;
- static const char truncated_msg[] = "... truncated...\n";
- const long MAX_STATUS_SIZE = 64000;
- ulint trx_list_start = ULINT_UNDEFINED;
- ulint trx_list_end = ULINT_UNDEFINED;
-
- DBUG_ENTER("innodb_show_status");
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- trx = check_trx_exists(thd);
-
- innobase_release_stat_resources(trx);
-
- /* We let the InnoDB Monitor to output at most MAX_STATUS_SIZE
- bytes of text. */
-
- long flen, usable_len;
- char* str;
-
- mutex_enter(&srv_monitor_file_mutex);
- rewind(srv_monitor_file);
- srv_printf_innodb_monitor(srv_monitor_file,
- &trx_list_start, &trx_list_end);
- flen = ftell(srv_monitor_file);
- os_file_set_eof(srv_monitor_file);
-
- if (flen < 0) {
- flen = 0;
- }
-
- if (flen > MAX_STATUS_SIZE) {
- usable_len = MAX_STATUS_SIZE;
- } else {
- usable_len = flen;
- }
-
- /* allocate buffer for the string, and
- read the contents of the temporary file */
-
- if (!(str = (char*) my_malloc(usable_len + 1, MYF(0)))) {
- mutex_exit(&srv_monitor_file_mutex);
- DBUG_RETURN(TRUE);
- }
-
- rewind(srv_monitor_file);
- if (flen < MAX_STATUS_SIZE) {
- /* Display the entire output. */
- flen = (long) fread(str, 1, flen, srv_monitor_file);
- } else if (trx_list_end < (ulint) flen
- && trx_list_start < trx_list_end
- && trx_list_start + (flen - trx_list_end)
- < MAX_STATUS_SIZE - sizeof truncated_msg - 1) {
- /* Omit the beginning of the list of active transactions. */
- long len = (long) fread(str, 1, trx_list_start, srv_monitor_file);
- memcpy(str + len, truncated_msg, sizeof truncated_msg - 1);
- len += sizeof truncated_msg - 1;
- usable_len = (MAX_STATUS_SIZE - 1) - len;
- fseek(srv_monitor_file, flen - usable_len, SEEK_SET);
- len += (long) fread(str + len, 1, usable_len, srv_monitor_file);
- flen = len;
- } else {
- /* Omit the end of the output. */
- flen = (long) fread(str, 1, MAX_STATUS_SIZE - 1, srv_monitor_file);
- }
-
- mutex_exit(&srv_monitor_file_mutex);
-
- bool result = FALSE;
-
- if (stat_print(thd, innobase_hton_name, (uint) strlen(innobase_hton_name),
- STRING_WITH_LEN(""), str, flen)) {
- result= TRUE;
- }
- my_free(str, MYF(0));
-
- DBUG_RETURN(FALSE);
-}
-
-/************************************************************************//**
-Implements the SHOW MUTEX STATUS command. . */
-static
-bool
-innodb_mutex_show_status(
-/*=====================*/
- handlerton* hton, /*!< in: the innodb handlerton */
- THD* thd, /*!< in: the MySQL query thread of the
- caller */
- stat_print_fn* stat_print)
-{
- char buf1[IO_SIZE], buf2[IO_SIZE];
- mutex_t* mutex;
- rw_lock_t* lock;
-#ifdef UNIV_DEBUG
- ulint rw_lock_count= 0;
- ulint rw_lock_count_spin_loop= 0;
- ulint rw_lock_count_spin_rounds= 0;
- ulint rw_lock_count_os_wait= 0;
- ulint rw_lock_count_os_yield= 0;
- ulonglong rw_lock_wait_time= 0;
-#endif /* UNIV_DEBUG */
- uint hton_name_len= (uint) strlen(innobase_hton_name), buf1len, buf2len;
- DBUG_ENTER("innodb_mutex_show_status");
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- mutex_enter(&mutex_list_mutex);
-
- mutex = UT_LIST_GET_FIRST(mutex_list);
-
- while (mutex != NULL) {
- if (mutex->count_os_wait == 0
- || buf_pool_is_block_mutex(mutex)) {
- goto next_mutex;
- }
-#ifdef UNIV_DEBUG
- if (mutex->mutex_type != 1) {
- if (mutex->count_using > 0) {
- buf1len= my_snprintf(buf1, sizeof(buf1),
- "%s:%s",
- mutex->cmutex_name, mutex->cfile_name);
- buf2len= my_snprintf(buf2, sizeof(buf2),
- "count=%lu, spin_waits=%lu,"
- " spin_rounds=%lu, "
- "os_waits=%lu, os_yields=%lu,"
- " os_wait_times=%lu",
- mutex->count_using,
- mutex->count_spin_loop,
- mutex->count_spin_rounds,
- mutex->count_os_wait,
- mutex->count_os_yield,
- (ulong) (mutex->lspent_time/1000));
-
- if (stat_print(thd, innobase_hton_name,
- hton_name_len, buf1, buf1len,
- buf2, buf2len)) {
- mutex_exit(&mutex_list_mutex);
- DBUG_RETURN(1);
- }
- }
- }
- else {
- rw_lock_count += mutex->count_using;
- rw_lock_count_spin_loop += mutex->count_spin_loop;
- rw_lock_count_spin_rounds += mutex->count_spin_rounds;
- rw_lock_count_os_wait += mutex->count_os_wait;
- rw_lock_count_os_yield += mutex->count_os_yield;
- rw_lock_wait_time += mutex->lspent_time;
- }
-#else /* UNIV_DEBUG */
- buf1len= (uint) my_snprintf(buf1, sizeof(buf1), "%s:%lu",
- mutex->cfile_name, (ulong) mutex->cline);
- buf2len= (uint) my_snprintf(buf2, sizeof(buf2), "os_waits=%lu",
- mutex->count_os_wait);
-
- if (stat_print(thd, innobase_hton_name,
- hton_name_len, buf1, buf1len,
- buf2, buf2len)) {
- mutex_exit(&mutex_list_mutex);
- DBUG_RETURN(1);
- }
-#endif /* UNIV_DEBUG */
-
-next_mutex:
- mutex = UT_LIST_GET_NEXT(list, mutex);
- }
-
- mutex_exit(&mutex_list_mutex);
-
- mutex_enter(&rw_lock_list_mutex);
-
- lock = UT_LIST_GET_FIRST(rw_lock_list);
-
- while (lock != NULL) {
- if (lock->count_os_wait
- && !buf_pool_is_block_lock(lock)) {
- buf1len= my_snprintf(buf1, sizeof(buf1), "%s:%lu",
- lock->cfile_name, (ulong) lock->cline);
- buf2len= my_snprintf(buf2, sizeof(buf2),
- "os_waits=%lu", lock->count_os_wait);
-
- if (stat_print(thd, innobase_hton_name,
- hton_name_len, buf1, buf1len,
- buf2, buf2len)) {
- mutex_exit(&rw_lock_list_mutex);
- DBUG_RETURN(1);
- }
- }
- lock = UT_LIST_GET_NEXT(list, lock);
- }
-
- mutex_exit(&rw_lock_list_mutex);
-
-#ifdef UNIV_DEBUG
- buf2len= my_snprintf(buf2, sizeof(buf2),
- "count=%lu, spin_waits=%lu, spin_rounds=%lu, "
- "os_waits=%lu, os_yields=%lu, os_wait_times=%lu",
- rw_lock_count, rw_lock_count_spin_loop,
- rw_lock_count_spin_rounds,
- rw_lock_count_os_wait, rw_lock_count_os_yield,
- (ulong) (rw_lock_wait_time/1000));
-
- if (stat_print(thd, innobase_hton_name, hton_name_len,
- STRING_WITH_LEN("rw_lock_mutexes"), buf2, buf2len)) {
- DBUG_RETURN(1);
- }
-#endif /* UNIV_DEBUG */
-
- DBUG_RETURN(FALSE);
-}
-
-static
-bool innobase_show_status(handlerton *hton, THD* thd,
- stat_print_fn* stat_print,
- enum ha_stat_type stat_type)
-{
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- switch (stat_type) {
- case HA_ENGINE_STATUS:
- return innodb_show_status(hton, thd, stat_print);
- case HA_ENGINE_MUTEX:
- return innodb_mutex_show_status(hton, thd, stat_print);
- default:
- return(FALSE);
- }
-}
-
-/************************************************************************//**
- Handling the shared INNOBASE_SHARE structure that is needed to provide table
- locking.
-****************************************************************************/
-
-static INNOBASE_SHARE* get_share(const char* table_name)
-{
- INNOBASE_SHARE *share;
- pthread_mutex_lock(&innobase_share_mutex);
-
- ulint fold = ut_fold_string(table_name);
-
- HASH_SEARCH(table_name_hash, innobase_open_tables, fold,
- INNOBASE_SHARE*, share,
- ut_ad(share->use_count > 0),
- !strcmp(share->table_name, table_name));
-
- if (!share) {
-
- uint length = (uint) strlen(table_name);
-
- /* TODO: invoke HASH_MIGRATE if innobase_open_tables
- grows too big */
-
- share = (INNOBASE_SHARE *) my_malloc(sizeof(*share)+length+1,
- MYF(MY_FAE | MY_ZEROFILL));
-
- share->table_name = (char*) memcpy(share + 1,
- table_name, length + 1);
-
- HASH_INSERT(INNOBASE_SHARE, table_name_hash,
- innobase_open_tables, fold, share);
-
- thr_lock_init(&share->lock);
- }
-
- share->use_count++;
- pthread_mutex_unlock(&innobase_share_mutex);
-
- return(share);
-}
-
-static void free_share(INNOBASE_SHARE* share)
-{
- pthread_mutex_lock(&innobase_share_mutex);
-
-#ifdef UNIV_DEBUG
- INNOBASE_SHARE* share2;
- ulint fold = ut_fold_string(share->table_name);
-
- HASH_SEARCH(table_name_hash, innobase_open_tables, fold,
- INNOBASE_SHARE*, share2,
- ut_ad(share->use_count > 0),
- !strcmp(share->table_name, share2->table_name));
-
- ut_a(share2 == share);
-#endif /* UNIV_DEBUG */
-
- if (!--share->use_count) {
- ulint fold = ut_fold_string(share->table_name);
-
- HASH_DELETE(INNOBASE_SHARE, table_name_hash,
- innobase_open_tables, fold, share);
- thr_lock_delete(&share->lock);
- my_free(share, MYF(0));
-
- /* TODO: invoke HASH_MIGRATE if innobase_open_tables
- shrinks too much */
- }
-
- pthread_mutex_unlock(&innobase_share_mutex);
-}
-
-/*****************************************************************//**
-Converts a MySQL table lock stored in the 'lock' field of the handle to
-a proper type before storing pointer to the lock into an array of pointers.
-MySQL also calls this if it wants to reset some table locks to a not-locked
-state during the processing of an SQL query. An example is that during a
-SELECT the read lock is released early on the 'const' tables where we only
-fetch one row. MySQL does not call this when it releases all locks at the
-end of an SQL statement.
-@return pointer to the next element in the 'to' array */
-UNIV_INTERN
-THR_LOCK_DATA**
-ha_innobase::store_lock(
-/*====================*/
- THD* thd, /*!< in: user thread handle */
- THR_LOCK_DATA** to, /*!< in: pointer to an array
- of pointers to lock structs;
- pointer to the 'lock' field
- of current handle is stored
- next to this array */
- enum thr_lock_type lock_type) /*!< in: lock type to store in
- 'lock'; this may also be
- TL_IGNORE */
-{
- trx_t* trx;
-
- /* Note that trx in this function is NOT necessarily prebuilt->trx
- because we call update_thd() later, in ::external_lock()! Failure to
- understand this caused a serious memory corruption bug in 5.1.11. */
-
- trx = check_trx_exists(thd);
-
- /* NOTE: MySQL can call this function with lock 'type' TL_IGNORE!
- Be careful to ignore TL_IGNORE if we are going to do something with
- only 'real' locks! */
-
- /* If no MySQL table is in use, we need to set the isolation level
- of the transaction. */
-
- if (lock_type != TL_IGNORE
- && trx->n_mysql_tables_in_use == 0) {
- trx->isolation_level = innobase_map_isolation_level(
- (enum_tx_isolation) thd_tx_isolation(thd));
-
- if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
- && trx->global_read_view) {
-
- /* At low transaction isolation levels we let
- each consistent read set its own snapshot */
-
- read_view_close_for_mysql(trx);
- }
- }
-
- DBUG_ASSERT(EQ_CURRENT_THD(thd));
- const bool in_lock_tables = thd_in_lock_tables(thd);
- const uint sql_command = thd_sql_command(thd);
-
- if (sql_command == SQLCOM_DROP_TABLE) {
-
- /* MySQL calls this function in DROP TABLE though this table
- handle may belong to another thd that is running a query. Let
- us in that case skip any changes to the prebuilt struct. */
-
- } else if ((lock_type == TL_READ && in_lock_tables)
- || (lock_type == TL_READ_HIGH_PRIORITY && in_lock_tables)
- || lock_type == TL_READ_WITH_SHARED_LOCKS
- || lock_type == TL_READ_NO_INSERT
- || (lock_type != TL_IGNORE
- && sql_command != SQLCOM_SELECT)) {
-
- /* The OR cases above are in this order:
- 1) MySQL is doing LOCK TABLES ... READ LOCAL, or we
- are processing a stored procedure or function, or
- 2) (we do not know when TL_READ_HIGH_PRIORITY is used), or
- 3) this is a SELECT ... IN SHARE MODE, or
- 4) we are doing a complex SQL statement like
- INSERT INTO ... SELECT ... and the logical logging (MySQL
- binlog) requires the use of a locking read, or
- MySQL is doing LOCK TABLES ... READ.
- 5) we let InnoDB do locking reads for all SQL statements that
- are not simple SELECTs; note that select_lock_type in this
- case may get strengthened in ::external_lock() to LOCK_X.
- Note that we MUST use a locking read in all data modifying
- SQL statements, because otherwise the execution would not be
- serializable, and also the results from the update could be
- unexpected if an obsolete consistent read view would be
- used. */
-
- ulint isolation_level;
-
- isolation_level = trx->isolation_level;
-
- if ((srv_locks_unsafe_for_binlog
- || isolation_level == TRX_ISO_READ_COMMITTED)
- && isolation_level != TRX_ISO_SERIALIZABLE
- && (lock_type == TL_READ || lock_type == TL_READ_NO_INSERT)
- && (sql_command == SQLCOM_INSERT_SELECT
- || sql_command == SQLCOM_REPLACE_SELECT
- || sql_command == SQLCOM_UPDATE
- || sql_command == SQLCOM_CREATE_TABLE)) {
-
- /* If we either have innobase_locks_unsafe_for_binlog
- option set or this session is using READ COMMITTED
- isolation level and isolation level of the transaction
- is not set to serializable and MySQL is doing
- INSERT INTO...SELECT or REPLACE INTO...SELECT
- or UPDATE ... = (SELECT ...) or CREATE ...
- SELECT... without FOR UPDATE or IN SHARE
- MODE in select, then we use consistent read
- for select. */
-
- prebuilt->select_lock_type = LOCK_NONE;
- prebuilt->stored_select_lock_type = LOCK_NONE;
- } else if (sql_command == SQLCOM_CHECKSUM) {
- /* Use consistent read for checksum table */
-
- prebuilt->select_lock_type = LOCK_NONE;
- prebuilt->stored_select_lock_type = LOCK_NONE;
- } else {
- prebuilt->select_lock_type = LOCK_S;
- prebuilt->stored_select_lock_type = LOCK_S;
- }
-
- } else if (lock_type != TL_IGNORE) {
-
- /* We set possible LOCK_X value in external_lock, not yet
- here even if this would be SELECT ... FOR UPDATE */
-
- prebuilt->select_lock_type = LOCK_NONE;
- prebuilt->stored_select_lock_type = LOCK_NONE;
- }
-
- if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) {
-
- /* Starting from 5.0.7, we weaken also the table locks
- set at the start of a MySQL stored procedure call, just like
- we weaken the locks set at the start of an SQL statement.
- MySQL does set in_lock_tables TRUE there, but in reality
- we do not need table locks to make the execution of a
- single transaction stored procedure call deterministic
- (if it does not use a consistent read). */
-
- if (lock_type == TL_READ
- && sql_command == SQLCOM_LOCK_TABLES) {
- /* We come here if MySQL is processing LOCK TABLES
- ... READ LOCAL. MyISAM under that table lock type
- reads the table as it was at the time the lock was
- granted (new inserts are allowed, but not seen by the
- reader). To get a similar effect on an InnoDB table,
- we must use LOCK TABLES ... READ. We convert the lock
- type here, so that for InnoDB, READ LOCAL is
- equivalent to READ. This will change the InnoDB
- behavior in mysqldump, so that dumps of InnoDB tables
- are consistent with dumps of MyISAM tables. */
-
- lock_type = TL_READ_NO_INSERT;
- }
-
- /* If we are not doing a LOCK TABLE, DISCARD/IMPORT
- TABLESPACE or TRUNCATE TABLE then allow multiple
- writers. Note that ALTER TABLE uses a TL_WRITE_ALLOW_READ
- < TL_WRITE_CONCURRENT_INSERT.
-
- We especially allow multiple writers if MySQL is at the
- start of a stored procedure call (SQLCOM_CALL) or a
- stored function call (MySQL does have in_lock_tables
- TRUE there). */
-
- if ((lock_type >= TL_WRITE_CONCURRENT_INSERT
- && lock_type <= TL_WRITE)
- && !(in_lock_tables
- && sql_command == SQLCOM_LOCK_TABLES)
- && !thd_tablespace_op(thd)
- && sql_command != SQLCOM_TRUNCATE
- && sql_command != SQLCOM_OPTIMIZE
- && sql_command != SQLCOM_CREATE_TABLE) {
-
- lock_type = TL_WRITE_ALLOW_WRITE;
- }
-
- /* In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
- MySQL would use the lock TL_READ_NO_INSERT on t2, and that
- would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
- to t2. Convert the lock to a normal read lock to allow
- concurrent inserts to t2.
-
- We especially allow concurrent inserts if MySQL is at the
- start of a stored procedure call (SQLCOM_CALL)
- (MySQL does have thd_in_lock_tables() TRUE there). */
-
- if (lock_type == TL_READ_NO_INSERT
- && sql_command != SQLCOM_LOCK_TABLES) {
-
- lock_type = TL_READ;
- }
-
- lock.type = lock_type;
- }
-
- *to++= &lock;
-
- return(to);
-}
-
-/*********************************************************************//**
-Read the next autoinc value. Acquire the relevant locks before reading
-the AUTOINC value. If SUCCESS then the table AUTOINC mutex will be locked
-on return and all relevant locks acquired.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ulint
-ha_innobase::innobase_get_autoinc(
-/*==============================*/
- ulonglong* value) /*!< out: autoinc value */
-{
- *value = 0;
-
- prebuilt->autoinc_error = innobase_lock_autoinc();
-
- if (prebuilt->autoinc_error == DB_SUCCESS) {
-
- /* Determine the first value of the interval */
- *value = dict_table_autoinc_read(prebuilt->table);
-
- /* It should have been initialized during open. */
- ut_a(*value != 0);
- }
-
- return(prebuilt->autoinc_error);
-}
-
-/*******************************************************************//**
-This function reads the global auto-inc counter. It doesn't use the
-AUTOINC lock even if the lock mode is set to TRADITIONAL.
-@return the autoinc value */
-UNIV_INTERN
-ulonglong
-ha_innobase::innobase_peek_autoinc(void)
-/*====================================*/
-{
- ulonglong auto_inc;
- dict_table_t* innodb_table;
-
- ut_a(prebuilt != NULL);
- ut_a(prebuilt->table != NULL);
-
- innodb_table = prebuilt->table;
-
- dict_table_autoinc_lock(innodb_table);
-
- auto_inc = dict_table_autoinc_read(innodb_table);
-
- ut_a(auto_inc > 0);
-
- dict_table_autoinc_unlock(innodb_table);
-
- return(auto_inc);
-}
-
-/*********************************************************************//**
-This function initializes the auto-inc counter if it has not been
-initialized yet. This function does not change the value of the auto-inc
-counter if it already has been initialized. Returns the value of the
-auto-inc counter in *first_value, and ULONGLONG_MAX in *nb_reserved_values (as
-we have a table-level lock). offset, increment, nb_desired_values are ignored.
-*first_value is set to -1 if error (deadlock or lock wait timeout) */
-UNIV_INTERN
-void
-ha_innobase::get_auto_increment(
-/*============================*/
- ulonglong offset, /*!< in: table autoinc offset */
- ulonglong increment, /*!< in: table autoinc increment */
- ulonglong nb_desired_values, /*!< in: number of values reqd */
- ulonglong *first_value, /*!< out: the autoinc value */
- ulonglong *nb_reserved_values) /*!< out: count of reserved values */
-{
- trx_t* trx;
- ulint error;
- ulonglong autoinc = 0;
-
- /* Prepare prebuilt->trx in the table handle */
- update_thd(ha_thd());
-
- error = innobase_get_autoinc(&autoinc);
-
- if (error != DB_SUCCESS) {
- *first_value = (~(ulonglong) 0);
- return;
- }
-
- /* This is a hack, since nb_desired_values seems to be accurate only
- for the first call to get_auto_increment() for multi-row INSERT and
- meaningless for other statements e.g, LOAD etc. Subsequent calls to
- this method for the same statement results in different values which
- don't make sense. Therefore we store the value the first time we are
- called and count down from that as rows are written (see write_row()).
- */
-
- trx = prebuilt->trx;
-
- /* Note: We can't rely on *first_value since some MySQL engines,
- in particular the partition engine, don't initialize it to 0 when
- invoking this method. So we are not sure if it's guaranteed to
- be 0 or not. */
-
- /* Called for the first time ? */
- if (trx->n_autoinc_rows == 0) {
-
- trx->n_autoinc_rows = (ulint) nb_desired_values;
-
- /* It's possible for nb_desired_values to be 0:
- e.g., INSERT INTO T1(C) SELECT C FROM T2; */
- if (nb_desired_values == 0) {
-
- trx->n_autoinc_rows = 1;
- }
-
- set_if_bigger(*first_value, autoinc);
- /* Not in the middle of a mult-row INSERT. */
- } else if (prebuilt->autoinc_last_value == 0) {
- set_if_bigger(*first_value, autoinc);
- }
-
- *nb_reserved_values = trx->n_autoinc_rows;
-
- /* With old style AUTOINC locking we only update the table's
- AUTOINC counter after attempting to insert the row. */
- if (innobase_autoinc_lock_mode != AUTOINC_OLD_STYLE_LOCKING) {
- ulonglong need;
- ulonglong current;
- ulonglong next_value;
- ulonglong col_max_value;
-
- /* We need the upper limit of the col type to check for
- whether we update the table autoinc counter or not. */
- col_max_value = innobase_get_int_col_max_value(
- table->next_number_field);
-
- current = *first_value > col_max_value ? autoinc : *first_value;
- need = *nb_reserved_values * increment;
-
- /* Compute the last value in the interval */
- next_value = innobase_next_autoinc(
- current, need, offset, col_max_value);
-
- prebuilt->autoinc_last_value = next_value;
-
- if (prebuilt->autoinc_last_value < *first_value) {
- *first_value = (~(ulonglong) 0);
- } else {
- /* Update the table autoinc variable */
- dict_table_autoinc_update_if_greater(
- prebuilt->table, prebuilt->autoinc_last_value);
- }
- } else {
- /* This will force write_row() into attempting an update
- of the table's AUTOINC counter. */
- prebuilt->autoinc_last_value = 0;
- }
-
- /* The increment to be used to increase the AUTOINC value, we use
- this in write_row() and update_row() to increase the autoinc counter
- for columns that are filled by the user. We need the offset and
- the increment. */
- prebuilt->autoinc_offset = offset;
- prebuilt->autoinc_increment = increment;
-
- dict_table_autoinc_unlock(prebuilt->table);
-}
-
-/*******************************************************************//**
-Reset the auto-increment counter to the given value, i.e. the next row
-inserted will get the given value. This is called e.g. after TRUNCATE
-is emulated by doing a 'DELETE FROM t'. HA_ERR_WRONG_COMMAND is
-returned by storage engines that don't support this operation.
-@return 0 or error code */
-UNIV_INTERN
-int
-ha_innobase::reset_auto_increment(
-/*==============================*/
- ulonglong value) /*!< in: new value for table autoinc */
-{
- DBUG_ENTER("ha_innobase::reset_auto_increment");
-
- int error;
-
- update_thd(ha_thd());
-
- error = row_lock_table_autoinc_for_mysql(prebuilt);
-
- if (error != DB_SUCCESS) {
- error = convert_error_code_to_mysql(error,
- prebuilt->table->flags,
- user_thd);
-
- DBUG_RETURN(error);
- }
-
- /* The next value can never be 0. */
- if (value == 0) {
- value = 1;
- }
-
- innobase_reset_autoinc(value);
-
- DBUG_RETURN(0);
-}
-
-/* See comment in handler.cc */
-UNIV_INTERN
-bool
-ha_innobase::get_error_message(int error, String *buf)
-{
- trx_t* trx = check_trx_exists(ha_thd());
-
- buf->copy(trx->detailed_error, (uint) strlen(trx->detailed_error),
- system_charset_info);
-
- return(FALSE);
-}
-
-/*******************************************************************//**
-Compares two 'refs'. A 'ref' is the (internal) primary key value of the row.
-If there is no explicitly declared non-null unique key or a primary key, then
-InnoDB internally uses the row id as the primary key.
-@return < 0 if ref1 < ref2, 0 if equal, else > 0 */
-UNIV_INTERN
-int
-ha_innobase::cmp_ref(
-/*=================*/
- const uchar* ref1, /*!< in: an (internal) primary key value in the
- MySQL key value format */
- const uchar* ref2) /*!< in: an (internal) primary key value in the
- MySQL key value format */
-{
- enum_field_types mysql_type;
- Field* field;
- KEY_PART_INFO* key_part;
- KEY_PART_INFO* key_part_end;
- uint len1;
- uint len2;
- int result;
-
- if (prebuilt->clust_index_was_generated) {
- /* The 'ref' is an InnoDB row id */
-
- return(memcmp(ref1, ref2, DATA_ROW_ID_LEN));
- }
-
- /* Do a type-aware comparison of primary key fields. PK fields
- are always NOT NULL, so no checks for NULL are performed. */
-
- key_part = table->key_info[table->s->primary_key].key_part;
-
- key_part_end = key_part
- + table->key_info[table->s->primary_key].key_parts;
-
- for (; key_part != key_part_end; ++key_part) {
- field = key_part->field;
- mysql_type = field->type();
-
- if (mysql_type == MYSQL_TYPE_TINY_BLOB
- || mysql_type == MYSQL_TYPE_MEDIUM_BLOB
- || mysql_type == MYSQL_TYPE_BLOB
- || mysql_type == MYSQL_TYPE_LONG_BLOB) {
-
- /* In the MySQL key value format, a column prefix of
- a BLOB is preceded by a 2-byte length field */
-
- len1 = innobase_read_from_2_little_endian(ref1);
- len2 = innobase_read_from_2_little_endian(ref2);
-
- ref1 += 2;
- ref2 += 2;
- result = ((Field_blob*)field)->cmp( ref1, len1,
- ref2, len2);
- } else {
- result = field->key_cmp(ref1, ref2);
- }
-
- if (result) {
-
- return(result);
- }
-
- ref1 += key_part->store_length;
- ref2 += key_part->store_length;
- }
-
- return(0);
-}
-
-/*******************************************************************//**
-Ask InnoDB if a query to a table can be cached.
-@return TRUE if query caching of the table is permitted */
-UNIV_INTERN
-my_bool
-ha_innobase::register_query_cache_table(
-/*====================================*/
- THD* thd, /*!< in: user thread handle */
- char* table_key, /*!< in: concatenation of database name,
- the null character NUL,
- and the table name */
- uint key_length, /*!< in: length of the full name, i.e.
- len(dbname) + len(tablename) + 1 */
- qc_engine_callback*
- call_back, /*!< out: pointer to function for
- checking if query caching
- is permitted */
- ulonglong *engine_data) /*!< in/out: data to call_back */
-{
- *call_back = innobase_query_caching_of_table_permitted;
- *engine_data = 0;
- return(innobase_query_caching_of_table_permitted(thd, table_key,
- key_length,
- engine_data));
-}
-
-UNIV_INTERN
-char*
-ha_innobase::get_mysql_bin_log_name()
-{
- return(trx_sys_mysql_bin_log_name);
-}
-
-UNIV_INTERN
-ulonglong
-ha_innobase::get_mysql_bin_log_pos()
-{
- /* trx... is ib_int64_t, which is a typedef for a 64-bit integer
- (__int64 or longlong) so it's ok to cast it to ulonglong. */
-
- return(trx_sys_mysql_bin_log_pos);
-}
-
-/******************************************************************//**
-This function is used to find the storage length in bytes of the first n
-characters for prefix indexes using a multibyte character set. The function
-finds charset information and returns length of prefix_len characters in the
-index field in bytes.
-@return number of bytes occupied by the first n characters */
-extern "C" UNIV_INTERN
-ulint
-innobase_get_at_most_n_mbchars(
-/*===========================*/
- ulint charset_id, /*!< in: character set id */
- ulint prefix_len, /*!< in: prefix length in bytes of the index
- (this has to be divided by mbmaxlen to get the
- number of CHARACTERS n in the prefix) */
- ulint data_len, /*!< in: length of the string in bytes */
- const char* str) /*!< in: character string */
-{
- ulint char_length; /*!< character length in bytes */
- ulint n_chars; /*!< number of characters in prefix */
- CHARSET_INFO* charset; /*!< charset used in the field */
-
- charset = get_charset((uint) charset_id, MYF(MY_WME));
-
- ut_ad(charset);
- ut_ad(charset->mbmaxlen);
-
- /* Calculate how many characters at most the prefix index contains */
-
- n_chars = prefix_len / charset->mbmaxlen;
-
- /* If the charset is multi-byte, then we must find the length of the
- first at most n chars in the string. If the string contains less
- characters than n, then we return the length to the end of the last
- character. */
-
- if (charset->mbmaxlen > 1) {
- /* my_charpos() returns the byte length of the first n_chars
- characters, or a value bigger than the length of str, if
- there were not enough full characters in str.
-
- Why does the code below work:
- Suppose that we are looking for n UTF-8 characters.
-
- 1) If the string is long enough, then the prefix contains at
- least n complete UTF-8 characters + maybe some extra
- characters + an incomplete UTF-8 character. No problem in
- this case. The function returns the pointer to the
- end of the nth character.
-
- 2) If the string is not long enough, then the string contains
- the complete value of a column, that is, only complete UTF-8
- characters, and we can store in the column prefix index the
- whole string. */
-
- char_length = my_charpos(charset, str,
- str + data_len, (int) n_chars);
- if (char_length > data_len) {
- char_length = data_len;
- }
- } else {
- if (data_len < prefix_len) {
- char_length = data_len;
- } else {
- char_length = prefix_len;
- }
- }
-
- return(char_length);
-}
-
-/*******************************************************************//**
-This function is used to prepare an X/Open XA distributed transaction.
-@return 0 or error number */
-static
-int
-innobase_xa_prepare(
-/*================*/
- handlerton* hton, /*!< in: InnoDB handlerton */
- THD* thd, /*!< in: handle to the MySQL thread of
- the user whose XA transaction should
- be prepared */
- bool all) /*!< in: TRUE - commit transaction
- FALSE - the current SQL statement
- ended */
-{
- int error = 0;
- trx_t* trx = check_trx_exists(thd);
-
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- /* we use support_xa value as it was seen at transaction start
- time, not the current session variable value. Any possible changes
- to the session variable take effect only in the next transaction */
- if (!trx->support_xa) {
-
- return(0);
- }
-
- thd_get_xid(thd, (MYSQL_XID*) &trx->xid);
-
- /* Release a possible FIFO ticket and search latch. Since we will
- reserve the kernel mutex, we have to release the search system latch
- first to obey the latching order. */
-
- innobase_release_stat_resources(trx);
-
- if (trx->active_trans == 0 && trx->conc_state != TRX_NOT_STARTED) {
-
- sql_print_error("trx->active_trans == 0, but trx->conc_state != "
- "TRX_NOT_STARTED");
- }
-
- if (all
- || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
-
- /* We were instructed to prepare the whole transaction, or
- this is an SQL statement end and autocommit is on */
-
- ut_ad(trx->active_trans);
-
- error = (int) trx_prepare_for_mysql(trx);
- } else {
- /* We just mark the SQL statement ended and do not do a
- transaction prepare */
-
- /* If we had reserved the auto-inc lock for some
- table in this SQL statement we release it now */
-
- row_unlock_table_autoinc_for_mysql(trx);
-
- /* Store the current undo_no of the transaction so that we
- know where to roll back if we have to roll back the next
- SQL statement */
-
- trx_mark_sql_stat_end(trx);
- }
-
- /* Tell the InnoDB server that there might be work for utility
- threads: */
-
- srv_active_wake_master_thread();
-
- if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
- (all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
- {
-
- /* For ibbackup to work the order of transactions in binlog
- and InnoDB must be the same. Consider the situation
-
- thread1> prepare; write to binlog; ...
- <context switch>
- thread2> prepare; write to binlog; commit
- thread1> ... commit
-
- To ensure this will not happen we're taking the mutex on
- prepare, and releasing it on commit.
-
- Note: only do it for normal commits, done via ha_commit_trans.
- If 2pc protocol is executed by external transaction
- coordinator, it will be just a regular MySQL client
- executing XA PREPARE and XA COMMIT commands.
- In this case we cannot know how many minutes or hours
- will be between XA PREPARE and XA COMMIT, and we don't want
- to block for undefined period of time. */
- pthread_mutex_lock(&prepare_commit_mutex);
- trx->active_trans = 2;
- }
-
- return(error);
-}
-
-/*******************************************************************//**
-This function is used to recover X/Open XA distributed transactions.
-@return number of prepared transactions stored in xid_list */
-static
-int
-innobase_xa_recover(
-/*================*/
- handlerton* hton, /*!< in: InnoDB handlerton */
- XID* xid_list,/*!< in/out: prepared transactions */
- uint len) /*!< in: number of slots in xid_list */
-{
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- if (len == 0 || xid_list == NULL) {
-
- return(0);
- }
-
- return(trx_recover_for_mysql(xid_list, len));
-}
-
-/*******************************************************************//**
-This function is used to commit one X/Open XA distributed transaction
-which is in the prepared state
-@return 0 or error number */
-static
-int
-innobase_commit_by_xid(
-/*===================*/
- handlerton *hton,
- XID* xid) /*!< in: X/Open XA transaction identification */
-{
- trx_t* trx;
-
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- trx = trx_get_trx_by_xid(xid);
-
- if (trx) {
- innobase_commit_low(trx);
-
- return(XA_OK);
- } else {
- return(XAER_NOTA);
- }
-}
-
-/*******************************************************************//**
-This function is used to rollback one X/Open XA distributed transaction
-which is in the prepared state
-@return 0 or error number */
-static
-int
-innobase_rollback_by_xid(
-/*=====================*/
- handlerton* hton, /*!< in: InnoDB handlerton */
- XID* xid) /*!< in: X/Open XA transaction
- identification */
-{
- trx_t* trx;
-
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- trx = trx_get_trx_by_xid(xid);
-
- if (trx) {
- return(innobase_rollback_trx(trx));
- } else {
- return(XAER_NOTA);
- }
-}
-
-/*******************************************************************//**
-Create a consistent view for a cursor based on current transaction
-which is created if the corresponding MySQL thread still lacks one.
-This consistent view is then used inside of MySQL when accessing records
-using a cursor.
-@return pointer to cursor view or NULL */
-static
-void*
-innobase_create_cursor_view(
-/*========================*/
- handlerton *hton, /*!< in: innobase hton */
- THD* thd) /*!< in: user thread handle */
-{
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- return(read_cursor_view_create_for_mysql(check_trx_exists(thd)));
-}
-
-/*******************************************************************//**
-Close the given consistent cursor view of a transaction and restore
-global read view to a transaction read view. Transaction is created if the
-corresponding MySQL thread still lacks one. */
-static
-void
-innobase_close_cursor_view(
-/*=======================*/
- handlerton *hton,
- THD* thd, /*!< in: user thread handle */
- void* curview)/*!< in: Consistent read view to be closed */
-{
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- read_cursor_view_close_for_mysql(check_trx_exists(thd),
- (cursor_view_t*) curview);
-}
-
-/*******************************************************************//**
-Set the given consistent cursor view to a transaction which is created
-if the corresponding MySQL thread still lacks one. If the given
-consistent cursor view is NULL global read view of a transaction is
-restored to a transaction read view. */
-static
-void
-innobase_set_cursor_view(
-/*=====================*/
- handlerton *hton,
- THD* thd, /*!< in: user thread handle */
- void* curview)/*!< in: Consistent cursor view to be set */
-{
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- read_cursor_set_for_mysql(check_trx_exists(thd),
- (cursor_view_t*) curview);
-}
-
-
-/***********************************************************************
-Check whether any of the given columns is being renamed in the table. */
-static
-bool
-column_is_being_renamed(
-/*====================*/
- /* out: true if any of col_names is
- being renamed in table */
- TABLE* table, /* in: MySQL table */
- uint n_cols, /* in: number of columns */
- const char** col_names) /* in: names of the columns */
-{
- uint j;
- uint k;
- Field* field;
- const char* col_name;
-
- for (j = 0; j < n_cols; j++) {
- col_name = col_names[j];
- for (k = 0; k < table->s->fields; k++) {
- field = table->field[k];
- if ((field->flags & FIELD_IS_RENAMED)
- && innobase_strcasecmp(field->field_name,
- col_name) == 0) {
- return(true);
- }
- }
- }
-
- return(false);
-}
-
-/***********************************************************************
-Check whether a column in table "table" is being renamed and if this column
-is part of a foreign key, either part of another table, referencing this
-table or part of this table, referencing another table. */
-static
-bool
-foreign_key_column_is_being_renamed(
-/*================================*/
- /* out: true if a column that
- participates in a foreign key definition
- is being renamed */
- row_prebuilt_t* prebuilt, /* in: InnoDB prebuilt struct */
- TABLE* table) /* in: MySQL table */
-{
- dict_foreign_t* foreign;
-
- /* check whether there are foreign keys at all */
- if (UT_LIST_GET_LEN(prebuilt->table->foreign_list) == 0
- && UT_LIST_GET_LEN(prebuilt->table->referenced_list) == 0) {
- /* no foreign keys involved with prebuilt->table */
-
- return(false);
- }
-
- row_mysql_lock_data_dictionary(prebuilt->trx);
-
- /* Check whether any column in the foreign key constraints which refer
- to this table is being renamed. */
- for (foreign = UT_LIST_GET_FIRST(prebuilt->table->referenced_list);
- foreign != NULL;
- foreign = UT_LIST_GET_NEXT(referenced_list, foreign)) {
-
- if (column_is_being_renamed(table, foreign->n_fields,
- foreign->referenced_col_names)) {
-
- row_mysql_unlock_data_dictionary(prebuilt->trx);
- return(true);
- }
- }
-
- /* Check whether any column in the foreign key constraints in the
- table is being renamed. */
- for (foreign = UT_LIST_GET_FIRST(prebuilt->table->foreign_list);
- foreign != NULL;
- foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) {
-
- if (column_is_being_renamed(table, foreign->n_fields,
- foreign->foreign_col_names)) {
-
- row_mysql_unlock_data_dictionary(prebuilt->trx);
- return(true);
- }
- }
-
- row_mysql_unlock_data_dictionary(prebuilt->trx);
-
- return(false);
-}
-
-UNIV_INTERN
-bool
-ha_innobase::check_if_incompatible_data(
- HA_CREATE_INFO* info,
- uint table_changes)
-{
- if (table_changes != IS_EQUAL_YES) {
-
- return(COMPATIBLE_DATA_NO);
- }
-
- /* Check that auto_increment value was not changed */
- if ((info->used_fields & HA_CREATE_USED_AUTO) &&
- info->auto_increment_value != 0) {
-
- return(COMPATIBLE_DATA_NO);
- }
-
- /* Check if a column participating in a foreign key is being renamed.
- There is no mechanism for updating InnoDB foreign key definitions. */
- if (foreign_key_column_is_being_renamed(prebuilt, table)) {
-
- return COMPATIBLE_DATA_NO;
- }
-
- /* Check that row format didn't change */
- if ((info->used_fields & HA_CREATE_USED_ROW_FORMAT)
- && info->row_type != ROW_TYPE_DEFAULT
- && info->row_type != get_row_type()) {
-
- return(COMPATIBLE_DATA_NO);
- }
-
- /* Specifying KEY_BLOCK_SIZE requests a rebuild of the table. */
- if (info->used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE) {
- return(COMPATIBLE_DATA_NO);
- }
-
- return(COMPATIBLE_DATA_YES);
-}
-
-/************************************************************//**
-Validate the file format name and return its corresponding id.
-@return valid file format id */
-static
-uint
-innobase_file_format_name_lookup(
-/*=============================*/
- const char* format_name) /*!< in: pointer to file format name */
-{
- char* endp;
- uint format_id;
-
- ut_a(format_name != NULL);
-
- /* The format name can contain the format id itself instead of
- the name and we check for that. */
- format_id = (uint) strtoul(format_name, &endp, 10);
-
- /* Check for valid parse. */
- if (*endp == '\0' && *format_name != '\0') {
-
- if (format_id <= DICT_TF_FORMAT_MAX) {
-
- return(format_id);
- }
- } else {
-
- for (format_id = 0; format_id <= DICT_TF_FORMAT_MAX;
- format_id++) {
- const char* name;
-
- name = trx_sys_file_format_id_to_name(format_id);
-
- if (!innobase_strcasecmp(format_name, name)) {
-
- return(format_id);
- }
- }
- }
-
- return(DICT_TF_FORMAT_MAX + 1);
-}
-
-/************************************************************//**
-Validate the file format check value, is it one of "on" or "off",
-as a side effect it sets the srv_check_file_format_at_startup variable.
-@return true if config value one of "on" or "off" */
-static
-bool
-innobase_file_format_check_on_off(
-/*==============================*/
- const char* format_check) /*!< in: parameter value */
-{
- bool ret = true;
-
- if (!innobase_strcasecmp(format_check, "off")) {
-
- /* Set the value to disable checking. */
- srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX + 1;
-
- } else if (!innobase_strcasecmp(format_check, "on")) {
-
- /* Set the value to the lowest supported format. */
- srv_check_file_format_at_startup = DICT_TF_FORMAT_51;
- } else {
- ret = FALSE;
- }
-
- return(ret);
-}
-
-/************************************************************//**
-Validate the file format check config parameters, as a side effect it
-sets the srv_check_file_format_at_startup variable.
-@return the format_id if valid config value, otherwise, return -1 */
-static
-int
-innobase_file_format_validate_and_set(
-/*================================*/
- const char* format_check) /*!< in: parameter value */
-{
- uint format_id;
-
- format_id = innobase_file_format_name_lookup(format_check);
-
- if (format_id < DICT_TF_FORMAT_MAX + 1) {
- srv_check_file_format_at_startup = format_id;
-
- return((int) format_id);
- } else {
- return(-1);
- }
-}
-
-/*************************************************************//**
-Check if it is a valid file format. This function is registered as
-a callback with MySQL.
-@return 0 for valid file format */
-static
-int
-innodb_file_format_name_validate(
-/*=============================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to system
- variable */
- void* save, /*!< out: immediate result
- for update function */
- struct st_mysql_value* value) /*!< in: incoming string */
-{
- const char* file_format_input;
- char buff[STRING_BUFFER_USUAL_SIZE];
- int len = sizeof(buff);
-
- ut_a(save != NULL);
- ut_a(value != NULL);
-
- file_format_input = value->val_str(value, buff, &len);
-
- if (file_format_input != NULL) {
- uint format_id;
-
- format_id = innobase_file_format_name_lookup(
- file_format_input);
-
- if (format_id <= DICT_TF_FORMAT_MAX) {
-
- /* Save a pointer to the name in the
- 'file_format_name_map' constant array. */
- *static_cast<const char**>(save) =
- trx_sys_file_format_id_to_name(format_id);
-
- return(0);
- }
- }
-
- *static_cast<const char**>(save) = NULL;
- return(1);
-}
-
-/****************************************************************//**
-Update the system variable innodb_file_format using the "saved"
-value. This function is registered as a callback with MySQL. */
-static
-void
-innodb_file_format_name_update(
-/*===========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr, /*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- const char* format_name;
-
- ut_a(var_ptr != NULL);
- ut_a(save != NULL);
-
- format_name = *static_cast<const char*const*>(save);
-
- if (format_name) {
- uint format_id;
-
- format_id = innobase_file_format_name_lookup(format_name);
-
- if (format_id <= DICT_TF_FORMAT_MAX) {
- srv_file_format = format_id;
- }
- }
-
- *static_cast<const char**>(var_ptr)
- = trx_sys_file_format_id_to_name(srv_file_format);
-}
-
-/*************************************************************//**
-Check if valid argument to innodb_file_format_check. This
-function is registered as a callback with MySQL.
-@return 0 for valid file format */
-static
-int
-innodb_file_format_check_validate(
-/*==============================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to system
- variable */
- void* save, /*!< out: immediate result
- for update function */
- struct st_mysql_value* value) /*!< in: incoming string */
-{
- const char* file_format_input;
- char buff[STRING_BUFFER_USUAL_SIZE];
- int len = sizeof(buff);
- int format_id;
-
- ut_a(save != NULL);
- ut_a(value != NULL);
-
- file_format_input = value->val_str(value, buff, &len);
-
- if (file_format_input != NULL) {
-
- /* Check if user set on/off, we want to print a suitable
- message if they did so. */
-
- if (innobase_file_format_check_on_off(file_format_input)) {
- push_warning_printf(thd,
- MYSQL_ERROR::WARN_LEVEL_WARN,
- ER_WRONG_ARGUMENTS,
- "InnoDB: invalid innodb_file_format_check "
- "value; on/off can only be set at startup or "
- "in the configuration file");
- } else {
- format_id = innobase_file_format_validate_and_set(
- file_format_input);
-
- if (format_id >= 0) {
- /* Save a pointer to the name in the
- 'file_format_name_map' constant array. */
- *static_cast<const char**>(save) =
- trx_sys_file_format_id_to_name(
- (uint)format_id);
-
- return(0);
-
- } else {
- push_warning_printf(thd,
- MYSQL_ERROR::WARN_LEVEL_WARN,
- ER_WRONG_ARGUMENTS,
- "InnoDB: invalid innodb_file_format_check "
- "value; can be any format up to %s "
- "or its equivalent numeric id",
- trx_sys_file_format_id_to_name(
- DICT_TF_FORMAT_MAX));
- }
- }
- }
-
- *static_cast<const char**>(save) = NULL;
- return(1);
-}
-
-/****************************************************************//**
-Update the system variable innodb_file_format_check using the "saved"
-value. This function is registered as a callback with MySQL. */
-static
-void
-innodb_file_format_check_update(
-/*============================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr, /*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- const char* format_name_in;
- const char** format_name_out;
- uint format_id;
-
- ut_a(save != NULL);
- ut_a(var_ptr != NULL);
-
- format_name_in = *static_cast<const char*const*>(save);
-
- if (!format_name_in) {
-
- return;
- }
-
- format_id = innobase_file_format_name_lookup(format_name_in);
-
- if (format_id > DICT_TF_FORMAT_MAX) {
- /* DEFAULT is "on", which is invalid at runtime. */
- push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
- ER_WRONG_ARGUMENTS,
- "Ignoring SET innodb_file_format=%s",
- format_name_in);
- return;
- }
-
- format_name_out = static_cast<const char**>(var_ptr);
-
- /* Update the max format id in the system tablespace. */
- if (trx_sys_file_format_max_set(format_id, format_name_out)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " [Info] InnoDB: the file format in the system "
- "tablespace is now set to %s.\n", *format_name_out);
- }
-}
-
-/****************************************************************//**
-Update the system variable innodb_adaptive_hash_index using the "saved"
-value. This function is registered as a callback with MySQL. */
-static
-void
-innodb_adaptive_hash_index_update(
-/*==============================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr, /*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- if (*(my_bool*) save) {
- btr_search_enable();
- } else {
- btr_search_disable();
- }
-}
-
-/****************************************************************//**
-Update the system variable innodb_old_blocks_pct using the "saved"
-value. This function is registered as a callback with MySQL. */
-static
-void
-innodb_old_blocks_pct_update(
-/*=========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- innobase_old_blocks_pct = buf_LRU_old_ratio_update(
- *static_cast<const uint*>(save), TRUE);
-}
-
-/*************************************************************//**
-Check if it is a valid value of innodb_change_buffering. This function is
-registered as a callback with MySQL.
-@return 0 for valid innodb_change_buffering */
-static
-int
-innodb_change_buffering_validate(
-/*=============================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to system
- variable */
- void* save, /*!< out: immediate result
- for update function */
- struct st_mysql_value* value) /*!< in: incoming string */
-{
- const char* change_buffering_input;
- char buff[STRING_BUFFER_USUAL_SIZE];
- int len = sizeof(buff);
-
- ut_a(save != NULL);
- ut_a(value != NULL);
-
- change_buffering_input = value->val_str(value, buff, &len);
-
- if (change_buffering_input != NULL) {
- ulint use;
-
- for (use = 0; use < UT_ARR_SIZE(innobase_change_buffering_values);
- use++) {
- if (!innobase_strcasecmp(
- change_buffering_input,
- innobase_change_buffering_values[use])) {
- *(ibuf_use_t*) save = (ibuf_use_t) use;
- return(0);
- }
- }
- }
-
- return(1);
-}
-
-/****************************************************************//**
-Update the system variable innodb_change_buffering using the "saved"
-value. This function is registered as a callback with MySQL. */
-static
-void
-innodb_change_buffering_update(
-/*===========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr, /*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- ut_a(var_ptr != NULL);
- ut_a(save != NULL);
- ut_a((*(ibuf_use_t*) save) < IBUF_USE_COUNT);
-
- ibuf_use = *(const ibuf_use_t*) save;
-
- *(const char**) var_ptr = innobase_change_buffering_values[ibuf_use];
-}
-
-static int show_innodb_vars(THD *thd, SHOW_VAR *var, char *buff)
-{
- innodb_export_status();
- var->type= SHOW_ARRAY;
- var->value= (char *) &innodb_status_variables;
- return 0;
-}
-
-/***********************************************************************
-This function checks each index name for a table against reserved
-system default primary index name 'GEN_CLUST_INDEX'. If a name matches,
-this function pushes an warning message to the client, and returns true. */
-extern "C" UNIV_INTERN
-bool
-innobase_index_name_is_reserved(
-/*============================*/
- /* out: true if an index name
- matches the reserved name */
- const trx_t* trx, /* in: InnoDB transaction handle */
- const KEY* key_info, /* in: Indexes to be created */
- ulint num_of_keys) /* in: Number of indexes to
- be created. */
-{
- const KEY* key;
- uint key_num; /* index number */
-
- for (key_num = 0; key_num < num_of_keys; key_num++) {
- key = &key_info[key_num];
-
- if (innobase_strcasecmp(key->name,
- innobase_index_reserve_name) == 0) {
- /* Push warning to mysql */
- push_warning_printf((THD*) trx->mysql_thd,
- MYSQL_ERROR::WARN_LEVEL_WARN,
- ER_WRONG_NAME_FOR_INDEX,
- "Cannot Create Index with name "
- "'%s'. The name is reserved "
- "for the system default primary "
- "index.",
- innobase_index_reserve_name);
-
- my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
- innobase_index_reserve_name);
-
- return(true);
- }
- }
-
- return(false);
-}
-
-static SHOW_VAR innodb_status_variables_export[]= {
- {"Innodb", (char*) &show_innodb_vars, SHOW_FUNC},
- {NullS, NullS, SHOW_LONG}
-};
-
-static struct st_mysql_storage_engine innobase_storage_engine=
-{ MYSQL_HANDLERTON_INTERFACE_VERSION };
-
-/* plugin options */
-static MYSQL_SYSVAR_BOOL(checksums, innobase_use_checksums,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Enable InnoDB checksums validation (enabled by default). "
- "Disable with --skip-innodb-checksums.",
- NULL, NULL, TRUE);
-
-static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir,
- PLUGIN_VAR_READONLY,
- "The common part for InnoDB table spaces.",
- NULL, NULL, NULL);
-
-static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Enable InnoDB doublewrite buffer (enabled by default). "
- "Disable with --skip-innodb-doublewrite.",
- NULL, NULL, TRUE);
-
-static MYSQL_SYSVAR_ULONG(io_capacity, srv_io_capacity,
- PLUGIN_VAR_RQCMDARG,
- "Number of IOPs the server can do. Tunes the background IO rate",
- NULL, NULL, 200, 100, ~0L, 0);
-
-static MYSQL_SYSVAR_ULONG(fast_shutdown, innobase_fast_shutdown,
- PLUGIN_VAR_OPCMDARG,
- "Speeds up the shutdown process of the InnoDB storage engine. Possible "
- "values are 0, 1 (faster)"
- /*
- NetWare can't close unclosed files, can't automatically kill remaining
- threads, etc, so on this OS we disable the crash-like InnoDB shutdown.
- */
- IF_NETWARE("", " or 2 (fastest - crash-like)")
- ".",
- NULL, NULL, 1, 0, IF_NETWARE(1,2), 0);
-
-static MYSQL_SYSVAR_BOOL(file_per_table, srv_file_per_table,
- PLUGIN_VAR_NOCMDARG,
- "Stores each InnoDB table to an .ibd file in the database dir.",
- NULL, NULL, FALSE);
-
-static MYSQL_SYSVAR_STR(file_format, innobase_file_format_name,
- PLUGIN_VAR_RQCMDARG,
- "File format to use for new tables in .ibd files.",
- innodb_file_format_name_validate,
- innodb_file_format_name_update, "Antelope");
-
-/* If a new file format is introduced, the file format
-name needs to be updated accordingly. Please refer to
-file_format_name_map[] defined in trx0sys.c for the next
-file format name. */
-static MYSQL_SYSVAR_STR(file_format_check, innobase_file_format_check,
- PLUGIN_VAR_OPCMDARG,
- "The highest file format in the tablespace.",
- innodb_file_format_check_validate,
- innodb_file_format_check_update, "Barracuda");
-
-static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
- PLUGIN_VAR_OPCMDARG,
- "Set to 0 (write and flush once per second),"
- " 1 (write and flush at each commit)"
- " or 2 (write at commit, flush once per second).",
- NULL, NULL, 1, 0, 2, 0);
-
-static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "With which method to flush data.", NULL, NULL, NULL);
-
-static MYSQL_SYSVAR_BOOL(locks_unsafe_for_binlog, innobase_locks_unsafe_for_binlog,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Force InnoDB to not use next-key locking, to use only row-level locking.",
- NULL, NULL, FALSE);
-
-#ifdef UNIV_LOG_ARCHIVE
-static MYSQL_SYSVAR_STR(log_arch_dir, innobase_log_arch_dir,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Where full logs should be archived.", NULL, NULL, NULL);
-
-static MYSQL_SYSVAR_BOOL(log_archive, innobase_log_archive,
- PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
- "Set to 1 if you want to have logs archived.", NULL, NULL, FALSE);
-#endif /* UNIV_LOG_ARCHIVE */
-
-static MYSQL_SYSVAR_STR(log_group_home_dir, innobase_log_group_home_dir,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Path to InnoDB log files.", NULL, NULL, NULL);
-
-static MYSQL_SYSVAR_ULONG(max_dirty_pages_pct, srv_max_buf_pool_modified_pct,
- PLUGIN_VAR_RQCMDARG,
- "Percentage of dirty pages allowed in bufferpool.",
- NULL, NULL, 75, 0, 99, 0);
-
-static MYSQL_SYSVAR_BOOL(adaptive_flushing, srv_adaptive_flushing,
- PLUGIN_VAR_NOCMDARG,
- "Attempt flushing dirty pages to avoid IO bursts at checkpoints.",
- NULL, NULL, TRUE);
-
-static MYSQL_SYSVAR_ULONG(max_purge_lag, srv_max_purge_lag,
- PLUGIN_VAR_RQCMDARG,
- "Desired maximum length of the purge queue (0 = no limit)",
- NULL, NULL, 0, 0, ~0L, 0);
-
-static MYSQL_SYSVAR_BOOL(rollback_on_timeout, innobase_rollback_on_timeout,
- PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
- "Roll back the complete transaction on lock wait timeout, for 4.x compatibility (disabled by default)",
- NULL, NULL, FALSE);
-
-static MYSQL_SYSVAR_BOOL(status_file, innobase_create_status_file,
- PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_NOSYSVAR,
- "Enable SHOW INNODB STATUS output in the innodb_status.<pid> file",
- NULL, NULL, FALSE);
-
-static MYSQL_SYSVAR_BOOL(stats_on_metadata, innobase_stats_on_metadata,
- PLUGIN_VAR_OPCMDARG,
- "Enable statistics gathering for metadata commands such as SHOW TABLE STATUS (on by default)",
- NULL, NULL, TRUE);
-
-static MYSQL_SYSVAR_ULONGLONG(stats_sample_pages, srv_stats_sample_pages,
- PLUGIN_VAR_RQCMDARG,
- "The number of index pages to sample when calculating statistics (default 8)",
- NULL, NULL, 8, 1, ~0ULL, 0);
-
-static MYSQL_SYSVAR_BOOL(adaptive_hash_index, btr_search_enabled,
- PLUGIN_VAR_OPCMDARG,
- "Enable InnoDB adaptive hash index (enabled by default). "
- "Disable with --skip-innodb-adaptive-hash-index.",
- NULL, innodb_adaptive_hash_index_update, TRUE);
-
-static MYSQL_SYSVAR_ULONG(replication_delay, srv_replication_delay,
- PLUGIN_VAR_RQCMDARG,
- "Replication thread delay (ms) on the slave server if "
- "innodb_thread_concurrency is reached (0 by default)",
- NULL, NULL, 0, 0, ~0UL, 0);
-
-static MYSQL_SYSVAR_LONG(additional_mem_pool_size, innobase_additional_mem_pool_size,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Size of a memory pool InnoDB uses to store data dictionary information and other internal data structures.",
- NULL, NULL, 8*1024*1024L, 512*1024L, LONG_MAX, 1024);
-
-static MYSQL_SYSVAR_ULONG(autoextend_increment, srv_auto_extend_increment,
- PLUGIN_VAR_RQCMDARG,
- "Data file autoextend increment in megabytes",
- NULL, NULL, 8L, 1L, 1000L, 0);
-
-static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
- NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L);
-
-static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency,
- PLUGIN_VAR_RQCMDARG,
- "Helps in performance tuning in heavily concurrent environments.",
- innobase_commit_concurrency_validate, NULL, 0, 0, 1000, 0);
-
-static MYSQL_SYSVAR_ULONG(concurrency_tickets, srv_n_free_tickets_to_enter,
- PLUGIN_VAR_RQCMDARG,
- "Number of times a thread is allowed to enter InnoDB within the same SQL query after it has once got the ticket",
- NULL, NULL, 500L, 1L, ~0L, 0);
-
-static MYSQL_SYSVAR_LONG(file_io_threads, innobase_file_io_threads,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY | PLUGIN_VAR_NOSYSVAR,
- "Number of file I/O threads in InnoDB.",
- NULL, NULL, 4, 4, 64, 0);
-
-static MYSQL_SYSVAR_ULONG(read_io_threads, innobase_read_io_threads,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Number of background read I/O threads in InnoDB.",
- NULL, NULL, 4, 1, 64, 0);
-
-static MYSQL_SYSVAR_ULONG(write_io_threads, innobase_write_io_threads,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Number of background write I/O threads in InnoDB.",
- NULL, NULL, 4, 1, 64, 0);
-
-static MYSQL_SYSVAR_LONG(force_recovery, innobase_force_recovery,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Helps to save your data in case the disk image of the database becomes corrupt.",
- NULL, NULL, 0, 0, 6, 0);
-
-static MYSQL_SYSVAR_LONG(log_buffer_size, innobase_log_buffer_size,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "The size of the buffer which InnoDB uses to write log to the log files on disk.",
- NULL, NULL, 8*1024*1024L, 256*1024L, LONG_MAX, 1024);
-
-static MYSQL_SYSVAR_LONGLONG(log_file_size, innobase_log_file_size,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Size of each log file in a log group.",
- NULL, NULL, 5*1024*1024L, 1*1024*1024L, LONGLONG_MAX, 1024*1024L);
-
-static MYSQL_SYSVAR_LONG(log_files_in_group, innobase_log_files_in_group,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Number of log files in the log group. InnoDB writes to the files in a circular fashion. Value 3 is recommended here.",
- NULL, NULL, 2, 2, 100, 0);
-
-static MYSQL_SYSVAR_LONG(mirrored_log_groups, innobase_mirrored_log_groups,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Number of identical copies of log groups we keep for the database. Currently this should be set to 1.",
- NULL, NULL, 1, 1, 10, 0);
-
-static MYSQL_SYSVAR_UINT(old_blocks_pct, innobase_old_blocks_pct,
- PLUGIN_VAR_RQCMDARG,
- "Percentage of the buffer pool to reserve for 'old' blocks.",
- NULL, innodb_old_blocks_pct_update, 100 * 3 / 8, 5, 95, 0);
-
-static MYSQL_SYSVAR_UINT(old_blocks_time, buf_LRU_old_threshold_ms,
- PLUGIN_VAR_RQCMDARG,
- "Move blocks to the 'new' end of the buffer pool if the first access"
- " was at least this many milliseconds ago."
- " The timeout is disabled if 0 (the default).",
- NULL, NULL, 0, 0, UINT_MAX32, 0);
-
-static MYSQL_SYSVAR_LONG(open_files, innobase_open_files,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "How many files at the maximum InnoDB keeps open at the same time.",
- NULL, NULL, 300L, 10L, LONG_MAX, 0);
-
-static MYSQL_SYSVAR_ULONG(sync_spin_loops, srv_n_spin_wait_rounds,
- PLUGIN_VAR_RQCMDARG,
- "Count of spin-loop rounds in InnoDB mutexes (30 by default)",
- NULL, NULL, 30L, 0L, ~0L, 0);
-
-static MYSQL_SYSVAR_ULONG(spin_wait_delay, srv_spin_wait_delay,
- PLUGIN_VAR_OPCMDARG,
- "Maximum delay between polling for a spin lock (6 by default)",
- NULL, NULL, 6L, 0L, ~0L, 0);
-
-static MYSQL_SYSVAR_ULONG(thread_concurrency, srv_thread_concurrency,
- PLUGIN_VAR_RQCMDARG,
- "Helps in performance tuning in heavily concurrent environments. Sets the maximum number of threads allowed inside InnoDB. Value 0 will disable the thread throttling.",
- NULL, NULL, 0, 0, 1000, 0);
-
-static MYSQL_SYSVAR_ULONG(thread_sleep_delay, srv_thread_sleep_delay,
- PLUGIN_VAR_RQCMDARG,
- "Time of innodb thread sleeping before joining InnoDB queue (usec). Value 0 disable a sleep",
- NULL, NULL, 10000L, 0L, ~0L, 0);
-
-static MYSQL_SYSVAR_STR(data_file_path, innobase_data_file_path,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Path to individual files and their sizes.",
- NULL, NULL, NULL);
-
-static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "The AUTOINC lock modes supported by InnoDB: "
- "0 => Old style AUTOINC locking (for backward"
- " compatibility) "
- "1 => New style AUTOINC locking "
- "2 => No AUTOINC locking (unsafe for SBR)",
- NULL, NULL,
- AUTOINC_NEW_STYLE_LOCKING, /* Default setting */
- AUTOINC_OLD_STYLE_LOCKING, /* Minimum value */
- AUTOINC_NO_LOCKING, 0); /* Maximum value */
-
-static MYSQL_SYSVAR_STR(version, innodb_version_str,
- PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_READONLY,
- "InnoDB version", NULL, NULL, INNODB_VERSION_STR);
-
-static MYSQL_SYSVAR_BOOL(use_sys_malloc, srv_use_sys_malloc,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Use OS memory allocator instead of InnoDB's internal memory allocator",
- NULL, NULL, TRUE);
-
-static MYSQL_SYSVAR_STR(change_buffering, innobase_change_buffering,
- PLUGIN_VAR_RQCMDARG,
- "Buffer changes to reduce random access: "
- "OFF, ON, inserting, deleting, changing, or purging.",
- innodb_change_buffering_validate,
- innodb_change_buffering_update, NULL);
-
-static MYSQL_SYSVAR_ULONG(read_ahead_threshold, srv_read_ahead_threshold,
- PLUGIN_VAR_RQCMDARG,
- "Number of pages that must be accessed sequentially for InnoDB to"
- "trigger a readahead.",
- NULL, NULL, 56, 0, 64, 0);
-
-static struct st_mysql_sys_var* innobase_system_variables[]= {
- MYSQL_SYSVAR(additional_mem_pool_size),
- MYSQL_SYSVAR(autoextend_increment),
- MYSQL_SYSVAR(buffer_pool_size),
- MYSQL_SYSVAR(checksums),
- MYSQL_SYSVAR(commit_concurrency),
- MYSQL_SYSVAR(concurrency_tickets),
- MYSQL_SYSVAR(data_file_path),
- MYSQL_SYSVAR(data_home_dir),
- MYSQL_SYSVAR(doublewrite),
- MYSQL_SYSVAR(fast_shutdown),
- MYSQL_SYSVAR(file_io_threads),
- MYSQL_SYSVAR(read_io_threads),
- MYSQL_SYSVAR(write_io_threads),
- MYSQL_SYSVAR(file_per_table),
- MYSQL_SYSVAR(file_format),
- MYSQL_SYSVAR(file_format_check),
- MYSQL_SYSVAR(flush_log_at_trx_commit),
- MYSQL_SYSVAR(flush_method),
- MYSQL_SYSVAR(force_recovery),
- MYSQL_SYSVAR(locks_unsafe_for_binlog),
- MYSQL_SYSVAR(lock_wait_timeout),
-#ifdef UNIV_LOG_ARCHIVE
- MYSQL_SYSVAR(log_arch_dir),
- MYSQL_SYSVAR(log_archive),
-#endif /* UNIV_LOG_ARCHIVE */
- MYSQL_SYSVAR(log_buffer_size),
- MYSQL_SYSVAR(log_file_size),
- MYSQL_SYSVAR(log_files_in_group),
- MYSQL_SYSVAR(log_group_home_dir),
- MYSQL_SYSVAR(max_dirty_pages_pct),
- MYSQL_SYSVAR(adaptive_flushing),
- MYSQL_SYSVAR(max_purge_lag),
- MYSQL_SYSVAR(mirrored_log_groups),
- MYSQL_SYSVAR(old_blocks_pct),
- MYSQL_SYSVAR(old_blocks_time),
- MYSQL_SYSVAR(open_files),
- MYSQL_SYSVAR(rollback_on_timeout),
- MYSQL_SYSVAR(stats_on_metadata),
- MYSQL_SYSVAR(stats_sample_pages),
- MYSQL_SYSVAR(adaptive_hash_index),
- MYSQL_SYSVAR(replication_delay),
- MYSQL_SYSVAR(status_file),
- MYSQL_SYSVAR(strict_mode),
- MYSQL_SYSVAR(support_xa),
- MYSQL_SYSVAR(sync_spin_loops),
- MYSQL_SYSVAR(spin_wait_delay),
- MYSQL_SYSVAR(table_locks),
- MYSQL_SYSVAR(thread_concurrency),
- MYSQL_SYSVAR(thread_sleep_delay),
- MYSQL_SYSVAR(autoinc_lock_mode),
- MYSQL_SYSVAR(version),
- MYSQL_SYSVAR(use_sys_malloc),
- MYSQL_SYSVAR(change_buffering),
- MYSQL_SYSVAR(read_ahead_threshold),
- MYSQL_SYSVAR(io_capacity),
- NULL
-};
-
-mysql_declare_plugin(innodb_plugin)
-{
- MYSQL_STORAGE_ENGINE_PLUGIN,
- &innobase_storage_engine,
- innobase_hton_name,
- "Innobase Oy",
- "Supports transactions, row-level locking, and foreign keys",
- PLUGIN_LICENSE_GPL,
- innobase_init, /* Plugin Init */
- NULL, /* Plugin Deinit */
- INNODB_VERSION_SHORT,
- innodb_status_variables_export,/* status variables */
- innobase_system_variables, /* system variables */
- NULL /* reserved */
-},
-i_s_innodb_trx,
-i_s_innodb_locks,
-i_s_innodb_lock_waits,
-i_s_innodb_cmp,
-i_s_innodb_cmp_reset,
-i_s_innodb_cmpmem,
-i_s_innodb_cmpmem_reset
-mysql_declare_plugin_end;
-
-/** @brief Initialize the default value of innodb_commit_concurrency.
-
-Once InnoDB is running, the innodb_commit_concurrency must not change
-from zero to nonzero. (Bug #42101)
-
-The initial default value is 0, and without this extra initialization,
-SET GLOBAL innodb_commit_concurrency=DEFAULT would set the parameter
-to 0, even if it was initially set to nonzero at the command line
-or configuration file. */
-static
-void
-innobase_commit_concurrency_init_default(void)
-/*==========================================*/
-{
- MYSQL_SYSVAR_NAME(commit_concurrency).def_val
- = innobase_commit_concurrency;
-}
-
-#ifdef UNIV_COMPILE_TEST_FUNCS
-
-typedef struct innobase_convert_name_test_struct {
- char* buf;
- ulint buflen;
- const char* id;
- ulint idlen;
- void* thd;
- ibool file_id;
-
- const char* expected;
-} innobase_convert_name_test_t;
-
-void
-test_innobase_convert_name()
-{
- char buf[1024];
- ulint i;
-
- innobase_convert_name_test_t test_input[] = {
- {buf, sizeof(buf), "abcd", 4, NULL, TRUE, "\"abcd\""},
- {buf, 7, "abcd", 4, NULL, TRUE, "\"abcd\""},
- {buf, 6, "abcd", 4, NULL, TRUE, "\"abcd\""},
- {buf, 5, "abcd", 4, NULL, TRUE, "\"abc\""},
- {buf, 4, "abcd", 4, NULL, TRUE, "\"ab\""},
-
- {buf, sizeof(buf), "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""},
- {buf, 9, "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""},
- {buf, 8, "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""},
- {buf, 7, "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""},
- {buf, 6, "ab@0060cd", 9, NULL, TRUE, "\"ab`c\""},
- {buf, 5, "ab@0060cd", 9, NULL, TRUE, "\"ab`\""},
- {buf, 4, "ab@0060cd", 9, NULL, TRUE, "\"ab\""},
-
- {buf, sizeof(buf), "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50#ab\"\"cd\""},
- {buf, 17, "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50#ab\"\"cd\""},
- {buf, 16, "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50#ab\"\"c\""},
- {buf, 15, "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50#ab\"\"\""},
- {buf, 14, "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50#ab\""},
- {buf, 13, "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50#ab\""},
- {buf, 12, "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50#a\""},
- {buf, 11, "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50#\""},
- {buf, 10, "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50\""},
-
- {buf, sizeof(buf), "ab/cd", 5, NULL, TRUE, "\"ab\".\"cd\""},
- {buf, 9, "ab/cd", 5, NULL, TRUE, "\"ab\".\"cd\""},
- {buf, 8, "ab/cd", 5, NULL, TRUE, "\"ab\".\"c\""},
- {buf, 7, "ab/cd", 5, NULL, TRUE, "\"ab\".\"\""},
- {buf, 6, "ab/cd", 5, NULL, TRUE, "\"ab\"."},
- {buf, 5, "ab/cd", 5, NULL, TRUE, "\"ab\"."},
- {buf, 4, "ab/cd", 5, NULL, TRUE, "\"ab\""},
- {buf, 3, "ab/cd", 5, NULL, TRUE, "\"a\""},
- {buf, 2, "ab/cd", 5, NULL, TRUE, "\"\""},
- /* XXX probably "" is a better result in this case
- {buf, 1, "ab/cd", 5, NULL, TRUE, "."},
- */
- {buf, 0, "ab/cd", 5, NULL, TRUE, ""},
- };
-
- for (i = 0; i < sizeof(test_input) / sizeof(test_input[0]); i++) {
-
- char* end;
- ibool ok = TRUE;
- size_t res_len;
-
- fprintf(stderr, "TESTING %lu, %s, %lu, %s\n",
- test_input[i].buflen,
- test_input[i].id,
- test_input[i].idlen,
- test_input[i].expected);
-
- end = innobase_convert_name(
- test_input[i].buf,
- test_input[i].buflen,
- test_input[i].id,
- test_input[i].idlen,
- test_input[i].thd,
- test_input[i].file_id);
-
- res_len = (size_t) (end - test_input[i].buf);
-
- if (res_len != strlen(test_input[i].expected)) {
-
- fprintf(stderr, "unexpected len of the result: %u, "
- "expected: %u\n", (unsigned) res_len,
- (unsigned) strlen(test_input[i].expected));
- ok = FALSE;
- }
-
- if (memcmp(test_input[i].buf,
- test_input[i].expected,
- strlen(test_input[i].expected)) != 0
- || !ok) {
-
- fprintf(stderr, "unexpected result: %.*s, "
- "expected: %s\n", (int) res_len,
- test_input[i].buf,
- test_input[i].expected);
- ok = FALSE;
- }
-
- if (ok) {
- fprintf(stderr, "OK: res: %.*s\n\n", (int) res_len,
- buf);
- } else {
- fprintf(stderr, "FAILED\n\n");
- return;
- }
- }
-}
-
-#endif /* UNIV_COMPILE_TEST_FUNCS */
diff --git a/storage/innodb_plugin/handler/ha_innodb.h b/storage/innodb_plugin/handler/ha_innodb.h
deleted file mode 100644
index 31e88ed8530..00000000000
--- a/storage/innodb_plugin/handler/ha_innodb.h
+++ /dev/null
@@ -1,311 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2000, 2009, MySQL AB & Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/*
- This file is based on ha_berkeley.h of MySQL distribution
-
- This file defines the Innodb handler: the interface between MySQL and
- Innodb
-*/
-
-#ifdef USE_PRAGMA_INTERFACE
-#pragma interface /* gcc class implementation */
-#endif
-
-/** InnoDB table share */
-typedef struct st_innobase_share {
- THR_LOCK lock; /*!< MySQL lock protecting
- this structure */
- const char* table_name; /*!< InnoDB table name */
- uint use_count; /*!< reference count,
- incremented in get_share()
- and decremented in free_share() */
- void* table_name_hash;/*!< hash table chain node */
-} INNOBASE_SHARE;
-
-
-/** InnoDB B-tree index */
-struct dict_index_struct;
-/** Prebuilt structures in an Innobase table handle used within MySQL */
-struct row_prebuilt_struct;
-
-/** InnoDB B-tree index */
-typedef struct dict_index_struct dict_index_t;
-/** Prebuilt structures in an Innobase table handle used within MySQL */
-typedef struct row_prebuilt_struct row_prebuilt_t;
-
-/** The class defining a handle to an Innodb table */
-class ha_innobase: public handler
-{
- row_prebuilt_t* prebuilt; /*!< prebuilt struct in InnoDB, used
- to save CPU time with prebuilt data
- structures*/
- THD* user_thd; /*!< the thread handle of the user
- currently using the handle; this is
- set in external_lock function */
- THR_LOCK_DATA lock;
- INNOBASE_SHARE* share; /*!< information for MySQL
- table locking */
-
- uchar* upd_buff; /*!< buffer used in updates */
- uchar* key_val_buff; /*!< buffer used in converting
- search key values from MySQL format
- to Innodb format */
- ulong upd_and_key_val_buff_len;
- /* the length of each of the previous
- two buffers */
- Table_flags int_table_flags;
- uint primary_key;
- ulong start_of_scan; /*!< this is set to 1 when we are
- starting a table scan but have not
- yet fetched any row, else 0 */
- uint last_match_mode;/* match mode of the latest search:
- ROW_SEL_EXACT, ROW_SEL_EXACT_PREFIX,
- or undefined */
- uint num_write_row; /*!< number of write_row() calls */
-
- uint store_key_val_for_row(uint keynr, char* buff, uint buff_len,
- const uchar* record);
- inline void update_thd(THD* thd);
- void update_thd();
- int change_active_index(uint keynr);
- int general_fetch(uchar* buf, uint direction, uint match_mode);
- ulint innobase_lock_autoinc();
- ulonglong innobase_peek_autoinc();
- ulint innobase_set_max_autoinc(ulonglong auto_inc);
- ulint innobase_reset_autoinc(ulonglong auto_inc);
- ulint innobase_get_autoinc(ulonglong* value);
- ulint innobase_update_autoinc(ulonglong auto_inc);
- ulint innobase_initialize_autoinc();
- dict_index_t* innobase_get_index(uint keynr);
- ulonglong innobase_get_int_col_max_value(const Field* field);
-
- /* Init values for the class: */
- public:
- ha_innobase(handlerton *hton, TABLE_SHARE *table_arg);
- ~ha_innobase();
- /*
- Get the row type from the storage engine. If this method returns
- ROW_TYPE_NOT_USED, the information in HA_CREATE_INFO should be used.
- */
- enum row_type get_row_type() const;
-
- const char* table_type() const;
- const char* index_type(uint key_number);
- const char** bas_ext() const;
- Table_flags table_flags() const;
- ulong index_flags(uint idx, uint part, bool all_parts) const;
- uint max_supported_keys() const;
- uint max_supported_key_length() const;
- uint max_supported_key_part_length() const;
- const key_map* keys_to_use_for_scanning();
-
- int open(const char *name, int mode, uint test_if_locked);
- int close(void);
- double scan_time();
- double read_time(uint index, uint ranges, ha_rows rows);
-
- int write_row(uchar * buf);
- int update_row(const uchar * old_data, uchar * new_data);
- int delete_row(const uchar * buf);
- bool was_semi_consistent_read();
- void try_semi_consistent_read(bool yes);
- void unlock_row();
-
- int index_init(uint index, bool sorted);
- int index_end();
- int index_read(uchar * buf, const uchar * key,
- uint key_len, enum ha_rkey_function find_flag);
- int index_read_idx(uchar * buf, uint index, const uchar * key,
- uint key_len, enum ha_rkey_function find_flag);
- int index_read_last(uchar * buf, const uchar * key, uint key_len);
- int index_next(uchar * buf);
- int index_next_same(uchar * buf, const uchar *key, uint keylen);
- int index_prev(uchar * buf);
- int index_first(uchar * buf);
- int index_last(uchar * buf);
-
- int rnd_init(bool scan);
- int rnd_end();
- int rnd_next(uchar *buf);
- int rnd_pos(uchar * buf, uchar *pos);
-
- void position(const uchar *record);
- int info(uint);
- int analyze(THD* thd,HA_CHECK_OPT* check_opt);
- int optimize(THD* thd,HA_CHECK_OPT* check_opt);
- int discard_or_import_tablespace(my_bool discard);
- int extra(enum ha_extra_function operation);
- int reset();
- int external_lock(THD *thd, int lock_type);
- int transactional_table_lock(THD *thd, int lock_type);
- int start_stmt(THD *thd, thr_lock_type lock_type);
- void position(uchar *record);
- ha_rows records_in_range(uint inx, key_range *min_key, key_range
- *max_key);
- ha_rows estimate_rows_upper_bound();
-
- void update_create_info(HA_CREATE_INFO* create_info);
- int create(const char *name, register TABLE *form,
- HA_CREATE_INFO *create_info);
- int delete_all_rows();
- int delete_table(const char *name);
- int rename_table(const char* from, const char* to);
- int check(THD* thd, HA_CHECK_OPT* check_opt);
- char* update_table_comment(const char* comment);
- char* get_foreign_key_create_info();
- int get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list);
- bool can_switch_engines();
- uint referenced_by_foreign_key();
- void free_foreign_key_create_info(char* str);
- THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to,
- enum thr_lock_type lock_type);
- void init_table_handle_for_HANDLER();
- virtual void get_auto_increment(ulonglong offset, ulonglong increment,
- ulonglong nb_desired_values,
- ulonglong *first_value,
- ulonglong *nb_reserved_values);
- int reset_auto_increment(ulonglong value);
-
- virtual bool get_error_message(int error, String *buf);
-
- uint8 table_cache_type();
- /*
- ask handler about permission to cache table during query registration
- */
- my_bool register_query_cache_table(THD *thd, char *table_key,
- uint key_length,
- qc_engine_callback *call_back,
- ulonglong *engine_data);
- static char *get_mysql_bin_log_name();
- static ulonglong get_mysql_bin_log_pos();
- bool primary_key_is_clustered();
- int cmp_ref(const uchar *ref1, const uchar *ref2);
- /** Fast index creation (smart ALTER TABLE) @see handler0alter.cc @{ */
- int add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys);
- int prepare_drop_index(TABLE *table_arg, uint *key_num,
- uint num_of_keys);
- int final_drop_index(TABLE *table_arg);
- /** @} */
- bool check_if_incompatible_data(HA_CREATE_INFO *info,
- uint table_changes);
-};
-
-/* Some accessor functions which the InnoDB plugin needs, but which
-can not be added to mysql/plugin.h as part of the public interface;
-the definitions are bracketed with #ifdef INNODB_COMPATIBILITY_HOOKS */
-
-#ifndef INNODB_COMPATIBILITY_HOOKS
-#error InnoDB needs MySQL to be built with #define INNODB_COMPATIBILITY_HOOKS
-#endif
-
-extern "C" {
-struct charset_info_st *thd_charset(MYSQL_THD thd);
-char **thd_query(MYSQL_THD thd);
-
-/** Get the file name of the MySQL binlog.
- * @return the name of the binlog file
- */
-const char* mysql_bin_log_file_name(void);
-
-/** Get the current position of the MySQL binlog.
- * @return byte offset from the beginning of the binlog
- */
-ulonglong mysql_bin_log_file_pos(void);
-
-/**
- Check if a user thread is a replication slave thread
- @param thd user thread
- @retval 0 the user thread is not a replication slave thread
- @retval 1 the user thread is a replication slave thread
-*/
-int thd_slave_thread(const MYSQL_THD thd);
-
-/**
- Check if a user thread is running a non-transactional update
- @param thd user thread
- @retval 0 the user thread is not running a non-transactional update
- @retval 1 the user thread is running a non-transactional update
-*/
-int thd_non_transactional_update(const MYSQL_THD thd);
-
-/**
- Get the user thread's binary logging format
- @param thd user thread
- @return Value to be used as index into the binlog_format_names array
-*/
-int thd_binlog_format(const MYSQL_THD thd);
-
-/**
- Mark transaction to rollback and mark error as fatal to a sub-statement.
- @param thd Thread handle
- @param all TRUE <=> rollback main transaction.
-*/
-void thd_mark_transaction_to_rollback(MYSQL_THD thd, bool all);
-
-#if MYSQL_VERSION_ID > 50140
-/**
- Check if binary logging is filtered for thread's current db.
- @param thd Thread handle
- @retval 1 the query is not filtered, 0 otherwise.
-*/
-bool thd_binlog_filter_ok(const MYSQL_THD thd);
-#endif /* MYSQL_VERSION_ID > 50140 */
-}
-
-typedef struct trx_struct trx_t;
-/********************************************************************//**
-@file handler/ha_innodb.h
-Converts an InnoDB error code to a MySQL error code and also tells to MySQL
-about a possible transaction rollback inside InnoDB caused by a lock wait
-timeout or a deadlock.
-@return MySQL error code */
-extern "C"
-int
-convert_error_code_to_mysql(
-/*========================*/
- int error, /*!< in: InnoDB error code */
- ulint flags, /*!< in: InnoDB table flags, or 0 */
- MYSQL_THD thd); /*!< in: user thread handle or NULL */
-
-/*********************************************************************//**
-Allocates an InnoDB transaction for a MySQL handler object.
-@return InnoDB transaction handle */
-extern "C"
-trx_t*
-innobase_trx_allocate(
-/*==================*/
- MYSQL_THD thd); /*!< in: user thread handle */
-
-
-/*********************************************************************//**
-This function checks each index name for a table against reserved
-system default primary index name 'GEN_CLUST_INDEX'. If a name
-matches, this function pushes an warning message to the client,
-and returns true. */
-extern "C"
-bool
-innobase_index_name_is_reserved(
-/*============================*/
- /* out: true if the index name
- matches the reserved name */
- const trx_t* trx, /* in: InnoDB transaction handle */
- const KEY* key_info, /* in: Indexes to be created */
- ulint num_of_keys); /* in: Number of indexes to
- be created. */
-
diff --git a/storage/innodb_plugin/ibuf/ibuf0ibuf.c b/storage/innodb_plugin/ibuf/ibuf0ibuf.c
deleted file mode 100644
index 08986fac0ef..00000000000
--- a/storage/innodb_plugin/ibuf/ibuf0ibuf.c
+++ /dev/null
@@ -1,3624 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file ibuf/ibuf0ibuf.c
-Insert buffer
-
-Created 7/19/1997 Heikki Tuuri
-*******************************************************/
-
-#include "ibuf0ibuf.h"
-
-/** Number of bits describing a single page */
-#define IBUF_BITS_PER_PAGE 4
-#if IBUF_BITS_PER_PAGE % 2
-# error "IBUF_BITS_PER_PAGE must be an even number!"
-#endif
-/** The start address for an insert buffer bitmap page bitmap */
-#define IBUF_BITMAP PAGE_DATA
-
-#ifdef UNIV_NONINL
-#include "ibuf0ibuf.ic"
-#endif
-
-#ifndef UNIV_HOTBACKUP
-
-#include "buf0buf.h"
-#include "buf0rea.h"
-#include "fsp0fsp.h"
-#include "trx0sys.h"
-#include "fil0fil.h"
-#include "thr0loc.h"
-#include "rem0rec.h"
-#include "btr0cur.h"
-#include "btr0pcur.h"
-#include "btr0btr.h"
-#include "sync0sync.h"
-#include "dict0boot.h"
-#include "fut0lst.h"
-#include "lock0lock.h"
-#include "log0recv.h"
-#include "que0que.h"
-
-/* STRUCTURE OF AN INSERT BUFFER RECORD
-
-In versions < 4.1.x:
-
-1. The first field is the page number.
-2. The second field is an array which stores type info for each subsequent
- field. We store the information which affects the ordering of records, and
- also the physical storage size of an SQL NULL value. E.g., for CHAR(10) it
- is 10 bytes.
-3. Next we have the fields of the actual index record.
-
-In versions >= 4.1.x:
-
-Note that contary to what we planned in the 1990's, there will only be one
-insert buffer tree, and that is in the system tablespace of InnoDB.
-
-1. The first field is the space id.
-2. The second field is a one-byte marker (0) which differentiates records from
- the < 4.1.x storage format.
-3. The third field is the page number.
-4. The fourth field contains the type info, where we have also added 2 bytes to
- store the charset. In the compressed table format of 5.0.x we must add more
- information here so that we can build a dummy 'index' struct which 5.0.x
- can use in the binary search on the index page in the ibuf merge phase.
-5. The rest of the fields contain the fields of the actual index record.
-
-In versions >= 5.0.3:
-
-The first byte of the fourth field is an additional marker (0) if the record
-is in the compact format. The presence of this marker can be detected by
-looking at the length of the field modulo DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE.
-
-The high-order bit of the character set field in the type info is the
-"nullable" flag for the field. */
-
-
-/* PREVENTING DEADLOCKS IN THE INSERT BUFFER SYSTEM
-
-If an OS thread performs any operation that brings in disk pages from
-non-system tablespaces into the buffer pool, or creates such a page there,
-then the operation may have as a side effect an insert buffer index tree
-compression. Thus, the tree latch of the insert buffer tree may be acquired
-in the x-mode, and also the file space latch of the system tablespace may
-be acquired in the x-mode.
-
-Also, an insert to an index in a non-system tablespace can have the same
-effect. How do we know this cannot lead to a deadlock of OS threads? There
-is a problem with the i\o-handler threads: they break the latching order
-because they own x-latches to pages which are on a lower level than the
-insert buffer tree latch, its page latches, and the tablespace latch an
-insert buffer operation can reserve.
-
-The solution is the following: Let all the tree and page latches connected
-with the insert buffer be later in the latching order than the fsp latch and
-fsp page latches.
-
-Insert buffer pages must be such that the insert buffer is never invoked
-when these pages are accessed as this would result in a recursion violating
-the latching order. We let a special i/o-handler thread take care of i/o to
-the insert buffer pages and the ibuf bitmap pages, as well as the fsp bitmap
-pages and the first inode page, which contains the inode of the ibuf tree: let
-us call all these ibuf pages. To prevent deadlocks, we do not let a read-ahead
-access both non-ibuf and ibuf pages.
-
-Then an i/o-handler for the insert buffer never needs to access recursively the
-insert buffer tree and thus obeys the latching order. On the other hand, other
-i/o-handlers for other tablespaces may require access to the insert buffer,
-but because all kinds of latches they need to access there are later in the
-latching order, no violation of the latching order occurs in this case,
-either.
-
-A problem is how to grow and contract an insert buffer tree. As it is later
-in the latching order than the fsp management, we have to reserve the fsp
-latch first, before adding or removing pages from the insert buffer tree.
-We let the insert buffer tree have its own file space management: a free
-list of pages linked to the tree root. To prevent recursive using of the
-insert buffer when adding pages to the tree, we must first load these pages
-to memory, obtaining a latch on them, and only after that add them to the
-free list of the insert buffer tree. More difficult is removing of pages
-from the free list. If there is an excess of pages in the free list of the
-ibuf tree, they might be needed if some thread reserves the fsp latch,
-intending to allocate more file space. So we do the following: if a thread
-reserves the fsp latch, we check the writer count field of the latch. If
-this field has value 1, it means that the thread did not own the latch
-before entering the fsp system, and the mtr of the thread contains no
-modifications to the fsp pages. Now we are free to reserve the ibuf latch,
-and check if there is an excess of pages in the free list. We can then, in a
-separate mini-transaction, take them out of the free list and free them to
-the fsp system.
-
-To avoid deadlocks in the ibuf system, we divide file pages into three levels:
-
-(1) non-ibuf pages,
-(2) ibuf tree pages and the pages in the ibuf tree free list, and
-(3) ibuf bitmap pages.
-
-No OS thread is allowed to access higher level pages if it has latches to
-lower level pages; even if the thread owns a B-tree latch it must not access
-the B-tree non-leaf pages if it has latches on lower level pages. Read-ahead
-is only allowed for level 1 and 2 pages. Dedicated i/o-handler threads handle
-exclusively level 1 i/o. A dedicated i/o handler thread handles exclusively
-level 2 i/o. However, if an OS thread does the i/o handling for itself, i.e.,
-it uses synchronous aio, it can access any pages, as long as it obeys the
-access order rules. */
-
-/** Buffer pool size per the maximum insert buffer size */
-#define IBUF_POOL_SIZE_PER_MAX_SIZE 2
-
-/** Table name for the insert buffer. */
-#define IBUF_TABLE_NAME "SYS_IBUF_TABLE"
-
-/** Operations that can currently be buffered. */
-UNIV_INTERN ibuf_use_t ibuf_use = IBUF_USE_INSERT;
-
-/** The insert buffer control structure */
-UNIV_INTERN ibuf_t* ibuf = NULL;
-
-/** Counter for ibuf_should_try() */
-UNIV_INTERN ulint ibuf_flush_count = 0;
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
-/** Number of tablespaces in the ibuf_counts array */
-#define IBUF_COUNT_N_SPACES 4
-/** Number of pages within each tablespace in the ibuf_counts array */
-#define IBUF_COUNT_N_PAGES 130000
-
-/** Buffered entry counts for file pages, used in debugging */
-static ulint ibuf_counts[IBUF_COUNT_N_SPACES][IBUF_COUNT_N_PAGES];
-
-/******************************************************************//**
-Checks that the indexes to ibuf_counts[][] are within limits. */
-UNIV_INLINE
-void
-ibuf_count_check(
-/*=============*/
- ulint space_id, /*!< in: space identifier */
- ulint page_no) /*!< in: page number */
-{
- if (space_id < IBUF_COUNT_N_SPACES && page_no < IBUF_COUNT_N_PAGES) {
- return;
- }
-
- fprintf(stderr,
- "InnoDB: UNIV_IBUF_COUNT_DEBUG limits space_id and page_no\n"
- "InnoDB: and breaks crash recovery.\n"
- "InnoDB: space_id=%lu, should be 0<=space_id<%lu\n"
- "InnoDB: page_no=%lu, should be 0<=page_no<%lu\n",
- (ulint) space_id, (ulint) IBUF_COUNT_N_SPACES,
- (ulint) page_no, (ulint) IBUF_COUNT_N_PAGES);
- ut_error;
-}
-#endif
-
-/** @name Offsets to the per-page bits in the insert buffer bitmap */
-/* @{ */
-#define IBUF_BITMAP_FREE 0 /*!< Bits indicating the
- amount of free space */
-#define IBUF_BITMAP_BUFFERED 2 /*!< TRUE if there are buffered
- changes for the page */
-#define IBUF_BITMAP_IBUF 3 /*!< TRUE if page is a part of
- the ibuf tree, excluding the
- root page, or is in the free
- list of the ibuf */
-/* @} */
-
-/** The mutex used to block pessimistic inserts to ibuf trees */
-static mutex_t ibuf_pessimistic_insert_mutex;
-
-/** The mutex protecting the insert buffer structs */
-static mutex_t ibuf_mutex;
-
-/** The mutex protecting the insert buffer bitmaps */
-static mutex_t ibuf_bitmap_mutex;
-
-/** The area in pages from which contract looks for page numbers for merge */
-#define IBUF_MERGE_AREA 8
-
-/** Inside the merge area, pages which have at most 1 per this number less
-buffered entries compared to maximum volume that can buffered for a single
-page are merged along with the page whose buffer became full */
-#define IBUF_MERGE_THRESHOLD 4
-
-/** In ibuf_contract at most this number of pages is read to memory in one
-batch, in order to merge the entries for them in the insert buffer */
-#define IBUF_MAX_N_PAGES_MERGED IBUF_MERGE_AREA
-
-/** If the combined size of the ibuf trees exceeds ibuf->max_size by this
-many pages, we start to contract it in connection to inserts there, using
-non-synchronous contract */
-#define IBUF_CONTRACT_ON_INSERT_NON_SYNC 0
-
-/** If the combined size of the ibuf trees exceeds ibuf->max_size by this
-many pages, we start to contract it in connection to inserts there, using
-synchronous contract */
-#define IBUF_CONTRACT_ON_INSERT_SYNC 5
-
-/** If the combined size of the ibuf trees exceeds ibuf->max_size by
-this many pages, we start to contract it synchronous contract, but do
-not insert */
-#define IBUF_CONTRACT_DO_NOT_INSERT 10
-
-/* TODO: how to cope with drop table if there are records in the insert
-buffer for the indexes of the table? Is there actually any problem,
-because ibuf merge is done to a page when it is read in, and it is
-still physically like the index page even if the index would have been
-dropped! So, there seems to be no problem. */
-
-/******************************************************************//**
-Sets the flag in the current OS thread local storage denoting that it is
-inside an insert buffer routine. */
-UNIV_INLINE
-void
-ibuf_enter(void)
-/*============*/
-{
- ibool* ptr;
-
- ptr = thr_local_get_in_ibuf_field();
-
- ut_ad(*ptr == FALSE);
-
- *ptr = TRUE;
-}
-
-/******************************************************************//**
-Sets the flag in the current OS thread local storage denoting that it is
-exiting an insert buffer routine. */
-UNIV_INLINE
-void
-ibuf_exit(void)
-/*===========*/
-{
- ibool* ptr;
-
- ptr = thr_local_get_in_ibuf_field();
-
- ut_ad(*ptr == TRUE);
-
- *ptr = FALSE;
-}
-
-/******************************************************************//**
-Returns TRUE if the current OS thread is performing an insert buffer
-routine.
-
-For instance, a read-ahead of non-ibuf pages is forbidden by threads
-that are executing an insert buffer routine.
-@return TRUE if inside an insert buffer routine */
-UNIV_INTERN
-ibool
-ibuf_inside(void)
-/*=============*/
-{
- return(*thr_local_get_in_ibuf_field());
-}
-
-/******************************************************************//**
-Gets the ibuf header page and x-latches it.
-@return insert buffer header page */
-static
-page_t*
-ibuf_header_page_get(
-/*=================*/
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_block_t* block;
-
- ut_ad(!ibuf_inside());
-
- block = buf_page_get(
- IBUF_SPACE_ID, 0, FSP_IBUF_HEADER_PAGE_NO, RW_X_LATCH, mtr);
- buf_block_dbg_add_level(block, SYNC_IBUF_HEADER);
-
- return(buf_block_get_frame(block));
-}
-
-/******************************************************************//**
-Gets the root page and x-latches it.
-@return insert buffer tree root page */
-static
-page_t*
-ibuf_tree_root_get(
-/*===============*/
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_block_t* block;
-
- ut_ad(ibuf_inside());
-
- mtr_x_lock(dict_index_get_lock(ibuf->index), mtr);
-
- block = buf_page_get(
- IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH, mtr);
-
- buf_block_dbg_add_level(block, SYNC_TREE_NODE);
-
- return(buf_block_get_frame(block));
-}
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
-/******************************************************************//**
-Gets the ibuf count for a given page.
-@return number of entries in the insert buffer currently buffered for
-this page */
-UNIV_INTERN
-ulint
-ibuf_count_get(
-/*===========*/
- ulint space, /*!< in: space id */
- ulint page_no)/*!< in: page number */
-{
- ibuf_count_check(space, page_no);
-
- return(ibuf_counts[space][page_no]);
-}
-
-/******************************************************************//**
-Sets the ibuf count for a given page. */
-static
-void
-ibuf_count_set(
-/*===========*/
- ulint space, /*!< in: space id */
- ulint page_no,/*!< in: page number */
- ulint val) /*!< in: value to set */
-{
- ibuf_count_check(space, page_no);
- ut_a(val < UNIV_PAGE_SIZE);
-
- ibuf_counts[space][page_no] = val;
-}
-#endif
-
-/******************************************************************//**
-Closes insert buffer and frees the data structures. */
-UNIV_INTERN
-void
-ibuf_close(void)
-/*============*/
-{
- mutex_free(&ibuf_pessimistic_insert_mutex);
- memset(&ibuf_pessimistic_insert_mutex,
- 0x0, sizeof(ibuf_pessimistic_insert_mutex));
-
- mutex_free(&ibuf_mutex);
- memset(&ibuf_mutex, 0x0, sizeof(ibuf_mutex));
-
- mutex_free(&ibuf_bitmap_mutex);
- memset(&ibuf_bitmap_mutex, 0x0, sizeof(ibuf_mutex));
-
- mem_free(ibuf);
- ibuf = NULL;
-}
-
-/******************************************************************//**
-Updates the size information of the ibuf, assuming the segment size has not
-changed. */
-static
-void
-ibuf_size_update(
-/*=============*/
- const page_t* root, /*!< in: ibuf tree root */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(mutex_own(&ibuf_mutex));
-
- ibuf->free_list_len = flst_get_len(root + PAGE_HEADER
- + PAGE_BTR_IBUF_FREE_LIST, mtr);
-
- ibuf->height = 1 + btr_page_get_level(root, mtr);
-
- /* the '1 +' is the ibuf header page */
- ibuf->size = ibuf->seg_size - (1 + ibuf->free_list_len);
-
- ibuf->empty = page_get_n_recs(root) == 0;
-}
-
-/******************************************************************//**
-Creates the insert buffer data structure at a database startup and initializes
-the data structures for the insert buffer. */
-UNIV_INTERN
-void
-ibuf_init_at_db_start(void)
-/*=======================*/
-{
- page_t* root;
- mtr_t mtr;
- dict_table_t* table;
- mem_heap_t* heap;
- dict_index_t* index;
- ulint n_used;
- page_t* header_page;
- ulint error;
-
- ibuf = mem_alloc(sizeof(ibuf_t));
-
- memset(ibuf, 0, sizeof(*ibuf));
-
- /* Note that also a pessimistic delete can sometimes make a B-tree
- grow in size, as the references on the upper levels of the tree can
- change */
-
- ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE
- / IBUF_POOL_SIZE_PER_MAX_SIZE;
-
- mutex_create(&ibuf_pessimistic_insert_mutex,
- SYNC_IBUF_PESS_INSERT_MUTEX);
-
- mutex_create(&ibuf_mutex, SYNC_IBUF_MUTEX);
-
- mutex_create(&ibuf_bitmap_mutex, SYNC_IBUF_BITMAP_MUTEX);
-
- mtr_start(&mtr);
-
- mutex_enter(&ibuf_mutex);
-
- mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, NULL), &mtr);
-
- header_page = ibuf_header_page_get(&mtr);
-
- fseg_n_reserved_pages(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
- &n_used, &mtr);
- ibuf_enter();
-
- ut_ad(n_used >= 2);
-
- ibuf->seg_size = n_used;
-
- {
- buf_block_t* block;
-
- block = buf_page_get(
- IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO,
- RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(block, SYNC_TREE_NODE);
-
- root = buf_block_get_frame(block);
- }
-
- ibuf_size_update(root, &mtr);
- mutex_exit(&ibuf_mutex);
-
- mtr_commit(&mtr);
-
- ibuf_exit();
-
- heap = mem_heap_create(450);
-
- /* Use old-style record format for the insert buffer. */
- table = dict_mem_table_create(IBUF_TABLE_NAME, IBUF_SPACE_ID, 1, 0);
-
- dict_mem_table_add_col(table, heap, "DUMMY_COLUMN", DATA_BINARY, 0, 0);
-
- table->id = ut_dulint_add(DICT_IBUF_ID_MIN, IBUF_SPACE_ID);
-
- dict_table_add_to_cache(table, heap);
- mem_heap_free(heap);
-
- index = dict_mem_index_create(
- IBUF_TABLE_NAME, "CLUST_IND",
- IBUF_SPACE_ID, DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, 1);
-
- dict_mem_index_add_field(index, "DUMMY_COLUMN", 0);
-
- index->id = ut_dulint_add(DICT_IBUF_ID_MIN, IBUF_SPACE_ID);
-
- error = dict_index_add_to_cache(table, index,
- FSP_IBUF_TREE_ROOT_PAGE_NO, FALSE);
- ut_a(error == DB_SUCCESS);
-
- ibuf->index = dict_table_get_first_index(table);
-}
-#endif /* !UNIV_HOTBACKUP */
-/*********************************************************************//**
-Initializes an ibuf bitmap page. */
-UNIV_INTERN
-void
-ibuf_bitmap_page_init(
-/*==================*/
- buf_block_t* block, /*!< in: bitmap page */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* page;
- ulint byte_offset;
- ulint zip_size = buf_block_get_zip_size(block);
-
- ut_a(ut_is_2pow(zip_size));
-
- page = buf_block_get_frame(block);
- fil_page_set_type(page, FIL_PAGE_IBUF_BITMAP);
-
- /* Write all zeros to the bitmap */
-
- if (!zip_size) {
- byte_offset = UT_BITS_IN_BYTES(UNIV_PAGE_SIZE
- * IBUF_BITS_PER_PAGE);
- } else {
- byte_offset = UT_BITS_IN_BYTES(zip_size * IBUF_BITS_PER_PAGE);
- }
-
- memset(page + IBUF_BITMAP, 0, byte_offset);
-
- /* The remaining area (up to the page trailer) is uninitialized. */
-
-#ifndef UNIV_HOTBACKUP
- mlog_write_initial_log_record(page, MLOG_IBUF_BITMAP_INIT, mtr);
-#endif /* !UNIV_HOTBACKUP */
-}
-
-/*********************************************************************//**
-Parses a redo log record of an ibuf bitmap page init.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-ibuf_parse_bitmap_init(
-/*===================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr __attribute__((unused)), /*!< in: buffer end */
- buf_block_t* block, /*!< in: block or NULL */
- mtr_t* mtr) /*!< in: mtr or NULL */
-{
- ut_ad(ptr && end_ptr);
-
- if (block) {
- ibuf_bitmap_page_init(block, mtr);
- }
-
- return(ptr);
-}
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Gets the desired bits for a given page from a bitmap page.
-@return value of bits */
-UNIV_INLINE
-ulint
-ibuf_bitmap_page_get_bits(
-/*======================*/
- const page_t* page, /*!< in: bitmap page */
- ulint page_no,/*!< in: page whose bits to get */
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint bit, /*!< in: IBUF_BITMAP_FREE,
- IBUF_BITMAP_BUFFERED, ... */
- mtr_t* mtr __attribute__((unused)))
- /*!< in: mtr containing an
- x-latch to the bitmap page */
-{
- ulint byte_offset;
- ulint bit_offset;
- ulint map_byte;
- ulint value;
-
- ut_ad(bit < IBUF_BITS_PER_PAGE);
-#if IBUF_BITS_PER_PAGE % 2
-# error "IBUF_BITS_PER_PAGE % 2 != 0"
-#endif
- ut_ad(ut_is_2pow(zip_size));
- ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
-
- if (!zip_size) {
- bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE
- + bit;
- } else {
- bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE
- + bit;
- }
-
- byte_offset = bit_offset / 8;
- bit_offset = bit_offset % 8;
-
- ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
-
- map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
-
- value = ut_bit_get_nth(map_byte, bit_offset);
-
- if (bit == IBUF_BITMAP_FREE) {
- ut_ad(bit_offset + 1 < 8);
-
- value = value * 2 + ut_bit_get_nth(map_byte, bit_offset + 1);
- }
-
- return(value);
-}
-
-/********************************************************************//**
-Writes the requested bit field of a given page in an ibuf bitmap page.
-IBUF_BITMAP_FREE is a two-bit field (val / 2 goes to the first bit and
-val % 2 to the second); any other bit is a single bit. The modified byte
-is written with mlog_write_ulint() using the given mtr. */
-static
-void
-ibuf_bitmap_page_set_bits(
-/*======================*/
-	page_t*	page,	/*!< in: bitmap page */
-	ulint	page_no,/*!< in: page whose bits to set */
-	ulint	zip_size,/*!< in: compressed page size in bytes;
-			0 for uncompressed pages */
-	ulint	bit,	/*!< in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... */
-	ulint	val,	/*!< in: value to set */
-	mtr_t*	mtr)	/*!< in: mtr containing an x-latch to the bitmap page */
-{
-	ulint	pos;		/* bit position relative to IBUF_BITMAP */
-	ulint	byte_pos;	/* byte that holds the bit */
-	ulint	shift;		/* bit position inside that byte */
-	ulint	map_byte;
-
-	ut_ad(bit < IBUF_BITS_PER_PAGE);
-#if IBUF_BITS_PER_PAGE % 2
-# error "IBUF_BITS_PER_PAGE % 2 != 0"
-#endif
-	ut_ad(ut_is_2pow(zip_size));
-	ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
-#ifdef UNIV_IBUF_COUNT_DEBUG
-	ut_a((bit != IBUF_BITMAP_BUFFERED) || (val != FALSE)
-	     || (0 == ibuf_count_get(page_get_space_id(page),
-				     page_no)));
-#endif
-	/* Index of the page among the pages described by this bitmap */
-	if (zip_size) {
-		pos = (page_no & (zip_size - 1));
-	} else {
-		pos = (page_no % UNIV_PAGE_SIZE);
-	}
-
-	pos = pos * IBUF_BITS_PER_PAGE + bit;
-
-	byte_pos = pos / 8;
-	shift = pos % 8;
-
-	ut_ad(byte_pos + IBUF_BITMAP < UNIV_PAGE_SIZE);
-
-	map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_pos);
-
-	if (bit != IBUF_BITMAP_FREE) {
-		/* single-bit field */
-		ut_ad(val <= 1);
-		map_byte = ut_bit_set_nth(map_byte, shift, val);
-	} else {
-		/* two-bit field, both bits in the same byte */
-		ut_ad(shift + 1 < 8);
-		ut_ad(val <= 3);
-
-		map_byte = ut_bit_set_nth(map_byte, shift, val / 2);
-		map_byte = ut_bit_set_nth(map_byte, shift + 1, val % 2);
-	}
-
-	mlog_write_ulint(page + IBUF_BITMAP + byte_pos, map_byte,
-			 MLOG_1BYTE, mtr);
-}
-
-/********************************************************************//**
-Computes the number of the ibuf bitmap page that describes a given page.
-The low bits of the page number are masked off using the page size in
-bytes (zip_size for compressed pages, UNIV_PAGE_SIZE otherwise) and
-FSP_IBUF_BITMAP_OFFSET is added to the result.
-@return the bitmap page number where the file page is mapped */
-UNIV_INLINE
-ulint
-ibuf_bitmap_page_no_calc(
-/*=====================*/
-	ulint	zip_size,	/*!< in: compressed page size in bytes;
-				0 for uncompressed pages */
-	ulint	page_no)	/*!< in: tablespace page number */
-{
-	ulint	group_start;	/* page_no with its low bits cleared */
-
-	ut_ad(ut_is_2pow(zip_size));
-
-	if (zip_size) {
-		group_start = page_no & ~(zip_size - 1);
-	} else {
-		group_start = page_no & ~(UNIV_PAGE_SIZE - 1);
-	}
-
-	return(FSP_IBUF_BITMAP_OFFSET + group_start);
-}
-
-/********************************************************************//**
-Fetches, with an x-latch, the ibuf bitmap page that holds the descriptor
-bits of a given file page. The latch is registered in the given mtr.
-@return frame of the bitmap page containing the descriptor bits for the
-file page; the bitmap page is x-latched */
-static
-page_t*
-ibuf_bitmap_get_map_page(
-/*=====================*/
-	ulint	space,	/*!< in: space id of the file page */
-	ulint	page_no,/*!< in: page number of the file page */
-	ulint	zip_size,/*!< in: compressed page size in bytes;
-			0 for uncompressed pages */
-	mtr_t*	mtr)	/*!< in: mtr */
-{
-	ulint		bitmap_page_no;
-	buf_block_t*	bitmap_block;
-
-	bitmap_page_no = ibuf_bitmap_page_no_calc(zip_size, page_no);
-
-	bitmap_block = buf_page_get(space, zip_size, bitmap_page_no,
-				    RW_X_LATCH, mtr);
-
-	buf_block_dbg_add_level(bitmap_block, SYNC_IBUF_BITMAP);
-
-	return(buf_block_get_frame(bitmap_block));
-}
-
-/************************************************************************//**
-Sets the free bits of the page in the ibuf bitmap. The bitmap page is
-fetched (x-latched) and modified within the mini-transaction passed in by
-the caller; this function neither starts nor commits a mini-transaction.
-Nothing is done if the page is not a leaf page. When UNIV_IBUF_DEBUG is
-defined, val is asserted not to exceed the value computed by
-ibuf_index_page_calc_free(). */
-UNIV_INLINE
-void
-ibuf_set_free_bits_low(
-/*===================*/
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- const buf_block_t* block, /*!< in: index page; free bits are set if
- the index is non-clustered and page
- level is 0 (only the leaf-page condition
- is checked here) */
- ulint val, /*!< in: value to set: < 4 */
- mtr_t* mtr) /*!< in/out: mtr in which the bitmap page is
- latched and modified */
-{
- page_t* bitmap_page;
- ulint space;
- ulint page_no;
-
- if (!page_is_leaf(buf_block_get_frame(block))) {
-
- return;
- }
-
- space = buf_block_get_space(block);
- page_no = buf_block_get_page_no(block);
- bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
-#ifdef UNIV_IBUF_DEBUG
-# if 0
- fprintf(stderr,
- "Setting space %lu page %lu free bits to %lu should be %lu\n",
- space, page_no, val,
- ibuf_index_page_calc_free(zip_size, block));
-# endif
-
- ut_a(val <= ibuf_index_page_calc_free(zip_size, block));
-#endif /* UNIV_IBUF_DEBUG */
- ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
- IBUF_BITMAP_FREE, val, mtr);
-}
-
-/************************************************************************//**
-Sets the free bits of the page in the ibuf bitmap. This is done in a separate
-mini-transaction (started and committed inside this function), hence this
-operation does not restrict further work to only ibuf bitmap operations,
-which would result if the latch to the bitmap page were kept. Nothing is
-done if the page is not a leaf page. When UNIV_IBUF_DEBUG is defined, the
-function takes the extra parameter max_val and asserts that the old bits do
-not exceed it and that val does not exceed the value computed by
-ibuf_index_page_calc_free(). */
-UNIV_INTERN
-void
-ibuf_set_free_bits_func(
-/*====================*/
- buf_block_t* block, /*!< in: index page of a non-clustered index;
- free bit is reset if page level is 0 */
-#ifdef UNIV_IBUF_DEBUG
- ulint max_val,/*!< in: ULINT_UNDEFINED or a maximum
- value which the bits must have before
- setting; this is for debugging */
-#endif /* UNIV_IBUF_DEBUG */
- ulint val) /*!< in: value to set: < 4 */
-{
- mtr_t mtr;
- page_t* page;
- page_t* bitmap_page;
- ulint space;
- ulint page_no;
- ulint zip_size;
-
- page = buf_block_get_frame(block);
-
- if (!page_is_leaf(page)) {
-
- return;
- }
-
- mtr_start(&mtr);
-
- space = buf_block_get_space(block);
- page_no = buf_block_get_page_no(block);
- zip_size = buf_block_get_zip_size(block);
- bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, &mtr);
-
-#ifdef UNIV_IBUF_DEBUG
- if (max_val != ULINT_UNDEFINED) {
- ulint old_val;
-
- old_val = ibuf_bitmap_page_get_bits(
- bitmap_page, page_no, zip_size,
- IBUF_BITMAP_FREE, &mtr);
-# if 0
- if (old_val != max_val) {
- fprintf(stderr,
- "Ibuf: page %lu old val %lu max val %lu\n",
- page_get_page_no(page),
- old_val, max_val);
- }
-# endif
-
- ut_a(old_val <= max_val);
- }
-# if 0
- fprintf(stderr, "Setting page no %lu free bits to %lu should be %lu\n",
- page_get_page_no(page), val,
- ibuf_index_page_calc_free(zip_size, block));
-# endif
-
- ut_a(val <= ibuf_index_page_calc_free(zip_size, block));
-#endif /* UNIV_IBUF_DEBUG */
- ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
- IBUF_BITMAP_FREE, val, &mtr);
- mtr_commit(&mtr);
-}
-
-/************************************************************************//**
-Resets the free bits of the page in the ibuf bitmap. This is done in a
-separate mini-transaction, hence this operation does not restrict
-further work to only ibuf bitmap operations, which would result if the
-latch to the bitmap page were kept. NOTE: The free bits in the insert
-buffer bitmap must never exceed the free space on a page. It is safe
-to decrement or reset the bits in the bitmap in a mini-transaction
-that is committed before the mini-transaction that affects the free
-space.
-
-The work is delegated to ibuf_set_free_bits() with the arguments
-(block, 0, ULINT_UNDEFINED). NOTE(review): ibuf_set_free_bits is defined
-outside this section; presumably it forwards to ibuf_set_free_bits_func()
-with value 0 and no maximum-value check - confirm against the header. */
-UNIV_INTERN
-void
-ibuf_reset_free_bits(
-/*=================*/
- buf_block_t* block) /*!< in: index page; free bits are set to 0
- if the index is a non-clustered
- non-unique, and page level is 0 */
-{
- ibuf_set_free_bits(block, 0, ULINT_UNDEFINED);
-}
-
-/**********************************************************************//**
-Brings the free bits of an uncompressed page up to date. The bitmap is
-modified in the given mtr, which means that the latching order rules
-virtually prevent any further operations for this OS thread until mtr
-is committed. The bitmap is only touched when the bits computed from
-max_ins_size differ from the bits computed from the current page state.
-NOTE: The free bits in the insert buffer bitmap must never exceed the
-free space on a page. It is safe to set the free bits in the same
-mini-transaction that updated the page. */
-UNIV_INTERN
-void
-ibuf_update_free_bits_low(
-/*======================*/
-	const buf_block_t*	block,		/*!< in: index page */
-	ulint			max_ins_size,	/*!< in: value of
-						maximum insert size
-						with reorganize before
-						the latest operation
-						performed to the page */
-	mtr_t*			mtr)		/*!< in/out: mtr */
-{
-	ulint	bits_before;
-	ulint	bits_now;
-
-	ut_a(!buf_block_get_page_zip(block));
-
-	bits_before = ibuf_index_page_calc_free_bits(0, max_ins_size);
-	bits_now = ibuf_index_page_calc_free(0, block);
-
-	/* This approach cannot be used on compressed pages, since the
-	computed value of "before" often does not match the current
-	state of the bitmap. This is because the free space may
-	increase or decrease when a compressed page is reorganized. */
-	if (bits_before == bits_now) {
-
-		return;
-	}
-
-	ibuf_set_free_bits_low(0, block, bits_now, mtr);
-}
-
-/**********************************************************************//**
-Brings the free bits of a compressed leaf page up to date. The bitmap is
-modified in the given mtr, which means that the latching order rules
-virtually prevent any further operations for this OS thread until mtr
-is committed. Unlike the uncompressed variant, the bits are always
-written. NOTE: The free bits in the insert buffer bitmap must never
-exceed the free space on a page. It is safe to set the free bits in the
-same mini-transaction that updated the page. */
-UNIV_INTERN
-void
-ibuf_update_free_bits_zip(
-/*======================*/
-	buf_block_t*	block,	/*!< in/out: index page */
-	mtr_t*		mtr)	/*!< in/out: mtr */
-{
-	const ulint	space = buf_block_get_space(block);
-	const ulint	page_no = buf_block_get_page_no(block);
-	const ulint	zip_size = buf_block_get_zip_size(block);
-	page_t*		bitmap_page;
-	ulint		free_bits;
-
-	ut_a(page_is_leaf(buf_block_get_frame(block)));
-	ut_a(zip_size);
-
-	bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
-
-	free_bits = ibuf_index_page_calc_free_zip(zip_size, block);
-
-	if (!free_bits) {
-		/* We move the page to the front of the buffer pool LRU list:
-		the purpose of this is to prevent those pages to which we
-		cannot make inserts using the insert buffer from slipping
-		out of the buffer pool */
-
-		buf_page_make_young(&block->page);
-	}
-
-	ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
-				  IBUF_BITMAP_FREE, free_bits, mtr);
-}
-
-/**********************************************************************//**
-Brings the free bits of two pages up to date, first block1 and then
-block2, while holding ibuf_bitmap_mutex. The bitmap pages are modified
-in the given mtr, which means that the latching order rules virtually
-prevent any further operations until mtr is committed.
-NOTE: The free bits in the insert buffer bitmap must never exceed the
-free space on a page. It is safe to set the free bits in the same
-mini-transaction that updated the pages. */
-UNIV_INTERN
-void
-ibuf_update_free_bits_for_two_pages_low(
-/*====================================*/
-	ulint		zip_size,/*!< in: compressed page size in bytes;
-				0 for uncompressed pages */
-	buf_block_t*	block1,	/*!< in: index page */
-	buf_block_t*	block2,	/*!< in: index page */
-	mtr_t*		mtr)	/*!< in: mtr */
-{
-	/* As we have to x-latch two random bitmap pages, we have to acquire
-	the bitmap mutex to prevent a deadlock with a similar operation
-	performed by another OS thread. */
-
-	mutex_enter(&ibuf_bitmap_mutex);
-
-	ibuf_set_free_bits_low(zip_size, block1,
-			       ibuf_index_page_calc_free(zip_size, block1),
-			       mtr);
-
-	ibuf_set_free_bits_low(zip_size, block2,
-			       ibuf_index_page_calc_free(zip_size, block2),
-			       mtr);
-
-	mutex_exit(&ibuf_bitmap_mutex);
-}
-
-/**********************************************************************//**
-Tells whether a page is one of the fixed address ibuf pages: either the
-ibuf tree root page in the ibuf tablespace, or an ibuf bitmap page
-(checked with ibuf_bitmap_page() regardless of the space id).
-@return TRUE if a fixed address ibuf i/o page */
-UNIV_INLINE
-ibool
-ibuf_fixed_addr_page(
-/*=================*/
-	ulint	space,	/*!< in: space id */
-	ulint	zip_size,/*!< in: compressed page size in bytes;
-			0 for uncompressed pages */
-	ulint	page_no)/*!< in: page number */
-{
-	if (space == IBUF_SPACE_ID && page_no == IBUF_TREE_ROOT_PAGE_NO) {
-		/* the root of the ibuf tree */
-		return(TRUE);
-	}
-
-	return(ibuf_bitmap_page(zip_size, page_no) ? TRUE : FALSE);
-}
-
-/***********************************************************************//**
-Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
-Fixed address ibuf pages are recognized without any page access; pages
-outside the ibuf tablespace are rejected at once; for the remaining pages
-the IBUF_BITMAP_IBUF bit is read from the bitmap page.
-Must not be called when recv_no_ibuf_operations==TRUE.
-@return TRUE if level 2 or level 3 page */
-UNIV_INTERN
-ibool
-ibuf_page(
-/*======*/
-	ulint	space,	/*!< in: space id */
-	ulint	zip_size,/*!< in: compressed page size in bytes, or 0 */
-	ulint	page_no,/*!< in: page number */
-	mtr_t*	mtr)	/*!< in: mtr which will contain an x-latch to the
-			bitmap page if the page is not one of the fixed
-			address ibuf pages, or NULL, in which case a new
-			transaction is created. */
-{
-	mtr_t	local_mtr;
-	page_t*	bitmap_page;
-	ibool	is_ibuf_page;
-	ibool	use_local_mtr;
-
-	ut_ad(!recv_no_ibuf_operations);
-
-	if (ibuf_fixed_addr_page(space, zip_size, page_no)) {
-
-		return(TRUE);
-	}
-
-	if (space != IBUF_SPACE_ID) {
-
-		return(FALSE);
-	}
-
-	ut_ad(fil_space_get_type(IBUF_SPACE_ID) == FIL_TABLESPACE);
-
-	/* Without a caller-supplied mtr, run the lookup in a private
-	mini-transaction that is committed before returning. */
-	use_local_mtr = (mtr == NULL);
-
-	if (use_local_mtr) {
-		mtr = &local_mtr;
-		mtr_start(mtr);
-	}
-
-	bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
-
-	is_ibuf_page = ibuf_bitmap_page_get_bits(bitmap_page, page_no,
-						 zip_size, IBUF_BITMAP_IBUF,
-						 mtr);
-
-	if (use_local_mtr) {
-		mtr_commit(mtr);
-	}
-
-	return(is_ibuf_page);
-}
-
-/********************************************************************//**
-Reads the page number field of an ibuf record. The record format is
-recognized from the length of field 1: a length of 1 means the >= 4.1.x
-format, where the page number is field 2; otherwise the page number is
-field 0.
-@return page number */
-static
-ulint
-ibuf_rec_get_page_no(
-/*=================*/
-	const rec_t*	rec)	/*!< in: ibuf record */
-{
-	const byte*	page_no_field;
-	ulint		len;
-
-	ut_ad(ibuf_inside());
-	ut_ad(rec_get_n_fields_old(rec) > 2);
-
-	/* Only the length of field 1 is of interest here. */
-	(void) rec_get_nth_field_old(rec, 1, &len);
-
-	if (len != 1) {
-		/* This is of the < 4.1.x record format */
-		ut_a(trx_doublewrite_must_reset_space_ids);
-		ut_a(!trx_sys_multiple_tablespace_format);
-
-		page_no_field = rec_get_nth_field_old(rec, 0, &len);
-	} else {
-		/* This is of the >= 4.1.x record format */
-		ut_a(trx_sys_multiple_tablespace_format);
-
-		page_no_field = rec_get_nth_field_old(rec, 2, &len);
-	}
-
-	ut_a(len == 4);
-
-	return(mach_read_from_4(page_no_field));
-}
-
-/********************************************************************//**
-Reads the space id field of an ibuf record. The record format is
-recognized from the length of field 1: a length of 1 means the >= 4.1.x
-format, where the space id is field 0. For < 4.1.x format records
-returns 0.
-@return space id */
-static
-ulint
-ibuf_rec_get_space(
-/*===============*/
-	const rec_t*	rec)	/*!< in: ibuf record */
-{
-	const byte*	space_field;
-	ulint		len;
-
-	ut_ad(ibuf_inside());
-	ut_ad(rec_get_n_fields_old(rec) > 2);
-
-	/* Only the length of field 1 is of interest here. */
-	(void) rec_get_nth_field_old(rec, 1, &len);
-
-	if (len != 1) {
-		/* This is of the < 4.1.x record format */
-		ut_a(trx_doublewrite_must_reset_space_ids);
-		ut_a(!trx_sys_multiple_tablespace_format);
-
-		return(0);
-	}
-
-	/* This is of the >= 4.1.x record format */
-
-	ut_a(trx_sys_multiple_tablespace_format);
-	space_field = rec_get_nth_field_old(rec, 0, &len);
-	ut_a(len == 4);
-
-	return(mach_read_from_4(space_field));
-}
-
-/********************************************************************//**
-Creates a dummy index, together with a dummy table named "IBUF_DUMMY",
-for inserting a record to a non-clustered index. The pair is released
-with ibuf_dummy_index_free().
-
-@return dummy index */
-static
-dict_index_t*
-ibuf_dummy_index_create(
-/*====================*/
-	ulint	n,	/*!< in: number of fields */
-	ibool	comp)	/*!< in: TRUE=use compact record format */
-{
-	dict_table_t*	dummy_table;
-	dict_index_t*	dummy_index;
-	ulint		table_flags;
-
-	table_flags = comp ? DICT_TF_COMPACT : 0;
-
-	dummy_table = dict_mem_table_create("IBUF_DUMMY",
-					    DICT_HDR_SPACE, n, table_flags);
-
-	dummy_index = dict_mem_index_create("IBUF_DUMMY", "IBUF_DUMMY",
-					    DICT_HDR_SPACE, 0, n);
-
-	dummy_index->table = dummy_table;
-
-	/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
-	dummy_index->cached = TRUE;
-
-	return(dummy_index);
-}
-/********************************************************************//**
-Appends a column to the dummy table of a dummy index and then adds that
-column to the index itself. */
-static
-void
-ibuf_dummy_index_add_col(
-/*=====================*/
-	dict_index_t*	index,	/*!< in: dummy index */
-	const dtype_t*	type,	/*!< in: the data type of the column */
-	ulint		len)	/*!< in: length of the column */
-{
-	dict_table_t*	table = index->table;
-	/* position the new column will get in the table */
-	ulint		col_no = table->n_def;
-
-	dict_mem_table_add_col(table, NULL, NULL,
-			       dtype_get_mtype(type),
-			       dtype_get_prtype(type),
-			       dtype_get_len(type));
-
-	dict_index_add_col(index, table,
-			   dict_table_get_nth_col(table, col_no), len);
-}
-/********************************************************************//**
-Deallocates a dummy index for inserting a record to a non-clustered index.
-Both the index object and the table object it points to are freed. The
-table pointer is read from the index before the index is freed, because
-index->table is the only reference to the table used here. */
-static
-void
-ibuf_dummy_index_free(
-/*==================*/
- dict_index_t* index) /*!< in, own: dummy index */
-{
- dict_table_t* table = index->table;
-
- dict_mem_index_free(index);
- dict_mem_table_free(table);
-}
-
-/*********************************************************************//**
-Builds the entry to insert into a non-clustered index from a record of
-an ibuf index that is in the < 4.1.x format. In that format field 1 of
-the ibuf record holds the type information and the user fields start at
-field 2.
-
-NOTE that as we copy pointers to fields in ibuf_rec, the caller must
-hold a latch to the ibuf_rec page as long as the entry is used!
-
-@return own: entry to insert to a non-clustered index */
-UNIV_INLINE
-dtuple_t*
-ibuf_build_entry_pre_4_1_x(
-/*=======================*/
-	const rec_t*	ibuf_rec,	/*!< in: record in an insert buffer */
-	mem_heap_t*	heap,		/*!< in: heap where built */
-	dict_index_t**	pindex)		/*!< out, own: dummy index that
-					describes the entry */
-{
-	dtuple_t*	entry;
-	const byte*	type_info;
-	ulint		n_fields;
-	ulint		len;
-	ulint		i;
-
-	ut_a(trx_doublewrite_must_reset_space_ids);
-	ut_a(!trx_sys_multiple_tablespace_format);
-
-	n_fields = rec_get_n_fields_old(ibuf_rec) - 2;
-	entry = dtuple_create(heap, n_fields);
-	type_info = rec_get_nth_field_old(ibuf_rec, 1, &len);
-
-	ut_a(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
-
-	for (i = 0; i < n_fields;
-	     i++, type_info += DATA_ORDER_NULL_TYPE_BUF_SIZE) {
-
-		dfield_t*	field = dtuple_get_nth_field(entry, i);
-		/* the data is not copied, only pointed to */
-		const byte*	data = rec_get_nth_field_old(
-			ibuf_rec, i + 2, &len);
-
-		dfield_set_data(field, data, len);
-
-		dtype_read_for_order_and_null_size(
-			dfield_get_type(field), type_info);
-	}
-
-	*pindex = ibuf_dummy_index_create(n_fields, FALSE);
-
-	return(entry);
-}
-
-/*********************************************************************//**
-Builds the entry to insert into a non-clustered index when we have the
-corresponding record in an ibuf index.
-
-The record format is recognized from the length of field 1 of ibuf_rec:
-if it is longer than 1 byte, the record is handed over to
-ibuf_build_entry_pre_4_1_x(). Otherwise field 3 holds the type
-information, optionally preceded by one marker byte that denotes the
-compact record format, and the user fields start at field 4. For these
-records a dummy index is created and one column per user field is added
-to it.
-
-NOTE that as we copy pointers to fields in ibuf_rec, the caller must
-hold a latch to the ibuf_rec page as long as the entry is used!
-
-@return own: entry to insert to a non-clustered index */
-static
-dtuple_t*
-ibuf_build_entry_from_ibuf_rec(
-/*===========================*/
- const rec_t* ibuf_rec, /*!< in: record in an insert buffer */
- mem_heap_t* heap, /*!< in: heap where built */
- dict_index_t** pindex) /*!< out, own: dummy index that
- describes the entry */
-{
- dtuple_t* tuple;
- dfield_t* field;
- ulint n_fields;
- const byte* types;
- const byte* data;
- ulint len;
- ulint i;
- dict_index_t* index;
-
- data = rec_get_nth_field_old(ibuf_rec, 1, &len);
-
- if (len > 1) {
- /* This is a < 4.1.x format record */
-
- return(ibuf_build_entry_pre_4_1_x(ibuf_rec, heap, pindex));
- }
-
- /* This is a >= 4.1.x format record */
-
- ut_a(trx_sys_multiple_tablespace_format);
- ut_a(*data == 0);
- ut_a(rec_get_n_fields_old(ibuf_rec) > 4);
-
- n_fields = rec_get_n_fields_old(ibuf_rec) - 4;
-
- tuple = dtuple_create(heap, n_fields);
-
- types = rec_get_nth_field_old(ibuf_rec, 3, &len);
-
- ut_a(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE <= 1);
- index = ibuf_dummy_index_create(
- n_fields, len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
-
- if (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) {
- /* compact record format: skip the leading marker byte so
- that types points to the first per-field type entry */
- len--;
- ut_a(*types == 0);
- types++;
- }
-
- ut_a(len == n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
-
- for (i = 0; i < n_fields; i++) {
- field = dtuple_get_nth_field(tuple, i);
-
- data = rec_get_nth_field_old(ibuf_rec, i + 4, &len);
-
- dfield_set_data(field, data, len);
-
- dtype_new_read_for_order_and_null_size(
- dfield_get_type(field),
- types + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
-
- ibuf_dummy_index_add_col(index, dfield_get_type(field), len);
- }
-
- /* Prevent an ut_ad() failure in page_zip_write_rec() by
- adding system columns to the dummy table pointed to by the
- dummy secondary index. The insert buffer is only used for
- secondary indexes, whose records never contain any system
- columns, such as DB_TRX_ID. */
- ut_d(dict_table_add_system_columns(index->table, index->table->heap));
-
- *pindex = index;
-
- return(tuple);
-}
-
-/********************************************************************//**
-Returns the space taken by a stored non-clustered index entry if converted to
-an index record.
-
-Three cases are handled, told apart by the length of field 1 and, for
->= 4.1.x records, by the length of the type information in field 3:
-(1) < 4.1.x format: the user fields start at field 2;
-(2) >= 4.1.x format, old-style record format: the user fields start at
-field 4;
-(3) >= 4.1.x format, compact record format: the entry is rebuilt in a
-temporary heap with ibuf_build_entry_from_ibuf_rec() and measured with
-rec_get_converted_size(); the temporary heap and dummy index are freed
-before returning.
-In cases (1) and (2) the field lengths are summed up, using
-dtype_get_sql_null_size() for SQL NULL fields.
-@return size of index record in bytes + an upper limit of the space
-taken in the page directory */
-static
-ulint
-ibuf_rec_get_volume(
-/*================*/
- const rec_t* ibuf_rec)/*!< in: ibuf record */
-{
- dtype_t dtype;
- ibool new_format = FALSE;
- ulint data_size = 0;
- ulint n_fields;
- const byte* types;
- const byte* data;
- ulint len;
- ulint i;
- ulint comp;
-
- ut_ad(ibuf_inside());
- ut_ad(rec_get_n_fields_old(ibuf_rec) > 2);
-
- data = rec_get_nth_field_old(ibuf_rec, 1, &len);
-
- if (len > 1) {
- /* < 4.1.x format record */
-
- ut_a(trx_doublewrite_must_reset_space_ids);
- ut_a(!trx_sys_multiple_tablespace_format);
-
- n_fields = rec_get_n_fields_old(ibuf_rec) - 2;
-
- types = rec_get_nth_field_old(ibuf_rec, 1, &len);
-
- ut_ad(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
- comp = 0;
- } else {
- /* >= 4.1.x format record */
-
- ut_a(trx_sys_multiple_tablespace_format);
- ut_a(*data == 0);
-
- types = rec_get_nth_field_old(ibuf_rec, 3, &len);
-
- comp = len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
-
- ut_a(comp <= 1);
- if (comp) {
- /* compact record format: measure the rebuilt entry
- instead of summing up the field lengths */
- ulint volume;
- dict_index_t* dummy_index;
- mem_heap_t* heap = mem_heap_create(500);
- dtuple_t* entry = ibuf_build_entry_from_ibuf_rec(
- ibuf_rec, heap, &dummy_index);
- volume = rec_get_converted_size(dummy_index, entry, 0);
- ibuf_dummy_index_free(dummy_index);
- mem_heap_free(heap);
- return(volume + page_dir_calc_reserved_space(1));
- }
-
- n_fields = rec_get_n_fields_old(ibuf_rec) - 4;
-
- new_format = TRUE;
- }
-
- for (i = 0; i < n_fields; i++) {
- if (new_format) {
- data = rec_get_nth_field_old(ibuf_rec, i + 4, &len);
-
- dtype_new_read_for_order_and_null_size(
- &dtype, types + i
- * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
- } else {
- data = rec_get_nth_field_old(ibuf_rec, i + 2, &len);
-
- dtype_read_for_order_and_null_size(
- &dtype, types + i
- * DATA_ORDER_NULL_TYPE_BUF_SIZE);
- }
-
- if (len == UNIV_SQL_NULL) {
- data_size += dtype_get_sql_null_size(&dtype, comp);
- } else {
- data_size += len;
- }
- }
-
- return(data_size + rec_get_converted_extra_size(data_size, n_fields, 0)
- + page_dir_calc_reserved_space(1));
-}
-
-/*********************************************************************//**
-Builds the tuple to insert to an ibuf tree when we have an entry for a
-non-clustered index. The tuple has four leading fields (space id, marker
-byte, page number, type information) followed by the fields of entry.
-All buffers for the leading fields are allocated from heap.
-
-NOTE that the original entry must be kept because we copy pointers to
-its fields.
-
-@return own: entry to insert into an ibuf index tree */
-static
-dtuple_t*
-ibuf_entry_build(
-/*=============*/
- dict_index_t* index, /*!< in: non-clustered index */
- const dtuple_t* entry, /*!< in: entry for a non-clustered index */
- ulint space, /*!< in: space id */
- ulint page_no,/*!< in: index page number where entry should
- be inserted */
- mem_heap_t* heap) /*!< in: heap into which to build */
-{
- dtuple_t* tuple;
- dfield_t* field;
- const dfield_t* entry_field;
- ulint n_fields;
- byte* buf;
- byte* buf2;
- ulint i;
-
- /* Starting from 4.1.x, we have to build a tuple whose
- (1) first field is the space id,
- (2) the second field a single marker byte (0) to tell that this
- is a new format record,
- (3) the third contains the page number, and
- (4) the fourth contains the relevant type information of each data
- field; the length of this field % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE is
- (a) 0 for b-trees in the old format, and
- (b) 1 for b-trees in the compact format, the first byte of the field
- being the marker (0);
- (5) and the rest of the fields are copied from entry. All fields
- in the tuple are ordered like the type binary in our insert buffer
- tree. */
-
- n_fields = dtuple_get_n_fields(entry);
-
- tuple = dtuple_create(heap, n_fields + 4);
-
- /* Store the space id in tuple */
-
- field = dtuple_get_nth_field(tuple, 0);
-
- buf = mem_heap_alloc(heap, 4);
-
- mach_write_to_4(buf, space);
-
- dfield_set_data(field, buf, 4);
-
- /* Store the marker byte field in tuple */
-
- field = dtuple_get_nth_field(tuple, 1);
-
- buf = mem_heap_alloc(heap, 1);
-
- /* We set the marker byte zero */
-
- mach_write_to_1(buf, 0);
-
- dfield_set_data(field, buf, 1);
-
- /* Store the page number in tuple */
-
- field = dtuple_get_nth_field(tuple, 2);
-
- buf = mem_heap_alloc(heap, 4);
-
- mach_write_to_4(buf, page_no);
-
- dfield_set_data(field, buf, 4);
-
- /* Store the type info in buf2, and add the fields from entry to
- tuple. For a compact format table one extra byte is allocated in
- front of the per-field type entries; buf2 is advanced past it here
- and moved back before the buffer is attached to field 3 below. */
- buf2 = mem_heap_alloc(heap, n_fields
- * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
- + dict_table_is_comp(index->table));
- if (dict_table_is_comp(index->table)) {
- *buf2++ = 0; /* write the compact format indicator */
- }
- for (i = 0; i < n_fields; i++) {
- ulint fixed_len;
- const dict_field_t* ifield;
-
- /* We add 4 below because we have the 4 extra fields at the
- start of an ibuf record */
-
- field = dtuple_get_nth_field(tuple, i + 4);
- entry_field = dtuple_get_nth_field(entry, i);
- dfield_copy(field, entry_field);
-
- ifield = dict_index_get_nth_field(index, i);
- /* Prefix index columns of fixed-length columns are of
- fixed length. However, in the function call below,
- dfield_get_type(entry_field) contains the fixed length
- of the column in the clustered index. Replace it with
- the fixed length of the secondary index column. */
- fixed_len = ifield->fixed_len;
-
-#ifdef UNIV_DEBUG
- if (fixed_len) {
- /* dict_index_add_col() should guarantee these */
- ut_ad(fixed_len <= (ulint)
- dfield_get_type(entry_field)->len);
- if (ifield->prefix_len) {
- ut_ad(ifield->prefix_len == fixed_len);
- } else {
- ut_ad(fixed_len == (ulint)
- dfield_get_type(entry_field)->len);
- }
- }
-#endif /* UNIV_DEBUG */
-
- dtype_new_store_for_order_and_null_size(
- buf2 + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE,
- dfield_get_type(entry_field), fixed_len);
- }
-
- /* Store the type info in buf2 to field 3 of tuple */
-
- field = dtuple_get_nth_field(tuple, 3);
-
- if (dict_table_is_comp(index->table)) {
- buf2--;
- }
-
- dfield_set_data(field, buf2, n_fields
- * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
- + dict_table_is_comp(index->table));
- /* Set all the types in the new tuple binary */
-
- dtuple_set_types_binary(tuple, n_fields + 4);
-
- return(tuple);
-}
-
-/*********************************************************************//**
-Builds a search tuple used to search buffered inserts for an index page.
-This is for < 4.1.x format records: the tuple consists of a single
-4-byte field holding the page number.
-@return own: search tuple */
-static
-dtuple_t*
-ibuf_search_tuple_build(
-/*====================*/
-	ulint		space,	/*!< in: space id */
-	ulint		page_no,/*!< in: index page number */
-	mem_heap_t*	heap)	/*!< in: heap into which to build */
-{
-	dtuple_t*	search_tuple;
-	dfield_t*	page_no_field;
-	byte*		page_no_buf;
-
-	ut_a(space == 0);
-	ut_a(trx_doublewrite_must_reset_space_ids);
-	ut_a(!trx_sys_multiple_tablespace_format);
-
-	search_tuple = dtuple_create(heap, 1);
-
-	/* The only field is the page number */
-
-	page_no_field = dtuple_get_nth_field(search_tuple, 0);
-	page_no_buf = mem_heap_alloc(heap, 4);
-	mach_write_to_4(page_no_buf, page_no);
-	dfield_set_data(page_no_field, page_no_buf, 4);
-
-	dtuple_set_types_binary(search_tuple, 1);
-
-	return(search_tuple);
-}
-
-/*********************************************************************//**
-Builds a search tuple used to search buffered inserts for an index page.
-This is for >= 4.1.x format records: the tuple consists of the space id
-(4 bytes), a marker byte of value 0, and the page number (4 bytes).
-@return own: search tuple */
-static
-dtuple_t*
-ibuf_new_search_tuple_build(
-/*========================*/
-	ulint		space,	/*!< in: space id */
-	ulint		page_no,/*!< in: index page number */
-	mem_heap_t*	heap)	/*!< in: heap into which to build */
-{
-	dtuple_t*	search_tuple;
-	dfield_t*	dfield;
-	byte*		space_buf;
-	byte*		marker_buf;
-	byte*		page_no_buf;
-
-	ut_a(trx_sys_multiple_tablespace_format);
-
-	search_tuple = dtuple_create(heap, 3);
-
-	/* Field 0: the space id */
-
-	dfield = dtuple_get_nth_field(search_tuple, 0);
-	space_buf = mem_heap_alloc(heap, 4);
-	mach_write_to_4(space_buf, space);
-	dfield_set_data(dfield, space_buf, 4);
-
-	/* Field 1: the new format record marker byte */
-
-	dfield = dtuple_get_nth_field(search_tuple, 1);
-	marker_buf = mem_heap_alloc(heap, 1);
-	mach_write_to_1(marker_buf, 0);
-	dfield_set_data(dfield, marker_buf, 1);
-
-	/* Field 2: the page number */
-
-	dfield = dtuple_get_nth_field(search_tuple, 2);
-	page_no_buf = mem_heap_alloc(heap, 4);
-	mach_write_to_4(page_no_buf, page_no);
-	dfield_set_data(dfield, page_no_buf, 4);
-
-	dtuple_set_types_binary(search_tuple, 3);
-
-	return(search_tuple);
-}
-
-/*********************************************************************//**
-Checks if there are enough pages in the free list of the ibuf tree that we
-dare to start a pessimistic insert to the insert buffer. The caller must
-own ibuf_mutex. The threshold is half of the ibuf size plus three times
-the tree height.
-@return TRUE if enough free pages in list */
-UNIV_INLINE
-ibool
-ibuf_data_enough_free_for_insert(void)
-/*==================================*/
-{
-	ut_ad(mutex_own(&ibuf_mutex));
-
-	/* We want a big margin of free pages, because a B-tree can sometimes
-	grow in size also if records are deleted from it, as the node pointers
-	can change, and we must make sure that we are able to delete the
-	inserts buffered for pages that we read to the buffer pool, without
-	any risk of running out of free space in the insert buffer. */
-
-	return((ibuf->size / 2) + 3 * ibuf->height <= ibuf->free_list_len);
-}
-
-/*********************************************************************//**
-Checks if there are so many pages in the free list of the ibuf tree that
-we should remove some of them and free them to the file space management.
-The caller must own ibuf_mutex. The threshold is 3 pages above the one
-used by ibuf_data_enough_free_for_insert().
-@return TRUE if the free list is long enough for pages to be removed */
-UNIV_INLINE
-ibool
-ibuf_data_too_much_free(void)
-/*=========================*/
-{
-	ut_ad(mutex_own(&ibuf_mutex));
-
-	return(3 + (ibuf->size / 2) + 3 * ibuf->height
-	       <= ibuf->free_list_len);
-}
-
-/*********************************************************************//**
-Allocates a new page from the ibuf file segment and adds it to the free
-list.
-
-The whole operation runs in one local mini-transaction: the fsp latch of
-the ibuf tablespace is x-locked, a page is allocated from the ibuf tree
-segment, the page is x-latched, appended to the free list kept in the
-ibuf tree root, marked with the page type FIL_PAGE_IBUF_FREE_LIST, and
-its IBUF_BITMAP_IBUF bit is set in the ibuf bitmap. ibuf->seg_size and
-ibuf->free_list_len are incremented under ibuf_mutex. If no page can be
-allocated, the mini-transaction is committed and nothing else is changed.
-@return DB_SUCCESS, or DB_STRONG_FAIL if no space left */
-static
-ulint
-ibuf_add_free_page(void)
-/*====================*/
-{
- mtr_t mtr;
- page_t* header_page;
- ulint flags;
- ulint zip_size;
- ulint page_no;
- page_t* page;
- page_t* root;
- page_t* bitmap_page;
-
- mtr_start(&mtr);
-
- /* Acquire the fsp latch before the ibuf header, obeying the latching
- order */
- mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr);
- zip_size = dict_table_flags_to_zip_size(flags);
-
- header_page = ibuf_header_page_get(&mtr);
-
- /* Allocate a new page: NOTE that if the page has been a part of a
- non-clustered index which has subsequently been dropped, then the
- page may have buffered inserts in the insert buffer, and these
- should be deleted from there. These get deleted when the page
- allocation creates the page in buffer. Thus the call below may end
- up calling the insert buffer routines and, as we yet have no latches
- to insert buffer tree pages, these routines can run without a risk
- of a deadlock. This is the reason why we created a special ibuf
- header page apart from the ibuf tree. */
-
- page_no = fseg_alloc_free_page(
- header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, 0, FSP_UP,
- &mtr);
-
- if (page_no == FIL_NULL) {
- mtr_commit(&mtr);
-
- return(DB_STRONG_FAIL);
- }
-
- {
- buf_block_t* block;
-
- block = buf_page_get(
- IBUF_SPACE_ID, 0, page_no, RW_X_LATCH, &mtr);
-
- buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
-
-
- page = buf_block_get_frame(block);
- }
-
- ibuf_enter();
-
- mutex_enter(&ibuf_mutex);
-
- root = ibuf_tree_root_get(&mtr);
-
- /* Add the page to the free list and update the ibuf size data */
-
- flst_add_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
-
- mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_IBUF_FREE_LIST,
- MLOG_2BYTES, &mtr);
-
- ibuf->seg_size++;
- ibuf->free_list_len++;
-
- /* Set the bit indicating that this page is now an ibuf tree page
- (level 2 page) */
-
- bitmap_page = ibuf_bitmap_get_map_page(
- IBUF_SPACE_ID, page_no, zip_size, &mtr);
-
- ibuf_bitmap_page_set_bits(
- bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, TRUE, &mtr);
-
- mtr_commit(&mtr);
-
- mutex_exit(&ibuf_mutex);
-
- ibuf_exit();
-
- return(DB_SUCCESS);
-}
-
-/*********************************************************************//**
-Removes a page from the free list and frees it to the fsp system. */
-static
-void
-ibuf_remove_free_page(void)
-/*=======================*/
-{
- mtr_t mtr;
- mtr_t mtr2;
- page_t* header_page;
- ulint flags;
- ulint zip_size;
- ulint page_no;
- page_t* page;
- page_t* root;
- page_t* bitmap_page;
-
- mtr_start(&mtr);
-
- /* Acquire the fsp latch before the ibuf header, obeying the latching
- order */
- mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr);
- zip_size = dict_table_flags_to_zip_size(flags);
-
- header_page = ibuf_header_page_get(&mtr);
-
- /* Prevent pessimistic inserts to insert buffer trees for a while */
- mutex_enter(&ibuf_pessimistic_insert_mutex);
-
- ibuf_enter();
-
- mutex_enter(&ibuf_mutex);
-
- if (!ibuf_data_too_much_free()) {
-
- mutex_exit(&ibuf_mutex);
-
- ibuf_exit();
-
- mutex_exit(&ibuf_pessimistic_insert_mutex);
-
- mtr_commit(&mtr);
-
- return;
- }
-
- mtr_start(&mtr2);
-
- root = ibuf_tree_root_get(&mtr2);
-
- page_no = flst_get_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- &mtr2).page;
-
- /* NOTE that we must release the latch on the ibuf tree root
- because in fseg_free_page we access level 1 pages, and the root
- is a level 2 page. */
-
- mtr_commit(&mtr2);
- mutex_exit(&ibuf_mutex);
-
- ibuf_exit();
-
- /* Since pessimistic inserts were prevented, we know that the
- page is still in the free list. NOTE that also deletes may take
- pages from the free list, but they take them from the start, and
- the free list was so long that they cannot have taken the last
- page from it. */
-
- fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
- IBUF_SPACE_ID, page_no, &mtr);
-
-#ifdef UNIV_DEBUG_FILE_ACCESSES
- buf_page_reset_file_page_was_freed(IBUF_SPACE_ID, page_no);
-#endif
-
- ibuf_enter();
-
- mutex_enter(&ibuf_mutex);
-
- root = ibuf_tree_root_get(&mtr);
-
- ut_ad(page_no == flst_get_last(root + PAGE_HEADER
- + PAGE_BTR_IBUF_FREE_LIST, &mtr).page);
-
- {
- buf_block_t* block;
-
- block = buf_page_get(
- IBUF_SPACE_ID, 0, page_no, RW_X_LATCH, &mtr);
-
- buf_block_dbg_add_level(block, SYNC_TREE_NODE);
-
-
- page = buf_block_get_frame(block);
- }
-
- /* Remove the page from the free list and update the ibuf size data */
-
- flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
-
- ibuf->seg_size--;
- ibuf->free_list_len--;
-
- mutex_exit(&ibuf_pessimistic_insert_mutex);
-
- /* Set the bit indicating that this page is no more an ibuf tree page
- (level 2 page) */
-
- bitmap_page = ibuf_bitmap_get_map_page(
- IBUF_SPACE_ID, page_no, zip_size, &mtr);
-
- ibuf_bitmap_page_set_bits(
- bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, FALSE, &mtr);
-
-#ifdef UNIV_DEBUG_FILE_ACCESSES
- buf_page_set_file_page_was_freed(IBUF_SPACE_ID, page_no);
-#endif
- mtr_commit(&mtr);
-
- mutex_exit(&ibuf_mutex);
-
- ibuf_exit();
-}
-
-/***********************************************************************//**
-Frees excess pages from the ibuf free list. This function is called when an OS
-thread calls fsp services to allocate a new file segment, or a new page to a
-file segment, and the thread did not own the fsp latch before this call. */
-UNIV_INTERN
-void
-ibuf_free_excess_pages(void)
-/*========================*/
-{
- ulint i;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(fil_space_get_latch(IBUF_SPACE_ID, NULL),
- RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- ut_ad(rw_lock_get_x_lock_count(
- fil_space_get_latch(IBUF_SPACE_ID, NULL)) == 1);
-
- ut_ad(!ibuf_inside());
-
- /* NOTE: We require that the thread did not own the latch before,
- because then we know that we can obey the correct latching order
- for ibuf latches */
-
- if (!ibuf) {
- /* Not yet initialized; not sure if this is possible, but
- does no harm to check for it. */
-
- return;
- }
-
- /* Free at most a few pages at a time, so that we do not delay the
- requested service too much */
-
- for (i = 0; i < 4; i++) {
-
- mutex_enter(&ibuf_mutex);
-
- if (!ibuf_data_too_much_free()) {
-
- mutex_exit(&ibuf_mutex);
-
- return;
- }
-
- mutex_exit(&ibuf_mutex);
-
- ibuf_remove_free_page();
- }
-}
-
-/*********************************************************************//**
-Reads page numbers from a leaf in an ibuf tree.
-@return a lower limit for the combined volume of records which will be
-merged */
-static
-ulint
-ibuf_get_merge_page_nos(
-/*====================*/
- ibool contract,/*!< in: TRUE if this function is called to
- contract the tree, FALSE if this is called
- when a single page becomes full and we look
- if it pays to read also nearby pages */
- rec_t* rec, /*!< in: record from which we read up and down
- in the chain of records */
- ulint* space_ids,/*!< in/out: space id's of the pages */
- ib_int64_t* space_versions,/*!< in/out: tablespace version
- timestamps; used to prevent reading in old
- pages after DISCARD + IMPORT tablespace */
- ulint* page_nos,/*!< in/out: buffer for at least
- IBUF_MAX_N_PAGES_MERGED many page numbers;
- the page numbers are in an ascending order */
- ulint* n_stored)/*!< out: number of page numbers stored to
- page_nos in this function */
-{
- ulint prev_page_no;
- ulint prev_space_id;
- ulint first_page_no;
- ulint first_space_id;
- ulint rec_page_no;
- ulint rec_space_id;
- ulint sum_volumes;
- ulint volume_for_page;
- ulint rec_volume;
- ulint limit;
- ulint n_pages;
-
- *n_stored = 0;
-
- limit = ut_min(IBUF_MAX_N_PAGES_MERGED, buf_pool->curr_size / 4);
-
- if (page_rec_is_supremum(rec)) {
-
- rec = page_rec_get_prev(rec);
- }
-
- if (page_rec_is_infimum(rec)) {
-
- rec = page_rec_get_next(rec);
- }
-
- if (page_rec_is_supremum(rec)) {
-
- return(0);
- }
-
- first_page_no = ibuf_rec_get_page_no(rec);
- first_space_id = ibuf_rec_get_space(rec);
- n_pages = 0;
- prev_page_no = 0;
- prev_space_id = 0;
-
- /* Go backwards from the first rec until we reach the border of the
- 'merge area', or the page start or the limit of storeable pages is
- reached */
-
- while (!page_rec_is_infimum(rec) && UNIV_LIKELY(n_pages < limit)) {
-
- rec_page_no = ibuf_rec_get_page_no(rec);
- rec_space_id = ibuf_rec_get_space(rec);
-
- if (rec_space_id != first_space_id
- || (rec_page_no / IBUF_MERGE_AREA)
- != (first_page_no / IBUF_MERGE_AREA)) {
-
- break;
- }
-
- if (rec_page_no != prev_page_no
- || rec_space_id != prev_space_id) {
- n_pages++;
- }
-
- prev_page_no = rec_page_no;
- prev_space_id = rec_space_id;
-
- rec = page_rec_get_prev(rec);
- }
-
- rec = page_rec_get_next(rec);
-
- /* At the loop start there is no prev page; we mark this with a pair
- of space id, page no (0, 0) for which there can never be entries in
- the insert buffer */
-
- prev_page_no = 0;
- prev_space_id = 0;
- sum_volumes = 0;
- volume_for_page = 0;
-
- while (*n_stored < limit) {
- if (page_rec_is_supremum(rec)) {
- /* When no more records available, mark this with
- another 'impossible' pair of space id, page no */
- rec_page_no = 1;
- rec_space_id = 0;
- } else {
- rec_page_no = ibuf_rec_get_page_no(rec);
- rec_space_id = ibuf_rec_get_space(rec);
- ut_ad(rec_page_no > IBUF_TREE_ROOT_PAGE_NO);
- }
-
-#ifdef UNIV_IBUF_DEBUG
- ut_a(*n_stored < IBUF_MAX_N_PAGES_MERGED);
-#endif
- if ((rec_space_id != prev_space_id
- || rec_page_no != prev_page_no)
- && (prev_space_id != 0 || prev_page_no != 0)) {
-
- if ((prev_page_no == first_page_no
- && prev_space_id == first_space_id)
- || contract
- || (volume_for_page
- > ((IBUF_MERGE_THRESHOLD - 1)
- * 4 * UNIV_PAGE_SIZE
- / IBUF_PAGE_SIZE_PER_FREE_SPACE)
- / IBUF_MERGE_THRESHOLD)) {
-
- space_ids[*n_stored] = prev_space_id;
- space_versions[*n_stored]
- = fil_space_get_version(prev_space_id);
- page_nos[*n_stored] = prev_page_no;
-
- (*n_stored)++;
-
- sum_volumes += volume_for_page;
- }
-
- if (rec_space_id != first_space_id
- || rec_page_no / IBUF_MERGE_AREA
- != first_page_no / IBUF_MERGE_AREA) {
-
- break;
- }
-
- volume_for_page = 0;
- }
-
- if (rec_page_no == 1 && rec_space_id == 0) {
- /* Supremum record */
-
- break;
- }
-
- rec_volume = ibuf_rec_get_volume(rec);
-
- volume_for_page += rec_volume;
-
- prev_page_no = rec_page_no;
- prev_space_id = rec_space_id;
-
- rec = page_rec_get_next(rec);
- }
-
-#ifdef UNIV_IBUF_DEBUG
- ut_a(*n_stored <= IBUF_MAX_N_PAGES_MERGED);
-#endif
-#if 0
- fprintf(stderr, "Ibuf merge batch %lu pages %lu volume\n",
- *n_stored, sum_volumes);
-#endif
- return(sum_volumes);
-}
-
-/*********************************************************************//**
-Contracts insert buffer trees by reading pages to the buffer pool.
-@return a lower limit for the combined size in bytes of entries which
-will be merged from ibuf trees to the pages read, 0 if ibuf is
-empty */
-static
-ulint
-ibuf_contract_ext(
-/*==============*/
- ulint* n_pages,/*!< out: number of pages to which merged */
- ibool sync) /*!< in: TRUE if the caller wants to wait for the
- issued read with the highest tablespace address
- to complete */
-{
- btr_pcur_t pcur;
- ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
- ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
- ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED];
- ulint n_stored;
- ulint sum_sizes;
- mtr_t mtr;
-
- *n_pages = 0;
- ut_ad(!ibuf_inside());
-
- mutex_enter(&ibuf_mutex);
-
- if (ibuf->empty) {
-ibuf_is_empty:
- mutex_exit(&ibuf_mutex);
-
- return(0);
- }
-
- mtr_start(&mtr);
-
- ibuf_enter();
-
- /* Open a cursor to a randomly chosen leaf of the tree, at a random
- position within the leaf */
-
- btr_pcur_open_at_rnd_pos(ibuf->index, BTR_SEARCH_LEAF, &pcur, &mtr);
-
- if (page_get_n_recs(btr_pcur_get_page(&pcur)) == 0) {
- /* When the ibuf tree is emptied completely, the last record
- is removed using an optimistic delete and ibuf_size_update
- is not called, causing ibuf->empty to remain FALSE. If we do
- not reset it to TRUE here then database shutdown will hang
- in the loop in ibuf_contract_for_n_pages. */
-
- ibuf->empty = TRUE;
-
- ibuf_exit();
-
- mtr_commit(&mtr);
- btr_pcur_close(&pcur);
-
- goto ibuf_is_empty;
- }
-
- mutex_exit(&ibuf_mutex);
-
- sum_sizes = ibuf_get_merge_page_nos(TRUE, btr_pcur_get_rec(&pcur),
- space_ids, space_versions,
- page_nos, &n_stored);
-#if 0 /* defined UNIV_IBUF_DEBUG */
- fprintf(stderr, "Ibuf contract sync %lu pages %lu volume %lu\n",
- sync, n_stored, sum_sizes);
-#endif
- ibuf_exit();
-
- mtr_commit(&mtr);
- btr_pcur_close(&pcur);
-
- buf_read_ibuf_merge_pages(sync, space_ids, space_versions, page_nos,
- n_stored);
- *n_pages = n_stored;
-
- return(sum_sizes + 1);
-}
-
-/*********************************************************************//**
-Contracts insert buffer trees by reading pages to the buffer pool.
-@return a lower limit for the combined size in bytes of entries which
-will be merged from ibuf trees to the pages read, 0 if ibuf is
-empty */
-UNIV_INTERN
-ulint
-ibuf_contract(
-/*==========*/
- ibool sync) /*!< in: TRUE if the caller wants to wait for the
- issued read with the highest tablespace address
- to complete */
-{
- ulint n_pages;
-
- return(ibuf_contract_ext(&n_pages, sync));
-}
-
-/*********************************************************************//**
-Contracts insert buffer trees by reading pages to the buffer pool.
-@return a lower limit for the combined size in bytes of entries which
-will be merged from ibuf trees to the pages read, 0 if ibuf is
-empty */
-UNIV_INTERN
-ulint
-ibuf_contract_for_n_pages(
-/*======================*/
- ibool sync, /*!< in: TRUE if the caller wants to wait for the
- issued read with the highest tablespace address
- to complete */
- ulint n_pages)/*!< in: try to read at least this many pages to
- the buffer pool and merge the ibuf contents to
- them */
-{
- ulint sum_bytes = 0;
- ulint sum_pages = 0;
- ulint n_bytes;
- ulint n_pag2;
-
- while (sum_pages < n_pages) {
- n_bytes = ibuf_contract_ext(&n_pag2, sync);
-
- if (n_bytes == 0) {
- return(sum_bytes);
- }
-
- sum_bytes += n_bytes;
- sum_pages += n_pag2;
- }
-
- return(sum_bytes);
-}
-
-/*********************************************************************//**
-Contract insert buffer trees after insert if they are too big. */
-UNIV_INLINE
-void
-ibuf_contract_after_insert(
-/*=======================*/
- ulint entry_size) /*!< in: size of a record which was inserted
- into an ibuf tree */
-{
- ibool sync;
- ulint sum_sizes;
- ulint size;
-
- mutex_enter(&ibuf_mutex);
-
- if (ibuf->size < ibuf->max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
- mutex_exit(&ibuf_mutex);
-
- return;
- }
-
- sync = FALSE;
-
- if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_ON_INSERT_SYNC) {
-
- sync = TRUE;
- }
-
- mutex_exit(&ibuf_mutex);
-
- /* Contract at least entry_size many bytes */
- sum_sizes = 0;
- size = 1;
-
- while ((size > 0) && (sum_sizes < entry_size)) {
-
- size = ibuf_contract(sync);
- sum_sizes += size;
- }
-}
-
-/*********************************************************************//**
-Gets an upper limit for the combined size of entries buffered in the insert
-buffer for a given page.
-@return upper limit for the volume of buffered inserts for the index
-page, in bytes; UNIV_PAGE_SIZE, if the entries for the index page span
-several pages in the insert buffer */
-static
-ulint
-ibuf_get_volume_buffered(
-/*=====================*/
- btr_pcur_t* pcur, /*!< in: pcur positioned at a place in an
- insert buffer tree where we would insert an
- entry for the index page whose number is
- page_no, latch mode has to be BTR_MODIFY_PREV
- or BTR_MODIFY_TREE */
- ulint space, /*!< in: space id */
- ulint page_no,/*!< in: page number of an index page */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint volume;
- rec_t* rec;
- page_t* page;
- ulint prev_page_no;
- page_t* prev_page;
- ulint next_page_no;
- page_t* next_page;
-
- ut_a(trx_sys_multiple_tablespace_format);
-
- ut_ad((pcur->latch_mode == BTR_MODIFY_PREV)
- || (pcur->latch_mode == BTR_MODIFY_TREE));
-
- /* Count the volume of records earlier in the alphabetical order than
- pcur */
-
- volume = 0;
-
- rec = btr_pcur_get_rec(pcur);
- page = page_align(rec);
-
- if (page_rec_is_supremum(rec)) {
- rec = page_rec_get_prev(rec);
- }
-
- for (;;) {
- if (page_rec_is_infimum(rec)) {
-
- break;
- }
-
- if (page_no != ibuf_rec_get_page_no(rec)
- || space != ibuf_rec_get_space(rec)) {
-
- goto count_later;
- }
-
- volume += ibuf_rec_get_volume(rec);
-
- rec = page_rec_get_prev(rec);
- }
-
- /* Look at the previous page */
-
- prev_page_no = btr_page_get_prev(page, mtr);
-
- if (prev_page_no == FIL_NULL) {
-
- goto count_later;
- }
-
- {
- buf_block_t* block;
-
- block = buf_page_get(
- IBUF_SPACE_ID, 0, prev_page_no, RW_X_LATCH, mtr);
-
- buf_block_dbg_add_level(block, SYNC_TREE_NODE);
-
-
- prev_page = buf_block_get_frame(block);
- }
-
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_next(prev_page, mtr)
- == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
- rec = page_get_supremum_rec(prev_page);
- rec = page_rec_get_prev(rec);
-
- for (;;) {
- if (page_rec_is_infimum(rec)) {
-
- /* We cannot go to yet a previous page, because we
- do not have the x-latch on it, and cannot acquire one
- because of the latching order: we have to give up */
-
- return(UNIV_PAGE_SIZE);
- }
-
- if (page_no != ibuf_rec_get_page_no(rec)
- || space != ibuf_rec_get_space(rec)) {
-
- goto count_later;
- }
-
- volume += ibuf_rec_get_volume(rec);
-
- rec = page_rec_get_prev(rec);
- }
-
-count_later:
- rec = btr_pcur_get_rec(pcur);
-
- if (!page_rec_is_supremum(rec)) {
- rec = page_rec_get_next(rec);
- }
-
- for (;;) {
- if (page_rec_is_supremum(rec)) {
-
- break;
- }
-
- if (page_no != ibuf_rec_get_page_no(rec)
- || space != ibuf_rec_get_space(rec)) {
-
- return(volume);
- }
-
- volume += ibuf_rec_get_volume(rec);
-
- rec = page_rec_get_next(rec);
- }
-
- /* Look at the next page */
-
- next_page_no = btr_page_get_next(page, mtr);
-
- if (next_page_no == FIL_NULL) {
-
- return(volume);
- }
-
- {
- buf_block_t* block;
-
- block = buf_page_get(
- IBUF_SPACE_ID, 0, next_page_no, RW_X_LATCH, mtr);
-
- buf_block_dbg_add_level(block, SYNC_TREE_NODE);
-
-
- next_page = buf_block_get_frame(block);
- }
-
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_prev(next_page, mtr) == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
- rec = page_get_infimum_rec(next_page);
- rec = page_rec_get_next(rec);
-
- for (;;) {
- if (page_rec_is_supremum(rec)) {
-
- /* We give up */
-
- return(UNIV_PAGE_SIZE);
- }
-
- if (page_no != ibuf_rec_get_page_no(rec)
- || space != ibuf_rec_get_space(rec)) {
-
- return(volume);
- }
-
- volume += ibuf_rec_get_volume(rec);
-
- rec = page_rec_get_next(rec);
- }
-}
-
-/*********************************************************************//**
-Reads the biggest tablespace id from the high end of the insert buffer
-tree and updates the counter in fil_system. */
-UNIV_INTERN
-void
-ibuf_update_max_tablespace_id(void)
-/*===============================*/
-{
- ulint max_space_id;
- const rec_t* rec;
- const byte* field;
- ulint len;
- btr_pcur_t pcur;
- mtr_t mtr;
-
- ut_a(!dict_table_is_comp(ibuf->index->table));
-
- ibuf_enter();
-
- mtr_start(&mtr);
-
- btr_pcur_open_at_index_side(
- FALSE, ibuf->index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
-
- btr_pcur_move_to_prev(&pcur, &mtr);
-
- if (btr_pcur_is_before_first_on_page(&pcur)) {
- /* The tree is empty */
-
- max_space_id = 0;
- } else {
- rec = btr_pcur_get_rec(&pcur);
-
- field = rec_get_nth_field_old(rec, 0, &len);
-
- ut_a(len == 4);
-
- max_space_id = mach_read_from_4(field);
- }
-
- mtr_commit(&mtr);
- ibuf_exit();
-
- /* printf("Maximum space id in insert buffer %lu\n", max_space_id); */
-
- fil_set_max_space_id_if_bigger(max_space_id);
-}
-
-/*********************************************************************//**
-Makes an index insert to the insert buffer, instead of directly to the disk
-page, if this is possible.
-@return DB_SUCCESS, DB_FAIL, DB_STRONG_FAIL */
-static
-ulint
-ibuf_insert_low(
-/*============*/
- ulint mode, /*!< in: BTR_MODIFY_PREV or BTR_MODIFY_TREE */
- const dtuple_t* entry, /*!< in: index entry to insert */
- ulint entry_size,
- /*!< in: rec_get_converted_size(index, entry) */
- dict_index_t* index, /*!< in: index where to insert; must not be
- unique or clustered */
- ulint space, /*!< in: space id where to insert */
- ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
- ulint page_no,/*!< in: page number where to insert */
- que_thr_t* thr) /*!< in: query thread */
-{
- big_rec_t* dummy_big_rec;
- btr_pcur_t pcur;
- btr_cur_t* cursor;
- dtuple_t* ibuf_entry;
- mem_heap_t* heap;
- ulint buffered;
- rec_t* ins_rec;
- ibool old_bit_value;
- page_t* bitmap_page;
- page_t* root;
- ulint err;
- ibool do_merge;
- ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
- ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED];
- ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
- ulint n_stored;
- ulint bits;
- mtr_t mtr;
- mtr_t bitmap_mtr;
-
- ut_a(!dict_index_is_clust(index));
- ut_ad(dtuple_check_typed(entry));
- ut_ad(ut_is_2pow(zip_size));
-
- ut_a(trx_sys_multiple_tablespace_format);
-
- do_merge = FALSE;
-
- mutex_enter(&ibuf_mutex);
-
- if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT) {
- /* Insert buffer is now too big, contract it but do not try
- to insert */
-
- mutex_exit(&ibuf_mutex);
-
-#ifdef UNIV_IBUF_DEBUG
- fputs("Ibuf too big\n", stderr);
-#endif
- /* Use synchronous contract (== TRUE) */
- ibuf_contract(TRUE);
-
- return(DB_STRONG_FAIL);
- }
-
- mutex_exit(&ibuf_mutex);
-
- if (mode == BTR_MODIFY_TREE) {
- mutex_enter(&ibuf_pessimistic_insert_mutex);
-
- ibuf_enter();
-
- mutex_enter(&ibuf_mutex);
-
- while (!ibuf_data_enough_free_for_insert()) {
-
- mutex_exit(&ibuf_mutex);
-
- ibuf_exit();
-
- mutex_exit(&ibuf_pessimistic_insert_mutex);
-
- err = ibuf_add_free_page();
-
- if (err == DB_STRONG_FAIL) {
-
- return(err);
- }
-
- mutex_enter(&ibuf_pessimistic_insert_mutex);
-
- ibuf_enter();
-
- mutex_enter(&ibuf_mutex);
- }
- } else {
- ibuf_enter();
- }
-
- heap = mem_heap_create(512);
-
- /* Build the entry which contains the space id and the page number as
- the first fields and the type information for other fields, and which
- will be inserted to the insert buffer. */
-
- ibuf_entry = ibuf_entry_build(index, entry, space, page_no, heap);
-
- /* Open a cursor to the insert buffer tree to calculate if we can add
- the new entry to it without exceeding the free space limit for the
- page. */
-
- mtr_start(&mtr);
-
- btr_pcur_open(ibuf->index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr);
-
- /* Find out the volume of already buffered inserts for the same index
- page */
- buffered = ibuf_get_volume_buffered(&pcur, space, page_no, &mtr);
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a((buffered == 0) || ibuf_count_get(space, page_no));
-#endif
- mtr_start(&bitmap_mtr);
-
- bitmap_page = ibuf_bitmap_get_map_page(space, page_no,
- zip_size, &bitmap_mtr);
-
- /* We check if the index page is suitable for buffered entries */
-
- if (buf_page_peek(space, page_no)
- || lock_rec_expl_exist_on_page(space, page_no)) {
- err = DB_STRONG_FAIL;
-
- mtr_commit(&bitmap_mtr);
-
- goto function_exit;
- }
-
- bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size,
- IBUF_BITMAP_FREE, &bitmap_mtr);
-
- if (buffered + entry_size + page_dir_calc_reserved_space(1)
- > ibuf_index_page_calc_free_from_bits(zip_size, bits)) {
- mtr_commit(&bitmap_mtr);
-
- /* It may not fit */
- err = DB_STRONG_FAIL;
-
- do_merge = TRUE;
-
- ibuf_get_merge_page_nos(FALSE, btr_pcur_get_rec(&pcur),
- space_ids, space_versions,
- page_nos, &n_stored);
- goto function_exit;
- }
-
- /* Set the bitmap bit denoting that the insert buffer contains
- buffered entries for this index page, if the bit is not set yet */
-
- old_bit_value = ibuf_bitmap_page_get_bits(
- bitmap_page, page_no, zip_size,
- IBUF_BITMAP_BUFFERED, &bitmap_mtr);
-
- if (!old_bit_value) {
- ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
- IBUF_BITMAP_BUFFERED, TRUE,
- &bitmap_mtr);
- }
-
- mtr_commit(&bitmap_mtr);
-
- cursor = btr_pcur_get_btr_cur(&pcur);
-
- if (mode == BTR_MODIFY_PREV) {
- err = btr_cur_optimistic_insert(BTR_NO_LOCKING_FLAG, cursor,
- ibuf_entry, &ins_rec,
- &dummy_big_rec, 0, thr, &mtr);
- if (err == DB_SUCCESS) {
- /* Update the page max trx id field */
- page_update_max_trx_id(btr_cur_get_block(cursor), NULL,
- thr_get_trx(thr)->id, &mtr);
- }
- } else {
- ut_ad(mode == BTR_MODIFY_TREE);
-
- /* We acquire an x-latch to the root page before the insert,
- because a pessimistic insert releases the tree x-latch,
- which would cause the x-latching of the root after that to
- break the latching order. */
-
- root = ibuf_tree_root_get(&mtr);
-
- err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
- | BTR_NO_UNDO_LOG_FLAG,
- cursor,
- ibuf_entry, &ins_rec,
- &dummy_big_rec, 0, thr, &mtr);
- if (err == DB_SUCCESS) {
- /* Update the page max trx id field */
- page_update_max_trx_id(btr_cur_get_block(cursor), NULL,
- thr_get_trx(thr)->id, &mtr);
- }
-
- ibuf_size_update(root, &mtr);
- }
-
-function_exit:
-#ifdef UNIV_IBUF_COUNT_DEBUG
- if (err == DB_SUCCESS) {
- fprintf(stderr,
- "Incrementing ibuf count of space %lu page %lu\n"
- "from %lu by 1\n", space, page_no,
- ibuf_count_get(space, page_no));
-
- ibuf_count_set(space, page_no,
- ibuf_count_get(space, page_no) + 1);
- }
-#endif
- if (mode == BTR_MODIFY_TREE) {
-
- mutex_exit(&ibuf_mutex);
- mutex_exit(&ibuf_pessimistic_insert_mutex);
- }
-
- mtr_commit(&mtr);
- btr_pcur_close(&pcur);
- ibuf_exit();
-
- mem_heap_free(heap);
-
- if (err == DB_SUCCESS) {
- mutex_enter(&ibuf_mutex);
-
- ibuf->empty = FALSE;
- ibuf->n_inserts++;
-
- mutex_exit(&ibuf_mutex);
-
- if (mode == BTR_MODIFY_TREE) {
- ibuf_contract_after_insert(entry_size);
- }
- }
-
- if (do_merge) {
-#ifdef UNIV_IBUF_DEBUG
- ut_a(n_stored <= IBUF_MAX_N_PAGES_MERGED);
-#endif
- buf_read_ibuf_merge_pages(FALSE, space_ids, space_versions,
- page_nos, n_stored);
- }
-
- return(err);
-}
-
-/*********************************************************************//**
-Makes an index insert to the insert buffer, instead of directly to the disk
-page, if this is possible. Does not do insert if the index is clustered
-or unique.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-ibuf_insert(
-/*========*/
- const dtuple_t* entry, /*!< in: index entry to insert */
- dict_index_t* index, /*!< in: index where to insert */
- ulint space, /*!< in: space id where to insert */
- ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
- ulint page_no,/*!< in: page number where to insert */
- que_thr_t* thr) /*!< in: query thread */
-{
- ulint err;
- ulint entry_size;
-
- ut_a(trx_sys_multiple_tablespace_format);
- ut_ad(dtuple_check_typed(entry));
- ut_ad(ut_is_2pow(zip_size));
-
- ut_a(!dict_index_is_clust(index));
-
- switch (UNIV_EXPECT(ibuf_use, IBUF_USE_INSERT)) {
- case IBUF_USE_NONE:
- return(FALSE);
- case IBUF_USE_INSERT:
- goto do_insert;
- case IBUF_USE_COUNT:
- break;
- }
-
- ut_error; /* unknown value of ibuf_use */
-
-do_insert:
- entry_size = rec_get_converted_size(index, entry, 0);
-
- if (entry_size
- >= (page_get_free_space_of_empty(dict_table_is_comp(index->table))
- / 2)) {
- return(FALSE);
- }
-
- err = ibuf_insert_low(BTR_MODIFY_PREV, entry, entry_size,
- index, space, zip_size, page_no, thr);
- if (err == DB_FAIL) {
- err = ibuf_insert_low(BTR_MODIFY_TREE, entry, entry_size,
- index, space, zip_size, page_no, thr);
- }
-
- if (err == DB_SUCCESS) {
-#ifdef UNIV_IBUF_DEBUG
- /* fprintf(stderr, "Ibuf insert for page no %lu of index %s\n",
- page_no, index->name); */
-#endif
- return(TRUE);
-
- } else {
- ut_a(err == DB_STRONG_FAIL);
-
- return(FALSE);
- }
-}
-
-/********************************************************************//**
-During merge, inserts to an index page a secondary index entry extracted
-from the insert buffer. */
-static
-void
-ibuf_insert_to_index_page(
-/*======================*/
- dtuple_t* entry, /*!< in: buffered entry to insert */
- buf_block_t* block, /*!< in/out: index page where the buffered entry
- should be placed */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_cur_t page_cur;
- ulint low_match;
- page_t* page = buf_block_get_frame(block);
- rec_t* rec;
- page_t* bitmap_page;
- ulint old_bits;
-
- ut_ad(ibuf_inside());
- ut_ad(dtuple_check_typed(entry));
-
- if (UNIV_UNLIKELY(dict_table_is_comp(index->table)
- != (ibool)!!page_is_comp(page))) {
- fputs("InnoDB: Trying to insert a record from"
- " the insert buffer to an index page\n"
- "InnoDB: but the 'compact' flag does not match!\n",
- stderr);
- goto dump;
- }
-
- rec = page_rec_get_next(page_get_infimum_rec(page));
-
- if (UNIV_UNLIKELY(rec_get_n_fields(rec, index)
- != dtuple_get_n_fields(entry))) {
- fputs("InnoDB: Trying to insert a record from"
- " the insert buffer to an index page\n"
- "InnoDB: but the number of fields does not match!\n",
- stderr);
-dump:
- buf_page_print(page, 0);
-
- dtuple_print(stderr, entry);
-
- fputs("InnoDB: The table where where"
- " this index record belongs\n"
- "InnoDB: is now probably corrupt."
- " Please run CHECK TABLE on\n"
- "InnoDB: your tables.\n"
- "InnoDB: Submit a detailed bug report to"
- " http://bugs.mysql.com!\n", stderr);
-
- return;
- }
-
- low_match = page_cur_search(block, index, entry,
- PAGE_CUR_LE, &page_cur);
-
- if (low_match == dtuple_get_n_fields(entry)) {
- page_zip_des_t* page_zip;
-
- rec = page_cur_get_rec(&page_cur);
- page_zip = buf_block_get_page_zip(block);
-
- btr_cur_del_unmark_for_ibuf(rec, page_zip, mtr);
- } else {
- rec = page_cur_tuple_insert(&page_cur, entry, index, 0, mtr);
-
- if (UNIV_LIKELY(rec != NULL)) {
- return;
- }
-
- /* If the record did not fit, reorganize */
-
- btr_page_reorganize(block, index, mtr);
- page_cur_search(block, index, entry, PAGE_CUR_LE, &page_cur);
-
- /* This time the record must fit */
- if (UNIV_UNLIKELY
- (!page_cur_tuple_insert(&page_cur, entry, index,
- 0, mtr))) {
- ulint space;
- ulint page_no;
- ulint zip_size;
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error: Insert buffer insert"
- " fails; page free %lu,"
- " dtuple size %lu\n",
- (ulong) page_get_max_insert_size(
- page, 1),
- (ulong) rec_get_converted_size(
- index, entry, 0));
- fputs("InnoDB: Cannot insert index record ",
- stderr);
- dtuple_print(stderr, entry);
- fputs("\nInnoDB: The table where"
- " this index record belongs\n"
- "InnoDB: is now probably corrupt."
- " Please run CHECK TABLE on\n"
- "InnoDB: that table.\n", stderr);
-
- space = page_get_space_id(page);
- zip_size = buf_block_get_zip_size(block);
- page_no = page_get_page_no(page);
-
- bitmap_page = ibuf_bitmap_get_map_page(
- space, page_no, zip_size, mtr);
- old_bits = ibuf_bitmap_page_get_bits(
- bitmap_page, page_no, zip_size,
- IBUF_BITMAP_FREE, mtr);
-
- fprintf(stderr,
- "InnoDB: space %lu, page %lu,"
- " zip_size %lu, bitmap bits %lu\n",
- (ulong) space, (ulong) page_no,
- (ulong) zip_size, (ulong) old_bits);
-
- fputs("InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n", stderr);
- }
- }
-}
-
-/*********************************************************************//**
-Deletes from ibuf the record on which pcur is positioned. If we have to
-resort to a pessimistic delete, this function commits mtr and closes
-the cursor.
-@return TRUE if mtr was committed and pcur closed in this operation */
-static
-ibool
-ibuf_delete_rec(
-/*============*/
- ulint space, /*!< in: space id */
- ulint page_no,/*!< in: index page number where the record
- should belong */
- btr_pcur_t* pcur, /*!< in: pcur positioned on the record to
- delete, having latch mode BTR_MODIFY_LEAF */
- const dtuple_t* search_tuple,
- /*!< in: search tuple for entries of page_no */
- mtr_t* mtr) /*!< in: mtr */
-{
- ibool success;
- page_t* root;
- ulint err;
-
- ut_ad(ibuf_inside());
- ut_ad(page_rec_is_user_rec(btr_pcur_get_rec(pcur)));
- ut_ad(ibuf_rec_get_page_no(btr_pcur_get_rec(pcur)) == page_no);
- ut_ad(ibuf_rec_get_space(btr_pcur_get_rec(pcur)) == space);
-
- success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), mtr);
-
- if (success) {
-#ifdef UNIV_IBUF_COUNT_DEBUG
- fprintf(stderr,
- "Decrementing ibuf count of space %lu page %lu\n"
- "from %lu by 1\n", space, page_no,
- ibuf_count_get(space, page_no));
- ibuf_count_set(space, page_no,
- ibuf_count_get(space, page_no) - 1);
-#endif
- return(FALSE);
- }
-
- ut_ad(page_rec_is_user_rec(btr_pcur_get_rec(pcur)));
- ut_ad(ibuf_rec_get_page_no(btr_pcur_get_rec(pcur)) == page_no);
- ut_ad(ibuf_rec_get_space(btr_pcur_get_rec(pcur)) == space);
-
- /* We have to resort to a pessimistic delete from ibuf */
- btr_pcur_store_position(pcur, mtr);
-
- btr_pcur_commit_specify_mtr(pcur, mtr);
-
- mutex_enter(&ibuf_mutex);
-
- mtr_start(mtr);
-
- success = btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr);
-
- if (!success) {
- if (fil_space_get_flags(space) == ULINT_UNDEFINED) {
- /* The tablespace has been dropped. It is possible
- that another thread has deleted the insert buffer
- entry. Do not complain. */
- goto commit_and_exit;
- }
-
- fprintf(stderr,
- "InnoDB: ERROR: Submit the output to"
- " http://bugs.mysql.com\n"
- "InnoDB: ibuf cursor restoration fails!\n"
- "InnoDB: ibuf record inserted to page %lu\n",
- (ulong) page_no);
- fflush(stderr);
-
- rec_print_old(stderr, btr_pcur_get_rec(pcur));
- rec_print_old(stderr, pcur->old_rec);
- dtuple_print(stderr, search_tuple);
-
- rec_print_old(stderr,
- page_rec_get_next(btr_pcur_get_rec(pcur)));
- fflush(stderr);
-
- btr_pcur_commit_specify_mtr(pcur, mtr);
-
- fputs("InnoDB: Validating insert buffer tree:\n", stderr);
- if (!btr_validate_index(ibuf->index, NULL)) {
- ut_error;
- }
-
- fprintf(stderr, "InnoDB: ibuf tree ok\n");
- fflush(stderr);
-
- goto func_exit;
- }
-
- root = ibuf_tree_root_get(mtr);
-
- btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur),
- RB_NONE, mtr);
- ut_a(err == DB_SUCCESS);
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ibuf_count_set(space, page_no, ibuf_count_get(space, page_no) - 1);
-#endif
- ibuf_size_update(root, mtr);
-
-commit_and_exit:
- btr_pcur_commit_specify_mtr(pcur, mtr);
-
-func_exit:
- btr_pcur_close(pcur);
-
- mutex_exit(&ibuf_mutex);
-
- return(TRUE);
-}
-
-/*********************************************************************//**
-When an index page is read from a disk to the buffer pool, this function
-inserts to the page the possible index entries buffered in the insert buffer.
-The entries are deleted from the insert buffer. If the page is not read, but
-created in the buffer pool, this function deletes its buffered entries from
-the insert buffer; there can exist entries for such a page if the page
-belonged to an index which subsequently was dropped. */
-UNIV_INTERN
-void
-ibuf_merge_or_delete_for_page(
-/*==========================*/
- buf_block_t* block, /*!< in: if page has been read from
- disk, pointer to the page x-latched,
- else NULL */
- ulint space, /*!< in: space id of the index page */
- ulint page_no,/*!< in: page number of the index page */
- ulint zip_size,/*!< in: compressed page size in bytes,
- or 0 */
- ibool update_ibuf_bitmap)/*!< in: normally this is set
- to TRUE, but if we have deleted or are
- deleting the tablespace, then we
- naturally do not want to update a
- non-existent bitmap page */
-{
- mem_heap_t* heap;
- btr_pcur_t pcur;
- dtuple_t* search_tuple;
- ulint n_inserts;
-#ifdef UNIV_IBUF_DEBUG
- ulint volume;
-#endif
- page_zip_des_t* page_zip = NULL;
- ibool tablespace_being_deleted = FALSE;
- ibool corruption_noticed = FALSE;
- mtr_t mtr;
-
- ut_ad(!block || buf_block_get_space(block) == space);
- ut_ad(!block || buf_block_get_page_no(block) == page_no);
- ut_ad(!block || buf_block_get_zip_size(block) == zip_size);
-
- if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE
- || trx_sys_hdr_page(space, page_no)) {
- return;
- }
-
- /* We cannot refer to zip_size in the following, because
- zip_size is passed as ULINT_UNDEFINED (it is unknown) when
- buf_read_ibuf_merge_pages() is merging (discarding) changes
- for a dropped tablespace. When block != NULL or
- update_ibuf_bitmap is specified, the zip_size must be known.
- That is why we will repeat the check below, with zip_size in
- place of 0. Passing zip_size as 0 assumes that the
- uncompressed page size always is a power-of-2 multiple of the
- compressed page size. */
-
- if (ibuf_fixed_addr_page(space, 0, page_no)
- || fsp_descr_page(0, page_no)) {
- return;
- }
-
- if (UNIV_LIKELY(update_ibuf_bitmap)) {
- ut_a(ut_is_2pow(zip_size));
-
- if (ibuf_fixed_addr_page(space, zip_size, page_no)
- || fsp_descr_page(zip_size, page_no)) {
- return;
- }
-
- /* If the following returns FALSE, we get the counter
- incremented, and must decrement it when we leave this
- function. When the counter is > 0, that prevents tablespace
- from being dropped. */
-
- tablespace_being_deleted = fil_inc_pending_ibuf_merges(space);
-
- if (UNIV_UNLIKELY(tablespace_being_deleted)) {
- /* Do not try to read the bitmap page from space;
- just delete the ibuf records for the page */
-
- block = NULL;
- update_ibuf_bitmap = FALSE;
- } else {
- page_t* bitmap_page;
-
- mtr_start(&mtr);
-
- bitmap_page = ibuf_bitmap_get_map_page(
- space, page_no, zip_size, &mtr);
-
- if (!ibuf_bitmap_page_get_bits(bitmap_page, page_no,
- zip_size,
- IBUF_BITMAP_BUFFERED,
- &mtr)) {
- /* No inserts buffered for this page */
- mtr_commit(&mtr);
-
- if (!tablespace_being_deleted) {
- fil_decr_pending_ibuf_merges(space);
- }
-
- return;
- }
- mtr_commit(&mtr);
- }
- } else if (block
- && (ibuf_fixed_addr_page(space, zip_size, page_no)
- || fsp_descr_page(zip_size, page_no))) {
-
- return;
- }
-
- ibuf_enter();
-
- heap = mem_heap_create(512);
-
- if (!trx_sys_multiple_tablespace_format) {
- ut_a(trx_doublewrite_must_reset_space_ids);
- search_tuple = ibuf_search_tuple_build(space, page_no, heap);
- } else {
- search_tuple = ibuf_new_search_tuple_build(space, page_no,
- heap);
- }
-
- if (block) {
- /* Move the ownership of the x-latch on the page to this OS
- thread, so that we can acquire a second x-latch on it. This
- is needed for the insert operations to the index page to pass
- the debug checks. */
-
- rw_lock_x_lock_move_ownership(&(block->lock));
- page_zip = buf_block_get_page_zip(block);
-
- if (UNIV_UNLIKELY(fil_page_get_type(block->frame)
- != FIL_PAGE_INDEX)
- || UNIV_UNLIKELY(!page_is_leaf(block->frame))) {
-
- page_t* bitmap_page;
-
- corruption_noticed = TRUE;
-
- ut_print_timestamp(stderr);
-
- mtr_start(&mtr);
-
- fputs(" InnoDB: Dump of the ibuf bitmap page:\n",
- stderr);
-
- bitmap_page = ibuf_bitmap_get_map_page(space, page_no,
- zip_size, &mtr);
- buf_page_print(bitmap_page, 0);
-
- mtr_commit(&mtr);
-
- fputs("\nInnoDB: Dump of the page:\n", stderr);
-
- buf_page_print(block->frame, 0);
-
- fprintf(stderr,
- "InnoDB: Error: corruption in the tablespace."
- " Bitmap shows insert\n"
- "InnoDB: buffer records to page n:o %lu"
- " though the page\n"
- "InnoDB: type is %lu, which is"
- " not an index leaf page!\n"
- "InnoDB: We try to resolve the problem"
- " by skipping the insert buffer\n"
- "InnoDB: merge for this page."
- " Please run CHECK TABLE on your tables\n"
- "InnoDB: to determine if they are corrupt"
- " after this.\n\n"
- "InnoDB: Please submit a detailed bug report"
- " to http://bugs.mysql.com\n\n",
- (ulong) page_no,
- (ulong)
- fil_page_get_type(block->frame));
- }
- }
-
- n_inserts = 0;
-#ifdef UNIV_IBUF_DEBUG
- volume = 0;
-#endif
-loop:
- mtr_start(&mtr);
-
- if (block) {
- ibool success;
-
- success = buf_page_get_known_nowait(
- RW_X_LATCH, block,
- BUF_KEEP_OLD, __FILE__, __LINE__, &mtr);
-
- ut_a(success);
-
- buf_block_dbg_add_level(block, SYNC_TREE_NODE);
- }
-
- /* Position pcur in the insert buffer at the first entry for this
- index page */
- btr_pcur_open_on_user_rec(
- ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF,
- &pcur, &mtr);
-
- if (!btr_pcur_is_on_user_rec(&pcur)) {
- ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
-
- goto reset_bit;
- }
-
- for (;;) {
- rec_t* rec;
-
- ut_ad(btr_pcur_is_on_user_rec(&pcur));
-
- rec = btr_pcur_get_rec(&pcur);
-
- /* Check if the entry is for this index page */
- if (ibuf_rec_get_page_no(rec) != page_no
- || ibuf_rec_get_space(rec) != space) {
-
- if (block) {
- page_header_reset_last_insert(
- block->frame, page_zip, &mtr);
- }
-
- goto reset_bit;
- }
-
- if (UNIV_UNLIKELY(corruption_noticed)) {
- fputs("InnoDB: Discarding record\n ", stderr);
- rec_print_old(stderr, rec);
- fputs("\nInnoDB: from the insert buffer!\n\n", stderr);
- } else if (block) {
- /* Now we have at pcur a record which should be
- inserted to the index page; NOTE that the call below
- copies pointers to fields in rec, and we must
- keep the latch to the rec page until the
- insertion is finished! */
- dtuple_t* entry;
- trx_id_t max_trx_id;
- dict_index_t* dummy_index;
-
- max_trx_id = page_get_max_trx_id(page_align(rec));
- page_update_max_trx_id(block, page_zip, max_trx_id,
- &mtr);
-
- entry = ibuf_build_entry_from_ibuf_rec(
- rec, heap, &dummy_index);
-#ifdef UNIV_IBUF_DEBUG
- volume += rec_get_converted_size(dummy_index, entry, 0)
- + page_dir_calc_reserved_space(1);
- ut_a(volume <= 4 * UNIV_PAGE_SIZE
- / IBUF_PAGE_SIZE_PER_FREE_SPACE);
-#endif
- ibuf_insert_to_index_page(entry, block,
- dummy_index, &mtr);
- ibuf_dummy_index_free(dummy_index);
- }
-
- n_inserts++;
-
- /* Delete the record from ibuf */
- if (ibuf_delete_rec(space, page_no, &pcur, search_tuple,
- &mtr)) {
- /* Deletion was pessimistic and mtr was committed:
- we start from the beginning again */
-
- goto loop;
- } else if (btr_pcur_is_after_last_on_page(&pcur)) {
- mtr_commit(&mtr);
- btr_pcur_close(&pcur);
-
- goto loop;
- }
- }
-
-reset_bit:
-#ifdef UNIV_IBUF_COUNT_DEBUG
- if (ibuf_count_get(space, page_no) > 0) {
- /* btr_print_tree(ibuf_data->index->tree, 100);
- ibuf_print(); */
- }
-#endif
- if (UNIV_LIKELY(update_ibuf_bitmap)) {
- page_t* bitmap_page;
-
- bitmap_page = ibuf_bitmap_get_map_page(
- space, page_no, zip_size, &mtr);
-
- ibuf_bitmap_page_set_bits(
- bitmap_page, page_no, zip_size,
- IBUF_BITMAP_BUFFERED, FALSE, &mtr);
-
- if (block) {
- ulint old_bits = ibuf_bitmap_page_get_bits(
- bitmap_page, page_no, zip_size,
- IBUF_BITMAP_FREE, &mtr);
-
- ulint new_bits = ibuf_index_page_calc_free(
- zip_size, block);
-
- if (old_bits != new_bits) {
- ibuf_bitmap_page_set_bits(
- bitmap_page, page_no, zip_size,
- IBUF_BITMAP_FREE, new_bits, &mtr);
- }
- }
- }
-
- mtr_commit(&mtr);
- btr_pcur_close(&pcur);
- mem_heap_free(heap);
-
- /* Protect our statistics keeping from race conditions */
- mutex_enter(&ibuf_mutex);
-
- ibuf->n_merges++;
- ibuf->n_merged_recs += n_inserts;
-
- mutex_exit(&ibuf_mutex);
-
- if (update_ibuf_bitmap && !tablespace_being_deleted) {
-
- fil_decr_pending_ibuf_merges(space);
- }
-
- ibuf_exit();
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(space, page_no) == 0);
-#endif
-}
-
-/*********************************************************************//**
-Deletes all entries in the insert buffer for a given space id. This is used
-in DISCARD TABLESPACE and IMPORT TABLESPACE.
-NOTE: this does not update the page free bitmaps in the space. The space will
-become CORRUPT when you call this function! */
-UNIV_INTERN
-void
-ibuf_delete_for_discarded_space(
-/*============================*/
- ulint space) /*!< in: space id */
-{
- mem_heap_t* heap;
- btr_pcur_t pcur;
- dtuple_t* search_tuple;
- rec_t* ibuf_rec;
- ulint page_no;
- ibool closed;
- ulint n_inserts;
- mtr_t mtr;
-
- heap = mem_heap_create(512);
-
- /* Use page number 0 to build the search tuple so that we get the
- cursor positioned at the first entry for this space id */
-
- search_tuple = ibuf_new_search_tuple_build(space, 0, heap);
-
- n_inserts = 0;
-loop:
- ibuf_enter();
-
- mtr_start(&mtr);
-
- /* Position pcur in the insert buffer at the first entry for the
- space */
- btr_pcur_open_on_user_rec(
- ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF,
- &pcur, &mtr);
-
- if (!btr_pcur_is_on_user_rec(&pcur)) {
- ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
-
- goto leave_loop;
- }
-
- for (;;) {
- ut_ad(btr_pcur_is_on_user_rec(&pcur));
-
- ibuf_rec = btr_pcur_get_rec(&pcur);
-
- /* Check if the entry is for this space */
- if (ibuf_rec_get_space(ibuf_rec) != space) {
-
- goto leave_loop;
- }
-
- page_no = ibuf_rec_get_page_no(ibuf_rec);
-
- n_inserts++;
-
- /* Delete the record from ibuf */
- closed = ibuf_delete_rec(space, page_no, &pcur, search_tuple,
- &mtr);
- if (closed) {
- /* Deletion was pessimistic and mtr was committed:
- we start from the beginning again */
-
- ibuf_exit();
-
- goto loop;
- }
-
- if (btr_pcur_is_after_last_on_page(&pcur)) {
- mtr_commit(&mtr);
- btr_pcur_close(&pcur);
-
- ibuf_exit();
-
- goto loop;
- }
- }
-
-leave_loop:
- mtr_commit(&mtr);
- btr_pcur_close(&pcur);
-
- /* Protect our statistics keeping from race conditions */
- mutex_enter(&ibuf_mutex);
-
- ibuf->n_merges++;
- ibuf->n_merged_recs += n_inserts;
-
- mutex_exit(&ibuf_mutex);
-
- ibuf_exit();
-
- mem_heap_free(heap);
-}
-
-/******************************************************************//**
-Looks if the insert buffer is empty.
-@return TRUE if empty */
-UNIV_INTERN
-ibool
-ibuf_is_empty(void)
-/*===============*/
-{
- ibool is_empty;
- const page_t* root;
- mtr_t mtr;
-
- ibuf_enter();
-
- mutex_enter(&ibuf_mutex);
-
- mtr_start(&mtr);
-
- root = ibuf_tree_root_get(&mtr);
-
- if (page_get_n_recs(root) == 0) {
-
- is_empty = TRUE;
-
- if (ibuf->empty == FALSE) {
- fprintf(stderr,
- "InnoDB: Warning: insert buffer tree is empty"
- " but the data struct does not\n"
- "InnoDB: know it. This condition is legal"
- " if the master thread has not yet\n"
- "InnoDB: run to completion.\n");
- }
- } else {
- ut_a(ibuf->empty == FALSE);
-
- is_empty = FALSE;
- }
-
- mtr_commit(&mtr);
-
- mutex_exit(&ibuf_mutex);
-
- ibuf_exit();
-
- return(is_empty);
-}
-
-/******************************************************************//**
-Prints info of ibuf. */
-UNIV_INTERN
-void
-ibuf_print(
-/*=======*/
- FILE* file) /*!< in: file where to print */
-{
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ulint i;
- ulint j;
-#endif
-
- mutex_enter(&ibuf_mutex);
-
- fprintf(file,
- "Ibuf: size %lu, free list len %lu, seg size %lu,\n"
- "%lu inserts, %lu merged recs, %lu merges\n",
- (ulong) ibuf->size,
- (ulong) ibuf->free_list_len,
- (ulong) ibuf->seg_size,
- (ulong) ibuf->n_inserts,
- (ulong) ibuf->n_merged_recs,
- (ulong) ibuf->n_merges);
-#ifdef UNIV_IBUF_COUNT_DEBUG
- for (i = 0; i < IBUF_COUNT_N_SPACES; i++) {
- for (j = 0; j < IBUF_COUNT_N_PAGES; j++) {
- ulint count = ibuf_count_get(i, j);
-
- if (count > 0) {
- fprintf(stderr,
- "Ibuf count for space/page %lu/%lu"
- " is %lu\n",
- (ulong) i, (ulong) j, (ulong) count);
- }
- }
- }
-#endif /* UNIV_IBUF_COUNT_DEBUG */
-
- mutex_exit(&ibuf_mutex);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/btr0btr.h b/storage/innodb_plugin/include/btr0btr.h
deleted file mode 100644
index d5c8258513c..00000000000
--- a/storage/innodb_plugin/include/btr0btr.h
+++ /dev/null
@@ -1,509 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/btr0btr.h
-The B-tree
-
-Created 6/2/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef btr0btr_h
-#define btr0btr_h
-
-#include "univ.i"
-
-#include "dict0dict.h"
-#include "data0data.h"
-#include "page0cur.h"
-#include "mtr0mtr.h"
-#include "btr0types.h"
-
-#ifndef UNIV_HOTBACKUP
-/** Maximum record size which can be stored on a page, without using the
-special big record storage structure */
-#define BTR_PAGE_MAX_REC_SIZE (UNIV_PAGE_SIZE / 2 - 200)
-
-/** @brief Maximum depth of a B-tree in InnoDB.
-
-Note that this isn't a maximum as such; none of the tree operations
-avoid producing trees bigger than this. It is instead a "max depth
-that other code must work with", useful for e.g. fixed-size arrays
-that must store some information about each level in a tree. In other
-words: if a B-tree with bigger depth than this is encountered, it is
-not acceptable for it to lead to mysterious memory corruption, but it
-is acceptable for the program to die with a clear assert failure. */
-#define BTR_MAX_LEVELS 100
-
-/** Latching modes for btr_cur_search_to_nth_level(). */
-enum btr_latch_mode {
- /** Search a record on a leaf page and S-latch it. */
- BTR_SEARCH_LEAF = RW_S_LATCH,
- /** (Prepare to) modify a record on a leaf page and X-latch it. */
- BTR_MODIFY_LEAF = RW_X_LATCH,
- /** Obtain no latches. */
- BTR_NO_LATCHES = RW_NO_LATCH,
- /** Start modifying the entire B-tree. */
- BTR_MODIFY_TREE = 33,
- /** Continue modifying the entire B-tree. */
- BTR_CONT_MODIFY_TREE = 34,
- /** Search the previous record. */
- BTR_SEARCH_PREV = 35,
- /** Modify the previous record. */
- BTR_MODIFY_PREV = 36
-};
-
-/** If this is ORed to btr_latch_mode, it means that the search tuple
-will be inserted to the index, at the searched position */
-#define BTR_INSERT 512
-
-/** This flag ORed to btr_latch_mode says that we do the search in query
-optimization */
-#define BTR_ESTIMATE 1024
-
-/** This flag ORed to btr_latch_mode says that we can ignore possible
-UNIQUE definition on secondary indexes when we decide if we can use
-the insert buffer to speed up inserts */
-#define BTR_IGNORE_SEC_UNIQUE 2048
-
-/**************************************************************//**
-Gets the root node of a tree and x-latches it.
-@return root page, x-latched */
-UNIV_INTERN
-page_t*
-btr_root_get(
-/*=========*/
- dict_index_t* index, /*!< in: index tree */
- mtr_t* mtr); /*!< in: mtr */
-/**************************************************************//**
-Gets a buffer page and declares its latching order level. */
-UNIV_INLINE
-buf_block_t*
-btr_block_get(
-/*==========*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number */
- ulint mode, /*!< in: latch mode */
- mtr_t* mtr); /*!< in: mtr */
-/**************************************************************//**
-Gets a buffer page and declares its latching order level. */
-UNIV_INLINE
-page_t*
-btr_page_get(
-/*=========*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number */
- ulint mode, /*!< in: latch mode */
- mtr_t* mtr); /*!< in: mtr */
-#endif /* !UNIV_HOTBACKUP */
-/**************************************************************//**
-Gets the index id field of a page.
-@return index id */
-UNIV_INLINE
-dulint
-btr_page_get_index_id(
-/*==================*/
- const page_t* page); /*!< in: index page */
-#ifndef UNIV_HOTBACKUP
-/********************************************************//**
-Gets the node level field in an index page.
-@return level, leaf level == 0 */
-UNIV_INLINE
-ulint
-btr_page_get_level_low(
-/*===================*/
- const page_t* page); /*!< in: index page */
-/********************************************************//**
-Gets the node level field in an index page.
-@return level, leaf level == 0 */
-UNIV_INLINE
-ulint
-btr_page_get_level(
-/*===============*/
- const page_t* page, /*!< in: index page */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************//**
-Gets the next index page number.
-@return next page number */
-UNIV_INLINE
-ulint
-btr_page_get_next(
-/*==============*/
- const page_t* page, /*!< in: index page */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************//**
-Gets the previous index page number.
-@return prev page number */
-UNIV_INLINE
-ulint
-btr_page_get_prev(
-/*==============*/
- const page_t* page, /*!< in: index page */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/*************************************************************//**
-Gets pointer to the previous user record in the tree. It is assumed
-that the caller has appropriate latches on the page and its neighbor.
-@return previous user record, NULL if there is none */
-UNIV_INTERN
-rec_t*
-btr_get_prev_user_rec(
-/*==================*/
- rec_t* rec, /*!< in: record on leaf level */
- mtr_t* mtr); /*!< in: mtr holding a latch on the page, and if
- needed, also to the previous page */
-/*************************************************************//**
-Gets pointer to the next user record in the tree. It is assumed
-that the caller has appropriate latches on the page and its neighbor.
-@return next user record, NULL if there is none */
-UNIV_INTERN
-rec_t*
-btr_get_next_user_rec(
-/*==================*/
- rec_t* rec, /*!< in: record on leaf level */
- mtr_t* mtr); /*!< in: mtr holding a latch on the page, and if
- needed, also to the next page */
-/**************************************************************//**
-Releases the latch on a leaf page and bufferunfixes it. */
-UNIV_INLINE
-void
-btr_leaf_page_release(
-/*==================*/
- buf_block_t* block, /*!< in: buffer block */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or
- BTR_MODIFY_LEAF */
- mtr_t* mtr); /*!< in: mtr */
-/**************************************************************//**
-Gets the child node file address in a node pointer.
-@return child node address */
-UNIV_INLINE
-ulint
-btr_node_ptr_get_child_page_no(
-/*===========================*/
- const rec_t* rec, /*!< in: node pointer record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-/************************************************************//**
-Creates the root node for a new index tree.
-@return page number of the created root, FIL_NULL if did not succeed */
-UNIV_INTERN
-ulint
-btr_create(
-/*=======*/
- ulint type, /*!< in: type of the index */
- ulint space, /*!< in: space where created */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- dulint index_id,/*!< in: index id */
- dict_index_t* index, /*!< in: index */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/************************************************************//**
-Frees a B-tree except the root page, which MUST be freed after this
-by calling btr_free_root. */
-UNIV_INTERN
-void
-btr_free_but_not_root(
-/*==================*/
- ulint space, /*!< in: space where created */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint root_page_no); /*!< in: root page number */
-/************************************************************//**
-Frees the B-tree root page. Other tree MUST already have been freed. */
-UNIV_INTERN
-void
-btr_free_root(
-/*==========*/
- ulint space, /*!< in: space where created */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint root_page_no, /*!< in: root page number */
- mtr_t* mtr); /*!< in: a mini-transaction which has already
- been started */
-/*************************************************************//**
-Makes tree one level higher by splitting the root, and inserts
-the tuple. It is assumed that mtr contains an x-latch on the tree.
-NOTE that the operation of this function must always succeed,
-we cannot reverse it: therefore enough free disk space must be
-guaranteed to be available before this function is called.
-@return inserted record */
-UNIV_INTERN
-rec_t*
-btr_root_raise_and_insert(
-/*======================*/
- btr_cur_t* cursor, /*!< in: cursor at which to insert: must be
- on the root page; when the function returns,
- the cursor is positioned on the predecessor
- of the inserted record */
- const dtuple_t* tuple, /*!< in: tuple to insert */
- ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr); /*!< in: mtr */
-/*************************************************************//**
-Reorganizes an index page.
-IMPORTANT: if btr_page_reorganize() is invoked on a compressed leaf
-page of a non-clustered index, the caller must update the insert
-buffer free bits in the same mini-transaction in such a way that the
-modification will be redo-logged.
-@return TRUE on success, FALSE on failure */
-UNIV_INTERN
-ibool
-btr_page_reorganize(
-/*================*/
- buf_block_t* block, /*!< in: page to be reorganized */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr); /*!< in: mtr */
-/*************************************************************//**
-Decides if the page should be split at the convergence point of
-inserts converging to left.
-@return TRUE if split recommended */
-UNIV_INTERN
-ibool
-btr_page_get_split_rec_to_left(
-/*===========================*/
- btr_cur_t* cursor, /*!< in: cursor at which to insert */
- rec_t** split_rec);/*!< out: if split recommended,
- the first record on upper half page,
- or NULL if tuple should be first */
-/*************************************************************//**
-Decides if the page should be split at the convergence point of
-inserts converging to right.
-@return TRUE if split recommended */
-UNIV_INTERN
-ibool
-btr_page_get_split_rec_to_right(
-/*============================*/
- btr_cur_t* cursor, /*!< in: cursor at which to insert */
- rec_t** split_rec);/*!< out: if split recommended,
- the first record on upper half page,
- or NULL if tuple should be first */
-/*************************************************************//**
-Splits an index page to halves and inserts the tuple. It is assumed
-that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is
-released within this function! NOTE that the operation of this
-function must always succeed, we cannot reverse it: therefore enough
-free disk space (2 pages) must be guaranteed to be available before
-this function is called.
-
-@return inserted record */
-UNIV_INTERN
-rec_t*
-btr_page_split_and_insert(
-/*======================*/
- btr_cur_t* cursor, /*!< in: cursor at which to insert; when the
- function returns, the cursor is positioned
- on the predecessor of the inserted record */
- const dtuple_t* tuple, /*!< in: tuple to insert */
- ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr); /*!< in: mtr */
-/*******************************************************//**
-Inserts a data tuple to a tree on a non-leaf level. It is assumed
-that mtr holds an x-latch on the tree. */
-UNIV_INTERN
-void
-btr_insert_on_non_leaf_level(
-/*=========================*/
- dict_index_t* index, /*!< in: index */
- ulint level, /*!< in: level, must be > 0 */
- dtuple_t* tuple, /*!< in: the record to be inserted */
- mtr_t* mtr); /*!< in: mtr */
-#endif /* !UNIV_HOTBACKUP */
-/****************************************************************//**
-Sets a record as the predefined minimum record. */
-UNIV_INTERN
-void
-btr_set_min_rec_mark(
-/*=================*/
- rec_t* rec, /*!< in/out: record */
- mtr_t* mtr); /*!< in: mtr */
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Deletes on the upper level the node pointer to a page. */
-UNIV_INTERN
-void
-btr_node_ptr_delete(
-/*================*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: page whose node pointer is deleted */
- mtr_t* mtr); /*!< in: mtr */
-#ifdef UNIV_DEBUG
-/************************************************************//**
-Checks that the node pointer to a page is appropriate.
-@return TRUE */
-UNIV_INTERN
-ibool
-btr_check_node_ptr(
-/*===============*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: index page */
- mtr_t* mtr); /*!< in: mtr */
-#endif /* UNIV_DEBUG */
-/*************************************************************//**
-Tries to merge the page first to the left immediate brother if such a
-brother exists, and the node pointers to the current page and to the
-brother reside on the same page. If the left brother does not satisfy these
-conditions, looks at the right brother. If the page is the only one on that
-level lifts the records of the page to the father page, thus reducing the
-tree height. It is assumed that mtr holds an x-latch on the tree and on the
-page. If cursor is on the leaf level, mtr must also hold x-latches to
-the brothers, if they exist.
-@return TRUE on success */
-UNIV_INTERN
-ibool
-btr_compress(
-/*=========*/
- btr_cur_t* cursor, /*!< in: cursor on the page to merge or lift;
- the page must not be empty: in record delete
- use btr_discard_page if the page would become
- empty */
- mtr_t* mtr); /*!< in: mtr */
-/*************************************************************//**
-Discards a page from a B-tree. This is used to remove the last record from
-a B-tree page: the whole page must be removed at the same time. This cannot
-be used for the root page, which is allowed to be empty. */
-UNIV_INTERN
-void
-btr_discard_page(
-/*=============*/
- btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on
- the root page */
- mtr_t* mtr); /*!< in: mtr */
-#endif /* !UNIV_HOTBACKUP */
-/****************************************************************//**
-Parses the redo log record for setting an index record as the predefined
-minimum record.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-btr_parse_set_min_rec_mark(
-/*=======================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- ulint comp, /*!< in: nonzero=compact page format */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
-/***********************************************************//**
-Parses a redo log record of reorganizing a page.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-btr_parse_page_reorganize(
-/*======================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- dict_index_t* index, /*!< in: record descriptor */
- buf_block_t* block, /*!< in: page to be reorganized, or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
-#ifndef UNIV_HOTBACKUP
-/**************************************************************//**
-Gets the number of pages in a B-tree.
-@return number of pages */
-UNIV_INTERN
-ulint
-btr_get_size(
-/*=========*/
- dict_index_t* index, /*!< in: index */
- ulint flag); /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
-/**************************************************************//**
-Allocates a new file page to be used in an index tree. NOTE: we assume
-that the caller has made the reservation for free extents!
-@return new allocated block, x-latched; NULL if out of space */
-UNIV_INTERN
-buf_block_t*
-btr_page_alloc(
-/*===========*/
- dict_index_t* index, /*!< in: index tree */
- ulint hint_page_no, /*!< in: hint of a good page */
- byte file_direction, /*!< in: direction where a possible
- page split is made */
- ulint level, /*!< in: level where the page is placed
- in the tree */
- mtr_t* mtr); /*!< in: mtr */
-/**************************************************************//**
-Frees a file page used in an index tree. NOTE: cannot free field external
-storage pages because the page must contain info on its level. */
-UNIV_INTERN
-void
-btr_page_free(
-/*==========*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: block to be freed, x-latched */
- mtr_t* mtr); /*!< in: mtr */
-/**************************************************************//**
-Frees a file page used in an index tree. Can be used also to BLOB
-external storage pages, because the page level 0 can be given as an
-argument. */
-UNIV_INTERN
-void
-btr_page_free_low(
-/*==============*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: block to be freed, x-latched */
- ulint level, /*!< in: page level */
- mtr_t* mtr); /*!< in: mtr */
-#ifdef UNIV_BTR_PRINT
-/*************************************************************//**
-Prints size info of a B-tree. */
-UNIV_INTERN
-void
-btr_print_size(
-/*===========*/
- dict_index_t* index); /*!< in: index tree */
-/**************************************************************//**
-Prints directories and other info of all nodes in the index. */
-UNIV_INTERN
-void
-btr_print_index(
-/*============*/
- dict_index_t* index, /*!< in: index */
- ulint width); /*!< in: print this many entries from start
- and end */
-#endif /* UNIV_BTR_PRINT */
-/************************************************************//**
-Checks the size and number of fields in a record based on the definition of
-the index.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-btr_index_rec_validate(
-/*===================*/
- const rec_t* rec, /*!< in: index record */
- const dict_index_t* index, /*!< in: index */
- ibool dump_on_error); /*!< in: TRUE if the function
- should print hex dump of record
- and page on error */
-/**************************************************************//**
-Checks the consistency of an index tree.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-btr_validate_index(
-/*===============*/
- dict_index_t* index, /*!< in: index */
- trx_t* trx); /*!< in: transaction or NULL */
-
-#define BTR_N_LEAF_PAGES 1
-#define BTR_TOTAL_SIZE 2
-#endif /* !UNIV_HOTBACKUP */
-
-#ifndef UNIV_NONINL
-#include "btr0btr.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/btr0btr.ic b/storage/innodb_plugin/include/btr0btr.ic
deleted file mode 100644
index 2259d22c9a6..00000000000
--- a/storage/innodb_plugin/include/btr0btr.ic
+++ /dev/null
@@ -1,310 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/btr0btr.ic
-The B-tree
-
-Created 6/2/1994 Heikki Tuuri
-*******************************************************/
-
-#include "mach0data.h"
-#ifndef UNIV_HOTBACKUP
-#include "mtr0mtr.h"
-#include "mtr0log.h"
-#include "page0zip.h"
-
-#define BTR_MAX_NODE_LEVEL 50 /*!< Maximum B-tree page level
- (not really a hard limit).
- Used in debug assertions
- in btr_page_set_level and
- btr_page_get_level_low */
-
-/**************************************************************//**
-Gets a buffer page and declares its latching order level. */
-UNIV_INLINE
-buf_block_t*
-btr_block_get(
-/*==========*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number */
- ulint mode, /*!< in: latch mode */
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_block_t* block;
-
- block = buf_page_get(space, zip_size, page_no, mode, mtr);
-
- if (mode != RW_NO_LATCH) {
-
- buf_block_dbg_add_level(block, SYNC_TREE_NODE);
- }
-
- return(block);
-}
-
-/**************************************************************//**
-Gets a buffer page and declares its latching order level. */
-UNIV_INLINE
-page_t*
-btr_page_get(
-/*=========*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number */
- ulint mode, /*!< in: latch mode */
- mtr_t* mtr) /*!< in: mtr */
-{
- return(buf_block_get_frame(btr_block_get(space, zip_size, page_no,
- mode, mtr)));
-}
-
-/**************************************************************//**
-Sets the index id field of a page. */
-UNIV_INLINE
-void
-btr_page_set_index_id(
-/*==================*/
- page_t* page, /*!< in: page to be created */
- page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
- part will be updated, or NULL */
- dulint id, /*!< in: index id */
- mtr_t* mtr) /*!< in: mtr */
-{
- if (UNIV_LIKELY_NULL(page_zip)) {
- mach_write_to_8(page + (PAGE_HEADER + PAGE_INDEX_ID), id);
- page_zip_write_header(page_zip,
- page + (PAGE_HEADER + PAGE_INDEX_ID),
- 8, mtr);
- } else {
- mlog_write_dulint(page + (PAGE_HEADER + PAGE_INDEX_ID),
- id, mtr);
- }
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**************************************************************//**
-Gets the index id field of a page.
-@return index id */
-UNIV_INLINE
-dulint
-btr_page_get_index_id(
-/*==================*/
- const page_t* page) /*!< in: index page */
-{
- return(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID));
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************//**
-Gets the node level field in an index page.
-@return level, leaf level == 0 */
-UNIV_INLINE
-ulint
-btr_page_get_level_low(
-/*===================*/
- const page_t* page) /*!< in: index page */
-{
- ulint level;
-
- ut_ad(page);
-
- level = mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL);
-
- ut_ad(level <= BTR_MAX_NODE_LEVEL);
-
- return(level);
-}
-
-/********************************************************//**
-Gets the node level field in an index page.
-@return level, leaf level == 0 */
-UNIV_INLINE
-ulint
-btr_page_get_level(
-/*===============*/
- const page_t* page, /*!< in: index page */
- mtr_t* mtr __attribute__((unused)))
- /*!< in: mini-transaction handle */
-{
- ut_ad(page && mtr);
-
- return(btr_page_get_level_low(page));
-}
-
-/********************************************************//**
-Sets the node level field in an index page. */
-UNIV_INLINE
-void
-btr_page_set_level(
-/*===============*/
- page_t* page, /*!< in: index page */
- page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
- part will be updated, or NULL */
- ulint level, /*!< in: level, leaf level == 0 */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ut_ad(page && mtr);
- ut_ad(level <= BTR_MAX_NODE_LEVEL);
-
- if (UNIV_LIKELY_NULL(page_zip)) {
- mach_write_to_2(page + (PAGE_HEADER + PAGE_LEVEL), level);
- page_zip_write_header(page_zip,
- page + (PAGE_HEADER + PAGE_LEVEL),
- 2, mtr);
- } else {
- mlog_write_ulint(page + (PAGE_HEADER + PAGE_LEVEL), level,
- MLOG_2BYTES, mtr);
- }
-}
-
-/********************************************************//**
-Gets the next index page number.
-@return next page number */
-UNIV_INLINE
-ulint
-btr_page_get_next(
-/*==============*/
- const page_t* page, /*!< in: index page */
- mtr_t* mtr __attribute__((unused)))
- /*!< in: mini-transaction handle */
-{
- ut_ad(page && mtr);
- ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_S_FIX));
-
- return(mach_read_from_4(page + FIL_PAGE_NEXT));
-}
-
-/********************************************************//**
-Sets the next index page field. */
-UNIV_INLINE
-void
-btr_page_set_next(
-/*==============*/
- page_t* page, /*!< in: index page */
- page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
- part will be updated, or NULL */
- ulint next, /*!< in: next page number */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ut_ad(page && mtr);
-
- if (UNIV_LIKELY_NULL(page_zip)) {
- mach_write_to_4(page + FIL_PAGE_NEXT, next);
- page_zip_write_header(page_zip, page + FIL_PAGE_NEXT, 4, mtr);
- } else {
- mlog_write_ulint(page + FIL_PAGE_NEXT, next, MLOG_4BYTES, mtr);
- }
-}
-
-/********************************************************//**
-Gets the previous index page number.
-@return prev page number */
-UNIV_INLINE
-ulint
-btr_page_get_prev(
-/*==============*/
- const page_t* page, /*!< in: index page */
- mtr_t* mtr __attribute__((unused))) /*!< in: mini-transaction handle */
-{
- ut_ad(page && mtr);
-
- return(mach_read_from_4(page + FIL_PAGE_PREV));
-}
-
-/********************************************************//**
-Sets the previous index page field. */
-UNIV_INLINE
-void
-btr_page_set_prev(
-/*==============*/
- page_t* page, /*!< in: index page */
- page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
- part will be updated, or NULL */
- ulint prev, /*!< in: previous page number */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ut_ad(page && mtr);
-
- if (UNIV_LIKELY_NULL(page_zip)) {
- mach_write_to_4(page + FIL_PAGE_PREV, prev);
- page_zip_write_header(page_zip, page + FIL_PAGE_PREV, 4, mtr);
- } else {
- mlog_write_ulint(page + FIL_PAGE_PREV, prev, MLOG_4BYTES, mtr);
- }
-}
-
-/**************************************************************//**
-Gets the child node file address in a node pointer.
-@return child node address */
-UNIV_INLINE
-ulint
-btr_node_ptr_get_child_page_no(
-/*===========================*/
- const rec_t* rec, /*!< in: node pointer record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- const byte* field;
- ulint len;
- ulint page_no;
-
- ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));
-
- /* The child address is in the last field */
- field = rec_get_nth_field(rec, offsets,
- rec_offs_n_fields(offsets) - 1, &len);
-
- ut_ad(len == 4);
-
- page_no = mach_read_from_4(field);
-
- if (UNIV_UNLIKELY(page_no == 0)) {
- fprintf(stderr,
- "InnoDB: a nonsensical page number 0"
- " in a node ptr record at offset %lu\n",
- (ulong) page_offset(rec));
- buf_page_print(page_align(rec), 0);
- }
-
- return(page_no);
-}
-
-/**************************************************************//**
-Releases the latches on a leaf page and bufferunfixes it. */
-UNIV_INLINE
-void
-btr_leaf_page_release(
-/*==================*/
- buf_block_t* block, /*!< in: buffer block */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or
- BTR_MODIFY_LEAF */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF);
- ut_ad(!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY));
-
- mtr_memo_release(mtr, block,
- latch_mode == BTR_SEARCH_LEAF
- ? MTR_MEMO_PAGE_S_FIX
- : MTR_MEMO_PAGE_X_FIX);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/btr0cur.h b/storage/innodb_plugin/include/btr0cur.h
deleted file mode 100644
index 480a3877e54..00000000000
--- a/storage/innodb_plugin/include/btr0cur.h
+++ /dev/null
@@ -1,753 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/btr0cur.h
-The index tree cursor
-
-Created 10/16/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef btr0cur_h
-#define btr0cur_h
-
-#include "univ.i"
-#include "dict0dict.h"
-#include "page0cur.h"
-#include "btr0types.h"
-
-/* Mode flags for btr_cur operations; these can be ORed */
-#define BTR_NO_UNDO_LOG_FLAG 1 /* do no undo logging */
-#define BTR_NO_LOCKING_FLAG 2 /* do no record lock checking */
-#define BTR_KEEP_SYS_FLAG 4 /* sys fields will be found from the
- update vector or inserted entry */
-
-#ifndef UNIV_HOTBACKUP
-#include "que0types.h"
-#include "row0types.h"
-#include "ha0ha.h"
-
-#define BTR_CUR_ADAPT
-#define BTR_CUR_HASH_ADAPT
-
-#ifdef UNIV_DEBUG
-/*********************************************************//**
-Returns the page cursor component of a tree cursor.
-@return pointer to page cursor component */
-UNIV_INLINE
-page_cur_t*
-btr_cur_get_page_cur(
-/*=================*/
- const btr_cur_t* cursor);/*!< in: tree cursor */
-#else /* UNIV_DEBUG */
-# define btr_cur_get_page_cur(cursor) (&(cursor)->page_cur)
-#endif /* UNIV_DEBUG */
-/*********************************************************//**
-Returns the buffer block on which the tree cursor is positioned.
-@return pointer to buffer block */
-UNIV_INLINE
-buf_block_t*
-btr_cur_get_block(
-/*==============*/
- btr_cur_t* cursor);/*!< in: tree cursor */
-/*********************************************************//**
-Returns the record pointer of a tree cursor.
-@return pointer to record */
-UNIV_INLINE
-rec_t*
-btr_cur_get_rec(
-/*============*/
- btr_cur_t* cursor);/*!< in: tree cursor */
-/*********************************************************//**
-Returns the compressed page on which the tree cursor is positioned.
-@return pointer to compressed page, or NULL if the page is not compressed */
-UNIV_INLINE
-page_zip_des_t*
-btr_cur_get_page_zip(
-/*=================*/
- btr_cur_t* cursor);/*!< in: tree cursor */
-/*********************************************************//**
-Invalidates a tree cursor by setting record pointer to NULL. */
-UNIV_INLINE
-void
-btr_cur_invalidate(
-/*===============*/
- btr_cur_t* cursor);/*!< in: tree cursor */
-/*********************************************************//**
-Returns the page of a tree cursor.
-@return pointer to page */
-UNIV_INLINE
-page_t*
-btr_cur_get_page(
-/*=============*/
- btr_cur_t* cursor);/*!< in: tree cursor */
-/*********************************************************//**
-Returns the index of a cursor.
-@return index */
-UNIV_INLINE
-dict_index_t*
-btr_cur_get_index(
-/*==============*/
- btr_cur_t* cursor);/*!< in: B-tree cursor */
-/*********************************************************//**
-Positions a tree cursor at a given record. */
-UNIV_INLINE
-void
-btr_cur_position(
-/*=============*/
- dict_index_t* index, /*!< in: index */
- rec_t* rec, /*!< in: record in tree */
- buf_block_t* block, /*!< in: buffer block of rec */
- btr_cur_t* cursor);/*!< in: cursor */
-/********************************************************************//**
-Searches an index tree and positions a tree cursor on a given level.
-NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
-to node pointer page number fields on the upper levels of the tree!
-Note that if mode is PAGE_CUR_LE, which is used in inserts, then
-cursor->up_match and cursor->low_match both will have sensible values.
-If mode is PAGE_CUR_GE, then up_match will a have a sensible value. */
-UNIV_INTERN
-void
-btr_cur_search_to_nth_level(
-/*========================*/
- dict_index_t* index, /*!< in: index */
- ulint level, /*!< in: the tree level of search */
- const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in
- tuple must be set so that it cannot get
- compared to the node ptr page number field! */
- ulint mode, /*!< in: PAGE_CUR_L, ...;
- NOTE that if the search is made using a unique
- prefix of a record, mode should be PAGE_CUR_LE,
- not PAGE_CUR_GE, as the latter may end up on
- the previous page of the record! Inserts
- should always be made using PAGE_CUR_LE to
- search the position! */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with
- BTR_INSERT and BTR_ESTIMATE;
- cursor->left_block is used to store a pointer
- to the left neighbor page, in the cases
- BTR_SEARCH_PREV and BTR_MODIFY_PREV;
- NOTE that if has_search_latch
- is != 0, we maybe do not have a latch set
- on the cursor page, we assume
- the caller uses his search latch
- to protect the record! */
- btr_cur_t* cursor, /*!< in/out: tree cursor; the cursor page is
- s- or x-latched, but see also above! */
- ulint has_search_latch,/*!< in: latch mode the caller
- currently has on btr_search_latch:
- RW_S_LATCH, or 0 */
- mtr_t* mtr); /*!< in: mtr */
-/*****************************************************************//**
-Opens a cursor at either end of an index. */
-UNIV_INTERN
-void
-btr_cur_open_at_index_side(
-/*=======================*/
- ibool from_left, /*!< in: TRUE if open to the low end,
- FALSE if to the high end */
- dict_index_t* index, /*!< in: index */
- ulint latch_mode, /*!< in: latch mode */
- btr_cur_t* cursor, /*!< in: cursor */
- mtr_t* mtr); /*!< in: mtr */
-/**********************************************************************//**
-Positions a cursor at a randomly chosen position within a B-tree. */
-UNIV_INTERN
-void
-btr_cur_open_at_rnd_pos(
-/*====================*/
- dict_index_t* index, /*!< in: index */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
- btr_cur_t* cursor, /*!< in/out: B-tree cursor */
- mtr_t* mtr); /*!< in: mtr */
-/*************************************************************//**
-Tries to perform an insert to a page in an index tree, next to cursor.
-It is assumed that mtr holds an x-latch on the page. The operation does
-not succeed if there is too little space on the page. If there is just
-one record on the page, the insert will always succeed; this is to
-prevent trying to split a page with just one record.
-@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
-UNIV_INTERN
-ulint
-btr_cur_optimistic_insert(
-/*======================*/
- ulint flags, /*!< in: undo logging and locking flags: if not
- zero, the parameters index and thr should be
- specified */
- btr_cur_t* cursor, /*!< in: cursor on page after which to insert;
- cursor stays valid */
- dtuple_t* entry, /*!< in/out: entry to insert */
- rec_t** rec, /*!< out: pointer to inserted record if
- succeed */
- big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
- be stored externally by the caller, or
- NULL */
- ulint n_ext, /*!< in: number of externally stored columns */
- que_thr_t* thr, /*!< in: query thread or NULL */
- mtr_t* mtr); /*!< in: mtr; if this function returns
- DB_SUCCESS on a leaf page of a secondary
- index in a compressed tablespace, the
- mtr must be committed before latching
- any further pages */
-/*************************************************************//**
-Performs an insert on a page of an index tree. It is assumed that mtr
-holds an x-latch on the tree and on the cursor page. If the insert is
-made on the leaf level, to avoid deadlocks, mtr must also own x-latches
-to brothers of page, if those brothers exist.
-@return DB_SUCCESS or error number */
-UNIV_INTERN
-ulint
-btr_cur_pessimistic_insert(
-/*=======================*/
- ulint flags, /*!< in: undo logging and locking flags: if not
- zero, the parameter thr should be
- specified; if no undo logging is specified,
- then the caller must have reserved enough
- free extents in the file space so that the
- insertion will certainly succeed */
- btr_cur_t* cursor, /*!< in: cursor after which to insert;
- cursor stays valid */
- dtuple_t* entry, /*!< in/out: entry to insert */
- rec_t** rec, /*!< out: pointer to inserted record if
- succeed */
- big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
- be stored externally by the caller, or
- NULL */
- ulint n_ext, /*!< in: number of externally stored columns */
- que_thr_t* thr, /*!< in: query thread or NULL */
- mtr_t* mtr); /*!< in: mtr */
-/*************************************************************//**
-Updates a record when the update causes no size changes in its fields.
-@return DB_SUCCESS or error number */
-UNIV_INTERN
-ulint
-btr_cur_update_in_place(
-/*====================*/
- ulint flags, /*!< in: undo logging and locking flags */
- btr_cur_t* cursor, /*!< in: cursor on the record to update;
- cursor stays valid and positioned on the
- same record */
- const upd_t* update, /*!< in: update vector */
- ulint cmpl_info,/*!< in: compiler info on secondary index
- updates */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr); /*!< in: mtr; must be committed before
- latching any further pages */
-/*************************************************************//**
-Tries to update a record on a page in an index tree. It is assumed that mtr
-holds an x-latch on the page. The operation does not succeed if there is too
-little space on the page or if the update would result in too empty a page,
-so that tree compression is recommended.
-@return DB_SUCCESS, or DB_OVERFLOW if the updated record does not fit,
-DB_UNDERFLOW if the page would become too empty, or DB_ZIP_OVERFLOW if
-there is not enough space left on the compressed page */
-UNIV_INTERN
-ulint
-btr_cur_optimistic_update(
-/*======================*/
- ulint flags, /*!< in: undo logging and locking flags */
- btr_cur_t* cursor, /*!< in: cursor on the record to update;
- cursor stays valid and positioned on the
- same record */
- const upd_t* update, /*!< in: update vector; this must also
- contain trx id and roll ptr fields */
- ulint cmpl_info,/*!< in: compiler info on secondary index
- updates */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr); /*!< in: mtr; must be committed before
- latching any further pages */
-/*************************************************************//**
-Performs an update of a record on a page of a tree. It is assumed
-that mtr holds an x-latch on the tree and on the cursor page. If the
-update is made on the leaf level, to avoid deadlocks, mtr must also
-own x-latches to brothers of page, if those brothers exist.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ulint
-btr_cur_pessimistic_update(
-/*=======================*/
- ulint flags, /*!< in: undo logging, locking, and rollback
- flags */
- btr_cur_t* cursor, /*!< in: cursor on the record to update */
- mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
- big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
- be stored externally by the caller, or NULL */
- const upd_t* update, /*!< in: update vector; this is allowed also
- contain trx id and roll ptr fields, but
- the values in update vector have no effect */
- ulint cmpl_info,/*!< in: compiler info on secondary index
- updates */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr); /*!< in: mtr; must be committed before
- latching any further pages */
-/***********************************************************//**
-Marks a clustered index record deleted. Writes an undo log record to
-undo log on this delete marking. Writes in the trx id field the id
-of the deleting transaction, and in the roll ptr field pointer to the
-undo log record created.
-@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
-UNIV_INTERN
-ulint
-btr_cur_del_mark_set_clust_rec(
-/*===========================*/
- ulint flags, /*!< in: undo logging and locking flags */
- btr_cur_t* cursor, /*!< in: cursor */
- ibool val, /*!< in: value to set */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr); /*!< in: mtr */
-/***********************************************************//**
-Sets a secondary index record delete mark to TRUE or FALSE.
-@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
-UNIV_INTERN
-ulint
-btr_cur_del_mark_set_sec_rec(
-/*=========================*/
- ulint flags, /*!< in: locking flag */
- btr_cur_t* cursor, /*!< in: cursor */
- ibool val, /*!< in: value to set */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr); /*!< in: mtr */
-/***********************************************************//**
-Clear a secondary index record's delete mark. This function is only
-used by the insert buffer insert merge mechanism. */
-UNIV_INTERN
-void
-btr_cur_del_unmark_for_ibuf(
-/*========================*/
- rec_t* rec, /*!< in/out: record to delete unmark */
- page_zip_des_t* page_zip, /*!< in/out: compressed page
- corresponding to rec, or NULL
- when the tablespace is
- uncompressed */
- mtr_t* mtr); /*!< in: mtr */
-/*************************************************************//**
-Tries to compress a page of the tree if it seems useful. It is assumed
-that mtr holds an x-latch on the tree and on the cursor page. To avoid
-deadlocks, mtr must also own x-latches to brothers of page, if those
-brothers exist. NOTE: it is assumed that the caller has reserved enough
-free extents so that the compression will always succeed if done!
-@return TRUE if compression occurred */
-UNIV_INTERN
-ibool
-btr_cur_compress_if_useful(
-/*=======================*/
- btr_cur_t* cursor, /*!< in: cursor on the page to compress;
- cursor does not stay valid if compression
- occurs */
- mtr_t* mtr); /*!< in: mtr */
-/*******************************************************//**
-Removes the record on which the tree cursor is positioned. It is assumed
-that the mtr has an x-latch on the page where the cursor is positioned,
-but no latch on the whole tree.
-@return TRUE if success, i.e., the page did not become too empty */
-UNIV_INTERN
-ibool
-btr_cur_optimistic_delete(
-/*======================*/
- btr_cur_t* cursor, /*!< in: cursor on the record to delete;
- cursor stays valid: if deletion succeeds,
- on function exit it points to the successor
- of the deleted record */
- mtr_t* mtr); /*!< in: mtr; if this function returns
- TRUE on a leaf page of a secondary
- index, the mtr must be committed
- before latching any further pages */
-/*************************************************************//**
-Removes the record on which the tree cursor is positioned. Tries
-to compress the page if its fillfactor drops below a threshold
-or if it is the only page on the level. It is assumed that mtr holds
-an x-latch on the tree and on the cursor page. To avoid deadlocks,
-mtr must also own x-latches to brothers of page, if those brothers
-exist.
-@return TRUE if compression occurred */
-UNIV_INTERN
-ibool
-btr_cur_pessimistic_delete(
-/*=======================*/
- ulint* err, /*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
- the latter may occur because we may have
- to update node pointers on upper levels,
- and in the case of variable length keys
- these may actually grow in size */
- ibool has_reserved_extents, /*!< in: TRUE if the
- caller has already reserved enough free
- extents so that he knows that the operation
- will succeed */
- btr_cur_t* cursor, /*!< in: cursor on the record to delete;
- if compression does not occur, the cursor
- stays valid: it points to successor of
- deleted record on function exit */
- enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
- mtr_t* mtr); /*!< in: mtr */
-#endif /* !UNIV_HOTBACKUP */
-/***********************************************************//**
-Parses a redo log record of updating a record in-place.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-btr_cur_parse_update_in_place(
-/*==========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in/out: page or NULL */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- dict_index_t* index); /*!< in: index corresponding to page */
-/****************************************************************//**
-Parses the redo log record for delete marking or unmarking of a clustered
-index record.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-btr_cur_parse_del_mark_set_clust_rec(
-/*=================================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in/out: page or NULL */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- dict_index_t* index); /*!< in: index corresponding to page */
-/****************************************************************//**
-Parses the redo log record for delete marking or unmarking of a secondary
-index record.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-btr_cur_parse_del_mark_set_sec_rec(
-/*===============================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in/out: page or NULL */
- page_zip_des_t* page_zip);/*!< in/out: compressed page, or NULL */
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Estimates the number of rows in a given index range.
-@return estimated number of rows */
-UNIV_INTERN
-ib_int64_t
-btr_estimate_n_rows_in_range(
-/*=========================*/
- dict_index_t* index, /*!< in: index */
- const dtuple_t* tuple1, /*!< in: range start, may also be empty tuple */
- ulint mode1, /*!< in: search mode for range start */
- const dtuple_t* tuple2, /*!< in: range end, may also be empty tuple */
- ulint mode2); /*!< in: search mode for range end */
-/*******************************************************************//**
-Estimates the number of different key values in a given index, for
-each n-column prefix of the index where n <= dict_index_get_n_unique(index).
-The estimates are stored in the array index->stat_n_diff_key_vals. */
-UNIV_INTERN
-void
-btr_estimate_number_of_different_key_vals(
-/*======================================*/
- dict_index_t* index); /*!< in: index */
-/*******************************************************************//**
-Marks not updated extern fields as not-owned by this record. The ownership
-is transferred to the updated record which is inserted elsewhere in the
-index tree. In purge only the owner of externally stored field is allowed
-to free the field. */
-UNIV_INTERN
-void
-btr_cur_mark_extern_inherited_fields(
-/*=================================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed
- part will be updated, or NULL */
- rec_t* rec, /*!< in/out: record in a clustered index */
- dict_index_t* index, /*!< in: index of the page */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- const upd_t* update, /*!< in: update vector */
- mtr_t* mtr); /*!< in: mtr, or NULL if not logged */
-/*******************************************************************//**
-The complement of the previous function: in an update entry may inherit
-some externally stored fields from a record. We must mark them as inherited
-in entry, so that they are not freed in a rollback. */
-UNIV_INTERN
-void
-btr_cur_mark_dtuple_inherited_extern(
-/*=================================*/
- dtuple_t* entry, /*!< in/out: updated entry to be
- inserted to clustered index */
- const upd_t* update); /*!< in: update vector */
-/*******************************************************************//**
-Marks all extern fields in a dtuple as owned by the record. */
-UNIV_INTERN
-void
-btr_cur_unmark_dtuple_extern_fields(
-/*================================*/
- dtuple_t* entry); /*!< in/out: clustered index entry */
-/*******************************************************************//**
-Stores the fields in big_rec_vec to the tablespace and puts pointers to
-them in rec. The extern flags in rec will have to be set beforehand.
-The fields are stored on pages allocated from leaf node
-file segment of the index tree.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ulint
-btr_store_big_rec_extern_fields(
-/*============================*/
- dict_index_t* index, /*!< in: index of rec; the index tree
- MUST be X-latched */
- buf_block_t* rec_block, /*!< in/out: block containing rec */
- rec_t* rec, /*!< in: record */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index);
- the "external storage" flags in offsets
- will not correspond to rec when
- this function returns */
- big_rec_t* big_rec_vec, /*!< in: vector containing fields
- to be stored externally */
- mtr_t* local_mtr); /*!< in: mtr containing the latch to
- rec and to the tree */
-/*******************************************************************//**
-Frees the space in an externally stored field to the file space
-management if the field in data is owned the externally stored field,
-in a rollback we may have the additional condition that the field must
-not be inherited. */
-UNIV_INTERN
-void
-btr_free_externally_stored_field(
-/*=============================*/
- dict_index_t* index, /*!< in: index of the data, the index
- tree MUST be X-latched; if the tree
- height is 1, then also the root page
- must be X-latched! (this is relevant
- in the case this function is called
- from purge where 'data' is located on
- an undo log page, not an index
- page) */
- byte* field_ref, /*!< in/out: field reference */
- const rec_t* rec, /*!< in: record containing field_ref, for
- page_zip_write_blob_ptr(), or NULL */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index),
- or NULL */
- page_zip_des_t* page_zip, /*!< in: compressed page corresponding
- to rec, or NULL if rec == NULL */
- ulint i, /*!< in: field number of field_ref;
- ignored if rec == NULL */
- enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
- mtr_t* local_mtr); /*!< in: mtr containing the latch to
- data an an X-latch to the index
- tree */
-/*******************************************************************//**
-Copies the prefix of an externally stored field of a record. The
-clustered index record must be protected by a lock or a page latch.
-@return the length of the copied field, or 0 if the column was being
-or has been deleted */
-UNIV_INTERN
-ulint
-btr_copy_externally_stored_field_prefix(
-/*====================================*/
- byte* buf, /*!< out: the field, or a prefix of it */
- ulint len, /*!< in: length of buf, in bytes */
- ulint zip_size,/*!< in: nonzero=compressed BLOB page size,
- zero for uncompressed BLOBs */
- const byte* data, /*!< in: 'internally' stored part of the
- field containing also the reference to
- the external part; must be protected by
- a lock or a page latch */
- ulint local_len);/*!< in: length of data, in bytes */
-/*******************************************************************//**
-Copies an externally stored field of a record to mem heap.
-@return the field copied to heap */
-UNIV_INTERN
-byte*
-btr_rec_copy_externally_stored_field(
-/*=================================*/
- const rec_t* rec, /*!< in: record in a clustered index;
- must be protected by a lock or a page latch */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint zip_size,/*!< in: nonzero=compressed BLOB page size,
- zero for uncompressed BLOBs */
- ulint no, /*!< in: field number */
- ulint* len, /*!< out: length of the field */
- mem_heap_t* heap); /*!< in: mem heap */
-/*******************************************************************//**
-Flags the data tuple fields that are marked as extern storage in the
-update vector. We use this function to remember which fields we must
-mark as extern storage in a record inserted for an update.
-@return number of flagged external columns */
-UNIV_INTERN
-ulint
-btr_push_update_extern_fields(
-/*==========================*/
- dtuple_t* tuple, /*!< in/out: data tuple */
- const upd_t* update, /*!< in: update vector */
- mem_heap_t* heap) /*!< in: memory heap */
- __attribute__((nonnull));
-
-/*######################################################################*/
-
-/** In the pessimistic delete, if the page data size drops below this
-limit, merging it to a neighbor is tried */
-#define BTR_CUR_PAGE_COMPRESS_LIMIT (UNIV_PAGE_SIZE / 2)
-
-/** A slot in the path array. We store here info on a search path down the
-tree. Each slot contains data on a single level of the tree. */
-
-typedef struct btr_path_struct btr_path_t;
-struct btr_path_struct{
- ulint nth_rec; /*!< index of the record
- where the page cursor stopped on
- this level (index in alphabetical
- order); value ULINT_UNDEFINED
- denotes array end */
- ulint n_recs; /*!< number of records on the page */
-};
-
-#define BTR_PATH_ARRAY_N_SLOTS 250 /*!< size of path array (in slots) */
-
-/** Values for the flag documenting the used search method */
-enum btr_cur_method {
- BTR_CUR_HASH = 1, /*!< successful shortcut using
- the hash index */
- BTR_CUR_HASH_FAIL, /*!< failure using hash, success using
- binary search: the misleading hash
- reference is stored in the field
- hash_node, and might be necessary to
- update */
- BTR_CUR_BINARY, /*!< success using the binary search */
- BTR_CUR_INSERT_TO_IBUF /*!< performed the intended insert to
- the insert buffer */
-};
-
-/** The tree cursor: the definition appears here only for the compiler
-to know struct size! */
-struct btr_cur_struct {
- dict_index_t* index; /*!< index where positioned */
- page_cur_t page_cur; /*!< page cursor */
- buf_block_t* left_block; /*!< this field is used to store
- a pointer to the left neighbor
- page, in the cases
- BTR_SEARCH_PREV and
- BTR_MODIFY_PREV */
- /*------------------------------*/
- que_thr_t* thr; /*!< this field is only used
- when btr_cur_search_to_nth_level
- is called for an index entry
- insertion: the calling query
- thread is passed here to be
- used in the insert buffer */
- /*------------------------------*/
- /** The following fields are used in
- btr_cur_search_to_nth_level to pass information: */
- /* @{ */
- enum btr_cur_method flag; /*!< Search method used */
- ulint tree_height; /*!< Tree height if the search is done
- for a pessimistic insert or update
- operation */
- ulint up_match; /*!< If the search mode was PAGE_CUR_LE,
- the number of matched fields to the
- the first user record to the right of
- the cursor record after
- btr_cur_search_to_nth_level;
- for the mode PAGE_CUR_GE, the matched
- fields to the first user record AT THE
- CURSOR or to the right of it;
- NOTE that the up_match and low_match
- values may exceed the correct values
- for comparison to the adjacent user
- record if that record is on a
- different leaf page! (See the note in
- row_ins_duplicate_key.) */
- ulint up_bytes; /*!< number of matched bytes to the
- right at the time cursor positioned;
- only used internally in searches: not
- defined after the search */
- ulint low_match; /*!< if search mode was PAGE_CUR_LE,
- the number of matched fields to the
- first user record AT THE CURSOR or
- to the left of it after
- btr_cur_search_to_nth_level;
- NOT defined for PAGE_CUR_GE or any
- other search modes; see also the NOTE
- in up_match! */
- ulint low_bytes; /*!< number of matched bytes to the
- right at the time cursor positioned;
- only used internally in searches: not
- defined after the search */
- ulint n_fields; /*!< prefix length used in a hash
- search if hash_node != NULL */
- ulint n_bytes; /*!< hash prefix bytes if hash_node !=
- NULL */
- ulint fold; /*!< fold value used in the search if
- flag is BTR_CUR_HASH */
- /*------------------------------*/
- /* @} */
- btr_path_t* path_arr; /*!< in estimating the number of
- rows in range, we store in this array
- information of the path through
- the tree */
-};
-
-/** If pessimistic delete fails because of lack of file space, there
-is still a good change of success a little later. Try this many
-times. */
-#define BTR_CUR_RETRY_DELETE_N_TIMES 100
-/** If pessimistic delete fails because of lack of file space, there
-is still a good change of success a little later. Sleep this many
-microseconds between retries. */
-#define BTR_CUR_RETRY_SLEEP_TIME 50000
-
-/** The reference in a field for which data is stored on a different page.
-The reference is at the end of the 'locally' stored part of the field.
-'Locally' means storage in the index record.
-We store locally a long enough prefix of each column so that we can determine
-the ordering parts of each index record without looking into the externally
-stored part. */
-/*-------------------------------------- @{ */
-#define BTR_EXTERN_SPACE_ID 0 /*!< space id where stored */
-#define BTR_EXTERN_PAGE_NO 4 /*!< page no where stored */
-#define BTR_EXTERN_OFFSET 8 /*!< offset of BLOB header
- on that page */
-#define BTR_EXTERN_LEN 12 /*!< 8 bytes containing the
- length of the externally
- stored part of the BLOB.
- The 2 highest bits are
- reserved to the flags below. */
-/*-------------------------------------- @} */
-/* #define BTR_EXTERN_FIELD_REF_SIZE 20 // moved to btr0types.h */
-
-/** The most significant bit of BTR_EXTERN_LEN (i.e., the most
-significant bit of the byte at smallest address) is set to 1 if this
-field does not 'own' the externally stored field; only the owner field
-is allowed to free the field in purge! */
-#define BTR_EXTERN_OWNER_FLAG 128
-/** If the second most significant bit of BTR_EXTERN_LEN (i.e., the
-second most significant bit of the byte at smallest address) is 1 then
-it means that the externally stored field was inherited from an
-earlier version of the row. In rollback we are not allowed to free an
-inherited external field. */
-#define BTR_EXTERN_INHERITED_FLAG 64
-
-/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */
-extern ulint btr_cur_n_non_sea;
-/** Number of successful adaptive hash index lookups in
-btr_cur_search_to_nth_level(). */
-extern ulint btr_cur_n_sea;
-/** Old value of btr_cur_n_non_sea. Copied by
-srv_refresh_innodb_monitor_stats(). Referenced by
-srv_printf_innodb_monitor(). */
-extern ulint btr_cur_n_non_sea_old;
-/** Old value of btr_cur_n_sea. Copied by
-srv_refresh_innodb_monitor_stats(). Referenced by
-srv_printf_innodb_monitor(). */
-extern ulint btr_cur_n_sea_old;
-#endif /* !UNIV_HOTBACKUP */
-
-#ifndef UNIV_NONINL
-#include "btr0cur.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/btr0cur.ic b/storage/innodb_plugin/include/btr0cur.ic
deleted file mode 100644
index 280583f6ccf..00000000000
--- a/storage/innodb_plugin/include/btr0cur.ic
+++ /dev/null
@@ -1,200 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/btr0cur.ic
-The index tree cursor
-
-Created 10/16/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef UNIV_HOTBACKUP
-#include "btr0btr.h"
-
-#ifdef UNIV_DEBUG
-/*********************************************************//**
-Returns the page cursor component of a tree cursor.
-@return pointer to page cursor component */
-UNIV_INLINE
-page_cur_t*
-btr_cur_get_page_cur(
-/*=================*/
- const btr_cur_t* cursor) /*!< in: tree cursor */
-{
- return(&((btr_cur_t*) cursor)->page_cur);
-}
-#endif /* UNIV_DEBUG */
-/*********************************************************//**
-Returns the buffer block on which the tree cursor is positioned.
-@return pointer to buffer block */
-UNIV_INLINE
-buf_block_t*
-btr_cur_get_block(
-/*==============*/
- btr_cur_t* cursor) /*!< in: tree cursor */
-{
- return(page_cur_get_block(btr_cur_get_page_cur(cursor)));
-}
-
-/*********************************************************//**
-Returns the record pointer of a tree cursor.
-@return pointer to record */
-UNIV_INLINE
-rec_t*
-btr_cur_get_rec(
-/*============*/
- btr_cur_t* cursor) /*!< in: tree cursor */
-{
- return(page_cur_get_rec(&(cursor->page_cur)));
-}
-
-/*********************************************************//**
-Returns the compressed page on which the tree cursor is positioned.
-@return pointer to compressed page, or NULL if the page is not compressed */
-UNIV_INLINE
-page_zip_des_t*
-btr_cur_get_page_zip(
-/*=================*/
- btr_cur_t* cursor) /*!< in: tree cursor */
-{
- return(buf_block_get_page_zip(btr_cur_get_block(cursor)));
-}
-
-/*********************************************************//**
-Invalidates a tree cursor by setting record pointer to NULL. */
-UNIV_INLINE
-void
-btr_cur_invalidate(
-/*===============*/
- btr_cur_t* cursor) /*!< in: tree cursor */
-{
- page_cur_invalidate(&(cursor->page_cur));
-}
-
-/*********************************************************//**
-Returns the page of a tree cursor.
-@return pointer to page */
-UNIV_INLINE
-page_t*
-btr_cur_get_page(
-/*=============*/
- btr_cur_t* cursor) /*!< in: tree cursor */
-{
- return(page_align(page_cur_get_rec(&(cursor->page_cur))));
-}
-
-/*********************************************************//**
-Returns the index of a cursor.
-@return index */
-UNIV_INLINE
-dict_index_t*
-btr_cur_get_index(
-/*==============*/
- btr_cur_t* cursor) /*!< in: B-tree cursor */
-{
- return(cursor->index);
-}
-
-/*********************************************************//**
-Positions a tree cursor at a given record. */
-UNIV_INLINE
-void
-btr_cur_position(
-/*=============*/
- dict_index_t* index, /*!< in: index */
- rec_t* rec, /*!< in: record in tree */
- buf_block_t* block, /*!< in: buffer block of rec */
- btr_cur_t* cursor) /*!< out: cursor */
-{
- ut_ad(page_align(rec) == block->frame);
-
- page_cur_position(rec, block, btr_cur_get_page_cur(cursor));
-
- cursor->index = index;
-}
-
-/*********************************************************************//**
-Checks if compressing an index page where a btr cursor is placed makes
-sense.
-@return TRUE if compression is recommended */
-UNIV_INLINE
-ibool
-btr_cur_compress_recommendation(
-/*============================*/
- btr_cur_t* cursor, /*!< in: btr cursor */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* page;
-
- ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
- MTR_MEMO_PAGE_X_FIX));
-
- page = btr_cur_get_page(cursor);
-
- if ((page_get_data_size(page) < BTR_CUR_PAGE_COMPRESS_LIMIT)
- || ((btr_page_get_next(page, mtr) == FIL_NULL)
- && (btr_page_get_prev(page, mtr) == FIL_NULL))) {
-
- /* The page fillfactor has dropped below a predefined
- minimum value OR the level in the B-tree contains just
- one page: we recommend compression if this is not the
- root page. */
-
- return(dict_index_get_page(cursor->index)
- != page_get_page_no(page));
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Checks if the record on which the cursor is placed can be deleted without
-making tree compression necessary (or, recommended).
-@return TRUE if can be deleted without recommended compression */
-UNIV_INLINE
-ibool
-btr_cur_can_delete_without_compress(
-/*================================*/
- btr_cur_t* cursor, /*!< in: btr cursor */
- ulint rec_size,/*!< in: rec_get_size(btr_cur_get_rec(cursor))*/
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* page;
-
- ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
- MTR_MEMO_PAGE_X_FIX));
-
- page = btr_cur_get_page(cursor);
-
- if ((page_get_data_size(page) - rec_size < BTR_CUR_PAGE_COMPRESS_LIMIT)
- || ((btr_page_get_next(page, mtr) == FIL_NULL)
- && (btr_page_get_prev(page, mtr) == FIL_NULL))
- || (page_get_n_recs(page) < 2)) {
-
- /* The page fillfactor will drop below a predefined
- minimum value, OR the level in the B-tree contains just
- one page, OR the page will become empty: we recommend
- compression if this is not the root page. */
-
- return(dict_index_get_page(cursor->index)
- == page_get_page_no(page));
- }
-
- return(TRUE);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/btr0pcur.h b/storage/innodb_plugin/include/btr0pcur.h
deleted file mode 100644
index 12b1375d8b7..00000000000
--- a/storage/innodb_plugin/include/btr0pcur.h
+++ /dev/null
@@ -1,537 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/btr0pcur.h
-The index tree persistent cursor
-
-Created 2/23/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef btr0pcur_h
-#define btr0pcur_h
-
-#include "univ.i"
-#include "dict0dict.h"
-#include "data0data.h"
-#include "mtr0mtr.h"
-#include "page0cur.h"
-#include "btr0cur.h"
-#include "btr0btr.h"
-#include "btr0types.h"
-
-/* Relative positions for a stored cursor position */
-#define BTR_PCUR_ON 1
-#define BTR_PCUR_BEFORE 2
-#define BTR_PCUR_AFTER 3
-/* Note that if the tree is not empty, btr_pcur_store_position does not
-use the following, but only uses the above three alternatives, where the
-position is stored relative to a specific record: this makes implementation
-of a scroll cursor easier */
-#define BTR_PCUR_BEFORE_FIRST_IN_TREE 4 /* in an empty tree */
-#define BTR_PCUR_AFTER_LAST_IN_TREE 5 /* in an empty tree */
-
-/**************************************************************//**
-Allocates memory for a persistent cursor object and initializes the cursor.
-@return own: persistent cursor */
-UNIV_INTERN
-btr_pcur_t*
-btr_pcur_create_for_mysql(void);
-/*============================*/
-/**************************************************************//**
-Frees the memory for a persistent cursor object. */
-UNIV_INTERN
-void
-btr_pcur_free_for_mysql(
-/*====================*/
- btr_pcur_t* cursor); /*!< in, own: persistent cursor */
-/**************************************************************//**
-Copies the stored position of a pcur to another pcur. */
-UNIV_INTERN
-void
-btr_pcur_copy_stored_position(
-/*==========================*/
- btr_pcur_t* pcur_receive, /*!< in: pcur which will receive the
- position info */
- btr_pcur_t* pcur_donate); /*!< in: pcur from which the info is
- copied */
-/**************************************************************//**
-Sets the old_rec_buf field to NULL. */
-UNIV_INLINE
-void
-btr_pcur_init(
-/*==========*/
- btr_pcur_t* pcur); /*!< in: persistent cursor */
-/**************************************************************//**
-Initializes and opens a persistent cursor to an index tree. It should be
-closed with btr_pcur_close. */
-UNIV_INLINE
-void
-btr_pcur_open(
-/*==========*/
- dict_index_t* index, /*!< in: index */
- const dtuple_t* tuple, /*!< in: tuple on which search done */
- ulint mode, /*!< in: PAGE_CUR_L, ...;
- NOTE that if the search is made using a unique
- prefix of a record, mode should be
- PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
- may end up on the previous page from the
- record! */
- ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
- mtr_t* mtr); /*!< in: mtr */
-/**************************************************************//**
-Opens an persistent cursor to an index tree without initializing the
-cursor. */
-UNIV_INLINE
-void
-btr_pcur_open_with_no_init(
-/*=======================*/
- dict_index_t* index, /*!< in: index */
- const dtuple_t* tuple, /*!< in: tuple on which search done */
- ulint mode, /*!< in: PAGE_CUR_L, ...;
- NOTE that if the search is made using a unique
- prefix of a record, mode should be
- PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
- may end up on the previous page of the
- record! */
- ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ...;
- NOTE that if has_search_latch != 0 then
- we maybe do not acquire a latch on the cursor
- page, but assume that the caller uses his
- btr search latch to protect the record! */
- btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
- ulint has_search_latch,/*!< in: latch mode the caller
- currently has on btr_search_latch:
- RW_S_LATCH, or 0 */
- mtr_t* mtr); /*!< in: mtr */
-/*****************************************************************//**
-Opens a persistent cursor at either end of an index. */
-UNIV_INLINE
-void
-btr_pcur_open_at_index_side(
-/*========================*/
- ibool from_left, /*!< in: TRUE if open to the low end,
- FALSE if to the high end */
- dict_index_t* index, /*!< in: index */
- ulint latch_mode, /*!< in: latch mode */
- btr_pcur_t* pcur, /*!< in: cursor */
- ibool do_init, /*!< in: TRUE if should be initialized */
- mtr_t* mtr); /*!< in: mtr */
-/**************************************************************//**
-Gets the up_match value for a pcur after a search.
-@return number of matched fields at the cursor or to the right if
-search mode was PAGE_CUR_GE, otherwise undefined */
-UNIV_INLINE
-ulint
-btr_pcur_get_up_match(
-/*==================*/
- btr_pcur_t* cursor); /*!< in: memory buffer for persistent cursor */
-/**************************************************************//**
-Gets the low_match value for a pcur after a search.
-@return number of matched fields at the cursor or to the right if
-search mode was PAGE_CUR_LE, otherwise undefined */
-UNIV_INLINE
-ulint
-btr_pcur_get_low_match(
-/*===================*/
- btr_pcur_t* cursor); /*!< in: memory buffer for persistent cursor */
-/**************************************************************//**
-If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first
-user record satisfying the search condition, in the case PAGE_CUR_L or
-PAGE_CUR_LE, on the last user record. If no such user record exists, then
-in the first case sets the cursor after last in tree, and in the latter case
-before first in tree. The latching mode must be BTR_SEARCH_LEAF or
-BTR_MODIFY_LEAF. */
-UNIV_INTERN
-void
-btr_pcur_open_on_user_rec(
-/*======================*/
- dict_index_t* index, /*!< in: index */
- const dtuple_t* tuple, /*!< in: tuple on which search done */
- ulint mode, /*!< in: PAGE_CUR_L, ... */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or
- BTR_MODIFY_LEAF */
- btr_pcur_t* cursor, /*!< in: memory buffer for persistent
- cursor */
- mtr_t* mtr); /*!< in: mtr */
-/**********************************************************************//**
-Positions a cursor at a randomly chosen position within a B-tree. */
-UNIV_INLINE
-void
-btr_pcur_open_at_rnd_pos(
-/*=====================*/
- dict_index_t* index, /*!< in: index */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /*!< in/out: B-tree pcur */
- mtr_t* mtr); /*!< in: mtr */
-/**************************************************************//**
-Frees the possible old_rec_buf buffer of a persistent cursor and sets the
-latch mode of the persistent cursor to BTR_NO_LATCHES. */
-UNIV_INLINE
-void
-btr_pcur_close(
-/*===========*/
- btr_pcur_t* cursor); /*!< in: persistent cursor */
-/**************************************************************//**
-The position of the cursor is stored by taking an initial segment of the
-record the cursor is positioned on, before, or after, and copying it to the
-cursor data structure, or just setting a flag if the cursor id before the
-first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the
-page where the cursor is positioned must not be empty if the index tree is
-not totally empty! */
-UNIV_INTERN
-void
-btr_pcur_store_position(
-/*====================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr); /*!< in: mtr */
-/**************************************************************//**
-Restores the stored position of a persistent cursor bufferfixing the page and
-obtaining the specified latches. If the cursor position was saved when the
-(1) cursor was positioned on a user record: this function restores the position
-to the last record LESS OR EQUAL to the stored record;
-(2) cursor was positioned on a page infimum record: restores the position to
-the last record LESS than the user record which was the successor of the page
-infimum;
-(3) cursor was positioned on the page supremum: restores to the first record
-GREATER than the user record which was the predecessor of the supremum.
-(4) cursor was positioned before the first or after the last in an empty tree:
-restores to before first or after the last in the tree.
-@return TRUE if the cursor position was stored when it was on a user
-record and it can be restored on a user record whose ordering fields
-are identical to the ones of the original user record */
-UNIV_INTERN
-ibool
-btr_pcur_restore_position(
-/*======================*/
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /*!< in: detached persistent cursor */
- mtr_t* mtr); /*!< in: mtr */
-/**************************************************************//**
-If the latch mode of the cursor is BTR_LEAF_SEARCH or BTR_LEAF_MODIFY,
-releases the page latch and bufferfix reserved by the cursor.
-NOTE! In the case of BTR_LEAF_MODIFY, there should not exist changes
-made by the current mini-transaction to the data protected by the
-cursor latch, as then the latch must not be released until mtr_commit. */
-UNIV_INTERN
-void
-btr_pcur_release_leaf(
-/*==================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr); /*!< in: mtr */
-/*********************************************************//**
-Gets the rel_pos field for a cursor whose position has been stored.
-@return BTR_PCUR_ON, ... */
-UNIV_INLINE
-ulint
-btr_pcur_get_rel_pos(
-/*=================*/
- const btr_pcur_t* cursor);/*!< in: persistent cursor */
-/*********************************************************//**
-Sets the mtr field for a pcur. */
-UNIV_INLINE
-void
-btr_pcur_set_mtr(
-/*=============*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr); /*!< in, own: mtr */
-/*********************************************************//**
-Gets the mtr field for a pcur.
-@return mtr */
-UNIV_INLINE
-mtr_t*
-btr_pcur_get_mtr(
-/*=============*/
- btr_pcur_t* cursor); /*!< in: persistent cursor */
-/**************************************************************//**
-Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES,
-that is, the cursor becomes detached. If there have been modifications
-to the page where pcur is positioned, this can be used instead of
-btr_pcur_release_leaf. Function btr_pcur_store_position should be used
-before calling this, if restoration of cursor is wanted later. */
-UNIV_INLINE
-void
-btr_pcur_commit(
-/*============*/
- btr_pcur_t* pcur); /*!< in: persistent cursor */
-/**************************************************************//**
-Differs from btr_pcur_commit in that we can specify the mtr to commit. */
-UNIV_INLINE
-void
-btr_pcur_commit_specify_mtr(
-/*========================*/
- btr_pcur_t* pcur, /*!< in: persistent cursor */
- mtr_t* mtr); /*!< in: mtr to commit */
-/**************************************************************//**
-Tests if a cursor is detached: that is the latch mode is BTR_NO_LATCHES.
-@return TRUE if detached */
-UNIV_INLINE
-ibool
-btr_pcur_is_detached(
-/*=================*/
- btr_pcur_t* pcur); /*!< in: persistent cursor */
-/*********************************************************//**
-Moves the persistent cursor to the next record in the tree. If no records are
-left, the cursor stays 'after last in tree'.
-@return TRUE if the cursor was not after last in tree */
-UNIV_INLINE
-ibool
-btr_pcur_move_to_next(
-/*==================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the
- function may release the page latch */
- mtr_t* mtr); /*!< in: mtr */
-/*********************************************************//**
-Moves the persistent cursor to the previous record in the tree. If no records
-are left, the cursor stays 'before first in tree'.
-@return TRUE if the cursor was not before first in tree */
-UNIV_INTERN
-ibool
-btr_pcur_move_to_prev(
-/*==================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the
- function may release the page latch */
- mtr_t* mtr); /*!< in: mtr */
-/*********************************************************//**
-Moves the persistent cursor to the last record on the same page. */
-UNIV_INLINE
-void
-btr_pcur_move_to_last_on_page(
-/*==========================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr); /*!< in: mtr */
-/*********************************************************//**
-Moves the persistent cursor to the next user record in the tree. If no user
-records are left, the cursor ends up 'after last in tree'.
-@return TRUE if the cursor moved forward, ending on a user record */
-UNIV_INLINE
-ibool
-btr_pcur_move_to_next_user_rec(
-/*===========================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the
- function may release the page latch */
- mtr_t* mtr); /*!< in: mtr */
-/*********************************************************//**
-Moves the persistent cursor to the first record on the next page.
-Releases the latch on the current page, and bufferunfixes it.
-Note that there must not be modifications on the current page,
-as then the x-latch can be released only in mtr_commit. */
-UNIV_INTERN
-void
-btr_pcur_move_to_next_page(
-/*=======================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor; must be on the
- last record of the current page */
- mtr_t* mtr); /*!< in: mtr */
-/*********************************************************//**
-Moves the persistent cursor backward if it is on the first record
-of the page. Releases the latch on the current page, and bufferunfixes
-it. Note that to prevent a possible deadlock, the operation first
-stores the position of the cursor, releases the leaf latch, acquires
-necessary latches and restores the cursor position again before returning.
-The alphabetical position of the cursor is guaranteed to be sensible
-on return, but it may happen that the cursor is not positioned on the
-last record of any page, because the structure of the tree may have
-changed while the cursor had no latches. */
-UNIV_INTERN
-void
-btr_pcur_move_backward_from_page(
-/*=============================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor, must be on the
- first record of the current page */
- mtr_t* mtr); /*!< in: mtr */
-#ifdef UNIV_DEBUG
-/*********************************************************//**
-Returns the btr cursor component of a persistent cursor.
-@return pointer to btr cursor component */
-UNIV_INLINE
-btr_cur_t*
-btr_pcur_get_btr_cur(
-/*=================*/
- const btr_pcur_t* cursor); /*!< in: persistent cursor */
-/*********************************************************//**
-Returns the page cursor component of a persistent cursor.
-@return pointer to page cursor component */
-UNIV_INLINE
-page_cur_t*
-btr_pcur_get_page_cur(
-/*==================*/
- const btr_pcur_t* cursor); /*!< in: persistent cursor */
-#else /* UNIV_DEBUG */
-# define btr_pcur_get_btr_cur(cursor) (&(cursor)->btr_cur)
-# define btr_pcur_get_page_cur(cursor) (&(cursor)->btr_cur.page_cur)
-#endif /* UNIV_DEBUG */
-/*********************************************************//**
-Returns the page of a persistent cursor.
-@return pointer to the page */
-UNIV_INLINE
-page_t*
-btr_pcur_get_page(
-/*==============*/
- btr_pcur_t* cursor);/*!< in: persistent cursor */
-/*********************************************************//**
-Returns the buffer block of a persistent cursor.
-@return pointer to the block */
-UNIV_INLINE
-buf_block_t*
-btr_pcur_get_block(
-/*===============*/
- btr_pcur_t* cursor);/*!< in: persistent cursor */
-/*********************************************************//**
-Returns the record of a persistent cursor.
-@return pointer to the record */
-UNIV_INLINE
-rec_t*
-btr_pcur_get_rec(
-/*=============*/
- btr_pcur_t* cursor);/*!< in: persistent cursor */
-/*********************************************************//**
-Checks if the persistent cursor is on a user record. */
-UNIV_INLINE
-ibool
-btr_pcur_is_on_user_rec(
-/*====================*/
- const btr_pcur_t* cursor);/*!< in: persistent cursor */
-/*********************************************************//**
-Checks if the persistent cursor is after the last user record on
-a page. */
-UNIV_INLINE
-ibool
-btr_pcur_is_after_last_on_page(
-/*===========================*/
- const btr_pcur_t* cursor);/*!< in: persistent cursor */
-/*********************************************************//**
-Checks if the persistent cursor is before the first user record on
-a page. */
-UNIV_INLINE
-ibool
-btr_pcur_is_before_first_on_page(
-/*=============================*/
- const btr_pcur_t* cursor);/*!< in: persistent cursor */
-/*********************************************************//**
-Checks if the persistent cursor is before the first user record in
-the index tree. */
-UNIV_INLINE
-ibool
-btr_pcur_is_before_first_in_tree(
-/*=============================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr); /*!< in: mtr */
-/*********************************************************//**
-Checks if the persistent cursor is after the last user record in
-the index tree. */
-UNIV_INLINE
-ibool
-btr_pcur_is_after_last_in_tree(
-/*===========================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr); /*!< in: mtr */
-/*********************************************************//**
-Moves the persistent cursor to the next record on the same page. */
-UNIV_INLINE
-void
-btr_pcur_move_to_next_on_page(
-/*==========================*/
- btr_pcur_t* cursor);/*!< in/out: persistent cursor */
-/*********************************************************//**
-Moves the persistent cursor to the previous record on the same page. */
-UNIV_INLINE
-void
-btr_pcur_move_to_prev_on_page(
-/*==========================*/
- btr_pcur_t* cursor);/*!< in/out: persistent cursor */
-
-
-/* The persistent B-tree cursor structure. This is used mainly for SQL
-selects, updates, and deletes. */
-
-struct btr_pcur_struct{
- btr_cur_t btr_cur; /*!< a B-tree cursor */
- ulint latch_mode; /*!< see TODO note below!
- BTR_SEARCH_LEAF, BTR_MODIFY_LEAF,
- BTR_MODIFY_TREE, or BTR_NO_LATCHES,
- depending on the latching state of
- the page and tree where the cursor is
- positioned; the last value means that
- the cursor is not currently positioned:
- we say then that the cursor is
- detached; it can be restored to
- attached if the old position was
- stored in old_rec */
- ulint old_stored; /*!< BTR_PCUR_OLD_STORED
- or BTR_PCUR_OLD_NOT_STORED */
- rec_t* old_rec; /*!< if cursor position is stored,
- contains an initial segment of the
- latest record cursor was positioned
- either on, before, or after */
- ulint old_n_fields; /*!< number of fields in old_rec */
- ulint rel_pos; /*!< BTR_PCUR_ON, BTR_PCUR_BEFORE, or
- BTR_PCUR_AFTER, depending on whether
- cursor was on, before, or after the
- old_rec record */
- buf_block_t* block_when_stored;/* buffer block when the position was
- stored */
- ib_uint64_t modify_clock; /*!< the modify clock value of the
- buffer block when the cursor position
- was stored */
- ulint pos_state; /*!< see TODO note below!
- BTR_PCUR_IS_POSITIONED,
- BTR_PCUR_WAS_POSITIONED,
- BTR_PCUR_NOT_POSITIONED */
- ulint search_mode; /*!< PAGE_CUR_G, ... */
- trx_t* trx_if_known; /*!< the transaction, if we know it;
- otherwise this field is not defined;
- can ONLY BE USED in error prints in
- fatal assertion failures! */
- /*-----------------------------*/
- /* NOTE that the following fields may possess dynamically allocated
- memory which should be freed if not needed anymore! */
-
- mtr_t* mtr; /*!< NULL, or this field may contain
- a mini-transaction which holds the
- latch on the cursor page */
- byte* old_rec_buf; /*!< NULL, or a dynamically allocated
- buffer for old_rec */
- ulint buf_size; /*!< old_rec_buf size if old_rec_buf
- is not NULL */
-};
-
-#define BTR_PCUR_IS_POSITIONED 1997660512 /* TODO: currently, the state
- can be BTR_PCUR_IS_POSITIONED,
- though it really should be
- BTR_PCUR_WAS_POSITIONED,
- because we have no obligation
- to commit the cursor with
- mtr; similarly latch_mode may
- be out of date. This can
- lead to problems if btr_pcur
- is not used the right way;
- all current code should be
- ok. */
-#define BTR_PCUR_WAS_POSITIONED 1187549791
-#define BTR_PCUR_NOT_POSITIONED 1328997689
-
-#define BTR_PCUR_OLD_STORED 908467085
-#define BTR_PCUR_OLD_NOT_STORED 122766467
-
-#ifndef UNIV_NONINL
-#include "btr0pcur.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/btr0pcur.ic b/storage/innodb_plugin/include/btr0pcur.ic
deleted file mode 100644
index 0ca7223f861..00000000000
--- a/storage/innodb_plugin/include/btr0pcur.ic
+++ /dev/null
@@ -1,651 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/btr0pcur.ic
-The index tree persistent cursor
-
-Created 2/23/1996 Heikki Tuuri
-*******************************************************/
-
-
-/*********************************************************//**
-Gets the rel_pos field for a cursor whose position has been stored.
-@return BTR_PCUR_ON, ... */
-UNIV_INLINE
-ulint
-btr_pcur_get_rel_pos(
-/*=================*/
- const btr_pcur_t* cursor) /*!< in: persistent cursor */
-{
- ut_ad(cursor);
- ut_ad(cursor->old_rec);
- ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED);
- ut_ad(cursor->pos_state == BTR_PCUR_WAS_POSITIONED
- || cursor->pos_state == BTR_PCUR_IS_POSITIONED);
-
- return(cursor->rel_pos);
-}
-
-/*********************************************************//**
-Sets the mtr field for a pcur. */
-UNIV_INLINE
-void
-btr_pcur_set_mtr(
-/*=============*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr) /*!< in, own: mtr */
-{
- ut_ad(cursor);
-
- cursor->mtr = mtr;
-}
-
-/*********************************************************//**
-Gets the mtr field for a pcur.
-@return mtr */
-UNIV_INLINE
-mtr_t*
-btr_pcur_get_mtr(
-/*=============*/
- btr_pcur_t* cursor) /*!< in: persistent cursor */
-{
- ut_ad(cursor);
-
- return(cursor->mtr);
-}
-
-#ifdef UNIV_DEBUG
-/*********************************************************//**
-Returns the btr cursor component of a persistent cursor.
-@return pointer to btr cursor component */
-UNIV_INLINE
-btr_cur_t*
-btr_pcur_get_btr_cur(
-/*=================*/
- const btr_pcur_t* cursor) /*!< in: persistent cursor */
-{
- const btr_cur_t* btr_cur = &cursor->btr_cur;
- return((btr_cur_t*) btr_cur);
-}
-
-/*********************************************************//**
-Returns the page cursor component of a persistent cursor.
-@return pointer to page cursor component */
-UNIV_INLINE
-page_cur_t*
-btr_pcur_get_page_cur(
-/*==================*/
- const btr_pcur_t* cursor) /*!< in: persistent cursor */
-{
- return(btr_cur_get_page_cur(btr_pcur_get_btr_cur(cursor)));
-}
-#endif /* UNIV_DEBUG */
-/*********************************************************//**
-Returns the page of a persistent cursor.
-@return pointer to the page */
-UNIV_INLINE
-page_t*
-btr_pcur_get_page(
-/*==============*/
- btr_pcur_t* cursor) /*!< in: persistent cursor */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
-
- return(btr_cur_get_page(btr_pcur_get_btr_cur(cursor)));
-}
-
-/*********************************************************//**
-Returns the buffer block of a persistent cursor.
-@return pointer to the block */
-UNIV_INLINE
-buf_block_t*
-btr_pcur_get_block(
-/*===============*/
- btr_pcur_t* cursor) /*!< in: persistent cursor */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
-
- return(btr_cur_get_block(btr_pcur_get_btr_cur(cursor)));
-}
-
-/*********************************************************//**
-Returns the record of a persistent cursor.
-@return pointer to the record */
-UNIV_INLINE
-rec_t*
-btr_pcur_get_rec(
-/*=============*/
- btr_pcur_t* cursor) /*!< in: persistent cursor */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- return(btr_cur_get_rec(btr_pcur_get_btr_cur(cursor)));
-}
-
-/**************************************************************//**
-Gets the up_match value for a pcur after a search.
-@return number of matched fields at the cursor or to the right if
-search mode was PAGE_CUR_GE, otherwise undefined */
-UNIV_INLINE
-ulint
-btr_pcur_get_up_match(
-/*==================*/
- btr_pcur_t* cursor) /*!< in: memory buffer for persistent cursor */
-{
- btr_cur_t* btr_cursor;
-
- ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED)
- || (cursor->pos_state == BTR_PCUR_IS_POSITIONED));
-
- btr_cursor = btr_pcur_get_btr_cur(cursor);
-
- ut_ad(btr_cursor->up_match != ULINT_UNDEFINED);
-
- return(btr_cursor->up_match);
-}
-
-/**************************************************************//**
-Gets the low_match value for a pcur after a search.
-@return number of matched fields at the cursor or to the right if
-search mode was PAGE_CUR_LE, otherwise undefined */
-UNIV_INLINE
-ulint
-btr_pcur_get_low_match(
-/*===================*/
- btr_pcur_t* cursor) /*!< in: memory buffer for persistent cursor */
-{
- btr_cur_t* btr_cursor;
-
- ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED)
- || (cursor->pos_state == BTR_PCUR_IS_POSITIONED));
-
- btr_cursor = btr_pcur_get_btr_cur(cursor);
- ut_ad(btr_cursor->low_match != ULINT_UNDEFINED);
-
- return(btr_cursor->low_match);
-}
-
-/*********************************************************//**
-Checks if the persistent cursor is after the last user record on
-a page. */
-UNIV_INLINE
-ibool
-btr_pcur_is_after_last_on_page(
-/*===========================*/
- const btr_pcur_t* cursor) /*!< in: persistent cursor */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor)));
-}
-
-/*********************************************************//**
-Checks if the persistent cursor is before the first user record on
-a page. */
-UNIV_INLINE
-ibool
-btr_pcur_is_before_first_on_page(
-/*=============================*/
- const btr_pcur_t* cursor) /*!< in: persistent cursor */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor)));
-}
-
-/*********************************************************//**
-Checks if the persistent cursor is on a user record. */
-UNIV_INLINE
-ibool
-btr_pcur_is_on_user_rec(
-/*====================*/
- const btr_pcur_t* cursor) /*!< in: persistent cursor */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- if (btr_pcur_is_before_first_on_page(cursor)
- || btr_pcur_is_after_last_on_page(cursor)) {
-
- return(FALSE);
- }
-
- return(TRUE);
-}
-
-/*********************************************************//**
-Checks if the persistent cursor is before the first user record in
-the index tree. */
-UNIV_INLINE
-ibool
-btr_pcur_is_before_first_in_tree(
-/*=============================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- if (btr_page_get_prev(btr_pcur_get_page(cursor), mtr) != FIL_NULL) {
-
- return(FALSE);
- }
-
- return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor)));
-}
-
-/*********************************************************//**
-Checks if the persistent cursor is after the last user record in
-the index tree. */
-UNIV_INLINE
-ibool
-btr_pcur_is_after_last_in_tree(
-/*===========================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- if (btr_page_get_next(btr_pcur_get_page(cursor), mtr) != FIL_NULL) {
-
- return(FALSE);
- }
-
- return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor)));
-}
-
-/*********************************************************//**
-Moves the persistent cursor to the next record on the same page. */
-UNIV_INLINE
-void
-btr_pcur_move_to_next_on_page(
-/*==========================*/
- btr_pcur_t* cursor) /*!< in/out: persistent cursor */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- page_cur_move_to_next(btr_pcur_get_page_cur(cursor));
-
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-}
-
-/*********************************************************//**
-Moves the persistent cursor to the previous record on the same page. */
-UNIV_INLINE
-void
-btr_pcur_move_to_prev_on_page(
-/*==========================*/
- btr_pcur_t* cursor) /*!< in/out: persistent cursor */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- page_cur_move_to_prev(btr_pcur_get_page_cur(cursor));
-
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-}
-
-/*********************************************************//**
-Moves the persistent cursor to the last record on the same page. */
-UNIV_INLINE
-void
-btr_pcur_move_to_last_on_page(
-/*==========================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr) /*!< in: mtr */
-{
- UT_NOT_USED(mtr);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- page_cur_set_after_last(btr_pcur_get_block(cursor),
- btr_pcur_get_page_cur(cursor));
-
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-}
-
-/*********************************************************//**
-Moves the persistent cursor to the next user record in the tree. If no user
-records are left, the cursor ends up 'after last in tree'.
-@return TRUE if the cursor moved forward, ending on a user record */
-UNIV_INLINE
-ibool
-btr_pcur_move_to_next_user_rec(
-/*===========================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the
- function may release the page latch */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-loop:
- if (btr_pcur_is_after_last_on_page(cursor)) {
-
- if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
-
- return(FALSE);
- }
-
- btr_pcur_move_to_next_page(cursor, mtr);
- } else {
- btr_pcur_move_to_next_on_page(cursor);
- }
-
- if (btr_pcur_is_on_user_rec(cursor)) {
-
- return(TRUE);
- }
-
- goto loop;
-}
-
-/*********************************************************//**
-Moves the persistent cursor to the next record in the tree. If no records are
-left, the cursor stays 'after last in tree'.
-@return TRUE if the cursor was not after last in tree */
-UNIV_INLINE
-ibool
-btr_pcur_move_to_next(
-/*==================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the
- function may release the page latch */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-
- if (btr_pcur_is_after_last_on_page(cursor)) {
-
- if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
-
- return(FALSE);
- }
-
- btr_pcur_move_to_next_page(cursor, mtr);
-
- return(TRUE);
- }
-
- btr_pcur_move_to_next_on_page(cursor);
-
- return(TRUE);
-}
-
-/**************************************************************//**
-Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES,
-that is, the cursor becomes detached. If there have been modifications
-to the page where pcur is positioned, this can be used instead of
-btr_pcur_release_leaf. Function btr_pcur_store_position should be used
-before calling this, if restoration of cursor is wanted later. */
-UNIV_INLINE
-void
-btr_pcur_commit(
-/*============*/
- btr_pcur_t* pcur) /*!< in: persistent cursor */
-{
- ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
-
- pcur->latch_mode = BTR_NO_LATCHES;
-
- mtr_commit(pcur->mtr);
-
- pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
-}
-
-/**************************************************************//**
-Differs from btr_pcur_commit in that we can specify the mtr to commit. */
-UNIV_INLINE
-void
-btr_pcur_commit_specify_mtr(
-/*========================*/
- btr_pcur_t* pcur, /*!< in: persistent cursor */
- mtr_t* mtr) /*!< in: mtr to commit */
-{
- ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
-
- pcur->latch_mode = BTR_NO_LATCHES;
-
- mtr_commit(mtr);
-
- pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
-}
-
-/**************************************************************//**
-Sets the pcur latch mode to BTR_NO_LATCHES. */
-UNIV_INLINE
-void
-btr_pcur_detach(
-/*============*/
- btr_pcur_t* pcur) /*!< in: persistent cursor */
-{
- ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
-
- pcur->latch_mode = BTR_NO_LATCHES;
-
- pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
-}
-
-/**************************************************************//**
-Tests if a cursor is detached: that is the latch mode is BTR_NO_LATCHES.
-@return TRUE if detached */
-UNIV_INLINE
-ibool
-btr_pcur_is_detached(
-/*=================*/
- btr_pcur_t* pcur) /*!< in: persistent cursor */
-{
- if (pcur->latch_mode == BTR_NO_LATCHES) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/**************************************************************//**
-Sets the old_rec_buf field to NULL. */
-UNIV_INLINE
-void
-btr_pcur_init(
-/*==========*/
- btr_pcur_t* pcur) /*!< in: persistent cursor */
-{
- pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
- pcur->old_rec_buf = NULL;
- pcur->old_rec = NULL;
-}
-
-/**************************************************************//**
-Initializes and opens a persistent cursor to an index tree. It should be
-closed with btr_pcur_close. */
-UNIV_INLINE
-void
-btr_pcur_open(
-/*==========*/
- dict_index_t* index, /*!< in: index */
- const dtuple_t* tuple, /*!< in: tuple on which search done */
- ulint mode, /*!< in: PAGE_CUR_L, ...;
- NOTE that if the search is made using a unique
- prefix of a record, mode should be
- PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
- may end up on the previous page from the
- record! */
- ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
- mtr_t* mtr) /*!< in: mtr */
-{
- btr_cur_t* btr_cursor;
-
- /* Initialize the cursor */
-
- btr_pcur_init(cursor);
-
- cursor->latch_mode = latch_mode;
- cursor->search_mode = mode;
-
- /* Search with the tree cursor */
-
- btr_cursor = btr_pcur_get_btr_cur(cursor);
-
- btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
- btr_cursor, 0, mtr);
- cursor->pos_state = BTR_PCUR_IS_POSITIONED;
-
- cursor->trx_if_known = NULL;
-}
-
-/**************************************************************//**
-Opens an persistent cursor to an index tree without initializing the
-cursor. */
-UNIV_INLINE
-void
-btr_pcur_open_with_no_init(
-/*=======================*/
- dict_index_t* index, /*!< in: index */
- const dtuple_t* tuple, /*!< in: tuple on which search done */
- ulint mode, /*!< in: PAGE_CUR_L, ...;
- NOTE that if the search is made using a unique
- prefix of a record, mode should be
- PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
- may end up on the previous page of the
- record! */
- ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ...;
- NOTE that if has_search_latch != 0 then
- we maybe do not acquire a latch on the cursor
- page, but assume that the caller uses his
- btr search latch to protect the record! */
- btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
- ulint has_search_latch,/*!< in: latch mode the caller
- currently has on btr_search_latch:
- RW_S_LATCH, or 0 */
- mtr_t* mtr) /*!< in: mtr */
-{
- btr_cur_t* btr_cursor;
-
- cursor->latch_mode = latch_mode;
- cursor->search_mode = mode;
-
- /* Search with the tree cursor */
-
- btr_cursor = btr_pcur_get_btr_cur(cursor);
-
- btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
- btr_cursor, has_search_latch, mtr);
- cursor->pos_state = BTR_PCUR_IS_POSITIONED;
-
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-
- cursor->trx_if_known = NULL;
-}
-
-/*****************************************************************//**
-Opens a persistent cursor at either end of an index. */
-UNIV_INLINE
-void
-btr_pcur_open_at_index_side(
-/*========================*/
- ibool from_left, /*!< in: TRUE if open to the low end,
- FALSE if to the high end */
- dict_index_t* index, /*!< in: index */
- ulint latch_mode, /*!< in: latch mode */
- btr_pcur_t* pcur, /*!< in: cursor */
- ibool do_init, /*!< in: TRUE if should be initialized */
- mtr_t* mtr) /*!< in: mtr */
-{
- pcur->latch_mode = latch_mode;
-
- if (from_left) {
- pcur->search_mode = PAGE_CUR_G;
- } else {
- pcur->search_mode = PAGE_CUR_L;
- }
-
- if (do_init) {
- btr_pcur_init(pcur);
- }
-
- btr_cur_open_at_index_side(from_left, index, latch_mode,
- btr_pcur_get_btr_cur(pcur), mtr);
- pcur->pos_state = BTR_PCUR_IS_POSITIONED;
-
- pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
-
- pcur->trx_if_known = NULL;
-}
-
-/**********************************************************************//**
-Positions a cursor at a randomly chosen position within a B-tree. */
-UNIV_INLINE
-void
-btr_pcur_open_at_rnd_pos(
-/*=====================*/
- dict_index_t* index, /*!< in: index */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /*!< in/out: B-tree pcur */
- mtr_t* mtr) /*!< in: mtr */
-{
- /* Initialize the cursor */
-
- cursor->latch_mode = latch_mode;
- cursor->search_mode = PAGE_CUR_G;
-
- btr_pcur_init(cursor);
-
- btr_cur_open_at_rnd_pos(index, latch_mode,
- btr_pcur_get_btr_cur(cursor), mtr);
- cursor->pos_state = BTR_PCUR_IS_POSITIONED;
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-
- cursor->trx_if_known = NULL;
-}
-
-/**************************************************************//**
-Frees the possible memory heap of a persistent cursor and sets the latch
-mode of the persistent cursor to BTR_NO_LATCHES. */
-UNIV_INLINE
-void
-btr_pcur_close(
-/*===========*/
- btr_pcur_t* cursor) /*!< in: persistent cursor */
-{
- if (cursor->old_rec_buf != NULL) {
-
- mem_free(cursor->old_rec_buf);
-
- cursor->old_rec = NULL;
- cursor->old_rec_buf = NULL;
- }
-
- cursor->btr_cur.page_cur.rec = NULL;
- cursor->btr_cur.page_cur.block = NULL;
- cursor->old_rec = NULL;
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-
- cursor->latch_mode = BTR_NO_LATCHES;
- cursor->pos_state = BTR_PCUR_NOT_POSITIONED;
-
- cursor->trx_if_known = NULL;
-}
diff --git a/storage/innodb_plugin/include/btr0sea.h b/storage/innodb_plugin/include/btr0sea.h
deleted file mode 100644
index f98ba386f9c..00000000000
--- a/storage/innodb_plugin/include/btr0sea.h
+++ /dev/null
@@ -1,310 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/btr0sea.h
-The index tree adaptive search
-
-Created 2/17/1996 Heikki Tuuri
-*************************************************************************/
-
-#ifndef btr0sea_h
-#define btr0sea_h
-
-#include "univ.i"
-
-#include "rem0rec.h"
-#include "dict0dict.h"
-#include "btr0types.h"
-#include "mtr0mtr.h"
-#include "ha0ha.h"
-
-/*****************************************************************//**
-Creates and initializes the adaptive search system at a database start. */
-UNIV_INTERN
-void
-btr_search_sys_create(
-/*==================*/
- ulint hash_size); /*!< in: hash index hash table size */
-/*****************************************************************//**
-Frees the adaptive search system at a database shutdown. */
-UNIV_INTERN
-void
-btr_search_sys_free(void);
-/*=====================*/
-
-/********************************************************************//**
-Disable the adaptive hash search system and empty the index. */
-UNIV_INTERN
-void
-btr_search_disable(void);
-/*====================*/
-/********************************************************************//**
-Enable the adaptive hash search system. */
-UNIV_INTERN
-void
-btr_search_enable(void);
-/*====================*/
-
-/********************************************************************//**
-Returns search info for an index.
-@return search info; search mutex reserved */
-UNIV_INLINE
-btr_search_t*
-btr_search_get_info(
-/*================*/
- dict_index_t* index); /*!< in: index */
-/*****************************************************************//**
-Creates and initializes a search info struct.
-@return own: search info struct */
-UNIV_INTERN
-btr_search_t*
-btr_search_info_create(
-/*===================*/
- mem_heap_t* heap); /*!< in: heap where created */
-/*****************************************************************//**
-Returns the value of ref_count. The value is protected by
-btr_search_latch.
-@return ref_count value. */
-UNIV_INTERN
-ulint
-btr_search_info_get_ref_count(
-/*==========================*/
- btr_search_t* info); /*!< in: search info. */
-/*********************************************************************//**
-Updates the search info. */
-UNIV_INLINE
-void
-btr_search_info_update(
-/*===================*/
- dict_index_t* index, /*!< in: index of the cursor */
- btr_cur_t* cursor);/*!< in: cursor which was just positioned */
-/******************************************************************//**
-Tries to guess the right search position based on the hash search info
-of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
-and the function returns TRUE, then cursor->up_match and cursor->low_match
-both have sensible values.
-@return TRUE if succeeded */
-UNIV_INTERN
-ibool
-btr_search_guess_on_hash(
-/*=====================*/
- dict_index_t* index, /*!< in: index */
- btr_search_t* info, /*!< in: index search info */
- const dtuple_t* tuple, /*!< in: logical record */
- ulint mode, /*!< in: PAGE_CUR_L, ... */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
- btr_cur_t* cursor, /*!< out: tree cursor */
- ulint has_search_latch,/*!< in: latch mode the caller
- currently has on btr_search_latch:
- RW_S_LATCH, RW_X_LATCH, or 0 */
- mtr_t* mtr); /*!< in: mtr */
-/********************************************************************//**
-Moves or deletes hash entries for moved records. If new_page is already hashed,
-then the hash index for page, if any, is dropped. If new_page is not hashed,
-and page is hashed, then a new hash index is built to new_page with the same
-parameters as page (this often happens when a page is split). */
-UNIV_INTERN
-void
-btr_search_move_or_delete_hash_entries(
-/*===================================*/
- buf_block_t* new_block, /*!< in: records are copied
- to this page */
- buf_block_t* block, /*!< in: index page from which
- records were copied, and the
- copied records will be deleted
- from this page */
- dict_index_t* index); /*!< in: record descriptor */
-/********************************************************************//**
-Drops a page hash index. */
-UNIV_INTERN
-void
-btr_search_drop_page_hash_index(
-/*============================*/
- buf_block_t* block); /*!< in: block containing index page,
- s- or x-latched, or an index page
- for which we know that
- block->buf_fix_count == 0 */
-/********************************************************************//**
-Drops a page hash index when a page is freed from a fseg to the file system.
-Drops possible hash index if the page happens to be in the buffer pool. */
-UNIV_INTERN
-void
-btr_search_drop_page_hash_when_freed(
-/*=================================*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no); /*!< in: page number */
-/********************************************************************//**
-Updates the page hash index when a single record is inserted on a page. */
-UNIV_INTERN
-void
-btr_search_update_hash_node_on_insert(
-/*==================================*/
- btr_cur_t* cursor);/*!< in: cursor which was positioned to the
- place to insert using btr_cur_search_...,
- and the new record has been inserted next
- to the cursor */
-/********************************************************************//**
-Updates the page hash index when a single record is inserted on a page. */
-UNIV_INTERN
-void
-btr_search_update_hash_on_insert(
-/*=============================*/
- btr_cur_t* cursor);/*!< in: cursor which was positioned to the
- place to insert using btr_cur_search_...,
- and the new record has been inserted next
- to the cursor */
-/********************************************************************//**
-Updates the page hash index when a single record is deleted from a page. */
-UNIV_INTERN
-void
-btr_search_update_hash_on_delete(
-/*=============================*/
- btr_cur_t* cursor);/*!< in: cursor which was positioned on the
- record to delete using btr_cur_search_...,
- the record is not yet deleted */
-/********************************************************************//**
-Validates the search system.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-btr_search_validate(void);
-/*======================*/
-
-/** Flag: has the search system been enabled?
-Protected by btr_search_latch and btr_search_enabled_mutex. */
-extern char btr_search_enabled;
-
-/** The search info struct in an index */
-struct btr_search_struct{
- ulint ref_count; /*!< Number of blocks in this index tree
- that have search index built
- i.e. block->index points to this index.
- Protected by btr_search_latch except
- when during initialization in
- btr_search_info_create(). */
-
- /* @{ The following fields are not protected by any latch.
- Unfortunately, this means that they must be aligned to
- the machine word, i.e., they cannot be turned into bit-fields. */
- buf_block_t* root_guess;/*!< the root page frame when it was last time
- fetched, or NULL */
- ulint hash_analysis; /*!< when this exceeds
- BTR_SEARCH_HASH_ANALYSIS, the hash
- analysis starts; this is reset if no
- success noticed */
- ibool last_hash_succ; /*!< TRUE if the last search would have
- succeeded, or did succeed, using the hash
- index; NOTE that the value here is not exact:
- it is not calculated for every search, and the
- calculation itself is not always accurate! */
- ulint n_hash_potential;
- /*!< number of consecutive searches
- which would have succeeded, or did succeed,
- using the hash index;
- the range is 0 .. BTR_SEARCH_BUILD_LIMIT + 5 */
- /* @} */
- /*---------------------- @{ */
- ulint n_fields; /*!< recommended prefix length for hash search:
- number of full fields */
- ulint n_bytes; /*!< recommended prefix: number of bytes in
- an incomplete field
- @see BTR_PAGE_MAX_REC_SIZE */
- ibool left_side; /*!< TRUE or FALSE, depending on whether
- the leftmost record of several records with
- the same prefix should be indexed in the
- hash index */
- /*---------------------- @} */
-#ifdef UNIV_SEARCH_PERF_STAT
- ulint n_hash_succ; /*!< number of successful hash searches thus
- far */
- ulint n_hash_fail; /*!< number of failed hash searches */
- ulint n_patt_succ; /*!< number of successful pattern searches thus
- far */
- ulint n_searches; /*!< number of searches */
-#endif /* UNIV_SEARCH_PERF_STAT */
-#ifdef UNIV_DEBUG
- ulint magic_n; /*!< magic number @see BTR_SEARCH_MAGIC_N */
-/** value of btr_search_struct::magic_n, used in assertions */
-# define BTR_SEARCH_MAGIC_N 1112765
-#endif /* UNIV_DEBUG */
-};
-
-/** The hash index system */
-typedef struct btr_search_sys_struct btr_search_sys_t;
-
-/** The hash index system */
-struct btr_search_sys_struct{
- hash_table_t* hash_index; /*!< the adaptive hash index,
- mapping dtuple_fold values
- to rec_t pointers on index pages */
-};
-
-/** The adaptive hash index */
-extern btr_search_sys_t* btr_search_sys;
-
-/** @brief The latch protecting the adaptive search system
-
-This latch protects the
-(1) hash index;
-(2) columns of a record to which we have a pointer in the hash index;
-
-but does NOT protect:
-
-(3) next record offset field in a record;
-(4) next or previous records on the same page.
-
-Bear in mind (3) and (4) when using the hash index.
-*/
-extern rw_lock_t* btr_search_latch_temp;
-
-/** The latch protecting the adaptive search system */
-#define btr_search_latch (*btr_search_latch_temp)
-
-#ifdef UNIV_SEARCH_PERF_STAT
-/** Number of successful adaptive hash index lookups */
-extern ulint btr_search_n_succ;
-/** Number of failed adaptive hash index lookups */
-extern ulint btr_search_n_hash_fail;
-#endif /* UNIV_SEARCH_PERF_STAT */
-
-/** After change in n_fields or n_bytes in info, this many rounds are waited
-before starting the hash analysis again: this is to save CPU time when there
-is no hope in building a hash index. */
-#define BTR_SEARCH_HASH_ANALYSIS 17
-
-/** Limit of consecutive searches for trying a search shortcut on the search
-pattern */
-#define BTR_SEARCH_ON_PATTERN_LIMIT 3
-
-/** Limit of consecutive searches for trying a search shortcut using
-the hash index */
-#define BTR_SEARCH_ON_HASH_LIMIT 3
-
-/** We do this many searches before trying to keep the search latch
-over calls from MySQL. If we notice someone waiting for the latch, we
-again set this much timeout. This is to reduce contention. */
-#define BTR_SEA_TIMEOUT 10000
-
-#ifndef UNIV_NONINL
-#include "btr0sea.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/btr0sea.ic b/storage/innodb_plugin/include/btr0sea.ic
deleted file mode 100644
index beadeeb8d02..00000000000
--- a/storage/innodb_plugin/include/btr0sea.ic
+++ /dev/null
@@ -1,84 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/btr0sea.ic
-The index tree adaptive search
-
-Created 2/17/1996 Heikki Tuuri
-*************************************************************************/
-
-#include "dict0mem.h"
-#include "btr0cur.h"
-#include "buf0buf.h"
-
-/*********************************************************************//**
-Updates the search info. */
-UNIV_INTERN
-void
-btr_search_info_update_slow(
-/*========================*/
- btr_search_t* info, /*!< in/out: search info */
- btr_cur_t* cursor);/*!< in: cursor which was just positioned */
-
-/********************************************************************//**
-Fetches the search info attached to an index.
-@return index->search_info; this function itself acquires no latch */
-UNIV_INLINE
-btr_search_t*
-btr_search_get_info(
-/*================*/
- dict_index_t* index) /*!< in: index; asserted non-NULL in debug builds */
-{
- ut_ad(index != NULL);
-
- return index->search_info;
-}
-
-/*********************************************************************//**
-Bumps the hash analysis counter of the index's search info and, once the
-counter has reached BTR_SEARCH_HASH_ANALYSIS, hands the work over to
-btr_search_info_update_slow(). In UNIV_SYNC_DEBUG builds it is asserted
-that the caller does not own btr_search_latch in either mode. */
-UNIV_INLINE
-void
-btr_search_info_update(
-/*===================*/
- dict_index_t* index, /*!< in: index of the cursor */
- btr_cur_t* cursor) /*!< in: cursor which was just positioned */
-{
- btr_search_t* search_info;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- search_info = btr_search_get_info(index);
-
- search_info->hash_analysis += 1;
-
- if (search_info->hash_analysis >= BTR_SEARCH_HASH_ANALYSIS) {
-  /* Threshold reached: run the full (slow) update */
-
-  ut_ad(cursor->flag != BTR_CUR_HASH);
-
-  btr_search_info_update_slow(search_info, cursor);
- }
-
- /* Below the threshold there is nothing more to do */
-}
diff --git a/storage/innodb_plugin/include/btr0types.h b/storage/innodb_plugin/include/btr0types.h
deleted file mode 100644
index ef4a6b04b34..00000000000
--- a/storage/innodb_plugin/include/btr0types.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/btr0types.h
-The index tree general types
-
-Created 2/17/1996 Heikki Tuuri
-*************************************************************************/
-
-#ifndef btr0types_h
-#define btr0types_h
-
-#include "univ.i"
-
-#include "rem0types.h"
-#include "page0types.h"
-
-/** Persistent cursor */
-typedef struct btr_pcur_struct btr_pcur_t;
-/** B-tree cursor */
-typedef struct btr_cur_struct btr_cur_t;
-/** B-tree search information for the adaptive hash index */
-typedef struct btr_search_struct btr_search_t;
-
-/** The size of a reference to data stored on a different page.
-The reference is stored at the end of the prefix of the field
-in the index record. */
-#define BTR_EXTERN_FIELD_REF_SIZE 20
-
-/** A BLOB field reference full of zero, for use in assertions and tests.
-Initially, BLOB field references are set to zero, in
-dtuple_convert_big_rec(). */
-extern const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE];
-
-#endif
diff --git a/storage/innodb_plugin/include/buf0buf.h b/storage/innodb_plugin/include/buf0buf.h
deleted file mode 100644
index 927ff893e39..00000000000
--- a/storage/innodb_plugin/include/buf0buf.h
+++ /dev/null
@@ -1,1521 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/buf0buf.h
-The database buffer pool high-level routines
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef buf0buf_h
-#define buf0buf_h
-
-#include "univ.i"
-#include "fil0fil.h"
-#include "mtr0types.h"
-#include "buf0types.h"
-#include "hash0hash.h"
-#include "ut0byte.h"
-#include "page0types.h"
-#ifndef UNIV_HOTBACKUP
-#include "os0proc.h"
-
-/** @name Modes for buf_page_get_gen */
-/* @{ */
-#define BUF_GET 10 /*!< get always */
-#define BUF_GET_IF_IN_POOL 11 /*!< get if in pool */
-#define BUF_GET_NO_LATCH 14 /*!< get and bufferfix, but
- set no latch; we have
- separated this case, because
- it is error-prone programming
- not to set a latch, and it
- should be used with care */
-/* @} */
-/** @name Modes for buf_page_get_known_nowait */
-/* @{ */
-#define BUF_MAKE_YOUNG 51 /*!< Move the block to the
- start of the LRU list if there
- is a danger that the block
- would drift out of the buffer
- pool*/
-#define BUF_KEEP_OLD 52 /*!< Preserve the current LRU
- position of the block. */
-/* @} */
-
-extern buf_pool_t* buf_pool; /*!< The buffer pool of the database */
-#ifdef UNIV_DEBUG
-extern ibool buf_debug_prints;/*!< If this is set TRUE, the program
- prints info whenever read or flush
- occurs */
-#endif /* UNIV_DEBUG */
-extern ulint srv_buf_pool_write_requests; /*!< variable to count write request
- issued */
-#else /* !UNIV_HOTBACKUP */
-extern buf_block_t* back_block1; /*!< first block, for --apply-log */
-extern buf_block_t* back_block2; /*!< second block, for page reorganize */
-#endif /* !UNIV_HOTBACKUP */
-
-/** Magic value to use instead of checksums when they are disabled */
-#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
-
-/** @brief States of a control block
-@see buf_page_struct
-
-The enumeration values must be 0..7; they follow from declaration order. */
-enum buf_page_state {
- BUF_BLOCK_ZIP_FREE = 0, /*!< contains a free
- compressed page */
- BUF_BLOCK_ZIP_PAGE, /*!< contains a clean
- compressed page */
- BUF_BLOCK_ZIP_DIRTY, /*!< contains a compressed
- page that is in the
- buf_pool->flush_list */
-
- BUF_BLOCK_NOT_USED, /*!< is in the free list;
- must be after the BUF_BLOCK_ZIP_
- constants for compressed-only pages
- @see buf_block_state_valid() */
- BUF_BLOCK_READY_FOR_USE, /*!< when buf_LRU_get_free_block
- returns a block, it is in this state */
- BUF_BLOCK_FILE_PAGE, /*!< contains a buffered file page */
- BUF_BLOCK_MEMORY, /*!< contains some main memory
- object */
- BUF_BLOCK_REMOVE_HASH /*!< the hash index should be removed
- before the block is put to the free list */
-};
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Creates the buffer pool.
-@return own: buf_pool object, NULL if not enough memory or error */
-UNIV_INTERN
-buf_pool_t*
-buf_pool_init(void);
-/*===============*/
-/********************************************************************//**
-Frees the buffer pool at shutdown. This must not be invoked before
-freeing all mutexes. */
-UNIV_INTERN
-void
-buf_pool_free(void);
-/*===============*/
-
-/********************************************************************//**
-Drops the adaptive hash index. To prevent a livelock, this function
-is only to be called while holding btr_search_latch and while
-btr_search_enabled == FALSE. */
-UNIV_INTERN
-void
-buf_pool_drop_hash_index(void);
-/*==========================*/
-
-/********************************************************************//**
-Relocate a buffer control block. Relocates the block on the LRU list
-and in buf_pool->page_hash. Does not relocate bpage->list.
-The caller must take care of relocating bpage->list. */
-UNIV_INTERN
-void
-buf_relocate(
-/*=========*/
- buf_page_t* bpage, /*!< in/out: control block being relocated;
- buf_page_get_state(bpage) must be
- BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
- buf_page_t* dpage) /*!< in/out: destination control block */
- __attribute__((nonnull));
-/********************************************************************//**
-Resizes the buffer pool. */
-UNIV_INTERN
-void
-buf_pool_resize(void);
-/*=================*/
-/*********************************************************************//**
-Gets the current size of the buffer pool in bytes.
-@return size in bytes */
-UNIV_INLINE
-ulint
-buf_pool_get_curr_size(void);
-/*========================*/
-/********************************************************************//**
-Gets the smallest oldest_modification lsn for any page in the pool. Returns
-zero if all modified pages have been flushed to disk.
-@return oldest modification in pool, zero if none */
-UNIV_INLINE
-ib_uint64_t
-buf_pool_get_oldest_modification(void);
-/*==================================*/
-/********************************************************************//**
-Allocates a buffer block.
-@return own: the allocated block, in state BUF_BLOCK_MEMORY */
-UNIV_INLINE
-buf_block_t*
-buf_block_alloc(
-/*============*/
- ulint zip_size); /*!< in: compressed page size in bytes,
- or 0 if uncompressed tablespace */
-/********************************************************************//**
-Frees a buffer block which does not contain a file page. */
-UNIV_INLINE
-void
-buf_block_free(
-/*===========*/
- buf_block_t* block); /*!< in, own: block to be freed */
-#endif /* !UNIV_HOTBACKUP */
-/*********************************************************************//**
-Copies contents of a buffer frame to a given buffer.
-@return buf */
-UNIV_INLINE
-byte*
-buf_frame_copy(
-/*===========*/
- byte* buf, /*!< in: buffer to copy to */
- const buf_frame_t* frame); /*!< in: buffer frame */
-#ifndef UNIV_HOTBACKUP
-/**************************************************************//**
-NOTE! Use this macro instead of calling buf_page_get_gen directly, to improve
-debugging: it supplies a NULL guess, mode BUF_GET and the caller's __FILE__
-and __LINE__. Only values RW_S_LATCH and RW_X_LATCH are allowed in LA! */
-#define buf_page_get(SP, ZS, OF, LA, MTR) buf_page_get_gen(\
- SP, ZS, OF, LA, NULL,\
- BUF_GET, __FILE__, __LINE__, MTR)
-/**************************************************************//**
-Use this macro to bufferfix a page with no latching: it calls
-buf_page_get_gen with RW_NO_LATCH and mode BUF_GET_NO_LATCH. Remember not
-to read the contents of the page unless you know it is safe, and do not
-modify them! This case is kept separate because not setting a latch is
-error-prone programming, so the macro should be used with care. */
-#define buf_page_get_with_no_latch(SP, ZS, OF, MTR) buf_page_get_gen(\
- SP, ZS, OF, RW_NO_LATCH, NULL,\
- BUF_GET_NO_LATCH, __FILE__, __LINE__, MTR)
-/**************************************************************//**
-NOTE! Use this macro instead of calling buf_page_optimistic_get_func
-directly: it passes the caller's __FILE__ and __LINE__, which improves
-debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed as LA! */
-#define buf_page_optimistic_get(LA, BL, MC, MTR) \
- buf_page_optimistic_get_func(LA, BL, MC, __FILE__, __LINE__, MTR)
-/********************************************************************//**
-This is the general function used to get optimistic access to a database
-page.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-buf_page_optimistic_get_func(
-/*=========================*/
- ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
- buf_block_t* block, /*!< in: guessed block */
- ib_uint64_t modify_clock,/*!< in: modify clock value if mode is
- ..._GUESS_ON_CLOCK */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr); /*!< in: mini-transaction */
-/********************************************************************//**
-This is used to get access to a known database page, when no waiting can be
-done.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-buf_page_get_known_nowait(
-/*======================*/
- ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
- buf_block_t* block, /*!< in: the known page */
- ulint mode, /*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr); /*!< in: mini-transaction */
-
-/*******************************************************************//**
-Given a tablespace id and page number tries to get that page. If the
-page is not in the buffer pool it is not loaded and NULL is returned.
-Suitable for using when holding the kernel mutex. */
-UNIV_INTERN
-const buf_block_t*
-buf_page_try_get_func(
-/*==================*/
- ulint space_id,/*!< in: tablespace id */
- ulint page_no,/*!< in: page number */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr); /*!< in: mini-transaction */
-
-/** Tries to get a page; the page is not loaded if it is not in the buffer
-pool. Suitable while holding the kernel mutex. Expands to an expression.
-@param space_id in: tablespace id
-@param page_no in: page number
-@param mtr in: mini-transaction
-@return the page if in buffer pool, NULL if not */
-#define buf_page_try_get(space_id, page_no, mtr) \
- buf_page_try_get_func(space_id, page_no, __FILE__, __LINE__, mtr)
-
-/********************************************************************//**
-Get read access to a compressed page (usually of type
-FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
-The page must be released with buf_page_release_zip().
-NOTE: the page is not protected by any latch. Mutual exclusion has to
-be implemented at a higher level. In other words, all possible
-accesses to a given page through this function must be protected by
-the same set of mutexes or latches.
-@return pointer to the block, or NULL if not compressed */
-UNIV_INTERN
-buf_page_t*
-buf_page_get_zip(
-/*=============*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size */
- ulint offset);/*!< in: page number */
-/********************************************************************//**
-This is the general function used to get access to a database page.
-@return pointer to the block or NULL */
-UNIV_INTERN
-buf_block_t*
-buf_page_get_gen(
-/*=============*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint offset, /*!< in: page number */
- ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
- buf_block_t* guess, /*!< in: guessed block or NULL */
- ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
- BUF_GET_NO_LATCH */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr); /*!< in: mini-transaction */
-/********************************************************************//**
-Initializes a page in the buffer pool. The page is usually not read
-from a file even if it cannot be found in the buffer pool. This is one
-of the functions that perform the block state transition NOT_USED =>
-FILE_PAGE (the other is buf_page_get_gen).
-@return pointer to the block, page bufferfixed */
-UNIV_INTERN
-buf_block_t*
-buf_page_create(
-/*============*/
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: offset of the page within space in units of
- a page */
- ulint zip_size,/*!< in: compressed page size, or 0 */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-#else /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Initializes a page in the buffer pool, for use in ibbackup --restore. */
-UNIV_INTERN
-void
-buf_page_init_for_backup_restore(
-/*=============================*/
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: offset of the page within space
- in units of a page */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- buf_block_t* block); /*!< in: block to init */
-#endif /* !UNIV_HOTBACKUP */
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Releases a compressed-only page acquired with buf_page_get_zip(). */
-UNIV_INLINE
-void
-buf_page_release_zip(
-/*=================*/
- buf_page_t* bpage); /*!< in: buffer block */
-/********************************************************************//**
-Decrements the bufferfix count of a buffer control block and releases
-a latch, if specified. */
-UNIV_INLINE
-void
-buf_page_release(
-/*=============*/
- buf_block_t* block, /*!< in: buffer block */
- ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH,
- RW_NO_LATCH */
- mtr_t* mtr); /*!< in: mtr */
-/********************************************************************//**
-Moves a page to the start of the buffer pool LRU list. This high-level
-function can be used to prevent an important page from slipping out of
-the buffer pool. */
-UNIV_INTERN
-void
-buf_page_make_young(
-/*================*/
- buf_page_t* bpage); /*!< in: buffer block of a file page */
-/********************************************************************//**
-Returns TRUE if the page can be found in the buffer pool hash table.
-
-NOTE that it is possible that the page is not yet read from disk,
-though.
-
-@return TRUE if found in the page hash table */
-UNIV_INLINE
-ibool
-buf_page_peek(
-/*==========*/
- ulint space, /*!< in: space id */
- ulint offset);/*!< in: page number */
-/********************************************************************//**
-Resets the check_index_page_at_flush field of a page if found in the buffer
-pool. */
-UNIV_INTERN
-void
-buf_reset_check_index_page_at_flush(
-/*================================*/
- ulint space, /*!< in: space id */
- ulint offset);/*!< in: page number */
-#ifdef UNIV_DEBUG_FILE_ACCESSES
-/********************************************************************//**
-Sets file_page_was_freed TRUE if the page is found in the buffer pool.
-This function should be called when we free a file page and want the
-debug version to check that it is not accessed any more unless
-reallocated.
-@return control block if found in page hash table, otherwise NULL */
-UNIV_INTERN
-buf_page_t*
-buf_page_set_file_page_was_freed(
-/*=============================*/
- ulint space, /*!< in: space id */
- ulint offset);/*!< in: page number */
-/********************************************************************//**
-Sets file_page_was_freed FALSE if the page is found in the buffer pool.
-This function should be called when a previously freed file page is
-reallocated, so that the debug version no longer treats accesses to it
-as accesses to a freed page.
-@return control block if found in page hash table, otherwise NULL */
-UNIV_INTERN
-buf_page_t*
-buf_page_reset_file_page_was_freed(
-/*===============================*/
- ulint space, /*!< in: space id */
- ulint offset); /*!< in: page number */
-#endif /* UNIV_DEBUG_FILE_ACCESSES */
-/********************************************************************//**
-Reads the freed_page_clock of a buffer block.
-@return freed_page_clock */
-UNIV_INLINE
-ulint
-buf_page_get_freed_page_clock(
-/*==========================*/
- const buf_page_t* bpage) /*!< in: block */
- __attribute__((pure));
-/********************************************************************//**
-Reads the freed_page_clock of a buffer block.
-@return freed_page_clock */
-UNIV_INLINE
-ulint
-buf_block_get_freed_page_clock(
-/*===========================*/
- const buf_block_t* block) /*!< in: block */
- __attribute__((pure));
-
-/********************************************************************//**
-Recommends a move of a block to the start of the LRU list if there is danger
-of dropping from the buffer pool. NOTE: does not reserve the buffer pool
-mutex.
-@return TRUE if should be made younger */
-UNIV_INLINE
-ibool
-buf_page_peek_if_too_old(
-/*=====================*/
- const buf_page_t* bpage); /*!< in: block to make younger */
-/********************************************************************//**
-Returns the current value of the is_hashed flag of a page, or FALSE if
-the page is not in the pool. NOTE that this operation does not fix the
-page in the pool if it is found there.
-@return TRUE if page hash index is built in search system */
-UNIV_INTERN
-ibool
-buf_page_peek_if_search_hashed(
-/*===========================*/
- ulint space, /*!< in: space id */
- ulint offset);/*!< in: page number */
-/********************************************************************//**
-Gets the youngest modification log sequence number for a frame.
-Returns zero if not file page or no modification occurred yet.
-@return newest modification to page */
-UNIV_INLINE
-ib_uint64_t
-buf_page_get_newest_modification(
-/*=============================*/
- const buf_page_t* bpage); /*!< in: block containing the
- page frame */
-/********************************************************************//**
-Increments the modify clock of a frame by 1. The caller must either (1) own
-the buf_pool mutex while the block's bufferfix count is zero, or (2) own an
-x-lock on the block. */
-UNIV_INLINE
-void
-buf_block_modify_clock_inc(
-/*=======================*/
- buf_block_t* block); /*!< in: block */
-/********************************************************************//**
-Returns the value of the modify clock. The caller must have an s-lock
-or x-lock on the block.
-@return value */
-UNIV_INLINE
-ib_uint64_t
-buf_block_get_modify_clock(
-/*=======================*/
- buf_block_t* block); /*!< in: block */
-#else /* !UNIV_HOTBACKUP */
-# define buf_block_modify_clock_inc(block) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Calculates a page checksum which is stored to the page when it is written
-to a file. Note that we must be careful to calculate the same value
-on 32-bit and 64-bit architectures.
-@return checksum */
-UNIV_INTERN
-ulint
-buf_calc_page_new_checksum(
-/*=======================*/
- const byte* page); /*!< in: buffer page */
-/********************************************************************//**
-In versions before 4.0.14 and before 4.1.1 there was a bug where the
-checksum only looked at the first few bytes of the page. This calculates
-that old checksum.
-NOTE: we must first store the new formula checksum to
-FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
-because this takes that field as an input!
-@return checksum */
-UNIV_INTERN
-ulint
-buf_calc_page_old_checksum(
-/*=======================*/
- const byte* page); /*!< in: buffer page */
-/********************************************************************//**
-Checks if a page is corrupt.
-@return TRUE if corrupted */
-UNIV_INTERN
-ibool
-buf_page_is_corrupted(
-/*==================*/
- const byte* read_buf, /*!< in: a database page */
- ulint zip_size); /*!< in: size of compressed page;
- 0 for uncompressed pages */
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Gets the space id, page offset, and byte offset within page of a
-pointer pointing to a buffer frame containing a file page. */
-UNIV_INLINE
-void
-buf_ptr_get_fsp_addr(
-/*=================*/
- const void* ptr, /*!< in: pointer to a buffer frame */
- ulint* space, /*!< out: space id */
- fil_addr_t* addr); /*!< out: page offset and byte offset */
-/**********************************************************************//**
-Gets the hash value of a block. This can be used in searches in the
-lock hash table.
-@return lock hash value */
-UNIV_INLINE
-ulint
-buf_block_get_lock_hash_val(
-/*========================*/
- const buf_block_t* block) /*!< in: block */
- __attribute__((pure));
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Finds a block in the buffer pool that points to a
-given compressed page.
-@return buffer block pointing to the compressed page, or NULL */
-UNIV_INTERN
-buf_block_t*
-buf_pool_contains_zip(
-/*==================*/
- const void* data); /*!< in: pointer to compressed page */
-#endif /* UNIV_DEBUG */
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/*********************************************************************//**
-Validates the buffer pool data structure.
-@return TRUE */
-UNIV_INTERN
-ibool
-buf_validate(void);
-/*==============*/
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/*********************************************************************//**
-Prints info of the buffer pool data structure. */
-UNIV_INTERN
-void
-buf_print(void);
-/*============*/
-#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Prints a page to stderr. */
-UNIV_INTERN
-void
-buf_page_print(
-/*===========*/
- const byte* read_buf, /*!< in: a database page */
- ulint zip_size); /*!< in: compressed page size, or
- 0 for uncompressed pages */
-/********************************************************************//**
-Decompress a block.
-@return TRUE if successful */
-UNIV_INTERN
-ibool
-buf_zip_decompress(
-/*===============*/
- buf_block_t* block, /*!< in/out: block */
- ibool check); /*!< in: TRUE=verify the page checksum */
-#ifndef UNIV_HOTBACKUP
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Returns the number of latched pages in the buffer pool.
-@return number of latched pages */
-UNIV_INTERN
-ulint
-buf_get_latched_pages_number(void);
-/*==============================*/
-#endif /* UNIV_DEBUG */
-/*********************************************************************//**
-Returns the number of pending buffer pool I/O operations.
-@return number of pending I/O operations */
-UNIV_INTERN
-ulint
-buf_get_n_pending_ios(void);
-/*=======================*/
-/*********************************************************************//**
-Prints info of the buffer i/o. */
-UNIV_INTERN
-void
-buf_print_io(
-/*=========*/
- FILE* file); /*!< in: file where to print */
-/*********************************************************************//**
-Returns the ratio, in percent, of modified pages in the buffer pool to
-database pages in the buffer pool.
-@return modified page percentage ratio */
-UNIV_INTERN
-ulint
-buf_get_modified_ratio_pct(void);
-/*============================*/
-/**********************************************************************//**
-Refreshes the statistics used to print per-second averages. */
-UNIV_INTERN
-void
-buf_refresh_io_stats(void);
-/*======================*/
-/*********************************************************************//**
-Asserts that all file pages in the buffer are in a replaceable state.
-@return TRUE */
-UNIV_INTERN
-ibool
-buf_all_freed(void);
-/*===============*/
-/*********************************************************************//**
-Checks that there currently are no pending i/o-operations for the buffer
-pool.
-@return TRUE if there is no pending i/o */
-UNIV_INTERN
-ibool
-buf_pool_check_no_pending_io(void);
-/*==============================*/
-/*********************************************************************//**
-Invalidates the file pages in the buffer pool when an archive recovery is
-completed. All the file pages buffered must be in a replaceable state when
-this function is called: not latched and not modified. */
-UNIV_INTERN
-void
-buf_pool_invalidate(void);
-/*=====================*/
-#endif /* !UNIV_HOTBACKUP */
-
-/*========================================================================
---------------------------- LOWER LEVEL ROUTINES -------------------------
-=========================================================================*/
-
-#ifdef UNIV_SYNC_DEBUG
-/*********************************************************************//**
-Adds latch level info for the rw-lock protecting the buffer frame. This
-should be called in the debug version after a successful latching of a
-page if we know the latching order level of the acquired latch. */
-UNIV_INLINE
-void
-buf_block_dbg_add_level(
-/*====================*/
- buf_block_t* block, /*!< in: buffer page
- where we have acquired latch */
- ulint level); /*!< in: latching order level */
-#else /* UNIV_SYNC_DEBUG */
-# define buf_block_dbg_add_level(block, level) /* nothing */
-#endif /* UNIV_SYNC_DEBUG */
-/*********************************************************************//**
-Gets the state of a block.
-@return state */
-UNIV_INLINE
-enum buf_page_state
-buf_page_get_state(
-/*===============*/
- const buf_page_t* bpage); /*!< in: pointer to the control block */
-/*********************************************************************//**
-Gets the state of a block.
-@return state */
-UNIV_INLINE
-enum buf_page_state
-buf_block_get_state(
-/*================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
- __attribute__((pure));
-/*********************************************************************//**
-Sets the state of a block. */
-UNIV_INLINE
-void
-buf_page_set_state(
-/*===============*/
- buf_page_t* bpage, /*!< in/out: pointer to control block */
- enum buf_page_state state); /*!< in: state */
-/*********************************************************************//**
-Sets the state of a block. */
-UNIV_INLINE
-void
-buf_block_set_state(
-/*================*/
- buf_block_t* block, /*!< in/out: pointer to control block */
- enum buf_page_state state); /*!< in: state */
-/*********************************************************************//**
-Determines if a block is mapped to a tablespace.
-@return TRUE if mapped */
-UNIV_INLINE
-ibool
-buf_page_in_file(
-/*=============*/
- const buf_page_t* bpage) /*!< in: pointer to control block */
- __attribute__((pure));
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Determines if a block should be on unzip_LRU list.
-@return TRUE if block belongs to unzip_LRU */
-UNIV_INLINE
-ibool
-buf_page_belongs_to_unzip_LRU(
-/*==========================*/
- const buf_page_t* bpage) /*!< in: pointer to control block */
- __attribute__((pure));
-
-/*********************************************************************//**
-Gets the mutex of a block.
-@return pointer to mutex protecting bpage */
-UNIV_INLINE
-mutex_t*
-buf_page_get_mutex(
-/*===============*/
- const buf_page_t* bpage) /*!< in: pointer to control block */
- __attribute__((pure));
-
-/*********************************************************************//**
-Get the flush type of a page.
-@return flush type */
-UNIV_INLINE
-enum buf_flush
-buf_page_get_flush_type(
-/*====================*/
- const buf_page_t* bpage) /*!< in: buffer page */
- __attribute__((pure));
-/*********************************************************************//**
-Set the flush type of a page. */
-UNIV_INLINE
-void
-buf_page_set_flush_type(
-/*====================*/
- buf_page_t* bpage, /*!< in: buffer page */
- enum buf_flush flush_type); /*!< in: flush type */
-/*********************************************************************//**
-Map a block to a file page. */
-UNIV_INLINE
-void
-buf_block_set_file_page(
-/*====================*/
- buf_block_t* block, /*!< in/out: pointer to control block */
- ulint space, /*!< in: tablespace id */
- ulint page_no);/*!< in: page number */
-/*********************************************************************//**
-Gets the io_fix state of a block.
-@return io_fix state */
-UNIV_INLINE
-enum buf_io_fix
-buf_page_get_io_fix(
-/*================*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
- __attribute__((pure));
-/*********************************************************************//**
-Gets the io_fix state of a block.
-@return io_fix state */
-UNIV_INLINE
-enum buf_io_fix
-buf_block_get_io_fix(
-/*================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
- __attribute__((pure));
-/*********************************************************************//**
-Sets the io_fix state of a block. */
-UNIV_INLINE
-void
-buf_page_set_io_fix(
-/*================*/
- buf_page_t* bpage, /*!< in/out: control block */
- enum buf_io_fix io_fix);/*!< in: io_fix state */
-/*********************************************************************//**
-Sets the io_fix state of a block. */
-UNIV_INLINE
-void
-buf_block_set_io_fix(
-/*=================*/
- buf_block_t* block, /*!< in/out: control block */
- enum buf_io_fix io_fix);/*!< in: io_fix state */
-
-/********************************************************************//**
-Determine if a buffer block can be relocated in memory. The block
-can be dirty, but it must not be I/O-fixed or bufferfixed. */
-UNIV_INLINE
-ibool
-buf_page_can_relocate(
-/*==================*/
- const buf_page_t* bpage) /*!< control block being relocated */
- __attribute__((pure));
-
-/*********************************************************************//**
-Determine if a block has been flagged old.
-@return TRUE if old */
-UNIV_INLINE
-ibool
-buf_page_is_old(
-/*============*/
- const buf_page_t* bpage) /*!< in: control block */
- __attribute__((pure));
-/*********************************************************************//**
-Flag a block old. */
-UNIV_INLINE
-void
-buf_page_set_old(
-/*=============*/
- buf_page_t* bpage, /*!< in/out: control block */
- ibool old); /*!< in: old */
-/*********************************************************************//**
-Determine the time of first access of a block in the buffer pool.
-@return ut_time_ms() at the time of first access, 0 if not accessed */
-UNIV_INLINE
-unsigned
-buf_page_is_accessed(
-/*=================*/
- const buf_page_t* bpage) /*!< in: control block */
- __attribute__((nonnull, pure));
-/*********************************************************************//**
-Flag a block accessed. */
-UNIV_INLINE
-void
-buf_page_set_accessed(
-/*==================*/
- buf_page_t* bpage, /*!< in/out: control block */
- ulint time_ms) /*!< in: ut_time_ms() */
- __attribute__((nonnull));
-/*********************************************************************//**
-Gets the buf_block_t handle of a buffered file block if an uncompressed
-page frame exists, or NULL.
-@return control block, or NULL */
-UNIV_INLINE
-buf_block_t*
-buf_page_get_block(
-/*===============*/
- buf_page_t* bpage) /*!< in: control block, or NULL */
- __attribute__((pure));
-#endif /* !UNIV_HOTBACKUP */
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Gets a pointer to the memory frame of a block.
-@return pointer to the frame */
-UNIV_INLINE
-buf_frame_t*
-buf_block_get_frame(
-/*================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
- __attribute__((pure));
-#else /* UNIV_DEBUG */
-# define buf_block_get_frame(block) (block)->frame
-#endif /* UNIV_DEBUG */
-/*********************************************************************//**
-Gets the space id of a block.
-@return space id */
-UNIV_INLINE
-ulint
-buf_page_get_space(
-/*===============*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
- __attribute__((pure));
-/*********************************************************************//**
-Gets the space id of a block.
-@return space id */
-UNIV_INLINE
-ulint
-buf_block_get_space(
-/*================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
- __attribute__((pure));
-/*********************************************************************//**
-Gets the page number of a block.
-@return page number */
-UNIV_INLINE
-ulint
-buf_page_get_page_no(
-/*=================*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
- __attribute__((pure));
-/*********************************************************************//**
-Gets the page number of a block.
-@return page number */
-UNIV_INLINE
-ulint
-buf_block_get_page_no(
-/*==================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
- __attribute__((pure));
-/*********************************************************************//**
-Gets the compressed page size of a block.
-@return compressed page size, or 0 */
-UNIV_INLINE
-ulint
-buf_page_get_zip_size(
-/*==================*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
- __attribute__((pure));
-/*********************************************************************//**
-Gets the compressed page size of a block.
-@return compressed page size, or 0 */
-UNIV_INLINE
-ulint
-buf_block_get_zip_size(
-/*===================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
- __attribute__((pure));
-/*********************************************************************//**
-Gets the compressed page descriptor corresponding to an uncompressed page
-if applicable. */
-#define buf_block_get_page_zip(block) \
- (UNIV_LIKELY_NULL((block)->page.zip.data) ? &(block)->page.zip : NULL)
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Gets the block to whose frame the pointer is pointing to.
-@return pointer to block, never NULL */
-UNIV_INTERN
-buf_block_t*
-buf_block_align(
-/*============*/
- const byte* ptr); /*!< in: pointer to a frame */
-/********************************************************************//**
-Find out if a pointer belongs to a buf_block_t. It can be a pointer to
-the buf_block_t itself or a member of it
-@return TRUE if ptr belongs to a buf_block_t struct */
-UNIV_INTERN
-ibool
-buf_pointer_is_block_field(
-/*=======================*/
- const void* ptr); /*!< in: pointer not
- dereferenced */
-/** Find out if a pointer corresponds to a buf_block_t::mutex.
-@param m in: mutex candidate
-@return TRUE if m is a buf_block_t::mutex */
-#define buf_pool_is_block_mutex(m) \
- buf_pointer_is_block_field((const void*)(m))
-/** Find out if a pointer corresponds to a buf_block_t::lock.
-@param l in: rw-lock candidate
-@return TRUE if l is a buf_block_t::lock */
-#define buf_pool_is_block_lock(l) \
- buf_pointer_is_block_field((const void*)(l))
-
-#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
-/*********************************************************************//**
-Gets the compressed page descriptor corresponding to an uncompressed page
-if applicable.
-@return compressed page descriptor, or NULL */
-UNIV_INLINE
-const page_zip_des_t*
-buf_frame_get_page_zip(
-/*===================*/
- const byte* ptr); /*!< in: pointer to the page */
-#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
-/********************************************************************//**
-Function which inits a page for read to the buffer buf_pool. If the page is
-(1) already in buf_pool, or
-(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
-(3) if the space is deleted or being deleted,
-then this function does nothing.
-Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
-on the buffer frame. The io-handler must take care that the flag is cleared
-and the lock released later.
-@return pointer to the block or NULL */
-UNIV_INTERN
-buf_page_t*
-buf_page_init_for_read(
-/*===================*/
- ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
- ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size, or 0 */
- ibool unzip, /*!< in: TRUE=request uncompressed page */
- ib_int64_t tablespace_version,/*!< in: prevents reading from a wrong
- version of the tablespace in case we have done
- DISCARD + IMPORT */
- ulint offset);/*!< in: page number */
-/********************************************************************//**
-Completes an asynchronous read or write request of a file page to or from
-the buffer pool. */
-UNIV_INTERN
-void
-buf_page_io_complete(
-/*=================*/
- buf_page_t* bpage); /*!< in: pointer to the block in question */
-/********************************************************************//**
-Calculates a folded value of a file page address to use in the page hash
-table.
-@return the folded value */
-UNIV_INLINE
-ulint
-buf_page_address_fold(
-/*==================*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: offset of the page within space */
- __attribute__((const));
-/******************************************************************//**
-Returns the control block of a file page, NULL if not found.
-@return block, NULL if not found */
-UNIV_INLINE
-buf_page_t*
-buf_page_hash_get(
-/*==============*/
- ulint space, /*!< in: space id */
- ulint offset);/*!< in: offset of the page within space */
-/******************************************************************//**
-Returns the control block of a file page, NULL if not found
-or an uncompressed page frame does not exist.
-@return block, NULL if not found */
-UNIV_INLINE
-buf_block_t*
-buf_block_hash_get(
-/*===============*/
- ulint space, /*!< in: space id */
- ulint offset);/*!< in: offset of the page within space */
-/*********************************************************************//**
-Gets the current length of the free list of buffer blocks.
-@return length of the free list */
-UNIV_INTERN
-ulint
-buf_get_free_list_len(void);
-/*=======================*/
-#endif /* !UNIV_HOTBACKUP */
-
-
-/** The common buffer control block structure
-for compressed and uncompressed frames */
-
-struct buf_page_struct{
- /** @name General fields
- None of these bit-fields must be modified without holding
- buf_page_get_mutex() [buf_block_struct::mutex or
- buf_pool_zip_mutex], since they can be stored in the same
- machine word. Some of these fields are additionally protected
- by buf_pool_mutex. */
- /* @{ */
-
- unsigned space:32; /*!< tablespace id; also protected
- by buf_pool_mutex. */
- unsigned offset:32; /*!< page number; also protected
- by buf_pool_mutex. */
-
- unsigned state:3; /*!< state of the control block; also
- protected by buf_pool_mutex.
- State transitions from
- BUF_BLOCK_READY_FOR_USE to
- BUF_BLOCK_MEMORY need not be
- protected by buf_page_get_mutex().
- @see enum buf_page_state */
-#ifndef UNIV_HOTBACKUP
- unsigned flush_type:2; /*!< if this block is currently being
- flushed to disk, this tells the
- flush_type.
- @see enum buf_flush */
- unsigned io_fix:2; /*!< type of pending I/O operation;
- also protected by buf_pool_mutex
- @see enum buf_io_fix */
- unsigned buf_fix_count:25;/*!< count of how manyfold this block
- is currently bufferfixed */
- /* @} */
-#endif /* !UNIV_HOTBACKUP */
- page_zip_des_t zip; /*!< compressed page; zip.data
- (but not the data it points to) is
- also protected by buf_pool_mutex */
-#ifndef UNIV_HOTBACKUP
- buf_page_t* hash; /*!< node used in chaining to
- buf_pool->page_hash or
- buf_pool->zip_hash */
-#ifdef UNIV_DEBUG
- ibool in_page_hash; /*!< TRUE if in buf_pool->page_hash */
- ibool in_zip_hash; /*!< TRUE if in buf_pool->zip_hash */
-#endif /* UNIV_DEBUG */
-
- /** @name Page flushing fields
- All these are protected by buf_pool_mutex. */
- /* @{ */
-
- UT_LIST_NODE_T(buf_page_t) list;
- /*!< based on state, this is a
- list node, protected only by
- buf_pool_mutex, in one of the
- following lists in buf_pool:
-
- - BUF_BLOCK_NOT_USED: free
- - BUF_BLOCK_FILE_PAGE: flush_list
- - BUF_BLOCK_ZIP_DIRTY: flush_list
- - BUF_BLOCK_ZIP_PAGE: zip_clean
- - BUF_BLOCK_ZIP_FREE: zip_free[]
-
- The contents of the list node
- is undefined if !in_flush_list
- && state == BUF_BLOCK_FILE_PAGE,
- or if state is one of
- BUF_BLOCK_MEMORY,
- BUF_BLOCK_REMOVE_HASH or
- BUF_BLOCK_READY_IN_USE. */
-
-#ifdef UNIV_DEBUG
- ibool in_flush_list; /*!< TRUE if in buf_pool->flush_list;
- when buf_pool_mutex is free, the
- following should hold: in_flush_list
- == (state == BUF_BLOCK_FILE_PAGE
- || state == BUF_BLOCK_ZIP_DIRTY) */
- ibool in_free_list; /*!< TRUE if in buf_pool->free; when
- buf_pool_mutex is free, the following
- should hold: in_free_list
- == (state == BUF_BLOCK_NOT_USED) */
-#endif /* UNIV_DEBUG */
- ib_uint64_t newest_modification;
- /*!< log sequence number of
- the youngest modification to
- this block, zero if not
- modified */
- ib_uint64_t oldest_modification;
- /*!< log sequence number of
- the START of the log entry
- written of the oldest
- modification to this block
- which has not yet been flushed
- on disk; zero if all
- modifications are on disk */
- /* @} */
- /** @name LRU replacement algorithm fields
- These fields are protected by buf_pool_mutex only (not
- buf_pool_zip_mutex or buf_block_struct::mutex). */
- /* @{ */
-
- UT_LIST_NODE_T(buf_page_t) LRU;
- /*!< node of the LRU list */
-#ifdef UNIV_DEBUG
- ibool in_LRU_list; /*!< TRUE if the page is in
- the LRU list; used in
- debugging */
-#endif /* UNIV_DEBUG */
- unsigned old:1; /*!< TRUE if the block is in the old
- blocks in buf_pool->LRU_old */
- unsigned freed_page_clock:31;/*!< the value of
- buf_pool->freed_page_clock
- when this block was the last
- time put to the head of the
- LRU list; a thread is allowed
- to read this for heuristic
- purposes without holding any
- mutex or latch */
- unsigned access_time:32; /*!< time of first access, or
- 0 if the block was never accessed
- in the buffer pool */
- /* @} */
-# ifdef UNIV_DEBUG_FILE_ACCESSES
- ibool file_page_was_freed;
- /*!< this is set to TRUE when fsp
- frees a page in buffer pool */
-# endif /* UNIV_DEBUG_FILE_ACCESSES */
-#endif /* !UNIV_HOTBACKUP */
-};
-
-/** The buffer control block structure */
-
-struct buf_block_struct{
-
- /** @name General fields */
- /* @{ */
-
- buf_page_t page; /*!< page information; this must
- be the first field, so that
- buf_pool->page_hash can point
- to buf_page_t or buf_block_t */
- byte* frame; /*!< pointer to buffer frame which
- is of size UNIV_PAGE_SIZE, and
- aligned to an address divisible by
- UNIV_PAGE_SIZE */
-#ifndef UNIV_HOTBACKUP
- UT_LIST_NODE_T(buf_block_t) unzip_LRU;
- /*!< node of the decompressed LRU list;
- a block is in the unzip_LRU list
- if page.state == BUF_BLOCK_FILE_PAGE
- and page.zip.data != NULL */
-#ifdef UNIV_DEBUG
- ibool in_unzip_LRU_list;/*!< TRUE if the page is in the
- decompressed LRU list;
- used in debugging */
-#endif /* UNIV_DEBUG */
- mutex_t mutex; /*!< mutex protecting this block:
- state (also protected by the buffer
- pool mutex), io_fix, buf_fix_count,
- and accessed; we introduce this new
- mutex in InnoDB-5.1 to relieve
- contention on the buffer pool mutex */
- rw_lock_t lock; /*!< read-write lock of the buffer
- frame */
- unsigned lock_hash_val:32;/*!< hashed value of the page address
- in the record lock hash table */
- unsigned check_index_page_at_flush:1;
- /*!< TRUE if we know that this is
- an index page, and want the database
- to check its consistency before flush;
- note that there may be pages in the
- buffer pool which are index pages,
- but this flag is not set because
- we do not keep track of all pages */
- /* @} */
- /** @name Optimistic search field */
- /* @{ */
-
- ib_uint64_t modify_clock; /*!< this clock is incremented every
- time a pointer to a record on the
- page may become obsolete; this is
- used in the optimistic cursor
- positioning: if the modify clock has
- not changed, we know that the pointer
- is still valid; this field may be
- changed if the thread (1) owns the
- pool mutex and the page is not
- bufferfixed, or (2) the thread has an
- x-latch on the block */
- /* @} */
- /** @name Hash search fields (unprotected)
- NOTE that these fields are NOT protected by any semaphore! */
- /* @{ */
-
- ulint n_hash_helps; /*!< counter which controls building
- of a new hash index for the page */
- ulint n_fields; /*!< recommended prefix length for hash
- search: number of full fields */
- ulint n_bytes; /*!< recommended prefix: number of bytes
- in an incomplete field */
- ibool left_side; /*!< TRUE or FALSE, depending on
- whether the leftmost record of several
- records with the same prefix should be
- indexed in the hash index */
- /* @} */
-
- /** @name Hash search fields
- These 6 fields may only be modified when we have
- an x-latch on btr_search_latch AND
- - we are holding an s-latch or x-latch on buf_block_struct::lock or
- - we know that buf_block_struct::buf_fix_count == 0.
-
- An exception to this is when we init or create a page
- in the buffer pool in buf0buf.c. */
-
- /* @{ */
-
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- ulint n_pointers; /*!< used in debugging: the number of
- pointers in the adaptive hash index
- pointing to this frame */
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- unsigned is_hashed:1; /*!< TRUE if hash index has
- already been built on this
- page; note that it does not
- guarantee that the index is
- complete, though: there may
- have been hash collisions,
- record deletions, etc. */
- unsigned curr_n_fields:10;/*!< prefix length for hash indexing:
- number of full fields */
- unsigned curr_n_bytes:15;/*!< number of bytes in hash
- indexing */
- unsigned curr_left_side:1;/*!< TRUE or FALSE in hash indexing */
- dict_index_t* index; /*!< Index for which the adaptive
- hash index has been created. */
- /* @} */
-# ifdef UNIV_SYNC_DEBUG
- /** @name Debug fields */
- /* @{ */
- rw_lock_t debug_latch; /*!< in the debug version, each thread
- which bufferfixes the block acquires
- an s-latch here; so we can use the
- debug utilities in sync0rw */
- /* @} */
-# endif
-#endif /* !UNIV_HOTBACKUP */
-};
-
-/** Check if a buf_block_t object is in a valid state
-@param block buffer block
-@return TRUE if valid */
-#define buf_block_state_valid(block) \
-(buf_block_get_state(block) >= BUF_BLOCK_NOT_USED \
- && (buf_block_get_state(block) <= BUF_BLOCK_REMOVE_HASH))
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Compute the hash fold value for blocks in buf_pool->zip_hash. */
-/* @{ */
-#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE)
-#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
-#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
-/* @} */
-
-/** @brief The buffer pool statistics structure. */
-struct buf_pool_stat_struct{
- ulint n_page_gets; /*!< number of page gets performed;
- also successful searches through
- the adaptive hash index are
- counted as page gets; this field
- is NOT protected by the buffer
- pool mutex */
- ulint n_pages_read; /*!< number read operations */
- ulint n_pages_written;/*!< number write operations */
- ulint n_pages_created;/*!< number of pages created
- in the pool with no read */
- ulint n_ra_pages_read;/*!< number of pages read in
- as part of read ahead */
- ulint n_ra_pages_evicted;/*!< number of read ahead
- pages that are evicted without
- being accessed */
- ulint n_pages_made_young; /*!< number of pages made young, in
- calls to buf_LRU_make_block_young() */
- ulint n_pages_not_made_young; /*!< number of pages not made
- young because the first access
- was not long enough ago, in
- buf_page_peek_if_too_old() */
-};
-
-/** @brief The buffer pool structure.
-
-NOTE! The definition appears here only for other modules of this
-directory (buf) to see it. Do not use from outside! */
-
-struct buf_pool_struct{
-
- /** @name General fields */
- /* @{ */
-
- ulint n_chunks; /*!< number of buffer pool chunks */
- buf_chunk_t* chunks; /*!< buffer pool chunks */
- ulint curr_size; /*!< current pool size in pages */
- hash_table_t* page_hash; /*!< hash table of buf_page_t or
- buf_block_t file pages,
- buf_page_in_file() == TRUE,
- indexed by (space_id, offset) */
- hash_table_t* zip_hash; /*!< hash table of buf_block_t blocks
- whose frames are allocated to the
- zip buddy system,
- indexed by block->frame */
- ulint n_pend_reads; /*!< number of pending read operations */
- ulint n_pend_unzip; /*!< number of pending decompressions */
-
- time_t last_printout_time;
- /*!< when buf_print_io was last time
- called */
- buf_pool_stat_t stat; /*!< current statistics */
- buf_pool_stat_t old_stat; /*!< old statistics */
-
- /* @} */
-
- /** @name Page flushing algorithm fields */
-
- /* @{ */
-
- UT_LIST_BASE_NODE_T(buf_page_t) flush_list;
- /*!< base node of the modified block
- list */
- ibool init_flush[BUF_FLUSH_N_TYPES];
- /*!< this is TRUE when a flush of the
- given type is being initialized */
- ulint n_flush[BUF_FLUSH_N_TYPES];
- /*!< this is the number of pending
- writes in the given flush type */
- os_event_t no_flush[BUF_FLUSH_N_TYPES];
- /*!< this is in the set state
- when there is no flush batch
- of the given type running */
- ulint freed_page_clock;/*!< a sequence number used
- to count the number of buffer
- blocks removed from the end of
- the LRU list; NOTE that this
- counter may wrap around at 4
- billion! A thread is allowed
- to read this for heuristic
- purposes without holding any
- mutex or latch */
- ulint LRU_flush_ended;/*!< when an LRU flush ends for a page,
- this is incremented by one; this is
- set to zero when a buffer block is
- allocated */
-
- /* @} */
- /** @name LRU replacement algorithm fields */
- /* @{ */
-
- UT_LIST_BASE_NODE_T(buf_page_t) free;
- /*!< base node of the free
- block list */
- UT_LIST_BASE_NODE_T(buf_page_t) LRU;
- /*!< base node of the LRU list */
- buf_page_t* LRU_old; /*!< pointer to the about
- buf_LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
- oldest blocks in the LRU list;
- NULL if LRU length less than
- BUF_LRU_OLD_MIN_LEN;
- NOTE: when LRU_old != NULL, its length
- should always equal LRU_old_len */
- ulint LRU_old_len; /*!< length of the LRU list from
- the block to which LRU_old points
- onward, including that block;
- see buf0lru.c for the restrictions
- on this value; 0 if LRU_old == NULL;
- NOTE: LRU_old_len must be adjusted
- whenever LRU_old shrinks or grows! */
-
- UT_LIST_BASE_NODE_T(buf_block_t) unzip_LRU;
- /*!< base node of the
- unzip_LRU list */
-
- /* @} */
- /** @name Buddy allocator fields
- The buddy allocator is used for allocating compressed page
- frames and buf_page_t descriptors of blocks that exist
- in the buffer pool only in compressed form. */
- /* @{ */
- UT_LIST_BASE_NODE_T(buf_page_t) zip_clean;
- /*!< unmodified compressed pages */
- UT_LIST_BASE_NODE_T(buf_page_t) zip_free[BUF_BUDDY_SIZES];
- /*!< buddy free lists */
-#if BUF_BUDDY_HIGH != UNIV_PAGE_SIZE
-# error "BUF_BUDDY_HIGH != UNIV_PAGE_SIZE"
-#endif
-#if BUF_BUDDY_LOW > PAGE_ZIP_MIN_SIZE
-# error "BUF_BUDDY_LOW > PAGE_ZIP_MIN_SIZE"
-#endif
- /* @} */
-};
-
-/** mutex protecting the buffer pool struct and control blocks, except the
-read-write lock in them */
-extern mutex_t buf_pool_mutex;
-/** mutex protecting the control blocks of compressed-only pages
-(of type buf_page_t, not buf_block_t) */
-extern mutex_t buf_pool_zip_mutex;
-
-/** @name Accessors for buf_pool_mutex.
-Use these instead of accessing buf_pool_mutex directly. */
-/* @{ */
-
-/** Test if buf_pool_mutex is owned. */
-#define buf_pool_mutex_own() mutex_own(&buf_pool_mutex)
-/** Acquire the buffer pool mutex. */
-#define buf_pool_mutex_enter() do { \
- ut_ad(!mutex_own(&buf_pool_zip_mutex)); \
- mutex_enter(&buf_pool_mutex); \
-} while (0)
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/** Flag to forbid the release of the buffer pool mutex.
-Protected by buf_pool_mutex. */
-extern ulint buf_pool_mutex_exit_forbidden;
-/** Forbid the release of the buffer pool mutex. */
-# define buf_pool_mutex_exit_forbid() do { \
- ut_ad(buf_pool_mutex_own()); \
- buf_pool_mutex_exit_forbidden++; \
-} while (0)
-/** Allow the release of the buffer pool mutex. */
-# define buf_pool_mutex_exit_allow() do { \
- ut_ad(buf_pool_mutex_own()); \
- ut_a(buf_pool_mutex_exit_forbidden); \
- buf_pool_mutex_exit_forbidden--; \
-} while (0)
-/** Release the buffer pool mutex. */
-# define buf_pool_mutex_exit() do { \
- ut_a(!buf_pool_mutex_exit_forbidden); \
- mutex_exit(&buf_pool_mutex); \
-} while (0)
-#else
-/** Forbid the release of the buffer pool mutex. */
-# define buf_pool_mutex_exit_forbid() ((void) 0)
-/** Allow the release of the buffer pool mutex. */
-# define buf_pool_mutex_exit_allow() ((void) 0)
-/** Release the buffer pool mutex. */
-# define buf_pool_mutex_exit() mutex_exit(&buf_pool_mutex)
-#endif
-#endif /* !UNIV_HOTBACKUP */
-/* @} */
-
-/**********************************************************************
-Let us list the consistency conditions for different control block states.
-
-NOT_USED: is in free list, not in LRU list, not in flush list, nor
- page hash table
-READY_FOR_USE: is not in free list, LRU list, or flush list, nor page
- hash table
-MEMORY: is not in free list, LRU list, or flush list, nor page
- hash table
-FILE_PAGE: space and offset are defined, is in page hash table
- if io_fix == BUF_IO_WRITE,
- pool: no_flush[flush_type] is in reset state,
- pool: n_flush[flush_type] > 0
-
- (1) if buf_fix_count == 0, then
- is in LRU list, not in free list
- is in flush list,
- if and only if oldest_modification > 0
- is x-locked,
- if and only if io_fix == BUF_IO_READ
- is s-locked,
- if and only if io_fix == BUF_IO_WRITE
-
- (2) if buf_fix_count > 0, then
- is not in LRU list, not in free list
- is in flush list,
- if and only if oldest_modification > 0
- if io_fix == BUF_IO_READ,
- is x-locked
- if io_fix == BUF_IO_WRITE,
- is s-locked
-
-State transitions:
-
-NOT_USED => READY_FOR_USE
-READY_FOR_USE => MEMORY
-READY_FOR_USE => FILE_PAGE
-MEMORY => NOT_USED
-FILE_PAGE => NOT_USED NOTE: This transition is allowed if and only if
- (1) buf_fix_count == 0,
- (2) oldest_modification == 0, and
- (3) io_fix == 0.
-*/
-
-#ifndef UNIV_NONINL
-#include "buf0buf.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/buf0buf.ic b/storage/innodb_plugin/include/buf0buf.ic
deleted file mode 100644
index 0f92a59a1c7..00000000000
--- a/storage/innodb_plugin/include/buf0buf.ic
+++ /dev/null
@@ -1,1068 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/buf0buf.ic
-The database buffer buf_pool
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#include "mtr0mtr.h"
-#ifndef UNIV_HOTBACKUP
-#include "buf0flu.h"
-#include "buf0lru.h"
-#include "buf0rea.h"
-
-/********************************************************************//**
-Reads the freed_page_clock of a buffer block.
-@return freed_page_clock */
-UNIV_INLINE
-ulint
-buf_page_get_freed_page_clock(
-/*==========================*/
- const buf_page_t* bpage) /*!< in: block */
-{
- /* This is sometimes read without holding buf_pool_mutex. */
- return(bpage->freed_page_clock);
-}
-
-/********************************************************************//**
-Reads the freed_page_clock of a buffer block.
-@return freed_page_clock */
-UNIV_INLINE
-ulint
-buf_block_get_freed_page_clock(
-/*===========================*/
- const buf_block_t* block) /*!< in: block */
-{
- return(buf_page_get_freed_page_clock(&block->page));
-}
-
-/********************************************************************//**
-Recommends a move of a block to the start of the LRU list if there is danger
-of dropping from the buffer pool. NOTE: does not reserve the buffer pool
-mutex.
-@return TRUE if should be made younger */
-UNIV_INLINE
-ibool
-buf_page_peek_if_too_old(
-/*=====================*/
- const buf_page_t* bpage) /*!< in: block to make younger */
-{
- if (UNIV_UNLIKELY(buf_pool->freed_page_clock == 0)) {
- /* If eviction has not started yet, do not update the
- statistics or move blocks in the LRU list. This is
- either the warm-up phase or an in-memory workload. */
- return(FALSE);
- } else if (buf_LRU_old_threshold_ms && bpage->old) {
- unsigned access_time = buf_page_is_accessed(bpage);
-
- if (access_time > 0
- && (ut_time_ms() - access_time)
- >= buf_LRU_old_threshold_ms) {
- return(TRUE);
- }
-
- buf_pool->stat.n_pages_not_made_young++;
- return(FALSE);
- } else {
- /* FIXME: bpage->freed_page_clock is 31 bits */
- return((buf_pool->freed_page_clock & ((1UL << 31) - 1))
- > ((ulint) bpage->freed_page_clock
- + (buf_pool->curr_size
- * (BUF_LRU_OLD_RATIO_DIV - buf_LRU_old_ratio)
- / (BUF_LRU_OLD_RATIO_DIV * 4))));
- }
-}
-
-/*********************************************************************//**
-Gets the current size of buffer buf_pool in bytes.
-@return size in bytes */
-UNIV_INLINE
-ulint
-buf_pool_get_curr_size(void)
-/*========================*/
-{
- return(buf_pool->curr_size * UNIV_PAGE_SIZE);
-}
-
-/********************************************************************//**
-Gets the smallest oldest_modification lsn for any page in the pool. Returns
-zero if all modified pages have been flushed to disk.
-@return oldest modification in pool, zero if none */
-UNIV_INLINE
-ib_uint64_t
-buf_pool_get_oldest_modification(void)
-/*==================================*/
-{
- buf_page_t* bpage;
- ib_uint64_t lsn;
-
- buf_pool_mutex_enter();
-
- bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
-
- if (bpage == NULL) {
- lsn = 0;
- } else {
- ut_ad(bpage->in_flush_list);
- lsn = bpage->oldest_modification;
- }
-
- buf_pool_mutex_exit();
-
- /* The returned answer may be out of date: the flush_list can
- change after the mutex has been released. */
-
- return(lsn);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*********************************************************************//**
-Gets the state of a block.
-@return state */
-UNIV_INLINE
-enum buf_page_state
-buf_page_get_state(
-/*===============*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
-{
- enum buf_page_state state = (enum buf_page_state) bpage->state;
-
-#ifdef UNIV_DEBUG
- switch (state) {
- case BUF_BLOCK_ZIP_FREE:
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_ZIP_DIRTY:
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_FILE_PAGE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- break;
- default:
- ut_error;
- }
-#endif /* UNIV_DEBUG */
-
- return(state);
-}
-/*********************************************************************//**
-Gets the state of a block.
-@return state */
-UNIV_INLINE
-enum buf_page_state
-buf_block_get_state(
-/*================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
-{
- return(buf_page_get_state(&block->page));
-}
-/*********************************************************************//**
-Sets the state of a block. */
-UNIV_INLINE
-void
-buf_page_set_state(
-/*===============*/
- buf_page_t* bpage, /*!< in/out: pointer to control block */
- enum buf_page_state state) /*!< in: state */
-{
-#ifdef UNIV_DEBUG
- enum buf_page_state old_state = buf_page_get_state(bpage);
-
- switch (old_state) {
- case BUF_BLOCK_ZIP_FREE:
- ut_error;
- break;
- case BUF_BLOCK_ZIP_PAGE:
- ut_a(state == BUF_BLOCK_ZIP_DIRTY);
- break;
- case BUF_BLOCK_ZIP_DIRTY:
- ut_a(state == BUF_BLOCK_ZIP_PAGE);
- break;
- case BUF_BLOCK_NOT_USED:
- ut_a(state == BUF_BLOCK_READY_FOR_USE);
- break;
- case BUF_BLOCK_READY_FOR_USE:
- ut_a(state == BUF_BLOCK_MEMORY
- || state == BUF_BLOCK_FILE_PAGE
- || state == BUF_BLOCK_NOT_USED);
- break;
- case BUF_BLOCK_MEMORY:
- ut_a(state == BUF_BLOCK_NOT_USED);
- break;
- case BUF_BLOCK_FILE_PAGE:
- ut_a(state == BUF_BLOCK_NOT_USED
- || state == BUF_BLOCK_REMOVE_HASH);
- break;
- case BUF_BLOCK_REMOVE_HASH:
- ut_a(state == BUF_BLOCK_MEMORY);
- break;
- }
-#endif /* UNIV_DEBUG */
- bpage->state = state;
- ut_ad(buf_page_get_state(bpage) == state);
-}
-
-/*********************************************************************//**
-Sets the state of a block. */
-UNIV_INLINE
-void
-buf_block_set_state(
-/*================*/
- buf_block_t* block, /*!< in/out: pointer to control block */
- enum buf_page_state state) /*!< in: state */
-{
- buf_page_set_state(&block->page, state);
-}
-
-/*********************************************************************//**
-Determines if a block is mapped to a tablespace.
-@return TRUE if mapped */
-UNIV_INLINE
-ibool
-buf_page_in_file(
-/*=============*/
- const buf_page_t* bpage) /*!< in: pointer to control block */
-{
- switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_ZIP_FREE:
- /* This is a free page in buf_pool->zip_free[].
- Such pages should only be accessed by the buddy allocator. */
- ut_error;
- break;
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_ZIP_DIRTY:
- case BUF_BLOCK_FILE_PAGE:
- return(TRUE);
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- break;
- }
-
- return(FALSE);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Determines if a block should be on unzip_LRU list.
-@return TRUE if block belongs to unzip_LRU */
-UNIV_INLINE
-ibool
-buf_page_belongs_to_unzip_LRU(
-/*==========================*/
- const buf_page_t* bpage) /*!< in: pointer to control block */
-{
- ut_ad(buf_page_in_file(bpage));
-
- return(bpage->zip.data
- && buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
-}
-
-/*********************************************************************//**
-Gets the mutex of a block.
-@return pointer to mutex protecting bpage */
-UNIV_INLINE
-mutex_t*
-buf_page_get_mutex(
-/*===============*/
- const buf_page_t* bpage) /*!< in: pointer to control block */
-{
- switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_ZIP_FREE:
- ut_error;
- return(NULL);
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_ZIP_DIRTY:
- return(&buf_pool_zip_mutex);
- default:
- return(&((buf_block_t*) bpage)->mutex);
- }
-}
-
-/*********************************************************************//**
-Get the flush type of a page.
-@return flush type */
-UNIV_INLINE
-enum buf_flush
-buf_page_get_flush_type(
-/*====================*/
- const buf_page_t* bpage) /*!< in: buffer page */
-{
- enum buf_flush flush_type = (enum buf_flush) bpage->flush_type;
-
-#ifdef UNIV_DEBUG
- switch (flush_type) {
- case BUF_FLUSH_LRU:
- case BUF_FLUSH_SINGLE_PAGE:
- case BUF_FLUSH_LIST:
- return(flush_type);
- case BUF_FLUSH_N_TYPES:
- break;
- }
- ut_error;
-#endif /* UNIV_DEBUG */
- return(flush_type);
-}
-/*********************************************************************//**
-Set the flush type of a page. */
-UNIV_INLINE
-void
-buf_page_set_flush_type(
-/*====================*/
- buf_page_t* bpage, /*!< in: buffer page */
- enum buf_flush flush_type) /*!< in: flush type */
-{
- bpage->flush_type = flush_type;
- ut_ad(buf_page_get_flush_type(bpage) == flush_type);
-}
-
-/*********************************************************************//**
-Map a block to a file page. */
-UNIV_INLINE
-void
-buf_block_set_file_page(
-/*====================*/
- buf_block_t* block, /*!< in/out: pointer to control block */
- ulint space, /*!< in: tablespace id */
- ulint page_no)/*!< in: page number */
-{
- buf_block_set_state(block, BUF_BLOCK_FILE_PAGE);
- block->page.space = space;
- block->page.offset = page_no;
-}
-
-/*********************************************************************//**
-Gets the io_fix state of a block.
-@return io_fix state */
-UNIV_INLINE
-enum buf_io_fix
-buf_page_get_io_fix(
-/*================*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
-{
- enum buf_io_fix io_fix = (enum buf_io_fix) bpage->io_fix;
-#ifdef UNIV_DEBUG
- switch (io_fix) {
- case BUF_IO_NONE:
- case BUF_IO_READ:
- case BUF_IO_WRITE:
- return(io_fix);
- }
- ut_error;
-#endif /* UNIV_DEBUG */
- return(io_fix);
-}
-
-/*********************************************************************//**
-Gets the io_fix state of a block.
-@return io_fix state */
-UNIV_INLINE
-enum buf_io_fix
-buf_block_get_io_fix(
-/*================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
-{
- return(buf_page_get_io_fix(&block->page));
-}
-
-/*********************************************************************//**
-Sets the io_fix state of a block. */
-UNIV_INLINE
-void
-buf_page_set_io_fix(
-/*================*/
- buf_page_t* bpage, /*!< in/out: control block */
- enum buf_io_fix io_fix) /*!< in: io_fix state */
-{
- ut_ad(buf_pool_mutex_own());
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-
- bpage->io_fix = io_fix;
- ut_ad(buf_page_get_io_fix(bpage) == io_fix);
-}
-
-/*********************************************************************//**
-Sets the io_fix state of a block. */
-UNIV_INLINE
-void
-buf_block_set_io_fix(
-/*=================*/
- buf_block_t* block, /*!< in/out: control block */
- enum buf_io_fix io_fix) /*!< in: io_fix state */
-{
- buf_page_set_io_fix(&block->page, io_fix);
-}
-
-/********************************************************************//**
-Determine if a buffer block can be relocated in memory. The block
-can be dirty, but it must not be I/O-fixed or bufferfixed. */
-UNIV_INLINE
-ibool
-buf_page_can_relocate(
-/*==================*/
- const buf_page_t* bpage) /*!< control block being relocated */
-{
- ut_ad(buf_pool_mutex_own());
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
- ut_ad(buf_page_in_file(bpage));
- ut_ad(bpage->in_LRU_list);
-
- return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
- && bpage->buf_fix_count == 0);
-}
-
-/*********************************************************************//**
-Determine if a block has been flagged old.
-@return TRUE if old */
-UNIV_INLINE
-ibool
-buf_page_is_old(
-/*============*/
- const buf_page_t* bpage) /*!< in: control block */
-{
- ut_ad(buf_page_in_file(bpage));
- ut_ad(buf_pool_mutex_own());
-
- return(bpage->old);
-}
-
-/*********************************************************************//**
-Flag a block old. */
-UNIV_INLINE
-void
-buf_page_set_old(
-/*=============*/
- buf_page_t* bpage, /*!< in/out: control block */
- ibool old) /*!< in: old */
-{
- ut_a(buf_page_in_file(bpage));
- ut_ad(buf_pool_mutex_own());
- ut_ad(bpage->in_LRU_list);
-
-#ifdef UNIV_LRU_DEBUG
- ut_a((buf_pool->LRU_old_len == 0) == (buf_pool->LRU_old == NULL));
- /* If a block is flagged "old", the LRU_old list must exist. */
- ut_a(!old || buf_pool->LRU_old);
-
- if (UT_LIST_GET_PREV(LRU, bpage) && UT_LIST_GET_NEXT(LRU, bpage)) {
- const buf_page_t* prev = UT_LIST_GET_PREV(LRU, bpage);
- const buf_page_t* next = UT_LIST_GET_NEXT(LRU, bpage);
- if (prev->old == next->old) {
- ut_a(prev->old == old);
- } else {
- ut_a(!prev->old);
- ut_a(buf_pool->LRU_old == (old ? bpage : next));
- }
- }
-#endif /* UNIV_LRU_DEBUG */
-
- bpage->old = old;
-}
-
-/*********************************************************************//**
-Determine the time of first access of a block in the buffer pool.
-@return ut_time_ms() at the time of first access, 0 if not accessed */
-UNIV_INLINE
-unsigned
-buf_page_is_accessed(
-/*=================*/
- const buf_page_t* bpage) /*!< in: control block */
-{
- ut_ad(buf_page_in_file(bpage));
-
- return(bpage->access_time);
-}
-
-/*********************************************************************//**
-Flag a block accessed. */
-UNIV_INLINE
-void
-buf_page_set_accessed(
-/*==================*/
- buf_page_t* bpage, /*!< in/out: control block */
- ulint time_ms) /*!< in: ut_time_ms() */
-{
- ut_a(buf_page_in_file(bpage));
- ut_ad(buf_pool_mutex_own());
-
- if (!bpage->access_time) {
- /* Make this the time of the first access. */
- bpage->access_time = time_ms;
- }
-}
-
-/*********************************************************************//**
-Gets the buf_block_t handle of a buffered file block if an uncompressed
-page frame exists, or NULL.
-@return control block, or NULL */
-UNIV_INLINE
-buf_block_t*
-buf_page_get_block(
-/*===============*/
- buf_page_t* bpage) /*!< in: control block, or NULL */
-{
- if (UNIV_LIKELY(bpage != NULL)) {
- ut_ad(buf_page_in_file(bpage));
-
- if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
- return((buf_block_t*) bpage);
- }
- }
-
- return(NULL);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Gets a pointer to the memory frame of a block.
-@return pointer to the frame */
-UNIV_INLINE
-buf_frame_t*
-buf_block_get_frame(
-/*================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
-{
- ut_ad(block);
-
- switch (buf_block_get_state(block)) {
- case BUF_BLOCK_ZIP_FREE:
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_ZIP_DIRTY:
- case BUF_BLOCK_NOT_USED:
- ut_error;
- break;
- case BUF_BLOCK_FILE_PAGE:
-# ifndef UNIV_HOTBACKUP
- ut_a(block->page.buf_fix_count > 0);
-# endif /* !UNIV_HOTBACKUP */
- /* fall through */
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- goto ok;
- }
- ut_error;
-ok:
- return((buf_frame_t*) block->frame);
-}
-#endif /* UNIV_DEBUG */
-
-/*********************************************************************//**
-Gets the space id of a block.
-@return space id */
-UNIV_INLINE
-ulint
-buf_page_get_space(
-/*===============*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
-{
- ut_ad(bpage);
- ut_a(buf_page_in_file(bpage));
-
- return(bpage->space);
-}
-
-/*********************************************************************//**
-Gets the space id of a block.
-@return space id */
-UNIV_INLINE
-ulint
-buf_block_get_space(
-/*================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
-{
- ut_ad(block);
- ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-
- return(block->page.space);
-}
-
-/*********************************************************************//**
-Gets the page number of a block.
-@return page number */
-UNIV_INLINE
-ulint
-buf_page_get_page_no(
-/*=================*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
-{
- ut_ad(bpage);
- ut_a(buf_page_in_file(bpage));
-
- return(bpage->offset);
-}
-
-/*********************************************************************//**
-Gets the page number of a block.
-@return page number */
-UNIV_INLINE
-ulint
-buf_block_get_page_no(
-/*==================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
-{
- ut_ad(block);
- ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-
- return(block->page.offset);
-}
-
-/*********************************************************************//**
-Gets the compressed page size of a block.
-@return compressed page size, or 0 */
-UNIV_INLINE
-ulint
-buf_page_get_zip_size(
-/*==================*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
-{
- return(bpage->zip.ssize ? 512 << bpage->zip.ssize : 0);
-}
-
-/*********************************************************************//**
-Gets the compressed page size of a block.
-@return compressed page size, or 0 */
-UNIV_INLINE
-ulint
-buf_block_get_zip_size(
-/*===================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
-{
- return(block->page.zip.ssize ? 512 << block->page.zip.ssize : 0);
-}
-
-#ifndef UNIV_HOTBACKUP
-#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
-/*********************************************************************//**
-Gets the compressed page descriptor corresponding to an uncompressed page
-if applicable.
-@return compressed page descriptor, or NULL */
-UNIV_INLINE
-const page_zip_des_t*
-buf_frame_get_page_zip(
-/*===================*/
- const byte* ptr) /*!< in: pointer to the page */
-{
- return(buf_block_get_page_zip(buf_block_align(ptr)));
-}
-#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Gets the space id, page offset, and byte offset within page of a
-pointer pointing to a buffer frame containing a file page. */
-UNIV_INLINE
-void
-buf_ptr_get_fsp_addr(
-/*=================*/
- const void* ptr, /*!< in: pointer to a buffer frame */
- ulint* space, /*!< out: space id */
- fil_addr_t* addr) /*!< out: page offset and byte offset */
-{
- const page_t* page = (const page_t*) ut_align_down(ptr,
- UNIV_PAGE_SIZE);
-
- *space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- addr->page = mach_read_from_4(page + FIL_PAGE_OFFSET);
- addr->boffset = ut_align_offset(ptr, UNIV_PAGE_SIZE);
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Gets the hash value of the page the pointer is pointing to. This can be used
-in searches in the lock hash table.
-@return lock hash value */
-UNIV_INLINE
-ulint
-buf_block_get_lock_hash_val(
-/*========================*/
- const buf_block_t* block) /*!< in: block */
-{
- return(block->lock_hash_val);
-}
-
-/********************************************************************//**
-Allocates a buffer block.
-@return own: the allocated block, in state BUF_BLOCK_MEMORY */
-UNIV_INLINE
-buf_block_t*
-buf_block_alloc(
-/*============*/
- ulint zip_size) /*!< in: compressed page size in bytes,
- or 0 if uncompressed tablespace */
-{
- buf_block_t* block;
-
- block = buf_LRU_get_free_block(zip_size);
-
- buf_block_set_state(block, BUF_BLOCK_MEMORY);
-
- return(block);
-}
-
-/********************************************************************//**
-Frees a buffer block which does not contain a file page. */
-UNIV_INLINE
-void
-buf_block_free(
-/*===========*/
- buf_block_t* block) /*!< in, own: block to be freed */
-{
- buf_pool_mutex_enter();
-
- mutex_enter(&block->mutex);
-
- ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
-
- buf_LRU_block_free_non_file_page(block);
-
- mutex_exit(&block->mutex);
-
- buf_pool_mutex_exit();
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*********************************************************************//**
-Copies contents of a buffer frame to a given buffer.
-@return buf */
-UNIV_INLINE
-byte*
-buf_frame_copy(
-/*===========*/
- byte* buf, /*!< in: buffer to copy to */
- const buf_frame_t* frame) /*!< in: buffer frame */
-{
- ut_ad(buf && frame);
-
- ut_memcpy(buf, frame, UNIV_PAGE_SIZE);
-
- return(buf);
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Calculates a folded value of a file page address to use in the page hash
-table.
-@return the folded value */
-UNIV_INLINE
-ulint
-buf_page_address_fold(
-/*==================*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: offset of the page within space */
-{
- return((space << 20) + space + offset);
-}
-
-/********************************************************************//**
-Gets the youngest modification log sequence number for a frame.
-Returns zero if not file page or no modification occurred yet.
-@return newest modification to page */
-UNIV_INLINE
-ib_uint64_t
-buf_page_get_newest_modification(
-/*=============================*/
- const buf_page_t* bpage) /*!< in: block containing the
- page frame */
-{
- ib_uint64_t lsn;
- mutex_t* block_mutex = buf_page_get_mutex(bpage);
-
- mutex_enter(block_mutex);
-
- if (buf_page_in_file(bpage)) {
- lsn = bpage->newest_modification;
- } else {
- lsn = 0;
- }
-
- mutex_exit(block_mutex);
-
- return(lsn);
-}
-
-/********************************************************************//**
-Increments the modify clock of a frame by 1. The caller must (1) own the
-buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
-on the block. */
-UNIV_INLINE
-void
-buf_block_modify_clock_inc(
-/*=======================*/
- buf_block_t* block) /*!< in: block */
-{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad((buf_pool_mutex_own()
- && (block->page.buf_fix_count == 0))
- || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
-#endif /* UNIV_SYNC_DEBUG */
-
- block->modify_clock++;
-}
-
-/********************************************************************//**
-Returns the value of the modify clock. The caller must have an s-lock
-or x-lock on the block.
-@return value */
-UNIV_INLINE
-ib_uint64_t
-buf_block_get_modify_clock(
-/*=======================*/
- buf_block_t* block) /*!< in: block */
-{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
- || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
-#endif /* UNIV_SYNC_DEBUG */
-
- return(block->modify_clock);
-}
-
-/*******************************************************************//**
-Increments the bufferfix count. */
-UNIV_INLINE
-void
-buf_block_buf_fix_inc_func(
-/*=======================*/
-#ifdef UNIV_SYNC_DEBUG
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line */
-#endif /* UNIV_SYNC_DEBUG */
- buf_block_t* block) /*!< in/out: block to bufferfix */
-{
-#ifdef UNIV_SYNC_DEBUG
- ibool ret;
-
- ret = rw_lock_s_lock_nowait(&(block->debug_latch), file, line);
- ut_a(ret);
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(mutex_own(&block->mutex));
-
- block->page.buf_fix_count++;
-}
-#ifdef UNIV_SYNC_DEBUG
-/** Increments the bufferfix count.
-@param b in/out: block to bufferfix
-@param f in: file name where requested
-@param l in: line number where requested */
-# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b)
-#else /* UNIV_SYNC_DEBUG */
-/** Increments the bufferfix count.
-@param b in/out: block to bufferfix
-@param f in: file name where requested
-@param l in: line number where requested */
-# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b)
-#endif /* UNIV_SYNC_DEBUG */
-
-/*******************************************************************//**
-Decrements the bufferfix count. */
-UNIV_INLINE
-void
-buf_block_buf_fix_dec(
-/*==================*/
- buf_block_t* block) /*!< in/out: block to bufferunfix */
-{
- ut_ad(mutex_own(&block->mutex));
-
- block->page.buf_fix_count--;
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_s_unlock(&block->debug_latch);
-#endif
-}
-
-/******************************************************************//**
-Returns the control block of a file page, NULL if not found.
-@return block, NULL if not found */
-UNIV_INLINE
-buf_page_t*
-buf_page_hash_get(
-/*==============*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: offset of the page within space */
-{
- buf_page_t* bpage;
- ulint fold;
-
- ut_ad(buf_pool);
- ut_ad(buf_pool_mutex_own());
-
- /* Look for the page in the hash table */
-
- fold = buf_page_address_fold(space, offset);
-
- HASH_SEARCH(hash, buf_pool->page_hash, fold, buf_page_t*, bpage,
- ut_ad(bpage->in_page_hash && !bpage->in_zip_hash
- && buf_page_in_file(bpage)),
- bpage->space == space && bpage->offset == offset);
- if (bpage) {
- ut_a(buf_page_in_file(bpage));
- ut_ad(bpage->in_page_hash);
- ut_ad(!bpage->in_zip_hash);
- UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
- }
-
- return(bpage);
-}
-
-/******************************************************************//**
-Returns the control block of a file page, NULL if not found
-or an uncompressed page frame does not exist.
-@return block, NULL if not found */
-UNIV_INLINE
-buf_block_t*
-buf_block_hash_get(
-/*===============*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: offset of the page within space */
-{
- return(buf_page_get_block(buf_page_hash_get(space, offset)));
-}
-
-/********************************************************************//**
-Returns TRUE if the page can be found in the buffer pool hash table.
-
-NOTE that it is possible that the page is not yet read from disk,
-though.
-
-@return TRUE if found in the page hash table */
-UNIV_INLINE
-ibool
-buf_page_peek(
-/*==========*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: page number */
-{
- const buf_page_t* bpage;
-
- buf_pool_mutex_enter();
-
- bpage = buf_page_hash_get(space, offset);
-
- buf_pool_mutex_exit();
-
- return(bpage != NULL);
-}
-
-/********************************************************************//**
-Releases a compressed-only page acquired with buf_page_get_zip(). */
-UNIV_INLINE
-void
-buf_page_release_zip(
-/*=================*/
- buf_page_t* bpage) /*!< in: buffer block */
-{
- buf_block_t* block;
-
- ut_ad(bpage);
- ut_a(bpage->buf_fix_count > 0);
-
- switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_ZIP_DIRTY:
- mutex_enter(&buf_pool_zip_mutex);
- bpage->buf_fix_count--;
- mutex_exit(&buf_pool_zip_mutex);
- return;
- case BUF_BLOCK_FILE_PAGE:
- block = (buf_block_t*) bpage;
- mutex_enter(&block->mutex);
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_s_unlock(&block->debug_latch);
-#endif
- bpage->buf_fix_count--;
- mutex_exit(&block->mutex);
- return;
- case BUF_BLOCK_ZIP_FREE:
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- break;
- }
-
- ut_error;
-}
-
-/********************************************************************//**
-Decrements the bufferfix count of a buffer control block and releases
-a latch, if specified. */
-UNIV_INLINE
-void
-buf_page_release(
-/*=============*/
- buf_block_t* block, /*!< in: buffer block */
- ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH,
- RW_NO_LATCH */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(block);
-
- ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- ut_a(block->page.buf_fix_count > 0);
-
- if (rw_latch == RW_X_LATCH && mtr->modifications) {
- buf_pool_mutex_enter();
- buf_flush_note_modification(block, mtr);
- buf_pool_mutex_exit();
- }
-
- mutex_enter(&block->mutex);
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_s_unlock(&(block->debug_latch));
-#endif
- block->page.buf_fix_count--;
-
- mutex_exit(&block->mutex);
-
- if (rw_latch == RW_S_LATCH) {
- rw_lock_s_unlock(&(block->lock));
- } else if (rw_latch == RW_X_LATCH) {
- rw_lock_x_unlock(&(block->lock));
- }
-}
-
-#ifdef UNIV_SYNC_DEBUG
-/*********************************************************************//**
-Adds latch level info for the rw-lock protecting the buffer frame. This
-should be called in the debug version after a successful latching of a
-page if we know the latching order level of the acquired latch. */
-UNIV_INLINE
-void
-buf_block_dbg_add_level(
-/*====================*/
- buf_block_t* block, /*!< in: buffer page
- where we have acquired latch */
- ulint level) /*!< in: latching order level */
-{
- sync_thread_add_level(&block->lock, level);
-}
-#endif /* UNIV_SYNC_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/buf0flu.h b/storage/innodb_plugin/include/buf0flu.h
deleted file mode 100644
index 6c751852f54..00000000000
--- a/storage/innodb_plugin/include/buf0flu.h
+++ /dev/null
@@ -1,191 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/buf0flu.h
-The database buffer pool flush algorithm
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef buf0flu_h
-#define buf0flu_h
-
-#include "univ.i"
-#include "ut0byte.h"
-#ifndef UNIV_HOTBACKUP
-#include "mtr0types.h"
-#include "buf0types.h"
-
-/********************************************************************//**
-Remove a block from the flush list of modified blocks. */
-UNIV_INTERN
-void
-buf_flush_remove(
-/*=============*/
- buf_page_t* bpage); /*!< in: pointer to the block in question */
-/********************************************************************//**
-Updates the flush system data structures when a write is completed. */
-UNIV_INTERN
-void
-buf_flush_write_complete(
-/*=====================*/
- buf_page_t* bpage); /*!< in: pointer to the block in question */
-/*********************************************************************//**
-Flushes pages from the end of the LRU list if there is too small
-a margin of replaceable pages there. */
-UNIV_INTERN
-void
-buf_flush_free_margin(void);
-/*=======================*/
-#endif /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Initializes a page for writing to the tablespace. */
-UNIV_INTERN
-void
-buf_flush_init_for_writing(
-/*=======================*/
- byte* page, /*!< in/out: page */
- void* page_zip_, /*!< in/out: compressed page, or NULL */
- ib_uint64_t newest_lsn); /*!< in: newest modification lsn
- to the page */
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-This utility flushes dirty blocks from the end of the LRU list or flush_list.
-NOTE 1: in the case of an LRU flush the calling thread may own latches to
-pages: to avoid deadlocks, this function must be written so that it cannot
-end up waiting for these latches! NOTE 2: in the case of a flush list flush,
-the calling thread is not allowed to own any latches on pages!
-@return number of blocks for which the write request was queued;
-ULINT_UNDEFINED if there was a flush of the same type already running */
-UNIV_INTERN
-ulint
-buf_flush_batch(
-/*============*/
- enum buf_flush flush_type, /*!< in: BUF_FLUSH_LRU or
- BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
- then the caller must not own any
- latches on pages */
- ulint min_n, /*!< in: wished minimum mumber of blocks
- flushed (it is not guaranteed that the
- actual number is that big, though) */
- ib_uint64_t lsn_limit); /*!< in the case BUF_FLUSH_LIST all
- blocks whose oldest_modification is
- smaller than this should be flushed
- (if their number does not exceed
- min_n), otherwise ignored */
-/******************************************************************//**
-Waits until a flush batch of the given type ends */
-UNIV_INTERN
-void
-buf_flush_wait_batch_end(
-/*=====================*/
- enum buf_flush type); /*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
-/********************************************************************//**
-This function should be called at a mini-transaction commit, if a page was
-modified in it. Puts the block to the list of modified blocks, if it not
-already in it. */
-UNIV_INLINE
-void
-buf_flush_note_modification(
-/*========================*/
- buf_block_t* block, /*!< in: block which is modified */
- mtr_t* mtr); /*!< in: mtr */
-/********************************************************************//**
-This function should be called when recovery has modified a buffer page. */
-UNIV_INLINE
-void
-buf_flush_recv_note_modification(
-/*=============================*/
- buf_block_t* block, /*!< in: block which is modified */
- ib_uint64_t start_lsn, /*!< in: start lsn of the first mtr in a
- set of mtr's */
- ib_uint64_t end_lsn); /*!< in: end lsn of the last mtr in the
- set of mtr's */
-/********************************************************************//**
-Returns TRUE if the file page block is immediately suitable for replacement,
-i.e., transition FILE_PAGE => NOT_USED allowed.
-@return TRUE if can replace immediately */
-UNIV_INTERN
-ibool
-buf_flush_ready_for_replace(
-/*========================*/
- buf_page_t* bpage); /*!< in: buffer control block, must be
- buf_page_in_file(bpage) and in the LRU list */
-
-/** @brief Statistics for selecting flush rate based on redo log
-generation speed.
-
-These statistics are generated for heuristics used in estimating the
-rate at which we should flush the dirty blocks to avoid bursty IO
-activity. Note that the rate of flushing not only depends on how many
-dirty pages we have in the buffer pool but it is also a fucntion of
-how much redo the workload is generating and at what rate. */
-
-struct buf_flush_stat_struct
-{
- ib_uint64_t redo; /**< amount of redo generated. */
- ulint n_flushed; /**< number of pages flushed. */
-};
-
-/** Statistics for selecting flush rate of dirty pages. */
-typedef struct buf_flush_stat_struct buf_flush_stat_t;
-/*********************************************************************
-Update the historical stats that we are collecting for flush rate
-heuristics at the end of each interval. */
-UNIV_INTERN
-void
-buf_flush_stat_update(void);
-/*=======================*/
-/*********************************************************************
-Determines the fraction of dirty pages that need to be flushed based
-on the speed at which we generate redo log. Note that if redo log
-is generated at significant rate without a corresponding increase
-in the number of dirty pages (for example, an in-memory workload)
-it can cause IO bursts of flushing. This function implements heuristics
-to avoid this burstiness.
-@return number of dirty pages to be flushed / second */
-UNIV_INTERN
-ulint
-buf_flush_get_desired_flush_rate(void);
-/*==================================*/
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/******************************************************************//**
-Validates the flush list.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-buf_flush_validate(void);
-/*====================*/
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-/** When buf_flush_free_margin is called, it tries to make this many blocks
-available to replacement in the free list and at the end of the LRU list (to
-make sure that a read-ahead batch can be read efficiently in a single
-sweep). */
-#define BUF_FLUSH_FREE_BLOCK_MARGIN (5 + BUF_READ_AHEAD_AREA)
-/** Extra margin to apply above BUF_FLUSH_FREE_BLOCK_MARGIN */
-#define BUF_FLUSH_EXTRA_MARGIN (BUF_FLUSH_FREE_BLOCK_MARGIN / 4 + 100)
-#endif /* !UNIV_HOTBACKUP */
-
-#ifndef UNIV_NONINL
-#include "buf0flu.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/buf0flu.ic b/storage/innodb_plugin/include/buf0flu.ic
deleted file mode 100644
index c90cd59e4b6..00000000000
--- a/storage/innodb_plugin/include/buf0flu.ic
+++ /dev/null
@@ -1,123 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/buf0flu.ic
-The database buffer pool flush algorithm
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef UNIV_HOTBACKUP
-#include "buf0buf.h"
-#include "mtr0mtr.h"
-
-/********************************************************************//**
-Inserts a modified block into the flush list. */
-UNIV_INTERN
-void
-buf_flush_insert_into_flush_list(
-/*=============================*/
- buf_block_t* block); /*!< in/out: block which is modified */
-/********************************************************************//**
-Inserts a modified block into the flush list in the right sorted position.
-This function is used by recovery, because there the modifications do not
-necessarily come in the order of lsn's. */
-UNIV_INTERN
-void
-buf_flush_insert_sorted_into_flush_list(
-/*====================================*/
- buf_block_t* block); /*!< in/out: block which is modified */
-
-/********************************************************************//**
-This function should be called at a mini-transaction commit, if a page was
-modified in it. Puts the block to the list of modified blocks, if it is not
-already in it. */
-UNIV_INLINE
-void
-buf_flush_note_modification(
-/*========================*/
- buf_block_t* block, /*!< in: block which is modified */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(block);
- ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- ut_ad(block->page.buf_fix_count > 0);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(buf_pool_mutex_own());
-
- ut_ad(mtr->start_lsn != 0);
- ut_ad(mtr->modifications);
- ut_ad(block->page.newest_modification <= mtr->end_lsn);
-
- block->page.newest_modification = mtr->end_lsn;
-
- if (!block->page.oldest_modification) {
-
- block->page.oldest_modification = mtr->start_lsn;
- ut_ad(block->page.oldest_modification != 0);
-
- buf_flush_insert_into_flush_list(block);
- } else {
- ut_ad(block->page.oldest_modification <= mtr->start_lsn);
- }
-
- ++srv_buf_pool_write_requests;
-}
-
-/********************************************************************//**
-This function should be called when recovery has modified a buffer page. */
-UNIV_INLINE
-void
-buf_flush_recv_note_modification(
-/*=============================*/
- buf_block_t* block, /*!< in: block which is modified */
- ib_uint64_t start_lsn, /*!< in: start lsn of the first mtr in a
- set of mtr's */
- ib_uint64_t end_lsn) /*!< in: end lsn of the last mtr in the
- set of mtr's */
-{
- ut_ad(block);
- ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- ut_ad(block->page.buf_fix_count > 0);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- buf_pool_mutex_enter();
-
- ut_ad(block->page.newest_modification <= end_lsn);
-
- block->page.newest_modification = end_lsn;
-
- if (!block->page.oldest_modification) {
-
- block->page.oldest_modification = start_lsn;
-
- ut_ad(block->page.oldest_modification != 0);
-
- buf_flush_insert_sorted_into_flush_list(block);
- } else {
- ut_ad(block->page.oldest_modification <= start_lsn);
- }
-
- buf_pool_mutex_exit();
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/buf0lru.h b/storage/innodb_plugin/include/buf0lru.h
deleted file mode 100644
index 009430af35b..00000000000
--- a/storage/innodb_plugin/include/buf0lru.h
+++ /dev/null
@@ -1,295 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/buf0lru.h
-The database buffer pool LRU replacement algorithm
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef buf0lru_h
-#define buf0lru_h
-
-#include "univ.i"
-#include "ut0byte.h"
-#include "buf0types.h"
-
-/** The return type of buf_LRU_free_block() */
-enum buf_lru_free_block_status {
- /** freed */
- BUF_LRU_FREED = 0,
- /** not freed because the caller asked to remove the
- uncompressed frame but the control block cannot be
- relocated */
- BUF_LRU_CANNOT_RELOCATE,
- /** not freed because of some other reason */
- BUF_LRU_NOT_FREED
-};
-
-/******************************************************************//**
-Tries to remove LRU flushed blocks from the end of the LRU list and put them
-to the free list. This is beneficial for the efficiency of the insert buffer
-operation, as flushed pages from non-unique non-clustered indexes are here
-taken out of the buffer pool, and their inserts redirected to the insert
-buffer. Otherwise, the flushed blocks could get modified again before read
-operations need new buffer blocks, and the i/o work done in flushing would be
-wasted. */
-UNIV_INTERN
-void
-buf_LRU_try_free_flushed_blocks(void);
-/*==================================*/
-/******************************************************************//**
-Returns TRUE if less than 25 % of the buffer pool is available. This can be
-used in heuristics to prevent huge transactions eating up the whole buffer
-pool for their locks.
-@return TRUE if less than 25 % of buffer pool left */
-UNIV_INTERN
-ibool
-buf_LRU_buf_pool_running_out(void);
-/*==============================*/
-
-/*#######################################################################
-These are low-level functions
-#########################################################################*/
-
-/** Minimum LRU list length for which the LRU_old pointer is defined */
-#define BUF_LRU_OLD_MIN_LEN 512 /* 8 megabytes of 16k pages */
-
-/** Maximum LRU list search length in buf_flush_LRU_recommendation() */
-#define BUF_LRU_FREE_SEARCH_LEN (5 + 2 * BUF_READ_AHEAD_AREA)
-
-/******************************************************************//**
-Invalidates all pages belonging to a given tablespace when we are deleting
-the data file(s) of that tablespace. A PROBLEM: if readahead is being started,
-what guarantees that it will not try to read in pages after this operation has
-completed? */
-UNIV_INTERN
-void
-buf_LRU_invalidate_tablespace(
-/*==========================*/
- ulint id); /*!< in: space id */
-/********************************************************************//**
-Insert a compressed block into buf_pool->zip_clean in the LRU order. */
-UNIV_INTERN
-void
-buf_LRU_insert_zip_clean(
-/*=====================*/
- buf_page_t* bpage); /*!< in: pointer to the block in question */
-
-/******************************************************************//**
-Try to free a block. If bpage is a descriptor of a compressed-only
-page, the descriptor object will be freed as well.
-
-NOTE: If this function returns BUF_LRU_FREED, it will not temporarily
-release buf_pool_mutex. Furthermore, the page frame will no longer be
-accessible via bpage.
-
-The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and
-release these two mutexes after the call. No other
-buf_page_get_mutex() may be held when calling this function.
-@return BUF_LRU_FREED if freed, BUF_LRU_CANNOT_RELOCATE or
-BUF_LRU_NOT_FREED otherwise. */
-UNIV_INTERN
-enum buf_lru_free_block_status
-buf_LRU_free_block(
-/*===============*/
- buf_page_t* bpage, /*!< in: block to be freed */
- ibool zip, /*!< in: TRUE if should remove also the
- compressed page of an uncompressed page */
- ibool* buf_pool_mutex_released);
- /*!< in: pointer to a variable that will
- be assigned TRUE if buf_pool_mutex
- was temporarily released, or NULL */
-/******************************************************************//**
-Try to free a replaceable block.
-@return TRUE if found and freed */
-UNIV_INTERN
-ibool
-buf_LRU_search_and_free_block(
-/*==========================*/
- ulint n_iterations); /*!< in: how many times this has been called
- repeatedly without result: a high value means
- that we should search farther; if
- n_iterations < 10, then we search
- n_iterations / 10 * buf_pool->curr_size
- pages from the end of the LRU list; if
- n_iterations < 5, then we will also search
- n_iterations / 5 of the unzip_LRU list. */
-/******************************************************************//**
-Returns a free block from the buf_pool. The block is taken off the
-free list. If it is empty, returns NULL.
-@return a free control block, or NULL if the buf_block->free list is empty */
-UNIV_INTERN
-buf_block_t*
-buf_LRU_get_free_only(void);
-/*=======================*/
-/******************************************************************//**
-Returns a free block from the buf_pool. The block is taken off the
-free list. If it is empty, blocks are moved from the end of the
-LRU list to the free list.
-@return the free control block, in state BUF_BLOCK_READY_FOR_USE */
-UNIV_INTERN
-buf_block_t*
-buf_LRU_get_free_block(
-/*===================*/
- ulint zip_size); /*!< in: compressed page size in bytes,
- or 0 if uncompressed tablespace */
-
-/******************************************************************//**
-Puts a block back to the free list. */
-UNIV_INTERN
-void
-buf_LRU_block_free_non_file_page(
-/*=============================*/
- buf_block_t* block); /*!< in: block, must not contain a file page */
-/******************************************************************//**
-Adds a block to the LRU list. */
-UNIV_INTERN
-void
-buf_LRU_add_block(
-/*==============*/
- buf_page_t* bpage, /*!< in: control block */
- ibool old); /*!< in: TRUE if should be put to the old
- blocks in the LRU list, else put to the
- start; if the LRU list is very short, added to
- the start regardless of this parameter */
-/******************************************************************//**
-Adds a block to the LRU list of decompressed zip pages. */
-UNIV_INTERN
-void
-buf_unzip_LRU_add_block(
-/*====================*/
- buf_block_t* block, /*!< in: control block */
- ibool old); /*!< in: TRUE if should be put to the end
- of the list, else put to the start */
-/******************************************************************//**
-Moves a block to the start of the LRU list. */
-UNIV_INTERN
-void
-buf_LRU_make_block_young(
-/*=====================*/
- buf_page_t* bpage); /*!< in: control block */
-/******************************************************************//**
-Moves a block to the end of the LRU list. */
-UNIV_INTERN
-void
-buf_LRU_make_block_old(
-/*===================*/
- buf_page_t* bpage); /*!< in: control block */
-/**********************************************************************//**
-Updates buf_LRU_old_ratio.
-@return updated old_pct */
-UNIV_INTERN
-uint
-buf_LRU_old_ratio_update(
-/*=====================*/
- uint old_pct,/*!< in: Reserve this percentage of
- the buffer pool for "old" blocks. */
- ibool adjust);/*!< in: TRUE=adjust the LRU list;
- FALSE=just assign buf_LRU_old_ratio
- during the initialization of InnoDB */
-/********************************************************************//**
-Update the historical stats that we are collecting for LRU eviction
-policy at the end of each interval. */
-UNIV_INTERN
-void
-buf_LRU_stat_update(void);
-/*=====================*/
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/**********************************************************************//**
-Validates the LRU list.
-@return TRUE */
-UNIV_INTERN
-ibool
-buf_LRU_validate(void);
-/*==================*/
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/**********************************************************************//**
-Prints the LRU list. */
-UNIV_INTERN
-void
-buf_LRU_print(void);
-/*===============*/
-#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-/** @name Heuristics for detecting index scan @{ */
-/** Reserve this much/BUF_LRU_OLD_RATIO_DIV of the buffer pool for
-"old" blocks. Protected by buf_pool_mutex. */
-extern uint buf_LRU_old_ratio;
-/** The denominator of buf_LRU_old_ratio. */
-#define BUF_LRU_OLD_RATIO_DIV 1024
-/** Maximum value of buf_LRU_old_ratio.
-@see buf_LRU_old_adjust_len
-@see buf_LRU_old_ratio_update */
-#define BUF_LRU_OLD_RATIO_MAX BUF_LRU_OLD_RATIO_DIV
-/** Minimum value of buf_LRU_old_ratio.
-@see buf_LRU_old_adjust_len
-@see buf_LRU_old_ratio_update
-The minimum must exceed
-(BUF_LRU_OLD_TOLERANCE + 5) * BUF_LRU_OLD_RATIO_DIV / BUF_LRU_OLD_MIN_LEN. */
-#define BUF_LRU_OLD_RATIO_MIN 51
-
-#if BUF_LRU_OLD_RATIO_MIN >= BUF_LRU_OLD_RATIO_MAX
-# error "BUF_LRU_OLD_RATIO_MIN >= BUF_LRU_OLD_RATIO_MAX"
-#endif
-#if BUF_LRU_OLD_RATIO_MAX > BUF_LRU_OLD_RATIO_DIV
-# error "BUF_LRU_OLD_RATIO_MAX > BUF_LRU_OLD_RATIO_DIV"
-#endif
-
-/** Move blocks to "new" LRU list only if the first access was at
-least this many milliseconds ago. Not protected by any mutex or latch. */
-extern uint buf_LRU_old_threshold_ms;
-/* @} */
-
-/** @brief Statistics for selecting the LRU list for eviction.
-
-These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O
-and page_zip_decompress() operations. Based on the statistics we decide
-if we want to evict from buf_pool->unzip_LRU or buf_pool->LRU. */
-struct buf_LRU_stat_struct
-{
- ulint io; /**< Counter of buffer pool I/O operations. */
- ulint unzip; /**< Counter of page_zip_decompress operations. */
-};
-
-/** Statistics for selecting the LRU list for eviction. */
-typedef struct buf_LRU_stat_struct buf_LRU_stat_t;
-
-/** Current operation counters. Not protected by any mutex.
-Cleared by buf_LRU_stat_update(). */
-extern buf_LRU_stat_t buf_LRU_stat_cur;
-
-/** Running sum of past values of buf_LRU_stat_cur.
-Updated by buf_LRU_stat_update(). Protected by buf_pool_mutex. */
-extern buf_LRU_stat_t buf_LRU_stat_sum;
-
-/********************************************************************//**
-Increments the I/O counter in buf_LRU_stat_cur. */
-#define buf_LRU_stat_inc_io() buf_LRU_stat_cur.io++
-/********************************************************************//**
-Increments the page_zip_decompress() counter in buf_LRU_stat_cur. */
-#define buf_LRU_stat_inc_unzip() buf_LRU_stat_cur.unzip++
-
-#ifndef UNIV_NONINL
-#include "buf0lru.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/buf0lru.ic b/storage/innodb_plugin/include/buf0lru.ic
deleted file mode 100644
index 556f45d987f..00000000000
--- a/storage/innodb_plugin/include/buf0lru.ic
+++ /dev/null
@@ -1,25 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/buf0lru.ic
-The database buffer replacement algorithm
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
diff --git a/storage/innodb_plugin/include/buf0rea.h b/storage/innodb_plugin/include/buf0rea.h
deleted file mode 100644
index 093750623d6..00000000000
--- a/storage/innodb_plugin/include/buf0rea.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/buf0rea.h
-The database buffer read
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef buf0rea_h
-#define buf0rea_h
-
-#include "univ.i"
-#include "buf0types.h"
-
-/********************************************************************//**
-High-level function which reads a page asynchronously from a file to the
-buffer buf_pool if it is not already there. Sets the io_fix flag and sets
-an exclusive lock on the buffer frame. The flag is cleared and the x-lock
-released by the i/o-handler thread.
-@return TRUE if page has been read in, FALSE in case of failure */
-UNIV_INTERN
-ibool
-buf_read_page(
-/*==========*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
- ulint offset);/*!< in: page number */
-/********************************************************************//**
-Applies linear read-ahead if in the buf_pool the page is a border page of
-a linear read-ahead area and all the pages in the area have been accessed.
-Does not read any page if the read-ahead mechanism is not activated. Note
-that the algorithm looks at the 'natural' adjacent successor and
-predecessor of the page, which on the leaf level of a B-tree are the next
-and previous page in the chain of leaves. To know these, the page specified
-in (space, offset) must already be present in the buf_pool. Thus, the
-natural way to use this function is to call it when a page in the buf_pool
-is accessed the first time, calling this function just after it has been
-bufferfixed.
-NOTE 1: as this function looks at the natural predecessor and successor
-fields on the page, what happens, if these are not initialized to any
-sensible value? No problem, before applying read-ahead we check that the
-area to read is within the span of the space, if not, read-ahead is not
-applied. An uninitialized value may result in a useless read operation, but
-only very improbably.
-NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
-function must be written such that it cannot end up waiting for these
-latches!
-NOTE 3: the calling thread must want access to the page given: this rule is
-set to prevent unintended read-aheads performed by ibuf routines, a situation
-which could result in a deadlock if the OS does not support asynchronous io.
-@return number of page read requests issued */
-UNIV_INTERN
-ulint
-buf_read_ahead_linear(
-/*==================*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
- ulint offset);/*!< in: page number of a page; NOTE: the current thread
- must want access to this page (see NOTE 3 above) */
-/********************************************************************//**
-Issues read requests for pages which the ibuf module wants to read in, in
-order to contract the insert buffer tree. Technically, this function is like
-a read-ahead function. */
-UNIV_INTERN
-void
-buf_read_ibuf_merge_pages(
-/*======================*/
- ibool sync, /*!< in: TRUE if the caller
- wants this function to wait
- for the highest address page
- to get read in, before this
- function returns */
- const ulint* space_ids, /*!< in: array of space ids */
- const ib_int64_t* space_versions,/*!< in: the spaces must have
- this version number
- (timestamp), otherwise we
- discard the read; we use this
- to cancel reads if DISCARD +
- IMPORT may have changed the
- tablespace size */
- const ulint* page_nos, /*!< in: array of page numbers
- to read, with the highest page
- number the last in the
- array */
- ulint n_stored); /*!< in: number of elements
- in the arrays */
-/********************************************************************//**
-Issues read requests for pages which recovery wants to read in. */
-UNIV_INTERN
-void
-buf_read_recv_pages(
-/*================*/
- ibool sync, /*!< in: TRUE if the caller
- wants this function to wait
- for the highest address page
- to get read in, before this
- function returns */
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in
- bytes, or 0 */
- const ulint* page_nos, /*!< in: array of page numbers
- to read, with the highest page
- number the last in the
- array */
- ulint n_stored); /*!< in: number of page numbers
- in the array */
-
-/** The size in pages of the area which the read-ahead algorithms read if
-invoked */
-#define BUF_READ_AHEAD_AREA \
- ut_min(64, ut_2_power_up(buf_pool->curr_size / 32))
-
-/** @name Modes used in read-ahead @{ */
-/** read only pages belonging to the insert buffer tree */
-#define BUF_READ_IBUF_PAGES_ONLY 131
-/** read any page */
-#define BUF_READ_ANY_PAGE 132
-/* @} */
-
-#endif
diff --git a/storage/innodb_plugin/include/buf0types.h b/storage/innodb_plugin/include/buf0types.h
deleted file mode 100644
index bfae6477135..00000000000
--- a/storage/innodb_plugin/include/buf0types.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/buf0types.h
-The database buffer pool global types for the directory
-
-Created 11/17/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef buf0types_h
-#define buf0types_h
-
-/** Buffer page (uncompressed or compressed) */
-typedef struct buf_page_struct buf_page_t;
-/** Buffer block for which an uncompressed page exists */
-typedef struct buf_block_struct buf_block_t;
-/** Buffer pool chunk comprising buf_block_t */
-typedef struct buf_chunk_struct buf_chunk_t;
-/** Buffer pool comprising buf_chunk_t */
-typedef struct buf_pool_struct buf_pool_t;
-/** Buffer pool statistics struct */
-typedef struct buf_pool_stat_struct buf_pool_stat_t;
-
-/** A buffer frame. @see page_t */
-typedef byte buf_frame_t;
-
-/** Flags for flush types */
-enum buf_flush {
- BUF_FLUSH_LRU = 0, /*!< flush via the LRU list */
- BUF_FLUSH_SINGLE_PAGE, /*!< flush a single page */
- BUF_FLUSH_LIST, /*!< flush via the flush list
- of dirty blocks */
- BUF_FLUSH_N_TYPES /*!< index of last element + 1 */
-};
-
-/** Flags for io_fix types */
-enum buf_io_fix {
- BUF_IO_NONE = 0, /**< no pending I/O */
- BUF_IO_READ, /**< read pending */
- BUF_IO_WRITE /**< write pending */
-};
-
-/** Parameters of binary buddy system for compressed pages (buf0buddy.h) */
-/* @{ */
-#if UNIV_WORD_SIZE <= 4 /* 32-bit system */
-/** Base-2 logarithm of the smallest buddy block size */
-# define BUF_BUDDY_LOW_SHIFT 6
-#else /* 64-bit system */
-/** Base-2 logarithm of the smallest buddy block size */
-# define BUF_BUDDY_LOW_SHIFT 7
-#endif
-#define BUF_BUDDY_LOW (1 << BUF_BUDDY_LOW_SHIFT)
- /*!< minimum block size in the binary
- buddy system; must be at least
- sizeof(buf_page_t) */
-#define BUF_BUDDY_SIZES (UNIV_PAGE_SIZE_SHIFT - BUF_BUDDY_LOW_SHIFT)
- /*!< number of buddy sizes */
-
-/** twice the maximum block size of the buddy system;
-the underlying memory is aligned by this amount:
-this must be equal to UNIV_PAGE_SIZE */
-#define BUF_BUDDY_HIGH (BUF_BUDDY_LOW << BUF_BUDDY_SIZES)
-/* @} */
-
-#endif
-
diff --git a/storage/innodb_plugin/include/data0data.h b/storage/innodb_plugin/include/data0data.h
deleted file mode 100644
index f9fce3f3657..00000000000
--- a/storage/innodb_plugin/include/data0data.h
+++ /dev/null
@@ -1,483 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/data0data.h
-SQL data field and tuple
-
-Created 5/30/1994 Heikki Tuuri
-*************************************************************************/
-
-#ifndef data0data_h
-#define data0data_h
-
-#include "univ.i"
-
-#include "data0types.h"
-#include "data0type.h"
-#include "mem0mem.h"
-#include "dict0types.h"
-
-/** Storage for overflow data in a big record, that is, a clustered
-index record which needs external storage of data fields */
-typedef struct big_rec_struct big_rec_t;
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Gets pointer to the type struct of SQL data field.
-@return pointer to the type struct */
-UNIV_INLINE
-dtype_t*
-dfield_get_type(
-/*============*/
- const dfield_t* field); /*!< in: SQL data field */
-/*********************************************************************//**
-Gets pointer to the data in a field.
-@return pointer to data */
-UNIV_INLINE
-void*
-dfield_get_data(
-/*============*/
- const dfield_t* field); /*!< in: field */
-#else /* UNIV_DEBUG */
-# define dfield_get_type(field) (&(field)->type)
-# define dfield_get_data(field) ((field)->data)
-#endif /* UNIV_DEBUG */
-/*********************************************************************//**
-Sets the type struct of SQL data field. */
-UNIV_INLINE
-void
-dfield_set_type(
-/*============*/
- dfield_t* field, /*!< in: SQL data field */
- dtype_t* type); /*!< in: pointer to data type struct */
-/*********************************************************************//**
-Gets length of field data.
-@return length of data; UNIV_SQL_NULL if SQL null data */
-UNIV_INLINE
-ulint
-dfield_get_len(
-/*===========*/
- const dfield_t* field); /*!< in: field */
-/*********************************************************************//**
-Sets length in a field. */
-UNIV_INLINE
-void
-dfield_set_len(
-/*===========*/
- dfield_t* field, /*!< in: field */
- ulint len); /*!< in: length or UNIV_SQL_NULL */
-/*********************************************************************//**
-Determines if a field is SQL NULL
-@return nonzero if SQL null data */
-UNIV_INLINE
-ulint
-dfield_is_null(
-/*===========*/
- const dfield_t* field); /*!< in: field */
-/*********************************************************************//**
-Determines if a field is externally stored
-@return nonzero if externally stored */
-UNIV_INLINE
-ulint
-dfield_is_ext(
-/*==========*/
- const dfield_t* field); /*!< in: field */
-/*********************************************************************//**
-Sets the "external storage" flag */
-UNIV_INLINE
-void
-dfield_set_ext(
-/*===========*/
- dfield_t* field); /*!< in/out: field */
-/*********************************************************************//**
-Sets pointer to the data and length in a field. */
-UNIV_INLINE
-void
-dfield_set_data(
-/*============*/
- dfield_t* field, /*!< in: field */
- const void* data, /*!< in: data */
- ulint len); /*!< in: length or UNIV_SQL_NULL */
-/*********************************************************************//**
-Sets a data field to SQL NULL. */
-UNIV_INLINE
-void
-dfield_set_null(
-/*============*/
- dfield_t* field); /*!< in/out: field */
-/**********************************************************************//**
-Writes an SQL null field full of zeros. */
-UNIV_INLINE
-void
-data_write_sql_null(
-/*================*/
- byte* data, /*!< in: pointer to a buffer of size len */
- ulint len); /*!< in: SQL null size in bytes */
-/*********************************************************************//**
-Copies the data and len fields. */
-UNIV_INLINE
-void
-dfield_copy_data(
-/*=============*/
- dfield_t* field1, /*!< out: field to copy to */
- const dfield_t* field2);/*!< in: field to copy from */
-/*********************************************************************//**
-Copies a data field to another. */
-UNIV_INLINE
-void
-dfield_copy(
-/*========*/
- dfield_t* field1, /*!< out: field to copy to */
- const dfield_t* field2);/*!< in: field to copy from */
-/*********************************************************************//**
-Copies the data pointed to by a data field. */
-UNIV_INLINE
-void
-dfield_dup(
-/*=======*/
- dfield_t* field, /*!< in/out: data field */
- mem_heap_t* heap); /*!< in: memory heap where allocated */
-/*********************************************************************//**
-Tests if data length and content is equal for two dfields.
-@return TRUE if equal */
-UNIV_INLINE
-ibool
-dfield_datas_are_binary_equal(
-/*==========================*/
- const dfield_t* field1, /*!< in: field */
- const dfield_t* field2);/*!< in: field */
-/*********************************************************************//**
-Tests if dfield data length and content is equal to the given.
-@return TRUE if equal */
-UNIV_INTERN
-ibool
-dfield_data_is_binary_equal(
-/*========================*/
- const dfield_t* field, /*!< in: field */
- ulint len, /*!< in: data length or UNIV_SQL_NULL */
- const byte* data); /*!< in: data */
-/*********************************************************************//**
-Gets number of fields in a data tuple.
-@return number of fields */
-UNIV_INLINE
-ulint
-dtuple_get_n_fields(
-/*================*/
- const dtuple_t* tuple); /*!< in: tuple */
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Gets nth field of a tuple.
-@return nth field */
-UNIV_INLINE
-dfield_t*
-dtuple_get_nth_field(
-/*=================*/
- const dtuple_t* tuple, /*!< in: tuple */
- ulint n); /*!< in: index of field */
-#else /* UNIV_DEBUG */
-# define dtuple_get_nth_field(tuple, n) ((tuple)->fields + (n))
-#endif /* UNIV_DEBUG */
-/*********************************************************************//**
-Gets info bits in a data tuple.
-@return info bits */
-UNIV_INLINE
-ulint
-dtuple_get_info_bits(
-/*=================*/
- const dtuple_t* tuple); /*!< in: tuple */
-/*********************************************************************//**
-Sets info bits in a data tuple. */
-UNIV_INLINE
-void
-dtuple_set_info_bits(
-/*=================*/
- dtuple_t* tuple, /*!< in: tuple */
- ulint info_bits); /*!< in: info bits */
-/*********************************************************************//**
-Gets number of fields used in record comparisons.
-@return number of fields used in comparisons in rem0cmp.* */
-UNIV_INLINE
-ulint
-dtuple_get_n_fields_cmp(
-/*====================*/
- const dtuple_t* tuple); /*!< in: tuple */
-/*********************************************************************//**
-Gets number of fields used in record comparisons. */
-UNIV_INLINE
-void
-dtuple_set_n_fields_cmp(
-/*====================*/
- dtuple_t* tuple, /*!< in: tuple */
- ulint n_fields_cmp); /*!< in: number of fields used in
- comparisons in rem0cmp.* */
-/**********************************************************//**
-Creates a data tuple to a memory heap. The default value for number
-of fields used in record comparisons for this tuple is n_fields.
-@return own: created tuple */
-UNIV_INLINE
-dtuple_t*
-dtuple_create(
-/*==========*/
- mem_heap_t* heap, /*!< in: memory heap where the tuple
- is created */
- ulint n_fields); /*!< in: number of fields */
-
-/**********************************************************//**
-Wrap data fields in a tuple. The default value for number
-of fields used in record comparisons for this tuple is n_fields.
-@return data tuple */
-UNIV_INLINE
-const dtuple_t*
-dtuple_from_fields(
-/*===============*/
- dtuple_t* tuple, /*!< in: storage for data tuple */
- const dfield_t* fields, /*!< in: fields */
- ulint n_fields); /*!< in: number of fields */
-
-/*********************************************************************//**
-Sets number of fields used in a tuple. Normally this is set in
-dtuple_create, but if you want later to set it smaller, you can use this. */
-UNIV_INTERN
-void
-dtuple_set_n_fields(
-/*================*/
- dtuple_t* tuple, /*!< in: tuple */
- ulint n_fields); /*!< in: number of fields */
-/*********************************************************************//**
-Copies a data tuple to another. This is a shallow copy; if a deep copy
-is desired, dfield_dup() will have to be invoked on each field.
-@return own: copy of tuple */
-UNIV_INLINE
-dtuple_t*
-dtuple_copy(
-/*========*/
- const dtuple_t* tuple, /*!< in: tuple to copy from */
- mem_heap_t* heap); /*!< in: memory heap
- where the tuple is created */
-/**********************************************************//**
-The following function returns the sum of data lengths of a tuple. The space
-occupied by the field structs or the tuple struct is not counted.
-@return sum of data lens */
-UNIV_INLINE
-ulint
-dtuple_get_data_size(
-/*=================*/
- const dtuple_t* tuple, /*!< in: typed data tuple */
- ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
-/*********************************************************************//**
-Computes the number of externally stored fields in a data tuple.
-@return number of fields */
-UNIV_INLINE
-ulint
-dtuple_get_n_ext(
-/*=============*/
- const dtuple_t* tuple); /*!< in: tuple */
-/************************************************************//**
-Compare two data tuples, respecting the collation of character fields.
-@return 1, 0 , -1 if tuple1 is greater, equal, less, respectively,
-than tuple2 */
-UNIV_INTERN
-int
-dtuple_coll_cmp(
-/*============*/
- const dtuple_t* tuple1, /*!< in: tuple 1 */
- const dtuple_t* tuple2);/*!< in: tuple 2 */
-/************************************************************//**
-Folds a prefix given as the number of fields of a tuple.
-@return the folded value */
-UNIV_INLINE
-ulint
-dtuple_fold(
-/*========*/
- const dtuple_t* tuple, /*!< in: the tuple */
- ulint n_fields,/*!< in: number of complete fields to fold */
- ulint n_bytes,/*!< in: number of bytes to fold in an
- incomplete last field */
- dulint tree_id)/*!< in: index tree id */
- __attribute__((pure));
-/*******************************************************************//**
-Sets types of fields binary in a tuple. */
-UNIV_INLINE
-void
-dtuple_set_types_binary(
-/*====================*/
- dtuple_t* tuple, /*!< in: data tuple */
- ulint n); /*!< in: number of fields to set */
-/**********************************************************************//**
-Checks if a dtuple contains an SQL null value.
-@return TRUE if some field is SQL null */
-UNIV_INLINE
-ibool
-dtuple_contains_null(
-/*=================*/
- const dtuple_t* tuple); /*!< in: dtuple */
-/**********************************************************//**
-Checks that a data field is typed. Asserts an error if not.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-dfield_check_typed(
-/*===============*/
- const dfield_t* field); /*!< in: data field */
-/**********************************************************//**
-Checks that a data tuple is typed. Asserts an error if not.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-dtuple_check_typed(
-/*===============*/
- const dtuple_t* tuple); /*!< in: tuple */
-/**********************************************************//**
-Checks that a data tuple is typed.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-dtuple_check_typed_no_assert(
-/*=========================*/
- const dtuple_t* tuple); /*!< in: tuple */
-#ifdef UNIV_DEBUG
-/**********************************************************//**
-Validates the consistency of a tuple which must be complete, i.e,
-all fields must have been set.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-dtuple_validate(
-/*============*/
- const dtuple_t* tuple); /*!< in: tuple */
-#endif /* UNIV_DEBUG */
-/*************************************************************//**
-Pretty prints a dfield value according to its data type. */
-UNIV_INTERN
-void
-dfield_print(
-/*=========*/
- const dfield_t* dfield);/*!< in: dfield */
-/*************************************************************//**
-Pretty prints a dfield value according to its data type. Also the hex string
-is printed if a string contains non-printable characters. */
-UNIV_INTERN
-void
-dfield_print_also_hex(
-/*==================*/
- const dfield_t* dfield); /*!< in: dfield */
-/**********************************************************//**
-The following function prints the contents of a tuple. */
-UNIV_INTERN
-void
-dtuple_print(
-/*=========*/
- FILE* f, /*!< in: output stream */
- const dtuple_t* tuple); /*!< in: tuple */
-/**************************************************************//**
-Moves parts of long fields in entry to the big record vector so that
-the size of tuple drops below the maximum record size allowed in the
-database. Moves data only from those fields which are not necessary
-to determine uniquely the insertion place of the tuple in the index.
-@return own: created big record vector, NULL if we are not able to
-shorten the entry enough, i.e., if there are too many fixed-length or
-short fields in entry or the index is clustered */
-UNIV_INTERN
-big_rec_t*
-dtuple_convert_big_rec(
-/*===================*/
- dict_index_t* index, /*!< in: index */
- dtuple_t* entry, /*!< in/out: index entry */
- ulint* n_ext); /*!< in/out: number of
- externally stored columns */
-/**************************************************************//**
-Puts back to entry the data stored in vector. Note that to ensure the
-fields in entry can accommodate the data, vector must have been created
-from entry with dtuple_convert_big_rec. */
-UNIV_INTERN
-void
-dtuple_convert_back_big_rec(
-/*========================*/
- dict_index_t* index, /*!< in: index */
- dtuple_t* entry, /*!< in: entry whose data was put to vector */
- big_rec_t* vector);/*!< in, own: big rec vector; it is
- freed in this function */
-/**************************************************************//**
-Frees the memory in a big rec vector. */
-UNIV_INLINE
-void
-dtuple_big_rec_free(
-/*================*/
- big_rec_t* vector); /*!< in, own: big rec vector; it is
- freed in this function */
-
-/*######################################################################*/
-
-/** Structure for an SQL data field */
-struct dfield_struct{
- void* data; /*!< pointer to data */
- unsigned ext:1; /*!< TRUE=externally stored, FALSE=local */
- unsigned len:32; /*!< data length; UNIV_SQL_NULL if SQL null */
- dtype_t type; /*!< type of data */
-};
-
-/** Structure for an SQL data tuple of fields (logical record) */
-struct dtuple_struct {
- ulint info_bits; /*!< info bits of an index record:
- the default is 0; this field is used
- if an index record is built from
- a data tuple */
- ulint n_fields; /*!< number of fields in dtuple */
- ulint n_fields_cmp; /*!< number of fields which should
- be used in comparison services
- of rem0cmp.*; the index search
- is performed by comparing only these
- fields, others are ignored; the
- default value in dtuple creation is
- the same value as n_fields */
- dfield_t* fields; /*!< fields */
- UT_LIST_NODE_T(dtuple_t) tuple_list;
- /*!< data tuples can be linked into a
- list using this field */
-#ifdef UNIV_DEBUG
- ulint magic_n; /*!< magic number, used in
- debug assertions */
-/** Value of dtuple_struct::magic_n */
-# define DATA_TUPLE_MAGIC_N 65478679
-#endif /* UNIV_DEBUG */
-};
-
-/** A slot for a field in a big rec vector */
-typedef struct big_rec_field_struct big_rec_field_t;
-/** A slot for a field in a big rec vector */
-struct big_rec_field_struct {
- ulint field_no; /*!< field number in record */
- ulint len; /*!< stored data length, in bytes */
- const void* data; /*!< stored data */
-};
-
-/** Storage format for overflow data in a big record, that is, a
-clustered index record which needs external storage of data fields */
-struct big_rec_struct {
- mem_heap_t* heap; /*!< memory heap from which
- allocated */
- ulint n_fields; /*!< number of stored fields */
- big_rec_field_t*fields; /*!< stored fields */
-};
-
-#ifndef UNIV_NONINL
-#include "data0data.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/data0data.ic b/storage/innodb_plugin/include/data0data.ic
deleted file mode 100644
index da79aa33702..00000000000
--- a/storage/innodb_plugin/include/data0data.ic
+++ /dev/null
@@ -1,612 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/data0data.ic
-SQL data field and tuple
-
-Created 5/30/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "mem0mem.h"
-#include "ut0rnd.h"
-
-#ifdef UNIV_DEBUG
-/** Dummy variable to catch access to uninitialized fields. In the
-debug version, dtuple_create() will make all fields of dtuple_t point
-to data_error. */
-extern byte data_error;
-
-/*********************************************************************//**
-Gets pointer to the type struct of SQL data field.
-@return pointer to the type struct */
-UNIV_INLINE
-dtype_t*
-dfield_get_type(
-/*============*/
- const dfield_t* field) /*!< in: SQL data field */
-{
- ut_ad(field);
-
- return((dtype_t*) &(field->type));
-}
-#endif /* UNIV_DEBUG */
-
-/*********************************************************************//**
-Sets the type struct of SQL data field. */
-UNIV_INLINE
-void
-dfield_set_type(
-/*============*/
- dfield_t* field, /*!< in: SQL data field */
- dtype_t* type) /*!< in: pointer to data type struct */
-{
- ut_ad(field && type);
-
- field->type = *type;
-}
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Gets pointer to the data in a field.
-@return pointer to data */
-UNIV_INLINE
-void*
-dfield_get_data(
-/*============*/
- const dfield_t* field) /*!< in: field */
-{
- ut_ad(field);
- ut_ad((field->len == UNIV_SQL_NULL)
- || (field->data != &data_error));
-
- return((void*) field->data);
-}
-#endif /* UNIV_DEBUG */
-
-/*********************************************************************//**
-Gets length of field data.
-@return length of data; UNIV_SQL_NULL if SQL null data */
-UNIV_INLINE
-ulint
-dfield_get_len(
-/*===========*/
- const dfield_t* field) /*!< in: field */
-{
- ut_ad(field);
- ut_ad((field->len == UNIV_SQL_NULL)
- || (field->data != &data_error));
-
- return(field->len);
-}
-
-/*********************************************************************//**
-Sets length in a field. */
-UNIV_INLINE
-void
-dfield_set_len(
-/*===========*/
- dfield_t* field, /*!< in: field */
- ulint len) /*!< in: length or UNIV_SQL_NULL */
-{
- ut_ad(field);
-#ifdef UNIV_VALGRIND_DEBUG
- if (len != UNIV_SQL_NULL) UNIV_MEM_ASSERT_RW(field->data, len);
-#endif /* UNIV_VALGRIND_DEBUG */
-
- field->ext = 0;
- field->len = len;
-}
-
-/*********************************************************************//**
-Determines if a field is SQL NULL
-@return nonzero if SQL null data */
-UNIV_INLINE
-ulint
-dfield_is_null(
-/*===========*/
- const dfield_t* field) /*!< in: field */
-{
- ut_ad(field);
-
- return(field->len == UNIV_SQL_NULL);
-}
-
-/*********************************************************************//**
-Determines if a field is externally stored
-@return nonzero if externally stored */
-UNIV_INLINE
-ulint
-dfield_is_ext(
-/*==========*/
- const dfield_t* field) /*!< in: field */
-{
- ut_ad(field);
-
- return(UNIV_UNLIKELY(field->ext));
-}
-
-/*********************************************************************//**
-Sets the "external storage" flag */
-UNIV_INLINE
-void
-dfield_set_ext(
-/*===========*/
- dfield_t* field) /*!< in/out: field */
-{
- ut_ad(field);
-
- field->ext = 1;
-}
-
-/*********************************************************************//**
-Sets pointer to the data and length in a field. */
-UNIV_INLINE
-void
-dfield_set_data(
-/*============*/
- dfield_t* field, /*!< in: field */
- const void* data, /*!< in: data */
- ulint len) /*!< in: length or UNIV_SQL_NULL */
-{
- ut_ad(field);
-
-#ifdef UNIV_VALGRIND_DEBUG
- if (len != UNIV_SQL_NULL) UNIV_MEM_ASSERT_RW(data, len);
-#endif /* UNIV_VALGRIND_DEBUG */
- field->data = (void*) data;
- field->ext = 0;
- field->len = len;
-}
-
-/*********************************************************************//**
-Sets a data field to SQL NULL. */
-UNIV_INLINE
-void
-dfield_set_null(
-/*============*/
- dfield_t* field) /*!< in/out: field */
-{
- dfield_set_data(field, NULL, UNIV_SQL_NULL);
-}
-
-/*********************************************************************//**
-Copies the data and len fields. */
-UNIV_INLINE
-void
-dfield_copy_data(
-/*=============*/
- dfield_t* field1, /*!< out: field to copy to */
- const dfield_t* field2) /*!< in: field to copy from */
-{
- ut_ad(field1 && field2);
-
- field1->data = field2->data;
- field1->len = field2->len;
- field1->ext = field2->ext;
-}
-
-/*********************************************************************//**
-Copies a data field to another. */
-UNIV_INLINE
-void
-dfield_copy(
-/*========*/
- dfield_t* field1, /*!< out: field to copy to */
- const dfield_t* field2) /*!< in: field to copy from */
-{
- *field1 = *field2;
-}
-
-/*********************************************************************//**
-Copies the data pointed to by a data field. */
-UNIV_INLINE
-void
-dfield_dup(
-/*=======*/
- dfield_t* field, /*!< in/out: data field */
- mem_heap_t* heap) /*!< in: memory heap where allocated */
-{
- if (!dfield_is_null(field)) {
- UNIV_MEM_ASSERT_RW(field->data, field->len);
- field->data = mem_heap_dup(heap, field->data, field->len);
- }
-}
-
-/*********************************************************************//**
-Tests if data length and content is equal for two dfields.
-@return TRUE if equal */
-UNIV_INLINE
-ibool
-dfield_datas_are_binary_equal(
-/*==========================*/
- const dfield_t* field1, /*!< in: field */
- const dfield_t* field2) /*!< in: field */
-{
- ulint len;
-
- len = field1->len;
-
- return(len == field2->len
- && (len == UNIV_SQL_NULL
- || !memcmp(field1->data, field2->data, len)));
-}
-
-/*********************************************************************//**
-Gets info bits in a data tuple.
-@return info bits */
-UNIV_INLINE
-ulint
-dtuple_get_info_bits(
-/*=================*/
- const dtuple_t* tuple) /*!< in: tuple */
-{
- ut_ad(tuple);
-
- return(tuple->info_bits);
-}
-
-/*********************************************************************//**
-Sets info bits in a data tuple. */
-UNIV_INLINE
-void
-dtuple_set_info_bits(
-/*=================*/
- dtuple_t* tuple, /*!< in: tuple */
- ulint info_bits) /*!< in: info bits */
-{
- ut_ad(tuple);
-
- tuple->info_bits = info_bits;
-}
-
-/*********************************************************************//**
-Gets number of fields used in record comparisons.
-@return number of fields used in comparisons in rem0cmp.* */
-UNIV_INLINE
-ulint
-dtuple_get_n_fields_cmp(
-/*====================*/
- const dtuple_t* tuple) /*!< in: tuple */
-{
- ut_ad(tuple);
-
- return(tuple->n_fields_cmp);
-}
-
-/*********************************************************************//**
-Sets number of fields used in record comparisons. */
-UNIV_INLINE
-void
-dtuple_set_n_fields_cmp(
-/*====================*/
- dtuple_t* tuple, /*!< in: tuple */
- ulint n_fields_cmp) /*!< in: number of fields used in
- comparisons in rem0cmp.* */
-{
- ut_ad(tuple);
- ut_ad(n_fields_cmp <= tuple->n_fields);
-
- tuple->n_fields_cmp = n_fields_cmp;
-}
-
-/*********************************************************************//**
-Gets number of fields in a data tuple.
-@return number of fields */
-UNIV_INLINE
-ulint
-dtuple_get_n_fields(
-/*================*/
- const dtuple_t* tuple) /*!< in: tuple */
-{
- ut_ad(tuple);
-
- return(tuple->n_fields);
-}
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Gets nth field of a tuple.
-@return nth field */
-UNIV_INLINE
-dfield_t*
-dtuple_get_nth_field(
-/*=================*/
- const dtuple_t* tuple, /*!< in: tuple */
- ulint n) /*!< in: index of field */
-{
- ut_ad(tuple);
- ut_ad(n < tuple->n_fields);
-
- return((dfield_t*) tuple->fields + n);
-}
-#endif /* UNIV_DEBUG */
-
-/**********************************************************//**
-Creates a data tuple to a memory heap. The default value for number
-of fields used in record comparisons for this tuple is n_fields.
-@return own: created tuple */
-UNIV_INLINE
-dtuple_t*
-dtuple_create(
-/*==========*/
- mem_heap_t* heap, /*!< in: memory heap where the tuple
- is created */
- ulint n_fields) /*!< in: number of fields */
-{
- dtuple_t* tuple;
-
- ut_ad(heap);
-
- tuple = (dtuple_t*) mem_heap_alloc(heap, sizeof(dtuple_t)
- + n_fields * sizeof(dfield_t));
- tuple->info_bits = 0;
- tuple->n_fields = n_fields;
- tuple->n_fields_cmp = n_fields;
- tuple->fields = (dfield_t*) &tuple[1];
-
-#ifdef UNIV_DEBUG
- tuple->magic_n = DATA_TUPLE_MAGIC_N;
-
- { /* In the debug version, initialize fields to an error value */
- ulint i;
-
- for (i = 0; i < n_fields; i++) {
- dfield_t* field;
-
- field = dtuple_get_nth_field(tuple, i);
-
- dfield_set_len(field, UNIV_SQL_NULL);
- field->data = &data_error;
- dfield_get_type(field)->mtype = DATA_ERROR;
- }
- }
-
- UNIV_MEM_INVALID(tuple->fields, n_fields * sizeof *tuple->fields);
-#endif
- return(tuple);
-}
-
-/**********************************************************//**
-Wrap data fields in a tuple. The default value for number
-of fields used in record comparisons for this tuple is n_fields.
-@return data tuple */
-UNIV_INLINE
-const dtuple_t*
-dtuple_from_fields(
-/*===============*/
- dtuple_t* tuple, /*!< in: storage for data tuple */
- const dfield_t* fields, /*!< in: fields */
- ulint n_fields) /*!< in: number of fields */
-{
- tuple->info_bits = 0;
- tuple->n_fields = tuple->n_fields_cmp = n_fields;
- tuple->fields = (dfield_t*) fields;
- ut_d(tuple->magic_n = DATA_TUPLE_MAGIC_N);
-
- return(tuple);
-}
-
-/*********************************************************************//**
-Copies a data tuple to another. This is a shallow copy; if a deep copy
-is desired, dfield_dup() will have to be invoked on each field.
-@return own: copy of tuple */
-UNIV_INLINE
-dtuple_t*
-dtuple_copy(
-/*========*/
- const dtuple_t* tuple, /*!< in: tuple to copy from */
- mem_heap_t* heap) /*!< in: memory heap
- where the tuple is created */
-{
- ulint n_fields = dtuple_get_n_fields(tuple);
- dtuple_t* new_tuple = dtuple_create(heap, n_fields);
- ulint i;
-
- for (i = 0; i < n_fields; i++) {
- dfield_copy(dtuple_get_nth_field(new_tuple, i),
- dtuple_get_nth_field(tuple, i));
- }
-
- return(new_tuple);
-}
-
-/**********************************************************//**
-The following function returns the sum of data lengths of a tuple. The space
-occupied by the field structs or the tuple struct is not counted. Neither
-is possible space in externally stored parts of the field.
-@return sum of data lengths */
-UNIV_INLINE
-ulint
-dtuple_get_data_size(
-/*=================*/
- const dtuple_t* tuple, /*!< in: typed data tuple */
- ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
-{
- const dfield_t* field;
- ulint n_fields;
- ulint len;
- ulint i;
- ulint sum = 0;
-
- ut_ad(tuple);
- ut_ad(dtuple_check_typed(tuple));
- ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
-
- n_fields = tuple->n_fields;
-
- for (i = 0; i < n_fields; i++) {
- field = dtuple_get_nth_field(tuple, i);
- len = dfield_get_len(field);
-
- if (len == UNIV_SQL_NULL) {
- len = dtype_get_sql_null_size(dfield_get_type(field),
- comp);
- }
-
- sum += len;
- }
-
- return(sum);
-}
-
-/*********************************************************************//**
-Computes the number of externally stored fields in a data tuple.
-@return number of externally stored fields */
-UNIV_INLINE
-ulint
-dtuple_get_n_ext(
-/*=============*/
- const dtuple_t* tuple) /*!< in: tuple */
-{
- ulint n_ext = 0;
- ulint n_fields = tuple->n_fields;
- ulint i;
-
- ut_ad(tuple);
- ut_ad(dtuple_check_typed(tuple));
- ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
-
- for (i = 0; i < n_fields; i++) {
- n_ext += dtuple_get_nth_field(tuple, i)->ext;
- }
-
- return(n_ext);
-}
-
-/*******************************************************************//**
-Sets types of fields binary in a tuple. */
-UNIV_INLINE
-void
-dtuple_set_types_binary(
-/*====================*/
- dtuple_t* tuple, /*!< in: data tuple */
- ulint n) /*!< in: number of fields to set */
-{
- dtype_t* dfield_type;
- ulint i;
-
- for (i = 0; i < n; i++) {
- dfield_type = dfield_get_type(dtuple_get_nth_field(tuple, i));
- dtype_set(dfield_type, DATA_BINARY, 0, 0);
- }
-}
-
-/************************************************************//**
-Folds a prefix given as the number of fields of a tuple.
-@return the folded value */
-UNIV_INLINE
-ulint
-dtuple_fold(
-/*========*/
- const dtuple_t* tuple, /*!< in: the tuple */
- ulint n_fields,/*!< in: number of complete fields to fold */
- ulint n_bytes,/*!< in: number of bytes to fold in an
- incomplete last field */
- dulint tree_id)/*!< in: index tree id */
-{
- const dfield_t* field;
- ulint i;
- const byte* data;
- ulint len;
- ulint fold;
-
- ut_ad(tuple);
- ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
- ut_ad(dtuple_check_typed(tuple));
-
- fold = ut_fold_dulint(tree_id);
-
- for (i = 0; i < n_fields; i++) {
- field = dtuple_get_nth_field(tuple, i);
-
- data = (const byte*) dfield_get_data(field);
- len = dfield_get_len(field);
-
- if (len != UNIV_SQL_NULL) {
- fold = ut_fold_ulint_pair(fold,
- ut_fold_binary(data, len));
- }
- }
-
- if (n_bytes > 0) {
- field = dtuple_get_nth_field(tuple, i);
-
- data = (const byte*) dfield_get_data(field);
- len = dfield_get_len(field);
-
- if (len != UNIV_SQL_NULL) {
- if (len > n_bytes) {
- len = n_bytes;
- }
-
- fold = ut_fold_ulint_pair(fold,
- ut_fold_binary(data, len));
- }
- }
-
- return(fold);
-}
-
-/**********************************************************************//**
-Writes an SQL null field full of zeros. */
-UNIV_INLINE
-void
-data_write_sql_null(
-/*================*/
- byte* data, /*!< in: pointer to a buffer of size len */
- ulint len) /*!< in: SQL null size in bytes */
-{
- memset(data, 0, len);
-}
-
-/**********************************************************************//**
-Checks if a dtuple contains an SQL null value.
-@return TRUE if some field is SQL null */
-UNIV_INLINE
-ibool
-dtuple_contains_null(
-/*=================*/
- const dtuple_t* tuple) /*!< in: dtuple */
-{
- ulint n;
- ulint i;
-
- n = dtuple_get_n_fields(tuple);
-
- for (i = 0; i < n; i++) {
- if (dfield_is_null(dtuple_get_nth_field(tuple, i))) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/**************************************************************//**
-Frees the memory in a big rec vector. */
-UNIV_INLINE
-void
-dtuple_big_rec_free(
-/*================*/
- big_rec_t* vector) /*!< in, own: big rec vector; it is
- freed in this function */
-{
- mem_heap_free(vector->heap);
-}
diff --git a/storage/innodb_plugin/include/data0type.h b/storage/innodb_plugin/include/data0type.h
deleted file mode 100644
index a73bed3a9f5..00000000000
--- a/storage/innodb_plugin/include/data0type.h
+++ /dev/null
@@ -1,486 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/data0type.h
-Data types
-
-Created 1/16/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef data0type_h
-#define data0type_h
-
-#include "univ.i"
-
-extern ulint data_mysql_default_charset_coll;
-#define DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL 8
-#define DATA_MYSQL_BINARY_CHARSET_COLL 63
-
-/* SQL data type struct */
-typedef struct dtype_struct dtype_t;
-
-/*-------------------------------------------*/
-/* The 'MAIN TYPE' of a column */
-#define DATA_VARCHAR 1 /* character varying of the
- latin1_swedish_ci charset-collation; note
- that the MySQL format for this, DATA_BINARY,
- DATA_VARMYSQL, is also affected by whether the
- 'precise type' contains
- DATA_MYSQL_TRUE_VARCHAR */
-#define DATA_CHAR 2 /* fixed length character of the
- latin1_swedish_ci charset-collation */
-#define DATA_FIXBINARY 3 /* binary string of fixed length */
-#define DATA_BINARY 4 /* binary string */
-#define DATA_BLOB 5 /* binary large object, or a TEXT type;
- if prtype & DATA_BINARY_TYPE == 0, then this is
- actually a TEXT column (or a BLOB created
- with < 4.0.14; since column prefix indexes
- came only in 4.0.14, the missing flag in BLOBs
- created before that does not cause any harm) */
-#define DATA_INT 6 /* integer: can be any size 1 - 8 bytes */
-#define DATA_SYS_CHILD 7 /* address of the child page in node pointer */
-#define DATA_SYS 8 /* system column */
-
-/* Data types >= DATA_FLOAT must be compared using the whole field, not as
-binary strings */
-
-#define DATA_FLOAT 9
-#define DATA_DOUBLE 10
-#define DATA_DECIMAL 11 /* decimal number stored as an ASCII string */
-#define DATA_VARMYSQL 12 /* any charset varying length char */
-#define DATA_MYSQL 13 /* any charset fixed length char */
- /* NOTE that 4.1.1 used DATA_MYSQL and
- DATA_VARMYSQL for all character sets, and the
- charset-collation for tables created with it
- can also be latin1_swedish_ci */
-#define DATA_MTYPE_MAX 63 /* dtype_store_for_order_and_null_size()
- requires the values are <= 63 */
-/*-------------------------------------------*/
-/* The 'PRECISE TYPE' of a column */
-/*
-Tables created by a MySQL user have the following convention:
-
-- In the least significant byte in the precise type we store the MySQL type
-code (not applicable for system columns).
-
-- In the second least significant byte we OR flags DATA_NOT_NULL,
-DATA_UNSIGNED, DATA_BINARY_TYPE.
-
-- In the third least significant byte of the precise type of string types we
-store the MySQL charset-collation code. In DATA_BLOB columns created with
-< 4.0.14 we do not actually know if it is a BLOB or a TEXT column. Since there
-are no indexes on prefixes of BLOB or TEXT columns in < 4.0.14, this is no
-problem, though.
-
-Note that versions < 4.1.2 or < 5.0.1 did not store the charset code to the
-precise type, since the charset was always the default charset of the MySQL
-installation. If the stored charset code is 0 in the system table SYS_COLUMNS
-of InnoDB, that means that the default charset of this MySQL installation
-should be used.
-
-When loading a table definition from the system tables to the InnoDB data
-dictionary cache in main memory, InnoDB versions >= 4.1.2 and >= 5.0.1 check
-if the stored charset-collation is 0, and if that is the case and the type is
-a non-binary string, replace that 0 by the default charset-collation code of
-this MySQL installation. In short, in old tables, the charset-collation code
-in the system tables on disk can be 0, but in in-memory data structures
-(dtype_t), the charset-collation code is always != 0 for non-binary string
-types.
-
-In new tables, in binary string types, the charset-collation code is the
-MySQL code for the 'binary charset', that is, != 0.
-
-For binary string types and for DATA_CHAR, DATA_VARCHAR, and for those
-DATA_BLOB which are binary or have the charset-collation latin1_swedish_ci,
-InnoDB performs all comparisons internally, without resorting to the MySQL
-comparison functions. This is to save CPU time.
-
-InnoDB's own internal system tables have different precise types for their
-columns, and for them the precise type is usually not used at all.
-*/
-
-#define DATA_ENGLISH 4 /* English language character string: this
- is a relic from pre-MySQL time and only used
- for InnoDB's own system tables */
-#define DATA_ERROR 111 /* another relic from pre-MySQL time */
-
-#define DATA_MYSQL_TYPE_MASK 255 /* AND with this mask to extract the MySQL
- type from the precise type */
-#define DATA_MYSQL_TRUE_VARCHAR 15 /* MySQL type code for the >= 5.0.3
- format true VARCHAR */
-
-/* Precise data types for system columns and the length of those columns;
-NOTE: the values must run from 0 up in the order given! All codes must
-be less than 256 */
-#define DATA_ROW_ID 0 /* row id: a dulint */
-#define DATA_ROW_ID_LEN 6 /* stored length for row id */
-
-#define DATA_TRX_ID 1 /* transaction id: 6 bytes */
-#define DATA_TRX_ID_LEN 6
-
-#define DATA_ROLL_PTR 2 /* rollback data pointer: 7 bytes */
-#define DATA_ROLL_PTR_LEN 7
-
-#define DATA_N_SYS_COLS 3 /* number of system columns defined above */
-
-#define DATA_SYS_PRTYPE_MASK 0xF /* mask to extract the above from prtype */
-
-/* Flags ORed to the precise data type */
-#define DATA_NOT_NULL 256 /* this is ORed to the precise type when
- the column is declared as NOT NULL */
-#define DATA_UNSIGNED 512 /* this id ORed to the precise type when
- we have an unsigned integer type */
-#define DATA_BINARY_TYPE 1024 /* if the data type is a binary character
- string, this is ORed to the precise type:
- this only holds for tables created with
- >= MySQL-4.0.14 */
-/* #define DATA_NONLATIN1 2048 This is a relic from < 4.1.2 and < 5.0.1.
- In earlier versions this was set for some
- BLOB columns.
-*/
-#define DATA_LONG_TRUE_VARCHAR 4096 /* this is ORed to the precise data
- type when the column is true VARCHAR where
- MySQL uses 2 bytes to store the data len;
- for shorter VARCHARs MySQL uses only 1 byte */
-/*-------------------------------------------*/
-
-/* This many bytes we need to store the type information affecting the
-alphabetical order for a single field and decide the storage size of an
-SQL null*/
-#define DATA_ORDER_NULL_TYPE_BUF_SIZE 4
-/* In the >= 4.1.x storage format we add 2 bytes more so that we can also
-store the charset-collation number; one byte is left unused, though */
-#define DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE 6
-
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Gets the MySQL type code from a dtype.
-@return MySQL type code; this is NOT an InnoDB type code! */
-UNIV_INLINE
-ulint
-dtype_get_mysql_type(
-/*=================*/
- const dtype_t* type); /*!< in: type struct */
-/*********************************************************************//**
-Determine how many bytes the first n characters of the given string occupy.
-If the string is shorter than n characters, returns the number of bytes
-the characters in the string occupy.
-@return length of the prefix, in bytes */
-UNIV_INTERN
-ulint
-dtype_get_at_most_n_mbchars(
-/*========================*/
- ulint prtype, /*!< in: precise type */
- ulint mbminlen, /*!< in: minimum length of a
- multi-byte character */
- ulint mbmaxlen, /*!< in: maximum length of a
- multi-byte character */
- ulint prefix_len, /*!< in: length of the requested
- prefix, in characters, multiplied by
- dtype_get_mbmaxlen(dtype) */
- ulint data_len, /*!< in: length of str (in bytes) */
- const char* str); /*!< in: the string whose prefix
- length is being determined */
-#endif /* !UNIV_HOTBACKUP */
-/*********************************************************************//**
-Checks if a data main type is a string type. Also a BLOB is considered a
-string type.
-@return TRUE if string type */
-UNIV_INTERN
-ibool
-dtype_is_string_type(
-/*=================*/
- ulint mtype); /*!< in: InnoDB main data type code: DATA_CHAR, ... */
-/*********************************************************************//**
-Checks if a type is a binary string type. Note that for tables created with
-< 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For
-those DATA_BLOB columns this function currently returns FALSE.
-@return TRUE if binary string type */
-UNIV_INTERN
-ibool
-dtype_is_binary_string_type(
-/*========================*/
- ulint mtype, /*!< in: main data type */
- ulint prtype);/*!< in: precise type */
-/*********************************************************************//**
-Checks if a type is a non-binary string type. That is, dtype_is_string_type is
-TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created
-with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column.
-For those DATA_BLOB columns this function currently returns TRUE.
-@return TRUE if non-binary string type */
-UNIV_INTERN
-ibool
-dtype_is_non_binary_string_type(
-/*============================*/
- ulint mtype, /*!< in: main data type */
- ulint prtype);/*!< in: precise type */
-/*********************************************************************//**
-Sets a data type structure. */
-UNIV_INLINE
-void
-dtype_set(
-/*======*/
- dtype_t* type, /*!< in: type struct to init */
- ulint mtype, /*!< in: main data type */
- ulint prtype, /*!< in: precise type */
- ulint len); /*!< in: precision of type */
-/*********************************************************************//**
-Copies a data type structure. */
-UNIV_INLINE
-void
-dtype_copy(
-/*=======*/
- dtype_t* type1, /*!< in: type struct to copy to */
- const dtype_t* type2); /*!< in: type struct to copy from */
-/*********************************************************************//**
-Gets the SQL main data type.
-@return SQL main data type */
-UNIV_INLINE
-ulint
-dtype_get_mtype(
-/*============*/
- const dtype_t* type); /*!< in: data type */
-/*********************************************************************//**
-Gets the precise data type.
-@return precise data type */
-UNIV_INLINE
-ulint
-dtype_get_prtype(
-/*=============*/
- const dtype_t* type); /*!< in: data type */
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Compute the mbminlen and mbmaxlen members of a data type structure. */
-UNIV_INLINE
-void
-dtype_get_mblen(
-/*============*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type (and collation) */
- ulint* mbminlen, /*!< out: minimum length of a
- multi-byte character */
- ulint* mbmaxlen); /*!< out: maximum length of a
- multi-byte character */
-/*********************************************************************//**
-Gets the MySQL charset-collation code for MySQL string types.
-@return MySQL charset-collation code */
-UNIV_INLINE
-ulint
-dtype_get_charset_coll(
-/*===================*/
- ulint prtype);/*!< in: precise data type */
-/*********************************************************************//**
-Forms a precise type from the < 4.1.2 format precise type plus the
-charset-collation code.
-@return precise type, including the charset-collation code */
-UNIV_INTERN
-ulint
-dtype_form_prtype(
-/*==============*/
- ulint old_prtype, /*!< in: the MySQL type code and the flags
- DATA_BINARY_TYPE etc. */
- ulint charset_coll); /*!< in: MySQL charset-collation code */
-/*********************************************************************//**
-Determines if a MySQL string type is a subset of UTF-8. This function
-may return false negatives, in case further character-set collation
-codes are introduced in MySQL later.
-@return TRUE if a subset of UTF-8 */
-UNIV_INLINE
-ibool
-dtype_is_utf8(
-/*==========*/
- ulint prtype);/*!< in: precise data type */
-#endif /* !UNIV_HOTBACKUP */
-/*********************************************************************//**
-Gets the type length.
-@return fixed length of the type, in bytes, or 0 if variable-length */
-UNIV_INLINE
-ulint
-dtype_get_len(
-/*==========*/
- const dtype_t* type); /*!< in: data type */
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Gets the minimum length of a character, in bytes.
-@return minimum length of a char, in bytes, or 0 if this is not a
-character type */
-UNIV_INLINE
-ulint
-dtype_get_mbminlen(
-/*===============*/
- const dtype_t* type); /*!< in: type */
-/*********************************************************************//**
-Gets the maximum length of a character, in bytes.
-@return maximum length of a char, in bytes, or 0 if this is not a
-character type */
-UNIV_INLINE
-ulint
-dtype_get_mbmaxlen(
-/*===============*/
- const dtype_t* type); /*!< in: type */
-/*********************************************************************//**
-Gets the padding character code for the type.
-@return padding character code, or ULINT_UNDEFINED if no padding specified */
-UNIV_INLINE
-ulint
-dtype_get_pad_char(
-/*===============*/
- ulint mtype, /*!< in: main type */
- ulint prtype); /*!< in: precise type */
-#endif /* !UNIV_HOTBACKUP */
-/***********************************************************************//**
-Returns the size of a fixed size data type, 0 if not a fixed size type.
-@return fixed size, or 0 */
-UNIV_INLINE
-ulint
-dtype_get_fixed_size_low(
-/*=====================*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type */
- ulint len, /*!< in: length */
- ulint mbminlen, /*!< in: minimum length of a multibyte char */
- ulint mbmaxlen, /*!< in: maximum length of a multibyte char */
- ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
-#ifndef UNIV_HOTBACKUP
-/***********************************************************************//**
-Returns the minimum size of a data type.
-@return minimum size */
-UNIV_INLINE
-ulint
-dtype_get_min_size_low(
-/*===================*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type */
- ulint len, /*!< in: length */
- ulint mbminlen, /*!< in: minimum length of a multibyte char */
- ulint mbmaxlen); /*!< in: maximum length of a multibyte char */
-/***********************************************************************//**
-Returns the maximum size of a data type. Note: types in system tables may be
-incomplete and return incorrect information.
-@return maximum size */
-UNIV_INLINE
-ulint
-dtype_get_max_size_low(
-/*===================*/
- ulint mtype, /*!< in: main type */
- ulint len); /*!< in: length */
-#endif /* !UNIV_HOTBACKUP */
-/***********************************************************************//**
-Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type.
-For fixed length types it is the fixed length of the type, otherwise 0.
-@return SQL null storage size in ROW_FORMAT=REDUNDANT */
-UNIV_INLINE
-ulint
-dtype_get_sql_null_size(
-/*====================*/
- const dtype_t* type, /*!< in: type */
- ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Reads to a type the stored information which determines its alphabetical
-ordering and the storage size of an SQL NULL value. */
-UNIV_INLINE
-void
-dtype_read_for_order_and_null_size(
-/*===============================*/
- dtype_t* type, /*!< in: type struct */
- const byte* buf); /*!< in: buffer for the stored order info */
-/**********************************************************************//**
-Stores for a type the information which determines its alphabetical ordering
-and the storage size of an SQL NULL value. This is the >= 4.1.x storage
-format. */
-UNIV_INLINE
-void
-dtype_new_store_for_order_and_null_size(
-/*====================================*/
- byte* buf, /*!< in: buffer for
- DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
- bytes where we store the info */
- const dtype_t* type, /*!< in: type struct */
- ulint prefix_len);/*!< in: prefix length to
- replace type->len, or 0 */
-/**********************************************************************//**
-Reads to a type the stored information which determines its alphabetical
-ordering and the storage size of an SQL NULL value. This is the 4.1.x storage
-format. */
-UNIV_INLINE
-void
-dtype_new_read_for_order_and_null_size(
-/*===================================*/
- dtype_t* type, /*!< in: type struct */
- const byte* buf); /*!< in: buffer for stored type order info */
-#endif /* !UNIV_HOTBACKUP */
-
-/*********************************************************************//**
-Validates a data type structure.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-dtype_validate(
-/*===========*/
- const dtype_t* type); /*!< in: type struct to validate */
-/*********************************************************************//**
-Prints a data type structure. */
-UNIV_INTERN
-void
-dtype_print(
-/*========*/
- const dtype_t* type); /*!< in: type */
-
-/* Structure for an SQL data type.
-If you add fields to this structure, be sure to initialize them everywhere.
-This structure is initialized in the following functions:
-dtype_set()
-dtype_read_for_order_and_null_size()
-dtype_new_read_for_order_and_null_size()
-sym_tab_add_null_lit() */
-
-struct dtype_struct{
- unsigned mtype:8; /*!< main data type */
- unsigned prtype:24; /*!< precise type; MySQL data
- type, charset code, flags to
- indicate nullability,
- signedness, whether this is a
- binary string, whether this is
- a true VARCHAR where MySQL
- uses 2 bytes to store the length */
-
- /* the remaining fields do not affect alphabetical ordering: */
-
- unsigned len:16; /*!< length; for MySQL data this
- is field->pack_length(),
- except that for a >= 5.0.3
- type true VARCHAR this is the
- maximum byte length of the
- string data (in addition to
- the string, MySQL uses 1 or 2
- bytes to store the string length) */
-#ifndef UNIV_HOTBACKUP
- unsigned mbminlen:2; /*!< minimum length of a
- character, in bytes */
- unsigned mbmaxlen:3; /*!< maximum length of a
- character, in bytes */
-#endif /* !UNIV_HOTBACKUP */
-};
-
-#ifndef UNIV_NONINL
-#include "data0type.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/data0type.ic b/storage/innodb_plugin/include/data0type.ic
deleted file mode 100644
index 240b4288f39..00000000000
--- a/storage/innodb_plugin/include/data0type.ic
+++ /dev/null
@@ -1,599 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/data0type.ic
-Data types
-
-Created 1/16/1996 Heikki Tuuri
-*******************************************************/
-
-#include "mach0data.h"
-#ifndef UNIV_HOTBACKUP
-# include "ha_prototypes.h"
-
-/*********************************************************************//**
-Gets the MySQL charset-collation code for MySQL string types.
-@return MySQL charset-collation code */
-UNIV_INLINE
-ulint
-dtype_get_charset_coll(
-/*===================*/
- ulint prtype) /*!< in: precise data type */
-{
- return((prtype >> 16) & 0xFFUL);
-}
-
-/*********************************************************************//**
-Determines if a MySQL string type is a subset of UTF-8. This function
-may return false negatives, in case further character-set collation
-codes are introduced in MySQL later.
-@return TRUE if a subset of UTF-8 */
-UNIV_INLINE
-ibool
-dtype_is_utf8(
-/*==========*/
- ulint prtype) /*!< in: precise data type */
-{
- /* These codes have been copied from strings/ctype-extra.c
- and strings/ctype-utf8.c. */
- switch (dtype_get_charset_coll(prtype)) {
- case 11: /* ascii_general_ci */
- case 65: /* ascii_bin */
- case 33: /* utf8_general_ci */
- case 83: /* utf8_bin */
- case 254: /* utf8_general_cs */
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Gets the MySQL type code from a dtype.
-@return MySQL type code; this is NOT an InnoDB type code! */
-UNIV_INLINE
-ulint
-dtype_get_mysql_type(
-/*=================*/
- const dtype_t* type) /*!< in: type struct */
-{
- return(type->prtype & 0xFFUL);
-}
-
-/*********************************************************************//**
-Compute the mbminlen and mbmaxlen members of a data type structure. */
-UNIV_INLINE
-void
-dtype_get_mblen(
-/*============*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type (and collation) */
- ulint* mbminlen, /*!< out: minimum length of a
- multi-byte character */
- ulint* mbmaxlen) /*!< out: maximum length of a
- multi-byte character */
-{
- if (dtype_is_string_type(mtype)) {
- innobase_get_cset_width(dtype_get_charset_coll(prtype),
- mbminlen, mbmaxlen);
- ut_ad(*mbminlen <= *mbmaxlen);
- ut_ad(*mbminlen <= 2); /* mbminlen in dtype_t is 0..3 */
- ut_ad(*mbmaxlen < 1 << 3); /* mbmaxlen in dtype_t is 0..7 */
- } else {
- *mbminlen = *mbmaxlen = 0;
- }
-}
-
-/*********************************************************************//**
-Compute the mbminlen and mbmaxlen members of a data type structure. */
-UNIV_INLINE
-void
-dtype_set_mblen(
-/*============*/
- dtype_t* type) /*!< in/out: type */
-{
- ulint mbminlen;
- ulint mbmaxlen;
-
- dtype_get_mblen(type->mtype, type->prtype, &mbminlen, &mbmaxlen);
- type->mbminlen = mbminlen;
- type->mbmaxlen = mbmaxlen;
-
- ut_ad(dtype_validate(type));
-}
-#else /* !UNIV_HOTBACKUP */
-# define dtype_set_mblen(type) (void) 0
-#endif /* !UNIV_HOTBACKUP */
-
-/*********************************************************************//**
-Sets a data type structure. */
-UNIV_INLINE
-void
-dtype_set(
-/*======*/
- dtype_t* type, /*!< in: type struct to init */
- ulint mtype, /*!< in: main data type */
- ulint prtype, /*!< in: precise type */
- ulint len) /*!< in: precision of type */
-{
- ut_ad(type);
- ut_ad(mtype <= DATA_MTYPE_MAX);
-
- type->mtype = mtype;
- type->prtype = prtype;
- type->len = len;
-
- dtype_set_mblen(type);
-}
-
-/*********************************************************************//**
-Copies a data type structure. */
-UNIV_INLINE
-void
-dtype_copy(
-/*=======*/
- dtype_t* type1, /*!< in: type struct to copy to */
- const dtype_t* type2) /*!< in: type struct to copy from */
-{
- *type1 = *type2;
-
- ut_ad(dtype_validate(type1));
-}
-
-/*********************************************************************//**
-Gets the SQL main data type.
-@return SQL main data type */
-UNIV_INLINE
-ulint
-dtype_get_mtype(
-/*============*/
- const dtype_t* type) /*!< in: data type */
-{
- ut_ad(type);
-
- return(type->mtype);
-}
-
-/*********************************************************************//**
-Gets the precise data type.
-@return precise data type */
-UNIV_INLINE
-ulint
-dtype_get_prtype(
-/*=============*/
- const dtype_t* type) /*!< in: data type */
-{
- ut_ad(type);
-
- return(type->prtype);
-}
-
-/*********************************************************************//**
-Gets the type length.
-@return fixed length of the type, in bytes, or 0 if variable-length */
-UNIV_INLINE
-ulint
-dtype_get_len(
-/*==========*/
- const dtype_t* type) /*!< in: data type */
-{
- ut_ad(type);
-
- return(type->len);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Gets the minimum length of a character, in bytes.
-@return minimum length of a char, in bytes, or 0 if this is not a
-character type */
-UNIV_INLINE
-ulint
-dtype_get_mbminlen(
-/*===============*/
- const dtype_t* type) /*!< in: type */
-{
- ut_ad(type);
- return(type->mbminlen);
-}
-/*********************************************************************//**
-Gets the maximum length of a character, in bytes.
-@return maximum length of a char, in bytes, or 0 if this is not a
-character type */
-UNIV_INLINE
-ulint
-dtype_get_mbmaxlen(
-/*===============*/
- const dtype_t* type) /*!< in: type */
-{
- ut_ad(type);
- return(type->mbmaxlen);
-}
-
-/*********************************************************************//**
-Gets the padding character code for a type.
-@return padding character code, or ULINT_UNDEFINED if no padding specified */
-UNIV_INLINE
-ulint
-dtype_get_pad_char(
-/*===============*/
- ulint mtype, /*!< in: main type */
- ulint prtype) /*!< in: precise type */
-{
- switch (mtype) {
- case DATA_FIXBINARY:
- case DATA_BINARY:
- if (UNIV_UNLIKELY(dtype_get_charset_coll(prtype)
- == DATA_MYSQL_BINARY_CHARSET_COLL)) {
- /* Starting from 5.0.18, do not pad
- VARBINARY or BINARY columns. */
- return(ULINT_UNDEFINED);
- }
- /* Fall through */
- case DATA_CHAR:
- case DATA_VARCHAR:
- case DATA_MYSQL:
- case DATA_VARMYSQL:
- /* Space is the padding character for all char and binary
- strings, and starting from 5.0.3, also for TEXT strings. */
-
- return(0x20);
- case DATA_BLOB:
- if (!(prtype & DATA_BINARY_TYPE)) {
- return(0x20);
- }
- /* Fall through */
- default:
- /* No padding specified */
- return(ULINT_UNDEFINED);
- }
-}
-
-/**********************************************************************//**
-Stores for a type the information which determines its alphabetical ordering
-and the storage size of an SQL NULL value. This is the >= 4.1.x storage
-format. */
-UNIV_INLINE
-void
-dtype_new_store_for_order_and_null_size(
-/*====================================*/
- byte* buf, /*!< in: buffer for
- DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
- bytes where we store the info */
- const dtype_t* type, /*!< in: type struct */
- ulint prefix_len)/*!< in: prefix length to
- replace type->len, or 0 */
-{
-#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
-#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
-#endif
- ulint len;
-
- buf[0] = (byte)(type->mtype & 0xFFUL);
-
- if (type->prtype & DATA_BINARY_TYPE) {
- buf[0] = buf[0] | 128;
- }
-
- /* In versions < 4.1.2 we had: if (type->prtype & DATA_NONLATIN1) {
- buf[0] = buf[0] | 64;
- }
- */
-
- buf[1] = (byte)(type->prtype & 0xFFUL);
-
- len = prefix_len ? prefix_len : type->len;
-
- mach_write_to_2(buf + 2, len & 0xFFFFUL);
-
- ut_ad(dtype_get_charset_coll(type->prtype) < 256);
- mach_write_to_2(buf + 4, dtype_get_charset_coll(type->prtype));
-
- if (type->prtype & DATA_NOT_NULL) {
- buf[4] |= 128;
- }
-}
-
-/**********************************************************************//**
-Reads to a type the stored information which determines its alphabetical
-ordering and the storage size of an SQL NULL value. This is the < 4.1.x
-storage format. */
-UNIV_INLINE
-void
-dtype_read_for_order_and_null_size(
-/*===============================*/
- dtype_t* type, /*!< in: type struct */
- const byte* buf) /*!< in: buffer for stored type order info */
-{
-#if 4 != DATA_ORDER_NULL_TYPE_BUF_SIZE
-# error "4 != DATA_ORDER_NULL_TYPE_BUF_SIZE"
-#endif
-
- type->mtype = buf[0] & 63;
- type->prtype = buf[1];
-
- if (buf[0] & 128) {
- type->prtype = type->prtype | DATA_BINARY_TYPE;
- }
-
- type->len = mach_read_from_2(buf + 2);
-
- type->prtype = dtype_form_prtype(type->prtype,
- data_mysql_default_charset_coll);
- dtype_set_mblen(type);
-}
-
-/**********************************************************************//**
-Reads to a type the stored information which determines its alphabetical
-ordering and the storage size of an SQL NULL value. This is the >= 4.1.x
-storage format. */
-UNIV_INLINE
-void
-dtype_new_read_for_order_and_null_size(
-/*===================================*/
- dtype_t* type, /*!< in: type struct */
- const byte* buf) /*!< in: buffer for stored type order info */
-{
- ulint charset_coll;
-
-#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
-#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
-#endif
-
- type->mtype = buf[0] & 63;
- type->prtype = buf[1];
-
- if (buf[0] & 128) {
- type->prtype |= DATA_BINARY_TYPE;
- }
-
- if (buf[4] & 128) {
- type->prtype |= DATA_NOT_NULL;
- }
-
- type->len = mach_read_from_2(buf + 2);
-
- charset_coll = mach_read_from_2(buf + 4) & 0x7fff;
-
- if (dtype_is_string_type(type->mtype)) {
- ut_a(charset_coll < 256);
-
- if (charset_coll == 0) {
- /* This insert buffer record was inserted with MySQL
- version < 4.1.2, and the charset-collation code was not
- explicitly stored to dtype->prtype at that time. It
- must be the default charset-collation of this MySQL
- installation. */
-
- charset_coll = data_mysql_default_charset_coll;
- }
-
- type->prtype = dtype_form_prtype(type->prtype, charset_coll);
- }
- dtype_set_mblen(type);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************************//**
-Returns the size of a fixed size data type, 0 if not a fixed size type.
-@return fixed size, or 0 */
-UNIV_INLINE
-ulint
-dtype_get_fixed_size_low(
-/*=====================*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type */
- ulint len, /*!< in: length */
- ulint mbminlen, /*!< in: minimum length of a multibyte char */
- ulint mbmaxlen, /*!< in: maximum length of a multibyte char */
- ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
-{
- switch (mtype) {
- case DATA_SYS:
-#ifdef UNIV_DEBUG
- switch (prtype & DATA_MYSQL_TYPE_MASK) {
- case DATA_ROW_ID:
- ut_ad(len == DATA_ROW_ID_LEN);
- break;
- case DATA_TRX_ID:
- ut_ad(len == DATA_TRX_ID_LEN);
- break;
- case DATA_ROLL_PTR:
- ut_ad(len == DATA_ROLL_PTR_LEN);
- break;
- default:
- ut_ad(0);
- return(0);
- }
-#endif /* UNIV_DEBUG */
- case DATA_CHAR:
- case DATA_FIXBINARY:
- case DATA_INT:
- case DATA_FLOAT:
- case DATA_DOUBLE:
- return(len);
- case DATA_MYSQL:
-#ifndef UNIV_HOTBACKUP
- if (prtype & DATA_BINARY_TYPE) {
- return(len);
- } else if (!comp) {
- return(len);
- } else {
- /* We play it safe here and ask MySQL for
- mbminlen and mbmaxlen. Although
- mbminlen and mbmaxlen are
- initialized if and only if prtype
- is (in one of the 3 functions in this file),
- it could be that none of these functions
- has been called. */
-
- ulint i_mbminlen, i_mbmaxlen;
-
- innobase_get_cset_width(
- dtype_get_charset_coll(prtype),
- &i_mbminlen, &i_mbmaxlen);
-
- if (UNIV_UNLIKELY(mbminlen != i_mbminlen)
- || UNIV_UNLIKELY(mbmaxlen != i_mbmaxlen)) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: "
- "mbminlen=%lu, "
- "mbmaxlen=%lu, "
- "type->mbminlen=%lu, "
- "type->mbmaxlen=%lu\n",
- (ulong) i_mbminlen,
- (ulong) i_mbmaxlen,
- (ulong) mbminlen,
- (ulong) mbmaxlen);
- }
- if (mbminlen == mbmaxlen) {
- return(len);
- }
- }
-#else /* !UNIV_HOTBACKUP */
- return(len);
-#endif /* !UNIV_HOTBACKUP */
- /* fall through for variable-length charsets */
- case DATA_VARCHAR:
- case DATA_BINARY:
- case DATA_DECIMAL:
- case DATA_VARMYSQL:
- case DATA_BLOB:
- return(0);
- default:
- ut_error;
- }
-
- return(0);
-}
-
-#ifndef UNIV_HOTBACKUP
-/***********************************************************************//**
-Returns the minimum size of a data type.
-@return minimum size */
-UNIV_INLINE
-ulint
-dtype_get_min_size_low(
-/*===================*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type */
- ulint len, /*!< in: length */
- ulint mbminlen, /*!< in: minimum length of a multibyte char */
- ulint mbmaxlen) /*!< in: maximum length of a multibyte char */
-{
- switch (mtype) {
- case DATA_SYS:
-#ifdef UNIV_DEBUG
- switch (prtype & DATA_MYSQL_TYPE_MASK) {
- case DATA_ROW_ID:
- ut_ad(len == DATA_ROW_ID_LEN);
- break;
- case DATA_TRX_ID:
- ut_ad(len == DATA_TRX_ID_LEN);
- break;
- case DATA_ROLL_PTR:
- ut_ad(len == DATA_ROLL_PTR_LEN);
- break;
- default:
- ut_ad(0);
- return(0);
- }
-#endif /* UNIV_DEBUG */
- case DATA_CHAR:
- case DATA_FIXBINARY:
- case DATA_INT:
- case DATA_FLOAT:
- case DATA_DOUBLE:
- return(len);
- case DATA_MYSQL:
- if ((prtype & DATA_BINARY_TYPE) || mbminlen == mbmaxlen) {
- return(len);
- }
- /* this is a variable-length character set */
- ut_a(mbminlen > 0);
- ut_a(mbmaxlen > mbminlen);
- ut_a(len % mbmaxlen == 0);
- return(len * mbminlen / mbmaxlen);
- case DATA_VARCHAR:
- case DATA_BINARY:
- case DATA_DECIMAL:
- case DATA_VARMYSQL:
- case DATA_BLOB:
- return(0);
- default:
- ut_error;
- }
-
- return(0);
-}
-
-/***********************************************************************//**
-Returns the maximum size of a data type. Note: types in system tables may be
-incomplete and return incorrect information.
-@return maximum size */
-UNIV_INLINE
-ulint
-dtype_get_max_size_low(
-/*===================*/
- ulint mtype, /*!< in: main type */
- ulint len) /*!< in: length */
-{
- switch (mtype) {
- case DATA_SYS:
- case DATA_CHAR:
- case DATA_FIXBINARY:
- case DATA_INT:
- case DATA_FLOAT:
- case DATA_DOUBLE:
- case DATA_MYSQL:
- case DATA_VARCHAR:
- case DATA_BINARY:
- case DATA_DECIMAL:
- case DATA_VARMYSQL:
- return(len);
- case DATA_BLOB:
- break;
- default:
- ut_error;
- }
-
- return(ULINT_MAX);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************************//**
-Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type.
-For fixed length types it is the fixed length of the type, otherwise 0.
-@return SQL null storage size in ROW_FORMAT=REDUNDANT */
-UNIV_INLINE
-ulint
-dtype_get_sql_null_size(
-/*====================*/
- const dtype_t* type, /*!< in: type */
- ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
-{
-#ifndef UNIV_HOTBACKUP
- return(dtype_get_fixed_size_low(type->mtype, type->prtype, type->len,
- type->mbminlen, type->mbmaxlen, comp));
-#else /* !UNIV_HOTBACKUP */
- return(dtype_get_fixed_size_low(type->mtype, type->prtype, type->len,
- 0, 0, 0));
-#endif /* !UNIV_HOTBACKUP */
-}
diff --git a/storage/innodb_plugin/include/data0types.h b/storage/innodb_plugin/include/data0types.h
deleted file mode 100644
index 04e835bc401..00000000000
--- a/storage/innodb_plugin/include/data0types.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2000, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/data0types.h
-Some type definitions
-
-Created 9/21/2000 Heikki Tuuri
-*************************************************************************/
-
-#ifndef data0types_h
-#define data0types_h
-
-/* SQL data field struct */
-typedef struct dfield_struct dfield_t;
-
-/* SQL data tuple struct */
-typedef struct dtuple_struct dtuple_t;
-
-#endif
-
diff --git a/storage/innodb_plugin/include/db0err.h b/storage/innodb_plugin/include/db0err.h
deleted file mode 100644
index 747e9b5364e..00000000000
--- a/storage/innodb_plugin/include/db0err.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/db0err.h
-Global error codes for the database
-
-Created 5/24/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef db0err_h
-#define db0err_h
-
-
-enum db_err {
- DB_SUCCESS = 10,
-
- /* The following are error codes */
- DB_ERROR,
- DB_INTERRUPTED,
- DB_OUT_OF_MEMORY,
- DB_OUT_OF_FILE_SPACE,
- DB_LOCK_WAIT,
- DB_DEADLOCK,
- DB_ROLLBACK,
- DB_DUPLICATE_KEY,
- DB_QUE_THR_SUSPENDED,
- DB_MISSING_HISTORY, /* required history data has been
- deleted due to lack of space in
- rollback segment */
- DB_CLUSTER_NOT_FOUND = 30,
- DB_TABLE_NOT_FOUND,
- DB_MUST_GET_MORE_FILE_SPACE, /* the database has to be stopped
- and restarted with more file space */
- DB_TABLE_IS_BEING_USED,
- DB_TOO_BIG_RECORD, /* a record in an index would not fit
- on a compressed page, or it would
- become bigger than 1/2 free space in
- an uncompressed page frame */
- DB_LOCK_WAIT_TIMEOUT, /* lock wait lasted too long */
- DB_NO_REFERENCED_ROW, /* referenced key value not found
- for a foreign key in an insert or
- update of a row */
- DB_ROW_IS_REFERENCED, /* cannot delete or update a row
- because it contains a key value
- which is referenced */
- DB_CANNOT_ADD_CONSTRAINT, /* adding a foreign key constraint
- to a table failed */
- DB_CORRUPTION, /* data structure corruption noticed */
- DB_COL_APPEARS_TWICE_IN_INDEX, /* InnoDB cannot handle an index
- where same column appears twice */
- DB_CANNOT_DROP_CONSTRAINT, /* dropping a foreign key constraint
- from a table failed */
- DB_NO_SAVEPOINT, /* no savepoint exists with the given
- name */
- DB_TABLESPACE_ALREADY_EXISTS, /* we cannot create a new single-table
- tablespace because a file of the same
- name already exists */
- DB_TABLESPACE_DELETED, /* tablespace does not exist or is
- being dropped right now */
- DB_LOCK_TABLE_FULL, /* lock structs have exhausted the
- buffer pool (for big transactions,
- InnoDB stores the lock structs in the
- buffer pool) */
- DB_FOREIGN_DUPLICATE_KEY, /* foreign key constraints
- activated by the operation would
- lead to a duplicate key in some
- table */
- DB_TOO_MANY_CONCURRENT_TRXS, /* when InnoDB runs out of the
- preconfigured undo slots, this can
- only happen when there are too many
- concurrent transactions */
- DB_UNSUPPORTED, /* when InnoDB sees any artefact or
- a feature that it can't recoginize or
- work with e.g., FT indexes created by
- a later version of the engine. */
-
- DB_PRIMARY_KEY_IS_NULL, /* a column in the PRIMARY KEY
- was found to be NULL */
-
- /* The following are partial failure codes */
- DB_FAIL = 1000,
- DB_OVERFLOW,
- DB_UNDERFLOW,
- DB_STRONG_FAIL,
- DB_ZIP_OVERFLOW,
- DB_RECORD_NOT_FOUND = 1500,
- DB_END_OF_INDEX
-};
-
-#endif
diff --git a/storage/innodb_plugin/include/dict0boot.h b/storage/innodb_plugin/include/dict0boot.h
deleted file mode 100644
index 51d37ee98d1..00000000000
--- a/storage/innodb_plugin/include/dict0boot.h
+++ /dev/null
@@ -1,150 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dict0boot.h
-Data dictionary creation and booting
-
-Created 4/18/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dict0boot_h
-#define dict0boot_h
-
-#include "univ.i"
-
-#include "mtr0mtr.h"
-#include "mtr0log.h"
-#include "ut0byte.h"
-#include "buf0buf.h"
-#include "fsp0fsp.h"
-#include "dict0dict.h"
-
-typedef byte dict_hdr_t;
-
-/**********************************************************************//**
-Gets a pointer to the dictionary header and x-latches its page.
-@return pointer to the dictionary header, page x-latched */
-UNIV_INTERN
-dict_hdr_t*
-dict_hdr_get(
-/*=========*/
- mtr_t* mtr); /*!< in: mtr */
-/**********************************************************************//**
-Returns a new row, table, index, or tree id.
-@return the new id */
-UNIV_INTERN
-dulint
-dict_hdr_get_new_id(
-/*================*/
- ulint type); /*!< in: DICT_HDR_ROW_ID, ... */
-/**********************************************************************//**
-Returns a new row id.
-@return the new id */
-UNIV_INLINE
-dulint
-dict_sys_get_new_row_id(void);
-/*=========================*/
-/**********************************************************************//**
-Reads a row id from a record or other 6-byte stored form.
-@return row id */
-UNIV_INLINE
-dulint
-dict_sys_read_row_id(
-/*=================*/
- byte* field); /*!< in: record field */
-/**********************************************************************//**
-Writes a row id to a record or other 6-byte stored form. */
-UNIV_INLINE
-void
-dict_sys_write_row_id(
-/*==================*/
- byte* field, /*!< in: record field */
- dulint row_id);/*!< in: row id */
-/*****************************************************************//**
-Initializes the data dictionary memory structures when the database is
-started. This function is also called when the data dictionary is created. */
-UNIV_INTERN
-void
-dict_boot(void);
-/*===========*/
-/*****************************************************************//**
-Creates and initializes the data dictionary at the database creation. */
-UNIV_INTERN
-void
-dict_create(void);
-/*=============*/
-
-
-/* Space id and page no where the dictionary header resides */
-#define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */
-#define DICT_HDR_PAGE_NO FSP_DICT_HDR_PAGE_NO
-
-/* The ids for the basic system tables and their indexes */
-#define DICT_TABLES_ID ut_dulint_create(0, 1)
-#define DICT_COLUMNS_ID ut_dulint_create(0, 2)
-#define DICT_INDEXES_ID ut_dulint_create(0, 3)
-#define DICT_FIELDS_ID ut_dulint_create(0, 4)
-/* The following is a secondary index on SYS_TABLES */
-#define DICT_TABLE_IDS_ID ut_dulint_create(0, 5)
-
-#define DICT_HDR_FIRST_ID 10 /* the ids for tables etc. start
- from this number, except for basic
- system tables and their above defined
- indexes; ibuf tables and indexes are
- assigned as the id the number
- DICT_IBUF_ID_MIN plus the space id */
-#define DICT_IBUF_ID_MIN ut_dulint_create(0xFFFFFFFFUL, 0)
-
-/* The offset of the dictionary header on the page */
-#define DICT_HDR FSEG_PAGE_DATA
-
-/*-------------------------------------------------------------*/
-/* Dictionary header offsets */
-#define DICT_HDR_ROW_ID 0 /* The latest assigned row id */
-#define DICT_HDR_TABLE_ID 8 /* The latest assigned table id */
-#define DICT_HDR_INDEX_ID 16 /* The latest assigned index id */
-#define DICT_HDR_MIX_ID 24 /* Obsolete, always 0. */
-#define DICT_HDR_TABLES 32 /* Root of the table index tree */
-#define DICT_HDR_TABLE_IDS 36 /* Root of the table index tree */
-#define DICT_HDR_COLUMNS 40 /* Root of the column index tree */
-#define DICT_HDR_INDEXES 44 /* Root of the index index tree */
-#define DICT_HDR_FIELDS 48 /* Root of the index field
- index tree */
-
-#define DICT_HDR_FSEG_HEADER 56 /* Segment header for the tablespace
- segment into which the dictionary
- header is created */
-/*-------------------------------------------------------------*/
-
-/* The field number of the page number field in the sys_indexes table
-clustered index */
-#define DICT_SYS_INDEXES_PAGE_NO_FIELD 8
-#define DICT_SYS_INDEXES_SPACE_NO_FIELD 7
-#define DICT_SYS_INDEXES_TYPE_FIELD 6
-
-/* When a row id which is zero modulo this number (which must be a power of
-two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is
-updated */
-#define DICT_HDR_ROW_ID_WRITE_MARGIN 256
-
-#ifndef UNIV_NONINL
-#include "dict0boot.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/dict0boot.ic b/storage/innodb_plugin/include/dict0boot.ic
deleted file mode 100644
index d5f372e38c4..00000000000
--- a/storage/innodb_plugin/include/dict0boot.ic
+++ /dev/null
@@ -1,93 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dict0boot.ic
-Data dictionary creation and booting
-
-Created 4/18/1996 Heikki Tuuri
-*******************************************************/
-
-/**********************************************************************//**
-Writes the current value of the row id counter to the dictionary header file
-page. */
-UNIV_INTERN
-void
-dict_hdr_flush_row_id(void);
-/*=======================*/
-
-
-/**********************************************************************//**
-Returns a new row id.
-@return the new id */
-UNIV_INLINE
-dulint
-dict_sys_get_new_row_id(void)
-/*=========================*/
-{
- dulint id;
-
- mutex_enter(&(dict_sys->mutex));
-
- id = dict_sys->row_id;
-
- if (0 == (ut_dulint_get_low(id) % DICT_HDR_ROW_ID_WRITE_MARGIN)) {
-
- dict_hdr_flush_row_id();
- }
-
- UT_DULINT_INC(dict_sys->row_id);
-
- mutex_exit(&(dict_sys->mutex));
-
- return(id);
-}
-
-/**********************************************************************//**
-Reads a row id from a record or other 6-byte stored form.
-@return row id */
-UNIV_INLINE
-dulint
-dict_sys_read_row_id(
-/*=================*/
- byte* field) /*!< in: record field */
-{
-#if DATA_ROW_ID_LEN != 6
-# error "DATA_ROW_ID_LEN != 6"
-#endif
-
- return(mach_read_from_6(field));
-}
-
-/**********************************************************************//**
-Writes a row id to a record or other 6-byte stored form. */
-UNIV_INLINE
-void
-dict_sys_write_row_id(
-/*==================*/
- byte* field, /*!< in: record field */
- dulint row_id) /*!< in: row id */
-{
-#if DATA_ROW_ID_LEN != 6
-# error "DATA_ROW_ID_LEN != 6"
-#endif
-
- mach_write_to_6(field, row_id);
-}
-
-
diff --git a/storage/innodb_plugin/include/dict0crea.h b/storage/innodb_plugin/include/dict0crea.h
deleted file mode 100644
index cce1246b789..00000000000
--- a/storage/innodb_plugin/include/dict0crea.h
+++ /dev/null
@@ -1,197 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dict0crea.h
-Database object creation
-
-Created 1/8/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dict0crea_h
-#define dict0crea_h
-
-#include "univ.i"
-#include "dict0types.h"
-#include "dict0dict.h"
-#include "que0types.h"
-#include "row0types.h"
-#include "mtr0mtr.h"
-
-/*********************************************************************//**
-Creates a table create graph.
-@return own: table create node */
-UNIV_INTERN
-tab_node_t*
-tab_create_graph_create(
-/*====================*/
- dict_table_t* table, /*!< in: table to create, built as a memory data
- structure */
- mem_heap_t* heap); /*!< in: heap where created */
-/*********************************************************************//**
-Creates an index create graph.
-@return own: index create node */
-UNIV_INTERN
-ind_node_t*
-ind_create_graph_create(
-/*====================*/
- dict_index_t* index, /*!< in: index to create, built as a memory data
- structure */
- mem_heap_t* heap); /*!< in: heap where created */
-/***********************************************************//**
-Creates a table. This is a high-level function used in SQL execution graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-dict_create_table_step(
-/*===================*/
- que_thr_t* thr); /*!< in: query thread */
-/***********************************************************//**
-Creates an index. This is a high-level function used in SQL execution
-graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-dict_create_index_step(
-/*===================*/
- que_thr_t* thr); /*!< in: query thread */
-/*******************************************************************//**
-Truncates the index tree associated with a row in SYS_INDEXES table.
-@return new root page number, or FIL_NULL on failure */
-UNIV_INTERN
-ulint
-dict_truncate_index_tree(
-/*=====================*/
- dict_table_t* table, /*!< in: the table the index belongs to */
- ulint space, /*!< in: 0=truncate,
- nonzero=create the index tree in the
- given tablespace */
- btr_pcur_t* pcur, /*!< in/out: persistent cursor pointing to
- record in the clustered index of
- SYS_INDEXES table. The cursor may be
- repositioned in this call. */
- mtr_t* mtr); /*!< in: mtr having the latch
- on the record page. The mtr may be
- committed and restarted in this call. */
-/*******************************************************************//**
-Drops the index tree associated with a row in SYS_INDEXES table. */
-UNIV_INTERN
-void
-dict_drop_index_tree(
-/*=================*/
- rec_t* rec, /*!< in/out: record in the clustered index
- of SYS_INDEXES table */
- mtr_t* mtr); /*!< in: mtr having the latch on the record page */
-/****************************************************************//**
-Creates the foreign key constraints system tables inside InnoDB
-at database creation or database start if they are not found or are
-not of the right form.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ulint
-dict_create_or_check_foreign_constraint_tables(void);
-/*================================================*/
-/********************************************************************//**
-Adds foreign key definitions to data dictionary tables in the database. We
-look at table->foreign_list, and also generate names to constraints that were
-not named by the user. A generated constraint has a name of the format
-databasename/tablename_ibfk_NUMBER, where the numbers start from 1, and are
-given locally for this table, that is, the number is not global, as in the
-old format constraints < 4.0.18 it used to be.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-ulint
-dict_create_add_foreigns_to_dictionary(
-/*===================================*/
- ulint start_id,/*!< in: if we are actually doing ALTER TABLE
- ADD CONSTRAINT, we want to generate constraint
- numbers which are bigger than in the table so
- far; we number the constraints from
- start_id + 1 up; start_id should be set to 0 if
- we are creating a new table, or if the table
- so far has no constraints for which the name
- was generated here */
- dict_table_t* table, /*!< in: table */
- trx_t* trx); /*!< in: transaction */
-
-/* Table create node structure */
-
-struct tab_node_struct{
- que_common_t common; /*!< node type: QUE_NODE_TABLE_CREATE */
- dict_table_t* table; /*!< table to create, built as a memory data
- structure with dict_mem_... functions */
- ins_node_t* tab_def; /* child node which does the insert of
- the table definition; the row to be inserted
- is built by the parent node */
- ins_node_t* col_def; /* child node which does the inserts of
- the column definitions; the row to be inserted
- is built by the parent node */
- commit_node_t* commit_node;
- /* child node which performs a commit after
- a successful table creation */
- /*----------------------*/
- /* Local storage for this graph node */
- ulint state; /*!< node execution state */
- ulint col_no; /*!< next column definition to insert */
- mem_heap_t* heap; /*!< memory heap used as auxiliary storage */
-};
-
-/* Table create node states */
-#define TABLE_BUILD_TABLE_DEF 1
-#define TABLE_BUILD_COL_DEF 2
-#define TABLE_COMMIT_WORK 3
-#define TABLE_ADD_TO_CACHE 4
-#define TABLE_COMPLETED 5
-
-/* Index create node struct */
-
-struct ind_node_struct{
- que_common_t common; /*!< node type: QUE_NODE_INDEX_CREATE */
- dict_index_t* index; /*!< index to create, built as a memory data
- structure with dict_mem_... functions */
- ins_node_t* ind_def; /* child node which does the insert of
- the index definition; the row to be inserted
- is built by the parent node */
- ins_node_t* field_def; /* child node which does the inserts of
- the field definitions; the row to be inserted
- is built by the parent node */
- commit_node_t* commit_node;
- /* child node which performs a commit after
- a successful index creation */
- /*----------------------*/
- /* Local storage for this graph node */
- ulint state; /*!< node execution state */
- ulint page_no;/* root page number of the index */
- dict_table_t* table; /*!< table which owns the index */
- dtuple_t* ind_row;/* index definition row built */
- ulint field_no;/* next field definition to insert */
- mem_heap_t* heap; /*!< memory heap used as auxiliary storage */
-};
-
-/* Index create node states */
-#define INDEX_BUILD_INDEX_DEF 1
-#define INDEX_BUILD_FIELD_DEF 2
-#define INDEX_CREATE_INDEX_TREE 3
-#define INDEX_COMMIT_WORK 4
-#define INDEX_ADD_TO_CACHE 5
-
-#ifndef UNIV_NONINL
-#include "dict0crea.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/dict0crea.ic b/storage/innodb_plugin/include/dict0crea.ic
deleted file mode 100644
index c5365ce7489..00000000000
--- a/storage/innodb_plugin/include/dict0crea.ic
+++ /dev/null
@@ -1,25 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dict0crea.ic
-Database object creation
-
-Created 1/8/1996 Heikki Tuuri
-*******************************************************/
-
diff --git a/storage/innodb_plugin/include/dict0dict.h b/storage/innodb_plugin/include/dict0dict.h
deleted file mode 100644
index 12396556c2d..00000000000
--- a/storage/innodb_plugin/include/dict0dict.h
+++ /dev/null
@@ -1,1165 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dict0dict.h
-Data dictionary system
-
-Created 1/8/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dict0dict_h
-#define dict0dict_h
-
-#include "univ.i"
-#include "dict0types.h"
-#include "dict0mem.h"
-#include "data0type.h"
-#include "data0data.h"
-#include "mem0mem.h"
-#include "rem0types.h"
-#include "ut0mem.h"
-#include "ut0lst.h"
-#include "hash0hash.h"
-#include "ut0rnd.h"
-#include "ut0byte.h"
-#include "trx0types.h"
-
-#ifndef UNIV_HOTBACKUP
-# include "sync0sync.h"
-# include "sync0rw.h"
-/******************************************************************//**
-Makes all characters in a NUL-terminated UTF-8 string lower case. */
-UNIV_INTERN
-void
-dict_casedn_str(
-/*============*/
- char* a); /*!< in/out: string to put in lower case */
-/********************************************************************//**
-Get the database name length in a table name.
-@return database name length */
-UNIV_INTERN
-ulint
-dict_get_db_name_len(
-/*=================*/
- const char* name); /*!< in: table name in the form
- dbname '/' tablename */
-/********************************************************************//**
-Return the end of table name where we have removed dbname and '/'.
-@return table name */
-
-const char*
-dict_remove_db_name(
-/*================*/
- const char* name); /*!< in: table name in the form
- dbname '/' tablename */
-/**********************************************************************//**
-Returns a table object based on table id.
-@return table, NULL if does not exist */
-UNIV_INTERN
-dict_table_t*
-dict_table_get_on_id(
-/*=================*/
- dulint table_id, /*!< in: table id */
- trx_t* trx); /*!< in: transaction handle */
-/********************************************************************//**
-Decrements the count of open MySQL handles to a table. */
-UNIV_INTERN
-void
-dict_table_decrement_handle_count(
-/*==============================*/
- dict_table_t* table, /*!< in/out: table */
- ibool dict_locked); /*!< in: TRUE=data dictionary locked */
-/**********************************************************************//**
-Inits the data dictionary module. */
-UNIV_INTERN
-void
-dict_init(void);
-/*===========*/
-/********************************************************************//**
-Gets the space id of every table of the data dictionary and makes a linear
-list and a hash table of them to the data dictionary cache. This function
-can be called at database startup if we did not need to do a crash recovery.
-In crash recovery we must scan the space id's from the .ibd files in MySQL
-database directories. */
-UNIV_INTERN
-void
-dict_load_space_id_list(void);
-/*=========================*/
-/*********************************************************************//**
-Gets the column data type. */
-UNIV_INLINE
-void
-dict_col_copy_type(
-/*===============*/
- const dict_col_t* col, /*!< in: column */
- dtype_t* type); /*!< out: data type */
-#endif /* !UNIV_HOTBACKUP */
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Assert that a column and a data type match.
-@return TRUE */
-UNIV_INLINE
-ibool
-dict_col_type_assert_equal(
-/*=======================*/
- const dict_col_t* col, /*!< in: column */
- const dtype_t* type); /*!< in: data type */
-#endif /* UNIV_DEBUG */
-#ifndef UNIV_HOTBACKUP
-/***********************************************************************//**
-Returns the minimum size of the column.
-@return minimum size */
-UNIV_INLINE
-ulint
-dict_col_get_min_size(
-/*==================*/
- const dict_col_t* col); /*!< in: column */
-/***********************************************************************//**
-Returns the maximum size of the column.
-@return maximum size */
-UNIV_INLINE
-ulint
-dict_col_get_max_size(
-/*==================*/
- const dict_col_t* col); /*!< in: column */
-/***********************************************************************//**
-Returns the size of a fixed size column, 0 if not a fixed size column.
-@return fixed size, or 0 */
-UNIV_INLINE
-ulint
-dict_col_get_fixed_size(
-/*====================*/
- const dict_col_t* col, /*!< in: column */
- ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
-/***********************************************************************//**
-Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column.
-For fixed length types it is the fixed length of the type, otherwise 0.
-@return SQL null storage size in ROW_FORMAT=REDUNDANT */
-UNIV_INLINE
-ulint
-dict_col_get_sql_null_size(
-/*=======================*/
- const dict_col_t* col, /*!< in: column */
- ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
-
-/*********************************************************************//**
-Gets the column number.
-@return col->ind, table column position (starting from 0) */
-UNIV_INLINE
-ulint
-dict_col_get_no(
-/*============*/
- const dict_col_t* col); /*!< in: column */
-/*********************************************************************//**
-Gets the column position in the clustered index. */
-UNIV_INLINE
-ulint
-dict_col_get_clust_pos(
-/*===================*/
- const dict_col_t* col, /*!< in: table column */
- const dict_index_t* clust_index); /*!< in: clustered index */
-/****************************************************************//**
-If the given column name is reserved for InnoDB system columns, return
-TRUE.
-@return TRUE if name is reserved */
-UNIV_INTERN
-ibool
-dict_col_name_is_reserved(
-/*======================*/
- const char* name); /*!< in: column name */
-/********************************************************************//**
-Acquire the autoinc lock. */
-UNIV_INTERN
-void
-dict_table_autoinc_lock(
-/*====================*/
- dict_table_t* table); /*!< in/out: table */
-/********************************************************************//**
-Unconditionally set the autoinc counter. */
-UNIV_INTERN
-void
-dict_table_autoinc_initialize(
-/*==========================*/
- dict_table_t* table, /*!< in/out: table */
- ib_uint64_t value); /*!< in: next value to assign to a row */
-/********************************************************************//**
-Reads the next autoinc value (== autoinc counter value), 0 if not yet
-initialized.
-@return value for a new row, or 0 */
-UNIV_INTERN
-ib_uint64_t
-dict_table_autoinc_read(
-/*====================*/
- const dict_table_t* table); /*!< in: table */
-/********************************************************************//**
-Updates the autoinc counter if the value supplied is greater than the
-current value. */
-UNIV_INTERN
-void
-dict_table_autoinc_update_if_greater(
-/*=================================*/
-
- dict_table_t* table, /*!< in/out: table */
- ib_uint64_t value); /*!< in: value which was assigned to a row */
-/********************************************************************//**
-Release the autoinc lock. */
-UNIV_INTERN
-void
-dict_table_autoinc_unlock(
-/*======================*/
- dict_table_t* table); /*!< in/out: table */
-#endif /* !UNIV_HOTBACKUP */
-/**********************************************************************//**
-Adds system columns to a table object. */
-UNIV_INTERN
-void
-dict_table_add_system_columns(
-/*==========================*/
- dict_table_t* table, /*!< in/out: table */
- mem_heap_t* heap); /*!< in: temporary heap */
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Adds a table object to the dictionary cache. */
-UNIV_INTERN
-void
-dict_table_add_to_cache(
-/*====================*/
- dict_table_t* table, /*!< in: table */
- mem_heap_t* heap); /*!< in: temporary heap */
-/**********************************************************************//**
-Removes a table object from the dictionary cache. */
-UNIV_INTERN
-void
-dict_table_remove_from_cache(
-/*=========================*/
- dict_table_t* table); /*!< in, own: table */
-/**********************************************************************//**
-Renames a table object.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-dict_table_rename_in_cache(
-/*=======================*/
- dict_table_t* table, /*!< in/out: table */
- const char* new_name, /*!< in: new name */
- ibool rename_also_foreigns);/*!< in: in ALTER TABLE we want
- to preserve the original table name
- in constraints which reference it */
-/**********************************************************************//**
-Removes an index from the dictionary cache. */
-UNIV_INTERN
-void
-dict_index_remove_from_cache(
-/*=========================*/
- dict_table_t* table, /*!< in/out: table */
- dict_index_t* index); /*!< in, own: index */
-/**********************************************************************//**
-Change the id of a table object in the dictionary cache. This is used in
-DISCARD TABLESPACE. */
-UNIV_INTERN
-void
-dict_table_change_id_in_cache(
-/*==========================*/
- dict_table_t* table, /*!< in/out: table object already in cache */
- dulint new_id);/*!< in: new id to set */
-/**********************************************************************//**
-Adds a foreign key constraint object to the dictionary cache. May free
-the object if there already is an object with the same identifier in the cache.
-At least one of foreign table or referenced table must already be in
-the dictionary cache!
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ulint
-dict_foreign_add_to_cache(
-/*======================*/
- dict_foreign_t* foreign, /*!< in, own: foreign key constraint */
- ibool check_charsets);/*!< in: TRUE=check charset
- compatibility */
-/*********************************************************************//**
-Check if the index is referenced by a foreign key; if so, return the
-matching instance, NULL otherwise.
-@return pointer to foreign key struct if index is defined for foreign
-key, otherwise NULL */
-UNIV_INTERN
-dict_foreign_t*
-dict_table_get_referenced_constraint(
-/*=================================*/
- dict_table_t* table, /*!< in: InnoDB table */
- dict_index_t* index); /*!< in: InnoDB index */
-/*********************************************************************//**
-Checks if a table is referenced by foreign keys.
-@return TRUE if table is referenced by a foreign key */
-UNIV_INTERN
-ibool
-dict_table_is_referenced_by_foreign_key(
-/*====================================*/
- const dict_table_t* table); /*!< in: InnoDB table */
-/**********************************************************************//**
-Replace the index in the foreign key list that matches this index's
-definition with an equivalent index. */
-UNIV_INTERN
-void
-dict_table_replace_index_in_foreign_list(
-/*=====================================*/
- dict_table_t* table, /*!< in/out: table */
- dict_index_t* index); /*!< in: index to be replaced */
-/*********************************************************************//**
-Checks if an index is defined for a foreign key constraint. Index is a part
-of a foreign key constraint if the index is referenced by foreign key
-or index is a foreign key index
-@return pointer to foreign key struct if index is defined for foreign
-key, otherwise NULL */
-UNIV_INTERN
-dict_foreign_t*
-dict_table_get_foreign_constraint(
-/*==============================*/
- dict_table_t* table, /*!< in: InnoDB table */
- dict_index_t* index); /*!< in: InnoDB index */
-/*********************************************************************//**
-Scans a table create SQL string and adds to the data dictionary
-the foreign key constraints declared in the string. This function
-should be called after the indexes for a table have been created.
-Each foreign key constraint must be accompanied with indexes in
-both participating tables. The indexes are allowed to contain more
-fields than mentioned in the constraint.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-ulint
-dict_create_foreign_constraints(
-/*============================*/
- trx_t* trx, /*!< in: transaction */
- const char* sql_string, /*!< in: table create statement where
- foreign keys are declared like:
- FOREIGN KEY (a, b) REFERENCES
- table2(c, d), table2 can be written
- also with the database
- name before it: test.table2; the
- default database is the database of
- parameter name */
- const char* name, /*!< in: table full name in the
- normalized form
- database_name/table_name */
- ibool reject_fks); /*!< in: if TRUE, fail with error
- code DB_CANNOT_ADD_CONSTRAINT if
- any foreign keys are found. */
-/**********************************************************************//**
-Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement.
-@return DB_SUCCESS or DB_CANNOT_DROP_CONSTRAINT if syntax error or the
-constraint id does not match */
-UNIV_INTERN
-ulint
-dict_foreign_parse_drop_constraints(
-/*================================*/
- mem_heap_t* heap, /*!< in: heap from which we can
- allocate memory */
- trx_t* trx, /*!< in: transaction */
- dict_table_t* table, /*!< in: table */
- ulint* n, /*!< out: number of constraints
- to drop */
- const char*** constraints_to_drop); /*!< out: id's of the
- constraints to drop */
-/**********************************************************************//**
-Returns a table object and optionally increment its MySQL open handle count.
-NOTE! This is a high-level function to be used mainly from outside the
-'dict' directory. Inside this directory dict_table_get_low is usually the
-appropriate function.
-@return table, NULL if does not exist */
-UNIV_INTERN
-dict_table_t*
-dict_table_get(
-/*===========*/
- const char* table_name, /*!< in: table name */
- ibool inc_mysql_count);
- /*!< in: whether to increment the open
- handle count on the table */
-/**********************************************************************//**
-Returns an index object, based on table and index id, and memoryfixes it.
-@return index, NULL if does not exist */
-UNIV_INTERN
-dict_index_t*
-dict_index_get_on_id_low(
-/*=====================*/
- dict_table_t* table, /*!< in: table */
- dulint index_id); /*!< in: index id */
-/**********************************************************************//**
-Checks if a table is in the dictionary cache.
-@return table, NULL if not found */
-
-UNIV_INLINE
-dict_table_t*
-dict_table_check_if_in_cache_low(
-/*=============================*/
- const char* table_name); /*!< in: table name */
-/**********************************************************************//**
-Gets a table; loads it to the dictionary cache if necessary. A low-level
-function.
-@return table, NULL if not found */
-UNIV_INLINE
-dict_table_t*
-dict_table_get_low(
-/*===============*/
- const char* table_name); /*!< in: table name */
-/**********************************************************************//**
-Returns a table object based on table id.
-@return table, NULL if does not exist */
-UNIV_INLINE
-dict_table_t*
-dict_table_get_on_id_low(
-/*=====================*/
- dulint table_id); /*!< in: table id */
-/**********************************************************************//**
-Find an index that is equivalent to the one passed in and is not marked
-for deletion.
-@return index equivalent to foreign->foreign_index, or NULL */
-UNIV_INTERN
-dict_index_t*
-dict_foreign_find_equiv_index(
-/*==========================*/
- dict_foreign_t* foreign);/*!< in: foreign key */
-/**********************************************************************//**
-Returns an index object by matching on the name and column names and
-if more than one index matches return the index with the max id
-@return matching index, NULL if not found */
-UNIV_INTERN
-dict_index_t*
-dict_table_get_index_by_max_id(
-/*===========================*/
- dict_table_t* table, /*!< in: table */
- const char* name, /*!< in: the index name to find */
- const char** columns,/*!< in: array of column names */
- ulint n_cols);/*!< in: number of columns */
-/**********************************************************************//**
-Returns a column's name.
-@return column name. NOTE: not guaranteed to stay valid if table is
-modified in any way (columns added, etc.). */
-UNIV_INTERN
-const char*
-dict_table_get_col_name(
-/*====================*/
- const dict_table_t* table, /*!< in: table */
- ulint col_nr);/*!< in: column number */
-
-/**********************************************************************//**
-Prints a table definition. */
-UNIV_INTERN
-void
-dict_table_print(
-/*=============*/
- dict_table_t* table); /*!< in: table */
-/**********************************************************************//**
-Prints table data. */
-UNIV_INTERN
-void
-dict_table_print_low(
-/*=================*/
- dict_table_t* table); /*!< in: table */
-/**********************************************************************//**
-Prints table data when we know the table name. */
-UNIV_INTERN
-void
-dict_table_print_by_name(
-/*=====================*/
- const char* name); /*!< in: table name */
-/**********************************************************************//**
-Outputs info on foreign keys of a table. */
-UNIV_INTERN
-void
-dict_print_info_on_foreign_keys(
-/*============================*/
- ibool create_table_format, /*!< in: if TRUE then print in
- a format suitable to be inserted into
- a CREATE TABLE, otherwise in the format
- of SHOW TABLE STATUS */
- FILE* file, /*!< in: file where to print */
- trx_t* trx, /*!< in: transaction */
- dict_table_t* table); /*!< in: table */
-/**********************************************************************//**
-Outputs info on a foreign key of a table in a format suitable for
-CREATE TABLE. */
-UNIV_INTERN
-void
-dict_print_info_on_foreign_key_in_create_format(
-/*============================================*/
- FILE* file, /*!< in: file where to print */
- trx_t* trx, /*!< in: transaction */
- dict_foreign_t* foreign, /*!< in: foreign key constraint */
- ibool add_newline); /*!< in: whether to add a newline */
-/********************************************************************//**
-Displays the names of the index and the table. */
-UNIV_INTERN
-void
-dict_index_name_print(
-/*==================*/
- FILE* file, /*!< in: output stream */
- trx_t* trx, /*!< in: transaction */
- const dict_index_t* index); /*!< in: index to print */
-#ifdef UNIV_DEBUG
-/********************************************************************//**
-Gets the first index on the table (the clustered index).
-@return index, NULL if none exists */
-UNIV_INLINE
-dict_index_t*
-dict_table_get_first_index(
-/*=======================*/
- const dict_table_t* table); /*!< in: table */
-/********************************************************************//**
-Gets the next index on the table.
-@return index, NULL if none left */
-UNIV_INLINE
-dict_index_t*
-dict_table_get_next_index(
-/*======================*/
- const dict_index_t* index); /*!< in: index */
-#else /* UNIV_DEBUG */
-# define dict_table_get_first_index(table) UT_LIST_GET_FIRST((table)->indexes)
-# define dict_table_get_next_index(index) UT_LIST_GET_NEXT(indexes, index)
-#endif /* UNIV_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Check whether the index is the clustered index.
-@return nonzero for clustered index, zero for other indexes */
-UNIV_INLINE
-ulint
-dict_index_is_clust(
-/*================*/
- const dict_index_t* index) /*!< in: index */
- __attribute__((pure));
-/********************************************************************//**
-Check whether the index is unique.
-@return nonzero for unique index, zero for other indexes */
-UNIV_INLINE
-ulint
-dict_index_is_unique(
-/*=================*/
- const dict_index_t* index) /*!< in: index */
- __attribute__((pure));
-/********************************************************************//**
-Check whether the index is the insert buffer tree.
-@return nonzero for insert buffer, zero for other indexes */
-UNIV_INLINE
-ulint
-dict_index_is_ibuf(
-/*===============*/
- const dict_index_t* index) /*!< in: index */
- __attribute__((pure));
-/********************************************************************//**
-Check whether the index is a secondary index or the insert buffer tree.
-@return nonzero for secondary index or insert buffer, zero for other indexes */
-UNIV_INLINE
-ulint
-dict_index_is_sec_or_ibuf(
-/*======================*/
- const dict_index_t* index) /*!< in: index */
- __attribute__((pure));
-
-/********************************************************************//**
-Gets the number of user-defined columns in a table in the dictionary
-cache.
-@return number of user-defined (e.g., not ROW_ID) columns of a table */
-UNIV_INLINE
-ulint
-dict_table_get_n_user_cols(
-/*=======================*/
- const dict_table_t* table); /*!< in: table */
-/********************************************************************//**
-Gets the number of system columns in a table in the dictionary cache.
-@return number of system (e.g., ROW_ID) columns of a table */
-UNIV_INLINE
-ulint
-dict_table_get_n_sys_cols(
-/*======================*/
- const dict_table_t* table); /*!< in: table */
-/********************************************************************//**
-Gets the number of all columns (also system) in a table in the dictionary
-cache.
-@return number of columns of a table */
-UNIV_INLINE
-ulint
-dict_table_get_n_cols(
-/*==================*/
- const dict_table_t* table); /*!< in: table */
-#ifdef UNIV_DEBUG
-/********************************************************************//**
-Gets the nth column of a table.
-@return pointer to column object */
-UNIV_INLINE
-dict_col_t*
-dict_table_get_nth_col(
-/*===================*/
- const dict_table_t* table, /*!< in: table */
- ulint pos); /*!< in: position of column */
-/********************************************************************//**
-Gets the given system column of a table.
-@return pointer to column object */
-UNIV_INLINE
-dict_col_t*
-dict_table_get_sys_col(
-/*===================*/
- const dict_table_t* table, /*!< in: table */
- ulint sys); /*!< in: DATA_ROW_ID, ... */
-#else /* UNIV_DEBUG */
-#define dict_table_get_nth_col(table, pos) \
-((table)->cols + (pos))
-#define dict_table_get_sys_col(table, sys) \
-((table)->cols + (table)->n_cols + (sys) - DATA_N_SYS_COLS)
-#endif /* UNIV_DEBUG */
-/********************************************************************//**
-Gets the given system column number of a table.
-@return column number */
-UNIV_INLINE
-ulint
-dict_table_get_sys_col_no(
-/*======================*/
- const dict_table_t* table, /*!< in: table */
- ulint sys); /*!< in: DATA_ROW_ID, ... */
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Returns the minimum data size of an index record.
-@return minimum data size in bytes */
-UNIV_INLINE
-ulint
-dict_index_get_min_size(
-/*====================*/
- const dict_index_t* index); /*!< in: index */
-#endif /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Check whether the table uses the compact page format.
-@return TRUE if table uses the compact page format */
-UNIV_INLINE
-ibool
-dict_table_is_comp(
-/*===============*/
- const dict_table_t* table); /*!< in: table */
-/********************************************************************//**
-Determine the file format of a table.
-@return file format version */
-UNIV_INLINE
-ulint
-dict_table_get_format(
-/*==================*/
- const dict_table_t* table); /*!< in: table */
-/********************************************************************//**
-Set the file format of a table. */
-UNIV_INLINE
-void
-dict_table_set_format(
-/*==================*/
- dict_table_t* table, /*!< in/out: table */
- ulint format);/*!< in: file format version */
-/********************************************************************//**
-Extract the compressed page size from table flags.
-@return compressed page size, or 0 if not compressed */
-UNIV_INLINE
-ulint
-dict_table_flags_to_zip_size(
-/*=========================*/
- ulint flags) /*!< in: flags */
- __attribute__((const));
-/********************************************************************//**
-Check whether the table uses the compressed compact page format.
-@return compressed page size, or 0 if not compressed */
-UNIV_INLINE
-ulint
-dict_table_zip_size(
-/*================*/
- const dict_table_t* table); /*!< in: table */
-/********************************************************************//**
-Checks if a column is in the ordering columns of the clustered index of a
-table. Column prefixes are treated like whole columns.
-@return TRUE if the column, or its prefix, is in the clustered key */
-UNIV_INTERN
-ibool
-dict_table_col_in_clustered_key(
-/*============================*/
- const dict_table_t* table, /*!< in: table */
- ulint n); /*!< in: column number */
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Copies types of columns contained in table to tuple and sets all
-fields of the tuple to the SQL NULL value. This function should
-be called right after dtuple_create(). */
-UNIV_INTERN
-void
-dict_table_copy_types(
-/*==================*/
- dtuple_t* tuple, /*!< in/out: data tuple */
- const dict_table_t* table); /*!< in: table */
-/**********************************************************************//**
-Looks for an index with the given id. NOTE that we do not reserve
-the dictionary mutex: this function is for emergency purposes like
-printing info of a corrupt database page!
-@return index or NULL if not found from cache */
-UNIV_INTERN
-dict_index_t*
-dict_index_find_on_id_low(
-/*======================*/
- dulint id); /*!< in: index id */
-/**********************************************************************//**
-Adds an index to the dictionary cache.
-@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
-UNIV_INTERN
-ulint
-dict_index_add_to_cache(
-/*====================*/
- dict_table_t* table, /*!< in: table on which the index is */
- dict_index_t* index, /*!< in, own: index; NOTE! The index memory
- object is freed in this function! */
- ulint page_no,/*!< in: root page number of the index */
- ibool strict);/*!< in: TRUE=refuse to create the index
- if records could be too big to fit in
- a B-tree page */
-/**********************************************************************//**
-Removes an index from the dictionary cache. */
-UNIV_INTERN
-void
-dict_index_remove_from_cache(
-/*=========================*/
- dict_table_t* table, /*!< in/out: table */
- dict_index_t* index); /*!< in, own: index */
-#endif /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Gets the number of fields in the internal representation of an index,
-including fields added by the dictionary system.
-@return number of fields */
-UNIV_INLINE
-ulint
-dict_index_get_n_fields(
-/*====================*/
- const dict_index_t* index); /*!< in: an internal
- representation of index (in
- the dictionary cache) */
-/********************************************************************//**
-Gets the number of fields in the internal representation of an index
-that uniquely determine the position of an index entry in the index, if
-we do not take multiversioning into account: in the B-tree use the value
-returned by dict_index_get_n_unique_in_tree.
-@return number of fields */
-UNIV_INLINE
-ulint
-dict_index_get_n_unique(
-/*====================*/
- const dict_index_t* index); /*!< in: an internal representation
- of index (in the dictionary cache) */
-/********************************************************************//**
-Gets the number of fields in the internal representation of an index
-which uniquely determine the position of an index entry in the index, if
-we also take multiversioning into account.
-@return number of fields */
-UNIV_INLINE
-ulint
-dict_index_get_n_unique_in_tree(
-/*============================*/
- const dict_index_t* index); /*!< in: an internal representation
- of index (in the dictionary cache) */
-/********************************************************************//**
-Gets the number of user-defined ordering fields in the index. In the internal
-representation we add the row id to the ordering fields to make all indexes
-unique, but this function returns the number of fields the user defined
-in the index as ordering fields.
-@return number of fields */
-UNIV_INLINE
-ulint
-dict_index_get_n_ordering_defined_by_user(
-/*======================================*/
- const dict_index_t* index); /*!< in: an internal representation
- of index (in the dictionary cache) */
-#ifdef UNIV_DEBUG
-/********************************************************************//**
-Gets the nth field of an index.
-@return pointer to field object */
-UNIV_INLINE
-dict_field_t*
-dict_index_get_nth_field(
-/*=====================*/
- const dict_index_t* index, /*!< in: index */
- ulint pos); /*!< in: position of field */
-#else /* UNIV_DEBUG */
-# define dict_index_get_nth_field(index, pos) ((index)->fields + (pos))
-#endif /* UNIV_DEBUG */
-/********************************************************************//**
-Gets pointer to the nth column in an index.
-@return column */
-UNIV_INLINE
-const dict_col_t*
-dict_index_get_nth_col(
-/*===================*/
- const dict_index_t* index, /*!< in: index */
- ulint pos); /*!< in: position of the field */
-/********************************************************************//**
-Gets the column number of the nth field in an index.
-@return column number */
-UNIV_INLINE
-ulint
-dict_index_get_nth_col_no(
-/*======================*/
- const dict_index_t* index, /*!< in: index */
- ulint pos); /*!< in: position of the field */
-/********************************************************************//**
-Looks for column n in an index.
-@return position in internal representation of the index;
-ULINT_UNDEFINED if not contained */
-UNIV_INTERN
-ulint
-dict_index_get_nth_col_pos(
-/*=======================*/
- const dict_index_t* index, /*!< in: index */
- ulint n); /*!< in: column number */
-/********************************************************************//**
-Returns TRUE if the index contains a column or a prefix of that column.
-@return TRUE if contains the column or its prefix */
-UNIV_INTERN
-ibool
-dict_index_contains_col_or_prefix(
-/*==============================*/
- const dict_index_t* index, /*!< in: index */
- ulint n); /*!< in: column number */
-/********************************************************************//**
-Looks for a matching field in an index. The column has to be the same. The
-column in index must be complete, or must contain a prefix longer than the
-column in index2. That is, we must be able to construct the prefix in index2
-from the prefix in index.
-@return position in internal representation of the index;
-ULINT_UNDEFINED if not contained */
-UNIV_INTERN
-ulint
-dict_index_get_nth_field_pos(
-/*=========================*/
- const dict_index_t* index, /*!< in: index from which to search */
- const dict_index_t* index2, /*!< in: index */
- ulint n); /*!< in: field number in index2 */
-/********************************************************************//**
-Looks for column n position in the clustered index.
-@return position in internal representation of the clustered index */
-UNIV_INTERN
-ulint
-dict_table_get_nth_col_pos(
-/*=======================*/
- const dict_table_t* table, /*!< in: table */
- ulint n); /*!< in: column number */
-/********************************************************************//**
-Returns the position of a system column in an index.
-@return position, ULINT_UNDEFINED if not contained */
-UNIV_INLINE
-ulint
-dict_index_get_sys_col_pos(
-/*=======================*/
- const dict_index_t* index, /*!< in: index */
- ulint type); /*!< in: DATA_ROW_ID, ... */
-/*******************************************************************//**
-Adds a column to index. */
-UNIV_INTERN
-void
-dict_index_add_col(
-/*===============*/
- dict_index_t* index, /*!< in/out: index */
- const dict_table_t* table, /*!< in: table */
- dict_col_t* col, /*!< in: column */
- ulint prefix_len); /*!< in: column prefix length */
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Copies types of fields contained in index to tuple. */
-UNIV_INTERN
-void
-dict_index_copy_types(
-/*==================*/
- dtuple_t* tuple, /*!< in/out: data tuple */
- const dict_index_t* index, /*!< in: index */
- ulint n_fields); /*!< in: number of
- field types to copy */
-#endif /* !UNIV_HOTBACKUP */
-/*********************************************************************//**
-Gets the field column.
-@return field->col, pointer to the table column */
-UNIV_INLINE
-const dict_col_t*
-dict_field_get_col(
-/*===============*/
- const dict_field_t* field); /*!< in: index field */
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Returns an index object if it is found in the dictionary cache.
-Assumes that dict_sys->mutex is already being held.
-@return index, NULL if not found */
-UNIV_INTERN
-dict_index_t*
-dict_index_get_if_in_cache_low(
-/*===========================*/
- dulint index_id); /*!< in: index id */
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/**********************************************************************//**
-Returns an index object if it is found in the dictionary cache.
-@return index, NULL if not found */
-UNIV_INTERN
-dict_index_t*
-dict_index_get_if_in_cache(
-/*=======================*/
- dulint index_id); /*!< in: index id */
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-#ifdef UNIV_DEBUG
-/**********************************************************************//**
-Checks that a tuple has n_fields_cmp value in a sensible range, so that
-no comparison can occur with the page number field in a node pointer.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-dict_index_check_search_tuple(
-/*==========================*/
- const dict_index_t* index, /*!< in: index tree */
- const dtuple_t* tuple); /*!< in: tuple used in a search */
-/**********************************************************************//**
-Check for duplicate index entries in a table [using the index name] */
-UNIV_INTERN
-void
-dict_table_check_for_dup_indexes(
-/*=============================*/
- const dict_table_t* table); /*!< in: Check for dup indexes
- in this table */
-
-#endif /* UNIV_DEBUG */
-/**********************************************************************//**
-Builds a node pointer out of a physical record and a page number.
-@return own: node pointer */
-UNIV_INTERN
-dtuple_t*
-dict_index_build_node_ptr(
-/*======================*/
- const dict_index_t* index, /*!< in: index */
- const rec_t* rec, /*!< in: record for which to build node
- pointer */
- ulint page_no,/*!< in: page number to put in node
- pointer */
- mem_heap_t* heap, /*!< in: memory heap where pointer
- created */
- ulint level); /*!< in: level of rec in tree:
- 0 means leaf level */
-/**********************************************************************//**
-Copies an initial segment of a physical record, long enough to specify an
-index entry uniquely.
-@return pointer to the prefix record */
-UNIV_INTERN
-rec_t*
-dict_index_copy_rec_order_prefix(
-/*=============================*/
- const dict_index_t* index, /*!< in: index */
- const rec_t* rec, /*!< in: record for which to
- copy prefix */
- ulint* n_fields,/*!< out: number of fields copied */
- byte** buf, /*!< in/out: memory buffer for the
- copied prefix, or NULL */
- ulint* buf_size);/*!< in/out: buffer size */
-/**********************************************************************//**
-Builds a typed data tuple out of a physical record.
-@return own: data tuple */
-UNIV_INTERN
-dtuple_t*
-dict_index_build_data_tuple(
-/*========================*/
- dict_index_t* index, /*!< in: index */
- rec_t* rec, /*!< in: record for which to build data tuple */
- ulint n_fields,/*!< in: number of data fields */
- mem_heap_t* heap); /*!< in: memory heap where tuple created */
-/*********************************************************************//**
-Gets the space id of the root of the index tree.
-@return space id */
-UNIV_INLINE
-ulint
-dict_index_get_space(
-/*=================*/
- const dict_index_t* index); /*!< in: index */
-/*********************************************************************//**
-Sets the space id of the root of the index tree. */
-UNIV_INLINE
-void
-dict_index_set_space(
-/*=================*/
- dict_index_t* index, /*!< in/out: index */
- ulint space); /*!< in: space id */
-/*********************************************************************//**
-Gets the page number of the root of the index tree.
-@return page number */
-UNIV_INLINE
-ulint
-dict_index_get_page(
-/*================*/
- const dict_index_t* tree); /*!< in: index */
-/*********************************************************************//**
-Sets the page number of the root of index tree. */
-UNIV_INLINE
-void
-dict_index_set_page(
-/*================*/
- dict_index_t* index, /*!< in/out: index */
- ulint page); /*!< in: page number */
-/*********************************************************************//**
-Gets the read-write lock of the index tree.
-@return read-write lock */
-UNIV_INLINE
-rw_lock_t*
-dict_index_get_lock(
-/*================*/
- dict_index_t* index); /*!< in: index */
-/********************************************************************//**
-Returns free space reserved for future updates of records. This is
-relevant only in the case of many consecutive inserts, as updates
-which make the records bigger might fragment the index.
-@return number of free bytes on page, reserved for updates */
-UNIV_INLINE
-ulint
-dict_index_get_space_reserve(void);
-/*==============================*/
-/*********************************************************************//**
-Calculates the minimum record length in an index. */
-UNIV_INTERN
-ulint
-dict_index_calc_min_rec_len(
-/*========================*/
- const dict_index_t* index); /*!< in: index */
-/*********************************************************************//**
-Calculates new estimates for table and index statistics. The statistics
-are used in query optimization. */
-UNIV_INTERN
-void
-dict_update_statistics_low(
-/*=======================*/
- dict_table_t* table, /*!< in/out: table */
- ibool has_dict_mutex);/*!< in: TRUE if the caller has the
- dictionary mutex */
-/*********************************************************************//**
-Calculates new estimates for table and index statistics. The statistics
-are used in query optimization. */
-UNIV_INTERN
-void
-dict_update_statistics(
-/*===================*/
- dict_table_t* table); /*!< in/out: table */
-/********************************************************************//**
-Reserves the dictionary system mutex for MySQL. */
-UNIV_INTERN
-void
-dict_mutex_enter_for_mysql(void);
-/*============================*/
-/********************************************************************//**
-Releases the dictionary system mutex for MySQL. */
-UNIV_INTERN
-void
-dict_mutex_exit_for_mysql(void);
-/*===========================*/
-/********************************************************************//**
-Checks if the database name in two table names is the same.
-@return TRUE if same db name */
-UNIV_INTERN
-ibool
-dict_tables_have_same_db(
-/*=====================*/
- const char* name1, /*!< in: table name in the form
- dbname '/' tablename */
- const char* name2); /*!< in: table name in the form
- dbname '/' tablename */
-/*********************************************************************//**
-Removes an index from the cache */
-UNIV_INTERN
-void
-dict_index_remove_from_cache(
-/*=========================*/
- dict_table_t* table, /*!< in/out: table */
- dict_index_t* index); /*!< in, own: index */
-/**********************************************************************//**
-Get index by name
-@return index, NULL if does not exist */
-UNIV_INTERN
-dict_index_t*
-dict_table_get_index_on_name(
-/*=========================*/
- dict_table_t* table, /*!< in: table */
- const char* name); /*!< in: name of the index to find */
-/**********************************************************************//**
-In case there is more than one index with the same name return the index
-with the min(id).
-@return index, NULL if does not exist */
-UNIV_INTERN
-dict_index_t*
-dict_table_get_index_on_name_and_min_id(
-/*====================================*/
- dict_table_t* table, /*!< in: table */
- const char* name); /*!< in: name of the index to find */
-/* Buffers for storing detailed information about the latest foreign key
-and unique key errors */
-extern FILE* dict_foreign_err_file;
-extern mutex_t dict_foreign_err_mutex; /* mutex protecting the buffers */
-
-/** the dictionary system */
-extern dict_sys_t* dict_sys;
-/** the data dictionary rw-latch protecting dict_sys */
-extern rw_lock_t dict_operation_lock;
-
-/* Dictionary system struct */
-struct dict_sys_struct{
- mutex_t mutex; /*!< mutex protecting the data
- dictionary; protects also the
- disk-based dictionary system tables;
- this mutex serializes CREATE TABLE
- and DROP TABLE, as well as reading
- the dictionary data for a table from
- system tables */
- dulint row_id; /*!< the next row id to assign;
- NOTE that at a checkpoint this
- must be written to the dict system
- header and flushed to a file; in
- recovery this must be derived from
- the log records */
- hash_table_t* table_hash; /*!< hash table of the tables, based
- on name */
- hash_table_t* table_id_hash; /*!< hash table of the tables, based
- on id */
- UT_LIST_BASE_NODE_T(dict_table_t)
- table_LRU; /*!< LRU list of tables */
- ulint size; /*!< varying space in bytes occupied
- by the data dictionary table and
- index objects */
- dict_table_t* sys_tables; /*!< SYS_TABLES table */
- dict_table_t* sys_columns; /*!< SYS_COLUMNS table */
- dict_table_t* sys_indexes; /*!< SYS_INDEXES table */
- dict_table_t* sys_fields; /*!< SYS_FIELDS table */
-};
-#endif /* !UNIV_HOTBACKUP */
-
-/** dummy index for ROW_FORMAT=REDUNDANT supremum and infimum records */
-extern dict_index_t* dict_ind_redundant;
-/** dummy index for ROW_FORMAT=COMPACT supremum and infimum records */
-extern dict_index_t* dict_ind_compact;
-
-/**********************************************************************//**
-Inits dict_ind_redundant and dict_ind_compact. */
-UNIV_INTERN
-void
-dict_ind_init(void);
-/*===============*/
-
-/**********************************************************************//**
-Closes the data dictionary module. */
-UNIV_INTERN
-void
-dict_close(void);
-/*============*/
-
-#ifndef UNIV_NONINL
-#include "dict0dict.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/dict0dict.ic b/storage/innodb_plugin/include/dict0dict.ic
deleted file mode 100644
index 46e78df8272..00000000000
--- a/storage/innodb_plugin/include/dict0dict.ic
+++ /dev/null
@@ -1,806 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/dict0dict.ic
-Data dictionary system
-
-Created 1/8/1996 Heikki Tuuri
-***********************************************************************/
-
-#include "data0type.h"
-#ifndef UNIV_HOTBACKUP
-#include "dict0load.h"
-#include "rem0types.h"
-
-/*********************************************************************//**
-Gets the column data type. */
-UNIV_INLINE
-void
-dict_col_copy_type(
-/*===============*/
- const dict_col_t* col, /*!< in: column */
- dtype_t* type) /*!< out: data type */
-{
- ut_ad(col && type);
-
- type->mtype = col->mtype;
- type->prtype = col->prtype;
- type->len = col->len;
- type->mbminlen = col->mbminlen;
- type->mbmaxlen = col->mbmaxlen;
-}
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Assert that a column and a data type match.
-@return TRUE */
-UNIV_INLINE
-ibool
-dict_col_type_assert_equal(
-/*=======================*/
- const dict_col_t* col, /*!< in: column */
- const dtype_t* type) /*!< in: data type */
-{
- ut_ad(col);
- ut_ad(type);
-
- ut_ad(col->mtype == type->mtype);
- ut_ad(col->prtype == type->prtype);
- ut_ad(col->len == type->len);
-# ifndef UNIV_HOTBACKUP
- ut_ad(col->mbminlen == type->mbminlen);
- ut_ad(col->mbmaxlen == type->mbmaxlen);
-# endif /* !UNIV_HOTBACKUP */
-
- return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
-#ifndef UNIV_HOTBACKUP
-/***********************************************************************//**
-Returns the minimum size of the column.
-@return minimum size */
-UNIV_INLINE
-ulint
-dict_col_get_min_size(
-/*==================*/
- const dict_col_t* col) /*!< in: column */
-{
- return(dtype_get_min_size_low(col->mtype, col->prtype, col->len,
- col->mbminlen, col->mbmaxlen));
-}
-/***********************************************************************//**
-Returns the maximum size of the column.
-@return maximum size */
-UNIV_INLINE
-ulint
-dict_col_get_max_size(
-/*==================*/
- const dict_col_t* col) /*!< in: column */
-{
- return(dtype_get_max_size_low(col->mtype, col->len));
-}
-#endif /* !UNIV_HOTBACKUP */
-/***********************************************************************//**
-Returns the size of a fixed size column, 0 if not a fixed size column.
-@return fixed size, or 0 */
-UNIV_INLINE
-ulint
-dict_col_get_fixed_size(
-/*====================*/
- const dict_col_t* col, /*!< in: column */
- ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
-{
- return(dtype_get_fixed_size_low(col->mtype, col->prtype, col->len,
- col->mbminlen, col->mbmaxlen, comp));
-}
-/***********************************************************************//**
-Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column.
-For fixed length types it is the fixed length of the type, otherwise 0.
-@return SQL null storage size in ROW_FORMAT=REDUNDANT */
-UNIV_INLINE
-ulint
-dict_col_get_sql_null_size(
-/*=======================*/
- const dict_col_t* col, /*!< in: column */
- ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
-{
- return(dict_col_get_fixed_size(col, comp));
-}
-
-/*********************************************************************//**
-Gets the column number.
-@return col->ind, table column position (starting from 0) */
-UNIV_INLINE
-ulint
-dict_col_get_no(
-/*============*/
- const dict_col_t* col) /*!< in: column */
-{
- ut_ad(col);
-
- return(col->ind);
-}
-
-/*********************************************************************//**
-Gets the column position in the clustered index, ULINT_UNDEFINED if none. */
-UNIV_INLINE
-ulint
-dict_col_get_clust_pos(
-/*===================*/
- const dict_col_t* col, /*!< in: table column */
- const dict_index_t* clust_index) /*!< in: clustered index */
-{
- ulint i;
-
- ut_ad(col);
- ut_ad(clust_index);
- ut_ad(dict_index_is_clust(clust_index));
-
- for (i = 0; i < clust_index->n_def; i++) {
- const dict_field_t* field = &clust_index->fields[i];
-
- if (!field->prefix_len && field->col == col) {
- return(i);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-#ifndef UNIV_HOTBACKUP
-#ifdef UNIV_DEBUG
-/********************************************************************//**
-Gets the first index on the table (the clustered index).
-@return index, NULL if none exists */
-UNIV_INLINE
-dict_index_t*
-dict_table_get_first_index(
-/*=======================*/
- const dict_table_t* table) /*!< in: table */
-{
- ut_ad(table);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- return(UT_LIST_GET_FIRST(((dict_table_t*) table)->indexes));
-}
-
-/********************************************************************//**
-Gets the next index on the table.
-@return index, NULL if none left */
-UNIV_INLINE
-dict_index_t*
-dict_table_get_next_index(
-/*======================*/
- const dict_index_t* index) /*!< in: index */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return(UT_LIST_GET_NEXT(indexes, (dict_index_t*) index));
-}
-#endif /* UNIV_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************************//**
-Check whether the index is the clustered index.
-@return nonzero for clustered index, zero for other indexes */
-UNIV_INLINE
-ulint
-dict_index_is_clust(
-/*================*/
- const dict_index_t* index) /*!< in: index */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return(UNIV_UNLIKELY(index->type & DICT_CLUSTERED));
-}
-/********************************************************************//**
-Check whether the index is unique.
-@return nonzero for unique index, zero for other indexes */
-UNIV_INLINE
-ulint
-dict_index_is_unique(
-/*=================*/
- const dict_index_t* index) /*!< in: index */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return(UNIV_UNLIKELY(index->type & DICT_UNIQUE));
-}
-
-/********************************************************************//**
-Check whether the index is the insert buffer tree.
-@return nonzero for insert buffer, zero for other indexes */
-UNIV_INLINE
-ulint
-dict_index_is_ibuf(
-/*===============*/
- const dict_index_t* index) /*!< in: index */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return(UNIV_UNLIKELY(index->type & DICT_IBUF));
-}
-
-/********************************************************************//**
-Check whether the index is a secondary index or the insert buffer tree.
-@return nonzero for secondary index or insert buffer, zero for other indexes */
-UNIV_INLINE
-ulint
-dict_index_is_sec_or_ibuf(
-/*======================*/
- const dict_index_t* index) /*!< in: index */
-{
- ulint type;
-
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- type = index->type;
-
- return(UNIV_LIKELY(!(type & DICT_CLUSTERED) || (type & DICT_IBUF)));
-}
-
-/********************************************************************//**
-Gets the number of user-defined columns in a table in the dictionary
-cache.
-@return number of user-defined (e.g., not ROW_ID) columns of a table */
-UNIV_INLINE
-ulint
-dict_table_get_n_user_cols(
-/*=======================*/
- const dict_table_t* table) /*!< in: table */
-{
- ut_ad(table);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- return(table->n_cols - DATA_N_SYS_COLS);
-}
-
-/********************************************************************//**
-Gets the number of system columns in a table in the dictionary cache.
-@return number of system (e.g., ROW_ID) columns of a table */
-UNIV_INLINE
-ulint
-dict_table_get_n_sys_cols(
-/*======================*/
- const dict_table_t* table __attribute__((unused))) /*!< in: table */
-{
- ut_ad(table);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
- ut_ad(table->cached);
-
- return(DATA_N_SYS_COLS);
-}
-
-/********************************************************************//**
-Gets the number of all columns (also system) in a table in the dictionary
-cache.
-@return number of columns of a table */
-UNIV_INLINE
-ulint
-dict_table_get_n_cols(
-/*==================*/
- const dict_table_t* table) /*!< in: table */
-{
- ut_ad(table);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- return(table->n_cols);
-}
-
-#ifdef UNIV_DEBUG
-/********************************************************************//**
-Gets the nth column of a table.
-@return pointer to column object */
-UNIV_INLINE
-dict_col_t*
-dict_table_get_nth_col(
-/*===================*/
- const dict_table_t* table, /*!< in: table */
- ulint pos) /*!< in: position of column */
-{
- ut_ad(table);
- ut_ad(pos < table->n_def);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- return((dict_col_t*) (table->cols) + pos);
-}
-
-/********************************************************************//**
-Gets the given system column of a table.
-@return pointer to column object */
-UNIV_INLINE
-dict_col_t*
-dict_table_get_sys_col(
-/*===================*/
- const dict_table_t* table, /*!< in: table */
- ulint sys) /*!< in: DATA_ROW_ID, ... */
-{
- dict_col_t* col;
-
- ut_ad(table);
- ut_ad(sys < DATA_N_SYS_COLS);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- col = dict_table_get_nth_col(table, table->n_cols
- - DATA_N_SYS_COLS + sys);
- ut_ad(col->mtype == DATA_SYS);
- ut_ad(col->prtype == (sys | DATA_NOT_NULL));
-
- return(col);
-}
-#endif /* UNIV_DEBUG */
-
-/********************************************************************//**
-Gets the given system column number of a table.
-@return column number */
-UNIV_INLINE
-ulint
-dict_table_get_sys_col_no(
-/*======================*/
- const dict_table_t* table, /*!< in: table */
- ulint sys) /*!< in: DATA_ROW_ID, ... */
-{
- ut_ad(table);
- ut_ad(sys < DATA_N_SYS_COLS);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- return(table->n_cols - DATA_N_SYS_COLS + sys);
-}
-
-/********************************************************************//**
-Check whether the table uses the compact page format.
-@return TRUE if table uses the compact page format */
-UNIV_INLINE
-ibool
-dict_table_is_comp(
-/*===============*/
- const dict_table_t* table) /*!< in: table */
-{
- ut_ad(table);
-
-#if DICT_TF_COMPACT != TRUE
-#error
-#endif
-
- return(UNIV_LIKELY(table->flags & DICT_TF_COMPACT));
-}
-
-/********************************************************************//**
-Determine the file format of a table.
-@return file format version */
-UNIV_INLINE
-ulint
-dict_table_get_format(
-/*==================*/
- const dict_table_t* table) /*!< in: table */
-{
- ut_ad(table);
-
- return((table->flags & DICT_TF_FORMAT_MASK) >> DICT_TF_FORMAT_SHIFT);
-}
-
-/********************************************************************//**
-Set the file format of a table. */
-UNIV_INLINE
-void
-dict_table_set_format(
-/*==================*/
- dict_table_t* table, /*!< in/out: table */
- ulint format) /*!< in: file format version */
-{
- ut_ad(table);
-
- table->flags = (table->flags & ~DICT_TF_FORMAT_MASK)
- | (format << DICT_TF_FORMAT_SHIFT);
-}
-
-/********************************************************************//**
-Extract the compressed page size from table flags.
-@return compressed page size, or 0 if not compressed */
-UNIV_INLINE
-ulint
-dict_table_flags_to_zip_size(
-/*=========================*/
- ulint flags) /*!< in: flags */
-{
- ulint zip_size = flags & DICT_TF_ZSSIZE_MASK;
-
- if (UNIV_UNLIKELY(zip_size)) {
- zip_size = ((PAGE_ZIP_MIN_SIZE >> 1)
- << (zip_size >> DICT_TF_ZSSIZE_SHIFT));
-
- ut_ad(zip_size <= UNIV_PAGE_SIZE);
- }
-
- return(zip_size);
-}
-
-/********************************************************************//**
-Check whether the table uses the compressed compact page format.
-@return compressed page size, or 0 if not compressed */
-UNIV_INLINE
-ulint
-dict_table_zip_size(
-/*================*/
- const dict_table_t* table) /*!< in: table */
-{
- ut_ad(table);
-
- return(dict_table_flags_to_zip_size(table->flags));
-}
-
-/********************************************************************//**
-Gets the number of fields in the internal representation of an index,
-including fields added by the dictionary system.
-@return number of fields */
-UNIV_INLINE
-ulint
-dict_index_get_n_fields(
-/*====================*/
- const dict_index_t* index) /*!< in: an internal
- representation of index (in
- the dictionary cache) */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return(index->n_fields);
-}
-
-/********************************************************************//**
-Gets the number of fields in the internal representation of an index
-that uniquely determine the position of an index entry in the index, if
-we do not take multiversioning into account: in the B-tree use the value
-returned by dict_index_get_n_unique_in_tree.
-@return number of fields */
-UNIV_INLINE
-ulint
-dict_index_get_n_unique(
-/*====================*/
- const dict_index_t* index) /*!< in: an internal representation
- of index (in the dictionary cache) */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- ut_ad(index->cached);
-
- return(index->n_uniq);
-}
-
-/********************************************************************//**
-Gets the number of fields in the internal representation of an index
-which uniquely determine the position of an index entry in the index, if
-we also take multiversioning into account.
-@return number of fields */
-UNIV_INLINE
-ulint
-dict_index_get_n_unique_in_tree(
-/*============================*/
- const dict_index_t* index) /*!< in: an internal representation
- of index (in the dictionary cache) */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- ut_ad(index->cached);
-
- if (dict_index_is_clust(index)) {
-
- return(dict_index_get_n_unique(index));
- }
-
- return(dict_index_get_n_fields(index));
-}
-
-/********************************************************************//**
-Gets the number of user-defined ordering fields in the index. In the internal
-representation of clustered indexes we add the row id to the ordering fields
-to make a clustered index unique, but this function returns the number of
-fields the user defined in the index as ordering fields.
-@return number of fields */
-UNIV_INLINE
-ulint
-dict_index_get_n_ordering_defined_by_user(
-/*======================================*/
- const dict_index_t* index) /*!< in: an internal representation
- of index (in the dictionary cache) */
-{
- return(index->n_user_defined_cols);
-}
-
-#ifdef UNIV_DEBUG
-/********************************************************************//**
-Gets the nth field of an index.
-@return pointer to field object */
-UNIV_INLINE
-dict_field_t*
-dict_index_get_nth_field(
-/*=====================*/
- const dict_index_t* index, /*!< in: index */
- ulint pos) /*!< in: position of field */
-{
- ut_ad(index);
- ut_ad(pos < index->n_def);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return((dict_field_t*) (index->fields) + pos);
-}
-#endif /* UNIV_DEBUG */
-
-/********************************************************************//**
-Returns the position of a system column in an index.
-@return position, ULINT_UNDEFINED if not contained */
-UNIV_INLINE
-ulint
-dict_index_get_sys_col_pos(
-/*=======================*/
- const dict_index_t* index, /*!< in: index */
- ulint type) /*!< in: DATA_ROW_ID, ... */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- ut_ad(!(index->type & DICT_UNIVERSAL));
-
- if (dict_index_is_clust(index)) {
-
- return(dict_col_get_clust_pos(
- dict_table_get_sys_col(index->table, type),
- index));
- }
-
- return(dict_index_get_nth_col_pos(
- index, dict_table_get_sys_col_no(index->table, type)));
-}
-
-/*********************************************************************//**
-Gets the field column.
-@return field->col, pointer to the table column */
-UNIV_INLINE
-const dict_col_t*
-dict_field_get_col(
-/*===============*/
- const dict_field_t* field) /*!< in: index field */
-{
- ut_ad(field);
-
- return(field->col);
-}
-
-/********************************************************************//**
-Gets pointer to the nth column in an index.
-@return column */
-UNIV_INLINE
-const dict_col_t*
-dict_index_get_nth_col(
-/*===================*/
- const dict_index_t* index, /*!< in: index */
- ulint pos) /*!< in: position of the field */
-{
- return(dict_field_get_col(dict_index_get_nth_field(index, pos)));
-}
-
-/********************************************************************//**
-Gets the column number of the nth field in an index.
-@return column number */
-UNIV_INLINE
-ulint
-dict_index_get_nth_col_no(
-/*======================*/
- const dict_index_t* index, /*!< in: index */
- ulint pos) /*!< in: position of the field */
-{
- return(dict_col_get_no(dict_index_get_nth_col(index, pos)));
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Returns the minimum data size of an index record.
-@return minimum data size in bytes */
-UNIV_INLINE
-ulint
-dict_index_get_min_size(
-/*====================*/
- const dict_index_t* index) /*!< in: index */
-{
- ulint n = dict_index_get_n_fields(index);
- ulint size = 0;
-
- while (n--) {
- size += dict_col_get_min_size(dict_index_get_nth_col(index,
- n));
- }
-
- return(size);
-}
-
-/*********************************************************************//**
-Gets the space id of the root of the index tree.
-@return space id */
-UNIV_INLINE
-ulint
-dict_index_get_space(
-/*=================*/
- const dict_index_t* index) /*!< in: index */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return(index->space);
-}
-
-/*********************************************************************//**
-Sets the space id of the root of the index tree. */
-UNIV_INLINE
-void
-dict_index_set_space(
-/*=================*/
- dict_index_t* index, /*!< in/out: index */
- ulint space) /*!< in: space id */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- index->space = space;
-}
-
-/*********************************************************************//**
-Gets the page number of the root of the index tree.
-@return page number */
-UNIV_INLINE
-ulint
-dict_index_get_page(
-/*================*/
- const dict_index_t* index) /*!< in: index */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return(index->page);
-}
-
-/*********************************************************************//**
-Sets the page number of the root of index tree. */
-UNIV_INLINE
-void
-dict_index_set_page(
-/*================*/
- dict_index_t* index, /*!< in/out: index */
- ulint page) /*!< in: page number */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- index->page = page;
-}
-
-/*********************************************************************//**
-Gets the read-write lock of the index tree.
-@return read-write lock */
-UNIV_INLINE
-rw_lock_t*
-dict_index_get_lock(
-/*================*/
- dict_index_t* index) /*!< in: index */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return(&(index->lock));
-}
-
-/********************************************************************//**
-Returns free space reserved for future updates of records. This is
-relevant only in the case of many consecutive inserts, as updates
-which make the records bigger might fragment the index.
-@return number of free bytes on page, reserved for updates */
-UNIV_INLINE
-ulint
-dict_index_get_space_reserve(void)
-/*==============================*/
-{
- return(UNIV_PAGE_SIZE / 16);
-}
-
-/**********************************************************************//**
-Checks if a table is in the dictionary cache.
-@return table, NULL if not found */
-UNIV_INLINE
-dict_table_t*
-dict_table_check_if_in_cache_low(
-/*=============================*/
- const char* table_name) /*!< in: table name */
-{
- dict_table_t* table;
- ulint table_fold;
-
- ut_ad(table_name);
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- /* Look for the table name in the hash table */
- table_fold = ut_fold_string(table_name);
-
- HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold,
- dict_table_t*, table, ut_ad(table->cached),
- !strcmp(table->name, table_name));
- return(table);
-}
-
-/**********************************************************************//**
-Gets a table; loads it to the dictionary cache if necessary. A low-level
-function.
-@return table, NULL if not found */
-UNIV_INLINE
-dict_table_t*
-dict_table_get_low(
-/*===============*/
- const char* table_name) /*!< in: table name */
-{
- dict_table_t* table;
-
- ut_ad(table_name);
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- table = dict_table_check_if_in_cache_low(table_name);
-
- if (table == NULL) {
- table = dict_load_table(table_name);
- }
-
- ut_ad(!table || table->cached);
-
- return(table);
-}
-
-/**********************************************************************//**
-Returns a table object based on table id, loading it if it is not cached.
-@return table, NULL if does not exist */
-UNIV_INLINE
-dict_table_t*
-dict_table_get_on_id_low(
-/*=====================*/
- dulint table_id) /*!< in: table id */
-{
- dict_table_t* table;
- ulint fold;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- /* Look for the table id in the hash table */
- fold = ut_fold_dulint(table_id);
-
- HASH_SEARCH(id_hash, dict_sys->table_id_hash, fold,
- dict_table_t*, table, ut_ad(table->cached),
- !ut_dulint_cmp(table->id, table_id));
- if (table == NULL) {
- table = dict_load_table_on_id(table_id);
- }
-
- ut_ad(!table || table->cached);
-
- /* TODO: should get the type information from MySQL */
-
- return(table);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/dict0load.h b/storage/innodb_plugin/include/dict0load.h
deleted file mode 100644
index 60b8c1fb632..00000000000
--- a/storage/innodb_plugin/include/dict0load.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dict0load.h
-Loads to the memory cache database object definitions
-from dictionary tables
-
-Created 4/24/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dict0load_h
-#define dict0load_h
-
-#include "univ.i"
-#include "dict0types.h"
-#include "ut0byte.h"
-#include "mem0mem.h"
-
-/********************************************************************//**
-In a crash recovery we already have all the tablespace objects created.
-This function compares the space id information in the InnoDB data dictionary
-to what we already read with fil_load_single_table_tablespaces().
-
-In a normal startup, we create the tablespace objects for every table in
-InnoDB's data dictionary, if the corresponding .ibd file exists.
-We also scan the biggest space id, and store it to fil_system. */
-UNIV_INTERN
-void
-dict_check_tablespaces_and_store_max_id(
-/*====================================*/
- ibool in_crash_recovery); /*!< in: are we doing a crash recovery */
-/********************************************************************//**
-Finds the first table name in the given database.
-@return own: table name, NULL if does not exist; the caller must free
-the memory in the string! */
-UNIV_INTERN
-char*
-dict_get_first_table_name_in_db(
-/*============================*/
- const char* name); /*!< in: database name which ends to '/' */
-/********************************************************************//**
-Loads a table definition and also all its index definitions, and also
-the cluster definition if the table is a member in a cluster. Also loads
-all foreign key constraints where the foreign key is in the table or where
-a foreign key references columns in this table.
-@return table, NULL if does not exist; if the table is stored in an
-.ibd file, but the file does not exist, then we set the
-ibd_file_missing flag TRUE in the table object we return */
-UNIV_INTERN
-dict_table_t*
-dict_load_table(
-/*============*/
- const char* name); /*!< in: table name in the
- databasename/tablename format */
-/***********************************************************************//**
-Loads a table object based on the table id.
-@return table; NULL if table does not exist */
-UNIV_INTERN
-dict_table_t*
-dict_load_table_on_id(
-/*==================*/
- dulint table_id); /*!< in: table id */
-/********************************************************************//**
-This function is called when the database is booted.
-Loads system table index definitions except for the clustered index which
-is added to the dictionary cache at booting before calling this function. */
-UNIV_INTERN
-void
-dict_load_sys_table(
-/*================*/
- dict_table_t* table); /*!< in: system table */
-/***********************************************************************//**
-Loads foreign key constraints where the table is either the foreign key
-holder or where the table is referenced by a foreign key. Adds these
-constraints to the data dictionary. Note that we know that the dictionary
-cache already contains all constraints where the other relevant table is
-already in the dictionary cache.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ulint
-dict_load_foreigns(
-/*===============*/
- const char* table_name, /*!< in: table name */
- ibool check_charsets);/*!< in: TRUE=check charsets
- compatibility */
-/********************************************************************//**
-Prints to the standard output information on all tables found in the data
-dictionary system table. */
-UNIV_INTERN
-void
-dict_print(void);
-/*============*/
-
-
-#ifndef UNIV_NONINL
-#include "dict0load.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/dict0load.ic b/storage/innodb_plugin/include/dict0load.ic
deleted file mode 100644
index ccc16db165b..00000000000
--- a/storage/innodb_plugin/include/dict0load.ic
+++ /dev/null
@@ -1,26 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dict0load.ic
-Loads to the memory cache database object definitions
-from dictionary tables
-
-Created 4/24/1996 Heikki Tuuri
-*******************************************************/
-
diff --git a/storage/innodb_plugin/include/dict0mem.h b/storage/innodb_plugin/include/dict0mem.h
deleted file mode 100644
index 2d001111938..00000000000
--- a/storage/innodb_plugin/include/dict0mem.h
+++ /dev/null
@@ -1,537 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dict0mem.h
-Data dictionary memory object creation
-
-Created 1/8/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dict0mem_h
-#define dict0mem_h
-
-#include "univ.i"
-#include "dict0types.h"
-#include "data0type.h"
-#include "mem0mem.h"
-#include "rem0types.h"
-#include "btr0types.h"
-#ifndef UNIV_HOTBACKUP
-# include "lock0types.h"
-# include "que0types.h"
-# include "sync0rw.h"
-#endif /* !UNIV_HOTBACKUP */
-#include "ut0mem.h"
-#include "ut0lst.h"
-#include "ut0rnd.h"
-#include "ut0byte.h"
-#include "hash0hash.h"
-#include "trx0types.h"
-
-/** Type flags of an index: OR'ing of the flags is allowed to define a
-combination of types */
-/* @{ */
-#define DICT_CLUSTERED 1 /*!< clustered index */
-#define DICT_UNIQUE 2 /*!< unique index */
-#define DICT_UNIVERSAL 4 /*!< index which can contain records from any
- other index */
-#define DICT_IBUF 8 /*!< insert buffer tree */
-/* @} */
-
-/** Types for a table object */
-#define DICT_TABLE_ORDINARY 1 /*!< ordinary table */
-#if 0 /* not implemented */
-#define DICT_TABLE_CLUSTER_MEMBER 2
-#define DICT_TABLE_CLUSTER 3 /* this means that the table is
- really a cluster definition */
-#endif
-
-/** Table flags. All unused bits must be 0. */
-/* @{ */
-#define DICT_TF_COMPACT 1 /* Compact page format.
- This must be set for
- new file formats
- (later than
- DICT_TF_FORMAT_51). */
-
-/** Compressed page size (0=uncompressed, up to 15 compressed sizes) */
-/* @{ */
-#define DICT_TF_ZSSIZE_SHIFT 1
-#define DICT_TF_ZSSIZE_MASK (15 << DICT_TF_ZSSIZE_SHIFT)
-#define DICT_TF_ZSSIZE_MAX (UNIV_PAGE_SIZE_SHIFT - PAGE_ZIP_MIN_SIZE_SHIFT + 1)
-/* @} */
-
-/** File format */
-/* @{ */
-#define DICT_TF_FORMAT_SHIFT 5 /* file format */
-#define DICT_TF_FORMAT_MASK (127 << DICT_TF_FORMAT_SHIFT)
-#define DICT_TF_FORMAT_51 0 /*!< InnoDB/MySQL up to 5.1 */
-#define DICT_TF_FORMAT_ZIP 1 /*!< InnoDB plugin for 5.1:
- compressed tables,
- new BLOB treatment */
-/** Maximum supported file format */
-#define DICT_TF_FORMAT_MAX DICT_TF_FORMAT_ZIP
-
-#define DICT_TF_BITS 6 /*!< number of flag bits */
-#if (1 << (DICT_TF_BITS - DICT_TF_FORMAT_SHIFT)) <= DICT_TF_FORMAT_MAX
-# error "DICT_TF_BITS is insufficient for DICT_TF_FORMAT_MAX"
-#endif
-/* @} */
-/* @} */
-
-/**********************************************************************//**
-Creates a table memory object.
-@return own: table object */
-UNIV_INTERN
-dict_table_t*
-dict_mem_table_create(
-/*==================*/
- const char* name, /*!< in: table name */
- ulint space, /*!< in: space where the clustered index
- of the table is placed; this parameter
- is ignored if the table is made
- a member of a cluster */
- ulint n_cols, /*!< in: number of columns */
- ulint flags); /*!< in: table flags */
-/****************************************************************//**
-Free a table memory object. */
-UNIV_INTERN
-void
-dict_mem_table_free(
-/*================*/
- dict_table_t* table); /*!< in: table */
-/**********************************************************************//**
-Adds a column definition to a table. */
-UNIV_INTERN
-void
-dict_mem_table_add_col(
-/*===================*/
- dict_table_t* table, /*!< in: table */
- mem_heap_t* heap, /*!< in: temporary memory heap, or NULL */
- const char* name, /*!< in: column name, or NULL */
- ulint mtype, /*!< in: main datatype */
- ulint prtype, /*!< in: precise type */
- ulint len); /*!< in: precision */
-/**********************************************************************//**
-Creates an index memory object.
-@return own: index object */
-UNIV_INTERN
-dict_index_t*
-dict_mem_index_create(
-/*==================*/
- const char* table_name, /*!< in: table name */
- const char* index_name, /*!< in: index name */
- ulint space, /*!< in: space where the index tree is
- placed, ignored if the index is of
- the clustered type */
- ulint type, /*!< in: DICT_UNIQUE,
- DICT_CLUSTERED, ... ORed */
- ulint n_fields); /*!< in: number of fields */
-/**********************************************************************//**
-Adds a field definition to an index. NOTE: does not take a copy
-of the column name if the field is a column. The memory occupied
-by the column name may be released only after publishing the index. */
-UNIV_INTERN
-void
-dict_mem_index_add_field(
-/*=====================*/
- dict_index_t* index, /*!< in: index */
- const char* name, /*!< in: column name */
- ulint prefix_len); /*!< in: 0 or the column prefix length
- in a MySQL index like
- INDEX (textcol(25)) */
-/**********************************************************************//**
-Frees an index memory object. */
-UNIV_INTERN
-void
-dict_mem_index_free(
-/*================*/
- dict_index_t* index); /*!< in: index */
-/**********************************************************************//**
-Creates and initializes a foreign constraint memory object.
-@return own: foreign constraint struct */
-UNIV_INTERN
-dict_foreign_t*
-dict_mem_foreign_create(void);
-/*=========================*/
-
-/** Data structure for a column in a table */
-struct dict_col_struct{
- /*----------------------*/
- /** The following are copied from dtype_t,
- so that all bit-fields can be packed tightly. */
- /* @{ */
- unsigned mtype:8; /*!< main data type */
- unsigned prtype:24; /*!< precise type; MySQL data
- type, charset code, flags to
- indicate nullability,
- signedness, whether this is a
- binary string, whether this is
- a true VARCHAR where MySQL
- uses 2 bytes to store the length */
-
- /* the remaining fields do not affect alphabetical ordering: */
-
- unsigned len:16; /*!< length; for MySQL data this
- is field->pack_length(),
- except that for a >= 5.0.3
- type true VARCHAR this is the
- maximum byte length of the
- string data (in addition to
- the string, MySQL uses 1 or 2
- bytes to store the string length) */
-
- unsigned mbminlen:2; /*!< minimum length of a
- character, in bytes */
- unsigned mbmaxlen:3; /*!< maximum length of a
- character, in bytes */
- /*----------------------*/
- /* End of definitions copied from dtype_t */
- /* @} */
-
- unsigned ind:10; /*!< table column position
- (starting from 0) */
- unsigned ord_part:1; /*!< nonzero if this column
- appears in the ordering fields
- of an index */
-};
-
-/** @brief DICT_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
-indexed column length (or indexed prefix length).
-
-It is set to 3*256, so that one can create a column prefix index on
-256 characters of a TEXT or VARCHAR column also in the UTF-8
-charset. In that charset, a character may take at most 3 bytes. This
-constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
-files would be at risk! */
-#define DICT_MAX_INDEX_COL_LEN REC_MAX_INDEX_COL_LEN
-
-/** Data structure for a field in an index */
-struct dict_field_struct{
- dict_col_t* col; /*!< pointer to the table column */
- const char* name; /*!< name of the column */
- unsigned prefix_len:10; /*!< 0 or the length of the column
- prefix in bytes in a MySQL index of
- type, e.g., INDEX (textcol(25));
- must be smaller than
- DICT_MAX_INDEX_COL_LEN; NOTE that
- in the UTF-8 charset, MySQL sets this
- to 3 * the prefix len in UTF-8 chars */
- unsigned fixed_len:10; /*!< 0 or the fixed length of the
- column if smaller than
- DICT_MAX_INDEX_COL_LEN */
-};
-
-/** Data structure for an index. Most fields will be
-initialized to 0, NULL or FALSE in dict_mem_index_create(). */
-struct dict_index_struct{
- dulint id; /*!< id of the index */
- mem_heap_t* heap; /*!< memory heap */
- const char* name; /*!< index name */
- const char* table_name;/*!< table name */
- dict_table_t* table; /*!< back pointer to table */
-#ifndef UNIV_HOTBACKUP
- unsigned space:32;
- /*!< space where the index tree is placed */
- unsigned page:32;/*!< index tree root page number */
-#endif /* !UNIV_HOTBACKUP */
- unsigned type:4; /*!< index type (DICT_CLUSTERED, DICT_UNIQUE,
- DICT_UNIVERSAL, DICT_IBUF) */
- unsigned trx_id_offset:10;/*!< position of the trx id column
- in a clustered index record, if the fields
- before it are known to be of a fixed size,
- 0 otherwise */
- unsigned n_user_defined_cols:10;
- /*!< number of columns the user defined to
- be in the index: in the internal
- representation we add more columns */
- unsigned n_uniq:10;/*!< number of fields from the beginning
- which are enough to determine an index
- entry uniquely */
- unsigned n_def:10;/*!< number of fields defined so far */
- unsigned n_fields:10;/*!< number of fields in the index */
- unsigned n_nullable:10;/*!< number of nullable fields */
- unsigned cached:1;/*!< TRUE if the index object is in the
- dictionary cache */
- unsigned to_be_dropped:1;
- /*!< TRUE if this index is marked to be
- dropped in ha_innobase::prepare_drop_index(),
- otherwise FALSE */
- dict_field_t* fields; /*!< array of field descriptions */
-#ifndef UNIV_HOTBACKUP
- UT_LIST_NODE_T(dict_index_t)
- indexes;/*!< list of indexes of the table */
- btr_search_t* search_info; /*!< info used in optimistic searches */
- /*----------------------*/
- /** Statistics for query optimization */
- /* @{ */
- ib_int64_t* stat_n_diff_key_vals;
- /*!< approximate number of different
- key values for this index, for each
- n-column prefix where n <=
- dict_get_n_unique(index); we
- periodically calculate new
- estimates */
- ulint stat_index_size;
- /*!< approximate index size in
- database pages */
- ulint stat_n_leaf_pages;
- /*!< approximate number of leaf pages in the
- index tree */
- /* @} */
- rw_lock_t lock; /*!< read-write lock protecting the
- upper levels of the index tree */
- ib_uint64_t trx_id; /*!< id of the transaction that created this
- index, or 0 if the index existed
- when InnoDB was started up */
-#endif /* !UNIV_HOTBACKUP */
-#ifdef UNIV_DEBUG
- ulint magic_n;/*!< magic number */
-/** Value of dict_index_struct::magic_n */
-# define DICT_INDEX_MAGIC_N 76789786
-#endif
-};
-
-/** Data structure for a foreign key constraint; an example:
-FOREIGN KEY (A, B) REFERENCES TABLE2 (C, D). Most fields will be
-initialized to 0, NULL or FALSE in dict_mem_foreign_create(). */
-struct dict_foreign_struct{
- mem_heap_t* heap; /*!< this object is allocated from
- this memory heap */
- char* id; /*!< id of the constraint as a
- null-terminated string */
- unsigned n_fields:10; /*!< number of indexes' first fields
- for which the foreign key
- constraint is defined: we allow the
- indexes to contain more fields than
- mentioned in the constraint, as long
- as the first fields are as mentioned */
- unsigned type:6; /*!< 0 or DICT_FOREIGN_ON_DELETE_CASCADE
- or DICT_FOREIGN_ON_DELETE_SET_NULL */
- char* foreign_table_name;/*!< foreign table name */
- dict_table_t* foreign_table; /*!< table where the foreign key is */
- const char** foreign_col_names;/*!< names of the columns in the
- foreign key */
- char* referenced_table_name;/*!< referenced table name */
- dict_table_t* referenced_table;/*!< table where the referenced key
- is */
- const char** referenced_col_names;/*!< names of the referenced
- columns in the referenced table */
- dict_index_t* foreign_index; /*!< foreign index; we require that
- both tables contain explicitly defined
- indexes for the constraint: InnoDB
- does not generate new indexes
- implicitly */
- dict_index_t* referenced_index;/*!< referenced index */
- UT_LIST_NODE_T(dict_foreign_t)
- foreign_list; /*!< list node for foreign keys of the
- table */
- UT_LIST_NODE_T(dict_foreign_t)
- referenced_list;/*!< list node for referenced
- keys of the table */
-};
-
-/** The flags for ON_UPDATE and ON_DELETE can be ORed; the default is that
-a foreign key constraint is enforced, therefore RESTRICT just means no flag */
-/* @{ */
-#define DICT_FOREIGN_ON_DELETE_CASCADE 1 /*!< ON DELETE CASCADE */
-#define DICT_FOREIGN_ON_DELETE_SET_NULL 2 /*!< ON UPDATE SET NULL */
-#define DICT_FOREIGN_ON_UPDATE_CASCADE 4 /*!< ON DELETE CASCADE */
-#define DICT_FOREIGN_ON_UPDATE_SET_NULL 8 /*!< ON UPDATE SET NULL */
-#define DICT_FOREIGN_ON_DELETE_NO_ACTION 16 /*!< ON DELETE NO ACTION */
-#define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32 /*!< ON UPDATE NO ACTION */
-/* @} */
-
-
-/** Data structure for a database table. Most fields will be
-initialized to 0, NULL or FALSE in dict_mem_table_create(). */
-struct dict_table_struct{
- dulint id; /*!< id of the table */
- mem_heap_t* heap; /*!< memory heap */
- const char* name; /*!< table name */
- const char* dir_path_of_temp_table;/*!< NULL or the directory path
- where a TEMPORARY table that was explicitly
- created by a user should be placed if
- innodb_file_per_table is defined in my.cnf;
- in Unix this is usually /tmp/..., in Windows
- temp\... */
- unsigned space:32;
- /*!< space where the clustered index of the
- table is placed */
- unsigned flags:DICT_TF_BITS;/*!< DICT_TF_COMPACT, ... */
- unsigned ibd_file_missing:1;
- /*!< TRUE if this is in a single-table
- tablespace and the .ibd file is missing; then
- we must return in ha_innodb.cc an error if the
- user tries to query such an orphaned table */
- unsigned tablespace_discarded:1;
- /*!< this flag is set TRUE when the user
- calls DISCARD TABLESPACE on this
- table, and reset to FALSE in IMPORT
- TABLESPACE */
- unsigned cached:1;/*!< TRUE if the table object has been added
- to the dictionary cache */
- unsigned n_def:10;/*!< number of columns defined so far */
- unsigned n_cols:10;/*!< number of columns */
- dict_col_t* cols; /*!< array of column descriptions */
- const char* col_names;
- /*!< Column names packed in a character string
- "name1\0name2\0...nameN\0". Until
- the string contains n_cols, it will be
- allocated from a temporary heap. The final
- string will be allocated from table->heap. */
-#ifndef UNIV_HOTBACKUP
- hash_node_t name_hash; /*!< hash chain node */
- hash_node_t id_hash; /*!< hash chain node */
- UT_LIST_BASE_NODE_T(dict_index_t)
- indexes; /*!< list of indexes of the table */
- UT_LIST_BASE_NODE_T(dict_foreign_t)
- foreign_list;/*!< list of foreign key constraints
- in the table; these refer to columns
- in other tables */
- UT_LIST_BASE_NODE_T(dict_foreign_t)
- referenced_list;/*!< list of foreign key constraints
- which refer to this table */
- UT_LIST_NODE_T(dict_table_t)
- table_LRU; /*!< node of the LRU list of tables */
- ulint n_mysql_handles_opened;
- /*!< count of how many handles MySQL has opened
- to this table; dropping of the table is
- NOT allowed until this count gets to zero;
- MySQL does NOT itself check the number of
- open handles at drop */
- ulint n_foreign_key_checks_running;
- /*!< count of how many foreign key check
- operations are currently being performed
- on the table: we cannot drop the table while
- there are foreign key checks running on
- it! */
- trx_id_t query_cache_inv_trx_id;
- /*!< transactions whose trx id is
- smaller than this number are not
- allowed to store to the MySQL query
- cache or retrieve from it; when a trx
- with undo logs commits, it sets this
- to the value of the trx id counter for
- the tables it had an IX lock on */
- UT_LIST_BASE_NODE_T(lock_t)
- locks; /*!< list of locks on the table */
-#ifdef UNIV_DEBUG
- /*----------------------*/
- ibool does_not_fit_in_memory;
- /*!< this field is used to specify in
- simulations tables which are so big
- that disk should be accessed: disk
- access is simulated by putting the
- thread to sleep for a while; NOTE that
- this flag is not stored to the data
- dictionary on disk, and the database
- will forget about value TRUE if it has
- to reload the table definition from
- disk */
-#endif /* UNIV_DEBUG */
- /*----------------------*/
- unsigned big_rows:1;
- /*!< flag: TRUE if the maximum length of
- a single row exceeds BIG_ROW_SIZE;
- initialized in dict_table_add_to_cache() */
- /** Statistics for query optimization */
- /* @{ */
- unsigned stat_initialized:1; /*!< TRUE if statistics have
- been calculated the first time
- after database startup or table creation */
- ib_int64_t stat_n_rows;
- /*!< approximate number of rows in the table;
- we periodically calculate new estimates */
- ulint stat_clustered_index_size;
- /*!< approximate clustered index size in
- database pages */
- ulint stat_sum_of_other_index_sizes;
- /*!< other indexes in database pages */
- ulint stat_modified_counter;
- /*!< when a row is inserted, updated,
- or deleted,
- we add 1 to this number; we calculate new
- estimates for the stat_... values for the
- table and the indexes at an interval of 2 GB
- or when about 1 / 16 of table has been
- modified; also when the estimate operation is
- called for MySQL SHOW TABLE STATUS; the
- counter is reset to zero at statistics
- calculation; this counter is not protected by
- any latch, because this is only used for
- heuristics */
- /* @} */
- /*----------------------*/
- /**!< The following fields are used by the
- AUTOINC code. The actual collection of
- tables locked during AUTOINC read/write is
- kept in trx_t. In order to quickly determine
- whether a transaction has locked the AUTOINC
- lock we keep a pointer to the transaction
- here in the autoinc_trx variable. This is to
- avoid acquiring the kernel mutex and scanning
- the vector in trx_t.
-
- When an AUTOINC lock has to wait, the
- corresponding lock instance is created on
- the trx lock heap rather than use the
- pre-allocated instance in autoinc_lock below.*/
- /* @{ */
- lock_t* autoinc_lock;
- /*!< a buffer for an AUTOINC lock
- for this table: we allocate the memory here
- so that individual transactions can get it
- and release it without a need to allocate
- space from the lock heap of the trx:
- otherwise the lock heap would grow rapidly
- if we do a large insert from a select */
- mutex_t autoinc_mutex;
- /*!< mutex protecting the autoincrement
- counter */
- ib_uint64_t autoinc;/*!< autoinc counter value to give to the
- next inserted row */
- ulong n_waiting_or_granted_auto_inc_locks;
- /*!< This counter is used to track the number
- of granted and pending autoinc locks on this
- table. This value is set after acquiring the
- kernel mutex but we peek the contents to
- determine whether other transactions have
- acquired the AUTOINC lock or not. Of course
- only one transaction can be granted the
- lock but there can be multiple waiters. */
- const trx_t* autoinc_trx;
- /*!< The transaction that currently holds the
- the AUTOINC lock on this table. */
- /* @} */
- /*----------------------*/
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_DEBUG
- ulint magic_n;/*!< magic number */
-/** Value of dict_table_struct::magic_n */
-# define DICT_TABLE_MAGIC_N 76333786
-#endif /* UNIV_DEBUG */
-};
-
-#ifndef UNIV_NONINL
-#include "dict0mem.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/dict0mem.ic b/storage/innodb_plugin/include/dict0mem.ic
deleted file mode 100644
index c36adb07a18..00000000000
--- a/storage/innodb_plugin/include/dict0mem.ic
+++ /dev/null
@@ -1,26 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/dict0mem.ic
-Data dictionary memory object creation
-
-Created 1/8/1996 Heikki Tuuri
-***********************************************************************/
-
-
diff --git a/storage/innodb_plugin/include/dict0types.h b/storage/innodb_plugin/include/dict0types.h
deleted file mode 100644
index 7ad69193cc9..00000000000
--- a/storage/innodb_plugin/include/dict0types.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dict0types.h
-Data dictionary global types
-
-Created 1/8/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dict0types_h
-#define dict0types_h
-
-typedef struct dict_sys_struct dict_sys_t;
-typedef struct dict_col_struct dict_col_t;
-typedef struct dict_field_struct dict_field_t;
-typedef struct dict_index_struct dict_index_t;
-typedef struct dict_table_struct dict_table_t;
-typedef struct dict_foreign_struct dict_foreign_t;
-
-/* A cluster object is a table object with the type field set to
-DICT_CLUSTERED */
-
-typedef dict_table_t dict_cluster_t;
-
-typedef struct ind_node_struct ind_node_t;
-typedef struct tab_node_struct tab_node_t;
-
-/* Space id and page no where the dictionary header resides */
-#define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */
-#define DICT_HDR_PAGE_NO FSP_DICT_HDR_PAGE_NO
-
-#endif
diff --git a/storage/innodb_plugin/include/dyn0dyn.h b/storage/innodb_plugin/include/dyn0dyn.h
deleted file mode 100644
index 121a5946ac7..00000000000
--- a/storage/innodb_plugin/include/dyn0dyn.h
+++ /dev/null
@@ -1,188 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dyn0dyn.h
-The dynamically allocated array
-
-Created 2/5/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dyn0dyn_h
-#define dyn0dyn_h
-
-#include "univ.i"
-#include "ut0lst.h"
-#include "mem0mem.h"
-
-/** A block in a dynamically allocated array */
-typedef struct dyn_block_struct dyn_block_t;
-/** Dynamically allocated array */
-typedef dyn_block_t dyn_array_t;
-
-
-/** This is the initial 'payload' size of a dynamic array;
-this must be > MLOG_BUF_MARGIN + 30! */
-#define DYN_ARRAY_DATA_SIZE 512
-
-/*********************************************************************//**
-Initializes a dynamic array.
-@return initialized dyn array */
-UNIV_INLINE
-dyn_array_t*
-dyn_array_create(
-/*=============*/
- dyn_array_t* arr); /*!< in: pointer to a memory buffer of
- size sizeof(dyn_array_t) */
-/************************************************************//**
-Frees a dynamic array. */
-UNIV_INLINE
-void
-dyn_array_free(
-/*===========*/
- dyn_array_t* arr); /*!< in: dyn array */
-/*********************************************************************//**
-Makes room on top of a dyn array and returns a pointer to a buffer in it.
-After copying the elements, the caller must close the buffer using
-dyn_array_close.
-@return pointer to the buffer */
-UNIV_INLINE
-byte*
-dyn_array_open(
-/*===========*/
- dyn_array_t* arr, /*!< in: dynamic array */
- ulint size); /*!< in: size in bytes of the buffer; MUST be
- smaller than DYN_ARRAY_DATA_SIZE! */
-/*********************************************************************//**
-Closes the buffer returned by dyn_array_open. */
-UNIV_INLINE
-void
-dyn_array_close(
-/*============*/
- dyn_array_t* arr, /*!< in: dynamic array */
- byte* ptr); /*!< in: buffer space from ptr up was not used */
-/*********************************************************************//**
-Makes room on top of a dyn array and returns a pointer to
-the added element. The caller must copy the element to
-the pointer returned.
-@return pointer to the element */
-UNIV_INLINE
-void*
-dyn_array_push(
-/*===========*/
- dyn_array_t* arr, /*!< in: dynamic array */
- ulint size); /*!< in: size in bytes of the element */
-/************************************************************//**
-Returns pointer to an element in dyn array.
-@return pointer to element */
-UNIV_INLINE
-void*
-dyn_array_get_element(
-/*==================*/
- dyn_array_t* arr, /*!< in: dyn array */
- ulint pos); /*!< in: position of element as bytes
- from array start */
-/************************************************************//**
-Returns the size of stored data in a dyn array.
-@return data size in bytes */
-UNIV_INLINE
-ulint
-dyn_array_get_data_size(
-/*====================*/
- dyn_array_t* arr); /*!< in: dyn array */
-/************************************************************//**
-Gets the first block in a dyn array. */
-UNIV_INLINE
-dyn_block_t*
-dyn_array_get_first_block(
-/*======================*/
- dyn_array_t* arr); /*!< in: dyn array */
-/************************************************************//**
-Gets the last block in a dyn array. */
-UNIV_INLINE
-dyn_block_t*
-dyn_array_get_last_block(
-/*=====================*/
- dyn_array_t* arr); /*!< in: dyn array */
-/********************************************************************//**
-Gets the next block in a dyn array.
-@return pointer to next, NULL if end of list */
-UNIV_INLINE
-dyn_block_t*
-dyn_array_get_next_block(
-/*=====================*/
- dyn_array_t* arr, /*!< in: dyn array */
- dyn_block_t* block); /*!< in: dyn array block */
-/********************************************************************//**
-Gets the number of used bytes in a dyn array block.
-@return number of bytes used */
-UNIV_INLINE
-ulint
-dyn_block_get_used(
-/*===============*/
- dyn_block_t* block); /*!< in: dyn array block */
-/********************************************************************//**
-Gets pointer to the start of data in a dyn array block.
-@return pointer to data */
-UNIV_INLINE
-byte*
-dyn_block_get_data(
-/*===============*/
- dyn_block_t* block); /*!< in: dyn array block */
-/********************************************************//**
-Pushes n bytes to a dyn array. */
-UNIV_INLINE
-void
-dyn_push_string(
-/*============*/
- dyn_array_t* arr, /*!< in: dyn array */
- const byte* str, /*!< in: string to write */
- ulint len); /*!< in: string length */
-
-/*#################################################################*/
-
-/** @brief A block in a dynamically allocated array.
-NOTE! Do not access the fields of the struct directly: the definition
-appears here only for the compiler to know its size! */
-struct dyn_block_struct{
- mem_heap_t* heap; /*!< in the first block this is != NULL
- if dynamic allocation has been needed */
- ulint used; /*!< number of data bytes used in this block;
- DYN_BLOCK_FULL_FLAG is set when the block
- becomes full */
- byte data[DYN_ARRAY_DATA_SIZE];
- /*!< storage for array elements */
- UT_LIST_BASE_NODE_T(dyn_block_t) base;
- /*!< linear list of dyn blocks: this node is
- used only in the first block */
- UT_LIST_NODE_T(dyn_block_t) list;
- /*!< linear list node: used in all blocks */
-#ifdef UNIV_DEBUG
- ulint buf_end;/*!< only in the debug version: if dyn
- array is opened, this is the buffer
- end offset, else this is 0 */
- ulint magic_n;/*!< magic number (DYN_BLOCK_MAGIC_N) */
-#endif
-};
-
-
-#ifndef UNIV_NONINL
-#include "dyn0dyn.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/dyn0dyn.ic b/storage/innodb_plugin/include/dyn0dyn.ic
deleted file mode 100644
index 110e674abff..00000000000
--- a/storage/innodb_plugin/include/dyn0dyn.ic
+++ /dev/null
@@ -1,365 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dyn0dyn.ic
-The dynamically allocated array
-
-Created 2/5/1996 Heikki Tuuri
-*******************************************************/
-
-/** Value of dyn_block_struct::magic_n */
-#define DYN_BLOCK_MAGIC_N 375767
-/** Flag for dyn_block_struct::used that indicates a full block */
-#define DYN_BLOCK_FULL_FLAG 0x1000000UL
-
-/************************************************************//**
-Adds a new block to a dyn array.
-@return created block */
-UNIV_INTERN
-dyn_block_t*
-dyn_array_add_block(
-/*================*/
- dyn_array_t* arr); /*!< in: dyn array */
-
-
-/************************************************************//**
-Gets the first block in a dyn array. */
-UNIV_INLINE
-dyn_block_t*
-dyn_array_get_first_block(
-/*======================*/
- dyn_array_t* arr) /*!< in: dyn array */
-{
- return(arr);
-}
-
-/************************************************************//**
-Gets the last block in a dyn array. */
-UNIV_INLINE
-dyn_block_t*
-dyn_array_get_last_block(
-/*=====================*/
- dyn_array_t* arr) /*!< in: dyn array */
-{
- if (arr->heap == NULL) {
-
- return(arr);
- }
-
- return(UT_LIST_GET_LAST(arr->base));
-}
-
-/********************************************************************//**
-Gets the next block in a dyn array.
-@return pointer to next, NULL if end of list */
-UNIV_INLINE
-dyn_block_t*
-dyn_array_get_next_block(
-/*=====================*/
- dyn_array_t* arr, /*!< in: dyn array */
- dyn_block_t* block) /*!< in: dyn array block */
-{
- ut_ad(arr && block);
-
- if (arr->heap == NULL) {
- ut_ad(arr == block);
-
- return(NULL);
- }
-
- return(UT_LIST_GET_NEXT(list, block));
-}
-
-/********************************************************************//**
-Gets the number of used bytes in a dyn array block.
-@return number of bytes used */
-UNIV_INLINE
-ulint
-dyn_block_get_used(
-/*===============*/
- dyn_block_t* block) /*!< in: dyn array block */
-{
- ut_ad(block);
-
- return((block->used) & ~DYN_BLOCK_FULL_FLAG);
-}
-
-/********************************************************************//**
-Gets pointer to the start of data in a dyn array block.
-@return pointer to data */
-UNIV_INLINE
-byte*
-dyn_block_get_data(
-/*===============*/
- dyn_block_t* block) /*!< in: dyn array block */
-{
- ut_ad(block);
-
- return(block->data);
-}
-
-/*********************************************************************//**
-Initializes a dynamic array.
-@return initialized dyn array */
-UNIV_INLINE
-dyn_array_t*
-dyn_array_create(
-/*=============*/
- dyn_array_t* arr) /*!< in: pointer to a memory buffer of
- size sizeof(dyn_array_t) */
-{
- ut_ad(arr);
-#if DYN_ARRAY_DATA_SIZE >= DYN_BLOCK_FULL_FLAG
-# error "DYN_ARRAY_DATA_SIZE >= DYN_BLOCK_FULL_FLAG"
-#endif
-
- arr->heap = NULL;
- arr->used = 0;
-
-#ifdef UNIV_DEBUG
- arr->buf_end = 0;
- arr->magic_n = DYN_BLOCK_MAGIC_N;
-#endif
- return(arr);
-}
-
-/************************************************************//**
-Frees a dynamic array. */
-UNIV_INLINE
-void
-dyn_array_free(
-/*===========*/
- dyn_array_t* arr) /*!< in: dyn array */
-{
- if (arr->heap != NULL) {
- mem_heap_free(arr->heap);
- }
-
-#ifdef UNIV_DEBUG
- arr->magic_n = 0;
-#endif
-}
-
-/*********************************************************************//**
-Makes room on top of a dyn array and returns a pointer to the added element.
-The caller must copy the element to the pointer returned.
-@return pointer to the element */
-UNIV_INLINE
-void*
-dyn_array_push(
-/*===========*/
- dyn_array_t* arr, /*!< in: dynamic array */
- ulint size) /*!< in: size in bytes of the element */
-{
- dyn_block_t* block;
- ulint used;
-
- ut_ad(arr);
- ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
- ut_ad(size <= DYN_ARRAY_DATA_SIZE);
- ut_ad(size);
-
- block = arr;
- used = block->used;
-
- if (used + size > DYN_ARRAY_DATA_SIZE) {
- /* Get the last array block */
-
- block = dyn_array_get_last_block(arr);
- used = block->used;
-
- if (used + size > DYN_ARRAY_DATA_SIZE) {
- block = dyn_array_add_block(arr);
- used = block->used;
- }
- }
-
- block->used = used + size;
- ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
-
- return((block->data) + used);
-}
-
-/*********************************************************************//**
-Makes room on top of a dyn array and returns a pointer to a buffer in it.
-After copying the elements, the caller must close the buffer using
-dyn_array_close.
-@return pointer to the buffer */
-UNIV_INLINE
-byte*
-dyn_array_open(
-/*===========*/
- dyn_array_t* arr, /*!< in: dynamic array */
- ulint size) /*!< in: size in bytes of the buffer; MUST be
- smaller than DYN_ARRAY_DATA_SIZE! */
-{
- dyn_block_t* block;
- ulint used;
-
- ut_ad(arr);
- ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
- ut_ad(size <= DYN_ARRAY_DATA_SIZE);
- ut_ad(size);
-
- block = arr;
- used = block->used;
-
- if (used + size > DYN_ARRAY_DATA_SIZE) {
- /* Get the last array block */
-
- block = dyn_array_get_last_block(arr);
- used = block->used;
-
- if (used + size > DYN_ARRAY_DATA_SIZE) {
- block = dyn_array_add_block(arr);
- used = block->used;
- ut_a(size <= DYN_ARRAY_DATA_SIZE);
- }
- }
-
- ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
-#ifdef UNIV_DEBUG
- ut_ad(arr->buf_end == 0);
-
- arr->buf_end = used + size;
-#endif
- return((block->data) + used);
-}
-
-/*********************************************************************//**
-Closes the buffer returned by dyn_array_open. */
-UNIV_INLINE
-void
-dyn_array_close(
-/*============*/
- dyn_array_t* arr, /*!< in: dynamic array */
- byte* ptr) /*!< in: buffer space from ptr up was not used */
-{
- dyn_block_t* block;
-
- ut_ad(arr);
- ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
-
- block = dyn_array_get_last_block(arr);
-
- ut_ad(arr->buf_end + block->data >= ptr);
-
- block->used = ptr - block->data;
-
- ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
-
-#ifdef UNIV_DEBUG
- arr->buf_end = 0;
-#endif
-}
-
-/************************************************************//**
-Returns pointer to an element in dyn array.
-@return pointer to element */
-UNIV_INLINE
-void*
-dyn_array_get_element(
-/*==================*/
- dyn_array_t* arr, /*!< in: dyn array */
- ulint pos) /*!< in: position of element as bytes
- from array start */
-{
- dyn_block_t* block;
- ulint used;
-
- ut_ad(arr);
- ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
-
- /* Get the first array block */
- block = dyn_array_get_first_block(arr);
-
- if (arr->heap != NULL) {
- used = dyn_block_get_used(block);
-
- while (pos >= used) {
- pos -= used;
- block = UT_LIST_GET_NEXT(list, block);
- ut_ad(block);
-
- used = dyn_block_get_used(block);
- }
- }
-
- ut_ad(block);
- ut_ad(dyn_block_get_used(block) >= pos);
-
- return(block->data + pos);
-}
-
-/************************************************************//**
-Returns the size of stored data in a dyn array.
-@return data size in bytes */
-UNIV_INLINE
-ulint
-dyn_array_get_data_size(
-/*====================*/
- dyn_array_t* arr) /*!< in: dyn array */
-{
- dyn_block_t* block;
- ulint sum = 0;
-
- ut_ad(arr);
- ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
-
- if (arr->heap == NULL) {
-
- return(arr->used);
- }
-
- /* Get the first array block */
- block = dyn_array_get_first_block(arr);
-
- while (block != NULL) {
- sum += dyn_block_get_used(block);
- block = dyn_array_get_next_block(arr, block);
- }
-
- return(sum);
-}
-
-/********************************************************//**
-Pushes n bytes to a dyn array. */
-UNIV_INLINE
-void
-dyn_push_string(
-/*============*/
- dyn_array_t* arr, /*!< in: dyn array */
- const byte* str, /*!< in: string to write */
- ulint len) /*!< in: string length */
-{
- ulint n_copied;
-
- while (len > 0) {
- if (len > DYN_ARRAY_DATA_SIZE) {
- n_copied = DYN_ARRAY_DATA_SIZE;
- } else {
- n_copied = len;
- }
-
- memcpy(dyn_array_push(arr, n_copied), str, n_copied);
-
- str += n_copied;
- len -= n_copied;
- }
-}
diff --git a/storage/innodb_plugin/include/eval0eval.h b/storage/innodb_plugin/include/eval0eval.h
deleted file mode 100644
index 60aefd8d453..00000000000
--- a/storage/innodb_plugin/include/eval0eval.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/eval0eval.h
-SQL evaluator: evaluates simple data structures, like expressions, in
-a query graph
-
-Created 12/29/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef eval0eval_h
-#define eval0eval_h
-
-#include "univ.i"
-#include "que0types.h"
-#include "pars0sym.h"
-#include "pars0pars.h"
-
-/*****************************************************************//**
-Free the buffer from global dynamic memory for a value of a que_node,
-if it has been allocated in the above function. The freeing for pushed
-column values is done in sel_col_prefetch_buf_free. */
-UNIV_INTERN
-void
-eval_node_free_val_buf(
-/*===================*/
- que_node_t* node); /*!< in: query graph node */
-/*****************************************************************//**
-Evaluates a symbol table symbol. */
-UNIV_INLINE
-void
-eval_sym(
-/*=====*/
- sym_node_t* sym_node); /*!< in: symbol table node */
-/*****************************************************************//**
-Evaluates an expression. */
-UNIV_INLINE
-void
-eval_exp(
-/*=====*/
- que_node_t* exp_node); /*!< in: expression */
-/*****************************************************************//**
-Sets an integer value as the value of an expression node. */
-UNIV_INLINE
-void
-eval_node_set_int_val(
-/*==================*/
- que_node_t* node, /*!< in: expression node */
- lint val); /*!< in: value to set */
-/*****************************************************************//**
-Gets an integer value from an expression node.
-@return integer value */
-UNIV_INLINE
-lint
-eval_node_get_int_val(
-/*==================*/
- que_node_t* node); /*!< in: expression node */
-/*****************************************************************//**
-Copies a binary string value as the value of a query graph node. Allocates a
-new buffer if necessary. */
-UNIV_INLINE
-void
-eval_node_copy_and_alloc_val(
-/*=========================*/
- que_node_t* node, /*!< in: query graph node */
- const byte* str, /*!< in: binary string */
- ulint len); /*!< in: string length or UNIV_SQL_NULL */
-/*****************************************************************//**
-Copies a query node value to another node. */
-UNIV_INLINE
-void
-eval_node_copy_val(
-/*===============*/
- que_node_t* node1, /*!< in: node to copy to */
- que_node_t* node2); /*!< in: node to copy from */
-/*****************************************************************//**
-Gets a iboolean value from a query node.
-@return iboolean value */
-UNIV_INLINE
-ibool
-eval_node_get_ibool_val(
-/*====================*/
- que_node_t* node); /*!< in: query graph node */
-/*****************************************************************//**
-Evaluates a comparison node.
-@return the result of the comparison */
-UNIV_INTERN
-ibool
-eval_cmp(
-/*=====*/
- func_node_t* cmp_node); /*!< in: comparison node */
-
-
-#ifndef UNIV_NONINL
-#include "eval0eval.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/eval0eval.ic b/storage/innodb_plugin/include/eval0eval.ic
deleted file mode 100644
index fe767f39b00..00000000000
--- a/storage/innodb_plugin/include/eval0eval.ic
+++ /dev/null
@@ -1,251 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/eval0eval.ic
-SQL evaluator: evaluates simple data structures, like expressions, in
-a query graph
-
-Created 12/29/1997 Heikki Tuuri
-*******************************************************/
-
-#include "que0que.h"
-#include "rem0cmp.h"
-#include "pars0grm.h"
-
-/*****************************************************************//**
-Evaluates a function node. */
-UNIV_INTERN
-void
-eval_func(
-/*======*/
- func_node_t* func_node); /*!< in: function node */
-/*****************************************************************//**
-Allocate a buffer from global dynamic memory for a value of a que_node.
-NOTE that this memory must be explicitly freed when the query graph is
-freed. If the node already has allocated buffer, that buffer is freed
-here. NOTE that this is the only function where dynamic memory should be
-allocated for a query node val field.
-@return pointer to allocated buffer */
-UNIV_INTERN
-byte*
-eval_node_alloc_val_buf(
-/*====================*/
- que_node_t* node, /*!< in: query graph node; sets the val field
- data field to point to the new buffer, and
- len field equal to size */
- ulint size); /*!< in: buffer size */
-
-
-/*****************************************************************//**
-Allocates a new buffer if needed.
-@return pointer to buffer */
-UNIV_INLINE
-byte*
-eval_node_ensure_val_buf(
-/*=====================*/
- que_node_t* node, /*!< in: query graph node; sets the val field
- data field to point to the new buffer, and
- len field equal to size */
- ulint size) /*!< in: buffer size */
-{
- dfield_t* dfield;
- byte* data;
-
- dfield = que_node_get_val(node);
- dfield_set_len(dfield, size);
-
- data = dfield_get_data(dfield);
-
- if (!data || que_node_get_val_buf_size(node) < size) {
-
- data = eval_node_alloc_val_buf(node, size);
- }
-
- return(data);
-}
-
-/*****************************************************************//**
-Evaluates a symbol table symbol. */
-UNIV_INLINE
-void
-eval_sym(
-/*=====*/
- sym_node_t* sym_node) /*!< in: symbol table node */
-{
-
- ut_ad(que_node_get_type(sym_node) == QUE_NODE_SYMBOL);
-
- if (sym_node->indirection) {
- /* The symbol table node is an alias for a variable or a
- column */
-
- dfield_copy_data(que_node_get_val(sym_node),
- que_node_get_val(sym_node->indirection));
- }
-}
-
-/*****************************************************************//**
-Evaluates an expression. */
-UNIV_INLINE
-void
-eval_exp(
-/*=====*/
- que_node_t* exp_node) /*!< in: expression */
-{
- if (que_node_get_type(exp_node) == QUE_NODE_SYMBOL) {
-
- eval_sym((sym_node_t*)exp_node);
-
- return;
- }
-
- eval_func(exp_node);
-}
-
-/*****************************************************************//**
-Sets an integer value as the value of an expression node. */
-UNIV_INLINE
-void
-eval_node_set_int_val(
-/*==================*/
- que_node_t* node, /*!< in: expression node */
- lint val) /*!< in: value to set */
-{
- dfield_t* dfield;
- byte* data;
-
- dfield = que_node_get_val(node);
-
- data = dfield_get_data(dfield);
-
- if (data == NULL) {
- data = eval_node_alloc_val_buf(node, 4);
- }
-
- ut_ad(dfield_get_len(dfield) == 4);
-
- mach_write_to_4(data, (ulint)val);
-}
-
-/*****************************************************************//**
-Gets an integer non-SQL null value from an expression node.
-@return integer value */
-UNIV_INLINE
-lint
-eval_node_get_int_val(
-/*==================*/
- que_node_t* node) /*!< in: expression node */
-{
- dfield_t* dfield;
-
- dfield = que_node_get_val(node);
-
- ut_ad(dfield_get_len(dfield) == 4);
-
- return((int)mach_read_from_4(dfield_get_data(dfield)));
-}
-
-/*****************************************************************//**
-Gets a iboolean value from a query node.
-@return iboolean value */
-UNIV_INLINE
-ibool
-eval_node_get_ibool_val(
-/*====================*/
- que_node_t* node) /*!< in: query graph node */
-{
- dfield_t* dfield;
- byte* data;
-
- dfield = que_node_get_val(node);
-
- data = dfield_get_data(dfield);
-
- ut_ad(data != NULL);
-
- return(mach_read_from_1(data));
-}
-
-/*****************************************************************//**
-Sets a iboolean value as the value of a function node. */
-UNIV_INLINE
-void
-eval_node_set_ibool_val(
-/*====================*/
- func_node_t* func_node, /*!< in: function node */
- ibool val) /*!< in: value to set */
-{
- dfield_t* dfield;
- byte* data;
-
- dfield = que_node_get_val(func_node);
-
- data = dfield_get_data(dfield);
-
- if (data == NULL) {
- /* Allocate 1 byte to hold the value */
-
- data = eval_node_alloc_val_buf(func_node, 1);
- }
-
- ut_ad(dfield_get_len(dfield) == 1);
-
- mach_write_to_1(data, val);
-}
-
-/*****************************************************************//**
-Copies a binary string value as the value of a query graph node. Allocates a
-new buffer if necessary. */
-UNIV_INLINE
-void
-eval_node_copy_and_alloc_val(
-/*=========================*/
- que_node_t* node, /*!< in: query graph node */
- const byte* str, /*!< in: binary string */
- ulint len) /*!< in: string length or UNIV_SQL_NULL */
-{
- byte* data;
-
- if (len == UNIV_SQL_NULL) {
- dfield_set_len(que_node_get_val(node), len);
-
- return;
- }
-
- data = eval_node_ensure_val_buf(node, len);
-
- ut_memcpy(data, str, len);
-}
-
-/*****************************************************************//**
-Copies a query node value to another node. */
-UNIV_INLINE
-void
-eval_node_copy_val(
-/*===============*/
- que_node_t* node1, /*!< in: node to copy to */
- que_node_t* node2) /*!< in: node to copy from */
-{
- dfield_t* dfield2;
-
- dfield2 = que_node_get_val(node2);
-
- eval_node_copy_and_alloc_val(node1, dfield_get_data(dfield2),
- dfield_get_len(dfield2));
-}
diff --git a/storage/innodb_plugin/include/eval0proc.h b/storage/innodb_plugin/include/eval0proc.h
deleted file mode 100644
index 13e2e365320..00000000000
--- a/storage/innodb_plugin/include/eval0proc.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1998, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/eval0proc.h
-Executes SQL stored procedures and their control structures
-
-Created 1/20/1998 Heikki Tuuri
-*******************************************************/
-
-#ifndef eval0proc_h
-#define eval0proc_h
-
-#include "univ.i"
-#include "que0types.h"
-#include "pars0sym.h"
-#include "pars0pars.h"
-
-/**********************************************************************//**
-Performs an execution step of a procedure node.
-@return query thread to run next or NULL */
-UNIV_INLINE
-que_thr_t*
-proc_step(
-/*======*/
- que_thr_t* thr); /*!< in: query thread */
-/**********************************************************************//**
-Performs an execution step of an if-statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-if_step(
-/*====*/
- que_thr_t* thr); /*!< in: query thread */
-/**********************************************************************//**
-Performs an execution step of a while-statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-while_step(
-/*=======*/
- que_thr_t* thr); /*!< in: query thread */
-/**********************************************************************//**
-Performs an execution step of a for-loop node.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-for_step(
-/*=====*/
- que_thr_t* thr); /*!< in: query thread */
-/**********************************************************************//**
-Performs an execution step of an assignment statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-assign_step(
-/*========*/
- que_thr_t* thr); /*!< in: query thread */
-/**********************************************************************//**
-Performs an execution step of a procedure call node.
-@return query thread to run next or NULL */
-UNIV_INLINE
-que_thr_t*
-proc_eval_step(
-/*===========*/
- que_thr_t* thr); /*!< in: query thread */
-/**********************************************************************//**
-Performs an execution step of an exit statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-exit_step(
-/*======*/
- que_thr_t* thr); /*!< in: query thread */
-/**********************************************************************//**
-Performs an execution step of a return-statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-return_step(
-/*========*/
- que_thr_t* thr); /*!< in: query thread */
-
-
-#ifndef UNIV_NONINL
-#include "eval0proc.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/eval0proc.ic b/storage/innodb_plugin/include/eval0proc.ic
deleted file mode 100644
index c602af0a694..00000000000
--- a/storage/innodb_plugin/include/eval0proc.ic
+++ /dev/null
@@ -1,88 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1998, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/eval0proc.ic
-Executes SQL stored procedures and their control structures
-
-Created 1/20/1998 Heikki Tuuri
-*******************************************************/
-
-#include "pars0pars.h"
-#include "que0que.h"
-#include "eval0eval.h"
-
-/**********************************************************************//**
-Performs an execution step of a procedure node.
-@return query thread to run next or NULL */
-UNIV_INLINE
-que_thr_t*
-proc_step(
-/*======*/
- que_thr_t* thr) /*!< in: query thread */
-{
- proc_node_t* node;
-
- ut_ad(thr);
-
- node = thr->run_node;
- ut_ad(que_node_get_type(node) == QUE_NODE_PROC);
-
- if (thr->prev_node == que_node_get_parent(node)) {
- /* Start execution from the first statement in the statement
- list */
-
- thr->run_node = node->stat_list;
- } else {
- /* Move to the next statement */
- ut_ad(que_node_get_next(thr->prev_node) == NULL);
-
- thr->run_node = NULL;
- }
-
- if (thr->run_node == NULL) {
- thr->run_node = que_node_get_parent(node);
- }
-
- return(thr);
-}
-
-/**********************************************************************//**
-Performs an execution step of a procedure call node.
-@return query thread to run next or NULL */
-UNIV_INLINE
-que_thr_t*
-proc_eval_step(
-/*===========*/
- que_thr_t* thr) /*!< in: query thread */
-{
- func_node_t* node;
-
- ut_ad(thr);
-
- node = thr->run_node;
- ut_ad(que_node_get_type(node) == QUE_NODE_FUNC);
-
- /* Evaluate the procedure */
-
- eval_exp(node);
-
- thr->run_node = que_node_get_parent(node);
-
- return(thr);
-}
diff --git a/storage/innodb_plugin/include/fil0fil.h b/storage/innodb_plugin/include/fil0fil.h
deleted file mode 100644
index 74d0fbcdacd..00000000000
--- a/storage/innodb_plugin/include/fil0fil.h
+++ /dev/null
@@ -1,723 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/fil0fil.h
-The low-level file system
-
-Created 10/25/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef fil0fil_h
-#define fil0fil_h
-
-#include "univ.i"
-#ifndef UNIV_HOTBACKUP
-#include "sync0rw.h"
-#endif /* !UNIV_HOTBACKUP */
-#include "dict0types.h"
-#include "ut0byte.h"
-#include "os0file.h"
-
-/** When mysqld is run, the default directory "." is the mysqld datadir,
-but in the MySQL Embedded Server Library and ibbackup it is not the default
-directory, and we must set the base file path explicitly */
-extern const char* fil_path_to_mysql_datadir;
-
-/** Initial size of a single-table tablespace in pages */
-#define FIL_IBD_FILE_INITIAL_SIZE 4
-
-/** 'null' (undefined) page offset in the context of file spaces */
-#define FIL_NULL ULINT32_UNDEFINED
-
-/* Space address data type; this is intended to be used when
-addresses accurate to a byte are stored in file pages. If the page part
-of the address is FIL_NULL, the address is considered undefined. */
-
-typedef byte fil_faddr_t; /*!< 'type' definition in C: an address
- stored in a file page is a string of bytes */
-#define FIL_ADDR_PAGE 0 /* first in address is the page offset */
-#define FIL_ADDR_BYTE 4 /* then comes 2-byte byte offset within page*/
-
-#define FIL_ADDR_SIZE 6 /* address size is 6 bytes */
-
-/** A struct for storing a space address FIL_ADDR, when it is used
-in C program data structures. */
-
-typedef struct fil_addr_struct fil_addr_t;
-/** File space address */
-struct fil_addr_struct{
- ulint page; /*!< page number within a space */
- ulint boffset; /*!< byte offset within the page */
-};
-
-/** The null file address */
-extern fil_addr_t fil_addr_null;
-
-/** The byte offsets on a file page for various variables @{ */
-#define FIL_PAGE_SPACE_OR_CHKSUM 0 /*!< in < MySQL-4.0.14 space id the
- page belongs to (== 0) but in later
- versions the 'new' checksum of the
- page */
-#define FIL_PAGE_OFFSET 4 /*!< page offset inside space */
-#define FIL_PAGE_PREV 8 /*!< if there is a 'natural'
- predecessor of the page, its
- offset. Otherwise FIL_NULL.
- This field is not set on BLOB
- pages, which are stored as a
- singly-linked list. See also
- FIL_PAGE_NEXT. */
-#define FIL_PAGE_NEXT 12 /*!< if there is a 'natural' successor
- of the page, its offset.
- Otherwise FIL_NULL.
- B-tree index pages
- (FIL_PAGE_TYPE contains FIL_PAGE_INDEX)
- on the same PAGE_LEVEL are maintained
- as a doubly linked list via
- FIL_PAGE_PREV and FIL_PAGE_NEXT
- in the collation order of the
- smallest user record on each page. */
-#define FIL_PAGE_LSN 16 /*!< lsn of the end of the newest
- modification log record to the page */
-#define FIL_PAGE_TYPE 24 /*!< file page type: FIL_PAGE_INDEX,...,
- 2 bytes.
-
- The contents of this field can only
- be trusted in the following case:
- if the page is an uncompressed
- B-tree index page, then it is
- guaranteed that the value is
- FIL_PAGE_INDEX.
- The opposite does not hold.
-
- In tablespaces created by
- MySQL/InnoDB 5.1.7 or later, the
- contents of this field is valid
- for all uncompressed pages. */
-#define FIL_PAGE_FILE_FLUSH_LSN 26 /*!< this is only defined for the
- first page in a data file: the file
- has been flushed to disk at least up
- to this lsn */
-#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /*!< starting from 4.1.x this
- contains the space id of the page */
-#define FIL_PAGE_DATA 38 /*!< start of the data on the page */
-/* @} */
-/** File page trailer @{ */
-#define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /*!< the low 4 bytes of this are used
- to store the page checksum, the
- last 4 bytes should be identical
- to the last 4 bytes of FIL_PAGE_LSN */
-#define FIL_PAGE_DATA_END 8 /*!< size of the page trailer */
-/* @} */
-
-/** File page types (values of FIL_PAGE_TYPE) @{ */
-#define FIL_PAGE_INDEX 17855 /*!< B-tree node */
-#define FIL_PAGE_UNDO_LOG 2 /*!< Undo log page */
-#define FIL_PAGE_INODE 3 /*!< Index node */
-#define FIL_PAGE_IBUF_FREE_LIST 4 /*!< Insert buffer free list */
-/* File page types introduced in MySQL/InnoDB 5.1.7 */
-#define FIL_PAGE_TYPE_ALLOCATED 0 /*!< Freshly allocated page */
-#define FIL_PAGE_IBUF_BITMAP 5 /*!< Insert buffer bitmap */
-#define FIL_PAGE_TYPE_SYS 6 /*!< System page */
-#define FIL_PAGE_TYPE_TRX_SYS 7 /*!< Transaction system data */
-#define FIL_PAGE_TYPE_FSP_HDR 8 /*!< File space header */
-#define FIL_PAGE_TYPE_XDES 9 /*!< Extent descriptor page */
-#define FIL_PAGE_TYPE_BLOB 10 /*!< Uncompressed BLOB page */
-#define FIL_PAGE_TYPE_ZBLOB 11 /*!< First compressed BLOB page */
-#define FIL_PAGE_TYPE_ZBLOB2 12 /*!< Subsequent compressed BLOB page */
-/* @} */
-
-/** Space types @{ */
-#define FIL_TABLESPACE 501 /*!< tablespace */
-#define FIL_LOG 502 /*!< redo log */
-/* @} */
-
-/** The number of fsyncs done to the log */
-extern ulint fil_n_log_flushes;
-
-/** Number of pending redo log flushes */
-extern ulint fil_n_pending_log_flushes;
-/** Number of pending tablespace flushes */
-extern ulint fil_n_pending_tablespace_flushes;
-
-
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Returns the version number of a tablespace, -1 if not found.
-@return version number, -1 if the tablespace does not exist in the
-memory cache */
-UNIV_INTERN
-ib_int64_t
-fil_space_get_version(
-/*==================*/
- ulint id); /*!< in: space id */
-/*******************************************************************//**
-Returns the latch of a file space.
-@return latch protecting storage allocation */
-UNIV_INTERN
-rw_lock_t*
-fil_space_get_latch(
-/*================*/
- ulint id, /*!< in: space id */
- ulint* zip_size);/*!< out: compressed page size, or
- 0 for uncompressed tablespaces */
-/*******************************************************************//**
-Returns the type of a file space.
-@return FIL_TABLESPACE or FIL_LOG */
-UNIV_INTERN
-ulint
-fil_space_get_type(
-/*===============*/
- ulint id); /*!< in: space id */
-#endif /* !UNIV_HOTBACKUP */
-/*******************************************************************//**
-Appends a new file to the chain of files of a space. File must be closed. */
-UNIV_INTERN
-void
-fil_node_create(
-/*============*/
- const char* name, /*!< in: file name (file must be closed) */
- ulint size, /*!< in: file size in database blocks, rounded
- downwards to an integer */
- ulint id, /*!< in: space id where to append */
- ibool is_raw);/*!< in: TRUE if a raw device or
- a raw disk partition */
-#ifdef UNIV_LOG_ARCHIVE
-/****************************************************************//**
-Drops files from the start of a file space, so that its size is cut by
-the amount given. */
-UNIV_INTERN
-void
-fil_space_truncate_start(
-/*=====================*/
- ulint id, /*!< in: space id */
- ulint trunc_len); /*!< in: truncate by this much; it is an error
- if this does not equal to the combined size of
- some initial files in the space */
-#endif /* UNIV_LOG_ARCHIVE */
-/*******************************************************************//**
-Creates a space memory object and puts it to the 'fil system' hash table. If
-there is an error, prints an error message to the .err log.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_space_create(
-/*=============*/
- const char* name, /*!< in: space name */
- ulint id, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size, or
- 0 for uncompressed tablespaces */
- ulint purpose);/*!< in: FIL_TABLESPACE, or FIL_LOG if log */
-/*******************************************************************//**
-Returns the size of the space in pages. The tablespace must be cached in the
-memory cache.
-@return space size, 0 if space not found */
-UNIV_INTERN
-ulint
-fil_space_get_size(
-/*===============*/
- ulint id); /*!< in: space id */
-/*******************************************************************//**
-Returns the flags of the space. The tablespace must be cached
-in the memory cache.
-@return flags, ULINT_UNDEFINED if space not found */
-UNIV_INTERN
-ulint
-fil_space_get_flags(
-/*================*/
- ulint id); /*!< in: space id */
-/*******************************************************************//**
-Returns the compressed page size of the space, or 0 if the space
-is not compressed. The tablespace must be cached in the memory cache.
-@return compressed page size, ULINT_UNDEFINED if space not found */
-UNIV_INTERN
-ulint
-fil_space_get_zip_size(
-/*===================*/
- ulint id); /*!< in: space id */
-/*******************************************************************//**
-Checks if the pair space, page_no refers to an existing page in a tablespace
-file space. The tablespace must be cached in the memory cache.
-@return TRUE if the address is meaningful */
-UNIV_INTERN
-ibool
-fil_check_adress_in_tablespace(
-/*===========================*/
- ulint id, /*!< in: space id */
- ulint page_no);/*!< in: page number */
-/****************************************************************//**
-Initializes the tablespace memory cache. */
-UNIV_INTERN
-void
-fil_init(
-/*=====*/
- ulint hash_size, /*!< in: hash table size */
- ulint max_n_open); /*!< in: max number of open files */
-/*******************************************************************//**
-Initializes the tablespace memory cache. */
-UNIV_INTERN
-void
-fil_close(void);
-/*===========*/
-/*******************************************************************//**
-Opens all log files and system tablespace data files. They stay open until the
-database server shutdown. This should be called at a server startup after the
-space objects for the log and the system tablespace have been created. The
-purpose of this operation is to make sure we never run out of file descriptors
-if we need to read from the insert buffer or to write to the log. */
-UNIV_INTERN
-void
-fil_open_log_and_system_tablespace_files(void);
-/*==========================================*/
-/*******************************************************************//**
-Closes all open files. There must not be any pending i/o's or not flushed
-modifications in the files. */
-UNIV_INTERN
-void
-fil_close_all_files(void);
-/*=====================*/
-/*******************************************************************//**
-Sets the max tablespace id counter if the given number is bigger than the
-previous value. */
-UNIV_INTERN
-void
-fil_set_max_space_id_if_bigger(
-/*===========================*/
- ulint max_id);/*!< in: maximum known id */
-#ifndef UNIV_HOTBACKUP
-/****************************************************************//**
-Writes the flushed lsn and the latest archived log number to the page
-header of the first page of each data file in the system tablespace.
-@return DB_SUCCESS or error number */
-UNIV_INTERN
-ulint
-fil_write_flushed_lsn_to_data_files(
-/*================================*/
- ib_uint64_t lsn, /*!< in: lsn to write */
- ulint arch_log_no); /*!< in: latest archived log
- file number */
-/*******************************************************************//**
-Reads the flushed lsn and arch no fields from a data file at database
-startup. */
-UNIV_INTERN
-void
-fil_read_flushed_lsn_and_arch_log_no(
-/*=================================*/
- os_file_t data_file, /*!< in: open data file */
- ibool one_read_already, /*!< in: TRUE if min and max
- parameters below already
- contain sensible data */
-#ifdef UNIV_LOG_ARCHIVE
- ulint* min_arch_log_no, /*!< in/out: */
- ulint* max_arch_log_no, /*!< in/out: */
-#endif /* UNIV_LOG_ARCHIVE */
- ib_uint64_t* min_flushed_lsn, /*!< in/out: */
- ib_uint64_t* max_flushed_lsn); /*!< in/out: */
-/*******************************************************************//**
-Increments the count of pending insert buffer page merges, if space is not
-being deleted.
-@return TRUE if being deleted, and ibuf merges should be skipped */
-UNIV_INTERN
-ibool
-fil_inc_pending_ibuf_merges(
-/*========================*/
- ulint id); /*!< in: space id */
-/*******************************************************************//**
-Decrements the count of pending insert buffer page merges. */
-UNIV_INTERN
-void
-fil_decr_pending_ibuf_merges(
-/*=========================*/
- ulint id); /*!< in: space id */
-#endif /* !UNIV_HOTBACKUP */
-/*******************************************************************//**
-Parses the body of a log record written about an .ibd file operation. That is,
-the log record part after the standard (type, space id, page no) header of the
-log record.
-
-If desired, also replays the delete or rename operation if the .ibd file
-exists and the space id in it matches. Replays the create operation if a file
-at that path does not exist yet. If the database directory for the file to be
-created does not exist, then we create the directory, too.
-
-Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the
-datadir that we should use in replaying the file operations.
-@return end of log record, or NULL if the record was not completely
-contained between ptr and end_ptr */
-UNIV_INTERN
-byte*
-fil_op_log_parse_or_replay(
-/*=======================*/
- byte* ptr, /*!< in: buffer containing the log record body,
- or an initial segment of it, if the record does
- not fir completely between ptr and end_ptr */
- byte* end_ptr, /*!< in: buffer end */
- ulint type, /*!< in: the type of this log record */
- ulint space_id, /*!< in: the space id of the tablespace in
- question, or 0 if the log record should
- only be parsed but not replayed */
- ulint log_flags); /*!< in: redo log flags
- (stored in the page number parameter) */
-/*******************************************************************//**
-Deletes a single-table tablespace. The tablespace must be cached in the
-memory cache.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_delete_tablespace(
-/*==================*/
- ulint id); /*!< in: space id */
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Discards a single-table tablespace. The tablespace must be cached in the
-memory cache. Discarding is like deleting a tablespace, but
-1) we do not drop the table from the data dictionary;
-2) we remove all insert buffer entries for the tablespace immediately; in DROP
-TABLE they are only removed gradually in the background;
-3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
-as it originally had.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_discard_tablespace(
-/*===================*/
- ulint id); /*!< in: space id */
-#endif /* !UNIV_HOTBACKUP */
-/*******************************************************************//**
-Renames a single-table tablespace. The tablespace must be cached in the
-tablespace memory cache.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_rename_tablespace(
-/*==================*/
- const char* old_name, /*!< in: old table name in the standard
- databasename/tablename format of
- InnoDB, or NULL if we do the rename
- based on the space id only */
- ulint id, /*!< in: space id */
- const char* new_name); /*!< in: new table name in the standard
- databasename/tablename format
- of InnoDB */
-
-/*******************************************************************//**
-Creates a new single-table tablespace to a database directory of MySQL.
-Database directories are under the 'datadir' of MySQL. The datadir is the
-directory of a running mysqld program. We can refer to it by simply the
-path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
-dir of the mysqld server.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ulint
-fil_create_new_single_table_tablespace(
-/*===================================*/
- ulint* space_id, /*!< in/out: space id; if this is != 0,
- then this is an input parameter,
- otherwise output */
- const char* tablename, /*!< in: the table name in the usual
- databasename/tablename format
- of InnoDB, or a dir path to a temp
- table */
- ibool is_temp, /*!< in: TRUE if a table created with
- CREATE TEMPORARY TABLE */
- ulint flags, /*!< in: tablespace flags */
- ulint size); /*!< in: the initial size of the
- tablespace file in pages,
- must be >= FIL_IBD_FILE_INITIAL_SIZE */
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Tries to open a single-table tablespace and optionally checks the space id is
-right in it. If does not succeed, prints an error message to the .err log. This
-function is used to open a tablespace when we start up mysqld, and also in
-IMPORT TABLESPACE.
-NOTE that we assume this operation is used either at the database startup
-or under the protection of the dictionary mutex, so that two users cannot
-race here. This operation does not leave the file associated with the
-tablespace open, but closes it after we have looked at the space id in it.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_open_single_table_tablespace(
-/*=============================*/
- ibool check_space_id, /*!< in: should we check that the space
- id in the file is right; we assume
- that this function runs much faster
- if no check is made, since accessing
- the file inode probably is much
- faster (the OS caches them) than
- accessing the first page of the file */
- ulint id, /*!< in: space id */
- ulint flags, /*!< in: tablespace flags */
- const char* name); /*!< in: table name in the
- databasename/tablename format */
-/********************************************************************//**
-It is possible, though very improbable, that the lsn's in the tablespace to be
-imported have risen above the current system lsn, if a lengthy purge, ibuf
-merge, or rollback was performed on a backup taken with ibbackup. If that is
-the case, reset page lsn's in the file. We assume that mysqld was shut down
-after it performed these cleanup operations on the .ibd file, so that it at
-the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the
-first page of the .ibd file, and we can determine whether we need to reset the
-lsn's just by looking at that flush lsn.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_reset_too_high_lsns(
-/*====================*/
- const char* name, /*!< in: table name in the
- databasename/tablename format */
- ib_uint64_t current_lsn); /*!< in: reset lsn's if the lsn stamped
- to FIL_PAGE_FILE_FLUSH_LSN in the
- first page is too high */
-#endif /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-At the server startup, if we need crash recovery, scans the database
-directories under the MySQL datadir, looking for .ibd files. Those files are
-single-table tablespaces. We need to know the space id in each of them so that
-we know into which file we should look to check the contents of a page stored
-in the doublewrite buffer, also to know where to apply log records where the
-space id is != 0.
-@return DB_SUCCESS or error number */
-UNIV_INTERN
-ulint
-fil_load_single_table_tablespaces(void);
-/*===================================*/
-/********************************************************************//**
-If we need crash recovery, and we have called
-fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(),
-we can call this function to print an error message of orphaned .ibd files
-for which there is not a data dictionary entry with a matching table name
-and space id. */
-UNIV_INTERN
-void
-fil_print_orphaned_tablespaces(void);
-/*================================*/
-/*******************************************************************//**
-Returns TRUE if a single-table tablespace does not exist in the memory cache,
-or is being deleted there.
-@return TRUE if does not exist or is being\ deleted */
-UNIV_INTERN
-ibool
-fil_tablespace_deleted_or_being_deleted_in_mem(
-/*===========================================*/
- ulint id, /*!< in: space id */
- ib_int64_t version);/*!< in: tablespace_version should be this; if
- you pass -1 as the value of this, then this
- parameter is ignored */
-/*******************************************************************//**
-Returns TRUE if a single-table tablespace exists in the memory cache.
-@return TRUE if exists */
-UNIV_INTERN
-ibool
-fil_tablespace_exists_in_mem(
-/*=========================*/
- ulint id); /*!< in: space id */
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
-cache. Note that if we have not done a crash recovery at the database startup,
-there may be many tablespaces which are not yet in the memory cache.
-@return TRUE if a matching tablespace exists in the memory cache */
-UNIV_INTERN
-ibool
-fil_space_for_table_exists_in_mem(
-/*==============================*/
- ulint id, /*!< in: space id */
- const char* name, /*!< in: table name in the standard
- 'databasename/tablename' format or
- the dir path to a temp table */
- ibool is_temp, /*!< in: TRUE if created with CREATE
- TEMPORARY TABLE */
- ibool mark_space, /*!< in: in crash recovery, at database
- startup we mark all spaces which have
- an associated table in the InnoDB
- data dictionary, so that
- we can print a warning about orphaned
- tablespaces */
- ibool print_error_if_does_not_exist);
- /*!< in: print detailed error
- information to the .err log if a
- matching tablespace is not found from
- memory */
-#else /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Extends all tablespaces to the size stored in the space header. During the
-ibbackup --apply-log phase we extended the spaces on-demand so that log records
-could be appllied, but that may have left spaces still too small compared to
-the size stored in the space header. */
-UNIV_INTERN
-void
-fil_extend_tablespaces_to_stored_len(void);
-/*======================================*/
-#endif /* !UNIV_HOTBACKUP */
-/**********************************************************************//**
-Tries to extend a data file so that it would accommodate the number of pages
-given. The tablespace must be cached in the memory cache. If the space is big
-enough already, does nothing.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_extend_space_to_desired_size(
-/*=============================*/
- ulint* actual_size, /*!< out: size of the space after extension;
- if we ran out of disk space this may be lower
- than the desired size */
- ulint space_id, /*!< in: space id */
- ulint size_after_extend);/*!< in: desired size in pages after the
- extension; if the current space size is bigger
- than this already, the function does nothing */
-/*******************************************************************//**
-Tries to reserve free extents in a file space.
-@return TRUE if succeed */
-UNIV_INTERN
-ibool
-fil_space_reserve_free_extents(
-/*===========================*/
- ulint id, /*!< in: space id */
- ulint n_free_now, /*!< in: number of free extents now */
- ulint n_to_reserve); /*!< in: how many one wants to reserve */
-/*******************************************************************//**
-Releases free extents in a file space. */
-UNIV_INTERN
-void
-fil_space_release_free_extents(
-/*===========================*/
- ulint id, /*!< in: space id */
- ulint n_reserved); /*!< in: how many one reserved */
-/*******************************************************************//**
-Gets the number of reserved extents. If the database is silent, this number
-should be zero. */
-UNIV_INTERN
-ulint
-fil_space_get_n_reserved_extents(
-/*=============================*/
- ulint id); /*!< in: space id */
-/********************************************************************//**
-Reads or writes data. This operation is asynchronous (aio).
-@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
-i/o on a tablespace which does not exist */
-UNIV_INTERN
-ulint
-fil_io(
-/*===*/
- ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE,
- ORed to OS_FILE_LOG, if a log i/o
- and ORed to OS_AIO_SIMULATED_WAKE_LATER
- if simulated aio and we want to post a
- batch of i/os; NOTE that a simulated batch
- may introduce hidden chances of deadlocks,
- because i/os are not actually handled until
- all have been posted: use with great
- caution! */
- ibool sync, /*!< in: TRUE if synchronous aio is desired */
- ulint space_id, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint block_offset, /*!< in: offset in number of blocks */
- ulint byte_offset, /*!< in: remainder of offset in bytes; in
- aio this must be divisible by the OS block
- size */
- ulint len, /*!< in: how many bytes to read or write; this
- must not cross a file boundary; in aio this
- must be a block size multiple */
- void* buf, /*!< in/out: buffer where to store read data
- or from where to write; in aio this must be
- appropriately aligned */
- void* message); /*!< in: message for aio handler if non-sync
- aio used, else ignored */
-/**********************************************************************//**
-Waits for an aio operation to complete. This function is used to write the
-handler for completed requests. The aio array of pending requests is divided
-into segments (see os0file.c for more info). The thread specifies which
-segment it wants to wait for. */
-UNIV_INTERN
-void
-fil_aio_wait(
-/*=========*/
- ulint segment); /*!< in: the number of the segment in the aio
- array to wait for */
-/**********************************************************************//**
-Flushes to disk possible writes cached by the OS. If the space does not exist
-or is being dropped, does not do anything. */
-UNIV_INTERN
-void
-fil_flush(
-/*======*/
- ulint space_id); /*!< in: file space id (this can be a group of
- log files or a tablespace of the database) */
-/**********************************************************************//**
-Flushes to disk writes in file spaces of the given type possibly cached by
-the OS. */
-UNIV_INTERN
-void
-fil_flush_file_spaces(
-/*==================*/
- ulint purpose); /*!< in: FIL_TABLESPACE, FIL_LOG */
-/******************************************************************//**
-Checks the consistency of the tablespace cache.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-fil_validate(void);
-/*==============*/
-/********************************************************************//**
-Returns TRUE if file address is undefined.
-@return TRUE if undefined */
-UNIV_INTERN
-ibool
-fil_addr_is_null(
-/*=============*/
- fil_addr_t addr); /*!< in: address */
-/********************************************************************//**
-Get the predecessor of a file page.
-@return FIL_PAGE_PREV */
-UNIV_INTERN
-ulint
-fil_page_get_prev(
-/*==============*/
- const byte* page); /*!< in: file page */
-/********************************************************************//**
-Get the successor of a file page.
-@return FIL_PAGE_NEXT */
-UNIV_INTERN
-ulint
-fil_page_get_next(
-/*==============*/
- const byte* page); /*!< in: file page */
-/*********************************************************************//**
-Sets the file page type. */
-UNIV_INTERN
-void
-fil_page_set_type(
-/*==============*/
- byte* page, /*!< in/out: file page */
- ulint type); /*!< in: type */
-/*********************************************************************//**
-Gets the file page type.
-@return type; NOTE that if the type has not been written to page, the
-return value not defined */
-UNIV_INTERN
-ulint
-fil_page_get_type(
-/*==============*/
- const byte* page); /*!< in: file page */
-
-
-typedef struct fil_space_struct fil_space_t;
-
-#endif
diff --git a/storage/innodb_plugin/include/fsp0fsp.h b/storage/innodb_plugin/include/fsp0fsp.h
deleted file mode 100644
index 7abd3914eda..00000000000
--- a/storage/innodb_plugin/include/fsp0fsp.h
+++ /dev/null
@@ -1,359 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/fsp0fsp.h
-File space management
-
-Created 12/18/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef fsp0fsp_h
-#define fsp0fsp_h
-
-#include "univ.i"
-
-#include "mtr0mtr.h"
-#include "fut0lst.h"
-#include "ut0byte.h"
-#include "page0types.h"
-#include "fsp0types.h"
-
-/**********************************************************************//**
-Initializes the file space system. */
-UNIV_INTERN
-void
-fsp_init(void);
-/*==========*/
-/**********************************************************************//**
-Gets the current free limit of the system tablespace. The free limit
-means the place of the first page which has never been put to the
-free list for allocation. The space above that address is initialized
-to zero. Sets also the global variable log_fsp_current_free_limit.
-@return free limit in megabytes */
-UNIV_INTERN
-ulint
-fsp_header_get_free_limit(void);
-/*===========================*/
-/**********************************************************************//**
-Gets the size of the system tablespace from the tablespace header. If
-we do not have an auto-extending data file, this should be equal to
-the size of the data files. If there is an auto-extending data file,
-this can be smaller.
-@return size in pages */
-UNIV_INTERN
-ulint
-fsp_header_get_tablespace_size(void);
-/*================================*/
-/**********************************************************************//**
-Reads the file space size stored in the header page.
-@return tablespace size stored in the space header */
-UNIV_INTERN
-ulint
-fsp_get_size_low(
-/*=============*/
- page_t* page); /*!< in: header page (page 0 in the tablespace) */
-/**********************************************************************//**
-Reads the space id from the first page of a tablespace.
-@return space id, ULINT UNDEFINED if error */
-UNIV_INTERN
-ulint
-fsp_header_get_space_id(
-/*====================*/
- const page_t* page); /*!< in: first page of a tablespace */
-/**********************************************************************//**
-Reads the space flags from the first page of a tablespace.
-@return flags */
-UNIV_INTERN
-ulint
-fsp_header_get_flags(
-/*=================*/
- const page_t* page); /*!< in: first page of a tablespace */
-/**********************************************************************//**
-Reads the compressed page size from the first page of a tablespace.
-@return compressed page size in bytes, or 0 if uncompressed */
-UNIV_INTERN
-ulint
-fsp_header_get_zip_size(
-/*====================*/
- const page_t* page); /*!< in: first page of a tablespace */
-/**********************************************************************//**
-Writes the space id and compressed page size to a tablespace header.
-This function is used past the buffer pool when we in fil0fil.c create
-a new single-table tablespace. */
-UNIV_INTERN
-void
-fsp_header_init_fields(
-/*===================*/
- page_t* page, /*!< in/out: first page in the space */
- ulint space_id, /*!< in: space id */
- ulint flags); /*!< in: tablespace flags (FSP_SPACE_FLAGS):
- 0, or table->flags if newer than COMPACT */
-/**********************************************************************//**
-Initializes the space header of a new created space and creates also the
-insert buffer tree root if space == 0. */
-UNIV_INTERN
-void
-fsp_header_init(
-/*============*/
- ulint space, /*!< in: space id */
- ulint size, /*!< in: current size in blocks */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/**********************************************************************//**
-Increases the space size field of a space. */
-UNIV_INTERN
-void
-fsp_header_inc_size(
-/*================*/
- ulint space, /*!< in: space id */
- ulint size_inc,/*!< in: size increment in pages */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/**********************************************************************//**
-Creates a new segment.
-@return the block where the segment header is placed, x-latched, NULL
-if could not create segment because of lack of space */
-UNIV_INTERN
-buf_block_t*
-fseg_create(
-/*========*/
- ulint space, /*!< in: space id */
- ulint page, /*!< in: page where the segment header is placed: if
- this is != 0, the page must belong to another segment,
- if this is 0, a new page will be allocated and it
- will belong to the created segment */
- ulint byte_offset, /*!< in: byte offset of the created segment header
- on the page */
- mtr_t* mtr); /*!< in: mtr */
-/**********************************************************************//**
-Creates a new segment.
-@return the block where the segment header is placed, x-latched, NULL
-if could not create segment because of lack of space */
-UNIV_INTERN
-buf_block_t*
-fseg_create_general(
-/*================*/
- ulint space, /*!< in: space id */
- ulint page, /*!< in: page where the segment header is placed: if
- this is != 0, the page must belong to another segment,
- if this is 0, a new page will be allocated and it
- will belong to the created segment */
- ulint byte_offset, /*!< in: byte offset of the created segment header
- on the page */
- ibool has_done_reservation, /*!< in: TRUE if the caller has already
- done the reservation for the pages with
- fsp_reserve_free_extents (at least 2 extents: one for
- the inode and the other for the segment) then there is
- no need to do the check for this individual
- operation */
- mtr_t* mtr); /*!< in: mtr */
-/**********************************************************************//**
-Calculates the number of pages reserved by a segment, and how many pages are
-currently used.
-@return number of reserved pages */
-UNIV_INTERN
-ulint
-fseg_n_reserved_pages(
-/*==================*/
- fseg_header_t* header, /*!< in: segment header */
- ulint* used, /*!< out: number of pages used (<= reserved) */
- mtr_t* mtr); /*!< in: mtr handle */
-/**********************************************************************//**
-Allocates a single free page from a segment. This function implements
-the intelligent allocation strategy which tries to minimize
-file space fragmentation.
-@return the allocated page offset FIL_NULL if no page could be allocated */
-UNIV_INTERN
-ulint
-fseg_alloc_free_page(
-/*=================*/
- fseg_header_t* seg_header, /*!< in: segment header */
- ulint hint, /*!< in: hint of which page would be desirable */
- byte direction, /*!< in: if the new page is needed because
- of an index page split, and records are
- inserted there in order, into which
- direction they go alphabetically: FSP_DOWN,
- FSP_UP, FSP_NO_DIR */
- mtr_t* mtr); /*!< in: mtr handle */
-/**********************************************************************//**
-Allocates a single free page from a segment. This function implements
-the intelligent allocation strategy which tries to minimize file space
-fragmentation.
-@return allocated page offset, FIL_NULL if no page could be allocated */
-UNIV_INTERN
-ulint
-fseg_alloc_free_page_general(
-/*=========================*/
- fseg_header_t* seg_header,/*!< in: segment header */
- ulint hint, /*!< in: hint of which page would be desirable */
- byte direction,/*!< in: if the new page is needed because
- of an index page split, and records are
- inserted there in order, into which
- direction they go alphabetically: FSP_DOWN,
- FSP_UP, FSP_NO_DIR */
- ibool has_done_reservation, /*!< in: TRUE if the caller has
- already done the reservation for the page
- with fsp_reserve_free_extents, then there
- is no need to do the check for this individual
- page */
- mtr_t* mtr); /*!< in: mtr handle */
-/**********************************************************************//**
-Reserves free pages from a tablespace. All mini-transactions which may
-use several pages from the tablespace should call this function beforehand
-and reserve enough free extents so that they certainly will be able
-to do their operation, like a B-tree page split, fully. Reservations
-must be released with function fil_space_release_free_extents!
-
-The alloc_type below has the following meaning: FSP_NORMAL means an
-operation which will probably result in more space usage, like an
-insert in a B-tree; FSP_UNDO means allocation to undo logs: if we are
-deleting rows, then this allocation will in the long run result in
-less space usage (after a purge); FSP_CLEANING means allocation done
-in a physical record delete (like in a purge) or other cleaning operation
-which will result in less space usage in the long run. We prefer the latter
-two types of allocation: when space is scarce, FSP_NORMAL allocations
-will not succeed, but the latter two allocations will succeed, if possible.
-The purpose is to avoid dead end where the database is full but the
-user cannot free any space because these freeing operations temporarily
-reserve some space.
-
-Single-table tablespaces whose size is < 32 pages are a special case. In this
-function we would liberally reserve several 64 page extents for every page
-split or merge in a B-tree. But we do not want to waste disk space if the table
-only occupies < 32 pages. That is why we apply different rules in that special
-case, just ensuring that there are 3 free pages available.
-@return TRUE if we were able to make the reservation */
-UNIV_INTERN
-ibool
-fsp_reserve_free_extents(
-/*=====================*/
- ulint* n_reserved,/*!< out: number of extents actually reserved; if we
- return TRUE and the tablespace size is < 64 pages,
- then this can be 0, otherwise it is n_ext */
- ulint space, /*!< in: space id */
- ulint n_ext, /*!< in: number of extents to reserve */
- ulint alloc_type,/*!< in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
- mtr_t* mtr); /*!< in: mtr */
-/**********************************************************************//**
-This function should be used to get information on how much we still
-will be able to insert new data to the database without running out the
-tablespace. Only free extents are taken into account and we also subtract
-the safety margin required by the above function fsp_reserve_free_extents.
-@return available space in kB */
-UNIV_INTERN
-ullint
-fsp_get_available_space_in_free_extents(
-/*====================================*/
- ulint space); /*!< in: space id */
-/**********************************************************************//**
-Frees a single page of a segment. */
-UNIV_INTERN
-void
-fseg_free_page(
-/*===========*/
- fseg_header_t* seg_header, /*!< in: segment header */
- ulint space, /*!< in: space id */
- ulint page, /*!< in: page offset */
- mtr_t* mtr); /*!< in: mtr handle */
-/**********************************************************************//**
-Frees part of a segment. This function can be used to free a segment
-by repeatedly calling this function in different mini-transactions.
-Doing the freeing in a single mini-transaction might result in
-too big a mini-transaction.
-@return TRUE if freeing completed */
-UNIV_INTERN
-ibool
-fseg_free_step(
-/*===========*/
- fseg_header_t* header, /*!< in, own: segment header; NOTE: if the header
- resides on the first page of the frag list
- of the segment, this pointer becomes obsolete
- after the last freeing step */
- mtr_t* mtr); /*!< in: mtr */
-/**********************************************************************//**
-Frees part of a segment. Differs from fseg_free_step because this function
-leaves the header page unfreed.
-@return TRUE if freeing completed, except the header page */
-UNIV_INTERN
-ibool
-fseg_free_step_not_header(
-/*======================*/
- fseg_header_t* header, /*!< in: segment header which must reside on
- the first fragment page of the segment */
- mtr_t* mtr); /*!< in: mtr */
-/***********************************************************************//**
-Checks if a page address is an extent descriptor page address.
-@return TRUE if a descriptor page */
-UNIV_INLINE
-ibool
-fsp_descr_page(
-/*===========*/
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint page_no);/*!< in: page number */
-/***********************************************************//**
-Parses a redo log record of a file page init.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-fsp_parse_init_file_page(
-/*=====================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr, /*!< in: buffer end */
- buf_block_t* block); /*!< in: block or NULL */
-/*******************************************************************//**
-Validates the file space system and its segments.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-fsp_validate(
-/*=========*/
- ulint space); /*!< in: space id */
-/*******************************************************************//**
-Prints info of a file space. */
-UNIV_INTERN
-void
-fsp_print(
-/*======*/
- ulint space); /*!< in: space id */
-#ifdef UNIV_DEBUG
-/*******************************************************************//**
-Validates a segment.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-fseg_validate(
-/*==========*/
- fseg_header_t* header, /*!< in: segment header */
- mtr_t* mtr); /*!< in: mtr */
-#endif /* UNIV_DEBUG */
-#ifdef UNIV_BTR_PRINT
-/*******************************************************************//**
-Writes info of a segment. */
-UNIV_INTERN
-void
-fseg_print(
-/*=======*/
- fseg_header_t* header, /*!< in: segment header */
- mtr_t* mtr); /*!< in: mtr */
-#endif /* UNIV_BTR_PRINT */
-
-#ifndef UNIV_NONINL
-#include "fsp0fsp.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/fsp0fsp.ic b/storage/innodb_plugin/include/fsp0fsp.ic
deleted file mode 100644
index 434c370b527..00000000000
--- a/storage/innodb_plugin/include/fsp0fsp.ic
+++ /dev/null
@@ -1,45 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/fsp0fsp.ic
-File space management
-
-Created 12/18/1995 Heikki Tuuri
-*******************************************************/
-
-/***********************************************************************//**
-Checks if a page address is an extent descriptor page address.
-@return TRUE if a descriptor page */
-UNIV_INLINE
-ibool
-fsp_descr_page(
-/*===========*/
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint page_no)/*!< in: page number */
-{
- ut_ad(ut_is_2pow(zip_size));
-
- if (!zip_size) {
- return(UNIV_UNLIKELY((page_no & (UNIV_PAGE_SIZE - 1))
- == FSP_XDES_OFFSET));
- }
-
- return(UNIV_UNLIKELY((page_no & (zip_size - 1)) == FSP_XDES_OFFSET));
-}
diff --git a/storage/innodb_plugin/include/fsp0types.h b/storage/innodb_plugin/include/fsp0types.h
deleted file mode 100644
index 496081c2346..00000000000
--- a/storage/innodb_plugin/include/fsp0types.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/******************************************************
-@file include/fsp0types.h
-File space management types
-
-Created May 26, 2009 Vasil Dimov
-*******************************************************/
-
-#ifndef fsp0types_h
-#define fsp0types_h
-
-#include "univ.i"
-
-#include "fil0fil.h" /* for FIL_PAGE_DATA */
-
-/** @name Flags for inserting records in order
-If records are inserted in order, there are the following
-flags to tell this (their type is made byte for the compiler
-to warn if direction and hint parameters are switched in
-fseg_alloc_free_page) */
-/* @{ */
-#define FSP_UP ((byte)111) /*!< alphabetically upwards */
-#define FSP_DOWN ((byte)112) /*!< alphabetically downwards */
-#define FSP_NO_DIR ((byte)113) /*!< no order */
-/* @} */
-
-/** File space extent size (one megabyte) in pages */
-#define FSP_EXTENT_SIZE (1 << (20 - UNIV_PAGE_SIZE_SHIFT))
-
-/** On a page of any file segment, data may be put starting from this
-offset */
-#define FSEG_PAGE_DATA FIL_PAGE_DATA
-
-/** @name File segment header
-The file segment header points to the inode describing the file segment. */
-/* @{ */
-/** Data type for file segment header */
-typedef byte fseg_header_t;
-
-#define FSEG_HDR_SPACE 0 /*!< space id of the inode */
-#define FSEG_HDR_PAGE_NO 4 /*!< page number of the inode */
-#define FSEG_HDR_OFFSET 8 /*!< byte offset of the inode */
-
-#define FSEG_HEADER_SIZE 10 /*!< Length of the file system
- header, in bytes */
-/* @} */
-
-/** Flags for fsp_reserve_free_extents @{ */
-#define FSP_NORMAL 1000000
-#define FSP_UNDO 2000000
-#define FSP_CLEANING 3000000
-/* @} */
-
-/* Number of pages described in a single descriptor page: currently each page
-description takes less than 1 byte; a descriptor page is repeated every
-this many file pages */
-/* #define XDES_DESCRIBED_PER_PAGE UNIV_PAGE_SIZE */
-/* This has been replaced with either UNIV_PAGE_SIZE or page_zip->size. */
-
-/** @name The space low address page map
-The pages at FSP_XDES_OFFSET and FSP_IBUF_BITMAP_OFFSET are repeated
-every XDES_DESCRIBED_PER_PAGE pages in every tablespace. */
-/* @{ */
-/*--------------------------------------*/
-#define FSP_XDES_OFFSET 0 /* !< extent descriptor */
-#define FSP_IBUF_BITMAP_OFFSET 1 /* !< insert buffer bitmap */
- /* The ibuf bitmap pages are the ones whose
- page number is the number above plus a
- multiple of XDES_DESCRIBED_PER_PAGE */
-
-#define FSP_FIRST_INODE_PAGE_NO 2 /*!< in every tablespace */
- /* The following pages exist
- in the system tablespace (space 0). */
-#define FSP_IBUF_HEADER_PAGE_NO 3 /*!< insert buffer
- header page, in
- tablespace 0 */
-#define FSP_IBUF_TREE_ROOT_PAGE_NO 4 /*!< insert buffer
- B-tree root page in
- tablespace 0 */
- /* The ibuf tree root page number in
- tablespace 0; its fseg inode is on the page
- number FSP_FIRST_INODE_PAGE_NO */
-#define FSP_TRX_SYS_PAGE_NO 5 /*!< transaction
- system header, in
- tablespace 0 */
-#define FSP_FIRST_RSEG_PAGE_NO 6 /*!< first rollback segment
- page, in tablespace 0 */
-#define FSP_DICT_HDR_PAGE_NO 7 /*!< data dictionary header
- page, in tablespace 0 */
-/*--------------------------------------*/
-/* @} */
-
-#endif /* fsp0types_h */
diff --git a/storage/innodb_plugin/include/fut0fut.h b/storage/innodb_plugin/include/fut0fut.h
deleted file mode 100644
index dce20b3bad6..00000000000
--- a/storage/innodb_plugin/include/fut0fut.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/fut0fut.h
-File-based utilities
-
-Created 12/13/1995 Heikki Tuuri
-***********************************************************************/
-
-
-#ifndef fut0fut_h
-#define fut0fut_h
-
-#include "univ.i"
-
-#include "fil0fil.h"
-#include "mtr0mtr.h"
-
-/********************************************************************//**
-Gets a pointer to a file address and latches the page.
-@return pointer to a byte in a frame; the file page in the frame is
-bufferfixed and latched */
-UNIV_INLINE
-byte*
-fut_get_ptr(
-/*========*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- fil_addr_t addr, /*!< in: file address */
- ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */
- mtr_t* mtr); /*!< in: mtr handle */
-
-#ifndef UNIV_NONINL
-#include "fut0fut.ic"
-#endif
-
-#endif
-
diff --git a/storage/innodb_plugin/include/fut0fut.ic b/storage/innodb_plugin/include/fut0fut.ic
deleted file mode 100644
index 0b52719a055..00000000000
--- a/storage/innodb_plugin/include/fut0fut.ic
+++ /dev/null
@@ -1,56 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/fut0fut.ic
-File-based utilities
-
-Created 12/13/1995 Heikki Tuuri
-***********************************************************************/
-
-#include "sync0rw.h"
-#include "buf0buf.h"
-
-/********************************************************************//**
-Gets a pointer to a file address and latches the page.
-@return pointer to a byte in a frame; the file page in the frame is
-bufferfixed and latched */
-UNIV_INLINE
-byte*
-fut_get_ptr(
-/*========*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- fil_addr_t addr, /*!< in: file address */
- ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */
- mtr_t* mtr) /*!< in: mtr handle */
-{
- buf_block_t* block;
- byte* ptr;
-
- ut_ad(addr.boffset < UNIV_PAGE_SIZE);
- ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
-
- block = buf_page_get(space, zip_size, addr.page, rw_latch, mtr);
- ptr = buf_block_get_frame(block) + addr.boffset;
-
- buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
-
- return(ptr);
-}
diff --git a/storage/innodb_plugin/include/fut0lst.h b/storage/innodb_plugin/include/fut0lst.h
deleted file mode 100644
index fe024c2498f..00000000000
--- a/storage/innodb_plugin/include/fut0lst.h
+++ /dev/null
@@ -1,217 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/fut0lst.h
-File-based list utilities
-
-Created 11/28/1995 Heikki Tuuri
-***********************************************************************/
-
-#ifndef fut0lst_h
-#define fut0lst_h
-
-#include "univ.i"
-
-#include "fil0fil.h"
-#include "mtr0mtr.h"
-
-
-/* The C 'types' of base node and list node: these should be used to
-write self-documenting code. Of course, the sizeof macro cannot be
-applied to these types! */
-
-typedef byte flst_base_node_t;
-typedef byte flst_node_t;
-
-/* The physical size of a list base node in bytes */
-#define FLST_BASE_NODE_SIZE (4 + 2 * FIL_ADDR_SIZE)
-
-/* The physical size of a list node in bytes */
-#define FLST_NODE_SIZE (2 * FIL_ADDR_SIZE)
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Initializes a list base node. */
-UNIV_INLINE
-void
-flst_init(
-/*======*/
- flst_base_node_t* base, /*!< in: pointer to base node */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Adds a node as the last node in a list. */
-UNIV_INTERN
-void
-flst_add_last(
-/*==========*/
- flst_base_node_t* base, /*!< in: pointer to base node of list */
- flst_node_t* node, /*!< in: node to add */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Adds a node as the first node in a list. */
-UNIV_INTERN
-void
-flst_add_first(
-/*===========*/
- flst_base_node_t* base, /*!< in: pointer to base node of list */
- flst_node_t* node, /*!< in: node to add */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Inserts a node after another in a list. */
-UNIV_INTERN
-void
-flst_insert_after(
-/*==============*/
- flst_base_node_t* base, /*!< in: pointer to base node of list */
- flst_node_t* node1, /*!< in: node to insert after */
- flst_node_t* node2, /*!< in: node to add */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Inserts a node before another in a list. */
-UNIV_INTERN
-void
-flst_insert_before(
-/*===============*/
- flst_base_node_t* base, /*!< in: pointer to base node of list */
- flst_node_t* node2, /*!< in: node to insert */
- flst_node_t* node3, /*!< in: node to insert before */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Removes a node. */
-UNIV_INTERN
-void
-flst_remove(
-/*========*/
- flst_base_node_t* base, /*!< in: pointer to base node of list */
- flst_node_t* node2, /*!< in: node to remove */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Cuts off the tail of the list, including the node given. The number of
-nodes which will be removed must be provided by the caller, as this function
-does not measure the length of the tail. */
-UNIV_INTERN
-void
-flst_cut_end(
-/*=========*/
- flst_base_node_t* base, /*!< in: pointer to base node of list */
- flst_node_t* node2, /*!< in: first node to remove */
- ulint n_nodes,/*!< in: number of nodes to remove,
- must be >= 1 */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Cuts off the tail of the list, not including the given node. The number of
-nodes which will be removed must be provided by the caller, as this function
-does not measure the length of the tail. */
-UNIV_INTERN
-void
-flst_truncate_end(
-/*==============*/
- flst_base_node_t* base, /*!< in: pointer to base node of list */
- flst_node_t* node2, /*!< in: first node not to remove */
- ulint n_nodes,/*!< in: number of nodes to remove */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Gets list length.
-@return length */
-UNIV_INLINE
-ulint
-flst_get_len(
-/*=========*/
- const flst_base_node_t* base, /*!< in: pointer to base node */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Gets list first node address.
-@return file address */
-UNIV_INLINE
-fil_addr_t
-flst_get_first(
-/*===========*/
- const flst_base_node_t* base, /*!< in: pointer to base node */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Gets list last node address.
-@return file address */
-UNIV_INLINE
-fil_addr_t
-flst_get_last(
-/*==========*/
- const flst_base_node_t* base, /*!< in: pointer to base node */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Gets list next node address.
-@return file address */
-UNIV_INLINE
-fil_addr_t
-flst_get_next_addr(
-/*===============*/
- const flst_node_t* node, /*!< in: pointer to node */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Gets list prev node address.
-@return file address */
-UNIV_INLINE
-fil_addr_t
-flst_get_prev_addr(
-/*===============*/
- const flst_node_t* node, /*!< in: pointer to node */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Writes a file address. */
-UNIV_INLINE
-void
-flst_write_addr(
-/*============*/
- fil_faddr_t* faddr, /*!< in: pointer to file faddress */
- fil_addr_t addr, /*!< in: file address */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Reads a file address.
-@return file address */
-UNIV_INLINE
-fil_addr_t
-flst_read_addr(
-/*===========*/
- const fil_faddr_t* faddr, /*!< in: pointer to file faddress */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Validates a file-based list.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-flst_validate(
-/*==========*/
- const flst_base_node_t* base, /*!< in: pointer to base node of list */
- mtr_t* mtr1); /*!< in: mtr */
-/********************************************************************//**
-Prints info of a file-based list. */
-UNIV_INTERN
-void
-flst_print(
-/*=======*/
- const flst_base_node_t* base, /*!< in: pointer to base node of list */
- mtr_t* mtr); /*!< in: mtr */
-
-
-#ifndef UNIV_NONINL
-#include "fut0lst.ic"
-#endif
-
-#endif /* !UNIV_HOTBACKUP */
-
-#endif
diff --git a/storage/innodb_plugin/include/fut0lst.ic b/storage/innodb_plugin/include/fut0lst.ic
deleted file mode 100644
index dcd13c61871..00000000000
--- a/storage/innodb_plugin/include/fut0lst.ic
+++ /dev/null
@@ -1,167 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/fut0lst.ic
-File-based list utilities
-
-Created 11/28/1995 Heikki Tuuri
-***********************************************************************/
-
-#include "fut0fut.h"
-#include "mtr0log.h"
-#include "buf0buf.h"
-
-/* We define the field offsets of a node for the list */
-#define FLST_PREV 0 /* 6-byte address of the previous list element;
- the page part of address is FIL_NULL, if no
- previous element */
-#define FLST_NEXT FIL_ADDR_SIZE /* 6-byte address of the next
- list element; the page part of address
- is FIL_NULL, if no next element */
-
-/* We define the field offsets of a base node for the list */
-#define FLST_LEN 0 /* 32-bit list length field */
-#define FLST_FIRST 4 /* 6-byte address of the first element
- of the list; undefined if empty list */
-#define FLST_LAST (4 + FIL_ADDR_SIZE) /* 6-byte address of the
- last element of the list; undefined
- if empty list */
-
-/********************************************************************//**
-Writes a file address. */
-UNIV_INLINE
-void
-flst_write_addr(
-/*============*/
- fil_faddr_t* faddr, /*!< in: pointer to file faddress */
- fil_addr_t addr, /*!< in: file address */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ut_ad(faddr && mtr);
- ut_ad(mtr_memo_contains_page(mtr, faddr, MTR_MEMO_PAGE_X_FIX));
- ut_a(addr.page == FIL_NULL || addr.boffset >= FIL_PAGE_DATA);
- ut_a(ut_align_offset(faddr, UNIV_PAGE_SIZE) >= FIL_PAGE_DATA);
-
- mlog_write_ulint(faddr + FIL_ADDR_PAGE, addr.page, MLOG_4BYTES, mtr);
- mlog_write_ulint(faddr + FIL_ADDR_BYTE, addr.boffset,
- MLOG_2BYTES, mtr);
-}
-
-/********************************************************************//**
-Reads a file address.
-@return file address */
-UNIV_INLINE
-fil_addr_t
-flst_read_addr(
-/*===========*/
- const fil_faddr_t* faddr, /*!< in: pointer to file faddress */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- fil_addr_t addr;
-
- ut_ad(faddr && mtr);
-
- addr.page = mtr_read_ulint(faddr + FIL_ADDR_PAGE, MLOG_4BYTES, mtr);
- addr.boffset = mtr_read_ulint(faddr + FIL_ADDR_BYTE, MLOG_2BYTES,
- mtr);
- ut_a(addr.page == FIL_NULL || addr.boffset >= FIL_PAGE_DATA);
- ut_a(ut_align_offset(faddr, UNIV_PAGE_SIZE) >= FIL_PAGE_DATA);
- return(addr);
-}
-
-/********************************************************************//**
-Initializes a list base node. */
-UNIV_INLINE
-void
-flst_init(
-/*======*/
- flst_base_node_t* base, /*!< in: pointer to base node */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
-
- mlog_write_ulint(base + FLST_LEN, 0, MLOG_4BYTES, mtr);
- flst_write_addr(base + FLST_FIRST, fil_addr_null, mtr);
- flst_write_addr(base + FLST_LAST, fil_addr_null, mtr);
-}
-
-/********************************************************************//**
-Gets list length.
-@return length */
-UNIV_INLINE
-ulint
-flst_get_len(
-/*=========*/
- const flst_base_node_t* base, /*!< in: pointer to base node */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- return(mtr_read_ulint(base + FLST_LEN, MLOG_4BYTES, mtr));
-}
-
-/********************************************************************//**
-Gets list first node address.
-@return file address */
-UNIV_INLINE
-fil_addr_t
-flst_get_first(
-/*===========*/
- const flst_base_node_t* base, /*!< in: pointer to base node */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- return(flst_read_addr(base + FLST_FIRST, mtr));
-}
-
-/********************************************************************//**
-Gets list last node address.
-@return file address */
-UNIV_INLINE
-fil_addr_t
-flst_get_last(
-/*==========*/
- const flst_base_node_t* base, /*!< in: pointer to base node */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- return(flst_read_addr(base + FLST_LAST, mtr));
-}
-
-/********************************************************************//**
-Gets list next node address.
-@return file address */
-UNIV_INLINE
-fil_addr_t
-flst_get_next_addr(
-/*===============*/
- const flst_node_t* node, /*!< in: pointer to node */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- return(flst_read_addr(node + FLST_NEXT, mtr));
-}
-
-/********************************************************************//**
-Gets list prev node address.
-@return file address */
-UNIV_INLINE
-fil_addr_t
-flst_get_prev_addr(
-/*===============*/
- const flst_node_t* node, /*!< in: pointer to node */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- return(flst_read_addr(node + FLST_PREV, mtr));
-}
diff --git a/storage/innodb_plugin/include/ha0ha.h b/storage/innodb_plugin/include/ha0ha.h
deleted file mode 100644
index 1ffbd3440aa..00000000000
--- a/storage/innodb_plugin/include/ha0ha.h
+++ /dev/null
@@ -1,241 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/ha0ha.h
-The hash table with external chains
-
-Created 8/18/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef ha0ha_h
-#define ha0ha_h
-
-#include "univ.i"
-
-#include "hash0hash.h"
-#include "page0types.h"
-#include "buf0types.h"
-
-/*************************************************************//**
-Looks for an element in a hash table.
-@return pointer to the data of the first hash table node in chain
-having the fold number, NULL if not found */
-UNIV_INLINE
-void*
-ha_search_and_get_data(
-/*===================*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold); /*!< in: folded value of the searched data */
-/*********************************************************//**
-Looks for an element when we know the pointer to the data and updates
-the pointer to data if found. */
-UNIV_INTERN
-void
-ha_search_and_update_if_found_func(
-/*===============================*/
- hash_table_t* table, /*!< in/out: hash table */
- ulint fold, /*!< in: folded value of the searched data */
- void* data, /*!< in: pointer to the data */
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- buf_block_t* new_block,/*!< in: block containing new_data */
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- void* new_data);/*!< in: new pointer to the data */
-
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-/** Looks for an element when we know the pointer to the data and
-updates the pointer to data if found.
-@param table in/out: hash table
-@param fold in: folded value of the searched data
-@param data in: pointer to the data
-@param new_block in: block containing new_data
-@param new_data in: new pointer to the data */
-# define ha_search_and_update_if_found(table,fold,data,new_block,new_data) \
- ha_search_and_update_if_found_func(table,fold,data,new_block,new_data)
-#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-/** Looks for an element when we know the pointer to the data and
-updates the pointer to data if found.
-@param table in/out: hash table
-@param fold in: folded value of the searched data
-@param data in: pointer to the data
-@param new_block ignored: block containing new_data
-@param new_data in: new pointer to the data */
-# define ha_search_and_update_if_found(table,fold,data,new_block,new_data) \
- ha_search_and_update_if_found_func(table,fold,data,new_data)
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-/*************************************************************//**
-Creates a hash table with at least n array cells. The actual number
-of cells is chosen to be a prime number slightly bigger than n.
-@return own: created table */
-UNIV_INTERN
-hash_table_t*
-ha_create_func(
-/*===========*/
- ulint n, /*!< in: number of array cells */
-#ifdef UNIV_SYNC_DEBUG
- ulint mutex_level, /*!< in: level of the mutexes in the latching
- order: this is used in the debug version */
-#endif /* UNIV_SYNC_DEBUG */
- ulint n_mutexes); /*!< in: number of mutexes to protect the
- hash table: must be a power of 2, or 0 */
-#ifdef UNIV_SYNC_DEBUG
-/** Creates a hash table.
-@return own: created table
-@param n_c in: number of array cells. The actual number of cells is
-chosen to be a slightly bigger prime number.
-@param level in: level of the mutexes in the latching order
-@param n_m in: number of mutexes to protect the hash table;
- must be a power of 2, or 0 */
-# define ha_create(n_c,n_m,level) ha_create_func(n_c,level,n_m)
-#else /* UNIV_SYNC_DEBUG */
-/** Creates a hash table.
-@return own: created table
-@param n_c in: number of array cells. The actual number of cells is
-chosen to be a slightly bigger prime number.
-@param level in: level of the mutexes in the latching order
-@param n_m in: number of mutexes to protect the hash table;
- must be a power of 2, or 0 */
-# define ha_create(n_c,n_m,level) ha_create_func(n_c,n_m)
-#endif /* UNIV_SYNC_DEBUG */
-
-/*************************************************************//**
-Empties a hash table and frees the memory heaps. */
-UNIV_INTERN
-void
-ha_clear(
-/*=====*/
- hash_table_t* table); /*!< in, own: hash table */
-
-/*************************************************************//**
-Inserts an entry into a hash table. If an entry with the same fold number
-is found, its node is updated to point to the new data, and no new node
-is inserted.
-@return TRUE if succeed, FALSE if no more memory could be allocated */
-UNIV_INTERN
-ibool
-ha_insert_for_fold_func(
-/*====================*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold, /*!< in: folded value of data; if a node with
- the same fold value already exists, it is
- updated to point to the same data, and no new
- node is created! */
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- buf_block_t* block, /*!< in: buffer block containing the data */
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- void* data); /*!< in: data, must not be NULL */
-
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-/**
-Inserts an entry into a hash table. If an entry with the same fold number
-is found, its node is updated to point to the new data, and no new node
-is inserted.
-@return TRUE if succeed, FALSE if no more memory could be allocated
-@param t in: hash table
-@param f in: folded value of data
-@param b in: buffer block containing the data
-@param d in: data, must not be NULL */
-# define ha_insert_for_fold(t,f,b,d) ha_insert_for_fold_func(t,f,b,d)
-#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-/**
-Inserts an entry into a hash table. If an entry with the same fold number
-is found, its node is updated to point to the new data, and no new node
-is inserted.
-@return TRUE if succeed, FALSE if no more memory could be allocated
-@param t in: hash table
-@param f in: folded value of data
-@param b ignored: buffer block containing the data
-@param d in: data, must not be NULL */
-# define ha_insert_for_fold(t,f,b,d) ha_insert_for_fold_func(t,f,d)
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-
-/*********************************************************//**
-Looks for an element when we know the pointer to the data and deletes
-it from the hash table if found.
-@return TRUE if found */
-UNIV_INLINE
-ibool
-ha_search_and_delete_if_found(
-/*==========================*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold, /*!< in: folded value of the searched data */
- void* data); /*!< in: pointer to the data */
-#ifndef UNIV_HOTBACKUP
-/*****************************************************************//**
-Removes from the chain determined by fold all nodes whose data pointer
-points to the page given. */
-UNIV_INTERN
-void
-ha_remove_all_nodes_to_page(
-/*========================*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold, /*!< in: fold value */
- const page_t* page); /*!< in: buffer page */
-/*************************************************************//**
-Validates a given range of the cells in hash table.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-ha_validate(
-/*========*/
- hash_table_t* table, /*!< in: hash table */
- ulint start_index, /*!< in: start index */
- ulint end_index); /*!< in: end index */
-/*************************************************************//**
-Prints info of a hash table. */
-UNIV_INTERN
-void
-ha_print_info(
-/*==========*/
- FILE* file, /*!< in: file where to print */
- hash_table_t* table); /*!< in: hash table */
-#endif /* !UNIV_HOTBACKUP */
-
-/** The hash table external chain node */
-typedef struct ha_node_struct ha_node_t;
-
-/** The hash table external chain node */
-struct ha_node_struct {
- ha_node_t* next; /*!< next chain node or NULL if none */
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- buf_block_t* block; /*!< buffer block containing the data, or NULL */
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- void* data; /*!< pointer to the data */
- ulint fold; /*!< fold value for the data */
-};
-
-#ifndef UNIV_HOTBACKUP
-/** Assert that the current thread is holding the mutex protecting a
-hash bucket corresponding to a fold value.
-@param table in: hash table
-@param fold in: fold value */
-# define ASSERT_HASH_MUTEX_OWN(table, fold) \
- ut_ad(!(table)->mutexes || mutex_own(hash_get_mutex(table, fold)))
-#else /* !UNIV_HOTBACKUP */
-/** Assert that the current thread is holding the mutex protecting a
-hash bucket corresponding to a fold value.
-@param table in: hash table
-@param fold in: fold value */
-# define ASSERT_HASH_MUTEX_OWN(table, fold) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-#ifndef UNIV_NONINL
-#include "ha0ha.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/ha0ha.ic b/storage/innodb_plugin/include/ha0ha.ic
deleted file mode 100644
index 734403c4cd9..00000000000
--- a/storage/innodb_plugin/include/ha0ha.ic
+++ /dev/null
@@ -1,220 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/ha0ha.ic
-The hash table with external chains
-
-Created 8/18/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "ut0rnd.h"
-#include "mem0mem.h"
-
-/***********************************************************//**
-Deletes a hash node. */
-UNIV_INTERN
-void
-ha_delete_hash_node(
-/*================*/
- hash_table_t* table, /*!< in: hash table */
- ha_node_t* del_node); /*!< in: node to be deleted */
-
-/******************************************************************//**
-Gets a hash node data.
-@return pointer to the data */
-UNIV_INLINE
-void*
-ha_node_get_data(
-/*=============*/
- ha_node_t* node) /*!< in: hash chain node */
-{
- return(node->data);
-}
-
-/******************************************************************//**
-Sets hash node data. */
-UNIV_INLINE
-void
-ha_node_set_data_func(
-/*==================*/
- ha_node_t* node, /*!< in: hash chain node */
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- buf_block_t* block, /*!< in: buffer block containing the data */
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- void* data) /*!< in: pointer to the data */
-{
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- node->block = block;
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- node->data = data;
-}
-
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-/** Sets hash node data.
-@param n in: hash chain node
-@param b in: buffer block containing the data
-@param d in: pointer to the data */
-# define ha_node_set_data(n,b,d) ha_node_set_data_func(n,b,d)
-#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-/** Sets hash node data.
-@param n in: hash chain node
-@param b in: buffer block containing the data
-@param d in: pointer to the data */
-# define ha_node_set_data(n,b,d) ha_node_set_data_func(n,d)
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-
-/******************************************************************//**
-Gets the next node in a hash chain.
-@return next node, NULL if none */
-UNIV_INLINE
-ha_node_t*
-ha_chain_get_next(
-/*==============*/
- ha_node_t* node) /*!< in: hash chain node */
-{
- return(node->next);
-}
-
-/******************************************************************//**
-Gets the first node in a hash chain.
-@return first node, NULL if none */
-UNIV_INLINE
-ha_node_t*
-ha_chain_get_first(
-/*===============*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: fold value determining the chain */
-{
- return((ha_node_t*)
- hash_get_nth_cell(table, hash_calc_hash(fold, table))->node);
-}
-
-/*************************************************************//**
-Looks for an element in a hash table.
-@return pointer to the first hash table node in chain having the fold
-number, NULL if not found */
-UNIV_INLINE
-ha_node_t*
-ha_search(
-/*======*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: folded value of the searched data */
-{
- ha_node_t* node;
-
- ASSERT_HASH_MUTEX_OWN(table, fold);
-
- node = ha_chain_get_first(table, fold);
-
- while (node) {
- if (node->fold == fold) {
-
- return(node);
- }
-
- node = ha_chain_get_next(node);
- }
-
- return(NULL);
-}
-
-/*************************************************************//**
-Looks for an element in a hash table.
-@return pointer to the data of the first hash table node in chain
-having the fold number, NULL if not found */
-UNIV_INLINE
-void*
-ha_search_and_get_data(
-/*===================*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: folded value of the searched data */
-{
- ha_node_t* node;
-
- ASSERT_HASH_MUTEX_OWN(table, fold);
-
- node = ha_chain_get_first(table, fold);
-
- while (node) {
- if (node->fold == fold) {
-
- return(node->data);
- }
-
- node = ha_chain_get_next(node);
- }
-
- return(NULL);
-}
-
-/*********************************************************//**
-Looks for an element when we know the pointer to the data.
-@return pointer to the hash table node, NULL if not found in the table */
-UNIV_INLINE
-ha_node_t*
-ha_search_with_data(
-/*================*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold, /*!< in: folded value of the searched data */
- void* data) /*!< in: pointer to the data */
-{
- ha_node_t* node;
-
- ASSERT_HASH_MUTEX_OWN(table, fold);
-
- node = ha_chain_get_first(table, fold);
-
- while (node) {
- if (node->data == data) {
-
- return(node);
- }
-
- node = ha_chain_get_next(node);
- }
-
- return(NULL);
-}
-
-/*********************************************************//**
-Looks for an element when we know the pointer to the data, and deletes
-it from the hash table, if found.
-@return TRUE if found */
-UNIV_INLINE
-ibool
-ha_search_and_delete_if_found(
-/*==========================*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold, /*!< in: folded value of the searched data */
- void* data) /*!< in: pointer to the data */
-{
- ha_node_t* node;
-
- ASSERT_HASH_MUTEX_OWN(table, fold);
-
- node = ha_search_with_data(table, fold, data);
-
- if (node) {
- ha_delete_hash_node(table, node);
-
- return(TRUE);
- }
-
- return(FALSE);
-}
diff --git a/storage/innodb_plugin/include/ha_prototypes.h b/storage/innodb_plugin/include/ha_prototypes.h
deleted file mode 100644
index e8789d1638b..00000000000
--- a/storage/innodb_plugin/include/ha_prototypes.h
+++ /dev/null
@@ -1,283 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file include/ha_prototypes.h
-Prototypes for global functions in ha_innodb.cc that are called by
-InnoDB C code
-
-Created 5/11/2006 Osku Salerma
-************************************************************************/
-
-#ifndef HA_INNODB_PROTOTYPES_H
-#define HA_INNODB_PROTOTYPES_H
-
-#include "trx0types.h"
-#include "m_ctype.h" /* CHARSET_INFO */
-
-/*********************************************************************//**
-Wrapper around MySQL's copy_and_convert function.
-@return number of bytes copied to 'to' */
-UNIV_INTERN
-ulint
-innobase_convert_string(
-/*====================*/
- void* to, /*!< out: converted string */
- ulint to_length, /*!< in: number of bytes reserved
- for the converted string */
- CHARSET_INFO* to_cs, /*!< in: character set to convert to */
- const void* from, /*!< in: string to convert */
- ulint from_length, /*!< in: number of bytes to convert */
- CHARSET_INFO* from_cs, /*!< in: character set to convert from */
- uint* errors); /*!< out: number of errors encountered
- during the conversion */
-
-/*******************************************************************//**
-Formats the raw data in "data" (in InnoDB on-disk format) that is of
-type DATA_(CHAR|VARCHAR|MYSQL|VARMYSQL) using "charset_coll" and writes
-the result to "buf". The result is converted to "system_charset_info".
-Not more than "buf_size" bytes are written to "buf".
-The result is always NUL-terminated (provided buf_size > 0) and the
-number of bytes that were written to "buf" is returned (including the
-terminating NUL).
-@return number of bytes that were written */
-UNIV_INTERN
-ulint
-innobase_raw_format(
-/*================*/
- const char* data, /*!< in: raw data */
- ulint data_len, /*!< in: raw data length
- in bytes */
- ulint charset_coll, /*!< in: charset collation */
- char* buf, /*!< out: output buffer */
- ulint buf_size); /*!< in: output buffer size
- in bytes */
-
-/*****************************************************************//**
-Invalidates the MySQL query cache for the table. */
-UNIV_INTERN
-void
-innobase_invalidate_query_cache(
-/*============================*/
- trx_t* trx, /*!< in: transaction which
- modifies the table */
- const char* full_name, /*!< in: concatenation of
- database name, null char NUL,
- table name, null char NUL;
- NOTE that in Windows this is
- always in LOWER CASE! */
- ulint full_name_len); /*!< in: full name length where
- also the null chars count */
-
-/*****************************************************************//**
-Convert a table or index name to the MySQL system_charset_info (UTF-8)
-and quote it if needed.
-@return pointer to the end of buf */
-UNIV_INTERN
-char*
-innobase_convert_name(
-/*==================*/
- char* buf, /*!< out: buffer for converted identifier */
- ulint buflen, /*!< in: length of buf, in bytes */
- const char* id, /*!< in: identifier to convert */
- ulint idlen, /*!< in: length of id, in bytes */
- void* thd, /*!< in: MySQL connection thread, or NULL */
- ibool table_id);/*!< in: TRUE=id is a table or database name;
- FALSE=id is an index name */
-
-/******************************************************************//**
-Returns true if the thread is the replication thread on the slave
-server. Used in srv_conc_enter_innodb() to determine if the thread
-should be allowed to enter InnoDB - the replication thread is treated
-differently than other threads. Also used in
-srv_conc_force_exit_innodb().
-@return true if thd is the replication thread */
-UNIV_INTERN
-ibool
-thd_is_replication_slave_thread(
-/*============================*/
- void* thd); /*!< in: thread handle (THD*) */
-
-/******************************************************************//**
-Returns true if the transaction this thread is processing has edited
-non-transactional tables. Used by the deadlock detector when deciding
-which transaction to rollback in case of a deadlock - we try to avoid
-rolling back transactions that have edited non-transactional tables.
-@return true if non-transactional tables have been edited */
-UNIV_INTERN
-ibool
-thd_has_edited_nontrans_tables(
-/*===========================*/
- void* thd); /*!< in: thread handle (THD*) */
-
-/*************************************************************//**
-Prints info of a THD object (== user session thread) to the given file. */
-UNIV_INTERN
-void
-innobase_mysql_print_thd(
-/*=====================*/
- FILE* f, /*!< in: output stream */
- void* thd, /*!< in: pointer to a MySQL THD object */
- uint max_query_len); /*!< in: max query length to print, or 0 to
- use the default max length */
-
-/**************************************************************//**
-Converts a MySQL type to an InnoDB type. Note that this function returns
-the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
-VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'.
-@return DATA_BINARY, DATA_VARCHAR, ... */
-UNIV_INTERN
-ulint
-get_innobase_type_from_mysql_type(
-/*==============================*/
- ulint* unsigned_flag, /*!< out: DATA_UNSIGNED if an
- 'unsigned type';
- at least ENUM and SET,
- and unsigned integer
- types are 'unsigned types' */
- const void* field) /*!< in: MySQL Field */
- __attribute__((nonnull));
-
-/*************************************************************//**
-If you want to print a thd that is not associated with the current thread,
-you must call this function before reserving the InnoDB kernel_mutex, to
-protect MySQL from setting thd->query NULL. If you print a thd of the current
-thread, we know that MySQL cannot modify thd->query, and it is not necessary
-to call this. Call innobase_mysql_end_print_arbitrary_thd() after you release
-the kernel_mutex. */
-UNIV_INTERN
-void
-innobase_mysql_prepare_print_arbitrary_thd(void);
-/*============================================*/
-
-/*************************************************************//**
-Releases the mutex reserved by innobase_mysql_prepare_print_arbitrary_thd().
-In the InnoDB latching order, the mutex sits right above the
-kernel_mutex. In debug builds, we assert that the kernel_mutex is
-released before this function is invoked. */
-UNIV_INTERN
-void
-innobase_mysql_end_print_arbitrary_thd(void);
-/*========================================*/
-
-/******************************************************************//**
-Get the variable length bounds of the given character set. */
-UNIV_INTERN
-void
-innobase_get_cset_width(
-/*====================*/
- ulint cset, /*!< in: MySQL charset-collation code */
- ulint* mbminlen, /*!< out: minimum length of a char (in bytes) */
- ulint* mbmaxlen); /*!< out: maximum length of a char (in bytes) */
-
-/******************************************************************//**
-Compares NUL-terminated UTF-8 strings case insensitively.
-@return 0 if a=b, <0 if a<b, >1 if a>b */
-UNIV_INTERN
-int
-innobase_strcasecmp(
-/*================*/
- const char* a, /*!< in: first string to compare */
- const char* b); /*!< in: second string to compare */
-
-/******************************************************************//**
-Returns true if the thread is executing a SELECT statement.
-@return true if thd is executing SELECT */
-
-ibool
-thd_is_select(
-/*==========*/
- const void* thd); /*!< in: thread handle (THD*) */
-
-/******************************************************************//**
-Converts an identifier to a table name. */
-UNIV_INTERN
-void
-innobase_convert_from_table_id(
-/*===========================*/
- struct charset_info_st* cs, /*!< in: the 'from' character set */
- char* to, /*!< out: converted identifier */
- const char* from, /*!< in: identifier to convert */
- ulint len); /*!< in: length of 'to', in bytes; should
- be at least 5 * strlen(to) + 1 */
-/******************************************************************//**
-Converts an identifier to UTF-8. */
-UNIV_INTERN
-void
-innobase_convert_from_id(
-/*=====================*/
- struct charset_info_st* cs, /*!< in: the 'from' character set */
- char* to, /*!< out: converted identifier */
- const char* from, /*!< in: identifier to convert */
- ulint len); /*!< in: length of 'to', in bytes; should
- be at least 3 * strlen(to) + 1 */
-/******************************************************************//**
-Makes all characters in a NUL-terminated UTF-8 string lower case. */
-UNIV_INTERN
-void
-innobase_casedn_str(
-/*================*/
- char* a); /*!< in/out: string to put in lower case */
-
-/**********************************************************************//**
-Determines the connection character set.
-@return connection character set */
-struct charset_info_st*
-innobase_get_charset(
-/*=================*/
- void* mysql_thd); /*!< in: MySQL thread handle */
-
-/******************************************************************//**
-This function is used to find the storage length in bytes of the first n
-characters for prefix indexes using a multibyte character set. The function
-finds charset information and returns length of prefix_len characters in the
-index field in bytes.
-@return number of bytes occupied by the first n characters */
-UNIV_INTERN
-ulint
-innobase_get_at_most_n_mbchars(
-/*===========================*/
- ulint charset_id, /*!< in: character set id */
- ulint prefix_len, /*!< in: prefix length in bytes of the index
- (this has to be divided by mbmaxlen to get the
- number of CHARACTERS n in the prefix) */
- ulint data_len, /*!< in: length of the string in bytes */
- const char* str); /*!< in: character string */
-
-/******************************************************************//**
-Returns true if the thread supports XA,
-global value of innodb_supports_xa if thd is NULL.
-@return true if thd supports XA */
-
-ibool
-thd_supports_xa(
-/*============*/
- void* thd); /*!< in: thread handle (THD*), or NULL to query
- the global innodb_supports_xa */
-
-/******************************************************************//**
-Returns the lock wait timeout for the current connection.
-@return the lock wait timeout, in seconds */
-
-ulong
-thd_lock_wait_timeout(
-/*==================*/
- void* thd); /*!< in: thread handle (THD*), or NULL to query
- the global innodb_lock_wait_timeout */
-
-#endif
diff --git a/storage/innodb_plugin/include/hash0hash.h b/storage/innodb_plugin/include/hash0hash.h
deleted file mode 100644
index 977cb829f35..00000000000
--- a/storage/innodb_plugin/include/hash0hash.h
+++ /dev/null
@@ -1,446 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/hash0hash.h
-The simple hash table utility
-
-Created 5/20/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef hash0hash_h
-#define hash0hash_h
-
-#include "univ.i"
-#include "mem0mem.h"
-#ifndef UNIV_HOTBACKUP
-# include "sync0sync.h"
-#endif /* !UNIV_HOTBACKUP */
-
-typedef struct hash_table_struct hash_table_t;
-typedef struct hash_cell_struct hash_cell_t;
-
-typedef void* hash_node_t;
-
-/* Fix Bug #13859: symbol collision between imap/mysql */
-#define hash_create hash0_create
-
-/*************************************************************//**
-Creates a hash table with >= n array cells. The actual number
-of cells is chosen to be a prime number slightly bigger than n.
-@return own: created table */
-UNIV_INTERN
-hash_table_t*
-hash_create(
-/*========*/
- ulint n); /*!< in: number of array cells */
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Creates a mutex array to protect a hash table. */
-UNIV_INTERN
-void
-hash_create_mutexes_func(
-/*=====================*/
- hash_table_t* table, /*!< in: hash table */
-#ifdef UNIV_SYNC_DEBUG
- ulint sync_level, /*!< in: latching order level of the
- mutexes: used in the debug version */
-#endif /* UNIV_SYNC_DEBUG */
- ulint n_mutexes); /*!< in: number of mutexes */
-#ifdef UNIV_SYNC_DEBUG
-# define hash_create_mutexes(t,n,level) hash_create_mutexes_func(t,level,n)
-#else /* UNIV_SYNC_DEBUG */
-# define hash_create_mutexes(t,n,level) hash_create_mutexes_func(t,n)
-#endif /* UNIV_SYNC_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
-
-/*************************************************************//**
-Frees a hash table. */
-UNIV_INTERN
-void
-hash_table_free(
-/*============*/
- hash_table_t* table); /*!< in, own: hash table */
-/**************************************************************//**
-Calculates the hash value from a folded value.
-@return hashed value */
-UNIV_INLINE
-ulint
-hash_calc_hash(
-/*===========*/
- ulint fold, /*!< in: folded value */
- hash_table_t* table); /*!< in: hash table */
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Assert that the mutex for the table in a hash operation is owned. */
-# define HASH_ASSERT_OWNED(TABLE, FOLD) \
-ut_ad(!(TABLE)->mutexes || mutex_own(hash_get_mutex(TABLE, FOLD)));
-#else /* !UNIV_HOTBACKUP */
-# define HASH_ASSERT_OWNED(TABLE, FOLD)
-#endif /* !UNIV_HOTBACKUP */
-
-/*******************************************************************//**
-Inserts a struct to a hash table. */
-
-#define HASH_INSERT(TYPE, NAME, TABLE, FOLD, DATA)\
-do {\
- hash_cell_t* cell3333;\
- TYPE* struct3333;\
-\
- HASH_ASSERT_OWNED(TABLE, FOLD)\
-\
- (DATA)->NAME = NULL;\
-\
- cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\
-\
- if (cell3333->node == NULL) {\
- cell3333->node = DATA;\
- } else {\
- struct3333 = (TYPE*) cell3333->node;\
-\
- while (struct3333->NAME != NULL) {\
-\
- struct3333 = (TYPE*) struct3333->NAME;\
- }\
-\
- struct3333->NAME = DATA;\
- }\
-} while (0)
-
-#ifdef UNIV_HASH_DEBUG
-# define HASH_ASSERT_VALID(DATA) ut_a((void*) (DATA) != (void*) -1)
-# define HASH_INVALIDATE(DATA, NAME) DATA->NAME = (void*) -1
-#else
-# define HASH_ASSERT_VALID(DATA) do {} while (0)
-# define HASH_INVALIDATE(DATA, NAME) do {} while (0)
-#endif
-
-/*******************************************************************//**
-Deletes a struct from a hash table. */
-
-#define HASH_DELETE(TYPE, NAME, TABLE, FOLD, DATA)\
-do {\
- hash_cell_t* cell3333;\
- TYPE* struct3333;\
-\
- HASH_ASSERT_OWNED(TABLE, FOLD)\
-\
- cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\
-\
- if (cell3333->node == DATA) {\
- HASH_ASSERT_VALID(DATA->NAME);\
- cell3333->node = DATA->NAME;\
- } else {\
- struct3333 = (TYPE*) cell3333->node;\
-\
- while (struct3333->NAME != DATA) {\
-\
- struct3333 = (TYPE*) struct3333->NAME;\
- ut_a(struct3333);\
- }\
-\
- struct3333->NAME = DATA->NAME;\
- }\
- HASH_INVALIDATE(DATA, NAME);\
-} while (0)
-
-/*******************************************************************//**
-Gets the first struct in a hash chain, NULL if none. */
-
-#define HASH_GET_FIRST(TABLE, HASH_VAL)\
- (hash_get_nth_cell(TABLE, HASH_VAL)->node)
-
-/*******************************************************************//**
-Gets the next struct in a hash chain, NULL if none. */
-
-#define HASH_GET_NEXT(NAME, DATA) ((DATA)->NAME)
-
-/********************************************************************//**
-Looks for a struct in a hash table. */
-#define HASH_SEARCH(NAME, TABLE, FOLD, TYPE, DATA, ASSERTION, TEST)\
-{\
-\
- HASH_ASSERT_OWNED(TABLE, FOLD)\
-\
- (DATA) = (TYPE) HASH_GET_FIRST(TABLE, hash_calc_hash(FOLD, TABLE));\
- HASH_ASSERT_VALID(DATA);\
-\
- while ((DATA) != NULL) {\
- ASSERTION;\
- if (TEST) {\
- break;\
- } else {\
- HASH_ASSERT_VALID(HASH_GET_NEXT(NAME, DATA));\
- (DATA) = (TYPE) HASH_GET_NEXT(NAME, DATA);\
- }\
- }\
-}
-
-/********************************************************************//**
-Looks for an item in all hash buckets. */
-#define HASH_SEARCH_ALL(NAME, TABLE, TYPE, DATA, ASSERTION, TEST) \
-do { \
- ulint i3333; \
- \
- for (i3333 = (TABLE)->n_cells; i3333--; ) { \
- (DATA) = (TYPE) HASH_GET_FIRST(TABLE, i3333); \
- \
- while ((DATA) != NULL) { \
- HASH_ASSERT_VALID(DATA); \
- ASSERTION; \
- \
- if (TEST) { \
- break; \
- } \
- \
- (DATA) = (TYPE) HASH_GET_NEXT(NAME, DATA); \
- } \
- \
- if ((DATA) != NULL) { \
- break; \
- } \
- } \
-} while (0)
-
-/************************************************************//**
-Gets the nth cell in a hash table.
-@return pointer to cell */
-UNIV_INLINE
-hash_cell_t*
-hash_get_nth_cell(
-/*==============*/
- hash_table_t* table, /*!< in: hash table */
- ulint n); /*!< in: cell index */
-
-/*************************************************************//**
-Clears a hash table so that all the cells become empty. */
-UNIV_INLINE
-void
-hash_table_clear(
-/*=============*/
- hash_table_t* table); /*!< in/out: hash table */
-
-/*************************************************************//**
-Returns the number of cells in a hash table.
-@return number of cells */
-UNIV_INLINE
-ulint
-hash_get_n_cells(
-/*=============*/
- hash_table_t* table); /*!< in: table */
-/*******************************************************************//**
-Deletes a struct which is stored in the heap of the hash table, and compacts
-the heap. The fold value must be stored in the struct NODE in a field named
-'fold'. */
-
-#define HASH_DELETE_AND_COMPACT(TYPE, NAME, TABLE, NODE)\
-do {\
- TYPE* node111;\
- TYPE* top_node111;\
- hash_cell_t* cell111;\
- ulint fold111;\
-\
- fold111 = (NODE)->fold;\
-\
- HASH_DELETE(TYPE, NAME, TABLE, fold111, NODE);\
-\
- top_node111 = (TYPE*)mem_heap_get_top(\
- hash_get_heap(TABLE, fold111),\
- sizeof(TYPE));\
-\
- /* If the node to remove is not the top node in the heap, compact the\
- heap of nodes by moving the top node in the place of NODE. */\
-\
- if (NODE != top_node111) {\
-\
- /* Copy the top node in place of NODE */\
-\
- *(NODE) = *top_node111;\
-\
- cell111 = hash_get_nth_cell(TABLE,\
- hash_calc_hash(top_node111->fold, TABLE));\
-\
- /* Look for the pointer to the top node, to update it */\
-\
- if (cell111->node == top_node111) {\
- /* The top node is the first in the chain */\
-\
- cell111->node = NODE;\
- } else {\
- /* We have to look for the predecessor of the top\
- node */\
- node111 = cell111->node;\
-\
- while (top_node111 != HASH_GET_NEXT(NAME, node111)) {\
-\
- node111 = HASH_GET_NEXT(NAME, node111);\
- }\
-\
- /* Now we have the predecessor node */\
-\
- node111->NAME = NODE;\
- }\
- }\
-\
- /* Free the space occupied by the top node */\
-\
- mem_heap_free_top(hash_get_heap(TABLE, fold111), sizeof(TYPE));\
-} while (0)
-
-#ifndef UNIV_HOTBACKUP
-/****************************************************************//**
-Move all hash table entries from OLD_TABLE to NEW_TABLE. */
-
-#define HASH_MIGRATE(OLD_TABLE, NEW_TABLE, NODE_TYPE, PTR_NAME, FOLD_FUNC) \
-do {\
- ulint i2222;\
- ulint cell_count2222;\
-\
- cell_count2222 = hash_get_n_cells(OLD_TABLE);\
-\
- for (i2222 = 0; i2222 < cell_count2222; i2222++) {\
- NODE_TYPE* node2222 = HASH_GET_FIRST((OLD_TABLE), i2222);\
-\
- while (node2222) {\
- NODE_TYPE* next2222 = node2222->PTR_NAME;\
- ulint fold2222 = FOLD_FUNC(node2222);\
-\
- HASH_INSERT(NODE_TYPE, PTR_NAME, (NEW_TABLE),\
- fold2222, node2222);\
-\
- node2222 = next2222;\
- }\
- }\
-} while (0)
-
-/************************************************************//**
-Gets the mutex index for a fold value in a hash table.
-@return mutex number */
-UNIV_INLINE
-ulint
-hash_get_mutex_no(
-/*==============*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold); /*!< in: fold */
-/************************************************************//**
-Gets the nth heap in a hash table.
-@return mem heap */
-UNIV_INLINE
-mem_heap_t*
-hash_get_nth_heap(
-/*==============*/
- hash_table_t* table, /*!< in: hash table */
- ulint i); /*!< in: index of the heap */
-/************************************************************//**
-Gets the heap for a fold value in a hash table.
-@return mem heap */
-UNIV_INLINE
-mem_heap_t*
-hash_get_heap(
-/*==========*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold); /*!< in: fold */
-/************************************************************//**
-Gets the nth mutex in a hash table.
-@return mutex */
-UNIV_INLINE
-mutex_t*
-hash_get_nth_mutex(
-/*===============*/
- hash_table_t* table, /*!< in: hash table */
- ulint i); /*!< in: index of the mutex */
-/************************************************************//**
-Gets the mutex for a fold value in a hash table.
-@return mutex */
-UNIV_INLINE
-mutex_t*
-hash_get_mutex(
-/*===========*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold); /*!< in: fold */
-/************************************************************//**
-Reserves the mutex for a fold value in a hash table. */
-UNIV_INTERN
-void
-hash_mutex_enter(
-/*=============*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold); /*!< in: fold */
-/************************************************************//**
-Releases the mutex for a fold value in a hash table. */
-UNIV_INTERN
-void
-hash_mutex_exit(
-/*============*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold); /*!< in: fold */
-/************************************************************//**
-Reserves all the mutexes of a hash table, in an ascending order. */
-UNIV_INTERN
-void
-hash_mutex_enter_all(
-/*=================*/
- hash_table_t* table); /*!< in: hash table */
-/************************************************************//**
-Releases all the mutexes of a hash table. */
-UNIV_INTERN
-void
-hash_mutex_exit_all(
-/*================*/
- hash_table_t* table); /*!< in: hash table */
-#else /* !UNIV_HOTBACKUP */
-# define hash_get_heap(table, fold) ((table)->heap)
-# define hash_mutex_enter(table, fold) ((void) 0)
-# define hash_mutex_exit(table, fold) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-struct hash_cell_struct{
- void* node; /*!< hash chain node, NULL if none */
-};
-
-/* The hash table structure */
-struct hash_table_struct {
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-# ifndef UNIV_HOTBACKUP
- ibool adaptive;/* TRUE if this is the hash table of the
- adaptive hash index */
-# endif /* !UNIV_HOTBACKUP */
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- ulint n_cells;/* number of cells in the hash table */
- hash_cell_t* array; /*!< pointer to cell array */
-#ifndef UNIV_HOTBACKUP
- ulint n_mutexes;/* if mutexes != NULL, then the number of
- mutexes, must be a power of 2 */
- mutex_t* mutexes;/* NULL, or an array of mutexes used to
- protect segments of the hash table */
- mem_heap_t** heaps; /*!< if this is non-NULL, hash chain nodes for
- external chaining can be allocated from these
- memory heaps; there are then n_mutexes many of
- these heaps */
-#endif /* !UNIV_HOTBACKUP */
- mem_heap_t* heap;
- ulint magic_n;
-};
-
-#define HASH_TABLE_MAGIC_N 76561114
-
-#ifndef UNIV_NONINL
-#include "hash0hash.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/hash0hash.ic b/storage/innodb_plugin/include/hash0hash.ic
deleted file mode 100644
index 19da2d50701..00000000000
--- a/storage/innodb_plugin/include/hash0hash.ic
+++ /dev/null
@@ -1,163 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/hash0hash.ic
-The simple hash table utility
-
-Created 5/20/1997 Heikki Tuuri
-*******************************************************/
-
-#include "ut0rnd.h"
-
-/************************************************************//**
-Gets the nth cell in a hash table.
-@return pointer to cell */
-UNIV_INLINE
-hash_cell_t*
-hash_get_nth_cell(
-/*==============*/
- hash_table_t* table, /*!< in: hash table */
- ulint n) /*!< in: cell index */
-{
- ut_ad(n < table->n_cells);
-
- return(table->array + n);
-}
-
-/*************************************************************//**
-Clears a hash table so that all the cells become empty. */
-UNIV_INLINE
-void
-hash_table_clear(
-/*=============*/
- hash_table_t* table) /*!< in/out: hash table */
-{
- memset(table->array, 0x0,
- table->n_cells * sizeof(*table->array));
-}
-
-/*************************************************************//**
-Returns the number of cells in a hash table.
-@return number of cells */
-UNIV_INLINE
-ulint
-hash_get_n_cells(
-/*=============*/
- hash_table_t* table) /*!< in: table */
-{
- return(table->n_cells);
-}
-
-/**************************************************************//**
-Calculates the hash value from a folded value.
-@return hashed value */
-UNIV_INLINE
-ulint
-hash_calc_hash(
-/*===========*/
- ulint fold, /*!< in: folded value */
- hash_table_t* table) /*!< in: hash table */
-{
- return(ut_hash_ulint(fold, table->n_cells));
-}
-
-#ifndef UNIV_HOTBACKUP
-/************************************************************//**
-Gets the mutex index for a fold value in a hash table.
-@return mutex number */
-UNIV_INLINE
-ulint
-hash_get_mutex_no(
-/*==============*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: fold */
-{
- ut_ad(ut_is_2pow(table->n_mutexes));
- return(ut_2pow_remainder(hash_calc_hash(fold, table),
- table->n_mutexes));
-}
-
-/************************************************************//**
-Gets the nth heap in a hash table.
-@return mem heap */
-UNIV_INLINE
-mem_heap_t*
-hash_get_nth_heap(
-/*==============*/
- hash_table_t* table, /*!< in: hash table */
- ulint i) /*!< in: index of the heap */
-{
- ut_ad(i < table->n_mutexes);
-
- return(table->heaps[i]);
-}
-
-/************************************************************//**
-Gets the heap for a fold value in a hash table.
-@return mem heap */
-UNIV_INLINE
-mem_heap_t*
-hash_get_heap(
-/*==========*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: fold */
-{
- ulint i;
-
- if (table->heap) {
- return(table->heap);
- }
-
- i = hash_get_mutex_no(table, fold);
-
- return(hash_get_nth_heap(table, i));
-}
-
-/************************************************************//**
-Gets the nth mutex in a hash table.
-@return mutex */
-UNIV_INLINE
-mutex_t*
-hash_get_nth_mutex(
-/*===============*/
- hash_table_t* table, /*!< in: hash table */
- ulint i) /*!< in: index of the mutex */
-{
- ut_ad(i < table->n_mutexes);
-
- return(table->mutexes + i);
-}
-
-/************************************************************//**
-Gets the mutex for a fold value in a hash table.
-@return mutex */
-UNIV_INLINE
-mutex_t*
-hash_get_mutex(
-/*===========*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: fold */
-{
- ulint i;
-
- i = hash_get_mutex_no(table, fold);
-
- return(hash_get_nth_mutex(table, i));
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/ibuf0ibuf.h b/storage/innodb_plugin/include/ibuf0ibuf.h
deleted file mode 100644
index 8aa21fb9d95..00000000000
--- a/storage/innodb_plugin/include/ibuf0ibuf.h
+++ /dev/null
@@ -1,383 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/ibuf0ibuf.h
-Insert buffer
-
-Created 7/19/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef ibuf0ibuf_h
-#define ibuf0ibuf_h
-
-#include "univ.i"
-
-#include "mtr0mtr.h"
-#include "dict0mem.h"
-#include "fsp0fsp.h"
-
-#ifndef UNIV_HOTBACKUP
-# include "ibuf0types.h"
-
-/** Combinations of operations that can be buffered. Because the enum
-values are used for indexing innobase_change_buffering_values[], they
-should start at 0 and there should not be any gaps. */
-typedef enum {
- IBUF_USE_NONE = 0,
- IBUF_USE_INSERT, /* insert */
-
- IBUF_USE_COUNT /* number of entries in ibuf_use_t */
-} ibuf_use_t;
-
-/** Operations that can currently be buffered. */
-extern ibuf_use_t ibuf_use;
-
-/** The insert buffer control structure */
-extern ibuf_t* ibuf;
-
-/* The purpose of the insert buffer is to reduce random disk access.
-When we wish to insert a record into a non-unique secondary index and
-the B-tree leaf page where the record belongs to is not in the buffer
-pool, we insert the record into the insert buffer B-tree, indexed by
-(space_id, page_no). When the page is eventually read into the buffer
-pool, we look up the insert buffer B-tree for any modifications to the
-page, and apply these upon the completion of the read operation. This
-is called the insert buffer merge. */
-
-/* The insert buffer merge must always succeed. To guarantee this,
-the insert buffer subsystem keeps track of the free space in pages for
-which it can buffer operations. Two bits per page in the insert
-buffer bitmap indicate the available space in coarse increments. The
-free bits in the insert buffer bitmap must never exceed the free space
-on a page. It is safe to decrement or reset the bits in the bitmap in
-a mini-transaction that is committed before the mini-transaction that
-affects the free space. It is unsafe to increment the bits in a
-separately committed mini-transaction, because in crash recovery, the
-free bits could momentarily be set too high. */
-
-/******************************************************************//**
-Creates the insert buffer data structure at a database startup and
-initializes the data structures for the insert buffer of each tablespace. */
-UNIV_INTERN
-void
-ibuf_init_at_db_start(void);
-/*=======================*/
-/*********************************************************************//**
-Reads the biggest tablespace id from the high end of the insert buffer
-tree and updates the counter in fil_system. */
-UNIV_INTERN
-void
-ibuf_update_max_tablespace_id(void);
-/*===============================*/
-/*********************************************************************//**
-Initializes an ibuf bitmap page. */
-UNIV_INTERN
-void
-ibuf_bitmap_page_init(
-/*==================*/
- buf_block_t* block, /*!< in: bitmap page */
- mtr_t* mtr); /*!< in: mtr */
-/************************************************************************//**
-Resets the free bits of the page in the ibuf bitmap. This is done in a
-separate mini-transaction, hence this operation does not restrict
-further work to only ibuf bitmap operations, which would result if the
-latch to the bitmap page were kept. NOTE: The free bits in the insert
-buffer bitmap must never exceed the free space on a page. It is safe
-to decrement or reset the bits in the bitmap in a mini-transaction
-that is committed before the mini-transaction that affects the free
-space. */
-UNIV_INTERN
-void
-ibuf_reset_free_bits(
-/*=================*/
- buf_block_t* block); /*!< in: index page; free bits are set to 0
- if the index is a non-clustered
- non-unique, and page level is 0 */
-/************************************************************************//**
-Updates the free bits of an uncompressed page in the ibuf bitmap if
-there is not enough free on the page any more. This is done in a
-separate mini-transaction, hence this operation does not restrict
-further work to only ibuf bitmap operations, which would result if the
-latch to the bitmap page were kept. NOTE: The free bits in the insert
-buffer bitmap must never exceed the free space on a page. It is
-unsafe to increment the bits in a separately committed
-mini-transaction, because in crash recovery, the free bits could
-momentarily be set too high. It is only safe to use this function for
-decrementing the free bits. Should more free space become available,
-we must not update the free bits here, because that would break crash
-recovery. */
-UNIV_INLINE
-void
-ibuf_update_free_bits_if_full(
-/*==========================*/
- buf_block_t* block, /*!< in: index page to which we have added new
- records; the free bits are updated if the
- index is non-clustered and non-unique and
- the page level is 0, and the page becomes
- fuller */
- ulint max_ins_size,/*!< in: value of maximum insert size with
- reorganize before the latest operation
- performed to the page */
- ulint increase);/*!< in: upper limit for the additional space
- used in the latest operation, if known, or
- ULINT_UNDEFINED */
-/**********************************************************************//**
-Updates the free bits for an uncompressed page to reflect the present
-state. Does this in the mtr given, which means that the latching
-order rules virtually prevent any further operations for this OS
-thread until mtr is committed. NOTE: The free bits in the insert
-buffer bitmap must never exceed the free space on a page. It is safe
-to set the free bits in the same mini-transaction that updated the
-page. */
-UNIV_INTERN
-void
-ibuf_update_free_bits_low(
-/*======================*/
- const buf_block_t* block, /*!< in: index page */
- ulint max_ins_size, /*!< in: value of
- maximum insert size
- with reorganize before
- the latest operation
- performed to the page */
- mtr_t* mtr); /*!< in/out: mtr */
-/**********************************************************************//**
-Updates the free bits for a compressed page to reflect the present
-state. Does this in the mtr given, which means that the latching
-order rules virtually prevent any further operations for this OS
-thread until mtr is committed. NOTE: The free bits in the insert
-buffer bitmap must never exceed the free space on a page. It is safe
-to set the free bits in the same mini-transaction that updated the
-page. */
-UNIV_INTERN
-void
-ibuf_update_free_bits_zip(
-/*======================*/
- buf_block_t* block, /*!< in/out: index page */
- mtr_t* mtr); /*!< in/out: mtr */
-/**********************************************************************//**
-Updates the free bits for the two pages to reflect the present state.
-Does this in the mtr given, which means that the latching order rules
-virtually prevent any further operations until mtr is committed.
-NOTE: The free bits in the insert buffer bitmap must never exceed the
-free space on a page. It is safe to set the free bits in the same
-mini-transaction that updated the pages. */
-UNIV_INTERN
-void
-ibuf_update_free_bits_for_two_pages_low(
-/*====================================*/
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- buf_block_t* block1, /*!< in: index page */
- buf_block_t* block2, /*!< in: index page */
- mtr_t* mtr); /*!< in: mtr */
-/**********************************************************************//**
-A basic partial test if an insert to the insert buffer could be possible and
-recommended. */
-UNIV_INLINE
-ibool
-ibuf_should_try(
-/*============*/
- dict_index_t* index, /*!< in: index where to insert */
- ulint ignore_sec_unique); /*!< in: if != 0, we should
- ignore UNIQUE constraint on
- a secondary index when we
- decide */
-/******************************************************************//**
-Returns TRUE if the current OS thread is performing an insert buffer
-routine.
-
-For instance, a read-ahead of non-ibuf pages is forbidden by threads
-that are executing an insert buffer routine.
-@return TRUE if inside an insert buffer routine */
-UNIV_INTERN
-ibool
-ibuf_inside(void);
-/*=============*/
-/***********************************************************************//**
-Checks if a page address is an ibuf bitmap page (level 3 page) address.
-@return TRUE if a bitmap page */
-UNIV_INLINE
-ibool
-ibuf_bitmap_page(
-/*=============*/
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint page_no);/*!< in: page number */
-/***********************************************************************//**
-Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
-Must not be called when recv_no_ibuf_operations==TRUE.
-@return TRUE if level 2 or level 3 page */
-UNIV_INTERN
-ibool
-ibuf_page(
-/*======*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
- ulint page_no,/*!< in: page number */
- mtr_t* mtr); /*!< in: mtr which will contain an x-latch to the
- bitmap page if the page is not one of the fixed
- address ibuf pages, or NULL, in which case a new
- transaction is created. */
-/***********************************************************************//**
-Frees excess pages from the ibuf free list. This function is called when an OS
-thread calls fsp services to allocate a new file segment, or a new page to a
-file segment, and the thread did not own the fsp latch before this call. */
-UNIV_INTERN
-void
-ibuf_free_excess_pages(void);
-/*========================*/
-/*********************************************************************//**
-Makes an index insert to the insert buffer, instead of directly to the disk
-page, if this is possible. Does not do insert if the index is clustered
-or unique.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-ibuf_insert(
-/*========*/
- const dtuple_t* entry, /*!< in: index entry to insert */
- dict_index_t* index, /*!< in: index where to insert */
- ulint space, /*!< in: space id where to insert */
- ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
- ulint page_no,/*!< in: page number where to insert */
- que_thr_t* thr); /*!< in: query thread */
-/*********************************************************************//**
-When an index page is read from a disk to the buffer pool, this function
-inserts to the page the possible index entries buffered in the insert buffer.
-The entries are deleted from the insert buffer. If the page is not read, but
-created in the buffer pool, this function deletes its buffered entries from
-the insert buffer; there can exist entries for such a page if the page
-belonged to an index which subsequently was dropped. */
-UNIV_INTERN
-void
-ibuf_merge_or_delete_for_page(
-/*==========================*/
- buf_block_t* block, /*!< in: if page has been read from
- disk, pointer to the page x-latched,
- else NULL */
- ulint space, /*!< in: space id of the index page */
- ulint page_no,/*!< in: page number of the index page */
- ulint zip_size,/*!< in: compressed page size in bytes,
- or 0 */
- ibool update_ibuf_bitmap);/*!< in: normally this is set
- to TRUE, but if we have deleted or are
- deleting the tablespace, then we
- naturally do not want to update a
- non-existent bitmap page */
-/*********************************************************************//**
-Deletes all entries in the insert buffer for a given space id. This is used
-in DISCARD TABLESPACE and IMPORT TABLESPACE.
-NOTE: this does not update the page free bitmaps in the space. The space will
-become CORRUPT when you call this function! */
-UNIV_INTERN
-void
-ibuf_delete_for_discarded_space(
-/*============================*/
- ulint space); /*!< in: space id */
-/*********************************************************************//**
-Contracts insert buffer trees by reading pages to the buffer pool.
-@return a lower limit for the combined size in bytes of entries which
-will be merged from ibuf trees to the pages read, 0 if ibuf is
-empty */
-UNIV_INTERN
-ulint
-ibuf_contract(
-/*==========*/
- ibool sync); /*!< in: TRUE if the caller wants to wait for the
- issued read with the highest tablespace address
- to complete */
-/*********************************************************************//**
-Contracts insert buffer trees by reading pages to the buffer pool.
-@return a lower limit for the combined size in bytes of entries which
-will be merged from ibuf trees to the pages read, 0 if ibuf is
-empty */
-UNIV_INTERN
-ulint
-ibuf_contract_for_n_pages(
-/*======================*/
- ibool sync, /*!< in: TRUE if the caller wants to wait for the
- issued read with the highest tablespace address
- to complete */
- ulint n_pages);/*!< in: try to read at least this many pages to
- the buffer pool and merge the ibuf contents to
- them */
-#endif /* !UNIV_HOTBACKUP */
-/*********************************************************************//**
-Parses a redo log record of an ibuf bitmap page init.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-ibuf_parse_bitmap_init(
-/*===================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- buf_block_t* block, /*!< in: block or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
-#ifndef UNIV_HOTBACKUP
-#ifdef UNIV_IBUF_COUNT_DEBUG
-/******************************************************************//**
-Gets the ibuf count for a given page.
-@return number of entries in the insert buffer currently buffered for
-this page */
-UNIV_INTERN
-ulint
-ibuf_count_get(
-/*===========*/
- ulint space, /*!< in: space id */
- ulint page_no);/*!< in: page number */
-#endif
-/******************************************************************//**
-Looks if the insert buffer is empty.
-@return TRUE if empty */
-UNIV_INTERN
-ibool
-ibuf_is_empty(void);
-/*===============*/
-/******************************************************************//**
-Prints info of ibuf. */
-UNIV_INTERN
-void
-ibuf_print(
-/*=======*/
- FILE* file); /*!< in: file where to print */
-/******************************************************************//**
-Closes insert buffer and frees the data structures. */
-UNIV_INTERN
-void
-ibuf_close(void);
-/*============*/
-
-#define IBUF_HEADER_PAGE_NO FSP_IBUF_HEADER_PAGE_NO
-#define IBUF_TREE_ROOT_PAGE_NO FSP_IBUF_TREE_ROOT_PAGE_NO
-
-#endif /* !UNIV_HOTBACKUP */
-
-/* The ibuf header page currently contains only the file segment header
-for the file segment from which the pages for the ibuf tree are allocated */
-#define IBUF_HEADER PAGE_DATA
-#define IBUF_TREE_SEG_HEADER 0 /* fseg header for ibuf tree */
-
-/* The insert buffer tree itself is always located in space 0. */
-#define IBUF_SPACE_ID 0
-
-#ifndef UNIV_NONINL
-#include "ibuf0ibuf.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/ibuf0ibuf.ic b/storage/innodb_plugin/include/ibuf0ibuf.ic
deleted file mode 100644
index 15bbe61ab30..00000000000
--- a/storage/innodb_plugin/include/ibuf0ibuf.ic
+++ /dev/null
@@ -1,327 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/ibuf0ibuf.ic
-Insert buffer
-
-Created 7/19/1997 Heikki Tuuri
-*******************************************************/
-
-#include "page0page.h"
-#include "page0zip.h"
-#ifndef UNIV_HOTBACKUP
-#include "buf0lru.h"
-
-/** Counter for ibuf_should_try() */
-extern ulint ibuf_flush_count;
-
-/** An index page must contain at least UNIV_PAGE_SIZE /
-IBUF_PAGE_SIZE_PER_FREE_SPACE bytes of free space for ibuf to try to
-buffer inserts to this page. If there is this much of free space, the
-corresponding bits are set in the ibuf bitmap. */
-#define IBUF_PAGE_SIZE_PER_FREE_SPACE 32
-
-/** Insert buffer struct */
-struct ibuf_struct{
- ulint size; /*!< current size of the ibuf index
- tree, in pages */
- ulint max_size; /*!< recommended maximum size of the
- ibuf index tree, in pages */
- ulint seg_size; /*!< allocated pages of the file
- segment containing ibuf header and
- tree */
- ibool empty; /*!< after an insert to the ibuf tree
- is performed, this is set to FALSE,
- and if a contract operation finds
- the tree empty, this is set to
- TRUE */
- ulint free_list_len; /*!< length of the free list */
- ulint height; /*!< tree height */
- dict_index_t* index; /*!< insert buffer index */
-
- ulint n_inserts; /*!< number of inserts made to
- the insert buffer */
- ulint n_merges; /*!< number of pages merged */
- ulint n_merged_recs; /*!< number of records merged */
-};
-
-/************************************************************************//**
-Sets the free bit of the page in the ibuf bitmap. This is done in a separate
-mini-transaction, hence this operation does not restrict further work to only
-ibuf bitmap operations, which would result if the latch to the bitmap page
-were kept. */
-UNIV_INTERN
-void
-ibuf_set_free_bits_func(
-/*====================*/
- buf_block_t* block, /*!< in: index page of a non-clustered index;
- free bit is reset if page level is 0 */
-#ifdef UNIV_IBUF_DEBUG
- ulint max_val,/*!< in: ULINT_UNDEFINED or a maximum
- value which the bits must have before
- setting; this is for debugging */
-#endif /* UNIV_IBUF_DEBUG */
- ulint val); /*!< in: value to set: < 4 */
-#ifdef UNIV_IBUF_DEBUG
-# define ibuf_set_free_bits(b,v,max) ibuf_set_free_bits_func(b,max,v)
-#else /* UNIV_IBUF_DEBUG */
-# define ibuf_set_free_bits(b,v,max) ibuf_set_free_bits_func(b,v)
-#endif /* UNIV_IBUF_DEBUG */
-
-/**********************************************************************//**
-A basic partial test if an insert to the insert buffer could be possible and
-recommended. */
-UNIV_INLINE
-ibool
-ibuf_should_try(
-/*============*/
- dict_index_t* index, /*!< in: index where to insert */
- ulint ignore_sec_unique) /*!< in: if != 0, we should
- ignore UNIQUE constraint on
- a secondary index when we
- decide */
-{
- if (ibuf_use != IBUF_USE_NONE
- && !dict_index_is_clust(index)
- && (ignore_sec_unique || !dict_index_is_unique(index))) {
-
- ibuf_flush_count++;
-
- if (ibuf_flush_count % 4 == 0) {
-
- buf_LRU_try_free_flushed_blocks();
- }
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/***********************************************************************//**
-Checks if a page address is an ibuf bitmap page address.
-@return TRUE if a bitmap page */
-UNIV_INLINE
-ibool
-ibuf_bitmap_page(
-/*=============*/
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint page_no)/*!< in: page number */
-{
- ut_ad(ut_is_2pow(zip_size));
-
- if (!zip_size) {
- return(UNIV_UNLIKELY((page_no & (UNIV_PAGE_SIZE - 1))
- == FSP_IBUF_BITMAP_OFFSET));
- }
-
- return(UNIV_UNLIKELY((page_no & (zip_size - 1))
- == FSP_IBUF_BITMAP_OFFSET));
-}
-
-/*********************************************************************//**
-Translates the free space on a page to a value in the ibuf bitmap.
-@return value for ibuf bitmap bits */
-UNIV_INLINE
-ulint
-ibuf_index_page_calc_free_bits(
-/*===========================*/
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint max_ins_size) /*!< in: maximum insert size after reorganize
- for the page */
-{
- ulint n;
- ut_ad(ut_is_2pow(zip_size));
- ut_ad(!zip_size || zip_size > IBUF_PAGE_SIZE_PER_FREE_SPACE);
- ut_ad(zip_size <= UNIV_PAGE_SIZE);
-
- if (zip_size) {
- n = max_ins_size
- / (zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
- } else {
- n = max_ins_size
- / (UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
- }
-
- if (n == 3) {
- n = 2;
- }
-
- if (n > 3) {
- n = 3;
- }
-
- return(n);
-}
-
-/*********************************************************************//**
-Translates the ibuf free bits to the free space on a page in bytes.
-@return maximum insert size after reorganize for the page */
-UNIV_INLINE
-ulint
-ibuf_index_page_calc_free_from_bits(
-/*================================*/
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint bits) /*!< in: value for ibuf bitmap bits */
-{
- ut_ad(bits < 4);
- ut_ad(ut_is_2pow(zip_size));
- ut_ad(!zip_size || zip_size > IBUF_PAGE_SIZE_PER_FREE_SPACE);
- ut_ad(zip_size <= UNIV_PAGE_SIZE);
-
- if (zip_size) {
- if (bits == 3) {
- return(4 * zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
- }
-
- return(bits * zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
- }
-
- if (bits == 3) {
- return(4 * UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
- }
-
- return(bits * (UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE));
-}
-
-/*********************************************************************//**
-Translates the free space on a compressed page to a value in the ibuf bitmap.
-@return value for ibuf bitmap bits */
-UNIV_INLINE
-ulint
-ibuf_index_page_calc_free_zip(
-/*==========================*/
- ulint zip_size,
- /*!< in: compressed page size in bytes */
- const buf_block_t* block) /*!< in: buffer block */
-{
- ulint max_ins_size;
- const page_zip_des_t* page_zip;
- lint zip_max_ins;
-
- ut_ad(zip_size == buf_block_get_zip_size(block));
- ut_ad(zip_size);
-
- max_ins_size = page_get_max_insert_size_after_reorganize(
- buf_block_get_frame(block), 1);
-
- page_zip = buf_block_get_page_zip(block);
- zip_max_ins = page_zip_max_ins_size(page_zip,
- FALSE/* not clustered */);
-
- if (UNIV_UNLIKELY(zip_max_ins < 0)) {
- return(0);
- } else if (UNIV_LIKELY(max_ins_size > (ulint) zip_max_ins)) {
- max_ins_size = (ulint) zip_max_ins;
- }
-
- return(ibuf_index_page_calc_free_bits(zip_size, max_ins_size));
-}
-
-/*********************************************************************//**
-Translates the free space on a page to a value in the ibuf bitmap.
-@return value for ibuf bitmap bits */
-UNIV_INLINE
-ulint
-ibuf_index_page_calc_free(
-/*======================*/
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- const buf_block_t* block) /*!< in: buffer block */
-{
- ut_ad(zip_size == buf_block_get_zip_size(block));
-
- if (!zip_size) {
- ulint max_ins_size;
-
- max_ins_size = page_get_max_insert_size_after_reorganize(
- buf_block_get_frame(block), 1);
-
- return(ibuf_index_page_calc_free_bits(0, max_ins_size));
- } else {
- return(ibuf_index_page_calc_free_zip(zip_size, block));
- }
-}
-
-/************************************************************************//**
-Updates the free bits of an uncompressed page in the ibuf bitmap if
-there is not enough free on the page any more. This is done in a
-separate mini-transaction, hence this operation does not restrict
-further work to only ibuf bitmap operations, which would result if the
-latch to the bitmap page were kept. NOTE: The free bits in the insert
-buffer bitmap must never exceed the free space on a page. It is
-unsafe to increment the bits in a separately committed
-mini-transaction, because in crash recovery, the free bits could
-momentarily be set too high. It is only safe to use this function for
-decrementing the free bits. Should more free space become available,
-we must not update the free bits here, because that would break crash
-recovery. */
-UNIV_INLINE
-void
-ibuf_update_free_bits_if_full(
-/*==========================*/
- buf_block_t* block, /*!< in: index page to which we have added new
- records; the free bits are updated if the
- index is non-clustered and non-unique and
- the page level is 0, and the page becomes
- fuller */
- ulint max_ins_size,/*!< in: value of maximum insert size with
- reorganize before the latest operation
- performed to the page */
- ulint increase)/*!< in: upper limit for the additional space
- used in the latest operation, if known, or
- ULINT_UNDEFINED */
-{
- ulint before;
- ulint after;
-
- ut_ad(!buf_block_get_page_zip(block));
-
- before = ibuf_index_page_calc_free_bits(0, max_ins_size);
-
- if (max_ins_size >= increase) {
-#if ULINT32_UNDEFINED <= UNIV_PAGE_SIZE
-# error "ULINT32_UNDEFINED <= UNIV_PAGE_SIZE"
-#endif
- after = ibuf_index_page_calc_free_bits(0, max_ins_size
- - increase);
-#ifdef UNIV_IBUF_DEBUG
- ut_a(after <= ibuf_index_page_calc_free(0, block));
-#endif
- } else {
- after = ibuf_index_page_calc_free(0, block);
- }
-
- if (after == 0) {
- /* We move the page to the front of the buffer pool LRU list:
- the purpose of this is to prevent those pages to which we
- cannot make inserts using the insert buffer from slipping
- out of the buffer pool */
-
- buf_page_make_young(&block->page);
- }
-
- if (before > after) {
- ibuf_set_free_bits(block, after, before);
- }
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/ibuf0types.h b/storage/innodb_plugin/include/ibuf0types.h
deleted file mode 100644
index 55944f879b2..00000000000
--- a/storage/innodb_plugin/include/ibuf0types.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/ibuf0types.h
-Insert buffer global types
-
-Created 7/29/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef ibuf0types_h
-#define ibuf0types_h
-
-typedef struct ibuf_struct ibuf_t;
-
-#endif
diff --git a/storage/innodb_plugin/include/lock0iter.h b/storage/innodb_plugin/include/lock0iter.h
deleted file mode 100644
index 25a57c9740c..00000000000
--- a/storage/innodb_plugin/include/lock0iter.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/lock0iter.h
-Lock queue iterator type and function prototypes.
-
-Created July 16, 2007 Vasil Dimov
-*******************************************************/
-
-#ifndef lock0iter_h
-#define lock0iter_h
-
-#include "univ.i"
-#include "lock0types.h"
-
-typedef struct lock_queue_iterator_struct {
- const lock_t* current_lock;
- /* In case this is a record lock queue (not table lock queue)
- then bit_no is the record number within the heap in which the
- record is stored. */
- ulint bit_no;
-} lock_queue_iterator_t;
-
-/*******************************************************************//**
-Initialize lock queue iterator so that it starts to iterate from
-"lock". bit_no specifies the record number within the heap where the
-record is stored. It can be undefined (ULINT_UNDEFINED) in two cases:
-1. If the lock is a table lock, thus we have a table lock queue;
-2. If the lock is a record lock and it is a wait lock. In this case
- bit_no is calculated in this function by using
- lock_rec_find_set_bit(). There is exactly one bit set in the bitmap
- of a wait lock. */
-UNIV_INTERN
-void
-lock_queue_iterator_reset(
-/*======================*/
- lock_queue_iterator_t* iter, /*!< out: iterator */
- const lock_t* lock, /*!< in: lock to start from */
- ulint bit_no);/*!< in: record number in the
- heap */
-
-/*******************************************************************//**
-Gets the previous lock in the lock queue, returns NULL if there are no
-more locks (i.e. the current lock is the first one). The iterator is
-receded (if not-NULL is returned).
-@return previous lock or NULL */
-
-const lock_t*
-lock_queue_iterator_get_prev(
-/*=========================*/
- lock_queue_iterator_t* iter); /*!< in/out: iterator */
-
-#endif /* lock0iter_h */
diff --git a/storage/innodb_plugin/include/lock0lock.h b/storage/innodb_plugin/include/lock0lock.h
deleted file mode 100644
index 82e4c9bd976..00000000000
--- a/storage/innodb_plugin/include/lock0lock.h
+++ /dev/null
@@ -1,823 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/lock0lock.h
-The transaction lock system
-
-Created 5/7/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef lock0lock_h
-#define lock0lock_h
-
-#include "univ.i"
-#include "buf0types.h"
-#include "trx0types.h"
-#include "mtr0types.h"
-#include "rem0types.h"
-#include "dict0types.h"
-#include "que0types.h"
-#include "lock0types.h"
-#include "read0types.h"
-#include "hash0hash.h"
-#include "ut0vec.h"
-
-#ifdef UNIV_DEBUG
-extern ibool lock_print_waits;
-#endif /* UNIV_DEBUG */
-/* Buffer for storing information about the most recent deadlock error */
-extern FILE* lock_latest_err_file;
-
-/*********************************************************************//**
-Gets the size of a lock struct.
-@return size in bytes */
-UNIV_INTERN
-ulint
-lock_get_size(void);
-/*===============*/
-/*********************************************************************//**
-Creates the lock system at database start. */
-UNIV_INTERN
-void
-lock_sys_create(
-/*============*/
- ulint n_cells); /*!< in: number of slots in lock hash table */
-/*********************************************************************//**
-Closes the lock system at database shutdown. */
-UNIV_INTERN
-void
-lock_sys_close(void);
-/*================*/
-/*********************************************************************//**
-Checks if some transaction has an implicit x-lock on a record in a clustered
-index.
-@return transaction which has the x-lock, or NULL */
-UNIV_INLINE
-trx_t*
-lock_clust_rec_some_has_impl(
-/*=========================*/
- const rec_t* rec, /*!< in: user record */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets);/*!< in: rec_get_offsets(rec, index) */
-/*********************************************************************//**
-Gets the heap_no of the smallest user record on a page.
-@return heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */
-UNIV_INLINE
-ulint
-lock_get_min_heap_no(
-/*=================*/
- const buf_block_t* block); /*!< in: buffer block */
-/*************************************************************//**
-Updates the lock table when we have reorganized a page. NOTE: we copy
-also the locks set on the infimum of the page; the infimum may carry
-locks if an update of a record is occurring on the page, and its locks
-were temporarily stored on the infimum. */
-UNIV_INTERN
-void
-lock_move_reorganize_page(
-/*======================*/
- const buf_block_t* block, /*!< in: old index page, now
- reorganized */
- const buf_block_t* oblock);/*!< in: copy of the old, not
- reorganized page */
-/*************************************************************//**
-Moves the explicit locks on user records to another page if a record
-list end is moved to another page. */
-UNIV_INTERN
-void
-lock_move_rec_list_end(
-/*===================*/
- const buf_block_t* new_block, /*!< in: index page to move to */
- const buf_block_t* block, /*!< in: index page */
- const rec_t* rec); /*!< in: record on page: this
- is the first record moved */
-/*************************************************************//**
-Moves the explicit locks on user records to another page if a record
-list start is moved to another page. */
-UNIV_INTERN
-void
-lock_move_rec_list_start(
-/*=====================*/
- const buf_block_t* new_block, /*!< in: index page to move to */
- const buf_block_t* block, /*!< in: index page */
- const rec_t* rec, /*!< in: record on page:
- this is the first
- record NOT copied */
- const rec_t* old_end); /*!< in: old
- previous-to-last
- record on new_page
- before the records
- were copied */
-/*************************************************************//**
-Updates the lock table when a page is split to the right. */
-UNIV_INTERN
-void
-lock_update_split_right(
-/*====================*/
- const buf_block_t* right_block, /*!< in: right page */
- const buf_block_t* left_block); /*!< in: left page */
-/*************************************************************//**
-Updates the lock table when a page is merged to the right. */
-UNIV_INTERN
-void
-lock_update_merge_right(
-/*====================*/
- const buf_block_t* right_block, /*!< in: right page to
- which merged */
- const rec_t* orig_succ, /*!< in: original
- successor of infimum
- on the right page
- before merge */
- const buf_block_t* left_block); /*!< in: merged index
- page which will be
- discarded */
-/*************************************************************//**
-Updates the lock table when the root page is copied to another in
-btr_root_raise_and_insert. Note that we leave lock structs on the
-root page, even though they do not make sense on other than leaf
-pages: the reason is that in a pessimistic update the infimum record
-of the root page will act as a dummy carrier of the locks of the record
-to be updated. */
-UNIV_INTERN
-void
-lock_update_root_raise(
-/*===================*/
- const buf_block_t* block, /*!< in: index page to which copied */
- const buf_block_t* root); /*!< in: root page */
-/*************************************************************//**
-Updates the lock table when a page is copied to another and the original page
-is removed from the chain of leaf pages, except if page is the root! */
-UNIV_INTERN
-void
-lock_update_copy_and_discard(
-/*=========================*/
- const buf_block_t* new_block, /*!< in: index page to
- which copied */
- const buf_block_t* block); /*!< in: index page;
- NOT the root! */
-/*************************************************************//**
-Updates the lock table when a page is split to the left. */
-UNIV_INTERN
-void
-lock_update_split_left(
-/*===================*/
- const buf_block_t* right_block, /*!< in: right page */
- const buf_block_t* left_block); /*!< in: left page */
-/*************************************************************//**
-Updates the lock table when a page is merged to the left. */
-UNIV_INTERN
-void
-lock_update_merge_left(
-/*===================*/
- const buf_block_t* left_block, /*!< in: left page to
- which merged */
- const rec_t* orig_pred, /*!< in: original predecessor
- of supremum on the left page
- before merge */
- const buf_block_t* right_block); /*!< in: merged index page
- which will be discarded */
-/*************************************************************//**
-Resets the original locks on heir and replaces them with gap type locks
-inherited from rec. */
-UNIV_INTERN
-void
-lock_rec_reset_and_inherit_gap_locks(
-/*=================================*/
- const buf_block_t* heir_block, /*!< in: block containing the
- record which inherits */
- const buf_block_t* block, /*!< in: block containing the
- record from which inherited;
- does NOT reset the locks on
- this record */
- ulint heir_heap_no, /*!< in: heap_no of the
- inheriting record */
- ulint heap_no); /*!< in: heap_no of the
- donating record */
-/*************************************************************//**
-Updates the lock table when a page is discarded. */
-UNIV_INTERN
-void
-lock_update_discard(
-/*================*/
- const buf_block_t* heir_block, /*!< in: index page
- which will inherit the locks */
- ulint heir_heap_no, /*!< in: heap_no of the record
- which will inherit the locks */
- const buf_block_t* block); /*!< in: index page
- which will be discarded */
-/*************************************************************//**
-Updates the lock table when a new user record is inserted. */
-UNIV_INTERN
-void
-lock_update_insert(
-/*===============*/
- const buf_block_t* block, /*!< in: buffer block containing rec */
- const rec_t* rec); /*!< in: the inserted record */
-/*************************************************************//**
-Updates the lock table when a record is removed. */
-UNIV_INTERN
-void
-lock_update_delete(
-/*===============*/
- const buf_block_t* block, /*!< in: buffer block containing rec */
- const rec_t* rec); /*!< in: the record to be removed */
-/*********************************************************************//**
-Stores on the page infimum record the explicit locks of another record.
-This function is used to store the lock state of a record when it is
-updated and the size of the record changes in the update. The record
-is in such an update moved, perhaps to another page. The infimum record
-acts as a dummy carrier record, taking care of lock releases while the
-actual record is being moved. */
-UNIV_INTERN
-void
-lock_rec_store_on_page_infimum(
-/*===========================*/
- const buf_block_t* block, /*!< in: buffer block containing rec */
- const rec_t* rec); /*!< in: record whose lock state
- is stored on the infimum
- record of the same page; lock
- bits are reset on the
- record */
-/*********************************************************************//**
-Restores the state of explicit lock requests on a single record, where the
-state was stored on the infimum of the page. */
-UNIV_INTERN
-void
-lock_rec_restore_from_page_infimum(
-/*===============================*/
- const buf_block_t* block, /*!< in: buffer block containing rec */
- const rec_t* rec, /*!< in: record whose lock state
- is restored */
- const buf_block_t* donator);/*!< in: page (rec is not
- necessarily on this page)
- whose infimum stored the lock
- state; lock bits are reset on
- the infimum */
-/*********************************************************************//**
-Returns TRUE if there are explicit record locks on a page.
-@return TRUE if there are explicit record locks on the page */
-UNIV_INTERN
-ibool
-lock_rec_expl_exist_on_page(
-/*========================*/
- ulint space, /*!< in: space id */
- ulint page_no);/*!< in: page number */
-/*********************************************************************//**
-Checks if locks of other transactions prevent an immediate insert of
-a record. If they do, first tests if the query thread should anyway
-be suspended for some reason; if not, then puts the transaction and
-the query thread to the lock wait state and inserts a waiting request
-for a gap x-lock to the lock queue.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-ulint
-lock_rec_insert_check_and_lock(
-/*===========================*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is
- set, does nothing */
- const rec_t* rec, /*!< in: record after which to insert */
- buf_block_t* block, /*!< in/out: buffer block of rec */
- dict_index_t* index, /*!< in: index */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- ibool* inherit);/*!< out: set to TRUE if the new
- inserted record maybe should inherit
- LOCK_GAP type locks from the successor
- record */
-/*********************************************************************//**
-Checks if locks of other transactions prevent an immediate modify (update,
-delete mark, or delete unmark) of a clustered index record. If they do,
-first tests if the query thread should anyway be suspended for some
-reason; if not, then puts the transaction and the query thread to the
-lock wait state and inserts a waiting request for a record x-lock to the
-lock queue.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-ulint
-lock_clust_rec_modify_check_and_lock(
-/*=================================*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
- bit is set, does nothing */
- const buf_block_t* block, /*!< in: buffer block of rec */
- const rec_t* rec, /*!< in: record which should be
- modified */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- que_thr_t* thr); /*!< in: query thread */
-/*********************************************************************//**
-Checks if locks of other transactions prevent an immediate modify
-(delete mark or delete unmark) of a secondary index record.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-ulint
-lock_sec_rec_modify_check_and_lock(
-/*===============================*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
- bit is set, does nothing */
- buf_block_t* block, /*!< in/out: buffer block of rec */
- const rec_t* rec, /*!< in: record which should be
- modified; NOTE: as this is a secondary
- index, we always have to modify the
- clustered index record first: see the
- comment below */
- dict_index_t* index, /*!< in: secondary index */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-/*********************************************************************//**
-Like the counterpart for a clustered index below, but now we read a
-secondary index record.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-ulint
-lock_sec_rec_read_check_and_lock(
-/*=============================*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
- bit is set, does nothing */
- const buf_block_t* block, /*!< in: buffer block of rec */
- const rec_t* rec, /*!< in: user record or page
- supremum record which should
- be read or passed over by a
- read cursor */
- dict_index_t* index, /*!< in: secondary index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- enum lock_mode mode, /*!< in: mode of the lock which
- the read cursor should set on
- records: LOCK_S or LOCK_X; the
- latter is possible in
- SELECT FOR UPDATE */
- ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP */
- que_thr_t* thr); /*!< in: query thread */
-/*********************************************************************//**
-Checks if locks of other transactions prevent an immediate read, or passing
-over by a read cursor, of a clustered index record. If they do, first tests
-if the query thread should anyway be suspended for some reason; if not, then
-puts the transaction and the query thread to the lock wait state and inserts a
-waiting request for a record lock to the lock queue. Sets the requested mode
-lock on the record.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-ulint
-lock_clust_rec_read_check_and_lock(
-/*===============================*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
- bit is set, does nothing */
- const buf_block_t* block, /*!< in: buffer block of rec */
- const rec_t* rec, /*!< in: user record or page
- supremum record which should
- be read or passed over by a
- read cursor */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- enum lock_mode mode, /*!< in: mode of the lock which
- the read cursor should set on
- records: LOCK_S or LOCK_X; the
- latter is possible in
- SELECT FOR UPDATE */
- ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP */
- que_thr_t* thr); /*!< in: query thread */
-/*********************************************************************//**
-Checks if locks of other transactions prevent an immediate read, or passing
-over by a read cursor, of a clustered index record. If they do, first tests
-if the query thread should anyway be suspended for some reason; if not, then
-puts the transaction and the query thread to the lock wait state and inserts a
-waiting request for a record lock to the lock queue. Sets the requested mode
-lock on the record. This is an alternative version of
-lock_clust_rec_read_check_and_lock() that does not require the parameter
-"offsets".
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-ulint
-lock_clust_rec_read_check_and_lock_alt(
-/*===================================*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
- bit is set, does nothing */
- const buf_block_t* block, /*!< in: buffer block of rec */
- const rec_t* rec, /*!< in: user record or page
- supremum record which should
- be read or passed over by a
- read cursor */
- dict_index_t* index, /*!< in: clustered index */
- enum lock_mode mode, /*!< in: mode of the lock which
- the read cursor should set on
- records: LOCK_S or LOCK_X; the
- latter is possible in
- SELECT FOR UPDATE */
- ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP */
- que_thr_t* thr); /*!< in: query thread */
-/*********************************************************************//**
-Checks that a record is seen in a consistent read.
-@return TRUE if sees, or FALSE if an earlier version of the record
-should be retrieved */
-UNIV_INTERN
-ibool
-lock_clust_rec_cons_read_sees(
-/*==========================*/
- const rec_t* rec, /*!< in: user record which should be read or
- passed over by a read cursor */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- read_view_t* view); /*!< in: consistent read view */
-/*********************************************************************//**
-Checks that a non-clustered index record is seen in a consistent read.
-
-NOTE that a non-clustered index page contains so little information on
-its modifications that also in the case FALSE, the present version of
-rec may be the right, but we must check this from the clustered index
-record.
-
-@return TRUE if certainly sees, or FALSE if an earlier version of the
-clustered index record might be needed */
-UNIV_INTERN
-ulint
-lock_sec_rec_cons_read_sees(
-/*========================*/
- const rec_t* rec, /*!< in: user record which
- should be read or passed over
- by a read cursor */
- const read_view_t* view); /*!< in: consistent read view */
-/*********************************************************************//**
-Locks the specified database table in the mode given. If the lock cannot
-be granted immediately, the query thread is put to wait.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-ulint
-lock_table(
-/*=======*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- dict_table_t* table, /*!< in: database table in dictionary cache */
- enum lock_mode mode, /*!< in: lock mode */
- que_thr_t* thr); /*!< in: query thread */
-/*************************************************************//**
-Removes a granted record lock of a transaction from the queue and grants
-locks to other transactions waiting in the queue if they now are entitled
-to a lock. */
-UNIV_INTERN
-void
-lock_rec_unlock(
-/*============*/
- trx_t* trx, /*!< in: transaction that has
- set a record lock */
- const buf_block_t* block, /*!< in: buffer block containing rec */
- const rec_t* rec, /*!< in: record */
- enum lock_mode lock_mode);/*!< in: LOCK_S or LOCK_X */
-/*********************************************************************//**
-Releases transaction locks, and releases possible other transactions waiting
-because of these locks. */
-UNIV_INTERN
-void
-lock_release_off_kernel(
-/*====================*/
- trx_t* trx); /*!< in: transaction */
-/*********************************************************************//**
-Cancels a waiting lock request and releases possible other transactions
-waiting behind it. */
-UNIV_INTERN
-void
-lock_cancel_waiting_and_release(
-/*============================*/
- lock_t* lock); /*!< in: waiting lock request */
-
-/*********************************************************************//**
-Removes locks on a table to be dropped or truncated.
-If remove_also_table_sx_locks is TRUE then table-level S and X locks are
-also removed in addition to other table-level and record-level locks.
-No lock, that is going to be removed, is allowed to be a wait lock. */
-UNIV_INTERN
-void
-lock_remove_all_on_table(
-/*=====================*/
- dict_table_t* table, /*!< in: table to be dropped
- or truncated */
- ibool remove_also_table_sx_locks);/*!< in: also removes
- table S and X locks */
-
-/*********************************************************************//**
-Calculates the fold value of a page file address: used in inserting or
-searching for a lock in the hash table.
-@return folded value */
-UNIV_INLINE
-ulint
-lock_rec_fold(
-/*==========*/
- ulint space, /*!< in: space */
- ulint page_no)/*!< in: page number */
- __attribute__((const));
-/*********************************************************************//**
-Calculates the hash value of a page file address: used in inserting or
-searching for a lock in the hash table.
-@return hashed value */
-UNIV_INLINE
-ulint
-lock_rec_hash(
-/*==========*/
- ulint space, /*!< in: space */
- ulint page_no);/*!< in: page number */
-
-/**********************************************************************//**
-Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED,
-if none found.
-@return bit index == heap number of the record, or ULINT_UNDEFINED if
-none found */
-UNIV_INTERN
-ulint
-lock_rec_find_set_bit(
-/*==================*/
- const lock_t* lock); /*!< in: record lock with at least one
- bit set */
-
-/*********************************************************************//**
-Gets the source table of an ALTER TABLE transaction. The table must be
-covered by an IX or IS table lock.
-@return the source table of transaction, if it is covered by an IX or
-IS table lock; dest if there is no source table, and NULL if the
-transaction is locking more than two tables or an inconsistency is
-found */
-UNIV_INTERN
-dict_table_t*
-lock_get_src_table(
-/*===============*/
- trx_t* trx, /*!< in: transaction */
- dict_table_t* dest, /*!< in: destination of ALTER TABLE */
- enum lock_mode* mode); /*!< out: lock mode of the source table */
-/*********************************************************************//**
-Determine if the given table is exclusively "owned" by the given
-transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC
-on the table.
-@return TRUE if table is only locked by trx, with LOCK_IX, and
-possibly LOCK_AUTO_INC */
-UNIV_INTERN
-ibool
-lock_is_table_exclusive(
-/*====================*/
- dict_table_t* table, /*!< in: table */
- trx_t* trx); /*!< in: transaction */
-/*********************************************************************//**
-Checks if a lock request lock1 has to wait for request lock2.
-@return TRUE if lock1 has to wait for lock2 to be removed */
-UNIV_INTERN
-ibool
-lock_has_to_wait(
-/*=============*/
- const lock_t* lock1, /*!< in: waiting lock */
- const lock_t* lock2); /*!< in: another lock; NOTE that it is
- assumed that this has a lock bit set
- on the same record as in lock1 if the
- locks are record locks */
-/*********************************************************************//**
-Checks that a transaction id is sensible, i.e., not in the future.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-lock_check_trx_id_sanity(
-/*=====================*/
- trx_id_t trx_id, /*!< in: trx id */
- const rec_t* rec, /*!< in: user record */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
- ibool has_kernel_mutex);/*!< in: TRUE if the caller owns the
- kernel mutex */
-/*********************************************************************//**
-Prints info of a table lock. */
-UNIV_INTERN
-void
-lock_table_print(
-/*=============*/
- FILE* file, /*!< in: file where to print */
- const lock_t* lock); /*!< in: table type lock */
-/*********************************************************************//**
-Prints info of a record lock. */
-UNIV_INTERN
-void
-lock_rec_print(
-/*===========*/
- FILE* file, /*!< in: file where to print */
- const lock_t* lock); /*!< in: record type lock */
-/*********************************************************************//**
-Prints info of locks for all transactions. */
-UNIV_INTERN
-void
-lock_print_info_summary(
-/*====================*/
- FILE* file); /*!< in: file where to print */
-/*********************************************************************//**
-Prints info of locks for each transaction. */
-UNIV_INTERN
-void
-lock_print_info_all_transactions(
-/*=============================*/
- FILE* file); /*!< in: file where to print */
-/*********************************************************************//**
-Return approximate number or record locks (bits set in the bitmap) for
-this transaction. Since delete-marked records may be removed, the
-record count will not be precise. */
-UNIV_INTERN
-ulint
-lock_number_of_rows_locked(
-/*=======================*/
- trx_t* trx); /*!< in: transaction */
-/*******************************************************************//**
-Check if a transaction holds any autoinc locks.
-@return TRUE if the transaction holds any AUTOINC locks. */
-UNIV_INTERN
-ibool
-lock_trx_holds_autoinc_locks(
-/*=========================*/
- const trx_t* trx); /*!< in: transaction */
-/*******************************************************************//**
-Release all the transaction's autoinc locks. */
-UNIV_INTERN
-void
-lock_release_autoinc_locks(
-/*=======================*/
- trx_t* trx); /*!< in/out: transaction */
-
-/*******************************************************************//**
-Gets the type of a lock. Non-inline version for using outside of the
-lock module.
-@return LOCK_TABLE or LOCK_REC */
-UNIV_INTERN
-ulint
-lock_get_type(
-/*==========*/
- const lock_t* lock); /*!< in: lock */
-
-/*******************************************************************//**
-Gets the id of the transaction owning a lock.
-@return transaction id */
-UNIV_INTERN
-ullint
-lock_get_trx_id(
-/*============*/
- const lock_t* lock); /*!< in: lock */
-
-/*******************************************************************//**
-Gets the mode of a lock in a human readable string.
-The string should not be free()'d or modified.
-@return lock mode */
-UNIV_INTERN
-const char*
-lock_get_mode_str(
-/*==============*/
- const lock_t* lock); /*!< in: lock */
-
-/*******************************************************************//**
-Gets the type of a lock in a human readable string.
-The string should not be free()'d or modified.
-@return lock type */
-UNIV_INTERN
-const char*
-lock_get_type_str(
-/*==============*/
- const lock_t* lock); /*!< in: lock */
-
-/*******************************************************************//**
-Gets the id of the table on which the lock is.
-@return id of the table */
-UNIV_INTERN
-ullint
-lock_get_table_id(
-/*==============*/
- const lock_t* lock); /*!< in: lock */
-
-/*******************************************************************//**
-Gets the name of the table on which the lock is.
-The string should not be free()'d or modified.
-@return name of the table */
-UNIV_INTERN
-const char*
-lock_get_table_name(
-/*================*/
- const lock_t* lock); /*!< in: lock */
-
-/*******************************************************************//**
-For a record lock, gets the index on which the lock is.
-@return index */
-UNIV_INTERN
-const dict_index_t*
-lock_rec_get_index(
-/*===============*/
- const lock_t* lock); /*!< in: lock */
-
-/*******************************************************************//**
-For a record lock, gets the name of the index on which the lock is.
-The string should not be free()'d or modified.
-@return name of the index */
-UNIV_INTERN
-const char*
-lock_rec_get_index_name(
-/*====================*/
- const lock_t* lock); /*!< in: lock */
-
-/*******************************************************************//**
-For a record lock, gets the tablespace number on which the lock is.
-@return tablespace number */
-UNIV_INTERN
-ulint
-lock_rec_get_space_id(
-/*==================*/
- const lock_t* lock); /*!< in: lock */
-
-/*******************************************************************//**
-For a record lock, gets the page number on which the lock is.
-@return page number */
-UNIV_INTERN
-ulint
-lock_rec_get_page_no(
-/*=================*/
- const lock_t* lock); /*!< in: lock */
-
-/** Lock modes and types */
-/* @{ */
-#define LOCK_MODE_MASK 0xFUL /*!< mask used to extract mode from the
- type_mode field in a lock */
-/** Lock types */
-/* @{ */
-#define LOCK_TABLE 16 /*!< table lock */
-#define LOCK_REC 32 /*!< record lock */
-#define LOCK_TYPE_MASK 0xF0UL /*!< mask used to extract lock type from the
- type_mode field in a lock */
-#if LOCK_MODE_MASK & LOCK_TYPE_MASK
-# error "LOCK_MODE_MASK & LOCK_TYPE_MASK"
-#endif
-
-#define LOCK_WAIT 256 /*!< Waiting lock flag; when set, it
- means that the lock has not yet been
- granted, it is just waiting for its
- turn in the wait queue */
-/* Precise modes */
-#define LOCK_ORDINARY 0 /*!< this flag denotes an ordinary
- next-key lock in contrast to LOCK_GAP
- or LOCK_REC_NOT_GAP */
-#define LOCK_GAP 512 /*!< when this bit is set, it means that the
- lock holds only on the gap before the record;
- for instance, an x-lock on the gap does not
- give permission to modify the record on which
- the bit is set; locks of this type are created
- when records are removed from the index chain
- of records */
-#define LOCK_REC_NOT_GAP 1024 /*!< this bit means that the lock is only on
- the index record and does NOT block inserts
- to the gap before the index record; this is
- used in the case when we retrieve a record
- with a unique key, and is also used in
- locking plain SELECTs (not part of UPDATE
- or DELETE) when the user has set the READ
- COMMITTED isolation level */
-#define LOCK_INSERT_INTENTION 2048 /*!< this bit is set when we place a waiting
- gap type record lock request in order to let
- an insert of an index record to wait until
- there are no conflicting locks by other
- transactions on the gap; note that this flag
- remains set when the waiting lock is granted,
- or if the lock is inherited to a neighboring
- record */
-#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_MODE_MASK
-# error
-#endif
-#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_TYPE_MASK
-# error
-#endif
-/* @} */
-
-/** Lock operation struct */
-typedef struct lock_op_struct lock_op_t;
-/** Lock operation struct */
-struct lock_op_struct{
- dict_table_t* table; /*!< table to be locked */
- enum lock_mode mode; /*!< lock mode */
-};
-
-/** The lock system struct */
-struct lock_sys_struct{
- hash_table_t* rec_hash; /*!< hash table of the record locks */
-};
-
-/** The lock system */
-extern lock_sys_t* lock_sys;
-
-
-#ifndef UNIV_NONINL
-#include "lock0lock.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/lock0lock.ic b/storage/innodb_plugin/include/lock0lock.ic
deleted file mode 100644
index 014722f51c4..00000000000
--- a/storage/innodb_plugin/include/lock0lock.ic
+++ /dev/null
@@ -1,121 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/lock0lock.ic
-The transaction lock system
-
-Created 5/7/1996 Heikki Tuuri
-*******************************************************/
-
-#include "sync0sync.h"
-#include "srv0srv.h"
-#include "dict0dict.h"
-#include "row0row.h"
-#include "trx0sys.h"
-#include "trx0trx.h"
-#include "buf0buf.h"
-#include "page0page.h"
-#include "page0cur.h"
-#include "row0vers.h"
-#include "que0que.h"
-#include "btr0cur.h"
-#include "read0read.h"
-#include "log0recv.h"
-
-/*********************************************************************//**
-Calculates the fold value of a page file address: used in inserting or
-searching for a lock in the hash table.
-@return folded value */
-UNIV_INLINE
-ulint
-lock_rec_fold(
-/*==========*/
- ulint space, /*!< in: space */
- ulint page_no)/*!< in: page number */
-{
- return(ut_fold_ulint_pair(space, page_no));
-}
-
-/*********************************************************************//**
-Calculates the hash value of a page file address: used in inserting or
-searching for a lock in the hash table.
-@return hashed value */
-UNIV_INLINE
-ulint
-lock_rec_hash(
-/*==========*/
- ulint space, /*!< in: space */
- ulint page_no)/*!< in: page number */
-{
- return(hash_calc_hash(lock_rec_fold(space, page_no),
- lock_sys->rec_hash));
-}
-
-/*********************************************************************//**
-Checks if some transaction has an implicit x-lock on a record in a clustered
-index.
-@return transaction which has the x-lock, or NULL */
-UNIV_INLINE
-trx_t*
-lock_clust_rec_some_has_impl(
-/*=========================*/
- const rec_t* rec, /*!< in: user record */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
-{
- trx_id_t trx_id;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(dict_index_is_clust(index));
- ut_ad(page_rec_is_user_rec(rec));
-
- trx_id = row_get_rec_trx_id(rec, index, offsets);
-
- if (trx_is_active(trx_id)) {
- /* The modifying or inserting transaction is active */
-
- return(trx_get_on_id(trx_id));
- }
-
- return(NULL);
-}
-
-/*********************************************************************//**
-Gets the heap_no of the smallest user record on a page.
-@return heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */
-UNIV_INLINE
-ulint
-lock_get_min_heap_no(
-/*=================*/
- const buf_block_t* block) /*!< in: buffer block */
-{
- const page_t* page = block->frame;
-
- if (page_is_comp(page)) {
- return(rec_get_heap_no_new(
- page
- + rec_get_next_offs(page + PAGE_NEW_INFIMUM,
- TRUE)));
- } else {
- return(rec_get_heap_no_old(
- page
- + rec_get_next_offs(page + PAGE_OLD_INFIMUM,
- FALSE)));
- }
-}
diff --git a/storage/innodb_plugin/include/lock0priv.h b/storage/innodb_plugin/include/lock0priv.h
deleted file mode 100644
index 287c151b19f..00000000000
--- a/storage/innodb_plugin/include/lock0priv.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/lock0priv.h
-Lock module internal structures and methods.
-
-Created July 12, 2007 Vasil Dimov
-*******************************************************/
-
-#ifndef lock0priv_h
-#define lock0priv_h
-
-#ifndef LOCK_MODULE_IMPLEMENTATION
-/* If you need to access members of the structures defined in this
-file, please write appropriate functions that retrieve them and put
-those functions in lock/ */
-#error Do not include lock0priv.h outside of the lock/ module
-#endif
-
-#include "univ.i"
-#include "dict0types.h"
-#include "hash0hash.h"
-#include "trx0types.h"
-#include "ut0lst.h"
-
-/** A table lock */
-typedef struct lock_table_struct lock_table_t;
-/** A table lock */
-struct lock_table_struct {
- dict_table_t* table; /*!< database table in dictionary
- cache */
- UT_LIST_NODE_T(lock_t)
- locks; /*!< list of locks on the same
- table */
-};
-
-/** Record lock for a page */
-typedef struct lock_rec_struct lock_rec_t;
-/** Record lock for a page */
-struct lock_rec_struct {
- ulint space; /*!< space id */
- ulint page_no; /*!< page number */
- ulint n_bits; /*!< number of bits in the lock
- bitmap; NOTE: the lock bitmap is
- placed immediately after the
- lock struct */
-};
-
-/** Lock struct */
-struct lock_struct {
- trx_t* trx; /*!< transaction owning the
- lock */
- UT_LIST_NODE_T(lock_t)
- trx_locks; /*!< list of the locks of the
- transaction */
- ulint type_mode; /*!< lock type, mode, LOCK_GAP or
- LOCK_REC_NOT_GAP,
- LOCK_INSERT_INTENTION,
- wait flag, ORed */
- hash_node_t hash; /*!< hash chain node for a record
- lock */
- dict_index_t* index; /*!< index for a record lock */
- union {
- lock_table_t tab_lock;/*!< table lock */
- lock_rec_t rec_lock;/*!< record lock */
- } un_member; /*!< lock details */
-};
-
-/*********************************************************************//**
-Gets the type of a lock.
-@return LOCK_TABLE or LOCK_REC */
-UNIV_INLINE
-ulint
-lock_get_type_low(
-/*==============*/
- const lock_t* lock); /*!< in: lock */
-
-/*********************************************************************//**
-Gets the previous record lock set on a record.
-@return previous lock on the same record, NULL if none exists */
-UNIV_INTERN
-const lock_t*
-lock_rec_get_prev(
-/*==============*/
- const lock_t* in_lock,/*!< in: record lock */
- ulint heap_no);/*!< in: heap number of the record */
-
-#ifndef UNIV_NONINL
-#include "lock0priv.ic"
-#endif
-
-#endif /* lock0priv_h */
diff --git a/storage/innodb_plugin/include/lock0priv.ic b/storage/innodb_plugin/include/lock0priv.ic
deleted file mode 100644
index 30447c99848..00000000000
--- a/storage/innodb_plugin/include/lock0priv.ic
+++ /dev/null
@@ -1,49 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/lock0priv.ic
-Lock module internal inline methods.
-
-Created July 16, 2007 Vasil Dimov
-*******************************************************/
-
-/* This file contains only methods which are used in
-lock/lock0* files, other than lock/lock0lock.c.
-I.e. lock/lock0lock.c contains more internal inline
-methods but they are used only in that file. */
-
-#ifndef LOCK_MODULE_IMPLEMENTATION
-#error Do not include lock0priv.ic outside of the lock/ module
-#endif
-
-/*********************************************************************//**
-Gets the type of a lock.
-@return LOCK_TABLE or LOCK_REC */
-UNIV_INLINE
-ulint
-lock_get_type_low(
-/*==============*/
- const lock_t* lock) /*!< in: lock */
-{
- ut_ad(lock);
-
- return(lock->type_mode & LOCK_TYPE_MASK);
-}
-
-/* vim: set filetype=c: */
diff --git a/storage/innodb_plugin/include/lock0types.h b/storage/innodb_plugin/include/lock0types.h
deleted file mode 100644
index 45f29e90fe9..00000000000
--- a/storage/innodb_plugin/include/lock0types.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/lock0types.h
-The transaction lock system global types
-
-Created 5/7/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef lock0types_h
-#define lock0types_h
-
-#define lock_t ib_lock_t
-typedef struct lock_struct lock_t;
-typedef struct lock_sys_struct lock_sys_t;
-
-/* Basic lock modes */
-enum lock_mode {
- LOCK_IS = 0, /* intention shared */
- LOCK_IX, /* intention exclusive */
- LOCK_S, /* shared */
- LOCK_X, /* exclusive */
- LOCK_AUTO_INC, /* locks the auto-inc counter of a table
- in an exclusive mode */
- LOCK_NONE, /* this is used elsewhere to note consistent read */
- LOCK_NUM = LOCK_NONE/* number of lock modes */
-};
-
-#endif
diff --git a/storage/innodb_plugin/include/log0log.h b/storage/innodb_plugin/include/log0log.h
deleted file mode 100644
index 135aeb69e2d..00000000000
--- a/storage/innodb_plugin/include/log0log.h
+++ /dev/null
@@ -1,976 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2009, Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/log0log.h
-Database log
-
-Created 12/9/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef log0log_h
-#define log0log_h
-
-#include "univ.i"
-#include "ut0byte.h"
-#include "ut0lst.h"
-#ifndef UNIV_HOTBACKUP
-#include "sync0sync.h"
-#include "sync0rw.h"
-#endif /* !UNIV_HOTBACKUP */
-
-/** Redo log buffer */
-typedef struct log_struct log_t;
-/** Redo log group */
-typedef struct log_group_struct log_group_t;
-
-#ifdef UNIV_DEBUG
-/** Flag: write to log file? */
-extern ibool log_do_write;
-/** Flag: enable debug output when writing to the log? */
-extern ibool log_debug_writes;
-#else /* UNIV_DEBUG */
-/** Write to log */
-# define log_do_write TRUE
-#endif /* UNIV_DEBUG */
-
-/** Wait modes for log_write_up_to @{ */
-#define LOG_NO_WAIT 91
-#define LOG_WAIT_ONE_GROUP 92
-#define LOG_WAIT_ALL_GROUPS 93
-/* @} */
-/** Maximum number of log groups in log_group_struct::checkpoint_buf */
-#define LOG_MAX_N_GROUPS 32
-
-#ifndef UNIV_HOTBACKUP
-/****************************************************************//**
-Sets the global variable log_fsp_current_free_limit. Also makes a checkpoint,
-so that we know that the limit has been written to a log checkpoint field
-on disk. */
-UNIV_INTERN
-void
-log_fsp_current_free_limit_set_and_checkpoint(
-/*==========================================*/
- ulint limit); /*!< in: limit to set */
-#endif /* !UNIV_HOTBACKUP */
-/*******************************************************************//**
-Calculates where in log files we find a specified lsn.
-@return log file number */
-UNIV_INTERN
-ulint
-log_calc_where_lsn_is(
-/*==================*/
- ib_int64_t* log_file_offset, /*!< out: offset in that file
- (including the header) */
- ib_uint64_t first_header_lsn, /*!< in: first log file start
- lsn */
- ib_uint64_t lsn, /*!< in: lsn whose position to
- determine */
- ulint n_log_files, /*!< in: total number of log
- files */
- ib_int64_t log_file_size); /*!< in: log file size
- (including the header) */
-#ifndef UNIV_HOTBACKUP
-/************************************************************//**
-Writes to the log the string given. The log must be released with
-log_release.
-@return end lsn of the log record, zero if did not succeed */
-UNIV_INLINE
-ib_uint64_t
-log_reserve_and_write_fast(
-/*=======================*/
- const void* str, /*!< in: string */
- ulint len, /*!< in: string length */
- ib_uint64_t* start_lsn);/*!< out: start lsn of the log record */
-/***********************************************************************//**
-Releases the log mutex. */
-UNIV_INLINE
-void
-log_release(void);
-/*=============*/
-/***********************************************************************//**
-Checks if there is need for a log buffer flush or a new checkpoint, and does
-this if yes. Any database operation should call this when it has modified
-more than about 4 pages. NOTE that this function may only be called when the
-OS thread owns no synchronization objects except the dictionary mutex. */
-UNIV_INLINE
-void
-log_free_check(void);
-/*================*/
-/************************************************************//**
-Opens the log for log_write_low. The log must be closed with log_close and
-released with log_release.
-@return start lsn of the log record */
-UNIV_INTERN
-ib_uint64_t
-log_reserve_and_open(
-/*=================*/
- ulint len); /*!< in: length of data to be catenated */
-/************************************************************//**
-Writes to the log the string given. It is assumed that the caller holds the
-log mutex. */
-UNIV_INTERN
-void
-log_write_low(
-/*==========*/
- byte* str, /*!< in: string */
- ulint str_len); /*!< in: string length */
-/************************************************************//**
-Closes the log.
-@return lsn */
-UNIV_INTERN
-ib_uint64_t
-log_close(void);
-/*===========*/
-/************************************************************//**
-Gets the current lsn.
-@return current lsn */
-UNIV_INLINE
-ib_uint64_t
-log_get_lsn(void);
-/*=============*/
-/****************************************************************//**
-Gets the log group capacity. It is OK to read the value without
-holding log_sys->mutex because it is constant.
-@return log group capacity */
-UNIV_INLINE
-ulint
-log_get_capacity(void);
-/*==================*/
-/******************************************************//**
-Initializes the log. */
-UNIV_INTERN
-void
-log_init(void);
-/*==========*/
-/******************************************************************//**
-Inits a log group to the log system. */
-UNIV_INTERN
-void
-log_group_init(
-/*===========*/
- ulint id, /*!< in: group id */
- ulint n_files, /*!< in: number of log files */
- ulint file_size, /*!< in: log file size in bytes */
- ulint space_id, /*!< in: space id of the file space
- which contains the log files of this
- group */
- ulint archive_space_id); /*!< in: space id of the file space
- which contains some archived log
- files for this group; currently, only
- for the first log group this is
- used */
-/******************************************************//**
-Completes an i/o to a log file. */
-UNIV_INTERN
-void
-log_io_complete(
-/*============*/
- log_group_t* group); /*!< in: log group */
-/******************************************************//**
-This function is called, e.g., when a transaction wants to commit. It checks
-that the log has been written to the log file up to the last log entry written
-by the transaction. If there is a flush running, it waits and checks if the
-flush flushed enough. If not, starts a new flush. */
-UNIV_INTERN
-void
-log_write_up_to(
-/*============*/
- ib_uint64_t lsn, /*!< in: log sequence number up to which
- the log should be written,
- IB_ULONGLONG_MAX if not specified */
- ulint wait, /*!< in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
- or LOG_WAIT_ALL_GROUPS */
- ibool flush_to_disk);
- /*!< in: TRUE if we want the written log
- also to be flushed to disk */
-/****************************************************************//**
-Does a synchronous flush of the log buffer to disk. */
-UNIV_INTERN
-void
-log_buffer_flush_to_disk(void);
-/*==========================*/
-/****************************************************************//**
-This function writes the log buffer to the log file and if 'flush'
-is set it forces a flush of the log file as well. This is meant to be
-called from background master thread only as it does not wait for
-the write (+ possible flush) to finish. */
-UNIV_INTERN
-void
-log_buffer_sync_in_background(
-/*==========================*/
- ibool flush); /*!< in: flush the logs to disk */
-/****************************************************************//**
-Advances the smallest lsn for which there are unflushed dirty blocks in the
-buffer pool and also may make a new checkpoint. NOTE: this function may only
-be called if the calling thread owns no synchronization objects!
-@return FALSE if there was a flush batch of the same type running,
-which means that we could not start this flush batch */
-UNIV_INTERN
-ibool
-log_preflush_pool_modified_pages(
-/*=============================*/
- ib_uint64_t new_oldest, /*!< in: try to advance
- oldest_modified_lsn at least
- to this lsn */
- ibool sync); /*!< in: TRUE if synchronous
- operation is desired */
-/******************************************************//**
-Makes a checkpoint. Note that this function does not flush dirty
-blocks from the buffer pool: it only checks what is lsn of the oldest
-modification in the pool, and writes information about the lsn in
-log files. Use log_make_checkpoint_at to flush also the pool.
-@return TRUE if success, FALSE if a checkpoint write was already running */
-UNIV_INTERN
-ibool
-log_checkpoint(
-/*===========*/
- ibool sync, /*!< in: TRUE if synchronous operation is
- desired */
- ibool write_always); /*!< in: the function normally checks if
- the new checkpoint would have a greater
- lsn than the previous one: if not, then no
- physical write is done; by setting this
- parameter TRUE, a physical write will always be
- made to log files */
-/****************************************************************//**
-Makes a checkpoint at a given lsn or later. */
-UNIV_INTERN
-void
-log_make_checkpoint_at(
-/*===================*/
- ib_uint64_t lsn, /*!< in: make a checkpoint at this or a
- later lsn, if IB_ULONGLONG_MAX, makes
- a checkpoint at the latest lsn */
- ibool write_always); /*!< in: the function normally checks if
- the new checkpoint would have a
- greater lsn than the previous one: if
- not, then no physical write is done;
- by setting this parameter TRUE, a
- physical write will always be made to
- log files */
-/****************************************************************//**
-Makes a checkpoint at the latest lsn and writes it to first page of each
-data file in the database, so that we know that the file spaces contain
-all modifications up to that lsn. This can only be called at database
-shutdown. This function also writes all log in log files to the log archive. */
-UNIV_INTERN
-void
-logs_empty_and_mark_files_at_shutdown(void);
-/*=======================================*/
-/******************************************************//**
-Reads a checkpoint info from a log group header to log_sys->checkpoint_buf. */
-UNIV_INTERN
-void
-log_group_read_checkpoint_info(
-/*===========================*/
- log_group_t* group, /*!< in: log group */
- ulint field); /*!< in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
-/*******************************************************************//**
-Gets info from a checkpoint about a log group. */
-UNIV_INTERN
-void
-log_checkpoint_get_nth_group_info(
-/*==============================*/
- const byte* buf, /*!< in: buffer containing checkpoint info */
- ulint n, /*!< in: nth slot */
- ulint* file_no,/*!< out: archived file number */
- ulint* offset);/*!< out: archived file offset */
-/******************************************************//**
-Writes checkpoint info to groups. */
-UNIV_INTERN
-void
-log_groups_write_checkpoint_info(void);
-/*==================================*/
-/********************************************************************//**
-Starts an archiving operation.
-@return TRUE if succeed, FALSE if an archiving operation was already running */
-UNIV_INTERN
-ibool
-log_archive_do(
-/*===========*/
- ibool sync, /*!< in: TRUE if synchronous operation is desired */
- ulint* n_bytes);/*!< out: archive log buffer size, 0 if nothing to
- archive */
-/****************************************************************//**
-Writes the log contents to the archive up to the lsn when this function was
-called, and stops the archiving. When archiving is started again, the archived
-log file numbers start from a number one higher, so that the archiving will
-not write again to the archived log files which exist when this function
-returns.
-@return DB_SUCCESS or DB_ERROR */
-UNIV_INTERN
-ulint
-log_archive_stop(void);
-/*==================*/
-/****************************************************************//**
-Starts again archiving which has been stopped.
-@return DB_SUCCESS or DB_ERROR */
-UNIV_INTERN
-ulint
-log_archive_start(void);
-/*===================*/
-/****************************************************************//**
-Stop archiving the log so that a gap may occur in the archived log files.
-@return DB_SUCCESS or DB_ERROR */
-UNIV_INTERN
-ulint
-log_archive_noarchivelog(void);
-/*==========================*/
-/****************************************************************//**
-Start archiving the log so that a gap may occur in the archived log files.
-@return DB_SUCCESS or DB_ERROR */
-UNIV_INTERN
-ulint
-log_archive_archivelog(void);
-/*========================*/
-/******************************************************//**
-Generates an archived log file name. */
-UNIV_INTERN
-void
-log_archived_file_name_gen(
-/*=======================*/
- char* buf, /*!< in: buffer where to write */
- ulint id, /*!< in: group id */
- ulint file_no);/*!< in: file number */
-#else /* !UNIV_HOTBACKUP */
-/******************************************************//**
-Writes info to a buffer of a log group when log files are created in
-backup restoration. */
-UNIV_INTERN
-void
-log_reset_first_header_and_checkpoint(
-/*==================================*/
- byte* hdr_buf,/*!< in: buffer which will be written to the
- start of the first log file */
- ib_uint64_t start); /*!< in: lsn of the start of the first log file;
- we pretend that there is a checkpoint at
- start + LOG_BLOCK_HDR_SIZE */
-#endif /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Checks that there is enough free space in the log to start a new query step.
-Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
-function may only be called if the calling thread owns no synchronization
-objects! */
-UNIV_INTERN
-void
-log_check_margins(void);
-/*===================*/
-#ifndef UNIV_HOTBACKUP
-/******************************************************//**
-Reads a specified log segment to a buffer. */
-UNIV_INTERN
-void
-log_group_read_log_seg(
-/*===================*/
- ulint type, /*!< in: LOG_ARCHIVE or LOG_RECOVER */
- byte* buf, /*!< in: buffer where to read */
- log_group_t* group, /*!< in: log group */
- ib_uint64_t start_lsn, /*!< in: read area start */
- ib_uint64_t end_lsn); /*!< in: read area end */
-/******************************************************//**
-Writes a buffer to a log file group. */
-UNIV_INTERN
-void
-log_group_write_buf(
-/*================*/
- log_group_t* group, /*!< in: log group */
- byte* buf, /*!< in: buffer */
- ulint len, /*!< in: buffer len; must be divisible
- by OS_FILE_LOG_BLOCK_SIZE */
- ib_uint64_t start_lsn, /*!< in: start lsn of the buffer; must
- be divisible by
- OS_FILE_LOG_BLOCK_SIZE */
- ulint new_data_offset);/*!< in: start offset of new data in
- buf: this parameter is used to decide
- if we have to write a new log file
- header */
-/********************************************************//**
-Sets the field values in group to correspond to a given lsn. For this function
-to work, the values must already be correctly initialized to correspond to
-some lsn, for instance, a checkpoint lsn. */
-UNIV_INTERN
-void
-log_group_set_fields(
-/*=================*/
- log_group_t* group, /*!< in/out: group */
- ib_uint64_t lsn); /*!< in: lsn for which the values should be
- set */
-/******************************************************//**
-Calculates the data capacity of a log group, when the log file headers are not
-included.
-@return capacity in bytes */
-UNIV_INTERN
-ulint
-log_group_get_capacity(
-/*===================*/
- const log_group_t* group); /*!< in: log group */
-#endif /* !UNIV_HOTBACKUP */
-/************************************************************//**
-Gets a log block flush bit.
-@return TRUE if this block was the first to be written in a log flush */
-UNIV_INLINE
-ibool
-log_block_get_flush_bit(
-/*====================*/
- const byte* log_block); /*!< in: log block */
-/************************************************************//**
-Gets a log block number stored in the header.
-@return log block number stored in the block header */
-UNIV_INLINE
-ulint
-log_block_get_hdr_no(
-/*=================*/
- const byte* log_block); /*!< in: log block */
-/************************************************************//**
-Gets a log block data length.
-@return log block data length measured as a byte offset from the block start */
-UNIV_INLINE
-ulint
-log_block_get_data_len(
-/*===================*/
- const byte* log_block); /*!< in: log block */
-/************************************************************//**
-Sets the log block data length. */
-UNIV_INLINE
-void
-log_block_set_data_len(
-/*===================*/
- byte* log_block, /*!< in/out: log block */
- ulint len); /*!< in: data length */
-/************************************************************//**
-Calculates the checksum for a log block.
-@return checksum */
-UNIV_INLINE
-ulint
-log_block_calc_checksum(
-/*====================*/
- const byte* block); /*!< in: log block */
-/************************************************************//**
-Gets a log block checksum field value.
-@return checksum */
-UNIV_INLINE
-ulint
-log_block_get_checksum(
-/*===================*/
- const byte* log_block); /*!< in: log block */
-/************************************************************//**
-Sets a log block checksum field value. */
-UNIV_INLINE
-void
-log_block_set_checksum(
-/*===================*/
- byte* log_block, /*!< in/out: log block */
- ulint checksum); /*!< in: checksum */
-/************************************************************//**
-Gets a log block first mtr log record group offset.
-@return first mtr log record group byte offset from the block start, 0
-if none */
-UNIV_INLINE
-ulint
-log_block_get_first_rec_group(
-/*==========================*/
- const byte* log_block); /*!< in: log block */
-/************************************************************//**
-Sets the log block first mtr log record group offset. */
-UNIV_INLINE
-void
-log_block_set_first_rec_group(
-/*==========================*/
- byte* log_block, /*!< in/out: log block */
- ulint offset); /*!< in: offset, 0 if none */
-/************************************************************//**
-Gets a log block checkpoint number field (4 lowest bytes).
-@return checkpoint no (4 lowest bytes) */
-UNIV_INLINE
-ulint
-log_block_get_checkpoint_no(
-/*========================*/
- const byte* log_block); /*!< in: log block */
-/************************************************************//**
-Initializes a log block in the log buffer. */
-UNIV_INLINE
-void
-log_block_init(
-/*===========*/
- byte* log_block, /*!< in: pointer to the log buffer */
- ib_uint64_t lsn); /*!< in: lsn within the log block */
-/************************************************************//**
-Initializes a log block in the log buffer in the old, < 3.23.52 format, where
-there was no checksum yet. */
-UNIV_INLINE
-void
-log_block_init_in_old_format(
-/*=========================*/
- byte* log_block, /*!< in: pointer to the log buffer */
- ib_uint64_t lsn); /*!< in: lsn within the log block */
-/************************************************************//**
-Converts a lsn to a log block number.
-@return log block number, it is > 0 and <= 1G */
-UNIV_INLINE
-ulint
-log_block_convert_lsn_to_no(
-/*========================*/
- ib_uint64_t lsn); /*!< in: lsn of a byte within the block */
-/******************************************************//**
-Prints info of the log. */
-UNIV_INTERN
-void
-log_print(
-/*======*/
- FILE* file); /*!< in: file where to print */
-/******************************************************//**
-Peeks the current lsn.
-@return TRUE if success, FALSE if could not get the log system mutex */
-UNIV_INTERN
-ibool
-log_peek_lsn(
-/*=========*/
- ib_uint64_t* lsn); /*!< out: if returns TRUE, current lsn is here */
-/**********************************************************************//**
-Refreshes the statistics used to print per-second averages. */
-UNIV_INTERN
-void
-log_refresh_stats(void);
-/*===================*/
-/**********************************************************//**
-Shutdown the log system but do not release all the memory. */
-UNIV_INTERN
-void
-log_shutdown(void);
-/*==============*/
-/**********************************************************//**
-Free the log system data structures. */
-UNIV_INTERN
-void
-log_mem_free(void);
-/*==============*/
-
-extern log_t* log_sys;
-
-/* Values used as flags */
-#define LOG_FLUSH 7652559
-#define LOG_CHECKPOINT 78656949
-#ifdef UNIV_LOG_ARCHIVE
-# define LOG_ARCHIVE 11122331
-#endif /* UNIV_LOG_ARCHIVE */
-#define LOG_RECOVER 98887331
-
-/* The counting of lsn's starts from this value: this must be non-zero */
-#define LOG_START_LSN ((ib_uint64_t) (16 * OS_FILE_LOG_BLOCK_SIZE))
-
-#define LOG_BUFFER_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE)
-#define LOG_ARCHIVE_BUF_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE / 4)
-
-/* Offsets of a log block header */
-#define LOG_BLOCK_HDR_NO 0 /* block number which must be > 0 and
- is allowed to wrap around at 2G; the
- highest bit is set to 1 if this is the
- first log block in a log flush write
- segment */
-#define LOG_BLOCK_FLUSH_BIT_MASK 0x80000000UL
- /* mask used to get the highest bit in
- the preceding field */
-#define LOG_BLOCK_HDR_DATA_LEN 4 /* number of bytes of log written to
- this block */
-#define LOG_BLOCK_FIRST_REC_GROUP 6 /* offset of the first start of an
- mtr log record group in this log block,
- 0 if none; if the value is the same
- as LOG_BLOCK_HDR_DATA_LEN, it means
- that the first rec group has not yet
- been catenated to this log block, but
- if it will, it will start at this
- offset; an archive recovery can
- start parsing the log records starting
- from this offset in this log block,
- if value not 0 */
-#define LOG_BLOCK_CHECKPOINT_NO 8 /* 4 lower bytes of the value of
- log_sys->next_checkpoint_no when the
- log block was last written to: if the
- block has not yet been written full,
- this value is only updated before a
- log buffer flush */
-#define LOG_BLOCK_HDR_SIZE 12 /* size of the log block header in
- bytes */
-
-/* Offsets of a log block trailer from the end of the block */
-#define LOG_BLOCK_CHECKSUM 4 /* 4 byte checksum of the log block
- contents; in InnoDB versions
- < 3.23.52 this did not contain the
- checksum but the same value as
- .._HDR_NO */
-#define LOG_BLOCK_TRL_SIZE 4 /* trailer size in bytes */
-
-/* Offsets for a checkpoint field */
-#define LOG_CHECKPOINT_NO 0
-#define LOG_CHECKPOINT_LSN 8
-#define LOG_CHECKPOINT_OFFSET 16
-#define LOG_CHECKPOINT_LOG_BUF_SIZE 20
-#define LOG_CHECKPOINT_ARCHIVED_LSN 24
-#define LOG_CHECKPOINT_GROUP_ARRAY 32
-
-/* For each value smaller than LOG_MAX_N_GROUPS the following 8 bytes: */
-
-#define LOG_CHECKPOINT_ARCHIVED_FILE_NO 0
-#define LOG_CHECKPOINT_ARCHIVED_OFFSET 4
-
-#define LOG_CHECKPOINT_ARRAY_END (LOG_CHECKPOINT_GROUP_ARRAY\
- + LOG_MAX_N_GROUPS * 8)
-#define LOG_CHECKPOINT_CHECKSUM_1 LOG_CHECKPOINT_ARRAY_END
-#define LOG_CHECKPOINT_CHECKSUM_2 (4 + LOG_CHECKPOINT_ARRAY_END)
-#define LOG_CHECKPOINT_FSP_FREE_LIMIT (8 + LOG_CHECKPOINT_ARRAY_END)
- /* current fsp free limit in
- tablespace 0, in units of one
- megabyte; this information is only used
- by ibbackup to decide if it can
- truncate unused ends of
- non-auto-extending data files in space
- 0 */
-#define LOG_CHECKPOINT_FSP_MAGIC_N (12 + LOG_CHECKPOINT_ARRAY_END)
- /* this magic number tells if the
- checkpoint contains the above field:
- the field was added to
- InnoDB-3.23.50 */
-#define LOG_CHECKPOINT_SIZE (16 + LOG_CHECKPOINT_ARRAY_END)
-
-#define LOG_CHECKPOINT_FSP_MAGIC_N_VAL 1441231243
-
-/* Offsets of a log file header */
-#define LOG_GROUP_ID 0 /* log group number */
-#define LOG_FILE_START_LSN 4 /* lsn of the start of data in this
- log file */
-#define LOG_FILE_NO 12 /* 4-byte archived log file number;
- this field is only defined in an
- archived log file */
-#define LOG_FILE_WAS_CREATED_BY_HOT_BACKUP 16
- /* a 32-byte field which contains
- the string 'ibbackup' and the
- creation time if the log file was
- created by ibbackup --restore;
- when mysqld is first time started
- on the restored database, it can
- print helpful info for the user */
-#define LOG_FILE_ARCH_COMPLETED OS_FILE_LOG_BLOCK_SIZE
- /* this 4-byte field is TRUE when
- the writing of an archived log file
- has been completed; this field is
- only defined in an archived log file */
-#define LOG_FILE_END_LSN (OS_FILE_LOG_BLOCK_SIZE + 4)
- /* lsn where the archived log file
- at least extends: actually the
- archived log file may extend to a
- later lsn, as long as it is within the
- same log block as this lsn; this field
- is defined only when an archived log
- file has been completely written */
-#define LOG_CHECKPOINT_1 OS_FILE_LOG_BLOCK_SIZE
- /* first checkpoint field in the log
- header; we write alternately to the
- checkpoint fields when we make new
- checkpoints; this field is only defined
- in the first log file of a log group */
-#define LOG_CHECKPOINT_2 (3 * OS_FILE_LOG_BLOCK_SIZE)
- /* second checkpoint field in the log
- header */
-#define LOG_FILE_HDR_SIZE (4 * OS_FILE_LOG_BLOCK_SIZE)
-
-#define LOG_GROUP_OK 301
-#define LOG_GROUP_CORRUPTED 302
-
-/** Log group consists of a number of log files, each of the same size; a log
-group is implemented as a space in the sense of the module fil0fil. */
-struct log_group_struct{
- /* The following fields are protected by log_sys->mutex */
- ulint id; /*!< log group id */
- ulint n_files; /*!< number of files in the group */
- ulint file_size; /*!< individual log file size in bytes,
- including the log file header */
- ulint space_id; /*!< file space which implements the log
- group */
- ulint state; /*!< LOG_GROUP_OK or
- LOG_GROUP_CORRUPTED */
- ib_uint64_t lsn; /*!< lsn used to fix coordinates within
- the log group */
- ulint lsn_offset; /*!< the offset of the above lsn */
- ulint n_pending_writes;/*!< number of currently pending flush
- writes for this log group */
- byte** file_header_bufs_ptr;/*!< unaligned buffers */
- byte** file_header_bufs;/*!< buffers for each file
- header in the group */
-#ifdef UNIV_LOG_ARCHIVE
- /*-----------------------------*/
- byte** archive_file_header_bufs_ptr;/*!< unaligned buffers */
- byte** archive_file_header_bufs;/*!< buffers for each file
- header in the group */
- ulint archive_space_id;/*!< file space which
- implements the log group
- archive */
- ulint archived_file_no;/*!< file number corresponding to
- log_sys->archived_lsn */
- ulint archived_offset;/*!< file offset corresponding to
- log_sys->archived_lsn, 0 if we have
- not yet written to the archive file
- number archived_file_no */
- ulint next_archived_file_no;/*!< during an archive write,
- until the write is completed, we
- store the next value for
- archived_file_no here: the write
- completion function then sets the new
- value to ..._file_no */
- ulint next_archived_offset; /*!< like the preceding field */
-#endif /* UNIV_LOG_ARCHIVE */
- /*-----------------------------*/
- ib_uint64_t scanned_lsn; /*!< used only in recovery: recovery scan
- succeeded up to this lsn in this log
- group */
- byte* checkpoint_buf_ptr;/*!< unaligned checkpoint header */
- byte* checkpoint_buf; /*!< checkpoint header is written from
- this buffer to the group */
- UT_LIST_NODE_T(log_group_t)
- log_groups; /*!< list of log groups */
-};
-
-/** Redo log buffer */
-struct log_struct{
- byte pad[64]; /*!< padding to prevent other memory
- update hotspots from residing on the
- same memory cache line */
- ib_uint64_t lsn; /*!< log sequence number */
- ulint buf_free; /*!< first free offset within the log
- buffer */
-#ifndef UNIV_HOTBACKUP
- mutex_t mutex; /*!< mutex protecting the log */
-#endif /* !UNIV_HOTBACKUP */
- byte* buf_ptr; /*!< unaligned log buffer */
- byte* buf; /*!< log buffer */
- ulint buf_size; /*!< log buffer size in bytes */
- ulint max_buf_free; /*!< recommended maximum value of
- buf_free, after which the buffer is
- flushed */
- ulint old_buf_free; /*!< value of buf free when log was
- last time opened; only in the debug
- version */
- ib_uint64_t old_lsn; /*!< value of lsn when log was
- last time opened; only in the
- debug version */
- ibool check_flush_or_checkpoint;
- /*!< this is set to TRUE when there may
- be need to flush the log buffer, or
- preflush buffer pool pages, or make
- a checkpoint; this MUST be TRUE when
- lsn - last_checkpoint_lsn >
- max_checkpoint_age; this flag is
- peeked at by log_free_check(), which
- does not reserve the log mutex */
- UT_LIST_BASE_NODE_T(log_group_t)
- log_groups; /*!< log groups */
-
-#ifndef UNIV_HOTBACKUP
- /** The fields involved in the log buffer flush @{ */
-
- ulint buf_next_to_write;/*!< first offset in the log buffer
- where the byte content may not exist
- written to file, e.g., the start
- offset of a log record catenated
- later; this is advanced when a flush
- operation is completed to all the log
- groups */
- ib_uint64_t written_to_some_lsn;
- /*!< first log sequence number not yet
- written to any log group; for this to
- be advanced, it is enough that the
- write i/o has been completed for any
- one log group */
- ib_uint64_t written_to_all_lsn;
- /*!< first log sequence number not yet
- written to some log group; for this to
- be advanced, it is enough that the
- write i/o has been completed for all
- log groups */
- ib_uint64_t write_lsn; /*!< end lsn for the current running
- write */
- ulint write_end_offset;/*!< the data in buffer has
- been written up to this offset
- when the current write ends:
- this field will then be copied
- to buf_next_to_write */
- ib_uint64_t current_flush_lsn;/*!< end lsn for the current running
- write + flush operation */
- ib_uint64_t flushed_to_disk_lsn;
- /*!< how far we have written the log
- AND flushed to disk */
- ulint n_pending_writes;/*!< number of currently
- pending flushes or writes */
- /* NOTE on the 'flush' in names of the fields below: starting from
- 4.0.14, we separate the write of the log file and the actual fsync()
- or other method to flush it to disk. The names below should really
- be 'flush_or_write'! */
- os_event_t no_flush_event; /*!< this event is in the reset state
- when a flush or a write is running;
- a thread should wait for this without
- owning the log mutex, but NOTE that
- to set or reset this event, the
- thread MUST own the log mutex! */
- ibool one_flushed; /*!< during a flush, this is
- first FALSE and becomes TRUE
- when one log group has been
- written or flushed */
- os_event_t one_flushed_event;/*!< this event is reset when the
- flush or write has not yet completed
- for any log group; e.g., this means
- that a transaction has been committed
- when this is set; a thread should wait
- for this without owning the log mutex,
- but NOTE that to set or reset this
- event, the thread MUST own the log
- mutex! */
- ulint n_log_ios; /*!< number of log i/os initiated thus
- far */
- ulint n_log_ios_old; /*!< number of log i/o's at the
- previous printout */
- time_t last_printout_time;/*!< when log_print was last time
- called */
- /* @} */
-
- /** Fields involved in checkpoints @{ */
- ulint log_group_capacity; /*!< capacity of the log group; if
- the checkpoint age exceeds this, it is
- a serious error because it is possible
- we will then overwrite log and spoil
- crash recovery */
- ulint max_modified_age_async;
- /*!< when this recommended
- value for lsn -
- buf_pool_get_oldest_modification()
- is exceeded, we start an
- asynchronous preflush of pool pages */
- ulint max_modified_age_sync;
- /*!< when this recommended
- value for lsn -
- buf_pool_get_oldest_modification()
- is exceeded, we start a
- synchronous preflush of pool pages */
- ulint adm_checkpoint_interval;
- /*!< administrator-specified checkpoint
- interval in terms of log growth in
- bytes; the interval actually used by
- the database can be smaller */
- ulint max_checkpoint_age_async;
- /*!< when this checkpoint age
- is exceeded we start an
- asynchronous writing of a new
- checkpoint */
- ulint max_checkpoint_age;
- /*!< this is the maximum allowed value
- for lsn - last_checkpoint_lsn when a
- new query step is started */
- ib_uint64_t next_checkpoint_no;
- /*!< next checkpoint number */
- ib_uint64_t last_checkpoint_lsn;
- /*!< latest checkpoint lsn */
- ib_uint64_t next_checkpoint_lsn;
- /*!< next checkpoint lsn */
- ulint n_pending_checkpoint_writes;
- /*!< number of currently pending
- checkpoint writes */
- rw_lock_t checkpoint_lock;/*!< this latch is x-locked when a
- checkpoint write is running; a thread
- should wait for this without owning
- the log mutex */
-#endif /* !UNIV_HOTBACKUP */
- byte* checkpoint_buf_ptr;/* unaligned checkpoint header */
- byte* checkpoint_buf; /*!< checkpoint header is read to this
- buffer */
- /* @} */
-#ifdef UNIV_LOG_ARCHIVE
- /** Fields involved in archiving @{ */
- ulint archiving_state;/*!< LOG_ARCH_ON, LOG_ARCH_STOPPING,
- LOG_ARCH_STOPPING2, LOG_ARCH_STOPPED, LOG_ARCH_OFF */
- ib_uint64_t archived_lsn; /*!< archiving has advanced to this
- lsn */
- ulint max_archived_lsn_age_async;
- /*!< recommended maximum age of
- archived_lsn, before we start
- asynchronous copying to the archive */
- ulint max_archived_lsn_age;
- /*!< maximum allowed age for
- archived_lsn */
- ib_uint64_t next_archived_lsn;/*!< during an archive write,
- until the write is completed, we
- store the next value for
- archived_lsn here: the write
- completion function then sets the new
- value to archived_lsn */
- ulint archiving_phase;/*!< LOG_ARCHIVE_READ or
- LOG_ARCHIVE_WRITE */
- ulint n_pending_archive_ios;
- /*!< number of currently pending reads
- or writes in archiving */
- rw_lock_t archive_lock; /*!< this latch is x-locked when an
- archive write is running; a thread
- should wait for this without owning
- the log mutex */
- ulint archive_buf_size;/*!< size of archive_buf */
- byte* archive_buf; /*!< log segment is written to the
- archive from this buffer */
- os_event_t archiving_on; /*!< if archiving has been stopped,
- a thread can wait for this event to
- become signaled */
- /* @} */
-#endif /* UNIV_LOG_ARCHIVE */
-};
-
-#ifdef UNIV_LOG_ARCHIVE
-/** Archiving state @{ */
-#define LOG_ARCH_ON 71
-#define LOG_ARCH_STOPPING 72
-#define LOG_ARCH_STOPPING2 73
-#define LOG_ARCH_STOPPED 74
-#define LOG_ARCH_OFF 75
-/* @} */
-#endif /* UNIV_LOG_ARCHIVE */
-
-#ifndef UNIV_NONINL
-#include "log0log.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/log0log.ic b/storage/innodb_plugin/include/log0log.ic
deleted file mode 100644
index 36d151a3064..00000000000
--- a/storage/innodb_plugin/include/log0log.ic
+++ /dev/null
@@ -1,440 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/log0log.ic
-Database log
-
-Created 12/9/1995 Heikki Tuuri
-*******************************************************/
-
-#include "os0file.h"
-#include "mach0data.h"
-#include "mtr0mtr.h"
-
-#ifdef UNIV_LOG_DEBUG
-/******************************************************//**
-Checks by parsing that the catenated log segment for a single mtr is
-consistent. */
-UNIV_INTERN
-ibool
-log_check_log_recs(
-/*===============*/
- const byte* buf, /*!< in: pointer to the start of
- the log segment in the
- log_sys->buf log buffer */
- ulint len, /*!< in: segment length in bytes */
- ib_uint64_t buf_start_lsn); /*!< in: buffer start lsn */
-#endif /* UNIV_LOG_DEBUG */
-
-/************************************************************//**
-Gets a log block flush bit.
-@return TRUE if this block was the first to be written in a log flush */
-UNIV_INLINE
-ibool
-log_block_get_flush_bit(
-/*====================*/
- const byte* log_block) /*!< in: log block */
-{
- if (LOG_BLOCK_FLUSH_BIT_MASK
- & mach_read_from_4(log_block + LOG_BLOCK_HDR_NO)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/************************************************************//**
-Sets the log block flush bit. */
-UNIV_INLINE
-void
-log_block_set_flush_bit(
-/*====================*/
- byte* log_block, /*!< in/out: log block */
- ibool val) /*!< in: value to set */
-{
- ulint field;
-
- field = mach_read_from_4(log_block + LOG_BLOCK_HDR_NO);
-
- if (val) {
- field = field | LOG_BLOCK_FLUSH_BIT_MASK;
- } else {
- field = field & ~LOG_BLOCK_FLUSH_BIT_MASK;
- }
-
- mach_write_to_4(log_block + LOG_BLOCK_HDR_NO, field);
-}
-
-/************************************************************//**
-Gets a log block number stored in the header.
-@return log block number stored in the block header */
-UNIV_INLINE
-ulint
-log_block_get_hdr_no(
-/*=================*/
- const byte* log_block) /*!< in: log block */
-{
- return(~LOG_BLOCK_FLUSH_BIT_MASK
- & mach_read_from_4(log_block + LOG_BLOCK_HDR_NO));
-}
-
-/************************************************************//**
-Sets the log block number stored in the header; NOTE that this must be set
-before the flush bit! */
-UNIV_INLINE
-void
-log_block_set_hdr_no(
-/*=================*/
- byte* log_block, /*!< in/out: log block */
- ulint n) /*!< in: log block number: must be > 0 and
- < LOG_BLOCK_FLUSH_BIT_MASK */
-{
- ut_ad(n > 0);
- ut_ad(n < LOG_BLOCK_FLUSH_BIT_MASK);
-
- mach_write_to_4(log_block + LOG_BLOCK_HDR_NO, n);
-}
-
-/************************************************************//**
-Gets a log block data length.
-@return log block data length measured as a byte offset from the block start */
-UNIV_INLINE
-ulint
-log_block_get_data_len(
-/*===================*/
- const byte* log_block) /*!< in: log block */
-{
- return(mach_read_from_2(log_block + LOG_BLOCK_HDR_DATA_LEN));
-}
-
-/************************************************************//**
-Sets the log block data length. */
-UNIV_INLINE
-void
-log_block_set_data_len(
-/*===================*/
- byte* log_block, /*!< in/out: log block */
- ulint len) /*!< in: data length */
-{
- mach_write_to_2(log_block + LOG_BLOCK_HDR_DATA_LEN, len);
-}
-
-/************************************************************//**
-Gets a log block first mtr log record group offset.
-@return first mtr log record group byte offset from the block start, 0
-if none */
-UNIV_INLINE
-ulint
-log_block_get_first_rec_group(
-/*==========================*/
- const byte* log_block) /*!< in: log block */
-{
- return(mach_read_from_2(log_block + LOG_BLOCK_FIRST_REC_GROUP));
-}
-
-/************************************************************//**
-Sets the log block first mtr log record group offset. */
-UNIV_INLINE
-void
-log_block_set_first_rec_group(
-/*==========================*/
- byte* log_block, /*!< in/out: log block */
- ulint offset) /*!< in: offset, 0 if none */
-{
- mach_write_to_2(log_block + LOG_BLOCK_FIRST_REC_GROUP, offset);
-}
-
-/************************************************************//**
-Gets a log block checkpoint number field (4 lowest bytes).
-@return checkpoint no (4 lowest bytes) */
-UNIV_INLINE
-ulint
-log_block_get_checkpoint_no(
-/*========================*/
- const byte* log_block) /*!< in: log block */
-{
- return(mach_read_from_4(log_block + LOG_BLOCK_CHECKPOINT_NO));
-}
-
-/************************************************************//**
-Sets a log block checkpoint number field (4 lowest bytes). */
-UNIV_INLINE
-void
-log_block_set_checkpoint_no(
-/*========================*/
- byte* log_block, /*!< in/out: log block */
- ib_uint64_t no) /*!< in: checkpoint no */
-{
- mach_write_to_4(log_block + LOG_BLOCK_CHECKPOINT_NO, (ulint) no);
-}
-
-/************************************************************//**
-Converts a lsn to a log block number.
-@return log block number, it is > 0 and <= 1G */
-UNIV_INLINE
-ulint
-log_block_convert_lsn_to_no(
-/*========================*/
- ib_uint64_t lsn) /*!< in: lsn of a byte within the block */
-{
- return(((ulint) (lsn / OS_FILE_LOG_BLOCK_SIZE) & 0x3FFFFFFFUL) + 1);
-}
-
-/************************************************************//**
-Calculates the checksum for a log block.
-@return checksum */
-UNIV_INLINE
-ulint
-log_block_calc_checksum(
-/*====================*/
- const byte* block) /*!< in: log block */
-{
- ulint sum;
- ulint sh;
- ulint i;
-
- sum = 1;
- sh = 0;
-
- for (i = 0; i < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE; i++) {
- ulint b = (ulint) block[i];
- sum &= 0x7FFFFFFFUL;
- sum += b;
- sum += b << sh;
- sh++;
- if (sh > 24) {
- sh = 0;
- }
- }
-
- return(sum);
-}
-
-/************************************************************//**
-Gets a log block checksum field value.
-@return checksum */
-UNIV_INLINE
-ulint
-log_block_get_checksum(
-/*===================*/
- const byte* log_block) /*!< in: log block */
-{
- return(mach_read_from_4(log_block + OS_FILE_LOG_BLOCK_SIZE
- - LOG_BLOCK_CHECKSUM));
-}
-
-/************************************************************//**
-Sets a log block checksum field value. */
-UNIV_INLINE
-void
-log_block_set_checksum(
-/*===================*/
- byte* log_block, /*!< in/out: log block */
- ulint checksum) /*!< in: checksum */
-{
- mach_write_to_4(log_block + OS_FILE_LOG_BLOCK_SIZE
- - LOG_BLOCK_CHECKSUM,
- checksum);
-}
-
-/************************************************************//**
-Initializes a log block in the log buffer. */
-UNIV_INLINE
-void
-log_block_init(
-/*===========*/
- byte* log_block, /*!< in: pointer to the log buffer */
- ib_uint64_t lsn) /*!< in: lsn within the log block */
-{
- ulint no;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- no = log_block_convert_lsn_to_no(lsn);
-
- log_block_set_hdr_no(log_block, no);
-
- log_block_set_data_len(log_block, LOG_BLOCK_HDR_SIZE);
- log_block_set_first_rec_group(log_block, 0);
-}
-
-/************************************************************//**
-Initializes a log block in the log buffer in the old format, where there
-was no checksum yet. */
-UNIV_INLINE
-void
-log_block_init_in_old_format(
-/*=========================*/
- byte* log_block, /*!< in: pointer to the log buffer */
- ib_uint64_t lsn) /*!< in: lsn within the log block */
-{
- ulint no;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- no = log_block_convert_lsn_to_no(lsn);
-
- log_block_set_hdr_no(log_block, no);
- mach_write_to_4(log_block + OS_FILE_LOG_BLOCK_SIZE
- - LOG_BLOCK_CHECKSUM, no);
- log_block_set_data_len(log_block, LOG_BLOCK_HDR_SIZE);
- log_block_set_first_rec_group(log_block, 0);
-}
-
-#ifndef UNIV_HOTBACKUP
-/************************************************************//**
-Writes to the log the string given. On success the log mutex is still held
-and must be released with log_release; on failure it is already released.
-@return end lsn of the log record, zero if did not succeed */
-UNIV_INLINE
-ib_uint64_t
-log_reserve_and_write_fast(
-/*=======================*/
- const void* str, /*!< in: string */
- ulint len, /*!< in: string length */
- ib_uint64_t* start_lsn)/*!< out: start lsn of the log record */
-{
- ulint data_len;
-#ifdef UNIV_LOG_LSN_DEBUG
- /* length of the LSN pseudo-record */
- ulint lsn_len = 1
- + mach_get_compressed_size(log_sys->lsn >> 32)
- + mach_get_compressed_size(log_sys->lsn & 0xFFFFFFFFUL);
-#endif /* UNIV_LOG_LSN_DEBUG */
-
- mutex_enter(&log_sys->mutex);
-
- data_len = len
-#ifdef UNIV_LOG_LSN_DEBUG
- + lsn_len
-#endif /* UNIV_LOG_LSN_DEBUG */
- + log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE;
-
- if (data_len >= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
-
- /* The string does not fit within the current log block
- or the log block would become full */
-
- mutex_exit(&log_sys->mutex);
-
- return(0);
- }
-
- *start_lsn = log_sys->lsn;
-
-#ifdef UNIV_LOG_LSN_DEBUG
- {
- /* Write the LSN pseudo-record. */
- byte* b = &log_sys->buf[log_sys->buf_free];
- *b++ = MLOG_LSN | (MLOG_SINGLE_REC_FLAG & *(const byte*) str);
- /* Write the LSN in two parts,
- as a pseudo page number and space id. */
- b += mach_write_compressed(b, log_sys->lsn >> 32);
- b += mach_write_compressed(b, log_sys->lsn & 0xFFFFFFFFUL);
- ut_a(b - lsn_len == &log_sys->buf[log_sys->buf_free]);
-
- memcpy(b, str, len);
- len += lsn_len;
- }
-#else /* UNIV_LOG_LSN_DEBUG */
- memcpy(log_sys->buf + log_sys->buf_free, str, len);
-#endif /* UNIV_LOG_LSN_DEBUG */
-
- log_block_set_data_len((byte*) ut_align_down(log_sys->buf
- + log_sys->buf_free,
- OS_FILE_LOG_BLOCK_SIZE),
- data_len);
-#ifdef UNIV_LOG_DEBUG
- log_sys->old_buf_free = log_sys->buf_free;
- log_sys->old_lsn = log_sys->lsn;
-#endif
- log_sys->buf_free += len;
-
- ut_ad(log_sys->buf_free <= log_sys->buf_size);
-
- log_sys->lsn += len;
-
-#ifdef UNIV_LOG_DEBUG
- log_check_log_recs(log_sys->buf + log_sys->old_buf_free,
- log_sys->buf_free - log_sys->old_buf_free,
- log_sys->old_lsn);
-#endif
- return(log_sys->lsn);
-}
-
-/***********************************************************************//**
-Releases the log mutex. */
-UNIV_INLINE
-void
-log_release(void)
-/*=============*/
-{
- mutex_exit(&(log_sys->mutex));
-}
-
-/************************************************************//**
-Gets the current lsn.
-@return current lsn */
-UNIV_INLINE
-ib_uint64_t
-log_get_lsn(void)
-/*=============*/
-{
- ib_uint64_t lsn;
-
- mutex_enter(&(log_sys->mutex));
-
- lsn = log_sys->lsn;
-
- mutex_exit(&(log_sys->mutex));
-
- return(lsn);
-}
-
-/************************************************************//**
-Gets the log group capacity. It is OK to read the value without
-holding log_sys->mutex because it is constant.
-@return log group capacity */
-UNIV_INLINE
-ulint
-log_get_capacity(void)
-/*==================*/
-{
- return(log_sys->log_group_capacity);
-}
-
-/***********************************************************************//**
-Checks if there is need for a log buffer flush or a new checkpoint, and does
-this if yes. Any database operation should call this when it has modified
-more than about 4 pages. NOTE that this function may only be called when the
-OS thread owns no synchronization objects except the dictionary mutex. */
-UNIV_INLINE
-void
-log_free_check(void)
-/*================*/
-{
- /* ut_ad(sync_thread_levels_empty()); */
-
- if (log_sys->check_flush_or_checkpoint) {
-
- log_check_margins();
- }
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/log0recv.h b/storage/innodb_plugin/include/log0recv.h
deleted file mode 100644
index a3d2bd050f5..00000000000
--- a/storage/innodb_plugin/include/log0recv.h
+++ /dev/null
@@ -1,489 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/log0recv.h
-Recovery
-
-Created 9/20/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef log0recv_h
-#define log0recv_h
-
-#include "univ.i"
-#include "ut0byte.h"
-#include "buf0types.h"
-#include "hash0hash.h"
-#include "log0log.h"
-
-#ifdef UNIV_HOTBACKUP
-extern ibool recv_replay_file_ops;
-
-/*******************************************************************//**
-Reads the checkpoint info needed in hot backup.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-recv_read_cp_info_for_backup(
-/*=========================*/
- const byte* hdr, /*!< in: buffer containing the log group
- header */
- ib_uint64_t* lsn, /*!< out: checkpoint lsn */
- ulint* offset, /*!< out: checkpoint offset in the log group */
- ulint* fsp_limit,/*!< out: fsp limit of space 0,
- 1000000000 if the database is running
- with < version 3.23.50 of InnoDB */
- ib_uint64_t* cp_no, /*!< out: checkpoint number */
- ib_uint64_t* first_header_lsn);
- /*!< out: lsn of the start of the
- first log file */
-/*******************************************************************//**
-Scans the log segment and n_bytes_scanned is set to the length of valid
-log scanned. */
-UNIV_INTERN
-void
-recv_scan_log_seg_for_backup(
-/*=========================*/
- byte* buf, /*!< in: buffer containing log data */
- ulint buf_len, /*!< in: data length in that buffer */
- ib_uint64_t* scanned_lsn, /*!< in/out: lsn of buffer start,
- we return scanned lsn */
- ulint* scanned_checkpoint_no,
- /*!< in/out: 4 lowest bytes of the
- highest scanned checkpoint number so
- far */
- ulint* n_bytes_scanned);/*!< out: how much we were able to
- scan, smaller than buf_len if log
- data ended here */
-#endif /* UNIV_HOTBACKUP */
-/*******************************************************************//**
-Returns TRUE if recovery is currently running.
-@return recv_recovery_on */
-UNIV_INLINE
-ibool
-recv_recovery_is_on(void);
-/*=====================*/
-#ifdef UNIV_LOG_ARCHIVE
-/*******************************************************************//**
-Returns TRUE if recovery from backup is currently running.
-@return recv_recovery_from_backup_on */
-UNIV_INLINE
-ibool
-recv_recovery_from_backup_is_on(void);
-/*=================================*/
-#endif /* UNIV_LOG_ARCHIVE */
-/************************************************************************//**
-Applies the hashed log records to the page, if the page lsn is less than the
-lsn of a log record. This can be called when a buffer page has just been
-read in, or also for a page already in the buffer pool. */
-UNIV_INTERN
-void
-recv_recover_page_func(
-/*===================*/
-#ifndef UNIV_HOTBACKUP
- ibool just_read_in,
- /*!< in: TRUE if the i/o handler calls
- this for a freshly read page */
-#endif /* !UNIV_HOTBACKUP */
- buf_block_t* block); /*!< in/out: buffer block */
-#ifndef UNIV_HOTBACKUP
-/** Wrapper for recv_recover_page_func().
-Applies the hashed log records to the page, if the page lsn is less than the
-lsn of a log record. This can be called when a buffer page has just been
-read in, or also for a page already in the buffer pool.
-@param jri in: TRUE if just read in (the i/o handler calls this for
-a freshly read page)
-@param block in/out: the buffer block
-*/
-# define recv_recover_page(jri, block) recv_recover_page_func(jri, block)
-#else /* !UNIV_HOTBACKUP */
-/** Wrapper for recv_recover_page_func().
-Applies the hashed log records to the page, if the page lsn is less than the
-lsn of a log record. This can be called when a buffer page has just been
-read in, or also for a page already in the buffer pool.
-@param jri in: TRUE if just read in (the i/o handler calls this for
-a freshly read page)
-@param block in/out: the buffer block
-*/
-# define recv_recover_page(jri, block) recv_recover_page_func(block)
-#endif /* !UNIV_HOTBACKUP */
-/********************************************************//**
-Recovers from a checkpoint. When this function returns, the database is able
-to start processing of new user transactions, but the function
-recv_recovery_from_checkpoint_finish should be called later to complete
-the recovery and free the resources used in it.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-ulint
-recv_recovery_from_checkpoint_start_func(
-/*=====================================*/
-#ifdef UNIV_LOG_ARCHIVE
- ulint type, /*!< in: LOG_CHECKPOINT or
- LOG_ARCHIVE */
- ib_uint64_t limit_lsn, /*!< in: recover up to this lsn
- if possible */
-#endif /* UNIV_LOG_ARCHIVE */
- ib_uint64_t min_flushed_lsn,/*!< in: min flushed lsn from
- data files */
- ib_uint64_t max_flushed_lsn);/*!< in: max flushed lsn from
- data files */
-#ifdef UNIV_LOG_ARCHIVE
-/** Wrapper for recv_recovery_from_checkpoint_start_func().
-Recovers from a checkpoint. When this function returns, the database is able
-to start processing of new user transactions, but the function
-recv_recovery_from_checkpoint_finish should be called later to complete
-the recovery and free the resources used in it.
-@param type in: LOG_CHECKPOINT or LOG_ARCHIVE
-@param lim in: recover up to this log sequence number if possible
-@param min in: minimum flushed log sequence number from data files
-@param max in: maximum flushed log sequence number from data files
-@return error code or DB_SUCCESS */
-# define recv_recovery_from_checkpoint_start(type,lim,min,max) \
- recv_recovery_from_checkpoint_start_func(type,lim,min,max)
-#else /* UNIV_LOG_ARCHIVE */
-/** Wrapper for recv_recovery_from_checkpoint_start_func().
-Recovers from a checkpoint. When this function returns, the database is able
-to start processing of new user transactions, but the function
-recv_recovery_from_checkpoint_finish should be called later to complete
-the recovery and free the resources used in it.
-@param type ignored: LOG_CHECKPOINT or LOG_ARCHIVE
-@param lim ignored: recover up to this log sequence number if possible
-@param min in: minimum flushed log sequence number from data files
-@param max in: maximum flushed log sequence number from data files
-@return error code or DB_SUCCESS */
-# define recv_recovery_from_checkpoint_start(type,lim,min,max) \
- recv_recovery_from_checkpoint_start_func(min,max)
-#endif /* UNIV_LOG_ARCHIVE */
-/********************************************************//**
-Completes recovery from a checkpoint. */
-UNIV_INTERN
-void
-recv_recovery_from_checkpoint_finish(void);
-/*======================================*/
-/*******************************************************//**
-Scans log from a buffer and stores new log data to the parsing buffer.
-Parses and hashes the log records if new data found. Unless
-UNIV_HOTBACKUP is defined, this function will apply log records
-automatically when the hash table becomes full.
-@return TRUE if limit_lsn has been reached, or not able to scan any
-more in this log group */
-UNIV_INTERN
-ibool
-recv_scan_log_recs(
-/*===============*/
- ulint available_memory,/*!< in: we let the hash table of recs
- grow to this size, at the maximum */
- ibool store_to_hash, /*!< in: TRUE if the records should be
- stored to the hash table; this is set
- to FALSE if just debug checking is
- needed */
- const byte* buf, /*!< in: buffer containing a log
- segment or garbage */
- ulint len, /*!< in: buffer length */
- ib_uint64_t start_lsn, /*!< in: buffer start lsn */
- ib_uint64_t* contiguous_lsn, /*!< in/out: it is known that all log
- groups contain contiguous log data up
- to this lsn */
- ib_uint64_t* group_scanned_lsn);/*!< out: scanning succeeded up to
- this lsn */
-/******************************************************//**
-Resets the logs. The contents of log files will be lost! */
-UNIV_INTERN
-void
-recv_reset_logs(
-/*============*/
- ib_uint64_t lsn, /*!< in: reset to this lsn
- rounded up to be divisible by
- OS_FILE_LOG_BLOCK_SIZE, after
- which we add
- LOG_BLOCK_HDR_SIZE */
-#ifdef UNIV_LOG_ARCHIVE
- ulint arch_log_no, /*!< in: next archived log file number */
-#endif /* UNIV_LOG_ARCHIVE */
- ibool new_logs_created);/*!< in: TRUE if resetting logs
- is done at the log creation;
- FALSE if it is done after
- archive recovery */
-#ifdef UNIV_HOTBACKUP
-/******************************************************//**
-Creates new log files after a backup has been restored. */
-UNIV_INTERN
-void
-recv_reset_log_files_for_backup(
-/*============================*/
- const char* log_dir, /*!< in: log file directory path */
- ulint n_log_files, /*!< in: number of log files */
- ulint log_file_size, /*!< in: log file size */
- ib_uint64_t lsn); /*!< in: new start lsn, must be
- divisible by OS_FILE_LOG_BLOCK_SIZE */
-#endif /* UNIV_HOTBACKUP */
-/********************************************************//**
-Creates the recovery system. */
-UNIV_INTERN
-void
-recv_sys_create(void);
-/*=================*/
-/**********************************************************//**
-Release recovery system mutexes. */
-UNIV_INTERN
-void
-recv_sys_close(void);
-/*================*/
-/********************************************************//**
-Frees the recovery system memory. */
-UNIV_INTERN
-void
-recv_sys_mem_free(void);
-/*===================*/
-/********************************************************//**
-Inits the recovery system for a recovery operation. */
-UNIV_INTERN
-void
-recv_sys_init(
-/*==========*/
- ulint available_memory); /*!< in: available memory in bytes */
-/********************************************************//**
-Reset the state of the recovery system variables. */
-UNIV_INTERN
-void
-recv_sys_var_init(void);
-/*===================*/
-/*******************************************************************//**
-Empties the hash table of stored log records, applying them to appropriate
-pages. */
-UNIV_INTERN
-void
-recv_apply_hashed_log_recs(
-/*=======================*/
- ibool allow_ibuf); /*!< in: if TRUE, also ibuf operations are
- allowed during the application; if FALSE,
- no ibuf operations are allowed, and after
- the application all file pages are flushed to
- disk and invalidated in buffer pool: this
- alternative means that no new log records
- can be generated during the application */
-#ifdef UNIV_HOTBACKUP
-/*******************************************************************//**
-Applies log records in the hash table to a backup. */
-UNIV_INTERN
-void
-recv_apply_log_recs_for_backup(void);
-/*================================*/
-#endif
-#ifdef UNIV_LOG_ARCHIVE
-/********************************************************//**
-Recovers from archived log files, and also from log files, if they exist.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-ulint
-recv_recovery_from_archive_start(
-/*=============================*/
- ib_uint64_t min_flushed_lsn,/*!< in: min flushed lsn field from the
- data files */
- ib_uint64_t limit_lsn, /*!< in: recover up to this lsn if
- possible */
- ulint first_log_no); /*!< in: number of the first archived
- log file to use in the recovery; the
- file will be searched from
- INNOBASE_LOG_ARCH_DIR specified in
- server config file */
-/********************************************************//**
-Completes recovery from archive. */
-UNIV_INTERN
-void
-recv_recovery_from_archive_finish(void);
-/*===================================*/
-#endif /* UNIV_LOG_ARCHIVE */
-
-/** Block of log record data */
-typedef struct recv_data_struct recv_data_t;
-/** Block of log record data */
-struct recv_data_struct{
- recv_data_t* next; /*!< pointer to the next block or NULL */
- /*!< the log record data is stored physically
- immediately after this struct, max amount
- RECV_DATA_BLOCK_SIZE bytes of it */
-};
-
-/** Stored log record struct */
-typedef struct recv_struct recv_t;
-/** Stored log record struct */
-struct recv_struct{
- byte type; /*!< log record type */
- ulint len; /*!< log record body length in bytes */
- recv_data_t* data; /*!< chain of blocks containing the log record
- body */
- ib_uint64_t start_lsn;/*!< start lsn of the log segment written by
- the mtr which generated this log record: NOTE
- that this is not necessarily the start lsn of
- this log record */
- ib_uint64_t end_lsn;/*!< end lsn of the log segment written by
- the mtr which generated this log record: NOTE
- that this is not necessarily the end lsn of
- this log record */
- UT_LIST_NODE_T(recv_t)
- rec_list;/*!< list of log records for this page */
-};
-
-/** States of recv_addr_struct */
-enum recv_addr_state {
- /** not yet processed */
- RECV_NOT_PROCESSED,
- /** page is being read */
- RECV_BEING_READ,
- /** log records are being applied on the page */
- RECV_BEING_PROCESSED,
- /** log records have been applied on the page, or they have
- been discarded because the tablespace does not exist */
- RECV_PROCESSED
-};
-
-/** Hashed page file address struct */
-typedef struct recv_addr_struct recv_addr_t;
-/** Hashed page file address struct */
-struct recv_addr_struct{
- enum recv_addr_state state;
- /*!< recovery state of the page */
- ulint space; /*!< space id */
- ulint page_no;/*!< page number */
- UT_LIST_BASE_NODE_T(recv_t)
- rec_list;/*!< list of log records for this page */
- hash_node_t addr_hash;/*!< hash node in the hash bucket chain */
-};
-
-/** Recovery system data structure */
-typedef struct recv_sys_struct recv_sys_t;
-/** Recovery system data structure */
-struct recv_sys_struct{
-#ifndef UNIV_HOTBACKUP
- mutex_t mutex; /*!< mutex protecting the fields apply_log_recs,
- n_addrs, and the state field in each recv_addr
- struct */
-#endif /* !UNIV_HOTBACKUP */
- ibool apply_log_recs;
- /*!< this is TRUE when log rec application to
- pages is allowed; this flag tells the
- i/o-handler if it should do log record
- application */
- ibool apply_batch_on;
- /*!< this is TRUE when a log rec application
- batch is running */
- ib_uint64_t lsn; /*!< log sequence number */
- ulint last_log_buf_size;
- /*!< size of the log buffer when the database
- last time wrote to the log */
- byte* last_block;
- /*!< possible incomplete last recovered log
- block */
- byte* last_block_buf_start;
- /*!< the nonaligned start address of the
- preceding buffer */
- byte* buf; /*!< buffer for parsing log records */
- ulint len; /*!< amount of data in buf */
- ib_uint64_t parse_start_lsn;
- /*!< this is the lsn from which we were able to
- start parsing log records and adding them to
- the hash table; zero if a suitable
- start point not found yet */
- ib_uint64_t scanned_lsn;
- /*!< the log data has been scanned up to this
- lsn */
- ulint scanned_checkpoint_no;
- /*!< the log data has been scanned up to this
- checkpoint number (lowest 4 bytes) */
- ulint recovered_offset;
- /*!< start offset of non-parsed log records in
- buf */
- ib_uint64_t recovered_lsn;
- /*!< the log records have been parsed up to
- this lsn */
- ib_uint64_t limit_lsn;/*!< recovery should be made at most
- up to this lsn */
- ibool found_corrupt_log;
- /*!< this is set to TRUE if, during the log
- scan, we find a corrupt log block or a corrupt
- log record, or there is a log parsing
- buffer overflow */
-#ifdef UNIV_LOG_ARCHIVE
- log_group_t* archive_group;
- /*!< in archive recovery: the log group whose
- archive is read */
-#endif /* UNIV_LOG_ARCHIVE */
- mem_heap_t* heap; /*!< memory heap of log records and file
- addresses*/
- hash_table_t* addr_hash;/*!< hash table of file addresses of pages */
- ulint n_addrs;/*!< number of not processed hashed file
- addresses in the hash table */
-};
-
-/** The recovery system */
-extern recv_sys_t* recv_sys;
-
-/** TRUE when applying redo log records during crash recovery; FALSE
-otherwise. Note that this is FALSE while a background thread is
-rolling back incomplete transactions. */
-extern ibool recv_recovery_on;
-/** If the following is TRUE, the buffer pool file pages must be invalidated
-after recovery and no ibuf operations are allowed; this becomes TRUE if
-the log record hash table becomes too full, and log records must be merged
-to file pages already before the recovery is finished: in this case no
-ibuf operations are allowed, as they could modify the pages read in the
-buffer pool before the pages have been recovered to the up-to-date state.
-
-TRUE means that recovery is running and no operations on the log files
-are allowed yet: the variable name is misleading. */
-extern ibool recv_no_ibuf_operations;
-/** TRUE when recv_init_crash_recovery() has been called. */
-extern ibool recv_needed_recovery;
-#ifdef UNIV_DEBUG
-/** TRUE if writing to the redo log (mtr_commit) is forbidden.
-Protected by log_sys->mutex. */
-extern ibool recv_no_log_write;
-#endif /* UNIV_DEBUG */
-
-/** TRUE if buf_page_is_corrupted() should check if the log sequence
-number (FIL_PAGE_LSN) is in the future. Initially FALSE, and set by
-recv_recovery_from_checkpoint_start_func(). */
-extern ibool recv_lsn_checks_on;
-#ifdef UNIV_HOTBACKUP
-/** TRUE when the redo log is being backed up */
-extern ibool recv_is_making_a_backup;
-#endif /* UNIV_HOTBACKUP */
-/** Maximum page number encountered in the redo log */
-extern ulint recv_max_parsed_page_no;
-
-/** Size of the parsing buffer; it must accommodate RECV_SCAN_SIZE many
-times! */
-#define RECV_PARSING_BUF_SIZE (2 * 1024 * 1024)
-
-/** Size of block reads when the log groups are scanned forward to do a
-roll-forward */
-#define RECV_SCAN_SIZE (4 * UNIV_PAGE_SIZE)
-
-/** This many frames must be left free in the buffer pool when we scan
-the log and store the scanned log records in the buffer pool: we will
-use these free frames to read in pages when we start applying the
-log records to the database. */
-extern ulint recv_n_pool_free_frames;
-
-#ifndef UNIV_NONINL
-#include "log0recv.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/log0recv.ic b/storage/innodb_plugin/include/log0recv.ic
deleted file mode 100644
index 0a8e55b96fa..00000000000
--- a/storage/innodb_plugin/include/log0recv.ic
+++ /dev/null
@@ -1,53 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/log0recv.ic
-Recovery
-
-Created 9/20/1997 Heikki Tuuri
-*******************************************************/
-
-#include "univ.i"
-
-/*******************************************************************//**
-Returns TRUE if recovery is currently running.
-@return recv_recovery_on */
-UNIV_INLINE
-ibool
-recv_recovery_is_on(void)
-/*=====================*/
-{
- return(UNIV_UNLIKELY(recv_recovery_on));
-}
-
-#ifdef UNIV_LOG_ARCHIVE
-/** TRUE when applying redo log records from an archived log file */
-extern ibool recv_recovery_from_backup_on;
-
-/*******************************************************************//**
-Returns TRUE if recovery from backup is currently running.
-@return recv_recovery_from_backup_on */
-UNIV_INLINE
-ibool
-recv_recovery_from_backup_is_on(void)
-/*=================================*/
-{
- return(recv_recovery_from_backup_on);
-}
-#endif /* UNIV_LOG_ARCHIVE */
diff --git a/storage/innodb_plugin/include/mach0data.h b/storage/innodb_plugin/include/mach0data.h
deleted file mode 100644
index 44ee3df22ce..00000000000
--- a/storage/innodb_plugin/include/mach0data.h
+++ /dev/null
@@ -1,400 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/mach0data.h
-Utilities for converting data from the database file
-to the machine format.
-
-Created 11/28/1995 Heikki Tuuri
-***********************************************************************/
-
-#ifndef mach0data_h
-#define mach0data_h
-
-#include "univ.i"
-#include "ut0byte.h"
-
-/* The data and all fields are always stored in a database file
-in the same format: ascii, big-endian, ... .
-All data in the files MUST be accessed using the functions in this
-module. */
-
-/*******************************************************//**
-The following function is used to store data in one byte. */
-UNIV_INLINE
-void
-mach_write_to_1(
-/*============*/
- byte* b, /*!< in: pointer to byte where to store */
- ulint n); /*!< in: ulint integer to be stored, >= 0, < 256 */
-/********************************************************//**
-The following function is used to fetch data from one byte.
-@return ulint integer, >= 0, < 256 */
-UNIV_INLINE
-ulint
-mach_read_from_1(
-/*=============*/
- const byte* b) /*!< in: pointer to byte */
- __attribute__((nonnull, pure));
-/*******************************************************//**
-The following function is used to store data in two consecutive
-bytes. We store the most significant byte to the lower address. */
-UNIV_INLINE
-void
-mach_write_to_2(
-/*============*/
- byte* b, /*!< in: pointer to two bytes where to store */
- ulint n); /*!< in: ulint integer to be stored, >= 0, < 64k */
-/********************************************************//**
-The following function is used to fetch data from two consecutive
-bytes. The most significant byte is at the lowest address.
-@return ulint integer, >= 0, < 64k */
-UNIV_INLINE
-ulint
-mach_read_from_2(
-/*=============*/
- const byte* b) /*!< in: pointer to two bytes */
- __attribute__((nonnull, pure));
-
-/********************************************************//**
-The following function is used to convert a 16-bit data item
-to the canonical format, for fast bytewise equality test
-against memory.
-@return 16-bit integer in canonical format */
-UNIV_INLINE
-uint16
-mach_encode_2(
-/*==========*/
- ulint n) /*!< in: integer in machine-dependent format */
- __attribute__((const));
-/********************************************************//**
-The following function is used to convert a 16-bit data item
-from the canonical format, for fast bytewise equality test
-against memory.
-@return integer in machine-dependent format */
-UNIV_INLINE
-ulint
-mach_decode_2(
-/*==========*/
- uint16 n) /*!< in: 16-bit integer in canonical format */
- __attribute__((const));
-/*******************************************************//**
-The following function is used to store data in 3 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_3(
-/*============*/
- byte* b, /*!< in: pointer to 3 bytes where to store */
- ulint n); /*!< in: ulint integer to be stored */
-/********************************************************//**
-The following function is used to fetch data from 3 consecutive
-bytes. The most significant byte is at the lowest address.
-@return ulint integer */
-UNIV_INLINE
-ulint
-mach_read_from_3(
-/*=============*/
- const byte* b) /*!< in: pointer to 3 bytes */
- __attribute__((nonnull, pure));
-/*******************************************************//**
-The following function is used to store data in four consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_4(
-/*============*/
- byte* b, /*!< in: pointer to four bytes where to store */
- ulint n); /*!< in: ulint integer to be stored */
-/********************************************************//**
-The following function is used to fetch data from 4 consecutive
-bytes. The most significant byte is at the lowest address.
-@return ulint integer */
-UNIV_INLINE
-ulint
-mach_read_from_4(
-/*=============*/
- const byte* b) /*!< in: pointer to four bytes */
- __attribute__((nonnull, pure));
-/*********************************************************//**
-Writes a ulint in a compressed form (1..5 bytes).
-@return stored size in bytes */
-UNIV_INLINE
-ulint
-mach_write_compressed(
-/*==================*/
- byte* b, /*!< in: pointer to memory where to store */
- ulint n); /*!< in: ulint integer to be stored */
-/*********************************************************//**
-Returns the size of an ulint when written in the compressed form.
-@return compressed size in bytes */
-UNIV_INLINE
-ulint
-mach_get_compressed_size(
-/*=====================*/
- ulint n) /*!< in: ulint integer to be stored */
- __attribute__((const));
-/*********************************************************//**
-Reads a ulint in a compressed form.
-@return read integer */
-UNIV_INLINE
-ulint
-mach_read_compressed(
-/*=================*/
- const byte* b) /*!< in: pointer to memory from where to read */
- __attribute__((nonnull, pure));
-/*******************************************************//**
-The following function is used to store data in 6 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_6(
-/*============*/
- byte* b, /*!< in: pointer to 6 bytes where to store */
- dulint n); /*!< in: dulint integer to be stored */
-/********************************************************//**
-The following function is used to fetch data from 6 consecutive
-bytes. The most significant byte is at the lowest address.
-@return dulint integer */
-UNIV_INLINE
-dulint
-mach_read_from_6(
-/*=============*/
- const byte* b) /*!< in: pointer to 6 bytes */
- __attribute__((nonnull, pure));
-/*******************************************************//**
-The following function is used to store data in 7 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_7(
-/*============*/
- byte* b, /*!< in: pointer to 7 bytes where to store */
- dulint n); /*!< in: dulint integer to be stored */
-/********************************************************//**
-The following function is used to fetch data from 7 consecutive
-bytes. The most significant byte is at the lowest address.
-@return dulint integer */
-UNIV_INLINE
-dulint
-mach_read_from_7(
-/*=============*/
- const byte* b) /*!< in: pointer to 7 bytes */
- __attribute__((nonnull, pure));
-/*******************************************************//**
-The following function is used to store data in 8 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_8(
-/*============*/
- byte* b, /*!< in: pointer to 8 bytes where to store */
- dulint n); /*!< in: dulint integer to be stored */
-/*******************************************************//**
-The following function is used to store data in 8 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_ull(
-/*===========*/
- byte* b, /*!< in: pointer to 8 bytes where to store */
- ib_uint64_t n); /*!< in: 64-bit integer to be stored */
-/********************************************************//**
-The following function is used to fetch data from 8 consecutive
-bytes. The most significant byte is at the lowest address.
-@return dulint integer */
-UNIV_INLINE
-dulint
-mach_read_from_8(
-/*=============*/
- const byte* b) /*!< in: pointer to 8 bytes */
- __attribute__((nonnull, pure));
-/********************************************************//**
-The following function is used to fetch data from 8 consecutive
-bytes. The most significant byte is at the lowest address.
-@return 64-bit integer */
-UNIV_INLINE
-ib_uint64_t
-mach_read_ull(
-/*==========*/
- const byte* b) /*!< in: pointer to 8 bytes */
- __attribute__((nonnull, pure));
-/*********************************************************//**
-Writes a dulint in a compressed form (5..9 bytes).
-@return size in bytes */
-UNIV_INLINE
-ulint
-mach_dulint_write_compressed(
-/*=========================*/
- byte* b, /*!< in: pointer to memory where to store */
- dulint n); /*!< in: dulint integer to be stored */
-/*********************************************************//**
-Returns the size of a dulint when written in the compressed form.
-@return compressed size in bytes */
-UNIV_INLINE
-ulint
-mach_dulint_get_compressed_size(
-/*============================*/
- dulint n); /*!< in: dulint integer to be stored */
-/*********************************************************//**
-Reads a dulint in a compressed form.
-@return read dulint */
-UNIV_INLINE
-dulint
-mach_dulint_read_compressed(
-/*========================*/
- const byte* b) /*!< in: pointer to memory from where to read */
- __attribute__((nonnull, pure));
-/*********************************************************//**
-Writes a dulint in a compressed form (1..11 bytes).
-@return size in bytes */
-UNIV_INLINE
-ulint
-mach_dulint_write_much_compressed(
-/*==============================*/
- byte* b, /*!< in: pointer to memory where to store */
- dulint n); /*!< in: dulint integer to be stored */
-/*********************************************************//**
-Returns the size of a dulint when written in the compressed form.
-@return compressed size in bytes */
-UNIV_INLINE
-ulint
-mach_dulint_get_much_compressed_size(
-/*=================================*/
- dulint n) /*!< in: dulint integer to be stored */
- __attribute__((const));
-/*********************************************************//**
-Reads a dulint in a compressed form.
-@return read dulint */
-UNIV_INLINE
-dulint
-mach_dulint_read_much_compressed(
-/*=============================*/
- const byte* b) /*!< in: pointer to memory from where to read */
- __attribute__((nonnull, pure));
-/*********************************************************//**
-Reads a ulint in a compressed form if the log record fully contains it.
-@return pointer to end of the stored field, NULL if not complete */
-UNIV_INTERN
-byte*
-mach_parse_compressed(
-/*==================*/
- byte* ptr, /*!< in: pointer to buffer from where to read */
- byte* end_ptr,/*!< in: pointer to end of the buffer */
- ulint* val); /*!< out: read value */
-/*********************************************************//**
-Reads a dulint in a compressed form if the log record fully contains it.
-@return pointer to end of the stored field, NULL if not complete */
-UNIV_INTERN
-byte*
-mach_dulint_parse_compressed(
-/*=========================*/
- byte* ptr, /*!< in: pointer to buffer from where to read */
- byte* end_ptr,/*!< in: pointer to end of the buffer */
- dulint* val); /*!< out: read value */
-#ifndef UNIV_HOTBACKUP
-/*********************************************************//**
-Reads a double. It is stored in a little-endian format.
-@return double read */
-UNIV_INLINE
-double
-mach_double_read(
-/*=============*/
- const byte* b) /*!< in: pointer to memory from where to read */
- __attribute__((nonnull, pure));
-/*********************************************************//**
-Writes a double. It is stored in a little-endian format. */
-UNIV_INLINE
-void
-mach_double_write(
-/*==============*/
- byte* b, /*!< in: pointer to memory where to write */
- double d); /*!< in: double */
-/*********************************************************//**
-Reads a float. It is stored in a little-endian format.
-@return float read */
-UNIV_INLINE
-float
-mach_float_read(
-/*============*/
- const byte* b) /*!< in: pointer to memory from where to read */
- __attribute__((nonnull, pure));
-/*********************************************************//**
-Writes a float. It is stored in a little-endian format. */
-UNIV_INLINE
-void
-mach_float_write(
-/*=============*/
- byte* b, /*!< in: pointer to memory where to write */
- float d); /*!< in: float */
-/*********************************************************//**
-Reads a ulint stored in the little-endian format.
-@return unsigned long int */
-UNIV_INLINE
-ulint
-mach_read_from_n_little_endian(
-/*===========================*/
- const byte* buf, /*!< in: from where to read */
- ulint buf_size) /*!< in: from how many bytes to read */
- __attribute__((nonnull, pure));
-/*********************************************************//**
-Writes a ulint in the little-endian format. */
-UNIV_INLINE
-void
-mach_write_to_n_little_endian(
-/*==========================*/
- byte* dest, /*!< in: where to write */
- ulint dest_size, /*!< in: into how many bytes to write */
- ulint n); /*!< in: unsigned long int to write */
-/*********************************************************//**
-Reads a ulint stored in the little-endian format.
-@return unsigned long int */
-UNIV_INLINE
-ulint
-mach_read_from_2_little_endian(
-/*===========================*/
- const byte* buf) /*!< in: from where to read */
- __attribute__((nonnull, pure));
-/*********************************************************//**
-Writes a ulint in the little-endian format. */
-UNIV_INLINE
-void
-mach_write_to_2_little_endian(
-/*==========================*/
- byte* dest, /*!< in: where to write */
- ulint n); /*!< in: unsigned long int to write */
-
-/*********************************************************//**
-Convert integral type from storage byte order (big endian) to
-host byte order.
-@return integer value */
-UNIV_INLINE
-ullint
-mach_read_int_type(
-/*===============*/
- const byte* src, /*!< in: where to read from */
- ulint len, /*!< in: length of src */
- ibool unsigned_type); /*!< in: signed or unsigned flag */
-#endif /* !UNIV_HOTBACKUP */
-
-#ifndef UNIV_NONINL
-#include "mach0data.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/mach0data.ic b/storage/innodb_plugin/include/mach0data.ic
deleted file mode 100644
index ef20356bd31..00000000000
--- a/storage/innodb_plugin/include/mach0data.ic
+++ /dev/null
@@ -1,786 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/mach0data.ic
-Utilities for converting data from the database file
-to the machine format.
-
-Created 11/28/1995 Heikki Tuuri
-***********************************************************************/
-
-#include "ut0mem.h"
-
-/*******************************************************//**
-The following function is used to store data in one byte. */
-UNIV_INLINE
-void
-mach_write_to_1(
-/*============*/
- byte* b, /*!< in: pointer to byte where to store */
- ulint n) /*!< in: ulint integer to be stored, >= 0, < 256 */
-{
- ut_ad(b);
- ut_ad(n <= 0xFFUL);
-
- b[0] = (byte)n;
-}
-
-/********************************************************//**
-The following function is used to fetch data from one byte.
-@return ulint integer, >= 0, < 256 */
-UNIV_INLINE
-ulint
-mach_read_from_1(
-/*=============*/
- const byte* b) /*!< in: pointer to byte */
-{
- ut_ad(b);
- return((ulint)(b[0]));
-}
-
-/*******************************************************//**
-The following function is used to store data in two consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_2(
-/*============*/
- byte* b, /*!< in: pointer to two bytes where to store */
- ulint n) /*!< in: ulint integer to be stored */
-{
- ut_ad(b);
- ut_ad(n <= 0xFFFFUL);
-
- b[0] = (byte)(n >> 8);
- b[1] = (byte)(n);
-}
-
-/********************************************************//**
-The following function is used to fetch data from 2 consecutive
-bytes. The most significant byte is at the lowest address.
-@return ulint integer */
-UNIV_INLINE
-ulint
-mach_read_from_2(
-/*=============*/
- const byte* b) /*!< in: pointer to 2 bytes */
-{
- ut_ad(b);
- return( ((ulint)(b[0]) << 8)
- + (ulint)(b[1])
- );
-}
-
-/********************************************************//**
-The following function is used to convert a 16-bit data item
-to the canonical format, for fast bytewise equality test
-against memory.
-@return 16-bit integer in canonical format */
-UNIV_INLINE
-uint16
-mach_encode_2(
-/*==========*/
- ulint n) /*!< in: integer in machine-dependent format */
-{
- uint16 ret;
- ut_ad(2 == sizeof ret);
- mach_write_to_2((byte*) &ret, n);
- return(ret);
-}
-/********************************************************//**
-The following function is used to convert a 16-bit data item
-from the canonical format, for fast bytewise equality test
-against memory.
-@return integer in machine-dependent format */
-UNIV_INLINE
-ulint
-mach_decode_2(
-/*==========*/
- uint16 n) /*!< in: 16-bit integer in canonical format */
-{
- ut_ad(2 == sizeof n);
- return(mach_read_from_2((const byte*) &n));
-}
-
-/*******************************************************//**
-The following function is used to store data in 3 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_3(
-/*============*/
- byte* b, /*!< in: pointer to 3 bytes where to store */
- ulint n) /*!< in: ulint integer to be stored */
-{
- ut_ad(b);
- ut_ad(n <= 0xFFFFFFUL);
-
- b[0] = (byte)(n >> 16);
- b[1] = (byte)(n >> 8);
- b[2] = (byte)(n);
-}
-
-/********************************************************//**
-The following function is used to fetch data from 3 consecutive
-bytes. The most significant byte is at the lowest address.
-@return ulint integer */
-UNIV_INLINE
-ulint
-mach_read_from_3(
-/*=============*/
- const byte* b) /*!< in: pointer to 3 bytes */
-{
- ut_ad(b);
- return( ((ulint)(b[0]) << 16)
- + ((ulint)(b[1]) << 8)
- + (ulint)(b[2])
- );
-}
-
-/*******************************************************//**
-The following function is used to store data in four consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_4(
-/*============*/
- byte* b, /*!< in: pointer to four bytes where to store */
- ulint n) /*!< in: ulint integer to be stored */
-{
- ut_ad(b);
-
- b[0] = (byte)(n >> 24);
- b[1] = (byte)(n >> 16);
- b[2] = (byte)(n >> 8);
- b[3] = (byte)n;
-}
-
-/********************************************************//**
-The following function is used to fetch data from 4 consecutive
-bytes. The most significant byte is at the lowest address.
-@return ulint integer */
-UNIV_INLINE
-ulint
-mach_read_from_4(
-/*=============*/
- const byte* b) /*!< in: pointer to four bytes */
-{
- ut_ad(b);
- return( ((ulint)(b[0]) << 24)
- + ((ulint)(b[1]) << 16)
- + ((ulint)(b[2]) << 8)
- + (ulint)(b[3])
- );
-}
-
-/*********************************************************//**
-Writes a ulint in a compressed form where the first byte codes the
-length of the stored ulint. We look at the most significant bits of
-the byte. If the most significant bit is zero, it means 1-byte storage,
-else if the 2nd bit is 0, it means 2-byte storage, else if 3rd is 0,
-it means 3-byte storage, else if 4th is 0, it means 4-byte storage,
-else the storage is 5-byte.
-@return compressed size in bytes */
-UNIV_INLINE
-ulint
-mach_write_compressed(
-/*==================*/
- byte* b, /*!< in: pointer to memory where to store */
- ulint n) /*!< in: ulint integer (< 2^32) to be stored */
-{
- ut_ad(b);
-
- if (n < 0x80UL) {
- mach_write_to_1(b, n);
- return(1);
- } else if (n < 0x4000UL) {
- mach_write_to_2(b, n | 0x8000UL);
- return(2);
- } else if (n < 0x200000UL) {
- mach_write_to_3(b, n | 0xC00000UL);
- return(3);
- } else if (n < 0x10000000UL) {
- mach_write_to_4(b, n | 0xE0000000UL);
- return(4);
- } else {
- mach_write_to_1(b, 0xF0UL);
- mach_write_to_4(b + 1, n);
- return(5);
- }
-}
-
-/*********************************************************//**
-Returns the size of a ulint when written in the compressed form.
-@return compressed size in bytes */
-UNIV_INLINE
-ulint
-mach_get_compressed_size(
-/*=====================*/
- ulint n) /*!< in: ulint integer (< 2^32) to be stored */
-{
- if (n < 0x80UL) {
- return(1);
- } else if (n < 0x4000UL) {
- return(2);
- } else if (n < 0x200000UL) {
- return(3);
- } else if (n < 0x10000000UL) {
- return(4);
- } else {
- return(5);
- }
-}
-
-/*********************************************************//**
-Reads a ulint in a compressed form.
-@return read integer (< 2^32) */
-UNIV_INLINE
-ulint
-mach_read_compressed(
-/*=================*/
- const byte* b) /*!< in: pointer to memory from where to read */
-{
- ulint flag;
-
- ut_ad(b);
-
- flag = mach_read_from_1(b);
-
- if (flag < 0x80UL) {
- return(flag);
- } else if (flag < 0xC0UL) {
- return(mach_read_from_2(b) & 0x7FFFUL);
- } else if (flag < 0xE0UL) {
- return(mach_read_from_3(b) & 0x3FFFFFUL);
- } else if (flag < 0xF0UL) {
- return(mach_read_from_4(b) & 0x1FFFFFFFUL);
- } else {
- ut_ad(flag == 0xF0UL);
- return(mach_read_from_4(b + 1));
- }
-}
-
-/*******************************************************//**
-The following function is used to store data in 8 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_8(
-/*============*/
- byte* b, /*!< in: pointer to 8 bytes where to store */
- dulint n) /*!< in: dulint integer to be stored */
-{
- ut_ad(b);
-
- mach_write_to_4(b, ut_dulint_get_high(n));
- mach_write_to_4(b + 4, ut_dulint_get_low(n));
-}
-
-/*******************************************************//**
-The following function is used to store data in 8 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_ull(
-/*===========*/
- byte* b, /*!< in: pointer to 8 bytes where to store */
- ib_uint64_t n) /*!< in: 64-bit integer to be stored */
-{
- ut_ad(b);
-
- mach_write_to_4(b, (ulint) (n >> 32));
- mach_write_to_4(b + 4, (ulint) n);
-}
-
-/********************************************************//**
-The following function is used to fetch data from 8 consecutive
-bytes. The most significant byte is at the lowest address.
-@return dulint integer */
-UNIV_INLINE
-dulint
-mach_read_from_8(
-/*=============*/
- const byte* b) /*!< in: pointer to 8 bytes */
-{
- ulint high;
- ulint low;
-
- ut_ad(b);
-
- high = mach_read_from_4(b);
- low = mach_read_from_4(b + 4);
-
- return(ut_dulint_create(high, low));
-}
-
-/********************************************************//**
-The following function is used to fetch data from 8 consecutive
-bytes. The most significant byte is at the lowest address.
-@return 64-bit integer */
-UNIV_INLINE
-ib_uint64_t
-mach_read_ull(
-/*==========*/
- const byte* b) /*!< in: pointer to 8 bytes */
-{
- ib_uint64_t ull;
-
- ull = ((ib_uint64_t) mach_read_from_4(b)) << 32;
- ull |= (ib_uint64_t) mach_read_from_4(b + 4);
-
- return(ull);
-}
-
-/*******************************************************//**
-The following function is used to store data in 7 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_7(
-/*============*/
- byte* b, /*!< in: pointer to 7 bytes where to store */
- dulint n) /*!< in: dulint integer to be stored */
-{
- ut_ad(b);
-
- mach_write_to_3(b, ut_dulint_get_high(n));
- mach_write_to_4(b + 3, ut_dulint_get_low(n));
-}
-
-/********************************************************//**
-The following function is used to fetch data from 7 consecutive
-bytes. The most significant byte is at the lowest address.
-@return dulint integer */
-UNIV_INLINE
-dulint
-mach_read_from_7(
-/*=============*/
- const byte* b) /*!< in: pointer to 7 bytes */
-{
- ulint high;
- ulint low;
-
- ut_ad(b);
-
- high = mach_read_from_3(b);
- low = mach_read_from_4(b + 3);
-
- return(ut_dulint_create(high, low));
-}
-
-/*******************************************************//**
-The following function is used to store data in 6 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_6(
-/*============*/
- byte* b, /*!< in: pointer to 6 bytes where to store */
- dulint n) /*!< in: dulint integer to be stored */
-{
- ut_ad(b);
-
- mach_write_to_2(b, ut_dulint_get_high(n));
- mach_write_to_4(b + 2, ut_dulint_get_low(n));
-}
-
-/********************************************************//**
-The following function is used to fetch data from 6 consecutive
-bytes. The most significant byte is at the lowest address.
-@return dulint integer */
-UNIV_INLINE
-dulint
-mach_read_from_6(
-/*=============*/
- const byte* b) /*!< in: pointer to 6 bytes */
-{
- ulint high;
- ulint low;
-
- ut_ad(b);
-
- high = mach_read_from_2(b);
- low = mach_read_from_4(b + 2);
-
- return(ut_dulint_create(high, low));
-}
-
-/*********************************************************//**
-Writes a dulint in a compressed form (5..9 bytes).
-@return size in bytes */
-UNIV_INLINE
-ulint
-mach_dulint_write_compressed(
-/*=========================*/
- byte* b, /*!< in: pointer to memory where to store */
- dulint n) /*!< in: dulint integer to be stored */
-{
- ulint size;
-
- ut_ad(b);
-
- size = mach_write_compressed(b, ut_dulint_get_high(n));
- mach_write_to_4(b + size, ut_dulint_get_low(n));
-
- return(size + 4);
-}
-
-/*********************************************************//**
-Returns the size of a dulint when written in the compressed form.
-@return compressed size in bytes */
-UNIV_INLINE
-ulint
-mach_dulint_get_compressed_size(
-/*============================*/
- dulint n) /*!< in: dulint integer to be stored */
-{
- return(4 + mach_get_compressed_size(ut_dulint_get_high(n)));
-}
-
-/*********************************************************//**
-Reads a dulint in a compressed form.
-@return read dulint */
-UNIV_INLINE
-dulint
-mach_dulint_read_compressed(
-/*========================*/
- const byte* b) /*!< in: pointer to memory from where to read */
-{
- ulint high;
- ulint low;
- ulint size;
-
- ut_ad(b);
-
- high = mach_read_compressed(b);
-
- size = mach_get_compressed_size(high);
-
- low = mach_read_from_4(b + size);
-
- return(ut_dulint_create(high, low));
-}
-
-/*********************************************************//**
-Writes a dulint in a compressed form (1..11 bytes).
-@return size in bytes */
-UNIV_INLINE
-ulint
-mach_dulint_write_much_compressed(
-/*==============================*/
- byte* b, /*!< in: pointer to memory where to store */
- dulint n) /*!< in: dulint integer to be stored */
-{
- ulint size;
-
- ut_ad(b);
-
- if (ut_dulint_get_high(n) == 0) {
- return(mach_write_compressed(b, ut_dulint_get_low(n)));
- }
-
- *b = (byte)0xFF;
- size = 1 + mach_write_compressed(b + 1, ut_dulint_get_high(n));
-
- size += mach_write_compressed(b + size, ut_dulint_get_low(n));
-
- return(size);
-}
-
-/*********************************************************//**
-Returns the size of a dulint when written in the compressed form.
-@return compressed size in bytes */
-UNIV_INLINE
-ulint
-mach_dulint_get_much_compressed_size(
-/*=================================*/
- dulint n) /*!< in: dulint integer to be stored */
-{
- if (0 == ut_dulint_get_high(n)) {
- return(mach_get_compressed_size(ut_dulint_get_low(n)));
- }
-
- return(1 + mach_get_compressed_size(ut_dulint_get_high(n))
- + mach_get_compressed_size(ut_dulint_get_low(n)));
-}
-
-/*********************************************************//**
-Reads a dulint in a compressed form.
-@return read dulint */
-UNIV_INLINE
-dulint
-mach_dulint_read_much_compressed(
-/*=============================*/
- const byte* b) /*!< in: pointer to memory from where to read */
-{
- ulint high;
- ulint low;
- ulint size;
-
- ut_ad(b);
-
- if (*b != (byte)0xFF) {
- high = 0;
- size = 0;
- } else {
- high = mach_read_compressed(b + 1);
-
- size = 1 + mach_get_compressed_size(high);
- }
-
- low = mach_read_compressed(b + size);
-
- return(ut_dulint_create(high, low));
-}
-#ifndef UNIV_HOTBACKUP
-/*********************************************************//**
-Reads a double from a buffer that holds it in little-endian byte order.
-@return double read */
-UNIV_INLINE
-double
-mach_double_read(
-/*=============*/
-	const byte*	b)	/*!< in: pointer to memory from where to read */
-{
-	double	value;
-	byte*	dst = (byte*)&value;
-	ulint	pos;
-
-	/* Copy byte by byte; on a big-endian host the byte order is
-	reversed while copying. */
-	for (pos = sizeof(double); pos-- > 0; ) {
-#ifdef WORDS_BIGENDIAN
-		dst[sizeof(double) - pos - 1] = b[pos];
-#else
-		dst[pos] = b[pos];
-#endif
-	}
-
-	return(value);
-}
-
-/*********************************************************//**
-Writes a double to a buffer in little-endian byte order. */
-UNIV_INLINE
-void
-mach_double_write(
-/*==============*/
-	byte*	b,	/*!< in: pointer to memory where to write */
-	double	d)	/*!< in: double */
-{
-	const byte*	src = (byte*)&d;
-	ulint		pos;
-
-	/* Copy byte by byte; on a big-endian host the byte order is
-	reversed while copying. */
-	for (pos = sizeof(double); pos-- > 0; ) {
-#ifdef WORDS_BIGENDIAN
-		b[pos] = src[sizeof(double) - pos - 1];
-#else
-		b[pos] = src[pos];
-#endif
-	}
-}
-
-/*********************************************************//**
-Reads a float from a buffer that holds it in little-endian byte order.
-@return float read */
-UNIV_INLINE
-float
-mach_float_read(
-/*============*/
-	const byte*	b)	/*!< in: pointer to memory from where to read */
-{
-	float	value;
-	byte*	dst = (byte*)&value;
-	ulint	pos;
-
-	/* Copy byte by byte; on a big-endian host the byte order is
-	reversed while copying. */
-	for (pos = sizeof(float); pos-- > 0; ) {
-#ifdef WORDS_BIGENDIAN
-		dst[sizeof(float) - pos - 1] = b[pos];
-#else
-		dst[pos] = b[pos];
-#endif
-	}
-
-	return(value);
-}
-
-/*********************************************************//**
-Writes a float to a buffer in little-endian byte order. */
-UNIV_INLINE
-void
-mach_float_write(
-/*=============*/
-	byte*	b,	/*!< in: pointer to memory where to write */
-	float	d)	/*!< in: float */
-{
-	const byte*	src = (byte*)&d;
-	ulint		pos;
-
-	/* Copy byte by byte; on a big-endian host the byte order is
-	reversed while copying. */
-	for (pos = sizeof(float); pos-- > 0; ) {
-#ifdef WORDS_BIGENDIAN
-		b[pos] = src[sizeof(float) - pos - 1];
-#else
-		b[pos] = src[pos];
-#endif
-	}
-}
-
-/*********************************************************//**
-Reads a ulint of buf_size bytes stored in the little-endian format.
-@return unsigned long int */
-UNIV_INLINE
-ulint
-mach_read_from_n_little_endian(
-/*===========================*/
-	const byte*	buf,		/*!< in: from where to read */
-	ulint		buf_size)	/*!< in: from how many bytes to read */
-{
-	ulint	value = 0;
-	ulint	pos;
-
-	ut_ad(buf_size <= sizeof(ulint));
-	ut_ad(buf_size > 0);
-
-	/* Start at the last (most significant) byte and work down to
-	buf[0], shifting the accumulated value up one byte each step. */
-	pos = buf_size;
-
-	do {
-		pos--;
-
-		value = (value << 8) + (ulint) buf[pos];
-	} while (pos != 0);
-
-	return(value);
-}
-
-/*********************************************************//**
-Writes a ulint into dest_size bytes in the little-endian format. */
-UNIV_INLINE
-void
-mach_write_to_n_little_endian(
-/*==========================*/
-	byte*	dest,		/*!< in: where to write */
-	ulint	dest_size,	/*!< in: into how many bytes to write */
-	ulint	n)		/*!< in: unsigned long int to write */
-{
-	ulint	pos = 0;
-
-	ut_ad(dest_size <= sizeof(ulint));
-	ut_ad(dest_size > 0);
-
-	/* Emit the least significant byte first. */
-	do {
-		dest[pos] = (byte)(n & 0xFF);
-
-		n >>= 8;
-
-		pos++;
-	} while (pos != dest_size);
-
-	/* Debug check: the whole value must have fitted in dest_size bytes. */
-	ut_ad(n == 0);
-}
-
-/*********************************************************//**
-Reads a 2-byte ulint stored in the little-endian format.
-@return unsigned long int */
-UNIV_INLINE
-ulint
-mach_read_from_2_little_endian(
-/*===========================*/
-	const byte*	buf)	/*!< in: from where to read */
-{
-	ulint	low_byte = (ulint) buf[0];
-	ulint	high_byte = (ulint) buf[1];
-
-	return(low_byte | (high_byte << 8));
-}
-
-/*********************************************************//**
-Writes a ulint into 2 bytes in the little-endian format. */
-UNIV_INLINE
-void
-mach_write_to_2_little_endian(
-/*==========================*/
-	byte*	dest,	/*!< in: where to write */
-	ulint	n)	/*!< in: unsigned long int to write */
-{
-	/* Debug check: the value must fit in 16 bits. */
-	ut_ad(n < 256 * 256);
-
-	dest[0] = (byte)(n & 0xFFUL);
-	dest[1] = (byte)((n >> 8) & 0xFFUL);
-}
-
-/*********************************************************//**
-Convert integral type from storage byte order (big endian) to
-host byte order. For signed types the top bit of the first stored
-byte is inverted (XORed with 0x80) while reading, and the bytes above
-the stored length are filled with ones when that stored bit is clear.
-@return integer value */
-UNIV_INLINE
-ullint
-mach_read_int_type(
-/*===============*/
-	const byte*	src,		/*!< in: where to read from */
-	ulint		len,		/*!< in: length of src */
-	ibool		unsigned_type)	/*!< in: signed or unsigned flag */
-{
-	/* XXX this can be optimized on big-endian machines */
-
-	ullint	value;
-	uint	pos;
-
-	if (unsigned_type) {
-		/* Unsigned: take the first byte as is. */
-		value = src[0];
-	} else if (src[0] & 0x80) {
-		/* Signed, stored top bit set: upper bytes stay zero. */
-		value = src[0] ^ 0x80;
-	} else {
-		/* Signed, stored top bit clear: upper bytes are all ones. */
-		value = 0xFFFFFFFFFFFFFF00ULL | (src[0] ^ 0x80);
-	}
-
-	/* Append the remaining bytes, most significant first. */
-	for (pos = 1; pos < len; pos++) {
-		value = (value << 8) | src[pos];
-	}
-
-	return(value);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/mem0dbg.h b/storage/innodb_plugin/include/mem0dbg.h
deleted file mode 100644
index a064af5c678..00000000000
--- a/storage/innodb_plugin/include/mem0dbg.h
+++ /dev/null
@@ -1,143 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/mem0dbg.h
-The memory management: the debug code. This is not a compilation module,
-but is included in mem0mem.* !
-
-Created 6/9/1994 Heikki Tuuri
-*******************************************************/
-
-/* In the debug version each allocated field is surrounded with
-check fields whose sizes are given below */
-
-#ifdef UNIV_MEM_DEBUG
-#define MEM_FIELD_HEADER_SIZE ut_calc_align(2 * sizeof(ulint),\
- UNIV_MEM_ALIGNMENT)
-#define MEM_FIELD_TRAILER_SIZE sizeof(ulint)
-#else
-#define MEM_FIELD_HEADER_SIZE 0
-#endif
-
-
-/* Space needed when allocating for a user a field of
-length N. The space is allocated only in multiples of
-UNIV_MEM_ALIGNMENT. In the debug version there are also
-check fields at both ends of the field. */
-#ifdef UNIV_MEM_DEBUG
-#define MEM_SPACE_NEEDED(N) ut_calc_align((N) + MEM_FIELD_HEADER_SIZE\
- + MEM_FIELD_TRAILER_SIZE, UNIV_MEM_ALIGNMENT)
-#else
-#define MEM_SPACE_NEEDED(N) ut_calc_align((N), UNIV_MEM_ALIGNMENT)
-#endif
-
-#if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG
-/***************************************************************//**
-Checks a memory heap for consistency and prints the contents if requested.
-Outputs the sum of sizes of buffers given to the user (only in
-the debug version), the physical size of the heap and the number of
-blocks in the heap. In case of error returns 0 as sizes and number
-of blocks. */
-UNIV_INTERN
-void
-mem_heap_validate_or_print(
-/*=======================*/
- mem_heap_t* heap, /*!< in: memory heap */
- byte* top, /*!< in: calculate and validate only until
- this top pointer in the heap is reached,
- if this pointer is NULL, ignored */
- ibool print, /*!< in: if TRUE, prints the contents
- of the heap; works only in
- the debug version */
- ibool* error, /*!< out: TRUE if error */
- ulint* us_size,/*!< out: allocated memory
- (for the user) in the heap,
- if a NULL pointer is passed as this
- argument, it is ignored; in the
- non-debug version this is always -1 */
- ulint* ph_size,/*!< out: physical size of the heap,
- if a NULL pointer is passed as this
- argument, it is ignored */
- ulint* n_blocks); /*!< out: number of blocks in the heap,
- if a NULL pointer is passed as this
- argument, it is ignored */
-/**************************************************************//**
-Validates the contents of a memory heap.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-mem_heap_validate(
-/*==============*/
- mem_heap_t* heap); /*!< in: memory heap */
-#endif /* UNIV_MEM_DEBUG || UNIV_DEBUG */
-#ifdef UNIV_DEBUG
-/**************************************************************//**
-Checks that an object is a memory heap (or a block of it)
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-mem_heap_check(
-/*===========*/
- mem_heap_t* heap); /*!< in: memory heap */
-#endif /* UNIV_DEBUG */
-#ifdef UNIV_MEM_DEBUG
-/*****************************************************************//**
-TRUE if no memory is currently allocated.
-@return TRUE if no heaps exist */
-UNIV_INTERN
-ibool
-mem_all_freed(void);
-/*===============*/
-/*****************************************************************//**
-Validates the dynamic memory
-@return TRUE if error */
-UNIV_INTERN
-ibool
-mem_validate_no_assert(void);
-/*=========================*/
-/************************************************************//**
-Validates the dynamic memory
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-mem_validate(void);
-/*===============*/
-#endif /* UNIV_MEM_DEBUG */
-/************************************************************//**
-Tries to find neighboring memory allocation blocks and dumps to stderr
-the neighborhood of a given pointer. */
-UNIV_INTERN
-void
-mem_analyze_corruption(
-/*===================*/
- void* ptr); /*!< in: pointer to place of possible corruption */
-/*****************************************************************//**
-Prints information of dynamic memory usage and currently allocated memory
-heaps or buffers. Can only be used in the debug version. */
-UNIV_INTERN
-void
-mem_print_info(void);
-/*================*/
-/*****************************************************************//**
-Prints information of dynamic memory usage and currently allocated memory
-heaps or buffers since the last ..._print_info or..._print_new_info. */
-UNIV_INTERN
-void
-mem_print_new_info(void);
-/*====================*/
diff --git a/storage/innodb_plugin/include/mem0dbg.ic b/storage/innodb_plugin/include/mem0dbg.ic
deleted file mode 100644
index cb9245411dc..00000000000
--- a/storage/innodb_plugin/include/mem0dbg.ic
+++ /dev/null
@@ -1,112 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/mem0dbg.ic
-The memory management: the debug code. This is not an independent
-compilation module but is included in mem0mem.*.
-
-Created 6/8/1994 Heikki Tuuri
-*************************************************************************/
-
-#ifdef UNIV_MEM_DEBUG
-# ifndef UNIV_HOTBACKUP
-extern mutex_t mem_hash_mutex;
-# endif /* !UNIV_HOTBACKUP */
-extern ulint mem_current_allocated_memory;
-
-/******************************************************************//**
-Initializes an allocated memory field in the debug version. */
-UNIV_INTERN
-void
-mem_field_init(
-/*===========*/
- byte* buf, /*!< in: memory field */
- ulint n); /*!< in: how many bytes the user requested */
-/******************************************************************//**
-Erases an allocated memory field in the debug version. */
-UNIV_INTERN
-void
-mem_field_erase(
-/*============*/
- byte* buf, /*!< in: memory field */
- ulint n); /*!< in: how many bytes the user requested */
-/***************************************************************//**
-Initializes a buffer to a random combination of hex BA and BE.
-Used to initialize allocated memory. */
-UNIV_INTERN
-void
-mem_init_buf(
-/*=========*/
- byte* buf, /*!< in: pointer to buffer */
- ulint n); /*!< in: length of buffer */
-/***************************************************************//**
-Initializes a buffer to a random combination of hex DE and AD.
-Used to erase freed memory. */
-UNIV_INTERN
-void
-mem_erase_buf(
-/*==========*/
- byte* buf, /*!< in: pointer to buffer */
- ulint n); /*!< in: length of buffer */
-/***************************************************************//**
-Inserts a created memory heap to the hash table of
-current allocated memory heaps.
-Initializes the hash table when first called. */
-UNIV_INTERN
-void
-mem_hash_insert(
-/*============*/
- mem_heap_t* heap, /*!< in: the created heap */
- const char* file_name, /*!< in: file name of creation */
- ulint line); /*!< in: line where created */
-/***************************************************************//**
-Removes a memory heap (which is going to be freed by the caller)
-from the list of live memory heaps. Returns the size of the heap
-in terms of how much memory in bytes was allocated for the user of
-the heap (not the total space occupied by the heap).
-Also validates the heap.
-NOTE: This function does not free the storage occupied by the
-heap itself, only the node in the list of heaps. */
-UNIV_INTERN
-void
-mem_hash_remove(
-/*============*/
- mem_heap_t* heap, /*!< in: the heap to be freed */
- const char* file_name, /*!< in: file name of freeing */
- ulint line); /*!< in: line where freed */
-
-
-void
-mem_field_header_set_len(byte* field, ulint len);
-
-ulint
-mem_field_header_get_len(byte* field);
-
-void
-mem_field_header_set_check(byte* field, ulint check);
-
-ulint
-mem_field_header_get_check(byte* field);
-
-void
-mem_field_trailer_set_check(byte* field, ulint check);
-
-ulint
-mem_field_trailer_get_check(byte* field);
-#endif /* UNIV_MEM_DEBUG */
diff --git a/storage/innodb_plugin/include/mem0mem.h b/storage/innodb_plugin/include/mem0mem.h
deleted file mode 100644
index 98f8748e529..00000000000
--- a/storage/innodb_plugin/include/mem0mem.h
+++ /dev/null
@@ -1,399 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/mem0mem.h
-The memory management
-
-Created 6/9/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef mem0mem_h
-#define mem0mem_h
-
-#include "univ.i"
-#include "ut0mem.h"
-#include "ut0byte.h"
-#include "ut0rnd.h"
-#ifndef UNIV_HOTBACKUP
-# include "sync0sync.h"
-#endif /* UNIV_HOTBACKUP */
-#include "ut0lst.h"
-#include "mach0data.h"
-
-/* -------------------- MEMORY HEAPS ----------------------------- */
-
-/* The info structure stored at the beginning of a heap block */
-typedef struct mem_block_info_struct mem_block_info_t;
-
-/* A block of a memory heap consists of the info structure
-followed by an area of memory */
-typedef mem_block_info_t mem_block_t;
-
-/* A memory heap is a nonempty linear list of memory blocks */
-typedef mem_block_t mem_heap_t;
-
-/* Types of allocation for memory heaps: DYNAMIC means allocation from the
-dynamic memory pool of the C compiler, BUFFER means allocation from the
-buffer pool; the latter method is used for very big heaps */
-
-#define MEM_HEAP_DYNAMIC 0 /* the most common type */
-#define MEM_HEAP_BUFFER 1
-#define MEM_HEAP_BTR_SEARCH 2 /* this flag can optionally be
- ORed to MEM_HEAP_BUFFER, in which
- case heap->free_block is used in
- some cases for memory allocations,
- and if it's NULL, the memory
- allocation functions can return
- NULL. */
-
-/* The following start size is used for the first block in the memory heap if
-the size is not specified, i.e., 0 is given as the parameter in the call of
-create. The standard size is the maximum (payload) size of the blocks used for
-allocations of small buffers. */
-
-#define MEM_BLOCK_START_SIZE 64
-#define MEM_BLOCK_STANDARD_SIZE \
- (UNIV_PAGE_SIZE >= 16384 ? 8000 : MEM_MAX_ALLOC_IN_BUF)
-
-/* If a memory heap is allowed to grow into the buffer pool, the following
-is the maximum size for a single allocated buffer: */
-#define MEM_MAX_ALLOC_IN_BUF (UNIV_PAGE_SIZE - 200)
-
-/******************************************************************//**
-Initializes the memory system. */
-UNIV_INTERN
-void
-mem_init(
-/*=====*/
- ulint size); /*!< in: common pool size in bytes */
-/******************************************************************//**
-Closes the memory system. */
-UNIV_INTERN
-void
-mem_close(void);
-/*===========*/
-
-/**************************************************************//**
-Use this macro instead of the corresponding function! Macro for memory
-heap creation. */
-
-#define mem_heap_create(N) mem_heap_create_func(\
- (N), MEM_HEAP_DYNAMIC, __FILE__, __LINE__)
-/**************************************************************//**
-Use this macro instead of the corresponding function! Macro for memory
-heap creation. */
-
-#define mem_heap_create_in_buffer(N) mem_heap_create_func(\
- (N), MEM_HEAP_BUFFER, __FILE__, __LINE__)
-/**************************************************************//**
-Use this macro instead of the corresponding function! Macro for memory
-heap creation. */
-
-#define mem_heap_create_in_btr_search(N) mem_heap_create_func(\
- (N), MEM_HEAP_BTR_SEARCH | MEM_HEAP_BUFFER,\
- __FILE__, __LINE__)
-
-/**************************************************************//**
-Use this macro instead of the corresponding function! Macro for memory
-heap freeing. */
-
-#define mem_heap_free(heap) mem_heap_free_func(\
- (heap), __FILE__, __LINE__)
-/*****************************************************************//**
-NOTE: Use the corresponding macros instead of this function. Creates a
-memory heap. For debugging purposes, takes also the file name and line as
-arguments.
-@return own: memory heap, NULL if did not succeed (only possible for
-MEM_HEAP_BTR_SEARCH type heaps) */
-UNIV_INLINE
-mem_heap_t*
-mem_heap_create_func(
-/*=================*/
- ulint n, /*!< in: desired start block size,
- this means that a single user buffer
- of size n will fit in the block,
- 0 creates a default size block */
- ulint type, /*!< in: heap type */
- const char* file_name, /*!< in: file name where created */
- ulint line); /*!< in: line where created */
-/*****************************************************************//**
-NOTE: Use the corresponding macro instead of this function. Frees the space
-occupied by a memory heap. In the debug version erases the heap memory
-blocks. */
-UNIV_INLINE
-void
-mem_heap_free_func(
-/*===============*/
- mem_heap_t* heap, /*!< in, own: heap to be freed */
- const char* file_name, /*!< in: file name where freed */
- ulint line); /*!< in: line where freed */
-/***************************************************************//**
-Allocates and zero-fills n bytes of memory from a memory heap.
-@return allocated, zero-filled storage */
-UNIV_INLINE
-void*
-mem_heap_zalloc(
-/*============*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n); /*!< in: number of bytes; if the heap is allowed
- to grow into the buffer pool, this must be
- <= MEM_MAX_ALLOC_IN_BUF */
-/***************************************************************//**
-Allocates n bytes of memory from a memory heap.
-@return allocated storage, NULL if did not succeed (only possible for
-MEM_HEAP_BTR_SEARCH type heaps) */
-UNIV_INLINE
-void*
-mem_heap_alloc(
-/*===========*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n); /*!< in: number of bytes; if the heap is allowed
- to grow into the buffer pool, this must be
- <= MEM_MAX_ALLOC_IN_BUF */
-/*****************************************************************//**
-Returns a pointer to the heap top.
-@return pointer to the heap top */
-UNIV_INLINE
-byte*
-mem_heap_get_heap_top(
-/*==================*/
- mem_heap_t* heap); /*!< in: memory heap */
-/*****************************************************************//**
-Frees the space in a memory heap exceeding the pointer given. The
-pointer must have been acquired from mem_heap_get_heap_top. The first
-memory block of the heap is not freed. */
-UNIV_INLINE
-void
-mem_heap_free_heap_top(
-/*===================*/
- mem_heap_t* heap, /*!< in: heap from which to free */
- byte* old_top);/*!< in: pointer to old top of heap */
-/*****************************************************************//**
-Empties a memory heap. The first memory block of the heap is not freed. */
-UNIV_INLINE
-void
-mem_heap_empty(
-/*===========*/
- mem_heap_t* heap); /*!< in: heap to empty */
-/*****************************************************************//**
-Returns a pointer to the topmost element in a memory heap.
-The size of the element must be given.
-@return pointer to the topmost element */
-UNIV_INLINE
-void*
-mem_heap_get_top(
-/*=============*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n); /*!< in: size of the topmost element */
-/*****************************************************************//**
-Frees the topmost element in a memory heap.
-The size of the element must be given. */
-UNIV_INLINE
-void
-mem_heap_free_top(
-/*==============*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n); /*!< in: size of the topmost element */
-/*****************************************************************//**
-Returns the space in bytes occupied by a memory heap. */
-UNIV_INLINE
-ulint
-mem_heap_get_size(
-/*==============*/
- mem_heap_t* heap); /*!< in: heap */
-/**************************************************************//**
-Use this macro instead of the corresponding function!
-Macro for memory buffer allocation */
-
-/* Allocates N bytes with mem_alloc() and zero-fills them; evaluates to the
-pointer returned by memset(). The expansion deliberately has no trailing
-semicolon so that the macro can be used as an expression and inside
-if/else without braces. NOTE: N is evaluated twice, so it must not have
-side effects. */
-#define mem_zalloc(N)	memset(mem_alloc(N), 0, (N))
-
-#define mem_alloc(N) mem_alloc_func((N), NULL, __FILE__, __LINE__)
-#define mem_alloc2(N,S) mem_alloc_func((N), (S), __FILE__, __LINE__)
-/***************************************************************//**
-NOTE: Use the corresponding macro instead of this function.
-Allocates a single buffer of memory from the dynamic memory of
-the C compiler. Is like malloc of C. The buffer must be freed
-with mem_free.
-@return own: free storage */
-UNIV_INLINE
-void*
-mem_alloc_func(
-/*===========*/
- ulint n, /*!< in: requested size in bytes */
- ulint* size, /*!< out: allocated size in bytes,
- or NULL */
- const char* file_name, /*!< in: file name where created */
- ulint line); /*!< in: line where created */
-
-/**************************************************************//**
-Use this macro instead of the corresponding function!
-Macro for memory buffer freeing */
-
-#define mem_free(PTR) mem_free_func((PTR), __FILE__, __LINE__)
-/***************************************************************//**
-NOTE: Use the corresponding macro instead of this function.
-Frees a single buffer of storage from
-the dynamic memory of C compiler. Similar to free of C. */
-UNIV_INLINE
-void
-mem_free_func(
-/*==========*/
- void* ptr, /*!< in, own: buffer to be freed */
- const char* file_name, /*!< in: file name where freed */
- ulint line); /*!< in: line where freed */
-
-/**********************************************************************//**
-Duplicates a NUL-terminated string.
-@return own: a copy of the string, must be deallocated with mem_free */
-UNIV_INLINE
-char*
-mem_strdup(
-/*=======*/
- const char* str); /*!< in: string to be copied */
-/**********************************************************************//**
-Makes a NUL-terminated copy of a nonterminated string.
-@return own: a copy of the string, must be deallocated with mem_free */
-UNIV_INLINE
-char*
-mem_strdupl(
-/*========*/
- const char* str, /*!< in: string to be copied */
- ulint len); /*!< in: length of str, in bytes */
-
-/**********************************************************************//**
-Duplicates a NUL-terminated string, allocated from a memory heap.
-@return own: a copy of the string */
-UNIV_INTERN
-char*
-mem_heap_strdup(
-/*============*/
- mem_heap_t* heap, /*!< in: memory heap where string is allocated */
- const char* str); /*!< in: string to be copied */
-/**********************************************************************//**
-Makes a NUL-terminated copy of a nonterminated string,
-allocated from a memory heap.
-@return own: a copy of the string */
-UNIV_INLINE
-char*
-mem_heap_strdupl(
-/*=============*/
- mem_heap_t* heap, /*!< in: memory heap where string is allocated */
- const char* str, /*!< in: string to be copied */
- ulint len); /*!< in: length of str, in bytes */
-
-/**********************************************************************//**
-Concatenate two strings and return the result, using a memory heap.
-@return own: the result */
-UNIV_INTERN
-char*
-mem_heap_strcat(
-/*============*/
- mem_heap_t* heap, /*!< in: memory heap where string is allocated */
- const char* s1, /*!< in: string 1 */
- const char* s2); /*!< in: string 2 */
-
-/**********************************************************************//**
-Duplicate a block of data, allocated from a memory heap.
-@return own: a copy of the data */
-UNIV_INTERN
-void*
-mem_heap_dup(
-/*=========*/
- mem_heap_t* heap, /*!< in: memory heap where copy is allocated */
- const void* data, /*!< in: data to be copied */
- ulint len); /*!< in: length of data, in bytes */
-
-/****************************************************************//**
-A simple (s)printf replacement that dynamically allocates the space for the
-formatted string from the given heap. This supports a very limited set of
-the printf syntax: types 's' and 'u' and length modifier 'l' (which is
-required for the 'u' type).
-@return heap-allocated formatted string */
-UNIV_INTERN
-char*
-mem_heap_printf(
-/*============*/
- mem_heap_t* heap, /*!< in: memory heap */
- const char* format, /*!< in: format string */
- ...) __attribute__ ((format (printf, 2, 3)));
-
-#ifdef MEM_PERIODIC_CHECK
-/******************************************************************//**
-Goes through the list of all allocated mem blocks, checks their magic
-numbers, and reports possible corruption. */
-UNIV_INTERN
-void
-mem_validate_all_blocks(void);
-/*=========================*/
-#endif
-
-/*#######################################################################*/
-
-/* The info header of a block in a memory heap */
-
-struct mem_block_info_struct {
- ulint magic_n;/* magic number for debugging */
- char file_name[8];/* file name where the mem heap was created */
- ulint line; /*!< line number where the mem heap was created */
- UT_LIST_BASE_NODE_T(mem_block_t) base; /* In the first block of the
- list this is the base node of the list of blocks;
- in subsequent blocks this is undefined */
- UT_LIST_NODE_T(mem_block_t) list; /* This contains pointers to next
- and prev in the list. The first block allocated
- to the heap is also the first block in this list,
- though it also contains the base node of the list. */
- ulint len; /*!< physical length of this block in bytes */
- ulint type; /*!< type of heap: MEM_HEAP_DYNAMIC, or
- MEM_HEAP_BUFFER possibly ORed to MEM_HEAP_BTR_SEARCH */
- ulint free; /*!< offset in bytes of the first free position for
- user data in the block */
- ulint start; /*!< the value of the struct field 'free' at the
- creation of the block */
-#ifndef UNIV_HOTBACKUP
- void* free_block;
- /* if the MEM_HEAP_BTR_SEARCH bit is set in type,
- and this is the heap root, this can contain an
- allocated buffer frame, which can be appended as a
- free block to the heap, if we need more space;
- otherwise, this is NULL */
- void* buf_block;
- /* if this block has been allocated from the buffer
- pool, this contains the buf_block_t handle;
- otherwise, this is NULL */
-#endif /* !UNIV_HOTBACKUP */
-#ifdef MEM_PERIODIC_CHECK
- UT_LIST_NODE_T(mem_block_t) mem_block_list;
- /* List of all mem blocks allocated; protected
- by the mem_comm_pool mutex */
-#endif
-};
-
-#define MEM_BLOCK_MAGIC_N 764741555
-#define MEM_FREED_BLOCK_MAGIC_N 547711122
-
-/* Header size for a memory heap block */
-#define MEM_BLOCK_HEADER_SIZE ut_calc_align(sizeof(mem_block_info_t),\
- UNIV_MEM_ALIGNMENT)
-#include "mem0dbg.h"
-
-#ifndef UNIV_NONINL
-#include "mem0mem.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/mem0mem.ic b/storage/innodb_plugin/include/mem0mem.ic
deleted file mode 100644
index e7080d8c508..00000000000
--- a/storage/innodb_plugin/include/mem0mem.ic
+++ /dev/null
@@ -1,646 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/mem0mem.ic
-The memory management
-
-Created 6/8/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "mem0dbg.ic"
-#ifndef UNIV_HOTBACKUP
-# include "mem0pool.h"
-#endif /* !UNIV_HOTBACKUP */
-
-/***************************************************************//**
-Creates a memory heap block where data can be allocated.
-@return own: memory heap block, NULL if did not succeed (only possible
-for MEM_HEAP_BTR_SEARCH type heaps) */
-UNIV_INTERN
-mem_block_t*
-mem_heap_create_block(
-/*==================*/
- mem_heap_t* heap, /*!< in: memory heap or NULL if first block
- should be created */
- ulint n, /*!< in: number of bytes needed for user data */
- ulint type, /*!< in: type of heap: MEM_HEAP_DYNAMIC or
- MEM_HEAP_BUFFER */
- const char* file_name,/*!< in: file name where created */
- ulint line); /*!< in: line where created */
-/******************************************************************//**
-Frees a block from a memory heap. */
-UNIV_INTERN
-void
-mem_heap_block_free(
-/*================*/
- mem_heap_t* heap, /*!< in: heap */
- mem_block_t* block); /*!< in: block to free */
-#ifndef UNIV_HOTBACKUP
-/******************************************************************//**
-Frees the free_block field from a memory heap. */
-UNIV_INTERN
-void
-mem_heap_free_block_free(
-/*=====================*/
- mem_heap_t* heap); /*!< in: heap */
-#endif /* !UNIV_HOTBACKUP */
-/***************************************************************//**
-Adds a new block to a memory heap.
-@return created block, NULL if did not succeed (only possible for
-MEM_HEAP_BTR_SEARCH type heaps) */
-UNIV_INTERN
-mem_block_t*
-mem_heap_add_block(
-/*===============*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n); /*!< in: number of bytes user needs */
-
-UNIV_INLINE
-void
-mem_block_set_len(mem_block_t* block, ulint len)
-{
- ut_ad(len > 0);
-
- block->len = len;
-}
-
-UNIV_INLINE
-ulint
-mem_block_get_len(mem_block_t* block)
-{
- return(block->len);
-}
-
-UNIV_INLINE
-void
-mem_block_set_type(mem_block_t* block, ulint type)
-{
- ut_ad((type == MEM_HEAP_DYNAMIC) || (type == MEM_HEAP_BUFFER)
- || (type == MEM_HEAP_BUFFER + MEM_HEAP_BTR_SEARCH));
-
- block->type = type;
-}
-
-UNIV_INLINE
-ulint
-mem_block_get_type(mem_block_t* block)
-{
- return(block->type);
-}
-
-UNIV_INLINE
-void
-mem_block_set_free(mem_block_t* block, ulint free)
-{
- ut_ad(free > 0);
- ut_ad(free <= mem_block_get_len(block));
-
- block->free = free;
-}
-
-UNIV_INLINE
-ulint
-mem_block_get_free(mem_block_t* block)
-{
- return(block->free);
-}
-
-UNIV_INLINE
-void
-mem_block_set_start(mem_block_t* block, ulint start)
-{
- ut_ad(start > 0);
-
- block->start = start;
-}
-
-UNIV_INLINE
-ulint
-mem_block_get_start(mem_block_t* block)
-{
- return(block->start);
-}
-
-/***************************************************************//**
-Allocates and zero-fills n bytes of memory from a memory heap.
-@return allocated, zero-filled storage */
-UNIV_INLINE
-void*
-mem_heap_zalloc(
-/*============*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n) /*!< in: number of bytes; if the heap is allowed
- to grow into the buffer pool, this must be
- <= MEM_MAX_ALLOC_IN_BUF */
-{
- ut_ad(heap);
- ut_ad(!(heap->type & MEM_HEAP_BTR_SEARCH));
- return(memset(mem_heap_alloc(heap, n), 0, n));
-}
-
-/***************************************************************//**
-Allocates n bytes of memory from a memory heap.
-@return allocated storage, NULL if did not succeed (only possible for
-MEM_HEAP_BTR_SEARCH type heaps) */
-UNIV_INLINE
-void*
-mem_heap_alloc(
-/*===========*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n) /*!< in: number of bytes; if the heap is allowed
- to grow into the buffer pool, this must be
- <= MEM_MAX_ALLOC_IN_BUF */
-{
- mem_block_t* block;
- void* buf;
- ulint free;
-
- ut_ad(mem_heap_check(heap));
-
- block = UT_LIST_GET_LAST(heap->base);
-
- ut_ad(!(block->type & MEM_HEAP_BUFFER) || (n <= MEM_MAX_ALLOC_IN_BUF));
-
- /* Check if there is enough space in block. If not, create a new
- block to the heap */
-
- if (mem_block_get_len(block)
- < mem_block_get_free(block) + MEM_SPACE_NEEDED(n)) {
-
- block = mem_heap_add_block(heap, n);
-
- if (block == NULL) {
-
- return(NULL);
- }
- }
-
- free = mem_block_get_free(block);
-
- buf = (byte*)block + free;
-
- mem_block_set_free(block, free + MEM_SPACE_NEEDED(n));
-
-#ifdef UNIV_MEM_DEBUG
- UNIV_MEM_ALLOC(buf,
- n + MEM_FIELD_HEADER_SIZE + MEM_FIELD_TRAILER_SIZE);
-
- /* In the debug version write debugging info to the field */
- mem_field_init((byte*)buf, n);
-
- /* Advance buf to point at the storage which will be given to the
- caller */
- buf = (byte*)buf + MEM_FIELD_HEADER_SIZE;
-
-#endif
-#ifdef UNIV_SET_MEM_TO_ZERO
- UNIV_MEM_ALLOC(buf, n);
- memset(buf, '\0', n);
-#endif
- UNIV_MEM_ALLOC(buf, n);
- return(buf);
-}
-
-/*****************************************************************//**
-Returns a pointer to the heap top.
-@return pointer to the heap top */
-UNIV_INLINE
-byte*
-mem_heap_get_heap_top(
-/*==================*/
- mem_heap_t* heap) /*!< in: memory heap */
-{
- mem_block_t* block;
- byte* buf;
-
- ut_ad(mem_heap_check(heap));
-
- block = UT_LIST_GET_LAST(heap->base);
-
- buf = (byte*)block + mem_block_get_free(block);
-
- return(buf);
-}
-
-/*****************************************************************//**
-Frees the space in a memory heap exceeding the pointer given. The
-pointer must have been acquired from mem_heap_get_heap_top. The first
-memory block of the heap is not freed. */
-UNIV_INLINE
-void
-mem_heap_free_heap_top(
-/*===================*/
- mem_heap_t* heap, /*!< in: heap from which to free */
- byte* old_top)/*!< in: pointer to old top of heap */
-{
- mem_block_t* block;
- mem_block_t* prev_block;
-#ifdef UNIV_MEM_DEBUG
- ibool error;
- ulint total_size;
- ulint size;
-#endif
-
- ut_ad(mem_heap_check(heap));
-
-#ifdef UNIV_MEM_DEBUG
-
- /* Validate the heap and get its total allocated size */
- mem_heap_validate_or_print(heap, NULL, FALSE, &error, &total_size,
- NULL, NULL);
- ut_a(!error);
-
- /* Get the size below top pointer */
- mem_heap_validate_or_print(heap, old_top, FALSE, &error, &size, NULL,
- NULL);
- ut_a(!error);
-
-#endif
-
- block = UT_LIST_GET_LAST(heap->base);
-
- while (block != NULL) {
- if (((byte*)block + mem_block_get_free(block) >= old_top)
- && ((byte*)block <= old_top)) {
- /* Found the right block */
-
- break;
- }
-
- /* Store prev_block value before freeing the current block
- (the current block will be erased in freeing) */
-
- prev_block = UT_LIST_GET_PREV(list, block);
-
- mem_heap_block_free(heap, block);
-
- block = prev_block;
- }
-
- ut_ad(block);
-
- /* Set the free field of block */
- mem_block_set_free(block, old_top - (byte*)block);
-
-#ifdef UNIV_MEM_DEBUG
- ut_ad(mem_block_get_start(block) <= mem_block_get_free(block));
-
- /* In the debug version erase block from top up */
- mem_erase_buf(old_top, (byte*)block + block->len - old_top);
-
- /* Update allocated memory count */
- mutex_enter(&mem_hash_mutex);
- mem_current_allocated_memory -= (total_size - size);
- mutex_exit(&mem_hash_mutex);
-#else /* UNIV_MEM_DEBUG */
- UNIV_MEM_ASSERT_W(old_top, (byte*)block + block->len - old_top);
-#endif /* UNIV_MEM_DEBUG */
- UNIV_MEM_ALLOC(old_top, (byte*)block + block->len - old_top);
-
- /* If free == start, we may free the block if it is not the first
- one */
-
- if ((heap != block) && (mem_block_get_free(block)
- == mem_block_get_start(block))) {
- mem_heap_block_free(heap, block);
- }
-}
-
-/*****************************************************************//**
-Empties a memory heap. The first memory block of the heap is not freed. */
-UNIV_INLINE
-void
-mem_heap_empty(
-/*===========*/
- mem_heap_t* heap) /*!< in: heap to empty */
-{
- mem_heap_free_heap_top(heap, (byte*)heap + mem_block_get_start(heap));
-#ifndef UNIV_HOTBACKUP
- if (heap->free_block) {
- mem_heap_free_block_free(heap);
- }
-#endif /* !UNIV_HOTBACKUP */
-}
-
-/*****************************************************************//**
-Returns a pointer to the topmost element in a memory heap. The size of the
-element must be given.
-@return pointer to the topmost element */
-UNIV_INLINE
-void*
-mem_heap_get_top(
-/*=============*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n) /*!< in: size of the topmost element */
-{
- mem_block_t* block;
- void* buf;
-
- ut_ad(mem_heap_check(heap));
-
- block = UT_LIST_GET_LAST(heap->base);
-
- buf = (byte*)block + mem_block_get_free(block) - MEM_SPACE_NEEDED(n);
-
-#ifdef UNIV_MEM_DEBUG
- ut_ad(mem_block_get_start(block) <=(ulint)((byte*)buf - (byte*)block));
-
- /* In the debug version, advance buf to point at the storage which
- was given to the caller in the allocation*/
-
- buf = (byte*)buf + MEM_FIELD_HEADER_SIZE;
-
- /* Check that the field lengths agree */
- ut_ad(n == (ulint)mem_field_header_get_len(buf));
-#endif
-
- return(buf);
-}
-
-/*****************************************************************//**
-Frees the topmost element in a memory heap. The size of the element must be
-given. */
-UNIV_INLINE
-void
-mem_heap_free_top(
-/*==============*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n) /*!< in: size of the topmost element */
-{
- mem_block_t* block;
-
- ut_ad(mem_heap_check(heap));
-
- block = UT_LIST_GET_LAST(heap->base);
-
- /* Subtract the free field of block */
- mem_block_set_free(block, mem_block_get_free(block)
- - MEM_SPACE_NEEDED(n));
- UNIV_MEM_ASSERT_W((byte*) block + mem_block_get_free(block), n);
-#ifdef UNIV_MEM_DEBUG
-
- ut_ad(mem_block_get_start(block) <= mem_block_get_free(block));
-
- /* In the debug version check the consistency, and erase field */
- mem_field_erase((byte*)block + mem_block_get_free(block), n);
-#endif
-
- /* If free == start, we may free the block if it is not the first
- one */
-
- if ((heap != block) && (mem_block_get_free(block)
- == mem_block_get_start(block))) {
- mem_heap_block_free(heap, block);
- } else {
- /* Avoid a bogus UNIV_MEM_ASSERT_W() warning in a
- subsequent invocation of mem_heap_free_top().
- Originally, this was UNIV_MEM_FREE(), to catch writes
- to freed memory. */
- UNIV_MEM_ALLOC((byte*) block + mem_block_get_free(block), n);
- }
-}
-
-/*****************************************************************//**
-NOTE: Use the corresponding macros instead of this function. Creates a
-memory heap. For debugging purposes, takes also the file name and line as
-argument.
-@return own: memory heap, NULL if did not succeed (only possible for
-MEM_HEAP_BTR_SEARCH type heaps) */
-UNIV_INLINE
-mem_heap_t*
-mem_heap_create_func(
-/*=================*/
- ulint n, /*!< in: desired start block size,
- this means that a single user buffer
- of size n will fit in the block,
- 0 creates a default size block */
- ulint type, /*!< in: heap type */
- const char* file_name, /*!< in: file name where created */
- ulint line) /*!< in: line where created */
-{
- mem_block_t* block;
-
- if (!n) {
- n = MEM_BLOCK_START_SIZE;
- }
-
- block = mem_heap_create_block(NULL, n, type, file_name, line);
-
- if (block == NULL) {
-
- return(NULL);
- }
-
- UT_LIST_INIT(block->base);
-
- /* Add the created block itself as the first block in the list */
- UT_LIST_ADD_FIRST(list, block->base, block);
-
-#ifdef UNIV_MEM_DEBUG
-
- mem_hash_insert(block, file_name, line);
-
-#endif
-
- return(block);
-}
-
-/*****************************************************************//**
-NOTE: Use the corresponding macro instead of this function. Frees the space
-occupied by a memory heap. In the debug version erases the heap memory
-blocks. */
-UNIV_INLINE
-void
-mem_heap_free_func(
-/*===============*/
- mem_heap_t* heap, /*!< in, own: heap to be freed */
- const char* file_name __attribute__((unused)),
- /*!< in: file name where freed */
- ulint line __attribute__((unused)))
-{
- mem_block_t* block;
- mem_block_t* prev_block;
-
- ut_ad(mem_heap_check(heap));
-
- block = UT_LIST_GET_LAST(heap->base);
-
-#ifdef UNIV_MEM_DEBUG
-
- /* In the debug version remove the heap from the hash table of heaps
- and check its consistency */
-
- mem_hash_remove(heap, file_name, line);
-
-#endif
-#ifndef UNIV_HOTBACKUP
- if (heap->free_block) {
- mem_heap_free_block_free(heap);
- }
-#endif /* !UNIV_HOTBACKUP */
-
- while (block != NULL) {
- /* Store the contents of info before freeing current block
- (it is erased in freeing) */
-
- prev_block = UT_LIST_GET_PREV(list, block);
-
- mem_heap_block_free(heap, block);
-
- block = prev_block;
- }
-}
-
-/***************************************************************//**
-NOTE: Use the corresponding macro instead of this function.
-Allocates a single buffer of memory from the dynamic memory of
-the C compiler. Is like malloc of C. The buffer must be freed
-with mem_free.
-@return own: free storage */
-UNIV_INLINE
-void*
-mem_alloc_func(
-/*===========*/
- ulint n, /*!< in: desired number of bytes */
- ulint* size, /*!< out: allocated size in bytes,
- or NULL */
- const char* file_name, /*!< in: file name where created */
- ulint line) /*!< in: line where created */
-{
- mem_heap_t* heap;
- void* buf;
-
- heap = mem_heap_create_func(n, MEM_HEAP_DYNAMIC, file_name, line);
-
- /* Note that as we created the first block in the heap big enough
- for the buffer requested by the caller, the buffer will be in the
- first block and thus we can calculate the pointer to the heap from
- the pointer to the buffer when we free the memory buffer. */
-
- if (UNIV_LIKELY_NULL(size)) {
- /* Adjust the allocation to the actual size of the
- memory block. */
- ulint m = mem_block_get_len(heap)
- - mem_block_get_free(heap);
-#ifdef UNIV_MEM_DEBUG
- m -= MEM_FIELD_HEADER_SIZE + MEM_FIELD_TRAILER_SIZE;
-#endif /* UNIV_MEM_DEBUG */
- ut_ad(m >= n);
- *size = n = m;
- }
-
- buf = mem_heap_alloc(heap, n);
-
- ut_a((byte*)heap == (byte*)buf - MEM_BLOCK_HEADER_SIZE
- - MEM_FIELD_HEADER_SIZE);
- return(buf);
-}
-
-/***************************************************************//**
-NOTE: Use the corresponding macro instead of this function. Frees a single
-buffer of storage from the dynamic memory of the C compiler. Similar to the
-free of C. */
-UNIV_INLINE
-void
-mem_free_func(
-/*==========*/
- void* ptr, /*!< in, own: buffer to be freed */
- const char* file_name, /*!< in: file name where created */
- ulint line) /*!< in: line where created */
-{
- mem_heap_t* heap;
-
- heap = (mem_heap_t*)((byte*)ptr - MEM_BLOCK_HEADER_SIZE
- - MEM_FIELD_HEADER_SIZE);
- mem_heap_free_func(heap, file_name, line);
-}
-
-/*****************************************************************//**
-Returns the space in bytes occupied by a memory heap. */
-UNIV_INLINE
-ulint
-mem_heap_get_size(
-/*==============*/
- mem_heap_t* heap) /*!< in: heap */
-{
- mem_block_t* block;
- ulint size = 0;
-
- ut_ad(mem_heap_check(heap));
-
- block = heap;
-
- while (block != NULL) {
-
- size += mem_block_get_len(block);
- block = UT_LIST_GET_NEXT(list, block);
- }
-#ifndef UNIV_HOTBACKUP
- if (heap->free_block) {
- size += UNIV_PAGE_SIZE;
- }
-#endif /* !UNIV_HOTBACKUP */
-
- return(size);
-}
-
-/**********************************************************************//**
-Duplicates a NUL-terminated string.
-@return own: a copy of the string, must be deallocated with mem_free */
-UNIV_INLINE
-char*
-mem_strdup(
-/*=======*/
- const char* str) /*!< in: string to be copied */
-{
- ulint len = strlen(str) + 1;
- return((char*) memcpy(mem_alloc(len), str, len));
-}
-
-/**********************************************************************//**
-Makes a NUL-terminated copy of a nonterminated string.
-@return own: a copy of the string, must be deallocated with mem_free */
-UNIV_INLINE
-char*
-mem_strdupl(
-/*========*/
- const char* str, /*!< in: string to be copied */
- ulint len) /*!< in: length of str, in bytes */
-{
- char* s = (char*) mem_alloc(len + 1);
- s[len] = 0;
- return((char*) memcpy(s, str, len));
-}
-
-/**********************************************************************//**
-Makes a NUL-terminated copy of a nonterminated string,
-allocated from a memory heap.
-@return own: a copy of the string */
-UNIV_INLINE
-char*
-mem_heap_strdupl(
-/*=============*/
- mem_heap_t* heap, /*!< in: memory heap where string is allocated */
- const char* str, /*!< in: string to be copied */
- ulint len) /*!< in: length of str, in bytes */
-{
- char* s = (char*) mem_heap_alloc(heap, len + 1);
- s[len] = 0;
- return((char*) memcpy(s, str, len));
-}
diff --git a/storage/innodb_plugin/include/mem0pool.h b/storage/innodb_plugin/include/mem0pool.h
deleted file mode 100644
index 5e93bf88a47..00000000000
--- a/storage/innodb_plugin/include/mem0pool.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/mem0pool.h
-The lowest-level memory management
-
-Created 6/9/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef mem0pool_h
-#define mem0pool_h
-
-#include "univ.i"
-#include "os0file.h"
-#include "ut0lst.h"
-
-/** Memory area header */
-typedef struct mem_area_struct mem_area_t;
-/** Memory pool */
-typedef struct mem_pool_struct mem_pool_t;
-
-/** The common memory pool */
-extern mem_pool_t* mem_comm_pool;
-
-/** Memory area header */
-
-struct mem_area_struct{
- ulint size_and_free; /*!< memory area size is obtained by
- anding with ~MEM_AREA_FREE; area in
- a free list if ANDing with
- MEM_AREA_FREE results in nonzero */
- UT_LIST_NODE_T(mem_area_t)
- free_list; /*!< free list node */
-};
-
-/** Each memory area takes this many extra bytes for control information */
-#define MEM_AREA_EXTRA_SIZE (ut_calc_align(sizeof(struct mem_area_struct),\
- UNIV_MEM_ALIGNMENT))
-
-/********************************************************************//**
-Creates a memory pool.
-@return memory pool */
-UNIV_INTERN
-mem_pool_t*
-mem_pool_create(
-/*============*/
- ulint size); /*!< in: pool size in bytes */
-/********************************************************************//**
-Frees a memory pool. */
-UNIV_INTERN
-void
-mem_pool_free(
-/*==========*/
- mem_pool_t* pool); /*!< in, own: memory pool */
-/********************************************************************//**
-Allocates memory from a pool. NOTE: This low-level function should only be
-used in mem0mem.*!
-@return own: allocated memory buffer */
-UNIV_INTERN
-void*
-mem_area_alloc(
-/*===========*/
- ulint* psize, /*!< in: requested size in bytes; for optimum
- space usage, the size should be a power of 2
- minus MEM_AREA_EXTRA_SIZE;
- out: allocated size in bytes (greater than
- or equal to the requested size) */
- mem_pool_t* pool); /*!< in: memory pool */
-/********************************************************************//**
-Frees memory to a pool. */
-UNIV_INTERN
-void
-mem_area_free(
-/*==========*/
- void* ptr, /*!< in, own: pointer to allocated memory
- buffer */
- mem_pool_t* pool); /*!< in: memory pool */
-/********************************************************************//**
-Returns the amount of reserved memory.
-@return reserved mmeory in bytes */
-UNIV_INTERN
-ulint
-mem_pool_get_reserved(
-/*==================*/
- mem_pool_t* pool); /*!< in: memory pool */
-/********************************************************************//**
-Reserves the mem pool mutex. */
-UNIV_INTERN
-void
-mem_pool_mutex_enter(void);
-/*======================*/
-/********************************************************************//**
-Releases the mem pool mutex. */
-UNIV_INTERN
-void
-mem_pool_mutex_exit(void);
-/*=====================*/
-/********************************************************************//**
-Validates a memory pool.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-mem_pool_validate(
-/*==============*/
- mem_pool_t* pool); /*!< in: memory pool */
-/********************************************************************//**
-Prints info of a memory pool. */
-UNIV_INTERN
-void
-mem_pool_print_info(
-/*================*/
- FILE* outfile,/*!< in: output file to write to */
- mem_pool_t* pool); /*!< in: memory pool */
-
-
-#ifndef UNIV_NONINL
-#include "mem0pool.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/mem0pool.ic b/storage/innodb_plugin/include/mem0pool.ic
deleted file mode 100644
index b891dd6dea0..00000000000
--- a/storage/innodb_plugin/include/mem0pool.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/mem0pool.ic
-The lowest-level memory management
-
-Created 6/8/1994 Heikki Tuuri
-*************************************************************************/
diff --git a/storage/innodb_plugin/include/mtr0log.h b/storage/innodb_plugin/include/mtr0log.h
deleted file mode 100644
index 6322af2a569..00000000000
--- a/storage/innodb_plugin/include/mtr0log.h
+++ /dev/null
@@ -1,250 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/mtr0log.h
-Mini-transaction logging routines
-
-Created 12/7/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef mtr0log_h
-#define mtr0log_h
-
-#include "univ.i"
-#include "mtr0mtr.h"
-#include "dict0types.h"
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************//**
-Writes 1 - 4 bytes to a file page buffered in the buffer pool.
-Writes the corresponding log record to the mini-transaction log. */
-UNIV_INTERN
-void
-mlog_write_ulint(
-/*=============*/
- byte* ptr, /*!< in: pointer where to write */
- ulint val, /*!< in: value to write */
- byte type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************//**
-Writes 8 bytes to a file page buffered in the buffer pool.
-Writes the corresponding log record to the mini-transaction log. */
-UNIV_INTERN
-void
-mlog_write_dulint(
-/*==============*/
- byte* ptr, /*!< in: pointer where to write */
- dulint val, /*!< in: value to write */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************//**
-Writes a string to a file page buffered in the buffer pool. Writes the
-corresponding log record to the mini-transaction log. */
-UNIV_INTERN
-void
-mlog_write_string(
-/*==============*/
- byte* ptr, /*!< in: pointer where to write */
- const byte* str, /*!< in: string to write */
- ulint len, /*!< in: string length */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************//**
-Logs a write of a string to a file page buffered in the buffer pool.
-Writes the corresponding log record to the mini-transaction log. */
-UNIV_INTERN
-void
-mlog_log_string(
-/*============*/
- byte* ptr, /*!< in: pointer written to */
- ulint len, /*!< in: string length */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************//**
-Writes initial part of a log record consisting of one-byte item
-type and four-byte space and page numbers. */
-UNIV_INTERN
-void
-mlog_write_initial_log_record(
-/*==========================*/
- const byte* ptr, /*!< in: pointer to (inside) a buffer
- frame holding the file page where
- modification is made */
- byte type, /*!< in: log item type: MLOG_1BYTE, ... */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************//**
-Writes a log record about an .ibd file create/delete/rename.
-@return new value of log_ptr */
-UNIV_INLINE
-byte*
-mlog_write_initial_log_record_for_file_op(
-/*======================================*/
- ulint type, /*!< in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or
- MLOG_FILE_RENAME */
- ulint space_id,/*!< in: space id, if applicable */
- ulint page_no,/*!< in: page number (not relevant currently) */
- byte* log_ptr,/*!< in: pointer to mtr log which has been opened */
- mtr_t* mtr); /*!< in: mtr */
-/********************************************************//**
-Catenates 1 - 4 bytes to the mtr log. */
-UNIV_INLINE
-void
-mlog_catenate_ulint(
-/*================*/
- mtr_t* mtr, /*!< in: mtr */
- ulint val, /*!< in: value to write */
- ulint type); /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
-/********************************************************//**
-Catenates n bytes to the mtr log. */
-UNIV_INTERN
-void
-mlog_catenate_string(
-/*=================*/
- mtr_t* mtr, /*!< in: mtr */
- const byte* str, /*!< in: string to write */
- ulint len); /*!< in: string length */
-/********************************************************//**
-Catenates a compressed ulint to mlog. */
-UNIV_INLINE
-void
-mlog_catenate_ulint_compressed(
-/*===========================*/
- mtr_t* mtr, /*!< in: mtr */
- ulint val); /*!< in: value to write */
-/********************************************************//**
-Catenates a compressed dulint to mlog. */
-UNIV_INLINE
-void
-mlog_catenate_dulint_compressed(
-/*============================*/
- mtr_t* mtr, /*!< in: mtr */
- dulint val); /*!< in: value to write */
-/********************************************************//**
-Opens a buffer to mlog. It must be closed with mlog_close.
-@return buffer, NULL if log mode MTR_LOG_NONE */
-UNIV_INLINE
-byte*
-mlog_open(
-/*======*/
- mtr_t* mtr, /*!< in: mtr */
- ulint size); /*!< in: buffer size in bytes; MUST be
- smaller than DYN_ARRAY_DATA_SIZE! */
-/********************************************************//**
-Closes a buffer opened to mlog. */
-UNIV_INLINE
-void
-mlog_close(
-/*=======*/
- mtr_t* mtr, /*!< in: mtr */
- byte* ptr); /*!< in: buffer space from ptr up was not used */
-/********************************************************//**
-Writes the initial part of a log record (3..11 bytes).
-If the implementation of this function is changed, all
-size parameters to mlog_open() should be adjusted accordingly!
-@return new value of log_ptr */
-UNIV_INLINE
-byte*
-mlog_write_initial_log_record_fast(
-/*===============================*/
- const byte* ptr, /*!< in: pointer to (inside) a buffer
- frame holding the file page where
- modification is made */
- byte type, /*!< in: log item type: MLOG_1BYTE, ... */
- byte* log_ptr,/*!< in: pointer to mtr log which has
- been opened */
- mtr_t* mtr); /*!< in: mtr */
-#else /* !UNIV_HOTBACKUP */
-# define mlog_write_initial_log_record(ptr,type,mtr) ((void) 0)
-# define mlog_write_initial_log_record_fast(ptr,type,log_ptr,mtr) ((byte *) 0)
-#endif /* !UNIV_HOTBACKUP */
-/********************************************************//**
-Parses an initial log record written by mlog_write_initial_log_record.
-@return parsed record end, NULL if not a complete record */
-UNIV_INTERN
-byte*
-mlog_parse_initial_log_record(
-/*==========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- byte* type, /*!< out: log record type: MLOG_1BYTE, ... */
- ulint* space, /*!< out: space id */
- ulint* page_no);/*!< out: page number */
-/********************************************************//**
-Parses a log record written by mlog_write_ulint or mlog_write_dulint.
-@return parsed record end, NULL if not a complete record */
-UNIV_INTERN
-byte*
-mlog_parse_nbytes(
-/*==============*/
- ulint type, /*!< in: log record type: MLOG_1BYTE, ... */
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- byte* page, /*!< in: page where to apply the log record, or NULL */
- void* page_zip);/*!< in/out: compressed page, or NULL */
-/********************************************************//**
-Parses a log record written by mlog_write_string.
-@return parsed record end, NULL if not a complete record */
-UNIV_INTERN
-byte*
-mlog_parse_string(
-/*==============*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- byte* page, /*!< in: page where to apply the log record, or NULL */
- void* page_zip);/*!< in/out: compressed page, or NULL */
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************//**
-Opens a buffer for mlog, writes the initial log record and,
-if needed, the field lengths of an index. Reserves space
-for further log entries. The log entry must be closed with
-mtr_close().
-@return buffer, NULL if log mode MTR_LOG_NONE */
-UNIV_INTERN
-byte*
-mlog_open_and_write_index(
-/*======================*/
- mtr_t* mtr, /*!< in: mtr */
- const byte* rec, /*!< in: index record or page */
- dict_index_t* index, /*!< in: record descriptor */
- byte type, /*!< in: log item type */
- ulint size); /*!< in: requested buffer size in bytes
- (if 0, calls mlog_close() and returns NULL) */
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************//**
-Parses a log record written by mlog_open_and_write_index.
-@return parsed record end, NULL if not a complete record */
-UNIV_INTERN
-byte*
-mlog_parse_index(
-/*=============*/
- byte* ptr, /*!< in: buffer */
- const byte* end_ptr,/*!< in: buffer end */
- ibool comp, /*!< in: TRUE=compact record format */
- dict_index_t** index); /*!< out, own: dummy index */
-
-#ifndef UNIV_HOTBACKUP
-/* Insert, update, and maybe other functions may use this value to define an
-extra mlog buffer size for variable size data */
-#define MLOG_BUF_MARGIN 256
-#endif /* !UNIV_HOTBACKUP */
-
-#ifndef UNIV_NONINL
-#include "mtr0log.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/mtr0log.ic b/storage/innodb_plugin/include/mtr0log.ic
deleted file mode 100644
index 5c24c38b337..00000000000
--- a/storage/innodb_plugin/include/mtr0log.ic
+++ /dev/null
@@ -1,274 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/mtr0log.ic
-Mini-transaction logging routines
-
-Created 12/7/1995 Heikki Tuuri
-*******************************************************/
-
-#include "mach0data.h"
-#include "ut0lst.h"
-#include "buf0buf.h"
-#include "fsp0types.h"
-#include "trx0sys.h"
-
-/********************************************************//**
-Opens a buffer to mlog. It must be closed with mlog_close.
-@return buffer, NULL if log mode MTR_LOG_NONE */
-UNIV_INLINE
-byte*
-mlog_open(
-/*======*/
- mtr_t* mtr, /*!< in: mtr */
- ulint size) /*!< in: buffer size in bytes; MUST be
- smaller than DYN_ARRAY_DATA_SIZE! */
-{
- dyn_array_t* mlog;
-
- mtr->modifications = TRUE;
-
- if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) {
-
- return(NULL);
- }
-
- mlog = &(mtr->log);
-
- return(dyn_array_open(mlog, size));
-}
-
-/********************************************************//**
-Closes a buffer opened to mlog. */
-UNIV_INLINE
-void
-mlog_close(
-/*=======*/
- mtr_t* mtr, /*!< in: mtr */
- byte* ptr) /*!< in: buffer space from ptr up was not used */
-{
- dyn_array_t* mlog;
-
- ut_ad(mtr_get_log_mode(mtr) != MTR_LOG_NONE);
-
- mlog = &(mtr->log);
-
- dyn_array_close(mlog, ptr);
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************//**
-Catenates 1 - 4 bytes to the mtr log. The value is not compressed. */
-UNIV_INLINE
-void
-mlog_catenate_ulint(
-/*================*/
- mtr_t* mtr, /*!< in: mtr */
- ulint val, /*!< in: value to write */
- ulint type) /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
-{
- dyn_array_t* mlog;
- byte* ptr;
-
- if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) {
-
- return;
- }
-
- mlog = &(mtr->log);
-
-#if MLOG_1BYTE != 1
-# error "MLOG_1BYTE != 1"
-#endif
-#if MLOG_2BYTES != 2
-# error "MLOG_2BYTES != 2"
-#endif
-#if MLOG_4BYTES != 4
-# error "MLOG_4BYTES != 4"
-#endif
-#if MLOG_8BYTES != 8
-# error "MLOG_8BYTES != 8"
-#endif
- ptr = (byte*) dyn_array_push(mlog, type);
-
- if (type == MLOG_4BYTES) {
- mach_write_to_4(ptr, val);
- } else if (type == MLOG_2BYTES) {
- mach_write_to_2(ptr, val);
- } else {
- ut_ad(type == MLOG_1BYTE);
- mach_write_to_1(ptr, val);
- }
-}
-
-/********************************************************//**
-Catenates a compressed ulint to mlog. */
-UNIV_INLINE
-void
-mlog_catenate_ulint_compressed(
-/*===========================*/
- mtr_t* mtr, /*!< in: mtr */
- ulint val) /*!< in: value to write */
-{
- byte* log_ptr;
-
- log_ptr = mlog_open(mtr, 10);
-
- /* If no logging is requested, we may return now */
- if (log_ptr == NULL) {
-
- return;
- }
-
- log_ptr += mach_write_compressed(log_ptr, val);
-
- mlog_close(mtr, log_ptr);
-}
-
-/********************************************************//**
-Catenates a compressed dulint to mlog. */
-UNIV_INLINE
-void
-mlog_catenate_dulint_compressed(
-/*============================*/
- mtr_t* mtr, /*!< in: mtr */
- dulint val) /*!< in: value to write */
-{
- byte* log_ptr;
-
- log_ptr = mlog_open(mtr, 15);
-
- /* If no logging is requested, we may return now */
- if (log_ptr == NULL) {
-
- return;
- }
-
- log_ptr += mach_dulint_write_compressed(log_ptr, val);
-
- mlog_close(mtr, log_ptr);
-}
-
-/********************************************************//**
-Writes the initial part of a log record (3..11 bytes).
-If the implementation of this function is changed, all
-size parameters to mlog_open() should be adjusted accordingly!
-@return new value of log_ptr */
-UNIV_INLINE
-byte*
-mlog_write_initial_log_record_fast(
-/*===============================*/
- const byte* ptr, /*!< in: pointer to (inside) a buffer
- frame holding the file page where
- modification is made */
- byte type, /*!< in: log item type: MLOG_1BYTE, ... */
- byte* log_ptr,/*!< in: pointer to mtr log which has
- been opened */
- mtr_t* mtr) /*!< in: mtr */
-{
-#ifdef UNIV_DEBUG
- buf_block_t* block;
-#endif
- const byte* page;
- ulint space;
- ulint offset;
-
- ut_ad(mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_X_FIX));
- ut_ad(type <= MLOG_BIGGEST_TYPE);
- ut_ad(ptr && log_ptr);
-
- page = (const byte*) ut_align_down(ptr, UNIV_PAGE_SIZE);
- space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- offset = mach_read_from_4(page + FIL_PAGE_OFFSET);
-
- /* check whether the page is in the doublewrite buffer;
- the doublewrite buffer is located in pages
- FSP_EXTENT_SIZE, ..., 3 * FSP_EXTENT_SIZE - 1 in the
- system tablespace */
- if (space == TRX_SYS_SPACE
- && offset >= FSP_EXTENT_SIZE && offset < 3 * FSP_EXTENT_SIZE) {
- if (trx_doublewrite_buf_is_being_created) {
- /* Do nothing: we only come to this branch in an
- InnoDB database creation. We do not redo log
- anything for the doublewrite buffer pages. */
- return(log_ptr);
- } else {
- fprintf(stderr,
- "Error: trying to redo log a record of type "
- "%d on page %lu of space %lu in the "
- "doublewrite buffer, continuing anyway.\n"
- "Please post a bug report to "
- "bugs.mysql.com.\n",
- type, offset, space);
- }
- }
-
- mach_write_to_1(log_ptr, type);
- log_ptr++;
- log_ptr += mach_write_compressed(log_ptr, space);
- log_ptr += mach_write_compressed(log_ptr, offset);
-
- mtr->n_log_recs++;
-
-#ifdef UNIV_LOG_DEBUG
- fprintf(stderr,
- "Adding to mtr log record type %lu space %lu page no %lu\n",
- (ulong) type, space, offset);
-#endif
-
-#ifdef UNIV_DEBUG
- /* We now assume that all x-latched pages have been modified! */
- block = (buf_block_t*) buf_block_align(ptr);
-
- if (!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY)) {
-
- mtr_memo_push(mtr, block, MTR_MEMO_MODIFY);
- }
-#endif
- return(log_ptr);
-}
-
-/********************************************************//**
-Writes a log record about an .ibd file create/delete/rename.
-@return new value of log_ptr */
-UNIV_INLINE
-byte*
-mlog_write_initial_log_record_for_file_op(
-/*======================================*/
- ulint type, /*!< in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or
- MLOG_FILE_RENAME */
- ulint space_id,/*!< in: space id, if applicable */
- ulint page_no,/*!< in: page number (not relevant currently) */
- byte* log_ptr,/*!< in: pointer to mtr log which has been opened */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(log_ptr);
-
- mach_write_to_1(log_ptr, type);
- log_ptr++;
-
- /* We write dummy space id and page number */
- log_ptr += mach_write_compressed(log_ptr, space_id);
- log_ptr += mach_write_compressed(log_ptr, page_no);
-
- mtr->n_log_recs++;
-
- return(log_ptr);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/mtr0mtr.h b/storage/innodb_plugin/include/mtr0mtr.h
deleted file mode 100644
index bc3f1951be9..00000000000
--- a/storage/innodb_plugin/include/mtr0mtr.h
+++ /dev/null
@@ -1,419 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/mtr0mtr.h
-Mini-transaction buffer
-
-Created 11/26/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef mtr0mtr_h
-#define mtr0mtr_h
-
-#include "univ.i"
-#include "mem0mem.h"
-#include "dyn0dyn.h"
-#include "buf0types.h"
-#include "sync0rw.h"
-#include "ut0byte.h"
-#include "mtr0types.h"
-#include "page0types.h"
-
-/* Logging modes for a mini-transaction */
-#define MTR_LOG_ALL 21 /* default mode: log all operations
- modifying disk-based data */
-#define MTR_LOG_NONE 22 /* log no operations */
-/*#define MTR_LOG_SPACE 23 */ /* log only operations modifying
- file space page allocation data
- (operations in fsp0fsp.* ) */
-#define MTR_LOG_SHORT_INSERTS 24 /* inserts are logged in a shorter
- form */
-
-/* Types for the mlock objects to store in the mtr memo; NOTE that the
-first 3 values must be RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
-#define MTR_MEMO_PAGE_S_FIX RW_S_LATCH
-#define MTR_MEMO_PAGE_X_FIX RW_X_LATCH
-#define MTR_MEMO_BUF_FIX RW_NO_LATCH
-#define MTR_MEMO_MODIFY 54
-#define MTR_MEMO_S_LOCK 55
-#define MTR_MEMO_X_LOCK 56
-
-/** @name Log item types
-The log items are declared 'byte' so that the compiler can warn if val
-and type parameters are switched in a call to mlog_write_ulint. NOTE!
-For 1 - 8 bytes, the flag value must give the length also! @{ */
-#define MLOG_SINGLE_REC_FLAG 128 /*!< if the mtr contains only
- one log record for one page,
- i.e., write_initial_log_record
- has been called only once,
- this flag is ORed to the type
- of that first log record */
-#define MLOG_1BYTE (1) /*!< one byte is written */
-#define MLOG_2BYTES (2) /*!< 2 bytes ... */
-#define MLOG_4BYTES (4) /*!< 4 bytes ... */
-#define MLOG_8BYTES (8) /*!< 8 bytes ... */
-#define MLOG_REC_INSERT ((byte)9) /*!< record insert */
-#define MLOG_REC_CLUST_DELETE_MARK ((byte)10) /*!< mark clustered index record
- deleted */
-#define MLOG_REC_SEC_DELETE_MARK ((byte)11) /*!< mark secondary index record
- deleted */
-#define MLOG_REC_UPDATE_IN_PLACE ((byte)13) /*!< update of a record,
- preserves record field sizes */
-#define MLOG_REC_DELETE ((byte)14) /*!< delete a record from a
- page */
-#define MLOG_LIST_END_DELETE ((byte)15) /*!< delete record list end on
- index page */
-#define MLOG_LIST_START_DELETE ((byte)16) /*!< delete record list start on
- index page */
-#define MLOG_LIST_END_COPY_CREATED ((byte)17) /*!< copy record list end to a
- new created index page */
-#define MLOG_PAGE_REORGANIZE ((byte)18) /*!< reorganize an
- index page in
- ROW_FORMAT=REDUNDANT */
-#define MLOG_PAGE_CREATE ((byte)19) /*!< create an index page */
-#define MLOG_UNDO_INSERT ((byte)20) /*!< insert entry in an undo
- log */
-#define MLOG_UNDO_ERASE_END ((byte)21) /*!< erase an undo log
- page end */
-#define MLOG_UNDO_INIT ((byte)22) /*!< initialize a page in an
- undo log */
-#define MLOG_UNDO_HDR_DISCARD ((byte)23) /*!< discard an update undo log
- header */
-#define MLOG_UNDO_HDR_REUSE ((byte)24) /*!< reuse an insert undo log
- header */
-#define MLOG_UNDO_HDR_CREATE ((byte)25) /*!< create an undo
- log header */
-#define MLOG_REC_MIN_MARK ((byte)26) /*!< mark an index
- record as the
- predefined minimum
- record */
-#define MLOG_IBUF_BITMAP_INIT ((byte)27) /*!< initialize an
- ibuf bitmap page */
-/*#define MLOG_FULL_PAGE ((byte)28) full contents of a page */
-#ifdef UNIV_LOG_LSN_DEBUG
-# define MLOG_LSN ((byte)28) /* current LSN */
-#endif
-#define MLOG_INIT_FILE_PAGE ((byte)29) /*!< this means that a
- file page is taken
- into use and the prior
- contents of the page
- should be ignored: in
- recovery we must not
- trust the lsn values
- stored to the file
- page */
-#define MLOG_WRITE_STRING ((byte)30) /*!< write a string to
- a page */
-#define MLOG_MULTI_REC_END ((byte)31) /*!< if a single mtr writes
- several log records,
- this log record ends the
- sequence of these records */
-#define MLOG_DUMMY_RECORD ((byte)32) /*!< dummy log record used to
- pad a log block full */
-#define MLOG_FILE_CREATE ((byte)33) /*!< log record about an .ibd
- file creation */
-#define MLOG_FILE_RENAME ((byte)34) /*!< log record about an .ibd
- file rename */
-#define MLOG_FILE_DELETE ((byte)35) /*!< log record about an .ibd
- file deletion */
-#define MLOG_COMP_REC_MIN_MARK ((byte)36) /*!< mark a compact
- index record as the
- predefined minimum
- record */
-#define MLOG_COMP_PAGE_CREATE ((byte)37) /*!< create a compact
- index page */
-#define MLOG_COMP_REC_INSERT ((byte)38) /*!< compact record insert */
-#define MLOG_COMP_REC_CLUST_DELETE_MARK ((byte)39)
- /*!< mark compact
- clustered index record
- deleted */
-#define MLOG_COMP_REC_SEC_DELETE_MARK ((byte)40)/*!< mark compact
- secondary index record
- deleted; this log
- record type is
- redundant, as
- MLOG_REC_SEC_DELETE_MARK
- is independent of the
- record format. */
-#define MLOG_COMP_REC_UPDATE_IN_PLACE ((byte)41)/*!< update of a
- compact record,
- preserves record field
- sizes */
-#define MLOG_COMP_REC_DELETE ((byte)42) /*!< delete a compact record
- from a page */
-#define MLOG_COMP_LIST_END_DELETE ((byte)43) /*!< delete compact record list
- end on index page */
-#define MLOG_COMP_LIST_START_DELETE ((byte)44) /*!< delete compact record list
- start on index page */
-#define MLOG_COMP_LIST_END_COPY_CREATED ((byte)45)
- /*!< copy compact
- record list end to a
- new created index
- page */
-#define MLOG_COMP_PAGE_REORGANIZE ((byte)46) /*!< reorganize an index page */
-#define MLOG_FILE_CREATE2 ((byte)47) /*!< log record about creating
- an .ibd file, with format */
-#define MLOG_ZIP_WRITE_NODE_PTR ((byte)48) /*!< write the node pointer of
- a record on a compressed
- non-leaf B-tree page */
-#define MLOG_ZIP_WRITE_BLOB_PTR ((byte)49) /*!< write the BLOB pointer
- of an externally stored column
- on a compressed page */
-#define MLOG_ZIP_WRITE_HEADER ((byte)50) /*!< write to compressed page
- header */
-#define MLOG_ZIP_PAGE_COMPRESS ((byte)51) /*!< compress an index page */
-#define MLOG_BIGGEST_TYPE ((byte)51) /*!< biggest value (used in
- assertions) */
-/* @} */
-
-/** @name Flags for MLOG_FILE operations
-(stored in the page number parameter, called log_flags in the
-functions). The page number parameter was originally written as 0. @{ */
-#define MLOG_FILE_FLAG_TEMP 1 /*!< identifies TEMPORARY TABLE in
- MLOG_FILE_CREATE, MLOG_FILE_CREATE2 */
-/* @} */
-
-/***************************************************************//**
-Starts a mini-transaction and creates a mini-transaction handle
-and buffer in the memory buffer given by the caller.
-@return mtr buffer which also acts as the mtr handle */
-UNIV_INLINE
-mtr_t*
-mtr_start(
-/*======*/
- mtr_t* mtr); /*!< in: memory buffer for the mtr buffer */
-/***************************************************************//**
-Commits a mini-transaction. */
-UNIV_INTERN
-void
-mtr_commit(
-/*=======*/
- mtr_t* mtr); /*!< in: mini-transaction */
-/**********************************************************//**
-Sets and returns a savepoint in mtr.
-@return savepoint */
-UNIV_INLINE
-ulint
-mtr_set_savepoint(
-/*==============*/
- mtr_t* mtr); /*!< in: mtr */
-/**********************************************************//**
-Releases the latches stored in an mtr memo down to a savepoint.
-NOTE! The mtr must not have made changes to buffer pages after the
-savepoint, as these can be handled only by mtr_commit. */
-UNIV_INTERN
-void
-mtr_rollback_to_savepoint(
-/*======================*/
- mtr_t* mtr, /*!< in: mtr */
- ulint savepoint); /*!< in: savepoint */
-#ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Releases the (index tree) s-latch stored in an mtr memo after a
-savepoint. */
-UNIV_INLINE
-void
-mtr_release_s_latch_at_savepoint(
-/*=============================*/
- mtr_t* mtr, /*!< in: mtr */
- ulint savepoint, /*!< in: savepoint */
- rw_lock_t* lock); /*!< in: latch to release */
-#else /* !UNIV_HOTBACKUP */
-# define mtr_release_s_latch_at_savepoint(mtr,savepoint,lock) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-/***************************************************************//**
-Gets the logging mode of a mini-transaction.
-@return logging mode: MTR_LOG_NONE, ... */
-UNIV_INLINE
-ulint
-mtr_get_log_mode(
-/*=============*/
- mtr_t* mtr); /*!< in: mtr */
-/***************************************************************//**
-Changes the logging mode of a mini-transaction.
-@return old mode */
-UNIV_INLINE
-ulint
-mtr_set_log_mode(
-/*=============*/
- mtr_t* mtr, /*!< in: mtr */
- ulint mode); /*!< in: logging mode: MTR_LOG_NONE, ... */
-/********************************************************//**
-Reads 1 - 4 bytes from a file page buffered in the buffer pool.
-@return value read */
-UNIV_INTERN
-ulint
-mtr_read_ulint(
-/*===========*/
- const byte* ptr, /*!< in: pointer from where to read */
- ulint type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************//**
-Reads 8 bytes from a file page buffered in the buffer pool.
-@return value read */
-UNIV_INTERN
-dulint
-mtr_read_dulint(
-/*============*/
- const byte* ptr, /*!< in: pointer from where to read */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-This macro locks an rw-lock in s-mode. */
-#define mtr_s_lock(B, MTR) mtr_s_lock_func((B), __FILE__, __LINE__,\
- (MTR))
-/*********************************************************************//**
-This macro locks an rw-lock in x-mode. */
-#define mtr_x_lock(B, MTR) mtr_x_lock_func((B), __FILE__, __LINE__,\
- (MTR))
-/*********************************************************************//**
-NOTE! Use the macro above!
-Locks a lock in s-mode. */
-UNIV_INLINE
-void
-mtr_s_lock_func(
-/*============*/
- rw_lock_t* lock, /*!< in: rw-lock */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line number */
- mtr_t* mtr); /*!< in: mtr */
-/*********************************************************************//**
-NOTE! Use the macro above!
-Locks a lock in x-mode. */
-UNIV_INLINE
-void
-mtr_x_lock_func(
-/*============*/
- rw_lock_t* lock, /*!< in: rw-lock */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line number */
- mtr_t* mtr); /*!< in: mtr */
-#endif /* !UNIV_HOTBACKUP */
-
-/***************************************************//**
-Releases an object in the memo stack. */
-UNIV_INTERN
-void
-mtr_memo_release(
-/*=============*/
- mtr_t* mtr, /*!< in: mtr */
- void* object, /*!< in: object */
- ulint type); /*!< in: object type: MTR_MEMO_S_LOCK, ... */
-#ifdef UNIV_DEBUG
-# ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Checks if memo contains the given item.
-@return TRUE if contains */
-UNIV_INLINE
-ibool
-mtr_memo_contains(
-/*==============*/
- mtr_t* mtr, /*!< in: mtr */
- const void* object, /*!< in: object to search */
- ulint type); /*!< in: type of object */
-
-/**********************************************************//**
-Checks if memo contains the given page.
-@return TRUE if contains */
-UNIV_INTERN
-ibool
-mtr_memo_contains_page(
-/*===================*/
- mtr_t* mtr, /*!< in: mtr */
- const byte* ptr, /*!< in: pointer to buffer frame */
- ulint type); /*!< in: type of object */
-/*********************************************************//**
-Prints info of an mtr handle. */
-UNIV_INTERN
-void
-mtr_print(
-/*======*/
- mtr_t* mtr); /*!< in: mtr */
-# else /* !UNIV_HOTBACKUP */
-# define mtr_memo_contains(mtr, object, type) TRUE
-# define mtr_memo_contains_page(mtr, ptr, type) TRUE
-# endif /* !UNIV_HOTBACKUP */
-#endif /* UNIV_DEBUG */
-/*######################################################################*/
-
-#define MTR_BUF_MEMO_SIZE 200 /* number of slots in memo */
-
-/***************************************************************//**
-Returns the log object of a mini-transaction buffer.
-@return log */
-UNIV_INLINE
-dyn_array_t*
-mtr_get_log(
-/*========*/
- mtr_t* mtr); /*!< in: mini-transaction */
-/***************************************************//**
-Pushes an object to an mtr memo stack. */
-UNIV_INLINE
-void
-mtr_memo_push(
-/*==========*/
- mtr_t* mtr, /*!< in: mtr */
- void* object, /*!< in: object */
- ulint type); /*!< in: object type: MTR_MEMO_S_LOCK, ... */
-
-
-/* Type definition of a mini-transaction memo stack slot. */
-typedef struct mtr_memo_slot_struct mtr_memo_slot_t;
-struct mtr_memo_slot_struct{
- ulint type; /*!< type of the stored object (MTR_MEMO_S_LOCK, ...) */
- void* object; /*!< pointer to the object */
-};
-
-/* Mini-transaction handle and buffer */
-struct mtr_struct{
-#ifdef UNIV_DEBUG
- ulint state; /*!< MTR_ACTIVE, MTR_COMMITTING, MTR_COMMITTED */
-#endif
- dyn_array_t memo; /*!< memo stack for locks etc. */
- dyn_array_t log; /*!< mini-transaction log */
- ibool modifications;
- /* TRUE if the mtr made modifications to
- buffer pool pages */
- ulint n_log_recs;
- /* count of how many page initial log records
- have been written to the mtr log */
- ulint log_mode; /* specifies which operations should be
- logged; default value MTR_LOG_ALL */
- ib_uint64_t start_lsn;/* start lsn of the possible log entry for
- this mtr */
- ib_uint64_t end_lsn;/* end lsn of the possible log entry for
- this mtr */
-#ifdef UNIV_DEBUG
- ulint magic_n;
-#endif /* UNIV_DEBUG */
-};
-
-#ifdef UNIV_DEBUG
-# define MTR_MAGIC_N 54551
-#endif /* UNIV_DEBUG */
-
-#define MTR_ACTIVE 12231
-#define MTR_COMMITTING 56456
-#define MTR_COMMITTED 34676
-
-#ifndef UNIV_NONINL
-#include "mtr0mtr.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/mtr0mtr.ic b/storage/innodb_plugin/include/mtr0mtr.ic
deleted file mode 100644
index 310c7c4117f..00000000000
--- a/storage/innodb_plugin/include/mtr0mtr.ic
+++ /dev/null
@@ -1,272 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/mtr0mtr.ic
-Mini-transaction buffer
-
-Created 11/26/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef UNIV_HOTBACKUP
-# include "sync0sync.h"
-# include "sync0rw.h"
-#endif /* !UNIV_HOTBACKUP */
-#include "mach0data.h"
-
-/***************************************************************//**
-Starts a mini-transaction and creates a mini-transaction handle
-and a buffer in the memory buffer given by the caller.
-@return mtr buffer which also acts as the mtr handle */
-UNIV_INLINE
-mtr_t*
-mtr_start(
-/*======*/
- mtr_t* mtr) /*!< in: memory buffer for the mtr buffer */
-{
- dyn_array_create(&(mtr->memo));
- dyn_array_create(&(mtr->log));
-
- mtr->log_mode = MTR_LOG_ALL;
- mtr->modifications = FALSE;
- mtr->n_log_recs = 0;
-
- ut_d(mtr->state = MTR_ACTIVE);
- ut_d(mtr->magic_n = MTR_MAGIC_N);
-
- return(mtr);
-}
-
-/***************************************************//**
-Pushes an object to an mtr memo stack. */
-UNIV_INLINE
-void
-mtr_memo_push(
-/*==========*/
- mtr_t* mtr, /*!< in: mtr */
- void* object, /*!< in: object */
- ulint type) /*!< in: object type: MTR_MEMO_S_LOCK, ... */
-{
- dyn_array_t* memo;
- mtr_memo_slot_t* slot;
-
- ut_ad(object);
- ut_ad(type >= MTR_MEMO_PAGE_S_FIX);
- ut_ad(type <= MTR_MEMO_X_LOCK);
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
-
- memo = &(mtr->memo);
-
- slot = (mtr_memo_slot_t*) dyn_array_push(memo, sizeof *slot);
-
- slot->object = object;
- slot->type = type;
-}
-
-/**********************************************************//**
-Sets and returns a savepoint in mtr.
-@return savepoint */
-UNIV_INLINE
-ulint
-mtr_set_savepoint(
-/*==============*/
- mtr_t* mtr) /*!< in: mtr */
-{
- dyn_array_t* memo;
-
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
-
- memo = &(mtr->memo);
-
- return(dyn_array_get_data_size(memo));
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Releases the (index tree) s-latch stored in an mtr memo after a
-savepoint. */
-UNIV_INLINE
-void
-mtr_release_s_latch_at_savepoint(
-/*=============================*/
- mtr_t* mtr, /*!< in: mtr */
- ulint savepoint, /*!< in: savepoint */
- rw_lock_t* lock) /*!< in: latch to release */
-{
- mtr_memo_slot_t* slot;
- dyn_array_t* memo;
-
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE);
-
- memo = &(mtr->memo);
-
- ut_ad(dyn_array_get_data_size(memo) > savepoint);
-
- slot = (mtr_memo_slot_t*) dyn_array_get_element(memo, savepoint);
-
- ut_ad(slot->object == lock);
- ut_ad(slot->type == MTR_MEMO_S_LOCK);
-
- rw_lock_s_unlock(lock);
-
- slot->object = NULL;
-}
-
-# ifdef UNIV_DEBUG
-/**********************************************************//**
-Checks if memo contains the given item.
-@return TRUE if contains */
-UNIV_INLINE
-ibool
-mtr_memo_contains(
-/*==============*/
- mtr_t* mtr, /*!< in: mtr */
- const void* object, /*!< in: object to search */
- ulint type) /*!< in: type of object */
-{
- mtr_memo_slot_t* slot;
- dyn_array_t* memo;
- ulint offset;
-
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
-
- memo = &(mtr->memo);
-
- offset = dyn_array_get_data_size(memo);
-
- while (offset > 0) {
- offset -= sizeof(mtr_memo_slot_t);
-
- slot = dyn_array_get_element(memo, offset);
-
- if ((object == slot->object) && (type == slot->type)) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-# endif /* UNIV_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
-
-/***************************************************************//**
-Returns the log object of a mini-transaction buffer.
-@return log */
-UNIV_INLINE
-dyn_array_t*
-mtr_get_log(
-/*========*/
- mtr_t* mtr) /*!< in: mini-transaction */
-{
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
-
- return(&(mtr->log));
-}
-
-/***************************************************************//**
-Gets the logging mode of a mini-transaction.
-@return logging mode: MTR_LOG_NONE, ... */
-UNIV_INLINE
-ulint
-mtr_get_log_mode(
-/*=============*/
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(mtr);
- ut_ad(mtr->log_mode >= MTR_LOG_ALL);
- ut_ad(mtr->log_mode <= MTR_LOG_SHORT_INSERTS);
-
- return(mtr->log_mode);
-}
-
-/***************************************************************//**
-Changes the logging mode of a mini-transaction.
-@return old mode */
-UNIV_INLINE
-ulint
-mtr_set_log_mode(
-/*=============*/
- mtr_t* mtr, /*!< in: mtr */
- ulint mode) /*!< in: logging mode: MTR_LOG_NONE, ... */
-{
- ulint old_mode;
-
- ut_ad(mtr);
- ut_ad(mode >= MTR_LOG_ALL);
- ut_ad(mode <= MTR_LOG_SHORT_INSERTS);
-
- old_mode = mtr->log_mode;
-
- if ((mode == MTR_LOG_SHORT_INSERTS) && (old_mode == MTR_LOG_NONE)) {
- /* Do nothing */
- } else {
- mtr->log_mode = mode;
- }
-
- ut_ad(old_mode >= MTR_LOG_ALL);
- ut_ad(old_mode <= MTR_LOG_SHORT_INSERTS);
-
- return(old_mode);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Locks a lock in s-mode. */
-UNIV_INLINE
-void
-mtr_s_lock_func(
-/*============*/
- rw_lock_t* lock, /*!< in: rw-lock */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line number */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(mtr);
- ut_ad(lock);
-
- rw_lock_s_lock_func(lock, 0, file, line);
-
- mtr_memo_push(mtr, lock, MTR_MEMO_S_LOCK);
-}
-
-/*********************************************************************//**
-Locks a lock in x-mode. */
-UNIV_INLINE
-void
-mtr_x_lock_func(
-/*============*/
- rw_lock_t* lock, /*!< in: rw-lock */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line number */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(mtr);
- ut_ad(lock);
-
- rw_lock_x_lock_func(lock, 0, file, line);
-
- mtr_memo_push(mtr, lock, MTR_MEMO_X_LOCK);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/mtr0types.h b/storage/innodb_plugin/include/mtr0types.h
deleted file mode 100644
index 83a7aaf3839..00000000000
--- a/storage/innodb_plugin/include/mtr0types.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/mtr0types.h
-Mini-transaction buffer global types
-
-Created 11/26/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef mtr0types_h
-#define mtr0types_h
-
-typedef struct mtr_struct mtr_t;
-
-#endif
diff --git a/storage/innodb_plugin/include/os0file.h b/storage/innodb_plugin/include/os0file.h
deleted file mode 100644
index 16568579f31..00000000000
--- a/storage/innodb_plugin/include/os0file.h
+++ /dev/null
@@ -1,805 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-/***********************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2009, Percona Inc.
-
-Portions of this file contain modifications contributed and copyrighted
-by Percona Inc.. Those modifications are
-gratefully acknowledged and are described briefly in the InnoDB
-documentation. The contributions by Percona Inc. are incorporated with
-their permission, and subject to the conditions contained in the file
-COPYING.Percona.
-
-This program is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-***********************************************************************/
-
-/**************************************************//**
-@file include/os0file.h
-The interface to the operating system file io
-
-Created 10/21/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef os0file_h
-#define os0file_h
-
-#include "univ.i"
-
-#ifndef __WIN__
-#include <dirent.h>
-#include <sys/stat.h>
-#include <time.h>
-#endif
-
-/** File node of a tablespace or the log data space */
-typedef struct fil_node_struct fil_node_t;
-
-#ifdef UNIV_DO_FLUSH
-extern ibool os_do_not_call_flush_at_each_write;
-#endif /* UNIV_DO_FLUSH */
-extern ibool os_has_said_disk_full;
-/** Flag: enable debug printout for asynchronous i/o */
-extern ibool os_aio_print_debug;
-
-/** Number of pending os_file_pread() operations */
-extern ulint os_file_n_pending_preads;
-/** Number of pending os_file_pwrite() operations */
-extern ulint os_file_n_pending_pwrites;
-
-/** Number of pending read operations */
-extern ulint os_n_pending_reads;
-/** Number of pending write operations */
-extern ulint os_n_pending_writes;
-
-#ifdef __WIN__
-
-/** We define always WIN_ASYNC_IO, and check at run-time whether
- the OS actually supports it: Win 95 does not, NT does. */
-#define WIN_ASYNC_IO
-
-/** Use unbuffered I/O */
-#define UNIV_NON_BUFFERED_IO
-
-#endif
-
-#ifdef __WIN__
-/** File handle */
-#define os_file_t HANDLE
-/** Convert a C file descriptor to a native file handle
-@param fd file descriptor
-@return native file handle */
-#define OS_FILE_FROM_FD(fd) (HANDLE) _get_osfhandle(fd)
-#else
-/** File handle */
-typedef int os_file_t;
-/** Convert a C file descriptor to a native file handle
-@param fd file descriptor
-@return native file handle */
-#define OS_FILE_FROM_FD(fd) fd
-#endif
-
-/** Umask for creating files */
-extern ulint os_innodb_umask;
-
-/** If this flag is TRUE, then we will use the native aio of the
-OS (provided we compiled Innobase with it in), otherwise we will
-use simulated aio we build below with threads */
-
-extern ibool os_aio_use_native_aio;
-
-/** The next value should be smaller or equal to the smallest sector size used
-on any disk. A log block is required to be a portion of disk which is written
-so that if the start and the end of a block get written to disk, then the
-whole block gets written. This should be true even in most cases of a crash:
-if this fails for a log block, then it is equivalent to a media failure in the
-log. */
-
-#define OS_FILE_LOG_BLOCK_SIZE 512
-
-/** Options for file_create @{ */
-#define OS_FILE_OPEN 51
-#define OS_FILE_CREATE 52
-#define OS_FILE_OVERWRITE 53
-#define OS_FILE_OPEN_RAW 54
-#define OS_FILE_CREATE_PATH 55
-#define OS_FILE_OPEN_RETRY 56 /* for os_file_create() on
- the first ibdata file */
-
-#define OS_FILE_READ_ONLY 333
-#define OS_FILE_READ_WRITE 444
-#define OS_FILE_READ_ALLOW_DELETE 555 /* for ibbackup */
-
-/* Options for file_create */
-#define OS_FILE_AIO 61
-#define OS_FILE_NORMAL 62
-/* @} */
-
-/** Types for file create @{ */
-#define OS_DATA_FILE 100
-#define OS_LOG_FILE 101
-/* @} */
-
-/** Error codes from os_file_get_last_error @{ */
-#define OS_FILE_NOT_FOUND 71
-#define OS_FILE_DISK_FULL 72
-#define OS_FILE_ALREADY_EXISTS 73
-#define OS_FILE_PATH_ERROR 74
-#define OS_FILE_AIO_RESOURCES_RESERVED 75 /* wait for OS aio resources
- to become available again */
-#define OS_FILE_SHARING_VIOLATION 76
-#define OS_FILE_ERROR_NOT_SPECIFIED 77
-#define OS_FILE_INSUFFICIENT_RESOURCE 78
-#define OS_FILE_OPERATION_ABORTED 79
-/* @} */
-
-/** Types for aio operations @{ */
-#define OS_FILE_READ 10
-#define OS_FILE_WRITE 11
-
-#define OS_FILE_LOG 256 /* This can be ORed to type */
-/* @} */
-
-#define OS_AIO_N_PENDING_IOS_PER_THREAD 32 /*!< Win NT does not allow more
- than 64 */
-
-/** Modes for aio operations @{ */
-#define OS_AIO_NORMAL 21 /*!< Normal asynchronous i/o not for ibuf
- pages or ibuf bitmap pages */
-#define OS_AIO_IBUF 22 /*!< Asynchronous i/o for ibuf pages or ibuf
- bitmap pages */
-#define OS_AIO_LOG 23 /*!< Asynchronous i/o for the log */
-#define OS_AIO_SYNC 24 /*!< Asynchronous i/o where the calling thread
- will itself wait for the i/o to complete,
- doing also the job of the i/o-handler thread;
- can be used for any pages, ibuf or non-ibuf.
- This is used to save CPU time, as we can do
- with fewer thread switches. Plain synchronous
- i/o is not as good, because it must serialize
- the file seek and read or write, causing a
- bottleneck for parallelism. */
-
-#define OS_AIO_SIMULATED_WAKE_LATER 512 /*!< This can be ORed to mode
- in the call of os_aio(...),
- if the caller wants to post several i/o
- requests in a batch, and only after that
- wake the i/o-handler thread; this has
- effect only in simulated aio */
-/* @} */
-
-#define OS_WIN31 1 /*!< Microsoft Windows 3.x */
-#define OS_WIN95 2 /*!< Microsoft Windows 95 */
-#define OS_WINNT 3 /*!< Microsoft Windows NT 3.x */
-#define OS_WIN2000 4 /*!< Microsoft Windows 2000 */
-
-extern ulint os_n_file_reads;
-extern ulint os_n_file_writes;
-extern ulint os_n_fsyncs;
-
-/* File types for directory entry data type */
-
-enum os_file_type_enum{
- OS_FILE_TYPE_UNKNOWN = 0,
- OS_FILE_TYPE_FILE, /* regular file */
- OS_FILE_TYPE_DIR, /* directory */
- OS_FILE_TYPE_LINK /* symbolic link */
-};
-typedef enum os_file_type_enum os_file_type_t;
-
-/* Maximum path string length in bytes when referring to tables with in the
-'./databasename/tablename.ibd' path format; we can allocate at least 2 buffers
-of this size from the thread stack; that is why this should not be made much
-bigger than 4000 bytes */
-#define OS_FILE_MAX_PATH 4000
-
-/* Struct used in fetching information of a file in a directory */
-struct os_file_stat_struct{
- char name[OS_FILE_MAX_PATH]; /*!< path to a file */
- os_file_type_t type; /*!< file type */
- ib_int64_t size; /*!< file size */
- time_t ctime; /*!< creation time */
- time_t mtime; /*!< modification time */
- time_t atime; /*!< access time */
-};
-typedef struct os_file_stat_struct os_file_stat_t;
-
-#ifdef __WIN__
-typedef HANDLE os_file_dir_t; /*!< directory stream */
-#else
-typedef DIR* os_file_dir_t; /*!< directory stream */
-#endif
-
-/***********************************************************************//**
-Gets the operating system version. Currently works only on Windows.
-@return OS_WIN95, OS_WIN31, OS_WINNT, or OS_WIN2000 */
-UNIV_INTERN
-ulint
-os_get_os_version(void);
-/*===================*/
-#ifndef UNIV_HOTBACKUP
-/****************************************************************//**
-Creates the seek mutexes used in positioned reads and writes. */
-UNIV_INTERN
-void
-os_io_init_simple(void);
-/*===================*/
-/***********************************************************************//**
-Creates a temporary file. This function is like tmpfile(3), but
-the temporary file is created in the MySQL temporary directory.
-On Netware, this function is like tmpfile(3), because the C run-time
-library of Netware does not expose the delete-on-close flag.
-@return temporary file handle, or NULL on error */
-
-FILE*
-os_file_create_tmpfile(void);
-/*========================*/
-#endif /* !UNIV_HOTBACKUP */
-/***********************************************************************//**
-The os_file_opendir() function opens a directory stream corresponding to the
-directory named by the dirname argument. The directory stream is positioned
-at the first entry. In both Unix and Windows we automatically skip the '.'
-and '..' items at the start of the directory listing.
-@return directory stream, NULL if error */
-UNIV_INTERN
-os_file_dir_t
-os_file_opendir(
-/*============*/
- const char* dirname, /*!< in: directory name; it must not
- contain a trailing '\' or '/' */
- ibool error_is_fatal);/*!< in: TRUE if we should treat an
- error as a fatal error; if we try to
- open symlinks then we do not wish a
- fatal error if it happens not to be
- a directory */
-/***********************************************************************//**
-Closes a directory stream.
-@return 0 if success, -1 if failure */
-UNIV_INTERN
-int
-os_file_closedir(
-/*=============*/
- os_file_dir_t dir); /*!< in: directory stream */
-/***********************************************************************//**
-This function returns information of the next file in the directory. We jump
-over the '.' and '..' entries in the directory.
-@return 0 if ok, -1 if error, 1 if at the end of the directory */
-UNIV_INTERN
-int
-os_file_readdir_next_file(
-/*======================*/
- const char* dirname,/*!< in: directory name or path */
- os_file_dir_t dir, /*!< in: directory stream */
- os_file_stat_t* info); /*!< in/out: buffer where the info is returned */
-/*****************************************************************//**
-This function attempts to create a directory named pathname. The new directory
-gets default permissions. On Unix, the permissions are (0770 & ~umask). If the
-directory exists already, nothing is done and the call succeeds, unless the
-fail_if_exists arguments is true.
-@return TRUE if call succeeds, FALSE on error */
-UNIV_INTERN
-ibool
-os_file_create_directory(
-/*=====================*/
- const char* pathname, /*!< in: directory name as
- null-terminated string */
- ibool fail_if_exists);/*!< in: if TRUE, pre-existing directory
- is treated as an error. */
-/****************************************************************//**
-A simple function to open or create a file.
-@return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
-UNIV_INTERN
-os_file_t
-os_file_create_simple(
-/*==================*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file is
- opened (if does not exist, error), or
- OS_FILE_CREATE if a new file is created
- (if exists, error), or
- OS_FILE_CREATE_PATH if new file
- (if exists, error) and subdirectories along
- its path are created (if needed)*/
- ulint access_type,/*!< in: OS_FILE_READ_ONLY or
- OS_FILE_READ_WRITE */
- ibool* success);/*!< out: TRUE if succeed, FALSE if error */
-/****************************************************************//**
-A simple function to open or create a file.
-@return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
-UNIV_INTERN
-os_file_t
-os_file_create_simple_no_error_handling(
-/*====================================*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
- is opened (if does not exist, error), or
- OS_FILE_CREATE if a new file is created
- (if exists, error) */
- ulint access_type,/*!< in: OS_FILE_READ_ONLY,
- OS_FILE_READ_WRITE, or
- OS_FILE_READ_ALLOW_DELETE; the last option is
- used by a backup program reading the file */
- ibool* success);/*!< out: TRUE if succeed, FALSE if error */
-/****************************************************************//**
-Tries to disable OS caching on an opened file descriptor. */
-UNIV_INTERN
-void
-os_file_set_nocache(
-/*================*/
- int fd, /*!< in: file descriptor to alter */
- const char* file_name, /*!< in: file name, used in the
- diagnostic message */
- const char* operation_name);/*!< in: "open" or "create"; used in the
- diagnostic message */
-/****************************************************************//**
-Opens an existing file or creates a new.
-@return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
-UNIV_INTERN
-os_file_t
-os_file_create(
-/*===========*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
- is opened (if does not exist, error), or
- OS_FILE_CREATE if a new file is created
- (if exists, error),
- OS_FILE_OVERWRITE if a new file is created
- or an old overwritten;
- OS_FILE_OPEN_RAW, if a raw device or disk
- partition should be opened */
- ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous,
- non-buffered i/o is desired,
- OS_FILE_NORMAL, if any normal file;
- NOTE that it also depends on type, os_aio_..
- and srv_.. variables whether we really use
- async i/o or unbuffered i/o: look in the
- function source code for the exact rules */
- ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
- ibool* success);/*!< out: TRUE if succeed, FALSE if error */
-/***********************************************************************//**
-Deletes a file. The file has to be closed before calling this.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_delete(
-/*===========*/
- const char* name); /*!< in: file path as a null-terminated string */
-
-/***********************************************************************//**
-Deletes a file if it exists. The file has to be closed before calling this.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_delete_if_exists(
-/*=====================*/
- const char* name); /*!< in: file path as a null-terminated string */
-/***********************************************************************//**
-Renames a file (can also move it to another directory). It is safest that the
-file is closed before calling this function.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_rename(
-/*===========*/
- const char* oldpath, /*!< in: old file path as a
- null-terminated string */
- const char* newpath); /*!< in: new file path */
-/***********************************************************************//**
-Closes a file handle. In case of error, error number can be retrieved with
-os_file_get_last_error.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_close(
-/*==========*/
- os_file_t file); /*!< in, own: handle to a file */
-#ifdef UNIV_HOTBACKUP
-/***********************************************************************//**
-Closes a file handle.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_close_no_error_handling(
-/*============================*/
- os_file_t file); /*!< in, own: handle to a file */
-#endif /* UNIV_HOTBACKUP */
-/***********************************************************************//**
-Gets a file size.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_get_size(
-/*=============*/
- os_file_t file, /*!< in: handle to a file */
- ulint* size, /*!< out: least significant 32 bits of file
- size */
- ulint* size_high);/*!< out: most significant 32 bits of size */
-/***********************************************************************//**
-Gets file size as a 64-bit integer ib_int64_t.
-@return size in bytes, -1 if error */
-UNIV_INTERN
-ib_int64_t
-os_file_get_size_as_iblonglong(
-/*===========================*/
- os_file_t file); /*!< in: handle to a file */
-/***********************************************************************//**
-Write the specified number of zeros to a newly created file.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_set_size(
-/*=============*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- os_file_t file, /*!< in: handle to a file */
- ulint size, /*!< in: least significant 32 bits of file
- size */
- ulint size_high);/*!< in: most significant 32 bits of size */
-/***********************************************************************//**
-Truncates a file at its current position.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_set_eof(
-/*============*/
- FILE* file); /*!< in: file to be truncated */
-/***********************************************************************//**
-Flushes the write buffers of a given file to the disk.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_flush(
-/*==========*/
- os_file_t file); /*!< in, own: handle to a file */
-/***********************************************************************//**
-Retrieves the last error number if an error occurs in a file io function.
-The number should be retrieved before any other OS calls (because they may
-overwrite the error number). If the number is not known to this program,
-the OS error number + 100 is returned.
-@return error number, or OS error number + 100 */
-UNIV_INTERN
-ulint
-os_file_get_last_error(
-/*===================*/
- ibool report_all_errors); /*!< in: TRUE if we want an error message
- printed of all errors */
-/*******************************************************************//**
-Requests a synchronous read operation.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
-ibool
-os_file_read(
-/*=========*/
- os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
- ulint offset, /*!< in: least significant 32 bits of file
- offset where to read */
- ulint offset_high,/*!< in: most significant 32 bits of
- offset */
- ulint n); /*!< in: number of bytes to read */
-/*******************************************************************//**
-Rewind file to its start, read at most size - 1 bytes from it to str, and
-NUL-terminate str. All errors are silently ignored. This function is
-mostly meant to be used with temporary files. */
-UNIV_INTERN
-void
-os_file_read_string(
-/*================*/
- FILE* file, /*!< in: file to read from */
- char* str, /*!< in: buffer where to read */
- ulint size); /*!< in: size of buffer */
-/*******************************************************************//**
-Requests a synchronous positioned read operation. This function does not do
-any error handling. In case of error it returns FALSE.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
-ibool
-os_file_read_no_error_handling(
-/*===========================*/
- os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
- ulint offset, /*!< in: least significant 32 bits of file
- offset where to read */
- ulint offset_high,/*!< in: most significant 32 bits of
- offset */
- ulint n); /*!< in: number of bytes to read */
-
-/*******************************************************************//**
-Requests a synchronous write operation.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
-ibool
-os_file_write(
-/*==========*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- os_file_t file, /*!< in: handle to a file */
- const void* buf, /*!< in: buffer from which to write */
- ulint offset, /*!< in: least significant 32 bits of file
- offset where to write */
- ulint offset_high,/*!< in: most significant 32 bits of
- offset */
- ulint n); /*!< in: number of bytes to write */
-/*******************************************************************//**
-Check the existence and type of the given file.
-@return TRUE if call succeeded */
-UNIV_INTERN
-ibool
-os_file_status(
-/*===========*/
- const char* path, /*!< in: pathname of the file */
- ibool* exists, /*!< out: TRUE if file exists */
- os_file_type_t* type); /*!< out: type of the file (if it exists) */
-/****************************************************************//**
-The function os_file_dirname returns a directory component of a
-null-terminated pathname string. In the usual case, dirname returns
-the string up to, but not including, the final '/', and basename
-is the component following the final '/'. Trailing '/' charac­
-ters are not counted as part of the pathname.
-
-If path does not contain a slash, dirname returns the string ".".
-
-Concatenating the string returned by dirname, a "/", and the basename
-yields a complete pathname.
-
-The return value is a copy of the directory component of the pathname.
-The copy is allocated from heap. It is the caller responsibility
-to free it after it is no longer needed.
-
-The following list of examples (taken from SUSv2) shows the strings
-returned by dirname and basename for different paths:
-
- path dirname basename
- "/usr/lib" "/usr" "lib"
- "/usr/" "/" "usr"
- "usr" "." "usr"
- "/" "/" "/"
- "." "." "."
- ".." "." ".."
-
-@return own: directory component of the pathname */
-UNIV_INTERN
-char*
-os_file_dirname(
-/*============*/
- const char* path); /*!< in: pathname */
-/****************************************************************//**
-Creates all missing subdirectories along the given path.
-@return TRUE if call succeeded FALSE otherwise */
-UNIV_INTERN
-ibool
-os_file_create_subdirs_if_needed(
-/*=============================*/
- const char* path); /*!< in: path name */
-/***********************************************************************
-Initializes the asynchronous io system. Creates one array each for ibuf
-and log i/o. Also creates one array each for read and write where each
-array is divided logically into n_read_segs and n_write_segs
-respectively. The caller must create an i/o handler thread for each
-segment in these arrays. This function also creates the sync array.
-No i/o handler thread needs to be created for that */
-UNIV_INTERN
-void
-os_aio_init(
-/*========*/
- ulint n_per_seg, /*<! in: maximum number of pending aio
- operations allowed per segment */
- ulint n_read_segs, /*<! in: number of reader threads */
- ulint n_write_segs, /*<! in: number of writer threads */
- ulint n_slots_sync); /*<! in: number of slots in the sync aio
- array */
-/***********************************************************************
-Frees the asynchronous io system. */
-UNIV_INTERN
-void
-os_aio_free(void);
-/*=============*/
-
-/*******************************************************************//**
-Requests an asynchronous i/o operation.
-@return TRUE if request was queued successfully, FALSE if fail */
-UNIV_INTERN
-ibool
-os_aio(
-/*===*/
- ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
- ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed
- to OS_AIO_SIMULATED_WAKE_LATER: the
- last flag advises this function not to wake
- i/o-handler threads, but the caller will
- do the waking explicitly later, in this
- way the caller can post several requests in
- a batch; NOTE that the batch must not be
- so big that it exhausts the slots in aio
- arrays! NOTE that a simulated batch
- may introduce hidden chances of deadlocks,
- because i/os are not actually handled until
- all have been posted: use with great
- caution! */
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read or from which
- to write */
- ulint offset, /*!< in: least significant 32 bits of file
- offset where to read or write */
- ulint offset_high, /*!< in: most significant 32 bits of
- offset */
- ulint n, /*!< in: number of bytes to read or write */
- fil_node_t* message1,/*!< in: message for the aio handler
- (can be used to identify a completed
- aio operation); ignored if mode is
- OS_AIO_SYNC */
- void* message2);/*!< in: message for the aio handler
- (can be used to identify a completed
- aio operation); ignored if mode is
- OS_AIO_SYNC */
-/************************************************************************//**
-Wakes up all async i/o threads so that they know to exit themselves in
-shutdown. */
-UNIV_INTERN
-void
-os_aio_wake_all_threads_at_shutdown(void);
-/*=====================================*/
-/************************************************************************//**
-Waits until there are no pending writes in os_aio_write_array. There can
-be other, synchronous, pending writes. */
-UNIV_INTERN
-void
-os_aio_wait_until_no_pending_writes(void);
-/*=====================================*/
-/**********************************************************************//**
-Wakes up simulated aio i/o-handler threads if they have something to do. */
-UNIV_INTERN
-void
-os_aio_simulated_wake_handler_threads(void);
-/*=======================================*/
-/**********************************************************************//**
-This function can be called if one wants to post a batch of reads and
-prefers an i/o-handler thread to handle them all at once later. You must
-call os_aio_simulated_wake_handler_threads later to ensure the threads
-are not left sleeping! */
-UNIV_INTERN
-void
-os_aio_simulated_put_read_threads_to_sleep(void);
-/*============================================*/
-
-#ifdef WIN_ASYNC_IO
-/**********************************************************************//**
-This function is only used in Windows asynchronous i/o.
-Waits for an aio operation to complete. This function is used to wait the
-for completed requests. The aio array of pending requests is divided
-into segments. The thread specifies which segment or slot it wants to wait
-for. NOTE: this function will also take care of freeing the aio slot,
-therefore no other thread is allowed to do the freeing!
-@return TRUE if the aio operation succeeded */
-UNIV_INTERN
-ibool
-os_aio_windows_handle(
-/*==================*/
- ulint segment, /*!< in: the number of the segment in the aio
- arrays to wait for; segment 0 is the ibuf
- i/o thread, segment 1 the log i/o thread,
- then follow the non-ibuf read threads, and as
- the last are the non-ibuf write threads; if
- this is ULINT_UNDEFINED, then it means that
- sync aio is used, and this parameter is
- ignored */
- ulint pos, /*!< this parameter is used only in sync aio:
- wait for the aio slot at this position */
- fil_node_t**message1, /*!< out: the messages passed with the aio
- request; note that also in the case where
- the aio operation failed, these output
- parameters are valid and can be used to
- restart the operation, for example */
- void** message2,
- ulint* type); /*!< out: OS_FILE_WRITE or ..._READ */
-#endif
-
-/**********************************************************************//**
-Does simulated aio. This function should be called by an i/o-handler
-thread.
-@return TRUE if the aio operation succeeded */
-UNIV_INTERN
-ibool
-os_aio_simulated_handle(
-/*====================*/
- ulint segment, /*!< in: the number of the segment in the aio
- arrays to wait for; segment 0 is the ibuf
- i/o thread, segment 1 the log i/o thread,
- then follow the non-ibuf read threads, and as
- the last are the non-ibuf write threads */
- fil_node_t**message1, /*!< out: the messages passed with the aio
- request; note that also in the case where
- the aio operation failed, these output
- parameters are valid and can be used to
- restart the operation, for example */
- void** message2,
- ulint* type); /*!< out: OS_FILE_WRITE or ..._READ */
-/**********************************************************************//**
-Validates the consistency of the aio system.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-os_aio_validate(void);
-/*=================*/
-/**********************************************************************//**
-Prints info of the aio arrays. */
-UNIV_INTERN
-void
-os_aio_print(
-/*=========*/
- FILE* file); /*!< in: file where to print */
-/**********************************************************************//**
-Refreshes the statistics used to print per-second averages. */
-UNIV_INTERN
-void
-os_aio_refresh_stats(void);
-/*======================*/
-
-#ifdef UNIV_DEBUG
-/**********************************************************************//**
-Checks that all slots in the system have been freed, that is, there are
-no pending io operations. */
-UNIV_INTERN
-ibool
-os_aio_all_slots_free(void);
-/*=======================*/
-#endif /* UNIV_DEBUG */
-
-/*******************************************************************//**
-This function returns information about the specified file
-@return TRUE if stat information found */
-UNIV_INTERN
-ibool
-os_file_get_status(
-/*===============*/
- const char* path, /*!< in: pathname of the file */
- os_file_stat_t* stat_info); /*!< information of a file in a
- directory */
-
-#if !defined(UNIV_HOTBACKUP) && !defined(__NETWARE__)
-/*********************************************************************//**
-Creates a temporary file that will be deleted on close.
-This function is defined in ha_innodb.cc.
-@return temporary file descriptor, or < 0 on error */
-UNIV_INTERN
-int
-innobase_mysql_tmpfile(void);
-/*========================*/
-#endif /* !UNIV_HOTBACKUP && !__NETWARE__ */
-
-#endif
diff --git a/storage/innodb_plugin/include/os0proc.h b/storage/innodb_plugin/include/os0proc.h
deleted file mode 100644
index fd46bd7db87..00000000000
--- a/storage/innodb_plugin/include/os0proc.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/os0proc.h
-The interface to the operating system
-process control primitives
-
-Created 9/30/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef os0proc_h
-#define os0proc_h
-
-#include "univ.i"
-
-#ifdef UNIV_LINUX
-#include <sys/ipc.h>
-#include <sys/shm.h>
-#endif
-
-typedef void* os_process_t;
-typedef unsigned long int os_process_id_t;
-
-extern ibool os_use_large_pages;
-/* Large page size. This may be a boot-time option on some platforms */
-extern ulint os_large_page_size;
-
-/****************************************************************//**
-Converts the current process id to a number. It is not guaranteed that the
-number is unique. In Linux returns the 'process number' of the current
-thread. That number is the same as one sees in 'top', for example. In Linux
-the thread id is not the same as one sees in 'top'.
-@return process id as a number */
-UNIV_INTERN
-ulint
-os_proc_get_number(void);
-/*====================*/
-/****************************************************************//**
-Allocates large pages memory.
-@return allocated memory */
-UNIV_INTERN
-void*
-os_mem_alloc_large(
-/*===============*/
- ulint* n); /*!< in/out: number of bytes */
-/****************************************************************//**
-Frees large pages memory. */
-UNIV_INTERN
-void
-os_mem_free_large(
-/*==============*/
- void *ptr, /*!< in: pointer returned by
- os_mem_alloc_large() */
- ulint size); /*!< in: size returned by
- os_mem_alloc_large() */
-
-#ifndef UNIV_NONINL
-#include "os0proc.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/os0proc.ic b/storage/innodb_plugin/include/os0proc.ic
deleted file mode 100644
index c9641644525..00000000000
--- a/storage/innodb_plugin/include/os0proc.ic
+++ /dev/null
@@ -1,27 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/os0proc.ic
-The interface to the operating system
-process control primitives
-
-Created 9/30/1995 Heikki Tuuri
-*******************************************************/
-
-
diff --git a/storage/innodb_plugin/include/os0sync.h b/storage/innodb_plugin/include/os0sync.h
deleted file mode 100644
index 0c22162b900..00000000000
--- a/storage/innodb_plugin/include/os0sync.h
+++ /dev/null
@@ -1,445 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/os0sync.h
-The interface to the operating system
-synchronization primitives.
-
-Created 9/6/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef os0sync_h
-#define os0sync_h
-
-#include "univ.i"
-#include "ut0lst.h"
-
-#ifdef __WIN__
-
-/** Native mutex */
-#define os_fast_mutex_t CRITICAL_SECTION
-
-/** Native event */
-typedef HANDLE os_native_event_t;
-
-/** Operating system event */
-typedef struct os_event_struct os_event_struct_t;
-/** Operating system event handle */
-typedef os_event_struct_t* os_event_t;
-
-/** An asynchronous signal sent between threads */
-struct os_event_struct {
- os_native_event_t handle;
- /*!< Windows event */
- UT_LIST_NODE_T(os_event_struct_t) os_event_list;
- /*!< list of all created events */
-};
-#else
-/** Native mutex */
-typedef pthread_mutex_t os_fast_mutex_t;
-
-/** Operating system event */
-typedef struct os_event_struct os_event_struct_t;
-/** Operating system event handle */
-typedef os_event_struct_t* os_event_t;
-
-/** An asynchronous signal sent between threads */
-struct os_event_struct {
- os_fast_mutex_t os_mutex; /*!< this mutex protects the next
- fields */
- ibool is_set; /*!< this is TRUE when the event is
- in the signaled state, i.e., a thread
- does not stop if it tries to wait for
- this event */
- ib_int64_t signal_count; /*!< this is incremented each time
- the event becomes signaled */
- pthread_cond_t cond_var; /*!< condition variable is used in
- waiting for the event */
- UT_LIST_NODE_T(os_event_struct_t) os_event_list;
- /*!< list of all created events */
-};
-#endif
-
-/** Operating system mutex */
-typedef struct os_mutex_struct os_mutex_str_t;
-/** Operating system mutex handle */
-typedef os_mutex_str_t* os_mutex_t;
-
-/** Denotes an infinite delay for os_event_wait_time() */
-#define OS_SYNC_INFINITE_TIME ((ulint)(-1))
-
-/** Return value of os_event_wait_time() when the time is exceeded */
-#define OS_SYNC_TIME_EXCEEDED 1
-
-/** Mutex protecting counts and the event and OS 'slow' mutex lists */
-extern os_mutex_t os_sync_mutex;
-
-/** This is incremented by 1 in os_thread_create and decremented by 1 in
-os_thread_exit */
-extern ulint os_thread_count;
-
-extern ulint os_event_count;
-extern ulint os_mutex_count;
-extern ulint os_fast_mutex_count;
-
-/*********************************************************//**
-Initializes global event and OS 'slow' mutex lists. */
-UNIV_INTERN
-void
-os_sync_init(void);
-/*==============*/
-/*********************************************************//**
-Frees created events and OS 'slow' mutexes. */
-UNIV_INTERN
-void
-os_sync_free(void);
-/*==============*/
-/*********************************************************//**
-Creates an event semaphore, i.e., a semaphore which may just have two states:
-signaled and nonsignaled. The created event is manual reset: it must be reset
-explicitly by calling sync_os_reset_event.
-@return the event handle */
-UNIV_INTERN
-os_event_t
-os_event_create(
-/*============*/
- const char* name); /*!< in: the name of the event, if NULL
- the event is created without a name */
-/**********************************************************//**
-Sets an event semaphore to the signaled state: lets waiting threads
-proceed. */
-UNIV_INTERN
-void
-os_event_set(
-/*=========*/
- os_event_t event); /*!< in: event to set */
-/**********************************************************//**
-Resets an event semaphore to the nonsignaled state. Waiting threads will
-stop to wait for the event.
-The return value should be passed to os_even_wait_low() if it is desired
-that this thread should not wait in case of an intervening call to
-os_event_set() between this os_event_reset() and the
-os_event_wait_low() call. See comments for os_event_wait_low(). */
-UNIV_INTERN
-ib_int64_t
-os_event_reset(
-/*===========*/
- os_event_t event); /*!< in: event to reset */
-/**********************************************************//**
-Frees an event object. */
-UNIV_INTERN
-void
-os_event_free(
-/*==========*/
- os_event_t event); /*!< in: event to free */
-
-/**********************************************************//**
-Waits for an event object until it is in the signaled state. If
-srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the
-waiting thread when the event becomes signaled (or immediately if the
-event is already in the signaled state).
-
-Typically, if the event has been signalled after the os_event_reset()
-we'll return immediately because event->is_set == TRUE.
-There are, however, situations (e.g.: sync_array code) where we may
-lose this information. For example:
-
-thread A calls os_event_reset()
-thread B calls os_event_set() [event->is_set == TRUE]
-thread C calls os_event_reset() [event->is_set == FALSE]
-thread A calls os_event_wait() [infinite wait!]
-thread C calls os_event_wait() [infinite wait!]
-
-Where such a scenario is possible, to avoid infinite wait, the
-value returned by os_event_reset() should be passed in as
-reset_sig_count. */
-UNIV_INTERN
-void
-os_event_wait_low(
-/*==============*/
- os_event_t event, /*!< in: event to wait */
- ib_int64_t reset_sig_count);/*!< in: zero or the value
- returned by previous call of
- os_event_reset(). */
-
-#define os_event_wait(event) os_event_wait_low(event, 0)
-
-/**********************************************************//**
-Waits for an event object until it is in the signaled state or
-a timeout is exceeded. In Unix the timeout is always infinite.
-@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
-UNIV_INTERN
-ulint
-os_event_wait_time(
-/*===============*/
- os_event_t event, /*!< in: event to wait */
- ulint time); /*!< in: timeout in microseconds, or
- OS_SYNC_INFINITE_TIME */
-#ifdef __WIN__
-/**********************************************************//**
-Waits for any event in an OS native event array. Returns if even a single
-one is signaled or becomes signaled.
-@return index of the event which was signaled */
-UNIV_INTERN
-ulint
-os_event_wait_multiple(
-/*===================*/
- ulint n, /*!< in: number of events in the
- array */
- os_native_event_t* native_event_array);
- /*!< in: pointer to an array of event
- handles */
-#endif
-/*********************************************************//**
-Creates an operating system mutex semaphore. Because these are slow, the
-mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
-@return the mutex handle */
-UNIV_INTERN
-os_mutex_t
-os_mutex_create(
-/*============*/
- const char* name); /*!< in: the name of the mutex, if NULL
- the mutex is created without a name */
-/**********************************************************//**
-Acquires ownership of a mutex semaphore. */
-UNIV_INTERN
-void
-os_mutex_enter(
-/*===========*/
- os_mutex_t mutex); /*!< in: mutex to acquire */
-/**********************************************************//**
-Releases ownership of a mutex. */
-UNIV_INTERN
-void
-os_mutex_exit(
-/*==========*/
- os_mutex_t mutex); /*!< in: mutex to release */
-/**********************************************************//**
-Frees an mutex object. */
-UNIV_INTERN
-void
-os_mutex_free(
-/*==========*/
- os_mutex_t mutex); /*!< in: mutex to free */
-/**********************************************************//**
-Acquires ownership of a fast mutex. Currently in Windows this is the same
-as os_fast_mutex_lock!
-@return 0 if success, != 0 if was reserved by another thread */
-UNIV_INLINE
-ulint
-os_fast_mutex_trylock(
-/*==================*/
- os_fast_mutex_t* fast_mutex); /*!< in: mutex to acquire */
-/**********************************************************//**
-Releases ownership of a fast mutex. */
-UNIV_INTERN
-void
-os_fast_mutex_unlock(
-/*=================*/
- os_fast_mutex_t* fast_mutex); /*!< in: mutex to release */
-/*********************************************************//**
-Initializes an operating system fast mutex semaphore. */
-UNIV_INTERN
-void
-os_fast_mutex_init(
-/*===============*/
- os_fast_mutex_t* fast_mutex); /*!< in: fast mutex */
-/**********************************************************//**
-Acquires ownership of a fast mutex. */
-UNIV_INTERN
-void
-os_fast_mutex_lock(
-/*===============*/
- os_fast_mutex_t* fast_mutex); /*!< in: mutex to acquire */
-/**********************************************************//**
-Frees an mutex object. */
-UNIV_INTERN
-void
-os_fast_mutex_free(
-/*===============*/
- os_fast_mutex_t* fast_mutex); /*!< in: mutex to free */
-
-/**********************************************************//**
-Atomic compare-and-swap and increment for InnoDB. */
-
-#if defined(HAVE_IB_GCC_ATOMIC_BUILTINS)
-
-#define HAVE_ATOMIC_BUILTINS
-
-/**********************************************************//**
-Returns true if swapped, ptr is pointer to target, old_val is value to
-compare to, new_val is the value to swap in. */
-
-# define os_compare_and_swap(ptr, old_val, new_val) \
- __sync_bool_compare_and_swap(ptr, old_val, new_val)
-
-# define os_compare_and_swap_ulint(ptr, old_val, new_val) \
- os_compare_and_swap(ptr, old_val, new_val)
-
-# define os_compare_and_swap_lint(ptr, old_val, new_val) \
- os_compare_and_swap(ptr, old_val, new_val)
-
-# ifdef HAVE_IB_ATOMIC_PTHREAD_T_GCC
-# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
- os_compare_and_swap(ptr, old_val, new_val)
-# define INNODB_RW_LOCKS_USE_ATOMICS
-# define IB_ATOMICS_STARTUP_MSG \
- "Mutexes and rw_locks use GCC atomic builtins"
-# else /* HAVE_IB_ATOMIC_PTHREAD_T_GCC */
-# define IB_ATOMICS_STARTUP_MSG \
- "Mutexes use GCC atomic builtins, rw_locks do not"
-# endif /* HAVE_IB_ATOMIC_PTHREAD_T_GCC */
-
-/**********************************************************//**
-Returns the resulting value, ptr is pointer to target, amount is the
-amount of increment. */
-
-# define os_atomic_increment(ptr, amount) \
- __sync_add_and_fetch(ptr, amount)
-
-# define os_atomic_increment_lint(ptr, amount) \
- os_atomic_increment(ptr, amount)
-
-# define os_atomic_increment_ulint(ptr, amount) \
- os_atomic_increment(ptr, amount)
-
-/**********************************************************//**
-Returns the old value of *ptr, atomically sets *ptr to new_val */
-
-# define os_atomic_test_and_set_byte(ptr, new_val) \
- __sync_lock_test_and_set(ptr, new_val)
-
-#elif defined(HAVE_IB_SOLARIS_ATOMICS)
-
-#define HAVE_ATOMIC_BUILTINS
-
-/* If not compiling with GCC or GCC doesn't support the atomic
-intrinsics and running on Solaris >= 10 use Solaris atomics */
-
-#include <atomic.h>
-
-/**********************************************************//**
-Returns true if swapped, ptr is pointer to target, old_val is value to
-compare to, new_val is the value to swap in. */
-
-# define os_compare_and_swap_ulint(ptr, old_val, new_val) \
- (atomic_cas_ulong(ptr, old_val, new_val) == old_val)
-
-# define os_compare_and_swap_lint(ptr, old_val, new_val) \
- ((lint)atomic_cas_ulong((ulong_t*) ptr, old_val, new_val) == old_val)
-
-# ifdef HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS
-# if SIZEOF_PTHREAD_T == 4
-# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
- ((pthread_t)atomic_cas_32(ptr, old_val, new_val) == old_val)
-# elif SIZEOF_PTHREAD_T == 8
-# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
- ((pthread_t)atomic_cas_64(ptr, old_val, new_val) == old_val)
-# else
-# error "SIZEOF_PTHREAD_T != 4 or 8"
-# endif /* SIZEOF_PTHREAD_T CHECK */
-# define INNODB_RW_LOCKS_USE_ATOMICS
-# define IB_ATOMICS_STARTUP_MSG \
- "Mutexes and rw_locks use Solaris atomic functions"
-# else /* HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS */
-# define IB_ATOMICS_STARTUP_MSG \
- "Mutexes use Solaris atomic functions, rw_locks do not"
-# endif /* HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS */
-
-/**********************************************************//**
-Returns the resulting value, ptr is pointer to target, amount is the
-amount of increment. */
-
-# define os_atomic_increment_lint(ptr, amount) \
- atomic_add_long_nv((ulong_t*) ptr, amount)
-
-# define os_atomic_increment_ulint(ptr, amount) \
- atomic_add_long_nv(ptr, amount)
-
-/**********************************************************//**
-Returns the old value of *ptr, atomically sets *ptr to new_val */
-
-# define os_atomic_test_and_set_byte(ptr, new_val) \
- atomic_swap_uchar(ptr, new_val)
-
-#elif defined(HAVE_WINDOWS_ATOMICS)
-
-#define HAVE_ATOMIC_BUILTINS
-
-/* On Windows, use Windows atomics / interlocked */
-# ifdef _WIN64
-# define win_cmp_and_xchg InterlockedCompareExchange64
-# define win_xchg_and_add InterlockedExchangeAdd64
-# else /* _WIN64 */
-# define win_cmp_and_xchg InterlockedCompareExchange
-# define win_xchg_and_add InterlockedExchangeAdd
-# endif
-
-/**********************************************************//**
-Returns true if swapped, ptr is pointer to target, old_val is value to
-compare to, new_val is the value to swap in. */
-
-# define os_compare_and_swap_ulint(ptr, old_val, new_val) \
- (win_cmp_and_xchg(ptr, new_val, old_val) == old_val)
-
-# define os_compare_and_swap_lint(ptr, old_val, new_val) \
- (win_cmp_and_xchg(ptr, new_val, old_val) == old_val)
-
-/* windows thread objects can always be passed to windows atomic functions */
-# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
- (InterlockedCompareExchange(ptr, new_val, old_val) == old_val)
-# define INNODB_RW_LOCKS_USE_ATOMICS
-# define IB_ATOMICS_STARTUP_MSG \
- "Mutexes and rw_locks use Windows interlocked functions"
-
-/**********************************************************//**
-Returns the resulting value, ptr is pointer to target, amount is the
-amount of increment. */
-
-# define os_atomic_increment_lint(ptr, amount) \
- (win_xchg_and_add(ptr, amount) + amount)
-
-# define os_atomic_increment_ulint(ptr, amount) \
- ((ulint) (win_xchg_and_add(ptr, amount) + amount))
-
-/**********************************************************//**
-Returns the old value of *ptr, atomically sets *ptr to new_val.
-InterlockedExchange() operates on LONG, and the LONG will be
-clobbered */
-
-# define os_atomic_test_and_set_byte(ptr, new_val) \
- ((byte) InterlockedExchange(ptr, new_val))
-
-#else
-# define IB_ATOMICS_STARTUP_MSG \
- "Mutexes and rw_locks use InnoDB's own implementation"
-#endif
-
-#ifndef UNIV_NONINL
-#include "os0sync.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/os0sync.ic b/storage/innodb_plugin/include/os0sync.ic
deleted file mode 100644
index 1f3ce38fa65..00000000000
--- a/storage/innodb_plugin/include/os0sync.ic
+++ /dev/null
@@ -1,53 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/os0sync.ic
-The interface to the operating system synchronization primitives.
-
-Created 9/6/1995 Heikki Tuuri
-*******************************************************/
-
-#ifdef __WIN__
-#include <winbase.h>
-#endif
-
-/**********************************************************//**
-Acquires ownership of a fast mutex. Currently in Windows this is the same
-as os_fast_mutex_lock!
-@return 0 if success, != 0 if was reserved by another thread */
-UNIV_INLINE
-ulint
-os_fast_mutex_trylock(
-/*==================*/
- os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */
-{
-#ifdef __WIN__
- EnterCriticalSection(fast_mutex);
-
- return(0);
-#else
- /* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock
- so that it returns 0 on success. In the operating system
- libraries, HP-UX-10.20 follows the old Posix 1003.4a Draft 4 and
- returns 1 on success (but MySQL remaps that to 0), while Linux,
- FreeBSD, Solaris, AIX, Tru64 Unix, HP-UX-11.0 return 0 on success. */
-
- return((ulint) pthread_mutex_trylock(fast_mutex));
-#endif
-}
diff --git a/storage/innodb_plugin/include/os0thread.h b/storage/innodb_plugin/include/os0thread.h
deleted file mode 100644
index 6583de0005f..00000000000
--- a/storage/innodb_plugin/include/os0thread.h
+++ /dev/null
@@ -1,162 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/os0thread.h
-The interface to the operating system
-process and thread control primitives
-
-Created 9/8/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef os0thread_h
-#define os0thread_h
-
-#include "univ.i"
-
-/* Maximum number of threads which can be created in the program;
-this is also the size of the wait slot array for MySQL threads which
-can wait inside InnoDB */
-
-#define OS_THREAD_MAX_N srv_max_n_threads
-
-
-/* Possible fixed priorities for threads */
-#define OS_THREAD_PRIORITY_NONE 100
-#define OS_THREAD_PRIORITY_BACKGROUND 1
-#define OS_THREAD_PRIORITY_NORMAL 2
-#define OS_THREAD_PRIORITY_ABOVE_NORMAL 3
-
-#ifdef __WIN__
-typedef void* os_thread_t;
-typedef unsigned long os_thread_id_t; /*!< In Windows the thread id
- is an unsigned long int */
-#else
-typedef pthread_t os_thread_t;
-typedef os_thread_t os_thread_id_t; /*!< In Unix we use the thread
- handle itself as the id of
- the thread */
-#endif
-
-/* Define a function pointer type to use in a typecast */
-typedef void* (*os_posix_f_t) (void*);
-
-/***************************************************************//**
-Compares two thread ids for equality.
-@return TRUE if equal */
-UNIV_INTERN
-ibool
-os_thread_eq(
-/*=========*/
- os_thread_id_t a, /*!< in: OS thread or thread id */
- os_thread_id_t b); /*!< in: OS thread or thread id */
-/****************************************************************//**
-Converts an OS thread id to a ulint. It is NOT guaranteed that the ulint is
-unique for the thread though!
-@return thread identifier as a number */
-UNIV_INTERN
-ulint
-os_thread_pf(
-/*=========*/
- os_thread_id_t a); /*!< in: OS thread identifier */
-/****************************************************************//**
-Creates a new thread of execution. The execution starts from
-the function given. The start function takes a void* parameter
-and returns a ulint.
-NOTE: We count the number of threads in os_thread_exit(). A created
-thread should always use that to exit and not use return() to exit.
-@return handle to the thread */
-UNIV_INTERN
-os_thread_t
-os_thread_create(
-/*=============*/
-#ifndef __WIN__
- os_posix_f_t start_f,
-#else
- ulint (*start_f)(void*), /*!< in: pointer to function
- from which to start */
-#endif
- void* arg, /*!< in: argument to start
- function */
- os_thread_id_t* thread_id); /*!< out: id of the created
- thread, or NULL */
-
-/*****************************************************************//**
-Exits the current thread. */
-UNIV_INTERN
-void
-os_thread_exit(
-/*===========*/
- void* exit_value); /*!< in: exit value; in Windows this void*
- is cast as a DWORD */
-/*****************************************************************//**
-Returns the thread identifier of current thread.
-@return current thread identifier */
-UNIV_INTERN
-os_thread_id_t
-os_thread_get_curr_id(void);
-/*========================*/
-/*****************************************************************//**
-Returns handle to the current thread.
-@return current thread handle */
-UNIV_INTERN
-os_thread_t
-os_thread_get_curr(void);
-/*====================*/
-/*****************************************************************//**
-Advises the os to give up remainder of the thread's time slice. */
-UNIV_INTERN
-void
-os_thread_yield(void);
-/*=================*/
-/*****************************************************************//**
-The thread sleeps at least the time given in microseconds. */
-UNIV_INTERN
-void
-os_thread_sleep(
-/*============*/
- ulint tm); /*!< in: time in microseconds */
-/******************************************************************//**
-Gets a thread priority.
-@return priority */
-UNIV_INTERN
-ulint
-os_thread_get_priority(
-/*===================*/
- os_thread_t handle);/*!< in: OS handle to the thread */
-/******************************************************************//**
-Sets a thread priority. */
-UNIV_INTERN
-void
-os_thread_set_priority(
-/*===================*/
- os_thread_t handle, /*!< in: OS handle to the thread */
- ulint pri); /*!< in: priority: one of OS_PRIORITY_... */
-/******************************************************************//**
-Gets the last operating system error code for the calling thread.
-@return last error on Windows, 0 otherwise */
-UNIV_INTERN
-ulint
-os_thread_get_last_error(void);
-/*==========================*/
-
-#ifndef UNIV_NONINL
-#include "os0thread.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/os0thread.ic b/storage/innodb_plugin/include/os0thread.ic
deleted file mode 100644
index f89bc40b4fa..00000000000
--- a/storage/innodb_plugin/include/os0thread.ic
+++ /dev/null
@@ -1,25 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/os0thread.ic
-The interface to the operating system
-process and thread control primitives
-
-Created 9/8/1995 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innodb_plugin/include/page0cur.h b/storage/innodb_plugin/include/page0cur.h
deleted file mode 100644
index 1544b0abe1c..00000000000
--- a/storage/innodb_plugin/include/page0cur.h
+++ /dev/null
@@ -1,346 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/page0cur.h
-The page cursor
-
-Created 10/4/1994 Heikki Tuuri
-*************************************************************************/
-
-#ifndef page0cur_h
-#define page0cur_h
-
-#include "univ.i"
-
-#include "buf0types.h"
-#include "page0page.h"
-#include "rem0rec.h"
-#include "data0data.h"
-#include "mtr0mtr.h"
-
-
-#define PAGE_CUR_ADAPT
-
-/* Page cursor search modes; the values must be in this order! */
-
-#define PAGE_CUR_UNSUPP 0
-#define PAGE_CUR_G 1
-#define PAGE_CUR_GE 2
-#define PAGE_CUR_L 3
-#define PAGE_CUR_LE 4
-/*#define PAGE_CUR_LE_OR_EXTENDS 5*/ /* This is a search mode used in
- "column LIKE 'abc%' ORDER BY column DESC";
- we have to find strings which are <= 'abc' or
- which extend it */
-#ifdef UNIV_SEARCH_DEBUG
-# define PAGE_CUR_DBG 6 /* As PAGE_CUR_LE, but skips search shortcut */
-#endif /* UNIV_SEARCH_DEBUG */
-
-#ifdef UNIV_DEBUG
-/*********************************************************//**
-Gets pointer to the page frame where the cursor is positioned.
-@return page */
-UNIV_INLINE
-page_t*
-page_cur_get_page(
-/*==============*/
- page_cur_t* cur); /*!< in: page cursor */
-/*********************************************************//**
-Gets pointer to the buffer block where the cursor is positioned.
-@return page */
-UNIV_INLINE
-buf_block_t*
-page_cur_get_block(
-/*===============*/
- page_cur_t* cur); /*!< in: page cursor */
-/*********************************************************//**
-Gets pointer to the page frame where the cursor is positioned.
-@return page */
-UNIV_INLINE
-page_zip_des_t*
-page_cur_get_page_zip(
-/*==================*/
- page_cur_t* cur); /*!< in: page cursor */
-/*********************************************************//**
-Gets the record where the cursor is positioned.
-@return record */
-UNIV_INLINE
-rec_t*
-page_cur_get_rec(
-/*=============*/
- page_cur_t* cur); /*!< in: page cursor */
-#else /* UNIV_DEBUG */
-# define page_cur_get_page(cur) page_align((cur)->rec)
-# define page_cur_get_block(cur) (cur)->block
-# define page_cur_get_page_zip(cur) buf_block_get_page_zip((cur)->block)
-# define page_cur_get_rec(cur) (cur)->rec
-#endif /* UNIV_DEBUG */
-/*********************************************************//**
-Sets the cursor object to point before the first user record
-on the page. */
-UNIV_INLINE
-void
-page_cur_set_before_first(
-/*======================*/
- const buf_block_t* block, /*!< in: index page */
- page_cur_t* cur); /*!< in: cursor */
-/*********************************************************//**
-Sets the cursor object to point after the last user record on
-the page. */
-UNIV_INLINE
-void
-page_cur_set_after_last(
-/*====================*/
- const buf_block_t* block, /*!< in: index page */
- page_cur_t* cur); /*!< in: cursor */
-/*********************************************************//**
-Returns TRUE if the cursor is before first user record on page.
-@return TRUE if at start */
-UNIV_INLINE
-ibool
-page_cur_is_before_first(
-/*=====================*/
- const page_cur_t* cur); /*!< in: cursor */
-/*********************************************************//**
-Returns TRUE if the cursor is after last user record.
-@return TRUE if at end */
-UNIV_INLINE
-ibool
-page_cur_is_after_last(
-/*===================*/
- const page_cur_t* cur); /*!< in: cursor */
-/**********************************************************//**
-Positions the cursor on the given record. */
-UNIV_INLINE
-void
-page_cur_position(
-/*==============*/
- const rec_t* rec, /*!< in: record on a page */
- const buf_block_t* block, /*!< in: buffer block containing
- the record */
- page_cur_t* cur); /*!< out: page cursor */
-/**********************************************************//**
-Invalidates a page cursor by setting the record pointer NULL. */
-UNIV_INLINE
-void
-page_cur_invalidate(
-/*================*/
- page_cur_t* cur); /*!< out: page cursor */
-/**********************************************************//**
-Moves the cursor to the next record on page. */
-UNIV_INLINE
-void
-page_cur_move_to_next(
-/*==================*/
- page_cur_t* cur); /*!< in/out: cursor; must not be after last */
-/**********************************************************//**
-Moves the cursor to the previous record on page. */
-UNIV_INLINE
-void
-page_cur_move_to_prev(
-/*==================*/
- page_cur_t* cur); /*!< in/out: cursor; not before first */
-#ifndef UNIV_HOTBACKUP
-/***********************************************************//**
-Inserts a record next to page cursor. Returns pointer to inserted record if
-succeed, i.e., enough space available, NULL otherwise. The cursor stays at
-the same logical position, but the physical position may change if it is
-pointing to a compressed page that was reorganized.
-@return pointer to record if succeed, NULL otherwise */
-UNIV_INLINE
-rec_t*
-page_cur_tuple_insert(
-/*==================*/
- page_cur_t* cursor, /*!< in/out: a page cursor */
- const dtuple_t* tuple, /*!< in: pointer to a data tuple */
- dict_index_t* index, /*!< in: record descriptor */
- ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr); /*!< in: mini-transaction handle, or NULL */
-#endif /* !UNIV_HOTBACKUP */
-/***********************************************************//**
-Inserts a record next to page cursor. Returns pointer to inserted record if
-succeed, i.e., enough space available, NULL otherwise. The cursor stays at
-the same logical position, but the physical position may change if it is
-pointing to a compressed page that was reorganized.
-@return pointer to record if succeed, NULL otherwise */
-UNIV_INLINE
-rec_t*
-page_cur_rec_insert(
-/*================*/
- page_cur_t* cursor, /*!< in/out: a page cursor */
- const rec_t* rec, /*!< in: record to insert */
- dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
- mtr_t* mtr); /*!< in: mini-transaction handle, or NULL */
-/***********************************************************//**
-Inserts a record next to page cursor on an uncompressed page.
-Returns pointer to inserted record if succeed, i.e., enough
-space available, NULL otherwise. The cursor stays at the same position.
-@return pointer to record if succeed, NULL otherwise */
-UNIV_INTERN
-rec_t*
-page_cur_insert_rec_low(
-/*====================*/
- rec_t* current_rec,/*!< in: pointer to current record after
- which the new record is inserted */
- dict_index_t* index, /*!< in: record descriptor */
- const rec_t* rec, /*!< in: pointer to a physical record */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
- mtr_t* mtr); /*!< in: mini-transaction handle, or NULL */
-/***********************************************************//**
-Inserts a record next to page cursor on a compressed and uncompressed
-page. Returns pointer to inserted record if succeed, i.e.,
-enough space available, NULL otherwise.
-The cursor stays at the same position.
-@return pointer to record if succeed, NULL otherwise */
-UNIV_INTERN
-rec_t*
-page_cur_insert_rec_zip(
-/*====================*/
- rec_t** current_rec,/*!< in/out: pointer to current record after
- which the new record is inserted */
- buf_block_t* block, /*!< in: buffer block of *current_rec */
- dict_index_t* index, /*!< in: record descriptor */
- const rec_t* rec, /*!< in: pointer to a physical record */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
- mtr_t* mtr); /*!< in: mini-transaction handle, or NULL */
-/*************************************************************//**
-Copies records from page to a newly created page, from a given record onward,
-including that record. Infimum and supremum records are not copied. */
-UNIV_INTERN
-void
-page_copy_rec_list_end_to_created_page(
-/*===================================*/
- page_t* new_page, /*!< in/out: index page to copy to */
- rec_t* rec, /*!< in: first record to copy */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr); /*!< in: mtr */
-/***********************************************************//**
-Deletes a record at the page cursor. The cursor is moved to the
-next record after the deleted one. */
-UNIV_INTERN
-void
-page_cur_delete_rec(
-/*================*/
- page_cur_t* cursor, /*!< in/out: a page cursor */
- dict_index_t* index, /*!< in: record descriptor */
- const ulint* offsets,/*!< in: rec_get_offsets(cursor->rec, index) */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-#ifndef UNIV_HOTBACKUP
-/****************************************************************//**
-Searches the right position for a page cursor.
-@return number of matched fields on the left */
-UNIV_INLINE
-ulint
-page_cur_search(
-/*============*/
- const buf_block_t* block, /*!< in: buffer block */
- const dict_index_t* index, /*!< in: record descriptor */
- const dtuple_t* tuple, /*!< in: data tuple */
- ulint mode, /*!< in: PAGE_CUR_L,
- PAGE_CUR_LE, PAGE_CUR_G, or
- PAGE_CUR_GE */
- page_cur_t* cursor);/*!< out: page cursor */
-/****************************************************************//**
-Searches the right position for a page cursor. */
-UNIV_INTERN
-void
-page_cur_search_with_match(
-/*=======================*/
- const buf_block_t* block, /*!< in: buffer block */
- const dict_index_t* index, /*!< in: record descriptor */
- const dtuple_t* tuple, /*!< in: data tuple */
- ulint mode, /*!< in: PAGE_CUR_L,
- PAGE_CUR_LE, PAGE_CUR_G, or
- PAGE_CUR_GE */
- ulint* iup_matched_fields,
- /*!< in/out: already matched
- fields in upper limit record */
- ulint* iup_matched_bytes,
- /*!< in/out: already matched
- bytes in a field not yet
- completely matched */
- ulint* ilow_matched_fields,
- /*!< in/out: already matched
- fields in lower limit record */
- ulint* ilow_matched_bytes,
- /*!< in/out: already matched
- bytes in a field not yet
- completely matched */
- page_cur_t* cursor);/*!< out: page cursor */
-/***********************************************************//**
-Positions a page cursor on a randomly chosen user record on a page. If there
-are no user records, sets the cursor on the infimum record. */
-UNIV_INTERN
-void
-page_cur_open_on_rnd_user_rec(
-/*==========================*/
- buf_block_t* block, /*!< in: page */
- page_cur_t* cursor);/*!< out: page cursor */
-#endif /* !UNIV_HOTBACKUP */
-/***********************************************************//**
-Parses a log record of a record insert on a page.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-page_cur_parse_insert_rec(
-/*======================*/
- ibool is_short,/*!< in: TRUE if short inserts */
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- buf_block_t* block, /*!< in: page or NULL */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr); /*!< in: mtr or NULL */
-/**********************************************************//**
-Parses a log record of copying a record list end to a new created page.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-page_parse_copy_rec_list_to_created_page(
-/*=====================================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- buf_block_t* block, /*!< in: page or NULL */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr); /*!< in: mtr or NULL */
-/***********************************************************//**
-Parses log record of a record delete on a page.
-@return pointer to record end or NULL */
-UNIV_INTERN
-byte*
-page_cur_parse_delete_rec(
-/*======================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- buf_block_t* block, /*!< in: page or NULL */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr); /*!< in: mtr or NULL */
-
-/** Index page cursor */
-
-struct page_cur_struct{
- byte* rec; /*!< pointer to a record on page */
- buf_block_t* block; /*!< pointer to the block containing rec */
-};
-
-#ifndef UNIV_NONINL
-#include "page0cur.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/page0cur.ic b/storage/innodb_plugin/include/page0cur.ic
deleted file mode 100644
index 3520677dfb3..00000000000
--- a/storage/innodb_plugin/include/page0cur.ic
+++ /dev/null
@@ -1,299 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/page0cur.ic
-The page cursor
-
-Created 10/4/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "page0page.h"
-#include "buf0types.h"
-
-#ifdef UNIV_DEBUG
-/*********************************************************//**
-Gets pointer to the page frame where the cursor is positioned.
-@return page */
-UNIV_INLINE
-page_t*
-page_cur_get_page(
-/*==============*/
- page_cur_t* cur) /*!< in: page cursor */
-{
- ut_ad(cur);
- ut_ad(page_align(cur->rec) == cur->block->frame);
-
- return(page_align(cur->rec));
-}
-
-/*********************************************************//**
-Gets pointer to the buffer block where the cursor is positioned.
-@return page */
-UNIV_INLINE
-buf_block_t*
-page_cur_get_block(
-/*===============*/
- page_cur_t* cur) /*!< in: page cursor */
-{
- ut_ad(cur);
- ut_ad(page_align(cur->rec) == cur->block->frame);
- return(cur->block);
-}
-
-/*********************************************************//**
-Gets pointer to the page frame where the cursor is positioned.
-@return page */
-UNIV_INLINE
-page_zip_des_t*
-page_cur_get_page_zip(
-/*==================*/
- page_cur_t* cur) /*!< in: page cursor */
-{
- return(buf_block_get_page_zip(page_cur_get_block(cur)));
-}
-
-/*********************************************************//**
-Gets the record where the cursor is positioned.
-@return record */
-UNIV_INLINE
-rec_t*
-page_cur_get_rec(
-/*=============*/
- page_cur_t* cur) /*!< in: page cursor */
-{
- ut_ad(cur);
- ut_ad(page_align(cur->rec) == cur->block->frame);
-
- return(cur->rec);
-}
-#endif /* UNIV_DEBUG */
-
-/*********************************************************//**
-Sets the cursor object to point before the first user record
-on the page. */
-UNIV_INLINE
-void
-page_cur_set_before_first(
-/*======================*/
- const buf_block_t* block, /*!< in: index page */
- page_cur_t* cur) /*!< in: cursor */
-{
- cur->block = (buf_block_t*) block;
- cur->rec = page_get_infimum_rec(buf_block_get_frame(cur->block));
-}
-
-/*********************************************************//**
-Sets the cursor object to point after the last user record on
-the page. */
-UNIV_INLINE
-void
-page_cur_set_after_last(
-/*====================*/
- const buf_block_t* block, /*!< in: index page */
- page_cur_t* cur) /*!< in: cursor */
-{
- cur->block = (buf_block_t*) block;
- cur->rec = page_get_supremum_rec(buf_block_get_frame(cur->block));
-}
-
-/*********************************************************//**
-Returns TRUE if the cursor is before first user record on page.
-@return TRUE if at start */
-UNIV_INLINE
-ibool
-page_cur_is_before_first(
-/*=====================*/
- const page_cur_t* cur) /*!< in: cursor */
-{
- ut_ad(cur);
- ut_ad(page_align(cur->rec) == cur->block->frame);
- return(page_rec_is_infimum(cur->rec));
-}
-
-/*********************************************************//**
-Returns TRUE if the cursor is after last user record.
-@return TRUE if at end */
-UNIV_INLINE
-ibool
-page_cur_is_after_last(
-/*===================*/
- const page_cur_t* cur) /*!< in: cursor */
-{
- ut_ad(cur);
- ut_ad(page_align(cur->rec) == cur->block->frame);
- return(page_rec_is_supremum(cur->rec));
-}
-
-/**********************************************************//**
-Positions the cursor on the given record. */
-UNIV_INLINE
-void
-page_cur_position(
-/*==============*/
- const rec_t* rec, /*!< in: record on a page */
- const buf_block_t* block, /*!< in: buffer block containing
- the record */
- page_cur_t* cur) /*!< out: page cursor */
-{
- ut_ad(rec && block && cur);
- ut_ad(page_align(rec) == block->frame);
-
- cur->rec = (rec_t*) rec;
- cur->block = (buf_block_t*) block;
-}
-
-/**********************************************************//**
-Invalidates a page cursor by setting the record pointer NULL. */
-UNIV_INLINE
-void
-page_cur_invalidate(
-/*================*/
- page_cur_t* cur) /*!< out: page cursor */
-{
- ut_ad(cur);
-
- cur->rec = NULL;
- cur->block = NULL;
-}
-
-/**********************************************************//**
-Moves the cursor to the next record on page. */
-UNIV_INLINE
-void
-page_cur_move_to_next(
-/*==================*/
- page_cur_t* cur) /*!< in/out: cursor; must not be after last */
-{
- ut_ad(!page_cur_is_after_last(cur));
-
- cur->rec = page_rec_get_next(cur->rec);
-}
-
-/**********************************************************//**
-Moves the cursor to the previous record on page. */
-UNIV_INLINE
-void
-page_cur_move_to_prev(
-/*==================*/
- page_cur_t* cur) /*!< in/out: page cursor, not before first */
-{
- ut_ad(!page_cur_is_before_first(cur));
-
- cur->rec = page_rec_get_prev(cur->rec);
-}
-
-#ifndef UNIV_HOTBACKUP
-/****************************************************************//**
-Searches the right position for a page cursor.
-@return number of matched fields on the left */
-UNIV_INLINE
-ulint
-page_cur_search(
-/*============*/
- const buf_block_t* block, /*!< in: buffer block */
- const dict_index_t* index, /*!< in: record descriptor */
- const dtuple_t* tuple, /*!< in: data tuple */
- ulint mode, /*!< in: PAGE_CUR_L,
- PAGE_CUR_LE, PAGE_CUR_G, or
- PAGE_CUR_GE */
- page_cur_t* cursor) /*!< out: page cursor */
-{
- ulint low_matched_fields = 0;
- ulint low_matched_bytes = 0;
- ulint up_matched_fields = 0;
- ulint up_matched_bytes = 0;
-
- ut_ad(dtuple_check_typed(tuple));
-
- page_cur_search_with_match(block, index, tuple, mode,
- &up_matched_fields,
- &up_matched_bytes,
- &low_matched_fields,
- &low_matched_bytes,
- cursor);
- return(low_matched_fields);
-}
-
-/***********************************************************//**
-Inserts a record next to page cursor. Returns pointer to inserted record if
-succeed, i.e., enough space available, NULL otherwise. The cursor stays at
-the same logical position, but the physical position may change if it is
-pointing to a compressed page that was reorganized.
-@return pointer to record if succeed, NULL otherwise */
-UNIV_INLINE
-rec_t*
-page_cur_tuple_insert(
-/*==================*/
- page_cur_t* cursor, /*!< in/out: a page cursor */
- const dtuple_t* tuple, /*!< in: pointer to a data tuple */
- dict_index_t* index, /*!< in: record descriptor */
- ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
-{
- mem_heap_t* heap;
- ulint* offsets;
- ulint size
- = rec_get_converted_size(index, tuple, n_ext);
- rec_t* rec;
-
- heap = mem_heap_create(size
- + (4 + REC_OFFS_HEADER_SIZE
- + dtuple_get_n_fields(tuple))
- * sizeof *offsets);
- rec = rec_convert_dtuple_to_rec((byte*) mem_heap_alloc(heap, size),
- index, tuple, n_ext);
- offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
-
- if (buf_block_get_page_zip(cursor->block)) {
- rec = page_cur_insert_rec_zip(&cursor->rec, cursor->block,
- index, rec, offsets, mtr);
- } else {
- rec = page_cur_insert_rec_low(cursor->rec,
- index, rec, offsets, mtr);
- }
-
- mem_heap_free(heap);
- return(rec);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Inserts a record next to page cursor. Returns pointer to inserted record if
-succeed, i.e., enough space available, NULL otherwise. The cursor stays at
-the same logical position, but the physical position may change if it is
-pointing to a compressed page that was reorganized.
-@return pointer to record if succeed, NULL otherwise */
-UNIV_INLINE
-rec_t*
-page_cur_rec_insert(
-/*================*/
- page_cur_t* cursor, /*!< in/out: a page cursor */
- const rec_t* rec, /*!< in: record to insert */
- dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
- mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
-{
- if (buf_block_get_page_zip(cursor->block)) {
- return(page_cur_insert_rec_zip(&cursor->rec, cursor->block,
- index, rec, offsets, mtr));
- } else {
- return(page_cur_insert_rec_low(cursor->rec,
- index, rec, offsets, mtr));
- }
-}
diff --git a/storage/innodb_plugin/include/page0page.h b/storage/innodb_plugin/include/page0page.h
deleted file mode 100644
index 3899499fb6a..00000000000
--- a/storage/innodb_plugin/include/page0page.h
+++ /dev/null
@@ -1,1015 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/page0page.h
-Index page routines
-
-Created 2/2/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef page0page_h
-#define page0page_h
-
-#include "univ.i"
-
-#include "page0types.h"
-#include "fil0fil.h"
-#include "buf0buf.h"
-#include "data0data.h"
-#include "dict0dict.h"
-#include "rem0rec.h"
-#include "fsp0fsp.h"
-#include "mtr0mtr.h"
-
-#ifdef UNIV_MATERIALIZE
-#undef UNIV_INLINE
-#define UNIV_INLINE
-#endif
-
-/* PAGE HEADER
- ===========
-
-Index page header starts at the first offset left free by the FIL-module */
-
-typedef byte page_header_t;
-
-#define PAGE_HEADER FSEG_PAGE_DATA /* index page header starts at this
- offset */
-/*-----------------------------*/
-#define PAGE_N_DIR_SLOTS 0 /* number of slots in page directory */
-#define PAGE_HEAP_TOP 2 /* pointer to record heap top */
-#define PAGE_N_HEAP 4 /* number of records in the heap,
- bit 15=flag: new-style compact page format */
-#define PAGE_FREE 6 /* pointer to start of page free record list */
-#define PAGE_GARBAGE 8 /* number of bytes in deleted records */
-#define PAGE_LAST_INSERT 10 /* pointer to the last inserted record, or
- NULL if this info has been reset by a delete,
- for example */
-#define PAGE_DIRECTION 12 /* last insert direction: PAGE_LEFT, ... */
-#define PAGE_N_DIRECTION 14 /* number of consecutive inserts to the same
- direction */
-#define PAGE_N_RECS 16 /* number of user records on the page */
-#define PAGE_MAX_TRX_ID 18 /* highest id of a trx which may have modified
- a record on the page; a dulint; defined only
- in secondary indexes and in the insert buffer
- tree; NOTE: this may be modified only
- when the thread has an x-latch to the page,
- and ALSO an x-latch to btr_search_latch
- if there is a hash index to the page! */
-#define PAGE_HEADER_PRIV_END 26 /* end of private data structure of the page
- header which are set in a page create */
-/*----*/
-#define PAGE_LEVEL 26 /* level of the node in an index tree; the
- leaf level is the level 0. This field should
- not be written to after page creation. */
-#define PAGE_INDEX_ID 28 /* index id where the page belongs.
- This field should not be written to after
- page creation. */
-#define PAGE_BTR_SEG_LEAF 36 /* file segment header for the leaf pages in
- a B-tree: defined only on the root page of a
- B-tree, but not in the root of an ibuf tree */
-#define PAGE_BTR_IBUF_FREE_LIST PAGE_BTR_SEG_LEAF
-#define PAGE_BTR_IBUF_FREE_LIST_NODE PAGE_BTR_SEG_LEAF
- /* in the place of PAGE_BTR_SEG_LEAF and _TOP
- there is a free list base node if the page is
- the root page of an ibuf tree, and at the same
- place is the free list node if the page is in
- a free list */
-#define PAGE_BTR_SEG_TOP (36 + FSEG_HEADER_SIZE)
- /* file segment header for the non-leaf pages
- in a B-tree: defined only on the root page of
- a B-tree, but not in the root of an ibuf
- tree */
-/*----*/
-#define PAGE_DATA (PAGE_HEADER + 36 + 2 * FSEG_HEADER_SIZE)
- /* start of data on the page */
-
-#define PAGE_OLD_INFIMUM (PAGE_DATA + 1 + REC_N_OLD_EXTRA_BYTES)
- /* offset of the page infimum record on an
- old-style page */
-#define PAGE_OLD_SUPREMUM (PAGE_DATA + 2 + 2 * REC_N_OLD_EXTRA_BYTES + 8)
- /* offset of the page supremum record on an
- old-style page */
-#define PAGE_OLD_SUPREMUM_END (PAGE_OLD_SUPREMUM + 9)
- /* offset of the page supremum record end on
- an old-style page */
-#define PAGE_NEW_INFIMUM (PAGE_DATA + REC_N_NEW_EXTRA_BYTES)
- /* offset of the page infimum record on a
- new-style compact page */
-#define PAGE_NEW_SUPREMUM (PAGE_DATA + 2 * REC_N_NEW_EXTRA_BYTES + 8)
- /* offset of the page supremum record on a
- new-style compact page */
-#define PAGE_NEW_SUPREMUM_END (PAGE_NEW_SUPREMUM + 8)
- /* offset of the page supremum record end on
- a new-style compact page */
-/*-----------------------------*/
-
-/* Heap numbers */
-#define PAGE_HEAP_NO_INFIMUM 0 /* page infimum */
-#define PAGE_HEAP_NO_SUPREMUM 1 /* page supremum */
-#define PAGE_HEAP_NO_USER_LOW 2 /* first user record in
- creation (insertion) order,
- not necessarily collation order;
- this record may have been deleted */
-
-/* Directions of cursor movement */
-#define PAGE_LEFT 1
-#define PAGE_RIGHT 2
-#define PAGE_SAME_REC 3
-#define PAGE_SAME_PAGE 4
-#define PAGE_NO_DIRECTION 5
-
-/* PAGE DIRECTORY
- ==============
-*/
-
-typedef byte page_dir_slot_t;
-typedef page_dir_slot_t page_dir_t;
-
-/* Offset of the directory start down from the page end. We call the
-slot with the highest file address directory start, as it points to
-the first record in the list of records. */
-#define PAGE_DIR FIL_PAGE_DATA_END
-
-/* We define a slot in the page directory as two bytes */
-#define PAGE_DIR_SLOT_SIZE 2
-
-/* The offset of the physically lower end of the directory, counted from
-page end, when the page is empty */
-#define PAGE_EMPTY_DIR_START (PAGE_DIR + 2 * PAGE_DIR_SLOT_SIZE)
-
-/* The maximum and minimum number of records owned by a directory slot. The
-number may drop below the minimum in the first and the last slot in the
-directory. */
-#define PAGE_DIR_SLOT_MAX_N_OWNED 8
-#define PAGE_DIR_SLOT_MIN_N_OWNED 4
-
-/************************************************************//**
-Gets the start of a page.
-@return start of the page */
-UNIV_INLINE
-page_t*
-page_align(
-/*=======*/
- const void* ptr) /*!< in: pointer to page frame */
- __attribute__((const));
-/************************************************************//**
-Gets the offset within a page.
-@return offset from the start of the page */
-UNIV_INLINE
-ulint
-page_offset(
-/*========*/
- const void* ptr) /*!< in: pointer to page frame */
- __attribute__((const));
-/*************************************************************//**
-Returns the max trx id field value. */
-UNIV_INLINE
-trx_id_t
-page_get_max_trx_id(
-/*================*/
- const page_t* page); /*!< in: page */
-/*************************************************************//**
-Sets the max trx id field value. */
-UNIV_INTERN
-void
-page_set_max_trx_id(
-/*================*/
- buf_block_t* block, /*!< in/out: page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr); /*!< in/out: mini-transaction, or NULL */
-/*************************************************************//**
-Sets the max trx id field value if trx_id is bigger than the previous
-value. */
-UNIV_INLINE
-void
-page_update_max_trx_id(
-/*===================*/
- buf_block_t* block, /*!< in/out: page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-/*************************************************************//**
-Reads the given header field. */
-UNIV_INLINE
-ulint
-page_header_get_field(
-/*==================*/
- const page_t* page, /*!< in: page */
- ulint field); /*!< in: PAGE_N_DIR_SLOTS, ... */
-/*************************************************************//**
-Sets the given header field. */
-UNIV_INLINE
-void
-page_header_set_field(
-/*==================*/
- page_t* page, /*!< in/out: page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL */
- ulint field, /*!< in: PAGE_N_DIR_SLOTS, ... */
- ulint val); /*!< in: value */
-/*************************************************************//**
-Returns the offset stored in the given header field.
-@return offset from the start of the page, or 0 */
-UNIV_INLINE
-ulint
-page_header_get_offs(
-/*=================*/
- const page_t* page, /*!< in: page */
- ulint field) /*!< in: PAGE_FREE, ... */
- __attribute__((nonnull, pure));
-
-/*************************************************************//**
-Returns the pointer stored in the given header field, or NULL. */
-#define page_header_get_ptr(page, field) \
- (page_header_get_offs(page, field) \
- ? page + page_header_get_offs(page, field) : NULL)
-/*************************************************************//**
-Sets the pointer stored in the given header field. */
-UNIV_INLINE
-void
-page_header_set_ptr(
-/*================*/
- page_t* page, /*!< in/out: page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL */
- ulint field, /*!< in/out: PAGE_FREE, ... */
- const byte* ptr); /*!< in: pointer or NULL*/
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Resets the last insert info field in the page header. Writes to mlog
-about this operation. */
-UNIV_INLINE
-void
-page_header_reset_last_insert(
-/*==========================*/
- page_t* page, /*!< in: page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL */
- mtr_t* mtr); /*!< in: mtr */
-#endif /* !UNIV_HOTBACKUP */
-/************************************************************//**
-Gets the offset of the first record on the page.
-@return offset of the first record in record list, relative from page */
-UNIV_INLINE
-ulint
-page_get_infimum_offset(
-/*====================*/
- const page_t* page); /*!< in: page which must have record(s) */
-/************************************************************//**
-Gets the offset of the last record on the page.
-@return offset of the last record in record list, relative from page */
-UNIV_INLINE
-ulint
-page_get_supremum_offset(
-/*=====================*/
- const page_t* page); /*!< in: page which must have record(s) */
-#define page_get_infimum_rec(page) ((page) + page_get_infimum_offset(page))
-#define page_get_supremum_rec(page) ((page) + page_get_supremum_offset(page))
-/************************************************************//**
-Returns the middle record of record list. If there are an even number
-of records in the list, returns the first record of upper half-list.
-@return middle record */
-UNIV_INTERN
-rec_t*
-page_get_middle_rec(
-/*================*/
- page_t* page); /*!< in: page */
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Compares a data tuple to a physical record. Differs from the function
-cmp_dtuple_rec_with_match in the way that the record must reside on an
-index page, and also page infimum and supremum records can be given in
-the parameter rec. These are considered as the negative infinity and
-the positive infinity in the alphabetical order.
-@return 1, 0, -1, if dtuple is greater, equal, less than rec,
-respectively, when only the common first fields are compared */
-UNIV_INLINE
-int
-page_cmp_dtuple_rec_with_match(
-/*===========================*/
- const dtuple_t* dtuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: physical record on a page; may also
- be page infimum or supremum, in which case
- matched-parameter values below are not
- affected */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint* matched_fields, /*!< in/out: number of already completely
- matched fields; when function returns
- contains the value for current comparison */
- ulint* matched_bytes); /*!< in/out: number of already matched
- bytes within the first field not completely
- matched; when function returns contains the
- value for current comparison */
-#endif /* !UNIV_HOTBACKUP */
-/*************************************************************//**
-Gets the page number.
-@return page number */
-UNIV_INLINE
-ulint
-page_get_page_no(
-/*=============*/
- const page_t* page); /*!< in: page */
-/*************************************************************//**
-Gets the tablespace identifier.
-@return space id */
-UNIV_INLINE
-ulint
-page_get_space_id(
-/*==============*/
- const page_t* page); /*!< in: page */
-/*************************************************************//**
-Gets the number of user records on page (the infimum and supremum records
-are not user records).
-@return number of user records */
-UNIV_INLINE
-ulint
-page_get_n_recs(
-/*============*/
- const page_t* page); /*!< in: index page */
-/***************************************************************//**
-Returns the number of records before the given record in chain.
-The number includes infimum and supremum records.
-@return number of records */
-UNIV_INTERN
-ulint
-page_rec_get_n_recs_before(
-/*=======================*/
- const rec_t* rec); /*!< in: the physical record */
-/*************************************************************//**
-Gets the number of records in the heap.
-@return number of user records */
-UNIV_INLINE
-ulint
-page_dir_get_n_heap(
-/*================*/
- const page_t* page); /*!< in: index page */
-/*************************************************************//**
-Sets the number of records in the heap. */
-UNIV_INLINE
-void
-page_dir_set_n_heap(
-/*================*/
- page_t* page, /*!< in/out: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL.
- Note that the size of the dense page directory
- in the compressed page trailer is
- n_heap * PAGE_ZIP_DIR_SLOT_SIZE. */
- ulint n_heap);/*!< in: number of records */
-/*************************************************************//**
-Gets the number of dir slots in directory.
-@return number of slots */
-UNIV_INLINE
-ulint
-page_dir_get_n_slots(
-/*=================*/
- const page_t* page); /*!< in: index page */
-/*************************************************************//**
-Sets the number of dir slots in directory. */
-UNIV_INLINE
-void
-page_dir_set_n_slots(
-/*=================*/
- page_t* page, /*!< in/out: page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL */
- ulint n_slots);/*!< in: number of slots */
-#ifdef UNIV_DEBUG
-/*************************************************************//**
-Gets pointer to nth directory slot.
-@return pointer to dir slot */
-UNIV_INLINE
-page_dir_slot_t*
-page_dir_get_nth_slot(
-/*==================*/
- const page_t* page, /*!< in: index page */
- ulint n); /*!< in: position */
-#else /* UNIV_DEBUG */
-# define page_dir_get_nth_slot(page, n) \
- ((page) + UNIV_PAGE_SIZE - PAGE_DIR \
- - (n + 1) * PAGE_DIR_SLOT_SIZE)
-#endif /* UNIV_DEBUG */
-/**************************************************************//**
-Used to check the consistency of a record on a page.
-@return TRUE if succeed */
-UNIV_INLINE
-ibool
-page_rec_check(
-/*===========*/
- const rec_t* rec); /*!< in: record */
-/***************************************************************//**
-Gets the record pointed to by a directory slot.
-@return pointer to record */
-UNIV_INLINE
-const rec_t*
-page_dir_slot_get_rec(
-/*==================*/
- const page_dir_slot_t* slot); /*!< in: directory slot */
-/***************************************************************//**
-This is used to set the record offset in a directory slot. */
-UNIV_INLINE
-void
-page_dir_slot_set_rec(
-/*==================*/
- page_dir_slot_t* slot, /*!< in: directory slot */
- rec_t* rec); /*!< in: record on the page */
-/***************************************************************//**
-Gets the number of records owned by a directory slot.
-@return number of records */
-UNIV_INLINE
-ulint
-page_dir_slot_get_n_owned(
-/*======================*/
- const page_dir_slot_t* slot); /*!< in: page directory slot */
-/***************************************************************//**
-This is used to set the owned records field of a directory slot. */
-UNIV_INLINE
-void
-page_dir_slot_set_n_owned(
-/*======================*/
- page_dir_slot_t*slot, /*!< in/out: directory slot */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- ulint n); /*!< in: number of records owned by the slot */
-/************************************************************//**
-Calculates the space reserved for directory slots of a given
-number of records. The exact value is a fraction number
-n * PAGE_DIR_SLOT_SIZE / PAGE_DIR_SLOT_MIN_N_OWNED, and it is
-rounded upwards to an integer. */
-UNIV_INLINE
-ulint
-page_dir_calc_reserved_space(
-/*=========================*/
- ulint n_recs); /*!< in: number of records */
-/***************************************************************//**
-Looks for the directory slot which owns the given record.
-@return the directory slot number */
-UNIV_INTERN
-ulint
-page_dir_find_owner_slot(
-/*=====================*/
- const rec_t* rec); /*!< in: the physical record */
-/************************************************************//**
-Determine whether the page is in new-style compact format.
-@return nonzero if the page is in compact format, zero if it is in
-old-style format */
-UNIV_INLINE
-ulint
-page_is_comp(
-/*=========*/
- const page_t* page); /*!< in: index page */
-/************************************************************//**
-TRUE if the record is on a page in compact format.
-@return nonzero if in compact format */
-UNIV_INLINE
-ulint
-page_rec_is_comp(
-/*=============*/
- const rec_t* rec); /*!< in: record */
-/***************************************************************//**
-Returns the heap number of a record.
-@return heap number */
-UNIV_INLINE
-ulint
-page_rec_get_heap_no(
-/*=================*/
- const rec_t* rec); /*!< in: the physical record */
-/************************************************************//**
-Determine whether the page is a B-tree leaf.
-@return TRUE if the page is a B-tree leaf */
-UNIV_INLINE
-ibool
-page_is_leaf(
-/*=========*/
- const page_t* page) /*!< in: page */
- __attribute__((nonnull, pure));
-/************************************************************//**
-Gets the pointer to the next record on the page.
-@return pointer to next record */
-UNIV_INLINE
-const rec_t*
-page_rec_get_next_low(
-/*==================*/
- const rec_t* rec, /*!< in: pointer to record */
- ulint comp); /*!< in: nonzero=compact page layout */
-/************************************************************//**
-Gets the pointer to the next record on the page.
-@return pointer to next record */
-UNIV_INLINE
-rec_t*
-page_rec_get_next(
-/*==============*/
- rec_t* rec); /*!< in: pointer to record */
-/************************************************************//**
-Gets the pointer to the next record on the page.
-@return pointer to next record */
-UNIV_INLINE
-const rec_t*
-page_rec_get_next_const(
-/*====================*/
- const rec_t* rec); /*!< in: pointer to record */
-/************************************************************//**
-Sets the pointer to the next record on the page. */
-UNIV_INLINE
-void
-page_rec_set_next(
-/*==============*/
- rec_t* rec, /*!< in: pointer to record,
- must not be page supremum */
- rec_t* next); /*!< in: pointer to next record,
- must not be page infimum */
-/************************************************************//**
-Gets the pointer to the previous record.
-@return pointer to previous record */
-UNIV_INLINE
-const rec_t*
-page_rec_get_prev_const(
-/*====================*/
- const rec_t* rec); /*!< in: pointer to record, must not be page
- infimum */
-/************************************************************//**
-Gets the pointer to the previous record.
-@return pointer to previous record */
-UNIV_INLINE
-rec_t*
-page_rec_get_prev(
-/*==============*/
- rec_t* rec); /*!< in: pointer to record,
- must not be page infimum */
-/************************************************************//**
-TRUE if the record is a user record on the page.
-@return TRUE if a user record */
-UNIV_INLINE
-ibool
-page_rec_is_user_rec_low(
-/*=====================*/
- ulint offset) /*!< in: record offset on page */
- __attribute__((const));
-/************************************************************//**
-TRUE if the record is the supremum record on a page.
-@return TRUE if the supremum record */
-UNIV_INLINE
-ibool
-page_rec_is_supremum_low(
-/*=====================*/
- ulint offset) /*!< in: record offset on page */
- __attribute__((const));
-/************************************************************//**
-TRUE if the record is the infimum record on a page.
-@return TRUE if the infimum record */
-UNIV_INLINE
-ibool
-page_rec_is_infimum_low(
-/*====================*/
- ulint offset) /*!< in: record offset on page */
- __attribute__((const));
-
-/************************************************************//**
-TRUE if the record is a user record on the page.
-@return TRUE if a user record */
-UNIV_INLINE
-ibool
-page_rec_is_user_rec(
-/*=================*/
- const rec_t* rec) /*!< in: record */
- __attribute__((const));
-/************************************************************//**
-TRUE if the record is the supremum record on a page.
-@return TRUE if the supremum record */
-UNIV_INLINE
-ibool
-page_rec_is_supremum(
-/*=================*/
- const rec_t* rec) /*!< in: record */
- __attribute__((const));
-
-/************************************************************//**
-TRUE if the record is the infimum record on a page.
-@return TRUE if the infimum record */
-UNIV_INLINE
-ibool
-page_rec_is_infimum(
-/*================*/
- const rec_t* rec) /*!< in: record */
- __attribute__((const));
-/***************************************************************//**
-Looks for the record which owns the given record.
-@return the owner record */
-UNIV_INLINE
-rec_t*
-page_rec_find_owner_rec(
-/*====================*/
- rec_t* rec); /*!< in: the physical record */
-/***********************************************************************//**
-This is a low-level operation which is used in a database index creation
-to update the page number of a created B-tree to a data dictionary
-record. */
-UNIV_INTERN
-void
-page_rec_write_index_page_no(
-/*=========================*/
- rec_t* rec, /*!< in: record to update */
- ulint i, /*!< in: index of the field to update */
- ulint page_no,/*!< in: value to write */
- mtr_t* mtr); /*!< in: mtr */
-/************************************************************//**
-Returns the maximum combined size of records which can be inserted on top
-of record heap.
-@return maximum combined size for inserted records */
-UNIV_INLINE
-ulint
-page_get_max_insert_size(
-/*=====================*/
- const page_t* page, /*!< in: index page */
- ulint n_recs);/*!< in: number of records */
-/************************************************************//**
-Returns the maximum combined size of records which can be inserted on top
-of record heap if page is first reorganized.
-@return maximum combined size for inserted records */
-UNIV_INLINE
-ulint
-page_get_max_insert_size_after_reorganize(
-/*======================================*/
- const page_t* page, /*!< in: index page */
- ulint n_recs);/*!< in: number of records */
-/*************************************************************//**
-Calculates free space if a page is emptied.
-@return free space */
-UNIV_INLINE
-ulint
-page_get_free_space_of_empty(
-/*=========================*/
- ulint comp) /*!< in: nonzero=compact page format */
- __attribute__((const));
-/**********************************************************//**
-Returns the base extra size of a physical record. This is the
-size of the fixed header, independent of the record size.
-@return REC_N_NEW_EXTRA_BYTES or REC_N_OLD_EXTRA_BYTES */
-UNIV_INLINE
-ulint
-page_rec_get_base_extra_size(
-/*=========================*/
- const rec_t* rec); /*!< in: physical record */
-/************************************************************//**
-Returns the sum of the sizes of the records in the record list
-excluding the infimum and supremum records.
-@return data in bytes */
-UNIV_INLINE
-ulint
-page_get_data_size(
-/*===============*/
- const page_t* page); /*!< in: index page */
-/************************************************************//**
-Allocates a block of memory from the head of the free list
-of an index page. */
-UNIV_INLINE
-void
-page_mem_alloc_free(
-/*================*/
- page_t* page, /*!< in/out: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page with enough
- space available for inserting the record,
- or NULL */
- rec_t* next_rec,/*!< in: pointer to the new head of the
- free record list */
- ulint need); /*!< in: number of bytes allocated */
-/************************************************************//**
-Allocates a block of memory from the heap of an index page.
-@return pointer to start of allocated buffer, or NULL if allocation fails */
-UNIV_INTERN
-byte*
-page_mem_alloc_heap(
-/*================*/
- page_t* page, /*!< in/out: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page with enough
- space available for inserting the record,
- or NULL */
- ulint need, /*!< in: total number of bytes needed */
- ulint* heap_no);/*!< out: this contains the heap number
- of the allocated record
- if allocation succeeds */
-/************************************************************//**
-Puts a record to free list. */
-UNIV_INLINE
-void
-page_mem_free(
-/*==========*/
- page_t* page, /*!< in/out: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- rec_t* rec, /*!< in: pointer to the (origin of) record */
- dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-/**********************************************************//**
-Create an uncompressed B-tree index page.
-@return pointer to the page */
-UNIV_INTERN
-page_t*
-page_create(
-/*========*/
- buf_block_t* block, /*!< in: a buffer block where the
- page is created */
- mtr_t* mtr, /*!< in: mini-transaction handle */
- ulint comp); /*!< in: nonzero=compact page format */
-/**********************************************************//**
-Create a compressed B-tree index page.
-@return pointer to the page */
-UNIV_INTERN
-page_t*
-page_create_zip(
-/*============*/
- buf_block_t* block, /*!< in/out: a buffer frame where the
- page is created */
- dict_index_t* index, /*!< in: the index of the page */
- ulint level, /*!< in: the B-tree level of the page */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-
-/*************************************************************//**
-Differs from page_copy_rec_list_end, because this function does not
-touch the lock table and max trx id on page or compress the page. */
-UNIV_INTERN
-void
-page_copy_rec_list_end_no_locks(
-/*============================*/
- buf_block_t* new_block, /*!< in: index page to copy to */
- buf_block_t* block, /*!< in: index page of rec */
- rec_t* rec, /*!< in: record on page */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr); /*!< in: mtr */
-/*************************************************************//**
-Copies records from page to new_page, from the given record onward,
-including that record. Infimum and supremum records are not copied.
-The records are copied to the start of the record list on new_page.
-@return pointer to the original successor of the infimum record on
-new_page, or NULL on zip overflow (new_block will be decompressed) */
-UNIV_INTERN
-rec_t*
-page_copy_rec_list_end(
-/*===================*/
- buf_block_t* new_block, /*!< in/out: index page to copy to */
- buf_block_t* block, /*!< in: index page containing rec */
- rec_t* rec, /*!< in: record on page */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr */
- __attribute__((nonnull));
-/*************************************************************//**
-Copies records from page to new_page, up to the given record, NOT
-including that record. Infimum and supremum records are not copied.
-The records are copied to the end of the record list on new_page.
-@return pointer to the original predecessor of the supremum record on
-new_page, or NULL on zip overflow (new_block will be decompressed) */
-UNIV_INTERN
-rec_t*
-page_copy_rec_list_start(
-/*=====================*/
- buf_block_t* new_block, /*!< in/out: index page to copy to */
- buf_block_t* block, /*!< in: index page containing rec */
- rec_t* rec, /*!< in: record on page */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr */
- __attribute__((nonnull));
-/*************************************************************//**
-Deletes records from a page from a given record onward, including that record.
-The infimum and supremum records are not deleted. */
-UNIV_INTERN
-void
-page_delete_rec_list_end(
-/*=====================*/
- rec_t* rec, /*!< in: pointer to record on page */
- buf_block_t* block, /*!< in: buffer block of the page */
- dict_index_t* index, /*!< in: record descriptor */
- ulint n_recs, /*!< in: number of records to delete,
- or ULINT_UNDEFINED if not known */
- ulint size, /*!< in: the sum of the sizes of the
- records in the end of the chain to
- delete, or ULINT_UNDEFINED if not known */
- mtr_t* mtr) /*!< in: mtr */
- __attribute__((nonnull));
-/*************************************************************//**
-Deletes records from page, up to the given record, NOT including
-that record. Infimum and supremum records are not deleted. */
-UNIV_INTERN
-void
-page_delete_rec_list_start(
-/*=======================*/
- rec_t* rec, /*!< in: record on page */
- buf_block_t* block, /*!< in: buffer block of the page */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr */
- __attribute__((nonnull));
-/*************************************************************//**
-Moves record list end to another page. Moved records include
-split_rec.
-@return TRUE on success; FALSE on compression failure (new_block will
-be decompressed) */
-UNIV_INTERN
-ibool
-page_move_rec_list_end(
-/*===================*/
- buf_block_t* new_block, /*!< in/out: index page where to move */
- buf_block_t* block, /*!< in: index page from where to move */
- rec_t* split_rec, /*!< in: first record to move */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr */
- __attribute__((nonnull(1, 2, 4, 5)));
-/*************************************************************//**
-Moves record list start to another page. Moved records do not include
-split_rec.
-@return TRUE on success; FALSE on compression failure */
-UNIV_INTERN
-ibool
-page_move_rec_list_start(
-/*=====================*/
- buf_block_t* new_block, /*!< in/out: index page where to move */
- buf_block_t* block, /*!< in/out: page containing split_rec */
- rec_t* split_rec, /*!< in: first record not to move */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr */
- __attribute__((nonnull(1, 2, 4, 5)));
-/****************************************************************//**
-Splits a directory slot which owns too many records. */
-UNIV_INTERN
-void
-page_dir_split_slot(
-/*================*/
- page_t* page, /*!< in: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be written, or NULL */
- ulint slot_no)/*!< in: the directory slot */
- __attribute__((nonnull(1)));
-/*************************************************************//**
-Tries to balance the given directory slot with too few records
-with the upper neighbor, so that there are at least the minimum number
-of records owned by the slot; this may result in the merging of
-two slots. */
-UNIV_INTERN
-void
-page_dir_balance_slot(
-/*==================*/
- page_t* page, /*!< in/out: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- ulint slot_no)/*!< in: the directory slot */
- __attribute__((nonnull(1)));
-/**********************************************************//**
-Parses a log record of a record list end or start deletion.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-page_parse_delete_rec_list(
-/*=======================*/
- byte type, /*!< in: MLOG_LIST_END_DELETE,
- MLOG_LIST_START_DELETE,
- MLOG_COMP_LIST_END_DELETE or
- MLOG_COMP_LIST_START_DELETE */
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- buf_block_t* block, /*!< in/out: buffer block or NULL */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr); /*!< in: mtr or NULL */
-/***********************************************************//**
-Parses a redo log record of creating a page.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-page_parse_create(
-/*==============*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- ulint comp, /*!< in: nonzero=compact page format */
- buf_block_t* block, /*!< in: block or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
-/************************************************************//**
-Prints record contents including the data relevant only in
-the index page context. */
-UNIV_INTERN
-void
-page_rec_print(
-/*===========*/
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: record descriptor */
-/***************************************************************//**
-This is used to print the contents of the directory for
-debugging purposes. */
-UNIV_INTERN
-void
-page_dir_print(
-/*===========*/
- page_t* page, /*!< in: index page */
- ulint pr_n); /*!< in: print n first and n last entries */
-/***************************************************************//**
-This is used to print the contents of the page record list for
-debugging purposes. */
-UNIV_INTERN
-void
-page_print_list(
-/*============*/
- buf_block_t* block, /*!< in: index page */
- dict_index_t* index, /*!< in: dictionary index of the page */
- ulint pr_n); /*!< in: print n first and n last entries */
-/***************************************************************//**
-Prints the info in a page header. */
-UNIV_INTERN
-void
-page_header_print(
-/*==============*/
- const page_t* page); /*!< in: index page */
-/***************************************************************//**
-This is used to print the contents of the page for
-debugging purposes. */
-UNIV_INTERN
-void
-page_print(
-/*=======*/
- buf_block_t* block, /*!< in: index page */
- dict_index_t* index, /*!< in: dictionary index of the page */
- ulint dn, /*!< in: print dn first and last entries
- in directory */
- ulint rn); /*!< in: print rn first and last records
- in directory */
-/***************************************************************//**
-The following is used to validate a record on a page. This function
-differs from rec_validate as it can also check the n_owned field and
-the heap_no field.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-page_rec_validate(
-/*==============*/
- rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-/***************************************************************//**
-Checks that the first directory slot points to the infimum record and
-the last to the supremum. This function is intended to track if the
-bug fixed in 4.0.14 has caused corruption to users' databases. */
-UNIV_INTERN
-void
-page_check_dir(
-/*===========*/
- const page_t* page); /*!< in: index page */
-/***************************************************************//**
-This function checks the consistency of an index page when we do not
-know the index. This is also resilient so that this should never crash
-even if the page is total garbage.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-page_simple_validate_old(
-/*=====================*/
- page_t* page); /*!< in: old-style index page */
-/***************************************************************//**
-This function checks the consistency of an index page when we do not
-know the index. This is also resilient so that this should never crash
-even if the page is total garbage.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-page_simple_validate_new(
-/*=====================*/
- page_t* block); /*!< in: new-style index page */
-/***************************************************************//**
-This function checks the consistency of an index page.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-page_validate(
-/*==========*/
- page_t* page, /*!< in: index page */
- dict_index_t* index); /*!< in: data dictionary index containing
- the page record type definition */
-/***************************************************************//**
-Looks in the page record list for a record with the given heap number.
-@return record, NULL if not found */
-
-const rec_t*
-page_find_rec_with_heap_no(
-/*=======================*/
- const page_t* page, /*!< in: index page */
- ulint heap_no);/*!< in: heap number */
-
-#ifdef UNIV_MATERIALIZE
-#undef UNIV_INLINE
-#define UNIV_INLINE UNIV_INLINE_ORIGINAL
-#endif
-
-#ifndef UNIV_NONINL
-#include "page0page.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/page0page.ic b/storage/innodb_plugin/include/page0page.ic
deleted file mode 100644
index 8f794410f20..00000000000
--- a/storage/innodb_plugin/include/page0page.ic
+++ /dev/null
@@ -1,1073 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/page0page.ic
-Index page routines
-
-Created 2/2/1994 Heikki Tuuri
-*******************************************************/
-
-#include "mach0data.h"
-#ifdef UNIV_DEBUG
-# include "log0recv.h"
-#endif /* !UNIV_DEBUG */
-#ifndef UNIV_HOTBACKUP
-# include "rem0cmp.h"
-#endif /* !UNIV_HOTBACKUP */
-#include "mtr0log.h"
-#include "page0zip.h"
-
-#ifdef UNIV_MATERIALIZE
-#undef UNIV_INLINE
-#define UNIV_INLINE
-#endif
-
-/************************************************************//**
-Gets the start of a page.
-@return start of the page */
-UNIV_INLINE
-page_t*
-page_align(
-/*=======*/
- const void* ptr) /*!< in: pointer to page frame */
-{
- return((page_t*) ut_align_down(ptr, UNIV_PAGE_SIZE));
-}
-/************************************************************//**
-Gets the offset within a page.
-@return offset from the start of the page */
-UNIV_INLINE
-ulint
-page_offset(
-/*========*/
- const void* ptr) /*!< in: pointer to page frame */
-{
- return(ut_align_offset(ptr, UNIV_PAGE_SIZE));
-}
-/*************************************************************//**
-Returns the max trx id field value. */
-UNIV_INLINE
-trx_id_t
-page_get_max_trx_id(
-/*================*/
- const page_t* page) /*!< in: page */
-{
- ut_ad(page);
-
- return(mach_read_from_8(page + PAGE_HEADER + PAGE_MAX_TRX_ID));
-}
-
-/*************************************************************//**
-Sets the max trx id field value if trx_id is bigger than the previous
-value. */
-UNIV_INLINE
-void
-page_update_max_trx_id(
-/*===================*/
- buf_block_t* block, /*!< in/out: page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ut_ad(block);
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- /* During crash recovery, this function may be called on
- something else than a leaf page of a secondary index or the
- insert buffer index tree (dict_index_is_sec_or_ibuf() returns
- TRUE for the dummy indexes constructed during redo log
- application). In that case, PAGE_MAX_TRX_ID is unused,
- and trx_id is usually zero. */
- ut_ad(!ut_dulint_is_zero(trx_id) || recv_recovery_is_on());
- ut_ad(page_is_leaf(buf_block_get_frame(block)));
-
- if (ut_dulint_cmp(page_get_max_trx_id(buf_block_get_frame(block)),
- trx_id) < 0) {
-
- page_set_max_trx_id(block, page_zip, trx_id, mtr);
- }
-}
-
-/*************************************************************//**
-Reads the given header field. */
-UNIV_INLINE
-ulint
-page_header_get_field(
-/*==================*/
- const page_t* page, /*!< in: page */
- ulint field) /*!< in: PAGE_LEVEL, ... */
-{
- ut_ad(page);
- ut_ad(field <= PAGE_INDEX_ID);
-
- return(mach_read_from_2(page + PAGE_HEADER + field));
-}
-
-/*************************************************************//**
-Sets the given header field. */
-UNIV_INLINE
-void
-page_header_set_field(
-/*==================*/
- page_t* page, /*!< in/out: page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL */
- ulint field, /*!< in: PAGE_N_DIR_SLOTS, ... */
- ulint val) /*!< in: value */
-{
- ut_ad(page);
- ut_ad(field <= PAGE_N_RECS);
- ut_ad(field == PAGE_N_HEAP || val < UNIV_PAGE_SIZE);
- ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < UNIV_PAGE_SIZE);
-
- mach_write_to_2(page + PAGE_HEADER + field, val);
- if (UNIV_LIKELY_NULL(page_zip)) {
- page_zip_write_header(page_zip,
- page + PAGE_HEADER + field, 2, NULL);
- }
-}
-
-/*************************************************************//**
-Returns the offset stored in the given header field.
-@return offset from the start of the page, or 0 */
-UNIV_INLINE
-ulint
-page_header_get_offs(
-/*=================*/
- const page_t* page, /*!< in: page */
- ulint field) /*!< in: PAGE_FREE, ... */
-{
- ulint offs;
-
- ut_ad(page);
- ut_ad((field == PAGE_FREE)
- || (field == PAGE_LAST_INSERT)
- || (field == PAGE_HEAP_TOP));
-
- offs = page_header_get_field(page, field);
-
- ut_ad((field != PAGE_HEAP_TOP) || offs);
-
- return(offs);
-}
-
-/*************************************************************//**
-Sets the pointer stored in the given header field. */
-UNIV_INLINE
-void
-page_header_set_ptr(
-/*================*/
- page_t* page, /*!< in: page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL */
- ulint field, /*!< in: PAGE_FREE, ... */
- const byte* ptr) /*!< in: pointer or NULL*/
-{
- ulint offs;
-
- ut_ad(page);
- ut_ad((field == PAGE_FREE)
- || (field == PAGE_LAST_INSERT)
- || (field == PAGE_HEAP_TOP));
-
- if (ptr == NULL) {
- offs = 0;
- } else {
- offs = ptr - page;
- }
-
- ut_ad((field != PAGE_HEAP_TOP) || offs);
-
- page_header_set_field(page, page_zip, field, offs);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Resets the last insert info field in the page header. Writes to mlog
-about this operation. */
-UNIV_INLINE
-void
-page_header_reset_last_insert(
-/*==========================*/
- page_t* page, /*!< in/out: page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(page && mtr);
-
- if (UNIV_LIKELY_NULL(page_zip)) {
- mach_write_to_2(page + (PAGE_HEADER + PAGE_LAST_INSERT), 0);
- page_zip_write_header(page_zip,
- page + (PAGE_HEADER + PAGE_LAST_INSERT),
- 2, mtr);
- } else {
- mlog_write_ulint(page + (PAGE_HEADER + PAGE_LAST_INSERT), 0,
- MLOG_2BYTES, mtr);
- }
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/************************************************************//**
-Determine whether the page is in new-style compact format.
-@return nonzero if the page is in compact format, zero if it is in
-old-style format */
-UNIV_INLINE
-ulint
-page_is_comp(
-/*=========*/
- const page_t* page) /*!< in: index page */
-{
- return(UNIV_EXPECT(page_header_get_field(page, PAGE_N_HEAP) & 0x8000,
- 0x8000));
-}
-
-/************************************************************//**
-TRUE if the record is on a page in compact format.
-@return nonzero if in compact format */
-UNIV_INLINE
-ulint
-page_rec_is_comp(
-/*=============*/
- const rec_t* rec) /*!< in: record */
-{
- return(page_is_comp(page_align(rec)));
-}
-
-/***************************************************************//**
-Returns the heap number of a record.
-@return heap number */
-UNIV_INLINE
-ulint
-page_rec_get_heap_no(
-/*=================*/
- const rec_t* rec) /*!< in: the physical record */
-{
- if (page_rec_is_comp(rec)) {
- return(rec_get_heap_no_new(rec));
- } else {
- return(rec_get_heap_no_old(rec));
- }
-}
-
-/************************************************************//**
-Determine whether the page is a B-tree leaf.
-@return TRUE if the page is a B-tree leaf */
-UNIV_INLINE
-ibool
-page_is_leaf(
-/*=========*/
- const page_t* page) /*!< in: page */
-{
- return(!*(const uint16*) (page + (PAGE_HEADER + PAGE_LEVEL)));
-}
-
-/************************************************************//**
-Gets the offset of the first record on the page.
-@return offset of the first record in record list, relative from page */
-UNIV_INLINE
-ulint
-page_get_infimum_offset(
-/*====================*/
- const page_t* page) /*!< in: page which must have record(s) */
-{
- ut_ad(page);
- ut_ad(!page_offset(page));
-
- if (page_is_comp(page)) {
- return(PAGE_NEW_INFIMUM);
- } else {
- return(PAGE_OLD_INFIMUM);
- }
-}
-
-/************************************************************//**
-Gets the offset of the last record on the page.
-@return offset of the last record in record list, relative from page */
-UNIV_INLINE
-ulint
-page_get_supremum_offset(
-/*=====================*/
- const page_t* page) /*!< in: page which must have record(s) */
-{
- ut_ad(page);
- ut_ad(!page_offset(page));
-
- if (page_is_comp(page)) {
- return(PAGE_NEW_SUPREMUM);
- } else {
- return(PAGE_OLD_SUPREMUM);
- }
-}
-
-/************************************************************//**
-TRUE if the record is a user record on the page.
-@return TRUE if a user record */
-UNIV_INLINE
-ibool
-page_rec_is_user_rec_low(
-/*=====================*/
- ulint offset) /*!< in: record offset on page */
-{
- ut_ad(offset >= PAGE_NEW_INFIMUM);
-#if PAGE_OLD_INFIMUM < PAGE_NEW_INFIMUM
-# error "PAGE_OLD_INFIMUM < PAGE_NEW_INFIMUM"
-#endif
-#if PAGE_OLD_SUPREMUM < PAGE_NEW_SUPREMUM
-# error "PAGE_OLD_SUPREMUM < PAGE_NEW_SUPREMUM"
-#endif
-#if PAGE_NEW_INFIMUM > PAGE_OLD_SUPREMUM
-# error "PAGE_NEW_INFIMUM > PAGE_OLD_SUPREMUM"
-#endif
-#if PAGE_OLD_INFIMUM > PAGE_NEW_SUPREMUM
-# error "PAGE_OLD_INFIMUM > PAGE_NEW_SUPREMUM"
-#endif
-#if PAGE_NEW_SUPREMUM > PAGE_OLD_SUPREMUM_END
-# error "PAGE_NEW_SUPREMUM > PAGE_OLD_SUPREMUM_END"
-#endif
-#if PAGE_OLD_SUPREMUM > PAGE_NEW_SUPREMUM_END
-# error "PAGE_OLD_SUPREMUM > PAGE_NEW_SUPREMUM_END"
-#endif
- ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
-
- return(UNIV_LIKELY(offset != PAGE_NEW_SUPREMUM)
- && UNIV_LIKELY(offset != PAGE_NEW_INFIMUM)
- && UNIV_LIKELY(offset != PAGE_OLD_INFIMUM)
- && UNIV_LIKELY(offset != PAGE_OLD_SUPREMUM));
-}
-
-/************************************************************//**
-TRUE if the record is the supremum record on a page.
-@return TRUE if the supremum record */
-UNIV_INLINE
-ibool
-page_rec_is_supremum_low(
-/*=====================*/
- ulint offset) /*!< in: record offset on page */
-{
- ut_ad(offset >= PAGE_NEW_INFIMUM);
- ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
-
- return(UNIV_UNLIKELY(offset == PAGE_NEW_SUPREMUM)
- || UNIV_UNLIKELY(offset == PAGE_OLD_SUPREMUM));
-}
-
-/************************************************************//**
-TRUE if the record is the infimum record on a page.
-@return TRUE if the infimum record */
-UNIV_INLINE
-ibool
-page_rec_is_infimum_low(
-/*====================*/
- ulint offset) /*!< in: record offset on page */
-{
- ut_ad(offset >= PAGE_NEW_INFIMUM);
- ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
-
- return(UNIV_UNLIKELY(offset == PAGE_NEW_INFIMUM)
- || UNIV_UNLIKELY(offset == PAGE_OLD_INFIMUM));
-}
-
-/************************************************************//**
-TRUE if the record is a user record on the page.
-@return TRUE if a user record */
-UNIV_INLINE
-ibool
-page_rec_is_user_rec(
-/*=================*/
- const rec_t* rec) /*!< in: record */
-{
- return(page_rec_is_user_rec_low(page_offset(rec)));
-}
-
-/************************************************************//**
-TRUE if the record is the supremum record on a page.
-@return TRUE if the supremum record */
-UNIV_INLINE
-ibool
-page_rec_is_supremum(
-/*=================*/
- const rec_t* rec) /*!< in: record */
-{
- return(page_rec_is_supremum_low(page_offset(rec)));
-}
-
-/************************************************************//**
-TRUE if the record is the infimum record on a page.
-@return TRUE if the infimum record */
-UNIV_INLINE
-ibool
-page_rec_is_infimum(
-/*================*/
- const rec_t* rec) /*!< in: record */
-{
- return(page_rec_is_infimum_low(page_offset(rec)));
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Compares a data tuple to a physical record. Differs from the function
-cmp_dtuple_rec_with_match in the way that the record must reside on an
-index page, and also page infimum and supremum records can be given in
-the parameter rec. These are considered as the negative infinity and
-the positive infinity in the alphabetical order.
-@return 1, 0, -1, if dtuple is greater, equal, less than rec,
-respectively, when only the common first fields are compared */
-UNIV_INLINE
-int
-page_cmp_dtuple_rec_with_match(
-/*===========================*/
- const dtuple_t* dtuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: physical record on a page; may also
- be page infimum or supremum, in which case
- matched-parameter values below are not
- affected */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint* matched_fields, /*!< in/out: number of already completely
- matched fields; when function returns
- contains the value for current comparison */
- ulint* matched_bytes) /*!< in/out: number of already matched
- bytes within the first field not completely
- matched; when function returns contains the
- value for current comparison */
-{
- ulint rec_offset;
-
- ut_ad(dtuple_check_typed(dtuple));
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- ut_ad(!rec_offs_comp(offsets) == !page_rec_is_comp(rec));
-
- rec_offset = page_offset(rec);
-
- if (UNIV_UNLIKELY(rec_offset == PAGE_NEW_INFIMUM)
- || UNIV_UNLIKELY(rec_offset == PAGE_OLD_INFIMUM)) {
- return(1);
- }
- if (UNIV_UNLIKELY(rec_offset == PAGE_NEW_SUPREMUM)
- || UNIV_UNLIKELY(rec_offset == PAGE_OLD_SUPREMUM)) {
- return(-1);
- }
-
- return(cmp_dtuple_rec_with_match(dtuple, rec, offsets,
- matched_fields,
- matched_bytes));
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*************************************************************//**
-Gets the page number.
-@return page number */
-UNIV_INLINE
-ulint
-page_get_page_no(
-/*=============*/
- const page_t* page) /*!< in: page */
-{
- ut_ad(page == page_align((page_t*) page));
- return(mach_read_from_4(page + FIL_PAGE_OFFSET));
-}
-
-/*************************************************************//**
-Gets the tablespace identifier.
-@return space id */
-UNIV_INLINE
-ulint
-page_get_space_id(
-/*==============*/
- const page_t* page) /*!< in: page */
-{
- ut_ad(page == page_align((page_t*) page));
- return(mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
-}
-
-/*************************************************************//**
-Gets the number of user records on page (infimum and supremum records
-are not user records).
-@return number of user records */
-UNIV_INLINE
-ulint
-page_get_n_recs(
-/*============*/
- const page_t* page) /*!< in: index page */
-{
- return(page_header_get_field(page, PAGE_N_RECS));
-}
-
-/*************************************************************//**
-Gets the number of dir slots in directory.
-@return number of slots */
-UNIV_INLINE
-ulint
-page_dir_get_n_slots(
-/*=================*/
- const page_t* page) /*!< in: index page */
-{
- return(page_header_get_field(page, PAGE_N_DIR_SLOTS));
-}
-/*************************************************************//**
-Sets the number of dir slots in directory. */
-UNIV_INLINE
-void
-page_dir_set_n_slots(
-/*=================*/
- page_t* page, /*!< in/out: page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL */
- ulint n_slots)/*!< in: number of slots */
-{
- page_header_set_field(page, page_zip, PAGE_N_DIR_SLOTS, n_slots);
-}
-
-/*************************************************************//**
-Gets the number of records in the heap.
-@return number of user records */
-UNIV_INLINE
-ulint
-page_dir_get_n_heap(
-/*================*/
- const page_t* page) /*!< in: index page */
-{
- return(page_header_get_field(page, PAGE_N_HEAP) & 0x7fff);
-}
-
-/*************************************************************//**
-Sets the number of records in the heap. */
-UNIV_INLINE
-void
-page_dir_set_n_heap(
-/*================*/
- page_t* page, /*!< in/out: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL.
- Note that the size of the dense page directory
- in the compressed page trailer is
- n_heap * PAGE_ZIP_DIR_SLOT_SIZE. */
- ulint n_heap) /*!< in: number of records */
-{
- ut_ad(n_heap < 0x8000);
- ut_ad(!page_zip || n_heap
- == (page_header_get_field(page, PAGE_N_HEAP) & 0x7fff) + 1);
-
- page_header_set_field(page, page_zip, PAGE_N_HEAP, n_heap
- | (0x8000
- & page_header_get_field(page, PAGE_N_HEAP)));
-}
-
-#ifdef UNIV_DEBUG
-/*************************************************************//**
-Gets pointer to nth directory slot.
-@return pointer to dir slot */
-UNIV_INLINE
-page_dir_slot_t*
-page_dir_get_nth_slot(
-/*==================*/
- const page_t* page, /*!< in: index page */
- ulint n) /*!< in: position */
-{
- ut_ad(page_dir_get_n_slots(page) > n);
-
- return((page_dir_slot_t*)
- page + UNIV_PAGE_SIZE - PAGE_DIR
- - (n + 1) * PAGE_DIR_SLOT_SIZE);
-}
-#endif /* UNIV_DEBUG */
-
-/**************************************************************//**
-Used to check the consistency of a record on a page.
-@return TRUE if succeed */
-UNIV_INLINE
-ibool
-page_rec_check(
-/*===========*/
- const rec_t* rec) /*!< in: record */
-{
- const page_t* page = page_align(rec);
-
- ut_a(rec);
-
- ut_a(page_offset(rec) <= page_header_get_field(page, PAGE_HEAP_TOP));
- ut_a(page_offset(rec) >= PAGE_DATA);
-
- return(TRUE);
-}
-
-/***************************************************************//**
-Gets the record pointed to by a directory slot.
-@return pointer to record */
-UNIV_INLINE
-const rec_t*
-page_dir_slot_get_rec(
-/*==================*/
- const page_dir_slot_t* slot) /*!< in: directory slot */
-{
- return(page_align(slot) + mach_read_from_2(slot));
-}
-
-/***************************************************************//**
-This is used to set the record offset in a directory slot. */
-UNIV_INLINE
-void
-page_dir_slot_set_rec(
-/*==================*/
- page_dir_slot_t* slot, /*!< in: directory slot */
- rec_t* rec) /*!< in: record on the page */
-{
- ut_ad(page_rec_check(rec));
-
- mach_write_to_2(slot, page_offset(rec));
-}
-
-/***************************************************************//**
-Gets the number of records owned by a directory slot.
-@return number of records */
-UNIV_INLINE
-ulint
-page_dir_slot_get_n_owned(
-/*======================*/
- const page_dir_slot_t* slot) /*!< in: page directory slot */
-{
- const rec_t* rec = page_dir_slot_get_rec(slot);
- if (page_rec_is_comp(slot)) {
- return(rec_get_n_owned_new(rec));
- } else {
- return(rec_get_n_owned_old(rec));
- }
-}
-
-/***************************************************************//**
-This is used to set the owned records field of a directory slot. */
-UNIV_INLINE
-void
-page_dir_slot_set_n_owned(
-/*======================*/
- page_dir_slot_t*slot, /*!< in/out: directory slot */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- ulint n) /*!< in: number of records owned by the slot */
-{
- rec_t* rec = (rec_t*) page_dir_slot_get_rec(slot);
- if (page_rec_is_comp(slot)) {
- rec_set_n_owned_new(rec, page_zip, n);
- } else {
- ut_ad(!page_zip);
- rec_set_n_owned_old(rec, n);
- }
-}
-
-/************************************************************//**
-Calculates the space reserved for directory slots of a given number of
-records. The exact value is a fraction number n * PAGE_DIR_SLOT_SIZE /
-PAGE_DIR_SLOT_MIN_N_OWNED, and it is rounded upwards to an integer. */
-UNIV_INLINE
-ulint
-page_dir_calc_reserved_space(
-/*=========================*/
- ulint n_recs) /*!< in: number of records */
-{
- return((PAGE_DIR_SLOT_SIZE * n_recs + PAGE_DIR_SLOT_MIN_N_OWNED - 1)
- / PAGE_DIR_SLOT_MIN_N_OWNED);
-}
-
-/************************************************************//**
-Gets the pointer to the next record on the page.
-@return pointer to next record */
-UNIV_INLINE
-const rec_t*
-page_rec_get_next_low(
-/*==================*/
- const rec_t* rec, /*!< in: pointer to record */
- ulint comp) /*!< in: nonzero=compact page layout */
-{
- ulint offs;
- const page_t* page;
-
- ut_ad(page_rec_check(rec));
-
- page = page_align(rec);
-
- offs = rec_get_next_offs(rec, comp);
-
- if (UNIV_UNLIKELY(offs >= UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Next record offset is nonsensical %lu"
- " in record at offset %lu\n"
- "InnoDB: rec address %p, space id %lu, page %lu\n",
- (ulong)offs, (ulong) page_offset(rec),
- (void*) rec,
- (ulong) page_get_space_id(page),
- (ulong) page_get_page_no(page));
- buf_page_print(page, 0);
-
- ut_error;
- }
-
- if (UNIV_UNLIKELY(offs == 0)) {
-
- return(NULL);
- }
-
- return(page + offs);
-}
-
-/************************************************************//**
-Gets the pointer to the next record on the page.
-@return pointer to next record */
-UNIV_INLINE
-rec_t*
-page_rec_get_next(
-/*==============*/
- rec_t* rec) /*!< in: pointer to record */
-{
- return((rec_t*) page_rec_get_next_low(rec, page_rec_is_comp(rec)));
-}
-
-/************************************************************//**
-Gets the pointer to the next record on the page.
-@return pointer to next record */
-UNIV_INLINE
-const rec_t*
-page_rec_get_next_const(
-/*====================*/
- const rec_t* rec) /*!< in: pointer to record */
-{
- return(page_rec_get_next_low(rec, page_rec_is_comp(rec)));
-}
-
-/************************************************************//**
-Sets the pointer to the next record on the page. */
-UNIV_INLINE
-void
-page_rec_set_next(
-/*==============*/
- rec_t* rec, /*!< in: pointer to record,
- must not be page supremum */
- rec_t* next) /*!< in: pointer to next record,
- must not be page infimum */
-{
- ulint offs;
-
- ut_ad(page_rec_check(rec));
- ut_ad(!page_rec_is_supremum(rec));
- ut_ad(rec != next);
-
- ut_ad(!next || !page_rec_is_infimum(next));
- ut_ad(!next || page_align(rec) == page_align(next));
-
- if (UNIV_LIKELY(next != NULL)) {
- offs = page_offset(next);
- } else {
- offs = 0;
- }
-
- if (page_rec_is_comp(rec)) {
- rec_set_next_offs_new(rec, offs);
- } else {
- rec_set_next_offs_old(rec, offs);
- }
-}
-
-/************************************************************//**
-Gets the pointer to the previous record.
-@return pointer to previous record */
-UNIV_INLINE
-const rec_t*
-page_rec_get_prev_const(
-/*====================*/
- const rec_t* rec) /*!< in: pointer to record, must not be page
- infimum */
-{
- const page_dir_slot_t* slot;
- ulint slot_no;
- const rec_t* rec2;
- const rec_t* prev_rec = NULL;
- const page_t* page;
-
- ut_ad(page_rec_check(rec));
-
- page = page_align(rec);
-
- ut_ad(!page_rec_is_infimum(rec));
-
- slot_no = page_dir_find_owner_slot(rec);
-
- ut_a(slot_no != 0);
-
- slot = page_dir_get_nth_slot(page, slot_no - 1);
-
- rec2 = page_dir_slot_get_rec(slot);
-
- if (page_is_comp(page)) {
- while (rec != rec2) {
- prev_rec = rec2;
- rec2 = page_rec_get_next_low(rec2, TRUE);
- }
- } else {
- while (rec != rec2) {
- prev_rec = rec2;
- rec2 = page_rec_get_next_low(rec2, FALSE);
- }
- }
-
- ut_a(prev_rec);
-
- return(prev_rec);
-}
-
-/************************************************************//**
-Gets the pointer to the previous record.
-@return pointer to previous record */
-UNIV_INLINE
-rec_t*
-page_rec_get_prev(
-/*==============*/
- rec_t* rec) /*!< in: pointer to record, must not be page
- infimum */
-{
- return((rec_t*) page_rec_get_prev_const(rec));
-}
-
-/***************************************************************//**
-Looks for the record which owns the given record.
-@return the owner record */
-UNIV_INLINE
-rec_t*
-page_rec_find_owner_rec(
-/*====================*/
- rec_t* rec) /*!< in: the physical record */
-{
- ut_ad(page_rec_check(rec));
-
- if (page_rec_is_comp(rec)) {
- while (rec_get_n_owned_new(rec) == 0) {
- rec = page_rec_get_next(rec);
- }
- } else {
- while (rec_get_n_owned_old(rec) == 0) {
- rec = page_rec_get_next(rec);
- }
- }
-
- return(rec);
-}
-
-/**********************************************************//**
-Returns the base extra size of a physical record. This is the
-size of the fixed header, independent of the record size.
-@return REC_N_NEW_EXTRA_BYTES or REC_N_OLD_EXTRA_BYTES */
-UNIV_INLINE
-ulint
-page_rec_get_base_extra_size(
-/*=========================*/
- const rec_t* rec) /*!< in: physical record */
-{
-#if REC_N_NEW_EXTRA_BYTES + 1 != REC_N_OLD_EXTRA_BYTES
-# error "REC_N_NEW_EXTRA_BYTES + 1 != REC_N_OLD_EXTRA_BYTES"
-#endif
- return(REC_N_NEW_EXTRA_BYTES + (ulint) !page_rec_is_comp(rec));
-}
-
-/************************************************************//**
-Returns the sum of the sizes of the records in the record list, excluding
-the infimum and supremum records.
-@return data in bytes */
-UNIV_INLINE
-ulint
-page_get_data_size(
-/*===============*/
- const page_t* page) /*!< in: index page */
-{
- ulint ret;
-
- ret = (ulint)(page_header_get_field(page, PAGE_HEAP_TOP)
- - (page_is_comp(page)
- ? PAGE_NEW_SUPREMUM_END
- : PAGE_OLD_SUPREMUM_END)
- - page_header_get_field(page, PAGE_GARBAGE));
-
- ut_ad(ret < UNIV_PAGE_SIZE);
-
- return(ret);
-}
-
-
-/************************************************************//**
-Allocates a block of memory from the free list of an index page. */
-UNIV_INLINE
-void
-page_mem_alloc_free(
-/*================*/
- page_t* page, /*!< in/out: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page with enough
- space available for inserting the record,
- or NULL */
- rec_t* next_rec,/*!< in: pointer to the new head of the
- free record list */
- ulint need) /*!< in: number of bytes allocated */
-{
- ulint garbage;
-
-#ifdef UNIV_DEBUG
- const rec_t* old_rec = page_header_get_ptr(page, PAGE_FREE);
- ulint next_offs;
-
- ut_ad(old_rec);
- next_offs = rec_get_next_offs(old_rec, page_is_comp(page));
- ut_ad(next_rec == (next_offs ? page + next_offs : NULL));
-#endif
-
- page_header_set_ptr(page, page_zip, PAGE_FREE, next_rec);
-
- garbage = page_header_get_field(page, PAGE_GARBAGE);
- ut_ad(garbage >= need);
-
- page_header_set_field(page, page_zip, PAGE_GARBAGE, garbage - need);
-}
-
-/*************************************************************//**
-Calculates free space if a page is emptied.
-@return free space */
-UNIV_INLINE
-ulint
-page_get_free_space_of_empty(
-/*=========================*/
- ulint comp) /*!< in: nonzero=compact page layout */
-{
- if (UNIV_LIKELY(comp)) {
- return((ulint)(UNIV_PAGE_SIZE
- - PAGE_NEW_SUPREMUM_END
- - PAGE_DIR
- - 2 * PAGE_DIR_SLOT_SIZE));
- }
-
- return((ulint)(UNIV_PAGE_SIZE
- - PAGE_OLD_SUPREMUM_END
- - PAGE_DIR
- - 2 * PAGE_DIR_SLOT_SIZE));
-}
-
-/************************************************************//**
-Each user record on a page, and also the deleted user records in the heap
-takes its size plus the fraction of the dir cell size /
-PAGE_DIR_SLOT_MIN_N_OWNED bytes for it. If the sum of these exceeds the
-value of page_get_free_space_of_empty, the insert is impossible, otherwise
-it is allowed. This function returns the maximum combined size of records
-which can be inserted on top of the record heap.
-@return maximum combined size for inserted records */
-UNIV_INLINE
-ulint
-page_get_max_insert_size(
-/*=====================*/
- const page_t* page, /*!< in: index page */
- ulint n_recs) /*!< in: number of records */
-{
- ulint occupied;
- ulint free_space;
-
- if (page_is_comp(page)) {
- occupied = page_header_get_field(page, PAGE_HEAP_TOP)
- - PAGE_NEW_SUPREMUM_END
- + page_dir_calc_reserved_space(
- n_recs + page_dir_get_n_heap(page) - 2);
-
- free_space = page_get_free_space_of_empty(TRUE);
- } else {
- occupied = page_header_get_field(page, PAGE_HEAP_TOP)
- - PAGE_OLD_SUPREMUM_END
- + page_dir_calc_reserved_space(
- n_recs + page_dir_get_n_heap(page) - 2);
-
- free_space = page_get_free_space_of_empty(FALSE);
- }
-
- /* Above the 'n_recs +' part reserves directory space for the new
- inserted records; the '- 2' excludes page infimum and supremum
- records */
-
- if (occupied > free_space) {
-
- return(0);
- }
-
- return(free_space - occupied);
-}
-
-/************************************************************//**
-Returns the maximum combined size of records which can be inserted on top
-of the record heap if a page is first reorganized.
-@return maximum combined size for inserted records */
-UNIV_INLINE
-ulint
-page_get_max_insert_size_after_reorganize(
-/*======================================*/
- const page_t* page, /*!< in: index page */
- ulint n_recs) /*!< in: number of records */
-{
- ulint occupied;
- ulint free_space;
-
- occupied = page_get_data_size(page)
- + page_dir_calc_reserved_space(n_recs + page_get_n_recs(page));
-
- free_space = page_get_free_space_of_empty(page_is_comp(page));
-
- if (occupied > free_space) {
-
- return(0);
- }
-
- return(free_space - occupied);
-}
-
-/************************************************************//**
-Puts a record to free list. */
-UNIV_INLINE
-void
-page_mem_free(
-/*==========*/
- page_t* page, /*!< in/out: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- rec_t* rec, /*!< in: pointer to the (origin of) record */
- dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- rec_t* free;
- ulint garbage;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
- free = page_header_get_ptr(page, PAGE_FREE);
-
- page_rec_set_next(rec, free);
- page_header_set_ptr(page, page_zip, PAGE_FREE, rec);
-
- garbage = page_header_get_field(page, PAGE_GARBAGE);
-
- page_header_set_field(page, page_zip, PAGE_GARBAGE,
- garbage + rec_offs_size(offsets));
-
- if (UNIV_LIKELY_NULL(page_zip)) {
- page_zip_dir_delete(page_zip, rec, index, offsets, free);
- } else {
- page_header_set_field(page, page_zip, PAGE_N_RECS,
- page_get_n_recs(page) - 1);
- }
-}
-
-#ifdef UNIV_MATERIALIZE
-#undef UNIV_INLINE
-#define UNIV_INLINE UNIV_INLINE_ORIGINAL
-#endif
diff --git a/storage/innodb_plugin/include/page0types.h b/storage/innodb_plugin/include/page0types.h
deleted file mode 100644
index d9a277bf208..00000000000
--- a/storage/innodb_plugin/include/page0types.h
+++ /dev/null
@@ -1,150 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/page0types.h
-Index page routines
-
-Created 2/2/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef page0types_h
-#define page0types_h
-
-#include "univ.i"
-#include "dict0types.h"
-#include "mtr0types.h"
-
-/** Eliminates a name collision on HP-UX */
-#define page_t ib_page_t
-/** Type of the index page */
-typedef byte page_t;
-/** Index page cursor */
-typedef struct page_cur_struct page_cur_t;
-
-/** Compressed index page */
-typedef byte page_zip_t;
-/** Compressed page descriptor */
-typedef struct page_zip_des_struct page_zip_des_t;
-
-/* The following definitions would better belong to page0zip.h,
-but we cannot include page0zip.h from rem0rec.ic, because
-page0*.h includes rem0rec.h and may include rem0rec.ic. */
-
-/** Number of bits needed for representing different compressed page sizes */
-#define PAGE_ZIP_SSIZE_BITS 3
-
-/** log2 of smallest compressed page size */
-#define PAGE_ZIP_MIN_SIZE_SHIFT 10
-/** Smallest compressed page size */
-#define PAGE_ZIP_MIN_SIZE (1 << PAGE_ZIP_MIN_SIZE_SHIFT)
-
-/** Number of supported compressed page sizes */
-#define PAGE_ZIP_NUM_SSIZE (UNIV_PAGE_SIZE_SHIFT - PAGE_ZIP_MIN_SIZE_SHIFT + 2)
-#if PAGE_ZIP_NUM_SSIZE > (1 << PAGE_ZIP_SSIZE_BITS)
-# error "PAGE_ZIP_NUM_SSIZE > (1 << PAGE_ZIP_SSIZE_BITS)"
-#endif
-
-/** Compressed page descriptor */
-struct page_zip_des_struct
-{
- page_zip_t* data; /*!< compressed page data */
-
-#ifdef UNIV_DEBUG
- unsigned m_start:16; /*!< start offset of modification log */
-#endif /* UNIV_DEBUG */
- unsigned m_end:16; /*!< end offset of modification log */
- unsigned m_nonempty:1; /*!< TRUE if the modification log
- is not empty */
- unsigned n_blobs:12; /*!< number of externally stored
- columns on the page; the maximum
- is 744 on a 16 KiB page */
- unsigned ssize:PAGE_ZIP_SSIZE_BITS;
- /*!< 0 or compressed page size;
- the size in bytes is
- PAGE_ZIP_MIN_SIZE << (ssize - 1). */
-};
-
-/** Compression statistics for a given page size */
-struct page_zip_stat_struct {
- /** Number of page compressions */
- ulint compressed;
- /** Number of successful page compressions */
- ulint compressed_ok;
- /** Number of page decompressions */
- ulint decompressed;
- /** Duration of page compressions in microseconds */
- ib_uint64_t compressed_usec;
- /** Duration of page decompressions in microseconds */
- ib_uint64_t decompressed_usec;
-};
-
-/** Compression statistics */
-typedef struct page_zip_stat_struct page_zip_stat_t;
-
-/** Statistics on compression, indexed by page_zip_des_struct::ssize - 1 */
-extern page_zip_stat_t page_zip_stat[PAGE_ZIP_NUM_SSIZE - 1];
-
-/**********************************************************************//**
-Write the "deleted" flag of a record on a compressed page. The flag must
-already have been written on the uncompressed page. */
-UNIV_INTERN
-void
-page_zip_rec_set_deleted(
-/*=====================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- const byte* rec, /*!< in: record on the uncompressed page */
- ulint flag) /*!< in: the deleted flag (nonzero=TRUE) */
- __attribute__((nonnull));
-
-/**********************************************************************//**
-Write the "owned" flag of a record on a compressed page. The n_owned field
-must already have been written on the uncompressed page. */
-UNIV_INTERN
-void
-page_zip_rec_set_owned(
-/*===================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- const byte* rec, /*!< in: record on the uncompressed page */
- ulint flag) /*!< in: the owned flag (nonzero=TRUE) */
- __attribute__((nonnull));
-
-/**********************************************************************//**
-Shift the dense page directory when a record is deleted. */
-UNIV_INTERN
-void
-page_zip_dir_delete(
-/*================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- byte* rec, /*!< in: deleted record */
- dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets,/*!< in: rec_get_offsets(rec) */
- const byte* free) /*!< in: previous start of the free list */
- __attribute__((nonnull(1,2,3,4)));
-
-/**********************************************************************//**
-Add a slot to the dense page directory. */
-UNIV_INTERN
-void
-page_zip_dir_add_slot(
-/*==================*/
- page_zip_des_t* page_zip, /*!< in/out: compressed page */
- ulint is_clustered) /*!< in: nonzero for clustered index,
- zero for others */
- __attribute__((nonnull));
-#endif
diff --git a/storage/innodb_plugin/include/pars0grm.h b/storage/innodb_plugin/include/pars0grm.h
deleted file mode 100644
index 3de233eed3a..00000000000
--- a/storage/innodb_plugin/include/pars0grm.h
+++ /dev/null
@@ -1,236 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software
-Foundation, Inc.
-
-As a special exception, when this file is copied by Bison into a
-Bison output file, you may use that output file without restriction.
-This special exception was added by the Free Software Foundation
-in version 1.24 of Bison.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/* A Bison parser, made by GNU Bison 1.875d. */
-
-/* Tokens. */
-#ifndef YYTOKENTYPE
-# define YYTOKENTYPE
- /* Put the tokens into the symbol table, so that GDB and other debuggers
- know about them. */
- enum yytokentype {
- PARS_INT_LIT = 258,
- PARS_FLOAT_LIT = 259,
- PARS_STR_LIT = 260,
- PARS_FIXBINARY_LIT = 261,
- PARS_BLOB_LIT = 262,
- PARS_NULL_LIT = 263,
- PARS_ID_TOKEN = 264,
- PARS_AND_TOKEN = 265,
- PARS_OR_TOKEN = 266,
- PARS_NOT_TOKEN = 267,
- PARS_GE_TOKEN = 268,
- PARS_LE_TOKEN = 269,
- PARS_NE_TOKEN = 270,
- PARS_PROCEDURE_TOKEN = 271,
- PARS_IN_TOKEN = 272,
- PARS_OUT_TOKEN = 273,
- PARS_BINARY_TOKEN = 274,
- PARS_BLOB_TOKEN = 275,
- PARS_INT_TOKEN = 276,
- PARS_INTEGER_TOKEN = 277,
- PARS_FLOAT_TOKEN = 278,
- PARS_CHAR_TOKEN = 279,
- PARS_IS_TOKEN = 280,
- PARS_BEGIN_TOKEN = 281,
- PARS_END_TOKEN = 282,
- PARS_IF_TOKEN = 283,
- PARS_THEN_TOKEN = 284,
- PARS_ELSE_TOKEN = 285,
- PARS_ELSIF_TOKEN = 286,
- PARS_LOOP_TOKEN = 287,
- PARS_WHILE_TOKEN = 288,
- PARS_RETURN_TOKEN = 289,
- PARS_SELECT_TOKEN = 290,
- PARS_SUM_TOKEN = 291,
- PARS_COUNT_TOKEN = 292,
- PARS_DISTINCT_TOKEN = 293,
- PARS_FROM_TOKEN = 294,
- PARS_WHERE_TOKEN = 295,
- PARS_FOR_TOKEN = 296,
- PARS_DDOT_TOKEN = 297,
- PARS_READ_TOKEN = 298,
- PARS_ORDER_TOKEN = 299,
- PARS_BY_TOKEN = 300,
- PARS_ASC_TOKEN = 301,
- PARS_DESC_TOKEN = 302,
- PARS_INSERT_TOKEN = 303,
- PARS_INTO_TOKEN = 304,
- PARS_VALUES_TOKEN = 305,
- PARS_UPDATE_TOKEN = 306,
- PARS_SET_TOKEN = 307,
- PARS_DELETE_TOKEN = 308,
- PARS_CURRENT_TOKEN = 309,
- PARS_OF_TOKEN = 310,
- PARS_CREATE_TOKEN = 311,
- PARS_TABLE_TOKEN = 312,
- PARS_INDEX_TOKEN = 313,
- PARS_UNIQUE_TOKEN = 314,
- PARS_CLUSTERED_TOKEN = 315,
- PARS_DOES_NOT_FIT_IN_MEM_TOKEN = 316,
- PARS_ON_TOKEN = 317,
- PARS_ASSIGN_TOKEN = 318,
- PARS_DECLARE_TOKEN = 319,
- PARS_CURSOR_TOKEN = 320,
- PARS_SQL_TOKEN = 321,
- PARS_OPEN_TOKEN = 322,
- PARS_FETCH_TOKEN = 323,
- PARS_CLOSE_TOKEN = 324,
- PARS_NOTFOUND_TOKEN = 325,
- PARS_TO_CHAR_TOKEN = 326,
- PARS_TO_NUMBER_TOKEN = 327,
- PARS_TO_BINARY_TOKEN = 328,
- PARS_BINARY_TO_NUMBER_TOKEN = 329,
- PARS_SUBSTR_TOKEN = 330,
- PARS_REPLSTR_TOKEN = 331,
- PARS_CONCAT_TOKEN = 332,
- PARS_INSTR_TOKEN = 333,
- PARS_LENGTH_TOKEN = 334,
- PARS_SYSDATE_TOKEN = 335,
- PARS_PRINTF_TOKEN = 336,
- PARS_ASSERT_TOKEN = 337,
- PARS_RND_TOKEN = 338,
- PARS_RND_STR_TOKEN = 339,
- PARS_ROW_PRINTF_TOKEN = 340,
- PARS_COMMIT_TOKEN = 341,
- PARS_ROLLBACK_TOKEN = 342,
- PARS_WORK_TOKEN = 343,
- PARS_UNSIGNED_TOKEN = 344,
- PARS_EXIT_TOKEN = 345,
- PARS_FUNCTION_TOKEN = 346,
- PARS_LOCK_TOKEN = 347,
- PARS_SHARE_TOKEN = 348,
- PARS_MODE_TOKEN = 349,
- NEG = 350
- };
-#endif
-#define PARS_INT_LIT 258
-#define PARS_FLOAT_LIT 259
-#define PARS_STR_LIT 260
-#define PARS_FIXBINARY_LIT 261
-#define PARS_BLOB_LIT 262
-#define PARS_NULL_LIT 263
-#define PARS_ID_TOKEN 264
-#define PARS_AND_TOKEN 265
-#define PARS_OR_TOKEN 266
-#define PARS_NOT_TOKEN 267
-#define PARS_GE_TOKEN 268
-#define PARS_LE_TOKEN 269
-#define PARS_NE_TOKEN 270
-#define PARS_PROCEDURE_TOKEN 271
-#define PARS_IN_TOKEN 272
-#define PARS_OUT_TOKEN 273
-#define PARS_BINARY_TOKEN 274
-#define PARS_BLOB_TOKEN 275
-#define PARS_INT_TOKEN 276
-#define PARS_INTEGER_TOKEN 277
-#define PARS_FLOAT_TOKEN 278
-#define PARS_CHAR_TOKEN 279
-#define PARS_IS_TOKEN 280
-#define PARS_BEGIN_TOKEN 281
-#define PARS_END_TOKEN 282
-#define PARS_IF_TOKEN 283
-#define PARS_THEN_TOKEN 284
-#define PARS_ELSE_TOKEN 285
-#define PARS_ELSIF_TOKEN 286
-#define PARS_LOOP_TOKEN 287
-#define PARS_WHILE_TOKEN 288
-#define PARS_RETURN_TOKEN 289
-#define PARS_SELECT_TOKEN 290
-#define PARS_SUM_TOKEN 291
-#define PARS_COUNT_TOKEN 292
-#define PARS_DISTINCT_TOKEN 293
-#define PARS_FROM_TOKEN 294
-#define PARS_WHERE_TOKEN 295
-#define PARS_FOR_TOKEN 296
-#define PARS_DDOT_TOKEN 297
-#define PARS_READ_TOKEN 298
-#define PARS_ORDER_TOKEN 299
-#define PARS_BY_TOKEN 300
-#define PARS_ASC_TOKEN 301
-#define PARS_DESC_TOKEN 302
-#define PARS_INSERT_TOKEN 303
-#define PARS_INTO_TOKEN 304
-#define PARS_VALUES_TOKEN 305
-#define PARS_UPDATE_TOKEN 306
-#define PARS_SET_TOKEN 307
-#define PARS_DELETE_TOKEN 308
-#define PARS_CURRENT_TOKEN 309
-#define PARS_OF_TOKEN 310
-#define PARS_CREATE_TOKEN 311
-#define PARS_TABLE_TOKEN 312
-#define PARS_INDEX_TOKEN 313
-#define PARS_UNIQUE_TOKEN 314
-#define PARS_CLUSTERED_TOKEN 315
-#define PARS_DOES_NOT_FIT_IN_MEM_TOKEN 316
-#define PARS_ON_TOKEN 317
-#define PARS_ASSIGN_TOKEN 318
-#define PARS_DECLARE_TOKEN 319
-#define PARS_CURSOR_TOKEN 320
-#define PARS_SQL_TOKEN 321
-#define PARS_OPEN_TOKEN 322
-#define PARS_FETCH_TOKEN 323
-#define PARS_CLOSE_TOKEN 324
-#define PARS_NOTFOUND_TOKEN 325
-#define PARS_TO_CHAR_TOKEN 326
-#define PARS_TO_NUMBER_TOKEN 327
-#define PARS_TO_BINARY_TOKEN 328
-#define PARS_BINARY_TO_NUMBER_TOKEN 329
-#define PARS_SUBSTR_TOKEN 330
-#define PARS_REPLSTR_TOKEN 331
-#define PARS_CONCAT_TOKEN 332
-#define PARS_INSTR_TOKEN 333
-#define PARS_LENGTH_TOKEN 334
-#define PARS_SYSDATE_TOKEN 335
-#define PARS_PRINTF_TOKEN 336
-#define PARS_ASSERT_TOKEN 337
-#define PARS_RND_TOKEN 338
-#define PARS_RND_STR_TOKEN 339
-#define PARS_ROW_PRINTF_TOKEN 340
-#define PARS_COMMIT_TOKEN 341
-#define PARS_ROLLBACK_TOKEN 342
-#define PARS_WORK_TOKEN 343
-#define PARS_UNSIGNED_TOKEN 344
-#define PARS_EXIT_TOKEN 345
-#define PARS_FUNCTION_TOKEN 346
-#define PARS_LOCK_TOKEN 347
-#define PARS_SHARE_TOKEN 348
-#define PARS_MODE_TOKEN 349
-#define NEG 350
-
-
-
-
-#if ! defined (YYSTYPE) && ! defined (YYSTYPE_IS_DECLARED)
-typedef int YYSTYPE;
-# define yystype YYSTYPE /* obsolescent; will be withdrawn */
-# define YYSTYPE_IS_DECLARED 1
-# define YYSTYPE_IS_TRIVIAL 1
-#endif
-
-extern YYSTYPE yylval;
-
-
-
diff --git a/storage/innodb_plugin/include/pars0opt.h b/storage/innodb_plugin/include/pars0opt.h
deleted file mode 100644
index 42d956068f8..00000000000
--- a/storage/innodb_plugin/include/pars0opt.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/pars0opt.h
-Simple SQL optimizer
-
-Created 12/21/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef pars0opt_h
-#define pars0opt_h
-
-#include "univ.i"
-#include "que0types.h"
-#include "usr0types.h"
-#include "pars0sym.h"
-#include "dict0types.h"
-#include "row0sel.h"
-
-/*******************************************************************//**
-Optimizes a select. Decides which indexes to tables to use. The tables
-are accessed in the order that they were written to the FROM part in the
-select statement. */
-UNIV_INTERN
-void
-opt_search_plan(
-/*============*/
- sel_node_t* sel_node); /*!< in: parsed select node */
-/*******************************************************************//**
-Looks for occurrences of the columns of the table in the query subgraph and
-adds them to the list of columns if an occurrence of the same column does not
-already exist in the list. If the column is already in the list, puts a value
-indirection to point to the occurrence in the column list, except if the
-column occurrence we are looking at is in the column list, in which case
-nothing is done. */
-UNIV_INTERN
-void
-opt_find_all_cols(
-/*==============*/
- ibool copy_val, /*!< in: if TRUE, new found columns are
- added as columns to copy */
- dict_index_t* index, /*!< in: index to use */
- sym_node_list_t* col_list, /*!< in: base node of a list where
- to add new found columns */
- plan_t* plan, /*!< in: plan or NULL */
- que_node_t* exp); /*!< in: expression or condition */
-/********************************************************************//**
-Prints info of a query plan. */
-UNIV_INTERN
-void
-opt_print_query_plan(
-/*=================*/
- sel_node_t* sel_node); /*!< in: select node */
-
-#ifndef UNIV_NONINL
-#include "pars0opt.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/pars0opt.ic b/storage/innodb_plugin/include/pars0opt.ic
deleted file mode 100644
index e0bb6bf1af2..00000000000
--- a/storage/innodb_plugin/include/pars0opt.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/pars0opt.ic
-Simple SQL optimizer
-
-Created 12/21/1997 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innodb_plugin/include/pars0pars.h b/storage/innodb_plugin/include/pars0pars.h
deleted file mode 100644
index fe5d76ebbb0..00000000000
--- a/storage/innodb_plugin/include/pars0pars.h
+++ /dev/null
@@ -1,748 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/pars0pars.h
-SQL parser
-
-Created 11/19/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef pars0pars_h
-#define pars0pars_h
-
-#include "univ.i"
-#include "que0types.h"
-#include "usr0types.h"
-#include "pars0types.h"
-#include "row0types.h"
-#include "trx0types.h"
-#include "ut0vec.h"
-
-/** Type of the user functions. The first argument is always InnoDB-supplied
-and varies in type, while 'user_arg' is a user-supplied argument. The
-meaning of the return type also varies. See the individual use cases, e.g.
-the FETCH statement, for details on them. */
-typedef void* (*pars_user_func_cb_t)(void* arg, void* user_arg);
-
-/** If the following is set TRUE, the parser will emit debugging
-information */
-extern int yydebug;
-
-#ifdef UNIV_SQL_DEBUG
-/** If the following is set TRUE, the lexer will print the SQL string
-as it tokenizes it */
-extern ibool pars_print_lexed;
-#endif /* UNIV_SQL_DEBUG */
-
-/* Global variable used while parsing a single procedure or query : the code is
-NOT re-entrant */
-extern sym_tab_t* pars_sym_tab_global;
-
-extern pars_res_word_t pars_to_char_token;
-extern pars_res_word_t pars_to_number_token;
-extern pars_res_word_t pars_to_binary_token;
-extern pars_res_word_t pars_binary_to_number_token;
-extern pars_res_word_t pars_substr_token;
-extern pars_res_word_t pars_replstr_token;
-extern pars_res_word_t pars_concat_token;
-extern pars_res_word_t pars_length_token;
-extern pars_res_word_t pars_instr_token;
-extern pars_res_word_t pars_sysdate_token;
-extern pars_res_word_t pars_printf_token;
-extern pars_res_word_t pars_assert_token;
-extern pars_res_word_t pars_rnd_token;
-extern pars_res_word_t pars_rnd_str_token;
-extern pars_res_word_t pars_count_token;
-extern pars_res_word_t pars_sum_token;
-extern pars_res_word_t pars_distinct_token;
-extern pars_res_word_t pars_binary_token;
-extern pars_res_word_t pars_blob_token;
-extern pars_res_word_t pars_int_token;
-extern pars_res_word_t pars_char_token;
-extern pars_res_word_t pars_float_token;
-extern pars_res_word_t pars_update_token;
-extern pars_res_word_t pars_asc_token;
-extern pars_res_word_t pars_desc_token;
-extern pars_res_word_t pars_open_token;
-extern pars_res_word_t pars_close_token;
-extern pars_res_word_t pars_share_token;
-extern pars_res_word_t pars_unique_token;
-extern pars_res_word_t pars_clustered_token;
-
-extern ulint pars_star_denoter;
-
-/* Procedure parameter types */
-#define PARS_INPUT 0
-#define PARS_OUTPUT 1
-#define PARS_NOT_PARAM 2
-
-int
-yyparse(void);
-
-/*************************************************************//**
-Parses an SQL string returning the query graph.
-@return own: the query graph */
-UNIV_INTERN
-que_t*
-pars_sql(
-/*=====*/
- pars_info_t* info, /*!< in: extra information, or NULL */
- const char* str); /*!< in: SQL string */
-/*************************************************************//**
-Retrieves characters to the lexical analyzer. */
-UNIV_INTERN
-void
-pars_get_lex_chars(
-/*===============*/
- char* buf, /*!< in/out: buffer where to copy */
- int* result, /*!< out: number of characters copied or EOF */
- int max_size); /*!< in: maximum number of characters which fit
- in the buffer */
-/*************************************************************//**
-Called by yyparse on error. */
-UNIV_INTERN
-void
-yyerror(
-/*====*/
- const char* s); /*!< in: error message string */
-/*********************************************************************//**
-Parses a variable declaration.
-@return own: symbol table node of type SYM_VAR */
-UNIV_INTERN
-sym_node_t*
-pars_variable_declaration(
-/*======================*/
- sym_node_t* node, /*!< in: symbol table node allocated for the
- id of the variable */
- pars_res_word_t* type); /*!< in: pointer to a type token */
-/*********************************************************************//**
-Parses a function expression.
-@return own: function node in a query tree */
-UNIV_INTERN
-func_node_t*
-pars_func(
-/*======*/
- que_node_t* res_word,/*!< in: function name reserved word */
- que_node_t* arg); /*!< in: first argument in the argument list */
-/*********************************************************************//**
-Parses an operator expression.
-@return own: function node in a query tree */
-UNIV_INTERN
-func_node_t*
-pars_op(
-/*====*/
- int func, /*!< in: operator token code */
- que_node_t* arg1, /*!< in: first argument */
- que_node_t* arg2); /*!< in: second argument or NULL for an unary
- operator */
-/*********************************************************************//**
-Parses an ORDER BY clause. Order by a single column only is supported.
-@return own: order-by node in a query tree */
-UNIV_INTERN
-order_node_t*
-pars_order_by(
-/*==========*/
- sym_node_t* column, /*!< in: column name */
- pars_res_word_t* asc); /*!< in: &pars_asc_token or pars_desc_token */
-/*********************************************************************//**
-Parses a select list; creates a query graph node for the whole SELECT
-statement.
-@return own: select node in a query tree */
-UNIV_INTERN
-sel_node_t*
-pars_select_list(
-/*=============*/
- que_node_t* select_list, /*!< in: select list */
- sym_node_t* into_list); /*!< in: variables list or NULL */
-/*********************************************************************//**
-Parses a cursor declaration.
-@return sym_node */
-UNIV_INTERN
-que_node_t*
-pars_cursor_declaration(
-/*====================*/
- sym_node_t* sym_node, /*!< in: cursor id node in the symbol
- table */
- sel_node_t* select_node); /*!< in: select node */
-/*********************************************************************//**
-Parses a function declaration.
-@return sym_node */
-UNIV_INTERN
-que_node_t*
-pars_function_declaration(
-/*======================*/
- sym_node_t* sym_node); /*!< in: function id node in the symbol
- table */
-/*********************************************************************//**
-Parses a select statement.
-@return own: select node in a query tree */
-UNIV_INTERN
-sel_node_t*
-pars_select_statement(
-/*==================*/
- sel_node_t* select_node, /*!< in: select node already containing
- the select list */
- sym_node_t* table_list, /*!< in: table list */
- que_node_t* search_cond, /*!< in: search condition or NULL */
- pars_res_word_t* for_update, /*!< in: NULL or &pars_update_token */
- pars_res_word_t* consistent_read,/*!< in: NULL or
- &pars_consistent_token */
- order_node_t* order_by); /*!< in: NULL or an order-by node */
-/*********************************************************************//**
-Parses a column assignment in an update.
-@return column assignment node */
-UNIV_INTERN
-col_assign_node_t*
-pars_column_assignment(
-/*===================*/
- sym_node_t* column, /*!< in: column to assign */
- que_node_t* exp); /*!< in: value to assign */
-/*********************************************************************//**
-Parses a delete or update statement start.
-@return own: update node in a query tree */
-UNIV_INTERN
-upd_node_t*
-pars_update_statement_start(
-/*========================*/
- ibool is_delete, /*!< in: TRUE if delete */
- sym_node_t* table_sym, /*!< in: table name node */
- col_assign_node_t* col_assign_list);/*!< in: column assignment list, NULL
- if delete */
-/*********************************************************************//**
-Parses an update or delete statement.
-@return own: update node in a query tree */
-UNIV_INTERN
-upd_node_t*
-pars_update_statement(
-/*==================*/
- upd_node_t* node, /*!< in: update node */
- sym_node_t* cursor_sym, /*!< in: pointer to a cursor entry in
- the symbol table or NULL */
- que_node_t* search_cond); /*!< in: search condition or NULL */
-/*********************************************************************//**
-Parses an insert statement.
-@return own: update node in a query tree */
-UNIV_INTERN
-ins_node_t*
-pars_insert_statement(
-/*==================*/
- sym_node_t* table_sym, /*!< in: table name node */
- que_node_t* values_list, /*!< in: value expression list or NULL */
- sel_node_t* select); /*!< in: select condition or NULL */
-/*********************************************************************//**
-Parses a procedure parameter declaration.
-@return own: symbol table node of type SYM_VAR */
-UNIV_INTERN
-sym_node_t*
-pars_parameter_declaration(
-/*=======================*/
- sym_node_t* node, /*!< in: symbol table node allocated for the
- id of the parameter */
- ulint param_type,
- /*!< in: PARS_INPUT or PARS_OUTPUT */
- pars_res_word_t* type); /*!< in: pointer to a type token */
-/*********************************************************************//**
-Parses an elsif element.
-@return elsif node */
-UNIV_INTERN
-elsif_node_t*
-pars_elsif_element(
-/*===============*/
- que_node_t* cond, /*!< in: if-condition */
- que_node_t* stat_list); /*!< in: statement list */
-/*********************************************************************//**
-Parses an if-statement.
-@return if-statement node */
-UNIV_INTERN
-if_node_t*
-pars_if_statement(
-/*==============*/
- que_node_t* cond, /*!< in: if-condition */
- que_node_t* stat_list, /*!< in: statement list */
- que_node_t* else_part); /*!< in: else-part statement list */
-/*********************************************************************//**
-Parses a for-loop-statement.
-@return for-statement node */
-UNIV_INTERN
-for_node_t*
-pars_for_statement(
-/*===============*/
- sym_node_t* loop_var, /*!< in: loop variable */
- que_node_t* loop_start_limit,/*!< in: loop start expression */
- que_node_t* loop_end_limit, /*!< in: loop end expression */
- que_node_t* stat_list); /*!< in: statement list */
-/*********************************************************************//**
-Parses a while-statement.
-@return while-statement node */
-UNIV_INTERN
-while_node_t*
-pars_while_statement(
-/*=================*/
- que_node_t* cond, /*!< in: while-condition */
- que_node_t* stat_list); /*!< in: statement list */
-/*********************************************************************//**
-Parses an exit statement.
-@return exit statement node */
-UNIV_INTERN
-exit_node_t*
-pars_exit_statement(void);
-/*=====================*/
-/*********************************************************************//**
-Parses a return-statement.
-@return return-statement node */
-UNIV_INTERN
-return_node_t*
-pars_return_statement(void);
-/*=======================*/
-/*********************************************************************//**
-Parses a procedure call.
-@return function node */
-UNIV_INTERN
-func_node_t*
-pars_procedure_call(
-/*================*/
- que_node_t* res_word,/*!< in: procedure name reserved word */
- que_node_t* args); /*!< in: argument list */
-/*********************************************************************//**
-Parses an assignment statement.
-@return assignment statement node */
-UNIV_INTERN
-assign_node_t*
-pars_assignment_statement(
-/*======================*/
- sym_node_t* var, /*!< in: variable to assign */
- que_node_t* val); /*!< in: value to assign */
-/*********************************************************************//**
-Parses a fetch statement. into_list or user_func (but not both) must be
-non-NULL.
-@return fetch statement node */
-UNIV_INTERN
-fetch_node_t*
-pars_fetch_statement(
-/*=================*/
- sym_node_t* cursor, /*!< in: cursor node */
- sym_node_t* into_list, /*!< in: variables to set, or NULL */
- sym_node_t* user_func); /*!< in: user function name, or NULL */
-/*********************************************************************//**
-Parses an open or close cursor statement.
-@return fetch statement node */
-UNIV_INTERN
-open_node_t*
-pars_open_statement(
-/*================*/
- ulint type, /*!< in: ROW_SEL_OPEN_CURSOR
- or ROW_SEL_CLOSE_CURSOR */
- sym_node_t* cursor); /*!< in: cursor node */
-/*********************************************************************//**
-Parses a row_printf-statement.
-@return row_printf-statement node */
-UNIV_INTERN
-row_printf_node_t*
-pars_row_printf_statement(
-/*======================*/
- sel_node_t* sel_node); /*!< in: select node */
-/*********************************************************************//**
-Parses a commit statement.
-@return own: commit node struct */
-UNIV_INTERN
-commit_node_t*
-pars_commit_statement(void);
-/*=======================*/
-/*********************************************************************//**
-Parses a rollback statement.
-@return own: rollback node struct */
-UNIV_INTERN
-roll_node_t*
-pars_rollback_statement(void);
-/*=========================*/
-/*********************************************************************//**
-Parses a column definition at a table creation.
-@return column sym table node */
-UNIV_INTERN
-sym_node_t*
-pars_column_def(
-/*============*/
- sym_node_t* sym_node, /*!< in: column node in the
- symbol table */
- pars_res_word_t* type, /*!< in: data type */
- sym_node_t* len, /*!< in: length of column, or
- NULL */
- void* is_unsigned, /*!< in: if not NULL, column
- is of type UNSIGNED. */
- void* is_not_null); /*!< in: if not NULL, column
- is of type NOT NULL. */
-/*********************************************************************//**
-Parses a table creation operation.
-@return table create subgraph */
-UNIV_INTERN
-tab_node_t*
-pars_create_table(
-/*==============*/
- sym_node_t* table_sym, /*!< in: table name node in the symbol
- table */
- sym_node_t* column_defs, /*!< in: list of column names */
- void* not_fit_in_memory);/*!< in: a non-NULL pointer means that
- this is a table which in simulations
- should be simulated as not fitting
- in memory; thread is put to sleep
- to simulate disk accesses; NOTE that
- this flag is not stored to the data
- dictionary on disk, and the database
- will forget about non-NULL value if
- it has to reload the table definition
- from disk */
-/*********************************************************************//**
-Parses an index creation operation.
-@return index create subgraph */
-UNIV_INTERN
-ind_node_t*
-pars_create_index(
-/*==============*/
- pars_res_word_t* unique_def, /*!< in: not NULL if a unique index */
- pars_res_word_t* clustered_def, /*!< in: not NULL if a clustered index */
- sym_node_t* index_sym, /*!< in: index name node in the symbol
- table */
- sym_node_t* table_sym, /*!< in: table name node in the symbol
- table */
- sym_node_t* column_list); /*!< in: list of column names */
-/*********************************************************************//**
-Parses a procedure definition.
-@return query fork node */
-UNIV_INTERN
-que_fork_t*
-pars_procedure_definition(
-/*======================*/
- sym_node_t* sym_node, /*!< in: procedure id node in the symbol
- table */
- sym_node_t* param_list, /*!< in: parameter declaration list */
- que_node_t* stat_list); /*!< in: statement list */
-
-/*************************************************************//**
-Parses a stored procedure call, when this is not within another stored
-procedure, that is, the client issues a procedure call directly.
-In MySQL/InnoDB, stored InnoDB procedures are invoked via the
-parsed procedure tree, not via InnoDB SQL, so this function is not used.
-@return query graph */
-UNIV_INTERN
-que_fork_t*
-pars_stored_procedure_call(
-/*=======================*/
- sym_node_t* sym_node); /*!< in: stored procedure name */
-/******************************************************************//**
-Completes a query graph by adding query thread and fork nodes
-above it and prepares the graph for running. The fork created is of
-type QUE_FORK_MYSQL_INTERFACE.
-@return query thread node to run */
-UNIV_INTERN
-que_thr_t*
-pars_complete_graph_for_exec(
-/*=========================*/
- que_node_t* node, /*!< in: root node for an incomplete
- query graph */
- trx_t* trx, /*!< in: transaction handle */
- mem_heap_t* heap); /*!< in: memory heap from which allocated */
-
-/****************************************************************//**
-Create parser info struct.
-@return own: info struct */
-UNIV_INTERN
-pars_info_t*
-pars_info_create(void);
-/*==================*/
-
-/****************************************************************//**
-Free info struct and everything it contains. */
-UNIV_INTERN
-void
-pars_info_free(
-/*===========*/
- pars_info_t* info); /*!< in, own: info struct */
-
-/****************************************************************//**
-Add bound literal. */
-UNIV_INTERN
-void
-pars_info_add_literal(
-/*==================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name, /*!< in: name */
- const void* address, /*!< in: address */
- ulint length, /*!< in: length of data */
- ulint type, /*!< in: type, e.g. DATA_FIXBINARY */
- ulint prtype); /*!< in: precise type, e.g.
- DATA_UNSIGNED */
-
-/****************************************************************//**
-Equivalent to pars_info_add_literal(info, name, str, strlen(str),
-DATA_VARCHAR, DATA_ENGLISH). */
-UNIV_INTERN
-void
-pars_info_add_str_literal(
-/*======================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name, /*!< in: name */
- const char* str); /*!< in: string */
-
-/****************************************************************//**
-Equivalent to:
-
-char buf[4];
-mach_write_to_4(buf, val);
-pars_info_add_literal(info, name, buf, 4, DATA_INT, 0);
-
-except that the buffer is dynamically allocated from the info struct's
-heap. */
-UNIV_INTERN
-void
-pars_info_add_int4_literal(
-/*=======================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name, /*!< in: name */
- lint val); /*!< in: value */
-
-/****************************************************************//**
-Equivalent to:
-
-char buf[8];
-mach_write_to_8(buf, val);
-pars_info_add_literal(info, name, buf, 8, DATA_BINARY, 0);
-
-except that the buffer is dynamically allocated from the info struct's
-heap. */
-UNIV_INTERN
-void
-pars_info_add_dulint_literal(
-/*=========================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name, /*!< in: name */
- dulint val); /*!< in: value */
-/****************************************************************//**
-Add user function. */
-UNIV_INTERN
-void
-pars_info_add_function(
-/*===================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name, /*!< in: function name */
- pars_user_func_cb_t func, /*!< in: function address */
- void* arg); /*!< in: user-supplied argument */
-
-/****************************************************************//**
-Add bound id. */
-UNIV_INTERN
-void
-pars_info_add_id(
-/*=============*/
- pars_info_t* info, /*!< in: info struct */
- const char* name, /*!< in: name */
- const char* id); /*!< in: id */
-
-/****************************************************************//**
-Get user function with the given name.
-@return user func, or NULL if not found */
-UNIV_INTERN
-pars_user_func_t*
-pars_info_get_user_func(
-/*====================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name); /*!< in: function name to find*/
-
-/****************************************************************//**
-Get bound literal with the given name.
-@return bound literal, or NULL if not found */
-UNIV_INTERN
-pars_bound_lit_t*
-pars_info_get_bound_lit(
-/*====================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name); /*!< in: bound literal name to find */
-
-/****************************************************************//**
-Get bound id with the given name.
-@return bound id, or NULL if not found */
-UNIV_INTERN
-pars_bound_id_t*
-pars_info_get_bound_id(
-/*===================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name); /*!< in: bound id name to find */
-
-/******************************************************************//**
-Release any resources used by the lexer. */
-UNIV_INTERN
-void
-pars_lexer_close(void);
-/*==================*/
-
-/** Extra information supplied for pars_sql(). */
-struct pars_info_struct {
- mem_heap_t* heap; /*!< our own memory heap */
-
- ib_vector_t* funcs; /*!< user functions, or NUll
- (pars_user_func_t*) */
- ib_vector_t* bound_lits; /*!< bound literals, or NULL
- (pars_bound_lit_t*) */
- ib_vector_t* bound_ids; /*!< bound ids, or NULL
- (pars_bound_id_t*) */
-
- ibool graph_owns_us; /*!< if TRUE (which is the default),
- que_graph_free() will free us */
-};
-
-/** User-supplied function and argument. */
-struct pars_user_func_struct {
- const char* name; /*!< function name */
- pars_user_func_cb_t func; /*!< function address */
- void* arg; /*!< user-supplied argument */
-};
-
-/** Bound literal. */
-struct pars_bound_lit_struct {
- const char* name; /*!< name */
- const void* address; /*!< address */
- ulint length; /*!< length of data */
- ulint type; /*!< type, e.g. DATA_FIXBINARY */
- ulint prtype; /*!< precise type, e.g. DATA_UNSIGNED */
-};
-
-/** Bound identifier. */
-struct pars_bound_id_struct {
- const char* name; /*!< name */
- const char* id; /*!< identifier */
-};
-
-/** Struct used to denote a reserved word in a parsing tree */
-struct pars_res_word_struct{
- int code; /*!< the token code for the reserved word from
- pars0grm.h */
-};
-
-/** A predefined function or operator node in a parsing tree; this construct
-is also used for some non-functions like the assignment ':=' */
-struct func_node_struct{
- que_common_t common; /*!< type: QUE_NODE_FUNC */
- int func; /*!< token code of the function name */
- ulint class; /*!< class of the function */
- que_node_t* args; /*!< argument(s) of the function */
- UT_LIST_NODE_T(func_node_t) cond_list;
- /*!< list of comparison conditions; defined
- only for comparison operator nodes except,
- presently, for OPT_SCROLL_TYPE ones */
- UT_LIST_NODE_T(func_node_t) func_node_list;
- /*!< list of function nodes in a parsed
- query graph */
-};
-
-/** An order-by node in a select */
-struct order_node_struct{
- que_common_t common; /*!< type: QUE_NODE_ORDER */
- sym_node_t* column; /*!< order-by column */
- ibool asc; /*!< TRUE if ascending, FALSE if descending */
-};
-
-/** Procedure definition node */
-struct proc_node_struct{
- que_common_t common; /*!< type: QUE_NODE_PROC */
- sym_node_t* proc_id; /*!< procedure name symbol in the symbol
- table of this same procedure */
- sym_node_t* param_list; /*!< input and output parameters */
- que_node_t* stat_list; /*!< statement list */
- sym_tab_t* sym_tab; /*!< symbol table of this procedure */
-};
-
-/** elsif-element node */
-struct elsif_node_struct{
- que_common_t common; /*!< type: QUE_NODE_ELSIF */
- que_node_t* cond; /*!< if condition */
- que_node_t* stat_list; /*!< statement list */
-};
-
-/** if-statement node */
-struct if_node_struct{
- que_common_t common; /*!< type: QUE_NODE_IF */
- que_node_t* cond; /*!< if condition */
- que_node_t* stat_list; /*!< statement list */
- que_node_t* else_part; /*!< else-part statement list */
- elsif_node_t* elsif_list; /*!< elsif element list */
-};
-
-/** while-statement node */
-struct while_node_struct{
- que_common_t common; /*!< type: QUE_NODE_WHILE */
- que_node_t* cond; /*!< while condition */
- que_node_t* stat_list; /*!< statement list */
-};
-
-/** for-loop-statement node */
-struct for_node_struct{
- que_common_t common; /*!< type: QUE_NODE_FOR */
- sym_node_t* loop_var; /*!< loop variable: this is the
- dereferenced symbol from the
- variable declarations, not the
- symbol occurrence in the for loop
- definition */
- que_node_t* loop_start_limit;/*!< initial value of loop variable */
- que_node_t* loop_end_limit; /*!< end value of loop variable */
- lint loop_end_value; /*!< evaluated value for the end value:
- it is calculated only when the loop
- is entered, and will not change within
- the loop */
- que_node_t* stat_list; /*!< statement list */
-};
-
-/** exit statement node */
-struct exit_node_struct{
- que_common_t common; /*!< type: QUE_NODE_EXIT */
-};
-
-/** return-statement node */
-struct return_node_struct{
- que_common_t common; /*!< type: QUE_NODE_RETURN */
-};
-
-/** Assignment statement node */
-struct assign_node_struct{
- que_common_t common; /*!< type: QUE_NODE_ASSIGNMENT */
- sym_node_t* var; /*!< variable to set */
- que_node_t* val; /*!< value to assign */
-};
-
-/** Column assignment node */
-struct col_assign_node_struct{
- que_common_t common; /*!< type: QUE_NODE_COL_ASSIGN */
- sym_node_t* col; /*!< column to set */
- que_node_t* val; /*!< value to assign */
-};
-
-/** Classes of functions */
-/* @{ */
-#define PARS_FUNC_ARITH 1 /*!< +, -, *, / */
-#define PARS_FUNC_LOGICAL 2 /*!< AND, OR, NOT */
-#define PARS_FUNC_CMP 3 /*!< comparison operators */
-#define PARS_FUNC_PREDEFINED 4 /*!< TO_NUMBER, SUBSTR, ... */
-#define PARS_FUNC_AGGREGATE 5 /*!< COUNT, DISTINCT, SUM */
-#define PARS_FUNC_OTHER 6 /*!< these are not real functions,
- e.g., := */
-/* @} */
-
-#ifndef UNIV_NONINL
-#include "pars0pars.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/pars0pars.ic b/storage/innodb_plugin/include/pars0pars.ic
deleted file mode 100644
index ae6c13cd671..00000000000
--- a/storage/innodb_plugin/include/pars0pars.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/pars0pars.ic
-SQL parser
-
-Created 11/19/1996 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innodb_plugin/include/pars0sym.h b/storage/innodb_plugin/include/pars0sym.h
deleted file mode 100644
index 6d1a4b82414..00000000000
--- a/storage/innodb_plugin/include/pars0sym.h
+++ /dev/null
@@ -1,244 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/pars0sym.h
-SQL parser symbol table
-
-Created 12/15/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef pars0sym_h
-#define pars0sym_h
-
-#include "univ.i"
-#include "que0types.h"
-#include "usr0types.h"
-#include "dict0types.h"
-#include "pars0types.h"
-#include "row0types.h"
-
-/******************************************************************//**
-Creates a symbol table for a single stored procedure or query.
-@return own: symbol table */
-UNIV_INTERN
-sym_tab_t*
-sym_tab_create(
-/*===========*/
- mem_heap_t* heap); /*!< in: memory heap where to create */
-/******************************************************************//**
-Frees the memory allocated dynamically AFTER parsing phase for variables
-etc. in the symbol table. Does not free the mem heap where the table was
-originally created. Frees also SQL explicit cursor definitions. */
-UNIV_INTERN
-void
-sym_tab_free_private(
-/*=================*/
- sym_tab_t* sym_tab); /*!< in, own: symbol table */
-/******************************************************************//**
-Adds an integer literal to a symbol table.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_int_lit(
-/*================*/
- sym_tab_t* sym_tab, /*!< in: symbol table */
- ulint val); /*!< in: integer value */
-/******************************************************************//**
-Adds an string literal to a symbol table.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_str_lit(
-/*================*/
- sym_tab_t* sym_tab, /*!< in: symbol table */
- byte* str, /*!< in: string with no quotes around
- it */
- ulint len); /*!< in: string length */
-/******************************************************************//**
-Add a bound literal to a symbol table.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_bound_lit(
-/*==================*/
- sym_tab_t* sym_tab, /*!< in: symbol table */
- const char* name, /*!< in: name of bound literal */
- ulint* lit_type); /*!< out: type of literal (PARS_*_LIT) */
-/******************************************************************//**
-Adds an SQL null literal to a symbol table.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_null_lit(
-/*=================*/
- sym_tab_t* sym_tab); /*!< in: symbol table */
-/******************************************************************//**
-Adds an identifier to a symbol table.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_id(
-/*===========*/
- sym_tab_t* sym_tab, /*!< in: symbol table */
- byte* name, /*!< in: identifier name */
- ulint len); /*!< in: identifier length */
-
-/******************************************************************//**
-Add a bound identifier to a symbol table.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_bound_id(
-/*===========*/
- sym_tab_t* sym_tab, /*!< in: symbol table */
- const char* name); /*!< in: name of bound id */
-
-/** Index of sym_node_struct::field_nos corresponding to the clustered index */
-#define SYM_CLUST_FIELD_NO 0
-/** Index of sym_node_struct::field_nos corresponding to a secondary index */
-#define SYM_SEC_FIELD_NO 1
-
-/** Types of a symbol table node */
-enum sym_tab_entry {
- SYM_VAR = 91, /*!< declared parameter or local
- variable of a procedure */
- SYM_IMPLICIT_VAR, /*!< storage for a intermediate result
- of a calculation */
- SYM_LIT, /*!< literal */
- SYM_TABLE, /*!< database table name */
- SYM_COLUMN, /*!< database table name */
- SYM_CURSOR, /*!< named cursor */
- SYM_PROCEDURE_NAME, /*!< stored procedure name */
- SYM_INDEX, /*!< database index name */
- SYM_FUNCTION /*!< user function name */
-};
-
-/** Symbol table node */
-struct sym_node_struct{
- que_common_t common; /*!< node type:
- QUE_NODE_SYMBOL */
- /* NOTE: if the data field in 'common.val' is not NULL and the symbol
- table node is not for a temporary column, the memory for the value has
- been allocated from dynamic memory and it should be freed when the
- symbol table is discarded */
-
- /* 'alias' and 'indirection' are almost the same, but not quite.
- 'alias' always points to the primary instance of the variable, while
- 'indirection' does the same only if we should use the primary
- instance's values for the node's data. This is usually the case, but
- when initializing a cursor (e.g., "DECLARE CURSOR c IS SELECT * FROM
- t WHERE id = x;"), we copy the values from the primary instance to
- the cursor's instance so that they are fixed for the duration of the
- cursor, and set 'indirection' to NULL. If we did not, the value of
- 'x' could change between fetches and things would break horribly.
-
- TODO: It would be cleaner to make 'indirection' a boolean field and
- always use 'alias' to refer to the primary node. */
-
- sym_node_t* indirection; /*!< pointer to
- another symbol table
- node which contains
- the value for this
- node, NULL otherwise */
- sym_node_t* alias; /*!< pointer to
- another symbol table
- node for which this
- node is an alias,
- NULL otherwise */
- UT_LIST_NODE_T(sym_node_t) col_var_list; /*!< list of table
- columns or a list of
- input variables for an
- explicit cursor */
- ibool copy_val; /*!< TRUE if a column
- and its value should
- be copied to dynamic
- memory when fetched */
- ulint field_nos[2]; /*!< if a column, in
- the position
- SYM_CLUST_FIELD_NO is
- the field number in the
- clustered index; in
- the position
- SYM_SEC_FIELD_NO
- the field number in the
- non-clustered index to
- use first; if not found
- from the index, then
- ULINT_UNDEFINED */
- ibool resolved; /*!< TRUE if the
- meaning of a variable
- or a column has been
- resolved; for literals
- this is always TRUE */
- enum sym_tab_entry token_type; /*!< type of the
- parsed token */
- const char* name; /*!< name of an id */
- ulint name_len; /*!< id name length */
- dict_table_t* table; /*!< table definition
- if a table id or a
- column id */
- ulint col_no; /*!< column number if a
- column */
- sel_buf_t* prefetch_buf; /*!< NULL, or a buffer
- for cached column
- values for prefetched
- rows */
- sel_node_t* cursor_def; /*!< cursor definition
- select node if a
- named cursor */
- ulint param_type; /*!< PARS_INPUT,
- PARS_OUTPUT, or
- PARS_NOT_PARAM if not a
- procedure parameter */
- sym_tab_t* sym_table; /*!< back pointer to
- the symbol table */
- UT_LIST_NODE_T(sym_node_t) sym_list; /*!< list of symbol
- nodes */
-};
-
-/** Symbol table */
-struct sym_tab_struct{
- que_t* query_graph;
- /*!< query graph generated by the
- parser */
- const char* sql_string;
- /*!< SQL string to parse */
- size_t string_len;
- /*!< SQL string length */
- int next_char_pos;
- /*!< position of the next character in
- sql_string to give to the lexical
- analyzer */
- pars_info_t* info; /*!< extra information, or NULL */
- sym_node_list_t sym_list;
- /*!< list of symbol nodes in the symbol
- table */
- UT_LIST_BASE_NODE_T(func_node_t)
- func_node_list;
- /*!< list of function nodes in the
- parsed query graph */
- mem_heap_t* heap; /*!< memory heap from which we can
- allocate space */
-};
-
-#ifndef UNIV_NONINL
-#include "pars0sym.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/pars0sym.ic b/storage/innodb_plugin/include/pars0sym.ic
deleted file mode 100644
index 9eb09db3a47..00000000000
--- a/storage/innodb_plugin/include/pars0sym.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/pars0sym.ic
-SQL parser symbol table
-
-Created 12/15/1997 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innodb_plugin/include/pars0types.h b/storage/innodb_plugin/include/pars0types.h
deleted file mode 100644
index e0a8a86bf07..00000000000
--- a/storage/innodb_plugin/include/pars0types.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1998, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/pars0types.h
-SQL parser global types
-
-Created 1/11/1998 Heikki Tuuri
-*******************************************************/
-
-#ifndef pars0types_h
-#define pars0types_h
-
-typedef struct pars_info_struct pars_info_t;
-typedef struct pars_user_func_struct pars_user_func_t;
-typedef struct pars_bound_lit_struct pars_bound_lit_t;
-typedef struct pars_bound_id_struct pars_bound_id_t;
-typedef struct sym_node_struct sym_node_t;
-typedef struct sym_tab_struct sym_tab_t;
-typedef struct pars_res_word_struct pars_res_word_t;
-typedef struct func_node_struct func_node_t;
-typedef struct order_node_struct order_node_t;
-typedef struct proc_node_struct proc_node_t;
-typedef struct elsif_node_struct elsif_node_t;
-typedef struct if_node_struct if_node_t;
-typedef struct while_node_struct while_node_t;
-typedef struct for_node_struct for_node_t;
-typedef struct exit_node_struct exit_node_t;
-typedef struct return_node_struct return_node_t;
-typedef struct assign_node_struct assign_node_t;
-typedef struct col_assign_node_struct col_assign_node_t;
-
-typedef UT_LIST_BASE_NODE_T(sym_node_t) sym_node_list_t;
-
-#endif
diff --git a/storage/innodb_plugin/include/que0que.h b/storage/innodb_plugin/include/que0que.h
deleted file mode 100644
index 420f34550e2..00000000000
--- a/storage/innodb_plugin/include/que0que.h
+++ /dev/null
@@ -1,513 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/que0que.h
-Query graph
-
-Created 5/27/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef que0que_h
-#define que0que_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "dict0types.h"
-#include "trx0trx.h"
-#include "srv0srv.h"
-#include "usr0types.h"
-#include "que0types.h"
-#include "row0types.h"
-#include "pars0types.h"
-
-/* If the following flag is set TRUE, the module will print trace info
-of SQL execution in the UNIV_SQL_DEBUG version */
-extern ibool que_trace_on;
-
-/***********************************************************************//**
-Adds a query graph to the session's list of graphs. */
-UNIV_INTERN
-void
-que_graph_publish(
-/*==============*/
- que_t* graph, /*!< in: graph */
- sess_t* sess); /*!< in: session */
-/***********************************************************************//**
-Creates a query graph fork node.
-@return own: fork node */
-UNIV_INTERN
-que_fork_t*
-que_fork_create(
-/*============*/
- que_t* graph, /*!< in: graph, if NULL then this
- fork node is assumed to be the
- graph root */
- que_node_t* parent, /*!< in: parent node */
- ulint fork_type, /*!< in: fork type */
- mem_heap_t* heap); /*!< in: memory heap where created */
-/***********************************************************************//**
-Gets the first thr in a fork. */
-UNIV_INLINE
-que_thr_t*
-que_fork_get_first_thr(
-/*===================*/
- que_fork_t* fork); /*!< in: query fork */
-/***********************************************************************//**
-Gets the child node of the first thr in a fork. */
-UNIV_INLINE
-que_node_t*
-que_fork_get_child(
-/*===============*/
- que_fork_t* fork); /*!< in: query fork */
-/***********************************************************************//**
-Sets the parent of a graph node. */
-UNIV_INLINE
-void
-que_node_set_parent(
-/*================*/
- que_node_t* node, /*!< in: graph node */
- que_node_t* parent);/*!< in: parent */
-/***********************************************************************//**
-Creates a query graph thread node.
-@return own: query thread node */
-UNIV_INTERN
-que_thr_t*
-que_thr_create(
-/*===========*/
- que_fork_t* parent, /*!< in: parent node, i.e., a fork node */
- mem_heap_t* heap); /*!< in: memory heap where created */
-/**********************************************************************//**
-Frees a query graph, but not the heap where it was created. Does not free
-explicit cursor declarations, they are freed in que_graph_free. */
-UNIV_INTERN
-void
-que_graph_free_recursive(
-/*=====================*/
- que_node_t* node); /*!< in: query graph node */
-/**********************************************************************//**
-Frees a query graph. */
-UNIV_INTERN
-void
-que_graph_free(
-/*===========*/
- que_t* graph); /*!< in: query graph; we assume that the memory
- heap where this graph was created is private
- to this graph: if not, then use
- que_graph_free_recursive and free the heap
- afterwards! */
-/**********************************************************************//**
-Stops a query thread if graph or trx is in a state requiring it. The
-conditions are tested in the order (1) graph, (2) trx. The kernel mutex has
-to be reserved.
-@return TRUE if stopped */
-UNIV_INTERN
-ibool
-que_thr_stop(
-/*=========*/
- que_thr_t* thr); /*!< in: query thread */
-/**********************************************************************//**
-Moves a thread from another state to the QUE_THR_RUNNING state. Increments
-the n_active_thrs counters of the query graph and transaction. */
-UNIV_INTERN
-void
-que_thr_move_to_run_state_for_mysql(
-/*================================*/
- que_thr_t* thr, /*!< in: an query thread */
- trx_t* trx); /*!< in: transaction */
-/**********************************************************************//**
-A patch for MySQL used to 'stop' a dummy query thread used in MySQL
-select, when there is no error or lock wait. */
-UNIV_INTERN
-void
-que_thr_stop_for_mysql_no_error(
-/*============================*/
- que_thr_t* thr, /*!< in: query thread */
- trx_t* trx); /*!< in: transaction */
-/**********************************************************************//**
-A patch for MySQL used to 'stop' a dummy query thread used in MySQL. The
-query thread is stopped and made inactive, except in the case where
-it was put to the lock wait state in lock0lock.c, but the lock has already
-been granted or the transaction chosen as a victim in deadlock resolution. */
-UNIV_INTERN
-void
-que_thr_stop_for_mysql(
-/*===================*/
- que_thr_t* thr); /*!< in: query thread */
-/**********************************************************************//**
-Run a query thread. Handles lock waits. */
-UNIV_INTERN
-void
-que_run_threads(
-/*============*/
- que_thr_t* thr); /*!< in: query thread */
-/**********************************************************************//**
-After signal handling is finished, returns control to a query graph error
-handling routine. (Currently, just returns the control to the root of the
-graph so that the graph can communicate an error message to the client.) */
-UNIV_INTERN
-void
-que_fork_error_handle(
-/*==================*/
- trx_t* trx, /*!< in: trx */
- que_t* fork); /*!< in: query graph which was run before signal
- handling started, NULL not allowed */
-/**********************************************************************//**
-Moves a suspended query thread to the QUE_THR_RUNNING state and releases
-a single worker thread to execute it. This function should be used to end
-the wait state of a query thread waiting for a lock or a stored procedure
-completion. */
-UNIV_INTERN
-void
-que_thr_end_wait(
-/*=============*/
- que_thr_t* thr, /*!< in: query thread in the
- QUE_THR_LOCK_WAIT,
- or QUE_THR_PROCEDURE_WAIT, or
- QUE_THR_SIG_REPLY_WAIT state */
- que_thr_t** next_thr); /*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread */
-/**********************************************************************//**
-Same as que_thr_end_wait, but no parameter next_thr available. */
-UNIV_INTERN
-void
-que_thr_end_wait_no_next_thr(
-/*=========================*/
- que_thr_t* thr); /*!< in: query thread in the
- QUE_THR_LOCK_WAIT,
- or QUE_THR_PROCEDURE_WAIT, or
- QUE_THR_SIG_REPLY_WAIT state */
-/**********************************************************************//**
-Starts execution of a command in a query fork. Picks a query thread which
-is not in the QUE_THR_RUNNING state and moves it to that state. If none
-can be chosen, a situation which may arise in parallelized fetches, NULL
-is returned.
-@return a query thread of the graph moved to QUE_THR_RUNNING state, or
-NULL; the query thread should be executed by que_run_threads by the
-caller */
-UNIV_INTERN
-que_thr_t*
-que_fork_start_command(
-/*===================*/
- que_fork_t* fork); /*!< in: a query fork */
-/***********************************************************************//**
-Gets the trx of a query thread. */
-UNIV_INLINE
-trx_t*
-thr_get_trx(
-/*========*/
- que_thr_t* thr); /*!< in: query thread */
-/***********************************************************************//**
-Gets the type of a graph node. */
-UNIV_INLINE
-ulint
-que_node_get_type(
-/*==============*/
- que_node_t* node); /*!< in: graph node */
-/***********************************************************************//**
-Gets pointer to the value data type field of a graph node. */
-UNIV_INLINE
-dtype_t*
-que_node_get_data_type(
-/*===================*/
- que_node_t* node); /*!< in: graph node */
-/***********************************************************************//**
-Gets pointer to the value dfield of a graph node. */
-UNIV_INLINE
-dfield_t*
-que_node_get_val(
-/*=============*/
- que_node_t* node); /*!< in: graph node */
-/***********************************************************************//**
-Gets the value buffer size of a graph node.
-@return val buffer size, not defined if val.data == NULL in node */
-UNIV_INLINE
-ulint
-que_node_get_val_buf_size(
-/*======================*/
- que_node_t* node); /*!< in: graph node */
-/***********************************************************************//**
-Sets the value buffer size of a graph node. */
-UNIV_INLINE
-void
-que_node_set_val_buf_size(
-/*======================*/
- que_node_t* node, /*!< in: graph node */
- ulint size); /*!< in: size */
-/*********************************************************************//**
-Gets the next list node in a list of query graph nodes. */
-UNIV_INLINE
-que_node_t*
-que_node_get_next(
-/*==============*/
- que_node_t* node); /*!< in: node in a list */
-/*********************************************************************//**
-Gets the parent node of a query graph node.
-@return parent node or NULL */
-UNIV_INLINE
-que_node_t*
-que_node_get_parent(
-/*================*/
- que_node_t* node); /*!< in: node */
-/****************************************************************//**
-Get the first containing loop node (e.g. while_node_t or for_node_t) for the
-given node, or NULL if the node is not within a loop.
-@return containing loop node, or NULL. */
-UNIV_INTERN
-que_node_t*
-que_node_get_containing_loop_node(
-/*==============================*/
- que_node_t* node); /*!< in: node */
-/*********************************************************************//**
-Catenates a query graph node to a list of them, possible empty list.
-@return one-way list of nodes */
-UNIV_INLINE
-que_node_t*
-que_node_list_add_last(
-/*===================*/
- que_node_t* node_list, /*!< in: node list, or NULL */
- que_node_t* node); /*!< in: node */
-/*********************************************************************//**
-Gets a query graph node list length.
-@return length, for NULL list 0 */
-UNIV_INLINE
-ulint
-que_node_list_get_len(
-/*==================*/
- que_node_t* node_list); /*!< in: node list, or NULL */
-/**********************************************************************//**
-Checks if graph, trx, or session is in a state where the query thread should
-be stopped.
-@return TRUE if should be stopped; NOTE that if the peek is made
-without reserving the kernel mutex, then another peek with the mutex
-reserved is necessary before deciding the actual stopping */
-UNIV_INLINE
-ibool
-que_thr_peek_stop(
-/*==============*/
- que_thr_t* thr); /*!< in: query thread */
-/***********************************************************************//**
-Returns TRUE if the query graph is for a SELECT statement.
-@return TRUE if a select */
-UNIV_INLINE
-ibool
-que_graph_is_select(
-/*================*/
- que_t* graph); /*!< in: graph */
-/**********************************************************************//**
-Prints info of an SQL query graph node. */
-UNIV_INTERN
-void
-que_node_print_info(
-/*================*/
- que_node_t* node); /*!< in: query graph node */
-/*********************************************************************//**
-Evaluate the given SQL
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-ulint
-que_eval_sql(
-/*=========*/
- pars_info_t* info, /*!< in: info struct, or NULL */
- const char* sql, /*!< in: SQL string */
- ibool reserve_dict_mutex,
- /*!< in: if TRUE, acquire/release
- dict_sys->mutex around call to pars_sql. */
- trx_t* trx); /*!< in: trx */
-
-/* Query graph query thread node: the fields are protected by the kernel
-mutex with the exceptions named below */
-
-struct que_thr_struct{
- que_common_t common; /*!< type: QUE_NODE_THR */
- ulint magic_n; /*!< magic number to catch memory
- corruption */
- que_node_t* child; /*!< graph child node */
- que_t* graph; /*!< graph where this node belongs */
- ibool is_active; /*!< TRUE if the thread has been set
- to the run state in
- que_thr_move_to_run_state, but not
- deactivated in
- que_thr_dec_reference_count */
- ulint state; /*!< state of the query thread */
- UT_LIST_NODE_T(que_thr_t)
- thrs; /*!< list of thread nodes of the fork
- node */
- UT_LIST_NODE_T(que_thr_t)
- trx_thrs; /*!< lists of threads in wait list of
- the trx */
- UT_LIST_NODE_T(que_thr_t)
- queue; /*!< list of runnable thread nodes in
- the server task queue */
- /*------------------------------*/
- /* The following fields are private to the OS thread executing the
- query thread, and are not protected by the kernel mutex: */
-
- que_node_t* run_node; /*!< pointer to the node where the
- subgraph down from this node is
- currently executed */
- que_node_t* prev_node; /*!< pointer to the node from which
- the control came */
- ulint resource; /*!< resource usage of the query thread
- thus far */
- ulint lock_state; /*!< lock state of thread (table or
- row) */
-};
-
-#define QUE_THR_MAGIC_N 8476583
-#define QUE_THR_MAGIC_FREED 123461526
-
-/* Query graph fork node: its fields are protected by the kernel mutex */
-struct que_fork_struct{
- que_common_t common; /*!< type: QUE_NODE_FORK */
- que_t* graph; /*!< query graph of this node */
- ulint fork_type; /*!< fork type */
- ulint n_active_thrs; /*!< if this is the root of a graph, the
- number query threads that have been
- started in que_thr_move_to_run_state
- but for which que_thr_dec_refer_count
- has not yet been called */
- trx_t* trx; /*!< transaction: this is set only in
- the root node */
- ulint state; /*!< state of the fork node */
- que_thr_t* caller; /*!< pointer to a possible calling query
- thread */
- UT_LIST_BASE_NODE_T(que_thr_t)
- thrs; /*!< list of query threads */
- /*------------------------------*/
- /* The fields in this section are defined only in the root node */
- sym_tab_t* sym_tab; /*!< symbol table of the query,
- generated by the parser, or NULL
- if the graph was created 'by hand' */
- pars_info_t* info; /*!< info struct, or NULL */
- /* The following cur_... fields are relevant only in a select graph */
-
- ulint cur_end; /*!< QUE_CUR_NOT_DEFINED, QUE_CUR_START,
- QUE_CUR_END */
- ulint cur_pos; /*!< if there are n rows in the result
- set, values 0 and n + 1 mean before
- first row, or after last row, depending
- on cur_end; values 1...n mean a row
- index */
- ibool cur_on_row; /*!< TRUE if cursor is on a row, i.e.,
- it is not before the first row or
- after the last row */
- dulint n_inserts; /*!< number of rows inserted */
- dulint n_updates; /*!< number of rows updated */
- dulint n_deletes; /*!< number of rows deleted */
- sel_node_t* last_sel_node; /*!< last executed select node, or NULL
- if none */
- UT_LIST_NODE_T(que_fork_t)
- graphs; /*!< list of query graphs of a session
- or a stored procedure */
- /*------------------------------*/
- mem_heap_t* heap; /*!< memory heap where the fork was
- created */
-
-};
-
-/* Query fork (or graph) types */
-#define QUE_FORK_SELECT_NON_SCROLL 1 /* forward-only cursor */
-#define QUE_FORK_SELECT_SCROLL 2 /* scrollable cursor */
-#define QUE_FORK_INSERT 3
-#define QUE_FORK_UPDATE 4
-#define QUE_FORK_ROLLBACK 5
- /* This is really the undo graph used in rollback,
- no signal-sending roll_node in this graph */
-#define QUE_FORK_PURGE 6
-#define QUE_FORK_EXECUTE 7
-#define QUE_FORK_PROCEDURE 8
-#define QUE_FORK_PROCEDURE_CALL 9
-#define QUE_FORK_MYSQL_INTERFACE 10
-#define QUE_FORK_RECOVERY 11
-
-/* Query fork (or graph) states */
-#define QUE_FORK_ACTIVE 1
-#define QUE_FORK_COMMAND_WAIT 2
-#define QUE_FORK_INVALID 3
-#define QUE_FORK_BEING_FREED 4
-
-/* Flag which is ORed to control structure statement node types */
-#define QUE_NODE_CONTROL_STAT 1024
-
-/* Query graph node types */
-#define QUE_NODE_LOCK 1
-#define QUE_NODE_INSERT 2
-#define QUE_NODE_UPDATE 4
-#define QUE_NODE_CURSOR 5
-#define QUE_NODE_SELECT 6
-#define QUE_NODE_AGGREGATE 7
-#define QUE_NODE_FORK 8
-#define QUE_NODE_THR 9
-#define QUE_NODE_UNDO 10
-#define QUE_NODE_COMMIT 11
-#define QUE_NODE_ROLLBACK 12
-#define QUE_NODE_PURGE 13
-#define QUE_NODE_CREATE_TABLE 14
-#define QUE_NODE_CREATE_INDEX 15
-#define QUE_NODE_SYMBOL 16
-#define QUE_NODE_RES_WORD 17
-#define QUE_NODE_FUNC 18
-#define QUE_NODE_ORDER 19
-#define QUE_NODE_PROC (20 + QUE_NODE_CONTROL_STAT)
-#define QUE_NODE_IF (21 + QUE_NODE_CONTROL_STAT)
-#define QUE_NODE_WHILE (22 + QUE_NODE_CONTROL_STAT)
-#define QUE_NODE_ASSIGNMENT 23
-#define QUE_NODE_FETCH 24
-#define QUE_NODE_OPEN 25
-#define QUE_NODE_COL_ASSIGNMENT 26
-#define QUE_NODE_FOR (27 + QUE_NODE_CONTROL_STAT)
-#define QUE_NODE_RETURN 28
-#define QUE_NODE_ROW_PRINTF 29
-#define QUE_NODE_ELSIF 30
-#define QUE_NODE_CALL 31
-#define QUE_NODE_EXIT 32
-
-/* Query thread states */
-#define QUE_THR_RUNNING 1
-#define QUE_THR_PROCEDURE_WAIT 2
-#define QUE_THR_COMPLETED 3 /* in selects this means that the
- thread is at the end of its result set
- (or start, in case of a scroll cursor);
- in other statements, this means the
- thread has done its task */
-#define QUE_THR_COMMAND_WAIT 4
-#define QUE_THR_LOCK_WAIT 5
-#define QUE_THR_SIG_REPLY_WAIT 6
-#define QUE_THR_SUSPENDED 7
-#define QUE_THR_ERROR 8
-
-/* Query thread lock states */
-#define QUE_THR_LOCK_NOLOCK 0
-#define QUE_THR_LOCK_ROW 1
-#define QUE_THR_LOCK_TABLE 2
-
-/* From where the cursor position is counted */
-#define QUE_CUR_NOT_DEFINED 1
-#define QUE_CUR_START 2
-#define QUE_CUR_END 3
-
-
-#ifndef UNIV_NONINL
-#include "que0que.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/que0que.ic b/storage/innodb_plugin/include/que0que.ic
deleted file mode 100644
index a1c0dc1e77a..00000000000
--- a/storage/innodb_plugin/include/que0que.ic
+++ /dev/null
@@ -1,273 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/que0que.ic
-Query graph
-
-Created 5/27/1996 Heikki Tuuri
-*******************************************************/
-
-#include "usr0sess.h"
-
-/***********************************************************************//**
-Gets the trx of a query thread. */
-UNIV_INLINE
-trx_t*
-thr_get_trx(
-/*========*/
- que_thr_t* thr) /*!< in: query thread */
-{
- ut_ad(thr);
-
- return(thr->graph->trx);
-}
-
-/***********************************************************************//**
-Gets the first thr in a fork. */
-UNIV_INLINE
-que_thr_t*
-que_fork_get_first_thr(
-/*===================*/
- que_fork_t* fork) /*!< in: query fork */
-{
- return(UT_LIST_GET_FIRST(fork->thrs));
-}
-
-/***********************************************************************//**
-Gets the child node of the first thr in a fork. */
-UNIV_INLINE
-que_node_t*
-que_fork_get_child(
-/*===============*/
- que_fork_t* fork) /*!< in: query fork */
-{
- que_thr_t* thr;
-
- thr = UT_LIST_GET_FIRST(fork->thrs);
-
- return(thr->child);
-}
-
-/***********************************************************************//**
-Gets the type of a graph node. */
-UNIV_INLINE
-ulint
-que_node_get_type(
-/*==============*/
- que_node_t* node) /*!< in: graph node */
-{
- ut_ad(node);
-
- return(((que_common_t*)node)->type);
-}
-
-/***********************************************************************//**
-Gets pointer to the value dfield of a graph node. */
-UNIV_INLINE
-dfield_t*
-que_node_get_val(
-/*=============*/
- que_node_t* node) /*!< in: graph node */
-{
- ut_ad(node);
-
- return(&(((que_common_t*)node)->val));
-}
-
-/***********************************************************************//**
-Gets the value buffer size of a graph node.
-@return val buffer size, not defined if val.data == NULL in node */
-UNIV_INLINE
-ulint
-que_node_get_val_buf_size(
-/*======================*/
- que_node_t* node) /*!< in: graph node */
-{
- ut_ad(node);
-
- return(((que_common_t*)node)->val_buf_size);
-}
-
-/***********************************************************************//**
-Sets the value buffer size of a graph node. */
-UNIV_INLINE
-void
-que_node_set_val_buf_size(
-/*======================*/
- que_node_t* node, /*!< in: graph node */
- ulint size) /*!< in: size */
-{
- ut_ad(node);
-
- ((que_common_t*)node)->val_buf_size = size;
-}
-
-/***********************************************************************//**
-Sets the parent of a graph node. */
-UNIV_INLINE
-void
-que_node_set_parent(
-/*================*/
- que_node_t* node, /*!< in: graph node */
- que_node_t* parent) /*!< in: parent */
-{
- ut_ad(node);
-
- ((que_common_t*)node)->parent = parent;
-}
-
-/***********************************************************************//**
-Gets pointer to the value data type field of a graph node. */
-UNIV_INLINE
-dtype_t*
-que_node_get_data_type(
-/*===================*/
- que_node_t* node) /*!< in: graph node */
-{
- ut_ad(node);
-
- return(dfield_get_type(&((que_common_t*) node)->val));
-}
-
-/*********************************************************************//**
-Catenates a query graph node to a list of them, possible empty list.
-@return one-way list of nodes */
-UNIV_INLINE
-que_node_t*
-que_node_list_add_last(
-/*===================*/
- que_node_t* node_list, /*!< in: node list, or NULL */
- que_node_t* node) /*!< in: node */
-{
- que_common_t* cnode;
- que_common_t* cnode2;
-
- cnode = (que_common_t*) node;
-
- cnode->brother = NULL;
-
- if (node_list == NULL) {
-
- return(node);
- }
-
- cnode2 = (que_common_t*) node_list;
-
- while (cnode2->brother != NULL) {
- cnode2 = (que_common_t*) cnode2->brother;
- }
-
- cnode2->brother = node;
-
- return(node_list);
-}
-
-/*********************************************************************//**
-Gets the next list node in a list of query graph nodes.
-@return next node in a list of nodes */
-UNIV_INLINE
-que_node_t*
-que_node_get_next(
-/*==============*/
- que_node_t* node) /*!< in: node in a list */
-{
- return(((que_common_t*)node)->brother);
-}
-
-/*********************************************************************//**
-Gets a query graph node list length.
-@return length, for NULL list 0 */
-UNIV_INLINE
-ulint
-que_node_list_get_len(
-/*==================*/
- que_node_t* node_list) /*!< in: node list, or NULL */
-{
- const que_common_t* cnode;
- ulint len;
-
- cnode = (const que_common_t*) node_list;
- len = 0;
-
- while (cnode != NULL) {
- len++;
- cnode = (const que_common_t*) cnode->brother;
- }
-
- return(len);
-}
-
-/*********************************************************************//**
-Gets the parent node of a query graph node.
-@return parent node or NULL */
-UNIV_INLINE
-que_node_t*
-que_node_get_parent(
-/*================*/
- que_node_t* node) /*!< in: node */
-{
- return(((que_common_t*)node)->parent);
-}
-
-/**********************************************************************//**
-Checks if graph, trx, or session is in a state where the query thread should
-be stopped.
-@return TRUE if should be stopped; NOTE that if the peek is made
-without reserving the kernel mutex, then another peek with the mutex
-reserved is necessary before deciding the actual stopping */
-UNIV_INLINE
-ibool
-que_thr_peek_stop(
-/*==============*/
- que_thr_t* thr) /*!< in: query thread */
-{
- trx_t* trx;
- que_t* graph;
-
- graph = thr->graph;
- trx = graph->trx;
-
- if (graph->state != QUE_FORK_ACTIVE
- || trx->que_state == TRX_QUE_LOCK_WAIT
- || (UT_LIST_GET_LEN(trx->signals) > 0
- && trx->que_state == TRX_QUE_RUNNING)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/***********************************************************************//**
-Returns TRUE if the query graph is for a SELECT statement.
-@return TRUE if a select */
-UNIV_INLINE
-ibool
-que_graph_is_select(
-/*================*/
- que_t* graph) /*!< in: graph */
-{
- if (graph->fork_type == QUE_FORK_SELECT_SCROLL
- || graph->fork_type == QUE_FORK_SELECT_NON_SCROLL) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
diff --git a/storage/innodb_plugin/include/que0types.h b/storage/innodb_plugin/include/que0types.h
deleted file mode 100644
index ea976074768..00000000000
--- a/storage/innodb_plugin/include/que0types.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/que0types.h
-Query graph global types
-
-Created 5/27/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef que0types_h
-#define que0types_h
-
-#include "data0data.h"
-#include "dict0types.h"
-
-/* Pseudotype for all graph nodes */
-typedef void que_node_t;
-
-typedef struct que_fork_struct que_fork_t;
-
-/* Query graph root is a fork node */
-typedef que_fork_t que_t;
-
-typedef struct que_thr_struct que_thr_t;
-typedef struct que_common_struct que_common_t;
-
-/* Common struct at the beginning of each query graph node; the name of this
-substruct must be 'common' */
-
-struct que_common_struct{
- ulint type; /*!< query node type */
- que_node_t* parent; /*!< back pointer to parent node, or NULL */
- que_node_t* brother;/* pointer to a possible brother node */
- dfield_t val; /*!< evaluated value for an expression */
- ulint val_buf_size;
- /* buffer size for the evaluated value data,
- if the buffer has been allocated dynamically:
- if this field is != 0, and the node is a
- symbol node or a function node, then we
- have to free the data field in val
- explicitly */
-};
-
-#endif
diff --git a/storage/innodb_plugin/include/read0read.h b/storage/innodb_plugin/include/read0read.h
deleted file mode 100644
index 4d9a9fade36..00000000000
--- a/storage/innodb_plugin/include/read0read.h
+++ /dev/null
@@ -1,194 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/read0read.h
-Cursor read
-
-Created 2/16/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef read0read_h
-#define read0read_h
-
-#include "univ.i"
-
-
-#include "ut0byte.h"
-#include "ut0lst.h"
-#include "trx0trx.h"
-#include "read0types.h"
-
-/*********************************************************************//**
-Opens a read view where exactly the transactions serialized before this
-point in time are seen in the view.
-@return own: read view struct */
-UNIV_INTERN
-read_view_t*
-read_view_open_now(
-/*===============*/
- trx_id_t cr_trx_id, /*!< in: trx_id of creating
- transaction, or ut_dulint_zero
- used in purge */
- mem_heap_t* heap); /*!< in: memory heap from which
- allocated */
-/*********************************************************************//**
-Makes a copy of the oldest existing read view, or opens a new. The view
-must be closed with ..._close.
-@return own: read view struct */
-UNIV_INTERN
-read_view_t*
-read_view_oldest_copy_or_open_new(
-/*==============================*/
- trx_id_t cr_trx_id, /*!< in: trx_id of creating
- transaction, or ut_dulint_zero
- used in purge */
- mem_heap_t* heap); /*!< in: memory heap from which
- allocated */
-/*********************************************************************//**
-Closes a read view. */
-UNIV_INTERN
-void
-read_view_close(
-/*============*/
- read_view_t* view); /*!< in: read view */
-/*********************************************************************//**
-Closes a consistent read view for MySQL. This function is called at an SQL
-statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */
-UNIV_INTERN
-void
-read_view_close_for_mysql(
-/*======================*/
- trx_t* trx); /*!< in: trx which has a read view */
-/*********************************************************************//**
-Checks if a read view sees the specified transaction.
-@return TRUE if sees */
-UNIV_INLINE
-ibool
-read_view_sees_trx_id(
-/*==================*/
- const read_view_t* view, /*!< in: read view */
- trx_id_t trx_id);/*!< in: trx id */
-/*********************************************************************//**
-Prints a read view to stderr. */
-UNIV_INTERN
-void
-read_view_print(
-/*============*/
- const read_view_t* view); /*!< in: read view */
-/*********************************************************************//**
-Create a consistent cursor view for mysql to be used in cursors. In this
-consistent read view modifications done by the creating transaction or future
-transactions are not visible. */
-UNIV_INTERN
-cursor_view_t*
-read_cursor_view_create_for_mysql(
-/*==============================*/
- trx_t* cr_trx);/*!< in: trx where cursor view is created */
-/*********************************************************************//**
-Close a given consistent cursor view for mysql and restore global read view
-back to a transaction read view. */
-UNIV_INTERN
-void
-read_cursor_view_close_for_mysql(
-/*=============================*/
- trx_t* trx, /*!< in: trx */
- cursor_view_t* curview); /*!< in: cursor view to be closed */
-/*********************************************************************//**
-This function sets a given consistent cursor view to a transaction
-read view if given consistent cursor view is not NULL. Otherwise, function
-restores a global read view to a transaction read view. */
-UNIV_INTERN
-void
-read_cursor_set_for_mysql(
-/*======================*/
- trx_t* trx, /*!< in: transaction where cursor is set */
- cursor_view_t* curview);/*!< in: consistent cursor view to be set */
-
-/** Read view lists the trx ids of those transactions for which a consistent
-read should not see the modifications to the database. */
-
-struct read_view_struct{
- ulint type; /*!< VIEW_NORMAL, VIEW_HIGH_GRANULARITY */
- undo_no_t undo_no;/*!< ut_dulint_zero or if type is
- VIEW_HIGH_GRANULARITY
- transaction undo_no when this high-granularity
- consistent read view was created */
- trx_id_t low_limit_no;
- /*!< The view does not need to see the undo
- logs for transactions whose transaction number
- is strictly smaller (<) than this value: they
- can be removed in purge if not needed by other
- views */
- trx_id_t low_limit_id;
- /*!< The read should not see any transaction
- with trx id >= this value. In other words,
- this is the "high water mark". */
- trx_id_t up_limit_id;
- /*!< The read should see all trx ids which
- are strictly smaller (<) than this value.
- In other words,
- this is the "low water mark". */
- ulint n_trx_ids;
- /*!< Number of cells in the trx_ids array */
- trx_id_t* trx_ids;/*!< Additional trx ids which the read should
- not see: typically, these are the active
- transactions at the time when the read is
- serialized, except the reading transaction
- itself; the trx ids in this array are in a
- descending order. These trx_ids should be
- between the "low" and "high" water marks,
- that is, up_limit_id and low_limit_id. */
- trx_id_t creator_trx_id;
- /*!< trx id of creating transaction, or
- ut_dulint_zero used in purge */
- UT_LIST_NODE_T(read_view_t) view_list;
- /*!< List of read views in trx_sys */
-};
-
-/** Read view types @{ */
-#define VIEW_NORMAL 1 /*!< Normal consistent read view
- where transaction does not see changes
- made by active transactions except
- creating transaction. */
-#define VIEW_HIGH_GRANULARITY 2 /*!< High-granularity read view where
- transaction does not see changes
- made by active transactions and own
- changes after a point in time when this
- read view was created. */
-/* @} */
-
-/** Implement InnoDB framework to support consistent read views in
-cursors. This struct holds both heap where consistent read view
-is allocated and pointer to a read view. */
-
-struct cursor_view_struct{
- mem_heap_t* heap;
- /*!< Memory heap for the cursor view */
- read_view_t* read_view;
- /*!< Consistent read view of the cursor*/
- ulint n_mysql_tables_in_use;
- /*!< number of Innobase tables used in the
- processing of this cursor */
-};
-
-#ifndef UNIV_NONINL
-#include "read0read.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/read0read.ic b/storage/innodb_plugin/include/read0read.ic
deleted file mode 100644
index 9924967cc2d..00000000000
--- a/storage/innodb_plugin/include/read0read.ic
+++ /dev/null
@@ -1,98 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/read0read.ic
-Cursor read
-
-Created 2/16/1997 Heikki Tuuri
-*******************************************************/
-
-/*********************************************************************//**
-Gets the nth trx id in a read view.
-@return trx id */
-UNIV_INLINE
-trx_id_t
-read_view_get_nth_trx_id(
-/*=====================*/
- const read_view_t* view, /*!< in: read view */
- ulint n) /*!< in: position */
-{
- ut_ad(n < view->n_trx_ids);
-
- return(*(view->trx_ids + n));
-}
-
-/*********************************************************************//**
-Sets the nth trx id in a read view. */
-UNIV_INLINE
-void
-read_view_set_nth_trx_id(
-/*=====================*/
- read_view_t* view, /*!< in: read view */
- ulint n, /*!< in: position */
- trx_id_t trx_id) /*!< in: trx id to set */
-{
- ut_ad(n < view->n_trx_ids);
-
- *(view->trx_ids + n) = trx_id;
-}
-
-/*********************************************************************//**
-Checks if a read view sees the specified transaction.
-@return TRUE if sees */
-UNIV_INLINE
-ibool
-read_view_sees_trx_id(
-/*==================*/
- const read_view_t* view, /*!< in: read view */
- trx_id_t trx_id) /*!< in: trx id */
-{
- ulint n_ids;
- int cmp;
- ulint i;
-
- if (ut_dulint_cmp(trx_id, view->up_limit_id) < 0) {
-
- return(TRUE);
- }
-
- if (ut_dulint_cmp(trx_id, view->low_limit_id) >= 0) {
-
- return(FALSE);
- }
-
- /* We go through the trx ids in the array smallest first: this order
- may save CPU time, because if there was a very long running
- transaction in the trx id array, its trx id is looked at first, and
- the first two comparisons may well decide the visibility of trx_id. */
-
- n_ids = view->n_trx_ids;
-
- for (i = 0; i < n_ids; i++) {
-
- cmp = ut_dulint_cmp(
- trx_id,
- read_view_get_nth_trx_id(view, n_ids - i - 1));
- if (cmp <= 0) {
- return(cmp < 0);
- }
- }
-
- return(TRUE);
-}
diff --git a/storage/innodb_plugin/include/read0types.h b/storage/innodb_plugin/include/read0types.h
deleted file mode 100644
index caf69e3fb51..00000000000
--- a/storage/innodb_plugin/include/read0types.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/read0types.h
-Cursor read
-
-Created 2/16/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef read0types_h
-#define read0types_h
-
-typedef struct read_view_struct read_view_t;
-typedef struct cursor_view_struct cursor_view_t;
-
-#endif
diff --git a/storage/innodb_plugin/include/rem0cmp.h b/storage/innodb_plugin/include/rem0cmp.h
deleted file mode 100644
index 072f74267ea..00000000000
--- a/storage/innodb_plugin/include/rem0cmp.h
+++ /dev/null
@@ -1,194 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file include/rem0cmp.h
-Comparison services for records
-
-Created 7/1/1994 Heikki Tuuri
-************************************************************************/
-
-#ifndef rem0cmp_h
-#define rem0cmp_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "data0type.h"
-#include "dict0dict.h"
-#include "rem0rec.h"
-
-/*************************************************************//**
-Returns TRUE if two columns are equal for comparison purposes.
-@return TRUE if the columns are considered equal in comparisons */
-UNIV_INTERN
-ibool
-cmp_cols_are_equal(
-/*===============*/
- const dict_col_t* col1, /*!< in: column 1 */
- const dict_col_t* col2, /*!< in: column 2 */
- ibool check_charsets);
- /*!< in: whether to check charsets */
-/*************************************************************//**
-This function is used to compare two data fields for which we know the
-data type.
-@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */
-UNIV_INLINE
-int
-cmp_data_data(
-/*==========*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type */
- const byte* data1, /*!< in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /*!< in: data field length or UNIV_SQL_NULL */
- const byte* data2, /*!< in: data field (== a pointer to a memory
- buffer) */
- ulint len2); /*!< in: data field length or UNIV_SQL_NULL */
-/*************************************************************//**
-This function is used to compare two data fields for which we know the
-data type.
-@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */
-UNIV_INTERN
-int
-cmp_data_data_slow(
-/*===============*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type */
- const byte* data1, /*!< in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /*!< in: data field length or UNIV_SQL_NULL */
- const byte* data2, /*!< in: data field (== a pointer to a memory
- buffer) */
- ulint len2); /*!< in: data field length or UNIV_SQL_NULL */
-/*************************************************************//**
-This function is used to compare two dfields where at least the first
-has its data type field set.
-@return 1, 0, -1, if dfield1 is greater, equal, less than dfield2,
-respectively */
-UNIV_INLINE
-int
-cmp_dfield_dfield(
-/*==============*/
- const dfield_t* dfield1,/*!< in: data field; must have type field set */
- const dfield_t* dfield2);/*!< in: data field */
-/*************************************************************//**
-This function is used to compare a data tuple to a physical record.
-Only dtuple->n_fields_cmp first fields are taken into account for
-the data tuple! If we denote by n = n_fields_cmp, then rec must
-have either m >= n fields, or it must differ from dtuple in some of
-the m fields rec has. If rec has an externally stored field we do not
-compare it but return with value 0 if such a comparison should be
-made.
-@return 1, 0, -1, if dtuple is greater, equal, less than rec,
-respectively, when only the common first fields are compared, or until
-the first externally stored field in rec */
-UNIV_INTERN
-int
-cmp_dtuple_rec_with_match(
-/*======================*/
- const dtuple_t* dtuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: physical record which differs from
- dtuple in some of the common fields, or which
- has an equal number or more fields than
- dtuple */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint* matched_fields, /*!< in/out: number of already completely
- matched fields; when function returns,
- contains the value for current comparison */
- ulint* matched_bytes); /*!< in/out: number of already matched
- bytes within the first field not completely
- matched; when function returns, contains the
- value for current comparison */
-/**************************************************************//**
-Compares a data tuple to a physical record.
-@see cmp_dtuple_rec_with_match
-@return 1, 0, -1, if dtuple is greater, equal, less than rec, respectively */
-UNIV_INTERN
-int
-cmp_dtuple_rec(
-/*===========*/
- const dtuple_t* dtuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-/**************************************************************//**
-Checks if a dtuple is a prefix of a record. The last field in dtuple
-is allowed to be a prefix of the corresponding field in the record.
-@return TRUE if prefix */
-UNIV_INTERN
-ibool
-cmp_dtuple_is_prefix_of_rec(
-/*========================*/
- const dtuple_t* dtuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-/*************************************************************//**
-Compare two physical records that contain the same number of columns,
-none of which are stored externally.
-@return 1, 0, -1 if rec1 is greater, equal, less, respectively, than rec2 */
-UNIV_INTERN
-int
-cmp_rec_rec_simple(
-/*===============*/
- const rec_t* rec1, /*!< in: physical record */
- const rec_t* rec2, /*!< in: physical record */
- const ulint* offsets1,/*!< in: rec_get_offsets(rec1, ...) */
- const ulint* offsets2,/*!< in: rec_get_offsets(rec2, ...) */
- const dict_index_t* index); /*!< in: data dictionary index */
-/*************************************************************//**
-This function is used to compare two physical records. Only the common
-first fields are compared, and if an externally stored field is
-encountered, then 0 is returned.
-@return 1, 0, -1 if rec1 is greater, equal, less, respectively */
-UNIV_INTERN
-int
-cmp_rec_rec_with_match(
-/*===================*/
- const rec_t* rec1, /*!< in: physical record */
- const rec_t* rec2, /*!< in: physical record */
- const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */
- const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */
- dict_index_t* index, /*!< in: data dictionary index */
- ulint* matched_fields, /*!< in/out: number of already completely
- matched fields; when the function returns,
- contains the value the for current
- comparison */
- ulint* matched_bytes);/*!< in/out: number of already matched
- bytes within the first field not completely
- matched; when the function returns, contains
- the value for the current comparison */
-/*************************************************************//**
-This function is used to compare two physical records. Only the common
-first fields are compared.
-@return 1, 0 , -1 if rec1 is greater, equal, less, respectively, than
-rec2; only the common first fields are compared */
-UNIV_INLINE
-int
-cmp_rec_rec(
-/*========*/
- const rec_t* rec1, /*!< in: physical record */
- const rec_t* rec2, /*!< in: physical record */
- const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */
- const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */
- dict_index_t* index); /*!< in: data dictionary index */
-
-
-#ifndef UNIV_NONINL
-#include "rem0cmp.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/rem0cmp.ic b/storage/innodb_plugin/include/rem0cmp.ic
deleted file mode 100644
index 39ef5f4fba3..00000000000
--- a/storage/innodb_plugin/include/rem0cmp.ic
+++ /dev/null
@@ -1,91 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file include/rem0cmp.ic
-Comparison services for records
-
-Created 7/1/1994 Heikki Tuuri
-************************************************************************/
-
-/*************************************************************//**
-This function is used to compare two data fields for which we know the
-data type.
-@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */
-UNIV_INLINE
-int
-cmp_data_data(
-/*==========*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type */
- const byte* data1, /*!< in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /*!< in: data field length or UNIV_SQL_NULL */
- const byte* data2, /*!< in: data field (== a pointer to a memory
- buffer) */
- ulint len2) /*!< in: data field length or UNIV_SQL_NULL */
-{
- return(cmp_data_data_slow(mtype, prtype, data1, len1, data2, len2));
-}
-
-/*************************************************************//**
-This function is used to compare two dfields where at least the first
-has its data type field set.
-@return 1, 0, -1, if dfield1 is greater, equal, less than dfield2,
-respectively */
-UNIV_INLINE
-int
-cmp_dfield_dfield(
-/*==============*/
- const dfield_t* dfield1,/*!< in: data field; must have type field set */
- const dfield_t* dfield2)/*!< in: data field */
-{
- const dtype_t* type;
-
- ut_ad(dfield_check_typed(dfield1));
-
- type = dfield_get_type(dfield1);
-
- return(cmp_data_data(type->mtype, type->prtype,
- (const byte*) dfield_get_data(dfield1),
- dfield_get_len(dfield1),
- (const byte*) dfield_get_data(dfield2),
- dfield_get_len(dfield2)));
-}
-
-/*************************************************************//**
-This function is used to compare two physical records. Only the common
-first fields are compared.
-@return 1, 0 , -1 if rec1 is greater, equal, less, respectively, than
-rec2; only the common first fields are compared */
-UNIV_INLINE
-int
-cmp_rec_rec(
-/*========*/
- const rec_t* rec1, /*!< in: physical record */
- const rec_t* rec2, /*!< in: physical record */
- const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */
- const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */
- dict_index_t* index) /*!< in: data dictionary index */
-{
- ulint match_f = 0;
- ulint match_b = 0;
-
- return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index,
- &match_f, &match_b));
-}
diff --git a/storage/innodb_plugin/include/rem0rec.h b/storage/innodb_plugin/include/rem0rec.h
deleted file mode 100644
index 17d08afabb9..00000000000
--- a/storage/innodb_plugin/include/rem0rec.h
+++ /dev/null
@@ -1,824 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/rem0rec.h
-Record manager
-
-Created 5/30/1994 Heikki Tuuri
-*************************************************************************/
-
-#ifndef rem0rec_h
-#define rem0rec_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "rem0types.h"
-#include "mtr0types.h"
-#include "page0types.h"
-
-/* Info bit denoting the predefined minimum record: this bit is set
-if and only if the record is the first user record on a non-leaf
-B-tree page that is the leftmost page on its level
-(PAGE_LEVEL is nonzero and FIL_PAGE_PREV is FIL_NULL). */
-#define REC_INFO_MIN_REC_FLAG 0x10UL
-/* The deleted flag in info bits */
-#define REC_INFO_DELETED_FLAG 0x20UL /* when bit is set to 1, it means the
- record has been delete marked */
-
-/* Number of extra bytes in an old-style record,
-in addition to the data and the offsets */
-#define REC_N_OLD_EXTRA_BYTES 6
-/* Number of extra bytes in a new-style record,
-in addition to the data and the offsets */
-#define REC_N_NEW_EXTRA_BYTES 5
-
-/* Record status values */
-#define REC_STATUS_ORDINARY 0
-#define REC_STATUS_NODE_PTR 1
-#define REC_STATUS_INFIMUM 2
-#define REC_STATUS_SUPREMUM 3
-
-/* The following four constants are needed in page0zip.c in order to
-efficiently compress and decompress pages. */
-
-/* The offset of heap_no in a compact record */
-#define REC_NEW_HEAP_NO 4
-/* The shift of heap_no in a compact record.
-The status is stored in the low-order bits. */
-#define REC_HEAP_NO_SHIFT 3
-
-/* Length of a B-tree node pointer, in bytes */
-#define REC_NODE_PTR_SIZE 4
-
-#ifdef UNIV_DEBUG
-/* Length of the rec_get_offsets() header */
-# define REC_OFFS_HEADER_SIZE 4
-#else /* UNIV_DEBUG */
-/* Length of the rec_get_offsets() header */
-# define REC_OFFS_HEADER_SIZE 2
-#endif /* UNIV_DEBUG */
-
-/* Number of elements that should be initially allocated for the
-offsets[] array, first passed to rec_get_offsets() */
-#define REC_OFFS_NORMAL_SIZE 100
-#define REC_OFFS_SMALL_SIZE 10
-
-/******************************************************//**
-The following function is used to get the pointer of the next chained record
-on the same page.
-@return pointer to the next chained record, or NULL if none */
-UNIV_INLINE
-const rec_t*
-rec_get_next_ptr_const(
-/*===================*/
- const rec_t* rec, /*!< in: physical record */
- ulint comp); /*!< in: nonzero=compact page format */
-/******************************************************//**
-The following function is used to get the pointer of the next chained record
-on the same page.
-@return pointer to the next chained record, or NULL if none */
-UNIV_INLINE
-rec_t*
-rec_get_next_ptr(
-/*=============*/
- rec_t* rec, /*!< in: physical record */
- ulint comp); /*!< in: nonzero=compact page format */
-/******************************************************//**
-The following function is used to get the offset of the
-next chained record on the same page.
-@return the page offset of the next chained record, or 0 if none */
-UNIV_INLINE
-ulint
-rec_get_next_offs(
-/*==============*/
- const rec_t* rec, /*!< in: physical record */
- ulint comp); /*!< in: nonzero=compact page format */
-/******************************************************//**
-The following function is used to set the next record offset field
-of an old-style record. */
-UNIV_INLINE
-void
-rec_set_next_offs_old(
-/*==================*/
- rec_t* rec, /*!< in: old-style physical record */
- ulint next); /*!< in: offset of the next record */
-/******************************************************//**
-The following function is used to set the next record offset field
-of a new-style record. */
-UNIV_INLINE
-void
-rec_set_next_offs_new(
-/*==================*/
- rec_t* rec, /*!< in/out: new-style physical record */
- ulint next); /*!< in: offset of the next record */
-/******************************************************//**
-The following function is used to get the number of fields
-in an old-style record.
-@return number of data fields */
-UNIV_INLINE
-ulint
-rec_get_n_fields_old(
-/*=================*/
- const rec_t* rec); /*!< in: physical record */
-/******************************************************//**
-The following function is used to get the number of fields
-in a record.
-@return number of data fields */
-UNIV_INLINE
-ulint
-rec_get_n_fields(
-/*=============*/
- const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index); /*!< in: record descriptor */
-/******************************************************//**
-The following function is used to get the number of records owned by the
-previous directory record.
-@return number of owned records */
-UNIV_INLINE
-ulint
-rec_get_n_owned_old(
-/*================*/
- const rec_t* rec); /*!< in: old-style physical record */
-/******************************************************//**
-The following function is used to set the number of owned records. */
-UNIV_INLINE
-void
-rec_set_n_owned_old(
-/*================*/
- rec_t* rec, /*!< in: old-style physical record */
- ulint n_owned); /*!< in: the number of owned */
-/******************************************************//**
-The following function is used to get the number of records owned by the
-previous directory record.
-@return number of owned records */
-UNIV_INLINE
-ulint
-rec_get_n_owned_new(
-/*================*/
- const rec_t* rec); /*!< in: new-style physical record */
-/******************************************************//**
-The following function is used to set the number of owned records. */
-UNIV_INLINE
-void
-rec_set_n_owned_new(
-/*================*/
- rec_t* rec, /*!< in/out: new-style physical record */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- ulint n_owned);/*!< in: the number of owned */
-/******************************************************//**
-The following function is used to retrieve the info bits of
-a record.
-@return info bits */
-UNIV_INLINE
-ulint
-rec_get_info_bits(
-/*==============*/
- const rec_t* rec, /*!< in: physical record */
- ulint comp); /*!< in: nonzero=compact page format */
-/******************************************************//**
-The following function is used to set the info bits of a record. */
-UNIV_INLINE
-void
-rec_set_info_bits_old(
-/*==================*/
- rec_t* rec, /*!< in: old-style physical record */
- ulint bits); /*!< in: info bits */
-/******************************************************//**
-The following function is used to set the info bits of a record. */
-UNIV_INLINE
-void
-rec_set_info_bits_new(
-/*==================*/
- rec_t* rec, /*!< in/out: new-style physical record */
- ulint bits); /*!< in: info bits */
-/******************************************************//**
-The following function retrieves the status bits of a new-style record.
-@return status bits */
-UNIV_INLINE
-ulint
-rec_get_status(
-/*===========*/
- const rec_t* rec); /*!< in: physical record */
-
-/******************************************************//**
-The following function is used to set the status bits of a new-style record. */
-UNIV_INLINE
-void
-rec_set_status(
-/*===========*/
- rec_t* rec, /*!< in/out: physical record */
- ulint bits); /*!< in: info bits */
-
-/******************************************************//**
-The following function is used to retrieve the info and status
-bits of a record. (Only compact records have status bits.)
-@return info bits */
-UNIV_INLINE
-ulint
-rec_get_info_and_status_bits(
-/*=========================*/
- const rec_t* rec, /*!< in: physical record */
- ulint comp); /*!< in: nonzero=compact page format */
-/******************************************************//**
-The following function is used to set the info and status
-bits of a record. (Only compact records have status bits.) */
-UNIV_INLINE
-void
-rec_set_info_and_status_bits(
-/*=========================*/
- rec_t* rec, /*!< in/out: compact physical record */
- ulint bits); /*!< in: info bits */
-
-/******************************************************//**
-The following function tells if record is delete marked.
-@return nonzero if delete marked */
-UNIV_INLINE
-ulint
-rec_get_deleted_flag(
-/*=================*/
- const rec_t* rec, /*!< in: physical record */
- ulint comp); /*!< in: nonzero=compact page format */
-/******************************************************//**
-The following function is used to set the deleted bit. */
-UNIV_INLINE
-void
-rec_set_deleted_flag_old(
-/*=====================*/
- rec_t* rec, /*!< in: old-style physical record */
- ulint flag); /*!< in: nonzero if delete marked */
-/******************************************************//**
-The following function is used to set the deleted bit. */
-UNIV_INLINE
-void
-rec_set_deleted_flag_new(
-/*=====================*/
- rec_t* rec, /*!< in/out: new-style physical record */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- ulint flag); /*!< in: nonzero if delete marked */
-/******************************************************//**
-The following function tells if a new-style record is a node pointer.
-@return TRUE if node pointer */
-UNIV_INLINE
-ibool
-rec_get_node_ptr_flag(
-/*==================*/
- const rec_t* rec); /*!< in: physical record */
-/******************************************************//**
-The following function is used to get the order number
-of an old-style record in the heap of the index page.
-@return heap order number */
-UNIV_INLINE
-ulint
-rec_get_heap_no_old(
-/*================*/
- const rec_t* rec); /*!< in: physical record */
-/******************************************************//**
-The following function is used to set the heap number
-field in an old-style record. */
-UNIV_INLINE
-void
-rec_set_heap_no_old(
-/*================*/
- rec_t* rec, /*!< in: physical record */
- ulint heap_no);/*!< in: the heap number */
-/******************************************************//**
-The following function is used to get the order number
-of a new-style record in the heap of the index page.
-@return heap order number */
-UNIV_INLINE
-ulint
-rec_get_heap_no_new(
-/*================*/
- const rec_t* rec); /*!< in: physical record */
-/******************************************************//**
-The following function is used to set the heap number
-field in a new-style record. */
-UNIV_INLINE
-void
-rec_set_heap_no_new(
-/*================*/
- rec_t* rec, /*!< in/out: physical record */
- ulint heap_no);/*!< in: the heap number */
-/******************************************************//**
-The following function is used to test whether the data offsets
-in the record are stored in one-byte or two-byte format.
-@return TRUE if 1-byte form */
-UNIV_INLINE
-ibool
-rec_get_1byte_offs_flag(
-/*====================*/
- const rec_t* rec); /*!< in: physical record */
-
-/******************************************************//**
-Determine how many of the first n columns in a compact
-physical record are stored externally.
-@return number of externally stored columns */
-UNIV_INTERN
-ulint
-rec_get_n_extern_new(
-/*=================*/
- const rec_t* rec, /*!< in: compact physical record */
- dict_index_t* index, /*!< in: record descriptor */
- ulint n); /*!< in: number of columns to scan */
-
-/******************************************************//**
-The following function determines the offsets to each field
-in the record. It can reuse a previously allocated array.
-@return the new offsets */
-UNIV_INTERN
-ulint*
-rec_get_offsets_func(
-/*=================*/
- const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets,/*!< in/out: array consisting of
- offsets[0] allocated elements,
- or an array from rec_get_offsets(),
- or NULL */
- ulint n_fields,/*!< in: maximum number of
- initialized fields
- (ULINT_UNDEFINED if all fields) */
- mem_heap_t** heap, /*!< in/out: memory heap */
- const char* file, /*!< in: file name where called */
- ulint line); /*!< in: line number where called */
-
-#define rec_get_offsets(rec,index,offsets,n,heap) \
- rec_get_offsets_func(rec,index,offsets,n,heap,__FILE__,__LINE__)
-
-/******************************************************//**
-Determine the offset to each field in a leaf-page record
-in ROW_FORMAT=COMPACT. This is a special case of
-rec_init_offsets() and rec_get_offsets_func(). */
-UNIV_INTERN
-void
-rec_init_offsets_comp_ordinary(
-/*===========================*/
- const rec_t* rec, /*!< in: physical record in
- ROW_FORMAT=COMPACT */
- ulint extra, /*!< in: number of bytes to reserve
- between the record header and
- the data payload
- (usually REC_N_NEW_EXTRA_BYTES) */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets);/*!< in/out: array of offsets;
- in: n=rec_offs_n_fields(offsets) */
-
-/******************************************************//**
-The following function determines the offsets to each field
-in the record. It can reuse a previously allocated array. */
-UNIV_INTERN
-void
-rec_get_offsets_reverse(
-/*====================*/
- const byte* extra, /*!< in: the extra bytes of a
- compact record in reverse order,
- excluding the fixed-size
- REC_N_NEW_EXTRA_BYTES */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint node_ptr,/*!< in: nonzero=node pointer,
- 0=leaf node */
- ulint* offsets);/*!< in/out: array consisting of
- offsets[0] allocated elements */
-
-/************************************************************//**
-Validates offsets returned by rec_get_offsets().
-@return TRUE if valid */
-UNIV_INLINE
-ibool
-rec_offs_validate(
-/*==============*/
- const rec_t* rec, /*!< in: record or NULL */
- const dict_index_t* index, /*!< in: record descriptor or NULL */
- const ulint* offsets);/*!< in: array returned by
- rec_get_offsets() */
-#ifdef UNIV_DEBUG
-/************************************************************//**
-Updates debug data in offsets, in order to avoid bogus
-rec_offs_validate() failures. */
-UNIV_INLINE
-void
-rec_offs_make_valid(
-/*================*/
- const rec_t* rec, /*!< in: record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets);/*!< in: array returned by
- rec_get_offsets() */
-#else
-# define rec_offs_make_valid(rec, index, offsets) ((void) 0)
-#endif /* UNIV_DEBUG */
-
-/************************************************************//**
-The following function is used to get the offset to the nth
-data field in an old-style record.
-@return offset to the field */
-UNIV_INTERN
-ulint
-rec_get_nth_field_offs_old(
-/*=======================*/
- const rec_t* rec, /*!< in: record */
- ulint n, /*!< in: index of the field */
- ulint* len); /*!< out: length of the field; UNIV_SQL_NULL
- if SQL null */
-#define rec_get_nth_field_old(rec, n, len) \
-((rec) + rec_get_nth_field_offs_old(rec, n, len))
-/************************************************************//**
-Gets the physical size of an old-style field.
-Also an SQL null may have a field of size > 0,
-if the data type is of a fixed size.
-@return field size in bytes */
-UNIV_INLINE
-ulint
-rec_get_nth_field_size(
-/*===================*/
- const rec_t* rec, /*!< in: record */
- ulint n); /*!< in: index of the field */
-/************************************************************//**
-The following function is used to get an offset to the nth
-data field in a record.
-@return offset from the origin of rec */
-UNIV_INLINE
-ulint
-rec_get_nth_field_offs(
-/*===================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n, /*!< in: index of the field */
- ulint* len); /*!< out: length of the field; UNIV_SQL_NULL
- if SQL null */
-#define rec_get_nth_field(rec, offsets, n, len) \
-((rec) + rec_get_nth_field_offs(offsets, n, len))
-/******************************************************//**
-Determine if the offsets are for a record in the new
-compact format.
-@return nonzero if compact format */
-UNIV_INLINE
-ulint
-rec_offs_comp(
-/*==========*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-/******************************************************//**
-Determine if the offsets are for a record containing
-externally stored columns.
-@return nonzero if externally stored */
-UNIV_INLINE
-ulint
-rec_offs_any_extern(
-/*================*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-/******************************************************//**
-Returns nonzero if the extern bit is set in nth field of rec.
-@return nonzero if externally stored */
-UNIV_INLINE
-ulint
-rec_offs_nth_extern(
-/*================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n); /*!< in: nth field */
-/******************************************************//**
-Returns nonzero if the SQL NULL bit is set in nth field of rec.
-@return nonzero if SQL NULL */
-UNIV_INLINE
-ulint
-rec_offs_nth_sql_null(
-/*==================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n); /*!< in: nth field */
-/******************************************************//**
-Gets the physical size of a field.
-@return length of field */
-UNIV_INLINE
-ulint
-rec_offs_nth_size(
-/*==============*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n); /*!< in: nth field */
-
-/******************************************************//**
-Returns the number of extern bits set in a record.
-@return number of externally stored fields */
-UNIV_INLINE
-ulint
-rec_offs_n_extern(
-/*==============*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-/***********************************************************//**
-This is used to modify the value of an already existing field in a record.
-The previous value must have exactly the same size as the new value. If len
-is UNIV_SQL_NULL then the field is treated as an SQL null.
-For records in ROW_FORMAT=COMPACT (new-style records), len must not be
-UNIV_SQL_NULL unless the field already is SQL null. */
-UNIV_INLINE
-void
-rec_set_nth_field(
-/*==============*/
- rec_t* rec, /*!< in: record */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n, /*!< in: index number of the field */
- const void* data, /*!< in: pointer to the data if not SQL null */
- ulint len); /*!< in: length of the data or UNIV_SQL_NULL */
-/**********************************************************//**
-The following function returns the data size of an old-style physical
-record, that is the sum of field lengths. SQL null fields
-are counted as length 0 fields. The value returned by the function
-is the distance from record origin to record end in bytes.
-@return size */
-UNIV_INLINE
-ulint
-rec_get_data_size_old(
-/*==================*/
- const rec_t* rec); /*!< in: physical record */
-/**********************************************************//**
-The following function returns the number of allocated elements
-for an array of offsets.
-@return number of elements */
-UNIV_INLINE
-ulint
-rec_offs_get_n_alloc(
-/*=================*/
- const ulint* offsets);/*!< in: array for rec_get_offsets() */
-/**********************************************************//**
-The following function sets the number of allocated elements
-for an array of offsets. */
-UNIV_INLINE
-void
-rec_offs_set_n_alloc(
-/*=================*/
- ulint* offsets, /*!< out: array for rec_get_offsets(),
- must be allocated */
- ulint n_alloc); /*!< in: number of elements */
-#define rec_offs_init(offsets) \
- rec_offs_set_n_alloc(offsets, (sizeof offsets) / sizeof *offsets)
-/**********************************************************//**
-The following function returns the number of fields in a record.
-@return number of fields */
-UNIV_INLINE
-ulint
-rec_offs_n_fields(
-/*==============*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-/**********************************************************//**
-The following function returns the data size of a physical
-record, that is the sum of field lengths. SQL null fields
-are counted as length 0 fields. The value returned by the function
-is the distance from record origin to record end in bytes.
-@return size */
-UNIV_INLINE
-ulint
-rec_offs_data_size(
-/*===============*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-/**********************************************************//**
-Returns the total size of record minus data size of record.
-The value returned by the function is the distance from record
-start to record origin in bytes.
-@return size */
-UNIV_INLINE
-ulint
-rec_offs_extra_size(
-/*================*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-/**********************************************************//**
-Returns the total size of a physical record.
-@return size */
-UNIV_INLINE
-ulint
-rec_offs_size(
-/*==========*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-/**********************************************************//**
-Returns a pointer to the start of the record.
-@return pointer to start */
-UNIV_INLINE
-byte*
-rec_get_start(
-/*==========*/
- rec_t* rec, /*!< in: pointer to record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-/**********************************************************//**
-Returns a pointer to the end of the record.
-@return pointer to end */
-UNIV_INLINE
-byte*
-rec_get_end(
-/*========*/
- rec_t* rec, /*!< in: pointer to record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-/***************************************************************//**
-Copies a physical record to a buffer.
-@return pointer to the origin of the copy */
-UNIV_INLINE
-rec_t*
-rec_copy(
-/*=====*/
- void* buf, /*!< in: buffer */
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-#ifndef UNIV_HOTBACKUP
-/**************************************************************//**
-Copies the first n fields of a physical record to a new physical record in
-a buffer.
-@return own: copied record */
-UNIV_INTERN
-rec_t*
-rec_copy_prefix_to_buf(
-/*===================*/
- const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint n_fields, /*!< in: number of fields
- to copy */
- byte** buf, /*!< in/out: memory buffer
- for the copied prefix,
- or NULL */
- ulint* buf_size); /*!< in/out: buffer size */
-/************************************************************//**
-Folds a prefix of a physical record to a ulint.
-@return the folded value */
-UNIV_INLINE
-ulint
-rec_fold(
-/*=====*/
- const rec_t* rec, /*!< in: the physical record */
- const ulint* offsets, /*!< in: array returned by
- rec_get_offsets() */
- ulint n_fields, /*!< in: number of complete
- fields to fold */
- ulint n_bytes, /*!< in: number of bytes to fold
- in an incomplete last field */
- dulint tree_id) /*!< in: index tree id */
- __attribute__((pure));
-#endif /* !UNIV_HOTBACKUP */
-/*********************************************************//**
-Builds a ROW_FORMAT=COMPACT record out of a data tuple. */
-UNIV_INTERN
-void
-rec_convert_dtuple_to_rec_comp(
-/*===========================*/
- rec_t* rec, /*!< in: origin of record */
- ulint extra, /*!< in: number of bytes to
- reserve between the record
- header and the data payload
- (normally REC_N_NEW_EXTRA_BYTES) */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint status, /*!< in: status bits of the record */
- const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields);/*!< in: number of data fields */
-/*********************************************************//**
-Builds a physical record out of a data tuple and
-stores it into the given buffer.
-@return pointer to the origin of physical record */
-UNIV_INTERN
-rec_t*
-rec_convert_dtuple_to_rec(
-/*======================*/
- byte* buf, /*!< in: start address of the
- physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- const dtuple_t* dtuple, /*!< in: data tuple */
- ulint n_ext); /*!< in: number of
- externally stored columns */
-/**********************************************************//**
-Returns the extra size of an old-style physical record if we know its
-data size and number of fields.
-@return extra size */
-UNIV_INLINE
-ulint
-rec_get_converted_extra_size(
-/*=========================*/
- ulint data_size, /*!< in: data size */
- ulint n_fields, /*!< in: number of fields */
- ulint n_ext) /*!< in: number of externally stored columns */
- __attribute__((const));
-/**********************************************************//**
-Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
-@return total size */
-UNIV_INTERN
-ulint
-rec_get_converted_size_comp_prefix(
-/*===============================*/
- const dict_index_t* index, /*!< in: record descriptor;
- dict_table_is_comp() is
- assumed to hold, even if
- it does not */
- const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields,/*!< in: number of data fields */
- ulint* extra); /*!< out: extra size */
-/**********************************************************//**
-Determines the size of a data tuple in ROW_FORMAT=COMPACT.
-@return total size */
-UNIV_INTERN
-ulint
-rec_get_converted_size_comp(
-/*========================*/
- const dict_index_t* index, /*!< in: record descriptor;
- dict_table_is_comp() is
- assumed to hold, even if
- it does not */
- ulint status, /*!< in: status bits of the record */
- const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields,/*!< in: number of data fields */
- ulint* extra); /*!< out: extra size */
-/**********************************************************//**
-The following function returns the size of a data tuple when converted to
-a physical record.
-@return size */
-UNIV_INLINE
-ulint
-rec_get_converted_size(
-/*===================*/
- dict_index_t* index, /*!< in: record descriptor */
- const dtuple_t* dtuple, /*!< in: data tuple */
- ulint n_ext); /*!< in: number of externally stored columns */
-#ifndef UNIV_HOTBACKUP
-/**************************************************************//**
-Copies the first n fields of a physical record to a data tuple.
-The fields are copied to the memory heap. */
-UNIV_INTERN
-void
-rec_copy_prefix_to_dtuple(
-/*======================*/
- dtuple_t* tuple, /*!< out: data tuple */
- const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint n_fields, /*!< in: number of fields
- to copy */
- mem_heap_t* heap); /*!< in: memory heap */
-#endif /* !UNIV_HOTBACKUP */
-/***************************************************************//**
-Validates the consistency of a physical record.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-rec_validate(
-/*=========*/
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-/***************************************************************//**
-Prints an old-style physical record. */
-UNIV_INTERN
-void
-rec_print_old(
-/*==========*/
- FILE* file, /*!< in: file where to print */
- const rec_t* rec); /*!< in: physical record */
-#ifndef UNIV_HOTBACKUP
-/***************************************************************//**
-Prints a physical record in ROW_FORMAT=COMPACT. Ignores the
-record header. */
-UNIV_INTERN
-void
-rec_print_comp(
-/*===========*/
- FILE* file, /*!< in: file where to print */
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-/***************************************************************//**
-Prints a physical record. */
-UNIV_INTERN
-void
-rec_print_new(
-/*==========*/
- FILE* file, /*!< in: file where to print */
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-/***************************************************************//**
-Prints a physical record. */
-UNIV_INTERN
-void
-rec_print(
-/*======*/
- FILE* file, /*!< in: file where to print */
- const rec_t* rec, /*!< in: physical record */
- dict_index_t* index); /*!< in: record descriptor */
-#endif /* UNIV_HOTBACKUP */
-
-#define REC_INFO_BITS 6 /* This is single byte bit-field */
-
-/* Maximum lengths for the data in a physical record if the offsets
-are given in one byte (resp. two byte) format. */
-#define REC_1BYTE_OFFS_LIMIT 0x7FUL
-#define REC_2BYTE_OFFS_LIMIT 0x7FFFUL
-
-/* The data size of record must be smaller than this because we reserve
-two upmost bits in a two byte offset for special purposes */
-#define REC_MAX_DATA_SIZE (16 * 1024)
-
-#ifndef UNIV_NONINL
-#include "rem0rec.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/rem0rec.ic b/storage/innodb_plugin/include/rem0rec.ic
deleted file mode 100644
index 8e5bd9a7fcd..00000000000
--- a/storage/innodb_plugin/include/rem0rec.ic
+++ /dev/null
@@ -1,1647 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/rem0rec.ic
-Record manager
-
-Created 5/30/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "mach0data.h"
-#include "ut0byte.h"
-#include "dict0dict.h"
-
-/* Compact flag ORed to the extra size returned by rec_get_offsets() */
-#define REC_OFFS_COMPACT ((ulint) 1 << 31)
-/* SQL NULL flag in offsets returned by rec_get_offsets() */
-#define REC_OFFS_SQL_NULL ((ulint) 1 << 31)
-/* External flag in offsets returned by rec_get_offsets() */
-#define REC_OFFS_EXTERNAL ((ulint) 1 << 30)
-/* Mask for offsets returned by rec_get_offsets() */
-#define REC_OFFS_MASK (REC_OFFS_EXTERNAL - 1)
-
-/* Offsets of the bit-fields in an old-style record. NOTE! In the table the
-most significant bytes and bits are written below less significant.
-
- (1) byte offset (2) bit usage within byte
- downward from
- origin -> 1 8 bits pointer to next record
- 2 8 bits pointer to next record
- 3 1 bit short flag
- 7 bits number of fields
- 4 3 bits number of fields
- 5 bits heap number
- 5 8 bits heap number
- 6 4 bits n_owned
- 4 bits info bits
-*/
-
-/* Offsets of the bit-fields in a new-style record. NOTE! In the table the
-most significant bytes and bits are written below less significant.
-
- (1) byte offset (2) bit usage within byte
- downward from
- origin -> 1 8 bits relative offset of next record
- 2 8 bits relative offset of next record
- the relative offset is an unsigned 16-bit
- integer:
- (offset_of_next_record
- - offset_of_this_record) mod 64Ki,
- where mod is the modulo as a non-negative
- number;
- we can calculate the offset of the next
- record with the formula:
- relative_offset + offset_of_this_record
- mod UNIV_PAGE_SIZE
- 3 3 bits status:
- 000=conventional record
- 001=node pointer record (inside B-tree)
- 010=infimum record
- 011=supremum record
- 1xx=reserved
- 5 bits heap number
- 4 8 bits heap number
- 5 4 bits n_owned
- 4 bits info bits
-*/
-
-/* We list the byte offsets from the origin of the record, the mask,
-and the shift needed to obtain each bit-field of the record. */
-
-#define REC_NEXT 2
-#define REC_NEXT_MASK 0xFFFFUL
-#define REC_NEXT_SHIFT 0
-
-#define REC_OLD_SHORT 3 /* This is single byte bit-field */
-#define REC_OLD_SHORT_MASK 0x1UL
-#define REC_OLD_SHORT_SHIFT 0
-
-#define REC_OLD_N_FIELDS 4
-#define REC_OLD_N_FIELDS_MASK 0x7FEUL
-#define REC_OLD_N_FIELDS_SHIFT 1
-
-#define REC_NEW_STATUS 3 /* This is single byte bit-field */
-#define REC_NEW_STATUS_MASK 0x7UL
-#define REC_NEW_STATUS_SHIFT 0
-
-#define REC_OLD_HEAP_NO 5
-#define REC_HEAP_NO_MASK 0xFFF8UL
-#if 0 /* defined in rem0rec.h for use of page0zip.c */
-#define REC_NEW_HEAP_NO 4
-#define REC_HEAP_NO_SHIFT 3
-#endif
-
-#define REC_OLD_N_OWNED 6 /* This is single byte bit-field */
-#define REC_NEW_N_OWNED 5 /* This is single byte bit-field */
-#define REC_N_OWNED_MASK 0xFUL
-#define REC_N_OWNED_SHIFT 0
-
-#define REC_OLD_INFO_BITS 6 /* This is single byte bit-field */
-#define REC_NEW_INFO_BITS 5 /* This is single byte bit-field */
-#define REC_INFO_BITS_MASK 0xF0UL
-#define REC_INFO_BITS_SHIFT 0
-
-/* The following masks are used to filter the SQL null bit from
-one-byte and two-byte offsets */
-
-#define REC_1BYTE_SQL_NULL_MASK 0x80UL
-#define REC_2BYTE_SQL_NULL_MASK 0x8000UL
-
-/* In a 2-byte offset the second most significant bit denotes
-a field stored to another page: */
-
-#define REC_2BYTE_EXTERN_MASK 0x4000UL
-
-#if REC_OLD_SHORT_MASK << (8 * (REC_OLD_SHORT - 3)) \
- ^ REC_OLD_N_FIELDS_MASK << (8 * (REC_OLD_N_FIELDS - 4)) \
- ^ REC_HEAP_NO_MASK << (8 * (REC_OLD_HEAP_NO - 4)) \
- ^ REC_N_OWNED_MASK << (8 * (REC_OLD_N_OWNED - 3)) \
- ^ REC_INFO_BITS_MASK << (8 * (REC_OLD_INFO_BITS - 3)) \
- ^ 0xFFFFFFFFUL
-# error "sum of old-style masks != 0xFFFFFFFFUL"
-#endif
-#if REC_NEW_STATUS_MASK << (8 * (REC_NEW_STATUS - 3)) \
- ^ REC_HEAP_NO_MASK << (8 * (REC_NEW_HEAP_NO - 4)) \
- ^ REC_N_OWNED_MASK << (8 * (REC_NEW_N_OWNED - 3)) \
- ^ REC_INFO_BITS_MASK << (8 * (REC_NEW_INFO_BITS - 3)) \
- ^ 0xFFFFFFUL
-# error "sum of new-style masks != 0xFFFFFFUL"
-#endif
-
-/***********************************************************//**
-Sets the value of the ith field SQL null bit of an old-style record. */
-UNIV_INTERN
-void
-rec_set_nth_field_null_bit(
-/*=======================*/
- rec_t* rec, /*!< in: record */
- ulint i, /*!< in: ith field */
- ibool val); /*!< in: value to set */
-/***********************************************************//**
-Sets an old-style record field to SQL null.
-The physical size of the field is not changed. */
-UNIV_INTERN
-void
-rec_set_nth_field_sql_null(
-/*=======================*/
- rec_t* rec, /*!< in: record */
- ulint n); /*!< in: index of the field */
-
-/******************************************************//**
-Gets a bit field from within 1 byte. */
-UNIV_INLINE
-ulint
-rec_get_bit_field_1(
-/*================*/
- const rec_t* rec, /*!< in: pointer to record origin */
- ulint offs, /*!< in: offset from the origin down */
- ulint mask, /*!< in: mask used to filter bits */
- ulint shift) /*!< in: shift right applied after masking */
-{
- ut_ad(rec);
-
- return((mach_read_from_1(rec - offs) & mask) >> shift);
-}
-
-/******************************************************//**
-Sets a bit field within 1 byte. */
-UNIV_INLINE
-void
-rec_set_bit_field_1(
-/*================*/
- rec_t* rec, /*!< in: pointer to record origin */
- ulint val, /*!< in: value to set */
- ulint offs, /*!< in: offset from the origin down */
- ulint mask, /*!< in: mask used to filter bits */
- ulint shift) /*!< in: shift right applied after masking */
-{
- ut_ad(rec);
- ut_ad(offs <= REC_N_OLD_EXTRA_BYTES);
- ut_ad(mask);
- ut_ad(mask <= 0xFFUL);
- ut_ad(((mask >> shift) << shift) == mask);
- ut_ad(((val << shift) & mask) == (val << shift));
-
- mach_write_to_1(rec - offs,
- (mach_read_from_1(rec - offs) & ~mask)
- | (val << shift));
-}
-
-/******************************************************//**
-Gets a bit field from within 2 bytes. */
-UNIV_INLINE
-ulint
-rec_get_bit_field_2(
-/*================*/
- const rec_t* rec, /*!< in: pointer to record origin */
- ulint offs, /*!< in: offset from the origin down */
- ulint mask, /*!< in: mask used to filter bits */
- ulint shift) /*!< in: shift right applied after masking */
-{
- ut_ad(rec);
-
- return((mach_read_from_2(rec - offs) & mask) >> shift);
-}
-
-/******************************************************//**
-Sets a bit field within 2 bytes. */
-UNIV_INLINE
-void
-rec_set_bit_field_2(
-/*================*/
- rec_t* rec, /*!< in: pointer to record origin */
- ulint val, /*!< in: value to set */
- ulint offs, /*!< in: offset from the origin down */
- ulint mask, /*!< in: mask used to filter bits */
- ulint shift) /*!< in: shift right applied after masking */
-{
- ut_ad(rec);
- ut_ad(offs <= REC_N_OLD_EXTRA_BYTES);
- ut_ad(mask > 0xFFUL);
- ut_ad(mask <= 0xFFFFUL);
- ut_ad((mask >> shift) & 1);
- ut_ad(0 == ((mask >> shift) & ((mask >> shift) + 1)));
- ut_ad(((mask >> shift) << shift) == mask);
- ut_ad(((val << shift) & mask) == (val << shift));
-
- mach_write_to_2(rec - offs,
- (mach_read_from_2(rec - offs) & ~mask)
- | (val << shift));
-}
-
-/******************************************************//**
-The following function is used to get the pointer of the next chained record
-on the same page.
-@return pointer to the next chained record, or NULL if none */
-UNIV_INLINE
-const rec_t*
-rec_get_next_ptr_const(
-/*===================*/
- const rec_t* rec, /*!< in: physical record */
- ulint comp) /*!< in: nonzero=compact page format */
-{
- ulint field_value;
-
- ut_ad(REC_NEXT_MASK == 0xFFFFUL);
- ut_ad(REC_NEXT_SHIFT == 0);
-
- field_value = mach_read_from_2(rec - REC_NEXT);
-
- if (UNIV_UNLIKELY(field_value == 0)) {
-
- return(NULL);
- }
-
- if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) {
-#if UNIV_PAGE_SIZE <= 32768
- /* Note that for 64 KiB pages, field_value can 'wrap around'
- and the debug assertion is not valid */
-
- /* In the following assertion, field_value is interpreted
- as signed 16-bit integer in 2's complement arithmetics.
- If all platforms defined int16_t in the standard headers,
- the expression could be written simpler as
- (int16_t) field_value + ut_align_offset(...) < UNIV_PAGE_SIZE
- */
- ut_ad((field_value >= 32768
- ? field_value - 65536
- : field_value)
- + ut_align_offset(rec, UNIV_PAGE_SIZE)
- < UNIV_PAGE_SIZE);
-#endif
- /* There must be at least REC_N_NEW_EXTRA_BYTES + 1
- between each record. */
- ut_ad((field_value > REC_N_NEW_EXTRA_BYTES
- && field_value < 32768)
- || field_value < (uint16) -REC_N_NEW_EXTRA_BYTES);
-
- return((byte*) ut_align_down(rec, UNIV_PAGE_SIZE)
- + ut_align_offset(rec + field_value, UNIV_PAGE_SIZE));
- } else {
- ut_ad(field_value < UNIV_PAGE_SIZE);
-
- return((byte*) ut_align_down(rec, UNIV_PAGE_SIZE)
- + field_value);
- }
-}
-
-/******************************************************//**
-The following function is used to get the pointer of the next chained record
-on the same page.
-@return pointer to the next chained record, or NULL if none */
-UNIV_INLINE
-rec_t*
-rec_get_next_ptr(
-/*=============*/
- rec_t* rec, /*!< in: physical record */
- ulint comp) /*!< in: nonzero=compact page format */
-{
- return((rec_t*) rec_get_next_ptr_const(rec, comp));
-}
-
-/******************************************************//**
-The following function is used to get the offset of the next chained record
-on the same page.
-@return the page offset of the next chained record, or 0 if none */
-UNIV_INLINE
-ulint
-rec_get_next_offs(
-/*==============*/
- const rec_t* rec, /*!< in: physical record */
- ulint comp) /*!< in: nonzero=compact page format */
-{
- ulint field_value;
-#if REC_NEXT_MASK != 0xFFFFUL
-# error "REC_NEXT_MASK != 0xFFFFUL"
-#endif
-#if REC_NEXT_SHIFT
-# error "REC_NEXT_SHIFT != 0"
-#endif
-
- field_value = mach_read_from_2(rec - REC_NEXT);
-
- if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) {
-#if UNIV_PAGE_SIZE <= 32768
- /* Note that for 64 KiB pages, field_value can 'wrap around'
- and the debug assertion is not valid */
-
- /* In the following assertion, field_value is interpreted
- as signed 16-bit integer in 2's complement arithmetics.
- If all platforms defined int16_t in the standard headers,
- the expression could be written simpler as
- (int16_t) field_value + ut_align_offset(...) < UNIV_PAGE_SIZE
- */
- ut_ad((field_value >= 32768
- ? field_value - 65536
- : field_value)
- + ut_align_offset(rec, UNIV_PAGE_SIZE)
- < UNIV_PAGE_SIZE);
-#endif
- if (UNIV_UNLIKELY(field_value == 0)) {
-
- return(0);
- }
-
- /* There must be at least REC_N_NEW_EXTRA_BYTES + 1
- between each record. */
- ut_ad((field_value > REC_N_NEW_EXTRA_BYTES
- && field_value < 32768)
- || field_value < (uint16) -REC_N_NEW_EXTRA_BYTES);
-
- return(ut_align_offset(rec + field_value, UNIV_PAGE_SIZE));
- } else {
- ut_ad(field_value < UNIV_PAGE_SIZE);
-
- return(field_value);
- }
-}
-
-/******************************************************//**
-The following function is used to set the next record offset field
-of an old-style record. */
-UNIV_INLINE
-void
-rec_set_next_offs_old(
-/*==================*/
- rec_t* rec, /*!< in: old-style physical record */
- ulint next) /*!< in: offset of the next record */
-{
- ut_ad(rec);
- ut_ad(UNIV_PAGE_SIZE > next);
-#if REC_NEXT_MASK != 0xFFFFUL
-# error "REC_NEXT_MASK != 0xFFFFUL"
-#endif
-#if REC_NEXT_SHIFT
-# error "REC_NEXT_SHIFT != 0"
-#endif
-
- mach_write_to_2(rec - REC_NEXT, next);
-}
-
-/******************************************************//**
-The following function is used to set the next record offset field
-of a new-style record. */
-UNIV_INLINE
-void
-rec_set_next_offs_new(
-/*==================*/
- rec_t* rec, /*!< in/out: new-style physical record */
- ulint next) /*!< in: offset of the next record */
-{
- ulint field_value;
-
- ut_ad(rec);
- ut_ad(UNIV_PAGE_SIZE > next);
-
- if (UNIV_UNLIKELY(!next)) {
- field_value = 0;
- } else {
- /* The following two statements calculate
- next - offset_of_rec mod 64Ki, where mod is the modulo
- as a non-negative number */
-
- field_value = (ulint)
- ((lint) next
- - (lint) ut_align_offset(rec, UNIV_PAGE_SIZE));
- field_value &= REC_NEXT_MASK;
- }
-
- mach_write_to_2(rec - REC_NEXT, field_value);
-}
-
-/******************************************************//**
-The following function is used to get the number of fields
-in an old-style record.
-@return number of data fields */
-UNIV_INLINE
-ulint
-rec_get_n_fields_old(
-/*=================*/
- const rec_t* rec) /*!< in: physical record */
-{
- ulint ret;
-
- ut_ad(rec);
-
- ret = rec_get_bit_field_2(rec, REC_OLD_N_FIELDS,
- REC_OLD_N_FIELDS_MASK,
- REC_OLD_N_FIELDS_SHIFT);
- ut_ad(ret <= REC_MAX_N_FIELDS);
- ut_ad(ret > 0);
-
- return(ret);
-}
-
-/******************************************************//**
-The following function is used to set the number of fields
-in an old-style record. */
-UNIV_INLINE
-void
-rec_set_n_fields_old(
-/*=================*/
- rec_t* rec, /*!< in: physical record */
- ulint n_fields) /*!< in: the number of fields */
-{
- ut_ad(rec);
- ut_ad(n_fields <= REC_MAX_N_FIELDS);
- ut_ad(n_fields > 0);
-
- rec_set_bit_field_2(rec, n_fields, REC_OLD_N_FIELDS,
- REC_OLD_N_FIELDS_MASK, REC_OLD_N_FIELDS_SHIFT);
-}
-
-/******************************************************//**
-The following function retrieves the status bits of a new-style record.
-@return status bits */
-UNIV_INLINE
-ulint
-rec_get_status(
-/*===========*/
- const rec_t* rec) /*!< in: physical record */
-{
- ulint ret;
-
- ut_ad(rec);
-
- ret = rec_get_bit_field_1(rec, REC_NEW_STATUS,
- REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT);
- ut_ad((ret & ~REC_NEW_STATUS_MASK) == 0);
-
- return(ret);
-}
-
-/******************************************************//**
-The following function is used to get the number of fields
-in a record.
-@return number of data fields */
-UNIV_INLINE
-ulint
-rec_get_n_fields(
-/*=============*/
- const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index) /*!< in: record descriptor */
-{
- ut_ad(rec);
- ut_ad(index);
-
- if (!dict_table_is_comp(index->table)) {
- return(rec_get_n_fields_old(rec));
- }
-
- switch (rec_get_status(rec)) {
- case REC_STATUS_ORDINARY:
- return(dict_index_get_n_fields(index));
- case REC_STATUS_NODE_PTR:
- return(dict_index_get_n_unique_in_tree(index) + 1);
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- return(1);
- default:
- ut_error;
- return(ULINT_UNDEFINED);
- }
-}
-
-/******************************************************//**
-The following function is used to get the number of records owned by the
-previous directory record.
-@return number of owned records */
-UNIV_INLINE
-ulint
-rec_get_n_owned_old(
-/*================*/
- const rec_t* rec) /*!< in: old-style physical record */
-{
- return(rec_get_bit_field_1(rec, REC_OLD_N_OWNED,
- REC_N_OWNED_MASK, REC_N_OWNED_SHIFT));
-}
-
-/******************************************************//**
-The following function is used to set the number of owned records. */
-UNIV_INLINE
-void
-rec_set_n_owned_old(
-/*================*/
- rec_t* rec, /*!< in: old-style physical record */
- ulint n_owned) /*!< in: the number of owned */
-{
- rec_set_bit_field_1(rec, n_owned, REC_OLD_N_OWNED,
- REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
-}
-
-/******************************************************//**
-The following function is used to get the number of records owned by the
-previous directory record.
-@return number of owned records */
-UNIV_INLINE
-ulint
-rec_get_n_owned_new(
-/*================*/
- const rec_t* rec) /*!< in: new-style physical record */
-{
- return(rec_get_bit_field_1(rec, REC_NEW_N_OWNED,
- REC_N_OWNED_MASK, REC_N_OWNED_SHIFT));
-}
-
-/******************************************************//**
-The following function is used to set the number of owned records. */
-UNIV_INLINE
-void
-rec_set_n_owned_new(
-/*================*/
- rec_t* rec, /*!< in/out: new-style physical record */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- ulint n_owned)/*!< in: the number of owned */
-{
- rec_set_bit_field_1(rec, n_owned, REC_NEW_N_OWNED,
- REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
- if (UNIV_LIKELY_NULL(page_zip)
- && UNIV_LIKELY(rec_get_status(rec)
- != REC_STATUS_SUPREMUM)) {
- page_zip_rec_set_owned(page_zip, rec, n_owned);
- }
-}
-
-/******************************************************//**
-The following function is used to retrieve the info bits of a record.
-@return info bits */
-UNIV_INLINE
-ulint
-rec_get_info_bits(
-/*==============*/
- const rec_t* rec, /*!< in: physical record */
- ulint comp) /*!< in: nonzero=compact page format */
-{
- return(rec_get_bit_field_1(
- rec, comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS,
- REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT));
-}
-
-/******************************************************//**
-The following function is used to set the info bits of a record. */
-UNIV_INLINE
-void
-rec_set_info_bits_old(
-/*==================*/
- rec_t* rec, /*!< in: old-style physical record */
- ulint bits) /*!< in: info bits */
-{
- rec_set_bit_field_1(rec, bits, REC_OLD_INFO_BITS,
- REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
-}
-/******************************************************//**
-The following function is used to set the info bits of a record. */
-UNIV_INLINE
-void
-rec_set_info_bits_new(
-/*==================*/
- rec_t* rec, /*!< in/out: new-style physical record */
- ulint bits) /*!< in: info bits */
-{
- rec_set_bit_field_1(rec, bits, REC_NEW_INFO_BITS,
- REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
-}
-
-/******************************************************//**
-The following function is used to set the status bits of a new-style record. */
-UNIV_INLINE
-void
-rec_set_status(
-/*===========*/
- rec_t* rec, /*!< in/out: physical record */
- ulint bits) /*!< in: info bits */
-{
- rec_set_bit_field_1(rec, bits, REC_NEW_STATUS,
- REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT);
-}
-
-/******************************************************//**
-The following function is used to retrieve the info and status
-bits of a record. (Only compact records have status bits.)
-@return info bits */
-UNIV_INLINE
-ulint
-rec_get_info_and_status_bits(
-/*=========================*/
- const rec_t* rec, /*!< in: physical record */
- ulint comp) /*!< in: nonzero=compact page format */
-{
- ulint bits;
-#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \
-& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)
-# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap"
-#endif
- if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) {
- bits = rec_get_info_bits(rec, TRUE) | rec_get_status(rec);
- } else {
- bits = rec_get_info_bits(rec, FALSE);
- ut_ad(!(bits & ~(REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)));
- }
- return(bits);
-}
-/******************************************************//**
-The following function is used to set the info and status
-bits of a record. (Only compact records have status bits.) */
-UNIV_INLINE
-void
-rec_set_info_and_status_bits(
-/*=========================*/
- rec_t* rec, /*!< in/out: physical record */
- ulint bits) /*!< in: info bits */
-{
-#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \
-& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)
-# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap"
-#endif
- rec_set_status(rec, bits & REC_NEW_STATUS_MASK);
- rec_set_info_bits_new(rec, bits & ~REC_NEW_STATUS_MASK);
-}
-
-/******************************************************//**
-The following function tells if record is delete marked.
-@return nonzero if delete marked */
-UNIV_INLINE
-ulint
-rec_get_deleted_flag(
-/*=================*/
- const rec_t* rec, /*!< in: physical record */
- ulint comp) /*!< in: nonzero=compact page format */
-{
- if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) {
- return(UNIV_UNLIKELY(
- rec_get_bit_field_1(rec, REC_NEW_INFO_BITS,
- REC_INFO_DELETED_FLAG,
- REC_INFO_BITS_SHIFT)));
- } else {
- return(UNIV_UNLIKELY(
- rec_get_bit_field_1(rec, REC_OLD_INFO_BITS,
- REC_INFO_DELETED_FLAG,
- REC_INFO_BITS_SHIFT)));
- }
-}
-
-/******************************************************//**
-The following function is used to set the deleted bit. */
-UNIV_INLINE
-void
-rec_set_deleted_flag_old(
-/*=====================*/
- rec_t* rec, /*!< in: old-style physical record */
- ulint flag) /*!< in: nonzero if delete marked */
-{
- ulint val;
-
- val = rec_get_info_bits(rec, FALSE);
-
- if (flag) {
- val |= REC_INFO_DELETED_FLAG;
- } else {
- val &= ~REC_INFO_DELETED_FLAG;
- }
-
- rec_set_info_bits_old(rec, val);
-}
-
-/******************************************************//**
-The following function is used to set the deleted bit. */
-UNIV_INLINE
-void
-rec_set_deleted_flag_new(
-/*=====================*/
- rec_t* rec, /*!< in/out: new-style physical record */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- ulint flag) /*!< in: nonzero if delete marked */
-{
- ulint val;
-
- val = rec_get_info_bits(rec, TRUE);
-
- if (flag) {
- val |= REC_INFO_DELETED_FLAG;
- } else {
- val &= ~REC_INFO_DELETED_FLAG;
- }
-
- rec_set_info_bits_new(rec, val);
-
- if (UNIV_LIKELY_NULL(page_zip)) {
- page_zip_rec_set_deleted(page_zip, rec, flag);
- }
-}
-
-/******************************************************//**
-The following function tells if a new-style record is a node pointer.
-@return TRUE if node pointer */
-UNIV_INLINE
-ibool
-rec_get_node_ptr_flag(
-/*==================*/
- const rec_t* rec) /*!< in: physical record */
-{
- return(REC_STATUS_NODE_PTR == rec_get_status(rec));
-}
-
-/******************************************************//**
-The following function is used to get the order number
-of an old-style record in the heap of the index page.
-@return heap order number */
-UNIV_INLINE
-ulint
-rec_get_heap_no_old(
-/*================*/
- const rec_t* rec) /*!< in: physical record */
-{
- return(rec_get_bit_field_2(rec, REC_OLD_HEAP_NO,
- REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT));
-}
-
-/******************************************************//**
-The following function is used to set the heap number
-field in an old-style record. */
-UNIV_INLINE
-void
-rec_set_heap_no_old(
-/*================*/
- rec_t* rec, /*!< in: physical record */
- ulint heap_no)/*!< in: the heap number */
-{
- rec_set_bit_field_2(rec, heap_no, REC_OLD_HEAP_NO,
- REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
-}
-
-/******************************************************//**
-The following function is used to get the order number
-of a new-style record in the heap of the index page.
-@return heap order number */
-UNIV_INLINE
-ulint
-rec_get_heap_no_new(
-/*================*/
- const rec_t* rec) /*!< in: physical record */
-{
- return(rec_get_bit_field_2(rec, REC_NEW_HEAP_NO,
- REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT));
-}
-
-/******************************************************//**
-The following function is used to set the heap number
-field in a new-style record. */
-UNIV_INLINE
-void
-rec_set_heap_no_new(
-/*================*/
- rec_t* rec, /*!< in/out: physical record */
- ulint heap_no)/*!< in: the heap number */
-{
- rec_set_bit_field_2(rec, heap_no, REC_NEW_HEAP_NO,
- REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
-}
-
-/******************************************************//**
-The following function is used to test whether the data offsets in the record
-are stored in one-byte or two-byte format.
-@return TRUE if 1-byte form */
-UNIV_INLINE
-ibool
-rec_get_1byte_offs_flag(
-/*====================*/
- const rec_t* rec) /*!< in: physical record */
-{
-#if TRUE != 1
-#error "TRUE != 1"
-#endif
-
- return(rec_get_bit_field_1(rec, REC_OLD_SHORT, REC_OLD_SHORT_MASK,
- REC_OLD_SHORT_SHIFT));
-}
-
-/******************************************************//**
-The following function is used to set the 1-byte offsets flag. */
-UNIV_INLINE
-void
-rec_set_1byte_offs_flag(
-/*====================*/
- rec_t* rec, /*!< in: physical record */
- ibool flag) /*!< in: TRUE if 1byte form */
-{
-#if TRUE != 1
-#error "TRUE != 1"
-#endif
- ut_ad(flag <= TRUE);
-
- rec_set_bit_field_1(rec, flag, REC_OLD_SHORT, REC_OLD_SHORT_MASK,
- REC_OLD_SHORT_SHIFT);
-}
-
-/******************************************************//**
-Returns the offset of nth field end if the record is stored in the 1-byte
-offsets form. If the field is SQL null, the flag is ORed in the returned
-value.
-@return offset of the start of the field, SQL null flag ORed */
-UNIV_INLINE
-ulint
-rec_1_get_field_end_info(
-/*=====================*/
- const rec_t* rec, /*!< in: record */
- ulint n) /*!< in: field index */
-{
- ut_ad(rec_get_1byte_offs_flag(rec));
- ut_ad(n < rec_get_n_fields_old(rec));
-
- return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1)));
-}
-
-/******************************************************//**
-Returns the offset of nth field end if the record is stored in the 2-byte
-offsets form. If the field is SQL null, the flag is ORed in the returned
-value.
-@return offset of the start of the field, SQL null flag and extern
-storage flag ORed */
-UNIV_INLINE
-ulint
-rec_2_get_field_end_info(
-/*=====================*/
- const rec_t* rec, /*!< in: record */
- ulint n) /*!< in: field index */
-{
- ut_ad(!rec_get_1byte_offs_flag(rec));
- ut_ad(n < rec_get_n_fields_old(rec));
-
- return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2)));
-}
-
-/* Get the base address of offsets. The extra_size is stored at
-this position, and following positions hold the end offsets of
-the fields. */
-#define rec_offs_base(offsets) (offsets + REC_OFFS_HEADER_SIZE)
-
-/**********************************************************//**
-The following function returns the number of allocated elements
-for an array of offsets.
-@return number of elements */
-UNIV_INLINE
-ulint
-rec_offs_get_n_alloc(
-/*=================*/
- const ulint* offsets)/*!< in: array for rec_get_offsets() */
-{
- ulint n_alloc;
- ut_ad(offsets);
- n_alloc = offsets[0];
- ut_ad(n_alloc > REC_OFFS_HEADER_SIZE);
- UNIV_MEM_ASSERT_W(offsets, n_alloc * sizeof *offsets);
- return(n_alloc);
-}
-
-/**********************************************************//**
-The following function sets the number of allocated elements
-for an array of offsets. */
-UNIV_INLINE
-void
-rec_offs_set_n_alloc(
-/*=================*/
- ulint* offsets, /*!< out: array for rec_get_offsets(),
- must be allocated */
- ulint n_alloc) /*!< in: number of elements */
-{
- ut_ad(offsets);
- ut_ad(n_alloc > REC_OFFS_HEADER_SIZE);
- UNIV_MEM_ASSERT_AND_ALLOC(offsets, n_alloc * sizeof *offsets);
- offsets[0] = n_alloc;
-}
-
-/**********************************************************//**
-The following function returns the number of fields in a record.
-@return number of fields */
-UNIV_INLINE
-ulint
-rec_offs_n_fields(
-/*==============*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ulint n_fields;
- ut_ad(offsets);
- n_fields = offsets[1];
- ut_ad(n_fields > 0);
- ut_ad(n_fields <= REC_MAX_N_FIELDS);
- ut_ad(n_fields + REC_OFFS_HEADER_SIZE
- <= rec_offs_get_n_alloc(offsets));
- return(n_fields);
-}
-
-/************************************************************//**
-Validates offsets returned by rec_get_offsets().
-@return TRUE if valid */
-UNIV_INLINE
-ibool
-rec_offs_validate(
-/*==============*/
- const rec_t* rec, /*!< in: record or NULL */
- const dict_index_t* index, /*!< in: record descriptor or NULL */
- const ulint* offsets)/*!< in: array returned by
- rec_get_offsets() */
-{
- ulint i = rec_offs_n_fields(offsets);
- ulint last = ULINT_MAX;
- ulint comp = *rec_offs_base(offsets) & REC_OFFS_COMPACT;
-
- if (rec) {
- ut_ad((ulint) rec == offsets[2]);
- if (!comp) {
- ut_a(rec_get_n_fields_old(rec) >= i);
- }
- }
- if (index) {
- ulint max_n_fields;
- ut_ad((ulint) index == offsets[3]);
- max_n_fields = ut_max(
- dict_index_get_n_fields(index),
- dict_index_get_n_unique_in_tree(index) + 1);
- if (comp && rec) {
- switch (rec_get_status(rec)) {
- case REC_STATUS_ORDINARY:
- break;
- case REC_STATUS_NODE_PTR:
- max_n_fields = dict_index_get_n_unique_in_tree(
- index) + 1;
- break;
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- max_n_fields = 1;
- break;
- default:
- ut_error;
- }
- }
- /* index->n_def == 0 for dummy indexes if !comp */
- ut_a(!comp || index->n_def);
- ut_a(!index->n_def || i <= max_n_fields);
- }
- while (i--) {
- ulint curr = rec_offs_base(offsets)[1 + i] & REC_OFFS_MASK;
- ut_a(curr <= last);
- last = curr;
- }
- return(TRUE);
-}
-#ifdef UNIV_DEBUG
-/************************************************************//**
-Updates debug data in offsets, in order to avoid bogus
-rec_offs_validate() failures. */
-UNIV_INLINE
-void
-rec_offs_make_valid(
-/*================*/
- const rec_t* rec, /*!< in: record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets)/*!< in: array returned by
- rec_get_offsets() */
-{
- ut_ad(rec);
- ut_ad(index);
- ut_ad(offsets);
- ut_ad(rec_get_n_fields(rec, index) >= rec_offs_n_fields(offsets));
- offsets[2] = (ulint) rec;
- offsets[3] = (ulint) index;
-}
-#endif /* UNIV_DEBUG */
-
-/************************************************************//**
-The following function is used to get an offset to the nth
-data field in a record.
-@return offset from the origin of rec */
-UNIV_INLINE
-ulint
-rec_get_nth_field_offs(
-/*===================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n, /*!< in: index of the field */
- ulint* len) /*!< out: length of the field; UNIV_SQL_NULL
- if SQL null */
-{
- ulint offs;
- ulint length;
- ut_ad(n < rec_offs_n_fields(offsets));
- ut_ad(len);
-
- if (UNIV_UNLIKELY(n == 0)) {
- offs = 0;
- } else {
- offs = rec_offs_base(offsets)[n] & REC_OFFS_MASK;
- }
-
- length = rec_offs_base(offsets)[1 + n];
-
- if (length & REC_OFFS_SQL_NULL) {
- length = UNIV_SQL_NULL;
- } else {
- length &= REC_OFFS_MASK;
- length -= offs;
- }
-
- *len = length;
- return(offs);
-}
-
-/******************************************************//**
-Determine if the offsets are for a record in the new
-compact format.
-@return nonzero if compact format */
-UNIV_INLINE
-ulint
-rec_offs_comp(
-/*==========*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ut_ad(rec_offs_validate(NULL, NULL, offsets));
- return(*rec_offs_base(offsets) & REC_OFFS_COMPACT);
-}
-
-/******************************************************//**
-Determine if the offsets are for a record containing
-externally stored columns.
-@return nonzero if externally stored */
-UNIV_INLINE
-ulint
-rec_offs_any_extern(
-/*================*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ut_ad(rec_offs_validate(NULL, NULL, offsets));
- return(UNIV_UNLIKELY(*rec_offs_base(offsets) & REC_OFFS_EXTERNAL));
-}
-
-/******************************************************//**
-Returns nonzero if the extern bit is set in nth field of rec.
-@return nonzero if externally stored */
-UNIV_INLINE
-ulint
-rec_offs_nth_extern(
-/*================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n) /*!< in: nth field */
-{
- ut_ad(rec_offs_validate(NULL, NULL, offsets));
- ut_ad(n < rec_offs_n_fields(offsets));
- return(UNIV_UNLIKELY(rec_offs_base(offsets)[1 + n]
- & REC_OFFS_EXTERNAL));
-}
-
-/******************************************************//**
-Returns nonzero if the SQL NULL bit is set in nth field of rec.
-@return nonzero if SQL NULL */
-UNIV_INLINE
-ulint
-rec_offs_nth_sql_null(
-/*==================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n) /*!< in: nth field */
-{
- ut_ad(rec_offs_validate(NULL, NULL, offsets));
- ut_ad(n < rec_offs_n_fields(offsets));
- return(UNIV_UNLIKELY(rec_offs_base(offsets)[1 + n]
- & REC_OFFS_SQL_NULL));
-}
-
-/******************************************************//**
-Gets the physical size of a field.
-@return length of field */
-UNIV_INLINE
-ulint
-rec_offs_nth_size(
-/*==============*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n) /*!< in: nth field */
-{
- ut_ad(rec_offs_validate(NULL, NULL, offsets));
- ut_ad(n < rec_offs_n_fields(offsets));
- if (!n) {
- return(rec_offs_base(offsets)[1 + n] & REC_OFFS_MASK);
- }
- return((rec_offs_base(offsets)[1 + n] - rec_offs_base(offsets)[n])
- & REC_OFFS_MASK);
-}
-
-/******************************************************//**
-Returns the number of extern bits set in a record.
-@return number of externally stored fields */
-UNIV_INLINE
-ulint
-rec_offs_n_extern(
-/*==============*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ulint n = 0;
-
- if (rec_offs_any_extern(offsets)) {
- ulint i;
-
- for (i = rec_offs_n_fields(offsets); i--; ) {
- if (rec_offs_nth_extern(offsets, i)) {
- n++;
- }
- }
- }
-
- return(n);
-}
-
-/******************************************************//**
-Returns the offset of n - 1th field end if the record is stored in the 1-byte
-offsets form. If the field is SQL null, the flag is ORed in the returned
-value. This function and the 2-byte counterpart are defined here because the
-C-compiler was not able to sum negative and positive constant offsets, and
-warned of constant arithmetic overflow within the compiler.
-@return offset of the start of the PREVIOUS field, SQL null flag ORed */
-UNIV_INLINE
-ulint
-rec_1_get_prev_field_end_info(
-/*==========================*/
- const rec_t* rec, /*!< in: record */
- ulint n) /*!< in: field index */
-{
- ut_ad(rec_get_1byte_offs_flag(rec));
- ut_ad(n <= rec_get_n_fields_old(rec));
-
- return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n)));
-}
-
-/******************************************************//**
-Returns the offset of n - 1th field end if the record is stored in the 2-byte
-offsets form. If the field is SQL null, the flag is ORed in the returned
-value.
-@return offset of the start of the PREVIOUS field, SQL null flag ORed */
-UNIV_INLINE
-ulint
-rec_2_get_prev_field_end_info(
-/*==========================*/
- const rec_t* rec, /*!< in: record */
- ulint n) /*!< in: field index */
-{
- ut_ad(!rec_get_1byte_offs_flag(rec));
- ut_ad(n <= rec_get_n_fields_old(rec));
-
- return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n)));
-}
-
-/******************************************************//**
-Sets the field end info for the nth field if the record is stored in the
-1-byte format. */
-UNIV_INLINE
-void
-rec_1_set_field_end_info(
-/*=====================*/
- rec_t* rec, /*!< in: record */
- ulint n, /*!< in: field index */
- ulint info) /*!< in: value to set */
-{
- ut_ad(rec_get_1byte_offs_flag(rec));
- ut_ad(n < rec_get_n_fields_old(rec));
-
- mach_write_to_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1), info);
-}
-
-/******************************************************//**
-Sets the field end info for the nth field if the record is stored in the
-2-byte format. */
-UNIV_INLINE
-void
-rec_2_set_field_end_info(
-/*=====================*/
- rec_t* rec, /*!< in: record */
- ulint n, /*!< in: field index */
- ulint info) /*!< in: value to set */
-{
- ut_ad(!rec_get_1byte_offs_flag(rec));
- ut_ad(n < rec_get_n_fields_old(rec));
-
- mach_write_to_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2), info);
-}
-
-/******************************************************//**
-Returns the offset of nth field start if the record is stored in the 1-byte
-offsets form.
-@return offset of the start of the field */
-UNIV_INLINE
-ulint
-rec_1_get_field_start_offs(
-/*=======================*/
- const rec_t* rec, /*!< in: record */
- ulint n) /*!< in: field index */
-{
- ut_ad(rec_get_1byte_offs_flag(rec));
- ut_ad(n <= rec_get_n_fields_old(rec));
-
- if (n == 0) {
-
- return(0);
- }
-
- return(rec_1_get_prev_field_end_info(rec, n)
- & ~REC_1BYTE_SQL_NULL_MASK);
-}
-
-/******************************************************//**
-Returns the offset of nth field start if the record is stored in the 2-byte
-offsets form.
-@return offset of the start of the field */
-UNIV_INLINE
-ulint
-rec_2_get_field_start_offs(
-/*=======================*/
- const rec_t* rec, /*!< in: record */
- ulint n) /*!< in: field index */
-{
- ut_ad(!rec_get_1byte_offs_flag(rec));
- ut_ad(n <= rec_get_n_fields_old(rec));
-
- if (n == 0) {
-
- return(0);
- }
-
- return(rec_2_get_prev_field_end_info(rec, n)
- & ~(REC_2BYTE_SQL_NULL_MASK | REC_2BYTE_EXTERN_MASK));
-}
-
-/******************************************************//**
-The following function is used to read the offset of the start of a data field
-in the record. The start of an SQL null field is the end offset of the
-previous non-null field, or 0, if none exists. If n is the number of the last
-field + 1, then the end offset of the last field is returned.
-@return offset of the start of the field */
-UNIV_INLINE
-ulint
-rec_get_field_start_offs(
-/*=====================*/
- const rec_t* rec, /*!< in: record */
- ulint n) /*!< in: field index */
-{
- ut_ad(rec);
- ut_ad(n <= rec_get_n_fields_old(rec));
-
- if (n == 0) {
-
- return(0);
- }
-
- if (rec_get_1byte_offs_flag(rec)) {
-
- return(rec_1_get_field_start_offs(rec, n));
- }
-
- return(rec_2_get_field_start_offs(rec, n));
-}
-
-/************************************************************//**
-Gets the physical size of an old-style field.
-Also an SQL null may have a field of size > 0,
-if the data type is of a fixed size.
-@return field size in bytes */
-UNIV_INLINE
-ulint
-rec_get_nth_field_size(
-/*===================*/
- const rec_t* rec, /*!< in: record */
- ulint n) /*!< in: index of the field */
-{
- ulint os;
- ulint next_os;
-
- os = rec_get_field_start_offs(rec, n);
- next_os = rec_get_field_start_offs(rec, n + 1);
-
- ut_ad(next_os - os < UNIV_PAGE_SIZE);
-
- return(next_os - os);
-}
-
-/***********************************************************//**
-This is used to modify the value of an already existing field in a record.
-The previous value must have exactly the same size as the new value. If len
-is UNIV_SQL_NULL then the field is treated as an SQL null.
-For records in ROW_FORMAT=COMPACT (new-style records), len must not be
-UNIV_SQL_NULL unless the field already is SQL null. */
-UNIV_INLINE
-void
-rec_set_nth_field(
-/*==============*/
- rec_t* rec, /*!< in: record */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n, /*!< in: index number of the field */
- const void* data, /*!< in: pointer to the data
- if not SQL null */
- ulint len) /*!< in: length of the data or UNIV_SQL_NULL */
-{
- byte* data2;
- ulint len2;
-
- ut_ad(rec);
- ut_ad(rec_offs_validate(rec, NULL, offsets));
-
- if (UNIV_UNLIKELY(len == UNIV_SQL_NULL)) {
- if (!rec_offs_nth_sql_null(offsets, n)) {
- ut_a(!rec_offs_comp(offsets));
- rec_set_nth_field_sql_null(rec, n);
- }
-
- return;
- }
-
- data2 = rec_get_nth_field(rec, offsets, n, &len2);
- if (len2 == UNIV_SQL_NULL) {
- ut_ad(!rec_offs_comp(offsets));
- rec_set_nth_field_null_bit(rec, n, FALSE);
- ut_ad(len == rec_get_nth_field_size(rec, n));
- } else {
- ut_ad(len2 == len);
- }
-
- ut_memcpy(data2, data, len);
-}
-
-/**********************************************************//**
-The following function returns the data size of an old-style physical
-record, that is the sum of field lengths. SQL null fields
-are counted as length 0 fields. The value returned by the function
-is the distance from record origin to record end in bytes.
-@return size */
-UNIV_INLINE
-ulint
-rec_get_data_size_old(
-/*==================*/
- const rec_t* rec) /*!< in: physical record */
-{
- ut_ad(rec);
-
- return(rec_get_field_start_offs(rec, rec_get_n_fields_old(rec)));
-}
-
-/**********************************************************//**
-The following function sets the number of fields in offsets. */
-UNIV_INLINE
-void
-rec_offs_set_n_fields(
-/*==================*/
- ulint* offsets, /*!< in/out: array returned by
- rec_get_offsets() */
- ulint n_fields) /*!< in: number of fields */
-{
- ut_ad(offsets);
- ut_ad(n_fields > 0);
- ut_ad(n_fields <= REC_MAX_N_FIELDS);
- ut_ad(n_fields + REC_OFFS_HEADER_SIZE
- <= rec_offs_get_n_alloc(offsets));
- offsets[1] = n_fields;
-}
-
-/**********************************************************//**
-The following function returns the data size of a physical
-record, that is the sum of field lengths. SQL null fields
-are counted as length 0 fields. The value returned by the function
-is the distance from record origin to record end in bytes.
-@return size */
-UNIV_INLINE
-ulint
-rec_offs_data_size(
-/*===============*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ulint size;
-
- ut_ad(rec_offs_validate(NULL, NULL, offsets));
- size = rec_offs_base(offsets)[rec_offs_n_fields(offsets)]
- & REC_OFFS_MASK;
- ut_ad(size < UNIV_PAGE_SIZE);
- return(size);
-}
-
-/**********************************************************//**
-Returns the total size of record minus data size of record. The value
-returned by the function is the distance from record start to record origin
-in bytes.
-@return size */
-UNIV_INLINE
-ulint
-rec_offs_extra_size(
-/*================*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ulint size;
- ut_ad(rec_offs_validate(NULL, NULL, offsets));
- size = *rec_offs_base(offsets) & ~(REC_OFFS_COMPACT | REC_OFFS_EXTERNAL);
- ut_ad(size < UNIV_PAGE_SIZE);
- return(size);
-}
-
-/**********************************************************//**
-Returns the total size of a physical record.
-@return size */
-UNIV_INLINE
-ulint
-rec_offs_size(
-/*==========*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- return(rec_offs_data_size(offsets) + rec_offs_extra_size(offsets));
-}
-
-/**********************************************************//**
-Returns a pointer to the end of the record.
-@return pointer to end */
-UNIV_INLINE
-byte*
-rec_get_end(
-/*========*/
- rec_t* rec, /*!< in: pointer to record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- return(rec + rec_offs_data_size(offsets));
-}
-
-/**********************************************************//**
-Returns a pointer to the start of the record.
-@return pointer to start */
-UNIV_INLINE
-byte*
-rec_get_start(
-/*==========*/
- rec_t* rec, /*!< in: pointer to record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- return(rec - rec_offs_extra_size(offsets));
-}
-
-/***************************************************************//**
-Copies a physical record to a buffer.
-@return pointer to the origin of the copy */
-UNIV_INLINE
-rec_t*
-rec_copy(
-/*=====*/
- void* buf, /*!< in: buffer */
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ulint extra_len;
- ulint data_len;
-
- ut_ad(rec && buf);
- ut_ad(rec_offs_validate((rec_t*) rec, NULL, offsets));
- ut_ad(rec_validate(rec, offsets));
-
- extra_len = rec_offs_extra_size(offsets);
- data_len = rec_offs_data_size(offsets);
-
- ut_memcpy(buf, rec - extra_len, extra_len + data_len);
-
- return((byte*)buf + extra_len);
-}
-
-/**********************************************************//**
-Returns the extra size of an old-style physical record if we know its
-data size and number of fields.
-@return extra size */
-UNIV_INLINE
-ulint
-rec_get_converted_extra_size(
-/*=========================*/
- ulint data_size, /*!< in: data size */
- ulint n_fields, /*!< in: number of fields */
- ulint n_ext) /*!< in: number of externally stored columns */
-{
- if (!n_ext && data_size <= REC_1BYTE_OFFS_LIMIT) {
-
- return(REC_N_OLD_EXTRA_BYTES + n_fields);
- }
-
- return(REC_N_OLD_EXTRA_BYTES + 2 * n_fields);
-}
-
-/**********************************************************//**
-The following function returns the size of a data tuple when converted to
-a physical record.
-@return size */
-UNIV_INLINE
-ulint
-rec_get_converted_size(
-/*===================*/
- dict_index_t* index, /*!< in: record descriptor */
- const dtuple_t* dtuple, /*!< in: data tuple */
- ulint n_ext) /*!< in: number of externally stored columns */
-{
- ulint data_size;
- ulint extra_size;
-
- ut_ad(index);
- ut_ad(dtuple);
- ut_ad(dtuple_check_typed(dtuple));
-
- ut_ad(index->type & DICT_UNIVERSAL
- || dtuple_get_n_fields(dtuple)
- == (((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK)
- == REC_STATUS_NODE_PTR)
- ? dict_index_get_n_unique_in_tree(index) + 1
- : dict_index_get_n_fields(index)));
-
- if (dict_table_is_comp(index->table)) {
- return(rec_get_converted_size_comp(index,
- dtuple_get_info_bits(dtuple)
- & REC_NEW_STATUS_MASK,
- dtuple->fields,
- dtuple->n_fields, NULL));
- }
-
- data_size = dtuple_get_data_size(dtuple, 0);
-
- extra_size = rec_get_converted_extra_size(
- data_size, dtuple_get_n_fields(dtuple), n_ext);
-
- return(data_size + extra_size);
-}
-
-#ifndef UNIV_HOTBACKUP
-/************************************************************//**
-Folds a prefix of a physical record to a ulint. Folds only existing fields,
-that is, checks that we do not run out of the record.
-@return the folded value */
-UNIV_INLINE
-ulint
-rec_fold(
-/*=====*/
- const rec_t* rec, /*!< in: the physical record */
- const ulint* offsets, /*!< in: array returned by
- rec_get_offsets() */
- ulint n_fields, /*!< in: number of complete
- fields to fold */
- ulint n_bytes, /*!< in: number of bytes to fold
- in an incomplete last field */
- dulint tree_id) /*!< in: index tree id */
-{
- ulint i;
- const byte* data;
- ulint len;
- ulint fold;
- ulint n_fields_rec;
-
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- ut_ad(rec_validate(rec, offsets));
- ut_ad(n_fields + n_bytes > 0);
-
- n_fields_rec = rec_offs_n_fields(offsets);
- ut_ad(n_fields <= n_fields_rec);
- ut_ad(n_fields < n_fields_rec || n_bytes == 0);
-
- if (n_fields > n_fields_rec) {
- n_fields = n_fields_rec;
- }
-
- if (n_fields == n_fields_rec) {
- n_bytes = 0;
- }
-
- fold = ut_fold_dulint(tree_id);
-
- for (i = 0; i < n_fields; i++) {
- data = rec_get_nth_field(rec, offsets, i, &len);
-
- if (len != UNIV_SQL_NULL) {
- fold = ut_fold_ulint_pair(fold,
- ut_fold_binary(data, len));
- }
- }
-
- if (n_bytes > 0) {
- data = rec_get_nth_field(rec, offsets, i, &len);
-
- if (len != UNIV_SQL_NULL) {
- if (len > n_bytes) {
- len = n_bytes;
- }
-
- fold = ut_fold_ulint_pair(fold,
- ut_fold_binary(data, len));
- }
- }
-
- return(fold);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/rem0types.h b/storage/innodb_plugin/include/rem0types.h
deleted file mode 100644
index 8b84d4af233..00000000000
--- a/storage/innodb_plugin/include/rem0types.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/rem0types.h
-Record manager global types
-
-Created 5/30/1994 Heikki Tuuri
-*************************************************************************/
-
-#ifndef rem0types_h
-#define rem0types_h
-
-/* We define the physical record simply as an array of bytes */
-typedef byte rec_t;
-
-/* Maximum values for various fields (for non-blob tuples) */
-#define REC_MAX_N_FIELDS (1024 - 1)
-#define REC_MAX_HEAP_NO (2 * 8192 - 1)
-#define REC_MAX_N_OWNED (16 - 1)
-
-/* REC_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
-indexed column length (or indexed prefix length). It is set to 3*256,
-so that one can create a column prefix index on 256 characters of a
-TEXT or VARCHAR column also in the UTF-8 charset. In that charset,
-a character may take at most 3 bytes.
-This constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
-files would be at risk! */
-#define REC_MAX_INDEX_COL_LEN 768
-
-#endif
diff --git a/storage/innodb_plugin/include/row0ins.h b/storage/innodb_plugin/include/row0ins.h
deleted file mode 100644
index 9f93565ddb7..00000000000
--- a/storage/innodb_plugin/include/row0ins.h
+++ /dev/null
@@ -1,156 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0ins.h
-Insert into a table
-
-Created 4/20/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0ins_h
-#define row0ins_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "que0types.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "row0types.h"
-
-/***************************************************************//**
-Checks if foreign key constraint fails for an index entry. Sets shared locks
-which lock either the success or the failure of the constraint. NOTE that
-the caller must have a shared latch on dict_foreign_key_check_lock.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_NO_REFERENCED_ROW, or
-DB_ROW_IS_REFERENCED */
-UNIV_INTERN
-ulint
-row_ins_check_foreign_constraint(
-/*=============================*/
- ibool check_ref,/*!< in: TRUE If we want to check that
- the referenced table is ok, FALSE if we
- want to check the foreign key table */
- dict_foreign_t* foreign,/*!< in: foreign constraint; NOTE that the
- tables mentioned in it must be in the
- dictionary cache if they exist at all */
- dict_table_t* table, /*!< in: if check_ref is TRUE, then the foreign
- table, else the referenced table */
- dtuple_t* entry, /*!< in: index entry for index */
- que_thr_t* thr); /*!< in: query thread */
-/*********************************************************************//**
-Creates an insert node struct.
-@return own: insert node struct */
-UNIV_INTERN
-ins_node_t*
-ins_node_create(
-/*============*/
- ulint ins_type, /*!< in: INS_VALUES, ... */
- dict_table_t* table, /*!< in: table where to insert */
- mem_heap_t* heap); /*!< in: mem heap where created */
-/*********************************************************************//**
-Sets a new row to insert for an INS_DIRECT node. This function is only used
-if we have constructed the row separately, which is a rare case; this
-function is quite slow. */
-UNIV_INTERN
-void
-ins_node_set_new_row(
-/*=================*/
- ins_node_t* node, /*!< in: insert node */
- dtuple_t* row); /*!< in: new row (or first row) for the node */
-/***************************************************************//**
-Inserts an index entry to index. Tries first optimistic, then pessimistic
-descent down the tree. If the entry matches enough to a delete marked record,
-performs the insert by updating or delete unmarking the delete marked
-record.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
-UNIV_INTERN
-ulint
-row_ins_index_entry(
-/*================*/
- dict_index_t* index, /*!< in: index */
- dtuple_t* entry, /*!< in: index entry to insert */
- ulint n_ext, /*!< in: number of externally stored columns */
- ibool foreign,/*!< in: TRUE=check foreign key constraints */
- que_thr_t* thr); /*!< in: query thread */
-/***********************************************************//**
-Inserts a row to a table. This is a high-level function used in
-SQL execution graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-row_ins_step(
-/*=========*/
- que_thr_t* thr); /*!< in: query thread */
-/***********************************************************//**
-Creates an entry template for each index of a table. */
-UNIV_INTERN
-void
-ins_node_create_entry_list(
-/*=======================*/
- ins_node_t* node); /*!< in: row insert node */
-
-/* Insert node structure */
-
-struct ins_node_struct{
- que_common_t common; /*!< node type: QUE_NODE_INSERT */
- ulint ins_type;/* INS_VALUES, INS_SEARCHED, or INS_DIRECT */
- dtuple_t* row; /*!< row to insert */
- dict_table_t* table; /*!< table where to insert */
- sel_node_t* select; /*!< select in searched insert */
- que_node_t* values_list;/* list of expressions to evaluate and
- insert in an INS_VALUES insert */
- ulint state; /*!< node execution state */
- dict_index_t* index; /*!< NULL, or the next index where the index
- entry should be inserted */
- dtuple_t* entry; /*!< NULL, or entry to insert in the index;
- after a successful insert of the entry,
- this should be reset to NULL */
- UT_LIST_BASE_NODE_T(dtuple_t)
- entry_list;/* list of entries, one for each index */
- byte* row_id_buf;/* buffer for the row id sys field in row */
- trx_id_t trx_id; /*!< trx id or the last trx which executed the
- node */
- byte* trx_id_buf;/* buffer for the trx id sys field in row */
- mem_heap_t* entry_sys_heap;
- /* memory heap used as auxiliary storage;
- entry_list and sys fields are stored here;
- if this is NULL, entry list should be created
- and buffers for sys fields in row allocated */
- ulint magic_n;
-};
-
-#define INS_NODE_MAGIC_N 15849075
-
-/* Insert node types */
-#define INS_SEARCHED 0 /* INSERT INTO ... SELECT ... */
-#define INS_VALUES 1 /* INSERT INTO ... VALUES ... */
-#define INS_DIRECT 2 /* this is for internal use in dict0crea:
- insert the row directly */
-
-/* Node execution states */
-#define INS_NODE_SET_IX_LOCK 1 /* we should set an IX lock on table */
-#define INS_NODE_ALLOC_ROW_ID 2 /* row id should be allocated */
-#define INS_NODE_INSERT_ENTRIES 3 /* index entries should be built and
- inserted */
-
-#ifndef UNIV_NONINL
-#include "row0ins.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/row0ins.ic b/storage/innodb_plugin/include/row0ins.ic
deleted file mode 100644
index 84f6da255bf..00000000000
--- a/storage/innodb_plugin/include/row0ins.ic
+++ /dev/null
@@ -1,26 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0ins.ic
-Insert into a table
-
-Created 4/20/1996 Heikki Tuuri
-*******************************************************/
-
-
diff --git a/storage/innodb_plugin/include/row0mysql.h b/storage/innodb_plugin/include/row0mysql.h
deleted file mode 100644
index b05241f00f8..00000000000
--- a/storage/innodb_plugin/include/row0mysql.h
+++ /dev/null
@@ -1,784 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2000, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0mysql.h
-Interface between Innobase row operations and MySQL.
-Contains also create table and other data dictionary operations.
-
-Created 9/17/2000 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0mysql_h
-#define row0mysql_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "que0types.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "row0types.h"
-#include "btr0pcur.h"
-#include "trx0types.h"
-
-extern ibool row_rollback_on_timeout;
-
-typedef struct row_prebuilt_struct row_prebuilt_t;
-
-/*******************************************************************//**
-Frees the blob heap in prebuilt when no longer needed. */
-UNIV_INTERN
-void
-row_mysql_prebuilt_free_blob_heap(
-/*==============================*/
- row_prebuilt_t* prebuilt); /*!< in: prebuilt struct of a
- ha_innobase:: table handle */
-/*******************************************************************//**
-Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row
-format.
-@return pointer to the data, we skip the 1 or 2 bytes at the start
-that are used to store the len */
-UNIV_INTERN
-byte*
-row_mysql_store_true_var_len(
-/*=========================*/
- byte* dest, /*!< in: where to store */
- ulint len, /*!< in: length, must fit in two bytes */
- ulint lenlen);/*!< in: storage length of len: either 1 or 2 bytes */
-/*******************************************************************//**
-Reads a >= 5.0.3 format true VARCHAR length, in the MySQL row format, and
-returns a pointer to the data.
-@return pointer to the data, we skip the 1 or 2 bytes at the start
-that are used to store the len */
-UNIV_INTERN
-const byte*
-row_mysql_read_true_varchar(
-/*========================*/
- ulint* len, /*!< out: variable-length field length */
- const byte* field, /*!< in: field in the MySQL format */
- ulint lenlen);/*!< in: storage length of len: either 1
- or 2 bytes */
-/*******************************************************************//**
-Stores a reference to a BLOB in the MySQL format. */
-UNIV_INTERN
-void
-row_mysql_store_blob_ref(
-/*=====================*/
- byte* dest, /*!< in: where to store */
- ulint col_len,/*!< in: dest buffer size: determines into
- how many bytes the BLOB length is stored,
- the space for the length may vary from 1
- to 4 bytes */
- const void* data, /*!< in: BLOB data; if the value to store
- is SQL NULL this should be NULL pointer */
- ulint len); /*!< in: BLOB length; if the value to store
- is SQL NULL this should be 0; remember
- also to set the NULL bit in the MySQL record
- header! */
-/*******************************************************************//**
-Reads a reference to a BLOB in the MySQL format.
-@return pointer to BLOB data */
-UNIV_INTERN
-const byte*
-row_mysql_read_blob_ref(
-/*====================*/
- ulint* len, /*!< out: BLOB length */
- const byte* ref, /*!< in: BLOB reference in the
- MySQL format */
- ulint col_len); /*!< in: BLOB reference length
- (not BLOB length) */
-/**************************************************************//**
-Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format.
-The counterpart of this function is row_sel_field_store_in_mysql_format() in
-row0sel.c.
-@return up to which byte we used buf in the conversion */
-UNIV_INTERN
-byte*
-row_mysql_store_col_in_innobase_format(
-/*===================================*/
- dfield_t* dfield, /*!< in/out: dfield where dtype
- information must be already set when
- this function is called! */
- byte* buf, /*!< in/out: buffer for a converted
- integer value; this must be at least
- col_len long then! */
- ibool row_format_col, /*!< TRUE if the mysql_data is from
- a MySQL row, FALSE if from a MySQL
- key value;
- in MySQL, a true VARCHAR storage
- format differs in a row and in a
- key value: in a key value the length
- is always stored in 2 bytes! */
- const byte* mysql_data, /*!< in: MySQL column value, not
- SQL NULL; NOTE that dfield may also
- get a pointer to mysql_data,
- therefore do not discard this as long
- as dfield is used! */
- ulint col_len, /*!< in: MySQL column length; NOTE that
- this is the storage length of the
- column in the MySQL format row, not
- necessarily the length of the actual
- payload data; if the column is a true
- VARCHAR then this is irrelevant */
- ulint comp); /*!< in: nonzero=compact format */
-/****************************************************************//**
-Handles user errors and lock waits detected by the database engine.
-@return TRUE if it was a lock wait and we should continue running the
-query thread */
-UNIV_INTERN
-ibool
-row_mysql_handle_errors(
-/*====================*/
- ulint* new_err,/*!< out: possible new error encountered in
- rollback, or the old error that was
- present at function entry */
- trx_t* trx, /*!< in: transaction */
- que_thr_t* thr, /*!< in: query thread */
- trx_savept_t* savept);/*!< in: savepoint */
-/********************************************************************//**
-Create a prebuilt struct for a MySQL table handle.
-@return own: a prebuilt struct */
-UNIV_INTERN
-row_prebuilt_t*
-row_create_prebuilt(
-/*================*/
- dict_table_t* table); /*!< in: Innobase table handle */
-/********************************************************************//**
-Free a prebuilt struct for a MySQL table handle. */
-UNIV_INTERN
-void
-row_prebuilt_free(
-/*==============*/
- row_prebuilt_t* prebuilt, /*!< in, own: prebuilt struct */
- ibool dict_locked); /*!< in: TRUE=data dictionary locked */
-/*********************************************************************//**
-Updates the transaction pointers in query graphs stored in the prebuilt
-struct. */
-UNIV_INTERN
-void
-row_update_prebuilt_trx(
-/*====================*/
- row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct
- in MySQL handle */
- trx_t* trx); /*!< in: transaction handle */
-/*********************************************************************//**
-Unlocks AUTO_INC type locks that were possibly reserved by a trx. This
-function should be called at the end of an SQL statement, by the
-connection thread that owns the transaction (trx->mysql_thd). */
-UNIV_INTERN
-void
-row_unlock_table_autoinc_for_mysql(
-/*===============================*/
- trx_t* trx); /*!< in/out: transaction */
-/*********************************************************************//**
-Sets an AUTO_INC type lock on the table mentioned in prebuilt. The
-AUTO_INC lock gives exclusive access to the auto-inc counter of the
-table. The lock is reserved only for the duration of an SQL statement.
-It is not compatible with another AUTO_INC or exclusive lock on the
-table.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-row_lock_table_autoinc_for_mysql(
-/*=============================*/
- row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in the MySQL
- table handle */
-/*********************************************************************//**
-Sets a table lock on the table mentioned in prebuilt.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-row_lock_table_for_mysql(
-/*=====================*/
- row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in the MySQL
- table handle */
- dict_table_t* table, /*!< in: table to lock, or NULL
- if prebuilt->table should be
- locked as
- prebuilt->select_lock_type */
- ulint mode); /*!< in: lock mode of table
- (ignored if table==NULL) */
-
-/*********************************************************************//**
-Does an insert for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-row_insert_for_mysql(
-/*=================*/
- byte* mysql_rec, /*!< in: row in the MySQL format */
- row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL
- handle */
-/*********************************************************************//**
-Builds a dummy query graph used in selects. */
-UNIV_INTERN
-void
-row_prebuild_sel_graph(
-/*===================*/
- row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL
- handle */
-/*********************************************************************//**
-Gets pointer to a prebuilt update vector used in updates. If the update
-graph has not yet been built in the prebuilt struct, then this function
-first builds it.
-@return prebuilt update vector */
-UNIV_INTERN
-upd_t*
-row_get_prebuilt_update_vector(
-/*===========================*/
- row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL
- handle */
-/*********************************************************************//**
-Checks if a table is such that we automatically created a clustered
-index on it (on row id).
-@return TRUE if the clustered index was generated automatically */
-UNIV_INTERN
-ibool
-row_table_got_default_clust_index(
-/*==============================*/
- const dict_table_t* table); /*!< in: table */
-/*********************************************************************//**
-Calculates the key number used inside MySQL for an Innobase index. We have
-to take into account whether we generated a default clustered index for the table.
-@return the key number used inside MySQL */
-UNIV_INTERN
-ulint
-row_get_mysql_key_number_for_index(
-/*===============================*/
- const dict_index_t* index); /*!< in: index */
-/*********************************************************************//**
-Does an update or delete of a row for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-row_update_for_mysql(
-/*=================*/
- byte* mysql_rec, /*!< in: the row to be updated, in
- the MySQL format */
- row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL
- handle */
-/*********************************************************************//**
-This can only be used when srv_locks_unsafe_for_binlog is TRUE or
-session is using a READ COMMITTED isolation level. Before
-calling this function we must use trx_reset_new_rec_lock_info() and
-trx_register_new_rec_lock() to store the information which new record locks
-really were set. This function removes a newly set lock under prebuilt->pcur,
-and also under prebuilt->clust_pcur. Currently, this is only used and tested
-in the case of an UPDATE or a DELETE statement, where the row lock is of the
-LOCK_X type.
-Thus, this implements a 'mini-rollback' that releases the latest record
-locks we set.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-row_unlock_for_mysql(
-/*=================*/
- row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in MySQL
- handle */
- ibool has_latches_on_recs);/*!< TRUE if called so that we have
- the latches on the records under pcur
- and clust_pcur, and we do not need to
- reposition the cursors. */
-/*********************************************************************//**
-Creates a query graph node of 'update' type to be used in the MySQL
-interface.
-@return own: update node */
-UNIV_INTERN
-upd_node_t*
-row_create_update_node_for_mysql(
-/*=============================*/
- dict_table_t* table, /*!< in: table to update */
- mem_heap_t* heap); /*!< in: mem heap from which allocated */
-/**********************************************************************//**
-Does a cascaded delete or set null in a foreign key operation.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-ulint
-row_update_cascade_for_mysql(
-/*=========================*/
- que_thr_t* thr, /*!< in: query thread */
- upd_node_t* node, /*!< in: update node used in the cascade
- or set null operation */
- dict_table_t* table); /*!< in: table where we do the operation */
-/*********************************************************************//**
-Locks the data dictionary exclusively for performing a table create or other
-data dictionary modification operation. */
-UNIV_INTERN
-void
-row_mysql_lock_data_dictionary_func(
-/*================================*/
- trx_t* trx, /*!< in/out: transaction */
- const char* file, /*!< in: file name */
- ulint line); /*!< in: line number */
-#define row_mysql_lock_data_dictionary(trx) \
- row_mysql_lock_data_dictionary_func(trx, __FILE__, __LINE__)
-/*********************************************************************//**
-Unlocks the data dictionary exclusive lock. */
-UNIV_INTERN
-void
-row_mysql_unlock_data_dictionary(
-/*=============================*/
- trx_t* trx); /*!< in/out: transaction */
-/*********************************************************************//**
-Locks the data dictionary in shared mode from modifications, for performing
-foreign key check, rollback, or other operation invisible to MySQL. */
-UNIV_INTERN
-void
-row_mysql_freeze_data_dictionary_func(
-/*==================================*/
- trx_t* trx, /*!< in/out: transaction */
- const char* file, /*!< in: file name */
- ulint line); /*!< in: line number */
-#define row_mysql_freeze_data_dictionary(trx) \
- row_mysql_freeze_data_dictionary_func(trx, __FILE__, __LINE__)
-/*********************************************************************//**
-Unlocks the data dictionary shared lock. */
-UNIV_INTERN
-void
-row_mysql_unfreeze_data_dictionary(
-/*===============================*/
- trx_t* trx); /*!< in/out: transaction */
-/*********************************************************************//**
-Creates a table for MySQL. If the name of the table ends in
-one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
-"innodb_table_monitor", then this will also start the printing of monitor
-output by the master thread. If the table name ends in "innodb_mem_validate",
-InnoDB will try to invoke mem_validate().
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-row_create_table_for_mysql(
-/*=======================*/
- dict_table_t* table, /*!< in, own: table definition
- (will be freed) */
- trx_t* trx); /*!< in: transaction handle */
-/*********************************************************************//**
-Does an index creation operation for MySQL. TODO: currently failure
-to create an index results in dropping the whole table! This is no problem
-currently as all indexes must be created at the same time as the table.
-@return error number or DB_SUCCESS */
-UNIV_INTERN
-int
-row_create_index_for_mysql(
-/*=======================*/
- dict_index_t* index, /*!< in, own: index definition
- (will be freed) */
- trx_t* trx, /*!< in: transaction handle */
- const ulint* field_lengths); /*!< in: if not NULL, must contain
- dict_index_get_n_fields(index)
- actual field lengths for the
- index columns, which are
- then checked for not being too
- large. */
-/*********************************************************************//**
-Scans a table create SQL string and adds to the data dictionary
-the foreign key constraints declared in the string. This function
-should be called after the indexes for a table have been created.
-Each foreign key constraint must be accompanied with indexes in
-both participating tables. The indexes are allowed to contain more
-fields than mentioned in the constraint.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-row_table_add_foreign_constraints(
-/*==============================*/
- trx_t* trx, /*!< in: transaction */
- const char* sql_string, /*!< in: table create statement where
- foreign keys are declared like:
- FOREIGN KEY (a, b) REFERENCES table2(c, d),
- table2 can be written also with the
- database name before it: test.table2 */
- const char* name, /*!< in: table full name in the
- normalized form
- database_name/table_name */
- ibool reject_fks); /*!< in: if TRUE, fail with error
- code DB_CANNOT_ADD_CONSTRAINT if
- any foreign keys are found. */
-
-/*********************************************************************//**
-The master thread in srv0srv.c calls this regularly to drop tables which
-we must drop in background after queries to them have ended. Such lazy
-dropping of tables is needed in ALTER TABLE on Unix.
-@return how many tables dropped + remaining tables in list */
-UNIV_INTERN
-ulint
-row_drop_tables_for_mysql_in_background(void);
-/*=========================================*/
-/*********************************************************************//**
-Get the background drop list length. NOTE: the caller must own the kernel
-mutex!
-@return how many tables in list */
-UNIV_INTERN
-ulint
-row_get_background_drop_list_len_low(void);
-/*======================================*/
-/*********************************************************************//**
-Truncates a table for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-row_truncate_table_for_mysql(
-/*=========================*/
- dict_table_t* table, /*!< in: table handle */
- trx_t* trx); /*!< in: transaction handle */
-/*********************************************************************//**
-Drops a table for MySQL. If the name of the dropped table ends in
-one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
-"innodb_table_monitor", then this will also stop the printing of monitor
-output by the master thread. If the data dictionary was not already locked
-by the transaction, the transaction will be committed. Otherwise, the
-data dictionary will remain locked.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-row_drop_table_for_mysql(
-/*=====================*/
- const char* name, /*!< in: table name */
- trx_t* trx, /*!< in: transaction handle */
- ibool drop_db);/*!< in: TRUE=dropping whole database */
-
-/*********************************************************************//**
-Discards the tablespace of a table which is stored in an .ibd file. Discarding
-means that this function deletes the .ibd file and assigns a new table id for
-the table. Also the flag table->ibd_file_missing is set TRUE.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-row_discard_tablespace_for_mysql(
-/*=============================*/
- const char* name, /*!< in: table name */
- trx_t* trx); /*!< in: transaction handle */
-/*****************************************************************//**
-Imports a tablespace. The space id in the .ibd file must match the space id
-of the table in the data dictionary.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-row_import_tablespace_for_mysql(
-/*============================*/
- const char* name, /*!< in: table name */
- trx_t* trx); /*!< in: transaction handle */
-/*********************************************************************//**
-Drops a database for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-row_drop_database_for_mysql(
-/*========================*/
- const char* name, /*!< in: database name which ends in '/' */
- trx_t* trx); /*!< in: transaction handle */
-/*********************************************************************//**
-Renames a table for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-ulint
-row_rename_table_for_mysql(
-/*=======================*/
- const char* old_name, /*!< in: old table name */
- const char* new_name, /*!< in: new table name */
- trx_t* trx, /*!< in: transaction handle */
- ibool commit); /*!< in: if TRUE then commit trx */
-/*********************************************************************//**
-Checks a table for corruption.
-@return DB_ERROR or DB_SUCCESS */
-UNIV_INTERN
-ulint
-row_check_table_for_mysql(
-/*======================*/
- row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL
- handle */
-
-/*********************************************************************//**
-Determines if a table is a magic monitor table.
-@return TRUE if monitor table */
-UNIV_INTERN
-ibool
-row_is_magic_monitor_table(
-/*=======================*/
- const char* table_name); /*!< in: name of the table, in the
- form database/table_name */
-
-/* A struct describing a place for an individual column in the MySQL
-row format which is presented to the table handler in ha_innobase.
-This template struct is used to speed up row transformations between
-Innobase and MySQL. */
-
-typedef struct mysql_row_templ_struct mysql_row_templ_t;
-struct mysql_row_templ_struct {
- ulint col_no; /*!< column number of the column */
- ulint rec_field_no; /*!< field number of the column in an
- Innobase record in the current index;
- not defined if template_type is
- ROW_MYSQL_WHOLE_ROW */
- ulint mysql_col_offset; /*!< offset of the column in the MySQL
- row format */
- ulint mysql_col_len; /*!< length of the column in the MySQL
- row format */
- ulint mysql_null_byte_offset; /*!< MySQL NULL bit byte offset in a
- MySQL record */
- ulint mysql_null_bit_mask; /*!< bit mask to get the NULL bit,
- zero if column cannot be NULL */
- ulint type; /*!< column type in Innobase mtype
- numbers DATA_CHAR... */
- ulint mysql_type; /*!< MySQL type code; this is always
- < 256 */
- ulint mysql_length_bytes; /*!< if mysql_type
- == DATA_MYSQL_TRUE_VARCHAR, this tells
- whether we should use 1 or 2 bytes to
- store the MySQL true VARCHAR data
- length at the start of row in the MySQL
- format (NOTE that the MySQL key value
- format always uses 2 bytes for the data
- len) */
- ulint charset; /*!< MySQL charset-collation code
- of the column, or zero */
- ulint mbminlen; /*!< minimum length of a char, in bytes,
- or zero if not a char type */
- ulint mbmaxlen; /*!< maximum length of a char, in bytes,
- or zero if not a char type */
- ulint is_unsigned; /*!< if a column type is an integer
- type and this field is != 0, then
- it is an unsigned integer type */
-};
-
-#define MYSQL_FETCH_CACHE_SIZE 8
-/* After fetching this many rows, we start caching them in fetch_cache */
-#define MYSQL_FETCH_CACHE_THRESHOLD 4
-
-#define ROW_PREBUILT_ALLOCATED 78540783
-#define ROW_PREBUILT_FREED 26423527
-
-/** A struct for (sometimes lazily) prebuilt structures in an Innobase table
-handle used within MySQL; these are used to save CPU time. */
-
-struct row_prebuilt_struct {
- ulint magic_n; /*!< this magic number is set to
- ROW_PREBUILT_ALLOCATED when created,
- or ROW_PREBUILT_FREED when the
- struct has been freed */
- dict_table_t* table; /*!< Innobase table handle */
- dict_index_t* index; /*!< current index for a search, if
- any */
- trx_t* trx; /*!< current transaction handle */
- unsigned sql_stat_start:1;/*!< TRUE when we start processing of
- an SQL statement: we may have to set
- an intention lock on the table,
- create a consistent read view etc. */
- unsigned mysql_has_locked:1;/*!< this is set TRUE when MySQL
- calls external_lock on this handle
- with a lock flag, and set FALSE when
- with the F_UNLOCK flag */
- unsigned clust_index_was_generated:1;
- /*!< if the user did not define a
- primary key in MySQL, then Innobase
- automatically generated a clustered
- index where the ordering column is
- the row id: in this case this flag
- is set to TRUE */
- unsigned index_usable:1; /*!< caches the value of
- row_merge_is_index_usable(trx,index) */
- unsigned read_just_key:1;/*!< set to 1 when MySQL calls
- ha_innobase::extra with the
- argument HA_EXTRA_KEYREAD; it is enough
- to read just columns defined in
- the index (i.e., no read of the
- clustered index record necessary) */
- unsigned used_in_HANDLER:1;/*!< TRUE if we have been using this
- handle in a MySQL HANDLER low level
- index cursor command: then we must
- store the pcur position even in a
- unique search from a clustered index,
- because HANDLER allows NEXT and PREV
- in such a situation */
- unsigned template_type:2;/*!< ROW_MYSQL_WHOLE_ROW,
- ROW_MYSQL_REC_FIELDS,
- ROW_MYSQL_DUMMY_TEMPLATE, or
- ROW_MYSQL_NO_TEMPLATE */
- unsigned n_template:10; /*!< number of elements in the
- template */
- unsigned null_bitmap_len:10;/*!< number of bytes in the SQL NULL
- bitmap at the start of a row in the
- MySQL format */
- unsigned need_to_access_clustered:1; /*!< if we are fetching
- columns through a secondary index
- and at least one column is not in
- the secondary index, then this is
- set to TRUE */
- unsigned templ_contains_blob:1;/*!< TRUE if the template contains
- BLOB column(s) */
- mysql_row_templ_t* mysql_template;/*!< template used to transform
- rows fast between MySQL and Innobase
- formats; memory for this template
- is not allocated from 'heap' */
- mem_heap_t* heap; /*!< memory heap from which
- these auxiliary structures are
- allocated when needed */
- ins_node_t* ins_node; /*!< Innobase SQL insert node
- used to perform inserts
- to the table */
- byte* ins_upd_rec_buff;/*!< buffer for storing data converted
- to the Innobase format from the MySQL
- format */
- const byte* default_rec; /*!< the default values of all columns
- (a "default row") in MySQL format */
- ulint hint_need_to_fetch_extra_cols;
- /*!< normally this is set to 0; if this
- is set to ROW_RETRIEVE_PRIMARY_KEY,
- then we should at least retrieve all
- columns in the primary key; if this
- is set to ROW_RETRIEVE_ALL_COLS, then
- we must retrieve all columns in the
- key (if read_just_key == 1), or all
- columns in the table */
- upd_node_t* upd_node; /*!< Innobase SQL update node used
- to perform updates and deletes */
- que_fork_t* ins_graph; /*!< Innobase SQL query graph used
- in inserts */
- que_fork_t* upd_graph; /*!< Innobase SQL query graph used
- in updates or deletes */
- btr_pcur_t* pcur; /*!< persistent cursor used in selects
- and updates */
- btr_pcur_t* clust_pcur; /*!< persistent cursor used in
- some selects and updates */
- que_fork_t* sel_graph; /*!< dummy query graph used in
- selects */
- dtuple_t* search_tuple; /*!< prebuilt dtuple used in selects */
- byte row_id[DATA_ROW_ID_LEN];
- /*!< if the clustered index was
- generated, the row id of the
- last row fetched is stored
- here */
- dtuple_t* clust_ref; /*!< prebuilt dtuple used in
- sel/upd/del */
- ulint select_lock_type;/*!< LOCK_NONE, LOCK_S, or LOCK_X */
- ulint stored_select_lock_type;/*!< this field is used to
- remember the original select_lock_type
- that was decided in ha_innodb.cc,
- ::store_lock(), ::external_lock(),
- etc. */
- ulint row_read_type; /*!< ROW_READ_WITH_LOCKS if row locks
- should be obtained for records
- under an UPDATE or DELETE cursor.
- If innodb_locks_unsafe_for_binlog
- is TRUE, this can be set to
- ROW_READ_TRY_SEMI_CONSISTENT, so that
- if the row under an UPDATE or DELETE
- cursor was locked by another
- transaction, InnoDB will resort
- to reading the last committed value
- ('semi-consistent read'). Then,
- this field will be set to
- ROW_READ_DID_SEMI_CONSISTENT to
- indicate that. If the row does not
- match the WHERE condition, MySQL will
- invoke handler::unlock_row() to
- clear the flag back to
- ROW_READ_TRY_SEMI_CONSISTENT and
- to simply skip the row. If
- the row matches, the next call to
- row_search_for_mysql() will lock
- the row.
- This eliminates lock waits in some
- cases; note that this breaks
- serializability. */
- ulint new_rec_locks; /*!< normally 0; if
- srv_locks_unsafe_for_binlog is
- TRUE or session is using READ
- COMMITTED isolation level, in a
- cursor search, if we set a new
- record lock on an index, this is
- incremented; this is used in
- releasing the locks under the
- cursors if we are performing an
- UPDATE and we determine after
- retrieving the row that it does
- not need to be locked; thus,
- these can be used to implement a
- 'mini-rollback' that releases
- the latest record locks */
- ulint mysql_prefix_len;/*!< byte offset of the end of
- the last requested column */
- ulint mysql_row_len; /*!< length in bytes of a row in the
- MySQL format */
- ulint n_rows_fetched; /*!< number of rows fetched after
- positioning the current cursor */
- ulint fetch_direction;/*!< ROW_SEL_NEXT or ROW_SEL_PREV */
- byte* fetch_cache[MYSQL_FETCH_CACHE_SIZE];
- /*!< a cache for fetched rows if we
- fetch many rows from the same cursor:
- it saves CPU time to fetch them in a
- batch; we reserve mysql_row_len
- bytes for each such row; these
- pointers point 4 bytes past the
- allocated mem buf start, because
- there is a 4 byte magic number at the
- start and at the end */
- ibool keep_other_fields_on_keyread; /*!< when using fetch
- cache with HA_EXTRA_KEYREAD, don't
- overwrite other fields in mysql row
- buffer.*/
- ulint fetch_cache_first;/*!< position of the first not yet
- fetched row in fetch_cache */
- ulint n_fetch_cached; /*!< number of not yet fetched rows
- in fetch_cache */
- mem_heap_t* blob_heap; /*!< in SELECTS BLOB fields are copied
- to this heap */
- mem_heap_t* old_vers_heap; /*!< memory heap where a previous
- version is built in consistent read */
- /*----------------------*/
- ulonglong autoinc_last_value;
- /*!< last value of AUTO-INC interval */
- ulonglong autoinc_increment;/*!< The increment step of the auto
- increment column. Value must be
- greater than or equal to 1. Required to
- calculate the next value */
- ulonglong autoinc_offset; /*!< The offset passed to
- get_auto_increment() by MySQL. Required
- to calculate the next value */
- ulint autoinc_error; /*!< The actual error code encountered
- while trying to init or read the
- autoinc value from the table. We
- store it here so that we can return
- it to MySQL */
- /*----------------------*/
- ulint magic_n2; /*!< this should be the same as
- magic_n */
-};
-
-#define ROW_PREBUILT_FETCH_MAGIC_N 465765687
-
-#define ROW_MYSQL_WHOLE_ROW 0
-#define ROW_MYSQL_REC_FIELDS 1
-#define ROW_MYSQL_NO_TEMPLATE 2
-#define ROW_MYSQL_DUMMY_TEMPLATE 3 /* dummy template used in
- row_scan_and_check_index */
-
-/* Values for hint_need_to_fetch_extra_cols */
-#define ROW_RETRIEVE_PRIMARY_KEY 1
-#define ROW_RETRIEVE_ALL_COLS 2
-
-/* Values for row_read_type */
-#define ROW_READ_WITH_LOCKS 0
-#define ROW_READ_TRY_SEMI_CONSISTENT 1
-#define ROW_READ_DID_SEMI_CONSISTENT 2
-
-#ifndef UNIV_NONINL
-#include "row0mysql.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/row0mysql.ic b/storage/innodb_plugin/include/row0mysql.ic
deleted file mode 100644
index 35033aa2ad1..00000000000
--- a/storage/innodb_plugin/include/row0mysql.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2001, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0mysql.ic
-MySQL interface for Innobase
-
-Created 1/23/2001 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innodb_plugin/include/row0purge.h b/storage/innodb_plugin/include/row0purge.h
deleted file mode 100644
index 89ec54fb54a..00000000000
--- a/storage/innodb_plugin/include/row0purge.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0purge.h
-Purge obsolete records
-
-Created 3/14/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0purge_h
-#define row0purge_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "btr0types.h"
-#include "btr0pcur.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "row0types.h"
-
-/********************************************************************//**
-Creates a purge node to a query graph.
-@return own: purge node */
-UNIV_INTERN
-purge_node_t*
-row_purge_node_create(
-/*==================*/
- que_thr_t* parent, /*!< in: parent node, i.e., a thr node */
- mem_heap_t* heap); /*!< in: memory heap where created */
-/***********************************************************//**
-Does the purge operation for a single undo log record. This is a high-level
-function used in an SQL execution graph.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-row_purge_step(
-/*===========*/
- que_thr_t* thr); /*!< in: query thread */
-
-/* Purge node structure */
-
-struct purge_node_struct{
- que_common_t common; /*!< node type: QUE_NODE_PURGE */
- /*----------------------*/
- /* Local storage for this graph node */
- roll_ptr_t roll_ptr;/* roll pointer to undo log record */
- trx_undo_rec_t* undo_rec;/* undo log record */
- trx_undo_inf_t* reservation;/* reservation for the undo log record in
- the purge array */
- undo_no_t undo_no;/* undo number of the record */
- ulint rec_type;/* undo log record type: TRX_UNDO_INSERT_REC,
- ... */
- btr_pcur_t pcur; /*!< persistent cursor used in searching the
- clustered index record */
- ibool found_clust;/* TRUE if the clustered index record
- determined by ref was found in the clustered
- index, and we were able to position pcur on
- it */
- dict_table_t* table; /*!< table where purge is done */
- ulint cmpl_info;/* compiler analysis info of an update */
- upd_t* update; /*!< update vector for a clustered index
- record */
- dtuple_t* ref; /*!< NULL, or row reference to the next row to
- handle */
- dtuple_t* row; /*!< NULL, or a copy (also fields copied to
- heap) of the indexed fields of the row to
- handle */
- dict_index_t* index; /*!< NULL, or the next index whose record should
- be handled */
- mem_heap_t* heap; /*!< memory heap used as auxiliary storage for
- row; this must be emptied after a successful
- purge of a row */
-};
-
-#ifndef UNIV_NONINL
-#include "row0purge.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/row0purge.ic b/storage/innodb_plugin/include/row0purge.ic
deleted file mode 100644
index 23d7d3845a4..00000000000
--- a/storage/innodb_plugin/include/row0purge.ic
+++ /dev/null
@@ -1,25 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-
-/**************************************************//**
-@file include/row0purge.ic
-Purge obsolete records
-
-Created 3/14/1997 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innodb_plugin/include/row0row.h b/storage/innodb_plugin/include/row0row.h
deleted file mode 100644
index 723b7b53395..00000000000
--- a/storage/innodb_plugin/include/row0row.h
+++ /dev/null
@@ -1,310 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0row.h
-General row routines
-
-Created 4/20/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0row_h
-#define row0row_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "mtr0mtr.h"
-#include "rem0types.h"
-#include "read0types.h"
-#include "row0types.h"
-#include "btr0types.h"
-
-/*********************************************************************//**
-Gets the offset of the trx id field, in bytes relative to the origin of
-a clustered index record.
-@return offset of DATA_TRX_ID */
-UNIV_INTERN
-ulint
-row_get_trx_id_offset(
-/*==================*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets);/*!< in: rec_get_offsets(rec, index) */
-/*********************************************************************//**
-Reads the trx id field from a clustered index record.
-@return value of the field */
-UNIV_INLINE
-trx_id_t
-row_get_rec_trx_id(
-/*===============*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets);/*!< in: rec_get_offsets(rec, index) */
-/*********************************************************************//**
-Reads the roll pointer field from a clustered index record.
-@return value of the field */
-UNIV_INLINE
-roll_ptr_t
-row_get_rec_roll_ptr(
-/*=================*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets);/*!< in: rec_get_offsets(rec, index) */
-/*****************************************************************//**
-When an insert or purge to a table is performed, this function builds
-the entry to be inserted into or purged from an index on the table.
-@return index entry which should be inserted or purged, or NULL if the
-externally stored columns in the clustered index record are
-unavailable and ext != NULL */
-UNIV_INTERN
-dtuple_t*
-row_build_index_entry(
-/*==================*/
- const dtuple_t* row, /*!< in: row which should be
- inserted or purged */
- row_ext_t* ext, /*!< in: externally stored column prefixes,
- or NULL */
- dict_index_t* index, /*!< in: index on the table */
- mem_heap_t* heap); /*!< in: memory heap from which the memory for
- the index entry is allocated */
-/*******************************************************************//**
-An inverse function to row_build_index_entry. Builds a row from a
-record in a clustered index.
-@return own: row built; see the NOTE below! */
-UNIV_INTERN
-dtuple_t*
-row_build(
-/*======*/
- ulint type, /*!< in: ROW_COPY_POINTERS or
- ROW_COPY_DATA; the latter
- copies also the data fields to
- heap while the first only
- places pointers to data fields
- on the index page, and thus is
- more efficient */
- const dict_index_t* index, /*!< in: clustered index */
- const rec_t* rec, /*!< in: record in the clustered
- index; NOTE: in the case
- ROW_COPY_POINTERS the data
- fields in the row will point
- directly into this record,
- therefore, the buffer page of
- this record must be at least
- s-latched and the latch held
- as long as the row dtuple is used! */
- const ulint* offsets,/*!< in: rec_get_offsets(rec,index)
- or NULL, in which case this function
- will invoke rec_get_offsets() */
- const dict_table_t* col_table,
- /*!< in: table, to check which
- externally stored columns
- occur in the ordering columns
- of an index, or NULL if
- index->table should be
- consulted instead; the user
- columns in this table should be
- the same columns as in index->table */
- row_ext_t** ext, /*!< out, own: cache of
- externally stored column
- prefixes, or NULL */
- mem_heap_t* heap); /*!< in: memory heap from which
- the memory needed is allocated */
-/*******************************************************************//**
-Converts an index record to a typed data tuple.
-@return index entry built; does not set info_bits, and the data fields
-in the entry will point directly to rec */
-UNIV_INTERN
-dtuple_t*
-row_rec_to_index_entry_low(
-/*=======================*/
- const rec_t* rec, /*!< in: record in the index */
- const dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- ulint* n_ext, /*!< out: number of externally
- stored columns */
- mem_heap_t* heap); /*!< in: memory heap from which
- the memory needed is allocated */
-/*******************************************************************//**
-Converts an index record to a typed data tuple. NOTE that externally
-stored (often big) fields are NOT copied to heap.
-@return own: index entry built; see the NOTE below! */
-UNIV_INTERN
-dtuple_t*
-row_rec_to_index_entry(
-/*===================*/
- ulint type, /*!< in: ROW_COPY_DATA, or
- ROW_COPY_POINTERS: the former
- copies also the data fields to
- heap as the latter only places
- pointers to data fields on the
- index page */
- const rec_t* rec, /*!< in: record in the index;
- NOTE: in the case
- ROW_COPY_POINTERS the data
- fields in the row will point
- directly into this record,
- therefore, the buffer page of
- this record must be at least
- s-latched and the latch held
- as long as the dtuple is used! */
- const dict_index_t* index, /*!< in: index */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec) */
- ulint* n_ext, /*!< out: number of externally
- stored columns */
- mem_heap_t* heap); /*!< in: memory heap from which
- the memory needed is allocated */
-/*******************************************************************//**
-Builds from a secondary index record a row reference with which we can
-search the clustered index record.
-@return own: row reference built; see the NOTE below! */
-UNIV_INTERN
-dtuple_t*
-row_build_row_ref(
-/*==============*/
- ulint type, /*!< in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
- the former copies also the data fields to
- heap, whereas the latter only places pointers
- to data fields on the index page */
- dict_index_t* index, /*!< in: secondary index */
- const rec_t* rec, /*!< in: record in the index;
- NOTE: in the case ROW_COPY_POINTERS
- the data fields in the row will point
- directly into this record, therefore,
- the buffer page of this record must be
- at least s-latched and the latch held
- as long as the row reference is used! */
- mem_heap_t* heap); /*!< in: memory heap from which the memory
- needed is allocated */
-/*******************************************************************//**
-Builds from a secondary index record a row reference with which we can
-search the clustered index record. */
-UNIV_INTERN
-void
-row_build_row_ref_in_tuple(
-/*=======================*/
- dtuple_t* ref, /*!< in/out: row reference built;
- see the NOTE below! */
- const rec_t* rec, /*!< in: record in the index;
- NOTE: the data fields in ref
- will point directly into this
- record, therefore, the buffer
- page of this record must be at
- least s-latched and the latch
- held as long as the row
- reference is used! */
- const dict_index_t* index, /*!< in: secondary index */
- ulint* offsets,/*!< in: rec_get_offsets(rec, index)
- or NULL */
- trx_t* trx); /*!< in: transaction */
-/*******************************************************************//**
-Builds from a secondary index record a row reference with which we can
-search the clustered index record. */
-UNIV_INLINE
-void
-row_build_row_ref_fast(
-/*===================*/
- dtuple_t* ref, /*!< in/out: typed data tuple where the
- reference is built */
- const ulint* map, /*!< in: array of field numbers in rec
- telling how ref should be built from
- the fields of rec */
- const rec_t* rec, /*!< in: record in the index; must be
- preserved while ref is used, as we do
- not copy field values to heap */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-/***************************************************************//**
-Searches the clustered index record for a row, if we have the row
-reference.
-@return TRUE if found */
-UNIV_INTERN
-ibool
-row_search_on_row_ref(
-/*==================*/
- btr_pcur_t* pcur, /*!< out: persistent cursor, which must
- be closed by the caller */
- ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
- const dict_table_t* table, /*!< in: table */
- const dtuple_t* ref, /*!< in: row reference */
- mtr_t* mtr); /*!< in/out: mtr */
-/*********************************************************************//**
-Fetches the clustered index record for a secondary index record. The latches
-on the secondary index record are preserved.
-@return record or NULL, if no record found */
-UNIV_INTERN
-rec_t*
-row_get_clust_rec(
-/*==============*/
- ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
- const rec_t* rec, /*!< in: record in a secondary index */
- dict_index_t* index, /*!< in: secondary index */
- dict_index_t** clust_index,/*!< out: clustered index */
- mtr_t* mtr); /*!< in: mtr */
-/***************************************************************//**
-Searches an index record.
-@return TRUE if found */
-UNIV_INTERN
-ibool
-row_search_index_entry(
-/*===================*/
- dict_index_t* index, /*!< in: index */
- const dtuple_t* entry, /*!< in: index entry */
- ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
- btr_pcur_t* pcur, /*!< in/out: persistent cursor, which must
- be closed by the caller */
- mtr_t* mtr); /*!< in: mtr */
-
-
-#define ROW_COPY_DATA 1
-#define ROW_COPY_POINTERS 2
-
-/* The allowed latching order of index records is the following:
-(1) a secondary index record ->
-(2) the clustered index record ->
-(3) rollback segment data for the clustered index record.
-
-No new latches may be obtained while the kernel mutex is reserved.
-However, the kernel mutex can be reserved while latches are owned. */
-
-/*******************************************************************//**
-Formats the raw data in "data" (in InnoDB on-disk format) using
-"dict_field" and writes the result to "buf".
-Not more than "buf_size" bytes are written to "buf".
-The result is always NUL-terminated (provided buf_size is positive) and the
-number of bytes that were written to "buf" is returned (including the
-terminating NUL).
-@return number of bytes that were written */
-UNIV_INTERN
-ulint
-row_raw_format(
-/*===========*/
- const char* data, /*!< in: raw data */
- ulint data_len, /*!< in: raw data length
- in bytes */
- const dict_field_t* dict_field, /*!< in: index field */
- char* buf, /*!< out: output buffer */
- ulint buf_size); /*!< in: output buffer size
- in bytes */
-
-#ifndef UNIV_NONINL
-#include "row0row.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/row0row.ic b/storage/innodb_plugin/include/row0row.ic
deleted file mode 100644
index 05c007641af..00000000000
--- a/storage/innodb_plugin/include/row0row.ic
+++ /dev/null
@@ -1,120 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0row.ic
-General row routines
-
-Created 4/20/1996 Heikki Tuuri
-*******************************************************/
-
-#include "dict0dict.h"
-#include "rem0rec.h"
-#include "trx0undo.h"
-
-/*********************************************************************//**
-Reads the trx id field from a clustered index record.
-@return value of the field */
-UNIV_INLINE
-trx_id_t
-row_get_rec_trx_id(
-/*===============*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
-{
- ulint offset;
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- offset = index->trx_id_offset;
-
- if (!offset) {
- offset = row_get_trx_id_offset(rec, index, offsets);
- }
-
- return(trx_read_trx_id(rec + offset));
-}
-
-/*********************************************************************//**
-Reads the roll pointer field from a clustered index record.
-@return value of the field */
-UNIV_INLINE
-roll_ptr_t
-row_get_rec_roll_ptr(
-/*=================*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
-{
- ulint offset;
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- offset = index->trx_id_offset;
-
- if (!offset) {
- offset = row_get_trx_id_offset(rec, index, offsets);
- }
-
- return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN));
-}
-
-/*******************************************************************//**
-Builds from a secondary index record a row reference with which we can
-search the clustered index record. */
-UNIV_INLINE
-void
-row_build_row_ref_fast(
-/*===================*/
- dtuple_t* ref, /*!< in/out: typed data tuple where the
- reference is built */
- const ulint* map, /*!< in: array of field numbers in rec
- telling how ref should be built from
- the fields of rec */
- const rec_t* rec, /*!< in: record in the index; must be
- preserved while ref is used, as we do
- not copy field values to heap */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- dfield_t* dfield;
- const byte* field;
- ulint len;
- ulint ref_len;
- ulint field_no;
- ulint i;
-
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- ut_ad(!rec_offs_any_extern(offsets));
- ref_len = dtuple_get_n_fields(ref);
-
- for (i = 0; i < ref_len; i++) {
- dfield = dtuple_get_nth_field(ref, i);
-
- field_no = *(map + i);
-
- if (field_no != ULINT_UNDEFINED) {
-
- field = rec_get_nth_field(rec, offsets,
- field_no, &len);
- dfield_set_data(dfield, field, len);
- }
- }
-}
diff --git a/storage/innodb_plugin/include/row0sel.h b/storage/innodb_plugin/include/row0sel.h
deleted file mode 100644
index 01a5afaa23e..00000000000
--- a/storage/innodb_plugin/include/row0sel.h
+++ /dev/null
@@ -1,413 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0sel.h
-Select
-
-Created 12/19/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0sel_h
-#define row0sel_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "que0types.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "row0types.h"
-#include "que0types.h"
-#include "pars0sym.h"
-#include "btr0pcur.h"
-#include "read0read.h"
-#include "row0mysql.h"
-
-/*********************************************************************//**
-Creates a select node struct.
-@return own: select node struct */
-UNIV_INTERN
-sel_node_t*
-sel_node_create(
-/*============*/
- mem_heap_t* heap); /*!< in: memory heap where created */
-/*********************************************************************//**
-Frees the memory private to a select node when a query graph is freed,
-does not free the heap where the node was originally created. */
-UNIV_INTERN
-void
-sel_node_free_private(
-/*==================*/
- sel_node_t* node); /*!< in: select node struct */
-/*********************************************************************//**
-Frees a prefetch buffer for a column, including the dynamically allocated
-memory for data stored there. */
-UNIV_INTERN
-void
-sel_col_prefetch_buf_free(
-/*======================*/
- sel_buf_t* prefetch_buf); /*!< in, own: prefetch buffer */
-/*********************************************************************//**
-Gets the plan node for the nth table in a join.
-@return plan node */
-UNIV_INLINE
-plan_t*
-sel_node_get_nth_plan(
-/*==================*/
- sel_node_t* node, /*!< in: select node */
- ulint i); /*!< in: get ith plan node */
-/**********************************************************************//**
-Performs a select step. This is a high-level function used in SQL execution
-graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-row_sel_step(
-/*=========*/
- que_thr_t* thr); /*!< in: query thread */
-/**********************************************************************//**
-Performs an execution step of an open or close cursor statement node.
-@return query thread to run next or NULL */
-UNIV_INLINE
-que_thr_t*
-open_step(
-/*======*/
- que_thr_t* thr); /*!< in: query thread */
-/**********************************************************************//**
-Performs a fetch for a cursor.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-fetch_step(
-/*=======*/
- que_thr_t* thr); /*!< in: query thread */
-/****************************************************************//**
-Sample callback function for fetch that prints each row.
-@return always returns non-NULL */
-UNIV_INTERN
-void*
-row_fetch_print(
-/*============*/
- void* row, /*!< in: sel_node_t* */
- void* user_arg); /*!< in: not used */
-/****************************************************************//**
-Callback function for fetch that stores an unsigned 4 byte integer to the
-location pointed. The column's type must be DATA_INT, DATA_UNSIGNED, length
-= 4.
-@return always returns NULL */
-UNIV_INTERN
-void*
-row_fetch_store_uint4(
-/*==================*/
- void* row, /*!< in: sel_node_t* */
- void* user_arg); /*!< in: data pointer */
-/***********************************************************//**
-Prints a row in a select result.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-row_printf_step(
-/*============*/
- que_thr_t* thr); /*!< in: query thread */
-/****************************************************************//**
-Converts a key value stored in MySQL format to an Innobase dtuple. The last
-field of the key value may be just a prefix of a fixed length field: hence
-the parameter key_len. But currently we do not allow search keys where the
-last field is only a prefix of the full key field len and print a warning if
-such appears. */
-UNIV_INTERN
-void
-row_sel_convert_mysql_key_to_innobase(
-/*==================================*/
- dtuple_t* tuple, /*!< in/out: tuple where to build;
- NOTE: we assume that the type info
- in the tuple is already according
- to index! */
- byte* buf, /*!< in: buffer to use in field
- conversions */
- ulint buf_len, /*!< in: buffer length */
- dict_index_t* index, /*!< in: index of the key value */
- const byte* key_ptr, /*!< in: MySQL key value */
- ulint key_len, /*!< in: MySQL key value length */
- trx_t* trx); /*!< in: transaction */
-/********************************************************************//**
-Searches for rows in the database. This is used in the interface to
-MySQL. This function opens a cursor, and also implements fetch next
-and fetch prev. NOTE that if we do a search with a full key value
-from a unique index (ROW_SEL_EXACT), then we will not store the cursor
-position and fetch next or fetch prev must not be tried to the cursor!
-@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK,
-DB_LOCK_TABLE_FULL, or DB_TOO_BIG_RECORD */
-UNIV_INTERN
-ulint
-row_search_for_mysql(
-/*=================*/
- byte* buf, /*!< in/out: buffer for the fetched
- row in the MySQL format */
- ulint mode, /*!< in: search mode PAGE_CUR_L, ... */
- row_prebuilt_t* prebuilt, /*!< in: prebuilt struct for the
- table handle; this contains the info
- of search_tuple, index; if search
- tuple contains 0 fields then we
- position the cursor at the start or
- the end of the index, depending on
- 'mode' */
- ulint match_mode, /*!< in: 0 or ROW_SEL_EXACT or
- ROW_SEL_EXACT_PREFIX */
- ulint direction); /*!< in: 0 or ROW_SEL_NEXT or
- ROW_SEL_PREV; NOTE: if this is != 0,
- then prebuilt must have a pcur
- with stored position! In opening of a
- cursor 'direction' should be 0. */
-/*******************************************************************//**
-Checks if MySQL at the moment is allowed for this table to retrieve a
-consistent read result, or store it to the query cache.
-@return TRUE if storing or retrieving from the query cache is permitted */
-UNIV_INTERN
-ibool
-row_search_check_if_query_cache_permitted(
-/*======================================*/
- trx_t* trx, /*!< in: transaction object */
- const char* norm_name); /*!< in: concatenation of database name,
- '/' char, table name */
-/*******************************************************************//**
-Read the max AUTOINC value from an index.
-@return DB_SUCCESS if all OK else error code */
-UNIV_INTERN
-ulint
-row_search_max_autoinc(
-/*===================*/
- dict_index_t* index, /*!< in: index to search */
- const char* col_name, /*!< in: autoinc column name */
- ib_uint64_t* value); /*!< out: AUTOINC value read */
-
-/** A structure for caching column values for prefetched rows */
-struct sel_buf_struct{
- byte* data; /*!< data, or NULL; if not NULL, this field
- has allocated memory which must be explicitly
- freed; can be != NULL even when len is
- UNIV_SQL_NULL */
- ulint len; /*!< data length or UNIV_SQL_NULL */
- ulint val_buf_size;
- /*!< size of memory buffer allocated for data:
- this can be more than len; this is defined
- when data != NULL */
-};
-
-/** Query plan */
-struct plan_struct{
- dict_table_t* table; /*!< table struct in the dictionary
- cache */
- dict_index_t* index; /*!< table index used in the search */
- btr_pcur_t pcur; /*!< persistent cursor used to search
- the index */
- ibool asc; /*!< TRUE if cursor traveling upwards */
- ibool pcur_is_open; /*!< TRUE if pcur has been positioned
- and we can try to fetch new rows */
- ibool cursor_at_end; /*!< TRUE if the cursor is open but
- we know that there are no more
- qualifying rows left to retrieve from
- the index tree; NOTE though, that
- there may still be unprocessed rows in
- the prefetch stack; always FALSE when
- pcur_is_open is FALSE */
- ibool stored_cursor_rec_processed;
- /*!< TRUE if the pcur position has been
- stored and the record it is positioned
- on has already been processed */
- que_node_t** tuple_exps; /*!< array of expressions
- which are used to calculate
- the field values in the search
- tuple: there is one expression
- for each field in the search
- tuple */
- dtuple_t* tuple; /*!< search tuple */
- ulint mode; /*!< search mode: PAGE_CUR_G, ... */
- ulint n_exact_match; /*!< number of first fields in
- the search tuple which must be
- exactly matched */
- ibool unique_search; /*!< TRUE if we are searching an
- index record with a unique key */
- ulint n_rows_fetched; /*!< number of rows fetched using pcur
- after it was opened */
- ulint n_rows_prefetched;/*!< number of prefetched rows cached
- for fetch: fetching several rows in
- the same mtr saves CPU time */
- ulint first_prefetched;/*!< index of the first cached row in
- select buffer arrays for each column */
- ibool no_prefetch; /*!< no prefetch for this table */
- sym_node_list_t columns; /*!< symbol table nodes for the columns
- to retrieve from the table */
- UT_LIST_BASE_NODE_T(func_node_t)
- end_conds; /*!< conditions which determine the
- fetch limit of the index segment we
- have to look at: when one of these
- fails, the result set has been
- exhausted for the cursor in this
- index; these conditions are normalized
- so that in a comparison the column
- for this table is the first argument */
- UT_LIST_BASE_NODE_T(func_node_t)
- other_conds; /*!< the rest of search conditions we can
- test at this table in a join */
- ibool must_get_clust; /*!< TRUE if index is a non-clustered
- index and we must also fetch the
- clustered index record; this is the
- case if the non-clustered record does
- not contain all the needed columns, or
- if this is a single-table explicit
- cursor, or a searched update or
- delete */
- ulint* clust_map; /*!< map telling how clust_ref is built
- from the fields of a non-clustered
- record */
- dtuple_t* clust_ref; /*!< the reference to the clustered
- index entry is built here if index is
- a non-clustered index */
- btr_pcur_t clust_pcur; /*!< if index is non-clustered, we use
- this pcur to search the clustered
- index */
- mem_heap_t* old_vers_heap; /*!< memory heap used in building an old
- version of a row, or NULL */
-};
-
-/** Select node states */
-enum sel_node_state {
- SEL_NODE_CLOSED, /*!< it is a declared cursor which is not
- currently open */
- SEL_NODE_OPEN, /*!< intention locks not yet set on tables */
- SEL_NODE_FETCH, /*!< intention locks have been set */
- SEL_NODE_NO_MORE_ROWS /*!< cursor has reached the result set end */
-};
-
-/** Select statement node */
-struct sel_node_struct{
- que_common_t common; /*!< node type: QUE_NODE_SELECT */
- enum sel_node_state
- state; /*!< node state */
- que_node_t* select_list; /*!< select list */
- sym_node_t* into_list; /*!< variables list or NULL */
- sym_node_t* table_list; /*!< table list */
- ibool asc; /*!< TRUE if the rows should be fetched
- in an ascending order */
- ibool set_x_locks; /*!< TRUE if the cursor is for update or
- delete, which means that a row x-lock
- should be placed on the cursor row */
- ulint row_lock_mode; /*!< LOCK_X or LOCK_S */
- ulint n_tables; /*!< number of tables */
- ulint fetch_table; /*!< number of the next table to access
- in the join */
- plan_t* plans; /*!< array of n_tables many plan nodes
- containing the search plan and the
- search data structures */
- que_node_t* search_cond; /*!< search condition */
- read_view_t* read_view; /*!< if the query is a non-locking
- consistent read, its read view is
- placed here, otherwise NULL */
- ibool consistent_read;/*!< TRUE if the select is a consistent,
- non-locking read */
- order_node_t* order_by; /*!< order by column definition, or
- NULL */
- ibool is_aggregate; /*!< TRUE if the select list consists of
- aggregate functions */
- ibool aggregate_already_fetched;
- /*!< TRUE if the aggregate row has
- already been fetched for the current
- cursor */
- ibool can_get_updated;/*!< this is TRUE if the select
- is in a single-table explicit
- cursor which can get updated
- within the stored procedure,
- or in a searched update or
- delete; NOTE that to determine
- whether an explicit cursor
- can get updated, the parser
- checks from a stored procedure
- if it contains positioned
- update or delete statements */
- sym_node_t* explicit_cursor;/*!< not NULL if an explicit cursor */
- UT_LIST_BASE_NODE_T(sym_node_t)
- copy_variables; /*!< variables whose values we have to
- copy when an explicit cursor is opened,
- so that they do not change between
- fetches */
-};
-
-/** Fetch statement node */
-struct fetch_node_struct{
- que_common_t common; /*!< type: QUE_NODE_FETCH */
- sel_node_t* cursor_def; /*!< cursor definition */
- sym_node_t* into_list; /*!< variables to set */
-
- pars_user_func_t*
- func; /*!< User callback function or NULL.
- The first argument to the function
- is a sel_node_t*, containing the
- results of the SELECT operation for
- one row. If the function returns
- NULL, it is not interested in
- further rows and the cursor is
- modified so (cursor % NOTFOUND) is
- true. If it returns not-NULL,
- continue normally. See
- row_fetch_print() for an example
- (and a useful debugging tool). */
-};
-
-/** Open or close cursor operation type */
-enum open_node_op {
- ROW_SEL_OPEN_CURSOR, /*!< open cursor */
- ROW_SEL_CLOSE_CURSOR /*!< close cursor */
-};
-
-/** Open or close cursor statement node */
-struct open_node_struct{
- que_common_t common; /*!< type: QUE_NODE_OPEN */
- enum open_node_op
- op_type; /*!< operation type: open or
- close cursor */
- sel_node_t* cursor_def; /*!< cursor definition */
-};
-
-/** Row printf statement node */
-struct row_printf_node_struct{
- que_common_t common; /*!< type: QUE_NODE_ROW_PRINTF */
- sel_node_t* sel_node; /*!< select */
-};
-
-/** Search direction for the MySQL interface */
-enum row_sel_direction {
- ROW_SEL_NEXT = 1, /*!< ascending direction */
- ROW_SEL_PREV = 2 /*!< descending direction */
-};
-
-/** Match mode for the MySQL interface */
-enum row_sel_match_mode {
- ROW_SEL_EXACT = 1, /*!< search using a complete key value */
- ROW_SEL_EXACT_PREFIX /*!< search using a key prefix which
- must match rows: the prefix may
- contain an incomplete field (the last
- field in prefix may be just a prefix
- of a fixed length column) */
-};
-
-#ifndef UNIV_NONINL
-#include "row0sel.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/row0sel.ic b/storage/innodb_plugin/include/row0sel.ic
deleted file mode 100644
index 5907f9913da..00000000000
--- a/storage/innodb_plugin/include/row0sel.ic
+++ /dev/null
@@ -1,105 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0sel.ic
-Select
-
-Created 12/19/1997 Heikki Tuuri
-*******************************************************/
-
-#include "que0que.h"
-
-/*********************************************************************//**
-Gets the plan node for the nth table in a join.
-@return plan node */
-UNIV_INLINE
-plan_t*
-sel_node_get_nth_plan(
-/*==================*/
- sel_node_t* node, /*!< in: select node */
- ulint i) /*!< in: get ith plan node */
-{
- ut_ad(i < node->n_tables);
-
- return(node->plans + i);
-}
-
-/*********************************************************************//**
-Resets the cursor defined by sel_node to the SEL_NODE_OPEN state, which means
-that it will start fetching from the start of the result set again, regardless
-of where it was before, and it will set intention locks on the tables. */
-UNIV_INLINE
-void
-sel_node_reset_cursor(
-/*==================*/
- sel_node_t* node) /*!< in: select node */
-{
- node->state = SEL_NODE_OPEN;
-}
-
-/**********************************************************************//**
-Performs an execution step of an open or close cursor statement node.
-@return query thread to run next or NULL */
-UNIV_INLINE
-que_thr_t*
-open_step(
-/*======*/
- que_thr_t* thr) /*!< in: query thread */
-{
- sel_node_t* sel_node;
- open_node_t* node;
- ulint err;
-
- ut_ad(thr);
-
- node = (open_node_t*) thr->run_node;
- ut_ad(que_node_get_type(node) == QUE_NODE_OPEN);
-
- sel_node = node->cursor_def;
-
- err = DB_SUCCESS;
-
- if (node->op_type == ROW_SEL_OPEN_CURSOR) {
-
- /* if (sel_node->state == SEL_NODE_CLOSED) { */
-
- sel_node_reset_cursor(sel_node);
- /* } else {
- err = DB_ERROR;
- } */
- } else {
- if (sel_node->state != SEL_NODE_CLOSED) {
-
- sel_node->state = SEL_NODE_CLOSED;
- } else {
- err = DB_ERROR;
- }
- }
-
- if (UNIV_EXPECT(err, DB_SUCCESS) != DB_SUCCESS) {
- /* SQL error detected */
- fprintf(stderr, "SQL error %lu\n", (ulong) err);
-
- ut_error;
- }
-
- thr->run_node = que_node_get_parent(node);
-
- return(thr);
-}
diff --git a/storage/innodb_plugin/include/row0types.h b/storage/innodb_plugin/include/row0types.h
deleted file mode 100644
index 7920fd75061..00000000000
--- a/storage/innodb_plugin/include/row0types.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0types.h
-Row operation global types
-
-Created 12/27/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0types_h
-#define row0types_h
-
-typedef struct plan_struct plan_t;
-
-typedef struct upd_struct upd_t;
-
-typedef struct upd_field_struct upd_field_t;
-
-typedef struct upd_node_struct upd_node_t;
-
-typedef struct del_node_struct del_node_t;
-
-typedef struct ins_node_struct ins_node_t;
-
-typedef struct sel_node_struct sel_node_t;
-
-typedef struct open_node_struct open_node_t;
-
-typedef struct fetch_node_struct fetch_node_t;
-
-typedef struct row_printf_node_struct row_printf_node_t;
-typedef struct sel_buf_struct sel_buf_t;
-
-typedef struct undo_node_struct undo_node_t;
-
-typedef struct purge_node_struct purge_node_t;
-
-typedef struct row_ext_struct row_ext_t;
-
-/* MySQL data types */
-typedef struct st_table TABLE;
-
-#endif
diff --git a/storage/innodb_plugin/include/row0uins.h b/storage/innodb_plugin/include/row0uins.h
deleted file mode 100644
index 77b071c3a6b..00000000000
--- a/storage/innodb_plugin/include/row0uins.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0uins.h
-Fresh insert undo
-
-Created 2/25/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0uins_h
-#define row0uins_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "row0types.h"
-#include "mtr0mtr.h"
-
-/***********************************************************//**
-Undoes a fresh insert of a row to a table. A fresh insert means that
-the same clustered index unique key did not have any record, even delete
-marked, at the time of the insert. InnoDB is eager in a rollback:
-if it figures out that an index record will be removed in the purge
-anyway, it will remove it in the rollback.
-@return DB_SUCCESS */
-UNIV_INTERN
-ulint
-row_undo_ins(
-/*=========*/
- undo_node_t* node); /*!< in: row undo node */
-
-#ifndef UNIV_NONINL
-#include "row0uins.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/row0uins.ic b/storage/innodb_plugin/include/row0uins.ic
deleted file mode 100644
index 27606150d8e..00000000000
--- a/storage/innodb_plugin/include/row0uins.ic
+++ /dev/null
@@ -1,25 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0uins.ic
-Fresh insert undo
-
-Created 2/25/1997 Heikki Tuuri
-*******************************************************/
-
diff --git a/storage/innodb_plugin/include/row0umod.h b/storage/innodb_plugin/include/row0umod.h
deleted file mode 100644
index ed44cc8d601..00000000000
--- a/storage/innodb_plugin/include/row0umod.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0umod.h
-Undo modify of a row
-
-Created 2/27/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0umod_h
-#define row0umod_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "row0types.h"
-#include "mtr0mtr.h"
-
-/***********************************************************//**
-Undoes a modify operation on a row of a table.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ulint
-row_undo_mod(
-/*=========*/
- undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr); /*!< in: query thread */
-
-
-#ifndef UNIV_NONINL
-#include "row0umod.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/row0umod.ic b/storage/innodb_plugin/include/row0umod.ic
deleted file mode 100644
index ea3fd3b43c7..00000000000
--- a/storage/innodb_plugin/include/row0umod.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0umod.ic
-Undo modify of a row
-
-Created 2/27/1997 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innodb_plugin/include/row0undo.h b/storage/innodb_plugin/include/row0undo.h
deleted file mode 100644
index 6eb4ca448b3..00000000000
--- a/storage/innodb_plugin/include/row0undo.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0undo.h
-Row undo
-
-Created 1/8/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0undo_h
-#define row0undo_h
-
-#include "univ.i"
-#include "mtr0mtr.h"
-#include "trx0sys.h"
-#include "btr0types.h"
-#include "btr0pcur.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "row0types.h"
-
-/********************************************************************//**
-Creates a row undo node to a query graph.
-@return own: undo node */
-UNIV_INTERN
-undo_node_t*
-row_undo_node_create(
-/*=================*/
- trx_t* trx, /*!< in: transaction */
- que_thr_t* parent, /*!< in: parent node, i.e., a thr node */
- mem_heap_t* heap); /*!< in: memory heap where created */
-/***********************************************************//**
-Looks for the clustered index record when node has the row reference.
-The pcur in node is used in the search. If found, stores the row to node,
-and stores the position of pcur, and detaches it. The pcur must be closed
-by the caller in any case.
-@return TRUE if found; NOTE the node->pcur must be closed by the
-caller, regardless of the return value */
-UNIV_INTERN
-ibool
-row_undo_search_clust_to_pcur(
-/*==========================*/
- undo_node_t* node); /*!< in: row undo node */
-/***********************************************************//**
-Undoes a row operation in a table. This is a high-level function used
-in SQL execution graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-row_undo_step(
-/*==========*/
- que_thr_t* thr); /*!< in: query thread */
-
-/* A single query thread will try to perform the undo for all successive
-versions of a clustered index record, if the transaction has modified it
-several times during the execution which is rolled back. It may happen
-that the task is transferred to another query thread, if the other thread
-is assigned to handle an undo log record in the chain of different versions
-of the record, and the other thread happens to get the x-latch to the
-clustered index record at the right time.
- If a query thread notices that the clustered index record it is looking
-for is missing, or the roll ptr field in the record does not point to the
-undo log record the thread was assigned to handle, then it gives up the undo
-task for that undo log record, and fetches the next. This situation can occur
-just in the case where the transaction modified the same record several times
-and another thread is currently doing the undo for successive versions of
-that index record. */
-
-/** Execution state of an undo node */
-enum undo_exec {
- UNDO_NODE_FETCH_NEXT = 1, /*!< we should fetch the next
- undo log record */
- UNDO_NODE_PREV_VERS, /*!< the roll ptr to previous
- version of a row is stored in
- node, and undo should be done
- based on it */
- UNDO_NODE_INSERT, /*!< undo a fresh insert of a
- row to a table */
- UNDO_NODE_MODIFY /*!< undo a modify operation
- (DELETE or UPDATE) on a row
- of a table */
-};
-
-/** Undo node structure */
-struct undo_node_struct{
- que_common_t common; /*!< node type: QUE_NODE_UNDO */
- enum undo_exec state; /*!< node execution state */
- trx_t* trx; /*!< trx for which undo is done */
- roll_ptr_t roll_ptr;/*!< roll pointer to undo log record */
- trx_undo_rec_t* undo_rec;/*!< undo log record */
- undo_no_t undo_no;/*!< undo number of the record */
- ulint rec_type;/*!< undo log record type: TRX_UNDO_INSERT_REC,
- ... */
- roll_ptr_t new_roll_ptr;
- /*!< roll ptr to restore to clustered index
- record */
- trx_id_t new_trx_id; /*!< trx id to restore to clustered index
- record */
- btr_pcur_t pcur; /*!< persistent cursor used in searching the
- clustered index record */
- dict_table_t* table; /*!< table where undo is done */
- ulint cmpl_info;/*!< compiler analysis of an update */
- upd_t* update; /*!< update vector for a clustered index
- record */
- dtuple_t* ref; /*!< row reference to the next row to handle */
- dtuple_t* row; /*!< a copy (also fields copied to heap) of the
- row to handle */
- row_ext_t* ext; /*!< NULL, or prefixes of the externally
- stored columns of the row */
- dtuple_t* undo_row;/*!< NULL, or the row after undo */
- row_ext_t* undo_ext;/*!< NULL, or prefixes of the externally
- stored columns of undo_row */
- dict_index_t* index; /*!< the next index whose record should be
- handled */
- mem_heap_t* heap; /*!< memory heap used as auxiliary storage for
- row; this must be emptied after undo is tried
- on a row */
-};
-
-
-#ifndef UNIV_NONINL
-#include "row0undo.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/row0undo.ic b/storage/innodb_plugin/include/row0undo.ic
deleted file mode 100644
index dc788debc14..00000000000
--- a/storage/innodb_plugin/include/row0undo.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0undo.ic
-Row undo
-
-Created 1/8/1997 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innodb_plugin/include/row0upd.h b/storage/innodb_plugin/include/row0upd.h
deleted file mode 100644
index 635d746d5a1..00000000000
--- a/storage/innodb_plugin/include/row0upd.h
+++ /dev/null
@@ -1,483 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0upd.h
-Update of a row
-
-Created 12/27/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0upd_h
-#define row0upd_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "row0types.h"
-#include "btr0types.h"
-#include "dict0types.h"
-#include "trx0types.h"
-
-#ifndef UNIV_HOTBACKUP
-# include "btr0pcur.h"
-# include "que0types.h"
-# include "pars0types.h"
-#endif /* !UNIV_HOTBACKUP */
-
-/*********************************************************************//**
-Creates an update vector object.
-@return own: update vector object */
-UNIV_INLINE
-upd_t*
-upd_create(
-/*=======*/
- ulint n, /*!< in: number of fields */
- mem_heap_t* heap); /*!< in: heap from which memory allocated */
-/*********************************************************************//**
-Returns the number of fields in the update vector == number of columns
-to be updated by an update vector.
-@return number of fields */
-UNIV_INLINE
-ulint
-upd_get_n_fields(
-/*=============*/
- const upd_t* update); /*!< in: update vector */
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Returns the nth field of an update vector.
-@return update vector field */
-UNIV_INLINE
-upd_field_t*
-upd_get_nth_field(
-/*==============*/
- const upd_t* update, /*!< in: update vector */
- ulint n); /*!< in: field position in update vector */
-#else
-# define upd_get_nth_field(update, n) ((update)->fields + (n))
-#endif
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Sets an index field number to be updated by an update vector field. */
-UNIV_INLINE
-void
-upd_field_set_field_no(
-/*===================*/
- upd_field_t* upd_field, /*!< in: update vector field */
- ulint field_no, /*!< in: field number in a clustered
- index */
- dict_index_t* index, /*!< in: index */
- trx_t* trx); /*!< in: transaction */
-/*********************************************************************//**
-Returns a field of an update vector by field_no.
-@return update vector field, or NULL */
-UNIV_INLINE
-const upd_field_t*
-upd_get_field_by_field_no(
-/*======================*/
- const upd_t* update, /*!< in: update vector */
- ulint no) /*!< in: field_no */
- __attribute__((nonnull, pure));
-/*********************************************************************//**
-Writes into the redo log the values of trx id and roll ptr and enough info
-to determine their positions within a clustered index record.
-@return new pointer to mlog */
-UNIV_INTERN
-byte*
-row_upd_write_sys_vals_to_log(
-/*==========================*/
- dict_index_t* index, /*!< in: clustered index */
- trx_t* trx, /*!< in: transaction */
- roll_ptr_t roll_ptr,/*!< in: roll ptr of the undo log record */
- byte* log_ptr,/*!< pointer to a buffer of size > 20 opened
- in mlog */
- mtr_t* mtr); /*!< in: mtr */
-/*********************************************************************//**
-Updates the trx id and roll ptr field in a clustered index record when
-a row is updated or marked deleted. */
-UNIV_INLINE
-void
-row_upd_rec_sys_fields(
-/*===================*/
- rec_t* rec, /*!< in/out: record */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- trx_t* trx, /*!< in: transaction */
- roll_ptr_t roll_ptr);/*!< in: roll ptr of the undo log record */
-/*********************************************************************//**
-Sets the trx id or roll ptr field of a clustered index entry. */
-UNIV_INTERN
-void
-row_upd_index_entry_sys_field(
-/*==========================*/
- const dtuple_t* entry, /*!< in: index entry, where the memory buffers
- for sys fields are already allocated:
- the function just copies the new values to
- them */
- dict_index_t* index, /*!< in: clustered index */
- ulint type, /*!< in: DATA_TRX_ID or DATA_ROLL_PTR */
- dulint val); /*!< in: value to write */
-/*********************************************************************//**
-Creates an update node for a query graph.
-@return own: update node */
-UNIV_INTERN
-upd_node_t*
-upd_node_create(
-/*============*/
- mem_heap_t* heap); /*!< in: mem heap where created */
-/***********************************************************//**
-Writes to the redo log the new values of the fields occurring in the index. */
-UNIV_INTERN
-void
-row_upd_index_write_log(
-/*====================*/
- const upd_t* update, /*!< in: update vector */
- byte* log_ptr,/*!< in: pointer to mlog buffer: must
- contain at least MLOG_BUF_MARGIN bytes
- of free space; the buffer is closed
- within this function */
- mtr_t* mtr); /*!< in: mtr into whose log to write */
-/***********************************************************//**
-Returns TRUE if row update changes size of some field in index or if some
-field to be updated is stored externally in rec or update.
-@return TRUE if the update changes the size of some field in index or
-the field is external in rec or update */
-UNIV_INTERN
-ibool
-row_upd_changes_field_size_or_external(
-/*===================================*/
- dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- const upd_t* update);/*!< in: update vector */
-#endif /* !UNIV_HOTBACKUP */
-/***********************************************************//**
-Replaces the new column values stored in the update vector to the record
-given. No field size changes are allowed. */
-UNIV_INTERN
-void
-row_upd_rec_in_place(
-/*=================*/
- rec_t* rec, /*!< in/out: record where replaced */
- dict_index_t* index, /*!< in: the index the record belongs to */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- const upd_t* update, /*!< in: update vector */
- page_zip_des_t* page_zip);/*!< in: compressed page with enough space
- available, or NULL */
-#ifndef UNIV_HOTBACKUP
-/***************************************************************//**
-Builds an update vector from those fields which in a secondary index entry
-differ from a record that has the equal ordering fields. NOTE: we compare
-the fields as binary strings!
-@return own: update vector of differing fields */
-UNIV_INTERN
-upd_t*
-row_upd_build_sec_rec_difference_binary(
-/*====================================*/
- dict_index_t* index, /*!< in: index */
- const dtuple_t* entry, /*!< in: entry to insert */
- const rec_t* rec, /*!< in: secondary index record */
- trx_t* trx, /*!< in: transaction */
- mem_heap_t* heap); /*!< in: memory heap from which allocated */
-/***************************************************************//**
-Builds an update vector from those fields, excluding the roll ptr and
-trx id fields, which in an index entry differ from a record that has
-the equal ordering fields. NOTE: we compare the fields as binary strings!
-@return own: update vector of differing fields, excluding roll ptr and
-trx id */
-UNIV_INTERN
-upd_t*
-row_upd_build_difference_binary(
-/*============================*/
- dict_index_t* index, /*!< in: clustered index */
- const dtuple_t* entry, /*!< in: entry to insert */
- const rec_t* rec, /*!< in: clustered index record */
- trx_t* trx, /*!< in: transaction */
- mem_heap_t* heap); /*!< in: memory heap from which allocated */
-/***********************************************************//**
-Replaces the new column values stored in the update vector to the index entry
-given. */
-UNIV_INTERN
-void
-row_upd_index_replace_new_col_vals_index_pos(
-/*=========================================*/
- dtuple_t* entry, /*!< in/out: index entry where replaced;
- the clustered index record must be
- covered by a lock or a page latch to
- prevent deletion (rollback or purge) */
- dict_index_t* index, /*!< in: index; NOTE that this may also be a
- non-clustered index */
- const upd_t* update, /*!< in: an update vector built for the index so
- that the field number in an upd_field is the
- index position */
- ibool order_only,
- /*!< in: if TRUE, limit the replacement to
- ordering fields of index; note that this
- does not work for non-clustered indexes. */
- mem_heap_t* heap) /*!< in: memory heap for allocating and
- copying the new values */
- __attribute__((nonnull));
-/***********************************************************//**
-Replaces the new column values stored in the update vector to the index entry
-given. */
-UNIV_INTERN
-void
-row_upd_index_replace_new_col_vals(
-/*===============================*/
- dtuple_t* entry, /*!< in/out: index entry where replaced;
- the clustered index record must be
- covered by a lock or a page latch to
- prevent deletion (rollback or purge) */
- dict_index_t* index, /*!< in: index; NOTE that this may also be a
- non-clustered index */
- const upd_t* update, /*!< in: an update vector built for the
- CLUSTERED index so that the field number in
- an upd_field is the clustered index position */
- mem_heap_t* heap) /*!< in: memory heap for allocating and
- copying the new values */
- __attribute__((nonnull));
-/***********************************************************//**
-Replaces the new column values stored in the update vector. */
-UNIV_INTERN
-void
-row_upd_replace(
-/*============*/
- dtuple_t* row, /*!< in/out: row where replaced,
- indexed by col_no;
- the clustered index record must be
- covered by a lock or a page latch to
- prevent deletion (rollback or purge) */
- row_ext_t** ext, /*!< out, own: NULL, or externally
- stored column prefixes */
- const dict_index_t* index, /*!< in: clustered index */
- const upd_t* update, /*!< in: an update vector built for the
- clustered index */
- mem_heap_t* heap); /*!< in: memory heap */
-/***********************************************************//**
-Checks if an update vector changes an ordering field of an index record.
-
-This function is fast if the update vector is short or the number of ordering
-fields in the index is small. Otherwise, this can be quadratic.
-NOTE: we compare the fields as binary strings!
-@return TRUE if update vector changes an ordering field in the index record */
-UNIV_INTERN
-ibool
-row_upd_changes_ord_field_binary(
-/*=============================*/
- const dtuple_t* row, /*!< in: old value of row, or NULL if the
- row and the data values in update are not
- known when this function is called, e.g., at
- compile time */
- dict_index_t* index, /*!< in: index of the record */
- const upd_t* update);/*!< in: update vector for the row; NOTE: the
- field numbers in this MUST be clustered index
- positions! */
-/***********************************************************//**
-Checks if an update vector changes an ordering field of an index record.
-This function is fast if the update vector is short or the number of ordering
-fields in the index is small. Otherwise, this can be quadratic.
-NOTE: we compare the fields as binary strings!
-@return TRUE if update vector may change an ordering field in an index
-record */
-UNIV_INTERN
-ibool
-row_upd_changes_some_index_ord_field_binary(
-/*========================================*/
- const dict_table_t* table, /*!< in: table */
- const upd_t* update);/*!< in: update vector for the row */
-/***********************************************************//**
-Updates a row in a table. This is a high-level function used
-in SQL execution graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-row_upd_step(
-/*=========*/
- que_thr_t* thr); /*!< in: query thread */
-#endif /* !UNIV_HOTBACKUP */
-/*********************************************************************//**
-Parses the log data of system field values.
-@return log data end or NULL */
-UNIV_INTERN
-byte*
-row_upd_parse_sys_vals(
-/*===================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- ulint* pos, /*!< out: TRX_ID position in record */
- trx_id_t* trx_id, /*!< out: trx id */
- roll_ptr_t* roll_ptr);/*!< out: roll ptr */
-/*********************************************************************//**
-Updates the trx id and roll ptr field in a clustered index record in database
-recovery. */
-UNIV_INTERN
-void
-row_upd_rec_sys_fields_in_recovery(
-/*===============================*/
- rec_t* rec, /*!< in/out: record */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint pos, /*!< in: TRX_ID position in rec */
- trx_id_t trx_id, /*!< in: transaction id */
- roll_ptr_t roll_ptr);/*!< in: roll ptr of the undo log record */
-/*********************************************************************//**
-Parses the log data written by row_upd_index_write_log.
-@return log data end or NULL */
-UNIV_INTERN
-byte*
-row_upd_index_parse(
-/*================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- mem_heap_t* heap, /*!< in: memory heap where update vector is
- built */
- upd_t** update_out);/*!< out: update vector */
-
-
-/* Update vector field */
-struct upd_field_struct{
- unsigned field_no:16; /*!< field number in an index, usually
- the clustered index, but in updating
- a secondary index record in btr0cur.c
- this is the position in the secondary
- index */
-#ifndef UNIV_HOTBACKUP
- unsigned orig_len:16; /*!< original length of the locally
- stored part of an externally stored
- column, or 0 */
- que_node_t* exp; /*!< expression for calculating a new
- value: it refers to column values and
- constants in the symbol table of the
- query graph */
-#endif /* !UNIV_HOTBACKUP */
- dfield_t new_val; /*!< new value for the column */
-};
-
-/* Update vector structure */
-struct upd_struct{
- ulint info_bits; /*!< new value of info bits to record;
- default is 0 */
- ulint n_fields; /*!< number of update fields */
- upd_field_t* fields; /*!< array of update fields */
-};
-
-#ifndef UNIV_HOTBACKUP
-/* Update node structure which also implements the delete operation
-of a row */
-
-struct upd_node_struct{
- que_common_t common; /*!< node type: QUE_NODE_UPDATE */
- ibool is_delete;/* TRUE if delete, FALSE if update */
- ibool searched_update;
- /* TRUE if searched update, FALSE if
- positioned */
- ibool in_mysql_interface;
- /* TRUE if the update node was created
- for the MySQL interface */
- dict_foreign_t* foreign;/* NULL or pointer to a foreign key
- constraint if this update node is used in
- doing an ON DELETE or ON UPDATE operation */
- upd_node_t* cascade_node;/* NULL or an update node template which
- is used to implement ON DELETE/UPDATE CASCADE
- or ... SET NULL for foreign keys */
- mem_heap_t* cascade_heap;/* NULL or a mem heap where the cascade
- node is created */
- sel_node_t* select; /*!< query graph subtree implementing a base
- table cursor: the rows returned will be
- updated */
- btr_pcur_t* pcur; /*!< persistent cursor placed on the clustered
- index record which should be updated or
- deleted; the cursor is stored in the graph
- of 'select' field above, except in the case
- of the MySQL interface */
- dict_table_t* table; /*!< table where updated */
- upd_t* update; /*!< update vector for the row */
- ulint update_n_fields;
- /* when this struct is used to implement
- a cascade operation for foreign keys, we store
- here the size of the buffer allocated for use
- as the update vector */
- sym_node_list_t columns;/* symbol table nodes for the columns
- to retrieve from the table */
- ibool has_clust_rec_x_lock;
- /* TRUE if the select which retrieves the
- records to update already sets an x-lock on
- the clustered record; note that it must always
- set at least an s-lock */
- ulint cmpl_info;/* information extracted during query
- compilation; speeds up execution:
- UPD_NODE_NO_ORD_CHANGE and
- UPD_NODE_NO_SIZE_CHANGE, ORed */
- /*----------------------*/
- /* Local storage for this graph node */
- ulint state; /*!< node execution state */
- dict_index_t* index; /*!< NULL, or the next index whose record should
- be updated */
- dtuple_t* row; /*!< NULL, or a copy (also fields copied to
- heap) of the row to update; this must be reset
- to NULL after a successful update */
- row_ext_t* ext; /*!< NULL, or prefixes of the externally
- stored columns in the old row */
- dtuple_t* upd_row;/* NULL, or a copy of the updated row */
- row_ext_t* upd_ext;/* NULL, or prefixes of the externally
- stored columns in upd_row */
- mem_heap_t* heap; /*!< memory heap used as auxiliary storage;
- this must be emptied after a successful
- update */
- /*----------------------*/
- sym_node_t* table_sym;/* table node in symbol table */
- que_node_t* col_assign_list;
- /* column assignment list */
- ulint magic_n;
-};
-
-#define UPD_NODE_MAGIC_N 1579975
-
-/* Node execution states */
-#define UPD_NODE_SET_IX_LOCK 1 /* execution came to the node from
- a node above and if the field
- has_clust_rec_x_lock is FALSE, we
- should set an intention x-lock on
- the table */
-#define UPD_NODE_UPDATE_CLUSTERED 2 /* clustered index record should be
- updated */
-#define UPD_NODE_INSERT_CLUSTERED 3 /* clustered index record should be
- inserted, old record is already delete
- marked */
-#define UPD_NODE_UPDATE_ALL_SEC 4 /* an ordering field of the clustered
- index record was changed, or this is
- a delete operation: should update
- all the secondary index records */
-#define UPD_NODE_UPDATE_SOME_SEC 5 /* secondary index entries should be
- looked at and updated if an ordering
- field changed */
-
-/* Compilation info flags: these must fit within 3 bits; see trx0rec.h */
-#define UPD_NODE_NO_ORD_CHANGE 1 /* no secondary index record will be
- changed in the update and no ordering
- field of the clustered index */
-#define UPD_NODE_NO_SIZE_CHANGE 2 /* no record field size will be
- changed in the update */
-
-#endif /* !UNIV_HOTBACKUP */
-
-#ifndef UNIV_NONINL
-#include "row0upd.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/row0upd.ic b/storage/innodb_plugin/include/row0upd.ic
deleted file mode 100644
index 18e22f1eca9..00000000000
--- a/storage/innodb_plugin/include/row0upd.ic
+++ /dev/null
@@ -1,184 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0upd.ic
-Update of a row
-
-Created 12/27/1996 Heikki Tuuri
-*******************************************************/
-
-#include "mtr0log.h"
-#ifndef UNIV_HOTBACKUP
-# include "trx0trx.h"
-# include "trx0undo.h"
-# include "row0row.h"
-# include "btr0sea.h"
-#endif /* !UNIV_HOTBACKUP */
-#include "page0zip.h"
-
-/*********************************************************************//**
-Creates an update vector object.
-@return own: update vector object */
-UNIV_INLINE
-upd_t*
-upd_create(
-/*=======*/
- ulint n, /*!< in: number of fields */
- mem_heap_t* heap) /*!< in: heap from which memory allocated */
-{
- upd_t* update;
-
- update = (upd_t*) mem_heap_alloc(heap, sizeof(upd_t));
-
- update->info_bits = 0;
- update->n_fields = n;
- update->fields = (upd_field_t*)
- mem_heap_alloc(heap, sizeof(upd_field_t) * n);
-
- return(update);
-}
-
-/*********************************************************************//**
-Returns the number of fields in the update vector == number of columns
-to be updated by an update vector.
-@return number of fields */
-UNIV_INLINE
-ulint
-upd_get_n_fields(
-/*=============*/
- const upd_t* update) /*!< in: update vector */
-{
- ut_ad(update);
-
- return(update->n_fields);
-}
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Returns the nth field of an update vector.
-@return update vector field */
-UNIV_INLINE
-upd_field_t*
-upd_get_nth_field(
-/*==============*/
- const upd_t* update, /*!< in: update vector */
- ulint n) /*!< in: field position in update vector */
-{
- ut_ad(update);
- ut_ad(n < update->n_fields);
-
- return((upd_field_t*) update->fields + n);
-}
-#endif /* UNIV_DEBUG */
-
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Sets an index field number to be updated by an update vector field. */
-UNIV_INLINE
-void
-upd_field_set_field_no(
-/*===================*/
- upd_field_t* upd_field, /*!< in: update vector field */
- ulint field_no, /*!< in: field number in a clustered
- index */
- dict_index_t* index, /*!< in: index */
- trx_t* trx) /*!< in: transaction */
-{
- upd_field->field_no = field_no;
- upd_field->orig_len = 0;
-
- if (UNIV_UNLIKELY(field_no >= dict_index_get_n_fields(index))) {
- fprintf(stderr,
- "InnoDB: Error: trying to access field %lu in ",
- (ulong) field_no);
- dict_index_name_print(stderr, trx, index);
- fprintf(stderr, "\n"
- "InnoDB: but index only has %lu fields\n",
- (ulong) dict_index_get_n_fields(index));
- }
-
- dict_col_copy_type(dict_index_get_nth_col(index, field_no),
- dfield_get_type(&upd_field->new_val));
-}
-
-/*********************************************************************//**
-Returns a field of an update vector by field_no.
-@return update vector field, or NULL */
-UNIV_INLINE
-const upd_field_t*
-upd_get_field_by_field_no(
-/*======================*/
- const upd_t* update, /*!< in: update vector */
- ulint no) /*!< in: field_no */
-{
- ulint i;
- for (i = 0; i < upd_get_n_fields(update); i++) {
- const upd_field_t* uf = upd_get_nth_field(update, i);
-
- if (uf->field_no == no) {
-
- return(uf);
- }
- }
-
- return(NULL);
-}
-
-/*********************************************************************//**
-Updates the trx id and roll ptr field in a clustered index record when
-a row is updated or marked deleted. */
-UNIV_INLINE
-void
-row_upd_rec_sys_fields(
-/*===================*/
- rec_t* rec, /*!< in/out: record */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- trx_t* trx, /*!< in: transaction */
- roll_ptr_t roll_ptr)/*!< in: roll ptr of the undo log record */
-{
- ut_ad(dict_index_is_clust(index));
- ut_ad(rec_offs_validate(rec, index, offsets));
-#ifdef UNIV_SYNC_DEBUG
- if (!rw_lock_own(&btr_search_latch, RW_LOCK_EX)) {
- ut_ad(!buf_block_align(rec)->is_hashed);
- }
-#endif /* UNIV_SYNC_DEBUG */
-
- if (UNIV_LIKELY_NULL(page_zip)) {
- ulint pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
- page_zip_write_trx_id_and_roll_ptr(page_zip, rec, offsets,
- pos, trx->id, roll_ptr);
- } else {
- ulint offset = index->trx_id_offset;
-
- if (!offset) {
- offset = row_get_trx_id_offset(rec, index, offsets);
- }
-
-#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
-# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
-#endif
- trx_write_trx_id(rec + offset, trx->id);
- trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr);
- }
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/row0vers.h b/storage/innodb_plugin/include/row0vers.h
deleted file mode 100644
index 5a2e38230d5..00000000000
--- a/storage/innodb_plugin/include/row0vers.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0vers.h
-Row versions
-
-Created 2/6/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0vers_h
-#define row0vers_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "rem0types.h"
-#include "mtr0mtr.h"
-#include "read0types.h"
-
-/*****************************************************************//**
-Finds out if an active transaction has inserted or modified a secondary
-index record. NOTE: the kernel mutex is temporarily released in this
-function!
-@return NULL if committed, else the active transaction */
-UNIV_INTERN
-trx_t*
-row_vers_impl_x_locked_off_kernel(
-/*==============================*/
- const rec_t* rec, /*!< in: record in a secondary index */
- dict_index_t* index, /*!< in: the secondary index */
- const ulint* offsets);/*!< in: rec_get_offsets(rec, index) */
-/*****************************************************************//**
-Finds out if we must preserve a delete marked earlier version of a clustered
-index record, because it is >= the purge view.
-@return TRUE if earlier version should be preserved */
-UNIV_INTERN
-ibool
-row_vers_must_preserve_del_marked(
-/*==============================*/
- trx_id_t trx_id, /*!< in: transaction id in the version */
- mtr_t* mtr); /*!< in: mtr holding the latch on the
- clustered index record; it will also
- hold the latch on purge_view */
-/*****************************************************************//**
-Finds out if a version of the record, where the version >= the current
-purge view, should have ientry as its secondary index entry. We check
-if there is any not delete marked version of the record where the trx
-id >= purge view, and the secondary index entry == ientry; exactly in
-this case we return TRUE.
-@return TRUE if earlier version should have */
-UNIV_INTERN
-ibool
-row_vers_old_has_index_entry(
-/*=========================*/
- ibool also_curr,/*!< in: TRUE if also rec is included in the
- versions to search; otherwise only versions
- prior to it are searched */
- const rec_t* rec, /*!< in: record in the clustered index; the
- caller must have a latch on the page */
- mtr_t* mtr, /*!< in: mtr holding the latch on rec; it will
- also hold the latch on purge_view */
- dict_index_t* index, /*!< in: the secondary index */
- const dtuple_t* ientry);/*!< in: the secondary index entry */
-/*****************************************************************//**
-Constructs the version of a clustered index record which a consistent
-read should see. We assume that the trx id stored in rec is such that
-the consistent read should not see rec in its present version.
-@return DB_SUCCESS or DB_MISSING_HISTORY */
-UNIV_INTERN
-ulint
-row_vers_build_for_consistent_read(
-/*===============================*/
- const rec_t* rec, /*!< in: record in a clustered index; the
- caller must have a latch on the page; this
- latch locks the top of the stack of versions
- of this records */
- mtr_t* mtr, /*!< in: mtr holding the latch on rec; it will
- also hold the latch on purge_view */
- dict_index_t* index, /*!< in: the clustered index */
- ulint** offsets,/*!< in/out: offsets returned by
- rec_get_offsets(rec, index) */
- read_view_t* view, /*!< in: the consistent read view */
- mem_heap_t** offset_heap,/*!< in/out: memory heap from which
- the offsets are allocated */
- mem_heap_t* in_heap,/*!< in: memory heap from which the memory for
- *old_vers is allocated; memory for possible
- intermediate versions is allocated and freed
- locally within the function */
- rec_t** old_vers);/*!< out, own: old version, or NULL if the
- record does not exist in the view, that is,
- it was freshly inserted afterwards */
-
-/*****************************************************************//**
-Constructs the last committed version of a clustered index record,
-which should be seen by a semi-consistent read.
-@return DB_SUCCESS or DB_MISSING_HISTORY */
-UNIV_INTERN
-ulint
-row_vers_build_for_semi_consistent_read(
-/*====================================*/
- const rec_t* rec, /*!< in: record in a clustered index; the
- caller must have a latch on the page; this
- latch locks the top of the stack of versions
- of this records */
- mtr_t* mtr, /*!< in: mtr holding the latch on rec */
- dict_index_t* index, /*!< in: the clustered index */
- ulint** offsets,/*!< in/out: offsets returned by
- rec_get_offsets(rec, index) */
- mem_heap_t** offset_heap,/*!< in/out: memory heap from which
- the offsets are allocated */
- mem_heap_t* in_heap,/*!< in: memory heap from which the memory for
- *old_vers is allocated; memory for possible
- intermediate versions is allocated and freed
- locally within the function */
- const rec_t** old_vers);/*!< out: rec, old version, or NULL if the
- record does not exist in the view, that is,
- it was freshly inserted afterwards */
-
-
-#ifndef UNIV_NONINL
-#include "row0vers.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/row0vers.ic b/storage/innodb_plugin/include/row0vers.ic
deleted file mode 100644
index 8bb3a5c0cb3..00000000000
--- a/storage/innodb_plugin/include/row0vers.ic
+++ /dev/null
@@ -1,30 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0vers.ic
-Row versions
-
-Created 2/6/1997 Heikki Tuuri
-*******************************************************/
-
-#include "row0row.h"
-#include "dict0dict.h"
-#include "read0read.h"
-#include "page0page.h"
-#include "log0recv.h"
diff --git a/storage/innodb_plugin/include/srv0que.h b/storage/innodb_plugin/include/srv0que.h
deleted file mode 100644
index 82ee7739ef7..00000000000
--- a/storage/innodb_plugin/include/srv0que.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/srv0que.h
-Server query execution
-
-Created 6/5/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef srv0que_h
-#define srv0que_h
-
-#include "univ.i"
-#include "que0types.h"
-
-/**********************************************************************//**
-Enqueues a task to server task queue and releases a worker thread, if there
-is a suspended one. */
-UNIV_INTERN
-void
-srv_que_task_enqueue_low(
-/*=====================*/
- que_thr_t* thr); /*!< in: query thread */
-
-#endif
-
diff --git a/storage/innodb_plugin/include/srv0srv.h b/storage/innodb_plugin/include/srv0srv.h
deleted file mode 100644
index 228c9f6600a..00000000000
--- a/storage/innodb_plugin/include/srv0srv.h
+++ /dev/null
@@ -1,660 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2008, 2009, Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-/***********************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2009, Percona Inc.
-
-Portions of this file contain modifications contributed and copyrighted
-by Percona Inc.. Those modifications are
-gratefully acknowledged and are described briefly in the InnoDB
-documentation. The contributions by Percona Inc. are incorporated with
-their permission, and subject to the conditions contained in the file
-COPYING.Percona.
-
-This program is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-***********************************************************************/
-
-/**************************************************//**
-@file include/srv0srv.h
-The server main program
-
-Created 10/10/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef srv0srv_h
-#define srv0srv_h
-
-#include "univ.i"
-#ifndef UNIV_HOTBACKUP
-#include "sync0sync.h"
-#include "os0sync.h"
-#include "que0types.h"
-#include "trx0types.h"
-
-extern const char* srv_main_thread_op_info;
-
-/** Prefix used by MySQL to indicate pre-5.1 table name encoding */
-extern const char srv_mysql50_table_name_prefix[9];
-
-/* When this event is set the lock timeout and InnoDB monitor
-thread starts running */
-extern os_event_t srv_lock_timeout_thread_event;
-
-/* If the last data file is auto-extended, we add this many pages to it
-at a time */
-#define SRV_AUTO_EXTEND_INCREMENT \
- (srv_auto_extend_increment * ((1024 * 1024) / UNIV_PAGE_SIZE))
-
-/* This is set to TRUE if the MySQL user has set it in MySQL */
-extern ibool srv_lower_case_table_names;
-
-/* Mutex for locking srv_monitor_file */
-extern mutex_t srv_monitor_file_mutex;
-/* Temporary file for innodb monitor output */
-extern FILE* srv_monitor_file;
-/* Mutex for locking srv_dict_tmpfile.
-This mutex has a very high rank; threads reserving it should not
-be holding any InnoDB latches. */
-extern mutex_t srv_dict_tmpfile_mutex;
-/* Temporary file for output from the data dictionary */
-extern FILE* srv_dict_tmpfile;
-/* Mutex for locking srv_misc_tmpfile.
-This mutex has a very low rank; threads reserving it should not
-acquire any further latches or sleep before releasing this one. */
-extern mutex_t srv_misc_tmpfile_mutex;
-/* Temporary file for miscellanous diagnostic output */
-extern FILE* srv_misc_tmpfile;
-
-/* Server parameters which are read from the initfile */
-
-extern char* srv_data_home;
-#ifdef UNIV_LOG_ARCHIVE
-extern char* srv_arch_dir;
-#endif /* UNIV_LOG_ARCHIVE */
-
-/** store to its own file each table created by an user; data
-dictionary tables are in the system tablespace 0 */
-#ifndef UNIV_HOTBACKUP
-extern my_bool srv_file_per_table;
-#else
-extern ibool srv_file_per_table;
-#endif /* UNIV_HOTBACKUP */
-/** The file format to use on new *.ibd files. */
-extern ulint srv_file_format;
-/** Whether to check file format during startup. A value of
-DICT_TF_FORMAT_MAX + 1 means no checking ie. FALSE. The default is to
-set it to the highest format we support. */
-extern ulint srv_check_file_format_at_startup;
-/** Place locks to records only i.e. do not use next-key locking except
-on duplicate key checking and foreign key checking */
-extern ibool srv_locks_unsafe_for_binlog;
-#endif /* !UNIV_HOTBACKUP */
-
-extern ulint srv_n_data_files;
-extern char** srv_data_file_names;
-extern ulint* srv_data_file_sizes;
-extern ulint* srv_data_file_is_raw_partition;
-
-extern ibool srv_auto_extend_last_data_file;
-extern ulint srv_last_file_size_max;
-extern char** srv_log_group_home_dirs;
-#ifndef UNIV_HOTBACKUP
-extern ulong srv_auto_extend_increment;
-
-extern ibool srv_created_new_raw;
-
-extern ulint srv_n_log_groups;
-extern ulint srv_n_log_files;
-extern ulint srv_log_file_size;
-extern ulint srv_log_buffer_size;
-extern ulong srv_flush_log_at_trx_commit;
-extern char srv_adaptive_flushing;
-
-
-/* The sort order table of the MySQL latin1_swedish_ci character set
-collation */
-extern const byte* srv_latin1_ordering;
-#ifndef UNIV_HOTBACKUP
-extern my_bool srv_use_sys_malloc;
-#else
-extern ibool srv_use_sys_malloc;
-#endif /* UNIV_HOTBACKUP */
-extern ulint srv_buf_pool_size; /*!< requested size in bytes */
-extern ulint srv_buf_pool_old_size; /*!< previously requested size */
-extern ulint srv_buf_pool_curr_size; /*!< current size in bytes */
-extern ulint srv_mem_pool_size;
-extern ulint srv_lock_table_size;
-
-extern ulint srv_n_file_io_threads;
-extern ulong srv_read_ahead_threshold;
-extern ulint srv_n_read_io_threads;
-extern ulint srv_n_write_io_threads;
-
-/* Number of IO operations per second the server can do */
-extern ulong srv_io_capacity;
-/* Returns the number of IO operations that is X percent of the
-capacity. PCT_IO(5) -> returns the number of IO operations that
-is 5% of the max where max is srv_io_capacity. */
-#define PCT_IO(p) ((ulong) (srv_io_capacity * ((double) p / 100.0)))
-
-#ifdef UNIV_LOG_ARCHIVE
-extern ibool srv_log_archive_on;
-extern ibool srv_archive_recovery;
-extern dulint srv_archive_recovery_limit_lsn;
-#endif /* UNIV_LOG_ARCHIVE */
-
-extern char* srv_file_flush_method_str;
-extern ulint srv_unix_file_flush_method;
-extern ulint srv_win_file_flush_method;
-
-extern ulint srv_max_n_open_files;
-
-extern ulint srv_max_dirty_pages_pct;
-
-extern ulint srv_force_recovery;
-extern ulong srv_thread_concurrency;
-
-extern ulint srv_max_n_threads;
-
-extern lint srv_conc_n_threads;
-
-extern ulint srv_fast_shutdown; /* If this is 1, do not do a
- purge and index buffer merge.
- If this 2, do not even flush the
- buffer pool to data files at the
- shutdown: we effectively 'crash'
- InnoDB (but lose no committed
- transactions). */
-extern ibool srv_innodb_status;
-
-extern unsigned long long srv_stats_sample_pages;
-
-extern ibool srv_use_doublewrite_buf;
-extern ibool srv_use_checksums;
-
-extern ibool srv_set_thread_priorities;
-extern int srv_query_thread_priority;
-
-extern ulong srv_max_buf_pool_modified_pct;
-extern ulong srv_max_purge_lag;
-
-extern ulong srv_replication_delay;
-/*-------------------------------------------*/
-
-extern ulint srv_n_rows_inserted;
-extern ulint srv_n_rows_updated;
-extern ulint srv_n_rows_deleted;
-extern ulint srv_n_rows_read;
-
-extern ibool srv_print_innodb_monitor;
-extern ibool srv_print_innodb_lock_monitor;
-extern ibool srv_print_innodb_tablespace_monitor;
-extern ibool srv_print_verbose_log;
-extern ibool srv_print_innodb_table_monitor;
-
-extern ibool srv_lock_timeout_and_monitor_active;
-extern ibool srv_error_monitor_active;
-
-extern ulong srv_n_spin_wait_rounds;
-extern ulong srv_n_free_tickets_to_enter;
-extern ulong srv_thread_sleep_delay;
-extern ulong srv_spin_wait_delay;
-extern ibool srv_priority_boost;
-
-extern ulint srv_mem_pool_size;
-extern ulint srv_lock_table_size;
-
-#ifdef UNIV_DEBUG
-extern ibool srv_print_thread_releases;
-extern ibool srv_print_lock_waits;
-extern ibool srv_print_buf_io;
-extern ibool srv_print_log_io;
-extern ibool srv_print_latch_waits;
-#else /* UNIV_DEBUG */
-# define srv_print_thread_releases FALSE
-# define srv_print_lock_waits FALSE
-# define srv_print_buf_io FALSE
-# define srv_print_log_io FALSE
-# define srv_print_latch_waits FALSE
-#endif /* UNIV_DEBUG */
-
-extern ulint srv_activity_count;
-extern ulint srv_fatal_semaphore_wait_threshold;
-extern ulint srv_dml_needed_delay;
-
-extern mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs,
- query threads, and lock table: we allocate
- it from dynamic memory to get it to the
- same DRAM page as other hotspot semaphores */
-#define kernel_mutex (*kernel_mutex_temp)
-
-#define SRV_MAX_N_IO_THREADS 130
-
-/* Array of English strings describing the current state of an
-i/o handler thread */
-extern const char* srv_io_thread_op_info[];
-extern const char* srv_io_thread_function[];
-
-/* the number of the log write requests done */
-extern ulint srv_log_write_requests;
-
-/* the number of physical writes to the log performed */
-extern ulint srv_log_writes;
-
-/* amount of data written to the log files in bytes */
-extern ulint srv_os_log_written;
-
-/* amount of writes being done to the log files */
-extern ulint srv_os_log_pending_writes;
-
-/* we increase this counter, when there we don't have enough space in the
-log buffer and have to flush it */
-extern ulint srv_log_waits;
-
-/* variable that counts amount of data read in total (in bytes) */
-extern ulint srv_data_read;
-
-/* here we count the amount of data written in total (in bytes) */
-extern ulint srv_data_written;
-
-/* this variable counts the amount of times, when the doublewrite buffer
-was flushed */
-extern ulint srv_dblwr_writes;
-
-/* here we store the number of pages that have been flushed to the
-doublewrite buffer */
-extern ulint srv_dblwr_pages_written;
-
-/* in this variable we store the number of write requests issued */
-extern ulint srv_buf_pool_write_requests;
-
-/* here we store the number of times when we had to wait for a free page
-in the buffer pool. It happens when the buffer pool is full and we need
-to make a flush, in order to be able to read or create a page. */
-extern ulint srv_buf_pool_wait_free;
-
-/* variable to count the number of pages that were written from the
-buffer pool to disk */
-extern ulint srv_buf_pool_flushed;
-
-/** Number of buffer pool reads that led to the
-reading of a disk page */
-extern ulint srv_buf_pool_reads;
-
-/** Status variables to be passed to MySQL */
-typedef struct export_var_struct export_struc;
-
-/** Status variables to be passed to MySQL */
-extern export_struc export_vars;
-
-/** The server system */
-typedef struct srv_sys_struct srv_sys_t;
-
-/** The server system */
-extern srv_sys_t* srv_sys;
-#endif /* !UNIV_HOTBACKUP */
-
-/** Types of raw partitions in innodb_data_file_path */
-enum {
- SRV_NOT_RAW = 0, /*!< Not a raw partition */
- SRV_NEW_RAW, /*!< A 'newraw' partition, only to be
- initialized */
- SRV_OLD_RAW /*!< An initialized raw partition */
-};
-
-/** Alternatives for the file flush option in Unix; see the InnoDB manual
-about what these mean */
-enum {
- SRV_UNIX_FSYNC = 1, /*!< fsync, the default */
- SRV_UNIX_O_DSYNC, /*!< open log files in O_SYNC mode */
- SRV_UNIX_LITTLESYNC, /*!< do not call os_file_flush()
- when writing data files, but do flush
- after writing to log files */
- SRV_UNIX_NOSYNC, /*!< do not flush after writing */
- SRV_UNIX_O_DIRECT /*!< invoke os_file_set_nocache() on
- data files */
-};
-
-/** Alternatives for file i/o in Windows */
-enum {
- SRV_WIN_IO_NORMAL = 1, /*!< buffered I/O */
- SRV_WIN_IO_UNBUFFERED /*!< unbuffered I/O; this is the default */
-};
-
-/** Alternatives for srv_force_recovery. Non-zero values are intended
-to help the user get a damaged database up so that he can dump intact
-tables and rows with SELECT INTO OUTFILE. The database must not otherwise
-be used with these options! A bigger number below means that all precautions
-of lower numbers are included. */
-enum {
- SRV_FORCE_IGNORE_CORRUPT = 1, /*!< let the server run even if it
- detects a corrupt page */
- SRV_FORCE_NO_BACKGROUND = 2, /*!< prevent the main thread from
- running: if a crash would occur
- in purge, this prevents it */
- SRV_FORCE_NO_TRX_UNDO = 3, /*!< do not run trx rollback after
- recovery */
- SRV_FORCE_NO_IBUF_MERGE = 4, /*!< prevent also ibuf operations:
- if they would cause a crash, better
- not do them */
- SRV_FORCE_NO_UNDO_LOG_SCAN = 5, /*!< do not look at undo logs when
- starting the database: InnoDB will
- treat even incomplete transactions
- as committed */
- SRV_FORCE_NO_LOG_REDO = 6 /*!< do not do the log roll-forward
- in connection with recovery */
-};
-
-#ifndef UNIV_HOTBACKUP
-/** Types of threads existing in the system. */
-enum srv_thread_type {
- SRV_COM = 1, /**< threads serving communication and queries */
- SRV_CONSOLE, /**< thread serving console */
- SRV_WORKER, /**< threads serving parallelized queries and
- queries released from lock wait */
-#if 0
- /* Utility threads */
- SRV_BUFFER, /**< thread flushing dirty buffer blocks */
- SRV_RECOVERY, /**< threads finishing a recovery */
- SRV_INSERT, /**< thread flushing the insert buffer to disk */
-#endif
- SRV_MASTER /**< the master thread, (whose type number must
- be biggest) */
-};
-
-/*********************************************************************//**
-Boots Innobase server.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ulint
-srv_boot(void);
-/*==========*/
-/*********************************************************************//**
-Initializes the server. */
-UNIV_INTERN
-void
-srv_init(void);
-/*==========*/
-/*********************************************************************//**
-Frees the data structures created in srv_init(). */
-UNIV_INTERN
-void
-srv_free(void);
-/*==========*/
-/*********************************************************************//**
-Initializes the synchronization primitives, memory system, and the thread
-local storage. */
-UNIV_INTERN
-void
-srv_general_init(void);
-/*==================*/
-/*********************************************************************//**
-Gets the number of threads in the system.
-@return sum of srv_n_threads[] */
-UNIV_INTERN
-ulint
-srv_get_n_threads(void);
-/*===================*/
-/*********************************************************************//**
-Returns the calling thread type.
-@return SRV_COM, ... */
-
-enum srv_thread_type
-srv_get_thread_type(void);
-/*=====================*/
-/*********************************************************************//**
-Sets the info describing an i/o thread current state. */
-UNIV_INTERN
-void
-srv_set_io_thread_op_info(
-/*======================*/
- ulint i, /*!< in: the 'segment' of the i/o thread */
- const char* str); /*!< in: constant char string describing the
- state */
-/*********************************************************************//**
-Releases threads of the type given from suspension in the thread table.
-NOTE! The server mutex has to be reserved by the caller!
-@return number of threads released: this may be less than n if not
-enough threads were suspended at the moment */
-UNIV_INTERN
-ulint
-srv_release_threads(
-/*================*/
- enum srv_thread_type type, /*!< in: thread type */
- ulint n); /*!< in: number of threads to release */
-/*********************************************************************//**
-The master thread controlling the server.
-@return a dummy parameter */
-UNIV_INTERN
-os_thread_ret_t
-srv_master_thread(
-/*==============*/
- void* arg); /*!< in: a dummy parameter required by
- os_thread_create */
-/*******************************************************************//**
-Tells the Innobase server that there has been activity in the database
-and wakes up the master thread if it is suspended (not sleeping). Used
-in the MySQL interface. Note that there is a small chance that the master
-thread stays suspended (we do not protect our operation with the kernel
-mutex, for performace reasons). */
-UNIV_INTERN
-void
-srv_active_wake_master_thread(void);
-/*===============================*/
-/*******************************************************************//**
-Wakes up the master thread if it is suspended or being suspended. */
-UNIV_INTERN
-void
-srv_wake_master_thread(void);
-/*========================*/
-/*********************************************************************//**
-Puts an OS thread to wait if there are too many concurrent threads
-(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
-UNIV_INTERN
-void
-srv_conc_enter_innodb(
-/*==================*/
- trx_t* trx); /*!< in: transaction object associated with the
- thread */
-/*********************************************************************//**
-This lets a thread enter InnoDB regardless of the number of threads inside
-InnoDB. This must be called when a thread ends a lock wait. */
-UNIV_INTERN
-void
-srv_conc_force_enter_innodb(
-/*========================*/
- trx_t* trx); /*!< in: transaction object associated with the
- thread */
-/*********************************************************************//**
-This must be called when a thread exits InnoDB in a lock wait or at the
-end of an SQL statement. */
-UNIV_INTERN
-void
-srv_conc_force_exit_innodb(
-/*=======================*/
- trx_t* trx); /*!< in: transaction object associated with the
- thread */
-/*********************************************************************//**
-This must be called when a thread exits InnoDB. */
-UNIV_INTERN
-void
-srv_conc_exit_innodb(
-/*=================*/
- trx_t* trx); /*!< in: transaction object associated with the
- thread */
-/***************************************************************//**
-Puts a MySQL OS thread to wait for a lock to be released. If an error
-occurs during the wait trx->error_state associated with thr is
-!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
-are possible errors. DB_DEADLOCK is returned if selective deadlock
-resolution chose this transaction as a victim. */
-UNIV_INTERN
-void
-srv_suspend_mysql_thread(
-/*=====================*/
- que_thr_t* thr); /*!< in: query thread associated with the MySQL
- OS thread */
-/********************************************************************//**
-Releases a MySQL OS thread waiting for a lock to be released, if the
-thread is already suspended. */
-UNIV_INTERN
-void
-srv_release_mysql_thread_if_suspended(
-/*==================================*/
- que_thr_t* thr); /*!< in: query thread associated with the
- MySQL OS thread */
-/*********************************************************************//**
-A thread which wakes up threads whose lock wait may have lasted too long.
-This also prints the info output by various InnoDB monitors.
-@return a dummy parameter */
-UNIV_INTERN
-os_thread_ret_t
-srv_lock_timeout_and_monitor_thread(
-/*================================*/
- void* arg); /*!< in: a dummy parameter required by
- os_thread_create */
-/*********************************************************************//**
-A thread which prints warnings about semaphore waits which have lasted
-too long. These can be used to track bugs which cause hangs.
-@return a dummy parameter */
-UNIV_INTERN
-os_thread_ret_t
-srv_error_monitor_thread(
-/*=====================*/
- void* arg); /*!< in: a dummy parameter required by
- os_thread_create */
-/******************************************************************//**
-Outputs to a file the output of the InnoDB Monitor. */
-UNIV_INTERN
-void
-srv_printf_innodb_monitor(
-/*======================*/
- FILE* file, /*!< in: output stream */
- ulint* trx_start, /*!< out: file position of the start of
- the list of active transactions */
- ulint* trx_end); /*!< out: file position of the end of
- the list of active transactions */
-
-/******************************************************************//**
-Function to pass InnoDB status variables to MySQL */
-UNIV_INTERN
-void
-srv_export_innodb_status(void);
-/*==========================*/
-
-/** Thread slot in the thread table */
-typedef struct srv_slot_struct srv_slot_t;
-
-/** Thread table is an array of slots */
-typedef srv_slot_t srv_table_t;
-
-/** Status variables to be passed to MySQL */
-struct export_var_struct{
- ulint innodb_data_pending_reads; /*!< Pending reads */
- ulint innodb_data_pending_writes; /*!< Pending writes */
- ulint innodb_data_pending_fsyncs; /*!< Pending fsyncs */
- ulint innodb_data_fsyncs; /*!< Number of fsyncs so far */
- ulint innodb_data_read; /*!< Data bytes read */
- ulint innodb_data_writes; /*!< I/O write requests */
- ulint innodb_data_written; /*!< Data bytes written */
- ulint innodb_data_reads; /*!< I/O read requests */
- ulint innodb_buffer_pool_pages_total; /*!< Buffer pool size */
- ulint innodb_buffer_pool_pages_data; /*!< Data pages */
- ulint innodb_buffer_pool_pages_dirty; /*!< Dirty data pages */
- ulint innodb_buffer_pool_pages_misc; /*!< Miscellanous pages */
- ulint innodb_buffer_pool_pages_free; /*!< Free pages */
-#ifdef UNIV_DEBUG
- ulint innodb_buffer_pool_pages_latched; /*!< Latched pages */
-#endif /* UNIV_DEBUG */
- ulint innodb_buffer_pool_read_requests; /*!< buf_pool->stat.n_page_gets */
- ulint innodb_buffer_pool_reads; /*!< srv_buf_pool_reads */
- ulint innodb_buffer_pool_wait_free; /*!< srv_buf_pool_wait_free */
- ulint innodb_buffer_pool_pages_flushed; /*!< srv_buf_pool_flushed */
- ulint innodb_buffer_pool_write_requests;/*!< srv_buf_pool_write_requests */
- ulint innodb_buffer_pool_read_ahead; /*!< srv_read_ahead */
- ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted*/
- ulint innodb_dblwr_pages_written; /*!< srv_dblwr_pages_written */
- ulint innodb_dblwr_writes; /*!< srv_dblwr_writes */
- ibool innodb_have_atomic_builtins; /*!< HAVE_ATOMIC_BUILTINS */
- ulint innodb_log_waits; /*!< srv_log_waits */
- ulint innodb_log_write_requests; /*!< srv_log_write_requests */
- ulint innodb_log_writes; /*!< srv_log_writes */
- ulint innodb_os_log_written; /*!< srv_os_log_written */
- ulint innodb_os_log_fsyncs; /*!< fil_n_log_flushes */
- ulint innodb_os_log_pending_writes; /*!< srv_os_log_pending_writes */
- ulint innodb_os_log_pending_fsyncs; /*!< fil_n_pending_log_flushes */
- ulint innodb_page_size; /*!< UNIV_PAGE_SIZE */
- ulint innodb_pages_created; /*!< buf_pool->stat.n_pages_created */
- ulint innodb_pages_read; /*!< buf_pool->stat.n_pages_read */
- ulint innodb_pages_written; /*!< buf_pool->stat.n_pages_written */
- ulint innodb_row_lock_waits; /*!< srv_n_lock_wait_count */
- ulint innodb_row_lock_current_waits; /*!< srv_n_lock_wait_current_count */
- ib_int64_t innodb_row_lock_time; /*!< srv_n_lock_wait_time
- / 1000 */
- ulint innodb_row_lock_time_avg; /*!< srv_n_lock_wait_time
- / 1000
- / srv_n_lock_wait_count */
- ulint innodb_row_lock_time_max; /*!< srv_n_lock_max_wait_time
- / 1000 */
- ulint innodb_rows_read; /*!< srv_n_rows_read */
- ulint innodb_rows_inserted; /*!< srv_n_rows_inserted */
- ulint innodb_rows_updated; /*!< srv_n_rows_updated */
- ulint innodb_rows_deleted; /*!< srv_n_rows_deleted */
-};
-
-/** The server system struct */
-struct srv_sys_struct{
- srv_table_t* threads; /*!< server thread table */
- UT_LIST_BASE_NODE_T(que_thr_t)
- tasks; /*!< task queue */
-};
-
-extern ulint srv_n_threads_active[];
-#else /* !UNIV_HOTBACKUP */
-# define srv_use_checksums TRUE
-# define srv_use_adaptive_hash_indexes FALSE
-# define srv_force_recovery 0UL
-# define srv_set_io_thread_op_info(t,info) ((void) 0)
-# define srv_is_being_started 0
-# define srv_win_file_flush_method SRV_WIN_IO_UNBUFFERED
-# define srv_unix_file_flush_method SRV_UNIX_O_DSYNC
-# define srv_start_raw_disk_in_use 0
-# define srv_file_per_table 1
-#endif /* !UNIV_HOTBACKUP */
-
-#endif
diff --git a/storage/innodb_plugin/include/srv0srv.ic b/storage/innodb_plugin/include/srv0srv.ic
deleted file mode 100644
index 8a1a678a016..00000000000
--- a/storage/innodb_plugin/include/srv0srv.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/srv0srv.ic
-Server main program
-
-Created 10/4/1995 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innodb_plugin/include/srv0start.h b/storage/innodb_plugin/include/srv0start.h
deleted file mode 100644
index 8abf15da9c1..00000000000
--- a/storage/innodb_plugin/include/srv0start.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/srv0start.h
-Starts the Innobase database server
-
-Created 10/10/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef srv0start_h
-#define srv0start_h
-
-#include "univ.i"
-#include "ut0byte.h"
-
-/*********************************************************************//**
-Normalizes a directory path for Windows: converts slashes to backslashes. */
-UNIV_INTERN
-void
-srv_normalize_path_for_win(
-/*=======================*/
- char* str); /*!< in/out: null-terminated character string */
-/*********************************************************************//**
-Reads the data files and their sizes from a character string given in
-the .cnf file.
-@return TRUE if ok, FALSE on parse error */
-UNIV_INTERN
-ibool
-srv_parse_data_file_paths_and_sizes(
-/*================================*/
- char* str); /*!< in/out: the data file path string */
-/*********************************************************************//**
-Reads log group home directories from a character string given in
-the .cnf file.
-@return TRUE if ok, FALSE on parse error */
-UNIV_INTERN
-ibool
-srv_parse_log_group_home_dirs(
-/*==========================*/
- char* str); /*!< in/out: character string */
-/*********************************************************************//**
-Frees the memory allocated by srv_parse_data_file_paths_and_sizes()
-and srv_parse_log_group_home_dirs(). */
-UNIV_INTERN
-void
-srv_free_paths_and_sizes(void);
-/*==========================*/
-/*********************************************************************//**
-Adds a slash or a backslash to the end of a string if it is missing
-and the string is not empty.
-@return string which has the separator if the string is not empty */
-UNIV_INTERN
-char*
-srv_add_path_separator_if_needed(
-/*=============================*/
- char* str); /*!< in: null-terminated character string */
-#ifndef UNIV_HOTBACKUP
-/****************************************************************//**
-Starts Innobase and creates a new database if database files
-are not found and the user wants.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-int
-innobase_start_or_create_for_mysql(void);
-/*====================================*/
-/****************************************************************//**
-Shuts down the Innobase database.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-int
-innobase_shutdown_for_mysql(void);
-/*=============================*/
-/** Log sequence number at shutdown */
-extern ib_uint64_t srv_shutdown_lsn;
-/** Log sequence number immediately after startup */
-extern ib_uint64_t srv_start_lsn;
-
-#ifdef __NETWARE__
-void set_panic_flag_for_netware(void);
-#endif
-
-#ifdef HAVE_DARWIN_THREADS
-/** TRUE if the F_FULLFSYNC option is available */
-extern ibool srv_have_fullfsync;
-#endif
-
-/** TRUE if the server is being started */
-extern ibool srv_is_being_started;
-/** TRUE if the server was successfully started */
-extern ibool srv_was_started;
-/** TRUE if the server is being started, before rolling back any
-incomplete transactions */
-extern ibool srv_startup_is_before_trx_rollback_phase;
-
-/** TRUE if a raw partition is in use */
-extern ibool srv_start_raw_disk_in_use;
-
-
-/** Shutdown state */
-enum srv_shutdown_state {
- SRV_SHUTDOWN_NONE = 0, /*!< Database running normally */
- SRV_SHUTDOWN_CLEANUP, /*!< Cleaning up in
- logs_empty_and_mark_files_at_shutdown() */
- SRV_SHUTDOWN_LAST_PHASE,/*!< Last phase after ensuring that
- the buffer pool can be freed: flush
- all file spaces and close all files */
- SRV_SHUTDOWN_EXIT_THREADS/*!< Exit all threads */
-};
-
-/** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to
-SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
-extern enum srv_shutdown_state srv_shutdown_state;
-#endif /* !UNIV_HOTBACKUP */
-
-/** Log 'spaces' have id's >= this */
-#define SRV_LOG_SPACE_FIRST_ID 0xFFFFFFF0UL
-
-#endif
diff --git a/storage/innodb_plugin/include/sync0arr.h b/storage/innodb_plugin/include/sync0arr.h
deleted file mode 100644
index 5f1280f5e28..00000000000
--- a/storage/innodb_plugin/include/sync0arr.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/sync0arr.h
-The wait array used in synchronization primitives
-
-Created 9/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef sync0arr_h
-#define sync0arr_h
-
-#include "univ.i"
-#include "ut0lst.h"
-#include "ut0mem.h"
-#include "os0thread.h"
-
-/** Synchronization wait array cell */
-typedef struct sync_cell_struct sync_cell_t;
-/** Synchronization wait array */
-typedef struct sync_array_struct sync_array_t;
-
-/** Parameters for sync_array_create() @{ */
-#define SYNC_ARRAY_OS_MUTEX 1 /*!< protected by os_mutex_t */
-#define SYNC_ARRAY_MUTEX 2 /*!< protected by mutex_t */
-/* @} */
-
-/*******************************************************************//**
-Creates a synchronization wait array. It is protected by a mutex
-which is automatically reserved when the functions operating on it
-are called.
-@return own: created wait array */
-UNIV_INTERN
-sync_array_t*
-sync_array_create(
-/*==============*/
- ulint n_cells, /*!< in: number of cells in the array
- to create */
- ulint protection); /*!< in: either SYNC_ARRAY_OS_MUTEX or
- SYNC_ARRAY_MUTEX: determines the type
- of mutex protecting the data structure */
-/******************************************************************//**
-Frees the resources in a wait array. */
-UNIV_INTERN
-void
-sync_array_free(
-/*============*/
- sync_array_t* arr); /*!< in, own: sync wait array */
-/******************************************************************//**
-Reserves a wait array cell for waiting for an object.
-The event of the cell is reset to nonsignalled state. */
-UNIV_INTERN
-void
-sync_array_reserve_cell(
-/*====================*/
- sync_array_t* arr, /*!< in: wait array */
- void* object, /*!< in: pointer to the object to wait for */
- ulint type, /*!< in: lock request type */
- const char* file, /*!< in: file where requested */
- ulint line, /*!< in: line where requested */
- ulint* index); /*!< out: index of the reserved cell */
-/******************************************************************//**
-This function should be called when a thread starts to wait on
-a wait array cell. In the debug version this function checks
-if the wait for a semaphore will result in a deadlock, in which
-case prints info and asserts. */
-UNIV_INTERN
-void
-sync_array_wait_event(
-/*==================*/
- sync_array_t* arr, /*!< in: wait array */
- ulint index); /*!< in: index of the reserved cell */
-/******************************************************************//**
-Frees the cell. NOTE! sync_array_wait_event frees the cell
-automatically! */
-UNIV_INTERN
-void
-sync_array_free_cell(
-/*=================*/
- sync_array_t* arr, /*!< in: wait array */
- ulint index); /*!< in: index of the cell in array */
-/**********************************************************************//**
-Note that one of the wait objects was signalled. */
-UNIV_INTERN
-void
-sync_array_object_signalled(
-/*========================*/
- sync_array_t* arr); /*!< in: wait array */
-/**********************************************************************//**
-If the wakeup algorithm does not work perfectly at semaphore relases,
-this function will do the waking (see the comment in mutex_exit). This
-function should be called about every 1 second in the server. */
-UNIV_INTERN
-void
-sync_arr_wake_threads_if_sema_free(void);
-/*====================================*/
-/**********************************************************************//**
-Prints warnings of long semaphore waits to stderr.
-@return TRUE if fatal semaphore wait threshold was exceeded */
-UNIV_INTERN
-ibool
-sync_array_print_long_waits(void);
-/*=============================*/
-/********************************************************************//**
-Validates the integrity of the wait array. Checks
-that the number of reserved cells equals the count variable. */
-UNIV_INTERN
-void
-sync_array_validate(
-/*================*/
- sync_array_t* arr); /*!< in: sync wait array */
-/**********************************************************************//**
-Prints info of the wait array. */
-UNIV_INTERN
-void
-sync_array_print_info(
-/*==================*/
- FILE* file, /*!< in: file where to print */
- sync_array_t* arr); /*!< in: wait array */
-
-
-#ifndef UNIV_NONINL
-#include "sync0arr.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/sync0arr.ic b/storage/innodb_plugin/include/sync0arr.ic
deleted file mode 100644
index bf57f5b2dc2..00000000000
--- a/storage/innodb_plugin/include/sync0arr.ic
+++ /dev/null
@@ -1,27 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/sync0arr.ic
-The wait array for synchronization primitives
-
-Inline code
-
-Created 9/5/1995 Heikki Tuuri
-*******************************************************/
-
diff --git a/storage/innodb_plugin/include/sync0rw.h b/storage/innodb_plugin/include/sync0rw.h
deleted file mode 100644
index aedfd5f3f86..00000000000
--- a/storage/innodb_plugin/include/sync0rw.h
+++ /dev/null
@@ -1,585 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/sync0rw.h
-The read-write lock (for threads, not for database transactions)
-
-Created 9/11/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef sync0rw_h
-#define sync0rw_h
-
-#include "univ.i"
-#ifndef UNIV_HOTBACKUP
-#include "ut0lst.h"
-#include "sync0sync.h"
-#include "os0sync.h"
-
-/* The following undef is to prevent a name conflict with a macro
-in MySQL: */
-#undef rw_lock_t
-#endif /* !UNIV_HOTBACKUP */
-
-/* Latch types; these are used also in btr0btr.h: keep the numerical values
-smaller than 30 and the order of the numerical values like below! */
-#define RW_S_LATCH 1
-#define RW_X_LATCH 2
-#define RW_NO_LATCH 3
-
-#ifndef UNIV_HOTBACKUP
-/* We decrement lock_word by this amount for each x_lock. It is also the
-start value for the lock_word, meaning that it limits the maximum number
-of concurrent read locks before the rw_lock breaks. The current value of
-0x00100000 allows 1,048,575 concurrent readers and 2047 recursive writers.*/
-#define X_LOCK_DECR 0x00100000
-
-typedef struct rw_lock_struct rw_lock_t;
-#ifdef UNIV_SYNC_DEBUG
-typedef struct rw_lock_debug_struct rw_lock_debug_t;
-#endif /* UNIV_SYNC_DEBUG */
-
-typedef UT_LIST_BASE_NODE_T(rw_lock_t) rw_lock_list_t;
-
-extern rw_lock_list_t rw_lock_list;
-extern mutex_t rw_lock_list_mutex;
-
-#ifdef UNIV_SYNC_DEBUG
-/* The global mutex which protects debug info lists of all rw-locks.
-To modify the debug info list of an rw-lock, this mutex has to be
-
-acquired in addition to the mutex protecting the lock. */
-extern mutex_t rw_lock_debug_mutex;
-extern os_event_t rw_lock_debug_event; /*!< If deadlock detection does
- not get immediately the mutex it
- may wait for this event */
-extern ibool rw_lock_debug_waiters; /*!< This is set to TRUE, if
- there may be waiters for the event */
-#endif /* UNIV_SYNC_DEBUG */
-
-/** number of spin waits on rw-latches,
-resulted during exclusive (write) locks */
-extern ib_int64_t rw_s_spin_wait_count;
-/** number of spin loop rounds on rw-latches,
-resulted during exclusive (write) locks */
-extern ib_int64_t rw_s_spin_round_count;
-/** number of unlocks (that unlock shared locks),
-set only when UNIV_SYNC_PERF_STAT is defined */
-extern ib_int64_t rw_s_exit_count;
-/** number of OS waits on rw-latches,
-resulted during shared (read) locks */
-extern ib_int64_t rw_s_os_wait_count;
-/** number of spin waits on rw-latches,
-resulted during shared (read) locks */
-extern ib_int64_t rw_x_spin_wait_count;
-/** number of spin loop rounds on rw-latches,
-resulted during shared (read) locks */
-extern ib_int64_t rw_x_spin_round_count;
-/** number of OS waits on rw-latches,
-resulted during exclusive (write) locks */
-extern ib_int64_t rw_x_os_wait_count;
-/** number of unlocks (that unlock exclusive locks),
-set only when UNIV_SYNC_PERF_STAT is defined */
-extern ib_int64_t rw_x_exit_count;
-
-/******************************************************************//**
-Creates, or rather, initializes an rw-lock object in a specified memory
-location (which must be appropriately aligned). The rw-lock is initialized
-to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
-is necessary only if the memory block containing it is freed. */
-#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
-# define rw_lock_create(L, level) \
- rw_lock_create_func((L), (level), #L, __FILE__, __LINE__)
-# else /* UNIV_SYNC_DEBUG */
-# define rw_lock_create(L, level) \
- rw_lock_create_func((L), #L, __FILE__, __LINE__)
-# endif /* UNIV_SYNC_DEBUG */
-#else /* UNIV_DEBUG */
-# define rw_lock_create(L, level) \
- rw_lock_create_func((L), __FILE__, __LINE__)
-#endif /* UNIV_DEBUG */
-
-/******************************************************************//**
-Creates, or rather, initializes an rw-lock object in a specified memory
-location (which must be appropriately aligned). The rw-lock is initialized
-to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
-is necessary only if the memory block containing it is freed. */
-UNIV_INTERN
-void
-rw_lock_create_func(
-/*================*/
- rw_lock_t* lock, /*!< in: pointer to memory */
-#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
- const char* cmutex_name, /*!< in: mutex name */
-#endif /* UNIV_DEBUG */
- const char* cfile_name, /*!< in: file name where created */
- ulint cline); /*!< in: file line where created */
-/******************************************************************//**
-Calling this function is obligatory only if the memory buffer containing
-the rw-lock is freed. Removes an rw-lock object from the global list. The
-rw-lock is checked to be in the non-locked state. */
-UNIV_INTERN
-void
-rw_lock_free(
-/*=========*/
- rw_lock_t* lock); /*!< in: rw-lock */
-#ifdef UNIV_DEBUG
-/******************************************************************//**
-Checks that the rw-lock has been initialized and that there are no
-simultaneous shared and exclusive locks.
-@return TRUE */
-UNIV_INTERN
-ibool
-rw_lock_validate(
-/*=============*/
- rw_lock_t* lock); /*!< in: rw-lock */
-#endif /* UNIV_DEBUG */
-/**************************************************************//**
-NOTE! The following macros should be used in rw s-locking, not the
-corresponding function. */
-
-#define rw_lock_s_lock(M) rw_lock_s_lock_func(\
- (M), 0, __FILE__, __LINE__)
-/**************************************************************//**
-NOTE! The following macros should be used in rw s-locking, not the
-corresponding function. */
-
-#define rw_lock_s_lock_gen(M, P) rw_lock_s_lock_func(\
- (M), (P), __FILE__, __LINE__)
-/**************************************************************//**
-NOTE! The following macros should be used in rw s-locking, not the
-corresponding function. */
-
-#define rw_lock_s_lock_nowait(M, F, L) rw_lock_s_lock_low(\
- (M), 0, (F), (L))
-/******************************************************************//**
-Low-level function which tries to lock an rw-lock in s-mode. Performs no
-spinning.
-@return TRUE if success */
-UNIV_INLINE
-ibool
-rw_lock_s_lock_low(
-/*===============*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass __attribute__((unused)),
- /*!< in: pass value; != 0, if the lock will be
- passed to another thread to unlock */
- const char* file_name, /*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
-/******************************************************************//**
-NOTE! Use the corresponding macro, not directly this function, except if
-you supply the file name and line number. Lock an rw-lock in shared mode
-for the current thread. If the rw-lock is locked in exclusive mode, or
-there is an exclusive lock request waiting, the function spins a preset
-time (controlled by SYNC_SPIN_ROUNDS), waiting for the lock, before
-suspending the thread. */
-UNIV_INLINE
-void
-rw_lock_s_lock_func(
-/*================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
-/******************************************************************//**
-NOTE! Use the corresponding macro, not directly this function! Lock an
-rw-lock in exclusive mode for the current thread if the lock can be
-obtained immediately.
-@return TRUE if success */
-UNIV_INLINE
-ibool
-rw_lock_x_lock_func_nowait(
-/*=======================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
-/******************************************************************//**
-Releases a shared mode lock. */
-UNIV_INLINE
-void
-rw_lock_s_unlock_func(
-/*==================*/
-#ifdef UNIV_SYNC_DEBUG
- ulint pass, /*!< in: pass value; != 0, if the lock may have
- been passed to another thread to unlock */
-#endif
- rw_lock_t* lock); /*!< in/out: rw-lock */
-
-#ifdef UNIV_SYNC_DEBUG
-# define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(P, L)
-#else
-# define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(L)
-#endif
-/*******************************************************************//**
-Releases a shared mode lock. */
-#define rw_lock_s_unlock(L) rw_lock_s_unlock_gen(L, 0)
-
-/**************************************************************//**
-NOTE! The following macro should be used in rw x-locking, not the
-corresponding function. */
-
-#define rw_lock_x_lock(M) rw_lock_x_lock_func(\
- (M), 0, __FILE__, __LINE__)
-/**************************************************************//**
-NOTE! The following macro should be used in rw x-locking, not the
-corresponding function. */
-
-#define rw_lock_x_lock_gen(M, P) rw_lock_x_lock_func(\
- (M), (P), __FILE__, __LINE__)
-/**************************************************************//**
-NOTE! The following macros should be used in rw x-locking, not the
-corresponding function. */
-
-#define rw_lock_x_lock_nowait(M) rw_lock_x_lock_func_nowait(\
- (M), __FILE__, __LINE__)
-/******************************************************************//**
-NOTE! Use the corresponding macro, not directly this function! Lock an
-rw-lock in exclusive mode for the current thread. If the rw-lock is locked
-in shared or exclusive mode, or there is an exclusive lock request waiting,
-the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
-for the lock, before suspending the thread. If the same thread has an x-lock
-on the rw-lock, locking succeed, with the following exception: if pass != 0,
-only a single x-lock may be taken on the lock. NOTE: If the same thread has
-an s-lock, locking does not succeed! */
-UNIV_INTERN
-void
-rw_lock_x_lock_func(
-/*================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
-/******************************************************************//**
-Releases an exclusive mode lock. */
-UNIV_INLINE
-void
-rw_lock_x_unlock_func(
-/*==================*/
-#ifdef UNIV_SYNC_DEBUG
- ulint pass, /*!< in: pass value; != 0, if the lock may have
- been passed to another thread to unlock */
-#endif
- rw_lock_t* lock); /*!< in/out: rw-lock */
-
-#ifdef UNIV_SYNC_DEBUG
-# define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(P, L)
-#else
-# define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(L)
-#endif
-/*******************************************************************//**
-Releases an exclusive mode lock. */
-#define rw_lock_x_unlock(L) rw_lock_x_unlock_gen(L, 0)
-
-/******************************************************************//**
-Low-level function which locks an rw-lock in s-mode when we know that it
-is possible and none else is currently accessing the rw-lock structure.
-Then we can do the locking without reserving the mutex. */
-UNIV_INLINE
-void
-rw_lock_s_lock_direct(
-/*==================*/
- rw_lock_t* lock, /*!< in/out: rw-lock */
- const char* file_name, /*!< in: file name where requested */
- ulint line); /*!< in: line where lock requested */
-/******************************************************************//**
-Low-level function which locks an rw-lock in x-mode when we know that it
-is not locked and none else is currently accessing the rw-lock structure.
-Then we can do the locking without reserving the mutex. */
-UNIV_INLINE
-void
-rw_lock_x_lock_direct(
-/*==================*/
- rw_lock_t* lock, /*!< in/out: rw-lock */
- const char* file_name, /*!< in: file name where requested */
- ulint line); /*!< in: line where lock requested */
-/******************************************************************//**
-This function is used in the insert buffer to move the ownership of an
-x-latch on a buffer frame to the current thread. The x-latch was set by
-the buffer read operation and it protected the buffer frame while the
-read was done. The ownership is moved because we want that the current
-thread is able to acquire a second x-latch which is stored in an mtr.
-This, in turn, is needed to pass the debug checks of index page
-operations. */
-UNIV_INTERN
-void
-rw_lock_x_lock_move_ownership(
-/*==========================*/
- rw_lock_t* lock); /*!< in: lock which was x-locked in the
- buffer read */
-/******************************************************************//**
-Releases a shared mode lock when we know there are no waiters and none
-else will access the lock during the time this function is executed. */
-UNIV_INLINE
-void
-rw_lock_s_unlock_direct(
-/*====================*/
- rw_lock_t* lock); /*!< in/out: rw-lock */
-/******************************************************************//**
-Releases an exclusive mode lock when we know there are no waiters, and
-none else will access the lock durint the time this function is executed. */
-UNIV_INLINE
-void
-rw_lock_x_unlock_direct(
-/*====================*/
- rw_lock_t* lock); /*!< in/out: rw-lock */
-/******************************************************************//**
-Returns the value of writer_count for the lock. Does not reserve the lock
-mutex, so the caller must be sure it is not changed during the call.
-@return value of writer_count */
-UNIV_INLINE
-ulint
-rw_lock_get_x_lock_count(
-/*=====================*/
- const rw_lock_t* lock); /*!< in: rw-lock */
-/********************************************************************//**
-Check if there are threads waiting for the rw-lock.
-@return 1 if waiters, 0 otherwise */
-UNIV_INLINE
-ulint
-rw_lock_get_waiters(
-/*================*/
- const rw_lock_t* lock); /*!< in: rw-lock */
-/******************************************************************//**
-Returns the write-status of the lock - this function made more sense
-with the old rw_lock implementation.
-@return RW_LOCK_NOT_LOCKED, RW_LOCK_EX, RW_LOCK_WAIT_EX */
-UNIV_INLINE
-ulint
-rw_lock_get_writer(
-/*===============*/
- const rw_lock_t* lock); /*!< in: rw-lock */
-/******************************************************************//**
-Returns the number of readers.
-@return number of readers */
-UNIV_INLINE
-ulint
-rw_lock_get_reader_count(
-/*=====================*/
- const rw_lock_t* lock); /*!< in: rw-lock */
-/******************************************************************//**
-Decrements lock_word the specified amount if it is greater than 0.
-This is used by both s_lock and x_lock operations.
-@return TRUE if decr occurs */
-UNIV_INLINE
-ibool
-rw_lock_lock_word_decr(
-/*===================*/
- rw_lock_t* lock, /*!< in/out: rw-lock */
- ulint amount); /*!< in: amount to decrement */
-/******************************************************************//**
-Increments lock_word the specified amount and returns new value.
-@return lock->lock_word after increment */
-UNIV_INLINE
-lint
-rw_lock_lock_word_incr(
-/*===================*/
- rw_lock_t* lock, /*!< in/out: rw-lock */
- ulint amount); /*!< in: amount to increment */
-/******************************************************************//**
-This function sets the lock->writer_thread and lock->recursive fields.
-For platforms where we are using atomic builtins instead of lock->mutex
-it sets the lock->writer_thread field using atomics to ensure memory
-ordering. Note that it is assumed that the caller of this function
-effectively owns the lock i.e.: nobody else is allowed to modify
-lock->writer_thread at this point in time.
-The protocol is that lock->writer_thread MUST be updated BEFORE the
-lock->recursive flag is set. */
-UNIV_INLINE
-void
-rw_lock_set_writer_id_and_recursion_flag(
-/*=====================================*/
- rw_lock_t* lock, /*!< in/out: lock to work on */
- ibool recursive); /*!< in: TRUE if recursion
- allowed */
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Checks if the thread has locked the rw-lock in the specified mode, with
-the pass value == 0. */
-UNIV_INTERN
-ibool
-rw_lock_own(
-/*========*/
- rw_lock_t* lock, /*!< in: rw-lock */
- ulint lock_type); /*!< in: lock type: RW_LOCK_SHARED,
- RW_LOCK_EX */
-#endif /* UNIV_SYNC_DEBUG */
-/******************************************************************//**
-Checks if somebody has locked the rw-lock in the specified mode. */
-UNIV_INTERN
-ibool
-rw_lock_is_locked(
-/*==============*/
- rw_lock_t* lock, /*!< in: rw-lock */
- ulint lock_type); /*!< in: lock type: RW_LOCK_SHARED,
- RW_LOCK_EX */
-#ifdef UNIV_SYNC_DEBUG
-/***************************************************************//**
-Prints debug info of an rw-lock. */
-UNIV_INTERN
-void
-rw_lock_print(
-/*==========*/
- rw_lock_t* lock); /*!< in: rw-lock */
-/***************************************************************//**
-Prints debug info of currently locked rw-locks. */
-UNIV_INTERN
-void
-rw_lock_list_print_info(
-/*====================*/
- FILE* file); /*!< in: file where to print */
-/***************************************************************//**
-Returns the number of currently locked rw-locks.
-Works only in the debug version.
-@return number of locked rw-locks */
-UNIV_INTERN
-ulint
-rw_lock_n_locked(void);
-/*==================*/
-
-/*#####################################################################*/
-
-/******************************************************************//**
-Acquires the debug mutex. We cannot use the mutex defined in sync0sync,
-because the debug mutex is also acquired in sync0arr while holding the OS
-mutex protecting the sync array, and the ordinary mutex_enter might
-recursively call routines in sync0arr, leading to a deadlock on the OS
-mutex. */
-UNIV_INTERN
-void
-rw_lock_debug_mutex_enter(void);
-/*==========================*/
-/******************************************************************//**
-Releases the debug mutex. */
-UNIV_INTERN
-void
-rw_lock_debug_mutex_exit(void);
-/*==========================*/
-/*********************************************************************//**
-Prints info of a debug struct. */
-UNIV_INTERN
-void
-rw_lock_debug_print(
-/*================*/
- rw_lock_debug_t* info); /*!< in: debug struct */
-#endif /* UNIV_SYNC_DEBUG */
-
-/* NOTE! The structure appears here only for the compiler to know its size.
-Do not use its fields directly! */
-
-/** The structure used in the spin lock implementation of a read-write
-lock. Several threads may have a shared lock simultaneously in this
-lock, but only one writer may have an exclusive lock, in which case no
-shared locks are allowed. To prevent starving of a writer blocked by
-readers, a writer may queue for x-lock by decrementing lock_word: no
-new readers will be let in while the thread waits for readers to
-exit. */
-struct rw_lock_struct {
- volatile lint lock_word;
- /*!< Holds the state of the lock. */
- volatile ulint waiters;/*!< 1: there are waiters */
- volatile ibool recursive;/*!< Default value FALSE which means the lock
- is non-recursive. The value is typically set
- to TRUE making normal rw_locks recursive. In
- case of asynchronous IO, when a non-zero
- value of 'pass' is passed then we keep the
- lock non-recursive.
- This flag also tells us about the state of
- writer_thread field. If this flag is set
- then writer_thread MUST contain the thread
- id of the current x-holder or wait-x thread.
- This flag must be reset in x_unlock
- functions before incrementing the lock_word */
- volatile os_thread_id_t writer_thread;
- /*!< Thread id of writer thread. Is only
- guaranteed to have sane and non-stale
- value iff recursive flag is set. */
- os_event_t event; /*!< Used by sync0arr.c for thread queueing */
- os_event_t wait_ex_event;
- /*!< Event for next-writer to wait on. A thread
- must decrement lock_word before waiting. */
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- mutex_t mutex; /*!< The mutex protecting rw_lock_struct */
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
-
- UT_LIST_NODE_T(rw_lock_t) list;
- /*!< All allocated rw locks are put into a
- list */
-#ifdef UNIV_SYNC_DEBUG
- UT_LIST_BASE_NODE_T(rw_lock_debug_t) debug_list;
- /*!< In the debug version: pointer to the debug
- info list of the lock */
- ulint level; /*!< Level in the global latching order. */
-#endif /* UNIV_SYNC_DEBUG */
- ulint count_os_wait; /*!< Count of os_waits. May not be accurate */
- const char* cfile_name;/*!< File name where lock created */
- /* last s-lock file/line is not guaranteed to be correct */
- const char* last_s_file_name;/*!< File name where last s-locked */
- const char* last_x_file_name;/*!< File name where last x-locked */
- ibool writer_is_wait_ex;
- /*!< This is TRUE if the writer field is
- RW_LOCK_WAIT_EX; this field is located far
- from the memory update hotspot fields which
- are at the start of this struct, thus we can
- peek this field without causing much memory
- bus traffic */
- unsigned cline:14; /*!< Line where created */
- unsigned last_s_line:14; /*!< Line number where last time s-locked */
- unsigned last_x_line:14; /*!< Line number where last time x-locked */
- ulint magic_n; /*!< RW_LOCK_MAGIC_N */
-};
-
-/** Value of rw_lock_struct::magic_n */
-#define RW_LOCK_MAGIC_N 22643
-
-#ifdef UNIV_SYNC_DEBUG
-/** The structure for storing debug info of an rw-lock */
-struct rw_lock_debug_struct {
-
- os_thread_id_t thread_id; /*!< The thread id of the thread which
- locked the rw-lock */
- ulint pass; /*!< Pass value given in the lock operation */
- ulint lock_type; /*!< Type of the lock: RW_LOCK_EX,
- RW_LOCK_SHARED, RW_LOCK_WAIT_EX */
- const char* file_name;/*!< File name where the lock was obtained */
- ulint line; /*!< Line where the rw-lock was locked */
- UT_LIST_NODE_T(rw_lock_debug_t) list;
- /*!< Debug structs are linked in a two-way
- list */
-};
-#endif /* UNIV_SYNC_DEBUG */
-
-#ifndef UNIV_NONINL
-#include "sync0rw.ic"
-#endif
-#endif /* !UNIV_HOTBACKUP */
-
-#endif
diff --git a/storage/innodb_plugin/include/sync0rw.ic b/storage/innodb_plugin/include/sync0rw.ic
deleted file mode 100644
index 7116f1b7c9b..00000000000
--- a/storage/innodb_plugin/include/sync0rw.ic
+++ /dev/null
@@ -1,624 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/sync0rw.ic
-The read-write lock (for threads)
-
-Created 9/11/1995 Heikki Tuuri
-*******************************************************/
-
-/******************************************************************//**
-Lock an rw-lock in shared mode for the current thread. If the rw-lock is
-locked in exclusive mode, or there is an exclusive lock request waiting,
-the function spins a preset time (controlled by SYNC_SPIN_ROUNDS),
-waiting for the lock before suspending the thread. */
-UNIV_INTERN
-void
-rw_lock_s_lock_spin(
-/*================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Inserts the debug information for an rw-lock. */
-UNIV_INTERN
-void
-rw_lock_add_debug_info(
-/*===================*/
- rw_lock_t* lock, /*!< in: rw-lock */
- ulint pass, /*!< in: pass value */
- ulint lock_type, /*!< in: lock type */
- const char* file_name, /*!< in: file where requested */
- ulint line); /*!< in: line where requested */
-/******************************************************************//**
-Removes a debug information struct for an rw-lock. */
-UNIV_INTERN
-void
-rw_lock_remove_debug_info(
-/*======================*/
- rw_lock_t* lock, /*!< in: rw-lock */
- ulint pass, /*!< in: pass value */
- ulint lock_type); /*!< in: lock type */
-#endif /* UNIV_SYNC_DEBUG */
-
-/********************************************************************//**
-Check if there are threads waiting for the rw-lock.
-@return 1 if waiters, 0 otherwise */
-UNIV_INLINE
-ulint
-rw_lock_get_waiters(
-/*================*/
- const rw_lock_t* lock) /*!< in: rw-lock */
-{
- return(lock->waiters);
-}
-
-/********************************************************************//**
-Sets lock->waiters to 1. It is not an error if lock->waiters is already
-1. On platforms where ATOMIC builtins are used this function enforces a
-memory barrier. */
-UNIV_INLINE
-void
-rw_lock_set_waiter_flag(
-/*====================*/
- rw_lock_t* lock) /*!< in/out: rw-lock */
-{
-#ifdef INNODB_RW_LOCKS_USE_ATOMICS
- os_compare_and_swap_ulint(&lock->waiters, 0, 1);
-#else /* INNODB_RW_LOCKS_USE_ATOMICS */
- lock->waiters = 1;
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
-}
-
-/********************************************************************//**
-Resets lock->waiters to 0. It is not an error if lock->waiters is already
-0. On platforms where ATOMIC builtins are used this function enforces a
-memory barrier. */
-UNIV_INLINE
-void
-rw_lock_reset_waiter_flag(
-/*======================*/
- rw_lock_t* lock) /*!< in/out: rw-lock */
-{
-#ifdef INNODB_RW_LOCKS_USE_ATOMICS
- os_compare_and_swap_ulint(&lock->waiters, 1, 0);
-#else /* INNODB_RW_LOCKS_USE_ATOMICS */
- lock->waiters = 0;
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
-}
-
-/******************************************************************//**
-Returns the write-status of the lock - this function made more sense
-with the old rw_lock implementation.
-@return RW_LOCK_NOT_LOCKED, RW_LOCK_EX, RW_LOCK_WAIT_EX */
-UNIV_INLINE
-ulint
-rw_lock_get_writer(
-/*===============*/
- const rw_lock_t* lock) /*!< in: rw-lock */
-{
- lint lock_word = lock->lock_word;
- if (lock_word > 0) {
- /* return NOT_LOCKED in s-lock state, like the writer
- member of the old lock implementation. */
- return(RW_LOCK_NOT_LOCKED);
- } else if (((-lock_word) % X_LOCK_DECR) == 0) {
- return(RW_LOCK_EX);
- } else {
- ut_ad(lock_word > -X_LOCK_DECR);
- return(RW_LOCK_WAIT_EX);
- }
-}
-
-/******************************************************************//**
-Returns the number of readers.
-@return number of readers */
-UNIV_INLINE
-ulint
-rw_lock_get_reader_count(
-/*=====================*/
- const rw_lock_t* lock) /*!< in: rw-lock */
-{
- lint lock_word = lock->lock_word;
- if (lock_word > 0) {
- /* s-locked, no x-waiters */
- return(X_LOCK_DECR - lock_word);
- } else if (lock_word < 0 && lock_word > -X_LOCK_DECR) {
- /* s-locked, with x-waiters */
- return((ulint)(-lock_word));
- }
- return(0);
-}
-
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
-UNIV_INLINE
-mutex_t*
-rw_lock_get_mutex(
-/*==============*/
- rw_lock_t* lock)
-{
- return(&(lock->mutex));
-}
-#endif
-
-/******************************************************************//**
-Returns the value of writer_count for the lock. Does not reserve the lock
-mutex, so the caller must be sure it is not changed during the call.
-@return value of writer_count */
-UNIV_INLINE
-ulint
-rw_lock_get_x_lock_count(
-/*=====================*/
- const rw_lock_t* lock) /*!< in: rw-lock */
-{
- lint lock_copy = lock->lock_word;
- /* If there is a reader, lock_word is not divisible by X_LOCK_DECR */
- if (lock_copy > 0 || (-lock_copy) % X_LOCK_DECR != 0) {
- return(0);
- }
- return(((-lock_copy) / X_LOCK_DECR) + 1);
-}
-
-/******************************************************************//**
-Two different implementations for decrementing the lock_word of a rw_lock:
-one for systems supporting atomic operations, one for others. This does
-does not support recusive x-locks: they should be handled by the caller and
-need not be atomic since they are performed by the current lock holder.
-Returns true if the decrement was made, false if not.
-@return TRUE if decr occurs */
-UNIV_INLINE
-ibool
-rw_lock_lock_word_decr(
-/*===================*/
- rw_lock_t* lock, /*!< in/out: rw-lock */
- ulint amount) /*!< in: amount to decrement */
-{
-#ifdef INNODB_RW_LOCKS_USE_ATOMICS
- lint local_lock_word = lock->lock_word;
- while (local_lock_word > 0) {
- if (os_compare_and_swap_lint(&lock->lock_word,
- local_lock_word,
- local_lock_word - amount)) {
- return(TRUE);
- }
- local_lock_word = lock->lock_word;
- }
- return(FALSE);
-#else /* INNODB_RW_LOCKS_USE_ATOMICS */
- ibool success = FALSE;
- mutex_enter(&(lock->mutex));
- if (lock->lock_word > 0) {
- lock->lock_word -= amount;
- success = TRUE;
- }
- mutex_exit(&(lock->mutex));
- return(success);
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
-}
-
-/******************************************************************//**
-Increments lock_word the specified amount and returns new value.
-@return lock->lock_word after increment */
-UNIV_INLINE
-lint
-rw_lock_lock_word_incr(
-/*===================*/
- rw_lock_t* lock, /*!< in/out: rw-lock */
- ulint amount) /*!< in: amount of increment */
-{
-#ifdef INNODB_RW_LOCKS_USE_ATOMICS
- return(os_atomic_increment_lint(&lock->lock_word, amount));
-#else /* INNODB_RW_LOCKS_USE_ATOMICS */
- lint local_lock_word;
-
- mutex_enter(&(lock->mutex));
-
- lock->lock_word += amount;
- local_lock_word = lock->lock_word;
-
- mutex_exit(&(lock->mutex));
-
- return(local_lock_word);
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
-}
-
-/******************************************************************//**
-This function sets the lock->writer_thread and lock->recursive fields.
-For platforms where we are using atomic builtins instead of lock->mutex
-it sets the lock->writer_thread field using atomics to ensure memory
-ordering. Note that it is assumed that the caller of this function
-effectively owns the lock i.e.: nobody else is allowed to modify
-lock->writer_thread at this point in time.
-The protocol is that lock->writer_thread MUST be updated BEFORE the
-lock->recursive flag is set. */
-UNIV_INLINE
-void
-rw_lock_set_writer_id_and_recursion_flag(
-/*=====================================*/
- rw_lock_t* lock, /*!< in/out: lock to work on */
- ibool recursive) /*!< in: TRUE if recursion
- allowed */
-{
- os_thread_id_t curr_thread = os_thread_get_curr_id();
-
-#ifdef INNODB_RW_LOCKS_USE_ATOMICS
- os_thread_id_t local_thread;
- ibool success;
-
- /* Prevent Valgrind warnings about writer_thread being
- uninitialized. It does not matter if writer_thread is
- uninitialized, because we are comparing writer_thread against
- itself, and the operation should always succeed. */
- UNIV_MEM_VALID(&lock->writer_thread, sizeof lock->writer_thread);
-
- local_thread = lock->writer_thread;
- success = os_compare_and_swap_thread_id(
- &lock->writer_thread, local_thread, curr_thread);
- ut_a(success);
- lock->recursive = recursive;
-
-#else /* INNODB_RW_LOCKS_USE_ATOMICS */
-
- mutex_enter(&lock->mutex);
- lock->writer_thread = curr_thread;
- lock->recursive = recursive;
- mutex_exit(&lock->mutex);
-
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
-}
-
-/******************************************************************//**
-Low-level function which tries to lock an rw-lock in s-mode. Performs no
-spinning.
-@return TRUE if success */
-UNIV_INLINE
-ibool
-rw_lock_s_lock_low(
-/*===============*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass __attribute__((unused)),
- /*!< in: pass value; != 0, if the lock will be
- passed to another thread to unlock */
- const char* file_name, /*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
-{
- /* TODO: study performance of UNIV_LIKELY branch prediction hints. */
- if (!rw_lock_lock_word_decr(lock, 1)) {
- /* Locking did not succeed */
- return(FALSE);
- }
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name, line);
-#endif
- /* These debugging values are not set safely: they may be incorrect
- or even refer to a line that is invalid for the file name. */
- lock->last_s_file_name = file_name;
- lock->last_s_line = line;
-
- return(TRUE); /* locking succeeded */
-}
-
-/******************************************************************//**
-Low-level function which locks an rw-lock in s-mode when we know that it
-is possible and none else is currently accessing the rw-lock structure.
-Then we can do the locking without reserving the mutex. */
-UNIV_INLINE
-void
-rw_lock_s_lock_direct(
-/*==================*/
- rw_lock_t* lock, /*!< in/out: rw-lock */
- const char* file_name, /*!< in: file name where requested */
- ulint line) /*!< in: line where lock requested */
-{
- ut_ad(lock->lock_word == X_LOCK_DECR);
-
- /* Indicate there is a new reader by decrementing lock_word */
- lock->lock_word--;
-
- lock->last_s_file_name = file_name;
- lock->last_s_line = line;
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name, line);
-#endif
-}
-
-/******************************************************************//**
-Low-level function which locks an rw-lock in x-mode when we know that it
-is not locked and none else is currently accessing the rw-lock structure.
-Then we can do the locking without reserving the mutex. */
-UNIV_INLINE
-void
-rw_lock_x_lock_direct(
-/*==================*/
- rw_lock_t* lock, /*!< in/out: rw-lock */
- const char* file_name, /*!< in: file name where requested */
- ulint line) /*!< in: line where lock requested */
-{
- ut_ad(rw_lock_validate(lock));
- ut_ad(lock->lock_word == X_LOCK_DECR);
-
- lock->lock_word -= X_LOCK_DECR;
- lock->writer_thread = os_thread_get_curr_id();
- lock->recursive = TRUE;
-
- lock->last_x_file_name = file_name;
- lock->last_x_line = line;
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
-#endif
-}
-
-/******************************************************************//**
-NOTE! Use the corresponding macro, not directly this function! Lock an
-rw-lock in shared mode for the current thread. If the rw-lock is locked
-in exclusive mode, or there is an exclusive lock request waiting, the
-function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting for
-the lock, before suspending the thread. */
-UNIV_INLINE
-void
-rw_lock_s_lock_func(
-/*================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
-{
- /* NOTE: As we do not know the thread ids for threads which have
- s-locked a latch, and s-lockers will be served only after waiting
- x-lock requests have been fulfilled, then if this thread already
- owns an s-lock here, it may end up in a deadlock with another thread
- which requests an x-lock here. Therefore, we will forbid recursive
- s-locking of a latch: the following assert will warn the programmer
- of the possibility of this kind of a deadlock. If we want to implement
- safe recursive s-locking, we should keep in a list the thread ids of
- the threads which have s-locked a latch. This would use some CPU
- time. */
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); /* see NOTE above */
-#endif /* UNIV_SYNC_DEBUG */
-
- /* TODO: study performance of UNIV_LIKELY branch prediction hints. */
- if (rw_lock_s_lock_low(lock, pass, file_name, line)) {
-
- return; /* Success */
- } else {
- /* Did not succeed, try spin wait */
-
- rw_lock_s_lock_spin(lock, pass, file_name, line);
-
- return;
- }
-}
-
-/******************************************************************//**
-NOTE! Use the corresponding macro, not directly this function! Lock an
-rw-lock in exclusive mode for the current thread if the lock can be
-obtained immediately.
-@return TRUE if success */
-UNIV_INLINE
-ibool
-rw_lock_x_lock_func_nowait(
-/*=======================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
-{
- os_thread_id_t curr_thread = os_thread_get_curr_id();
-
- ibool success;
-
-#ifdef INNODB_RW_LOCKS_USE_ATOMICS
- success = os_compare_and_swap_lint(&lock->lock_word, X_LOCK_DECR, 0);
-#else
-
- success = FALSE;
- mutex_enter(&(lock->mutex));
- if (lock->lock_word == X_LOCK_DECR) {
- lock->lock_word = 0;
- success = TRUE;
- }
- mutex_exit(&(lock->mutex));
-
-#endif
- if (success) {
- rw_lock_set_writer_id_and_recursion_flag(lock, TRUE);
-
- } else if (lock->recursive
- && os_thread_eq(lock->writer_thread, curr_thread)) {
- /* Relock: this lock_word modification is safe since no other
- threads can modify (lock, unlock, or reserve) lock_word while
- there is an exclusive writer and this is the writer thread. */
- lock->lock_word -= X_LOCK_DECR;
-
- ut_ad(((-lock->lock_word) % X_LOCK_DECR) == 0);
-
- } else {
- /* Failure */
- return(FALSE);
- }
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
-#endif
-
- lock->last_x_file_name = file_name;
- lock->last_x_line = line;
-
- ut_ad(rw_lock_validate(lock));
-
- return(TRUE);
-}
-
-/******************************************************************//**
-Releases a shared mode lock. */
-UNIV_INLINE
-void
-rw_lock_s_unlock_func(
-/*==================*/
-#ifdef UNIV_SYNC_DEBUG
- ulint pass, /*!< in: pass value; != 0, if the lock may have
- been passed to another thread to unlock */
-#endif
- rw_lock_t* lock) /*!< in/out: rw-lock */
-{
- ut_ad((lock->lock_word % X_LOCK_DECR) != 0);
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED);
-#endif
-
- /* Increment lock_word to indicate 1 less reader */
- if (rw_lock_lock_word_incr(lock, 1) == 0) {
-
- /* wait_ex waiter exists. It may not be asleep, but we signal
- anyway. We do not wake other waiters, because they can't
- exist without wait_ex waiter and wait_ex waiter goes first.*/
- os_event_set(lock->wait_ex_event);
- sync_array_object_signalled(sync_primary_wait_array);
-
- }
-
- ut_ad(rw_lock_validate(lock));
-
-#ifdef UNIV_SYNC_PERF_STAT
- rw_s_exit_count++;
-#endif
-}
-
-/******************************************************************//**
-Releases a shared mode lock when we know there are no waiters and none
-else will access the lock during the time this function is executed. */
-UNIV_INLINE
-void
-rw_lock_s_unlock_direct(
-/*====================*/
- rw_lock_t* lock) /*!< in/out: rw-lock */
-{
- ut_ad(lock->lock_word < X_LOCK_DECR);
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_remove_debug_info(lock, 0, RW_LOCK_SHARED);
-#endif
-
- /* Decrease reader count by incrementing lock_word */
- lock->lock_word++;
-
- ut_ad(!lock->waiters);
- ut_ad(rw_lock_validate(lock));
-#ifdef UNIV_SYNC_PERF_STAT
- rw_s_exit_count++;
-#endif
-}
-
-/******************************************************************//**
-Releases an exclusive mode lock. */
-UNIV_INLINE
-void
-rw_lock_x_unlock_func(
-/*==================*/
-#ifdef UNIV_SYNC_DEBUG
- ulint pass, /*!< in: pass value; != 0, if the lock may have
- been passed to another thread to unlock */
-#endif
- rw_lock_t* lock) /*!< in/out: rw-lock */
-{
- ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
-
- /* lock->recursive flag also indicates if lock->writer_thread is
- valid or stale. If we are the last of the recursive callers
- then we must unset lock->recursive flag to indicate that the
- lock->writer_thread is now stale.
- Note that since we still hold the x-lock we can safely read the
- lock_word. */
- if (lock->lock_word == 0) {
- /* Last caller in a possible recursive chain. */
- lock->recursive = FALSE;
- UNIV_MEM_INVALID(&lock->writer_thread,
- sizeof lock->writer_thread);
- }
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX);
-#endif
-
- if (rw_lock_lock_word_incr(lock, X_LOCK_DECR) == X_LOCK_DECR) {
- /* Lock is now free. May have to signal read/write waiters.
- We do not need to signal wait_ex waiters, since they cannot
- exist when there is a writer. */
- if (lock->waiters) {
- rw_lock_reset_waiter_flag(lock);
- os_event_set(lock->event);
- sync_array_object_signalled(sync_primary_wait_array);
- }
- }
-
- ut_ad(rw_lock_validate(lock));
-
-#ifdef UNIV_SYNC_PERF_STAT
- rw_x_exit_count++;
-#endif
-}
-
-/******************************************************************//**
-Releases an exclusive mode lock when we know there are no waiters, and
-none else will access the lock during the time this function is executed. */
-UNIV_INLINE
-void
-rw_lock_x_unlock_direct(
-/*====================*/
- rw_lock_t* lock) /*!< in/out: rw-lock */
-{
- /* Reset the exclusive lock if this thread no longer has an x-mode
- lock */
-
- ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX);
-#endif
-
- if (lock->lock_word == 0) {
- lock->recursive = FALSE;
- UNIV_MEM_INVALID(&lock->writer_thread,
- sizeof lock->writer_thread);
- }
-
- lock->lock_word += X_LOCK_DECR;
-
- ut_ad(!lock->waiters);
- ut_ad(rw_lock_validate(lock));
-
-#ifdef UNIV_SYNC_PERF_STAT
- rw_x_exit_count++;
-#endif
-}
diff --git a/storage/innodb_plugin/include/sync0sync.h b/storage/innodb_plugin/include/sync0sync.h
deleted file mode 100644
index df990823cc4..00000000000
--- a/storage/innodb_plugin/include/sync0sync.h
+++ /dev/null
@@ -1,578 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/sync0sync.h
-Mutex, the basic synchronization primitive
-
-Created 9/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef sync0sync_h
-#define sync0sync_h
-
-#include "univ.i"
-#include "sync0types.h"
-#include "ut0lst.h"
-#include "ut0mem.h"
-#include "os0thread.h"
-#include "os0sync.h"
-#include "sync0arr.h"
-
-#if defined(UNIV_DEBUG) && !defined(UNIV_HOTBACKUP)
-extern my_bool timed_mutexes;
-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
-
-#ifdef HAVE_WINDOWS_ATOMICS
-typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates
- on LONG variable */
-#else
-typedef byte lock_word_t;
-#endif
-
-/******************************************************************//**
-Initializes the synchronization data structures. */
-UNIV_INTERN
-void
-sync_init(void);
-/*===========*/
-/******************************************************************//**
-Frees the resources in synchronization data structures. */
-UNIV_INTERN
-void
-sync_close(void);
-/*===========*/
-/******************************************************************//**
-Creates, or rather, initializes a mutex object to a specified memory
-location (which must be appropriately aligned). The mutex is initialized
-in the reset state. Explicit freeing of the mutex with mutex_free is
-necessary only if the memory block containing it is freed. */
-
-#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
-# define mutex_create(M, level) \
- mutex_create_func((M), #M, (level), __FILE__, __LINE__)
-# else
-# define mutex_create(M, level) \
- mutex_create_func((M), #M, __FILE__, __LINE__)
-# endif
-#else
-# define mutex_create(M, level) \
- mutex_create_func((M), __FILE__, __LINE__)
-#endif
-
-/******************************************************************//**
-Creates, or rather, initializes a mutex object in a specified memory
-location (which must be appropriately aligned). The mutex is initialized
-in the reset state. Explicit freeing of the mutex with mutex_free is
-necessary only if the memory block containing it is freed. */
-UNIV_INTERN
-void
-mutex_create_func(
-/*==============*/
- mutex_t* mutex, /*!< in: pointer to memory */
-#ifdef UNIV_DEBUG
- const char* cmutex_name, /*!< in: mutex name */
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
-#endif /* UNIV_DEBUG */
- const char* cfile_name, /*!< in: file name where created */
- ulint cline); /*!< in: file line where created */
-
-#undef mutex_free /* Fix for MacOS X */
-
-/******************************************************************//**
-Calling this function is obligatory only if the memory buffer containing
-the mutex is freed. Removes a mutex object from the mutex list. The mutex
-is checked to be in the reset state. */
-UNIV_INTERN
-void
-mutex_free(
-/*=======*/
- mutex_t* mutex); /*!< in: mutex */
-/**************************************************************//**
-NOTE! The following macro should be used in mutex locking, not the
-corresponding function. */
-
-#define mutex_enter(M) mutex_enter_func((M), __FILE__, __LINE__)
-/**************************************************************//**
-NOTE! The following macro should be used in mutex locking, not the
-corresponding function. */
-
-/* NOTE! currently same as mutex_enter! */
-
-#define mutex_enter_fast(M) mutex_enter_func((M), __FILE__, __LINE__)
-/******************************************************************//**
-NOTE! Use the corresponding macro in the header file, not this function
-directly. Locks a mutex for the current thread. If the mutex is reserved
-the function spins a preset time (controlled by SYNC_SPIN_ROUNDS) waiting
-for the mutex before suspending the thread. */
-UNIV_INLINE
-void
-mutex_enter_func(
-/*=============*/
- mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where locked */
- ulint line); /*!< in: line where locked */
-/**************************************************************//**
-NOTE! The following macro should be used in mutex locking, not the
-corresponding function. */
-
-#define mutex_enter_nowait(M) \
- mutex_enter_nowait_func((M), __FILE__, __LINE__)
-/********************************************************************//**
-NOTE! Use the corresponding macro in the header file, not this function
-directly. Tries to lock the mutex for the current thread. If the lock is not
-acquired immediately, returns with return value 1.
-@return 0 if succeed, 1 if not */
-UNIV_INTERN
-ulint
-mutex_enter_nowait_func(
-/*====================*/
- mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where mutex
- requested */
- ulint line); /*!< in: line where requested */
-/******************************************************************//**
-Unlocks a mutex owned by the current thread. */
-UNIV_INLINE
-void
-mutex_exit(
-/*=======*/
- mutex_t* mutex); /*!< in: pointer to mutex */
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Returns TRUE if no mutex or rw-lock is currently locked.
-Works only in the debug version.
-@return TRUE if no mutexes and rw-locks reserved */
-UNIV_INTERN
-ibool
-sync_all_freed(void);
-/*================*/
-#endif /* UNIV_SYNC_DEBUG */
-/*#####################################################################
-FUNCTION PROTOTYPES FOR DEBUGGING */
-/*******************************************************************//**
-Prints wait info of the sync system. */
-UNIV_INTERN
-void
-sync_print_wait_info(
-/*=================*/
- FILE* file); /*!< in: file where to print */
-/*******************************************************************//**
-Prints info of the sync system. */
-UNIV_INTERN
-void
-sync_print(
-/*=======*/
- FILE* file); /*!< in: file where to print */
-#ifdef UNIV_DEBUG
-/******************************************************************//**
-Checks that the mutex has been initialized.
-@return TRUE */
-UNIV_INTERN
-ibool
-mutex_validate(
-/*===========*/
- const mutex_t* mutex); /*!< in: mutex */
-/******************************************************************//**
-Checks that the current thread owns the mutex. Works only
-in the debug version.
-@return TRUE if owns */
-UNIV_INTERN
-ibool
-mutex_own(
-/*======*/
- const mutex_t* mutex); /*!< in: mutex */
-#endif /* UNIV_DEBUG */
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Adds a latch and its level in the thread level array. Allocates the memory
-for the array if called first time for this OS thread. Makes the checks
-against other latch levels stored in the array for this thread. */
-UNIV_INTERN
-void
-sync_thread_add_level(
-/*==================*/
- void* latch, /*!< in: pointer to a mutex or an rw-lock */
- ulint level); /*!< in: level in the latching order; if
- SYNC_LEVEL_VARYING, nothing is done */
-/******************************************************************//**
-Removes a latch from the thread level array if it is found there.
-@return TRUE if found in the array; it is no error if the latch is
-not found, as we presently are not able to determine the level for
-every latch reservation the program does */
-UNIV_INTERN
-ibool
-sync_thread_reset_level(
-/*====================*/
- void* latch); /*!< in: pointer to a mutex or an rw-lock */
-/******************************************************************//**
-Checks that the level array for the current thread is empty.
-@return TRUE if empty */
-UNIV_INTERN
-ibool
-sync_thread_levels_empty(void);
-/*==========================*/
-/******************************************************************//**
-Checks that the level array for the current thread is empty.
-@return TRUE if empty except the exceptions specified below */
-UNIV_INTERN
-ibool
-sync_thread_levels_empty_gen(
-/*=========================*/
- ibool dict_mutex_allowed); /*!< in: TRUE if dictionary mutex is
- allowed to be owned by the thread,
- also purge_is_running mutex is
- allowed */
-/******************************************************************//**
-Gets the debug information for a reserved mutex. */
-UNIV_INTERN
-void
-mutex_get_debug_info(
-/*=================*/
- mutex_t* mutex, /*!< in: mutex */
- const char** file_name, /*!< out: file where requested */
- ulint* line, /*!< out: line where requested */
- os_thread_id_t* thread_id); /*!< out: id of the thread which owns
- the mutex */
-/******************************************************************//**
-Counts currently reserved mutexes. Works only in the debug version.
-@return number of reserved mutexes */
-UNIV_INTERN
-ulint
-mutex_n_reserved(void);
-/*==================*/
-#endif /* UNIV_SYNC_DEBUG */
-/******************************************************************//**
-NOT to be used outside this module except in debugging! Gets the value
-of the lock word. */
-UNIV_INLINE
-lock_word_t
-mutex_get_lock_word(
-/*================*/
- const mutex_t* mutex); /*!< in: mutex */
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-NOT to be used outside this module except in debugging! Gets the waiters
-field in a mutex.
-@return value to set */
-UNIV_INLINE
-ulint
-mutex_get_waiters(
-/*==============*/
- const mutex_t* mutex); /*!< in: mutex */
-#endif /* UNIV_SYNC_DEBUG */
-
-/*
- LATCHING ORDER WITHIN THE DATABASE
- ==================================
-
-The mutex or latch in the central memory object, for instance, a rollback
-segment object, must be acquired before acquiring the latch or latches to
-the corresponding file data structure. In the latching order below, these
-file page object latches are placed immediately below the corresponding
-central memory object latch or mutex.
-
-Synchronization object Notes
----------------------- -----
-
-Dictionary mutex If we have a pointer to a dictionary
-| object, e.g., a table, it can be
-| accessed without reserving the
-| dictionary mutex. We must have a
-| reservation, a memoryfix, to the
-| appropriate table object in this case,
-| and the table must be explicitly
-| released later.
-V
-Dictionary header
-|
-V
-Secondary index tree latch The tree latch protects also all
-| the B-tree non-leaf pages. These
-V can be read with the page only
-Secondary index non-leaf bufferfixed to save CPU time,
-| no s-latch is needed on the page.
-| Modification of a page requires an
-| x-latch on the page, however. If a
-| thread owns an x-latch to the tree,
-| it is allowed to latch non-leaf pages
-| even after it has acquired the fsp
-| latch.
-V
-Secondary index leaf The latch on the secondary index leaf
-| can be kept while accessing the
-| clustered index, to save CPU time.
-V
-Clustered index tree latch To increase concurrency, the tree
-| latch is usually released when the
-| leaf page latch has been acquired.
-V
-Clustered index non-leaf
-|
-V
-Clustered index leaf
-|
-V
-Transaction system header
-|
-V
-Transaction undo mutex The undo log entry must be written
-| before any index page is modified.
-| Transaction undo mutex is for the undo
-| logs the analogue of the tree latch
-| for a B-tree. If a thread has the
-| trx undo mutex reserved, it is allowed
-| to latch the undo log pages in any
-| order, and also after it has acquired
-| the fsp latch.
-V
-Rollback segment mutex The rollback segment mutex must be
-| reserved, if, e.g., a new page must
-| be added to an undo log. The rollback
-| segment and the undo logs in its
-| history list can be seen as an
-| analogue of a B-tree, and the latches
-| reserved similarly, using a version of
-| lock-coupling. If an undo log must be
-| extended by a page when inserting an
-| undo log record, this corresponds to
-| a pessimistic insert in a B-tree.
-V
-Rollback segment header
-|
-V
-Purge system latch
-|
-V
-Undo log pages If a thread owns the trx undo mutex,
-| or for a log in the history list, the
-| rseg mutex, it is allowed to latch
-| undo log pages in any order, and even
-| after it has acquired the fsp latch.
-| If a thread does not have the
-| appropriate mutex, it is allowed to
-| latch only a single undo log page in
-| a mini-transaction.
-V
-File space management latch If a mini-transaction must allocate
-| several file pages, it can do that,
-| because it keeps the x-latch to the
-| file space management in its memo.
-V
-File system pages
-|
-V
-Kernel mutex If a kernel operation needs a file
-| page allocation, it must reserve the
-| fsp x-latch before acquiring the kernel
-| mutex.
-V
-Search system mutex
-|
-V
-Buffer pool mutex
-|
-V
-Log mutex
-|
-Any other latch
-|
-V
-Memory pool mutex */
-
-/* Latching order levels */
-
-/* User transaction locks are higher than any of the latch levels below:
-no latches are allowed when a thread goes to wait for a normal table
-or row lock! */
-#define SYNC_USER_TRX_LOCK 9999
-#define SYNC_NO_ORDER_CHECK 3000 /* this can be used to suppress
- latching order checking */
-#define SYNC_LEVEL_VARYING 2000 /* Level is varying. Only used with
- buffer pool page locks, which do not
- have a fixed level, but instead have
- their level set after the page is
- locked; see e.g.
- ibuf_bitmap_get_map_page(). */
-#define SYNC_TRX_I_S_RWLOCK 1910 /* Used for
- trx_i_s_cache_t::rw_lock */
-#define SYNC_TRX_I_S_LAST_READ 1900 /* Used for
- trx_i_s_cache_t::last_read_mutex */
-#define SYNC_FILE_FORMAT_TAG 1200 /* Used to serialize access to the
- file format tag */
-#define SYNC_DICT_OPERATION 1001 /* table create, drop, etc. reserve
- this in X-mode, implicit or backround
- operations purge, rollback, foreign
- key checks reserve this in S-mode */
-#define SYNC_DICT 1000
-#define SYNC_DICT_AUTOINC_MUTEX 999
-#define SYNC_DICT_HEADER 995
-#define SYNC_IBUF_HEADER 914
-#define SYNC_IBUF_PESS_INSERT_MUTEX 912
-#define SYNC_IBUF_MUTEX 910 /* ibuf mutex is really below
- SYNC_FSP_PAGE: we assign a value this
- high only to make the program to pass
- the debug checks */
-/*-------------------------------*/
-#define SYNC_INDEX_TREE 900
-#define SYNC_TREE_NODE_NEW 892
-#define SYNC_TREE_NODE_FROM_HASH 891
-#define SYNC_TREE_NODE 890
-#define SYNC_PURGE_SYS 810
-#define SYNC_PURGE_LATCH 800
-#define SYNC_TRX_UNDO 700
-#define SYNC_RSEG 600
-#define SYNC_RSEG_HEADER_NEW 591
-#define SYNC_RSEG_HEADER 590
-#define SYNC_TRX_UNDO_PAGE 570
-#define SYNC_EXTERN_STORAGE 500
-#define SYNC_FSP 400
-#define SYNC_FSP_PAGE 395
-/*------------------------------------- Insert buffer headers */
-/*------------------------------------- ibuf_mutex */
-/*------------------------------------- Insert buffer tree */
-#define SYNC_IBUF_BITMAP_MUTEX 351
-#define SYNC_IBUF_BITMAP 350
-/*------------------------------------- MySQL query cache mutex */
-/*------------------------------------- MySQL binlog mutex */
-/*-------------------------------*/
-#define SYNC_KERNEL 300
-#define SYNC_REC_LOCK 299
-#define SYNC_TRX_LOCK_HEAP 298
-#define SYNC_TRX_SYS_HEADER 290
-#define SYNC_LOG 170
-#define SYNC_RECV 168
-#define SYNC_WORK_QUEUE 162
-#define SYNC_SEARCH_SYS_CONF 161 /* for assigning btr_search_enabled */
-#define SYNC_SEARCH_SYS 160 /* NOTE that if we have a memory
- heap that can be extended to the
- buffer pool, its logical level is
- SYNC_SEARCH_SYS, as memory allocation
- can call routines there! Otherwise
- the level is SYNC_MEM_HASH. */
-#define SYNC_BUF_POOL 150
-#define SYNC_BUF_BLOCK 149
-#define SYNC_DOUBLEWRITE 140
-#define SYNC_ANY_LATCH 135
-#define SYNC_THR_LOCAL 133
-#define SYNC_MEM_HASH 131
-#define SYNC_MEM_POOL 130
-
-/* Codes used to designate lock operations */
-#define RW_LOCK_NOT_LOCKED 350
-#define RW_LOCK_EX 351
-#define RW_LOCK_EXCLUSIVE 351
-#define RW_LOCK_SHARED 352
-#define RW_LOCK_WAIT_EX 353
-#define SYNC_MUTEX 354
-
-/* NOTE! The structure appears here only for the compiler to know its size.
-Do not use its fields directly! The structure used in the spin lock
-implementation of a mutual exclusion semaphore. */
-
-/** InnoDB mutex */
-struct mutex_struct {
- os_event_t event; /*!< Used by sync0arr.c for the wait queue */
- volatile lock_word_t lock_word; /*!< lock_word is the target
- of the atomic test-and-set instruction when
- atomic operations are enabled. */
-
-#if !defined(HAVE_ATOMIC_BUILTINS)
- os_fast_mutex_t
- os_fast_mutex; /*!< We use this OS mutex in place of lock_word
- when atomic operations are not enabled */
-#endif
- ulint waiters; /*!< This ulint is set to 1 if there are (or
- may be) threads waiting in the global wait
- array for this mutex to be released.
- Otherwise, this is 0. */
- UT_LIST_NODE_T(mutex_t) list; /*!< All allocated mutexes are put into
- a list. Pointers to the next and prev. */
-#ifdef UNIV_SYNC_DEBUG
- const char* file_name; /*!< File where the mutex was locked */
- ulint line; /*!< Line where the mutex was locked */
- ulint level; /*!< Level in the global latching order */
-#endif /* UNIV_SYNC_DEBUG */
- const char* cfile_name;/*!< File name where mutex created */
- ulint cline; /*!< Line where created */
-#ifdef UNIV_DEBUG
- os_thread_id_t thread_id; /*!< The thread id of the thread
- which locked the mutex. */
- ulint magic_n; /*!< MUTEX_MAGIC_N */
-/** Value of mutex_struct::magic_n */
-# define MUTEX_MAGIC_N (ulint)979585
-#endif /* UNIV_DEBUG */
- ulong count_os_wait; /*!< count of os_wait */
-#ifdef UNIV_DEBUG
- ulong count_using; /*!< count of times mutex used */
- ulong count_spin_loop; /*!< count of spin loops */
- ulong count_spin_rounds;/*!< count of spin rounds */
- ulong count_os_yield; /*!< count of os_wait */
- ulonglong lspent_time; /*!< mutex os_wait timer msec */
- ulonglong lmax_spent_time;/*!< mutex os_wait timer msec */
- const char* cmutex_name; /*!< mutex name */
- ulint mutex_type; /*!< 0=usual mutex, 1=rw_lock mutex */
-#endif /* UNIV_DEBUG */
-};
-
-/** The global array of wait cells for implementation of the databases own
-mutexes and read-write locks. */
-extern sync_array_t* sync_primary_wait_array;/* Appears here for
- debugging purposes only! */
-
-/** Constant determining how long spin wait is continued before suspending
-the thread. A value 600 rounds on a 1995 100 MHz Pentium seems to correspond
-to 20 microseconds. */
-
-#define SYNC_SPIN_ROUNDS srv_n_spin_wait_rounds
-
-/** The number of mutex_exit calls. Intended for performance monitoring. */
-extern ib_int64_t mutex_exit_count;
-
-#ifdef UNIV_SYNC_DEBUG
-/** Latching order checks start when this is set TRUE */
-extern ibool sync_order_checks_on;
-#endif /* UNIV_SYNC_DEBUG */
-
-/** This variable is set to TRUE when sync_init is called */
-extern ibool sync_initialized;
-
-/** Global list of database mutexes (not OS mutexes) created. */
-typedef UT_LIST_BASE_NODE_T(mutex_t) ut_list_base_node_t;
-/** Global list of database mutexes (not OS mutexes) created. */
-extern ut_list_base_node_t mutex_list;
-
-/** Mutex protecting the mutex_list variable */
-extern mutex_t mutex_list_mutex;
-
-
-#ifndef UNIV_NONINL
-#include "sync0sync.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/sync0sync.ic b/storage/innodb_plugin/include/sync0sync.ic
deleted file mode 100644
index b05020b5660..00000000000
--- a/storage/innodb_plugin/include/sync0sync.ic
+++ /dev/null
@@ -1,222 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/sync0sync.ic
-Mutex, the basic synchronization primitive
-
-Created 9/5/1995 Heikki Tuuri
-*******************************************************/
-
-/******************************************************************//**
-Sets the waiters field in a mutex. */
-UNIV_INTERN
-void
-mutex_set_waiters(
-/*==============*/
- mutex_t* mutex, /*!< in: mutex */
- ulint n); /*!< in: value to set */
-/******************************************************************//**
-Reserves a mutex for the current thread. If the mutex is reserved, the
-function spins a preset time (controlled by SYNC_SPIN_ROUNDS) waiting
-for the mutex before suspending the thread. */
-UNIV_INTERN
-void
-mutex_spin_wait(
-/*============*/
- mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where mutex
- requested */
- ulint line); /*!< in: line where requested */
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Sets the debug information for a reserved mutex. */
-UNIV_INTERN
-void
-mutex_set_debug_info(
-/*=================*/
- mutex_t* mutex, /*!< in: mutex */
- const char* file_name, /*!< in: file where requested */
- ulint line); /*!< in: line where requested */
-#endif /* UNIV_SYNC_DEBUG */
-/******************************************************************//**
-Releases the threads waiting in the primary wait array for this mutex. */
-UNIV_INTERN
-void
-mutex_signal_object(
-/*================*/
- mutex_t* mutex); /*!< in: mutex */
-
-/******************************************************************//**
-Performs an atomic test-and-set instruction to the lock_word field of a
-mutex.
-@return the previous value of lock_word: 0 or 1 */
-UNIV_INLINE
-byte
-mutex_test_and_set(
-/*===============*/
- mutex_t* mutex) /*!< in: mutex */
-{
-#if defined(HAVE_ATOMIC_BUILTINS)
- return(os_atomic_test_and_set_byte(&mutex->lock_word, 1));
-#else
- ibool ret;
-
- ret = os_fast_mutex_trylock(&(mutex->os_fast_mutex));
-
- if (ret == 0) {
- /* We check that os_fast_mutex_trylock does not leak
- and allow race conditions */
- ut_a(mutex->lock_word == 0);
-
- mutex->lock_word = 1;
- }
-
- return((byte)ret);
-#endif
-}
-
-/******************************************************************//**
-Performs a reset instruction to the lock_word field of a mutex. This
-instruction also serializes memory operations to the program order. */
-UNIV_INLINE
-void
-mutex_reset_lock_word(
-/*==================*/
- mutex_t* mutex) /*!< in: mutex */
-{
-#if defined(HAVE_ATOMIC_BUILTINS)
- /* In theory __sync_lock_release should be used to release the lock.
- Unfortunately, it does not work properly alone. The workaround is
- that more conservative __sync_lock_test_and_set is used instead. */
- os_atomic_test_and_set_byte(&mutex->lock_word, 0);
-#else
- mutex->lock_word = 0;
-
- os_fast_mutex_unlock(&(mutex->os_fast_mutex));
-#endif
-}
-
-/******************************************************************//**
-Gets the value of the lock word. */
-UNIV_INLINE
-lock_word_t
-mutex_get_lock_word(
-/*================*/
- const mutex_t* mutex) /*!< in: mutex */
-{
- ut_ad(mutex);
-
- return(mutex->lock_word);
-}
-
-/******************************************************************//**
-Gets the waiters field in a mutex.
-@return value to set */
-UNIV_INLINE
-ulint
-mutex_get_waiters(
-/*==============*/
- const mutex_t* mutex) /*!< in: mutex */
-{
- const volatile ulint* ptr; /*!< declared volatile to ensure that
- the value is read from memory */
- ut_ad(mutex);
-
- ptr = &(mutex->waiters);
-
- return(*ptr); /* Here we assume that the read of a single
- word from memory is atomic */
-}
-
-/******************************************************************//**
-Unlocks a mutex owned by the current thread. */
-UNIV_INLINE
-void
-mutex_exit(
-/*=======*/
- mutex_t* mutex) /*!< in: pointer to mutex */
-{
- ut_ad(mutex_own(mutex));
-
- ut_d(mutex->thread_id = (os_thread_id_t) ULINT_UNDEFINED);
-
-#ifdef UNIV_SYNC_DEBUG
- sync_thread_reset_level(mutex);
-#endif
- mutex_reset_lock_word(mutex);
-
- /* A problem: we assume that mutex_reset_lock word
- is a memory barrier, that is when we read the waiters
- field next, the read must be serialized in memory
- after the reset. A speculative processor might
- perform the read first, which could leave a waiting
- thread hanging indefinitely.
-
- Our current solution call every second
- sync_arr_wake_threads_if_sema_free()
- to wake up possible hanging threads if
- they are missed in mutex_signal_object. */
-
- if (mutex_get_waiters(mutex) != 0) {
-
- mutex_signal_object(mutex);
- }
-
-#ifdef UNIV_SYNC_PERF_STAT
- mutex_exit_count++;
-#endif
-}
-
-/******************************************************************//**
-Locks a mutex for the current thread. If the mutex is reserved, the function
-spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting for the mutex
-before suspending the thread. */
-UNIV_INLINE
-void
-mutex_enter_func(
-/*=============*/
- mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where locked */
- ulint line) /*!< in: line where locked */
-{
- ut_ad(mutex_validate(mutex));
- ut_ad(!mutex_own(mutex));
-
- /* Note that we do not peek at the value of lock_word before trying
- the atomic test_and_set; we could peek, and possibly save time. */
-
- ut_d(mutex->count_using++);
-
- if (!mutex_test_and_set(mutex)) {
- ut_d(mutex->thread_id = os_thread_get_curr_id());
-#ifdef UNIV_SYNC_DEBUG
- mutex_set_debug_info(mutex, file_name, line);
-#endif
- return; /* Succeeded! */
- }
-
- mutex_spin_wait(mutex, file_name, line);
-}
diff --git a/storage/innodb_plugin/include/sync0types.h b/storage/innodb_plugin/include/sync0types.h
deleted file mode 100644
index 1911bbac7fd..00000000000
--- a/storage/innodb_plugin/include/sync0types.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/sync0types.h
-Global types for sync
-
-Created 9/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef sync0types_h
-#define sync0types_h
-
-/** Rename mutex_t to avoid name space collision on some systems */
-#define mutex_t ib_mutex_t
-/** InnoDB mutex */
-typedef struct mutex_struct mutex_t;
-
-#endif
diff --git a/storage/innodb_plugin/include/thr0loc.h b/storage/innodb_plugin/include/thr0loc.h
deleted file mode 100644
index b7eb29f2ed0..00000000000
--- a/storage/innodb_plugin/include/thr0loc.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/thr0loc.h
-The thread local storage
-
-Created 10/5/1995 Heikki Tuuri
-*******************************************************/
-
-/* This module implements storage private to each thread,
-a capability useful in some situations like storing the
-OS handle to the current thread, or its priority. */
-
-#ifndef thr0loc_h
-#define thr0loc_h
-
-#include "univ.i"
-#include "os0thread.h"
-
-/****************************************************************//**
-Initializes the thread local storage module. */
-UNIV_INTERN
-void
-thr_local_init(void);
-/*================*/
- /****************************************************************//**
-Close the thread local storage module. */
-UNIV_INTERN
-void
-thr_local_close(void);
-/*=================*/
-/*******************************************************************//**
-Creates a local storage struct for the calling new thread. */
-UNIV_INTERN
-void
-thr_local_create(void);
-/*==================*/
-/*******************************************************************//**
-Frees the local storage struct for the specified thread. */
-UNIV_INTERN
-void
-thr_local_free(
-/*===========*/
- os_thread_id_t id); /*!< in: thread id */
-/*******************************************************************//**
-Gets the slot number in the thread table of a thread.
-@return slot number */
-UNIV_INTERN
-ulint
-thr_local_get_slot_no(
-/*==================*/
- os_thread_id_t id); /*!< in: thread id of the thread */
-/*******************************************************************//**
-Sets in the local storage the slot number in the thread table of a thread. */
-UNIV_INTERN
-void
-thr_local_set_slot_no(
-/*==================*/
- os_thread_id_t id, /*!< in: thread id of the thread */
- ulint slot_no);/*!< in: slot number */
-/*******************************************************************//**
-Returns pointer to the 'in_ibuf' field within the current thread local
-storage.
-@return pointer to the in_ibuf field */
-UNIV_INTERN
-ibool*
-thr_local_get_in_ibuf_field(void);
-/*=============================*/
-
-#ifndef UNIV_NONINL
-#include "thr0loc.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/thr0loc.ic b/storage/innodb_plugin/include/thr0loc.ic
deleted file mode 100644
index ce44e512320..00000000000
--- a/storage/innodb_plugin/include/thr0loc.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/thr0loc.ic
-Thread local storage
-
-Created 10/4/1995 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innodb_plugin/include/trx0purge.h b/storage/innodb_plugin/include/trx0purge.h
deleted file mode 100644
index 908760580f6..00000000000
--- a/storage/innodb_plugin/include/trx0purge.h
+++ /dev/null
@@ -1,189 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0purge.h
-Purge old versions
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0purge_h
-#define trx0purge_h
-
-#include "univ.i"
-#include "trx0types.h"
-#include "mtr0mtr.h"
-#include "trx0sys.h"
-#include "que0types.h"
-#include "page0page.h"
-#include "usr0sess.h"
-#include "fil0fil.h"
-
-/** The global data structure coordinating a purge */
-extern trx_purge_t* purge_sys;
-
-/** A dummy undo record used as a return value when we have a whole undo log
-which needs no purge */
-extern trx_undo_rec_t trx_purge_dummy_rec;
-
-/********************************************************************//**
-Calculates the file address of an undo log header when we have the file
-address of its history list node.
-@return file address of the log */
-UNIV_INLINE
-fil_addr_t
-trx_purge_get_log_from_hist(
-/*========================*/
- fil_addr_t node_addr); /*!< in: file address of the history
- list node of the log */
-/*****************************************************************//**
-Checks if trx_id is >= purge_view: then it is guaranteed that its update
-undo log still exists in the system.
-@return TRUE if is sure that it is preserved, also if the function
-returns FALSE, it is possible that the undo log still exists in the
-system */
-UNIV_INTERN
-ibool
-trx_purge_update_undo_must_exist(
-/*=============================*/
- trx_id_t trx_id);/*!< in: transaction id */
-/********************************************************************//**
-Creates the global purge system control structure and inits the history
-mutex. */
-UNIV_INTERN
-void
-trx_purge_sys_create(void);
-/*======================*/
-/********************************************************************//**
-Frees the global purge system control structure. */
-UNIV_INTERN
-void
-trx_purge_sys_close(void);
-/*======================*/
-/************************************************************************
-Adds the update undo log as the first log in the history list. Removes the
-update undo log segment from the rseg slot if it is too big for reuse. */
-UNIV_INTERN
-void
-trx_purge_add_update_undo_to_history(
-/*=================================*/
- trx_t* trx, /*!< in: transaction */
- page_t* undo_page, /*!< in: update undo log header page,
- x-latched */
- mtr_t* mtr); /*!< in: mtr */
-/********************************************************************//**
-Fetches the next undo log record from the history list to purge. It must be
-released with the corresponding release function.
-@return copy of an undo log record or pointer to trx_purge_dummy_rec,
-if the whole undo log can skipped in purge; NULL if none left */
-UNIV_INTERN
-trx_undo_rec_t*
-trx_purge_fetch_next_rec(
-/*=====================*/
- roll_ptr_t* roll_ptr,/*!< out: roll pointer to undo record */
- trx_undo_inf_t** cell, /*!< out: storage cell for the record in the
- purge array */
- mem_heap_t* heap); /*!< in: memory heap where copied */
-/*******************************************************************//**
-Releases a reserved purge undo record. */
-UNIV_INTERN
-void
-trx_purge_rec_release(
-/*==================*/
- trx_undo_inf_t* cell); /*!< in: storage cell */
-/*******************************************************************//**
-This function runs a purge batch.
-@return number of undo log pages handled in the batch */
-UNIV_INTERN
-ulint
-trx_purge(void);
-/*===========*/
-/******************************************************************//**
-Prints information of the purge system to stderr. */
-UNIV_INTERN
-void
-trx_purge_sys_print(void);
-/*======================*/
-
-/** The control structure used in the purge operation */
-struct trx_purge_struct{
- ulint state; /*!< Purge system state */
- sess_t* sess; /*!< System session running the purge
- query */
- trx_t* trx; /*!< System transaction running the purge
- query: this trx is not in the trx list
- of the trx system and it never ends */
- que_t* query; /*!< The query graph which will do the
- parallelized purge operation */
- rw_lock_t latch; /*!< The latch protecting the purge view.
- A purge operation must acquire an
- x-latch here for the instant at which
- it changes the purge view: an undo
- log operation can prevent this by
- obtaining an s-latch here. */
- read_view_t* view; /*!< The purge will not remove undo logs
- which are >= this view (purge view) */
- mutex_t mutex; /*!< Mutex protecting the fields below */
- ulint n_pages_handled;/*!< Approximate number of undo log
- pages processed in purge */
- ulint handle_limit; /*!< Target of how many pages to get
- processed in the current purge */
- /*------------------------------*/
- /* The following two fields form the 'purge pointer' which advances
- during a purge, and which is used in history list truncation */
-
- trx_id_t purge_trx_no; /*!< Purge has advanced past all
- transactions whose number is less
- than this */
- undo_no_t purge_undo_no; /*!< Purge has advanced past all records
- whose undo number is less than this */
- /*-----------------------------*/
- ibool next_stored; /*!< TRUE if the info of the next record
- to purge is stored below: if yes, then
- the transaction number and the undo
- number of the record are stored in
- purge_trx_no and purge_undo_no above */
- trx_rseg_t* rseg; /*!< Rollback segment for the next undo
- record to purge */
- ulint page_no; /*!< Page number for the next undo
- record to purge, page number of the
- log header, if dummy record */
- ulint offset; /*!< Page offset for the next undo
- record to purge, 0 if the dummy
- record */
- ulint hdr_page_no; /*!< Header page of the undo log where
- the next record to purge belongs */
- ulint hdr_offset; /*!< Header byte offset on the page */
- /*-----------------------------*/
- trx_undo_arr_t* arr; /*!< Array of transaction numbers and
- undo numbers of the undo records
- currently under processing in purge */
- mem_heap_t* heap; /*!< Temporary storage used during a
- purge: can be emptied after purge
- completes */
-};
-
-#define TRX_PURGE_ON 1 /* purge operation is running */
-#define TRX_STOP_PURGE 2 /* purge operation is stopped, or
- it should be stopped */
-#ifndef UNIV_NONINL
-#include "trx0purge.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/trx0purge.ic b/storage/innodb_plugin/include/trx0purge.ic
deleted file mode 100644
index de09e393654..00000000000
--- a/storage/innodb_plugin/include/trx0purge.ic
+++ /dev/null
@@ -1,43 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0purge.ic
-Purge old versions
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0undo.h"
-
-/********************************************************************//**
-Calculates the file address of an undo log header when we have the file
-address of its history list node.
-@return file address of the log */
-UNIV_INLINE
-fil_addr_t
-trx_purge_get_log_from_hist(
-/*========================*/
- fil_addr_t node_addr) /*!< in: file address of the history
- list node of the log */
-{
- node_addr.boffset -= TRX_UNDO_HISTORY_NODE;
-
- return(node_addr);
-}
-
diff --git a/storage/innodb_plugin/include/trx0rec.h b/storage/innodb_plugin/include/trx0rec.h
deleted file mode 100644
index a6e56e963c6..00000000000
--- a/storage/innodb_plugin/include/trx0rec.h
+++ /dev/null
@@ -1,338 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0rec.h
-Transaction undo log record
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0rec_h
-#define trx0rec_h
-
-#include "univ.i"
-#include "trx0types.h"
-#include "row0types.h"
-#include "mtr0mtr.h"
-#include "dict0types.h"
-#include "data0data.h"
-#include "rem0types.h"
-
-#ifndef UNIV_HOTBACKUP
-# include "que0types.h"
-
-/***********************************************************************//**
-Copies the undo record to the heap.
-@return own: copy of undo log record */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_rec_copy(
-/*==============*/
- const trx_undo_rec_t* undo_rec, /*!< in: undo log record */
- mem_heap_t* heap); /*!< in: heap where copied */
-/**********************************************************************//**
-Reads the undo log record type.
-@return record type */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_type(
-/*==================*/
- const trx_undo_rec_t* undo_rec); /*!< in: undo log record */
-/**********************************************************************//**
-Reads from an undo log record the record compiler info.
-@return compiler info */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_cmpl_info(
-/*=======================*/
- const trx_undo_rec_t* undo_rec); /*!< in: undo log record */
-/**********************************************************************//**
-Returns TRUE if an undo log record contains an extern storage field.
-@return TRUE if extern */
-UNIV_INLINE
-ibool
-trx_undo_rec_get_extern_storage(
-/*============================*/
- const trx_undo_rec_t* undo_rec); /*!< in: undo log record */
-/**********************************************************************//**
-Reads the undo log record number.
-@return undo no */
-UNIV_INLINE
-undo_no_t
-trx_undo_rec_get_undo_no(
-/*=====================*/
- const trx_undo_rec_t* undo_rec); /*!< in: undo log record */
-/**********************************************************************//**
-Returns the start of the undo record data area.
-@return offset to the data area */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_offset(
-/*====================*/
- undo_no_t undo_no) /*!< in: undo no read from node */
- __attribute__((const));
-
-/**********************************************************************//**
-Returns the start of the undo record data area. */
-#define trx_undo_rec_get_ptr(undo_rec, undo_no) \
- ((undo_rec) + trx_undo_rec_get_offset(undo_no))
-
-/**********************************************************************//**
-Reads from an undo log record the general parameters.
-@return remaining part of undo log record after reading these values */
-UNIV_INTERN
-byte*
-trx_undo_rec_get_pars(
-/*==================*/
- trx_undo_rec_t* undo_rec, /*!< in: undo log record */
- ulint* type, /*!< out: undo record type:
- TRX_UNDO_INSERT_REC, ... */
- ulint* cmpl_info, /*!< out: compiler info, relevant only
- for update type records */
- ibool* updated_extern, /*!< out: TRUE if we updated an
- externally stored fild */
- undo_no_t* undo_no, /*!< out: undo log record number */
- dulint* table_id); /*!< out: table id */
-/*******************************************************************//**
-Builds a row reference from an undo log record.
-@return pointer to remaining part of undo record */
-UNIV_INTERN
-byte*
-trx_undo_rec_get_row_ref(
-/*=====================*/
- byte* ptr, /*!< in: remaining part of a copy of an undo log
- record, at the start of the row reference;
- NOTE that this copy of the undo log record must
- be preserved as long as the row reference is
- used, as we do NOT copy the data in the
- record! */
- dict_index_t* index, /*!< in: clustered index */
- dtuple_t** ref, /*!< out, own: row reference */
- mem_heap_t* heap); /*!< in: memory heap from which the memory
- needed is allocated */
-/*******************************************************************//**
-Skips a row reference from an undo log record.
-@return pointer to remaining part of undo record */
-UNIV_INTERN
-byte*
-trx_undo_rec_skip_row_ref(
-/*======================*/
- byte* ptr, /*!< in: remaining part in update undo log
- record, at the start of the row reference */
- dict_index_t* index); /*!< in: clustered index */
-/**********************************************************************//**
-Reads from an undo log update record the system field values of the old
-version.
-@return remaining part of undo log record after reading these values */
-UNIV_INTERN
-byte*
-trx_undo_update_rec_get_sys_cols(
-/*=============================*/
- byte* ptr, /*!< in: remaining part of undo
- log record after reading
- general parameters */
- trx_id_t* trx_id, /*!< out: trx id */
- roll_ptr_t* roll_ptr, /*!< out: roll ptr */
- ulint* info_bits); /*!< out: info bits state */
-/*******************************************************************//**
-Builds an update vector based on a remaining part of an undo log record.
-@return remaining part of the record, NULL if an error detected, which
-means that the record is corrupted */
-UNIV_INTERN
-byte*
-trx_undo_update_rec_get_update(
-/*===========================*/
- byte* ptr, /*!< in: remaining part in update undo log
- record, after reading the row reference
- NOTE that this copy of the undo log record must
- be preserved as long as the update vector is
- used, as we do NOT copy the data in the
- record! */
- dict_index_t* index, /*!< in: clustered index */
- ulint type, /*!< in: TRX_UNDO_UPD_EXIST_REC,
- TRX_UNDO_UPD_DEL_REC, or
- TRX_UNDO_DEL_MARK_REC; in the last case,
- only trx id and roll ptr fields are added to
- the update vector */
- trx_id_t trx_id, /*!< in: transaction id from this undorecord */
- roll_ptr_t roll_ptr,/*!< in: roll pointer from this undo record */
- ulint info_bits,/*!< in: info bits from this undo record */
- trx_t* trx, /*!< in: transaction */
- mem_heap_t* heap, /*!< in: memory heap from which the memory
- needed is allocated */
- upd_t** upd); /*!< out, own: update vector */
-/*******************************************************************//**
-Builds a partial row from an update undo log record. It contains the
-columns which occur as ordering in any index of the table.
-@return pointer to remaining part of undo record */
-UNIV_INTERN
-byte*
-trx_undo_rec_get_partial_row(
-/*=========================*/
- byte* ptr, /*!< in: remaining part in update undo log
- record of a suitable type, at the start of
- the stored index columns;
- NOTE that this copy of the undo log record must
- be preserved as long as the partial row is
- used, as we do NOT copy the data in the
- record! */
- dict_index_t* index, /*!< in: clustered index */
- dtuple_t** row, /*!< out, own: partial row */
- ibool ignore_prefix, /*!< in: flag to indicate if we
- expect blob prefixes in undo. Used
- only in the assertion. */
- mem_heap_t* heap); /*!< in: memory heap from which the memory
- needed is allocated */
-/***********************************************************************//**
-Writes information to an undo log about an insert, update, or a delete marking
-of a clustered index record. This information is used in a rollback of the
-transaction and in consistent reads that must look to the history of this
-transaction.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ulint
-trx_undo_report_row_operation(
-/*==========================*/
- ulint flags, /*!< in: if BTR_NO_UNDO_LOG_FLAG bit is
- set, does nothing */
- ulint op_type, /*!< in: TRX_UNDO_INSERT_OP or
- TRX_UNDO_MODIFY_OP */
- que_thr_t* thr, /*!< in: query thread */
- dict_index_t* index, /*!< in: clustered index */
- const dtuple_t* clust_entry, /*!< in: in the case of an insert,
- index entry to insert into the
- clustered index, otherwise NULL */
- const upd_t* update, /*!< in: in the case of an update,
- the update vector, otherwise NULL */
- ulint cmpl_info, /*!< in: compiler info on secondary
- index updates */
- const rec_t* rec, /*!< in: case of an update or delete
- marking, the record in the clustered
- index, otherwise NULL */
- roll_ptr_t* roll_ptr); /*!< out: rollback pointer to the
- inserted undo log record,
- ut_dulint_zero if BTR_NO_UNDO_LOG
- flag was specified */
-/******************************************************************//**
-Copies an undo record to heap. This function can be called if we know that
-the undo log record exists.
-@return own: copy of the record */
-UNIV_INTERN
-trx_undo_rec_t*
-trx_undo_get_undo_rec_low(
-/*======================*/
- roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
- mem_heap_t* heap); /*!< in: memory heap where copied */
-/******************************************************************//**
-Copies an undo record to heap.
-
-NOTE: the caller must have latches on the clustered index page and
-purge_view.
-
-@return DB_SUCCESS, or DB_MISSING_HISTORY if the undo log has been
-truncated and we cannot fetch the old version */
-UNIV_INTERN
-ulint
-trx_undo_get_undo_rec(
-/*==================*/
- roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
- trx_id_t trx_id, /*!< in: id of the trx that generated
- the roll pointer: it points to an
- undo log of this transaction */
- trx_undo_rec_t** undo_rec, /*!< out, own: copy of the record */
- mem_heap_t* heap); /*!< in: memory heap where copied */
-/*******************************************************************//**
-Build a previous version of a clustered index record. This function checks
-that the caller has a latch on the index page of the clustered index record
-and an s-latch on the purge_view. This guarantees that the stack of versions
-is locked.
-@return DB_SUCCESS, or DB_MISSING_HISTORY if the previous version is
-earlier than purge_view, which means that it may have been removed,
-DB_ERROR if corrupted record */
-UNIV_INTERN
-ulint
-trx_undo_prev_version_build(
-/*========================*/
- const rec_t* index_rec,/*!< in: clustered index record in the
- index tree */
- mtr_t* index_mtr,/*!< in: mtr which contains the latch to
- index_rec page and purge_view */
- const rec_t* rec, /*!< in: version of a clustered index record */
- dict_index_t* index, /*!< in: clustered index */
- ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- mem_heap_t* heap, /*!< in: memory heap from which the memory
- needed is allocated */
- rec_t** old_vers);/*!< out, own: previous version, or NULL if
- rec is the first inserted version, or if
- history data has been deleted */
-#endif /* !UNIV_HOTBACKUP */
-/***********************************************************//**
-Parses a redo log record of adding an undo log record.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-trx_undo_parse_add_undo_rec(
-/*========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page); /*!< in: page or NULL */
-/***********************************************************//**
-Parses a redo log record of erasing of an undo page end.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-trx_undo_parse_erase_page_end(
-/*==========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
-
-#ifndef UNIV_HOTBACKUP
-
-/* Types of an undo log record: these have to be smaller than 16, as the
-compilation info multiplied by 16 is ORed to this value in an undo log
-record */
-
-#define TRX_UNDO_INSERT_REC 11 /* fresh insert into clustered index */
-#define TRX_UNDO_UPD_EXIST_REC 12 /* update of a non-delete-marked
- record */
-#define TRX_UNDO_UPD_DEL_REC 13 /* update of a delete marked record to
- a not delete marked record; also the
- fields of the record can change */
-#define TRX_UNDO_DEL_MARK_REC 14 /* delete marking of a record; fields
- do not change */
-#define TRX_UNDO_CMPL_INFO_MULT 16 /* compilation info is multiplied by
- this and ORed to the type above */
-#define TRX_UNDO_UPD_EXTERN 128 /* This bit can be ORed to type_cmpl
- to denote that we updated external
- storage fields: used by purge to
- free the external storage */
-
-/* Operation type flags used in trx_undo_report_row_operation */
-#define TRX_UNDO_INSERT_OP 1
-#define TRX_UNDO_MODIFY_OP 2
-
-#ifndef UNIV_NONINL
-#include "trx0rec.ic"
-#endif
-
-#endif /* !UNIV_HOTBACKUP */
-
-#endif /* trx0rec_h */
diff --git a/storage/innodb_plugin/include/trx0rec.ic b/storage/innodb_plugin/include/trx0rec.ic
deleted file mode 100644
index e7e41d6d9f6..00000000000
--- a/storage/innodb_plugin/include/trx0rec.ic
+++ /dev/null
@@ -1,112 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0rec.ic
-Transaction undo log record
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Reads from an undo log record the record type.
-@return record type */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_type(
-/*==================*/
- const trx_undo_rec_t* undo_rec) /*!< in: undo log record */
-{
- return(mach_read_from_1(undo_rec + 2) & (TRX_UNDO_CMPL_INFO_MULT - 1));
-}
-
-/**********************************************************************//**
-Reads from an undo log record the record compiler info.
-@return compiler info */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_cmpl_info(
-/*=======================*/
- const trx_undo_rec_t* undo_rec) /*!< in: undo log record */
-{
- return(mach_read_from_1(undo_rec + 2) / TRX_UNDO_CMPL_INFO_MULT);
-}
-
-/**********************************************************************//**
-Returns TRUE if an undo log record contains an extern storage field.
-@return TRUE if extern */
-UNIV_INLINE
-ibool
-trx_undo_rec_get_extern_storage(
-/*============================*/
- const trx_undo_rec_t* undo_rec) /*!< in: undo log record */
-{
- if (mach_read_from_1(undo_rec + 2) & TRX_UNDO_UPD_EXTERN) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/**********************************************************************//**
-Reads the undo log record number.
-@return undo no */
-UNIV_INLINE
-undo_no_t
-trx_undo_rec_get_undo_no(
-/*=====================*/
- const trx_undo_rec_t* undo_rec) /*!< in: undo log record */
-{
- const byte* ptr;
-
- ptr = undo_rec + 3;
-
- return(mach_dulint_read_much_compressed(ptr));
-}
-
-/**********************************************************************//**
-Returns the start of the undo record data area.
-@return offset to the data area */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_offset(
-/*====================*/
- undo_no_t undo_no) /*!< in: undo no read from node */
-{
- return (3 + mach_dulint_get_much_compressed_size(undo_no));
-}
-
-/***********************************************************************//**
-Copies the undo record to the heap.
-@return own: copy of undo log record */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_rec_copy(
-/*==============*/
- const trx_undo_rec_t* undo_rec, /*!< in: undo log record */
- mem_heap_t* heap) /*!< in: heap where copied */
-{
- ulint len;
-
- len = mach_read_from_2(undo_rec)
- - ut_align_offset(undo_rec, UNIV_PAGE_SIZE);
- return(mem_heap_dup(heap, undo_rec, len));
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/trx0roll.h b/storage/innodb_plugin/include/trx0roll.h
deleted file mode 100644
index 1dee5655c8c..00000000000
--- a/storage/innodb_plugin/include/trx0roll.h
+++ /dev/null
@@ -1,352 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0roll.h
-Transaction rollback
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0roll_h
-#define trx0roll_h
-
-#include "univ.i"
-#include "trx0trx.h"
-#include "trx0types.h"
-#include "mtr0mtr.h"
-#include "trx0sys.h"
-
-#define trx_roll_free_all_savepoints(s) trx_roll_savepoints_free((s), NULL)
-
-/*******************************************************************//**
-Determines if this transaction is rolling back an incomplete transaction
-in crash recovery.
-@return TRUE if trx is an incomplete transaction that is being rolled
-back in crash recovery */
-UNIV_INTERN
-ibool
-trx_is_recv(
-/*========*/
- const trx_t* trx); /*!< in: transaction */
-/*******************************************************************//**
-Returns a transaction savepoint taken at this point in time.
-@return savepoint */
-UNIV_INTERN
-trx_savept_t
-trx_savept_take(
-/*============*/
- trx_t* trx); /*!< in: transaction */
-/*******************************************************************//**
-Creates an undo number array. */
-UNIV_INTERN
-trx_undo_arr_t*
-trx_undo_arr_create(void);
-/*=====================*/
-/*******************************************************************//**
-Frees an undo number array. */
-UNIV_INTERN
-void
-trx_undo_arr_free(
-/*==============*/
- trx_undo_arr_t* arr); /*!< in: undo number array */
-/*******************************************************************//**
-Returns pointer to nth element in an undo number array.
-@return pointer to the nth element */
-UNIV_INLINE
-trx_undo_inf_t*
-trx_undo_arr_get_nth_info(
-/*======================*/
- trx_undo_arr_t* arr, /*!< in: undo number array */
- ulint n); /*!< in: position */
-/***********************************************************************//**
-Tries truncate the undo logs. */
-UNIV_INTERN
-void
-trx_roll_try_truncate(
-/*==================*/
- trx_t* trx); /*!< in/out: transaction */
-/********************************************************************//**
-Pops the topmost record when the two undo logs of a transaction are seen
-as a single stack of records ordered by their undo numbers. Inserts the
-undo number of the popped undo record to the array of currently processed
-undo numbers in the transaction. When the query thread finishes processing
-of this undo record, it must be released with trx_undo_rec_release.
-@return undo log record copied to heap, NULL if none left, or if the
-undo number of the top record would be less than the limit */
-UNIV_INTERN
-trx_undo_rec_t*
-trx_roll_pop_top_rec_of_trx(
-/*========================*/
- trx_t* trx, /*!< in: transaction */
- undo_no_t limit, /*!< in: least undo number we need */
- roll_ptr_t* roll_ptr,/*!< out: roll pointer to undo record */
- mem_heap_t* heap); /*!< in: memory heap where copied */
-/********************************************************************//**
-Reserves an undo log record for a query thread to undo. This should be
-called if the query thread gets the undo log record not using the pop
-function above.
-@return TRUE if succeeded */
-UNIV_INTERN
-ibool
-trx_undo_rec_reserve(
-/*=================*/
- trx_t* trx, /*!< in/out: transaction */
- undo_no_t undo_no);/*!< in: undo number of the record */
-/*******************************************************************//**
-Releases a reserved undo record. */
-UNIV_INTERN
-void
-trx_undo_rec_release(
-/*=================*/
- trx_t* trx, /*!< in/out: transaction */
- undo_no_t undo_no);/*!< in: undo number */
-/*********************************************************************//**
-Starts a rollback operation. */
-UNIV_INTERN
-void
-trx_rollback(
-/*=========*/
- trx_t* trx, /*!< in: transaction */
- trx_sig_t* sig, /*!< in: signal starting the rollback */
- que_thr_t** next_thr);/*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread */
-/*******************************************************************//**
-Rollback or clean up any incomplete transactions which were
-encountered in crash recovery. If the transaction already was
-committed, then we clean up a possible insert undo log. If the
-transaction was not yet committed, then we roll it back. */
-UNIV_INTERN
-void
-trx_rollback_or_clean_recovered(
-/*============================*/
- ibool all); /*!< in: FALSE=roll back dictionary transactions;
- TRUE=roll back all non-PREPARED transactions */
-/*******************************************************************//**
-Rollback or clean up any incomplete transactions which were
-encountered in crash recovery. If the transaction already was
-committed, then we clean up a possible insert undo log. If the
-transaction was not yet committed, then we roll it back.
-Note: this is done in a background thread.
-@return a dummy parameter */
-UNIV_INTERN
-os_thread_ret_t
-trx_rollback_or_clean_all_recovered(
-/*================================*/
- void* arg __attribute__((unused)));
- /*!< in: a dummy parameter required by
- os_thread_create */
-/****************************************************************//**
-Finishes a transaction rollback. */
-UNIV_INTERN
-void
-trx_finish_rollback_off_kernel(
-/*===========================*/
- que_t* graph, /*!< in: undo graph which can now be freed */
- trx_t* trx, /*!< in: transaction */
- que_thr_t** next_thr);/*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread; if this parameter is
- NULL, it is ignored */
-/****************************************************************//**
-Builds an undo 'query' graph for a transaction. The actual rollback is
-performed by executing this query graph like a query subprocedure call.
-The reply about the completion of the rollback will be sent by this
-graph.
-@return own: the query graph */
-UNIV_INTERN
-que_t*
-trx_roll_graph_build(
-/*=================*/
- trx_t* trx); /*!< in: trx handle */
-/*********************************************************************//**
-Creates a rollback command node struct.
-@return own: rollback node struct */
-UNIV_INTERN
-roll_node_t*
-roll_node_create(
-/*=============*/
- mem_heap_t* heap); /*!< in: mem heap where created */
-/***********************************************************//**
-Performs an execution step for a rollback command node in a query graph.
-@return query thread to run next, or NULL */
-UNIV_INTERN
-que_thr_t*
-trx_rollback_step(
-/*==============*/
- que_thr_t* thr); /*!< in: query thread */
-/*******************************************************************//**
-Rollback a transaction used in MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-trx_rollback_for_mysql(
-/*===================*/
- trx_t* trx); /*!< in: transaction handle */
-/*******************************************************************//**
-Rollback the latest SQL statement for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-trx_rollback_last_sql_stat_for_mysql(
-/*=================================*/
- trx_t* trx); /*!< in: transaction handle */
-/*******************************************************************//**
-Rollback a transaction used in MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-trx_general_rollback_for_mysql(
-/*===========================*/
- trx_t* trx, /*!< in: transaction handle */
- trx_savept_t* savept);/*!< in: pointer to savepoint undo number, if
- partial rollback requested, or NULL for
- complete rollback */
-/*******************************************************************//**
-Rolls back a transaction back to a named savepoint. Modifications after the
-savepoint are undone but InnoDB does NOT release the corresponding locks
-which are stored in memory. If a lock is 'implicit', that is, a new inserted
-row holds a lock where the lock information is carried by the trx id stored in
-the row, these locks are naturally released in the rollback. Savepoints which
-were set after this savepoint are deleted.
-@return if no savepoint of the name found then DB_NO_SAVEPOINT,
-otherwise DB_SUCCESS */
-UNIV_INTERN
-ulint
-trx_rollback_to_savepoint_for_mysql(
-/*================================*/
- trx_t* trx, /*!< in: transaction handle */
- const char* savepoint_name, /*!< in: savepoint name */
- ib_int64_t* mysql_binlog_cache_pos);/*!< out: the MySQL binlog cache
- position corresponding to this
- savepoint; MySQL needs this
- information to remove the
- binlog entries of the queries
- executed after the savepoint */
-/*******************************************************************//**
-Creates a named savepoint. If the transaction is not yet started, starts it.
-If there is already a savepoint of the same name, this call erases that old
-savepoint and replaces it with a new. Savepoints are deleted in a transaction
-commit or rollback.
-@return always DB_SUCCESS */
-UNIV_INTERN
-ulint
-trx_savepoint_for_mysql(
-/*====================*/
- trx_t* trx, /*!< in: transaction handle */
- const char* savepoint_name, /*!< in: savepoint name */
- ib_int64_t binlog_cache_pos); /*!< in: MySQL binlog cache
- position corresponding to this
- connection at the time of the
- savepoint */
-
-/*******************************************************************//**
-Releases a named savepoint. Savepoints which
-were set after this savepoint are deleted.
-@return if no savepoint of the name found then DB_NO_SAVEPOINT,
-otherwise DB_SUCCESS */
-UNIV_INTERN
-ulint
-trx_release_savepoint_for_mysql(
-/*============================*/
- trx_t* trx, /*!< in: transaction handle */
- const char* savepoint_name); /*!< in: savepoint name */
-
-/*******************************************************************//**
-Frees a single savepoint struct. */
-UNIV_INTERN
-void
-trx_roll_savepoint_free(
-/*=====================*/
- trx_t* trx, /*!< in: transaction handle */
- trx_named_savept_t* savep); /*!< in: savepoint to free */
-
-/*******************************************************************//**
-Frees savepoint structs starting from savep, if savep == NULL then
-free all savepoints. */
-UNIV_INTERN
-void
-trx_roll_savepoints_free(
-/*=====================*/
- trx_t* trx, /*!< in: transaction handle */
- trx_named_savept_t* savep); /*!< in: free all savepoints > this one;
- if this is NULL, free all savepoints
- of trx */
-
-/** A cell of trx_undo_arr_struct; used during a rollback and a purge */
-struct trx_undo_inf_struct{
- trx_id_t trx_no; /*!< transaction number: not defined during
- a rollback */
- undo_no_t undo_no;/*!< undo number of an undo record */
- ibool in_use; /*!< TRUE if the cell is in use */
-};
-
-/** During a rollback and a purge, undo numbers of undo records currently being
-processed are stored in this array */
-
-struct trx_undo_arr_struct{
- ulint n_cells; /*!< number of cells in the array */
- ulint n_used; /*!< number of cells currently in use */
- trx_undo_inf_t* infos; /*!< the array of undo infos */
- mem_heap_t* heap; /*!< memory heap from which allocated */
-};
-
-/** Rollback node states */
-enum roll_node_state {
- ROLL_NODE_SEND = 1, /*!< about to send a rollback signal to
- the transaction */
- ROLL_NODE_WAIT /*!< rollback signal sent to the transaction,
- waiting for completion */
-};
-
-/** Rollback command node in a query graph */
-struct roll_node_struct{
- que_common_t common; /*!< node type: QUE_NODE_ROLLBACK */
- enum roll_node_state state; /*!< node execution state */
- ibool partial;/*!< TRUE if we want a partial
- rollback */
- trx_savept_t savept; /*!< savepoint to which to
- roll back, in the case of a
- partial rollback */
-};
-
-/** A savepoint set with SQL's "SAVEPOINT savepoint_id" command */
-struct trx_named_savept_struct{
- char* name; /*!< savepoint name */
- trx_savept_t savept; /*!< the undo number corresponding to
- the savepoint */
- ib_int64_t mysql_binlog_cache_pos;
- /*!< the MySQL binlog cache position
- corresponding to this savepoint, not
- defined if the MySQL binlogging is not
- enabled */
- UT_LIST_NODE_T(trx_named_savept_t)
- trx_savepoints; /*!< the list of savepoints of a
- transaction */
-};
-
-#ifndef UNIV_NONINL
-#include "trx0roll.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/trx0roll.ic b/storage/innodb_plugin/include/trx0roll.ic
deleted file mode 100644
index 3460832b18c..00000000000
--- a/storage/innodb_plugin/include/trx0roll.ic
+++ /dev/null
@@ -1,40 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0roll.ic
-Transaction rollback
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-/*******************************************************************//**
-Returns pointer to nth element in an undo number array.
-@return pointer to the nth element */
-UNIV_INLINE
-trx_undo_inf_t*
-trx_undo_arr_get_nth_info(
-/*======================*/
- trx_undo_arr_t* arr, /*!< in: undo number array */
- ulint n) /*!< in: position */
-{
- ut_ad(arr);
- ut_ad(n < arr->n_cells);
-
- return(arr->infos + n);
-}
diff --git a/storage/innodb_plugin/include/trx0rseg.h b/storage/innodb_plugin/include/trx0rseg.h
deleted file mode 100644
index ba1fc88b6c4..00000000000
--- a/storage/innodb_plugin/include/trx0rseg.h
+++ /dev/null
@@ -1,220 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0rseg.h
-Rollback segment
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0rseg_h
-#define trx0rseg_h
-
-#include "univ.i"
-#include "trx0types.h"
-#include "trx0sys.h"
-
-/******************************************************************//**
-Gets a rollback segment header.
-@return rollback segment header, page x-latched */
-UNIV_INLINE
-trx_rsegf_t*
-trx_rsegf_get(
-/*==========*/
- ulint space, /*!< in: space where placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number of the header */
- mtr_t* mtr); /*!< in: mtr */
-/******************************************************************//**
-Gets a newly created rollback segment header.
-@return rollback segment header, page x-latched */
-UNIV_INLINE
-trx_rsegf_t*
-trx_rsegf_get_new(
-/*==============*/
- ulint space, /*!< in: space where placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number of the header */
- mtr_t* mtr); /*!< in: mtr */
-/***************************************************************//**
-Gets the file page number of the nth undo log slot.
-@return page number of the undo log segment */
-UNIV_INLINE
-ulint
-trx_rsegf_get_nth_undo(
-/*===================*/
- trx_rsegf_t* rsegf, /*!< in: rollback segment header */
- ulint n, /*!< in: index of slot */
- mtr_t* mtr); /*!< in: mtr */
-/***************************************************************//**
-Sets the file page number of the nth undo log slot. */
-UNIV_INLINE
-void
-trx_rsegf_set_nth_undo(
-/*===================*/
- trx_rsegf_t* rsegf, /*!< in: rollback segment header */
- ulint n, /*!< in: index of slot */
- ulint page_no,/*!< in: page number of the undo log segment */
- mtr_t* mtr); /*!< in: mtr */
-/****************************************************************//**
-Looks for a free slot for an undo log segment.
-@return slot index or ULINT_UNDEFINED if not found */
-UNIV_INLINE
-ulint
-trx_rsegf_undo_find_free(
-/*=====================*/
- trx_rsegf_t* rsegf, /*!< in: rollback segment header */
- mtr_t* mtr); /*!< in: mtr */
-/******************************************************************//**
-Looks for a rollback segment, based on the rollback segment id.
-@return rollback segment */
-UNIV_INTERN
-trx_rseg_t*
-trx_rseg_get_on_id(
-/*===============*/
- ulint id); /*!< in: rollback segment id */
-/****************************************************************//**
-Creates a rollback segment header. This function is called only when
-a new rollback segment is created in the database.
-@return page number of the created segment, FIL_NULL if fail */
-UNIV_INTERN
-ulint
-trx_rseg_header_create(
-/*===================*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint max_size, /*!< in: max size in pages */
- ulint* slot_no, /*!< out: rseg id == slot number in trx sys */
- mtr_t* mtr); /*!< in: mtr */
-/*********************************************************************//**
-Creates the memory copies for rollback segments and initializes the
-rseg list and array in trx_sys at a database startup. */
-UNIV_INTERN
-void
-trx_rseg_list_and_array_init(
-/*=========================*/
- trx_sysf_t* sys_header, /*!< in: trx system header */
- mtr_t* mtr); /*!< in: mtr */
-/****************************************************************//**
-Creates a new rollback segment to the database.
-@return the created segment object, NULL if fail */
-UNIV_INTERN
-trx_rseg_t*
-trx_rseg_create(
-/*============*/
- ulint space, /*!< in: space id */
- ulint max_size, /*!< in: max size in pages */
- ulint* id, /*!< out: rseg id */
- mtr_t* mtr); /*!< in: mtr */
-/***************************************************************************
-Free's an instance of the rollback segment in memory. */
-UNIV_INTERN
-void
-trx_rseg_mem_free(
-/*==============*/
- trx_rseg_t* rseg); /* in, own: instance to free */
-
-
-/* Number of undo log slots in a rollback segment file copy */
-#define TRX_RSEG_N_SLOTS (UNIV_PAGE_SIZE / 16)
-
-/* Maximum number of transactions supported by a single rollback segment */
-#define TRX_RSEG_MAX_N_TRXS (TRX_RSEG_N_SLOTS / 2)
-
-/* The rollback segment memory object */
-struct trx_rseg_struct{
- /*--------------------------------------------------------*/
- ulint id; /*!< rollback segment id == the index of
- its slot in the trx system file copy */
- mutex_t mutex; /*!< mutex protecting the fields in this
- struct except id; NOTE that the latching
- order must always be kernel mutex ->
- rseg mutex */
- ulint space; /*!< space where the rollback segment is
- header is placed */
- ulint zip_size;/* compressed page size of space
- in bytes, or 0 for uncompressed spaces */
- ulint page_no;/* page number of the rollback segment
- header */
- ulint max_size;/* maximum allowed size in pages */
- ulint curr_size;/* current size in pages */
- /*--------------------------------------------------------*/
- /* Fields for update undo logs */
- UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_list;
- /* List of update undo logs */
- UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_cached;
- /* List of update undo log segments
- cached for fast reuse */
- /*--------------------------------------------------------*/
- /* Fields for insert undo logs */
- UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_list;
- /* List of insert undo logs */
- UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_cached;
- /* List of insert undo log segments
- cached for fast reuse */
- /*--------------------------------------------------------*/
- ulint last_page_no; /*!< Page number of the last not yet
- purged log header in the history list;
- FIL_NULL if all list purged */
- ulint last_offset; /*!< Byte offset of the last not yet
- purged log header */
- trx_id_t last_trx_no; /*!< Transaction number of the last not
- yet purged log */
- ibool last_del_marks; /*!< TRUE if the last not yet purged log
- needs purging */
- /*--------------------------------------------------------*/
- UT_LIST_NODE_T(trx_rseg_t) rseg_list;
- /* the list of the rollback segment
- memory objects */
-};
-
-/* Undo log segment slot in a rollback segment header */
-/*-------------------------------------------------------------*/
-#define TRX_RSEG_SLOT_PAGE_NO 0 /* Page number of the header page of
- an undo log segment */
-/*-------------------------------------------------------------*/
-/* Slot size */
-#define TRX_RSEG_SLOT_SIZE 4
-
-/* The offset of the rollback segment header on its page */
-#define TRX_RSEG FSEG_PAGE_DATA
-
-/* Transaction rollback segment header */
-/*-------------------------------------------------------------*/
-#define TRX_RSEG_MAX_SIZE 0 /* Maximum allowed size for rollback
- segment in pages */
-#define TRX_RSEG_HISTORY_SIZE 4 /* Number of file pages occupied
- by the logs in the history list */
-#define TRX_RSEG_HISTORY 8 /* The update undo logs for committed
- transactions */
-#define TRX_RSEG_FSEG_HEADER (8 + FLST_BASE_NODE_SIZE)
- /* Header for the file segment where
- this page is placed */
-#define TRX_RSEG_UNDO_SLOTS (8 + FLST_BASE_NODE_SIZE + FSEG_HEADER_SIZE)
- /* Undo log segment slots */
-/*-------------------------------------------------------------*/
-
-#ifndef UNIV_NONINL
-#include "trx0rseg.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/trx0rseg.ic b/storage/innodb_plugin/include/trx0rseg.ic
deleted file mode 100644
index daffa92fc7d..00000000000
--- a/storage/innodb_plugin/include/trx0rseg.ic
+++ /dev/null
@@ -1,145 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0rseg.ic
-Rollback segment
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "srv0srv.h"
-#include "mtr0log.h"
-
-/******************************************************************//**
-Gets a rollback segment header.
-@return rollback segment header, page x-latched */
-UNIV_INLINE
-trx_rsegf_t*
-trx_rsegf_get(
-/*==========*/
- ulint space, /*!< in: space where placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number of the header */
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_block_t* block;
- trx_rsegf_t* header;
-
- block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr);
- buf_block_dbg_add_level(block, SYNC_RSEG_HEADER);
-
- header = TRX_RSEG + buf_block_get_frame(block);
-
- return(header);
-}
-
-/******************************************************************//**
-Gets a newly created rollback segment header.
-@return rollback segment header, page x-latched */
-UNIV_INLINE
-trx_rsegf_t*
-trx_rsegf_get_new(
-/*==============*/
- ulint space, /*!< in: space where placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number of the header */
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_block_t* block;
- trx_rsegf_t* header;
-
- block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr);
- buf_block_dbg_add_level(block, SYNC_RSEG_HEADER_NEW);
-
- header = TRX_RSEG + buf_block_get_frame(block);
-
- return(header);
-}
-
-/***************************************************************//**
-Gets the file page number of the nth undo log slot.
-@return page number of the undo log segment */
-UNIV_INLINE
-ulint
-trx_rsegf_get_nth_undo(
-/*===================*/
- trx_rsegf_t* rsegf, /*!< in: rollback segment header */
- ulint n, /*!< in: index of slot */
- mtr_t* mtr) /*!< in: mtr */
-{
- if (UNIV_UNLIKELY(n >= TRX_RSEG_N_SLOTS)) {
- fprintf(stderr,
- "InnoDB: Error: trying to get slot %lu of rseg\n",
- (ulong) n);
- ut_error;
- }
-
- return(mtr_read_ulint(rsegf + TRX_RSEG_UNDO_SLOTS
- + n * TRX_RSEG_SLOT_SIZE, MLOG_4BYTES, mtr));
-}
-
-/***************************************************************//**
-Sets the file page number of the nth undo log slot. */
-UNIV_INLINE
-void
-trx_rsegf_set_nth_undo(
-/*===================*/
- trx_rsegf_t* rsegf, /*!< in: rollback segment header */
- ulint n, /*!< in: index of slot */
- ulint page_no,/*!< in: page number of the undo log segment */
- mtr_t* mtr) /*!< in: mtr */
-{
- if (UNIV_UNLIKELY(n >= TRX_RSEG_N_SLOTS)) {
- fprintf(stderr,
- "InnoDB: Error: trying to set slot %lu of rseg\n",
- (ulong) n);
- ut_error;
- }
-
- mlog_write_ulint(rsegf + TRX_RSEG_UNDO_SLOTS + n * TRX_RSEG_SLOT_SIZE,
- page_no, MLOG_4BYTES, mtr);
-}
-
-/****************************************************************//**
-Looks for a free slot for an undo log segment.
-@return slot index or ULINT_UNDEFINED if not found */
-UNIV_INLINE
-ulint
-trx_rsegf_undo_find_free(
-/*=====================*/
- trx_rsegf_t* rsegf, /*!< in: rollback segment header */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint i;
- ulint page_no;
-
- for (i = 0; i < TRX_RSEG_N_SLOTS; i++) {
-
- page_no = trx_rsegf_get_nth_undo(rsegf, i, mtr);
-
- if (page_no == FIL_NULL) {
-
- return(i);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
diff --git a/storage/innodb_plugin/include/trx0sys.h b/storage/innodb_plugin/include/trx0sys.h
deleted file mode 100644
index a53296a06d9..00000000000
--- a/storage/innodb_plugin/include/trx0sys.h
+++ /dev/null
@@ -1,624 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0sys.h
-Transaction system
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0sys_h
-#define trx0sys_h
-
-#include "univ.i"
-
-#include "trx0types.h"
-#include "fsp0types.h"
-#include "fil0fil.h"
-#include "buf0buf.h"
-#ifndef UNIV_HOTBACKUP
-#include "mtr0mtr.h"
-#include "ut0byte.h"
-#include "mem0mem.h"
-#include "sync0sync.h"
-#include "ut0lst.h"
-#include "read0types.h"
-#include "page0types.h"
-
-/** In a MySQL replication slave, in crash recovery we store the master log
-file name and position here. */
-/* @{ */
-/** Master binlog file name */
-extern char trx_sys_mysql_master_log_name[];
-/** Master binlog file position. We have successfully got the updates
-up to this position. -1 means that no crash recovery was needed, or
-there was no master log position info inside InnoDB.*/
-extern ib_int64_t trx_sys_mysql_master_log_pos;
-/* @} */
-
-/** If this MySQL server uses binary logging, after InnoDB has been inited
-and if it has done a crash recovery, we store the binlog file name and position
-here. */
-/* @{ */
-/** Binlog file name */
-extern char trx_sys_mysql_bin_log_name[];
-/** Binlog file position, or -1 if unknown */
-extern ib_int64_t trx_sys_mysql_bin_log_pos;
-/* @} */
-
-/** The transaction system */
-extern trx_sys_t* trx_sys;
-
-/** Doublewrite system */
-extern trx_doublewrite_t* trx_doublewrite;
-/** The following is set to TRUE when we are upgrading from pre-4.1
-format data files to the multiple tablespaces format data files */
-extern ibool trx_doublewrite_must_reset_space_ids;
-/** Set to TRUE when the doublewrite buffer is being created */
-extern ibool trx_doublewrite_buf_is_being_created;
-/** The following is TRUE when we are using the database in the
-post-4.1 format, i.e., we have successfully upgraded, or have created
-a new database installation */
-extern ibool trx_sys_multiple_tablespace_format;
-
-/****************************************************************//**
-Creates the doublewrite buffer to a new InnoDB installation. The header of the
-doublewrite buffer is placed on the trx system header page. */
-UNIV_INTERN
-void
-trx_sys_create_doublewrite_buf(void);
-/*================================*/
-/****************************************************************//**
-At a database startup initializes the doublewrite buffer memory structure if
-we already have a doublewrite buffer created in the data files. If we are
-upgrading to an InnoDB version which supports multiple tablespaces, then this
-function performs the necessary update operations. If we are in a crash
-recovery, this function uses a possible doublewrite buffer to restore
-half-written pages in the data files. */
-UNIV_INTERN
-void
-trx_sys_doublewrite_init_or_restore_pages(
-/*======================================*/
- ibool restore_corrupt_pages); /*!< in: TRUE=restore pages */
-/****************************************************************//**
-Marks the trx sys header when we have successfully upgraded to the >= 4.1.x
-multiple tablespace format. */
-UNIV_INTERN
-void
-trx_sys_mark_upgraded_to_multiple_tablespaces(void);
-/*===============================================*/
-/****************************************************************//**
-Determines if a page number is located inside the doublewrite buffer.
-@return TRUE if the location is inside the two blocks of the
-doublewrite buffer */
-UNIV_INTERN
-ibool
-trx_doublewrite_page_inside(
-/*========================*/
- ulint page_no); /*!< in: page number */
-/***************************************************************//**
-Checks if a page address is the trx sys header page.
-@return TRUE if trx sys header page */
-UNIV_INLINE
-ibool
-trx_sys_hdr_page(
-/*=============*/
- ulint space, /*!< in: space */
- ulint page_no);/*!< in: page number */
-/*****************************************************************//**
-Creates and initializes the central memory structures for the transaction
-system. This is called when the database is started. */
-UNIV_INTERN
-void
-trx_sys_init_at_db_start(void);
-/*==========================*/
-/*****************************************************************//**
-Creates and initializes the transaction system at the database creation. */
-UNIV_INTERN
-void
-trx_sys_create(void);
-/*================*/
-/****************************************************************//**
-Looks for a free slot for a rollback segment in the trx system file copy.
-@return slot index or ULINT_UNDEFINED if not found */
-UNIV_INTERN
-ulint
-trx_sysf_rseg_find_free(
-/*====================*/
- mtr_t* mtr); /*!< in: mtr */
-/***************************************************************//**
-Gets the pointer in the nth slot of the rseg array.
-@return pointer to rseg object, NULL if slot not in use */
-UNIV_INLINE
-trx_rseg_t*
-trx_sys_get_nth_rseg(
-/*=================*/
- trx_sys_t* sys, /*!< in: trx system */
- ulint n); /*!< in: index of slot */
-/***************************************************************//**
-Sets the pointer in the nth slot of the rseg array. */
-UNIV_INLINE
-void
-trx_sys_set_nth_rseg(
-/*=================*/
- trx_sys_t* sys, /*!< in: trx system */
- ulint n, /*!< in: index of slot */
- trx_rseg_t* rseg); /*!< in: pointer to rseg object, NULL if slot
- not in use */
-/**********************************************************************//**
-Gets a pointer to the transaction system file copy and x-locks its page.
-@return pointer to system file copy, page x-locked */
-UNIV_INLINE
-trx_sysf_t*
-trx_sysf_get(
-/*=========*/
- mtr_t* mtr); /*!< in: mtr */
-/*****************************************************************//**
-Gets the space of the nth rollback segment slot in the trx system
-file copy.
-@return space id */
-UNIV_INLINE
-ulint
-trx_sysf_rseg_get_space(
-/*====================*/
- trx_sysf_t* sys_header, /*!< in: trx sys file copy */
- ulint i, /*!< in: slot index == rseg id */
- mtr_t* mtr); /*!< in: mtr */
-/*****************************************************************//**
-Gets the page number of the nth rollback segment slot in the trx system
-file copy.
-@return page number, FIL_NULL if slot unused */
-UNIV_INLINE
-ulint
-trx_sysf_rseg_get_page_no(
-/*======================*/
- trx_sysf_t* sys_header, /*!< in: trx sys file copy */
- ulint i, /*!< in: slot index == rseg id */
- mtr_t* mtr); /*!< in: mtr */
-/*****************************************************************//**
-Sets the space id of the nth rollback segment slot in the trx system
-file copy. */
-UNIV_INLINE
-void
-trx_sysf_rseg_set_space(
-/*====================*/
- trx_sysf_t* sys_header, /*!< in: trx sys file copy */
- ulint i, /*!< in: slot index == rseg id */
- ulint space, /*!< in: space id */
- mtr_t* mtr); /*!< in: mtr */
-/*****************************************************************//**
-Sets the page number of the nth rollback segment slot in the trx system
-file copy. */
-UNIV_INLINE
-void
-trx_sysf_rseg_set_page_no(
-/*======================*/
- trx_sysf_t* sys_header, /*!< in: trx sys file copy */
- ulint i, /*!< in: slot index == rseg id */
- ulint page_no, /*!< in: page number, FIL_NULL if
- the slot is reset to unused */
- mtr_t* mtr); /*!< in: mtr */
-/*****************************************************************//**
-Allocates a new transaction id.
-@return new, allocated trx id */
-UNIV_INLINE
-trx_id_t
-trx_sys_get_new_trx_id(void);
-/*========================*/
-/*****************************************************************//**
-Allocates a new transaction number.
-@return new, allocated trx number */
-UNIV_INLINE
-trx_id_t
-trx_sys_get_new_trx_no(void);
-/*========================*/
-#endif /* !UNIV_HOTBACKUP */
-/*****************************************************************//**
-Writes a trx id to an index page. In case that the id size changes in
-some future version, this function should be used instead of
-mach_write_... */
-UNIV_INLINE
-void
-trx_write_trx_id(
-/*=============*/
- byte* ptr, /*!< in: pointer to memory where written */
- trx_id_t id); /*!< in: id */
-#ifndef UNIV_HOTBACKUP
-/*****************************************************************//**
-Reads a trx id from an index page. In case that the id size changes in
-some future version, this function should be used instead of
-mach_read_...
-@return id */
-UNIV_INLINE
-trx_id_t
-trx_read_trx_id(
-/*============*/
- const byte* ptr); /*!< in: pointer to memory from where to read */
-/****************************************************************//**
-Looks for the trx handle with the given id in trx_list.
-@return the trx handle or NULL if not found */
-UNIV_INLINE
-trx_t*
-trx_get_on_id(
-/*==========*/
- trx_id_t trx_id);/*!< in: trx id to search for */
-/****************************************************************//**
-Returns the minumum trx id in trx list. This is the smallest id for which
-the trx can possibly be active. (But, you must look at the trx->conc_state to
-find out if the minimum trx id transaction itself is active, or already
-committed.)
-@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
-UNIV_INLINE
-trx_id_t
-trx_list_get_min_trx_id(void);
-/*=========================*/
-/****************************************************************//**
-Checks if a transaction with the given id is active.
-@return TRUE if active */
-UNIV_INLINE
-ibool
-trx_is_active(
-/*==========*/
- trx_id_t trx_id);/*!< in: trx id of the transaction */
-/****************************************************************//**
-Checks that trx is in the trx list.
-@return TRUE if is in */
-UNIV_INTERN
-ibool
-trx_in_trx_list(
-/*============*/
- trx_t* in_trx);/*!< in: trx */
-/*****************************************************************//**
-Updates the offset information about the end of the MySQL binlog entry
-which corresponds to the transaction just being committed. In a MySQL
-replication slave updates the latest master binlog position up to which
-replication has proceeded. */
-UNIV_INTERN
-void
-trx_sys_update_mysql_binlog_offset(
-/*===============================*/
- const char* file_name,/*!< in: MySQL log file name */
- ib_int64_t offset, /*!< in: position in that log file */
- ulint field, /*!< in: offset of the MySQL log info field in
- the trx sys header */
- mtr_t* mtr); /*!< in: mtr */
-/*****************************************************************//**
-Prints to stderr the MySQL binlog offset info in the trx system header if
-the magic number shows it valid. */
-UNIV_INTERN
-void
-trx_sys_print_mysql_binlog_offset(void);
-/*===================================*/
-/*****************************************************************//**
-Prints to stderr the MySQL master log offset info in the trx system header if
-the magic number shows it valid. */
-UNIV_INTERN
-void
-trx_sys_print_mysql_master_log_pos(void);
-/*====================================*/
-/*****************************************************************//**
-Initializes the tablespace tag system. */
-UNIV_INTERN
-void
-trx_sys_file_format_init(void);
-/*==========================*/
-/*****************************************************************//**
-Closes the tablespace tag system. */
-UNIV_INTERN
-void
-trx_sys_file_format_close(void);
-/*===========================*/
-/********************************************************************//**
-Tags the system table space with minimum format id if it has not been
-tagged yet.
-WARNING: This function is only called during the startup and AFTER the
-redo log application during recovery has finished. */
-UNIV_INTERN
-void
-trx_sys_file_format_tag_init(void);
-/*==============================*/
-/*****************************************************************//**
-Shutdown/Close the transaction system. */
-UNIV_INTERN
-void
-trx_sys_close(void);
-/*===============*/
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the name */
-UNIV_INTERN
-const char*
-trx_sys_file_format_id_to_name(
-/*===========================*/
- const ulint id); /*!< in: id of the file format */
-/*****************************************************************//**
-Set the file format id unconditionally except if it's already the
-same value.
-@return TRUE if value updated */
-UNIV_INTERN
-ibool
-trx_sys_file_format_max_set(
-/*========================*/
- ulint format_id, /*!< in: file format id */
- const char** name); /*!< out: max file format name or
- NULL if not needed. */
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the max format name */
-UNIV_INTERN
-const char*
-trx_sys_file_format_max_get(void);
-/*=============================*/
-/*****************************************************************//**
-Check for the max file format tag stored on disk.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ulint
-trx_sys_file_format_max_check(
-/*==========================*/
- ulint max_format_id); /*!< in: the max format id to check */
-/********************************************************************//**
-Update the file format tag in the system tablespace only if the given
-format id is greater than the known max id.
-@return TRUE if format_id was bigger than the known max id */
-UNIV_INTERN
-ibool
-trx_sys_file_format_max_upgrade(
-/*============================*/
- const char** name, /*!< out: max file format name */
- ulint format_id); /*!< in: file format identifier */
-#else /* !UNIV_HOTBACKUP */
-/*****************************************************************//**
-Prints to stderr the MySQL binlog info in the system header if the
-magic number shows it valid. */
-UNIV_INTERN
-void
-trx_sys_print_mysql_binlog_offset_from_page(
-/*========================================*/
- const byte* page); /*!< in: buffer containing the trx
- system header page, i.e., page number
- TRX_SYS_PAGE_NO in the tablespace */
-/*****************************************************************//**
-Reads the file format id from the first system table space file.
-Even if the call succeeds and returns TRUE, the returned format id
-may be ULINT_UNDEFINED signalling that the format id was not present
-in the data file.
-@return TRUE if call succeeds */
-UNIV_INTERN
-ibool
-trx_sys_read_file_format_id(
-/*========================*/
- const char *pathname, /*!< in: pathname of the first system
- table space file */
- ulint *format_id); /*!< out: file format of the system table
- space */
-/*****************************************************************//**
-Reads the file format id from the given per-table data file.
-@return TRUE if call succeeds */
-UNIV_INTERN
-ibool
-trx_sys_read_pertable_file_format_id(
-/*=================================*/
- const char *pathname, /*!< in: pathname of a per-table
- datafile */
- ulint *format_id); /*!< out: file format of the per-table
- data file */
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the name */
-UNIV_INTERN
-const char*
-trx_sys_file_format_id_to_name(
-/*===========================*/
- const ulint id); /*!< in: id of the file format */
-
-#endif /* !UNIV_HOTBACKUP */
-/* The automatically created system rollback segment has this id */
-#define TRX_SYS_SYSTEM_RSEG_ID 0
-
-/* Space id and page no where the trx system file copy resides */
-#define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */
-#include "fsp0fsp.h"
-#define TRX_SYS_PAGE_NO FSP_TRX_SYS_PAGE_NO
-
-/* The offset of the transaction system header on the page */
-#define TRX_SYS FSEG_PAGE_DATA
-
-/** Transaction system header */
-/*------------------------------------------------------------- @{ */
-#define TRX_SYS_TRX_ID_STORE 0 /*!< the maximum trx id or trx
- number modulo
- TRX_SYS_TRX_ID_UPDATE_MARGIN
- written to a file page by any
- transaction; the assignment of
- transaction ids continues from
- this number rounded up by
- TRX_SYS_TRX_ID_UPDATE_MARGIN
- plus
- TRX_SYS_TRX_ID_UPDATE_MARGIN
- when the database is
- started */
-#define TRX_SYS_FSEG_HEADER 8 /*!< segment header for the
- tablespace segment the trx
- system is created into */
-#define TRX_SYS_RSEGS (8 + FSEG_HEADER_SIZE)
- /*!< the start of the array of
- rollback segment specification
- slots */
-/*------------------------------------------------------------- @} */
-
-/** Maximum number of rollback segments: the number of segment
-specification slots in the transaction system array; rollback segment
-id must fit in one byte, therefore 256; each slot is currently 8 bytes
-in size */
-#define TRX_SYS_N_RSEGS 256
-
-/** Maximum length of MySQL binlog file name, in bytes.
-@see trx_sys_mysql_master_log_name
-@see trx_sys_mysql_bin_log_name */
-#define TRX_SYS_MYSQL_LOG_NAME_LEN 512
-/** Contents of TRX_SYS_MYSQL_LOG_MAGIC_N_FLD */
-#define TRX_SYS_MYSQL_LOG_MAGIC_N 873422344
-
-#if UNIV_PAGE_SIZE < 4096
-# error "UNIV_PAGE_SIZE < 4096"
-#endif
-/** The offset of the MySQL replication info in the trx system header;
-this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */
-#define TRX_SYS_MYSQL_MASTER_LOG_INFO (UNIV_PAGE_SIZE - 2000)
-
-/** The offset of the MySQL binlog offset info in the trx system header */
-#define TRX_SYS_MYSQL_LOG_INFO (UNIV_PAGE_SIZE - 1000)
-#define TRX_SYS_MYSQL_LOG_MAGIC_N_FLD 0 /*!< magic number which is
- TRX_SYS_MYSQL_LOG_MAGIC_N
- if we have valid data in the
- MySQL binlog info */
-#define TRX_SYS_MYSQL_LOG_OFFSET_HIGH 4 /*!< high 4 bytes of the offset
- within that file */
-#define TRX_SYS_MYSQL_LOG_OFFSET_LOW 8 /*!< low 4 bytes of the offset
- within that file */
-#define TRX_SYS_MYSQL_LOG_NAME 12 /*!< MySQL log file name */
-
-#ifndef UNIV_HOTBACKUP
-/** Doublewrite buffer */
-/* @{ */
-/** The offset of the doublewrite buffer header on the trx system header page */
-#define TRX_SYS_DOUBLEWRITE (UNIV_PAGE_SIZE - 200)
-/*-------------------------------------------------------------*/
-#define TRX_SYS_DOUBLEWRITE_FSEG 0 /*!< fseg header of the fseg
- containing the doublewrite
- buffer */
-#define TRX_SYS_DOUBLEWRITE_MAGIC FSEG_HEADER_SIZE
- /*!< 4-byte magic number which
- shows if we already have
- created the doublewrite
- buffer */
-#define TRX_SYS_DOUBLEWRITE_BLOCK1 (4 + FSEG_HEADER_SIZE)
- /*!< page number of the
- first page in the first
- sequence of 64
- (= FSP_EXTENT_SIZE) consecutive
- pages in the doublewrite
- buffer */
-#define TRX_SYS_DOUBLEWRITE_BLOCK2 (8 + FSEG_HEADER_SIZE)
- /*!< page number of the
- first page in the second
- sequence of 64 consecutive
- pages in the doublewrite
- buffer */
-#define TRX_SYS_DOUBLEWRITE_REPEAT 12 /*!< we repeat
- TRX_SYS_DOUBLEWRITE_MAGIC,
- TRX_SYS_DOUBLEWRITE_BLOCK1,
- TRX_SYS_DOUBLEWRITE_BLOCK2
- so that if the trx sys
- header is half-written
- to disk, we still may
- be able to recover the
- information */
-/** If this is not yet set to TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
-we must reset the doublewrite buffer, because starting from 4.1.x the
-space id of a data page is stored into
-FIL_PAGE_ARCH_LOG_NO_OR_SPACE_NO. */
-#define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED (24 + FSEG_HEADER_SIZE)
-
-/*-------------------------------------------------------------*/
-/** Contents of TRX_SYS_DOUBLEWRITE_MAGIC */
-#define TRX_SYS_DOUBLEWRITE_MAGIC_N 536853855
-/** Contents of TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED */
-#define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N 1783657386
-
-/** Size of the doublewrite block in pages */
-#define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE FSP_EXTENT_SIZE
-/* @} */
-
-/** File format tag */
-/* @{ */
-/** The offset of the file format tag on the trx system header page
-(TRX_SYS_PAGE_NO of TRX_SYS_SPACE) */
-#define TRX_SYS_FILE_FORMAT_TAG (UNIV_PAGE_SIZE - 16)
-
-/** Contents of TRX_SYS_FILE_FORMAT_TAG when valid. The file format
-identifier is added to this constant. */
-#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW 3645922177UL
-/** Contents of TRX_SYS_FILE_FORMAT_TAG+4 when valid */
-#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH 2745987765UL
-/* @} */
-
-/** Doublewrite control struct: block locations and the write buffer of the doublewrite buffer */
-struct trx_doublewrite_struct{
- mutex_t mutex; /*!< mutex protecting the first_free field and
- write_buf */
- ulint block1; /*!< the page number of the first page of the
- first doublewrite block (64 pages) */
- ulint block2; /*!< the page number of the first page of the second block */
- ulint first_free; /*!< first free position in write_buf measured
- in units of UNIV_PAGE_SIZE */
- byte* write_buf; /*!< write buffer used in writing to the
- doublewrite buffer, aligned to an
- address divisible by UNIV_PAGE_SIZE
- (which is required by Windows aio) */
- byte* write_buf_unaligned;
- /*!< pointer to the memory of write_buf before alignment */
- buf_page_t**
- buf_block_arr; /*!< array to store pointers to the buffer
- blocks which have been cached to write_buf */
-};
-
-/** The transaction system central memory data structure; protected by the
-kernel mutex (except where a field comment names another mutex) */
-struct trx_sys_struct{
- trx_id_t max_trx_id; /*!< The smallest number not yet
- assigned as a transaction id or
- transaction number */
- UT_LIST_BASE_NODE_T(trx_t) trx_list;
- /*!< List of active and committed in
- memory transactions, sorted on trx id,
- biggest first */
- UT_LIST_BASE_NODE_T(trx_t) mysql_trx_list;
- /*!< List of transactions created
- for MySQL */
- UT_LIST_BASE_NODE_T(trx_rseg_t) rseg_list;
- /*!< List of rollback segment
- objects */
- trx_rseg_t* latest_rseg; /*!< Latest rollback segment in the
- round-robin assignment of rollback
- segments to transactions */
- trx_rseg_t* rseg_array[TRX_SYS_N_RSEGS];
- /*!< Pointer array to rollback segments,
- one per slot; NULL if slot not in use */
- ulint rseg_history_len;/*!< Length of the TRX_RSEG_HISTORY
- list (update undo logs for committed
- transactions), protected by
- rseg->mutex */
- UT_LIST_BASE_NODE_T(read_view_t) view_list;
- /*!< List of read views sorted
- on trx no, biggest first */
-};
-
-/** When a trx id which is zero modulo this number (which must be a power of
-two) is assigned, the field TRX_SYS_TRX_ID_STORE on the transaction system
-page is updated */
-#define TRX_SYS_TRX_ID_WRITE_MARGIN 256
-#endif /* !UNIV_HOTBACKUP */
-
-#ifndef UNIV_NONINL
-#include "trx0sys.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/trx0sys.ic b/storage/innodb_plugin/include/trx0sys.ic
deleted file mode 100644
index 820d31d0692..00000000000
--- a/storage/innodb_plugin/include/trx0sys.ic
+++ /dev/null
@@ -1,387 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0sys.ic
-Transaction system
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0trx.h"
-#include "data0type.h"
-#ifndef UNIV_HOTBACKUP
-# include "srv0srv.h"
-# include "mtr0log.h"
-
-/* The typedef for rseg slot in the file copy */
-typedef byte trx_sysf_rseg_t;
-
-/* Rollback segment specification slot offsets */
-/*-------------------------------------------------------------*/
-#define TRX_SYS_RSEG_SPACE 0 /* space where the segment
- header is placed; starting with
- MySQL/InnoDB 5.1.7, this is
- UNIV_UNDEFINED if the slot is unused */
-#define TRX_SYS_RSEG_PAGE_NO 4 /* page number where the segment
- header is placed; this is FIL_NULL
- if the slot is unused */
-/*-------------------------------------------------------------*/
-/* Size of a rollback segment specification slot */
-#define TRX_SYS_RSEG_SLOT_SIZE 8
-
-/*****************************************************************//**
-Writes the value of max_trx_id to the file based trx system header. */
-UNIV_INTERN
-void
-trx_sys_flush_max_trx_id(void);
-/*==========================*/
-
-/***************************************************************//**
-Checks if a page address is the trx sys header page, i.e. page TRX_SYS_PAGE_NO of space TRX_SYS_SPACE.
-@return TRUE if trx sys header page, FALSE otherwise */
-UNIV_INLINE
-ibool
-trx_sys_hdr_page(
-/*=============*/
- ulint space, /*!< in: space id of the page */
- ulint page_no)/*!< in: page number within the space */
-{
- if ((space == TRX_SYS_SPACE) && (page_no == TRX_SYS_PAGE_NO)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/***************************************************************//**
-Gets the pointer in the nth slot of sys->rseg_array; the kernel mutex must be owned (asserted with ut_ad).
-@return pointer to rseg object, NULL if slot not in use */
-UNIV_INLINE
-trx_rseg_t*
-trx_sys_get_nth_rseg(
-/*=================*/
- trx_sys_t* sys, /*!< in: trx system */
- ulint n) /*!< in: index of slot, must be < TRX_SYS_N_RSEGS */
-{
- ut_ad(mutex_own(&(kernel_mutex)));
- ut_ad(n < TRX_SYS_N_RSEGS);
-
- return(sys->rseg_array[n]);
-}
-
-/***************************************************************//**
-Sets the pointer in the nth slot of sys->rseg_array. NOTE(review): no kernel mutex assertion here, unlike trx_sys_get_nth_rseg() - confirm callers. */
-UNIV_INLINE
-void
-trx_sys_set_nth_rseg(
-/*=================*/
- trx_sys_t* sys, /*!< in: trx system */
- ulint n, /*!< in: index of slot, must be < TRX_SYS_N_RSEGS */
- trx_rseg_t* rseg) /*!< in: pointer to rseg object, NULL if slot
- not in use */
-{
- ut_ad(n < TRX_SYS_N_RSEGS);
-
- sys->rseg_array[n] = rseg;
-}
-
-/**********************************************************************//**
-Gets a pointer to the transaction system header (offset TRX_SYS in page TRX_SYS_PAGE_NO of TRX_SYS_SPACE) and x-latches its page.
-@return pointer to system header, page x-latched. */
-UNIV_INLINE
-trx_sysf_t*
-trx_sysf_get(
-/*=========*/
- mtr_t* mtr) /*!< in: mtr in which the page is fetched and latched; must not be NULL */
-{
- buf_block_t* block;
- trx_sysf_t* header;
-
- ut_ad(mtr);
-
- block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
- RW_X_LATCH, mtr);
- buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
-
- header = TRX_SYS + buf_block_get_frame(block);
-
- return(header);
-}
-
-/*****************************************************************//**
-Gets the space id stored in the nth rollback segment slot of the trx system
-file copy (field TRX_SYS_RSEG_SPACE of the slot); the kernel mutex must be owned.
-@return space id read from the slot */
-UNIV_INLINE
-ulint
-trx_sysf_rseg_get_space(
-/*====================*/
- trx_sysf_t* sys_header, /*!< in: trx sys header */
- ulint i, /*!< in: slot index == rseg id, must be < TRX_SYS_N_RSEGS */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(mutex_own(&(kernel_mutex)));
- ut_ad(sys_header);
- ut_ad(i < TRX_SYS_N_RSEGS);
-
- return(mtr_read_ulint(sys_header + TRX_SYS_RSEGS
- + i * TRX_SYS_RSEG_SLOT_SIZE
- + TRX_SYS_RSEG_SPACE, MLOG_4BYTES, mtr));
-}
-
-/*****************************************************************//**
-Gets the page number stored in the nth rollback segment slot of the trx system
-header (field TRX_SYS_RSEG_PAGE_NO of the slot); the kernel mutex must be owned.
-@return page number, FIL_NULL if slot unused */
-UNIV_INLINE
-ulint
-trx_sysf_rseg_get_page_no(
-/*======================*/
- trx_sysf_t* sys_header, /*!< in: trx system header */
- ulint i, /*!< in: slot index == rseg id, must be < TRX_SYS_N_RSEGS */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(sys_header);
- ut_ad(mutex_own(&(kernel_mutex)));
- ut_ad(i < TRX_SYS_N_RSEGS);
-
- return(mtr_read_ulint(sys_header + TRX_SYS_RSEGS
- + i * TRX_SYS_RSEG_SLOT_SIZE
- + TRX_SYS_RSEG_PAGE_NO, MLOG_4BYTES, mtr));
-}
-
-/*****************************************************************//**
-Sets the space id of the nth rollback segment slot in the trx system
-file copy, writing it with mlog_write_ulint(); the kernel mutex must be owned. */
-UNIV_INLINE
-void
-trx_sysf_rseg_set_space(
-/*====================*/
- trx_sysf_t* sys_header, /*!< in: trx sys file copy */
- ulint i, /*!< in: slot index == rseg id, must be < TRX_SYS_N_RSEGS */
- ulint space, /*!< in: space id to store in the slot */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(mutex_own(&(kernel_mutex)));
- ut_ad(sys_header);
- ut_ad(i < TRX_SYS_N_RSEGS);
-
- mlog_write_ulint(sys_header + TRX_SYS_RSEGS
- + i * TRX_SYS_RSEG_SLOT_SIZE
- + TRX_SYS_RSEG_SPACE,
- space,
- MLOG_4BYTES, mtr);
-}
-
-/*****************************************************************//**
-Sets the page number of the nth rollback segment slot in the trx system
-header, writing it with mlog_write_ulint(); the kernel mutex must be owned. */
-UNIV_INLINE
-void
-trx_sysf_rseg_set_page_no(
-/*======================*/
- trx_sysf_t* sys_header, /*!< in: trx sys header */
- ulint i, /*!< in: slot index == rseg id, must be < TRX_SYS_N_RSEGS */
- ulint page_no, /*!< in: page number, FIL_NULL if the
- slot is reset to unused */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(mutex_own(&(kernel_mutex)));
- ut_ad(sys_header);
- ut_ad(i < TRX_SYS_N_RSEGS);
-
- mlog_write_ulint(sys_header + TRX_SYS_RSEGS
- + i * TRX_SYS_RSEG_SLOT_SIZE
- + TRX_SYS_RSEG_PAGE_NO,
- page_no,
- MLOG_4BYTES, mtr);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*****************************************************************//**
-Writes a trx id to an index page using mach_write_to_6(). In case that the
-id size changes in some future version, this function should be used instead of
-mach_write_...; compilation fails if DATA_TRX_ID_LEN is not 6. */
-UNIV_INLINE
-void
-trx_write_trx_id(
-/*=============*/
- byte* ptr, /*!< in: pointer to memory where the id is written */
- trx_id_t id) /*!< in: id to write */
-{
-#if DATA_TRX_ID_LEN != 6
-# error "DATA_TRX_ID_LEN != 6"
-#endif
- mach_write_to_6(ptr, id);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*****************************************************************//**
-Reads a trx id from an index page using mach_read_from_6(). In case that the
-id size changes in some future version, this function should be used instead of
-mach_read_...; compilation fails if DATA_TRX_ID_LEN is not 6.
-@return id */
-UNIV_INLINE
-trx_id_t
-trx_read_trx_id(
-/*============*/
- const byte* ptr) /*!< in: pointer to memory from where the id is read */
-{
-#if DATA_TRX_ID_LEN != 6
-# error "DATA_TRX_ID_LEN != 6"
-#endif
- return(mach_read_from_6(ptr));
-}
-
-/****************************************************************//**
-Looks for the trx handle with the given id by scanning trx_sys->trx_list from its first node; the kernel mutex must be owned.
-@return the trx handle or NULL if not found */
-UNIV_INLINE
-trx_t*
-trx_get_on_id(
-/*==========*/
- trx_id_t trx_id) /*!< in: trx id to search for */
-{
- trx_t* trx;
-
- ut_ad(mutex_own(&(kernel_mutex)));
-
- trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-
- while (trx != NULL) {
- if (0 == ut_dulint_cmp(trx_id, trx->id)) {
-
- return(trx);
- }
-
- trx = UT_LIST_GET_NEXT(trx_list, trx);
- }
-
- return(NULL);
-}
-
-/****************************************************************//**
-Returns the minimum trx id in trx list (the id of its last node). This is the smallest id for which
-the trx can possibly be active. (But, you must look at the trx->conc_state to
-find out if the minimum trx id transaction itself is active, or already
-committed.) The kernel mutex must be owned.
-@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
-UNIV_INLINE
-trx_id_t
-trx_list_get_min_trx_id(void)
-/*=========================*/
-{
- trx_t* trx;
-
- ut_ad(mutex_own(&(kernel_mutex)));
-
- trx = UT_LIST_GET_LAST(trx_sys->trx_list);
-
- if (trx == NULL) {
-
- return(trx_sys->max_trx_id);
- }
-
- return(trx->id);
-}
-
-/****************************************************************//**
-Checks if a transaction with the given id is active, i.e. found in trx_list in state TRX_ACTIVE or TRX_PREPARED; the kernel mutex must be owned.
-@return TRUE if active, or if trx_id >= trx_sys->max_trx_id (see the comment in the body) */
-UNIV_INLINE
-ibool
-trx_is_active(
-/*==========*/
- trx_id_t trx_id) /*!< in: trx id of the transaction */
-{
- trx_t* trx;
-
- ut_ad(mutex_own(&(kernel_mutex)));
-
- if (ut_dulint_cmp(trx_id, trx_list_get_min_trx_id()) < 0) {
-
- return(FALSE);
- }
-
- if (ut_dulint_cmp(trx_id, trx_sys->max_trx_id) >= 0) {
-
- /* There must be corruption: we return TRUE because this
- function is only called by lock_clust_rec_some_has_impl()
- and row_vers_impl_x_locked_off_kernel() and they have
- diagnostic prints in this case */
-
- return(TRUE);
- }
-
- trx = trx_get_on_id(trx_id);
- if (trx && (trx->conc_state == TRX_ACTIVE
- || trx->conc_state == TRX_PREPARED)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*****************************************************************//**
-Allocates a new transaction id: returns trx_sys->max_trx_id and then increments it; the kernel mutex must be owned.
-@return new, allocated trx id */
-UNIV_INLINE
-trx_id_t
-trx_sys_get_new_trx_id(void)
-/*========================*/
-{
- trx_id_t id;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- /* VERY important: after the database is started, max_trx_id value is
- divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the following if
- will evaluate to TRUE when this function is called for the first time,
- and the value of max_trx_id will be written to the disk-based header!
- Thus trx id values will not overlap when the database is
- repeatedly started! */
-
- if (ut_dulint_get_low(trx_sys->max_trx_id)
- % TRX_SYS_TRX_ID_WRITE_MARGIN == 0) {
-
- trx_sys_flush_max_trx_id();
- }
-
- id = trx_sys->max_trx_id;
-
- UT_DULINT_INC(trx_sys->max_trx_id);
-
- return(id);
-}
-
-/*****************************************************************//**
-Allocates a new transaction number by calling trx_sys_get_new_trx_id(), so ids and numbers share one counter; the kernel mutex must be owned.
-@return new, allocated trx number */
-UNIV_INLINE
-trx_id_t
-trx_sys_get_new_trx_no(void)
-/*========================*/
-{
- ut_ad(mutex_own(&kernel_mutex));
-
- return(trx_sys_get_new_trx_id());
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/trx0trx.h b/storage/innodb_plugin/include/trx0trx.h
deleted file mode 100644
index d2a59740c93..00000000000
--- a/storage/innodb_plugin/include/trx0trx.h
+++ /dev/null
@@ -1,814 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0trx.h
-The transaction
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0trx_h
-#define trx0trx_h
-
-#include "univ.i"
-#include "trx0types.h"
-#include "dict0types.h"
-#ifndef UNIV_HOTBACKUP
-#include "lock0types.h"
-#include "usr0types.h"
-#include "que0types.h"
-#include "mem0mem.h"
-#include "read0types.h"
-#include "trx0xa.h"
-#include "ut0vec.h"
-
-/** Dummy session used currently in MySQL interface */
-extern sess_t* trx_dummy_sess;
-
-/** Number of transactions currently allocated for MySQL: protected by
-the kernel mutex */
-extern ulint trx_n_mysql_transactions;
-
-/********************************************************************//**
-Releases the search latch if trx has reserved it. */
-UNIV_INTERN
-void
-trx_search_latch_release_if_reserved(
-/*=================================*/
- trx_t* trx); /*!< in: transaction */
-/******************************************************************//**
-Set detailed error message for the transaction. */
-UNIV_INTERN
-void
-trx_set_detailed_error(
-/*===================*/
- trx_t* trx, /*!< in: transaction struct */
- const char* msg); /*!< in: detailed error message */
-/*************************************************************//**
-Set detailed error message for the transaction from a file. Note that the
-file is rewound before reading from it. */
-UNIV_INTERN
-void
-trx_set_detailed_error_from_file(
-/*=============================*/
- trx_t* trx, /*!< in: transaction struct */
- FILE* file); /*!< in: file to read message from */
-/****************************************************************//**
-Retrieves the error_info field from a trx.
-@return the error info */
-UNIV_INLINE
-const dict_index_t*
-trx_get_error_info(
-/*===============*/
- const trx_t* trx); /*!< in: trx object */
-/****************************************************************//**
-Creates and initializes a transaction object.
-@return own: the transaction */
-UNIV_INTERN
-trx_t*
-trx_create(
-/*=======*/
- sess_t* sess) /*!< in: session */
- __attribute__((nonnull));
-/********************************************************************//**
-Creates a transaction object for MySQL.
-@return own: transaction object */
-UNIV_INTERN
-trx_t*
-trx_allocate_for_mysql(void);
-/*========================*/
-/********************************************************************//**
-Creates a transaction object for background operations by the master thread.
-@return own: transaction object */
-UNIV_INTERN
-trx_t*
-trx_allocate_for_background(void);
-/*=============================*/
-/********************************************************************//**
-Frees a transaction object. */
-UNIV_INTERN
-void
-trx_free(
-/*=====*/
- trx_t* trx); /*!< in, own: trx object */
-/********************************************************************//**
-Frees a transaction object for MySQL. */
-UNIV_INTERN
-void
-trx_free_for_mysql(
-/*===============*/
- trx_t* trx); /*!< in, own: trx object */
-/********************************************************************//**
-Frees a transaction object of a background operation of the master thread. */
-UNIV_INTERN
-void
-trx_free_for_background(
-/*====================*/
- trx_t* trx); /*!< in, own: trx object */
-/****************************************************************//**
-Creates trx objects for transactions and initializes the trx list of
-trx_sys at database start. Rollback segment and undo log lists must
-already exist when this function is called, because the lists of
-transactions to be rolled back or cleaned up are built based on the
-undo log lists. */
-UNIV_INTERN
-void
-trx_lists_init_at_db_start(void);
-/*============================*/
-/****************************************************************//**
-Starts a new transaction.
-@return TRUE if success, FALSE if the rollback segment could not
-support this many transactions */
-UNIV_INTERN
-ibool
-trx_start(
-/*======*/
- trx_t* trx, /*!< in: transaction */
- ulint rseg_id);/*!< in: rollback segment id; if ULINT_UNDEFINED
- is passed, the system chooses the rollback segment
- automatically in a round-robin fashion */
-/****************************************************************//**
-Starts a new transaction.
-@return TRUE */
-UNIV_INTERN
-ibool
-trx_start_low(
-/*==========*/
- trx_t* trx, /*!< in: transaction */
- ulint rseg_id);/*!< in: rollback segment id; if ULINT_UNDEFINED
- is passed, the system chooses the rollback segment
- automatically in a round-robin fashion */
-/*************************************************************//**
-Starts the transaction if it is not yet started. */
-UNIV_INLINE
-void
-trx_start_if_not_started(
-/*=====================*/
- trx_t* trx); /*!< in: transaction */
-/*************************************************************//**
-Starts the transaction if it is not yet started. Assumes we have reserved
-the kernel mutex! */
-UNIV_INLINE
-void
-trx_start_if_not_started_low(
-/*=========================*/
- trx_t* trx); /*!< in: transaction */
-/****************************************************************//**
-Commits a transaction. */
-UNIV_INTERN
-void
-trx_commit_off_kernel(
-/*==================*/
- trx_t* trx); /*!< in: transaction */
-/****************************************************************//**
-Cleans up a transaction at database startup. The cleanup is needed if
-the transaction already got to the middle of a commit when the database
-crashed, and we cannot roll it back. */
-UNIV_INTERN
-void
-trx_cleanup_at_db_startup(
-/*======================*/
- trx_t* trx); /*!< in: transaction */
-/**********************************************************************//**
-Does the transaction commit for MySQL.
-@return DB_SUCCESS or error number */
-UNIV_INTERN
-ulint
-trx_commit_for_mysql(
-/*=================*/
- trx_t* trx); /*!< in: trx handle */
-/**********************************************************************//**
-Does the transaction prepare for MySQL.
-@return 0 or error number */
-UNIV_INTERN
-ulint
-trx_prepare_for_mysql(
-/*==================*/
- trx_t* trx); /*!< in: trx handle */
-/**********************************************************************//**
-This function is used to find number of prepared transactions and
-their transaction objects for a recovery.
-@return number of prepared transactions */
-UNIV_INTERN
-int
-trx_recover_for_mysql(
-/*==================*/
- XID* xid_list, /*!< in/out: prepared transactions */
- ulint len); /*!< in: number of slots in xid_list */
-/*******************************************************************//**
-This function is used to find one X/Open XA distributed transaction
-which is in the prepared state
-@return trx or NULL */
-UNIV_INTERN
-trx_t *
-trx_get_trx_by_xid(
-/*===============*/
- XID* xid); /*!< in: X/Open XA transaction identification */
-/**********************************************************************//**
-If required, flushes the log to disk if we called trx_commit_for_mysql()
-with trx->flush_log_later == TRUE.
-@return 0 or error number */
-UNIV_INTERN
-ulint
-trx_commit_complete_for_mysql(
-/*==========================*/
- trx_t* trx); /*!< in: trx handle */
-/**********************************************************************//**
-Marks the latest SQL statement ended. */
-UNIV_INTERN
-void
-trx_mark_sql_stat_end(
-/*==================*/
- trx_t* trx); /*!< in: trx handle */
-/********************************************************************//**
-Assigns a read view for a consistent read query. All the consistent reads
-within the same transaction will get the same read view, which is created
-when this function is first called for a new started transaction.
-@return consistent read view */
-UNIV_INTERN
-read_view_t*
-trx_assign_read_view(
-/*=================*/
- trx_t* trx); /*!< in: active transaction */
-/***********************************************************//**
-The transaction must be in the TRX_QUE_LOCK_WAIT state. Puts it to
-the TRX_QUE_RUNNING state and releases query threads which were
-waiting for a lock in the wait_thrs list. */
-UNIV_INTERN
-void
-trx_end_lock_wait(
-/*==============*/
- trx_t* trx); /*!< in: transaction */
-/****************************************************************//**
-Sends a signal to a trx object. */
-UNIV_INTERN
-void
-trx_sig_send(
-/*=========*/
- trx_t* trx, /*!< in: trx handle */
- ulint type, /*!< in: signal type */
- ulint sender, /*!< in: TRX_SIG_SELF or
- TRX_SIG_OTHER_SESS */
- que_thr_t* receiver_thr, /*!< in: query thread which wants the
- reply, or NULL; if type is
- TRX_SIG_END_WAIT, this must be NULL */
- trx_savept_t* savept, /*!< in: possible rollback savepoint, or
- NULL */
- que_thr_t** next_thr); /*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread; if the parameter
- is NULL, it is ignored */
-/****************************************************************//**
-Send the reply message when a signal in the queue of the trx has
-been handled. */
-UNIV_INTERN
-void
-trx_sig_reply(
-/*==========*/
- trx_sig_t* sig, /*!< in: signal */
- que_thr_t** next_thr); /*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread */
-/****************************************************************//**
-Removes the signal object from a trx signal queue. */
-UNIV_INTERN
-void
-trx_sig_remove(
-/*===========*/
- trx_t* trx, /*!< in: trx handle */
- trx_sig_t* sig); /*!< in, own: signal */
-/****************************************************************//**
-Starts handling of a trx signal. */
-UNIV_INTERN
-void
-trx_sig_start_handle(
-/*=================*/
- trx_t* trx, /*!< in: trx handle */
- que_thr_t** next_thr); /*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread */
-/****************************************************************//**
-Ends signal handling. If the session is in the error state, and
-trx->graph_before_signal_handling != NULL, returns control to the error
-handling routine of the graph (currently only returns the control to the
-graph root which then sends an error message to the client). */
-UNIV_INTERN
-void
-trx_end_signal_handling(
-/*====================*/
- trx_t* trx); /*!< in: trx */
-/*********************************************************************//**
-Creates a commit command node struct.
-@return own: commit node struct */
-UNIV_INTERN
-commit_node_t*
-commit_node_create(
-/*===============*/
- mem_heap_t* heap); /*!< in: mem heap where created */
-/***********************************************************//**
-Performs an execution step for a commit type node in a query graph.
-@return query thread to run next, or NULL */
-UNIV_INTERN
-que_thr_t*
-trx_commit_step(
-/*============*/
- que_thr_t* thr); /*!< in: query thread */
-
-/**********************************************************************//**
-Prints info about a transaction to the given file. The caller must own the
-kernel mutex and must have called
-innobase_mysql_prepare_print_arbitrary_thd(), unless he knows that MySQL
-or InnoDB cannot meanwhile change the info printed here. */
-UNIV_INTERN
-void
-trx_print(
-/*======*/
- FILE* f, /*!< in: output stream */
- trx_t* trx, /*!< in: transaction */
- ulint max_query_len); /*!< in: max query length to print, or 0 to
- use the default max length */
-
-/** Type of data dictionary operation */
-enum trx_dict_op {
- /** The transaction is not modifying the data dictionary. */
- TRX_DICT_OP_NONE = 0,
- /** The transaction is creating a table or an index, or
- dropping a table. The table must be dropped in crash
- recovery. This and TRX_DICT_OP_NONE are the only possible
- operation modes in crash recovery. */
- TRX_DICT_OP_TABLE = 1,
- /** The transaction is creating or dropping an index in an
- existing table. In crash recovery, the data dictionary
- must be locked, but the table must not be dropped. */
- TRX_DICT_OP_INDEX = 2
-};
-
-/**********************************************************************//**
-Determine if a transaction is a dictionary operation.
-@return dictionary operation mode */
-UNIV_INLINE
-enum trx_dict_op
-trx_get_dict_operation(
-/*===================*/
- const trx_t* trx) /*!< in: transaction */
- __attribute__((pure));
-/**********************************************************************//**
-Flag a transaction a dictionary operation. */
-UNIV_INLINE
-void
-trx_set_dict_operation(
-/*===================*/
- trx_t* trx, /*!< in/out: transaction */
- enum trx_dict_op op); /*!< in: operation, not
- TRX_DICT_OP_NONE */
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Determines if the currently running transaction has been interrupted.
-@return TRUE if interrupted */
-UNIV_INTERN
-ibool
-trx_is_interrupted(
-/*===============*/
- trx_t* trx); /*!< in: transaction */
-#else /* !UNIV_HOTBACKUP; NOTE(review): this appears to be nested in an earlier #ifndef UNIV_HOTBACKUP, so this branch looks unreachable - confirm */
-#define trx_is_interrupted(trx) FALSE
-#endif /* !UNIV_HOTBACKUP */
-
-/*******************************************************************//**
-Calculates the "weight" of a transaction. The weight of one transaction
-is estimated as the number of altered rows + the number of locked rows.
-@param t transaction
-@return transaction weight */
-#define TRX_WEIGHT(t) \
- ut_dulint_add((t)->undo_no, UT_LIST_GET_LEN((t)->trx_locks))
-
-/*******************************************************************//**
-Compares the "weight" (or size) of two transactions. Transactions that
-have edited non-transactional tables are considered heavier than ones
-that have not.
-@return <0, 0 or >0; similar to strcmp(3) */
-UNIV_INTERN
-int
-trx_weight_cmp(
-/*===========*/
- const trx_t* a, /*!< in: the first transaction to be compared */
- const trx_t* b); /*!< in: the second transaction to be compared */
-
-/*******************************************************************//**
-Retrieves transaction's id, represented as unsigned long long.
-@return transaction's id */
-UNIV_INLINE
-ullint
-trx_get_id(
-/*=======*/
- const trx_t* trx); /*!< in: transaction */
-
-/* Maximum length of a string that can be returned by
-trx_get_que_state_str(). */
-#define TRX_QUE_STATE_STR_MAX_LEN 12 /* "ROLLING BACK" */
-
-/*******************************************************************//**
-Retrieves transaction's que state in a human readable string. The string
-should not be free()'d or modified.
-@return string in the data segment */
-UNIV_INLINE
-const char*
-trx_get_que_state_str(
-/*==================*/
- const trx_t* trx); /*!< in: transaction */
-
-/* Signal to a transaction */
-struct trx_sig_struct{
- unsigned type:3; /*!< signal type */
- unsigned sender:1; /*!< TRX_SIG_SELF or
- TRX_SIG_OTHER_SESS */
- que_thr_t* receiver; /*!< non-NULL if the sender of the signal
- wants reply after the operation induced
- by the signal is completed */
- trx_savept_t savept; /*!< possible rollback savepoint */
- UT_LIST_NODE_T(trx_sig_t)
- signals; /*!< queue of pending signals to the
- transaction */
- UT_LIST_NODE_T(trx_sig_t)
- reply_signals; /*!< list of signals for which the sender
- transaction is waiting a reply */
-};
-
-#define TRX_MAGIC_N 91118598
-
-/* The transaction handle; every session has a trx object which is freed only
-when the session is freed; in addition there may be session-less transactions
-rolling back after a database recovery */
-
-struct trx_struct{
- ulint magic_n;
- /* All the next fields are protected by the kernel mutex, except the
- undo logs which are protected by undo_mutex */
- const char* op_info; /*!< English text describing the
- current operation, or an empty
- string */
- unsigned is_purge:1; /*!< 0=user transaction, 1=purge */
- unsigned is_recovered:1; /*!< 0=normal transaction,
- 1=recovered, must be rolled back */
- unsigned conc_state:2; /*!< state of the trx from the point
- of view of concurrency control:
- TRX_ACTIVE, TRX_COMMITTED_IN_MEMORY,
- ... */
- unsigned que_state:2; /*!< valid when conc_state == TRX_ACTIVE:
- TRX_QUE_RUNNING, TRX_QUE_LOCK_WAIT,
- ... */
- unsigned isolation_level:2;/* TRX_ISO_REPEATABLE_READ, ... */
- unsigned check_foreigns:1;/* normally TRUE, but if the user
- wants to suppress foreign key checks,
- (in table imports, for example) we
- set this FALSE */
- unsigned check_unique_secondary:1;
- /* normally TRUE, but if the user
- wants to speed up inserts by
- suppressing unique key checks
- for secondary indexes when we decide
- if we can use the insert buffer for
- them, we set this FALSE */
- unsigned support_xa:1; /*!< normally we do the XA two-phase
- commit steps, but by setting this to
- FALSE, one can save CPU time and about
- 150 bytes in the undo log size as then
- we skip XA steps */
- unsigned flush_log_later:1;/* In 2PC, we hold the
- prepare_commit mutex across
- both phases. In that case, we
- defer flush of the logs to disk
- until after we release the
- mutex. */
- unsigned must_flush_log_later:1;/* this flag is set to TRUE in
- trx_commit_off_kernel() if
- flush_log_later was TRUE, and there
- were modifications by the transaction;
- in that case we must flush the log
- in trx_commit_complete_for_mysql() */
- unsigned dict_operation:2;/**< @see enum trx_dict_op */
- unsigned duplicates:2; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
- unsigned active_trans:2; /*!< 1 - if a transaction in MySQL
- is active. 2 - if prepare_commit_mutex
- was taken */
- unsigned has_search_latch:1;
- /* TRUE if this trx has latched the
- search system latch in S-mode */
- unsigned declared_to_be_inside_innodb:1;
- /* this is TRUE if we have declared
- this transaction in
- srv_conc_enter_innodb to be inside the
- InnoDB engine */
- unsigned handling_signals:1;/* this is TRUE as long as the trx
- is handling signals */
- unsigned dict_operation_lock_mode:2;
- /* 0, RW_S_LATCH, or RW_X_LATCH:
- the latch mode trx currently holds
- on dict_operation_lock */
- time_t start_time; /*!< time the trx object was created
- or the state last time became
- TRX_ACTIVE */
- trx_id_t id; /*!< transaction id */
- XID xid; /*!< X/Open XA transaction
- identification to identify a
- transaction branch */
- trx_id_t no; /*!< transaction serialization number ==
- max trx id when the transaction is
- moved to COMMITTED_IN_MEMORY state */
- ib_uint64_t commit_lsn; /*!< lsn at the time of the commit */
- trx_id_t table_id; /*!< Table to drop iff dict_operation
- is TRUE, or ut_dulint_zero. */
- /*------------------------------*/
- void* mysql_thd; /*!< MySQL thread handle corresponding
- to this trx, or NULL */
- char** mysql_query_str;/* pointer to the field in mysqld_thd
- which contains the pointer to the
- current SQL query string */
- const char* mysql_log_file_name;
- /* if MySQL binlog is used, this field
- contains a pointer to the latest file
- name; this is NULL if binlog is not
- used */
- ib_int64_t mysql_log_offset;/* if MySQL binlog is used, this field
- contains the end offset of the binlog
- entry */
- os_thread_id_t mysql_thread_id;/* id of the MySQL thread associated
- with this transaction object */
- ulint mysql_process_no;/* since in Linux, 'top' reports
- process id's and not thread id's, we
- store the process number too */
- /*------------------------------*/
- ulint n_mysql_tables_in_use; /* number of Innobase tables
- used in the processing of the current
- SQL statement in MySQL */
- ulint mysql_n_tables_locked;
- /* how many tables the current SQL
- statement uses, except those
- in consistent read */
- ulint search_latch_timeout;
- /* If we notice that someone is
- waiting for our S-lock on the search
- latch to be released, we wait in
- row0sel.c for BTR_SEA_TIMEOUT new
- searches until we try to keep
- the search latch again over
- calls from MySQL; this is intended
- to reduce contention on the search
- latch */
- /*------------------------------*/
- ulint n_tickets_to_enter_innodb;
- /* this can be > 0 only when
- declared_to_... is TRUE; when we come
- to srv_conc_innodb_enter, if the value
- here is > 0, we decrement this by 1 */
- /*------------------------------*/
- UT_LIST_NODE_T(trx_t)
- trx_list; /*!< list of transactions */
- UT_LIST_NODE_T(trx_t)
- mysql_trx_list; /*!< list of transactions created for
- MySQL */
- /*------------------------------*/
- ulint error_state; /*!< 0 if no error, otherwise error
- number; NOTE That ONLY the thread
- doing the transaction is allowed to
- set this field: this is NOT protected
- by the kernel mutex */
- const dict_index_t*error_info; /*!< if the error number indicates a
- duplicate key error, a pointer to
- the problematic index is stored here */
- ulint error_key_num; /*!< if the index creation fails to a
- duplicate key error, a mysql key
- number of that index is stored here */
- sess_t* sess; /*!< session of the trx, NULL if none */
- que_t* graph; /*!< query currently run in the session,
- or NULL if none; NOTE that the query
- belongs to the session, and it can
- survive over a transaction commit, if
- it is a stored procedure with a COMMIT
- WORK statement, for instance */
- ulint n_active_thrs; /*!< number of active query threads */
- que_t* graph_before_signal_handling;
- /* value of graph when signal handling
- for this trx started: this is used to
- return control to the original query
- graph for error processing */
- trx_sig_t sig; /*!< one signal object can be allocated
- in this space, avoiding mem_alloc */
- UT_LIST_BASE_NODE_T(trx_sig_t)
- signals; /*!< queue of processed or pending
- signals to the trx */
- UT_LIST_BASE_NODE_T(trx_sig_t)
- reply_signals; /*!< list of signals sent by the query
- threads of this trx for which a thread
- is waiting for a reply; if this trx is
- killed, the reply requests in the list
- must be canceled */
- /*------------------------------*/
- lock_t* wait_lock; /*!< if trx execution state is
- TRX_QUE_LOCK_WAIT, this points to
- the lock request, otherwise this is
- NULL */
- ibool was_chosen_as_deadlock_victim;
- /* when the transaction decides to wait
- for a lock, it sets this to FALSE;
- if another transaction chooses this
- transaction as a victim in deadlock
- resolution, it sets this to TRUE */
- time_t wait_started; /*!< lock wait started at this time */
- UT_LIST_BASE_NODE_T(que_thr_t)
- wait_thrs; /*!< query threads belonging to this
- trx that are in the QUE_THR_LOCK_WAIT
- state */
- ulint deadlock_mark; /*!< a mark field used in deadlock
- checking algorithm. This must be
- in its own machine word, because
- it can be changed by other
- threads while holding kernel_mutex. */
- /*------------------------------*/
- mem_heap_t* lock_heap; /*!< memory heap for the locks of the
- transaction */
- UT_LIST_BASE_NODE_T(lock_t)
- trx_locks; /*!< locks reserved by the transaction */
- /*------------------------------*/
- mem_heap_t* global_read_view_heap;
- /* memory heap for the global read
- view */
- read_view_t* global_read_view;
- /* consistent read view associated
- to a transaction or NULL */
- read_view_t* read_view; /*!< consistent read view used in the
- transaction or NULL, this read view
- if defined can be normal read view
- associated to a transaction (i.e.
- same as global_read_view) or read view
- associated to a cursor */
- /*------------------------------*/
- UT_LIST_BASE_NODE_T(trx_named_savept_t)
- trx_savepoints; /*!< savepoints set with SAVEPOINT ...,
- oldest first */
- /*------------------------------*/
- mutex_t undo_mutex; /*!< mutex protecting the fields in this
- section (down to undo_no_arr), EXCEPT
- last_sql_stat_start, which can be
- accessed only when we know that there
- cannot be any activity in the undo
- logs! */
- undo_no_t undo_no; /*!< next undo log record number to
- assign; since the undo log is
- private for a transaction, this
- is a simple ascending sequence
- with no gaps; thus it represents
- the number of modified/inserted
- rows in a transaction */
- trx_savept_t last_sql_stat_start;
- /* undo_no when the last sql statement
- was started: in case of an error, trx
- is rolled back down to this undo
- number; see note at undo_mutex! */
- trx_rseg_t* rseg; /*!< rollback segment assigned to the
- transaction, or NULL if not assigned
- yet */
- trx_undo_t* insert_undo; /*!< pointer to the insert undo log, or
- NULL if no inserts performed yet */
- trx_undo_t* update_undo; /*!< pointer to the update undo log, or
- NULL if no update performed yet */
- undo_no_t roll_limit; /*!< least undo number to undo during
- a rollback */
- ulint pages_undone; /*!< number of undo log pages undone
- since the last undo log truncation */
- trx_undo_arr_t* undo_no_arr; /*!< array of undo numbers of undo log
- records which are currently processed
- by a rollback operation */
- /*------------------------------*/
- ulint n_autoinc_rows; /*!< no. of AUTO-INC rows required for
- an SQL statement. This is useful for
- multi-row INSERTs */
- ib_vector_t* autoinc_locks; /* AUTOINC locks held by this
- transaction. Note that these are
- also in the lock list trx_locks. This
- vector needs to be freed explicitly
- when the trx_t instance is desrtoyed */
- /*------------------------------*/
- char detailed_error[256]; /*!< detailed error message for last
- error, or empty. */
-};
-
-#define TRX_MAX_N_THREADS 32 /* maximum number of
- concurrent threads running a
- single operation of a
- transaction, e.g., a parallel
- query */
-/* Transaction concurrency states (trx->conc_state) */
-#define TRX_NOT_STARTED 0
-#define TRX_ACTIVE 1
-#define TRX_COMMITTED_IN_MEMORY 2
-#define TRX_PREPARED 3 /* Support for 2PC/XA */
-
-/* Transaction execution states when trx->conc_state == TRX_ACTIVE */
-#define TRX_QUE_RUNNING 0 /* transaction is running */
-#define TRX_QUE_LOCK_WAIT 1 /* transaction is waiting for a lock */
-#define TRX_QUE_ROLLING_BACK 2 /* transaction is rolling back */
-#define TRX_QUE_COMMITTING 3 /* transaction is committing */
-
-/* Transaction isolation levels (trx->isolation_level) */
-#define TRX_ISO_READ_UNCOMMITTED 0 /* dirty read: non-locking
- SELECTs are performed so that
- we do not look at a possible
- earlier version of a record;
- thus they are not 'consistent'
- reads under this isolation
- level; otherwise like level
- 2 */
-
-#define TRX_ISO_READ_COMMITTED 1 /* somewhat Oracle-like
- isolation, except that in
- range UPDATE and DELETE we
- must block phantom rows
- with next-key locks;
- SELECT ... FOR UPDATE and ...
- LOCK IN SHARE MODE only lock
- the index records, NOT the
- gaps before them, and thus
- allow free inserting;
- each consistent read reads its
- own snapshot */
-
-#define TRX_ISO_REPEATABLE_READ 2 /* this is the default;
- all consistent reads in the
- same trx read the same
- snapshot;
- full next-key locking used
- in locking reads to block
- insertions into gaps */
-
-#define TRX_ISO_SERIALIZABLE 3 /* all plain SELECTs are
- converted to LOCK IN SHARE
- MODE reads */
-
-/* Treatment of duplicate values (trx->duplicates; for example, in inserts).
-Multiple flags can be combined with bitwise OR. */
-#define TRX_DUP_IGNORE 1 /* duplicate rows are to be updated */
-#define TRX_DUP_REPLACE 2 /* duplicate rows are to be replaced */
-
-
-/* Types of a trx signal */
-#define TRX_SIG_NO_SIGNAL 0
-#define TRX_SIG_TOTAL_ROLLBACK 1
-#define TRX_SIG_ROLLBACK_TO_SAVEPT 2
-#define TRX_SIG_COMMIT 3
-#define TRX_SIG_ERROR_OCCURRED 4
-#define TRX_SIG_BREAK_EXECUTION 5
-
-/* Sender types of a signal */
-#define TRX_SIG_SELF 0 /* sent by the session itself, or
- by an error occurring within this
- session */
-#define TRX_SIG_OTHER_SESS 1 /* sent by another session (which
- must hold rights to this) */
-
-/** Commit node states */
-enum commit_node_state {
- COMMIT_NODE_SEND = 1, /*!< about to send a commit signal to
- the transaction */
- COMMIT_NODE_WAIT /*!< commit signal sent to the transaction,
- waiting for completion */
-};
-
-/** Commit command node in a query graph */
-struct commit_node_struct{
- que_common_t common; /*!< node type: QUE_NODE_COMMIT */
- enum commit_node_state
- state; /*!< node execution state */
-};
-
-
-
-#ifndef UNIV_NONINL
-#include "trx0trx.ic"
-#endif
-#endif /* !UNIV_HOTBACKUP */
-
-#endif
diff --git a/storage/innodb_plugin/include/trx0trx.ic b/storage/innodb_plugin/include/trx0trx.ic
deleted file mode 100644
index 7332eeece85..00000000000
--- a/storage/innodb_plugin/include/trx0trx.ic
+++ /dev/null
@@ -1,164 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0trx.ic
-The transaction
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-/*************************************************************//**
-Starts the transaction if it is not yet started. */
-UNIV_INLINE
-void
-trx_start_if_not_started(
-/*=====================*/
- trx_t* trx) /*!< in: transaction */
-{
- ut_ad(trx->conc_state != TRX_COMMITTED_IN_MEMORY);
-
- if (trx->conc_state == TRX_NOT_STARTED) {
-
- trx_start(trx, ULINT_UNDEFINED);
- }
-}
-
-/*************************************************************//**
-Starts the transaction if it is not yet started. Assumes we have reserved
-the kernel mutex! */
-UNIV_INLINE
-void
-trx_start_if_not_started_low(
-/*=========================*/
- trx_t* trx) /*!< in: transaction */
-{
- ut_ad(trx->conc_state != TRX_COMMITTED_IN_MEMORY);
-
- if (trx->conc_state == TRX_NOT_STARTED) {
-
- trx_start_low(trx, ULINT_UNDEFINED);
- }
-}
-
-/****************************************************************//**
-Retrieves the error_info field from a trx.
-@return the error info */
-UNIV_INLINE
-const dict_index_t*
-trx_get_error_info(
-/*===============*/
- const trx_t* trx) /*!< in: trx object */
-{
- return(trx->error_info);
-}
-
-/*******************************************************************//**
-Retrieves transacion's id, represented as unsigned long long.
-@return transaction's id */
-UNIV_INLINE
-ullint
-trx_get_id(
-/*=======*/
- const trx_t* trx) /*!< in: transaction */
-{
- return((ullint)ut_conv_dulint_to_longlong(trx->id));
-}
-
-/*******************************************************************//**
-Retrieves transaction's que state in a human readable string. The string
-should not be free()'d or modified.
-@return string in the data segment */
-UNIV_INLINE
-const char*
-trx_get_que_state_str(
-/*==================*/
- const trx_t* trx) /*!< in: transaction */
-{
- /* be sure to adjust TRX_QUE_STATE_STR_MAX_LEN if you change this */
- switch (trx->que_state) {
- case TRX_QUE_RUNNING:
- return("RUNNING");
- case TRX_QUE_LOCK_WAIT:
- return("LOCK WAIT");
- case TRX_QUE_ROLLING_BACK:
- return("ROLLING BACK");
- case TRX_QUE_COMMITTING:
- return("COMMITTING");
- default:
- return("UNKNOWN");
- }
-}
-
-/**********************************************************************//**
-Determine if a transaction is a dictionary operation.
-@return dictionary operation mode */
-UNIV_INLINE
-enum trx_dict_op
-trx_get_dict_operation(
-/*===================*/
- const trx_t* trx) /*!< in: transaction */
-{
- enum trx_dict_op op = (enum trx_dict_op) trx->dict_operation;
-
-#ifdef UNIV_DEBUG
- switch (op) {
- case TRX_DICT_OP_NONE:
- case TRX_DICT_OP_TABLE:
- case TRX_DICT_OP_INDEX:
- return(op);
- }
- ut_error;
-#endif /* UNIV_DEBUG */
- return((enum trx_dict_op) UNIV_EXPECT(op, TRX_DICT_OP_NONE));
-}
-/**********************************************************************//**
-Flag a transaction a dictionary operation. */
-UNIV_INLINE
-void
-trx_set_dict_operation(
-/*===================*/
- trx_t* trx, /*!< in/out: transaction */
- enum trx_dict_op op) /*!< in: operation, not
- TRX_DICT_OP_NONE */
-{
-#ifdef UNIV_DEBUG
- enum trx_dict_op old_op = trx_get_dict_operation(trx);
-
- switch (op) {
- case TRX_DICT_OP_NONE:
- ut_error;
- break;
- case TRX_DICT_OP_TABLE:
- switch (old_op) {
- case TRX_DICT_OP_NONE:
- case TRX_DICT_OP_INDEX:
- case TRX_DICT_OP_TABLE:
- goto ok;
- }
- ut_error;
- break;
- case TRX_DICT_OP_INDEX:
- ut_ad(old_op == TRX_DICT_OP_NONE);
- break;
- }
-ok:
-#endif /* UNIV_DEBUG */
-
- trx->dict_operation = op;
-}
diff --git a/storage/innodb_plugin/include/trx0types.h b/storage/innodb_plugin/include/trx0types.h
deleted file mode 100644
index 24cf57d53d5..00000000000
--- a/storage/innodb_plugin/include/trx0types.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0types.h
-Transaction system global type definitions
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0types_h
-#define trx0types_h
-
-#include "ut0byte.h"
-
-/** prepare trx_t::id for being printed via printf(3) */
-#define TRX_ID_PREP_PRINTF(id) (ullint) ut_conv_dulint_to_longlong(id)
-
-/** printf(3) format used for printing TRX_ID_PRINTF_PREP() */
-#define TRX_ID_FMT "%llX"
-
-/** maximum length that a formatted trx_t::id could take, not including
-the terminating NUL character. */
-#define TRX_ID_MAX_LEN 17
-
-/** Memory objects */
-/* @{ */
-/** Transaction */
-typedef struct trx_struct trx_t;
-/** Transaction system */
-typedef struct trx_sys_struct trx_sys_t;
-/** Doublewrite information */
-typedef struct trx_doublewrite_struct trx_doublewrite_t;
-/** Signal */
-typedef struct trx_sig_struct trx_sig_t;
-/** Rollback segment */
-typedef struct trx_rseg_struct trx_rseg_t;
-/** Transaction undo log */
-typedef struct trx_undo_struct trx_undo_t;
-/** Array of undo numbers of undo records being rolled back or purged */
-typedef struct trx_undo_arr_struct trx_undo_arr_t;
-/** A cell of trx_undo_arr_t */
-typedef struct trx_undo_inf_struct trx_undo_inf_t;
-/** The control structure used in the purge operation */
-typedef struct trx_purge_struct trx_purge_t;
-/** Rollback command node in a query graph */
-typedef struct roll_node_struct roll_node_t;
-/** Commit command node in a query graph */
-typedef struct commit_node_struct commit_node_t;
-/** SAVEPOINT command node in a query graph */
-typedef struct trx_named_savept_struct trx_named_savept_t;
-/* @} */
-
-/** Rollback contexts */
-enum trx_rb_ctx {
- RB_NONE = 0, /*!< no rollback */
- RB_NORMAL, /*!< normal rollback */
- RB_RECOVERY /*!< rolling back an incomplete transaction,
- in crash recovery */
-};
-
-/** Transaction identifier (DB_TRX_ID, DATA_TRX_ID) */
-typedef dulint trx_id_t;
-/** Rollback pointer (DB_ROLL_PTR, DATA_ROLL_PTR) */
-typedef dulint roll_ptr_t;
-/** Undo number */
-typedef dulint undo_no_t;
-
-/** Transaction savepoint */
-typedef struct trx_savept_struct trx_savept_t;
-/** Transaction savepoint */
-struct trx_savept_struct{
- undo_no_t least_undo_no; /*!< least undo number to undo */
-};
-
-/** File objects */
-/* @{ */
-/** Transaction system header */
-typedef byte trx_sysf_t;
-/** Rollback segment header */
-typedef byte trx_rsegf_t;
-/** Undo segment header */
-typedef byte trx_usegf_t;
-/** Undo log header */
-typedef byte trx_ulogf_t;
-/** Undo log page header */
-typedef byte trx_upagef_t;
-
-/** Undo log record */
-typedef byte trx_undo_rec_t;
-/* @} */
-
-#endif
diff --git a/storage/innodb_plugin/include/trx0undo.h b/storage/innodb_plugin/include/trx0undo.h
deleted file mode 100644
index a084f2394b5..00000000000
--- a/storage/innodb_plugin/include/trx0undo.h
+++ /dev/null
@@ -1,551 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0undo.h
-Transaction undo log
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0undo_h
-#define trx0undo_h
-
-#include "univ.i"
-#include "trx0types.h"
-#include "mtr0mtr.h"
-#include "trx0sys.h"
-#include "page0types.h"
-#include "trx0xa.h"
-
-#ifndef UNIV_HOTBACKUP
-/***********************************************************************//**
-Builds a roll pointer.
-@return roll pointer */
-UNIV_INLINE
-roll_ptr_t
-trx_undo_build_roll_ptr(
-/*====================*/
- ibool is_insert, /*!< in: TRUE if insert undo log */
- ulint rseg_id, /*!< in: rollback segment id */
- ulint page_no, /*!< in: page number */
- ulint offset); /*!< in: offset of the undo entry within page */
-/***********************************************************************//**
-Decodes a roll pointer. */
-UNIV_INLINE
-void
-trx_undo_decode_roll_ptr(
-/*=====================*/
- roll_ptr_t roll_ptr, /*!< in: roll pointer */
- ibool* is_insert, /*!< out: TRUE if insert undo log */
- ulint* rseg_id, /*!< out: rollback segment id */
- ulint* page_no, /*!< out: page number */
- ulint* offset); /*!< out: offset of the undo
- entry within page */
-/***********************************************************************//**
-Returns TRUE if the roll pointer is of the insert type.
-@return TRUE if insert undo log */
-UNIV_INLINE
-ibool
-trx_undo_roll_ptr_is_insert(
-/*========================*/
- roll_ptr_t roll_ptr); /*!< in: roll pointer */
-#endif /* !UNIV_HOTBACKUP */
-/*****************************************************************//**
-Writes a roll ptr to an index page. In case that the size changes in
-some future version, this function should be used instead of
-mach_write_... */
-UNIV_INLINE
-void
-trx_write_roll_ptr(
-/*===============*/
- byte* ptr, /*!< in: pointer to memory where
- written */
- roll_ptr_t roll_ptr); /*!< in: roll ptr */
-/*****************************************************************//**
-Reads a roll ptr from an index page. In case that the roll ptr size
-changes in some future version, this function should be used instead of
-mach_read_...
-@return roll ptr */
-UNIV_INLINE
-roll_ptr_t
-trx_read_roll_ptr(
-/*==============*/
- const byte* ptr); /*!< in: pointer to memory from where to read */
-#ifndef UNIV_HOTBACKUP
-/******************************************************************//**
-Gets an undo log page and x-latches it.
-@return pointer to page x-latched */
-UNIV_INLINE
-page_t*
-trx_undo_page_get(
-/*==============*/
- ulint space, /*!< in: space where placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number */
- mtr_t* mtr); /*!< in: mtr */
-/******************************************************************//**
-Gets an undo log page and s-latches it.
-@return pointer to page s-latched */
-UNIV_INLINE
-page_t*
-trx_undo_page_get_s_latched(
-/*========================*/
- ulint space, /*!< in: space where placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number */
- mtr_t* mtr); /*!< in: mtr */
-/******************************************************************//**
-Returns the previous undo record on the page in the specified log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_prev_rec(
-/*=======================*/
- trx_undo_rec_t* rec, /*!< in: undo log record */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset);/*!< in: undo log header offset on page */
-/******************************************************************//**
-Returns the next undo log record on the page in the specified log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_next_rec(
-/*=======================*/
- trx_undo_rec_t* rec, /*!< in: undo log record */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset);/*!< in: undo log header offset on page */
-/******************************************************************//**
-Returns the last undo record on the page in the specified undo log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_last_rec(
-/*=======================*/
- page_t* undo_page,/*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset); /*!< in: undo log header offset on page */
-/******************************************************************//**
-Returns the first undo record on the page in the specified undo log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_first_rec(
-/*========================*/
- page_t* undo_page,/*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset);/*!< in: undo log header offset on page */
-/***********************************************************************//**
-Gets the previous record in an undo log.
-@return undo log record, the page s-latched, NULL if none */
-UNIV_INTERN
-trx_undo_rec_t*
-trx_undo_get_prev_rec(
-/*==================*/
- trx_undo_rec_t* rec, /*!< in: undo record */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset, /*!< in: undo log header offset on page */
- mtr_t* mtr); /*!< in: mtr */
-/***********************************************************************//**
-Gets the next record in an undo log.
-@return undo log record, the page s-latched, NULL if none */
-UNIV_INTERN
-trx_undo_rec_t*
-trx_undo_get_next_rec(
-/*==================*/
- trx_undo_rec_t* rec, /*!< in: undo record */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset, /*!< in: undo log header offset on page */
- mtr_t* mtr); /*!< in: mtr */
-/***********************************************************************//**
-Gets the first record in an undo log.
-@return undo log record, the page latched, NULL if none */
-UNIV_INTERN
-trx_undo_rec_t*
-trx_undo_get_first_rec(
-/*===================*/
- ulint space, /*!< in: undo log header space */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset, /*!< in: undo log header offset on page */
- ulint mode, /*!< in: latching mode: RW_S_LATCH or RW_X_LATCH */
- mtr_t* mtr); /*!< in: mtr */
-/********************************************************************//**
-Tries to add a page to the undo log segment where the undo log is placed.
-@return page number if success, else FIL_NULL */
-UNIV_INTERN
-ulint
-trx_undo_add_page(
-/*==============*/
- trx_t* trx, /*!< in: transaction */
- trx_undo_t* undo, /*!< in: undo log memory object */
- mtr_t* mtr); /*!< in: mtr which does not have a latch to any
- undo log page; the caller must have reserved
- the rollback segment mutex */
-/***********************************************************************//**
-Truncates an undo log from the end. This function is used during a rollback
-to free space from an undo log. */
-UNIV_INTERN
-void
-trx_undo_truncate_end(
-/*==================*/
- trx_t* trx, /*!< in: transaction whose undo log it is */
- trx_undo_t* undo, /*!< in: undo log */
- undo_no_t limit); /*!< in: all undo records with undo number
- >= this value should be truncated */
-/***********************************************************************//**
-Truncates an undo log from the start. This function is used during a purge
-operation. */
-UNIV_INTERN
-void
-trx_undo_truncate_start(
-/*====================*/
- trx_rseg_t* rseg, /*!< in: rollback segment */
- ulint space, /*!< in: space id of the log */
- ulint hdr_page_no, /*!< in: header page number */
- ulint hdr_offset, /*!< in: header offset on the page */
- undo_no_t limit); /*!< in: all undo pages with
- undo numbers < this value
- should be truncated; NOTE that
- the function only frees whole
- pages; the header page is not
- freed, but emptied, if all the
- records there are < limit */
-/********************************************************************//**
-Initializes the undo log lists for a rollback segment memory copy.
-This function is only called when the database is started or a new
-rollback segment created.
-@return the combined size of undo log segments in pages */
-UNIV_INTERN
-ulint
-trx_undo_lists_init(
-/*================*/
- trx_rseg_t* rseg); /*!< in: rollback segment memory object */
-/**********************************************************************//**
-Assigns an undo log for a transaction. A new undo log is created or a cached
-undo log reused.
-@return DB_SUCCESS if undo log assign successful, possible error codes
-are: DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE
-DB_OUT_OF_MEMORY */
-UNIV_INTERN
-ulint
-trx_undo_assign_undo(
-/*=================*/
- trx_t* trx, /*!< in: transaction */
- ulint type); /*!< in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
-/******************************************************************//**
-Sets the state of the undo log segment at a transaction finish.
-@return undo log segment header page, x-latched */
-UNIV_INTERN
-page_t*
-trx_undo_set_state_at_finish(
-/*=========================*/
- trx_rseg_t* rseg, /*!< in: rollback segment memory object */
- trx_t* trx, /*!< in: transaction */
- trx_undo_t* undo, /*!< in: undo log memory copy */
- mtr_t* mtr); /*!< in: mtr */
-/******************************************************************//**
-Sets the state of the undo log segment at a transaction prepare.
-@return undo log segment header page, x-latched */
-UNIV_INTERN
-page_t*
-trx_undo_set_state_at_prepare(
-/*==========================*/
- trx_t* trx, /*!< in: transaction */
- trx_undo_t* undo, /*!< in: undo log memory copy */
- mtr_t* mtr); /*!< in: mtr */
-
-/**********************************************************************//**
-Adds the update undo log header as the first in the history list, and
-frees the memory object, or puts it to the list of cached update undo log
-segments. */
-UNIV_INTERN
-void
-trx_undo_update_cleanup(
-/*====================*/
- trx_t* trx, /*!< in: trx owning the update undo log */
- page_t* undo_page, /*!< in: update undo log header page,
- x-latched */
- mtr_t* mtr); /*!< in: mtr */
-/******************************************************************//**
-Frees or caches an insert undo log after a transaction commit or rollback.
-Knowledge of inserts is not needed after a commit or rollback, therefore
-the data can be discarded. */
-UNIV_INTERN
-void
-trx_undo_insert_cleanup(
-/*====================*/
- trx_t* trx); /*!< in: transaction handle */
-#endif /* !UNIV_HOTBACKUP */
-/***********************************************************//**
-Parses the redo log entry of an undo log page initialization.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-trx_undo_parse_page_init(
-/*=====================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
-/***********************************************************//**
-Parses the redo log entry of an undo log page header create or reuse.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-trx_undo_parse_page_header(
-/*=======================*/
- ulint type, /*!< in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
-/***********************************************************//**
-Parses the redo log entry of an undo log page header discard.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-trx_undo_parse_discard_latest(
-/*==========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
-/************************************************************************
-Frees an undo log memory copy. */
-UNIV_INTERN
-void
-trx_undo_mem_free(
-/*==============*/
- trx_undo_t* undo); /* in: the undo object to be freed */
-
-/* Types of an undo log segment */
-#define TRX_UNDO_INSERT 1 /* contains undo entries for inserts */
-#define TRX_UNDO_UPDATE 2 /* contains undo entries for updates
- and delete markings: in short,
- modifys (the name 'UPDATE' is a
- historical relic) */
-/* States of an undo log segment */
-#define TRX_UNDO_ACTIVE 1 /* contains an undo log of an active
- transaction */
-#define TRX_UNDO_CACHED 2 /* cached for quick reuse */
-#define TRX_UNDO_TO_FREE 3 /* insert undo segment can be freed */
-#define TRX_UNDO_TO_PURGE 4 /* update undo segment will not be
- reused: it can be freed in purge when
- all undo data in it is removed */
-#define TRX_UNDO_PREPARED 5 /* contains an undo log of an
- prepared transaction */
-
-#ifndef UNIV_HOTBACKUP
-/** Transaction undo log memory object; this is protected by the undo_mutex
-in the corresponding transaction object */
-
-struct trx_undo_struct{
- /*-----------------------------*/
- ulint id; /*!< undo log slot number within the
- rollback segment */
- ulint type; /*!< TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
- ulint state; /*!< state of the corresponding undo log
- segment */
- ibool del_marks; /*!< relevant only in an update undo log:
- this is TRUE if the transaction may
- have delete marked records, because of
- a delete of a row or an update of an
- indexed field; purge is then
- necessary; also TRUE if the transaction
- has updated an externally stored
- field */
- trx_id_t trx_id; /*!< id of the trx assigned to the undo
- log */
- XID xid; /*!< X/Open XA transaction
- identification */
- ibool dict_operation; /*!< TRUE if a dict operation trx */
- dulint table_id; /*!< if a dict operation, then the table
- id */
- trx_rseg_t* rseg; /*!< rseg where the undo log belongs */
- /*-----------------------------*/
- ulint space; /*!< space id where the undo log
- placed */
- ulint zip_size; /*!< compressed page size of space
- in bytes, or 0 for uncompressed */
- ulint hdr_page_no; /*!< page number of the header page in
- the undo log */
- ulint hdr_offset; /*!< header offset of the undo log on the
- page */
- ulint last_page_no; /*!< page number of the last page in the
- undo log; this may differ from
- top_page_no during a rollback */
- ulint size; /*!< current size in pages */
- /*-----------------------------*/
- ulint empty; /*!< TRUE if the stack of undo log
- records is currently empty */
- ulint top_page_no; /*!< page number where the latest undo
- log record was catenated; during
- rollback the page from which the latest
- undo record was chosen */
- ulint top_offset; /*!< offset of the latest undo record,
- i.e., the topmost element in the undo
- log if we think of it as a stack */
- undo_no_t top_undo_no; /*!< undo number of the latest record */
- buf_block_t* guess_block; /*!< guess for the buffer block where
- the top page might reside */
- /*-----------------------------*/
- UT_LIST_NODE_T(trx_undo_t) undo_list;
- /*!< undo log objects in the rollback
- segment are chained into lists */
-};
-#endif /* !UNIV_HOTBACKUP */
-
-/** The offset of the undo log page header on pages of the undo log */
-#define TRX_UNDO_PAGE_HDR FSEG_PAGE_DATA
-/*-------------------------------------------------------------*/
-/** Transaction undo log page header offsets */
-/* @{ */
-#define TRX_UNDO_PAGE_TYPE 0 /*!< TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
-#define TRX_UNDO_PAGE_START 2 /*!< Byte offset where the undo log
- records for the LATEST transaction
- start on this page (remember that
- in an update undo log, the first page
- can contain several undo logs) */
-#define TRX_UNDO_PAGE_FREE 4 /*!< On each page of the undo log this
- field contains the byte offset of the
- first free byte on the page */
-#define TRX_UNDO_PAGE_NODE 6 /*!< The file list node in the chain
- of undo log pages */
-/*-------------------------------------------------------------*/
-#define TRX_UNDO_PAGE_HDR_SIZE (6 + FLST_NODE_SIZE)
- /*!< Size of the transaction undo
- log page header, in bytes */
-/* @} */
-
-/** An update undo segment with just one page can be reused if it has
-at most this many bytes used; we must leave space at least for one new undo
-log header on the page */
-
-#define TRX_UNDO_PAGE_REUSE_LIMIT (3 * UNIV_PAGE_SIZE / 4)
-
-/* An update undo log segment may contain several undo logs on its first page
-if the undo logs took so little space that the segment could be cached and
-reused. All the undo log headers are then on the first page, and the last one
-owns the undo log records on subsequent pages if the segment is bigger than
-one page. If an undo log is stored in a segment, then on the first page it is
-allowed to have zero undo records, but if the segment extends to several
-pages, then all the rest of the pages must contain at least one undo log
-record. */
-
-/** The offset of the undo log segment header on the first page of the undo
-log segment */
-
-#define TRX_UNDO_SEG_HDR (TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE)
-/** Undo log segment header */
-/* @{ */
-/*-------------------------------------------------------------*/
-#define TRX_UNDO_STATE 0 /*!< TRX_UNDO_ACTIVE, ... */
-#define TRX_UNDO_LAST_LOG 2 /*!< Offset of the last undo log header
- on the segment header page, 0 if
- none */
-#define TRX_UNDO_FSEG_HEADER 4 /*!< Header for the file segment which
- the undo log segment occupies */
-#define TRX_UNDO_PAGE_LIST (4 + FSEG_HEADER_SIZE)
- /*!< Base node for the list of pages in
- the undo log segment; defined only on
- the undo log segment's first page */
-/*-------------------------------------------------------------*/
-/** Size of the undo log segment header */
-#define TRX_UNDO_SEG_HDR_SIZE (4 + FSEG_HEADER_SIZE + FLST_BASE_NODE_SIZE)
-/* @} */
-
-
-/** The undo log header. There can be several undo log headers on the first
-page of an update undo log segment. */
-/* @{ */
-/*-------------------------------------------------------------*/
-#define TRX_UNDO_TRX_ID 0 /*!< Transaction id */
-#define TRX_UNDO_TRX_NO 8 /*!< Transaction number of the
- transaction; defined only if the log
- is in a history list */
-#define TRX_UNDO_DEL_MARKS 16 /*!< Defined only in an update undo
- log: TRUE if the transaction may have
- done delete markings of records, and
- thus purge is necessary */
-#define TRX_UNDO_LOG_START 18 /*!< Offset of the first undo log record
- of this log on the header page; purge
- may remove undo log record from the
- log start, and therefore this is not
- necessarily the same as this log
- header end offset */
-#define TRX_UNDO_XID_EXISTS 20 /*!< TRUE if undo log header includes
- X/Open XA transaction identification
- XID */
-#define TRX_UNDO_DICT_TRANS 21 /*!< TRUE if the transaction is a table
- create, index create, or drop
- transaction: in recovery
- the transaction cannot be rolled back
- in the usual way: a 'rollback' rather
- means dropping the created or dropped
- table, if it still exists */
-#define TRX_UNDO_TABLE_ID 22 /*!< Id of the table if the preceding
- field is TRUE */
-#define TRX_UNDO_NEXT_LOG 30 /*!< Offset of the next undo log header
- on this page, 0 if none */
-#define TRX_UNDO_PREV_LOG 32 /*!< Offset of the previous undo log
- header on this page, 0 if none */
-#define TRX_UNDO_HISTORY_NODE 34 /*!< If the log is put to the history
- list, the file list node is here */
-/*-------------------------------------------------------------*/
-/** Size of the undo log header without XID information */
-#define TRX_UNDO_LOG_OLD_HDR_SIZE (34 + FLST_NODE_SIZE)
-
-/* Note: the writing of the undo log old header is coded by a log record
-MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE. The appending of an XID to the
-header is logged separately. In this sense, the XID is not really a member
-of the undo log header. TODO: do not append the XID to the log header if XA
-is not needed by the user. The XID wastes about 150 bytes of space in every
-undo log. In the history list we may have millions of undo logs, which means
-quite a large overhead. */
-
-/** X/Open XA Transaction Identification (XID) */
-/* @{ */
-/** xid_t::formatID */
-#define TRX_UNDO_XA_FORMAT (TRX_UNDO_LOG_OLD_HDR_SIZE)
-/** xid_t::gtrid_length */
-#define TRX_UNDO_XA_TRID_LEN (TRX_UNDO_XA_FORMAT + 4)
-/** xid_t::bqual_length */
-#define TRX_UNDO_XA_BQUAL_LEN (TRX_UNDO_XA_TRID_LEN + 4)
-/** Distributed transaction identifier data */
-#define TRX_UNDO_XA_XID (TRX_UNDO_XA_BQUAL_LEN + 4)
-/*--------------------------------------------------------------*/
-#define TRX_UNDO_LOG_XA_HDR_SIZE (TRX_UNDO_XA_XID + XIDDATASIZE)
- /*!< Total size of the undo log header
- with the XA XID */
-/* @} */
-
-#ifndef UNIV_NONINL
-#include "trx0undo.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/trx0undo.ic b/storage/innodb_plugin/include/trx0undo.ic
deleted file mode 100644
index 2d289b34ef1..00000000000
--- a/storage/innodb_plugin/include/trx0undo.ic
+++ /dev/null
@@ -1,351 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0undo.ic
-Transaction undo log
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "data0type.h"
-#include "page0page.h"
-
-#ifndef UNIV_HOTBACKUP
-/***********************************************************************//**
-Builds a roll pointer.
-@return roll pointer */
-UNIV_INLINE
-roll_ptr_t
-trx_undo_build_roll_ptr(
-/*====================*/
- ibool is_insert, /*!< in: TRUE if insert undo log */
- ulint rseg_id, /*!< in: rollback segment id */
- ulint page_no, /*!< in: page number */
- ulint offset) /*!< in: offset of the undo entry within page */
-{
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
- ut_ad(rseg_id < 128);
-
- return(ut_dulint_create(is_insert * 128 * 256 * 256
- + rseg_id * 256 * 256
- + (page_no / 256) / 256,
- (page_no % (256 * 256)) * 256 * 256
- + offset));
-}
-
-/***********************************************************************//**
-Decodes a roll pointer. */
-UNIV_INLINE
-void
-trx_undo_decode_roll_ptr(
-/*=====================*/
- roll_ptr_t roll_ptr, /*!< in: roll pointer */
- ibool* is_insert, /*!< out: TRUE if insert undo log */
- ulint* rseg_id, /*!< out: rollback segment id */
- ulint* page_no, /*!< out: page number */
- ulint* offset) /*!< out: offset of the undo
- entry within page */
-{
- ulint low;
- ulint high;
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
- high = ut_dulint_get_high(roll_ptr);
- low = ut_dulint_get_low(roll_ptr);
-
- *offset = low % (256 * 256);
-
- *is_insert = high / (256 * 256 * 128); /* TRUE == 1 */
- *rseg_id = (high / (256 * 256)) % 128;
-
- *page_no = (high % (256 * 256)) * 256 * 256
- + (low / 256) / 256;
-}
-
-/***********************************************************************//**
-Returns TRUE if the roll pointer is of the insert type.
-@return TRUE if insert undo log */
-UNIV_INLINE
-ibool
-trx_undo_roll_ptr_is_insert(
-/*========================*/
- roll_ptr_t roll_ptr) /*!< in: roll pointer */
-{
- ulint high;
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
- high = ut_dulint_get_high(roll_ptr);
-
- return(high / (256 * 256 * 128));
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*****************************************************************//**
-Writes a roll ptr to an index page. In case that the size changes in
-some future version, this function should be used instead of
-mach_write_... */
-UNIV_INLINE
-void
-trx_write_roll_ptr(
-/*===============*/
- byte* ptr, /*!< in: pointer to memory where
- written */
- roll_ptr_t roll_ptr) /*!< in: roll ptr */
-{
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
- mach_write_to_7(ptr, roll_ptr);
-}
-
-/*****************************************************************//**
-Reads a roll ptr from an index page. In case that the roll ptr size
-changes in some future version, this function should be used instead of
-mach_read_...
-@return roll ptr */
-UNIV_INLINE
-roll_ptr_t
-trx_read_roll_ptr(
-/*==============*/
- const byte* ptr) /*!< in: pointer to memory from where to read */
-{
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
- return(mach_read_from_7(ptr));
-}
-
-#ifndef UNIV_HOTBACKUP
-/******************************************************************//**
-Gets an undo log page and x-latches it.
-@return pointer to page x-latched */
-UNIV_INLINE
-page_t*
-trx_undo_page_get(
-/*==============*/
- ulint space, /*!< in: space where placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number */
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_block_t* block = buf_page_get(space, zip_size, page_no,
- RW_X_LATCH, mtr);
- buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
-
- return(buf_block_get_frame(block));
-}
-
-/******************************************************************//**
-Gets an undo log page and s-latches it.
-@return pointer to page s-latched */
-UNIV_INLINE
-page_t*
-trx_undo_page_get_s_latched(
-/*========================*/
- ulint space, /*!< in: space where placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number */
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_block_t* block = buf_page_get(space, zip_size, page_no,
- RW_S_LATCH, mtr);
- buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
-
- return(buf_block_get_frame(block));
-}
-
-/******************************************************************//**
-Returns the start offset of the undo log records of the specified undo
-log on the page.
-@return start offset */
-UNIV_INLINE
-ulint
-trx_undo_page_get_start(
-/*====================*/
- page_t* undo_page,/*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset) /*!< in: undo log header offset on page */
-{
- ulint start;
-
- if (page_no == page_get_page_no(undo_page)) {
-
- start = mach_read_from_2(offset + undo_page
- + TRX_UNDO_LOG_START);
- } else {
- start = TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE;
- }
-
- return(start);
-}
-
-/******************************************************************//**
-Returns the end offset of the undo log records of the specified undo
-log on the page.
-@return end offset */
-UNIV_INLINE
-ulint
-trx_undo_page_get_end(
-/*==================*/
- page_t* undo_page,/*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset) /*!< in: undo log header offset on page */
-{
- trx_ulogf_t* log_hdr;
- ulint end;
-
- if (page_no == page_get_page_no(undo_page)) {
-
- log_hdr = undo_page + offset;
-
- end = mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG);
-
- if (end == 0) {
- end = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
- }
- } else {
- end = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
- }
-
- return(end);
-}
-
-/******************************************************************//**
-Returns the previous undo record on the page in the specified log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_prev_rec(
-/*=======================*/
- trx_undo_rec_t* rec, /*!< in: undo log record */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset) /*!< in: undo log header offset on page */
-{
- page_t* undo_page;
- ulint start;
-
- undo_page = (page_t*) ut_align_down(rec, UNIV_PAGE_SIZE);
-
- start = trx_undo_page_get_start(undo_page, page_no, offset);
-
- if (start + undo_page == rec) {
-
- return(NULL);
- }
-
- return(undo_page + mach_read_from_2(rec - 2));
-}
-
-/******************************************************************//**
-Returns the next undo log record on the page in the specified log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_next_rec(
-/*=======================*/
- trx_undo_rec_t* rec, /*!< in: undo log record */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset) /*!< in: undo log header offset on page */
-{
- page_t* undo_page;
- ulint end;
- ulint next;
-
- undo_page = (page_t*) ut_align_down(rec, UNIV_PAGE_SIZE);
-
- end = trx_undo_page_get_end(undo_page, page_no, offset);
-
- next = mach_read_from_2(rec);
-
- if (next == end) {
-
- return(NULL);
- }
-
- return(undo_page + next);
-}
-
-/******************************************************************//**
-Returns the last undo record on the page in the specified undo log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_last_rec(
-/*=======================*/
- page_t* undo_page,/*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset) /*!< in: undo log header offset on page */
-{
- ulint start;
- ulint end;
-
- start = trx_undo_page_get_start(undo_page, page_no, offset);
- end = trx_undo_page_get_end(undo_page, page_no, offset);
-
- if (start == end) {
-
- return(NULL);
- }
-
- return(undo_page + mach_read_from_2(undo_page + end - 2));
-}
-
-/******************************************************************//**
-Returns the first undo record on the page in the specified undo log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_first_rec(
-/*========================*/
- page_t* undo_page,/*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset) /*!< in: undo log header offset on page */
-{
- ulint start;
- ulint end;
-
- start = trx_undo_page_get_start(undo_page, page_no, offset);
- end = trx_undo_page_get_end(undo_page, page_no, offset);
-
- if (start == end) {
-
- return(NULL);
- }
-
- return(undo_page + start);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/include/trx0xa.h b/storage/innodb_plugin/include/trx0xa.h
deleted file mode 100644
index e0dd8a1af5b..00000000000
--- a/storage/innodb_plugin/include/trx0xa.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/*
- * Start of xa.h header
- *
- * Define a symbol to prevent multiple inclusions of this header file
- */
-#ifndef XA_H
-#define XA_H
-
-/*
- * Transaction branch identification: XID and NULLXID:
- */
-#ifndef XIDDATASIZE
-
-/** Sizes of transaction identifier */
-#define XIDDATASIZE 128 /*!< maximum size of a transaction
- identifier, in bytes */
-#define MAXGTRIDSIZE 64 /*!< maximum size in bytes of gtrid */
-#define MAXBQUALSIZE 64 /*!< maximum size in bytes of bqual */
-
-/** X/Open XA distributed transaction identifier */
-struct xid_t {
- long formatID; /*!< format identifier; -1
- means that the XID is null */
- long gtrid_length; /*!< value from 1 through 64 */
- long bqual_length; /*!< value from 1 through 64 */
- char data[XIDDATASIZE]; /*!< distributed transaction
- identifier */
-};
-/** X/Open XA distributed transaction identifier */
-typedef struct xid_t XID;
-#endif
-/** X/Open XA distributed transaction status codes */
-/* @{ */
-#define XA_OK 0 /*!< normal execution */
-#define XAER_ASYNC -2 /*!< asynchronous operation already
- outstanding */
-#define XAER_RMERR -3 /*!< a resource manager error
- occurred in the transaction
- branch */
-#define XAER_NOTA -4 /*!< the XID is not valid */
-#define XAER_INVAL -5 /*!< invalid arguments were given */
-#define XAER_PROTO -6 /*!< routine invoked in an improper
- context */
-#define XAER_RMFAIL -7 /*!< resource manager unavailable */
-#define XAER_DUPID -8 /*!< the XID already exists */
-#define XAER_OUTSIDE -9 /*!< resource manager doing
- work outside transaction */
-/* @} */
-#endif /* ifndef XA_H */
-/*
- * End of xa.h header
- */
diff --git a/storage/innodb_plugin/include/univ.i b/storage/innodb_plugin/include/univ.i
deleted file mode 100644
index 2081e136590..00000000000
--- a/storage/innodb_plugin/include/univ.i
+++ /dev/null
@@ -1,486 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-Copyright (c) 2009, Sun Microsystems, Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-Portions of this file contain modifications contributed and copyrighted by
-Sun Microsystems, Inc. Those modifications are gratefully acknowledged and
-are described briefly in the InnoDB documentation. The contributions by
-Sun Microsystems are incorporated with their permission, and subject to the
-conditions contained in the file COPYING.Sun_Microsystems.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/***********************************************************************//**
-@file include/univ.i
-Version control for database, common definitions, and include files
-
-Created 1/20/1994 Heikki Tuuri
-****************************************************************************/
-
-#ifndef univ_i
-#define univ_i
-
-#ifdef UNIV_HOTBACKUP
-#include "hb_univ.i"
-#endif /* UNIV_HOTBACKUP */
-
-#define INNODB_VERSION_MAJOR 1
-#define INNODB_VERSION_MINOR 0
-#define INNODB_VERSION_BUGFIX 6
-
-/* The following is the InnoDB version as shown in
-SELECT plugin_version FROM information_schema.plugins;
-calculated in make_version_string() in sql/sql_show.cc like this:
-"version >> 8" . "version & 0xff"
-because the version is shown with only one dot, we skip the last
-component, i.e. we show M.N.P as M.N */
-#define INNODB_VERSION_SHORT \
- (INNODB_VERSION_MAJOR << 8 | INNODB_VERSION_MINOR)
-
-/* auxiliary macros to help creating the version as string */
-#define __INNODB_VERSION(a, b, c) (#a "." #b "." #c)
-#define _INNODB_VERSION(a, b, c) __INNODB_VERSION(a, b, c)
-
-#define INNODB_VERSION_STR \
- _INNODB_VERSION(INNODB_VERSION_MAJOR, \
- INNODB_VERSION_MINOR, \
- INNODB_VERSION_BUGFIX)
-
-#define REFMAN "http://dev.mysql.com/doc/refman/5.1/en/"
-
-#ifdef MYSQL_DYNAMIC_PLUGIN
-/* In the dynamic plugin, redefine some externally visible symbols
-in order not to conflict with the symbols of a builtin InnoDB. */
-
-/* Rename all C++ classes that contain virtual functions, because we
-have not figured out how to apply the visibility=hidden attribute to
-the virtual method table (vtable) in GCC 3. */
-# define ha_innobase ha_innodb
-#endif /* MYSQL_DYNAMIC_PLUGIN */
-
-/* if any of the following macros is defined at this point this means
-that the code from the "right" plug.in was executed and we do not
-need to include ut0auxconf.h which would either define the same macros
-or will be empty */
-#if !defined(HAVE_IB_GCC_ATOMIC_BUILTINS) \
- && !defined(HAVE_IB_ATOMIC_PTHREAD_T_GCC) \
- && !defined(HAVE_IB_SOLARIS_ATOMICS) \
- && !defined(HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS) \
- && !defined(SIZEOF_PTHREAD_T) \
- && !defined(HAVE_IB_PAUSE_INSTRUCTION)
-# include "ut0auxconf.h"
-#endif
-
-#if (defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)) && !defined(MYSQL_SERVER) && !defined(__WIN__)
-# undef __WIN__
-# define __WIN__
-
-# include <windows.h>
-
-# ifdef _NT_
-# define __NT__
-# endif
-
-#else
-/* The defines used with MySQL */
-
-/* Include two header files from MySQL to make the Unix flavor used
-in compiling more Posix-compatible. These headers also define __WIN__
-if we are compiling on Windows. */
-
-#ifndef UNIV_HOTBACKUP
-# include <my_global.h>
-# include <my_pthread.h>
-#endif /* UNIV_HOTBACKUP */
-
-/* Include <sys/stat.h> to get S_I... macros defined for os0file.c */
-# include <sys/stat.h>
-# if !defined(__NETWARE__) && !defined(__WIN__)
-# include <sys/mman.h> /* mmap() for os0proc.c */
-# endif
-
-/* Include the header file generated by GNU autoconf */
-# ifndef __WIN__
-# ifndef UNIV_HOTBACKUP
-# include "config.h"
-# endif /* UNIV_HOTBACKUP */
-# endif
-
-# ifdef HAVE_SCHED_H
-# include <sched.h>
-# endif
-
-/* We only try to do explicit inlining of functions with gcc and
-Sun Studio */
-
-# if !defined(__GNUC__) && !(defined(__SUNPRO_C) || defined(__SUNPRO_CC))
-# undef UNIV_MUST_NOT_INLINE /* Remove compiler warning */
-# define UNIV_MUST_NOT_INLINE
-# endif
-
-# ifdef HAVE_PREAD
-# define HAVE_PWRITE
-# endif
-
-#endif /* #if (defined(WIN32) || ... */
-
-/* DEBUG VERSION CONTROL
- ===================== */
-
-/* The following flag will make InnoDB to initialize
-all memory it allocates to zero. It hides Purify
-warnings about reading unallocated memory unless
-memory is read outside the allocated blocks. */
-/*
-#define UNIV_INIT_MEM_TO_ZERO
-*/
-
-/* When this macro is defined then additional test functions will be
-compiled. These functions live at the end of each relevant source file
-and have "test_" prefix. These functions are not called from anywhere in
-the code, they can be called from gdb after
-innobase_start_or_create_for_mysql() has executed using the call
-command. Not tested on Windows. */
-/*
-#define UNIV_COMPILE_TEST_FUNCS
-*/
-
-#if 0
-#define UNIV_DEBUG_VALGRIND /* Enable extra
- Valgrind instrumentation */
-#define UNIV_DEBUG_PRINT /* Enable the compilation of
- some debug print functions */
-#define UNIV_AHI_DEBUG /* Enable adaptive hash index
- debugging without UNIV_DEBUG */
-#define UNIV_BUF_DEBUG /* Enable buffer pool
- debugging without UNIV_DEBUG */
-#define UNIV_DEBUG /* Enable ut_ad() assertions
- and disable UNIV_INLINE */
-#define UNIV_DEBUG_LOCK_VALIDATE /* Enable
- ut_ad(lock_rec_validate_page())
- assertions. */
-#define UNIV_DEBUG_FILE_ACCESSES /* Debug .ibd file access
- (field file_page_was_freed
- in buf_page_t) */
-#define UNIV_LRU_DEBUG /* debug the buffer pool LRU */
-#define UNIV_HASH_DEBUG /* debug HASH_ macros */
-#define UNIV_LIST_DEBUG /* debug UT_LIST_ macros */
-#define UNIV_LOG_LSN_DEBUG /* write LSN to the redo log;
-this will break redo log file compatibility, but it may be useful when
-debugging redo log application problems. */
-#define UNIV_MEM_DEBUG /* detect memory leaks etc */
-#define UNIV_IBUF_DEBUG /* debug the insert buffer */
-#define UNIV_IBUF_COUNT_DEBUG /* debug the insert buffer;
-this limits the database to IBUF_COUNT_N_SPACES and IBUF_COUNT_N_PAGES,
-and the insert buffer must be empty when the database is started */
-#define UNIV_SYNC_DEBUG /* debug mutex and latch
-operations (very slow); also UNIV_DEBUG must be defined */
-#define UNIV_SEARCH_DEBUG /* debug B-tree comparisons */
-#define UNIV_SYNC_PERF_STAT /* operation counts for
- rw-locks and mutexes */
-#define UNIV_SEARCH_PERF_STAT /* statistics for the
- adaptive hash index */
-#define UNIV_SRV_PRINT_LATCH_WAITS /* enable diagnostic output
- in sync0sync.c */
-#define UNIV_BTR_PRINT /* enable functions for
- printing B-trees */
-#define UNIV_ZIP_DEBUG /* extensive consistency checks
- for compressed pages */
-#define UNIV_ZIP_COPY /* call page_zip_copy_recs()
- more often */
-#endif
-
-#define UNIV_BTR_DEBUG /* check B-tree links */
-#define UNIV_LIGHT_MEM_DEBUG /* light memory debugging */
-
-#ifdef HAVE_purify
-/* The following sets all new allocated memory to zero before use:
-this can be used to eliminate unnecessary Purify warnings, but note that
-it also masks many bugs Purify could detect. For detailed Purify analysis it
-is best to remove the define below and look through the warnings one
-by one. */
-#define UNIV_SET_MEM_TO_ZERO
-#endif
-
-/*
-#define UNIV_SQL_DEBUG
-#define UNIV_LOG_DEBUG
-*/
- /* the above option prevents forcing of log to disk
- at a buffer page write: it should be tested with this
- option off; also some ibuf tests are suppressed */
-/*
-#define UNIV_BASIC_LOG_DEBUG
-*/
- /* the above option enables basic recovery debugging:
- new allocated file pages are reset */
-
-/* Linkage specifier for non-static InnoDB symbols (variables and functions)
-that are only referenced from within InnoDB, not from MySQL */
-#if defined(__GNUC__) && (__GNUC__ >= 4) || defined(__INTEL_COMPILER)
-# define UNIV_INTERN __attribute__((visibility ("hidden")))
-#else
-# define UNIV_INTERN
-#endif
-
-#if (!defined(UNIV_DEBUG) && !defined(UNIV_MUST_NOT_INLINE))
-/* Definition for inline version */
-
-#ifdef __WIN__
-# define UNIV_INLINE __inline
-#elif defined(__SUNPRO_CC) || defined(__SUNPRO_C)
-# define UNIV_INLINE static inline
-#else
-# define UNIV_INLINE static __inline__
-#endif
-
-#else
-/* If we want to compile a noninlined version we use the following macro
-definitions: */
-
-#define UNIV_NONINL
-#define UNIV_INLINE UNIV_INTERN
-
-#endif /* UNIV_DEBUG */
-
-#ifdef _WIN32
-#define UNIV_WORD_SIZE 4
-#elif defined(_WIN64)
-#define UNIV_WORD_SIZE 8
-#else
-/* MySQL config.h generated by GNU autoconf will define SIZEOF_LONG in Posix */
-#define UNIV_WORD_SIZE SIZEOF_LONG
-#endif
-
-/* The following alignment is used in memory allocations in memory heap
-management to ensure correct alignment for doubles etc. */
-#define UNIV_MEM_ALIGNMENT 8
-
-/* The following alignment is used in aligning lints etc. */
-#define UNIV_WORD_ALIGNMENT UNIV_WORD_SIZE
-
-/*
- DATABASE VERSION CONTROL
- ========================
-*/
-
-/* The 2-logarithm of UNIV_PAGE_SIZE: */
-#define UNIV_PAGE_SIZE_SHIFT 14
-/* The universal page size of the database */
-#define UNIV_PAGE_SIZE (1 << UNIV_PAGE_SIZE_SHIFT)
-
-/* Maximum number of parallel threads in a parallelized operation */
-#define UNIV_MAX_PARALLELISM 32
-
-/*
- UNIVERSAL TYPE DEFINITIONS
- ==========================
-*/
-
-/* Note that inside MySQL 'byte' is defined as char on Linux! */
-#define byte unsigned char
-
-/* Define an unsigned integer type that is exactly 32 bits. */
-
-#if SIZEOF_INT == 4
-typedef unsigned int ib_uint32_t;
-#elif SIZEOF_LONG == 4
-typedef unsigned long ib_uint32_t;
-#else
-#error "Neither int or long is 4 bytes"
-#endif
-
-/* Another basic type we use is unsigned long integer which should be equal to
-the word size of the machine, that is on a 32-bit platform 32 bits, and on a
-64-bit platform 64 bits. We also give the printf format for the type as a
-macro ULINTPF. */
-
-#ifdef _WIN64
-typedef unsigned __int64 ulint;
-#define ULINTPF "%I64u"
-typedef __int64 lint;
-#else
-typedef unsigned long int ulint;
-#define ULINTPF "%lu"
-typedef long int lint;
-#endif
-
-#ifdef __WIN__
-typedef __int64 ib_int64_t;
-typedef unsigned __int64 ib_uint64_t;
-#elif !defined(UNIV_HOTBACKUP)
-/* Note: longlong and ulonglong come from MySQL headers. */
-typedef longlong ib_int64_t;
-typedef ulonglong ib_uint64_t;
-#endif
-
-#ifndef UNIV_HOTBACKUP
-typedef unsigned long long int ullint;
-#endif /* UNIV_HOTBACKUP */
-
-#ifndef __WIN__
-#if SIZEOF_LONG != SIZEOF_VOIDP
-#error "Error: InnoDB's ulint must be of the same size as void*"
-#endif
-#endif
-
-/* The 'undefined' value for a ulint */
-#define ULINT_UNDEFINED ((ulint)(-1))
-
-/* The undefined 32-bit unsigned integer */
-#define ULINT32_UNDEFINED 0xFFFFFFFF
-
-/* Maximum value for a ulint */
-#define ULINT_MAX ((ulint)(-2))
-
-/* Maximum value for ib_uint64_t */
-#define IB_ULONGLONG_MAX ((ib_uint64_t) (~0ULL))
-
-/* This 'ibool' type is used within Innobase. Remember that different included
-headers may define 'bool' differently. Do not assume that 'bool' is a ulint! */
-#define ibool ulint
-
-#ifndef TRUE
-
-#define TRUE 1
-#define FALSE 0
-
-#endif
-
-/* The following number as the length of a logical field means that the field
-has the SQL NULL as its value. NOTE that because we assume that the length
-of a field is a 32-bit integer when we store it, for example, to an undo log
-on disk, we must have also this number fit in 32 bits, also in 64-bit
-computers! */
-
-#define UNIV_SQL_NULL ULINT32_UNDEFINED
-
-/* Lengths which are not UNIV_SQL_NULL, but bigger than the following
-number indicate that a field contains a reference to an externally
-stored part of the field in the tablespace. The length field then
-contains the sum of the following flag and the locally stored len. */
-
-#define UNIV_EXTERN_STORAGE_FIELD (UNIV_SQL_NULL - UNIV_PAGE_SIZE)
-
-/* Some macros to improve branch prediction and reduce cache misses */
-#if defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER)
-/* Tell the compiler that 'expr' probably evaluates to 'constant'. */
-# define UNIV_EXPECT(expr,constant) __builtin_expect(expr, constant)
-/* Tell the compiler that a pointer is likely to be NULL */
-# define UNIV_LIKELY_NULL(ptr) __builtin_expect((ulint) ptr, 0)
-/* Minimize cache-miss latency by moving data at addr into a cache before
-it is read. */
-# define UNIV_PREFETCH_R(addr) __builtin_prefetch(addr, 0, 3)
-/* Minimize cache-miss latency by moving data at addr into a cache before
-it is read or written. */
-# define UNIV_PREFETCH_RW(addr) __builtin_prefetch(addr, 1, 3)
-/* Sun Studio includes sun_prefetch.h as of version 5.9 */
-#elif (defined(__SUNPRO_C) && __SUNPRO_C >= 0x590) \
- || (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x590)
-# include <sun_prefetch.h>
-#if __SUNPRO_C >= 0x550
-# undef UNIV_INTERN
-# define UNIV_INTERN __hidden
-#endif /* __SUNPRO_C >= 0x550 */
-/* Use sun_prefetch when compile with Sun Studio */
-# define UNIV_EXPECT(expr,value) (expr)
-# define UNIV_LIKELY_NULL(expr) (expr)
-# define UNIV_PREFETCH_R(addr) sun_prefetch_read_many(addr)
-# define UNIV_PREFETCH_RW(addr) sun_prefetch_write_many(addr)
-#else
-/* Dummy versions of the macros */
-# define UNIV_EXPECT(expr,value) (expr)
-# define UNIV_LIKELY_NULL(expr) (expr)
-# define UNIV_PREFETCH_R(addr) ((void) 0)
-# define UNIV_PREFETCH_RW(addr) ((void) 0)
-#endif
-/* Tell the compiler that cond is likely to hold */
-#define UNIV_LIKELY(cond) UNIV_EXPECT(cond, TRUE)
-/* Tell the compiler that cond is unlikely to hold */
-#define UNIV_UNLIKELY(cond) UNIV_EXPECT(cond, FALSE)
-
-/* Compile-time constant of the given array's size. */
-#define UT_ARR_SIZE(a) (sizeof(a) / sizeof((a)[0]))
-
-/* The return type from a thread's start function differs between Unix and
-Windows, so define a typedef for it and a macro to use at the end of such
-functions. */
-
-#ifdef __WIN__
-typedef ulint os_thread_ret_t;
-#define OS_THREAD_DUMMY_RETURN return(0)
-#else
-typedef void* os_thread_ret_t;
-#define OS_THREAD_DUMMY_RETURN return(NULL)
-#endif
-
-#include <stdio.h>
-#include "ut0dbg.h"
-#include "ut0ut.h"
-#include "db0err.h"
-#ifdef UNIV_DEBUG_VALGRIND
-# include <valgrind/memcheck.h>
-# define UNIV_MEM_VALID(addr, size) VALGRIND_MAKE_MEM_DEFINED(addr, size)
-# define UNIV_MEM_INVALID(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size)
-# define UNIV_MEM_FREE(addr, size) VALGRIND_MAKE_MEM_NOACCESS(addr, size)
-# define UNIV_MEM_ALLOC(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size)
-# define UNIV_MEM_DESC(addr, size, b) VALGRIND_CREATE_BLOCK(addr, size, b)
-# define UNIV_MEM_UNDESC(b) VALGRIND_DISCARD(b)
-# define UNIV_MEM_ASSERT_RW(addr, size) do { \
- const void* _p = (const void*) (ulint) \
- VALGRIND_CHECK_MEM_IS_DEFINED(addr, size); \
- if (UNIV_LIKELY_NULL(_p)) \
- fprintf(stderr, "%s:%d: %p[%u] undefined at %ld\n", \
- __FILE__, __LINE__, \
- (const void*) (addr), (unsigned) (size), (long) \
- (((const char*) _p) - ((const char*) (addr)))); \
- } while (0)
-# define UNIV_MEM_ASSERT_W(addr, size) do { \
- const void* _p = (const void*) (ulint) \
- VALGRIND_CHECK_MEM_IS_ADDRESSABLE(addr, size); \
- if (UNIV_LIKELY_NULL(_p)) \
- fprintf(stderr, "%s:%d: %p[%u] unwritable at %ld\n", \
- __FILE__, __LINE__, \
- (const void*) (addr), (unsigned) (size), (long) \
- (((const char*) _p) - ((const char*) (addr)))); \
- } while (0)
-#else
-# define UNIV_MEM_VALID(addr, size) do {} while(0)
-# define UNIV_MEM_INVALID(addr, size) do {} while(0)
-# define UNIV_MEM_FREE(addr, size) do {} while(0)
-# define UNIV_MEM_ALLOC(addr, size) do {} while(0)
-# define UNIV_MEM_DESC(addr, size, b) do {} while(0)
-# define UNIV_MEM_UNDESC(b) do {} while(0)
-# define UNIV_MEM_ASSERT_RW(addr, size) do {} while(0)
-# define UNIV_MEM_ASSERT_W(addr, size) do {} while(0)
-#endif
-#define UNIV_MEM_ASSERT_AND_FREE(addr, size) do { \
- UNIV_MEM_ASSERT_W(addr, size); \
- UNIV_MEM_FREE(addr, size); \
-} while (0)
-#define UNIV_MEM_ASSERT_AND_ALLOC(addr, size) do { \
- UNIV_MEM_ASSERT_W(addr, size); \
- UNIV_MEM_ALLOC(addr, size); \
-} while (0)
-
-#endif
diff --git a/storage/innodb_plugin/include/usr0sess.h b/storage/innodb_plugin/include/usr0sess.h
deleted file mode 100644
index 2c288f7d455..00000000000
--- a/storage/innodb_plugin/include/usr0sess.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/usr0sess.h
-Sessions
-
-Created 6/25/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef usr0sess_h
-#define usr0sess_h
-
-#include "univ.i"
-#include "ut0byte.h"
-#include "trx0types.h"
-#include "srv0srv.h"
-#include "trx0types.h"
-#include "usr0types.h"
-#include "que0types.h"
-#include "data0data.h"
-#include "rem0rec.h"
-
-/*********************************************************************//**
-Opens a session.
-@return own: session object */
-UNIV_INTERN
-sess_t*
-sess_open(void);
-/*============*/
-/*********************************************************************//**
-Closes a session, freeing the memory occupied by it. */
-UNIV_INTERN
-void
-sess_close(
-/*=======*/
- sess_t* sess); /* in, own: session object */
-
-/* The session handle. All fields are protected by the kernel mutex */
-struct sess_struct{
- ulint state; /*!< state of the session */
- trx_t* trx; /*!< transaction object permanently
- assigned for the session: the
- transaction instance designated by the
- trx id changes, but the memory
- structure is preserved */
- UT_LIST_BASE_NODE_T(que_t)
- graphs; /*!< query graphs belonging to this
- session */
-};
-
-/* Session states */
-#define SESS_ACTIVE 1
-#define SESS_ERROR 2 /* session contains an error message
- which has not yet been communicated
- to the client */
-#ifndef UNIV_NONINL
-#include "usr0sess.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/usr0sess.ic b/storage/innodb_plugin/include/usr0sess.ic
deleted file mode 100644
index 35a75d75acc..00000000000
--- a/storage/innodb_plugin/include/usr0sess.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/usr0sess.ic
-Sessions
-
-Created 6/25/1996 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innodb_plugin/include/usr0types.h b/storage/innodb_plugin/include/usr0types.h
deleted file mode 100644
index 6cc6f015613..00000000000
--- a/storage/innodb_plugin/include/usr0types.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/usr0types.h
-Users and sessions global types
-
-Created 6/25/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef usr0types_h
-#define usr0types_h
-
-typedef struct sess_struct sess_t;
-
-#endif
diff --git a/storage/innodb_plugin/include/ut0byte.h b/storage/innodb_plugin/include/ut0byte.h
deleted file mode 100644
index f55e2888c60..00000000000
--- a/storage/innodb_plugin/include/ut0byte.h
+++ /dev/null
@@ -1,270 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/ut0byte.h
-Utilities for byte operations
-
-Created 1/20/1994 Heikki Tuuri
-***********************************************************************/
-
-#ifndef ut0byte_h
-#define ut0byte_h
-
-
-#include "univ.i"
-
-/** Pair of ulint integers. */
-typedef struct dulint_struct dulint;
-/** Type definition for a 64-bit unsigned integer, which works also
-in 32-bit machines. NOTE! Access the fields only with the accessor
-functions. This definition appears here only for the compiler to
-know the size of a dulint. */
-struct dulint_struct{
- ulint high; /*!< most significant 32 bits */
- ulint low; /*!< least significant 32 bits */
-};
-
-/** Zero value for a dulint */
-extern const dulint ut_dulint_zero;
-
-/** Maximum value for a dulint */
-extern const dulint ut_dulint_max;
-
-/*******************************************************//**
-Creates a 64-bit dulint out of two ulints.
-@return created dulint */
-UNIV_INLINE
-dulint
-ut_dulint_create(
-/*=============*/
- ulint high, /*!< in: high-order 32 bits */
- ulint low); /*!< in: low-order 32 bits */
-/*******************************************************//**
-Gets the high-order 32 bits of a dulint.
-@return 32 bits in ulint */
-UNIV_INLINE
-ulint
-ut_dulint_get_high(
-/*===============*/
- dulint d); /*!< in: dulint */
-/*******************************************************//**
-Gets the low-order 32 bits of a dulint.
-@return 32 bits in ulint */
-UNIV_INLINE
-ulint
-ut_dulint_get_low(
-/*==============*/
- dulint d); /*!< in: dulint */
-/*******************************************************//**
-Converts a dulint (a struct of 2 ulints) to ib_int64_t, which is a 64-bit
-integer type.
-@return value in ib_int64_t type */
-UNIV_INLINE
-ib_int64_t
-ut_conv_dulint_to_longlong(
-/*=======================*/
- dulint d); /*!< in: dulint */
-/*******************************************************//**
-Tests if a dulint is zero.
-@return TRUE if zero */
-UNIV_INLINE
-ibool
-ut_dulint_is_zero(
-/*==============*/
- dulint a); /*!< in: dulint */
-/*******************************************************//**
-Compares two dulints.
-@return -1 if a < b, 0 if a == b, 1 if a > b */
-UNIV_INLINE
-int
-ut_dulint_cmp(
-/*==========*/
- dulint a, /*!< in: dulint */
- dulint b); /*!< in: dulint */
-/*******************************************************//**
-Calculates the max of two dulints.
-@return max(a, b) */
-UNIV_INLINE
-dulint
-ut_dulint_get_max(
-/*==============*/
- dulint a, /*!< in: dulint */
- dulint b); /*!< in: dulint */
-/*******************************************************//**
-Calculates the min of two dulints.
-@return min(a, b) */
-UNIV_INLINE
-dulint
-ut_dulint_get_min(
-/*==============*/
- dulint a, /*!< in: dulint */
- dulint b); /*!< in: dulint */
-/*******************************************************//**
-Adds a ulint to a dulint.
-@return sum a + b */
-UNIV_INLINE
-dulint
-ut_dulint_add(
-/*==========*/
- dulint a, /*!< in: dulint */
- ulint b); /*!< in: ulint */
-/*******************************************************//**
-Subtracts a ulint from a dulint.
-@return a - b */
-UNIV_INLINE
-dulint
-ut_dulint_subtract(
-/*===============*/
- dulint a, /*!< in: dulint */
- ulint b); /*!< in: ulint, b <= a */
-/*******************************************************//**
-Subtracts a dulint from another. NOTE that the difference must be positive
-and smaller that 4G.
-@return a - b */
-UNIV_INLINE
-ulint
-ut_dulint_minus(
-/*============*/
- dulint a, /*!< in: dulint; NOTE a must be >= b and at most
- 2 to power 32 - 1 greater */
- dulint b); /*!< in: dulint */
-/********************************************************//**
-Rounds a dulint downward to a multiple of a power of 2.
-@return rounded value */
-UNIV_INLINE
-dulint
-ut_dulint_align_down(
-/*=================*/
- dulint n, /*!< in: number to be rounded */
- ulint align_no); /*!< in: align by this number which must be a
- power of 2 */
-/********************************************************//**
-Rounds a dulint upward to a multiple of a power of 2.
-@return rounded value */
-UNIV_INLINE
-dulint
-ut_dulint_align_up(
-/*===============*/
- dulint n, /*!< in: number to be rounded */
- ulint align_no); /*!< in: align by this number which must be a
- power of 2 */
-/********************************************************//**
-Rounds a dulint downward to a multiple of a power of 2.
-@return rounded value */
-UNIV_INLINE
-ib_uint64_t
-ut_uint64_align_down(
-/*=================*/
- ib_uint64_t n, /*!< in: number to be rounded */
- ulint align_no); /*!< in: align by this number
- which must be a power of 2 */
-/********************************************************//**
-Rounds ib_uint64_t upward to a multiple of a power of 2.
-@return rounded value */
-UNIV_INLINE
-ib_uint64_t
-ut_uint64_align_up(
-/*===============*/
- ib_uint64_t n, /*!< in: number to be rounded */
- ulint align_no); /*!< in: align by this number
- which must be a power of 2 */
-/*******************************************************//**
-Increments a dulint variable by 1. */
-#define UT_DULINT_INC(D)\
-{\
- if ((D).low == 0xFFFFFFFFUL) {\
- (D).high = (D).high + 1;\
- (D).low = 0;\
- } else {\
- (D).low = (D).low + 1;\
- }\
-}
-/*******************************************************//**
-Tests if two dulints are equal. */
-#define UT_DULINT_EQ(D1, D2) (((D1).low == (D2).low)\
- && ((D1).high == (D2).high))
-#ifdef notdefined
-/************************************************************//**
-Sort function for dulint arrays. */
-UNIV_INTERN
-void
-ut_dulint_sort(
-/*===========*/
- dulint* arr, /*!< in/out: array to be sorted */
- dulint* aux_arr,/*!< in/out: auxiliary array (same size as arr) */
- ulint low, /*!< in: low bound of sort interval, inclusive */
- ulint high); /*!< in: high bound of sort interval, noninclusive */
-#endif /* notdefined */
-
-/*********************************************************//**
-The following function rounds up a pointer to the nearest aligned address.
-@return aligned pointer */
-UNIV_INLINE
-void*
-ut_align(
-/*=====*/
- const void* ptr, /*!< in: pointer */
- ulint align_no); /*!< in: align by this number */
-/*********************************************************//**
-The following function rounds down a pointer to the nearest
-aligned address.
-@return aligned pointer */
-UNIV_INLINE
-void*
-ut_align_down(
-/*==========*/
- const void* ptr, /*!< in: pointer */
- ulint align_no) /*!< in: align by this number */
- __attribute__((const));
-/*********************************************************//**
-The following function computes the offset of a pointer from the nearest
-aligned address.
-@return distance from aligned pointer */
-UNIV_INLINE
-ulint
-ut_align_offset(
-/*============*/
- const void* ptr, /*!< in: pointer */
- ulint align_no) /*!< in: align by this number */
- __attribute__((const));
-/*****************************************************************//**
-Gets the nth bit of a ulint.
-@return TRUE if nth bit is 1; 0th bit is defined to be the least significant */
-UNIV_INLINE
-ibool
-ut_bit_get_nth(
-/*===========*/
- ulint a, /*!< in: ulint */
- ulint n); /*!< in: nth bit requested */
-/*****************************************************************//**
-Sets the nth bit of a ulint.
-@return the ulint with the bit set as requested */
-UNIV_INLINE
-ulint
-ut_bit_set_nth(
-/*===========*/
- ulint a, /*!< in: ulint */
- ulint n, /*!< in: nth bit requested */
- ibool val); /*!< in: value for the bit to set */
-
-#ifndef UNIV_NONINL
-#include "ut0byte.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/ut0byte.ic b/storage/innodb_plugin/include/ut0byte.ic
deleted file mode 100644
index 3dd51890cb4..00000000000
--- a/storage/innodb_plugin/include/ut0byte.ic
+++ /dev/null
@@ -1,411 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************************//**
-@file include/ut0byte.ic
-Utilities for byte operations
-
-Created 5/30/1994 Heikki Tuuri
-*******************************************************************/
-
-/*******************************************************//**
-Creates a 64-bit dulint out of two ulints.
-@return created dulint */
-UNIV_INLINE
-dulint
-ut_dulint_create(
-/*=============*/
- ulint high, /*!< in: high-order 32 bits */
- ulint low) /*!< in: low-order 32 bits */
-{
- dulint res;
-
- ut_ad(high <= 0xFFFFFFFF);
- ut_ad(low <= 0xFFFFFFFF);
-
- res.high = high;
- res.low = low;
-
- return(res);
-}
-
-/*******************************************************//**
-Gets the high-order 32 bits of a dulint.
-@return 32 bits in ulint */
-UNIV_INLINE
-ulint
-ut_dulint_get_high(
-/*===============*/
- dulint d) /*!< in: dulint */
-{
- return(d.high);
-}
-
-/*******************************************************//**
-Gets the low-order 32 bits of a dulint.
-@return 32 bits in ulint */
-UNIV_INLINE
-ulint
-ut_dulint_get_low(
-/*==============*/
- dulint d) /*!< in: dulint */
-{
- return(d.low);
-}
-
-/*******************************************************//**
-Converts a dulint (a struct of 2 ulints) to ib_int64_t, which is a 64-bit
-integer type.
-@return value in ib_int64_t type */
-UNIV_INLINE
-ib_int64_t
-ut_conv_dulint_to_longlong(
-/*=======================*/
- dulint d) /*!< in: dulint */
-{
- return((ib_int64_t)d.low
- + (((ib_int64_t)d.high) << 32));
-}
-
-/*******************************************************//**
-Tests if a dulint is zero.
-@return TRUE if zero */
-UNIV_INLINE
-ibool
-ut_dulint_is_zero(
-/*==============*/
- dulint a) /*!< in: dulint */
-{
- if ((a.low == 0) && (a.high == 0)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*******************************************************//**
-Compares two dulints.
-@return -1 if a < b, 0 if a == b, 1 if a > b */
-UNIV_INLINE
-int
-ut_dulint_cmp(
-/*==========*/
- dulint a, /*!< in: dulint */
- dulint b) /*!< in: dulint */
-{
- if (a.high > b.high) {
- return(1);
- } else if (a.high < b.high) {
- return(-1);
- } else if (a.low > b.low) {
- return(1);
- } else if (a.low < b.low) {
- return(-1);
- } else {
- return(0);
- }
-}
-
-/*******************************************************//**
-Calculates the max of two dulints.
-@return max(a, b) */
-UNIV_INLINE
-dulint
-ut_dulint_get_max(
-/*==============*/
- dulint a, /*!< in: dulint */
- dulint b) /*!< in: dulint */
-{
- if (ut_dulint_cmp(a, b) > 0) {
-
- return(a);
- }
-
- return(b);
-}
-
-/*******************************************************//**
-Calculates the min of two dulints.
-@return min(a, b) */
-UNIV_INLINE
-dulint
-ut_dulint_get_min(
-/*==============*/
- dulint a, /*!< in: dulint */
- dulint b) /*!< in: dulint */
-{
- if (ut_dulint_cmp(a, b) > 0) {
-
- return(b);
- }
-
- return(a);
-}
-
-/*******************************************************//**
-Adds a ulint to a dulint.
-@return sum a + b */
-UNIV_INLINE
-dulint
-ut_dulint_add(
-/*==========*/
- dulint a, /*!< in: dulint */
- ulint b) /*!< in: ulint */
-{
- if (0xFFFFFFFFUL - b >= a.low) {
- a.low += b;
-
- return(a);
- }
-
- a.low = a.low - (0xFFFFFFFFUL - b) - 1;
-
- a.high++;
-
- return(a);
-}
-
-/*******************************************************//**
-Subtracts a ulint from a dulint.
-@return a - b */
-UNIV_INLINE
-dulint
-ut_dulint_subtract(
-/*===============*/
- dulint a, /*!< in: dulint */
- ulint b) /*!< in: ulint, b <= a */
-{
- if (a.low >= b) {
- a.low -= b;
-
- return(a);
- }
-
- b -= a.low + 1;
-
- a.low = 0xFFFFFFFFUL - b;
-
- ut_ad(a.high > 0);
-
- a.high--;
-
- return(a);
-}
-
-/*******************************************************//**
-Subtracts a dulint from another. NOTE that the difference must be positive
-and smaller that 4G.
-@return a - b */
-UNIV_INLINE
-ulint
-ut_dulint_minus(
-/*============*/
- dulint a, /*!< in: dulint; NOTE a must be >= b and at most
- 2 to power 32 - 1 greater */
- dulint b) /*!< in: dulint */
-{
- ulint diff;
-
- if (a.high == b.high) {
- ut_ad(a.low >= b.low);
-
- return(a.low - b.low);
- }
-
- ut_ad(a.high == b.high + 1);
-
- diff = (ulint)(0xFFFFFFFFUL - b.low);
- diff += 1 + a.low;
-
- ut_ad(diff > a.low);
-
- return(diff);
-}
-
-/********************************************************//**
-Rounds a dulint downward to a multiple of a power of 2.
-@return rounded value */
-UNIV_INLINE
-dulint
-ut_dulint_align_down(
-/*=================*/
- dulint n, /*!< in: number to be rounded */
- ulint align_no) /*!< in: align by this number which must be a
- power of 2 */
-{
- ulint low, high;
-
- ut_ad(align_no > 0);
- ut_ad(((align_no - 1) & align_no) == 0);
-
- low = ut_dulint_get_low(n);
- high = ut_dulint_get_high(n);
-
- low = low & ~(align_no - 1);
-
- return(ut_dulint_create(high, low));
-}
-
-/********************************************************//**
-Rounds a dulint upward to a multiple of a power of 2.
-@return rounded value */
-UNIV_INLINE
-dulint
-ut_dulint_align_up(
-/*===============*/
- dulint n, /*!< in: number to be rounded */
- ulint align_no) /*!< in: align by this number which must be a
- power of 2 */
-{
- return(ut_dulint_align_down(ut_dulint_add(n, align_no - 1), align_no));
-}
-
-/********************************************************//**
-Rounds ib_uint64_t downward to a multiple of a power of 2.
-@return rounded value */
-UNIV_INLINE
-ib_uint64_t
-ut_uint64_align_down(
-/*=================*/
- ib_uint64_t n, /*!< in: number to be rounded */
- ulint align_no) /*!< in: align by this number
- which must be a power of 2 */
-{
- ut_ad(align_no > 0);
- ut_ad(ut_is_2pow(align_no));
-
- return(n & ~((ib_uint64_t) align_no - 1));
-}
-
-/********************************************************//**
-Rounds ib_uint64_t upward to a multiple of a power of 2.
-@return rounded value */
-UNIV_INLINE
-ib_uint64_t
-ut_uint64_align_up(
-/*===============*/
- ib_uint64_t n, /*!< in: number to be rounded */
- ulint align_no) /*!< in: align by this number
- which must be a power of 2 */
-{
- ib_uint64_t align_1 = (ib_uint64_t) align_no - 1;
-
- ut_ad(align_no > 0);
- ut_ad(ut_is_2pow(align_no));
-
- return((n + align_1) & ~align_1);
-}
-
-/*********************************************************//**
-The following function rounds up a pointer to the nearest aligned address.
-@return aligned pointer */
-UNIV_INLINE
-void*
-ut_align(
-/*=====*/
- const void* ptr, /*!< in: pointer */
- ulint align_no) /*!< in: align by this number */
-{
- ut_ad(align_no > 0);
- ut_ad(((align_no - 1) & align_no) == 0);
- ut_ad(ptr);
-
- ut_ad(sizeof(void*) == sizeof(ulint));
-
- return((void*)((((ulint)ptr) + align_no - 1) & ~(align_no - 1)));
-}
-
-/*********************************************************//**
-The following function rounds down a pointer to the nearest
-aligned address.
-@return aligned pointer */
-UNIV_INLINE
-void*
-ut_align_down(
-/*==========*/
- const void* ptr, /*!< in: pointer */
- ulint align_no) /*!< in: align by this number */
-{
- ut_ad(align_no > 0);
- ut_ad(((align_no - 1) & align_no) == 0);
- ut_ad(ptr);
-
- ut_ad(sizeof(void*) == sizeof(ulint));
-
- return((void*)((((ulint)ptr)) & ~(align_no - 1)));
-}
-
-/*********************************************************//**
-The following function computes the offset of a pointer from the nearest
-aligned address.
-@return distance from aligned pointer */
-UNIV_INLINE
-ulint
-ut_align_offset(
-/*============*/
- const void* ptr, /*!< in: pointer */
- ulint align_no) /*!< in: align by this number */
-{
- ut_ad(align_no > 0);
- ut_ad(((align_no - 1) & align_no) == 0);
- ut_ad(ptr);
-
- ut_ad(sizeof(void*) == sizeof(ulint));
-
- return(((ulint)ptr) & (align_no - 1));
-}
-
-/*****************************************************************//**
-Gets the nth bit of a ulint.
-@return TRUE if nth bit is 1; 0th bit is defined to be the least significant */
-UNIV_INLINE
-ibool
-ut_bit_get_nth(
-/*===========*/
- ulint a, /*!< in: ulint */
- ulint n) /*!< in: nth bit requested */
-{
- ut_ad(n < 8 * sizeof(ulint));
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
- return(1 & (a >> n));
-}
-
-/*****************************************************************//**
-Sets the nth bit of a ulint.
-@return the ulint with the bit set as requested */
-UNIV_INLINE
-ulint
-ut_bit_set_nth(
-/*===========*/
- ulint a, /*!< in: ulint */
- ulint n, /*!< in: nth bit requested */
- ibool val) /*!< in: value for the bit to set */
-{
- ut_ad(n < 8 * sizeof(ulint));
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
- if (val) {
- return(((ulint) 1 << n) | a);
- } else {
- return(~((ulint) 1 << n) & a);
- }
-}
diff --git a/storage/innodb_plugin/include/ut0dbg.h b/storage/innodb_plugin/include/ut0dbg.h
deleted file mode 100644
index 78b525c38ab..00000000000
--- a/storage/innodb_plugin/include/ut0dbg.h
+++ /dev/null
@@ -1,175 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/*****************************************************************//**
-@file include/ut0dbg.h
-Debug utilities for Innobase
-
-Created 1/30/1994 Heikki Tuuri
-**********************************************************************/
-
-#ifndef ut0dbg_h
-#define ut0dbg_h
-
-#include "univ.i"
-#include <stdlib.h>
-#include "os0thread.h"
-
-#if defined(__GNUC__) && (__GNUC__ > 2)
-/** Test if an assertion fails.
-@param EXPR assertion expression
-@return nonzero if EXPR holds, zero if not */
-# define UT_DBG_FAIL(EXPR) UNIV_UNLIKELY(!((ulint)(EXPR)))
-#else
-/** This is used to eliminate compiler warnings */
-extern ulint ut_dbg_zero;
-/** Test if an assertion fails.
-@param EXPR assertion expression
-@return nonzero if EXPR holds, zero if not */
-# define UT_DBG_FAIL(EXPR) !((ulint)(EXPR) + ut_dbg_zero)
-#endif
-
-/*************************************************************//**
-Report a failed assertion. */
-UNIV_INTERN
-void
-ut_dbg_assertion_failed(
-/*====================*/
- const char* expr, /*!< in: the failed assertion */
- const char* file, /*!< in: source file containing the assertion */
- ulint line); /*!< in: line number of the assertion */
-
-#ifdef __NETWARE__
-/** Flag for ignoring further assertion failures. This is set to TRUE
-when on NetWare there happens an InnoDB assertion failure or other
-fatal error condition that requires an immediate shutdown. */
-extern ibool panic_shutdown;
-/* Abort the execution. */
-void ut_dbg_panic(void);
-# define UT_DBG_PANIC ut_dbg_panic()
-/* Stop threads in ut_a(). */
-# define UT_DBG_STOP do {} while (0) /* We do not do this on NetWare */
-#else /* __NETWARE__ */
-# if defined(__WIN__) || defined(__INTEL_COMPILER)
-# undef UT_DBG_USE_ABORT
-# elif defined(__GNUC__) && (__GNUC__ > 2)
-# define UT_DBG_USE_ABORT
-# endif
-
-# ifndef UT_DBG_USE_ABORT
-/** A null pointer that will be dereferenced to trigger a memory trap */
-extern ulint* ut_dbg_null_ptr;
-# endif
-
-# if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT)
-/** If this is set to TRUE by ut_dbg_assertion_failed(), all threads
-will stop at the next ut_a() or ut_ad(). */
-extern ibool ut_dbg_stop_threads;
-
-/*************************************************************//**
-Stop a thread after assertion failure. */
-UNIV_INTERN
-void
-ut_dbg_stop_thread(
-/*===============*/
- const char* file,
- ulint line);
-# endif
-
-# ifdef UT_DBG_USE_ABORT
-/** Abort the execution. */
-# define UT_DBG_PANIC abort()
-/** Stop threads (null operation) */
-# define UT_DBG_STOP do {} while (0)
-# else /* UT_DBG_USE_ABORT */
-/** Abort the execution. */
-# define UT_DBG_PANIC \
- if (*(ut_dbg_null_ptr)) ut_dbg_null_ptr = NULL
-/** Stop threads in ut_a(). */
-# define UT_DBG_STOP do \
- if (UNIV_UNLIKELY(ut_dbg_stop_threads)) { \
- ut_dbg_stop_thread(__FILE__, (ulint) __LINE__); \
- } while (0)
-# endif /* UT_DBG_USE_ABORT */
-#endif /* __NETWARE__ */
-
-/** Abort execution if EXPR does not evaluate to nonzero.
-@param EXPR assertion expression that should hold */
-#define ut_a(EXPR) do { \
- if (UT_DBG_FAIL(EXPR)) { \
- ut_dbg_assertion_failed(#EXPR, \
- __FILE__, (ulint) __LINE__); \
- UT_DBG_PANIC; \
- } \
- UT_DBG_STOP; \
-} while (0)
-
-/** Abort execution. */
-#define ut_error do { \
- ut_dbg_assertion_failed(0, __FILE__, (ulint) __LINE__); \
- UT_DBG_PANIC; \
-} while (0)
-
-#ifdef UNIV_DEBUG
-/** Debug assertion. Does nothing unless UNIV_DEBUG is defined. */
-#define ut_ad(EXPR) ut_a(EXPR)
-/** Debug statement. Does nothing unless UNIV_DEBUG is defined. */
-#define ut_d(EXPR) do {EXPR;} while (0)
-#else
-/** Debug assertion. Does nothing unless UNIV_DEBUG is defined. */
-#define ut_ad(EXPR)
-/** Debug statement. Does nothing unless UNIV_DEBUG is defined. */
-#define ut_d(EXPR)
-#endif
-
-/** Silence warnings about an unused variable by doing a null assignment.
-@param A the unused variable */
-#define UT_NOT_USED(A) A = A
-
-#ifdef UNIV_COMPILE_TEST_FUNCS
-
-#include <sys/types.h>
-#include <sys/time.h>
-#include <sys/resource.h>
-
-/** structure used for recording usage statistics */
-typedef struct speedo_struct {
- struct rusage ru; /*!< getrusage() result */
- struct timeval tv; /*!< gettimeofday() result */
-} speedo_t;
-
-/*******************************************************************//**
-Resets a speedo (records the current time in it). */
-UNIV_INTERN
-void
-speedo_reset(
-/*=========*/
- speedo_t* speedo); /*!< out: speedo */
-
-/*******************************************************************//**
-Shows the time elapsed and usage statistics since the last reset of a
-speedo. */
-UNIV_INTERN
-void
-speedo_show(
-/*========*/
- const speedo_t* speedo); /*!< in: speedo */
-
-#endif /* UNIV_COMPILE_TEST_FUNCS */
-
-#endif
diff --git a/storage/innodb_plugin/include/ut0list.h b/storage/innodb_plugin/include/ut0list.h
deleted file mode 100644
index ec67f4e2a0f..00000000000
--- a/storage/innodb_plugin/include/ut0list.h
+++ /dev/null
@@ -1,172 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file include/ut0list.h
-A double-linked list
-
-Created 4/26/2006 Osku Salerma
-************************************************************************/
-
-/*******************************************************************//**
-A double-linked list. This differs from the one in ut0lst.h in that in this
-one, each list node contains a pointer to the data, whereas the one in
-ut0lst.h uses a strategy where the list pointers are embedded in the data
-items themselves.
-
-Use this one when you need to store arbitrary data in the list where you
-can't embed the list pointers in the data, if a data item needs to be
-stored in multiple lists, etc.
-
-Note about the memory management: ib_list_t is a fixed-size struct whose
-allocation/deallocation is done through ib_list_create/ib_list_free, but the
-memory for the list nodes is allocated through a user-given memory heap,
-which can either be the same for all nodes or vary per node. Most users will
-probably want to create a memory heap to store the item-specific data, and
-pass in this same heap to the list node creation functions, thus
-automatically freeing the list node when the item's heap is freed.
-
-************************************************************************/
-
-#ifndef IB_LIST_H
-#define IB_LIST_H
-
-#include "mem0mem.h"
-
-typedef struct ib_list_struct ib_list_t;
-typedef struct ib_list_node_struct ib_list_node_t;
-typedef struct ib_list_helper_struct ib_list_helper_t;
-
-/****************************************************************//**
-Create a new list using mem_alloc. Lists created with this function must be
-freed with ib_list_free.
-@return list */
-UNIV_INTERN
-ib_list_t*
-ib_list_create(void);
-/*=================*/
-
-
-/****************************************************************//**
-Create a new list using the given heap. ib_list_free MUST NOT BE CALLED for
-lists created with this function.
-@return list */
-UNIV_INTERN
-ib_list_t*
-ib_list_create_heap(
-/*================*/
- mem_heap_t* heap); /*!< in: memory heap to use */
-
-/****************************************************************//**
-Free a list. */
-UNIV_INTERN
-void
-ib_list_free(
-/*=========*/
- ib_list_t* list); /*!< in: list */
-
-/****************************************************************//**
-Add the data to the start of the list.
-@return new list node */
-UNIV_INTERN
-ib_list_node_t*
-ib_list_add_first(
-/*==============*/
- ib_list_t* list, /*!< in: list */
- void* data, /*!< in: data */
- mem_heap_t* heap); /*!< in: memory heap to use */
-
-/****************************************************************//**
-Add the data to the end of the list.
-@return new list node */
-UNIV_INTERN
-ib_list_node_t*
-ib_list_add_last(
-/*=============*/
- ib_list_t* list, /*!< in: list */
- void* data, /*!< in: data */
- mem_heap_t* heap); /*!< in: memory heap to use */
-
-/****************************************************************//**
-Add the data after the indicated node.
-@return new list node */
-UNIV_INTERN
-ib_list_node_t*
-ib_list_add_after(
-/*==============*/
- ib_list_t* list, /*!< in: list */
- ib_list_node_t* prev_node, /*!< in: node preceding new node (can
- be NULL) */
- void* data, /*!< in: data */
- mem_heap_t* heap); /*!< in: memory heap to use */
-
-/****************************************************************//**
-Remove the node from the list. */
-UNIV_INTERN
-void
-ib_list_remove(
-/*===========*/
- ib_list_t* list, /*!< in: list */
- ib_list_node_t* node); /*!< in: node to remove */
-
-/****************************************************************//**
-Get the first node in the list.
-@return first node, or NULL */
-UNIV_INLINE
-ib_list_node_t*
-ib_list_get_first(
-/*==============*/
- ib_list_t* list); /*!< in: list */
-
-/****************************************************************//**
-Get the last node in the list.
-@return last node, or NULL */
-UNIV_INLINE
-ib_list_node_t*
-ib_list_get_last(
-/*=============*/
- ib_list_t* list); /*!< in: list */
-
-/* List. */
-struct ib_list_struct {
- ib_list_node_t* first; /*!< first node */
- ib_list_node_t* last; /*!< last node */
- ibool is_heap_list; /*!< TRUE if this list was
- allocated through a heap */
-};
-
-/* A list node. */
-struct ib_list_node_struct {
- ib_list_node_t* prev; /*!< previous node */
- ib_list_node_t* next; /*!< next node */
- void* data; /*!< user data */
-};
-
-/* Quite often, the only additional piece of data you need is the per-item
-memory heap, so we have this generic struct available to use in those
-cases. */
-struct ib_list_helper_struct {
- mem_heap_t* heap; /*!< memory heap */
- void* data; /*!< user data */
-};
-
-#ifndef UNIV_NONINL
-#include "ut0list.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/ut0list.ic b/storage/innodb_plugin/include/ut0list.ic
deleted file mode 100644
index eb5c62796e8..00000000000
--- a/storage/innodb_plugin/include/ut0list.ic
+++ /dev/null
@@ -1,48 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file include/ut0list.ic
-A double-linked list
-
-Created 4/26/2006 Osku Salerma
-************************************************************************/
-
-/****************************************************************//**
-Get the first node in the list.
-@return first node, or NULL */
-UNIV_INLINE
-ib_list_node_t*
-ib_list_get_first(
-/*==============*/
- ib_list_t* list) /*!< in: list */
-{
- return(list->first);
-}
-
-/****************************************************************//**
-Get the last node in the list.
-@return last node, or NULL */
-UNIV_INLINE
-ib_list_node_t*
-ib_list_get_last(
-/*=============*/
- ib_list_t* list) /*!< in: list */
-{
- return(list->last);
-}
diff --git a/storage/innodb_plugin/include/ut0lst.h b/storage/innodb_plugin/include/ut0lst.h
deleted file mode 100644
index 261d33963dc..00000000000
--- a/storage/innodb_plugin/include/ut0lst.h
+++ /dev/null
@@ -1,261 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/ut0lst.h
-List utilities
-
-Created 9/10/1995 Heikki Tuuri
-***********************************************************************/
-
-#ifndef ut0lst_h
-#define ut0lst_h
-
-#include "univ.i"
-
-/* This module implements the two-way linear list which should be used
-if a list is used in the database. Note that a single struct may belong
-to two or more lists, provided that the list are given different names.
-An example of the usage of the lists can be found in fil0fil.c. */
-
-/*******************************************************************//**
-This macro expands to the unnamed type definition of a struct which acts
-as the two-way list base node. The base node contains pointers
-to both ends of the list and a count of nodes in the list (excluding
-the base node from the count).
-@param TYPE the name of the list node data type */
-#define UT_LIST_BASE_NODE_T(TYPE)\
-struct {\
- ulint count; /*!< count of nodes in list */\
- TYPE * start; /*!< pointer to list start, NULL if empty */\
- TYPE * end; /*!< pointer to list end, NULL if empty */\
-}\
-
-/*******************************************************************//**
-This macro expands to the unnamed type definition of a struct which
-should be embedded in the nodes of the list, the node type must be a struct.
-This struct contains the pointers to next and previous nodes in the list.
-The name of the field in the node struct should be the name given
-to the list.
-@param TYPE the list node type name */
-/* Example:
-typedef struct LRU_node_struct LRU_node_t;
-struct LRU_node_struct {
- UT_LIST_NODE_T(LRU_node_t) LRU_list;
- ...
-}
-The example implements an LRU list of name LRU_list. Its nodes are of type
-LRU_node_t. */
-
-#define UT_LIST_NODE_T(TYPE)\
-struct {\
- TYPE * prev; /*!< pointer to the previous node,\
- NULL if start of list */\
- TYPE * next; /*!< pointer to next node, NULL if end of list */\
-}\
-
-/*******************************************************************//**
-Initializes the base node of a two-way list.
-@param BASE the list base node
-*/
-#define UT_LIST_INIT(BASE)\
-{\
- (BASE).count = 0;\
- (BASE).start = NULL;\
- (BASE).end = NULL;\
-}\
-
-/*******************************************************************//**
-Adds the node as the first element in a two-way linked list.
-@param NAME list name
-@param BASE the base node (not a pointer to it)
-@param N pointer to the node to be added to the list.
-*/
-#define UT_LIST_ADD_FIRST(NAME, BASE, N)\
-{\
- ut_ad(N);\
- ((BASE).count)++;\
- ((N)->NAME).next = (BASE).start;\
- ((N)->NAME).prev = NULL;\
- if (UNIV_LIKELY((BASE).start != NULL)) {\
- ut_ad((BASE).start != (N));\
- (((BASE).start)->NAME).prev = (N);\
- }\
- (BASE).start = (N);\
- if (UNIV_UNLIKELY((BASE).end == NULL)) {\
- (BASE).end = (N);\
- }\
-}\
-
-/*******************************************************************//**
-Adds the node as the last element in a two-way linked list.
-@param NAME list name
-@param BASE the base node (not a pointer to it)
-@param N pointer to the node to be added to the list
-*/
-#define UT_LIST_ADD_LAST(NAME, BASE, N)\
-{\
- ut_ad(N);\
- ((BASE).count)++;\
- ((N)->NAME).prev = (BASE).end;\
- ((N)->NAME).next = NULL;\
- if ((BASE).end != NULL) {\
- ut_ad((BASE).end != (N));\
- (((BASE).end)->NAME).next = (N);\
- }\
- (BASE).end = (N);\
- if ((BASE).start == NULL) {\
- (BASE).start = (N);\
- }\
-}\
-
-/*******************************************************************//**
-Inserts a NODE2 after NODE1 in a list.
-@param NAME list name
-@param BASE the base node (not a pointer to it)
-@param NODE1 pointer to node after which NODE2 is inserted
-@param NODE2 pointer to node being inserted after NODE1
-*/
-#define UT_LIST_INSERT_AFTER(NAME, BASE, NODE1, NODE2)\
-{\
- ut_ad(NODE1);\
- ut_ad(NODE2);\
- ut_ad((NODE1) != (NODE2));\
- ((BASE).count)++;\
- ((NODE2)->NAME).prev = (NODE1);\
- ((NODE2)->NAME).next = ((NODE1)->NAME).next;\
- if (((NODE1)->NAME).next != NULL) {\
- ((((NODE1)->NAME).next)->NAME).prev = (NODE2);\
- }\
- ((NODE1)->NAME).next = (NODE2);\
- if ((BASE).end == (NODE1)) {\
- (BASE).end = (NODE2);\
- }\
-}\
-
-#ifdef UNIV_LIST_DEBUG
-/** Invalidate the pointers in a list node.
-@param NAME list name
-@param N pointer to the node that was removed */
-# define UT_LIST_REMOVE_CLEAR(NAME, N) \
-((N)->NAME.prev = (N)->NAME.next = (void*) -1)
-#else
-/** Invalidate the pointers in a list node.
-@param NAME list name
-@param N pointer to the node that was removed */
-# define UT_LIST_REMOVE_CLEAR(NAME, N) while (0)
-#endif
-
-/*******************************************************************//**
-Removes a node from a two-way linked list.
-@param NAME list name
-@param BASE the base node (not a pointer to it)
-@param N pointer to the node to be removed from the list
-*/
-#define UT_LIST_REMOVE(NAME, BASE, N) \
-do { \
- ut_ad(N); \
- ut_a((BASE).count > 0); \
- ((BASE).count)--; \
- if (((N)->NAME).next != NULL) { \
- ((((N)->NAME).next)->NAME).prev = ((N)->NAME).prev; \
- } else { \
- (BASE).end = ((N)->NAME).prev; \
- } \
- if (((N)->NAME).prev != NULL) { \
- ((((N)->NAME).prev)->NAME).next = ((N)->NAME).next; \
- } else { \
- (BASE).start = ((N)->NAME).next; \
- } \
- UT_LIST_REMOVE_CLEAR(NAME, N); \
-} while (0)
-
-/********************************************************************//**
-Gets the next node in a two-way list.
-@param NAME list name
-@param N pointer to a node
-@return the successor of N in NAME, or NULL */
-#define UT_LIST_GET_NEXT(NAME, N)\
- (((N)->NAME).next)
-
-/********************************************************************//**
-Gets the previous node in a two-way list.
-@param NAME list name
-@param N pointer to a node
-@return the predecessor of N in NAME, or NULL */
-#define UT_LIST_GET_PREV(NAME, N)\
- (((N)->NAME).prev)
-
-/********************************************************************//**
-Alternative macro to get the number of nodes in a two-way list, i.e.,
-its length.
-@param BASE the base node (not a pointer to it).
-@return the number of nodes in the list */
-#define UT_LIST_GET_LEN(BASE)\
- (BASE).count
-
-/********************************************************************//**
-Gets the first node in a two-way list.
-@param BASE the base node (not a pointer to it)
-@return first node, or NULL if the list is empty */
-#define UT_LIST_GET_FIRST(BASE)\
- (BASE).start
-
-/********************************************************************//**
-Gets the last node in a two-way list.
-@param BASE the base node (not a pointer to it)
-@return last node, or NULL if the list is empty */
-#define UT_LIST_GET_LAST(BASE)\
- (BASE).end
-
-/********************************************************************//**
-Checks the consistency of a two-way list.
-@param NAME the name of the list
-@param TYPE node type
-@param BASE base node (not a pointer to it)
-@param ASSERTION a condition on ut_list_node_313 */
-#define UT_LIST_VALIDATE(NAME, TYPE, BASE, ASSERTION) \
-do { \
- ulint ut_list_i_313; \
- TYPE* ut_list_node_313; \
- \
- ut_list_node_313 = (BASE).start; \
- \
- for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \
- ut_a(ut_list_node_313); \
- ASSERTION; \
- ut_ad((ut_list_node_313->NAME).next || !ut_list_i_313); \
- ut_list_node_313 = (ut_list_node_313->NAME).next; \
- } \
- \
- ut_a(ut_list_node_313 == NULL); \
- \
- ut_list_node_313 = (BASE).end; \
- \
- for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \
- ut_a(ut_list_node_313); \
- ASSERTION; \
- ut_ad((ut_list_node_313->NAME).prev || !ut_list_i_313); \
- ut_list_node_313 = (ut_list_node_313->NAME).prev; \
- } \
- \
- ut_a(ut_list_node_313 == NULL); \
-} while (0)
-
-#endif
-
diff --git a/storage/innodb_plugin/include/ut0mem.h b/storage/innodb_plugin/include/ut0mem.h
deleted file mode 100644
index cf41cba4643..00000000000
--- a/storage/innodb_plugin/include/ut0mem.h
+++ /dev/null
@@ -1,306 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file include/ut0mem.h
-Memory primitives
-
-Created 5/30/1994 Heikki Tuuri
-************************************************************************/
-
-#ifndef ut0mem_h
-#define ut0mem_h
-
-#include "univ.i"
-#include <string.h>
-#ifndef UNIV_HOTBACKUP
-# include "os0sync.h"
-
-/** The total amount of memory currently allocated from the operating
-system with os_mem_alloc_large() or malloc(). Does not count malloc()
-if srv_use_sys_malloc is set. Protected by ut_list_mutex. */
-extern ulint ut_total_allocated_memory;
-
-/** Mutex protecting ut_total_allocated_memory and ut_mem_block_list */
-extern os_fast_mutex_t ut_list_mutex;
-#endif /* !UNIV_HOTBACKUP */
-
-/** Wrapper for memcpy(3). Copy memory area when the source and
-target are not overlapping.
-* @param dest in: copy to
-* @param sour in: copy from
-* @param n in: number of bytes to copy
-* @return dest */
-UNIV_INLINE
-void*
-ut_memcpy(void* dest, const void* sour, ulint n);
-
-/** Wrapper for memmove(3). Copy memory area when the source and
-target are overlapping.
-* @param dest in: copy to
-* @param sour in: copy from
-* @param n in: number of bytes to copy
-* @return dest */
-UNIV_INLINE
-void*
-ut_memmove(void* dest, const void* sour, ulint n);
-
-/** Wrapper for memcmp(3). Compare memory areas.
-* @param str1 in: first memory block to compare
-* @param str2 in: second memory block to compare
-* @param n in: number of bytes to compare
-* @return negative, 0, or positive if str1 is smaller, equal,
- or greater than str2, respectively. */
-UNIV_INLINE
-int
-ut_memcmp(const void* str1, const void* str2, ulint n);
-
-/**********************************************************************//**
-Initializes the mem block list at database startup. */
-UNIV_INTERN
-void
-ut_mem_init(void);
-/*=============*/
-
-/**********************************************************************//**
-Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is
-defined and set_to_zero is TRUE.
-@return own: allocated memory */
-UNIV_INTERN
-void*
-ut_malloc_low(
-/*==========*/
- ulint n, /*!< in: number of bytes to allocate */
- ibool set_to_zero, /*!< in: TRUE if allocated memory
- should be set to zero if
- UNIV_SET_MEM_TO_ZERO is defined */
- ibool assert_on_error); /*!< in: if TRUE, we crash mysqld if
- the memory cannot be allocated */
-/**********************************************************************//**
-Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is
-defined.
-@return own: allocated memory */
-UNIV_INTERN
-void*
-ut_malloc(
-/*======*/
- ulint n); /*!< in: number of bytes to allocate */
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Tests if malloc of n bytes would succeed. ut_malloc() asserts if memory runs
-out. It cannot be used if we want to return an error message. Prints to
-stderr a message if fails.
-@return TRUE if succeeded */
-UNIV_INTERN
-ibool
-ut_test_malloc(
-/*===========*/
- ulint n); /*!< in: try to allocate this many bytes */
-#endif /* !UNIV_HOTBACKUP */
-/**********************************************************************//**
-Frees a memory block allocated with ut_malloc. */
-UNIV_INTERN
-void
-ut_free(
-/*====*/
- void* ptr); /*!< in, own: memory block */
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Implements realloc. This is needed by /pars/lexyy.c. Otherwise, you should not
-use this function because the allocation functions in mem0mem.h are the
-recommended ones in InnoDB.
-
-man realloc in Linux, 2004:
-
- realloc() changes the size of the memory block pointed to
- by ptr to size bytes. The contents will be unchanged to
- the minimum of the old and new sizes; newly allocated mem­
- ory will be uninitialized. If ptr is NULL, the call is
- equivalent to malloc(size); if size is equal to zero, the
- call is equivalent to free(ptr). Unless ptr is NULL, it
- must have been returned by an earlier call to malloc(),
- calloc() or realloc().
-
-RETURN VALUE
- realloc() returns a pointer to the newly allocated memory,
- which is suitably aligned for any kind of variable and may
- be different from ptr, or NULL if the request fails. If
- size was equal to 0, either NULL or a pointer suitable to
- be passed to free() is returned. If realloc() fails the
- original block is left untouched - it is not freed or
- moved.
-@return own: pointer to new mem block or NULL */
-UNIV_INTERN
-void*
-ut_realloc(
-/*=======*/
- void* ptr, /*!< in: pointer to old block or NULL */
- ulint size); /*!< in: desired size */
-/**********************************************************************//**
-Frees in shutdown all allocated memory not freed yet. */
-UNIV_INTERN
-void
-ut_free_all_mem(void);
-/*=================*/
-#endif /* !UNIV_HOTBACKUP */
-
-/** Wrapper for strcpy(3). Copy a NUL-terminated string.
-* @param dest in: copy to
-* @param sour in: copy from
-* @return dest */
-UNIV_INLINE
-char*
-ut_strcpy(char* dest, const char* sour);
-
-/** Wrapper for strlen(3). Determine the length of a NUL-terminated string.
-* @param str in: string
-* @return length of the string in bytes, excluding the terminating NUL */
-UNIV_INLINE
-ulint
-ut_strlen(const char* str);
-
-/** Wrapper for strcmp(3). Compare NUL-terminated strings.
-* @param str1 in: first string to compare
-* @param str2 in: second string to compare
-* @return negative, 0, or positive if str1 is smaller, equal,
- or greater than str2, respectively. */
-UNIV_INLINE
-int
-ut_strcmp(const char* str1, const char* str2);
-
-/**********************************************************************//**
-Copies up to size - 1 characters from the NUL-terminated string src to
-dst, NUL-terminating the result. Returns strlen(src), so truncation
-occurred if the return value >= size.
-@return strlen(src) */
-UNIV_INTERN
-ulint
-ut_strlcpy(
-/*=======*/
- char* dst, /*!< in: destination buffer */
- const char* src, /*!< in: source buffer */
- ulint size); /*!< in: size of destination buffer */
-
-/**********************************************************************//**
-Like ut_strlcpy, but if src doesn't fit in dst completely, copies the last
-(size - 1) bytes of src, not the first.
-@return strlen(src) */
-UNIV_INTERN
-ulint
-ut_strlcpy_rev(
-/*===========*/
- char* dst, /*!< in: destination buffer */
- const char* src, /*!< in: source buffer */
- ulint size); /*!< in: size of destination buffer */
-
-/**********************************************************************//**
-Compute strlen(ut_strcpyq(str, q)).
-@return length of the string when quoted */
-UNIV_INLINE
-ulint
-ut_strlenq(
-/*=======*/
- const char* str, /*!< in: null-terminated string */
- char q); /*!< in: the quote character */
-
-/**********************************************************************//**
-Make a quoted copy of a NUL-terminated string. Leading and trailing
-quotes will not be included; only embedded quotes will be escaped.
-See also ut_strlenq() and ut_memcpyq().
-@return pointer to end of dest */
-UNIV_INTERN
-char*
-ut_strcpyq(
-/*=======*/
- char* dest, /*!< in: output buffer */
- char q, /*!< in: the quote character */
- const char* src); /*!< in: null-terminated string */
-
-/**********************************************************************//**
-Make a quoted copy of a fixed-length string. Leading and trailing
-quotes will not be included; only embedded quotes will be escaped.
-See also ut_strlenq() and ut_strcpyq().
-@return pointer to end of dest */
-UNIV_INTERN
-char*
-ut_memcpyq(
-/*=======*/
- char* dest, /*!< in: output buffer */
- char q, /*!< in: the quote character */
- const char* src, /*!< in: string to be quoted */
- ulint len); /*!< in: length of src */
-
-/**********************************************************************//**
-Return the number of times s2 occurs in s1. Overlapping instances of s2
-are only counted once.
-@return the number of times s2 occurs in s1 */
-UNIV_INTERN
-ulint
-ut_strcount(
-/*========*/
- const char* s1, /*!< in: string to search in */
- const char* s2); /*!< in: string to search for */
-
-/**********************************************************************//**
-Replace every occurrence of s1 in str with s2. Overlapping instances of s1
-are only replaced once.
-@return own: modified string, must be freed with mem_free() */
-UNIV_INTERN
-char*
-ut_strreplace(
-/*==========*/
- const char* str, /*!< in: string to operate on */
- const char* s1, /*!< in: string to replace */
- const char* s2); /*!< in: string to replace s1 with */
-
-/**********************************************************************//**
-Converts a raw binary data to a NUL-terminated hex string. The output is
-truncated if there is not enough space in "hex", make sure "hex_size" is at
-least (2 * raw_size + 1) if you do not want this to happen. Returns the
-actual number of characters written to "hex" (including the NUL).
-@return number of chars written */
-UNIV_INLINE
-ulint
-ut_raw_to_hex(
-/*==========*/
- const void* raw, /*!< in: raw data */
- ulint raw_size, /*!< in: "raw" length in bytes */
- char* hex, /*!< out: hex string */
- ulint hex_size); /*!< in: "hex" size in bytes */
-
-/*******************************************************************//**
-Adds single quotes to the start and end of string and escapes any quotes
-by doubling them. Returns the number of bytes that were written to "buf"
-(including the terminating NUL). If buf_size is too small then the
-trailing bytes from "str" are discarded.
-@return number of bytes that were written */
-UNIV_INLINE
-ulint
-ut_str_sql_format(
-/*==============*/
- const char* str, /*!< in: string */
- ulint str_len, /*!< in: string length in bytes */
- char* buf, /*!< out: output buffer */
- ulint buf_size); /*!< in: output buffer size
- in bytes */
-
-#ifndef UNIV_NONINL
-#include "ut0mem.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/ut0mem.ic b/storage/innodb_plugin/include/ut0mem.ic
deleted file mode 100644
index f36c28f1989..00000000000
--- a/storage/innodb_plugin/include/ut0mem.ic
+++ /dev/null
@@ -1,338 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file include/ut0mem.ic
-Memory primitives
-
-Created 5/30/1994 Heikki Tuuri
-************************************************************************/
-
-#include "ut0byte.h"
-#include "mach0data.h"
-
-/** Wrapper for memcpy(3). Copy memory area when the source and
-target are not overlapping.
-* @param dest in: copy to
-* @param sour in: copy from
-* @param n in: number of bytes to copy
-* @return dest */
-UNIV_INLINE
-void*
-ut_memcpy(void* dest, const void* sour, ulint n)
-{
- return(memcpy(dest, sour, n));
-}
-
-/** Wrapper for memmove(3). Copy memory area when the source and
-target are overlapping.
-* @param dest in: copy to
-* @param sour in: copy from
-* @param n in: number of bytes to copy
-* @return dest */
-UNIV_INLINE
-void*
-ut_memmove(void* dest, const void* sour, ulint n)
-{
- return(memmove(dest, sour, n));
-}
-
-/** Wrapper for memcmp(3). Compare memory areas.
-* @param str1 in: first memory block to compare
-* @param str2 in: second memory block to compare
-* @param n in: number of bytes to compare
-* @return negative, 0, or positive if str1 is smaller, equal,
- or greater than str2, respectively. */
-UNIV_INLINE
-int
-ut_memcmp(const void* str1, const void* str2, ulint n)
-{
- return(memcmp(str1, str2, n));
-}
-
-/** Wrapper for strcpy(3). Copy a NUL-terminated string.
-* @param dest in: copy to
-* @param sour in: copy from
-* @return dest */
-UNIV_INLINE
-char*
-ut_strcpy(char* dest, const char* sour)
-{
- return(strcpy(dest, sour));
-}
-
-/** Wrapper for strlen(3). Determine the length of a NUL-terminated string.
-* @param str in: string
-* @return length of the string in bytes, excluding the terminating NUL */
-UNIV_INLINE
-ulint
-ut_strlen(const char* str)
-{
- return(strlen(str));
-}
-
-/** Wrapper for strcmp(3). Compare NUL-terminated strings.
-* @param str1 in: first string to compare
-* @param str2 in: second string to compare
-* @return negative, 0, or positive if str1 is smaller, equal,
- or greater than str2, respectively. */
-UNIV_INLINE
-int
-ut_strcmp(const char* str1, const char* str2)
-{
- return(strcmp(str1, str2));
-}
-
-/**********************************************************************//**
-Compute strlen(ut_strcpyq(str, q)).
-@return length of the string when quoted */
-UNIV_INLINE
-ulint
-ut_strlenq(
-/*=======*/
- const char* str, /*!< in: null-terminated string */
- char q) /*!< in: the quote character */
-{
- ulint len;
-
- for (len = 0; *str; len++, str++) {
- if (*str == q) {
- len++;
- }
- }
-
- return(len);
-}
-
-/**********************************************************************//**
-Converts a raw binary data to a NUL-terminated hex string. The output is
-truncated if there is not enough space in "hex", make sure "hex_size" is at
-least (2 * raw_size + 1) if you do not want this to happen. Returns the
-actual number of characters written to "hex" (including the NUL).
-@return number of chars written */
-UNIV_INLINE
-ulint
-ut_raw_to_hex(
-/*==========*/
- const void* raw, /*!< in: raw data */
- ulint raw_size, /*!< in: "raw" length in bytes */
- char* hex, /*!< out: hex string */
- ulint hex_size) /*!< in: "hex" size in bytes */
-{
-
-#ifdef WORDS_BIGENDIAN
-
-#define MK_UINT16(a, b) (((uint16) (a)) << 8 | (uint16) (b))
-
-#define UINT16_GET_A(u) ((unsigned char) ((u) >> 8))
-#define UINT16_GET_B(u) ((unsigned char) ((u) & 0xFF))
-
-#else /* WORDS_BIGENDIAN */
-
-#define MK_UINT16(a, b) (((uint16) (b)) << 8 | (uint16) (a))
-
-#define UINT16_GET_A(u) ((unsigned char) ((u) & 0xFF))
-#define UINT16_GET_B(u) ((unsigned char) ((u) >> 8))
-
-#endif /* WORDS_BIGENDIAN */
-
-#define MK_ALL_UINT16_WITH_A(a) \
- MK_UINT16(a, '0'), \
- MK_UINT16(a, '1'), \
- MK_UINT16(a, '2'), \
- MK_UINT16(a, '3'), \
- MK_UINT16(a, '4'), \
- MK_UINT16(a, '5'), \
- MK_UINT16(a, '6'), \
- MK_UINT16(a, '7'), \
- MK_UINT16(a, '8'), \
- MK_UINT16(a, '9'), \
- MK_UINT16(a, 'A'), \
- MK_UINT16(a, 'B'), \
- MK_UINT16(a, 'C'), \
- MK_UINT16(a, 'D'), \
- MK_UINT16(a, 'E'), \
- MK_UINT16(a, 'F')
-
- static const uint16 hex_map[256] = {
- MK_ALL_UINT16_WITH_A('0'),
- MK_ALL_UINT16_WITH_A('1'),
- MK_ALL_UINT16_WITH_A('2'),
- MK_ALL_UINT16_WITH_A('3'),
- MK_ALL_UINT16_WITH_A('4'),
- MK_ALL_UINT16_WITH_A('5'),
- MK_ALL_UINT16_WITH_A('6'),
- MK_ALL_UINT16_WITH_A('7'),
- MK_ALL_UINT16_WITH_A('8'),
- MK_ALL_UINT16_WITH_A('9'),
- MK_ALL_UINT16_WITH_A('A'),
- MK_ALL_UINT16_WITH_A('B'),
- MK_ALL_UINT16_WITH_A('C'),
- MK_ALL_UINT16_WITH_A('D'),
- MK_ALL_UINT16_WITH_A('E'),
- MK_ALL_UINT16_WITH_A('F')
- };
- const unsigned char* rawc;
- ulint read_bytes;
- ulint write_bytes;
- ulint i;
-
- rawc = (const unsigned char*) raw;
-
- if (hex_size == 0) {
-
- return(0);
- }
-
- if (hex_size <= 2 * raw_size) {
-
- read_bytes = hex_size / 2;
- write_bytes = hex_size;
- } else {
-
- read_bytes = raw_size;
- write_bytes = 2 * raw_size + 1;
- }
-
-#define LOOP_READ_BYTES(ASSIGN) \
- for (i = 0; i < read_bytes; i++) { \
- ASSIGN; \
- hex += 2; \
- rawc++; \
- }
-
- if (ut_align_offset(hex, 2) == 0) {
-
- LOOP_READ_BYTES(
- *(uint16*) hex = hex_map[*rawc]
- );
- } else {
-
- LOOP_READ_BYTES(
- *hex = UINT16_GET_A(hex_map[*rawc]);
- *(hex + 1) = UINT16_GET_B(hex_map[*rawc])
- );
- }
-
- if (hex_size <= 2 * raw_size && hex_size % 2 == 0) {
-
- hex--;
- }
-
- *hex = '\0';
-
- return(write_bytes);
-}
-
-/*******************************************************************//**
-Adds single quotes to the start and end of string and escapes any quotes
-by doubling them. Returns the number of bytes that were written to "buf"
-(including the terminating NUL). If buf_size is too small then the
-trailing bytes from "str" are discarded.
-@return number of bytes that were written */
-UNIV_INLINE
-ulint
-ut_str_sql_format(
-/*==============*/
- const char* str, /*!< in: string */
- ulint str_len, /*!< in: string length in bytes */
- char* buf, /*!< out: output buffer */
- ulint buf_size) /*!< in: output buffer size
- in bytes */
-{
- ulint str_i;
- ulint buf_i;
-
- buf_i = 0;
-
- switch (buf_size) {
- case 3:
-
- if (str_len == 0) {
-
- buf[buf_i] = '\'';
- buf_i++;
- buf[buf_i] = '\'';
- buf_i++;
- }
- /* FALLTHROUGH */
- case 2:
- case 1:
-
- buf[buf_i] = '\0';
- buf_i++;
- /* FALLTHROUGH */
- case 0:
-
- return(buf_i);
- }
-
- /* buf_size >= 4 */
-
- buf[0] = '\'';
- buf_i = 1;
-
- for (str_i = 0; str_i < str_len; str_i++) {
-
- char ch;
-
- if (buf_size - buf_i == 2) {
-
- break;
- }
-
- ch = str[str_i];
-
- switch (ch) {
- case '\0':
-
- if (UNIV_UNLIKELY(buf_size - buf_i < 4)) {
-
- goto func_exit;
- }
- buf[buf_i] = '\\';
- buf_i++;
- buf[buf_i] = '0';
- buf_i++;
- break;
- case '\'':
- case '\\':
-
- if (UNIV_UNLIKELY(buf_size - buf_i < 4)) {
-
- goto func_exit;
- }
- buf[buf_i] = ch;
- buf_i++;
- /* FALLTHROUGH */
- default:
-
- buf[buf_i] = ch;
- buf_i++;
- }
- }
-
-func_exit:
-
- buf[buf_i] = '\'';
- buf_i++;
- buf[buf_i] = '\0';
- buf_i++;
-
- return(buf_i);
-}
diff --git a/storage/innodb_plugin/include/ut0rnd.h b/storage/innodb_plugin/include/ut0rnd.h
deleted file mode 100644
index ce5152e942f..00000000000
--- a/storage/innodb_plugin/include/ut0rnd.h
+++ /dev/null
@@ -1,143 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/ut0rnd.h
-Random numbers and hashing
-
-Created 1/20/1994 Heikki Tuuri
-***********************************************************************/
-
-#ifndef ut0rnd_h
-#define ut0rnd_h
-
-#include "univ.i"
-
-#include "ut0byte.h"
-
-/** The 'character code' for end of field or string (used
-in folding records */
-#define UT_END_OF_FIELD 257
-
-/********************************************************//**
-This is used to set the random number seed. */
-UNIV_INLINE
-void
-ut_rnd_set_seed(
-/*============*/
- ulint seed); /*!< in: seed */
-/********************************************************//**
-The following function generates a series of 'random' ulint integers.
-@return the next 'random' number */
-UNIV_INLINE
-ulint
-ut_rnd_gen_next_ulint(
-/*==================*/
- ulint rnd); /*!< in: the previous random number value */
-/*********************************************************//**
-The following function generates 'random' ulint integers which
-enumerate the value space (let there be N of them) of ulint integers
-in a pseudo-random fashion. Note that the same integer is repeated
-always after N calls to the generator.
-@return the 'random' number */
-UNIV_INLINE
-ulint
-ut_rnd_gen_ulint(void);
-/*==================*/
-/********************************************************//**
-Generates a random integer from a given interval.
-@return the 'random' number */
-UNIV_INLINE
-ulint
-ut_rnd_interval(
-/*============*/
- ulint low, /*!< in: low limit; can generate also this value */
- ulint high); /*!< in: high limit; can generate also this value */
-/*********************************************************//**
-Generates a random iboolean value.
-@return the random value */
-UNIV_INLINE
-ibool
-ut_rnd_gen_ibool(void);
-/*=================*/
-/*******************************************************//**
-The following function generates a hash value for a ulint integer
-to a hash table of size table_size, which should be a prime or some
-random number to work reliably.
-@return hash value */
-UNIV_INLINE
-ulint
-ut_hash_ulint(
-/*==========*/
- ulint key, /*!< in: value to be hashed */
- ulint table_size); /*!< in: hash table size */
-/*************************************************************//**
-Folds a pair of ulints.
-@return folded value */
-UNIV_INLINE
-ulint
-ut_fold_ulint_pair(
-/*===============*/
- ulint n1, /*!< in: ulint */
- ulint n2) /*!< in: ulint */
- __attribute__((const));
-/*************************************************************//**
-Folds a dulint.
-@return folded value */
-UNIV_INLINE
-ulint
-ut_fold_dulint(
-/*===========*/
- dulint d) /*!< in: dulint */
- __attribute__((const));
-/*************************************************************//**
-Folds a character string ending in the null character.
-@return folded value */
-UNIV_INLINE
-ulint
-ut_fold_string(
-/*===========*/
- const char* str) /*!< in: null-terminated string */
- __attribute__((pure));
-/*************************************************************//**
-Folds a binary string.
-@return folded value */
-UNIV_INLINE
-ulint
-ut_fold_binary(
-/*===========*/
- const byte* str, /*!< in: string of bytes */
- ulint len) /*!< in: length */
- __attribute__((pure));
-/***********************************************************//**
-Looks for a prime number slightly greater than the given argument.
-The prime is chosen so that it is not near any power of 2.
-@return prime */
-UNIV_INTERN
-ulint
-ut_find_prime(
-/*==========*/
- ulint n) /*!< in: positive number > 100 */
- __attribute__((const));
-
-
-#ifndef UNIV_NONINL
-#include "ut0rnd.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/ut0rnd.ic b/storage/innodb_plugin/include/ut0rnd.ic
deleted file mode 100644
index 763469142ec..00000000000
--- a/storage/innodb_plugin/include/ut0rnd.ic
+++ /dev/null
@@ -1,230 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************************//**
-@file include/ut0rnd.ic
-Random numbers and hashing
-
-Created 5/30/1994 Heikki Tuuri
-*******************************************************************/
-
-#define UT_HASH_RANDOM_MASK 1463735687
-#define UT_HASH_RANDOM_MASK2 1653893711
-#define UT_RND1 151117737
-#define UT_RND2 119785373
-#define UT_RND3 85689495
-#define UT_RND4 76595339
-#define UT_SUM_RND2 98781234
-#define UT_SUM_RND3 126792457
-#define UT_SUM_RND4 63498502
-#define UT_XOR_RND1 187678878
-#define UT_XOR_RND2 143537923
-
-/** Seed value of ut_rnd_gen_ulint() */
-extern ulint ut_rnd_ulint_counter;
-
-/********************************************************//**
-This is used to set the random number seed. */
-UNIV_INLINE
-void
-ut_rnd_set_seed(
-/*============*/
- ulint seed) /*!< in: seed */
-{
- ut_rnd_ulint_counter = seed;
-}
-
-/********************************************************//**
-The following function generates a series of 'random' ulint integers.
-@return the next 'random' number */
-UNIV_INLINE
-ulint
-ut_rnd_gen_next_ulint(
-/*==================*/
- ulint rnd) /*!< in: the previous random number value */
-{
- ulint n_bits;
-
- n_bits = 8 * sizeof(ulint);
-
- rnd = UT_RND2 * rnd + UT_SUM_RND3;
- rnd = UT_XOR_RND1 ^ rnd;
- rnd = (rnd << 20) + (rnd >> (n_bits - 20));
- rnd = UT_RND3 * rnd + UT_SUM_RND4;
- rnd = UT_XOR_RND2 ^ rnd;
- rnd = (rnd << 20) + (rnd >> (n_bits - 20));
- rnd = UT_RND1 * rnd + UT_SUM_RND2;
-
- return(rnd);
-}
-
-/********************************************************//**
-The following function generates 'random' ulint integers which
-enumerate the value space of ulint integers in a pseudo random
-fashion. Note that the same integer is repeated always after
-2 to power 32 calls to the generator (if ulint is 32-bit).
-@return the 'random' number */
-UNIV_INLINE
-ulint
-ut_rnd_gen_ulint(void)
-/*==================*/
-{
- ulint rnd;
- ulint n_bits;
-
- n_bits = 8 * sizeof(ulint);
-
- ut_rnd_ulint_counter = UT_RND1 * ut_rnd_ulint_counter + UT_RND2;
-
- rnd = ut_rnd_gen_next_ulint(ut_rnd_ulint_counter);
-
- return(rnd);
-}
-
-/********************************************************//**
-Generates a random integer from a given interval.
-@return the 'random' number */
-UNIV_INLINE
-ulint
-ut_rnd_interval(
-/*============*/
- ulint low, /*!< in: low limit; can generate also this value */
- ulint high) /*!< in: high limit; can generate also this value */
-{
- ulint rnd;
-
- ut_ad(high >= low);
-
- if (low == high) {
-
- return(low);
- }
-
- rnd = ut_rnd_gen_ulint();
-
- return(low + (rnd % (high - low + 1)));
-}
-
-/*********************************************************//**
-Generates a random iboolean value.
-@return the random value */
-UNIV_INLINE
-ibool
-ut_rnd_gen_ibool(void)
-/*=================*/
-{
- ulint x;
-
- x = ut_rnd_gen_ulint();
-
- if (((x >> 20) + (x >> 15)) & 1) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*******************************************************//**
-The following function generates a hash value for a ulint integer
-to a hash table of size table_size, which should be a prime
-or some random number for the hash table to work reliably.
-@return hash value */
-UNIV_INLINE
-ulint
-ut_hash_ulint(
-/*==========*/
- ulint key, /*!< in: value to be hashed */
- ulint table_size) /*!< in: hash table size */
-{
- key = key ^ UT_HASH_RANDOM_MASK2;
-
- return(key % table_size);
-}
-
-/*************************************************************//**
-Folds a pair of ulints.
-@return folded value */
-UNIV_INLINE
-ulint
-ut_fold_ulint_pair(
-/*===============*/
- ulint n1, /*!< in: ulint */
- ulint n2) /*!< in: ulint */
-{
- return(((((n1 ^ n2 ^ UT_HASH_RANDOM_MASK2) << 8) + n1)
- ^ UT_HASH_RANDOM_MASK) + n2);
-}
-
-/*************************************************************//**
-Folds a dulint.
-@return folded value */
-UNIV_INLINE
-ulint
-ut_fold_dulint(
-/*===========*/
- dulint d) /*!< in: dulint */
-{
- return(ut_fold_ulint_pair(ut_dulint_get_low(d),
- ut_dulint_get_high(d)));
-}
-
-/*************************************************************//**
-Folds a character string ending in the null character.
-@return folded value */
-UNIV_INLINE
-ulint
-ut_fold_string(
-/*===========*/
- const char* str) /*!< in: null-terminated string */
-{
- ulint fold = 0;
-
- ut_ad(str);
-
- while (*str != '\0') {
- fold = ut_fold_ulint_pair(fold, (ulint)(*str));
- str++;
- }
-
- return(fold);
-}
-
-/*************************************************************//**
-Folds a binary string.
-@return folded value */
-UNIV_INLINE
-ulint
-ut_fold_binary(
-/*===========*/
- const byte* str, /*!< in: string of bytes */
- ulint len) /*!< in: length */
-{
- const byte* str_end = str + len;
- ulint fold = 0;
-
- ut_ad(str || !len);
-
- while (str < str_end) {
- fold = ut_fold_ulint_pair(fold, (ulint)(*str));
-
- str++;
- }
-
- return(fold);
-}
diff --git a/storage/innodb_plugin/include/ut0sort.h b/storage/innodb_plugin/include/ut0sort.h
deleted file mode 100644
index 5c6647dda9e..00000000000
--- a/storage/innodb_plugin/include/ut0sort.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/ut0sort.h
-Sort utility
-
-Created 11/9/1995 Heikki Tuuri
-***********************************************************************/
-
-#ifndef ut0sort_h
-#define ut0sort_h
-
-#include "univ.i"
-
-/* This module gives a macro definition of the body of
-a standard sort function for an array of elements of any
-type. The comparison function is given as a parameter to
-the macro. The sort algorithm is mergesort which has logarithmic
-worst case.
-*/
-
-/*******************************************************************//**
-This macro expands to the body of a standard sort function.
-The sort function uses mergesort and must be defined separately
-for each type of array.
-Also the comparison function has to be defined individually
-for each array cell type. SORT_FUN is the sort function name.
-The function takes the array to be sorted (ARR),
-the array of auxiliary space (AUX_ARR) of same size,
-and the low (LOW), inclusive, and high (HIGH), noninclusive,
-limits for the sort interval as arguments.
-CMP_FUN is the comparison function name. It takes as arguments
-two elements from the array and returns 1, if the first is bigger,
-0 if equal, and -1 if the second bigger. */
-
-#define UT_SORT_FUNCTION_BODY(SORT_FUN, ARR, AUX_ARR, LOW, HIGH, CMP_FUN)\
-{\
- ulint ut_sort_mid77;\
- ulint ut_sort_i77;\
- ulint ut_sort_low77;\
- ulint ut_sort_high77;\
-\
- ut_ad((LOW) < (HIGH));\
- ut_ad(ARR);\
- ut_ad(AUX_ARR);\
-\
- if ((LOW) == (HIGH) - 1) {\
- return;\
- } else if ((LOW) == (HIGH) - 2) {\
- if (CMP_FUN((ARR)[LOW], (ARR)[(HIGH) - 1]) > 0) {\
- (AUX_ARR)[LOW] = (ARR)[LOW];\
- (ARR)[LOW] = (ARR)[(HIGH) - 1];\
- (ARR)[(HIGH) - 1] = (AUX_ARR)[LOW];\
- }\
- return;\
- }\
-\
- ut_sort_mid77 = ((LOW) + (HIGH)) / 2;\
-\
- SORT_FUN((ARR), (AUX_ARR), (LOW), ut_sort_mid77);\
- SORT_FUN((ARR), (AUX_ARR), ut_sort_mid77, (HIGH));\
-\
- ut_sort_low77 = (LOW);\
- ut_sort_high77 = ut_sort_mid77;\
-\
- for (ut_sort_i77 = (LOW); ut_sort_i77 < (HIGH); ut_sort_i77++) {\
-\
- if (ut_sort_low77 >= ut_sort_mid77) {\
- (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_high77];\
- ut_sort_high77++;\
- } else if (ut_sort_high77 >= (HIGH)) {\
- (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_low77];\
- ut_sort_low77++;\
- } else if (CMP_FUN((ARR)[ut_sort_low77],\
- (ARR)[ut_sort_high77]) > 0) {\
- (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_high77];\
- ut_sort_high77++;\
- } else {\
- (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_low77];\
- ut_sort_low77++;\
- }\
- }\
-\
- memcpy((void*) ((ARR) + (LOW)), (AUX_ARR) + (LOW),\
- ((HIGH) - (LOW)) * sizeof *(ARR));\
-}\
-
-
-#endif
-
diff --git a/storage/innodb_plugin/include/ut0ut.h b/storage/innodb_plugin/include/ut0ut.h
deleted file mode 100644
index 197b8401428..00000000000
--- a/storage/innodb_plugin/include/ut0ut.h
+++ /dev/null
@@ -1,403 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2009, Sun Microsystems, Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Sun Microsystems, Inc. Those modifications are gratefully acknowledged and
-are described briefly in the InnoDB documentation. The contributions by
-Sun Microsystems are incorporated with their permission, and subject to the
-conditions contained in the file COPYING.Sun_Microsystems.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/ut0ut.h
-Various utilities
-
-Created 1/20/1994 Heikki Tuuri
-***********************************************************************/
-
-#ifndef ut0ut_h
-#define ut0ut_h
-
-#include "univ.i"
-
-#ifndef UNIV_HOTBACKUP
-# include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
-#endif /* UNIV_HOTBACKUP */
-
-#include <time.h>
-#ifndef MYSQL_SERVER
-#include <ctype.h>
-#endif
-
-/** Index name prefix in fast index creation */
-#define TEMP_INDEX_PREFIX '\377'
-/** Index name prefix in fast index creation, as a string constant */
-#define TEMP_INDEX_PREFIX_STR "\377"
-
-/** Time stamp */
-typedef time_t ib_time_t;
-
-#ifndef UNIV_HOTBACKUP
-#if defined(HAVE_IB_PAUSE_INSTRUCTION)
-# ifdef WIN32
- /* In the Win32 API, the x86 PAUSE instruction is executed by calling
- the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
- independent way by using YieldProcessor.*/
-# define UT_RELAX_CPU() YieldProcessor()
-# else
- /* According to the gcc info page, asm volatile means that the
- instruction has important side-effects and must not be removed.
- Also asm volatile may trigger a memory barrier (spilling all registers
- to memory). */
-# define UT_RELAX_CPU() __asm__ __volatile__ ("pause")
-# endif
-#elif defined(HAVE_ATOMIC_BUILTINS)
-# define UT_RELAX_CPU() do { \
- volatile lint volatile_var; \
- os_compare_and_swap_lint(&volatile_var, 0, 1); \
- } while (0)
-#else
-# define UT_RELAX_CPU() ((void)0) /* avoid warning for an empty statement */
-#endif
-
-/*********************************************************************//**
-Delays execution for at most max_wait_us microseconds or returns earlier
-if cond becomes true.
-@param cond in: condition to wait for; evaluated every 2 ms
-@param max_wait_us in: maximum delay to wait, in microseconds */
-#define UT_WAIT_FOR(cond, max_wait_us) \
-do { \
- ullint start_us; \
- start_us = ut_time_us(NULL); \
- while (!(cond) \
- && ut_time_us(NULL) - start_us < (max_wait_us)) {\
- \
- os_thread_sleep(2000 /* 2 ms */); \
- } \
-} while (0)
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************//**
-Gets the high 32 bits in a ulint. That is makes a shift >> 32,
-but since there seem to be compiler bugs in both gcc and Visual C++,
-we do this by a special conversion.
-@return a >> 32 */
-UNIV_INTERN
-ulint
-ut_get_high32(
-/*==========*/
- ulint a); /*!< in: ulint */
-/******************************************************//**
-Calculates the minimum of two ulints.
-@return minimum */
-UNIV_INLINE
-ulint
-ut_min(
-/*===*/
- ulint n1, /*!< in: first number */
- ulint n2); /*!< in: second number */
-/******************************************************//**
-Calculates the maximum of two ulints.
-@return maximum */
-UNIV_INLINE
-ulint
-ut_max(
-/*===*/
- ulint n1, /*!< in: first number */
- ulint n2); /*!< in: second number */
-/****************************************************************//**
-Calculates minimum of two ulint-pairs. */
-UNIV_INLINE
-void
-ut_pair_min(
-/*========*/
- ulint* a, /*!< out: more significant part of minimum */
- ulint* b, /*!< out: less significant part of minimum */
- ulint a1, /*!< in: more significant part of first pair */
- ulint b1, /*!< in: less significant part of first pair */
- ulint a2, /*!< in: more significant part of second pair */
- ulint b2); /*!< in: less significant part of second pair */
-/******************************************************//**
-Compares two ulints.
-@return 1 if a > b, 0 if a == b, -1 if a < b */
-UNIV_INLINE
-int
-ut_ulint_cmp(
-/*=========*/
- ulint a, /*!< in: ulint */
- ulint b); /*!< in: ulint */
-/*******************************************************//**
-Compares two pairs of ulints.
-@return -1 if a < b, 0 if a == b, 1 if a > b */
-UNIV_INLINE
-int
-ut_pair_cmp(
-/*========*/
- ulint a1, /*!< in: more significant part of first pair */
- ulint a2, /*!< in: less significant part of first pair */
- ulint b1, /*!< in: more significant part of second pair */
- ulint b2); /*!< in: less significant part of second pair */
-/*************************************************************//**
-Determines if a number is zero or a power of two.
-@param n in: number
-@return nonzero if n is zero or a power of two; zero otherwise */
-#define ut_is_2pow(n) UNIV_LIKELY(!((n) & ((n) - 1)))
-/*************************************************************//**
-Calculates fast the remainder of n/m when m is a power of two.
-@param n in: numerator
-@param m in: denominator, must be a power of two
-@return the remainder of n/m */
-#define ut_2pow_remainder(n, m) ((n) & ((m) - 1))
-/*************************************************************//**
-Calculates the biggest multiple of m that is not bigger than n
-when m is a power of two. In other words, rounds n down to m * k.
-@param n in: number to round down
-@param m in: alignment, must be a power of two
-@return n rounded down to the biggest possible integer multiple of m */
-#define ut_2pow_round(n, m) ((n) & ~((m) - 1))
-/** Align a number down to a multiple of a power of two.
-@param n in: number to round down
-@param m in: alignment, must be a power of two
-@return n rounded down to the biggest possible integer multiple of m */
-#define ut_calc_align_down(n, m) ut_2pow_round(n, m)
-/********************************************************//**
-Calculates the smallest multiple of m that is not smaller than n
-when m is a power of two. In other words, rounds n up to m * k.
-@param n in: number to round up
-@param m in: alignment, must be a power of two
-@return n rounded up to the smallest possible integer multiple of m */
-#define ut_calc_align(n, m) (((n) + ((m) - 1)) & ~((m) - 1))
-/*************************************************************//**
-Calculates fast the 2-logarithm of a number, rounded upward to an
-integer.
-@return logarithm in the base 2, rounded upward */
-UNIV_INLINE
-ulint
-ut_2_log(
-/*=====*/
- ulint n); /*!< in: number */
-/*************************************************************//**
-Calculates 2 to power n.
-@return 2 to power n */
-UNIV_INLINE
-ulint
-ut_2_exp(
-/*=====*/
- ulint n); /*!< in: number */
-/*************************************************************//**
-Calculates fast the number rounded up to the nearest power of 2.
-@return first power of 2 which is >= n */
-UNIV_INTERN
-ulint
-ut_2_power_up(
-/*==========*/
- ulint n) /*!< in: number != 0 */
- __attribute__((const));
-
-/** Determine how many bytes (groups of 8 bits) are needed to
-store the given number of bits.
-@param b in: bits
-@return number of bytes (octets) needed to represent b */
-#define UT_BITS_IN_BYTES(b) (((b) + 7) / 8)
-
-/**********************************************************//**
-Returns system time. We do not specify the format of the time returned:
-the only way to manipulate it is to use the function ut_difftime.
-@return system time */
-UNIV_INTERN
-ib_time_t
-ut_time(void);
-/*=========*/
-#ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Returns system time.
-Upon successful completion, the value 0 is returned; otherwise the
-value -1 is returned and the global variable errno is set to indicate the
-error.
-@return 0 on success, -1 otherwise */
-UNIV_INTERN
-int
-ut_usectime(
-/*========*/
- ulint* sec, /*!< out: seconds since the Epoch */
- ulint* ms); /*!< out: microseconds since the Epoch+*sec */
-
-/**********************************************************//**
-Returns the number of microseconds since epoch. Similar to
-time(3), the return value is also stored in *tloc, provided
-that tloc is non-NULL.
-@return us since epoch */
-UNIV_INTERN
-ullint
-ut_time_us(
-/*=======*/
- ullint* tloc); /*!< out: us since epoch, if non-NULL */
-/**********************************************************//**
-Returns the number of milliseconds since some epoch. The
-value may wrap around. It should only be used for heuristic
-purposes.
-@return ms since epoch */
-UNIV_INTERN
-ulint
-ut_time_ms(void);
-/*============*/
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************//**
-Returns the difference of two times in seconds.
-@return time2 - time1 expressed in seconds */
-UNIV_INTERN
-double
-ut_difftime(
-/*========*/
- ib_time_t time2, /*!< in: time */
- ib_time_t time1); /*!< in: time */
-/**********************************************************//**
-Prints a timestamp to a file. */
-UNIV_INTERN
-void
-ut_print_timestamp(
-/*===============*/
- FILE* file); /*!< in: file where to print */
-/**********************************************************//**
-Sprintfs a timestamp to a buffer, 13..14 chars plus terminating NUL. */
-UNIV_INTERN
-void
-ut_sprintf_timestamp(
-/*=================*/
- char* buf); /*!< in: buffer where to sprintf */
-#ifdef UNIV_HOTBACKUP
-/**********************************************************//**
-Sprintfs a timestamp to a buffer with no spaces and with ':' characters
-replaced by '_'. */
-UNIV_INTERN
-void
-ut_sprintf_timestamp_without_extra_chars(
-/*=====================================*/
- char* buf); /*!< in: buffer where to sprintf */
-/**********************************************************//**
-Returns current year, month, day. */
-UNIV_INTERN
-void
-ut_get_year_month_day(
-/*==================*/
- ulint* year, /*!< out: current year */
- ulint* month, /*!< out: month */
- ulint* day); /*!< out: day */
-#else /* UNIV_HOTBACKUP */
-/*************************************************************//**
-Runs an idle loop on CPU. The argument gives the desired delay
-in microseconds on 100 MHz Pentium + Visual C++.
-@return dummy value */
-UNIV_INTERN
-ulint
-ut_delay(
-/*=====*/
- ulint delay); /*!< in: delay in microseconds on 100 MHz Pentium */
-#endif /* UNIV_HOTBACKUP */
-/*************************************************************//**
-Prints the contents of a memory buffer in hex and ascii. */
-UNIV_INTERN
-void
-ut_print_buf(
-/*=========*/
- FILE* file, /*!< in: file where to print */
- const void* buf, /*!< in: memory buffer */
- ulint len); /*!< in: length of the buffer */
-
-/**********************************************************************//**
-Outputs a NUL-terminated file name, quoted with apostrophes. */
-UNIV_INTERN
-void
-ut_print_filename(
-/*==============*/
- FILE* f, /*!< in: output stream */
- const char* name); /*!< in: name to print */
-
-#ifndef UNIV_HOTBACKUP
-/* Forward declaration of transaction handle */
-struct trx_struct;
-
-/**********************************************************************//**
-Outputs a fixed-length string, quoted as an SQL identifier.
-If the string contains a slash '/', the string will be
-output as two identifiers separated by a period (.),
-as in SQL database_name.identifier. */
-UNIV_INTERN
-void
-ut_print_name(
-/*==========*/
- FILE* f, /*!< in: output stream */
- struct trx_struct*trx, /*!< in: transaction */
- ibool table_id,/*!< in: TRUE=print a table name,
- FALSE=print other identifier */
- const char* name); /*!< in: name to print */
-
-/**********************************************************************//**
-Outputs a fixed-length string, quoted as an SQL identifier.
-If the string contains a slash '/', the string will be
-output as two identifiers separated by a period (.),
-as in SQL database_name.identifier. */
-UNIV_INTERN
-void
-ut_print_namel(
-/*===========*/
- FILE* f, /*!< in: output stream */
- struct trx_struct*trx, /*!< in: transaction (NULL=no quotes) */
- ibool table_id,/*!< in: TRUE=print a table name,
- FALSE=print other identifier */
- const char* name, /*!< in: name to print */
- ulint namelen);/*!< in: length of name */
-
-/**********************************************************************//**
-Catenate files. */
-UNIV_INTERN
-void
-ut_copy_file(
-/*=========*/
- FILE* dest, /*!< in: output file */
- FILE* src); /*!< in: input file to be appended to output */
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef __WIN__
-/**********************************************************************//**
-A substitute for snprintf(3), formatted output conversion into
-a limited buffer.
-@return number of characters that would have been printed if the size
-were unlimited, not including the terminating '\0'. */
-UNIV_INTERN
-int
-ut_snprintf(
-/*========*/
- char* str, /*!< out: string */
- size_t size, /*!< in: str size */
- const char* fmt, /*!< in: format */
- ...); /*!< in: format values */
-#else
-/**********************************************************************//**
-A wrapper for snprintf(3), formatted output conversion into
-a limited buffer. */
-# define ut_snprintf snprintf
-#endif /* __WIN__ */
-
-#ifndef UNIV_NONINL
-#include "ut0ut.ic"
-#endif
-
-#endif
-
diff --git a/storage/innodb_plugin/include/ut0ut.ic b/storage/innodb_plugin/include/ut0ut.ic
deleted file mode 100644
index 6f55c7e410e..00000000000
--- a/storage/innodb_plugin/include/ut0ut.ic
+++ /dev/null
@@ -1,162 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************************//**
-@file include/ut0ut.ic
-Various utilities
-
-Created 5/30/1994 Heikki Tuuri
-*******************************************************************/
-
-/******************************************************//**
-Calculates the minimum of two ulints.
-@return minimum */
-UNIV_INLINE
-ulint
-ut_min(
-/*===*/
- ulint n1, /*!< in: first number */
- ulint n2) /*!< in: second number */
-{
- return((n1 <= n2) ? n1 : n2);
-}
-
-/******************************************************//**
-Calculates the maximum of two ulints.
-@return maximum */
-UNIV_INLINE
-ulint
-ut_max(
-/*===*/
- ulint n1, /*!< in: first number */
- ulint n2) /*!< in: second number */
-{
- return((n1 <= n2) ? n2 : n1);
-}
-
-/****************************************************************//**
-Calculates minimum of two ulint-pairs. */
-UNIV_INLINE
-void
-ut_pair_min(
-/*========*/
- ulint* a, /*!< out: more significant part of minimum */
- ulint* b, /*!< out: less significant part of minimum */
- ulint a1, /*!< in: more significant part of first pair */
- ulint b1, /*!< in: less significant part of first pair */
- ulint a2, /*!< in: more significant part of second pair */
- ulint b2) /*!< in: less significant part of second pair */
-{
- if (a1 == a2) {
- *a = a1;
- *b = ut_min(b1, b2);
- } else if (a1 < a2) {
- *a = a1;
- *b = b1;
- } else {
- *a = a2;
- *b = b2;
- }
-}
-
-/******************************************************//**
-Compares two ulints.
-@return 1 if a > b, 0 if a == b, -1 if a < b */
-UNIV_INLINE
-int
-ut_ulint_cmp(
-/*=========*/
- ulint a, /*!< in: ulint */
- ulint b) /*!< in: ulint */
-{
- if (a < b) {
- return(-1);
- } else if (a == b) {
- return(0);
- } else {
- return(1);
- }
-}
-
-/*******************************************************//**
-Compares two pairs of ulints.
-@return -1 if a < b, 0 if a == b, 1 if a > b */
-UNIV_INLINE
-int
-ut_pair_cmp(
-/*========*/
- ulint a1, /*!< in: more significant part of first pair */
- ulint a2, /*!< in: less significant part of first pair */
- ulint b1, /*!< in: more significant part of second pair */
- ulint b2) /*!< in: less significant part of second pair */
-{
- if (a1 > b1) {
- return(1);
- } else if (a1 < b1) {
- return(-1);
- } else if (a2 > b2) {
- return(1);
- } else if (a2 < b2) {
- return(-1);
- } else {
- return(0);
- }
-}
-
-/*************************************************************//**
-Calculates fast the 2-logarithm of a number, rounded upward to an
-integer.
-@return logarithm in the base 2, rounded upward */
-UNIV_INLINE
-ulint
-ut_2_log(
-/*=====*/
- ulint n) /*!< in: number != 0 */
-{
- ulint res;
-
- res = 0;
-
- ut_ad(n > 0);
-
- n = n - 1;
-
- for (;;) {
- n = n / 2;
-
- if (n == 0) {
- break;
- }
-
- res++;
- }
-
- return(res + 1);
-}
-
-/*************************************************************//**
-Calculates 2 to power n.
-@return 2 to power n */
-UNIV_INLINE
-ulint
-ut_2_exp(
-/*=====*/
- ulint n) /*!< in: number */
-{
- return((ulint) 1 << n);
-}
diff --git a/storage/innodb_plugin/include/ut0vec.h b/storage/innodb_plugin/include/ut0vec.h
deleted file mode 100644
index a770f671cfc..00000000000
--- a/storage/innodb_plugin/include/ut0vec.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file include/ut0vec.h
-A vector of pointers to data items
-
-Created 4/6/2006 Osku Salerma
-************************************************************************/
-
-#ifndef IB_VECTOR_H
-#define IB_VECTOR_H
-
-#include "univ.i"
-#include "mem0mem.h"
-
-/** An automatically resizing vector data type. */
-typedef struct ib_vector_struct ib_vector_t;
-
-/* An automatically resizing vector datatype with the following properties:
-
- -Contains void* items.
-
- -The items are owned by the caller.
-
- -All memory allocation is done through a heap owned by the caller, who is
- responsible for freeing it when done with the vector.
-
- -When the vector is resized, the old memory area is left allocated since it
- uses the same heap as the new memory area, so this is best used for
- relatively small or short-lived uses.
-*/
-
-/****************************************************************//**
-Create a new vector with the given initial size.
-@return vector */
-UNIV_INTERN
-ib_vector_t*
-ib_vector_create(
-/*=============*/
- mem_heap_t* heap, /*!< in: heap */
- ulint size); /*!< in: initial size */
-
-/****************************************************************//**
-Push a new element to the vector, increasing its size if necessary. */
-UNIV_INTERN
-void
-ib_vector_push(
-/*===========*/
- ib_vector_t* vec, /*!< in: vector */
- void* elem); /*!< in: data element */
-
-/****************************************************************//**
-Get the number of elements in the vector.
-@return number of elements in vector */
-UNIV_INLINE
-ulint
-ib_vector_size(
-/*===========*/
- const ib_vector_t* vec); /*!< in: vector */
-
-/****************************************************************//**
-Test whether a vector is empty or not.
-@return TRUE if empty */
-UNIV_INLINE
-ibool
-ib_vector_is_empty(
-/*===============*/
- const ib_vector_t* vec); /*!< in: vector */
-
-/****************************************************************//**
-Get the n'th element.
-@return n'th element */
-UNIV_INLINE
-void*
-ib_vector_get(
-/*==========*/
- ib_vector_t* vec, /*!< in: vector */
- ulint n); /*!< in: element index to get */
-
-/****************************************************************//**
-Remove the last element from the vector. */
-UNIV_INLINE
-void*
-ib_vector_pop(
-/*==========*/
- ib_vector_t* vec); /*!< in: vector */
-
-/****************************************************************//**
-Free the underlying heap of the vector. Note that vec is invalid
-after this call. */
-UNIV_INLINE
-void
-ib_vector_free(
-/*===========*/
- ib_vector_t* vec); /*!< in,own: vector */
-
-/** An automatically resizing vector data type. */
-struct ib_vector_struct {
- mem_heap_t* heap; /*!< heap */
- void** data; /*!< data elements */
- ulint used; /*!< number of elements currently used */
- ulint total; /*!< number of elements allocated */
-};
-
-#ifndef UNIV_NONINL
-#include "ut0vec.ic"
-#endif
-
-#endif
diff --git a/storage/innodb_plugin/include/ut0vec.ic b/storage/innodb_plugin/include/ut0vec.ic
deleted file mode 100644
index 02e881f9bca..00000000000
--- a/storage/innodb_plugin/include/ut0vec.ic
+++ /dev/null
@@ -1,96 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file include/ut0vec.ic
-A vector of pointers to data items
-
-Created 4/6/2006 Osku Salerma
-************************************************************************/
-
-/****************************************************************//**
-Get number of elements in vector.
-@return number of elements in vector */
-UNIV_INLINE
-ulint
-ib_vector_size(
-/*===========*/
- const ib_vector_t* vec) /*!< in: vector */
-{
- return(vec->used);
-}
-
-/****************************************************************//**
-Get n'th element.
-@return n'th element */
-UNIV_INLINE
-void*
-ib_vector_get(
-/*==========*/
- ib_vector_t* vec, /*!< in: vector */
- ulint n) /*!< in: element index to get */
-{
- ut_a(n < vec->used);
-
- return(vec->data[n]);
-}
-
-/****************************************************************//**
-Remove the last element from the vector.
-@return last vector element */
-UNIV_INLINE
-void*
-ib_vector_pop(
-/*==========*/
- ib_vector_t* vec) /*!< in/out: vector */
-{
- void* elem;
-
- ut_a(vec->used > 0);
- --vec->used;
- elem = vec->data[vec->used];
-
- ut_d(vec->data[vec->used] = NULL);
- UNIV_MEM_INVALID(&vec->data[vec->used], sizeof(*vec->data));
-
- return(elem);
-}
-
-/****************************************************************//**
-Free the underlying heap of the vector. Note that vec is invalid
-after this call. */
-UNIV_INLINE
-void
-ib_vector_free(
-/*===========*/
- ib_vector_t* vec) /*!< in, own: vector */
-{
- mem_heap_free(vec->heap);
-}
-
-/****************************************************************//**
-Test whether a vector is empty or not.
-@return TRUE if empty */
-UNIV_INLINE
-ibool
-ib_vector_is_empty(
-/*===============*/
- const ib_vector_t* vec) /*!< in: vector */
-{
- return(ib_vector_size(vec) == 0);
-}
diff --git a/storage/innodb_plugin/include/ut0wqueue.h b/storage/innodb_plugin/include/ut0wqueue.h
deleted file mode 100644
index 2ec0f16ab05..00000000000
--- a/storage/innodb_plugin/include/ut0wqueue.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file include/ut0wqueue.h
-A work queue
-
-Created 4/26/2006 Osku Salerma
-************************************************************************/
-
-/*******************************************************************//**
-A Work queue. Threads can add work items to the queue and other threads can
-wait for work items to be available and take them off the queue for
-processing.
-************************************************************************/
-
-#ifndef IB_WORK_QUEUE_H
-#define IB_WORK_QUEUE_H
-
-#include "ut0list.h"
-#include "mem0mem.h"
-#include "os0sync.h"
-#include "sync0types.h"
-
-typedef struct ib_wqueue_struct ib_wqueue_t;
-
-/****************************************************************//**
-Create a new work queue.
-@return work queue */
-UNIV_INTERN
-ib_wqueue_t*
-ib_wqueue_create(void);
-/*===================*/
-
-/****************************************************************//**
-Free a work queue. */
-UNIV_INTERN
-void
-ib_wqueue_free(
-/*===========*/
- ib_wqueue_t* wq); /*!< in: work queue */
-
-/****************************************************************//**
-Add a work item to the queue. */
-UNIV_INTERN
-void
-ib_wqueue_add(
-/*==========*/
- ib_wqueue_t* wq, /*!< in: work queue */
- void* item, /*!< in: work item */
- mem_heap_t* heap); /*!< in: memory heap to use for allocating the
- list node */
-
-/****************************************************************//**
-Wait for a work item to appear in the queue.
-@return work item */
-UNIV_INTERN
-void*
-ib_wqueue_wait(
-/*===========*/
- ib_wqueue_t* wq); /*!< in: work queue */
-
-/* Work queue. */
-struct ib_wqueue_struct {
- mutex_t mutex; /*!< mutex protecting everything */
- ib_list_t* items; /*!< work item list */
- os_event_t event; /*!< event we use to signal additions to list */
-};
-
-#endif
diff --git a/storage/innodb_plugin/lock/lock0iter.c b/storage/innodb_plugin/lock/lock0iter.c
deleted file mode 100644
index 51d1802ccde..00000000000
--- a/storage/innodb_plugin/lock/lock0iter.c
+++ /dev/null
@@ -1,114 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file lock/lock0iter.c
-Lock queue iterator. Can iterate over table and record
-lock queues.
-
-Created July 16, 2007 Vasil Dimov
-*******************************************************/
-
-#define LOCK_MODULE_IMPLEMENTATION
-
-#include "univ.i"
-#include "lock0iter.h"
-#include "lock0lock.h"
-#include "lock0priv.h"
-#include "ut0dbg.h"
-#include "ut0lst.h"
-#ifdef UNIV_DEBUG
-# include "srv0srv.h" /* kernel_mutex */
-#endif /* UNIV_DEBUG */
-
-/*******************************************************************//**
-Initialize lock queue iterator so that it starts to iterate from
-"lock". bit_no specifies the record number within the heap where the
-record is stored. It can be undefined (ULINT_UNDEFINED) in two cases:
-1. If the lock is a table lock, thus we have a table lock queue;
-2. If the lock is a record lock and it is a wait lock. In this case
- bit_no is calculated in this function by using
- lock_rec_find_set_bit(). There is exactly one bit set in the bitmap
- of a wait lock. */
-UNIV_INTERN
-void
-lock_queue_iterator_reset(
-/*======================*/
- lock_queue_iterator_t* iter, /*!< out: iterator */
- const lock_t* lock, /*!< in: lock to start from */
- ulint bit_no) /*!< in: record number in the
- heap */
-{
- ut_ad(mutex_own(&kernel_mutex));
-
- iter->current_lock = lock;
-
- if (bit_no != ULINT_UNDEFINED) {
-
- iter->bit_no = bit_no;
- } else {
-
- switch (lock_get_type_low(lock)) {
- case LOCK_TABLE:
- iter->bit_no = ULINT_UNDEFINED;
- break;
- case LOCK_REC:
- iter->bit_no = lock_rec_find_set_bit(lock);
- ut_a(iter->bit_no != ULINT_UNDEFINED);
- break;
- default:
- ut_error;
- }
- }
-}
-
-/*******************************************************************//**
-Gets the previous lock in the lock queue, returns NULL if there are no
-more locks (i.e. the current lock is the first one). The iterator is
-receded (if not-NULL is returned).
-@return previous lock or NULL */
-UNIV_INTERN
-const lock_t*
-lock_queue_iterator_get_prev(
-/*=========================*/
- lock_queue_iterator_t* iter) /*!< in/out: iterator */
-{
- const lock_t* prev_lock;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- switch (lock_get_type_low(iter->current_lock)) {
- case LOCK_REC:
- prev_lock = lock_rec_get_prev(
- iter->current_lock, iter->bit_no);
- break;
- case LOCK_TABLE:
- prev_lock = UT_LIST_GET_PREV(
- un_member.tab_lock.locks, iter->current_lock);
- break;
- default:
- ut_error;
- }
-
- if (prev_lock != NULL) {
-
- iter->current_lock = prev_lock;
- }
-
- return(prev_lock);
-}
diff --git a/storage/innodb_plugin/lock/lock0lock.c b/storage/innodb_plugin/lock/lock0lock.c
deleted file mode 100644
index 736198dc346..00000000000
--- a/storage/innodb_plugin/lock/lock0lock.c
+++ /dev/null
@@ -1,5640 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file lock/lock0lock.c
-The transaction lock system
-
-Created 5/7/1996 Heikki Tuuri
-*******************************************************/
-
-#define LOCK_MODULE_IMPLEMENTATION
-
-#include "lock0lock.h"
-#include "lock0priv.h"
-
-#ifdef UNIV_NONINL
-#include "lock0lock.ic"
-#include "lock0priv.ic"
-#endif
-
-#include "ha_prototypes.h"
-#include "usr0sess.h"
-#include "trx0purge.h"
-#include "dict0mem.h"
-#include "trx0sys.h"
-
-/* Restricts the length of search we will do in the waits-for
-graph of transactions */
-#define LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK 1000000
-
-/* Restricts the recursion depth of the search we will do in the waits-for
-graph of transactions */
-#define LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK 200
-
-/* When releasing transaction locks, this specifies how often we release
-the kernel mutex for a moment to give also others access to it */
-
-#define LOCK_RELEASE_KERNEL_INTERVAL 1000
-
-/* Safety margin when creating a new record lock: this many extra records
-can be inserted to the page without need to create a lock with a bigger
-bitmap */
-
-#define LOCK_PAGE_BITMAP_MARGIN 64
-
-/* An explicit record lock affects both the record and the gap before it.
-An implicit x-lock does not affect the gap, it only locks the index
-record from read or update.
-
-If a transaction has modified or inserted an index record, then
-it owns an implicit x-lock on the record. On a secondary index record,
-a transaction has an implicit x-lock also if it has modified the
-clustered index record, the max trx id of the page where the secondary
-index record resides is >= trx id of the transaction (or database recovery
-is running), and there are no explicit non-gap lock requests on the
-secondary index record.
-
-This complicated definition for a secondary index comes from the
-implementation: we want to be able to determine if a secondary index
-record has an implicit x-lock, just by looking at the present clustered
-index record, not at the historical versions of the record. The
-complicated definition can be explained to the user so that there is
-nondeterminism in the access path when a query is answered: we may,
-or may not, access the clustered index record and thus may, or may not,
-bump into an x-lock set there.
-
-Different transaction can have conflicting locks set on the gap at the
-same time. The locks on the gap are purely inhibitive: an insert cannot
-be made, or a select cursor may have to wait if a different transaction
-has a conflicting lock on the gap. An x-lock on the gap does not give
-the right to insert into the gap.
-
-An explicit lock can be placed on a user record or the supremum record of
-a page. The locks on the supremum record are always thought to be of the gap
-type, though the gap bit is not set. When we perform an update of a record
-where the size of the record changes, we may temporarily store its explicit
-locks on the infimum record of the page, though the infimum otherwise never
-carries locks.
-
-A waiting record lock can also be of the gap type. A waiting lock request
-can be granted when there is no conflicting mode lock request by another
-transaction ahead of it in the explicit lock queue.
-
-In version 4.0.5 we added yet another explicit lock type: LOCK_REC_NOT_GAP.
-It only locks the record it is placed on, not the gap before the record.
-This lock type is necessary to emulate an Oracle-like READ COMMITTED isolation
-level.
-
--------------------------------------------------------------------------
-RULE 1: If there is an implicit x-lock on a record, and there are non-gap
--------
-lock requests waiting in the queue, then the transaction holding the implicit
-x-lock also has an explicit non-gap record x-lock. Therefore, as locks are
-released, we can grant locks to waiting lock requests purely by looking at
-the explicit lock requests in the queue.
-
-RULE 3: Different transactions cannot have conflicting granted non-gap locks
--------
-on a record at the same time. However, they can have conflicting granted gap
-locks.
-RULE 4: If a there is a waiting lock request in a queue, no lock request,
--------
-gap or not, can be inserted ahead of it in the queue. In record deletes
-and page splits new gap type locks can be created by the database manager
-for a transaction, and without rule 4, the waits-for graph of transactions
-might become cyclic without the database noticing it, as the deadlock check
-is only performed when a transaction itself requests a lock!
--------------------------------------------------------------------------
-
-An insert is allowed to a gap if there are no explicit lock requests by
-other transactions on the next record. It does not matter if these lock
-requests are granted or waiting, gap bit set or not, with the exception
-that a gap type request set by another transaction to wait for
-its turn to do an insert is ignored. On the other hand, an
-implicit x-lock by another transaction does not prevent an insert, which
-allows for more concurrency when using an Oracle-style sequence number
-generator for the primary key with many transactions doing inserts
-concurrently.
-
-A modify of a record is allowed if the transaction has an x-lock on the
-record, or if other transactions do not have any non-gap lock requests on the
-record.
-
-A read of a single user record with a cursor is allowed if the transaction
-has a non-gap explicit, or an implicit lock on the record, or if the other
-transactions have no x-lock requests on the record. At a page supremum a
-read is always allowed.
-
-In summary, an implicit lock is seen as a granted x-lock only on the
-record, not on the gap. An explicit lock with no gap bit set is a lock
-both on the record and the gap. If the gap bit is set, the lock is only
-on the gap. Different transaction cannot own conflicting locks on the
-record at the same time, but they may own conflicting locks on the gap.
-Granted locks on a record give an access right to the record, but gap type
-locks just inhibit operations.
-
-NOTE: Finding out if some transaction has an implicit x-lock on a secondary
-index record can be cumbersome. We may have to look at previous versions of
-the corresponding clustered index record to find out if a delete marked
-secondary index record was delete marked by an active transaction, not by
-a committed one.
-
-FACT A: If a transaction has inserted a row, it can delete it any time
-without need to wait for locks.
-
-PROOF: The transaction has an implicit x-lock on every index record inserted
-for the row, and can thus modify each record without the need to wait. Q.E.D.
-
-FACT B: If a transaction has read some result set with a cursor, it can read
-it again, and retrieves the same result set, if it has not modified the
-result set in the meantime. Hence, there is no phantom problem. If the
-biggest record, in the alphabetical order, touched by the cursor is removed,
-a lock wait may occur, otherwise not.
-
-PROOF: When a read cursor proceeds, it sets an s-lock on each user record
-it passes, and a gap type s-lock on each page supremum. The cursor must
-wait until it has these locks granted. Then no other transaction can
-have a granted x-lock on any of the user records, and therefore cannot
-modify the user records. Neither can any other transaction insert into
-the gaps which were passed over by the cursor. Page splits and merges,
-and removal of obsolete versions of records do not affect this, because
-when a user record or a page supremum is removed, the next record inherits
-its locks as gap type locks, and therefore blocks inserts to the same gap.
-Also, if a page supremum is inserted, it inherits its locks from the successor
-record. When the cursor is positioned again at the start of the result set,
-the records it will touch on its course are either records it touched
-during the last pass or new inserted page supremums. It can immediately
-access all these records, and when it arrives at the biggest record, it
-notices that the result set is complete. If the biggest record was removed,
-lock wait can occur because the next record only inherits a gap type lock,
-and a wait may be needed. Q.E.D. */
-
-/* If an index record should be changed or a new inserted, we must check
-the lock on the record or the next. When a read cursor starts reading,
-we will set a record level s-lock on each record it passes, except on the
-initial record on which the cursor is positioned before we start to fetch
-records. Our index tree search has the convention that the B-tree
-cursor is positioned BEFORE the first possibly matching record in
-the search. Optimizations are possible here: if the record is searched
-on an equality condition to a unique key, we could actually set a special
-lock on the record, a lock which would not prevent any insert before
-this record. In the next key locking an x-lock set on a record also
-prevents inserts just before that record.
- There are special infimum and supremum records on each page.
-A supremum record can be locked by a read cursor. This records cannot be
-updated but the lock prevents insert of a user record to the end of
-the page.
- Next key locks will prevent the phantom problem where new rows
-could appear to SELECT result sets after the select operation has been
-performed. Prevention of phantoms ensures the serilizability of
-transactions.
- What should we check if an insert of a new record is wanted?
-Only the lock on the next record on the same page, because also the
-supremum record can carry a lock. An s-lock prevents insertion, but
-what about an x-lock? If it was set by a searched update, then there
-is implicitly an s-lock, too, and the insert should be prevented.
-What if our transaction owns an x-lock to the next record, but there is
-a waiting s-lock request on the next record? If this s-lock was placed
-by a read cursor moving in the ascending order in the index, we cannot
-do the insert immediately, because when we finally commit our transaction,
-the read cursor should see also the new inserted record. So we should
-move the read cursor backward from the next record for it to pass over
-the new inserted record. This move backward may be too cumbersome to
-implement. If we in this situation just enqueue a second x-lock request
-for our transaction on the next record, then the deadlock mechanism
-notices a deadlock between our transaction and the s-lock request
-transaction. This seems to be an ok solution.
- We could have the convention that granted explicit record locks,
-lock the corresponding records from changing, and also lock the gaps
-before them from inserting. A waiting explicit lock request locks the gap
-before from inserting. Implicit record x-locks, which we derive from the
-transaction id in the clustered index record, only lock the record itself
-from modification, not the gap before it from inserting.
- How should we store update locks? If the search is done by a unique
-key, we could just modify the record trx id. Otherwise, we could put a record
-x-lock on the record. If the update changes ordering fields of the
-clustered index record, the inserted new record needs no record lock in
-lock table, the trx id is enough. The same holds for a secondary index
-record. Searched delete is similar to update.
-
-PROBLEM:
-What about waiting lock requests? If a transaction is waiting to make an
-update to a record which another modified, how does the other transaction
-know to send the end-lock-wait signal to the waiting transaction? If we have
-the convention that a transaction may wait for just one lock at a time, how
-do we preserve it if lock wait ends?
-
-PROBLEM:
-Checking the trx id label of a secondary index record. In the case of a
-modification, not an insert, is this necessary? A secondary index record
-is modified only by setting or resetting its deleted flag. A secondary index
-record contains fields to uniquely determine the corresponding clustered
-index record. A secondary index record is therefore only modified if we
-also modify the clustered index record, and the trx id checking is done
-on the clustered index record, before we come to modify the secondary index
-record. So, in the case of delete marking or unmarking a secondary index
-record, we do not have to care about trx ids, only the locks in the lock
-table must be checked. In the case of a select from a secondary index, the
-trx id is relevant, and in this case we may have to search the clustered
-index record.
-
-PROBLEM: How to update record locks when page is split or merged, or
---------------------------------------------------------------------
-a record is deleted or updated?
-If the size of fields in a record changes, we perform the update by
-a delete followed by an insert. How can we retain the locks set or
-waiting on the record? Because a record lock is indexed in the bitmap
-by the heap number of the record, when we remove the record from the
-record list, it is possible still to keep the lock bits. If the page
-is reorganized, we could make a table of old and new heap numbers,
-and permute the bitmaps in the locks accordingly. We can add to the
-table a row telling where the updated record ended. If the update does
-not require a reorganization of the page, we can simply move the lock
-bits for the updated record to the position determined by its new heap
-number (we may have to allocate a new lock, if we run out of the bitmap
-in the old one).
- A more complicated case is the one where the reinsertion of the
-updated record is done pessimistically, because the structure of the
-tree may change.
-
-PROBLEM: If a supremum record is removed in a page merge, or a record
----------------------------------------------------------------------
-removed in a purge, what to do to the waiting lock requests? In a split to
-the right, we just move the lock requests to the new supremum. If a record
-is removed, we could move the waiting lock request to its inheritor, the
-next record in the index. But, the next record may already have lock
-requests on its own queue. A new deadlock check should be made then. Maybe
-it is easier just to release the waiting transactions. They can then enqueue
-new lock requests on appropriate records.
-
-PROBLEM: When a record is inserted, what locks should it inherit from the
--------------------------------------------------------------------------
-upper neighbor? An insert of a new supremum record in a page split is
-always possible, but an insert of a new user record requires that the upper
-neighbor does not have any lock requests by other transactions, granted or
-waiting, in its lock queue. Solution: We can copy the locks as gap type
-locks, so that also the waiting locks are transformed to granted gap type
-locks on the inserted record. */
-
-/* LOCK COMPATIBILITY MATRIX
- * IS IX S X AI
- * IS + + + - +
- * IX + + - - +
- * S + - + - -
- * X - - - - -
- * AI + + - - -
- *
- * Note that for rows, InnoDB only acquires S or X locks.
- * For tables, InnoDB normally acquires IS or IX locks.
- * S or X table locks are only acquired for LOCK TABLES.
- * Auto-increment (AI) locks are needed because of
- * statement-level MySQL binlog.
- * See also lock_mode_compatible().
- */
-#define LK(a,b) (1 << ((a) * LOCK_NUM + (b)))
-#define LKS(a,b) LK(a,b) | LK(b,a)
-
-/* Define the lock compatibility matrix in a ulint. The first line below
-defines the diagonal entries. The following lines define the compatibility
-for LOCK_IX, LOCK_S, and LOCK_AUTO_INC using LKS(), since the matrix
-is symmetric. */
-#define LOCK_MODE_COMPATIBILITY 0 \
- | LK(LOCK_IS, LOCK_IS) | LK(LOCK_IX, LOCK_IX) | LK(LOCK_S, LOCK_S) \
- | LKS(LOCK_IX, LOCK_IS) | LKS(LOCK_IS, LOCK_AUTO_INC) \
- | LKS(LOCK_S, LOCK_IS) \
- | LKS(LOCK_AUTO_INC, LOCK_IS) | LKS(LOCK_AUTO_INC, LOCK_IX)
-
-/* STRONGER-OR-EQUAL RELATION (mode1=row, mode2=column)
- * IS IX S X AI
- * IS + - - - -
- * IX + + - - -
- * S + - + - -
- * X + + + + +
- * AI - - - - +
- * See lock_mode_stronger_or_eq().
- */
-
-/* Define the stronger-or-equal lock relation in a ulint. This relation
-contains all pairs LK(mode1, mode2) where mode1 is stronger than or
-equal to mode2. */
-#define LOCK_MODE_STRONGER_OR_EQ 0 \
- | LK(LOCK_IS, LOCK_IS) \
- | LK(LOCK_IX, LOCK_IS) | LK(LOCK_IX, LOCK_IX) \
- | LK(LOCK_S, LOCK_IS) | LK(LOCK_S, LOCK_S) \
- | LK(LOCK_AUTO_INC, LOCK_AUTO_INC) \
- | LK(LOCK_X, LOCK_IS) | LK(LOCK_X, LOCK_IX) | LK(LOCK_X, LOCK_S) \
- | LK(LOCK_X, LOCK_AUTO_INC) | LK(LOCK_X, LOCK_X)
-
-#ifdef UNIV_DEBUG
-UNIV_INTERN ibool lock_print_waits = FALSE;
-
-/*********************************************************************//**
-Validates the lock system.
-@return TRUE if ok */
-static
-ibool
-lock_validate(void);
-/*===============*/
-
-/*********************************************************************//**
-Validates the record lock queues on a page.
-@return TRUE if ok */
-static
-ibool
-lock_rec_validate_page(
-/*===================*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no);/*!< in: page number */
-#endif /* UNIV_DEBUG */
-
-/* The lock system */
-UNIV_INTERN lock_sys_t* lock_sys = NULL;
-
-/* We store info on the latest deadlock error to this buffer. InnoDB
-Monitor will then fetch it and print */
-UNIV_INTERN ibool lock_deadlock_found = FALSE;
-UNIV_INTERN FILE* lock_latest_err_file;
-
-/* Flags for recursive deadlock search */
-#define LOCK_VICTIM_IS_START 1
-#define LOCK_VICTIM_IS_OTHER 2
-
-/********************************************************************//**
-Checks if a lock request results in a deadlock.
-@return TRUE if a deadlock was detected and we chose trx as a victim;
-FALSE if no deadlock, or there was a deadlock, but we chose other
-transaction(s) as victim(s) */
-static
-ibool
-lock_deadlock_occurs(
-/*=================*/
- lock_t* lock, /*!< in: lock the transaction is requesting */
- trx_t* trx); /*!< in: transaction */
-/********************************************************************//**
-Looks recursively for a deadlock.
-@return 0 if no deadlock found, LOCK_VICTIM_IS_START if there was a
-deadlock and we chose 'start' as the victim, LOCK_VICTIM_IS_OTHER if a
-deadlock was found and we chose some other trx as a victim: we must do
-the search again in this last case because there may be another
-deadlock! */
-static
-ulint
-lock_deadlock_recursive(
-/*====================*/
- trx_t* start, /*!< in: recursion starting point */
- trx_t* trx, /*!< in: a transaction waiting for a lock */
- lock_t* wait_lock, /*!< in: the lock trx is waiting to be granted */
- ulint* cost, /*!< in/out: number of calculation steps thus
- far: if this exceeds LOCK_MAX_N_STEPS_...
- we return LOCK_VICTIM_IS_START */
- ulint depth); /*!< in: recursion depth: if this exceeds
- LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we
- return LOCK_VICTIM_IS_START */
-
-/*********************************************************************//**
-Gets the nth bit of a record lock.
-@return TRUE if bit set */
-UNIV_INLINE
-ibool
-lock_rec_get_nth_bit(
-/*=================*/
- const lock_t* lock, /*!< in: record lock */
- ulint i) /*!< in: index of the bit */
-{
- ulint byte_index;
- ulint bit_index;
-
- ut_ad(lock);
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
-
- if (i >= lock->un_member.rec_lock.n_bits) {
-
- return(FALSE);
- }
-
- byte_index = i / 8;
- bit_index = i % 8;
-
- return(1 & ((const byte*) &lock[1])[byte_index] >> bit_index);
-}
-
-/*************************************************************************/
-
-#define lock_mutex_enter_kernel() mutex_enter(&kernel_mutex)
-#define lock_mutex_exit_kernel() mutex_exit(&kernel_mutex)
-
-/*********************************************************************//**
-Checks that a transaction id is sensible, i.e., not in the future.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-lock_check_trx_id_sanity(
-/*=====================*/
- trx_id_t trx_id, /*!< in: trx id */
- const rec_t* rec, /*!< in: user record */
- dict_index_t* index, /*!< in: index */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
- ibool has_kernel_mutex)/*!< in: TRUE if the caller owns the
- kernel mutex */
-{
- ibool is_ok = TRUE;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- if (!has_kernel_mutex) {
- mutex_enter(&kernel_mutex);
- }
-
- /* A sanity check: the trx_id in rec must be smaller than the global
- trx id counter */
-
- if (ut_dulint_cmp(trx_id, trx_sys->max_trx_id) >= 0) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: transaction id associated"
- " with record\n",
- stderr);
- rec_print_new(stderr, rec, offsets);
- fputs("InnoDB: in ", stderr);
- dict_index_name_print(stderr, NULL, index);
- fprintf(stderr, "\n"
- "InnoDB: is " TRX_ID_FMT " which is higher than the"
- " global trx id counter " TRX_ID_FMT "!\n"
- "InnoDB: The table is corrupt. You have to do"
- " dump + drop + reimport.\n",
- TRX_ID_PREP_PRINTF(trx_id),
- TRX_ID_PREP_PRINTF(trx_sys->max_trx_id));
-
- is_ok = FALSE;
- }
-
- if (!has_kernel_mutex) {
- mutex_exit(&kernel_mutex);
- }
-
- return(is_ok);
-}
-
-/*********************************************************************//**
-Checks that a record is seen in a consistent read.
-@return TRUE if sees, or FALSE if an earlier version of the record
-should be retrieved */
-UNIV_INTERN
-ibool
-lock_clust_rec_cons_read_sees(
-/*==========================*/
- const rec_t* rec, /*!< in: user record which should be read or
- passed over by a read cursor */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- read_view_t* view) /*!< in: consistent read view */
-{
- trx_id_t trx_id;
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(page_rec_is_user_rec(rec));
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- /* NOTE that we call this function while holding the search
- system latch. To obey the latching order we must NOT reserve the
- kernel mutex here! */
-
- trx_id = row_get_rec_trx_id(rec, index, offsets);
-
- return(read_view_sees_trx_id(view, trx_id));
-}
-
-/*********************************************************************//**
-Checks that a non-clustered index record is seen in a consistent read.
-
-NOTE that a non-clustered index page contains so little information on
-its modifications that also in the case FALSE, the present version of
-rec may be the right, but we must check this from the clustered index
-record.
-
-@return TRUE if certainly sees, or FALSE if an earlier version of the
-clustered index record might be needed */
-UNIV_INTERN
-ulint
-lock_sec_rec_cons_read_sees(
-/*========================*/
- const rec_t* rec, /*!< in: user record which
- should be read or passed over
- by a read cursor */
- const read_view_t* view) /*!< in: consistent read view */
-{
- trx_id_t max_trx_id;
-
- ut_ad(page_rec_is_user_rec(rec));
-
- /* NOTE that we might call this function while holding the search
- system latch. To obey the latching order we must NOT reserve the
- kernel mutex here! */
-
- if (recv_recovery_is_on()) {
-
- return(FALSE);
- }
-
- max_trx_id = page_get_max_trx_id(page_align(rec));
- ut_ad(!ut_dulint_is_zero(max_trx_id));
-
- return(ut_dulint_cmp(max_trx_id, view->up_limit_id) < 0);
-}
-
-/*********************************************************************//**
-Creates the lock system at database start. */
-UNIV_INTERN
-void
-lock_sys_create(
-/*============*/
- ulint n_cells) /*!< in: number of slots in lock hash table */
-{
- lock_sys = mem_alloc(sizeof(lock_sys_t));
-
- lock_sys->rec_hash = hash_create(n_cells);
-
- /* hash_create_mutexes(lock_sys->rec_hash, 2, SYNC_REC_LOCK); */
-
- lock_latest_err_file = os_file_create_tmpfile();
- ut_a(lock_latest_err_file);
-}
-
-/*********************************************************************//**
-Closes the lock system at database shutdown. */
-UNIV_INTERN
-void
-lock_sys_close(void)
-/*================*/
-{
- if (lock_latest_err_file != NULL) {
- fclose(lock_latest_err_file);
- lock_latest_err_file = NULL;
- }
-
- hash_table_free(lock_sys->rec_hash);
- mem_free(lock_sys);
- lock_sys = NULL;
-}
-
-/*********************************************************************//**
-Gets the size of a lock struct.
-@return size in bytes */
-UNIV_INTERN
-ulint
-lock_get_size(void)
-/*===============*/
-{
- return((ulint)sizeof(lock_t));
-}
-
-/*********************************************************************//**
-Gets the mode of a lock.
-@return mode */
-UNIV_INLINE
-enum lock_mode
-lock_get_mode(
-/*==========*/
- const lock_t* lock) /*!< in: lock */
-{
- ut_ad(lock);
-
- return(lock->type_mode & LOCK_MODE_MASK);
-}
-
-/*********************************************************************//**
-Gets the wait flag of a lock.
-@return TRUE if waiting */
-UNIV_INLINE
-ibool
-lock_get_wait(
-/*==========*/
- const lock_t* lock) /*!< in: lock */
-{
- ut_ad(lock);
-
- if (UNIV_UNLIKELY(lock->type_mode & LOCK_WAIT)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Gets the source table of an ALTER TABLE transaction. The table must be
-covered by an IX or IS table lock.
-@return the source table of transaction, if it is covered by an IX or
-IS table lock; dest if there is no source table, and NULL if the
-transaction is locking more than two tables or an inconsistency is
-found */
-UNIV_INTERN
-dict_table_t*
-lock_get_src_table(
-/*===============*/
- trx_t* trx, /*!< in: transaction */
- dict_table_t* dest, /*!< in: destination of ALTER TABLE */
- enum lock_mode* mode) /*!< out: lock mode of the source table */
-{
- dict_table_t* src;
- lock_t* lock;
-
- src = NULL;
- *mode = LOCK_NONE;
-
- for (lock = UT_LIST_GET_FIRST(trx->trx_locks);
- lock;
- lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
- lock_table_t* tab_lock;
- enum lock_mode lock_mode;
- if (!(lock_get_type_low(lock) & LOCK_TABLE)) {
- /* We are only interested in table locks. */
- continue;
- }
- tab_lock = &lock->un_member.tab_lock;
- if (dest == tab_lock->table) {
- /* We are not interested in the destination table. */
- continue;
- } else if (!src) {
- /* This presumably is the source table. */
- src = tab_lock->table;
- if (UT_LIST_GET_LEN(src->locks) != 1
- || UT_LIST_GET_FIRST(src->locks) != lock) {
- /* We only support the case when
- there is only one lock on this table. */
- return(NULL);
- }
- } else if (src != tab_lock->table) {
- /* The transaction is locking more than
- two tables (src and dest): abort */
- return(NULL);
- }
-
- /* Check that the source table is locked by
- LOCK_IX or LOCK_IS. */
- lock_mode = lock_get_mode(lock);
- if (lock_mode == LOCK_IX || lock_mode == LOCK_IS) {
- if (*mode != LOCK_NONE && *mode != lock_mode) {
- /* There are multiple locks on src. */
- return(NULL);
- }
- *mode = lock_mode;
- }
- }
-
- if (!src) {
- /* No source table lock found: flag the situation to caller */
- src = dest;
- }
-
- return(src);
-}
-
-/*********************************************************************//**
-Determine if the given table is exclusively "owned" by the given
-transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC
-on the table.
-The table's lock list is walked between lock_mutex_enter_kernel() and
-lock_mutex_exit_kernel(), so the function takes the mutex itself.
-The result is FALSE as well when no LOCK_IX table lock of trx is found
-(for example when the lock list is empty).
-@return TRUE if table is only locked by trx, with LOCK_IX, and
-possibly LOCK_AUTO_INC */
-UNIV_INTERN
-ibool
-lock_is_table_exclusive(
-/*====================*/
- dict_table_t* table, /*!< in: table */
- trx_t* trx) /*!< in: transaction */
-{
- const lock_t* lock;
- ibool ok = FALSE;
-
- ut_ad(table);
- ut_ad(trx);
-
- lock_mutex_enter_kernel();
-
- for (lock = UT_LIST_GET_FIRST(table->locks);
- lock;
- lock = UT_LIST_GET_NEXT(locks, &lock->un_member.tab_lock)) {
- if (lock->trx != trx) {
- /* A lock on the table is held
- by some other transaction. */
- goto not_ok;
- }
-
- if (!(lock_get_type_low(lock) & LOCK_TABLE)) {
- /* We are interested in table locks only. */
- continue;
- }
-
- switch (lock_get_mode(lock)) {
- case LOCK_IX:
- ok = TRUE;
- break;
- case LOCK_AUTO_INC:
- /* It is allowed for trx to hold an
- auto_increment lock. */
- break;
- default:
-not_ok:
- /* Other table locks than LOCK_IX are not allowed.
- This label is also the target of the goto above,
- taken when another transaction holds a lock. */
- ok = FALSE;
- goto func_exit;
- }
- }
-
-func_exit:
- lock_mutex_exit_kernel();
-
- return(ok);
-}
-
-/*********************************************************************//**
-Sets the wait flag of a lock and the back pointer in trx to lock.
-In debug builds it is asserted that trx does not already have a
-wait_lock. Note that the pointer is stored in the trx passed in; the
-function does not check that trx equals lock->trx. */
-UNIV_INLINE
-void
-lock_set_lock_and_trx_wait(
-/*=======================*/
- lock_t* lock, /*!< in: lock */
- trx_t* trx) /*!< in: trx */
-{
- ut_ad(lock);
- ut_ad(trx->wait_lock == NULL);
-
- trx->wait_lock = lock;
- lock->type_mode |= LOCK_WAIT;
-}
-
-/**********************************************************************//**
-The back pointer to a waiting lock request in the transaction is set to NULL
-and the wait bit in lock type_mode is reset.
-In debug builds it is asserted that lock is the current wait_lock of its
-transaction and that its wait flag is set. The lock object itself is left
-in whatever lists it belongs to. */
-UNIV_INLINE
-void
-lock_reset_lock_and_trx_wait(
-/*=========================*/
- lock_t* lock) /*!< in: record lock */
-{
- ut_ad((lock->trx)->wait_lock == lock);
- ut_ad(lock_get_wait(lock));
-
- /* Reset the back pointer in trx to this waiting lock request */
-
- (lock->trx)->wait_lock = NULL;
- lock->type_mode &= ~LOCK_WAIT;
-}
-
-/*********************************************************************//**
-Gets the gap flag of a record lock, i.e., tests the LOCK_GAP bit of
-lock->type_mode. The lock must be of type LOCK_REC (debug assertion).
-@return TRUE if gap flag set */
-UNIV_INLINE
-ibool
-lock_rec_get_gap(
-/*=============*/
- const lock_t* lock) /*!< in: record lock */
-{
- ut_ad(lock);
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
-
- if (lock->type_mode & LOCK_GAP) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Gets the LOCK_REC_NOT_GAP flag of a record lock, i.e., tests that bit of
-lock->type_mode. The lock must be of type LOCK_REC (debug assertion).
-@return TRUE if LOCK_REC_NOT_GAP flag set */
-UNIV_INLINE
-ibool
-lock_rec_get_rec_not_gap(
-/*=====================*/
- const lock_t* lock) /*!< in: record lock */
-{
- ut_ad(lock);
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
-
- if (lock->type_mode & LOCK_REC_NOT_GAP) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Gets the waiting insert flag of a record lock, i.e., tests the
-LOCK_INSERT_INTENTION bit of lock->type_mode. The lock must be of type
-LOCK_REC (debug assertion).
-@return TRUE if the insert intention flag is set */
-UNIV_INLINE
-ibool
-lock_rec_get_insert_intention(
-/*==========================*/
- const lock_t* lock) /*!< in: record lock */
-{
- ut_ad(lock);
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
-
- if (lock->type_mode & LOCK_INSERT_INTENTION) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Calculates if lock mode 1 is stronger or equal to lock mode 2.
-The answer is obtained by masking LOCK_MODE_STRONGER_OR_EQ with
-LK(mode1, mode2); both macros are defined outside this function
-(presumably a bit matrix indexed by the mode pair - verify at the macro
-definitions). Both modes must be one of LOCK_X, LOCK_S, LOCK_IX, LOCK_IS
-or LOCK_AUTO_INC (debug assertions).
-@return nonzero if mode1 stronger or equal to mode2 */
-UNIV_INLINE
-ulint
-lock_mode_stronger_or_eq(
-/*=====================*/
- enum lock_mode mode1, /*!< in: lock mode */
- enum lock_mode mode2) /*!< in: lock mode */
-{
- ut_ad(mode1 == LOCK_X || mode1 == LOCK_S || mode1 == LOCK_IX
- || mode1 == LOCK_IS || mode1 == LOCK_AUTO_INC);
- ut_ad(mode2 == LOCK_X || mode2 == LOCK_S || mode2 == LOCK_IX
- || mode2 == LOCK_IS || mode2 == LOCK_AUTO_INC);
-
- return((LOCK_MODE_STRONGER_OR_EQ) & LK(mode1, mode2));
-}
-
-/*********************************************************************//**
-Calculates if lock mode 1 is compatible with lock mode 2.
-The answer is obtained by masking LOCK_MODE_COMPATIBILITY with
-LK(mode1, mode2); both macros are defined outside this function
-(presumably a bit matrix indexed by the mode pair - verify at the macro
-definitions). Both modes must be one of LOCK_X, LOCK_S, LOCK_IX, LOCK_IS
-or LOCK_AUTO_INC (debug assertions).
-@return nonzero if mode1 compatible with mode2 */
-UNIV_INLINE
-ulint
-lock_mode_compatible(
-/*=================*/
- enum lock_mode mode1, /*!< in: lock mode */
- enum lock_mode mode2) /*!< in: lock mode */
-{
- ut_ad(mode1 == LOCK_X || mode1 == LOCK_S || mode1 == LOCK_IX
- || mode1 == LOCK_IS || mode1 == LOCK_AUTO_INC);
- ut_ad(mode2 == LOCK_X || mode2 == LOCK_S || mode2 == LOCK_IX
- || mode2 == LOCK_IS || mode2 == LOCK_AUTO_INC);
-
- return((LOCK_MODE_COMPATIBILITY) & LK(mode1, mode2));
-}
-
-/*********************************************************************//**
-Checks if a lock request for a new lock has to wait for request lock2.
-No wait is needed when lock2 belongs to the same transaction or when the
-basic modes (type_mode masked with LOCK_MODE_MASK versus the mode of
-lock2) are compatible. Otherwise the gap / not-gap / insert intention
-flags are examined in the order coded below, and the first matching
-exception returns FALSE.
-@return TRUE if new lock has to wait for lock2 to be removed */
-UNIV_INLINE
-ibool
-lock_rec_has_to_wait(
-/*=================*/
- const trx_t* trx, /*!< in: trx of new lock */
- ulint type_mode,/*!< in: precise mode of the new lock
- to set: LOCK_S or LOCK_X, possibly
- ORed to LOCK_GAP or LOCK_REC_NOT_GAP,
- LOCK_INSERT_INTENTION */
- const lock_t* lock2, /*!< in: another record lock; NOTE that
- it is assumed that this has a lock bit
- set on the same record as in the new
- lock we are setting */
- ibool lock_is_on_supremum) /*!< in: TRUE if we are setting the
- lock on the 'supremum' record of an
- index page: we know then that the lock
- request is really for a 'gap' type lock */
-{
- ut_ad(trx && lock2);
- ut_ad(lock_get_type_low(lock2) == LOCK_REC);
-
- if (trx != lock2->trx
- && !lock_mode_compatible(LOCK_MODE_MASK & type_mode,
- lock_get_mode(lock2))) {
-
- /* We have somewhat complex rules when gap type record locks
- cause waits */
-
- if ((lock_is_on_supremum || (type_mode & LOCK_GAP))
- && !(type_mode & LOCK_INSERT_INTENTION)) {
-
- /* Gap type locks without LOCK_INSERT_INTENTION flag
- do not need to wait for anything. This is because
- different users can have conflicting lock types
- on gaps. */
-
- return(FALSE);
- }
-
- if (!(type_mode & LOCK_INSERT_INTENTION)
- && lock_rec_get_gap(lock2)) {
-
- /* Record lock (LOCK_ORDINARY or LOCK_REC_NOT_GAP)
- does not need to wait for a gap type lock */
-
- return(FALSE);
- }
-
- if ((type_mode & LOCK_GAP)
- && lock_rec_get_rec_not_gap(lock2)) {
-
- /* Lock on gap does not need to wait for
- a LOCK_REC_NOT_GAP type lock */
-
- return(FALSE);
- }
-
- if (lock_rec_get_insert_intention(lock2)) {
-
- /* No lock request needs to wait for an insert
- intention lock to be removed. This is ok since our
- rules allow conflicting locks on gaps. This eliminates
- a spurious deadlock caused by a next-key lock waiting
- for an insert intention lock; when the insert
- intention lock was granted, the insert deadlocked on
- the waiting next-key lock.
-
- Also, insert intention locks do not disturb each
- other. */
-
- return(FALSE);
- }
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Checks if a lock request lock1 has to wait for request lock2.
-Locks of the same transaction, or with compatible modes, never wait.
-For record locks the decision is delegated to lock_rec_has_to_wait();
-for any other lock type an incompatible mode always means a wait.
-@return TRUE if lock1 has to wait for lock2 to be removed */
-UNIV_INTERN
-ibool
-lock_has_to_wait(
-/*=============*/
- const lock_t* lock1, /*!< in: waiting lock */
- const lock_t* lock2) /*!< in: another lock; NOTE that it is
- assumed that this has a lock bit set
- on the same record as in lock1 if the
- locks are record locks */
-{
- ut_ad(lock1 && lock2);
-
- if (lock1->trx != lock2->trx
- && !lock_mode_compatible(lock_get_mode(lock1),
- lock_get_mode(lock2))) {
- if (lock_get_type_low(lock1) == LOCK_REC) {
- ut_ad(lock_get_type_low(lock2) == LOCK_REC);
-
- /* If this lock request is for a supremum record
- then the second bit on the lock bitmap is set
- (bit index 1, which is what is passed as the
- lock_is_on_supremum argument below) */
-
- return(lock_rec_has_to_wait(lock1->trx,
- lock1->type_mode, lock2,
- lock_rec_get_nth_bit(
- lock1, 1)));
- }
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*============== RECORD LOCK BASIC FUNCTIONS ============================*/
-
-/*********************************************************************//**
-Gets the number of bits in a record lock bitmap, as stored in
-lock->un_member.rec_lock.n_bits. This is the bitmap capacity, not the
-number of bits currently set.
-@return number of bits */
-UNIV_INLINE
-ulint
-lock_rec_get_n_bits(
-/*================*/
- const lock_t* lock) /*!< in: record lock */
-{
- return(lock->un_member.rec_lock.n_bits);
-}
-
-/**********************************************************************//**
-Sets the nth bit of a record lock to TRUE.
-The bitmap is addressed as the bytes that start right after the lock
-struct (&lock[1]); bit i lives in byte i / 8 at bit position i % 8.
-The index must be below the bitmap size (debug assertion). */
-UNIV_INLINE
-void
-lock_rec_set_nth_bit(
-/*=================*/
- lock_t* lock, /*!< in: record lock */
- ulint i) /*!< in: index of the bit */
-{
- ulint byte_index;
- ulint bit_index;
-
- ut_ad(lock);
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
- ut_ad(i < lock->un_member.rec_lock.n_bits);
-
- byte_index = i / 8;
- bit_index = i % 8;
-
- ((byte*) &lock[1])[byte_index] |= 1 << bit_index;
-}
-
-/**********************************************************************//**
-Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED,
-if none found.
-The bitmap is scanned linearly from bit 0 upwards, so the lowest set bit
-is the one reported.
-@return bit index == heap number of the record, or ULINT_UNDEFINED if
-none found */
-UNIV_INTERN
-ulint
-lock_rec_find_set_bit(
-/*==================*/
- const lock_t* lock) /*!< in: record lock with at least one bit set */
-{
- ulint i;
-
- for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
-
- if (lock_rec_get_nth_bit(lock, i)) {
-
- return(i);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-/**********************************************************************//**
-Resets the nth bit of a record lock.
-The bitmap is addressed as the bytes that start right after the lock
-struct (&lock[1]); bit i lives in byte i / 8 at bit position i % 8.
-NOTE: the code clears the bit unconditionally; it does not itself verify
-that the bit was set beforehand. */
-UNIV_INLINE
-void
-lock_rec_reset_nth_bit(
-/*===================*/
- lock_t* lock, /*!< in: record lock */
- ulint i) /*!< in: index of the bit which must be set to TRUE
- when this function is called */
-{
- ulint byte_index;
- ulint bit_index;
-
- ut_ad(lock);
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
- ut_ad(i < lock->un_member.rec_lock.n_bits);
-
- byte_index = i / 8;
- bit_index = i % 8;
-
- ((byte*) &lock[1])[byte_index] &= ~(1 << bit_index);
-}
-
-/*********************************************************************//**
-Gets the first or next record lock on a page.
-Starting from the given lock, the hash chain is followed with
-HASH_GET_NEXT() until a lock with the same (space, page_no) as the given
-lock is met or the chain ends. The caller must own kernel_mutex (debug
-assertion).
-@return next lock, NULL if none exists */
-UNIV_INLINE
-lock_t*
-lock_rec_get_next_on_page(
-/*======================*/
- lock_t* lock) /*!< in: a record lock */
-{
- ulint space;
- ulint page_no;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
-
- space = lock->un_member.rec_lock.space;
- page_no = lock->un_member.rec_lock.page_no;
-
- for (;;) {
- lock = HASH_GET_NEXT(hash, lock);
-
- if (!lock) {
-
- break;
- }
-
- if ((lock->un_member.rec_lock.space == space)
- && (lock->un_member.rec_lock.page_no == page_no)) {
-
- break;
- }
- }
-
- return(lock);
-}
-
-/*********************************************************************//**
-Gets the first record lock on a page, where the page is identified by its
-file address.
-The hash cell is computed with lock_rec_hash(space, page_no) and its chain
-in lock_sys->rec_hash is scanned for the first lock whose space and
-page_no both match. The caller must own kernel_mutex (debug assertion).
-@return first lock, NULL if none exists */
-UNIV_INLINE
-lock_t*
-lock_rec_get_first_on_page_addr(
-/*============================*/
- ulint space, /*!< in: space */
- ulint page_no)/*!< in: page number */
-{
- lock_t* lock;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- lock = HASH_GET_FIRST(lock_sys->rec_hash,
- lock_rec_hash(space, page_no));
- while (lock) {
- if ((lock->un_member.rec_lock.space == space)
- && (lock->un_member.rec_lock.page_no == page_no)) {
-
- break;
- }
-
- lock = HASH_GET_NEXT(hash, lock);
- }
-
- return(lock);
-}
-
-/*********************************************************************//**
-Returns TRUE if there are explicit record locks on a page.
-The function acquires kernel_mutex for the duration of the lookup and
-releases it before returning, so the answer describes the moment the
-mutex was held.
-@return TRUE if there are explicit record locks on the page */
-UNIV_INTERN
-ibool
-lock_rec_expl_exist_on_page(
-/*========================*/
- ulint space, /*!< in: space id */
- ulint page_no)/*!< in: page number */
-{
- ibool ret;
-
- mutex_enter(&kernel_mutex);
-
- if (lock_rec_get_first_on_page_addr(space, page_no)) {
- ret = TRUE;
- } else {
- ret = FALSE;
- }
-
- mutex_exit(&kernel_mutex);
-
- return(ret);
-}
-
-/*********************************************************************//**
-Gets the first record lock on a page, where the page is identified by a
-pointer to it.
-Unlike lock_rec_get_first_on_page_addr(), the hash value is not computed
-from (space, page_no) but taken from buf_block_get_lock_hash_val(block).
-The chain is then scanned for the first lock whose space and page_no match
-those of the block. The caller must own kernel_mutex (debug assertion).
-@return first lock, NULL if none exists */
-UNIV_INLINE
-lock_t*
-lock_rec_get_first_on_page(
-/*=======================*/
- const buf_block_t* block) /*!< in: buffer block */
-{
- ulint hash;
- lock_t* lock;
- ulint space = buf_block_get_space(block);
- ulint page_no = buf_block_get_page_no(block);
-
- ut_ad(mutex_own(&kernel_mutex));
-
- hash = buf_block_get_lock_hash_val(block);
-
- lock = HASH_GET_FIRST(lock_sys->rec_hash, hash);
-
- while (lock) {
- if ((lock->un_member.rec_lock.space == space)
- && (lock->un_member.rec_lock.page_no == page_no)) {
-
- break;
- }
-
- lock = HASH_GET_NEXT(hash, lock);
- }
-
- return(lock);
-}
-
-/*********************************************************************//**
-Gets the next explicit lock request on a record.
-Steps through the locks of the same page with lock_rec_get_next_on_page()
-and stops at the first one that has the bit for heap_no set in its bitmap.
-The given lock must not be NULL, as it is dereferenced before the first
-step. The caller must own kernel_mutex (debug assertion).
-@return next lock, NULL if none exists */
-UNIV_INLINE
-lock_t*
-lock_rec_get_next(
-/*==============*/
- ulint heap_no,/*!< in: heap number of the record */
- lock_t* lock) /*!< in: lock */
-{
- ut_ad(mutex_own(&kernel_mutex));
-
- do {
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
- lock = lock_rec_get_next_on_page(lock);
- } while (lock && !lock_rec_get_nth_bit(lock, heap_no));
-
- return(lock);
-}
-
-/*********************************************************************//**
-Gets the first explicit lock request on a record.
-Iterates over the record locks of the page of the block and stops at the
-first one that has the bit for heap_no set in its bitmap. The caller must
-own kernel_mutex (debug assertion).
-@return first lock, NULL if none exists */
-UNIV_INLINE
-lock_t*
-lock_rec_get_first(
-/*===============*/
- const buf_block_t* block, /*!< in: block containing the record */
- ulint heap_no)/*!< in: heap number of the record */
-{
- lock_t* lock;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- for (lock = lock_rec_get_first_on_page(block); lock;
- lock = lock_rec_get_next_on_page(lock)) {
- if (lock_rec_get_nth_bit(lock, heap_no)) {
- break;
- }
- }
-
- return(lock);
-}
-
-/*********************************************************************//**
-Resets the record lock bitmap to zero. NOTE: does not touch the wait_lock
-pointer in the transaction! This function is used in lock object creation
-and resetting.
-The number of bits recorded in the lock must be a multiple of 8 (debug
-assertion), so that n_bits / 8 bytes cover the whole bitmap. */
-static
-void
-lock_rec_bitmap_reset(
-/*==================*/
- lock_t* lock) /*!< in: record lock */
-{
- ulint n_bytes;
-
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
-
- /* Reset to zero the bitmap which resides immediately after the lock
- struct */
-
- n_bytes = lock_rec_get_n_bits(lock) / 8;
-
- ut_ad((lock_rec_get_n_bits(lock) % 8) == 0);
-
- memset(&lock[1], 0, n_bytes);
-}
-
-/*********************************************************************//**
-Copies a record lock to heap.
-The copy covers the lock struct plus the n_bits / 8 bitmap bytes that
-follow it, and is made with mem_heap_dup(). It is a plain byte copy: any
-pointers and list nodes inside the struct are duplicated as they are.
-@return copy of lock */
-static
-lock_t*
-lock_rec_copy(
-/*==========*/
- const lock_t* lock, /*!< in: record lock */
- mem_heap_t* heap) /*!< in: memory heap */
-{
- ulint size;
-
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
-
- size = sizeof(lock_t) + lock_rec_get_n_bits(lock) / 8;
-
- return(mem_heap_dup(heap, lock, size));
-}
-
-/*********************************************************************//**
-Gets the previous record lock set on a record.
-The locks of the page of in_lock are walked from the first one; the last
-lock seen before in_lock that has the bit for heap_no set is remembered
-and returned once in_lock itself is reached. in_lock is expected to be
-in that chain: the loop has no other exit, and a debug assertion fires if
-the chain ends first. The caller must own kernel_mutex (debug assertion).
-@return previous lock on the same record, NULL if none exists */
-UNIV_INTERN
-const lock_t*
-lock_rec_get_prev(
-/*==============*/
- const lock_t* in_lock,/*!< in: record lock */
- ulint heap_no)/*!< in: heap number of the record */
-{
- lock_t* lock;
- ulint space;
- ulint page_no;
- lock_t* found_lock = NULL;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
-
- space = in_lock->un_member.rec_lock.space;
- page_no = in_lock->un_member.rec_lock.page_no;
-
- lock = lock_rec_get_first_on_page_addr(space, page_no);
-
- for (;;) {
- ut_ad(lock);
-
- if (lock == in_lock) {
-
- return(found_lock);
- }
-
- if (lock_rec_get_nth_bit(lock, heap_no)) {
-
- found_lock = lock;
- }
-
- lock = lock_rec_get_next_on_page(lock);
- }
-}
-
-/*============= FUNCTIONS FOR ANALYZING TABLE LOCK QUEUE ================*/
-
-/*********************************************************************//**
-Checks if a transaction has the specified table lock, or stronger.
-The lock list of the table is scanned backwards, from the last lock to
-the first. A matching lock is asserted (debug) not to be a waiting one.
-The caller must own kernel_mutex (debug assertion).
-@return lock or NULL */
-UNIV_INLINE
-lock_t*
-lock_table_has(
-/*===========*/
- trx_t* trx, /*!< in: transaction */
- dict_table_t* table, /*!< in: table */
- enum lock_mode mode) /*!< in: lock mode */
-{
- lock_t* lock;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- /* Look for stronger locks the same trx already has on the table */
-
- lock = UT_LIST_GET_LAST(table->locks);
-
- while (lock != NULL) {
-
- if (lock->trx == trx
- && lock_mode_stronger_or_eq(lock_get_mode(lock), mode)) {
-
- /* The same trx already has locked the table in
- a mode stronger or equal to the mode given */
-
- ut_ad(!lock_get_wait(lock));
-
- return(lock);
- }
-
- lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
- }
-
- return(NULL);
-}
-
-/*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/
-
-/*********************************************************************//**
-Checks if a transaction has a GRANTED explicit lock on rec stronger or equal
-to precise_mode.
-A lock of trx on the record qualifies when all of the following hold: its
-mode is stronger or equal to the requested basic mode; it is not waiting;
-if it is a LOCK_REC_NOT_GAP lock, the request is LOCK_REC_NOT_GAP too or
-the record is the supremum; if it is a LOCK_GAP lock, the request is
-LOCK_GAP too or the record is the supremum; and it is not an insert
-intention lock. The caller must own kernel_mutex (debug assertion).
-@return lock or NULL */
-UNIV_INLINE
-lock_t*
-lock_rec_has_expl(
-/*==============*/
- ulint precise_mode,/*!< in: LOCK_S or LOCK_X
- possibly ORed to LOCK_GAP or
- LOCK_REC_NOT_GAP, for a
- supremum record we regard this
- always a gap type request;
- LOCK_INSERT_INTENTION must not
- be set */
- const buf_block_t* block, /*!< in: buffer block containing
- the record */
- ulint heap_no,/*!< in: heap number of the record */
- trx_t* trx) /*!< in: transaction */
-{
- lock_t* lock;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad((precise_mode & LOCK_MODE_MASK) == LOCK_S
- || (precise_mode & LOCK_MODE_MASK) == LOCK_X);
- ut_ad(!(precise_mode & LOCK_INSERT_INTENTION));
-
- lock = lock_rec_get_first(block, heap_no);
-
- while (lock) {
- if (lock->trx == trx
- && lock_mode_stronger_or_eq(lock_get_mode(lock),
- precise_mode & LOCK_MODE_MASK)
- && !lock_get_wait(lock)
- && (!lock_rec_get_rec_not_gap(lock)
- || (precise_mode & LOCK_REC_NOT_GAP)
- || heap_no == PAGE_HEAP_NO_SUPREMUM)
- && (!lock_rec_get_gap(lock)
- || (precise_mode & LOCK_GAP)
- || heap_no == PAGE_HEAP_NO_SUPREMUM)
- && (!lock_rec_get_insert_intention(lock))) {
-
- return(lock);
- }
-
- lock = lock_rec_get_next(heap_no, lock);
- }
-
- return(NULL);
-}
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Checks if some other transaction has a lock request in the queue.
-Compiled only when UNIV_DEBUG is defined. A lock on the record is
-reported when it belongs to a transaction other than trx, its mode is
-stronger or equal to mode, and it passes the gap and wait filters
-described at the parameters. The caller must own kernel_mutex (debug
-assertion).
-@return lock or NULL */
-static
-lock_t*
-lock_rec_other_has_expl_req(
-/*========================*/
- enum lock_mode mode, /*!< in: LOCK_S or LOCK_X */
- ulint gap, /*!< in: LOCK_GAP if also gap
- locks are taken into account,
- or 0 if not; with 0, gap locks
- and every lock on the supremum
- record are skipped */
- ulint wait, /*!< in: LOCK_WAIT if also
- waiting locks are taken into
- account, or 0 if not */
- const buf_block_t* block, /*!< in: buffer block containing
- the record */
- ulint heap_no,/*!< in: heap number of the record */
- const trx_t* trx) /*!< in: transaction, or NULL if
- requests by all transactions
- are taken into account */
-{
- lock_t* lock;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(mode == LOCK_X || mode == LOCK_S);
- ut_ad(gap == 0 || gap == LOCK_GAP);
- ut_ad(wait == 0 || wait == LOCK_WAIT);
-
- lock = lock_rec_get_first(block, heap_no);
-
- while (lock) {
- if (lock->trx != trx
- && (gap
- || !(lock_rec_get_gap(lock)
- || heap_no == PAGE_HEAP_NO_SUPREMUM))
- && (wait || !lock_get_wait(lock))
- && lock_mode_stronger_or_eq(lock_get_mode(lock), mode)) {
-
- return(lock);
- }
-
- lock = lock_rec_get_next(heap_no, lock);
- }
-
- return(NULL);
-}
-#endif /* UNIV_DEBUG */
-
-/*********************************************************************//**
-Checks if some other transaction has a conflicting explicit lock request
-in the queue, so that we have to wait.
-Every lock on the record is tested with lock_rec_has_to_wait(). The two
-loops below differ only in the last argument passed to that function:
-TRUE when the record is the page supremum, FALSE otherwise. The caller
-must own kernel_mutex (debug assertion).
-@return lock or NULL */
-static
-lock_t*
-lock_rec_other_has_conflicting(
-/*===========================*/
- enum lock_mode mode, /*!< in: LOCK_S or LOCK_X,
- possibly ORed to LOCK_GAP or
- LOCK_REC_NOT_GAP,
- LOCK_INSERT_INTENTION */
- const buf_block_t* block, /*!< in: buffer block containing
- the record */
- ulint heap_no,/*!< in: heap number of the record */
- trx_t* trx) /*!< in: our transaction */
-{
- lock_t* lock;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- lock = lock_rec_get_first(block, heap_no);
-
- if (UNIV_LIKELY_NULL(lock)) {
- if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) {
-
- do {
- if (lock_rec_has_to_wait(trx, mode, lock,
- TRUE)) {
- return(lock);
- }
-
- lock = lock_rec_get_next(heap_no, lock);
- } while (lock);
- } else {
-
- do {
- if (lock_rec_has_to_wait(trx, mode, lock,
- FALSE)) {
- return(lock);
- }
-
- lock = lock_rec_get_next(heap_no, lock);
- } while (lock);
- }
- }
-
- return(NULL);
-}
-
-/*********************************************************************//**
-Looks for a suitable type record lock struct by the same trx on the same page.
-This can be used to save space when a new record lock should be set on a page:
-no new struct is needed, if a suitable old is found.
-A lock is suitable when it belongs to trx, its type_mode is exactly equal
-to the requested one, and its bitmap is large enough to hold the bit for
-heap_no. The caller must own kernel_mutex (debug assertion).
-@return lock or NULL */
-UNIV_INLINE
-lock_t*
-lock_rec_find_similar_on_page(
-/*==========================*/
- ulint type_mode, /*!< in: lock type_mode field */
- ulint heap_no, /*!< in: heap number of the record */
- lock_t* lock, /*!< in: lock_rec_get_first_on_page() */
- const trx_t* trx) /*!< in: transaction */
-{
- ut_ad(mutex_own(&kernel_mutex));
-
- while (lock != NULL) {
- if (lock->trx == trx
- && lock->type_mode == type_mode
- && lock_rec_get_n_bits(lock) > heap_no) {
-
- return(lock);
- }
-
- lock = lock_rec_get_next_on_page(lock);
- }
-
- return(NULL);
-}
-
-/*********************************************************************//**
-Checks if some transaction has an implicit x-lock on a record in a secondary
-index.
-NULL is returned early in two cases: when the max trx id of the page is
-below trx_list_get_min_trx_id() and recovery is not running, and when the
-max trx id of the page fails lock_check_trx_id_sanity() (the page is then
-printed with buf_page_print()). Otherwise the result comes from
-row_vers_impl_x_locked_off_kernel(). The caller must own kernel_mutex
-(debug assertion).
-@return transaction which has the x-lock, or NULL */
-static
-trx_t*
-lock_sec_rec_some_has_impl_off_kernel(
-/*==================================*/
- const rec_t* rec, /*!< in: user record */
- dict_index_t* index, /*!< in: secondary index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
-{
- const page_t* page = page_align(rec);
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(!dict_index_is_clust(index));
- ut_ad(page_rec_is_user_rec(rec));
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- /* Some transaction may have an implicit x-lock on the record only
- if the max trx id for the page >= min trx id for the trx list, or
- database recovery is running. We do not write the changes of a page
- max trx id to the log, and therefore during recovery, this value
- for a page may be incorrect. */
-
- if (!(ut_dulint_cmp(page_get_max_trx_id(page),
- trx_list_get_min_trx_id()) >= 0)
- && !recv_recovery_is_on()) {
-
- return(NULL);
- }
-
- /* Ok, in this case it is possible that some transaction has an
- implicit x-lock. We have to look in the clustered index. */
-
- if (!lock_check_trx_id_sanity(page_get_max_trx_id(page),
- rec, index, offsets, TRUE)) {
- buf_page_print(page, 0);
-
- /* The page is corrupt: try to avoid a crash by returning
- NULL */
- return(NULL);
- }
-
- return(row_vers_impl_x_locked_off_kernel(rec, index, offsets));
-}
-
-/*********************************************************************//**
-Return approximate number of record locks (bits set in the bitmap) for
-this transaction. Since delete-marked records may be removed, the
-record count will not be precise.
-Every LOCK_REC lock in trx->trx_locks is visited and each bit of its
-bitmap is tested, so the cost grows with the total bitmap size.
-NOTE(review): unlike the neighbouring functions this one contains no
-kernel_mutex ownership assertion - confirm what protection callers are
-expected to hold.
-@return number of set bits summed over the record locks of trx */
-UNIV_INTERN
-ulint
-lock_number_of_rows_locked(
-/*=======================*/
- trx_t* trx) /*!< in: transaction */
-{
- lock_t* lock;
- ulint n_records = 0;
- ulint n_bits;
- ulint n_bit;
-
- lock = UT_LIST_GET_FIRST(trx->trx_locks);
-
- while (lock) {
- if (lock_get_type_low(lock) == LOCK_REC) {
- n_bits = lock_rec_get_n_bits(lock);
-
- for (n_bit = 0; n_bit < n_bits; n_bit++) {
- if (lock_rec_get_nth_bit(lock, n_bit)) {
- n_records++;
- }
- }
- }
-
- lock = UT_LIST_GET_NEXT(trx_locks, lock);
- }
-
- return (n_records);
-}
-
-/*============== RECORD LOCK CREATION AND QUEUE MANAGEMENT =============*/
-
-/*********************************************************************//**
-Creates a new record lock and inserts it to the lock queue. Does NOT check
-for deadlocks or lock compatibility!
-The lock struct and its bitmap are allocated in one piece from
-trx->lock_heap. The lock is appended to trx->trx_locks and inserted into
-lock_sys->rec_hash, and only the bit for heap_no is set in the bitmap.
-If LOCK_WAIT is given in type_mode, the lock is also registered as the
-wait lock of trx. The caller must own kernel_mutex (debug assertion).
-@return created lock */
-static
-lock_t*
-lock_rec_create(
-/*============*/
- ulint type_mode,/*!< in: lock mode and wait
- flag, type is ignored and
- replaced by LOCK_REC */
- const buf_block_t* block, /*!< in: buffer block containing
- the record */
- ulint heap_no,/*!< in: heap number of the record */
- dict_index_t* index, /*!< in: index of record */
- trx_t* trx) /*!< in: transaction */
-{
- lock_t* lock;
- ulint page_no;
- ulint space;
- ulint n_bits;
- ulint n_bytes;
- const page_t* page;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- space = buf_block_get_space(block);
- page_no = buf_block_get_page_no(block);
- page = block->frame;
-
- ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
-
- /* If rec is the supremum record, then we reset the gap and
- LOCK_REC_NOT_GAP bits, as all locks on the supremum are
- automatically of the gap type */
-
- if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) {
- ut_ad(!(type_mode & LOCK_REC_NOT_GAP));
-
- type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP);
- }
-
- /* Make lock bitmap bigger by a safety margin. The byte count is
- 1 + n_bits / 8, which always provides at least n_bits bits; the
- n_bits field stored below is that byte count times 8. */
- n_bits = page_dir_get_n_heap(page) + LOCK_PAGE_BITMAP_MARGIN;
- n_bytes = 1 + n_bits / 8;
-
- lock = mem_heap_alloc(trx->lock_heap, sizeof(lock_t) + n_bytes);
-
- UT_LIST_ADD_LAST(trx_locks, trx->trx_locks, lock);
-
- lock->trx = trx;
-
- lock->type_mode = (type_mode & ~LOCK_TYPE_MASK) | LOCK_REC;
- lock->index = index;
-
- lock->un_member.rec_lock.space = space;
- lock->un_member.rec_lock.page_no = page_no;
- lock->un_member.rec_lock.n_bits = n_bytes * 8;
-
- /* Reset to zero the bitmap which resides immediately after the
- lock struct */
-
- lock_rec_bitmap_reset(lock);
-
- /* Set the bit corresponding to rec */
- lock_rec_set_nth_bit(lock, heap_no);
-
- HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
- lock_rec_fold(space, page_no), lock);
- if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
-
- lock_set_lock_and_trx_wait(lock, trx);
- }
-
- return(lock);
-}
-
-/*********************************************************************//**
-Enqueues a waiting request for a lock which cannot be granted immediately.
-Checks for deadlocks.
-On the DB_LOCK_WAIT path the transaction is put to the TRX_QUE_LOCK_WAIT
-state, its wait start time is recorded, and que_thr_stop(thr) is required
-to succeed. The caller must own kernel_mutex (debug assertion).
-@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
-DB_SUCCESS; DB_SUCCESS means that there was a deadlock, but another
-transaction was chosen as a victim, and we got the lock immediately:
-no need to wait then */
-static
-ulint
-lock_rec_enqueue_waiting(
-/*=====================*/
- ulint type_mode,/*!< in: lock mode this
- transaction is requesting:
- LOCK_S or LOCK_X, possibly
- ORed with LOCK_GAP or
- LOCK_REC_NOT_GAP, ORed with
- LOCK_INSERT_INTENTION if this
- waiting lock request is set
- when performing an insert of
- an index record */
- const buf_block_t* block, /*!< in: buffer block containing
- the record */
- ulint heap_no,/*!< in: heap number of the record */
- dict_index_t* index, /*!< in: index of record */
- que_thr_t* thr) /*!< in: query thread */
-{
- lock_t* lock;
- trx_t* trx;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- /* Test if there already is some other reason to suspend thread:
- we do not enqueue a lock request if the query thread should be
- stopped anyway */
-
- if (UNIV_UNLIKELY(que_thr_stop(thr))) {
-
- ut_error;
-
- return(DB_QUE_THR_SUSPENDED); /* presumably not reached if
- ut_error aborts - confirm at
- the macro definition */
- }
-
- trx = thr_get_trx(thr);
-
- switch (trx_get_dict_operation(trx)) {
- case TRX_DICT_OP_NONE:
- break;
- case TRX_DICT_OP_TABLE:
- case TRX_DICT_OP_INDEX:
- /* Only a diagnostic is printed; execution continues and the
- lock request is still enqueued below. */
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: a record lock wait happens"
- " in a dictionary operation!\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, trx, index);
- fputs(".\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n",
- stderr);
- }
-
- /* Enqueue the lock request that will wait to be granted */
- lock = lock_rec_create(type_mode | LOCK_WAIT,
- block, heap_no, index, trx);
-
- /* Check if a deadlock occurs: if yes, remove the lock request and
- return an error code. The removal done here consists of clearing the
- wait state and the bitmap bit; the lock struct itself is not taken
- out of any list in this function. */
-
- if (UNIV_UNLIKELY(lock_deadlock_occurs(lock, trx))) {
-
- lock_reset_lock_and_trx_wait(lock);
- lock_rec_reset_nth_bit(lock, heap_no);
-
- return(DB_DEADLOCK);
- }
-
- /* If there was a deadlock but we chose another transaction as a
- victim, it is possible that we already have the lock now granted! */
-
- if (trx->wait_lock == NULL) {
-
- return(DB_SUCCESS);
- }
-
- trx->que_state = TRX_QUE_LOCK_WAIT;
- trx->was_chosen_as_deadlock_victim = FALSE;
- trx->wait_started = time(NULL);
-
- ut_a(que_thr_stop(thr));
-
-#ifdef UNIV_DEBUG
- if (lock_print_waits) {
- fprintf(stderr, "Lock wait for trx %lu in index ",
- (ulong) ut_dulint_get_low(trx->id));
- ut_print_name(stderr, trx, FALSE, index->name);
- }
-#endif /* UNIV_DEBUG */
-
- return(DB_LOCK_WAIT);
-}
-
-/*********************************************************************//**
-Adds a record lock request in the record queue. The request is normally
-added as the last in the queue, but if there are no waiting lock requests
-on the record, and the request to be added is not a waiting request, we
-can reuse a suitable record lock object already existing on the same page,
-just setting the appropriate bit in its bitmap. This is a low-level function
-which does NOT check for deadlocks or lock compatibility!
-The basic mode in type_mode must be LOCK_S or LOCK_X (checked in debug
-builds only). The caller must own kernel_mutex (debug assertion).
-@return lock where the bit was set */
-static
-lock_t*
-lock_rec_add_to_queue(
-/*==================*/
- ulint type_mode,/*!< in: lock mode, wait, gap
- etc. flags; type is ignored
- and replaced by LOCK_REC */
- const buf_block_t* block, /*!< in: buffer block containing
- the record */
- ulint heap_no,/*!< in: heap number of the record */
- dict_index_t* index, /*!< in: index of record */
- trx_t* trx) /*!< in: transaction */
-{
- lock_t* lock;
-
- ut_ad(mutex_own(&kernel_mutex));
-#ifdef UNIV_DEBUG
- switch (type_mode & LOCK_MODE_MASK) {
- case LOCK_X:
- case LOCK_S:
- break;
- default:
- ut_error;
- }
-
- if (!(type_mode & (LOCK_WAIT | LOCK_GAP))) {
- /* Debug check for a granted non-gap request: no other
- transaction may have a request on the record, waiting ones
- included, in the mode looked up here (LOCK_X when LOCK_S is
- requested, LOCK_S when LOCK_X is requested). */
- enum lock_mode mode = (type_mode & LOCK_MODE_MASK) == LOCK_S
- ? LOCK_X
- : LOCK_S;
- lock_t* other_lock
- = lock_rec_other_has_expl_req(mode, 0, LOCK_WAIT,
- block, heap_no, trx);
- ut_a(!other_lock);
- }
-#endif /* UNIV_DEBUG */
-
- type_mode |= LOCK_REC;
-
- /* If rec is the supremum record, then we can reset the gap bit, as
- all locks on the supremum are automatically of the gap type, and we
- try to avoid unnecessary memory consumption of a new record lock
- struct for a gap type lock */
-
- if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) {
- ut_ad(!(type_mode & LOCK_REC_NOT_GAP));
-
- /* There should never be LOCK_REC_NOT_GAP on a supremum
- record, but let us play safe */
-
- type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP);
- }
-
- /* Look for a waiting lock request on the same record or on a gap */
-
- lock = lock_rec_get_first_on_page(block);
-
- while (lock != NULL) {
- if (lock_get_wait(lock)
- && (lock_rec_get_nth_bit(lock, heap_no))) {
-
- goto somebody_waits;
- }
-
- lock = lock_rec_get_next_on_page(lock);
- }
-
- if (UNIV_LIKELY(!(type_mode & LOCK_WAIT))) {
-
- /* Look for a similar record lock on the same page:
- if one is found and there are no waiting lock requests,
- we can just set the bit */
-
- lock = lock_rec_find_similar_on_page(
- type_mode, heap_no,
- lock_rec_get_first_on_page(block), trx);
-
- if (lock) {
-
- lock_rec_set_nth_bit(lock, heap_no);
-
- return(lock);
- }
- }
-
-somebody_waits:
- return(lock_rec_create(type_mode, block, heap_no, index, trx));
-}
-
-/*********************************************************************//**
-This is a fast routine for locking a record in the most common cases:
-there are no explicit locks on the page, or there is just one lock, owned
-by this transaction, and of the right type_mode. This is a low-level function
-which does NOT look at implicit locks! Checks lock compatibility within
-explicit locks. This function sets a normal next-key lock, or in the case of
-a page supremum record, a gap type lock.
-FALSE does not mean an error: it only tells that none of the fast cases
-applied (more than one lock on the page, or a single lock that belongs to
-another transaction, has a different type_mode, or has too small a
-bitmap). Nothing has been modified in that case. The caller must own
-kernel_mutex and trx must already hold the matching intention lock on the
-table (debug assertions).
-@return TRUE if locking succeeded */
-UNIV_INLINE
-ibool
-lock_rec_lock_fast(
-/*===============*/
- ibool impl, /*!< in: if TRUE, no lock is set
- if no wait is necessary: we
- assume that the caller will
- set an implicit lock */
- ulint mode, /*!< in: lock mode: LOCK_X or
- LOCK_S possibly ORed to either
- LOCK_GAP or LOCK_REC_NOT_GAP */
- const buf_block_t* block, /*!< in: buffer block containing
- the record */
- ulint heap_no,/*!< in: heap number of record */
- dict_index_t* index, /*!< in: index of record */
- que_thr_t* thr) /*!< in: query thread */
-{
- lock_t* lock;
- trx_t* trx;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
- ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
- ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
- || (LOCK_MODE_MASK & mode) == LOCK_X);
- ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
- || mode - (LOCK_MODE_MASK & mode) == 0
- || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
-
- lock = lock_rec_get_first_on_page(block);
-
- trx = thr_get_trx(thr);
-
- if (lock == NULL) {
- /* No explicit record locks on the page at all */
- if (!impl) {
- lock_rec_create(mode, block, heap_no, index, trx);
- }
-
- return(TRUE);
- }
-
- if (lock_rec_get_next_on_page(lock)) {
-
- return(FALSE); /* more than one lock on the page */
- }
-
- if (lock->trx != trx
- || lock->type_mode != (mode | LOCK_REC)
- || lock_rec_get_n_bits(lock) <= heap_no) {
-
- return(FALSE);
- }
-
- if (!impl) {
- /* If the nth bit of the record lock is already set then we
- do not set a new lock bit, otherwise we do set */
-
- if (!lock_rec_get_nth_bit(lock, heap_no)) {
- lock_rec_set_nth_bit(lock, heap_no);
- }
- }
-
- return(TRUE);
-}
-
-/*********************************************************************//**
-This is the general, and slower, routine for locking a record. This is a
-low-level function which does NOT look at implicit locks! Checks lock
-compatibility within explicit locks. This function sets a normal next-key
-lock, or in the case of a page supremum record, a gap type lock.
-@return DB_SUCCESS, DB_LOCK_WAIT, or error code */
-static
-ulint
-lock_rec_lock_slow(
-/*===============*/
- ibool impl, /*!< in: if TRUE, no lock is set
- if no wait is necessary: we
- assume that the caller will
- set an implicit lock */
- ulint mode, /*!< in: lock mode: LOCK_X or
- LOCK_S possibly ORed to either
- LOCK_GAP or LOCK_REC_NOT_GAP */
- const buf_block_t* block, /*!< in: buffer block containing
- the record */
- ulint heap_no,/*!< in: heap number of record */
- dict_index_t* index, /*!< in: index of record */
- que_thr_t* thr) /*!< in: query thread */
-{
- trx_t* trx;
- ulint err;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
- ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
- ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
- || (LOCK_MODE_MASK & mode) == LOCK_X);
- ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
- || mode - (LOCK_MODE_MASK & mode) == 0
- || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
-
- trx = thr_get_trx(thr);
-
- if (lock_rec_has_expl(mode, block, heap_no, trx)) {
- /* The trx already has a strong enough lock on rec: do
- nothing */
-
- err = DB_SUCCESS;
- } else if (lock_rec_other_has_conflicting(mode, block, heap_no, trx)) {
-
- /* If another transaction has a non-gap conflicting request in
- the queue, as this transaction does not have a lock strong
- enough already granted on the record, we have to wait. */
-
- err = lock_rec_enqueue_waiting(mode, block, heap_no,
- index, thr);
- } else {
- if (!impl) {
- /* Set the requested lock on the record */
-
- lock_rec_add_to_queue(LOCK_REC | mode, block,
- heap_no, index, trx);
- }
-
- err = DB_SUCCESS;
- }
-
- return(err);
-}
-
-/*********************************************************************//**
-Tries to lock the specified record in the mode requested. If not immediately
-possible, enqueues a waiting lock request. This is a low-level function
-which does NOT look at implicit locks! Checks lock compatibility within
-explicit locks. This function sets a normal next-key lock, or in the case
-of a page supremum record, a gap type lock.
-@return DB_SUCCESS, DB_LOCK_WAIT, or error code */
-static
-ulint
-lock_rec_lock(
-/*==========*/
- ibool impl, /*!< in: if TRUE, no lock is set
- if no wait is necessary: we
- assume that the caller will
- set an implicit lock */
- ulint mode, /*!< in: lock mode: LOCK_X or
- LOCK_S possibly ORed to either
- LOCK_GAP or LOCK_REC_NOT_GAP */
- const buf_block_t* block, /*!< in: buffer block containing
- the record */
- ulint heap_no,/*!< in: heap number of record */
- dict_index_t* index, /*!< in: index of record */
- que_thr_t* thr) /*!< in: query thread */
-{
- ulint err;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
- ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
- ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
- || (LOCK_MODE_MASK & mode) == LOCK_X);
- ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
- || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP
- || mode - (LOCK_MODE_MASK & mode) == 0);
-
- if (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) {
-
- /* We try a simplified and faster subroutine for the most
- common cases */
-
- err = DB_SUCCESS;
- } else {
- err = lock_rec_lock_slow(impl, mode, block,
- heap_no, index, thr);
- }
-
- return(err);
-}
-
-/*********************************************************************//**
-Checks if a waiting record lock request still has to wait in a queue.
-@return TRUE if still has to wait */
-static
-ibool
-lock_rec_has_to_wait_in_queue(
-/*==========================*/
- lock_t* wait_lock) /*!< in: waiting record lock */
-{
- lock_t* lock;
- ulint space;
- ulint page_no;
- ulint heap_no;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(lock_get_wait(wait_lock));
- ut_ad(lock_get_type_low(wait_lock) == LOCK_REC);
-
- space = wait_lock->un_member.rec_lock.space;
- page_no = wait_lock->un_member.rec_lock.page_no;
- heap_no = lock_rec_find_set_bit(wait_lock);
-
- lock = lock_rec_get_first_on_page_addr(space, page_no);
-
- while (lock != wait_lock) {
-
- if (lock_rec_get_nth_bit(lock, heap_no)
- && lock_has_to_wait(wait_lock, lock)) {
-
- return(TRUE);
- }
-
- lock = lock_rec_get_next_on_page(lock);
- }
-
- return(FALSE);
-}
-
-/*************************************************************//**
-Grants a lock to a waiting lock request and releases the waiting
-transaction. */
-static
-void
-lock_grant(
-/*=======*/
- lock_t* lock) /*!< in/out: waiting lock request */
-{
- ut_ad(mutex_own(&kernel_mutex));
-
- lock_reset_lock_and_trx_wait(lock);
-
- if (lock_get_mode(lock) == LOCK_AUTO_INC) {
- trx_t* trx = lock->trx;
- dict_table_t* table = lock->un_member.tab_lock.table;
-
- if (table->autoinc_trx == trx) {
- fprintf(stderr,
- "InnoDB: Error: trx already had"
- " an AUTO-INC lock!\n");
- } else {
- table->autoinc_trx = trx;
-
- ib_vector_push(trx->autoinc_locks, lock);
- }
- }
-
-#ifdef UNIV_DEBUG
- if (lock_print_waits) {
- fprintf(stderr, "Lock wait for trx %lu ends\n",
- (ulong) ut_dulint_get_low(lock->trx->id));
- }
-#endif /* UNIV_DEBUG */
-
- /* If we are resolving a deadlock by choosing another transaction
- as a victim, then our original transaction may not be in the
- TRX_QUE_LOCK_WAIT state, and there is no need to end the lock wait
- for it */
-
- if (lock->trx->que_state == TRX_QUE_LOCK_WAIT) {
- trx_end_lock_wait(lock->trx);
- }
-}
-
-/*************************************************************//**
-Cancels a waiting record lock request and releases the waiting transaction
-that requested it. NOTE: does NOT check if waiting lock requests behind this
-one can now be granted! */
-static
-void
-lock_rec_cancel(
-/*============*/
- lock_t* lock) /*!< in: waiting record lock request */
-{
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
-
- /* Reset the bit (there can be only one set bit) in the lock bitmap */
- lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock));
-
- /* Reset the wait flag and the back pointer to lock in trx */
-
- lock_reset_lock_and_trx_wait(lock);
-
- /* The following function releases the trx from lock wait */
-
- trx_end_lock_wait(lock->trx);
-}
-
-/*************************************************************//**
-Removes a record lock request, waiting or granted, from the queue and
-grants locks to other transactions in the queue if they now are entitled
-to a lock. NOTE: all record locks contained in in_lock are removed. */
-static
-void
-lock_rec_dequeue_from_page(
-/*=======================*/
- lock_t* in_lock)/*!< in: record lock object: all record locks which
- are contained in this lock object are removed;
- transactions waiting behind will get their lock
- requests granted, if they are now qualified to it */
-{
- ulint space;
- ulint page_no;
- lock_t* lock;
- trx_t* trx;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
-
- trx = in_lock->trx;
-
- space = in_lock->un_member.rec_lock.space;
- page_no = in_lock->un_member.rec_lock.page_no;
-
- HASH_DELETE(lock_t, hash, lock_sys->rec_hash,
- lock_rec_fold(space, page_no), in_lock);
-
- UT_LIST_REMOVE(trx_locks, trx->trx_locks, in_lock);
-
- /* Check if waiting locks in the queue can now be granted: grant
- locks if there are no conflicting locks ahead. */
-
- lock = lock_rec_get_first_on_page_addr(space, page_no);
-
- while (lock != NULL) {
- if (lock_get_wait(lock)
- && !lock_rec_has_to_wait_in_queue(lock)) {
-
- /* Grant the lock */
- lock_grant(lock);
- }
-
- lock = lock_rec_get_next_on_page(lock);
- }
-}
-
-/*************************************************************//**
-Removes a record lock request, waiting or granted, from the queue. */
-static
-void
-lock_rec_discard(
-/*=============*/
- lock_t* in_lock)/*!< in: record lock object: all record locks which
- are contained in this lock object are removed */
-{
- ulint space;
- ulint page_no;
- trx_t* trx;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
-
- trx = in_lock->trx;
-
- space = in_lock->un_member.rec_lock.space;
- page_no = in_lock->un_member.rec_lock.page_no;
-
- HASH_DELETE(lock_t, hash, lock_sys->rec_hash,
- lock_rec_fold(space, page_no), in_lock);
-
- UT_LIST_REMOVE(trx_locks, trx->trx_locks, in_lock);
-}
-
-/*************************************************************//**
-Removes record lock objects set on an index page which is discarded. This
-function does not move locks, or check for waiting locks, therefore the
-lock bitmaps must already be reset when this function is called. */
-static
-void
-lock_rec_free_all_from_discard_page(
-/*================================*/
- const buf_block_t* block) /*!< in: page to be discarded */
-{
- ulint space;
- ulint page_no;
- lock_t* lock;
- lock_t* next_lock;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- space = buf_block_get_space(block);
- page_no = buf_block_get_page_no(block);
-
- lock = lock_rec_get_first_on_page_addr(space, page_no);
-
- while (lock != NULL) {
- ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED);
- ut_ad(!lock_get_wait(lock));
-
- next_lock = lock_rec_get_next_on_page(lock);
-
- lock_rec_discard(lock);
-
- lock = next_lock;
- }
-}
-
-/*============= RECORD LOCK MOVING AND INHERITING ===================*/
-
-/*************************************************************//**
-Resets the lock bits for a single record. Releases transactions waiting for
-lock requests here. */
-static
-void
-lock_rec_reset_and_release_wait(
-/*============================*/
- const buf_block_t* block, /*!< in: buffer block containing
- the record */
- ulint heap_no)/*!< in: heap number of record */
-{
- lock_t* lock;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- lock = lock_rec_get_first(block, heap_no);
-
- while (lock != NULL) {
- if (lock_get_wait(lock)) {
- lock_rec_cancel(lock);
- } else {
- lock_rec_reset_nth_bit(lock, heap_no);
- }
-
- lock = lock_rec_get_next(heap_no, lock);
- }
-}
-
-/*************************************************************//**
-Makes a record to inherit the locks (except LOCK_INSERT_INTENTION type)
-of another record as gap type locks, but does not reset the lock bits of
-the other record. Also waiting lock requests on rec are inherited as
-GRANTED gap locks. */
-static
-void
-lock_rec_inherit_to_gap(
-/*====================*/
- const buf_block_t* heir_block, /*!< in: block containing the
- record which inherits */
- const buf_block_t* block, /*!< in: block containing the
- record from which inherited;
- does NOT reset the locks on
- this record */
- ulint heir_heap_no, /*!< in: heap_no of the
- inheriting record */
- ulint heap_no) /*!< in: heap_no of the
- donating record */
-{
- lock_t* lock;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- lock = lock_rec_get_first(block, heap_no);
-
- /* If srv_locks_unsafe_for_binlog is TRUE or session is using
- READ COMMITTED isolation level, we do not want locks set
- by an UPDATE or a DELETE to be inherited as gap type locks. But we
- DO want S-locks set by a consistency constraint to be inherited also
- then. */
-
- while (lock != NULL) {
- if (!lock_rec_get_insert_intention(lock)
- && !((srv_locks_unsafe_for_binlog
- || lock->trx->isolation_level
- == TRX_ISO_READ_COMMITTED)
- && lock_get_mode(lock) == LOCK_X)) {
-
- lock_rec_add_to_queue(LOCK_REC | LOCK_GAP
- | lock_get_mode(lock),
- heir_block, heir_heap_no,
- lock->index, lock->trx);
- }
-
- lock = lock_rec_get_next(heap_no, lock);
- }
-}
-
-/*************************************************************//**
-Makes a record to inherit the gap locks (except LOCK_INSERT_INTENTION type)
-of another record as gap type locks, but does not reset the lock bits of the
-other record. Also waiting lock requests are inherited as GRANTED gap locks. */
-static
-void
-lock_rec_inherit_to_gap_if_gap_lock(
-/*================================*/
- const buf_block_t* block, /*!< in: buffer block */
- ulint heir_heap_no, /*!< in: heap_no of
- record which inherits */
- ulint heap_no) /*!< in: heap_no of record
- from which inherited;
- does NOT reset the locks
- on this record */
-{
- lock_t* lock;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- lock = lock_rec_get_first(block, heap_no);
-
- while (lock != NULL) {
- if (!lock_rec_get_insert_intention(lock)
- && (heap_no == PAGE_HEAP_NO_SUPREMUM
- || !lock_rec_get_rec_not_gap(lock))) {
-
- lock_rec_add_to_queue(LOCK_REC | LOCK_GAP
- | lock_get_mode(lock),
- block, heir_heap_no,
- lock->index, lock->trx);
- }
-
- lock = lock_rec_get_next(heap_no, lock);
- }
-}
-
-/*************************************************************//**
-Moves the locks of a record to another record and resets the lock bits of
-the donating record. */
-static
-void
-lock_rec_move(
-/*==========*/
- const buf_block_t* receiver, /*!< in: buffer block containing
- the receiving record */
- const buf_block_t* donator, /*!< in: buffer block containing
- the donating record */
- ulint receiver_heap_no,/*!< in: heap_no of the record
- which gets the locks; there
- must be no lock requests
- on it! */
- ulint donator_heap_no)/*!< in: heap_no of the record
- which gives the locks */
-{
- lock_t* lock;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- lock = lock_rec_get_first(donator, donator_heap_no);
-
- ut_ad(lock_rec_get_first(receiver, receiver_heap_no) == NULL);
-
- while (lock != NULL) {
- const ulint type_mode = lock->type_mode;
-
- lock_rec_reset_nth_bit(lock, donator_heap_no);
-
- if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
- lock_reset_lock_and_trx_wait(lock);
- }
-
- /* Note that we FIRST reset the bit, and then set the lock:
- the function works also if donator == receiver */
-
- lock_rec_add_to_queue(type_mode, receiver, receiver_heap_no,
- lock->index, lock->trx);
- lock = lock_rec_get_next(donator_heap_no, lock);
- }
-
- ut_ad(lock_rec_get_first(donator, donator_heap_no) == NULL);
-}
-
-/*************************************************************//**
-Updates the lock table when we have reorganized a page. NOTE: we copy
-also the locks set on the infimum of the page; the infimum may carry
-locks if an update of a record is occurring on the page, and its locks
-were temporarily stored on the infimum. */
-UNIV_INTERN
-void
-lock_move_reorganize_page(
-/*======================*/
- const buf_block_t* block, /*!< in: old index page, now
- reorganized */
- const buf_block_t* oblock) /*!< in: copy of the old, not
- reorganized page */
-{
- lock_t* lock;
- UT_LIST_BASE_NODE_T(lock_t) old_locks;
- mem_heap_t* heap = NULL;
- ulint comp;
-
- lock_mutex_enter_kernel();
-
- lock = lock_rec_get_first_on_page(block);
-
- if (lock == NULL) {
- lock_mutex_exit_kernel();
-
- return;
- }
-
- heap = mem_heap_create(256);
-
- /* Copy first all the locks on the page to heap and reset the
- bitmaps in the original locks; chain the copies of the locks
- using the trx_locks field in them. */
-
- UT_LIST_INIT(old_locks);
-
- do {
- /* Make a copy of the lock */
- lock_t* old_lock = lock_rec_copy(lock, heap);
-
- UT_LIST_ADD_LAST(trx_locks, old_locks, old_lock);
-
- /* Reset bitmap of lock */
- lock_rec_bitmap_reset(lock);
-
- if (lock_get_wait(lock)) {
- lock_reset_lock_and_trx_wait(lock);
- }
-
- lock = lock_rec_get_next_on_page(lock);
- } while (lock != NULL);
-
- comp = page_is_comp(block->frame);
- ut_ad(comp == page_is_comp(oblock->frame));
-
- for (lock = UT_LIST_GET_FIRST(old_locks); lock;
- lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
- /* NOTE: we copy also the locks set on the infimum and
- supremum of the page; the infimum may carry locks if an
- update of a record is occurring on the page, and its locks
- were temporarily stored on the infimum */
- page_cur_t cur1;
- page_cur_t cur2;
-
- page_cur_set_before_first(block, &cur1);
- page_cur_set_before_first(oblock, &cur2);
-
- /* Set locks according to old locks */
- for (;;) {
- ulint old_heap_no;
- ulint new_heap_no;
-
- ut_ad(comp || !memcmp(page_cur_get_rec(&cur1),
- page_cur_get_rec(&cur2),
- rec_get_data_size_old(
- page_cur_get_rec(
- &cur2))));
- if (UNIV_LIKELY(comp)) {
- old_heap_no = rec_get_heap_no_new(
- page_cur_get_rec(&cur2));
- new_heap_no = rec_get_heap_no_new(
- page_cur_get_rec(&cur1));
- } else {
- old_heap_no = rec_get_heap_no_old(
- page_cur_get_rec(&cur2));
- new_heap_no = rec_get_heap_no_old(
- page_cur_get_rec(&cur1));
- }
-
- if (lock_rec_get_nth_bit(lock, old_heap_no)) {
-
- /* Clear the bit in old_lock. */
- ut_d(lock_rec_reset_nth_bit(lock,
- old_heap_no));
-
- /* NOTE that the old lock bitmap could be too
- small for the new heap number! */
-
- lock_rec_add_to_queue(lock->type_mode, block,
- new_heap_no,
- lock->index, lock->trx);
-
- /* if (new_heap_no == PAGE_HEAP_NO_SUPREMUM
- && lock_get_wait(lock)) {
- fprintf(stderr,
- "---\n--\n!!!Lock reorg: supr type %lu\n",
- lock->type_mode);
- } */
- }
-
- if (UNIV_UNLIKELY
- (new_heap_no == PAGE_HEAP_NO_SUPREMUM)) {
-
- ut_ad(old_heap_no == PAGE_HEAP_NO_SUPREMUM);
- break;
- }
-
- page_cur_move_to_next(&cur1);
- page_cur_move_to_next(&cur2);
- }
-
-#ifdef UNIV_DEBUG
- {
- ulint i = lock_rec_find_set_bit(lock);
-
- /* Check that all locks were moved. */
- if (UNIV_UNLIKELY(i != ULINT_UNDEFINED)) {
- fprintf(stderr,
- "lock_move_reorganize_page():"
- " %lu not moved in %p\n",
- (ulong) i, (void*) lock);
- ut_error;
- }
- }
-#endif /* UNIV_DEBUG */
- }
-
- lock_mutex_exit_kernel();
-
- mem_heap_free(heap);
-
-#ifdef UNIV_DEBUG_LOCK_VALIDATE
- ut_ad(lock_rec_validate_page(buf_block_get_space(block),
- buf_block_get_zip_size(block),
- buf_block_get_page_no(block)));
-#endif
-}
-
-/*************************************************************//**
-Moves the explicit locks on user records to another page if a record
-list end is moved to another page. */
-UNIV_INTERN
-void
-lock_move_rec_list_end(
-/*===================*/
- const buf_block_t* new_block, /*!< in: index page to move to */
- const buf_block_t* block, /*!< in: index page */
- const rec_t* rec) /*!< in: record on page: this
- is the first record moved */
-{
- lock_t* lock;
- const ulint comp = page_rec_is_comp(rec);
-
- lock_mutex_enter_kernel();
-
- /* Note: when we move locks from record to record, waiting locks
- and possible granted gap type locks behind them are enqueued in
- the original order, because new elements are inserted to a hash
- table to the end of the hash chain, and lock_rec_add_to_queue
- does not reuse locks if there are waiters in the queue. */
-
- for (lock = lock_rec_get_first_on_page(block); lock;
- lock = lock_rec_get_next_on_page(lock)) {
- page_cur_t cur1;
- page_cur_t cur2;
- const ulint type_mode = lock->type_mode;
-
- page_cur_position(rec, block, &cur1);
-
- if (page_cur_is_before_first(&cur1)) {
- page_cur_move_to_next(&cur1);
- }
-
- page_cur_set_before_first(new_block, &cur2);
- page_cur_move_to_next(&cur2);
-
- /* Copy lock requests on user records to new page and
- reset the lock bits on the old */
-
- while (!page_cur_is_after_last(&cur1)) {
- ulint heap_no;
-
- if (comp) {
- heap_no = rec_get_heap_no_new(
- page_cur_get_rec(&cur1));
- } else {
- heap_no = rec_get_heap_no_old(
- page_cur_get_rec(&cur1));
- ut_ad(!memcmp(page_cur_get_rec(&cur1),
- page_cur_get_rec(&cur2),
- rec_get_data_size_old(
- page_cur_get_rec(&cur2))));
- }
-
- if (lock_rec_get_nth_bit(lock, heap_no)) {
- lock_rec_reset_nth_bit(lock, heap_no);
-
- if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
- lock_reset_lock_and_trx_wait(lock);
- }
-
- if (comp) {
- heap_no = rec_get_heap_no_new(
- page_cur_get_rec(&cur2));
- } else {
- heap_no = rec_get_heap_no_old(
- page_cur_get_rec(&cur2));
- }
-
- lock_rec_add_to_queue(type_mode,
- new_block, heap_no,
- lock->index, lock->trx);
- }
-
- page_cur_move_to_next(&cur1);
- page_cur_move_to_next(&cur2);
- }
- }
-
- lock_mutex_exit_kernel();
-
-#ifdef UNIV_DEBUG_LOCK_VALIDATE
- ut_ad(lock_rec_validate_page(buf_block_get_space(block),
- buf_block_get_zip_size(block),
- buf_block_get_page_no(block)));
- ut_ad(lock_rec_validate_page(buf_block_get_space(new_block),
- buf_block_get_zip_size(block),
- buf_block_get_page_no(new_block)));
-#endif
-}
-
-/*************************************************************//**
-Moves the explicit locks on user records to another page if a record
-list start is moved to another page. */
-UNIV_INTERN
-void
-lock_move_rec_list_start(
-/*=====================*/
- const buf_block_t* new_block, /*!< in: index page to move to */
- const buf_block_t* block, /*!< in: index page */
- const rec_t* rec, /*!< in: record on page:
- this is the first
- record NOT copied */
- const rec_t* old_end) /*!< in: old
- previous-to-last
- record on new_page
- before the records
- were copied */
-{
- lock_t* lock;
- const ulint comp = page_rec_is_comp(rec);
-
- ut_ad(block->frame == page_align(rec));
- ut_ad(new_block->frame == page_align(old_end));
-
- lock_mutex_enter_kernel();
-
- for (lock = lock_rec_get_first_on_page(block); lock;
- lock = lock_rec_get_next_on_page(lock)) {
- page_cur_t cur1;
- page_cur_t cur2;
- const ulint type_mode = lock->type_mode;
-
- page_cur_set_before_first(block, &cur1);
- page_cur_move_to_next(&cur1);
-
- page_cur_position(old_end, new_block, &cur2);
- page_cur_move_to_next(&cur2);
-
- /* Copy lock requests on user records to new page and
- reset the lock bits on the old */
-
- while (page_cur_get_rec(&cur1) != rec) {
- ulint heap_no;
-
- if (comp) {
- heap_no = rec_get_heap_no_new(
- page_cur_get_rec(&cur1));
- } else {
- heap_no = rec_get_heap_no_old(
- page_cur_get_rec(&cur1));
- ut_ad(!memcmp(page_cur_get_rec(&cur1),
- page_cur_get_rec(&cur2),
- rec_get_data_size_old(
- page_cur_get_rec(
- &cur2))));
- }
-
- if (lock_rec_get_nth_bit(lock, heap_no)) {
- lock_rec_reset_nth_bit(lock, heap_no);
-
- if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
- lock_reset_lock_and_trx_wait(lock);
- }
-
- if (comp) {
- heap_no = rec_get_heap_no_new(
- page_cur_get_rec(&cur2));
- } else {
- heap_no = rec_get_heap_no_old(
- page_cur_get_rec(&cur2));
- }
-
- lock_rec_add_to_queue(type_mode,
- new_block, heap_no,
- lock->index, lock->trx);
- }
-
- page_cur_move_to_next(&cur1);
- page_cur_move_to_next(&cur2);
- }
-
-#ifdef UNIV_DEBUG
- if (page_rec_is_supremum(rec)) {
- ulint i;
-
- for (i = PAGE_HEAP_NO_USER_LOW;
- i < lock_rec_get_n_bits(lock); i++) {
- if (UNIV_UNLIKELY
- (lock_rec_get_nth_bit(lock, i))) {
-
- fprintf(stderr,
- "lock_move_rec_list_start():"
- " %lu not moved in %p\n",
- (ulong) i, (void*) lock);
- ut_error;
- }
- }
- }
-#endif /* UNIV_DEBUG */
- }
-
- lock_mutex_exit_kernel();
-
-#ifdef UNIV_DEBUG_LOCK_VALIDATE
- ut_ad(lock_rec_validate_page(buf_block_get_space(block),
- buf_block_get_zip_size(block),
- buf_block_get_page_no(block)));
-#endif
-}
-
-/*************************************************************//**
-Updates the lock table when a page is split to the right. */
-UNIV_INTERN
-void
-lock_update_split_right(
-/*====================*/
- const buf_block_t* right_block, /*!< in: right page */
- const buf_block_t* left_block) /*!< in: left page */
-{
- ulint heap_no = lock_get_min_heap_no(right_block);
-
- lock_mutex_enter_kernel();
-
- /* Move the locks on the supremum of the left page to the supremum
- of the right page */
-
- lock_rec_move(right_block, left_block,
- PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
-
- /* Inherit the locks to the supremum of left page from the successor
- of the infimum on right page */
-
- lock_rec_inherit_to_gap(left_block, right_block,
- PAGE_HEAP_NO_SUPREMUM, heap_no);
-
- lock_mutex_exit_kernel();
-}
-
-/*************************************************************//**
-Updates the lock table when a page is merged to the right. */
-UNIV_INTERN
-void
-lock_update_merge_right(
-/*====================*/
- const buf_block_t* right_block, /*!< in: right page to
- which merged */
- const rec_t* orig_succ, /*!< in: original
- successor of infimum
- on the right page
- before merge */
- const buf_block_t* left_block) /*!< in: merged index
- page which will be
- discarded */
-{
- lock_mutex_enter_kernel();
-
- /* Inherit the locks from the supremum of the left page to the
- original successor of infimum on the right page, to which the left
- page was merged */
-
- lock_rec_inherit_to_gap(right_block, left_block,
- page_rec_get_heap_no(orig_succ),
- PAGE_HEAP_NO_SUPREMUM);
-
- /* Reset the locks on the supremum of the left page, releasing
- waiting transactions */
-
- lock_rec_reset_and_release_wait(left_block,
- PAGE_HEAP_NO_SUPREMUM);
-
- lock_rec_free_all_from_discard_page(left_block);
-
- lock_mutex_exit_kernel();
-}
-
-/*************************************************************//**
-Updates the lock table when the root page is copied to another in
-btr_root_raise_and_insert. Note that we leave lock structs on the
-root page, even though they do not make sense on other than leaf
-pages: the reason is that in a pessimistic update the infimum record
-of the root page will act as a dummy carrier of the locks of the record
-to be updated. */
-UNIV_INTERN
-void
-lock_update_root_raise(
-/*===================*/
- const buf_block_t* block, /*!< in: index page to which copied */
- const buf_block_t* root) /*!< in: root page */
-{
- lock_mutex_enter_kernel();
-
- /* Move the locks on the supremum of the root to the supremum
- of block */
-
- lock_rec_move(block, root,
- PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
- lock_mutex_exit_kernel();
-}
-
-/*************************************************************//**
-Updates the lock table when a page is copied to another and the original page
-is removed from the chain of leaf pages, except if page is the root! */
-UNIV_INTERN
-void
-lock_update_copy_and_discard(
-/*=========================*/
- const buf_block_t* new_block, /*!< in: index page to
- which copied */
- const buf_block_t* block) /*!< in: index page;
- NOT the root! */
-{
- lock_mutex_enter_kernel();
-
- /* Move the locks on the supremum of the old page to the supremum
- of new_page */
-
- lock_rec_move(new_block, block,
- PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
- lock_rec_free_all_from_discard_page(block);
-
- lock_mutex_exit_kernel();
-}
-
-/*************************************************************//**
-Updates the lock table when a page is split to the left. */
-UNIV_INTERN
-void
-lock_update_split_left(
-/*===================*/
- const buf_block_t* right_block, /*!< in: right page */
- const buf_block_t* left_block) /*!< in: left page */
-{
- ulint heap_no = lock_get_min_heap_no(right_block);
-
- lock_mutex_enter_kernel();
-
- /* Inherit the locks to the supremum of the left page from the
- successor of the infimum on the right page */
-
- lock_rec_inherit_to_gap(left_block, right_block,
- PAGE_HEAP_NO_SUPREMUM, heap_no);
-
- lock_mutex_exit_kernel();
-}
-
-/*************************************************************//**
-Updates the lock table when a page is merged to the left. */
-UNIV_INTERN
-void
-lock_update_merge_left(
-/*===================*/
- const buf_block_t* left_block, /*!< in: left page to
- which merged */
- const rec_t* orig_pred, /*!< in: original predecessor
- of supremum on the left page
- before merge */
- const buf_block_t* right_block) /*!< in: merged index page
- which will be discarded */
-{
- const rec_t* left_next_rec;
-
- ut_ad(left_block->frame == page_align(orig_pred));
-
- lock_mutex_enter_kernel();
-
- left_next_rec = page_rec_get_next_const(orig_pred);
-
- if (!page_rec_is_supremum(left_next_rec)) {
-
- /* Inherit the locks on the supremum of the left page to the
- first record which was moved from the right page */
-
- lock_rec_inherit_to_gap(left_block, left_block,
- page_rec_get_heap_no(left_next_rec),
- PAGE_HEAP_NO_SUPREMUM);
-
- /* Reset the locks on the supremum of the left page,
- releasing waiting transactions */
-
- lock_rec_reset_and_release_wait(left_block,
- PAGE_HEAP_NO_SUPREMUM);
- }
-
- /* Move the locks from the supremum of right page to the supremum
- of the left page */
-
- lock_rec_move(left_block, right_block,
- PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
-
- lock_rec_free_all_from_discard_page(right_block);
-
- lock_mutex_exit_kernel();
-}
-
-/*************************************************************//**
-Resets the original locks on heir and replaces them with gap type locks
-inherited from rec. */
-UNIV_INTERN
-void
-lock_rec_reset_and_inherit_gap_locks(
-/*=================================*/
- const buf_block_t* heir_block, /*!< in: block containing the
- record which inherits */
- const buf_block_t* block, /*!< in: block containing the
- record from which inherited;
- does NOT reset the locks on
- this record */
- ulint heir_heap_no, /*!< in: heap_no of the
- inheriting record */
- ulint heap_no) /*!< in: heap_no of the
- donating record */
-{
- mutex_enter(&kernel_mutex);
-
- lock_rec_reset_and_release_wait(heir_block, heir_heap_no);
-
- lock_rec_inherit_to_gap(heir_block, block, heir_heap_no, heap_no);
-
- mutex_exit(&kernel_mutex);
-}
-
-/*************************************************************//**
-Updates the lock table when a page is discarded. */
-UNIV_INTERN
-void
-lock_update_discard(
-/*================*/
- const buf_block_t* heir_block, /*!< in: index page
- which will inherit the locks */
- ulint heir_heap_no, /*!< in: heap_no of the record
- which will inherit the locks */
- const buf_block_t* block) /*!< in: index page
- which will be discarded */
-{
- const page_t* page = block->frame;
- const rec_t* rec;
- ulint heap_no;
-
- lock_mutex_enter_kernel();
-
- if (!lock_rec_get_first_on_page(block)) {
- /* No locks exist on page, nothing to do */
-
- lock_mutex_exit_kernel();
-
- return;
- }
-
- /* Inherit all the locks on the page to the record and reset all
- the locks on the page */
-
- if (page_is_comp(page)) {
- rec = page + PAGE_NEW_INFIMUM;
-
- do {
- heap_no = rec_get_heap_no_new(rec);
-
- lock_rec_inherit_to_gap(heir_block, block,
- heir_heap_no, heap_no);
-
- lock_rec_reset_and_release_wait(block, heap_no);
-
- rec = page + rec_get_next_offs(rec, TRUE);
- } while (heap_no != PAGE_HEAP_NO_SUPREMUM);
- } else {
- rec = page + PAGE_OLD_INFIMUM;
-
- do {
- heap_no = rec_get_heap_no_old(rec);
-
- lock_rec_inherit_to_gap(heir_block, block,
- heir_heap_no, heap_no);
-
- lock_rec_reset_and_release_wait(block, heap_no);
-
- rec = page + rec_get_next_offs(rec, FALSE);
- } while (heap_no != PAGE_HEAP_NO_SUPREMUM);
- }
-
- lock_rec_free_all_from_discard_page(block);
-
- lock_mutex_exit_kernel();
-}
-
-/*************************************************************//**
-Updates the lock table when a new user record is inserted. */
-UNIV_INTERN
-void
-lock_update_insert(
-/*===============*/
- const buf_block_t* block, /*!< in: buffer block containing rec */
- const rec_t* rec) /*!< in: the inserted record */
-{
- ulint receiver_heap_no;
- ulint donator_heap_no;
-
- ut_ad(block->frame == page_align(rec));
-
- /* Inherit the gap-locking locks for rec, in gap mode, from the next
- record */
-
- if (page_rec_is_comp(rec)) {
- receiver_heap_no = rec_get_heap_no_new(rec);
- donator_heap_no = rec_get_heap_no_new(
- page_rec_get_next_low(rec, TRUE));
- } else {
- receiver_heap_no = rec_get_heap_no_old(rec);
- donator_heap_no = rec_get_heap_no_old(
- page_rec_get_next_low(rec, FALSE));
- }
-
- lock_mutex_enter_kernel();
- lock_rec_inherit_to_gap_if_gap_lock(block,
- receiver_heap_no, donator_heap_no);
- lock_mutex_exit_kernel();
-}
-
-/*************************************************************//**
-Updates the lock table when a record is removed. */
-UNIV_INTERN
-void
-lock_update_delete(
-/*===============*/
- const buf_block_t* block, /*!< in: buffer block containing rec */
- const rec_t* rec) /*!< in: the record to be removed */
-{
- const page_t* page = block->frame;
- ulint heap_no;
- ulint next_heap_no;
-
- ut_ad(page == page_align(rec));
-
- if (page_is_comp(page)) {
- heap_no = rec_get_heap_no_new(rec);
- next_heap_no = rec_get_heap_no_new(page
- + rec_get_next_offs(rec,
- TRUE));
- } else {
- heap_no = rec_get_heap_no_old(rec);
- next_heap_no = rec_get_heap_no_old(page
- + rec_get_next_offs(rec,
- FALSE));
- }
-
- lock_mutex_enter_kernel();
-
- /* Let the next record inherit the locks from rec, in gap mode */
-
- lock_rec_inherit_to_gap(block, block, next_heap_no, heap_no);
-
- /* Reset the lock bits on rec and release waiting transactions */
-
- lock_rec_reset_and_release_wait(block, heap_no);
-
- lock_mutex_exit_kernel();
-}
-
-/*********************************************************************//**
-Stores on the page infimum record the explicit locks of another record.
-This function is used to store the lock state of a record when it is
-updated and the size of the record changes in the update. The record
-is moved in such an update, perhaps to another page. The infimum record
-acts as a dummy carrier record, taking care of lock releases while the
-actual record is being moved. */
-UNIV_INTERN
-void
-lock_rec_store_on_page_infimum(
-/*===========================*/
- const buf_block_t* block, /*!< in: buffer block containing rec */
- const rec_t* rec) /*!< in: record whose lock state
- is stored on the infimum
- record of the same page; lock
- bits are reset on the
- record */
-{
- ulint heap_no = page_rec_get_heap_no(rec);
-
- ut_ad(block->frame == page_align(rec));
-
- lock_mutex_enter_kernel();
-
- lock_rec_move(block, block, PAGE_HEAP_NO_INFIMUM, heap_no);
-
- lock_mutex_exit_kernel();
-}
-
-/*********************************************************************//**
-Restores the state of explicit lock requests on a single record, where the
-state was stored on the infimum of the page. */
-UNIV_INTERN
-void
-lock_rec_restore_from_page_infimum(
-/*===============================*/
- const buf_block_t* block, /*!< in: buffer block containing rec */
- const rec_t* rec, /*!< in: record whose lock state
- is restored */
- const buf_block_t* donator)/*!< in: page (rec is not
- necessarily on this page)
- whose infimum stored the lock
- state; lock bits are reset on
- the infimum */
-{
- ulint heap_no = page_rec_get_heap_no(rec);
-
- lock_mutex_enter_kernel();
-
- lock_rec_move(block, donator, heap_no, PAGE_HEAP_NO_INFIMUM);
-
- lock_mutex_exit_kernel();
-}
-
-/*=========== DEADLOCK CHECKING ======================================*/
-
-/********************************************************************//**
-Checks if a lock request results in a deadlock.
-@return TRUE if a deadlock was detected and we chose trx as a victim;
-FALSE if no deadlock, or there was a deadlock, but we chose other
-transaction(s) as victim(s) */
-static
-ibool
-lock_deadlock_occurs(
-/*=================*/
- lock_t* lock, /*!< in: lock the transaction is requesting */
- trx_t* trx) /*!< in: transaction */
-{
- dict_table_t* table;
- dict_index_t* index;
- trx_t* mark_trx;
- ulint ret;
- ulint cost = 0;
-
- ut_ad(trx);
- ut_ad(lock);
- ut_ad(mutex_own(&kernel_mutex));
-retry:
- /* We check that adding this trx to the waits-for graph
- does not produce a cycle. First mark all active transactions
- with 0: */
-
- mark_trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-
- while (mark_trx) {
- mark_trx->deadlock_mark = 0;
- mark_trx = UT_LIST_GET_NEXT(trx_list, mark_trx);
- }
-
- ret = lock_deadlock_recursive(trx, trx, lock, &cost, 0);
-
- if (ret == LOCK_VICTIM_IS_OTHER) {
- /* We chose some other trx as a victim: retry if there still
- is a deadlock */
-
- goto retry;
- }
-
- if (UNIV_UNLIKELY(ret == LOCK_VICTIM_IS_START)) {
- if (lock_get_type_low(lock) & LOCK_TABLE) {
- table = lock->un_member.tab_lock.table;
- index = NULL;
- } else {
- index = lock->index;
- table = index->table;
- }
-
- lock_deadlock_found = TRUE;
-
- fputs("*** WE ROLL BACK TRANSACTION (2)\n",
- lock_latest_err_file);
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/********************************************************************//**
-Looks recursively for a deadlock.
-@return 0 if no deadlock found, LOCK_VICTIM_IS_START if there was a
-deadlock and we chose 'start' as the victim, LOCK_VICTIM_IS_OTHER if a
-deadlock was found and we chose some other trx as a victim: we must do
-the search again in this last case because there may be another
-deadlock! */
-static
-ulint
-lock_deadlock_recursive(
-/*====================*/
- trx_t* start, /*!< in: recursion starting point */
- trx_t* trx, /*!< in: a transaction waiting for a lock */
- lock_t* wait_lock, /*!< in: the lock trx is waiting to be granted */
- ulint* cost, /*!< in/out: number of calculation steps thus
- far: if this exceeds LOCK_MAX_N_STEPS_...
- we return LOCK_VICTIM_IS_START */
- ulint depth) /*!< in: recursion depth: if this exceeds
- LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we
- return LOCK_VICTIM_IS_START */
-{
- lock_t* lock;
- ulint bit_no = ULINT_UNDEFINED;
- trx_t* lock_trx;
- ulint ret;
-
- ut_a(trx);
- ut_a(start);
- ut_a(wait_lock);
- ut_ad(mutex_own(&kernel_mutex));
-
- if (trx->deadlock_mark == 1) {
- /* We have already exhaustively searched the subtree starting
- from this trx */
-
- return(0);
- }
-
- *cost = *cost + 1;
-
- lock = wait_lock;
-
- if (lock_get_type_low(wait_lock) == LOCK_REC) {
-
- bit_no = lock_rec_find_set_bit(wait_lock);
-
- ut_a(bit_no != ULINT_UNDEFINED);
- }
-
- /* Look at the locks ahead of wait_lock in the lock queue */
-
- for (;;) {
- if (lock_get_type_low(lock) & LOCK_TABLE) {
-
- lock = UT_LIST_GET_PREV(un_member.tab_lock.locks,
- lock);
- } else {
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
- ut_a(bit_no != ULINT_UNDEFINED);
-
- lock = (lock_t*) lock_rec_get_prev(lock, bit_no);
- }
-
- if (lock == NULL) {
- /* We can mark this subtree as searched */
- trx->deadlock_mark = 1;
-
- return(FALSE);
- }
-
- if (lock_has_to_wait(wait_lock, lock)) {
-
- ibool too_far
- = depth > LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK
- || *cost > LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK;
-
- lock_trx = lock->trx;
-
- if (lock_trx == start || too_far) {
-
- /* We came back to the recursion starting
- point: a deadlock detected; or we have
- searched the waits-for graph too long */
-
- FILE* ef = lock_latest_err_file;
-
- rewind(ef);
- ut_print_timestamp(ef);
-
- fputs("\n*** (1) TRANSACTION:\n", ef);
-
- trx_print(ef, wait_lock->trx, 3000);
-
- fputs("*** (1) WAITING FOR THIS LOCK"
- " TO BE GRANTED:\n", ef);
-
- if (lock_get_type_low(wait_lock) == LOCK_REC) {
- lock_rec_print(ef, wait_lock);
- } else {
- lock_table_print(ef, wait_lock);
- }
-
- fputs("*** (2) TRANSACTION:\n", ef);
-
- trx_print(ef, lock->trx, 3000);
-
- fputs("*** (2) HOLDS THE LOCK(S):\n", ef);
-
- if (lock_get_type_low(lock) == LOCK_REC) {
- lock_rec_print(ef, lock);
- } else {
- lock_table_print(ef, lock);
- }
-
- fputs("*** (2) WAITING FOR THIS LOCK"
- " TO BE GRANTED:\n", ef);
-
- if (lock_get_type_low(start->wait_lock)
- == LOCK_REC) {
- lock_rec_print(ef, start->wait_lock);
- } else {
- lock_table_print(ef, start->wait_lock);
- }
-#ifdef UNIV_DEBUG
- if (lock_print_waits) {
- fputs("Deadlock detected"
- " or too long search\n",
- stderr);
- }
-#endif /* UNIV_DEBUG */
- if (too_far) {
-
- fputs("TOO DEEP OR LONG SEARCH"
- " IN THE LOCK TABLE"
- " WAITS-FOR GRAPH\n", ef);
-
- return(LOCK_VICTIM_IS_START);
- }
-
- if (trx_weight_cmp(wait_lock->trx,
- start) >= 0) {
- /* Our recursion starting point
- transaction is 'smaller', let us
- choose 'start' as the victim and roll
- back it */
-
- return(LOCK_VICTIM_IS_START);
- }
-
- lock_deadlock_found = TRUE;
-
- /* Let us choose the transaction of wait_lock
- as a victim to try to avoid deadlocking our
- recursion starting point transaction */
-
- fputs("*** WE ROLL BACK TRANSACTION (1)\n",
- ef);
-
- wait_lock->trx->was_chosen_as_deadlock_victim
- = TRUE;
-
- lock_cancel_waiting_and_release(wait_lock);
-
- /* Since trx and wait_lock are no longer
- in the waits-for graph, we can return FALSE;
- note that our selective algorithm can choose
- several transactions as victims, but still
- we may end up rolling back also the recursion
- starting point transaction! */
-
- return(LOCK_VICTIM_IS_OTHER);
- }
-
- if (lock_trx->que_state == TRX_QUE_LOCK_WAIT) {
-
- /* Another trx ahead has requested lock in an
- incompatible mode, and is itself waiting for
- a lock */
-
- ret = lock_deadlock_recursive(
- start, lock_trx,
- lock_trx->wait_lock, cost, depth + 1);
- if (ret != 0) {
-
- return(ret);
- }
- }
- }
- }/* end of the 'for (;;)'-loop */
-}
-
-/*========================= TABLE LOCKS ==============================*/
-
-/*********************************************************************//**
-Creates a table lock object and adds it as the last in the lock queue
-of the table. Does NOT check for deadlocks or lock compatibility.
-@return own: new lock object */
-UNIV_INLINE
-lock_t*
-lock_table_create(
-/*==============*/
- dict_table_t* table, /*!< in: database table in dictionary cache */
- ulint type_mode,/*!< in: lock mode possibly ORed with
- LOCK_WAIT */
- trx_t* trx) /*!< in: trx */
-{
- lock_t* lock;
-
- ut_ad(table && trx);
- ut_ad(mutex_own(&kernel_mutex));
-
- if ((type_mode & LOCK_MODE_MASK) == LOCK_AUTO_INC) {
- ++table->n_waiting_or_granted_auto_inc_locks;
- }
-
- /* For AUTOINC locking we reuse the lock instance only if
- there is no wait involved else we allocate the waiting lock
- from the transaction lock heap. */
- if (type_mode == LOCK_AUTO_INC) {
-
- lock = table->autoinc_lock;
-
- table->autoinc_trx = trx;
-
- ib_vector_push(trx->autoinc_locks, lock);
- } else {
- lock = mem_heap_alloc(trx->lock_heap, sizeof(lock_t));
- }
-
- UT_LIST_ADD_LAST(trx_locks, trx->trx_locks, lock);
-
- lock->type_mode = type_mode | LOCK_TABLE;
- lock->trx = trx;
-
- lock->un_member.tab_lock.table = table;
-
- UT_LIST_ADD_LAST(un_member.tab_lock.locks, table->locks, lock);
-
- if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
-
- lock_set_lock_and_trx_wait(lock, trx);
- }
-
- return(lock);
-}
-
-/*************************************************************//**
-Removes a table lock request from the queue and the trx list of locks;
-this is a low-level function which does NOT check if waiting requests
-can now be granted. */
-UNIV_INLINE
-void
-lock_table_remove_low(
-/*==================*/
- lock_t* lock) /*!< in: table lock */
-{
- trx_t* trx;
- dict_table_t* table;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- trx = lock->trx;
- table = lock->un_member.tab_lock.table;
-
- /* Remove the table from the transaction's AUTOINC vector, if
- the lock that is being release is an AUTOINC lock. */
- if (lock_get_mode(lock) == LOCK_AUTO_INC) {
-
- /* The table's AUTOINC lock can get transferred to
- another transaction before we get here. */
- if (table->autoinc_trx == trx) {
- table->autoinc_trx = NULL;
- }
-
- /* The locks must be freed in the reverse order from
- the one in which they were acquired. This is to avoid
- traversing the AUTOINC lock vector unnecessarily.
-
- We only store locks that were granted in the
- trx->autoinc_locks vector (see lock_table_create()
- and lock_grant()). Therefore it can be empty and we
- need to check for that. */
-
- if (!lock_get_wait(lock)
- && !ib_vector_is_empty(trx->autoinc_locks)) {
- lock_t* autoinc_lock;
-
- autoinc_lock = ib_vector_pop(trx->autoinc_locks);
- ut_a(autoinc_lock == lock);
- }
-
- ut_a(table->n_waiting_or_granted_auto_inc_locks > 0);
- --table->n_waiting_or_granted_auto_inc_locks;
- }
-
- UT_LIST_REMOVE(trx_locks, trx->trx_locks, lock);
- UT_LIST_REMOVE(un_member.tab_lock.locks, table->locks, lock);
-}
-
-/*********************************************************************//**
-Enqueues a waiting request for a table lock which cannot be granted
-immediately. Checks for deadlocks.
-@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
-DB_SUCCESS; DB_SUCCESS means that there was a deadlock, but another
-transaction was chosen as a victim, and we got the lock immediately:
-no need to wait then */
-static
-ulint
-lock_table_enqueue_waiting(
-/*=======================*/
- ulint mode, /*!< in: lock mode this transaction is
- requesting */
- dict_table_t* table, /*!< in: table */
- que_thr_t* thr) /*!< in: query thread */
-{
- lock_t* lock;
- trx_t* trx;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- /* Test if there already is some other reason to suspend thread:
- we do not enqueue a lock request if the query thread should be
- stopped anyway */
-
- if (que_thr_stop(thr)) {
- ut_error;
-
- return(DB_QUE_THR_SUSPENDED);
- }
-
- trx = thr_get_trx(thr);
-
- switch (trx_get_dict_operation(trx)) {
- case TRX_DICT_OP_NONE:
- break;
- case TRX_DICT_OP_TABLE:
- case TRX_DICT_OP_INDEX:
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: a table lock wait happens"
- " in a dictionary operation!\n"
- "InnoDB: Table name ", stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs(".\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n",
- stderr);
- }
-
- /* Enqueue the lock request that will wait to be granted */
-
- lock = lock_table_create(table, mode | LOCK_WAIT, trx);
-
- /* Check if a deadlock occurs: if yes, remove the lock request and
- return an error code */
-
- if (lock_deadlock_occurs(lock, trx)) {
-
- /* The order here is important, we don't want to
- lose the state of the lock before calling remove. */
- lock_table_remove_low(lock);
- lock_reset_lock_and_trx_wait(lock);
-
- return(DB_DEADLOCK);
- }
-
- if (trx->wait_lock == NULL) {
- /* Deadlock resolution chose another transaction as a victim,
- and we accidentally got our lock granted! */
-
- return(DB_SUCCESS);
- }
-
- trx->que_state = TRX_QUE_LOCK_WAIT;
- trx->was_chosen_as_deadlock_victim = FALSE;
- trx->wait_started = time(NULL);
-
- ut_a(que_thr_stop(thr));
-
- return(DB_LOCK_WAIT);
-}
-
-/*********************************************************************//**
-Checks if other transactions have an incompatible mode lock request in
-the lock queue. */
-UNIV_INLINE
-ibool
-lock_table_other_has_incompatible(
-/*==============================*/
- trx_t* trx, /*!< in: transaction, or NULL if all
- transactions should be included */
- ulint wait, /*!< in: LOCK_WAIT if also waiting locks are
- taken into account, or 0 if not */
- dict_table_t* table, /*!< in: table */
- enum lock_mode mode) /*!< in: lock mode */
-{
- lock_t* lock;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- lock = UT_LIST_GET_LAST(table->locks);
-
- while (lock != NULL) {
-
- if ((lock->trx != trx)
- && (!lock_mode_compatible(lock_get_mode(lock), mode))
- && (wait || !(lock_get_wait(lock)))) {
-
- return(TRUE);
- }
-
- lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Locks the specified database table in the mode given. If the lock cannot
-be granted immediately, the query thread is put to wait.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-ulint
-lock_table(
-/*=======*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- dict_table_t* table, /*!< in: database table in dictionary cache */
- enum lock_mode mode, /*!< in: lock mode */
- que_thr_t* thr) /*!< in: query thread */
-{
- trx_t* trx;
- ulint err;
-
- ut_ad(table && thr);
-
- if (flags & BTR_NO_LOCKING_FLAG) {
-
- return(DB_SUCCESS);
- }
-
- ut_a(flags == 0);
-
- trx = thr_get_trx(thr);
-
- lock_mutex_enter_kernel();
-
- /* Look for stronger locks the same trx already has on the table */
-
- if (lock_table_has(trx, table, mode)) {
-
- lock_mutex_exit_kernel();
-
- return(DB_SUCCESS);
- }
-
- /* We have to check if the new lock is compatible with any locks
- other transactions have in the table lock queue. */
-
- if (lock_table_other_has_incompatible(trx, LOCK_WAIT, table, mode)) {
-
- /* Another trx has a request on the table in an incompatible
- mode: this trx may have to wait */
-
- err = lock_table_enqueue_waiting(mode | flags, table, thr);
-
- lock_mutex_exit_kernel();
-
- return(err);
- }
-
- lock_table_create(table, mode | flags, trx);
-
- ut_a(!flags || mode == LOCK_S || mode == LOCK_X);
-
- lock_mutex_exit_kernel();
-
- return(DB_SUCCESS);
-}
-
-/*********************************************************************//**
-Checks if a waiting table lock request still has to wait in a queue.
-@return TRUE if still has to wait */
-static
-ibool
-lock_table_has_to_wait_in_queue(
-/*============================*/
- lock_t* wait_lock) /*!< in: waiting table lock */
-{
- dict_table_t* table;
- lock_t* lock;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(lock_get_wait(wait_lock));
-
- table = wait_lock->un_member.tab_lock.table;
-
- lock = UT_LIST_GET_FIRST(table->locks);
-
- while (lock != wait_lock) {
-
- if (lock_has_to_wait(wait_lock, lock)) {
-
- return(TRUE);
- }
-
- lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock);
- }
-
- return(FALSE);
-}
-
-/*************************************************************//**
-Removes a table lock request, waiting or granted, from the queue and grants
-locks to other transactions in the queue, if they now are entitled to a
-lock. */
-static
-void
-lock_table_dequeue(
-/*===============*/
- lock_t* in_lock)/*!< in: table lock object; transactions waiting
- behind will get their lock requests granted, if
- they are now qualified to it */
-{
- lock_t* lock;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_a(lock_get_type_low(in_lock) == LOCK_TABLE);
-
- lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, in_lock);
-
- lock_table_remove_low(in_lock);
-
- /* Check if waiting locks in the queue can now be granted: grant
- locks if there are no conflicting locks ahead. */
-
- while (lock != NULL) {
-
- if (lock_get_wait(lock)
- && !lock_table_has_to_wait_in_queue(lock)) {
-
- /* Grant the lock */
- lock_grant(lock);
- }
-
- lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock);
- }
-}
-
-/*=========================== LOCK RELEASE ==============================*/
-
-/*************************************************************//**
-Removes a granted record lock of a transaction from the queue and grants
-locks to other transactions waiting in the queue if they now are entitled
-to a lock. */
-UNIV_INTERN
-void
-lock_rec_unlock(
-/*============*/
- trx_t* trx, /*!< in: transaction that has
- set a record lock */
- const buf_block_t* block, /*!< in: buffer block containing rec */
- const rec_t* rec, /*!< in: record */
- enum lock_mode lock_mode)/*!< in: LOCK_S or LOCK_X */
-{
- lock_t* lock;
- lock_t* release_lock = NULL;
- ulint heap_no;
-
- ut_ad(trx && rec);
- ut_ad(block->frame == page_align(rec));
-
- heap_no = page_rec_get_heap_no(rec);
-
- mutex_enter(&kernel_mutex);
-
- lock = lock_rec_get_first(block, heap_no);
-
- /* Find the last lock with the same lock_mode and transaction
- from the record. */
-
- while (lock != NULL) {
- if (lock->trx == trx && lock_get_mode(lock) == lock_mode) {
- release_lock = lock;
- ut_a(!lock_get_wait(lock));
- }
-
- lock = lock_rec_get_next(heap_no, lock);
- }
-
- /* If a record lock is found, release the record lock */
-
- if (UNIV_LIKELY(release_lock != NULL)) {
- lock_rec_reset_nth_bit(release_lock, heap_no);
- } else {
- mutex_exit(&kernel_mutex);
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: unlock row could not"
- " find a %lu mode lock on the record\n",
- (ulong) lock_mode);
-
- return;
- }
-
- /* Check if we can now grant waiting lock requests */
-
- lock = lock_rec_get_first(block, heap_no);
-
- while (lock != NULL) {
- if (lock_get_wait(lock)
- && !lock_rec_has_to_wait_in_queue(lock)) {
-
- /* Grant the lock */
- lock_grant(lock);
- }
-
- lock = lock_rec_get_next(heap_no, lock);
- }
-
- mutex_exit(&kernel_mutex);
-}
-
-/*********************************************************************//**
-Releases transaction locks, and releases possible other transactions waiting
-because of these locks. */
-UNIV_INTERN
-void
-lock_release_off_kernel(
-/*====================*/
- trx_t* trx) /*!< in: transaction */
-{
- dict_table_t* table;
- ulint count;
- lock_t* lock;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- lock = UT_LIST_GET_LAST(trx->trx_locks);
-
- count = 0;
-
- while (lock != NULL) {
-
- count++;
-
- if (lock_get_type_low(lock) == LOCK_REC) {
-
- lock_rec_dequeue_from_page(lock);
- } else {
- ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
-
- if (lock_get_mode(lock) != LOCK_IS
- && !ut_dulint_is_zero(trx->undo_no)) {
-
- /* The trx may have modified the table. We
- block the use of the MySQL query cache for
- all currently active transactions. */
-
- table = lock->un_member.tab_lock.table;
-
- table->query_cache_inv_trx_id
- = trx_sys->max_trx_id;
- }
-
- lock_table_dequeue(lock);
- }
-
- if (count == LOCK_RELEASE_KERNEL_INTERVAL) {
- /* Release the kernel mutex for a while, so that we
- do not monopolize it */
-
- lock_mutex_exit_kernel();
-
- lock_mutex_enter_kernel();
-
- count = 0;
- }
-
- lock = UT_LIST_GET_LAST(trx->trx_locks);
- }
-
- ut_a(ib_vector_size(trx->autoinc_locks) == 0);
-
- mem_heap_empty(trx->lock_heap);
-}
-
-/*********************************************************************//**
-Cancels a waiting lock request and releases possible other transactions
-waiting behind it. */
-UNIV_INTERN
-void
-lock_cancel_waiting_and_release(
-/*============================*/
- lock_t* lock) /*!< in: waiting lock request */
-{
- ut_ad(mutex_own(&kernel_mutex));
-
- if (lock_get_type_low(lock) == LOCK_REC) {
-
- lock_rec_dequeue_from_page(lock);
- } else {
- ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
-
- if (lock->trx->autoinc_locks != NULL) {
- /* Release the transaction's AUTOINC locks/ */
- lock_release_autoinc_locks(lock->trx);
- }
-
- lock_table_dequeue(lock);
- }
-
- /* Reset the wait flag and the back pointer to lock in trx */
-
- lock_reset_lock_and_trx_wait(lock);
-
- /* The following function releases the trx from lock wait */
-
- trx_end_lock_wait(lock->trx);
-}
-
-/* True if a lock mode is S or X */
-#define IS_LOCK_S_OR_X(lock) \
- (lock_get_mode(lock) == LOCK_S \
- || lock_get_mode(lock) == LOCK_X)
-
-
-/*********************************************************************//**
-Removes locks of a transaction on a table to be dropped.
-If remove_also_table_sx_locks is TRUE then table-level S and X locks are
-also removed in addition to other table-level and record-level locks.
-No lock, that is going to be removed, is allowed to be a wait lock. */
-static
-void
-lock_remove_all_on_table_for_trx(
-/*=============================*/
- dict_table_t* table, /*!< in: table to be dropped */
- trx_t* trx, /*!< in: a transaction */
- ibool remove_also_table_sx_locks)/*!< in: also removes
- table S and X locks */
-{
- lock_t* lock;
- lock_t* prev_lock;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- lock = UT_LIST_GET_LAST(trx->trx_locks);
-
- while (lock != NULL) {
- prev_lock = UT_LIST_GET_PREV(trx_locks, lock);
-
- if (lock_get_type_low(lock) == LOCK_REC
- && lock->index->table == table) {
- ut_a(!lock_get_wait(lock));
-
- lock_rec_discard(lock);
- } else if (lock_get_type_low(lock) & LOCK_TABLE
- && lock->un_member.tab_lock.table == table
- && (remove_also_table_sx_locks
- || !IS_LOCK_S_OR_X(lock))) {
-
- ut_a(!lock_get_wait(lock));
-
- lock_table_remove_low(lock);
- }
-
- lock = prev_lock;
- }
-}
-
-/*********************************************************************//**
-Removes locks on a table to be dropped or truncated.
-If remove_also_table_sx_locks is TRUE then table-level S and X locks are
-also removed in addition to other table-level and record-level locks.
-No lock, that is going to be removed, is allowed to be a wait lock. */
-UNIV_INTERN
-void
-lock_remove_all_on_table(
-/*=====================*/
- dict_table_t* table, /*!< in: table to be dropped
- or truncated */
- ibool remove_also_table_sx_locks)/*!< in: also removes
- table S and X locks */
-{
- lock_t* lock;
- lock_t* prev_lock;
-
- mutex_enter(&kernel_mutex);
-
- lock = UT_LIST_GET_FIRST(table->locks);
-
- while (lock != NULL) {
-
- prev_lock = UT_LIST_GET_PREV(un_member.tab_lock.locks,
- lock);
-
- /* If we should remove all locks (remove_also_table_sx_locks
- is TRUE), or if the lock is not table-level S or X lock,
- then check we are not going to remove a wait lock. */
- if (remove_also_table_sx_locks
- || !(lock_get_type(lock) == LOCK_TABLE
- && IS_LOCK_S_OR_X(lock))) {
-
- ut_a(!lock_get_wait(lock));
- }
-
- lock_remove_all_on_table_for_trx(table, lock->trx,
- remove_also_table_sx_locks);
-
- if (prev_lock == NULL) {
- if (lock == UT_LIST_GET_FIRST(table->locks)) {
- /* lock was not removed, pick its successor */
- lock = UT_LIST_GET_NEXT(
- un_member.tab_lock.locks, lock);
- } else {
- /* lock was removed, pick the first one */
- lock = UT_LIST_GET_FIRST(table->locks);
- }
- } else if (UT_LIST_GET_NEXT(un_member.tab_lock.locks,
- prev_lock) != lock) {
- /* If lock was removed by
- lock_remove_all_on_table_for_trx() then pick the
- successor of prev_lock ... */
- lock = UT_LIST_GET_NEXT(
- un_member.tab_lock.locks, prev_lock);
- } else {
- /* ... otherwise pick the successor of lock. */
- lock = UT_LIST_GET_NEXT(
- un_member.tab_lock.locks, lock);
- }
- }
-
- mutex_exit(&kernel_mutex);
-}
-
-/*===================== VALIDATION AND DEBUGGING ====================*/
-
-/*********************************************************************//**
-Prints info of a table lock. */
-UNIV_INTERN
-void
-lock_table_print(
-/*=============*/
- FILE* file, /*!< in: file where to print */
- const lock_t* lock) /*!< in: table type lock */
-{
- ut_ad(mutex_own(&kernel_mutex));
- ut_a(lock_get_type_low(lock) == LOCK_TABLE);
-
- fputs("TABLE LOCK table ", file);
- ut_print_name(file, lock->trx, TRUE,
- lock->un_member.tab_lock.table->name);
- fprintf(file, " trx id " TRX_ID_FMT,
- TRX_ID_PREP_PRINTF(lock->trx->id));
-
- if (lock_get_mode(lock) == LOCK_S) {
- fputs(" lock mode S", file);
- } else if (lock_get_mode(lock) == LOCK_X) {
- fputs(" lock mode X", file);
- } else if (lock_get_mode(lock) == LOCK_IS) {
- fputs(" lock mode IS", file);
- } else if (lock_get_mode(lock) == LOCK_IX) {
- fputs(" lock mode IX", file);
- } else if (lock_get_mode(lock) == LOCK_AUTO_INC) {
- fputs(" lock mode AUTO-INC", file);
- } else {
- fprintf(file, " unknown lock mode %lu",
- (ulong) lock_get_mode(lock));
- }
-
- if (lock_get_wait(lock)) {
- fputs(" waiting", file);
- }
-
- putc('\n', file);
-}
-
-/*********************************************************************//**
-Prints info of a record lock. */
-UNIV_INTERN
-void
-lock_rec_print(
-/*===========*/
- FILE* file, /*!< in: file where to print */
- const lock_t* lock) /*!< in: record type lock */
-{
- const buf_block_t* block;
- ulint space;
- ulint page_no;
- ulint i;
- mtr_t mtr;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_a(lock_get_type_low(lock) == LOCK_REC);
-
- space = lock->un_member.rec_lock.space;
- page_no = lock->un_member.rec_lock.page_no;
-
- fprintf(file, "RECORD LOCKS space id %lu page no %lu n bits %lu ",
- (ulong) space, (ulong) page_no,
- (ulong) lock_rec_get_n_bits(lock));
- dict_index_name_print(file, lock->trx, lock->index);
- fprintf(file, " trx id " TRX_ID_FMT,
- TRX_ID_PREP_PRINTF(lock->trx->id));
-
- if (lock_get_mode(lock) == LOCK_S) {
- fputs(" lock mode S", file);
- } else if (lock_get_mode(lock) == LOCK_X) {
- fputs(" lock_mode X", file);
- } else {
- ut_error;
- }
-
- if (lock_rec_get_gap(lock)) {
- fputs(" locks gap before rec", file);
- }
-
- if (lock_rec_get_rec_not_gap(lock)) {
- fputs(" locks rec but not gap", file);
- }
-
- if (lock_rec_get_insert_intention(lock)) {
- fputs(" insert intention", file);
- }
-
- if (lock_get_wait(lock)) {
- fputs(" waiting", file);
- }
-
- mtr_start(&mtr);
-
- putc('\n', file);
-
- block = buf_page_try_get(space, page_no, &mtr);
-
- if (block) {
- for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
-
- if (lock_rec_get_nth_bit(lock, i)) {
-
- const rec_t* rec
- = page_find_rec_with_heap_no(
- buf_block_get_frame(block), i);
- offsets = rec_get_offsets(
- rec, lock->index, offsets,
- ULINT_UNDEFINED, &heap);
-
- fprintf(file, "Record lock, heap no %lu ",
- (ulong) i);
- rec_print_new(file, rec, offsets);
- putc('\n', file);
- }
- }
- } else {
- for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
- fprintf(file, "Record lock, heap no %lu\n", (ulong) i);
- }
- }
-
- mtr_commit(&mtr);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-}
-
-#ifdef UNIV_DEBUG
-/* Print the number of lock structs from lock_print_info_summary() only
-in non-production builds for performance reasons, see
-http://bugs.mysql.com/36942 */
-#define PRINT_NUM_OF_LOCK_STRUCTS
-#endif /* UNIV_DEBUG */
-
-#ifdef PRINT_NUM_OF_LOCK_STRUCTS
-/*********************************************************************//**
-Calculates the number of record lock structs in the record lock hash table.
-@return number of record locks */
-static
-ulint
-lock_get_n_rec_locks(void)
-/*======================*/
-{
- lock_t* lock;
- ulint n_locks = 0;
- ulint i;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- for (i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
-
- lock = HASH_GET_FIRST(lock_sys->rec_hash, i);
-
- while (lock) {
- n_locks++;
-
- lock = HASH_GET_NEXT(hash, lock);
- }
- }
-
- return(n_locks);
-}
-#endif /* PRINT_NUM_OF_LOCK_STRUCTS */
-
-/*********************************************************************//**
-Prints info of locks for all transactions. */
-UNIV_INTERN
-void
-lock_print_info_summary(
-/*====================*/
- FILE* file) /*!< in: file where to print */
-{
- /* We must protect the MySQL thd->query field with a MySQL mutex, and
- because the MySQL mutex must be reserved before the kernel_mutex of
- InnoDB, we call innobase_mysql_prepare_print_arbitrary_thd() here. */
-
- innobase_mysql_prepare_print_arbitrary_thd();
- lock_mutex_enter_kernel();
-
- if (lock_deadlock_found) {
- fputs("------------------------\n"
- "LATEST DETECTED DEADLOCK\n"
- "------------------------\n", file);
-
- ut_copy_file(file, lock_latest_err_file);
- }
-
- fputs("------------\n"
- "TRANSACTIONS\n"
- "------------\n", file);
-
- fprintf(file, "Trx id counter " TRX_ID_FMT "\n",
- TRX_ID_PREP_PRINTF(trx_sys->max_trx_id));
-
- fprintf(file,
- "Purge done for trx's n:o < " TRX_ID_FMT
- " undo n:o < " TRX_ID_FMT "\n",
- TRX_ID_PREP_PRINTF(purge_sys->purge_trx_no),
- TRX_ID_PREP_PRINTF(purge_sys->purge_undo_no));
-
- fprintf(file,
- "History list length %lu\n",
- (ulong) trx_sys->rseg_history_len);
-
-#ifdef PRINT_NUM_OF_LOCK_STRUCTS
- fprintf(file,
- "Total number of lock structs in row lock hash table %lu\n",
- (ulong) lock_get_n_rec_locks());
-#endif /* PRINT_NUM_OF_LOCK_STRUCTS */
-}
-
-/*********************************************************************//**
-Prints info of locks for each transaction. */
-UNIV_INTERN
-void
-lock_print_info_all_transactions(
-/*=============================*/
- FILE* file) /*!< in: file where to print */
-{
- lock_t* lock;
- ibool load_page_first = TRUE;
- ulint nth_trx = 0;
- ulint nth_lock = 0;
- ulint i;
- mtr_t mtr;
- trx_t* trx;
-
- fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n");
-
- /* First print info on non-active transactions */
-
- trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
-
- while (trx) {
- if (trx->conc_state == TRX_NOT_STARTED) {
- fputs("---", file);
- trx_print(file, trx, 600);
- }
-
- trx = UT_LIST_GET_NEXT(mysql_trx_list, trx);
- }
-
-loop:
- trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-
- i = 0;
-
- /* Since we temporarily release the kernel mutex when
- reading a database page in below, variable trx may be
- obsolete now and we must loop through the trx list to
- get probably the same trx, or some other trx. */
-
- while (trx && (i < nth_trx)) {
- trx = UT_LIST_GET_NEXT(trx_list, trx);
- i++;
- }
-
- if (trx == NULL) {
- lock_mutex_exit_kernel();
- innobase_mysql_end_print_arbitrary_thd();
-
- ut_ad(lock_validate());
-
- return;
- }
-
- if (nth_lock == 0) {
- fputs("---", file);
- trx_print(file, trx, 600);
-
- if (trx->read_view) {
- fprintf(file,
- "Trx read view will not see trx with"
- " id >= " TRX_ID_FMT
- ", sees < " TRX_ID_FMT "\n",
- TRX_ID_PREP_PRINTF(
- trx->read_view->low_limit_id),
- TRX_ID_PREP_PRINTF(
- trx->read_view->up_limit_id));
- }
-
- if (trx->que_state == TRX_QUE_LOCK_WAIT) {
- fprintf(file,
- "------- TRX HAS BEEN WAITING %lu SEC"
- " FOR THIS LOCK TO BE GRANTED:\n",
- (ulong) difftime(time(NULL),
- trx->wait_started));
-
- if (lock_get_type_low(trx->wait_lock) == LOCK_REC) {
- lock_rec_print(file, trx->wait_lock);
- } else {
- lock_table_print(file, trx->wait_lock);
- }
-
- fputs("------------------\n", file);
- }
- }
-
- if (!srv_print_innodb_lock_monitor) {
- nth_trx++;
- goto loop;
- }
-
- i = 0;
-
- /* Look at the note about the trx loop above why we loop here:
- lock may be an obsolete pointer now. */
-
- lock = UT_LIST_GET_FIRST(trx->trx_locks);
-
- while (lock && (i < nth_lock)) {
- lock = UT_LIST_GET_NEXT(trx_locks, lock);
- i++;
- }
-
- if (lock == NULL) {
- nth_trx++;
- nth_lock = 0;
-
- goto loop;
- }
-
- if (lock_get_type_low(lock) == LOCK_REC) {
- if (load_page_first) {
- ulint space = lock->un_member.rec_lock.space;
- ulint zip_size= fil_space_get_zip_size(space);
- ulint page_no = lock->un_member.rec_lock.page_no;
-
- if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
-
- /* It is a single table tablespace and
- the .ibd file is missing (TRUNCATE
- TABLE probably stole the locks): just
- print the lock without attempting to
- load the page in the buffer pool. */
-
- fprintf(file, "RECORD LOCKS on"
- " non-existing space %lu\n",
- (ulong) space);
- goto print_rec;
- }
-
- lock_mutex_exit_kernel();
- innobase_mysql_end_print_arbitrary_thd();
-
- mtr_start(&mtr);
-
- buf_page_get_with_no_latch(space, zip_size,
- page_no, &mtr);
-
- mtr_commit(&mtr);
-
- load_page_first = FALSE;
-
- innobase_mysql_prepare_print_arbitrary_thd();
- lock_mutex_enter_kernel();
-
- goto loop;
- }
-
-print_rec:
- lock_rec_print(file, lock);
- } else {
- ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
-
- lock_table_print(file, lock);
- }
-
- load_page_first = TRUE;
-
- nth_lock++;
-
- if (nth_lock >= 10) {
- fputs("10 LOCKS PRINTED FOR THIS TRX:"
- " SUPPRESSING FURTHER PRINTS\n",
- file);
-
- nth_trx++;
- nth_lock = 0;
-
- goto loop;
- }
-
- goto loop;
-}
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Validates the lock queue on a table.
-@return TRUE if ok */
-static
-ibool
-lock_table_queue_validate(
-/*======================*/
- dict_table_t* table) /*!< in: table */
-{
- lock_t* lock;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- lock = UT_LIST_GET_FIRST(table->locks);
-
- while (lock) {
- ut_a(((lock->trx)->conc_state == TRX_ACTIVE)
- || ((lock->trx)->conc_state == TRX_PREPARED)
- || ((lock->trx)->conc_state == TRX_COMMITTED_IN_MEMORY));
-
- if (!lock_get_wait(lock)) {
-
- ut_a(!lock_table_other_has_incompatible(
- lock->trx, 0, table,
- lock_get_mode(lock)));
- } else {
-
- ut_a(lock_table_has_to_wait_in_queue(lock));
- }
-
- lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock);
- }
-
- return(TRUE);
-}
-
-/*********************************************************************//**
-Validates the lock queue on a single record.
-@return TRUE if ok */
-static
-ibool
-lock_rec_queue_validate(
-/*====================*/
- const buf_block_t* block, /*!< in: buffer block containing rec */
- const rec_t* rec, /*!< in: record to look at */
- dict_index_t* index, /*!< in: index, or NULL if not known */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
-{
- trx_t* impl_trx;
- lock_t* lock;
- ulint heap_no;
-
- ut_a(rec);
- ut_a(block->frame == page_align(rec));
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
-
- heap_no = page_rec_get_heap_no(rec);
-
- lock_mutex_enter_kernel();
-
- if (!page_rec_is_user_rec(rec)) {
-
- lock = lock_rec_get_first(block, heap_no);
-
- while (lock) {
- switch(lock->trx->conc_state) {
- case TRX_ACTIVE:
- case TRX_PREPARED:
- case TRX_COMMITTED_IN_MEMORY:
- break;
- default:
- ut_error;
- }
-
- ut_a(trx_in_trx_list(lock->trx));
-
- if (lock_get_wait(lock)) {
- ut_a(lock_rec_has_to_wait_in_queue(lock));
- }
-
- if (index) {
- ut_a(lock->index == index);
- }
-
- lock = lock_rec_get_next(heap_no, lock);
- }
-
- lock_mutex_exit_kernel();
-
- return(TRUE);
- }
-
- if (!index);
- else if (dict_index_is_clust(index)) {
-
- impl_trx = lock_clust_rec_some_has_impl(rec, index, offsets);
-
- if (impl_trx
- && lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT,
- block, heap_no, impl_trx)) {
-
- ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
- block, heap_no, impl_trx));
- }
- } else {
-
- /* The kernel mutex may get released temporarily in the
- next function call: we have to release lock table mutex
- to obey the latching order */
-
- /* If this thread is holding the file space latch
- (fil_space_t::latch), the following check WILL break
- latching order and may cause a deadlock of threads. */
-
- impl_trx = lock_sec_rec_some_has_impl_off_kernel(
- rec, index, offsets);
-
- if (impl_trx
- && lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT,
- block, heap_no, impl_trx)) {
-
- ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
- block, heap_no, impl_trx));
- }
- }
-
- lock = lock_rec_get_first(block, heap_no);
-
- while (lock) {
- ut_a(lock->trx->conc_state == TRX_ACTIVE
- || lock->trx->conc_state == TRX_PREPARED
- || lock->trx->conc_state == TRX_COMMITTED_IN_MEMORY);
- ut_a(trx_in_trx_list(lock->trx));
-
- if (index) {
- ut_a(lock->index == index);
- }
-
- if (!lock_rec_get_gap(lock) && !lock_get_wait(lock)) {
-
- enum lock_mode mode;
-
- if (lock_get_mode(lock) == LOCK_S) {
- mode = LOCK_X;
- } else {
- mode = LOCK_S;
- }
- ut_a(!lock_rec_other_has_expl_req(
- mode, 0, 0, block, heap_no, lock->trx));
-
- } else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) {
-
- ut_a(lock_rec_has_to_wait_in_queue(lock));
- }
-
- lock = lock_rec_get_next(heap_no, lock);
- }
-
- lock_mutex_exit_kernel();
-
- return(TRUE);
-}
-
-/*********************************************************************//**
-Validates the record lock queues on a page.
-@return TRUE if ok */
-static
-ibool
-lock_rec_validate_page(
-/*===================*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no)/*!< in: page number */
-{
- dict_index_t* index;
- buf_block_t* block;
- const page_t* page;
- lock_t* lock;
- const rec_t* rec;
- ulint nth_lock = 0;
- ulint nth_bit = 0;
- ulint i;
- mtr_t mtr;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- ut_ad(!mutex_own(&kernel_mutex));
-
- mtr_start(&mtr);
-
- ut_ad(zip_size != ULINT_UNDEFINED);
- block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
-
- page = block->frame;
-
- lock_mutex_enter_kernel();
-loop:
- lock = lock_rec_get_first_on_page_addr(space, page_no);
-
- if (!lock) {
- goto function_exit;
- }
-
- for (i = 0; i < nth_lock; i++) {
-
- lock = lock_rec_get_next_on_page(lock);
-
- if (!lock) {
- goto function_exit;
- }
- }
-
- ut_a(trx_in_trx_list(lock->trx));
- ut_a(lock->trx->conc_state == TRX_ACTIVE
- || lock->trx->conc_state == TRX_PREPARED
- || lock->trx->conc_state == TRX_COMMITTED_IN_MEMORY);
-
- for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) {
-
- if (i == 1 || lock_rec_get_nth_bit(lock, i)) {
-
- index = lock->index;
- rec = page_find_rec_with_heap_no(page, i);
- ut_a(rec);
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- fprintf(stderr,
- "Validating %lu %lu\n",
- (ulong) space, (ulong) page_no);
-
- lock_mutex_exit_kernel();
-
- /* If this thread is holding the file space
- latch (fil_space_t::latch), the following
- check WILL break the latching order and may
- cause a deadlock of threads. */
-
- lock_rec_queue_validate(block, rec, index, offsets);
-
- lock_mutex_enter_kernel();
-
- nth_bit = i + 1;
-
- goto loop;
- }
- }
-
- nth_bit = 0;
- nth_lock++;
-
- goto loop;
-
-function_exit:
- lock_mutex_exit_kernel();
-
- mtr_commit(&mtr);
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(TRUE);
-}
-
-/*********************************************************************//**
-Validates the lock system.
-@return TRUE if ok */
-static
-ibool
-lock_validate(void)
-/*===============*/
-{
- lock_t* lock;
- trx_t* trx;
- dulint limit;
- ulint space;
- ulint page_no;
- ulint i;
-
- lock_mutex_enter_kernel();
-
- trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-
- while (trx) {
- lock = UT_LIST_GET_FIRST(trx->trx_locks);
-
- while (lock) {
- if (lock_get_type_low(lock) & LOCK_TABLE) {
-
- lock_table_queue_validate(
- lock->un_member.tab_lock.table);
- }
-
- lock = UT_LIST_GET_NEXT(trx_locks, lock);
- }
-
- trx = UT_LIST_GET_NEXT(trx_list, trx);
- }
-
- for (i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
-
- limit = ut_dulint_zero;
-
- for (;;) {
- lock = HASH_GET_FIRST(lock_sys->rec_hash, i);
-
- while (lock) {
- ut_a(trx_in_trx_list(lock->trx));
-
- space = lock->un_member.rec_lock.space;
- page_no = lock->un_member.rec_lock.page_no;
-
- if (ut_dulint_cmp(
- ut_dulint_create(space, page_no),
- limit) >= 0) {
- break;
- }
-
- lock = HASH_GET_NEXT(hash, lock);
- }
-
- if (!lock) {
-
- break;
- }
-
- lock_mutex_exit_kernel();
-
- lock_rec_validate_page(space,
- fil_space_get_zip_size(space),
- page_no);
-
- lock_mutex_enter_kernel();
-
- limit = ut_dulint_create(space, page_no + 1);
- }
- }
-
- lock_mutex_exit_kernel();
-
- return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-/*============ RECORD LOCK CHECKS FOR ROW OPERATIONS ====================*/
-
-/*********************************************************************//**
-Checks if locks of other transactions prevent an immediate insert of
-a record. If they do, first tests if the query thread should anyway
-be suspended for some reason; if not, then puts the transaction and
-the query thread to the lock wait state and inserts a waiting request
-for a gap x-lock to the lock queue.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-ulint
-lock_rec_insert_check_and_lock(
-/*===========================*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is
- set, does nothing */
- const rec_t* rec, /*!< in: record after which to insert */
- buf_block_t* block, /*!< in/out: buffer block of rec */
- dict_index_t* index, /*!< in: index */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- ibool* inherit)/*!< out: set to TRUE if the new
- inserted record maybe should inherit
- LOCK_GAP type locks from the successor
- record */
-{
- const rec_t* next_rec;
- trx_t* trx;
- lock_t* lock;
- ulint err;
- ulint next_rec_heap_no;
-
- ut_ad(block->frame == page_align(rec));
-
- if (flags & BTR_NO_LOCKING_FLAG) {
-
- return(DB_SUCCESS);
- }
-
- trx = thr_get_trx(thr);
- next_rec = page_rec_get_next((rec_t*) rec);
- next_rec_heap_no = page_rec_get_heap_no(next_rec);
-
- lock_mutex_enter_kernel();
-
- /* When inserting a record into an index, the table must be at
- least IX-locked or we must be building an index, in which case
- the table must be at least S-locked. */
- ut_ad(lock_table_has(trx, index->table, LOCK_IX)
- || (*index->name == TEMP_INDEX_PREFIX
- && lock_table_has(trx, index->table, LOCK_S)));
-
- lock = lock_rec_get_first(block, next_rec_heap_no);
-
- if (UNIV_LIKELY(lock == NULL)) {
- /* We optimize CPU time usage in the simplest case */
-
- lock_mutex_exit_kernel();
-
- if (!dict_index_is_clust(index)) {
- /* Update the page max trx id field */
- page_update_max_trx_id(block,
- buf_block_get_page_zip(block),
- trx->id, mtr);
- }
-
- *inherit = FALSE;
-
- return(DB_SUCCESS);
- }
-
- *inherit = TRUE;
-
- /* If another transaction has an explicit lock request which locks
- the gap, waiting or granted, on the successor, the insert has to wait.
-
- An exception is the case where the lock by the another transaction
- is a gap type lock which it placed to wait for its turn to insert. We
- do not consider that kind of a lock conflicting with our insert. This
- eliminates an unnecessary deadlock which resulted when 2 transactions
- had to wait for their insert. Both had waiting gap type lock requests
- on the successor, which produced an unnecessary deadlock. */
-
- if (lock_rec_other_has_conflicting(
- LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION,
- block, next_rec_heap_no, trx)) {
-
- /* Note that we may get DB_SUCCESS also here! */
- err = lock_rec_enqueue_waiting(LOCK_X | LOCK_GAP
- | LOCK_INSERT_INTENTION,
- block, next_rec_heap_no,
- index, thr);
- } else {
- err = DB_SUCCESS;
- }
-
- lock_mutex_exit_kernel();
-
- if ((err == DB_SUCCESS) && !dict_index_is_clust(index)) {
- /* Update the page max trx id field */
- page_update_max_trx_id(block,
- buf_block_get_page_zip(block),
- trx->id, mtr);
- }
-
-#ifdef UNIV_DEBUG
- {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- const ulint* offsets;
- rec_offs_init(offsets_);
-
- offsets = rec_get_offsets(next_rec, index, offsets_,
- ULINT_UNDEFINED, &heap);
- ut_ad(lock_rec_queue_validate(block,
- next_rec, index, offsets));
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-#endif /* UNIV_DEBUG */
-
- return(err);
-}
-
-/*********************************************************************//**
-If a transaction has an implicit x-lock on a record, but no explicit x-lock
-set on the record, sets one for it. NOTE that in the case of a secondary
-index, the kernel mutex may get temporarily released. */
-static
-void
-lock_rec_convert_impl_to_expl(
-/*==========================*/
- const buf_block_t* block, /*!< in: buffer block of rec */
- const rec_t* rec, /*!< in: user record on page */
- dict_index_t* index, /*!< in: index of record */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
-{
- trx_t* impl_trx;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(page_rec_is_user_rec(rec));
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
-
- if (dict_index_is_clust(index)) {
- impl_trx = lock_clust_rec_some_has_impl(rec, index, offsets);
- } else {
- impl_trx = lock_sec_rec_some_has_impl_off_kernel(
- rec, index, offsets);
- }
-
- if (impl_trx) {
- ulint heap_no = page_rec_get_heap_no(rec);
-
- /* If the transaction has no explicit x-lock set on the
- record, set one for it */
-
- if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block,
- heap_no, impl_trx)) {
-
- lock_rec_add_to_queue(
- LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP,
- block, heap_no, index, impl_trx);
- }
- }
-}
-
-/*********************************************************************//**
-Checks if locks of other transactions prevent an immediate modify (update,
-delete mark, or delete unmark) of a clustered index record. If they do,
-first tests if the query thread should anyway be suspended for some
-reason; if not, then puts the transaction and the query thread to the
-lock wait state and inserts a waiting request for a record x-lock to the
-lock queue.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-ulint
-lock_clust_rec_modify_check_and_lock(
-/*=================================*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
- bit is set, does nothing */
- const buf_block_t* block, /*!< in: buffer block of rec */
- const rec_t* rec, /*!< in: record which should be
- modified */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- que_thr_t* thr) /*!< in: query thread */
-{
- ulint err;
- ulint heap_no;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(dict_index_is_clust(index));
- ut_ad(block->frame == page_align(rec));
-
- if (flags & BTR_NO_LOCKING_FLAG) {
-
- return(DB_SUCCESS);
- }
-
- heap_no = rec_offs_comp(offsets)
- ? rec_get_heap_no_new(rec)
- : rec_get_heap_no_old(rec);
-
- lock_mutex_enter_kernel();
-
- ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
-
- /* If a transaction has no explicit x-lock set on the record, set one
- for it */
-
- lock_rec_convert_impl_to_expl(block, rec, index, offsets);
-
- err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
- block, heap_no, index, thr);
-
- lock_mutex_exit_kernel();
-
- ut_ad(lock_rec_queue_validate(block, rec, index, offsets));
-
- return(err);
-}
-
-/*********************************************************************//**
-Checks if locks of other transactions prevent an immediate modify (delete
-mark or delete unmark) of a secondary index record.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-ulint
-lock_sec_rec_modify_check_and_lock(
-/*===============================*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
- bit is set, does nothing */
- buf_block_t* block, /*!< in/out: buffer block of rec */
- const rec_t* rec, /*!< in: record which should be
- modified; NOTE: as this is a secondary
- index, we always have to modify the
- clustered index record first: see the
- comment below */
- dict_index_t* index, /*!< in: secondary index */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ulint err;
- ulint heap_no;
-
- ut_ad(!dict_index_is_clust(index));
- ut_ad(block->frame == page_align(rec));
-
- if (flags & BTR_NO_LOCKING_FLAG) {
-
- return(DB_SUCCESS);
- }
-
- heap_no = page_rec_get_heap_no(rec);
-
- /* Another transaction cannot have an implicit lock on the record,
- because when we come here, we already have modified the clustered
- index record, and this would not have been possible if another active
- transaction had modified this secondary index record. */
-
- lock_mutex_enter_kernel();
-
- ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
-
- err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
- block, heap_no, index, thr);
-
- lock_mutex_exit_kernel();
-
-#ifdef UNIV_DEBUG
- {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- const ulint* offsets;
- rec_offs_init(offsets_);
-
- offsets = rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap);
- ut_ad(lock_rec_queue_validate(block, rec, index, offsets));
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-#endif /* UNIV_DEBUG */
-
- if (err == DB_SUCCESS) {
- /* Update the page max trx id field */
- page_update_max_trx_id(block,
- buf_block_get_page_zip(block),
- thr_get_trx(thr)->id, mtr);
- }
-
- return(err);
-}
-
-/*********************************************************************//**
-Like the counterpart for a clustered index below, but now we read a
-secondary index record.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-ulint
-lock_sec_rec_read_check_and_lock(
-/*=============================*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
- bit is set, does nothing */
- const buf_block_t* block, /*!< in: buffer block of rec */
- const rec_t* rec, /*!< in: user record or page
- supremum record which should
- be read or passed over by a
- read cursor */
- dict_index_t* index, /*!< in: secondary index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- enum lock_mode mode, /*!< in: mode of the lock which
- the read cursor should set on
- records: LOCK_S or LOCK_X; the
- latter is possible in
- SELECT FOR UPDATE */
- ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP */
- que_thr_t* thr) /*!< in: query thread */
-{
- ulint err;
- ulint heap_no;
-
- ut_ad(!dict_index_is_clust(index));
- ut_ad(block->frame == page_align(rec));
- ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(mode == LOCK_X || mode == LOCK_S);
-
- if (flags & BTR_NO_LOCKING_FLAG) {
-
- return(DB_SUCCESS);
- }
-
- heap_no = page_rec_get_heap_no(rec);
-
- lock_mutex_enter_kernel();
-
- ut_ad(mode != LOCK_X
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
- ut_ad(mode != LOCK_S
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
-
- /* Some transaction may have an implicit x-lock on the record only
- if the max trx id for the page >= min trx id for the trx list or a
- database recovery is running. */
-
- if (((ut_dulint_cmp(page_get_max_trx_id(block->frame),
- trx_list_get_min_trx_id()) >= 0)
- || recv_recovery_is_on())
- && !page_rec_is_supremum(rec)) {
-
- lock_rec_convert_impl_to_expl(block, rec, index, offsets);
- }
-
- err = lock_rec_lock(FALSE, mode | gap_mode,
- block, heap_no, index, thr);
-
- lock_mutex_exit_kernel();
-
- ut_ad(lock_rec_queue_validate(block, rec, index, offsets));
-
- return(err);
-}
-
-/*********************************************************************//**
-Checks if locks of other transactions prevent an immediate read, or passing
-over by a read cursor, of a clustered index record. If they do, first tests
-if the query thread should anyway be suspended for some reason; if not, then
-puts the transaction and the query thread to the lock wait state and inserts a
-waiting request for a record lock to the lock queue. Sets the requested mode
-lock on the record.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-ulint
-lock_clust_rec_read_check_and_lock(
-/*===============================*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
- bit is set, does nothing */
- const buf_block_t* block, /*!< in: buffer block of rec */
- const rec_t* rec, /*!< in: user record or page
- supremum record which should
- be read or passed over by a
- read cursor */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- enum lock_mode mode, /*!< in: mode of the lock which
- the read cursor should set on
- records: LOCK_S or LOCK_X; the
- latter is possible in
- SELECT FOR UPDATE */
- ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP */
- que_thr_t* thr) /*!< in: query thread */
-{
- ulint err;
- ulint heap_no;
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(block->frame == page_align(rec));
- ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
- ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP
- || gap_mode == LOCK_REC_NOT_GAP);
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- if (flags & BTR_NO_LOCKING_FLAG) {
-
- return(DB_SUCCESS);
- }
-
- heap_no = page_rec_get_heap_no(rec);
-
- lock_mutex_enter_kernel();
-
- ut_ad(mode != LOCK_X
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
- ut_ad(mode != LOCK_S
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
-
- if (UNIV_LIKELY(heap_no != PAGE_HEAP_NO_SUPREMUM)) {
-
- lock_rec_convert_impl_to_expl(block, rec, index, offsets);
- }
-
- err = lock_rec_lock(FALSE, mode | gap_mode,
- block, heap_no, index, thr);
-
- lock_mutex_exit_kernel();
-
- ut_ad(lock_rec_queue_validate(block, rec, index, offsets));
-
- return(err);
-}
-/*********************************************************************//**
-Checks if locks of other transactions prevent an immediate read, or passing
-over by a read cursor, of a clustered index record. If they do, first tests
-if the query thread should anyway be suspended for some reason; if not, then
-puts the transaction and the query thread to the lock wait state and inserts a
-waiting request for a record lock to the lock queue. Sets the requested mode
-lock on the record. This is an alternative version of
-lock_clust_rec_read_check_and_lock() that does not require the parameter
-"offsets".
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-ulint
-lock_clust_rec_read_check_and_lock_alt(
-/*===================================*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
- bit is set, does nothing */
- const buf_block_t* block, /*!< in: buffer block of rec */
- const rec_t* rec, /*!< in: user record or page
- supremum record which should
- be read or passed over by a
- read cursor */
- dict_index_t* index, /*!< in: clustered index */
- enum lock_mode mode, /*!< in: mode of the lock which
- the read cursor should set on
- records: LOCK_S or LOCK_X; the
- latter is possible in
- SELECT FOR UPDATE */
- ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP */
- que_thr_t* thr) /*!< in: query thread */
-{
- mem_heap_t* tmp_heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- ulint ret;
- rec_offs_init(offsets_);
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &tmp_heap);
- ret = lock_clust_rec_read_check_and_lock(flags, block, rec, index,
- offsets, mode, gap_mode, thr);
- if (tmp_heap) {
- mem_heap_free(tmp_heap);
- }
- return(ret);
-}
-
-/*******************************************************************//**
-Release the last lock from the transaction's autoinc locks. */
-UNIV_INLINE
-void
-lock_release_autoinc_last_lock(
-/*===========================*/
- ib_vector_t* autoinc_locks) /*!< in/out: vector of AUTOINC locks */
-{
- ulint last;
- lock_t* lock;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_a(!ib_vector_is_empty(autoinc_locks));
-
- /* The lock to be release must be the last lock acquired. */
- last = ib_vector_size(autoinc_locks) - 1;
- lock = ib_vector_get(autoinc_locks, last);
-
- /* Should have only AUTOINC locks in the vector. */
- ut_a(lock_get_mode(lock) == LOCK_AUTO_INC);
- ut_a(lock_get_type(lock) == LOCK_TABLE);
-
- ut_a(lock->un_member.tab_lock.table != NULL);
-
- /* This will remove the lock from the trx autoinc_locks too. */
- lock_table_dequeue(lock);
-}
-
-/*******************************************************************//**
-Check if a transaction holds any autoinc locks.
-@return TRUE if the transaction holds any AUTOINC locks. */
-UNIV_INTERN
-ibool
-lock_trx_holds_autoinc_locks(
-/*=========================*/
- const trx_t* trx) /*!< in: transaction */
-{
- ut_a(trx->autoinc_locks != NULL);
-
- return(!ib_vector_is_empty(trx->autoinc_locks));
-}
-
-/*******************************************************************//**
-Release all the transaction's autoinc locks. */
-UNIV_INTERN
-void
-lock_release_autoinc_locks(
-/*=======================*/
- trx_t* trx) /*!< in/out: transaction */
-{
- ut_ad(mutex_own(&kernel_mutex));
-
- ut_a(trx->autoinc_locks != NULL);
-
- /* We release the locks in the reverse order. This is to
- avoid searching the vector for the element to delete at
- the lower level. See (lock_table_remove_low()) for details. */
- while (!ib_vector_is_empty(trx->autoinc_locks)) {
-
- /* lock_table_remove_low() will also remove the lock from
- the transaction's autoinc_locks vector. */
- lock_release_autoinc_last_lock(trx->autoinc_locks);
- }
-
- /* Should release all locks. */
- ut_a(ib_vector_is_empty(trx->autoinc_locks));
-}
-
-/*******************************************************************//**
-Gets the type of a lock. Non-inline version for using outside of the
-lock module.
-@return LOCK_TABLE or LOCK_REC */
-UNIV_INTERN
-ulint
-lock_get_type(
-/*==========*/
- const lock_t* lock) /*!< in: lock */
-{
- return(lock_get_type_low(lock));
-}
-
-/*******************************************************************//**
-Gets the id of the transaction owning a lock.
-@return transaction id */
-UNIV_INTERN
-ullint
-lock_get_trx_id(
-/*============*/
- const lock_t* lock) /*!< in: lock */
-{
- return(trx_get_id(lock->trx));
-}
-
-/*******************************************************************//**
-Gets the mode of a lock in a human readable string.
-The string should not be free()'d or modified.
-@return lock mode */
-UNIV_INTERN
-const char*
-lock_get_mode_str(
-/*==============*/
- const lock_t* lock) /*!< in: lock */
-{
- ibool is_gap_lock;
-
- is_gap_lock = lock_get_type_low(lock) == LOCK_REC
- && lock_rec_get_gap(lock);
-
- switch (lock_get_mode(lock)) {
- case LOCK_S:
- if (is_gap_lock) {
- return("S,GAP");
- } else {
- return("S");
- }
- case LOCK_X:
- if (is_gap_lock) {
- return("X,GAP");
- } else {
- return("X");
- }
- case LOCK_IS:
- if (is_gap_lock) {
- return("IS,GAP");
- } else {
- return("IS");
- }
- case LOCK_IX:
- if (is_gap_lock) {
- return("IX,GAP");
- } else {
- return("IX");
- }
- case LOCK_AUTO_INC:
- return("AUTO_INC");
- default:
- return("UNKNOWN");
- }
-}
-
-/*******************************************************************//**
-Gets the type of a lock in a human readable string.
-The string should not be free()'d or modified.
-@return lock type */
-UNIV_INTERN
-const char*
-lock_get_type_str(
-/*==============*/
- const lock_t* lock) /*!< in: lock */
-{
- switch (lock_get_type_low(lock)) {
- case LOCK_REC:
- return("RECORD");
- case LOCK_TABLE:
- return("TABLE");
- default:
- return("UNKNOWN");
- }
-}
-
-/*******************************************************************//**
-Gets the table on which the lock is.
-@return table */
-UNIV_INLINE
-dict_table_t*
-lock_get_table(
-/*===========*/
- const lock_t* lock) /*!< in: lock */
-{
- switch (lock_get_type_low(lock)) {
- case LOCK_REC:
- return(lock->index->table);
- case LOCK_TABLE:
- return(lock->un_member.tab_lock.table);
- default:
- ut_error;
- return(NULL);
- }
-}
-
-/*******************************************************************//**
-Gets the id of the table on which the lock is.
-@return id of the table */
-UNIV_INTERN
-ullint
-lock_get_table_id(
-/*==============*/
- const lock_t* lock) /*!< in: lock */
-{
- dict_table_t* table;
-
- table = lock_get_table(lock);
-
- return((ullint)ut_conv_dulint_to_longlong(table->id));
-}
-
-/*******************************************************************//**
-Gets the name of the table on which the lock is.
-The string should not be free()'d or modified.
-@return name of the table */
-UNIV_INTERN
-const char*
-lock_get_table_name(
-/*================*/
- const lock_t* lock) /*!< in: lock */
-{
- dict_table_t* table;
-
- table = lock_get_table(lock);
-
- return(table->name);
-}
-
-/*******************************************************************//**
-For a record lock, gets the index on which the lock is.
-@return index */
-UNIV_INTERN
-const dict_index_t*
-lock_rec_get_index(
-/*===============*/
- const lock_t* lock) /*!< in: lock */
-{
- ut_a(lock_get_type_low(lock) == LOCK_REC);
-
- return(lock->index);
-}
-
-/*******************************************************************//**
-For a record lock, gets the name of the index on which the lock is.
-The string should not be free()'d or modified.
-@return name of the index */
-UNIV_INTERN
-const char*
-lock_rec_get_index_name(
-/*====================*/
- const lock_t* lock) /*!< in: lock */
-{
- ut_a(lock_get_type_low(lock) == LOCK_REC);
-
- return(lock->index->name);
-}
-
-/*******************************************************************//**
-For a record lock, gets the tablespace number on which the lock is.
-@return tablespace number */
-UNIV_INTERN
-ulint
-lock_rec_get_space_id(
-/*==================*/
- const lock_t* lock) /*!< in: lock */
-{
- ut_a(lock_get_type_low(lock) == LOCK_REC);
-
- return(lock->un_member.rec_lock.space);
-}
-
-/*******************************************************************//**
-For a record lock, gets the page number on which the lock is.
-@return page number */
-UNIV_INTERN
-ulint
-lock_rec_get_page_no(
-/*=================*/
- const lock_t* lock) /*!< in: lock */
-{
- ut_a(lock_get_type_low(lock) == LOCK_REC);
-
- return(lock->un_member.rec_lock.page_no);
-}
diff --git a/storage/innodb_plugin/log/log0log.c b/storage/innodb_plugin/log/log0log.c
deleted file mode 100644
index d5b696074b3..00000000000
--- a/storage/innodb_plugin/log/log0log.c
+++ /dev/null
@@ -1,3467 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2009, Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file log/log0log.c
-Database log
-
-Created 12/9/1995 Heikki Tuuri
-*******************************************************/
-
-#include "log0log.h"
-
-#ifdef UNIV_NONINL
-#include "log0log.ic"
-#endif
-
-#ifndef UNIV_HOTBACKUP
-#include "mem0mem.h"
-#include "buf0buf.h"
-#include "buf0flu.h"
-#include "srv0srv.h"
-#include "log0recv.h"
-#include "fil0fil.h"
-#include "dict0boot.h"
-#include "srv0srv.h"
-#include "srv0start.h"
-#include "trx0sys.h"
-#include "trx0trx.h"
-
-/*
-General philosophy of InnoDB redo-logs:
-
-1) Every change to a contents of a data page must be done
-through mtr, which in mtr_commit() writes log records
-to the InnoDB redo log.
-
-2) Normally these changes are performed using a mlog_write_ulint()
-or similar function.
-
-3) In some page level operations only a code number of a
-c-function and its parameters are written to the log to
-reduce the size of the log.
-
- 3a) You should not add parameters to these kind of functions
- (e.g. trx_undo_header_create(), trx_undo_insert_header_reuse())
-
- 3b) You should not add such functionality which either change
- working when compared with the old or are dependent on data
- outside of the page. These kind of functions should implement
- self-contained page transformation and it should be unchanged
- if you don't have very essential reasons to change log
- semantics or format.
-
-*/
-
-/* Current free limit of space 0; protected by the log sys mutex; 0 means
-uninitialized */
-UNIV_INTERN ulint log_fsp_current_free_limit = 0;
-
-/* Global log system variable */
-UNIV_INTERN log_t* log_sys = NULL;
-
-#ifdef UNIV_DEBUG
-UNIV_INTERN ibool log_do_write = TRUE;
-#endif /* UNIV_DEBUG */
-
-/* These control how often we print warnings if the last checkpoint is too
-old */
-UNIV_INTERN ibool log_has_printed_chkp_warning = FALSE;
-UNIV_INTERN time_t log_last_warning_time;
-
-#ifdef UNIV_LOG_ARCHIVE
-/* Pointer to this variable is used as the i/o-message when we do i/o to an
-archive */
-UNIV_INTERN byte log_archive_io;
-#endif /* UNIV_LOG_ARCHIVE */
-
-/* A margin for free space in the log buffer before a log entry is catenated */
-#define LOG_BUF_WRITE_MARGIN (4 * OS_FILE_LOG_BLOCK_SIZE)
-
-/* Margins for free space in the log buffer after a log entry is catenated */
-#define LOG_BUF_FLUSH_RATIO 2
-#define LOG_BUF_FLUSH_MARGIN (LOG_BUF_WRITE_MARGIN + 4 * UNIV_PAGE_SIZE)
-
-/* Margin for the free space in the smallest log group, before a new query
-step which modifies the database, is started */
-
-#define LOG_CHECKPOINT_FREE_PER_THREAD (4 * UNIV_PAGE_SIZE)
-#define LOG_CHECKPOINT_EXTRA_FREE (8 * UNIV_PAGE_SIZE)
-
-/* This parameter controls asynchronous making of a new checkpoint; the value
-should be bigger than LOG_POOL_PREFLUSH_RATIO_SYNC */
-
-#define LOG_POOL_CHECKPOINT_RATIO_ASYNC 32
-
-/* This parameter controls synchronous preflushing of modified buffer pages */
-#define LOG_POOL_PREFLUSH_RATIO_SYNC 16
-
-/* The same ratio for asynchronous preflushing; this value should be less than
-the previous */
-#define LOG_POOL_PREFLUSH_RATIO_ASYNC 8
-
-/* Extra margin, in addition to one log file, used in archiving */
-#define LOG_ARCHIVE_EXTRA_MARGIN (4 * UNIV_PAGE_SIZE)
-
-/* This parameter controls asynchronous writing to the archive */
-#define LOG_ARCHIVE_RATIO_ASYNC 16
-
-/* Codes used in unlocking flush latches */
-#define LOG_UNLOCK_NONE_FLUSHED_LOCK 1
-#define LOG_UNLOCK_FLUSH_LOCK 2
-
-/* States of an archiving operation */
-#define LOG_ARCHIVE_READ 1
-#define LOG_ARCHIVE_WRITE 2
-
-/******************************************************//**
-Completes a checkpoint write i/o to a log file. */
-static
-void
-log_io_complete_checkpoint(void);
-/*============================*/
-#ifdef UNIV_LOG_ARCHIVE
-/******************************************************//**
-Completes an archiving i/o. */
-static
-void
-log_io_complete_archive(void);
-/*=========================*/
-#endif /* UNIV_LOG_ARCHIVE */
-
-/****************************************************************//**
-Sets the global variable log_fsp_current_free_limit. Also makes a checkpoint,
-so that we know that the limit has been written to a log checkpoint field
-on disk. */
-UNIV_INTERN
-void
-log_fsp_current_free_limit_set_and_checkpoint(
-/*==========================================*/
- ulint limit) /*!< in: limit to set */
-{
- ibool success;
-
- mutex_enter(&(log_sys->mutex));
-
- log_fsp_current_free_limit = limit;
-
- mutex_exit(&(log_sys->mutex));
-
- /* Try to make a synchronous checkpoint */
-
- success = FALSE;
-
- while (!success) {
- success = log_checkpoint(TRUE, TRUE);
- }
-}
-
-/****************************************************************//**
-Returns the oldest modified block lsn in the pool, or log_sys->lsn if none
-exists.
-@return LSN of oldest modification */
-static
-ib_uint64_t
-log_buf_pool_get_oldest_modification(void)
-/*======================================*/
-{
- ib_uint64_t lsn;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- lsn = buf_pool_get_oldest_modification();
-
- if (!lsn) {
-
- lsn = log_sys->lsn;
- }
-
- return(lsn);
-}
-
-/************************************************************//**
-Opens the log for log_write_low. The log must be closed with log_close and
-released with log_release.
-@return start lsn of the log record */
-UNIV_INTERN
-ib_uint64_t
-log_reserve_and_open(
-/*=================*/
- ulint len) /*!< in: length of data to be catenated */
-{
- log_t* log = log_sys;
- ulint len_upper_limit;
-#ifdef UNIV_LOG_ARCHIVE
- ulint archived_lsn_age;
- ulint dummy;
-#endif /* UNIV_LOG_ARCHIVE */
-#ifdef UNIV_DEBUG
- ulint count = 0;
-#endif /* UNIV_DEBUG */
-
- ut_a(len < log->buf_size / 2);
-loop:
- mutex_enter(&(log->mutex));
- ut_ad(!recv_no_log_write);
-
- /* Calculate an upper limit for the space the string may take in the
- log buffer */
-
- len_upper_limit = LOG_BUF_WRITE_MARGIN + (5 * len) / 4;
-
- if (log->buf_free + len_upper_limit > log->buf_size) {
-
- mutex_exit(&(log->mutex));
-
- /* Not enough free space, do a syncronous flush of the log
- buffer */
-
- log_buffer_flush_to_disk();
-
- srv_log_waits++;
-
- ut_ad(++count < 50);
-
- goto loop;
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- if (log->archiving_state != LOG_ARCH_OFF) {
-
- archived_lsn_age = log->lsn - log->archived_lsn;
- if (archived_lsn_age + len_upper_limit
- > log->max_archived_lsn_age) {
- /* Not enough free archived space in log groups: do a
- synchronous archive write batch: */
-
- mutex_exit(&(log->mutex));
-
- ut_ad(len_upper_limit <= log->max_archived_lsn_age);
-
- log_archive_do(TRUE, &dummy);
-
- ut_ad(++count < 50);
-
- goto loop;
- }
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
-#ifdef UNIV_LOG_DEBUG
- log->old_buf_free = log->buf_free;
- log->old_lsn = log->lsn;
-#endif
- return(log->lsn);
-}
-
-/************************************************************//**
-Writes to the log the string given. It is assumed that the caller holds the
-log mutex. */
-UNIV_INTERN
-void
-log_write_low(
-/*==========*/
- byte* str, /*!< in: string */
- ulint str_len) /*!< in: string length */
-{
- log_t* log = log_sys;
- ulint len;
- ulint data_len;
- byte* log_block;
-
- ut_ad(mutex_own(&(log->mutex)));
-part_loop:
- ut_ad(!recv_no_log_write);
- /* Calculate a part length */
-
- data_len = (log->buf_free % OS_FILE_LOG_BLOCK_SIZE) + str_len;
-
- if (data_len <= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
-
- /* The string fits within the current log block */
-
- len = str_len;
- } else {
- data_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
-
- len = OS_FILE_LOG_BLOCK_SIZE
- - (log->buf_free % OS_FILE_LOG_BLOCK_SIZE)
- - LOG_BLOCK_TRL_SIZE;
- }
-
- ut_memcpy(log->buf + log->buf_free, str, len);
-
- str_len -= len;
- str = str + len;
-
- log_block = ut_align_down(log->buf + log->buf_free,
- OS_FILE_LOG_BLOCK_SIZE);
- log_block_set_data_len(log_block, data_len);
-
- if (data_len == OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
- /* This block became full */
- log_block_set_data_len(log_block, OS_FILE_LOG_BLOCK_SIZE);
- log_block_set_checkpoint_no(log_block,
- log_sys->next_checkpoint_no);
- len += LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE;
-
- log->lsn += len;
-
- /* Initialize the next block header */
- log_block_init(log_block + OS_FILE_LOG_BLOCK_SIZE, log->lsn);
- } else {
- log->lsn += len;
- }
-
- log->buf_free += len;
-
- ut_ad(log->buf_free <= log->buf_size);
-
- if (str_len > 0) {
- goto part_loop;
- }
-
- srv_log_write_requests++;
-}
-
-/************************************************************//**
-Closes the log.
-@return lsn */
-UNIV_INTERN
-ib_uint64_t
-log_close(void)
-/*===========*/
-{
- byte* log_block;
- ulint first_rec_group;
- ib_uint64_t oldest_lsn;
- ib_uint64_t lsn;
- log_t* log = log_sys;
- ib_uint64_t checkpoint_age;
-
- ut_ad(mutex_own(&(log->mutex)));
- ut_ad(!recv_no_log_write);
-
- lsn = log->lsn;
-
- log_block = ut_align_down(log->buf + log->buf_free,
- OS_FILE_LOG_BLOCK_SIZE);
- first_rec_group = log_block_get_first_rec_group(log_block);
-
- if (first_rec_group == 0) {
- /* We initialized a new log block which was not written
- full by the current mtr: the next mtr log record group
- will start within this block at the offset data_len */
-
- log_block_set_first_rec_group(
- log_block, log_block_get_data_len(log_block));
- }
-
- if (log->buf_free > log->max_buf_free) {
-
- log->check_flush_or_checkpoint = TRUE;
- }
-
- checkpoint_age = lsn - log->last_checkpoint_lsn;
-
- if (checkpoint_age >= log->log_group_capacity) {
- /* TODO: split btr_store_big_rec_extern_fields() into small
- steps so that we can release all latches in the middle, and
- call log_free_check() to ensure we never write over log written
- after the latest checkpoint. In principle, we should split all
- big_rec operations, but other operations are smaller. */
-
- if (!log_has_printed_chkp_warning
- || difftime(time(NULL), log_last_warning_time) > 15) {
-
- log_has_printed_chkp_warning = TRUE;
- log_last_warning_time = time(NULL);
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ERROR: the age of the last"
- " checkpoint is %lu,\n"
- "InnoDB: which exceeds the log group"
- " capacity %lu.\n"
- "InnoDB: If you are using big"
- " BLOB or TEXT rows, you must set the\n"
- "InnoDB: combined size of log files"
- " at least 10 times bigger than the\n"
- "InnoDB: largest such row.\n",
- (ulong) checkpoint_age,
- (ulong) log->log_group_capacity);
- }
- }
-
- if (checkpoint_age <= log->max_modified_age_async) {
-
- goto function_exit;
- }
-
- oldest_lsn = buf_pool_get_oldest_modification();
-
- if (!oldest_lsn
- || lsn - oldest_lsn > log->max_modified_age_async
- || checkpoint_age > log->max_checkpoint_age_async) {
-
- log->check_flush_or_checkpoint = TRUE;
- }
-function_exit:
-
-#ifdef UNIV_LOG_DEBUG
- log_check_log_recs(log->buf + log->old_buf_free,
- log->buf_free - log->old_buf_free, log->old_lsn);
-#endif
-
- return(lsn);
-}
-
-#ifdef UNIV_LOG_ARCHIVE
-/******************************************************//**
-Pads the current log block full with dummy log records. Used in producing
-consistent archived log files. */
-static
-void
-log_pad_current_log_block(void)
-/*===========================*/
-{
- byte b = MLOG_DUMMY_RECORD;
- ulint pad_length;
- ulint i;
- ib_uint64_t lsn;
-
- /* We retrieve lsn only because otherwise gcc crashed on HP-UX */
- lsn = log_reserve_and_open(OS_FILE_LOG_BLOCK_SIZE);
-
- pad_length = OS_FILE_LOG_BLOCK_SIZE
- - (log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE)
- - LOG_BLOCK_TRL_SIZE;
-
- for (i = 0; i < pad_length; i++) {
- log_write_low(&b, 1);
- }
-
- lsn = log_sys->lsn;
-
- log_close();
- log_release();
-
- ut_a(lsn % OS_FILE_LOG_BLOCK_SIZE == LOG_BLOCK_HDR_SIZE);
-}
-#endif /* UNIV_LOG_ARCHIVE */
-
-/******************************************************//**
-Calculates the data capacity of a log group, when the log file headers are not
-included.
-@return capacity in bytes */
-UNIV_INTERN
-ulint
-log_group_get_capacity(
-/*===================*/
- const log_group_t* group) /*!< in: log group */
-{
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- return((group->file_size - LOG_FILE_HDR_SIZE) * group->n_files);
-}
-
-/******************************************************//**
-Calculates the offset within a log group, when the log file headers are not
-included.
-@return size offset (<= offset) */
-UNIV_INLINE
-ulint
-log_group_calc_size_offset(
-/*=======================*/
- ulint offset, /*!< in: real offset within the
- log group */
- const log_group_t* group) /*!< in: log group */
-{
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- return(offset - LOG_FILE_HDR_SIZE * (1 + offset / group->file_size));
-}
-
-/******************************************************//**
-Calculates the offset within a log group, when the log file headers are
-included.
-@return real offset (>= offset) */
-UNIV_INLINE
-ulint
-log_group_calc_real_offset(
-/*=======================*/
- ulint offset, /*!< in: size offset within the
- log group */
- const log_group_t* group) /*!< in: log group */
-{
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- return(offset + LOG_FILE_HDR_SIZE
- * (1 + offset / (group->file_size - LOG_FILE_HDR_SIZE)));
-}
-
-/******************************************************//**
-Calculates the offset of an lsn within a log group.
-@return offset within the log group */
-static
-ulint
-log_group_calc_lsn_offset(
-/*======================*/
- ib_uint64_t lsn, /*!< in: lsn, must be within 4 GB of
- group->lsn */
- const log_group_t* group) /*!< in: log group */
-{
- ib_uint64_t gr_lsn;
- ib_int64_t gr_lsn_size_offset;
- ib_int64_t difference;
- ib_int64_t group_size;
- ib_int64_t offset;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- /* If total log file size is > 2 GB we can easily get overflows
- with 32-bit integers. Use 64-bit integers instead. */
-
- gr_lsn = group->lsn;
-
- gr_lsn_size_offset = (ib_int64_t)
- log_group_calc_size_offset(group->lsn_offset, group);
-
- group_size = (ib_int64_t) log_group_get_capacity(group);
-
- if (lsn >= gr_lsn) {
-
- difference = (ib_int64_t) (lsn - gr_lsn);
- } else {
- difference = (ib_int64_t) (gr_lsn - lsn);
-
- difference = difference % group_size;
-
- difference = group_size - difference;
- }
-
- offset = (gr_lsn_size_offset + difference) % group_size;
-
- ut_a(offset < (((ib_int64_t) 1) << 32)); /* offset must be < 4 GB */
-
- /* fprintf(stderr,
- "Offset is %lu gr_lsn_offset is %lu difference is %lu\n",
- (ulint)offset,(ulint)gr_lsn_size_offset, (ulint)difference);
- */
-
- return(log_group_calc_real_offset((ulint)offset, group));
-}
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_DEBUG
-UNIV_INTERN ibool log_debug_writes = FALSE;
-#endif /* UNIV_DEBUG */
-
-/*******************************************************************//**
-Calculates where in log files we find a specified lsn.
-@return log file number */
-UNIV_INTERN
-ulint
-log_calc_where_lsn_is(
-/*==================*/
- ib_int64_t* log_file_offset, /*!< out: offset in that file
- (including the header) */
- ib_uint64_t first_header_lsn, /*!< in: first log file start
- lsn */
- ib_uint64_t lsn, /*!< in: lsn whose position to
- determine */
- ulint n_log_files, /*!< in: total number of log
- files */
- ib_int64_t log_file_size) /*!< in: log file size
- (including the header) */
-{
- ib_int64_t capacity = log_file_size - LOG_FILE_HDR_SIZE;
- ulint file_no;
- ib_int64_t add_this_many;
-
- if (lsn < first_header_lsn) {
- add_this_many = 1 + (first_header_lsn - lsn)
- / (capacity * (ib_int64_t)n_log_files);
- lsn += add_this_many
- * capacity * (ib_int64_t)n_log_files;
- }
-
- ut_a(lsn >= first_header_lsn);
-
- file_no = ((ulint)((lsn - first_header_lsn) / capacity))
- % n_log_files;
- *log_file_offset = (lsn - first_header_lsn) % capacity;
-
- *log_file_offset = *log_file_offset + LOG_FILE_HDR_SIZE;
-
- return(file_no);
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************//**
-Sets the field values in group to correspond to a given lsn. For this function
-to work, the values must already be correctly initialized to correspond to
-some lsn, for instance, a checkpoint lsn. */
-UNIV_INTERN
-void
-log_group_set_fields(
-/*=================*/
- log_group_t* group, /*!< in/out: group */
- ib_uint64_t lsn) /*!< in: lsn for which the values should be
- set */
-{
- group->lsn_offset = log_group_calc_lsn_offset(lsn, group);
- group->lsn = lsn;
-}
-
-/*****************************************************************//**
-Calculates the recommended highest values for lsn - last_checkpoint_lsn,
-lsn - buf_get_oldest_modification(), and lsn - max_archive_lsn_age.
-@return error value FALSE if the smallest log group is too small to
-accommodate the number of OS threads in the database server */
-static
-ibool
-log_calc_max_ages(void)
-/*===================*/
-{
- log_group_t* group;
- ulint margin;
- ulint free;
- ibool success = TRUE;
- ulint smallest_capacity;
- ulint archive_margin;
- ulint smallest_archive_margin;
-
- mutex_enter(&(log_sys->mutex));
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- ut_ad(group);
-
- smallest_capacity = ULINT_MAX;
- smallest_archive_margin = ULINT_MAX;
-
- while (group) {
- if (log_group_get_capacity(group) < smallest_capacity) {
-
- smallest_capacity = log_group_get_capacity(group);
- }
-
- archive_margin = log_group_get_capacity(group)
- - (group->file_size - LOG_FILE_HDR_SIZE)
- - LOG_ARCHIVE_EXTRA_MARGIN;
-
- if (archive_margin < smallest_archive_margin) {
-
- smallest_archive_margin = archive_margin;
- }
-
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-
- /* Add extra safety */
- smallest_capacity = smallest_capacity - smallest_capacity / 10;
-
- /* For each OS thread we must reserve so much free space in the
- smallest log group that it can accommodate the log entries produced
- by single query steps: running out of free log space is a serious
- system error which requires rebooting the database. */
-
- free = LOG_CHECKPOINT_FREE_PER_THREAD * (10 + srv_thread_concurrency)
- + LOG_CHECKPOINT_EXTRA_FREE;
- if (free >= smallest_capacity / 2) {
- success = FALSE;
-
- goto failure;
- } else {
- margin = smallest_capacity - free;
- }
-
- margin = ut_min(margin, log_sys->adm_checkpoint_interval);
-
- margin = margin - margin / 10; /* Add still some extra safety */
-
- log_sys->log_group_capacity = smallest_capacity;
-
- log_sys->max_modified_age_async = margin
- - margin / LOG_POOL_PREFLUSH_RATIO_ASYNC;
- log_sys->max_modified_age_sync = margin
- - margin / LOG_POOL_PREFLUSH_RATIO_SYNC;
-
- log_sys->max_checkpoint_age_async = margin - margin
- / LOG_POOL_CHECKPOINT_RATIO_ASYNC;
- log_sys->max_checkpoint_age = margin;
-
-#ifdef UNIV_LOG_ARCHIVE
- log_sys->max_archived_lsn_age = smallest_archive_margin;
-
- log_sys->max_archived_lsn_age_async = smallest_archive_margin
- - smallest_archive_margin / LOG_ARCHIVE_RATIO_ASYNC;
-#endif /* UNIV_LOG_ARCHIVE */
-failure:
- mutex_exit(&(log_sys->mutex));
-
- if (!success) {
- fprintf(stderr,
- "InnoDB: Error: ib_logfiles are too small"
- " for innodb_thread_concurrency %lu.\n"
- "InnoDB: The combined size of ib_logfiles"
- " should be bigger than\n"
- "InnoDB: 200 kB * innodb_thread_concurrency.\n"
- "InnoDB: To get mysqld to start up, set"
- " innodb_thread_concurrency in my.cnf\n"
- "InnoDB: to a lower value, for example, to 8."
- " After an ERROR-FREE shutdown\n"
- "InnoDB: of mysqld you can adjust the size of"
- " ib_logfiles, as explained in\n"
- "InnoDB: " REFMAN "adding-and-removing.html\n"
- "InnoDB: Cannot continue operation."
- " Calling exit(1).\n",
- (ulong)srv_thread_concurrency);
-
- exit(1);
- }
-
- return(success);
-}
-
-/******************************************************//**
-Initializes the log. */
-UNIV_INTERN
-void
-log_init(void)
-/*==========*/
-{
- log_sys = mem_alloc(sizeof(log_t));
-
- mutex_create(&log_sys->mutex, SYNC_LOG);
-
- mutex_enter(&(log_sys->mutex));
-
- /* Start the lsn from one log block from zero: this way every
- log record has a start lsn != zero, a fact which we will use */
-
- log_sys->lsn = LOG_START_LSN;
-
- ut_a(LOG_BUFFER_SIZE >= 16 * OS_FILE_LOG_BLOCK_SIZE);
- ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE);
-
- log_sys->buf_ptr = mem_alloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE);
- log_sys->buf = ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE);
-
- log_sys->buf_size = LOG_BUFFER_SIZE;
-
- memset(log_sys->buf, '\0', LOG_BUFFER_SIZE);
-
- log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO
- - LOG_BUF_FLUSH_MARGIN;
- log_sys->check_flush_or_checkpoint = TRUE;
- UT_LIST_INIT(log_sys->log_groups);
-
- log_sys->n_log_ios = 0;
-
- log_sys->n_log_ios_old = log_sys->n_log_ios;
- log_sys->last_printout_time = time(NULL);
- /*----------------------------*/
-
- log_sys->buf_next_to_write = 0;
-
- log_sys->write_lsn = 0;
- log_sys->current_flush_lsn = 0;
- log_sys->flushed_to_disk_lsn = 0;
-
- log_sys->written_to_some_lsn = log_sys->lsn;
- log_sys->written_to_all_lsn = log_sys->lsn;
-
- log_sys->n_pending_writes = 0;
-
- log_sys->no_flush_event = os_event_create(NULL);
-
- os_event_set(log_sys->no_flush_event);
-
- log_sys->one_flushed_event = os_event_create(NULL);
-
- os_event_set(log_sys->one_flushed_event);
-
- /*----------------------------*/
- log_sys->adm_checkpoint_interval = ULINT_MAX;
-
- log_sys->next_checkpoint_no = 0;
- log_sys->last_checkpoint_lsn = log_sys->lsn;
- log_sys->n_pending_checkpoint_writes = 0;
-
- rw_lock_create(&log_sys->checkpoint_lock, SYNC_NO_ORDER_CHECK);
-
- log_sys->checkpoint_buf_ptr = mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE);
- log_sys->checkpoint_buf = ut_align(log_sys->checkpoint_buf_ptr,
- OS_FILE_LOG_BLOCK_SIZE);
- memset(log_sys->checkpoint_buf, '\0', OS_FILE_LOG_BLOCK_SIZE);
- /*----------------------------*/
-
-#ifdef UNIV_LOG_ARCHIVE
- /* Under MySQL, log archiving is always off */
- log_sys->archiving_state = LOG_ARCH_OFF;
- log_sys->archived_lsn = log_sys->lsn;
- log_sys->next_archived_lsn = 0;
-
- log_sys->n_pending_archive_ios = 0;
-
- rw_lock_create(&log_sys->archive_lock, SYNC_NO_ORDER_CHECK);
-
- log_sys->archive_buf = NULL;
-
- /* ut_align(
- ut_malloc(LOG_ARCHIVE_BUF_SIZE
- + OS_FILE_LOG_BLOCK_SIZE),
- OS_FILE_LOG_BLOCK_SIZE); */
- log_sys->archive_buf_size = 0;
-
- /* memset(log_sys->archive_buf, '\0', LOG_ARCHIVE_BUF_SIZE); */
-
- log_sys->archiving_on = os_event_create(NULL);
-#endif /* UNIV_LOG_ARCHIVE */
-
- /*----------------------------*/
-
- log_block_init(log_sys->buf, log_sys->lsn);
- log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
-
- log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
- log_sys->lsn = LOG_START_LSN + LOG_BLOCK_HDR_SIZE;
-
- mutex_exit(&(log_sys->mutex));
-
-#ifdef UNIV_LOG_DEBUG
- recv_sys_create();
- recv_sys_init(buf_pool_get_curr_size());
-
- recv_sys->parse_start_lsn = log_sys->lsn;
- recv_sys->scanned_lsn = log_sys->lsn;
- recv_sys->scanned_checkpoint_no = 0;
- recv_sys->recovered_lsn = log_sys->lsn;
- recv_sys->limit_lsn = IB_ULONGLONG_MAX;
-#endif
-}
-
-/******************************************************************//**
-Inits a log group to the log system. */
-UNIV_INTERN
-void
-log_group_init(
-/*===========*/
- ulint id, /*!< in: group id */
- ulint n_files, /*!< in: number of log files */
- ulint file_size, /*!< in: log file size in bytes */
- ulint space_id, /*!< in: space id of the file space
- which contains the log files of this
- group */
- ulint archive_space_id __attribute__((unused)))
- /*!< in: space id of the file space
- which contains some archived log
- files for this group; currently, only
- for the first log group this is
- used */
-{
- ulint i;
-
- log_group_t* group;
-
- group = mem_alloc(sizeof(log_group_t));
-
- group->id = id;
- group->n_files = n_files;
- group->file_size = file_size;
- group->space_id = space_id;
- group->state = LOG_GROUP_OK;
- group->lsn = LOG_START_LSN;
- group->lsn_offset = LOG_FILE_HDR_SIZE;
- group->n_pending_writes = 0;
-
- group->file_header_bufs_ptr = mem_alloc(sizeof(byte*) * n_files);
- group->file_header_bufs = mem_alloc(sizeof(byte*) * n_files);
-#ifdef UNIV_LOG_ARCHIVE
- group->archive_file_header_bufs_ptr = mem_alloc(
- sizeof(byte*) * n_files);
- group->archive_file_header_bufs = mem_alloc(sizeof(byte*) * n_files);
-#endif /* UNIV_LOG_ARCHIVE */
-
- for (i = 0; i < n_files; i++) {
- group->file_header_bufs_ptr[i] = mem_alloc(
- LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
-
- group->file_header_bufs[i] = ut_align(
- group->file_header_bufs_ptr[i],
- OS_FILE_LOG_BLOCK_SIZE);
-
- memset(*(group->file_header_bufs + i), '\0',
- LOG_FILE_HDR_SIZE);
-
-#ifdef UNIV_LOG_ARCHIVE
- group->archive_file_header_bufs_ptr[i] = mem_alloc(
- LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
-
- group->archive_file_header_bufs[i] = ut_align(
- group->archive_file_header_bufs_ptr[i],
- OS_FILE_LOG_BLOCK_SIZE);
-
- memset(*(group->archive_file_header_bufs + i), '\0',
- LOG_FILE_HDR_SIZE);
-#endif /* UNIV_LOG_ARCHIVE */
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- group->archive_space_id = archive_space_id;
-
- group->archived_file_no = 0;
- group->archived_offset = 0;
-#endif /* UNIV_LOG_ARCHIVE */
-
- group->checkpoint_buf_ptr = mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE);
- group->checkpoint_buf = ut_align(group->checkpoint_buf_ptr,
- OS_FILE_LOG_BLOCK_SIZE);
-
- memset(group->checkpoint_buf, '\0', OS_FILE_LOG_BLOCK_SIZE);
-
- UT_LIST_ADD_LAST(log_groups, log_sys->log_groups, group);
-
- ut_a(log_calc_max_ages());
-}
-
-/******************************************************************//**
-Does the unlockings needed in flush i/o completion. */
-UNIV_INLINE
-void
-log_flush_do_unlocks(
-/*=================*/
- ulint code) /*!< in: any ORed combination of LOG_UNLOCK_FLUSH_LOCK
- and LOG_UNLOCK_NONE_FLUSHED_LOCK */
-{
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- /* NOTE that we must own the log mutex when doing the setting of the
- events: this is because transactions will wait for these events to
- be set, and at that moment the log flush they were waiting for must
- have ended. If the log mutex were not reserved here, the i/o-thread
- calling this function might be preempted for a while, and when it
- resumed execution, it might be that a new flush had been started, and
- this function would erroneously signal the NEW flush as completed.
- Thus, the changes in the state of these events are performed
- atomically in conjunction with the changes in the state of
- log_sys->n_pending_writes etc. */
-
- if (code & LOG_UNLOCK_NONE_FLUSHED_LOCK) {
- os_event_set(log_sys->one_flushed_event);
- }
-
- if (code & LOG_UNLOCK_FLUSH_LOCK) {
- os_event_set(log_sys->no_flush_event);
- }
-}
-
-/******************************************************************//**
-Checks if a flush is completed for a log group and does the completion
-routine if yes.
-@return LOG_UNLOCK_NONE_FLUSHED_LOCK or 0 */
-UNIV_INLINE
-ulint
-log_group_check_flush_completion(
-/*=============================*/
- log_group_t* group) /*!< in: log group */
-{
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- if (!log_sys->one_flushed && group->n_pending_writes == 0) {
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "Log flushed first to group %lu\n",
- (ulong) group->id);
- }
-#endif /* UNIV_DEBUG */
- log_sys->written_to_some_lsn = log_sys->write_lsn;
- log_sys->one_flushed = TRUE;
-
- return(LOG_UNLOCK_NONE_FLUSHED_LOCK);
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes && (group->n_pending_writes == 0)) {
-
- fprintf(stderr, "Log flushed to group %lu\n",
- (ulong) group->id);
- }
-#endif /* UNIV_DEBUG */
- return(0);
-}
-
-/******************************************************//**
-Checks if a flush is completed and does the completion routine if yes.
-@return LOG_UNLOCK_FLUSH_LOCK or 0 */
-static
-ulint
-log_sys_check_flush_completion(void)
-/*================================*/
-{
- ulint move_start;
- ulint move_end;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- if (log_sys->n_pending_writes == 0) {
-
- log_sys->written_to_all_lsn = log_sys->write_lsn;
- log_sys->buf_next_to_write = log_sys->write_end_offset;
-
- if (log_sys->write_end_offset > log_sys->max_buf_free / 2) {
- /* Move the log buffer content to the start of the
- buffer */
-
- move_start = ut_calc_align_down(
- log_sys->write_end_offset,
- OS_FILE_LOG_BLOCK_SIZE);
- move_end = ut_calc_align(log_sys->buf_free,
- OS_FILE_LOG_BLOCK_SIZE);
-
- ut_memmove(log_sys->buf, log_sys->buf + move_start,
- move_end - move_start);
- log_sys->buf_free -= move_start;
-
- log_sys->buf_next_to_write -= move_start;
- }
-
- return(LOG_UNLOCK_FLUSH_LOCK);
- }
-
- return(0);
-}
-
-/******************************************************//**
-Completes an i/o to a log file. The value of 'group' tells which kind of
-i/o finished: under UNIV_LOG_ARCHIVE the address of log_archive_io marks an
-archive write; a pointer whose lowest bit is set marks a checkpoint write to
-the group at (pointer - 1); any other value reaches ut_error, because
-ordinary log writes are currently issued synchronously. */
-UNIV_INTERN
-void
-log_io_complete(
-/*============*/
- log_group_t* group) /*!< in: log group or a dummy pointer */
-{
- ulint unlock;
-
-#ifdef UNIV_LOG_ARCHIVE
- if ((byte*)group == &log_archive_io) {
- /* It was an archive write */
-
- log_io_complete_archive();
-
- return;
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- if ((ulint)group & 0x1UL) {
- /* It was a checkpoint write: the lowest bit is a tag, so
- subtract 1 to recover the real group pointer. The file flush
- below is skipped for the O_DSYNC and NOSYNC flush methods. */
- group = (log_group_t*)((ulint)group - 1);
-
- if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
- && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
-
- fil_flush(group->space_id);
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "Checkpoint info written to group %lu\n",
- group->id);
- }
-#endif /* UNIV_DEBUG */
- log_io_complete_checkpoint();
-
- return;
- }
-
- ut_error; /*!< We currently use synchronous writing of the
- logs and cannot end up here! NOTE(review): the
- statements below presumably only matter if ut_error
- returns or asynchronous log writes are re-enabled
- -- confirm */
-
- if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
- && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
- && srv_flush_log_at_trx_commit != 2) {
-
- fil_flush(group->space_id);
- }
-
- mutex_enter(&(log_sys->mutex));
- ut_ad(!recv_no_log_write);
-
- ut_a(group->n_pending_writes > 0);
- ut_a(log_sys->n_pending_writes > 0);
-
- group->n_pending_writes--;
- log_sys->n_pending_writes--;
-
- unlock = log_group_check_flush_completion(group);
- unlock = unlock | log_sys_check_flush_completion();
-
- log_flush_do_unlocks(unlock);
-
- mutex_exit(&(log_sys->mutex));
-}
-
-/******************************************************//**
-Writes a log file header to a log file space. The group id and the start
-lsn are stored into the header buffer of the nth file, the first 4 bytes of
-the hot backup label field are overwritten, and, if log_do_write is set, one
-block of OS_FILE_LOG_BLOCK_SIZE bytes is written with a synchronous fil_io()
-call to byte offset nth_file * group->file_size of the group's space. The
-caller must own log_sys->mutex (asserted in debug builds). */
-static
-void
-log_group_file_header_flush(
-/*========================*/
- log_group_t* group, /*!< in: log group */
- ulint nth_file, /*!< in: header to the nth file in the
- log file space; must be less than
- group->n_files */
- ib_uint64_t start_lsn) /*!< in: log file data starts at this
- lsn */
-{
- byte* buf;
- ulint dest_offset;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
- ut_ad(!recv_no_log_write);
- ut_a(nth_file < group->n_files);
-
- buf = *(group->file_header_bufs + nth_file);
-
- mach_write_to_4(buf + LOG_GROUP_ID, group->id);
- mach_write_ull(buf + LOG_FILE_START_LSN, start_lsn);
-
- /* Wipe over possible label of ibbackup --restore */
- memcpy(buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, " ", 4);
-
- dest_offset = nth_file * group->file_size;
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "Writing log file header to group %lu file %lu\n",
- (ulong) group->id, (ulong) nth_file);
- }
-#endif /* UNIV_DEBUG */
- if (log_do_write) {
- log_sys->n_log_ios++;
-
- srv_os_log_pending_writes++;
-
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id, 0,
- dest_offset / UNIV_PAGE_SIZE,
- dest_offset % UNIV_PAGE_SIZE,
- OS_FILE_LOG_BLOCK_SIZE,
- buf, group);
-
- srv_os_log_pending_writes--;
- }
-}
-
-/******************************************************//**
-Stores a 4-byte checksum to the trailer checksum field of a log block
-before writing it to a log file. This checksum is used in recovery to
-check the consistency of a log block. The value stored is whatever
-log_block_calc_checksum() returns for the block at the time of the call, so
-the block contents must be final before this is called. */
-static
-void
-log_block_store_checksum(
-/*=====================*/
- byte* block) /*!< in/out: pointer to a log block */
-{
- log_block_set_checksum(block, log_block_calc_checksum(block));
-}
-
-/******************************************************//**
-Writes a buffer to a log file group. The buffer is written in pieces so
-that no single fil_io() call crosses a log file boundary inside the group:
-each pass through 'loop' writes at most up to the end of the current file and
-then continues with the remainder. Before a piece is written, a file header
-is flushed if the piece starts exactly LOG_FILE_HDR_SIZE bytes into a file
-and write_header is set; write_header is initially TRUE only when
-new_data_offset is 0 and is forced to TRUE after the first piece. Checksums
-are stored into every block of a piece before it is written. The caller must
-own log_sys->mutex (asserted in debug builds). */
-UNIV_INTERN
-void
-log_group_write_buf(
-/*================*/
- log_group_t* group, /*!< in: log group */
- byte* buf, /*!< in: buffer */
- ulint len, /*!< in: buffer len; must be divisible
- by OS_FILE_LOG_BLOCK_SIZE */
- ib_uint64_t start_lsn, /*!< in: start lsn of the buffer; must
- be divisible by
- OS_FILE_LOG_BLOCK_SIZE */
- ulint new_data_offset)/*!< in: start offset of new data in
- buf: this parameter is used to decide
- if we have to write a new log file
- header */
-{
- ulint write_len;
- ibool write_header;
- ulint next_offset;
- ulint i;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
- ut_ad(!recv_no_log_write);
- ut_a(len % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_a(((ulint) start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
-
- if (new_data_offset == 0) {
- write_header = TRUE;
- } else {
- write_header = FALSE;
- }
-loop:
- if (len == 0) {
-
- return;
- }
-
- next_offset = log_group_calc_lsn_offset(start_lsn, group);
-
- if ((next_offset % group->file_size == LOG_FILE_HDR_SIZE)
- && write_header) {
- /* We start to write a new log file instance in the group.
- The header write is also counted in the srv_ statistics. */
-
- log_group_file_header_flush(group,
- next_offset / group->file_size,
- start_lsn);
- srv_os_log_written+= OS_FILE_LOG_BLOCK_SIZE;
- srv_log_writes++;
- }
-
- if ((next_offset % group->file_size) + len > group->file_size) {
-
- write_len = group->file_size
- - (next_offset % group->file_size);
- } else {
- write_len = len;
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
-
- fprintf(stderr,
- "Writing log file segment to group %lu"
- " offset %lu len %lu\n"
- "start lsn %llu\n"
- "First block n:o %lu last block n:o %lu\n",
- (ulong) group->id, (ulong) next_offset,
- (ulong) write_len,
- start_lsn,
- (ulong) log_block_get_hdr_no(buf),
- (ulong) log_block_get_hdr_no(
- buf + write_len - OS_FILE_LOG_BLOCK_SIZE));
- ut_a(log_block_get_hdr_no(buf)
- == log_block_convert_lsn_to_no(start_lsn));
-
- for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
-
- ut_a(log_block_get_hdr_no(buf) + i
- == log_block_get_hdr_no(
- buf + i * OS_FILE_LOG_BLOCK_SIZE));
- }
- }
-#endif /* UNIV_DEBUG */
- /* Calculate the checksums for each log block and write them to
- the trailer fields of the log blocks. This is done even when
- log_do_write is not set. */
-
- for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
- log_block_store_checksum(buf + i * OS_FILE_LOG_BLOCK_SIZE);
- }
-
- if (log_do_write) {
- log_sys->n_log_ios++;
-
- srv_os_log_pending_writes++;
-
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id, 0,
- next_offset / UNIV_PAGE_SIZE,
- next_offset % UNIV_PAGE_SIZE, write_len, buf, group);
-
- srv_os_log_pending_writes--;
-
- srv_os_log_written+= write_len;
- srv_log_writes++;
- }
-
- if (write_len < len) {
- start_lsn += write_len;
- len -= write_len;
- buf += write_len;
-
- write_header = TRUE;
-
- goto loop;
- }
-}
-
-/******************************************************//**
-This function is called, e.g., when a transaction wants to commit. It checks
-that the log has been written to the log file up to the last log entry written
-by the transaction. If there is a flush running, it waits and checks if the
-flush flushed enough. If not, starts a new flush.
-
-Outline of the code below: return at once if recv_no_ibuf_operations is set
-or if the requested lsn is already covered; if another write is pending,
-either wait for it (do_waits) when it will cover the lsn, or wait for
-no_flush_event and retry from 'loop'; otherwise write the block-aligned
-area between buf_next_to_write and buf_free to every log group, flush the
-first group's file space if requested, and finally decrement the pending
-write counters and release waiters.
-
-Note that log_sys->flushed_to_disk_lsn is assigned between the mutex_exit()
-that follows the group writes and the subsequent mutex_enter(), i.e. while
-log_sys->mutex is not held by this thread. */
-UNIV_INTERN
-void
-log_write_up_to(
-/*============*/
- ib_uint64_t lsn, /*!< in: log sequence number up to which
- the log should be written,
- IB_ULONGLONG_MAX if not specified */
- ulint wait, /*!< in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
- or LOG_WAIT_ALL_GROUPS */
- ibool flush_to_disk)
- /*!< in: TRUE if we want the written log
- also to be flushed to disk */
-{
- log_group_t* group;
- ulint start_offset;
- ulint end_offset;
- ulint area_start;
- ulint area_end;
-#ifdef UNIV_DEBUG
- ulint loop_count = 0;
-#endif /* UNIV_DEBUG */
- ulint unlock;
-
- if (recv_no_ibuf_operations) {
- /* Recovery is running and no operations on the log files are
- allowed yet (the variable name .._no_ibuf_.. is misleading) */
-
- return;
- }
-
-loop:
-#ifdef UNIV_DEBUG
- loop_count++;
-
- ut_ad(loop_count < 5);
-
-# if 0
- if (loop_count > 2) {
- fprintf(stderr, "Log loop count %lu\n", loop_count);
- }
-# endif
-#endif
-
- mutex_enter(&(log_sys->mutex));
- ut_ad(!recv_no_log_write);
-
- if (flush_to_disk
- && log_sys->flushed_to_disk_lsn >= lsn) {
-
- mutex_exit(&(log_sys->mutex));
-
- return;
- }
-
- if (!flush_to_disk
- && (log_sys->written_to_all_lsn >= lsn
- || (log_sys->written_to_some_lsn >= lsn
- && wait != LOG_WAIT_ALL_GROUPS))) {
-
- mutex_exit(&(log_sys->mutex));
-
- return;
- }
-
- if (log_sys->n_pending_writes > 0) {
- /* A write (+ possibly flush to disk) is running */
-
- if (flush_to_disk
- && log_sys->current_flush_lsn >= lsn) {
- /* The write + flush will write enough: wait for it to
- complete */
-
- goto do_waits;
- }
-
- if (!flush_to_disk
- && log_sys->write_lsn >= lsn) {
- /* The write will write enough: wait for it to
- complete */
-
- goto do_waits;
- }
-
- mutex_exit(&(log_sys->mutex));
-
- /* Wait for the write to complete and try to start a new
- write */
-
- os_event_wait(log_sys->no_flush_event);
-
- goto loop;
- }
-
- if (!flush_to_disk
- && log_sys->buf_free == log_sys->buf_next_to_write) {
- /* Nothing to write and no flush to disk requested */
-
- mutex_exit(&(log_sys->mutex));
-
- return;
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "Writing log from %llu up to lsn %llu\n",
- log_sys->written_to_all_lsn,
- log_sys->lsn);
- }
-#endif /* UNIV_DEBUG */
- log_sys->n_pending_writes++;
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
- group->n_pending_writes++; /*!< We assume here that we have only
- one log group! */
-
- os_event_reset(log_sys->no_flush_event);
- os_event_reset(log_sys->one_flushed_event);
-
- start_offset = log_sys->buf_next_to_write;
- end_offset = log_sys->buf_free;
-
- area_start = ut_calc_align_down(start_offset, OS_FILE_LOG_BLOCK_SIZE);
- area_end = ut_calc_align(end_offset, OS_FILE_LOG_BLOCK_SIZE);
-
- ut_ad(area_end - area_start > 0);
-
- log_sys->write_lsn = log_sys->lsn;
-
- if (flush_to_disk) {
- log_sys->current_flush_lsn = log_sys->lsn;
- }
-
- log_sys->one_flushed = FALSE;
-
- log_block_set_flush_bit(log_sys->buf + area_start, TRUE);
- log_block_set_checkpoint_no(
- log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
- log_sys->next_checkpoint_no);
-
- /* Copy the last, incompletely written, log block a log block length
- up, so that when the flush operation writes from the log buffer, the
- segment to write will not be changed by writers to the log */
-
- ut_memcpy(log_sys->buf + area_end,
- log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
- OS_FILE_LOG_BLOCK_SIZE);
-
- log_sys->buf_free += OS_FILE_LOG_BLOCK_SIZE;
- log_sys->write_end_offset = log_sys->buf_free;
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- /* Do the write to the log files: the same block-aligned area is
- written to every group in the list, starting from the lsn
- written_to_all_lsn aligned down to a block boundary */
-
- while (group) {
- log_group_write_buf(
- group, log_sys->buf + area_start,
- area_end - area_start,
- ut_uint64_align_down(log_sys->written_to_all_lsn,
- OS_FILE_LOG_BLOCK_SIZE),
- start_offset - area_start);
-
- log_group_set_fields(group, log_sys->write_lsn);
-
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-
- mutex_exit(&(log_sys->mutex));
-
- if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
- /* O_DSYNC means the OS did not buffer the log file at all:
- so we have also flushed to disk what we have written */
-
- log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
-
- } else if (flush_to_disk) {
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- fil_flush(group->space_id);
- log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
- }
-
- mutex_enter(&(log_sys->mutex));
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- ut_a(group->n_pending_writes == 1);
- ut_a(log_sys->n_pending_writes == 1);
-
- group->n_pending_writes--;
- log_sys->n_pending_writes--;
-
- unlock = log_group_check_flush_completion(group);
- unlock = unlock | log_sys_check_flush_completion();
-
- log_flush_do_unlocks(unlock);
-
- mutex_exit(&(log_sys->mutex));
-
- return;
-
-do_waits:
- mutex_exit(&(log_sys->mutex));
-
- switch (wait) {
- case LOG_WAIT_ONE_GROUP:
- os_event_wait(log_sys->one_flushed_event);
- break;
- case LOG_WAIT_ALL_GROUPS:
- os_event_wait(log_sys->no_flush_event);
- break;
-#ifdef UNIV_DEBUG
- case LOG_NO_WAIT:
- break;
- default:
- ut_error;
-#endif /* UNIV_DEBUG */
- }
-}
-
-/****************************************************************//**
-Does a synchronous flush of the log buffer to disk. The current
-log_sys->lsn is sampled under log_sys->mutex, the mutex is released, and
-log_write_up_to() is then called for that lsn with LOG_WAIT_ALL_GROUPS and
-flush_to_disk = TRUE. */
-UNIV_INTERN
-void
-log_buffer_flush_to_disk(void)
-/*==========================*/
-{
- ib_uint64_t lsn;
-
- mutex_enter(&(log_sys->mutex));
-
- lsn = log_sys->lsn;
-
- mutex_exit(&(log_sys->mutex));
-
- log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE);
-}
-
-/****************************************************************//**
-This function writes the log buffer to the log file and if 'flush'
-is set it forces a flush of the log file as well. This is meant to be
-called from background master thread only as it does not wait for
-the write (+ possible flush) to finish. The lsn written up to is the value
-of log_sys->lsn sampled under log_sys->mutex; log_write_up_to() is called
-with LOG_NO_WAIT after the mutex has been released. */
-UNIV_INTERN
-void
-log_buffer_sync_in_background(
-/*==========================*/
- ibool flush) /*!< in: flush the logs to disk */
-{
- ib_uint64_t lsn;
-
- mutex_enter(&(log_sys->mutex));
-
- lsn = log_sys->lsn;
-
- mutex_exit(&(log_sys->mutex));
-
- log_write_up_to(lsn, LOG_NO_WAIT, flush);
-}
-
-/********************************************************************
-
-Tries to establish a big enough margin of free space in the log buffer, such
-that a new log entry can be catenated without an immediate need for a flush.
-A write is started only when buf_free exceeds max_buf_free and no write is
-pending; in that case the log is written up to the lsn sampled under the
-mutex, with LOG_NO_WAIT and without a flush to disk. The local 'lsn' stays 0
-when no write is needed, and 0 is used as the "do nothing" marker. */
-static
-void
-log_flush_margin(void)
-/*==================*/
-{
- log_t* log = log_sys;
- ib_uint64_t lsn = 0;
-
- mutex_enter(&(log->mutex));
-
- if (log->buf_free > log->max_buf_free) {
-
- if (log->n_pending_writes > 0) {
- /* A flush is running: hope that it will provide enough
- free space */
- } else {
- lsn = log->lsn;
- }
- }
-
- mutex_exit(&(log->mutex));
-
- if (lsn) {
- log_write_up_to(lsn, LOG_NO_WAIT, FALSE);
- }
-}
-
-/****************************************************************//**
-Advances the smallest lsn for which there are unflushed dirty blocks in the
-buffer pool. NOTE: this function may only be called if the calling thread owns
-no synchronization objects!
-The work is done by one buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX,
-new_oldest) call; when 'sync' is TRUE the function also waits for the
-BUF_FLUSH_LIST batch to end, and it does so before the batch result is
-examined. A batch result of ULINT_UNDEFINED is reported as FALSE.
-@return FALSE if there was a flush batch of the same type running,
-which means that we could not start this flush batch */
-UNIV_INTERN
-ibool
-log_preflush_pool_modified_pages(
-/*=============================*/
- ib_uint64_t new_oldest, /*!< in: try to advance
- oldest_modified_lsn at least
- to this lsn */
- ibool sync) /*!< in: TRUE if synchronous
- operation is desired */
-{
- ulint n_pages;
-
- if (recv_recovery_on) {
- /* If the recovery is running, we must first apply all
- log records to their respective file pages to get the
- right modify lsn values to these pages: otherwise, there
- might be pages on disk which are not yet recovered to the
- current lsn, and even after calling this function, we could
- not know how up-to-date the disk version of the database is,
- and we could not make a new checkpoint on the basis of the
- info on the buffer pool only. */
-
- recv_apply_hashed_log_recs(TRUE);
- }
-
- n_pages = buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX, new_oldest);
-
- if (sync) {
- buf_flush_wait_batch_end(BUF_FLUSH_LIST);
- }
-
- if (n_pages == ULINT_UNDEFINED) {
-
- return(FALSE);
- }
-
- return(TRUE);
-}
-
-/******************************************************//**
-Completes a checkpoint. Increments next_checkpoint_no, copies
-next_checkpoint_lsn to last_checkpoint_lsn and releases the exclusive
-checkpoint_lock. The caller must own log_sys->mutex and there must be no
-pending checkpoint writes (both asserted in debug builds). */
-static
-void
-log_complete_checkpoint(void)
-/*=========================*/
-{
- ut_ad(mutex_own(&(log_sys->mutex)));
- ut_ad(log_sys->n_pending_checkpoint_writes == 0);
-
- log_sys->next_checkpoint_no++;
-
- log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn;
-
- rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT);
-}
-
-/******************************************************//**
-Completes an asynchronous checkpoint info write i/o to a log file.
-Decrements n_pending_checkpoint_writes under log_sys->mutex and, when the
-count reaches zero, calls log_complete_checkpoint() while still holding the
-mutex. */
-static
-void
-log_io_complete_checkpoint(void)
-/*============================*/
-{
- mutex_enter(&(log_sys->mutex));
-
- ut_ad(log_sys->n_pending_checkpoint_writes > 0);
-
- log_sys->n_pending_checkpoint_writes--;
-
- if (log_sys->n_pending_checkpoint_writes == 0) {
- log_complete_checkpoint();
- }
-
- mutex_exit(&(log_sys->mutex));
-}
-
-/*******************************************************************//**
-Writes info to a checkpoint about a log group. Each slot occupies 8 bytes
-starting at LOG_CHECKPOINT_GROUP_ARRAY + 8 * n and holds two 4-byte fields
-written with mach_write_to_4(): the archived file number and the archived
-file offset. */
-static
-void
-log_checkpoint_set_nth_group_info(
-/*==============================*/
- byte* buf, /*!< in: buffer for checkpoint info */
- ulint n, /*!< in: nth slot; must be less than
- LOG_MAX_N_GROUPS */
- ulint file_no,/*!< in: archived file number */
- ulint offset) /*!< in: archived file offset */
-{
- ut_ad(n < LOG_MAX_N_GROUPS);
-
- mach_write_to_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
- + 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO, file_no);
- mach_write_to_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
- + 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET, offset);
-}
-
-/*******************************************************************//**
-Gets info from a checkpoint about a log group. Reads the two 4-byte fields
-of the 8-byte slot at LOG_CHECKPOINT_GROUP_ARRAY + 8 * n with
-mach_read_from_4(); this is the same layout that
-log_checkpoint_set_nth_group_info() writes. */
-UNIV_INTERN
-void
-log_checkpoint_get_nth_group_info(
-/*==============================*/
- const byte* buf, /*!< in: buffer containing checkpoint info */
- ulint n, /*!< in: nth slot; must be less than
- LOG_MAX_N_GROUPS */
- ulint* file_no,/*!< out: archived file number */
- ulint* offset) /*!< out: archived file offset */
-{
- ut_ad(n < LOG_MAX_N_GROUPS);
-
- *file_no = mach_read_from_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
- + 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO);
- *offset = mach_read_from_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
- + 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET);
-}
-
-/******************************************************//**
-Writes the checkpoint info to a log group header. The group's
-checkpoint_buf is filled with the checkpoint number, lsn, lsn offset in the
-group, log buffer size, archived lsn, the per-group archive slots, two
-checksums and the tablespace free limit with its magic number. If
-log_do_write is set, the buffer is then written with a non-synchronous
-fil_io() call to LOG_CHECKPOINT_1 when next_checkpoint_no is even and to
-LOG_CHECKPOINT_2 when it is odd. The exclusive checkpoint_lock is taken when
-the first pending checkpoint write is registered. The caller must own
-log_sys->mutex (asserted in debug builds). */
-static
-void
-log_group_checkpoint(
-/*=================*/
- log_group_t* group) /*!< in: log group */
-{
- log_group_t* group2;
-#ifdef UNIV_LOG_ARCHIVE
- ib_uint64_t archived_lsn;
- ib_uint64_t next_archived_lsn;
-#endif /* UNIV_LOG_ARCHIVE */
- ulint write_offset;
- ulint fold;
- byte* buf;
- ulint i;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-#if LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE
-# error "LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE"
-#endif
-
- buf = group->checkpoint_buf;
-
- mach_write_ull(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no);
- mach_write_ull(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn);
-
- mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET,
- log_group_calc_lsn_offset(
- log_sys->next_checkpoint_lsn, group));
-
- mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, log_sys->buf_size);
-
-#ifdef UNIV_LOG_ARCHIVE
- if (log_sys->archiving_state == LOG_ARCH_OFF) {
- archived_lsn = IB_ULONGLONG_MAX;
- } else {
- archived_lsn = log_sys->archived_lsn;
-
- if (archived_lsn != log_sys->next_archived_lsn) {
- next_archived_lsn = log_sys->next_archived_lsn;
- /* For debugging only: next_archived_lsn is not read
- again in this function */
- }
- }
-
- mach_write_ull(buf + LOG_CHECKPOINT_ARCHIVED_LSN, archived_lsn);
-#else /* UNIV_LOG_ARCHIVE */
- mach_write_ull(buf + LOG_CHECKPOINT_ARCHIVED_LSN, IB_ULONGLONG_MAX);
-#endif /* UNIV_LOG_ARCHIVE */
-
- for (i = 0; i < LOG_MAX_N_GROUPS; i++) {
- log_checkpoint_set_nth_group_info(buf, i, 0, 0);
- }
-
- group2 = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- while (group2) {
- log_checkpoint_set_nth_group_info(buf, group2->id,
-#ifdef UNIV_LOG_ARCHIVE
- group2->archived_file_no,
- group2->archived_offset
-#else /* UNIV_LOG_ARCHIVE */
- 0, 0
-#endif /* UNIV_LOG_ARCHIVE */
- );
-
- group2 = UT_LIST_GET_NEXT(log_groups, group2);
- }
-
- fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
- mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
-
- fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
- LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
- mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);
-
- /* Starting from InnoDB-3.23.50, we also write info on allocated
- size in the tablespace. These fields are stored after both
- checksums have been computed. */
-
- mach_write_to_4(buf + LOG_CHECKPOINT_FSP_FREE_LIMIT,
- log_fsp_current_free_limit);
-
- mach_write_to_4(buf + LOG_CHECKPOINT_FSP_MAGIC_N,
- LOG_CHECKPOINT_FSP_MAGIC_N_VAL);
-
- /* We alternate the physical place of the checkpoint info in the first
- log file */
-
- if ((log_sys->next_checkpoint_no & 1) == 0) {
- write_offset = LOG_CHECKPOINT_1;
- } else {
- write_offset = LOG_CHECKPOINT_2;
- }
-
- if (log_do_write) {
- if (log_sys->n_pending_checkpoint_writes == 0) {
-
- rw_lock_x_lock_gen(&(log_sys->checkpoint_lock),
- LOG_CHECKPOINT);
- }
-
- log_sys->n_pending_checkpoint_writes++;
-
- log_sys->n_log_ios++;
-
- /* We send as the last parameter the group machine address
- added with 1, as we want to distinguish between a normal log
- file write and a checkpoint field write. The assertion after
- the call checks that the lowest bit of the real address is
- clear, which is what makes the tag recoverable. */
-
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->space_id, 0,
- write_offset / UNIV_PAGE_SIZE,
- write_offset % UNIV_PAGE_SIZE,
- OS_FILE_LOG_BLOCK_SIZE,
- buf, ((byte*)group + 1));
-
- ut_ad(((ulint)group & 0x1UL) == 0);
- }
-}
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_HOTBACKUP
-/******************************************************//**
-Writes info to a buffer of a log group when log files are created in
-backup restoration. Only compiled under UNIV_HOTBACKUP. The header gets
-group id 0, the given start lsn and the label "ibbackup " followed by a
-timestamp. Only the LOG_CHECKPOINT_1 slot is filled in: checkpoint number 0,
-lsn start + LOG_BLOCK_HDR_SIZE, offset LOG_FILE_HDR_SIZE +
-LOG_BLOCK_HDR_SIZE, a hard-coded log buffer size of 2 * 1024 * 1024,
-archived lsn IB_ULONGLONG_MAX, and the two checksums. Nothing is written to
-disk here; the function only fills hdr_buf. */
-UNIV_INTERN
-void
-log_reset_first_header_and_checkpoint(
-/*==================================*/
- byte* hdr_buf,/*!< in: buffer which will be written to the
- start of the first log file */
- ib_uint64_t start) /*!< in: lsn of the start of the first log file;
- we pretend that there is a checkpoint at
- start + LOG_BLOCK_HDR_SIZE */
-{
- ulint fold;
- byte* buf;
- ib_uint64_t lsn;
-
- mach_write_to_4(hdr_buf + LOG_GROUP_ID, 0);
- mach_write_ull(hdr_buf + LOG_FILE_START_LSN, start);
-
- lsn = start + LOG_BLOCK_HDR_SIZE;
-
- /* Write the label of ibbackup --restore; the timestamp is printed
- right after the label text, over its terminating NUL */
- strcpy((char*) hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
- "ibbackup ");
- ut_sprintf_timestamp((char*) hdr_buf
- + (LOG_FILE_WAS_CREATED_BY_HOT_BACKUP
- + (sizeof "ibbackup ") - 1));
- buf = hdr_buf + LOG_CHECKPOINT_1;
-
- mach_write_ull(buf + LOG_CHECKPOINT_NO, 0);
- mach_write_ull(buf + LOG_CHECKPOINT_LSN, lsn);
-
- mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET,
- LOG_FILE_HDR_SIZE + LOG_BLOCK_HDR_SIZE);
-
- mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, 2 * 1024 * 1024);
-
- mach_write_ull(buf + LOG_CHECKPOINT_ARCHIVED_LSN, IB_ULONGLONG_MAX);
-
- fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
- mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
-
- fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
- LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
- mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);
-
- /* Starting from InnoDB-3.23.50, we should also write info on
- allocated size in the tablespace, but unfortunately we do not
- know it here */
-}
-#endif /* UNIV_HOTBACKUP */
-
-#ifndef UNIV_HOTBACKUP
-/******************************************************//**
-Reads a checkpoint info from a log group header to log_sys->checkpoint_buf.
-One block of OS_FILE_LOG_BLOCK_SIZE bytes is read with a synchronous
-fil_io() call from byte offset 'field' of the group's file space. The caller
-must own log_sys->mutex (asserted in debug builds). */
-UNIV_INTERN
-void
-log_group_read_checkpoint_info(
-/*===========================*/
- log_group_t* group, /*!< in: log group */
- ulint field) /*!< in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
-{
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- log_sys->n_log_ios++;
-
- fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->space_id, 0,
- field / UNIV_PAGE_SIZE, field % UNIV_PAGE_SIZE,
- OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL);
-}
-
-/******************************************************//**
-Writes checkpoint info to groups. Calls log_group_checkpoint() once for
-every group in log_sys->log_groups, in list order. The caller must own
-log_sys->mutex (asserted in debug builds). */
-UNIV_INTERN
-void
-log_groups_write_checkpoint_info(void)
-/*==================================*/
-{
- log_group_t* group;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- while (group) {
- log_group_checkpoint(group);
-
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-}
-
-/******************************************************//**
-Makes a checkpoint. Note that this function does not flush dirty
-blocks from the buffer pool: it only checks what is lsn of the oldest
-modification in the pool, and writes information about the lsn in
-log files. Use log_make_checkpoint_at to flush also the pool.
-Note that TRUE is also returned when no write is made because
-last_checkpoint_lsn is already at or past the oldest modification lsn and
-write_always is FALSE. When a checkpoint write is already running and 'sync'
-is TRUE, the function waits for that write before returning FALSE.
-@return TRUE if success, FALSE if a checkpoint write was already running */
-UNIV_INTERN
-ibool
-log_checkpoint(
-/*===========*/
- ibool sync, /*!< in: TRUE if synchronous operation is
- desired */
- ibool write_always) /*!< in: the function normally checks if
- the new checkpoint would have a greater
- lsn than the previous one: if not, then no
- physical write is done; by setting this
- parameter TRUE, a physical write will always be
- made to log files */
-{
- ib_uint64_t oldest_lsn;
-
- if (recv_recovery_is_on()) {
- recv_apply_hashed_log_recs(TRUE);
- }
-
- if (srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
- fil_flush_file_spaces(FIL_TABLESPACE);
- }
-
- mutex_enter(&(log_sys->mutex));
-
- ut_ad(!recv_no_log_write);
- oldest_lsn = log_buf_pool_get_oldest_modification();
-
- mutex_exit(&(log_sys->mutex));
-
- /* Because log also contains headers and dummy log records,
- if the buffer pool contains no dirty buffers, oldest_lsn
- gets the value log_sys->lsn from the previous function,
- and we must make sure that the log is flushed up to that
- lsn. If there are dirty buffers in the buffer pool, then our
- write-ahead-logging algorithm ensures that the log has been flushed
- up to oldest_lsn. */
-
- log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
-
- mutex_enter(&(log_sys->mutex));
-
- if (!write_always
- && log_sys->last_checkpoint_lsn >= oldest_lsn) {
-
- mutex_exit(&(log_sys->mutex));
-
- return(TRUE);
- }
-
- ut_ad(log_sys->written_to_all_lsn >= oldest_lsn);
-
- if (log_sys->n_pending_checkpoint_writes > 0) {
- /* A checkpoint write is running */
-
- mutex_exit(&(log_sys->mutex));
-
- if (sync) {
- /* Wait for the checkpoint write to complete: the
- s-lock is taken and released at once, only to block
- until the lock becomes available */
- rw_lock_s_lock(&(log_sys->checkpoint_lock));
- rw_lock_s_unlock(&(log_sys->checkpoint_lock));
- }
-
- return(FALSE);
- }
-
- log_sys->next_checkpoint_lsn = oldest_lsn;
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr, "Making checkpoint no %lu at lsn %llu\n",
- (ulong) log_sys->next_checkpoint_no,
- oldest_lsn);
- }
-#endif /* UNIV_DEBUG */
-
- log_groups_write_checkpoint_info();
-
- mutex_exit(&(log_sys->mutex));
-
- if (sync) {
- /* Wait for the checkpoint write to complete */
- rw_lock_s_lock(&(log_sys->checkpoint_lock));
- rw_lock_s_unlock(&(log_sys->checkpoint_lock));
- }
-
- return(TRUE);
-}
-
-/****************************************************************//**
-Makes a checkpoint at a given lsn or later. First repeats a synchronous
-log_preflush_pool_modified_pages(lsn, TRUE) until it returns TRUE, then
-repeats a synchronous log_checkpoint(TRUE, write_always) until it returns
-TRUE. Both loops have empty bodies and no upper bound on the number of
-retries. */
-UNIV_INTERN
-void
-log_make_checkpoint_at(
-/*===================*/
- ib_uint64_t lsn, /*!< in: make a checkpoint at this or a
- later lsn, if IB_ULONGLONG_MAX, makes
- a checkpoint at the latest lsn */
- ibool write_always) /*!< in: the function normally checks if
- the new checkpoint would have a
- greater lsn than the previous one: if
- not, then no physical write is done;
- by setting this parameter TRUE, a
- physical write will always be made to
- log files */
-{
- /* Preflush pages synchronously */
-
- while (!log_preflush_pool_modified_pages(lsn, TRUE));
-
- while (!log_checkpoint(TRUE, write_always));
-}
-
-/****************************************************************//**
-Tries to establish a big enough margin of free space in the log groups, such
-that a new log entry can be catenated without an immediate need for a
-checkpoint. NOTE: this function may only be called if the calling thread
-owns no synchronization objects!
-
-Two ages are computed under log->mutex: 'age' is lsn minus the oldest
-modification lsn in the buffer pool and decides whether a preflush is needed
-(synchronous above max_modified_age_sync, asynchronous above
-max_modified_age_async); 'checkpoint_age' is lsn minus last_checkpoint_lsn
-and decides whether a checkpoint is needed (synchronous above
-max_checkpoint_age, asynchronous above max_checkpoint_age_async). The
-function returns at once if check_flush_or_checkpoint is FALSE, and it
-starts over from 'loop' after a failed synchronous preflush or after a
-synchronous checkpoint. */
-static
-void
-log_checkpoint_margin(void)
-/*=======================*/
-{
- log_t* log = log_sys;
- ib_uint64_t age;
- ib_uint64_t checkpoint_age;
- ib_uint64_t advance;
- ib_uint64_t oldest_lsn;
- ibool sync;
- ibool checkpoint_sync;
- ibool do_checkpoint;
- ibool success;
-loop:
- sync = FALSE;
- checkpoint_sync = FALSE;
- do_checkpoint = FALSE;
-
- mutex_enter(&(log->mutex));
- ut_ad(!recv_no_log_write);
-
- if (log->check_flush_or_checkpoint == FALSE) {
- mutex_exit(&(log->mutex));
-
- return;
- }
-
- oldest_lsn = log_buf_pool_get_oldest_modification();
-
- age = log->lsn - oldest_lsn;
-
- if (age > log->max_modified_age_sync) {
-
- /* A flush is urgent: we have to do a synchronous preflush.
- The advance is twice the amount by which the limit is
- exceeded. */
-
- sync = TRUE;
- advance = 2 * (age - log->max_modified_age_sync);
- } else if (age > log->max_modified_age_async) {
-
- /* A flush is not urgent: we do an asynchronous preflush */
- advance = age - log->max_modified_age_async;
- } else {
- advance = 0;
- }
-
- checkpoint_age = log->lsn - log->last_checkpoint_lsn;
-
- if (checkpoint_age > log->max_checkpoint_age) {
- /* A checkpoint is urgent: we do it synchronously. Note that
- check_flush_or_checkpoint is left set in this branch. */
-
- checkpoint_sync = TRUE;
-
- do_checkpoint = TRUE;
-
- } else if (checkpoint_age > log->max_checkpoint_age_async) {
- /* A checkpoint is not urgent: do it asynchronously */
-
- do_checkpoint = TRUE;
-
- log->check_flush_or_checkpoint = FALSE;
- } else {
- log->check_flush_or_checkpoint = FALSE;
- }
-
- mutex_exit(&(log->mutex));
-
- if (advance) {
- ib_uint64_t new_oldest = oldest_lsn + advance;
-
- success = log_preflush_pool_modified_pages(new_oldest, sync);
-
- /* If the flush succeeded, this thread has done its part
- and can proceed. If it did not succeed, there was another
- thread doing a flush at the same time. If sync was FALSE,
- the flush was not urgent, and we let this thread proceed.
- Otherwise, we let it start from the beginning again. */
-
- if (sync && !success) {
- mutex_enter(&(log->mutex));
-
- log->check_flush_or_checkpoint = TRUE;
-
- mutex_exit(&(log->mutex));
- goto loop;
- }
- }
-
- if (do_checkpoint) {
- log_checkpoint(checkpoint_sync, FALSE);
-
- if (checkpoint_sync) {
-
- goto loop;
- }
- }
-}
-
-/******************************************************//**
-Reads a specified log segment to a buffer. The range from start_lsn to
-end_lsn is read in pieces so that no single fil_io() call crosses a log file
-boundary inside the group; after each piece start_lsn and buf are advanced
-and the code jumps back to 'loop' until start_lsn equals end_lsn. The read
-is synchronous only when type is LOG_RECOVER. The range must not be empty
-(asserted in debug builds) and the caller must own log_sys->mutex. */
-UNIV_INTERN
-void
-log_group_read_log_seg(
-/*===================*/
- ulint type, /*!< in: LOG_ARCHIVE or LOG_RECOVER */
- byte* buf, /*!< in: buffer where to read */
- log_group_t* group, /*!< in: log group */
- ib_uint64_t start_lsn, /*!< in: read area start */
- ib_uint64_t end_lsn) /*!< in: read area end */
-{
- ulint len;
- ulint source_offset;
- ibool sync;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- sync = (type == LOG_RECOVER);
-loop:
- source_offset = log_group_calc_lsn_offset(start_lsn, group);
-
- len = (ulint) (end_lsn - start_lsn);
-
- ut_ad(len != 0);
-
- if ((source_offset % group->file_size) + len > group->file_size) {
-
- len = group->file_size - (source_offset % group->file_size);
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- if (type == LOG_ARCHIVE) {
-
- log_sys->n_pending_archive_ios++;
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- log_sys->n_log_ios++;
-
- fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id, 0,
- source_offset / UNIV_PAGE_SIZE, source_offset % UNIV_PAGE_SIZE,
- len, buf, NULL);
-
- start_lsn += len;
- buf += len;
-
- if (start_lsn != end_lsn) {
-
- goto loop;
- }
-}
-
-#ifdef UNIV_LOG_ARCHIVE
-/******************************************************//**
-Generates an archived log file name. The name is srv_arch_dir followed by
-"ib_arch_log_" and the file number printed as a zero-padded decimal of at
-least 10 digits. The name is produced with sprintf(), which does not bound
-the write, so the caller must supply a large enough buffer. */
-UNIV_INTERN
-void
-log_archived_file_name_gen(
-/*=======================*/
- char* buf, /*!< in: buffer where to write */
- ulint id __attribute__((unused)),
- /*!< in: group id;
- currently we only archive the first group */
- ulint file_no)/*!< in: file number */
-{
- sprintf(buf, "%sib_arch_log_%010lu", srv_arch_dir, (ulong) file_no);
-}
-
-/******************************************************//**
-Writes a log file header to a log file space. This is the archive variant:
-it fills the nth archive header buffer with the group id, start lsn and
-file number, marks the file as not completed, and writes
-2 * OS_FILE_LOG_BLOCK_SIZE bytes to byte offset nth_file * group->file_size
-of the group's archive space, passing &log_archive_io as the i/o message.
-The caller must own log_sys->mutex (asserted in debug builds).
-NOTE(review): the fil_io() call below passes one argument fewer than the
-non-archive fil_io() calls in this file (there is no 0 after the space id)
--- confirm that it compiles when UNIV_LOG_ARCHIVE is defined. */
-static
-void
-log_group_archive_file_header_write(
-/*================================*/
- log_group_t* group, /*!< in: log group */
- ulint nth_file, /*!< in: header to the nth file in the
- archive log file space */
- ulint file_no, /*!< in: archived file number */
- ib_uint64_t start_lsn) /*!< in: log file data starts at this
- lsn */
-{
- byte* buf;
- ulint dest_offset;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- ut_a(nth_file < group->n_files);
-
- buf = *(group->archive_file_header_bufs + nth_file);
-
- mach_write_to_4(buf + LOG_GROUP_ID, group->id);
- mach_write_ull(buf + LOG_FILE_START_LSN, start_lsn);
- mach_write_to_4(buf + LOG_FILE_NO, file_no);
-
- mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, FALSE);
-
- dest_offset = nth_file * group->file_size;
-
- log_sys->n_log_ios++;
-
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->archive_space_id,
- dest_offset / UNIV_PAGE_SIZE,
- dest_offset % UNIV_PAGE_SIZE,
- 2 * OS_FILE_LOG_BLOCK_SIZE,
- buf, &log_archive_io);
-}
-
-/******************************************************//**
-Writes a log file header to a completed archived log file. Sets the
-completed flag and the end lsn in the nth archive header buffer, then writes
-OS_FILE_LOG_BLOCK_SIZE bytes starting at the LOG_FILE_ARCH_COMPLETED field,
-both in the buffer and in the file, passing &log_archive_io as the i/o
-message. The caller must own log_sys->mutex (asserted in debug builds).
-NOTE(review): the fil_io() call below passes one argument fewer than the
-non-archive fil_io() calls in this file (there is no 0 after the space id)
--- confirm that it compiles when UNIV_LOG_ARCHIVE is defined. */
-static
-void
-log_group_archive_completed_header_write(
-/*=====================================*/
- log_group_t* group, /*!< in: log group */
- ulint nth_file, /*!< in: header to the nth file in the
- archive log file space */
- ib_uint64_t end_lsn) /*!< in: end lsn of the file */
-{
- byte* buf;
- ulint dest_offset;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
- ut_a(nth_file < group->n_files);
-
- buf = *(group->archive_file_header_bufs + nth_file);
-
- mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, TRUE);
- mach_write_ull(buf + LOG_FILE_END_LSN, end_lsn);
-
- dest_offset = nth_file * group->file_size + LOG_FILE_ARCH_COMPLETED;
-
- log_sys->n_log_ios++;
-
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->archive_space_id,
- dest_offset / UNIV_PAGE_SIZE,
- dest_offset % UNIV_PAGE_SIZE,
- OS_FILE_LOG_BLOCK_SIZE,
- buf + LOG_FILE_ARCH_COMPLETED,
- &log_archive_io);
-}
-
-/******************************************************//**
-Does the archive writes for a single log group. */
-static
-void
-log_group_archive(
-/*==============*/
- log_group_t* group) /*!< in: log group */
-{
- os_file_t file_handle;
- ib_uint64_t start_lsn;
- ib_uint64_t end_lsn;
- char name[1024];
- byte* buf;
- ulint len;
- ibool ret;
- ulint next_offset;
- ulint n_files;
- ulint open_mode;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- start_lsn = log_sys->archived_lsn;
-
- ut_a(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
-
- end_lsn = log_sys->next_archived_lsn;
-
- ut_a(end_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
-
- buf = log_sys->archive_buf;
-
- n_files = 0;
-
- next_offset = group->archived_offset;
-loop:
- if ((next_offset % group->file_size == 0)
- || (fil_space_get_size(group->archive_space_id) == 0)) {
-
- /* Add the file to the archive file space; create or open the
- file */
-
- if (next_offset % group->file_size == 0) {
- open_mode = OS_FILE_CREATE;
- } else {
- open_mode = OS_FILE_OPEN;
- }
-
- log_archived_file_name_gen(name, group->id,
- group->archived_file_no + n_files);
-
- file_handle = os_file_create(name, open_mode, OS_FILE_AIO,
- OS_DATA_FILE, &ret);
-
- if (!ret && (open_mode == OS_FILE_CREATE)) {
- file_handle = os_file_create(
- name, OS_FILE_OPEN, OS_FILE_AIO,
- OS_DATA_FILE, &ret);
- }
-
- if (!ret) {
- fprintf(stderr,
- "InnoDB: Cannot create or open"
- " archive log file %s.\n"
- "InnoDB: Cannot continue operation.\n"
- "InnoDB: Check that the log archive"
- " directory exists,\n"
- "InnoDB: you have access rights to it, and\n"
- "InnoDB: there is space available.\n", name);
- exit(1);
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr, "Created archive file %s\n", name);
- }
-#endif /* UNIV_DEBUG */
-
- ret = os_file_close(file_handle);
-
- ut_a(ret);
-
- /* Add the archive file as a node to the space */
-
- fil_node_create(name, group->file_size / UNIV_PAGE_SIZE,
- group->archive_space_id, FALSE);
-
- if (next_offset % group->file_size == 0) {
- log_group_archive_file_header_write(
- group, n_files,
- group->archived_file_no + n_files,
- start_lsn);
-
- next_offset += LOG_FILE_HDR_SIZE;
- }
- }
-
- len = end_lsn - start_lsn;
-
- if (group->file_size < (next_offset % group->file_size) + len) {
-
- len = group->file_size - (next_offset % group->file_size);
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "Archiving starting at lsn %llu, len %lu"
- " to group %lu\n",
- start_lsn,
- (ulong) len, (ulong) group->id);
- }
-#endif /* UNIV_DEBUG */
-
- log_sys->n_pending_archive_ios++;
-
- log_sys->n_log_ios++;
-
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->archive_space_id,
- next_offset / UNIV_PAGE_SIZE, next_offset % UNIV_PAGE_SIZE,
- ut_calc_align(len, OS_FILE_LOG_BLOCK_SIZE), buf,
- &log_archive_io);
-
- start_lsn += len;
- next_offset += len;
- buf += len;
-
- if (next_offset % group->file_size == 0) {
- n_files++;
- }
-
- if (end_lsn != start_lsn) {
-
- goto loop;
- }
-
- group->next_archived_file_no = group->archived_file_no + n_files;
- group->next_archived_offset = next_offset % group->file_size;
-
- ut_a(group->next_archived_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
-}
-
-/*****************************************************//**
-(Writes to the archive of each log group.) Currently, only the first
-group is archived. */
-static
-void
-log_archive_groups(void)
-/*====================*/
-{
- log_group_t* group;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- log_group_archive(group);
-}
-
-/*****************************************************//**
-Completes the archiving write phase for (each log group), currently,
-the first log group. */
-static
-void
-log_archive_write_complete_groups(void)
-/*===================================*/
-{
- log_group_t* group;
- ulint end_offset;
- ulint trunc_files;
- ulint n_files;
- ib_uint64_t start_lsn;
- ib_uint64_t end_lsn;
- ulint i;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- group->archived_file_no = group->next_archived_file_no;
- group->archived_offset = group->next_archived_offset;
-
- /* Truncate from the archive file space all but the last
- file, or if it has been written full, all files */
-
- n_files = (UNIV_PAGE_SIZE
- * fil_space_get_size(group->archive_space_id))
- / group->file_size;
- ut_ad(n_files > 0);
-
- end_offset = group->archived_offset;
-
- if (end_offset % group->file_size == 0) {
-
- trunc_files = n_files;
- } else {
- trunc_files = n_files - 1;
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes && trunc_files) {
- fprintf(stderr,
- "Complete file(s) archived to group %lu\n",
- (ulong) group->id);
- }
-#endif /* UNIV_DEBUG */
-
- /* Calculate the archive file space start lsn */
- start_lsn = log_sys->next_archived_lsn
- - (end_offset - LOG_FILE_HDR_SIZE + trunc_files
- * (group->file_size - LOG_FILE_HDR_SIZE));
- end_lsn = start_lsn;
-
- for (i = 0; i < trunc_files; i++) {
-
- end_lsn += group->file_size - LOG_FILE_HDR_SIZE;
-
- /* Write a notice to the headers of archived log
- files that the file write has been completed */
-
- log_group_archive_completed_header_write(group, i, end_lsn);
- }
-
- fil_space_truncate_start(group->archive_space_id,
- trunc_files * group->file_size);
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fputs("Archiving writes completed\n", stderr);
- }
-#endif /* UNIV_DEBUG */
-}
-
-/******************************************************//**
-Completes an archiving i/o. */
-static
-void
-log_archive_check_completion_low(void)
-/*==================================*/
-{
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- if (log_sys->n_pending_archive_ios == 0
- && log_sys->archiving_phase == LOG_ARCHIVE_READ) {
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fputs("Archiving read completed\n", stderr);
- }
-#endif /* UNIV_DEBUG */
-
- /* Archive buffer has now been read in: start archive writes */
-
- log_sys->archiving_phase = LOG_ARCHIVE_WRITE;
-
- log_archive_groups();
- }
-
- if (log_sys->n_pending_archive_ios == 0
- && log_sys->archiving_phase == LOG_ARCHIVE_WRITE) {
-
- log_archive_write_complete_groups();
-
- log_sys->archived_lsn = log_sys->next_archived_lsn;
-
- rw_lock_x_unlock_gen(&(log_sys->archive_lock), LOG_ARCHIVE);
- }
-}
-
-/******************************************************//**
-Completes an archiving i/o. */
-static
-void
-log_io_complete_archive(void)
-/*=========================*/
-{
- log_group_t* group;
-
- mutex_enter(&(log_sys->mutex));
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- mutex_exit(&(log_sys->mutex));
-
- fil_flush(group->archive_space_id);
-
- mutex_enter(&(log_sys->mutex));
-
- ut_ad(log_sys->n_pending_archive_ios > 0);
-
- log_sys->n_pending_archive_ios--;
-
- log_archive_check_completion_low();
-
- mutex_exit(&(log_sys->mutex));
-}
-
-/********************************************************************//**
-Starts an archiving operation.
-@return TRUE if succeed, FALSE if an archiving operation was already running */
-UNIV_INTERN
-ibool
-log_archive_do(
-/*===========*/
- ibool sync, /*!< in: TRUE if synchronous operation is desired */
- ulint* n_bytes)/*!< out: archive log buffer size, 0 if nothing to
- archive */
-{
- ibool calc_new_limit;
- ib_uint64_t start_lsn;
- ib_uint64_t limit_lsn;
-
- calc_new_limit = TRUE;
-loop:
- mutex_enter(&(log_sys->mutex));
-
- switch (log_sys->archiving_state) {
- case LOG_ARCH_OFF:
-arch_none:
- mutex_exit(&(log_sys->mutex));
-
- *n_bytes = 0;
-
- return(TRUE);
- case LOG_ARCH_STOPPED:
- case LOG_ARCH_STOPPING2:
- mutex_exit(&(log_sys->mutex));
-
- os_event_wait(log_sys->archiving_on);
-
- goto loop;
- }
-
- start_lsn = log_sys->archived_lsn;
-
- if (calc_new_limit) {
- ut_a(log_sys->archive_buf_size % OS_FILE_LOG_BLOCK_SIZE == 0);
- limit_lsn = start_lsn + log_sys->archive_buf_size;
-
- *n_bytes = log_sys->archive_buf_size;
-
- if (limit_lsn >= log_sys->lsn) {
-
- limit_lsn = ut_uint64_align_down(
- log_sys->lsn, OS_FILE_LOG_BLOCK_SIZE);
- }
- }
-
- if (log_sys->archived_lsn >= limit_lsn) {
-
- goto arch_none;
- }
-
- if (log_sys->written_to_all_lsn < limit_lsn) {
-
- mutex_exit(&(log_sys->mutex));
-
- log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
-
- calc_new_limit = FALSE;
-
- goto loop;
- }
-
- if (log_sys->n_pending_archive_ios > 0) {
- /* An archiving operation is running */
-
- mutex_exit(&(log_sys->mutex));
-
- if (sync) {
- rw_lock_s_lock(&(log_sys->archive_lock));
- rw_lock_s_unlock(&(log_sys->archive_lock));
- }
-
- *n_bytes = log_sys->archive_buf_size;
-
- return(FALSE);
- }
-
- rw_lock_x_lock_gen(&(log_sys->archive_lock), LOG_ARCHIVE);
-
- log_sys->archiving_phase = LOG_ARCHIVE_READ;
-
- log_sys->next_archived_lsn = limit_lsn;
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "Archiving from lsn %llu to lsn %llu\n",
- log_sys->archived_lsn, limit_lsn);
- }
-#endif /* UNIV_DEBUG */
-
- /* Read the log segment to the archive buffer */
-
- log_group_read_log_seg(LOG_ARCHIVE, log_sys->archive_buf,
- UT_LIST_GET_FIRST(log_sys->log_groups),
- start_lsn, limit_lsn);
-
- mutex_exit(&(log_sys->mutex));
-
- if (sync) {
- rw_lock_s_lock(&(log_sys->archive_lock));
- rw_lock_s_unlock(&(log_sys->archive_lock));
- }
-
- *n_bytes = log_sys->archive_buf_size;
-
- return(TRUE);
-}
-
-/****************************************************************//**
-Writes the log contents to the archive at least up to the lsn when this
-function was called. */
-static
-void
-log_archive_all(void)
-/*=================*/
-{
- ib_uint64_t present_lsn;
- ulint dummy;
-
- mutex_enter(&(log_sys->mutex));
-
- if (log_sys->archiving_state == LOG_ARCH_OFF) {
- mutex_exit(&(log_sys->mutex));
-
- return;
- }
-
- present_lsn = log_sys->lsn;
-
- mutex_exit(&(log_sys->mutex));
-
- log_pad_current_log_block();
-
- for (;;) {
- mutex_enter(&(log_sys->mutex));
-
- if (present_lsn <= log_sys->archived_lsn) {
-
- mutex_exit(&(log_sys->mutex));
-
- return;
- }
-
- mutex_exit(&(log_sys->mutex));
-
- log_archive_do(TRUE, &dummy);
- }
-}
-
-/*****************************************************//**
-Closes the possible open archive log file (for each group) the first group,
-and if it was open, increments the group file count by 2, if desired. */
-static
-void
-log_archive_close_groups(
-/*=====================*/
- ibool increment_file_count) /*!< in: TRUE if we want to increment
- the file count */
-{
- log_group_t* group;
- ulint trunc_len;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- if (log_sys->archiving_state == LOG_ARCH_OFF) {
-
- return;
- }
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- trunc_len = UNIV_PAGE_SIZE
- * fil_space_get_size(group->archive_space_id);
- if (trunc_len > 0) {
- ut_a(trunc_len == group->file_size);
-
- /* Write a notice to the headers of archived log
- files that the file write has been completed */
-
- log_group_archive_completed_header_write(
- group, 0, log_sys->archived_lsn);
-
- fil_space_truncate_start(group->archive_space_id,
- trunc_len);
- if (increment_file_count) {
- group->archived_offset = 0;
- group->archived_file_no += 2;
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "Incrementing arch file no to %lu"
- " in log group %lu\n",
- (ulong) group->archived_file_no + 2,
- (ulong) group->id);
- }
-#endif /* UNIV_DEBUG */
- }
-}
-
-/****************************************************************//**
-Writes the log contents to the archive up to the lsn when this function was
-called, and stops the archiving. When archiving is started again, the archived
-log file numbers start from 2 higher, so that the archiving will not write
-again to the archived log files which exist when this function returns.
-@return DB_SUCCESS or DB_ERROR */
-UNIV_INTERN
-ulint
-log_archive_stop(void)
-/*==================*/
-{
- ibool success;
-
- mutex_enter(&(log_sys->mutex));
-
- if (log_sys->archiving_state != LOG_ARCH_ON) {
-
- mutex_exit(&(log_sys->mutex));
-
- return(DB_ERROR);
- }
-
- log_sys->archiving_state = LOG_ARCH_STOPPING;
-
- mutex_exit(&(log_sys->mutex));
-
- log_archive_all();
-
- mutex_enter(&(log_sys->mutex));
-
- log_sys->archiving_state = LOG_ARCH_STOPPING2;
- os_event_reset(log_sys->archiving_on);
-
- mutex_exit(&(log_sys->mutex));
-
- /* Wait for a possible archiving operation to end */
-
- rw_lock_s_lock(&(log_sys->archive_lock));
- rw_lock_s_unlock(&(log_sys->archive_lock));
-
- mutex_enter(&(log_sys->mutex));
-
- /* Close all archived log files, incrementing the file count by 2,
- if appropriate */
-
- log_archive_close_groups(TRUE);
-
- mutex_exit(&(log_sys->mutex));
-
- /* Make a checkpoint, so that if recovery is needed, the file numbers
- of new archived log files will start from the right value */
-
- success = FALSE;
-
- while (!success) {
- success = log_checkpoint(TRUE, TRUE);
- }
-
- mutex_enter(&(log_sys->mutex));
-
- log_sys->archiving_state = LOG_ARCH_STOPPED;
-
- mutex_exit(&(log_sys->mutex));
-
- return(DB_SUCCESS);
-}
-
-/****************************************************************//**
-Starts again archiving which has been stopped.
-@return DB_SUCCESS or DB_ERROR */
-UNIV_INTERN
-ulint
-log_archive_start(void)
-/*===================*/
-{
- mutex_enter(&(log_sys->mutex));
-
- if (log_sys->archiving_state != LOG_ARCH_STOPPED) {
-
- mutex_exit(&(log_sys->mutex));
-
- return(DB_ERROR);
- }
-
- log_sys->archiving_state = LOG_ARCH_ON;
-
- os_event_set(log_sys->archiving_on);
-
- mutex_exit(&(log_sys->mutex));
-
- return(DB_SUCCESS);
-}
-
-/****************************************************************//**
-Stop archiving the log so that a gap may occur in the archived log files.
-@return DB_SUCCESS or DB_ERROR */
-UNIV_INTERN
-ulint
-log_archive_noarchivelog(void)
-/*==========================*/
-{
-loop:
- mutex_enter(&(log_sys->mutex));
-
- if (log_sys->archiving_state == LOG_ARCH_STOPPED
- || log_sys->archiving_state == LOG_ARCH_OFF) {
-
- log_sys->archiving_state = LOG_ARCH_OFF;
-
- os_event_set(log_sys->archiving_on);
-
- mutex_exit(&(log_sys->mutex));
-
- return(DB_SUCCESS);
- }
-
- mutex_exit(&(log_sys->mutex));
-
- log_archive_stop();
-
- os_thread_sleep(500000);
-
- goto loop;
-}
-
-/****************************************************************//**
-Start archiving the log so that a gap may occur in the archived log files.
-@return DB_SUCCESS or DB_ERROR */
-UNIV_INTERN
-ulint
-log_archive_archivelog(void)
-/*========================*/
-{
- mutex_enter(&(log_sys->mutex));
-
- if (log_sys->archiving_state == LOG_ARCH_OFF) {
-
- log_sys->archiving_state = LOG_ARCH_ON;
-
- log_sys->archived_lsn
- = ut_uint64_align_down(log_sys->lsn,
- OS_FILE_LOG_BLOCK_SIZE);
- mutex_exit(&(log_sys->mutex));
-
- return(DB_SUCCESS);
- }
-
- mutex_exit(&(log_sys->mutex));
-
- return(DB_ERROR);
-}
-
-/****************************************************************//**
-Tries to establish a big enough margin of free space in the log groups, such
-that a new log entry can be catenated without an immediate need for
-archiving. */
-static
-void
-log_archive_margin(void)
-/*====================*/
-{
- log_t* log = log_sys;
- ulint age;
- ibool sync;
- ulint dummy;
-loop:
- mutex_enter(&(log->mutex));
-
- if (log->archiving_state == LOG_ARCH_OFF) {
- mutex_exit(&(log->mutex));
-
- return;
- }
-
- age = log->lsn - log->archived_lsn;
-
- if (age > log->max_archived_lsn_age) {
-
- /* An archiving is urgent: we have to do synchronous i/o */
-
- sync = TRUE;
-
- } else if (age > log->max_archived_lsn_age_async) {
-
- /* An archiving is not urgent: we do asynchronous i/o */
-
- sync = FALSE;
- } else {
- /* No archiving required yet */
-
- mutex_exit(&(log->mutex));
-
- return;
- }
-
- mutex_exit(&(log->mutex));
-
- log_archive_do(sync, &dummy);
-
- if (sync == TRUE) {
- /* Check again that enough was written to the archive */
-
- goto loop;
- }
-}
-#endif /* UNIV_LOG_ARCHIVE */
-
-/********************************************************************//**
-Checks that there is enough free space in the log to start a new query step.
-Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
-function may only be called if the calling thread owns no synchronization
-objects! */
-UNIV_INTERN
-void
-log_check_margins(void)
-/*===================*/
-{
-loop:
- log_flush_margin();
-
- log_checkpoint_margin();
-
-#ifdef UNIV_LOG_ARCHIVE
- log_archive_margin();
-#endif /* UNIV_LOG_ARCHIVE */
-
- mutex_enter(&(log_sys->mutex));
- ut_ad(!recv_no_log_write);
-
- if (log_sys->check_flush_or_checkpoint) {
-
- mutex_exit(&(log_sys->mutex));
-
- goto loop;
- }
-
- mutex_exit(&(log_sys->mutex));
-}
-
-/****************************************************************//**
-Makes a checkpoint at the latest lsn and writes it to first page of each
-data file in the database, so that we know that the file spaces contain
-all modifications up to that lsn. This can only be called at database
-shutdown. This function also writes all log in log files to the log archive. */
-UNIV_INTERN
-void
-logs_empty_and_mark_files_at_shutdown(void)
-/*=======================================*/
-{
- ib_uint64_t lsn;
- ulint arch_log_no;
-
- if (srv_print_verbose_log) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Starting shutdown...\n");
- }
- /* Wait until the master thread and all other operations are idle: our
- algorithm only works if the server is idle at shutdown */
-
- srv_shutdown_state = SRV_SHUTDOWN_CLEANUP;
-loop:
- os_thread_sleep(100000);
-
- mutex_enter(&kernel_mutex);
-
- /* We need the monitor threads to stop before we proceed with a
- normal shutdown. In case of very fast shutdown, however, we can
- proceed without waiting for monitor threads. */
-
- if (srv_fast_shutdown < 2
- && (srv_error_monitor_active
- || srv_lock_timeout_and_monitor_active)) {
-
- mutex_exit(&kernel_mutex);
-
- goto loop;
- }
-
- /* Check that there are no longer transactions. We need this wait even
- for the 'very fast' shutdown, because the InnoDB layer may have
- committed or prepared transactions and we don't want to lose them. */
-
- if (trx_n_mysql_transactions > 0
- || UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
-
- mutex_exit(&kernel_mutex);
-
- goto loop;
- }
-
- if (srv_fast_shutdown == 2) {
- /* In this fastest shutdown we do not flush the buffer pool:
- it is essentially a 'crash' of the InnoDB server. Make sure
- that the log is all flushed to disk, so that we can recover
- all committed transactions in a crash recovery. We must not
- write the lsn stamps to the data files, since at a startup
- InnoDB deduces from the stamps if the previous shutdown was
- clean. */
-
- log_buffer_flush_to_disk();
-
- return; /* We SKIP ALL THE REST !! */
- }
-
- /* Check that the master thread is suspended */
-
- if (srv_n_threads_active[SRV_MASTER] != 0) {
-
- mutex_exit(&kernel_mutex);
-
- goto loop;
- }
-
- mutex_exit(&kernel_mutex);
-
- mutex_enter(&(log_sys->mutex));
-
- if (log_sys->n_pending_checkpoint_writes
-#ifdef UNIV_LOG_ARCHIVE
- || log_sys->n_pending_archive_ios
-#endif /* UNIV_LOG_ARCHIVE */
- || log_sys->n_pending_writes) {
-
- mutex_exit(&(log_sys->mutex));
-
- goto loop;
- }
-
- mutex_exit(&(log_sys->mutex));
-
- if (!buf_pool_check_no_pending_io()) {
-
- goto loop;
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- log_archive_all();
-#endif /* UNIV_LOG_ARCHIVE */
-
- log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
-
- mutex_enter(&(log_sys->mutex));
-
- lsn = log_sys->lsn;
-
- if (lsn != log_sys->last_checkpoint_lsn
-#ifdef UNIV_LOG_ARCHIVE
- || (srv_log_archive_on
- && lsn != log_sys->archived_lsn + LOG_BLOCK_HDR_SIZE)
-#endif /* UNIV_LOG_ARCHIVE */
- ) {
-
- mutex_exit(&(log_sys->mutex));
-
- goto loop;
- }
-
- arch_log_no = 0;
-
-#ifdef UNIV_LOG_ARCHIVE
- UT_LIST_GET_FIRST(log_sys->log_groups)->archived_file_no;
-
- if (0 == UT_LIST_GET_FIRST(log_sys->log_groups)->archived_offset) {
-
- arch_log_no--;
- }
-
- log_archive_close_groups(TRUE);
-#endif /* UNIV_LOG_ARCHIVE */
-
- mutex_exit(&(log_sys->mutex));
-
- mutex_enter(&kernel_mutex);
- /* Check that the master thread has stayed suspended */
- if (srv_n_threads_active[SRV_MASTER] != 0) {
- fprintf(stderr,
- "InnoDB: Warning: the master thread woke up"
- " during shutdown\n");
-
- mutex_exit(&kernel_mutex);
-
- goto loop;
- }
- mutex_exit(&kernel_mutex);
-
- fil_flush_file_spaces(FIL_TABLESPACE);
- fil_flush_file_spaces(FIL_LOG);
-
- /* The call fil_write_flushed_lsn_to_data_files() will pass the buffer
- pool: therefore it is essential that the buffer pool has been
- completely flushed to disk! (We do not call fil_write... if the
- 'very fast' shutdown is enabled.) */
-
- if (!buf_all_freed()) {
-
- goto loop;
- }
-
- srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
-
- /* Make some checks that the server really is quiet */
- ut_a(srv_n_threads_active[SRV_MASTER] == 0);
- ut_a(buf_all_freed());
- ut_a(lsn == log_sys->lsn);
-
- if (lsn < srv_start_lsn) {
- fprintf(stderr,
- "InnoDB: Error: log sequence number"
- " at shutdown %llu\n"
- "InnoDB: is lower than at startup %llu!\n",
- lsn, srv_start_lsn);
- }
-
- srv_shutdown_lsn = lsn;
-
- fil_write_flushed_lsn_to_data_files(lsn, arch_log_no);
-
- fil_flush_file_spaces(FIL_TABLESPACE);
-
- fil_close_all_files();
-
- /* Make some checks that the server really is quiet */
- ut_a(srv_n_threads_active[SRV_MASTER] == 0);
- ut_a(buf_all_freed());
- ut_a(lsn == log_sys->lsn);
-}
-
-#ifdef UNIV_LOG_DEBUG
-/******************************************************//**
-Checks by parsing that the catenated log segment for a single mtr is
-consistent. */
-UNIV_INTERN
-ibool
-log_check_log_recs(
-/*===============*/
- const byte* buf, /*!< in: pointer to the start of
- the log segment in the
- log_sys->buf log buffer */
- ulint len, /*!< in: segment length in bytes */
- ib_uint64_t buf_start_lsn) /*!< in: buffer start lsn */
-{
- ib_uint64_t contiguous_lsn;
- ib_uint64_t scanned_lsn;
- const byte* start;
- const byte* end;
- byte* buf1;
- byte* scan_buf;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- if (len == 0) {
-
- return(TRUE);
- }
-
- start = ut_align_down(buf, OS_FILE_LOG_BLOCK_SIZE);
- end = ut_align(buf + len, OS_FILE_LOG_BLOCK_SIZE);
-
- buf1 = mem_alloc((end - start) + OS_FILE_LOG_BLOCK_SIZE);
- scan_buf = ut_align(buf1, OS_FILE_LOG_BLOCK_SIZE);
-
- ut_memcpy(scan_buf, start, end - start);
-
- recv_scan_log_recs((buf_pool->curr_size
- - recv_n_pool_free_frames) * UNIV_PAGE_SIZE,
- FALSE, scan_buf, end - start,
- ut_uint64_align_down(buf_start_lsn,
- OS_FILE_LOG_BLOCK_SIZE),
- &contiguous_lsn, &scanned_lsn);
-
- ut_a(scanned_lsn == buf_start_lsn + len);
- ut_a(recv_sys->recovered_lsn == scanned_lsn);
-
- mem_free(buf1);
-
- return(TRUE);
-}
-#endif /* UNIV_LOG_DEBUG */
-
-/******************************************************//**
-Peeks the current lsn.
-@return TRUE if success, FALSE if could not get the log system mutex */
-UNIV_INTERN
-ibool
-log_peek_lsn(
-/*=========*/
- ib_uint64_t* lsn) /*!< out: if returns TRUE, current lsn is here */
-{
- if (0 == mutex_enter_nowait(&(log_sys->mutex))) {
- *lsn = log_sys->lsn;
-
- mutex_exit(&(log_sys->mutex));
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/******************************************************//**
-Prints info of the log. */
-UNIV_INTERN
-void
-log_print(
-/*======*/
- FILE* file) /*!< in: file where to print */
-{
- double time_elapsed;
- time_t current_time;
-
- mutex_enter(&(log_sys->mutex));
-
- fprintf(file,
- "Log sequence number %llu\n"
- "Log flushed up to %llu\n"
- "Last checkpoint at %llu\n",
- log_sys->lsn,
- log_sys->flushed_to_disk_lsn,
- log_sys->last_checkpoint_lsn);
-
- current_time = time(NULL);
-
- time_elapsed = 0.001 + difftime(current_time,
- log_sys->last_printout_time);
- fprintf(file,
- "%lu pending log writes, %lu pending chkp writes\n"
- "%lu log i/o's done, %.2f log i/o's/second\n",
- (ulong) log_sys->n_pending_writes,
- (ulong) log_sys->n_pending_checkpoint_writes,
- (ulong) log_sys->n_log_ios,
- ((log_sys->n_log_ios - log_sys->n_log_ios_old)
- / time_elapsed));
-
- log_sys->n_log_ios_old = log_sys->n_log_ios;
- log_sys->last_printout_time = current_time;
-
- mutex_exit(&(log_sys->mutex));
-}
-
-/**********************************************************************//**
-Refreshes the statistics used to print per-second averages. */
-UNIV_INTERN
-void
-log_refresh_stats(void)
-/*===================*/
-{
- log_sys->n_log_ios_old = log_sys->n_log_ios;
- log_sys->last_printout_time = time(NULL);
-}
-
-/**********************************************************************
-Closes a log group. */
-static
-void
-log_group_close(
-/*===========*/
- log_group_t* group) /* in,own: log group to close */
-{
- ulint i;
-
- for (i = 0; i < group->n_files; i++) {
- mem_free(group->file_header_bufs_ptr[i]);
-#ifdef UNIV_LOG_ARCHIVE
- mem_free(group->archive_file_header_bufs_ptr[i]);
-#endif /* UNIV_LOG_ARCHIVE */
- }
-
- mem_free(group->file_header_bufs_ptr);
- mem_free(group->file_header_bufs);
-
-#ifdef UNIV_LOG_ARCHIVE
- mem_free(group->archive_file_header_bufs_ptr);
- mem_free(group->archive_file_header_bufs);
-#endif /* UNIV_LOG_ARCHIVE */
-
- mem_free(group->checkpoint_buf_ptr);
-
- mem_free(group);
-}
-
-/**********************************************************
-Shutdown the log system but do not release all the memory. */
-UNIV_INTERN
-void
-log_shutdown(void)
-/*==============*/
-{
- log_group_t* group;
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- while (UT_LIST_GET_LEN(log_sys->log_groups) > 0) {
- log_group_t* prev_group = group;
-
- group = UT_LIST_GET_NEXT(log_groups, group);
- UT_LIST_REMOVE(log_groups, log_sys->log_groups, prev_group);
-
- log_group_close(prev_group);
- }
-
- mem_free(log_sys->buf_ptr);
- log_sys->buf_ptr = NULL;
- log_sys->buf = NULL;
- mem_free(log_sys->checkpoint_buf_ptr);
- log_sys->checkpoint_buf_ptr = NULL;
- log_sys->checkpoint_buf = NULL;
-
- os_event_free(log_sys->no_flush_event);
- os_event_free(log_sys->one_flushed_event);
-
- rw_lock_free(&log_sys->checkpoint_lock);
-
- mutex_free(&log_sys->mutex);
-
-#ifdef UNIV_LOG_ARCHIVE
- rw_lock_free(&log_sys->archive_lock);
- os_event_create(log_sys->archiving_on);
-#endif /* UNIV_LOG_ARCHIVE */
-
-#ifdef UNIV_LOG_DEBUG
- recv_sys_debug_free();
-#endif
-
- recv_sys_close();
-}
-
-/**********************************************************
-Free the log system data structures. */
-UNIV_INTERN
-void
-log_mem_free(void)
-/*==============*/
-{
- if (log_sys != NULL) {
- recv_sys_mem_free();
- mem_free(log_sys);
-
- log_sys = NULL;
- }
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/log/log0recv.c b/storage/innodb_plugin/log/log0recv.c
deleted file mode 100644
index ddbc71d4b71..00000000000
--- a/storage/innodb_plugin/log/log0recv.c
+++ /dev/null
@@ -1,3764 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file log/log0recv.c
-Recovery
-
-Created 9/20/1997 Heikki Tuuri
-*******************************************************/
-
-#include "log0recv.h"
-
-#ifdef UNIV_NONINL
-#include "log0recv.ic"
-#endif
-
-#include "mem0mem.h"
-#include "buf0buf.h"
-#include "buf0flu.h"
-#include "mtr0mtr.h"
-#include "mtr0log.h"
-#include "page0cur.h"
-#include "page0zip.h"
-#include "btr0btr.h"
-#include "btr0cur.h"
-#include "ibuf0ibuf.h"
-#include "trx0undo.h"
-#include "trx0rec.h"
-#include "fil0fil.h"
-#ifndef UNIV_HOTBACKUP
-# include "buf0rea.h"
-# include "srv0srv.h"
-# include "srv0start.h"
-# include "trx0roll.h"
-# include "row0merge.h"
-# include "sync0sync.h"
-#else /* !UNIV_HOTBACKUP */
-
-/** This is set to FALSE if the backup was originally taken with the
-ibbackup --include regexp option: then we do not want to create tables in
-directories which were not included */
-UNIV_INTERN ibool recv_replay_file_ops = TRUE;
-#endif /* !UNIV_HOTBACKUP */
-
-/** Log records are stored in the hash table in chunks at most of this size;
-this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */
-#define RECV_DATA_BLOCK_SIZE (MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t))
-
-/** Read-ahead area in applying log records to file pages */
-#define RECV_READ_AHEAD_AREA 32
-
-/** The recovery system */
-UNIV_INTERN recv_sys_t* recv_sys = NULL;
-/** TRUE when applying redo log records during crash recovery; FALSE
-otherwise. Note that this is FALSE while a background thread is
-rolling back incomplete transactions. */
-UNIV_INTERN ibool recv_recovery_on;
-#ifdef UNIV_LOG_ARCHIVE
-/** TRUE when applying redo log records from an archived log file */
-UNIV_INTERN ibool recv_recovery_from_backup_on;
-#endif /* UNIV_LOG_ARCHIVE */
-
-#ifndef UNIV_HOTBACKUP
-/** TRUE when recv_init_crash_recovery() has been called. */
-UNIV_INTERN ibool recv_needed_recovery;
-# ifdef UNIV_DEBUG
-/** TRUE if writing to the redo log (mtr_commit) is forbidden.
-Protected by log_sys->mutex. */
-UNIV_INTERN ibool recv_no_log_write = FALSE;
-# endif /* UNIV_DEBUG */
-
-/** TRUE if buf_page_is_corrupted() should check if the log sequence
-number (FIL_PAGE_LSN) is in the future. Initially FALSE, and set by
-recv_recovery_from_checkpoint_start_func(). */
-UNIV_INTERN ibool recv_lsn_checks_on;
-
-/** There are two conditions under which we scan the logs, the first
-is normal startup and the second is when we do a recovery from an
-archive.
-This flag is set if we are doing a scan from the last checkpoint during
-startup. If we find log entries that were written after the last checkpoint
-we know that the server was not cleanly shutdown. We must then initialize
-the crash recovery environment before attempting to store these entries in
-the log hash table. */
-static ibool recv_log_scan_is_startup_type;
-
-/** If the following is TRUE, the buffer pool file pages must be invalidated
-after recovery and no ibuf operations are allowed; this becomes TRUE if
-the log record hash table becomes too full, and log records must be merged
-to file pages already before the recovery is finished: in this case no
-ibuf operations are allowed, as they could modify the pages read in the
-buffer pool before the pages have been recovered to the up-to-date state.
-
-TRUE means that recovery is running and no operations on the log files
-are allowed yet: the variable name is misleading. */
-UNIV_INTERN ibool recv_no_ibuf_operations;
-/** TRUE when the redo log is being backed up */
-# define recv_is_making_a_backup FALSE
-/** TRUE when recovering from a backed up redo log file */
-# define recv_is_from_backup FALSE
-#else /* !UNIV_HOTBACKUP */
-# define recv_needed_recovery FALSE
-/** TRUE when the redo log is being backed up */
-UNIV_INTERN ibool recv_is_making_a_backup = FALSE;
-/** TRUE when recovering from a backed up redo log file */
-UNIV_INTERN ibool recv_is_from_backup = FALSE;
-# define buf_pool_get_curr_size() (5 * 1024 * 1024)
-#endif /* !UNIV_HOTBACKUP */
-/** The following counter is used to decide when to print info on
-log scan */
-static ulint recv_scan_print_counter;
-
-/** The type of the previous parsed redo log record */
-static ulint recv_previous_parsed_rec_type;
-/** The offset of the previous parsed redo log record */
-static ulint recv_previous_parsed_rec_offset;
-/** The 'multi' flag of the previous parsed redo log record */
-static ulint recv_previous_parsed_rec_is_multi;
-
-/** Maximum page number encountered in the redo log */
-UNIV_INTERN ulint recv_max_parsed_page_no;
-
-/** This many frames must be left free in the buffer pool when we scan
-the log and store the scanned log records in the buffer pool: we will
-use these free frames to read in pages when we start applying the
-log records to the database. */
-UNIV_INTERN ulint recv_n_pool_free_frames;
-
-/** The maximum lsn we see for a page during the recovery process. If this
-is bigger than the lsn we are able to scan up to, that is an indication that
-the recovery failed and the database may be corrupt. */
-UNIV_INTERN ib_uint64_t recv_max_page_lsn;
-
-/* prototypes */
-
-#ifndef UNIV_HOTBACKUP
-/*******************************************************//**
-Initialize crash recovery environment. Can be called iff
-recv_needed_recovery == FALSE. */
-static
-void
-recv_init_crash_recovery(void);
-/*===========================*/
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************//**
-Creates the recovery system. */
-UNIV_INTERN
-void
-recv_sys_create(void)
-/*=================*/
-{
- if (recv_sys != NULL) {
-
- return;
- }
-
- recv_sys = mem_alloc(sizeof(*recv_sys));
- memset(recv_sys, 0x0, sizeof(*recv_sys));
-
- mutex_create(&recv_sys->mutex, SYNC_RECV);
-
- recv_sys->heap = NULL;
- recv_sys->addr_hash = NULL;
-}
-
-/********************************************************//**
-Release recovery system mutexes. */
-UNIV_INTERN
-void
-recv_sys_close(void)
-/*================*/
-{
- if (recv_sys != NULL) {
- if (recv_sys->addr_hash != NULL) {
- hash_table_free(recv_sys->addr_hash);
- }
-
- if (recv_sys->heap != NULL) {
- mem_heap_free(recv_sys->heap);
- }
-
- if (recv_sys->buf != NULL) {
- ut_free(recv_sys->buf);
- }
-
- if (recv_sys->last_block_buf_start != NULL) {
- mem_free(recv_sys->last_block_buf_start);
- }
-
- mutex_free(&recv_sys->mutex);
-
- mem_free(recv_sys);
- recv_sys = NULL;
- }
-}
-
-/********************************************************//**
-Frees the recovery system memory. */
-UNIV_INTERN
-void
-recv_sys_mem_free(void)
-/*===================*/
-{
- if (recv_sys != NULL) {
- if (recv_sys->addr_hash != NULL) {
- hash_table_free(recv_sys->addr_hash);
- }
-
- if (recv_sys->heap != NULL) {
- mem_heap_free(recv_sys->heap);
- }
-
- if (recv_sys->buf != NULL) {
- ut_free(recv_sys->buf);
- }
-
- if (recv_sys->last_block_buf_start != NULL) {
- mem_free(recv_sys->last_block_buf_start);
- }
-
- mem_free(recv_sys);
- recv_sys = NULL;
- }
-}
-
-/************************************************************
-Reset the state of the recovery system variables. */
-UNIV_INTERN
-void
-recv_sys_var_init(void)
-/*===================*/
-{
- recv_lsn_checks_on = FALSE;
-
- recv_n_pool_free_frames = 256;
-
- recv_recovery_on = FALSE;
-
-#ifdef UNIV_LOG_ARCHIVE
- recv_recovery_from_backup_on = FALSE;
-#endif /* UNIV_LOG_ARCHIVE */
-
- recv_needed_recovery = FALSE;
-
- recv_lsn_checks_on = FALSE;
-
- recv_log_scan_is_startup_type = FALSE;
-
- recv_no_ibuf_operations = FALSE;
-
- recv_scan_print_counter = 0;
-
- recv_previous_parsed_rec_type = 999999;
-
- recv_previous_parsed_rec_offset = 0;
-
- recv_previous_parsed_rec_is_multi = 0;
-
- recv_max_parsed_page_no = 0;
-
- recv_n_pool_free_frames = 256;
-
- recv_max_page_lsn = 0;
-}
-
-/************************************************************
-Inits the recovery system for a recovery operation. */
-UNIV_INTERN
-void
-recv_sys_init(
-/*==========*/
- ulint available_memory) /*!< in: available memory in bytes */
-{
- if (recv_sys->heap != NULL) {
-
- return;
- }
-
- mutex_enter(&(recv_sys->mutex));
-
-#ifndef UNIV_HOTBACKUP
- recv_sys->heap = mem_heap_create_in_buffer(256);
-#else /* !UNIV_HOTBACKUP */
- recv_sys->heap = mem_heap_create(256);
- recv_is_from_backup = TRUE;
-#endif /* !UNIV_HOTBACKUP */
-
- recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE);
- recv_sys->len = 0;
- recv_sys->recovered_offset = 0;
-
- recv_sys->addr_hash = hash_create(available_memory / 64);
- recv_sys->n_addrs = 0;
-
- recv_sys->apply_log_recs = FALSE;
- recv_sys->apply_batch_on = FALSE;
-
- recv_sys->last_block_buf_start = mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE);
-
- recv_sys->last_block = ut_align(recv_sys->last_block_buf_start,
- OS_FILE_LOG_BLOCK_SIZE);
- recv_sys->found_corrupt_log = FALSE;
-
- recv_max_page_lsn = 0;
-
- mutex_exit(&(recv_sys->mutex));
-}
-
-/********************************************************//**
-Empties the hash table when it has been fully processed. */
-static
-void
-recv_sys_empty_hash(void)
-/*=====================*/
-{
- ut_ad(mutex_own(&(recv_sys->mutex)));
-
- if (recv_sys->n_addrs != 0) {
- fprintf(stderr,
- "InnoDB: Error: %lu pages with log records"
- " were left unprocessed!\n"
- "InnoDB: Maximum page number with"
- " log records on it %lu\n",
- (ulong) recv_sys->n_addrs,
- (ulong) recv_max_parsed_page_no);
- ut_error;
- }
-
- hash_table_free(recv_sys->addr_hash);
- mem_heap_empty(recv_sys->heap);
-
- recv_sys->addr_hash = hash_create(buf_pool_get_curr_size() / 256);
-}
-
-#ifndef UNIV_HOTBACKUP
-# ifndef UNIV_LOG_DEBUG
-/********************************************************//**
-Frees the recovery system. */
-static
-void
-recv_sys_debug_free(void)
-/*=====================*/
-{
- mutex_enter(&(recv_sys->mutex));
-
- hash_table_free(recv_sys->addr_hash);
- mem_heap_free(recv_sys->heap);
- ut_free(recv_sys->buf);
- mem_free(recv_sys->last_block_buf_start);
-
- recv_sys->buf = NULL;
- recv_sys->heap = NULL;
- recv_sys->addr_hash = NULL;
- recv_sys->last_block_buf_start = NULL;
-
- mutex_exit(&(recv_sys->mutex));
-}
-# endif /* UNIV_LOG_DEBUG */
-
-/********************************************************//**
-Truncates possible corrupted or extra records from a log group. */
-static
-void
-recv_truncate_group(
-/*================*/
- log_group_t* group, /*!< in: log group */
- ib_uint64_t recovered_lsn, /*!< in: recovery succeeded up to this
- lsn */
- ib_uint64_t limit_lsn, /*!< in: this was the limit for
- recovery */
- ib_uint64_t checkpoint_lsn, /*!< in: recovery was started from this
- checkpoint */
- ib_uint64_t archived_lsn) /*!< in: the log has been archived up to
- this lsn */
-{
- ib_uint64_t start_lsn;
- ib_uint64_t end_lsn;
- ib_uint64_t finish_lsn1;
- ib_uint64_t finish_lsn2;
- ib_uint64_t finish_lsn;
- ulint len;
- ulint i;
-
- if (archived_lsn == IB_ULONGLONG_MAX) {
- /* Checkpoint was taken in the NOARCHIVELOG mode */
- archived_lsn = checkpoint_lsn;
- }
-
- finish_lsn1 = ut_uint64_align_down(archived_lsn,
- OS_FILE_LOG_BLOCK_SIZE)
- + log_group_get_capacity(group);
-
- finish_lsn2 = ut_uint64_align_up(recovered_lsn,
- OS_FILE_LOG_BLOCK_SIZE)
- + recv_sys->last_log_buf_size;
-
- if (limit_lsn != IB_ULONGLONG_MAX) {
- /* We do not know how far we should erase log records: erase
- as much as possible */
-
- finish_lsn = finish_lsn1;
- } else {
- /* It is enough to erase the length of the log buffer */
- finish_lsn = finish_lsn1 < finish_lsn2
- ? finish_lsn1 : finish_lsn2;
- }
-
- ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
-
- /* Write the log buffer full of zeros */
- for (i = 0; i < RECV_SCAN_SIZE; i++) {
-
- *(log_sys->buf + i) = '\0';
- }
-
- start_lsn = ut_uint64_align_down(recovered_lsn,
- OS_FILE_LOG_BLOCK_SIZE);
-
- if (start_lsn != recovered_lsn) {
- /* Copy the last incomplete log block to the log buffer and
- edit its data length: */
-
- ut_memcpy(log_sys->buf, recv_sys->last_block,
- OS_FILE_LOG_BLOCK_SIZE);
- log_block_set_data_len(log_sys->buf,
- (ulint) (recovered_lsn - start_lsn));
- }
-
- if (start_lsn >= finish_lsn) {
-
- return;
- }
-
- for (;;) {
- end_lsn = start_lsn + RECV_SCAN_SIZE;
-
- if (end_lsn > finish_lsn) {
-
- end_lsn = finish_lsn;
- }
-
- len = (ulint) (end_lsn - start_lsn);
-
- log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
- if (end_lsn >= finish_lsn) {
-
- return;
- }
-
- /* Write the log buffer full of zeros */
- for (i = 0; i < RECV_SCAN_SIZE; i++) {
-
- *(log_sys->buf + i) = '\0';
- }
-
- start_lsn = end_lsn;
- }
-}
-
-/********************************************************//**
-Copies the log segment between group->recovered_lsn and recovered_lsn from the
-most up-to-date log group to group, so that it contains the latest log data. */
-static
-void
-recv_copy_group(
-/*============*/
- log_group_t* up_to_date_group, /*!< in: the most up-to-date log
- group */
- log_group_t* group, /*!< in: copy to this log
- group */
- ib_uint64_t recovered_lsn) /*!< in: recovery succeeded up
- to this lsn */
-{
- ib_uint64_t start_lsn;
- ib_uint64_t end_lsn;
- ulint len;
-
- if (group->scanned_lsn >= recovered_lsn) {
-
- return;
- }
-
- ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
-
- start_lsn = ut_uint64_align_down(group->scanned_lsn,
- OS_FILE_LOG_BLOCK_SIZE);
- for (;;) {
- end_lsn = start_lsn + RECV_SCAN_SIZE;
-
- if (end_lsn > recovered_lsn) {
- end_lsn = ut_uint64_align_up(recovered_lsn,
- OS_FILE_LOG_BLOCK_SIZE);
- }
-
- log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
- up_to_date_group, start_lsn, end_lsn);
-
- len = (ulint) (end_lsn - start_lsn);
-
- log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
-
- if (end_lsn >= recovered_lsn) {
-
- return;
- }
-
- start_lsn = end_lsn;
- }
-}
-
-/********************************************************//**
-Copies a log segment from the most up-to-date log group to the other log
-groups, so that they all contain the latest log data. Also writes the info
-about the latest checkpoint to the groups, and inits the fields in the group
-memory structs to up-to-date values. */
-static
-void
-recv_synchronize_groups(
-/*====================*/
- log_group_t* up_to_date_group) /*!< in: the most up-to-date
- log group */
-{
- log_group_t* group;
- ib_uint64_t start_lsn;
- ib_uint64_t end_lsn;
- ib_uint64_t recovered_lsn;
- ib_uint64_t limit_lsn;
-
- recovered_lsn = recv_sys->recovered_lsn;
- limit_lsn = recv_sys->limit_lsn;
-
- /* Read the last recovered log block to the recovery system buffer:
- the block is always incomplete */
-
- start_lsn = ut_uint64_align_down(recovered_lsn,
- OS_FILE_LOG_BLOCK_SIZE);
- end_lsn = ut_uint64_align_up(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
-
- ut_a(start_lsn != end_lsn);
-
- log_group_read_log_seg(LOG_RECOVER, recv_sys->last_block,
- up_to_date_group, start_lsn, end_lsn);
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- while (group) {
- if (group != up_to_date_group) {
-
- /* Copy log data if needed */
-
- recv_copy_group(group, up_to_date_group,
- recovered_lsn);
- }
-
- /* Update the fields in the group struct to correspond to
- recovered_lsn */
-
- log_group_set_fields(group, recovered_lsn);
-
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-
- /* Copy the checkpoint info to the groups; remember that we have
- incremented checkpoint_no by one, and the info will not be written
- over the max checkpoint info, thus making the preservation of max
- checkpoint info on disk certain */
-
- log_groups_write_checkpoint_info();
-
- mutex_exit(&(log_sys->mutex));
-
- /* Wait for the checkpoint write to complete */
- rw_lock_s_lock(&(log_sys->checkpoint_lock));
- rw_lock_s_unlock(&(log_sys->checkpoint_lock));
-
- mutex_enter(&(log_sys->mutex));
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************************//**
-Checks the consistency of the checkpoint info
-@return TRUE if ok */
-static
-ibool
-recv_check_cp_is_consistent(
-/*========================*/
- const byte* buf) /*!< in: buffer containing checkpoint info */
-{
- ulint fold;
-
- fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
-
- if ((fold & 0xFFFFFFFFUL) != mach_read_from_4(
- buf + LOG_CHECKPOINT_CHECKSUM_1)) {
- return(FALSE);
- }
-
- fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
- LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
-
- if ((fold & 0xFFFFFFFFUL) != mach_read_from_4(
- buf + LOG_CHECKPOINT_CHECKSUM_2)) {
- return(FALSE);
- }
-
- return(TRUE);
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************//**
-Looks for the maximum consistent checkpoint from the log groups.
-@return error code or DB_SUCCESS */
-static
-ulint
-recv_find_max_checkpoint(
-/*=====================*/
- log_group_t** max_group, /*!< out: max group */
- ulint* max_field) /*!< out: LOG_CHECKPOINT_1 or
- LOG_CHECKPOINT_2 */
-{
- log_group_t* group;
- ib_uint64_t max_no;
- ib_uint64_t checkpoint_no;
- ulint field;
- byte* buf;
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- max_no = 0;
- *max_group = NULL;
- *max_field = 0;
-
- buf = log_sys->checkpoint_buf;
-
- while (group) {
- group->state = LOG_GROUP_CORRUPTED;
-
- for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
- field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
-
- log_group_read_checkpoint_info(group, field);
-
- if (!recv_check_cp_is_consistent(buf)) {
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Checkpoint in group"
- " %lu at %lu invalid, %lu\n",
- (ulong) group->id,
- (ulong) field,
- (ulong) mach_read_from_4(
- buf
- + LOG_CHECKPOINT_CHECKSUM_1));
-
- }
-#endif /* UNIV_DEBUG */
- goto not_consistent;
- }
-
- group->state = LOG_GROUP_OK;
-
- group->lsn = mach_read_ull(
- buf + LOG_CHECKPOINT_LSN);
- group->lsn_offset = mach_read_from_4(
- buf + LOG_CHECKPOINT_OFFSET);
- checkpoint_no = mach_read_ull(
- buf + LOG_CHECKPOINT_NO);
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Checkpoint number %lu"
- " found in group %lu\n",
- (ulong) checkpoint_no,
- (ulong) group->id);
- }
-#endif /* UNIV_DEBUG */
-
- if (checkpoint_no >= max_no) {
- *max_group = group;
- *max_field = field;
- max_no = checkpoint_no;
- }
-
-not_consistent:
- ;
- }
-
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-
- if (*max_group == NULL) {
-
- fprintf(stderr,
- "InnoDB: No valid checkpoint found.\n"
- "InnoDB: If this error appears when you are"
- " creating an InnoDB database,\n"
- "InnoDB: the problem may be that during"
- " an earlier attempt you managed\n"
- "InnoDB: to create the InnoDB data files,"
- " but log file creation failed.\n"
- "InnoDB: If that is the case, please refer to\n"
- "InnoDB: " REFMAN "error-creating-innodb.html\n");
- return(DB_ERROR);
- }
-
- return(DB_SUCCESS);
-}
-#else /* !UNIV_HOTBACKUP */
-/*******************************************************************//**
-Reads the checkpoint info needed in hot backup.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-recv_read_cp_info_for_backup(
-/*=========================*/
- const byte* hdr, /*!< in: buffer containing the log group
- header */
- ib_uint64_t* lsn, /*!< out: checkpoint lsn */
- ulint* offset, /*!< out: checkpoint offset in the log group */
- ulint* fsp_limit,/*!< out: fsp limit of space 0,
- 1000000000 if the database is running
- with < version 3.23.50 of InnoDB */
- ib_uint64_t* cp_no, /*!< out: checkpoint number */
- ib_uint64_t* first_header_lsn)
- /*!< out: lsn of of the start of the
- first log file */
-{
- ulint max_cp = 0;
- ib_uint64_t max_cp_no = 0;
- const byte* cp_buf;
-
- cp_buf = hdr + LOG_CHECKPOINT_1;
-
- if (recv_check_cp_is_consistent(cp_buf)) {
- max_cp_no = mach_read_ull(cp_buf + LOG_CHECKPOINT_NO);
- max_cp = LOG_CHECKPOINT_1;
- }
-
- cp_buf = hdr + LOG_CHECKPOINT_2;
-
- if (recv_check_cp_is_consistent(cp_buf)) {
- if (mach_read_ull(cp_buf + LOG_CHECKPOINT_NO) > max_cp_no) {
- max_cp = LOG_CHECKPOINT_2;
- }
- }
-
- if (max_cp == 0) {
- return(FALSE);
- }
-
- cp_buf = hdr + max_cp;
-
- *lsn = mach_read_ull(cp_buf + LOG_CHECKPOINT_LSN);
- *offset = mach_read_from_4(cp_buf + LOG_CHECKPOINT_OFFSET);
-
- /* If the user is running a pre-3.23.50 version of InnoDB, its
- checkpoint data does not contain the fsp limit info */
- if (mach_read_from_4(cp_buf + LOG_CHECKPOINT_FSP_MAGIC_N)
- == LOG_CHECKPOINT_FSP_MAGIC_N_VAL) {
-
- *fsp_limit = mach_read_from_4(
- cp_buf + LOG_CHECKPOINT_FSP_FREE_LIMIT);
-
- if (*fsp_limit == 0) {
- *fsp_limit = 1000000000;
- }
- } else {
- *fsp_limit = 1000000000;
- }
-
- /* fprintf(stderr, "fsp limit %lu MB\n", *fsp_limit); */
-
- *cp_no = mach_read_ull(cp_buf + LOG_CHECKPOINT_NO);
-
- *first_header_lsn = mach_read_ull(hdr + LOG_FILE_START_LSN);
-
- return(TRUE);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/******************************************************//**
-Checks the 4-byte checksum to the trailer checksum field of a log
-block. We also accept a log block in the old format before
-InnoDB-3.23.52 where the checksum field contains the log block number.
-@return TRUE if ok, or if the log block may be in the format of InnoDB
-version predating 3.23.52 */
-static
-ibool
-log_block_checksum_is_ok_or_old_format(
-/*===================================*/
- const byte* block) /*!< in: pointer to a log block */
-{
-#ifdef UNIV_LOG_DEBUG
- return(TRUE);
-#endif /* UNIV_LOG_DEBUG */
- if (log_block_calc_checksum(block) == log_block_get_checksum(block)) {
-
- return(TRUE);
- }
-
- if (log_block_get_hdr_no(block) == log_block_get_checksum(block)) {
-
- /* We assume the log block is in the format of
- InnoDB version < 3.23.52 and the block is ok */
-#if 0
- fprintf(stderr,
- "InnoDB: Scanned old format < InnoDB-3.23.52"
- " log block number %lu\n",
- log_block_get_hdr_no(block));
-#endif
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-#ifdef UNIV_HOTBACKUP
-/*******************************************************************//**
-Scans the log segment and n_bytes_scanned is set to the length of valid
-log scanned. */
-UNIV_INTERN
-void
-recv_scan_log_seg_for_backup(
-/*=========================*/
- byte* buf, /*!< in: buffer containing log data */
- ulint buf_len, /*!< in: data length in that buffer */
- ib_uint64_t* scanned_lsn, /*!< in/out: lsn of buffer start,
- we return scanned lsn */
- ulint* scanned_checkpoint_no,
- /*!< in/out: 4 lowest bytes of the
- highest scanned checkpoint number so
- far */
- ulint* n_bytes_scanned)/*!< out: how much we were able to
- scan, smaller than buf_len if log
- data ended here */
-{
- ulint data_len;
- byte* log_block;
- ulint no;
-
- *n_bytes_scanned = 0;
-
- for (log_block = buf; log_block < buf + buf_len;
- log_block += OS_FILE_LOG_BLOCK_SIZE) {
-
- no = log_block_get_hdr_no(log_block);
-
-#if 0
- fprintf(stderr, "Log block header no %lu\n", no);
-#endif
-
- if (no != log_block_convert_lsn_to_no(*scanned_lsn)
- || !log_block_checksum_is_ok_or_old_format(log_block)) {
-#if 0
- fprintf(stderr,
- "Log block n:o %lu, scanned lsn n:o %lu\n",
- no, log_block_convert_lsn_to_no(*scanned_lsn));
-#endif
- /* Garbage or an incompletely written log block */
-
- log_block += OS_FILE_LOG_BLOCK_SIZE;
-#if 0
- fprintf(stderr,
- "Next log block n:o %lu\n",
- log_block_get_hdr_no(log_block));
-#endif
- break;
- }
-
- if (*scanned_checkpoint_no > 0
- && log_block_get_checkpoint_no(log_block)
- < *scanned_checkpoint_no
- && *scanned_checkpoint_no
- - log_block_get_checkpoint_no(log_block)
- > 0x80000000UL) {
-
- /* Garbage from a log buffer flush which was made
- before the most recent database recovery */
-#if 0
- fprintf(stderr,
- "Scanned cp n:o %lu, block cp n:o %lu\n",
- *scanned_checkpoint_no,
- log_block_get_checkpoint_no(log_block));
-#endif
- break;
- }
-
- data_len = log_block_get_data_len(log_block);
-
- *scanned_checkpoint_no
- = log_block_get_checkpoint_no(log_block);
- *scanned_lsn += data_len;
-
- *n_bytes_scanned += data_len;
-
- if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
- /* Log data ends here */
-
-#if 0
- fprintf(stderr, "Log block data len %lu\n",
- data_len);
-#endif
- break;
- }
- }
-}
-#endif /* UNIV_HOTBACKUP */
-
-/*******************************************************************//**
-Tries to parse a single log record body and also applies it to a page if
-specified. File ops are parsed, but not applied in this function.
-@return log record end, NULL if not a complete record */
-static
-byte*
-recv_parse_or_apply_log_rec_body(
-/*=============================*/
- byte type, /*!< in: type */
- byte* ptr, /*!< in: pointer to a buffer */
- byte* end_ptr,/*!< in: pointer to the buffer end */
- buf_block_t* block, /*!< in/out: buffer block or NULL; if
- not NULL, then the log record is
- applied to the page, and the log
- record should be complete then */
- mtr_t* mtr) /*!< in: mtr or NULL; should be non-NULL
- if and only if block is non-NULL */
-{
- dict_index_t* index = NULL;
- page_t* page;
- page_zip_des_t* page_zip;
-#ifdef UNIV_DEBUG
- ulint page_type;
-#endif /* UNIV_DEBUG */
-
- ut_ad(!block == !mtr);
-
- if (block) {
- page = block->frame;
- page_zip = buf_block_get_page_zip(block);
- ut_d(page_type = fil_page_get_type(page));
- } else {
- page = NULL;
- page_zip = NULL;
- ut_d(page_type = FIL_PAGE_TYPE_ALLOCATED);
- }
-
- switch (type) {
-#ifdef UNIV_LOG_LSN_DEBUG
- case MLOG_LSN:
- /* The LSN is checked in recv_parse_log_rec(). */
- break;
-#endif /* UNIV_LOG_LSN_DEBUG */
- case MLOG_1BYTE: case MLOG_2BYTES: case MLOG_4BYTES: case MLOG_8BYTES:
-#ifdef UNIV_DEBUG
- if (page && page_type == FIL_PAGE_TYPE_ALLOCATED
- && end_ptr >= ptr + 2) {
- /* It is OK to set FIL_PAGE_TYPE and certain
- list node fields on an empty page. Any other
- write is not OK. */
-
- /* NOTE: There may be bogus assertion failures for
- dict_hdr_create(), trx_rseg_header_create(),
- trx_sys_create_doublewrite_buf(), and
- trx_sysf_create().
- These are only called during database creation. */
- ulint offs = mach_read_from_2(ptr);
-
- switch (type) {
- default:
- ut_error;
- case MLOG_2BYTES:
- /* Note that this can fail when the
- redo log been written with something
- older than InnoDB Plugin 1.0.4. */
- ut_ad(offs == FIL_PAGE_TYPE
- || offs == IBUF_TREE_SEG_HEADER
- + IBUF_HEADER + FSEG_HDR_OFFSET
- || offs == PAGE_BTR_IBUF_FREE_LIST
- + PAGE_HEADER + FIL_ADDR_BYTE
- || offs == PAGE_BTR_IBUF_FREE_LIST
- + PAGE_HEADER + FIL_ADDR_BYTE
- + FIL_ADDR_SIZE
- || offs == PAGE_BTR_SEG_LEAF
- + PAGE_HEADER + FSEG_HDR_OFFSET
- || offs == PAGE_BTR_SEG_TOP
- + PAGE_HEADER + FSEG_HDR_OFFSET
- || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
- + PAGE_HEADER + FIL_ADDR_BYTE
- + 0 /*FLST_PREV*/
- || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
- + PAGE_HEADER + FIL_ADDR_BYTE
- + FIL_ADDR_SIZE /*FLST_NEXT*/);
- break;
- case MLOG_4BYTES:
- /* Note that this can fail when the
- redo log been written with something
- older than InnoDB Plugin 1.0.4. */
- ut_ad(0
- || offs == IBUF_TREE_SEG_HEADER
- + IBUF_HEADER + FSEG_HDR_SPACE
- || offs == IBUF_TREE_SEG_HEADER
- + IBUF_HEADER + FSEG_HDR_PAGE_NO
- || offs == PAGE_BTR_IBUF_FREE_LIST
- + PAGE_HEADER/* flst_init */
- || offs == PAGE_BTR_IBUF_FREE_LIST
- + PAGE_HEADER + FIL_ADDR_PAGE
- || offs == PAGE_BTR_IBUF_FREE_LIST
- + PAGE_HEADER + FIL_ADDR_PAGE
- + FIL_ADDR_SIZE
- || offs == PAGE_BTR_SEG_LEAF
- + PAGE_HEADER + FSEG_HDR_PAGE_NO
- || offs == PAGE_BTR_SEG_LEAF
- + PAGE_HEADER + FSEG_HDR_SPACE
- || offs == PAGE_BTR_SEG_TOP
- + PAGE_HEADER + FSEG_HDR_PAGE_NO
- || offs == PAGE_BTR_SEG_TOP
- + PAGE_HEADER + FSEG_HDR_SPACE
- || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
- + PAGE_HEADER + FIL_ADDR_PAGE
- + 0 /*FLST_PREV*/
- || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
- + PAGE_HEADER + FIL_ADDR_PAGE
- + FIL_ADDR_SIZE /*FLST_NEXT*/);
- break;
- }
- }
-#endif /* UNIV_DEBUG */
- ptr = mlog_parse_nbytes(type, ptr, end_ptr, page, page_zip);
- break;
- case MLOG_REC_INSERT: case MLOG_COMP_REC_INSERT:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
-
- if (NULL != (ptr = mlog_parse_index(
- ptr, end_ptr,
- type == MLOG_COMP_REC_INSERT,
- &index))) {
- ut_a(!page
- || (ibool)!!page_is_comp(page)
- == dict_table_is_comp(index->table));
- ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr,
- block, index, mtr);
- }
- break;
- case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
-
- if (NULL != (ptr = mlog_parse_index(
- ptr, end_ptr,
- type == MLOG_COMP_REC_CLUST_DELETE_MARK,
- &index))) {
- ut_a(!page
- || (ibool)!!page_is_comp(page)
- == dict_table_is_comp(index->table));
- ptr = btr_cur_parse_del_mark_set_clust_rec(
- ptr, end_ptr, page, page_zip, index);
- }
- break;
- case MLOG_COMP_REC_SEC_DELETE_MARK:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
- /* This log record type is obsolete, but we process it for
- backward compatibility with MySQL 5.0.3 and 5.0.4. */
- ut_a(!page || page_is_comp(page));
- ut_a(!page_zip);
- ptr = mlog_parse_index(ptr, end_ptr, TRUE, &index);
- if (!ptr) {
- break;
- }
- /* Fall through */
- case MLOG_REC_SEC_DELETE_MARK:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
- ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr,
- page, page_zip);
- break;
- case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
-
- if (NULL != (ptr = mlog_parse_index(
- ptr, end_ptr,
- type == MLOG_COMP_REC_UPDATE_IN_PLACE,
- &index))) {
- ut_a(!page
- || (ibool)!!page_is_comp(page)
- == dict_table_is_comp(index->table));
- ptr = btr_cur_parse_update_in_place(ptr, end_ptr, page,
- page_zip, index);
- }
- break;
- case MLOG_LIST_END_DELETE: case MLOG_COMP_LIST_END_DELETE:
- case MLOG_LIST_START_DELETE: case MLOG_COMP_LIST_START_DELETE:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
-
- if (NULL != (ptr = mlog_parse_index(
- ptr, end_ptr,
- type == MLOG_COMP_LIST_END_DELETE
- || type == MLOG_COMP_LIST_START_DELETE,
- &index))) {
- ut_a(!page
- || (ibool)!!page_is_comp(page)
- == dict_table_is_comp(index->table));
- ptr = page_parse_delete_rec_list(type, ptr, end_ptr,
- block, index, mtr);
- }
- break;
- case MLOG_LIST_END_COPY_CREATED: case MLOG_COMP_LIST_END_COPY_CREATED:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
-
- if (NULL != (ptr = mlog_parse_index(
- ptr, end_ptr,
- type == MLOG_COMP_LIST_END_COPY_CREATED,
- &index))) {
- ut_a(!page
- || (ibool)!!page_is_comp(page)
- == dict_table_is_comp(index->table));
- ptr = page_parse_copy_rec_list_to_created_page(
- ptr, end_ptr, block, index, mtr);
- }
- break;
- case MLOG_PAGE_REORGANIZE: case MLOG_COMP_PAGE_REORGANIZE:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
-
- if (NULL != (ptr = mlog_parse_index(
- ptr, end_ptr,
- type == MLOG_COMP_PAGE_REORGANIZE,
- &index))) {
- ut_a(!page
- || (ibool)!!page_is_comp(page)
- == dict_table_is_comp(index->table));
- ptr = btr_parse_page_reorganize(ptr, end_ptr, index,
- block, mtr);
- }
- break;
- case MLOG_PAGE_CREATE: case MLOG_COMP_PAGE_CREATE:
- /* Allow anything in page_type when creating a page. */
- ut_a(!page_zip);
- ptr = page_parse_create(ptr, end_ptr,
- type == MLOG_COMP_PAGE_CREATE,
- block, mtr);
- break;
- case MLOG_UNDO_INSERT:
- ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
- ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page);
- break;
- case MLOG_UNDO_ERASE_END:
- ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
- ptr = trx_undo_parse_erase_page_end(ptr, end_ptr, page, mtr);
- break;
- case MLOG_UNDO_INIT:
- /* Allow anything in page_type when creating a page. */
- ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr);
- break;
- case MLOG_UNDO_HDR_DISCARD:
- ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
- ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page, mtr);
- break;
- case MLOG_UNDO_HDR_CREATE:
- case MLOG_UNDO_HDR_REUSE:
- ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
- ptr = trx_undo_parse_page_header(type, ptr, end_ptr,
- page, mtr);
- break;
- case MLOG_REC_MIN_MARK: case MLOG_COMP_REC_MIN_MARK:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
- /* On a compressed page, MLOG_COMP_REC_MIN_MARK
- will be followed by MLOG_COMP_REC_DELETE
- or MLOG_ZIP_WRITE_HEADER(FIL_PAGE_PREV, FIL_NULL)
- in the same mini-transaction. */
- ut_a(type == MLOG_COMP_REC_MIN_MARK || !page_zip);
- ptr = btr_parse_set_min_rec_mark(
- ptr, end_ptr, type == MLOG_COMP_REC_MIN_MARK,
- page, mtr);
- break;
- case MLOG_REC_DELETE: case MLOG_COMP_REC_DELETE:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
-
- if (NULL != (ptr = mlog_parse_index(
- ptr, end_ptr,
- type == MLOG_COMP_REC_DELETE,
- &index))) {
- ut_a(!page
- || (ibool)!!page_is_comp(page)
- == dict_table_is_comp(index->table));
- ptr = page_cur_parse_delete_rec(ptr, end_ptr,
- block, index, mtr);
- }
- break;
- case MLOG_IBUF_BITMAP_INIT:
- /* Allow anything in page_type when creating a page. */
- ptr = ibuf_parse_bitmap_init(ptr, end_ptr, block, mtr);
- break;
- case MLOG_INIT_FILE_PAGE:
- /* Allow anything in page_type when creating a page. */
- ptr = fsp_parse_init_file_page(ptr, end_ptr, block);
- break;
- case MLOG_WRITE_STRING:
- ut_ad(!page || page_type != FIL_PAGE_TYPE_ALLOCATED);
- ptr = mlog_parse_string(ptr, end_ptr, page, page_zip);
- break;
- case MLOG_FILE_CREATE:
- case MLOG_FILE_RENAME:
- case MLOG_FILE_DELETE:
- case MLOG_FILE_CREATE2:
- ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, 0, 0);
- break;
- case MLOG_ZIP_WRITE_NODE_PTR:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
- ptr = page_zip_parse_write_node_ptr(ptr, end_ptr,
- page, page_zip);
- break;
- case MLOG_ZIP_WRITE_BLOB_PTR:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
- ptr = page_zip_parse_write_blob_ptr(ptr, end_ptr,
- page, page_zip);
- break;
- case MLOG_ZIP_WRITE_HEADER:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
- ptr = page_zip_parse_write_header(ptr, end_ptr,
- page, page_zip);
- break;
- case MLOG_ZIP_PAGE_COMPRESS:
- /* Allow anything in page_type when creating a page. */
- ptr = page_zip_parse_compress(ptr, end_ptr,
- page, page_zip);
- break;
- default:
- ptr = NULL;
- recv_sys->found_corrupt_log = TRUE;
- }
-
- if (index) {
- dict_table_t* table = index->table;
-
- dict_mem_index_free(index);
- dict_mem_table_free(table);
- }
-
- return(ptr);
-}
-
-/*********************************************************************//**
-Calculates the fold value of a page file address: used in inserting or
-searching for a log record in the hash table. The fold is obtained by
-passing the pair (space, page_no) to ut_fold_ulint_pair().
-@return folded value */
-UNIV_INLINE
-ulint
-recv_fold(
-/*======*/
- ulint space, /*!< in: space */
- ulint page_no)/*!< in: page number */
-{
- return(ut_fold_ulint_pair(space, page_no));
-}
-
-/*********************************************************************//**
-Calculates the hash value of a page file address: used in inserting or
-searching for a log record in the hash table. The value is computed by
-hash_calc_hash() from recv_fold(space, page_no) for the table
-recv_sys->addr_hash.
-@return hash value of the address in recv_sys->addr_hash */
-UNIV_INLINE
-ulint
-recv_hash(
-/*======*/
- ulint space, /*!< in: space */
- ulint page_no)/*!< in: page number */
-{
- return(hash_calc_hash(recv_fold(space, page_no), recv_sys->addr_hash));
-}
-
-/*********************************************************************//**
-Gets the hashed file address struct for a page. The chain of the cell
-selected by recv_hash(space, page_no) is walked until an entry with the
-same space and page_no is found. This function does not acquire
-recv_sys->mutex itself.
-@return file address struct, NULL if not found from the hash table */
-static
-recv_addr_t*
-recv_get_fil_addr_struct(
-/*=====================*/
- ulint space, /*!< in: space id */
- ulint page_no)/*!< in: page number */
-{
- recv_addr_t* recv_addr;
-
- recv_addr = HASH_GET_FIRST(recv_sys->addr_hash,
- recv_hash(space, page_no));
- while (recv_addr) {
- if ((recv_addr->space == space)
- && (recv_addr->page_no == page_no)) {
-
- break;
- }
-
- recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
- }
-
- return(recv_addr);
-}
-
-/*******************************************************************//**
-Adds a new log record to the hash table of log records.
-Nothing is stored if fil_tablespace_deleted_or_being_deleted_in_mem()
-reports the tablespace as deleted. Otherwise a recv_t is allocated from
-recv_sys->heap and appended to the rec_list of the page's recv_addr_t; if
-the page has no recv_addr_t yet, one is created in state
-RECV_NOT_PROCESSED, inserted into recv_sys->addr_hash, and
-recv_sys->n_addrs is incremented. The record body [body, rec_end) is
-copied into a NULL-terminated singly linked list of recv_data_t chunks of
-at most RECV_DATA_BLOCK_SIZE bytes each; the bytes of a chunk are stored
-immediately after its recv_data_t header. */
-static
-void
-recv_add_to_hash_table(
-/*===================*/
- byte type, /*!< in: log record type */
- ulint space, /*!< in: space id */
- ulint page_no, /*!< in: page number */
- byte* body, /*!< in: log record body */
- byte* rec_end, /*!< in: log record end */
- ib_uint64_t start_lsn, /*!< in: start lsn of the mtr */
- ib_uint64_t end_lsn) /*!< in: end lsn of the mtr */
-{
- recv_t* recv;
- ulint len;
- recv_data_t* recv_data;
- recv_data_t** prev_field;
- recv_addr_t* recv_addr;
-
- if (fil_tablespace_deleted_or_being_deleted_in_mem(space, -1)) {
- /* The tablespace does not exist any more: do not store the
- log record */
-
- return;
- }
-
- len = rec_end - body;
-
- recv = mem_heap_alloc(recv_sys->heap, sizeof(recv_t));
- recv->type = type;
- recv->len = rec_end - body;
- recv->start_lsn = start_lsn;
- recv->end_lsn = end_lsn;
-
- recv_addr = recv_get_fil_addr_struct(space, page_no);
-
- if (recv_addr == NULL) {
- recv_addr = mem_heap_alloc(recv_sys->heap,
- sizeof(recv_addr_t));
- recv_addr->space = space;
- recv_addr->page_no = page_no;
- recv_addr->state = RECV_NOT_PROCESSED;
-
- UT_LIST_INIT(recv_addr->rec_list);
-
- HASH_INSERT(recv_addr_t, addr_hash, recv_sys->addr_hash,
- recv_fold(space, page_no), recv_addr);
- recv_sys->n_addrs++;
-#if 0
- fprintf(stderr, "Inserting log rec for space %lu, page %lu\n",
- space, page_no);
-#endif
- }
-
- UT_LIST_ADD_LAST(rec_list, recv_addr->rec_list, recv);
-
- prev_field = &(recv->data);
-
- /* Store the log record body in chunks of less than UNIV_PAGE_SIZE:
- recv_sys->heap grows into the buffer pool, and bigger chunks could not
- be allocated */
-
- while (rec_end > body) {
-
- len = rec_end - body;
-
- if (len > RECV_DATA_BLOCK_SIZE) {
- len = RECV_DATA_BLOCK_SIZE;
- }
-
- recv_data = mem_heap_alloc(recv_sys->heap,
- sizeof(recv_data_t) + len);
- *prev_field = recv_data;
-
- memcpy(recv_data + 1, body, len);
-
- prev_field = &(recv_data->next);
-
- body += len;
- }
-
- *prev_field = NULL;
-}
-
-/*********************************************************************//**
-Copies the log record body from recv to buf. The body is read from the
-chain of recv_data_t chunks starting at recv->data: up to
-RECV_DATA_BLOCK_SIZE bytes are taken from directly behind each chunk
-header until recv->len bytes have been copied. */
-static
-void
-recv_data_copy_to_buf(
-/*==================*/
- byte* buf, /*!< out: buffer of length at least recv->len */
- recv_t* recv) /*!< in: log record */
-{
- recv_data_t* recv_data;
- ulint part_len;
- ulint len;
-
- len = recv->len;
- recv_data = recv->data;
-
- while (len > 0) {
- if (len > RECV_DATA_BLOCK_SIZE) {
- part_len = RECV_DATA_BLOCK_SIZE;
- } else {
- part_len = len;
- }
-
- ut_memcpy(buf, ((byte*)recv_data) + sizeof(recv_data_t),
- part_len);
- buf += part_len;
- len -= part_len;
-
- recv_data = recv_data->next;
- }
-}
-
-/************************************************************************//**
-Applies the hashed log records to the page, if the page lsn is less than the
-lsn of a log record. This can be called when a buffer page has just been
-read in, or also for a page already in the buffer pool.
-
-The function returns without doing anything if recv_sys->apply_log_recs is
-FALSE, if no hashed records exist for the page, or if the page's entry is
-already in state RECV_BEING_PROCESSED or RECV_PROCESSED. Otherwise the
-entry is marked RECV_BEING_PROCESSED and its records are visited in list
-order inside a mini-transaction whose log mode is MTR_LOG_NONE. A record
-is applied through recv_parse_or_apply_log_rec_body() when its start_lsn
-is >= page_lsn, where page_lsn is the lsn read from the page or, unless
-UNIV_HOTBACKUP is defined, the nonzero newest modification lsn reported
-by the buffer pool. After each applied record, recv->start_lsn + recv->len
-is written to both LSN fields of the page (and to the compressed page, if
-there is one). An MLOG_INIT_FILE_PAGE record first zeroes those LSN fields
-and resets page_lsn to page_newest_lsn. Finally the entry is marked
-RECV_PROCESSED and recv_sys->n_addrs is decremented.
-
-Note that the apply branch declares an inner end_lsn which shadows the
-function-level end_lsn: the inner value is what gets written to the page,
-while the outer one (recv->end_lsn of the last record visited) is what is
-passed to buf_flush_recv_note_modification(). */
-UNIV_INTERN
-void
-recv_recover_page_func(
-/*===================*/
-#ifndef UNIV_HOTBACKUP
- ibool just_read_in,
- /*!< in: TRUE if the i/o handler calls
- this for a freshly read page */
-#endif /* !UNIV_HOTBACKUP */
- buf_block_t* block) /*!< in/out: buffer block */
-{
- page_t* page;
- page_zip_des_t* page_zip;
- recv_addr_t* recv_addr;
- recv_t* recv;
- byte* buf;
- ib_uint64_t start_lsn;
- ib_uint64_t end_lsn;
- ib_uint64_t page_lsn;
- ib_uint64_t page_newest_lsn;
- ibool modification_to_page;
-#ifndef UNIV_HOTBACKUP
- ibool success;
-#endif /* !UNIV_HOTBACKUP */
- mtr_t mtr;
-
- mutex_enter(&(recv_sys->mutex));
-
- if (recv_sys->apply_log_recs == FALSE) {
-
- /* Log records should not be applied now */
-
- mutex_exit(&(recv_sys->mutex));
-
- return;
- }
-
- recv_addr = recv_get_fil_addr_struct(buf_block_get_space(block),
- buf_block_get_page_no(block));
-
- if ((recv_addr == NULL)
- || (recv_addr->state == RECV_BEING_PROCESSED)
- || (recv_addr->state == RECV_PROCESSED)) {
-
- mutex_exit(&(recv_sys->mutex));
-
- return;
- }
-
-#if 0
- fprintf(stderr, "Recovering space %lu, page %lu\n",
- buf_block_get_space(block), buf_block_get_page_no(block));
-#endif
-
- recv_addr->state = RECV_BEING_PROCESSED;
-
- mutex_exit(&(recv_sys->mutex));
-
- mtr_start(&mtr);
- mtr_set_log_mode(&mtr, MTR_LOG_NONE);
-
- page = block->frame;
- page_zip = buf_block_get_page_zip(block);
-
-#ifndef UNIV_HOTBACKUP
- if (just_read_in) {
- /* Move the ownership of the x-latch on the page to
- this OS thread, so that we can acquire a second
- x-latch on it. This is needed for the operations to
- the page to pass the debug checks. */
-
- rw_lock_x_lock_move_ownership(&block->lock);
- }
-
- success = buf_page_get_known_nowait(RW_X_LATCH, block,
- BUF_KEEP_OLD,
- __FILE__, __LINE__,
- &mtr);
- ut_a(success);
-
- buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
-#endif /* !UNIV_HOTBACKUP */
-
- /* Read the newest modification lsn from the page */
- page_lsn = mach_read_ull(page + FIL_PAGE_LSN);
-
-#ifndef UNIV_HOTBACKUP
- /* It may be that the page has been modified in the buffer
- pool: read the newest modification lsn there */
-
- page_newest_lsn = buf_page_get_newest_modification(&block->page);
-
- if (page_newest_lsn) {
-
- page_lsn = page_newest_lsn;
- }
-#else /* !UNIV_HOTBACKUP */
- /* In recovery from a backup we do not really use the buffer pool */
- page_newest_lsn = 0;
-#endif /* !UNIV_HOTBACKUP */
-
- modification_to_page = FALSE;
- start_lsn = end_lsn = 0;
-
- recv = UT_LIST_GET_FIRST(recv_addr->rec_list);
-
- while (recv) {
- end_lsn = recv->end_lsn;
-
- if (recv->len > RECV_DATA_BLOCK_SIZE) {
- /* We have to copy the record body to a separate
- buffer */
-
- buf = mem_alloc(recv->len);
-
- recv_data_copy_to_buf(buf, recv);
- } else {
- buf = ((byte*)(recv->data)) + sizeof(recv_data_t);
- }
-
- if (recv->type == MLOG_INIT_FILE_PAGE) {
- page_lsn = page_newest_lsn;
-
- memset(FIL_PAGE_LSN + page, 0, 8);
- memset(UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM
- + page, 0, 8);
-
- if (page_zip) {
- memset(FIL_PAGE_LSN + page_zip->data, 0, 8);
- }
- }
-
- if (recv->start_lsn >= page_lsn) {
-
- ib_uint64_t end_lsn;
-
- if (!modification_to_page) {
-
- modification_to_page = TRUE;
- start_lsn = recv->start_lsn;
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Applying log rec"
- " type %lu len %lu"
- " to space %lu page no %lu\n",
- (ulong) recv->type, (ulong) recv->len,
- (ulong) recv_addr->space,
- (ulong) recv_addr->page_no);
- }
-#endif /* UNIV_DEBUG */
-
- recv_parse_or_apply_log_rec_body(recv->type, buf,
- buf + recv->len,
- block, &mtr);
-
- end_lsn = recv->start_lsn + recv->len;
- mach_write_ull(FIL_PAGE_LSN + page, end_lsn);
- mach_write_ull(UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM
- + page, end_lsn);
-
- if (page_zip) {
- mach_write_ull(FIL_PAGE_LSN
- + page_zip->data, end_lsn);
- }
- }
-
- if (recv->len > RECV_DATA_BLOCK_SIZE) {
- mem_free(buf);
- }
-
- recv = UT_LIST_GET_NEXT(rec_list, recv);
- }
-
-#ifdef UNIV_ZIP_DEBUG
- if (fil_page_get_type(page) == FIL_PAGE_INDEX) {
- page_zip_des_t* page_zip = buf_block_get_page_zip(block);
-
- if (page_zip) {
- ut_a(page_zip_validate_low(page_zip, page, FALSE));
- }
- }
-#endif /* UNIV_ZIP_DEBUG */
-
- mutex_enter(&(recv_sys->mutex));
-
- if (recv_max_page_lsn < page_lsn) {
- recv_max_page_lsn = page_lsn;
- }
-
- recv_addr->state = RECV_PROCESSED;
-
- ut_a(recv_sys->n_addrs);
- recv_sys->n_addrs--;
-
- mutex_exit(&(recv_sys->mutex));
-
-#ifndef UNIV_HOTBACKUP
- if (modification_to_page) {
- ut_a(block);
-
- buf_flush_recv_note_modification(block, start_lsn, end_lsn);
- }
-#endif /* !UNIV_HOTBACKUP */
-
- /* Make sure that committing mtr does not change the modification
- lsn values of page */
-
- mtr.modifications = FALSE;
-
- mtr_commit(&mtr);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Reads in pages which have hashed log records, from an area around a given
-page number. The area is the RECV_READ_AHEAD_AREA pages starting at
-page_no rounded down to a multiple of RECV_READ_AHEAD_AREA. A page of the
-area is selected when it has an entry in the hash table, buf_page_peek()
-does not find it in the buffer pool, and its state is
-RECV_NOT_PROCESSED; the state is then changed to RECV_BEING_READ. The
-selected page numbers are handed to buf_read_recv_pages().
-NOTE(review): the hash lookup via recv_get_fil_addr_struct() is done
-before recv_sys->mutex is acquired; only the state check and update are
-made under the mutex.
-@return number of pages for which a read was requested */
-static
-ulint
-recv_read_in_area(
-/*==============*/
- ulint space, /*!< in: space */
- ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
- ulint page_no)/*!< in: page number */
-{
- recv_addr_t* recv_addr;
- ulint page_nos[RECV_READ_AHEAD_AREA];
- ulint low_limit;
- ulint n;
-
- low_limit = page_no - (page_no % RECV_READ_AHEAD_AREA);
-
- n = 0;
-
- for (page_no = low_limit; page_no < low_limit + RECV_READ_AHEAD_AREA;
- page_no++) {
- recv_addr = recv_get_fil_addr_struct(space, page_no);
-
- if (recv_addr && !buf_page_peek(space, page_no)) {
-
- mutex_enter(&(recv_sys->mutex));
-
- if (recv_addr->state == RECV_NOT_PROCESSED) {
- recv_addr->state = RECV_BEING_READ;
-
- page_nos[n] = page_no;
-
- n++;
- }
-
- mutex_exit(&(recv_sys->mutex));
- }
- }
-
- buf_read_recv_pages(FALSE, space, zip_size, page_nos, n);
- /*
- fprintf(stderr, "Recv pages at %lu n %lu\n", page_nos[0], n);
- */
- return(n);
-}
-
-/*******************************************************************//**
-Empties the hash table of stored log records, applying them to appropriate
-pages.
-
-If another apply batch is on (recv_sys->apply_batch_on), the function
-releases recv_sys->mutex, sleeps and retries. It then walks every cell of
-recv_sys->addr_hash: for an entry in state RECV_NOT_PROCESSED the mutex is
-released, and the page is either recovered directly with
-recv_recover_page() when buf_page_peek() finds it in the buffer pool, or
-requested through recv_read_in_area() otherwise. After the walk the
-function polls, sleeping between checks, until recv_sys->n_addrs is 0,
-and finally calls recv_sys_empty_hash(). Progress and completion messages
-are written to stderr only if at least one RECV_NOT_PROCESSED entry was
-encountered. */
-UNIV_INTERN
-void
-recv_apply_hashed_log_recs(
-/*=======================*/
- ibool allow_ibuf) /*!< in: if TRUE, also ibuf operations are
- allowed during the application; if FALSE,
- no ibuf operations are allowed, and after
- the application all file pages are flushed to
- disk and invalidated in buffer pool: this
- alternative means that no new log records
- can be generated during the application;
- the caller must in this case own the log
- mutex */
-{
- recv_addr_t* recv_addr;
- ulint i;
- ulint n_pages;
- ibool has_printed = FALSE;
- mtr_t mtr;
-loop:
- mutex_enter(&(recv_sys->mutex));
-
- if (recv_sys->apply_batch_on) {
-
- mutex_exit(&(recv_sys->mutex));
-
- os_thread_sleep(500000);
-
- goto loop;
- }
-
- ut_ad(!allow_ibuf == mutex_own(&log_sys->mutex));
-
- if (!allow_ibuf) {
- recv_no_ibuf_operations = TRUE;
- }
-
- recv_sys->apply_log_recs = TRUE;
- recv_sys->apply_batch_on = TRUE;
-
- for (i = 0; i < hash_get_n_cells(recv_sys->addr_hash); i++) {
-
- recv_addr = HASH_GET_FIRST(recv_sys->addr_hash, i);
-
- while (recv_addr) {
- ulint space = recv_addr->space;
- ulint zip_size = fil_space_get_zip_size(space);
- ulint page_no = recv_addr->page_no;
-
- if (recv_addr->state == RECV_NOT_PROCESSED) {
- if (!has_printed) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Starting an"
- " apply batch of log records"
- " to the database...\n"
- "InnoDB: Progress in percents: ",
- stderr);
- has_printed = TRUE;
- }
-
- mutex_exit(&(recv_sys->mutex));
-
- if (buf_page_peek(space, page_no)) {
- buf_block_t* block;
-
- mtr_start(&mtr);
-
- block = buf_page_get(
- space, zip_size, page_no,
- RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(
- block, SYNC_NO_ORDER_CHECK);
-
- recv_recover_page(FALSE, block);
- mtr_commit(&mtr);
- } else {
- recv_read_in_area(space, zip_size,
- page_no);
- }
-
- mutex_enter(&(recv_sys->mutex));
- }
-
- recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
- }
-
- if (has_printed
- && (i * 100) / hash_get_n_cells(recv_sys->addr_hash)
- != ((i + 1) * 100)
- / hash_get_n_cells(recv_sys->addr_hash)) {
-
- fprintf(stderr, "%lu ", (ulong)
- ((i * 100)
- / hash_get_n_cells(recv_sys->addr_hash)));
- }
- }
-
- /* Wait until all the pages have been processed */
-
- while (recv_sys->n_addrs != 0) {
-
- mutex_exit(&(recv_sys->mutex));
-
- os_thread_sleep(500000);
-
- mutex_enter(&(recv_sys->mutex));
- }
-
- if (has_printed) {
-
- fprintf(stderr, "\n");
- }
-
- if (!allow_ibuf) {
- /* Flush all the file pages to disk and invalidate them in
- the buffer pool */
-
- ut_d(recv_no_log_write = TRUE);
- mutex_exit(&(recv_sys->mutex));
- mutex_exit(&(log_sys->mutex));
-
- n_pages = buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX,
- IB_ULONGLONG_MAX);
- ut_a(n_pages != ULINT_UNDEFINED);
-
- buf_flush_wait_batch_end(BUF_FLUSH_LIST);
-
- buf_pool_invalidate();
-
- mutex_enter(&(log_sys->mutex));
- mutex_enter(&(recv_sys->mutex));
- ut_d(recv_no_log_write = FALSE);
-
- recv_no_ibuf_operations = FALSE;
- }
-
- recv_sys->apply_log_recs = FALSE;
- recv_sys->apply_batch_on = FALSE;
-
- recv_sys_empty_hash();
-
- if (has_printed) {
- fprintf(stderr, "InnoDB: Apply batch completed\n");
- }
-
- mutex_exit(&(recv_sys->mutex));
-}
-#else /* !UNIV_HOTBACKUP */
-/*******************************************************************//**
-Applies log records in the hash table to a backup.
-
-Every entry of recv_sys->addr_hash is handled using the single block
-back_block1. An entry whose tablespace has no zip size
-(fil_space_get_zip_size() returns ULINT_UNDEFINED) is marked
-RECV_PROCESSED and skipped. For the others the block is initialized, the
-tablespace is extended to hold page_no + 1 pages, the page is read with
-fil_io() (and decompressed if zip_size is nonzero), the log records are
-applied with recv_recover_page(), and the page is written back with
-fil_io(). A failure to extend, read or decompress terminates the process
-with exit(1). A progress percentage is printed to stderr, and the hash
-table is emptied with recv_sys_empty_hash() at the end.
-NOTE(review): the value returned by the final fil_io(OS_FILE_WRITE, ...)
-call is stored in error but is not checked afterwards.
-NOTE(review): the "cannot extend" message prints recv_addr->page_no as the
-page count although the requested size is page_no + 1, and it passes the
-ulint arguments to %lu without the (ulong) cast used by the other
-messages in this function. */
-UNIV_INTERN
-void
-recv_apply_log_recs_for_backup(void)
-/*================================*/
-{
- recv_addr_t* recv_addr;
- ulint n_hash_cells;
- buf_block_t* block;
- ulint actual_size;
- ibool success;
- ulint error;
- ulint i;
-
- recv_sys->apply_log_recs = TRUE;
- recv_sys->apply_batch_on = TRUE;
-
- block = back_block1;
-
- fputs("InnoDB: Starting an apply batch of log records"
- " to the database...\n"
- "InnoDB: Progress in percents: ", stderr);
-
- n_hash_cells = hash_get_n_cells(recv_sys->addr_hash);
-
- for (i = 0; i < n_hash_cells; i++) {
- /* The address hash table is externally chained */
- recv_addr = hash_get_nth_cell(recv_sys->addr_hash, i)->node;
-
- while (recv_addr != NULL) {
-
- ulint zip_size
- = fil_space_get_zip_size(recv_addr->space);
-
- if (zip_size == ULINT_UNDEFINED) {
-#if 0
- fprintf(stderr,
- "InnoDB: Warning: cannot apply"
- " log record to"
- " tablespace %lu page %lu,\n"
- "InnoDB: because tablespace with"
- " that id does not exist.\n",
- recv_addr->space, recv_addr->page_no);
-#endif
- recv_addr->state = RECV_PROCESSED;
-
- ut_a(recv_sys->n_addrs);
- recv_sys->n_addrs--;
-
- goto skip_this_recv_addr;
- }
-
- /* We simulate a page read made by the buffer pool, to
- make sure the recovery apparatus works ok. We must init
- the block. */
-
- buf_page_init_for_backup_restore(
- recv_addr->space, recv_addr->page_no,
- zip_size, block);
-
- /* Extend the tablespace's last file if the page_no
- does not fall inside its bounds; we assume the last
- file is auto-extending, and ibbackup copied the file
- when it still was smaller */
-
- success = fil_extend_space_to_desired_size(
- &actual_size,
- recv_addr->space, recv_addr->page_no + 1);
- if (!success) {
- fprintf(stderr,
- "InnoDB: Fatal error: cannot extend"
- " tablespace %lu to hold %lu pages\n",
- recv_addr->space, recv_addr->page_no);
-
- exit(1);
- }
-
- /* Read the page from the tablespace file using the
- fil0fil.c routines */
-
- if (zip_size) {
- error = fil_io(OS_FILE_READ, TRUE,
- recv_addr->space, zip_size,
- recv_addr->page_no, 0, zip_size,
- block->page.zip.data, NULL);
- if (error == DB_SUCCESS
- && !buf_zip_decompress(block, TRUE)) {
- exit(1);
- }
- } else {
- error = fil_io(OS_FILE_READ, TRUE,
- recv_addr->space, 0,
- recv_addr->page_no, 0,
- UNIV_PAGE_SIZE,
- block->frame, NULL);
- }
-
- if (error != DB_SUCCESS) {
- fprintf(stderr,
- "InnoDB: Fatal error: cannot read"
- " from tablespace"
- " %lu page number %lu\n",
- (ulong) recv_addr->space,
- (ulong) recv_addr->page_no);
-
- exit(1);
- }
-
- /* Apply the log records to this page */
- recv_recover_page(FALSE, block);
-
- /* Write the page back to the tablespace file using the
- fil0fil.c routines */
-
- buf_flush_init_for_writing(
- block->frame, buf_block_get_page_zip(block),
- mach_read_ull(block->frame + FIL_PAGE_LSN));
-
- if (zip_size) {
- error = fil_io(OS_FILE_WRITE, TRUE,
- recv_addr->space, zip_size,
- recv_addr->page_no, 0,
- zip_size,
- block->page.zip.data, NULL);
- } else {
- error = fil_io(OS_FILE_WRITE, TRUE,
- recv_addr->space, 0,
- recv_addr->page_no, 0,
- UNIV_PAGE_SIZE,
- block->frame, NULL);
- }
-skip_this_recv_addr:
- recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
- }
-
- if ((100 * i) / n_hash_cells
- != (100 * (i + 1)) / n_hash_cells) {
- fprintf(stderr, "%lu ",
- (ulong) ((100 * i) / n_hash_cells));
- fflush(stderr);
- }
- }
-
- recv_sys_empty_hash();
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*******************************************************************//**
-Tries to parse a single log record and returns its length.
-
-For MLOG_MULTI_REC_END only *type is set and 1 is returned; for
-MLOG_DUMMY_RECORD *type is set, *space is set to ULINT_UNDEFINED - 1 and 1
-is returned. In both of these cases *page_no is left untouched and *body
-is NULL. For other records the initial part is parsed with
-mlog_parse_initial_log_record() and the body with
-recv_parse_or_apply_log_rec_body(), which is called with NULL block and
-mtr arguments. On success recv_max_parsed_page_no is raised to *page_no if
-that is larger.
-
-Besides an incomplete record, 0 is also returned when *page_no is greater
-than 0x8FFFFFFF, in which case recv_sys->found_corrupt_log is set to TRUE.
-@return length of the record, or 0 if the record was not complete or was
-rejected */
-static
-ulint
-recv_parse_log_rec(
-/*===============*/
- byte* ptr, /*!< in: pointer to a buffer */
- byte* end_ptr,/*!< in: pointer to the buffer end */
- byte* type, /*!< out: type */
- ulint* space, /*!< out: space id */
- ulint* page_no,/*!< out: page number */
- byte** body) /*!< out: log record body start */
-{
- byte* new_ptr;
-
- *body = NULL;
-
- if (ptr == end_ptr) {
-
- return(0);
- }
-
- if (*ptr == MLOG_MULTI_REC_END) {
-
- *type = *ptr;
-
- return(1);
- }
-
- if (*ptr == MLOG_DUMMY_RECORD) {
- *type = *ptr;
-
- *space = ULINT_UNDEFINED - 1; /* For debugging */
-
- return(1);
- }
-
- new_ptr = mlog_parse_initial_log_record(ptr, end_ptr, type, space,
- page_no);
- *body = new_ptr;
-
- if (UNIV_UNLIKELY(!new_ptr)) {
-
- return(0);
- }
-
-#ifdef UNIV_LOG_LSN_DEBUG
- if (*type == MLOG_LSN) {
- ib_uint64_t lsn = (ib_uint64_t) *space << 32 | *page_no;
-# ifdef UNIV_LOG_DEBUG
- ut_a(lsn == log_sys->old_lsn);
-# else /* UNIV_LOG_DEBUG */
- ut_a(lsn == recv_sys->recovered_lsn);
-# endif /* UNIV_LOG_DEBUG */
- }
-#endif /* UNIV_LOG_LSN_DEBUG */
-
- /* Check that page_no is sensible */
-
- if (UNIV_UNLIKELY(*page_no > 0x8FFFFFFFUL)) {
-
- recv_sys->found_corrupt_log = TRUE;
-
- return(0);
- }
-
- new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr,
- NULL, NULL);
- if (UNIV_UNLIKELY(new_ptr == NULL)) {
-
- return(0);
- }
-
- if (*page_no > recv_max_parsed_page_no) {
- recv_max_parsed_page_no = *page_no;
- }
-
- return(new_ptr - ptr);
-}
-
-/*******************************************************//**
-Calculates the new value for lsn when more data is added to the log.
-The offset of lsn inside the data area of its log block is computed as
-(lsn % OS_FILE_LOG_BLOCK_SIZE) - LOG_BLOCK_HDR_SIZE; a debug assertion
-checks that it is smaller than the data capacity of a block. For every
-full block data capacity (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
-- LOG_BLOCK_TRL_SIZE) contained in that offset plus len, one block header
-and trailer size is added to len. Both lsn and len are narrowed to ulint
-for this computation.
-@return lsn advanced by len plus the added header and trailer bytes */
-static
-ib_uint64_t
-recv_calc_lsn_on_data_add(
-/*======================*/
- ib_uint64_t lsn, /*!< in: old lsn */
- ib_uint64_t len) /*!< in: this many bytes of data is
- added, log block headers not included */
-{
- ulint frag_len;
- ulint lsn_len;
-
- frag_len = (((ulint) lsn) % OS_FILE_LOG_BLOCK_SIZE)
- - LOG_BLOCK_HDR_SIZE;
- ut_ad(frag_len < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
- - LOG_BLOCK_TRL_SIZE);
- lsn_len = (ulint) len;
- lsn_len += (lsn_len + frag_len)
- / (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
- - LOG_BLOCK_TRL_SIZE)
- * (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE);
-
- return(lsn + lsn_len);
-}
-
-#ifdef UNIV_LOG_DEBUG
-/*******************************************************//**
-Checks that the parser recognizes incomplete initial segments of a log
-record as incomplete. For every prefix length i in 0 .. len - 1 it asserts
-that recv_parse_log_rec() returns 0 when given only the first i bytes of
-the record. */
-static
-void
-recv_check_incomplete_log_recs(
-/*===========================*/
- byte* ptr, /*!< in: pointer to a complete log record */
- ulint len) /*!< in: length of the log record */
-{
- ulint i;
- byte type;
- ulint space;
- ulint page_no;
- byte* body;
-
- for (i = 0; i < len; i++) {
- ut_a(0 == recv_parse_log_rec(ptr, ptr + i, &type, &space,
- &page_no, &body));
- }
-}
-#endif /* UNIV_LOG_DEBUG */
-
-/*******************************************************//**
-Prints diagnostic info of corrupt log. The report goes to stderr and
-contains the type, space id and page number passed in, the value of
-recv_sys->recovered_lsn, information on the previously parsed record, and
-the offset of ptr in recv_sys->buf. A hex dump of the parsing buffer is
-added when the distance from the previous record to ptr, plus 100, is
-positive and below 200000; the dump starts 100 bytes before the previous
-record and ends 100 bytes after ptr. A general warning follows and stderr
-is flushed.
-NOTE(review): the dump start is computed as
-recv_sys->buf + recv_previous_parsed_rec_offset - 100 with no visible
-check that the offset is at least 100. */
-static
-void
-recv_report_corrupt_log(
-/*====================*/
- byte* ptr, /*!< in: pointer to corrupt log record */
- byte type, /*!< in: type of the record */
- ulint space, /*!< in: space id, this may also be garbage */
- ulint page_no)/*!< in: page number, this may also be garbage */
-{
- fprintf(stderr,
- "InnoDB: ############### CORRUPT LOG RECORD FOUND\n"
- "InnoDB: Log record type %lu, space id %lu, page number %lu\n"
- "InnoDB: Log parsing proceeded successfully up to %llu\n"
- "InnoDB: Previous log record type %lu, is multi %lu\n"
- "InnoDB: Recv offset %lu, prev %lu\n",
- (ulong) type, (ulong) space, (ulong) page_no,
- recv_sys->recovered_lsn,
- (ulong) recv_previous_parsed_rec_type,
- (ulong) recv_previous_parsed_rec_is_multi,
- (ulong) (ptr - recv_sys->buf),
- (ulong) recv_previous_parsed_rec_offset);
-
- if ((ulint)(ptr - recv_sys->buf + 100)
- > recv_previous_parsed_rec_offset
- && (ulint)(ptr - recv_sys->buf + 100
- - recv_previous_parsed_rec_offset)
- < 200000) {
- fputs("InnoDB: Hex dump of corrupt log starting"
- " 100 bytes before the start\n"
- "InnoDB: of the previous log rec,\n"
- "InnoDB: and ending 100 bytes after the start"
- " of the corrupt rec:\n",
- stderr);
-
- ut_print_buf(stderr,
- recv_sys->buf
- + recv_previous_parsed_rec_offset - 100,
- ptr - recv_sys->buf + 200
- - recv_previous_parsed_rec_offset);
- putc('\n', stderr);
- }
-
- fputs("InnoDB: WARNING: the log file may have been corrupt and it\n"
- "InnoDB: is possible that the log scan did not proceed\n"
- "InnoDB: far enough in recovery! Please run CHECK TABLE\n"
- "InnoDB: on your InnoDB tables to check that they are ok!\n"
- "InnoDB: If mysqld crashes after this recovery, look at\n"
- "InnoDB: " REFMAN "forcing-recovery.html\n"
- "InnoDB: about forcing recovery.\n", stderr);
-
- fflush(stderr);
-}
-
-/*******************************************************//**
-Parses log records from a buffer and stores them to a hash table to wait
-merging to file pages.
-
-Parsing starts at recv_sys->buf + recv_sys->recovered_offset and repeats
-until the buffer end is reached or a record cannot be processed. Two cases
-are distinguished by the first byte:
-- if it carries MLOG_SINGLE_REC_FLAG or equals MLOG_DUMMY_RECORD, one
-record is parsed; recovered_offset and recovered_lsn are advanced, and
-unless the record is a dummy or a file operation, it is added to the hash
-table (when store_to_hash is TRUE);
-- otherwise the records of a multi-record mini-transaction are first
-parsed up to MLOG_MULTI_REC_END to verify that all of them are in the
-buffer, and are then parsed a second time to add them to the hash table,
-each with the end lsn of the whole group.
-The function returns early, without advancing past the current record or
-group, if parsing yields length 0, if recv_sys->found_corrupt_log is set
-(after calling recv_report_corrupt_log()), or if the computed new lsn
-exceeds recv_sys->scanned_lsn.
-@return currently always returns FALSE */
-static
-ibool
-recv_parse_log_recs(
-/*================*/
- ibool store_to_hash) /*!< in: TRUE if the records should be stored
- to the hash table; this is set to FALSE if just
- debug checking is needed */
-{
- byte* ptr;
- byte* end_ptr;
- ulint single_rec;
- ulint len;
- ulint total_len;
- ib_uint64_t new_recovered_lsn;
- ib_uint64_t old_lsn;
- byte type;
- ulint space;
- ulint page_no;
- byte* body;
- ulint n_recs;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
- ut_ad(recv_sys->parse_start_lsn != 0);
-loop:
- ptr = recv_sys->buf + recv_sys->recovered_offset;
-
- end_ptr = recv_sys->buf + recv_sys->len;
-
- if (ptr == end_ptr) {
-
- return(FALSE);
- }
-
- single_rec = (ulint)*ptr & MLOG_SINGLE_REC_FLAG;
-
- if (single_rec || *ptr == MLOG_DUMMY_RECORD) {
- /* The mtr only modified a single page, or this is a file op */
-
- old_lsn = recv_sys->recovered_lsn;
-
- /* Try to parse a log record, fetching its type, space id,
- page no, and a pointer to the body of the log record */
-
- len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
- &page_no, &body);
-
- if (len == 0 || recv_sys->found_corrupt_log) {
- if (recv_sys->found_corrupt_log) {
-
- recv_report_corrupt_log(ptr,
- type, space, page_no);
- }
-
- return(FALSE);
- }
-
- new_recovered_lsn = recv_calc_lsn_on_data_add(old_lsn, len);
-
- if (new_recovered_lsn > recv_sys->scanned_lsn) {
- /* The log record filled a log block, and we require
- that also the next log block should have been scanned
- in */
-
- return(FALSE);
- }
-
- recv_previous_parsed_rec_type = (ulint)type;
- recv_previous_parsed_rec_offset = recv_sys->recovered_offset;
- recv_previous_parsed_rec_is_multi = 0;
-
- recv_sys->recovered_offset += len;
- recv_sys->recovered_lsn = new_recovered_lsn;
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Parsed a single log rec"
- " type %lu len %lu space %lu page no %lu\n",
- (ulong) type, (ulong) len, (ulong) space,
- (ulong) page_no);
- }
-#endif /* UNIV_DEBUG */
-
- if (type == MLOG_DUMMY_RECORD) {
- /* Do nothing */
-
- } else if (!store_to_hash) {
- /* In debug checking, update a replicate page
- according to the log record, and check that it
- becomes identical with the original page */
-#ifdef UNIV_LOG_DEBUG
- recv_check_incomplete_log_recs(ptr, len);
-#endif/* UNIV_LOG_DEBUG */
-
- } else if (type == MLOG_FILE_CREATE
- || type == MLOG_FILE_CREATE2
- || type == MLOG_FILE_RENAME
- || type == MLOG_FILE_DELETE) {
- ut_a(space);
-#ifdef UNIV_HOTBACKUP
- if (recv_replay_file_ops) {
-
- /* In ibbackup --apply-log, replay an .ibd file
- operation, if possible; note that
- fil_path_to_mysql_datadir is set in ibbackup to
- point to the datadir we should use there */
-
- if (NULL == fil_op_log_parse_or_replay(
- body, end_ptr, type,
- space, page_no)) {
- fprintf(stderr,
- "InnoDB: Error: file op"
- " log record of type %lu"
- " space %lu not complete in\n"
- "InnoDB: the replay phase."
- " Path %s\n",
- (ulint)type, space,
- (char*)(body + 2));
-
- ut_error;
- }
- }
-#endif
- /* In normal mysqld crash recovery we do not try to
- replay file operations */
-#ifdef UNIV_LOG_LSN_DEBUG
- } else if (type == MLOG_LSN) {
- /* Do not add these records to the hash table.
- The page number and space id fields are misused
- for something else. */
-#endif /* UNIV_LOG_LSN_DEBUG */
- } else {
- recv_add_to_hash_table(type, space, page_no, body,
- ptr + len, old_lsn,
- recv_sys->recovered_lsn);
- }
- } else {
- /* Check that all the records associated with the single mtr
- are included within the buffer */
-
- total_len = 0;
- n_recs = 0;
-
- for (;;) {
- len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
- &page_no, &body);
- if (len == 0 || recv_sys->found_corrupt_log) {
-
- if (recv_sys->found_corrupt_log) {
-
- recv_report_corrupt_log(
- ptr, type, space, page_no);
- }
-
- return(FALSE);
- }
-
- recv_previous_parsed_rec_type = (ulint)type;
- recv_previous_parsed_rec_offset
- = recv_sys->recovered_offset + total_len;
- recv_previous_parsed_rec_is_multi = 1;
-
-#ifdef UNIV_LOG_DEBUG
- if ((!store_to_hash) && (type != MLOG_MULTI_REC_END)) {
- recv_check_incomplete_log_recs(ptr, len);
- }
-#endif /* UNIV_LOG_DEBUG */
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Parsed a multi log rec"
- " type %lu len %lu"
- " space %lu page no %lu\n",
- (ulong) type, (ulong) len,
- (ulong) space, (ulong) page_no);
- }
-#endif /* UNIV_DEBUG */
-
- total_len += len;
- n_recs++;
-
- ptr += len;
-
- if (type == MLOG_MULTI_REC_END) {
-
- /* Found the end mark for the records */
-
- break;
- }
- }
-
- new_recovered_lsn = recv_calc_lsn_on_data_add(
- recv_sys->recovered_lsn, total_len);
-
- if (new_recovered_lsn > recv_sys->scanned_lsn) {
- /* The log record filled a log block, and we require
- that also the next log block should have been scanned
- in */
-
- return(FALSE);
- }
-
- /* Add all the records to the hash table */
-
- ptr = recv_sys->buf + recv_sys->recovered_offset;
-
- for (;;) {
- old_lsn = recv_sys->recovered_lsn;
- len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
- &page_no, &body);
- if (recv_sys->found_corrupt_log) {
-
- recv_report_corrupt_log(ptr,
- type, space, page_no);
- }
-
- ut_a(len != 0);
- ut_a(0 == ((ulint)*ptr & MLOG_SINGLE_REC_FLAG));
-
- recv_sys->recovered_offset += len;
- recv_sys->recovered_lsn
- = recv_calc_lsn_on_data_add(old_lsn, len);
- if (type == MLOG_MULTI_REC_END) {
-
- /* Found the end mark for the records */
-
- break;
- }
-
- if (store_to_hash
-#ifdef UNIV_LOG_LSN_DEBUG
- && type != MLOG_LSN
-#endif /* UNIV_LOG_LSN_DEBUG */
- ) {
- recv_add_to_hash_table(type, space, page_no,
- body, ptr + len,
- old_lsn,
- new_recovered_lsn);
- }
-
- ptr += len;
- }
- }
-
- goto loop;
-}
-
-/*******************************************************//**
-Adds data from a new log block to the parsing buffer of recv_sys if
-recv_sys->parse_start_lsn is non-zero.
-
-Nothing is added, and FALSE is returned, if parse_start_lsn is zero, if
-parse_start_lsn >= scanned_lsn, or if recv_sys->scanned_lsn >=
-scanned_lsn. Otherwise the number of new bytes is the distance from the
-larger of parse_start_lsn and recv_sys->scanned_lsn to scanned_lsn; the
-bytes are taken from the end of the block's data, clipped so that the
-block header and trailer are excluded, and appended at
-recv_sys->buf + recv_sys->len.
-NOTE(review): TRUE is returned whenever the number of new bytes is
-nonzero, also when clipping leaves nothing to copy.
-NOTE(review): the assertion recv_sys->len <= RECV_PARSING_BUF_SIZE is
-evaluated after the copy has been made.
-@return TRUE if more data added */
-static
-ibool
-recv_sys_add_to_parsing_buf(
-/*========================*/
- const byte* log_block, /*!< in: log block */
- ib_uint64_t scanned_lsn) /*!< in: lsn of how far we were able
- to find data in this log block */
-{
- ulint more_len;
- ulint data_len;
- ulint start_offset;
- ulint end_offset;
-
- ut_ad(scanned_lsn >= recv_sys->scanned_lsn);
-
- if (!recv_sys->parse_start_lsn) {
- /* Cannot start parsing yet because no start point for
- it found */
-
- return(FALSE);
- }
-
- data_len = log_block_get_data_len(log_block);
-
- if (recv_sys->parse_start_lsn >= scanned_lsn) {
-
- return(FALSE);
-
- } else if (recv_sys->scanned_lsn >= scanned_lsn) {
-
- return(FALSE);
-
- } else if (recv_sys->parse_start_lsn > recv_sys->scanned_lsn) {
- more_len = (ulint) (scanned_lsn - recv_sys->parse_start_lsn);
- } else {
- more_len = (ulint) (scanned_lsn - recv_sys->scanned_lsn);
- }
-
- if (more_len == 0) {
-
- return(FALSE);
- }
-
- ut_ad(data_len >= more_len);
-
- start_offset = data_len - more_len;
-
- if (start_offset < LOG_BLOCK_HDR_SIZE) {
- start_offset = LOG_BLOCK_HDR_SIZE;
- }
-
- end_offset = data_len;
-
- if (end_offset > OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
- end_offset = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
- }
-
- ut_ad(start_offset <= end_offset);
-
- if (start_offset < end_offset) {
- ut_memcpy(recv_sys->buf + recv_sys->len,
- log_block + start_offset, end_offset - start_offset);
-
- recv_sys->len += end_offset - start_offset;
-
- ut_a(recv_sys->len <= RECV_PARSING_BUF_SIZE);
- }
-
- return(TRUE);
-}
-
-/*******************************************************//**
-Moves the parsing buffer data left to the buffer start. */
-static
-void
-recv_sys_justify_left_parsing_buf(void)
-/*===================================*/
-{
- ut_memmove(recv_sys->buf, recv_sys->buf + recv_sys->recovered_offset,
- recv_sys->len - recv_sys->recovered_offset);
-
- recv_sys->len -= recv_sys->recovered_offset;
-
- recv_sys->recovered_offset = 0;
-}
-
-/*******************************************************//**
-Scans log from a buffer and stores new log data to the parsing buffer.
-Parses and hashes the log records if new data found. Unless
-UNIV_HOTBACKUP is defined, this function will apply log records
-automatically when the hash table becomes full.
-@return TRUE if limit_lsn has been reached, or not able to scan any
-more in this log group */
-UNIV_INTERN
-ibool
-recv_scan_log_recs(
-/*===============*/
- ulint available_memory,/*!< in: we let the hash table of recs
- to grow to this size, at the maximum */
- ibool store_to_hash, /*!< in: TRUE if the records should be
- stored to the hash table; this is set
- to FALSE if just debug checking is
- needed */
- const byte* buf, /*!< in: buffer containing a log
- segment or garbage */
- ulint len, /*!< in: buffer length */
- ib_uint64_t start_lsn, /*!< in: buffer start lsn */
- ib_uint64_t* contiguous_lsn, /*!< in/out: it is known that all log
- groups contain contiguous log data up
- to this lsn */
- ib_uint64_t* group_scanned_lsn)/*!< out: scanning succeeded up to
- this lsn */
-{
- const byte* log_block;
- ulint no;
- ib_uint64_t scanned_lsn;
- ibool finished;
- ulint data_len;
- ibool more_data;
-
- ut_ad(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_ad(len > 0);
- ut_a(store_to_hash <= TRUE);
-
- finished = FALSE;
-
- log_block = buf;
- scanned_lsn = start_lsn;
- more_data = FALSE;
-
- do {
- no = log_block_get_hdr_no(log_block);
- /*
- fprintf(stderr, "Log block header no %lu\n", no);
-
- fprintf(stderr, "Scanned lsn no %lu\n",
- log_block_convert_lsn_to_no(scanned_lsn));
- */
- if (no != log_block_convert_lsn_to_no(scanned_lsn)
- || !log_block_checksum_is_ok_or_old_format(log_block)) {
-
- if (no == log_block_convert_lsn_to_no(scanned_lsn)
- && !log_block_checksum_is_ok_or_old_format(
- log_block)) {
- fprintf(stderr,
- "InnoDB: Log block no %lu at"
- " lsn %llu has\n"
- "InnoDB: ok header, but checksum field"
- " contains %lu, should be %lu\n",
- (ulong) no,
- scanned_lsn,
- (ulong) log_block_get_checksum(
- log_block),
- (ulong) log_block_calc_checksum(
- log_block));
- }
-
- /* Garbage or an incompletely written log block */
-
- finished = TRUE;
-
- break;
- }
-
- if (log_block_get_flush_bit(log_block)) {
- /* This block was a start of a log flush operation:
- we know that the previous flush operation must have
- been completed for all log groups before this block
- can have been flushed to any of the groups. Therefore,
- we know that log data is contiguous up to scanned_lsn
- in all non-corrupt log groups. */
-
- if (scanned_lsn > *contiguous_lsn) {
- *contiguous_lsn = scanned_lsn;
- }
- }
-
- data_len = log_block_get_data_len(log_block);
-
- if ((store_to_hash || (data_len == OS_FILE_LOG_BLOCK_SIZE))
- && scanned_lsn + data_len > recv_sys->scanned_lsn
- && (recv_sys->scanned_checkpoint_no > 0)
- && (log_block_get_checkpoint_no(log_block)
- < recv_sys->scanned_checkpoint_no)
- && (recv_sys->scanned_checkpoint_no
- - log_block_get_checkpoint_no(log_block)
- > 0x80000000UL)) {
-
- /* Garbage from a log buffer flush which was made
- before the most recent database recovery */
-
- finished = TRUE;
-#ifdef UNIV_LOG_DEBUG
- /* This is not really an error, but currently
- we stop here in the debug version: */
-
- ut_error;
-#endif
- break;
- }
-
- if (!recv_sys->parse_start_lsn
- && (log_block_get_first_rec_group(log_block) > 0)) {
-
- /* We found a point from which to start the parsing
- of log records */
-
- recv_sys->parse_start_lsn = scanned_lsn
- + log_block_get_first_rec_group(log_block);
- recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
- recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
- }
-
- scanned_lsn += data_len;
-
- if (scanned_lsn > recv_sys->scanned_lsn) {
-
- /* We have found more entries. If this scan is
- of startup type, we must initiate crash recovery
- environment before parsing these log records. */
-
-#ifndef UNIV_HOTBACKUP
- if (recv_log_scan_is_startup_type
- && !recv_needed_recovery) {
-
- fprintf(stderr,
- "InnoDB: Log scan progressed"
- " past the checkpoint lsn %llu\n",
- recv_sys->scanned_lsn);
- recv_init_crash_recovery();
- }
-#endif /* !UNIV_HOTBACKUP */
-
- /* We were able to find more log data: add it to the
- parsing buffer if parse_start_lsn is already
- non-zero */
-
- if (recv_sys->len + 4 * OS_FILE_LOG_BLOCK_SIZE
- >= RECV_PARSING_BUF_SIZE) {
- fprintf(stderr,
- "InnoDB: Error: log parsing"
- " buffer overflow."
- " Recovery may have failed!\n");
-
- recv_sys->found_corrupt_log = TRUE;
-
- } else if (!recv_sys->found_corrupt_log) {
- more_data = recv_sys_add_to_parsing_buf(
- log_block, scanned_lsn);
- }
-
- recv_sys->scanned_lsn = scanned_lsn;
- recv_sys->scanned_checkpoint_no
- = log_block_get_checkpoint_no(log_block);
- }
-
- if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
- /* Log data for this group ends here */
-
- finished = TRUE;
- break;
- } else {
- log_block += OS_FILE_LOG_BLOCK_SIZE;
- }
- } while (log_block < buf + len && !finished);
-
- *group_scanned_lsn = scanned_lsn;
-
- if (recv_needed_recovery
- || (recv_is_from_backup && !recv_is_making_a_backup)) {
- recv_scan_print_counter++;
-
- if (finished || (recv_scan_print_counter % 80 == 0)) {
-
- fprintf(stderr,
- "InnoDB: Doing recovery: scanned up to"
- " log sequence number %llu\n",
- *group_scanned_lsn);
- }
- }
-
- if (more_data && !recv_sys->found_corrupt_log) {
- /* Try to parse more log records */
-
- recv_parse_log_recs(store_to_hash);
-
-#ifndef UNIV_HOTBACKUP
- if (store_to_hash && mem_heap_get_size(recv_sys->heap)
- > available_memory) {
-
- /* Hash table of log records has grown too big:
- empty it; FALSE means no ibuf operations
- allowed, as we cannot add new records to the
- log yet: they would be produced by ibuf
- operations */
-
- recv_apply_hashed_log_recs(FALSE);
- }
-#endif /* !UNIV_HOTBACKUP */
-
- if (recv_sys->recovered_offset > RECV_PARSING_BUF_SIZE / 4) {
- /* Move parsing buffer data to the buffer start */
-
- recv_sys_justify_left_parsing_buf();
- }
- }
-
- return(finished);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*******************************************************//**
-Scans log from a buffer and stores new log data to the parsing buffer. Parses
-and hashes the log records if new data found. */
-static
-void
-recv_group_scan_log_recs(
-/*=====================*/
- log_group_t* group, /*!< in: log group */
- ib_uint64_t* contiguous_lsn, /*!< in/out: it is known that all log
- groups contain contiguous log data up
- to this lsn */
- ib_uint64_t* group_scanned_lsn)/*!< out: scanning succeeded up to
- this lsn */
-{
- ibool finished;
- ib_uint64_t start_lsn;
- ib_uint64_t end_lsn;
-
- finished = FALSE;
-
- start_lsn = *contiguous_lsn;
-
- while (!finished) {
- end_lsn = start_lsn + RECV_SCAN_SIZE;
-
- log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
- group, start_lsn, end_lsn);
-
- finished = recv_scan_log_recs(
- (buf_pool->curr_size - recv_n_pool_free_frames)
- * UNIV_PAGE_SIZE, TRUE, log_sys->buf, RECV_SCAN_SIZE,
- start_lsn, contiguous_lsn, group_scanned_lsn);
- start_lsn = end_lsn;
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Scanned group %lu up to"
- " log sequence number %llu\n",
- (ulong) group->id,
- *group_scanned_lsn);
- }
-#endif /* UNIV_DEBUG */
-}
-
-/*******************************************************//**
-Initialize crash recovery environment. Can be called iff
-recv_needed_recovery == FALSE. */
-static
-void
-recv_init_crash_recovery(void)
-/*==========================*/
-{
- ut_a(!recv_needed_recovery);
-
- recv_needed_recovery = TRUE;
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Database was not"
- " shut down normally!\n"
- "InnoDB: Starting crash recovery.\n");
-
- fprintf(stderr,
- "InnoDB: Reading tablespace information"
- " from the .ibd files...\n");
-
- fil_load_single_table_tablespaces();
-
- /* If we are using the doublewrite method, we will
- check if there are half-written pages in data files,
- and restore them from the doublewrite buffer if
- possible */
-
- if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
-
- fprintf(stderr,
- "InnoDB: Restoring possible"
- " half-written data pages from"
- " the doublewrite\n"
- "InnoDB: buffer...\n");
- trx_sys_doublewrite_init_or_restore_pages(TRUE);
- }
-}
-
-/********************************************************//**
-Recovers from a checkpoint. When this function returns, the database is able
-to start processing of new user transactions, but the function
-recv_recovery_from_checkpoint_finish should be called later to complete
-the recovery and free the resources used in it.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-ulint
-recv_recovery_from_checkpoint_start_func(
-/*=====================================*/
-#ifdef UNIV_LOG_ARCHIVE
- ulint type, /*!< in: LOG_CHECKPOINT or
- LOG_ARCHIVE */
- ib_uint64_t limit_lsn, /*!< in: recover up to this lsn
- if possible */
-#endif /* UNIV_LOG_ARCHIVE */
- ib_uint64_t min_flushed_lsn,/*!< in: min flushed lsn from
- data files */
- ib_uint64_t max_flushed_lsn)/*!< in: max flushed lsn from
- data files */
-{
- log_group_t* group;
- log_group_t* max_cp_group;
- log_group_t* up_to_date_group;
- ulint max_cp_field;
- ib_uint64_t checkpoint_lsn;
- ib_uint64_t checkpoint_no;
- ib_uint64_t old_scanned_lsn;
- ib_uint64_t group_scanned_lsn;
- ib_uint64_t contiguous_lsn;
- ib_uint64_t archived_lsn;
- byte* buf;
- byte log_hdr_buf[LOG_FILE_HDR_SIZE];
- ulint err;
-
-#ifdef UNIV_LOG_ARCHIVE
- ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX);
-/** TRUE when recovering from a checkpoint */
-# define TYPE_CHECKPOINT (type == LOG_CHECKPOINT)
-/** Recover up to this log sequence number */
-# define LIMIT_LSN limit_lsn
-#else /* UNIV_LOG_ARCHIVE */
-/** TRUE when recovering from a checkpoint */
-# define TYPE_CHECKPOINT 1
-/** Recover up to this log sequence number */
-# define LIMIT_LSN IB_ULONGLONG_MAX
-#endif /* UNIV_LOG_ARCHIVE */
-
- if (TYPE_CHECKPOINT) {
- recv_sys_create();
- recv_sys_init(buf_pool_get_curr_size());
- }
-
- if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) {
- fprintf(stderr,
- "InnoDB: The user has set SRV_FORCE_NO_LOG_REDO on\n");
- fprintf(stderr,
- "InnoDB: Skipping log redo\n");
-
- return(DB_SUCCESS);
- }
-
- recv_recovery_on = TRUE;
-
- recv_sys->limit_lsn = LIMIT_LSN;
-
- mutex_enter(&(log_sys->mutex));
-
- /* Look for the latest checkpoint from any of the log groups */
-
- err = recv_find_max_checkpoint(&max_cp_group, &max_cp_field);
-
- if (err != DB_SUCCESS) {
-
- mutex_exit(&(log_sys->mutex));
-
- return(err);
- }
-
- log_group_read_checkpoint_info(max_cp_group, max_cp_field);
-
- buf = log_sys->checkpoint_buf;
-
- checkpoint_lsn = mach_read_ull(buf + LOG_CHECKPOINT_LSN);
- checkpoint_no = mach_read_ull(buf + LOG_CHECKPOINT_NO);
- archived_lsn = mach_read_ull(buf + LOG_CHECKPOINT_ARCHIVED_LSN);
-
- /* Read the first log file header to print a note if this is
- a recovery from a restored InnoDB Hot Backup */
-
- fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, max_cp_group->space_id, 0,
- 0, 0, LOG_FILE_HDR_SIZE,
- log_hdr_buf, max_cp_group);
-
- if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
- (byte*)"ibbackup", (sizeof "ibbackup") - 1)) {
- /* This log file was created by ibbackup --restore: print
- a note to the user about it */
-
- fprintf(stderr,
- "InnoDB: The log file was created by"
- " ibbackup --apply-log at\n"
- "InnoDB: %s\n",
- log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP);
- fprintf(stderr,
- "InnoDB: NOTE: the following crash recovery"
- " is part of a normal restore.\n");
-
- /* Wipe over the label now */
-
- memset(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
- ' ', 4);
- /* Write to the log file to wipe over the label */
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE,
- max_cp_group->space_id, 0,
- 0, 0, OS_FILE_LOG_BLOCK_SIZE,
- log_hdr_buf, max_cp_group);
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- while (group) {
- log_checkpoint_get_nth_group_info(buf, group->id,
- &(group->archived_file_no),
- &(group->archived_offset));
-
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- if (TYPE_CHECKPOINT) {
- /* Start reading the log groups from the checkpoint lsn up. The
- variable contiguous_lsn contains an lsn up to which the log is
- known to be contiguously written to all log groups. */
-
- recv_sys->parse_start_lsn = checkpoint_lsn;
- recv_sys->scanned_lsn = checkpoint_lsn;
- recv_sys->scanned_checkpoint_no = 0;
- recv_sys->recovered_lsn = checkpoint_lsn;
-
- srv_start_lsn = checkpoint_lsn;
- }
-
- contiguous_lsn = ut_uint64_align_down(recv_sys->scanned_lsn,
- OS_FILE_LOG_BLOCK_SIZE);
- if (TYPE_CHECKPOINT) {
- up_to_date_group = max_cp_group;
-#ifdef UNIV_LOG_ARCHIVE
- } else {
- ulint capacity;
-
- /* Try to recover the remaining part from logs: first from
- the logs of the archived group */
-
- group = recv_sys->archive_group;
- capacity = log_group_get_capacity(group);
-
- if (recv_sys->scanned_lsn > checkpoint_lsn + capacity
- || checkpoint_lsn > recv_sys->scanned_lsn + capacity) {
-
- mutex_exit(&(log_sys->mutex));
-
- /* The group does not contain enough log: probably
- an archived log file was missing or corrupt */
-
- return(DB_ERROR);
- }
-
- recv_group_scan_log_recs(group, &contiguous_lsn,
- &group_scanned_lsn);
- if (recv_sys->scanned_lsn < checkpoint_lsn) {
-
- mutex_exit(&(log_sys->mutex));
-
- /* The group did not contain enough log: an archived
- log file was missing or invalid, or the log group
- was corrupt */
-
- return(DB_ERROR);
- }
-
- group->scanned_lsn = group_scanned_lsn;
- up_to_date_group = group;
-#endif /* UNIV_LOG_ARCHIVE */
- }
-
- ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
-#ifdef UNIV_LOG_ARCHIVE
- if ((type == LOG_ARCHIVE) && (group == recv_sys->archive_group)) {
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- /* Set the flag to publish that we are doing startup scan. */
- recv_log_scan_is_startup_type = TYPE_CHECKPOINT;
- while (group) {
- old_scanned_lsn = recv_sys->scanned_lsn;
-
- recv_group_scan_log_recs(group, &contiguous_lsn,
- &group_scanned_lsn);
- group->scanned_lsn = group_scanned_lsn;
-
- if (old_scanned_lsn < group_scanned_lsn) {
- /* We found a more up-to-date group */
-
- up_to_date_group = group;
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- if ((type == LOG_ARCHIVE)
- && (group == recv_sys->archive_group)) {
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-
- /* Done with startup scan. Clear the flag. */
- recv_log_scan_is_startup_type = FALSE;
- if (TYPE_CHECKPOINT) {
- /* NOTE: we always do a 'recovery' at startup, but only if
- there is something wrong we will print a message to the
- user about recovery: */
-
- if (checkpoint_lsn != max_flushed_lsn
- || checkpoint_lsn != min_flushed_lsn) {
-
- if (checkpoint_lsn < max_flushed_lsn) {
- fprintf(stderr,
- "InnoDB: #########################"
- "#################################\n"
- "InnoDB: "
- "WARNING!\n"
- "InnoDB: The log sequence number"
- " in ibdata files is higher\n"
- "InnoDB: than the log sequence number"
- " in the ib_logfiles! Are you sure\n"
- "InnoDB: you are using the right"
- " ib_logfiles to start up"
- " the database?\n"
- "InnoDB: Log sequence number in"
- " ib_logfiles is %llu, log\n"
- "InnoDB: sequence numbers stamped"
- " to ibdata file headers are between\n"
- "InnoDB: %llu and %llu.\n"
- "InnoDB: #########################"
- "#################################\n",
- checkpoint_lsn,
- min_flushed_lsn,
- max_flushed_lsn);
- }
-
- if (!recv_needed_recovery) {
- fprintf(stderr,
- "InnoDB: The log sequence number"
- " in ibdata files does not match\n"
- "InnoDB: the log sequence number"
- " in the ib_logfiles!\n");
- recv_init_crash_recovery();
- }
- }
-
- if (!recv_needed_recovery) {
- /* Init the doublewrite buffer memory structure */
- trx_sys_doublewrite_init_or_restore_pages(FALSE);
- }
- }
-
- /* We currently have only one log group */
- if (group_scanned_lsn < checkpoint_lsn) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ERROR: We were only able to scan the log"
- " up to\n"
- "InnoDB: %llu, but a checkpoint was at %llu.\n"
- "InnoDB: It is possible that"
- " the database is now corrupt!\n",
- group_scanned_lsn,
- checkpoint_lsn);
- }
-
- if (group_scanned_lsn < recv_max_page_lsn) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ERROR: We were only able to scan the log"
- " up to %llu\n"
- "InnoDB: but a database page a had an lsn %llu."
- " It is possible that the\n"
- "InnoDB: database is now corrupt!\n",
- group_scanned_lsn,
- recv_max_page_lsn);
- }
-
- if (recv_sys->recovered_lsn < checkpoint_lsn) {
-
- mutex_exit(&(log_sys->mutex));
-
- if (recv_sys->recovered_lsn >= LIMIT_LSN) {
-
- return(DB_SUCCESS);
- }
-
- ut_error;
-
- return(DB_ERROR);
- }
-
- /* Synchronize the uncorrupted log groups to the most up-to-date log
- group; we also copy checkpoint info to groups */
-
- log_sys->next_checkpoint_lsn = checkpoint_lsn;
- log_sys->next_checkpoint_no = checkpoint_no + 1;
-
-#ifdef UNIV_LOG_ARCHIVE
- log_sys->archived_lsn = archived_lsn;
-#endif /* UNIV_LOG_ARCHIVE */
-
- recv_synchronize_groups(up_to_date_group);
-
- if (!recv_needed_recovery) {
- ut_a(checkpoint_lsn == recv_sys->recovered_lsn);
- } else {
- srv_start_lsn = recv_sys->recovered_lsn;
- }
-
- log_sys->lsn = recv_sys->recovered_lsn;
-
- ut_memcpy(log_sys->buf, recv_sys->last_block, OS_FILE_LOG_BLOCK_SIZE);
-
- log_sys->buf_free = (ulint) log_sys->lsn % OS_FILE_LOG_BLOCK_SIZE;
- log_sys->buf_next_to_write = log_sys->buf_free;
- log_sys->written_to_some_lsn = log_sys->lsn;
- log_sys->written_to_all_lsn = log_sys->lsn;
-
- log_sys->last_checkpoint_lsn = checkpoint_lsn;
-
- log_sys->next_checkpoint_no = checkpoint_no + 1;
-
-#ifdef UNIV_LOG_ARCHIVE
- if (archived_lsn == IB_ULONGLONG_MAX) {
-
- log_sys->archiving_state = LOG_ARCH_OFF;
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- mutex_enter(&(recv_sys->mutex));
-
- recv_sys->apply_log_recs = TRUE;
-
- mutex_exit(&(recv_sys->mutex));
-
- mutex_exit(&(log_sys->mutex));
-
- recv_lsn_checks_on = TRUE;
-
- /* The database is now ready to start almost normal processing of user
- transactions: transaction rollbacks and the application of the log
- records in the hash table can be run in background. */
-
- return(DB_SUCCESS);
-
-#undef TYPE_CHECKPOINT
-#undef LIMIT_LSN
-}
-
-/********************************************************//**
-Completes recovery from a checkpoint. */
-UNIV_INTERN
-void
-recv_recovery_from_checkpoint_finish(void)
-/*======================================*/
-{
- int i;
-
- /* Apply the hashed log records to the respective file pages */
-
- if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
-
- recv_apply_hashed_log_recs(TRUE);
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Log records applied to the database\n");
- }
-#endif /* UNIV_DEBUG */
-
- if (recv_needed_recovery) {
- trx_sys_print_mysql_master_log_pos();
- trx_sys_print_mysql_binlog_offset();
- }
-
- if (recv_sys->found_corrupt_log) {
-
- fprintf(stderr,
- "InnoDB: WARNING: the log file may have been"
- " corrupt and it\n"
- "InnoDB: is possible that the log scan or parsing"
- " did not proceed\n"
- "InnoDB: far enough in recovery. Please run"
- " CHECK TABLE\n"
- "InnoDB: on your InnoDB tables to check that"
- " they are ok!\n"
- "InnoDB: It may be safest to recover your"
- " InnoDB database from\n"
- "InnoDB: a backup!\n");
- }
-
- /* Free the resources of the recovery system */
-
- recv_recovery_on = FALSE;
-
-#ifndef UNIV_LOG_DEBUG
- recv_sys_debug_free();
-#endif
- /* Roll back any recovered data dictionary transactions, so
- that the data dictionary tables will be free of any locks.
- The data dictionary latch should guarantee that there is at
- most one data dictionary transaction active at a time. */
- trx_rollback_or_clean_recovered(FALSE);
-
- /* Drop partially created indexes. */
- row_merge_drop_temp_indexes();
-
-#ifdef UNIV_SYNC_DEBUG
- /* Wait for a while so that created threads have time to suspend
- themselves before we switch the latching order checks on */
- os_thread_sleep(1000000);
-
- /* Switch latching order checks on in sync0sync.c */
- sync_order_checks_on = TRUE;
-#endif
- if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) {
- /* Rollback the uncommitted transactions which have no user
- session */
-
- os_thread_create(trx_rollback_or_clean_all_recovered,
- (void *)&i, NULL);
- }
-}
-
-/******************************************************//**
-Resets the logs. The contents of log files will be lost! */
-UNIV_INTERN
-void
-recv_reset_logs(
-/*============*/
- ib_uint64_t lsn, /*!< in: reset to this lsn
- rounded up to be divisible by
- OS_FILE_LOG_BLOCK_SIZE, after
- which we add
- LOG_BLOCK_HDR_SIZE */
-#ifdef UNIV_LOG_ARCHIVE
- ulint arch_log_no, /*!< in: next archived log file number */
-#endif /* UNIV_LOG_ARCHIVE */
- ibool new_logs_created)/*!< in: TRUE if resetting logs
- is done at the log creation;
- FALSE if it is done after
- archive recovery */
-{
- log_group_t* group;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- log_sys->lsn = ut_uint64_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- while (group) {
- group->lsn = log_sys->lsn;
- group->lsn_offset = LOG_FILE_HDR_SIZE;
-#ifdef UNIV_LOG_ARCHIVE
- group->archived_file_no = arch_log_no;
- group->archived_offset = 0;
-#endif /* UNIV_LOG_ARCHIVE */
-
- if (!new_logs_created) {
- recv_truncate_group(group, group->lsn, group->lsn,
- group->lsn, group->lsn);
- }
-
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-
- log_sys->buf_next_to_write = 0;
- log_sys->written_to_some_lsn = log_sys->lsn;
- log_sys->written_to_all_lsn = log_sys->lsn;
-
- log_sys->next_checkpoint_no = 0;
- log_sys->last_checkpoint_lsn = 0;
-
-#ifdef UNIV_LOG_ARCHIVE
- log_sys->archived_lsn = log_sys->lsn;
-#endif /* UNIV_LOG_ARCHIVE */
-
- log_block_init(log_sys->buf, log_sys->lsn);
- log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
-
- log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
- log_sys->lsn += LOG_BLOCK_HDR_SIZE;
-
- mutex_exit(&(log_sys->mutex));
-
- /* Reset the checkpoint fields in logs */
-
- log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
- log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
-
- mutex_enter(&(log_sys->mutex));
-}
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_HOTBACKUP
-/******************************************************//**
-Creates new log files after a backup has been restored. */
-UNIV_INTERN
-void
-recv_reset_log_files_for_backup(
-/*============================*/
- const char* log_dir, /*!< in: log file directory path */
- ulint n_log_files, /*!< in: number of log files */
- ulint log_file_size, /*!< in: log file size */
- ib_uint64_t lsn) /*!< in: new start lsn, must be
- divisible by OS_FILE_LOG_BLOCK_SIZE */
-{
- os_file_t log_file;
- ibool success;
- byte* buf;
- ulint i;
- ulint log_dir_len;
- char name[5000];
- static const char ib_logfile_basename[] = "ib_logfile";
-
- log_dir_len = strlen(log_dir);
- /* full path name of ib_logfile consists of log dir path + basename
- + number. This must fit in the name buffer.
- */
- ut_a(log_dir_len + strlen(ib_logfile_basename) + 11 < sizeof(name));
-
- buf = ut_malloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
- memset(buf, '\0', LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
-
- for (i = 0; i < n_log_files; i++) {
-
- sprintf(name, "%s%s%lu", log_dir,
- ib_logfile_basename, (ulong)i);
-
- log_file = os_file_create_simple(name, OS_FILE_CREATE,
- OS_FILE_READ_WRITE, &success);
- if (!success) {
- fprintf(stderr,
- "InnoDB: Cannot create %s. Check that"
- " the file does not exist yet.\n", name);
-
- exit(1);
- }
-
- fprintf(stderr,
- "Setting log file size to %lu %lu\n",
- (ulong) ut_get_high32(log_file_size),
- (ulong) log_file_size & 0xFFFFFFFFUL);
-
- success = os_file_set_size(name, log_file,
- log_file_size & 0xFFFFFFFFUL,
- ut_get_high32(log_file_size));
-
- if (!success) {
- fprintf(stderr,
- "InnoDB: Cannot set %s size to %lu %lu\n",
- name, (ulong) ut_get_high32(log_file_size),
- (ulong) (log_file_size & 0xFFFFFFFFUL));
- exit(1);
- }
-
- os_file_flush(log_file);
- os_file_close(log_file);
- }
-
- /* We pretend there is a checkpoint at lsn + LOG_BLOCK_HDR_SIZE */
-
- log_reset_first_header_and_checkpoint(buf, lsn);
-
- log_block_init_in_old_format(buf + LOG_FILE_HDR_SIZE, lsn);
- log_block_set_first_rec_group(buf + LOG_FILE_HDR_SIZE,
- LOG_BLOCK_HDR_SIZE);
- sprintf(name, "%s%s%lu", log_dir, ib_logfile_basename, (ulong)0);
-
- log_file = os_file_create_simple(name, OS_FILE_OPEN,
- OS_FILE_READ_WRITE, &success);
- if (!success) {
- fprintf(stderr, "InnoDB: Cannot open %s.\n", name);
-
- exit(1);
- }
-
- os_file_write(name, log_file, buf, 0, 0,
- LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
- os_file_flush(log_file);
- os_file_close(log_file);
-
- ut_free(buf);
-}
-#endif /* UNIV_HOTBACKUP */
-
-#ifdef UNIV_LOG_ARCHIVE
-/******************************************************//**
-Reads from the archive of a log group and performs recovery.
-@return TRUE if no more complete consistent archive files */
-static
-ibool
-log_group_recover_from_archive_file(
-/*================================*/
- log_group_t* group) /*!< in: log group */
-{
- os_file_t file_handle;
- ib_uint64_t start_lsn;
- ib_uint64_t file_end_lsn;
- ib_uint64_t dummy_lsn;
- ib_uint64_t scanned_lsn;
- ulint len;
- ibool ret;
- byte* buf;
- ulint read_offset;
- ulint file_size;
- ulint file_size_high;
- int input_char;
- char name[10000];
-
- ut_a(0);
-
-try_open_again:
- buf = log_sys->buf;
-
- /* Add the file to the archive file space; open the file */
-
- log_archived_file_name_gen(name, group->id, group->archived_file_no);
-
- file_handle = os_file_create(name, OS_FILE_OPEN,
- OS_FILE_LOG, OS_FILE_AIO, &ret);
-
- if (ret == FALSE) {
-ask_again:
- fprintf(stderr,
- "InnoDB: Do you want to copy additional"
- " archived log files\n"
- "InnoDB: to the directory\n");
- fprintf(stderr,
- "InnoDB: or were these all the files needed"
- " in recovery?\n");
- fprintf(stderr,
- "InnoDB: (Y == copy more files; N == this is all)?");
-
- input_char = getchar();
-
- if (input_char == (int) 'N') {
-
- return(TRUE);
- } else if (input_char == (int) 'Y') {
-
- goto try_open_again;
- } else {
- goto ask_again;
- }
- }
-
- ret = os_file_get_size(file_handle, &file_size, &file_size_high);
- ut_a(ret);
-
- ut_a(file_size_high == 0);
-
- fprintf(stderr, "InnoDB: Opened archived log file %s\n", name);
-
- ret = os_file_close(file_handle);
-
- if (file_size < LOG_FILE_HDR_SIZE) {
- fprintf(stderr,
- "InnoDB: Archive file header incomplete %s\n", name);
-
- return(TRUE);
- }
-
- ut_a(ret);
-
- /* Add the archive file as a node to the space */
-
- fil_node_create(name, 1 + file_size / UNIV_PAGE_SIZE,
- group->archive_space_id, FALSE);
-#if RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE
-# error "RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE"
-#endif
-
- /* Read the archive file header */
- fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->archive_space_id, 0, 0,
- LOG_FILE_HDR_SIZE, buf, NULL);
-
- /* Check if the archive file header is consistent */
-
- if (mach_read_from_4(buf + LOG_GROUP_ID) != group->id
- || mach_read_from_4(buf + LOG_FILE_NO)
- != group->archived_file_no) {
- fprintf(stderr,
- "InnoDB: Archive file header inconsistent %s\n", name);
-
- return(TRUE);
- }
-
- if (!mach_read_from_4(buf + LOG_FILE_ARCH_COMPLETED)) {
- fprintf(stderr,
- "InnoDB: Archive file not completely written %s\n",
- name);
-
- return(TRUE);
- }
-
- start_lsn = mach_read_ull(buf + LOG_FILE_START_LSN);
- file_end_lsn = mach_read_ull(buf + LOG_FILE_END_LSN);
-
- if (!recv_sys->scanned_lsn) {
-
- if (recv_sys->parse_start_lsn < start_lsn) {
- fprintf(stderr,
- "InnoDB: Archive log file %s"
- " starts from too big a lsn\n",
- name);
- return(TRUE);
- }
-
- recv_sys->scanned_lsn = start_lsn;
- }
-
- if (recv_sys->scanned_lsn != start_lsn) {
-
- fprintf(stderr,
- "InnoDB: Archive log file %s starts from"
- " a wrong lsn\n",
- name);
- return(TRUE);
- }
-
- read_offset = LOG_FILE_HDR_SIZE;
-
- for (;;) {
- len = RECV_SCAN_SIZE;
-
- if (read_offset + len > file_size) {
- len = ut_calc_align_down(file_size - read_offset,
- OS_FILE_LOG_BLOCK_SIZE);
- }
-
- if (len == 0) {
-
- break;
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Archive read starting at"
- " lsn %llu, len %lu from file %s\n",
- start_lsn,
- (ulong) len, name);
- }
-#endif /* UNIV_DEBUG */
-
- fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE,
- group->archive_space_id, read_offset / UNIV_PAGE_SIZE,
- read_offset % UNIV_PAGE_SIZE, len, buf, NULL);
-
- ret = recv_scan_log_recs(
- (buf_pool->n_frames - recv_n_pool_free_frames)
- * UNIV_PAGE_SIZE, TRUE, buf, len, start_lsn,
- &dummy_lsn, &scanned_lsn);
-
- if (scanned_lsn == file_end_lsn) {
-
- return(FALSE);
- }
-
- if (ret) {
- fprintf(stderr,
- "InnoDB: Archive log file %s"
- " does not scan right\n",
- name);
- return(TRUE);
- }
-
- read_offset += len;
- start_lsn += len;
-
- ut_ad(start_lsn == scanned_lsn);
- }
-
- return(FALSE);
-}
-
-/********************************************************//**
-Recovers from archived log files, and also from log files, if they exist.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-ulint
-recv_recovery_from_archive_start(
-/*=============================*/
- ib_uint64_t min_flushed_lsn,/*!< in: min flushed lsn field from the
- data files */
- ib_uint64_t limit_lsn, /*!< in: recover up to this lsn if
- possible */
- ulint first_log_no) /*!< in: number of the first archived
- log file to use in the recovery; the
- file will be searched from
- INNOBASE_LOG_ARCH_DIR specified in
- server config file */
-{
- log_group_t* group;
- ulint group_id;
- ulint trunc_len;
- ibool ret;
- ulint err;
-
- ut_a(0);
-
- recv_sys_create();
- recv_sys_init(buf_pool_get_curr_size());
-
- recv_recovery_on = TRUE;
- recv_recovery_from_backup_on = TRUE;
-
- recv_sys->limit_lsn = limit_lsn;
-
- group_id = 0;
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- while (group) {
- if (group->id == group_id) {
-
- break;
- }
-
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-
- if (!group) {
- fprintf(stderr,
- "InnoDB: There is no log group defined with id %lu!\n",
- (ulong) group_id);
- return(DB_ERROR);
- }
-
- group->archived_file_no = first_log_no;
-
- recv_sys->parse_start_lsn = min_flushed_lsn;
-
- recv_sys->scanned_lsn = 0;
- recv_sys->scanned_checkpoint_no = 0;
- recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
-
- recv_sys->archive_group = group;
-
- ret = FALSE;
-
- mutex_enter(&(log_sys->mutex));
-
- while (!ret) {
- ret = log_group_recover_from_archive_file(group);
-
- /* Close and truncate a possible processed archive file
- from the file space */
-
- trunc_len = UNIV_PAGE_SIZE
- * fil_space_get_size(group->archive_space_id);
- if (trunc_len > 0) {
- fil_space_truncate_start(group->archive_space_id,
- trunc_len);
- }
-
- group->archived_file_no++;
- }
-
- if (recv_sys->recovered_lsn < limit_lsn) {
-
- if (!recv_sys->scanned_lsn) {
-
- recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
- }
-
- mutex_exit(&(log_sys->mutex));
-
- err = recv_recovery_from_checkpoint_start(LOG_ARCHIVE,
- limit_lsn,
- IB_ULONGLONG_MAX,
- IB_ULONGLONG_MAX);
- if (err != DB_SUCCESS) {
-
- return(err);
- }
-
- mutex_enter(&(log_sys->mutex));
- }
-
- if (limit_lsn != IB_ULONGLONG_MAX) {
-
- recv_apply_hashed_log_recs(FALSE);
-
- recv_reset_logs(recv_sys->recovered_lsn, 0, FALSE);
- }
-
- mutex_exit(&(log_sys->mutex));
-
- return(DB_SUCCESS);
-}
-
-/********************************************************//**
-Completes recovery from archive. */
-UNIV_INTERN
-void
-recv_recovery_from_archive_finish(void)
-/*===================================*/
-{
- recv_recovery_from_checkpoint_finish();
-
- recv_recovery_from_backup_on = FALSE;
-}
-#endif /* UNIV_LOG_ARCHIVE */
diff --git a/storage/innodb_plugin/mach/mach0data.c b/storage/innodb_plugin/mach/mach0data.c
deleted file mode 100644
index e030ce9aadf..00000000000
--- a/storage/innodb_plugin/mach/mach0data.c
+++ /dev/null
@@ -1,134 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file mach/mach0data.c
-Utilities for converting data from the database file
-to the machine format.
-
-Created 11/28/1995 Heikki Tuuri
-***********************************************************************/
-
-#include "mach0data.h"
-
-#ifdef UNIV_NONINL
-#include "mach0data.ic"
-#endif
-
-/*********************************************************//**
-Reads a ulint in a compressed form if the log record fully contains it.
-@return pointer to end of the stored field, NULL if not complete */
-UNIV_INTERN
-byte*
-mach_parse_compressed(
-/*==================*/
- byte* ptr, /*!< in: pointer to buffer from where to read */
- byte* end_ptr,/*!< in: pointer to end of the buffer */
- ulint* val) /*!< out: read value (< 2^32) */
-{
- ulint flag;
-
- ut_ad(ptr && end_ptr && val);
-
- if (ptr >= end_ptr) {
-
- return(NULL);
- }
-
- flag = mach_read_from_1(ptr);
-
- if (flag < 0x80UL) {
- *val = flag;
- return(ptr + 1);
-
- } else if (flag < 0xC0UL) {
- if (end_ptr < ptr + 2) {
- return(NULL);
- }
-
- *val = mach_read_from_2(ptr) & 0x7FFFUL;
-
- return(ptr + 2);
-
- } else if (flag < 0xE0UL) {
- if (end_ptr < ptr + 3) {
- return(NULL);
- }
-
- *val = mach_read_from_3(ptr) & 0x3FFFFFUL;
-
- return(ptr + 3);
- } else if (flag < 0xF0UL) {
- if (end_ptr < ptr + 4) {
- return(NULL);
- }
-
- *val = mach_read_from_4(ptr) & 0x1FFFFFFFUL;
-
- return(ptr + 4);
- } else {
- ut_ad(flag == 0xF0UL);
-
- if (end_ptr < ptr + 5) {
- return(NULL);
- }
-
- *val = mach_read_from_4(ptr + 1);
- return(ptr + 5);
- }
-}
-
-/*********************************************************//**
-Reads a dulint in a compressed form if the log record fully contains it.
-@return pointer to end of the stored field, NULL if not complete */
-UNIV_INTERN
-byte*
-mach_dulint_parse_compressed(
-/*=========================*/
- byte* ptr, /*!< in: pointer to buffer from where to read */
- byte* end_ptr,/*!< in: pointer to end of the buffer */
- dulint* val) /*!< out: read value */
-{
- ulint high;
- ulint low;
- ulint size;
-
- ut_ad(ptr && end_ptr && val);
-
- if (end_ptr < ptr + 5) {
-
- return(NULL);
- }
-
- high = mach_read_compressed(ptr);
-
- size = mach_get_compressed_size(high);
-
- ptr += size;
-
- if (end_ptr < ptr + 4) {
-
- return(NULL);
- }
-
- low = mach_read_from_4(ptr);
-
- *val = ut_dulint_create(high, low);
-
- return(ptr + 4);
-}
diff --git a/storage/innodb_plugin/mem/mem0dbg.c b/storage/innodb_plugin/mem/mem0dbg.c
deleted file mode 100644
index 01eda20ec45..00000000000
--- a/storage/innodb_plugin/mem/mem0dbg.c
+++ /dev/null
@@ -1,1037 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file mem/mem0dbg.c
-The memory management: the debug code. This is not a compilation module,
-but is included in mem0mem.* !
-
-Created 6/9/1994 Heikki Tuuri
-*************************************************************************/
-
-#ifdef UNIV_MEM_DEBUG
-# ifndef UNIV_HOTBACKUP
-/* The mutex which protects in the debug version the hash table
-containing the list of live memory heaps, and also the global
-variables below. */
-UNIV_INTERN mutex_t mem_hash_mutex;
-# endif /* !UNIV_HOTBACKUP */
-
-/* The following variables contain information about the
-extent of memory allocations. Only used in the debug version.
-Protected by mem_hash_mutex above. */
-
-static ulint mem_n_created_heaps = 0;
-static ulint mem_n_allocations = 0;
-static ulint mem_total_allocated_memory = 0;
-UNIV_INTERN ulint mem_current_allocated_memory = 0;
-static ulint mem_max_allocated_memory = 0;
-# ifndef UNIV_HOTBACKUP
-static ulint mem_last_print_info = 0;
-static ibool mem_hash_initialized = FALSE;
-# endif /* !UNIV_HOTBACKUP */
-
-/* Size of the hash table for memory management tracking */
-#define MEM_HASH_SIZE 997
-
-/* The node of the list containing currently allocated memory heaps */
-
-typedef struct mem_hash_node_struct mem_hash_node_t;
-struct mem_hash_node_struct {
- UT_LIST_NODE_T(mem_hash_node_t)
- list; /*!< hash list node */
- mem_heap_t* heap; /*!< memory heap */
- const char* file_name;/* file where heap was created*/
- ulint line; /*!< file line of creation */
- ulint nth_heap;/* this is the nth heap created */
- UT_LIST_NODE_T(mem_hash_node_t)
- all_list;/* list of all created heaps */
-};
-
-typedef UT_LIST_BASE_NODE_T(mem_hash_node_t) mem_hash_cell_t;
-
-/* The hash table of allocated heaps */
-static mem_hash_cell_t mem_hash_table[MEM_HASH_SIZE];
-
-/* The base node of the list of all allocated heaps */
-static mem_hash_cell_t mem_all_list_base;
-
-
-
-UNIV_INLINE
-mem_hash_cell_t*
-mem_hash_get_nth_cell(ulint i);
-
-/* Accessor function for the hash table. Returns a pointer to the
-table cell. */
-UNIV_INLINE
-mem_hash_cell_t*
-mem_hash_get_nth_cell(ulint i)
-{
- ut_a(i < MEM_HASH_SIZE);
-
- return(&(mem_hash_table[i]));
-}
-
-/* Accessor functions for a memory field in the debug version */
-UNIV_INTERN
-void
-mem_field_header_set_len(byte* field, ulint len)
-{
- mach_write_to_4(field - 2 * sizeof(ulint), len);
-}
-
-UNIV_INTERN
-ulint
-mem_field_header_get_len(byte* field)
-{
- return(mach_read_from_4(field - 2 * sizeof(ulint)));
-}
-
-UNIV_INTERN
-void
-mem_field_header_set_check(byte* field, ulint check)
-{
- mach_write_to_4(field - sizeof(ulint), check);
-}
-
-UNIV_INTERN
-ulint
-mem_field_header_get_check(byte* field)
-{
- return(mach_read_from_4(field - sizeof(ulint)));
-}
-
-UNIV_INTERN
-void
-mem_field_trailer_set_check(byte* field, ulint check)
-{
- mach_write_to_4(field + mem_field_header_get_len(field), check);
-}
-
-UNIV_INTERN
-ulint
-mem_field_trailer_get_check(byte* field)
-{
- return(mach_read_from_4(field
- + mem_field_header_get_len(field)));
-}
-#endif /* UNIV_MEM_DEBUG */
-
-#ifndef UNIV_HOTBACKUP
-/******************************************************************//**
-Initializes the memory system. */
-UNIV_INTERN
-void
-mem_init(
-/*=====*/
- ulint size) /*!< in: common pool size in bytes */
-{
-#ifdef UNIV_MEM_DEBUG
-
- ulint i;
-
- /* Initialize the hash table */
- ut_a(FALSE == mem_hash_initialized);
-
- mutex_create(&mem_hash_mutex, SYNC_MEM_HASH);
-
- for (i = 0; i < MEM_HASH_SIZE; i++) {
- UT_LIST_INIT(*mem_hash_get_nth_cell(i));
- }
-
- UT_LIST_INIT(mem_all_list_base);
-
- mem_hash_initialized = TRUE;
-#endif
-
- if (UNIV_LIKELY(srv_use_sys_malloc)) {
- /* When innodb_use_sys_malloc is set, the
- mem_comm_pool won't be used for any allocations. We
- create a dummy mem_comm_pool, because some statistics
- and debugging code relies on it being initialized. */
- size = 1;
- }
-
- mem_comm_pool = mem_pool_create(size);
-}
-
-/******************************************************************//**
-Closes the memory system. */
-UNIV_INTERN
-void
-mem_close(void)
-/*===========*/
-{
- mem_pool_free(mem_comm_pool);
- mem_comm_pool = NULL;
-}
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_MEM_DEBUG
-/******************************************************************//**
-Initializes an allocated memory field in the debug version. */
-UNIV_INTERN
-void
-mem_field_init(
-/*===========*/
- byte* buf, /*!< in: memory field */
- ulint n) /*!< in: how many bytes the user requested */
-{
- ulint rnd;
- byte* usr_buf;
-
- usr_buf = buf + MEM_FIELD_HEADER_SIZE;
-
- /* In the debug version write the length field and the
- check fields to the start and the end of the allocated storage.
- The field header consists of a length field and
- a random number field, in this order. The field trailer contains
- the same random number as a check field. */
-
- mem_field_header_set_len(usr_buf, n);
-
- rnd = ut_rnd_gen_ulint();
-
- mem_field_header_set_check(usr_buf, rnd);
- mem_field_trailer_set_check(usr_buf, rnd);
-
- /* Update the memory allocation information */
-
- mutex_enter(&mem_hash_mutex);
-
- mem_total_allocated_memory += n;
- mem_current_allocated_memory += n;
- mem_n_allocations++;
-
- if (mem_current_allocated_memory > mem_max_allocated_memory) {
- mem_max_allocated_memory = mem_current_allocated_memory;
- }
-
- mutex_exit(&mem_hash_mutex);
-
- /* In the debug version set the buffer to a random
- combination of 0xBA and 0xBE */
-
- mem_init_buf(usr_buf, n);
-}
-
-/******************************************************************//**
-Erases an allocated memory field in the debug version. */
-UNIV_INTERN
-void
-mem_field_erase(
-/*============*/
- byte* buf, /*!< in: memory field */
- ulint n __attribute__((unused)))
- /*!< in: how many bytes the user requested */
-{
- byte* usr_buf;
-
- usr_buf = buf + MEM_FIELD_HEADER_SIZE;
-
- mutex_enter(&mem_hash_mutex);
- mem_current_allocated_memory -= n;
- mutex_exit(&mem_hash_mutex);
-
- /* Check that the field lengths agree */
- ut_ad(n == (ulint)mem_field_header_get_len(usr_buf));
-
- /* In the debug version, set the freed space to a random
- combination of 0xDE and 0xAD */
-
- mem_erase_buf(buf, MEM_SPACE_NEEDED(n));
-}
-
-/***************************************************************//**
-Initializes a buffer to a random combination of hex BA and BE.
-Used to initialize allocated memory. */
-UNIV_INTERN
-void
-mem_init_buf(
-/*=========*/
- byte* buf, /*!< in: pointer to buffer */
- ulint n) /*!< in: length of buffer */
-{
- byte* ptr;
-
- UNIV_MEM_ASSERT_W(buf, n);
-
- for (ptr = buf; ptr < buf + n; ptr++) {
-
- if (ut_rnd_gen_ibool()) {
- *ptr = 0xBA;
- } else {
- *ptr = 0xBE;
- }
- }
-
- UNIV_MEM_INVALID(buf, n);
-}
-
-/***************************************************************//**
-Initializes a buffer to a random combination of hex DE and AD.
-Used to erase freed memory. */
-UNIV_INTERN
-void
-mem_erase_buf(
-/*==========*/
- byte* buf, /*!< in: pointer to buffer */
- ulint n) /*!< in: length of buffer */
-{
- byte* ptr;
-
- UNIV_MEM_ASSERT_W(buf, n);
-
- for (ptr = buf; ptr < buf + n; ptr++) {
- if (ut_rnd_gen_ibool()) {
- *ptr = 0xDE;
- } else {
- *ptr = 0xAD;
- }
- }
-
- UNIV_MEM_FREE(buf, n);
-}
-
-/***************************************************************//**
-Inserts a created memory heap to the hash table of current allocated
-memory heaps. */
-UNIV_INTERN
-void
-mem_hash_insert(
-/*============*/
- mem_heap_t* heap, /*!< in: the created heap */
- const char* file_name, /*!< in: file name of creation */
- ulint line) /*!< in: line where created */
-{
- mem_hash_node_t* new_node;
- ulint cell_no ;
-
- ut_ad(mem_heap_check(heap));
-
- mutex_enter(&mem_hash_mutex);
-
- cell_no = ut_hash_ulint((ulint)heap, MEM_HASH_SIZE);
-
- /* Allocate a new node to the list */
- new_node = ut_malloc(sizeof(mem_hash_node_t));
-
- new_node->heap = heap;
- new_node->file_name = file_name;
- new_node->line = line;
- new_node->nth_heap = mem_n_created_heaps;
-
- /* Insert into lists */
- UT_LIST_ADD_FIRST(list, *mem_hash_get_nth_cell(cell_no), new_node);
-
- UT_LIST_ADD_LAST(all_list, mem_all_list_base, new_node);
-
- mem_n_created_heaps++;
-
- mutex_exit(&mem_hash_mutex);
-}
-
-/***************************************************************//**
-Removes a memory heap (which is going to be freed by the caller)
-from the list of live memory heaps. Returns the size of the heap
-in terms of how much memory in bytes was allocated for the user of
-the heap (not the total space occupied by the heap).
-Also validates the heap.
-NOTE: This function does not free the storage occupied by the
-heap itself, only the node in the list of heaps. */
-UNIV_INTERN
-void
-mem_hash_remove(
-/*============*/
- mem_heap_t* heap, /*!< in: the heap to be freed */
- const char* file_name, /*!< in: file name of freeing */
- ulint line) /*!< in: line where freed */
-{
- mem_hash_node_t* node;
- ulint cell_no;
- ibool error;
- ulint size;
-
- ut_ad(mem_heap_check(heap));
-
- mutex_enter(&mem_hash_mutex);
-
- cell_no = ut_hash_ulint((ulint)heap, MEM_HASH_SIZE);
-
- /* Look for the heap in the hash table list */
- node = UT_LIST_GET_FIRST(*mem_hash_get_nth_cell(cell_no));
-
- while (node != NULL) {
- if (node->heap == heap) {
-
- break;
- }
-
- node = UT_LIST_GET_NEXT(list, node);
- }
-
- if (node == NULL) {
- fprintf(stderr,
- "Memory heap or buffer freed in %s line %lu"
- " did not exist.\n",
- file_name, (ulong) line);
- ut_error;
- }
-
- /* Remove from lists */
- UT_LIST_REMOVE(list, *mem_hash_get_nth_cell(cell_no), node);
-
- UT_LIST_REMOVE(all_list, mem_all_list_base, node);
-
- /* Validate the heap which will be freed */
- mem_heap_validate_or_print(node->heap, NULL, FALSE, &error, &size,
- NULL, NULL);
- if (error) {
- fprintf(stderr,
- "Inconsistency in memory heap or"
- " buffer n:o %lu created\n"
- "in %s line %lu and tried to free in %s line %lu.\n"
- "Hex dump of 400 bytes around memory heap"
- " first block start:\n",
- node->nth_heap, node->file_name, (ulong) node->line,
- file_name, (ulong) line);
- ut_print_buf(stderr, (byte*)node->heap - 200, 400);
- fputs("\nDump of the mem heap:\n", stderr);
- mem_heap_validate_or_print(node->heap, NULL, TRUE, &error,
- &size, NULL, NULL);
- ut_error;
- }
-
- /* Free the memory occupied by the node struct */
- ut_free(node);
-
- mem_current_allocated_memory -= size;
-
- mutex_exit(&mem_hash_mutex);
-}
-#endif /* UNIV_MEM_DEBUG */
-
-#if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG
-/***************************************************************//**
-Checks a memory heap for consistency and prints the contents if requested.
-Outputs the sum of sizes of buffers given to the user (only in
-the debug version), the physical size of the heap and the number of
-blocks in the heap. In case of error returns 0 as sizes and number
-of blocks. */
-UNIV_INTERN
-void
-mem_heap_validate_or_print(
-/*=======================*/
- mem_heap_t* heap, /*!< in: memory heap */
- byte* top __attribute__((unused)),
- /*!< in: calculate and validate only until
- this top pointer in the heap is reached,
- if this pointer is NULL, ignored */
- ibool print, /*!< in: if TRUE, prints the contents
- of the heap; works only in
- the debug version */
- ibool* error, /*!< out: TRUE if error */
- ulint* us_size,/*!< out: allocated memory
- (for the user) in the heap,
- if a NULL pointer is passed as this
- argument, it is ignored; in the
- non-debug version this is always -1 */
- ulint* ph_size,/*!< out: physical size of the heap,
- if a NULL pointer is passed as this
- argument, it is ignored */
- ulint* n_blocks) /*!< out: number of blocks in the heap,
- if a NULL pointer is passed as this
- argument, it is ignored */
-{
- mem_block_t* block;
- ulint total_len = 0;
- ulint block_count = 0;
- ulint phys_len = 0;
-#ifdef UNIV_MEM_DEBUG
- ulint len;
- byte* field;
- byte* user_field;
- ulint check_field;
-#endif
-
- /* Pessimistically, we set the parameters to error values */
- if (us_size != NULL) {
- *us_size = 0;
- }
- if (ph_size != NULL) {
- *ph_size = 0;
- }
- if (n_blocks != NULL) {
- *n_blocks = 0;
- }
- *error = TRUE;
-
- block = heap;
-
- if (block->magic_n != MEM_BLOCK_MAGIC_N) {
- return;
- }
-
- if (print) {
- fputs("Memory heap:", stderr);
- }
-
- while (block != NULL) {
- phys_len += mem_block_get_len(block);
-
- if ((block->type == MEM_HEAP_BUFFER)
- && (mem_block_get_len(block) > UNIV_PAGE_SIZE)) {
-
- fprintf(stderr,
- "InnoDB: Error: mem block %p"
- " length %lu > UNIV_PAGE_SIZE\n",
- (void*) block,
- (ulong) mem_block_get_len(block));
- /* error */
-
- return;
- }
-
-#ifdef UNIV_MEM_DEBUG
- /* We can trace the fields of the block only in the debug
- version */
- if (print) {
- fprintf(stderr, " Block %ld:", block_count);
- }
-
- field = (byte*)block + mem_block_get_start(block);
-
- if (top && (field == top)) {
-
- goto completed;
- }
-
- while (field < (byte*)block + mem_block_get_free(block)) {
-
- /* Calculate the pointer to the storage
- which was given to the user */
-
- user_field = field + MEM_FIELD_HEADER_SIZE;
-
- len = mem_field_header_get_len(user_field);
-
- if (print) {
- ut_print_buf(stderr, user_field, len);
- putc('\n', stderr);
- }
-
- total_len += len;
- check_field = mem_field_header_get_check(user_field);
-
- if (check_field
- != mem_field_trailer_get_check(user_field)) {
- /* error */
-
- fprintf(stderr,
- "InnoDB: Error: block %lx mem"
- " field %lx len %lu\n"
- "InnoDB: header check field is"
- " %lx but trailer %lx\n",
- (ulint)block,
- (ulint)field, len, check_field,
- mem_field_trailer_get_check(
- user_field));
-
- return;
- }
-
- /* Move to next field */
- field = field + MEM_SPACE_NEEDED(len);
-
- if (top && (field == top)) {
-
- goto completed;
- }
-
- }
-
- /* At the end check that we have arrived to the first free
- position */
-
- if (field != (byte*)block + mem_block_get_free(block)) {
- /* error */
-
- fprintf(stderr,
- "InnoDB: Error: block %lx end of"
- " mem fields %lx\n"
- "InnoDB: but block free at %lx\n",
- (ulint)block, (ulint)field,
- (ulint)((byte*)block
- + mem_block_get_free(block)));
-
- return;
- }
-
-#endif
-
- block = UT_LIST_GET_NEXT(list, block);
- block_count++;
- }
-#ifdef UNIV_MEM_DEBUG
-completed:
-#endif
- if (us_size != NULL) {
- *us_size = total_len;
- }
- if (ph_size != NULL) {
- *ph_size = phys_len;
- }
- if (n_blocks != NULL) {
- *n_blocks = block_count;
- }
- *error = FALSE;
-}
-
-/**************************************************************//**
-Prints the contents of a memory heap. */
-static
-void
-mem_heap_print(
-/*===========*/
- mem_heap_t* heap) /*!< in: memory heap */
-{
- ibool error;
- ulint us_size;
- ulint phys_size;
- ulint n_blocks;
-
- ut_ad(mem_heap_check(heap));
-
- mem_heap_validate_or_print(heap, NULL, TRUE, &error,
- &us_size, &phys_size, &n_blocks);
- fprintf(stderr,
- "\nheap type: %lu; size: user size %lu;"
- " physical size %lu; blocks %lu.\n",
- (ulong) heap->type, (ulong) us_size,
- (ulong) phys_size, (ulong) n_blocks);
- ut_a(!error);
-}
-
-/**************************************************************//**
-Validates the contents of a memory heap.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-mem_heap_validate(
-/*==============*/
- mem_heap_t* heap) /*!< in: memory heap */
-{
- ibool error;
- ulint us_size;
- ulint phys_size;
- ulint n_blocks;
-
- ut_ad(mem_heap_check(heap));
-
- mem_heap_validate_or_print(heap, NULL, FALSE, &error, &us_size,
- &phys_size, &n_blocks);
- if (error) {
- mem_heap_print(heap);
- }
-
- ut_a(!error);
-
- return(TRUE);
-}
-#endif /* UNIV_MEM_DEBUG || UNIV_DEBUG */
-
-#ifdef UNIV_DEBUG
-/**************************************************************//**
-Checks that an object is a memory heap (or a block of it).
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-mem_heap_check(
-/*===========*/
- mem_heap_t* heap) /*!< in: memory heap */
-{
- ut_a(heap->magic_n == MEM_BLOCK_MAGIC_N);
-
- return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
-#ifdef UNIV_MEM_DEBUG
-/*****************************************************************//**
-TRUE if no memory is currently allocated.
-@return TRUE if no heaps exist */
-UNIV_INTERN
-ibool
-mem_all_freed(void)
-/*===============*/
-{
- mem_hash_node_t* node;
- ulint heap_count = 0;
- ulint i;
-
- mem_validate();
-
- mutex_enter(&mem_hash_mutex);
-
- for (i = 0; i < MEM_HASH_SIZE; i++) {
-
- node = UT_LIST_GET_FIRST(*mem_hash_get_nth_cell(i));
- while (node != NULL) {
- heap_count++;
- node = UT_LIST_GET_NEXT(list, node);
- }
- }
-
- mutex_exit(&mem_hash_mutex);
-
- if (heap_count == 0) {
-# ifndef UNIV_HOTBACKUP
- ut_a(mem_pool_get_reserved(mem_comm_pool) == 0);
-# endif /* !UNIV_HOTBACKUP */
-
- return(TRUE);
- } else {
- return(FALSE);
- }
-}
-
-/*****************************************************************//**
-Validates the dynamic memory allocation system.
-@return TRUE if error */
-UNIV_INTERN
-ibool
-mem_validate_no_assert(void)
-/*========================*/
-{
- mem_hash_node_t* node;
- ulint n_heaps = 0;
- ulint allocated_mem;
- ulint ph_size;
- ulint total_allocated_mem = 0;
- ibool error = FALSE;
- ulint n_blocks;
- ulint i;
-
-# ifndef UNIV_HOTBACKUP
- mem_pool_validate(mem_comm_pool);
-# endif /* !UNIV_HOTBACKUP */
-
- mutex_enter(&mem_hash_mutex);
-
- for (i = 0; i < MEM_HASH_SIZE; i++) {
-
- node = UT_LIST_GET_FIRST(*mem_hash_get_nth_cell(i));
-
- while (node != NULL) {
- n_heaps++;
-
- mem_heap_validate_or_print(node->heap, NULL,
- FALSE, &error,
- &allocated_mem,
- &ph_size, &n_blocks);
-
- if (error) {
- fprintf(stderr,
- "\nERROR!!!!!!!!!!!!!!!!!!!"
- "!!!!!!!!!!!!!!!!!!!!!!!\n\n"
- "Inconsistency in memory heap"
- " or buffer created\n"
- "in %s line %lu.\n",
- node->file_name, node->line);
-
- mutex_exit(&mem_hash_mutex);
-
- return(TRUE);
- }
-
- total_allocated_mem += allocated_mem;
- node = UT_LIST_GET_NEXT(list, node);
- }
- }
-
- if ((n_heaps == 0) && (mem_current_allocated_memory != 0)) {
- error = TRUE;
- }
-
- if (mem_total_allocated_memory < mem_current_allocated_memory) {
- error = TRUE;
- }
-
- if (mem_max_allocated_memory > mem_total_allocated_memory) {
- error = TRUE;
- }
-
- if (mem_n_created_heaps < n_heaps) {
- error = TRUE;
- }
-
- mutex_exit(&mem_hash_mutex);
-
- return(error);
-}
-
-/************************************************************//**
-Validates the dynamic memory
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-mem_validate(void)
-/*==============*/
-{
- ut_a(!mem_validate_no_assert());
-
- return(TRUE);
-}
-#endif /* UNIV_MEM_DEBUG */
-
-/************************************************************//**
-Tries to find neigboring memory allocation blocks and dumps to stderr
-the neighborhood of a given pointer. */
-UNIV_INTERN
-void
-mem_analyze_corruption(
-/*===================*/
- void* ptr) /*!< in: pointer to place of possible corruption */
-{
- byte* p;
- ulint i;
- ulint dist;
-
- fputs("InnoDB: Apparent memory corruption: mem dump ", stderr);
- ut_print_buf(stderr, (byte*)ptr - 250, 500);
-
- fputs("\nInnoDB: Scanning backward trying to find"
- " previous allocated mem blocks\n", stderr);
-
- p = (byte*)ptr;
- dist = 0;
-
- for (i = 0; i < 10; i++) {
- for (;;) {
- if (((ulint)p) % 4 == 0) {
-
- if (*((ulint*)p) == MEM_BLOCK_MAGIC_N) {
- fprintf(stderr,
- "Mem block at - %lu,"
- " file %s, line %lu\n",
- (ulong) dist,
- (p + sizeof(ulint)),
- (ulong)
- (*(ulint*)(p + 8
- + sizeof(ulint))));
-
- break;
- }
-
- if (*((ulint*)p) == MEM_FREED_BLOCK_MAGIC_N) {
- fprintf(stderr,
- "Freed mem block at - %lu,"
- " file %s, line %lu\n",
- (ulong) dist,
- (p + sizeof(ulint)),
- (ulong)
- (*(ulint*)(p + 8
- + sizeof(ulint))));
-
- break;
- }
- }
-
- p--;
- dist++;
- }
-
- p--;
- dist++;
- }
-
- fprintf(stderr,
- "InnoDB: Scanning forward trying to find next"
- " allocated mem blocks\n");
-
- p = (byte*)ptr;
- dist = 0;
-
- for (i = 0; i < 10; i++) {
- for (;;) {
- if (((ulint)p) % 4 == 0) {
-
- if (*((ulint*)p) == MEM_BLOCK_MAGIC_N) {
- fprintf(stderr,
- "Mem block at + %lu, file %s,"
- " line %lu\n",
- (ulong) dist,
- (p + sizeof(ulint)),
- (ulong)
- (*(ulint*)(p + 8
- + sizeof(ulint))));
-
- break;
- }
-
- if (*((ulint*)p) == MEM_FREED_BLOCK_MAGIC_N) {
- fprintf(stderr,
- "Freed mem block at + %lu,"
- " file %s, line %lu\n",
- (ulong) dist,
- (p + sizeof(ulint)),
- (ulong)
- (*(ulint*)(p + 8
- + sizeof(ulint))));
-
- break;
- }
- }
-
- p++;
- dist++;
- }
-
- p++;
- dist++;
- }
-}
-
-#ifndef UNIV_HOTBACKUP
-/*****************************************************************//**
-Prints information of dynamic memory usage and currently allocated
-memory heaps or buffers. Can only be used in the debug version. */
-static
-void
-mem_print_info_low(
-/*===============*/
- ibool print_all) /*!< in: if TRUE, all heaps are printed,
- else only the heaps allocated after the
- previous call of this function */
-{
-#ifdef UNIV_MEM_DEBUG
- mem_hash_node_t* node;
- ulint n_heaps = 0;
- ulint allocated_mem;
- ulint ph_size;
- ulint total_allocated_mem = 0;
- ibool error;
- ulint n_blocks;
-#endif
- FILE* outfile;
-
- /* outfile = fopen("ibdebug", "a"); */
-
- outfile = stdout;
-
- fprintf(outfile, "\n");
- fprintf(outfile,
- "________________________________________________________\n");
- fprintf(outfile, "MEMORY ALLOCATION INFORMATION\n\n");
-
-#ifndef UNIV_MEM_DEBUG
-
- UT_NOT_USED(print_all);
-
- mem_pool_print_info(outfile, mem_comm_pool);
-
- fprintf(outfile,
- "Sorry, non-debug version cannot give more memory info\n");
-
- /* fclose(outfile); */
-
- return;
-#else
- mutex_enter(&mem_hash_mutex);
-
- fprintf(outfile, "LIST OF CREATED HEAPS AND ALLOCATED BUFFERS: \n\n");
-
- if (!print_all) {
- fprintf(outfile, "AFTER THE LAST PRINT INFO\n");
- }
-
- node = UT_LIST_GET_FIRST(mem_all_list_base);
-
- while (node != NULL) {
- n_heaps++;
-
- if (!print_all && node->nth_heap < mem_last_print_info) {
-
- goto next_heap;
- }
-
- mem_heap_validate_or_print(node->heap, NULL,
- FALSE, &error, &allocated_mem,
- &ph_size, &n_blocks);
- total_allocated_mem += allocated_mem;
-
- fprintf(outfile,
- "%lu: file %s line %lu of size %lu phys.size %lu"
- " with %lu blocks, type %lu\n",
- node->nth_heap, node->file_name, node->line,
- allocated_mem, ph_size, n_blocks,
- (node->heap)->type);
-next_heap:
- node = UT_LIST_GET_NEXT(all_list, node);
- }
-
- fprintf(outfile, "\n");
-
- fprintf(outfile, "Current allocated memory : %lu\n",
- mem_current_allocated_memory);
- fprintf(outfile, "Current allocated heaps and buffers : %lu\n",
- n_heaps);
- fprintf(outfile, "Cumulative allocated memory : %lu\n",
- mem_total_allocated_memory);
- fprintf(outfile, "Maximum allocated memory : %lu\n",
- mem_max_allocated_memory);
- fprintf(outfile, "Cumulative created heaps and buffers : %lu\n",
- mem_n_created_heaps);
- fprintf(outfile, "Cumulative number of allocations : %lu\n",
- mem_n_allocations);
-
- mem_last_print_info = mem_n_created_heaps;
-
- mutex_exit(&mem_hash_mutex);
-
- mem_pool_print_info(outfile, mem_comm_pool);
-
- /* mem_validate(); */
-
- /* fclose(outfile); */
-#endif
-}
-
-/*****************************************************************//**
-Prints information of dynamic memory usage and currently allocated memory
-heaps or buffers. Can only be used in the debug version. */
-UNIV_INTERN
-void
-mem_print_info(void)
-/*================*/
-{
- mem_print_info_low(TRUE);
-}
-
-/*****************************************************************//**
-Prints information of dynamic memory usage and currently allocated memory
-heaps or buffers since the last ..._print_info or..._print_new_info. */
-UNIV_INTERN
-void
-mem_print_new_info(void)
-/*====================*/
-{
- mem_print_info_low(FALSE);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/mem/mem0mem.c b/storage/innodb_plugin/mem/mem0mem.c
deleted file mode 100644
index ccb2fd8a7b4..00000000000
--- a/storage/innodb_plugin/mem/mem0mem.c
+++ /dev/null
@@ -1,555 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file mem/mem0mem.c
-The memory management
-
-Created 6/9/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "mem0mem.h"
-#ifdef UNIV_NONINL
-#include "mem0mem.ic"
-#endif
-
-#include "buf0buf.h"
-#include "srv0srv.h"
-#include "mem0dbg.c"
-#include <stdarg.h>
-
-/*
- THE MEMORY MANAGEMENT
- =====================
-
-The basic element of the memory management is called a memory
-heap. A memory heap is conceptually a
-stack from which memory can be allocated. The stack may grow infinitely.
-The top element of the stack may be freed, or
-the whole stack can be freed at one time. The advantage of the
-memory heap concept is that we can avoid using the malloc and free
-functions of C which are quite expensive, for example, on the Solaris + GCC
-system (50 MHz Sparc, 1993) the pair takes 3 microseconds,
-on Win NT + 100MHz Pentium, 2.5 microseconds.
-When we use a memory heap,
-we can allocate larger blocks of memory at a time and thus
-reduce overhead. Slightly more efficient the method is when we
-allocate the memory from the index page buffer pool, as we can
-claim a new page fast. This is called buffer allocation.
-When we allocate the memory from the dynamic memory of the
-C environment, that is called dynamic allocation.
-
-The default way of operation of the memory heap is the following.
-First, when the heap is created, an initial block of memory is
-allocated. In dynamic allocation this may be about 50 bytes.
-If more space is needed, additional blocks are allocated
-and they are put into a linked list.
-After the initial block, each allocated block is twice the size of the
-previous, until a threshold is attained, after which the sizes
-of the blocks stay the same. An exception is, of course, the case
-where the caller requests a memory buffer whose size is
-bigger than the threshold. In that case a block big enough must
-be allocated.
-
-The heap is physically arranged so that if the current block
-becomes full, a new block is allocated and always inserted in the
-chain of blocks as the last block.
-
-In the debug version of the memory management, all the allocated
-heaps are kept in a list (which is implemented as a hash table).
-Thus we can notice if the caller tries to free an already freed
-heap. In addition, each buffer given to the caller contains
-start field at the start and a trailer field at the end of the buffer.
-
-The start field has the following content:
-A. sizeof(ulint) bytes of field length (in the standard byte order)
-B. sizeof(ulint) bytes of check field (a random number)
-
-The trailer field contains:
-A. sizeof(ulint) bytes of check field (the same random number as at the start)
-
-Thus we can notice if something has been copied over the
-borders of the buffer, which is illegal.
-The memory in the buffers is initialized to a random byte sequence.
-After freeing, all the blocks in the heap are set to random bytes
-to help us discover errors which result from the use of
-buffers in an already freed heap. */
-
-#ifdef MEM_PERIODIC_CHECK
-
-ibool mem_block_list_inited;
-/* List of all mem blocks allocated; protected by the mem_comm_pool mutex */
-UT_LIST_BASE_NODE_T(mem_block_t) mem_block_list;
-
-#endif
-
-/**********************************************************************//**
-Duplicates a NUL-terminated string, allocated from a memory heap.
-@return own: a copy of the string */
-UNIV_INTERN
-char*
-mem_heap_strdup(
-/*============*/
- mem_heap_t* heap, /*!< in: memory heap where string is allocated */
- const char* str) /*!< in: string to be copied */
-{
- return(mem_heap_dup(heap, str, strlen(str) + 1));
-}
-
-/**********************************************************************//**
-Duplicate a block of data, allocated from a memory heap.
-@return own: a copy of the data */
-UNIV_INTERN
-void*
-mem_heap_dup(
-/*=========*/
- mem_heap_t* heap, /*!< in: memory heap where copy is allocated */
- const void* data, /*!< in: data to be copied */
- ulint len) /*!< in: length of data, in bytes */
-{
- return(memcpy(mem_heap_alloc(heap, len), data, len));
-}
-
-/**********************************************************************//**
-Concatenate two strings and return the result, using a memory heap.
-@return own: the result */
-UNIV_INTERN
-char*
-mem_heap_strcat(
-/*============*/
- mem_heap_t* heap, /*!< in: memory heap where string is allocated */
- const char* s1, /*!< in: string 1 */
- const char* s2) /*!< in: string 2 */
-{
- char* s;
- ulint s1_len = strlen(s1);
- ulint s2_len = strlen(s2);
-
- s = mem_heap_alloc(heap, s1_len + s2_len + 1);
-
- memcpy(s, s1, s1_len);
- memcpy(s + s1_len, s2, s2_len);
-
- s[s1_len + s2_len] = '\0';
-
- return(s);
-}
-
-
-/****************************************************************//**
-Helper function for mem_heap_printf.
-@return length of formatted string, including terminating NUL */
-static
-ulint
-mem_heap_printf_low(
-/*================*/
- char* buf, /*!< in/out: buffer to store formatted string
- in, or NULL to just calculate length */
- const char* format, /*!< in: format string */
- va_list ap) /*!< in: arguments */
-{
- ulint len = 0;
-
- while (*format) {
-
- /* Does this format specifier have the 'l' length modifier. */
- ibool is_long = FALSE;
-
- /* Length of one parameter. */
- size_t plen;
-
- if (*format++ != '%') {
- /* Non-format character. */
-
- len++;
-
- if (buf) {
- *buf++ = *(format - 1);
- }
-
- continue;
- }
-
- if (*format == 'l') {
- is_long = TRUE;
- format++;
- }
-
- switch (*format++) {
- case 's':
- /* string */
- {
- char* s = va_arg(ap, char*);
-
- /* "%ls" is a non-sensical format specifier. */
- ut_a(!is_long);
-
- plen = strlen(s);
- len += plen;
-
- if (buf) {
- memcpy(buf, s, plen);
- buf += plen;
- }
- }
-
- break;
-
- case 'u':
- /* unsigned int */
- {
- char tmp[32];
- unsigned long val;
-
- /* We only support 'long' values for now. */
- ut_a(is_long);
-
- val = va_arg(ap, unsigned long);
-
- plen = sprintf(tmp, "%lu", val);
- len += plen;
-
- if (buf) {
- memcpy(buf, tmp, plen);
- buf += plen;
- }
- }
-
- break;
-
- case '%':
-
- /* "%l%" is a non-sensical format specifier. */
- ut_a(!is_long);
-
- len++;
-
- if (buf) {
- *buf++ = '%';
- }
-
- break;
-
- default:
- ut_error;
- }
- }
-
- /* For the NUL character. */
- len++;
-
- if (buf) {
- *buf = '\0';
- }
-
- return(len);
-}
-
-/****************************************************************//**
-A simple (s)printf replacement that dynamically allocates the space for the
-formatted string from the given heap. This supports a very limited set of
-the printf syntax: types 's' and 'u' and length modifier 'l' (which is
-required for the 'u' type).
-@return heap-allocated formatted string */
-UNIV_INTERN
-char*
-mem_heap_printf(
-/*============*/
- mem_heap_t* heap, /*!< in: memory heap */
- const char* format, /*!< in: format string */
- ...)
-{
- va_list ap;
- char* str;
- ulint len;
-
- /* Calculate length of string */
- len = 0;
- va_start(ap, format);
- len = mem_heap_printf_low(NULL, format, ap);
- va_end(ap);
-
- /* Now create it for real. */
- str = mem_heap_alloc(heap, len);
- va_start(ap, format);
- mem_heap_printf_low(str, format, ap);
- va_end(ap);
-
- return(str);
-}
-
-/***************************************************************//**
-Creates a memory heap block where data can be allocated.
-@return own: memory heap block, NULL if did not succeed (only possible
-for MEM_HEAP_BTR_SEARCH type heaps) */
-UNIV_INTERN
-mem_block_t*
-mem_heap_create_block(
-/*==================*/
- mem_heap_t* heap, /*!< in: memory heap or NULL if first block
- should be created */
- ulint n, /*!< in: number of bytes needed for user data */
- ulint type, /*!< in: type of heap: MEM_HEAP_DYNAMIC or
- MEM_HEAP_BUFFER */
- const char* file_name,/*!< in: file name where created */
- ulint line) /*!< in: line where created */
-{
-#ifndef UNIV_HOTBACKUP
- buf_block_t* buf_block = NULL;
-#endif /* !UNIV_HOTBACKUP */
- mem_block_t* block;
- ulint len;
-
- ut_ad((type == MEM_HEAP_DYNAMIC) || (type == MEM_HEAP_BUFFER)
- || (type == MEM_HEAP_BUFFER + MEM_HEAP_BTR_SEARCH));
-
- if (heap && heap->magic_n != MEM_BLOCK_MAGIC_N) {
- mem_analyze_corruption(heap);
- }
-
- /* In dynamic allocation, calculate the size: block header + data. */
- len = MEM_BLOCK_HEADER_SIZE + MEM_SPACE_NEEDED(n);
-
-#ifndef UNIV_HOTBACKUP
- if (type == MEM_HEAP_DYNAMIC || len < UNIV_PAGE_SIZE / 2) {
-
- ut_ad(type == MEM_HEAP_DYNAMIC || n <= MEM_MAX_ALLOC_IN_BUF);
-
- block = mem_area_alloc(&len, mem_comm_pool);
- } else {
- len = UNIV_PAGE_SIZE;
-
- if ((type & MEM_HEAP_BTR_SEARCH) && heap) {
- /* We cannot allocate the block from the
- buffer pool, but must get the free block from
- the heap header free block field */
-
- buf_block = heap->free_block;
- heap->free_block = NULL;
-
- if (UNIV_UNLIKELY(!buf_block)) {
-
- return(NULL);
- }
- } else {
- buf_block = buf_block_alloc(0);
- }
-
- block = (mem_block_t*) buf_block->frame;
- }
-
- ut_ad(block);
- block->buf_block = buf_block;
- block->free_block = NULL;
-#else /* !UNIV_HOTBACKUP */
- len = MEM_BLOCK_HEADER_SIZE + MEM_SPACE_NEEDED(n);
- block = ut_malloc(len);
- ut_ad(block);
-#endif /* !UNIV_HOTBACKUP */
-
- block->magic_n = MEM_BLOCK_MAGIC_N;
- ut_strlcpy_rev(block->file_name, file_name, sizeof(block->file_name));
- block->line = line;
-
-#ifdef MEM_PERIODIC_CHECK
- mem_pool_mutex_enter();
-
- if (!mem_block_list_inited) {
- mem_block_list_inited = TRUE;
- UT_LIST_INIT(mem_block_list);
- }
-
- UT_LIST_ADD_LAST(mem_block_list, mem_block_list, block);
-
- mem_pool_mutex_exit();
-#endif
- mem_block_set_len(block, len);
- mem_block_set_type(block, type);
- mem_block_set_free(block, MEM_BLOCK_HEADER_SIZE);
- mem_block_set_start(block, MEM_BLOCK_HEADER_SIZE);
-
- ut_ad((ulint)MEM_BLOCK_HEADER_SIZE < len);
-
- return(block);
-}
-
-/***************************************************************//**
-Adds a new block to a memory heap.
-@return created block, NULL if did not succeed (only possible for
-MEM_HEAP_BTR_SEARCH type heaps) */
-UNIV_INTERN
-mem_block_t*
-mem_heap_add_block(
-/*===============*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n) /*!< in: number of bytes user needs */
-{
- mem_block_t* block;
- mem_block_t* new_block;
- ulint new_size;
-
- ut_ad(mem_heap_check(heap));
-
- block = UT_LIST_GET_LAST(heap->base);
-
- /* We have to allocate a new block. The size is always at least
- doubled until the standard size is reached. After that the size
- stays the same, except in cases where the caller needs more space. */
-
- new_size = 2 * mem_block_get_len(block);
-
- if (heap->type != MEM_HEAP_DYNAMIC) {
- /* From the buffer pool we allocate buffer frames */
- ut_a(n <= MEM_MAX_ALLOC_IN_BUF);
-
- if (new_size > MEM_MAX_ALLOC_IN_BUF) {
- new_size = MEM_MAX_ALLOC_IN_BUF;
- }
- } else if (new_size > MEM_BLOCK_STANDARD_SIZE) {
-
- new_size = MEM_BLOCK_STANDARD_SIZE;
- }
-
- if (new_size < n) {
- new_size = n;
- }
-
- new_block = mem_heap_create_block(heap, new_size, heap->type,
- heap->file_name, heap->line);
- if (new_block == NULL) {
-
- return(NULL);
- }
-
- /* Add the new block as the last block */
-
- UT_LIST_INSERT_AFTER(list, heap->base, block, new_block);
-
- return(new_block);
-}
-
-/******************************************************************//**
-Frees a block from a memory heap. */
-UNIV_INTERN
-void
-mem_heap_block_free(
-/*================*/
- mem_heap_t* heap, /*!< in: heap */
- mem_block_t* block) /*!< in: block to free */
-{
- ulint type;
- ulint len;
-#ifndef UNIV_HOTBACKUP
- buf_block_t* buf_block = block->buf_block;
-#endif /* !UNIV_HOTBACKUP */
-
- if (block->magic_n != MEM_BLOCK_MAGIC_N) {
- mem_analyze_corruption(block);
- }
-
- UT_LIST_REMOVE(list, heap->base, block);
-
-#ifdef MEM_PERIODIC_CHECK
- mem_pool_mutex_enter();
-
- UT_LIST_REMOVE(mem_block_list, mem_block_list, block);
-
- mem_pool_mutex_exit();
-#endif
- type = heap->type;
- len = block->len;
- block->magic_n = MEM_FREED_BLOCK_MAGIC_N;
-
-#ifndef UNIV_HOTBACKUP
- if (!srv_use_sys_malloc) {
-#ifdef UNIV_MEM_DEBUG
- /* In the debug version we set the memory to a random
- combination of hex 0xDE and 0xAD. */
-
- mem_erase_buf((byte*)block, len);
-#else /* UNIV_MEM_DEBUG */
- UNIV_MEM_ASSERT_AND_FREE(block, len);
-#endif /* UNIV_MEM_DEBUG */
-
- }
- if (type == MEM_HEAP_DYNAMIC || len < UNIV_PAGE_SIZE / 2) {
-
- ut_ad(!buf_block);
- mem_area_free(block, mem_comm_pool);
- } else {
- ut_ad(type & MEM_HEAP_BUFFER);
-
- buf_block_free(buf_block);
- }
-#else /* !UNIV_HOTBACKUP */
-#ifdef UNIV_MEM_DEBUG
- /* In the debug version we set the memory to a random
- combination of hex 0xDE and 0xAD. */
-
- mem_erase_buf((byte*)block, len);
-#else /* UNIV_MEM_DEBUG */
- UNIV_MEM_ASSERT_AND_FREE(block, len);
-#endif /* UNIV_MEM_DEBUG */
- ut_free(block);
-#endif /* !UNIV_HOTBACKUP */
-}
-
-#ifndef UNIV_HOTBACKUP
-/******************************************************************//**
-Frees the free_block field from a memory heap. */
-UNIV_INTERN
-void
-mem_heap_free_block_free(
-/*=====================*/
- mem_heap_t* heap) /*!< in: heap */
-{
- if (UNIV_LIKELY_NULL(heap->free_block)) {
-
- buf_block_free(heap->free_block);
-
- heap->free_block = NULL;
- }
-}
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef MEM_PERIODIC_CHECK
-/******************************************************************//**
-Goes through the list of all allocated mem blocks, checks their magic
-numbers, and reports possible corruption. */
-UNIV_INTERN
-void
-mem_validate_all_blocks(void)
-/*=========================*/
-{
- mem_block_t* block;
-
- mem_pool_mutex_enter();
-
- block = UT_LIST_GET_FIRST(mem_block_list);
-
- while (block) {
- if (block->magic_n != MEM_BLOCK_MAGIC_N) {
- mem_analyze_corruption(block);
- }
-
- block = UT_LIST_GET_NEXT(mem_block_list, block);
- }
-
- mem_pool_mutex_exit();
-}
-#endif
diff --git a/storage/innodb_plugin/mem/mem0pool.c b/storage/innodb_plugin/mem/mem0pool.c
deleted file mode 100644
index c4f8af607e0..00000000000
--- a/storage/innodb_plugin/mem/mem0pool.c
+++ /dev/null
@@ -1,717 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file mem/mem0pool.c
-The lowest-level memory management
-
-Created 5/12/1997 Heikki Tuuri
-*************************************************************************/
-
-#include "mem0pool.h"
-#ifdef UNIV_NONINL
-#include "mem0pool.ic"
-#endif
-
-#include "srv0srv.h"
-#include "sync0sync.h"
-#include "ut0mem.h"
-#include "ut0lst.h"
-#include "ut0byte.h"
-#include "mem0mem.h"
-
-/* We would like to use also the buffer frames to allocate memory. This
-would be desirable, because then the memory consumption of the database
-would be fixed, and we might even lock the buffer pool to the main memory.
-The problem here is that the buffer management routines can themselves call
-memory allocation, while the buffer pool mutex is reserved.
-
-The main components of the memory consumption are:
-
-1. buffer pool,
-2. parsed and optimized SQL statements,
-3. data dictionary cache,
-4. log buffer,
-5. locks for each transaction,
-6. hash table for the adaptive index,
-7. state and buffers for each SQL query currently being executed,
-8. session for each user, and
-9. stack for each OS thread.
-
-Items 1 and 2 are managed by an LRU algorithm. Items 5 and 6 can potentially
-consume very much memory. Items 7 and 8 should consume quite little memory,
-and the OS should take care of item 9, which too should consume little memory.
-
-A solution to the memory management:
-
-1. the buffer pool size is set separately;
-2. log buffer size is set separately;
-3. the common pool size for all the other entries, except 8, is set separately.
-
-Problems: we may waste memory if the common pool is set too big. Another
-problem is the locks, which may take very much space in big transactions.
-Then the shared pool size should be set very big. We can allow locks to take
-space from the buffer pool, but the SQL optimizer is then unaware of the
-usable size of the buffer pool. We could also combine the objects in the
-common pool and the buffers in the buffer pool into a single LRU list and
-manage it uniformly, but this approach does not take into account the parsing
-and other costs unique to SQL statements.
-
-The locks for a transaction can be seen as a part of the state of the
-transaction. Hence, they should be stored in the common pool. We still
-have the problem of a very big update transaction, for example, which
-will set very many x-locks on rows, and the locks will consume a lot
-of memory, say, half of the buffer pool size.
-
-Another problem is what to do if we are not able to malloc a requested
-block of memory from the common pool. Then we can request memory from
-the operating system. If it does not help, a system error results.
-
-Because 5 and 6 may potentially consume very much memory, we let them grow
-into the buffer pool. We may let the locks of a transaction take frames
-from the buffer pool, when the corresponding memory heap block has grown to
-the size of a buffer frame. Similarly for the hash node cells of the locks,
-and for the adaptive index. Thus, for each individual transaction, its locks
-can occupy at most about the size of the buffer frame of memory in the common
-pool, and after that its locks will grow into the buffer pool. */
-
-/** Mask used to extract the free bit from area->size */
-#define MEM_AREA_FREE 1
-
-/** The smallest memory area total size */
-#define MEM_AREA_MIN_SIZE (2 * MEM_AREA_EXTRA_SIZE)
-
-
-/** Data structure for a memory pool. The space is allocated using the buddy
-algorithm, where free list i contains areas of size 2 to power i. */
-struct mem_pool_struct{
- byte* buf; /*!< memory pool */
- ulint size; /*!< memory common pool size */
- ulint reserved; /*!< amount of currently allocated
- memory */
- mutex_t mutex; /*!< mutex protecting this struct */
- UT_LIST_BASE_NODE_T(mem_area_t)
- free_list[64]; /*!< lists of free memory areas: an
- area is put to the list whose number
- is the 2-logarithm of the area size */
-};
-
-/** The common memory pool */
-UNIV_INTERN mem_pool_t* mem_comm_pool = NULL;
-
-/* We use this counter to check that the mem pool mutex does not leak;
-this is to track a strange assertion failure reported at
-mysql@lists.mysql.com */
-
-UNIV_INTERN ulint mem_n_threads_inside = 0;
-
-/********************************************************************//**
-Reserves the mem pool mutex. */
-UNIV_INTERN
-void
-mem_pool_mutex_enter(void)
-/*======================*/
-{
- mutex_enter(&(mem_comm_pool->mutex));
-}
-
-/********************************************************************//**
-Releases the mem pool mutex. */
-UNIV_INTERN
-void
-mem_pool_mutex_exit(void)
-/*=====================*/
-{
- mutex_exit(&(mem_comm_pool->mutex));
-}
-
-/********************************************************************//**
-Returns memory area size.
-@return size */
-UNIV_INLINE
-ulint
-mem_area_get_size(
-/*==============*/
- mem_area_t* area) /*!< in: area */
-{
- return(area->size_and_free & ~MEM_AREA_FREE);
-}
-
-/********************************************************************//**
-Sets memory area size. */
-UNIV_INLINE
-void
-mem_area_set_size(
-/*==============*/
- mem_area_t* area, /*!< in: area */
- ulint size) /*!< in: size */
-{
- area->size_and_free = (area->size_and_free & MEM_AREA_FREE)
- | size;
-}
-
-/********************************************************************//**
-Returns memory area free bit.
-@return TRUE if free */
-UNIV_INLINE
-ibool
-mem_area_get_free(
-/*==============*/
- mem_area_t* area) /*!< in: area */
-{
-#if TRUE != MEM_AREA_FREE
-# error "TRUE != MEM_AREA_FREE"
-#endif
- return(area->size_and_free & MEM_AREA_FREE);
-}
-
-/********************************************************************//**
-Sets memory area free bit. */
-UNIV_INLINE
-void
-mem_area_set_free(
-/*==============*/
- mem_area_t* area, /*!< in: area */
- ibool free) /*!< in: free bit value */
-{
-#if TRUE != MEM_AREA_FREE
-# error "TRUE != MEM_AREA_FREE"
-#endif
- area->size_and_free = (area->size_and_free & ~MEM_AREA_FREE)
- | free;
-}
-
-/********************************************************************//**
-Creates a memory pool.
-@return memory pool */
-UNIV_INTERN
-mem_pool_t*
-mem_pool_create(
-/*============*/
- ulint size) /*!< in: pool size in bytes */
-{
- mem_pool_t* pool;
- mem_area_t* area;
- ulint i;
- ulint used;
-
- pool = ut_malloc(sizeof(mem_pool_t));
-
- /* We do not set the memory to zero (FALSE) in the pool,
- but only when allocated at a higher level in mem0mem.c.
- This is to avoid masking useful Purify warnings. */
-
- pool->buf = ut_malloc_low(size, FALSE, TRUE);
- pool->size = size;
-
- mutex_create(&pool->mutex, SYNC_MEM_POOL);
-
- /* Initialize the free lists */
-
- for (i = 0; i < 64; i++) {
-
- UT_LIST_INIT(pool->free_list[i]);
- }
-
- used = 0;
-
- while (size - used >= MEM_AREA_MIN_SIZE) {
-
- i = ut_2_log(size - used);
-
- if (ut_2_exp(i) > size - used) {
-
- /* ut_2_log rounds upward */
-
- i--;
- }
-
- area = (mem_area_t*)(pool->buf + used);
-
- mem_area_set_size(area, ut_2_exp(i));
- mem_area_set_free(area, TRUE);
- UNIV_MEM_FREE(MEM_AREA_EXTRA_SIZE + (byte*) area,
- ut_2_exp(i) - MEM_AREA_EXTRA_SIZE);
-
- UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area);
-
- used = used + ut_2_exp(i);
- }
-
- ut_ad(size >= used);
-
- pool->reserved = 0;
-
- return(pool);
-}
-
-/********************************************************************//**
-Frees a memory pool. */
-UNIV_INTERN
-void
-mem_pool_free(
-/*==========*/
- mem_pool_t* pool) /*!< in, own: memory pool */
-{
- ut_free(pool->buf);
- ut_free(pool);
-}
-
-/********************************************************************//**
-Fills the specified free list.
-@return TRUE if we were able to insert a block to the free list */
-static
-ibool
-mem_pool_fill_free_list(
-/*====================*/
- ulint i, /*!< in: free list index */
- mem_pool_t* pool) /*!< in: memory pool */
-{
- mem_area_t* area;
- mem_area_t* area2;
- ibool ret;
-
- ut_ad(mutex_own(&(pool->mutex)));
-
- if (UNIV_UNLIKELY(i >= 63)) {
- /* We come here when we have run out of space in the
- memory pool: */
-
- return(FALSE);
- }
-
- area = UT_LIST_GET_FIRST(pool->free_list[i + 1]);
-
- if (area == NULL) {
- if (UT_LIST_GET_LEN(pool->free_list[i + 1]) > 0) {
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error: mem pool free list %lu"
- " length is %lu\n"
- "InnoDB: though the list is empty!\n",
- (ulong) i + 1,
- (ulong)
- UT_LIST_GET_LEN(pool->free_list[i + 1]));
- }
-
- ret = mem_pool_fill_free_list(i + 1, pool);
-
- if (ret == FALSE) {
-
- return(FALSE);
- }
-
- area = UT_LIST_GET_FIRST(pool->free_list[i + 1]);
- }
-
- if (UNIV_UNLIKELY(UT_LIST_GET_LEN(pool->free_list[i + 1]) == 0)) {
- mem_analyze_corruption(area);
-
- ut_error;
- }
-
- UT_LIST_REMOVE(free_list, pool->free_list[i + 1], area);
-
- area2 = (mem_area_t*)(((byte*)area) + ut_2_exp(i));
- UNIV_MEM_ALLOC(area2, MEM_AREA_EXTRA_SIZE);
-
- mem_area_set_size(area2, ut_2_exp(i));
- mem_area_set_free(area2, TRUE);
-
- UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area2);
-
- mem_area_set_size(area, ut_2_exp(i));
-
- UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area);
-
- return(TRUE);
-}
-
-/********************************************************************//**
-Allocates memory from a pool. NOTE: This low-level function should only be
-used in mem0mem.*!
-@return own: allocated memory buffer */
-UNIV_INTERN
-void*
-mem_area_alloc(
-/*===========*/
- ulint* psize, /*!< in: requested size in bytes; for optimum
- space usage, the size should be a power of 2
- minus MEM_AREA_EXTRA_SIZE;
- out: allocated size in bytes (greater than
- or equal to the requested size) */
- mem_pool_t* pool) /*!< in: memory pool */
-{
- mem_area_t* area;
- ulint size;
- ulint n;
- ibool ret;
-
- /* If we are using os allocator just make a simple call
- to malloc */
- if (UNIV_LIKELY(srv_use_sys_malloc)) {
- return(malloc(*psize));
- }
-
- size = *psize;
- n = ut_2_log(ut_max(size + MEM_AREA_EXTRA_SIZE, MEM_AREA_MIN_SIZE));
-
- mutex_enter(&(pool->mutex));
- mem_n_threads_inside++;
-
- ut_a(mem_n_threads_inside == 1);
-
- area = UT_LIST_GET_FIRST(pool->free_list[n]);
-
- if (area == NULL) {
- ret = mem_pool_fill_free_list(n, pool);
-
- if (ret == FALSE) {
- /* Out of memory in memory pool: we try to allocate
- from the operating system with the regular malloc: */
-
- mem_n_threads_inside--;
- mutex_exit(&(pool->mutex));
-
- return(ut_malloc(size));
- }
-
- area = UT_LIST_GET_FIRST(pool->free_list[n]);
- }
-
- if (!mem_area_get_free(area)) {
- fprintf(stderr,
- "InnoDB: Error: Removing element from mem pool"
- " free list %lu though the\n"
- "InnoDB: element is not marked free!\n",
- (ulong) n);
-
- mem_analyze_corruption(area);
-
- /* Try to analyze a strange assertion failure reported at
- mysql@lists.mysql.com where the free bit IS 1 in the
- hex dump above */
-
- if (mem_area_get_free(area)) {
- fprintf(stderr,
- "InnoDB: Probably a race condition"
- " because now the area is marked free!\n");
- }
-
- ut_error;
- }
-
- if (UT_LIST_GET_LEN(pool->free_list[n]) == 0) {
- fprintf(stderr,
- "InnoDB: Error: Removing element from mem pool"
- " free list %lu\n"
- "InnoDB: though the list length is 0!\n",
- (ulong) n);
- mem_analyze_corruption(area);
-
- ut_error;
- }
-
- ut_ad(mem_area_get_size(area) == ut_2_exp(n));
-
- mem_area_set_free(area, FALSE);
-
- UT_LIST_REMOVE(free_list, pool->free_list[n], area);
-
- pool->reserved += mem_area_get_size(area);
-
- mem_n_threads_inside--;
- mutex_exit(&(pool->mutex));
-
- ut_ad(mem_pool_validate(pool));
-
- *psize = ut_2_exp(n) - MEM_AREA_EXTRA_SIZE;
- UNIV_MEM_ALLOC(MEM_AREA_EXTRA_SIZE + (byte*)area, *psize);
-
- return((void*)(MEM_AREA_EXTRA_SIZE + ((byte*)area)));
-}
-
-/********************************************************************//**
-Gets the buddy of an area, if it exists in pool.
-@return the buddy, NULL if no buddy in pool */
-UNIV_INLINE
-mem_area_t*
-mem_area_get_buddy(
-/*===============*/
- mem_area_t* area, /*!< in: memory area */
- ulint size, /*!< in: memory area size */
- mem_pool_t* pool) /*!< in: memory pool */
-{
- mem_area_t* buddy;
-
- ut_ad(size != 0);
-
- if (((((byte*)area) - pool->buf) % (2 * size)) == 0) {
-
- /* The buddy is in a higher address */
-
- buddy = (mem_area_t*)(((byte*)area) + size);
-
- if ((((byte*)buddy) - pool->buf) + size > pool->size) {
-
- /* The buddy is not wholly contained in the pool:
- there is no buddy */
-
- buddy = NULL;
- }
- } else {
- /* The buddy is in a lower address; NOTE that area cannot
- be at the pool lower end, because then we would end up to
- the upper branch in this if-clause: the remainder would be
- 0 */
-
- buddy = (mem_area_t*)(((byte*)area) - size);
- }
-
- return(buddy);
-}
-
-/********************************************************************//**
-Frees memory to a pool. */
-UNIV_INTERN
-void
-mem_area_free(
-/*==========*/
- void* ptr, /*!< in, own: pointer to allocated memory
- buffer */
- mem_pool_t* pool) /*!< in: memory pool */
-{
- mem_area_t* area;
- mem_area_t* buddy;
- void* new_ptr;
- ulint size;
- ulint n;
-
- if (UNIV_LIKELY(srv_use_sys_malloc)) {
- free(ptr);
-
- return;
- }
-
- /* It may be that the area was really allocated from the OS with
- regular malloc: check if ptr points within our memory pool */
-
- if ((byte*)ptr < pool->buf || (byte*)ptr >= pool->buf + pool->size) {
- ut_free(ptr);
-
- return;
- }
-
- area = (mem_area_t*) (((byte*)ptr) - MEM_AREA_EXTRA_SIZE);
-
- if (mem_area_get_free(area)) {
- fprintf(stderr,
- "InnoDB: Error: Freeing element to mem pool"
- " free list though the\n"
- "InnoDB: element is marked free!\n");
-
- mem_analyze_corruption(area);
- ut_error;
- }
-
- size = mem_area_get_size(area);
- UNIV_MEM_FREE(ptr, size - MEM_AREA_EXTRA_SIZE);
-
- if (size == 0) {
- fprintf(stderr,
- "InnoDB: Error: Mem area size is 0. Possibly a"
- " memory overrun of the\n"
- "InnoDB: previous allocated area!\n");
-
- mem_analyze_corruption(area);
- ut_error;
- }
-
-#ifdef UNIV_LIGHT_MEM_DEBUG
- if (((byte*)area) + size < pool->buf + pool->size) {
-
- ulint next_size;
-
- next_size = mem_area_get_size(
- (mem_area_t*)(((byte*)area) + size));
- if (UNIV_UNLIKELY(!next_size || !ut_is_2pow(next_size))) {
- fprintf(stderr,
- "InnoDB: Error: Memory area size %lu,"
- " next area size %lu not a power of 2!\n"
- "InnoDB: Possibly a memory overrun of"
- " the buffer being freed here.\n",
- (ulong) size, (ulong) next_size);
- mem_analyze_corruption(area);
-
- ut_error;
- }
- }
-#endif
- buddy = mem_area_get_buddy(area, size, pool);
-
- n = ut_2_log(size);
-
- mutex_enter(&(pool->mutex));
- mem_n_threads_inside++;
-
- ut_a(mem_n_threads_inside == 1);
-
- if (buddy && mem_area_get_free(buddy)
- && (size == mem_area_get_size(buddy))) {
-
- /* The buddy is in a free list */
-
- if ((byte*)buddy < (byte*)area) {
- new_ptr = ((byte*)buddy) + MEM_AREA_EXTRA_SIZE;
-
- mem_area_set_size(buddy, 2 * size);
- mem_area_set_free(buddy, FALSE);
- } else {
- new_ptr = ptr;
-
- mem_area_set_size(area, 2 * size);
- }
-
- /* Remove the buddy from its free list and merge it to area */
-
- UT_LIST_REMOVE(free_list, pool->free_list[n], buddy);
-
- pool->reserved += ut_2_exp(n);
-
- mem_n_threads_inside--;
- mutex_exit(&(pool->mutex));
-
- mem_area_free(new_ptr, pool);
-
- return;
- } else {
- UT_LIST_ADD_FIRST(free_list, pool->free_list[n], area);
-
- mem_area_set_free(area, TRUE);
-
- ut_ad(pool->reserved >= size);
-
- pool->reserved -= size;
- }
-
- mem_n_threads_inside--;
- mutex_exit(&(pool->mutex));
-
- ut_ad(mem_pool_validate(pool));
-}
-
-/********************************************************************//**
-Validates a memory pool.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-mem_pool_validate(
-/*==============*/
- mem_pool_t* pool) /*!< in: memory pool */
-{
- mem_area_t* area;
- mem_area_t* buddy;
- ulint free;
- ulint i;
-
- mutex_enter(&(pool->mutex));
-
- free = 0;
-
- for (i = 0; i < 64; i++) {
-
- UT_LIST_VALIDATE(free_list, mem_area_t, pool->free_list[i],
- (void) 0);
-
- area = UT_LIST_GET_FIRST(pool->free_list[i]);
-
- while (area != NULL) {
- ut_a(mem_area_get_free(area));
- ut_a(mem_area_get_size(area) == ut_2_exp(i));
-
- buddy = mem_area_get_buddy(area, ut_2_exp(i), pool);
-
- ut_a(!buddy || !mem_area_get_free(buddy)
- || (ut_2_exp(i) != mem_area_get_size(buddy)));
-
- area = UT_LIST_GET_NEXT(free_list, area);
-
- free += ut_2_exp(i);
- }
- }
-
- ut_a(free + pool->reserved == pool->size);
-
- mutex_exit(&(pool->mutex));
-
- return(TRUE);
-}
-
-/********************************************************************//**
-Prints info of a memory pool. */
-UNIV_INTERN
-void
-mem_pool_print_info(
-/*================*/
- FILE* outfile,/*!< in: output file to write to */
- mem_pool_t* pool) /*!< in: memory pool */
-{
- ulint i;
-
- mem_pool_validate(pool);
-
- fprintf(outfile, "INFO OF A MEMORY POOL\n");
-
- mutex_enter(&(pool->mutex));
-
- for (i = 0; i < 64; i++) {
- if (UT_LIST_GET_LEN(pool->free_list[i]) > 0) {
-
- fprintf(outfile,
- "Free list length %lu for"
- " blocks of size %lu\n",
- (ulong) UT_LIST_GET_LEN(pool->free_list[i]),
- (ulong) ut_2_exp(i));
- }
- }
-
- fprintf(outfile, "Pool size %lu, reserved %lu.\n", (ulong) pool->size,
- (ulong) pool->reserved);
- mutex_exit(&(pool->mutex));
-}
-
-/********************************************************************//**
-Returns the amount of reserved memory.
-@return reserved memory in bytes */
-UNIV_INTERN
-ulint
-mem_pool_get_reserved(
-/*==================*/
- mem_pool_t* pool) /*!< in: memory pool */
-{
- ulint reserved;
-
- mutex_enter(&(pool->mutex));
-
- reserved = pool->reserved;
-
- mutex_exit(&(pool->mutex));
-
- return(reserved);
-}
diff --git a/storage/innodb_plugin/mtr/mtr0log.c b/storage/innodb_plugin/mtr/mtr0log.c
deleted file mode 100644
index 3f3dab36b76..00000000000
--- a/storage/innodb_plugin/mtr/mtr0log.c
+++ /dev/null
@@ -1,612 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file mtr/mtr0log.c
-Mini-transaction log routines
-
-Created 12/7/1995 Heikki Tuuri
-*******************************************************/
-
-#include "mtr0log.h"
-
-#ifdef UNIV_NONINL
-#include "mtr0log.ic"
-#endif
-
-#include "buf0buf.h"
-#include "dict0dict.h"
-#include "log0recv.h"
-#include "page0page.h"
-
-#ifndef UNIV_HOTBACKUP
-# include "dict0boot.h"
-
-/********************************************************//**
-Catenates n bytes to the mtr log. */
-UNIV_INTERN
-void
-mlog_catenate_string(
-/*=================*/
- mtr_t* mtr, /*!< in: mtr */
- const byte* str, /*!< in: string to write */
- ulint len) /*!< in: string length */
-{
- dyn_array_t* mlog;
-
- if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) {
-
- return;
- }
-
- mlog = &(mtr->log);
-
- dyn_push_string(mlog, str, len);
-}
-
-/********************************************************//**
-Writes the initial part of a log record consisting of one-byte item
-type and four-byte space and page numbers. Also pushes info
-to the mtr memo that a buffer page has been modified. */
-UNIV_INTERN
-void
-mlog_write_initial_log_record(
-/*==========================*/
- const byte* ptr, /*!< in: pointer to (inside) a buffer
- frame holding the file page where
- modification is made */
- byte type, /*!< in: log item type: MLOG_1BYTE, ... */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- byte* log_ptr;
-
- ut_ad(type <= MLOG_BIGGEST_TYPE);
- ut_ad(type > MLOG_8BYTES);
-
- log_ptr = mlog_open(mtr, 11);
-
- /* If no logging is requested, we may return now */
- if (log_ptr == NULL) {
-
- return;
- }
-
- log_ptr = mlog_write_initial_log_record_fast(ptr, type, log_ptr, mtr);
-
- mlog_close(mtr, log_ptr);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************//**
-Parses an initial log record written by mlog_write_initial_log_record.
-@return parsed record end, NULL if not a complete record */
-UNIV_INTERN
-byte*
-mlog_parse_initial_log_record(
-/*==========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- byte* type, /*!< out: log record type: MLOG_1BYTE, ... */
- ulint* space, /*!< out: space id */
- ulint* page_no)/*!< out: page number */
-{
- if (end_ptr < ptr + 1) {
-
- return(NULL);
- }
-
- *type = (byte)((ulint)*ptr & ~MLOG_SINGLE_REC_FLAG);
- ut_ad(*type <= MLOG_BIGGEST_TYPE);
-
- ptr++;
-
- if (end_ptr < ptr + 2) {
-
- return(NULL);
- }
-
- ptr = mach_parse_compressed(ptr, end_ptr, space);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- ptr = mach_parse_compressed(ptr, end_ptr, page_no);
-
- return(ptr);
-}
-
-/********************************************************//**
-Parses a log record written by mlog_write_ulint or mlog_write_dulint.
-@return parsed record end, NULL if not a complete record or a corrupt record */
-UNIV_INTERN
-byte*
-mlog_parse_nbytes(
-/*==============*/
- ulint type, /*!< in: log record type: MLOG_1BYTE, ... */
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- byte* page, /*!< in: page where to apply the log record, or NULL */
- void* page_zip)/*!< in/out: compressed page, or NULL */
-{
- ulint offset;
- ulint val;
- dulint dval;
-
- ut_a(type <= MLOG_8BYTES);
- ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX);
-
- if (end_ptr < ptr + 2) {
-
- return(NULL);
- }
-
- offset = mach_read_from_2(ptr);
- ptr += 2;
-
- if (offset >= UNIV_PAGE_SIZE) {
- recv_sys->found_corrupt_log = TRUE;
-
- return(NULL);
- }
-
- if (type == MLOG_8BYTES) {
- ptr = mach_dulint_parse_compressed(ptr, end_ptr, &dval);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- if (page) {
- if (UNIV_LIKELY_NULL(page_zip)) {
- mach_write_to_8
- (((page_zip_des_t*) page_zip)->data
- + offset, dval);
- }
- mach_write_to_8(page + offset, dval);
- }
-
- return(ptr);
- }
-
- ptr = mach_parse_compressed(ptr, end_ptr, &val);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- switch (type) {
- case MLOG_1BYTE:
- if (UNIV_UNLIKELY(val > 0xFFUL)) {
- goto corrupt;
- }
- if (page) {
- if (UNIV_LIKELY_NULL(page_zip)) {
- mach_write_to_1
- (((page_zip_des_t*) page_zip)->data
- + offset, val);
- }
- mach_write_to_1(page + offset, val);
- }
- break;
- case MLOG_2BYTES:
- if (UNIV_UNLIKELY(val > 0xFFFFUL)) {
- goto corrupt;
- }
- if (page) {
- if (UNIV_LIKELY_NULL(page_zip)) {
- mach_write_to_2
- (((page_zip_des_t*) page_zip)->data
- + offset, val);
- }
- mach_write_to_2(page + offset, val);
- }
- break;
- case MLOG_4BYTES:
- if (page) {
- if (UNIV_LIKELY_NULL(page_zip)) {
- mach_write_to_4
- (((page_zip_des_t*) page_zip)->data
- + offset, val);
- }
- mach_write_to_4(page + offset, val);
- }
- break;
- default:
- corrupt:
- recv_sys->found_corrupt_log = TRUE;
- ptr = NULL;
- }
-
- return(ptr);
-}
-
-/********************************************************//**
-Writes 1 - 4 bytes to a file page buffered in the buffer pool.
-Writes the corresponding log record to the mini-transaction log. */
-UNIV_INTERN
-void
-mlog_write_ulint(
-/*=============*/
- byte* ptr, /*!< in: pointer where to write */
- ulint val, /*!< in: value to write */
- byte type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- byte* log_ptr;
-
- switch (type) {
- case MLOG_1BYTE:
- mach_write_to_1(ptr, val);
- break;
- case MLOG_2BYTES:
- mach_write_to_2(ptr, val);
- break;
- case MLOG_4BYTES:
- mach_write_to_4(ptr, val);
- break;
- default:
- ut_error;
- }
-
- log_ptr = mlog_open(mtr, 11 + 2 + 5);
-
- /* If no logging is requested, we may return now */
- if (log_ptr == NULL) {
-
- return;
- }
-
- log_ptr = mlog_write_initial_log_record_fast(ptr, type, log_ptr, mtr);
-
- mach_write_to_2(log_ptr, page_offset(ptr));
- log_ptr += 2;
-
- log_ptr += mach_write_compressed(log_ptr, val);
-
- mlog_close(mtr, log_ptr);
-}
-
-/********************************************************//**
-Writes 8 bytes to a file page buffered in the buffer pool.
-Writes the corresponding log record to the mini-transaction log. */
-UNIV_INTERN
-void
-mlog_write_dulint(
-/*==============*/
- byte* ptr, /*!< in: pointer where to write */
- dulint val, /*!< in: value to write */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- byte* log_ptr;
-
- ut_ad(ptr && mtr);
-
- mach_write_to_8(ptr, val);
-
- log_ptr = mlog_open(mtr, 11 + 2 + 9);
-
- /* If no logging is requested, we may return now */
- if (log_ptr == NULL) {
-
- return;
- }
-
- log_ptr = mlog_write_initial_log_record_fast(ptr, MLOG_8BYTES,
- log_ptr, mtr);
-
- mach_write_to_2(log_ptr, page_offset(ptr));
- log_ptr += 2;
-
- log_ptr += mach_dulint_write_compressed(log_ptr, val);
-
- mlog_close(mtr, log_ptr);
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************//**
-Writes a string to a file page buffered in the buffer pool. Writes the
-corresponding log record to the mini-transaction log. */
-UNIV_INTERN
-void
-mlog_write_string(
-/*==============*/
- byte* ptr, /*!< in: pointer where to write */
- const byte* str, /*!< in: string to write */
- ulint len, /*!< in: string length */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ut_ad(ptr && mtr);
- ut_a(len < UNIV_PAGE_SIZE);
-
- memcpy(ptr, str, len);
-
- mlog_log_string(ptr, len, mtr);
-}
-
-/********************************************************//**
-Logs a write of a string to a file page buffered in the buffer pool.
-Writes the corresponding log record to the mini-transaction log. */
-UNIV_INTERN
-void
-mlog_log_string(
-/*============*/
- byte* ptr, /*!< in: pointer written to */
- ulint len, /*!< in: string length */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- byte* log_ptr;
-
- ut_ad(ptr && mtr);
- ut_ad(len <= UNIV_PAGE_SIZE);
-
- log_ptr = mlog_open(mtr, 30);
-
- /* If no logging is requested, we may return now */
- if (log_ptr == NULL) {
-
- return;
- }
-
- log_ptr = mlog_write_initial_log_record_fast(ptr, MLOG_WRITE_STRING,
- log_ptr, mtr);
- mach_write_to_2(log_ptr, page_offset(ptr));
- log_ptr += 2;
-
- mach_write_to_2(log_ptr, len);
- log_ptr += 2;
-
- mlog_close(mtr, log_ptr);
-
- mlog_catenate_string(mtr, ptr, len);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************//**
-Parses a log record written by mlog_write_string.
-@return parsed record end, NULL if not a complete record */
-UNIV_INTERN
-byte*
-mlog_parse_string(
-/*==============*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- byte* page, /*!< in: page where to apply the log record, or NULL */
- void* page_zip)/*!< in/out: compressed page, or NULL */
-{
- ulint offset;
- ulint len;
-
- ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX);
-
- if (end_ptr < ptr + 4) {
-
- return(NULL);
- }
-
- offset = mach_read_from_2(ptr);
- ptr += 2;
- len = mach_read_from_2(ptr);
- ptr += 2;
-
- if (UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
- || UNIV_UNLIKELY(len + offset) > UNIV_PAGE_SIZE) {
- recv_sys->found_corrupt_log = TRUE;
-
- return(NULL);
- }
-
- if (end_ptr < ptr + len) {
-
- return(NULL);
- }
-
- if (page) {
- if (UNIV_LIKELY_NULL(page_zip)) {
- memcpy(((page_zip_des_t*) page_zip)->data
- + offset, ptr, len);
- }
- memcpy(page + offset, ptr, len);
- }
-
- return(ptr + len);
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************//**
-Opens a buffer for mlog, writes the initial log record and,
-if needed, the field lengths of an index.
-@return buffer, NULL if log mode MTR_LOG_NONE */
-UNIV_INTERN
-byte*
-mlog_open_and_write_index(
-/*======================*/
- mtr_t* mtr, /*!< in: mtr */
- const byte* rec, /*!< in: index record or page */
- dict_index_t* index, /*!< in: record descriptor */
- byte type, /*!< in: log item type */
- ulint size) /*!< in: requested buffer size in bytes
- (if 0, calls mlog_close() and returns NULL) */
-{
- byte* log_ptr;
- const byte* log_start;
- const byte* log_end;
-
- ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
-
- if (!page_rec_is_comp(rec)) {
- log_start = log_ptr = mlog_open(mtr, 11 + size);
- if (!log_ptr) {
- return(NULL); /* logging is disabled */
- }
- log_ptr = mlog_write_initial_log_record_fast(rec, type,
- log_ptr, mtr);
- log_end = log_ptr + 11 + size;
- } else {
- ulint i;
- ulint n = dict_index_get_n_fields(index);
- /* total size needed */
- ulint total = 11 + size + (n + 2) * 2;
- ulint alloc = total;
- /* allocate at most DYN_ARRAY_DATA_SIZE at a time */
- if (alloc > DYN_ARRAY_DATA_SIZE) {
- alloc = DYN_ARRAY_DATA_SIZE;
- }
- log_start = log_ptr = mlog_open(mtr, alloc);
- if (!log_ptr) {
- return(NULL); /* logging is disabled */
- }
- log_end = log_ptr + alloc;
- log_ptr = mlog_write_initial_log_record_fast(rec, type,
- log_ptr, mtr);
- mach_write_to_2(log_ptr, n);
- log_ptr += 2;
- mach_write_to_2(log_ptr,
- dict_index_get_n_unique_in_tree(index));
- log_ptr += 2;
- for (i = 0; i < n; i++) {
- dict_field_t* field;
- const dict_col_t* col;
- ulint len;
-
- field = dict_index_get_nth_field(index, i);
- col = dict_field_get_col(field);
- len = field->fixed_len;
- ut_ad(len < 0x7fff);
- if (len == 0
- && (col->len > 255 || col->mtype == DATA_BLOB)) {
- /* variable-length field
- with maximum length > 255 */
- len = 0x7fff;
- }
- if (col->prtype & DATA_NOT_NULL) {
- len |= 0x8000;
- }
- if (log_ptr + 2 > log_end) {
- mlog_close(mtr, log_ptr);
- ut_a(total > (ulint) (log_ptr - log_start));
- total -= log_ptr - log_start;
- alloc = total;
- if (alloc > DYN_ARRAY_DATA_SIZE) {
- alloc = DYN_ARRAY_DATA_SIZE;
- }
- log_start = log_ptr = mlog_open(mtr, alloc);
- if (!log_ptr) {
- return(NULL); /* logging is disabled */
- }
- log_end = log_ptr + alloc;
- }
- mach_write_to_2(log_ptr, len);
- log_ptr += 2;
- }
- }
- if (size == 0) {
- mlog_close(mtr, log_ptr);
- log_ptr = NULL;
- } else if (log_ptr + size > log_end) {
- mlog_close(mtr, log_ptr);
- log_ptr = mlog_open(mtr, size);
- }
- return(log_ptr);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************//**
-Parses a log record written by mlog_open_and_write_index.
-@return parsed record end, NULL if not a complete record */
-UNIV_INTERN
-byte*
-mlog_parse_index(
-/*=============*/
- byte* ptr, /*!< in: buffer */
- const byte* end_ptr,/*!< in: buffer end */
- ibool comp, /*!< in: TRUE=compact record format */
- dict_index_t** index) /*!< out, own: dummy index */
-{
- ulint i, n, n_uniq;
- dict_table_t* table;
- dict_index_t* ind;
-
- ut_ad(comp == FALSE || comp == TRUE);
-
- if (comp) {
- if (end_ptr < ptr + 4) {
- return(NULL);
- }
- n = mach_read_from_2(ptr);
- ptr += 2;
- n_uniq = mach_read_from_2(ptr);
- ptr += 2;
- ut_ad(n_uniq <= n);
- if (end_ptr < ptr + n * 2) {
- return(NULL);
- }
- } else {
- n = n_uniq = 1;
- }
- table = dict_mem_table_create("LOG_DUMMY", DICT_HDR_SPACE, n,
- comp ? DICT_TF_COMPACT : 0);
- ind = dict_mem_index_create("LOG_DUMMY", "LOG_DUMMY",
- DICT_HDR_SPACE, 0, n);
- ind->table = table;
- ind->n_uniq = (unsigned int) n_uniq;
- if (n_uniq != n) {
- ut_a(n_uniq + DATA_ROLL_PTR <= n);
- ind->type = DICT_CLUSTERED;
- }
- if (comp) {
- for (i = 0; i < n; i++) {
- ulint len = mach_read_from_2(ptr);
- ptr += 2;
- /* The high-order bit of len is the NOT NULL flag;
- the rest is 0 or 0x7fff for variable-length fields,
- and 1..0x7ffe for fixed-length fields. */
- dict_mem_table_add_col(
- table, NULL, NULL,
- ((len + 1) & 0x7fff) <= 1
- ? DATA_BINARY : DATA_FIXBINARY,
- len & 0x8000 ? DATA_NOT_NULL : 0,
- len & 0x7fff);
-
- dict_index_add_col(ind, table,
- dict_table_get_nth_col(table, i),
- 0);
- }
- dict_table_add_system_columns(table, table->heap);
- if (n_uniq != n) {
- /* Identify DB_TRX_ID and DB_ROLL_PTR in the index. */
- ut_a(DATA_TRX_ID_LEN
- == dict_index_get_nth_col(ind, DATA_TRX_ID - 1
- + n_uniq)->len);
- ut_a(DATA_ROLL_PTR_LEN
- == dict_index_get_nth_col(ind, DATA_ROLL_PTR - 1
- + n_uniq)->len);
- ind->fields[DATA_TRX_ID - 1 + n_uniq].col
- = &table->cols[n + DATA_TRX_ID];
- ind->fields[DATA_ROLL_PTR - 1 + n_uniq].col
- = &table->cols[n + DATA_ROLL_PTR];
- }
- }
- /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
- ind->cached = TRUE;
- *index = ind;
- return(ptr);
-}
diff --git a/storage/innodb_plugin/mtr/mtr0mtr.c b/storage/innodb_plugin/mtr/mtr0mtr.c
deleted file mode 100644
index 417e97732bb..00000000000
--- a/storage/innodb_plugin/mtr/mtr0mtr.c
+++ /dev/null
@@ -1,358 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file mtr/mtr0mtr.c
-Mini-transaction buffer
-
-Created 11/26/1995 Heikki Tuuri
-*******************************************************/
-
-#include "mtr0mtr.h"
-
-#ifdef UNIV_NONINL
-#include "mtr0mtr.ic"
-#endif
-
-#include "buf0buf.h"
-#include "page0types.h"
-#include "mtr0log.h"
-#include "log0log.h"
-
-#ifndef UNIV_HOTBACKUP
-# include "log0recv.h"
-/*****************************************************************//**
-Releases the item in the slot given. */
-UNIV_INLINE
-void
-mtr_memo_slot_release(
-/*==================*/
- mtr_t* mtr, /*!< in: mtr */
- mtr_memo_slot_t* slot) /*!< in: memo slot */
-{
- void* object;
- ulint type;
-
- ut_ad(mtr && slot);
-
- object = slot->object;
- type = slot->type;
-
- if (UNIV_LIKELY(object != NULL)) {
- if (type <= MTR_MEMO_BUF_FIX) {
- buf_page_release((buf_block_t*)object, type, mtr);
- } else if (type == MTR_MEMO_S_LOCK) {
- rw_lock_s_unlock((rw_lock_t*)object);
-#ifdef UNIV_DEBUG
- } else if (type != MTR_MEMO_X_LOCK) {
- ut_ad(type == MTR_MEMO_MODIFY);
- ut_ad(mtr_memo_contains(mtr, object,
- MTR_MEMO_PAGE_X_FIX));
-#endif /* UNIV_DEBUG */
- } else {
- rw_lock_x_unlock((rw_lock_t*)object);
- }
- }
-
- slot->object = NULL;
-}
-
-/**********************************************************//**
-Releases the mlocks and other objects stored in an mtr memo. They are released
-in the order opposite to which they were pushed to the memo. NOTE! It is
-essential that the x-rw-lock on a modified buffer page is not released before
-buf_page_note_modification is called for that page! Otherwise, some thread
-might race to modify it, and the flush list sort order on lsn would be
-destroyed. */
-UNIV_INLINE
-void
-mtr_memo_pop_all(
-/*=============*/
- mtr_t* mtr) /*!< in: mtr */
-{
- mtr_memo_slot_t* slot;
- dyn_array_t* memo;
- ulint offset;
-
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_COMMITTING); /* Currently only used in
- commit */
- memo = &(mtr->memo);
-
- offset = dyn_array_get_data_size(memo);
-
- while (offset > 0) {
- offset -= sizeof(mtr_memo_slot_t);
- slot = dyn_array_get_element(memo, offset);
-
- mtr_memo_slot_release(mtr, slot);
- }
-}
-
-/************************************************************//**
-Writes the contents of a mini-transaction log, if any, to the database log. */
-static
-void
-mtr_log_reserve_and_write(
-/*======================*/
- mtr_t* mtr) /*!< in: mtr */
-{
- dyn_array_t* mlog;
- dyn_block_t* block;
- ulint data_size;
- byte* first_data;
-
- ut_ad(mtr);
-
- mlog = &(mtr->log);
-
- first_data = dyn_block_get_data(mlog);
-
- if (mtr->n_log_recs > 1) {
- mlog_catenate_ulint(mtr, MLOG_MULTI_REC_END, MLOG_1BYTE);
- } else {
- *first_data = (byte)((ulint)*first_data
- | MLOG_SINGLE_REC_FLAG);
- }
-
- if (mlog->heap == NULL) {
- mtr->end_lsn = log_reserve_and_write_fast(
- first_data, dyn_block_get_used(mlog),
- &mtr->start_lsn);
- if (mtr->end_lsn) {
-
- return;
- }
- }
-
- data_size = dyn_array_get_data_size(mlog);
-
- /* Open the database log for log_write_low */
- mtr->start_lsn = log_reserve_and_open(data_size);
-
- if (mtr->log_mode == MTR_LOG_ALL) {
-
- block = mlog;
-
- while (block != NULL) {
- log_write_low(dyn_block_get_data(block),
- dyn_block_get_used(block));
- block = dyn_array_get_next_block(mlog, block);
- }
- } else {
- ut_ad(mtr->log_mode == MTR_LOG_NONE);
- /* Do nothing */
- }
-
- mtr->end_lsn = log_close();
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***************************************************************//**
-Commits a mini-transaction. */
-UNIV_INTERN
-void
-mtr_commit(
-/*=======*/
- mtr_t* mtr) /*!< in: mini-transaction */
-{
-#ifndef UNIV_HOTBACKUP
- ibool write_log;
-#endif /* !UNIV_HOTBACKUP */
-
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE);
- ut_d(mtr->state = MTR_COMMITTING);
-
-#ifndef UNIV_HOTBACKUP
- /* This is a dirty read, for debugging. */
- ut_ad(!recv_no_log_write);
- write_log = mtr->modifications && mtr->n_log_recs;
-
- if (write_log) {
- mtr_log_reserve_and_write(mtr);
- }
-
- /* We first update the modification info to buffer pages, and only
- after that release the log mutex: this guarantees that when the log
- mutex is free, all buffer pages contain an up-to-date info of their
- modifications. This fact is used in making a checkpoint when we look
- at the oldest modification of any page in the buffer pool. It is also
- required when we insert modified buffer pages in to the flush list
- which must be sorted on oldest_modification. */
-
- mtr_memo_pop_all(mtr);
-
- if (write_log) {
- log_release();
- }
-#endif /* !UNIV_HOTBACKUP */
-
- ut_d(mtr->state = MTR_COMMITTED);
- dyn_array_free(&(mtr->memo));
- dyn_array_free(&(mtr->log));
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Releases the latches stored in an mtr memo down to a savepoint.
-NOTE! The mtr must not have made changes to buffer pages after the
-savepoint, as these can be handled only by mtr_commit. */
-UNIV_INTERN
-void
-mtr_rollback_to_savepoint(
-/*======================*/
- mtr_t* mtr, /*!< in: mtr */
- ulint savepoint) /*!< in: savepoint */
-{
- mtr_memo_slot_t* slot;
- dyn_array_t* memo;
- ulint offset;
-
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE);
-
- memo = &(mtr->memo);
-
- offset = dyn_array_get_data_size(memo);
- ut_ad(offset >= savepoint);
-
- while (offset > savepoint) {
- offset -= sizeof(mtr_memo_slot_t);
-
- slot = dyn_array_get_element(memo, offset);
-
- ut_ad(slot->type != MTR_MEMO_MODIFY);
- mtr_memo_slot_release(mtr, slot);
- }
-}
-
-/***************************************************//**
-Releases an object in the memo stack. */
-UNIV_INTERN
-void
-mtr_memo_release(
-/*=============*/
- mtr_t* mtr, /*!< in: mtr */
- void* object, /*!< in: object */
- ulint type) /*!< in: object type: MTR_MEMO_S_LOCK, ... */
-{
- mtr_memo_slot_t* slot;
- dyn_array_t* memo;
- ulint offset;
-
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE);
-
- memo = &(mtr->memo);
-
- offset = dyn_array_get_data_size(memo);
-
- while (offset > 0) {
- offset -= sizeof(mtr_memo_slot_t);
-
- slot = dyn_array_get_element(memo, offset);
-
- if ((object == slot->object) && (type == slot->type)) {
-
- mtr_memo_slot_release(mtr, slot);
-
- break;
- }
- }
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************//**
-Reads 1 - 4 bytes from a file page buffered in the buffer pool.
-@return value read */
-UNIV_INTERN
-ulint
-mtr_read_ulint(
-/*===========*/
- const byte* ptr, /*!< in: pointer from where to read */
- ulint type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
- mtr_t* mtr __attribute__((unused)))
- /*!< in: mini-transaction handle */
-{
- ut_ad(mtr->state == MTR_ACTIVE);
- ut_ad(mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_S_FIX)
- || mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_X_FIX));
- if (type == MLOG_1BYTE) {
- return(mach_read_from_1(ptr));
- } else if (type == MLOG_2BYTES) {
- return(mach_read_from_2(ptr));
- } else {
- ut_ad(type == MLOG_4BYTES);
- return(mach_read_from_4(ptr));
- }
-}
-
-/********************************************************//**
-Reads 8 bytes from a file page buffered in the buffer pool.
-@return value read */
-UNIV_INTERN
-dulint
-mtr_read_dulint(
-/*============*/
- const byte* ptr, /*!< in: pointer from where to read */
- mtr_t* mtr __attribute__((unused)))
- /*!< in: mini-transaction handle */
-{
- ut_ad(mtr->state == MTR_ACTIVE);
- ut_ad(mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_S_FIX)
- || mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_X_FIX));
- return(mach_read_from_8(ptr));
-}
-
-#ifdef UNIV_DEBUG
-# ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Checks if memo contains the given page.
-@return TRUE if contains */
-UNIV_INTERN
-ibool
-mtr_memo_contains_page(
-/*===================*/
- mtr_t* mtr, /*!< in: mtr */
- const byte* ptr, /*!< in: pointer to buffer frame */
- ulint type) /*!< in: type of object */
-{
- return(mtr_memo_contains(mtr, buf_block_align(ptr), type));
-}
-
-/*********************************************************//**
-Prints info of an mtr handle. */
-UNIV_INTERN
-void
-mtr_print(
-/*======*/
- mtr_t* mtr) /*!< in: mtr */
-{
- fprintf(stderr,
- "Mini-transaction handle: memo size %lu bytes"
- " log size %lu bytes\n",
- (ulong) dyn_array_get_data_size(&(mtr->memo)),
- (ulong) dyn_array_get_data_size(&(mtr->log)));
-}
-# endif /* !UNIV_HOTBACKUP */
-#endif /* UNIV_DEBUG */
diff --git a/storage/innodb_plugin/os/os0file.c b/storage/innodb_plugin/os/os0file.c
deleted file mode 100644
index 37edad442db..00000000000
--- a/storage/innodb_plugin/os/os0file.c
+++ /dev/null
@@ -1,4476 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-/***********************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2009, Percona Inc.
-
-Portions of this file contain modifications contributed and copyrighted
-by Percona Inc.. Those modifications are
-gratefully acknowledged and are described briefly in the InnoDB
-documentation. The contributions by Percona Inc. are incorporated with
-their permission, and subject to the conditions contained in the file
-COPYING.Percona.
-
-This program is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-***********************************************************************/
-
-/**************************************************//**
-@file os/os0file.c
-The interface to the operating system file i/o primitives
-
-Created 10/21/1995 Heikki Tuuri
-*******************************************************/
-
-#include "os0file.h"
-#include "ut0mem.h"
-#include "srv0srv.h"
-#include "srv0start.h"
-#include "fil0fil.h"
-#include "buf0buf.h"
-#ifndef UNIV_HOTBACKUP
-# include "os0sync.h"
-# include "os0thread.h"
-#else /* !UNIV_HOTBACKUP */
-# ifdef __WIN__
-/* Add includes for the _stat() call to compile on Windows */
-# include <sys/types.h>
-# include <sys/stat.h>
-# include <errno.h>
-# endif /* __WIN__ */
-#endif /* !UNIV_HOTBACKUP */
-
-/* This specifies the file permissions InnoDB uses when it creates files in
-Unix; the value of os_innodb_umask is initialized in ha_innodb.cc to
-my_umask */
-
-#ifndef __WIN__
-/** Umask for creating files */
-UNIV_INTERN ulint os_innodb_umask
- = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
-#else
-/** Umask for creating files */
-UNIV_INTERN ulint os_innodb_umask = 0;
-#endif
-
-#ifdef UNIV_DO_FLUSH
-/* If the following is set to TRUE, we do not call os_file_flush in every
-os_file_write. We can set this TRUE when the doublewrite buffer is used. */
-UNIV_INTERN ibool os_do_not_call_flush_at_each_write = FALSE;
-#else
-/* We do not call os_file_flush in every os_file_write. */
-#endif /* UNIV_DO_FLUSH */
-
-#ifdef UNIV_HOTBACKUP
-# define os_aio_use_native_aio FALSE
-#else /* UNIV_HOTBACKUP */
-/* We use these mutexes to protect lseek + file i/o operation, if the
-OS does not provide an atomic pread or pwrite, or similar */
-#define OS_FILE_N_SEEK_MUTEXES 16
-UNIV_INTERN os_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
-
-/* In simulated aio, merge at most this many consecutive i/os */
-#define OS_AIO_MERGE_N_CONSECUTIVE 64
-
-/** If this flag is TRUE, then we will use the native aio of the
-OS (provided we compiled Innobase with it in), otherwise we will
-use simulated aio we build below with threads */
-
-UNIV_INTERN ibool os_aio_use_native_aio = FALSE;
-
-/** Flag: enable debug printout for asynchronous i/o */
-UNIV_INTERN ibool os_aio_print_debug = FALSE;
-
-/** The asynchronous i/o array slot structure */
-typedef struct os_aio_slot_struct os_aio_slot_t;
-
-/** The asynchronous i/o array slot structure */
-struct os_aio_slot_struct{
- ibool is_read; /*!< TRUE if a read operation */
- ulint pos; /*!< index of the slot in the aio
- array */
- ibool reserved; /*!< TRUE if this slot is reserved */
- time_t reservation_time;/*!< time when reserved */
- ulint len; /*!< length of the block to read or
- write */
- byte* buf; /*!< buffer used in i/o */
- ulint type; /*!< OS_FILE_READ or OS_FILE_WRITE */
- ulint offset; /*!< 32 low bits of file offset in
- bytes */
- ulint offset_high; /*!< 32 high bits of file offset */
- os_file_t file; /*!< file where to read or write */
- const char* name; /*!< file name or path */
- ibool io_already_done;/*!< used only in simulated aio:
- TRUE if the physical i/o already
- made and only the slot message
- needs to be passed to the caller
- of os_aio_simulated_handle */
- fil_node_t* message1; /*!< message which is given by the */
- void* message2; /*!< the requester of an aio operation
- and which can be used to identify
- which pending aio operation was
- completed */
-#ifdef WIN_ASYNC_IO
- os_event_t event; /*!< event object we need in the
- OVERLAPPED struct */
- OVERLAPPED control; /*!< Windows control block for the
- aio request */
-#endif
-};
-
-/** The asynchronous i/o array structure */
-typedef struct os_aio_array_struct os_aio_array_t;
-
-/** The asynchronous i/o array structure */
-struct os_aio_array_struct{
- os_mutex_t mutex; /*!< the mutex protecting the aio array */
- os_event_t not_full;
- /*!< The event which is set to the
- signaled state when there is space in
- the aio outside the ibuf segment */
- os_event_t is_empty;
- /*!< The event which is set to the
- signaled state when there are no
- pending i/os in this array */
- ulint n_slots;/*!< Total number of slots in the aio
- array. This must be divisible by
- n_threads. */
- ulint n_segments;
- /*!< Number of segments in the aio
- array of pending aio requests. A
- thread can wait separately for any one
- of the segments. */
- ulint n_reserved;
- /*!< Number of reserved slots in the
- aio array outside the ibuf segment */
- os_aio_slot_t* slots; /*!< Pointer to the slots in the array */
-#ifdef __WIN__
- os_native_event_t* native_events;
- /*!< Pointer to an array of OS native
- event handles where we copied the
- handles from slots, in the same
- order. This can be used in
- WaitForMultipleObjects; used only in
- Windows */
-#endif
-};
-
-/** Array of events used in simulated aio */
-static os_event_t* os_aio_segment_wait_events = NULL;
-
-/** The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These
-are NULL when the module has not yet been initialized. @{ */
-static os_aio_array_t* os_aio_read_array = NULL; /*!< Reads */
-static os_aio_array_t* os_aio_write_array = NULL; /*!< Writes */
-static os_aio_array_t* os_aio_ibuf_array = NULL; /*!< Insert buffer */
-static os_aio_array_t* os_aio_log_array = NULL; /*!< Redo log */
-static os_aio_array_t* os_aio_sync_array = NULL; /*!< Synchronous I/O */
-/* @} */
-
-/** Number of asynchronous I/O segments. Set by os_aio_init(). */
-static ulint os_aio_n_segments = ULINT_UNDEFINED;
-
-/** If the following is TRUE, read i/o handler threads try to
-wait until a batch of new read requests have been posted */
-static ibool os_aio_recommend_sleep_for_read_threads = FALSE;
-#endif /* UNIV_HOTBACKUP */
-
-UNIV_INTERN ulint os_n_file_reads = 0;
-UNIV_INTERN ulint os_bytes_read_since_printout = 0;
-UNIV_INTERN ulint os_n_file_writes = 0;
-UNIV_INTERN ulint os_n_fsyncs = 0;
-UNIV_INTERN ulint os_n_file_reads_old = 0;
-UNIV_INTERN ulint os_n_file_writes_old = 0;
-UNIV_INTERN ulint os_n_fsyncs_old = 0;
-UNIV_INTERN time_t os_last_printout;
-
-UNIV_INTERN ibool os_has_said_disk_full = FALSE;
-
-#ifndef UNIV_HOTBACKUP
-/** The mutex protecting the following counts of pending I/O operations */
-static os_mutex_t os_file_count_mutex;
-#endif /* !UNIV_HOTBACKUP */
-/** Number of pending os_file_pread() operations */
-UNIV_INTERN ulint os_file_n_pending_preads = 0;
-/** Number of pending os_file_pwrite() operations */
-UNIV_INTERN ulint os_file_n_pending_pwrites = 0;
-/** Number of pending write operations */
-UNIV_INTERN ulint os_n_pending_writes = 0;
-/** Number of pending read operations */
-UNIV_INTERN ulint os_n_pending_reads = 0;
-
-/***********************************************************************//**
-Gets the operating system version. Currently works only on Windows.
-@return OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000 */
-UNIV_INTERN
-ulint
-os_get_os_version(void)
-/*===================*/
-{
-#ifdef __WIN__
- OSVERSIONINFO os_info;
-
- os_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
-
- ut_a(GetVersionEx(&os_info));
-
- if (os_info.dwPlatformId == VER_PLATFORM_WIN32s) {
- return(OS_WIN31);
- } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) {
- return(OS_WIN95);
- } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) {
- if (os_info.dwMajorVersion <= 4) {
- return(OS_WINNT);
- } else {
- return(OS_WIN2000);
- }
- } else {
- ut_error;
- return(0);
- }
-#else
- ut_error;
-
- return(0);
-#endif
-}
-
-/***********************************************************************//**
-Retrieves the last error number if an error occurs in a file io function.
-The number should be retrieved before any other OS calls (because they may
-overwrite the error number). If the number is not known to this program,
-the OS error number + 100 is returned.
-@return error number, or OS error number + 100 */
-UNIV_INTERN
-ulint
-os_file_get_last_error(
-/*===================*/
- ibool report_all_errors) /*!< in: TRUE if we want an error message
- printed of all errors */
-{
- ulint err;
-
-#ifdef __WIN__
-
- err = (ulint) GetLastError();
-
- if (report_all_errors
- || (err != ERROR_DISK_FULL && err != ERROR_FILE_EXISTS)) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Operating system error number %lu"
- " in a file operation.\n", (ulong) err);
-
- if (err == ERROR_PATH_NOT_FOUND) {
- fprintf(stderr,
- "InnoDB: The error means the system"
- " cannot find the path specified.\n");
-
- if (srv_is_being_started) {
- fprintf(stderr,
- "InnoDB: If you are installing InnoDB,"
- " remember that you must create\n"
- "InnoDB: directories yourself, InnoDB"
- " does not create them.\n");
- }
- } else if (err == ERROR_ACCESS_DENIED) {
- fprintf(stderr,
- "InnoDB: The error means mysqld does not have"
- " the access rights to\n"
- "InnoDB: the directory. It may also be"
- " you have created a subdirectory\n"
- "InnoDB: of the same name as a data file.\n");
- } else if (err == ERROR_SHARING_VIOLATION
- || err == ERROR_LOCK_VIOLATION) {
- fprintf(stderr,
- "InnoDB: The error means that another program"
- " is using InnoDB's files.\n"
- "InnoDB: This might be a backup or antivirus"
- " software or another instance\n"
- "InnoDB: of MySQL."
- " Please close it to get rid of this error.\n");
- } else if (err == ERROR_WORKING_SET_QUOTA
- || err == ERROR_NO_SYSTEM_RESOURCES) {
- fprintf(stderr,
- "InnoDB: The error means that there are no"
- " sufficient system resources or quota to"
- " complete the operation.\n");
- } else if (err == ERROR_OPERATION_ABORTED) {
- fprintf(stderr,
- "InnoDB: The error means that the I/O"
- " operation has been aborted\n"
- "InnoDB: because of either a thread exit"
- " or an application request.\n"
- "InnoDB: Retry attempt is made.\n");
- } else {
- fprintf(stderr,
- "InnoDB: Some operating system error numbers"
- " are described at\n"
- "InnoDB: "
- REFMAN
- "operating-system-error-codes.html\n");
- }
- }
-
- fflush(stderr);
-
- if (err == ERROR_FILE_NOT_FOUND) {
- return(OS_FILE_NOT_FOUND);
- } else if (err == ERROR_DISK_FULL) {
- return(OS_FILE_DISK_FULL);
- } else if (err == ERROR_FILE_EXISTS) {
- return(OS_FILE_ALREADY_EXISTS);
- } else if (err == ERROR_SHARING_VIOLATION
- || err == ERROR_LOCK_VIOLATION) {
- return(OS_FILE_SHARING_VIOLATION);
- } else if (err == ERROR_WORKING_SET_QUOTA
- || err == ERROR_NO_SYSTEM_RESOURCES) {
- return(OS_FILE_INSUFFICIENT_RESOURCE);
- } else if (err == ERROR_OPERATION_ABORTED) {
- return(OS_FILE_OPERATION_ABORTED);
- } else {
- return(100 + err);
- }
-#else
- err = (ulint) errno;
-
- if (report_all_errors
- || (err != ENOSPC && err != EEXIST)) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Operating system error number %lu"
- " in a file operation.\n", (ulong) err);
-
- if (err == ENOENT) {
- fprintf(stderr,
- "InnoDB: The error means the system"
- " cannot find the path specified.\n");
-
- if (srv_is_being_started) {
- fprintf(stderr,
- "InnoDB: If you are installing InnoDB,"
- " remember that you must create\n"
- "InnoDB: directories yourself, InnoDB"
- " does not create them.\n");
- }
- } else if (err == EACCES) {
- fprintf(stderr,
- "InnoDB: The error means mysqld does not have"
- " the access rights to\n"
- "InnoDB: the directory.\n");
- } else {
- if (strerror((int)err) != NULL) {
- fprintf(stderr,
- "InnoDB: Error number %lu"
- " means '%s'.\n",
- err, strerror((int)err));
- }
-
- fprintf(stderr,
- "InnoDB: Some operating system"
- " error numbers are described at\n"
- "InnoDB: "
- REFMAN
- "operating-system-error-codes.html\n");
- }
- }
-
- fflush(stderr);
-
- if (err == ENOSPC) {
- return(OS_FILE_DISK_FULL);
- } else if (err == ENOENT) {
- return(OS_FILE_NOT_FOUND);
- } else if (err == EEXIST) {
- return(OS_FILE_ALREADY_EXISTS);
- } else if (err == EXDEV || err == ENOTDIR || err == EISDIR) {
- return(OS_FILE_PATH_ERROR);
- } else {
- return(100 + err);
- }
-#endif
-}
-
-/****************************************************************//**
-Does error handling when a file operation fails.
-Conditionally exits (calling exit(3)) based on should_exit value and the
-error type
-@return TRUE if we should retry the operation */
-static
-ibool
-os_file_handle_error_cond_exit(
-/*===========================*/
- const char* name, /*!< in: name of a file or NULL */
- const char* operation, /*!< in: operation */
- ibool should_exit) /*!< in: call exit(3) if unknown error
- and this parameter is TRUE */
-{
- ulint err;
-
- err = os_file_get_last_error(FALSE);
-
- if (err == OS_FILE_DISK_FULL) {
- /* We only print a warning about disk full once */
-
- if (os_has_said_disk_full) {
-
- return(FALSE);
- }
-
- if (name) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Encountered a problem with"
- " file %s\n", name);
- }
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Disk is full. Try to clean the disk"
- " to free space.\n");
-
- os_has_said_disk_full = TRUE;
-
- fflush(stderr);
-
- return(FALSE);
- } else if (err == OS_FILE_AIO_RESOURCES_RESERVED) {
-
- return(TRUE);
- } else if (err == OS_FILE_ALREADY_EXISTS
- || err == OS_FILE_PATH_ERROR) {
-
- return(FALSE);
- } else if (err == OS_FILE_SHARING_VIOLATION) {
-
- os_thread_sleep(10000000); /* 10 sec */
- return(TRUE);
- } else if (err == OS_FILE_INSUFFICIENT_RESOURCE) {
-
- os_thread_sleep(100000); /* 100 ms */
- return(TRUE);
- } else if (err == OS_FILE_OPERATION_ABORTED) {
-
- os_thread_sleep(100000); /* 100 ms */
- return(TRUE);
- } else {
- if (name) {
- fprintf(stderr, "InnoDB: File name %s\n", name);
- }
-
- fprintf(stderr, "InnoDB: File operation call: '%s'.\n",
- operation);
-
- if (should_exit) {
- fprintf(stderr, "InnoDB: Cannot continue operation.\n");
-
- fflush(stderr);
-
- exit(1);
- }
- }
-
- return(FALSE);
-}
-
-/****************************************************************//**
-Does error handling when a file operation fails.
-@return TRUE if we should retry the operation */
-static
-ibool
-os_file_handle_error(
-/*=================*/
- const char* name, /*!< in: name of a file or NULL */
- const char* operation)/*!< in: operation */
-{
- /* exit in case of unknown error */
- return(os_file_handle_error_cond_exit(name, operation, TRUE));
-}
-
-/****************************************************************//**
-Does error handling when a file operation fails.
-@return TRUE if we should retry the operation */
-static
-ibool
-os_file_handle_error_no_exit(
-/*=========================*/
- const char* name, /*!< in: name of a file or NULL */
- const char* operation)/*!< in: operation */
-{
- /* don't exit in case of unknown error */
- return(os_file_handle_error_cond_exit(name, operation, FALSE));
-}
-
-#undef USE_FILE_LOCK
-#define USE_FILE_LOCK
-#if defined(UNIV_HOTBACKUP) || defined(__WIN__) || defined(__NETWARE__)
-/* InnoDB Hot Backup does not lock the data files.
- * On Windows, mandatory locking is used.
- */
-# undef USE_FILE_LOCK
-#endif
-#ifdef USE_FILE_LOCK
-/****************************************************************//**
-Obtain an exclusive lock on a file.
-@return 0 on success */
-static
-int
-os_file_lock(
-/*=========*/
- int fd, /*!< in: file descriptor */
- const char* name) /*!< in: file name */
-{
- struct flock lk;
- lk.l_type = F_WRLCK;
- lk.l_whence = SEEK_SET;
- lk.l_start = lk.l_len = 0;
- if (fcntl(fd, F_SETLK, &lk) == -1) {
- fprintf(stderr,
- "InnoDB: Unable to lock %s, error: %d\n", name, errno);
-
- if (errno == EAGAIN || errno == EACCES) {
- fprintf(stderr,
- "InnoDB: Check that you do not already have"
- " another mysqld process\n"
- "InnoDB: using the same InnoDB data"
- " or log files.\n");
- }
-
- return(-1);
- }
-
- return(0);
-}
-#endif /* USE_FILE_LOCK */
-
-#ifndef UNIV_HOTBACKUP
-/****************************************************************//**
-Creates the seek mutexes used in positioned reads and writes. */
-UNIV_INTERN
-void
-os_io_init_simple(void)
-/*===================*/
-{
- ulint i;
-
- os_file_count_mutex = os_mutex_create(NULL);
-
- for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
- os_file_seek_mutexes[i] = os_mutex_create(NULL);
- }
-}
-
-/***********************************************************************//**
-Creates a temporary file. This function is like tmpfile(3), but
-the temporary file is created in the MySQL temporary directory.
-On Netware, this function is like tmpfile(3), because the C run-time
-library of Netware does not expose the delete-on-close flag.
-@return temporary file handle, or NULL on error */
-UNIV_INTERN
-FILE*
-os_file_create_tmpfile(void)
-/*========================*/
-{
-#ifdef __NETWARE__
- FILE* file = tmpfile();
-#else /* __NETWARE__ */
- FILE* file = NULL;
- int fd = innobase_mysql_tmpfile();
-
- if (fd >= 0) {
- file = fdopen(fd, "w+b");
- }
-#endif /* __NETWARE__ */
-
- if (!file) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: unable to create temporary file;"
- " errno: %d\n", errno);
-#ifndef __NETWARE__
- if (fd >= 0) {
- close(fd);
- }
-#endif /* !__NETWARE__ */
- }
-
- return(file);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************************//**
-The os_file_opendir() function opens a directory stream corresponding to the
-directory named by the dirname argument. The directory stream is positioned
-at the first entry. In both Unix and Windows we automatically skip the '.'
-and '..' items at the start of the directory listing.
-@return directory stream, NULL if error */
-UNIV_INTERN
-os_file_dir_t
-os_file_opendir(
-/*============*/
- const char* dirname, /*!< in: directory name; it must not
- contain a trailing '\' or '/' */
- ibool error_is_fatal) /*!< in: TRUE if we should treat an
- error as a fatal error; if we try to
- open symlinks then we do not wish a
- fatal error if it happens not to be
- a directory */
-{
- os_file_dir_t dir;
-#ifdef __WIN__
- LPWIN32_FIND_DATA lpFindFileData;
- char path[OS_FILE_MAX_PATH + 3];
-
- ut_a(strlen(dirname) < OS_FILE_MAX_PATH);
-
- strcpy(path, dirname);
- strcpy(path + strlen(path), "\\*");
-
- /* Note that in Windows opening the 'directory stream' also retrieves
- the first entry in the directory. Since it is '.', that is no problem,
- as we will skip over the '.' and '..' entries anyway. */
-
- lpFindFileData = ut_malloc(sizeof(WIN32_FIND_DATA));
-
- dir = FindFirstFile((LPCTSTR) path, lpFindFileData);
-
- ut_free(lpFindFileData);
-
- if (dir == INVALID_HANDLE_VALUE) {
-
- if (error_is_fatal) {
- os_file_handle_error(dirname, "opendir");
- }
-
- return(NULL);
- }
-
- return(dir);
-#else
- dir = opendir(dirname);
-
- if (dir == NULL && error_is_fatal) {
- os_file_handle_error(dirname, "opendir");
- }
-
- return(dir);
-#endif
-}
-
-/***********************************************************************//**
-Closes a directory stream.
-@return 0 if success, -1 if failure */
-UNIV_INTERN
-int
-os_file_closedir(
-/*=============*/
- os_file_dir_t dir) /*!< in: directory stream */
-{
-#ifdef __WIN__
- BOOL ret;
-
- ret = FindClose(dir);
-
- if (!ret) {
- os_file_handle_error_no_exit(NULL, "closedir");
-
- return(-1);
- }
-
- return(0);
-#else
- int ret;
-
- ret = closedir(dir);
-
- if (ret) {
- os_file_handle_error_no_exit(NULL, "closedir");
- }
-
- return(ret);
-#endif
-}
-
-/***********************************************************************//**
-This function returns information of the next file in the directory. We jump
-over the '.' and '..' entries in the directory.
-@return 0 if ok, -1 if error, 1 if at the end of the directory */
-UNIV_INTERN
-int
-os_file_readdir_next_file(
-/*======================*/
- const char* dirname,/*!< in: directory name or path */
- os_file_dir_t dir, /*!< in: directory stream */
- os_file_stat_t* info) /*!< in/out: buffer where the info is returned */
-{
-#ifdef __WIN__
- LPWIN32_FIND_DATA lpFindFileData;
- BOOL ret;
-
- lpFindFileData = ut_malloc(sizeof(WIN32_FIND_DATA));
-next_file:
- ret = FindNextFile(dir, lpFindFileData);
-
- if (ret) {
- ut_a(strlen((char *) lpFindFileData->cFileName)
- < OS_FILE_MAX_PATH);
-
- if (strcmp((char *) lpFindFileData->cFileName, ".") == 0
- || strcmp((char *) lpFindFileData->cFileName, "..") == 0) {
-
- goto next_file;
- }
-
- strcpy(info->name, (char *) lpFindFileData->cFileName);
-
- info->size = (ib_int64_t)(lpFindFileData->nFileSizeLow)
- + (((ib_int64_t)(lpFindFileData->nFileSizeHigh))
- << 32);
-
- if (lpFindFileData->dwFileAttributes
- & FILE_ATTRIBUTE_REPARSE_POINT) {
- /* TODO: test Windows symlinks */
- /* TODO: MySQL has apparently its own symlink
- implementation in Windows, dbname.sym can
- redirect a database directory:
- REFMAN "windows-symbolic-links.html" */
- info->type = OS_FILE_TYPE_LINK;
- } else if (lpFindFileData->dwFileAttributes
- & FILE_ATTRIBUTE_DIRECTORY) {
- info->type = OS_FILE_TYPE_DIR;
- } else {
- /* It is probably safest to assume that all other
- file types are normal. Better to check them rather
- than blindly skip them. */
-
- info->type = OS_FILE_TYPE_FILE;
- }
- }
-
- ut_free(lpFindFileData);
-
- if (ret) {
- return(0);
- } else if (GetLastError() == ERROR_NO_MORE_FILES) {
-
- return(1);
- } else {
- os_file_handle_error_no_exit(dirname,
- "readdir_next_file");
- return(-1);
- }
-#else
- struct dirent* ent;
- char* full_path;
- int ret;
- struct stat statinfo;
-#ifdef HAVE_READDIR_R
- char dirent_buf[sizeof(struct dirent)
- + _POSIX_PATH_MAX + 100];
- /* In /mysys/my_lib.c, _POSIX_PATH_MAX + 1 is used as
- the max file name len; but in most standards, the
- length is NAME_MAX; we add 100 to be even safer */
-#endif
-
-next_file:
-
-#ifdef HAVE_READDIR_R
- ret = readdir_r(dir, (struct dirent*)dirent_buf, &ent);
-
- if (ret != 0) {
- fprintf(stderr,
- "InnoDB: cannot read directory %s, error %lu\n",
- dirname, (ulong)ret);
-
- return(-1);
- }
-
- if (ent == NULL) {
- /* End of directory */
-
- return(1);
- }
-
- ut_a(strlen(ent->d_name) < _POSIX_PATH_MAX + 100 - 1);
-#else
- ent = readdir(dir);
-
- if (ent == NULL) {
-
- return(1);
- }
-#endif
- ut_a(strlen(ent->d_name) < OS_FILE_MAX_PATH);
-
- if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) {
-
- goto next_file;
- }
-
- strcpy(info->name, ent->d_name);
-
- full_path = ut_malloc(strlen(dirname) + strlen(ent->d_name) + 10);
-
- sprintf(full_path, "%s/%s", dirname, ent->d_name);
-
- ret = stat(full_path, &statinfo);
-
- if (ret) {
-
- if (errno == ENOENT) {
- /* readdir() returned a file that does not exist,
- it must have been deleted in the meantime. Do what
- would have happened if the file was deleted before
- readdir() - ignore and go to the next entry.
- If this is the last entry then info->name will still
- contain the name of the deleted file when this
- function returns, but this is not an issue since the
- caller shouldn't be looking at info when end of
- directory is returned. */
-
- ut_free(full_path);
-
- goto next_file;
- }
-
- os_file_handle_error_no_exit(full_path, "stat");
-
- ut_free(full_path);
-
- return(-1);
- }
-
- info->size = (ib_int64_t)statinfo.st_size;
-
- if (S_ISDIR(statinfo.st_mode)) {
- info->type = OS_FILE_TYPE_DIR;
- } else if (S_ISLNK(statinfo.st_mode)) {
- info->type = OS_FILE_TYPE_LINK;
- } else if (S_ISREG(statinfo.st_mode)) {
- info->type = OS_FILE_TYPE_FILE;
- } else {
- info->type = OS_FILE_TYPE_UNKNOWN;
- }
-
- ut_free(full_path);
-
- return(0);
-#endif
-}
-
-/*****************************************************************//**
-This function attempts to create a directory named pathname. The new directory
-gets default permissions. On Unix the permissions are (0770 & ~umask). If the
-directory exists already, nothing is done and the call succeeds, unless the
-fail_if_exists arguments is true.
-@return TRUE if call succeeds, FALSE on error */
-UNIV_INTERN
-ibool
-os_file_create_directory(
-/*=====================*/
- const char* pathname, /*!< in: directory name as
- null-terminated string */
- ibool fail_if_exists) /*!< in: if TRUE, pre-existing directory
- is treated as an error. */
-{
-#ifdef __WIN__
- BOOL rcode;
-
- rcode = CreateDirectory((LPCTSTR) pathname, NULL);
- if (!(rcode != 0
- || (GetLastError() == ERROR_ALREADY_EXISTS
- && !fail_if_exists))) {
- /* failure */
- os_file_handle_error(pathname, "CreateDirectory");
-
- return(FALSE);
- }
-
- return (TRUE);
-#else
- int rcode;
-
- rcode = mkdir(pathname, 0770);
-
- if (!(rcode == 0 || (errno == EEXIST && !fail_if_exists))) {
- /* failure */
- os_file_handle_error(pathname, "mkdir");
-
- return(FALSE);
- }
-
- return (TRUE);
-#endif
-}
-
-/****************************************************************//**
-A simple function to open or create a file.
-@return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
-UNIV_INTERN
-os_file_t
-os_file_create_simple(
-/*==================*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file is
- opened (if does not exist, error), or
- OS_FILE_CREATE if a new file is created
- (if exists, error), or
- OS_FILE_CREATE_PATH if new file
- (if exists, error) and subdirectories along
- its path are created (if needed)*/
- ulint access_type,/*!< in: OS_FILE_READ_ONLY or
- OS_FILE_READ_WRITE */
- ibool* success)/*!< out: TRUE if succeed, FALSE if error */
-{
-#ifdef __WIN__
- os_file_t file;
- DWORD create_flag;
- DWORD access;
- DWORD attributes = 0;
- ibool retry;
-
-try_again:
- ut_a(name);
-
- if (create_mode == OS_FILE_OPEN) {
- create_flag = OPEN_EXISTING;
- } else if (create_mode == OS_FILE_CREATE) {
- create_flag = CREATE_NEW;
- } else if (create_mode == OS_FILE_CREATE_PATH) {
- /* create subdirs along the path if needed */
- *success = os_file_create_subdirs_if_needed(name);
- if (!*success) {
- ut_error;
- }
- create_flag = CREATE_NEW;
- create_mode = OS_FILE_CREATE;
- } else {
- create_flag = 0;
- ut_error;
- }
-
- if (access_type == OS_FILE_READ_ONLY) {
- access = GENERIC_READ;
- } else if (access_type == OS_FILE_READ_WRITE) {
- access = GENERIC_READ | GENERIC_WRITE;
- } else {
- access = 0;
- ut_error;
- }
-
- file = CreateFile((LPCTSTR) name,
- access,
- FILE_SHARE_READ | FILE_SHARE_WRITE,
- /* file can be read and written also
- by other processes */
- NULL, /* default security attributes */
- create_flag,
- attributes,
- NULL); /*!< no template file */
-
- if (file == INVALID_HANDLE_VALUE) {
- *success = FALSE;
-
- retry = os_file_handle_error(name,
- create_mode == OS_FILE_OPEN ?
- "open" : "create");
- if (retry) {
- goto try_again;
- }
- } else {
- *success = TRUE;
- }
-
- return(file);
-#else /* __WIN__ */
- os_file_t file;
- int create_flag;
- ibool retry;
-
-try_again:
- ut_a(name);
-
- if (create_mode == OS_FILE_OPEN) {
- if (access_type == OS_FILE_READ_ONLY) {
- create_flag = O_RDONLY;
- } else {
- create_flag = O_RDWR;
- }
- } else if (create_mode == OS_FILE_CREATE) {
- create_flag = O_RDWR | O_CREAT | O_EXCL;
- } else if (create_mode == OS_FILE_CREATE_PATH) {
- /* create subdirs along the path if needed */
- *success = os_file_create_subdirs_if_needed(name);
- if (!*success) {
- return (-1);
- }
- create_flag = O_RDWR | O_CREAT | O_EXCL;
- create_mode = OS_FILE_CREATE;
- } else {
- create_flag = 0;
- ut_error;
- }
-
- if (create_mode == OS_FILE_CREATE) {
- file = open(name, create_flag, S_IRUSR | S_IWUSR
- | S_IRGRP | S_IWGRP);
- } else {
- file = open(name, create_flag);
- }
-
- if (file == -1) {
- *success = FALSE;
-
- retry = os_file_handle_error(name,
- create_mode == OS_FILE_OPEN ?
- "open" : "create");
- if (retry) {
- goto try_again;
- }
-#ifdef USE_FILE_LOCK
- } else if (access_type == OS_FILE_READ_WRITE
- && os_file_lock(file, name)) {
- *success = FALSE;
- close(file);
- file = -1;
-#endif
- } else {
- *success = TRUE;
- }
-
- return(file);
-#endif /* __WIN__ */
-}
-
-/****************************************************************//**
-A simple function to open or create a file.
-@return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
-UNIV_INTERN
-os_file_t
-os_file_create_simple_no_error_handling(
-/*====================================*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
- is opened (if does not exist, error), or
- OS_FILE_CREATE if a new file is created
- (if exists, error) */
- ulint access_type,/*!< in: OS_FILE_READ_ONLY,
- OS_FILE_READ_WRITE, or
- OS_FILE_READ_ALLOW_DELETE; the last option is
- used by a backup program reading the file */
- ibool* success)/*!< out: TRUE if succeed, FALSE if error */
-{
-#ifdef __WIN__
- os_file_t file;
- DWORD create_flag;
- DWORD access;
- DWORD attributes = 0;
- DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE;
-
- ut_a(name);
-
- if (create_mode == OS_FILE_OPEN) {
- create_flag = OPEN_EXISTING;
- } else if (create_mode == OS_FILE_CREATE) {
- create_flag = CREATE_NEW;
- } else {
- create_flag = 0;
- ut_error;
- }
-
- if (access_type == OS_FILE_READ_ONLY) {
- access = GENERIC_READ;
- } else if (access_type == OS_FILE_READ_WRITE) {
- access = GENERIC_READ | GENERIC_WRITE;
- } else if (access_type == OS_FILE_READ_ALLOW_DELETE) {
- access = GENERIC_READ;
- share_mode = FILE_SHARE_DELETE | FILE_SHARE_READ
- | FILE_SHARE_WRITE; /*!< A backup program has to give
- mysqld the maximum freedom to
- do what it likes with the
- file */
- } else {
- access = 0;
- ut_error;
- }
-
- file = CreateFile((LPCTSTR) name,
- access,
- share_mode,
- NULL, /* default security attributes */
- create_flag,
- attributes,
- NULL); /*!< no template file */
-
- if (file == INVALID_HANDLE_VALUE) {
- *success = FALSE;
- } else {
- *success = TRUE;
- }
-
- return(file);
-#else /* __WIN__ */
- os_file_t file;
- int create_flag;
-
- ut_a(name);
-
- if (create_mode == OS_FILE_OPEN) {
- if (access_type == OS_FILE_READ_ONLY) {
- create_flag = O_RDONLY;
- } else {
- create_flag = O_RDWR;
- }
- } else if (create_mode == OS_FILE_CREATE) {
- create_flag = O_RDWR | O_CREAT | O_EXCL;
- } else {
- create_flag = 0;
- ut_error;
- }
-
- if (create_mode == OS_FILE_CREATE) {
- file = open(name, create_flag, S_IRUSR | S_IWUSR
- | S_IRGRP | S_IWGRP);
- } else {
- file = open(name, create_flag);
- }
-
- if (file == -1) {
- *success = FALSE;
-#ifdef USE_FILE_LOCK
- } else if (access_type == OS_FILE_READ_WRITE
- && os_file_lock(file, name)) {
- *success = FALSE;
- close(file);
- file = -1;
-#endif
- } else {
- *success = TRUE;
- }
-
- return(file);
-#endif /* __WIN__ */
-}
-
-/****************************************************************//**
-Tries to disable OS caching on an opened file descriptor. Failure is not
-fatal: a diagnostic is printed to stderr and the function returns, leaving
-the descriptor as it was.
-On Solaris (when DIRECTIO_ON is available) directio() is used. Otherwise,
-when O_DIRECT is defined, the flag is added with fcntl(): the current file
-status flags are read with F_GETFL and O_DIRECT is OR-ed into them, because
-F_SETFL replaces the whole set of status flags and passing O_DIRECT alone
-could clear flags (for example O_APPEND or O_NONBLOCK) that are already set
-on the descriptor. If neither mechanism is available the function does
-nothing. */
-UNIV_INTERN
-void
-os_file_set_nocache(
-/*================*/
-    int fd, /*!< in: file descriptor to alter */
-    const char* file_name, /*!< in: file name, used in the
-                           diagnostic message */
-    const char* operation_name) /*!< in: "open" or "create"; used in the
-                                diagnostic message */
-{
-    /* some versions of Solaris may not have DIRECTIO_ON */
-#if defined(UNIV_SOLARIS) && defined(DIRECTIO_ON)
-    if (directio(fd, DIRECTIO_ON) == -1) {
-        int errno_save;
-        /* Save errno before any library call can overwrite it */
-        errno_save = (int)errno;
-        ut_print_timestamp(stderr);
-        fprintf(stderr,
-                " InnoDB: Failed to set DIRECTIO_ON "
-                "on file %s: %s: %s, continuing anyway\n",
-                file_name, operation_name, strerror(errno_save));
-    }
-#elif defined(O_DIRECT)
-    int flags;
-
-    /* Fetch the current status flags so that they are preserved */
-    flags = fcntl(fd, F_GETFL);
-
-    if (flags == -1 || fcntl(fd, F_SETFL, flags | O_DIRECT) == -1) {
-        int errno_save;
-        /* Save errno before any library call can overwrite it */
-        errno_save = (int)errno;
-        ut_print_timestamp(stderr);
-        fprintf(stderr,
-                " InnoDB: Failed to set O_DIRECT "
-                "on file %s: %s: %s, continuing anyway\n",
-                file_name, operation_name, strerror(errno_save));
-        if (errno_save == EINVAL) {
-            ut_print_timestamp(stderr);
-            fprintf(stderr,
-                    " InnoDB: O_DIRECT is known to result in "
-                    "'Invalid argument' on Linux on tmpfs, "
-                    "see MySQL Bug#26662\n");
-        }
-    }
-#endif
-}
-
-/****************************************************************//**
-Opens an existing file or creates a new one.
-On failure *success is set to FALSE and the error is handed to
-os_file_handle_error_no_exit() (when srv_file_per_table is set) or to
-os_file_handle_error(); if the handler returns TRUE the whole open is
-attempted again from the top of the function.
-In the non-Windows branch, after a successful open():
-- OS caching is switched off through os_file_set_nocache() for files that
-  are not OS_LOG_FILE when srv_unix_file_flush_method == SRV_UNIX_O_DIRECT;
-- if USE_FILE_LOCK is defined, the file is locked with os_file_lock()
-  unless create_mode is OS_FILE_OPEN_RAW; if locking fails the descriptor
-  is closed, *success is set to FALSE and -1 is returned.
-@return own: handle to the file, not defined if error (the non-Windows
-branch returns -1), error number can be retrieved with
-os_file_get_last_error */
-UNIV_INTERN
-os_file_t
-os_file_create(
-/*===========*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
- is opened (if does not exist, error), or
- OS_FILE_CREATE if a new file is created
- (if exists, error),
- OS_FILE_OVERWRITE if a new file is created
- or an old overwritten;
- OS_FILE_OPEN_RAW, if a raw device or disk
- partition should be opened;
- OS_FILE_OPEN_RETRY is opened like
- OS_FILE_OPEN, but with USE_FILE_LOCK a failed
- lock is retried up to 100 times, sleeping
- between the attempts */
- ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous,
- non-buffered i/o is desired,
- OS_FILE_NORMAL, if any normal file;
- NOTE that it also depends on type, os_aio_..
- and srv_.. variables whether we really use
- async i/o or unbuffered i/o: look in the
- function source code for the exact rules */
- ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
- ibool* success)/*!< out: TRUE if succeed, FALSE if error */
-{
-#ifdef __WIN__
- os_file_t file;
- DWORD share_mode = FILE_SHARE_READ; /* replaced by FILE_SHARE_WRITE for OS_FILE_OPEN_RAW */
- DWORD create_flag;
- DWORD attributes;
- ibool retry;
-try_again:
- ut_a(name);
-
- if (create_mode == OS_FILE_OPEN_RAW) {
- create_flag = OPEN_EXISTING;
- share_mode = FILE_SHARE_WRITE;
- } else if (create_mode == OS_FILE_OPEN
- || create_mode == OS_FILE_OPEN_RETRY) {
- create_flag = OPEN_EXISTING;
- } else if (create_mode == OS_FILE_CREATE) {
- create_flag = CREATE_NEW;
- } else if (create_mode == OS_FILE_OVERWRITE) {
- create_flag = CREATE_ALWAYS;
- } else {
- create_flag = 0;
- ut_error;
- }
-
- if (purpose == OS_FILE_AIO) {
- /* If specified, use asynchronous (overlapped) io and no
- buffering of writes in the OS */
- attributes = 0;
-#ifdef WIN_ASYNC_IO
- if (os_aio_use_native_aio) {
- attributes = attributes | FILE_FLAG_OVERLAPPED;
- }
-#endif
-#ifdef UNIV_NON_BUFFERED_IO
-# ifndef UNIV_HOTBACKUP
- if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
- /* Do not use unbuffered i/o to log files because
- value 2 denotes that we do not flush the log at every
- commit, but only once per second */
- } else if (srv_win_file_flush_method
- == SRV_WIN_IO_UNBUFFERED) {
- attributes = attributes | FILE_FLAG_NO_BUFFERING;
- }
-# else /* !UNIV_HOTBACKUP */
- attributes = attributes | FILE_FLAG_NO_BUFFERING;
-# endif /* !UNIV_HOTBACKUP */
-#endif /* UNIV_NON_BUFFERED_IO */
- } else if (purpose == OS_FILE_NORMAL) {
- /* Same buffering rules as in the OS_FILE_AIO branch above,
- but FILE_FLAG_OVERLAPPED is never requested */
- attributes = 0;
-#ifdef UNIV_NON_BUFFERED_IO
-# ifndef UNIV_HOTBACKUP
- if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
- /* Do not use unbuffered i/o to log files because
- value 2 denotes that we do not flush the log at every
- commit, but only once per second */
- } else if (srv_win_file_flush_method
- == SRV_WIN_IO_UNBUFFERED) {
- attributes = attributes | FILE_FLAG_NO_BUFFERING;
- }
-# else /* !UNIV_HOTBACKUP */
- attributes = attributes | FILE_FLAG_NO_BUFFERING;
-# endif /* !UNIV_HOTBACKUP */
-#endif /* UNIV_NON_BUFFERED_IO */
- } else {
- attributes = 0;
- ut_error;
- }
-
- file = CreateFile((LPCTSTR) name,
- GENERIC_READ | GENERIC_WRITE, /* read and write
- access */
- share_mode, /* File can be read also by other
- processes; we must give the read
- permission because of ibbackup. We do
- not give the write permission to
- others because if one would succeed to
- start 2 instances of mysqld on the
- SAME files, that could cause severe
- database corruption! When opening
- raw disk partitions, Microsoft manuals
- say that we must give also the write
- permission. */
- NULL, /* default security attributes */
- create_flag,
- attributes,
- NULL); /*!< no template file */
-
- if (file == INVALID_HANDLE_VALUE) {
- *success = FALSE;
-
- /* When srv_file_per_table is on, file creation failure may not
- be critical to the whole instance. Do not crash the server in
- case of unknown errors. */
- if (srv_file_per_table) {
- retry = os_file_handle_error_no_exit(name,
- create_mode == OS_FILE_CREATE ?
- "create" : "open");
- } else {
- retry = os_file_handle_error(name,
- create_mode == OS_FILE_CREATE ?
- "create" : "open");
- }
-
- if (retry) {
- goto try_again;
- }
- } else {
- *success = TRUE;
- }
-
- return(file);
-#else /* __WIN__ */
- os_file_t file;
- int create_flag;
- ibool retry;
- const char* mode_str = NULL; /* also passed to os_file_set_nocache() as the operation name */
- const char* type_str = NULL; /* only read by the disabled (#if 0) trace below */
- const char* purpose_str = NULL; /* only read by the disabled (#if 0) trace below */
-
-try_again:
- ut_a(name);
-
- if (create_mode == OS_FILE_OPEN || create_mode == OS_FILE_OPEN_RAW
- || create_mode == OS_FILE_OPEN_RETRY) {
- mode_str = "OPEN";
- create_flag = O_RDWR;
- } else if (create_mode == OS_FILE_CREATE) {
- mode_str = "CREATE";
- create_flag = O_RDWR | O_CREAT | O_EXCL;
- } else if (create_mode == OS_FILE_OVERWRITE) {
- mode_str = "OVERWRITE";
- create_flag = O_RDWR | O_CREAT | O_TRUNC;
- } else {
- create_flag = 0;
- ut_error;
- }
-
- if (type == OS_LOG_FILE) {
- type_str = "LOG";
- } else if (type == OS_DATA_FILE) {
- type_str = "DATA";
- } else {
- ut_error;
- }
-
- if (purpose == OS_FILE_AIO) {
- purpose_str = "AIO";
- } else if (purpose == OS_FILE_NORMAL) {
- purpose_str = "NORMAL";
- } else {
- ut_error;
- }
-
-#if 0
- fprintf(stderr, "Opening file %s, mode %s, type %s, purpose %s\n",
- name, mode_str, type_str, purpose_str);
-#endif
-#ifdef O_SYNC
- /* We let O_SYNC only affect log files; note that we map O_DSYNC to
- O_SYNC because the datasync options seemed to corrupt files in 2001
- in both Linux and Solaris */
- if (type == OS_LOG_FILE
- && srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
-
-# if 0
- fprintf(stderr, "Using O_SYNC for file %s\n", name);
-# endif
-
- create_flag = create_flag | O_SYNC;
- }
-#endif /* O_SYNC */
-
- file = open(name, create_flag, os_innodb_umask); /* the mode argument is given for every create_mode, not only when O_CREAT is set */
-
- if (file == -1) {
- *success = FALSE;
-
- /* When srv_file_per_table is on, file creation failure may not
- be critical to the whole instance. Do not crash the server in
- case of unknown errors. */
- if (srv_file_per_table) {
- retry = os_file_handle_error_no_exit(name,
- create_mode == OS_FILE_CREATE ?
- "create" : "open");
- } else {
- retry = os_file_handle_error(name,
- create_mode == OS_FILE_CREATE ?
- "create" : "open");
- }
-
- if (retry) {
- goto try_again;
- } else {
- return(file /* -1 */);
- }
- }
- /* else */
-
- *success = TRUE;
-
- /* We disable OS caching (O_DIRECT) only on data files.
- NOTE(review): mode_str is upper case ("OPEN", "CREATE",
- "OVERWRITE") while the parameter comment of os_file_set_nocache()
- speaks of "open" or "create"; it only affects the diagnostic text. */
- if (type != OS_LOG_FILE
- && srv_unix_file_flush_method == SRV_UNIX_O_DIRECT) {
-
- os_file_set_nocache(file, name, mode_str);
- }
-
-#ifdef USE_FILE_LOCK
- if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) {
-
- if (create_mode == OS_FILE_OPEN_RETRY) {
- int i;
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Retrying to lock"
- " the first data file\n",
- stderr);
- for (i = 0; i < 100; i++) { /* at most 100 further lock attempts */
- os_thread_sleep(1000000);
- if (!os_file_lock(file, name)) {
- *success = TRUE;
- return(file);
- }
- }
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Unable to open the first data file\n",
- stderr);
- }
-
- *success = FALSE; /* overrides the TRUE stored after open() succeeded */
- close(file);
- file = -1;
- }
-#endif /* USE_FILE_LOCK */
-
- return(file);
-#endif /* __WIN__ */
-}
-
-/***********************************************************************//**
-Removes the named file, treating a file that is already missing as success.
-The caller must have closed the file before calling this.
-@return TRUE if the file was deleted or did not exist, FALSE if the
-deletion failed */
-UNIV_INTERN
-ibool
-os_file_delete_if_exists(
-/*=====================*/
-    const char* name) /*!< in: file path as a null-terminated string */
-{
-#ifdef __WIN__
-    ulint n_failures;
-
-    /* In Windows, deleting an .ibd file may fail if ibbackup is copying
-    it, so the deletion is retried, sleeping between the attempts */
-
-    for (n_failures = 1; ; n_failures++) {
-
-        if (DeleteFile((LPCTSTR)name)) {
-
-            return(TRUE);
-        }
-
-        if (GetLastError() == ERROR_FILE_NOT_FOUND) {
-            /* nothing to delete: not an error here */
-
-            return(TRUE);
-        }
-
-        /* Start warning after 100 failures, on every 10th one */
-        if (n_failures > 100 && n_failures % 10 == 0) {
-            fprintf(stderr,
-                    "InnoDB: Warning: cannot delete file %s\n"
-                    "InnoDB: Are you running ibbackup"
-                    " to back up the file?\n", name);
-
-            os_file_get_last_error(TRUE); /* print error information */
-        }
-
-        os_thread_sleep(1000000); /* sleep for a second */
-
-        if (n_failures > 2000) {
-            /* give up */
-
-            return(FALSE);
-        }
-    }
-#else
-    if (unlink(name) == 0 || errno == ENOENT) {
-        /* deleted, or there was no such file */
-
-        return(TRUE);
-    }
-
-    os_file_handle_error_no_exit(name, "delete");
-
-    return(FALSE);
-#endif
-}
-
-/***********************************************************************//**
-Removes the named file. Unlike os_file_delete_if_exists(), a missing file
-is reported as a failure. The caller must have closed the file before
-calling this.
-@return TRUE if the file was deleted, FALSE otherwise */
-UNIV_INTERN
-ibool
-os_file_delete(
-/*===========*/
-    const char* name) /*!< in: file path as a null-terminated string */
-{
-#ifdef __WIN__
-    ulint n_failures;
-
-    /* In Windows, deleting an .ibd file may fail if ibbackup is copying
-    it, so the deletion is retried, sleeping between the attempts */
-
-    for (n_failures = 1; ; n_failures++) {
-
-        if (DeleteFile((LPCTSTR)name)) {
-
-            return(TRUE);
-        }
-
-        if (GetLastError() == ERROR_FILE_NOT_FOUND) {
-            /* A missing file is classified as a 'mild' error:
-            return at once without retrying */
-
-            return(FALSE);
-        }
-
-        /* Start warning after 100 failures, on every 10th one */
-        if (n_failures > 100 && n_failures % 10 == 0) {
-            fprintf(stderr,
-                    "InnoDB: Warning: cannot delete file %s\n"
-                    "InnoDB: Are you running ibbackup"
-                    " to back up the file?\n", name);
-
-            os_file_get_last_error(TRUE); /* print error information */
-        }
-
-        os_thread_sleep(1000000); /* sleep for a second */
-
-        if (n_failures > 2000) {
-            /* give up */
-
-            return(FALSE);
-        }
-    }
-#else
-    if (unlink(name) == 0) {
-
-        return(TRUE);
-    }
-
-    os_file_handle_error_no_exit(name, "delete");
-
-    return(FALSE);
-#endif
-}
-
-/***********************************************************************//**
-Gives a file a new path; the new path may lie in another directory. It is
-safest that the file is closed before calling this function. A failure is
-reported through os_file_handle_error_no_exit().
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_rename(
-/*===========*/
-    const char* oldpath,/*!< in: old file path as a null-terminated
-                        string */
-    const char* newpath)/*!< in: new file path */
-{
-    ibool renamed;
-
-#ifdef __WIN__
-    /* MoveFile() returns nonzero on success */
-    renamed = MoveFile((LPCTSTR)oldpath, (LPCTSTR)newpath) ? TRUE : FALSE;
-#else
-    /* rename() returns 0 on success */
-    renamed = (rename(oldpath, newpath) == 0) ? TRUE : FALSE;
-#endif
-
-    if (renamed) {
-
-        return(TRUE);
-    }
-
-    os_file_handle_error_no_exit(oldpath, "rename");
-
-    return(FALSE);
-}
-
-/***********************************************************************//**
-Closes a file handle. A failure is passed to os_file_handle_error(); the
-error number can be retrieved with os_file_get_last_error.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_close(
-/*==========*/
-    os_file_t file) /*!< in, own: handle to a file */
-{
-    ibool closed;
-
-#ifdef __WIN__
-    ut_a(file);
-
-    /* CloseHandle() returns nonzero on success */
-    closed = CloseHandle(file) ? TRUE : FALSE;
-#else
-    /* close() returns -1 on failure */
-    closed = (close(file) != -1) ? TRUE : FALSE;
-#endif
-
-    if (closed) {
-
-        return(TRUE);
-    }
-
-    os_file_handle_error(NULL, "close");
-
-    return(FALSE);
-}
-
-#ifdef UNIV_HOTBACKUP
-/***********************************************************************//**
-Closes a file handle without reporting a failure to any error handler;
-the outcome is only visible in the return value.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_close_no_error_handling(
-/*============================*/
-    os_file_t file) /*!< in, own: handle to a file */
-{
-#ifdef __WIN__
-    ut_a(file);
-
-    /* CloseHandle() returns nonzero on success */
-    return(CloseHandle(file) ? TRUE : FALSE);
-#else
-    /* close() returns -1 on failure */
-    return((close(file) == -1) ? FALSE : TRUE);
-#endif
-}
-#endif /* UNIV_HOTBACKUP */
-
-/***********************************************************************//**
-Reports the size of an open file, split into two 32-bit halves.
-The non-Windows branch finds the size by seeking to the end of the file
-with lseek(), so the file offset of the descriptor is moved there.
-@return TRUE if success; the output parameters are written only then */
-UNIV_INTERN
-ibool
-os_file_get_size(
-/*=============*/
-    os_file_t file, /*!< in: handle to a file */
-    ulint* size, /*!< out: least significant 32 bits of file
-                 size */
-    ulint* size_high)/*!< out: most significant 32 bits of size */
-{
-#ifdef __WIN__
-    DWORD upper;
-    DWORD lower;
-
-    lower = GetFileSize(file, &upper);
-
-    /* 0xFFFFFFFF can be a valid low word, so it only signals a failure
-    when the last error is set as well */
-    if (lower != 0xFFFFFFFF || GetLastError() == NO_ERROR) {
-        *size = lower;
-        *size_high = upper;
-
-        return(TRUE);
-    }
-
-    return(FALSE);
-#else
-    off_t end_pos;
-
-    end_pos = lseek(file, 0, SEEK_END);
-
-    if (end_pos == ((off_t)-1)) {
-
-        return(FALSE);
-    }
-
-    if (sizeof(off_t) <= 4) {
-        /* The whole offset fits into the low word */
-        *size = (ulint) end_pos;
-        *size_high = 0;
-    } else {
-        *size = (ulint)(end_pos & 0xFFFFFFFFUL);
-        *size_high = (ulint)(end_pos >> 32);
-    }
-
-    return(TRUE);
-#endif
-}
-
-/***********************************************************************//**
-Reports the size of an open file as a single 64-bit integer, combining the
-two 32-bit halves delivered by os_file_get_size().
-@return size in bytes, -1 if error */
-UNIV_INTERN
-ib_int64_t
-os_file_get_size_as_iblonglong(
-/*===========================*/
-    os_file_t file) /*!< in: handle to a file */
-{
-    ulint low_word;
-    ulint high_word;
-
-    if (!os_file_get_size(file, &low_word, &high_word)) {
-
-        return(-1);
-    }
-
-    return((((ib_int64_t)high_word) << 32) + (ib_int64_t)low_word);
-}
-
-/***********************************************************************//**
-Write the specified number of zeros to a newly created file, starting at
-offset 0, and then flush the file with os_file_flush().
-The zeros are written with os_file_write() in chunks of at most 64 pages
-taken from a page-aligned buffer. The chunk size is never smaller than one
-page: for a requested size below UNIV_PAGE_SIZE the computed chunk size
-would otherwise be 0, every os_file_write() call would write 0 bytes and
-the write loop would never advance.
-For sizes of 100 MB or more a progress line is printed to stderr.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_set_size(
-/*=============*/
-    const char* name, /*!< in: name of the file or path as a
-                      null-terminated string */
-    os_file_t file, /*!< in: handle to a file */
-    ulint size, /*!< in: least significant 32 bits of file
-                size */
-    ulint size_high)/*!< in: most significant 32 bits of size */
-{
-    ib_int64_t current_size;
-    ib_int64_t desired_size;
-    ibool ret;
-    byte* buf;
-    byte* buf2;
-    ulint buf_size;
-
-    ut_a(size == (size & 0xFFFFFFFF));
-
-    current_size = 0;
-    desired_size = (ib_int64_t)size + (((ib_int64_t)size_high) << 32);
-
-    /* Write up to 64 pages at a time. */
-    buf_size = ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE))
-        * UNIV_PAGE_SIZE;
-
-    if (buf_size == 0) {
-        /* The requested size is smaller than one page. Use a
-        one-page buffer so that the loop below makes progress;
-        only desired_size bytes of it are actually written. */
-        buf_size = UNIV_PAGE_SIZE;
-    }
-
-    /* One extra page leaves room for aligning the buffer below */
-    buf2 = ut_malloc(buf_size + UNIV_PAGE_SIZE);
-
-    /* Align the buffer for possible raw i/o */
-    buf = ut_align(buf2, UNIV_PAGE_SIZE);
-
-    /* Write buffer full of zeros */
-    memset(buf, 0, buf_size);
-
-    if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) {
-
-        fprintf(stderr, "InnoDB: Progress in MB:");
-    }
-
-    while (current_size < desired_size) {
-        ulint n_bytes;
-
-        /* The last chunk may be shorter than the buffer */
-        if (desired_size - current_size < (ib_int64_t) buf_size) {
-            n_bytes = (ulint) (desired_size - current_size);
-        } else {
-            n_bytes = buf_size;
-        }
-
-        ret = os_file_write(name, file, buf,
-                            (ulint)(current_size & 0xFFFFFFFF),
-                            (ulint)(current_size >> 32),
-                            n_bytes);
-        if (!ret) {
-            ut_free(buf2);
-
-            return(FALSE);
-        }
-
-        /* Print about progress for each 100 MB written */
-        if ((ib_int64_t) (current_size + n_bytes)
-            / (ib_int64_t)(100 * 1024 * 1024)
-            != current_size / (ib_int64_t)(100 * 1024 * 1024)) {
-
-            fprintf(stderr, " %lu00",
-                    (ulong) ((current_size + n_bytes)
-                             / (ib_int64_t)(100 * 1024 * 1024)));
-        }
-
-        current_size += n_bytes;
-    }
-
-    if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) {
-
-        fprintf(stderr, "\n");
-    }
-
-    ut_free(buf2);
-
-    ret = os_file_flush(file);
-
-    if (ret) {
-        return(TRUE);
-    }
-
-    return(FALSE);
-}
-
-/***********************************************************************//**
-Cuts a file off at its current position: on Windows with SetEndOfFile() on
-the underlying OS handle, elsewhere with ftruncate() at the offset reported
-by ftell().
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_set_eof(
-/*============*/
-    FILE* file) /*!< in: file to be truncated */
-{
-#ifdef __WIN__
-    HANDLE os_handle;
-
-    os_handle = (HANDLE) _get_osfhandle(fileno(file));
-
-    return(SetEndOfFile(os_handle));
-#else /* __WIN__ */
-    int rc;
-
-    /* ftruncate() returns 0 on success */
-    rc = ftruncate(fileno(file), ftell(file));
-
-    return(rc == 0);
-#endif /* __WIN__ */
-}
-
-#ifndef __WIN__
-/***********************************************************************//**
-Calls fsync(2) and keeps repeating the call for as long as it fails with
-ENOLCK, sleeping 0.2 seconds between the attempts. A message is printed on
-the first and then on every 100th such failure. Every attempt is counted
-in os_n_fsyncs.
-Any other outcome ends the loop: 0 is returned on success; otherwise -1 is
-returned and the global variable errno indicates the error.
-@return 0 if success, -1 otherwise */
-
-static
-int
-os_file_fsync(
-/*==========*/
-    os_file_t file) /*!< in: handle to a file */
-{
-    int rc;
-    int n_enolck = 0;
-
-    for (;;) {
-        rc = fsync(file);
-
-        os_n_fsyncs++;
-
-        if (rc != -1 || errno != ENOLCK) {
-            /* success, or an error that is not retried */
-
-            break;
-        }
-
-        if (n_enolck % 100 == 0) {
-
-            ut_print_timestamp(stderr);
-            fprintf(stderr,
-                    " InnoDB: fsync(): "
-                    "No locks available; retrying\n");
-        }
-
-        os_thread_sleep(200000 /* 0.2 sec */);
-
-        n_enolck++;
-    }
-
-    return(rc);
-}
-#endif /* !__WIN__ */
-
-/***********************************************************************//**
-Flushes the write buffers of a given file to the disk.
-Windows branch: calls FlushFileBuffers() and counts the call in
-os_n_fsyncs. Other platforms: calls os_file_fsync(); when
-HAVE_DARWIN_THREADS is defined and srv_have_fullfsync is set,
-fcntl(F_FULLFSYNC) is tried first and os_file_fsync() is the fallback.
-A failure that is explained by a raw device (ERROR_INVALID_FUNCTION on
-Windows, EINVAL elsewhere, while srv_start_raw_disk_in_use is set) is
-reported as success. Any other failure is passed to os_file_handle_error()
-and then treated as fatal through ut_error.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_flush(
-/*==========*/
- os_file_t file) /*!< in, own: handle to a file */
-{
-#ifdef __WIN__
- BOOL ret;
-
- ut_a(file);
-
- os_n_fsyncs++;
-
- ret = FlushFileBuffers(file);
-
- if (ret) {
- return(TRUE);
- }
-
- /* Since Windows returns ERROR_INVALID_FUNCTION if the 'file' is
- actually a raw device, we choose to ignore that error if we are using
- raw disks */
-
- if (srv_start_raw_disk_in_use && GetLastError()
- == ERROR_INVALID_FUNCTION) {
- return(TRUE);
- }
-
- os_file_handle_error(NULL, "flush");
-
- /* It is a fatal error if a file flush does not succeed, because then
- the database can get corrupt on disk */
- ut_error;
-
- return(FALSE); /* NOTE(review): presumably not reached if ut_error aborts - confirm */
-#else
- int ret;
-
-#if defined(HAVE_DARWIN_THREADS)
-# ifndef F_FULLFSYNC
- /* The following definition is from the Mac OS X 10.3 <sys/fcntl.h> */
-# define F_FULLFSYNC 51 /* fsync + ask the drive to flush to the media */
-# elif F_FULLFSYNC != 51
-# error "F_FULLFSYNC != 51: ABI incompatibility with Mac OS X 10.3"
-# endif
- /* Apple has disabled fsync() for internal disk drives in OS X. That
- caused corruption for a user when he tested a power outage. Let us in
- OS X use a nonstandard flush method recommended by an Apple
- engineer. */
-
- if (!srv_have_fullfsync) {
- /* If we are not on an operating system that supports this,
- then fall back to a plain fsync. */
-
- ret = os_file_fsync(file);
- } else {
- ret = fcntl(file, F_FULLFSYNC, NULL);
-
- if (ret) {
- /* If we are not on a file system that supports this,
- then fall back to a plain fsync. */
- ret = os_file_fsync(file);
- }
- }
-#else
- ret = os_file_fsync(file);
-#endif
-
- if (ret == 0) {
- return(TRUE);
- }
-
- /* Since Linux returns EINVAL if the 'file' is actually a raw device,
- we choose to ignore that error if we are using raw disks */
-
- if (srv_start_raw_disk_in_use && errno == EINVAL) {
-
- return(TRUE);
- }
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error: the OS said file flush did not succeed\n");
-
- os_file_handle_error(NULL, "flush");
-
- /* It is a fatal error if a file flush does not succeed, because then
- the database can get corrupt on disk */
- ut_error;
-
- return(FALSE); /* NOTE(review): presumably not reached if ut_error aborts - confirm */
-#endif
-}
-
-#ifndef __WIN__
-/*******************************************************************//**
-Does a synchronous read operation in Posix.
-The two 32-bit offset halves are combined into one off_t. If off_t is not
-wider than 4 bytes and offset_high is nonzero, an error message is printed
-but the read still proceeds using only the low 32 bits of the offset.
-When pread() is usable (HAVE_PREAD defined, HAVE_BROKEN_PREAD not defined)
-it is called directly; otherwise lseek() followed by read() is used, and
-outside UNIV_HOTBACKUP that pair is protected by one of the
-os_file_seek_mutexes, selected by the file handle value.
-Bookkeeping: os_n_file_reads is incremented once per call, and the
-pending-read counters are raised around the system call under
-os_file_count_mutex.
-@return number of bytes read, -1 if error */
-static
-ssize_t
-os_file_pread(
-/*==========*/
- os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
- ulint n, /*!< in: number of bytes to read */
- ulint offset, /*!< in: least significant 32 bits of file
- offset from where to read */
- ulint offset_high) /*!< in: most significant 32 bits of
- offset */
-{
- off_t offs;
-#if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD)
- ssize_t n_bytes;
-#endif /* HAVE_PREAD && !HAVE_BROKEN_PREAD */
-
- ut_a((offset & 0xFFFFFFFFUL) == offset);
-
- /* If off_t is > 4 bytes in size, then we assume we can pass a
- 64-bit address */
-
- if (sizeof(off_t) > 4) {
- offs = (off_t)offset + (((off_t)offset_high) << 32);
-
- } else {
- offs = (off_t)offset;
-
- if (offset_high > 0) {
- fprintf(stderr,
- "InnoDB: Error: file read at offset > 4 GB\n");
- }
- }
-
- os_n_file_reads++;
-
-#if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD)
- os_mutex_enter(os_file_count_mutex);
- os_file_n_pending_preads++;
- os_n_pending_reads++;
- os_mutex_exit(os_file_count_mutex);
-
- n_bytes = pread(file, buf, (ssize_t)n, offs);
-
- os_mutex_enter(os_file_count_mutex);
- os_file_n_pending_preads--;
- os_n_pending_reads--;
- os_mutex_exit(os_file_count_mutex);
-
- return(n_bytes);
-#else
- {
- off_t ret_offset;
- ssize_t ret;
-#ifndef UNIV_HOTBACKUP
- ulint i;
-#endif /* !UNIV_HOTBACKUP */
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_reads++;
- os_mutex_exit(os_file_count_mutex);
-
-#ifndef UNIV_HOTBACKUP
- /* Protect the seek / read operation with a mutex */
- i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
-
- os_mutex_enter(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
- ret_offset = lseek(file, offs, SEEK_SET);
-
- if (ret_offset < 0) {
- ret = -1; /* seek failed: skip the read and report an error */
- } else {
- ret = read(file, buf, (ssize_t)n);
- }
-
-#ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_reads--;
- os_mutex_exit(os_file_count_mutex);
-
- return(ret);
- }
-#endif
-}
-
-/*******************************************************************//**
-Does a synchronous write operation in Posix.
-The two 32-bit offset halves are combined into one off_t. If off_t is not
-wider than 4 bytes and offset_high is nonzero, an error message is printed
-but the write still proceeds using only the low 32 bits of the offset.
-When the pwrite() path is compiled in, pwrite() is called directly;
-otherwise lseek() followed by write() is used, and outside UNIV_HOTBACKUP
-that pair is protected by one of the os_file_seek_mutexes, selected by the
-file handle value.
-With UNIV_DO_FLUSH defined, the file is flushed with os_file_flush() after
-the write unless the flush method is SRV_UNIX_LITTLESYNC or
-SRV_UNIX_NOSYNC, or os_do_not_call_flush_at_each_write is set. In the
-lseek()/write() path that flush happens while the seek mutex is still held.
-Bookkeeping: os_n_file_writes is incremented once per call, and the
-pending-write counters are raised around the system call under
-os_file_count_mutex.
-NOTE(review): the pwrite() path is guarded by HAVE_BROKEN_PREAD, the same
-macro that guards the pread() path in os_file_pread(); confirm that this is
-intended.
-@return number of bytes written, -1 if error */
-static
-ssize_t
-os_file_pwrite(
-/*===========*/
- os_file_t file, /*!< in: handle to a file */
- const void* buf, /*!< in: buffer from where to write */
- ulint n, /*!< in: number of bytes to write */
- ulint offset, /*!< in: least significant 32 bits of file
- offset where to write */
- ulint offset_high) /*!< in: most significant 32 bits of
- offset */
-{
- ssize_t ret;
- off_t offs;
-
- ut_a((offset & 0xFFFFFFFFUL) == offset);
-
- /* If off_t is > 4 bytes in size, then we assume we can pass a
- 64-bit address */
-
- if (sizeof(off_t) > 4) {
- offs = (off_t)offset + (((off_t)offset_high) << 32);
- } else {
- offs = (off_t)offset;
-
- if (offset_high > 0) {
- fprintf(stderr,
- "InnoDB: Error: file write"
- " at offset > 4 GB\n");
- }
- }
-
- os_n_file_writes++;
-
-#if defined(HAVE_PWRITE) && !defined(HAVE_BROKEN_PREAD)
- os_mutex_enter(os_file_count_mutex);
- os_file_n_pending_pwrites++;
- os_n_pending_writes++;
- os_mutex_exit(os_file_count_mutex);
-
- ret = pwrite(file, buf, (ssize_t)n, offs);
-
- os_mutex_enter(os_file_count_mutex);
- os_file_n_pending_pwrites--;
- os_n_pending_writes--;
- os_mutex_exit(os_file_count_mutex);
-
-# ifdef UNIV_DO_FLUSH
- if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
- && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
- && !os_do_not_call_flush_at_each_write) {
-
- /* Always do fsync to reduce the probability that when
- the OS crashes, a database page is only partially
- physically written to disk. */
-
- ut_a(TRUE == os_file_flush(file));
- }
-# endif /* UNIV_DO_FLUSH */
-
- return(ret);
-#else
- {
- off_t ret_offset;
-# ifndef UNIV_HOTBACKUP
- ulint i;
-# endif /* !UNIV_HOTBACKUP */
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_writes++;
- os_mutex_exit(os_file_count_mutex);
-
-# ifndef UNIV_HOTBACKUP
- /* Protect the seek / write operation with a mutex */
- i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
-
- os_mutex_enter(os_file_seek_mutexes[i]);
-# endif /* UNIV_HOTBACKUP */
-
- ret_offset = lseek(file, offs, SEEK_SET);
-
- if (ret_offset < 0) {
- ret = -1;
-
- goto func_exit; /* seek failed: skip both the write and the flush */
- }
-
- ret = write(file, buf, (ssize_t)n);
-
-# ifdef UNIV_DO_FLUSH
- if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
- && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
- && !os_do_not_call_flush_at_each_write) {
-
- /* Always do fsync to reduce the probability that when
- the OS crashes, a database page is only partially
- physically written to disk. */
-
- ut_a(TRUE == os_file_flush(file));
- }
-# endif /* UNIV_DO_FLUSH */
-
-func_exit:
-# ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-# endif /* !UNIV_HOTBACKUP */
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_writes--;
- os_mutex_exit(os_file_count_mutex);
-
- return(ret);
- }
-#endif
-}
-#endif
-
-/*******************************************************************//**
-Requests a synchronous positioned read operation.
-The read counts as successful only if exactly n bytes were read.
-Windows branch: positions the handle with SetFilePointer() and reads with
-ReadFile(); outside UNIV_HOTBACKUP the pair is protected by one of the
-os_file_seek_mutexes. Other platforms: delegates to os_file_pread() and
-prints a message describing the short or failed read.
-On failure os_file_handle_error() is consulted; if it returns TRUE the
-read is attempted again, otherwise a fatal error message is printed and
-ut_error is invoked.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
-ibool
-os_file_read(
-/*=========*/
- os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
- ulint offset, /*!< in: least significant 32 bits of file
- offset where to read */
- ulint offset_high, /*!< in: most significant 32 bits of
- offset */
- ulint n) /*!< in: number of bytes to read */
-{
-#ifdef __WIN__
- BOOL ret;
- DWORD len;
- DWORD ret2;
- DWORD low;
- DWORD high;
- ibool retry;
-#ifndef UNIV_HOTBACKUP
- ulint i;
-#endif /* !UNIV_HOTBACKUP */
-
- ut_a((offset & 0xFFFFFFFFUL) == offset);
-
- os_n_file_reads++;
- os_bytes_read_since_printout += n;
-
-try_again:
- ut_ad(file);
- ut_ad(buf);
- ut_ad(n > 0);
-
- low = (DWORD) offset;
- high = (DWORD) offset_high;
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_reads++;
- os_mutex_exit(os_file_count_mutex);
-
-#ifndef UNIV_HOTBACKUP
- /* Protect the seek / read operation with a mutex */
- i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
-
- os_mutex_enter(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
- ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
-
- if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
-
-#ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_reads--;
- os_mutex_exit(os_file_count_mutex);
-
- goto error_handling;
- }
-
- ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
-
-#ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_reads--;
- os_mutex_exit(os_file_count_mutex);
-
- if (ret && len == n) {
- return(TRUE);
- }
-#else /* __WIN__ */
- ibool retry;
- ssize_t ret;
-
- os_bytes_read_since_printout += n; /* added once; the try_again label below comes after this line */
-
-try_again:
- ret = os_file_pread(file, buf, n, offset, offset_high);
-
- if ((ulint)ret == n) {
-
- return(TRUE);
- }
-
- fprintf(stderr,
- "InnoDB: Error: tried to read %lu bytes at offset %lu %lu.\n"
- "InnoDB: Was only able to read %ld.\n",
- (ulong)n, (ulong)offset_high,
- (ulong)offset, (long)ret); /* the offset is printed as high word, then low word */
-#endif /* __WIN__ */
-#ifdef __WIN__
-error_handling:
-#endif
- retry = os_file_handle_error(NULL, "read");
-
- if (retry) {
- goto try_again;
- }
-
- fprintf(stderr,
- "InnoDB: Fatal error: cannot read from file."
- " OS error number %lu.\n",
-#ifdef __WIN__
- (ulong) GetLastError()
-#else
- (ulong) errno
-#endif
- );
- fflush(stderr);
-
- ut_error;
-
- return(FALSE); /* NOTE(review): presumably not reached if ut_error aborts - confirm */
-}
-
-/*******************************************************************//**
-Requests a synchronous positioned read operation. This function does not do
-any error handling. In case of error it returns FALSE.
-The read counts as successful only if exactly n bytes were read.
-Windows branch: positions the handle with SetFilePointer() and reads with
-ReadFile(); outside UNIV_HOTBACKUP the pair is protected by one of the
-os_file_seek_mutexes. Other platforms: delegates to os_file_pread().
-On failure os_file_handle_error_no_exit() is consulted; if it returns TRUE
-the read is attempted again, otherwise FALSE is returned. Unlike
-os_file_read(), no fatal error is raised here.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
-ibool
-os_file_read_no_error_handling(
-/*===========================*/
- os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
- ulint offset, /*!< in: least significant 32 bits of file
- offset where to read */
- ulint offset_high, /*!< in: most significant 32 bits of
- offset */
- ulint n) /*!< in: number of bytes to read */
-{
-#ifdef __WIN__
- BOOL ret;
- DWORD len;
- DWORD ret2;
- DWORD low;
- DWORD high;
- ibool retry;
-#ifndef UNIV_HOTBACKUP
- ulint i;
-#endif /* !UNIV_HOTBACKUP */
-
- ut_a((offset & 0xFFFFFFFFUL) == offset);
-
- os_n_file_reads++;
- os_bytes_read_since_printout += n;
-
-try_again:
- ut_ad(file);
- ut_ad(buf);
- ut_ad(n > 0);
-
- low = (DWORD) offset;
- high = (DWORD) offset_high;
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_reads++;
- os_mutex_exit(os_file_count_mutex);
-
-#ifndef UNIV_HOTBACKUP
- /* Protect the seek / read operation with a mutex */
- i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
-
- os_mutex_enter(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
- ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
-
- if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
-
-#ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_reads--;
- os_mutex_exit(os_file_count_mutex);
-
- goto error_handling;
- }
-
- ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
-
-#ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_reads--;
- os_mutex_exit(os_file_count_mutex);
-
- if (ret && len == n) {
- return(TRUE);
- }
-#else /* __WIN__ */
- ibool retry;
- ssize_t ret;
-
- os_bytes_read_since_printout += n; /* added once; the try_again label below comes after this line */
-
-try_again:
- ret = os_file_pread(file, buf, n, offset, offset_high);
-
- if ((ulint)ret == n) {
-
- return(TRUE);
- }
-#endif /* __WIN__ */
-#ifdef __WIN__
-error_handling:
-#endif
- retry = os_file_handle_error_no_exit(NULL, "read");
-
- if (retry) {
- goto try_again;
- }
-
- return(FALSE);
-}
-
-/*******************************************************************//**
-Repositions the stream to its beginning, copies at most size - 1 bytes
-from it into str and appends a terminating NUL. Nothing is done when size
-is 0. All errors are silently ignored. This function is mostly meant to be
-used with temporary files. */
-UNIV_INTERN
-void
-os_file_read_string(
-/*================*/
-    FILE* file, /*!< in: file to read from */
-    char* str, /*!< in: buffer where to read */
-    ulint size) /*!< in: size of buffer */
-{
-    size_t n_read;
-
-    if (size != 0) {
-        rewind(file);
-
-        /* Leave one byte of the buffer for the terminator */
-        n_read = fread(str, 1, size - 1, file);
-
-        str[n_read] = '\0';
-    }
-}
-
-/*******************************************************************//**
-Requests a synchronous write operation of n bytes at the 64-bit file
-offset formed by offset_high and offset.
-
-When the write fails or is short, a diagnostic is printed to stderr, but
-only the first time: the message is suppressed once os_has_said_disk_full
-has been set, and printing it sets that flag.
-
-In the Windows branch the seek + write pair is protected by one of the
-os_file_seek_mutexes (unless UNIV_HOTBACKUP is defined), and a write that
-fails with ERROR_LOCK_VIOLATION is retried up to 100 times with a sleep
-between attempts.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
-ibool
-os_file_write(
-/*==========*/
-	const char*	name,	/*!< in: name of the file or path as a
-				null-terminated string */
-	os_file_t	file,	/*!< in: handle to a file */
-	const void*	buf,	/*!< in: buffer from which to write */
-	ulint		offset,	/*!< in: least significant 32 bits of file
-				offset where to write */
-	ulint		offset_high, /*!< in: most significant 32 bits of
-				offset */
-	ulint		n)	/*!< in: number of bytes to write */
-{
-#ifdef __WIN__
-	BOOL		ret;
-	DWORD		len;
-	DWORD		ret2;
-	DWORD		low;
-	DWORD		high;
-	ulint		n_retries	= 0;
-	ulint		err;
-#ifndef UNIV_HOTBACKUP
-	ulint		i;
-#endif /* !UNIV_HOTBACKUP */
-
-	/* The low word must really fit in 32 bits */
-	ut_a((offset & 0xFFFFFFFF) == offset);
-
-	os_n_file_writes++;
-
-	ut_ad(file);
-	ut_ad(buf);
-	ut_ad(n > 0);
-retry:
-	low = (DWORD) offset;
-	high = (DWORD) offset_high;
-
-	os_mutex_enter(os_file_count_mutex);
-	os_n_pending_writes++;
-	os_mutex_exit(os_file_count_mutex);
-
-#ifndef UNIV_HOTBACKUP
-	/* Protect the seek / write operation with a mutex */
-	i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
-
-	os_mutex_enter(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
-	ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
-
-	if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
-
-#ifndef UNIV_HOTBACKUP
-		os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
-		os_mutex_enter(os_file_count_mutex);
-		os_n_pending_writes--;
-		os_mutex_exit(os_file_count_mutex);
-
-		ut_print_timestamp(stderr);
-
-		fprintf(stderr,
-			" InnoDB: Error: File pointer positioning to"
-			" file %s failed at\n"
-			"InnoDB: offset %lu %lu. Operating system"
-			" error number %lu.\n"
-			"InnoDB: Some operating system error numbers"
-			" are described at\n"
-			"InnoDB: "
-			REFMAN "operating-system-error-codes.html\n",
-			name, (ulong) offset_high, (ulong) offset,
-			(ulong) GetLastError());
-
-		return(FALSE);
-	}
-
-	ret = WriteFile(file, buf, (DWORD) n, &len, NULL);
-
-	/* Always do fsync to reduce the probability that when the OS crashes,
-	a database page is only partially physically written to disk. */
-
-# ifdef UNIV_DO_FLUSH
-	if (!os_do_not_call_flush_at_each_write) {
-		ut_a(TRUE == os_file_flush(file));
-	}
-# endif /* UNIV_DO_FLUSH */
-
-#ifndef UNIV_HOTBACKUP
-	os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
-	os_mutex_enter(os_file_count_mutex);
-	os_n_pending_writes--;
-	os_mutex_exit(os_file_count_mutex);
-
-	if (ret && len == n) {
-
-		return(TRUE);
-	}
-
-	/* If some background file system backup tool is running, then, at
-	least in Windows 2000, we may get here a specific error. Let us
-	retry the operation 100 times, with 1 second waits. */
-
-	if (GetLastError() == ERROR_LOCK_VIOLATION && n_retries < 100) {
-
-		os_thread_sleep(1000000);
-
-		n_retries++;
-
-		goto retry;
-	}
-
-	if (!os_has_said_disk_full) {
-
-		err = (ulint)GetLastError();
-
-		ut_print_timestamp(stderr);
-
-		fprintf(stderr,
-			" InnoDB: Error: Write to file %s failed"
-			" at offset %lu %lu.\n"
-			"InnoDB: %lu bytes should have been written,"
-			" only %lu were written.\n"
-			"InnoDB: Operating system error number %lu.\n"
-			"InnoDB: Check that your OS and file system"
-			" support files of this size.\n"
-			"InnoDB: Check also that the disk is not full"
-			" or a disk quota exceeded.\n",
-			name, (ulong) offset_high, (ulong) offset,
-			(ulong) n, (ulong) len, (ulong) err);
-
-		if (strerror((int)err) != NULL) {
-			fprintf(stderr,
-				"InnoDB: Error number %lu means '%s'.\n",
-				(ulong) err, strerror((int)err));
-		}
-
-		fprintf(stderr,
-			"InnoDB: Some operating system error numbers"
-			" are described at\n"
-			"InnoDB: "
-			REFMAN "operating-system-error-codes.html\n");
-
-		os_has_said_disk_full = TRUE;
-	}
-
-	return(FALSE);
-#else
-	ssize_t	ret;
-	int	err;
-
-	ret = os_file_pwrite(file, buf, n, offset, offset_high);
-
-	if ((ulint)ret == n) {
-
-		return(TRUE);
-	}
-
-	/* Remember errno before any other library call is made: the
-	timestamp and fprintf() calls below are allowed to change it, and
-	the diagnostic must report the error of the write itself. */
-	err = errno;
-
-	if (!os_has_said_disk_full) {
-
-		ut_print_timestamp(stderr);
-
-		fprintf(stderr,
-			" InnoDB: Error: Write to file %s failed"
-			" at offset %lu %lu.\n"
-			"InnoDB: %lu bytes should have been written,"
-			" only %ld were written.\n"
-			"InnoDB: Operating system error number %lu.\n"
-			"InnoDB: Check that your OS and file system"
-			" support files of this size.\n"
-			"InnoDB: Check also that the disk is not full"
-			" or a disk quota exceeded.\n",
-			name, (ulong) offset_high, (ulong) offset,
-			(ulong) n, (long int) ret, (ulong) err);
-
-		if (strerror(err) != NULL) {
-			fprintf(stderr,
-				"InnoDB: Error number %lu means '%s'.\n",
-				(ulong) err, strerror(err));
-		}
-
-		fprintf(stderr,
-			"InnoDB: Some operating system error numbers"
-			" are described at\n"
-			"InnoDB: "
-			REFMAN "operating-system-error-codes.html\n");
-
-		os_has_said_disk_full = TRUE;
-	}
-
-	return(FALSE);
-#endif
-}
-
-/*******************************************************************//**
-Checks whether a file exists and, if it does, reports its type.
-A stat failure with errno ENOENT or ENOTDIR is treated as "does not
-exist": *exists is set to FALSE, *type is left untouched and the call
-still counts as successful. Any other stat failure is passed to
-os_file_handle_error_no_exit() and makes the call fail without writing
-to either output parameter.
-@return TRUE if call succeeded */
-UNIV_INTERN
-ibool
-os_file_status(
-/*===========*/
-	const char*	path,	/*!< in: pathname of the file */
-	ibool*		exists,	/*!< out: TRUE if file exists */
-	os_file_type_t*	type)	/*!< out: type of the file (if it exists) */
-{
-#ifdef __WIN__
-	struct _stat	st;
-	int		rc = _stat(path, &st);
-#else
-	struct stat	st;
-	int		rc = stat(path, &st);
-#endif /* __WIN__ */
-
-	if (rc != 0) {
-		if (errno == ENOENT || errno == ENOTDIR) {
-			/* the file does not exist */
-			*exists = FALSE;
-
-			return(TRUE);
-		}
-
-		/* the file exists, but it could not be examined */
-		os_file_handle_error_no_exit(path, "stat");
-
-		return(FALSE);
-	}
-
-	*exists = TRUE;
-
-#ifdef __WIN__
-	if (st.st_mode & _S_IFDIR) {
-		*type = OS_FILE_TYPE_DIR;
-	} else if (st.st_mode & _S_IFREG) {
-		*type = OS_FILE_TYPE_FILE;
-	} else {
-		*type = OS_FILE_TYPE_UNKNOWN;
-	}
-#else
-	if (S_ISDIR(st.st_mode)) {
-		*type = OS_FILE_TYPE_DIR;
-	} else if (S_ISLNK(st.st_mode)) {
-		*type = OS_FILE_TYPE_LINK;
-	} else if (S_ISREG(st.st_mode)) {
-		*type = OS_FILE_TYPE_FILE;
-	} else {
-		*type = OS_FILE_TYPE_UNKNOWN;
-	}
-#endif /* __WIN__ */
-
-	return(TRUE);
-}
-
-/*******************************************************************//**
-Fills in stat_info with the type, the ctime/atime/mtime timestamps and
-the size of the given file. When the stat call fails, stat_info is not
-modified; failures other than ENOENT and ENOTDIR are additionally passed
-to os_file_handle_error_no_exit().
-@return TRUE if stat information found */
-UNIV_INTERN
-ibool
-os_file_get_status(
-/*===============*/
-	const char*	path,		/*!< in: pathname of the file */
-	os_file_stat_t*	stat_info)	/*!< out: information of a file in a
-					directory */
-{
-#ifdef __WIN__
-	struct _stat	st;
-	int		rc = _stat(path, &st);
-#else
-	struct stat	st;
-	int		rc = stat(path, &st);
-#endif /* __WIN__ */
-
-	if (rc != 0) {
-		if (errno != ENOENT && errno != ENOTDIR) {
-			/* the file exists, but it could not be examined */
-			os_file_handle_error_no_exit(path, "stat");
-		}
-
-		/* either way there is nothing to report */
-		return(FALSE);
-	}
-
-#ifdef __WIN__
-	if (st.st_mode & _S_IFDIR) {
-		stat_info->type = OS_FILE_TYPE_DIR;
-	} else if (st.st_mode & _S_IFREG) {
-		stat_info->type = OS_FILE_TYPE_FILE;
-	} else {
-		stat_info->type = OS_FILE_TYPE_UNKNOWN;
-	}
-#else
-	if (S_ISDIR(st.st_mode)) {
-		stat_info->type = OS_FILE_TYPE_DIR;
-	} else if (S_ISLNK(st.st_mode)) {
-		stat_info->type = OS_FILE_TYPE_LINK;
-	} else if (S_ISREG(st.st_mode)) {
-		stat_info->type = OS_FILE_TYPE_FILE;
-	} else {
-		stat_info->type = OS_FILE_TYPE_UNKNOWN;
-	}
-#endif /* __WIN__ */
-
-	stat_info->ctime = st.st_ctime;
-	stat_info->atime = st.st_atime;
-	stat_info->mtime = st.st_mtime;
-	stat_info->size = st.st_size;
-
-	return(TRUE);
-}
-
-/* path name separator character: a backslash when __WIN__ is defined,
-a forward slash otherwise */
-#ifdef __WIN__
-# define OS_FILE_PATH_SEPARATOR '\\'
-#else
-# define OS_FILE_PATH_SEPARATOR '/'
-#endif
-
-/****************************************************************//**
-The function os_file_dirname returns the directory component of a
-null-terminated pathname string: the part of path up to, but not
-including, the last OS_FILE_PATH_SEPARATOR character.
-
-If path does not contain a separator, the string "." is returned.
-If the only separator is the first character of path, the string "/"
-is returned (also when the separator is a backslash).
-
-The return value is a copy of the directory component of the pathname.
-The copy is allocated from heap. It is the caller's responsibility
-to free it after it is no longer needed.
-
-Examples, with '/' as the separator:
-
-	path		dirname
-	"/usr/lib"	"/usr"
-	"usr"		"."
-	"/"		"/"
-	"."		"."
-	".."		"."
-
-NOTE: unlike dirname() as specified by SUSv2, this function does not
-strip trailing separators before looking for the last one, so a path
-such as "/usr/" is cut at its final character and yields "/usr"
-rather than "/".
-
-@return own: directory component of the pathname */
-UNIV_INTERN
-char*
-os_file_dirname(
-/*============*/
-	const char*	path)	/*!< in: pathname */
-{
-	const char*	sep = strrchr(path, OS_FILE_PATH_SEPARATOR);
-
-	if (sep == NULL) {
-		/* No separator at all: the directory is "." */
-
-		return(mem_strdup("."));
-	} else if (sep == path) {
-		/* The only separator is the leading one */
-
-		return(mem_strdup("/"));
-	}
-
-	/* Copy everything in front of the last separator */
-
-	return(mem_strdupl(path, sep - path));
-}
-
-/****************************************************************//**
-Creates all missing subdirectories along the given path. The directory
-component of path (see os_file_dirname) is examined; if it does not
-exist, its own ancestors are created first by a recursive call and then
-the directory itself is created. Nothing is done when the directory
-component is just the separator or ".".
-@return TRUE if call succeeded FALSE otherwise */
-UNIV_INTERN
-ibool
-os_file_create_subdirs_if_needed(
-/*=============================*/
-	const char*	path)	/*!< in: path name */
-{
-	char*		parent = os_file_dirname(path);
-	ibool		ok;
-	ibool		parent_exists;
-	os_file_type_t	parent_type;
-
-	if (strlen(parent) == 1
-	    && (parent[0] == OS_FILE_PATH_SEPARATOR || parent[0] == '.')) {
-		/* The parent is the root or the current directory:
-		there is nothing to create */
-		mem_free(parent);
-
-		return(TRUE);
-	}
-
-	ok = os_file_status(parent, &parent_exists, &parent_type);
-
-	if (ok && !parent_exists) {
-		/* Make sure the ancestors exist before creating the
-		parent itself */
-		ok = os_file_create_subdirs_if_needed(parent);
-
-		if (ok) {
-			ok = os_file_create_directory(parent, FALSE);
-		}
-	}
-
-	mem_free(parent);
-
-	return(ok);
-}
-
-#ifndef UNIV_HOTBACKUP
-/****************************************************************//**
-Looks up a slot of an aio array by position. The position is checked
-against array->n_slots with ut_a().
-@return pointer to slot */
-static
-os_aio_slot_t*
-os_aio_array_get_nth_slot(
-/*======================*/
-	os_aio_array_t*	array,	/*!< in: aio array */
-	ulint		index)	/*!< in: index of the slot */
-{
-	ut_a(index < array->n_slots);
-
-	return(&array->slots[index]);
-}
-
-/************************************************************************//**
-Allocates and initializes an aio wait array: its mutex, its not_full and
-is_empty events, and n slots that are all marked unreserved. The
-is_empty event is set right away. With WIN_ASYNC_IO every slot also gets
-an event of its own, whose handle is stored both in the slot's OVERLAPPED
-control block and in array->native_events.
-@return own: aio array */
-static
-os_aio_array_t*
-os_aio_array_create(
-/*================*/
-	ulint	n,		/*!< in: maximum number of pending aio
-				operations allowed; n must be divisible
-				by n_segments */
-	ulint	n_segments)	/*!< in: number of segments in the aio array */
-{
-	os_aio_array_t*	aio;
-	ulint		pos;
-
-	ut_a(n > 0);
-	ut_a(n_segments > 0);
-
-	aio = ut_malloc(sizeof(os_aio_array_t));
-
-	aio->mutex = os_mutex_create(NULL);
-	aio->not_full = os_event_create(NULL);
-	aio->is_empty = os_event_create(NULL);
-
-	/* Nothing is reserved yet, so the array starts out empty */
-	os_event_set(aio->is_empty);
-
-	aio->n_slots = n;
-	aio->n_segments = n_segments;
-	aio->n_reserved = 0;
-	aio->slots = ut_malloc(n * sizeof(os_aio_slot_t));
-#ifdef __WIN__
-	aio->native_events = ut_malloc(n * sizeof(os_native_event_t));
-#endif /* __WIN__ */
-
-	for (pos = 0; pos < n; pos++) {
-		os_aio_slot_t*	slot = os_aio_array_get_nth_slot(aio, pos);
-
-		slot->pos = pos;
-		slot->reserved = FALSE;
-#ifdef WIN_ASYNC_IO
-		slot->event = os_event_create(NULL);
-
-		slot->control.hEvent = slot->event->handle;
-
-		aio->native_events[pos] = slot->control.hEvent;
-#endif /* WIN_ASYNC_IO */
-	}
-
-	return(aio);
-}
-
-/************************************************************************//**
-Releases everything owned by an aio wait array: the per-slot events
-(WIN_ASYNC_IO only), the native event table (__WIN__ only), the mutex,
-the not_full and is_empty events, the slot table and finally the array
-structure itself. */
-static
-void
-os_aio_array_free(
-/*==============*/
-	os_aio_array_t*	array)	/*!< in, own: array to free */
-{
-#ifdef WIN_ASYNC_IO
-	ulint	pos;
-
-	for (pos = 0; pos < array->n_slots; pos++) {
-		os_event_free(os_aio_array_get_nth_slot(array, pos)->event);
-	}
-#endif /* WIN_ASYNC_IO */
-
-#ifdef __WIN__
-	ut_free(array->native_events);
-#endif /* __WIN__ */
-
-	os_mutex_free(array->mutex);
-	os_event_free(array->not_full);
-	os_event_free(array->is_empty);
-
-	ut_free(array->slots);
-	ut_free(array);
-}
-
-/***********************************************************************
-Initializes the asynchronous io system. One array each is created for
-ibuf and for log i/o, plus one array each for reads and for writes; the
-latter two are divided logically into n_read_segs and n_write_segs
-segments. The caller must create an i/o handler thread for each segment
-of these arrays. The sync array is created here as well; it needs no
-i/o handler thread. Global segment numbers are laid out as: 0 = ibuf,
-1 = log, then the read segments, then the write segments. */
-UNIV_INTERN
-void
-os_aio_init(
-/*========*/
-	ulint	n_per_seg,	/*!< in: maximum number of pending aio
-				operations allowed per segment */
-	ulint	n_read_segs,	/*!< in: number of reader threads */
-	ulint	n_write_segs,	/*!< in: number of writer threads */
-	ulint	n_slots_sync)	/*!< in: number of slots in the sync aio
-				array */
-{
-	const ulint	n_segments = 2 + n_read_segs + n_write_segs;
-	ulint		seg;
-
-	ut_ad(n_segments >= 4);
-
-	os_io_init_simple();
-
-	for (seg = 0; seg < n_segments; seg++) {
-		srv_set_io_thread_op_info(seg, "not started yet");
-	}
-
-	/* Segment 0 */
-	os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1);
-	srv_io_thread_function[0] = "insert buffer thread";
-
-	/* Segment 1 */
-	os_aio_log_array = os_aio_array_create(n_per_seg, 1);
-	srv_io_thread_function[1] = "log thread";
-
-	/* Segments 2 .. 2 + n_read_segs - 1 */
-	os_aio_read_array = os_aio_array_create(n_read_segs * n_per_seg,
-						n_read_segs);
-
-	for (seg = 2; seg < 2 + n_read_segs; seg++) {
-		ut_a(seg < SRV_MAX_N_IO_THREADS);
-		srv_io_thread_function[seg] = "read thread";
-	}
-
-	/* The remaining segments */
-	os_aio_write_array = os_aio_array_create(n_write_segs * n_per_seg,
-						 n_write_segs);
-
-	for (seg = 2 + n_read_segs; seg < n_segments; seg++) {
-		ut_a(seg < SRV_MAX_N_IO_THREADS);
-		srv_io_thread_function[seg] = "write thread";
-	}
-
-	os_aio_sync_array = os_aio_array_create(n_slots_sync, 1);
-
-	os_aio_n_segments = n_segments;
-
-	os_aio_validate();
-
-	/* One wait event per global segment */
-	os_aio_segment_wait_events = ut_malloc(n_segments * sizeof(void*));
-
-	for (seg = 0; seg < n_segments; seg++) {
-		os_aio_segment_wait_events[seg] = os_event_create(NULL);
-	}
-
-	os_last_printout = time(NULL);
-}
-
-/***********************************************************************
-Frees the asynchronous io system: the ibuf, log, read, write and sync
-arrays, every per-segment wait event and the table that holds those
-events. The global pointers are cleared and os_aio_n_segments is reset
-to 0. */
-UNIV_INTERN
-void
-os_aio_free(void)
-/*=============*/
-{
-	ulint	seg;
-
-	os_aio_array_free(os_aio_ibuf_array);
-	os_aio_ibuf_array = NULL;
-
-	os_aio_array_free(os_aio_log_array);
-	os_aio_log_array = NULL;
-
-	os_aio_array_free(os_aio_read_array);
-	os_aio_read_array = NULL;
-
-	os_aio_array_free(os_aio_write_array);
-	os_aio_write_array = NULL;
-
-	os_aio_array_free(os_aio_sync_array);
-	os_aio_sync_array = NULL;
-
-	for (seg = 0; seg < os_aio_n_segments; seg++) {
-		os_event_free(os_aio_segment_wait_events[seg]);
-	}
-
-	ut_free(os_aio_segment_wait_events);
-	os_aio_segment_wait_events = NULL;
-
-	os_aio_n_segments = 0;
-}
-
-#ifdef WIN_ASYNC_IO
-/************************************************************************//**
-Sets the event of every slot in the array, so that threads waiting for
-Windows async i/o on those events wake up at shutdown. */
-static
-void
-os_aio_array_wake_win_aio_at_shutdown(
-/*==================================*/
-	os_aio_array_t*	array)	/*!< in: aio array */
-{
-	ulint	pos;
-
-	for (pos = 0; pos < array->n_slots; pos++) {
-
-		os_event_set(array->slots[pos].event);
-	}
-}
-#endif /* WIN_ASYNC_IO */
-
-/************************************************************************//**
-Wakes up all async i/o threads so that they know to exit themselves in
-shutdown. With WIN_ASYNC_IO the slot events of the read, write, ibuf and
-log arrays are set first; after that every per-segment wait event is
-set. */
-UNIV_INTERN
-void
-os_aio_wake_all_threads_at_shutdown(void)
-/*=====================================*/
-{
-	ulint	seg;
-
-#ifdef WIN_ASYNC_IO
-	/* Threads blocked in Windows native aio */
-	os_aio_array_wake_win_aio_at_shutdown(os_aio_read_array);
-	os_aio_array_wake_win_aio_at_shutdown(os_aio_write_array);
-	os_aio_array_wake_win_aio_at_shutdown(os_aio_ibuf_array);
-	os_aio_array_wake_win_aio_at_shutdown(os_aio_log_array);
-#endif /* WIN_ASYNC_IO */
-
-	/* Threads blocked in simulated aio */
-	for (seg = 0; seg < os_aio_n_segments; seg++) {
-
-		os_event_set(os_aio_segment_wait_events[seg]);
-	}
-}
-
-/************************************************************************//**
-Blocks on the is_empty event of os_aio_write_array, i.e. until no slot of
-that array is reserved. Synchronous writes are not tracked by the array,
-so some of those may still be pending when this function returns. */
-UNIV_INTERN
-void
-os_aio_wait_until_no_pending_writes(void)
-/*=====================================*/
-{
-	os_event_wait(os_aio_write_array->is_empty);
-}
-
-/**********************************************************************//**
-Maps a slot of an aio array to its global segment number: 0 for the ibuf
-array, 1 for the log array, 2 onwards for the read array, and the write
-array follows directly after the read segments. Any other array trips an
-assertion.
-@return segment number (which is the number used by, for example,
-i/o-handler threads) */
-static
-ulint
-os_aio_get_segment_no_from_slot(
-/*============================*/
-	os_aio_array_t*	array,	/*!< in: aio wait array */
-	os_aio_slot_t*	slot)	/*!< in: slot in this array */
-{
-	ulint	slots_per_seg;
-
-	if (array == os_aio_ibuf_array) {
-
-		return(0);
-	}
-
-	if (array == os_aio_log_array) {
-
-		return(1);
-	}
-
-	if (array == os_aio_read_array) {
-		slots_per_seg = array->n_slots / array->n_segments;
-
-		return(2 + slot->pos / slots_per_seg);
-	}
-
-	ut_a(array == os_aio_write_array);
-
-	slots_per_seg = array->n_slots / array->n_segments;
-
-	return(os_aio_read_array->n_segments + 2
-	       + slot->pos / slots_per_seg);
-}
-
-/**********************************************************************//**
-Translates a global segment number into the aio array that owns it and
-the segment number local to that array. Global segment 0 is the ibuf
-array and 1 the log array (local segment 0 in both); the following
-os_aio_read_array->n_segments numbers belong to the read array and all
-remaining ones to the write array.
-@return local segment number within the aio array */
-static
-ulint
-os_aio_get_array_and_local_segment(
-/*===============================*/
-	os_aio_array_t**	array,		/*!< out: aio wait array */
-	ulint			global_segment)	/*!< in: global segment
-						number */
-{
-	ulint	first_write_seg;
-
-	ut_a(global_segment < os_aio_n_segments);
-
-	switch (global_segment) {
-	case 0:
-		*array = os_aio_ibuf_array;
-
-		return(0);
-	case 1:
-		*array = os_aio_log_array;
-
-		return(0);
-	}
-
-	first_write_seg = os_aio_read_array->n_segments + 2;
-
-	if (global_segment < first_write_seg) {
-		*array = os_aio_read_array;
-
-		return(global_segment - 2);
-	}
-
-	*array = os_aio_write_array;
-
-	return(global_segment - first_write_seg);
-}
-
-/*******************************************************************//**
-Requests for a slot in the aio array. If no slot is available, waits until
-not_full-event becomes signaled.
-The slot that is found is marked reserved and filled in with the request
-parameters while array->mutex is held; the mutex is released before the
-function returns. The is_empty event is reset when the first slot gets
-reserved and the not_full event is reset when the last free slot is taken.
-@return pointer to slot */
-static
-os_aio_slot_t*
-os_aio_array_reserve_slot(
-/*======================*/
- ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
- os_aio_array_t* array, /*!< in: aio array */
- fil_node_t* message1,/*!< in: message to be passed along with
- the aio operation */
- void* message2,/*!< in: message to be passed along with
- the aio operation */
- os_file_t file, /*!< in: file handle */
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- void* buf, /*!< in: buffer where to read or from which
- to write */
- ulint offset, /*!< in: least significant 32 bits of file
- offset */
- ulint offset_high, /*!< in: most significant 32 bits of
- offset */
- ulint len) /*!< in: length of the block to read or write */
-{
- os_aio_slot_t* slot;
-#ifdef WIN_ASYNC_IO
- OVERLAPPED* control;
-#endif
- ulint i;
- ulint slots_per_seg;
- ulint local_seg;
-
- /* No need of a mutex. Only reading constant fields */
- slots_per_seg = array->n_slots / array->n_segments;
-
- /* We attempt to keep adjacent blocks in the same local
- segment. This can help in merging IO requests when we are
- doing simulated AIO */
- local_seg = (offset >> (UNIV_PAGE_SIZE_SHIFT + 6))
- % array->n_segments; /* only the low offset word is used; presumably UNIV_PAGE_SIZE_SHIFT is log2 of the page size, so runs of 64 pages share a segment - TODO confirm */
-
-loop:
- os_mutex_enter(array->mutex);
-
- if (array->n_reserved == array->n_slots) {
- os_mutex_exit(array->mutex);
-
- if (!os_aio_use_native_aio) {
- /* If the handler threads are suspended, wake them
- so that we get more slots */
-
- os_aio_simulated_wake_handler_threads();
- }
-
- os_event_wait(array->not_full);
-
- goto loop; /* re-check under the mutex: another thread may have taken the freed slot */
- }
-
- /* First try to find a slot in the preferred local segment */
- for (i = local_seg * slots_per_seg; i < array->n_slots; i++) { /* NOTE: the scan starts at the preferred segment but runs to the end of the array, not only to the end of that segment */
- slot = os_aio_array_get_nth_slot(array, i);
-
- if (slot->reserved == FALSE) {
- goto found;
- }
- }
-
- /* Fall back to a full scan. We are guaranteed to find a slot */
- for (i = 0;; i++) { /* no upper bound here: os_aio_array_get_nth_slot() asserts that i < n_slots */
- slot = os_aio_array_get_nth_slot(array, i);
-
- if (slot->reserved == FALSE) {
- goto found;
- }
- }
-
-found:
- ut_a(slot->reserved == FALSE);
- array->n_reserved++;
-
- if (array->n_reserved == 1) {
- os_event_reset(array->is_empty);
- }
-
- if (array->n_reserved == array->n_slots) {
- os_event_reset(array->not_full);
- }
-
- slot->reserved = TRUE;
- slot->reservation_time = time(NULL);
- slot->message1 = message1;
- slot->message2 = message2;
- slot->file = file;
- slot->name = name; /* the pointer is stored, the string is not copied */
- slot->len = len;
- slot->type = type;
- slot->buf = buf;
- slot->offset = offset;
- slot->offset_high = offset_high;
- slot->io_already_done = FALSE;
-
-#ifdef WIN_ASYNC_IO
- control = &(slot->control);
- control->Offset = (DWORD)offset;
- control->OffsetHigh = (DWORD)offset_high;
- os_event_reset(slot->event);
-#endif
-
- os_mutex_exit(array->mutex);
-
- return(slot);
-}
-
-/*******************************************************************//**
-Marks a slot of the aio array as free again. All of the work is done
-under array->mutex. The not_full event is set when the array had been
-completely reserved before this call, and the is_empty event is set when
-no reserved slot remains. With WIN_ASYNC_IO the slot's own event is reset
-as well. */
-static
-void
-os_aio_array_free_slot(
-/*===================*/
-	os_aio_array_t*	array,	/*!< in: aio array */
-	os_aio_slot_t*	slot)	/*!< in: pointer to slot */
-{
-	ut_ad(array);
-	ut_ad(slot);
-
-	os_mutex_enter(array->mutex);
-
-	ut_ad(slot->reserved);
-
-	slot->reserved = FALSE;
-	array->n_reserved--;
-
-	if (array->n_reserved + 1 == array->n_slots) {
-		/* The array was full until now */
-		os_event_set(array->not_full);
-	}
-
-	if (!array->n_reserved) {
-		/* This was the last reserved slot */
-		os_event_set(array->is_empty);
-	}
-
-#ifdef WIN_ASYNC_IO
-	os_event_reset(slot->event);
-#endif /* WIN_ASYNC_IO */
-
-	os_mutex_exit(array->mutex);
-}
-
-/**********************************************************************//**
-Wakes up a simulated aio i/o-handler thread if it has something to do:
-the slots of the given segment are inspected under the array mutex, and
-the segment's wait event is set when at least one of them is reserved. */
-static
-void
-os_aio_simulated_wake_handler_thread(
-/*=================================*/
-	ulint	global_segment)	/*!< in: the number of the segment in the
-				aio arrays */
-{
-	os_aio_array_t*	array;
-	ulint		local_seg;
-	ulint		seg_len;
-	ulint		k;
-	ibool		has_request = FALSE;
-
-	ut_ad(!os_aio_use_native_aio);
-
-	local_seg = os_aio_get_array_and_local_segment(&array,
-						       global_segment);
-
-	seg_len = array->n_slots / array->n_segments;
-
-	/* Inspect the seg_len slots that make up this segment */
-
-	os_mutex_enter(array->mutex);
-
-	for (k = 0; k < seg_len; k++) {
-		if (os_aio_array_get_nth_slot(
-			    array, k + local_seg * seg_len)->reserved) {
-			/* There is a pending i/o request */
-			has_request = TRUE;
-
-			break;
-		}
-	}
-
-	os_mutex_exit(array->mutex);
-
-	if (has_request) {
-		os_event_set(os_aio_segment_wait_events[global_segment]);
-	}
-}
-
-/**********************************************************************//**
-Wakes up simulated aio i/o-handler threads if they have something to do.
-This is a no-op when native aio is in use. Otherwise the recommendation
-for read threads to sleep is withdrawn and every global segment is
-checked in turn. */
-UNIV_INTERN
-void
-os_aio_simulated_wake_handler_threads(void)
-/*=======================================*/
-{
-	ulint	seg;
-
-	if (!os_aio_use_native_aio) {
-		os_aio_recommend_sleep_for_read_threads = FALSE;
-
-		for (seg = 0; seg < os_aio_n_segments; seg++) {
-			os_aio_simulated_wake_handler_thread(seg);
-		}
-	}
-}
-
-/**********************************************************************//**
-This function can be called if one wants to post a batch of reads and
-prefers an i/o-handler thread to handle them all at once later. You must
-call os_aio_simulated_wake_handler_threads later to ensure the threads
-are not left sleeping!
-
-The body is compiled only when __WIN__ is defined; elsewhere the function
-does nothing. The idea of putting background IO threads to sleep is only
-for Windows when using simulated AIO. Windows XP seems to schedule
-background threads too eagerly to allow for coalescing during readahead
-requests. */
-UNIV_INTERN
-void
-os_aio_simulated_put_read_threads_to_sleep(void)
-/*============================================*/
-{
-#ifdef __WIN__
-	ulint	seg;
-
-	if (!os_aio_use_native_aio) {
-		os_aio_recommend_sleep_for_read_threads = TRUE;
-
-		/* Reset the wait event of every segment that belongs to
-		the read array */
-		for (seg = 0; seg < os_aio_n_segments; seg++) {
-			os_aio_array_t*	owner;
-
-			os_aio_get_array_and_local_segment(&owner, seg);
-
-			if (owner == os_aio_read_array) {
-
-				os_event_reset(
-					os_aio_segment_wait_events[seg]);
-			}
-		}
-	}
-#endif /* __WIN__ */
-}
-
-/*******************************************************************//**
-Requests an asynchronous i/o operation.
-A request with mode OS_AIO_SYNC is executed directly with os_file_read()
-or os_file_write(), unless WIN_ASYNC_IO is compiled in and native aio is in
-use; in that case it is queued in os_aio_sync_array and waited for with
-os_aio_windows_handle(). All other requests reserve a slot in the array
-selected by mode and type. With simulated aio the handler thread of the
-slot's segment is then woken up, unless the caller asked for a later
-wake-up.
-@return TRUE if request was queued successfully, FALSE if fail */
-UNIV_INTERN
-ibool
-os_aio(
-/*===*/
- ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
- ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed
- to OS_AIO_SIMULATED_WAKE_LATER: the
- last flag advises this function not to wake
- i/o-handler threads, but the caller will
- do the waking explicitly later, in this
- way the caller can post several requests in
- a batch; NOTE that the batch must not be
- so big that it exhausts the slots in aio
- arrays! NOTE that a simulated batch
- may introduce hidden chances of deadlocks,
- because i/os are not actually handled until
- all have been posted: use with great
- caution! */
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read or from which
- to write */
- ulint offset, /*!< in: least significant 32 bits of file
- offset where to read or write */
- ulint offset_high, /*!< in: most significant 32 bits of
- offset */
- ulint n, /*!< in: number of bytes to read or write */
- fil_node_t* message1,/*!< in: message for the aio handler
- (can be used to identify a completed
- aio operation); ignored if mode is
- OS_AIO_SYNC */
- void* message2)/*!< in: message for the aio handler
- (can be used to identify a completed
- aio operation); ignored if mode is
- OS_AIO_SYNC */
-{
- os_aio_array_t* array;
- os_aio_slot_t* slot;
-#ifdef WIN_ASYNC_IO
- ibool retval;
- BOOL ret = TRUE;
- DWORD len = (DWORD) n;
- struct fil_node_struct * dummy_mess1;
- void* dummy_mess2;
- ulint dummy_type;
-#endif
- ulint err = 0; /* set to 1 only when a native Windows request could not be queued */
- ibool retry;
- ulint wake_later;
-
- ut_ad(file);
- ut_ad(buf);
- ut_ad(n > 0);
- ut_ad(n % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_ad(offset % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_ad(os_aio_validate());
-
- wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER; /* split the flag off from the mode proper */
- mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER);
-
- if (mode == OS_AIO_SYNC
-#ifdef WIN_ASYNC_IO
- && !os_aio_use_native_aio
-#endif
- ) {
- /* This is actually an ordinary synchronous read or write:
- no need to use an i/o-handler thread. NOTE that if we use
- Windows async i/o, Windows does not allow us to use
- ordinary synchronous os_file_read etc. on the same file,
- therefore we have built a special mechanism for synchronous
- wait in the Windows case. */
-
- if (type == OS_FILE_READ) {
- return(os_file_read(file, buf, offset,
- offset_high, n));
- }
-
- ut_a(type == OS_FILE_WRITE);
-
- return(os_file_write(name, file, buf, offset, offset_high, n));
- }
-
-try_again: /* re-entered after a failed native request when os_file_handle_error() asks for a retry */
- if (mode == OS_AIO_NORMAL) {
- if (type == OS_FILE_READ) {
- array = os_aio_read_array;
- } else {
- array = os_aio_write_array;
- }
- } else if (mode == OS_AIO_IBUF) {
- ut_ad(type == OS_FILE_READ);
- /* Reduce probability of deadlock bugs in connection with ibuf:
- do not let the ibuf i/o handler sleep */
-
- wake_later = FALSE;
-
- array = os_aio_ibuf_array;
- } else if (mode == OS_AIO_LOG) {
-
- array = os_aio_log_array;
- } else if (mode == OS_AIO_SYNC) {
- array = os_aio_sync_array; /* reachable only with WIN_ASYNC_IO and native aio, see the early return above */
- } else {
- array = NULL; /* Eliminate compiler warning */
- ut_error;
- }
-
- slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
- name, buf, offset, offset_high, n);
- if (type == OS_FILE_READ) {
- if (os_aio_use_native_aio) {
-#ifdef WIN_ASYNC_IO
- os_n_file_reads++;
- os_bytes_read_since_printout += len;
-
- ret = ReadFile(file, buf, (DWORD)n, &len,
- &(slot->control));
-#endif
- } else {
- if (!wake_later) {
- os_aio_simulated_wake_handler_thread(
- os_aio_get_segment_no_from_slot(
- array, slot));
- }
- }
- } else if (type == OS_FILE_WRITE) {
- if (os_aio_use_native_aio) {
-#ifdef WIN_ASYNC_IO
- os_n_file_writes++;
- ret = WriteFile(file, buf, (DWORD)n, &len,
- &(slot->control));
-#endif
- } else {
- if (!wake_later) {
- os_aio_simulated_wake_handler_thread(
- os_aio_get_segment_no_from_slot(
- array, slot));
- }
- }
- } else {
- ut_error;
- }
-
-#ifdef WIN_ASYNC_IO
- if (os_aio_use_native_aio) {
- if ((ret && len == n)
- || (!ret && GetLastError() == ERROR_IO_PENDING)) {
- /* aio was queued successfully! */
-
- if (mode == OS_AIO_SYNC) {
- /* We want a synchronous i/o operation on a
- file where we also use async i/o: in Windows
- we must use the same wait mechanism as for
- async i/o */
-
- retval = os_aio_windows_handle(ULINT_UNDEFINED,
- slot->pos,
- &dummy_mess1,
- &dummy_mess2,
- &dummy_type);
-
- return(retval);
- }
-
- return(TRUE);
- }
-
- err = 1; /* Fall through the next if */
- }
-#endif
- if (err == 0) {
- /* aio was queued successfully! */
-
- return(TRUE);
- }
-
- os_aio_array_free_slot(array, slot); /* give the slot back before deciding whether to retry */
-
- retry = os_file_handle_error(name,
- type == OS_FILE_READ
- ? "aio read" : "aio write");
- if (retry) {
-
- goto try_again;
- }
-
- return(FALSE);
-}
-
-#ifdef WIN_ASYNC_IO
-/**********************************************************************//**
-This function is only used in Windows asynchronous i/o.
-Waits for an aio operation to complete. This function is used to wait
-for completed requests. The aio array of pending requests is divided
-into segments. The thread specifies which segment or slot it wants to wait
-for. NOTE: this function will also take care of freeing the aio slot,
-therefore no other thread is allowed to do the freeing!
-If the request failed and os_file_handle_error() asks for a retry, the
-read or write is issued once more from this function and waited for
-before the slot is freed.
-@return TRUE if the aio operation succeeded */
-UNIV_INTERN
-ibool
-os_aio_windows_handle(
-/*==================*/
- ulint segment, /*!< in: the number of the segment in the aio
- arrays to wait for; segment 0 is the ibuf
- i/o thread, segment 1 the log i/o thread,
- then follow the non-ibuf read threads, and as
- the last are the non-ibuf write threads; if
- this is ULINT_UNDEFINED, then it means that
- sync aio is used, and this parameter is
- ignored */
- ulint pos, /*!< this parameter is used only in sync aio:
- wait for the aio slot at this position */
- fil_node_t**message1, /*!< out: the messages passed with the aio
- request; note that also in the case where
- the aio operation failed, these output
- parameters are valid and can be used to
- restart the operation, for example */
- void** message2, /*!< out: second message passed with the aio request */
- ulint* type) /*!< out: OS_FILE_WRITE or ..._READ */
-{
- ulint orig_seg = segment; /* kept because segment is overwritten with the local segment number below */
- os_aio_array_t* array;
- os_aio_slot_t* slot;
- ulint n;
- ulint i;
- ibool ret_val;
- BOOL ret;
- DWORD len;
- BOOL retry = FALSE;
-
- if (segment == ULINT_UNDEFINED) {
- array = os_aio_sync_array;
- segment = 0;
- } else {
- segment = os_aio_get_array_and_local_segment(&array, segment);
- }
-
- /* NOTE! We only access constant fields in os_aio_array. Therefore
- we do not have to acquire the protecting mutex yet */
-
- ut_ad(os_aio_validate());
- ut_ad(segment < array->n_segments);
-
- n = array->n_slots / array->n_segments; /* number of slots per segment */
-
- if (array == os_aio_sync_array) {
- os_event_wait(os_aio_array_get_nth_slot(array, pos)->event);
- i = pos;
- } else {
- srv_set_io_thread_op_info(orig_seg, "wait Windows aio");
- i = os_event_wait_multiple(n,
- (array->native_events)
- + segment * n); /* presumably returns the index, within the segment, of the signaled event - TODO confirm */
- }
-
- os_mutex_enter(array->mutex);
-
- slot = os_aio_array_get_nth_slot(array, i + segment * n);
-
- ut_a(slot->reserved);
-
- if (orig_seg != ULINT_UNDEFINED) {
- srv_set_io_thread_op_info(orig_seg,
- "get windows aio return value");
- }
-
- ret = GetOverlappedResult(slot->file, &(slot->control), &len, TRUE); /* last argument TRUE: wait for completion; array->mutex is held during this call */
-
- *message1 = slot->message1;
- *message2 = slot->message2;
-
- *type = slot->type;
-
- if (ret && len == slot->len) {
- ret_val = TRUE;
-
-#ifdef UNIV_DO_FLUSH
- if (slot->type == OS_FILE_WRITE
- && !os_do_not_call_flush_at_each_write) {
- ut_a(TRUE == os_file_flush(slot->file));
- }
-#endif /* UNIV_DO_FLUSH */
- } else if (os_file_handle_error(slot->name, "Windows aio")) {
-
- retry = TRUE; /* ret_val is assigned after the retry below */
- } else {
-
- ret_val = FALSE;
- }
-
- os_mutex_exit(array->mutex);
-
- if (retry) {
- /* retry failed read/write operation synchronously.
- No need to hold array->mutex. */
-
- switch (slot->type) {
- case OS_FILE_WRITE:
- ret = WriteFile(slot->file, slot->buf,
- slot->len, &len,
- &(slot->control));
-
- break;
- case OS_FILE_READ:
- ret = ReadFile(slot->file, slot->buf,
- slot->len, &len,
- &(slot->control));
-
- break;
- default:
- ut_error;
- }
-
- if (!ret && GetLastError() == ERROR_IO_PENDING) {
- /* aio was queued successfully!
- We want a synchronous i/o operation on a
- file where we also use async i/o: in Windows
- we must use the same wait mechanism as for
- async i/o */
-
- ret = GetOverlappedResult(slot->file,
- &(slot->control),
- &len, TRUE);
- }
-
- ret_val = ret && len == slot->len; /* the retry is attempted only once */
- }
-
- os_aio_array_free_slot(array, slot);
-
- return(ret_val);
-}
-#endif
-
-/**********************************************************************//**
-Does simulated aio. This function should be called by an i/o-handler
-thread.
-@return TRUE if the aio operation succeeded */
-UNIV_INTERN
-ibool
-os_aio_simulated_handle(
-/*====================*/
- ulint global_segment, /*!< in: the number of the segment in the aio
- arrays to wait for; segment 0 is the ibuf
- i/o thread, segment 1 the log i/o thread,
- then follow the non-ibuf read threads, and as
- the last are the non-ibuf write threads */
- fil_node_t**message1, /*!< out: the messages passed with the aio
- request; note that also in the case where
- the aio operation failed, these output
- parameters are valid and can be used to
- restart the operation, for example */
- void** message2,
- ulint* type) /*!< out: OS_FILE_WRITE or ..._READ */
-{
- os_aio_array_t* array;
- ulint segment;
- os_aio_slot_t* slot;
- os_aio_slot_t* slot2;
- os_aio_slot_t* consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE];
- ulint n_consecutive;
- ulint total_len;
- ulint offs;
- ulint lowest_offset;
- ulint biggest_age;
- ulint age;
- byte* combined_buf;
- byte* combined_buf2;
- ibool ret;
- ulint n;
- ulint i;
-
- segment = os_aio_get_array_and_local_segment(&array, global_segment);
-
-restart:
- /* NOTE! We only access constant fields in os_aio_array. Therefore
- we do not have to acquire the protecting mutex yet */
-
- srv_set_io_thread_op_info(global_segment,
- "looking for i/o requests (a)");
- ut_ad(os_aio_validate());
- ut_ad(segment < array->n_segments);
-
- n = array->n_slots / array->n_segments;
-
- /* Look through n slots after the segment * n'th slot */
-
- if (array == os_aio_read_array
- && os_aio_recommend_sleep_for_read_threads) {
-
- /* Give other threads chance to add several i/os to the array
- at once. */
-
- goto recommended_sleep;
- }
-
- os_mutex_enter(array->mutex);
-
- srv_set_io_thread_op_info(global_segment,
- "looking for i/o requests (b)");
-
- /* Check if there is a slot for which the i/o has already been
- done */
-
- for (i = 0; i < n; i++) {
- slot = os_aio_array_get_nth_slot(array, i + segment * n);
-
- if (slot->reserved && slot->io_already_done) {
-
- if (os_aio_print_debug) {
- fprintf(stderr,
- "InnoDB: i/o for slot %lu"
- " already done, returning\n",
- (ulong) i);
- }
-
- ret = TRUE;
-
- goto slot_io_done;
- }
- }
-
- n_consecutive = 0;
-
- /* If there are at least 2 seconds old requests, then pick the oldest
- one to prevent starvation. If several requests have the same age,
- then pick the one at the lowest offset. */
-
- biggest_age = 0;
- lowest_offset = ULINT_MAX;
-
- for (i = 0; i < n; i++) {
- slot = os_aio_array_get_nth_slot(array, i + segment * n);
-
- if (slot->reserved) {
- age = (ulint)difftime(time(NULL),
- slot->reservation_time);
-
- if ((age >= 2 && age > biggest_age)
- || (age >= 2 && age == biggest_age
- && slot->offset < lowest_offset)) {
-
- /* Found an i/o request */
- consecutive_ios[0] = slot;
-
- n_consecutive = 1;
-
- biggest_age = age;
- lowest_offset = slot->offset;
- }
- }
- }
-
- if (n_consecutive == 0) {
- /* There were no old requests. Look for an i/o request at the
- lowest offset in the array (we ignore the high 32 bits of the
- offset in these heuristics) */
-
- lowest_offset = ULINT_MAX;
-
- for (i = 0; i < n; i++) {
- slot = os_aio_array_get_nth_slot(array,
- i + segment * n);
-
- if (slot->reserved && slot->offset < lowest_offset) {
-
- /* Found an i/o request */
- consecutive_ios[0] = slot;
-
- n_consecutive = 1;
-
- lowest_offset = slot->offset;
- }
- }
- }
-
- if (n_consecutive == 0) {
-
- /* No i/o requested at the moment */
-
- goto wait_for_io;
- }
-
- slot = consecutive_ios[0];
-
- /* Check if there are several consecutive blocks to read or write */
-
-consecutive_loop:
- for (i = 0; i < n; i++) {
- slot2 = os_aio_array_get_nth_slot(array, i + segment * n);
-
- if (slot2->reserved && slot2 != slot
- && slot2->offset == slot->offset + slot->len
- /* check that sum does not wrap over */
- && slot->offset + slot->len > slot->offset
- && slot2->offset_high == slot->offset_high
- && slot2->type == slot->type
- && slot2->file == slot->file) {
-
- /* Found a consecutive i/o request */
-
- consecutive_ios[n_consecutive] = slot2;
- n_consecutive++;
-
- slot = slot2;
-
- if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE) {
-
- goto consecutive_loop;
- } else {
- break;
- }
- }
- }
-
- srv_set_io_thread_op_info(global_segment, "consecutive i/o requests");
-
- /* We have now collected n_consecutive i/o requests in the array;
- allocate a single buffer which can hold all data, and perform the
- i/o */
-
- total_len = 0;
- slot = consecutive_ios[0];
-
- for (i = 0; i < n_consecutive; i++) {
- total_len += consecutive_ios[i]->len;
- }
-
- if (n_consecutive == 1) {
- /* We can use the buffer of the i/o request */
- combined_buf = slot->buf;
- combined_buf2 = NULL;
- } else {
- combined_buf2 = ut_malloc(total_len + UNIV_PAGE_SIZE);
-
- ut_a(combined_buf2);
-
- combined_buf = ut_align(combined_buf2, UNIV_PAGE_SIZE);
- }
-
- /* We release the array mutex for the time of the i/o: NOTE that
- this assumes that there is just one i/o-handler thread serving
- a single segment of slots! */
-
- os_mutex_exit(array->mutex);
-
- if (slot->type == OS_FILE_WRITE && n_consecutive > 1) {
- /* Copy the buffers to the combined buffer */
- offs = 0;
-
- for (i = 0; i < n_consecutive; i++) {
-
- ut_memcpy(combined_buf + offs, consecutive_ios[i]->buf,
- consecutive_ios[i]->len);
- offs += consecutive_ios[i]->len;
- }
- }
-
- srv_set_io_thread_op_info(global_segment, "doing file i/o");
-
- if (os_aio_print_debug) {
- fprintf(stderr,
- "InnoDB: doing i/o of type %lu at offset %lu %lu,"
- " length %lu\n",
- (ulong) slot->type, (ulong) slot->offset_high,
- (ulong) slot->offset, (ulong) total_len);
- }
-
- /* Do the i/o with ordinary, synchronous i/o functions: */
- if (slot->type == OS_FILE_WRITE) {
- ret = os_file_write(slot->name, slot->file, combined_buf,
- slot->offset, slot->offset_high,
- total_len);
- } else {
- ret = os_file_read(slot->file, combined_buf,
- slot->offset, slot->offset_high, total_len);
- }
-
- ut_a(ret);
- srv_set_io_thread_op_info(global_segment, "file i/o done");
-
-#if 0
- fprintf(stderr,
- "aio: %lu consecutive %lu:th segment, first offs %lu blocks\n",
- n_consecutive, global_segment, slot->offset / UNIV_PAGE_SIZE);
-#endif
-
- if (slot->type == OS_FILE_READ && n_consecutive > 1) {
- /* Copy the combined buffer to individual buffers */
- offs = 0;
-
- for (i = 0; i < n_consecutive; i++) {
-
- ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs,
- consecutive_ios[i]->len);
- offs += consecutive_ios[i]->len;
- }
- }
-
- if (combined_buf2) {
- ut_free(combined_buf2);
- }
-
- os_mutex_enter(array->mutex);
-
- /* Mark the i/os done in slots */
-
- for (i = 0; i < n_consecutive; i++) {
- consecutive_ios[i]->io_already_done = TRUE;
- }
-
- /* We return the messages for the first slot now, and if there were
- several slots, the messages will be returned with subsequent calls
- of this function */
-
-slot_io_done:
-
- ut_a(slot->reserved);
-
- *message1 = slot->message1;
- *message2 = slot->message2;
-
- *type = slot->type;
-
- os_mutex_exit(array->mutex);
-
- os_aio_array_free_slot(array, slot);
-
- return(ret);
-
-wait_for_io:
- srv_set_io_thread_op_info(global_segment, "resetting wait event");
-
- /* We wait here until there again can be i/os in the segment
- of this thread */
-
- os_event_reset(os_aio_segment_wait_events[global_segment]);
-
- os_mutex_exit(array->mutex);
-
-recommended_sleep:
- srv_set_io_thread_op_info(global_segment, "waiting for i/o request");
-
- os_event_wait(os_aio_segment_wait_events[global_segment]);
-
- if (os_aio_print_debug) {
- fprintf(stderr,
- "InnoDB: i/o handler thread for i/o"
- " segment %lu wakes up\n",
- (ulong) global_segment);
- }
-
- goto restart;
-}
-
-/**********************************************************************//**
-Validates the consistency of an aio array.
-@return TRUE if ok */
-static
-ibool
-os_aio_array_validate(
-/*==================*/
- os_aio_array_t* array) /*!< in: aio wait array */
-{
- os_aio_slot_t* slot;
- ulint n_reserved = 0;
- ulint i;
-
- ut_a(array);
-
- os_mutex_enter(array->mutex);
-
- ut_a(array->n_slots > 0);
- ut_a(array->n_segments > 0);
-
- for (i = 0; i < array->n_slots; i++) {
- slot = os_aio_array_get_nth_slot(array, i);
-
- if (slot->reserved) {
- n_reserved++;
- ut_a(slot->len > 0);
- }
- }
-
- ut_a(array->n_reserved == n_reserved);
-
- os_mutex_exit(array->mutex);
-
- return(TRUE);
-}
-
-/**********************************************************************//**
-Validates the consistency the aio system.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-os_aio_validate(void)
-/*=================*/
-{
- os_aio_array_validate(os_aio_read_array);
- os_aio_array_validate(os_aio_write_array);
- os_aio_array_validate(os_aio_ibuf_array);
- os_aio_array_validate(os_aio_log_array);
- os_aio_array_validate(os_aio_sync_array);
-
- return(TRUE);
-}
-
-/**********************************************************************//**
-Prints info of the aio arrays. */
-UNIV_INTERN
-void
-os_aio_print(
-/*=========*/
- FILE* file) /*!< in: file where to print */
-{
- os_aio_array_t* array;
- os_aio_slot_t* slot;
- ulint n_reserved;
- time_t current_time;
- double time_elapsed;
- double avg_bytes_read;
- ulint i;
-
- for (i = 0; i < srv_n_file_io_threads; i++) {
- fprintf(file, "I/O thread %lu state: %s (%s)", (ulong) i,
- srv_io_thread_op_info[i],
- srv_io_thread_function[i]);
-
-#ifndef __WIN__
- if (os_aio_segment_wait_events[i]->is_set) {
- fprintf(file, " ev set");
- }
-#endif
-
- fprintf(file, "\n");
- }
-
- fputs("Pending normal aio reads:", file);
-
- array = os_aio_read_array;
-loop:
- ut_a(array);
-
- os_mutex_enter(array->mutex);
-
- ut_a(array->n_slots > 0);
- ut_a(array->n_segments > 0);
-
- n_reserved = 0;
-
- for (i = 0; i < array->n_slots; i++) {
- slot = os_aio_array_get_nth_slot(array, i);
-
- if (slot->reserved) {
- n_reserved++;
-#if 0
- fprintf(stderr, "Reserved slot, messages %p %p\n",
- (void*) slot->message1,
- (void*) slot->message2);
-#endif
- ut_a(slot->len > 0);
- }
- }
-
- ut_a(array->n_reserved == n_reserved);
-
- fprintf(file, " %lu", (ulong) n_reserved);
-
- os_mutex_exit(array->mutex);
-
- if (array == os_aio_read_array) {
- fputs(", aio writes:", file);
-
- array = os_aio_write_array;
-
- goto loop;
- }
-
- if (array == os_aio_write_array) {
- fputs(",\n ibuf aio reads:", file);
- array = os_aio_ibuf_array;
-
- goto loop;
- }
-
- if (array == os_aio_ibuf_array) {
- fputs(", log i/o's:", file);
- array = os_aio_log_array;
-
- goto loop;
- }
-
- if (array == os_aio_log_array) {
- fputs(", sync i/o's:", file);
- array = os_aio_sync_array;
-
- goto loop;
- }
-
- putc('\n', file);
- current_time = time(NULL);
- time_elapsed = 0.001 + difftime(current_time, os_last_printout);
-
- fprintf(file,
- "Pending flushes (fsync) log: %lu; buffer pool: %lu\n"
- "%lu OS file reads, %lu OS file writes, %lu OS fsyncs\n",
- (ulong) fil_n_pending_log_flushes,
- (ulong) fil_n_pending_tablespace_flushes,
- (ulong) os_n_file_reads, (ulong) os_n_file_writes,
- (ulong) os_n_fsyncs);
-
- if (os_file_n_pending_preads != 0 || os_file_n_pending_pwrites != 0) {
- fprintf(file,
- "%lu pending preads, %lu pending pwrites\n",
- (ulong) os_file_n_pending_preads,
- (ulong) os_file_n_pending_pwrites);
- }
-
- if (os_n_file_reads == os_n_file_reads_old) {
- avg_bytes_read = 0.0;
- } else {
- avg_bytes_read = (double) os_bytes_read_since_printout
- / (os_n_file_reads - os_n_file_reads_old);
- }
-
- fprintf(file,
- "%.2f reads/s, %lu avg bytes/read,"
- " %.2f writes/s, %.2f fsyncs/s\n",
- (os_n_file_reads - os_n_file_reads_old)
- / time_elapsed,
- (ulong)avg_bytes_read,
- (os_n_file_writes - os_n_file_writes_old)
- / time_elapsed,
- (os_n_fsyncs - os_n_fsyncs_old)
- / time_elapsed);
-
- os_n_file_reads_old = os_n_file_reads;
- os_n_file_writes_old = os_n_file_writes;
- os_n_fsyncs_old = os_n_fsyncs;
- os_bytes_read_since_printout = 0;
-
- os_last_printout = current_time;
-}
-
-/**********************************************************************//**
-Refreshes the statistics used to print per-second averages. */
-UNIV_INTERN
-void
-os_aio_refresh_stats(void)
-/*======================*/
-{
- os_n_file_reads_old = os_n_file_reads;
- os_n_file_writes_old = os_n_file_writes;
- os_n_fsyncs_old = os_n_fsyncs;
- os_bytes_read_since_printout = 0;
-
- os_last_printout = time(NULL);
-}
-
-#ifdef UNIV_DEBUG
-/**********************************************************************//**
-Checks that all slots in the system have been freed, that is, there are
-no pending io operations.
-@return TRUE if all free */
-UNIV_INTERN
-ibool
-os_aio_all_slots_free(void)
-/*=======================*/
-{
- os_aio_array_t* array;
- ulint n_res = 0;
-
- array = os_aio_read_array;
-
- os_mutex_enter(array->mutex);
-
- n_res += array->n_reserved;
-
- os_mutex_exit(array->mutex);
-
- array = os_aio_write_array;
-
- os_mutex_enter(array->mutex);
-
- n_res += array->n_reserved;
-
- os_mutex_exit(array->mutex);
-
- array = os_aio_ibuf_array;
-
- os_mutex_enter(array->mutex);
-
- n_res += array->n_reserved;
-
- os_mutex_exit(array->mutex);
-
- array = os_aio_log_array;
-
- os_mutex_enter(array->mutex);
-
- n_res += array->n_reserved;
-
- os_mutex_exit(array->mutex);
-
- array = os_aio_sync_array;
-
- os_mutex_enter(array->mutex);
-
- n_res += array->n_reserved;
-
- os_mutex_exit(array->mutex);
-
- if (n_res == 0) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-#endif /* UNIV_DEBUG */
-
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/os/os0proc.c b/storage/innodb_plugin/os/os0proc.c
deleted file mode 100644
index 48922886f23..00000000000
--- a/storage/innodb_plugin/os/os0proc.c
+++ /dev/null
@@ -1,231 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file os/os0proc.c
-The interface to the operating system
-process control primitives
-
-Created 9/30/1995 Heikki Tuuri
-*******************************************************/
-
-#include "os0proc.h"
-#ifdef UNIV_NONINL
-#include "os0proc.ic"
-#endif
-
-#include "ut0mem.h"
-#include "ut0byte.h"
-
-/* FreeBSD for example has only MAP_ANON, Linux has MAP_ANONYMOUS and
-MAP_ANON but MAP_ANON is marked as deprecated */
-#if defined(MAP_ANONYMOUS)
-#define OS_MAP_ANON MAP_ANONYMOUS
-#elif defined(MAP_ANON)
-#define OS_MAP_ANON MAP_ANON
-#endif
-
-UNIV_INTERN ibool os_use_large_pages;
-/* Large page size. This may be a boot-time option on some platforms */
-UNIV_INTERN ulint os_large_page_size;
-
-/****************************************************************//**
-Converts the current process id to a number. It is not guaranteed that the
-number is unique. In Linux returns the 'process number' of the current
-thread. That number is the same as one sees in 'top', for example. In Linux
-the thread id is not the same as one sees in 'top'.
-@return process id as a number */
-UNIV_INTERN
-ulint
-os_proc_get_number(void)
-/*====================*/
-{
-#ifdef __WIN__
- return((ulint)GetCurrentProcessId());
-#else
- return((ulint)getpid());
-#endif
-}
-
-/****************************************************************//**
-Allocates large pages memory.
-@return allocated memory */
-UNIV_INTERN
-void*
-os_mem_alloc_large(
-/*===============*/
- ulint* n) /*!< in/out: number of bytes */
-{
- void* ptr;
- ulint size;
-#if defined HAVE_LARGE_PAGES && defined UNIV_LINUX
- int shmid;
- struct shmid_ds buf;
-
- if (!os_use_large_pages || !os_large_page_size) {
- goto skip;
- }
-
- /* Align block size to os_large_page_size */
- ut_ad(ut_is_2pow(os_large_page_size));
- size = ut_2pow_round(*n + (os_large_page_size - 1),
- os_large_page_size);
-
- shmid = shmget(IPC_PRIVATE, (size_t)size, SHM_HUGETLB | SHM_R | SHM_W);
- if (shmid < 0) {
- fprintf(stderr, "InnoDB: HugeTLB: Warning: Failed to allocate"
- " %lu bytes. errno %d\n", size, errno);
- ptr = NULL;
- } else {
- ptr = shmat(shmid, NULL, 0);
- if (ptr == (void *)-1) {
- fprintf(stderr, "InnoDB: HugeTLB: Warning: Failed to"
- " attach shared memory segment, errno %d\n",
- errno);
- ptr = NULL;
- }
-
- /* Remove the shared memory segment so that it will be
- automatically freed after memory is detached or
- process exits */
- shmctl(shmid, IPC_RMID, &buf);
- }
-
- if (ptr) {
- *n = size;
- os_fast_mutex_lock(&ut_list_mutex);
- ut_total_allocated_memory += size;
- os_fast_mutex_unlock(&ut_list_mutex);
-# ifdef UNIV_SET_MEM_TO_ZERO
- memset(ptr, '\0', size);
-# endif
- UNIV_MEM_ALLOC(ptr, size);
- return(ptr);
- }
-
- fprintf(stderr, "InnoDB HugeTLB: Warning: Using conventional"
- " memory pool\n");
-skip:
-#endif /* HAVE_LARGE_PAGES && UNIV_LINUX */
-
-#ifdef __WIN__
- SYSTEM_INFO system_info;
- GetSystemInfo(&system_info);
-
- /* Align block size to system page size */
- ut_ad(ut_is_2pow(system_info.dwPageSize));
- /* system_info.dwPageSize is only 32-bit. Casting to ulint is required
- on 64-bit Windows. */
- size = *n = ut_2pow_round(*n + (system_info.dwPageSize - 1),
- (ulint) system_info.dwPageSize);
- ptr = VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE,
- PAGE_READWRITE);
- if (!ptr) {
- fprintf(stderr, "InnoDB: VirtualAlloc(%lu bytes) failed;"
- " Windows error %lu\n",
- (ulong) size, (ulong) GetLastError());
- } else {
- os_fast_mutex_lock(&ut_list_mutex);
- ut_total_allocated_memory += size;
- os_fast_mutex_unlock(&ut_list_mutex);
- UNIV_MEM_ALLOC(ptr, size);
- }
-#elif defined __NETWARE__ || !defined OS_MAP_ANON
- size = *n;
- ptr = ut_malloc_low(size, TRUE, FALSE);
-#else
-# ifdef HAVE_GETPAGESIZE
- size = getpagesize();
-# else
- size = UNIV_PAGE_SIZE;
-# endif
- /* Align block size to system page size */
- ut_ad(ut_is_2pow(size));
- size = *n = ut_2pow_round(*n + (size - 1), size);
- ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | OS_MAP_ANON, -1, 0);
- if (UNIV_UNLIKELY(ptr == (void*) -1)) {
- fprintf(stderr, "InnoDB: mmap(%lu bytes) failed;"
- " errno %lu\n",
- (ulong) size, (ulong) errno);
- ptr = NULL;
- } else {
- os_fast_mutex_lock(&ut_list_mutex);
- ut_total_allocated_memory += size;
- os_fast_mutex_unlock(&ut_list_mutex);
- UNIV_MEM_ALLOC(ptr, size);
- }
-#endif
- return(ptr);
-}
-
-/****************************************************************//**
-Frees large pages memory. */
-UNIV_INTERN
-void
-os_mem_free_large(
-/*==============*/
- void *ptr, /*!< in: pointer returned by
- os_mem_alloc_large() */
- ulint size) /*!< in: size returned by
- os_mem_alloc_large() */
-{
- os_fast_mutex_lock(&ut_list_mutex);
- ut_a(ut_total_allocated_memory >= size);
- os_fast_mutex_unlock(&ut_list_mutex);
-
-#if defined HAVE_LARGE_PAGES && defined UNIV_LINUX
- if (os_use_large_pages && os_large_page_size && !shmdt(ptr)) {
- os_fast_mutex_lock(&ut_list_mutex);
- ut_a(ut_total_allocated_memory >= size);
- ut_total_allocated_memory -= size;
- os_fast_mutex_unlock(&ut_list_mutex);
- UNIV_MEM_FREE(ptr, size);
- return;
- }
-#endif /* HAVE_LARGE_PAGES && UNIV_LINUX */
-#ifdef __WIN__
- /* When RELEASE memory, the size parameter must be 0.
- Do not use MEM_RELEASE with MEM_DECOMMIT. */
- if (!VirtualFree(ptr, 0, MEM_RELEASE)) {
- fprintf(stderr, "InnoDB: VirtualFree(%p, %lu) failed;"
- " Windows error %lu\n",
- ptr, (ulong) size, (ulong) GetLastError());
- } else {
- os_fast_mutex_lock(&ut_list_mutex);
- ut_a(ut_total_allocated_memory >= size);
- ut_total_allocated_memory -= size;
- os_fast_mutex_unlock(&ut_list_mutex);
- UNIV_MEM_FREE(ptr, size);
- }
-#elif defined __NETWARE__ || !defined OS_MAP_ANON
- ut_free(ptr);
-#else
- if (munmap(ptr, size)) {
- fprintf(stderr, "InnoDB: munmap(%p, %lu) failed;"
- " errno %lu\n",
- ptr, (ulong) size, (ulong) errno);
- } else {
- os_fast_mutex_lock(&ut_list_mutex);
- ut_a(ut_total_allocated_memory >= size);
- ut_total_allocated_memory -= size;
- os_fast_mutex_unlock(&ut_list_mutex);
- UNIV_MEM_FREE(ptr, size);
- }
-#endif
-}
diff --git a/storage/innodb_plugin/os/os0sync.c b/storage/innodb_plugin/os/os0sync.c
deleted file mode 100644
index 60467242e14..00000000000
--- a/storage/innodb_plugin/os/os0sync.c
+++ /dev/null
@@ -1,725 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file os/os0sync.c
-The interface to the operating system
-synchronization primitives.
-
-Created 9/6/1995 Heikki Tuuri
-*******************************************************/
-
-#include "os0sync.h"
-#ifdef UNIV_NONINL
-#include "os0sync.ic"
-#endif
-
-#ifdef __WIN__
-#include <windows.h>
-#endif
-
-#include "ut0mem.h"
-#include "srv0start.h"
-
-/* Type definition for an operating system mutex struct */
-struct os_mutex_struct{
- os_event_t event; /*!< Used by sync0arr.c for queing threads */
- void* handle; /*!< OS handle to mutex */
- ulint count; /*!< we use this counter to check
- that the same thread does not
- recursively lock the mutex: we
- do not assume that the OS mutex
- supports recursive locking, though
- NT seems to do that */
- UT_LIST_NODE_T(os_mutex_str_t) os_mutex_list;
- /* list of all 'slow' OS mutexes created */
-};
-
-/** Mutex protecting counts and the lists of OS mutexes and events */
-UNIV_INTERN os_mutex_t os_sync_mutex;
-/** TRUE if os_sync_mutex has been initialized */
-static ibool os_sync_mutex_inited = FALSE;
-/** TRUE when os_sync_free() is being executed */
-static ibool os_sync_free_called = FALSE;
-
-/** This is incremented by 1 in os_thread_create and decremented by 1 in
-os_thread_exit */
-UNIV_INTERN ulint os_thread_count = 0;
-
-/** The list of all events created */
-static UT_LIST_BASE_NODE_T(os_event_struct_t) os_event_list;
-
-/** The list of all OS 'slow' mutexes */
-static UT_LIST_BASE_NODE_T(os_mutex_str_t) os_mutex_list;
-
-UNIV_INTERN ulint os_event_count = 0;
-UNIV_INTERN ulint os_mutex_count = 0;
-UNIV_INTERN ulint os_fast_mutex_count = 0;
-
-/* Because a mutex is embedded inside an event and there is an
-event embedded inside a mutex, on free, this generates a recursive call.
-This version of the free event function doesn't acquire the global lock */
-static void os_event_free_internal(os_event_t event);
-
-/*********************************************************//**
-Initializes global event and OS 'slow' mutex lists. */
-UNIV_INTERN
-void
-os_sync_init(void)
-/*==============*/
-{
- UT_LIST_INIT(os_event_list);
- UT_LIST_INIT(os_mutex_list);
-
- os_sync_mutex = NULL;
- os_sync_mutex_inited = FALSE;
-
- os_sync_mutex = os_mutex_create(NULL);
-
- os_sync_mutex_inited = TRUE;
-}
-
-/*********************************************************//**
-Frees created events and OS 'slow' mutexes. */
-UNIV_INTERN
-void
-os_sync_free(void)
-/*==============*/
-{
- os_event_t event;
- os_mutex_t mutex;
-
- os_sync_free_called = TRUE;
- event = UT_LIST_GET_FIRST(os_event_list);
-
- while (event) {
-
- os_event_free(event);
-
- event = UT_LIST_GET_FIRST(os_event_list);
- }
-
- mutex = UT_LIST_GET_FIRST(os_mutex_list);
-
- while (mutex) {
- if (mutex == os_sync_mutex) {
- /* Set the flag to FALSE so that we do not try to
- reserve os_sync_mutex any more in remaining freeing
- operations in shutdown */
- os_sync_mutex_inited = FALSE;
- }
-
- os_mutex_free(mutex);
-
- mutex = UT_LIST_GET_FIRST(os_mutex_list);
- }
- os_sync_free_called = FALSE;
-}
-
-/*********************************************************//**
-Creates an event semaphore, i.e., a semaphore which may just have two
-states: signaled and nonsignaled. The created event is manual reset: it
-must be reset explicitly by calling sync_os_reset_event.
-@return the event handle */
-UNIV_INTERN
-os_event_t
-os_event_create(
-/*============*/
- const char* name) /*!< in: the name of the event, if NULL
- the event is created without a name */
-{
-#ifdef __WIN__
- os_event_t event;
-
- event = ut_malloc(sizeof(struct os_event_struct));
-
- event->handle = CreateEvent(NULL, /* No security attributes */
- TRUE, /* Manual reset */
- FALSE, /* Initial state nonsignaled */
- (LPCTSTR) name);
- if (!event->handle) {
- fprintf(stderr,
- "InnoDB: Could not create a Windows event semaphore;"
- " Windows error %lu\n",
- (ulong) GetLastError());
- }
-#else /* Unix */
- os_event_t event;
-
- UT_NOT_USED(name);
-
- event = ut_malloc(sizeof(struct os_event_struct));
-
- os_fast_mutex_init(&(event->os_mutex));
-
- ut_a(0 == pthread_cond_init(&(event->cond_var), NULL));
-
- event->is_set = FALSE;
-
- /* We return this value in os_event_reset(), which can then be
- be used to pass to the os_event_wait_low(). The value of zero
- is reserved in os_event_wait_low() for the case when the
- caller does not want to pass any signal_count value. To
- distinguish between the two cases we initialize signal_count
- to 1 here. */
- event->signal_count = 1;
-#endif /* __WIN__ */
-
- /* The os_sync_mutex can be NULL because during startup an event
- can be created [ because it's embedded in the mutex/rwlock ] before
- this module has been initialized */
- if (os_sync_mutex != NULL) {
- os_mutex_enter(os_sync_mutex);
- }
-
- /* Put to the list of events */
- UT_LIST_ADD_FIRST(os_event_list, os_event_list, event);
-
- os_event_count++;
-
- if (os_sync_mutex != NULL) {
- os_mutex_exit(os_sync_mutex);
- }
-
- return(event);
-}
-
-/**********************************************************//**
-Sets an event semaphore to the signaled state: lets waiting threads
-proceed. */
-UNIV_INTERN
-void
-os_event_set(
-/*=========*/
- os_event_t event) /*!< in: event to set */
-{
-#ifdef __WIN__
- ut_a(event);
- ut_a(SetEvent(event->handle));
-#else
- ut_a(event);
-
- os_fast_mutex_lock(&(event->os_mutex));
-
- if (event->is_set) {
- /* Do nothing */
- } else {
- event->is_set = TRUE;
- event->signal_count += 1;
- ut_a(0 == pthread_cond_broadcast(&(event->cond_var)));
- }
-
- os_fast_mutex_unlock(&(event->os_mutex));
-#endif
-}
-
-/**********************************************************//**
-Resets an event semaphore to the nonsignaled state. Waiting threads will
-stop to wait for the event.
-The return value should be passed to os_even_wait_low() if it is desired
-that this thread should not wait in case of an intervening call to
-os_event_set() between this os_event_reset() and the
-os_event_wait_low() call. See comments for os_event_wait_low().
-@return current signal_count. */
-UNIV_INTERN
-ib_int64_t
-os_event_reset(
-/*===========*/
- os_event_t event) /*!< in: event to reset */
-{
- ib_int64_t ret = 0;
-
-#ifdef __WIN__
- ut_a(event);
-
- ut_a(ResetEvent(event->handle));
-#else
- ut_a(event);
-
- os_fast_mutex_lock(&(event->os_mutex));
-
- if (!event->is_set) {
- /* Do nothing */
- } else {
- event->is_set = FALSE;
- }
- ret = event->signal_count;
-
- os_fast_mutex_unlock(&(event->os_mutex));
-#endif
- return(ret);
-}
-
-/**********************************************************//**
-Frees an event object, without acquiring the global lock. */
-static
-void
-os_event_free_internal(
-/*===================*/
- os_event_t event) /*!< in: event to free */
-{
-#ifdef __WIN__
- ut_a(event);
-
- ut_a(CloseHandle(event->handle));
-#else
- ut_a(event);
-
- /* This is to avoid freeing the mutex twice */
- os_fast_mutex_free(&(event->os_mutex));
-
- ut_a(0 == pthread_cond_destroy(&(event->cond_var)));
-#endif
- /* Remove from the list of events */
-
- UT_LIST_REMOVE(os_event_list, os_event_list, event);
-
- os_event_count--;
-
- ut_free(event);
-}
-
-/**********************************************************//**
-Frees an event object. */
-UNIV_INTERN
-void
-os_event_free(
-/*==========*/
- os_event_t event) /*!< in: event to free */
-
-{
-#ifdef __WIN__
- ut_a(event);
-
- ut_a(CloseHandle(event->handle));
-#else
- ut_a(event);
-
- os_fast_mutex_free(&(event->os_mutex));
- ut_a(0 == pthread_cond_destroy(&(event->cond_var)));
-#endif
- /* Remove from the list of events */
-
- os_mutex_enter(os_sync_mutex);
-
- UT_LIST_REMOVE(os_event_list, os_event_list, event);
-
- os_event_count--;
-
- os_mutex_exit(os_sync_mutex);
-
- ut_free(event);
-}
-
-/**********************************************************//**
-Waits for an event object until it is in the signaled state. If
-srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the
-waiting thread when the event becomes signaled (or immediately if the
-event is already in the signaled state).
-
-Typically, if the event has been signalled after the os_event_reset()
-we'll return immediately because event->is_set == TRUE.
-There are, however, situations (e.g.: sync_array code) where we may
-lose this information. For example:
-
-thread A calls os_event_reset()
-thread B calls os_event_set() [event->is_set == TRUE]
-thread C calls os_event_reset() [event->is_set == FALSE]
-thread A calls os_event_wait() [infinite wait!]
-thread C calls os_event_wait() [infinite wait!]
-
-Where such a scenario is possible, to avoid infinite wait, the
-value returned by os_event_reset() should be passed in as
-reset_sig_count. */
-UNIV_INTERN
-void
-os_event_wait_low(
-/*==============*/
- os_event_t event, /*!< in: event to wait */
- ib_int64_t reset_sig_count)/*!< in: zero or the value
- returned by previous call of
- os_event_reset(). */
-{
-#ifdef __WIN__
- DWORD err;
-
- ut_a(event);
-
- UT_NOT_USED(reset_sig_count);
-
- /* Specify an infinite time limit for waiting */
- err = WaitForSingleObject(event->handle, INFINITE);
-
- ut_a(err == WAIT_OBJECT_0);
-
- if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
- os_thread_exit(NULL);
- }
-#else
- ib_int64_t old_signal_count;
-
- os_fast_mutex_lock(&(event->os_mutex));
-
- if (reset_sig_count) {
- old_signal_count = reset_sig_count;
- } else {
- old_signal_count = event->signal_count;
- }
-
- for (;;) {
- if (event->is_set == TRUE
- || event->signal_count != old_signal_count) {
-
- os_fast_mutex_unlock(&(event->os_mutex));
-
- if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
-
- os_thread_exit(NULL);
- }
- /* Ok, we may return */
-
- return;
- }
-
- pthread_cond_wait(&(event->cond_var), &(event->os_mutex));
-
- /* Solaris manual said that spurious wakeups may occur: we
- have to check if the event really has been signaled after
- we came here to wait */
- }
-#endif
-}
-
-/**********************************************************//**
-Waits for an event object until it is in the signaled state or
-a timeout is exceeded. In Unix the timeout is always infinite.
-@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
-UNIV_INTERN
-ulint
-os_event_wait_time(
-/*===============*/
- os_event_t event, /*!< in: event to wait */
- ulint time) /*!< in: timeout in microseconds, or
- OS_SYNC_INFINITE_TIME */
-{
-#ifdef __WIN__
- DWORD err;
-
- ut_a(event);
-
- if (time != OS_SYNC_INFINITE_TIME) {
- err = WaitForSingleObject(event->handle, (DWORD) time / 1000);
- } else {
- err = WaitForSingleObject(event->handle, INFINITE);
- }
-
- if (err == WAIT_OBJECT_0) {
-
- return(0);
- } else if (err == WAIT_TIMEOUT) {
-
- return(OS_SYNC_TIME_EXCEEDED);
- } else {
- ut_error;
- return(1000000); /* dummy value to eliminate compiler warn. */
- }
-#else
- UT_NOT_USED(time);
-
- /* In Posix this is just an ordinary, infinite wait */
-
- os_event_wait(event);
-
- return(0);
-#endif
-}
-
-#ifdef __WIN__
-/**********************************************************//**
-Waits for any event in an OS native event array. Returns if even a single
-one is signaled or becomes signaled.
-@return index of the event which was signaled */
-UNIV_INTERN
-ulint
-os_event_wait_multiple(
-/*===================*/
- ulint n, /*!< in: number of events in the
- array */
- os_native_event_t* native_event_array)
- /*!< in: pointer to an array of event
- handles */
-{
- DWORD index;
-
- ut_a(native_event_array);
- ut_a(n > 0);
-
- index = WaitForMultipleObjects((DWORD) n, native_event_array,
- FALSE, /* Wait for any 1 event */
- INFINITE); /* Infinite wait time
- limit */
- ut_a(index >= WAIT_OBJECT_0); /* NOTE: Pointless comparison */
- ut_a(index < WAIT_OBJECT_0 + n);
-
- if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
- os_thread_exit(NULL);
- }
-
- return(index - WAIT_OBJECT_0);
-}
-#endif
-
-/*********************************************************//**
-Creates an operating system mutex semaphore. Because these are slow, the
-mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
-@return the mutex handle */
-UNIV_INTERN
-os_mutex_t
-os_mutex_create(
-/*============*/
- const char* name) /*!< in: the name of the mutex, if NULL
- the mutex is created without a name */
-{
-#ifdef __WIN__
- HANDLE mutex;
- os_mutex_t mutex_str;
-
- mutex = CreateMutex(NULL, /* No security attributes */
- FALSE, /* Initial state: no owner */
- (LPCTSTR) name);
- ut_a(mutex);
-#else
- os_fast_mutex_t* mutex;
- os_mutex_t mutex_str;
-
- UT_NOT_USED(name);
-
- mutex = ut_malloc(sizeof(os_fast_mutex_t));
-
- os_fast_mutex_init(mutex);
-#endif
- mutex_str = ut_malloc(sizeof(os_mutex_str_t));
-
- mutex_str->handle = mutex;
- mutex_str->count = 0;
- mutex_str->event = os_event_create(NULL);
-
- if (UNIV_LIKELY(os_sync_mutex_inited)) {
- /* When creating os_sync_mutex itself we cannot reserve it */
- os_mutex_enter(os_sync_mutex);
- }
-
- UT_LIST_ADD_FIRST(os_mutex_list, os_mutex_list, mutex_str);
-
- os_mutex_count++;
-
- if (UNIV_LIKELY(os_sync_mutex_inited)) {
- os_mutex_exit(os_sync_mutex);
- }
-
- return(mutex_str);
-}
-
-/**********************************************************//**
-Acquires ownership of a mutex semaphore. */
-UNIV_INTERN
-void
-os_mutex_enter(
-/*===========*/
- os_mutex_t mutex) /*!< in: mutex to acquire */
-{
-#ifdef __WIN__
- DWORD err;
-
- ut_a(mutex);
-
- /* Specify infinite time limit for waiting */
- err = WaitForSingleObject(mutex->handle, INFINITE);
-
- ut_a(err == WAIT_OBJECT_0);
-
- (mutex->count)++;
- ut_a(mutex->count == 1);
-#else
- os_fast_mutex_lock(mutex->handle);
-
- (mutex->count)++;
-
- ut_a(mutex->count == 1);
-#endif
-}
-
-/**********************************************************//**
-Releases ownership of a mutex. */
-UNIV_INTERN
-void
-os_mutex_exit(
-/*==========*/
- os_mutex_t mutex) /*!< in: mutex to release */
-{
- ut_a(mutex);
-
- ut_a(mutex->count == 1);
-
- (mutex->count)--;
-#ifdef __WIN__
- ut_a(ReleaseMutex(mutex->handle));
-#else
- os_fast_mutex_unlock(mutex->handle);
-#endif
-}
-
-/**********************************************************//**
-Frees a mutex object. */
-UNIV_INTERN
-void
-os_mutex_free(
-/*==========*/
- os_mutex_t mutex) /*!< in: mutex to free */
-{
- ut_a(mutex);
-
- if (UNIV_LIKELY(!os_sync_free_called)) {
- os_event_free_internal(mutex->event);
- }
-
- if (UNIV_LIKELY(os_sync_mutex_inited)) {
- os_mutex_enter(os_sync_mutex);
- }
-
- UT_LIST_REMOVE(os_mutex_list, os_mutex_list, mutex);
-
- os_mutex_count--;
-
- if (UNIV_LIKELY(os_sync_mutex_inited)) {
- os_mutex_exit(os_sync_mutex);
- }
-
-#ifdef __WIN__
- ut_a(CloseHandle(mutex->handle));
-
- ut_free(mutex);
-#else
- os_fast_mutex_free(mutex->handle);
- ut_free(mutex->handle);
- ut_free(mutex);
-#endif
-}
-
-/*********************************************************//**
-Initializes an operating system fast mutex semaphore. */
-UNIV_INTERN
-void
-os_fast_mutex_init(
-/*===============*/
- os_fast_mutex_t* fast_mutex) /*!< in: fast mutex */
-{
-#ifdef __WIN__
- ut_a(fast_mutex);
-
- InitializeCriticalSection((LPCRITICAL_SECTION) fast_mutex);
-#else
- ut_a(0 == pthread_mutex_init(fast_mutex, MY_MUTEX_INIT_FAST));
-#endif
- if (UNIV_LIKELY(os_sync_mutex_inited)) {
- /* When creating os_sync_mutex itself (in Unix) we cannot
- reserve it */
-
- os_mutex_enter(os_sync_mutex);
- }
-
- os_fast_mutex_count++;
-
- if (UNIV_LIKELY(os_sync_mutex_inited)) {
- os_mutex_exit(os_sync_mutex);
- }
-}
-
-/**********************************************************//**
-Acquires ownership of a fast mutex. */
-UNIV_INTERN
-void
-os_fast_mutex_lock(
-/*===============*/
- os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */
-{
-#ifdef __WIN__
- EnterCriticalSection((LPCRITICAL_SECTION) fast_mutex);
-#else
- pthread_mutex_lock(fast_mutex);
-#endif
-}
-
-/**********************************************************//**
-Releases ownership of a fast mutex. */
-UNIV_INTERN
-void
-os_fast_mutex_unlock(
-/*=================*/
- os_fast_mutex_t* fast_mutex) /*!< in: mutex to release */
-{
-#ifdef __WIN__
- LeaveCriticalSection(fast_mutex);
-#else
- pthread_mutex_unlock(fast_mutex);
-#endif
-}
-
-/**********************************************************//**
-Frees a mutex object. */
-UNIV_INTERN
-void
-os_fast_mutex_free(
-/*===============*/
- os_fast_mutex_t* fast_mutex) /*!< in: mutex to free */
-{
-#ifdef __WIN__
- ut_a(fast_mutex);
-
- DeleteCriticalSection((LPCRITICAL_SECTION) fast_mutex);
-#else
- int ret;
-
- ret = pthread_mutex_destroy(fast_mutex);
-
- if (UNIV_UNLIKELY(ret != 0)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: error: return value %lu when calling\n"
- "InnoDB: pthread_mutex_destroy().\n", (ulint)ret);
- fprintf(stderr,
- "InnoDB: Byte contents of the pthread mutex at %p:\n",
- (void*) fast_mutex);
- ut_print_buf(stderr, fast_mutex, sizeof(os_fast_mutex_t));
- putc('\n', stderr);
- }
-#endif
- if (UNIV_LIKELY(os_sync_mutex_inited)) {
- /* When freeing the last mutexes, we have
- already freed os_sync_mutex */
-
- os_mutex_enter(os_sync_mutex);
- }
-
- ut_ad(os_fast_mutex_count > 0);
- os_fast_mutex_count--;
-
- if (UNIV_LIKELY(os_sync_mutex_inited)) {
- os_mutex_exit(os_sync_mutex);
- }
-}
diff --git a/storage/innodb_plugin/os/os0thread.c b/storage/innodb_plugin/os/os0thread.c
deleted file mode 100644
index 34818ada804..00000000000
--- a/storage/innodb_plugin/os/os0thread.c
+++ /dev/null
@@ -1,375 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file os/os0thread.c
-The interface to the operating system thread control primitives
-
-Created 9/8/1995 Heikki Tuuri
-*******************************************************/
-
-#include "os0thread.h"
-#ifdef UNIV_NONINL
-#include "os0thread.ic"
-#endif
-
-#ifdef __WIN__
-#include <windows.h>
-#endif
-
-#ifndef UNIV_HOTBACKUP
-#include "srv0srv.h"
-#include "os0sync.h"
-
-/***************************************************************//**
-Compares two thread ids for equality.
-@return TRUE if equal */
-UNIV_INTERN
-ibool
-os_thread_eq(
-/*=========*/
- os_thread_id_t a, /*!< in: OS thread or thread id */
- os_thread_id_t b) /*!< in: OS thread or thread id */
-{
-#ifdef __WIN__
- if (a == b) {
- return(TRUE);
- }
-
- return(FALSE);
-#else
- if (pthread_equal(a, b)) {
- return(TRUE);
- }
-
- return(FALSE);
-#endif
-}
-
-/****************************************************************//**
-Converts an OS thread id to a ulint. It is NOT guaranteed that the ulint is
-unique for the thread though!
-@return thread identifier as a number */
-UNIV_INTERN
-ulint
-os_thread_pf(
-/*=========*/
- os_thread_id_t a) /*!< in: OS thread identifier */
-{
-#ifdef UNIV_HPUX10
- /* In HP-UX-10.20 a pthread_t is a struct of 3 fields: field1, field2,
- field3. We do not know if field1 determines the thread uniquely. */
-
- return((ulint)(a.field1));
-#else
- return((ulint)a);
-#endif
-}
-
-/*****************************************************************//**
-Returns the thread identifier of current thread. Currently the thread
-identifier in Unix is the thread handle itself. Note that in HP-UX
-pthread_t is a struct of 3 fields.
-@return current thread identifier */
-UNIV_INTERN
-os_thread_id_t
-os_thread_get_curr_id(void)
-/*=======================*/
-{
-#ifdef __WIN__
- return(GetCurrentThreadId());
-#else
- return(pthread_self());
-#endif
-}
-
-/****************************************************************//**
-Creates a new thread of execution. The execution starts from
-the function given. The start function takes a void* parameter
-and returns an ulint.
-@return handle to the thread */
-UNIV_INTERN
-os_thread_t
-os_thread_create(
-/*=============*/
-#ifndef __WIN__
- os_posix_f_t start_f,
-#else
- ulint (*start_f)(void*), /*!< in: pointer to function
- from which to start */
-#endif
- void* arg, /*!< in: argument to start
- function */
- os_thread_id_t* thread_id) /*!< out: id of the created
- thread, or NULL */
-{
-#ifdef __WIN__
- os_thread_t thread;
- DWORD win_thread_id;
-
- os_mutex_enter(os_sync_mutex);
- os_thread_count++;
- os_mutex_exit(os_sync_mutex);
-
- thread = CreateThread(NULL, /* no security attributes */
- 0, /* default size stack */
- (LPTHREAD_START_ROUTINE)start_f,
- arg,
- 0, /* thread runs immediately */
- &win_thread_id);
-
- if (srv_set_thread_priorities) {
-
- /* Set created thread priority the same as a normal query
- in MYSQL: we try to prevent starvation of threads by
- assigning same priority QUERY_PRIOR to all */
-
- ut_a(SetThreadPriority(thread, srv_query_thread_priority));
- }
-
- if (thread_id) {
- *thread_id = win_thread_id;
- }
-
- return(thread);
-#else
- int ret;
- os_thread_t pthread;
- pthread_attr_t attr;
-
-#ifndef UNIV_HPUX10
- pthread_attr_init(&attr);
-#endif
-
-#ifdef UNIV_AIX
- /* We must make sure a thread stack is at least 32 kB, otherwise
- InnoDB might crash; we do not know if the default stack size on
- AIX is always big enough. An empirical test on AIX-4.3 suggested
- the size was 96 kB, though. */
-
- ret = pthread_attr_setstacksize(&attr,
- (size_t)(PTHREAD_STACK_MIN
- + 32 * 1024));
- if (ret) {
- fprintf(stderr,
- "InnoDB: Error: pthread_attr_setstacksize"
- " returned %d\n", ret);
- exit(1);
- }
-#endif
-#ifdef __NETWARE__
- ret = pthread_attr_setstacksize(&attr,
- (size_t) NW_THD_STACKSIZE);
- if (ret) {
- fprintf(stderr,
- "InnoDB: Error: pthread_attr_setstacksize"
- " returned %d\n", ret);
- exit(1);
- }
-#endif
- os_mutex_enter(os_sync_mutex);
- os_thread_count++;
- os_mutex_exit(os_sync_mutex);
-
-#ifdef UNIV_HPUX10
- ret = pthread_create(&pthread, pthread_attr_default, start_f, arg);
-#else
- ret = pthread_create(&pthread, &attr, start_f, arg);
-#endif
- if (ret) {
- fprintf(stderr,
- "InnoDB: Error: pthread_create returned %d\n", ret);
- exit(1);
- }
-
-#ifndef UNIV_HPUX10
- pthread_attr_destroy(&attr);
-#endif
- if (srv_set_thread_priorities) {
-
- my_pthread_setprio(pthread, srv_query_thread_priority);
- }
-
- if (thread_id) {
- *thread_id = pthread;
- }
-
- return(pthread);
-#endif
-}
-
-/*****************************************************************//**
-Exits the current thread. */
-UNIV_INTERN
-void
-os_thread_exit(
-/*===========*/
- void* exit_value) /*!< in: exit value; in Windows this void*
- is cast as a DWORD */
-{
-#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "Thread exits, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif
- os_mutex_enter(os_sync_mutex);
- os_thread_count--;
- os_mutex_exit(os_sync_mutex);
-
-#ifdef __WIN__
- ExitThread((DWORD)exit_value);
-#else
- pthread_detach(pthread_self());
- pthread_exit(exit_value);
-#endif
-}
-
-/*****************************************************************//**
-Returns handle to the current thread.
-@return current thread handle */
-UNIV_INTERN
-os_thread_t
-os_thread_get_curr(void)
-/*====================*/
-{
-#ifdef __WIN__
- return(GetCurrentThread());
-#else
- return(pthread_self());
-#endif
-}
-
-/*****************************************************************//**
-Advises the os to give up remainder of the thread's time slice. */
-UNIV_INTERN
-void
-os_thread_yield(void)
-/*=================*/
-{
-#if defined(__WIN__)
- Sleep(0);
-#elif (defined(HAVE_SCHED_YIELD) && defined(HAVE_SCHED_H))
- sched_yield();
-#elif defined(HAVE_PTHREAD_YIELD_ZERO_ARG)
- pthread_yield();
-#elif defined(HAVE_PTHREAD_YIELD_ONE_ARG)
- pthread_yield(0);
-#else
- os_thread_sleep(0);
-#endif
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*****************************************************************//**
-The thread sleeps at least the time given in microseconds. */
-UNIV_INTERN
-void
-os_thread_sleep(
-/*============*/
- ulint tm) /*!< in: time in microseconds */
-{
-#ifdef __WIN__
- Sleep((DWORD) tm / 1000);
-#elif defined(__NETWARE__)
- delay(tm / 1000);
-#else
- struct timeval t;
-
- t.tv_sec = tm / 1000000;
- t.tv_usec = tm % 1000000;
-
- select(0, NULL, NULL, NULL, &t);
-#endif
-}
-
-#ifndef UNIV_HOTBACKUP
-/******************************************************************//**
-Sets a thread priority. */
-UNIV_INTERN
-void
-os_thread_set_priority(
-/*===================*/
- os_thread_t handle, /*!< in: OS handle to the thread */
- ulint pri) /*!< in: priority */
-{
-#ifdef __WIN__
- int os_pri;
-
- if (pri == OS_THREAD_PRIORITY_BACKGROUND) {
- os_pri = THREAD_PRIORITY_BELOW_NORMAL;
- } else if (pri == OS_THREAD_PRIORITY_NORMAL) {
- os_pri = THREAD_PRIORITY_NORMAL;
- } else if (pri == OS_THREAD_PRIORITY_ABOVE_NORMAL) {
- os_pri = THREAD_PRIORITY_HIGHEST;
- } else {
- ut_error;
- }
-
- ut_a(SetThreadPriority(handle, os_pri));
-#else
- UT_NOT_USED(handle);
- UT_NOT_USED(pri);
-#endif
-}
-
-/******************************************************************//**
-Gets a thread priority.
-@return priority */
-UNIV_INTERN
-ulint
-os_thread_get_priority(
-/*===================*/
- os_thread_t handle __attribute__((unused)))
- /*!< in: OS handle to the thread */
-{
-#ifdef __WIN__
- int os_pri;
- ulint pri;
-
- os_pri = GetThreadPriority(handle);
-
- if (os_pri == THREAD_PRIORITY_BELOW_NORMAL) {
- pri = OS_THREAD_PRIORITY_BACKGROUND;
- } else if (os_pri == THREAD_PRIORITY_NORMAL) {
- pri = OS_THREAD_PRIORITY_NORMAL;
- } else if (os_pri == THREAD_PRIORITY_HIGHEST) {
- pri = OS_THREAD_PRIORITY_ABOVE_NORMAL;
- } else {
- ut_error;
- }
-
- return(pri);
-#else
- return(0);
-#endif
-}
-
-/******************************************************************//**
-Gets the last operating system error code for the calling thread.
-@return last error on Windows, 0 otherwise */
-UNIV_INTERN
-ulint
-os_thread_get_last_error(void)
-/*==========================*/
-{
-#ifdef __WIN__
- return(GetLastError());
-#else
- return(0);
-#endif
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/page/page0cur.c b/storage/innodb_plugin/page/page0cur.c
deleted file mode 100644
index f10f16a7dd9..00000000000
--- a/storage/innodb_plugin/page/page0cur.c
+++ /dev/null
@@ -1,1987 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file page/page0cur.c
-The page cursor
-
-Created 10/4/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "page0cur.h"
-#ifdef UNIV_NONINL
-#include "page0cur.ic"
-#endif
-
-#include "page0zip.h"
-#include "mtr0log.h"
-#include "log0recv.h"
-#include "ut0ut.h"
-#ifndef UNIV_HOTBACKUP
-#include "rem0cmp.h"
-
-#ifdef PAGE_CUR_ADAPT
-# ifdef UNIV_SEARCH_PERF_STAT
-static ulint page_cur_short_succ = 0;
-# endif /* UNIV_SEARCH_PERF_STAT */
-
-/*******************************************************************//**
-This is a linear congruential generator PRNG. Returns a pseudo random
-number between 0 and 2^64-1 inclusive. The formula and the constants
-being used are:
-X[n+1] = (a * X[n] + c) mod m
-where:
-X[0] = ut_time_us(NULL)
-a = 1103515245 (3^5 * 5 * 7 * 129749)
-c = 12345 (3 * 5 * 823)
-m = 18446744073709551616 (2^64)
-
-@return number between 0 and 2^64-1 */
-static
-ib_uint64_t
-page_cur_lcg_prng(void)
-/*===================*/
-{
-#define LCG_a 1103515245
-#define LCG_c 12345
- static ib_uint64_t lcg_current = 0;
- static ibool initialized = FALSE;
-
- if (!initialized) {
- lcg_current = (ib_uint64_t) ut_time_us(NULL);
- initialized = TRUE;
- }
-
- /* no need to "% 2^64" explicitly because lcg_current is
- 64 bit and this will be done anyway */
- lcg_current = LCG_a * lcg_current + LCG_c;
-
- return(lcg_current);
-}
-
-/****************************************************************//**
-Tries a search shortcut based on the last insert.
-@return TRUE on success */
-UNIV_INLINE
-ibool
-page_cur_try_search_shortcut(
-/*=========================*/
- const buf_block_t* block, /*!< in: index page */
- const dict_index_t* index, /*!< in: record descriptor */
- const dtuple_t* tuple, /*!< in: data tuple */
- ulint* iup_matched_fields,
- /*!< in/out: already matched
- fields in upper limit record */
- ulint* iup_matched_bytes,
- /*!< in/out: already matched
- bytes in a field not yet
- completely matched */
- ulint* ilow_matched_fields,
- /*!< in/out: already matched
- fields in lower limit record */
- ulint* ilow_matched_bytes,
- /*!< in/out: already matched
- bytes in a field not yet
- completely matched */
- page_cur_t* cursor) /*!< out: page cursor */
-{
- const rec_t* rec;
- const rec_t* next_rec;
- ulint low_match;
- ulint low_bytes;
- ulint up_match;
- ulint up_bytes;
-#ifdef UNIV_SEARCH_DEBUG
- page_cur_t cursor2;
-#endif
- ibool success = FALSE;
- const page_t* page = buf_block_get_frame(block);
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- ut_ad(dtuple_check_typed(tuple));
-
- rec = page_header_get_ptr(page, PAGE_LAST_INSERT);
- offsets = rec_get_offsets(rec, index, offsets,
- dtuple_get_n_fields(tuple), &heap);
-
- ut_ad(rec);
- ut_ad(page_rec_is_user_rec(rec));
-
- ut_pair_min(&low_match, &low_bytes,
- *ilow_matched_fields, *ilow_matched_bytes,
- *iup_matched_fields, *iup_matched_bytes);
-
- up_match = low_match;
- up_bytes = low_bytes;
-
- if (page_cmp_dtuple_rec_with_match(tuple, rec, offsets,
- &low_match, &low_bytes) < 0) {
- goto exit_func;
- }
-
- next_rec = page_rec_get_next_const(rec);
- offsets = rec_get_offsets(next_rec, index, offsets,
- dtuple_get_n_fields(tuple), &heap);
-
- if (page_cmp_dtuple_rec_with_match(tuple, next_rec, offsets,
- &up_match, &up_bytes) >= 0) {
- goto exit_func;
- }
-
- page_cur_position(rec, block, cursor);
-
-#ifdef UNIV_SEARCH_DEBUG
- page_cur_search_with_match(block, index, tuple, PAGE_CUR_DBG,
- iup_matched_fields,
- iup_matched_bytes,
- ilow_matched_fields,
- ilow_matched_bytes,
- &cursor2);
- ut_a(cursor2.rec == cursor->rec);
-
- if (!page_rec_is_supremum(next_rec)) {
-
- ut_a(*iup_matched_fields == up_match);
- ut_a(*iup_matched_bytes == up_bytes);
- }
-
- ut_a(*ilow_matched_fields == low_match);
- ut_a(*ilow_matched_bytes == low_bytes);
-#endif
- if (!page_rec_is_supremum(next_rec)) {
-
- *iup_matched_fields = up_match;
- *iup_matched_bytes = up_bytes;
- }
-
- *ilow_matched_fields = low_match;
- *ilow_matched_bytes = low_bytes;
-
-#ifdef UNIV_SEARCH_PERF_STAT
- page_cur_short_succ++;
-#endif
- success = TRUE;
-exit_func:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(success);
-}
-
-#endif
-
-#ifdef PAGE_CUR_LE_OR_EXTENDS
-/****************************************************************//**
-Checks if the nth field in a record is a character type field which extends
-the nth field in tuple, i.e., the field is longer or equal in length and has
-common first characters.
-@return TRUE if rec field extends tuple field */
-static
-ibool
-page_cur_rec_field_extends(
-/*=======================*/
- const dtuple_t* tuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: record */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n) /*!< in: compare nth field */
-{
- const dtype_t* type;
- const dfield_t* dfield;
- const byte* rec_f;
- ulint rec_f_len;
-
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- dfield = dtuple_get_nth_field(tuple, n);
-
- type = dfield_get_type(dfield);
-
- rec_f = rec_get_nth_field(rec, offsets, n, &rec_f_len);
-
- if (type->mtype == DATA_VARCHAR
- || type->mtype == DATA_CHAR
- || type->mtype == DATA_FIXBINARY
- || type->mtype == DATA_BINARY
- || type->mtype == DATA_BLOB
- || type->mtype == DATA_VARMYSQL
- || type->mtype == DATA_MYSQL) {
-
- if (dfield_get_len(dfield) != UNIV_SQL_NULL
- && rec_f_len != UNIV_SQL_NULL
- && rec_f_len >= dfield_get_len(dfield)
- && !cmp_data_data_slow(type->mtype, type->prtype,
- dfield_get_data(dfield),
- dfield_get_len(dfield),
- rec_f, dfield_get_len(dfield))) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
-
-/****************************************************************//**
-Searches the right position for a page cursor. */
-UNIV_INTERN
-void
-page_cur_search_with_match(
-/*=======================*/
- const buf_block_t* block, /*!< in: buffer block */
- const dict_index_t* index, /*!< in: record descriptor */
- const dtuple_t* tuple, /*!< in: data tuple */
- ulint mode, /*!< in: PAGE_CUR_L,
- PAGE_CUR_LE, PAGE_CUR_G, or
- PAGE_CUR_GE */
- ulint* iup_matched_fields,
- /*!< in/out: already matched
- fields in upper limit record */
- ulint* iup_matched_bytes,
- /*!< in/out: already matched
- bytes in a field not yet
- completely matched */
- ulint* ilow_matched_fields,
- /*!< in/out: already matched
- fields in lower limit record */
- ulint* ilow_matched_bytes,
- /*!< in/out: already matched
- bytes in a field not yet
- completely matched */
- page_cur_t* cursor) /*!< out: page cursor */
-{
- ulint up;
- ulint low;
- ulint mid;
- const page_t* page;
- const page_dir_slot_t* slot;
- const rec_t* up_rec;
- const rec_t* low_rec;
- const rec_t* mid_rec;
- ulint up_matched_fields;
- ulint up_matched_bytes;
- ulint low_matched_fields;
- ulint low_matched_bytes;
- ulint cur_matched_fields;
- ulint cur_matched_bytes;
- int cmp;
-#ifdef UNIV_SEARCH_DEBUG
- int dbg_cmp;
- ulint dbg_matched_fields;
- ulint dbg_matched_bytes;
-#endif
-#ifdef UNIV_ZIP_DEBUG
- const page_zip_des_t* page_zip = buf_block_get_page_zip(block);
-#endif /* UNIV_ZIP_DEBUG */
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- ut_ad(block && tuple && iup_matched_fields && iup_matched_bytes
- && ilow_matched_fields && ilow_matched_bytes && cursor);
- ut_ad(dtuple_validate(tuple));
-#ifdef UNIV_DEBUG
-# ifdef PAGE_CUR_DBG
- if (mode != PAGE_CUR_DBG)
-# endif /* PAGE_CUR_DBG */
-# ifdef PAGE_CUR_LE_OR_EXTENDS
- if (mode != PAGE_CUR_LE_OR_EXTENDS)
-# endif /* PAGE_CUR_LE_OR_EXTENDS */
- ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
- || mode == PAGE_CUR_G || mode == PAGE_CUR_GE);
-#endif /* UNIV_DEBUG */
- page = buf_block_get_frame(block);
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
-#endif /* UNIV_ZIP_DEBUG */
-
- page_check_dir(page);
-
-#ifdef PAGE_CUR_ADAPT
- if (page_is_leaf(page)
- && (mode == PAGE_CUR_LE)
- && (page_header_get_field(page, PAGE_N_DIRECTION) > 3)
- && (page_header_get_ptr(page, PAGE_LAST_INSERT))
- && (page_header_get_field(page, PAGE_DIRECTION) == PAGE_RIGHT)) {
-
- if (page_cur_try_search_shortcut(
- block, index, tuple,
- iup_matched_fields, iup_matched_bytes,
- ilow_matched_fields, ilow_matched_bytes,
- cursor)) {
- return;
- }
- }
-# ifdef PAGE_CUR_DBG
- if (mode == PAGE_CUR_DBG) {
- mode = PAGE_CUR_LE;
- }
-# endif
-#endif
-
- /* The following flag does not work for non-latin1 char sets because
- cmp_full_field does not tell how many bytes matched */
-#ifdef PAGE_CUR_LE_OR_EXTENDS
- ut_a(mode != PAGE_CUR_LE_OR_EXTENDS);
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
-
- /* If mode PAGE_CUR_G is specified, we are trying to position the
- cursor to answer a query of the form "tuple < X", where tuple is
- the input parameter, and X denotes an arbitrary physical record on
- the page. We want to position the cursor on the first X which
- satisfies the condition. */
-
- up_matched_fields = *iup_matched_fields;
- up_matched_bytes = *iup_matched_bytes;
- low_matched_fields = *ilow_matched_fields;
- low_matched_bytes = *ilow_matched_bytes;
-
- /* Perform binary search. First the search is done through the page
- directory, after that as a linear search in the list of records
- owned by the upper limit directory slot. */
-
- low = 0;
- up = page_dir_get_n_slots(page) - 1;
-
- /* Perform binary search until the lower and upper limit directory
- slots come to the distance 1 of each other */
-
- while (up - low > 1) {
- mid = (low + up) / 2;
- slot = page_dir_get_nth_slot(page, mid);
- mid_rec = page_dir_slot_get_rec(slot);
-
- ut_pair_min(&cur_matched_fields, &cur_matched_bytes,
- low_matched_fields, low_matched_bytes,
- up_matched_fields, up_matched_bytes);
-
- offsets = rec_get_offsets(mid_rec, index, offsets,
- dtuple_get_n_fields_cmp(tuple),
- &heap);
-
- cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets,
- &cur_matched_fields,
- &cur_matched_bytes);
- if (UNIV_LIKELY(cmp > 0)) {
-low_slot_match:
- low = mid;
- low_matched_fields = cur_matched_fields;
- low_matched_bytes = cur_matched_bytes;
-
- } else if (UNIV_EXPECT(cmp, -1)) {
-#ifdef PAGE_CUR_LE_OR_EXTENDS
- if (mode == PAGE_CUR_LE_OR_EXTENDS
- && page_cur_rec_field_extends(
- tuple, mid_rec, offsets,
- cur_matched_fields)) {
-
- goto low_slot_match;
- }
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
-up_slot_match:
- up = mid;
- up_matched_fields = cur_matched_fields;
- up_matched_bytes = cur_matched_bytes;
-
- } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE
-#ifdef PAGE_CUR_LE_OR_EXTENDS
- || mode == PAGE_CUR_LE_OR_EXTENDS
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
- ) {
-
- goto low_slot_match;
- } else {
-
- goto up_slot_match;
- }
- }
-
- slot = page_dir_get_nth_slot(page, low);
- low_rec = page_dir_slot_get_rec(slot);
- slot = page_dir_get_nth_slot(page, up);
- up_rec = page_dir_slot_get_rec(slot);
-
- /* Perform linear search until the upper and lower records come to
- distance 1 of each other. */
-
- while (page_rec_get_next_const(low_rec) != up_rec) {
-
- mid_rec = page_rec_get_next_const(low_rec);
-
- ut_pair_min(&cur_matched_fields, &cur_matched_bytes,
- low_matched_fields, low_matched_bytes,
- up_matched_fields, up_matched_bytes);
-
- offsets = rec_get_offsets(mid_rec, index, offsets,
- dtuple_get_n_fields_cmp(tuple),
- &heap);
-
- cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets,
- &cur_matched_fields,
- &cur_matched_bytes);
- if (UNIV_LIKELY(cmp > 0)) {
-low_rec_match:
- low_rec = mid_rec;
- low_matched_fields = cur_matched_fields;
- low_matched_bytes = cur_matched_bytes;
-
- } else if (UNIV_EXPECT(cmp, -1)) {
-#ifdef PAGE_CUR_LE_OR_EXTENDS
- if (mode == PAGE_CUR_LE_OR_EXTENDS
- && page_cur_rec_field_extends(
- tuple, mid_rec, offsets,
- cur_matched_fields)) {
-
- goto low_rec_match;
- }
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
-up_rec_match:
- up_rec = mid_rec;
- up_matched_fields = cur_matched_fields;
- up_matched_bytes = cur_matched_bytes;
- } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE
-#ifdef PAGE_CUR_LE_OR_EXTENDS
- || mode == PAGE_CUR_LE_OR_EXTENDS
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
- ) {
-
- goto low_rec_match;
- } else {
-
- goto up_rec_match;
- }
- }
-
-#ifdef UNIV_SEARCH_DEBUG
-
- /* Check that the lower and upper limit records have the
- right alphabetical order compared to tuple. */
- dbg_matched_fields = 0;
- dbg_matched_bytes = 0;
-
- offsets = rec_get_offsets(low_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, low_rec, offsets,
- &dbg_matched_fields,
- &dbg_matched_bytes);
- if (mode == PAGE_CUR_G) {
- ut_a(dbg_cmp >= 0);
- } else if (mode == PAGE_CUR_GE) {
- ut_a(dbg_cmp == 1);
- } else if (mode == PAGE_CUR_L) {
- ut_a(dbg_cmp == 1);
- } else if (mode == PAGE_CUR_LE) {
- ut_a(dbg_cmp >= 0);
- }
-
- if (!page_rec_is_infimum(low_rec)) {
-
- ut_a(low_matched_fields == dbg_matched_fields);
- ut_a(low_matched_bytes == dbg_matched_bytes);
- }
-
- dbg_matched_fields = 0;
- dbg_matched_bytes = 0;
-
- offsets = rec_get_offsets(up_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, up_rec, offsets,
- &dbg_matched_fields,
- &dbg_matched_bytes);
- if (mode == PAGE_CUR_G) {
- ut_a(dbg_cmp == -1);
- } else if (mode == PAGE_CUR_GE) {
- ut_a(dbg_cmp <= 0);
- } else if (mode == PAGE_CUR_L) {
- ut_a(dbg_cmp <= 0);
- } else if (mode == PAGE_CUR_LE) {
- ut_a(dbg_cmp == -1);
- }
-
- if (!page_rec_is_supremum(up_rec)) {
-
- ut_a(up_matched_fields == dbg_matched_fields);
- ut_a(up_matched_bytes == dbg_matched_bytes);
- }
-#endif
- if (mode <= PAGE_CUR_GE) {
- page_cur_position(up_rec, block, cursor);
- } else {
- page_cur_position(low_rec, block, cursor);
- }
-
- *iup_matched_fields = up_matched_fields;
- *iup_matched_bytes = up_matched_bytes;
- *ilow_matched_fields = low_matched_fields;
- *ilow_matched_bytes = low_matched_bytes;
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-}
-
-/***********************************************************//**
-Positions a page cursor on a randomly chosen user record on a page. If there
-are no user records, sets the cursor on the infimum record. */
-UNIV_INTERN
-void
-page_cur_open_on_rnd_user_rec(
-/*==========================*/
- buf_block_t* block, /*!< in: page */
- page_cur_t* cursor) /*!< out: page cursor */
-{
- ulint rnd;
- ulint n_recs = page_get_n_recs(buf_block_get_frame(block));
-
- page_cur_set_before_first(block, cursor);
-
- if (UNIV_UNLIKELY(n_recs == 0)) {
-
- return;
- }
-
- rnd = (ulint) (page_cur_lcg_prng() % n_recs);
-
- do {
- page_cur_move_to_next(cursor);
- } while (rnd--);
-}
-
-/***********************************************************//**
-Writes the log record of a record insert on a page. */
-static
-void
-page_cur_insert_rec_write_log(
-/*==========================*/
- rec_t* insert_rec, /*!< in: inserted physical record */
- ulint rec_size, /*!< in: insert_rec size */
- rec_t* cursor_rec, /*!< in: record the
- cursor is pointing to */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ulint cur_rec_size;
- ulint extra_size;
- ulint cur_extra_size;
- const byte* ins_ptr;
- byte* log_ptr;
- const byte* log_end;
- ulint i;
-
- ut_a(rec_size < UNIV_PAGE_SIZE);
- ut_ad(page_align(insert_rec) == page_align(cursor_rec));
- ut_ad(!page_rec_is_comp(insert_rec)
- == !dict_table_is_comp(index->table));
-
- {
- mem_heap_t* heap = NULL;
- ulint cur_offs_[REC_OFFS_NORMAL_SIZE];
- ulint ins_offs_[REC_OFFS_NORMAL_SIZE];
-
- ulint* cur_offs;
- ulint* ins_offs;
-
- rec_offs_init(cur_offs_);
- rec_offs_init(ins_offs_);
-
- cur_offs = rec_get_offsets(cursor_rec, index, cur_offs_,
- ULINT_UNDEFINED, &heap);
- ins_offs = rec_get_offsets(insert_rec, index, ins_offs_,
- ULINT_UNDEFINED, &heap);
-
- extra_size = rec_offs_extra_size(ins_offs);
- cur_extra_size = rec_offs_extra_size(cur_offs);
- ut_ad(rec_size == rec_offs_size(ins_offs));
- cur_rec_size = rec_offs_size(cur_offs);
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-
- ins_ptr = insert_rec - extra_size;
-
- i = 0;
-
- if (cur_extra_size == extra_size) {
- ulint min_rec_size = ut_min(cur_rec_size, rec_size);
-
- const byte* cur_ptr = cursor_rec - cur_extra_size;
-
- /* Find out the first byte in insert_rec which differs from
- cursor_rec; skip the bytes in the record info */
-
- do {
- if (*ins_ptr == *cur_ptr) {
- i++;
- ins_ptr++;
- cur_ptr++;
- } else if ((i < extra_size)
- && (i >= extra_size
- - page_rec_get_base_extra_size
- (insert_rec))) {
- i = extra_size;
- ins_ptr = insert_rec;
- cur_ptr = cursor_rec;
- } else {
- break;
- }
- } while (i < min_rec_size);
- }
-
- if (mtr_get_log_mode(mtr) != MTR_LOG_SHORT_INSERTS) {
-
- if (page_rec_is_comp(insert_rec)) {
- log_ptr = mlog_open_and_write_index(
- mtr, insert_rec, index, MLOG_COMP_REC_INSERT,
- 2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN);
- if (UNIV_UNLIKELY(!log_ptr)) {
- /* Logging in mtr is switched off
- during crash recovery: in that case
- mlog_open returns NULL */
- return;
- }
- } else {
- log_ptr = mlog_open(mtr, 11
- + 2 + 5 + 1 + 5 + 5
- + MLOG_BUF_MARGIN);
- if (UNIV_UNLIKELY(!log_ptr)) {
- /* Logging in mtr is switched off
- during crash recovery: in that case
- mlog_open returns NULL */
- return;
- }
-
- log_ptr = mlog_write_initial_log_record_fast(
- insert_rec, MLOG_REC_INSERT, log_ptr, mtr);
- }
-
- log_end = &log_ptr[2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN];
- /* Write the cursor rec offset as a 2-byte ulint */
- mach_write_to_2(log_ptr, page_offset(cursor_rec));
- log_ptr += 2;
- } else {
- log_ptr = mlog_open(mtr, 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN);
- if (!log_ptr) {
- /* Logging in mtr is switched off during crash
- recovery: in that case mlog_open returns NULL */
- return;
- }
- log_end = &log_ptr[5 + 1 + 5 + 5 + MLOG_BUF_MARGIN];
- }
-
- if (page_rec_is_comp(insert_rec)) {
- if (UNIV_UNLIKELY
- (rec_get_info_and_status_bits(insert_rec, TRUE)
- != rec_get_info_and_status_bits(cursor_rec, TRUE))) {
-
- goto need_extra_info;
- }
- } else {
- if (UNIV_UNLIKELY
- (rec_get_info_and_status_bits(insert_rec, FALSE)
- != rec_get_info_and_status_bits(cursor_rec, FALSE))) {
-
- goto need_extra_info;
- }
- }
-
- if (extra_size != cur_extra_size || rec_size != cur_rec_size) {
-need_extra_info:
- /* Write the record end segment length
- and the extra info storage flag */
- log_ptr += mach_write_compressed(log_ptr,
- 2 * (rec_size - i) + 1);
-
- /* Write the info bits */
- mach_write_to_1(log_ptr,
- rec_get_info_and_status_bits(
- insert_rec,
- page_rec_is_comp(insert_rec)));
- log_ptr++;
-
- /* Write the record origin offset */
- log_ptr += mach_write_compressed(log_ptr, extra_size);
-
- /* Write the mismatch index */
- log_ptr += mach_write_compressed(log_ptr, i);
-
- ut_a(i < UNIV_PAGE_SIZE);
- ut_a(extra_size < UNIV_PAGE_SIZE);
- } else {
- /* Write the record end segment length
- and the extra info storage flag */
- log_ptr += mach_write_compressed(log_ptr, 2 * (rec_size - i));
- }
-
- /* Write to the log the inserted index record end segment which
- differs from the cursor record */
-
- rec_size -= i;
-
- if (log_ptr + rec_size <= log_end) {
- memcpy(log_ptr, ins_ptr, rec_size);
- mlog_close(mtr, log_ptr + rec_size);
- } else {
- mlog_close(mtr, log_ptr);
- ut_a(rec_size < UNIV_PAGE_SIZE);
- mlog_catenate_string(mtr, ins_ptr, rec_size);
- }
-}
-#else /* !UNIV_HOTBACKUP */
-# define page_cur_insert_rec_write_log(ins_rec,size,cur,index,mtr) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Parses a log record of a record insert on a page.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-page_cur_parse_insert_rec(
-/*======================*/
- ibool is_short,/*!< in: TRUE if short inserts */
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- buf_block_t* block, /*!< in: page or NULL */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr or NULL */
-{
- ulint origin_offset;
- ulint end_seg_len;
- ulint mismatch_index;
- page_t* page;
- rec_t* cursor_rec;
- byte buf1[1024];
- byte* buf;
- byte* ptr2 = ptr;
- ulint info_and_status_bits = 0; /* remove warning */
- page_cur_t cursor;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- page = block ? buf_block_get_frame(block) : NULL;
-
- if (is_short) {
- cursor_rec = page_rec_get_prev(page_get_supremum_rec(page));
- } else {
- ulint offset;
-
- /* Read the cursor rec offset as a 2-byte ulint */
-
- if (UNIV_UNLIKELY(end_ptr < ptr + 2)) {
-
- return(NULL);
- }
-
- offset = mach_read_from_2(ptr);
- ptr += 2;
-
- cursor_rec = page + offset;
-
- if (UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)) {
-
- recv_sys->found_corrupt_log = TRUE;
-
- return(NULL);
- }
- }
-
- ptr = mach_parse_compressed(ptr, end_ptr, &end_seg_len);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- if (UNIV_UNLIKELY(end_seg_len >= UNIV_PAGE_SIZE << 1)) {
- recv_sys->found_corrupt_log = TRUE;
-
- return(NULL);
- }
-
- if (end_seg_len & 0x1UL) {
- /* Read the info bits */
-
- if (end_ptr < ptr + 1) {
-
- return(NULL);
- }
-
- info_and_status_bits = mach_read_from_1(ptr);
- ptr++;
-
- ptr = mach_parse_compressed(ptr, end_ptr, &origin_offset);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- ut_a(origin_offset < UNIV_PAGE_SIZE);
-
- ptr = mach_parse_compressed(ptr, end_ptr, &mismatch_index);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- ut_a(mismatch_index < UNIV_PAGE_SIZE);
- }
-
- if (UNIV_UNLIKELY(end_ptr < ptr + (end_seg_len >> 1))) {
-
- return(NULL);
- }
-
- if (!block) {
-
- return(ptr + (end_seg_len >> 1));
- }
-
- ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
- ut_ad(!buf_block_get_page_zip(block) || page_is_comp(page));
-
- /* Read from the log the inserted index record end segment which
- differs from the cursor record */
-
- offsets = rec_get_offsets(cursor_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- if (!(end_seg_len & 0x1UL)) {
- info_and_status_bits = rec_get_info_and_status_bits(
- cursor_rec, page_is_comp(page));
- origin_offset = rec_offs_extra_size(offsets);
- mismatch_index = rec_offs_size(offsets) - (end_seg_len >> 1);
- }
-
- end_seg_len >>= 1;
-
- if (mismatch_index + end_seg_len < sizeof buf1) {
- buf = buf1;
- } else {
- buf = mem_alloc(mismatch_index + end_seg_len);
- }
-
- /* Build the inserted record to buf */
-
- if (UNIV_UNLIKELY(mismatch_index >= UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "Is short %lu, info_and_status_bits %lu, offset %lu, "
- "o_offset %lu\n"
- "mismatch index %lu, end_seg_len %lu\n"
- "parsed len %lu\n",
- (ulong) is_short, (ulong) info_and_status_bits,
- (ulong) page_offset(cursor_rec),
- (ulong) origin_offset,
- (ulong) mismatch_index, (ulong) end_seg_len,
- (ulong) (ptr - ptr2));
-
- fputs("Dump of 300 bytes of log:\n", stderr);
- ut_print_buf(stderr, ptr2, 300);
- putc('\n', stderr);
-
- buf_page_print(page, 0);
-
- ut_error;
- }
-
- ut_memcpy(buf, rec_get_start(cursor_rec, offsets), mismatch_index);
- ut_memcpy(buf + mismatch_index, ptr, end_seg_len);
-
- if (page_is_comp(page)) {
- rec_set_info_and_status_bits(buf + origin_offset,
- info_and_status_bits);
- } else {
- rec_set_info_bits_old(buf + origin_offset,
- info_and_status_bits);
- }
-
- page_cur_position(cursor_rec, block, &cursor);
-
- offsets = rec_get_offsets(buf + origin_offset, index, offsets,
- ULINT_UNDEFINED, &heap);
- if (UNIV_UNLIKELY(!page_cur_rec_insert(&cursor,
- buf + origin_offset,
- index, offsets, mtr))) {
- /* The redo log record should only have been written
- after the write was successful. */
- ut_error;
- }
-
- if (buf != buf1) {
-
- mem_free(buf);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- return(ptr + end_seg_len);
-}
-
-/***********************************************************//**
-Inserts a record next to page cursor on an uncompressed page.
-Returns pointer to inserted record if succeed, i.e., enough
-space available, NULL otherwise. The cursor stays at the same position.
-@return pointer to record if succeed, NULL otherwise */
-UNIV_INTERN
-rec_t*
-page_cur_insert_rec_low(
-/*====================*/
- rec_t* current_rec,/*!< in: pointer to current record after
- which the new record is inserted */
- dict_index_t* index, /*!< in: record descriptor */
- const rec_t* rec, /*!< in: pointer to a physical record */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
- mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
-{
- byte* insert_buf;
- ulint rec_size;
- page_t* page; /*!< the relevant page */
- rec_t* last_insert; /*!< cursor position at previous
- insert */
- rec_t* free_rec; /*!< a free record that was reused,
- or NULL */
- rec_t* insert_rec; /*!< inserted record */
- ulint heap_no; /*!< heap number of the inserted
- record */
-
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- page = page_align(current_rec);
- ut_ad(dict_table_is_comp(index->table)
- == (ibool) !!page_is_comp(page));
-
- ut_ad(!page_rec_is_supremum(current_rec));
-
- /* 1. Get the size of the physical record in the page */
- rec_size = rec_offs_size(offsets);
-
-#ifdef UNIV_DEBUG_VALGRIND
- {
- const void* rec_start
- = rec - rec_offs_extra_size(offsets);
- ulint extra_size
- = rec_offs_extra_size(offsets)
- - (rec_offs_comp(offsets)
- ? REC_N_NEW_EXTRA_BYTES
- : REC_N_OLD_EXTRA_BYTES);
-
- /* All data bytes of the record must be valid. */
- UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
- /* The variable-length header must be valid. */
- UNIV_MEM_ASSERT_RW(rec_start, extra_size);
- }
-#endif /* UNIV_DEBUG_VALGRIND */
-
- /* 2. Try to find suitable space from page memory management */
-
- free_rec = page_header_get_ptr(page, PAGE_FREE);
- if (UNIV_LIKELY_NULL(free_rec)) {
- /* Try to allocate from the head of the free list. */
- ulint foffsets_[REC_OFFS_NORMAL_SIZE];
- ulint* foffsets = foffsets_;
- mem_heap_t* heap = NULL;
-
- rec_offs_init(foffsets_);
-
- foffsets = rec_get_offsets(free_rec, index, foffsets,
- ULINT_UNDEFINED, &heap);
- if (rec_offs_size(foffsets) < rec_size) {
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- goto use_heap;
- }
-
- insert_buf = free_rec - rec_offs_extra_size(foffsets);
-
- if (page_is_comp(page)) {
- heap_no = rec_get_heap_no_new(free_rec);
- page_mem_alloc_free(page, NULL,
- rec_get_next_ptr(free_rec, TRUE),
- rec_size);
- } else {
- heap_no = rec_get_heap_no_old(free_rec);
- page_mem_alloc_free(page, NULL,
- rec_get_next_ptr(free_rec, FALSE),
- rec_size);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- } else {
-use_heap:
- free_rec = NULL;
- insert_buf = page_mem_alloc_heap(page, NULL,
- rec_size, &heap_no);
-
- if (UNIV_UNLIKELY(insert_buf == NULL)) {
- return(NULL);
- }
- }
-
- /* 3. Create the record */
- insert_rec = rec_copy(insert_buf, rec, offsets);
- rec_offs_make_valid(insert_rec, index, offsets);
-
- /* 4. Insert the record in the linked list of records */
- ut_ad(current_rec != insert_rec);
-
- {
- /* next record after current before the insertion */
- rec_t* next_rec = page_rec_get_next(current_rec);
-#ifdef UNIV_DEBUG
- if (page_is_comp(page)) {
- ut_ad(rec_get_status(current_rec)
- <= REC_STATUS_INFIMUM);
- ut_ad(rec_get_status(insert_rec) < REC_STATUS_INFIMUM);
- ut_ad(rec_get_status(next_rec) != REC_STATUS_INFIMUM);
- }
-#endif
- page_rec_set_next(insert_rec, next_rec);
- page_rec_set_next(current_rec, insert_rec);
- }
-
- page_header_set_field(page, NULL, PAGE_N_RECS,
- 1 + page_get_n_recs(page));
-
- /* 5. Set the n_owned field in the inserted record to zero,
- and set the heap_no field */
- if (page_is_comp(page)) {
- rec_set_n_owned_new(insert_rec, NULL, 0);
- rec_set_heap_no_new(insert_rec, heap_no);
- } else {
- rec_set_n_owned_old(insert_rec, 0);
- rec_set_heap_no_old(insert_rec, heap_no);
- }
-
- UNIV_MEM_ASSERT_RW(rec_get_start(insert_rec, offsets),
- rec_offs_size(offsets));
- /* 6. Update the last insertion info in page header */
-
- last_insert = page_header_get_ptr(page, PAGE_LAST_INSERT);
- ut_ad(!last_insert || !page_is_comp(page)
- || rec_get_node_ptr_flag(last_insert)
- == rec_get_node_ptr_flag(insert_rec));
-
- if (UNIV_UNLIKELY(last_insert == NULL)) {
- page_header_set_field(page, NULL, PAGE_DIRECTION,
- PAGE_NO_DIRECTION);
- page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);
-
- } else if ((last_insert == current_rec)
- && (page_header_get_field(page, PAGE_DIRECTION)
- != PAGE_LEFT)) {
-
- page_header_set_field(page, NULL, PAGE_DIRECTION,
- PAGE_RIGHT);
- page_header_set_field(page, NULL, PAGE_N_DIRECTION,
- page_header_get_field(
- page, PAGE_N_DIRECTION) + 1);
-
- } else if ((page_rec_get_next(insert_rec) == last_insert)
- && (page_header_get_field(page, PAGE_DIRECTION)
- != PAGE_RIGHT)) {
-
- page_header_set_field(page, NULL, PAGE_DIRECTION,
- PAGE_LEFT);
- page_header_set_field(page, NULL, PAGE_N_DIRECTION,
- page_header_get_field(
- page, PAGE_N_DIRECTION) + 1);
- } else {
- page_header_set_field(page, NULL, PAGE_DIRECTION,
- PAGE_NO_DIRECTION);
- page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);
- }
-
- page_header_set_ptr(page, NULL, PAGE_LAST_INSERT, insert_rec);
-
- /* 7. It remains to update the owner record. */
- {
- rec_t* owner_rec = page_rec_find_owner_rec(insert_rec);
- ulint n_owned;
- if (page_is_comp(page)) {
- n_owned = rec_get_n_owned_new(owner_rec);
- rec_set_n_owned_new(owner_rec, NULL, n_owned + 1);
- } else {
- n_owned = rec_get_n_owned_old(owner_rec);
- rec_set_n_owned_old(owner_rec, n_owned + 1);
- }
-
- /* 8. Now we have incremented the n_owned field of the owner
- record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED,
- we have to split the corresponding directory slot in two. */
-
- if (UNIV_UNLIKELY(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED)) {
- page_dir_split_slot(
- page, NULL,
- page_dir_find_owner_slot(owner_rec));
- }
- }
-
- /* 9. Write log record of the insert */
- if (UNIV_LIKELY(mtr != NULL)) {
- page_cur_insert_rec_write_log(insert_rec, rec_size,
- current_rec, index, mtr);
- }
-
- return(insert_rec);
-}
-
-/***********************************************************//**
-Compresses or reorganizes a page after an optimistic insert.
-@return rec if succeed, NULL otherwise */
-static
-rec_t*
-page_cur_insert_rec_zip_reorg(
-/*==========================*/
- rec_t** current_rec,/*!< in/out: pointer to current record after
- which the new record is inserted */
- buf_block_t* block, /*!< in: buffer block */
- dict_index_t* index, /*!< in: record descriptor */
- rec_t* rec, /*!< in: inserted record */
- page_t* page, /*!< in: uncompressed page */
- page_zip_des_t* page_zip,/*!< in: compressed page */
- mtr_t* mtr) /*!< in: mini-transaction, or NULL */
-{
- ulint pos;
-
- /* Recompress or reorganize and recompress the page. */
- if (UNIV_LIKELY(page_zip_compress(page_zip, page, index, mtr))) {
- return(rec);
- }
-
- /* Before trying to reorganize the page,
- store the number of preceding records on the page. */
- pos = page_rec_get_n_recs_before(rec);
-
- if (page_zip_reorganize(block, index, mtr)) {
- /* The page was reorganized: Find rec by seeking to pos,
- and update *current_rec. */
- rec = page + PAGE_NEW_INFIMUM;
-
- while (--pos) {
- rec = page + rec_get_next_offs(rec, TRUE);
- }
-
- *current_rec = rec;
- rec = page + rec_get_next_offs(rec, TRUE);
-
- return(rec);
- }
-
- /* Out of space: restore the page */
- if (!page_zip_decompress(page_zip, page, FALSE)) {
- ut_error; /* Memory corrupted? */
- }
- ut_ad(page_validate(page, index));
- return(NULL);
-}
-
-/***********************************************************//**
-Inserts a record next to page cursor on a compressed and uncompressed
-page. Returns pointer to inserted record if succeed, i.e.,
-enough space available, NULL otherwise.
-The cursor stays at the same position.
-@return pointer to record if succeed, NULL otherwise */
-UNIV_INTERN
-rec_t*
-page_cur_insert_rec_zip(
-/*====================*/
- rec_t** current_rec,/*!< in/out: pointer to current record after
- which the new record is inserted */
- buf_block_t* block, /*!< in: buffer block of *current_rec */
- dict_index_t* index, /*!< in: record descriptor */
- const rec_t* rec, /*!< in: pointer to a physical record */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
- mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
-{
- byte* insert_buf;
- ulint rec_size;
- page_t* page; /*!< the relevant page */
- rec_t* last_insert; /*!< cursor position at previous
- insert */
- rec_t* free_rec; /*!< a free record that was reused,
- or NULL */
- rec_t* insert_rec; /*!< inserted record */
- ulint heap_no; /*!< heap number of the inserted
- record */
- page_zip_des_t* page_zip;
-
- page_zip = buf_block_get_page_zip(block);
- ut_ad(page_zip);
-
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- page = page_align(*current_rec);
- ut_ad(dict_table_is_comp(index->table));
- ut_ad(page_is_comp(page));
-
- ut_ad(!page_rec_is_supremum(*current_rec));
-#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page));
-#endif /* UNIV_ZIP_DEBUG */
-
- /* 1. Get the size of the physical record in the page */
- rec_size = rec_offs_size(offsets);
-
-#ifdef UNIV_DEBUG_VALGRIND
- {
- const void* rec_start
- = rec - rec_offs_extra_size(offsets);
- ulint extra_size
- = rec_offs_extra_size(offsets)
- - (rec_offs_comp(offsets)
- ? REC_N_NEW_EXTRA_BYTES
- : REC_N_OLD_EXTRA_BYTES);
-
- /* All data bytes of the record must be valid. */
- UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
- /* The variable-length header must be valid. */
- UNIV_MEM_ASSERT_RW(rec_start, extra_size);
- }
-#endif /* UNIV_DEBUG_VALGRIND */
-
- /* 2. Try to find suitable space from page memory management */
- if (!page_zip_available(page_zip, dict_index_is_clust(index),
- rec_size, 1)) {
-
- /* Try compressing the whole page afterwards. */
- insert_rec = page_cur_insert_rec_low(*current_rec,
- index, rec, offsets,
- NULL);
-
- if (UNIV_LIKELY(insert_rec != NULL)) {
- insert_rec = page_cur_insert_rec_zip_reorg(
- current_rec, block, index, insert_rec,
- page, page_zip, mtr);
- }
-
- return(insert_rec);
- }
-
- free_rec = page_header_get_ptr(page, PAGE_FREE);
- if (UNIV_LIKELY_NULL(free_rec)) {
- /* Try to allocate from the head of the free list. */
- lint extra_size_diff;
- ulint foffsets_[REC_OFFS_NORMAL_SIZE];
- ulint* foffsets = foffsets_;
- mem_heap_t* heap = NULL;
-
- rec_offs_init(foffsets_);
-
- foffsets = rec_get_offsets(free_rec, index, foffsets,
- ULINT_UNDEFINED, &heap);
- if (rec_offs_size(foffsets) < rec_size) {
-too_small:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- goto use_heap;
- }
-
- insert_buf = free_rec - rec_offs_extra_size(foffsets);
-
- /* On compressed pages, do not relocate records from
- the free list. If extra_size would grow, use the heap. */
- extra_size_diff
- = rec_offs_extra_size(offsets)
- - rec_offs_extra_size(foffsets);
-
- if (UNIV_UNLIKELY(extra_size_diff < 0)) {
- /* Add an offset to the extra_size. */
- if (rec_offs_size(foffsets)
- < rec_size - extra_size_diff) {
-
- goto too_small;
- }
-
- insert_buf -= extra_size_diff;
- } else if (UNIV_UNLIKELY(extra_size_diff)) {
- /* Do not allow extra_size to grow */
-
- goto too_small;
- }
-
- heap_no = rec_get_heap_no_new(free_rec);
- page_mem_alloc_free(page, page_zip,
- rec_get_next_ptr(free_rec, TRUE),
- rec_size);
-
- if (!page_is_leaf(page)) {
- /* Zero out the node pointer of free_rec,
- in case it will not be overwritten by
- insert_rec. */
-
- ut_ad(rec_size > REC_NODE_PTR_SIZE);
-
- if (rec_offs_extra_size(foffsets)
- + rec_offs_data_size(foffsets) > rec_size) {
-
- memset(rec_get_end(free_rec, foffsets)
- - REC_NODE_PTR_SIZE, 0,
- REC_NODE_PTR_SIZE);
- }
- } else if (dict_index_is_clust(index)) {
- /* Zero out the DB_TRX_ID and DB_ROLL_PTR
- columns of free_rec, in case it will not be
- overwritten by insert_rec. */
-
- ulint trx_id_col;
- ulint trx_id_offs;
- ulint len;
-
- trx_id_col = dict_index_get_sys_col_pos(index,
- DATA_TRX_ID);
- ut_ad(trx_id_col > 0);
- ut_ad(trx_id_col != ULINT_UNDEFINED);
-
- trx_id_offs = rec_get_nth_field_offs(foffsets,
- trx_id_col, &len);
- ut_ad(len == DATA_TRX_ID_LEN);
-
- if (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN + trx_id_offs
- + rec_offs_extra_size(foffsets) > rec_size) {
- /* We will have to zero out the
- DB_TRX_ID and DB_ROLL_PTR, because
- they will not be fully overwritten by
- insert_rec. */
-
- memset(free_rec + trx_id_offs, 0,
- DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
- }
-
- ut_ad(free_rec + trx_id_offs + DATA_TRX_ID_LEN
- == rec_get_nth_field(free_rec, foffsets,
- trx_id_col + 1, &len));
- ut_ad(len == DATA_ROLL_PTR_LEN);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- } else {
-use_heap:
- free_rec = NULL;
- insert_buf = page_mem_alloc_heap(page, page_zip,
- rec_size, &heap_no);
-
- if (UNIV_UNLIKELY(insert_buf == NULL)) {
- return(NULL);
- }
-
- page_zip_dir_add_slot(page_zip, dict_index_is_clust(index));
- }
-
- /* 3. Create the record */
- insert_rec = rec_copy(insert_buf, rec, offsets);
- rec_offs_make_valid(insert_rec, index, offsets);
-
- /* 4. Insert the record in the linked list of records */
- ut_ad(*current_rec != insert_rec);
-
- {
- /* next record after current before the insertion */
- rec_t* next_rec = page_rec_get_next(*current_rec);
- ut_ad(rec_get_status(*current_rec)
- <= REC_STATUS_INFIMUM);
- ut_ad(rec_get_status(insert_rec) < REC_STATUS_INFIMUM);
- ut_ad(rec_get_status(next_rec) != REC_STATUS_INFIMUM);
-
- page_rec_set_next(insert_rec, next_rec);
- page_rec_set_next(*current_rec, insert_rec);
- }
-
- page_header_set_field(page, page_zip, PAGE_N_RECS,
- 1 + page_get_n_recs(page));
-
- /* 5. Set the n_owned field in the inserted record to zero,
- and set the heap_no field */
- rec_set_n_owned_new(insert_rec, NULL, 0);
- rec_set_heap_no_new(insert_rec, heap_no);
-
- UNIV_MEM_ASSERT_RW(rec_get_start(insert_rec, offsets),
- rec_offs_size(offsets));
-
- page_zip_dir_insert(page_zip, *current_rec, free_rec, insert_rec);
-
- /* 6. Update the last insertion info in page header */
-
- last_insert = page_header_get_ptr(page, PAGE_LAST_INSERT);
- ut_ad(!last_insert
- || rec_get_node_ptr_flag(last_insert)
- == rec_get_node_ptr_flag(insert_rec));
-
- if (UNIV_UNLIKELY(last_insert == NULL)) {
- page_header_set_field(page, page_zip, PAGE_DIRECTION,
- PAGE_NO_DIRECTION);
- page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
-
- } else if ((last_insert == *current_rec)
- && (page_header_get_field(page, PAGE_DIRECTION)
- != PAGE_LEFT)) {
-
- page_header_set_field(page, page_zip, PAGE_DIRECTION,
- PAGE_RIGHT);
- page_header_set_field(page, page_zip, PAGE_N_DIRECTION,
- page_header_get_field(
- page, PAGE_N_DIRECTION) + 1);
-
- } else if ((page_rec_get_next(insert_rec) == last_insert)
- && (page_header_get_field(page, PAGE_DIRECTION)
- != PAGE_RIGHT)) {
-
- page_header_set_field(page, page_zip, PAGE_DIRECTION,
- PAGE_LEFT);
- page_header_set_field(page, page_zip, PAGE_N_DIRECTION,
- page_header_get_field(
- page, PAGE_N_DIRECTION) + 1);
- } else {
- page_header_set_field(page, page_zip, PAGE_DIRECTION,
- PAGE_NO_DIRECTION);
- page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
- }
-
- page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, insert_rec);
-
- /* 7. It remains to update the owner record. */
- {
- rec_t* owner_rec = page_rec_find_owner_rec(insert_rec);
- ulint n_owned;
-
- n_owned = rec_get_n_owned_new(owner_rec);
- rec_set_n_owned_new(owner_rec, page_zip, n_owned + 1);
-
- /* 8. Now we have incremented the n_owned field of the owner
- record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED,
- we have to split the corresponding directory slot in two. */
-
- if (UNIV_UNLIKELY(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED)) {
- page_dir_split_slot(
- page, page_zip,
- page_dir_find_owner_slot(owner_rec));
- }
- }
-
- page_zip_write_rec(page_zip, insert_rec, index, offsets, 1);
-
- /* 9. Write log record of the insert */
- if (UNIV_LIKELY(mtr != NULL)) {
- page_cur_insert_rec_write_log(insert_rec, rec_size,
- *current_rec, index, mtr);
- }
-
- return(insert_rec);
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Writes a log record of copying a record list end to a new created page.
-@return 4-byte field where to write the log data length, or NULL if
-logging is disabled */
-UNIV_INLINE
-byte*
-page_copy_rec_list_to_created_page_write_log(
-/*=========================================*/
- page_t* page, /*!< in: index page */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr */
-{
- byte* log_ptr;
-
- ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
-
- log_ptr = mlog_open_and_write_index(mtr, page, index,
- page_is_comp(page)
- ? MLOG_COMP_LIST_END_COPY_CREATED
- : MLOG_LIST_END_COPY_CREATED, 4);
- if (UNIV_LIKELY(log_ptr != NULL)) {
- mlog_close(mtr, log_ptr + 4);
- }
-
- return(log_ptr);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************//**
-Parses a log record of copying a record list end to a new created page.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-page_parse_copy_rec_list_to_created_page(
-/*=====================================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- buf_block_t* block, /*!< in: page or NULL */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr or NULL */
-{
- byte* rec_end;
- ulint log_data_len;
- page_t* page;
- page_zip_des_t* page_zip;
-
- if (ptr + 4 > end_ptr) {
-
- return(NULL);
- }
-
- log_data_len = mach_read_from_4(ptr);
- ptr += 4;
-
- rec_end = ptr + log_data_len;
-
- if (rec_end > end_ptr) {
-
- return(NULL);
- }
-
- if (!block) {
-
- return(rec_end);
- }
-
- while (ptr < rec_end) {
- ptr = page_cur_parse_insert_rec(TRUE, ptr, end_ptr,
- block, index, mtr);
- }
-
- ut_a(ptr == rec_end);
-
- page = buf_block_get_frame(block);
- page_zip = buf_block_get_page_zip(block);
-
- page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
- page_header_set_field(page, page_zip, PAGE_DIRECTION,
- PAGE_NO_DIRECTION);
- page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
-
- return(rec_end);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Copies records from page to a newly created page, from a given record onward,
-including that record. Infimum and supremum records are not copied. */
-UNIV_INTERN
-void
-page_copy_rec_list_end_to_created_page(
-/*===================================*/
- page_t* new_page, /*!< in/out: index page to copy to; debug
-   builds assert that its heap count is still
-   PAGE_HEAP_NO_USER_LOW and that rec is not on it */
- rec_t* rec, /*!< in: first record to copy; if this is the
-   infimum, copying starts from its successor */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_dir_slot_t* slot = 0; /* remove warning */
- byte* heap_top; /* address where the next record copy is placed */
- rec_t* insert_rec = 0; /* remove warning */
- rec_t* prev_rec; /* most recently linked record on new_page,
-   initially the infimum */
- ulint count; /* records copied since a directory slot was last set */
- ulint n_recs; /* total number of records copied */
- ulint slot_index; /* number of the last directory slot set in the loop */
- ulint rec_size;
- ulint log_mode; /* log mode to restore before returning */
- byte* log_ptr;
- ulint log_data_len; /* first the size of mtr->log before the inserts,
-   later the number of bytes the inserts added */
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- ut_ad(page_dir_get_n_heap(new_page) == PAGE_HEAP_NO_USER_LOW);
- ut_ad(page_align(rec) != new_page);
- ut_ad(page_rec_is_comp(rec) == page_is_comp(new_page));
-
- if (page_rec_is_infimum(rec)) {
-
- rec = page_rec_get_next(rec);
- }
-
- if (page_rec_is_supremum(rec)) {
-
- return; /* there is no user record to copy */
- }
-
-#ifdef UNIV_DEBUG
- /* To pass the debug tests we have to set these dummy values
- in the debug version. Both fields are overwritten with their
- real values at the end of this function. */
- page_dir_set_n_slots(new_page, NULL, UNIV_PAGE_SIZE / 2);
- page_header_set_ptr(new_page, NULL, PAGE_HEAP_TOP,
- new_page + UNIV_PAGE_SIZE - 1);
-#endif
-
- log_ptr = page_copy_rec_list_to_created_page_write_log(new_page,
- index, mtr);
-
- log_data_len = dyn_array_get_data_size(&(mtr->log));
-
- /* Individual inserts are logged in a shorter form */
-
- log_mode = mtr_set_log_mode(mtr, MTR_LOG_SHORT_INSERTS);
-
- prev_rec = page_get_infimum_rec(new_page);
- if (page_is_comp(new_page)) {
- heap_top = new_page + PAGE_NEW_SUPREMUM_END;
- } else {
- heap_top = new_page + PAGE_OLD_SUPREMUM_END;
- }
- count = 0;
- slot_index = 0;
- n_recs = 0;
-
- do { /* one iteration per copied record, up to the supremum */
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- insert_rec = rec_copy(heap_top, rec, offsets);
-
- if (page_is_comp(new_page)) {
- rec_set_next_offs_new(prev_rec,
- page_offset(insert_rec));
-
- rec_set_n_owned_new(insert_rec, NULL, 0);
- rec_set_heap_no_new(insert_rec,
- PAGE_HEAP_NO_USER_LOW + n_recs);
- } else {
- rec_set_next_offs_old(prev_rec,
- page_offset(insert_rec));
-
- rec_set_n_owned_old(insert_rec, 0);
- rec_set_heap_no_old(insert_rec,
- PAGE_HEAP_NO_USER_LOW + n_recs);
- }
-
- count++;
- n_recs++;
-
- if (UNIV_UNLIKELY
- (count == (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2)) {
-
- slot_index++; /* start a new directory slot owned by insert_rec */
-
- slot = page_dir_get_nth_slot(new_page, slot_index);
-
- page_dir_slot_set_rec(slot, insert_rec);
- page_dir_slot_set_n_owned(slot, NULL, count);
-
- count = 0;
- }
-
- rec_size = rec_offs_size(offsets);
-
- ut_ad(heap_top < new_page + UNIV_PAGE_SIZE);
-
- heap_top += rec_size;
-
- page_cur_insert_rec_write_log(insert_rec, rec_size, prev_rec,
- index, mtr);
- prev_rec = insert_rec;
- rec = page_rec_get_next(rec);
- } while (!page_rec_is_supremum(rec));
-
- if ((slot_index > 0) && (count + 1
- + (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2
- <= PAGE_DIR_SLOT_MAX_N_OWNED)) {
- /* We can merge the two last dir slots. This operation is
- here to make this function imitate exactly the equivalent
- task made using page_cur_insert_rec, which we use in database
- recovery to reproduce the task performed by this function.
- To be able to check the correctness of recovery, it is good
- that it imitates exactly. The records of the last slot set in
- the loop are added to count, so that the supremum slot set
- below owns them. */
-
- count += (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2;
-
- page_dir_slot_set_n_owned(slot, NULL, 0);
-
- slot_index--;
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- log_data_len = dyn_array_get_data_size(&(mtr->log)) - log_data_len;
-
- ut_a(log_data_len < 100 * UNIV_PAGE_SIZE);
-
- if (UNIV_LIKELY(log_ptr != NULL)) {
- mach_write_to_4(log_ptr, log_data_len); /* growth of mtr->log during the loop */
- }
-
- if (page_is_comp(new_page)) { /* link the last copied record to the supremum */
- rec_set_next_offs_new(insert_rec, PAGE_NEW_SUPREMUM);
- } else {
- rec_set_next_offs_old(insert_rec, PAGE_OLD_SUPREMUM);
- }
-
- slot = page_dir_get_nth_slot(new_page, 1 + slot_index);
-
- page_dir_slot_set_rec(slot, page_get_supremum_rec(new_page));
- page_dir_slot_set_n_owned(slot, NULL, count + 1); /* + 1 for the supremum itself */
-
- page_dir_set_n_slots(new_page, NULL, 2 + slot_index);
- page_header_set_ptr(new_page, NULL, PAGE_HEAP_TOP, heap_top);
- page_dir_set_n_heap(new_page, NULL, PAGE_HEAP_NO_USER_LOW + n_recs);
- page_header_set_field(new_page, NULL, PAGE_N_RECS, n_recs);
-
- page_header_set_ptr(new_page, NULL, PAGE_LAST_INSERT, NULL);
- page_header_set_field(new_page, NULL, PAGE_DIRECTION,
- PAGE_NO_DIRECTION);
- page_header_set_field(new_page, NULL, PAGE_N_DIRECTION, 0);
-
- /* Restore the log mode */
-
- mtr_set_log_mode(mtr, log_mode);
-}
-
-/***********************************************************//**
-Writes log record of a record delete on a page. The log record type is
-MLOG_COMP_REC_DELETE or MLOG_REC_DELETE, depending on
-page_rec_is_comp(rec), and its body is the 2-byte page offset of rec.
-Nothing is written if mlog_open_and_write_index() returns NULL. */
-UNIV_INLINE
-void
-page_cur_delete_rec_write_log(
-/*==========================*/
- rec_t* rec, /*!< in: record to be deleted */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- byte* log_ptr;
-
- ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
-
- log_ptr = mlog_open_and_write_index(mtr, rec, index,
- page_rec_is_comp(rec)
- ? MLOG_COMP_REC_DELETE
- : MLOG_REC_DELETE, 2); /* 2 matches the two bytes written below */
-
- if (!log_ptr) {
- /* Logging in mtr is switched off during crash recovery:
- in that case mlog_open returns NULL */
- return;
- }
-
- /* Write the cursor rec offset as a 2-byte ulint */
- mach_write_to_2(log_ptr, page_offset(rec));
-
- mlog_close(mtr, log_ptr + 2);
-}
-#else /* !UNIV_HOTBACKUP */
-# define page_cur_delete_rec_write_log(rec,index,mtr) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Parses log record of a record delete on a page. The body that is parsed
-is a 2-byte record offset within the page. If block is non-NULL, the
-record at that offset is deleted by calling page_cur_delete_rec().
-@return pointer to record end, or NULL if fewer than 2 bytes are
-available between ptr and end_ptr */
-UNIV_INTERN
-byte*
-page_cur_parse_delete_rec(
-/*======================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- buf_block_t* block, /*!< in: page or NULL */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr or NULL */
-{
- ulint offset;
- page_cur_t cursor;
-
- if (end_ptr < ptr + 2) {
-
- return(NULL); /* the 2-byte offset is not completely in the buffer */
- }
-
- /* Read the cursor rec offset as a 2-byte ulint */
- offset = mach_read_from_2(ptr);
- ptr += 2;
-
- ut_a(offset <= UNIV_PAGE_SIZE);
-
- if (block) {
- page_t* page = buf_block_get_frame(block);
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- rec_t* rec = page + offset;
- rec_offs_init(offsets_);
-
- page_cur_position(rec, block, &cursor);
- ut_ad(!buf_block_get_page_zip(block) || page_is_comp(page));
-
- page_cur_delete_rec(&cursor, index,
- rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap),
- mtr);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap); /* heap was set by rec_get_offsets() */
- }
- }
-
- return(ptr);
-}
-
-/***********************************************************//**
-Deletes a record at the page cursor. The cursor is moved to the next
-record after the deleted one. The record must be a user record (this is
-checked by a debug assertion). The steps are: write the log record,
-reset PAGE_LAST_INSERT and increment the modify clock of the block,
-unlink the record from the record list, update the owning directory
-slot, free the record with page_mem_free(), and balance the directory
-slot if it no longer owns enough records. */
-UNIV_INTERN
-void
-page_cur_delete_rec(
-/*================*/
- page_cur_t* cursor, /*!< in/out: a page cursor */
- dict_index_t* index, /*!< in: record descriptor */
- const ulint* offsets,/*!< in: rec_get_offsets(cursor->rec, index) */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- page_dir_slot_t* cur_dir_slot; /* directory slot that owns current_rec */
- page_dir_slot_t* prev_slot; /* directory slot preceding cur_dir_slot */
- page_t* page;
- page_zip_des_t* page_zip;
- rec_t* current_rec; /* the record being deleted */
- rec_t* prev_rec = NULL; /* predecessor of current_rec, found in step 2 */
- rec_t* next_rec; /* successor of current_rec */
- ulint cur_slot_no;
- ulint cur_n_owned; /* n_owned of cur_dir_slot before the delete */
- rec_t* rec;
-
- ut_ad(cursor && mtr);
-
- page = page_cur_get_page(cursor);
- page_zip = page_cur_get_page_zip(cursor);
-
- /* page_zip_validate() will fail here when
- btr_cur_pessimistic_delete() invokes btr_set_min_rec_mark().
- Then, both "page_zip" and "page" would have the min-rec-mark
- set on the smallest user record, but "page" would additionally
- have it set on the smallest-but-one record. Because sloppy
- page_zip_validate_low() only ignores min-rec-flag differences
- in the smallest user record, it cannot be used here either. */
-
- current_rec = cursor->rec;
- ut_ad(rec_offs_validate(current_rec, index, offsets));
- ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
-
- /* The record must not be the supremum or infimum record. */
- ut_ad(page_rec_is_user_rec(current_rec));
-
- /* Save to local variables some data associated with current_rec */
- cur_slot_no = page_dir_find_owner_slot(current_rec);
- cur_dir_slot = page_dir_get_nth_slot(page, cur_slot_no);
- cur_n_owned = page_dir_slot_get_n_owned(cur_dir_slot);
-
- /* 0. Write the log record */
- page_cur_delete_rec_write_log(current_rec, index, mtr);
-
- /* 1. Reset the last insert info in the page header and increment
- the modify clock for the frame */
-
- page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
-
- /* The page gets invalid for optimistic searches: increment the
- frame modify clock */
-
- buf_block_modify_clock_inc(page_cur_get_block(cursor));
-
- /* 2. Find the next and the previous record. Note that the cursor is
- left at the next record. */
-
- ut_ad(cur_slot_no > 0);
- prev_slot = page_dir_get_nth_slot(page, cur_slot_no - 1);
-
- rec = (rec_t*) page_dir_slot_get_rec(prev_slot);
-
- /* rec now points to the record of the previous directory slot. Look
- for the immediate predecessor of current_rec in a loop. */
-
- while(current_rec != rec) {
- prev_rec = rec;
- rec = page_rec_get_next(rec);
- }
-
- page_cur_move_to_next(cursor);
- next_rec = cursor->rec;
-
- /* 3. Remove the record from the linked list of records */
-
- page_rec_set_next(prev_rec, next_rec);
-
- /* 4. If the deleted record is pointed to by a dir slot, update the
- record pointer in slot. In the following if-clause we assume that
- prev_rec is owned by the same slot, i.e., PAGE_DIR_SLOT_MIN_N_OWNED
- >= 2. */
-
-#if PAGE_DIR_SLOT_MIN_N_OWNED < 2
-# error "PAGE_DIR_SLOT_MIN_N_OWNED < 2"
-#endif
- ut_ad(cur_n_owned > 1);
-
- if (current_rec == page_dir_slot_get_rec(cur_dir_slot)) {
- page_dir_slot_set_rec(cur_dir_slot, prev_rec);
- }
-
- /* 5. Update the number of owned records of the slot */
-
- page_dir_slot_set_n_owned(cur_dir_slot, page_zip, cur_n_owned - 1);
-
- /* 6. Free the memory occupied by the record */
- page_mem_free(page, page_zip, current_rec, index, offsets);
-
- /* 7. Now we have decremented the number of owned records of the slot.
- If the number drops below PAGE_DIR_SLOT_MIN_N_OWNED, we balance the
- slots. Note that cur_n_owned still holds the count from before the
- decrement, hence the comparison with "<=". */
-
- if (UNIV_UNLIKELY(cur_n_owned <= PAGE_DIR_SLOT_MIN_N_OWNED)) {
- page_dir_balance_slot(page, page_zip, cur_slot_no);
- }
-
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
-#endif /* UNIV_ZIP_DEBUG */
-}
-
-#ifdef UNIV_COMPILE_TEST_FUNCS
-
-/*******************************************************************//**
-Print the first n numbers, generated by page_cur_lcg_prng() to make sure
-(visually) that it works properly. Each output line contains the number
-itself followed by its remainders modulo 2, 3, 5, 7 and 11. This
-function is compiled only if UNIV_COMPILE_TEST_FUNCS is defined. */
-void
-test_page_cur_lcg_prng(
-/*===================*/
- int n) /*!< in: print first n numbers */
-{
- int i;
- unsigned long long rnd;
-
- for (i = 0; i < n; i++) {
- rnd = page_cur_lcg_prng();
- printf("%llu\t%%2=%llu %%3=%llu %%5=%llu %%7=%llu %%11=%llu\n",
- rnd,
- rnd % 2,
- rnd % 3,
- rnd % 5,
- rnd % 7,
- rnd % 11);
- }
-}
-
-#endif /* UNIV_COMPILE_TEST_FUNCS */
diff --git a/storage/innodb_plugin/page/page0page.c b/storage/innodb_plugin/page/page0page.c
deleted file mode 100644
index ab2ba60570e..00000000000
--- a/storage/innodb_plugin/page/page0page.c
+++ /dev/null
@@ -1,2608 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file page/page0page.c
-Index page routines
-
-Created 2/2/1994 Heikki Tuuri
-*******************************************************/
-
-#define THIS_MODULE
-#include "page0page.h"
-#ifdef UNIV_NONINL
-#include "page0page.ic"
-#endif
-#undef THIS_MODULE
-
-#include "page0cur.h"
-#include "page0zip.h"
-#include "buf0buf.h"
-#include "btr0btr.h"
-#ifndef UNIV_HOTBACKUP
-# include "srv0srv.h"
-# include "lock0lock.h"
-# include "fut0lst.h"
-# include "btr0sea.h"
-#endif /* !UNIV_HOTBACKUP */
-
-/* THE INDEX PAGE
- ==============
-
-The index page consists of a page header which contains the page's
-id and other information. On top of it are the index records
-in a heap linked into a one way linear list according to alphabetic order.
-
-Just below page end is an array of pointers which we call page directory,
-to about every sixth record in the list. The pointers are placed in
-the directory in the alphabetical order of the records pointed to,
-enabling us to make binary search using the array. Each slot n:o I
-in the directory points to a record, where a 4-bit field contains a count
-of those records which are in the linear list between pointer I and
-the pointer I - 1 in the directory, including the record
-pointed to by pointer I and not including the record pointed to by I - 1.
-We say that the record pointed to by slot I, or that slot I, owns
-these records. The count is always kept in the range 4 to 8, with
-the exception that it is 1 for the first slot, and 1--8 for the last slot.
-
-An essentially binary search can be performed in the list of index
-records, like we could do if we had pointer to every record in the
-page directory. The data structure is, however, more efficient when
-we are doing inserts, because most inserts are just pushed on a heap.
-Only every 8th insert requires block move in the directory pointer
-table, which itself is quite small. A record is deleted from the page
-by just taking it off the linear list and updating the number of owned
-records-field of the record which owns it, and updating the page directory,
-if necessary. A special case is the one when the record owns itself.
-Because the overhead of inserts is so small, we may also increase the
-page size from the projected default of 8 kB to 64 kB without too
-much loss of efficiency in inserts. A bigger page becomes relevant
-when the disk transfer rate rises relative to seek and latency time.
-On the present system, the page size is set so that the page transfer
-time (3 ms) is 20 % of the disk random access time (15 ms).
-
-When the page is split, merged, or becomes full but contains deleted
-records, we have to reorganize the page.
-
-Assuming a page size of 8 kB, a typical index page of a secondary
-index contains 300 index entries, and the size of the page directory
-is 50 x 4 bytes = 200 bytes. */
-
-/***************************************************************//**
-Looks for the directory slot which owns the given record. First the
-record list is followed from rec until a record with a nonzero n_owned
-field is found; that record is the owner. Then the page directory is
-scanned from the last slot towards slot 0 for the slot that contains the
-offset of the owner. If no slot matches, diagnostics and the page are
-printed to stderr and ut_error is invoked.
-@return the directory slot number */
-UNIV_INTERN
-ulint
-page_dir_find_owner_slot(
-/*=====================*/
- const rec_t* rec) /*!< in: the physical record */
-{
- const page_t* page;
- register uint16 rec_offs_bytes; /* owner offset as encoded by mach_encode_2() */
- register const page_dir_slot_t* slot; /* slot being examined */
- register const page_dir_slot_t* first_slot; /* slot number 0 */
- register const rec_t* r = rec; /* ends up pointing to the owner record */
-
- ut_ad(page_rec_check(rec));
-
- page = page_align(rec);
- first_slot = page_dir_get_nth_slot(page, 0);
- slot = page_dir_get_nth_slot(page, page_dir_get_n_slots(page) - 1);
-
- if (page_is_comp(page)) {
- while (rec_get_n_owned_new(r) == 0) {
- r = rec_get_next_ptr_const(r, TRUE);
- ut_ad(r >= page + PAGE_NEW_SUPREMUM);
- ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR));
- }
- } else {
- while (rec_get_n_owned_old(r) == 0) {
- r = rec_get_next_ptr_const(r, FALSE);
- ut_ad(r >= page + PAGE_OLD_SUPREMUM);
- ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR));
- }
- }
-
- rec_offs_bytes = mach_encode_2(r - page);
-
- while (UNIV_LIKELY(*(uint16*) slot != rec_offs_bytes)) {
-
- if (UNIV_UNLIKELY(slot == first_slot)) { /* every slot was compared without a match */
- fprintf(stderr,
- "InnoDB: Probable data corruption on"
- " page %lu\n"
- "InnoDB: Original record ",
- (ulong) page_get_page_no(page));
-
- if (page_is_comp(page)) {
- fputs("(compact record)", stderr);
- } else {
- rec_print_old(stderr, rec);
- }
-
- fputs("\n"
- "InnoDB: on that page.\n"
- "InnoDB: Cannot find the dir slot for record ",
- stderr);
- if (page_is_comp(page)) {
- fputs("(compact record)", stderr);
- } else {
- rec_print_old(stderr, page
- + mach_decode_2(rec_offs_bytes));
- }
- fputs("\n"
- "InnoDB: on that page!\n", stderr);
-
- buf_page_print(page, 0);
-
- ut_error;
- }
-
- slot += PAGE_DIR_SLOT_SIZE; /* step towards first_slot, i.e. to the preceding slot number */
- }
-
- return(((ulint) (first_slot - slot)) / PAGE_DIR_SLOT_SIZE);
-}
-
-/**************************************************************//**
-Used to check the consistency of a directory slot. The slot must lie
-within the page directory, its record must pass page_rec_check(), and
-the n_owned field of that record must be 1 for slot 0, between 1 and
-PAGE_DIR_SLOT_MAX_N_OWNED for the last slot, and between
-PAGE_DIR_SLOT_MIN_N_OWNED and PAGE_DIR_SLOT_MAX_N_OWNED for any other
-slot. Every check is a ut_a() assertion.
-@return TRUE if succeed */
-static
-ibool
-page_dir_slot_check(
-/*================*/
- page_dir_slot_t* slot) /*!< in: slot */
-{
- page_t* page;
- ulint n_slots;
- ulint n_owned;
-
- ut_a(slot);
-
- page = page_align(slot);
-
- n_slots = page_dir_get_n_slots(page);
-
- ut_a(slot <= page_dir_get_nth_slot(page, 0));
- ut_a(slot >= page_dir_get_nth_slot(page, n_slots - 1));
-
- ut_a(page_rec_check(page_dir_slot_get_rec(slot)));
-
- if (page_is_comp(page)) {
- n_owned = rec_get_n_owned_new(page_dir_slot_get_rec(slot));
- } else {
- n_owned = rec_get_n_owned_old(page_dir_slot_get_rec(slot));
- }
-
- if (slot == page_dir_get_nth_slot(page, 0)) { /* first slot */
- ut_a(n_owned == 1);
- } else if (slot == page_dir_get_nth_slot(page, n_slots - 1)) { /* last slot */
- ut_a(n_owned >= 1);
- ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
- } else {
- ut_a(n_owned >= PAGE_DIR_SLOT_MIN_N_OWNED);
- ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
- }
-
- return(TRUE);
-}
-
-/*************************************************************//**
-Sets the max trx id field value. For a compressed page the field is
-written to the uncompressed frame and then passed to
-page_zip_write_header(). Otherwise it is written with
-mlog_write_dulint() if mtr is non-NULL (only when UNIV_HOTBACKUP is not
-defined), or else with a plain mach_write_to_8(). When UNIV_HOTBACKUP
-is not defined and block->is_hashed is set, btr_search_latch is held in
-exclusive mode around the write. */
-UNIV_INTERN
-void
-page_set_max_trx_id(
-/*================*/
- buf_block_t* block, /*!< in/out: page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in/out: mini-transaction, or NULL */
-{
- page_t* page = buf_block_get_frame(block);
-#ifndef UNIV_HOTBACKUP
- const ibool is_hashed = block->is_hashed; /* sampled once, so that the
-   lock and unlock below are paired */
-
- if (is_hashed) {
- rw_lock_x_lock(&btr_search_latch);
- }
-
- ut_ad(!mtr || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-#endif /* !UNIV_HOTBACKUP */
-
- /* It is not necessary to write this change to the redo log, as
- during a database recovery we assume that the max trx id of every
- page is the maximum trx id assigned before the crash. */
-
- if (UNIV_LIKELY_NULL(page_zip)) {
- mach_write_to_8(page + (PAGE_HEADER + PAGE_MAX_TRX_ID), trx_id);
- page_zip_write_header(page_zip,
- page + (PAGE_HEADER + PAGE_MAX_TRX_ID),
- 8, mtr);
-#ifndef UNIV_HOTBACKUP
- } else if (mtr) {
- mlog_write_dulint(page + (PAGE_HEADER + PAGE_MAX_TRX_ID),
- trx_id, mtr);
-#endif /* !UNIV_HOTBACKUP */
- } else {
- mach_write_to_8(page + (PAGE_HEADER + PAGE_MAX_TRX_ID), trx_id);
- }
-
-#ifndef UNIV_HOTBACKUP
- if (is_hashed) {
- rw_lock_x_unlock(&btr_search_latch);
- }
-#endif /* !UNIV_HOTBACKUP */
-}
-
-/************************************************************//**
-Allocates a block of memory from the heap of an index page. The block
-is taken from PAGE_HEAP_TOP, which is advanced by need bytes, and the
-heap count of the page is incremented by one. The allocation fails if
-page_get_max_insert_size(page, 1) is smaller than need; in that case
-the page is not modified and *heap_no is not set.
-@return pointer to start of allocated buffer, or NULL if allocation fails */
-UNIV_INTERN
-byte*
-page_mem_alloc_heap(
-/*================*/
- page_t* page, /*!< in/out: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page with enough
- space available for inserting the record,
- or NULL */
- ulint need, /*!< in: total number of bytes needed */
- ulint* heap_no)/*!< out: this contains the heap number
- of the allocated record
- if allocation succeeds */
-{
- byte* block;
- ulint avl_space;
-
- ut_ad(page && heap_no);
-
- avl_space = page_get_max_insert_size(page, 1);
-
- if (avl_space >= need) {
- block = page_header_get_ptr(page, PAGE_HEAP_TOP);
-
- page_header_set_ptr(page, page_zip, PAGE_HEAP_TOP,
- block + need);
- *heap_no = page_dir_get_n_heap(page); /* heap count before the allocation */
-
- page_dir_set_n_heap(page, page_zip, 1 + *heap_no);
-
- return(block);
- }
-
- return(NULL);
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Writes a log record of page creation. The record consists only of the
-initial part written by mlog_write_initial_log_record(); its type is
-MLOG_COMP_PAGE_CREATE if comp is nonzero and MLOG_PAGE_CREATE otherwise.
-When UNIV_HOTBACKUP is defined, this is a macro that does nothing. */
-UNIV_INLINE
-void
-page_create_write_log(
-/*==================*/
- buf_frame_t* frame, /*!< in: a buffer frame where the page is
- created */
- mtr_t* mtr, /*!< in: mini-transaction handle */
- ibool comp) /*!< in: TRUE=compact page format */
-{
- mlog_write_initial_log_record(frame, comp
- ? MLOG_COMP_PAGE_CREATE
- : MLOG_PAGE_CREATE, mtr);
-}
-#else /* !UNIV_HOTBACKUP */
-# define page_create_write_log(frame,mtr,comp) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Parses a redo log record of creating a page. The record has no body, so
-no bytes are consumed from the buffer. If block is non-NULL, the page is
-created in it by calling page_create().
-@return end of log record or NULL; this implementation always returns
-ptr unchanged */
-UNIV_INTERN
-byte*
-page_parse_create(
-/*==============*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr __attribute__((unused)), /*!< in: buffer end */
- ulint comp, /*!< in: nonzero=compact page format */
- buf_block_t* block, /*!< in: block or NULL */
- mtr_t* mtr) /*!< in: mtr or NULL */
-{
- ut_ad(ptr && end_ptr);
-
- /* The record is empty, except for the record initial part */
-
- if (block) {
- page_create(block, mtr, comp);
- }
-
- return(ptr);
-}
-
-/**********************************************************//**
-The index page creation function. It increments the modify clock of the
-block, sets the page type to FIL_PAGE_INDEX, builds the infimum and
-supremum records at the start of the record heap, initializes the page
-header fields, zero-fills the area after the supremum record, and sets
-up a page directory of two slots that point to the infimum and the
-supremum. No redo log is written by this function.
-@return pointer to the page */
-static
-page_t*
-page_create_low(
-/*============*/
- buf_block_t* block, /*!< in: a buffer block where the
- page is created */
- ulint comp) /*!< in: nonzero=compact page format */
-{
- page_dir_slot_t* slot;
- mem_heap_t* heap; /* temporary heap for the tuples and offsets */
- dtuple_t* tuple;
- dfield_t* field;
- byte* heap_top; /* where the next record is built; finally
-   the end of the supremum record */
- rec_t* infimum_rec;
- rec_t* supremum_rec;
- page_t* page;
- dict_index_t* index;
- ulint* offsets;
-
- ut_ad(block);
-#if PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA
-# error "PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA"
-#endif
-#if PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA
-# error "PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA"
-#endif
-
- /* The infimum and supremum records use a dummy index. */
- if (UNIV_LIKELY(comp)) {
- index = dict_ind_compact;
- } else {
- index = dict_ind_redundant;
- }
-
- /* 1. INCREMENT MODIFY CLOCK */
- buf_block_modify_clock_inc(block);
-
- page = buf_block_get_frame(block);
-
- fil_page_set_type(page, FIL_PAGE_INDEX); /* the step comments in this
-   function skip number 2 */
-
- heap = mem_heap_create(200);
-
- /* 3. CREATE THE INFIMUM AND SUPREMUM RECORDS */
-
- /* Create first a data tuple for infimum record */
- tuple = dtuple_create(heap, 1);
- dtuple_set_info_bits(tuple, REC_STATUS_INFIMUM);
- field = dtuple_get_nth_field(tuple, 0);
-
- dfield_set_data(field, "infimum", 8); /* 8 = the 7 characters and the terminating NUL */
- dtype_set(dfield_get_type(field),
- DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, 8);
- /* Set the corresponding physical record to its place in the page
- record heap */
-
- heap_top = page + PAGE_DATA;
-
- infimum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple, 0);
-
- if (UNIV_LIKELY(comp)) {
- ut_a(infimum_rec == page + PAGE_NEW_INFIMUM);
-
- rec_set_n_owned_new(infimum_rec, NULL, 1);
- rec_set_heap_no_new(infimum_rec, 0);
- } else {
- ut_a(infimum_rec == page + PAGE_OLD_INFIMUM);
-
- rec_set_n_owned_old(infimum_rec, 1);
- rec_set_heap_no_old(infimum_rec, 0);
- }
-
- offsets = rec_get_offsets(infimum_rec, index, NULL,
- ULINT_UNDEFINED, &heap);
-
- heap_top = rec_get_end(infimum_rec, offsets);
-
- /* Create then a tuple for supremum */
-
- tuple = dtuple_create(heap, 1);
- dtuple_set_info_bits(tuple, REC_STATUS_SUPREMUM);
- field = dtuple_get_nth_field(tuple, 0);
-
- dfield_set_data(field, "supremum", comp ? 8 : 9); /* 8 = the characters only,
-   9 = including the terminating NUL */
- dtype_set(dfield_get_type(field),
- DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, comp ? 8 : 9);
-
- supremum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple, 0);
-
- if (UNIV_LIKELY(comp)) {
- ut_a(supremum_rec == page + PAGE_NEW_SUPREMUM);
-
- rec_set_n_owned_new(supremum_rec, NULL, 1);
- rec_set_heap_no_new(supremum_rec, 1);
- } else {
- ut_a(supremum_rec == page + PAGE_OLD_SUPREMUM);
-
- rec_set_n_owned_old(supremum_rec, 1);
- rec_set_heap_no_old(supremum_rec, 1);
- }
-
- offsets = rec_get_offsets(supremum_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- heap_top = rec_get_end(supremum_rec, offsets);
-
- ut_ad(heap_top == page
- + (comp ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END));
-
- mem_heap_free(heap);
-
- /* 4. INITIALIZE THE PAGE */
-
- page_header_set_field(page, NULL, PAGE_N_DIR_SLOTS, 2);
- page_header_set_ptr(page, NULL, PAGE_HEAP_TOP, heap_top);
- page_header_set_field(page, NULL, PAGE_N_HEAP, comp
- ? 0x8000 | PAGE_HEAP_NO_USER_LOW
- : PAGE_HEAP_NO_USER_LOW); /* 0x8000 is set only for the compact format */
- page_header_set_ptr(page, NULL, PAGE_FREE, NULL);
- page_header_set_field(page, NULL, PAGE_GARBAGE, 0);
- page_header_set_ptr(page, NULL, PAGE_LAST_INSERT, NULL);
- page_header_set_field(page, NULL, PAGE_DIRECTION, PAGE_NO_DIRECTION);
- page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);
- page_header_set_field(page, NULL, PAGE_N_RECS, 0);
- page_set_max_trx_id(block, NULL, ut_dulint_zero, NULL);
- memset(heap_top, 0, UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START
- - page_offset(heap_top)); /* zero-fill from the end of the supremum
-   for this many bytes */
-
- /* 5. SET POINTERS IN RECORDS AND DIR SLOTS */
-
- /* Set the slots to point to infimum and supremum. */
-
- slot = page_dir_get_nth_slot(page, 0);
- page_dir_slot_set_rec(slot, infimum_rec);
-
- slot = page_dir_get_nth_slot(page, 1);
- page_dir_slot_set_rec(slot, supremum_rec);
-
- /* Set the next pointers in infimum and supremum */
-
- if (UNIV_LIKELY(comp)) {
- rec_set_next_offs_new(infimum_rec, PAGE_NEW_SUPREMUM);
- rec_set_next_offs_new(supremum_rec, 0);
- } else {
- rec_set_next_offs_old(infimum_rec, PAGE_OLD_SUPREMUM);
- rec_set_next_offs_old(supremum_rec, 0);
- }
-
- return(page);
-}
-
-/**********************************************************//**
-Create an uncompressed B-tree index page. The page creation is first
-logged with page_create_write_log() and the page is then built by
-page_create_low().
-@return pointer to the page */
-UNIV_INTERN
-page_t*
-page_create(
-/*========*/
- buf_block_t* block, /*!< in: a buffer block where the
- page is created */
- mtr_t* mtr, /*!< in: mini-transaction handle */
- ulint comp) /*!< in: nonzero=compact page format */
-{
- page_create_write_log(buf_block_get_frame(block), mtr, comp);
- return(page_create_low(block, comp));
-}
-
-/**********************************************************//**
-Create a compressed B-tree index page. The page is built in the compact
-format by page_create_low(), the B-tree level is written to the
-PAGE_LEVEL field of the page header, and the page is then compressed
-with page_zip_compress(). A failure of the compression is treated as
-a fatal error (ut_error).
-@return pointer to the page */
-UNIV_INTERN
-page_t*
-page_create_zip(
-/*============*/
- buf_block_t* block, /*!< in/out: a buffer frame where the
- page is created */
- dict_index_t* index, /*!< in: the index of the page */
- ulint level, /*!< in: the B-tree level of the page */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- page_t* page;
- page_zip_des_t* page_zip = buf_block_get_page_zip(block);
-
- ut_ad(block);
- ut_ad(page_zip);
- ut_ad(index);
- ut_ad(dict_table_is_comp(index->table));
-
- page = page_create_low(block, TRUE);
- mach_write_to_2(page + PAGE_HEADER + PAGE_LEVEL, level);
-
- if (UNIV_UNLIKELY(!page_zip_compress(page_zip, page, index, mtr))) {
- /* The compression of a newly created page
- should always succeed. */
- ut_error;
- }
-
- return(page);
-}
-
-/*************************************************************//**
-Differs from page_copy_rec_list_end, because this function does not
-touch the lock table and max trx id on page or compress the page.
-The records from rec (or from its successor, if rec is the infimum) up
-to but not including the supremum are inserted one by one with
-page_cur_insert_rec_low(), starting after the infimum of the new page
-and each time after the previously inserted record. If an insert fails,
-both pages and the offsets involved are printed and ut_error is
-invoked. */
-UNIV_INTERN
-void
-page_copy_rec_list_end_no_locks(
-/*============================*/
- buf_block_t* new_block, /*!< in: index page to copy to */
- buf_block_t* block, /*!< in: index page of rec */
- rec_t* rec, /*!< in: record on page */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* new_page = buf_block_get_frame(new_block);
- page_cur_t cur1; /* read cursor on the source page */
- rec_t* cur2; /* record on new_page after which to insert */
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- page_cur_position(rec, block, &cur1);
-
- if (page_cur_is_before_first(&cur1)) {
-
- page_cur_move_to_next(&cur1);
- }
-
- ut_a((ibool)!!page_is_comp(new_page)
- == dict_table_is_comp(index->table));
- ut_a(page_is_comp(new_page) == page_rec_is_comp(rec));
- ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == (ulint)
- (page_is_comp(new_page) ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM));
-   /* NOTE(review): presumably the 2 bytes at offset
-   UNIV_PAGE_SIZE - 10 are directory slot 0, which must point
-   to the infimum - confirm against the page directory layout */
-
- cur2 = page_get_infimum_rec(buf_block_get_frame(new_block));
-
- /* Copy records from the original page to the new page */
-
- while (!page_cur_is_after_last(&cur1)) {
- rec_t* cur1_rec = page_cur_get_rec(&cur1);
- rec_t* ins_rec;
- offsets = rec_get_offsets(cur1_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- ins_rec = page_cur_insert_rec_low(cur2, index,
- cur1_rec, offsets, mtr);
- if (UNIV_UNLIKELY(!ins_rec)) {
- /* Track an assertion failure reported on the mailing
- list on June 18th, 2003 */
-
- buf_page_print(new_page, 0);
- buf_page_print(page_align(rec), 0);
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- "InnoDB: rec offset %lu, cur1 offset %lu,"
- " cur2 offset %lu\n",
- (ulong) page_offset(rec),
- (ulong) page_offset(page_cur_get_rec(&cur1)),
- (ulong) page_offset(cur2));
- ut_error;
- }
-
- page_cur_move_to_next(&cur1);
- cur2 = ins_rec; /* the next record is inserted after this one */
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Copies records from page to new_page, from a given record onward,
-including that record. Infimum and supremum records are not copied.
-The records are copied to the start of the record list on new_page.
-If the heap count of new_page is still PAGE_HEAP_NO_USER_LOW, the copy
-is done by page_copy_rec_list_end_to_created_page(), otherwise by
-page_copy_rec_list_end_no_locks(). For a compressed new_block, logging
-is switched off during the copy and the page is compressed afterwards;
-if that fails, page_zip_reorganize() is tried, and if that fails too,
-the page is restored with page_zip_decompress() and NULL is returned.
-On success the lock table, the max trx id (only for a leaf page of
-a secondary index or the insert buffer) and the hash index are updated.
-@return pointer to the original successor of the infimum record on
-new_page, or NULL on zip overflow (new_block will be decompressed) */
-UNIV_INTERN
-rec_t*
-page_copy_rec_list_end(
-/*===================*/
- buf_block_t* new_block, /*!< in/out: index page to copy to */
- buf_block_t* block, /*!< in: index page containing rec */
- rec_t* rec, /*!< in: record on page */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* new_page = buf_block_get_frame(new_block);
- page_zip_des_t* new_page_zip = buf_block_get_page_zip(new_block);
- page_t* page = page_align(rec);
- rec_t* ret = page_rec_get_next(
- page_get_infimum_rec(new_page)); /* sampled before anything is copied */
- ulint log_mode = 0; /* remove warning */
-
-#ifdef UNIV_ZIP_DEBUG
- if (new_page_zip) {
- page_zip_des_t* page_zip = buf_block_get_page_zip(block);
- ut_a(page_zip);
-
- /* Strict page_zip_validate() may fail here.
- Furthermore, btr_compress() may set FIL_PAGE_PREV to
- FIL_NULL on new_page while leaving it intact on
- new_page_zip. So, we cannot validate new_page_zip. */
- ut_a(page_zip_validate_low(page_zip, page, TRUE));
- }
-#endif /* UNIV_ZIP_DEBUG */
- ut_ad(buf_block_get_frame(block) == page);
- ut_ad(page_is_leaf(page) == page_is_leaf(new_page));
- ut_ad(page_is_comp(page) == page_is_comp(new_page));
- /* Here, "ret" may be pointing to a user record or the
- predefined supremum record. */
-
- if (UNIV_LIKELY_NULL(new_page_zip)) {
- log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
- }
-
- if (page_dir_get_n_heap(new_page) == PAGE_HEAP_NO_USER_LOW) {
- page_copy_rec_list_end_to_created_page(new_page, rec,
- index, mtr);
- } else {
- page_copy_rec_list_end_no_locks(new_block, block, rec,
- index, mtr);
- }
-
- if (UNIV_LIKELY_NULL(new_page_zip)) {
- mtr_set_log_mode(mtr, log_mode);
-
- if (UNIV_UNLIKELY
- (!page_zip_compress(new_page_zip, new_page, index, mtr))) {
- /* Before trying to reorganize the page,
- store the number of preceding records on the page. */
- ulint ret_pos
- = page_rec_get_n_recs_before(ret);
- /* Before copying, "ret" was the successor of
- the predefined infimum record. It must still
- have at least one predecessor (the predefined
- infimum record, or a freshly copied record
- that is smaller than "ret"). */
- ut_a(ret_pos > 0);
-
- if (UNIV_UNLIKELY
- (!page_zip_reorganize(new_block, index, mtr))) {
-
- if (UNIV_UNLIKELY
- (!page_zip_decompress(new_page_zip,
- new_page, FALSE))) {
- ut_error;
- }
- ut_ad(page_validate(new_page, index));
- return(NULL);
- } else {
- /* The page was reorganized:
- Seek to ret_pos. The loop below follows
- ret_pos next-record pointers, starting
- from the infimum. */
- ret = new_page + PAGE_NEW_INFIMUM;
-
- do {
- ret = rec_get_next_ptr(ret, TRUE);
- } while (--ret_pos);
- }
- }
- }
-
- /* Update the lock table, MAX_TRX_ID, and possible hash index */
-
- lock_move_rec_list_end(new_block, block, rec);
-
- if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
- page_update_max_trx_id(new_block, new_page_zip,
- page_get_max_trx_id(page), mtr);
- }
-
- btr_search_move_or_delete_hash_entries(new_block, block, index);
-
- return(ret);
-}
-
-/*************************************************************//**
-Copies records from page to new_page, up to the given record,
-NOT including that record. Infimum and supremum records are not copied.
-The records are copied to the end of the record list on new_page.
-If new_block has a compressed page and recompression fails, the page is
-reorganized and the returned pointer is located again by its ordinal
-position in the record list.
-@return pointer to the original predecessor of the supremum record on
-new_page, or NULL on zip overflow (new_block will be decompressed) */
-UNIV_INTERN
-rec_t*
-page_copy_rec_list_start(
-/*=====================*/
-	buf_block_t*	new_block,	/*!< in/out: index page to copy to */
-	buf_block_t*	block,		/*!< in: index page containing rec */
-	rec_t*		rec,		/*!< in: record on page */
-	dict_index_t*	index,		/*!< in: record descriptor */
-	mtr_t*		mtr)		/*!< in: mtr */
-{
-	page_t*		new_page	= buf_block_get_frame(new_block);
-	page_zip_des_t*	new_page_zip	= buf_block_get_page_zip(new_block);
-	page_cur_t	cur1;
-	rec_t*		cur2;
-	ulint		log_mode	= 0 /* remove warning */;
-	mem_heap_t*	heap		= NULL;
-	rec_t*		ret
-		= page_rec_get_prev(page_get_supremum_rec(new_page));
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	rec_offs_init(offsets_);
-
-	/* Here, "ret" may be pointing to a user record or the
-	predefined infimum record. */
-
-	if (page_rec_is_infimum(rec)) {
-
-		/* Nothing precedes the infimum: there is nothing to copy */
-		return(ret);
-	}
-
-	if (UNIV_LIKELY_NULL(new_page_zip)) {
-		/* The individual inserts are done with logging switched
-		off; the mode is restored before page_zip_compress(). */
-		log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
-	}
-
-	page_cur_set_before_first(block, &cur1);
-	page_cur_move_to_next(&cur1);
-
-	cur2 = ret;
-
-	/* Copy records from the original page to the new page */
-
-	while (page_cur_get_rec(&cur1) != rec) {
-		rec_t*	cur1_rec = page_cur_get_rec(&cur1);
-		offsets = rec_get_offsets(cur1_rec, index, offsets,
-					  ULINT_UNDEFINED, &heap);
-		cur2 = page_cur_insert_rec_low(cur2, index,
-					       cur1_rec, offsets, mtr);
-		ut_a(cur2);
-
-		page_cur_move_to_next(&cur1);
-	}
-
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-
-	if (UNIV_LIKELY_NULL(new_page_zip)) {
-		mtr_set_log_mode(mtr, log_mode);
-
-		if (UNIV_UNLIKELY
-		    (!page_zip_compress(new_page_zip, new_page, index, mtr))) {
-			/* Before trying to reorganize the page,
-			store the number of preceding records on the page. */
-			ulint	ret_pos
-				= page_rec_get_n_recs_before(ret);
-			/* Before copying, "ret" was the predecessor
-			of the predefined supremum record. If it was
-			the predefined infimum record, then it is
-			still the infimum and ret_pos is 0. That is
-			a legitimate value here, so ret_pos > 0 must
-			not be assumed below. */
-
-			if (UNIV_UNLIKELY
-			    (!page_zip_reorganize(new_block, index, mtr))) {
-
-				if (UNIV_UNLIKELY
-				    (!page_zip_decompress(new_page_zip,
-							  new_page, FALSE))) {
-					ut_error;
-				}
-				ut_ad(page_validate(new_page, index));
-				return(NULL);
-			} else {
-				/* The page was reorganized:
-				Seek to ret_pos. The counter is tested
-				BEFORE each step: a do/while on --ret_pos
-				would wrap the unsigned counter when
-				ret_pos == 0 and run past the end of the
-				record list. */
-				ret = new_page + PAGE_NEW_INFIMUM;
-
-				for (; ret_pos > 0; ret_pos--) {
-					ret = rec_get_next_ptr(ret, TRUE);
-				}
-			}
-		}
-	}
-
-	/* Update MAX_TRX_ID, the lock table, and possible hash index */
-
-	if (dict_index_is_sec_or_ibuf(index)
-	    && page_is_leaf(page_align(rec))) {
-		page_update_max_trx_id(new_block, new_page_zip,
-				       page_get_max_trx_id(page_align(rec)),
-				       mtr);
-	}
-
-	lock_move_rec_list_start(new_block, block, rec, ret);
-
-	btr_search_move_or_delete_hash_entries(new_block, block, index);
-
-	return(ret);
-}
-
-/**********************************************************//**
-Writes the log record for the deletion of the start or the end of a
-record list. The only parameter logged after the index information is
-the page offset of rec, stored in 2 bytes. */
-UNIV_INLINE
-void
-page_delete_rec_list_write_log(
-/*===========================*/
-	rec_t*		rec,	/*!< in: record on page */
-	dict_index_t*	index,	/*!< in: record descriptor */
-	byte		type,	/*!< in: operation type:
-				MLOG_LIST_END_DELETE, ... */
-	mtr_t*		mtr)	/*!< in: mtr */
-{
-	byte*	buf;
-
-	ut_ad(type == MLOG_LIST_END_DELETE
-	      || type == MLOG_LIST_START_DELETE
-	      || type == MLOG_COMP_LIST_END_DELETE
-	      || type == MLOG_COMP_LIST_START_DELETE);
-
-	buf = mlog_open_and_write_index(mtr, rec, index, type, 2);
-
-	if (!buf) {
-		/* mlog_open_and_write_index() returned NULL:
-		there is nothing to write or to close */
-		return;
-	}
-
-	/* The record offset within the page, as a 2-byte ulint */
-	mach_write_to_2(buf, page_offset(rec));
-	mlog_close(mtr, buf + 2);
-}
-#else /* !UNIV_HOTBACKUP */
-# define page_delete_rec_list_write_log(rec,index,type,mtr) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************//**
-Parses a log record of a record list end or start deletion and, when
-a block is given, applies the deletion to that block.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-page_parse_delete_rec_list(
-/*=======================*/
-	byte		type,	/*!< in: MLOG_LIST_END_DELETE,
-				MLOG_LIST_START_DELETE,
-				MLOG_COMP_LIST_END_DELETE or
-				MLOG_COMP_LIST_START_DELETE */
-	byte*		ptr,	/*!< in: buffer */
-	byte*		end_ptr,/*!< in: buffer end */
-	buf_block_t*	block,	/*!< in/out: buffer block or NULL */
-	dict_index_t*	index,	/*!< in: record descriptor */
-	mtr_t*		mtr)	/*!< in: mtr or NULL */
-{
-	ulint	rec_offs;
-
-	ut_ad(type == MLOG_LIST_END_DELETE
-	      || type == MLOG_LIST_START_DELETE
-	      || type == MLOG_COMP_LIST_END_DELETE
-	      || type == MLOG_COMP_LIST_START_DELETE);
-
-	/* The record body is a single 2-byte record offset */
-
-	if (end_ptr < ptr + 2) {
-
-		/* The buffer does not hold the whole record */
-		return(NULL);
-	}
-
-	rec_offs = mach_read_from_2(ptr);
-	ptr += 2;
-
-	if (block) {
-		page_t*	frame = buf_block_get_frame(block);
-
-		ut_ad(!!page_is_comp(frame)
-		      == dict_table_is_comp(index->table));
-
-		if (type != MLOG_LIST_END_DELETE
-		    && type != MLOG_COMP_LIST_END_DELETE) {
-			page_delete_rec_list_start(frame + rec_offs, block,
-						   index, mtr);
-		} else {
-			/* The record count and size are not logged */
-			page_delete_rec_list_end(frame + rec_offs, block,
-						 index, ULINT_UNDEFINED,
-						 ULINT_UNDEFINED, mtr);
-		}
-	}
-
-	return(ptr);
-}
-
-/*************************************************************//**
-Deletes records from a page from a given record onward, including that record.
-The infimum and supremum records are not deleted.
-The deletion is logged as one list-end-delete record. On a compressed
-page the records are then deleted one at a time with logging switched
-off; on an uncompressed page the whole chain segment is unlinked at once
-and prepended to the page free list. */
-UNIV_INTERN
-void
-page_delete_rec_list_end(
-/*=====================*/
- rec_t* rec, /*!< in: pointer to record on page */
- buf_block_t* block, /*!< in: buffer block of the page */
- dict_index_t* index, /*!< in: record descriptor */
- ulint n_recs, /*!< in: number of records to delete,
- or ULINT_UNDEFINED if not known */
- ulint size, /*!< in: the sum of the sizes of the
- records in the end of the chain to
- delete, or ULINT_UNDEFINED if not known */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_dir_slot_t*slot;
- ulint slot_index;
- rec_t* last_rec;
- rec_t* prev_rec;
- ulint n_owned;
- page_zip_des_t* page_zip = buf_block_get_page_zip(block);
- page_t* page = page_align(rec);
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- ut_ad(size == ULINT_UNDEFINED || size < UNIV_PAGE_SIZE);
- ut_ad(!page_zip || page_rec_is_comp(rec));
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
-#endif /* UNIV_ZIP_DEBUG */
-
- /* The infimum itself is never deleted: start from its successor */
- if (page_rec_is_infimum(rec)) {
- rec = page_rec_get_next(rec);
- }
-
- /* Starting at the supremum means there is nothing to delete */
- if (page_rec_is_supremum(rec)) {
-
- return;
- }
-
- /* Reset the last insert info in the page header and increment
- the modify clock for the frame */
-
- page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
-
- /* The page gets invalid for optimistic searches: increment the
- frame modify clock */
-
- buf_block_modify_clock_inc(block);
-
- /* Log the whole operation as a single record, identified by rec */
- page_delete_rec_list_write_log(rec, index, page_is_comp(page)
- ? MLOG_COMP_LIST_END_DELETE
- : MLOG_LIST_END_DELETE, mtr);
-
- if (UNIV_LIKELY_NULL(page_zip)) {
- ulint log_mode;
-
- ut_a(page_is_comp(page));
- /* Individual deletes are not logged */
-
- log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
-
- do {
- page_cur_t cur;
- page_cur_position(rec, block, &cur);
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- /* Step to the successor before the current record is
- deleted (presumably because the delete invalidates its
- next pointer -- confirm in page_cur_delete_rec()) */
- rec = rec_get_next_ptr(rec, TRUE);
-#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page));
-#endif /* UNIV_ZIP_DEBUG */
- page_cur_delete_rec(&cur, index, offsets, mtr);
- } while (page_offset(rec) != PAGE_NEW_SUPREMUM);
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- /* Restore log mode */
-
- mtr_set_log_mode(mtr, log_mode);
- return;
- }
-
- /* Uncompressed page: remember both ends of the segment to unlink.
- prev_rec is the last record that stays, last_rec the last one
- that goes. */
- prev_rec = page_rec_get_prev(rec);
-
- last_rec = page_rec_get_prev(page_get_supremum_rec(page));
-
- /* If either figure is unknown, both are recomputed by walking the
- chain from rec to the supremum */
- if ((size == ULINT_UNDEFINED) || (n_recs == ULINT_UNDEFINED)) {
- rec_t* rec2 = rec;
- /* Calculate the sum of sizes and the number of records */
- size = 0;
- n_recs = 0;
-
- do {
- ulint s;
- offsets = rec_get_offsets(rec2, index, offsets,
- ULINT_UNDEFINED, &heap);
- s = rec_offs_size(offsets);
- ut_ad(rec2 - page + s - rec_offs_extra_size(offsets)
- < UNIV_PAGE_SIZE);
- ut_ad(size + s < UNIV_PAGE_SIZE);
- size += s;
- n_recs++;
-
- rec2 = page_rec_get_next(rec2);
- } while (!page_rec_is_supremum(rec2));
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-
- ut_ad(size < UNIV_PAGE_SIZE);
-
- /* Update the page directory; there is no need to balance the number
- of the records owned by the supremum record, as it is allowed to be
- less than PAGE_DIR_SLOT_MIN_N_OWNED */
-
- /* In both record formats: walk from rec to the record that owns it
- (the first one with n_owned != 0), counting the steps. n_owned
- becomes the owner's count minus those steps, and is stored below
- in the slot that is redirected to the supremum. */
- if (page_is_comp(page)) {
- rec_t* rec2 = rec;
- ulint count = 0;
-
- while (rec_get_n_owned_new(rec2) == 0) {
- count++;
-
- rec2 = rec_get_next_ptr(rec2, TRUE);
- }
-
- ut_ad(rec_get_n_owned_new(rec2) > count);
-
- n_owned = rec_get_n_owned_new(rec2) - count;
- slot_index = page_dir_find_owner_slot(rec2);
- slot = page_dir_get_nth_slot(page, slot_index);
- } else {
- rec_t* rec2 = rec;
- ulint count = 0;
-
- while (rec_get_n_owned_old(rec2) == 0) {
- count++;
-
- rec2 = rec_get_next_ptr(rec2, FALSE);
- }
-
- ut_ad(rec_get_n_owned_old(rec2) > count);
-
- n_owned = rec_get_n_owned_old(rec2) - count;
- slot_index = page_dir_find_owner_slot(rec2);
- slot = page_dir_get_nth_slot(page, slot_index);
- }
-
- /* The owner slot now points to the supremum and becomes the last
- slot of the directory */
- page_dir_slot_set_rec(slot, page_get_supremum_rec(page));
- page_dir_slot_set_n_owned(slot, NULL, n_owned);
-
- page_dir_set_n_slots(page, NULL, slot_index + 1);
-
- /* Remove the record chain segment from the record chain */
- page_rec_set_next(prev_rec, page_get_supremum_rec(page));
-
- /* Catenate the deleted chain segment to the page free list */
-
- page_rec_set_next(last_rec, page_header_get_ptr(page, PAGE_FREE));
- page_header_set_ptr(page, NULL, PAGE_FREE, rec);
-
- /* Account for the freed bytes and the removed records in the
- page header */
- page_header_set_field(page, NULL, PAGE_GARBAGE, size
- + page_header_get_field(page, PAGE_GARBAGE));
-
- page_header_set_field(page, NULL, PAGE_N_RECS,
- (ulint)(page_get_n_recs(page) - n_recs));
-}
-
-/*************************************************************//**
-Deletes the records that precede the given record on its page; the
-given record itself is kept. Infimum and supremum records are never
-deleted. The operation is logged as one list-start-delete record and
-the individual deletes are performed with logging switched off. */
-UNIV_INTERN
-void
-page_delete_rec_list_start(
-/*=======================*/
-	rec_t*		rec,	/*!< in: record on page */
-	buf_block_t*	block,	/*!< in: buffer block of the page */
-	dict_index_t*	index,	/*!< in: record descriptor */
-	mtr_t*		mtr)	/*!< in: mtr */
-{
-	page_cur_t	cursor;
-	ulint		saved_log_mode;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	mem_heap_t*	heap		= NULL;
-	byte		type;
-
-	rec_offs_init(offsets_);
-
-	ut_ad((ibool) !!page_rec_is_comp(rec)
-	      == dict_table_is_comp(index->table));
-#ifdef UNIV_ZIP_DEBUG
-	{
-		page_zip_des_t*	page_zip= buf_block_get_page_zip(block);
-		page_t*		page	= buf_block_get_frame(block);
-
-		/* page_zip_validate() would detect a min_rec_mark mismatch
-		in btr_page_split_and_insert()
-		between btr_attach_half_pages() and insert_page = ...
-		when btr_page_get_split_rec_to_left() holds
-		(direction == FSP_DOWN). */
-		ut_a(!page_zip || page_zip_validate_low(page_zip, page, TRUE));
-	}
-#endif /* UNIV_ZIP_DEBUG */
-
-	if (page_rec_is_infimum(rec)) {
-
-		/* No record precedes the infimum */
-		return;
-	}
-
-	type = page_rec_is_comp(rec)
-		? MLOG_COMP_LIST_START_DELETE
-		: MLOG_LIST_START_DELETE;
-
-	page_delete_rec_list_write_log(rec, index, type, mtr);
-
-	/* Position the cursor on the first record after the infimum */
-	page_cur_set_before_first(block, &cursor);
-	page_cur_move_to_next(&cursor);
-
-	/* The list deletion was logged above as a whole, so the
-	individual deletes are not logged */
-
-	saved_log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
-
-	for (;;) {
-		rec_t*	victim = page_cur_get_rec(&cursor);
-
-		if (victim == rec) {
-			break;
-		}
-
-		offsets = rec_get_offsets(victim, index,
-					  offsets, ULINT_UNDEFINED, &heap);
-		page_cur_delete_rec(&cursor, index, offsets, mtr);
-	}
-
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-
-	/* Put the previous log mode back */
-
-	mtr_set_log_mode(mtr, saved_log_mode);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Moves the end of a record list, starting from and including split_rec,
-to another page: the records are first copied to new_block and then
-deleted from block.
-@return TRUE on success; FALSE on compression failure (new_block will
-be decompressed) */
-UNIV_INTERN
-ibool
-page_move_rec_list_end(
-/*===================*/
-	buf_block_t*	new_block,	/*!< in/out: index page where to move */
-	buf_block_t*	block,		/*!< in: index page from where to move */
-	rec_t*		split_rec,	/*!< in: first record to move */
-	dict_index_t*	index,		/*!< in: record descriptor */
-	mtr_t*		mtr)		/*!< in: mtr */
-{
-	page_t*	new_page	= buf_block_get_frame(new_block);
-	/* State of the destination page before the copy */
-	ulint	size_before	= page_get_data_size(new_page);
-	ulint	recs_before	= page_get_n_recs(new_page);
-	ulint	size_after;
-	ulint	recs_after;
-
-#ifdef UNIV_ZIP_DEBUG
-	{
-		page_zip_des_t*	new_page_zip
-			= buf_block_get_page_zip(new_block);
-		page_zip_des_t*	page_zip
-			= buf_block_get_page_zip(block);
-		ut_a(!new_page_zip == !page_zip);
-		ut_a(!new_page_zip
-		     || page_zip_validate(new_page_zip, new_page));
-		ut_a(!page_zip
-		     || page_zip_validate(page_zip, page_align(split_rec)));
-	}
-#endif /* UNIV_ZIP_DEBUG */
-
-	if (UNIV_UNLIKELY(!page_copy_rec_list_end(new_block, block,
-						  split_rec, index, mtr))) {
-		return(FALSE);
-	}
-
-	size_after = page_get_data_size(new_page);
-	recs_after = page_get_n_recs(new_page);
-
-	ut_ad(size_after >= size_before);
-
-	/* The growth of the destination page tells how many records
-	and bytes must be removed from the source page */
-	page_delete_rec_list_end(split_rec, block, index,
-				 recs_after - recs_before,
-				 size_after - size_before, mtr);
-
-	return(TRUE);
-}
-
-/*************************************************************//**
-Moves the start of a record list, up to but not including split_rec,
-to another page: the records are first copied to new_block and then
-deleted from block.
-@return TRUE on success; FALSE on compression failure */
-UNIV_INTERN
-ibool
-page_move_rec_list_start(
-/*=====================*/
-	buf_block_t*	new_block,	/*!< in/out: index page where to move */
-	buf_block_t*	block,		/*!< in/out: page containing split_rec */
-	rec_t*		split_rec,	/*!< in: first record not to move */
-	dict_index_t*	index,		/*!< in: record descriptor */
-	mtr_t*		mtr)		/*!< in: mtr */
-{
-	rec_t*	copied = page_copy_rec_list_start(new_block, block,
-						  split_rec, index, mtr);
-
-	if (UNIV_UNLIKELY(copied == NULL)) {
-		/* The copy failed: leave the source page untouched */
-		return(FALSE);
-	}
-
-	page_delete_rec_list_start(split_rec, block, index, mtr);
-
-	return(TRUE);
-}
-
-/***********************************************************************//**
-This is a low-level operation which is used in a database index creation
-to update the page number of a created B-tree to a data dictionary record.
-The value is written as 4 bytes into field i of the old-style record. */
-UNIV_INTERN
-void
-page_rec_write_index_page_no(
-/*=========================*/
-	rec_t*	rec,	/*!< in: record to update */
-	ulint	i,	/*!< in: index of the field to update */
-	ulint	page_no,/*!< in: value to write */
-	mtr_t*	mtr)	/*!< in: mtr */
-{
-	ulint	field_len;
-	byte*	field = rec_get_nth_field_old(rec, i, &field_len);
-
-	/* The field must be exactly as wide as the value written */
-	ut_ad(field_len == 4);
-
-	mlog_write_ulint(field, page_no, MLOG_4BYTES, mtr);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**************************************************************//**
-Used to delete one slot from the directory. This function also updates
-the n_owned fields, so that the first slot after the deleted one
-inherits the records of the deleted slot. */
-UNIV_INLINE
-void
-page_dir_delete_slot(
-/*=================*/
-	page_t*		page,	/*!< in/out: the index page */
-	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
-	ulint		slot_no)/*!< in: slot to be deleted */
-{
-	page_dir_slot_t*	doomed;
-	page_dir_slot_t*	heir;
-	ulint			inherited;
-	ulint			n_slots;
-	ulint			k;
-
-	ut_ad(!page_zip || page_is_comp(page));
-	ut_ad(slot_no > 0);
-	ut_ad(slot_no + 1 < page_dir_get_n_slots(page));
-
-	n_slots = page_dir_get_n_slots(page);
-
-	/* 1. Clear the n_owned field of the slot that goes away,
-	remembering how many records it owned */
-	doomed = page_dir_get_nth_slot(page, slot_no);
-	inherited = page_dir_slot_get_n_owned(doomed);
-	page_dir_slot_set_n_owned(doomed, page_zip, 0);
-
-	/* 2. Hand those records over to the next slot */
-
-	heir = page_dir_get_nth_slot(page, slot_no + 1);
-	page_dir_slot_set_n_owned(heir, page_zip,
-				  inherited
-				  + page_dir_slot_get_n_owned(heir));
-
-	/* 3. Close the gap: every later slot moves one position down */
-	for (k = slot_no; k + 1 < n_slots; k++) {
-		rec_t*	moved = (rec_t*)
-			page_dir_slot_get_rec(
-				page_dir_get_nth_slot(page, k + 1));
-		page_dir_slot_set_rec(page_dir_get_nth_slot(page, k), moved);
-	}
-
-	/* 4. Zero out the last slot, which is no longer in use */
-	mach_write_to_2(page_dir_get_nth_slot(page, n_slots - 1), 0);
-
-	/* 5. Store the new slot count in the page header */
-	page_header_set_field(page, page_zip, PAGE_N_DIR_SLOTS, n_slots - 1);
-}
-
-/**************************************************************//**
-Used to add one slot to the directory. Does not set the record pointer
-in the added slot or update n_owned values: this is the responsibility
-of the caller. */
-UNIV_INLINE
-void
-page_dir_add_slot(
-/*==============*/
-	page_t*		page,	/*!< in/out: the index page */
-	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
-	ulint		start)	/*!< in: the slot above which the new slot
-				is added */
-{
-	const ulint		n_slots = page_dir_get_n_slots(page);
-	page_dir_slot_t*	dst;
-
-	ut_ad(start < n_slots - 1);
-
-	/* The header is updated first, so that slot number n_slots
-	exists when it is looked up below */
-	page_dir_set_n_slots(page, page_zip, n_slots + 1);
-
-	/* Shift the slots above "start" by one slot size */
-	dst = page_dir_get_nth_slot(page, n_slots);
-	memmove(dst, dst + PAGE_DIR_SLOT_SIZE,
-		(n_slots - 1 - start) * PAGE_DIR_SLOT_SIZE);
-}
-
-/****************************************************************//**
-Splits a directory slot which owns too many records. A new slot is
-added immediately below it and takes over the lower half of the
-records; the original slot keeps the rest. */
-UNIV_INTERN
-void
-page_dir_split_slot(
-/*================*/
-	page_t*		page,	/*!< in/out: index page */
-	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
-				uncompressed part will be written, or NULL */
-	ulint		slot_no)/*!< in: the directory slot */
-{
-	page_dir_slot_t*	full_slot;
-	page_dir_slot_t*	added_slot;
-	rec_t*			mid_rec;
-	ulint			total;
-	ulint			half;
-	ulint			step;
-
-	ut_ad(page);
-	ut_ad(!page_zip || page_is_comp(page));
-	ut_ad(slot_no > 0);
-
-	full_slot = page_dir_get_nth_slot(page, slot_no);
-
-	total = page_dir_slot_get_n_owned(full_slot);
-	ut_ad(total == PAGE_DIR_SLOT_MAX_N_OWNED + 1);
-
-	half = total / 2;
-
-	/* 1. Starting from the record of the previous slot, step forward
-	to a record approximately in the middle of the records owned by
-	the slot. */
-
-	mid_rec = (rec_t*) page_dir_slot_get_rec(
-		page_dir_get_nth_slot(page, slot_no - 1));
-
-	for (step = half; step > 0; step--) {
-		mid_rec = page_rec_get_next(mid_rec);
-	}
-
-	ut_ad(half >= PAGE_DIR_SLOT_MIN_N_OWNED);
-
-	/* 2. Add one directory slot immediately below the slot to be
-	split. */
-
-	page_dir_add_slot(page, page_zip, slot_no - 1);
-
-	/* The added slot is now number slot_no, and the old slot is
-	now number slot_no + 1 */
-
-	added_slot = page_dir_get_nth_slot(page, slot_no);
-	full_slot = page_dir_get_nth_slot(page, slot_no + 1);
-
-	/* 3. The new slot owns the lower half, ending at mid_rec. */
-
-	page_dir_slot_set_rec(added_slot, mid_rec);
-	page_dir_slot_set_n_owned(added_slot, page_zip, half);
-
-	/* 4. The original slot keeps whatever is left. */
-
-	page_dir_slot_set_n_owned(full_slot, page_zip, total - half);
-}
-
-/*************************************************************//**
-Tries to balance the given directory slot with too few records with the upper
-neighbor, so that there are at least the minimum number of records owned by
-the slot; this may result in the merging of two slots. The last directory
-slot is left as it is. */
-UNIV_INTERN
-void
-page_dir_balance_slot(
-/*==================*/
-	page_t*		page,	/*!< in/out: index page */
-	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
-	ulint		slot_no)/*!< in: the directory slot */
-{
-	page_dir_slot_t*	low_slot;
-	page_dir_slot_t*	high_slot;
-	ulint			low_owned;
-	ulint			high_owned;
-	rec_t*			owner;
-	rec_t*			successor;
-
-	ut_ad(page);
-	ut_ad(!page_zip || page_is_comp(page));
-	ut_ad(slot_no > 0);
-
-	low_slot = page_dir_get_nth_slot(page, slot_no);
-
-	/* The last directory slot has no upper neighbor to balance
-	with. */
-
-	if (UNIV_UNLIKELY(slot_no == page_dir_get_n_slots(page) - 1)) {
-
-		return;
-	}
-
-	high_slot = page_dir_get_nth_slot(page, slot_no + 1);
-
-	low_owned = page_dir_slot_get_n_owned(low_slot);
-	high_owned = page_dir_slot_get_n_owned(high_slot);
-
-	ut_ad(low_owned == PAGE_DIR_SLOT_MIN_N_OWNED - 1);
-
-	/* If the upper slot has the minimum value of n_owned, the two
-	slots are merged, therefore we assert: */
-	ut_ad(2 * PAGE_DIR_SLOT_MIN_N_OWNED - 1 <= PAGE_DIR_SLOT_MAX_N_OWNED);
-
-	if (high_owned <= PAGE_DIR_SLOT_MIN_N_OWNED) {
-		/* The upper slot has nothing to spare: merge the two
-		slots */
-		page_dir_delete_slot(page, page_zip, slot_no);
-
-		return;
-	}
-
-	/* The upper slot can spare a record: the successor of the
-	current owner record becomes the new owner of the lower slot */
-	owner = (rec_t*) page_dir_slot_get_rec(low_slot);
-
-	if (page_is_comp(page)) {
-		successor = rec_get_next_ptr(owner, TRUE);
-
-		rec_set_n_owned_new(owner, page_zip, 0);
-		rec_set_n_owned_new(successor, page_zip, low_owned + 1);
-	} else {
-		successor = rec_get_next_ptr(owner, FALSE);
-
-		rec_set_n_owned_old(owner, 0);
-		rec_set_n_owned_old(successor, low_owned + 1);
-	}
-
-	page_dir_slot_set_rec(low_slot, successor);
-
-	page_dir_slot_set_n_owned(high_slot, page_zip, high_owned - 1);
-}
-
-#ifndef UNIV_HOTBACKUP
-/************************************************************//**
-Returns the middle record of the record list. If there are an even number
-of records in the list, returns the first record of the upper half-list.
-The page directory is used to skip whole groups of records before the
-list is followed record by record.
-@return middle record */
-UNIV_INTERN
-rec_t*
-page_get_middle_rec(
-/*================*/
-	page_t*	page)	/*!< in: page */
-{
-	/* This many records we must leave behind */
-	ulint	target
-		= (page_get_n_recs(page) + PAGE_HEAP_NO_USER_LOW) / 2;
-	ulint	skipped	= 0;
-	ulint	slot_no	= 0;
-	ulint	remaining;
-	rec_t*	rec;
-
-	/* Find the first slot whose records would take us past target */
-	for (;;) {
-		ulint	owned = page_dir_slot_get_n_owned(
-			page_dir_get_nth_slot(page, slot_no));
-
-		if (skipped + owned > target) {
-			break;
-		}
-
-		skipped += owned;
-		slot_no++;
-	}
-
-	ut_ad(slot_no > 0);
-
-	/* Start right after the record of the preceding slot */
-	rec = (rec_t*) page_dir_slot_get_rec(
-		page_dir_get_nth_slot(page, slot_no - 1));
-	rec = page_rec_get_next(rec);
-
-	/* There are now "skipped" records behind rec */
-
-	for (remaining = target - skipped; remaining > 0; remaining--) {
-		rec = page_rec_get_next(rec);
-	}
-
-	return(rec);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***************************************************************//**
-Returns the number of records before the given record in chain.
-The number includes infimum and supremum records.
-The count is derived from the page directory: the n_owned values of
-all slots up to the one owning rec are summed, and the records between
-rec and its owner record are subtracted.
-@return number of records */
-UNIV_INTERN
-ulint
-page_rec_get_n_recs_before(
-/*=======================*/
-	const rec_t*	rec)	/*!< in: the physical record */
-{
-	const page_t*	page;
-	const rec_t*	owner	= rec;
-	const rec_t*	slot_rec;
-	ulint		slot_no;
-	lint		n_before = 0;
-
-	ut_ad(page_rec_check(rec));
-
-	page = page_align(rec);
-
-	if (page_is_comp(page)) {
-		/* Walk forward to the record that owns rec; every
-		step is a record that lies after rec */
-		for (; rec_get_n_owned_new(owner) == 0; n_before--) {
-
-			owner = rec_get_next_ptr_const(owner, TRUE);
-		}
-
-		/* Add up the groups of all slots up to the owner */
-		slot_no = 0;
-
-		do {
-			slot_rec = page_dir_slot_get_rec(
-				page_dir_get_nth_slot(page, slot_no));
-			slot_no++;
-
-			n_before += rec_get_n_owned_new(slot_rec);
-		} while (slot_rec != owner);
-	} else {
-		/* Same computation with the old-style accessors */
-		for (; rec_get_n_owned_old(owner) == 0; n_before--) {
-
-			owner = rec_get_next_ptr_const(owner, FALSE);
-		}
-
-		slot_no = 0;
-
-		do {
-			slot_rec = page_dir_slot_get_rec(
-				page_dir_get_nth_slot(page, slot_no));
-			slot_no++;
-
-			n_before += rec_get_n_owned_old(slot_rec);
-		} while (slot_rec != owner);
-	}
-
-	/* Do not count rec itself */
-	n_before--;
-
-	ut_ad(n_before >= 0);
-
-	return((ulint) n_before);
-}
-
-#ifndef UNIV_HOTBACKUP
-/************************************************************//**
-Prints record contents including the data relevant only in
-the index page context: n_owned, heap number and the offset of the
-next record. The output goes to stderr. The record is also passed
-to page_rec_check() and rec_validate(). */
-UNIV_INTERN
-void
-page_rec_print(
-/*===========*/
-	const rec_t*	rec,	/*!< in: physical record */
-	const ulint*	offsets)/*!< in: record descriptor */
-{
-	/* The offsets must have been computed for the same record
-	format as the page uses */
-	ut_a(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
-	rec_print_new(stderr, rec, offsets);
-	if (page_rec_is_comp(rec)) {
-		fprintf(stderr,
-			" n_owned: %lu; heap_no: %lu; next rec: %lu\n",
-			(ulong) rec_get_n_owned_new(rec),
-			(ulong) rec_get_heap_no_new(rec),
-			(ulong) rec_get_next_offs(rec, TRUE));
-	} else {
-		/* Old-style record: the next-record field must be read
-		with comp == FALSE, like the other old-style accessors
-		used in this branch */
-		fprintf(stderr,
-			" n_owned: %lu; heap_no: %lu; next rec: %lu\n",
-			(ulong) rec_get_n_owned_old(rec),
-			(ulong) rec_get_heap_no_old(rec),
-			(ulong) rec_get_next_offs(rec, FALSE));
-	}
-
-	page_rec_check(rec);
-	rec_validate(rec, offsets);
-}
-
-/***************************************************************//**
-This is used to print the contents of the directory for
-debugging purposes. The first pr_n and the last pr_n slots are
-printed to stderr; the slots in between are replaced by an ellipsis. */
-UNIV_INTERN
-void
-page_dir_print(
-/*===========*/
-	page_t*	page,	/*!< in: index page */
-	ulint	pr_n)	/*!< in: print n first and n last entries */
-{
-	const ulint	n_slots		= page_dir_get_n_slots(page);
-	/* Number of the first slot that belongs to the printed tail */
-	const ulint	tail_from	= n_slots - pr_n;
-	ulint		slot_no;
-
-	fprintf(stderr, "--------------------------------\n"
-		"PAGE DIRECTORY\n"
-		"Page address %p\n"
-		"Directory stack top at offs: %lu; number of slots: %lu\n",
-		page,
-		(ulong) page_offset(page_dir_get_nth_slot(page, n_slots - 1)),
-		(ulong) n_slots);
-
-	for (slot_no = 0; slot_no < n_slots; slot_no++) {
-		page_dir_slot_t*	slot
-			= page_dir_get_nth_slot(page, slot_no);
-
-		if (slot_no == pr_n && slot_no < tail_from) {
-			fputs(" ... \n", stderr);
-		}
-
-		if (slot_no >= pr_n && slot_no < tail_from) {
-			/* A slot in the omitted middle part */
-			continue;
-		}
-
-		fprintf(stderr,
-			"Contents of slot: %lu: n_owned: %lu,"
-			" rec offs: %lu\n",
-			(ulong) slot_no,
-			(ulong) page_dir_slot_get_n_owned(slot),
-			(ulong)
-			page_offset(page_dir_slot_get_rec(slot)));
-	}
-
-	fprintf(stderr, "Total of %lu records\n"
-		"--------------------------------\n",
-		(ulong) (PAGE_HEAP_NO_USER_LOW + page_get_n_recs(page)));
-}
-
-/***************************************************************//**
-This is used to print the contents of the page record list for
-debugging purposes. Records at the start and at the end of the list
-are printed to stderr, followed by the number of records visited. */
-UNIV_INTERN
-void
-page_print_list(
-/*============*/
-	buf_block_t*	block,	/*!< in: index page */
-	dict_index_t*	index,	/*!< in: dictionary index of the page */
-	ulint		pr_n)	/*!< in: print n first and n last entries */
-{
-	page_t*		page	= block->frame;
-	page_cur_t	cur;
-	ulint		visited	= 0;
-	ulint		n_recs;
-	mem_heap_t*	heap	= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets	= offsets_;
-	rec_offs_init(offsets_);
-
-	ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table));
-
-	fprintf(stderr,
-		"--------------------------------\n"
-		"PAGE RECORD LIST\n"
-		"Page address %p\n", page);
-
-	n_recs = page_get_n_recs(page);
-
-	page_cur_set_before_first(block, &cur);
-
-	/* Head of the list: print until pr_n steps have been taken
-	or the cursor is after the last record */
-	for (;;) {
-		offsets = rec_get_offsets(cur.rec, index, offsets,
-					  ULINT_UNDEFINED, &heap);
-		page_rec_print(cur.rec, offsets);
-
-		if (visited == pr_n || page_cur_is_after_last(&cur)) {
-			break;
-		}
-
-		page_cur_move_to_next(&cur);
-		visited++;
-	}
-
-	if (n_recs > 2 * pr_n) {
-		fputs(" ... \n", stderr);
-	}
-
-	/* Rest of the list: keep walking, but print only once the
-	position is within pr_n of n_recs */
-	for (; !page_cur_is_after_last(&cur); visited++) {
-		page_cur_move_to_next(&cur);
-
-		if (visited + pr_n < n_recs) {
-			continue;
-		}
-
-		offsets = rec_get_offsets(cur.rec, index, offsets,
-					  ULINT_UNDEFINED, &heap);
-		page_rec_print(cur.rec, offsets);
-	}
-
-	fprintf(stderr,
-		"Total of %lu records \n"
-		"--------------------------------\n",
-		(ulong) (visited + 1));
-
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-}
-
-/***************************************************************//**
-Prints the info in a page header to stderr: the record and directory
-slot counts, the heap, free list and garbage fields, and the last
-insert and direction fields. */
-UNIV_INTERN
-void
-page_header_print(
-/*==============*/
-	const page_t*	page)	/*!< in: index page */
-{
-	const ulong	n_recs
-		= (ulong) page_header_get_field(page, PAGE_N_RECS);
-	const char*	format_name
-		= page_is_comp(page) ? "compact format" : "original format";
-	const ulong	n_dir_slots
-		= (ulong) page_header_get_field(page, PAGE_N_DIR_SLOTS);
-	const ulong	heap_top
-		= (ulong) page_header_get_field(page, PAGE_HEAP_TOP);
-	const ulong	n_heap
-		= (ulong) page_dir_get_n_heap(page);
-	const ulong	free_offs
-		= (ulong) page_header_get_field(page, PAGE_FREE);
-	const ulong	garbage
-		= (ulong) page_header_get_field(page, PAGE_GARBAGE);
-	const ulong	last_insert
-		= (ulong) page_header_get_field(page, PAGE_LAST_INSERT);
-	const ulong	direction
-		= (ulong) page_header_get_field(page, PAGE_DIRECTION);
-	const ulong	n_direction
-		= (ulong) page_header_get_field(page, PAGE_N_DIRECTION);
-
-	fprintf(stderr,
-		"--------------------------------\n"
-		"PAGE HEADER INFO\n"
-		"Page address %p, n records %lu (%s)\n"
-		"n dir slots %lu, heap top %lu\n"
-		"Page n heap %lu, free %lu, garbage %lu\n"
-		"Page last insert %lu, direction %lu, n direction %lu\n",
-		page, n_recs, format_name,
-		n_dir_slots, heap_top,
-		n_heap, free_offs, garbage,
-		last_insert, direction, n_direction);
-}
-
-/***************************************************************//**
-This is used to print the contents of the page for
-debugging purposes: the page header, then the page directory,
-then the record list. */
-UNIV_INTERN
-void
-page_print(
-/*=======*/
-	buf_block_t*	block,	/*!< in: index page */
-	dict_index_t*	index,	/*!< in: dictionary index of the page */
-	ulint		dn,	/*!< in: print dn first and last entries
-				in directory */
-	ulint		rn)	/*!< in: print rn first and last records
-				in the record list */
-{
-	page_header_print(block->frame);
-	page_dir_print(block->frame, dn);
-	page_print_list(block, index, rn);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***************************************************************//**
-The following is used to validate a record on a page. This function
-differs from rec_validate as it can also check the n_owned field and
-the heap_no field. A diagnostic is printed to stderr when a check fails.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-page_rec_validate(
-/*==============*/
-	rec_t*		rec,	/*!< in: physical record */
-	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
-{
-	page_t*	page	= page_align(rec);
-	ulint	owned;
-	ulint	heap_no;
-
-	ut_a(!page_is_comp(page) == !rec_offs_comp(offsets));
-
-	page_rec_check(rec);
-	rec_validate(rec, offsets);
-
-	/* Read both fields with the accessors of the record format */
-	if (page_rec_is_comp(rec)) {
-		owned = rec_get_n_owned_new(rec);
-		heap_no = rec_get_heap_no_new(rec);
-	} else {
-		owned = rec_get_n_owned_old(rec);
-		heap_no = rec_get_heap_no_old(rec);
-	}
-
-	if (UNIV_UNLIKELY(owned > PAGE_DIR_SLOT_MAX_N_OWNED)) {
-		fprintf(stderr,
-			"InnoDB: Dir slot of rec %lu, n owned too big %lu\n",
-			(ulong) page_offset(rec), (ulong) owned);
-
-		return(FALSE);
-	}
-
-	if (UNIV_UNLIKELY(heap_no >= page_dir_get_n_heap(page))) {
-		fprintf(stderr,
-			"InnoDB: Heap no of rec %lu too big %lu %lu\n",
-			(ulong) page_offset(rec), (ulong) heap_no,
-			(ulong) page_dir_get_n_heap(page));
-
-		return(FALSE);
-	}
-
-	return(TRUE);
-}
-
-#ifndef UNIV_HOTBACKUP
-/***************************************************************//**
-Checks that the first directory slot points to the infimum record and
-the last to the supremum. This function is intended to track if the
-bug fixed in 4.0.14 has caused corruption to users' databases.
-On a mismatch a message and the page are printed; nothing is returned. */
-UNIV_INTERN
-void
-page_check_dir(
-/*===========*/
-	const page_t*	page)	/*!< in: index page */
-{
-	const ulint	last_slot	= page_dir_get_n_slots(page) - 1;
-	/* Record offsets stored in the first and in the last slot */
-	const ulint	first_offs
-		= mach_read_from_2(page_dir_get_nth_slot(page, 0));
-	const ulint	last_offs
-		= mach_read_from_2(page_dir_get_nth_slot(page, last_slot));
-
-	if (UNIV_UNLIKELY(!page_rec_is_infimum_low(first_offs))) {
-
-		fprintf(stderr,
-			"InnoDB: Page directory corruption:"
-			" infimum not pointed to\n");
-		buf_page_print(page, 0);
-	}
-
-	if (UNIV_UNLIKELY(!page_rec_is_supremum_low(last_offs))) {
-
-		fprintf(stderr,
-			"InnoDB: Page directory corruption:"
-			" supremum not pointed to\n");
-		buf_page_print(page, 0);
-	}
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***************************************************************//**
-This function checks the consistency of an index page when we do not
-know the index. This is also resilient so that this should never crash
-even if the page is total garbage.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-page_simple_validate_old(
-/*=====================*/
- page_t* page) /*!< in: old-style index page */
-{
- page_dir_slot_t* slot;
- ulint slot_no;
- ulint n_slots;
- rec_t* rec;
- byte* rec_heap_top;
- ulint count;
- ulint own_count;
- ibool ret = FALSE;
-
- ut_a(!page_is_comp(page));
-
- /* Check first that the record heap and the directory do not
- overlap. */
-
- n_slots = page_dir_get_n_slots(page);
-
- if (UNIV_UNLIKELY(n_slots > UNIV_PAGE_SIZE / 4)) {
- fprintf(stderr,
- "InnoDB: Nonsensical number %lu of page dir slots\n",
- (ulong) n_slots);
-
- goto func_exit;
- }
-
- rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP);
-
- if (UNIV_UNLIKELY(rec_heap_top
- > page_dir_get_nth_slot(page, n_slots - 1))) {
-
- fprintf(stderr,
- "InnoDB: Record heap and dir overlap on a page,"
- " heap top %lu, dir %lu\n",
- (ulong) page_header_get_field(page, PAGE_HEAP_TOP),
- (ulong)
- page_offset(page_dir_get_nth_slot(page, n_slots - 1)));
-
- goto func_exit;
- }
-
- /* Validate the record list in a loop checking also that it is
- consistent with the page record directory. */
-
- count = 0;
- own_count = 1;
- slot_no = 0;
- slot = page_dir_get_nth_slot(page, slot_no);
-
- rec = page_get_infimum_rec(page);
-
- for (;;) {
- if (UNIV_UNLIKELY(rec > rec_heap_top)) {
- fprintf(stderr,
- "InnoDB: Record %lu is above"
- " rec heap top %lu\n",
- (ulong)(rec - page),
- (ulong)(rec_heap_top - page));
-
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(rec_get_n_owned_old(rec))) {
- /* This is a record pointed to by a dir slot */
- if (UNIV_UNLIKELY(rec_get_n_owned_old(rec)
- != own_count)) {
-
- fprintf(stderr,
- "InnoDB: Wrong owned count %lu, %lu,"
- " rec %lu\n",
- (ulong) rec_get_n_owned_old(rec),
- (ulong) own_count,
- (ulong)(rec - page));
-
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY
- (page_dir_slot_get_rec(slot) != rec)) {
- fprintf(stderr,
- "InnoDB: Dir slot does not point"
- " to right rec %lu\n",
- (ulong)(rec - page));
-
- goto func_exit;
- }
-
- own_count = 0;
-
- if (!page_rec_is_supremum(rec)) {
- slot_no++;
- slot = page_dir_get_nth_slot(page, slot_no);
- }
- }
-
- if (page_rec_is_supremum(rec)) {
-
- break;
- }
-
- if (UNIV_UNLIKELY
- (rec_get_next_offs(rec, FALSE) < FIL_PAGE_DATA
- || rec_get_next_offs(rec, FALSE) >= UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Next record offset"
- " nonsensical %lu for rec %lu\n",
- (ulong) rec_get_next_offs(rec, FALSE),
- (ulong) (rec - page));
-
- goto func_exit;
- }
-
- count++;
-
- if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Page record list appears"
- " to be circular %lu\n",
- (ulong) count);
- goto func_exit;
- }
-
- rec = page_rec_get_next(rec);
- own_count++;
- }
-
- if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) == 0)) {
- fprintf(stderr, "InnoDB: n owned is zero in a supremum rec\n");
-
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
- fprintf(stderr, "InnoDB: n slots wrong %lu, %lu\n",
- (ulong) slot_no, (ulong) (n_slots - 1));
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS)
- + PAGE_HEAP_NO_USER_LOW
- != count + 1)) {
- fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
- (ulong) page_header_get_field(page, PAGE_N_RECS)
- + PAGE_HEAP_NO_USER_LOW,
- (ulong) (count + 1));
-
- goto func_exit;
- }
-
- /* Check then the free list */
- rec = page_header_get_ptr(page, PAGE_FREE);
-
- while (rec != NULL) {
- if (UNIV_UNLIKELY(rec < page + FIL_PAGE_DATA
- || rec >= page + UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Free list record has"
- " a nonsensical offset %lu\n",
- (ulong) (rec - page));
-
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(rec > rec_heap_top)) {
- fprintf(stderr,
- "InnoDB: Free list record %lu"
- " is above rec heap top %lu\n",
- (ulong) (rec - page),
- (ulong) (rec_heap_top - page));
-
- goto func_exit;
- }
-
- count++;
-
- if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Page free list appears"
- " to be circular %lu\n",
- (ulong) count);
- goto func_exit;
- }
-
- rec = page_rec_get_next(rec);
- }
-
- if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
-
- fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n",
- (ulong) page_dir_get_n_heap(page),
- (ulong) (count + 1));
-
- goto func_exit;
- }
-
- ret = TRUE;
-
-func_exit:
- return(ret);
-}
-
-/***************************************************************//**
-This function checks the consistency of an index page when we do not
-know the index. This is also resilient so that this should never crash
-even if the page is total garbage.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-page_simple_validate_new(
-/*=====================*/
- page_t* page) /*!< in: new-style index page */
-{
- page_dir_slot_t* slot;
- ulint slot_no;
- ulint n_slots;
- rec_t* rec;
- byte* rec_heap_top;
- ulint count;
- ulint own_count;
- ibool ret = FALSE;
-
- ut_a(page_is_comp(page));
-
- /* Check first that the record heap and the directory do not
- overlap. */
-
- n_slots = page_dir_get_n_slots(page);
-
- if (UNIV_UNLIKELY(n_slots > UNIV_PAGE_SIZE / 4)) {
- fprintf(stderr,
- "InnoDB: Nonsensical number %lu"
- " of page dir slots\n", (ulong) n_slots);
-
- goto func_exit;
- }
-
- rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP);
-
- if (UNIV_UNLIKELY(rec_heap_top
- > page_dir_get_nth_slot(page, n_slots - 1))) {
-
- fprintf(stderr,
- "InnoDB: Record heap and dir overlap on a page,"
- " heap top %lu, dir %lu\n",
- (ulong) page_header_get_field(page, PAGE_HEAP_TOP),
- (ulong)
- page_offset(page_dir_get_nth_slot(page, n_slots - 1)));
-
- goto func_exit;
- }
-
- /* Validate the record list in a loop checking also that it is
- consistent with the page record directory. */
-
- count = 0;
- own_count = 1;
- slot_no = 0;
- slot = page_dir_get_nth_slot(page, slot_no);
-
- rec = page_get_infimum_rec(page);
-
- for (;;) {
- if (UNIV_UNLIKELY(rec > rec_heap_top)) {
- fprintf(stderr,
- "InnoDB: Record %lu is above rec"
- " heap top %lu\n",
- (ulong) page_offset(rec),
- (ulong) page_offset(rec_heap_top));
-
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) {
- /* This is a record pointed to by a dir slot */
- if (UNIV_UNLIKELY(rec_get_n_owned_new(rec)
- != own_count)) {
-
- fprintf(stderr,
- "InnoDB: Wrong owned count %lu, %lu,"
- " rec %lu\n",
- (ulong) rec_get_n_owned_new(rec),
- (ulong) own_count,
- (ulong) page_offset(rec));
-
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY
- (page_dir_slot_get_rec(slot) != rec)) {
- fprintf(stderr,
- "InnoDB: Dir slot does not point"
- " to right rec %lu\n",
- (ulong) page_offset(rec));
-
- goto func_exit;
- }
-
- own_count = 0;
-
- if (!page_rec_is_supremum(rec)) {
- slot_no++;
- slot = page_dir_get_nth_slot(page, slot_no);
- }
- }
-
- if (page_rec_is_supremum(rec)) {
-
- break;
- }
-
- if (UNIV_UNLIKELY
- (rec_get_next_offs(rec, TRUE) < FIL_PAGE_DATA
- || rec_get_next_offs(rec, TRUE) >= UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Next record offset nonsensical %lu"
- " for rec %lu\n",
- (ulong) rec_get_next_offs(rec, TRUE),
- (ulong) page_offset(rec));
-
- goto func_exit;
- }
-
- count++;
-
- if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Page record list appears"
- " to be circular %lu\n",
- (ulong) count);
- goto func_exit;
- }
-
- rec = page_rec_get_next(rec);
- own_count++;
- }
-
- if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) == 0)) {
- fprintf(stderr, "InnoDB: n owned is zero"
- " in a supremum rec\n");
-
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
- fprintf(stderr, "InnoDB: n slots wrong %lu, %lu\n",
- (ulong) slot_no, (ulong) (n_slots - 1));
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS)
- + PAGE_HEAP_NO_USER_LOW
- != count + 1)) {
- fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
- (ulong) page_header_get_field(page, PAGE_N_RECS)
- + PAGE_HEAP_NO_USER_LOW,
- (ulong) (count + 1));
-
- goto func_exit;
- }
-
- /* Check then the free list */
- rec = page_header_get_ptr(page, PAGE_FREE);
-
- while (rec != NULL) {
- if (UNIV_UNLIKELY(rec < page + FIL_PAGE_DATA
- || rec >= page + UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Free list record has"
- " a nonsensical offset %lu\n",
- (ulong) page_offset(rec));
-
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(rec > rec_heap_top)) {
- fprintf(stderr,
- "InnoDB: Free list record %lu"
- " is above rec heap top %lu\n",
- (ulong) page_offset(rec),
- (ulong) page_offset(rec_heap_top));
-
- goto func_exit;
- }
-
- count++;
-
- if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Page free list appears"
- " to be circular %lu\n",
- (ulong) count);
- goto func_exit;
- }
-
- rec = page_rec_get_next(rec);
- }
-
- if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
-
- fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n",
- (ulong) page_dir_get_n_heap(page),
- (ulong) (count + 1));
-
- goto func_exit;
- }
-
- ret = TRUE;
-
-func_exit:
- return(ret);
-}
-
-/***************************************************************//**
-This function checks the consistency of an index page.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-page_validate(
-/*==========*/
- page_t* page, /*!< in: index page */
- dict_index_t* index) /*!< in: data dictionary index containing
- the page record type definition */
-{
- page_dir_slot_t*slot;
- mem_heap_t* heap;
- byte* buf;
- ulint count;
- ulint own_count;
- ulint rec_own_count;
- ulint slot_no;
- ulint data_size;
- rec_t* rec;
- rec_t* old_rec = NULL;
- ulint offs;
- ulint n_slots;
- ibool ret = FALSE;
- ulint i;
- ulint* offsets = NULL;
- ulint* old_offsets = NULL;
-
- if (UNIV_UNLIKELY((ibool) !!page_is_comp(page)
- != dict_table_is_comp(index->table))) {
- fputs("InnoDB: 'compact format' flag mismatch\n", stderr);
- goto func_exit2;
- }
- if (page_is_comp(page)) {
- if (UNIV_UNLIKELY(!page_simple_validate_new(page))) {
- goto func_exit2;
- }
- } else {
- if (UNIV_UNLIKELY(!page_simple_validate_old(page))) {
- goto func_exit2;
- }
- }
-
- heap = mem_heap_create(UNIV_PAGE_SIZE + 200);
-
- /* The following buffer is used to check that the
- records in the page record heap do not overlap */
-
- buf = mem_heap_zalloc(heap, UNIV_PAGE_SIZE);
-
- /* Check first that the record heap and the directory do not
- overlap. */
-
- n_slots = page_dir_get_n_slots(page);
-
- if (UNIV_UNLIKELY(!(page_header_get_ptr(page, PAGE_HEAP_TOP)
- <= page_dir_get_nth_slot(page, n_slots - 1)))) {
-
- fprintf(stderr,
- "InnoDB: Record heap and dir overlap"
- " on space %lu page %lu index %s, %p, %p\n",
- (ulong) page_get_space_id(page),
- (ulong) page_get_page_no(page), index->name,
- page_header_get_ptr(page, PAGE_HEAP_TOP),
- page_dir_get_nth_slot(page, n_slots - 1));
-
- goto func_exit;
- }
-
- /* Validate the record list in a loop checking also that
- it is consistent with the directory. */
- count = 0;
- data_size = 0;
- own_count = 1;
- slot_no = 0;
- slot = page_dir_get_nth_slot(page, slot_no);
-
- rec = page_get_infimum_rec(page);
-
- for (;;) {
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- if (page_is_comp(page) && page_rec_is_user_rec(rec)
- && UNIV_UNLIKELY(rec_get_node_ptr_flag(rec)
- == page_is_leaf(page))) {
- fputs("InnoDB: node_ptr flag mismatch\n", stderr);
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(!page_rec_validate(rec, offsets))) {
- goto func_exit;
- }
-
-#ifndef UNIV_HOTBACKUP
- /* Check that the records are in the ascending order */
- if (UNIV_LIKELY(count >= PAGE_HEAP_NO_USER_LOW)
- && !page_rec_is_supremum(rec)) {
- if (UNIV_UNLIKELY
- (1 != cmp_rec_rec(rec, old_rec,
- offsets, old_offsets, index))) {
- fprintf(stderr,
- "InnoDB: Records in wrong order"
- " on space %lu page %lu index %s\n",
- (ulong) page_get_space_id(page),
- (ulong) page_get_page_no(page),
- index->name);
- fputs("\nInnoDB: previous record ", stderr);
- rec_print_new(stderr, old_rec, old_offsets);
- fputs("\nInnoDB: record ", stderr);
- rec_print_new(stderr, rec, offsets);
- putc('\n', stderr);
-
- goto func_exit;
- }
- }
-#endif /* !UNIV_HOTBACKUP */
-
- if (page_rec_is_user_rec(rec)) {
-
- data_size += rec_offs_size(offsets);
- }
-
- offs = page_offset(rec_get_start(rec, offsets));
-
- for (i = rec_offs_size(offsets); i--; ) {
- if (UNIV_UNLIKELY(buf[offs + i])) {
- /* No other record may overlap this */
-
- fputs("InnoDB: Record overlaps another\n",
- stderr);
- goto func_exit;
- }
-
- buf[offs + i] = 1;
- }
-
- if (page_is_comp(page)) {
- rec_own_count = rec_get_n_owned_new(rec);
- } else {
- rec_own_count = rec_get_n_owned_old(rec);
- }
-
- if (UNIV_UNLIKELY(rec_own_count)) {
- /* This is a record pointed to by a dir slot */
- if (UNIV_UNLIKELY(rec_own_count != own_count)) {
- fprintf(stderr,
- "InnoDB: Wrong owned count %lu, %lu\n",
- (ulong) rec_own_count,
- (ulong) own_count);
- goto func_exit;
- }
-
- if (page_dir_slot_get_rec(slot) != rec) {
- fputs("InnoDB: Dir slot does not"
- " point to right rec\n",
- stderr);
- goto func_exit;
- }
-
- page_dir_slot_check(slot);
-
- own_count = 0;
- if (!page_rec_is_supremum(rec)) {
- slot_no++;
- slot = page_dir_get_nth_slot(page, slot_no);
- }
- }
-
- if (page_rec_is_supremum(rec)) {
- break;
- }
-
- count++;
- own_count++;
- old_rec = rec;
- rec = page_rec_get_next(rec);
-
- /* set old_offsets to offsets; recycle offsets */
- {
- ulint* offs = old_offsets;
- old_offsets = offsets;
- offsets = offs;
- }
- }
-
- if (page_is_comp(page)) {
- if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) == 0)) {
-
- goto n_owned_zero;
- }
- } else if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) == 0)) {
-n_owned_zero:
- fputs("InnoDB: n owned is zero\n", stderr);
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
- fprintf(stderr, "InnoDB: n slots wrong %lu %lu\n",
- (ulong) slot_no, (ulong) (n_slots - 1));
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS)
- + PAGE_HEAP_NO_USER_LOW
- != count + 1)) {
- fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
- (ulong) page_header_get_field(page, PAGE_N_RECS)
- + PAGE_HEAP_NO_USER_LOW,
- (ulong) (count + 1));
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(data_size != page_get_data_size(page))) {
- fprintf(stderr,
- "InnoDB: Summed data size %lu, returned by func %lu\n",
- (ulong) data_size, (ulong) page_get_data_size(page));
- goto func_exit;
- }
-
- /* Check then the free list */
- rec = page_header_get_ptr(page, PAGE_FREE);
-
- while (rec != NULL) {
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- if (UNIV_UNLIKELY(!page_rec_validate(rec, offsets))) {
-
- goto func_exit;
- }
-
- count++;
- offs = page_offset(rec_get_start(rec, offsets));
-
- for (i = rec_offs_size(offsets); i--; ) {
-
- if (UNIV_UNLIKELY(buf[offs + i])) {
- fputs("InnoDB: Record overlaps another"
- " in free list\n", stderr);
- goto func_exit;
- }
-
- buf[offs + i] = 1;
- }
-
- rec = page_rec_get_next(rec);
- }
-
- if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
- fprintf(stderr, "InnoDB: N heap is wrong %lu %lu\n",
- (ulong) page_dir_get_n_heap(page),
- (ulong) count + 1);
- goto func_exit;
- }
-
- ret = TRUE;
-
-func_exit:
- mem_heap_free(heap);
-
- if (UNIV_UNLIKELY(ret == FALSE)) {
-func_exit2:
- fprintf(stderr,
- "InnoDB: Apparent corruption"
- " in space %lu page %lu index %s\n",
- (ulong) page_get_space_id(page),
- (ulong) page_get_page_no(page),
- index->name);
- buf_page_print(page, 0);
- }
-
- return(ret);
-}
-
-#ifndef UNIV_HOTBACKUP
-/***************************************************************//**
-Looks in the page record list for a record with the given heap number.
-@return record, NULL if not found */
-UNIV_INTERN
-const rec_t*
-page_find_rec_with_heap_no(
-/*=======================*/
- const page_t* page, /*!< in: index page */
- ulint heap_no)/*!< in: heap number */
-{
- const rec_t* rec;
-
- if (page_is_comp(page)) {
- rec = page + PAGE_NEW_INFIMUM;
-
- for(;;) {
- ulint rec_heap_no = rec_get_heap_no_new(rec);
-
- if (rec_heap_no == heap_no) {
-
- return(rec);
- } else if (rec_heap_no == PAGE_HEAP_NO_SUPREMUM) {
-
- return(NULL);
- }
-
- rec = page + rec_get_next_offs(rec, TRUE);
- }
- } else {
- rec = page + PAGE_OLD_INFIMUM;
-
- for (;;) {
- ulint rec_heap_no = rec_get_heap_no_old(rec);
-
- if (rec_heap_no == heap_no) {
-
- return(rec);
- } else if (rec_heap_no == PAGE_HEAP_NO_SUPREMUM) {
-
- return(NULL);
- }
-
- rec = page + rec_get_next_offs(rec, FALSE);
- }
- }
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/pars/lexyy.c b/storage/innodb_plugin/pars/lexyy.c
deleted file mode 100644
index 815395ea316..00000000000
--- a/storage/innodb_plugin/pars/lexyy.c
+++ /dev/null
@@ -1,2793 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-#include "univ.i"
-#line 2 "lexyy.c"
-
-#line 4 "lexyy.c"
-
-#define YY_INT_ALIGNED short int
-
-/* A lexical scanner generated by flex */
-
-#define FLEX_SCANNER
-#define YY_FLEX_MAJOR_VERSION 2
-#define YY_FLEX_MINOR_VERSION 5
-#define YY_FLEX_SUBMINOR_VERSION 31
-#if YY_FLEX_SUBMINOR_VERSION > 0
-#define FLEX_BETA
-#endif
-
-/* First, we deal with platform-specific or compiler-specific issues. */
-
-/* begin standard C headers. */
-#include <stdio.h>
-#include <string.h>
-#include <errno.h>
-#include <stdlib.h>
-
-/* end standard C headers. */
-
-/* flex integer type definitions */
-
-#ifndef FLEXINT_H
-#define FLEXINT_H
-
-/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
-
-#if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L
-#include <inttypes.h>
-typedef int8_t flex_int8_t;
-typedef uint8_t flex_uint8_t;
-typedef int16_t flex_int16_t;
-typedef uint16_t flex_uint16_t;
-typedef int32_t flex_int32_t;
-typedef uint32_t flex_uint32_t;
-#else
-typedef signed char flex_int8_t;
-typedef short int flex_int16_t;
-typedef int flex_int32_t;
-typedef unsigned char flex_uint8_t;
-typedef unsigned short int flex_uint16_t;
-typedef unsigned int flex_uint32_t;
-#endif /* ! C99 */
-
-/* Limits of integral types. */
-#ifndef INT8_MIN
-#define INT8_MIN (-128)
-#endif
-#ifndef INT16_MIN
-#define INT16_MIN (-32767-1)
-#endif
-#ifndef INT32_MIN
-#define INT32_MIN (-2147483647-1)
-#endif
-#ifndef INT8_MAX
-#define INT8_MAX (127)
-#endif
-#ifndef INT16_MAX
-#define INT16_MAX (32767)
-#endif
-#ifndef INT32_MAX
-#define INT32_MAX (2147483647)
-#endif
-#ifndef UINT8_MAX
-#define UINT8_MAX (255U)
-#endif
-#ifndef UINT16_MAX
-#define UINT16_MAX (65535U)
-#endif
-#ifndef UINT32_MAX
-#define UINT32_MAX (4294967295U)
-#endif
-
-#endif /* ! FLEXINT_H */
-
-#ifdef __cplusplus
-
-/* The "const" storage-class-modifier is valid. */
-#define YY_USE_CONST
-
-#else /* ! __cplusplus */
-
-#if __STDC__
-
-#define YY_USE_CONST
-
-#endif /* __STDC__ */
-#endif /* ! __cplusplus */
-
-#ifdef YY_USE_CONST
-#define yyconst const
-#else
-#define yyconst
-#endif
-
-/* Returned upon end-of-file. */
-#define YY_NULL 0
-
-/* Promotes a possibly negative, possibly signed char to an unsigned
- * integer for use as an array index. If the signed char is negative,
- * we want to instead treat it as an 8-bit unsigned char, hence the
- * double cast.
- */
-#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c)
-
-/* Enter a start condition. This macro really ought to take a parameter,
- * but we do it the disgusting crufty way forced on us by the ()-less
- * definition of BEGIN.
- */
-#define BEGIN (yy_start) = 1 + 2 *
-
-/* Translate the current start state into a value that can be later handed
- * to BEGIN to return to the state. The YYSTATE alias is for lex
- * compatibility.
- */
-#define YY_START (((yy_start) - 1) / 2)
-#define YYSTATE YY_START
-
-/* Action number for EOF rule of a given start state. */
-#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)
-
-/* Special action meaning "start processing a new file". */
-#define YY_NEW_FILE yyrestart(yyin )
-
-#define YY_END_OF_BUFFER_CHAR 0
-
-/* Size of default input buffer. */
-#ifndef YY_BUF_SIZE
-#define YY_BUF_SIZE 16384
-#endif
-
-#ifndef YY_TYPEDEF_YY_BUFFER_STATE
-#define YY_TYPEDEF_YY_BUFFER_STATE
-typedef struct yy_buffer_state *YY_BUFFER_STATE;
-#endif
-
-static int yyleng;
-
-static FILE *yyin, *yyout;
-
-#define EOB_ACT_CONTINUE_SCAN 0
-#define EOB_ACT_END_OF_FILE 1
-#define EOB_ACT_LAST_MATCH 2
-
- #define YY_LESS_LINENO(n)
-
-/* Return all but the first "n" matched characters back to the input stream. */
-#define yyless(n) \
- do \
- { \
- /* Undo effects of setting up yytext. */ \
- int yyless_macro_arg = (n); \
- YY_LESS_LINENO(yyless_macro_arg);\
- *yy_cp = (yy_hold_char); \
- YY_RESTORE_YY_MORE_OFFSET \
- (yy_c_buf_p) = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \
- YY_DO_BEFORE_ACTION; /* set up yytext again */ \
- } \
- while ( 0 )
-
-#define unput(c) yyunput( c, (yytext_ptr) )
-
-/* The following is because we cannot portably get our hands on size_t
- * (without autoconf's help, which isn't available because we want
- * flex-generated scanners to compile on their own).
- */
-
-#ifndef YY_TYPEDEF_YY_SIZE_T
-#define YY_TYPEDEF_YY_SIZE_T
-typedef unsigned int yy_size_t;
-#endif
-
-#ifndef YY_STRUCT_YY_BUFFER_STATE
-#define YY_STRUCT_YY_BUFFER_STATE
-struct yy_buffer_state
- {
- FILE *yy_input_file;
-
- char *yy_ch_buf; /* input buffer */
- char *yy_buf_pos; /* current position in input buffer */
-
- /* Size of input buffer in bytes, not including room for EOB
- * characters.
- */
- yy_size_t yy_buf_size;
-
- /* Number of characters read into yy_ch_buf, not including EOB
- * characters.
- */
- int yy_n_chars;
-
- /* Whether we "own" the buffer - i.e., we know we created it,
- * and can realloc() it to grow it, and should free() it to
- * delete it.
- */
- int yy_is_our_buffer;
-
- /* Whether this is an "interactive" input source; if so, and
- * if we're using stdio for input, then we want to use getc()
- * instead of fread(), to make sure we stop fetching input after
- * each newline.
- */
- int yy_is_interactive;
-
- /* Whether we're considered to be at the beginning of a line.
- * If so, '^' rules will be active on the next match, otherwise
- * not.
- */
- int yy_at_bol;
-
- int yy_bs_lineno; /**< The line count. */
- int yy_bs_column; /**< The column count. */
-
- /* Whether to try to fill the input buffer when we reach the
- * end of it.
- */
- int yy_fill_buffer;
-
- int yy_buffer_status;
-
-#define YY_BUFFER_NEW 0
-#define YY_BUFFER_NORMAL 1
- /* When an EOF's been seen but there's still some text to process
- * then we mark the buffer as YY_EOF_PENDING, to indicate that we
- * shouldn't try reading from the input source any more. We might
- * still have a bunch of tokens to match, though, because of
- * possible backing-up.
- *
- * When we actually see the EOF, we change the status to "new"
- * (via yyrestart()), so that the user can continue scanning by
- * just pointing yyin at a new input file.
- */
-#define YY_BUFFER_EOF_PENDING 2
-
- };
-#endif /* !YY_STRUCT_YY_BUFFER_STATE */
-
-/* Stack of input buffers. */
-static size_t yy_buffer_stack_top = 0; /**< index of top of stack. */
-static size_t yy_buffer_stack_max = 0; /**< capacity of stack. */
-static YY_BUFFER_STATE * yy_buffer_stack = 0; /**< Stack as an array. */
-
-/* We provide macros for accessing buffer states in case in the
- * future we want to put the buffer states in a more general
- * "scanner state".
- *
- * Returns the top of the stack, or NULL.
- */
-#define YY_CURRENT_BUFFER ( (yy_buffer_stack) \
- ? (yy_buffer_stack)[(yy_buffer_stack_top)] \
- : NULL)
-
-/* Same as previous macro, but useful when we know that the buffer stack is not
- * NULL or when we need an lvalue. For internal use only.
- */
-#define YY_CURRENT_BUFFER_LVALUE (yy_buffer_stack)[(yy_buffer_stack_top)]
-
-/* yy_hold_char holds the character lost when yytext is formed. */
-static char yy_hold_char;
-static int yy_n_chars; /* number of characters read into yy_ch_buf */
-static int yyleng;
-
-/* Points to current character in buffer. */
-static char *yy_c_buf_p = (char *) 0;
-static int yy_init = 1; /* whether we need to initialize */
-static int yy_start = 0; /* start state number */
-
-/* Flag which is used to allow yywrap()'s to do buffer switches
- * instead of setting up a fresh yyin. A bit of a hack ...
- */
-static int yy_did_buffer_switch_on_eof;
-
-static void yyrestart (FILE *input_file );
-__attribute__((unused)) static void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer );
-static YY_BUFFER_STATE yy_create_buffer (FILE *file,int size );
-static void yy_delete_buffer (YY_BUFFER_STATE b );
-static void yy_flush_buffer (YY_BUFFER_STATE b );
-__attribute__((unused)) static void yypush_buffer_state (YY_BUFFER_STATE new_buffer );
-__attribute__((unused)) static void yypop_buffer_state (void );
-
-static void yyensure_buffer_stack (void );
-static void yy_load_buffer_state (void );
-static void yy_init_buffer (YY_BUFFER_STATE b,FILE *file );
-
-#define YY_FLUSH_BUFFER yy_flush_buffer(YY_CURRENT_BUFFER )
-
-YY_BUFFER_STATE yy_scan_buffer (char *base,yy_size_t size );
-YY_BUFFER_STATE yy_scan_string (yyconst char *yy_str );
-YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,int len );
-
-static void *yyalloc (yy_size_t );
-static void *yyrealloc (void *,yy_size_t );
-static void yyfree (void * );
-
-#define yy_new_buffer yy_create_buffer
-
-#define yy_set_interactive(is_interactive) \
- { \
- if ( ! YY_CURRENT_BUFFER ){ \
- yyensure_buffer_stack (); \
- YY_CURRENT_BUFFER_LVALUE = \
- yy_create_buffer(yyin,YY_BUF_SIZE ); \
- } \
- YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \
- }
-
-#define yy_set_bol(at_bol) \
- { \
- if ( ! YY_CURRENT_BUFFER ){\
- yyensure_buffer_stack (); \
- YY_CURRENT_BUFFER_LVALUE = \
- yy_create_buffer(yyin,YY_BUF_SIZE ); \
- } \
- YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \
- }
-
-#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol)
-
-/* Begin user sect3 */
-
-#define yywrap(n) 1
-#define YY_SKIP_YYWRAP
-
-typedef unsigned char YY_CHAR;
-
-static FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0;
-
-typedef int yy_state_type;
-
-static int yylineno;
-
-static int yylineno = 1;
-
-static char *yytext;
-#define yytext_ptr yytext
-
-static yy_state_type yy_get_previous_state (void );
-static yy_state_type yy_try_NUL_trans (yy_state_type current_state );
-static int yy_get_next_buffer (void );
-static void yy_fatal_error (yyconst char msg[] );
-
-/* Done after the current pattern has been matched and before the
- * corresponding action - sets up yytext.
- */
-#define YY_DO_BEFORE_ACTION \
- (yytext_ptr) = yy_bp; \
- yyleng = (size_t) (yy_cp - yy_bp); \
- (yy_hold_char) = *yy_cp; \
- *yy_cp = '\0'; \
- (yy_c_buf_p) = yy_cp;
-
-#define YY_NUM_RULES 119
-#define YY_END_OF_BUFFER 120
-/* This struct is not used in this scanner,
- but its presence is necessary. */
-struct yy_trans_info
- {
- flex_int32_t yy_verify;
- flex_int32_t yy_nxt;
- };
-static yyconst flex_int16_t yy_accept[399] =
- { 0,
- 0, 0, 114, 114, 0, 0, 0, 0, 120, 118,
- 117, 117, 8, 118, 109, 5, 98, 104, 107, 105,
- 102, 106, 118, 108, 1, 118, 103, 101, 99, 100,
- 112, 92, 92, 92, 92, 92, 92, 92, 92, 92,
- 92, 92, 92, 92, 92, 92, 92, 92, 92, 92,
- 110, 111, 114, 115, 6, 7, 9, 10, 117, 4,
- 93, 113, 2, 1, 3, 94, 95, 97, 96, 92,
- 92, 92, 92, 92, 92, 44, 92, 92, 92, 92,
- 92, 92, 92, 92, 92, 92, 92, 92, 92, 92,
- 92, 92, 28, 17, 25, 92, 92, 92, 92, 92,
-
- 54, 61, 92, 14, 92, 92, 92, 92, 92, 92,
- 92, 92, 92, 92, 92, 92, 92, 92, 92, 92,
- 92, 92, 114, 115, 115, 116, 6, 7, 9, 10,
- 2, 13, 45, 92, 92, 92, 92, 92, 92, 92,
- 92, 92, 92, 92, 92, 92, 92, 92, 92, 92,
- 92, 27, 92, 92, 92, 41, 92, 92, 92, 92,
- 21, 92, 92, 92, 92, 15, 92, 92, 92, 18,
- 92, 92, 92, 92, 92, 80, 92, 92, 92, 51,
- 92, 12, 92, 36, 92, 92, 92, 92, 92, 92,
- 92, 92, 92, 92, 92, 92, 92, 92, 20, 24,
-
- 92, 92, 92, 92, 92, 92, 92, 92, 92, 92,
- 46, 92, 92, 30, 92, 87, 92, 92, 39, 92,
- 92, 92, 92, 92, 48, 92, 89, 32, 91, 92,
- 11, 64, 92, 92, 92, 42, 92, 92, 92, 92,
- 92, 92, 92, 92, 92, 92, 29, 92, 92, 92,
- 92, 92, 92, 92, 92, 92, 85, 92, 26, 92,
- 66, 92, 92, 92, 37, 92, 92, 92, 92, 92,
- 92, 92, 31, 65, 23, 92, 57, 92, 75, 92,
- 92, 92, 43, 92, 92, 92, 92, 92, 92, 92,
- 92, 90, 92, 92, 56, 92, 92, 92, 92, 92,
-
- 92, 92, 40, 33, 79, 19, 92, 83, 74, 55,
- 92, 63, 92, 52, 92, 92, 92, 47, 92, 76,
- 92, 78, 92, 92, 34, 92, 92, 92, 35, 72,
- 92, 92, 92, 92, 58, 92, 50, 49, 92, 92,
- 53, 62, 92, 92, 92, 22, 92, 92, 73, 81,
- 92, 92, 77, 92, 68, 92, 92, 92, 92, 38,
- 92, 88, 67, 92, 84, 92, 92, 92, 86, 92,
- 59, 92, 16, 92, 70, 69, 92, 92, 82, 92,
- 92, 92, 92, 92, 92, 92, 92, 92, 92, 71,
- 92, 92, 92, 92, 92, 92, 60, 0
-
- } ;
-
-static yyconst flex_int32_t yy_ec[256] =
- { 0,
- 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 2, 1, 4, 1, 5, 6, 1, 7, 8,
- 9, 10, 11, 12, 13, 14, 15, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 17, 18, 19,
- 20, 21, 22, 1, 23, 24, 25, 26, 27, 28,
- 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
- 39, 40, 41, 42, 43, 44, 45, 46, 47, 32,
- 1, 1, 1, 1, 48, 1, 32, 32, 32, 32,
-
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 49, 1, 50, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1
- } ;
-
-static yyconst flex_int32_t yy_meta[51] =
- { 0,
- 1, 1, 1, 2, 1, 1, 3, 1, 1, 4,
- 1, 1, 1, 1, 1, 5, 1, 1, 1, 6,
- 1, 1, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 1, 1
- } ;
-
-static yyconst flex_int16_t yy_base[409] =
- { 0,
- 0, 0, 437, 436, 438, 437, 439, 438, 441, 448,
- 49, 51, 448, 0, 448, 448, 448, 448, 448, 448,
- 448, 448, 426, 429, 41, 418, 448, 38, 448, 417,
- 448, 20, 33, 32, 46, 40, 44, 0, 54, 52,
- 399, 48, 60, 395, 65, 67, 81, 27, 411, 75,
- 448, 448, 0, 98, 0, 426, 0, 428, 113, 0,
- 448, 448, 415, 54, 410, 448, 448, 448, 448, 0,
- 403, 68, 399, 391, 389, 0, 402, 80, 84, 397,
- 383, 96, 381, 394, 379, 393, 387, 375, 379, 375,
- 377, 377, 0, 98, 0, 376, 97, 385, 368, 375,
-
- 0, 0, 381, 381, 364, 94, 103, 379, 98, 65,
- 381, 369, 109, 361, 377, 373, 351, 97, 372, 363,
- 115, 356, 0, 137, 138, 448, 0, 388, 0, 390,
- 377, 0, 0, 365, 360, 367, 365, 348, 346, 345,
- 350, 359, 347, 359, 95, 347, 353, 354, 336, 336,
- 123, 0, 334, 350, 351, 0, 338, 347, 344, 122,
- 124, 341, 336, 330, 340, 338, 331, 328, 336, 0,
- 326, 336, 334, 325, 315, 309, 322, 307, 327, 0,
- 313, 0, 311, 0, 325, 316, 313, 131, 309, 316,
- 323, 302, 304, 309, 309, 301, 304, 299, 0, 0,
-
- 311, 295, 305, 312, 292, 291, 305, 294, 307, 287,
- 0, 297, 279, 0, 298, 0, 295, 282, 0, 281,
- 276, 281, 280, 290, 0, 276, 0, 0, 0, 280,
- 0, 0, 276, 273, 287, 0, 272, 272, 270, 286,
- 271, 283, 280, 264, 282, 277, 0, 272, 272, 258,
- 257, 270, 256, 270, 269, 268, 0, 252, 0, 246,
- 0, 265, 249, 248, 0, 262, 252, 247, 246, 258,
- 248, 247, 0, 0, 0, 251, 0, 239, 0, 253,
- 249, 235, 0, 249, 250, 233, 238, 231, 249, 231,
- 228, 0, 229, 226, 0, 231, 243, 230, 237, 227,
-
- 235, 220, 0, 0, 0, 212, 219, 0, 0, 0,
- 216, 0, 230, 0, 231, 218, 217, 0, 213, 0,
- 216, 0, 208, 210, 0, 209, 223, 216, 0, 0,
- 219, 222, 204, 219, 0, 215, 0, 0, 199, 213,
- 0, 0, 197, 196, 201, 0, 210, 195, 0, 0,
- 201, 197, 0, 192, 0, 204, 204, 192, 202, 0,
- 179, 0, 0, 199, 0, 183, 177, 183, 0, 174,
- 0, 193, 0, 192, 0, 0, 183, 187, 0, 174,
- 174, 180, 166, 189, 181, 180, 166, 151, 118, 0,
- 130, 136, 127, 123, 119, 111, 0, 448, 167, 173,
-
- 179, 152, 181, 124, 187, 193, 199, 205
- } ;
-
-static yyconst flex_int16_t yy_def[409] =
- { 0,
- 398, 1, 399, 399, 400, 400, 401, 401, 398, 398,
- 398, 398, 398, 402, 398, 398, 398, 398, 398, 398,
- 398, 398, 398, 398, 398, 403, 398, 398, 398, 398,
- 398, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 398, 398, 405, 406, 407, 398, 408, 398, 398, 402,
- 398, 398, 398, 398, 403, 398, 398, 398, 398, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
-
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 405, 406, 406, 398, 407, 398, 408, 398,
- 398, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
-
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
-
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 0, 398, 398,
-
- 398, 398, 398, 398, 398, 398, 398, 398
- } ;
-
-static yyconst flex_int16_t yy_nxt[499] =
- { 0,
- 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
- 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 38,
- 39, 38, 38, 40, 41, 42, 43, 44, 38, 45,
- 46, 47, 48, 49, 50, 38, 38, 38, 51, 52,
- 59, 59, 59, 59, 63, 71, 64, 67, 68, 73,
- 72, 77, 118, 74, 119, 78, 75, 63, 79, 64,
- 88, 80, 82, 85, 81, 86, 83, 89, 96, 76,
- 90, 93, 84, 91, 99, 87, 92, 101, 97, 94,
- 100, 107, 133, 110, 95, 102, 111, 103, 179, 104,
-
- 108, 109, 105, 115, 121, 112, 180, 125, 134, 113,
- 116, 122, 126, 114, 59, 59, 139, 117, 141, 142,
- 146, 163, 140, 159, 171, 173, 143, 189, 70, 147,
- 172, 177, 183, 164, 207, 208, 148, 190, 160, 161,
- 174, 193, 178, 184, 175, 194, 398, 125, 222, 214,
- 224, 398, 126, 215, 248, 249, 60, 397, 396, 395,
- 225, 394, 393, 223, 392, 391, 250, 53, 53, 53,
- 53, 53, 53, 55, 55, 55, 55, 55, 55, 57,
- 57, 57, 57, 57, 57, 65, 65, 123, 123, 123,
- 390, 123, 123, 124, 124, 124, 124, 124, 124, 127,
-
- 127, 389, 127, 127, 127, 129, 388, 129, 129, 129,
- 129, 387, 386, 385, 384, 383, 382, 381, 380, 379,
- 378, 377, 376, 375, 374, 373, 372, 371, 370, 369,
- 368, 367, 366, 365, 364, 363, 362, 361, 360, 359,
- 358, 357, 356, 355, 354, 353, 352, 351, 350, 349,
- 348, 347, 346, 345, 344, 343, 342, 341, 340, 339,
- 338, 337, 336, 335, 334, 333, 332, 331, 330, 329,
- 328, 327, 326, 325, 324, 323, 322, 321, 320, 319,
- 318, 317, 316, 315, 314, 313, 312, 311, 310, 309,
- 308, 307, 306, 305, 304, 303, 302, 301, 300, 299,
-
- 298, 297, 296, 295, 294, 293, 292, 291, 290, 289,
- 288, 287, 286, 285, 284, 283, 282, 281, 280, 279,
- 278, 277, 276, 275, 274, 273, 272, 271, 270, 269,
- 268, 267, 266, 265, 264, 263, 262, 261, 260, 259,
- 258, 257, 256, 255, 254, 253, 252, 251, 247, 246,
- 245, 244, 243, 242, 241, 240, 239, 238, 237, 236,
- 235, 234, 233, 232, 231, 230, 229, 228, 227, 226,
- 221, 220, 219, 218, 217, 216, 213, 212, 211, 210,
- 209, 206, 205, 204, 203, 202, 201, 200, 199, 198,
- 197, 196, 131, 130, 128, 195, 192, 191, 188, 187,
-
- 186, 185, 182, 181, 176, 170, 169, 168, 167, 166,
- 165, 162, 158, 157, 156, 155, 154, 153, 152, 151,
- 150, 149, 145, 144, 138, 137, 136, 135, 132, 398,
- 131, 130, 128, 120, 106, 98, 69, 66, 62, 61,
- 398, 58, 58, 56, 56, 54, 54, 9, 398, 398,
- 398, 398, 398, 398, 398, 398, 398, 398, 398, 398,
- 398, 398, 398, 398, 398, 398, 398, 398, 398, 398,
- 398, 398, 398, 398, 398, 398, 398, 398, 398, 398,
- 398, 398, 398, 398, 398, 398, 398, 398, 398, 398,
- 398, 398, 398, 398, 398, 398, 398, 398
-
- } ;
-
-static yyconst flex_int16_t yy_chk[499] =
- { 0,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 11, 11, 12, 12, 25, 32, 25, 28, 28, 33,
- 32, 34, 48, 33, 48, 34, 33, 64, 34, 64,
- 37, 34, 35, 36, 34, 36, 35, 37, 40, 33,
- 37, 39, 35, 37, 42, 36, 37, 43, 40, 39,
- 42, 45, 72, 46, 39, 43, 46, 43, 110, 43,
-
- 45, 45, 43, 47, 50, 46, 110, 54, 72, 46,
- 47, 50, 54, 46, 59, 59, 78, 47, 79, 79,
- 82, 97, 78, 94, 106, 107, 79, 118, 404, 82,
- 106, 109, 113, 97, 145, 145, 82, 118, 94, 94,
- 107, 121, 109, 113, 107, 121, 124, 125, 160, 151,
- 161, 124, 125, 151, 188, 188, 402, 396, 395, 394,
- 161, 393, 392, 160, 391, 389, 188, 399, 399, 399,
- 399, 399, 399, 400, 400, 400, 400, 400, 400, 401,
- 401, 401, 401, 401, 401, 403, 403, 405, 405, 405,
- 388, 405, 405, 406, 406, 406, 406, 406, 406, 407,
-
- 407, 387, 407, 407, 407, 408, 386, 408, 408, 408,
- 408, 385, 384, 383, 382, 381, 380, 378, 377, 374,
- 372, 370, 368, 367, 366, 364, 361, 359, 358, 357,
- 356, 354, 352, 351, 348, 347, 345, 344, 343, 340,
- 339, 336, 334, 333, 332, 331, 328, 327, 326, 324,
- 323, 321, 319, 317, 316, 315, 313, 311, 307, 306,
- 302, 301, 300, 299, 298, 297, 296, 294, 293, 291,
- 290, 289, 288, 287, 286, 285, 284, 282, 281, 280,
- 278, 276, 272, 271, 270, 269, 268, 267, 266, 264,
- 263, 262, 260, 258, 256, 255, 254, 253, 252, 251,
-
- 250, 249, 248, 246, 245, 244, 243, 242, 241, 240,
- 239, 238, 237, 235, 234, 233, 230, 226, 224, 223,
- 222, 221, 220, 218, 217, 215, 213, 212, 210, 209,
- 208, 207, 206, 205, 204, 203, 202, 201, 198, 197,
- 196, 195, 194, 193, 192, 191, 190, 189, 187, 186,
- 185, 183, 181, 179, 178, 177, 176, 175, 174, 173,
- 172, 171, 169, 168, 167, 166, 165, 164, 163, 162,
- 159, 158, 157, 155, 154, 153, 150, 149, 148, 147,
- 146, 144, 143, 142, 141, 140, 139, 138, 137, 136,
- 135, 134, 131, 130, 128, 122, 120, 119, 117, 116,
-
- 115, 114, 112, 111, 108, 105, 104, 103, 100, 99,
- 98, 96, 92, 91, 90, 89, 88, 87, 86, 85,
- 84, 83, 81, 80, 77, 75, 74, 73, 71, 65,
- 63, 58, 56, 49, 44, 41, 30, 26, 24, 23,
- 9, 8, 7, 6, 5, 4, 3, 398, 398, 398,
- 398, 398, 398, 398, 398, 398, 398, 398, 398, 398,
- 398, 398, 398, 398, 398, 398, 398, 398, 398, 398,
- 398, 398, 398, 398, 398, 398, 398, 398, 398, 398,
- 398, 398, 398, 398, 398, 398, 398, 398, 398, 398,
- 398, 398, 398, 398, 398, 398, 398, 398
-
- } ;
-
-static yy_state_type yy_last_accepting_state;
-static char *yy_last_accepting_cpos;
-
-static int yy_flex_debug;
-static int yy_flex_debug = 0;
-
-/* The intent behind this definition is that it'll catch
- * any uses of REJECT which flex missed.
- */
-#define REJECT reject_used_but_not_detected
-#define yymore() yymore_used_but_not_detected
-#define YY_MORE_ADJ 0
-#define YY_RESTORE_YY_MORE_OFFSET
-static char *yytext;
-#line 1 "pars0lex.l"
-/**************************************************//**
-SQL parser lexical analyzer: input file for the GNU Flex lexer generator
-
-(c) 1997 Innobase Oy
-
-Created 12/14/1997 Heikki Tuuri
-Published under the GPL version 2
-
-The InnoDB parser is frozen because MySQL takes care of SQL parsing.
-Therefore we normally keep the InnoDB parser C files as they are, and do
-not automatically generate them from pars0grm.y and pars0lex.l.
-
-How to make the InnoDB parser and lexer C files:
-
-1. Run ./make_flex.sh to generate lexer files.
-
-2. Run ./make_bison.sh to generate parser files.
-
-These instructions seem to work at least with bison-1.875d and flex-2.5.31 on
-Linux.
-*******************************************************/
-#define YY_NO_INPUT 1
-#define YY_NO_UNISTD_H 1
-#line 38 "pars0lex.l"
-#define YYSTYPE que_node_t*
-
-#include "univ.i"
-#include "pars0pars.h"
-#include "pars0grm.h"
-#include "pars0sym.h"
-#include "mem0mem.h"
-#include "os0proc.h"
-
-#define malloc(A) ut_malloc(A)
-#define free(A) ut_free(A)
-#define realloc(P, A) ut_realloc(P, A)
-#define exit(A) ut_error
-
-#define YY_INPUT(buf, result, max_size) pars_get_lex_chars(buf, &result, max_size)
-
-/* String buffer for removing quotes */
-static ulint stringbuf_len_alloc = 0; /* Allocated length */
-static ulint stringbuf_len = 0; /* Current length */
-static char* stringbuf; /* Start of buffer */
-/** Appends a string to the buffer. */
-static
-void
-string_append(
-/*==========*/
- const char* str, /*!< in: string to be appended */
- ulint len) /*!< in: length of the string */
-{
- if (stringbuf == NULL) {
- stringbuf = malloc(1);
- stringbuf_len_alloc = 1;
- }
-
- if (stringbuf_len + len > stringbuf_len_alloc) {
- while (stringbuf_len + len > stringbuf_len_alloc) {
- stringbuf_len_alloc <<= 1;
- }
- stringbuf = realloc(stringbuf, stringbuf_len_alloc);
- }
-
- memcpy(stringbuf + stringbuf_len, str, len);
- stringbuf_len += len;
-}
-
-
-
-
-#line 759 "lexyy.c"
-
-#define INITIAL 0
-#define comment 1
-#define quoted 2
-#define id 3
-
-#ifndef YY_NO_UNISTD_H
-/* Special case for "unistd.h", since it is non-ANSI. We include it way
- * down here because we want the user's section 1 to have been scanned first.
- * The user has a chance to override it with an option.
- */
-#include <unistd.h>
-#endif
-
-#ifndef YY_EXTRA_TYPE
-#define YY_EXTRA_TYPE void *
-#endif
-
-/* Macros after this point can all be overridden by user definitions in
- * section 1.
- */
-
-#ifndef YY_SKIP_YYWRAP
-#ifdef __cplusplus
-extern "C" int yywrap (void );
-#else
-extern int yywrap (void );
-#endif
-#endif
-
-#ifndef yytext_ptr
-static void yy_flex_strncpy (char *,yyconst char *,int );
-#endif
-
-#ifdef YY_NEED_STRLEN
-static int yy_flex_strlen (yyconst char * );
-#endif
-
-#ifndef YY_NO_INPUT
-
-#ifdef __cplusplus
-static int yyinput (void );
-#else
-static int input (void );
-#endif
-
-#endif
-
-/* Amount of stuff to slurp up with each read. */
-#ifndef YY_READ_BUF_SIZE
-#define YY_READ_BUF_SIZE 8192
-#endif
-
-/* Copy whatever the last rule matched to the standard output. */
-#ifndef ECHO
-/* This used to be an fputs(), but since the string might contain NUL's,
- * we now use fwrite().
- */
-#define ECHO (void) fwrite( yytext, yyleng, 1, yyout )
-#endif
-
-/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL,
- * is returned in "result".
- */
-#ifndef YY_INPUT
-#define YY_INPUT(buf,result,max_size) \
- if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
- { \
- int c = '*'; \
- size_t n; \
- for ( n = 0; n < max_size && \
- (c = getc( yyin )) != EOF && c != '\n'; ++n ) \
- buf[n] = (char) c; \
- if ( c == '\n' ) \
- buf[n++] = (char) c; \
- if ( c == EOF && ferror( yyin ) ) \
- YY_FATAL_ERROR( "input in flex scanner failed" ); \
- result = n; \
- } \
- else \
- { \
- errno=0; \
- while ( (result = fread(buf, 1, max_size, yyin))==0 && ferror(yyin)) \
- { \
- if( errno != EINTR) \
- { \
- YY_FATAL_ERROR( "input in flex scanner failed" ); \
- break; \
- } \
- errno=0; \
- clearerr(yyin); \
- } \
- }\
-\
-
-#endif
-
-/* No semi-colon after return; correct usage is to write "yyterminate();" -
- * we don't want an extra ';' after the "return" because that will cause
- * some compilers to complain about unreachable statements.
- */
-#ifndef yyterminate
-#define yyterminate() return YY_NULL
-#endif
-
-/* Number of entries by which start-condition stack grows. */
-#ifndef YY_START_STACK_INCR
-#define YY_START_STACK_INCR 25
-#endif
-
-/* Report a fatal error. */
-#ifndef YY_FATAL_ERROR
-#define YY_FATAL_ERROR(msg) yy_fatal_error( msg )
-#endif
-
-/* end tables serialization structures and prototypes */
-
-/* Default declaration of generated scanner - a define so the user can
- * easily add parameters.
- */
-#ifndef YY_DECL
-#define YY_DECL_IS_OURS 1
-
-UNIV_INTERN int yylex (void);
-
-#define YY_DECL UNIV_INTERN int yylex (void)
-#endif /* !YY_DECL */
-
-/* Code executed at the beginning of each rule, after yytext and yyleng
- * have been set up.
- */
-#ifndef YY_USER_ACTION
-#define YY_USER_ACTION
-#endif
-
-/* Code executed at the end of each rule. */
-#ifndef YY_BREAK
-#define YY_BREAK break;
-#endif
-
-#define YY_RULE_SETUP \
- YY_USER_ACTION
-
-/** The main scanner function which does all the work.
- */
-YY_DECL
-{
- register yy_state_type yy_current_state;
- register char *yy_cp, *yy_bp;
- register int yy_act;
-
-#line 92 "pars0lex.l"
-
-
-#line 914 "lexyy.c"
-
- if ( (yy_init) )
- {
- (yy_init) = 0;
-
-#ifdef YY_USER_INIT
- YY_USER_INIT;
-#endif
-
- if ( ! (yy_start) )
- (yy_start) = 1; /* first start state */
-
- if ( ! yyin )
- yyin = stdin;
-
- if ( ! yyout )
- yyout = stdout;
-
- if ( ! YY_CURRENT_BUFFER ) {
- yyensure_buffer_stack ();
- YY_CURRENT_BUFFER_LVALUE =
- yy_create_buffer(yyin,YY_BUF_SIZE );
- }
-
- yy_load_buffer_state( );
- }
-
- while ( 1 ) /* loops until end-of-file is reached */
- {
- yy_cp = (yy_c_buf_p);
-
- /* Support of yytext. */
- *yy_cp = (yy_hold_char);
-
- /* yy_bp points to the position in yy_ch_buf of the start of
- * the current run.
- */
- yy_bp = yy_cp;
-
- yy_current_state = (yy_start);
-yy_match:
- do
- {
- register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)];
- if ( yy_accept[yy_current_state] )
- {
- (yy_last_accepting_state) = yy_current_state;
- (yy_last_accepting_cpos) = yy_cp;
- }
- while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
- {
- yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 399 )
- yy_c = yy_meta[(unsigned int) yy_c];
- }
- yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- ++yy_cp;
- }
- while ( yy_current_state != 398 );
- yy_cp = (yy_last_accepting_cpos);
- yy_current_state = (yy_last_accepting_state);
-
-yy_find_action:
- yy_act = yy_accept[yy_current_state];
-
- YY_DO_BEFORE_ACTION;
-
-do_action: /* This label is used only to access EOF actions. */
-
- switch ( yy_act )
- { /* beginning of action switch */
- case 0: /* must back up */
- /* undo the effects of YY_DO_BEFORE_ACTION */
- *yy_cp = (yy_hold_char);
- yy_cp = (yy_last_accepting_cpos);
- yy_current_state = (yy_last_accepting_state);
- goto yy_find_action;
-
-case 1:
-YY_RULE_SETUP
-#line 94 "pars0lex.l"
-{
- yylval = sym_tab_add_int_lit(pars_sym_tab_global,
- atoi(yytext));
- return(PARS_INT_LIT);
-}
- YY_BREAK
-case 2:
-YY_RULE_SETUP
-#line 100 "pars0lex.l"
-{
- ut_error; /* not implemented */
-
- return(PARS_FLOAT_LIT);
-}
- YY_BREAK
-case 3:
-YY_RULE_SETUP
-#line 106 "pars0lex.l"
-{
- ulint type;
-
- yylval = sym_tab_add_bound_lit(pars_sym_tab_global,
- yytext + 1, &type);
-
- return((int) type);
-}
- YY_BREAK
-case 4:
-YY_RULE_SETUP
-#line 115 "pars0lex.l"
-{
- yylval = sym_tab_add_bound_id(pars_sym_tab_global,
- yytext + 1);
-
- return(PARS_ID_TOKEN);
-}
- YY_BREAK
-case 5:
-YY_RULE_SETUP
-#line 122 "pars0lex.l"
-{
-/* Quoted character string literals are handled in an explicit
-start state 'quoted'. This state is entered and the buffer for
-the scanned string is emptied upon encountering a starting quote.
-
-In the state 'quoted', only two actions are possible (defined below). */
- BEGIN(quoted);
- stringbuf_len = 0;
-}
- YY_BREAK
-case 6:
-/* rule 6 can match eol */
-YY_RULE_SETUP
-#line 131 "pars0lex.l"
-{
- /* Got a sequence of characters other than "'":
- append to string buffer */
- string_append(yytext, yyleng);
-}
- YY_BREAK
-case 7:
-YY_RULE_SETUP
-#line 136 "pars0lex.l"
-{
- /* Got a sequence of "'" characters:
- append half of them to string buffer,
- as "''" represents a single "'".
- We apply truncating division,
- so that "'''" will result in "'". */
-
- string_append(yytext, yyleng / 2);
-
- /* If we got an odd number of quotes, then the
- last quote we got is the terminating quote.
- At the end of the string, we return to the
- initial start state and report the scanned
- string literal. */
-
- if (yyleng % 2) {
- BEGIN(INITIAL);
- yylval = sym_tab_add_str_lit(
- pars_sym_tab_global,
- (byte*) stringbuf, stringbuf_len);
- return(PARS_STR_LIT);
- }
-}
- YY_BREAK
-case 8:
-YY_RULE_SETUP
-#line 160 "pars0lex.l"
-{
-/* Quoted identifiers are handled in an explicit start state 'id'.
-This state is entered and the buffer for the scanned string is emptied
-upon encountering a starting quote.
-
-In the state 'id', only two actions are possible (defined below). */
- BEGIN(id);
- stringbuf_len = 0;
-}
- YY_BREAK
-case 9:
-/* rule 9 can match eol */
-YY_RULE_SETUP
-#line 169 "pars0lex.l"
-{
- /* Got a sequence of characters other than '"':
- append to string buffer */
- string_append(yytext, yyleng);
-}
- YY_BREAK
-case 10:
-YY_RULE_SETUP
-#line 174 "pars0lex.l"
-{
- /* Got a sequence of '"' characters:
- append half of them to string buffer,
- as '""' represents a single '"'.
- We apply truncating division,
- so that '"""' will result in '"'. */
-
- string_append(yytext, yyleng / 2);
-
- /* If we got an odd number of quotes, then the
- last quote we got is the terminating quote.
- At the end of the string, we return to the
- initial start state and report the scanned
- identifier. */
-
- if (yyleng % 2) {
- BEGIN(INITIAL);
- yylval = sym_tab_add_id(
- pars_sym_tab_global,
- (byte*) stringbuf, stringbuf_len);
-
- return(PARS_ID_TOKEN);
- }
-}
- YY_BREAK
-case 11:
-YY_RULE_SETUP
-#line 199 "pars0lex.l"
-{
- yylval = sym_tab_add_null_lit(pars_sym_tab_global);
-
- return(PARS_NULL_LIT);
-}
- YY_BREAK
-case 12:
-YY_RULE_SETUP
-#line 205 "pars0lex.l"
-{
- /* Implicit cursor name */
- yylval = sym_tab_add_str_lit(pars_sym_tab_global,
- (byte*) yytext, yyleng);
- return(PARS_SQL_TOKEN);
-}
- YY_BREAK
-case 13:
-YY_RULE_SETUP
-#line 212 "pars0lex.l"
-{
- return(PARS_AND_TOKEN);
-}
- YY_BREAK
-case 14:
-YY_RULE_SETUP
-#line 216 "pars0lex.l"
-{
- return(PARS_OR_TOKEN);
-}
- YY_BREAK
-case 15:
-YY_RULE_SETUP
-#line 220 "pars0lex.l"
-{
- return(PARS_NOT_TOKEN);
-}
- YY_BREAK
-case 16:
-YY_RULE_SETUP
-#line 224 "pars0lex.l"
-{
- return(PARS_PROCEDURE_TOKEN);
-}
- YY_BREAK
-case 17:
-YY_RULE_SETUP
-#line 228 "pars0lex.l"
-{
- return(PARS_IN_TOKEN);
-}
- YY_BREAK
-case 18:
-YY_RULE_SETUP
-#line 232 "pars0lex.l"
-{
- return(PARS_OUT_TOKEN);
-}
- YY_BREAK
-case 19:
-YY_RULE_SETUP
-#line 236 "pars0lex.l"
-{
- return(PARS_BINARY_TOKEN);
-}
- YY_BREAK
-case 20:
-YY_RULE_SETUP
-#line 240 "pars0lex.l"
-{
- return(PARS_BLOB_TOKEN);
-}
- YY_BREAK
-case 21:
-YY_RULE_SETUP
-#line 244 "pars0lex.l"
-{
- return(PARS_INT_TOKEN);
-}
- YY_BREAK
-case 22:
-YY_RULE_SETUP
-#line 248 "pars0lex.l"
-{
- return(PARS_INT_TOKEN);
-}
- YY_BREAK
-case 23:
-YY_RULE_SETUP
-#line 252 "pars0lex.l"
-{
- return(PARS_FLOAT_TOKEN);
-}
- YY_BREAK
-case 24:
-YY_RULE_SETUP
-#line 256 "pars0lex.l"
-{
- return(PARS_CHAR_TOKEN);
-}
- YY_BREAK
-case 25:
-YY_RULE_SETUP
-#line 260 "pars0lex.l"
-{
- return(PARS_IS_TOKEN);
-}
- YY_BREAK
-case 26:
-YY_RULE_SETUP
-#line 264 "pars0lex.l"
-{
- return(PARS_BEGIN_TOKEN);
-}
- YY_BREAK
-case 27:
-YY_RULE_SETUP
-#line 268 "pars0lex.l"
-{
- return(PARS_END_TOKEN);
-}
- YY_BREAK
-case 28:
-YY_RULE_SETUP
-#line 272 "pars0lex.l"
-{
- return(PARS_IF_TOKEN);
-}
- YY_BREAK
-case 29:
-YY_RULE_SETUP
-#line 276 "pars0lex.l"
-{
- return(PARS_THEN_TOKEN);
-}
- YY_BREAK
-case 30:
-YY_RULE_SETUP
-#line 280 "pars0lex.l"
-{
- return(PARS_ELSE_TOKEN);
-}
- YY_BREAK
-case 31:
-YY_RULE_SETUP
-#line 284 "pars0lex.l"
-{
- return(PARS_ELSIF_TOKEN);
-}
- YY_BREAK
-case 32:
-YY_RULE_SETUP
-#line 288 "pars0lex.l"
-{
- return(PARS_LOOP_TOKEN);
-}
- YY_BREAK
-case 33:
-YY_RULE_SETUP
-#line 292 "pars0lex.l"
-{
- return(PARS_WHILE_TOKEN);
-}
- YY_BREAK
-case 34:
-YY_RULE_SETUP
-#line 296 "pars0lex.l"
-{
- return(PARS_RETURN_TOKEN);
-}
- YY_BREAK
-case 35:
-YY_RULE_SETUP
-#line 300 "pars0lex.l"
-{
- return(PARS_SELECT_TOKEN);
-}
- YY_BREAK
-case 36:
-YY_RULE_SETUP
-#line 304 "pars0lex.l"
-{
- return(PARS_SUM_TOKEN);
-}
- YY_BREAK
-case 37:
-YY_RULE_SETUP
-#line 308 "pars0lex.l"
-{
- return(PARS_COUNT_TOKEN);
-}
- YY_BREAK
-case 38:
-YY_RULE_SETUP
-#line 312 "pars0lex.l"
-{
- return(PARS_DISTINCT_TOKEN);
-}
- YY_BREAK
-case 39:
-YY_RULE_SETUP
-#line 316 "pars0lex.l"
-{
- return(PARS_FROM_TOKEN);
-}
- YY_BREAK
-case 40:
-YY_RULE_SETUP
-#line 320 "pars0lex.l"
-{
- return(PARS_WHERE_TOKEN);
-}
- YY_BREAK
-case 41:
-YY_RULE_SETUP
-#line 324 "pars0lex.l"
-{
- return(PARS_FOR_TOKEN);
-}
- YY_BREAK
-case 42:
-YY_RULE_SETUP
-#line 328 "pars0lex.l"
-{
- return(PARS_READ_TOKEN);
-}
- YY_BREAK
-case 43:
-YY_RULE_SETUP
-#line 332 "pars0lex.l"
-{
- return(PARS_ORDER_TOKEN);
-}
- YY_BREAK
-case 44:
-YY_RULE_SETUP
-#line 336 "pars0lex.l"
-{
- return(PARS_BY_TOKEN);
-}
- YY_BREAK
-case 45:
-YY_RULE_SETUP
-#line 340 "pars0lex.l"
-{
- return(PARS_ASC_TOKEN);
-}
- YY_BREAK
-case 46:
-YY_RULE_SETUP
-#line 344 "pars0lex.l"
-{
- return(PARS_DESC_TOKEN);
-}
- YY_BREAK
-case 47:
-YY_RULE_SETUP
-#line 348 "pars0lex.l"
-{
- return(PARS_INSERT_TOKEN);
-}
- YY_BREAK
-case 48:
-YY_RULE_SETUP
-#line 352 "pars0lex.l"
-{
- return(PARS_INTO_TOKEN);
-}
- YY_BREAK
-case 49:
-YY_RULE_SETUP
-#line 356 "pars0lex.l"
-{
- return(PARS_VALUES_TOKEN);
-}
- YY_BREAK
-case 50:
-YY_RULE_SETUP
-#line 360 "pars0lex.l"
-{
- return(PARS_UPDATE_TOKEN);
-}
- YY_BREAK
-case 51:
-YY_RULE_SETUP
-#line 364 "pars0lex.l"
-{
- return(PARS_SET_TOKEN);
-}
- YY_BREAK
-case 52:
-YY_RULE_SETUP
-#line 368 "pars0lex.l"
-{
- return(PARS_DELETE_TOKEN);
-}
- YY_BREAK
-case 53:
-YY_RULE_SETUP
-#line 372 "pars0lex.l"
-{
- return(PARS_CURRENT_TOKEN);
-}
- YY_BREAK
-case 54:
-YY_RULE_SETUP
-#line 376 "pars0lex.l"
-{
- return(PARS_OF_TOKEN);
-}
- YY_BREAK
-case 55:
-YY_RULE_SETUP
-#line 380 "pars0lex.l"
-{
- return(PARS_CREATE_TOKEN);
-}
- YY_BREAK
-case 56:
-YY_RULE_SETUP
-#line 384 "pars0lex.l"
-{
- return(PARS_TABLE_TOKEN);
-}
- YY_BREAK
-case 57:
-YY_RULE_SETUP
-#line 388 "pars0lex.l"
-{
- return(PARS_INDEX_TOKEN);
-}
- YY_BREAK
-case 58:
-YY_RULE_SETUP
-#line 392 "pars0lex.l"
-{
- return(PARS_UNIQUE_TOKEN);
-}
- YY_BREAK
-case 59:
-YY_RULE_SETUP
-#line 396 "pars0lex.l"
-{
- return(PARS_CLUSTERED_TOKEN);
-}
- YY_BREAK
-case 60:
-YY_RULE_SETUP
-#line 400 "pars0lex.l"
-{
- return(PARS_DOES_NOT_FIT_IN_MEM_TOKEN);
-}
- YY_BREAK
-case 61:
-YY_RULE_SETUP
-#line 404 "pars0lex.l"
-{
- return(PARS_ON_TOKEN);
-}
- YY_BREAK
-case 62:
-YY_RULE_SETUP
-#line 408 "pars0lex.l"
-{
- return(PARS_DECLARE_TOKEN);
-}
- YY_BREAK
-case 63:
-YY_RULE_SETUP
-#line 412 "pars0lex.l"
-{
- return(PARS_CURSOR_TOKEN);
-}
- YY_BREAK
-case 64:
-YY_RULE_SETUP
-#line 416 "pars0lex.l"
-{
- return(PARS_OPEN_TOKEN);
-}
- YY_BREAK
-case 65:
-YY_RULE_SETUP
-#line 420 "pars0lex.l"
-{
- return(PARS_FETCH_TOKEN);
-}
- YY_BREAK
-case 66:
-YY_RULE_SETUP
-#line 424 "pars0lex.l"
-{
- return(PARS_CLOSE_TOKEN);
-}
- YY_BREAK
-case 67:
-YY_RULE_SETUP
-#line 428 "pars0lex.l"
-{
- return(PARS_NOTFOUND_TOKEN);
-}
- YY_BREAK
-case 68:
-YY_RULE_SETUP
-#line 432 "pars0lex.l"
-{
- return(PARS_TO_CHAR_TOKEN);
-}
- YY_BREAK
-case 69:
-YY_RULE_SETUP
-#line 436 "pars0lex.l"
-{
- return(PARS_TO_NUMBER_TOKEN);
-}
- YY_BREAK
-case 70:
-YY_RULE_SETUP
-#line 440 "pars0lex.l"
-{
- return(PARS_TO_BINARY_TOKEN);
-}
- YY_BREAK
-case 71:
-YY_RULE_SETUP
-#line 444 "pars0lex.l"
-{
- return(PARS_BINARY_TO_NUMBER_TOKEN);
-}
- YY_BREAK
-case 72:
-YY_RULE_SETUP
-#line 448 "pars0lex.l"
-{
- return(PARS_SUBSTR_TOKEN);
-}
- YY_BREAK
-case 73:
-YY_RULE_SETUP
-#line 452 "pars0lex.l"
-{
- return(PARS_REPLSTR_TOKEN);
-}
- YY_BREAK
-case 74:
-YY_RULE_SETUP
-#line 456 "pars0lex.l"
-{
- return(PARS_CONCAT_TOKEN);
-}
- YY_BREAK
-case 75:
-YY_RULE_SETUP
-#line 460 "pars0lex.l"
-{
- return(PARS_INSTR_TOKEN);
-}
- YY_BREAK
-case 76:
-YY_RULE_SETUP
-#line 464 "pars0lex.l"
-{
- return(PARS_LENGTH_TOKEN);
-}
- YY_BREAK
-case 77:
-YY_RULE_SETUP
-#line 468 "pars0lex.l"
-{
- return(PARS_SYSDATE_TOKEN);
-}
- YY_BREAK
-case 78:
-YY_RULE_SETUP
-#line 472 "pars0lex.l"
-{
- return(PARS_PRINTF_TOKEN);
-}
- YY_BREAK
-case 79:
-YY_RULE_SETUP
-#line 476 "pars0lex.l"
-{
- return(PARS_ASSERT_TOKEN);
-}
- YY_BREAK
-case 80:
-YY_RULE_SETUP
-#line 480 "pars0lex.l"
-{
- return(PARS_RND_TOKEN);
-}
- YY_BREAK
-case 81:
-YY_RULE_SETUP
-#line 484 "pars0lex.l"
-{
- return(PARS_RND_STR_TOKEN);
-}
- YY_BREAK
-case 82:
-YY_RULE_SETUP
-#line 488 "pars0lex.l"
-{
- return(PARS_ROW_PRINTF_TOKEN);
-}
- YY_BREAK
-case 83:
-YY_RULE_SETUP
-#line 492 "pars0lex.l"
-{
- return(PARS_COMMIT_TOKEN);
-}
- YY_BREAK
-case 84:
-YY_RULE_SETUP
-#line 496 "pars0lex.l"
-{
- return(PARS_ROLLBACK_TOKEN);
-}
- YY_BREAK
-case 85:
-YY_RULE_SETUP
-#line 500 "pars0lex.l"
-{
- return(PARS_WORK_TOKEN);
-}
- YY_BREAK
-case 86:
-YY_RULE_SETUP
-#line 504 "pars0lex.l"
-{
- return(PARS_UNSIGNED_TOKEN);
-}
- YY_BREAK
-case 87:
-YY_RULE_SETUP
-#line 508 "pars0lex.l"
-{
- return(PARS_EXIT_TOKEN);
-}
- YY_BREAK
-case 88:
-YY_RULE_SETUP
-#line 512 "pars0lex.l"
-{
- return(PARS_FUNCTION_TOKEN);
-}
- YY_BREAK
-case 89:
-YY_RULE_SETUP
-#line 516 "pars0lex.l"
-{
- return(PARS_LOCK_TOKEN);
-}
- YY_BREAK
-case 90:
-YY_RULE_SETUP
-#line 520 "pars0lex.l"
-{
- return(PARS_SHARE_TOKEN);
-}
- YY_BREAK
-case 91:
-YY_RULE_SETUP
-#line 524 "pars0lex.l"
-{
- return(PARS_MODE_TOKEN);
-}
- YY_BREAK
-case 92:
-YY_RULE_SETUP
-#line 528 "pars0lex.l"
-{
- yylval = sym_tab_add_id(pars_sym_tab_global,
- (byte*)yytext,
- ut_strlen(yytext));
- return(PARS_ID_TOKEN);
-}
- YY_BREAK
-case 93:
-YY_RULE_SETUP
-#line 535 "pars0lex.l"
-{
- return(PARS_DDOT_TOKEN);
-}
- YY_BREAK
-case 94:
-YY_RULE_SETUP
-#line 539 "pars0lex.l"
-{
- return(PARS_ASSIGN_TOKEN);
-}
- YY_BREAK
-case 95:
-YY_RULE_SETUP
-#line 543 "pars0lex.l"
-{
- return(PARS_LE_TOKEN);
-}
- YY_BREAK
-case 96:
-YY_RULE_SETUP
-#line 547 "pars0lex.l"
-{
- return(PARS_GE_TOKEN);
-}
- YY_BREAK
-case 97:
-YY_RULE_SETUP
-#line 551 "pars0lex.l"
-{
- return(PARS_NE_TOKEN);
-}
- YY_BREAK
-case 98:
-YY_RULE_SETUP
-#line 555 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 99:
-YY_RULE_SETUP
-#line 560 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 100:
-YY_RULE_SETUP
-#line 565 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 101:
-YY_RULE_SETUP
-#line 570 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 102:
-YY_RULE_SETUP
-#line 575 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 103:
-YY_RULE_SETUP
-#line 580 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 104:
-YY_RULE_SETUP
-#line 585 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 105:
-YY_RULE_SETUP
-#line 590 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 106:
-YY_RULE_SETUP
-#line 595 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 107:
-YY_RULE_SETUP
-#line 600 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 108:
-YY_RULE_SETUP
-#line 605 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 109:
-YY_RULE_SETUP
-#line 610 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 110:
-YY_RULE_SETUP
-#line 615 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 111:
-YY_RULE_SETUP
-#line 620 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 112:
-YY_RULE_SETUP
-#line 625 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 113:
-YY_RULE_SETUP
-#line 630 "pars0lex.l"
-BEGIN(comment); /* eat up comment */
- YY_BREAK
-case 114:
-/* rule 114 can match eol */
-YY_RULE_SETUP
-#line 632 "pars0lex.l"
-
- YY_BREAK
-case 115:
-/* rule 115 can match eol */
-YY_RULE_SETUP
-#line 633 "pars0lex.l"
-
- YY_BREAK
-case 116:
-YY_RULE_SETUP
-#line 634 "pars0lex.l"
-BEGIN(INITIAL);
- YY_BREAK
-case 117:
-/* rule 117 can match eol */
-YY_RULE_SETUP
-#line 636 "pars0lex.l"
-/* eat up whitespace */
- YY_BREAK
-case 118:
-YY_RULE_SETUP
-#line 639 "pars0lex.l"
-{
- fprintf(stderr,"Unrecognized character: %02x\n",
- *yytext);
-
- ut_error;
-
- return(0);
-}
- YY_BREAK
-case 119:
-YY_RULE_SETUP
-#line 648 "pars0lex.l"
-YY_FATAL_ERROR( "flex scanner jammed" );
- YY_BREAK
-#line 1916 "lexyy.c"
-case YY_STATE_EOF(INITIAL):
-case YY_STATE_EOF(comment):
-case YY_STATE_EOF(quoted):
-case YY_STATE_EOF(id):
- yyterminate();
-
- case YY_END_OF_BUFFER:
- {
- /* Amount of text matched not including the EOB char. */
- int yy_amount_of_matched_text = (int) (yy_cp - (yytext_ptr)) - 1;
-
- /* Undo the effects of YY_DO_BEFORE_ACTION. */
- *yy_cp = (yy_hold_char);
- YY_RESTORE_YY_MORE_OFFSET
-
- if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW )
- {
- /* We're scanning a new file or input source. It's
- * possible that this happened because the user
- * just pointed yyin at a new source and called
- * yylex(). If so, then we have to assure
- * consistency between YY_CURRENT_BUFFER and our
- * globals. Here is the right place to do so, because
- * this is the first action (other than possibly a
- * back-up) that will match for the new input source.
- */
- (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
- YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin;
- YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL;
- }
-
- /* Note that here we test for yy_c_buf_p "<=" to the position
- * of the first EOB in the buffer, since yy_c_buf_p will
- * already have been incremented past the NUL character
- * (since all states make transitions on EOB to the
- * end-of-buffer state). Contrast this with the test
- * in input().
- */
- if ( (yy_c_buf_p) <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] )
- { /* This was really a NUL. */
- yy_state_type yy_next_state;
-
- (yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text;
-
- yy_current_state = yy_get_previous_state( );
-
- /* Okay, we're now positioned to make the NUL
- * transition. We couldn't have
- * yy_get_previous_state() go ahead and do it
- * for us because it doesn't know how to deal
- * with the possibility of jamming (and we don't
- * want to build jamming into it because then it
- * will run more slowly).
- */
-
- yy_next_state = yy_try_NUL_trans( yy_current_state );
-
- yy_bp = (yytext_ptr) + YY_MORE_ADJ;
-
- if ( yy_next_state )
- {
- /* Consume the NUL. */
- yy_cp = ++(yy_c_buf_p);
- yy_current_state = yy_next_state;
- goto yy_match;
- }
-
- else
- {
- yy_cp = (yy_last_accepting_cpos);
- yy_current_state = (yy_last_accepting_state);
- goto yy_find_action;
- }
- }
-
- else switch ( yy_get_next_buffer( ) )
- {
- case EOB_ACT_END_OF_FILE:
- {
- (yy_did_buffer_switch_on_eof) = 0;
-
- if ( yywrap( ) )
- {
- /* Note: because we've taken care in
- * yy_get_next_buffer() to have set up
- * yytext, we can now set up
- * yy_c_buf_p so that if some total
- * hoser (like flex itself) wants to
- * call the scanner after we return the
- * YY_NULL, it'll still work - another
- * YY_NULL will get returned.
- */
- (yy_c_buf_p) = (yytext_ptr) + YY_MORE_ADJ;
-
- yy_act = YY_STATE_EOF(YY_START);
- goto do_action;
- }
-
- else
- {
- if ( ! (yy_did_buffer_switch_on_eof) )
- YY_NEW_FILE;
- }
- break;
- }
-
- case EOB_ACT_CONTINUE_SCAN:
- (yy_c_buf_p) =
- (yytext_ptr) + yy_amount_of_matched_text;
-
- yy_current_state = yy_get_previous_state( );
-
- yy_cp = (yy_c_buf_p);
- yy_bp = (yytext_ptr) + YY_MORE_ADJ;
- goto yy_match;
-
- case EOB_ACT_LAST_MATCH:
- (yy_c_buf_p) =
- &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)];
-
- yy_current_state = yy_get_previous_state( );
-
- yy_cp = (yy_c_buf_p);
- yy_bp = (yytext_ptr) + YY_MORE_ADJ;
- goto yy_find_action;
- }
- break;
- }
-
- default:
- YY_FATAL_ERROR(
- "fatal flex scanner internal error--no action found" );
- } /* end of action switch */
- } /* end of scanning one token */
-} /* end of yylex */
-
-/* yy_get_next_buffer - try to read in a new buffer
- *
- * Returns a code representing an action:
- * EOB_ACT_LAST_MATCH -
- * EOB_ACT_CONTINUE_SCAN - continue scanning from current position
- * EOB_ACT_END_OF_FILE - end of file
- */
-static int yy_get_next_buffer (void)
-{
- register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf;
- register char *source = (yytext_ptr);
- register int number_to_move, i;
- int ret_val;
-
- if ( (yy_c_buf_p) > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] )
- YY_FATAL_ERROR(
- "fatal flex scanner internal error--end of buffer missed" );
-
- if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 )
- { /* Don't try to fill the buffer, so this is an EOF. */
- if ( (yy_c_buf_p) - (yytext_ptr) - YY_MORE_ADJ == 1 )
- {
- /* We matched a single character, the EOB, so
- * treat this as a final EOF.
- */
- return EOB_ACT_END_OF_FILE;
- }
-
- else
- {
- /* We matched some text prior to the EOB, first
- * process it.
- */
- return EOB_ACT_LAST_MATCH;
- }
- }
-
- /* Try to read more data. */
-
- /* First move last chars to start of buffer. */
- number_to_move = (int) ((yy_c_buf_p) - (yytext_ptr)) - 1;
-
- for ( i = 0; i < number_to_move; ++i )
- *(dest++) = *(source++);
-
- if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING )
- /* don't do the read, it's not guaranteed to return an EOF,
- * just force an EOF
- */
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) = 0;
-
- else
- {
- size_t num_to_read =
- YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1;
-
- while ( num_to_read <= 0 )
- { /* Not enough room in the buffer - grow it. */
-
- /* just a shorter name for the current buffer */
- YY_BUFFER_STATE b = YY_CURRENT_BUFFER;
-
- int yy_c_buf_p_offset =
- (int) ((yy_c_buf_p) - b->yy_ch_buf);
-
- if ( b->yy_is_our_buffer )
- {
- int new_size = b->yy_buf_size * 2;
-
- if ( new_size <= 0 )
- b->yy_buf_size += b->yy_buf_size / 8;
- else
- b->yy_buf_size *= 2;
-
- b->yy_ch_buf = (char *)
- /* Include room in for 2 EOB chars. */
- yyrealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 );
- }
- else
- /* Can't grow it, we don't own it. */
- b->yy_ch_buf = 0;
-
- if ( ! b->yy_ch_buf )
- YY_FATAL_ERROR(
- "fatal error - scanner input buffer overflow" );
-
- (yy_c_buf_p) = &b->yy_ch_buf[yy_c_buf_p_offset];
-
- num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size -
- number_to_move - 1;
-
- }
-
- if ( num_to_read > YY_READ_BUF_SIZE )
- num_to_read = YY_READ_BUF_SIZE;
-
- /* Read in more data. */
- YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]),
- (yy_n_chars), num_to_read );
-
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
- }
-
- if ( (yy_n_chars) == 0 )
- {
- if ( number_to_move == YY_MORE_ADJ )
- {
- ret_val = EOB_ACT_END_OF_FILE;
- yyrestart(yyin );
- }
-
- else
- {
- ret_val = EOB_ACT_LAST_MATCH;
- YY_CURRENT_BUFFER_LVALUE->yy_buffer_status =
- YY_BUFFER_EOF_PENDING;
- }
- }
-
- else
- ret_val = EOB_ACT_CONTINUE_SCAN;
-
- (yy_n_chars) += number_to_move;
- YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] = YY_END_OF_BUFFER_CHAR;
- YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] = YY_END_OF_BUFFER_CHAR;
-
- (yytext_ptr) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0];
-
- return ret_val;
-}
-
-/* yy_get_previous_state - get the state just before the EOB char was reached */
-
- static yy_state_type yy_get_previous_state (void)
-{
- register yy_state_type yy_current_state;
- register char *yy_cp;
-
- yy_current_state = (yy_start);
-
- for ( yy_cp = (yytext_ptr) + YY_MORE_ADJ; yy_cp < (yy_c_buf_p); ++yy_cp )
- {
- register YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1);
- if ( yy_accept[yy_current_state] )
- {
- (yy_last_accepting_state) = yy_current_state;
- (yy_last_accepting_cpos) = yy_cp;
- }
- while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
- {
- yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 399 )
- yy_c = yy_meta[(unsigned int) yy_c];
- }
- yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- }
-
- return yy_current_state;
-}
-
-/* yy_try_NUL_trans - try to make a transition on the NUL character
- *
- * synopsis
- * next_state = yy_try_NUL_trans( current_state );
- */
- static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state )
-{
- register int yy_is_jam;
- register char *yy_cp = (yy_c_buf_p);
-
- register YY_CHAR yy_c = 1;
- if ( yy_accept[yy_current_state] )
- {
- (yy_last_accepting_state) = yy_current_state;
- (yy_last_accepting_cpos) = yy_cp;
- }
- while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
- {
- yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 399 )
- yy_c = yy_meta[(unsigned int) yy_c];
- }
- yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- yy_is_jam = (yy_current_state == 398);
-
- return yy_is_jam ? 0 : yy_current_state;
-}
-
-#ifndef YY_NO_INPUT
-#ifdef __cplusplus
- static int yyinput (void)
-#else
- static int input (void)
-#endif
-
-{
- int c;
-
- *(yy_c_buf_p) = (yy_hold_char);
-
- if ( *(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR )
- {
- /* yy_c_buf_p now points to the character we want to return.
- * If this occurs *before* the EOB characters, then it's a
- * valid NUL; if not, then we've hit the end of the buffer.
- */
- if ( (yy_c_buf_p) < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] )
- /* This was really a NUL. */
- *(yy_c_buf_p) = '\0';
-
- else
- { /* need more input */
- int offset = (int)((yy_c_buf_p) - (yytext_ptr));
- ++(yy_c_buf_p);
-
- switch ( yy_get_next_buffer( ) )
- {
- case EOB_ACT_LAST_MATCH:
- /* This happens because yy_g_n_b()
- * sees that we've accumulated a
- * token and flags that we need to
- * try matching the token before
- * proceeding. But for input(),
- * there's no matching to consider.
- * So convert the EOB_ACT_LAST_MATCH
- * to EOB_ACT_END_OF_FILE.
- */
-
- /* Reset buffer status. */
- yyrestart(yyin );
-
- /*FALLTHROUGH*/
-
- case EOB_ACT_END_OF_FILE:
- {
- if ( yywrap( ) )
- return EOF;
-
- if ( ! (yy_did_buffer_switch_on_eof) )
- YY_NEW_FILE;
-#ifdef __cplusplus
- return yyinput();
-#else
- return input();
-#endif
- }
-
- case EOB_ACT_CONTINUE_SCAN:
- (yy_c_buf_p) = (yytext_ptr) + offset;
- break;
- }
- }
- }
-
- c = *(unsigned char *) (yy_c_buf_p); /* cast for 8-bit char's */
- *(yy_c_buf_p) = '\0'; /* preserve yytext */
- (yy_hold_char) = *++(yy_c_buf_p);
-
- return c;
-}
-#endif /* ifndef YY_NO_INPUT */
-
-/** Immediately switch to a different input stream.
- * @param input_file A readable stream.
- *
- * @note This function does not reset the start condition to @c INITIAL .
- */
- static void yyrestart (FILE * input_file )
-{
-
- if ( ! YY_CURRENT_BUFFER ){
- yyensure_buffer_stack ();
- YY_CURRENT_BUFFER_LVALUE =
- yy_create_buffer(yyin,YY_BUF_SIZE );
- }
-
- yy_init_buffer(YY_CURRENT_BUFFER,input_file );
- yy_load_buffer_state( );
-}
-
-/** Switch to a different input buffer.
- * @param new_buffer The new input buffer.
- *
- */
- __attribute__((unused)) static void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer )
-{
-
- /* TODO. We should be able to replace this entire function body
- * with
- * yypop_buffer_state();
- * yypush_buffer_state(new_buffer);
- */
- yyensure_buffer_stack ();
- if ( YY_CURRENT_BUFFER == new_buffer )
- return;
-
- if ( YY_CURRENT_BUFFER )
- {
- /* Flush out information for old buffer. */
- *(yy_c_buf_p) = (yy_hold_char);
- YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
- }
-
- YY_CURRENT_BUFFER_LVALUE = new_buffer;
- yy_load_buffer_state( );
-
- /* We don't actually know whether we did this switch during
- * EOF (yywrap()) processing, but the only time this flag
- * is looked at is after yywrap() is called, so it's safe
- * to go ahead and always set it.
- */
- (yy_did_buffer_switch_on_eof) = 1;
-}
-
-static void yy_load_buffer_state (void)
-{
- (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
- (yytext_ptr) = (yy_c_buf_p) = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos;
- yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file;
- (yy_hold_char) = *(yy_c_buf_p);
-}
-
-/** Allocate and initialize an input buffer state.
- * @param file A readable stream.
- * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE.
- *
- * @return the allocated buffer state.
- */
- static YY_BUFFER_STATE yy_create_buffer (FILE * file, int size )
-{
- YY_BUFFER_STATE b;
-
- b = (YY_BUFFER_STATE) yyalloc(sizeof( struct yy_buffer_state ) );
- if ( ! b )
- YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
-
- b->yy_buf_size = size;
-
- /* yy_ch_buf has to be 2 characters longer than the size given because
- * we need to put in 2 end-of-buffer characters.
- */
- b->yy_ch_buf = (char *) yyalloc(b->yy_buf_size + 2 );
- if ( ! b->yy_ch_buf )
- YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
-
- b->yy_is_our_buffer = 1;
-
- yy_init_buffer(b,file );
-
- return b;
-}
-
-/** Destroy the buffer.
- * @param b a buffer created with yy_create_buffer()
- *
- */
- static void yy_delete_buffer (YY_BUFFER_STATE b )
-{
-
- if ( ! b )
- return;
-
- if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */
- YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0;
-
- if ( b->yy_is_our_buffer )
- yyfree((void *) b->yy_ch_buf );
-
- yyfree((void *) b );
-}
-
-/* Initializes or reinitializes a buffer.
- * This function is sometimes called more than once on the same buffer,
- * such as during a yyrestart() or at EOF.
- */
- static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file )
-
-{
- int oerrno = errno;
-
- yy_flush_buffer(b );
-
- b->yy_input_file = file;
- b->yy_fill_buffer = 1;
-
- /* If b is the current buffer, then yy_init_buffer was _probably_
- * called from yyrestart() or through yy_get_next_buffer.
- * In that case, we don't want to reset the lineno or column.
- */
- if (b != YY_CURRENT_BUFFER){
- b->yy_bs_lineno = 1;
- b->yy_bs_column = 0;
- }
-
- b->yy_is_interactive = 0;
-
- errno = oerrno;
-}
-
-/** Discard all buffered characters. On the next scan, YY_INPUT will be called.
- * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER.
- *
- */
- static void yy_flush_buffer (YY_BUFFER_STATE b )
-{
- if ( ! b )
- return;
-
- b->yy_n_chars = 0;
-
- /* We always need two end-of-buffer characters. The first causes
- * a transition to the end-of-buffer state. The second causes
- * a jam in that state.
- */
- b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
- b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
-
- b->yy_buf_pos = &b->yy_ch_buf[0];
-
- b->yy_at_bol = 1;
- b->yy_buffer_status = YY_BUFFER_NEW;
-
- if ( b == YY_CURRENT_BUFFER )
- yy_load_buffer_state( );
-}
-
-/** Pushes the new state onto the stack. The new state becomes
- * the current state. This function will allocate the stack
- * if necessary.
- * @param new_buffer The new state.
- *
- */
-__attribute__((unused)) static void yypush_buffer_state (YY_BUFFER_STATE new_buffer )
-{
- if (new_buffer == NULL)
- return;
-
- yyensure_buffer_stack();
-
- /* This block is copied from yy_switch_to_buffer. */
- if ( YY_CURRENT_BUFFER )
- {
- /* Flush out information for old buffer. */
- *(yy_c_buf_p) = (yy_hold_char);
- YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
- }
-
- /* Only push if top exists. Otherwise, replace top. */
- if (YY_CURRENT_BUFFER)
- (yy_buffer_stack_top)++;
- YY_CURRENT_BUFFER_LVALUE = new_buffer;
-
- /* copied from yy_switch_to_buffer. */
- yy_load_buffer_state( );
- (yy_did_buffer_switch_on_eof) = 1;
-}
-
-/** Removes and deletes the top of the stack, if present.
- * The next element becomes the new top.
- *
- */
-__attribute__((unused)) static void yypop_buffer_state (void)
-{
- if (!YY_CURRENT_BUFFER)
- return;
-
- yy_delete_buffer(YY_CURRENT_BUFFER );
- YY_CURRENT_BUFFER_LVALUE = NULL;
- if ((yy_buffer_stack_top) > 0)
- --(yy_buffer_stack_top);
-
- if (YY_CURRENT_BUFFER) {
- yy_load_buffer_state( );
- (yy_did_buffer_switch_on_eof) = 1;
- }
-}
-
-/* Allocates the stack if it does not exist.
- * Guarantees space for at least one push.
- */
-static void yyensure_buffer_stack (void)
-{
- int num_to_alloc;
-
- if (!(yy_buffer_stack)) {
-
- /* First allocation is just for 2 elements, since we don't know if this
- * scanner will even need a stack. We use 2 instead of 1 to avoid an
- * immediate realloc on the next call.
- */
- num_to_alloc = 1;
- (yy_buffer_stack) = (struct yy_buffer_state**)yyalloc
- (num_to_alloc * sizeof(struct yy_buffer_state*)
- );
-
- memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state*));
-
- (yy_buffer_stack_max) = num_to_alloc;
- (yy_buffer_stack_top) = 0;
- return;
- }
-
- if ((yy_buffer_stack_top) >= ((yy_buffer_stack_max)) - 1){
-
- /* Increase the buffer to prepare for a possible push. */
- int grow_size = 8 /* arbitrary grow size */;
-
- num_to_alloc = (yy_buffer_stack_max) + grow_size;
- (yy_buffer_stack) = (struct yy_buffer_state**)yyrealloc
- ((yy_buffer_stack),
- num_to_alloc * sizeof(struct yy_buffer_state*)
- );
-
- /* zero only the new slots.*/
- memset((yy_buffer_stack) + (yy_buffer_stack_max), 0, grow_size * sizeof(struct yy_buffer_state*));
- (yy_buffer_stack_max) = num_to_alloc;
- }
-}
-
-#ifndef YY_EXIT_FAILURE
-#define YY_EXIT_FAILURE 2
-#endif
-
-static void yy_fatal_error (yyconst char* msg )
-{
- (void) fprintf( stderr, "%s\n", msg );
- exit( YY_EXIT_FAILURE );
-}
-
-/* Redefine yyless() so it works in section 3 code. */
-
-#undef yyless
-#define yyless(n) \
- do \
- { \
- /* Undo effects of setting up yytext. */ \
- int yyless_macro_arg = (n); \
- YY_LESS_LINENO(yyless_macro_arg);\
- yytext[yyleng] = (yy_hold_char); \
- (yy_c_buf_p) = yytext + yyless_macro_arg; \
- (yy_hold_char) = *(yy_c_buf_p); \
- *(yy_c_buf_p) = '\0'; \
- yyleng = yyless_macro_arg; \
- } \
- while ( 0 )
-
-/* Accessor methods (get/set functions) to struct members. */
-
-/** Get the current line number.
- *
- */
-__attribute__((unused)) static int yyget_lineno (void)
-{
-
- return yylineno;
-}
-
-/** Get the input stream.
- *
- */
-__attribute__((unused)) static FILE *yyget_in (void)
-{
- return yyin;
-}
-
-/** Get the output stream.
- *
- */
-__attribute__((unused)) static FILE *yyget_out (void)
-{
- return yyout;
-}
-
-/** Get the length of the current token.
- *
- */
-__attribute__((unused)) static int yyget_leng (void)
-{
- return yyleng;
-}
-
-/** Get the current token.
- *
- */
-
-__attribute__((unused)) static char *yyget_text (void)
-{
- return yytext;
-}
-
-/** Set the current line number.
- * @param line_number
- *
- */
-__attribute__((unused)) static void yyset_lineno (int line_number )
-{
-
- yylineno = line_number;
-}
-
-/** Set the input stream. This does not discard the current
- * input buffer.
- * @param in_str A readable stream.
- *
- * @see yy_switch_to_buffer
- */
-__attribute__((unused)) static void yyset_in (FILE * in_str )
-{
- yyin = in_str ;
-}
-
-__attribute__((unused)) static void yyset_out (FILE * out_str )
-{
- yyout = out_str ;
-}
-
-__attribute__((unused)) static int yyget_debug (void)
-{
- return yy_flex_debug;
-}
-
-__attribute__((unused)) static void yyset_debug (int bdebug )
-{
- yy_flex_debug = bdebug ;
-}
-
-/* yylex_destroy is for both reentrant and non-reentrant scanners. */
-__attribute__((unused)) static int yylex_destroy (void)
-{
-
- /* Pop the buffer stack, destroying each element. */
- while(YY_CURRENT_BUFFER){
- yy_delete_buffer(YY_CURRENT_BUFFER );
- YY_CURRENT_BUFFER_LVALUE = NULL;
- yypop_buffer_state();
- }
-
- /* Destroy the stack itself. */
- yyfree((yy_buffer_stack) );
- (yy_buffer_stack) = NULL;
-
- return 0;
-}
-
-/*
- * Internal utility routines.
- */
-
-#ifndef yytext_ptr
-static void yy_flex_strncpy (char* s1, yyconst char * s2, int n )
-{
- register int i;
- for ( i = 0; i < n; ++i )
- s1[i] = s2[i];
-}
-#endif
-
-#ifdef YY_NEED_STRLEN
-static int yy_flex_strlen (yyconst char * s )
-{
- register int n;
- for ( n = 0; s[n]; ++n )
- ;
-
- return n;
-}
-#endif
-
-static void *yyalloc (yy_size_t size )
-{
- return (void *) malloc( size );
-}
-
-static void *yyrealloc (void * ptr, yy_size_t size )
-{
- /* The cast to (char *) in the following accommodates both
- * implementations that use char* generic pointers, and those
- * that use void* generic pointers. It works with the latter
- * because both ANSI C and C++ allow castless assignment from
- * any pointer type to void*, and deal with argument conversions
- * as though doing an assignment.
- */
- return (void *) realloc( (char *) ptr, size );
-}
-
-static void yyfree (void * ptr )
-{
- free( (char *) ptr ); /* see yyrealloc() for (char *) cast */
-}
-
-#define YYTABLES_NAME "yytables"
-
-#undef YY_NEW_FILE
-#undef YY_FLUSH_BUFFER
-#undef yy_set_bol
-#undef yy_new_buffer
-#undef yy_set_interactive
-#undef yytext_ptr
-#undef YY_DO_BEFORE_ACTION
-
-#ifdef YY_DECL_IS_OURS
-#undef YY_DECL_IS_OURS
-#undef YY_DECL
-#endif
-#line 648 "pars0lex.l"
-
-
-
-
-/**********************************************************************
-Release any resources used by the lexer. */
-UNIV_INTERN
-void
-pars_lexer_close(void)
-/*==================*/
-{
- yylex_destroy();
- free(stringbuf);
- stringbuf = NULL;
- stringbuf_len_alloc = stringbuf_len = 0;
-}
diff --git a/storage/innodb_plugin/pars/make_bison.sh b/storage/innodb_plugin/pars/make_bison.sh
deleted file mode 100755
index 09bb86e3106..00000000000
--- a/storage/innodb_plugin/pars/make_bison.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash
-#
-# Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
-#
-# This program is free software; you can redistribute it and/or modify it under
-# the terms of the GNU General Public License as published by the Free Software
-# Foundation; version 2 of the License.
-#
-# This program is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-# Place, Suite 330, Boston, MA 02111-1307 USA
-#
-# generate parser files from bison input files.
-
-set -eu
-TMPFILE=pars0grm.tab.c
-OUTFILE=pars0grm.c
-
-bison -d pars0grm.y
-mv pars0grm.tab.h ../include/pars0grm.h
-
-sed -e '
-s/'"$TMPFILE"'/'"$OUTFILE"'/;
-s/^\(\(YYSTYPE\|int\) yy\(char\|nerrs\)\)/static \1/;
-s/\(\(YYSTYPE\|int\) yy\(lval\|parse\)\)/UNIV_INTERN \1/;
-' < "$TMPFILE" > "$OUTFILE"
-
-rm "$TMPFILE"
diff --git a/storage/innodb_plugin/pars/make_flex.sh b/storage/innodb_plugin/pars/make_flex.sh
deleted file mode 100755
index 89308a6636f..00000000000
--- a/storage/innodb_plugin/pars/make_flex.sh
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-#
-# Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
-#
-# This program is free software; you can redistribute it and/or modify it under
-# the terms of the GNU General Public License as published by the Free Software
-# Foundation; version 2 of the License.
-#
-# This program is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-# Place, Suite 330, Boston, MA 02111-1307 USA
-#
-# generate lexer files from flex input files.
-
-set -eu
-
-TMPFILE=_flex_tmp.c
-OUTFILE=lexyy.c
-
-flex -o $TMPFILE pars0lex.l
-
-# AIX needs its includes done in a certain order, so include "univ.i" first
-# to be sure we get it right.
-echo '#include "univ.i"' > $OUTFILE
-
-# flex assigns a pointer to an int in one place without a cast, resulting in
-# a warning on Win64. Add the cast. Also define some symbols as static.
-sed -e '
-s/'"$TMPFILE"'/'"$OUTFILE"'/;
-s/\(int offset = \)\((yy_c_buf_p) - (yytext_ptr)\);/\1(int)(\2);/;
-s/\(void yy\(restart\|_\(delete\|flush\)_buffer\)\)/static \1/;
-s/\(void yy_switch_to_buffer\)/__attribute__((unused)) static \1/;
-s/\(void yy\(push\|pop\)_buffer_state\)/__attribute__((unused)) static \1/;
-s/\(YY_BUFFER_STATE yy_create_buffer\)/static \1/;
-s/\(\(int\|void\) yy[gs]et_\)/__attribute__((unused)) static \1/;
-s/\(void \*\?yy\(\(re\)\?alloc\|free\)\)/static \1/;
-s/\(extern \)\?\(int yy\(leng\|lineno\|_flex_debug\)\)/static \2/;
-s/\(int yylex_destroy\)/__attribute__((unused)) static \1/;
-s/\(extern \)\?\(int yylex \)/UNIV_INTERN \2/;
-s/^\(\(FILE\|char\) *\* *yyget\)/__attribute__((unused)) static \1/;
-s/^\(extern \)\?\(\(FILE\|char\) *\* *yy\)/static \2/;
-' < $TMPFILE >> $OUTFILE
-
-rm $TMPFILE
diff --git a/storage/innodb_plugin/pars/pars0grm.c b/storage/innodb_plugin/pars/pars0grm.c
deleted file mode 100644
index d667970735e..00000000000
--- a/storage/innodb_plugin/pars/pars0grm.c
+++ /dev/null
@@ -1,2601 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software
-Foundation, Inc.
-
-As a special exception, when this file is copied by Bison into a
-Bison output file, you may use that output file without restriction.
-This special exception was added by the Free Software Foundation
-in version 1.24 of Bison.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/* A Bison parser, made by GNU Bison 2.0. */
-
-/* Written by Richard Stallman by simplifying the original so called
- ``semantic'' parser. */
-
-/* All symbols defined below should begin with yy or YY, to avoid
- infringing on user name space. This should be done even for local
- variables, as they might otherwise be expanded by user macros.
- There are some unavoidable exceptions within include files to
- define necessary library symbols; they are noted "INFRINGES ON
- USER NAME SPACE" below. */
-
-/* Identify Bison output. */
-#define YYBISON 1
-
-/* Skeleton name. */
-#define YYSKELETON_NAME "yacc.c"
-
-/* Pure parsers. */
-#define YYPURE 0
-
-/* Using locations. */
-#define YYLSP_NEEDED 0
-
-
-
-/* Tokens. */
-#ifndef YYTOKENTYPE
-# define YYTOKENTYPE
- /* Put the tokens into the symbol table, so that GDB and other debuggers
- know about them. */
- enum yytokentype {
- PARS_INT_LIT = 258,
- PARS_FLOAT_LIT = 259,
- PARS_STR_LIT = 260,
- PARS_FIXBINARY_LIT = 261,
- PARS_BLOB_LIT = 262,
- PARS_NULL_LIT = 263,
- PARS_ID_TOKEN = 264,
- PARS_AND_TOKEN = 265,
- PARS_OR_TOKEN = 266,
- PARS_NOT_TOKEN = 267,
- PARS_GE_TOKEN = 268,
- PARS_LE_TOKEN = 269,
- PARS_NE_TOKEN = 270,
- PARS_PROCEDURE_TOKEN = 271,
- PARS_IN_TOKEN = 272,
- PARS_OUT_TOKEN = 273,
- PARS_BINARY_TOKEN = 274,
- PARS_BLOB_TOKEN = 275,
- PARS_INT_TOKEN = 276,
- PARS_INTEGER_TOKEN = 277,
- PARS_FLOAT_TOKEN = 278,
- PARS_CHAR_TOKEN = 279,
- PARS_IS_TOKEN = 280,
- PARS_BEGIN_TOKEN = 281,
- PARS_END_TOKEN = 282,
- PARS_IF_TOKEN = 283,
- PARS_THEN_TOKEN = 284,
- PARS_ELSE_TOKEN = 285,
- PARS_ELSIF_TOKEN = 286,
- PARS_LOOP_TOKEN = 287,
- PARS_WHILE_TOKEN = 288,
- PARS_RETURN_TOKEN = 289,
- PARS_SELECT_TOKEN = 290,
- PARS_SUM_TOKEN = 291,
- PARS_COUNT_TOKEN = 292,
- PARS_DISTINCT_TOKEN = 293,
- PARS_FROM_TOKEN = 294,
- PARS_WHERE_TOKEN = 295,
- PARS_FOR_TOKEN = 296,
- PARS_DDOT_TOKEN = 297,
- PARS_READ_TOKEN = 298,
- PARS_ORDER_TOKEN = 299,
- PARS_BY_TOKEN = 300,
- PARS_ASC_TOKEN = 301,
- PARS_DESC_TOKEN = 302,
- PARS_INSERT_TOKEN = 303,
- PARS_INTO_TOKEN = 304,
- PARS_VALUES_TOKEN = 305,
- PARS_UPDATE_TOKEN = 306,
- PARS_SET_TOKEN = 307,
- PARS_DELETE_TOKEN = 308,
- PARS_CURRENT_TOKEN = 309,
- PARS_OF_TOKEN = 310,
- PARS_CREATE_TOKEN = 311,
- PARS_TABLE_TOKEN = 312,
- PARS_INDEX_TOKEN = 313,
- PARS_UNIQUE_TOKEN = 314,
- PARS_CLUSTERED_TOKEN = 315,
- PARS_DOES_NOT_FIT_IN_MEM_TOKEN = 316,
- PARS_ON_TOKEN = 317,
- PARS_ASSIGN_TOKEN = 318,
- PARS_DECLARE_TOKEN = 319,
- PARS_CURSOR_TOKEN = 320,
- PARS_SQL_TOKEN = 321,
- PARS_OPEN_TOKEN = 322,
- PARS_FETCH_TOKEN = 323,
- PARS_CLOSE_TOKEN = 324,
- PARS_NOTFOUND_TOKEN = 325,
- PARS_TO_CHAR_TOKEN = 326,
- PARS_TO_NUMBER_TOKEN = 327,
- PARS_TO_BINARY_TOKEN = 328,
- PARS_BINARY_TO_NUMBER_TOKEN = 329,
- PARS_SUBSTR_TOKEN = 330,
- PARS_REPLSTR_TOKEN = 331,
- PARS_CONCAT_TOKEN = 332,
- PARS_INSTR_TOKEN = 333,
- PARS_LENGTH_TOKEN = 334,
- PARS_SYSDATE_TOKEN = 335,
- PARS_PRINTF_TOKEN = 336,
- PARS_ASSERT_TOKEN = 337,
- PARS_RND_TOKEN = 338,
- PARS_RND_STR_TOKEN = 339,
- PARS_ROW_PRINTF_TOKEN = 340,
- PARS_COMMIT_TOKEN = 341,
- PARS_ROLLBACK_TOKEN = 342,
- PARS_WORK_TOKEN = 343,
- PARS_UNSIGNED_TOKEN = 344,
- PARS_EXIT_TOKEN = 345,
- PARS_FUNCTION_TOKEN = 346,
- PARS_LOCK_TOKEN = 347,
- PARS_SHARE_TOKEN = 348,
- PARS_MODE_TOKEN = 349,
- NEG = 350
- };
-#endif
-#define PARS_INT_LIT 258
-#define PARS_FLOAT_LIT 259
-#define PARS_STR_LIT 260
-#define PARS_FIXBINARY_LIT 261
-#define PARS_BLOB_LIT 262
-#define PARS_NULL_LIT 263
-#define PARS_ID_TOKEN 264
-#define PARS_AND_TOKEN 265
-#define PARS_OR_TOKEN 266
-#define PARS_NOT_TOKEN 267
-#define PARS_GE_TOKEN 268
-#define PARS_LE_TOKEN 269
-#define PARS_NE_TOKEN 270
-#define PARS_PROCEDURE_TOKEN 271
-#define PARS_IN_TOKEN 272
-#define PARS_OUT_TOKEN 273
-#define PARS_BINARY_TOKEN 274
-#define PARS_BLOB_TOKEN 275
-#define PARS_INT_TOKEN 276
-#define PARS_INTEGER_TOKEN 277
-#define PARS_FLOAT_TOKEN 278
-#define PARS_CHAR_TOKEN 279
-#define PARS_IS_TOKEN 280
-#define PARS_BEGIN_TOKEN 281
-#define PARS_END_TOKEN 282
-#define PARS_IF_TOKEN 283
-#define PARS_THEN_TOKEN 284
-#define PARS_ELSE_TOKEN 285
-#define PARS_ELSIF_TOKEN 286
-#define PARS_LOOP_TOKEN 287
-#define PARS_WHILE_TOKEN 288
-#define PARS_RETURN_TOKEN 289
-#define PARS_SELECT_TOKEN 290
-#define PARS_SUM_TOKEN 291
-#define PARS_COUNT_TOKEN 292
-#define PARS_DISTINCT_TOKEN 293
-#define PARS_FROM_TOKEN 294
-#define PARS_WHERE_TOKEN 295
-#define PARS_FOR_TOKEN 296
-#define PARS_DDOT_TOKEN 297
-#define PARS_READ_TOKEN 298
-#define PARS_ORDER_TOKEN 299
-#define PARS_BY_TOKEN 300
-#define PARS_ASC_TOKEN 301
-#define PARS_DESC_TOKEN 302
-#define PARS_INSERT_TOKEN 303
-#define PARS_INTO_TOKEN 304
-#define PARS_VALUES_TOKEN 305
-#define PARS_UPDATE_TOKEN 306
-#define PARS_SET_TOKEN 307
-#define PARS_DELETE_TOKEN 308
-#define PARS_CURRENT_TOKEN 309
-#define PARS_OF_TOKEN 310
-#define PARS_CREATE_TOKEN 311
-#define PARS_TABLE_TOKEN 312
-#define PARS_INDEX_TOKEN 313
-#define PARS_UNIQUE_TOKEN 314
-#define PARS_CLUSTERED_TOKEN 315
-#define PARS_DOES_NOT_FIT_IN_MEM_TOKEN 316
-#define PARS_ON_TOKEN 317
-#define PARS_ASSIGN_TOKEN 318
-#define PARS_DECLARE_TOKEN 319
-#define PARS_CURSOR_TOKEN 320
-#define PARS_SQL_TOKEN 321
-#define PARS_OPEN_TOKEN 322
-#define PARS_FETCH_TOKEN 323
-#define PARS_CLOSE_TOKEN 324
-#define PARS_NOTFOUND_TOKEN 325
-#define PARS_TO_CHAR_TOKEN 326
-#define PARS_TO_NUMBER_TOKEN 327
-#define PARS_TO_BINARY_TOKEN 328
-#define PARS_BINARY_TO_NUMBER_TOKEN 329
-#define PARS_SUBSTR_TOKEN 330
-#define PARS_REPLSTR_TOKEN 331
-#define PARS_CONCAT_TOKEN 332
-#define PARS_INSTR_TOKEN 333
-#define PARS_LENGTH_TOKEN 334
-#define PARS_SYSDATE_TOKEN 335
-#define PARS_PRINTF_TOKEN 336
-#define PARS_ASSERT_TOKEN 337
-#define PARS_RND_TOKEN 338
-#define PARS_RND_STR_TOKEN 339
-#define PARS_ROW_PRINTF_TOKEN 340
-#define PARS_COMMIT_TOKEN 341
-#define PARS_ROLLBACK_TOKEN 342
-#define PARS_WORK_TOKEN 343
-#define PARS_UNSIGNED_TOKEN 344
-#define PARS_EXIT_TOKEN 345
-#define PARS_FUNCTION_TOKEN 346
-#define PARS_LOCK_TOKEN 347
-#define PARS_SHARE_TOKEN 348
-#define PARS_MODE_TOKEN 349
-#define NEG 350
-
-
-
-
-/* Copy the first part of user declarations. */
-#line 13 "pars0grm.y"
-
-/* The value of the semantic attribute is a pointer to a query tree node
-que_node_t */
-
-#include "univ.i"
-#include <math.h> /* Can't be before univ.i */
-#include "pars0pars.h"
-#include "mem0mem.h"
-#include "que0types.h"
-#include "que0que.h"
-#include "row0sel.h"
-
-#define YYSTYPE que_node_t*
-
-/* #define __STDC__ */
-
-int
-yylex(void);
-
-
-/* Enabling traces. */
-#ifndef YYDEBUG
-# define YYDEBUG 0
-#endif
-
-/* Enabling verbose error messages. */
-#ifdef YYERROR_VERBOSE
-# undef YYERROR_VERBOSE
-# define YYERROR_VERBOSE 1
-#else
-# define YYERROR_VERBOSE 0
-#endif
-
-#if ! defined (YYSTYPE) && ! defined (YYSTYPE_IS_DECLARED)
-typedef int YYSTYPE;
-# define yystype YYSTYPE /* obsolescent; will be withdrawn */
-# define YYSTYPE_IS_DECLARED 1
-# define YYSTYPE_IS_TRIVIAL 1
-#endif
-
-
-
-/* Copy the second part of user declarations. */
-
-
-/* Line 213 of yacc.c. */
-#line 297 "pars0grm.c"
-
-#if ! defined (yyoverflow) || YYERROR_VERBOSE
-
-# ifndef YYFREE
-# define YYFREE free
-# endif
-# ifndef YYMALLOC
-# define YYMALLOC malloc
-# endif
-
-/* The parser invokes alloca or malloc; define the necessary symbols. */
-
-# ifdef YYSTACK_USE_ALLOCA
-# if YYSTACK_USE_ALLOCA
-# ifdef __GNUC__
-# define YYSTACK_ALLOC __builtin_alloca
-# else
-# define YYSTACK_ALLOC alloca
-# endif
-# endif
-# endif
-
-# ifdef YYSTACK_ALLOC
- /* Pacify GCC's `empty if-body' warning. */
-# define YYSTACK_FREE(Ptr) do { /* empty */; } while (0)
-# else
-# if defined (__STDC__) || defined (__cplusplus)
-# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
-# define YYSIZE_T size_t
-# endif
-# define YYSTACK_ALLOC YYMALLOC
-# define YYSTACK_FREE YYFREE
-# endif
-#endif /* ! defined (yyoverflow) || YYERROR_VERBOSE */
-
-
-#if (! defined (yyoverflow) \
- && (! defined (__cplusplus) \
- || (defined (YYSTYPE_IS_TRIVIAL) && YYSTYPE_IS_TRIVIAL)))
-
-/* A type that is properly aligned for any stack member. */
-union yyalloc
-{
- short int yyss;
- YYSTYPE yyvs;
- };
-
-/* The size of the maximum gap between one aligned stack and the next. */
-# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1)
-
-/* The size of an array large to enough to hold all stacks, each with
- N elements. */
-# define YYSTACK_BYTES(N) \
- ((N) * (sizeof (short int) + sizeof (YYSTYPE)) \
- + YYSTACK_GAP_MAXIMUM)
-
-/* Copy COUNT objects from FROM to TO. The source and destination do
- not overlap. */
-# ifndef YYCOPY
-# if defined (__GNUC__) && 1 < __GNUC__
-# define YYCOPY(To, From, Count) \
- __builtin_memcpy (To, From, (Count) * sizeof (*(From)))
-# else
-# define YYCOPY(To, From, Count) \
- do \
- { \
- register YYSIZE_T yyi; \
- for (yyi = 0; yyi < (Count); yyi++) \
- (To)[yyi] = (From)[yyi]; \
- } \
- while (0)
-# endif
-# endif
-
-/* Relocate STACK from its old location to the new one. The
- local variables YYSIZE and YYSTACKSIZE give the old and new number of
- elements in the stack, and YYPTR gives the new location of the
- stack. Advance YYPTR to a properly aligned location for the next
- stack. */
-# define YYSTACK_RELOCATE(Stack) \
- do \
- { \
- YYSIZE_T yynewbytes; \
- YYCOPY (&yyptr->Stack, Stack, yysize); \
- Stack = &yyptr->Stack; \
- yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \
- yyptr += yynewbytes / sizeof (*yyptr); \
- } \
- while (0)
-
-#endif
-
-#if defined (__STDC__) || defined (__cplusplus)
- typedef signed char yysigned_char;
-#else
- typedef short int yysigned_char;
-#endif
-
-/* YYFINAL -- State number of the termination state. */
-#define YYFINAL 5
-/* YYLAST -- Last index in YYTABLE. */
-#define YYLAST 752
-
-/* YYNTOKENS -- Number of terminals. */
-#define YYNTOKENS 111
-/* YYNNTS -- Number of nonterminals. */
-#define YYNNTS 70
-/* YYNRULES -- Number of rules. */
-#define YYNRULES 175
-/* YYNRULES -- Number of states. */
-#define YYNSTATES 339
-
-/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */
-#define YYUNDEFTOK 2
-#define YYMAXUTOK 350
-
-#define YYTRANSLATE(YYX) \
- ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK)
-
-/* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX. */
-static const unsigned char yytranslate[] =
-{
- 0, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 103, 2, 2,
- 105, 106, 100, 99, 108, 98, 2, 101, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 104,
- 96, 95, 97, 107, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 109, 2, 110, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 1, 2, 3, 4,
- 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
- 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
- 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
- 45, 46, 47, 48, 49, 50, 51, 52, 53, 54,
- 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
- 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
- 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
- 85, 86, 87, 88, 89, 90, 91, 92, 93, 94,
- 102
-};
-
-#if YYDEBUG
-/* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in
- YYRHS. */
-static const unsigned short int yyprhs[] =
-{
- 0, 0, 3, 6, 8, 11, 14, 17, 20, 23,
- 26, 29, 32, 35, 38, 41, 44, 47, 50, 53,
- 56, 59, 62, 65, 68, 71, 73, 76, 78, 83,
- 85, 87, 89, 91, 93, 95, 97, 101, 105, 109,
- 113, 116, 120, 124, 128, 132, 136, 140, 144, 148,
- 152, 155, 159, 163, 165, 167, 169, 171, 173, 175,
- 177, 179, 181, 183, 185, 186, 188, 192, 199, 204,
- 206, 208, 210, 214, 216, 220, 221, 223, 227, 228,
- 230, 234, 236, 241, 247, 252, 253, 255, 259, 261,
- 265, 267, 268, 271, 272, 275, 276, 281, 282, 284,
- 286, 287, 292, 301, 305, 311, 314, 318, 320, 324,
- 329, 334, 337, 340, 344, 347, 350, 353, 357, 362,
- 364, 367, 368, 371, 373, 381, 388, 399, 401, 403,
- 406, 409, 414, 419, 425, 427, 431, 432, 436, 437,
- 439, 440, 443, 444, 446, 454, 456, 460, 461, 463,
- 464, 466, 477, 480, 483, 485, 487, 489, 491, 493,
- 497, 501, 502, 504, 508, 512, 513, 515, 518, 525,
- 530, 532, 534, 535, 537, 540
-};
-
-/* YYRHS -- A `-1'-separated list of the rules' RHS. */
-static const short int yyrhs[] =
-{
- 112, 0, -1, 180, 104, -1, 118, -1, 119, 104,
- -1, 151, 104, -1, 152, 104, -1, 153, 104, -1,
- 150, 104, -1, 154, 104, -1, 146, 104, -1, 133,
- 104, -1, 135, 104, -1, 145, 104, -1, 143, 104,
- -1, 144, 104, -1, 140, 104, -1, 141, 104, -1,
- 155, 104, -1, 157, 104, -1, 156, 104, -1, 169,
- 104, -1, 170, 104, -1, 164, 104, -1, 168, 104,
- -1, 113, -1, 114, 113, -1, 9, -1, 116, 105,
- 124, 106, -1, 3, -1, 4, -1, 5, -1, 6,
- -1, 7, -1, 8, -1, 66, -1, 115, 99, 115,
- -1, 115, 98, 115, -1, 115, 100, 115, -1, 115,
- 101, 115, -1, 98, 115, -1, 105, 115, 106, -1,
- 115, 95, 115, -1, 115, 96, 115, -1, 115, 97,
- 115, -1, 115, 13, 115, -1, 115, 14, 115, -1,
- 115, 15, 115, -1, 115, 10, 115, -1, 115, 11,
- 115, -1, 12, 115, -1, 9, 103, 70, -1, 66,
- 103, 70, -1, 71, -1, 72, -1, 73, -1, 74,
- -1, 75, -1, 77, -1, 78, -1, 79, -1, 80,
- -1, 83, -1, 84, -1, -1, 107, -1, 117, 108,
- 107, -1, 109, 9, 105, 117, 106, 110, -1, 120,
- 105, 124, 106, -1, 76, -1, 81, -1, 82, -1,
- 9, 105, 106, -1, 9, -1, 122, 108, 9, -1,
- -1, 9, -1, 123, 108, 9, -1, -1, 115, -1,
- 124, 108, 115, -1, 115, -1, 37, 105, 100, 106,
- -1, 37, 105, 38, 9, 106, -1, 36, 105, 115,
- 106, -1, -1, 125, -1, 126, 108, 125, -1, 100,
- -1, 126, 49, 123, -1, 126, -1, -1, 40, 115,
- -1, -1, 41, 51, -1, -1, 92, 17, 93, 94,
- -1, -1, 46, -1, 47, -1, -1, 44, 45, 9,
- 131, -1, 35, 127, 39, 122, 128, 129, 130, 132,
- -1, 48, 49, 9, -1, 134, 50, 105, 124, 106,
- -1, 134, 133, -1, 9, 95, 115, -1, 136, -1,
- 137, 108, 136, -1, 40, 54, 55, 9, -1, 51,
- 9, 52, 137, -1, 139, 128, -1, 139, 138, -1,
- 53, 39, 9, -1, 142, 128, -1, 142, 138, -1,
- 85, 133, -1, 9, 63, 115, -1, 31, 115, 29,
- 114, -1, 147, -1, 148, 147, -1, -1, 30, 114,
- -1, 148, -1, 28, 115, 29, 114, 149, 27, 28,
- -1, 33, 115, 32, 114, 27, 32, -1, 41, 9,
- 17, 115, 42, 115, 32, 114, 27, 32, -1, 90,
- -1, 34, -1, 67, 9, -1, 69, 9, -1, 68,
- 9, 49, 123, -1, 68, 9, 49, 121, -1, 9,
- 171, 160, 161, 162, -1, 158, -1, 159, 108, 158,
- -1, -1, 105, 3, 106, -1, -1, 89, -1, -1,
- 12, 8, -1, -1, 61, -1, 56, 57, 9, 105,
- 159, 106, 163, -1, 9, -1, 165, 108, 9, -1,
- -1, 59, -1, -1, 60, -1, 56, 166, 167, 58,
- 9, 62, 9, 105, 165, 106, -1, 86, 88, -1,
- 87, 88, -1, 21, -1, 22, -1, 24, -1, 19,
- -1, 20, -1, 9, 17, 171, -1, 9, 18, 171,
- -1, -1, 172, -1, 173, 108, 172, -1, 9, 171,
- 104, -1, -1, 174, -1, 175, 174, -1, 64, 65,
- 9, 25, 133, 104, -1, 64, 91, 9, 104, -1,
- 176, -1, 177, -1, -1, 178, -1, 179, 178, -1,
- 16, 9, 105, 173, 106, 25, 175, 179, 26, 114,
- 27, -1
-};
-
-/* YYRLINE[YYN] -- source line where rule number YYN was defined. */
-static const unsigned short int yyrline[] =
-{
- 0, 138, 138, 141, 142, 143, 144, 145, 146, 147,
- 148, 149, 150, 151, 152, 153, 154, 155, 156, 157,
- 158, 159, 160, 161, 162, 166, 167, 172, 173, 175,
- 176, 177, 178, 179, 180, 181, 182, 183, 184, 185,
- 186, 187, 188, 189, 190, 191, 192, 193, 194, 195,
- 196, 197, 199, 204, 205, 206, 207, 209, 210, 211,
- 212, 213, 214, 215, 218, 220, 221, 225, 230, 235,
- 236, 237, 241, 245, 246, 251, 252, 253, 258, 259,
- 260, 264, 265, 270, 276, 283, 284, 285, 290, 292,
- 294, 298, 299, 303, 304, 309, 310, 315, 316, 317,
- 321, 322, 327, 337, 342, 344, 349, 353, 354, 359,
- 365, 372, 377, 382, 388, 393, 398, 403, 408, 414,
- 415, 420, 421, 423, 427, 434, 440, 448, 452, 456,
- 462, 468, 470, 475, 480, 481, 486, 487, 492, 493,
- 499, 500, 506, 507, 513, 519, 520, 525, 526, 530,
- 531, 535, 543, 548, 553, 554, 555, 556, 557, 561,
- 564, 570, 571, 572, 577, 581, 583, 584, 588, 594,
- 599, 600, 603, 605, 606, 610
-};
-#endif
-
-#if YYDEBUG || YYERROR_VERBOSE
-/* YYTNME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
- First, the terminals, then, starting at YYNTOKENS, nonterminals. */
-static const char *const yytname[] =
-{
- "$end", "error", "$undefined", "PARS_INT_LIT", "PARS_FLOAT_LIT",
- "PARS_STR_LIT", "PARS_FIXBINARY_LIT", "PARS_BLOB_LIT", "PARS_NULL_LIT",
- "PARS_ID_TOKEN", "PARS_AND_TOKEN", "PARS_OR_TOKEN", "PARS_NOT_TOKEN",
- "PARS_GE_TOKEN", "PARS_LE_TOKEN", "PARS_NE_TOKEN",
- "PARS_PROCEDURE_TOKEN", "PARS_IN_TOKEN", "PARS_OUT_TOKEN",
- "PARS_BINARY_TOKEN", "PARS_BLOB_TOKEN", "PARS_INT_TOKEN",
- "PARS_INTEGER_TOKEN", "PARS_FLOAT_TOKEN", "PARS_CHAR_TOKEN",
- "PARS_IS_TOKEN", "PARS_BEGIN_TOKEN", "PARS_END_TOKEN", "PARS_IF_TOKEN",
- "PARS_THEN_TOKEN", "PARS_ELSE_TOKEN", "PARS_ELSIF_TOKEN",
- "PARS_LOOP_TOKEN", "PARS_WHILE_TOKEN", "PARS_RETURN_TOKEN",
- "PARS_SELECT_TOKEN", "PARS_SUM_TOKEN", "PARS_COUNT_TOKEN",
- "PARS_DISTINCT_TOKEN", "PARS_FROM_TOKEN", "PARS_WHERE_TOKEN",
- "PARS_FOR_TOKEN", "PARS_DDOT_TOKEN", "PARS_READ_TOKEN",
- "PARS_ORDER_TOKEN", "PARS_BY_TOKEN", "PARS_ASC_TOKEN", "PARS_DESC_TOKEN",
- "PARS_INSERT_TOKEN", "PARS_INTO_TOKEN", "PARS_VALUES_TOKEN",
- "PARS_UPDATE_TOKEN", "PARS_SET_TOKEN", "PARS_DELETE_TOKEN",
- "PARS_CURRENT_TOKEN", "PARS_OF_TOKEN", "PARS_CREATE_TOKEN",
- "PARS_TABLE_TOKEN", "PARS_INDEX_TOKEN", "PARS_UNIQUE_TOKEN",
- "PARS_CLUSTERED_TOKEN", "PARS_DOES_NOT_FIT_IN_MEM_TOKEN",
- "PARS_ON_TOKEN", "PARS_ASSIGN_TOKEN", "PARS_DECLARE_TOKEN",
- "PARS_CURSOR_TOKEN", "PARS_SQL_TOKEN", "PARS_OPEN_TOKEN",
- "PARS_FETCH_TOKEN", "PARS_CLOSE_TOKEN", "PARS_NOTFOUND_TOKEN",
- "PARS_TO_CHAR_TOKEN", "PARS_TO_NUMBER_TOKEN", "PARS_TO_BINARY_TOKEN",
- "PARS_BINARY_TO_NUMBER_TOKEN", "PARS_SUBSTR_TOKEN", "PARS_REPLSTR_TOKEN",
- "PARS_CONCAT_TOKEN", "PARS_INSTR_TOKEN", "PARS_LENGTH_TOKEN",
- "PARS_SYSDATE_TOKEN", "PARS_PRINTF_TOKEN", "PARS_ASSERT_TOKEN",
- "PARS_RND_TOKEN", "PARS_RND_STR_TOKEN", "PARS_ROW_PRINTF_TOKEN",
- "PARS_COMMIT_TOKEN", "PARS_ROLLBACK_TOKEN", "PARS_WORK_TOKEN",
- "PARS_UNSIGNED_TOKEN", "PARS_EXIT_TOKEN", "PARS_FUNCTION_TOKEN",
- "PARS_LOCK_TOKEN", "PARS_SHARE_TOKEN", "PARS_MODE_TOKEN", "'='", "'<'",
- "'>'", "'-'", "'+'", "'*'", "'/'", "NEG", "'%'", "';'", "'('", "')'",
- "'?'", "','", "'{'", "'}'", "$accept", "top_statement", "statement",
- "statement_list", "exp", "function_name", "question_mark_list",
- "stored_procedure_call", "predefined_procedure_call",
- "predefined_procedure_name", "user_function_call", "table_list",
- "variable_list", "exp_list", "select_item", "select_item_list",
- "select_list", "search_condition", "for_update_clause",
- "lock_shared_clause", "order_direction", "order_by_clause",
- "select_statement", "insert_statement_start", "insert_statement",
- "column_assignment", "column_assignment_list", "cursor_positioned",
- "update_statement_start", "update_statement_searched",
- "update_statement_positioned", "delete_statement_start",
- "delete_statement_searched", "delete_statement_positioned",
- "row_printf_statement", "assignment_statement", "elsif_element",
- "elsif_list", "else_part", "if_statement", "while_statement",
- "for_statement", "exit_statement", "return_statement",
- "open_cursor_statement", "close_cursor_statement", "fetch_statement",
- "column_def", "column_def_list", "opt_column_len", "opt_unsigned",
- "opt_not_null", "not_fit_in_memory", "create_table", "column_list",
- "unique_def", "clustered_def", "create_index", "commit_statement",
- "rollback_statement", "type_name", "parameter_declaration",
- "parameter_declaration_list", "variable_declaration",
- "variable_declaration_list", "cursor_declaration",
- "function_declaration", "declaration", "declaration_list",
- "procedure_definition", 0
-};
-#endif
-
-# ifdef YYPRINT
-/* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to
- token YYLEX-NUM. */
-static const unsigned short int yytoknum[] =
-{
- 0, 256, 257, 258, 259, 260, 261, 262, 263, 264,
- 265, 266, 267, 268, 269, 270, 271, 272, 273, 274,
- 275, 276, 277, 278, 279, 280, 281, 282, 283, 284,
- 285, 286, 287, 288, 289, 290, 291, 292, 293, 294,
- 295, 296, 297, 298, 299, 300, 301, 302, 303, 304,
- 305, 306, 307, 308, 309, 310, 311, 312, 313, 314,
- 315, 316, 317, 318, 319, 320, 321, 322, 323, 324,
- 325, 326, 327, 328, 329, 330, 331, 332, 333, 334,
- 335, 336, 337, 338, 339, 340, 341, 342, 343, 344,
- 345, 346, 347, 348, 349, 61, 60, 62, 45, 43,
- 42, 47, 350, 37, 59, 40, 41, 63, 44, 123,
- 125
-};
-# endif
-
-/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */
-static const unsigned char yyr1[] =
-{
- 0, 111, 112, 113, 113, 113, 113, 113, 113, 113,
- 113, 113, 113, 113, 113, 113, 113, 113, 113, 113,
- 113, 113, 113, 113, 113, 114, 114, 115, 115, 115,
- 115, 115, 115, 115, 115, 115, 115, 115, 115, 115,
- 115, 115, 115, 115, 115, 115, 115, 115, 115, 115,
- 115, 115, 115, 116, 116, 116, 116, 116, 116, 116,
- 116, 116, 116, 116, 117, 117, 117, 118, 119, 120,
- 120, 120, 121, 122, 122, 123, 123, 123, 124, 124,
- 124, 125, 125, 125, 125, 126, 126, 126, 127, 127,
- 127, 128, 128, 129, 129, 130, 130, 131, 131, 131,
- 132, 132, 133, 134, 135, 135, 136, 137, 137, 138,
- 139, 140, 141, 142, 143, 144, 145, 146, 147, 148,
- 148, 149, 149, 149, 150, 151, 152, 153, 154, 155,
- 156, 157, 157, 158, 159, 159, 160, 160, 161, 161,
- 162, 162, 163, 163, 164, 165, 165, 166, 166, 167,
- 167, 168, 169, 170, 171, 171, 171, 171, 171, 172,
- 172, 173, 173, 173, 174, 175, 175, 175, 176, 177,
- 178, 178, 179, 179, 179, 180
-};
-
-/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */
-static const unsigned char yyr2[] =
-{
- 0, 2, 2, 1, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 1, 2, 1, 4, 1,
- 1, 1, 1, 1, 1, 1, 3, 3, 3, 3,
- 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 2, 3, 3, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 0, 1, 3, 6, 4, 1,
- 1, 1, 3, 1, 3, 0, 1, 3, 0, 1,
- 3, 1, 4, 5, 4, 0, 1, 3, 1, 3,
- 1, 0, 2, 0, 2, 0, 4, 0, 1, 1,
- 0, 4, 8, 3, 5, 2, 3, 1, 3, 4,
- 4, 2, 2, 3, 2, 2, 2, 3, 4, 1,
- 2, 0, 2, 1, 7, 6, 10, 1, 1, 2,
- 2, 4, 4, 5, 1, 3, 0, 3, 0, 1,
- 0, 2, 0, 1, 7, 1, 3, 0, 1, 0,
- 1, 10, 2, 2, 1, 1, 1, 1, 1, 3,
- 3, 0, 1, 3, 3, 0, 1, 2, 6, 4,
- 1, 1, 0, 1, 2, 11
-};
-
-/* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state
- STATE-NUM when YYTABLE doesn't specify something else to do. Zero
- means the default is an error. */
-static const unsigned char yydefact[] =
-{
- 0, 0, 0, 0, 0, 1, 2, 161, 0, 162,
- 0, 0, 0, 0, 0, 157, 158, 154, 155, 156,
- 159, 160, 165, 163, 0, 166, 172, 0, 0, 167,
- 170, 171, 173, 0, 164, 0, 0, 0, 174, 0,
- 0, 0, 0, 0, 128, 85, 0, 0, 0, 0,
- 147, 0, 0, 0, 69, 70, 71, 0, 0, 0,
- 127, 0, 25, 0, 3, 0, 0, 0, 0, 0,
- 91, 0, 0, 91, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 169, 0, 29, 30, 31, 32, 33, 34, 27,
- 0, 35, 53, 54, 55, 56, 57, 58, 59, 60,
- 61, 62, 63, 0, 0, 0, 0, 0, 0, 0,
- 88, 81, 86, 90, 0, 0, 0, 0, 0, 0,
- 148, 149, 129, 0, 130, 116, 152, 153, 0, 175,
- 26, 4, 78, 11, 0, 105, 12, 0, 111, 112,
- 16, 17, 114, 115, 14, 15, 13, 10, 8, 5,
- 6, 7, 9, 18, 20, 19, 23, 24, 21, 22,
- 0, 117, 0, 50, 0, 40, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 78, 0, 0, 0, 75, 0, 0, 0, 103, 0,
- 113, 0, 150, 0, 75, 64, 79, 0, 78, 0,
- 92, 168, 51, 52, 41, 48, 49, 45, 46, 47,
- 121, 42, 43, 44, 37, 36, 38, 39, 0, 0,
- 0, 0, 0, 76, 89, 87, 73, 91, 0, 0,
- 107, 110, 0, 0, 76, 132, 131, 65, 0, 68,
- 0, 0, 0, 0, 0, 119, 123, 0, 28, 0,
- 84, 0, 82, 0, 0, 0, 93, 0, 0, 0,
- 0, 134, 0, 0, 0, 0, 0, 80, 104, 109,
- 122, 0, 120, 0, 125, 83, 77, 74, 0, 95,
- 0, 106, 108, 136, 142, 0, 0, 72, 67, 66,
- 0, 124, 94, 0, 100, 0, 0, 138, 143, 144,
- 135, 0, 118, 0, 0, 102, 0, 0, 139, 140,
- 0, 0, 0, 0, 137, 0, 133, 145, 0, 96,
- 97, 126, 141, 151, 0, 98, 99, 101, 146
-};
-
-/* YYDEFGOTO[NTERM-NUM]. */
-static const short int yydefgoto[] =
-{
- -1, 2, 62, 63, 206, 116, 248, 64, 65, 66,
- 245, 237, 234, 207, 122, 123, 124, 148, 289, 304,
- 337, 315, 67, 68, 69, 240, 241, 149, 70, 71,
- 72, 73, 74, 75, 76, 77, 255, 256, 257, 78,
- 79, 80, 81, 82, 83, 84, 85, 271, 272, 307,
- 319, 326, 309, 86, 328, 131, 203, 87, 88, 89,
- 20, 9, 10, 25, 26, 30, 31, 32, 33, 3
-};
-
-/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
- STATE-NUM. */
-#define YYPACT_NINF -177
-static const short int yypact[] =
-{
- 28, 38, 54, -46, -29, -177, -177, 56, 50, -177,
- -75, 8, 8, 46, 56, -177, -177, -177, -177, -177,
- -177, -177, 63, -177, 8, -177, 2, -26, -51, -177,
- -177, -177, -177, -13, -177, 71, 72, 587, -177, 57,
- -21, 26, 272, 272, -177, 13, 91, 55, 96, 67,
- -22, 99, 100, 103, -177, -177, -177, 75, 29, 35,
- -177, 116, -177, 396, -177, 22, 23, 27, -9, 30,
- 87, 31, 32, 87, 47, 49, 52, 58, 59, 60,
- 61, 62, 65, 66, 74, 77, 78, 86, 89, 102,
- 75, -177, 272, -177, -177, -177, -177, -177, -177, 39,
- 272, 51, -177, -177, -177, -177, -177, -177, -177, -177,
- -177, -177, -177, 272, 272, 361, 25, 489, 45, 90,
- -177, 651, -177, -39, 93, 142, 124, 108, 152, 170,
- -177, 131, -177, 143, -177, -177, -177, -177, 98, -177,
- -177, -177, 272, -177, 110, -177, -177, 256, -177, -177,
- -177, -177, -177, -177, -177, -177, -177, -177, -177, -177,
- -177, -177, -177, -177, -177, -177, -177, -177, -177, -177,
- 112, 651, 137, 101, 147, 204, 88, 272, 272, 272,
- 272, 272, 587, 272, 272, 272, 272, 272, 272, 272,
- 272, 587, 272, -30, 211, 168, 212, 272, -177, 213,
- -177, 118, -177, 167, 217, 122, 651, -63, 272, 175,
- 651, -177, -177, -177, -177, 101, 101, 21, 21, 651,
- 332, 21, 21, 21, -6, -6, 204, 204, -60, 460,
- 198, 222, 126, -177, 125, -177, -177, -33, 584, 140,
- -177, 128, 228, 229, 139, -177, 125, -177, -53, -177,
- 272, -49, 240, 587, 272, -177, 224, 226, -177, 225,
- -177, 150, -177, 258, 272, 260, 230, 272, 272, 213,
- 8, -177, -45, 208, 166, 164, 176, 651, -177, -177,
- 587, 631, -177, 254, -177, -177, -177, -177, 234, 194,
- 638, 651, -177, 182, 227, 228, 280, -177, -177, -177,
- 587, -177, -177, 273, 247, 587, 289, 214, -177, -177,
- -177, 195, 587, 209, 261, -177, 524, 199, -177, 295,
- 292, 215, 299, 279, -177, 304, -177, -177, -44, -177,
- -8, -177, -177, -177, 305, -177, -177, -177, -177
-};
-
-/* YYPGOTO[NTERM-NUM]. */
-static const short int yypgoto[] =
-{
- -177, -177, -62, -176, -40, -177, -177, -177, -177, -177,
- -177, -177, 109, -166, 120, -177, -177, -69, -177, -177,
- -177, -177, -34, -177, -177, 48, -177, 243, -177, -177,
- -177, -177, -177, -177, -177, -177, 64, -177, -177, -177,
- -177, -177, -177, -177, -177, -177, -177, 24, -177, -177,
- -177, -177, -177, -177, -177, -177, -177, -177, -177, -177,
- -12, 307, -177, 297, -177, -177, -177, 285, -177, -177
-};
-
-/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If
- positive, shift that token. If negative, reduce the rule which
- number is the opposite. If zero, do what YYDEFACT says.
- If YYTABLE_NINF, syntax error. */
-#define YYTABLE_NINF -1
-static const unsigned short int yytable[] =
-{
- 21, 140, 115, 117, 152, 121, 220, 264, 231, 181,
- 194, 24, 27, 37, 35, 229, 93, 94, 95, 96,
- 97, 98, 99, 135, 228, 100, 45, 15, 16, 17,
- 18, 13, 19, 14, 145, 129, 181, 130, 335, 336,
- 36, 144, 251, 249, 1, 250, 258, 4, 250, 118,
- 119, 28, 171, 275, 5, 276, 170, 278, 6, 250,
- 173, 294, 333, 295, 334, 8, 28, 11, 12, 195,
- 232, 22, 24, 175, 176, 265, 7, 280, 34, 101,
- 39, 40, 90, 91, 102, 103, 104, 105, 106, 92,
- 107, 108, 109, 110, 188, 189, 111, 112, 177, 178,
- 125, 179, 180, 181, 126, 127, 128, 210, 132, 133,
- 45, 113, 134, 120, 179, 180, 181, 136, 114, 186,
- 187, 188, 189, 137, 312, 138, 141, 147, 142, 316,
- 190, 143, 196, 198, 146, 150, 151, 215, 216, 217,
- 218, 219, 172, 221, 222, 223, 224, 225, 226, 227,
- 192, 154, 230, 155, 174, 121, 156, 238, 140, 197,
- 199, 200, 157, 158, 159, 160, 161, 140, 266, 162,
- 163, 93, 94, 95, 96, 97, 98, 99, 164, 201,
- 100, 165, 166, 183, 184, 185, 186, 187, 188, 189,
- 167, 202, 204, 168, 214, 193, 183, 184, 185, 186,
- 187, 188, 189, 205, 118, 119, 169, 212, 177, 178,
- 277, 179, 180, 181, 281, 208, 211, 213, 140, 181,
- 233, 236, 239, 242, 210, 243, 244, 290, 291, 247,
- 252, 261, 262, 263, 101, 268, 269, 270, 273, 102,
- 103, 104, 105, 106, 274, 107, 108, 109, 110, 279,
- 140, 111, 112, 283, 140, 254, 285, 284, 293, 93,
- 94, 95, 96, 97, 98, 99, 113, 286, 100, 287,
- 296, 288, 297, 114, 298, 93, 94, 95, 96, 97,
- 98, 99, 301, 299, 100, 302, 303, 306, 308, 311,
- 313, 314, 317, 183, 184, 185, 186, 187, 188, 189,
- 320, 327, 321, 318, 260, 324, 322, 325, 330, 329,
- 209, 331, 332, 246, 338, 235, 153, 292, 38, 310,
- 282, 23, 101, 29, 0, 0, 0, 102, 103, 104,
- 105, 106, 0, 107, 108, 109, 110, 0, 101, 111,
- 112, 41, 0, 102, 103, 104, 105, 106, 0, 107,
- 108, 109, 110, 0, 113, 111, 112, 0, 0, 0,
- 42, 114, 253, 254, 0, 43, 44, 45, 0, 0,
- 113, 177, 178, 46, 179, 180, 181, 114, 0, 0,
- 47, 0, 0, 48, 0, 49, 0, 0, 50, 0,
- 182, 0, 0, 0, 0, 0, 0, 0, 0, 51,
- 52, 53, 0, 0, 0, 41, 0, 0, 54, 0,
- 0, 0, 0, 55, 56, 0, 0, 57, 58, 59,
- 0, 0, 60, 139, 42, 0, 0, 0, 0, 43,
- 44, 45, 0, 0, 0, 0, 0, 46, 0, 0,
- 0, 61, 0, 0, 47, 0, 0, 48, 0, 49,
- 0, 0, 50, 0, 0, 0, 183, 184, 185, 186,
- 187, 188, 189, 51, 52, 53, 0, 0, 0, 41,
- 0, 0, 54, 0, 0, 0, 0, 55, 56, 0,
- 0, 57, 58, 59, 0, 0, 60, 259, 42, 0,
- 0, 0, 0, 43, 44, 45, 0, 0, 0, 177,
- 178, 46, 179, 180, 181, 61, 0, 0, 47, 0,
- 0, 48, 0, 49, 0, 0, 50, 0, 0, 0,
- 0, 191, 0, 0, 0, 0, 0, 51, 52, 53,
- 0, 0, 0, 41, 0, 0, 54, 0, 0, 0,
- 0, 55, 56, 0, 0, 57, 58, 59, 0, 0,
- 60, 323, 42, 0, 0, 0, 0, 43, 44, 45,
- 0, 0, 0, 0, 0, 46, 0, 0, 0, 61,
- 0, 0, 47, 0, 0, 48, 0, 49, 0, 0,
- 50, 0, 0, 0, 183, 184, 185, 186, 187, 188,
- 189, 51, 52, 53, 177, 178, 41, 179, 180, 181,
- 54, 0, 0, 0, 0, 55, 56, 0, 0, 57,
- 58, 59, 0, 0, 60, 42, 0, 0, 0, 0,
- 43, 44, 45, 0, 0, 0, 267, 0, 46, 0,
- 0, 0, 0, 61, 0, 47, 0, 0, 48, 0,
- 49, 177, 178, 50, 179, 180, 181, 0, 177, 178,
- 0, 179, 180, 181, 51, 52, 53, 0, 0, 0,
- 300, 177, 178, 54, 179, 180, 181, 0, 55, 56,
- 305, 0, 57, 58, 59, 0, 0, 60, 0, 183,
- 184, 185, 186, 187, 188, 189, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 61, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 183, 184, 185, 186,
- 187, 188, 189, 183, 184, 185, 186, 187, 188, 189,
- 0, 0, 0, 0, 0, 0, 183, 184, 185, 186,
- 187, 188, 189
-};
-
-static const short int yycheck[] =
-{
- 12, 63, 42, 43, 73, 45, 182, 40, 38, 15,
- 49, 9, 24, 26, 65, 191, 3, 4, 5, 6,
- 7, 8, 9, 57, 190, 12, 35, 19, 20, 21,
- 22, 106, 24, 108, 68, 57, 15, 59, 46, 47,
- 91, 50, 208, 106, 16, 108, 106, 9, 108, 36,
- 37, 64, 92, 106, 0, 108, 90, 106, 104, 108,
- 100, 106, 106, 108, 108, 9, 64, 17, 18, 108,
- 100, 25, 9, 113, 114, 108, 105, 253, 104, 66,
- 9, 9, 25, 104, 71, 72, 73, 74, 75, 63,
- 77, 78, 79, 80, 100, 101, 83, 84, 10, 11,
- 9, 13, 14, 15, 49, 9, 39, 147, 9, 9,
- 35, 98, 9, 100, 13, 14, 15, 88, 105, 98,
- 99, 100, 101, 88, 300, 9, 104, 40, 105, 305,
- 105, 104, 39, 9, 104, 104, 104, 177, 178, 179,
- 180, 181, 103, 183, 184, 185, 186, 187, 188, 189,
- 105, 104, 192, 104, 103, 195, 104, 197, 220, 17,
- 52, 9, 104, 104, 104, 104, 104, 229, 237, 104,
- 104, 3, 4, 5, 6, 7, 8, 9, 104, 9,
- 12, 104, 104, 95, 96, 97, 98, 99, 100, 101,
- 104, 60, 49, 104, 106, 105, 95, 96, 97, 98,
- 99, 100, 101, 105, 36, 37, 104, 70, 10, 11,
- 250, 13, 14, 15, 254, 105, 104, 70, 280, 15,
- 9, 9, 9, 105, 264, 58, 9, 267, 268, 107,
- 55, 9, 106, 108, 66, 95, 108, 9, 9, 71,
- 72, 73, 74, 75, 105, 77, 78, 79, 80, 9,
- 312, 83, 84, 27, 316, 31, 106, 32, 270, 3,
- 4, 5, 6, 7, 8, 9, 98, 9, 12, 9,
- 62, 41, 106, 105, 110, 3, 4, 5, 6, 7,
- 8, 9, 28, 107, 12, 51, 92, 105, 61, 9,
- 17, 44, 3, 95, 96, 97, 98, 99, 100, 101,
- 105, 9, 93, 89, 106, 106, 45, 12, 9, 94,
- 54, 32, 8, 204, 9, 195, 73, 269, 33, 295,
- 256, 14, 66, 26, -1, -1, -1, 71, 72, 73,
- 74, 75, -1, 77, 78, 79, 80, -1, 66, 83,
- 84, 9, -1, 71, 72, 73, 74, 75, -1, 77,
- 78, 79, 80, -1, 98, 83, 84, -1, -1, -1,
- 28, 105, 30, 31, -1, 33, 34, 35, -1, -1,
- 98, 10, 11, 41, 13, 14, 15, 105, -1, -1,
- 48, -1, -1, 51, -1, 53, -1, -1, 56, -1,
- 29, -1, -1, -1, -1, -1, -1, -1, -1, 67,
- 68, 69, -1, -1, -1, 9, -1, -1, 76, -1,
- -1, -1, -1, 81, 82, -1, -1, 85, 86, 87,
- -1, -1, 90, 27, 28, -1, -1, -1, -1, 33,
- 34, 35, -1, -1, -1, -1, -1, 41, -1, -1,
- -1, 109, -1, -1, 48, -1, -1, 51, -1, 53,
- -1, -1, 56, -1, -1, -1, 95, 96, 97, 98,
- 99, 100, 101, 67, 68, 69, -1, -1, -1, 9,
- -1, -1, 76, -1, -1, -1, -1, 81, 82, -1,
- -1, 85, 86, 87, -1, -1, 90, 27, 28, -1,
- -1, -1, -1, 33, 34, 35, -1, -1, -1, 10,
- 11, 41, 13, 14, 15, 109, -1, -1, 48, -1,
- -1, 51, -1, 53, -1, -1, 56, -1, -1, -1,
- -1, 32, -1, -1, -1, -1, -1, 67, 68, 69,
- -1, -1, -1, 9, -1, -1, 76, -1, -1, -1,
- -1, 81, 82, -1, -1, 85, 86, 87, -1, -1,
- 90, 27, 28, -1, -1, -1, -1, 33, 34, 35,
- -1, -1, -1, -1, -1, 41, -1, -1, -1, 109,
- -1, -1, 48, -1, -1, 51, -1, 53, -1, -1,
- 56, -1, -1, -1, 95, 96, 97, 98, 99, 100,
- 101, 67, 68, 69, 10, 11, 9, 13, 14, 15,
- 76, -1, -1, -1, -1, 81, 82, -1, -1, 85,
- 86, 87, -1, -1, 90, 28, -1, -1, -1, -1,
- 33, 34, 35, -1, -1, -1, 42, -1, 41, -1,
- -1, -1, -1, 109, -1, 48, -1, -1, 51, -1,
- 53, 10, 11, 56, 13, 14, 15, -1, 10, 11,
- -1, 13, 14, 15, 67, 68, 69, -1, -1, -1,
- 29, 10, 11, 76, 13, 14, 15, -1, 81, 82,
- 32, -1, 85, 86, 87, -1, -1, 90, -1, 95,
- 96, 97, 98, 99, 100, 101, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, 109, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, 95, 96, 97, 98,
- 99, 100, 101, 95, 96, 97, 98, 99, 100, 101,
- -1, -1, -1, -1, -1, -1, 95, 96, 97, 98,
- 99, 100, 101
-};
-
-/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
- symbol of state STATE-NUM. */
-static const unsigned char yystos[] =
-{
- 0, 16, 112, 180, 9, 0, 104, 105, 9, 172,
- 173, 17, 18, 106, 108, 19, 20, 21, 22, 24,
- 171, 171, 25, 172, 9, 174, 175, 171, 64, 174,
- 176, 177, 178, 179, 104, 65, 91, 26, 178, 9,
- 9, 9, 28, 33, 34, 35, 41, 48, 51, 53,
- 56, 67, 68, 69, 76, 81, 82, 85, 86, 87,
- 90, 109, 113, 114, 118, 119, 120, 133, 134, 135,
- 139, 140, 141, 142, 143, 144, 145, 146, 150, 151,
- 152, 153, 154, 155, 156, 157, 164, 168, 169, 170,
- 25, 104, 63, 3, 4, 5, 6, 7, 8, 9,
- 12, 66, 71, 72, 73, 74, 75, 77, 78, 79,
- 80, 83, 84, 98, 105, 115, 116, 115, 36, 37,
- 100, 115, 125, 126, 127, 9, 49, 9, 39, 57,
- 59, 166, 9, 9, 9, 133, 88, 88, 9, 27,
- 113, 104, 105, 104, 50, 133, 104, 40, 128, 138,
- 104, 104, 128, 138, 104, 104, 104, 104, 104, 104,
- 104, 104, 104, 104, 104, 104, 104, 104, 104, 104,
- 133, 115, 103, 115, 103, 115, 115, 10, 11, 13,
- 14, 15, 29, 95, 96, 97, 98, 99, 100, 101,
- 105, 32, 105, 105, 49, 108, 39, 17, 9, 52,
- 9, 9, 60, 167, 49, 105, 115, 124, 105, 54,
- 115, 104, 70, 70, 106, 115, 115, 115, 115, 115,
- 114, 115, 115, 115, 115, 115, 115, 115, 124, 114,
- 115, 38, 100, 9, 123, 125, 9, 122, 115, 9,
- 136, 137, 105, 58, 9, 121, 123, 107, 117, 106,
- 108, 124, 55, 30, 31, 147, 148, 149, 106, 27,
- 106, 9, 106, 108, 40, 108, 128, 42, 95, 108,
- 9, 158, 159, 9, 105, 106, 108, 115, 106, 9,
- 114, 115, 147, 27, 32, 106, 9, 9, 41, 129,
- 115, 115, 136, 171, 106, 108, 62, 106, 110, 107,
- 29, 28, 51, 92, 130, 32, 105, 160, 61, 163,
- 158, 9, 114, 17, 44, 132, 114, 3, 89, 161,
- 105, 93, 45, 27, 106, 12, 162, 9, 165, 94,
- 9, 32, 8, 106, 108, 46, 47, 131, 9
-};
-
-#if ! defined (YYSIZE_T) && defined (__SIZE_TYPE__)
-# define YYSIZE_T __SIZE_TYPE__
-#endif
-#if ! defined (YYSIZE_T) && defined (size_t)
-# define YYSIZE_T size_t
-#endif
-#if ! defined (YYSIZE_T)
-# if defined (__STDC__) || defined (__cplusplus)
-# include <stddef.h> /* INFRINGES ON USER NAME SPACE */
-# define YYSIZE_T size_t
-# endif
-#endif
-#if ! defined (YYSIZE_T)
-# define YYSIZE_T unsigned int
-#endif
-
-#define yyerrok (yyerrstatus = 0)
-#define yyclearin (yychar = YYEMPTY)
-#define YYEMPTY (-2)
-#define YYEOF 0
-
-#define YYACCEPT goto yyacceptlab
-#define YYABORT goto yyabortlab
-#define YYERROR goto yyerrorlab
-
-
-/* Like YYERROR except do call yyerror. This remains here temporarily
- to ease the transition to the new meaning of YYERROR, for GCC.
- Once GCC version 2 has supplanted version 1, this can go. */
-
-#define YYFAIL goto yyerrlab
-
-#define YYRECOVERING() (!!yyerrstatus)
-
-#define YYBACKUP(Token, Value) \
-do \
- if (yychar == YYEMPTY && yylen == 1) \
- { \
- yychar = (Token); \
- yylval = (Value); \
- yytoken = YYTRANSLATE (yychar); \
- YYPOPSTACK; \
- goto yybackup; \
- } \
- else \
- { \
- yyerror ("syntax error: cannot back up");\
- YYERROR; \
- } \
-while (0)
-
-
-#define YYTERROR 1
-#define YYERRCODE 256
-
-
-/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N].
- If N is 0, then set CURRENT to the empty location which ends
- the previous symbol: RHS[0] (always defined). */
-
-#define YYRHSLOC(Rhs, K) ((Rhs)[K])
-#ifndef YYLLOC_DEFAULT
-# define YYLLOC_DEFAULT(Current, Rhs, N) \
- do \
- if (N) \
- { \
- (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \
- (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \
- (Current).last_line = YYRHSLOC (Rhs, N).last_line; \
- (Current).last_column = YYRHSLOC (Rhs, N).last_column; \
- } \
- else \
- { \
- (Current).first_line = (Current).last_line = \
- YYRHSLOC (Rhs, 0).last_line; \
- (Current).first_column = (Current).last_column = \
- YYRHSLOC (Rhs, 0).last_column; \
- } \
- while (0)
-#endif
-
-
-/* YY_LOCATION_PRINT -- Print the location on the stream.
- This macro was not mandated originally: define only if we know
- we won't break user code: when these are the locations we know. */
-
-#ifndef YY_LOCATION_PRINT
-# if YYLTYPE_IS_TRIVIAL
-# define YY_LOCATION_PRINT(File, Loc) \
- fprintf (File, "%d.%d-%d.%d", \
- (Loc).first_line, (Loc).first_column, \
- (Loc).last_line, (Loc).last_column)
-# else
-# define YY_LOCATION_PRINT(File, Loc) ((void) 0)
-# endif
-#endif
-
-
-/* YYLEX -- calling `yylex' with the right arguments. */
-
-#ifdef YYLEX_PARAM
-# define YYLEX yylex (YYLEX_PARAM)
-#else
-# define YYLEX yylex ()
-#endif
-
-/* Enable debugging if requested. */
-#if YYDEBUG
-
-# ifndef YYFPRINTF
-# include <stdio.h> /* INFRINGES ON USER NAME SPACE */
-# define YYFPRINTF fprintf
-# endif
-
-# define YYDPRINTF(Args) \
-do { \
- if (yydebug) \
- YYFPRINTF Args; \
-} while (0)
-
-# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \
-do { \
- if (yydebug) \
- { \
- YYFPRINTF (stderr, "%s ", Title); \
- yysymprint (stderr, \
- Type, Value); \
- YYFPRINTF (stderr, "\n"); \
- } \
-} while (0)
-
-/*------------------------------------------------------------------.
-| yy_stack_print -- Print the state stack from its BOTTOM up to its |
-| TOP (included). |
-`------------------------------------------------------------------*/
-
-#if defined (__STDC__) || defined (__cplusplus)
-static void
-yy_stack_print (short int *bottom, short int *top)
-#else
-static void
-yy_stack_print (bottom, top)
- short int *bottom;
- short int *top;
-#endif
-{
- YYFPRINTF (stderr, "Stack now");
- for (/* Nothing. */; bottom <= top; ++bottom)
- YYFPRINTF (stderr, " %d", *bottom);
- YYFPRINTF (stderr, "\n");
-}
-
-# define YY_STACK_PRINT(Bottom, Top) \
-do { \
- if (yydebug) \
- yy_stack_print ((Bottom), (Top)); \
-} while (0)
-
-
-/*------------------------------------------------.
-| Report that the YYRULE is going to be reduced. |
-`------------------------------------------------*/
-
-#if defined (__STDC__) || defined (__cplusplus)
-static void
-yy_reduce_print (int yyrule)
-#else
-static void
-yy_reduce_print (yyrule)
- int yyrule;
-#endif
-{
- int yyi;
- unsigned int yylno = yyrline[yyrule];
- YYFPRINTF (stderr, "Reducing stack by rule %d (line %u), ",
- yyrule - 1, yylno);
- /* Print the symbols being reduced, and their result. */
- for (yyi = yyprhs[yyrule]; 0 <= yyrhs[yyi]; yyi++)
- YYFPRINTF (stderr, "%s ", yytname [yyrhs[yyi]]);
- YYFPRINTF (stderr, "-> %s\n", yytname [yyr1[yyrule]]);
-}
-
-# define YY_REDUCE_PRINT(Rule) \
-do { \
- if (yydebug) \
- yy_reduce_print (Rule); \
-} while (0)
-
-/* Nonzero means print parse trace. It is left uninitialized so that
- multiple parsers can coexist. */
-int yydebug;
-#else /* !YYDEBUG */
-# define YYDPRINTF(Args)
-# define YY_SYMBOL_PRINT(Title, Type, Value, Location)
-# define YY_STACK_PRINT(Bottom, Top)
-# define YY_REDUCE_PRINT(Rule)
-#endif /* !YYDEBUG */
-
-
-/* YYINITDEPTH -- initial size of the parser's stacks. */
-#ifndef YYINITDEPTH
-# define YYINITDEPTH 200
-#endif
-
-/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only
- if the built-in stack extension method is used).
-
- Do not make this value too large; the results are undefined if
- SIZE_MAX < YYSTACK_BYTES (YYMAXDEPTH)
- evaluated with infinite-precision integer arithmetic. */
-
-#ifndef YYMAXDEPTH
-# define YYMAXDEPTH 10000
-#endif
-
-
-
-#if YYERROR_VERBOSE
-
-# ifndef yystrlen
-# if defined (__GLIBC__) && defined (_STRING_H)
-# define yystrlen strlen
-# else
-/* Return the length of YYSTR. */
-static YYSIZE_T
-# if defined (__STDC__) || defined (__cplusplus)
-yystrlen (const char *yystr)
-# else
-yystrlen (yystr)
- const char *yystr;
-# endif
-{
- register const char *yys = yystr;
-
- while (*yys++ != '\0')
- continue;
-
- return yys - yystr - 1;
-}
-# endif
-# endif
-
-# ifndef yystpcpy
-# if defined (__GLIBC__) && defined (_STRING_H) && defined (_GNU_SOURCE)
-# define yystpcpy stpcpy
-# else
-/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in
- YYDEST. */
-static char *
-# if defined (__STDC__) || defined (__cplusplus)
-yystpcpy (char *yydest, const char *yysrc)
-# else
-yystpcpy (yydest, yysrc)
- char *yydest;
- const char *yysrc;
-# endif
-{
- register char *yyd = yydest;
- register const char *yys = yysrc;
-
- while ((*yyd++ = *yys++) != '\0')
- continue;
-
- return yyd - 1;
-}
-# endif
-# endif
-
-#endif /* !YYERROR_VERBOSE */
-
-
-
-#if YYDEBUG
-/*--------------------------------.
-| Print this symbol on YYOUTPUT. |
-`--------------------------------*/
-
-#if defined (__STDC__) || defined (__cplusplus)
-static void
-yysymprint (FILE *yyoutput, int yytype, YYSTYPE *yyvaluep)
-#else
-static void
-yysymprint (yyoutput, yytype, yyvaluep)
- FILE *yyoutput;
- int yytype;
- YYSTYPE *yyvaluep;
-#endif
-{
- /* Pacify ``unused variable'' warnings. */
- (void) yyvaluep;
-
- if (yytype < YYNTOKENS)
- YYFPRINTF (yyoutput, "token %s (", yytname[yytype]);
- else
- YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]);
-
-
-# ifdef YYPRINT
- if (yytype < YYNTOKENS)
- YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep);
-# endif
- switch (yytype)
- {
- default:
- break;
- }
- YYFPRINTF (yyoutput, ")");
-}
-
-#endif /* ! YYDEBUG */
-/*-----------------------------------------------.
-| Release the memory associated to this symbol. |
-`-----------------------------------------------*/
-
-#if defined (__STDC__) || defined (__cplusplus)
-static void
-yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep)
-#else
-static void
-yydestruct (yymsg, yytype, yyvaluep)
- const char *yymsg;
- int yytype;
- YYSTYPE *yyvaluep;
-#endif
-{
- /* Pacify ``unused variable'' warnings. */
- (void) yyvaluep;
-
- if (!yymsg)
- yymsg = "Deleting";
- YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp);
-
- switch (yytype)
- {
-
- default:
- break;
- }
-}
-
-
-/* Prevent warnings from -Wmissing-prototypes. */
-
-#ifdef YYPARSE_PARAM
-# if defined (__STDC__) || defined (__cplusplus)
-UNIV_INTERN int yyparse (void *YYPARSE_PARAM);
-# else
-UNIV_INTERN int yyparse ();
-# endif
-#else /* ! YYPARSE_PARAM */
-#if defined (__STDC__) || defined (__cplusplus)
-UNIV_INTERN int yyparse (void);
-#else
-UNIV_INTERN int yyparse ();
-#endif
-#endif /* ! YYPARSE_PARAM */
-
-
-
-/* The look-ahead symbol. */
-static int yychar;
-
-/* The semantic value of the look-ahead symbol. */
-UNIV_INTERN YYSTYPE yylval;
-
-/* Number of syntax errors so far. */
-static int yynerrs;
-
-
-
-/*----------.
-| yyparse. |
-`----------*/
-
-#ifdef YYPARSE_PARAM
-# if defined (__STDC__) || defined (__cplusplus)
-UNIV_INTERN int yyparse (void *YYPARSE_PARAM)
-# else
-UNIV_INTERN int yyparse (YYPARSE_PARAM)
- void *YYPARSE_PARAM;
-# endif
-#else /* ! YYPARSE_PARAM */
-#if defined (__STDC__) || defined (__cplusplus)
-int
-yyparse (void)
-#else
-int
-yyparse ()
-
-#endif
-#endif
-{
-
- register int yystate;
- register int yyn;
- int yyresult;
- /* Number of tokens to shift before error messages enabled. */
- int yyerrstatus;
- /* Look-ahead token as an internal (translated) token number. */
- int yytoken = 0;
-
- /* Three stacks and their tools:
- `yyss': related to states,
- `yyvs': related to semantic values,
- `yyls': related to locations.
-
- Refer to the stacks thru separate pointers, to allow yyoverflow
- to reallocate them elsewhere. */
-
- /* The state stack. */
- short int yyssa[YYINITDEPTH];
- short int *yyss = yyssa;
- register short int *yyssp;
-
- /* The semantic value stack. */
- YYSTYPE yyvsa[YYINITDEPTH];
- YYSTYPE *yyvs = yyvsa;
- register YYSTYPE *yyvsp;
-
-
-
-#define YYPOPSTACK (yyvsp--, yyssp--)
-
- YYSIZE_T yystacksize = YYINITDEPTH;
-
- /* The variables used to return semantic value and location from the
- action routines. */
- YYSTYPE yyval;
-
-
- /* When reducing, the number of symbols on the RHS of the reduced
- rule. */
- int yylen;
-
- YYDPRINTF ((stderr, "Starting parse\n"));
-
- yystate = 0;
- yyerrstatus = 0;
- yynerrs = 0;
- yychar = YYEMPTY; /* Cause a token to be read. */
-
- /* Initialize stack pointers.
- Waste one element of value and location stack
- so that they stay on the same level as the state stack.
- The wasted elements are never initialized. */
-
- yyssp = yyss;
- yyvsp = yyvs;
-
-
- yyvsp[0] = yylval;
-
- goto yysetstate;
-
-/*------------------------------------------------------------.
-| yynewstate -- Push a new state, which is found in yystate. |
-`------------------------------------------------------------*/
- yynewstate:
- /* In all cases, when you get here, the value and location stacks
- have just been pushed. so pushing a state here evens the stacks.
- */
- yyssp++;
-
- yysetstate:
- *yyssp = yystate;
-
- if (yyss + yystacksize - 1 <= yyssp)
- {
- /* Get the current used size of the three stacks, in elements. */
- YYSIZE_T yysize = yyssp - yyss + 1;
-
-#ifdef yyoverflow
- {
- /* Give user a chance to reallocate the stack. Use copies of
- these so that the &'s don't force the real ones into
- memory. */
- YYSTYPE *yyvs1 = yyvs;
- short int *yyss1 = yyss;
-
-
- /* Each stack pointer address is followed by the size of the
- data in use in that stack, in bytes. This used to be a
- conditional around just the two extra args, but that might
- be undefined if yyoverflow is a macro. */
- yyoverflow ("parser stack overflow",
- &yyss1, yysize * sizeof (*yyssp),
- &yyvs1, yysize * sizeof (*yyvsp),
-
- &yystacksize);
-
- yyss = yyss1;
- yyvs = yyvs1;
- }
-#else /* no yyoverflow */
-# ifndef YYSTACK_RELOCATE
- goto yyoverflowlab;
-# else
- /* Extend the stack our own way. */
- if (YYMAXDEPTH <= yystacksize)
- goto yyoverflowlab;
- yystacksize *= 2;
- if (YYMAXDEPTH < yystacksize)
- yystacksize = YYMAXDEPTH;
-
- {
- short int *yyss1 = yyss;
- union yyalloc *yyptr =
- (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize));
- if (! yyptr)
- goto yyoverflowlab;
- YYSTACK_RELOCATE (yyss);
- YYSTACK_RELOCATE (yyvs);
-
-# undef YYSTACK_RELOCATE
- if (yyss1 != yyssa)
- YYSTACK_FREE (yyss1);
- }
-# endif
-#endif /* no yyoverflow */
-
- yyssp = yyss + yysize - 1;
- yyvsp = yyvs + yysize - 1;
-
-
- YYDPRINTF ((stderr, "Stack size increased to %lu\n",
- (unsigned long int) yystacksize));
-
- if (yyss + yystacksize - 1 <= yyssp)
- YYABORT;
- }
-
- YYDPRINTF ((stderr, "Entering state %d\n", yystate));
-
- goto yybackup;
-
-/*-----------.
-| yybackup. |
-`-----------*/
-yybackup:
-
-/* Do appropriate processing given the current state. */
-/* Read a look-ahead token if we need one and don't already have one. */
-/* yyresume: */
-
- /* First try to decide what to do without reference to look-ahead token. */
-
- yyn = yypact[yystate];
- if (yyn == YYPACT_NINF)
- goto yydefault;
-
- /* Not known => get a look-ahead token if don't already have one. */
-
- /* YYCHAR is either YYEMPTY or YYEOF or a valid look-ahead symbol. */
- if (yychar == YYEMPTY)
- {
- YYDPRINTF ((stderr, "Reading a token: "));
- yychar = YYLEX;
- }
-
- if (yychar <= YYEOF)
- {
- yychar = yytoken = YYEOF;
- YYDPRINTF ((stderr, "Now at end of input.\n"));
- }
- else
- {
- yytoken = YYTRANSLATE (yychar);
- YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc);
- }
-
- /* If the proper action on seeing token YYTOKEN is to reduce or to
- detect an error, take that action. */
- yyn += yytoken;
- if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken)
- goto yydefault;
- yyn = yytable[yyn];
- if (yyn <= 0)
- {
- if (yyn == 0 || yyn == YYTABLE_NINF)
- goto yyerrlab;
- yyn = -yyn;
- goto yyreduce;
- }
-
- if (yyn == YYFINAL)
- YYACCEPT;
-
- /* Shift the look-ahead token. */
- YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc);
-
- /* Discard the token being shifted unless it is eof. */
- if (yychar != YYEOF)
- yychar = YYEMPTY;
-
- *++yyvsp = yylval;
-
-
- /* Count tokens shifted since error; after three, turn off error
- status. */
- if (yyerrstatus)
- yyerrstatus--;
-
- yystate = yyn;
- goto yynewstate;
-
-
-/*-----------------------------------------------------------.
-| yydefault -- do the default action for the current state. |
-`-----------------------------------------------------------*/
-yydefault:
- yyn = yydefact[yystate];
- if (yyn == 0)
- goto yyerrlab;
- goto yyreduce;
-
-
-/*-----------------------------.
-| yyreduce -- Do a reduction. |
-`-----------------------------*/
-yyreduce:
- /* yyn is the number of a rule to reduce with. */
- yylen = yyr2[yyn];
-
- /* If YYLEN is nonzero, implement the default value of the action:
- `$$ = $1'.
-
- Otherwise, the following line sets YYVAL to garbage.
- This behavior is undocumented and Bison
- users should not rely upon it. Assigning to YYVAL
- unconditionally makes the parser a bit smaller, and it avoids a
- GCC warning that YYVAL may be used uninitialized. */
- yyval = yyvsp[1-yylen];
-
-
- YY_REDUCE_PRINT (yyn);
- switch (yyn)
- {
- case 25:
-#line 166 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
- break;
-
- case 26:
-#line 168 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[-1]), (yyvsp[0])); ;}
- break;
-
- case 27:
-#line 172 "pars0grm.y"
- { (yyval) = (yyvsp[0]);;}
- break;
-
- case 28:
-#line 174 "pars0grm.y"
- { (yyval) = pars_func((yyvsp[-3]), (yyvsp[-1])); ;}
- break;
-
- case 29:
-#line 175 "pars0grm.y"
- { (yyval) = (yyvsp[0]);;}
- break;
-
- case 30:
-#line 176 "pars0grm.y"
- { (yyval) = (yyvsp[0]);;}
- break;
-
- case 31:
-#line 177 "pars0grm.y"
- { (yyval) = (yyvsp[0]);;}
- break;
-
- case 32:
-#line 178 "pars0grm.y"
- { (yyval) = (yyvsp[0]);;}
- break;
-
- case 33:
-#line 179 "pars0grm.y"
- { (yyval) = (yyvsp[0]);;}
- break;
-
- case 34:
-#line 180 "pars0grm.y"
- { (yyval) = (yyvsp[0]);;}
- break;
-
- case 35:
-#line 181 "pars0grm.y"
- { (yyval) = (yyvsp[0]);;}
- break;
-
- case 36:
-#line 182 "pars0grm.y"
- { (yyval) = pars_op('+', (yyvsp[-2]), (yyvsp[0])); ;}
- break;
-
- case 37:
-#line 183 "pars0grm.y"
- { (yyval) = pars_op('-', (yyvsp[-2]), (yyvsp[0])); ;}
- break;
-
- case 38:
-#line 184 "pars0grm.y"
- { (yyval) = pars_op('*', (yyvsp[-2]), (yyvsp[0])); ;}
- break;
-
- case 39:
-#line 185 "pars0grm.y"
- { (yyval) = pars_op('/', (yyvsp[-2]), (yyvsp[0])); ;}
- break;
-
- case 40:
-#line 186 "pars0grm.y"
- { (yyval) = pars_op('-', (yyvsp[0]), NULL); ;}
- break;
-
- case 41:
-#line 187 "pars0grm.y"
- { (yyval) = (yyvsp[-1]); ;}
- break;
-
- case 42:
-#line 188 "pars0grm.y"
- { (yyval) = pars_op('=', (yyvsp[-2]), (yyvsp[0])); ;}
- break;
-
- case 43:
-#line 189 "pars0grm.y"
- { (yyval) = pars_op('<', (yyvsp[-2]), (yyvsp[0])); ;}
- break;
-
- case 44:
-#line 190 "pars0grm.y"
- { (yyval) = pars_op('>', (yyvsp[-2]), (yyvsp[0])); ;}
- break;
-
- case 45:
-#line 191 "pars0grm.y"
- { (yyval) = pars_op(PARS_GE_TOKEN, (yyvsp[-2]), (yyvsp[0])); ;}
- break;
-
- case 46:
-#line 192 "pars0grm.y"
- { (yyval) = pars_op(PARS_LE_TOKEN, (yyvsp[-2]), (yyvsp[0])); ;}
- break;
-
- case 47:
-#line 193 "pars0grm.y"
- { (yyval) = pars_op(PARS_NE_TOKEN, (yyvsp[-2]), (yyvsp[0])); ;}
- break;
-
- case 48:
-#line 194 "pars0grm.y"
- { (yyval) = pars_op(PARS_AND_TOKEN, (yyvsp[-2]), (yyvsp[0])); ;}
- break;
-
- case 49:
-#line 195 "pars0grm.y"
- { (yyval) = pars_op(PARS_OR_TOKEN, (yyvsp[-2]), (yyvsp[0])); ;}
- break;
-
- case 50:
-#line 196 "pars0grm.y"
- { (yyval) = pars_op(PARS_NOT_TOKEN, (yyvsp[0]), NULL); ;}
- break;
-
- case 51:
-#line 198 "pars0grm.y"
- { (yyval) = pars_op(PARS_NOTFOUND_TOKEN, (yyvsp[-2]), NULL); ;}
- break;
-
- case 52:
-#line 200 "pars0grm.y"
- { (yyval) = pars_op(PARS_NOTFOUND_TOKEN, (yyvsp[-2]), NULL); ;}
- break;
-
- case 53:
-#line 204 "pars0grm.y"
- { (yyval) = &pars_to_char_token; ;}
- break;
-
- case 54:
-#line 205 "pars0grm.y"
- { (yyval) = &pars_to_number_token; ;}
- break;
-
- case 55:
-#line 206 "pars0grm.y"
- { (yyval) = &pars_to_binary_token; ;}
- break;
-
- case 56:
-#line 208 "pars0grm.y"
- { (yyval) = &pars_binary_to_number_token; ;}
- break;
-
- case 57:
-#line 209 "pars0grm.y"
- { (yyval) = &pars_substr_token; ;}
- break;
-
- case 58:
-#line 210 "pars0grm.y"
- { (yyval) = &pars_concat_token; ;}
- break;
-
- case 59:
-#line 211 "pars0grm.y"
- { (yyval) = &pars_instr_token; ;}
- break;
-
- case 60:
-#line 212 "pars0grm.y"
- { (yyval) = &pars_length_token; ;}
- break;
-
- case 61:
-#line 213 "pars0grm.y"
- { (yyval) = &pars_sysdate_token; ;}
- break;
-
- case 62:
-#line 214 "pars0grm.y"
- { (yyval) = &pars_rnd_token; ;}
- break;
-
- case 63:
-#line 215 "pars0grm.y"
- { (yyval) = &pars_rnd_str_token; ;}
- break;
-
- case 67:
-#line 226 "pars0grm.y"
- { (yyval) = pars_stored_procedure_call((yyvsp[-4])); ;}
- break;
-
- case 68:
-#line 231 "pars0grm.y"
- { (yyval) = pars_procedure_call((yyvsp[-3]), (yyvsp[-1])); ;}
- break;
-
- case 69:
-#line 235 "pars0grm.y"
- { (yyval) = &pars_replstr_token; ;}
- break;
-
- case 70:
-#line 236 "pars0grm.y"
- { (yyval) = &pars_printf_token; ;}
- break;
-
- case 71:
-#line 237 "pars0grm.y"
- { (yyval) = &pars_assert_token; ;}
- break;
-
- case 72:
-#line 241 "pars0grm.y"
- { (yyval) = (yyvsp[-2]); ;}
- break;
-
- case 73:
-#line 245 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
- break;
-
- case 74:
-#line 247 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;}
- break;
-
- case 75:
-#line 251 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 76:
-#line 252 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
- break;
-
- case 77:
-#line 254 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;}
- break;
-
- case 78:
-#line 258 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 79:
-#line 259 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[0]));;}
- break;
-
- case 80:
-#line 260 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;}
- break;
-
- case 81:
-#line 264 "pars0grm.y"
- { (yyval) = (yyvsp[0]); ;}
- break;
-
- case 82:
-#line 266 "pars0grm.y"
- { (yyval) = pars_func(&pars_count_token,
- que_node_list_add_last(NULL,
- sym_tab_add_int_lit(
- pars_sym_tab_global, 1))); ;}
- break;
-
- case 83:
-#line 271 "pars0grm.y"
- { (yyval) = pars_func(&pars_count_token,
- que_node_list_add_last(NULL,
- pars_func(&pars_distinct_token,
- que_node_list_add_last(
- NULL, (yyvsp[-1]))))); ;}
- break;
-
- case 84:
-#line 277 "pars0grm.y"
- { (yyval) = pars_func(&pars_sum_token,
- que_node_list_add_last(NULL,
- (yyvsp[-1]))); ;}
- break;
-
- case 85:
-#line 283 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 86:
-#line 284 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
- break;
-
- case 87:
-#line 286 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;}
- break;
-
- case 88:
-#line 290 "pars0grm.y"
- { (yyval) = pars_select_list(&pars_star_denoter,
- NULL); ;}
- break;
-
- case 89:
-#line 293 "pars0grm.y"
- { (yyval) = pars_select_list((yyvsp[-2]), (yyvsp[0])); ;}
- break;
-
- case 90:
-#line 294 "pars0grm.y"
- { (yyval) = pars_select_list((yyvsp[0]), NULL); ;}
- break;
-
- case 91:
-#line 298 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 92:
-#line 299 "pars0grm.y"
- { (yyval) = (yyvsp[0]); ;}
- break;
-
- case 93:
-#line 303 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 94:
-#line 305 "pars0grm.y"
- { (yyval) = &pars_update_token; ;}
- break;
-
- case 95:
-#line 309 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 96:
-#line 311 "pars0grm.y"
- { yyval = &pars_share_token; ;}
- break;
-
- case 97:
-#line 315 "pars0grm.y"
- { (yyval) = &pars_asc_token; ;}
- break;
-
- case 98:
-#line 316 "pars0grm.y"
- { (yyval) = &pars_asc_token; ;}
- break;
-
- case 99:
-#line 317 "pars0grm.y"
- { (yyval) = &pars_desc_token; ;}
- break;
-
- case 100:
-#line 321 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 101:
-#line 323 "pars0grm.y"
- { (yyval) = pars_order_by((yyvsp[-1]), (yyvsp[0])); ;}
- break;
-
- case 102:
-#line 332 "pars0grm.y"
- { (yyval) = pars_select_statement((yyvsp[-6]), (yyvsp[-4]), (yyvsp[-3]),
- (yyvsp[-2]), (yyvsp[-1]), (yyvsp[0])); ;}
- break;
-
- case 103:
-#line 338 "pars0grm.y"
- { (yyval) = (yyvsp[0]); ;}
- break;
-
- case 104:
-#line 343 "pars0grm.y"
- { (yyval) = pars_insert_statement((yyvsp[-4]), (yyvsp[-1]), NULL); ;}
- break;
-
- case 105:
-#line 345 "pars0grm.y"
- { (yyval) = pars_insert_statement((yyvsp[-1]), NULL, (yyvsp[0])); ;}
- break;
-
- case 106:
-#line 349 "pars0grm.y"
- { (yyval) = pars_column_assignment((yyvsp[-2]), (yyvsp[0])); ;}
- break;
-
- case 107:
-#line 353 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
- break;
-
- case 108:
-#line 355 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;}
- break;
-
- case 109:
-#line 361 "pars0grm.y"
- { (yyval) = (yyvsp[0]); ;}
- break;
-
- case 110:
-#line 367 "pars0grm.y"
- { (yyval) = pars_update_statement_start(FALSE,
- (yyvsp[-2]), (yyvsp[0])); ;}
- break;
-
- case 111:
-#line 373 "pars0grm.y"
- { (yyval) = pars_update_statement((yyvsp[-1]), NULL, (yyvsp[0])); ;}
- break;
-
- case 112:
-#line 378 "pars0grm.y"
- { (yyval) = pars_update_statement((yyvsp[-1]), (yyvsp[0]), NULL); ;}
- break;
-
- case 113:
-#line 383 "pars0grm.y"
- { (yyval) = pars_update_statement_start(TRUE,
- (yyvsp[0]), NULL); ;}
- break;
-
- case 114:
-#line 389 "pars0grm.y"
- { (yyval) = pars_update_statement((yyvsp[-1]), NULL, (yyvsp[0])); ;}
- break;
-
- case 115:
-#line 394 "pars0grm.y"
- { (yyval) = pars_update_statement((yyvsp[-1]), (yyvsp[0]), NULL); ;}
- break;
-
- case 116:
-#line 399 "pars0grm.y"
- { (yyval) = pars_row_printf_statement((yyvsp[0])); ;}
- break;
-
- case 117:
-#line 404 "pars0grm.y"
- { (yyval) = pars_assignment_statement((yyvsp[-2]), (yyvsp[0])); ;}
- break;
-
- case 118:
-#line 410 "pars0grm.y"
- { (yyval) = pars_elsif_element((yyvsp[-2]), (yyvsp[0])); ;}
- break;
-
- case 119:
-#line 414 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
- break;
-
- case 120:
-#line 416 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[-1]), (yyvsp[0])); ;}
- break;
-
- case 121:
-#line 420 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 122:
-#line 422 "pars0grm.y"
- { (yyval) = (yyvsp[0]); ;}
- break;
-
- case 123:
-#line 423 "pars0grm.y"
- { (yyval) = (yyvsp[0]); ;}
- break;
-
- case 124:
-#line 430 "pars0grm.y"
- { (yyval) = pars_if_statement((yyvsp[-5]), (yyvsp[-3]), (yyvsp[-2])); ;}
- break;
-
- case 125:
-#line 436 "pars0grm.y"
- { (yyval) = pars_while_statement((yyvsp[-4]), (yyvsp[-2])); ;}
- break;
-
- case 126:
-#line 444 "pars0grm.y"
- { (yyval) = pars_for_statement((yyvsp[-8]), (yyvsp[-6]), (yyvsp[-4]), (yyvsp[-2])); ;}
- break;
-
- case 127:
-#line 448 "pars0grm.y"
- { (yyval) = pars_exit_statement(); ;}
- break;
-
- case 128:
-#line 452 "pars0grm.y"
- { (yyval) = pars_return_statement(); ;}
- break;
-
- case 129:
-#line 457 "pars0grm.y"
- { (yyval) = pars_open_statement(
- ROW_SEL_OPEN_CURSOR, (yyvsp[0])); ;}
- break;
-
- case 130:
-#line 463 "pars0grm.y"
- { (yyval) = pars_open_statement(
- ROW_SEL_CLOSE_CURSOR, (yyvsp[0])); ;}
- break;
-
- case 131:
-#line 469 "pars0grm.y"
- { (yyval) = pars_fetch_statement((yyvsp[-2]), (yyvsp[0]), NULL); ;}
- break;
-
- case 132:
-#line 471 "pars0grm.y"
- { (yyval) = pars_fetch_statement((yyvsp[-2]), NULL, (yyvsp[0])); ;}
- break;
-
- case 133:
-#line 476 "pars0grm.y"
- { (yyval) = pars_column_def((yyvsp[-4]), (yyvsp[-3]), (yyvsp[-2]), (yyvsp[-1]), (yyvsp[0])); ;}
- break;
-
- case 134:
-#line 480 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
- break;
-
- case 135:
-#line 482 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;}
- break;
-
- case 136:
-#line 486 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 137:
-#line 488 "pars0grm.y"
- { (yyval) = (yyvsp[-1]); ;}
- break;
-
- case 138:
-#line 492 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 139:
-#line 494 "pars0grm.y"
- { (yyval) = &pars_int_token;
- /* pass any non-NULL pointer */ ;}
- break;
-
- case 140:
-#line 499 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 141:
-#line 501 "pars0grm.y"
- { (yyval) = &pars_int_token;
- /* pass any non-NULL pointer */ ;}
- break;
-
- case 142:
-#line 506 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 143:
-#line 508 "pars0grm.y"
- { (yyval) = &pars_int_token;
- /* pass any non-NULL pointer */ ;}
- break;
-
- case 144:
-#line 515 "pars0grm.y"
- { (yyval) = pars_create_table((yyvsp[-4]), (yyvsp[-2]), (yyvsp[0])); ;}
- break;
-
- case 145:
-#line 519 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
- break;
-
- case 146:
-#line 521 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;}
- break;
-
- case 147:
-#line 525 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 148:
-#line 526 "pars0grm.y"
- { (yyval) = &pars_unique_token; ;}
- break;
-
- case 149:
-#line 530 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 150:
-#line 531 "pars0grm.y"
- { (yyval) = &pars_clustered_token; ;}
- break;
-
- case 151:
-#line 539 "pars0grm.y"
- { (yyval) = pars_create_index((yyvsp[-8]), (yyvsp[-7]), (yyvsp[-5]), (yyvsp[-3]), (yyvsp[-1])); ;}
- break;
-
- case 152:
-#line 544 "pars0grm.y"
- { (yyval) = pars_commit_statement(); ;}
- break;
-
- case 153:
-#line 549 "pars0grm.y"
- { (yyval) = pars_rollback_statement(); ;}
- break;
-
- case 154:
-#line 553 "pars0grm.y"
- { (yyval) = &pars_int_token; ;}
- break;
-
- case 155:
-#line 554 "pars0grm.y"
- { (yyval) = &pars_int_token; ;}
- break;
-
- case 156:
-#line 555 "pars0grm.y"
- { (yyval) = &pars_char_token; ;}
- break;
-
- case 157:
-#line 556 "pars0grm.y"
- { (yyval) = &pars_binary_token; ;}
- break;
-
- case 158:
-#line 557 "pars0grm.y"
- { (yyval) = &pars_blob_token; ;}
- break;
-
- case 159:
-#line 562 "pars0grm.y"
- { (yyval) = pars_parameter_declaration((yyvsp[-2]),
- PARS_INPUT, (yyvsp[0])); ;}
- break;
-
- case 160:
-#line 565 "pars0grm.y"
- { (yyval) = pars_parameter_declaration((yyvsp[-2]),
- PARS_OUTPUT, (yyvsp[0])); ;}
- break;
-
- case 161:
-#line 570 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 162:
-#line 571 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[0])); ;}
- break;
-
- case 163:
-#line 573 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[-2]), (yyvsp[0])); ;}
- break;
-
- case 164:
-#line 578 "pars0grm.y"
- { (yyval) = pars_variable_declaration((yyvsp[-2]), (yyvsp[-1])); ;}
- break;
-
- case 168:
-#line 590 "pars0grm.y"
- { (yyval) = pars_cursor_declaration((yyvsp[-3]), (yyvsp[-1])); ;}
- break;
-
- case 169:
-#line 595 "pars0grm.y"
- { (yyval) = pars_function_declaration((yyvsp[-1])); ;}
- break;
-
- case 175:
-#line 616 "pars0grm.y"
- { (yyval) = pars_procedure_definition((yyvsp[-9]), (yyvsp[-7]),
- (yyvsp[-1])); ;}
- break;
-
-
- }
-
-/* Line 1010 of yacc.c. */
-#line 2345 "pars0grm.c"
-
- yyvsp -= yylen;
- yyssp -= yylen;
-
-
- YY_STACK_PRINT (yyss, yyssp);
-
- *++yyvsp = yyval;
-
-
- /* Now `shift' the result of the reduction. Determine what state
- that goes to, based on the state we popped back to and the rule
- number reduced by. */
-
- yyn = yyr1[yyn];
-
- yystate = yypgoto[yyn - YYNTOKENS] + *yyssp;
- if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp)
- yystate = yytable[yystate];
- else
- yystate = yydefgoto[yyn - YYNTOKENS];
-
- goto yynewstate;
-
-
-/*------------------------------------.
-| yyerrlab -- here on detecting error |
-`------------------------------------*/
-yyerrlab:
- /* If not already recovering from an error, report this error. */
- if (!yyerrstatus)
- {
- ++yynerrs;
-#if YYERROR_VERBOSE
- yyn = yypact[yystate];
-
- if (YYPACT_NINF < yyn && yyn < YYLAST)
- {
- YYSIZE_T yysize = 0;
- int yytype = YYTRANSLATE (yychar);
- const char* yyprefix;
- char *yymsg;
- int yyx;
-
- /* Start YYX at -YYN if negative to avoid negative indexes in
- YYCHECK. */
- int yyxbegin = yyn < 0 ? -yyn : 0;
-
- /* Stay within bounds of both yycheck and yytname. */
- int yychecklim = YYLAST - yyn;
- int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS;
- int yycount = 0;
-
- yyprefix = ", expecting ";
- for (yyx = yyxbegin; yyx < yyxend; ++yyx)
- if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR)
- {
- yysize += yystrlen (yyprefix) + yystrlen (yytname [yyx]);
- yycount += 1;
- if (yycount == 5)
- {
- yysize = 0;
- break;
- }
- }
- yysize += (sizeof ("syntax error, unexpected ")
- + yystrlen (yytname[yytype]));
- yymsg = (char *) YYSTACK_ALLOC (yysize);
- if (yymsg != 0)
- {
- char *yyp = yystpcpy (yymsg, "syntax error, unexpected ");
- yyp = yystpcpy (yyp, yytname[yytype]);
-
- if (yycount < 5)
- {
- yyprefix = ", expecting ";
- for (yyx = yyxbegin; yyx < yyxend; ++yyx)
- if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR)
- {
- yyp = yystpcpy (yyp, yyprefix);
- yyp = yystpcpy (yyp, yytname[yyx]);
- yyprefix = " or ";
- }
- }
- yyerror (yymsg);
- YYSTACK_FREE (yymsg);
- }
- else
- yyerror ("syntax error; also virtual memory exhausted");
- }
- else
-#endif /* YYERROR_VERBOSE */
- yyerror ("syntax error");
- }
-
-
-
- if (yyerrstatus == 3)
- {
- /* If just tried and failed to reuse look-ahead token after an
- error, discard it. */
-
- if (yychar <= YYEOF)
- {
- /* If at end of input, pop the error token,
- then the rest of the stack, then return failure. */
- if (yychar == YYEOF)
- for (;;)
- {
-
- YYPOPSTACK;
- if (yyssp == yyss)
- YYABORT;
- yydestruct ("Error: popping",
- yystos[*yyssp], yyvsp);
- }
- }
- else
- {
- yydestruct ("Error: discarding", yytoken, &yylval);
- yychar = YYEMPTY;
- }
- }
-
- /* Else will try to reuse look-ahead token after shifting the error
- token. */
- goto yyerrlab1;
-
-
-/*---------------------------------------------------.
-| yyerrorlab -- error raised explicitly by YYERROR. |
-`---------------------------------------------------*/
-yyerrorlab:
-
-#ifdef __GNUC__
- /* Pacify GCC when the user code never invokes YYERROR and the label
- yyerrorlab therefore never appears in user code. */
- if (0)
- goto yyerrorlab;
-#endif
-
-yyvsp -= yylen;
- yyssp -= yylen;
- yystate = *yyssp;
- goto yyerrlab1;
-
-
-/*-------------------------------------------------------------.
-| yyerrlab1 -- common code for both syntax error and YYERROR. |
-`-------------------------------------------------------------*/
-yyerrlab1:
- yyerrstatus = 3; /* Each real token shifted decrements this. */
-
- for (;;)
- {
- yyn = yypact[yystate];
- if (yyn != YYPACT_NINF)
- {
- yyn += YYTERROR;
- if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR)
- {
- yyn = yytable[yyn];
- if (0 < yyn)
- break;
- }
- }
-
- /* Pop the current state because it cannot handle the error token. */
- if (yyssp == yyss)
- YYABORT;
-
-
- yydestruct ("Error: popping", yystos[yystate], yyvsp);
- YYPOPSTACK;
- yystate = *yyssp;
- YY_STACK_PRINT (yyss, yyssp);
- }
-
- if (yyn == YYFINAL)
- YYACCEPT;
-
- *++yyvsp = yylval;
-
-
- /* Shift the error token. */
- YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp);
-
- yystate = yyn;
- goto yynewstate;
-
-
-/*-------------------------------------.
-| yyacceptlab -- YYACCEPT comes here. |
-`-------------------------------------*/
-yyacceptlab:
- yyresult = 0;
- goto yyreturn;
-
-/*-----------------------------------.
-| yyabortlab -- YYABORT comes here. |
-`-----------------------------------*/
-yyabortlab:
- yydestruct ("Error: discarding lookahead",
- yytoken, &yylval);
- yychar = YYEMPTY;
- yyresult = 1;
- goto yyreturn;
-
-#ifndef yyoverflow
-/*----------------------------------------------.
-| yyoverflowlab -- parser overflow comes here. |
-`----------------------------------------------*/
-yyoverflowlab:
- yyerror ("parser stack overflow");
- yyresult = 2;
- /* Fall through. */
-#endif
-
-yyreturn:
-#ifndef yyoverflow
- if (yyss != yyssa)
- YYSTACK_FREE (yyss);
-#endif
- return yyresult;
-}
-
-
-#line 620 "pars0grm.y"
-
-
diff --git a/storage/innodb_plugin/pars/pars0grm.y b/storage/innodb_plugin/pars/pars0grm.y
deleted file mode 100644
index 14d64f1826f..00000000000
--- a/storage/innodb_plugin/pars/pars0grm.y
+++ /dev/null
@@ -1,635 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/******************************************************
-SQL parser: input file for the GNU Bison parser generator
-
-Look from pars0lex.l for instructions how to generate the C files for
-the InnoDB parser.
-
-Created 12/14/1997 Heikki Tuuri
-*******************************************************/
-
-%{
-/* The value of the semantic attribute is a pointer to a query tree node
-que_node_t */
-
-#include "univ.i"
-#include <math.h> /* Can't be before univ.i */
-#include "pars0pars.h"
-#include "mem0mem.h"
-#include "que0types.h"
-#include "que0que.h"
-#include "row0sel.h"
-
-#define YYSTYPE que_node_t*
-
-/* #define __STDC__ */
-
-int
-yylex(void);
-%}
-
-%token PARS_INT_LIT
-%token PARS_FLOAT_LIT
-%token PARS_STR_LIT
-%token PARS_FIXBINARY_LIT
-%token PARS_BLOB_LIT
-%token PARS_NULL_LIT
-%token PARS_ID_TOKEN
-%token PARS_AND_TOKEN
-%token PARS_OR_TOKEN
-%token PARS_NOT_TOKEN
-%token PARS_GE_TOKEN
-%token PARS_LE_TOKEN
-%token PARS_NE_TOKEN
-%token PARS_PROCEDURE_TOKEN
-%token PARS_IN_TOKEN
-%token PARS_OUT_TOKEN
-%token PARS_BINARY_TOKEN
-%token PARS_BLOB_TOKEN
-%token PARS_INT_TOKEN
-%token PARS_INTEGER_TOKEN
-%token PARS_FLOAT_TOKEN
-%token PARS_CHAR_TOKEN
-%token PARS_IS_TOKEN
-%token PARS_BEGIN_TOKEN
-%token PARS_END_TOKEN
-%token PARS_IF_TOKEN
-%token PARS_THEN_TOKEN
-%token PARS_ELSE_TOKEN
-%token PARS_ELSIF_TOKEN
-%token PARS_LOOP_TOKEN
-%token PARS_WHILE_TOKEN
-%token PARS_RETURN_TOKEN
-%token PARS_SELECT_TOKEN
-%token PARS_SUM_TOKEN
-%token PARS_COUNT_TOKEN
-%token PARS_DISTINCT_TOKEN
-%token PARS_FROM_TOKEN
-%token PARS_WHERE_TOKEN
-%token PARS_FOR_TOKEN
-%token PARS_DDOT_TOKEN
-%token PARS_READ_TOKEN
-%token PARS_ORDER_TOKEN
-%token PARS_BY_TOKEN
-%token PARS_ASC_TOKEN
-%token PARS_DESC_TOKEN
-%token PARS_INSERT_TOKEN
-%token PARS_INTO_TOKEN
-%token PARS_VALUES_TOKEN
-%token PARS_UPDATE_TOKEN
-%token PARS_SET_TOKEN
-%token PARS_DELETE_TOKEN
-%token PARS_CURRENT_TOKEN
-%token PARS_OF_TOKEN
-%token PARS_CREATE_TOKEN
-%token PARS_TABLE_TOKEN
-%token PARS_INDEX_TOKEN
-%token PARS_UNIQUE_TOKEN
-%token PARS_CLUSTERED_TOKEN
-%token PARS_DOES_NOT_FIT_IN_MEM_TOKEN
-%token PARS_ON_TOKEN
-%token PARS_ASSIGN_TOKEN
-%token PARS_DECLARE_TOKEN
-%token PARS_CURSOR_TOKEN
-%token PARS_SQL_TOKEN
-%token PARS_OPEN_TOKEN
-%token PARS_FETCH_TOKEN
-%token PARS_CLOSE_TOKEN
-%token PARS_NOTFOUND_TOKEN
-%token PARS_TO_CHAR_TOKEN
-%token PARS_TO_NUMBER_TOKEN
-%token PARS_TO_BINARY_TOKEN
-%token PARS_BINARY_TO_NUMBER_TOKEN
-%token PARS_SUBSTR_TOKEN
-%token PARS_REPLSTR_TOKEN
-%token PARS_CONCAT_TOKEN
-%token PARS_INSTR_TOKEN
-%token PARS_LENGTH_TOKEN
-%token PARS_SYSDATE_TOKEN
-%token PARS_PRINTF_TOKEN
-%token PARS_ASSERT_TOKEN
-%token PARS_RND_TOKEN
-%token PARS_RND_STR_TOKEN
-%token PARS_ROW_PRINTF_TOKEN
-%token PARS_COMMIT_TOKEN
-%token PARS_ROLLBACK_TOKEN
-%token PARS_WORK_TOKEN
-%token PARS_UNSIGNED_TOKEN
-%token PARS_EXIT_TOKEN
-%token PARS_FUNCTION_TOKEN
-%token PARS_LOCK_TOKEN
-%token PARS_SHARE_TOKEN
-%token PARS_MODE_TOKEN
-
-%left PARS_AND_TOKEN PARS_OR_TOKEN
-%left PARS_NOT_TOKEN
-%left '=' '<' '>' PARS_GE_TOKEN PARS_LE_TOKEN
-%left '-' '+'
-%left '*' '/'
-%left NEG /* negation--unary minus */
-%left '%'
-
-/* Grammar follows */
-%%
-
-top_statement:
- procedure_definition ';'
-
-statement:
- stored_procedure_call
- | predefined_procedure_call ';'
- | while_statement ';'
- | for_statement ';'
- | exit_statement ';'
- | if_statement ';'
- | return_statement ';'
- | assignment_statement ';'
- | select_statement ';'
- | insert_statement ';'
- | row_printf_statement ';'
- | delete_statement_searched ';'
- | delete_statement_positioned ';'
- | update_statement_searched ';'
- | update_statement_positioned ';'
- | open_cursor_statement ';'
- | fetch_statement ';'
- | close_cursor_statement ';'
- | commit_statement ';'
- | rollback_statement ';'
- | create_table ';'
- | create_index ';'
-;
-
-statement_list:
- statement { $$ = que_node_list_add_last(NULL, $1); }
- | statement_list statement
- { $$ = que_node_list_add_last($1, $2); }
-;
-
-exp:
- PARS_ID_TOKEN { $$ = $1;}
- | function_name '(' exp_list ')'
- { $$ = pars_func($1, $3); }
- | PARS_INT_LIT { $$ = $1;}
- | PARS_FLOAT_LIT { $$ = $1;}
- | PARS_STR_LIT { $$ = $1;}
- | PARS_FIXBINARY_LIT { $$ = $1;}
- | PARS_BLOB_LIT { $$ = $1;}
- | PARS_NULL_LIT { $$ = $1;}
- | PARS_SQL_TOKEN { $$ = $1;}
- | exp '+' exp { $$ = pars_op('+', $1, $3); }
- | exp '-' exp { $$ = pars_op('-', $1, $3); }
- | exp '*' exp { $$ = pars_op('*', $1, $3); }
- | exp '/' exp { $$ = pars_op('/', $1, $3); }
- | '-' exp %prec NEG { $$ = pars_op('-', $2, NULL); }
- | '(' exp ')' { $$ = $2; }
- | exp '=' exp { $$ = pars_op('=', $1, $3); }
- | exp '<' exp { $$ = pars_op('<', $1, $3); }
- | exp '>' exp { $$ = pars_op('>', $1, $3); }
- | exp PARS_GE_TOKEN exp { $$ = pars_op(PARS_GE_TOKEN, $1, $3); }
- | exp PARS_LE_TOKEN exp { $$ = pars_op(PARS_LE_TOKEN, $1, $3); }
- | exp PARS_NE_TOKEN exp { $$ = pars_op(PARS_NE_TOKEN, $1, $3); }
- | exp PARS_AND_TOKEN exp{ $$ = pars_op(PARS_AND_TOKEN, $1, $3); }
- | exp PARS_OR_TOKEN exp { $$ = pars_op(PARS_OR_TOKEN, $1, $3); }
- | PARS_NOT_TOKEN exp { $$ = pars_op(PARS_NOT_TOKEN, $2, NULL); }
- | PARS_ID_TOKEN '%' PARS_NOTFOUND_TOKEN
- { $$ = pars_op(PARS_NOTFOUND_TOKEN, $1, NULL); }
- | PARS_SQL_TOKEN '%' PARS_NOTFOUND_TOKEN
- { $$ = pars_op(PARS_NOTFOUND_TOKEN, $1, NULL); }
-;
-
-function_name:
- PARS_TO_CHAR_TOKEN { $$ = &pars_to_char_token; }
- | PARS_TO_NUMBER_TOKEN { $$ = &pars_to_number_token; }
- | PARS_TO_BINARY_TOKEN { $$ = &pars_to_binary_token; }
- | PARS_BINARY_TO_NUMBER_TOKEN
- { $$ = &pars_binary_to_number_token; }
- | PARS_SUBSTR_TOKEN { $$ = &pars_substr_token; }
- | PARS_CONCAT_TOKEN { $$ = &pars_concat_token; }
- | PARS_INSTR_TOKEN { $$ = &pars_instr_token; }
- | PARS_LENGTH_TOKEN { $$ = &pars_length_token; }
- | PARS_SYSDATE_TOKEN { $$ = &pars_sysdate_token; }
- | PARS_RND_TOKEN { $$ = &pars_rnd_token; }
- | PARS_RND_STR_TOKEN { $$ = &pars_rnd_str_token; }
-;
-
-question_mark_list:
- /* Nothing */
- | '?'
- | question_mark_list ',' '?'
-;
-
-stored_procedure_call:
- '{' PARS_ID_TOKEN '(' question_mark_list ')' '}'
- { $$ = pars_stored_procedure_call($2); }
-;
-
-predefined_procedure_call:
- predefined_procedure_name '(' exp_list ')'
- { $$ = pars_procedure_call($1, $3); }
-;
-
-predefined_procedure_name:
- PARS_REPLSTR_TOKEN { $$ = &pars_replstr_token; }
- | PARS_PRINTF_TOKEN { $$ = &pars_printf_token; }
- | PARS_ASSERT_TOKEN { $$ = &pars_assert_token; }
-;
-
-user_function_call:
- PARS_ID_TOKEN '(' ')' { $$ = $1; }
-;
-
-table_list:
- PARS_ID_TOKEN { $$ = que_node_list_add_last(NULL, $1); }
- | table_list ',' PARS_ID_TOKEN
- { $$ = que_node_list_add_last($1, $3); }
-;
-
-variable_list:
- /* Nothing */ { $$ = NULL; }
- | PARS_ID_TOKEN { $$ = que_node_list_add_last(NULL, $1); }
- | variable_list ',' PARS_ID_TOKEN
- { $$ = que_node_list_add_last($1, $3); }
-;
-
-exp_list:
- /* Nothing */ { $$ = NULL; }
- | exp { $$ = que_node_list_add_last(NULL, $1);}
- | exp_list ',' exp { $$ = que_node_list_add_last($1, $3); }
-;
-
-select_item:
- exp { $$ = $1; }
- | PARS_COUNT_TOKEN '(' '*' ')'
- { $$ = pars_func(&pars_count_token,
- que_node_list_add_last(NULL,
- sym_tab_add_int_lit(
- pars_sym_tab_global, 1))); }
- | PARS_COUNT_TOKEN '(' PARS_DISTINCT_TOKEN PARS_ID_TOKEN ')'
- { $$ = pars_func(&pars_count_token,
- que_node_list_add_last(NULL,
- pars_func(&pars_distinct_token,
- que_node_list_add_last(
- NULL, $4)))); }
- | PARS_SUM_TOKEN '(' exp ')'
- { $$ = pars_func(&pars_sum_token,
- que_node_list_add_last(NULL,
- $3)); }
-;
-
-select_item_list:
- /* Nothing */ { $$ = NULL; }
- | select_item { $$ = que_node_list_add_last(NULL, $1); }
- | select_item_list ',' select_item
- { $$ = que_node_list_add_last($1, $3); }
-;
-
-select_list:
- '*' { $$ = pars_select_list(&pars_star_denoter,
- NULL); }
- | select_item_list PARS_INTO_TOKEN variable_list
- { $$ = pars_select_list($1, $3); }
- | select_item_list { $$ = pars_select_list($1, NULL); }
-;
-
-search_condition:
- /* Nothing */ { $$ = NULL; }
- | PARS_WHERE_TOKEN exp { $$ = $2; }
-;
-
-for_update_clause:
- /* Nothing */ { $$ = NULL; }
- | PARS_FOR_TOKEN PARS_UPDATE_TOKEN
- { $$ = &pars_update_token; }
-;
-
-lock_shared_clause:
- /* Nothing */ { $$ = NULL; }
- | PARS_LOCK_TOKEN PARS_IN_TOKEN PARS_SHARE_TOKEN PARS_MODE_TOKEN
- { $$ = &pars_share_token; }
-;
-
-order_direction:
- /* Nothing */ { $$ = &pars_asc_token; }
- | PARS_ASC_TOKEN { $$ = &pars_asc_token; }
- | PARS_DESC_TOKEN { $$ = &pars_desc_token; }
-;
-
-order_by_clause:
- /* Nothing */ { $$ = NULL; }
- | PARS_ORDER_TOKEN PARS_BY_TOKEN PARS_ID_TOKEN order_direction
- { $$ = pars_order_by($3, $4); }
-;
-
-select_statement:
- PARS_SELECT_TOKEN select_list
- PARS_FROM_TOKEN table_list
- search_condition
- for_update_clause
- lock_shared_clause
- order_by_clause { $$ = pars_select_statement($2, $4, $5,
- $6, $7, $8); }
-;
-
-insert_statement_start:
- PARS_INSERT_TOKEN PARS_INTO_TOKEN
- PARS_ID_TOKEN { $$ = $3; }
-;
-
-insert_statement:
- insert_statement_start PARS_VALUES_TOKEN '(' exp_list ')'
- { $$ = pars_insert_statement($1, $4, NULL); }
- | insert_statement_start select_statement
- { $$ = pars_insert_statement($1, NULL, $2); }
-;
-
-column_assignment:
- PARS_ID_TOKEN '=' exp { $$ = pars_column_assignment($1, $3); }
-;
-
-column_assignment_list:
- column_assignment { $$ = que_node_list_add_last(NULL, $1); }
- | column_assignment_list ',' column_assignment
- { $$ = que_node_list_add_last($1, $3); }
-;
-
-cursor_positioned:
- PARS_WHERE_TOKEN
- PARS_CURRENT_TOKEN PARS_OF_TOKEN
- PARS_ID_TOKEN { $$ = $4; }
-;
-
-update_statement_start:
- PARS_UPDATE_TOKEN PARS_ID_TOKEN
- PARS_SET_TOKEN
- column_assignment_list { $$ = pars_update_statement_start(FALSE,
- $2, $4); }
-;
-
-update_statement_searched:
- update_statement_start
- search_condition { $$ = pars_update_statement($1, NULL, $2); }
-;
-
-update_statement_positioned:
- update_statement_start
- cursor_positioned { $$ = pars_update_statement($1, $2, NULL); }
-;
-
-delete_statement_start:
- PARS_DELETE_TOKEN PARS_FROM_TOKEN
- PARS_ID_TOKEN { $$ = pars_update_statement_start(TRUE,
- $3, NULL); }
-;
-
-delete_statement_searched:
- delete_statement_start
- search_condition { $$ = pars_update_statement($1, NULL, $2); }
-;
-
-delete_statement_positioned:
- delete_statement_start
- cursor_positioned { $$ = pars_update_statement($1, $2, NULL); }
-;
-
-row_printf_statement:
- PARS_ROW_PRINTF_TOKEN select_statement
- { $$ = pars_row_printf_statement($2); }
-;
-
-assignment_statement:
- PARS_ID_TOKEN PARS_ASSIGN_TOKEN exp
- { $$ = pars_assignment_statement($1, $3); }
-;
-
-elsif_element:
- PARS_ELSIF_TOKEN
- exp PARS_THEN_TOKEN statement_list
- { $$ = pars_elsif_element($2, $4); }
-;
-
-elsif_list:
- elsif_element { $$ = que_node_list_add_last(NULL, $1); }
- | elsif_list elsif_element
- { $$ = que_node_list_add_last($1, $2); }
-;
-
-else_part:
- /* Nothing */ { $$ = NULL; }
- | PARS_ELSE_TOKEN statement_list
- { $$ = $2; }
- | elsif_list { $$ = $1; }
-;
-
-if_statement:
- PARS_IF_TOKEN exp PARS_THEN_TOKEN statement_list
- else_part
- PARS_END_TOKEN PARS_IF_TOKEN
- { $$ = pars_if_statement($2, $4, $5); }
-;
-
-while_statement:
- PARS_WHILE_TOKEN exp PARS_LOOP_TOKEN statement_list
- PARS_END_TOKEN PARS_LOOP_TOKEN
- { $$ = pars_while_statement($2, $4); }
-;
-
-for_statement:
- PARS_FOR_TOKEN PARS_ID_TOKEN PARS_IN_TOKEN
- exp PARS_DDOT_TOKEN exp
- PARS_LOOP_TOKEN statement_list
- PARS_END_TOKEN PARS_LOOP_TOKEN
- { $$ = pars_for_statement($2, $4, $6, $8); }
-;
-
-exit_statement:
- PARS_EXIT_TOKEN { $$ = pars_exit_statement(); }
-;
-
-return_statement:
- PARS_RETURN_TOKEN { $$ = pars_return_statement(); }
-;
-
-open_cursor_statement:
- PARS_OPEN_TOKEN PARS_ID_TOKEN
- { $$ = pars_open_statement(
- ROW_SEL_OPEN_CURSOR, $2); }
-;
-
-close_cursor_statement:
- PARS_CLOSE_TOKEN PARS_ID_TOKEN
- { $$ = pars_open_statement(
- ROW_SEL_CLOSE_CURSOR, $2); }
-;
-
-fetch_statement:
- PARS_FETCH_TOKEN PARS_ID_TOKEN PARS_INTO_TOKEN variable_list
- { $$ = pars_fetch_statement($2, $4, NULL); }
- | PARS_FETCH_TOKEN PARS_ID_TOKEN PARS_INTO_TOKEN user_function_call
- { $$ = pars_fetch_statement($2, NULL, $4); }
-;
-
-column_def:
- PARS_ID_TOKEN type_name opt_column_len opt_unsigned opt_not_null
- { $$ = pars_column_def($1, $2, $3, $4, $5); }
-;
-
-column_def_list:
- column_def { $$ = que_node_list_add_last(NULL, $1); }
- | column_def_list ',' column_def
- { $$ = que_node_list_add_last($1, $3); }
-;
-
-opt_column_len:
- /* Nothing */ { $$ = NULL; }
- | '(' PARS_INT_LIT ')'
- { $$ = $2; }
-;
-
-opt_unsigned:
- /* Nothing */ { $$ = NULL; }
- | PARS_UNSIGNED_TOKEN
- { $$ = &pars_int_token;
- /* pass any non-NULL pointer */ }
-;
-
-opt_not_null:
- /* Nothing */ { $$ = NULL; }
- | PARS_NOT_TOKEN PARS_NULL_LIT
- { $$ = &pars_int_token;
- /* pass any non-NULL pointer */ }
-;
-
-not_fit_in_memory:
- /* Nothing */ { $$ = NULL; }
- | PARS_DOES_NOT_FIT_IN_MEM_TOKEN
- { $$ = &pars_int_token;
- /* pass any non-NULL pointer */ }
-;
-
-create_table:
- PARS_CREATE_TOKEN PARS_TABLE_TOKEN
- PARS_ID_TOKEN '(' column_def_list ')'
- not_fit_in_memory { $$ = pars_create_table($3, $5, $7); }
-;
-
-column_list:
- PARS_ID_TOKEN { $$ = que_node_list_add_last(NULL, $1); }
- | column_list ',' PARS_ID_TOKEN
- { $$ = que_node_list_add_last($1, $3); }
-;
-
-unique_def:
- /* Nothing */ { $$ = NULL; }
- | PARS_UNIQUE_TOKEN { $$ = &pars_unique_token; }
-;
-
-clustered_def:
- /* Nothing */ { $$ = NULL; }
- | PARS_CLUSTERED_TOKEN { $$ = &pars_clustered_token; }
-;
-
-create_index:
- PARS_CREATE_TOKEN unique_def
- clustered_def
- PARS_INDEX_TOKEN
- PARS_ID_TOKEN PARS_ON_TOKEN PARS_ID_TOKEN
- '(' column_list ')' { $$ = pars_create_index($2, $3, $5, $7, $9); }
-;
-
-commit_statement:
- PARS_COMMIT_TOKEN PARS_WORK_TOKEN
- { $$ = pars_commit_statement(); }
-;
-
-rollback_statement:
- PARS_ROLLBACK_TOKEN PARS_WORK_TOKEN
- { $$ = pars_rollback_statement(); }
-;
-
-type_name:
- PARS_INT_TOKEN { $$ = &pars_int_token; }
- | PARS_INTEGER_TOKEN { $$ = &pars_int_token; }
- | PARS_CHAR_TOKEN { $$ = &pars_char_token; }
- | PARS_BINARY_TOKEN { $$ = &pars_binary_token; }
- | PARS_BLOB_TOKEN { $$ = &pars_blob_token; }
-;
-
-parameter_declaration:
- PARS_ID_TOKEN PARS_IN_TOKEN type_name
- { $$ = pars_parameter_declaration($1,
- PARS_INPUT, $3); }
- | PARS_ID_TOKEN PARS_OUT_TOKEN type_name
- { $$ = pars_parameter_declaration($1,
- PARS_OUTPUT, $3); }
-;
-
-parameter_declaration_list:
- /* Nothing */ { $$ = NULL; }
- | parameter_declaration { $$ = que_node_list_add_last(NULL, $1); }
- | parameter_declaration_list ',' parameter_declaration
- { $$ = que_node_list_add_last($1, $3); }
-;
-
-variable_declaration:
- PARS_ID_TOKEN type_name ';'
- { $$ = pars_variable_declaration($1, $2); }
-;
-
-variable_declaration_list:
- /* Nothing */
- | variable_declaration
- | variable_declaration_list variable_declaration
-;
-
-cursor_declaration:
- PARS_DECLARE_TOKEN PARS_CURSOR_TOKEN PARS_ID_TOKEN
- PARS_IS_TOKEN select_statement ';'
- { $$ = pars_cursor_declaration($3, $5); }
-;
-
-function_declaration:
- PARS_DECLARE_TOKEN PARS_FUNCTION_TOKEN PARS_ID_TOKEN ';'
- { $$ = pars_function_declaration($3); }
-;
-
-declaration:
- cursor_declaration
- | function_declaration
-;
-
-declaration_list:
- /* Nothing */
- | declaration
- | declaration_list declaration
-;
-
-procedure_definition:
- PARS_PROCEDURE_TOKEN PARS_ID_TOKEN '(' parameter_declaration_list ')'
- PARS_IS_TOKEN
- variable_declaration_list
- declaration_list
- PARS_BEGIN_TOKEN
- statement_list
- PARS_END_TOKEN { $$ = pars_procedure_definition($2, $4,
- $10); }
-;
-
-%%
diff --git a/storage/innodb_plugin/pars/pars0lex.l b/storage/innodb_plugin/pars/pars0lex.l
deleted file mode 100644
index 55ed17f82e1..00000000000
--- a/storage/innodb_plugin/pars/pars0lex.l
+++ /dev/null
@@ -1,676 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/******************************************************
-SQL parser lexical analyzer: input file for the GNU Flex lexer generator
-
-The InnoDB parser is frozen because MySQL takes care of SQL parsing.
-Therefore we normally keep the InnoDB parser C files as they are, and do
-not automatically generate them from pars0grm.y and pars0lex.l.
-
-How to make the InnoDB parser and lexer C files:
-
-1. Run ./make_flex.sh to generate lexer files.
-
-2. Run ./make_bison.sh to generate parser files.
-
-These instructions seem to work at least with bison-1.875d and flex-2.5.31 on
-Linux.
-
-Created 12/14/1997 Heikki Tuuri
-*******************************************************/
-
-%option nostdinit
-%option 8bit
-%option warn
-%option pointer
-%option never-interactive
-%option nodefault
-%option noinput
-%option nounput
-%option noyywrap
-%option noyy_scan_buffer
-%option noyy_scan_bytes
-%option noyy_scan_string
-%option nounistd
-
-%{
-#define YYSTYPE que_node_t*
-
-#include "univ.i"
-#include "pars0pars.h"
-#include "pars0grm.h"
-#include "pars0sym.h"
-#include "mem0mem.h"
-#include "os0proc.h"
-
-#define malloc(A) ut_malloc(A)
-#define free(A) ut_free(A)
-#define realloc(P, A) ut_realloc(P, A)
-#define exit(A) ut_error
-
-#define YY_INPUT(buf, result, max_size) pars_get_lex_chars(buf, &result, max_size)
-
-/* String buffer for removing quotes */
-static ulint stringbuf_len_alloc = 0; /* Allocated length */
-static ulint stringbuf_len = 0; /* Current length */
-static char* stringbuf; /* Start of buffer */
-/** Appends a string to the buffer. */
-static
-void
-string_append(
-/*==========*/
- const char* str, /*!< in: string to be appended */
- ulint len) /*!< in: length of the string */
-{
- if (stringbuf == NULL) {
- stringbuf = malloc(1);
- stringbuf_len_alloc = 1;
- }
-
- if (stringbuf_len + len > stringbuf_len_alloc) {
- while (stringbuf_len + len > stringbuf_len_alloc) {
- stringbuf_len_alloc <<= 1;
- }
- stringbuf = realloc(stringbuf, stringbuf_len_alloc);
- }
-
- memcpy(stringbuf + stringbuf_len, str, len);
- stringbuf_len += len;
-}
-
-%}
-
-DIGIT [0-9]
-ID [a-z_A-Z][a-z_A-Z0-9]*
-BOUND_LIT \:[a-z_A-Z0-9]+
-BOUND_ID \$[a-z_A-Z0-9]+
-
-%x comment
-%x quoted
-%x id
-%%
-
-{DIGIT}+ {
- yylval = sym_tab_add_int_lit(pars_sym_tab_global,
- atoi(yytext));
- return(PARS_INT_LIT);
-}
-
-{DIGIT}+"."{DIGIT}* {
- ut_error; /* not implemented */
-
- return(PARS_FLOAT_LIT);
-}
-
-{BOUND_LIT} {
- ulint type;
-
- yylval = sym_tab_add_bound_lit(pars_sym_tab_global,
- yytext + 1, &type);
-
- return((int) type);
-}
-
-{BOUND_ID} {
- yylval = sym_tab_add_bound_id(pars_sym_tab_global,
- yytext + 1);
-
- return(PARS_ID_TOKEN);
-}
-
-"'" {
-/* Quoted character string literals are handled in an explicit
-start state 'quoted'. This state is entered and the buffer for
-the scanned string is emptied upon encountering a starting quote.
-
-In the state 'quoted', only two actions are possible (defined below). */
- BEGIN(quoted);
- stringbuf_len = 0;
-}
-<quoted>[^\']+ {
- /* Got a sequence of characters other than "'":
- append to string buffer */
- string_append(yytext, yyleng);
-}
-<quoted>"'"+ {
- /* Got a sequence of "'" characters:
- append half of them to string buffer,
- as "''" represents a single "'".
- We apply truncating division,
- so that "'''" will result in "'". */
-
- string_append(yytext, yyleng / 2);
-
- /* If we got an odd number of quotes, then the
- last quote we got is the terminating quote.
- At the end of the string, we return to the
- initial start state and report the scanned
- string literal. */
-
- if (yyleng % 2) {
- BEGIN(INITIAL);
- yylval = sym_tab_add_str_lit(
- pars_sym_tab_global,
- (byte*) stringbuf, stringbuf_len);
- return(PARS_STR_LIT);
- }
-}
-
-\" {
-/* Quoted identifiers are handled in an explicit start state 'id'.
-This state is entered and the buffer for the scanned string is emptied
-upon encountering a starting quote.
-
-In the state 'id', only two actions are possible (defined below). */
- BEGIN(id);
- stringbuf_len = 0;
-}
-<id>[^\"]+ {
- /* Got a sequence of characters other than '"':
- append to string buffer */
- string_append(yytext, yyleng);
-}
-<id>\"+ {
- /* Got a sequence of '"' characters:
- append half of them to string buffer,
- as '""' represents a single '"'.
- We apply truncating division,
- so that '"""' will result in '"'. */
-
- string_append(yytext, yyleng / 2);
-
- /* If we got an odd number of quotes, then the
- last quote we got is the terminating quote.
- At the end of the string, we return to the
- initial start state and report the scanned
- identifier. */
-
- if (yyleng % 2) {
- BEGIN(INITIAL);
- yylval = sym_tab_add_id(
- pars_sym_tab_global,
- (byte*) stringbuf, stringbuf_len);
-
- return(PARS_ID_TOKEN);
- }
-}
-
-"NULL" {
- yylval = sym_tab_add_null_lit(pars_sym_tab_global);
-
- return(PARS_NULL_LIT);
-}
-
-"SQL" {
- /* Implicit cursor name */
- yylval = sym_tab_add_str_lit(pars_sym_tab_global,
- (byte*) yytext, yyleng);
- return(PARS_SQL_TOKEN);
-}
-
-"AND" {
- return(PARS_AND_TOKEN);
-}
-
-"OR" {
- return(PARS_OR_TOKEN);
-}
-
-"NOT" {
- return(PARS_NOT_TOKEN);
-}
-
-"PROCEDURE" {
- return(PARS_PROCEDURE_TOKEN);
-}
-
-"IN" {
- return(PARS_IN_TOKEN);
-}
-
-"OUT" {
- return(PARS_OUT_TOKEN);
-}
-
-"BINARY" {
- return(PARS_BINARY_TOKEN);
-}
-
-"BLOB" {
- return(PARS_BLOB_TOKEN);
-}
-
-"INT" {
- return(PARS_INT_TOKEN);
-}
-
-"INTEGER" {
- return(PARS_INT_TOKEN);
-}
-
-"FLOAT" {
- return(PARS_FLOAT_TOKEN);
-}
-
-"CHAR" {
- return(PARS_CHAR_TOKEN);
-}
-
-"IS" {
- return(PARS_IS_TOKEN);
-}
-
-"BEGIN" {
- return(PARS_BEGIN_TOKEN);
-}
-
-"END" {
- return(PARS_END_TOKEN);
-}
-
-"IF" {
- return(PARS_IF_TOKEN);
-}
-
-"THEN" {
- return(PARS_THEN_TOKEN);
-}
-
-"ELSE" {
- return(PARS_ELSE_TOKEN);
-}
-
-"ELSIF" {
- return(PARS_ELSIF_TOKEN);
-}
-
-"LOOP" {
- return(PARS_LOOP_TOKEN);
-}
-
-"WHILE" {
- return(PARS_WHILE_TOKEN);
-}
-
-"RETURN" {
- return(PARS_RETURN_TOKEN);
-}
-
-"SELECT" {
- return(PARS_SELECT_TOKEN);
-}
-
-"SUM" {
- return(PARS_SUM_TOKEN);
-}
-
-"COUNT" {
- return(PARS_COUNT_TOKEN);
-}
-
-"DISTINCT" {
- return(PARS_DISTINCT_TOKEN);
-}
-
-"FROM" {
- return(PARS_FROM_TOKEN);
-}
-
-"WHERE" {
- return(PARS_WHERE_TOKEN);
-}
-
-"FOR" {
- return(PARS_FOR_TOKEN);
-}
-
-"READ" {
- return(PARS_READ_TOKEN);
-}
-
-"ORDER" {
- return(PARS_ORDER_TOKEN);
-}
-
-"BY" {
- return(PARS_BY_TOKEN);
-}
-
-"ASC" {
- return(PARS_ASC_TOKEN);
-}
-
-"DESC" {
- return(PARS_DESC_TOKEN);
-}
-
-"INSERT" {
- return(PARS_INSERT_TOKEN);
-}
-
-"INTO" {
- return(PARS_INTO_TOKEN);
-}
-
-"VALUES" {
- return(PARS_VALUES_TOKEN);
-}
-
-"UPDATE" {
- return(PARS_UPDATE_TOKEN);
-}
-
-"SET" {
- return(PARS_SET_TOKEN);
-}
-
-"DELETE" {
- return(PARS_DELETE_TOKEN);
-}
-
-"CURRENT" {
- return(PARS_CURRENT_TOKEN);
-}
-
-"OF" {
- return(PARS_OF_TOKEN);
-}
-
-"CREATE" {
- return(PARS_CREATE_TOKEN);
-}
-
-"TABLE" {
- return(PARS_TABLE_TOKEN);
-}
-
-"INDEX" {
- return(PARS_INDEX_TOKEN);
-}
-
-"UNIQUE" {
- return(PARS_UNIQUE_TOKEN);
-}
-
-"CLUSTERED" {
- return(PARS_CLUSTERED_TOKEN);
-}
-
-"DOES_NOT_FIT_IN_MEMORY" {
- return(PARS_DOES_NOT_FIT_IN_MEM_TOKEN);
-}
-
-"ON" {
- return(PARS_ON_TOKEN);
-}
-
-"DECLARE" {
- return(PARS_DECLARE_TOKEN);
-}
-
-"CURSOR" {
- return(PARS_CURSOR_TOKEN);
-}
-
-"OPEN" {
- return(PARS_OPEN_TOKEN);
-}
-
-"FETCH" {
- return(PARS_FETCH_TOKEN);
-}
-
-"CLOSE" {
- return(PARS_CLOSE_TOKEN);
-}
-
-"NOTFOUND" {
- return(PARS_NOTFOUND_TOKEN);
-}
-
-"TO_CHAR" {
- return(PARS_TO_CHAR_TOKEN);
-}
-
-"TO_NUMBER" {
- return(PARS_TO_NUMBER_TOKEN);
-}
-
-"TO_BINARY" {
- return(PARS_TO_BINARY_TOKEN);
-}
-
-"BINARY_TO_NUMBER" {
- return(PARS_BINARY_TO_NUMBER_TOKEN);
-}
-
-"SUBSTR" {
- return(PARS_SUBSTR_TOKEN);
-}
-
-"REPLSTR" {
- return(PARS_REPLSTR_TOKEN);
-}
-
-"CONCAT" {
- return(PARS_CONCAT_TOKEN);
-}
-
-"INSTR" {
- return(PARS_INSTR_TOKEN);
-}
-
-"LENGTH" {
- return(PARS_LENGTH_TOKEN);
-}
-
-"SYSDATE" {
- return(PARS_SYSDATE_TOKEN);
-}
-
-"PRINTF" {
- return(PARS_PRINTF_TOKEN);
-}
-
-"ASSERT" {
- return(PARS_ASSERT_TOKEN);
-}
-
-"RND" {
- return(PARS_RND_TOKEN);
-}
-
-"RND_STR" {
- return(PARS_RND_STR_TOKEN);
-}
-
-"ROW_PRINTF" {
- return(PARS_ROW_PRINTF_TOKEN);
-}
-
-"COMMIT" {
- return(PARS_COMMIT_TOKEN);
-}
-
-"ROLLBACK" {
- return(PARS_ROLLBACK_TOKEN);
-}
-
-"WORK" {
- return(PARS_WORK_TOKEN);
-}
-
-"UNSIGNED" {
- return(PARS_UNSIGNED_TOKEN);
-}
-
-"EXIT" {
- return(PARS_EXIT_TOKEN);
-}
-
-"FUNCTION" {
- return(PARS_FUNCTION_TOKEN);
-}
-
-"LOCK" {
- return(PARS_LOCK_TOKEN);
-}
-
-"SHARE" {
- return(PARS_SHARE_TOKEN);
-}
-
-"MODE" {
- return(PARS_MODE_TOKEN);
-}
-
-{ID} {
- yylval = sym_tab_add_id(pars_sym_tab_global,
- (byte*)yytext,
- ut_strlen(yytext));
- return(PARS_ID_TOKEN);
-}
-
-".." {
- return(PARS_DDOT_TOKEN);
-}
-
-":=" {
- return(PARS_ASSIGN_TOKEN);
-}
-
-"<=" {
- return(PARS_LE_TOKEN);
-}
-
-">=" {
- return(PARS_GE_TOKEN);
-}
-
-"<>" {
- return(PARS_NE_TOKEN);
-}
-
-"(" {
-
- return((int)(*yytext));
-}
-
-"=" {
-
- return((int)(*yytext));
-}
-
-">" {
-
- return((int)(*yytext));
-}
-
-"<" {
-
- return((int)(*yytext));
-}
-
-"," {
-
- return((int)(*yytext));
-}
-
-";" {
-
- return((int)(*yytext));
-}
-
-")" {
-
- return((int)(*yytext));
-}
-
-"+" {
-
- return((int)(*yytext));
-}
-
-"-" {
-
- return((int)(*yytext));
-}
-
-"*" {
-
- return((int)(*yytext));
-}
-
-"/" {
-
- return((int)(*yytext));
-}
-
-"%" {
-
- return((int)(*yytext));
-}
-
-"{" {
-
- return((int)(*yytext));
-}
-
-"}" {
-
- return((int)(*yytext));
-}
-
-"?" {
-
- return((int)(*yytext));
-}
-
-"/*" BEGIN(comment); /* eat up comment */
-
-<comment>[^*]*
-<comment>"*"+[^*/]*
-<comment>"*"+"/" BEGIN(INITIAL);
-
-[ \t\n]+ /* eat up whitespace */
-
-
-. {
- fprintf(stderr,"Unrecognized character: %02x\n",
- *yytext);
-
- ut_error;
-
- return(0);
-}
-
-%%
-
-/**********************************************************************
-Release any resources used by the lexer. */
-UNIV_INTERN
-void
-pars_lexer_close(void)
-/*==================*/
-{
- yylex_destroy();
- free(stringbuf);
- stringbuf = NULL;
- stringbuf_len_alloc = stringbuf_len = 0;
-}
diff --git a/storage/innodb_plugin/pars/pars0opt.c b/storage/innodb_plugin/pars/pars0opt.c
deleted file mode 100644
index 2e392ba4836..00000000000
--- a/storage/innodb_plugin/pars/pars0opt.c
+++ /dev/null
@@ -1,1216 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file pars/pars0opt.c
-Simple SQL optimizer
-
-Created 12/21/1997 Heikki Tuuri
-*******************************************************/
-
-#include "pars0opt.h"
-
-#ifdef UNIV_NONINL
-#include "pars0opt.ic"
-#endif
-
-#include "row0sel.h"
-#include "row0ins.h"
-#include "row0upd.h"
-#include "dict0dict.h"
-#include "dict0mem.h"
-#include "que0que.h"
-#include "pars0grm.h"
-#include "pars0pars.h"
-#include "lock0lock.h"
-
-#define OPT_EQUAL 1 /* comparison by = */
-#define OPT_COMPARISON 2 /* comparison by <, >, <=, or >= */
-
-#define OPT_NOT_COND 1
-#define OPT_END_COND 2
-#define OPT_TEST_COND 3
-#define OPT_SCROLL_COND 4
-
-
-/*******************************************************************//**
-Mirrors a comparison operator: gives the operator to use once the two
-operands of the comparison have traded places. Any operator other than
-'<', '>', '=', PARS_LE_TOKEN and PARS_GE_TOKEN ends up in ut_error.
-@return the equivalent operator when the order of the arguments is switched */
-static
-int
-opt_invert_cmp_op(
-/*==============*/
-	int	op)	/*!< in: operator */
-{
-	switch (op) {
-	case '<':
-		return('>');
-	case '>':
-		return('<');
-	case '=':
-		/* Equality is symmetric */
-		return('=');
-	case PARS_LE_TOKEN:
-		return(PARS_GE_TOKEN);
-	case PARS_GE_TOKEN:
-		return(PARS_LE_TOKEN);
-	default:
-		ut_error;
-	}
-
-	return(0);
-}
-
-/*******************************************************************//**
-Tells whether an expression can be evaluated BEFORE the nth table of a join
-is accessed, which makes it a candidate for an index search on that table.
-A function node qualifies when every one of its arguments does; a symbol
-that is not a column always qualifies; a column qualifies when it belongs
-to one of the tables 0 .. nth_table - 1 of the join.
-@return TRUE if already determined */
-static
-ibool
-opt_check_exp_determined_before(
-/*============================*/
-	que_node_t*	exp,		/*!< in: expression */
-	sel_node_t*	sel_node,	/*!< in: select node */
-	ulint		nth_table)	/*!< in: nth table will be accessed */
-{
-	sym_node_t*	col;
-	que_node_t*	operand;
-	ulint		k;
-
-	ut_ad(exp && sel_node);
-
-	if (que_node_get_type(exp) == QUE_NODE_FUNC) {
-		/* Recurse into the arguments; one undetermined argument
-		is enough to make the whole function undetermined */
-		func_node_t*	fn = exp;
-
-		for (operand = fn->args;
-		     operand != NULL;
-		     operand = que_node_get_next(operand)) {
-
-			if (!opt_check_exp_determined_before(operand,
-							     sel_node,
-							     nth_table)) {
-				return(FALSE);
-			}
-		}
-
-		return(TRUE);
-	}
-
-	ut_a(que_node_get_type(exp) == QUE_NODE_SYMBOL);
-
-	col = exp;
-
-	if (col->token_type != SYM_COLUMN) {
-		/* Not a column: nothing has to be fetched for it */
-		return(TRUE);
-	}
-
-	/* A column is known only if its table comes earlier in the join */
-	for (k = 0; k < nth_table; k++) {
-		if (col->table == sel_node_get_nth_plan(sel_node, k)->table) {
-			return(TRUE);
-		}
-	}
-
-	return(FALSE);
-}
-
-/*******************************************************************//**
-Looks in a comparison condition if a column value is already restricted by
-it BEFORE the nth table is accessed. The column is looked for first as the
-left and then as the right argument of the comparison; in both cases the
-argument on the other side must be determined before the nth table is
-accessed. *op is written only when a non-NULL value is returned.
-@return expression restricting the value of the column, or NULL if not known */
-static
-que_node_t*
-opt_look_for_col_in_comparison_before(
-/*==================================*/
- ulint cmp_type, /*!< in: OPT_EQUAL, OPT_COMPARISON */
- ulint col_no, /*!< in: column number */
- func_node_t* search_cond, /*!< in: comparison condition */
- sel_node_t* sel_node, /*!< in: select node */
- ulint nth_table, /*!< in: nth table in a join (a query
- from a single table is considered a
- join of 1 table) */
- ulint* op) /*!< out: comparison operator ('=',
- PARS_GE_TOKEN, ... ); this is inverted
- if the column appears on the right
- side */
-{
- sym_node_t* sym_node;
- dict_table_t* table;
- que_node_t* exp;
- que_node_t* arg;
-
- ut_ad(search_cond);
-
- /* Only these five comparison operators are accepted here */
- ut_a((search_cond->func == '<')
- || (search_cond->func == '>')
- || (search_cond->func == '=')
- || (search_cond->func == PARS_GE_TOKEN)
- || (search_cond->func == PARS_LE_TOKEN));
-
- table = sel_node_get_nth_plan(sel_node, nth_table)->table;
-
- /* Give up at once if the operator is not of the requested class:
- OPT_EQUAL wants '=', OPT_COMPARISON wants one of <, >, <=, >= */
-
- if ((cmp_type == OPT_EQUAL) && (search_cond->func != '=')) {
-
- return(NULL);
-
- } else if ((cmp_type == OPT_COMPARISON)
- && (search_cond->func != '<')
- && (search_cond->func != '>')
- && (search_cond->func != PARS_GE_TOKEN)
- && (search_cond->func != PARS_LE_TOKEN)) {
-
- return(NULL);
- }
-
- /* Case 1: the column is the first (left) argument */
-
- arg = search_cond->args;
-
- if (que_node_get_type(arg) == QUE_NODE_SYMBOL) {
- sym_node = arg;
-
- if ((sym_node->token_type == SYM_COLUMN)
- && (sym_node->table == table)
- && (sym_node->col_no == col_no)) {
-
- /* sym_node contains the desired column id */
-
- /* Check if the expression on the right side of the
- operator is already determined */
-
- exp = que_node_get_next(arg);
-
- if (opt_check_exp_determined_before(exp, sel_node,
- nth_table)) {
- *op = search_cond->func;
-
- return(exp);
- }
- }
- }
-
- /* Case 2: the column is the second (right) argument. From here on
- exp is the left argument and arg the right one; the operator is
- inverted so that it reads as if the column were on the left */
-
- exp = search_cond->args;
- arg = que_node_get_next(arg);
-
- if (que_node_get_type(arg) == QUE_NODE_SYMBOL) {
- sym_node = arg;
-
- if ((sym_node->token_type == SYM_COLUMN)
- && (sym_node->table == table)
- && (sym_node->col_no == col_no)) {
-
- if (opt_check_exp_determined_before(exp, sel_node,
- nth_table)) {
- *op = opt_invert_cmp_op(search_cond->func);
-
- return(exp);
- }
- }
- }
-
- return(NULL);
-}
-
-/*******************************************************************//**
-Searches a search condition for a restriction on a column value that is
-already known BEFORE the nth table is accessed. A conjunction is searched
-left operand first, then right operand. A restriction found in a single
-comparison is discarded if it bounds the column from the wrong side for
-the fetch order: an upper limit is of no use when fetching in an ascending
-order, nor is a lower limit when fetching in a descending order.
-@return expression restricting the value of the column, or NULL if not known */
-static
-que_node_t*
-opt_look_for_col_in_cond_before(
-/*============================*/
-	ulint		cmp_type,	/*!< in: OPT_EQUAL, OPT_COMPARISON */
-	ulint		col_no,		/*!< in: column number */
-	func_node_t*	search_cond,	/*!< in: search condition or NULL */
-	sel_node_t*	sel_node,	/*!< in: select node */
-	ulint		nth_table,	/*!< in: nth table in a join (a query
-					from a single table is considered a
-					join of 1 table) */
-	ulint*		op)		/*!< out: comparison operator ('=',
-					PARS_GE_TOKEN, ... ) */
-{
-	que_node_t*	found;
-
-	if (search_cond == NULL) {
-		return(NULL);
-	}
-
-	ut_a(que_node_get_type(search_cond) == QUE_NODE_FUNC);
-	ut_a(search_cond->func != PARS_OR_TOKEN);
-	ut_a(search_cond->func != PARS_NOT_TOKEN);
-
-	if (search_cond->func == PARS_AND_TOKEN) {
-		/* Conjunction: the first operand that yields a restriction
-		wins */
-		func_node_t*	lhs = search_cond->args;
-
-		found = opt_look_for_col_in_cond_before(cmp_type, col_no,
-							lhs, sel_node,
-							nth_table, op);
-		if (found != NULL) {
-			return(found);
-		}
-
-		return(opt_look_for_col_in_cond_before(
-			       cmp_type, col_no, que_node_get_next(lhs),
-			       sel_node, nth_table, op));
-	}
-
-	found = opt_look_for_col_in_comparison_before(cmp_type, col_no,
-						      search_cond, sel_node,
-						      nth_table, op);
-	if (found == NULL) {
-		return(NULL);
-	}
-
-	/* Drop a limit that lies on the far side of the scan direction */
-	if (sel_node->asc) {
-		if ((*op == '<') || (*op == PARS_LE_TOKEN)) {
-			return(NULL);
-		}
-	} else if ((*op == '>') || (*op == PARS_GE_TOKEN)) {
-		return(NULL);
-	}
-
-	return(found);
-}
-
-/*******************************************************************//**
-Calculates the goodness for an index according to a select node. The
-goodness is 4 times the number of first fields in index whose values we
-already know exactly in the query. If we have a comparison condition for
-the next field, 2 points are added. If the goodness then is at least
-4 * dict_index_get_n_unique(index), i.e. all the unique fields are known
-exactly, we add 1024 points, and another 1024 points if the index is
-clustered. Finally, for a clustered index with a nonzero goodness we add
-1 point.
-@return goodness */
-static
-ulint
-opt_calc_index_goodness(
-/*====================*/
- dict_index_t* index, /*!< in: index */
- sel_node_t* sel_node, /*!< in: parsed select node */
- ulint nth_table, /*!< in: nth table in a join */
- que_node_t** index_plan, /*!< in/out: comparison expressions for
- this index */
- ulint* last_op) /*!< out: last comparison operator, if
- goodness > 1 */
-{
- que_node_t* exp;
- ulint goodness;
- ulint n_fields;
- ulint col_no;
- ulint op;
- ulint j;
-
- goodness = 0;
-
- /* Note that as higher level node pointers in the B-tree contain
- page addresses as the last field, we must not put more fields in
- the search tuple than dict_index_get_n_unique_in_tree(index); see
- the note in btr_cur_search_to_nth_level. */
-
- n_fields = dict_index_get_n_unique_in_tree(index);
-
- for (j = 0; j < n_fields; j++) {
-
- col_no = dict_index_get_nth_col_no(index, j);
-
- exp = opt_look_for_col_in_cond_before(
- OPT_EQUAL, col_no, sel_node->search_cond,
- sel_node, nth_table, &op);
- if (exp) {
- /* The value for this column is exactly known already
- at this stage of the join */
-
- index_plan[j] = exp;
- *last_op = op;
- goodness += 4;
- } else {
- /* Look for non-equality comparisons */
-
- exp = opt_look_for_col_in_cond_before(
- OPT_COMPARISON, col_no, sel_node->search_cond,
- sel_node, nth_table, &op);
- if (exp) {
- index_plan[j] = exp;
- *last_op = op;
- goodness += 2;
- }
-
- /* The first field without an exact match ends the
- usable prefix of the index, whether or not a comparison
- was found for it */
-
- break;
- }
- }
-
- /* Bonus when every unique field of the index is known exactly;
- doubled for a clustered index */
-
- if (goodness >= 4 * dict_index_get_n_unique(index)) {
- goodness += 1024;
-
- if (dict_index_is_clust(index)) {
-
- goodness += 1024;
- }
- }
-
- /* We have to test for goodness here, as last_op may not be set */
- if (goodness && dict_index_is_clust(index)) {
-
- goodness++;
- }
-
- return(goodness);
-}
-
-/*******************************************************************//**
-Recovers from an index goodness value the number of index fields that
-were matched, either exactly or by a comparison.
-@return number of exactly or partially matched fields */
-UNIV_INLINE
-ulint
-opt_calc_n_fields_from_goodness(
-/*============================*/
-	ulint	goodness)	/*!< in: goodness */
-{
-	/* Only the part of the goodness below 1024 enters the count */
-	const ulint	field_points = goodness % 1024;
-
-	return((field_points + 2) / 4);
-}
-
-/*******************************************************************//**
-Maps a comparison operator to the page cursor search mode (PAGE_CUR_GE,
-...) used to position the cursor. For every operator except '=' the fetch
-direction is asserted to agree with the operator: '>' and PARS_GE_TOKEN
-demand an ascending fetch, '<' and PARS_LE_TOKEN a descending one. An
-unknown operator ends up in ut_error.
-@return search mode */
-UNIV_INLINE
-ulint
-opt_op_to_search_mode(
-/*==================*/
-	ibool	asc,	/*!< in: TRUE if the rows should be fetched in an
-			ascending order */
-	ulint	op)	/*!< in: operator '=', PARS_GE_TOKEN, ... */
-{
-	switch (op) {
-	case '=':
-		return(asc ? PAGE_CUR_GE : PAGE_CUR_LE);
-	case '<':
-		ut_a(!asc);
-		return(PAGE_CUR_L);
-	case '>':
-		ut_a(asc);
-		return(PAGE_CUR_G);
-	case PARS_GE_TOKEN:
-		ut_a(asc);
-		return(PAGE_CUR_GE);
-	case PARS_LE_TOKEN:
-		ut_a(!asc);
-		return(PAGE_CUR_LE);
-	default:
-		ut_error;
-	}
-
-	return(0);
-}
-
-/*******************************************************************//**
-Checks whether a node is one of the direct arguments of a function node.
-The nodes are compared by address.
-@return TRUE if is an argument */
-static
-ibool
-opt_is_arg(
-/*=======*/
-	que_node_t*	arg_node,	/*!< in: possible argument node */
-	func_node_t*	func_node)	/*!< in: function node */
-{
-	que_node_t*	cur;
-
-	for (cur = func_node->args;
-	     cur != NULL;
-	     cur = que_node_get_next(cur)) {
-
-		if (cur == arg_node) {
-			return(TRUE);
-		}
-	}
-
-	return(FALSE);
-}
-
-/*******************************************************************//**
-Verifies that the chosen query plan yields rows in an order which satisfies
-the order-by clause; does nothing if the select has no order-by. As there
-is no sort utility, the plan is acceptable only if every table but the
-last matches all unique fields of its index exactly, and the last table is
-the table of the order-by column and either also matches all unique fields
-exactly or has the order-by column as the first index field that is not
-exactly matched. */
-static
-void
-opt_check_order_by(
-/*===============*/
-	sel_node_t*	sel_node)	/*!< in: select node; asserts an error
-					if the plan does not agree with the
-					order-by */
-{
-	order_node_t*	order_by;
-	dict_table_t*	sort_table;
-	ulint		sort_col_no;
-	plan_t*		plan;
-	ulint		k;
-
-	if (!sel_node->order_by) {
-		return;
-	}
-
-	order_by = sel_node->order_by;
-	sort_col_no = order_by->column->col_no;
-	sort_table = order_by->column->table;
-
-	for (k = 0; k < sel_node->n_tables; k++) {
-
-		plan = sel_node_get_nth_plan(sel_node, k);
-
-		if (k + 1 == sel_node->n_tables) {
-			/* Last table of the join: it alone may return
-			several rows, and then in order-by column order */
-			ut_a(plan->table == sort_table);
-
-			ut_a((plan->n_exact_match
-			      >= dict_index_get_n_unique(plan->index))
-			     || (sort_col_no
-				 == dict_index_get_nth_col_no(
-					 plan->index,
-					 plan->n_exact_match)));
-		} else {
-			/* Any earlier table: all unique fields must be
-			matched exactly */
-			ut_a(plan->n_exact_match
-			     >= dict_index_get_n_unique(plan->index));
-		}
-	}
-}
-
-/*******************************************************************//**
-Chooses the index through which the ith table of a select is accessed and
-fills in the corresponding plan node: the index with the highest goodness
-(see opt_calc_index_goodness) wins, ties going to the index that comes
-first in the table's index list; if no index has a goodness above 0, the
-first index of the table is used. The search tuple, the expressions that
-build it, the number of exactly matched fields and the search mode are
-derived from the winning index. */
-static
-void
-opt_search_plan_for_table(
-/*======================*/
- sel_node_t* sel_node, /*!< in: parsed select node */
- ulint i, /*!< in: this is the ith table */
- dict_table_t* table) /*!< in: table */
-{
- plan_t* plan;
- dict_index_t* index;
- dict_index_t* best_index;
- ulint n_fields;
- ulint goodness;
- ulint last_op = 75946965; /* Eliminate a Purify
- warning */
- ulint best_goodness;
- ulint best_last_op = 0; /* remove warning */
- que_node_t* index_plan[256];
- que_node_t* best_index_plan[256];
-
- plan = sel_node_get_nth_plan(sel_node, i);
-
- plan->table = table;
- plan->asc = sel_node->asc;
- plan->pcur_is_open = FALSE;
- plan->cursor_at_end = FALSE;
-
- /* Calculate goodness for each index of the table */
-
- index = dict_table_get_first_index(table);
- best_index = index; /* Eliminate compiler warning */
- best_goodness = 0;
-
- /* should be do ... until ? comment by Jani */
- while (index) {
- goodness = opt_calc_index_goodness(index, sel_node, i,
- index_plan, &last_op);
- /* Strictly greater: an index only replaces an earlier one if
- it is better, and an index with goodness 0 never wins */
- if (goodness > best_goodness) {
-
- best_index = index;
- best_goodness = goodness;
- n_fields = opt_calc_n_fields_from_goodness(goodness);
-
- /* Remember the expressions and the last operator of
- the best index so far; index_plan is overwritten by
- the next call */
- ut_memcpy(best_index_plan, index_plan,
- n_fields * sizeof(void*));
- best_last_op = last_op;
- }
-
- index = dict_table_get_next_index(index);
- }
-
- plan->index = best_index;
-
- n_fields = opt_calc_n_fields_from_goodness(best_goodness);
-
- if (n_fields == 0) {
- /* No usable condition: no search tuple. Note that
- plan->tuple_exps and plan->mode are not assigned in this
- branch */
- plan->tuple = NULL;
- plan->n_exact_match = 0;
- } else {
- plan->tuple = dtuple_create(pars_sym_tab_global->heap,
- n_fields);
- dict_index_copy_types(plan->tuple, plan->index, n_fields);
-
- plan->tuple_exps = mem_heap_alloc(pars_sym_tab_global->heap,
- n_fields * sizeof(void*));
-
- ut_memcpy(plan->tuple_exps, best_index_plan,
- n_fields * sizeof(void*));
- /* If the last operator is not '=', the last field of the
- tuple comes from a non-equality comparison and is not
- counted as an exact match */
- if (best_last_op == '=') {
- plan->n_exact_match = n_fields;
- } else {
- plan->n_exact_match = n_fields - 1;
- }
-
- plan->mode = opt_op_to_search_mode(sel_node->asc,
- best_last_op);
- }
-
- /* The search is unique only if a clustered index is used and all
- its unique fields are matched exactly */
- if (dict_index_is_clust(best_index)
- && (plan->n_exact_match >= dict_index_get_n_unique(best_index))) {
-
- plan->unique_search = TRUE;
- } else {
- plan->unique_search = FALSE;
- }
-
- plan->old_vers_heap = NULL;
-
- btr_pcur_init(&(plan->pcur));
- btr_pcur_init(&(plan->clust_pcur));
-}
-
-/*******************************************************************//**
-Looks at a comparison condition and decides if it can, and need, be tested for
-a table AFTER the table has been accessed. The tests below are made in a
-fixed order and the first one that applies decides the class.
-@return OPT_NOT_COND if not for this table, else OPT_END_COND,
-OPT_TEST_COND, or OPT_SCROLL_COND, where the last means that the
-condition need not be tested, except when scroll cursors are used */
-static
-ulint
-opt_classify_comparison(
-/*====================*/
- sel_node_t* sel_node, /*!< in: select node */
- ulint i, /*!< in: ith table in the join */
- func_node_t* cond) /*!< in: comparison condition */
-{
- plan_t* plan;
- ulint n_fields;
- ulint op;
- ulint j;
-
- ut_ad(cond && sel_node);
-
- plan = sel_node_get_nth_plan(sel_node, i);
-
- /* Check if the condition is determined after the ith table has been
- accessed, but not after the i - 1:th */
-
- if (!opt_check_exp_determined_before(cond, sel_node, i + 1)) {
-
- return(OPT_NOT_COND);
- }
-
- if ((i > 0) && opt_check_exp_determined_before(cond, sel_node, i)) {
-
- return(OPT_NOT_COND);
- }
-
- /* If the condition is an exact match condition used in constructing
- the search tuple, it is classified as OPT_END_COND */
-
- /* n_fields is the number of fields in the search tuple, 0 if the
- plan has no search tuple */
-
- if (plan->tuple) {
- n_fields = dtuple_get_n_fields(plan->tuple);
- } else {
- n_fields = 0;
- }
-
- for (j = 0; j < plan->n_exact_match; j++) {
-
- if (opt_is_arg(plan->tuple_exps[j], cond)) {
-
- return(OPT_END_COND);
- }
- }
-
- /* If the condition is an non-exact match condition used in
- constructing the search tuple, it is classified as OPT_SCROLL_COND.
- When the cursor is positioned, and if a non-scroll cursor is used,
- there is no need to test this condition; if a scroll cursor is used
- the testing is necessary when the cursor is reversed. */
-
- if ((n_fields > plan->n_exact_match)
- && opt_is_arg(plan->tuple_exps[n_fields - 1], cond)) {
-
- return(OPT_SCROLL_COND);
- }
-
- /* If the condition is a non-exact match condition on the first field
- in index for which there is no exact match, and it limits the search
- range from the opposite side of the search tuple already BEFORE we
- access the table, it is classified as OPT_END_COND */
-
- /* op is assigned by the helper only when it returns non-NULL, and
- it is read only in that case */
-
- if ((dict_index_get_n_fields(plan->index) > plan->n_exact_match)
- && opt_look_for_col_in_comparison_before(
- OPT_COMPARISON,
- dict_index_get_nth_col_no(plan->index,
- plan->n_exact_match),
- cond, sel_node, i, &op)) {
-
- if (sel_node->asc && ((op == '<') || (op == PARS_LE_TOKEN))) {
-
- return(OPT_END_COND);
- }
-
- if (!sel_node->asc && ((op == '>') || (op == PARS_GE_TOKEN))) {
-
- return(OPT_END_COND);
- }
- }
-
- /* Otherwise, cond is classified as OPT_TEST_COND */
-
- return(OPT_TEST_COND);
-}
-
-/*******************************************************************//**
-Walks a conjunction of search conditions recursively and files each
-comparison under the plan of the ith table: conditions classified as
-OPT_END_COND are appended to plan->end_conds, those classified as
-OPT_TEST_COND to plan->other_conds. Conditions of any other class are
-not stored. */
-static
-void
-opt_find_test_conds(
-/*================*/
-	sel_node_t*	sel_node,	/*!< in: select node */
-	ulint		i,		/*!< in: ith table in the join */
-	func_node_t*	cond)		/*!< in: conjunction of search
-					conditions or NULL */
-{
-	plan_t*	plan;
-	ulint	cond_class;
-
-	if (cond == NULL) {
-		return;
-	}
-
-	if (cond->func == PARS_AND_TOKEN) {
-		/* Handle the left operand, then the right one */
-		func_node_t*	left = cond->args;
-
-		opt_find_test_conds(sel_node, i, left);
-		opt_find_test_conds(sel_node, i, que_node_get_next(left));
-
-		return;
-	}
-
-	plan = sel_node_get_nth_plan(sel_node, i);
-
-	cond_class = opt_classify_comparison(sel_node, i, cond);
-
-	switch (cond_class) {
-	case OPT_END_COND:
-		UT_LIST_ADD_LAST(cond_list, plan->end_conds, cond);
-		break;
-	case OPT_TEST_COND:
-		UT_LIST_ADD_LAST(cond_list, plan->other_conds, cond);
-		break;
-	default:
-		break;
-	}
-}
-
-/*******************************************************************//**
-Normalizes a list of comparison conditions so that a column of the table
-appears on the left side of the comparison if possible. This is accomplished
-by switching the arguments of the operator and inverting the operator.
-Only the second argument is examined: a condition is switched whenever its
-second argument is a column of the table. The list is walked through the
-cond_list links of the conditions. */
-static
-void
-opt_normalize_cmp_conds(
-/*====================*/
- func_node_t* cond, /*!< in: first in a list of comparison
- conditions, or NULL */
- dict_table_t* table) /*!< in: table */
-{
- que_node_t* arg1;
- que_node_t* arg2;
- sym_node_t* sym_node;
-
- while (cond) {
- /* arg1 is the left and arg2 the right argument */
- arg1 = cond->args;
- arg2 = que_node_get_next(arg1);
-
- if (que_node_get_type(arg2) == QUE_NODE_SYMBOL) {
-
- sym_node = arg2;
-
- if ((sym_node->token_type == SYM_COLUMN)
- && (sym_node->table == table)) {
-
- /* Switch the order of the arguments */
-
- /* NOTE(review): presumably the first call
- below makes arg2 a list of its own and the
- second call appends arg1 after it - confirm
- against que_node_list_add_last */
-
- cond->args = arg2;
- que_node_list_add_last(NULL, arg2);
- que_node_list_add_last(arg2, arg1);
-
- /* Invert the operator */
- cond->func = opt_invert_cmp_op(cond->func);
- }
- }
-
- cond = UT_LIST_GET_NEXT(cond_list, cond);
- }
-}
-
-/*******************************************************************//**
-Works out which conjuncts of the search condition can, and have to, be
-tested when the ith table in a join is accessed; conjuncts already taken
-care of by the search tuple need no testing. The end conditions found are
-then normalized, and their number is asserted to be at least the number
-of exactly matched fields of the plan. */
-static
-void
-opt_determine_and_normalize_test_conds(
-/*===================================*/
-	sel_node_t*	sel_node,	/*!< in: select node */
-	ulint		i)		/*!< in: ith table in the join */
-{
-	plan_t*	table_plan = sel_node_get_nth_plan(sel_node, i);
-
-	/* Start from empty condition lists */
-	UT_LIST_INIT(table_plan->end_conds);
-	UT_LIST_INIT(table_plan->other_conds);
-
-	/* Sort the conjuncts of the search condition into the lists */
-	opt_find_test_conds(sel_node, i, sel_node->search_cond);
-
-	opt_normalize_cmp_conds(UT_LIST_GET_FIRST(table_plan->end_conds),
-				table_plan->table);
-
-	ut_a(table_plan->n_exact_match
-	     <= UT_LIST_GET_LEN(table_plan->end_conds));
-}
-
-/*******************************************************************//**
-Looks for occurrences of the columns of the table in the query subgraph and
-adds them to the list of columns if an occurrence of the same column does not
-already exist in the list. If the column is already in the list, puts a value
-indirection to point to the occurrence in the column list, except if the
-column occurrence we are looking at is in the column list, in which case
-nothing is done. Function nodes are searched recursively through their
-arguments; symbols that are not columns, or are columns of another table
-than index->table, are ignored. */
-UNIV_INTERN
-void
-opt_find_all_cols(
-/*==============*/
- ibool copy_val, /*!< in: if TRUE, new found columns are
- added as columns to copy */
- dict_index_t* index, /*!< in: index of the table to use */
- sym_node_list_t* col_list, /*!< in: base node of a list where
- to add new found columns */
- plan_t* plan, /*!< in: plan or NULL */
- que_node_t* exp) /*!< in: expression or condition or
- NULL */
-{
- func_node_t* func_node;
- que_node_t* arg;
- sym_node_t* sym_node;
- sym_node_t* col_node;
- ulint col_pos;
-
- if (exp == NULL) {
-
- return;
- }
-
- if (que_node_get_type(exp) == QUE_NODE_FUNC) {
- func_node = exp;
-
- /* Recurse into each argument of the function */
-
- arg = func_node->args;
-
- while (arg) {
- opt_find_all_cols(copy_val, index, col_list, plan,
- arg);
- arg = que_node_get_next(arg);
- }
-
- return;
- }
-
- ut_a(que_node_get_type(exp) == QUE_NODE_SYMBOL);
-
- sym_node = exp;
-
- if (sym_node->token_type != SYM_COLUMN) {
-
- return;
- }
-
- if (sym_node->table != index->table) {
-
- return;
- }
-
- /* Look for an occurrence of the same column in the plan column
- list */
-
- col_node = UT_LIST_GET_FIRST(*col_list);
-
- while (col_node) {
- if (col_node->col_no == sym_node->col_no) {
-
- if (col_node == sym_node) {
- /* sym_node was already in a list: do
- nothing */
-
- return;
- }
-
- /* Put an indirection */
- sym_node->indirection = col_node;
- sym_node->alias = col_node;
-
- return;
- }
-
- col_node = UT_LIST_GET_NEXT(col_var_list, col_node);
- }
-
- /* The same column did not occur in the list: add it */
-
- UT_LIST_ADD_LAST(col_var_list, *col_list, sym_node);
-
- sym_node->copy_val = copy_val;
-
- /* Fill in the field_no fields in sym_node */
-
- /* The SYM_CLUST_FIELD_NO slot gets the position of the column in
- the first index of the table */
-
- sym_node->field_nos[SYM_CLUST_FIELD_NO] = dict_index_get_nth_col_pos(
- dict_table_get_first_index(index->table), sym_node->col_no);
- if (!dict_index_is_clust(index)) {
-
- /* A plan is required when index is not clustered */
-
- ut_a(plan);
-
- col_pos = dict_index_get_nth_col_pos(index, sym_node->col_no);
-
- if (col_pos == ULINT_UNDEFINED) {
-
- /* No position for the column in this index: flag
- that the clustered index record has to be fetched */
-
- plan->must_get_clust = TRUE;
- }
-
- sym_node->field_nos[SYM_SEC_FIELD_NO] = col_pos;
- }
-}
-
-/*******************************************************************//**
-Finds the columns of the ith table which occur in conditions that are
-still undetermined AFTER the join operation has fetched a row in the ith
-table, and registers them in the plan as columns whose values must be
-copied to dynamic memory for later use. A conjunction is processed
-operand by operand. */
-static
-void
-opt_find_copy_cols(
-/*===============*/
-	sel_node_t*	sel_node,	/*!< in: select node */
-	ulint		i,		/*!< in: ith table in the join */
-	func_node_t*	search_cond)	/*!< in: search condition or NULL */
-{
-	plan_t*	plan;
-
-	if (search_cond == NULL) {
-		return;
-	}
-
-	ut_ad(que_node_get_type(search_cond) == QUE_NODE_FUNC);
-
-	if (search_cond->func == PARS_AND_TOKEN) {
-		func_node_t*	first = search_cond->args;
-
-		opt_find_copy_cols(sel_node, i, first);
-		opt_find_copy_cols(sel_node, i, que_node_get_next(first));
-
-		return;
-	}
-
-	if (opt_check_exp_determined_before(search_cond, sel_node, i + 1)) {
-		/* The condition can be tested on the fetch from the ith
-		table already: nothing has to be copied for it */
-		return;
-	}
-
-	/* The condition has to wait for a later table, so every ith
-	table column in it is marked as a column to copy */
-
-	plan = sel_node_get_nth_plan(sel_node, i);
-
-	opt_find_all_cols(TRUE, plan->index, &plan->columns, plan,
-			  search_cond);
-}
-
-/*******************************************************************//**
-Sorts the columns of the ith table into those that are used only while the
-latch on the page is held and those whose value has to be copied to
-dynamic memory. The first occurrence of a column goes to the column list
-of the plan node; later occurrences get an indirection to it. */
-static
-void
-opt_classify_cols(
-/*==============*/
-	sel_node_t*	sel_node,	/*!< in: select node */
-	ulint		i)		/*!< in: ith table in the join */
-{
-	plan_t*		plan = sel_node_get_nth_plan(sel_node, i);
-	que_node_t*	item;
-
-	/* Preliminary value; the final one depends on the environment
-	of the select statement */
-	plan->must_get_clust = FALSE;
-
-	UT_LIST_INIT(plan->columns);
-
-	/* Every column of the select list is a column to copy */
-	for (item = sel_node->select_list;
-	     item != NULL;
-	     item = que_node_get_next(item)) {
-
-		opt_find_all_cols(TRUE, plan->index, &plan->columns, plan,
-				  item);
-	}
-
-	/* So are the columns of conditions that cannot be tested yet */
-	opt_find_copy_cols(sel_node, i, sel_node->search_cond);
-
-	/* What is left in the search condition is needed only
-	temporarily, hence FALSE */
-	opt_find_all_cols(FALSE, plan->index, &plan->columns, plan,
-			  sel_node->search_cond);
-}
-
-/*******************************************************************//**
-Prepares in the plan the data needed for fetching a clustered index record
-when the table is accessed through another index: a reference tuple with
-the unique fields of the clustered index, and a map giving for each of
-those fields its position in the index of the plan. If the plan already
-uses the clustered index, both are set to NULL. The columns must already
-be classified for the plan node. */
-static
-void
-opt_clust_access(
-/*=============*/
-	sel_node_t*	sel_node,	/*!< in: select node */
-	ulint		n)		/*!< in: nth table in select */
-{
-	plan_t*		plan = sel_node_get_nth_plan(sel_node, n);
-	dict_index_t*	sec_index = plan->index;
-	dict_index_t*	clust_index;
-	mem_heap_t*	heap;
-	ulint		n_ref_fields;
-	ulint		field_pos;
-	ulint		k;
-
-	/* Preliminary value; the final one depends on the environment
-	of the select statement */
-	plan->no_prefetch = FALSE;
-
-	if (dict_index_is_clust(sec_index)) {
-		plan->clust_map = NULL;
-		plan->clust_ref = NULL;
-
-		return;
-	}
-
-	clust_index = dict_table_get_first_index(sec_index->table);
-	n_ref_fields = dict_index_get_n_unique(clust_index);
-	heap = pars_sym_tab_global->heap;
-
-	plan->clust_ref = dtuple_create(heap, n_ref_fields);
-
-	dict_index_copy_types(plan->clust_ref, clust_index, n_ref_fields);
-
-	plan->clust_map = mem_heap_alloc(heap, n_ref_fields * sizeof(ulint));
-
-	for (k = 0; k < n_ref_fields; k++) {
-		field_pos = dict_index_get_nth_field_pos(sec_index,
-							 clust_index, k);
-
-		ut_a(field_pos != ULINT_UNDEFINED);
-
-		/* Only queries to InnoDB's internal system tables are
-		optimized here, and those should have no column prefix
-		indexes; a prefix is reported but not treated as fatal */
-
-		if (dict_index_get_nth_field(sec_index, field_pos)
-		    ->prefix_len != 0
-		    || dict_index_get_nth_field(clust_index, k)
-		    ->prefix_len != 0) {
-			fprintf(stderr,
-				"InnoDB: Error in pars0opt.c:"
-				" table %s has prefix_len != 0\n",
-				sec_index->table_name);
-		}
-
-		plan->clust_map[k] = field_pos;
-
-		ut_ad(field_pos != ULINT_UNDEFINED);
-	}
-}
-
-/*******************************************************************//**
-Optimizes a select. Decides which indexes to tables to use. The tables
-are accessed in the order that they were written to the FROM part in the
-select statement. Allocates one plan node per table from the heap of
-pars_sym_tab_global, sets the fetch direction from the order-by clause
-(ascending if there is none), and then fills in the plan nodes in two
-passes over the tables: first index choice and test conditions, then
-column classification and clustered index access info. Finally asserts
-that the plan agrees with a possible order-by clause. */
-UNIV_INTERN
-void
-opt_search_plan(
-/*============*/
-	sel_node_t*	sel_node)	/*!< in: parsed select node */
-{
-	sym_node_t*	table_node;
-	dict_table_t*	table;
-	order_node_t*	order_by;
-	ulint		i;
-
-	sel_node->plans = mem_heap_alloc(pars_sym_tab_global->heap,
-					 sel_node->n_tables * sizeof(plan_t));
-
-	/* Analyze the search condition to find out what we know at each
-	join stage about the conditions that the columns of a table should
-	satisfy */
-
-	table_node = sel_node->table_list;
-
-	if (sel_node->order_by == NULL) {
-		sel_node->asc = TRUE;
-	} else {
-		order_by = sel_node->order_by;
-
-		sel_node->asc = order_by->asc;
-	}
-
-	for (i = 0; i < sel_node->n_tables; i++) {
-
-		table = table_node->table;
-
-		/* Choose index through which to access the table */
-
-		opt_search_plan_for_table(sel_node, i, table);
-
-		/* Determine the search condition conjuncts we can test at
-		this table; normalize the end conditions */
-
-		opt_determine_and_normalize_test_conds(sel_node, i);
-
-		table_node = que_node_get_next(table_node);
-	}
-
-	/* The second pass works on the plan nodes only, so the table list
-	is not walked again here */
-
-	for (i = 0; i < sel_node->n_tables; i++) {
-
-		/* Classify the table columns into those we only need to access
-		but not copy, and to those we must copy to dynamic memory */
-
-		opt_classify_cols(sel_node, i);
-
-		/* Calculate possible info for accessing the clustered index
-		record */
-
-		opt_clust_access(sel_node, i);
-	}
-
-	/* Check that the plan obeys a possible order-by clause: if not,
-	an assertion error occurs */
-
-	opt_check_order_by(sel_node);
-
-#ifdef UNIV_SQL_DEBUG
-	opt_print_query_plan(sel_node);
-#endif
-}
-
-/********************************************************************//**
-Writes a description of the query plan of a select node to stderr: the
-fetch direction and the locking mode of the select, followed by one line
-per table giving the index used, the number of exactly matched fields,
-the number of fields in the search tuple and the number of end
-conditions. The locking mode fields of the select node are asserted to
-be consistent with each other. */
-UNIV_INTERN
-void
-opt_print_query_plan(
-/*=================*/
-	sel_node_t*	sel_node)	/*!< in: select node */
-{
-	plan_t*	plan;
-	ulint	n_tuple_fields;
-	ulint	k;
-
-	fputs("QUERY PLAN FOR A SELECT NODE\n", stderr);
-
-	if (sel_node->asc) {
-		fputs("Asc. search; ", stderr);
-	} else {
-		fputs("Desc. search; ", stderr);
-	}
-
-	if (sel_node->set_x_locks) {
-		fputs("sets row x-locks; ", stderr);
-		ut_a(sel_node->row_lock_mode == LOCK_X);
-		ut_a(!sel_node->consistent_read);
-	} else if (sel_node->consistent_read) {
-		fputs("consistent read; ", stderr);
-	} else {
-		ut_a(sel_node->row_lock_mode == LOCK_S);
-		fputs("sets row s-locks; ", stderr);
-	}
-
-	putc('\n', stderr);
-
-	for (k = 0; k < sel_node->n_tables; k++) {
-		plan = sel_node_get_nth_plan(sel_node, k);
-
-		/* A plan without a search tuple matches no fields */
-		n_tuple_fields = plan->tuple
-			? dtuple_get_n_fields(plan->tuple)
-			: 0;
-
-		fputs("Table ", stderr);
-		dict_index_name_print(stderr, NULL, plan->index);
-		fprintf(stderr,"; exact m. %lu, match %lu, end conds %lu\n",
-			(unsigned long) plan->n_exact_match,
-			(unsigned long) n_tuple_fields,
-			(unsigned long) UT_LIST_GET_LEN(plan->end_conds));
-	}
-}
diff --git a/storage/innodb_plugin/pars/pars0pars.c b/storage/innodb_plugin/pars/pars0pars.c
deleted file mode 100644
index 9faf36d00a8..00000000000
--- a/storage/innodb_plugin/pars/pars0pars.c
+++ /dev/null
@@ -1,2196 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file pars/pars0pars.c
-SQL parser
-
-Created 11/19/1996 Heikki Tuuri
-*******************************************************/
-
-/* Historical note: Innobase executed its first SQL string (CREATE TABLE)
-on 1/27/1998 */
-
-#include "pars0pars.h"
-
-#ifdef UNIV_NONINL
-#include "pars0pars.ic"
-#endif
-
-#include "row0sel.h"
-#include "row0ins.h"
-#include "row0upd.h"
-#include "dict0dict.h"
-#include "dict0mem.h"
-#include "dict0crea.h"
-#include "que0que.h"
-#include "pars0grm.h"
-#include "pars0opt.h"
-#include "data0data.h"
-#include "data0type.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "lock0lock.h"
-#include "eval0eval.h"
-
-#ifdef UNIV_SQL_DEBUG
-/** If the following is set TRUE, the lexer will print the SQL string
-as it tokenizes it */
-UNIV_INTERN ibool pars_print_lexed = FALSE;
-#endif /* UNIV_SQL_DEBUG */
-
-/* Global variable used while parsing a single procedure or query : the code is
-NOT re-entrant */
-UNIV_INTERN sym_tab_t* pars_sym_tab_global;
-
-/* Global variables used to denote certain reserved words, used in
-constructing the parsing tree */
-
-UNIV_INTERN pars_res_word_t pars_to_char_token = {PARS_TO_CHAR_TOKEN};
-UNIV_INTERN pars_res_word_t pars_to_number_token = {PARS_TO_NUMBER_TOKEN};
-UNIV_INTERN pars_res_word_t pars_to_binary_token = {PARS_TO_BINARY_TOKEN};
-UNIV_INTERN pars_res_word_t pars_binary_to_number_token = {PARS_BINARY_TO_NUMBER_TOKEN};
-UNIV_INTERN pars_res_word_t pars_substr_token = {PARS_SUBSTR_TOKEN};
-UNIV_INTERN pars_res_word_t pars_replstr_token = {PARS_REPLSTR_TOKEN};
-UNIV_INTERN pars_res_word_t pars_concat_token = {PARS_CONCAT_TOKEN};
-UNIV_INTERN pars_res_word_t pars_instr_token = {PARS_INSTR_TOKEN};
-UNIV_INTERN pars_res_word_t pars_length_token = {PARS_LENGTH_TOKEN};
-UNIV_INTERN pars_res_word_t pars_sysdate_token = {PARS_SYSDATE_TOKEN};
-UNIV_INTERN pars_res_word_t pars_printf_token = {PARS_PRINTF_TOKEN};
-UNIV_INTERN pars_res_word_t pars_assert_token = {PARS_ASSERT_TOKEN};
-UNIV_INTERN pars_res_word_t pars_rnd_token = {PARS_RND_TOKEN};
-UNIV_INTERN pars_res_word_t pars_rnd_str_token = {PARS_RND_STR_TOKEN};
-UNIV_INTERN pars_res_word_t pars_count_token = {PARS_COUNT_TOKEN};
-UNIV_INTERN pars_res_word_t pars_sum_token = {PARS_SUM_TOKEN};
-UNIV_INTERN pars_res_word_t pars_distinct_token = {PARS_DISTINCT_TOKEN};
-UNIV_INTERN pars_res_word_t pars_binary_token = {PARS_BINARY_TOKEN};
-UNIV_INTERN pars_res_word_t pars_blob_token = {PARS_BLOB_TOKEN};
-UNIV_INTERN pars_res_word_t pars_int_token = {PARS_INT_TOKEN};
-UNIV_INTERN pars_res_word_t pars_char_token = {PARS_CHAR_TOKEN};
-UNIV_INTERN pars_res_word_t pars_float_token = {PARS_FLOAT_TOKEN};
-UNIV_INTERN pars_res_word_t pars_update_token = {PARS_UPDATE_TOKEN};
-UNIV_INTERN pars_res_word_t pars_asc_token = {PARS_ASC_TOKEN};
-UNIV_INTERN pars_res_word_t pars_desc_token = {PARS_DESC_TOKEN};
-UNIV_INTERN pars_res_word_t pars_open_token = {PARS_OPEN_TOKEN};
-UNIV_INTERN pars_res_word_t pars_close_token = {PARS_CLOSE_TOKEN};
-UNIV_INTERN pars_res_word_t pars_share_token = {PARS_SHARE_TOKEN};
-UNIV_INTERN pars_res_word_t pars_unique_token = {PARS_UNIQUE_TOKEN};
-UNIV_INTERN pars_res_word_t pars_clustered_token = {PARS_CLUSTERED_TOKEN};
-
-/** Global variable used to denote the '*' in SELECT * FROM.. */
-UNIV_INTERN ulint pars_star_denoter = 12345678;
-
-
-/*********************************************************************//**
-Determines the class of a function code.
-@return function class: PARS_FUNC_ARITH, ... */
-static
-ulint
-pars_func_get_class(
-/*================*/
- int func) /*!< in: function code: '=', PARS_GE_TOKEN, ... */
-{
- switch (func) {
- case '+': case '-': case '*': case '/':
- return(PARS_FUNC_ARITH);
-
- case '=': case '<': case '>':
- case PARS_GE_TOKEN: case PARS_LE_TOKEN: case PARS_NE_TOKEN:
- return(PARS_FUNC_CMP);
-
- case PARS_AND_TOKEN: case PARS_OR_TOKEN: case PARS_NOT_TOKEN:
- return(PARS_FUNC_LOGICAL);
-
- case PARS_COUNT_TOKEN: case PARS_SUM_TOKEN:
- return(PARS_FUNC_AGGREGATE);
-
- case PARS_TO_CHAR_TOKEN:
- case PARS_TO_NUMBER_TOKEN:
- case PARS_TO_BINARY_TOKEN:
- case PARS_BINARY_TO_NUMBER_TOKEN:
- case PARS_SUBSTR_TOKEN:
- case PARS_CONCAT_TOKEN:
- case PARS_LENGTH_TOKEN:
- case PARS_INSTR_TOKEN:
- case PARS_SYSDATE_TOKEN:
- case PARS_NOTFOUND_TOKEN:
- case PARS_PRINTF_TOKEN:
- case PARS_ASSERT_TOKEN:
- case PARS_RND_TOKEN:
- case PARS_RND_STR_TOKEN:
- case PARS_REPLSTR_TOKEN:
- return(PARS_FUNC_PREDEFINED);
-
- default:
- return(PARS_FUNC_OTHER);
- }
-}
-
-/*********************************************************************//**
-Parses an operator or predefined function expression.
-@return own: function node in a query tree */
-static
-func_node_t*
-pars_func_low(
-/*==========*/
- int func, /*!< in: function token code */
- que_node_t* arg) /*!< in: first argument in the argument list */
-{
- func_node_t* node;
-
- node = mem_heap_alloc(pars_sym_tab_global->heap, sizeof(func_node_t));
-
- node->common.type = QUE_NODE_FUNC;
- dfield_set_data(&(node->common.val), NULL, 0);
- node->common.val_buf_size = 0;
-
- node->func = func;
-
- node->class = pars_func_get_class(func);
-
- node->args = arg;
-
- UT_LIST_ADD_LAST(func_node_list, pars_sym_tab_global->func_node_list,
- node);
- return(node);
-}
-
-/*********************************************************************//**
-Parses a function expression.
-@return own: function node in a query tree */
-UNIV_INTERN
-func_node_t*
-pars_func(
-/*======*/
- que_node_t* res_word,/*!< in: function name reserved word */
- que_node_t* arg) /*!< in: first argument in the argument list */
-{
- return(pars_func_low(((pars_res_word_t*)res_word)->code, arg));
-}
-
-/*********************************************************************//**
-Parses an operator expression.
-@return own: function node in a query tree */
-UNIV_INTERN
-func_node_t*
-pars_op(
-/*====*/
- int func, /*!< in: operator token code */
- que_node_t* arg1, /*!< in: first argument */
- que_node_t* arg2) /*!< in: second argument or NULL for an unary
- operator */
-{
- que_node_list_add_last(NULL, arg1);
-
- if (arg2) {
- que_node_list_add_last(arg1, arg2);
- }
-
- return(pars_func_low(func, arg1));
-}
-
-/*********************************************************************//**
-Parses an ORDER BY clause. Order by a single column only is supported.
-@return own: order-by node in a query tree */
-UNIV_INTERN
-order_node_t*
-pars_order_by(
-/*==========*/
- sym_node_t* column, /*!< in: column name */
- pars_res_word_t* asc) /*!< in: &pars_asc_token or pars_desc_token */
-{
- order_node_t* node;
-
- node = mem_heap_alloc(pars_sym_tab_global->heap, sizeof(order_node_t));
-
- node->common.type = QUE_NODE_ORDER;
-
- node->column = column;
-
- if (asc == &pars_asc_token) {
- node->asc = TRUE;
- } else {
- ut_a(asc == &pars_desc_token);
- node->asc = FALSE;
- }
-
- return(node);
-}
-
-/*********************************************************************//**
-Determine if a data type is a built-in string data type of the InnoDB
-SQL parser.
-@return TRUE if string data type */
-static
-ibool
-pars_is_string_type(
-/*================*/
- ulint mtype) /*!< in: main data type */
-{
- switch (mtype) {
- case DATA_VARCHAR: case DATA_CHAR:
- case DATA_FIXBINARY: case DATA_BINARY:
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Resolves the data type of a function in an expression. The argument data
-types must already be resolved. */
-static
-void
-pars_resolve_func_data_type(
-/*========================*/
- func_node_t* node) /*!< in: function node */
-{
- que_node_t* arg;
-
- ut_a(que_node_get_type(node) == QUE_NODE_FUNC);
-
- arg = node->args;
-
- switch (node->func) {
- case PARS_SUM_TOKEN:
- case '+': case '-': case '*': case '/':
- /* Inherit the data type from the first argument (which must
- not be the SQL null literal whose type is DATA_ERROR) */
-
- dtype_copy(que_node_get_data_type(node),
- que_node_get_data_type(arg));
-
- ut_a(dtype_get_mtype(que_node_get_data_type(node))
- == DATA_INT);
- break;
-
- case PARS_COUNT_TOKEN:
- ut_a(arg);
- dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
- break;
-
- case PARS_TO_CHAR_TOKEN:
- case PARS_RND_STR_TOKEN:
- ut_a(dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT);
- dtype_set(que_node_get_data_type(node), DATA_VARCHAR,
- DATA_ENGLISH, 0);
- break;
-
- case PARS_TO_BINARY_TOKEN:
- if (dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT) {
- dtype_set(que_node_get_data_type(node), DATA_VARCHAR,
- DATA_ENGLISH, 0);
- } else {
- dtype_set(que_node_get_data_type(node), DATA_BINARY,
- 0, 0);
- }
- break;
-
- case PARS_TO_NUMBER_TOKEN:
- case PARS_BINARY_TO_NUMBER_TOKEN:
- case PARS_LENGTH_TOKEN:
- case PARS_INSTR_TOKEN:
- ut_a(pars_is_string_type(que_node_get_data_type(arg)->mtype));
- dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
- break;
-
- case PARS_SYSDATE_TOKEN:
- ut_a(arg == NULL);
- dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
- break;
-
- case PARS_SUBSTR_TOKEN:
- case PARS_CONCAT_TOKEN:
- ut_a(pars_is_string_type(que_node_get_data_type(arg)->mtype));
- dtype_set(que_node_get_data_type(node), DATA_VARCHAR,
- DATA_ENGLISH, 0);
- break;
-
- case '>': case '<': case '=':
- case PARS_GE_TOKEN:
- case PARS_LE_TOKEN:
- case PARS_NE_TOKEN:
- case PARS_AND_TOKEN:
- case PARS_OR_TOKEN:
- case PARS_NOT_TOKEN:
- case PARS_NOTFOUND_TOKEN:
-
- /* We currently have no iboolean type: use integer type */
- dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
- break;
-
- case PARS_RND_TOKEN:
- ut_a(dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT);
- dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
- break;
-
- default:
- ut_error;
- }
-}
-
-/*********************************************************************//**
-Resolves the meaning of variables in an expression and the data types of
-functions. It is an error if some identifier cannot be resolved here. */
-static
-void
-pars_resolve_exp_variables_and_types(
-/*=================================*/
- sel_node_t* select_node, /*!< in: select node or NULL; if
- this is not NULL then the variable
- sym nodes are added to the
- copy_variables list of select_node */
- que_node_t* exp_node) /*!< in: expression */
-{
- func_node_t* func_node;
- que_node_t* arg;
- sym_node_t* sym_node;
- sym_node_t* node;
-
- ut_a(exp_node);
-
- if (que_node_get_type(exp_node) == QUE_NODE_FUNC) {
- func_node = exp_node;
-
- arg = func_node->args;
-
- while (arg) {
- pars_resolve_exp_variables_and_types(select_node, arg);
-
- arg = que_node_get_next(arg);
- }
-
- pars_resolve_func_data_type(func_node);
-
- return;
- }
-
- ut_a(que_node_get_type(exp_node) == QUE_NODE_SYMBOL);
-
- sym_node = exp_node;
-
- if (sym_node->resolved) {
-
- return;
- }
-
- /* Not resolved yet: look in the symbol table for a variable
- or a cursor or a function with the same name */
-
- node = UT_LIST_GET_FIRST(pars_sym_tab_global->sym_list);
-
- while (node) {
- if (node->resolved
- && ((node->token_type == SYM_VAR)
- || (node->token_type == SYM_CURSOR)
- || (node->token_type == SYM_FUNCTION))
- && node->name
- && (sym_node->name_len == node->name_len)
- && (ut_memcmp(sym_node->name, node->name,
- node->name_len) == 0)) {
-
- /* Found a variable or a cursor declared with
- the same name */
-
- break;
- }
-
- node = UT_LIST_GET_NEXT(sym_list, node);
- }
-
- if (!node) {
- fprintf(stderr, "PARSER ERROR: Unresolved identifier %s\n",
- sym_node->name);
- }
-
- ut_a(node);
-
- sym_node->resolved = TRUE;
- sym_node->token_type = SYM_IMPLICIT_VAR;
- sym_node->alias = node;
- sym_node->indirection = node;
-
- if (select_node) {
- UT_LIST_ADD_LAST(col_var_list, select_node->copy_variables,
- sym_node);
- }
-
- dfield_set_type(que_node_get_val(sym_node),
- que_node_get_data_type(node));
-}
-
-/*********************************************************************//**
-Resolves the meaning of variables in an expression list. It is an error if
-some identifier cannot be resolved here. Resolves also the data types of
-functions. */
-static
-void
-pars_resolve_exp_list_variables_and_types(
-/*======================================*/
- sel_node_t* select_node, /*!< in: select node or NULL */
- que_node_t* exp_node) /*!< in: expression list first node, or
- NULL */
-{
- while (exp_node) {
- pars_resolve_exp_variables_and_types(select_node, exp_node);
-
- exp_node = que_node_get_next(exp_node);
- }
-}
-
-/*********************************************************************//**
-Resolves the columns in an expression. */
-static
-void
-pars_resolve_exp_columns(
-/*=====================*/
- sym_node_t* table_node, /*!< in: first node in a table list */
- que_node_t* exp_node) /*!< in: expression */
-{
- func_node_t* func_node;
- que_node_t* arg;
- sym_node_t* sym_node;
- dict_table_t* table;
- sym_node_t* t_node;
- ulint n_cols;
- ulint i;
-
- ut_a(exp_node);
-
- if (que_node_get_type(exp_node) == QUE_NODE_FUNC) {
- func_node = exp_node;
-
- arg = func_node->args;
-
- while (arg) {
- pars_resolve_exp_columns(table_node, arg);
-
- arg = que_node_get_next(arg);
- }
-
- return;
- }
-
- ut_a(que_node_get_type(exp_node) == QUE_NODE_SYMBOL);
-
- sym_node = exp_node;
-
- if (sym_node->resolved) {
-
- return;
- }
-
- /* Not resolved yet: look in the table list for a column with the
- same name */
-
- t_node = table_node;
-
- while (t_node) {
- table = t_node->table;
-
- n_cols = dict_table_get_n_cols(table);
-
- for (i = 0; i < n_cols; i++) {
- const dict_col_t* col
- = dict_table_get_nth_col(table, i);
- const char* col_name
- = dict_table_get_col_name(table, i);
-
- if ((sym_node->name_len == ut_strlen(col_name))
- && (0 == ut_memcmp(sym_node->name, col_name,
- sym_node->name_len))) {
- /* Found */
- sym_node->resolved = TRUE;
- sym_node->token_type = SYM_COLUMN;
- sym_node->table = table;
- sym_node->col_no = i;
- sym_node->prefetch_buf = NULL;
-
- dict_col_copy_type(
- col,
- dfield_get_type(&sym_node
- ->common.val));
-
- return;
- }
- }
-
- t_node = que_node_get_next(t_node);
- }
-}
-
-/*********************************************************************//**
-Resolves the meaning of columns in an expression list. */
-static
-void
-pars_resolve_exp_list_columns(
-/*==========================*/
- sym_node_t* table_node, /*!< in: first node in a table list */
- que_node_t* exp_node) /*!< in: expression list first node, or
- NULL */
-{
- while (exp_node) {
- pars_resolve_exp_columns(table_node, exp_node);
-
- exp_node = que_node_get_next(exp_node);
- }
-}
-
-/*********************************************************************//**
-Retrieves the table definition for a table name id. */
-static
-void
-pars_retrieve_table_def(
-/*====================*/
- sym_node_t* sym_node) /*!< in: table node */
-{
- const char* table_name;
-
- ut_a(sym_node);
- ut_a(que_node_get_type(sym_node) == QUE_NODE_SYMBOL);
-
- sym_node->resolved = TRUE;
- sym_node->token_type = SYM_TABLE;
-
- table_name = (const char*) sym_node->name;
-
- sym_node->table = dict_table_get_low(table_name);
-
- ut_a(sym_node->table);
-}
-
-/*********************************************************************//**
-Retrieves the table definitions for a list of table name ids.
-@return number of tables */
-static
-ulint
-pars_retrieve_table_list_defs(
-/*==========================*/
- sym_node_t* sym_node) /*!< in: first table node in list */
-{
- ulint count = 0;
-
- if (sym_node == NULL) {
-
- return(count);
- }
-
- while (sym_node) {
- pars_retrieve_table_def(sym_node);
-
- count++;
-
- sym_node = que_node_get_next(sym_node);
- }
-
- return(count);
-}
-
-/*********************************************************************//**
-Adds all columns to the select list if the query is SELECT * FROM ... */
-static
-void
-pars_select_all_columns(
-/*====================*/
- sel_node_t* select_node) /*!< in: select node already containing
- the table list */
-{
- sym_node_t* col_node;
- sym_node_t* table_node;
- dict_table_t* table;
- ulint i;
-
- select_node->select_list = NULL;
-
- table_node = select_node->table_list;
-
- while (table_node) {
- table = table_node->table;
-
- for (i = 0; i < dict_table_get_n_user_cols(table); i++) {
- const char* col_name = dict_table_get_col_name(
- table, i);
-
- col_node = sym_tab_add_id(pars_sym_tab_global,
- (byte*)col_name,
- ut_strlen(col_name));
-
- select_node->select_list = que_node_list_add_last(
- select_node->select_list, col_node);
- }
-
- table_node = que_node_get_next(table_node);
- }
-}
-
-/*********************************************************************//**
-Parses a select list; creates a query graph node for the whole SELECT
-statement.
-@return own: select node in a query tree */
-UNIV_INTERN
-sel_node_t*
-pars_select_list(
-/*=============*/
- que_node_t* select_list, /*!< in: select list */
- sym_node_t* into_list) /*!< in: variables list or NULL */
-{
- sel_node_t* node;
-
- node = sel_node_create(pars_sym_tab_global->heap);
-
- node->select_list = select_list;
- node->into_list = into_list;
-
- pars_resolve_exp_list_variables_and_types(NULL, into_list);
-
- return(node);
-}
-
-/*********************************************************************//**
-Checks if the query is an aggregate query, in which case the selct list must
-contain only aggregate function items. */
-static
-void
-pars_check_aggregate(
-/*=================*/
- sel_node_t* select_node) /*!< in: select node already containing
- the select list */
-{
- que_node_t* exp_node;
- func_node_t* func_node;
- ulint n_nodes = 0;
- ulint n_aggregate_nodes = 0;
-
- exp_node = select_node->select_list;
-
- while (exp_node) {
-
- n_nodes++;
-
- if (que_node_get_type(exp_node) == QUE_NODE_FUNC) {
-
- func_node = exp_node;
-
- if (func_node->class == PARS_FUNC_AGGREGATE) {
-
- n_aggregate_nodes++;
- }
- }
-
- exp_node = que_node_get_next(exp_node);
- }
-
- if (n_aggregate_nodes > 0) {
- ut_a(n_nodes == n_aggregate_nodes);
-
- select_node->is_aggregate = TRUE;
- } else {
- select_node->is_aggregate = FALSE;
- }
-}
-
-/*********************************************************************//**
-Parses a select statement.
-@return own: select node in a query tree */
-UNIV_INTERN
-sel_node_t*
-pars_select_statement(
-/*==================*/
- sel_node_t* select_node, /*!< in: select node already containing
- the select list */
- sym_node_t* table_list, /*!< in: table list */
- que_node_t* search_cond, /*!< in: search condition or NULL */
- pars_res_word_t* for_update, /*!< in: NULL or &pars_update_token */
- pars_res_word_t* lock_shared, /*!< in: NULL or &pars_share_token */
- order_node_t* order_by) /*!< in: NULL or an order-by node */
-{
- select_node->state = SEL_NODE_OPEN;
-
- select_node->table_list = table_list;
- select_node->n_tables = pars_retrieve_table_list_defs(table_list);
-
- if (select_node->select_list == &pars_star_denoter) {
-
- /* SELECT * FROM ... */
- pars_select_all_columns(select_node);
- }
-
- if (select_node->into_list) {
- ut_a(que_node_list_get_len(select_node->into_list)
- == que_node_list_get_len(select_node->select_list));
- }
-
- UT_LIST_INIT(select_node->copy_variables);
-
- pars_resolve_exp_list_columns(table_list, select_node->select_list);
- pars_resolve_exp_list_variables_and_types(select_node,
- select_node->select_list);
- pars_check_aggregate(select_node);
-
- select_node->search_cond = search_cond;
-
- if (search_cond) {
- pars_resolve_exp_columns(table_list, search_cond);
- pars_resolve_exp_variables_and_types(select_node, search_cond);
- }
-
- if (for_update) {
- ut_a(!lock_shared);
-
- select_node->set_x_locks = TRUE;
- select_node->row_lock_mode = LOCK_X;
-
- select_node->consistent_read = FALSE;
- select_node->read_view = NULL;
- } else if (lock_shared){
- select_node->set_x_locks = FALSE;
- select_node->row_lock_mode = LOCK_S;
-
- select_node->consistent_read = FALSE;
- select_node->read_view = NULL;
- } else {
- select_node->set_x_locks = FALSE;
- select_node->row_lock_mode = LOCK_S;
-
- select_node->consistent_read = TRUE;
- }
-
- select_node->order_by = order_by;
-
- if (order_by) {
- pars_resolve_exp_columns(table_list, order_by->column);
- }
-
- /* The final value of the following fields depend on the environment
- where the select statement appears: */
-
- select_node->can_get_updated = FALSE;
- select_node->explicit_cursor = NULL;
-
- opt_search_plan(select_node);
-
- return(select_node);
-}
-
-/*********************************************************************//**
-Parses a cursor declaration.
-@return sym_node */
-UNIV_INTERN
-que_node_t*
-pars_cursor_declaration(
-/*====================*/
- sym_node_t* sym_node, /*!< in: cursor id node in the symbol
- table */
- sel_node_t* select_node) /*!< in: select node */
-{
- sym_node->resolved = TRUE;
- sym_node->token_type = SYM_CURSOR;
- sym_node->cursor_def = select_node;
-
- select_node->state = SEL_NODE_CLOSED;
- select_node->explicit_cursor = sym_node;
-
- return(sym_node);
-}
-
-/*********************************************************************//**
-Parses a function declaration.
-@return sym_node */
-UNIV_INTERN
-que_node_t*
-pars_function_declaration(
-/*======================*/
- sym_node_t* sym_node) /*!< in: function id node in the symbol
- table */
-{
- sym_node->resolved = TRUE;
- sym_node->token_type = SYM_FUNCTION;
-
- /* Check that the function exists. */
- ut_a(pars_info_get_user_func(pars_sym_tab_global->info,
- sym_node->name));
-
- return(sym_node);
-}
-
-/*********************************************************************//**
-Parses a delete or update statement start.
-@return own: update node in a query tree */
-UNIV_INTERN
-upd_node_t*
-pars_update_statement_start(
-/*========================*/
- ibool is_delete, /*!< in: TRUE if delete */
- sym_node_t* table_sym, /*!< in: table name node */
- col_assign_node_t* col_assign_list)/*!< in: column assignment list, NULL
- if delete */
-{
- upd_node_t* node;
-
- node = upd_node_create(pars_sym_tab_global->heap);
-
- node->is_delete = is_delete;
-
- node->table_sym = table_sym;
- node->col_assign_list = col_assign_list;
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses a column assignment in an update.
-@return column assignment node */
-UNIV_INTERN
-col_assign_node_t*
-pars_column_assignment(
-/*===================*/
- sym_node_t* column, /*!< in: column to assign */
- que_node_t* exp) /*!< in: value to assign */
-{
- col_assign_node_t* node;
-
- node = mem_heap_alloc(pars_sym_tab_global->heap,
- sizeof(col_assign_node_t));
- node->common.type = QUE_NODE_COL_ASSIGNMENT;
-
- node->col = column;
- node->val = exp;
-
- return(node);
-}
-
-/*********************************************************************//**
-Processes an update node assignment list. */
-static
-void
-pars_process_assign_list(
-/*=====================*/
- upd_node_t* node) /*!< in: update node */
-{
- col_assign_node_t* col_assign_list;
- sym_node_t* table_sym;
- col_assign_node_t* assign_node;
- upd_field_t* upd_field;
- dict_index_t* clust_index;
- sym_node_t* col_sym;
- ulint changes_ord_field;
- ulint changes_field_size;
- ulint n_assigns;
- ulint i;
-
- table_sym = node->table_sym;
- col_assign_list = node->col_assign_list;
- clust_index = dict_table_get_first_index(node->table);
-
- assign_node = col_assign_list;
- n_assigns = 0;
-
- while (assign_node) {
- pars_resolve_exp_columns(table_sym, assign_node->col);
- pars_resolve_exp_columns(table_sym, assign_node->val);
- pars_resolve_exp_variables_and_types(NULL, assign_node->val);
-#if 0
- ut_a(dtype_get_mtype(
- dfield_get_type(que_node_get_val(
- assign_node->col)))
- == dtype_get_mtype(
- dfield_get_type(que_node_get_val(
- assign_node->val))));
-#endif
-
- /* Add to the update node all the columns found in assignment
- values as columns to copy: therefore, TRUE */
-
- opt_find_all_cols(TRUE, clust_index, &(node->columns), NULL,
- assign_node->val);
- n_assigns++;
-
- assign_node = que_node_get_next(assign_node);
- }
-
- node->update = upd_create(n_assigns, pars_sym_tab_global->heap);
-
- assign_node = col_assign_list;
-
- changes_field_size = UPD_NODE_NO_SIZE_CHANGE;
-
- for (i = 0; i < n_assigns; i++) {
- upd_field = upd_get_nth_field(node->update, i);
-
- col_sym = assign_node->col;
-
- upd_field_set_field_no(upd_field, dict_index_get_nth_col_pos(
- clust_index, col_sym->col_no),
- clust_index, NULL);
- upd_field->exp = assign_node->val;
-
- if (!dict_col_get_fixed_size(
- dict_index_get_nth_col(clust_index,
- upd_field->field_no),
- dict_table_is_comp(node->table))) {
- changes_field_size = 0;
- }
-
- assign_node = que_node_get_next(assign_node);
- }
-
- /* Find out if the update can modify an ordering field in any index */
-
- changes_ord_field = UPD_NODE_NO_ORD_CHANGE;
-
- if (row_upd_changes_some_index_ord_field_binary(node->table,
- node->update)) {
- changes_ord_field = 0;
- }
-
- node->cmpl_info = changes_ord_field | changes_field_size;
-}
-
-/*********************************************************************//**
-Parses an update or delete statement.
-@return own: update node in a query tree */
-UNIV_INTERN
-upd_node_t*
-pars_update_statement(
-/*==================*/
- upd_node_t* node, /*!< in: update node */
- sym_node_t* cursor_sym, /*!< in: pointer to a cursor entry in
- the symbol table or NULL */
- que_node_t* search_cond) /*!< in: search condition or NULL */
-{
- sym_node_t* table_sym;
- sel_node_t* sel_node;
- plan_t* plan;
-
- table_sym = node->table_sym;
-
- pars_retrieve_table_def(table_sym);
- node->table = table_sym->table;
-
- UT_LIST_INIT(node->columns);
-
- /* Make the single table node into a list of table nodes of length 1 */
-
- que_node_list_add_last(NULL, table_sym);
-
- if (cursor_sym) {
- pars_resolve_exp_variables_and_types(NULL, cursor_sym);
-
- sel_node = cursor_sym->alias->cursor_def;
-
- node->searched_update = FALSE;
- } else {
- sel_node = pars_select_list(NULL, NULL);
-
- pars_select_statement(sel_node, table_sym, search_cond, NULL,
- &pars_share_token, NULL);
- node->searched_update = TRUE;
- sel_node->common.parent = node;
- }
-
- node->select = sel_node;
-
- ut_a(!node->is_delete || (node->col_assign_list == NULL));
- ut_a(node->is_delete || (node->col_assign_list != NULL));
-
- if (node->is_delete) {
- node->cmpl_info = 0;
- } else {
- pars_process_assign_list(node);
- }
-
- if (node->searched_update) {
- node->has_clust_rec_x_lock = TRUE;
- sel_node->set_x_locks = TRUE;
- sel_node->row_lock_mode = LOCK_X;
- } else {
- node->has_clust_rec_x_lock = sel_node->set_x_locks;
- }
-
- ut_a(sel_node->n_tables == 1);
- ut_a(sel_node->consistent_read == FALSE);
- ut_a(sel_node->order_by == NULL);
- ut_a(sel_node->is_aggregate == FALSE);
-
- sel_node->can_get_updated = TRUE;
-
- node->state = UPD_NODE_UPDATE_CLUSTERED;
-
- plan = sel_node_get_nth_plan(sel_node, 0);
-
- plan->no_prefetch = TRUE;
-
- if (!dict_index_is_clust(plan->index)) {
-
- plan->must_get_clust = TRUE;
-
- node->pcur = &(plan->clust_pcur);
- } else {
- node->pcur = &(plan->pcur);
- }
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses an insert statement.
-@return own: update node in a query tree */
-UNIV_INTERN
-ins_node_t*
-pars_insert_statement(
-/*==================*/
- sym_node_t* table_sym, /*!< in: table name node */
- que_node_t* values_list, /*!< in: value expression list or NULL */
- sel_node_t* select) /*!< in: select condition or NULL */
-{
- ins_node_t* node;
- dtuple_t* row;
- ulint ins_type;
-
- ut_a(values_list || select);
- ut_a(!values_list || !select);
-
- if (values_list) {
- ins_type = INS_VALUES;
- } else {
- ins_type = INS_SEARCHED;
- }
-
- pars_retrieve_table_def(table_sym);
-
- node = ins_node_create(ins_type, table_sym->table,
- pars_sym_tab_global->heap);
-
- row = dtuple_create(pars_sym_tab_global->heap,
- dict_table_get_n_cols(node->table));
-
- dict_table_copy_types(row, table_sym->table);
-
- ins_node_set_new_row(node, row);
-
- node->select = select;
-
- if (select) {
- select->common.parent = node;
-
- ut_a(que_node_list_get_len(select->select_list)
- == dict_table_get_n_user_cols(table_sym->table));
- }
-
- node->values_list = values_list;
-
- if (node->values_list) {
- pars_resolve_exp_list_variables_and_types(NULL, values_list);
-
- ut_a(que_node_list_get_len(values_list)
- == dict_table_get_n_user_cols(table_sym->table));
- }
-
- return(node);
-}
-
-/*********************************************************************//**
-Set the type of a dfield. */
-static
-void
-pars_set_dfield_type(
-/*=================*/
- dfield_t* dfield, /*!< in: dfield */
- pars_res_word_t* type, /*!< in: pointer to a type
- token */
- ulint len, /*!< in: length, or 0 */
- ibool is_unsigned, /*!< in: if TRUE, column is
- UNSIGNED. */
- ibool is_not_null) /*!< in: if TRUE, column is
- NOT NULL. */
-{
- ulint flags = 0;
-
- if (is_not_null) {
- flags |= DATA_NOT_NULL;
- }
-
- if (is_unsigned) {
- flags |= DATA_UNSIGNED;
- }
-
- if (type == &pars_int_token) {
- ut_a(len == 0);
-
- dtype_set(dfield_get_type(dfield), DATA_INT, flags, 4);
-
- } else if (type == &pars_char_token) {
- ut_a(len == 0);
-
- dtype_set(dfield_get_type(dfield), DATA_VARCHAR,
- DATA_ENGLISH | flags, 0);
- } else if (type == &pars_binary_token) {
- ut_a(len != 0);
-
- dtype_set(dfield_get_type(dfield), DATA_FIXBINARY,
- DATA_BINARY_TYPE | flags, len);
- } else if (type == &pars_blob_token) {
- ut_a(len == 0);
-
- dtype_set(dfield_get_type(dfield), DATA_BLOB,
- DATA_BINARY_TYPE | flags, 0);
- } else {
- ut_error;
- }
-}
-
-/*********************************************************************//**
-Parses a variable declaration.
-@return own: symbol table node of type SYM_VAR */
-UNIV_INTERN
-sym_node_t*
-pars_variable_declaration(
-/*======================*/
- sym_node_t* node, /*!< in: symbol table node allocated for the
- id of the variable */
- pars_res_word_t* type) /*!< in: pointer to a type token */
-{
- node->resolved = TRUE;
- node->token_type = SYM_VAR;
-
- node->param_type = PARS_NOT_PARAM;
-
- pars_set_dfield_type(que_node_get_val(node), type, 0, FALSE, FALSE);
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses a procedure parameter declaration.
-@return own: symbol table node of type SYM_VAR */
-UNIV_INTERN
-sym_node_t*
-pars_parameter_declaration(
-/*=======================*/
- sym_node_t* node, /*!< in: symbol table node allocated for the
- id of the parameter */
- ulint param_type,
- /*!< in: PARS_INPUT or PARS_OUTPUT */
- pars_res_word_t* type) /*!< in: pointer to a type token */
-{
- ut_a((param_type == PARS_INPUT) || (param_type == PARS_OUTPUT));
-
- pars_variable_declaration(node, type);
-
- node->param_type = param_type;
-
- return(node);
-}
-
-/*********************************************************************//**
-Sets the parent field in a query node list. */
-static
-void
-pars_set_parent_in_list(
-/*====================*/
- que_node_t* node_list, /*!< in: first node in a list */
- que_node_t* parent) /*!< in: parent value to set in all
- nodes of the list */
-{
- que_common_t* common;
-
- common = node_list;
-
- while (common) {
- common->parent = parent;
-
- common = que_node_get_next(common);
- }
-}
-
-/*********************************************************************//**
-Parses an elsif element.
-@return elsif node */
-UNIV_INTERN
-elsif_node_t*
-pars_elsif_element(
-/*===============*/
- que_node_t* cond, /*!< in: if-condition */
- que_node_t* stat_list) /*!< in: statement list */
-{
- elsif_node_t* node;
-
- node = mem_heap_alloc(pars_sym_tab_global->heap, sizeof(elsif_node_t));
-
- node->common.type = QUE_NODE_ELSIF;
-
- node->cond = cond;
-
- pars_resolve_exp_variables_and_types(NULL, cond);
-
- node->stat_list = stat_list;
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses an if-statement.
-@return if-statement node */
-UNIV_INTERN
-if_node_t*
-pars_if_statement(
-/*==============*/
- que_node_t* cond, /*!< in: if-condition */
- que_node_t* stat_list, /*!< in: statement list */
- que_node_t* else_part) /*!< in: else-part statement list
- or elsif element list */
-{
- if_node_t* node;
- elsif_node_t* elsif_node;
-
- node = mem_heap_alloc(pars_sym_tab_global->heap, sizeof(if_node_t));
-
- node->common.type = QUE_NODE_IF;
-
- node->cond = cond;
-
- pars_resolve_exp_variables_and_types(NULL, cond);
-
- node->stat_list = stat_list;
-
- if (else_part && (que_node_get_type(else_part) == QUE_NODE_ELSIF)) {
-
- /* There is a list of elsif conditions */
-
- node->else_part = NULL;
- node->elsif_list = else_part;
-
- elsif_node = else_part;
-
- while (elsif_node) {
- pars_set_parent_in_list(elsif_node->stat_list, node);
-
- elsif_node = que_node_get_next(elsif_node);
- }
- } else {
- node->else_part = else_part;
- node->elsif_list = NULL;
-
- pars_set_parent_in_list(else_part, node);
- }
-
- pars_set_parent_in_list(stat_list, node);
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses a while-statement.
-@return while-statement node */
-UNIV_INTERN
-while_node_t*
-pars_while_statement(
-/*=================*/
- que_node_t* cond, /*!< in: while-condition */
- que_node_t* stat_list) /*!< in: statement list */
-{
- while_node_t* node;
-
- node = mem_heap_alloc(pars_sym_tab_global->heap, sizeof(while_node_t));
-
- node->common.type = QUE_NODE_WHILE;
-
- node->cond = cond;
-
- pars_resolve_exp_variables_and_types(NULL, cond);
-
- node->stat_list = stat_list;
-
- pars_set_parent_in_list(stat_list, node);
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses a for-loop-statement.
-@return for-statement node */
-UNIV_INTERN
-for_node_t*
-pars_for_statement(
-/*===============*/
- sym_node_t* loop_var, /*!< in: loop variable */
- que_node_t* loop_start_limit,/*!< in: loop start expression */
- que_node_t* loop_end_limit, /*!< in: loop end expression */
- que_node_t* stat_list) /*!< in: statement list */
-{
- for_node_t* node;
-
- node = mem_heap_alloc(pars_sym_tab_global->heap, sizeof(for_node_t));
-
- node->common.type = QUE_NODE_FOR;
-
- pars_resolve_exp_variables_and_types(NULL, loop_var);
- pars_resolve_exp_variables_and_types(NULL, loop_start_limit);
- pars_resolve_exp_variables_and_types(NULL, loop_end_limit);
-
- node->loop_var = loop_var->indirection;
-
- ut_a(loop_var->indirection);
-
- node->loop_start_limit = loop_start_limit;
- node->loop_end_limit = loop_end_limit;
-
- node->stat_list = stat_list;
-
- pars_set_parent_in_list(stat_list, node);
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses an exit statement.
-@return exit statement node */
-UNIV_INTERN
-exit_node_t*
-pars_exit_statement(void)
-/*=====================*/
-{
- exit_node_t* node;
-
- node = mem_heap_alloc(pars_sym_tab_global->heap, sizeof(exit_node_t));
- node->common.type = QUE_NODE_EXIT;
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses a return-statement.
-@return return-statement node */
-UNIV_INTERN
-return_node_t*
-pars_return_statement(void)
-/*=======================*/
-{
- return_node_t* node;
-
- node = mem_heap_alloc(pars_sym_tab_global->heap,
- sizeof(return_node_t));
- node->common.type = QUE_NODE_RETURN;
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses an assignment statement.
-@return assignment statement node */
-UNIV_INTERN
-assign_node_t*
-pars_assignment_statement(
-/*======================*/
- sym_node_t* var, /*!< in: variable to assign */
- que_node_t* val) /*!< in: value to assign */
-{
- assign_node_t* node;
-
- node = mem_heap_alloc(pars_sym_tab_global->heap,
- sizeof(assign_node_t));
- node->common.type = QUE_NODE_ASSIGNMENT;
-
- node->var = var;
- node->val = val;
-
- pars_resolve_exp_variables_and_types(NULL, var);
- pars_resolve_exp_variables_and_types(NULL, val);
-
- ut_a(dtype_get_mtype(dfield_get_type(que_node_get_val(var)))
- == dtype_get_mtype(dfield_get_type(que_node_get_val(val))));
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses a procedure call.
-@return function node */
-UNIV_INTERN
-func_node_t*
-pars_procedure_call(
-/*================*/
- que_node_t* res_word,/*!< in: procedure name reserved word */
- que_node_t* args) /*!< in: argument list */
-{
- func_node_t* node;
-
- node = pars_func(res_word, args);
-
- pars_resolve_exp_list_variables_and_types(NULL, args);
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses a fetch statement. into_list or user_func (but not both) must be
-non-NULL.
-@return fetch statement node */
-UNIV_INTERN
-fetch_node_t*
-pars_fetch_statement(
-/*=================*/
- sym_node_t* cursor, /*!< in: cursor node */
- sym_node_t* into_list, /*!< in: variables to set, or NULL */
- sym_node_t* user_func) /*!< in: user function name, or NULL */
-{
- sym_node_t* cursor_decl;
- fetch_node_t* node;
-
- /* Logical XOR. */
- ut_a(!into_list != !user_func);
-
- node = mem_heap_alloc(pars_sym_tab_global->heap, sizeof(fetch_node_t));
-
- node->common.type = QUE_NODE_FETCH;
-
- pars_resolve_exp_variables_and_types(NULL, cursor);
-
- if (into_list) {
- pars_resolve_exp_list_variables_and_types(NULL, into_list);
- node->into_list = into_list;
- node->func = NULL;
- } else {
- pars_resolve_exp_variables_and_types(NULL, user_func);
-
- node->func = pars_info_get_user_func(pars_sym_tab_global->info,
- user_func->name);
- ut_a(node->func);
-
- node->into_list = NULL;
- }
-
- cursor_decl = cursor->alias;
-
- ut_a(cursor_decl->token_type == SYM_CURSOR);
-
- node->cursor_def = cursor_decl->cursor_def;
-
- if (into_list) {
- ut_a(que_node_list_get_len(into_list)
- == que_node_list_get_len(node->cursor_def->select_list));
- }
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses an open or close cursor statement.
-@return fetch statement node */
-UNIV_INTERN
-open_node_t*
-pars_open_statement(
-/*================*/
- ulint type, /*!< in: ROW_SEL_OPEN_CURSOR
- or ROW_SEL_CLOSE_CURSOR */
- sym_node_t* cursor) /*!< in: cursor node */
-{
- sym_node_t* cursor_decl;
- open_node_t* node;
-
- node = mem_heap_alloc(pars_sym_tab_global->heap, sizeof(open_node_t));
-
- node->common.type = QUE_NODE_OPEN;
-
- pars_resolve_exp_variables_and_types(NULL, cursor);
-
- cursor_decl = cursor->alias;
-
- ut_a(cursor_decl->token_type == SYM_CURSOR);
-
- node->op_type = type;
- node->cursor_def = cursor_decl->cursor_def;
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses a row_printf-statement.
-@return row_printf-statement node */
-UNIV_INTERN
-row_printf_node_t*
-pars_row_printf_statement(
-/*======================*/
- sel_node_t* sel_node) /*!< in: select node */
-{
- row_printf_node_t* node;
-
- node = mem_heap_alloc(pars_sym_tab_global->heap,
- sizeof(row_printf_node_t));
- node->common.type = QUE_NODE_ROW_PRINTF;
-
- node->sel_node = sel_node;
-
- sel_node->common.parent = node;
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses a commit statement.
-@return own: commit node struct */
-UNIV_INTERN
-commit_node_t*
-pars_commit_statement(void)
-/*=======================*/
-{
- return(commit_node_create(pars_sym_tab_global->heap));
-}
-
-/*********************************************************************//**
-Parses a rollback statement.
-@return own: rollback node struct */
-UNIV_INTERN
-roll_node_t*
-pars_rollback_statement(void)
-/*=========================*/
-{
- return(roll_node_create(pars_sym_tab_global->heap));
-}
-
-/*********************************************************************//**
-Parses a column definition at a table creation.
-@return column sym table node */
-UNIV_INTERN
-sym_node_t*
-pars_column_def(
-/*============*/
- sym_node_t* sym_node, /*!< in: column node in the
- symbol table */
- pars_res_word_t* type, /*!< in: data type */
- sym_node_t* len, /*!< in: length of column, or
- NULL */
- void* is_unsigned, /*!< in: if not NULL, column
- is of type UNSIGNED. */
- void* is_not_null) /*!< in: if not NULL, column
- is of type NOT NULL. */
-{
- ulint len2;
-
- if (len) {
- len2 = eval_node_get_int_val(len);
- } else {
- len2 = 0;
- }
-
- pars_set_dfield_type(que_node_get_val(sym_node), type, len2,
- is_unsigned != NULL, is_not_null != NULL);
-
- return(sym_node);
-}
-
-/*********************************************************************//**
-Parses a table creation operation.
-@return table create subgraph */
-UNIV_INTERN
-tab_node_t*
-pars_create_table(
-/*==============*/
- sym_node_t* table_sym, /*!< in: table name node in the symbol
- table */
- sym_node_t* column_defs, /*!< in: list of column names */
- void* not_fit_in_memory __attribute__((unused)))
- /*!< in: a non-NULL pointer means that
- this is a table which in simulations
- should be simulated as not fitting
- in memory; thread is put to sleep
- to simulate disk accesses; NOTE that
- this flag is not stored to the data
- dictionary on disk, and the database
- will forget about non-NULL value if
- it has to reload the table definition
- from disk */
-{
- dict_table_t* table;
- sym_node_t* column;
- tab_node_t* node;
- const dtype_t* dtype;
- ulint n_cols;
-
- n_cols = que_node_list_get_len(column_defs);
-
- /* As the InnoDB SQL parser is for internal use only,
- for creating some system tables, this function will only
- create tables in the old (not compact) record format. */
- table = dict_mem_table_create(table_sym->name, 0, n_cols, 0);
-
-#ifdef UNIV_DEBUG
- if (not_fit_in_memory != NULL) {
- table->does_not_fit_in_memory = TRUE;
- }
-#endif /* UNIV_DEBUG */
- column = column_defs;
-
- while (column) {
- dtype = dfield_get_type(que_node_get_val(column));
-
- dict_mem_table_add_col(table, table->heap,
- column->name, dtype->mtype,
- dtype->prtype, dtype->len);
- column->resolved = TRUE;
- column->token_type = SYM_COLUMN;
-
- column = que_node_get_next(column);
- }
-
- node = tab_create_graph_create(table, pars_sym_tab_global->heap);
-
- table_sym->resolved = TRUE;
- table_sym->token_type = SYM_TABLE;
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses an index creation operation.
-@return index create subgraph */
-UNIV_INTERN
-ind_node_t*
-pars_create_index(
-/*==============*/
- pars_res_word_t* unique_def, /*!< in: not NULL if a unique index */
- pars_res_word_t* clustered_def, /*!< in: not NULL if a clustered index */
- sym_node_t* index_sym, /*!< in: index name node in the symbol
- table */
- sym_node_t* table_sym, /*!< in: table name node in the symbol
- table */
- sym_node_t* column_list) /*!< in: list of column names */
-{
- dict_index_t* index;
- sym_node_t* column;
- ind_node_t* node;
- ulint n_fields;
- ulint ind_type;
-
- n_fields = que_node_list_get_len(column_list);
-
- ind_type = 0;
-
- if (unique_def) {
- ind_type = ind_type | DICT_UNIQUE;
- }
-
- if (clustered_def) {
- ind_type = ind_type | DICT_CLUSTERED;
- }
-
- index = dict_mem_index_create(table_sym->name, index_sym->name, 0,
- ind_type, n_fields);
- column = column_list;
-
- while (column) {
- dict_mem_index_add_field(index, column->name, 0);
-
- column->resolved = TRUE;
- column->token_type = SYM_COLUMN;
-
- column = que_node_get_next(column);
- }
-
- node = ind_create_graph_create(index, pars_sym_tab_global->heap);
-
- table_sym->resolved = TRUE;
- table_sym->token_type = SYM_TABLE;
-
- index_sym->resolved = TRUE;
- index_sym->token_type = SYM_TABLE;
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses a procedure definition.
-@return query fork node */
-UNIV_INTERN
-que_fork_t*
-pars_procedure_definition(
-/*======================*/
- sym_node_t* sym_node, /*!< in: procedure id node in the symbol
- table */
- sym_node_t* param_list, /*!< in: parameter declaration list */
- que_node_t* stat_list) /*!< in: statement list */
-{
- proc_node_t* node;
- que_fork_t* fork;
- que_thr_t* thr;
- mem_heap_t* heap;
-
- heap = pars_sym_tab_global->heap;
-
- fork = que_fork_create(NULL, NULL, QUE_FORK_PROCEDURE, heap);
- fork->trx = NULL;
-
- thr = que_thr_create(fork, heap);
-
- node = mem_heap_alloc(heap, sizeof(proc_node_t));
-
- node->common.type = QUE_NODE_PROC;
- node->common.parent = thr;
-
- sym_node->token_type = SYM_PROCEDURE_NAME;
- sym_node->resolved = TRUE;
-
- node->proc_id = sym_node;
- node->param_list = param_list;
- node->stat_list = stat_list;
-
- pars_set_parent_in_list(stat_list, node);
-
- node->sym_tab = pars_sym_tab_global;
-
- thr->child = node;
-
- pars_sym_tab_global->query_graph = fork;
-
- return(fork);
-}
-
-/*************************************************************//**
-Parses a stored procedure call, when this is not within another stored
-procedure, that is, the client issues a procedure call directly.
-In MySQL/InnoDB, stored InnoDB procedures are invoked via the
-parsed procedure tree, not via InnoDB SQL, so this function is not used.
-@return query graph */
-UNIV_INTERN
-que_fork_t*
-pars_stored_procedure_call(
-/*=======================*/
- sym_node_t* sym_node __attribute__((unused)))
- /*!< in: stored procedure name */
-{
- ut_error;
- return(NULL);
-}
-
-/*************************************************************//**
-Retrieves characters to the lexical analyzer. */
-UNIV_INTERN
-void
-pars_get_lex_chars(
-/*===============*/
- char* buf, /*!< in/out: buffer where to copy */
- int* result, /*!< out: number of characters copied or EOF */
- int max_size) /*!< in: maximum number of characters which fit
- in the buffer */
-{
- int len;
-
- len = pars_sym_tab_global->string_len
- - pars_sym_tab_global->next_char_pos;
- if (len == 0) {
-#ifdef YYDEBUG
- /* fputs("SQL string ends\n", stderr); */
-#endif
- *result = 0;
-
- return;
- }
-
- if (len > max_size) {
- len = max_size;
- }
-
-#ifdef UNIV_SQL_DEBUG
- if (pars_print_lexed) {
-
- if (len >= 5) {
- len = 5;
- }
-
- fwrite(pars_sym_tab_global->sql_string
- + pars_sym_tab_global->next_char_pos,
- 1, len, stderr);
- }
-#endif /* UNIV_SQL_DEBUG */
-
- ut_memcpy(buf, pars_sym_tab_global->sql_string
- + pars_sym_tab_global->next_char_pos, len);
- *result = len;
-
- pars_sym_tab_global->next_char_pos += len;
-}
-
-/*************************************************************//**
-Called by yyparse on error. */
-UNIV_INTERN
-void
-yyerror(
-/*====*/
- const char* s __attribute__((unused)))
- /*!< in: error message string */
-{
- ut_ad(s);
-
- fputs("PARSER ERROR: Syntax error in SQL string\n", stderr);
-
- ut_error;
-}
-
-/*************************************************************//**
-Parses an SQL string returning the query graph.
-@return own: the query graph */
-UNIV_INTERN
-que_t*
-pars_sql(
-/*=====*/
- pars_info_t* info, /*!< in: extra information, or NULL */
- const char* str) /*!< in: SQL string */
-{
- sym_node_t* sym_node;
- mem_heap_t* heap;
- que_t* graph;
-
- ut_ad(str);
-
- heap = mem_heap_create(256);
-
- /* Currently, the parser is not reentrant: */
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- pars_sym_tab_global = sym_tab_create(heap);
-
- pars_sym_tab_global->string_len = strlen(str);
- pars_sym_tab_global->sql_string = mem_heap_dup(
- heap, str, pars_sym_tab_global->string_len + 1);
- pars_sym_tab_global->next_char_pos = 0;
- pars_sym_tab_global->info = info;
-
- yyparse();
-
- sym_node = UT_LIST_GET_FIRST(pars_sym_tab_global->sym_list);
-
- while (sym_node) {
- ut_a(sym_node->resolved);
-
- sym_node = UT_LIST_GET_NEXT(sym_list, sym_node);
- }
-
- graph = pars_sym_tab_global->query_graph;
-
- graph->sym_tab = pars_sym_tab_global;
- graph->info = info;
-
- /* fprintf(stderr, "SQL graph size %lu\n", mem_heap_get_size(heap)); */
-
- return(graph);
-}
-
-/******************************************************************//**
-Completes a query graph by adding query thread and fork nodes
-above it and prepares the graph for running. The fork created is of
-type QUE_FORK_MYSQL_INTERFACE.
-@return query thread node to run */
-UNIV_INTERN
-que_thr_t*
-pars_complete_graph_for_exec(
-/*=========================*/
- que_node_t* node, /*!< in: root node for an incomplete
- query graph */
- trx_t* trx, /*!< in: transaction handle */
- mem_heap_t* heap) /*!< in: memory heap from which allocated */
-{
- que_fork_t* fork;
- que_thr_t* thr;
-
- fork = que_fork_create(NULL, NULL, QUE_FORK_MYSQL_INTERFACE, heap);
- fork->trx = trx;
-
- thr = que_thr_create(fork, heap);
-
- thr->child = node;
-
- que_node_set_parent(node, thr);
-
- trx->graph = NULL;
-
- return(thr);
-}
-
-/****************************************************************//**
-Create parser info struct.
-@return own: info struct */
-UNIV_INTERN
-pars_info_t*
-pars_info_create(void)
-/*==================*/
-{
- pars_info_t* info;
- mem_heap_t* heap;
-
- heap = mem_heap_create(512);
-
- info = mem_heap_alloc(heap, sizeof(*info));
-
- info->heap = heap;
- info->funcs = NULL;
- info->bound_lits = NULL;
- info->bound_ids = NULL;
- info->graph_owns_us = TRUE;
-
- return(info);
-}
-
-/****************************************************************//**
-Free info struct and everything it contains. */
-UNIV_INTERN
-void
-pars_info_free(
-/*===========*/
- pars_info_t* info) /*!< in, own: info struct */
-{
- mem_heap_free(info->heap);
-}
-
-/****************************************************************//**
-Add bound literal. */
-UNIV_INTERN
-void
-pars_info_add_literal(
-/*==================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name, /*!< in: name */
- const void* address, /*!< in: address */
- ulint length, /*!< in: length of data */
- ulint type, /*!< in: type, e.g. DATA_FIXBINARY */
- ulint prtype) /*!< in: precise type, e.g.
- DATA_UNSIGNED */
-{
- pars_bound_lit_t* pbl;
-
- ut_ad(!pars_info_get_bound_lit(info, name));
-
- pbl = mem_heap_alloc(info->heap, sizeof(*pbl));
-
- pbl->name = name;
- pbl->address = address;
- pbl->length = length;
- pbl->type = type;
- pbl->prtype = prtype;
-
- if (!info->bound_lits) {
- info->bound_lits = ib_vector_create(info->heap, 8);
- }
-
- ib_vector_push(info->bound_lits, pbl);
-}
-
-/****************************************************************//**
-Equivalent to pars_info_add_literal(info, name, str, strlen(str),
-DATA_VARCHAR, DATA_ENGLISH). */
-UNIV_INTERN
-void
-pars_info_add_str_literal(
-/*======================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name, /*!< in: name */
- const char* str) /*!< in: string */
-{
- pars_info_add_literal(info, name, str, strlen(str),
- DATA_VARCHAR, DATA_ENGLISH);
-}
-
-/****************************************************************//**
-Equivalent to:
-
-char buf[4];
-mach_write_to_4(buf, val);
-pars_info_add_literal(info, name, buf, 4, DATA_INT, 0);
-
-except that the buffer is dynamically allocated from the info struct's
-heap. */
-UNIV_INTERN
-void
-pars_info_add_int4_literal(
-/*=======================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name, /*!< in: name */
- lint val) /*!< in: value */
-{
- byte* buf = mem_heap_alloc(info->heap, 4);
-
- mach_write_to_4(buf, val);
- pars_info_add_literal(info, name, buf, 4, DATA_INT, 0);
-}
-
-/****************************************************************//**
-Equivalent to:
-
-char buf[8];
-mach_write_to_8(buf, val);
-pars_info_add_literal(info, name, buf, 8, DATA_FIXBINARY, 0);
-
-except that the buffer is dynamically allocated from the info struct's
-heap. */
-UNIV_INTERN
-void
-pars_info_add_dulint_literal(
-/*=========================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name, /*!< in: name */
- dulint val) /*!< in: value */
-{
- byte* buf = mem_heap_alloc(info->heap, 8);
-
- mach_write_to_8(buf, val);
-
- pars_info_add_literal(info, name, buf, 8, DATA_FIXBINARY, 0);
-}
-
-/****************************************************************//**
-Add user function. */
-UNIV_INTERN
-void
-pars_info_add_function(
-/*===================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name, /*!< in: function name */
- pars_user_func_cb_t func, /*!< in: function address */
- void* arg) /*!< in: user-supplied argument */
-{
- pars_user_func_t* puf;
-
- ut_ad(!pars_info_get_user_func(info, name));
-
- puf = mem_heap_alloc(info->heap, sizeof(*puf));
-
- puf->name = name;
- puf->func = func;
- puf->arg = arg;
-
- if (!info->funcs) {
- info->funcs = ib_vector_create(info->heap, 8);
- }
-
- ib_vector_push(info->funcs, puf);
-}
-
-/****************************************************************//**
-Add bound id. */
-UNIV_INTERN
-void
-pars_info_add_id(
-/*=============*/
- pars_info_t* info, /*!< in: info struct */
- const char* name, /*!< in: name */
- const char* id) /*!< in: id */
-{
- pars_bound_id_t* bid;
-
- ut_ad(!pars_info_get_bound_id(info, name));
-
- bid = mem_heap_alloc(info->heap, sizeof(*bid));
-
- bid->name = name;
- bid->id = id;
-
- if (!info->bound_ids) {
- info->bound_ids = ib_vector_create(info->heap, 8);
- }
-
- ib_vector_push(info->bound_ids, bid);
-}
-
-/****************************************************************//**
-Get user function with the given name.
-@return user func, or NULL if not found */
-UNIV_INTERN
-pars_user_func_t*
-pars_info_get_user_func(
-/*====================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name) /*!< in: function name to find*/
-{
- ulint i;
- ib_vector_t* vec;
-
- if (!info || !info->funcs) {
- return(NULL);
- }
-
- vec = info->funcs;
-
- for (i = 0; i < ib_vector_size(vec); i++) {
- pars_user_func_t* puf = ib_vector_get(vec, i);
-
- if (strcmp(puf->name, name) == 0) {
- return(puf);
- }
- }
-
- return(NULL);
-}
-
-/****************************************************************//**
-Get bound literal with the given name.
-@return bound literal, or NULL if not found */
-UNIV_INTERN
-pars_bound_lit_t*
-pars_info_get_bound_lit(
-/*====================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name) /*!< in: bound literal name to find */
-{
- ulint i;
- ib_vector_t* vec;
-
- if (!info || !info->bound_lits) {
- return(NULL);
- }
-
- vec = info->bound_lits;
-
- for (i = 0; i < ib_vector_size(vec); i++) {
- pars_bound_lit_t* pbl = ib_vector_get(vec, i);
-
- if (strcmp(pbl->name, name) == 0) {
- return(pbl);
- }
- }
-
- return(NULL);
-}
-
-/****************************************************************//**
-Get bound id with the given name.
-@return bound id, or NULL if not found */
-UNIV_INTERN
-pars_bound_id_t*
-pars_info_get_bound_id(
-/*===================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name) /*!< in: bound id name to find */
-{
- ulint i;
- ib_vector_t* vec;
-
- if (!info || !info->bound_ids) {
- return(NULL);
- }
-
- vec = info->bound_ids;
-
- for (i = 0; i < ib_vector_size(vec); i++) {
- pars_bound_id_t* bid = ib_vector_get(vec, i);
-
- if (strcmp(bid->name, name) == 0) {
- return(bid);
- }
- }
-
- return(NULL);
-}
diff --git a/storage/innodb_plugin/pars/pars0sym.c b/storage/innodb_plugin/pars/pars0sym.c
deleted file mode 100644
index b56350116bb..00000000000
--- a/storage/innodb_plugin/pars/pars0sym.c
+++ /dev/null
@@ -1,371 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file pars/pars0sym.c
-SQL parser symbol table
-
-Created 12/15/1997 Heikki Tuuri
-*******************************************************/
-
-#include "pars0sym.h"
-
-#ifdef UNIV_NONINL
-#include "pars0sym.ic"
-#endif
-
-#include "mem0mem.h"
-#include "data0type.h"
-#include "data0data.h"
-#include "pars0grm.h"
-#include "pars0pars.h"
-#include "que0que.h"
-#include "eval0eval.h"
-#include "row0sel.h"
-
-/******************************************************************//**
-Creates a symbol table for a single stored procedure or query.
-@return own: symbol table */
-UNIV_INTERN
-sym_tab_t*
-sym_tab_create(
-/*===========*/
- mem_heap_t* heap) /*!< in: memory heap where to create */
-{
- sym_tab_t* sym_tab;
-
- sym_tab = mem_heap_alloc(heap, sizeof(sym_tab_t));
-
- UT_LIST_INIT(sym_tab->sym_list);
- UT_LIST_INIT(sym_tab->func_node_list);
-
- sym_tab->heap = heap;
-
- return(sym_tab);
-}
-
-/******************************************************************//**
-Frees the memory allocated dynamically AFTER parsing phase for variables
-etc. in the symbol table. Does not free the mem heap where the table was
-originally created. Frees also SQL explicit cursor definitions. */
-UNIV_INTERN
-void
-sym_tab_free_private(
-/*=================*/
- sym_tab_t* sym_tab) /*!< in, own: symbol table */
-{
- sym_node_t* sym;
- func_node_t* func;
-
- sym = UT_LIST_GET_FIRST(sym_tab->sym_list);
-
- while (sym) {
- eval_node_free_val_buf(sym);
-
- if (sym->prefetch_buf) {
- sel_col_prefetch_buf_free(sym->prefetch_buf);
- }
-
- if (sym->cursor_def) {
- que_graph_free_recursive(sym->cursor_def);
- }
-
- sym = UT_LIST_GET_NEXT(sym_list, sym);
- }
-
- func = UT_LIST_GET_FIRST(sym_tab->func_node_list);
-
- while (func) {
- eval_node_free_val_buf(func);
-
- func = UT_LIST_GET_NEXT(func_node_list, func);
- }
-}
-
-/******************************************************************//**
-Adds an integer literal to a symbol table.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_int_lit(
-/*================*/
- sym_tab_t* sym_tab, /*!< in: symbol table */
- ulint val) /*!< in: integer value */
-{
- sym_node_t* node;
- byte* data;
-
- node = mem_heap_alloc(sym_tab->heap, sizeof(sym_node_t));
-
- node->common.type = QUE_NODE_SYMBOL;
-
- node->resolved = TRUE;
- node->token_type = SYM_LIT;
-
- node->indirection = NULL;
-
- dtype_set(dfield_get_type(&node->common.val), DATA_INT, 0, 4);
-
- data = mem_heap_alloc(sym_tab->heap, 4);
- mach_write_to_4(data, val);
-
- dfield_set_data(&(node->common.val), data, 4);
-
- node->common.val_buf_size = 0;
- node->prefetch_buf = NULL;
- node->cursor_def = NULL;
-
- UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
-
- node->sym_table = sym_tab;
-
- return(node);
-}
-
-/******************************************************************//**
-Adds a string literal to a symbol table.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_str_lit(
-/*================*/
- sym_tab_t* sym_tab, /*!< in: symbol table */
- byte* str, /*!< in: string with no quotes around
- it */
- ulint len) /*!< in: string length */
-{
- sym_node_t* node;
- byte* data;
-
- node = mem_heap_alloc(sym_tab->heap, sizeof(sym_node_t));
-
- node->common.type = QUE_NODE_SYMBOL;
-
- node->resolved = TRUE;
- node->token_type = SYM_LIT;
-
- node->indirection = NULL;
-
- dtype_set(dfield_get_type(&node->common.val),
- DATA_VARCHAR, DATA_ENGLISH, 0);
-
- if (len) {
- data = mem_heap_alloc(sym_tab->heap, len);
- ut_memcpy(data, str, len);
- } else {
- data = NULL;
- }
-
- dfield_set_data(&(node->common.val), data, len);
-
- node->common.val_buf_size = 0;
- node->prefetch_buf = NULL;
- node->cursor_def = NULL;
-
- UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
-
- node->sym_table = sym_tab;
-
- return(node);
-}
-
-/******************************************************************//**
-Add a bound literal to a symbol table.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_bound_lit(
-/*==================*/
- sym_tab_t* sym_tab, /*!< in: symbol table */
- const char* name, /*!< in: name of bound literal */
- ulint* lit_type) /*!< out: type of literal (PARS_*_LIT) */
-{
- sym_node_t* node;
- pars_bound_lit_t* blit;
- ulint len = 0;
-
- blit = pars_info_get_bound_lit(sym_tab->info, name);
- ut_a(blit);
-
- node = mem_heap_alloc(sym_tab->heap, sizeof(sym_node_t));
-
- node->common.type = QUE_NODE_SYMBOL;
-
- node->resolved = TRUE;
- node->token_type = SYM_LIT;
-
- node->indirection = NULL;
-
- switch (blit->type) {
- case DATA_FIXBINARY:
- len = blit->length;
- *lit_type = PARS_FIXBINARY_LIT;
- break;
-
- case DATA_BLOB:
- *lit_type = PARS_BLOB_LIT;
- break;
-
- case DATA_VARCHAR:
- *lit_type = PARS_STR_LIT;
- break;
-
- case DATA_CHAR:
- ut_a(blit->length > 0);
-
- len = blit->length;
- *lit_type = PARS_STR_LIT;
- break;
-
- case DATA_INT:
- ut_a(blit->length > 0);
- ut_a(blit->length <= 8);
-
- len = blit->length;
- *lit_type = PARS_INT_LIT;
- break;
-
- default:
- ut_error;
- }
-
- dtype_set(dfield_get_type(&node->common.val),
- blit->type, blit->prtype, len);
-
- dfield_set_data(&(node->common.val), blit->address, blit->length);
-
- node->common.val_buf_size = 0;
- node->prefetch_buf = NULL;
- node->cursor_def = NULL;
-
- UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
-
- node->sym_table = sym_tab;
-
- return(node);
-}
-
-/******************************************************************//**
-Adds an SQL null literal to a symbol table.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_null_lit(
-/*=================*/
- sym_tab_t* sym_tab) /*!< in: symbol table */
-{
- sym_node_t* node;
-
- node = mem_heap_alloc(sym_tab->heap, sizeof(sym_node_t));
-
- node->common.type = QUE_NODE_SYMBOL;
-
- node->resolved = TRUE;
- node->token_type = SYM_LIT;
-
- node->indirection = NULL;
-
- dfield_get_type(&node->common.val)->mtype = DATA_ERROR;
-
- dfield_set_null(&node->common.val);
-
- node->common.val_buf_size = 0;
- node->prefetch_buf = NULL;
- node->cursor_def = NULL;
-
- UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
-
- node->sym_table = sym_tab;
-
- return(node);
-}
-
-/******************************************************************//**
-Adds an identifier to a symbol table.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_id(
-/*===========*/
- sym_tab_t* sym_tab, /*!< in: symbol table */
- byte* name, /*!< in: identifier name */
- ulint len) /*!< in: identifier length */
-{
- sym_node_t* node;
-
- node = mem_heap_alloc(sym_tab->heap, sizeof(sym_node_t));
-
- node->common.type = QUE_NODE_SYMBOL;
-
- node->resolved = FALSE;
- node->indirection = NULL;
-
- node->name = mem_heap_strdupl(sym_tab->heap, (char*) name, len);
- node->name_len = len;
-
- UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
-
- dfield_set_null(&node->common.val);
-
- node->common.val_buf_size = 0;
- node->prefetch_buf = NULL;
- node->cursor_def = NULL;
-
- node->sym_table = sym_tab;
-
- return(node);
-}
-
-/******************************************************************//**
-Add a bound identifier to a symbol table.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_bound_id(
-/*===========*/
- sym_tab_t* sym_tab, /*!< in: symbol table */
- const char* name) /*!< in: name of bound id */
-{
- sym_node_t* node;
- pars_bound_id_t* bid;
-
- bid = pars_info_get_bound_id(sym_tab->info, name);
- ut_a(bid);
-
- node = mem_heap_alloc(sym_tab->heap, sizeof(sym_node_t));
-
- node->common.type = QUE_NODE_SYMBOL;
-
- node->resolved = FALSE;
- node->indirection = NULL;
-
- node->name = mem_heap_strdup(sym_tab->heap, bid->id);
- node->name_len = strlen(node->name);
-
- UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
-
- dfield_set_null(&node->common.val);
-
- node->common.val_buf_size = 0;
- node->prefetch_buf = NULL;
- node->cursor_def = NULL;
-
- node->sym_table = sym_tab;
-
- return(node);
-}
diff --git a/storage/innodb_plugin/plug.in b/storage/innodb_plugin/plug.in
deleted file mode 100644
index 94a2c969694..00000000000
--- a/storage/innodb_plugin/plug.in
+++ /dev/null
@@ -1,224 +0,0 @@
-#
-# Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
-#
-# This program is free software; you can redistribute it and/or modify it under
-# the terms of the GNU General Public License as published by the Free Software
-# Foundation; version 2 of the License.
-#
-# This program is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-# Place, Suite 330, Boston, MA 02111-1307 USA
-#
-
-MYSQL_STORAGE_ENGINE(innodb_plugin,, [InnoDB Storage Engine],
- [Transactional Tables using InnoDB], [max,max-no-ndb])
-MYSQL_PLUGIN_DIRECTORY(innodb_plugin, [storage/innodb_plugin])
-MYSQL_PLUGIN_DYNAMIC(innodb_plugin, [ha_innodb_plugin.la])
-MYSQL_PLUGIN_ACTIONS(innodb_plugin, [
- AC_CHECK_HEADERS(sched.h)
- AC_CHECK_SIZEOF(int, 4)
- AC_CHECK_SIZEOF(long, 4)
- AC_CHECK_SIZEOF(void*, 4)
- AC_CHECK_FUNCS(sched_yield fdatasync localtime_r)
- AC_C_BIGENDIAN
- case "$target_os" in
- lin*)
- CFLAGS="$CFLAGS -DUNIV_LINUX";;
- hpux10*)
- CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE -DUNIV_HPUX -DUNIV_HPUX10";;
- hp*)
- CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE -DUNIV_HPUX";;
- aix*)
- CFLAGS="$CFLAGS -DUNIV_AIX";;
- irix*|osf*|sysv5uw7*|openbsd*)
- CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";;
- *solaris*|*SunOS*)
- CFLAGS="$CFLAGS -DUNIV_SOLARIS";;
- esac
-
- INNODB_DYNAMIC_CFLAGS="-DMYSQL_DYNAMIC_PLUGIN"
-
- case "$target_cpu" in
- x86_64)
- # The AMD64 ABI forbids absolute addresses in shared libraries
- ;;
- *86)
- # Use absolute addresses on IA-32
- INNODB_DYNAMIC_CFLAGS="$INNODB_DYNAMIC_CFLAGS -prefer-non-pic"
- ;;
- esac
- AC_SUBST(INNODB_DYNAMIC_CFLAGS)
-
- AC_MSG_CHECKING(whether GCC atomic builtins are available)
- # either define HAVE_IB_GCC_ATOMIC_BUILTINS or not
- AC_TRY_RUN(
- [
- int main()
- {
- long x;
- long y;
- long res;
- char c;
-
- x = 10;
- y = 123;
- res = __sync_bool_compare_and_swap(&x, x, y);
- if (!res || x != y) {
- return(1);
- }
-
- x = 10;
- y = 123;
- res = __sync_bool_compare_and_swap(&x, x + 1, y);
- if (res || x != 10) {
- return(1);
- }
-
- x = 10;
- y = 123;
- res = __sync_add_and_fetch(&x, y);
- if (res != 123 + 10 || x != 123 + 10) {
- return(1);
- }
-
- c = 10;
- res = __sync_lock_test_and_set(&c, 123);
- if (res != 10 || c != 123) {
- return(1);
- }
-
- return(0);
- }
- ],
- [
- AC_DEFINE([HAVE_IB_GCC_ATOMIC_BUILTINS], [1],
- [GCC atomic builtins are available])
- AC_MSG_RESULT(yes)
- ],
- [
- AC_MSG_RESULT(no)
- ]
- )
-
- AC_MSG_CHECKING(whether pthread_t can be used by GCC atomic builtins)
- # either define HAVE_IB_ATOMIC_PTHREAD_T_GCC or not
- AC_TRY_RUN(
- [
- #include <pthread.h>
- #include <string.h>
-
- int main(int argc, char** argv) {
- pthread_t x1;
- pthread_t x2;
- pthread_t x3;
-
- memset(&x1, 0x0, sizeof(x1));
- memset(&x2, 0x0, sizeof(x2));
- memset(&x3, 0x0, sizeof(x3));
-
- __sync_bool_compare_and_swap(&x1, x2, x3);
-
- return(0);
- }
- ],
- [
- AC_DEFINE([HAVE_IB_ATOMIC_PTHREAD_T_GCC], [1],
- [pthread_t can be used by GCC atomic builtins])
- AC_MSG_RESULT(yes)
- ],
- [
- AC_MSG_RESULT(no)
- ]
- )
-
- AC_MSG_CHECKING(whether Solaris libc atomic functions are available)
- # either define HAVE_IB_SOLARIS_ATOMICS or not
- AC_CHECK_FUNCS(atomic_add_long \
- atomic_cas_32 \
- atomic_cas_64 \
- atomic_cas_ulong,
-
- AC_DEFINE([HAVE_IB_SOLARIS_ATOMICS], [1],
- [Define to 1 if Solaris libc atomic functions \
- are available])
- )
-
- AC_MSG_CHECKING(whether pthread_t can be used by Solaris libc atomic functions)
- # either define HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS or not
- AC_TRY_RUN(
- [
- #include <pthread.h>
- #include <string.h>
-
- int main(int argc, char** argv) {
- pthread_t x1;
- pthread_t x2;
- pthread_t x3;
-
- memset(&x1, 0x0, sizeof(x1));
- memset(&x2, 0x0, sizeof(x2));
- memset(&x3, 0x0, sizeof(x3));
-
- if (sizeof(pthread_t) == 4) {
-
- atomic_cas_32(&x1, x2, x3);
-
- } else if (sizeof(pthread_t) == 8) {
-
- atomic_cas_64(&x1, x2, x3);
-
- } else {
-
- return(1);
- }
-
- return(0);
- }
- ],
- [
- AC_DEFINE([HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS], [1],
- [pthread_t can be used by solaris atomics])
- AC_MSG_RESULT(yes)
- ],
- [
- AC_MSG_RESULT(no)
- ]
- )
-
- # this is needed to know which one of atomic_cas_32() or atomic_cas_64()
- # to use in the source
- AC_CHECK_SIZEOF([pthread_t], [], [#include <pthread.h>])
-
- # Check for x86 PAUSE instruction
- AC_MSG_CHECKING(for x86 PAUSE instruction)
- # We have to actually try running the test program, because of a bug
- # in Solaris on x86_64, where it wrongly reports that PAUSE is not
- # supported when trying to run an application. See
- # http://bugs.opensolaris.org/bugdatabase/printableBug.do?bug_id=6478684
- # We use ib_ prefix to avoid collisoins if this code is added to
- # mysql's configure.in.
- AC_TRY_RUN(
- [
- int main() {
- __asm__ __volatile__ ("pause");
- return(0);
- }
- ],
- [
- AC_DEFINE([HAVE_IB_PAUSE_INSTRUCTION], [1], [Does x86 PAUSE instruction exist])
- AC_MSG_RESULT(yes)
- ],
- [
- AC_MSG_RESULT(no)
- ],
- [
- AC_MSG_RESULT(no)
- ]
- )
- ])
-
-# vim: set ft=config:
diff --git a/storage/innodb_plugin/que/que0que.c b/storage/innodb_plugin/que/que0que.c
deleted file mode 100644
index 2fe046fa9b8..00000000000
--- a/storage/innodb_plugin/que/que0que.c
+++ /dev/null
@@ -1,1436 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file que/que0que.c
-Query graph
-
-Created 5/27/1996 Heikki Tuuri
-*******************************************************/
-
-#include "que0que.h"
-
-#ifdef UNIV_NONINL
-#include "que0que.ic"
-#endif
-
-#include "srv0que.h"
-#include "usr0sess.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "row0undo.h"
-#include "row0ins.h"
-#include "row0upd.h"
-#include "row0sel.h"
-#include "row0purge.h"
-#include "dict0crea.h"
-#include "log0log.h"
-#include "eval0proc.h"
-#include "eval0eval.h"
-#include "pars0types.h"
-
-#define QUE_PARALLELIZE_LIMIT (64 * 256 * 256 * 256)
-#define QUE_ROUND_ROBIN_LIMIT (64 * 256 * 256 * 256)
-#define QUE_MAX_LOOPS_WITHOUT_CHECK 16
-
-#ifdef UNIV_DEBUG
-/* If the following flag is set TRUE, the module will print trace info
-of SQL execution in the UNIV_SQL_DEBUG version */
-UNIV_INTERN ibool que_trace_on = FALSE;
-#endif /* UNIV_DEBUG */
-
-/* Short introduction to query graphs
- ==================================
-
-A query graph consists of nodes linked to each other in various ways. The
-execution starts at que_run_threads() which takes a que_thr_t parameter.
-que_thr_t contains two fields that control query graph execution: run_node
-and prev_node. run_node is the next node to execute and prev_node is the
-last node executed.
-
-Each node has a pointer to a 'next' statement, i.e., its brother, and a
-pointer to its parent node. The next pointer is NULL in the last statement
-of a block.
-
-Loop nodes contain a link to the first statement of the enclosed statement
-list. While the loop runs, que_thr_step() checks if execution to the loop
-node came from its parent or from one of the statement nodes in the loop. If
-it came from the parent of the loop node it starts executing the first
-statement node in the loop. If it came from one of the statement nodes in
-the loop, then it checks if the statement node has another statement node
-following it, and runs it if so.
-
-To signify loop ending, the loop statements (see e.g. while_step()) set
-que_thr_t->run_node to the loop node's parent node. This is noticed on the
-next call of que_thr_step() and execution proceeds to the node pointed to by
-the loop node's 'next' pointer.
-
-For example, the code:
-
-X := 1;
-WHILE X < 5 LOOP
- X := X + 1;
- X := X + 1;
-X := 5
-
-will result in the following node hierarchy, with the X-axis indicating
-'next' links and the Y-axis indicating parent/child links:
-
-A - W - A
- |
- |
- A - A
-
-A = assign_node_t, W = while_node_t. */
-
-/* How a stored procedure containing COMMIT or ROLLBACK commands
-is executed?
-
-The commit or rollback can be seen as a subprocedure call.
-The problem is that if there are several query threads
-currently running within the transaction, their action could
-mess the commit or rollback operation. Or, at the least, the
-operation would be difficult to visualize and keep in control.
-
-Therefore the query thread requesting a commit or a rollback
-sends to the transaction a signal, which moves the transaction
-to TRX_QUE_SIGNALED state. All running query threads of the
-transaction will eventually notice that the transaction is now in
-this state and voluntarily suspend themselves. Only the last
-query thread which suspends itself will trigger handling of
-the signal.
-
-When the transaction starts to handle a rollback or commit
-signal, it builds a query graph which, when executed, will
-roll back or commit the incomplete transaction. The transaction
-is moved to the TRX_QUE_ROLLING_BACK or TRX_QUE_COMMITTING state.
-If specified, the SQL cursors opened by the transaction are closed.
-When the execution of the graph completes, it is like returning
-from a subprocedure: the query thread which requested the operation
-starts running again. */
-
-/**********************************************************************//**
-Moves a thread from another state to the QUE_THR_RUNNING state. Increments
-the n_active_thrs counters of the query graph and transaction.
-***NOTE***: This is the only function in which such a transition is allowed
-to happen! */
-static
-void
-que_thr_move_to_run_state(
-/*======================*/
- que_thr_t* thr); /*!< in: an query thread */
-
-/***********************************************************************//**
-Adds a query graph to the session's list of graphs. */
-UNIV_INTERN
-void
-que_graph_publish(
-/*==============*/
- que_t* graph, /*!< in: graph */
- sess_t* sess) /*!< in: session */
-{
- ut_ad(mutex_own(&kernel_mutex));
-
- UT_LIST_ADD_LAST(graphs, sess->graphs, graph);
-}
-
-/***********************************************************************//**
-Creates a query graph fork node.
-@return own: fork node */
-UNIV_INTERN
-que_fork_t*
-que_fork_create(
-/*============*/
- que_t* graph, /*!< in: graph, if NULL then this
- fork node is assumed to be the
- graph root */
- que_node_t* parent, /*!< in: parent node */
- ulint fork_type, /*!< in: fork type */
- mem_heap_t* heap) /*!< in: memory heap where created */
-{
- que_fork_t* fork;
-
- ut_ad(heap);
-
- fork = mem_heap_alloc(heap, sizeof(que_fork_t));
-
- fork->common.type = QUE_NODE_FORK;
- fork->n_active_thrs = 0;
-
- fork->state = QUE_FORK_COMMAND_WAIT;
-
- if (graph != NULL) {
- fork->graph = graph;
- } else {
- fork->graph = fork;
- }
-
- fork->common.parent = parent;
- fork->fork_type = fork_type;
-
- fork->caller = NULL;
-
- UT_LIST_INIT(fork->thrs);
-
- fork->sym_tab = NULL;
- fork->info = NULL;
-
- fork->heap = heap;
-
- return(fork);
-}
-
-/***********************************************************************//**
-Creates a query graph thread node.
-@return own: query thread node */
-UNIV_INTERN
-que_thr_t*
-que_thr_create(
-/*===========*/
- que_fork_t* parent, /*!< in: parent node, i.e., a fork node */
- mem_heap_t* heap) /*!< in: memory heap where created */
-{
- que_thr_t* thr;
-
- ut_ad(parent && heap);
-
- thr = mem_heap_alloc(heap, sizeof(que_thr_t));
-
- thr->common.type = QUE_NODE_THR;
- thr->common.parent = parent;
-
- thr->magic_n = QUE_THR_MAGIC_N;
-
- thr->graph = parent->graph;
-
- thr->state = QUE_THR_COMMAND_WAIT;
-
- thr->is_active = FALSE;
-
- thr->run_node = NULL;
- thr->resource = 0;
- thr->lock_state = QUE_THR_LOCK_NOLOCK;
-
- UT_LIST_ADD_LAST(thrs, parent->thrs, thr);
-
- return(thr);
-}
-
-/**********************************************************************//**
-Moves a suspended query thread to the QUE_THR_RUNNING state and may release
-a single worker thread to execute it. This function should be used to end
-the wait state of a query thread waiting for a lock or a stored procedure
-completion. */
-UNIV_INTERN
-void
-que_thr_end_wait(
-/*=============*/
- que_thr_t* thr, /*!< in: query thread in the
- QUE_THR_LOCK_WAIT,
- or QUE_THR_PROCEDURE_WAIT, or
- QUE_THR_SIG_REPLY_WAIT state */
- que_thr_t** next_thr) /*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread; if NULL is passed
- as the parameter, it is ignored */
-{
- ibool was_active;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(thr);
- ut_ad((thr->state == QUE_THR_LOCK_WAIT)
- || (thr->state == QUE_THR_PROCEDURE_WAIT)
- || (thr->state == QUE_THR_SIG_REPLY_WAIT));
- ut_ad(thr->run_node);
-
- thr->prev_node = thr->run_node;
-
- was_active = thr->is_active;
-
- que_thr_move_to_run_state(thr);
-
- if (was_active) {
-
- return;
- }
-
- if (next_thr && *next_thr == NULL) {
- *next_thr = thr;
- } else {
- ut_a(0);
- srv_que_task_enqueue_low(thr);
- }
-}
-
-/**********************************************************************//**
-Same as que_thr_end_wait, but no parameter next_thr available. */
-UNIV_INTERN
-void
-que_thr_end_wait_no_next_thr(
-/*=========================*/
- que_thr_t* thr) /*!< in: query thread in the QUE_THR_LOCK_WAIT,
- or QUE_THR_PROCEDURE_WAIT, or
- QUE_THR_SIG_REPLY_WAIT state */
-{
- ibool was_active;
-
- ut_a(thr->state == QUE_THR_LOCK_WAIT); /* In MySQL this is the
- only possible state here */
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(thr);
- ut_ad((thr->state == QUE_THR_LOCK_WAIT)
- || (thr->state == QUE_THR_PROCEDURE_WAIT)
- || (thr->state == QUE_THR_SIG_REPLY_WAIT));
-
- was_active = thr->is_active;
-
- que_thr_move_to_run_state(thr);
-
- if (was_active) {
-
- return;
- }
-
- /* In MySQL we let the OS thread (not just the query thread) to wait
- for the lock to be released: */
-
- srv_release_mysql_thread_if_suspended(thr);
-
- /* srv_que_task_enqueue_low(thr); */
-}
-
-/**********************************************************************//**
-Inits a query thread for a command. */
-UNIV_INLINE
-void
-que_thr_init_command(
-/*=================*/
- que_thr_t* thr) /*!< in: query thread */
-{
- thr->run_node = thr;
- thr->prev_node = thr->common.parent;
-
- que_thr_move_to_run_state(thr);
-}
-
-/**********************************************************************//**
-Starts execution of a command in a query fork. Picks a query thread which
-is not in the QUE_THR_RUNNING state and moves it to that state. If none
-can be chosen, a situation which may arise in parallelized fetches, NULL
-is returned.
-@return a query thread of the graph moved to QUE_THR_RUNNING state, or
-NULL; the query thread should be executed by que_run_threads by the
-caller */
-UNIV_INTERN
-que_thr_t*
-que_fork_start_command(
-/*===================*/
- que_fork_t* fork) /*!< in: a query fork */
-{
- que_thr_t* thr;
- que_thr_t* suspended_thr = NULL;
- que_thr_t* completed_thr = NULL;
-
- fork->state = QUE_FORK_ACTIVE;
-
- fork->last_sel_node = NULL;
-
- suspended_thr = NULL;
- completed_thr = NULL;
-
- /* Choose the query thread to run: usually there is just one thread,
- but in a parallelized select, which necessarily is non-scrollable,
- there may be several to choose from */
-
- /* First we try to find a query thread in the QUE_THR_COMMAND_WAIT
- state. Then we try to find a query thread in the QUE_THR_SUSPENDED
- state, finally we try to find a query thread in the QUE_THR_COMPLETED
- state */
-
- thr = UT_LIST_GET_FIRST(fork->thrs);
-
- /* We make a single pass over the thr list within which we note which
- threads are ready to run. */
- while (thr) {
- switch (thr->state) {
- case QUE_THR_COMMAND_WAIT:
-
- /* We have to send the initial message to query thread
- to start it */
-
- que_thr_init_command(thr);
-
- return(thr);
-
- case QUE_THR_SUSPENDED:
- /* In this case the execution of the thread was
- suspended: no initial message is needed because
- execution can continue from where it was left */
- if (!suspended_thr) {
- suspended_thr = thr;
- }
-
- break;
-
- case QUE_THR_COMPLETED:
- if (!completed_thr) {
- completed_thr = thr;
- }
-
- break;
-
- case QUE_THR_LOCK_WAIT:
- ut_error;
-
- }
-
- thr = UT_LIST_GET_NEXT(thrs, thr);
- }
-
- if (suspended_thr) {
-
- thr = suspended_thr;
- que_thr_move_to_run_state(thr);
-
- } else if (completed_thr) {
-
- thr = completed_thr;
- que_thr_init_command(thr);
- }
-
- return(thr);
-}
-
-/**********************************************************************//**
-After signal handling is finished, returns control to a query graph error
-handling routine. (Currently, just returns the control to the root of the
-graph so that the graph can communicate an error message to the client.) */
-UNIV_INTERN
-void
-que_fork_error_handle(
-/*==================*/
- trx_t* trx __attribute__((unused)), /*!< in: trx */
- que_t* fork) /*!< in: query graph which was run before signal
- handling started, NULL not allowed */
-{
- que_thr_t* thr;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(trx->sess->state == SESS_ERROR);
- ut_ad(UT_LIST_GET_LEN(trx->reply_signals) == 0);
- ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
-
- thr = UT_LIST_GET_FIRST(fork->thrs);
-
- while (thr != NULL) {
- ut_ad(!thr->is_active);
- ut_ad(thr->state != QUE_THR_SIG_REPLY_WAIT);
- ut_ad(thr->state != QUE_THR_LOCK_WAIT);
-
- thr->run_node = thr;
- thr->prev_node = thr->child;
- thr->state = QUE_THR_COMPLETED;
-
- thr = UT_LIST_GET_NEXT(thrs, thr);
- }
-
- thr = UT_LIST_GET_FIRST(fork->thrs);
-
- que_thr_move_to_run_state(thr);
-
- ut_a(0);
- srv_que_task_enqueue_low(thr);
-}
-
-/****************************************************************//**
-Tests if all the query threads in the same fork have a given state.
-@return TRUE if all the query threads in the same fork were in the
-given state */
-UNIV_INLINE
-ibool
-que_fork_all_thrs_in_state(
-/*=======================*/
- que_fork_t* fork, /*!< in: query fork */
- ulint state) /*!< in: state */
-{
- que_thr_t* thr_node;
-
- thr_node = UT_LIST_GET_FIRST(fork->thrs);
-
- while (thr_node != NULL) {
- if (thr_node->state != state) {
-
- return(FALSE);
- }
-
- thr_node = UT_LIST_GET_NEXT(thrs, thr_node);
- }
-
- return(TRUE);
-}
-
-/**********************************************************************//**
-Calls que_graph_free_recursive for statements in a statement list. */
-static
-void
-que_graph_free_stat_list(
-/*=====================*/
- que_node_t* node) /*!< in: first query graph node in the list */
-{
- while (node) {
- que_graph_free_recursive(node);
-
- node = que_node_get_next(node);
- }
-}
-
-/**********************************************************************//**
-Frees a query graph, but not the heap where it was created. Does not free
-explicit cursor declarations, they are freed in que_graph_free. */
-UNIV_INTERN
-void
-que_graph_free_recursive(
-/*=====================*/
- que_node_t* node) /*!< in: query graph node */
-{
- que_fork_t* fork;
- que_thr_t* thr;
- undo_node_t* undo;
- sel_node_t* sel;
- ins_node_t* ins;
- upd_node_t* upd;
- tab_node_t* cre_tab;
- ind_node_t* cre_ind;
- purge_node_t* purge;
-
- if (node == NULL) {
-
- return;
- }
-
- switch (que_node_get_type(node)) {
-
- case QUE_NODE_FORK:
- fork = node;
-
- thr = UT_LIST_GET_FIRST(fork->thrs);
-
- while (thr) {
- que_graph_free_recursive(thr);
-
- thr = UT_LIST_GET_NEXT(thrs, thr);
- }
-
- break;
- case QUE_NODE_THR:
-
- thr = node;
-
- if (thr->magic_n != QUE_THR_MAGIC_N) {
- fprintf(stderr,
- "que_thr struct appears corrupt;"
- " magic n %lu\n",
- (unsigned long) thr->magic_n);
- mem_analyze_corruption(thr);
- ut_error;
- }
-
- thr->magic_n = QUE_THR_MAGIC_FREED;
-
- que_graph_free_recursive(thr->child);
-
- break;
- case QUE_NODE_UNDO:
-
- undo = node;
-
- mem_heap_free(undo->heap);
-
- break;
- case QUE_NODE_SELECT:
-
- sel = node;
-
- sel_node_free_private(sel);
-
- break;
- case QUE_NODE_INSERT:
-
- ins = node;
-
- que_graph_free_recursive(ins->select);
-
- mem_heap_free(ins->entry_sys_heap);
-
- break;
- case QUE_NODE_PURGE:
- purge = node;
-
- mem_heap_free(purge->heap);
-
- break;
-
- case QUE_NODE_UPDATE:
-
- upd = node;
-
- if (upd->in_mysql_interface) {
-
- btr_pcur_free_for_mysql(upd->pcur);
- }
-
- que_graph_free_recursive(upd->cascade_node);
-
- if (upd->cascade_heap) {
- mem_heap_free(upd->cascade_heap);
- }
-
- que_graph_free_recursive(upd->select);
-
- mem_heap_free(upd->heap);
-
- break;
- case QUE_NODE_CREATE_TABLE:
- cre_tab = node;
-
- que_graph_free_recursive(cre_tab->tab_def);
- que_graph_free_recursive(cre_tab->col_def);
- que_graph_free_recursive(cre_tab->commit_node);
-
- mem_heap_free(cre_tab->heap);
-
- break;
- case QUE_NODE_CREATE_INDEX:
- cre_ind = node;
-
- que_graph_free_recursive(cre_ind->ind_def);
- que_graph_free_recursive(cre_ind->field_def);
- que_graph_free_recursive(cre_ind->commit_node);
-
- mem_heap_free(cre_ind->heap);
-
- break;
- case QUE_NODE_PROC:
- que_graph_free_stat_list(((proc_node_t*)node)->stat_list);
-
- break;
- case QUE_NODE_IF:
- que_graph_free_stat_list(((if_node_t*)node)->stat_list);
- que_graph_free_stat_list(((if_node_t*)node)->else_part);
- que_graph_free_stat_list(((if_node_t*)node)->elsif_list);
-
- break;
- case QUE_NODE_ELSIF:
- que_graph_free_stat_list(((elsif_node_t*)node)->stat_list);
-
- break;
- case QUE_NODE_WHILE:
- que_graph_free_stat_list(((while_node_t*)node)->stat_list);
-
- break;
- case QUE_NODE_FOR:
- que_graph_free_stat_list(((for_node_t*)node)->stat_list);
-
- break;
-
- case QUE_NODE_ASSIGNMENT:
- case QUE_NODE_EXIT:
- case QUE_NODE_RETURN:
- case QUE_NODE_COMMIT:
- case QUE_NODE_ROLLBACK:
- case QUE_NODE_LOCK:
- case QUE_NODE_FUNC:
- case QUE_NODE_ORDER:
- case QUE_NODE_ROW_PRINTF:
- case QUE_NODE_OPEN:
- case QUE_NODE_FETCH:
- /* No need to do anything */
-
- break;
- default:
- fprintf(stderr,
- "que_node struct appears corrupt; type %lu\n",
- (unsigned long) que_node_get_type(node));
- mem_analyze_corruption(node);
- ut_error;
- }
-}
-
-/**********************************************************************//**
-Frees a query graph. */
-UNIV_INTERN
-void
-que_graph_free(
-/*===========*/
- que_t* graph) /*!< in: query graph; we assume that the memory
- heap where this graph was created is private
- to this graph: if not, then use
- que_graph_free_recursive and free the heap
- afterwards! */
-{
- ut_ad(graph);
-
- if (graph->sym_tab) {
- /* The following call frees dynamic memory allocated
- for variables etc. during execution. Frees also explicit
- cursor definitions. */
-
- sym_tab_free_private(graph->sym_tab);
- }
-
- if (graph->info && graph->info->graph_owns_us) {
- pars_info_free(graph->info);
- }
-
- que_graph_free_recursive(graph);
-
- mem_heap_free(graph->heap);
-}
-
-/****************************************************************//**
-Performs an execution step on a thr node.
-@return query thread to run next, or NULL if none */
-static
-que_thr_t*
-que_thr_node_step(
-/*==============*/
- que_thr_t* thr) /*!< in: query thread where run_node must
- be the thread node itself */
-{
- ut_ad(thr->run_node == thr);
-
- if (thr->prev_node == thr->common.parent) {
- /* If control to the node came from above, it is just passed
- on */
-
- thr->run_node = thr->child;
-
- return(thr);
- }
-
- mutex_enter(&kernel_mutex);
-
- if (que_thr_peek_stop(thr)) {
-
- mutex_exit(&kernel_mutex);
-
- return(thr);
- }
-
- /* Thread execution completed */
-
- thr->state = QUE_THR_COMPLETED;
-
- mutex_exit(&kernel_mutex);
-
- return(NULL);
-}
-
-/**********************************************************************//**
-Moves a thread from another state to the QUE_THR_RUNNING state. Increments
-the n_active_thrs counters of the query graph and transaction if thr was
-not active.
-***NOTE***: This and ..._mysql are the only functions in which such a
-transition is allowed to happen! */
-static
-void
-que_thr_move_to_run_state(
-/*======================*/
- que_thr_t* thr) /*!< in: an query thread */
-{
- trx_t* trx;
-
- ut_ad(thr->state != QUE_THR_RUNNING);
-
- trx = thr_get_trx(thr);
-
- if (!thr->is_active) {
-
- (thr->graph)->n_active_thrs++;
-
- trx->n_active_thrs++;
-
- thr->is_active = TRUE;
-
- ut_ad((thr->graph)->n_active_thrs == 1);
- ut_ad(trx->n_active_thrs == 1);
- }
-
- thr->state = QUE_THR_RUNNING;
-}
-
-/**********************************************************************//**
-Decrements the query thread reference counts in the query graph and the
-transaction. May start signal handling, e.g., a rollback.
-*** NOTE ***:
-This and que_thr_stop_for_mysql are the only functions where the reference
-count can be decremented and this function may only be called from inside
-que_run_threads or que_thr_check_if_switch! These restrictions exist to make
-the rollback code easier to maintain. */
-static
-void
-que_thr_dec_refer_count(
-/*====================*/
- que_thr_t* thr, /*!< in: query thread */
- que_thr_t** next_thr) /*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread */
-{
- que_fork_t* fork;
- trx_t* trx;
- ulint fork_type;
- ibool stopped;
-
- fork = thr->common.parent;
- trx = thr_get_trx(thr);
-
- mutex_enter(&kernel_mutex);
-
- ut_a(thr->is_active);
-
- if (thr->state == QUE_THR_RUNNING) {
-
- stopped = que_thr_stop(thr);
-
- if (!stopped) {
- /* The reason for the thr suspension or wait was
- already canceled before we came here: continue
- running the thread */
-
- /* fputs("!!!!!!!! Wait already ended: continue thr\n",
- stderr); */
-
- if (next_thr && *next_thr == NULL) {
- /* Normally srv_suspend_mysql_thread resets
- the state to DB_SUCCESS before waiting, but
- in this case we have to do it here,
- otherwise nobody does it. */
- trx->error_state = DB_SUCCESS;
-
- *next_thr = thr;
- } else {
- ut_error;
- srv_que_task_enqueue_low(thr);
- }
-
- mutex_exit(&kernel_mutex);
-
- return;
- }
- }
-
- ut_ad(fork->n_active_thrs == 1);
- ut_ad(trx->n_active_thrs == 1);
-
- fork->n_active_thrs--;
- trx->n_active_thrs--;
-
- thr->is_active = FALSE;
-
- if (trx->n_active_thrs > 0) {
-
- mutex_exit(&kernel_mutex);
-
- return;
- }
-
- fork_type = fork->fork_type;
-
- /* Check if all query threads in the same fork are completed */
-
- if (que_fork_all_thrs_in_state(fork, QUE_THR_COMPLETED)) {
-
- switch (fork_type) {
- case QUE_FORK_ROLLBACK:
- /* This is really the undo graph used in rollback,
- no roll_node in this graph */
-
- ut_ad(UT_LIST_GET_LEN(trx->signals) > 0);
- ut_ad(trx->handling_signals == TRUE);
-
- trx_finish_rollback_off_kernel(fork, trx, next_thr);
- break;
-
- case QUE_FORK_PURGE:
- case QUE_FORK_RECOVERY:
- case QUE_FORK_MYSQL_INTERFACE:
-
- /* Do nothing */
- break;
-
- default:
- ut_error; /*!< not used in MySQL */
- }
- }
-
- if (UT_LIST_GET_LEN(trx->signals) > 0 && trx->n_active_thrs == 0) {
-
- /* If the trx is signaled and its query thread count drops to
- zero, then we start processing a signal; from it we may get
- a new query thread to run */
-
- trx_sig_start_handle(trx, next_thr);
- }
-
- if (trx->handling_signals && UT_LIST_GET_LEN(trx->signals) == 0) {
-
- trx_end_signal_handling(trx);
- }
-
- mutex_exit(&kernel_mutex);
-}
-
-/**********************************************************************//**
-Stops a query thread if graph or trx is in a state requiring it. The
-conditions are tested in the order (1) graph, (2) trx. The kernel mutex has
-to be reserved.
-@return TRUE if stopped */
-UNIV_INTERN
-ibool
-que_thr_stop(
-/*=========*/
- que_thr_t* thr) /*!< in: query thread */
-{
- trx_t* trx;
- que_t* graph;
- ibool ret = TRUE;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- graph = thr->graph;
- trx = graph->trx;
-
- if (graph->state == QUE_FORK_COMMAND_WAIT) {
- thr->state = QUE_THR_SUSPENDED;
-
- } else if (trx->que_state == TRX_QUE_LOCK_WAIT) {
-
- UT_LIST_ADD_FIRST(trx_thrs, trx->wait_thrs, thr);
- thr->state = QUE_THR_LOCK_WAIT;
-
- } else if (trx->error_state != DB_SUCCESS
- && trx->error_state != DB_LOCK_WAIT) {
-
- /* Error handling built for the MySQL interface */
- thr->state = QUE_THR_COMPLETED;
-
- } else if (UT_LIST_GET_LEN(trx->signals) > 0
- && graph->fork_type != QUE_FORK_ROLLBACK) {
-
- thr->state = QUE_THR_SUSPENDED;
- } else {
- ut_ad(graph->state == QUE_FORK_ACTIVE);
-
- ret = FALSE;
- }
-
- return(ret);
-}
-
-/**********************************************************************//**
-A patch for MySQL used to 'stop' a dummy query thread used in MySQL. The
-query thread is stopped and made inactive, except in the case where
-it was put to the lock wait state in lock0lock.c, but the lock has already
-been granted or the transaction chosen as a victim in deadlock resolution. */
-UNIV_INTERN
-void
-que_thr_stop_for_mysql(
-/*===================*/
- que_thr_t* thr) /*!< in: query thread */
-{
- trx_t* trx;
-
- trx = thr_get_trx(thr);
-
- mutex_enter(&kernel_mutex);
-
- if (thr->state == QUE_THR_RUNNING) {
-
- if (trx->error_state != DB_SUCCESS
- && trx->error_state != DB_LOCK_WAIT) {
-
- /* Error handling built for the MySQL interface */
- thr->state = QUE_THR_COMPLETED;
- } else {
- /* It must have been a lock wait but the lock was
- already released, or this transaction was chosen
- as a victim in selective deadlock resolution */
-
- mutex_exit(&kernel_mutex);
-
- return;
- }
- }
-
- ut_ad(thr->is_active == TRUE);
- ut_ad(trx->n_active_thrs == 1);
- ut_ad(thr->graph->n_active_thrs == 1);
-
- thr->is_active = FALSE;
- (thr->graph)->n_active_thrs--;
-
- trx->n_active_thrs--;
-
- mutex_exit(&kernel_mutex);
-}
-
-/**********************************************************************//**
-Moves a thread from another state to the QUE_THR_RUNNING state. Increments
-the n_active_thrs counters of the query graph and transaction if thr was
-not active. */
-UNIV_INTERN
-void
-que_thr_move_to_run_state_for_mysql(
-/*================================*/
- que_thr_t* thr, /*!< in: an query thread */
- trx_t* trx) /*!< in: transaction */
-{
- if (thr->magic_n != QUE_THR_MAGIC_N) {
- fprintf(stderr,
- "que_thr struct appears corrupt; magic n %lu\n",
- (unsigned long) thr->magic_n);
-
- mem_analyze_corruption(thr);
-
- ut_error;
- }
-
- if (!thr->is_active) {
-
- thr->graph->n_active_thrs++;
-
- trx->n_active_thrs++;
-
- thr->is_active = TRUE;
- }
-
- thr->state = QUE_THR_RUNNING;
-}
-
-/**********************************************************************//**
-A patch for MySQL used to 'stop' a dummy query thread used in MySQL
-select, when there is no error or lock wait. Marks the thread
-QUE_THR_COMPLETED and inactive, and decrements the n_active_thrs counters
-of its query graph and of the transaction. The magic number of the struct
-is verified first, so that a corrupted struct is reported before any of
-the debug assertions looks at its other fields. */
-UNIV_INTERN
-void
-que_thr_stop_for_mysql_no_error(
-/*============================*/
-	que_thr_t*	thr,	/*!< in: query thread */
-	trx_t*		trx)	/*!< in: transaction */
-{
-	/* Corruption check comes first, as in
-	que_thr_move_to_run_state_for_mysql(): the assertions below
-	are only meaningful on an intact struct. */
-	if (thr->magic_n != QUE_THR_MAGIC_N) {
-		fprintf(stderr,
-			"que_thr struct appears corrupt; magic n %lu\n",
-			(unsigned long) thr->magic_n);
-
-		mem_analyze_corruption(thr);
-
-		ut_error;
-	}
-
-	/* The thread must be the single running, active thread of both
-	the graph and the transaction */
-	ut_ad(thr->state == QUE_THR_RUNNING);
-	ut_ad(thr->is_active == TRUE);
-	ut_ad(trx->n_active_thrs == 1);
-	ut_ad(thr->graph->n_active_thrs == 1);
-
-	thr->state = QUE_THR_COMPLETED;
-
-	thr->is_active = FALSE;
-	(thr->graph)->n_active_thrs--;
-
-	trx->n_active_thrs--;
-}
-
-/****************************************************************//**
-Walks up the parent chain of the given node and returns the nearest
-ancestor that is a loop node (QUE_NODE_FOR or QUE_NODE_WHILE). The node
-itself is never a candidate: the search starts from its parent.
-@return containing loop node, or NULL if no ancestor is a loop */
-UNIV_INTERN
-que_node_t*
-que_node_get_containing_loop_node(
-/*==============================*/
-	que_node_t*	node)	/*!< in: node */
-{
-	que_node_t*	ancestor;
-
-	ut_ad(node);
-
-	for (ancestor = que_node_get_parent(node);
-	     ancestor != NULL;
-	     ancestor = que_node_get_parent(ancestor)) {
-
-		ulint	ancestor_type = que_node_get_type(ancestor);
-
-		if (ancestor_type == QUE_NODE_FOR
-		    || ancestor_type == QUE_NODE_WHILE) {
-
-			return(ancestor);
-		}
-	}
-
-	/* Ran out of parents without meeting a loop */
-	return(NULL);
-}
-
-/**********************************************************************//**
-Prints info of an SQL query graph node to stderr: the numeric node type,
-a human-readable name for it, and the address of the node. Types that
-have no name here are printed as "UNKNOWN NODE TYPE". */
-UNIV_INTERN
-void
-que_node_print_info(
-/*================*/
-	que_node_t*	node)	/*!< in: query graph node */
-{
-	ulint		type;
-	const char*	str;
-
-	type = que_node_get_type(node);
-
-	if (type == QUE_NODE_SELECT) {
-		str = "SELECT";
-	} else if (type == QUE_NODE_INSERT) {
-		str = "INSERT";
-	} else if (type == QUE_NODE_UPDATE) {
-		str = "UPDATE";
-	} else if (type == QUE_NODE_WHILE) {
-		str = "WHILE";
-	} else if (type == QUE_NODE_ASSIGNMENT) {
-		str = "ASSIGNMENT";
-	} else if (type == QUE_NODE_IF) {
-		str = "IF";
-	} else if (type == QUE_NODE_FETCH) {
-		str = "FETCH";
-	} else if (type == QUE_NODE_OPEN) {
-		str = "OPEN";
-	} else if (type == QUE_NODE_PROC) {
-		str = "STORED PROCEDURE";
-	} else if (type == QUE_NODE_FUNC) {
-		str = "FUNCTION";
-	} else if (type == QUE_NODE_LOCK) {
-		str = "LOCK";
-	} else if (type == QUE_NODE_THR) {
-		str = "QUERY THREAD";
-	} else if (type == QUE_NODE_COMMIT) {
-		str = "COMMIT";
-	} else if (type == QUE_NODE_UNDO) {
-		str = "UNDO ROW";
-	} else if (type == QUE_NODE_PURGE) {
-		str = "PURGE ROW";
-	} else if (type == QUE_NODE_ROLLBACK) {
-		str = "ROLLBACK";
-	} else if (type == QUE_NODE_CREATE_TABLE) {
-		str = "CREATE TABLE";
-	} else if (type == QUE_NODE_CREATE_INDEX) {
-		str = "CREATE INDEX";
-	} else if (type == QUE_NODE_FOR) {
-		str = "FOR LOOP";
-	} else if (type == QUE_NODE_RETURN) {
-		str = "RETURN";
-	} else if (type == QUE_NODE_EXIT) {
-		str = "EXIT";
-	} else if (type == QUE_NODE_ROW_PRINTF) {
-		/* que_thr_step() executes this node type, so the trace
-		printed before a step should be able to name it too */
-		str = "ROW PRINTF";
-	} else {
-		str = "UNKNOWN NODE TYPE";
-	}
-
-	fprintf(stderr, "Node type %lu: %s, address %p\n",
-		(ulong) type, str, (void*) node);
-}
-
-/**********************************************************************//**
-Performs an execution step on a query thread: dispatches on the type of
-thr->run_node to the matching *_step() routine, and afterwards stores in
-prev_node of the input thread the node that was just run (for an EXIT
-node, its containing loop node instead). The transaction of the thread
-must be in the DB_SUCCESS error state on entry, and the same is asserted
-for the transaction of a non-NULL thread that is returned.
-@return query thread to run next: it may differ from the input
-parameter if, e.g., a subprocedure call is made; the result of a
-*_step() routine is passed on as such and is allowed to be NULL */
-UNIV_INLINE
-que_thr_t*
-que_thr_step(
-/*=========*/
- que_thr_t* thr) /*!< in: query thread */
-{
- que_node_t* node;
- que_thr_t* old_thr;
- trx_t* trx;
- ulint type;
-
- trx = thr_get_trx(thr);
-
- ut_ad(thr->state == QUE_THR_RUNNING);
- ut_a(trx->error_state == DB_SUCCESS);
-
- thr->resource++;
-
- node = thr->run_node;
- type = que_node_get_type(node);
-
- old_thr = thr; /* thr may be replaced by a *_step() result below */
-
-#ifdef UNIV_DEBUG
- if (que_trace_on) {
- fputs("To execute: ", stderr);
- que_node_print_info(node);
- }
-#endif
- if (type & QUE_NODE_CONTROL_STAT) {
- if ((thr->prev_node != que_node_get_parent(node))
- && que_node_get_next(thr->prev_node)) {
-
- /* The control statements, like WHILE, always pass the
- control to the next child statement if there is any
- child left */
-
- thr->run_node = que_node_get_next(thr->prev_node);
-
- } else if (type == QUE_NODE_IF) {
- if_step(thr);
- } else if (type == QUE_NODE_FOR) {
- for_step(thr);
- } else if (type == QUE_NODE_PROC) {
-
- /* We can access trx->undo_no without reserving
- trx->undo_mutex, because there cannot be active query
- threads doing updating or inserting at the moment! */
-
- if (thr->prev_node == que_node_get_parent(node)) {
- trx->last_sql_stat_start.least_undo_no
- = trx->undo_no;
- }
-
- proc_step(thr);
- } else if (type == QUE_NODE_WHILE) {
- while_step(thr);
- } else {
- ut_error;
- }
- } else if (type == QUE_NODE_ASSIGNMENT) {
- assign_step(thr);
- } else if (type == QUE_NODE_SELECT) {
- thr = row_sel_step(thr);
- } else if (type == QUE_NODE_INSERT) {
- thr = row_ins_step(thr);
- } else if (type == QUE_NODE_UPDATE) {
- thr = row_upd_step(thr);
- } else if (type == QUE_NODE_FETCH) {
- thr = fetch_step(thr);
- } else if (type == QUE_NODE_OPEN) {
- thr = open_step(thr);
- } else if (type == QUE_NODE_FUNC) {
- proc_eval_step(thr);
-
- } else if (type == QUE_NODE_LOCK) {
-
- ut_error; /* lock nodes are not executed here; the old call is kept commented out below */
- /*
- thr = que_lock_step(thr);
- */
- } else if (type == QUE_NODE_THR) {
- thr = que_thr_node_step(thr);
- } else if (type == QUE_NODE_COMMIT) {
- thr = trx_commit_step(thr);
- } else if (type == QUE_NODE_UNDO) {
- thr = row_undo_step(thr);
- } else if (type == QUE_NODE_PURGE) {
- thr = row_purge_step(thr);
- } else if (type == QUE_NODE_RETURN) {
- thr = return_step(thr);
- } else if (type == QUE_NODE_EXIT) {
- thr = exit_step(thr);
- } else if (type == QUE_NODE_ROLLBACK) {
- thr = trx_rollback_step(thr);
- } else if (type == QUE_NODE_CREATE_TABLE) {
- thr = dict_create_table_step(thr);
- } else if (type == QUE_NODE_CREATE_INDEX) {
- thr = dict_create_index_step(thr);
- } else if (type == QUE_NODE_ROW_PRINTF) {
- thr = row_printf_step(thr);
- } else {
- ut_error;
- }
-
- if (type == QUE_NODE_EXIT) {
- old_thr->prev_node = que_node_get_containing_loop_node(node); /* NULL if the EXIT is not inside a loop */
- } else {
- old_thr->prev_node = node;
- }
-
- if (thr) {
- ut_a(thr_get_trx(thr)->error_state == DB_SUCCESS);
- }
-
- return(thr);
-}
-
-/**********************************************************************//**
-Run a query thread until it finishes or encounters e.g. a lock wait.
-Steps are executed one after another for as long as que_thr_step() keeps
-returning a thread to run. When a step returns NULL, the reference count
-of the current thread is released with que_thr_dec_refer_count(), which
-may hand back a thread to continue with; if it does not, the function
-returns. */
-static
-void
-que_run_threads_low(
-/*================*/
-	que_thr_t*	thr)	/*!< in: query thread */
-{
-	que_thr_t*	next_thr;
-
-	ut_ad(thr->state == QUE_THR_RUNNING);
-	ut_a(thr_get_trx(thr)->error_state == DB_SUCCESS);
-	ut_ad(!mutex_own(&kernel_mutex));
-
-	do {
-		/* Check that there is enough space in the log to
-		accommodate possible log entries by this query step; if
-		the operation can touch more than about 4 pages, checks
-		must be made also within the query step! */
-
-		log_free_check();
-
-		/* Perform the actual query step: note that the query
-		thread may change if, e.g., a subprocedure call is made */
-
-		/*-------------------------*/
-		next_thr = que_thr_step(thr);
-		/*-------------------------*/
-
-		ut_a(!next_thr
-		     || (thr_get_trx(next_thr)->error_state == DB_SUCCESS));
-
-		if (next_thr != thr) {
-			/* The only accepted alternative to continuing
-			with the same thread is "no thread at all" */
-			ut_a(next_thr == NULL);
-
-			/* This can change next_thr to a non-NULL value if
-			there was a lock wait that already completed. */
-			que_thr_dec_refer_count(thr, &next_thr);
-
-			if (next_thr != NULL) {
-
-				thr = next_thr;
-			}
-		}
-	} while (next_thr != NULL);
-}
-
-/**********************************************************************//**
-Run a query thread. Handles lock waits: the thread is run with
-que_run_threads_low() and, depending on the state it is left in, is
-either run again, suspended until its lock wait ends, or left alone. */
-UNIV_INTERN
-void
-que_run_threads(
-/*============*/
-	que_thr_t*	thr)	/*!< in: query thread */
-{
-	for (;;) {
-		ut_a(thr_get_trx(thr)->error_state == DB_SUCCESS);
-		que_run_threads_low(thr);
-
-		mutex_enter(&kernel_mutex);
-
-		switch (thr->state) {
-
-		case QUE_THR_RUNNING:
-			/* There probably was a lock wait, but it already
-			ended before we came here: continue running thr */
-
-			mutex_exit(&kernel_mutex);
-
-			continue;
-
-		case QUE_THR_LOCK_WAIT:
-			mutex_exit(&kernel_mutex);
-
-			/* The ..._mysql_... function works also for
-			InnoDB's internal threads. Let us wait that the
-			lock wait ends. */
-
-			srv_suspend_mysql_thread(thr);
-
-			if (thr_get_trx(thr)->error_state == DB_SUCCESS) {
-				/* The wait ended normally: run again */
-
-				continue;
-			}
-
-			/* thr was chosen as a deadlock victim or there
-			was a lock wait timeout */
-
-			que_thr_dec_refer_count(thr, NULL);
-
-			return;
-
-		case QUE_THR_COMPLETED:
-		case QUE_THR_COMMAND_WAIT:
-			/* Do nothing */
-			break;
-
-		default:
-			ut_error;
-		}
-
-		/* Only reached from the "do nothing" states, still
-		holding the kernel mutex */
-		mutex_exit(&kernel_mutex);
-
-		return;
-	}
-}
-
-/*********************************************************************//**
-Evaluate the given SQL: parses it into a query graph with pars_sql(),
-attaches the graph to the transaction, runs it to completion with
-que_run_threads() and frees the graph.
-@return error code or DB_SUCCESS (the error_state of trx after the run) */
-UNIV_INTERN
-ulint
-que_eval_sql(
-/*=========*/
-	pars_info_t*	info,	/*!< in: info struct, or NULL */
-	const char*	sql,	/*!< in: SQL string */
-	ibool		reserve_dict_mutex,
-				/*!< in: if TRUE, acquire/release
-				dict_sys->mutex around call to pars_sql. */
-	trx_t*		trx)	/*!< in: trx */
-{
-	que_thr_t*	thr;
-	que_t*		graph;
-
-	ut_a(trx->error_state == DB_SUCCESS);
-
-	if (reserve_dict_mutex) {
-		mutex_enter(&dict_sys->mutex);
-	}
-
-	graph = pars_sql(info, sql);
-
-	if (reserve_dict_mutex) {
-		mutex_exit(&dict_sys->mutex);
-	}
-
-	ut_a(graph);
-
-	graph->trx = trx;
-	trx->graph = NULL;
-
-	graph->fork_type = QUE_FORK_MYSQL_INTERFACE;
-
-	/* Start the command outside of the assertion macro: the call is
-	the real work, the assertion only verifies that a thread was
-	obtained. */
-	thr = que_fork_start_command(graph);
-	ut_a(thr);
-
-	que_run_threads(thr);
-
-	que_graph_free(graph);
-
-	return(trx->error_state);
-}
diff --git a/storage/innodb_plugin/read/read0read.c b/storage/innodb_plugin/read/read0read.c
deleted file mode 100644
index 85adae4ddff..00000000000
--- a/storage/innodb_plugin/read/read0read.c
+++ /dev/null
@@ -1,540 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file read/read0read.c
-Cursor read
-
-Created 2/16/1997 Heikki Tuuri
-*******************************************************/
-
-#include "read0read.h"
-
-#ifdef UNIV_NONINL
-#include "read0read.ic"
-#endif
-
-#include "srv0srv.h"
-#include "trx0sys.h"
-
-/*
--------------------------------------------------------------------------------
-FACT A: Cursor read view on a secondary index sees only committed versions
--------
-of the records in the secondary index or those versions of rows created
-by transaction which created a cursor before cursor was created even
-if transaction which created the cursor has changed that clustered index page.
-
-PROOF: We must show that read goes always to the clustered index record
-to see that record is visible in the cursor read view. Consider e.g.
-following table and SQL-clauses:
-
-create table t1(a int not null, b int, primary key(a), index(b));
-insert into t1 values (1,1),(2,2);
-commit;
-
-Now consider that we have a cursor for a query
-
-select b from t1 where b >= 1;
-
-This query will use secondary key on the table t1. Now after the first fetch
-on this cursor if we do a update:
-
-update t1 set b = 5 where b = 2;
-
-Now second fetch of the cursor should not see record (2,5) instead it should
-see record (2,2).
-
-We also should show that if we have delete t1 where b = 5; we still
-can see record (2,2).
-
-When we access a secondary key record maximum transaction id is fetched
-from this record and this trx_id is compared to up_limit_id in the view.
-If trx_id in the record is greater or equal than up_limit_id in the view
-cluster record is accessed. Because trx_id of the creating
-transaction is stored when this view was created to the list of
-trx_ids not seen by this read view previous version of the
-record is requested to be built. This is built using the clustered record.
-If the secondary key record is delete marked, its corresponding
-clustered record can already be purged only if the record's
-trx_id < low_limit_no. Purge can't remove any record deleted by a
-transaction which was active when cursor was created. But, we still
-may have a deleted secondary key record but no clustered record. But,
-this is not a problem because this case is handled in
-row_sel_get_clust_rec() function which is called
-whenever we note that this read view does not see trx_id in the
-record. Thus, we see correct version. Q. E. D.
-
--------------------------------------------------------------------------------
-FACT B: Cursor read view on a clustered index sees only committed versions
--------
-of the records in the clustered index or those versions of rows created
-by transaction which created a cursor before cursor was created even
-if transaction which created the cursor has changed that clustered index page.
-
-PROOF: Consider e.g. the following table and SQL clauses:
-
-create table t1(a int not null, b int, primary key(a));
-insert into t1 values (1),(2);
-commit;
-
-Now consider that we have a cursor for a query
-
-select a from t1 where a >= 1;
-
-This query will use clustered key on the table t1. Now after the first fetch
-on this cursor if we do a update:
-
-update t1 set a = 5 where a = 2;
-
-Now second fetch of the cursor should not see record (5) instead it should
-see record (2).
-
-We should also show that if we execute delete t1 where a = 5; after
-the cursor is opened, we can still see record (2).
-
-When accessing clustered record we always check if this read view sees
-trx_id stored to clustered record. By default we don't see any changes
-if record trx_id >= low_limit_id i.e. change was made transaction
-which started after transaction which created the cursor. If row
-was changed by the future transaction a previous version of the
-clustered record is created. Thus we see only committed version in
-this case. We see all changes made by committed transactions i.e.
-record trx_id < up_limit_id. In this case we don't need to do anything,
-we already see correct version of the record. We don't see any changes
-made by active transaction except creating transaction. We have stored
-trx_id of creating transaction to list of trx_ids when this view was
-created. Thus we can easily see if this record was changed by the
-creating transaction. Because we already have clustered record we can
-access roll_ptr. Using this roll_ptr we can fetch undo record.
-We can now check that undo_no of the undo record is less than undo_no of the
-transaction which created a view when cursor was created. We see this
-clustered record only in case when record undo_no is less than undo_no
-in the view. If this is not true we build based on undo_rec previous
-version of the record. This record is found because purge can't remove
-records accessed by active transaction. Thus we see correct version. Q. E. D.
--------------------------------------------------------------------------------
-FACT C: Purge does not remove any delete marked row that is visible
--------
-to cursor view.
-
-TODO: prove this
-
-*/
-
-/*********************************************************************//**
-Allocates a read view object and its trx_ids array from the given heap.
-Only n_trx_ids and trx_ids are filled in here; the remaining fields of
-the view are left for the caller to set.
-@return own: read view struct */
-UNIV_INLINE
-read_view_t*
-read_view_create_low(
-/*=================*/
-	ulint		n,	/*!< in: number of cells in the trx_ids array */
-	mem_heap_t*	heap)	/*!< in: memory heap from which allocated */
-{
-	read_view_t*	new_view = mem_heap_alloc(heap, sizeof(read_view_t));
-
-	new_view->n_trx_ids = n;
-
-	/* One array cell per transaction id */
-	new_view->trx_ids = mem_heap_alloc(heap,
-					   n * sizeof *new_view->trx_ids);
-
-	return(new_view);
-}
-
-/*********************************************************************//**
-Makes a copy of the oldest existing read view, with the exception that also
-the creating trx of the oldest view is set as not visible in the 'copied'
-view. Opens a new view if no views currently exist. The view must be closed
-with ..._close. This is used in purge.
-
-The copy is always a VIEW_NORMAL view with a zero undo_no, exactly like the
-view that read_view_open_now() returns when no view exists: these two
-fields are not filled in by read_view_create_low(), and read_view_print()
-reads them. The caller must own the kernel mutex.
-@return own: read view struct */
-UNIV_INTERN
-read_view_t*
-read_view_oldest_copy_or_open_new(
-/*==============================*/
-	trx_id_t	cr_trx_id,	/*!< in: trx_id of creating
-					transaction, or ut_dulint_zero
-					used in purge */
-	mem_heap_t*	heap)		/*!< in: memory heap from which
-					allocated */
-{
-	read_view_t*	old_view;
-	read_view_t*	view_copy;
-	ibool		needs_insert	= TRUE;
-	ulint		insert_done	= 0;
-	ulint		n;
-	ulint		i;
-
-	ut_ad(mutex_own(&kernel_mutex));
-
-	old_view = UT_LIST_GET_LAST(trx_sys->view_list);
-
-	if (old_view == NULL) {
-
-		return(read_view_open_now(cr_trx_id, heap));
-	}
-
-	n = old_view->n_trx_ids;
-
-	if (!ut_dulint_is_zero(old_view->creator_trx_id)) {
-		/* One extra cell for the creator of the old view */
-		n++;
-	} else {
-		needs_insert = FALSE;
-	}
-
-	view_copy = read_view_create_low(n, heap);
-
-	/* Insert the id of the creator in the right place of the descending
-	array of ids, if needs_insert is TRUE: */
-
-	for (i = 0; i < n; i++) {
-		trx_id_t	id;
-
-		if (needs_insert
-		    && (i >= old_view->n_trx_ids
-			|| ut_dulint_cmp(old_view->creator_trx_id,
-					 read_view_get_nth_trx_id(old_view, i))
-			> 0)) {
-
-			id = old_view->creator_trx_id;
-			needs_insert = FALSE;
-			insert_done = 1;
-		} else {
-			/* After the insertion the source index lags
-			one cell behind the destination index */
-			id = read_view_get_nth_trx_id(old_view,
-						      i - insert_done);
-		}
-
-		read_view_set_nth_trx_id(view_copy, i, id);
-	}
-
-	view_copy->creator_trx_id = cr_trx_id;
-
-	/* Give the copy a defined type and undo_no */
-	view_copy->type = VIEW_NORMAL;
-	view_copy->undo_no = ut_dulint_zero;
-
-	view_copy->low_limit_no = old_view->low_limit_no;
-	view_copy->low_limit_id = old_view->low_limit_id;
-
-	if (n > 0) {
-		/* The last active transaction has the smallest id: */
-		view_copy->up_limit_id = read_view_get_nth_trx_id(
-			view_copy, n - 1);
-	} else {
-		view_copy->up_limit_id = old_view->up_limit_id;
-	}
-
-	UT_LIST_ADD_LAST(view_list, trx_sys->view_list, view_copy);
-
-	return(view_copy);
-}
-
-/*********************************************************************//**
-Opens a read view where exactly the transactions serialized before this
-point in time are seen in the view. The ids of all active or prepared
-transactions other than the creator are stored in the view, and the view
-is added as the first item of trx_sys->view_list. The caller must own
-the kernel mutex.
-@return own: read view struct */
-UNIV_INTERN
-read_view_t*
-read_view_open_now(
-/*===============*/
-	trx_id_t	cr_trx_id,	/*!< in: trx_id of creating
-					transaction, or ut_dulint_zero
-					used in purge */
-	mem_heap_t*	heap)		/*!< in: memory heap from which
-					allocated */
-{
-	read_view_t*	view;
-	trx_t*		trx;
-	ulint		n_stored = 0;
-
-	ut_ad(mutex_own(&kernel_mutex));
-
-	/* Reserve a cell for every transaction in the list; the number
-	actually stored is set once the list has been scanned */
-	view = read_view_create_low(UT_LIST_GET_LEN(trx_sys->trx_list), heap);
-
-	view->creator_trx_id = cr_trx_id;
-	view->type = VIEW_NORMAL;
-	view->undo_no = ut_dulint_zero;
-
-	/* No future transactions should be visible in the view */
-
-	view->low_limit_no = trx_sys->max_trx_id;
-	view->low_limit_id = view->low_limit_no;
-
-	/* No active transaction should be visible, except cr_trx */
-
-	for (trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-	     trx != NULL;
-	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
-
-		if (ut_dulint_cmp(trx->id, cr_trx_id) == 0) {
-			/* The creator itself */
-			continue;
-		}
-
-		if (trx->conc_state != TRX_ACTIVE
-		    && trx->conc_state != TRX_PREPARED) {
-
-			continue;
-		}
-
-		read_view_set_nth_trx_id(view, n_stored, trx->id);
-
-		n_stored++;
-
-		/* NOTE that a transaction whose trx number is <
-		trx_sys->max_trx_id can still be active, if it is
-		in the middle of its commit! Note that when a
-		transaction starts, we initialize trx->no to
-		ut_dulint_max. */
-
-		if (ut_dulint_cmp(view->low_limit_no, trx->no) > 0) {
-
-			view->low_limit_no = trx->no;
-		}
-	}
-
-	view->n_trx_ids = n_stored;
-
-	if (n_stored == 0) {
-		view->up_limit_id = view->low_limit_id;
-	} else {
-		/* The last active transaction has the smallest id: */
-		view->up_limit_id = read_view_get_nth_trx_id(view,
-							     n_stored - 1);
-	}
-
-	UT_LIST_ADD_FIRST(view_list, trx_sys->view_list, view);
-
-	return(view);
-}
-
-/*********************************************************************//**
-Closes a read view by unlinking it from trx_sys->view_list. The caller
-must own the kernel mutex. */
-UNIV_INTERN
-void
-read_view_close(
-/*============*/
-	read_view_t*	view)	/*!< in: read view to unlink */
-{
-	ut_ad(mutex_own(&kernel_mutex));
-
-	/* Only the list membership changes here; no memory is freed
-	by this function */
-	UT_LIST_REMOVE(view_list, trx_sys->view_list, view);
-}
-
-/*********************************************************************//**
-Closes a consistent read view for MySQL. This function is called at an SQL
-statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED.
-The global read view of the transaction is unlinked, the heap that holds
-it is emptied, and both view pointers of the transaction are reset, all
-under the kernel mutex. */
-UNIV_INTERN
-void
-read_view_close_for_mysql(
-/*======================*/
-	trx_t*	trx)	/*!< in: trx which has a read view */
-{
-	ut_a(trx->global_read_view);
-
-	mutex_enter(&kernel_mutex);
-
-	read_view_close(trx->global_read_view);
-
-	/* Neither pointer may be used again after the heap is emptied */
-	mem_heap_empty(trx->global_read_view_heap);
-
-	trx->global_read_view = NULL;
-	trx->read_view = NULL;
-
-	mutex_exit(&kernel_mutex);
-}
-
-/*********************************************************************//**
-Prints a read view to stderr: its type (with the undo number for a
-high-granularity view), the low limit trx number, the up and low limit
-trx ids, and every individually stored trx id. */
-UNIV_INTERN
-void
-read_view_print(
-/*============*/
-	const read_view_t*	view)	/*!< in: read view */
-{
-	ulint	i;
-
-	if (view->type != VIEW_HIGH_GRANULARITY) {
-		fputs("Normal read view\n", stderr);
-	} else {
-		fprintf(stderr,
-			"High-granularity read view undo_n:o %lu %lu\n",
-			(ulong) ut_dulint_get_high(view->undo_no),
-			(ulong) ut_dulint_get_low(view->undo_no));
-	}
-
-	fprintf(stderr, "Read view low limit trx n:o %lu %lu\n",
-		(ulong) ut_dulint_get_high(view->low_limit_no),
-		(ulong) ut_dulint_get_low(view->low_limit_no));
-
-	fprintf(stderr, "Read view up limit trx id " TRX_ID_FMT "\n",
-		TRX_ID_PREP_PRINTF(view->up_limit_id));
-
-	fprintf(stderr, "Read view low limit trx id " TRX_ID_FMT "\n",
-		TRX_ID_PREP_PRINTF(view->low_limit_id));
-
-	fputs("Read view individually stored trx ids:\n", stderr);
-
-	/* One line per stored id, in array order */
-	for (i = 0; i < view->n_trx_ids; i++) {
-		fprintf(stderr, "Read view trx id " TRX_ID_FMT "\n",
-			TRX_ID_PREP_PRINTF(
-				read_view_get_nth_trx_id(view, i)));
-	}
-}
-
-/*********************************************************************//**
-Create a high-granularity consistent cursor view for mysql to be used
-in cursors. In this consistent read view modifications done by the
-creating transaction after the cursor is created or future transactions
-are not visible. The cursor view and its read view live in a heap of
-their own, which is stored in the returned struct.
-@return own: cursor view struct */
-UNIV_INTERN
-cursor_view_t*
-read_cursor_view_create_for_mysql(
-/*==============================*/
-	trx_t*	cr_trx)	/*!< in: trx where cursor view is created */
-{
-	cursor_view_t*	curview;
-	read_view_t*	view;
-	mem_heap_t*	heap;
-	trx_t*		trx;
-	ulint		n_stored = 0;
-
-	ut_a(cr_trx);
-
-	/* Use larger heap than in trx_create when creating a read_view
-	because cursors are quite long. */
-
-	heap = mem_heap_create(512);
-
-	curview = (cursor_view_t*) mem_heap_alloc(heap, sizeof(cursor_view_t));
-	curview->heap = heap;
-
-	/* Drop cursor tables from consideration when evaluating the need of
-	auto-commit */
-	curview->n_mysql_tables_in_use = cr_trx->n_mysql_tables_in_use;
-	cr_trx->n_mysql_tables_in_use = 0;
-
-	mutex_enter(&kernel_mutex);
-
-	view = read_view_create_low(UT_LIST_GET_LEN(trx_sys->trx_list),
-				    curview->heap);
-	curview->read_view = view;
-
-	view->creator_trx_id = cr_trx->id;
-	view->type = VIEW_HIGH_GRANULARITY;
-	view->undo_no = cr_trx->undo_no;
-
-	/* No future transactions should be visible in the view */
-
-	view->low_limit_no = trx_sys->max_trx_id;
-	view->low_limit_id = view->low_limit_no;
-
-	/* No active transaction should be visible */
-
-	for (trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-	     trx != NULL;
-	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
-
-		if (trx->conc_state != TRX_ACTIVE
-		    && trx->conc_state != TRX_PREPARED) {
-
-			continue;
-		}
-
-		read_view_set_nth_trx_id(view, n_stored, trx->id);
-
-		n_stored++;
-
-		/* NOTE that a transaction whose trx number is <
-		trx_sys->max_trx_id can still be active, if it is
-		in the middle of its commit! Note that when a
-		transaction starts, we initialize trx->no to
-		ut_dulint_max. */
-
-		if (ut_dulint_cmp(view->low_limit_no, trx->no) > 0) {
-
-			view->low_limit_no = trx->no;
-		}
-	}
-
-	view->n_trx_ids = n_stored;
-
-	if (n_stored == 0) {
-		view->up_limit_id = view->low_limit_id;
-	} else {
-		/* The last active transaction has the smallest id: */
-		view->up_limit_id = read_view_get_nth_trx_id(view,
-							     n_stored - 1);
-	}
-
-	UT_LIST_ADD_FIRST(view_list, trx_sys->view_list, view);
-
-	mutex_exit(&kernel_mutex);
-
-	return(curview);
-}
-
-/*********************************************************************//**
-Close a given consistent cursor view for mysql and restore global read view
-back to a transaction read view. The table count saved in the cursor view
-is added back to the transaction, and the heap of the cursor view is
-freed, so curview must not be used after this call. */
-UNIV_INTERN
-void
-read_cursor_view_close_for_mysql(
-/*=============================*/
-	trx_t*		trx,	/*!< in: trx */
-	cursor_view_t*	curview)/*!< in: cursor view to be closed */
-{
-	ut_a(curview);
-	ut_a(curview->read_view);
-	ut_a(curview->heap);
-
-	/* Add cursor's tables to the global count of active tables that
-	belong to this transaction */
-	trx->n_mysql_tables_in_use = trx->n_mysql_tables_in_use
-		+ curview->n_mysql_tables_in_use;
-
-	mutex_enter(&kernel_mutex);
-
-	read_view_close(curview->read_view);
-
-	/* From now on the transaction reads through its global view */
-	trx->read_view = trx->global_read_view;
-
-	mutex_exit(&kernel_mutex);
-
-	/* Freed outside of the kernel mutex, as the last step */
-	mem_heap_free(curview->heap);
-}
-
-/*********************************************************************//**
-Selects the read view that a transaction reads through: the read view of
-the given consistent cursor view if curview is not NULL, otherwise the
-global read view of the transaction. The switch is made under the kernel
-mutex. */
-UNIV_INTERN
-void
-read_cursor_set_for_mysql(
-/*======================*/
-	trx_t*		trx,	/*!< in: transaction where cursor is set */
-	cursor_view_t*	curview)/*!< in: consistent cursor view to be set */
-{
-	ut_a(trx);
-
-	mutex_enter(&kernel_mutex);
-
-	/* A non-NULL cursor view is hinted as the likely case */
-	trx->read_view = UNIV_LIKELY(curview != NULL)
-		? curview->read_view
-		: trx->global_read_view;
-
-	mutex_exit(&kernel_mutex);
-}
diff --git a/storage/innodb_plugin/rem/rem0cmp.c b/storage/innodb_plugin/rem/rem0cmp.c
deleted file mode 100644
index e6dab0bc66b..00000000000
--- a/storage/innodb_plugin/rem/rem0cmp.c
+++ /dev/null
@@ -1,1194 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file rem/rem0cmp.c
-Comparison services for records
-
-Created 7/1/1994 Heikki Tuuri
-************************************************************************/
-
-#include "rem0cmp.h"
-
-#ifdef UNIV_NONINL
-#include "rem0cmp.ic"
-#endif
-
-#include "srv0srv.h"
-
-/* ALPHABETICAL ORDER
- ==================
-
-The records are put into alphabetical order in the following
-way: let F be the first field where two records disagree.
-If there is a character in some position n where the
-records disagree, the order is determined by comparison of
-the characters at position n, possibly after
-collating transformation. If there is no such character,
-but the corresponding fields have different lengths, then
-if the data type of the fields is paddable,
-shorter field is padded with a padding character. If the
-data type is not paddable, longer field is considered greater.
-Finally, the SQL null is bigger than any other value.
-
-At the present, the comparison functions return 0 in the case,
-where two records disagree only in the way that one
-has more fields than the other. */
-
-#ifdef UNIV_DEBUG
-/*************************************************************//**
-Used in debug checking of cmp_dtuple_... .
-This function is used to compare a data tuple to a physical record. If
-dtuple has n fields then rec must have either m >= n fields, or it must
-differ from dtuple in some of the m fields rec has.
-@return 1, 0, -1, if dtuple is greater, equal, less than rec,
-respectively, when only the common first fields are compared */
-static
-int
-cmp_debug_dtuple_rec_with_match(
-/*============================*/
- const dtuple_t* dtuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: physical record which differs from
- dtuple in some of the common fields, or which
- has an equal number or more fields than
- dtuple */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint* matched_fields);/*!< in/out: number of already
- completely matched fields; when function
- returns, contains the value for current
- comparison */
-#endif /* UNIV_DEBUG */
-/*************************************************************//**
-This function is used to compare two data fields for which the data type
-is such that we must use MySQL code to compare them. The prototype here
-must be a copy of the one in ha_innobase.cc!
-@return 1, 0, -1, if a is greater, equal, less than b, respectively */
-extern
-int
-innobase_mysql_cmp(
-/*===============*/
- int mysql_type, /*!< in: MySQL type */
- uint charset_number, /*!< in: number of the charset */
- const unsigned char* a, /*!< in: data field */
- unsigned int a_length, /*!< in: data field length,
- not UNIV_SQL_NULL */
- const unsigned char* b, /*!< in: data field */
- unsigned int b_length); /*!< in: data field length,
- not UNIV_SQL_NULL */
-/*********************************************************************//**
-Transforms the character code so that it is ordered appropriately for the
-language. This is only used for the latin1 char set. MySQL does the
-comparisons for other char sets.
-@return collation order position */
-UNIV_INLINE
-ulint
-cmp_collate(
-/*========*/
- ulint code) /*!< in: code of a character stored in database record */
-{
- return((ulint) srv_latin1_ordering[code]);
-}
-
-/*************************************************************//**
-Returns TRUE if two columns are equal for comparison purposes.
-@return TRUE if the columns are considered equal in comparisons */
-UNIV_INTERN
-ibool
-cmp_cols_are_equal(
-/*===============*/
- const dict_col_t* col1, /*!< in: column 1 */
- const dict_col_t* col2, /*!< in: column 2 */
- ibool check_charsets)
- /*!< in: whether to check charsets */
-{
- if (dtype_is_non_binary_string_type(col1->mtype, col1->prtype)
- && dtype_is_non_binary_string_type(col2->mtype, col2->prtype)) {
-
- /* Both are non-binary string types: they can be compared if
- and only if the charset-collation is the same */
-
- if (check_charsets) {
- return(dtype_get_charset_coll(col1->prtype)
- == dtype_get_charset_coll(col2->prtype));
- } else {
- return(TRUE);
- }
- }
-
- if (dtype_is_binary_string_type(col1->mtype, col1->prtype)
- && dtype_is_binary_string_type(col2->mtype, col2->prtype)) {
-
- /* Both are binary string types: they can be compared */
-
- return(TRUE);
- }
-
- if (col1->mtype != col2->mtype) {
-
- return(FALSE);
- }
-
- if (col1->mtype == DATA_INT
- && (col1->prtype & DATA_UNSIGNED)
- != (col2->prtype & DATA_UNSIGNED)) {
-
- /* The storage format of an unsigned integer is different
- from a signed integer: in a signed integer we OR
- 0x8000... to the value of positive integers. */
-
- return(FALSE);
- }
-
- return(col1->mtype != DATA_INT || col1->len == col2->len);
-}
-
-/*************************************************************//**
-Innobase uses this function to compare two data fields for which the data type
-is such that we must compare whole fields or call MySQL to do the comparison
-@return 1, 0, -1, if a is greater, equal, less than b, respectively */
-static
-int
-cmp_whole_field(
-/*============*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type */
- const byte* a, /*!< in: data field */
- unsigned int a_length, /*!< in: data field length,
- not UNIV_SQL_NULL */
- const byte* b, /*!< in: data field */
- unsigned int b_length) /*!< in: data field length,
- not UNIV_SQL_NULL */
-{
- float f_1;
- float f_2;
- double d_1;
- double d_2;
- int swap_flag = 1;
-
- switch (mtype) {
-
- case DATA_DECIMAL:
- /* Remove preceding spaces */
- for (; a_length && *a == ' '; a++, a_length--);
- for (; b_length && *b == ' '; b++, b_length--);
-
- if (*a == '-') {
- if (*b != '-') {
- return(-1);
- }
-
- a++; b++;
- a_length--;
- b_length--;
-
- swap_flag = -1;
-
- } else if (*b == '-') {
-
- return(1);
- }
-
- while (a_length > 0 && (*a == '+' || *a == '0')) {
- a++; a_length--;
- }
-
- while (b_length > 0 && (*b == '+' || *b == '0')) {
- b++; b_length--;
- }
-
- if (a_length != b_length) {
- if (a_length < b_length) {
- return(-swap_flag);
- }
-
- return(swap_flag);
- }
-
- while (a_length > 0 && *a == *b) {
-
- a++; b++; a_length--;
- }
-
- if (a_length == 0) {
-
- return(0);
- }
-
- if (*a > *b) {
- return(swap_flag);
- }
-
- return(-swap_flag);
- case DATA_DOUBLE:
- d_1 = mach_double_read(a);
- d_2 = mach_double_read(b);
-
- if (d_1 > d_2) {
- return(1);
- } else if (d_2 > d_1) {
- return(-1);
- }
-
- return(0);
-
- case DATA_FLOAT:
- f_1 = mach_float_read(a);
- f_2 = mach_float_read(b);
-
- if (f_1 > f_2) {
- return(1);
- } else if (f_2 > f_1) {
- return(-1);
- }
-
- return(0);
- case DATA_BLOB:
- if (prtype & DATA_BINARY_TYPE) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: comparing a binary BLOB"
- " with a character set sensitive\n"
- "InnoDB: comparison!\n");
- }
- /* fall through */
- case DATA_VARMYSQL:
- case DATA_MYSQL:
- return(innobase_mysql_cmp(
- (int)(prtype & DATA_MYSQL_TYPE_MASK),
- (uint)dtype_get_charset_coll(prtype),
- a, a_length, b, b_length));
- default:
- fprintf(stderr,
- "InnoDB: unknown type number %lu\n",
- (ulong) mtype);
- ut_error;
- }
-
- return(0);
-}
-
-/*************************************************************//**
-This function is used to compare two data fields for which we know the
-data type.
-@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */
-UNIV_INTERN
-int
-cmp_data_data_slow(
-/*===============*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type */
- const byte* data1, /*!< in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /*!< in: data field length or UNIV_SQL_NULL */
- const byte* data2, /*!< in: data field (== a pointer to a memory
- buffer) */
- ulint len2) /*!< in: data field length or UNIV_SQL_NULL */
-{
- ulint data1_byte;
- ulint data2_byte;
- ulint cur_bytes;
-
- if (len1 == UNIV_SQL_NULL || len2 == UNIV_SQL_NULL) {
-
- if (len1 == len2) {
-
- return(0);
- }
-
- if (len1 == UNIV_SQL_NULL) {
- /* We define the SQL null to be the smallest possible
- value of a field in the alphabetical order */
-
- return(-1);
- }
-
- return(1);
- }
-
- if (mtype >= DATA_FLOAT
- || (mtype == DATA_BLOB
- && 0 == (prtype & DATA_BINARY_TYPE)
- && dtype_get_charset_coll(prtype)
- != DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) {
-
- return(cmp_whole_field(mtype, prtype,
- data1, (unsigned) len1,
- data2, (unsigned) len2));
- }
-
- /* Compare then the fields */
-
- cur_bytes = 0;
-
- for (;;) {
- if (len1 <= cur_bytes) {
- if (len2 <= cur_bytes) {
-
- return(0);
- }
-
- data1_byte = dtype_get_pad_char(mtype, prtype);
-
- if (data1_byte == ULINT_UNDEFINED) {
-
- return(-1);
- }
- } else {
- data1_byte = *data1;
- }
-
- if (len2 <= cur_bytes) {
- data2_byte = dtype_get_pad_char(mtype, prtype);
-
- if (data2_byte == ULINT_UNDEFINED) {
-
- return(1);
- }
- } else {
- data2_byte = *data2;
- }
-
- if (data1_byte == data2_byte) {
- /* If the bytes are equal, they will remain such even
- after the collation transformation below */
-
- goto next_byte;
- }
-
- if (mtype <= DATA_CHAR
- || (mtype == DATA_BLOB
- && 0 == (prtype & DATA_BINARY_TYPE))) {
-
- data1_byte = cmp_collate(data1_byte);
- data2_byte = cmp_collate(data2_byte);
- }
-
- if (data1_byte > data2_byte) {
-
- return(1);
- } else if (data1_byte < data2_byte) {
-
- return(-1);
- }
-next_byte:
- /* Next byte */
- cur_bytes++;
- data1++;
- data2++;
- }
-
- return(0); /* Not reached */
-}
-
-/*************************************************************//**
-This function is used to compare a data tuple to a physical record.
-Only dtuple->n_fields_cmp first fields are taken into account for
-the data tuple! If we denote by n = n_fields_cmp, then rec must
-have either m >= n fields, or it must differ from dtuple in some of
-the m fields rec has. If rec has an externally stored field we do not
-compare it but return with value 0 if such a comparison should be
-made.
-@return 1, 0, -1, if dtuple is greater, equal, less than rec,
-respectively, when only the common first fields are compared, or until
-the first externally stored field in rec */
-UNIV_INTERN
-int
-cmp_dtuple_rec_with_match(
-/*======================*/
- const dtuple_t* dtuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: physical record which differs from
- dtuple in some of the common fields, or which
- has an equal number or more fields than
- dtuple */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint* matched_fields, /*!< in/out: number of already completely
- matched fields; when function returns,
- contains the value for current comparison */
- ulint* matched_bytes) /*!< in/out: number of already matched
- bytes within the first field not completely
- matched; when function returns, contains the
- value for current comparison */
-{
- const dfield_t* dtuple_field; /* current field in logical record */
- ulint dtuple_f_len; /* the length of the current field
- in the logical record */
- const byte* dtuple_b_ptr; /* pointer to the current byte in
- logical field data */
- ulint dtuple_byte; /* value of current byte to be compared
- in dtuple*/
- ulint rec_f_len; /* length of current field in rec */
- const byte* rec_b_ptr; /* pointer to the current byte in
- rec field */
- ulint rec_byte; /* value of current byte to be
- compared in rec */
- ulint cur_field; /* current field number */
- ulint cur_bytes; /* number of already matched bytes
- in current field */
- int ret = 3333; /* return value */
-
- ut_ad(dtuple && rec && matched_fields && matched_bytes);
- ut_ad(dtuple_check_typed(dtuple));
- ut_ad(rec_offs_validate(rec, NULL, offsets));
-
- cur_field = *matched_fields;
- cur_bytes = *matched_bytes;
-
- ut_ad(cur_field <= dtuple_get_n_fields_cmp(dtuple));
- ut_ad(cur_field <= rec_offs_n_fields(offsets));
-
- if (cur_bytes == 0 && cur_field == 0) {
- ulint rec_info = rec_get_info_bits(rec,
- rec_offs_comp(offsets));
- ulint tup_info = dtuple_get_info_bits(dtuple);
-
- if (UNIV_UNLIKELY(rec_info & REC_INFO_MIN_REC_FLAG)) {
- ret = !(tup_info & REC_INFO_MIN_REC_FLAG);
- goto order_resolved;
- } else if (UNIV_UNLIKELY(tup_info & REC_INFO_MIN_REC_FLAG)) {
- ret = -1;
- goto order_resolved;
- }
- }
-
- /* Match fields in a loop; stop if we run out of fields in dtuple
- or find an externally stored field */
-
- while (cur_field < dtuple_get_n_fields_cmp(dtuple)) {
-
- ulint mtype;
- ulint prtype;
-
- dtuple_field = dtuple_get_nth_field(dtuple, cur_field);
- {
- const dtype_t* type
- = dfield_get_type(dtuple_field);
-
- mtype = type->mtype;
- prtype = type->prtype;
- }
-
- dtuple_f_len = dfield_get_len(dtuple_field);
-
- rec_b_ptr = rec_get_nth_field(rec, offsets,
- cur_field, &rec_f_len);
-
- /* If we have matched yet 0 bytes, it may be that one or
- both the fields are SQL null, or the record or dtuple may be
- the predefined minimum record, or the field is externally
- stored */
-
- if (UNIV_LIKELY(cur_bytes == 0)) {
- if (rec_offs_nth_extern(offsets, cur_field)) {
- /* We do not compare to an externally
- stored field */
-
- ret = 0;
-
- goto order_resolved;
- }
-
- if (dtuple_f_len == UNIV_SQL_NULL) {
- if (rec_f_len == UNIV_SQL_NULL) {
-
- goto next_field;
- }
-
- ret = -1;
- goto order_resolved;
- } else if (rec_f_len == UNIV_SQL_NULL) {
- /* We define the SQL null to be the
- smallest possible value of a field
- in the alphabetical order */
-
- ret = 1;
- goto order_resolved;
- }
- }
-
- if (mtype >= DATA_FLOAT
- || (mtype == DATA_BLOB
- && 0 == (prtype & DATA_BINARY_TYPE)
- && dtype_get_charset_coll(prtype)
- != DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) {
-
- ret = cmp_whole_field(mtype, prtype,
- dfield_get_data(dtuple_field),
- (unsigned) dtuple_f_len,
- rec_b_ptr, (unsigned) rec_f_len);
-
- if (ret != 0) {
- cur_bytes = 0;
-
- goto order_resolved;
- } else {
- goto next_field;
- }
- }
-
- /* Set the pointers at the current byte */
-
- rec_b_ptr = rec_b_ptr + cur_bytes;
- dtuple_b_ptr = (byte*)dfield_get_data(dtuple_field)
- + cur_bytes;
- /* Compare then the fields */
-
- for (;;) {
- if (UNIV_UNLIKELY(rec_f_len <= cur_bytes)) {
- if (dtuple_f_len <= cur_bytes) {
-
- goto next_field;
- }
-
- rec_byte = dtype_get_pad_char(mtype, prtype);
-
- if (rec_byte == ULINT_UNDEFINED) {
- ret = 1;
-
- goto order_resolved;
- }
- } else {
- rec_byte = *rec_b_ptr;
- }
-
- if (UNIV_UNLIKELY(dtuple_f_len <= cur_bytes)) {
- dtuple_byte = dtype_get_pad_char(mtype,
- prtype);
-
- if (dtuple_byte == ULINT_UNDEFINED) {
- ret = -1;
-
- goto order_resolved;
- }
- } else {
- dtuple_byte = *dtuple_b_ptr;
- }
-
- if (dtuple_byte == rec_byte) {
- /* If the bytes are equal, they will
- remain such even after the collation
- transformation below */
-
- goto next_byte;
- }
-
- if (mtype <= DATA_CHAR
- || (mtype == DATA_BLOB
- && !(prtype & DATA_BINARY_TYPE))) {
-
- rec_byte = cmp_collate(rec_byte);
- dtuple_byte = cmp_collate(dtuple_byte);
- }
-
- ret = (int) (dtuple_byte - rec_byte);
- if (UNIV_LIKELY(ret)) {
- if (ret < 0) {
- ret = -1;
- goto order_resolved;
- } else {
- ret = 1;
- goto order_resolved;
- }
- }
-next_byte:
- /* Next byte */
- cur_bytes++;
- rec_b_ptr++;
- dtuple_b_ptr++;
- }
-
-next_field:
- cur_field++;
- cur_bytes = 0;
- }
-
- ut_ad(cur_bytes == 0);
-
- ret = 0; /* If we ran out of fields, dtuple was equal to rec
- up to the common fields */
-order_resolved:
- ut_ad((ret >= - 1) && (ret <= 1));
- ut_ad(ret == cmp_debug_dtuple_rec_with_match(dtuple, rec, offsets,
- matched_fields));
- ut_ad(*matched_fields == cur_field); /* In the debug version, the
- above cmp_debug_... sets
- *matched_fields to a value */
- *matched_fields = cur_field;
- *matched_bytes = cur_bytes;
-
- return(ret);
-}
-
-/**************************************************************//**
-Compares a data tuple to a physical record.
-@see cmp_dtuple_rec_with_match
-@return 1, 0, -1, if dtuple is greater, equal, less than rec, respectively */
-UNIV_INTERN
-int
-cmp_dtuple_rec(
-/*===========*/
- const dtuple_t* dtuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ulint matched_fields = 0;
- ulint matched_bytes = 0;
-
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- return(cmp_dtuple_rec_with_match(dtuple, rec, offsets,
- &matched_fields, &matched_bytes));
-}
-
-/**************************************************************//**
-Checks if a dtuple is a prefix of a record. The last field in dtuple
-is allowed to be a prefix of the corresponding field in the record.
-@return TRUE if prefix */
-UNIV_INTERN
-ibool
-cmp_dtuple_is_prefix_of_rec(
-/*========================*/
- const dtuple_t* dtuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ulint n_fields;
- ulint matched_fields = 0;
- ulint matched_bytes = 0;
-
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- n_fields = dtuple_get_n_fields(dtuple);
-
- if (n_fields > rec_offs_n_fields(offsets)) {
-
- return(FALSE);
- }
-
- cmp_dtuple_rec_with_match(dtuple, rec, offsets,
- &matched_fields, &matched_bytes);
- if (matched_fields == n_fields) {
-
- return(TRUE);
- }
-
- if (matched_fields == n_fields - 1
- && matched_bytes == dfield_get_len(
- dtuple_get_nth_field(dtuple, n_fields - 1))) {
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*************************************************************//**
-Compare two physical records that contain the same number of columns,
-none of which are stored externally.
-@return 1, 0, -1 if rec1 is greater, equal, less, respectively, than rec2 */
-UNIV_INTERN
-int
-cmp_rec_rec_simple(
-/*===============*/
- const rec_t* rec1, /*!< in: physical record */
- const rec_t* rec2, /*!< in: physical record */
- const ulint* offsets1,/*!< in: rec_get_offsets(rec1, ...) */
- const ulint* offsets2,/*!< in: rec_get_offsets(rec2, ...) */
- const dict_index_t* index) /*!< in: data dictionary index */
-{
- ulint rec1_f_len; /*!< length of current field in rec1 */
- const byte* rec1_b_ptr; /*!< pointer to the current byte
- in rec1 field */
- ulint rec1_byte; /*!< value of current byte to be
- compared in rec1 */
- ulint rec2_f_len; /*!< length of current field in rec2 */
- const byte* rec2_b_ptr; /*!< pointer to the current byte
- in rec2 field */
- ulint rec2_byte; /*!< value of current byte to be
- compared in rec2 */
- ulint cur_field; /*!< current field number */
- ulint n_uniq;
-
- n_uniq = dict_index_get_n_unique(index);
- ut_ad(rec_offs_n_fields(offsets1) >= n_uniq);
- ut_ad(rec_offs_n_fields(offsets2) >= n_uniq);
-
- ut_ad(rec_offs_comp(offsets1) == rec_offs_comp(offsets2));
-
- for (cur_field = 0; cur_field < n_uniq; cur_field++) {
-
- ulint cur_bytes;
- ulint mtype;
- ulint prtype;
-
- {
- const dict_col_t* col
- = dict_index_get_nth_col(index, cur_field);
-
- mtype = col->mtype;
- prtype = col->prtype;
- }
-
- ut_ad(!rec_offs_nth_extern(offsets1, cur_field));
- ut_ad(!rec_offs_nth_extern(offsets2, cur_field));
-
- rec1_b_ptr = rec_get_nth_field(rec1, offsets1,
- cur_field, &rec1_f_len);
- rec2_b_ptr = rec_get_nth_field(rec2, offsets2,
- cur_field, &rec2_f_len);
-
- if (rec1_f_len == UNIV_SQL_NULL
- || rec2_f_len == UNIV_SQL_NULL) {
-
- if (rec1_f_len == rec2_f_len) {
-
- goto next_field;
-
- } else if (rec2_f_len == UNIV_SQL_NULL) {
-
- /* We define the SQL null to be the
- smallest possible value of a field
- in the alphabetical order */
-
- return(1);
- } else {
- return(-1);
- }
- }
-
- if (mtype >= DATA_FLOAT
- || (mtype == DATA_BLOB
- && 0 == (prtype & DATA_BINARY_TYPE)
- && dtype_get_charset_coll(prtype)
- != DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) {
- int ret = cmp_whole_field(mtype, prtype,
- rec1_b_ptr,
- (unsigned) rec1_f_len,
- rec2_b_ptr,
- (unsigned) rec2_f_len);
- if (ret) {
- return(ret);
- }
-
- goto next_field;
- }
-
- /* Compare the fields */
- for (cur_bytes = 0;; cur_bytes++, rec1_b_ptr++, rec2_b_ptr++) {
- if (rec2_f_len <= cur_bytes) {
-
- if (rec1_f_len <= cur_bytes) {
-
- goto next_field;
- }
-
- rec2_byte = dtype_get_pad_char(mtype, prtype);
-
- if (rec2_byte == ULINT_UNDEFINED) {
- return(1);
- }
- } else {
- rec2_byte = *rec2_b_ptr;
- }
-
- if (rec1_f_len <= cur_bytes) {
- rec1_byte = dtype_get_pad_char(mtype, prtype);
-
- if (rec1_byte == ULINT_UNDEFINED) {
- return(-1);
- }
- } else {
- rec1_byte = *rec1_b_ptr;
- }
-
- if (rec1_byte == rec2_byte) {
- /* If the bytes are equal, they will remain
- such even after the collation transformation
- below */
-
- continue;
- }
-
- if (mtype <= DATA_CHAR
- || (mtype == DATA_BLOB
- && !(prtype & DATA_BINARY_TYPE))) {
-
- rec1_byte = cmp_collate(rec1_byte);
- rec2_byte = cmp_collate(rec2_byte);
- }
-
- if (rec1_byte < rec2_byte) {
- return(-1);
- } else if (rec1_byte > rec2_byte) {
- return(1);
- }
- }
-next_field:
- continue;
- }
-
- /* If we ran out of fields, rec1 was equal to rec2. */
- return(0);
-}
-
-/*************************************************************//**
-This function is used to compare two physical records. Only the common
-first fields are compared, and if an externally stored field is
-encountered, then 0 is returned.
-@return 1, 0, -1 if rec1 is greater, equal, less, respectively */
-UNIV_INTERN
-int
-cmp_rec_rec_with_match(
-/*===================*/
- const rec_t* rec1, /*!< in: physical record */
- const rec_t* rec2, /*!< in: physical record */
- const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */
- const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */
- dict_index_t* index, /*!< in: data dictionary index */
- ulint* matched_fields, /*!< in/out: number of already completely
- matched fields; when the function returns,
- contains the value the for current
- comparison */
- ulint* matched_bytes) /*!< in/out: number of already matched
- bytes within the first field not completely
- matched; when the function returns, contains
- the value for the current comparison */
-{
- ulint rec1_n_fields; /* the number of fields in rec */
- ulint rec1_f_len; /* length of current field in rec */
- const byte* rec1_b_ptr; /* pointer to the current byte
- in rec field */
- ulint rec1_byte; /* value of current byte to be
- compared in rec */
- ulint rec2_n_fields; /* the number of fields in rec */
- ulint rec2_f_len; /* length of current field in rec */
- const byte* rec2_b_ptr; /* pointer to the current byte
- in rec field */
- ulint rec2_byte; /* value of current byte to be
- compared in rec */
- ulint cur_field; /* current field number */
- ulint cur_bytes; /* number of already matched
- bytes in current field */
- int ret = 0; /* return value */
- ulint comp;
-
- ut_ad(rec1 && rec2 && index);
- ut_ad(rec_offs_validate(rec1, index, offsets1));
- ut_ad(rec_offs_validate(rec2, index, offsets2));
- ut_ad(rec_offs_comp(offsets1) == rec_offs_comp(offsets2));
-
- comp = rec_offs_comp(offsets1);
- rec1_n_fields = rec_offs_n_fields(offsets1);
- rec2_n_fields = rec_offs_n_fields(offsets2);
-
- cur_field = *matched_fields;
- cur_bytes = *matched_bytes;
-
- /* Match fields in a loop */
-
- while ((cur_field < rec1_n_fields) && (cur_field < rec2_n_fields)) {
-
- ulint mtype;
- ulint prtype;
-
- if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
- /* This is for the insert buffer B-tree. */
- mtype = DATA_BINARY;
- prtype = 0;
- } else {
- const dict_col_t* col
- = dict_index_get_nth_col(index, cur_field);
-
- mtype = col->mtype;
- prtype = col->prtype;
- }
-
- rec1_b_ptr = rec_get_nth_field(rec1, offsets1,
- cur_field, &rec1_f_len);
- rec2_b_ptr = rec_get_nth_field(rec2, offsets2,
- cur_field, &rec2_f_len);
-
- if (cur_bytes == 0) {
- if (cur_field == 0) {
- /* Test if rec is the predefined minimum
- record */
- if (UNIV_UNLIKELY(rec_get_info_bits(rec1, comp)
- & REC_INFO_MIN_REC_FLAG)) {
-
- if (!(rec_get_info_bits(rec2, comp)
- & REC_INFO_MIN_REC_FLAG)) {
- ret = -1;
- }
-
- goto order_resolved;
-
- } else if (UNIV_UNLIKELY
- (rec_get_info_bits(rec2, comp)
- & REC_INFO_MIN_REC_FLAG)) {
-
- ret = 1;
-
- goto order_resolved;
- }
- }
-
- if (rec_offs_nth_extern(offsets1, cur_field)
- || rec_offs_nth_extern(offsets2, cur_field)) {
- /* We do not compare to an externally
- stored field */
-
- goto order_resolved;
- }
-
- if (rec1_f_len == UNIV_SQL_NULL
- || rec2_f_len == UNIV_SQL_NULL) {
-
- if (rec1_f_len == rec2_f_len) {
-
- goto next_field;
-
- } else if (rec2_f_len == UNIV_SQL_NULL) {
-
- /* We define the SQL null to be the
- smallest possible value of a field
- in the alphabetical order */
-
- ret = 1;
- } else {
- ret = -1;
- }
-
- goto order_resolved;
- }
- }
-
- if (mtype >= DATA_FLOAT
- || (mtype == DATA_BLOB
- && 0 == (prtype & DATA_BINARY_TYPE)
- && dtype_get_charset_coll(prtype)
- != DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) {
-
- ret = cmp_whole_field(mtype, prtype,
- rec1_b_ptr,
- (unsigned) rec1_f_len,
- rec2_b_ptr,
- (unsigned) rec2_f_len);
- if (ret != 0) {
- cur_bytes = 0;
-
- goto order_resolved;
- } else {
- goto next_field;
- }
- }
-
- /* Set the pointers at the current byte */
- rec1_b_ptr = rec1_b_ptr + cur_bytes;
- rec2_b_ptr = rec2_b_ptr + cur_bytes;
-
- /* Compare then the fields */
- for (;;) {
- if (rec2_f_len <= cur_bytes) {
-
- if (rec1_f_len <= cur_bytes) {
-
- goto next_field;
- }
-
- rec2_byte = dtype_get_pad_char(mtype, prtype);
-
- if (rec2_byte == ULINT_UNDEFINED) {
- ret = 1;
-
- goto order_resolved;
- }
- } else {
- rec2_byte = *rec2_b_ptr;
- }
-
- if (rec1_f_len <= cur_bytes) {
- rec1_byte = dtype_get_pad_char(mtype, prtype);
-
- if (rec1_byte == ULINT_UNDEFINED) {
- ret = -1;
-
- goto order_resolved;
- }
- } else {
- rec1_byte = *rec1_b_ptr;
- }
-
- if (rec1_byte == rec2_byte) {
- /* If the bytes are equal, they will remain
- such even after the collation transformation
- below */
-
- goto next_byte;
- }
-
- if (mtype <= DATA_CHAR
- || (mtype == DATA_BLOB
- && !(prtype & DATA_BINARY_TYPE))) {
-
- rec1_byte = cmp_collate(rec1_byte);
- rec2_byte = cmp_collate(rec2_byte);
- }
-
- if (rec1_byte < rec2_byte) {
- ret = -1;
- goto order_resolved;
- } else if (rec1_byte > rec2_byte) {
- ret = 1;
- goto order_resolved;
- }
-next_byte:
- /* Next byte */
-
- cur_bytes++;
- rec1_b_ptr++;
- rec2_b_ptr++;
- }
-
-next_field:
- cur_field++;
- cur_bytes = 0;
- }
-
- ut_ad(cur_bytes == 0);
-
- /* If we ran out of fields, rec1 was equal to rec2 up
- to the common fields */
- ut_ad(ret == 0);
-order_resolved:
-
- ut_ad((ret >= - 1) && (ret <= 1));
-
- *matched_fields = cur_field;
- *matched_bytes = cur_bytes;
-
- return(ret);
-}
-
-#ifdef UNIV_DEBUG
-/*************************************************************//**
-Used in debug checking of cmp_dtuple_... .
-This function is used to compare a data tuple to a physical record. If
-dtuple has n fields then rec must have either m >= n fields, or it must
-differ from dtuple in some of the m fields rec has. If encounters an
-externally stored field, returns 0.
-@return 1, 0, -1, if dtuple is greater, equal, less than rec,
-respectively, when only the common first fields are compared */
-static
-int
-cmp_debug_dtuple_rec_with_match(
-/*============================*/
- const dtuple_t* dtuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: physical record which differs from
- dtuple in some of the common fields, or which
- has an equal number or more fields than
- dtuple */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint* matched_fields) /*!< in/out: number of already
- completely matched fields; when function
- returns, contains the value for current
- comparison */
-{
- const dfield_t* dtuple_field; /* current field in logical record */
- ulint dtuple_f_len; /* the length of the current field
- in the logical record */
- const byte* dtuple_f_data; /* pointer to the current logical
- field data */
- ulint rec_f_len; /* length of current field in rec */
- const byte* rec_f_data; /* pointer to the current rec field */
- int ret = 3333; /* return value */
- ulint cur_field; /* current field number */
-
- ut_ad(dtuple && rec && matched_fields);
- ut_ad(dtuple_check_typed(dtuple));
- ut_ad(rec_offs_validate(rec, NULL, offsets));
-
- ut_ad(*matched_fields <= dtuple_get_n_fields_cmp(dtuple));
- ut_ad(*matched_fields <= rec_offs_n_fields(offsets));
-
- cur_field = *matched_fields;
-
- if (cur_field == 0) {
- if (UNIV_UNLIKELY
- (rec_get_info_bits(rec, rec_offs_comp(offsets))
- & REC_INFO_MIN_REC_FLAG)) {
-
- ret = !(dtuple_get_info_bits(dtuple)
- & REC_INFO_MIN_REC_FLAG);
-
- goto order_resolved;
- }
-
- if (UNIV_UNLIKELY
- (dtuple_get_info_bits(dtuple) & REC_INFO_MIN_REC_FLAG)) {
- ret = -1;
-
- goto order_resolved;
- }
- }
-
- /* Match fields in a loop; stop if we run out of fields in dtuple */
-
- while (cur_field < dtuple_get_n_fields_cmp(dtuple)) {
-
- ulint mtype;
- ulint prtype;
-
- dtuple_field = dtuple_get_nth_field(dtuple, cur_field);
- {
- const dtype_t* type
- = dfield_get_type(dtuple_field);
-
- mtype = type->mtype;
- prtype = type->prtype;
- }
-
- dtuple_f_data = dfield_get_data(dtuple_field);
- dtuple_f_len = dfield_get_len(dtuple_field);
-
- rec_f_data = rec_get_nth_field(rec, offsets,
- cur_field, &rec_f_len);
-
- if (rec_offs_nth_extern(offsets, cur_field)) {
- /* We do not compare to an externally stored field */
-
- ret = 0;
-
- goto order_resolved;
- }
-
- ret = cmp_data_data(mtype, prtype, dtuple_f_data, dtuple_f_len,
- rec_f_data, rec_f_len);
- if (ret != 0) {
- goto order_resolved;
- }
-
- cur_field++;
- }
-
- ret = 0; /* If we ran out of fields, dtuple was equal to rec
- up to the common fields */
-order_resolved:
- ut_ad((ret >= - 1) && (ret <= 1));
-
- *matched_fields = cur_field;
-
- return(ret);
-}
-#endif /* UNIV_DEBUG */
diff --git a/storage/innodb_plugin/rem/rem0rec.c b/storage/innodb_plugin/rem/rem0rec.c
deleted file mode 100644
index 1c8b3fd8c1e..00000000000
--- a/storage/innodb_plugin/rem/rem0rec.c
+++ /dev/null
@@ -1,1720 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file rem/rem0rec.c
-Record manager
-
-Created 5/30/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "rem0rec.h"
-
-#ifdef UNIV_NONINL
-#include "rem0rec.ic"
-#endif
-
-#include "mtr0mtr.h"
-#include "mtr0log.h"
-
-/* PHYSICAL RECORD (OLD STYLE)
- ===========================
-
-The physical record, which is the data type of all the records
-found in index pages of the database, has the following format
-(lower addresses and more significant bits inside a byte are below
-represented on a higher text line):
-
-| offset of the end of the last field of data, the most significant
- bit is set to 1 if and only if the field is SQL-null,
- if the offset is 2-byte, then the second most significant
- bit is set to 1 if the field is stored on another page:
- mostly this will occur in the case of big BLOB fields |
-...
-| offset of the end of the first field of data + the SQL-null bit |
-| 4 bits used to delete mark a record, and mark a predefined
- minimum record in alphabetical order |
-| 4 bits giving the number of records owned by this record
- (this term is explained in page0page.h) |
-| 13 bits giving the order number of this record in the
- heap of the index page |
-| 10 bits giving the number of fields in this record |
-| 1 bit which is set to 1 if the offsets above are given in
- one byte format, 0 if in two byte format |
-| two bytes giving an absolute pointer to the next record in the page |
-ORIGIN of the record
-| first field of data |
-...
-| last field of data |
-
-The origin of the record is the start address of the first field
-of data. The offsets are given relative to the origin.
-The offsets of the data fields are stored in an inverted
-order because then the offset of the first fields are near the
-origin, giving maybe a better processor cache hit rate in searches.
-
-The offsets of the data fields are given as one-byte
-(if there are less than 127 bytes of data in the record)
-or two-byte unsigned integers. The most significant bit
-is not part of the offset, instead it indicates the SQL-null
-if the bit is set to 1. */
-
-/* PHYSICAL RECORD (NEW STYLE)
- ===========================
-
-The physical record, which is the data type of all the records
-found in index pages of the database, has the following format
-(lower addresses and more significant bits inside a byte are below
-represented on a higher text line):
-
-| length of the last non-null variable-length field of data:
- if the maximum length is 255, one byte; otherwise,
- 0xxxxxxx (one byte, length=0..127), or 1exxxxxxxxxxxxxx (two bytes,
- length=128..16383, extern storage flag) |
-...
-| length of first variable-length field of data |
-| SQL-null flags (1 bit per nullable field), padded to full bytes |
-| 4 bits used to delete mark a record, and mark a predefined
- minimum record in alphabetical order |
-| 4 bits giving the number of records owned by this record
- (this term is explained in page0page.h) |
-| 13 bits giving the order number of this record in the
- heap of the index page |
-| 3 bits record type: 000=conventional, 001=node pointer (inside B-tree),
- 010=infimum, 011=supremum, 1xx=reserved |
-| two bytes giving a relative pointer to the next record in the page |
-ORIGIN of the record
-| first field of data |
-...
-| last field of data |
-
-The origin of the record is the start address of the first field
-of data. The offsets are given relative to the origin.
-The offsets of the data fields are stored in an inverted
-order because then the offset of the first fields are near the
-origin, giving maybe a better processor cache hit rate in searches.
-
-The offsets of the data fields are given as one-byte
-(if there are less than 127 bytes of data in the record)
-or two-byte unsigned integers. The most significant bit
-is not part of the offset, instead it indicates the SQL-null
-if the bit is set to 1. */
-
-/* CANONICAL COORDINATES. A record can be seen as a single
-string of 'characters' in the following way: catenate the bytes
-in each field, in the order of fields. An SQL-null field
-is taken to be an empty sequence of bytes. Then after
-the position of each field insert in the string
-the 'character' <FIELD-END>, except that after an SQL-null field
-insert <NULL-FIELD-END>. Now the ordinal position of each
-byte in this canonical string is its canonical coordinate.
-So, for the record ("AA", SQL-NULL, "BB", ""), the canonical
-string is "AA<FIELD_END><NULL-FIELD-END>BB<FIELD-END><FIELD-END>".
-We identify prefixes (= initial segments) of a record
-with prefixes of the canonical string. The canonical
-length of the prefix is the length of the corresponding
-prefix of the canonical string. The canonical length of
-a record is the length of its canonical string.
-
-For example, the maximal common prefix of records
-("AA", SQL-NULL, "BB", "C") and ("AA", SQL-NULL, "B", "C")
-is "AA<FIELD-END><NULL-FIELD-END>B", and its canonical
-length is 5.
-
-A complete-field prefix of a record is a prefix which ends at the
-end of some field (containing also <FIELD-END>).
-A record is a complete-field prefix of another record, if
-the corresponding canonical strings have the same property. */
-
-/* this is used to fool compiler in rec_validate */
-UNIV_INTERN ulint rec_dummy;
-
-/***************************************************************//**
-Validates the consistency of an old-style physical record.
-@return TRUE if ok */
-static
-ibool
-rec_validate_old(
-/*=============*/
- const rec_t* rec); /*!< in: physical record */
-
-/******************************************************//**
-Determine how many of the first n columns in a compact
-physical record are stored externally.
-@return number of externally stored columns */
-UNIV_INTERN
-ulint
-rec_get_n_extern_new(
-/*=================*/
- const rec_t* rec, /*!< in: compact physical record */
- dict_index_t* index, /*!< in: record descriptor */
- ulint n) /*!< in: number of columns to scan */
-{
- const byte* nulls;
- const byte* lens;
- dict_field_t* field;
- ulint null_mask;
- ulint n_extern;
- ulint i;
-
- ut_ad(dict_table_is_comp(index->table));
- ut_ad(rec_get_status(rec) == REC_STATUS_ORDINARY);
- ut_ad(n == ULINT_UNDEFINED || n <= dict_index_get_n_fields(index));
-
- if (n == ULINT_UNDEFINED) {
- n = dict_index_get_n_fields(index);
- }
-
- nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
- lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
- null_mask = 1;
- n_extern = 0;
- i = 0;
-
- /* read the lengths of fields 0..n */
- do {
- ulint len;
-
- field = dict_index_get_nth_field(index, i);
- if (!(dict_field_get_col(field)->prtype & DATA_NOT_NULL)) {
- /* nullable field => read the null flag */
-
- if (UNIV_UNLIKELY(!(byte) null_mask)) {
- nulls--;
- null_mask = 1;
- }
-
- if (*nulls & null_mask) {
- null_mask <<= 1;
- /* No length is stored for NULL fields. */
- continue;
- }
- null_mask <<= 1;
- }
-
- if (UNIV_UNLIKELY(!field->fixed_len)) {
- /* Variable-length field: read the length */
- const dict_col_t* col
- = dict_field_get_col(field);
- len = *lens--;
- if (UNIV_UNLIKELY(col->len > 255)
- || UNIV_UNLIKELY(col->mtype == DATA_BLOB)) {
- if (len & 0x80) {
- /* 1exxxxxxx xxxxxxxx */
- if (len & 0x40) {
- n_extern++;
- }
- lens--;
- }
- }
- }
- } while (++i < n);
-
- return(n_extern);
-}
-
-/******************************************************//**
-Determine the offset to each field in a leaf-page record
-in ROW_FORMAT=COMPACT. This is a special case of
-rec_init_offsets() and rec_get_offsets_func(). */
-UNIV_INTERN
-void
-rec_init_offsets_comp_ordinary(
-/*===========================*/
- const rec_t* rec, /*!< in: physical record in
- ROW_FORMAT=COMPACT */
- ulint extra, /*!< in: number of bytes to reserve
- between the record header and
- the data payload
- (usually REC_N_NEW_EXTRA_BYTES) */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets)/*!< in/out: array of offsets;
- in: n=rec_offs_n_fields(offsets) */
-{
- ulint i = 0;
- ulint offs = 0;
- ulint any_ext = 0;
- const byte* nulls = rec - (extra + 1);
- const byte* lens = nulls
- - UT_BITS_IN_BYTES(index->n_nullable);
- dict_field_t* field;
- ulint null_mask = 1;
-
-#ifdef UNIV_DEBUG
- /* We cannot invoke rec_offs_make_valid() here, because it can hold
- that extra != REC_N_NEW_EXTRA_BYTES. Similarly, rec_offs_validate()
- will fail in that case, because it invokes rec_get_status(). */
- offsets[2] = (ulint) rec;
- offsets[3] = (ulint) index;
-#endif /* UNIV_DEBUG */
-
- /* read the lengths of fields 0..n */
- do {
- ulint len;
-
- field = dict_index_get_nth_field(index, i);
- if (!(dict_field_get_col(field)->prtype
- & DATA_NOT_NULL)) {
- /* nullable field => read the null flag */
-
- if (UNIV_UNLIKELY(!(byte) null_mask)) {
- nulls--;
- null_mask = 1;
- }
-
- if (*nulls & null_mask) {
- null_mask <<= 1;
- /* No length is stored for NULL fields.
- We do not advance offs, and we set
- the length to zero and enable the
- SQL NULL flag in offsets[]. */
- len = offs | REC_OFFS_SQL_NULL;
- goto resolved;
- }
- null_mask <<= 1;
- }
-
- if (UNIV_UNLIKELY(!field->fixed_len)) {
- /* Variable-length field: read the length */
- const dict_col_t* col
- = dict_field_get_col(field);
- len = *lens--;
- if (UNIV_UNLIKELY(col->len > 255)
- || UNIV_UNLIKELY(col->mtype
- == DATA_BLOB)) {
- if (len & 0x80) {
- /* 1exxxxxxx xxxxxxxx */
- len <<= 8;
- len |= *lens--;
-
- offs += len & 0x3fff;
- if (UNIV_UNLIKELY(len
- & 0x4000)) {
- ut_ad(dict_index_is_clust
- (index));
- any_ext = REC_OFFS_EXTERNAL;
- len = offs
- | REC_OFFS_EXTERNAL;
- } else {
- len = offs;
- }
-
- goto resolved;
- }
- }
-
- len = offs += len;
- } else {
- len = offs += field->fixed_len;
- }
-resolved:
- rec_offs_base(offsets)[i + 1] = len;
- } while (++i < rec_offs_n_fields(offsets));
-
- *rec_offs_base(offsets)
- = (rec - (lens + 1)) | REC_OFFS_COMPACT | any_ext;
-}
-
-/******************************************************//**
-The following function determines the offsets to each field in the
-record. The offsets are written to a previously allocated array of
-ulint, where rec_offs_n_fields(offsets) has been initialized to the
-number of fields in the record. The rest of the array will be
-initialized by this function. rec_offs_base(offsets)[0] will be set
-to the extra size (if REC_OFFS_COMPACT is set, the record is in the
-new format; if REC_OFFS_EXTERNAL is set, the record contains externally
-stored columns), and rec_offs_base(offsets)[1..n_fields] will be set to
-offsets past the end of fields 0..n_fields, or to the beginning of
-fields 1..n_fields+1. When the high-order bit of the offset at [i+1]
-is set (REC_OFFS_SQL_NULL), the field i is NULL. When the second
-high-order bit of the offset at [i+1] is set (REC_OFFS_EXTERNAL), the
-field i is being stored externally. */
-static
-void
-rec_init_offsets(
-/*=============*/
- const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets)/*!< in/out: array of offsets;
- in: n=rec_offs_n_fields(offsets) */
-{
- ulint i = 0;
- ulint offs;
-
- rec_offs_make_valid(rec, index, offsets);
-
- if (dict_table_is_comp(index->table)) {
- const byte* nulls;
- const byte* lens;
- dict_field_t* field;
- ulint null_mask;
- ulint status = rec_get_status(rec);
- ulint n_node_ptr_field = ULINT_UNDEFINED;
-
- switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- /* the field is 8 bytes long */
- rec_offs_base(offsets)[0]
- = REC_N_NEW_EXTRA_BYTES | REC_OFFS_COMPACT;
- rec_offs_base(offsets)[1] = 8;
- return;
- case REC_STATUS_NODE_PTR:
- n_node_ptr_field
- = dict_index_get_n_unique_in_tree(index);
- break;
- case REC_STATUS_ORDINARY:
- rec_init_offsets_comp_ordinary(rec,
- REC_N_NEW_EXTRA_BYTES,
- index, offsets);
- return;
- }
-
- nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
- lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
- offs = 0;
- null_mask = 1;
-
- /* read the lengths of fields 0..n */
- do {
- ulint len;
- if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
- len = offs += 4;
- goto resolved;
- }
-
- field = dict_index_get_nth_field(index, i);
- if (!(dict_field_get_col(field)->prtype
- & DATA_NOT_NULL)) {
- /* nullable field => read the null flag */
-
- if (UNIV_UNLIKELY(!(byte) null_mask)) {
- nulls--;
- null_mask = 1;
- }
-
- if (*nulls & null_mask) {
- null_mask <<= 1;
- /* No length is stored for NULL fields.
- We do not advance offs, and we set
- the length to zero and enable the
- SQL NULL flag in offsets[]. */
- len = offs | REC_OFFS_SQL_NULL;
- goto resolved;
- }
- null_mask <<= 1;
- }
-
- if (UNIV_UNLIKELY(!field->fixed_len)) {
- /* Variable-length field: read the length */
- const dict_col_t* col
- = dict_field_get_col(field);
- len = *lens--;
- if (UNIV_UNLIKELY(col->len > 255)
- || UNIV_UNLIKELY(col->mtype
- == DATA_BLOB)) {
- if (len & 0x80) {
- /* 1exxxxxxx xxxxxxxx */
-
- len <<= 8;
- len |= *lens--;
-
- /* B-tree node pointers
- must not contain externally
- stored columns. Thus
- the "e" flag must be 0. */
- ut_a(!(len & 0x4000));
- offs += len & 0x3fff;
- len = offs;
-
- goto resolved;
- }
- }
-
- len = offs += len;
- } else {
- len = offs += field->fixed_len;
- }
-resolved:
- rec_offs_base(offsets)[i + 1] = len;
- } while (++i < rec_offs_n_fields(offsets));
-
- *rec_offs_base(offsets)
- = (rec - (lens + 1)) | REC_OFFS_COMPACT;
- } else {
- /* Old-style record: determine extra size and end offsets */
- offs = REC_N_OLD_EXTRA_BYTES;
- if (rec_get_1byte_offs_flag(rec)) {
- offs += rec_offs_n_fields(offsets);
- *rec_offs_base(offsets) = offs;
- /* Determine offsets to fields */
- do {
- offs = rec_1_get_field_end_info(rec, i);
- if (offs & REC_1BYTE_SQL_NULL_MASK) {
- offs &= ~REC_1BYTE_SQL_NULL_MASK;
- offs |= REC_OFFS_SQL_NULL;
- }
- rec_offs_base(offsets)[1 + i] = offs;
- } while (++i < rec_offs_n_fields(offsets));
- } else {
- offs += 2 * rec_offs_n_fields(offsets);
- *rec_offs_base(offsets) = offs;
- /* Determine offsets to fields */
- do {
- offs = rec_2_get_field_end_info(rec, i);
- if (offs & REC_2BYTE_SQL_NULL_MASK) {
- offs &= ~REC_2BYTE_SQL_NULL_MASK;
- offs |= REC_OFFS_SQL_NULL;
- }
- if (offs & REC_2BYTE_EXTERN_MASK) {
- offs &= ~REC_2BYTE_EXTERN_MASK;
- offs |= REC_OFFS_EXTERNAL;
- *rec_offs_base(offsets) |= REC_OFFS_EXTERNAL;
- }
- rec_offs_base(offsets)[1 + i] = offs;
- } while (++i < rec_offs_n_fields(offsets));
- }
- }
-}
-
-/******************************************************//**
-The following function determines the offsets to each field
-in the record. It can reuse a previously returned array.
-@return the new offsets */
-UNIV_INTERN
-ulint*
-rec_get_offsets_func(
-/*=================*/
- const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets,/*!< in/out: array consisting of
- offsets[0] allocated elements,
- or an array from rec_get_offsets(),
- or NULL */
- ulint n_fields,/*!< in: maximum number of
- initialized fields
- (ULINT_UNDEFINED if all fields) */
- mem_heap_t** heap, /*!< in/out: memory heap */
- const char* file, /*!< in: file name where called */
- ulint line) /*!< in: line number where called */
-{
- ulint n;
- ulint size;
-
- ut_ad(rec);
- ut_ad(index);
- ut_ad(heap);
-
- if (dict_table_is_comp(index->table)) {
- switch (UNIV_EXPECT(rec_get_status(rec),
- REC_STATUS_ORDINARY)) {
- case REC_STATUS_ORDINARY:
- n = dict_index_get_n_fields(index);
- break;
- case REC_STATUS_NODE_PTR:
- n = dict_index_get_n_unique_in_tree(index) + 1;
- break;
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- /* infimum or supremum record */
- n = 1;
- break;
- default:
- ut_error;
- return(NULL);
- }
- } else {
- n = rec_get_n_fields_old(rec);
- }
-
- if (UNIV_UNLIKELY(n_fields < n)) {
- n = n_fields;
- }
-
- size = n + (1 + REC_OFFS_HEADER_SIZE);
-
- if (UNIV_UNLIKELY(!offsets)
- || UNIV_UNLIKELY(rec_offs_get_n_alloc(offsets) < size)) {
- if (UNIV_UNLIKELY(!*heap)) {
- *heap = mem_heap_create_func(size * sizeof(ulint),
- MEM_HEAP_DYNAMIC,
- file, line);
- }
- offsets = mem_heap_alloc(*heap, size * sizeof(ulint));
- rec_offs_set_n_alloc(offsets, size);
- }
-
- rec_offs_set_n_fields(offsets, n);
- rec_init_offsets(rec, index, offsets);
- return(offsets);
-}
-
-/******************************************************//**
-The following function determines the offsets to each field
-in the record. It can reuse a previously allocated array. */
-UNIV_INTERN
-void
-rec_get_offsets_reverse(
-/*====================*/
- const byte* extra, /*!< in: the extra bytes of a
- compact record in reverse order,
- excluding the fixed-size
- REC_N_NEW_EXTRA_BYTES */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint node_ptr,/*!< in: nonzero=node pointer,
- 0=leaf node */
- ulint* offsets)/*!< in/out: array consisting of
- offsets[0] allocated elements */
-{
- ulint n;
- ulint i;
- ulint offs;
- ulint any_ext;
- const byte* nulls;
- const byte* lens;
- dict_field_t* field;
- ulint null_mask;
- ulint n_node_ptr_field;
-
- ut_ad(extra);
- ut_ad(index);
- ut_ad(offsets);
- ut_ad(dict_table_is_comp(index->table));
-
- if (UNIV_UNLIKELY(node_ptr)) {
- n_node_ptr_field = dict_index_get_n_unique_in_tree(index);
- n = n_node_ptr_field + 1;
- } else {
- n_node_ptr_field = ULINT_UNDEFINED;
- n = dict_index_get_n_fields(index);
- }
-
- ut_a(rec_offs_get_n_alloc(offsets) >= n + (1 + REC_OFFS_HEADER_SIZE));
- rec_offs_set_n_fields(offsets, n);
-
- nulls = extra;
- lens = nulls + UT_BITS_IN_BYTES(index->n_nullable);
- i = offs = 0;
- null_mask = 1;
- any_ext = 0;
-
- /* read the lengths of fields 0..n */
- do {
- ulint len;
- if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
- len = offs += 4;
- goto resolved;
- }
-
- field = dict_index_get_nth_field(index, i);
- if (!(dict_field_get_col(field)->prtype & DATA_NOT_NULL)) {
- /* nullable field => read the null flag */
-
- if (UNIV_UNLIKELY(!(byte) null_mask)) {
- nulls++;
- null_mask = 1;
- }
-
- if (*nulls & null_mask) {
- null_mask <<= 1;
- /* No length is stored for NULL fields.
- We do not advance offs, and we set
- the length to zero and enable the
- SQL NULL flag in offsets[]. */
- len = offs | REC_OFFS_SQL_NULL;
- goto resolved;
- }
- null_mask <<= 1;
- }
-
- if (UNIV_UNLIKELY(!field->fixed_len)) {
- /* Variable-length field: read the length */
- const dict_col_t* col
- = dict_field_get_col(field);
- len = *lens++;
- if (UNIV_UNLIKELY(col->len > 255)
- || UNIV_UNLIKELY(col->mtype == DATA_BLOB)) {
- if (len & 0x80) {
- /* 1exxxxxxx xxxxxxxx */
- len <<= 8;
- len |= *lens++;
-
- offs += len & 0x3fff;
- if (UNIV_UNLIKELY(len & 0x4000)) {
- any_ext = REC_OFFS_EXTERNAL;
- len = offs | REC_OFFS_EXTERNAL;
- } else {
- len = offs;
- }
-
- goto resolved;
- }
- }
-
- len = offs += len;
- } else {
- len = offs += field->fixed_len;
- }
-resolved:
- rec_offs_base(offsets)[i + 1] = len;
- } while (++i < rec_offs_n_fields(offsets));
-
- ut_ad(lens >= extra);
- *rec_offs_base(offsets) = (lens - extra + REC_N_NEW_EXTRA_BYTES)
- | REC_OFFS_COMPACT | any_ext;
-}
-
-/************************************************************//**
-The following function is used to get the offset to the nth
-data field in an old-style record.
-@return offset to the field */
-UNIV_INTERN
-ulint
-rec_get_nth_field_offs_old(
-/*=======================*/
- const rec_t* rec, /*!< in: record */
- ulint n, /*!< in: index of the field */
- ulint* len) /*!< out: length of the field;
- UNIV_SQL_NULL if SQL null */
-{
- ulint os;
- ulint next_os;
-
- ut_ad(rec && len);
- ut_ad(n < rec_get_n_fields_old(rec));
-
- if (UNIV_UNLIKELY(n > REC_MAX_N_FIELDS)) {
- fprintf(stderr, "Error: trying to access field %lu in rec\n",
- (ulong) n);
- ut_error;
- }
-
- if (UNIV_UNLIKELY(rec == NULL)) {
- fputs("Error: rec is NULL pointer\n", stderr);
- ut_error;
- }
-
- if (rec_get_1byte_offs_flag(rec)) {
- os = rec_1_get_field_start_offs(rec, n);
-
- next_os = rec_1_get_field_end_info(rec, n);
-
- if (next_os & REC_1BYTE_SQL_NULL_MASK) {
- *len = UNIV_SQL_NULL;
-
- return(os);
- }
-
- next_os = next_os & ~REC_1BYTE_SQL_NULL_MASK;
- } else {
- os = rec_2_get_field_start_offs(rec, n);
-
- next_os = rec_2_get_field_end_info(rec, n);
-
- if (next_os & REC_2BYTE_SQL_NULL_MASK) {
- *len = UNIV_SQL_NULL;
-
- return(os);
- }
-
- next_os = next_os & ~(REC_2BYTE_SQL_NULL_MASK
- | REC_2BYTE_EXTERN_MASK);
- }
-
- *len = next_os - os;
-
- ut_ad(*len < UNIV_PAGE_SIZE);
-
- return(os);
-}
-
-/**********************************************************//**
-Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
-@return total size */
-UNIV_INTERN
-ulint
-rec_get_converted_size_comp_prefix(
-/*===============================*/
- const dict_index_t* index, /*!< in: record descriptor;
- dict_table_is_comp() is
- assumed to hold, even if
- it does not */
- const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields,/*!< in: number of data fields */
- ulint* extra) /*!< out: extra size */
-{
- ulint extra_size;
- ulint data_size;
- ulint i;
- ut_ad(index);
- ut_ad(fields);
- ut_ad(n_fields > 0);
- ut_ad(n_fields <= dict_index_get_n_fields(index));
-
- extra_size = REC_N_NEW_EXTRA_BYTES
- + UT_BITS_IN_BYTES(index->n_nullable);
- data_size = 0;
-
- /* read the lengths of fields 0..n */
- for (i = 0; i < n_fields; i++) {
- const dict_field_t* field;
- ulint len;
- const dict_col_t* col;
-
- field = dict_index_get_nth_field(index, i);
- len = dfield_get_len(&fields[i]);
- col = dict_field_get_col(field);
-
- ut_ad(dict_col_type_assert_equal(col,
- dfield_get_type(&fields[i])));
-
- if (dfield_is_null(&fields[i])) {
- /* No length is stored for NULL fields. */
- ut_ad(!(col->prtype & DATA_NOT_NULL));
- continue;
- }
-
- ut_ad(len <= col->len || col->mtype == DATA_BLOB);
-
- if (field->fixed_len) {
- ut_ad(len == field->fixed_len);
- /* dict_index_add_col() should guarantee this */
- ut_ad(!field->prefix_len
- || field->fixed_len == field->prefix_len);
- } else if (dfield_is_ext(&fields[i])) {
- extra_size += 2;
- } else if (len < 128
- || (col->len < 256 && col->mtype != DATA_BLOB)) {
- extra_size++;
- } else {
- /* For variable-length columns, we look up the
- maximum length from the column itself. If this
- is a prefix index column shorter than 256 bytes,
- this will waste one byte. */
- extra_size += 2;
- }
- data_size += len;
- }
-
- if (UNIV_LIKELY_NULL(extra)) {
- *extra = extra_size;
- }
-
- return(extra_size + data_size);
-}
-
-/**********************************************************//**
-Determines the size of a data tuple in ROW_FORMAT=COMPACT.
-@return total size */
-UNIV_INTERN
-ulint
-rec_get_converted_size_comp(
-/*========================*/
- const dict_index_t* index, /*!< in: record descriptor;
- dict_table_is_comp() is
- assumed to hold, even if
- it does not */
- ulint status, /*!< in: status bits of the record */
- const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields,/*!< in: number of data fields */
- ulint* extra) /*!< out: extra size */
-{
- ulint size;
- ut_ad(index);
- ut_ad(fields);
- ut_ad(n_fields > 0);
-
- switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
- case REC_STATUS_ORDINARY:
- ut_ad(n_fields == dict_index_get_n_fields(index));
- size = 0;
- break;
- case REC_STATUS_NODE_PTR:
- n_fields--;
- ut_ad(n_fields == dict_index_get_n_unique_in_tree(index));
- ut_ad(dfield_get_len(&fields[n_fields]) == REC_NODE_PTR_SIZE);
- size = REC_NODE_PTR_SIZE; /* child page number */
- break;
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- /* infimum or supremum record, 8 data bytes */
- if (UNIV_LIKELY_NULL(extra)) {
- *extra = REC_N_NEW_EXTRA_BYTES;
- }
- return(REC_N_NEW_EXTRA_BYTES + 8);
- default:
- ut_error;
- return(ULINT_UNDEFINED);
- }
-
- return(size + rec_get_converted_size_comp_prefix(index, fields,
- n_fields, extra));
-}
-
-/***********************************************************//**
-Sets the value of the ith field SQL null bit of an old-style record. */
-UNIV_INTERN
-void
-rec_set_nth_field_null_bit(
-/*=======================*/
- rec_t* rec, /*!< in: record */
- ulint i, /*!< in: ith field */
- ibool val) /*!< in: value to set */
-{
- ulint info;
-
- if (rec_get_1byte_offs_flag(rec)) {
-
- info = rec_1_get_field_end_info(rec, i);
-
- if (val) {
- info = info | REC_1BYTE_SQL_NULL_MASK;
- } else {
- info = info & ~REC_1BYTE_SQL_NULL_MASK;
- }
-
- rec_1_set_field_end_info(rec, i, info);
-
- return;
- }
-
- info = rec_2_get_field_end_info(rec, i);
-
- if (val) {
- info = info | REC_2BYTE_SQL_NULL_MASK;
- } else {
- info = info & ~REC_2BYTE_SQL_NULL_MASK;
- }
-
- rec_2_set_field_end_info(rec, i, info);
-}
-
-/***********************************************************//**
-Sets an old-style record field to SQL null.
-The physical size of the field is not changed. */
-UNIV_INTERN
-void
-rec_set_nth_field_sql_null(
-/*=======================*/
- rec_t* rec, /*!< in: record */
- ulint n) /*!< in: index of the field */
-{
- ulint offset;
-
- offset = rec_get_field_start_offs(rec, n);
-
- data_write_sql_null(rec + offset, rec_get_nth_field_size(rec, n));
-
- rec_set_nth_field_null_bit(rec, n, TRUE);
-}
-
-/*********************************************************//**
-Builds an old-style physical record out of a data tuple and
-stores it beginning from the start of the given buffer.
-@return pointer to the origin of physical record */
-static
-rec_t*
-rec_convert_dtuple_to_rec_old(
-/*==========================*/
- byte* buf, /*!< in: start address of the physical record */
- const dtuple_t* dtuple, /*!< in: data tuple */
- ulint n_ext) /*!< in: number of externally stored columns */
-{
- const dfield_t* field;
- ulint n_fields;
- ulint data_size;
- rec_t* rec;
- ulint end_offset;
- ulint ored_offset;
- ulint len;
- ulint i;
-
- ut_ad(buf && dtuple);
- ut_ad(dtuple_validate(dtuple));
- ut_ad(dtuple_check_typed(dtuple));
-
- n_fields = dtuple_get_n_fields(dtuple);
- data_size = dtuple_get_data_size(dtuple, 0);
-
- ut_ad(n_fields > 0);
-
- /* Calculate the offset of the origin in the physical record */
-
- rec = buf + rec_get_converted_extra_size(data_size, n_fields, n_ext);
-#ifdef UNIV_DEBUG
- /* Suppress Valgrind warnings of ut_ad()
- in mach_write_to_1(), mach_write_to_2() et al. */
- memset(buf, 0xff, rec - buf + data_size);
-#endif /* UNIV_DEBUG */
- /* Store the number of fields */
- rec_set_n_fields_old(rec, n_fields);
-
- /* Set the info bits of the record */
- rec_set_info_bits_old(rec, dtuple_get_info_bits(dtuple)
- & REC_INFO_BITS_MASK);
-
- /* Store the data and the offsets */
-
- end_offset = 0;
-
- if (!n_ext && data_size <= REC_1BYTE_OFFS_LIMIT) {
-
- rec_set_1byte_offs_flag(rec, TRUE);
-
- for (i = 0; i < n_fields; i++) {
-
- field = dtuple_get_nth_field(dtuple, i);
-
- if (dfield_is_null(field)) {
- len = dtype_get_sql_null_size(
- dfield_get_type(field), 0);
- data_write_sql_null(rec + end_offset, len);
-
- end_offset += len;
- ored_offset = end_offset
- | REC_1BYTE_SQL_NULL_MASK;
- } else {
- /* If the data is not SQL null, store it */
- len = dfield_get_len(field);
-
- memcpy(rec + end_offset,
- dfield_get_data(field), len);
-
- end_offset += len;
- ored_offset = end_offset;
- }
-
- rec_1_set_field_end_info(rec, i, ored_offset);
- }
- } else {
- rec_set_1byte_offs_flag(rec, FALSE);
-
- for (i = 0; i < n_fields; i++) {
-
- field = dtuple_get_nth_field(dtuple, i);
-
- if (dfield_is_null(field)) {
- len = dtype_get_sql_null_size(
- dfield_get_type(field), 0);
- data_write_sql_null(rec + end_offset, len);
-
- end_offset += len;
- ored_offset = end_offset
- | REC_2BYTE_SQL_NULL_MASK;
- } else {
- /* If the data is not SQL null, store it */
- len = dfield_get_len(field);
-
- memcpy(rec + end_offset,
- dfield_get_data(field), len);
-
- end_offset += len;
- ored_offset = end_offset;
-
- if (dfield_is_ext(field)) {
- ored_offset |= REC_2BYTE_EXTERN_MASK;
- }
- }
-
- rec_2_set_field_end_info(rec, i, ored_offset);
- }
- }
-
- return(rec);
-}
-
-/*********************************************************//**
-Builds a ROW_FORMAT=COMPACT record out of a data tuple. */
-UNIV_INTERN
-void
-rec_convert_dtuple_to_rec_comp(
-/*===========================*/
- rec_t* rec, /*!< in: origin of record */
- ulint extra, /*!< in: number of bytes to
- reserve between the record
- header and the data payload
- (normally REC_N_NEW_EXTRA_BYTES) */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint status, /*!< in: status bits of the record */
- const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields)/*!< in: number of data fields */
-{
- const dfield_t* field;
- const dtype_t* type;
- byte* end;
- byte* nulls;
- byte* lens;
- ulint len;
- ulint i;
- ulint n_node_ptr_field;
- ulint fixed_len;
- ulint null_mask = 1;
- ut_ad(extra == 0 || dict_table_is_comp(index->table));
- ut_ad(extra == 0 || extra == REC_N_NEW_EXTRA_BYTES);
- ut_ad(n_fields > 0);
-
- switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
- case REC_STATUS_ORDINARY:
- ut_ad(n_fields <= dict_index_get_n_fields(index));
- n_node_ptr_field = ULINT_UNDEFINED;
- break;
- case REC_STATUS_NODE_PTR:
- ut_ad(n_fields == dict_index_get_n_unique_in_tree(index) + 1);
- n_node_ptr_field = n_fields - 1;
- break;
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- ut_ad(n_fields == 1);
- n_node_ptr_field = ULINT_UNDEFINED;
- break;
- default:
- ut_error;
- return;
- }
-
- end = rec;
- nulls = rec - (extra + 1);
- lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
- /* clear the SQL-null flags */
- memset(lens + 1, 0, nulls - lens);
-
- /* Store the data and the offsets */
-
- for (i = 0, field = fields; i < n_fields; i++, field++) {
- type = dfield_get_type(field);
- len = dfield_get_len(field);
-
- if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
- ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL);
- ut_ad(len == 4);
- memcpy(end, dfield_get_data(field), len);
- end += 4;
- break;
- }
-
- if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) {
- /* nullable field */
- ut_ad(index->n_nullable > 0);
-
- if (UNIV_UNLIKELY(!(byte) null_mask)) {
- nulls--;
- null_mask = 1;
- }
-
- ut_ad(*nulls < null_mask);
-
- /* set the null flag if necessary */
- if (dfield_is_null(field)) {
- *nulls |= null_mask;
- null_mask <<= 1;
- continue;
- }
-
- null_mask <<= 1;
- }
- /* only nullable fields can be null */
- ut_ad(!dfield_is_null(field));
-
- fixed_len = dict_index_get_nth_field(index, i)->fixed_len;
-
- if (fixed_len) {
- ut_ad(len == fixed_len);
- ut_ad(!dfield_is_ext(field));
- } else if (dfield_is_ext(field)) {
- ut_ad(len <= REC_MAX_INDEX_COL_LEN
- + BTR_EXTERN_FIELD_REF_SIZE);
- *lens-- = (byte) (len >> 8) | 0xc0;
- *lens-- = (byte) len;
- } else {
- ut_ad(len <= dtype_get_len(type)
- || dtype_get_mtype(type) == DATA_BLOB);
- if (len < 128
- || (dtype_get_len(type) < 256
- && dtype_get_mtype(type) != DATA_BLOB)) {
-
- *lens-- = (byte) len;
- } else {
- ut_ad(len < 16384);
- *lens-- = (byte) (len >> 8) | 0x80;
- *lens-- = (byte) len;
- }
- }
-
- memcpy(end, dfield_get_data(field), len);
- end += len;
- }
-}
-
-/*********************************************************//**
-Builds a new-style physical record out of a data tuple and
-stores it beginning from the start of the given buffer.
-@return pointer to the origin of physical record */
-static
-rec_t*
-rec_convert_dtuple_to_rec_new(
-/*==========================*/
- byte* buf, /*!< in: start address of
- the physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- const dtuple_t* dtuple) /*!< in: data tuple */
-{
- ulint extra_size;
- ulint status;
- rec_t* rec;
-
- status = dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK;
- rec_get_converted_size_comp(index, status,
- dtuple->fields, dtuple->n_fields,
- &extra_size);
- rec = buf + extra_size;
-
- rec_convert_dtuple_to_rec_comp(
- rec, REC_N_NEW_EXTRA_BYTES, index, status,
- dtuple->fields, dtuple->n_fields);
-
- /* Set the info bits of the record */
- rec_set_info_and_status_bits(rec, dtuple_get_info_bits(dtuple));
-
- return(rec);
-}
-
-/*********************************************************//**
-Builds a physical record out of a data tuple and
-stores it beginning from the start of the given buffer.
-@return pointer to the origin of physical record */
-UNIV_INTERN
-rec_t*
-rec_convert_dtuple_to_rec(
-/*======================*/
- byte* buf, /*!< in: start address of the
- physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- const dtuple_t* dtuple, /*!< in: data tuple */
- ulint n_ext) /*!< in: number of
- externally stored columns */
-{
- rec_t* rec;
-
- ut_ad(buf && index && dtuple);
- ut_ad(dtuple_validate(dtuple));
- ut_ad(dtuple_check_typed(dtuple));
-
- if (dict_table_is_comp(index->table)) {
- rec = rec_convert_dtuple_to_rec_new(buf, index, dtuple);
- } else {
- rec = rec_convert_dtuple_to_rec_old(buf, dtuple, n_ext);
- }
-
-#ifdef UNIV_DEBUG
- {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- const ulint* offsets;
- rec_offs_init(offsets_);
-
- offsets = rec_get_offsets(rec, index,
- offsets_, ULINT_UNDEFINED, &heap);
- ut_ad(rec_validate(rec, offsets));
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-#endif /* UNIV_DEBUG */
- return(rec);
-}
-
-/**************************************************************//**
-Copies the first n fields of a physical record to a data tuple. The fields
-are copied to the memory heap. */
-UNIV_INTERN
-void
-rec_copy_prefix_to_dtuple(
-/*======================*/
- dtuple_t* tuple, /*!< out: data tuple */
- const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint n_fields, /*!< in: number of fields
- to copy */
- mem_heap_t* heap) /*!< in: memory heap */
-{
- ulint i;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- offsets = rec_get_offsets(rec, index, offsets, n_fields, &heap);
-
- ut_ad(rec_validate(rec, offsets));
- ut_ad(dtuple_check_typed(tuple));
-
- dtuple_set_info_bits(tuple, rec_get_info_bits(
- rec, dict_table_is_comp(index->table)));
-
- for (i = 0; i < n_fields; i++) {
- dfield_t* field;
- const byte* data;
- ulint len;
-
- field = dtuple_get_nth_field(tuple, i);
- data = rec_get_nth_field(rec, offsets, i, &len);
-
- if (len != UNIV_SQL_NULL) {
- dfield_set_data(field,
- mem_heap_dup(heap, data, len), len);
- ut_ad(!rec_offs_nth_extern(offsets, i));
- } else {
- dfield_set_null(field);
- }
- }
-}
-
-/**************************************************************//**
-Copies the first n fields of an old-style physical record
-to a new physical record in a buffer.
-@return own: copied record */
-static
-rec_t*
-rec_copy_prefix_to_buf_old(
-/*=======================*/
- const rec_t* rec, /*!< in: physical record */
- ulint n_fields, /*!< in: number of fields to copy */
- ulint area_end, /*!< in: end of the prefix data */
- byte** buf, /*!< in/out: memory buffer for
- the copied prefix, or NULL */
- ulint* buf_size) /*!< in/out: buffer size */
-{
- rec_t* copy_rec;
- ulint area_start;
- ulint prefix_len;
-
- if (rec_get_1byte_offs_flag(rec)) {
- area_start = REC_N_OLD_EXTRA_BYTES + n_fields;
- } else {
- area_start = REC_N_OLD_EXTRA_BYTES + 2 * n_fields;
- }
-
- prefix_len = area_start + area_end;
-
- if ((*buf == NULL) || (*buf_size < prefix_len)) {
- if (*buf != NULL) {
- mem_free(*buf);
- }
-
- *buf = mem_alloc2(prefix_len, buf_size);
- }
-
- ut_memcpy(*buf, rec - area_start, prefix_len);
-
- copy_rec = *buf + area_start;
-
- rec_set_n_fields_old(copy_rec, n_fields);
-
- return(copy_rec);
-}
-
-/**************************************************************//**
-Copies the first n fields of a physical record to a new physical record in
-a buffer.
-@return own: copied record */
-UNIV_INTERN
-rec_t*
-rec_copy_prefix_to_buf(
-/*===================*/
- const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint n_fields, /*!< in: number of fields
- to copy */
- byte** buf, /*!< in/out: memory buffer
- for the copied prefix,
- or NULL */
- ulint* buf_size) /*!< in/out: buffer size */
-{
- const byte* nulls;
- const byte* lens;
- ulint i;
- ulint prefix_len;
- ulint null_mask;
- ulint status;
-
- UNIV_PREFETCH_RW(*buf);
-
- if (!dict_table_is_comp(index->table)) {
- ut_ad(rec_validate_old(rec));
- return(rec_copy_prefix_to_buf_old(
- rec, n_fields,
- rec_get_field_start_offs(rec, n_fields),
- buf, buf_size));
- }
-
- status = rec_get_status(rec);
-
- switch (status) {
- case REC_STATUS_ORDINARY:
- ut_ad(n_fields <= dict_index_get_n_fields(index));
- break;
- case REC_STATUS_NODE_PTR:
- /* it doesn't make sense to copy the child page number field */
- ut_ad(n_fields <= dict_index_get_n_unique_in_tree(index));
- break;
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- /* infimum or supremum record: no sense to copy anything */
- default:
- ut_error;
- return(NULL);
- }
-
- nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
- lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
- UNIV_PREFETCH_R(lens);
- prefix_len = 0;
- null_mask = 1;
-
- /* read the lengths of fields 0..n */
- for (i = 0; i < n_fields; i++) {
- const dict_field_t* field;
- const dict_col_t* col;
-
- field = dict_index_get_nth_field(index, i);
- col = dict_field_get_col(field);
-
- if (!(col->prtype & DATA_NOT_NULL)) {
- /* nullable field => read the null flag */
- if (UNIV_UNLIKELY(!(byte) null_mask)) {
- nulls--;
- null_mask = 1;
- }
-
- if (*nulls & null_mask) {
- null_mask <<= 1;
- continue;
- }
-
- null_mask <<= 1;
- }
-
- if (field->fixed_len) {
- prefix_len += field->fixed_len;
- } else {
- ulint len = *lens--;
- if (col->len > 255 || col->mtype == DATA_BLOB) {
- if (len & 0x80) {
- /* 1exxxxxx */
- len &= 0x3f;
- len <<= 8;
- len |= *lens--;
- UNIV_PREFETCH_R(lens);
- }
- }
- prefix_len += len;
- }
- }
-
- UNIV_PREFETCH_R(rec + prefix_len);
-
- prefix_len += rec - (lens + 1);
-
- if ((*buf == NULL) || (*buf_size < prefix_len)) {
- if (*buf != NULL) {
- mem_free(*buf);
- }
-
- *buf = mem_alloc2(prefix_len, buf_size);
- }
-
- memcpy(*buf, lens + 1, prefix_len);
-
- return(*buf + (rec - (lens + 1)));
-}
-
-/***************************************************************//**
-Validates the consistency of an old-style physical record.
-@return TRUE if ok */
-static
-ibool
-rec_validate_old(
-/*=============*/
- const rec_t* rec) /*!< in: physical record */
-{
- const byte* data;
- ulint len;
- ulint n_fields;
- ulint len_sum = 0;
- ulint sum = 0;
- ulint i;
-
- ut_a(rec);
- n_fields = rec_get_n_fields_old(rec);
-
- if ((n_fields == 0) || (n_fields > REC_MAX_N_FIELDS)) {
- fprintf(stderr, "InnoDB: Error: record has %lu fields\n",
- (ulong) n_fields);
- return(FALSE);
- }
-
- for (i = 0; i < n_fields; i++) {
- data = rec_get_nth_field_old(rec, i, &len);
-
- if (!((len < UNIV_PAGE_SIZE) || (len == UNIV_SQL_NULL))) {
- fprintf(stderr,
- "InnoDB: Error: record field %lu len %lu\n",
- (ulong) i,
- (ulong) len);
- return(FALSE);
- }
-
- if (len != UNIV_SQL_NULL) {
- len_sum += len;
- sum += *(data + len -1); /* dereference the
- end of the field to
- cause a memory trap
- if possible */
- } else {
- len_sum += rec_get_nth_field_size(rec, i);
- }
- }
-
- if (len_sum != rec_get_data_size_old(rec)) {
- fprintf(stderr,
- "InnoDB: Error: record len should be %lu, len %lu\n",
- (ulong) len_sum,
- rec_get_data_size_old(rec));
- return(FALSE);
- }
-
- rec_dummy = sum; /* This is here only to fool the compiler */
-
- return(TRUE);
-}
-
-/***************************************************************//**
-Validates the consistency of a physical record.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-rec_validate(
-/*=========*/
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- const byte* data;
- ulint len;
- ulint n_fields;
- ulint len_sum = 0;
- ulint sum = 0;
- ulint i;
-
- ut_a(rec);
- n_fields = rec_offs_n_fields(offsets);
-
- if ((n_fields == 0) || (n_fields > REC_MAX_N_FIELDS)) {
- fprintf(stderr, "InnoDB: Error: record has %lu fields\n",
- (ulong) n_fields);
- return(FALSE);
- }
-
- ut_a(rec_offs_comp(offsets) || n_fields <= rec_get_n_fields_old(rec));
-
- for (i = 0; i < n_fields; i++) {
- data = rec_get_nth_field(rec, offsets, i, &len);
-
- if (!((len < UNIV_PAGE_SIZE) || (len == UNIV_SQL_NULL))) {
- fprintf(stderr,
- "InnoDB: Error: record field %lu len %lu\n",
- (ulong) i,
- (ulong) len);
- return(FALSE);
- }
-
- if (len != UNIV_SQL_NULL) {
- len_sum += len;
- sum += *(data + len -1); /* dereference the
- end of the field to
- cause a memory trap
- if possible */
- } else if (!rec_offs_comp(offsets)) {
- len_sum += rec_get_nth_field_size(rec, i);
- }
- }
-
- if (len_sum != rec_offs_data_size(offsets)) {
- fprintf(stderr,
- "InnoDB: Error: record len should be %lu, len %lu\n",
- (ulong) len_sum,
- (ulong) rec_offs_data_size(offsets));
- return(FALSE);
- }
-
- rec_dummy = sum; /* This is here only to fool the compiler */
-
- if (!rec_offs_comp(offsets)) {
- ut_a(rec_validate_old(rec));
- }
-
- return(TRUE);
-}
-
-/***************************************************************//**
-Prints an old-style physical record. */
-UNIV_INTERN
-void
-rec_print_old(
-/*==========*/
- FILE* file, /*!< in: file where to print */
- const rec_t* rec) /*!< in: physical record */
-{
- const byte* data;
- ulint len;
- ulint n;
- ulint i;
-
- ut_ad(rec);
-
- n = rec_get_n_fields_old(rec);
-
- fprintf(file, "PHYSICAL RECORD: n_fields %lu;"
- " %u-byte offsets; info bits %lu\n",
- (ulong) n,
- rec_get_1byte_offs_flag(rec) ? 1 : 2,
- (ulong) rec_get_info_bits(rec, FALSE));
-
- for (i = 0; i < n; i++) {
-
- data = rec_get_nth_field_old(rec, i, &len);
-
- fprintf(file, " %lu:", (ulong) i);
-
- if (len != UNIV_SQL_NULL) {
- if (len <= 30) {
-
- ut_print_buf(file, data, len);
- } else {
- ut_print_buf(file, data, 30);
-
- fprintf(file, " (total %lu bytes)",
- (ulong) len);
- }
- } else {
- fprintf(file, " SQL NULL, size %lu ",
- rec_get_nth_field_size(rec, i));
- }
-
- putc(';', file);
- putc('\n', file);
- }
-
- rec_validate_old(rec);
-}
-
-#ifndef UNIV_HOTBACKUP
-/***************************************************************//**
-Prints a physical record in ROW_FORMAT=COMPACT. Ignores the
-record header. */
-UNIV_INTERN
-void
-rec_print_comp(
-/*===========*/
- FILE* file, /*!< in: file where to print */
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ulint i;
-
- for (i = 0; i < rec_offs_n_fields(offsets); i++) {
- const byte* data;
- ulint len;
-
- data = rec_get_nth_field(rec, offsets, i, &len);
-
- fprintf(file, " %lu:", (ulong) i);
-
- if (len != UNIV_SQL_NULL) {
- if (len <= 30) {
-
- ut_print_buf(file, data, len);
- } else {
- ut_print_buf(file, data, 30);
-
- fprintf(file, " (total %lu bytes)",
- (ulong) len);
- }
- } else {
- fputs(" SQL NULL", file);
- }
- putc(';', file);
- putc('\n', file);
- }
-}
-
-/***************************************************************//**
-Prints a physical record. */
-UNIV_INTERN
-void
-rec_print_new(
-/*==========*/
- FILE* file, /*!< in: file where to print */
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ut_ad(rec);
- ut_ad(offsets);
- ut_ad(rec_offs_validate(rec, NULL, offsets));
-
- if (!rec_offs_comp(offsets)) {
- rec_print_old(file, rec);
- return;
- }
-
- fprintf(file, "PHYSICAL RECORD: n_fields %lu;"
- " compact format; info bits %lu\n",
- (ulong) rec_offs_n_fields(offsets),
- (ulong) rec_get_info_bits(rec, TRUE));
-
- rec_print_comp(file, rec, offsets);
- rec_validate(rec, offsets);
-}
-
-/***************************************************************//**
-Prints a physical record. */
-UNIV_INTERN
-void
-rec_print(
-/*======*/
- FILE* file, /*!< in: file where to print */
- const rec_t* rec, /*!< in: physical record */
- dict_index_t* index) /*!< in: record descriptor */
-{
- ut_ad(index);
-
- if (!dict_table_is_comp(index->table)) {
- rec_print_old(file, rec);
- return;
- } else {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- rec_offs_init(offsets_);
-
- rec_print_new(file, rec,
- rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap));
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/row/row0ins.c b/storage/innodb_plugin/row/row0ins.c
deleted file mode 100644
index fe51fce82c4..00000000000
--- a/storage/innodb_plugin/row/row0ins.c
+++ /dev/null
@@ -1,2508 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file row/row0ins.c
-Insert into a table
-
-Created 4/20/1996 Heikki Tuuri
-*******************************************************/
-
-#include "row0ins.h"
-
-#ifdef UNIV_NONINL
-#include "row0ins.ic"
-#endif
-
-#include "ha_prototypes.h"
-#include "dict0dict.h"
-#include "dict0boot.h"
-#include "trx0undo.h"
-#include "btr0btr.h"
-#include "btr0cur.h"
-#include "mach0data.h"
-#include "que0que.h"
-#include "row0upd.h"
-#include "row0sel.h"
-#include "row0row.h"
-#include "rem0cmp.h"
-#include "lock0lock.h"
-#include "log0log.h"
-#include "eval0eval.h"
-#include "data0data.h"
-#include "usr0sess.h"
-#include "buf0lru.h"
-
-#define ROW_INS_PREV 1
-#define ROW_INS_NEXT 2
-
-
-/*********************************************************************//**
-Creates an insert node struct.
-@return own: insert node struct */
-UNIV_INTERN
-ins_node_t*
-ins_node_create(
-/*============*/
- ulint ins_type, /*!< in: INS_VALUES, ... */
- dict_table_t* table, /*!< in: table where to insert */
- mem_heap_t* heap) /*!< in: mem heap where created */
-{
- ins_node_t* node;
-
- node = mem_heap_alloc(heap, sizeof(ins_node_t));
-
- node->common.type = QUE_NODE_INSERT;
-
- node->ins_type = ins_type;
-
- node->state = INS_NODE_SET_IX_LOCK;
- node->table = table;
- node->index = NULL;
- node->entry = NULL;
-
- node->select = NULL;
-
- node->trx_id = ut_dulint_zero;
-
- node->entry_sys_heap = mem_heap_create(128);
-
- node->magic_n = INS_NODE_MAGIC_N;
-
- return(node);
-}
-
-/***********************************************************//**
-Creates an entry template for each index of a table. */
-UNIV_INTERN
-void
-ins_node_create_entry_list(
-/*=======================*/
- ins_node_t* node) /*!< in: row insert node */
-{
- dict_index_t* index;
- dtuple_t* entry;
-
- ut_ad(node->entry_sys_heap);
-
- UT_LIST_INIT(node->entry_list);
-
- index = dict_table_get_first_index(node->table);
-
- while (index != NULL) {
- entry = row_build_index_entry(node->row, NULL, index,
- node->entry_sys_heap);
- UT_LIST_ADD_LAST(tuple_list, node->entry_list, entry);
-
- index = dict_table_get_next_index(index);
- }
-}
-
-/*****************************************************************//**
-Adds system field buffers to a row. */
-static
-void
-row_ins_alloc_sys_fields(
-/*=====================*/
- ins_node_t* node) /*!< in: insert node */
-{
- dtuple_t* row;
- dict_table_t* table;
- mem_heap_t* heap;
- const dict_col_t* col;
- dfield_t* dfield;
- byte* ptr;
-
- row = node->row;
- table = node->table;
- heap = node->entry_sys_heap;
-
- ut_ad(row && table && heap);
- ut_ad(dtuple_get_n_fields(row) == dict_table_get_n_cols(table));
-
- /* 1. Allocate buffer for row id */
-
- col = dict_table_get_sys_col(table, DATA_ROW_ID);
-
- dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
-
- ptr = mem_heap_zalloc(heap, DATA_ROW_ID_LEN);
-
- dfield_set_data(dfield, ptr, DATA_ROW_ID_LEN);
-
- node->row_id_buf = ptr;
-
- /* 3. Allocate buffer for trx id */
-
- col = dict_table_get_sys_col(table, DATA_TRX_ID);
-
- dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
- ptr = mem_heap_zalloc(heap, DATA_TRX_ID_LEN);
-
- dfield_set_data(dfield, ptr, DATA_TRX_ID_LEN);
-
- node->trx_id_buf = ptr;
-
- /* 4. Allocate buffer for roll ptr */
-
- col = dict_table_get_sys_col(table, DATA_ROLL_PTR);
-
- dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
- ptr = mem_heap_zalloc(heap, DATA_ROLL_PTR_LEN);
-
- dfield_set_data(dfield, ptr, DATA_ROLL_PTR_LEN);
-}
-
-/*********************************************************************//**
-Sets a new row to insert for an INS_DIRECT node. This function is only used
-if we have constructed the row separately, which is a rare case; this
-function is quite slow. */
-UNIV_INTERN
-void
-ins_node_set_new_row(
-/*=================*/
- ins_node_t* node, /*!< in: insert node */
- dtuple_t* row) /*!< in: new row (or first row) for the node */
-{
- node->state = INS_NODE_SET_IX_LOCK;
- node->index = NULL;
- node->entry = NULL;
-
- node->row = row;
-
- mem_heap_empty(node->entry_sys_heap);
-
- /* Create templates for index entries */
-
- ins_node_create_entry_list(node);
-
- /* Allocate from entry_sys_heap buffers for sys fields */
-
- row_ins_alloc_sys_fields(node);
-
- /* As we allocated a new trx id buf, the trx id should be written
- there again: */
-
- node->trx_id = ut_dulint_zero;
-}
-
-/*******************************************************************//**
-Does an insert operation by updating a delete-marked existing record
-in the index. This situation can occur if the delete-marked record is
-kept in the index for consistent reads.
-@return DB_SUCCESS or error code */
-static
-ulint
-row_ins_sec_index_entry_by_modify(
-/*==============================*/
- ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
- depending on whether mtr holds just a leaf
- latch or also a tree latch */
- btr_cur_t* cursor, /*!< in: B-tree cursor */
- const dtuple_t* entry, /*!< in: index entry to insert */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr) /*!< in: mtr; must be committed before
- latching any further pages */
-{
- big_rec_t* dummy_big_rec;
- mem_heap_t* heap;
- upd_t* update;
- rec_t* rec;
- ulint err;
-
- rec = btr_cur_get_rec(cursor);
-
- ut_ad(!dict_index_is_clust(cursor->index));
- ut_ad(rec_get_deleted_flag(rec,
- dict_table_is_comp(cursor->index->table)));
-
- /* We know that in the alphabetical ordering, entry and rec are
- identified. But in their binary form there may be differences if
- there are char fields in them. Therefore we have to calculate the
- difference. */
-
- heap = mem_heap_create(1024);
-
- update = row_upd_build_sec_rec_difference_binary(
- cursor->index, entry, rec, thr_get_trx(thr), heap);
- if (mode == BTR_MODIFY_LEAF) {
- /* Try an optimistic updating of the record, keeping changes
- within the page */
-
- err = btr_cur_optimistic_update(BTR_KEEP_SYS_FLAG, cursor,
- update, 0, thr, mtr);
- switch (err) {
- case DB_OVERFLOW:
- case DB_UNDERFLOW:
- case DB_ZIP_OVERFLOW:
- err = DB_FAIL;
- }
- } else {
- ut_a(mode == BTR_MODIFY_TREE);
- if (buf_LRU_buf_pool_running_out()) {
-
- err = DB_LOCK_TABLE_FULL;
-
- goto func_exit;
- }
-
- err = btr_cur_pessimistic_update(BTR_KEEP_SYS_FLAG, cursor,
- &heap, &dummy_big_rec, update,
- 0, thr, mtr);
- ut_ad(!dummy_big_rec);
- }
-func_exit:
- mem_heap_free(heap);
-
- return(err);
-}
-
-/*******************************************************************//**
-Does an insert operation by delete unmarking and updating a delete marked
-existing record in the index. This situation can occur if the delete marked
-record is kept in the index for consistent reads.
-@return DB_SUCCESS, DB_FAIL, or error code */
-static
-ulint
-row_ins_clust_index_entry_by_modify(
-/*================================*/
- ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
- depending on whether mtr holds just a leaf
- latch or also a tree latch */
- btr_cur_t* cursor, /*!< in: B-tree cursor */
- mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
- big_rec_t** big_rec,/*!< out: possible big rec vector of fields
- which have to be stored externally by the
- caller */
- const dtuple_t* entry, /*!< in: index entry to insert */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr) /*!< in: mtr; must be committed before
- latching any further pages */
-{
- rec_t* rec;
- upd_t* update;
- ulint err;
-
- ut_ad(dict_index_is_clust(cursor->index));
-
- *big_rec = NULL;
-
- rec = btr_cur_get_rec(cursor);
-
- ut_ad(rec_get_deleted_flag(rec,
- dict_table_is_comp(cursor->index->table)));
-
- if (!*heap) {
- *heap = mem_heap_create(1024);
- }
-
- /* Build an update vector containing all the fields to be modified;
- NOTE that this vector may NOT contain system columns trx_id or
- roll_ptr */
-
- update = row_upd_build_difference_binary(cursor->index, entry, rec,
- thr_get_trx(thr), *heap);
- if (mode == BTR_MODIFY_LEAF) {
- /* Try optimistic updating of the record, keeping changes
- within the page */
-
- err = btr_cur_optimistic_update(0, cursor, update, 0, thr,
- mtr);
- switch (err) {
- case DB_OVERFLOW:
- case DB_UNDERFLOW:
- case DB_ZIP_OVERFLOW:
- err = DB_FAIL;
- }
- } else {
- ut_a(mode == BTR_MODIFY_TREE);
- if (buf_LRU_buf_pool_running_out()) {
-
- return(DB_LOCK_TABLE_FULL);
-
- }
- err = btr_cur_pessimistic_update(0, cursor,
- heap, big_rec, update,
- 0, thr, mtr);
- }
-
- return(err);
-}
-
-/*********************************************************************//**
-Returns TRUE if in a cascaded update/delete an ancestor node of node
-updates (not DELETE, but UPDATE) table.
-@return TRUE if an ancestor updates table */
-static
-ibool
-row_ins_cascade_ancestor_updates_table(
-/*===================================*/
- que_node_t* node, /*!< in: node in a query graph */
- dict_table_t* table) /*!< in: table */
-{
- que_node_t* parent;
- upd_node_t* upd_node;
-
- parent = que_node_get_parent(node);
-
- while (que_node_get_type(parent) == QUE_NODE_UPDATE) {
-
- upd_node = parent;
-
- if (upd_node->table == table && upd_node->is_delete == FALSE) {
-
- return(TRUE);
- }
-
- parent = que_node_get_parent(parent);
-
- ut_a(parent);
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Returns the number of ancestor UPDATE or DELETE nodes of a
-cascaded update/delete node.
-@return number of ancestors */
-static
-ulint
-row_ins_cascade_n_ancestors(
-/*========================*/
- que_node_t* node) /*!< in: node in a query graph */
-{
- que_node_t* parent;
- ulint n_ancestors = 0;
-
- parent = que_node_get_parent(node);
-
- while (que_node_get_type(parent) == QUE_NODE_UPDATE) {
- n_ancestors++;
-
- parent = que_node_get_parent(parent);
-
- ut_a(parent);
- }
-
- return(n_ancestors);
-}
-
-/******************************************************************//**
-Calculates the update vector node->cascade->update for a child table in
-a cascaded update.
-@return number of fields in the calculated update vector; the value
-can also be 0 if no foreign key fields changed; the returned value is
-ULINT_UNDEFINED if the column type in the child table is too short to
-fit the new value in the parent table: that means the update fails */
-static
-ulint
-row_ins_cascade_calc_update_vec(
-/*============================*/
- upd_node_t* node, /*!< in: update node of the parent
- table */
- dict_foreign_t* foreign, /*!< in: foreign key constraint whose
- type is != 0 */
- mem_heap_t* heap) /*!< in: memory heap to use as
- temporary storage */
-{
- upd_node_t* cascade = node->cascade_node;
- dict_table_t* table = foreign->foreign_table;
- dict_index_t* index = foreign->foreign_index;
- upd_t* update;
- upd_field_t* ufield;
- dict_table_t* parent_table;
- dict_index_t* parent_index;
- upd_t* parent_update;
- upd_field_t* parent_ufield;
- ulint n_fields_updated;
- ulint parent_field_no;
- ulint i;
- ulint j;
-
- ut_a(node);
- ut_a(foreign);
- ut_a(cascade);
- ut_a(table);
- ut_a(index);
-
- /* Calculate the appropriate update vector which will set the fields
- in the child index record to the same value (possibly padded with
- spaces if the column is a fixed length CHAR or FIXBINARY column) as
- the referenced index record will get in the update. */
-
- parent_table = node->table;
- ut_a(parent_table == foreign->referenced_table);
- parent_index = foreign->referenced_index;
- parent_update = node->update;
-
- update = cascade->update;
-
- update->info_bits = 0;
- update->n_fields = foreign->n_fields;
-
- n_fields_updated = 0;
-
- for (i = 0; i < foreign->n_fields; i++) {
-
- parent_field_no = dict_table_get_nth_col_pos(
- parent_table,
- dict_index_get_nth_col_no(parent_index, i));
-
- for (j = 0; j < parent_update->n_fields; j++) {
- parent_ufield = parent_update->fields + j;
-
- if (parent_ufield->field_no == parent_field_no) {
-
- ulint min_size;
- const dict_col_t* col;
- ulint ufield_len;
-
- col = dict_index_get_nth_col(index, i);
-
- /* A field in the parent index record is
- updated. Let us make the update vector
- field for the child table. */
-
- ufield = update->fields + n_fields_updated;
-
- ufield->field_no
- = dict_table_get_nth_col_pos(
- table, dict_col_get_no(col));
- ufield->exp = NULL;
-
- ufield->new_val = parent_ufield->new_val;
- ufield_len = dfield_get_len(&ufield->new_val);
-
- /* Clear the "external storage" flag */
- dfield_set_len(&ufield->new_val, ufield_len);
-
- /* Do not allow a NOT NULL column to be
- updated as NULL */
-
- if (dfield_is_null(&ufield->new_val)
- && (col->prtype & DATA_NOT_NULL)) {
-
- return(ULINT_UNDEFINED);
- }
-
- /* If the new value would not fit in the
- column, do not allow the update */
-
- if (!dfield_is_null(&ufield->new_val)
- && dtype_get_at_most_n_mbchars(
- col->prtype,
- col->mbminlen, col->mbmaxlen,
- col->len,
- ufield_len,
- dfield_get_data(&ufield->new_val))
- < ufield_len) {
-
- return(ULINT_UNDEFINED);
- }
-
- /* If the parent column type has a different
- length than the child column type, we may
- need to pad with spaces the new value of the
- child column */
-
- min_size = dict_col_get_min_size(col);
-
- /* Because UNIV_SQL_NULL (the marker
- of SQL NULL values) exceeds all possible
- values of min_size, the test below will
- not hold for SQL NULL columns. */
-
- if (min_size > ufield_len) {
-
- char* pad_start;
- const char* pad_end;
- char* padded_data
- = mem_heap_alloc(
- heap, min_size);
- pad_start = padded_data + ufield_len;
- pad_end = padded_data + min_size;
-
- memcpy(padded_data,
- dfield_get_data(&ufield
- ->new_val),
- dfield_get_len(&ufield
- ->new_val));
-
- switch (UNIV_EXPECT(col->mbminlen,1)) {
- default:
- ut_error;
- return(ULINT_UNDEFINED);
- case 1:
- if (UNIV_UNLIKELY
- (dtype_get_charset_coll(
- col->prtype)
- == DATA_MYSQL_BINARY_CHARSET_COLL)) {
- /* Do not pad BINARY
- columns. */
- return(ULINT_UNDEFINED);
- }
-
- /* space=0x20 */
- memset(pad_start, 0x20,
- pad_end - pad_start);
- break;
- case 2:
- /* space=0x0020 */
- ut_a(!(ufield_len % 2));
- ut_a(!(min_size % 2));
- do {
- *pad_start++ = 0x00;
- *pad_start++ = 0x20;
- } while (pad_start < pad_end);
- break;
- }
-
- dfield_set_data(&ufield->new_val,
- padded_data, min_size);
- }
-
- n_fields_updated++;
- }
- }
- }
-
- update->n_fields = n_fields_updated;
-
- return(n_fields_updated);
-}
-
-/*********************************************************************//**
-Set detailed error message associated with foreign key errors for
-the given transaction. */
-static
-void
-row_ins_set_detailed(
-/*=================*/
- trx_t* trx, /*!< in: transaction */
- dict_foreign_t* foreign) /*!< in: foreign key constraint */
-{
- mutex_enter(&srv_misc_tmpfile_mutex);
- rewind(srv_misc_tmpfile);
-
- if (os_file_set_eof(srv_misc_tmpfile)) {
- ut_print_name(srv_misc_tmpfile, trx, TRUE,
- foreign->foreign_table_name);
- dict_print_info_on_foreign_key_in_create_format(
- srv_misc_tmpfile, trx, foreign, FALSE);
- trx_set_detailed_error_from_file(trx, srv_misc_tmpfile);
- } else {
- trx_set_detailed_error(trx, "temp file operation failed");
- }
-
- mutex_exit(&srv_misc_tmpfile_mutex);
-}
-
-/*********************************************************************//**
-Reports a foreign key error associated with an update or a delete of a
-parent table index entry. */
-static
-void
-row_ins_foreign_report_err(
-/*=======================*/
- const char* errstr, /*!< in: error string from the viewpoint
- of the parent table */
- que_thr_t* thr, /*!< in: query thread whose run_node
- is an update node */
- dict_foreign_t* foreign, /*!< in: foreign key constraint */
- const rec_t* rec, /*!< in: a matching index record in the
- child table */
- const dtuple_t* entry) /*!< in: index entry in the parent
- table */
-{
- FILE* ef = dict_foreign_err_file;
- trx_t* trx = thr_get_trx(thr);
-
- row_ins_set_detailed(trx, foreign);
-
- mutex_enter(&dict_foreign_err_mutex);
- rewind(ef);
- ut_print_timestamp(ef);
- fputs(" Transaction:\n", ef);
- trx_print(ef, trx, 600);
-
- fputs("Foreign key constraint fails for table ", ef);
- ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
- fputs(":\n", ef);
- dict_print_info_on_foreign_key_in_create_format(ef, trx, foreign,
- TRUE);
- putc('\n', ef);
- fputs(errstr, ef);
- fputs(" in parent table, in index ", ef);
- ut_print_name(ef, trx, FALSE, foreign->referenced_index->name);
- if (entry) {
- fputs(" tuple:\n", ef);
- dtuple_print(ef, entry);
- }
- fputs("\nBut in child table ", ef);
- ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
- fputs(", in index ", ef);
- ut_print_name(ef, trx, FALSE, foreign->foreign_index->name);
- if (rec) {
- fputs(", there is a record:\n", ef);
- rec_print(ef, rec, foreign->foreign_index);
- } else {
- fputs(", the record is not available\n", ef);
- }
- putc('\n', ef);
-
- mutex_exit(&dict_foreign_err_mutex);
-}
-
-/*********************************************************************//**
-Reports a foreign key error to dict_foreign_err_file when we are trying
-to add an index entry to a child table. Note that the adding may be the result
-of an update, too.
-
-The whole report is written while holding dict_foreign_err_mutex, and the
-file is rewound before anything is printed. If rec is the page supremum,
-the record preceding it is printed in its place; if rec is NULL no parent
-record is printed at all. */
-static
-void
-row_ins_foreign_report_add_err(
-/*===========================*/
- trx_t* trx, /*!< in: transaction */
- dict_foreign_t* foreign, /*!< in: foreign key constraint */
- const rec_t* rec, /*!< in: a record in the parent table:
- it does not match entry because we
- have an error! May be NULL. */
- const dtuple_t* entry) /*!< in: index entry to insert in the
- child table; may be NULL, in which case
- no tuple is printed */
-{
- FILE* ef = dict_foreign_err_file;
-
- row_ins_set_detailed(trx, foreign);
-
- mutex_enter(&dict_foreign_err_mutex);
- rewind(ef); /* start writing at the beginning of the file */
- ut_print_timestamp(ef);
- fputs(" Transaction:\n", ef);
- trx_print(ef, trx, 600);
- fputs("Foreign key constraint fails for table ", ef);
- ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
- fputs(":\n", ef);
- dict_print_info_on_foreign_key_in_create_format(ef, trx, foreign,
- TRUE);
- fputs("\nTrying to add in child table, in index ", ef);
- ut_print_name(ef, trx, FALSE, foreign->foreign_index->name);
- if (entry) {
- fputs(" tuple:\n", ef);
- /* TODO: DB_TRX_ID and DB_ROLL_PTR may be uninitialized.
- It would be better to only display the user columns. */
- dtuple_print(ef, entry);
- }
- fputs("\nBut in parent table ", ef);
- ut_print_name(ef, trx, TRUE, foreign->referenced_table_name);
- fputs(", in index ", ef);
- ut_print_name(ef, trx, FALSE, foreign->referenced_index->name);
- fputs(",\nthe closest match we can find is record:\n", ef);
- if (rec && page_rec_is_supremum(rec)) {
- /* If the cursor ended on a supremum record, it is better
- to report the previous record in the error message, so that
- the user gets a more descriptive error message. */
- rec = page_rec_get_prev_const(rec);
- }
-
- if (rec) { /* re-checked: rec may have been replaced just above */
- rec_print(ef, rec, foreign->referenced_index);
- }
- putc('\n', ef);
-
- mutex_exit(&dict_foreign_err_mutex);
-}
-
-/*********************************************************************//**
-Invalidate the query cache for the given table.
-
-A private copy of name is made and its first '/' is overwritten with a NUL
-byte, so that the copy reads as the database name when treated as a C string
-while the table name follows the inserted terminator. The copy is handed to
-innobase_invalidate_query_cache() together with strlen(name) + 1 as the
-length, and is freed afterwards. The function asserts (ut_a) that name
-contains a '/'. */
-static
-void
-row_ins_invalidate_query_cache(
-/*===========================*/
- que_thr_t* thr, /*!< in: query thread whose run_node
- is an update node */
- const char* name) /*!< in: table name prefixed with
- database name and a '/' character */
-{
- char* buf;
- char* ptr;
- ulint len = strlen(name) + 1; /* length including the terminator */
-
- buf = mem_strdupl(name, len);
-
- ptr = strchr(buf, '/');
- ut_a(ptr); /* the "database/table" form is required */
- *ptr = '\0'; /* split the copy at the separator */
-
- innobase_invalidate_query_cache(thr_get_trx(thr), buf, len);
- mem_free(buf);
-}
-
-/*********************************************************************//**
-Perform referential actions or checks when a parent row is deleted or updated
-and the constraint had an ON DELETE or ON UPDATE condition which was not
-RESTRICT.
-
-Outline of what the body does:
-1) invalidates the query cache for the child table;
-2) returns DB_ROW_IS_REFERENCED at once (after writing a report) when the
-   constraint has no CASCADE / SET NULL action for the operation at hand;
-3) creates node->cascade_node on first use and points it at this table and
-   constraint;
-4) rejects an update that would touch a table already being updated in the
-   cascade chain, and rejects chains with 15 or more ancestors;
-5) locates and X-locks the clustered index record of the child row;
-6) builds the SET NULL or CASCADE update vector where one is needed;
-7) stores the cursor positions, commits mtr, runs the cascaded operation,
-   then restarts mtr and restores pcur.
-
-Except for the two early returns of step 2, every exit commits mtr, starts
-it again and restores pcur from its stored position, so the caller must
-re-read anything derived from pcur after this function returns.
-@return DB_SUCCESS, DB_LOCK_WAIT, or error code */
-static
-ulint
-row_ins_foreign_check_on_constraint(
-/*================================*/
- que_thr_t* thr, /*!< in: query thread whose run_node
- is an update node */
- dict_foreign_t* foreign, /*!< in: foreign key constraint whose
- type is != 0 */
- btr_pcur_t* pcur, /*!< in: cursor placed on a matching
- index record in the child table */
- dtuple_t* entry, /*!< in: index entry in the parent
- table */
- mtr_t* mtr) /*!< in: mtr holding the latch of pcur
- page */
-{
- upd_node_t* node;
- upd_node_t* cascade;
- dict_table_t* table = foreign->foreign_table;
- dict_index_t* index;
- dict_index_t* clust_index;
- dtuple_t* ref;
- mem_heap_t* upd_vec_heap = NULL;
- const rec_t* rec;
- const rec_t* clust_rec;
- const buf_block_t* clust_block;
- upd_t* update;
- ulint n_to_update;
- ulint err;
- ulint i;
- trx_t* trx;
- mem_heap_t* tmp_heap = NULL;
-
- ut_a(thr);
- ut_a(foreign);
- ut_a(pcur);
- ut_a(mtr);
-
- trx = thr_get_trx(thr);
-
- /* Since we are going to delete or update a row, we have to invalidate
- the MySQL query cache for table. A deadlock of threads is not possible
- here because the caller of this function does not hold any latches with
- the sync0sync.h rank above the kernel mutex. The query cache mutex has
- a rank just above the kernel mutex. */
-
- row_ins_invalidate_query_cache(thr, table->name);
-
- node = thr->run_node;
-
- if (node->is_delete && 0 == (foreign->type
- & (DICT_FOREIGN_ON_DELETE_CASCADE
- | DICT_FOREIGN_ON_DELETE_SET_NULL))) {
-
- /* This is a DELETE and the constraint defines neither
- CASCADE nor SET NULL for it */
- row_ins_foreign_report_err("Trying to delete",
- thr, foreign,
- btr_pcur_get_rec(pcur), entry);
-
- return(DB_ROW_IS_REFERENCED);
- }
-
- if (!node->is_delete && 0 == (foreign->type
- & (DICT_FOREIGN_ON_UPDATE_CASCADE
- | DICT_FOREIGN_ON_UPDATE_SET_NULL))) {
-
- /* This is an UPDATE */
-
- row_ins_foreign_report_err("Trying to update",
- thr, foreign,
- btr_pcur_get_rec(pcur), entry);
-
- return(DB_ROW_IS_REFERENCED);
- }
-
- if (node->cascade_node == NULL) {
- /* Extend our query graph by creating a child to current
- update node. The child is used in the cascade or set null
- operation. */
-
- node->cascade_heap = mem_heap_create(128);
- node->cascade_node = row_create_update_node_for_mysql(
- table, node->cascade_heap);
- que_node_set_parent(node->cascade_node, node);
- }
-
- /* Initialize cascade_node to do the operation we want. Note that we
- use the SAME cascade node to do all foreign key operations of the
- SQL DELETE: the table of the cascade node may change if there are
- several child tables to the table where the delete is done! */
-
- cascade = node->cascade_node;
-
- cascade->table = table;
-
- cascade->foreign = foreign;
-
- if (node->is_delete
- && (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE)) {
- cascade->is_delete = TRUE;
- } else {
- cascade->is_delete = FALSE;
-
- if (foreign->n_fields > cascade->update_n_fields) {
- /* We have to make the update vector longer */
-
- cascade->update = upd_create(foreign->n_fields,
- node->cascade_heap);
- cascade->update_n_fields = foreign->n_fields;
- }
- }
-
- /* We do not allow cyclic cascaded updating (DELETE is allowed,
- but not UPDATE) of the same table, as this can lead to an infinite
- cycle. Check that we are not updating the same table which is
- already being modified in this cascade chain. We have to check
- this also because the modification of the indexes of a 'parent'
- table may still be incomplete, and we must avoid seeing the indexes
- of the parent table in an inconsistent state! */
-
- if (!cascade->is_delete
- && row_ins_cascade_ancestor_updates_table(cascade, table)) {
-
- /* We do not know if this would break foreign key
- constraints, but play safe and return an error */
-
- err = DB_ROW_IS_REFERENCED;
-
- row_ins_foreign_report_err(
- "Trying an update, possibly causing a cyclic"
- " cascaded update\n"
- "in the child table,", thr, foreign,
- btr_pcur_get_rec(pcur), entry);
-
- goto nonstandard_exit_func;
- }
-
- if (row_ins_cascade_n_ancestors(cascade) >= 15) { /* depth limit */
- err = DB_ROW_IS_REFERENCED;
-
- row_ins_foreign_report_err(
- "Trying a too deep cascaded delete or update\n",
- thr, foreign, btr_pcur_get_rec(pcur), entry);
-
- goto nonstandard_exit_func;
- }
-
- index = btr_pcur_get_btr_cur(pcur)->index;
-
- ut_a(index == foreign->foreign_index);
-
- rec = btr_pcur_get_rec(pcur);
-
- if (dict_index_is_clust(index)) {
- /* pcur is already positioned in the clustered index of
- the child table */
-
- clust_index = index;
- clust_rec = rec;
- clust_block = btr_pcur_get_block(pcur);
- } else {
- /* We have to look for the record in the clustered index
- in the child table */
-
- clust_index = dict_table_get_first_index(table);
-
- tmp_heap = mem_heap_create(256);
-
- ref = row_build_row_ref(ROW_COPY_POINTERS, index, rec,
- tmp_heap);
- btr_pcur_open_with_no_init(clust_index, ref,
- PAGE_CUR_LE, BTR_SEARCH_LEAF,
- cascade->pcur, 0, mtr);
-
- clust_rec = btr_pcur_get_rec(cascade->pcur);
- clust_block = btr_pcur_get_block(cascade->pcur);
-
- if (!page_rec_is_user_rec(clust_rec)
- || btr_pcur_get_low_match(cascade->pcur)
- < dict_index_get_n_unique(clust_index)) {
-
- /* The secondary index record has no matching
- clustered index record: print diagnostics */
- fputs("InnoDB: error in cascade of a foreign key op\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, trx, index);
-
- fputs("\n"
- "InnoDB: record ", stderr);
- rec_print(stderr, rec, index);
- fputs("\n"
- "InnoDB: clustered record ", stderr);
- rec_print(stderr, clust_rec, clust_index);
- fputs("\n"
- "InnoDB: Submit a detailed bug report to"
- " http://bugs.mysql.com\n", stderr);
-
- err = DB_SUCCESS; /* only logged to stderr; no cascade
- is performed for this row */
-
- goto nonstandard_exit_func;
- }
- }
-
- /* Set an X-lock on the row to delete or update in the child table */
-
- err = lock_table(0, table, LOCK_IX, thr);
-
- if (err == DB_SUCCESS) {
- /* Here it suffices to use a LOCK_REC_NOT_GAP type lock;
- we already have a normal shared lock on the appropriate
- gap if the search criterion was not unique */
-
- err = lock_clust_rec_read_check_and_lock_alt(
- 0, clust_block, clust_rec, clust_index,
- LOCK_X, LOCK_REC_NOT_GAP, thr);
- }
-
- if (err != DB_SUCCESS) {
-
- goto nonstandard_exit_func;
- }
-
- if (rec_get_deleted_flag(clust_rec, dict_table_is_comp(table))) {
- /* This can happen if there is a circular reference of
- rows such that cascading delete comes to delete a row
- already in the process of being delete marked */
- err = DB_SUCCESS;
-
- goto nonstandard_exit_func;
- }
-
- if ((node->is_delete
- && (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL))
- || (!node->is_delete
- && (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL))) {
-
- /* Build the appropriate update vector which sets
- foreign->n_fields first fields in rec to SQL NULL */
-
- update = cascade->update;
-
- update->info_bits = 0;
- update->n_fields = foreign->n_fields;
-
- for (i = 0; i < foreign->n_fields; i++) {
- upd_field_t* ufield = &update->fields[i];
-
- ufield->field_no = dict_table_get_nth_col_pos(
- table,
- dict_index_get_nth_col_no(index, i));
- ufield->orig_len = 0;
- ufield->exp = NULL;
- dfield_set_null(&ufield->new_val);
- }
- }
-
- if (!node->is_delete
- && (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE)) {
-
- /* Build the appropriate update vector which sets changing
- foreign->n_fields first fields in rec to new values */
-
- upd_vec_heap = mem_heap_create(256);
-
- n_to_update = row_ins_cascade_calc_update_vec(node, foreign,
- upd_vec_heap);
- if (n_to_update == ULINT_UNDEFINED) {
- err = DB_ROW_IS_REFERENCED;
-
- row_ins_foreign_report_err(
- "Trying a cascaded update where the"
- " updated value in the child\n"
- "table would not fit in the length"
- " of the column, or the value would\n"
- "be NULL and the column is"
- " declared as not NULL in the child table,",
- thr, foreign, btr_pcur_get_rec(pcur), entry);
-
- goto nonstandard_exit_func;
- }
-
- if (cascade->update->n_fields == 0) {
-
- /* The update does not change any columns referred
- to in this foreign key constraint: no need to do
- anything */
-
- err = DB_SUCCESS;
-
- goto nonstandard_exit_func;
- }
- }
-
- /* Store pcur position and initialize or store the cascade node
- pcur stored position */
-
- btr_pcur_store_position(pcur, mtr);
-
- if (index == clust_index) {
- btr_pcur_copy_stored_position(cascade->pcur, pcur);
- } else {
- btr_pcur_store_position(cascade->pcur, mtr);
- }
-
- mtr_commit(mtr);
-
- ut_a(cascade->pcur->rel_pos == BTR_PCUR_ON);
-
- cascade->state = UPD_NODE_UPDATE_CLUSTERED;
-
- err = row_update_cascade_for_mysql(thr, cascade,
- foreign->foreign_table);
-
- if (foreign->foreign_table->n_foreign_key_checks_running == 0) {
- /* Diagnostic only: execution continues after the message */
- fprintf(stderr,
- "InnoDB: error: table %s has the counter 0"
- " though there is\n"
- "InnoDB: a FOREIGN KEY check running on it.\n",
- foreign->foreign_table->name);
- }
-
- /* Release the data dictionary latch for a while, so that we do not
- starve other threads from doing CREATE TABLE etc. if we have a huge
- cascaded operation running. The counter n_foreign_key_checks_running
- will prevent other users from dropping or ALTERing the table when we
- release the latch. */
-
- row_mysql_unfreeze_data_dictionary(thr_get_trx(thr));
- row_mysql_freeze_data_dictionary(thr_get_trx(thr));
-
- mtr_start(mtr);
-
- /* Restore pcur position */
-
- btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr);
-
- if (tmp_heap) {
- mem_heap_free(tmp_heap);
- }
-
- if (upd_vec_heap) {
- mem_heap_free(upd_vec_heap);
- }
-
- return(err);
-
-nonstandard_exit_func: /* exit taken when no cascaded operation is run:
- free the heaps, then commit and restart mtr
- around a stored/restored pcur position */
- if (tmp_heap) {
- mem_heap_free(tmp_heap);
- }
-
- if (upd_vec_heap) {
- mem_heap_free(upd_vec_heap);
- }
-
- btr_pcur_store_position(pcur, mtr);
-
- mtr_commit(mtr);
- mtr_start(mtr);
-
- btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr);
-
- return(err);
-}
-
-/*********************************************************************//**
-Sets a shared lock on a record. Used in locking possible duplicate key
-records and also in checking foreign key constraints.
-The request is made with LOCK_S through the clustered index or the
-secondary index locking routine, depending on dict_index_is_clust(index),
-and the result of that routine is returned unchanged.
-@return DB_SUCCESS or error code */
-static
-ulint
-row_ins_set_shared_rec_lock(
-/*========================*/
- ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP type lock */
- const buf_block_t* block, /*!< in: buffer block of rec */
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- que_thr_t* thr) /*!< in: query thread */
-{
- ulint err;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- if (dict_index_is_clust(index)) {
- err = lock_clust_rec_read_check_and_lock(
- 0, block, rec, index, offsets, LOCK_S, type, thr);
- } else {
- err = lock_sec_rec_read_check_and_lock(
- 0, block, rec, index, offsets, LOCK_S, type, thr);
- }
-
- return(err);
-}
-
-/*********************************************************************//**
-Sets an exclusive lock on a record. Used in locking possible duplicate key
-records.
-The request is made with LOCK_X through the clustered index or the
-secondary index locking routine, depending on dict_index_is_clust(index),
-and the result of that routine is returned unchanged.
-@return DB_SUCCESS or error code */
-static
-ulint
-row_ins_set_exclusive_rec_lock(
-/*===========================*/
- ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP type lock */
- const buf_block_t* block, /*!< in: buffer block of rec */
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- que_thr_t* thr) /*!< in: query thread */
-{
- ulint err;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- if (dict_index_is_clust(index)) {
- err = lock_clust_rec_read_check_and_lock(
- 0, block, rec, index, offsets, LOCK_X, type, thr);
- } else {
- err = lock_sec_rec_read_check_and_lock(
- 0, block, rec, index, offsets, LOCK_X, type, thr);
- }
-
- return(err);
-}
-
-/***************************************************************//**
-Checks if foreign key constraint fails for an index entry. Sets shared locks
-which lock either the success or the failure of the constraint. NOTE that
-the caller must have a shared latch on dict_operation_lock.
-
-The check is skipped (DB_SUCCESS) when trx->check_foreigns is FALSE, when
-any of the first foreign->n_fields fields of entry is SQL NULL, or when the
-running node is a non-delete update node whose foreign member is this very
-constraint. Otherwise the index to check is scanned from the first record
-that is >= entry, comparing only the first foreign->n_fields fields; the
-n_fields_cmp value of entry is saved before the scan and put back after it.
-
-If a lock request answers DB_LOCK_WAIT, the thread is suspended and, when
-trx->error_state is DB_SUCCESS afterwards, the whole check is redone from
-the run_again label. Besides the codes listed below, the code shows that
-lock errors and DB_FOREIGN_DUPLICATE_KEY can be returned as well.
-@return DB_SUCCESS, DB_NO_REFERENCED_ROW, or DB_ROW_IS_REFERENCED */
-UNIV_INTERN
-ulint
-row_ins_check_foreign_constraint(
-/*=============================*/
- ibool check_ref,/*!< in: TRUE if we want to check that
- the referenced table is ok, FALSE if we
- want to check the foreign key table */
- dict_foreign_t* foreign,/*!< in: foreign constraint; NOTE that the
- tables mentioned in it must be in the
- dictionary cache if they exist at all */
- dict_table_t* table, /*!< in: if check_ref is TRUE, then the foreign
- table, else the referenced table */
- dtuple_t* entry, /*!< in: index entry for index */
- que_thr_t* thr) /*!< in: query thread */
-{
- upd_node_t* upd_node;
- dict_table_t* check_table;
- dict_index_t* check_index;
- ulint n_fields_cmp;
- btr_pcur_t pcur;
- ibool moved;
- int cmp;
- ulint err;
- ulint i;
- mtr_t mtr;
- trx_t* trx = thr_get_trx(thr);
- mem_heap_t* heap = NULL; /* handed to rec_get_offsets(); freed at
- exit_func if it has been set */
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
-run_again: /* re-entered after a lock wait that ended successfully */
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- err = DB_SUCCESS;
-
- if (trx->check_foreigns == FALSE) {
- /* The user has suppressed foreign key checks currently for
- this session */
- goto exit_func;
- }
-
- /* If any of the foreign key fields in entry is SQL NULL, we
- suppress the foreign key check: this is compatible with Oracle,
- for example */
-
- for (i = 0; i < foreign->n_fields; i++) {
- if (UNIV_SQL_NULL == dfield_get_len(
- dtuple_get_nth_field(entry, i))) {
-
- goto exit_func;
- }
- }
-
- if (que_node_get_type(thr->run_node) == QUE_NODE_UPDATE) {
- upd_node = thr->run_node;
-
- if (!(upd_node->is_delete) && upd_node->foreign == foreign) {
- /* If a cascaded update is done as defined by a
- foreign key constraint, do not check that
- constraint for the child row. In ON UPDATE CASCADE
- the update of the parent row is only half done when
- we come here: if we would check the constraint here
- for the child row it would fail.
-
- A QUESTION remains: if in the child table there are
- several constraints which refer to the same parent
- table, we should merge all updates to the child as
- one update? And the updates can be contradictory!
- Currently we just perform the update associated
- with each foreign key constraint, one after
- another, and the user has problems predicting in
- which order they are performed. */
-
- goto exit_func;
- }
- }
-
- if (check_ref) {
- check_table = foreign->referenced_table;
- check_index = foreign->referenced_index;
- } else {
- check_table = foreign->foreign_table;
- check_index = foreign->foreign_index;
- }
-
- if (check_table == NULL || check_table->ibd_file_missing) {
- if (check_ref) {
- FILE* ef = dict_foreign_err_file;
-
- row_ins_set_detailed(trx, foreign);
-
- mutex_enter(&dict_foreign_err_mutex);
- rewind(ef);
- ut_print_timestamp(ef);
- fputs(" Transaction:\n", ef);
- trx_print(ef, trx, 600);
- fputs("Foreign key constraint fails for table ", ef);
- ut_print_name(ef, trx, TRUE,
- foreign->foreign_table_name);
- fputs(":\n", ef);
- dict_print_info_on_foreign_key_in_create_format(
- ef, trx, foreign, TRUE);
- fputs("\nTrying to add to index ", ef);
- ut_print_name(ef, trx, FALSE,
- foreign->foreign_index->name);
- fputs(" tuple:\n", ef);
- dtuple_print(ef, entry);
- fputs("\nBut the parent table ", ef);
- ut_print_name(ef, trx, TRUE,
- foreign->referenced_table_name);
- fputs("\nor its .ibd file does"
- " not currently exist!\n", ef);
- mutex_exit(&dict_foreign_err_mutex);
-
- err = DB_NO_REFERENCED_ROW;
- }
-
- goto exit_func; /* err is still DB_SUCCESS when check_ref
- is FALSE */
- }
-
- ut_a(check_table);
- ut_a(check_index);
-
- if (check_table != table) {
- /* We already have a LOCK_IX on table, but not necessarily
- on check_table */
-
- err = lock_table(0, check_table, LOCK_IS, thr);
-
- if (err != DB_SUCCESS) {
-
- goto do_possible_lock_wait;
- }
- }
-
- mtr_start(&mtr);
-
- /* Store old value on n_fields_cmp */
-
- n_fields_cmp = dtuple_get_n_fields_cmp(entry);
-
- dtuple_set_n_fields_cmp(entry, foreign->n_fields);
-
- btr_pcur_open(check_index, entry, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
-
- /* Scan index records and check if there is a matching record */
-
- for (;;) {
- const rec_t* rec = btr_pcur_get_rec(&pcur);
- const buf_block_t* block = btr_pcur_get_block(&pcur);
-
- if (page_rec_is_infimum(rec)) {
-
- goto next_rec;
- }
-
- offsets = rec_get_offsets(rec, check_index,
- offsets, ULINT_UNDEFINED, &heap);
-
- if (page_rec_is_supremum(rec)) {
-
- err = row_ins_set_shared_rec_lock(LOCK_ORDINARY, block,
- rec, check_index,
- offsets, thr);
- if (err != DB_SUCCESS) {
-
- break;
- }
-
- goto next_rec;
- }
-
- cmp = cmp_dtuple_rec(entry, rec, offsets);
-
- if (cmp == 0) {
- if (rec_get_deleted_flag(rec,
- rec_offs_comp(offsets))) {
- /* A delete-marked match: lock it and go on
- to the next record */
- err = row_ins_set_shared_rec_lock(
- LOCK_ORDINARY, block,
- rec, check_index, offsets, thr);
- if (err != DB_SUCCESS) {
-
- break;
- }
- } else {
- /* Found a matching record. Lock only
- a record because we can allow inserts
- into gaps */
-
- err = row_ins_set_shared_rec_lock(
- LOCK_REC_NOT_GAP, block,
- rec, check_index, offsets, thr);
-
- if (err != DB_SUCCESS) {
-
- break;
- }
-
- if (check_ref) {
- err = DB_SUCCESS;
-
- break;
- } else if (foreign->type != 0) {
- /* There is an ON UPDATE or ON DELETE
- condition: check them in a separate
- function */
-
- err = row_ins_foreign_check_on_constraint(
- thr, foreign, &pcur, entry,
- &mtr);
- if (err != DB_SUCCESS) {
- /* Since reporting a plain
- "duplicate key" error
- message to the user in
- cases where a long CASCADE
- operation would lead to a
- duplicate key in some
- other table is very
- confusing, map duplicate
- key errors resulting from
- FK constraints to a
- separate error code. */
-
- if (err == DB_DUPLICATE_KEY) {
- err = DB_FOREIGN_DUPLICATE_KEY;
- }
-
- break;
- }
-
- /* row_ins_foreign_check_on_constraint
- may have repositioned pcur on a
- different block */
- block = btr_pcur_get_block(&pcur);
- } else {
- row_ins_foreign_report_err(
- "Trying to delete or update",
- thr, foreign, rec, entry);
-
- err = DB_ROW_IS_REFERENCED;
- break;
- }
- }
- }
-
- if (cmp < 0) {
- /* entry sorts before rec: there are no more matching
- records, so lock the gap and stop scanning */
- err = row_ins_set_shared_rec_lock(
- LOCK_GAP, block,
- rec, check_index, offsets, thr);
- if (err != DB_SUCCESS) {
-
- break;
- }
-
- if (check_ref) {
- err = DB_NO_REFERENCED_ROW;
- row_ins_foreign_report_add_err(
- trx, foreign, rec, entry);
- } else {
- err = DB_SUCCESS;
- }
-
- break;
- }
-
- ut_a(cmp == 0); /* cmp < 0 left the loop above, so only
- cmp > 0 can trip this assertion */
-next_rec:
- moved = btr_pcur_move_to_next(&pcur, &mtr);
-
- if (!moved) {
- /* The cursor could not be advanced any further */
- if (check_ref) {
- rec = btr_pcur_get_rec(&pcur);
- row_ins_foreign_report_add_err(
- trx, foreign, rec, entry);
- err = DB_NO_REFERENCED_ROW;
- } else {
- err = DB_SUCCESS;
- }
-
- break;
- }
- }
-
- btr_pcur_close(&pcur);
-
- mtr_commit(&mtr);
-
- /* Restore old value */
- dtuple_set_n_fields_cmp(entry, n_fields_cmp);
-
-do_possible_lock_wait:
- if (err == DB_LOCK_WAIT) {
- trx->error_state = err;
-
- que_thr_stop_for_mysql(thr);
-
- srv_suspend_mysql_thread(thr);
-
- if (trx->error_state == DB_SUCCESS) {
-
- goto run_again;
- }
-
- err = trx->error_state;
- }
-
-exit_func:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(err);
-}
-
-/***************************************************************//**
-Checks if foreign key constraints fail for an index entry. If index
-is not mentioned in any constraint, this function does nothing.
-Otherwise does searches to the indexes of referenced tables and
-sets shared locks which lock either the success or the failure of
-a constraint.
-
-Walks table->foreign_list and, for every constraint whose foreign_index is
-index, runs row_ins_check_foreign_constraint() with check_ref == TRUE. The
-walk stops at the first constraint that does not return DB_SUCCESS. While a
-check runs, n_foreign_key_checks_running of the referenced table (if it is
-set) is incremented under dict_sys->mutex and decremented again afterwards.
-@return DB_SUCCESS or error code */
-static
-ulint
-row_ins_check_foreign_constraints(
-/*==============================*/
- dict_table_t* table, /*!< in: table */
- dict_index_t* index, /*!< in: index */
- dtuple_t* entry, /*!< in: index entry for index */
- que_thr_t* thr) /*!< in: query thread */
-{
- dict_foreign_t* foreign;
- ulint err;
- trx_t* trx;
- ibool got_s_lock = FALSE; /* set once the dictionary has been
- frozen here; never reset to FALSE */
-
- trx = thr_get_trx(thr);
-
- foreign = UT_LIST_GET_FIRST(table->foreign_list);
-
- while (foreign) {
- if (foreign->foreign_index == index) {
-
- if (foreign->referenced_table == NULL) {
- /* The return value is not used here; the
- pointer is re-read from foreign below */
- dict_table_get(foreign->referenced_table_name,
- FALSE);
- }
-
- if (0 == trx->dict_operation_lock_mode) {
- got_s_lock = TRUE;
-
- row_mysql_freeze_data_dictionary(trx);
- }
-
- if (foreign->referenced_table) {
- mutex_enter(&(dict_sys->mutex));
-
- (foreign->referenced_table
- ->n_foreign_key_checks_running)++;
-
- mutex_exit(&(dict_sys->mutex));
- }
-
- /* NOTE that if the thread ends up waiting for a lock
- we will release dict_operation_lock temporarily!
- But the counter on the table protects the referenced
- table from being dropped while the check is running. */
-
- err = row_ins_check_foreign_constraint(
- TRUE, foreign, table, entry, thr);
-
- if (foreign->referenced_table) {
- mutex_enter(&(dict_sys->mutex));
-
- ut_a(foreign->referenced_table
- ->n_foreign_key_checks_running > 0);
- (foreign->referenced_table
- ->n_foreign_key_checks_running)--;
-
- mutex_exit(&(dict_sys->mutex));
- }
-
- if (got_s_lock) {
- row_mysql_unfreeze_data_dictionary(trx);
- }
-
- if (err != DB_SUCCESS) {
- return(err);
- }
- }
-
- foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
- }
-
- return(DB_SUCCESS);
-}
-
-/***************************************************************//**
-Checks if a unique key violation to rec would occur at the index entry
-insert.
-
-The answer is FALSE when fewer than dict_index_get_n_unique(index) leading
-fields of entry match rec, and also when the index is not clustered and one
-of those leading fields of entry is SQL NULL. In every other case the
-answer is TRUE exactly when rec is not delete-marked.
-@return TRUE if error */
-static
-ibool
-row_ins_dupl_error_with_rec(
-/*========================*/
- const rec_t* rec, /*!< in: user record; NOTE that we assume
- that the caller already has a record lock on
- the record! */
- const dtuple_t* entry, /*!< in: entry to insert */
- dict_index_t* index, /*!< in: index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
-{
- ulint matched_fields;
- ulint matched_bytes;
- ulint n_unique;
- ulint i;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- n_unique = dict_index_get_n_unique(index);
-
- matched_fields = 0;
- matched_bytes = 0;
-
- /* Only the match counters are used; the comparison result itself
- is ignored */
- cmp_dtuple_rec_with_match(entry, rec, offsets,
- &matched_fields, &matched_bytes);
-
- if (matched_fields < n_unique) {
-
- return(FALSE);
- }
-
- /* In a unique secondary index we allow equal key values if they
- contain SQL NULLs */
-
- if (!dict_index_is_clust(index)) {
-
- for (i = 0; i < n_unique; i++) {
- if (UNIV_SQL_NULL == dfield_get_len(
- dtuple_get_nth_field(entry, i))) {
-
- return(FALSE);
- }
- }
- }
-
- /* A delete-marked record does not count as a duplicate */
- return(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
-}
-
-/***************************************************************//**
-Scans a unique non-clustered index at a given index entry to determine
-whether a uniqueness violation has occurred for the key value of the entry.
-Sets locks on possible duplicate records: exclusive ones when the
-TRX_DUP_IGNORE bit is set in the duplicates field of the transaction,
-shared ones otherwise.
-
-Returns DB_SUCCESS without scanning when any of the first n_unique fields
-of entry is SQL NULL. During the scan only the first n_unique fields are
-compared; the n_fields_cmp value of entry is saved beforehand and restored
-before returning. On DB_DUPLICATE_KEY the error_info field of the
-transaction is set to index.
-@return DB_SUCCESS, DB_DUPLICATE_KEY, or DB_LOCK_WAIT */
-static
-ulint
-row_ins_scan_sec_index_for_duplicate(
-/*=================================*/
- dict_index_t* index, /*!< in: non-clustered unique index */
- dtuple_t* entry, /*!< in: index entry */
- que_thr_t* thr) /*!< in: query thread */
-{
- ulint n_unique;
- ulint i;
- int cmp;
- ulint n_fields_cmp;
- btr_pcur_t pcur;
- ulint err = DB_SUCCESS;
- unsigned allow_duplicates;
- mtr_t mtr;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- n_unique = dict_index_get_n_unique(index);
-
- /* If the secondary index is unique, but one of the fields in the
- n_unique first fields is NULL, a unique key violation cannot occur,
- since we define NULL != NULL in this case */
-
- for (i = 0; i < n_unique; i++) {
- if (UNIV_SQL_NULL == dfield_get_len(
- dtuple_get_nth_field(entry, i))) {
-
- return(DB_SUCCESS);
- }
- }
-
- mtr_start(&mtr);
-
- /* Store old value on n_fields_cmp */
-
- n_fields_cmp = dtuple_get_n_fields_cmp(entry);
-
- dtuple_set_n_fields_cmp(entry, dict_index_get_n_unique(index));
-
- btr_pcur_open(index, entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr);
-
- allow_duplicates = thr_get_trx(thr)->duplicates & TRX_DUP_IGNORE;
-
- /* Scan index records and check if there is a duplicate */
-
- do {
- const rec_t* rec = btr_pcur_get_rec(&pcur);
- const buf_block_t* block = btr_pcur_get_block(&pcur);
-
- if (page_rec_is_infimum(rec)) {
-
- continue; /* jumps to the loop condition, which
- advances the cursor */
- }
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- if (allow_duplicates) {
-
- /* If the SQL-query will update or replace
- duplicate key we will take X-lock for
- duplicates ( REPLACE, LOAD DATA ... REPLACE,
- INSERT ON DUPLICATE KEY UPDATE). */
-
- err = row_ins_set_exclusive_rec_lock(
- LOCK_ORDINARY, block,
- rec, index, offsets, thr);
- } else {
-
- err = row_ins_set_shared_rec_lock(
- LOCK_ORDINARY, block,
- rec, index, offsets, thr);
- }
-
- if (err != DB_SUCCESS) {
-
- break;
- }
-
- if (page_rec_is_supremum(rec)) {
-
- continue; /* locked above, but there is nothing to
- compare with */
- }
-
- cmp = cmp_dtuple_rec(entry, rec, offsets);
-
- if (cmp == 0) {
- if (row_ins_dupl_error_with_rec(rec, entry,
- index, offsets)) {
- err = DB_DUPLICATE_KEY;
-
- thr_get_trx(thr)->error_info = index;
-
- break;
- }
- }
-
- if (cmp < 0) {
- break; /* entry sorts before rec: stop the scan */
- }
-
- ut_a(cmp == 0);
- } while (btr_pcur_move_to_next(&pcur, &mtr));
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- mtr_commit(&mtr);
-
- /* Restore old value */
- dtuple_set_n_fields_cmp(entry, n_fields_cmp);
-
- return(err);
-}
-
-/***************************************************************//**
-Checks if a unique key violation error would occur at an index entry
-insert. Sets locks on possible duplicate records (exclusive ones when the
-TRX_DUP_IGNORE bit is set in trx->duplicates, shared ones otherwise). Works
-only for a clustered index!
-
-Two candidates are examined: the record under the cursor when
-cursor->low_match >= n_unique, and the record following it when
-cursor->up_match >= n_unique. On DB_DUPLICATE_KEY trx->error_info is set to
-the index of the cursor.
-@return DB_SUCCESS if no error, DB_DUPLICATE_KEY if error,
-DB_LOCK_WAIT if we have to wait for a lock on a possible duplicate
-record */
-static
-ulint
-row_ins_duplicate_error_in_clust(
-/*=============================*/
- btr_cur_t* cursor, /*!< in: B-tree cursor */
- dtuple_t* entry, /*!< in: entry to insert */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint err;
- rec_t* rec;
- ulint n_unique;
- trx_t* trx = thr_get_trx(thr);
- mem_heap_t*heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- UT_NOT_USED(mtr); /* mtr is not referenced anywhere else in this
- function */
-
- ut_a(dict_index_is_clust(cursor->index));
- ut_ad(dict_index_is_unique(cursor->index));
-
- /* NOTE: For unique non-clustered indexes there may be any number
- of delete marked records with the same value for the non-clustered
- index key (remember multiversioning), and which differ only in
- the row reference part of the index record, containing the
- clustered index key fields. For such a secondary index record,
- to avoid race condition, we must FIRST do the insertion and after
- that check that the uniqueness condition is not breached! */
-
- /* NOTE: A problem is that in the B-tree node pointers on an
- upper level may match more to the entry than the actual existing
- user records on the leaf level. So, even if low_match would suggest
- that a duplicate key violation may occur, this may not be the case. */
-
- n_unique = dict_index_get_n_unique(cursor->index);
-
- if (cursor->low_match >= n_unique) {
-
- rec = btr_cur_get_rec(cursor);
-
- if (!page_rec_is_infimum(rec)) {
- offsets = rec_get_offsets(rec, cursor->index, offsets,
- ULINT_UNDEFINED, &heap);
-
- /* We set a lock on the possible duplicate: this
- is needed in logical logging of MySQL to make
- sure that in roll-forward we get the same duplicate
- errors as in original execution */
-
- if (trx->duplicates & TRX_DUP_IGNORE) {
-
- /* If the SQL-query will update or replace
- duplicate key we will take X-lock for
- duplicates ( REPLACE, LOAD DATA ... REPLACE,
- INSERT ON DUPLICATE KEY UPDATE). */
-
- err = row_ins_set_exclusive_rec_lock(
- LOCK_REC_NOT_GAP,
- btr_cur_get_block(cursor),
- rec, cursor->index, offsets, thr);
- } else {
-
- err = row_ins_set_shared_rec_lock(
- LOCK_REC_NOT_GAP,
- btr_cur_get_block(cursor), rec,
- cursor->index, offsets, thr);
- }
-
- if (err != DB_SUCCESS) {
- goto func_exit;
- }
-
- if (row_ins_dupl_error_with_rec(
- rec, entry, cursor->index, offsets)) {
- trx->error_info = cursor->index;
- err = DB_DUPLICATE_KEY;
- goto func_exit;
- }
- }
- }
-
- if (cursor->up_match >= n_unique) {
-
- rec = page_rec_get_next(btr_cur_get_rec(cursor));
-
- if (!page_rec_is_supremum(rec)) {
- offsets = rec_get_offsets(rec, cursor->index, offsets,
- ULINT_UNDEFINED, &heap);
-
- if (trx->duplicates & TRX_DUP_IGNORE) {
-
- /* If the SQL-query will update or replace
- duplicate key we will take X-lock for
- duplicates ( REPLACE, LOAD DATA ... REPLACE,
- INSERT ON DUPLICATE KEY UPDATE). */
-
- err = row_ins_set_exclusive_rec_lock(
- LOCK_REC_NOT_GAP,
- btr_cur_get_block(cursor),
- rec, cursor->index, offsets, thr);
- } else {
-
- err = row_ins_set_shared_rec_lock(
- LOCK_REC_NOT_GAP,
- btr_cur_get_block(cursor),
- rec, cursor->index, offsets, thr);
- }
-
- if (err != DB_SUCCESS) {
- goto func_exit;
- }
-
- if (row_ins_dupl_error_with_rec(
- rec, entry, cursor->index, offsets)) {
- trx->error_info = cursor->index;
- err = DB_DUPLICATE_KEY;
- goto func_exit;
- }
- }
-
- ut_a(!dict_index_is_clust(cursor->index));
- /* This should never happen: the index was asserted to be
- clustered on entry, so the assertion just above cannot hold
- if control gets this far */
- }
-
- err = DB_SUCCESS;
-func_exit:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(err);
-}
-
-/***************************************************************//**
-Checks if an index entry has long enough common prefix with an existing
-record so that the intended insert of the entry must be changed to a modify of
-the existing record. In the case of a clustered index, the prefix must be
-n_unique fields long, and in the case of a secondary index, all fields must be
-equal.
-
-The decision uses cursor->low_match only: it has to reach
-dict_index_get_n_unique_in_tree() of the cursor's index, and the record
-under the cursor must not be the page infimum.
-@return 0 if no update, ROW_INS_PREV if previous should be updated;
-currently we do the search so that only the low_match record can match
-enough to the search tuple, not the next record */
-UNIV_INLINE
-ulint
-row_ins_must_modify(
-/*================*/
- btr_cur_t* cursor) /*!< in: B-tree cursor */
-{
- ulint enough_match;
- rec_t* rec;
-
- /* NOTE: (compare to the note in row_ins_duplicate_error) Because node
- pointers on upper levels of the B-tree may match more to entry than
- to actual user records on the leaf level, we have to check if the
- candidate record is actually a user record. In a clustered index
- node pointers contain index->n_unique first fields, and in the case
- of a secondary index, all fields of the index. */
-
- enough_match = dict_index_get_n_unique_in_tree(cursor->index);
-
- if (cursor->low_match >= enough_match) {
-
- rec = btr_cur_get_rec(cursor);
-
- if (!page_rec_is_infimum(rec)) {
-
- return(ROW_INS_PREV);
- }
- }
-
- return(0); /* plain insert: no existing record has to be modified */
-}
-
-/***************************************************************//**
-Tries to insert an index entry to an index. If the index is clustered
-and a record with the same unique key is found, the other record is
-necessarily marked deleted by a committed transaction, or a unique key
-violation error occurs. The delete marked record is then updated to an
-existing record, and we must write an undo log record on the delete
-marked record. If the index is secondary, and a record with exactly the
-same fields is found, the other record is necessarily marked deleted.
-It is then unmarked. Otherwise, the entry is just inserted to the index.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_FAIL if pessimistic retry needed,
-or error code */
-static
-ulint
-row_ins_index_entry_low(
-/*====================*/
- ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
- depending on whether we wish optimistic or
- pessimistic descent down the index tree */
- dict_index_t* index, /*!< in: index */
- dtuple_t* entry, /*!< in: index entry to insert */
- ulint n_ext, /*!< in: number of externally stored columns */
- que_thr_t* thr) /*!< in: query thread */
-{
- btr_cur_t cursor;
- ulint ignore_sec_unique = 0;
- ulint modify = 0; /* remove warning */
- rec_t* insert_rec;
- rec_t* rec;
- ulint err;
- ulint n_unique;
- big_rec_t* big_rec = NULL;
- mtr_t mtr;
- mem_heap_t* heap = NULL;
-
- log_free_check();
-
- mtr_start(&mtr);
-
- cursor.thr = thr;
-
- /* Note that we use PAGE_CUR_LE as the search mode, because then
- the function will return in both low_match and up_match of the
- cursor sensible values */
-
- if (!(thr_get_trx(thr)->check_unique_secondary)) {
- ignore_sec_unique = BTR_IGNORE_SEC_UNIQUE;
- }
-
- btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
- mode | BTR_INSERT | ignore_sec_unique,
- &cursor, 0, &mtr);
-
- if (cursor.flag == BTR_CUR_INSERT_TO_IBUF) {
- /* The insertion was made to the insert buffer already during
- the search: we are done */
-
- err = DB_SUCCESS;
-
- goto function_exit;
- }
-
-#ifdef UNIV_DEBUG
- {
- page_t* page = btr_cur_get_page(&cursor);
- rec_t* first_rec = page_rec_get_next(
- page_get_infimum_rec(page));
-
- ut_ad(page_rec_is_supremum(first_rec)
- || rec_get_n_fields(first_rec, index)
- == dtuple_get_n_fields(entry));
- }
-#endif
-
- n_unique = dict_index_get_n_unique(index);
-
- if (dict_index_is_unique(index) && (cursor.up_match >= n_unique
- || cursor.low_match >= n_unique)) {
-
- if (dict_index_is_clust(index)) {
- /* Note that the following may return also
- DB_LOCK_WAIT */
-
- err = row_ins_duplicate_error_in_clust(
- &cursor, entry, thr, &mtr);
- if (err != DB_SUCCESS) {
-
- goto function_exit;
- }
- } else {
- mtr_commit(&mtr);
- err = row_ins_scan_sec_index_for_duplicate(
- index, entry, thr);
- mtr_start(&mtr);
-
- if (err != DB_SUCCESS) {
-
- goto function_exit;
- }
-
- /* We did not find a duplicate and we have now
- locked with s-locks the necessary records to
- prevent any insertion of a duplicate by another
- transaction. Let us now reposition the cursor and
- continue the insertion. */
-
- btr_cur_search_to_nth_level(index, 0, entry,
- PAGE_CUR_LE,
- mode | BTR_INSERT,
- &cursor, 0, &mtr);
- }
- }
-
- modify = row_ins_must_modify(&cursor);
-
- if (modify != 0) {
- /* There is already an index entry with a long enough common
- prefix, we must convert the insert into a modify of an
- existing record */
-
- if (modify == ROW_INS_NEXT) {
- rec = page_rec_get_next(btr_cur_get_rec(&cursor));
-
- btr_cur_position(index, rec,
- btr_cur_get_block(&cursor),&cursor);
- }
-
- if (dict_index_is_clust(index)) {
- err = row_ins_clust_index_entry_by_modify(
- mode, &cursor, &heap, &big_rec, entry,
- thr, &mtr);
- } else {
- ut_ad(!n_ext);
- err = row_ins_sec_index_entry_by_modify(
- mode, &cursor, entry, thr, &mtr);
- }
- } else {
- if (mode == BTR_MODIFY_LEAF) {
- err = btr_cur_optimistic_insert(
- 0, &cursor, entry, &insert_rec, &big_rec,
- n_ext, thr, &mtr);
- } else {
- ut_a(mode == BTR_MODIFY_TREE);
- if (buf_LRU_buf_pool_running_out()) {
-
- err = DB_LOCK_TABLE_FULL;
-
- goto function_exit;
- }
- err = btr_cur_pessimistic_insert(
- 0, &cursor, entry, &insert_rec, &big_rec,
- n_ext, thr, &mtr);
- }
- }
-
-function_exit:
- mtr_commit(&mtr);
-
- if (UNIV_LIKELY_NULL(big_rec)) {
- rec_t* rec;
- ulint* offsets;
- mtr_start(&mtr);
-
- btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
- BTR_MODIFY_TREE, &cursor, 0, &mtr);
- rec = btr_cur_get_rec(&cursor);
- offsets = rec_get_offsets(rec, index, NULL,
- ULINT_UNDEFINED, &heap);
-
- err = btr_store_big_rec_extern_fields(
- index, btr_cur_get_block(&cursor),
- rec, offsets, big_rec, &mtr);
-
- if (modify) {
- dtuple_big_rec_free(big_rec);
- } else {
- dtuple_convert_back_big_rec(index, entry, big_rec);
- }
-
- mtr_commit(&mtr);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(err);
-}
-
-/***************************************************************//**
-Inserts an index entry to index. Tries first optimistic, then pessimistic
-descent down the tree. If the entry matches enough to a delete marked record,
-performs the insert by updating or delete unmarking the delete marked
-record.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
-UNIV_INTERN
-ulint
-row_ins_index_entry(
-/*================*/
- dict_index_t* index, /*!< in: index */
- dtuple_t* entry, /*!< in: index entry to insert */
- ulint n_ext, /*!< in: number of externally stored columns */
- ibool foreign,/*!< in: TRUE=check foreign key constraints */
- que_thr_t* thr) /*!< in: query thread */
-{
- ulint err;
-
- if (foreign && UT_LIST_GET_FIRST(index->table->foreign_list)) {
- err = row_ins_check_foreign_constraints(index->table, index,
- entry, thr);
- if (err != DB_SUCCESS) {
-
- return(err);
- }
- }
-
- /* Try first optimistic descent to the B-tree */
-
- err = row_ins_index_entry_low(BTR_MODIFY_LEAF, index, entry,
- n_ext, thr);
- if (err != DB_FAIL) {
-
- return(err);
- }
-
- /* Try then pessimistic descent to the B-tree */
-
- err = row_ins_index_entry_low(BTR_MODIFY_TREE, index, entry,
- n_ext, thr);
- return(err);
-}
-
-/***********************************************************//**
-Sets the values of the dtuple fields in entry from the values of appropriate
-columns in row. */
-static
-void
-row_ins_index_entry_set_vals(
-/*=========================*/
- dict_index_t* index, /*!< in: index */
- dtuple_t* entry, /*!< in: index entry to make */
- const dtuple_t* row) /*!< in: row */
-{
- ulint n_fields;
- ulint i;
-
- ut_ad(entry && row);
-
- n_fields = dtuple_get_n_fields(entry);
-
- for (i = 0; i < n_fields; i++) {
- dict_field_t* ind_field;
- dfield_t* field;
- const dfield_t* row_field;
- ulint len;
-
- field = dtuple_get_nth_field(entry, i);
- ind_field = dict_index_get_nth_field(index, i);
- row_field = dtuple_get_nth_field(row, ind_field->col->ind);
- len = dfield_get_len(row_field);
-
- /* Check column prefix indexes */
- if (ind_field->prefix_len > 0
- && dfield_get_len(row_field) != UNIV_SQL_NULL) {
-
- const dict_col_t* col
- = dict_field_get_col(ind_field);
-
- len = dtype_get_at_most_n_mbchars(
- col->prtype, col->mbminlen, col->mbmaxlen,
- ind_field->prefix_len,
- len, dfield_get_data(row_field));
-
- ut_ad(!dfield_is_ext(row_field));
- }
-
- dfield_set_data(field, dfield_get_data(row_field), len);
- if (dfield_is_ext(row_field)) {
- ut_ad(dict_index_is_clust(index));
- dfield_set_ext(field);
- }
- }
-}
-
-/***********************************************************//**
-Inserts a single index entry to the table.
-@return DB_SUCCESS if operation successfully completed, else error
-code or DB_LOCK_WAIT */
-static
-ulint
-row_ins_index_entry_step(
-/*=====================*/
- ins_node_t* node, /*!< in: row insert node */
- que_thr_t* thr) /*!< in: query thread */
-{
- ulint err;
-
- ut_ad(dtuple_check_typed(node->row));
-
- row_ins_index_entry_set_vals(node->index, node->entry, node->row);
-
- ut_ad(dtuple_check_typed(node->entry));
-
- err = row_ins_index_entry(node->index, node->entry, 0, TRUE, thr);
-
- return(err);
-}
-
-/***********************************************************//**
-Allocates a row id for row and inits the node->index field. */
-UNIV_INLINE
-void
-row_ins_alloc_row_id_step(
-/*======================*/
- ins_node_t* node) /*!< in: row insert node */
-{
- dulint row_id;
-
- ut_ad(node->state == INS_NODE_ALLOC_ROW_ID);
-
- if (dict_index_is_unique(dict_table_get_first_index(node->table))) {
-
- /* No row id is stored if the clustered index is unique */
-
- return;
- }
-
- /* Fill in row id value to row */
-
- row_id = dict_sys_get_new_row_id();
-
- dict_sys_write_row_id(node->row_id_buf, row_id);
-}
-
-/***********************************************************//**
-Gets a row to insert from the values list. */
-UNIV_INLINE
-void
-row_ins_get_row_from_values(
-/*========================*/
- ins_node_t* node) /*!< in: row insert node */
-{
- que_node_t* list_node;
- dfield_t* dfield;
- dtuple_t* row;
- ulint i;
-
- /* The field values are copied in the buffers of the select node and
- it is safe to use them until we fetch from select again: therefore
- we can just copy the pointers */
-
- row = node->row;
-
- i = 0;
- list_node = node->values_list;
-
- while (list_node) {
- eval_exp(list_node);
-
- dfield = dtuple_get_nth_field(row, i);
- dfield_copy_data(dfield, que_node_get_val(list_node));
-
- i++;
- list_node = que_node_get_next(list_node);
- }
-}
-
-/***********************************************************//**
-Gets a row to insert from the select list. */
-UNIV_INLINE
-void
-row_ins_get_row_from_select(
-/*========================*/
- ins_node_t* node) /*!< in: row insert node */
-{
- que_node_t* list_node;
- dfield_t* dfield;
- dtuple_t* row;
- ulint i;
-
- /* The field values are copied in the buffers of the select node and
- it is safe to use them until we fetch from select again: therefore
- we can just copy the pointers */
-
- row = node->row;
-
- i = 0;
- list_node = node->select->select_list;
-
- while (list_node) {
- dfield = dtuple_get_nth_field(row, i);
- dfield_copy_data(dfield, que_node_get_val(list_node));
-
- i++;
- list_node = que_node_get_next(list_node);
- }
-}
-
-/***********************************************************//**
-Inserts a row to a table.
-@return DB_SUCCESS if operation successfully completed, else error
-code or DB_LOCK_WAIT */
-static
-ulint
-row_ins(
-/*====*/
- ins_node_t* node, /*!< in: row insert node */
- que_thr_t* thr) /*!< in: query thread */
-{
- ulint err;
-
- ut_ad(node && thr);
-
- if (node->state == INS_NODE_ALLOC_ROW_ID) {
-
- row_ins_alloc_row_id_step(node);
-
- node->index = dict_table_get_first_index(node->table);
- node->entry = UT_LIST_GET_FIRST(node->entry_list);
-
- if (node->ins_type == INS_SEARCHED) {
-
- row_ins_get_row_from_select(node);
-
- } else if (node->ins_type == INS_VALUES) {
-
- row_ins_get_row_from_values(node);
- }
-
- node->state = INS_NODE_INSERT_ENTRIES;
- }
-
- ut_ad(node->state == INS_NODE_INSERT_ENTRIES);
-
- while (node->index != NULL) {
- err = row_ins_index_entry_step(node, thr);
-
- if (err != DB_SUCCESS) {
-
- return(err);
- }
-
- node->index = dict_table_get_next_index(node->index);
- node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry);
- }
-
- ut_ad(node->entry == NULL);
-
- node->state = INS_NODE_ALLOC_ROW_ID;
-
- return(DB_SUCCESS);
-}
-
-/***********************************************************//**
-Inserts a row to a table. This is a high-level function used in SQL execution
-graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-row_ins_step(
-/*=========*/
- que_thr_t* thr) /*!< in: query thread */
-{
- ins_node_t* node;
- que_node_t* parent;
- sel_node_t* sel_node;
- trx_t* trx;
- ulint err;
-
- ut_ad(thr);
-
- trx = thr_get_trx(thr);
-
- trx_start_if_not_started(trx);
-
- node = thr->run_node;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_INSERT);
-
- parent = que_node_get_parent(node);
- sel_node = node->select;
-
- if (thr->prev_node == parent) {
- node->state = INS_NODE_SET_IX_LOCK;
- }
-
- /* If this is the first time this node is executed (or when
- execution resumes after wait for the table IX lock), set an
- IX lock on the table and reset the possible select node. MySQL's
- partitioned table code may also call an insert within the same
- SQL statement AFTER it has used this table handle to do a search.
- This happens, for example, when a row update moves it to another
- partition. In that case, we have already set the IX lock on the
- table during the search operation, and there is no need to set
- it again here. But we must write trx->id to node->trx_id_buf. */
-
- trx_write_trx_id(node->trx_id_buf, trx->id);
-
- if (node->state == INS_NODE_SET_IX_LOCK) {
-
- /* It may be that the current session has not yet started
- its transaction, or it has been committed: */
-
- if (UT_DULINT_EQ(trx->id, node->trx_id)) {
- /* No need to do IX-locking */
-
- goto same_trx;
- }
-
- err = lock_table(0, node->table, LOCK_IX, thr);
-
- if (err != DB_SUCCESS) {
-
- goto error_handling;
- }
-
- node->trx_id = trx->id;
-same_trx:
- node->state = INS_NODE_ALLOC_ROW_ID;
-
- if (node->ins_type == INS_SEARCHED) {
- /* Reset the cursor */
- sel_node->state = SEL_NODE_OPEN;
-
- /* Fetch a row to insert */
-
- thr->run_node = sel_node;
-
- return(thr);
- }
- }
-
- if ((node->ins_type == INS_SEARCHED)
- && (sel_node->state != SEL_NODE_FETCH)) {
-
- ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS);
-
- /* No more rows to insert */
- thr->run_node = parent;
-
- return(thr);
- }
-
- /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
-
- err = row_ins(node, thr);
-
-error_handling:
- trx->error_state = err;
-
- if (err != DB_SUCCESS) {
- /* err == DB_LOCK_WAIT or SQL error detected */
- return(NULL);
- }
-
- /* DO THE TRIGGER ACTIONS HERE */
-
- if (node->ins_type == INS_SEARCHED) {
- /* Fetch a row to insert */
-
- thr->run_node = sel_node;
- } else {
- thr->run_node = que_node_get_parent(node);
- }
-
- return(thr);
-}
diff --git a/storage/innodb_plugin/row/row0mysql.c b/storage/innodb_plugin/row/row0mysql.c
deleted file mode 100644
index 181c39de881..00000000000
--- a/storage/innodb_plugin/row/row0mysql.c
+++ /dev/null
@@ -1,4213 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2000, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file row/row0mysql.c
-Interface between Innobase row operations and MySQL.
-Contains also create table and other data dictionary operations.
-
-Created 9/17/2000 Heikki Tuuri
-*******************************************************/
-
-#include "row0mysql.h"
-
-#ifdef UNIV_NONINL
-#include "row0mysql.ic"
-#endif
-
-#include "row0ins.h"
-#include "row0merge.h"
-#include "row0sel.h"
-#include "row0upd.h"
-#include "row0row.h"
-#include "que0que.h"
-#include "pars0pars.h"
-#include "dict0dict.h"
-#include "dict0crea.h"
-#include "dict0load.h"
-#include "dict0boot.h"
-#include "trx0roll.h"
-#include "trx0purge.h"
-#include "trx0rec.h"
-#include "trx0undo.h"
-#include "lock0lock.h"
-#include "rem0cmp.h"
-#include "log0log.h"
-#include "btr0sea.h"
-#include "fil0fil.h"
-#include "ibuf0ibuf.h"
-
-/** Provide optional 4.x backwards compatibility for 5.0 and above */
-UNIV_INTERN ibool row_rollback_on_timeout = FALSE;
-
-/** Chain node of the list of tables to drop in the background. */
-typedef struct row_mysql_drop_struct row_mysql_drop_t;
-
-/** Chain node of the list of tables to drop in the background. */
-struct row_mysql_drop_struct{
- char* table_name; /*!< table name */
- UT_LIST_NODE_T(row_mysql_drop_t)row_mysql_drop_list;
- /*!< list chain node */
-};
-
-/** @brief List of tables we should drop in background.
-
-ALTER TABLE in MySQL requires that the table handler can drop the
-table in background when there are no queries to it any
-more. Protected by kernel_mutex. */
-static UT_LIST_BASE_NODE_T(row_mysql_drop_t) row_mysql_drop_list;
-/** Flag: has row_mysql_drop_list been initialized? */
-static ibool row_mysql_drop_list_inited = FALSE;
-
-/** Magic table names for invoking various monitor threads */
-/* @{ */
-static const char S_innodb_monitor[] = "innodb_monitor";
-static const char S_innodb_lock_monitor[] = "innodb_lock_monitor";
-static const char S_innodb_tablespace_monitor[] = "innodb_tablespace_monitor";
-static const char S_innodb_table_monitor[] = "innodb_table_monitor";
-static const char S_innodb_mem_validate[] = "innodb_mem_validate";
-/* @} */
-
-/** Evaluates to true if str1 equals str2_onstack, used for comparing
-the magic table names.
-@param str1 in: string to compare
-@param str1_len in: length of str1, in bytes, including terminating NUL
-@param str2_onstack in: char[] array containing a NUL terminated string
-@return TRUE if str1 equals str2_onstack */
-#define STR_EQ(str1, str1_len, str2_onstack) \
- ((str1_len) == sizeof(str2_onstack) \
- && memcmp(str1, str2_onstack, sizeof(str2_onstack)) == 0)
-
-/*******************************************************************//**
-Determine if the given name is a name reserved for MySQL system tables.
-@return TRUE if name is a MySQL system table name */
-static
-ibool
-row_mysql_is_system_table(
-/*======================*/
- const char* name)
-{
- if (strncmp(name, "mysql/", 6) != 0) {
-
- return(FALSE);
- }
-
- return(0 == strcmp(name + 6, "host")
- || 0 == strcmp(name + 6, "user")
- || 0 == strcmp(name + 6, "db"));
-}
-
-/*********************************************************************//**
-If a table is not yet in the drop list, adds the table to the list of tables
-which the master thread drops in background. We need this on Unix because in
-ALTER TABLE MySQL may call drop table even if the table has running queries on
-it. Also, if there are running foreign key checks on the table, we drop the
-table lazily.
-@return TRUE if the table was not yet in the drop list, and was added there */
-static
-ibool
-row_add_table_to_background_drop_list(
-/*==================================*/
- const char* name); /*!< in: table name */
-
-/*******************************************************************//**
-Delays an INSERT, DELETE or UPDATE operation if the purge is lagging. */
-static
-void
-row_mysql_delay_if_needed(void)
-/*===========================*/
-{
- if (srv_dml_needed_delay) {
- os_thread_sleep(srv_dml_needed_delay);
- }
-}
-
-/*******************************************************************//**
-Frees the blob heap in prebuilt when no longer needed. */
-UNIV_INTERN
-void
-row_mysql_prebuilt_free_blob_heap(
-/*==============================*/
- row_prebuilt_t* prebuilt) /*!< in: prebuilt struct of a
- ha_innobase:: table handle */
-{
- mem_heap_free(prebuilt->blob_heap);
- prebuilt->blob_heap = NULL;
-}
-
-/*******************************************************************//**
-Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row
-format.
-@return pointer to the data, we skip the 1 or 2 bytes at the start
-that are used to store the len */
-UNIV_INTERN
-byte*
-row_mysql_store_true_var_len(
-/*=========================*/
- byte* dest, /*!< in: where to store */
- ulint len, /*!< in: length, must fit in two bytes */
- ulint lenlen) /*!< in: storage length of len: either 1 or 2 bytes */
-{
- if (lenlen == 2) {
- ut_a(len < 256 * 256);
-
- mach_write_to_2_little_endian(dest, len);
-
- return(dest + 2);
- }
-
- ut_a(lenlen == 1);
- ut_a(len < 256);
-
- mach_write_to_1(dest, len);
-
- return(dest + 1);
-}
-
-/*******************************************************************//**
-Reads a >= 5.0.3 format true VARCHAR length, in the MySQL row format, and
-returns a pointer to the data.
-@return pointer to the data, we skip the 1 or 2 bytes at the start
-that are used to store the len */
-UNIV_INTERN
-const byte*
-row_mysql_read_true_varchar(
-/*========================*/
- ulint* len, /*!< out: variable-length field length */
- const byte* field, /*!< in: field in the MySQL format */
- ulint lenlen) /*!< in: storage length of len: either 1
- or 2 bytes */
-{
- if (lenlen == 2) {
- *len = mach_read_from_2_little_endian(field);
-
- return(field + 2);
- }
-
- ut_a(lenlen == 1);
-
- *len = mach_read_from_1(field);
-
- return(field + 1);
-}
-
-/*******************************************************************//**
-Stores a reference to a BLOB in the MySQL format. */
-UNIV_INTERN
-void
-row_mysql_store_blob_ref(
-/*=====================*/
- byte* dest, /*!< in: where to store */
- ulint col_len,/*!< in: dest buffer size: determines into
- how many bytes the BLOB length is stored,
- the space for the length may vary from 1
- to 4 bytes */
- const void* data, /*!< in: BLOB data; if the value to store
- is SQL NULL this should be NULL pointer */
- ulint len) /*!< in: BLOB length; if the value to store
- is SQL NULL this should be 0; remember
- also to set the NULL bit in the MySQL record
- header! */
-{
- /* MySQL might assume the field is set to zero except the length and
- the pointer fields */
-
- memset(dest, '\0', col_len);
-
- /* In dest there are 1 - 4 bytes reserved for the BLOB length,
- and after that 8 bytes reserved for the pointer to the data.
- In 32-bit architectures we only use the first 4 bytes of the pointer
- slot. */
-
- ut_a(col_len - 8 > 1 || len < 256);
- ut_a(col_len - 8 > 2 || len < 256 * 256);
- ut_a(col_len - 8 > 3 || len < 256 * 256 * 256);
-
- mach_write_to_n_little_endian(dest, col_len - 8, len);
-
- memcpy(dest + col_len - 8, &data, sizeof data);
-}
-
-/*******************************************************************//**
-Reads a reference to a BLOB in the MySQL format.
-@return pointer to BLOB data */
-UNIV_INTERN
-const byte*
-row_mysql_read_blob_ref(
-/*====================*/
- ulint* len, /*!< out: BLOB length */
- const byte* ref, /*!< in: BLOB reference in the
- MySQL format */
- ulint col_len) /*!< in: BLOB reference length
- (not BLOB length) */
-{
- byte* data;
-
- *len = mach_read_from_n_little_endian(ref, col_len - 8);
-
- memcpy(&data, ref + col_len - 8, sizeof data);
-
- return(data);
-}
-
-/**************************************************************//**
-Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format.
-The counterpart of this function is row_sel_field_store_in_mysql_format() in
-row0sel.c.
-@return up to which byte we used buf in the conversion */
-UNIV_INTERN
-byte*
-row_mysql_store_col_in_innobase_format(
-/*===================================*/
- dfield_t* dfield, /*!< in/out: dfield where dtype
- information must be already set when
- this function is called! */
- byte* buf, /*!< in/out: buffer for a converted
- integer value; this must be at least
- col_len long then! */
- ibool row_format_col, /*!< TRUE if the mysql_data is from
- a MySQL row, FALSE if from a MySQL
- key value;
- in MySQL, a true VARCHAR storage
- format differs in a row and in a
- key value: in a key value the length
- is always stored in 2 bytes! */
- const byte* mysql_data, /*!< in: MySQL column value, not
- SQL NULL; NOTE that dfield may also
- get a pointer to mysql_data,
- therefore do not discard this as long
- as dfield is used! */
- ulint col_len, /*!< in: MySQL column length; NOTE that
- this is the storage length of the
- column in the MySQL format row, not
- necessarily the length of the actual
- payload data; if the column is a true
- VARCHAR then this is irrelevant */
- ulint comp) /*!< in: nonzero=compact format */
-{
- const byte* ptr = mysql_data;
- const dtype_t* dtype;
- ulint type;
- ulint lenlen;
-
- dtype = dfield_get_type(dfield);
-
- type = dtype->mtype;
-
- if (type == DATA_INT) {
- /* Store integer data in Innobase in a big-endian format,
- sign bit negated if the data is a signed integer. In MySQL,
- integers are stored in a little-endian format. */
-
- byte* p = buf + col_len;
-
- for (;;) {
- p--;
- *p = *mysql_data;
- if (p == buf) {
- break;
- }
- mysql_data++;
- }
-
- if (!(dtype->prtype & DATA_UNSIGNED)) {
-
- *buf ^= 128;
- }
-
- ptr = buf;
- buf += col_len;
- } else if ((type == DATA_VARCHAR
- || type == DATA_VARMYSQL
- || type == DATA_BINARY)) {
-
- if (dtype_get_mysql_type(dtype) == DATA_MYSQL_TRUE_VARCHAR) {
- /* The length of the actual data is stored to 1 or 2
- bytes at the start of the field */
-
- if (row_format_col) {
- if (dtype->prtype & DATA_LONG_TRUE_VARCHAR) {
- lenlen = 2;
- } else {
- lenlen = 1;
- }
- } else {
- /* In a MySQL key value, lenlen is always 2 */
- lenlen = 2;
- }
-
- ptr = row_mysql_read_true_varchar(&col_len, mysql_data,
- lenlen);
- } else {
- /* Remove trailing spaces from old style VARCHAR
- columns. */
-
- /* Handle UCS2 strings differently. */
- ulint mbminlen = dtype_get_mbminlen(dtype);
-
- ptr = mysql_data;
-
- if (mbminlen == 2) {
- /* space=0x0020 */
- /* Trim "half-chars", just in case. */
- col_len &= ~1;
-
- while (col_len >= 2 && ptr[col_len - 2] == 0x00
- && ptr[col_len - 1] == 0x20) {
- col_len -= 2;
- }
- } else {
- ut_a(mbminlen == 1);
- /* space=0x20 */
- while (col_len > 0
- && ptr[col_len - 1] == 0x20) {
- col_len--;
- }
- }
- }
- } else if (comp && type == DATA_MYSQL
- && dtype_get_mbminlen(dtype) == 1
- && dtype_get_mbmaxlen(dtype) > 1) {
- /* In some cases we strip trailing spaces from UTF-8 and other
- multibyte charsets, from FIXED-length CHAR columns, to save
- space. UTF-8 would otherwise normally use 3 * the string length
- bytes to store an ASCII string! */
-
- /* We assume that this CHAR field is encoded in a
- variable-length character set where spaces have
- 1:1 correspondence to 0x20 bytes, such as UTF-8.
-
- Consider a CHAR(n) field, a field of n characters.
- It will contain between n * mbminlen and n * mbmaxlen bytes.
- We will try to truncate it to n bytes by stripping
- space padding. If the field contains single-byte
- characters only, it will be truncated to n characters.
- Consider a CHAR(5) field containing the string ".a "
- where "." denotes a 3-byte character represented by
- the bytes "$%&". After our stripping, the string will
- be stored as "$%&a " (5 bytes). The string ".abc "
- will be stored as "$%&abc" (6 bytes).
-
- The space padding will be restored in row0sel.c, function
- row_sel_field_store_in_mysql_format(). */
-
- ulint n_chars;
-
- ut_a(!(dtype_get_len(dtype) % dtype_get_mbmaxlen(dtype)));
-
- n_chars = dtype_get_len(dtype) / dtype_get_mbmaxlen(dtype);
-
- /* Strip space padding. */
- while (col_len > n_chars && ptr[col_len - 1] == 0x20) {
- col_len--;
- }
- } else if (type == DATA_BLOB && row_format_col) {
-
- ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len);
- }
-
- dfield_set_data(dfield, ptr, col_len);
-
- return(buf);
-}
-
-/**************************************************************//**
-Convert a row in the MySQL format to a row in the Innobase format. Note that
-the function to convert a MySQL format key value to an InnoDB dtuple is
-row_sel_convert_mysql_key_to_innobase() in row0sel.c. */
-static
-void
-row_mysql_convert_row_to_innobase(
-/*==============================*/
- dtuple_t* row, /*!< in/out: Innobase row where the
- field type information is already
- copied there! */
- row_prebuilt_t* prebuilt, /*!< in: prebuilt struct where template
- must be of type ROW_MYSQL_WHOLE_ROW */
- byte* mysql_rec) /*!< in: row in the MySQL format;
- NOTE: do not discard as long as
- row is used, as row may contain
- pointers to this record! */
-{
- mysql_row_templ_t* templ;
- dfield_t* dfield;
- ulint i;
-
- ut_ad(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
- ut_ad(prebuilt->mysql_template);
-
- for (i = 0; i < prebuilt->n_template; i++) {
-
- templ = prebuilt->mysql_template + i;
- dfield = dtuple_get_nth_field(row, i);
-
- if (templ->mysql_null_bit_mask != 0) {
- /* Column may be SQL NULL */
-
- if (mysql_rec[templ->mysql_null_byte_offset]
- & (byte) (templ->mysql_null_bit_mask)) {
-
- /* It is SQL NULL */
-
- dfield_set_null(dfield);
-
- goto next_column;
- }
- }
-
- row_mysql_store_col_in_innobase_format(
- dfield,
- prebuilt->ins_upd_rec_buff + templ->mysql_col_offset,
- TRUE, /* MySQL row format data */
- mysql_rec + templ->mysql_col_offset,
- templ->mysql_col_len,
- dict_table_is_comp(prebuilt->table));
-next_column:
- ;
- }
-}
-
-/****************************************************************//**
-Handles user errors and lock waits detected by the database engine.
-@return TRUE if it was a lock wait and we should continue running the
-query thread */
-UNIV_INTERN
-ibool
-row_mysql_handle_errors(
-/*====================*/
- ulint* new_err,/*!< out: possible new error encountered in
- lock wait, or if no new error, the value
- of trx->error_state at the entry of this
- function */
- trx_t* trx, /*!< in: transaction */
- que_thr_t* thr, /*!< in: query thread */
- trx_savept_t* savept) /*!< in: savepoint or NULL */
-{
- ulint err;
-
-handle_new_error:
- err = trx->error_state;
-
- ut_a(err != DB_SUCCESS);
-
- trx->error_state = DB_SUCCESS;
-
- switch (err) {
- case DB_LOCK_WAIT_TIMEOUT:
- if (row_rollback_on_timeout) {
- trx_general_rollback_for_mysql(trx, NULL);
- break;
- }
- /* fall through */
- case DB_DUPLICATE_KEY:
- case DB_FOREIGN_DUPLICATE_KEY:
- case DB_TOO_BIG_RECORD:
- case DB_ROW_IS_REFERENCED:
- case DB_NO_REFERENCED_ROW:
- case DB_CANNOT_ADD_CONSTRAINT:
- case DB_TOO_MANY_CONCURRENT_TRXS:
- case DB_OUT_OF_FILE_SPACE:
- if (savept) {
- /* Roll back the latest, possibly incomplete
- insertion or update */
-
- trx_general_rollback_for_mysql(trx, savept);
- }
- /* MySQL will roll back the latest SQL statement */
- break;
- case DB_LOCK_WAIT:
- srv_suspend_mysql_thread(thr);
-
- if (trx->error_state != DB_SUCCESS) {
- que_thr_stop_for_mysql(thr);
-
- goto handle_new_error;
- }
-
- *new_err = err;
-
- return(TRUE);
-
- case DB_DEADLOCK:
- case DB_LOCK_TABLE_FULL:
- /* Roll back the whole transaction; this resolution was added
- to version 3.23.43 */
-
- trx_general_rollback_for_mysql(trx, NULL);
- break;
-
- case DB_MUST_GET_MORE_FILE_SPACE:
- fputs("InnoDB: The database cannot continue"
- " operation because of\n"
- "InnoDB: lack of space. You must add"
- " a new data file to\n"
- "InnoDB: my.cnf and restart the database.\n", stderr);
-
- exit(1);
-
- case DB_CORRUPTION:
- fputs("InnoDB: We detected index corruption"
- " in an InnoDB type table.\n"
- "InnoDB: You have to dump + drop + reimport"
- " the table or, in\n"
- "InnoDB: a case of widespread corruption,"
- " dump all InnoDB\n"
- "InnoDB: tables and recreate the"
- " whole InnoDB tablespace.\n"
- "InnoDB: If the mysqld server crashes"
- " after the startup or when\n"
- "InnoDB: you dump the tables, look at\n"
- "InnoDB: " REFMAN "forcing-recovery.html"
- " for help.\n", stderr);
- break;
- default:
- fprintf(stderr, "InnoDB: unknown error code %lu\n",
- (ulong) err);
- ut_error;
- }
-
- if (trx->error_state != DB_SUCCESS) {
- *new_err = trx->error_state;
- } else {
- *new_err = err;
- }
-
- trx->error_state = DB_SUCCESS;
-
- return(FALSE);
-}
-
-/********************************************************************//**
-Create a prebuilt struct for a MySQL table handle.
-@return own: a prebuilt struct */
-UNIV_INTERN
-row_prebuilt_t*
-row_create_prebuilt(
-/*================*/
- dict_table_t* table) /*!< in: Innobase table handle */
-{
- row_prebuilt_t* prebuilt;
- mem_heap_t* heap;
- dict_index_t* clust_index;
- dtuple_t* ref;
- ulint ref_len;
-
- heap = mem_heap_create(sizeof *prebuilt + 128);
-
- prebuilt = mem_heap_zalloc(heap, sizeof *prebuilt);
-
- prebuilt->magic_n = ROW_PREBUILT_ALLOCATED;
- prebuilt->magic_n2 = ROW_PREBUILT_ALLOCATED;
-
- prebuilt->table = table;
-
- prebuilt->sql_stat_start = TRUE;
- prebuilt->heap = heap;
-
- prebuilt->pcur = btr_pcur_create_for_mysql();
- prebuilt->clust_pcur = btr_pcur_create_for_mysql();
-
- prebuilt->select_lock_type = LOCK_NONE;
- prebuilt->stored_select_lock_type = 99999999;
-
- prebuilt->search_tuple = dtuple_create(
- heap, 2 * dict_table_get_n_cols(table));
-
- clust_index = dict_table_get_first_index(table);
-
- /* Make sure that search_tuple is long enough for clustered index */
- ut_a(2 * dict_table_get_n_cols(table) >= clust_index->n_fields);
-
- ref_len = dict_index_get_n_unique(clust_index);
-
- ref = dtuple_create(heap, ref_len);
-
- dict_index_copy_types(ref, clust_index, ref_len);
-
- prebuilt->clust_ref = ref;
-
- prebuilt->autoinc_error = 0;
- prebuilt->autoinc_offset = 0;
-
- /* Default to 1, we will set the actual value later in
- ha_innobase::get_auto_increment(). */
- prebuilt->autoinc_increment = 1;
-
- prebuilt->autoinc_last_value = 0;
-
- return(prebuilt);
-}
-
-/********************************************************************//**
-Free a prebuilt struct for a MySQL table handle. */
-UNIV_INTERN
-void
-row_prebuilt_free(
-/*==============*/
- row_prebuilt_t* prebuilt, /*!< in, own: prebuilt struct */
- ibool dict_locked) /*!< in: TRUE=data dictionary locked */
-{
- ulint i;
-
- if (UNIV_UNLIKELY
- (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED
- || prebuilt->magic_n2 != ROW_PREBUILT_ALLOCATED)) {
-
- fprintf(stderr,
- "InnoDB: Error: trying to free a corrupt\n"
- "InnoDB: table handle. Magic n %lu,"
- " magic n2 %lu, table name ",
- (ulong) prebuilt->magic_n,
- (ulong) prebuilt->magic_n2);
- ut_print_name(stderr, NULL, TRUE, prebuilt->table->name);
- putc('\n', stderr);
-
- mem_analyze_corruption(prebuilt);
-
- ut_error;
- }
-
- prebuilt->magic_n = ROW_PREBUILT_FREED;
- prebuilt->magic_n2 = ROW_PREBUILT_FREED;
-
- btr_pcur_free_for_mysql(prebuilt->pcur);
- btr_pcur_free_for_mysql(prebuilt->clust_pcur);
-
- if (prebuilt->mysql_template) {
- mem_free(prebuilt->mysql_template);
- }
-
- if (prebuilt->ins_graph) {
- que_graph_free_recursive(prebuilt->ins_graph);
- }
-
- if (prebuilt->sel_graph) {
- que_graph_free_recursive(prebuilt->sel_graph);
- }
-
- if (prebuilt->upd_graph) {
- que_graph_free_recursive(prebuilt->upd_graph);
- }
-
- if (prebuilt->blob_heap) {
- mem_heap_free(prebuilt->blob_heap);
- }
-
- if (prebuilt->old_vers_heap) {
- mem_heap_free(prebuilt->old_vers_heap);
- }
-
- for (i = 0; i < MYSQL_FETCH_CACHE_SIZE; i++) {
- if (prebuilt->fetch_cache[i] != NULL) {
-
- if ((ROW_PREBUILT_FETCH_MAGIC_N != mach_read_from_4(
- (prebuilt->fetch_cache[i]) - 4))
- || (ROW_PREBUILT_FETCH_MAGIC_N != mach_read_from_4(
- (prebuilt->fetch_cache[i])
- + prebuilt->mysql_row_len))) {
- fputs("InnoDB: Error: trying to free"
- " a corrupt fetch buffer.\n", stderr);
-
- mem_analyze_corruption(
- prebuilt->fetch_cache[i]);
-
- ut_error;
- }
-
- mem_free((prebuilt->fetch_cache[i]) - 4);
- }
- }
-
- dict_table_decrement_handle_count(prebuilt->table, dict_locked);
-
- mem_heap_free(prebuilt->heap);
-}
-
-/*********************************************************************//**
-Updates the transaction pointers in query graphs stored in the prebuilt
-struct. */
-UNIV_INTERN
-void
-row_update_prebuilt_trx(
-/*====================*/
- row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct
- in MySQL handle */
- trx_t* trx) /*!< in: transaction handle */
-{
- if (trx->magic_n != TRX_MAGIC_N) {
- fprintf(stderr,
- "InnoDB: Error: trying to use a corrupt\n"
- "InnoDB: trx handle. Magic n %lu\n",
- (ulong) trx->magic_n);
-
- mem_analyze_corruption(trx);
-
- ut_error;
- }
-
- if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) {
- fprintf(stderr,
- "InnoDB: Error: trying to use a corrupt\n"
- "InnoDB: table handle. Magic n %lu, table name ",
- (ulong) prebuilt->magic_n);
- ut_print_name(stderr, trx, TRUE, prebuilt->table->name);
- putc('\n', stderr);
-
- mem_analyze_corruption(prebuilt);
-
- ut_error;
- }
-
- prebuilt->trx = trx;
-
- if (prebuilt->ins_graph) {
- prebuilt->ins_graph->trx = trx;
- }
-
- if (prebuilt->upd_graph) {
- prebuilt->upd_graph->trx = trx;
- }
-
- if (prebuilt->sel_graph) {
- prebuilt->sel_graph->trx = trx;
- }
-}
-
-/*********************************************************************//**
-Gets pointer to a prebuilt dtuple used in insertions. If the insert graph
-has not yet been built in the prebuilt struct, then this function first
-builds it.
-@return prebuilt dtuple; the column type information is also set in it */
-static
-dtuple_t*
-row_get_prebuilt_insert_row(
-/*========================*/
- row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
- handle */
-{
- ins_node_t* node;
- dtuple_t* row;
- dict_table_t* table = prebuilt->table;
-
- ut_ad(prebuilt && table && prebuilt->trx);
-
- if (prebuilt->ins_node == NULL) {
-
- /* Not called before for this handle: create an insert node
- and query graph to the prebuilt struct */
-
- node = ins_node_create(INS_DIRECT, table, prebuilt->heap);
-
- prebuilt->ins_node = node;
-
- if (prebuilt->ins_upd_rec_buff == NULL) {
- prebuilt->ins_upd_rec_buff = mem_heap_alloc(
- prebuilt->heap, prebuilt->mysql_row_len);
- }
-
- row = dtuple_create(prebuilt->heap,
- dict_table_get_n_cols(table));
-
- dict_table_copy_types(row, table);
-
- ins_node_set_new_row(node, row);
-
- prebuilt->ins_graph = que_node_get_parent(
- pars_complete_graph_for_exec(node,
- prebuilt->trx,
- prebuilt->heap));
- prebuilt->ins_graph->state = QUE_FORK_ACTIVE;
- }
-
- return(prebuilt->ins_node->row);
-}
-
-/*********************************************************************//**
-Updates the table modification counter and calculates new estimates
-for table and index statistics if necessary. */
-UNIV_INLINE
-void
-row_update_statistics_if_needed(
-/*============================*/
- dict_table_t* table) /*!< in: table */
-{
- ulint counter;
-
- counter = table->stat_modified_counter;
-
- table->stat_modified_counter = counter + 1;
-
- /* Calculate new statistics if 1 / 16 of table has been modified
- since the last time a statistics batch was run, or if
- stat_modified_counter > 2 000 000 000 (to avoid wrap-around).
- We calculate statistics at most every 16th round, since we may have
- a counter table which is very small and updated very often. */
-
- if (counter > 2000000000
- || ((ib_int64_t)counter > 16 + table->stat_n_rows / 16)) {
-
- dict_update_statistics(table);
- }
-}
-
-/*********************************************************************//**
-Unlocks AUTO_INC type locks that were possibly reserved by a trx. This
-function should be called at the the end of an SQL statement, by the
-connection thread that owns the transaction (trx->mysql_thd). */
-UNIV_INTERN
-void
-row_unlock_table_autoinc_for_mysql(
-/*===============================*/
- trx_t* trx) /*!< in/out: transaction */
-{
- if (lock_trx_holds_autoinc_locks(trx)) {
- mutex_enter(&kernel_mutex);
-
- lock_release_autoinc_locks(trx);
-
- mutex_exit(&kernel_mutex);
- }
-}
-
-/*********************************************************************//**
-Sets an AUTO_INC type lock on the table mentioned in prebuilt. The
-AUTO_INC lock gives exclusive access to the auto-inc counter of the
-table. The lock is reserved only for the duration of an SQL statement.
-It is not compatible with another AUTO_INC or exclusive lock on the
-table.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-row_lock_table_autoinc_for_mysql(
-/*=============================*/
- row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in the MySQL
- table handle */
-{
- trx_t* trx = prebuilt->trx;
- ins_node_t* node = prebuilt->ins_node;
- const dict_table_t* table = prebuilt->table;
- que_thr_t* thr;
- ulint err;
- ibool was_lock_wait;
-
- ut_ad(trx);
- ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
-
- /* If we already hold an AUTOINC lock on the table then do nothing.
- Note: We peek at the value of the current owner without acquiring
- the kernel mutex. **/
- if (trx == table->autoinc_trx) {
-
- return(DB_SUCCESS);
- }
-
- trx->op_info = "setting auto-inc lock";
-
- if (node == NULL) {
- row_get_prebuilt_insert_row(prebuilt);
- node = prebuilt->ins_node;
- }
-
- /* We use the insert query graph as the dummy graph needed
- in the lock module call */
-
- thr = que_fork_get_first_thr(prebuilt->ins_graph);
-
- que_thr_move_to_run_state_for_mysql(thr, trx);
-
-run_again:
- thr->run_node = node;
- thr->prev_node = node;
-
- /* It may be that the current session has not yet started
- its transaction, or it has been committed: */
-
- trx_start_if_not_started(trx);
-
- err = lock_table(0, prebuilt->table, LOCK_AUTO_INC, thr);
-
- trx->error_state = err;
-
- if (err != DB_SUCCESS) {
- que_thr_stop_for_mysql(thr);
-
- was_lock_wait = row_mysql_handle_errors(&err, trx, thr, NULL);
-
- if (was_lock_wait) {
- goto run_again;
- }
-
- trx->op_info = "";
-
- return((int) err);
- }
-
- que_thr_stop_for_mysql_no_error(thr, trx);
-
- trx->op_info = "";
-
- return((int) err);
-}
-
-/*********************************************************************//**
-Sets a table lock on the table mentioned in prebuilt.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-row_lock_table_for_mysql(
-/*=====================*/
- row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in the MySQL
- table handle */
- dict_table_t* table, /*!< in: table to lock, or NULL
- if prebuilt->table should be
- locked as
- prebuilt->select_lock_type */
- ulint mode) /*!< in: lock mode of table
- (ignored if table==NULL) */
-{
- trx_t* trx = prebuilt->trx;
- que_thr_t* thr;
- ulint err;
- ibool was_lock_wait;
-
- ut_ad(trx);
- ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
-
- trx->op_info = "setting table lock";
-
- if (prebuilt->sel_graph == NULL) {
- /* Build a dummy select query graph */
- row_prebuild_sel_graph(prebuilt);
- }
-
- /* We use the select query graph as the dummy graph needed
- in the lock module call */
-
- thr = que_fork_get_first_thr(prebuilt->sel_graph);
-
- que_thr_move_to_run_state_for_mysql(thr, trx);
-
-run_again:
- thr->run_node = thr;
- thr->prev_node = thr->common.parent;
-
- /* It may be that the current session has not yet started
- its transaction, or it has been committed: */
-
- trx_start_if_not_started(trx);
-
- if (table) {
- err = lock_table(0, table, mode, thr);
- } else {
- err = lock_table(0, prebuilt->table,
- prebuilt->select_lock_type, thr);
- }
-
- trx->error_state = err;
-
- if (err != DB_SUCCESS) {
- que_thr_stop_for_mysql(thr);
-
- was_lock_wait = row_mysql_handle_errors(&err, trx, thr, NULL);
-
- if (was_lock_wait) {
- goto run_again;
- }
-
- trx->op_info = "";
-
- return((int) err);
- }
-
- que_thr_stop_for_mysql_no_error(thr, trx);
-
- trx->op_info = "";
-
- return((int) err);
-}
-
-/*********************************************************************//**
-Does an insert for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-row_insert_for_mysql(
-/*=================*/
- byte* mysql_rec, /*!< in: row in the MySQL format */
- row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
- handle */
-{
- trx_savept_t savept;
- que_thr_t* thr;
- ulint err;
- ibool was_lock_wait;
- trx_t* trx = prebuilt->trx;
- ins_node_t* node = prebuilt->ins_node;
-
- ut_ad(trx);
- ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
-
- if (prebuilt->table->ibd_file_missing) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error:\n"
- "InnoDB: MySQL is trying to use a table handle"
- " but the .ibd file for\n"
- "InnoDB: table %s does not exist.\n"
- "InnoDB: Have you deleted the .ibd file"
- " from the database directory under\n"
- "InnoDB: the MySQL datadir, or have you"
- " used DISCARD TABLESPACE?\n"
- "InnoDB: Look from\n"
- "InnoDB: " REFMAN "innodb-troubleshooting.html\n"
- "InnoDB: how you can resolve the problem.\n",
- prebuilt->table->name);
- return(DB_ERROR);
- }
-
- if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) {
- fprintf(stderr,
- "InnoDB: Error: trying to free a corrupt\n"
- "InnoDB: table handle. Magic n %lu, table name ",
- (ulong) prebuilt->magic_n);
- ut_print_name(stderr, trx, TRUE, prebuilt->table->name);
- putc('\n', stderr);
-
- mem_analyze_corruption(prebuilt);
-
- ut_error;
- }
-
- if (UNIV_UNLIKELY(srv_created_new_raw || srv_force_recovery)) {
- fputs("InnoDB: A new raw disk partition was initialized or\n"
- "InnoDB: innodb_force_recovery is on: we do not allow\n"
- "InnoDB: database modifications by the user. Shut down\n"
- "InnoDB: mysqld and edit my.cnf so that"
- " newraw is replaced\n"
- "InnoDB: with raw, and innodb_force_... is removed.\n",
- stderr);
-
- return(DB_ERROR);
- }
-
- trx->op_info = "inserting";
-
- row_mysql_delay_if_needed();
-
- trx_start_if_not_started(trx);
-
- if (node == NULL) {
- row_get_prebuilt_insert_row(prebuilt);
- node = prebuilt->ins_node;
- }
-
- row_mysql_convert_row_to_innobase(node->row, prebuilt, mysql_rec);
-
- savept = trx_savept_take(trx);
-
- thr = que_fork_get_first_thr(prebuilt->ins_graph);
-
- if (prebuilt->sql_stat_start) {
- node->state = INS_NODE_SET_IX_LOCK;
- prebuilt->sql_stat_start = FALSE;
- } else {
- node->state = INS_NODE_ALLOC_ROW_ID;
- }
-
- que_thr_move_to_run_state_for_mysql(thr, trx);
-
-run_again:
- thr->run_node = node;
- thr->prev_node = node;
-
- row_ins_step(thr);
-
- err = trx->error_state;
-
- if (err != DB_SUCCESS) {
- que_thr_stop_for_mysql(thr);
-
- /* TODO: what is this? */ thr->lock_state= QUE_THR_LOCK_ROW;
-
- was_lock_wait = row_mysql_handle_errors(&err, trx, thr,
- &savept);
- thr->lock_state= QUE_THR_LOCK_NOLOCK;
-
- if (was_lock_wait) {
- goto run_again;
- }
-
- trx->op_info = "";
-
- return((int) err);
- }
-
- que_thr_stop_for_mysql_no_error(thr, trx);
-
- prebuilt->table->stat_n_rows++;
-
- srv_n_rows_inserted++;
-
- if (prebuilt->table->stat_n_rows == 0) {
- /* Avoid wrap-over */
- prebuilt->table->stat_n_rows--;
- }
-
- row_update_statistics_if_needed(prebuilt->table);
- trx->op_info = "";
-
- return((int) err);
-}
-
-/*********************************************************************//**
-Builds a dummy query graph used in selects. */
-UNIV_INTERN
-void
-row_prebuild_sel_graph(
-/*===================*/
- row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
- handle */
-{
- sel_node_t* node;
-
- ut_ad(prebuilt && prebuilt->trx);
-
- if (prebuilt->sel_graph == NULL) {
-
- node = sel_node_create(prebuilt->heap);
-
- prebuilt->sel_graph = que_node_get_parent(
- pars_complete_graph_for_exec(node,
- prebuilt->trx,
- prebuilt->heap));
-
- prebuilt->sel_graph->state = QUE_FORK_ACTIVE;
- }
-}
-
-/*********************************************************************//**
-Creates an query graph node of 'update' type to be used in the MySQL
-interface.
-@return own: update node */
-UNIV_INTERN
-upd_node_t*
-row_create_update_node_for_mysql(
-/*=============================*/
- dict_table_t* table, /*!< in: table to update */
- mem_heap_t* heap) /*!< in: mem heap from which allocated */
-{
- upd_node_t* node;
-
- node = upd_node_create(heap);
-
- node->in_mysql_interface = TRUE;
- node->is_delete = FALSE;
- node->searched_update = FALSE;
- node->select = NULL;
- node->pcur = btr_pcur_create_for_mysql();
- node->table = table;
-
- node->update = upd_create(dict_table_get_n_cols(table), heap);
-
- node->update_n_fields = dict_table_get_n_cols(table);
-
- UT_LIST_INIT(node->columns);
- node->has_clust_rec_x_lock = TRUE;
- node->cmpl_info = 0;
-
- node->table_sym = NULL;
- node->col_assign_list = NULL;
-
- return(node);
-}
-
-/*********************************************************************//**
-Gets pointer to a prebuilt update vector used in updates. If the update
-graph has not yet been built in the prebuilt struct, then this function
-first builds it.
-@return prebuilt update vector */
-UNIV_INTERN
-upd_t*
-row_get_prebuilt_update_vector(
-/*===========================*/
- row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
- handle */
-{
- dict_table_t* table = prebuilt->table;
- upd_node_t* node;
-
- ut_ad(prebuilt && table && prebuilt->trx);
-
- if (prebuilt->upd_node == NULL) {
-
- /* Not called before for this handle: create an update node
- and query graph to the prebuilt struct */
-
- node = row_create_update_node_for_mysql(table, prebuilt->heap);
-
- prebuilt->upd_node = node;
-
- prebuilt->upd_graph = que_node_get_parent(
- pars_complete_graph_for_exec(node,
- prebuilt->trx,
- prebuilt->heap));
- prebuilt->upd_graph->state = QUE_FORK_ACTIVE;
- }
-
- return(prebuilt->upd_node->update);
-}
-
-/*********************************************************************//**
-Does an update or delete of a row for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-row_update_for_mysql(
-/*=================*/
- byte* mysql_rec, /*!< in: the row to be updated, in
- the MySQL format */
- row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
- handle */
-{
- trx_savept_t savept;
- ulint err;
- que_thr_t* thr;
- ibool was_lock_wait;
- dict_index_t* clust_index;
- /* ulint ref_len; */
- upd_node_t* node;
- dict_table_t* table = prebuilt->table;
- trx_t* trx = prebuilt->trx;
-
- ut_ad(prebuilt && trx);
- ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
- UT_NOT_USED(mysql_rec);
-
- if (prebuilt->table->ibd_file_missing) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error:\n"
- "InnoDB: MySQL is trying to use a table handle"
- " but the .ibd file for\n"
- "InnoDB: table %s does not exist.\n"
- "InnoDB: Have you deleted the .ibd file"
- " from the database directory under\n"
- "InnoDB: the MySQL datadir, or have you"
- " used DISCARD TABLESPACE?\n"
- "InnoDB: Look from\n"
- "InnoDB: " REFMAN "innodb-troubleshooting.html\n"
- "InnoDB: how you can resolve the problem.\n",
- prebuilt->table->name);
- return(DB_ERROR);
- }
-
- if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) {
- fprintf(stderr,
- "InnoDB: Error: trying to free a corrupt\n"
- "InnoDB: table handle. Magic n %lu, table name ",
- (ulong) prebuilt->magic_n);
- ut_print_name(stderr, trx, TRUE, prebuilt->table->name);
- putc('\n', stderr);
-
- mem_analyze_corruption(prebuilt);
-
- ut_error;
- }
-
- if (UNIV_UNLIKELY(srv_created_new_raw || srv_force_recovery)) {
- fputs("InnoDB: A new raw disk partition was initialized or\n"
- "InnoDB: innodb_force_recovery is on: we do not allow\n"
- "InnoDB: database modifications by the user. Shut down\n"
- "InnoDB: mysqld and edit my.cnf so that newraw"
- " is replaced\n"
- "InnoDB: with raw, and innodb_force_... is removed.\n",
- stderr);
-
- return(DB_ERROR);
- }
-
- trx->op_info = "updating or deleting";
-
- row_mysql_delay_if_needed();
-
- trx_start_if_not_started(trx);
-
- node = prebuilt->upd_node;
-
- clust_index = dict_table_get_first_index(table);
-
- if (prebuilt->pcur->btr_cur.index == clust_index) {
- btr_pcur_copy_stored_position(node->pcur, prebuilt->pcur);
- } else {
- btr_pcur_copy_stored_position(node->pcur,
- prebuilt->clust_pcur);
- }
-
- ut_a(node->pcur->rel_pos == BTR_PCUR_ON);
-
- /* MySQL seems to call rnd_pos before updating each row it
- has cached: we can get the correct cursor position from
- prebuilt->pcur; NOTE that we cannot build the row reference
- from mysql_rec if the clustered index was automatically
- generated for the table: MySQL does not know anything about
- the row id used as the clustered index key */
-
- savept = trx_savept_take(trx);
-
- thr = que_fork_get_first_thr(prebuilt->upd_graph);
-
- node->state = UPD_NODE_UPDATE_CLUSTERED;
-
- ut_ad(!prebuilt->sql_stat_start);
-
- que_thr_move_to_run_state_for_mysql(thr, trx);
-
-run_again:
- thr->run_node = node;
- thr->prev_node = node;
-
- row_upd_step(thr);
-
- err = trx->error_state;
-
- if (err != DB_SUCCESS) {
- que_thr_stop_for_mysql(thr);
-
- if (err == DB_RECORD_NOT_FOUND) {
- trx->error_state = DB_SUCCESS;
- trx->op_info = "";
-
- return((int) err);
- }
-
- thr->lock_state= QUE_THR_LOCK_ROW;
- was_lock_wait = row_mysql_handle_errors(&err, trx, thr,
- &savept);
- thr->lock_state= QUE_THR_LOCK_NOLOCK;
-
- if (was_lock_wait) {
- goto run_again;
- }
-
- trx->op_info = "";
-
- return((int) err);
- }
-
- que_thr_stop_for_mysql_no_error(thr, trx);
-
- if (node->is_delete) {
- if (prebuilt->table->stat_n_rows > 0) {
- prebuilt->table->stat_n_rows--;
- }
-
- srv_n_rows_deleted++;
- } else {
- srv_n_rows_updated++;
- }
-
- row_update_statistics_if_needed(prebuilt->table);
-
- trx->op_info = "";
-
- return((int) err);
-}
-
-/*********************************************************************//**
-This can only be used when srv_locks_unsafe_for_binlog is TRUE or
-this session is using a READ COMMITTED isolation level. Before
-calling this function we must use trx_reset_new_rec_lock_info() and
-trx_register_new_rec_lock() to store the information which new record locks
-really were set. This function removes a newly set lock under prebuilt->pcur,
-and also under prebuilt->clust_pcur. Currently, this is only used and tested
-in the case of an UPDATE or a DELETE statement, where the row lock is of the
-LOCK_X type.
-Thus, this implements a 'mini-rollback' that releases the latest record
-locks we set.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-row_unlock_for_mysql(
-/*=================*/
- row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in MySQL
- handle */
- ibool has_latches_on_recs)/*!< TRUE if called so that we have
- the latches on the records under pcur
- and clust_pcur, and we do not need to
- reposition the cursors. */
-{
- btr_pcur_t* pcur = prebuilt->pcur;
- btr_pcur_t* clust_pcur = prebuilt->clust_pcur;
- trx_t* trx = prebuilt->trx;
-
- ut_ad(prebuilt && trx);
- ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
-
- if (UNIV_UNLIKELY
- (!srv_locks_unsafe_for_binlog
- && trx->isolation_level != TRX_ISO_READ_COMMITTED)) {
-
- fprintf(stderr,
- "InnoDB: Error: calling row_unlock_for_mysql though\n"
- "InnoDB: innodb_locks_unsafe_for_binlog is FALSE and\n"
- "InnoDB: this session is not using"
- " READ COMMITTED isolation level.\n");
-
- return(DB_SUCCESS);
- }
-
- trx->op_info = "unlock_row";
-
- if (prebuilt->new_rec_locks >= 1) {
-
- const rec_t* rec;
- dict_index_t* index;
- trx_id_t rec_trx_id;
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- /* Restore the cursor position and find the record */
-
- if (!has_latches_on_recs) {
- btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, &mtr);
- }
-
- rec = btr_pcur_get_rec(pcur);
- index = btr_pcur_get_btr_cur(pcur)->index;
-
- if (prebuilt->new_rec_locks >= 2) {
- /* Restore the cursor position and find the record
- in the clustered index. */
-
- if (!has_latches_on_recs) {
- btr_pcur_restore_position(BTR_SEARCH_LEAF,
- clust_pcur, &mtr);
- }
-
- rec = btr_pcur_get_rec(clust_pcur);
- index = btr_pcur_get_btr_cur(clust_pcur)->index;
- }
-
- if (UNIV_UNLIKELY(!dict_index_is_clust(index))) {
- /* This is not a clustered index record. We
- do not know how to unlock the record. */
- goto no_unlock;
- }
-
- /* If the record has been modified by this
- transaction, do not unlock it. */
-
- if (index->trx_id_offset) {
- rec_trx_id = trx_read_trx_id(rec
- + index->trx_id_offset);
- } else {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
-
- rec_offs_init(offsets_);
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- rec_trx_id = row_get_rec_trx_id(rec, index, offsets);
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-
- if (ut_dulint_cmp(rec_trx_id, trx->id) != 0) {
- /* We did not update the record: unlock it */
-
- rec = btr_pcur_get_rec(pcur);
- index = btr_pcur_get_btr_cur(pcur)->index;
-
- lock_rec_unlock(trx, btr_pcur_get_block(pcur),
- rec, prebuilt->select_lock_type);
-
- if (prebuilt->new_rec_locks >= 2) {
- rec = btr_pcur_get_rec(clust_pcur);
- index = btr_pcur_get_btr_cur(clust_pcur)->index;
-
- lock_rec_unlock(trx,
- btr_pcur_get_block(clust_pcur),
- rec,
- prebuilt->select_lock_type);
- }
- }
-no_unlock:
- mtr_commit(&mtr);
- }
-
- trx->op_info = "";
-
- return(DB_SUCCESS);
-}
-
-/**********************************************************************//**
-Does a cascaded delete or set null in a foreign key operation.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-ulint
-row_update_cascade_for_mysql(
-/*=========================*/
- que_thr_t* thr, /*!< in: query thread */
- upd_node_t* node, /*!< in: update node used in the cascade
- or set null operation */
- dict_table_t* table) /*!< in: table where we do the operation */
-{
- ulint err;
- trx_t* trx;
-
- trx = thr_get_trx(thr);
-run_again:
- thr->run_node = node;
- thr->prev_node = node;
-
- row_upd_step(thr);
-
- err = trx->error_state;
-
- /* Note that the cascade node is a subnode of another InnoDB
- query graph node. We do a normal lock wait in this node, but
- all errors are handled by the parent node. */
-
- if (err == DB_LOCK_WAIT) {
- /* Handle lock wait here */
-
- que_thr_stop_for_mysql(thr);
-
- srv_suspend_mysql_thread(thr);
-
- /* Note that a lock wait may also end in a lock wait timeout,
- or this transaction is picked as a victim in selective
- deadlock resolution */
-
- if (trx->error_state != DB_SUCCESS) {
-
- return(trx->error_state);
- }
-
- /* Retry operation after a normal lock wait */
-
- goto run_again;
- }
-
- if (err != DB_SUCCESS) {
-
- return(err);
- }
-
- if (node->is_delete) {
- if (table->stat_n_rows > 0) {
- table->stat_n_rows--;
- }
-
- srv_n_rows_deleted++;
- } else {
- srv_n_rows_updated++;
- }
-
- row_update_statistics_if_needed(table);
-
- return(err);
-}
-
-/*********************************************************************//**
-Checks if a table is such that we automatically created a clustered
-index on it (on row id).
-@return TRUE if the clustered index was generated automatically */
-UNIV_INTERN
-ibool
-row_table_got_default_clust_index(
-/*==============================*/
- const dict_table_t* table) /*!< in: table */
-{
- const dict_index_t* clust_index;
-
- clust_index = dict_table_get_first_index(table);
-
- return(dict_index_get_nth_col(clust_index, 0)->mtype == DATA_SYS);
-}
-
-/*********************************************************************//**
-Calculates the key number used inside MySQL for an Innobase index. We have
-to take into account if we generated a default clustered index for the table
-@return the key number used inside MySQL */
-UNIV_INTERN
-ulint
-row_get_mysql_key_number_for_index(
-/*===============================*/
- const dict_index_t* index) /*!< in: index */
-{
- const dict_index_t* ind;
- ulint i;
-
- ut_a(index);
-
- i = 0;
- ind = dict_table_get_first_index(index->table);
-
- while (index != ind) {
- ind = dict_table_get_next_index(ind);
- i++;
- }
-
- if (row_table_got_default_clust_index(index->table)) {
- ut_a(i > 0);
- i--;
- }
-
- return(i);
-}
-
-/*********************************************************************//**
-Locks the data dictionary in shared mode from modifications, for performing
-foreign key check, rollback, or other operation invisible to MySQL. */
-UNIV_INTERN
-void
-row_mysql_freeze_data_dictionary_func(
-/*==================================*/
- trx_t* trx, /*!< in/out: transaction */
- const char* file, /*!< in: file name */
- ulint line) /*!< in: line number */
-{
- ut_a(trx->dict_operation_lock_mode == 0);
-
- rw_lock_s_lock_func(&dict_operation_lock, 0, file, line);
-
- trx->dict_operation_lock_mode = RW_S_LATCH;
-}
-
-/*********************************************************************//**
-Unlocks the data dictionary shared lock. */
-UNIV_INTERN
-void
-row_mysql_unfreeze_data_dictionary(
-/*===============================*/
- trx_t* trx) /*!< in/out: transaction */
-{
- ut_a(trx->dict_operation_lock_mode == RW_S_LATCH);
-
- rw_lock_s_unlock(&dict_operation_lock);
-
- trx->dict_operation_lock_mode = 0;
-}
-
-/*********************************************************************//**
-Locks the data dictionary exclusively for performing a table create or other
-data dictionary modification operation. */
-UNIV_INTERN
-void
-row_mysql_lock_data_dictionary_func(
-/*================================*/
- trx_t* trx, /*!< in/out: transaction */
- const char* file, /*!< in: file name */
- ulint line) /*!< in: line number */
-{
- ut_a(trx->dict_operation_lock_mode == 0
- || trx->dict_operation_lock_mode == RW_X_LATCH);
-
- /* Serialize data dictionary operations with dictionary mutex:
- no deadlocks or lock waits can occur then in these operations */
-
- rw_lock_x_lock_func(&dict_operation_lock, 0, file, line);
- trx->dict_operation_lock_mode = RW_X_LATCH;
-
- mutex_enter(&(dict_sys->mutex));
-}
-
-/*********************************************************************//**
-Unlocks the data dictionary exclusive lock. */
-UNIV_INTERN
-void
-row_mysql_unlock_data_dictionary(
-/*=============================*/
- trx_t* trx) /*!< in/out: transaction */
-{
- ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
-
- /* Serialize data dictionary operations with dictionary mutex:
- no deadlocks can occur then in these operations */
-
- mutex_exit(&(dict_sys->mutex));
- rw_lock_x_unlock(&dict_operation_lock);
-
- trx->dict_operation_lock_mode = 0;
-}
-
-/*********************************************************************//**
-Creates a table for MySQL. If the name of the table ends in
-one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
-"innodb_table_monitor", then this will also start the printing of monitor
-output by the master thread. If the table name ends in "innodb_mem_validate",
-InnoDB will try to invoke mem_validate().
-
-The special names are compared against the part of table->name that
-follows the first '/' (the database name prefix is skipped).
-
-The assertions at the start of the function expect the caller to own
-dict_sys->mutex and to have trx->dict_operation_lock_mode == RW_X_LATCH.
-
-There are two early rejection paths (a new raw partition was initialized,
-or the table is a MySQL system table). On both of them the table object is
-freed, trx is committed and DB_ERROR is returned. Otherwise the returned
-value is trx->error_state as it was right after the table creation query
-graph was run; if that is not DB_SUCCESS, the transaction is rolled back.
-NOTE(review): on the rollback path the table object is not freed here (see
-the TO DO comment in the body) - confirm who owns it in that case.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-row_create_table_for_mysql(
-/*=======================*/
- dict_table_t* table, /*!< in, own: table definition
- (will be freed) */
- trx_t* trx) /*!< in: transaction handle */
-{
- tab_node_t* node;
- mem_heap_t* heap;
- que_thr_t* thr;
- const char* table_name;
- ulint table_name_len;
- ulint err;
-
- ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
-
- if (srv_created_new_raw) {
- fputs("InnoDB: A new raw disk partition was initialized:\n"
- "InnoDB: we do not allow database modifications"
- " by the user.\n"
- "InnoDB: Shut down mysqld and edit my.cnf so that newraw"
- " is replaced with raw.\n", stderr);
-err_exit: /* also entered by goto from the system table check below */
- dict_mem_table_free(table);
- trx_commit_for_mysql(trx);
-
- return(DB_ERROR);
- }
-
- trx->op_info = "creating table";
-
- if (row_mysql_is_system_table(table->name)) {
-
- fprintf(stderr,
- "InnoDB: Error: trying to create a MySQL system"
- " table %s of type InnoDB.\n"
- "InnoDB: MySQL system tables must be"
- " of the MyISAM type!\n",
- table->name);
- goto err_exit;
- }
-
- trx_start_if_not_started(trx);
-
- /* The table name is prefixed with the database name and a '/'.
- Certain table names starting with 'innodb_' have their special
- meaning regardless of the database name. Thus, we need to
- ignore the database name prefix in the comparisons. */
- table_name = strchr(table->name, '/');
- ut_a(table_name);
- table_name++;
- table_name_len = strlen(table_name) + 1; /* counts the terminating NUL */
-
- if (STR_EQ(table_name, table_name_len, S_innodb_monitor)) {
-
- /* Table equals "innodb_monitor":
- start monitor prints */
-
- srv_print_innodb_monitor = TRUE;
-
- /* The lock timeout monitor thread also takes care
- of InnoDB monitor prints */
-
- os_event_set(srv_lock_timeout_thread_event);
- } else if (STR_EQ(table_name, table_name_len,
- S_innodb_lock_monitor)) {
-
- srv_print_innodb_monitor = TRUE;
- srv_print_innodb_lock_monitor = TRUE;
- os_event_set(srv_lock_timeout_thread_event);
- } else if (STR_EQ(table_name, table_name_len,
- S_innodb_tablespace_monitor)) {
-
- srv_print_innodb_tablespace_monitor = TRUE;
- os_event_set(srv_lock_timeout_thread_event);
- } else if (STR_EQ(table_name, table_name_len,
- S_innodb_table_monitor)) {
-
- srv_print_innodb_table_monitor = TRUE;
- os_event_set(srv_lock_timeout_thread_event);
- } else if (STR_EQ(table_name, table_name_len,
- S_innodb_mem_validate)) {
- /* We define here a debugging feature intended for
- developers */
-
- fputs("Validating InnoDB memory:\n"
- "to use this feature you must compile InnoDB with\n"
- "UNIV_MEM_DEBUG defined in univ.i and"
- " the server must be\n"
- "quiet because allocation from a mem heap"
- " is not protected\n"
- "by any semaphore.\n", stderr);
-#ifdef UNIV_MEM_DEBUG
- ut_a(mem_validate());
- fputs("Memory validated\n", stderr);
-#else /* UNIV_MEM_DEBUG */
- fputs("Memory NOT validated (recompile with UNIV_MEM_DEBUG)\n",
- stderr);
-#endif /* UNIV_MEM_DEBUG */
- }
-
- heap = mem_heap_create(512);
-
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
-
- node = tab_create_graph_create(table, heap);
-
- thr = pars_complete_graph_for_exec(node, trx, heap);
-
- ut_a(thr == que_fork_start_command(que_node_get_parent(thr)));
- que_run_threads(thr);
-
- err = trx->error_state; /* this is the value that gets returned */
-
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
- trx->error_state = DB_SUCCESS; /* cleared before the rollback */
- trx_general_rollback_for_mysql(trx, NULL);
- /* TO DO: free table? The code below will dereference
- table->name, though. */
- }
-
- switch (err) {
- case DB_OUT_OF_FILE_SPACE:
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: cannot create table ",
- stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs(" because tablespace full\n", stderr);
-
- if (dict_table_get_low(table->name)) {
-
- row_drop_table_for_mysql(table->name, trx, FALSE);
- trx_commit_for_mysql(trx);
- }
- break;
-
- case DB_DUPLICATE_KEY:
- /* We may also get err == DB_ERROR if the .ibd file for the
- table already exists */
-
- break;
- }
-
- que_graph_free((que_t*) que_node_get_parent(thr));
-
- trx->op_info = "";
-
- return((int) err);
-}
-
-/*********************************************************************//**
-Does an index creation operation for MySQL by running an index creation
-query graph. Before the graph is built the index definition is validated:
-no column name may occur twice, and for every column neither the prefix
-length nor (when field_lengths is given) the actual field length may reach
-DICT_MAX_INDEX_COL_LEN.
-TODO: currently any failure to create an index results in rolling back
-the transaction and dropping the whole table! This is no problem as long
-as all indexes are created at the same time as the table.
-NOTE(review): when the validation fails, the index object is never handed
-to the query graph; nothing in this function frees it on that path -
-confirm whether the caller is expected to.
-@return error number or DB_SUCCESS */
-UNIV_INTERN
-int
-row_create_index_for_mysql(
-/*=======================*/
-	dict_index_t*	index,		/*!< in, own: index definition
-					(will be freed) */
-	trx_t*		trx,		/*!< in: transaction handle */
-	const ulint*	field_lengths)	/*!< in: if not NULL, must contain
-					dict_index_get_n_fields(index)
-					actual field lengths for the
-					index columns, which are
-					then checked for not being too
-					large. */
-{
-	char*	saved_table_name;
-	ulint	err	= DB_SUCCESS;
-	ulint	pos;
-
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
-
-	trx->op_info = "creating index";
-
-	/* Keep a private copy of the table name: the index object is
-	freed inside que_run_threads(), and we may still need the name
-	afterwards in order to drop the table. */
-	saved_table_name = mem_strdup(index->table_name);
-
-	trx_start_if_not_started(trx);
-
-	/* Validate the definition column by column. Starting from 4.0.14,
-	InnoDB should be able to cope with a column appearing twice, but
-	it is safer not to allow that. */
-
-	for (pos = 0; pos < dict_index_get_n_fields(index); pos++) {
-		ulint	prev;
-		ulint	col_len;
-
-		/* Compare this column against every earlier one. */
-		for (prev = 0; prev < pos; prev++) {
-			if (0 != ut_strcmp(
-				    dict_index_get_nth_field(index, prev)->name,
-				    dict_index_get_nth_field(index, pos)->name)) {
-
-				continue;
-			}
-
-			ut_print_timestamp(stderr);
-
-			fputs("  InnoDB: Error: column ", stderr);
-			ut_print_name(stderr, trx, FALSE,
-				      dict_index_get_nth_field(
-					      index, pos)->name);
-			fputs(" appears twice in ", stderr);
-			dict_index_name_print(stderr, trx, index);
-			fputs("\n"
-			      "InnoDB: This is not allowed"
-			      " in InnoDB.\n", stderr);
-
-			err = DB_COL_APPEARS_TWICE_IN_INDEX;
-
-			break;
-		}
-
-		if (err != DB_SUCCESS) {
-
-			break;
-		}
-
-		/* Both the prefix length and the actual length must be
-		< DICT_MAX_INDEX_COL_LEN */
-
-		col_len = dict_index_get_nth_field(index, pos)->prefix_len;
-
-		if (field_lengths) {
-			col_len = ut_max(col_len, field_lengths[pos]);
-		}
-
-		if (col_len >= DICT_MAX_INDEX_COL_LEN) {
-			err = DB_TOO_BIG_RECORD;
-
-			break;
-		}
-	}
-
-	if (err == DB_SUCCESS) {
-		mem_heap_t*	heap;
-		ind_node_t*	node;
-		que_thr_t*	thr;
-
-		heap = mem_heap_create(512);
-
-		trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
-
-		/* Note that the space id where we store the index is
-		inherited from the table in dict_build_index_def_step()
-		in dict0crea.c. */
-
-		node = ind_create_graph_create(index, heap);
-
-		thr = pars_complete_graph_for_exec(node, trx, heap);
-
-		ut_a(thr == que_fork_start_command(que_node_get_parent(thr)));
-		que_run_threads(thr);
-
-		err = trx->error_state;
-
-		que_graph_free((que_t*) que_node_get_parent(thr));
-	}
-
-	if (err != DB_SUCCESS) {
-		/* Special error handling: undo everything and drop the
-		whole table. */
-
-		trx->error_state = DB_SUCCESS;
-
-		trx_general_rollback_for_mysql(trx, NULL);
-
-		row_drop_table_for_mysql(saved_table_name, trx, FALSE);
-
-		trx_commit_for_mysql(trx);
-
-		trx->error_state = DB_SUCCESS;
-	}
-
-	trx->op_info = "";
-
-	mem_free(saved_table_name);
-
-	return((int) err);
-}
-
-/*********************************************************************//**
-Parses a table create SQL string and adds the foreign key constraints
-declared in it to the data dictionary. Call this only after the indexes
-of the table have been created: each foreign key constraint must be
-backed by an index in both participating tables (the indexes may contain
-more fields than the constraint mentions). After the constraints have been
-added, the constraints that reference this table are loaded and checked
-as well. If either step fails, the transaction is rolled back and the
-table is dropped.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-row_table_add_foreign_constraints(
-/*==============================*/
-	trx_t*		trx,		/*!< in: transaction */
-	const char*	sql_string,	/*!< in: table create statement where
-					foreign keys are declared like:
-					FOREIGN KEY (a, b) REFERENCES
-					table2(c, d), table2 can be written
-					also with the database name before
-					it: test.table2 */
-	const char*	name,		/*!< in: table full name in the
-					normalized form
-					database_name/table_name */
-	ibool		reject_fks)	/*!< in: if TRUE, fail with error
-					code DB_CANNOT_ADD_CONSTRAINT if
-					any foreign keys are found. */
-{
-	ulint	error;
-
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-	ut_a(sql_string);
-
-	trx->op_info = "adding foreign keys";
-
-	trx_start_if_not_started(trx);
-
-	trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
-
-	error = dict_create_foreign_constraints(trx, sql_string, name,
-						reject_fks);
-
-	if (error == DB_SUCCESS) {
-		/* The constraints referencing this table must be
-		ok, too */
-		error = dict_load_foreigns(name, TRUE);
-	}
-
-	if (error == DB_SUCCESS) {
-
-		return((int) error);
-	}
-
-	/* Special error handling: roll back and drop the table */
-
-	trx->error_state = DB_SUCCESS;
-
-	trx_general_rollback_for_mysql(trx, NULL);
-
-	row_drop_table_for_mysql(name, trx, FALSE);
-
-	trx_commit_for_mysql(trx);
-
-	trx->error_state = DB_SUCCESS;
-
-	return((int) error);
-}
-
-/*********************************************************************//**
-Drops a table for MySQL as a background operation. In ALTER TABLE on Unix
-MySQL relies on the table handler not removing the table before all
-handles to it have been removed. Furthermore, MySQL's call to drop table
-must be non-blocking. Therefore the drop is done as a background
-operation, which is taken care of by the master thread in srv0srv.c.
-The drop runs in a background transaction of its own, with foreign key
-checks disabled.
-@return error code or DB_SUCCESS */
-static
-int
-row_drop_table_for_mysql_in_background(
-/*===================================*/
-	const char*	name)	/*!< in: table name */
-{
-	trx_t*	bg_trx	= trx_allocate_for_background();
-	ulint	err;
-
-	/* The original transaction may have been dropping a table that is
-	referenced by foreign keys; to be able to drop such a table we
-	must switch the checks off. */
-	bg_trx->check_foreigns = FALSE;
-
-	/* Attempt the drop inside InnoDB */
-	err = row_drop_table_for_mysql(name, bg_trx, FALSE);
-
-	/* Flush the log to reduce the probability that the .frm files
-	and the InnoDB data dictionary get out-of-sync if the user runs
-	with innodb_flush_log_at_trx_commit = 0 */
-	log_buffer_flush_to_disk();
-
-	trx_commit_for_mysql(bg_trx);
-
-	trx_free_for_background(bg_trx);
-
-	return((int) err);
-}
-
-/*********************************************************************//**
-The master thread in srv0srv.c calls this regularly to drop tables which
-we must drop in background after queries to them have ended. Such lazy
-dropping of tables is needed in ALTER TABLE on Unix.
-The drop list is processed from its head, one entry per round. An entry
-whose table is no longer found in the dictionary is simply removed from
-the list. If dropping a table fails, the function returns at once and the
-entry stays in the list for a later retry.
-@return how many tables dropped + remaining tables in list */
-UNIV_INTERN
-ulint
-row_drop_tables_for_mysql_in_background(void)
-/*=========================================*/
-{
-	ulint	n_dropped	= 0;
-
-	for (;;) {
-		row_mysql_drop_t*	head;
-		dict_table_t*		table;
-		ulint			list_len;
-
-		/* Fetch the first entry and the list length under the
-		kernel mutex. */
-		mutex_enter(&kernel_mutex);
-
-		if (!row_mysql_drop_list_inited) {
-
-			UT_LIST_INIT(row_mysql_drop_list);
-			row_mysql_drop_list_inited = TRUE;
-		}
-
-		head = UT_LIST_GET_FIRST(row_mysql_drop_list);
-
-		list_len = UT_LIST_GET_LEN(row_mysql_drop_list);
-
-		mutex_exit(&kernel_mutex);
-
-		if (head == NULL) {
-			/* The list is empty: all tables dropped */
-
-			return(list_len + n_dropped);
-		}
-
-		mutex_enter(&(dict_sys->mutex));
-		table = dict_table_get_low(head->table_name);
-		mutex_exit(&(dict_sys->mutex));
-
-		/* If for some reason the table has already been dropped
-		through some other mechanism, do not try to drop it; only
-		remove the entry from the list. */
-
-		if (table != NULL) {
-			if (DB_SUCCESS
-			    != row_drop_table_for_mysql_in_background(
-				    head->table_name)) {
-				/* The DROP failed: return, and let the
-				main thread retry later */
-
-				return(list_len + n_dropped);
-			}
-
-			n_dropped++;
-		}
-
-		mutex_enter(&kernel_mutex);
-
-		UT_LIST_REMOVE(row_mysql_drop_list, row_mysql_drop_list, head);
-
-		ut_print_timestamp(stderr);
-		fputs("  InnoDB: Dropped table ", stderr);
-		ut_print_name(stderr, NULL, TRUE, head->table_name);
-		fputs(" in background drop queue.\n", stderr);
-
-		mem_free(head->table_name);
-
-		mem_free(head);
-
-		mutex_exit(&kernel_mutex);
-	}
-}
-
-/*********************************************************************//**
-Returns the number of entries in the background drop list, initializing
-the list first if that has not been done yet. NOTE: the caller must own
-the kernel mutex!
-@return how many tables in list */
-UNIV_INTERN
-ulint
-row_get_background_drop_list_len_low(void)
-/*======================================*/
-{
-	ulint	len;
-
-	ut_ad(mutex_own(&kernel_mutex));
-
-	if (!row_mysql_drop_list_inited) {
-		/* First use: set up the empty list */
-		UT_LIST_INIT(row_mysql_drop_list);
-		row_mysql_drop_list_inited = TRUE;
-	}
-
-	len = UT_LIST_GET_LEN(row_mysql_drop_list);
-
-	return(len);
-}
-
-/*********************************************************************//**
-Appends a table to the list of tables which the master thread drops in
-background, unless a table of the same name is in the list already. We
-need this on Unix because in ALTER TABLE MySQL may call drop table even if
-the table has running queries on it. Also, if there are running foreign
-key checks on the table, we drop the table lazily.
-The whole operation is done while holding the kernel mutex.
-@return TRUE if the table was not yet in the drop list, and was added there */
-static
-ibool
-row_add_table_to_background_drop_list(
-/*==================================*/
-	const char*	name)	/*!< in: table name */
-{
-	row_mysql_drop_t*	entry;
-	ibool			added	= TRUE;
-
-	mutex_enter(&kernel_mutex);
-
-	if (!row_mysql_drop_list_inited) {
-
-		UT_LIST_INIT(row_mysql_drop_list);
-		row_mysql_drop_list_inited = TRUE;
-	}
-
-	/* Scan the list for an entry with the same table name */
-	for (entry = UT_LIST_GET_FIRST(row_mysql_drop_list);
-	     entry != NULL;
-	     entry = UT_LIST_GET_NEXT(row_mysql_drop_list, entry)) {
-
-		if (strcmp(entry->table_name, name) == 0) {
-			/* Already in the list: nothing to add */
-			added = FALSE;
-
-			break;
-		}
-	}
-
-	if (added) {
-		/* The entry owns a private copy of the name */
-		entry = mem_alloc(sizeof(row_mysql_drop_t));
-
-		entry->table_name = mem_strdup(name);
-
-		UT_LIST_ADD_LAST(row_mysql_drop_list, row_mysql_drop_list,
-				 entry);
-	}
-
-	mutex_exit(&kernel_mutex);
-
-	return(added);
-}
-
-/*********************************************************************//**
-Discards the tablespace of a table which is stored in an .ibd file.
-Discarding means that this function deletes the .ibd file and assigns a
-new table id for the table. Also the flags table->ibd_file_missing and
-table->tablespace_discarded are set TRUE.
-
-The request is refused if the table is not found (DB_TABLE_NOT_FOUND),
-if it resides in the system tablespace 0 or has a foreign key check
-running on it (DB_ERROR), or if it is referenced by a foreign key of some
-other table while trx->check_foreigns is set (DB_CANNOT_DROP_CONSTRAINT).
-In the last case the complete explanation, including both table names,
-is written to dict_foreign_err_file.
-
-The data dictionary is locked for the duration of the operation, and the
-transaction is committed before returning, on every path.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-row_discard_tablespace_for_mysql(
-/*=============================*/
-	const char*	name,	/*!< in: table name */
-	trx_t*		trx)	/*!< in: transaction handle */
-{
-	dict_foreign_t*	foreign;
-	dulint		new_id;
-	dict_table_t*	table;
-	ibool		success;
-	ulint		err;
-	pars_info_t*	info	= NULL;
-
-	/* How do we prevent crashes caused by ongoing operations on
-	the table? Old operations could try to access non-existent
-	pages.
-
-	1) SQL queries, INSERT, SELECT, ...: we must get an exclusive
-	MySQL table lock on the table before we can do DISCARD
-	TABLESPACE. Then there are no running queries on the table.
-
-	2) Purge and rollback: we assign a new table id for the
-	table. Since purge and rollback look for the table based on
-	the table id, they see the table as 'dropped' and discard
-	their operations.
-
-	3) Insert buffer: we remove all entries for the tablespace in
-	the insert buffer tree; as long as the tablespace mem object
-	does not exist, ongoing insert buffer page merges are
-	discarded in buf0rea.c. If we recreate the tablespace mem
-	object with IMPORT TABLESPACE later, then the tablespace will
-	have the same id, but the tablespace_version field in the mem
-	object is different, and ongoing old insert buffer page merges
-	get discarded.
-
-	4) Linear readahead and random readahead: we use the same
-	method as in 3) to discard ongoing operations.
-
-	5) FOREIGN KEY operations: if
-	table->n_foreign_key_checks_running > 0, we do not allow the
-	discard. We also reserve the data dictionary latch. */
-
-	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
-
-	trx->op_info = "discarding tablespace";
-	trx_start_if_not_started(trx);
-
-	/* Serialize data dictionary operations with dictionary mutex:
-	no deadlocks can occur then in these operations */
-
-	row_mysql_lock_data_dictionary(trx);
-
-	table = dict_table_get_low(name);
-
-	if (!table) {
-		err = DB_TABLE_NOT_FOUND;
-
-		goto funct_exit;
-	}
-
-	if (table->space == 0) {
-		ut_print_timestamp(stderr);
-		fputs("  InnoDB: Error: table ", stderr);
-		ut_print_name(stderr, trx, TRUE, name);
-		fputs("\n"
-		      "InnoDB: is in the system tablespace 0"
-		      " which cannot be discarded\n", stderr);
-		err = DB_ERROR;
-
-		goto funct_exit;
-	}
-
-	if (table->n_foreign_key_checks_running > 0) {
-
-		ut_print_timestamp(stderr);
-		fputs("  InnoDB: You are trying to DISCARD table ", stderr);
-		ut_print_name(stderr, trx, TRUE, table->name);
-		fputs("\n"
-		      "InnoDB: though there is a foreign key check"
-		      " running on it.\n"
-		      "InnoDB: Cannot discard the table.\n",
-		      stderr);
-
-		err = DB_ERROR;
-
-		goto funct_exit;
-	}
-
-	/* Check if the table is referenced by foreign key constraints from
-	some other table (not the table itself) */
-
-	foreign = UT_LIST_GET_FIRST(table->referenced_list);
-
-	while (foreign && foreign->foreign_table == table) {
-		foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
-	}
-
-	if (foreign && trx->check_foreigns) {
-
-		FILE*	ef	= dict_foreign_err_file;
-
-		/* We only allow discarding a referenced table if
-		FOREIGN_KEY_CHECKS is set to 0 */
-
-		err = DB_CANNOT_DROP_CONSTRAINT;
-
-		mutex_enter(&dict_foreign_err_mutex);
-		rewind(ef);
-		ut_print_timestamp(ef);
-
-		/* Every part of the message, the table names included,
-		must go to the foreign key error file. Printing the names
-		to stderr would leave the message in the file without the
-		names and scatter the names into the server error log. */
-		fputs("  Cannot DISCARD table ", ef);
-		ut_print_name(ef, trx, TRUE, name);
-		fputs("\n"
-		      "because it is referenced by ", ef);
-		ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
-		putc('\n', ef);
-		mutex_exit(&dict_foreign_err_mutex);
-
-		goto funct_exit;
-	}
-
-	new_id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID);
-
-	/* Remove all locks except the table-level S and X locks. */
-	lock_remove_all_on_table(table, FALSE);
-
-	info = pars_info_create();
-
-	pars_info_add_str_literal(info, "table_name", name);
-	pars_info_add_dulint_literal(info, "new_id", new_id);
-
-	/* Replace the old table id by new_id in SYS_TABLES, SYS_COLUMNS
-	and SYS_INDEXES. */
-	err = que_eval_sql(info,
-			   "PROCEDURE DISCARD_TABLESPACE_PROC () IS\n"
-			   "old_id CHAR;\n"
-			   "BEGIN\n"
-			   "SELECT ID INTO old_id\n"
-			   "FROM SYS_TABLES\n"
-			   "WHERE NAME = :table_name\n"
-			   "LOCK IN SHARE MODE;\n"
-			   "IF (SQL % NOTFOUND) THEN\n"
-			   " COMMIT WORK;\n"
-			   " RETURN;\n"
-			   "END IF;\n"
-			   "UPDATE SYS_TABLES SET ID = :new_id\n"
-			   " WHERE ID = old_id;\n"
-			   "UPDATE SYS_COLUMNS SET TABLE_ID = :new_id\n"
-			   " WHERE TABLE_ID = old_id;\n"
-			   "UPDATE SYS_INDEXES SET TABLE_ID = :new_id\n"
-			   " WHERE TABLE_ID = old_id;\n"
-			   "COMMIT WORK;\n"
-			   "END;\n"
-			   , FALSE, trx);
-
-	if (err != DB_SUCCESS) {
-		trx->error_state = DB_SUCCESS;
-		trx_general_rollback_for_mysql(trx, NULL);
-		trx->error_state = DB_SUCCESS;
-	} else {
-		dict_table_change_id_in_cache(table, new_id);
-
-		success = fil_discard_tablespace(table->space);
-
-		if (!success) {
-			trx->error_state = DB_SUCCESS;
-			trx_general_rollback_for_mysql(trx, NULL);
-			trx->error_state = DB_SUCCESS;
-
-			err = DB_ERROR;
-		} else {
-			/* Set the flag which tells that now it is legal to
-			IMPORT a tablespace for this table */
-			table->tablespace_discarded = TRUE;
-			table->ibd_file_missing = TRUE;
-		}
-	}
-
-funct_exit:
-	/* Common exit: commit, then release the dictionary lock taken
-	above. */
-	trx_commit_for_mysql(trx);
-
-	row_mysql_unlock_data_dictionary(trx);
-
-	trx->op_info = "";
-
-	return((int) err);
-}
-
-/*****************************************************************//**
-Imports a tablespace. The space id in the .ibd file must match the space id
-of the table in the data dictionary.
-The import is refused if the lsn's of the file cannot be reset (DB_ERROR),
-if the table is not found in the data dictionary (DB_TABLE_NOT_FOUND), if
-the table resides in the system tablespace 0 (DB_ERROR), or if
-table->tablespace_discarded is not set (DB_ERROR). The transaction is
-committed and the data dictionary is unlocked before returning, on every
-path.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-row_import_tablespace_for_mysql(
-/*============================*/
-	const char*	name,	/*!< in: table name */
-	trx_t*		trx)	/*!< in: transaction handle */
-{
-	dict_table_t*	table;
-	ulint		err	= DB_SUCCESS;
-
-	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
-
-	trx_start_if_not_started(trx);
-
-	trx->op_info = "importing tablespace";
-
-	/* It is possible, though very improbable, that the lsn's in the
-	tablespace to be imported have risen above the current system lsn, if
-	a lengthy purge, ibuf merge, or rollback was performed on a backup
-	taken with ibbackup. If that is the case, reset page lsn's in the
-	file. We assume that mysqld was shut down after it performed these
-	cleanup operations on the .ibd file, so that it stamped the latest lsn
-	to the FIL_PAGE_FILE_FLUSH_LSN in the first page of the .ibd file.
-
-	TODO: reset also the trx id's in clustered index records and write
-	a new space id to each data page. That would allow us to import clean
-	.ibd files from another MySQL installation. */
-
-	if (!fil_reset_too_high_lsns(name, log_get_lsn())) {
-		ut_print_timestamp(stderr);
-		fputs("  InnoDB: Error: cannot reset lsn's in table ", stderr);
-		ut_print_name(stderr, trx, TRUE, name);
-		fputs("\n"
-		      "InnoDB: in ALTER TABLE ... IMPORT TABLESPACE\n",
-		      stderr);
-
-		err = DB_ERROR;
-
-		/* The common exit path unlocks the data dictionary,
-		so it has to be locked on this path as well. */
-		row_mysql_lock_data_dictionary(trx);
-
-		goto funct_exit;
-	}
-
-	/* Serialize data dictionary operations with dictionary mutex:
-	no deadlocks can occur then in these operations */
-
-	row_mysql_lock_data_dictionary(trx);
-
-	table = dict_table_get_low(name);
-
-	if (!table) {
-		ut_print_timestamp(stderr);
-		fputs("  InnoDB: table ", stderr);
-		ut_print_name(stderr, trx, TRUE, name);
-		fputs("\n"
-		      "InnoDB: does not exist in the InnoDB data dictionary\n"
-		      "InnoDB: in ALTER TABLE ... IMPORT TABLESPACE\n",
-		      stderr);
-
-		err = DB_TABLE_NOT_FOUND;
-	} else if (table->space == 0) {
-		ut_print_timestamp(stderr);
-		fputs("  InnoDB: Error: table ", stderr);
-		ut_print_name(stderr, trx, TRUE, name);
-		fputs("\n"
-		      "InnoDB: is in the system tablespace 0"
-		      " which cannot be imported\n", stderr);
-
-		err = DB_ERROR;
-	} else if (!table->tablespace_discarded) {
-		ut_print_timestamp(stderr);
-		fputs("  InnoDB: Error: you are trying to"
-		      " IMPORT a tablespace\n"
-		      "InnoDB: ", stderr);
-		ut_print_name(stderr, trx, TRUE, name);
-		fputs(", though you have not called DISCARD on it yet\n"
-		      "InnoDB: during the lifetime of the mysqld process!\n",
-		      stderr);
-
-		err = DB_ERROR;
-	} else {
-		ulint	open_flags;
-
-		/* Play safe and remove all insert buffer entries, though
-		we should have removed them already when DISCARD TABLESPACE
-		was called */
-
-		ibuf_delete_for_discarded_space(table->space);
-
-		/* A table whose flags are exactly DICT_TF_COMPACT is
-		opened with the flags 0. */
-		if (table->flags == DICT_TF_COMPACT) {
-			open_flags = 0;
-		} else {
-			open_flags = table->flags;
-		}
-
-		if (!fil_open_single_table_tablespace(
-			    TRUE, table->space, open_flags, table->name)) {
-
-			if (table->ibd_file_missing) {
-				ut_print_timestamp(stderr);
-				fputs("  InnoDB: cannot find or open in the"
-				      " database directory the .ibd file of\n"
-				      "InnoDB: table ", stderr);
-				ut_print_name(stderr, trx, TRUE, name);
-				fputs("\n"
-				      "InnoDB: in ALTER TABLE ..."
-				      " IMPORT TABLESPACE\n",
-				      stderr);
-			}
-
-			err = DB_ERROR;
-		} else {
-			table->ibd_file_missing = FALSE;
-			table->tablespace_discarded = FALSE;
-		}
-	}
-
-funct_exit:
-	trx_commit_for_mysql(trx);
-
-	row_mysql_unlock_data_dictionary(trx);
-
-	trx->op_info = "";
-
-	return((int) err);
-}
-
-/*********************************************************************//**
-Truncates a table for MySQL.
-
-Outline of what the body does:
-1) refuse (DB_ERROR) if a new raw partition was initialized, if the table
-is referenced by a foreign key of another table while trx->check_foreigns
-is set, or if a foreign key check is running on the table;
-2) if the table has a tablespace of its own (table->space != 0) and is not
-a temporary table with dir_path_of_temp_table set, discard that tablespace
-and create a new one in its place;
-3) scan the SYS_INDEXES records of the table and truncate each index tree
-that is not marked deleted, storing the new root page number;
-4) assign a new table id (and the possibly new space id) in SYS_TABLES,
-SYS_COLUMNS and SYS_INDEXES;
-5) reset the auto-increment counter to 1 and update the statistics.
-
-NOTE: the error exits that jump to funct_exit skip the
-trx_commit_for_mysql() call; only the data dictionary is unlocked there.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-row_truncate_table_for_mysql(
-/*=========================*/
- dict_table_t* table, /*!< in: table handle */
- trx_t* trx) /*!< in: transaction handle */
-{
- dict_foreign_t* foreign;
- ulint err;
- mem_heap_t* heap;
- byte* buf;
- dtuple_t* tuple;
- dfield_t* dfield;
- dict_index_t* sys_index;
- btr_pcur_t pcur;
- mtr_t mtr;
- dulint new_id;
- ulint recreate_space = 0;
- pars_info_t* info = NULL;
-
- /* How do we prevent crashes caused by ongoing operations on
- the table? Old operations could try to access non-existent
- pages.
-
- 1) SQL queries, INSERT, SELECT, ...: we must get an exclusive
- MySQL table lock on the table before we can do TRUNCATE
- TABLE. Then there are no running queries on the table. This is
- guaranteed, because in ha_innobase::store_lock(), we do not
- weaken the TL_WRITE lock requested by MySQL when executing
- SQLCOM_TRUNCATE.
-
- 2) Purge and rollback: we assign a new table id for the
- table. Since purge and rollback look for the table based on
- the table id, they see the table as 'dropped' and discard
- their operations.
-
- 3) Insert buffer: TRUNCATE TABLE is analogous to DROP TABLE,
- so we do not have to remove insert buffer records, as the
- insert buffer works at a low level. If a freed page is later
- reallocated, the allocator will remove the ibuf entries for
- it.
-
- When we truncate *.ibd files by recreating them (analogous to
- DISCARD TABLESPACE), we remove all entries for the table in the
- insert buffer tree. This is not strictly necessary, because
- in 6) we will assign a new tablespace identifier, but we can
- free up some space in the system tablespace.
-
- 4) Linear readahead and random readahead: we use the same
- method as in 3) to discard ongoing operations. (This is only
- relevant for TRUNCATE TABLE by DISCARD TABLESPACE.)
-
- 5) FOREIGN KEY operations: if
- table->n_foreign_key_checks_running > 0, we do not allow the
- TRUNCATE. We also reserve the data dictionary latch.
-
- 6) Crash recovery: To prevent the application of pre-truncation
- redo log records on the truncated tablespace, we will assign
- a new tablespace identifier to the truncated tablespace. */
-
- ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
- ut_ad(table);
-
- if (srv_created_new_raw) {
- fputs("InnoDB: A new raw disk partition was initialized:\n"
- "InnoDB: we do not allow database modifications"
- " by the user.\n"
- "InnoDB: Shut down mysqld and edit my.cnf so that newraw"
- " is replaced with raw.\n", stderr);
-
- return(DB_ERROR);
- }
-
- trx->op_info = "truncating table";
-
- trx_start_if_not_started(trx);
-
- /* Serialize data dictionary operations with dictionary mutex:
- no deadlocks can occur then in these operations */
-
- ut_a(trx->dict_operation_lock_mode == 0);
- /* Prevent foreign key checks etc. while we are truncating the
- table */
-
- row_mysql_lock_data_dictionary(trx);
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- /* Check if the table is referenced by foreign key constraints from
- some other table (not the table itself) */
-
- foreign = UT_LIST_GET_FIRST(table->referenced_list);
-
- while (foreign && foreign->foreign_table == table) {
- foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
- }
-
- if (foreign && trx->check_foreigns) {
- FILE* ef = dict_foreign_err_file;
-
- /* We only allow truncating a referenced table if
- FOREIGN_KEY_CHECKS is set to 0 */
-
- mutex_enter(&dict_foreign_err_mutex);
- rewind(ef);
- ut_print_timestamp(ef);
-
- fputs(" Cannot truncate table ", ef);
- ut_print_name(ef, trx, TRUE, table->name);
- fputs(" by DROP+CREATE\n"
- "InnoDB: because it is referenced by ", ef);
- ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
- putc('\n', ef);
- mutex_exit(&dict_foreign_err_mutex);
-
- err = DB_ERROR;
- goto funct_exit;
- }
-
- /* TODO: could we replace the counter n_foreign_key_checks_running
- with lock checks on the table? Acquire here an exclusive lock on the
- table, and rewrite lock0lock.c and the lock wait in srv0srv.c so that
- they can cope with the table having been truncated here? Foreign key
- checks take an IS or IX lock on the table. */
-
- if (table->n_foreign_key_checks_running > 0) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Cannot truncate table ", stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs(" by DROP+CREATE\n"
- "InnoDB: because there is a foreign key check"
- " running on it.\n",
- stderr);
- err = DB_ERROR;
-
- goto funct_exit;
- }
-
- /* Remove all locks except the table-level S and X locks. */
- lock_remove_all_on_table(table, FALSE);
-
- trx->table_id = table->id;
-
- if (table->space && !table->dir_path_of_temp_table) {
- /* Discard and create the single-table tablespace. */
- ulint space = table->space;
- ulint flags = fil_space_get_flags(space);
-
- if (flags != ULINT_UNDEFINED
- && fil_discard_tablespace(space)) {
-
- dict_index_t* index;
-
- space = 0;
-
- if (fil_create_new_single_table_tablespace(
- &space, table->name, FALSE, flags,
- FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: TRUNCATE TABLE %s failed to"
- " create a new tablespace\n",
- table->name);
- table->ibd_file_missing = 1;
- err = DB_ERROR;
- goto funct_exit;
- }
-
- recreate_space = space; /* stays 0 unless a tablespace was created */
-
- /* Replace the space_id in the data dictionary cache.
- The persistent data dictionary (SYS_TABLES.SPACE
- and SYS_INDEXES.SPACE) are updated later in this
- function. */
- table->space = space;
- index = dict_table_get_first_index(table);
- do {
- index->space = space;
- index = dict_table_get_next_index(index);
- } while (index);
-
- mtr_start(&mtr);
- fsp_header_init(space,
- FIL_IBD_FILE_INITIAL_SIZE, &mtr);
- mtr_commit(&mtr);
- }
- }
-
- /* scan SYS_INDEXES for all indexes of the table */
- heap = mem_heap_create(800);
-
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- buf = mem_heap_alloc(heap, 8);
- mach_write_to_8(buf, table->id); /* search key: the 8-byte table id */
-
- dfield_set_data(dfield, buf, 8);
- sys_index = dict_table_get_first_index(dict_sys->sys_indexes);
- dict_index_copy_types(tuple, sys_index, 1);
-
- mtr_start(&mtr);
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_MODIFY_LEAF, &pcur, &mtr);
- for (;;) {
- rec_t* rec;
- const byte* field;
- ulint len;
- ulint root_page_no;
-
- if (!btr_pcur_is_on_user_rec(&pcur)) {
- /* The end of SYS_INDEXES has been reached. */
- break;
- }
-
- rec = btr_pcur_get_rec(&pcur);
-
- field = rec_get_nth_field_old(rec, 0, &len);
- ut_ad(len == 8);
-
- if (memcmp(buf, field, len) != 0) {
- /* End of indexes for the table (TABLE_ID mismatch). */
- break;
- }
-
- if (rec_get_deleted_flag(rec, FALSE)) {
- /* The index has been dropped. */
- goto next_rec;
- }
-
- /* This call may commit and restart mtr
- and reposition pcur. */
- root_page_no = dict_truncate_index_tree(table, recreate_space,
- &pcur, &mtr);
-
- rec = btr_pcur_get_rec(&pcur); /* re-read: pcur may have been repositioned */
-
- if (root_page_no != FIL_NULL) {
- page_rec_write_index_page_no(
- rec, DICT_SYS_INDEXES_PAGE_NO_FIELD,
- root_page_no, &mtr);
- /* We will need to commit and restart the
- mini-transaction in order to avoid deadlocks.
- The dict_truncate_index_tree() call has allocated
- a page in this mini-transaction, and the rest of
- this loop could latch another index page. */
- mtr_commit(&mtr);
- mtr_start(&mtr);
- btr_pcur_restore_position(BTR_MODIFY_LEAF,
- &pcur, &mtr);
- }
-
-next_rec:
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- mem_heap_free(heap);
-
- new_id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID);
-
- info = pars_info_create();
-
- pars_info_add_int4_literal(info, "space", (lint) table->space);
- pars_info_add_dulint_literal(info, "old_id", table->id);
- pars_info_add_dulint_literal(info, "new_id", new_id);
-
- err = que_eval_sql(info,
- "PROCEDURE RENUMBER_TABLESPACE_PROC () IS\n"
- "BEGIN\n"
- "UPDATE SYS_TABLES"
- " SET ID = :new_id, SPACE = :space\n"
- " WHERE ID = :old_id;\n"
- "UPDATE SYS_COLUMNS SET TABLE_ID = :new_id\n"
- " WHERE TABLE_ID = :old_id;\n"
- "UPDATE SYS_INDEXES"
- " SET TABLE_ID = :new_id, SPACE = :space\n"
- " WHERE TABLE_ID = :old_id;\n"
- "COMMIT WORK;\n"
- "END;\n"
- , FALSE, trx);
-
- if (err != DB_SUCCESS) {
- trx->error_state = DB_SUCCESS;
- trx_general_rollback_for_mysql(trx, NULL);
- trx->error_state = DB_SUCCESS;
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Unable to assign a new identifier to table ",
- stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs("\n"
- "InnoDB: after truncating it. Background processes"
- " may corrupt the table!\n", stderr);
- err = DB_ERROR;
- } else {
- dict_table_change_id_in_cache(table, new_id);
- }
-
- /* MySQL calls ha_innobase::reset_auto_increment() which does
- the same thing. */
- dict_table_autoinc_lock(table);
- dict_table_autoinc_initialize(table, 1);
- dict_table_autoinc_unlock(table);
- dict_update_statistics(table);
-
- trx_commit_for_mysql(trx);
-
-funct_exit:
-
- row_mysql_unlock_data_dictionary(trx);
-
- trx->op_info = "";
-
- srv_wake_master_thread();
-
- return((int) err);
-}
-
-/*********************************************************************//**
-Drops a table for MySQL. If the name of the dropped table ends in
-one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
-"innodb_table_monitor", then this will also stop the printing of monitor
-output by the master thread. If the data dictionary was not already locked
-by the transaction, the transaction will be committed. Otherwise, the
-data dictionary will remain locked.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-row_drop_table_for_mysql(
-/*=====================*/
- const char* name, /*!< in: table name */
- trx_t* trx, /*!< in: transaction handle */
- ibool drop_db)/*!< in: TRUE=dropping whole database */
-{
- dict_foreign_t* foreign;
- dict_table_t* table;
- ulint space_id;
- ulint err;
- const char* table_name;
- ulint namelen;
- ibool locked_dictionary = FALSE;
- pars_info_t* info = NULL;
-
- ut_a(name != NULL);
-
- if (srv_created_new_raw) {
- fputs("InnoDB: A new raw disk partition was initialized:\n"
- "InnoDB: we do not allow database modifications"
- " by the user.\n"
- "InnoDB: Shut down mysqld and edit my.cnf so that newraw"
- " is replaced with raw.\n", stderr);
-
- return(DB_ERROR);
- }
-
- trx->op_info = "dropping table";
-
- trx_start_if_not_started(trx);
-
- /* The table name is prefixed with the database name and a '/'.
- Certain table names starting with 'innodb_' have their special
- meaning regardless of the database name. Thus, we need to
- ignore the database name prefix in the comparisons. */
- table_name = strchr(name, '/');
- ut_a(table_name);
- table_name++;
- namelen = strlen(table_name) + 1;
-
- if (namelen == sizeof S_innodb_monitor
- && !memcmp(table_name, S_innodb_monitor,
- sizeof S_innodb_monitor)) {
-
- /* Table name equals "innodb_monitor":
- stop monitor prints */
-
- srv_print_innodb_monitor = FALSE;
- srv_print_innodb_lock_monitor = FALSE;
- } else if (namelen == sizeof S_innodb_lock_monitor
- && !memcmp(table_name, S_innodb_lock_monitor,
- sizeof S_innodb_lock_monitor)) {
- srv_print_innodb_monitor = FALSE;
- srv_print_innodb_lock_monitor = FALSE;
- } else if (namelen == sizeof S_innodb_tablespace_monitor
- && !memcmp(table_name, S_innodb_tablespace_monitor,
- sizeof S_innodb_tablespace_monitor)) {
-
- srv_print_innodb_tablespace_monitor = FALSE;
- } else if (namelen == sizeof S_innodb_table_monitor
- && !memcmp(table_name, S_innodb_table_monitor,
- sizeof S_innodb_table_monitor)) {
-
- srv_print_innodb_table_monitor = FALSE;
- }
-
- /* Serialize data dictionary operations with dictionary mutex:
- no deadlocks can occur then in these operations */
-
- if (trx->dict_operation_lock_mode != RW_X_LATCH) {
- /* Prevent foreign key checks etc. while we are dropping the
- table */
-
- row_mysql_lock_data_dictionary(trx);
-
- locked_dictionary = TRUE;
- }
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- table = dict_table_get_low(name);
-
- if (!table) {
- err = DB_TABLE_NOT_FOUND;
- ut_print_timestamp(stderr);
-
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs(" does not exist in the InnoDB internal\n"
- "InnoDB: data dictionary though MySQL is"
- " trying to drop it.\n"
- "InnoDB: Have you copied the .frm file"
- " of the table to the\n"
- "InnoDB: MySQL database directory"
- " from another database?\n"
- "InnoDB: You can look for further help from\n"
- "InnoDB: " REFMAN "innodb-troubleshooting.html\n",
- stderr);
- goto funct_exit;
- }
-
- /* Check if the table is referenced by foreign key constraints from
- some other table (not the table itself) */
-
- foreign = UT_LIST_GET_FIRST(table->referenced_list);
-
- while (foreign && foreign->foreign_table == table) {
-check_next_foreign:
- foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
- }
-
- if (foreign && trx->check_foreigns
- && !(drop_db && dict_tables_have_same_db(
- name, foreign->foreign_table_name))) {
- FILE* ef = dict_foreign_err_file;
-
- /* We only allow dropping a referenced table if
- FOREIGN_KEY_CHECKS is set to 0 */
-
- err = DB_CANNOT_DROP_CONSTRAINT;
-
- mutex_enter(&dict_foreign_err_mutex);
- rewind(ef);
- ut_print_timestamp(ef);
-
- fputs(" Cannot drop table ", ef);
- ut_print_name(ef, trx, TRUE, name);
- fputs("\n"
- "because it is referenced by ", ef);
- ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
- putc('\n', ef);
- mutex_exit(&dict_foreign_err_mutex);
-
- goto funct_exit;
- }
-
- if (foreign && trx->check_foreigns) {
- goto check_next_foreign;
- }
-
- if (table->n_mysql_handles_opened > 0) {
- ibool added;
-
- added = row_add_table_to_background_drop_list(table->name);
-
- if (added) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: MySQL is"
- " trying to drop table ", stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs("\n"
- "InnoDB: though there are still"
- " open handles to it.\n"
- "InnoDB: Adding the table to the"
- " background drop queue.\n",
- stderr);
-
- /* We return DB_SUCCESS to MySQL though the drop will
- happen lazily later */
- err = DB_SUCCESS;
- } else {
- /* The table is already in the background drop list */
- err = DB_ERROR;
- }
-
- goto funct_exit;
- }
-
- /* TODO: could we replace the counter n_foreign_key_checks_running
- with lock checks on the table? Acquire here an exclusive lock on the
- table, and rewrite lock0lock.c and the lock wait in srv0srv.c so that
- they can cope with the table having been dropped here? Foreign key
- checks take an IS or IX lock on the table. */
-
- if (table->n_foreign_key_checks_running > 0) {
-
- const char* table_name = table->name;
- ibool added;
-
- added = row_add_table_to_background_drop_list(table_name);
-
- if (added) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: You are trying to drop table ",
- stderr);
- ut_print_name(stderr, trx, TRUE, table_name);
- fputs("\n"
- "InnoDB: though there is a"
- " foreign key check running on it.\n"
- "InnoDB: Adding the table to"
- " the background drop queue.\n",
- stderr);
-
- /* We return DB_SUCCESS to MySQL though the drop will
- happen lazily later */
-
- err = DB_SUCCESS;
- } else {
- /* The table is already in the background drop list */
- err = DB_ERROR;
- }
-
- goto funct_exit;
- }
-
- /* Remove all locks there are on the table or its records */
- lock_remove_all_on_table(table, TRUE);
-
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
- trx->table_id = table->id;
-
- /* We use the private SQL parser of Innobase to generate the
- query graphs needed in deleting the dictionary data from system
- tables in Innobase. Deleting a row from SYS_INDEXES table also
- frees the file segments of the B-tree associated with the index. */
-
- info = pars_info_create();
-
- pars_info_add_str_literal(info, "table_name", name);
-
- err = que_eval_sql(info,
- "PROCEDURE DROP_TABLE_PROC () IS\n"
- "sys_foreign_id CHAR;\n"
- "table_id CHAR;\n"
- "index_id CHAR;\n"
- "foreign_id CHAR;\n"
- "found INT;\n"
- "BEGIN\n"
- "SELECT ID INTO table_id\n"
- "FROM SYS_TABLES\n"
- "WHERE NAME = :table_name\n"
- "LOCK IN SHARE MODE;\n"
- "IF (SQL % NOTFOUND) THEN\n"
- " RETURN;\n"
- "END IF;\n"
- "found := 1;\n"
- "SELECT ID INTO sys_foreign_id\n"
- "FROM SYS_TABLES\n"
- "WHERE NAME = 'SYS_FOREIGN'\n"
- "LOCK IN SHARE MODE;\n"
- "IF (SQL % NOTFOUND) THEN\n"
- " found := 0;\n"
- "END IF;\n"
- "IF (:table_name = 'SYS_FOREIGN') THEN\n"
- " found := 0;\n"
- "END IF;\n"
- "IF (:table_name = 'SYS_FOREIGN_COLS') THEN\n"
- " found := 0;\n"
- "END IF;\n"
- "WHILE found = 1 LOOP\n"
- " SELECT ID INTO foreign_id\n"
- " FROM SYS_FOREIGN\n"
- " WHERE FOR_NAME = :table_name\n"
- " AND TO_BINARY(FOR_NAME)\n"
- " = TO_BINARY(:table_name)\n"
- " LOCK IN SHARE MODE;\n"
- " IF (SQL % NOTFOUND) THEN\n"
- " found := 0;\n"
- " ELSE\n"
- " DELETE FROM SYS_FOREIGN_COLS\n"
- " WHERE ID = foreign_id;\n"
- " DELETE FROM SYS_FOREIGN\n"
- " WHERE ID = foreign_id;\n"
- " END IF;\n"
- "END LOOP;\n"
- "found := 1;\n"
- "WHILE found = 1 LOOP\n"
- " SELECT ID INTO index_id\n"
- " FROM SYS_INDEXES\n"
- " WHERE TABLE_ID = table_id\n"
- " LOCK IN SHARE MODE;\n"
- " IF (SQL % NOTFOUND) THEN\n"
- " found := 0;\n"
- " ELSE\n"
- " DELETE FROM SYS_FIELDS\n"
- " WHERE INDEX_ID = index_id;\n"
- " DELETE FROM SYS_INDEXES\n"
- " WHERE ID = index_id\n"
- " AND TABLE_ID = table_id;\n"
- " END IF;\n"
- "END LOOP;\n"
- "DELETE FROM SYS_COLUMNS\n"
- "WHERE TABLE_ID = table_id;\n"
- "DELETE FROM SYS_TABLES\n"
- "WHERE ID = table_id;\n"
- "END;\n"
- , FALSE, trx);
-
- if (err != DB_SUCCESS) {
- ut_a(err == DB_OUT_OF_FILE_SPACE);
-
- err = DB_MUST_GET_MORE_FILE_SPACE;
-
- row_mysql_handle_errors(&err, trx, NULL, NULL);
-
- ut_error;
- } else {
- ibool is_path;
- const char* name_or_path;
- mem_heap_t* heap;
-
- heap = mem_heap_create(200);
-
- /* Clone the name, in case it has been allocated
- from table->heap, which will be freed by
- dict_table_remove_from_cache(table) below. */
- name = mem_heap_strdup(heap, name);
- space_id = table->space;
-
- if (table->dir_path_of_temp_table != NULL) {
- is_path = TRUE;
- name_or_path = mem_heap_strdup(
- heap, table->dir_path_of_temp_table);
- } else {
- is_path = FALSE;
- name_or_path = name;
- }
-
- dict_table_remove_from_cache(table);
-
- if (dict_load_table(name) != NULL) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: not able to remove table ",
- stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs(" from the dictionary cache!\n", stderr);
- err = DB_ERROR;
- }
-
- /* Do not drop possible .ibd tablespace if something went
- wrong: we do not want to delete valuable data of the user */
-
- if (err == DB_SUCCESS && space_id > 0) {
- if (!fil_space_for_table_exists_in_mem(space_id,
- name_or_path,
- is_path,
- FALSE, TRUE)) {
- err = DB_SUCCESS;
-
- fprintf(stderr,
- "InnoDB: We removed now the InnoDB"
- " internal data dictionary entry\n"
- "InnoDB: of table ");
- ut_print_name(stderr, trx, TRUE, name);
- fprintf(stderr, ".\n");
- } else if (!fil_delete_tablespace(space_id)) {
- fprintf(stderr,
- "InnoDB: We removed now the InnoDB"
- " internal data dictionary entry\n"
- "InnoDB: of table ");
- ut_print_name(stderr, trx, TRUE, name);
- fprintf(stderr, ".\n");
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: not able to"
- " delete tablespace %lu of table ",
- (ulong) space_id);
- ut_print_name(stderr, trx, TRUE, name);
- fputs("!\n", stderr);
- err = DB_ERROR;
- }
- }
-
- mem_heap_free(heap);
- }
-funct_exit:
-
- if (locked_dictionary) {
- trx_commit_for_mysql(trx);
-
- row_mysql_unlock_data_dictionary(trx);
- }
-
- trx->op_info = "";
-
- srv_wake_master_thread();
-
- return((int) err);
-}
-
-/*******************************************************************//**
-Drop all foreign keys in a database, see Bug#18942.
-Called at the end of row_drop_database_for_mysql().
-@return error code or DB_SUCCESS */
-static
-ulint
-drop_all_foreign_keys_in_db(
-/*========================*/
- const char* name, /*!< in: database name which ends to '/' */
- trx_t* trx) /*!< in: transaction handle */
-{
- pars_info_t* pinfo;
- ulint err;
-
- ut_a(name[strlen(name) - 1] == '/');
-
- pinfo = pars_info_create();
-
- pars_info_add_str_literal(pinfo, "dbname", name);
-
-/** true if for_name is not prefixed with dbname */
-#define TABLE_NOT_IN_THIS_DB \
-"SUBSTR(for_name, 0, LENGTH(:dbname)) <> :dbname"
-
- err = que_eval_sql(pinfo,
- "PROCEDURE DROP_ALL_FOREIGN_KEYS_PROC () IS\n"
- "foreign_id CHAR;\n"
- "for_name CHAR;\n"
- "found INT;\n"
- "DECLARE CURSOR cur IS\n"
- "SELECT ID, FOR_NAME FROM SYS_FOREIGN\n"
- "WHERE FOR_NAME >= :dbname\n"
- "LOCK IN SHARE MODE\n"
- "ORDER BY FOR_NAME;\n"
- "BEGIN\n"
- "found := 1;\n"
- "OPEN cur;\n"
- "WHILE found = 1 LOOP\n"
- " FETCH cur INTO foreign_id, for_name;\n"
- " IF (SQL % NOTFOUND) THEN\n"
- " found := 0;\n"
- " ELSIF (" TABLE_NOT_IN_THIS_DB ") THEN\n"
- " found := 0;\n"
- " ELSIF (1=1) THEN\n"
- " DELETE FROM SYS_FOREIGN_COLS\n"
- " WHERE ID = foreign_id;\n"
- " DELETE FROM SYS_FOREIGN\n"
- " WHERE ID = foreign_id;\n"
- " END IF;\n"
- "END LOOP;\n"
- "CLOSE cur;\n"
- "COMMIT WORK;\n"
- "END;\n",
- FALSE, /* do not reserve dict mutex,
- we are already holding it */
- trx);
-
- return(err);
-}
-
-/*********************************************************************//**
-Drops a database for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-row_drop_database_for_mysql(
-/*========================*/
- const char* name, /*!< in: database name which ends to '/' */
- trx_t* trx) /*!< in: transaction handle */
-{
- dict_table_t* table;
- char* table_name;
- int err = DB_SUCCESS;
- ulint namelen = strlen(name);
-
- ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
- ut_a(name != NULL);
- ut_a(name[namelen - 1] == '/');
-
- trx->op_info = "dropping database";
-
- trx_start_if_not_started(trx);
-loop:
- row_mysql_lock_data_dictionary(trx);
-
- while ((table_name = dict_get_first_table_name_in_db(name))) {
- ut_a(memcmp(table_name, name, namelen) == 0);
-
- table = dict_table_get_low(table_name);
-
- ut_a(table);
-
- /* Wait until MySQL does not have any queries running on
- the table */
-
- if (table->n_mysql_handles_opened > 0) {
- row_mysql_unlock_data_dictionary(trx);
-
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: MySQL is trying to"
- " drop database ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs("\n"
- "InnoDB: though there are still"
- " open handles to table ", stderr);
- ut_print_name(stderr, trx, TRUE, table_name);
- fputs(".\n", stderr);
-
- os_thread_sleep(1000000);
-
- mem_free(table_name);
-
- goto loop;
- }
-
- err = row_drop_table_for_mysql(table_name, trx, TRUE);
- trx_commit_for_mysql(trx);
-
- if (err != DB_SUCCESS) {
- fputs("InnoDB: DROP DATABASE ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fprintf(stderr, " failed with error %lu for table ",
- (ulint) err);
- ut_print_name(stderr, trx, TRUE, table_name);
- putc('\n', stderr);
- mem_free(table_name);
- break;
- }
-
- mem_free(table_name);
- }
-
- if (err == DB_SUCCESS) {
- /* after dropping all tables try to drop all leftover
- foreign keys in case orphaned ones exist */
- err = (int) drop_all_foreign_keys_in_db(name, trx);
-
- if (err != DB_SUCCESS) {
- fputs("InnoDB: DROP DATABASE ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fprintf(stderr, " failed with error %d while "
- "dropping all foreign keys", err);
- }
- }
-
- trx_commit_for_mysql(trx);
-
- row_mysql_unlock_data_dictionary(trx);
-
- trx->op_info = "";
-
- return(err);
-}
-
-/*********************************************************************//**
-Checks if a table name contains the string "/#sql" which denotes temporary
-tables in MySQL.
-@return TRUE if temporary table */
-static
-ibool
-row_is_mysql_tmp_table_name(
-/*========================*/
- const char* name) /*!< in: table name in the form
- 'database/tablename' */
-{
- return(strstr(name, "/#sql") != NULL);
- /* return(strstr(name, "/@0023sql") != NULL); */
-}
-
-/****************************************************************//**
-Delete a single constraint.
-@return error code or DB_SUCCESS */
-static
-int
-row_delete_constraint_low(
-/*======================*/
- const char* id, /*!< in: constraint id */
- trx_t* trx) /*!< in: transaction handle */
-{
- pars_info_t* info = pars_info_create();
-
- pars_info_add_str_literal(info, "id", id);
-
- return((int) que_eval_sql(info,
- "PROCEDURE DELETE_CONSTRAINT () IS\n"
- "BEGIN\n"
- "DELETE FROM SYS_FOREIGN_COLS WHERE ID = :id;\n"
- "DELETE FROM SYS_FOREIGN WHERE ID = :id;\n"
- "END;\n"
- , FALSE, trx));
-}
-
-/****************************************************************//**
-Delete a single constraint.
-@return error code or DB_SUCCESS */
-static
-int
-row_delete_constraint(
-/*==================*/
- const char* id, /*!< in: constraint id */
- const char* database_name, /*!< in: database name, with the
- trailing '/' */
- mem_heap_t* heap, /*!< in: memory heap */
- trx_t* trx) /*!< in: transaction handle */
-{
- ulint err;
-
- /* New format constraints have ids <databasename>/<constraintname>. */
- err = row_delete_constraint_low(
- mem_heap_strcat(heap, database_name, id), trx);
-
- if ((err == DB_SUCCESS) && !strchr(id, '/')) {
- /* Old format < 4.0.18 constraints have constraint ids
- NUMBER_NUMBER. We only try deleting them if the
- constraint name does not contain a '/' character, otherwise
- deleting a new format constraint named 'foo/bar' from
- database 'baz' would remove constraint 'bar' from database
- 'foo', if it existed. */
-
- err = row_delete_constraint_low(id, trx);
- }
-
- return((int) err);
-}
-
-/*********************************************************************//**
-Renames a table for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-ulint
-row_rename_table_for_mysql(
-/*=======================*/
- const char* old_name, /*!< in: old table name */
- const char* new_name, /*!< in: new table name */
- trx_t* trx, /*!< in: transaction handle */
- ibool commit) /*!< in: if TRUE then commit trx */
-{
- dict_table_t* table;
- ulint err = DB_ERROR;
- mem_heap_t* heap = NULL;
- const char** constraints_to_drop = NULL;
- ulint n_constraints_to_drop = 0;
- ibool old_is_tmp, new_is_tmp;
- pars_info_t* info = NULL;
-
- ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
- ut_a(old_name != NULL);
- ut_a(new_name != NULL);
-
- if (srv_created_new_raw || srv_force_recovery) {
- fputs("InnoDB: A new raw disk partition was initialized or\n"
- "InnoDB: innodb_force_recovery is on: we do not allow\n"
- "InnoDB: database modifications by the user. Shut down\n"
- "InnoDB: mysqld and edit my.cnf so that newraw"
- " is replaced\n"
- "InnoDB: with raw, and innodb_force_... is removed.\n",
- stderr);
-
- goto funct_exit;
- } else if (row_mysql_is_system_table(new_name)) {
-
- fprintf(stderr,
- "InnoDB: Error: trying to create a MySQL"
- " system table %s of type InnoDB.\n"
- "InnoDB: MySQL system tables must be"
- " of the MyISAM type!\n",
- new_name);
-
- goto funct_exit;
- }
-
- trx->op_info = "renaming table";
- trx_start_if_not_started(trx);
-
- old_is_tmp = row_is_mysql_tmp_table_name(old_name);
- new_is_tmp = row_is_mysql_tmp_table_name(new_name);
-
- table = dict_table_get_low(old_name);
-
- if (!table) {
- err = DB_TABLE_NOT_FOUND;
- ut_print_timestamp(stderr);
-
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_name(stderr, trx, TRUE, old_name);
- fputs(" does not exist in the InnoDB internal\n"
- "InnoDB: data dictionary though MySQL is"
- " trying to rename the table.\n"
- "InnoDB: Have you copied the .frm file"
- " of the table to the\n"
- "InnoDB: MySQL database directory"
- " from another database?\n"
- "InnoDB: You can look for further help from\n"
- "InnoDB: " REFMAN "innodb-troubleshooting.html\n",
- stderr);
- goto funct_exit;
- } else if (table->ibd_file_missing) {
- err = DB_TABLE_NOT_FOUND;
- ut_print_timestamp(stderr);
-
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_name(stderr, trx, TRUE, old_name);
- fputs(" does not have an .ibd file"
- " in the database directory.\n"
- "InnoDB: You can look for further help from\n"
- "InnoDB: " REFMAN "innodb-troubleshooting.html\n",
- stderr);
- goto funct_exit;
- } else if (new_is_tmp) {
- /* MySQL is doing an ALTER TABLE command and it renames the
- original table to a temporary table name. We want to preserve
- the original foreign key constraint definitions despite the
- name change. An exception is those constraints for which
- the ALTER TABLE contained DROP FOREIGN KEY <foreign key id>.*/
-
- heap = mem_heap_create(100);
-
- err = dict_foreign_parse_drop_constraints(
- heap, trx, table, &n_constraints_to_drop,
- &constraints_to_drop);
-
- if (err != DB_SUCCESS) {
-
- goto funct_exit;
- }
- }
-
- /* We use the private SQL parser of Innobase to generate the query
- graphs needed in updating the dictionary data from system tables. */
-
- info = pars_info_create();
-
- pars_info_add_str_literal(info, "new_table_name", new_name);
- pars_info_add_str_literal(info, "old_table_name", old_name);
-
- err = que_eval_sql(info,
- "PROCEDURE RENAME_TABLE () IS\n"
- "BEGIN\n"
- "UPDATE SYS_TABLES SET NAME = :new_table_name\n"
- " WHERE NAME = :old_table_name;\n"
- "END;\n"
- , FALSE, trx);
-
- if (err != DB_SUCCESS) {
-
- goto end;
- } else if (!new_is_tmp) {
- /* Rename all constraints. */
-
- info = pars_info_create();
-
- pars_info_add_str_literal(info, "new_table_name", new_name);
- pars_info_add_str_literal(info, "old_table_name", old_name);
-
- err = que_eval_sql(
- info,
- "PROCEDURE RENAME_CONSTRAINT_IDS () IS\n"
- "gen_constr_prefix CHAR;\n"
- "new_db_name CHAR;\n"
- "foreign_id CHAR;\n"
- "new_foreign_id CHAR;\n"
- "old_db_name_len INT;\n"
- "old_t_name_len INT;\n"
- "new_db_name_len INT;\n"
- "id_len INT;\n"
- "found INT;\n"
- "BEGIN\n"
- "found := 1;\n"
- "old_db_name_len := INSTR(:old_table_name, '/')-1;\n"
- "new_db_name_len := INSTR(:new_table_name, '/')-1;\n"
- "new_db_name := SUBSTR(:new_table_name, 0,\n"
- " new_db_name_len);\n"
- "old_t_name_len := LENGTH(:old_table_name);\n"
- "gen_constr_prefix := CONCAT(:old_table_name,\n"
- " '_ibfk_');\n"
- "WHILE found = 1 LOOP\n"
- " SELECT ID INTO foreign_id\n"
- " FROM SYS_FOREIGN\n"
- " WHERE FOR_NAME = :old_table_name\n"
- " AND TO_BINARY(FOR_NAME)\n"
- " = TO_BINARY(:old_table_name)\n"
- " LOCK IN SHARE MODE;\n"
- " IF (SQL % NOTFOUND) THEN\n"
- " found := 0;\n"
- " ELSE\n"
- " UPDATE SYS_FOREIGN\n"
- " SET FOR_NAME = :new_table_name\n"
- " WHERE ID = foreign_id;\n"
- " id_len := LENGTH(foreign_id);\n"
- " IF (INSTR(foreign_id, '/') > 0) THEN\n"
- " IF (INSTR(foreign_id,\n"
- " gen_constr_prefix) > 0)\n"
- " THEN\n"
- " new_foreign_id :=\n"
- " CONCAT(:new_table_name,\n"
- " SUBSTR(foreign_id, old_t_name_len,\n"
- " id_len - old_t_name_len));\n"
- " ELSE\n"
- " new_foreign_id :=\n"
- " CONCAT(new_db_name,\n"
- " SUBSTR(foreign_id,\n"
- " old_db_name_len,\n"
- " id_len - old_db_name_len));\n"
- " END IF;\n"
- " UPDATE SYS_FOREIGN\n"
- " SET ID = new_foreign_id\n"
- " WHERE ID = foreign_id;\n"
- " UPDATE SYS_FOREIGN_COLS\n"
- " SET ID = new_foreign_id\n"
- " WHERE ID = foreign_id;\n"
- " END IF;\n"
- " END IF;\n"
- "END LOOP;\n"
- "UPDATE SYS_FOREIGN SET REF_NAME = :new_table_name\n"
- "WHERE REF_NAME = :old_table_name\n"
- " AND TO_BINARY(REF_NAME)\n"
- " = TO_BINARY(:old_table_name);\n"
- "END;\n"
- , FALSE, trx);
-
- } else if (n_constraints_to_drop > 0) {
- /* Drop some constraints of tmp tables. */
-
- ulint db_name_len = dict_get_db_name_len(old_name) + 1;
- char* db_name = mem_heap_strdupl(heap, old_name,
- db_name_len);
- ulint i;
-
- for (i = 0; i < n_constraints_to_drop; i++) {
- err = row_delete_constraint(constraints_to_drop[i],
- db_name, heap, trx);
-
- if (err != DB_SUCCESS) {
- break;
- }
- }
- }
-
-end:
- if (err != DB_SUCCESS) {
- if (err == DB_DUPLICATE_KEY) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error; possible reasons:\n"
- "InnoDB: 1) Table rename would cause"
- " two FOREIGN KEY constraints\n"
- "InnoDB: to have the same internal name"
- " in case-insensitive comparison.\n"
- "InnoDB: 2) table ", stderr);
- ut_print_name(stderr, trx, TRUE, new_name);
- fputs(" exists in the InnoDB internal data\n"
- "InnoDB: dictionary though MySQL is"
- " trying to rename table ", stderr);
- ut_print_name(stderr, trx, TRUE, old_name);
- fputs(" to it.\n"
- "InnoDB: Have you deleted the .frm file"
- " and not used DROP TABLE?\n"
- "InnoDB: You can look for further help from\n"
- "InnoDB: " REFMAN "innodb-troubleshooting.html\n"
- "InnoDB: If table ", stderr);
- ut_print_name(stderr, trx, TRUE, new_name);
- fputs(" is a temporary table #sql..., then"
- " it can be that\n"
- "InnoDB: there are still queries running"
- " on the table, and it will be\n"
- "InnoDB: dropped automatically when"
- " the queries end.\n"
- "InnoDB: You can drop the orphaned table"
- " inside InnoDB by\n"
- "InnoDB: creating an InnoDB table with"
- " the same name in another\n"
- "InnoDB: database and copying the .frm file"
- " to the current database.\n"
- "InnoDB: Then MySQL thinks the table exists,"
- " and DROP TABLE will\n"
- "InnoDB: succeed.\n", stderr);
- }
- trx->error_state = DB_SUCCESS;
- trx_general_rollback_for_mysql(trx, NULL);
- trx->error_state = DB_SUCCESS;
- } else {
- /* The following call will also rename the .ibd data file if
- the table is stored in a single-table tablespace */
-
- if (!dict_table_rename_in_cache(table, new_name,
- !new_is_tmp)) {
- trx->error_state = DB_SUCCESS;
- trx_general_rollback_for_mysql(trx, NULL);
- trx->error_state = DB_SUCCESS;
- goto funct_exit;
- }
-
- /* We only want to switch off some of the type checking in
- an ALTER, not in a RENAME. */
-
- err = dict_load_foreigns(
- new_name, !old_is_tmp || trx->check_foreigns);
-
- if (err != DB_SUCCESS) {
- ut_print_timestamp(stderr);
-
- if (old_is_tmp) {
- fputs(" InnoDB: Error: in ALTER TABLE ",
- stderr);
- ut_print_name(stderr, trx, TRUE, new_name);
- fputs("\n"
- "InnoDB: has or is referenced"
- " in foreign key constraints\n"
- "InnoDB: which are not compatible"
- " with the new table definition.\n",
- stderr);
- } else {
- fputs(" InnoDB: Error: in RENAME TABLE"
- " table ",
- stderr);
- ut_print_name(stderr, trx, TRUE, new_name);
- fputs("\n"
- "InnoDB: is referenced in"
- " foreign key constraints\n"
- "InnoDB: which are not compatible"
- " with the new table definition.\n",
- stderr);
- }
-
- ut_a(dict_table_rename_in_cache(table,
- old_name, FALSE));
- trx->error_state = DB_SUCCESS;
- trx_general_rollback_for_mysql(trx, NULL);
- trx->error_state = DB_SUCCESS;
- }
- }
-
-funct_exit:
-
- if (commit) {
- trx_commit_for_mysql(trx);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- trx->op_info = "";
-
- return(err);
-}
-
-/*********************************************************************//**
-Checks that the index contains entries in an ascending order, unique
-constraint is not broken, and calculates the number of index entries
-in the read view of the current transaction.
-@return TRUE if ok */
-static
-ibool
-row_scan_and_check_index(
-/*=====================*/
- row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in MySQL */
- dict_index_t* index, /*!< in: index */
- ulint* n_rows) /*!< out: number of entries seen in the
- current consistent read */
-{
- dtuple_t* prev_entry = NULL;
- ulint matched_fields;
- ulint matched_bytes;
- byte* buf;
- ulint ret;
- rec_t* rec;
- ibool is_ok = TRUE;
- int cmp;
- ibool contains_null;
- ulint i;
- ulint cnt;
- mem_heap_t* heap = NULL;
- ulint n_ext;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets;
- rec_offs_init(offsets_);
-
- *n_rows = 0;
-
- if (!row_merge_is_index_usable(prebuilt->trx, index)) {
- /* A newly created index may lack some delete-marked
- records that may exist in the read view of
- prebuilt->trx. Thus, such indexes must not be
- accessed by consistent read. */
- return(is_ok);
- }
-
- buf = mem_alloc(UNIV_PAGE_SIZE);
- heap = mem_heap_create(100);
-
- /* Make a dummy template in prebuilt, which we will use
- in scanning the index entries */
-
- prebuilt->index = index;
- /* row_merge_is_index_usable() was already checked above. */
- prebuilt->index_usable = TRUE;
- prebuilt->sql_stat_start = TRUE;
- prebuilt->template_type = ROW_MYSQL_DUMMY_TEMPLATE;
- prebuilt->n_template = 0;
- prebuilt->need_to_access_clustered = FALSE;
-
- dtuple_set_n_fields(prebuilt->search_tuple, 0);
-
- prebuilt->select_lock_type = LOCK_NONE;
- cnt = 1000;
-
- ret = row_search_for_mysql(buf, PAGE_CUR_G, prebuilt, 0, 0);
-loop:
- /* Check thd->killed every 1,000 scanned rows */
- if (--cnt == 0) {
- if (trx_is_interrupted(prebuilt->trx)) {
- goto func_exit;
- }
- cnt = 1000;
- }
-
- switch (ret) {
- case DB_SUCCESS:
- break;
- default:
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: CHECK TABLE on ", stderr);
- dict_index_name_print(stderr, prebuilt->trx, index);
- fprintf(stderr, " returned %lu\n", ret);
- /* fall through (this error is ignored by CHECK TABLE) */
- case DB_END_OF_INDEX:
-func_exit:
- mem_free(buf);
- mem_heap_free(heap);
-
- return(is_ok);
- }
-
- *n_rows = *n_rows + 1;
-
- /* row_search... returns the index record in buf, record origin offset
- within buf stored in the first 4 bytes, because we have built a dummy
- template */
-
- rec = buf + mach_read_from_4(buf);
-
- offsets = rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap);
-
- if (prev_entry != NULL) {
- matched_fields = 0;
- matched_bytes = 0;
-
- cmp = cmp_dtuple_rec_with_match(prev_entry, rec, offsets,
- &matched_fields,
- &matched_bytes);
- contains_null = FALSE;
-
- /* In a unique secondary index we allow equal key values if
- they contain SQL NULLs */
-
- for (i = 0;
- i < dict_index_get_n_ordering_defined_by_user(index);
- i++) {
- if (UNIV_SQL_NULL == dfield_get_len(
- dtuple_get_nth_field(prev_entry, i))) {
-
- contains_null = TRUE;
- }
- }
-
- if (cmp > 0) {
- fputs("InnoDB: index records in a wrong order in ",
- stderr);
-not_ok:
- dict_index_name_print(stderr,
- prebuilt->trx, index);
- fputs("\n"
- "InnoDB: prev record ", stderr);
- dtuple_print(stderr, prev_entry);
- fputs("\n"
- "InnoDB: record ", stderr);
- rec_print_new(stderr, rec, offsets);
- putc('\n', stderr);
- is_ok = FALSE;
- } else if (dict_index_is_unique(index)
- && !contains_null
- && matched_fields
- >= dict_index_get_n_ordering_defined_by_user(
- index)) {
-
- fputs("InnoDB: duplicate key in ", stderr);
- goto not_ok;
- }
- }
-
- {
- mem_heap_t* tmp_heap = NULL;
-
- /* Empty the heap on each round. But preserve offsets[]
- for the row_rec_to_index_entry() call, by copying them
- into a separate memory heap when needed. */
- if (UNIV_UNLIKELY(offsets != offsets_)) {
- ulint size = rec_offs_get_n_alloc(offsets)
- * sizeof *offsets;
-
- tmp_heap = mem_heap_create(size);
- offsets = mem_heap_dup(tmp_heap, offsets, size);
- }
-
- mem_heap_empty(heap);
-
- prev_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec,
- index, offsets,
- &n_ext, heap);
-
- if (UNIV_LIKELY_NULL(tmp_heap)) {
- mem_heap_free(tmp_heap);
- }
- }
-
- ret = row_search_for_mysql(buf, PAGE_CUR_G, prebuilt, 0, ROW_SEL_NEXT);
-
- goto loop;
-}
-
-/*********************************************************************//**
-Checks a table for corruption.
-@return DB_ERROR or DB_SUCCESS */
-UNIV_INTERN
-ulint
-row_check_table_for_mysql(
-/*======================*/
- row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
- handle */
-{
- dict_table_t* table = prebuilt->table;
- dict_index_t* index;
- ulint n_rows;
- ulint n_rows_in_table = ULINT_UNDEFINED;
- ulint ret = DB_SUCCESS;
- ulint old_isolation_level;
-
- if (table->ibd_file_missing) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error:\n"
- "InnoDB: MySQL is trying to use a table handle"
- " but the .ibd file for\n"
- "InnoDB: table %s does not exist.\n"
- "InnoDB: Have you deleted the .ibd file"
- " from the database directory under\n"
- "InnoDB: the MySQL datadir, or have you"
- " used DISCARD TABLESPACE?\n"
- "InnoDB: Look from\n"
- "InnoDB: " REFMAN "innodb-troubleshooting.html\n"
- "InnoDB: how you can resolve the problem.\n",
- table->name);
- return(DB_ERROR);
- }
-
- prebuilt->trx->op_info = "checking table";
-
- old_isolation_level = prebuilt->trx->isolation_level;
-
- /* We must run the index record counts at an isolation level
- >= READ COMMITTED, because a dirty read can see a wrong number
- of records in some index; to play safe, we use always
- REPEATABLE READ here */
-
- prebuilt->trx->isolation_level = TRX_ISO_REPEATABLE_READ;
-
- /* Enlarge the fatal lock wait timeout during CHECK TABLE. */
- mutex_enter(&kernel_mutex);
- srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */
- mutex_exit(&kernel_mutex);
-
- index = dict_table_get_first_index(table);
-
- while (index != NULL) {
- /* fputs("Validating index ", stderr);
- ut_print_name(stderr, trx, FALSE, index->name);
- putc('\n', stderr); */
-
- if (!btr_validate_index(index, prebuilt->trx)) {
- ret = DB_ERROR;
- } else {
- if (!row_scan_and_check_index(prebuilt,index, &n_rows)){
- ret = DB_ERROR;
- }
-
- if (trx_is_interrupted(prebuilt->trx)) {
- ret = DB_INTERRUPTED;
- break;
- }
-
- /* fprintf(stderr, "%lu entries in index %s\n", n_rows,
- index->name); */
-
- if (index == dict_table_get_first_index(table)) {
- n_rows_in_table = n_rows;
- } else if (n_rows != n_rows_in_table) {
-
- ret = DB_ERROR;
-
- fputs("Error: ", stderr);
- dict_index_name_print(stderr,
- prebuilt->trx, index);
- fprintf(stderr,
- " contains %lu entries,"
- " should be %lu\n",
- (ulong) n_rows,
- (ulong) n_rows_in_table);
- }
- }
-
- index = dict_table_get_next_index(index);
- }
-
- /* Restore the original isolation level */
- prebuilt->trx->isolation_level = old_isolation_level;
-
- /* We validate also the whole adaptive hash index for all tables
- at every CHECK TABLE */
-
- if (!btr_search_validate()) {
-
- ret = DB_ERROR;
- }
-
- /* Restore the fatal lock wait timeout after CHECK TABLE. */
- mutex_enter(&kernel_mutex);
- srv_fatal_semaphore_wait_threshold -= 7200; /* 2 hours */
- mutex_exit(&kernel_mutex);
-
- prebuilt->trx->op_info = "";
-
- return(ret);
-}
-
-/*********************************************************************//**
-Determines if a table is a magic monitor table.
-@return TRUE if monitor table */
-UNIV_INTERN
-ibool
-row_is_magic_monitor_table(
-/*=======================*/
- const char* table_name) /*!< in: name of the table, in the
- form database/table_name */
-{
- const char* name; /* table_name without database/ */
- ulint len;
-
- name = strchr(table_name, '/');
- ut_a(name != NULL);
- name++;
- len = strlen(name) + 1;
-
- if (STR_EQ(name, len, S_innodb_monitor)
- || STR_EQ(name, len, S_innodb_lock_monitor)
- || STR_EQ(name, len, S_innodb_tablespace_monitor)
- || STR_EQ(name, len, S_innodb_table_monitor)
- || STR_EQ(name, len, S_innodb_mem_validate)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
diff --git a/storage/innodb_plugin/row/row0purge.c b/storage/innodb_plugin/row/row0purge.c
deleted file mode 100644
index 500ebe571ab..00000000000
--- a/storage/innodb_plugin/row/row0purge.c
+++ /dev/null
@@ -1,689 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file row/row0purge.c
-Purge obsolete records
-
-Created 3/14/1997 Heikki Tuuri
-*******************************************************/
-
-#include "row0purge.h"
-
-#ifdef UNIV_NONINL
-#include "row0purge.ic"
-#endif
-
-#include "fsp0fsp.h"
-#include "mach0data.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "trx0undo.h"
-#include "trx0purge.h"
-#include "trx0rec.h"
-#include "que0que.h"
-#include "row0row.h"
-#include "row0upd.h"
-#include "row0vers.h"
-#include "row0mysql.h"
-#include "log0log.h"
-
-/********************************************************************//**
-Creates a purge node to a query graph.
-@return own: purge node */
-UNIV_INTERN
-purge_node_t*
-row_purge_node_create(
-/*==================*/
- que_thr_t* parent, /*!< in: parent node, i.e., a thr node */
- mem_heap_t* heap) /*!< in: memory heap where created */
-{
- purge_node_t* node;
-
- ut_ad(parent && heap);
-
- node = mem_heap_alloc(heap, sizeof(purge_node_t));
-
- node->common.type = QUE_NODE_PURGE;
- node->common.parent = parent;
-
- node->heap = mem_heap_create(256);
-
- return(node);
-}
-
-/***********************************************************//**
-Repositions the pcur in the purge node on the clustered index record,
-if found.
-@return TRUE if the record was found */
-static
-ibool
-row_purge_reposition_pcur(
-/*======================*/
- ulint mode, /*!< in: latching mode */
- purge_node_t* node, /*!< in: row purge node */
- mtr_t* mtr) /*!< in: mtr */
-{
- ibool found;
-
- if (node->found_clust) {
- found = btr_pcur_restore_position(mode, &(node->pcur), mtr);
-
- return(found);
- }
-
- found = row_search_on_row_ref(&(node->pcur), mode, node->table,
- node->ref, mtr);
- node->found_clust = found;
-
- if (found) {
- btr_pcur_store_position(&(node->pcur), mtr);
- }
-
- return(found);
-}
-
-/***********************************************************//**
-Removes a delete marked clustered index record if possible.
-@return TRUE if success, or if not found, or if modified after the
-delete marking */
-static
-ibool
-row_purge_remove_clust_if_poss_low(
-/*===============================*/
- purge_node_t* node, /*!< in: row purge node */
- ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
-{
- dict_index_t* index;
- btr_pcur_t* pcur;
- btr_cur_t* btr_cur;
- ibool success;
- ulint err;
- mtr_t mtr;
- rec_t* rec;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- rec_offs_init(offsets_);
-
- index = dict_table_get_first_index(node->table);
-
- pcur = &(node->pcur);
- btr_cur = btr_pcur_get_btr_cur(pcur);
-
- mtr_start(&mtr);
-
- success = row_purge_reposition_pcur(mode, node, &mtr);
-
- if (!success) {
- /* The record is already removed */
-
- btr_pcur_commit_specify_mtr(pcur, &mtr);
-
- return(TRUE);
- }
-
- rec = btr_pcur_get_rec(pcur);
-
- if (0 != ut_dulint_cmp(node->roll_ptr, row_get_rec_roll_ptr(
- rec, index, rec_get_offsets(
- rec, index, offsets_,
- ULINT_UNDEFINED, &heap)))) {
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- /* Someone else has modified the record later: do not remove */
- btr_pcur_commit_specify_mtr(pcur, &mtr);
-
- return(TRUE);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- if (mode == BTR_MODIFY_LEAF) {
- success = btr_cur_optimistic_delete(btr_cur, &mtr);
- } else {
- ut_ad(mode == BTR_MODIFY_TREE);
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
- RB_NONE, &mtr);
-
- if (err == DB_SUCCESS) {
- success = TRUE;
- } else if (err == DB_OUT_OF_FILE_SPACE) {
- success = FALSE;
- } else {
- ut_error;
- }
- }
-
- btr_pcur_commit_specify_mtr(pcur, &mtr);
-
- return(success);
-}
-
-/***********************************************************//**
-Removes a clustered index record if it has not been modified after the delete
-marking. */
-static
-void
-row_purge_remove_clust_if_poss(
-/*===========================*/
- purge_node_t* node) /*!< in: row purge node */
-{
- ibool success;
- ulint n_tries = 0;
-
- /* fputs("Purge: Removing clustered record\n", stderr); */
-
- success = row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF);
- if (success) {
-
- return;
- }
-retry:
- success = row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_TREE);
- /* The delete operation may fail if we have little
- file space left: TODO: easiest to crash the database
- and restart with more file space */
-
- if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
- n_tries++;
-
- os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
-
- goto retry;
- }
-
- ut_a(success);
-}
-
-/***********************************************************//**
-Removes a secondary index entry if possible.
-@return TRUE if success or if not found */
-static
-ibool
-row_purge_remove_sec_if_poss_low(
-/*=============================*/
- purge_node_t* node, /*!< in: row purge node */
- dict_index_t* index, /*!< in: index */
- const dtuple_t* entry, /*!< in: index entry */
- ulint mode) /*!< in: latch mode BTR_MODIFY_LEAF or
- BTR_MODIFY_TREE */
-{
- btr_pcur_t pcur;
- btr_cur_t* btr_cur;
- ibool success;
- ibool old_has = 0; /* remove warning */
- ibool found;
- ulint err;
- mtr_t mtr;
- mtr_t mtr_vers;
-
- log_free_check();
- mtr_start(&mtr);
-
- found = row_search_index_entry(index, entry, mode, &pcur, &mtr);
-
- if (!found) {
- /* Not found. This is a legitimate condition. In a
- rollback, InnoDB will remove secondary recs that would
- be purged anyway. Then the actual purge will not find
- the secondary index record. Also, the purge itself is
- eager: if it comes to consider a secondary index
- record, and notices it does not need to exist in the
- index, it will remove it. Then if/when the purge
- comes to consider the secondary index record a second
- time, it will not exist any more in the index. */
-
- /* fputs("PURGE:........sec entry not found\n", stderr); */
- /* dtuple_print(stderr, entry); */
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- return(TRUE);
- }
-
- btr_cur = btr_pcur_get_btr_cur(&pcur);
-
- /* We should remove the index record if no later version of the row,
- which cannot be purged yet, requires its existence. If some requires,
- we should do nothing. */
-
- mtr_start(&mtr_vers);
-
- success = row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, &mtr_vers);
-
- if (success) {
- old_has = row_vers_old_has_index_entry(
- TRUE, btr_pcur_get_rec(&(node->pcur)),
- &mtr_vers, index, entry);
- }
-
- btr_pcur_commit_specify_mtr(&(node->pcur), &mtr_vers);
-
- if (!success || !old_has) {
- /* Remove the index record */
-
- if (mode == BTR_MODIFY_LEAF) {
- success = btr_cur_optimistic_delete(btr_cur, &mtr);
- } else {
- ut_ad(mode == BTR_MODIFY_TREE);
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
- RB_NONE, &mtr);
- success = err == DB_SUCCESS;
- ut_a(success || err == DB_OUT_OF_FILE_SPACE);
- }
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- return(success);
-}
-
-/***********************************************************//**
-Removes a secondary index entry if possible. */
-UNIV_INLINE
-void
-row_purge_remove_sec_if_poss(
-/*=========================*/
- purge_node_t* node, /*!< in: row purge node */
- dict_index_t* index, /*!< in: index */
- dtuple_t* entry) /*!< in: index entry */
-{
- ibool success;
- ulint n_tries = 0;
-
- /* fputs("Purge: Removing secondary record\n", stderr); */
-
- success = row_purge_remove_sec_if_poss_low(node, index, entry,
- BTR_MODIFY_LEAF);
- if (success) {
-
- return;
- }
-retry:
- success = row_purge_remove_sec_if_poss_low(node, index, entry,
- BTR_MODIFY_TREE);
- /* The delete operation may fail if we have little
- file space left: TODO: easiest to crash the database
- and restart with more file space */
-
- if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
-
- n_tries++;
-
- os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
-
- goto retry;
- }
-
- ut_a(success);
-}
-
-/***********************************************************//**
-Purges a delete marking of a record. */
-static
-void
-row_purge_del_mark(
-/*===============*/
- purge_node_t* node) /*!< in: row purge node */
-{
- mem_heap_t* heap;
- dtuple_t* entry;
- dict_index_t* index;
-
- ut_ad(node);
-
- heap = mem_heap_create(1024);
-
- while (node->index != NULL) {
- index = node->index;
-
- /* Build the index entry */
- entry = row_build_index_entry(node->row, NULL, index, heap);
- ut_a(entry);
- row_purge_remove_sec_if_poss(node, index, entry);
-
- node->index = dict_table_get_next_index(node->index);
- }
-
- mem_heap_free(heap);
-
- row_purge_remove_clust_if_poss(node);
-}
-
-/***********************************************************//**
-Purges an update of an existing record. Also purges an update of a delete
-marked record if that record contained an externally stored field. */
-static
-void
-row_purge_upd_exist_or_extern(
-/*==========================*/
- purge_node_t* node) /*!< in: row purge node */
-{
- mem_heap_t* heap;
- dtuple_t* entry;
- dict_index_t* index;
- ibool is_insert;
- ulint rseg_id;
- ulint page_no;
- ulint offset;
- ulint i;
- mtr_t mtr;
-
- ut_ad(node);
-
- if (node->rec_type == TRX_UNDO_UPD_DEL_REC) {
-
- goto skip_secondaries;
- }
-
- heap = mem_heap_create(1024);
-
- while (node->index != NULL) {
- index = node->index;
-
- if (row_upd_changes_ord_field_binary(NULL, node->index,
- node->update)) {
- /* Build the older version of the index entry */
- entry = row_build_index_entry(node->row, NULL,
- index, heap);
- ut_a(entry);
- row_purge_remove_sec_if_poss(node, index, entry);
- }
-
- node->index = dict_table_get_next_index(node->index);
- }
-
- mem_heap_free(heap);
-
-skip_secondaries:
- /* Free possible externally stored fields */
- for (i = 0; i < upd_get_n_fields(node->update); i++) {
-
- const upd_field_t* ufield
- = upd_get_nth_field(node->update, i);
-
- if (dfield_is_ext(&ufield->new_val)) {
- buf_block_t* block;
- ulint internal_offset;
- byte* data_field;
-
- /* We use the fact that new_val points to
- node->undo_rec and get thus the offset of
- dfield data inside the undo record. Then we
- can calculate from node->roll_ptr the file
- address of the new_val data */
-
- internal_offset
- = ((const byte*)
- dfield_get_data(&ufield->new_val))
- - node->undo_rec;
-
- ut_a(internal_offset < UNIV_PAGE_SIZE);
-
- trx_undo_decode_roll_ptr(node->roll_ptr,
- &is_insert, &rseg_id,
- &page_no, &offset);
- mtr_start(&mtr);
-
- /* We have to acquire an X-latch to the clustered
- index tree */
-
- index = dict_table_get_first_index(node->table);
-
- mtr_x_lock(dict_index_get_lock(index), &mtr);
-
- /* NOTE: we must also acquire an X-latch to the
- root page of the tree. We will need it when we
- free pages from the tree. If the tree is of height 1,
- the tree X-latch does NOT protect the root page,
- because it is also a leaf page. Since we will have a
- latch on an undo log page, we would break the
- latching order if we would only later latch the
- root page of such a tree! */
-
- btr_root_get(index, &mtr);
-
- /* We assume in purge of externally stored fields
- that the space id of the undo log record is 0! */
-
- block = buf_page_get(0, 0, page_no, RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
-
- data_field = buf_block_get_frame(block)
- + offset + internal_offset;
-
- ut_a(dfield_get_len(&ufield->new_val)
- >= BTR_EXTERN_FIELD_REF_SIZE);
- btr_free_externally_stored_field(
- index,
- data_field + dfield_get_len(&ufield->new_val)
- - BTR_EXTERN_FIELD_REF_SIZE,
- NULL, NULL, NULL, 0, RB_NONE, &mtr);
- mtr_commit(&mtr);
- }
- }
-}
-
-/***********************************************************//**
-Parses the row reference and other info in a modify undo log record.
-@return TRUE if purge operation required: NOTE that then the CALLER
-must unfreeze data dictionary! */
-static
-ibool
-row_purge_parse_undo_rec(
-/*=====================*/
- purge_node_t* node, /*!< in: row undo node */
- ibool* updated_extern,
- /*!< out: TRUE if an externally stored field
- was updated */
- que_thr_t* thr) /*!< in: query thread */
-{
- dict_index_t* clust_index;
- byte* ptr;
- trx_t* trx;
- undo_no_t undo_no;
- dulint table_id;
- trx_id_t trx_id;
- roll_ptr_t roll_ptr;
- ulint info_bits;
- ulint type;
- ulint cmpl_info;
-
- ut_ad(node && thr);
-
- trx = thr_get_trx(thr);
-
- ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
- updated_extern, &undo_no, &table_id);
- node->rec_type = type;
-
- if (type == TRX_UNDO_UPD_DEL_REC && !(*updated_extern)) {
-
- return(FALSE);
- }
-
- ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
- &info_bits);
- node->table = NULL;
-
- if (type == TRX_UNDO_UPD_EXIST_REC
- && cmpl_info & UPD_NODE_NO_ORD_CHANGE && !(*updated_extern)) {
-
- /* Purge requires no changes to indexes: we may return */
-
- return(FALSE);
- }
-
- /* Prevent DROP TABLE etc. from running when we are doing the purge
- for this row */
-
- row_mysql_freeze_data_dictionary(trx);
-
- mutex_enter(&(dict_sys->mutex));
-
- node->table = dict_table_get_on_id_low(table_id);
-
- mutex_exit(&(dict_sys->mutex));
-
- if (node->table == NULL) {
- /* The table has been dropped: no need to do purge */
-err_exit:
- row_mysql_unfreeze_data_dictionary(trx);
- return(FALSE);
- }
-
- if (node->table->ibd_file_missing) {
- /* We skip purge of missing .ibd files */
-
- node->table = NULL;
-
- goto err_exit;
- }
-
- clust_index = dict_table_get_first_index(node->table);
-
- if (clust_index == NULL) {
- /* The table was corrupt in the data dictionary */
-
- goto err_exit;
- }
-
- ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
- node->heap);
-
- ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
- roll_ptr, info_bits, trx,
- node->heap, &(node->update));
-
- /* Read to the partial row the fields that occur in indexes */
-
- if (!(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
- ptr = trx_undo_rec_get_partial_row(
- ptr, clust_index, &node->row,
- type == TRX_UNDO_UPD_DEL_REC,
- node->heap);
- }
-
- return(TRUE);
-}
-
-/***********************************************************//**
-Fetches an undo log record and does the purge for the recorded operation.
-If none left, or the current purge completed, returns the control to the
-parent node, which is always a query thread node.
-@return DB_SUCCESS if operation successfully completed, else error code */
-static
-ulint
-row_purge(
-/*======*/
- purge_node_t* node, /*!< in: row purge node */
- que_thr_t* thr) /*!< in: query thread */
-{
- roll_ptr_t roll_ptr;
- ibool purge_needed;
- ibool updated_extern;
- trx_t* trx;
-
- ut_ad(node && thr);
-
- trx = thr_get_trx(thr);
-
- node->undo_rec = trx_purge_fetch_next_rec(&roll_ptr,
- &(node->reservation),
- node->heap);
- if (!node->undo_rec) {
- /* Purge completed for this query thread */
-
- thr->run_node = que_node_get_parent(node);
-
- return(DB_SUCCESS);
- }
-
- node->roll_ptr = roll_ptr;
-
- if (node->undo_rec == &trx_purge_dummy_rec) {
- purge_needed = FALSE;
- } else {
- purge_needed = row_purge_parse_undo_rec(node, &updated_extern,
- thr);
- /* If purge_needed == TRUE, we must also remember to unfreeze
- data dictionary! */
- }
-
- if (purge_needed) {
- node->found_clust = FALSE;
-
- node->index = dict_table_get_next_index(
- dict_table_get_first_index(node->table));
-
- if (node->rec_type == TRX_UNDO_DEL_MARK_REC) {
- row_purge_del_mark(node);
-
- } else if (updated_extern
- || node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
-
- row_purge_upd_exist_or_extern(node);
- }
-
- if (node->found_clust) {
- btr_pcur_close(&(node->pcur));
- }
-
- row_mysql_unfreeze_data_dictionary(trx);
- }
-
- /* Do some cleanup */
- trx_purge_rec_release(node->reservation);
- mem_heap_empty(node->heap);
-
- thr->run_node = node;
-
- return(DB_SUCCESS);
-}
-
-/***********************************************************//**
-Does the purge operation for a single undo log record. This is a high-level
-function used in an SQL execution graph.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-row_purge_step(
-/*===========*/
- que_thr_t* thr) /*!< in: query thread */
-{
- purge_node_t* node;
- ulint err;
-
- ut_ad(thr);
-
- node = thr->run_node;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
-
- err = row_purge(node, thr);
-
- ut_ad(err == DB_SUCCESS);
-
- return(thr);
-}
diff --git a/storage/innodb_plugin/row/row0row.c b/storage/innodb_plugin/row/row0row.c
deleted file mode 100644
index 128ac3ba3e8..00000000000
--- a/storage/innodb_plugin/row/row0row.c
+++ /dev/null
@@ -1,1168 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file row/row0row.c
-General row routines
-
-Created 4/20/1996 Heikki Tuuri
-*******************************************************/
-
-#include "row0row.h"
-
-#ifdef UNIV_NONINL
-#include "row0row.ic"
-#endif
-
-#include "data0type.h"
-#include "dict0dict.h"
-#include "btr0btr.h"
-#include "ha_prototypes.h"
-#include "mach0data.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "trx0undo.h"
-#include "trx0purge.h"
-#include "trx0rec.h"
-#include "que0que.h"
-#include "row0ext.h"
-#include "row0upd.h"
-#include "rem0cmp.h"
-#include "read0read.h"
-#include "ut0mem.h"
-
-/*********************************************************************//**
-Gets the offset of trx id field, in bytes relative to the origin of
-a clustered index record.
-@return offset of DATA_TRX_ID */
-UNIV_INTERN
-ulint
-row_get_trx_id_offset(
-/*==================*/
- const rec_t* rec __attribute__((unused)),
- /*!< in: record */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
-{
- ulint pos;
- ulint offset;
- ulint len;
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
-
- offset = rec_get_nth_field_offs(offsets, pos, &len);
-
- ut_ad(len == DATA_TRX_ID_LEN);
-
- return(offset);
-}
-
-/*****************************************************************//**
-When an insert or purge to a table is performed, this function builds
-the entry to be inserted into or purged from an index on the table.
-@return index entry which should be inserted or purged, or NULL if the
-externally stored columns in the clustered index record are
-unavailable and ext != NULL */
-UNIV_INTERN
-dtuple_t*
-row_build_index_entry(
-/*==================*/
- const dtuple_t* row, /*!< in: row which should be
- inserted or purged */
- row_ext_t* ext, /*!< in: externally stored column prefixes,
- or NULL */
- dict_index_t* index, /*!< in: index on the table */
- mem_heap_t* heap) /*!< in: memory heap from which the memory for
- the index entry is allocated */
-{
- dtuple_t* entry;
- ulint entry_len;
- ulint i;
-
- ut_ad(row && index && heap);
- ut_ad(dtuple_check_typed(row));
-
- entry_len = dict_index_get_n_fields(index);
- entry = dtuple_create(heap, entry_len);
-
- if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
- dtuple_set_n_fields_cmp(entry, entry_len);
- /* There may only be externally stored columns
- in a clustered index B-tree of a user table. */
- ut_a(!ext);
- } else {
- dtuple_set_n_fields_cmp(
- entry, dict_index_get_n_unique_in_tree(index));
- }
-
- for (i = 0; i < entry_len; i++) {
- const dict_field_t* ind_field
- = dict_index_get_nth_field(index, i);
- const dict_col_t* col
- = ind_field->col;
- ulint col_no
- = dict_col_get_no(col);
- dfield_t* dfield
- = dtuple_get_nth_field(entry, i);
- const dfield_t* dfield2
- = dtuple_get_nth_field(row, col_no);
- ulint len
- = dfield_get_len(dfield2);
-
- dfield_copy(dfield, dfield2);
-
- if (dfield_is_null(dfield) || ind_field->prefix_len == 0) {
- continue;
- }
-
- /* If a column prefix index, take only the prefix.
- Prefix-indexed columns may be externally stored. */
- ut_ad(col->ord_part);
-
- if (UNIV_LIKELY_NULL(ext)) {
- /* See if the column is stored externally. */
- const byte* buf = row_ext_lookup(ext, col_no,
- &len);
- if (UNIV_LIKELY_NULL(buf)) {
- if (UNIV_UNLIKELY(buf == field_ref_zero)) {
- return(NULL);
- }
- dfield_set_data(dfield, buf, len);
- }
- } else if (dfield_is_ext(dfield)) {
- ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
- len -= BTR_EXTERN_FIELD_REF_SIZE;
- ut_a(ind_field->prefix_len <= len
- || dict_index_is_clust(index));
- }
-
- len = dtype_get_at_most_n_mbchars(
- col->prtype, col->mbminlen, col->mbmaxlen,
- ind_field->prefix_len, len, dfield_get_data(dfield));
- dfield_set_len(dfield, len);
- }
-
- ut_ad(dtuple_check_typed(entry));
-
- return(entry);
-}
-
-/*******************************************************************//**
-An inverse function to row_build_index_entry. Builds a row from a
-record in a clustered index.
-@return own: row built; see the NOTE below! */
-UNIV_INTERN
-dtuple_t*
-row_build(
-/*======*/
- ulint type, /*!< in: ROW_COPY_POINTERS or
- ROW_COPY_DATA; the latter
- copies also the data fields to
- heap while the first only
- places pointers to data fields
- on the index page, and thus is
- more efficient */
- const dict_index_t* index, /*!< in: clustered index */
- const rec_t* rec, /*!< in: record in the clustered
- index; NOTE: in the case
- ROW_COPY_POINTERS the data
- fields in the row will point
- directly into this record,
- therefore, the buffer page of
- this record must be at least
- s-latched and the latch held
- as long as the row dtuple is used! */
- const ulint* offsets,/*!< in: rec_get_offsets(rec,index)
- or NULL, in which case this function
- will invoke rec_get_offsets() */
- const dict_table_t* col_table,
- /*!< in: table, to check which
- externally stored columns
- occur in the ordering columns
- of an index, or NULL if
- index->table should be
- consulted instead */
- row_ext_t** ext, /*!< out, own: cache of
- externally stored column
- prefixes, or NULL */
- mem_heap_t* heap) /*!< in: memory heap from which
- the memory needed is allocated */
-{
- dtuple_t* row;
- const dict_table_t* table;
- ulint n_fields;
- ulint n_ext_cols;
- ulint* ext_cols = NULL; /* remove warning */
- ulint len;
- ulint row_len;
- byte* buf;
- ulint i;
- ulint j;
- mem_heap_t* tmp_heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- rec_offs_init(offsets_);
-
- ut_ad(index && rec && heap);
- ut_ad(dict_index_is_clust(index));
-
- if (!offsets) {
- offsets = rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &tmp_heap);
- } else {
- ut_ad(rec_offs_validate(rec, index, offsets));
- }
-
- if (type != ROW_COPY_POINTERS) {
- /* Take a copy of rec to heap */
- buf = mem_heap_alloc(heap, rec_offs_size(offsets));
- rec = rec_copy(buf, rec, offsets);
- /* Avoid a debug assertion in rec_offs_validate(). */
- rec_offs_make_valid(rec, index, (ulint*) offsets);
- }
-
- table = index->table;
- row_len = dict_table_get_n_cols(table);
-
- row = dtuple_create(heap, row_len);
-
- dict_table_copy_types(row, table);
-
- dtuple_set_info_bits(row, rec_get_info_bits(
- rec, dict_table_is_comp(table)));
-
- n_fields = rec_offs_n_fields(offsets);
- n_ext_cols = rec_offs_n_extern(offsets);
- if (n_ext_cols) {
- ext_cols = mem_heap_alloc(heap, n_ext_cols * sizeof *ext_cols);
- }
-
- for (i = j = 0; i < n_fields; i++) {
- dict_field_t* ind_field
- = dict_index_get_nth_field(index, i);
- const dict_col_t* col
- = dict_field_get_col(ind_field);
- ulint col_no
- = dict_col_get_no(col);
- dfield_t* dfield
- = dtuple_get_nth_field(row, col_no);
-
- if (ind_field->prefix_len == 0) {
-
- const byte* field = rec_get_nth_field(
- rec, offsets, i, &len);
-
- dfield_set_data(dfield, field, len);
- }
-
- if (rec_offs_nth_extern(offsets, i)) {
- dfield_set_ext(dfield);
-
- if (UNIV_LIKELY_NULL(col_table)) {
- ut_a(col_no
- < dict_table_get_n_cols(col_table));
- col = dict_table_get_nth_col(
- col_table, col_no);
- }
-
- if (col->ord_part) {
- /* We will have to fetch prefixes of
- externally stored columns that are
- referenced by column prefixes. */
- ext_cols[j++] = col_no;
- }
- }
- }
-
- ut_ad(dtuple_check_typed(row));
-
- if (j) {
- *ext = row_ext_create(j, ext_cols, row,
- dict_table_zip_size(index->table),
- heap);
- } else {
- *ext = NULL;
- }
-
- if (tmp_heap) {
- mem_heap_free(tmp_heap);
- }
-
- return(row);
-}
-
-/*******************************************************************//**
-Converts an index record to a typed data tuple.
-@return index entry built; does not set info_bits, and the data fields
-in the entry will point directly to rec */
-UNIV_INTERN
-dtuple_t*
-row_rec_to_index_entry_low(
-/*=======================*/
- const rec_t* rec, /*!< in: record in the index */
- const dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- ulint* n_ext, /*!< out: number of externally
- stored columns */
- mem_heap_t* heap) /*!< in: memory heap from which
- the memory needed is allocated */
-{
- dtuple_t* entry;
- dfield_t* dfield;
- ulint i;
- const byte* field;
- ulint len;
- ulint rec_len;
-
- ut_ad(rec && heap && index);
- /* Because this function may be invoked by row0merge.c
- on a record whose header is in different format, the check
- rec_offs_validate(rec, index, offsets) must be avoided here. */
- ut_ad(n_ext);
- *n_ext = 0;
-
- rec_len = rec_offs_n_fields(offsets);
-
- entry = dtuple_create(heap, rec_len);
-
- dtuple_set_n_fields_cmp(entry,
- dict_index_get_n_unique_in_tree(index));
- ut_ad(rec_len == dict_index_get_n_fields(index));
-
- dict_index_copy_types(entry, index, rec_len);
-
- for (i = 0; i < rec_len; i++) {
-
- dfield = dtuple_get_nth_field(entry, i);
- field = rec_get_nth_field(rec, offsets, i, &len);
-
- dfield_set_data(dfield, field, len);
-
- if (rec_offs_nth_extern(offsets, i)) {
- dfield_set_ext(dfield);
- (*n_ext)++;
- }
- }
-
- ut_ad(dtuple_check_typed(entry));
-
- return(entry);
-}
-
-/*******************************************************************//**
-Converts an index record to a typed data tuple. NOTE that externally
-stored (often big) fields are NOT copied to heap.
-@return own: index entry built; see the NOTE below! */
-UNIV_INTERN
-dtuple_t*
-row_rec_to_index_entry(
-/*===================*/
- ulint type, /*!< in: ROW_COPY_DATA, or
- ROW_COPY_POINTERS: the former
- copies also the data fields to
- heap as the latter only places
- pointers to data fields on the
- index page */
- const rec_t* rec, /*!< in: record in the index;
- NOTE: in the case
- ROW_COPY_POINTERS the data
- fields in the row will point
- directly into this record,
- therefore, the buffer page of
- this record must be at least
- s-latched and the latch held
- as long as the dtuple is used! */
- const dict_index_t* index, /*!< in: index */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec) */
- ulint* n_ext, /*!< out: number of externally
- stored columns */
- mem_heap_t* heap) /*!< in: memory heap from which
- the memory needed is allocated */
-{
- dtuple_t* entry;
- byte* buf;
-
- ut_ad(rec && heap && index);
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- if (type == ROW_COPY_DATA) {
- /* Take a copy of rec to heap */
- buf = mem_heap_alloc(heap, rec_offs_size(offsets));
- rec = rec_copy(buf, rec, offsets);
- /* Avoid a debug assertion in rec_offs_validate(). */
- rec_offs_make_valid(rec, index, offsets);
- }
-
- entry = row_rec_to_index_entry_low(rec, index, offsets, n_ext, heap);
-
- dtuple_set_info_bits(entry,
- rec_get_info_bits(rec, rec_offs_comp(offsets)));
-
- return(entry);
-}
-
-/*******************************************************************//**
-Builds from a secondary index record a row reference with which we can
-search the clustered index record.
-@return own: row reference built; see the NOTE below! */
-UNIV_INTERN
-dtuple_t*
-row_build_row_ref(
-/*==============*/
- ulint type, /*!< in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
- the former copies also the data fields to
- heap, whereas the latter only places pointers
- to data fields on the index page */
- dict_index_t* index, /*!< in: secondary index */
- const rec_t* rec, /*!< in: record in the index;
- NOTE: in the case ROW_COPY_POINTERS
- the data fields in the row will point
- directly into this record, therefore,
- the buffer page of this record must be
- at least s-latched and the latch held
- as long as the row reference is used! */
- mem_heap_t* heap) /*!< in: memory heap from which the memory
- needed is allocated */
-{
- dict_table_t* table;
- dict_index_t* clust_index;
- dfield_t* dfield;
- dtuple_t* ref;
- const byte* field;
- ulint len;
- ulint ref_len;
- ulint pos;
- byte* buf;
- ulint clust_col_prefix_len;
- ulint i;
- mem_heap_t* tmp_heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- ut_ad(index && rec && heap);
- ut_ad(!dict_index_is_clust(index));
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &tmp_heap);
- /* Secondary indexes must not contain externally stored columns. */
- ut_ad(!rec_offs_any_extern(offsets));
-
- if (type == ROW_COPY_DATA) {
- /* Take a copy of rec to heap */
-
- buf = mem_heap_alloc(heap, rec_offs_size(offsets));
-
- rec = rec_copy(buf, rec, offsets);
- /* Avoid a debug assertion in rec_offs_validate(). */
- rec_offs_make_valid(rec, index, offsets);
- }
-
- table = index->table;
-
- clust_index = dict_table_get_first_index(table);
-
- ref_len = dict_index_get_n_unique(clust_index);
-
- ref = dtuple_create(heap, ref_len);
-
- dict_index_copy_types(ref, clust_index, ref_len);
-
- for (i = 0; i < ref_len; i++) {
- dfield = dtuple_get_nth_field(ref, i);
-
- pos = dict_index_get_nth_field_pos(index, clust_index, i);
-
- ut_a(pos != ULINT_UNDEFINED);
-
- field = rec_get_nth_field(rec, offsets, pos, &len);
-
- dfield_set_data(dfield, field, len);
-
- /* If the primary key contains a column prefix, then the
- secondary index may contain a longer prefix of the same
- column, or the full column, and we must adjust the length
- accordingly. */
-
- clust_col_prefix_len = dict_index_get_nth_field(
- clust_index, i)->prefix_len;
-
- if (clust_col_prefix_len > 0) {
- if (len != UNIV_SQL_NULL) {
-
- const dtype_t* dtype
- = dfield_get_type(dfield);
-
- dfield_set_len(dfield,
- dtype_get_at_most_n_mbchars(
- dtype->prtype,
- dtype->mbminlen,
- dtype->mbmaxlen,
- clust_col_prefix_len,
- len, (char*) field));
- }
- }
- }
-
- ut_ad(dtuple_check_typed(ref));
- if (tmp_heap) {
- mem_heap_free(tmp_heap);
- }
-
- return(ref);
-}
-
-/*******************************************************************//**
-Builds from a secondary index record a row reference with which we can
-search the clustered index record. */
-UNIV_INTERN
-void
-row_build_row_ref_in_tuple(
-/*=======================*/
- dtuple_t* ref, /*!< in/out: row reference built;
- see the NOTE below! */
- const rec_t* rec, /*!< in: record in the index;
- NOTE: the data fields in ref
- will point directly into this
- record, therefore, the buffer
- page of this record must be at
- least s-latched and the latch
- held as long as the row
- reference is used! */
- const dict_index_t* index, /*!< in: secondary index */
- ulint* offsets,/*!< in: rec_get_offsets(rec, index)
- or NULL */
- trx_t* trx) /*!< in: transaction */
-{
- const dict_index_t* clust_index;
- dfield_t* dfield;
- const byte* field;
- ulint len;
- ulint ref_len;
- ulint pos;
- ulint clust_col_prefix_len;
- ulint i;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- rec_offs_init(offsets_);
-
- ut_a(ref);
- ut_a(index);
- ut_a(rec);
- ut_ad(!dict_index_is_clust(index));
-
- if (UNIV_UNLIKELY(!index->table)) {
- fputs("InnoDB: table ", stderr);
-notfound:
- ut_print_name(stderr, trx, TRUE, index->table_name);
- fputs(" for index ", stderr);
- ut_print_name(stderr, trx, FALSE, index->name);
- fputs(" not found\n", stderr);
- ut_error;
- }
-
- clust_index = dict_table_get_first_index(index->table);
-
- if (UNIV_UNLIKELY(!clust_index)) {
- fputs("InnoDB: clust index for table ", stderr);
- goto notfound;
- }
-
- if (!offsets) {
- offsets = rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap);
- } else {
- ut_ad(rec_offs_validate(rec, index, offsets));
- }
-
- /* Secondary indexes must not contain externally stored columns. */
- ut_ad(!rec_offs_any_extern(offsets));
- ref_len = dict_index_get_n_unique(clust_index);
-
- ut_ad(ref_len == dtuple_get_n_fields(ref));
-
- dict_index_copy_types(ref, clust_index, ref_len);
-
- for (i = 0; i < ref_len; i++) {
- dfield = dtuple_get_nth_field(ref, i);
-
- pos = dict_index_get_nth_field_pos(index, clust_index, i);
-
- ut_a(pos != ULINT_UNDEFINED);
-
- field = rec_get_nth_field(rec, offsets, pos, &len);
-
- dfield_set_data(dfield, field, len);
-
- /* If the primary key contains a column prefix, then the
- secondary index may contain a longer prefix of the same
- column, or the full column, and we must adjust the length
- accordingly. */
-
- clust_col_prefix_len = dict_index_get_nth_field(
- clust_index, i)->prefix_len;
-
- if (clust_col_prefix_len > 0) {
- if (len != UNIV_SQL_NULL) {
-
- const dtype_t* dtype
- = dfield_get_type(dfield);
-
- dfield_set_len(dfield,
- dtype_get_at_most_n_mbchars(
- dtype->prtype,
- dtype->mbminlen,
- dtype->mbmaxlen,
- clust_col_prefix_len,
- len, (char*) field));
- }
- }
- }
-
- ut_ad(dtuple_check_typed(ref));
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-}
-
-/***************************************************************//**
-Searches the clustered index record for a row, if we have the row reference.
-@return TRUE if found */
-UNIV_INTERN
-ibool
-row_search_on_row_ref(
-/*==================*/
- btr_pcur_t* pcur, /*!< out: persistent cursor, which must
- be closed by the caller */
- ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
- const dict_table_t* table, /*!< in: table */
- const dtuple_t* ref, /*!< in: row reference */
- mtr_t* mtr) /*!< in/out: mtr */
-{
- ulint low_match;
- rec_t* rec;
- dict_index_t* index;
-
- ut_ad(dtuple_check_typed(ref));
-
- index = dict_table_get_first_index(table);
-
- ut_a(dtuple_get_n_fields(ref) == dict_index_get_n_unique(index));
-
- btr_pcur_open(index, ref, PAGE_CUR_LE, mode, pcur, mtr);
-
- low_match = btr_pcur_get_low_match(pcur);
-
- rec = btr_pcur_get_rec(pcur);
-
- if (page_rec_is_infimum(rec)) {
-
- return(FALSE);
- }
-
- if (low_match != dtuple_get_n_fields(ref)) {
-
- return(FALSE);
- }
-
- return(TRUE);
-}
-
-/*********************************************************************//**
-Fetches the clustered index record for a secondary index record. The latches
-on the secondary index record are preserved.
-@return record or NULL, if no record found */
-UNIV_INTERN
-rec_t*
-row_get_clust_rec(
-/*==============*/
- ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
- const rec_t* rec, /*!< in: record in a secondary index */
- dict_index_t* index, /*!< in: secondary index */
- dict_index_t** clust_index,/*!< out: clustered index */
- mtr_t* mtr) /*!< in: mtr */
-{
- mem_heap_t* heap;
- dtuple_t* ref;
- dict_table_t* table;
- btr_pcur_t pcur;
- ibool found;
- rec_t* clust_rec;
-
- ut_ad(!dict_index_is_clust(index));
-
- table = index->table;
-
- heap = mem_heap_create(256);
-
- ref = row_build_row_ref(ROW_COPY_POINTERS, index, rec, heap);
-
- found = row_search_on_row_ref(&pcur, mode, table, ref, mtr);
-
- clust_rec = found ? btr_pcur_get_rec(&pcur) : NULL;
-
- mem_heap_free(heap);
-
- btr_pcur_close(&pcur);
-
- *clust_index = dict_table_get_first_index(table);
-
- return(clust_rec);
-}
-
-/***************************************************************//**
-Searches an index record.
-@return TRUE if found */
-UNIV_INTERN
-ibool
-row_search_index_entry(
-/*===================*/
- dict_index_t* index, /*!< in: index */
- const dtuple_t* entry, /*!< in: index entry */
- ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
- btr_pcur_t* pcur, /*!< in/out: persistent cursor, which must
- be closed by the caller */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint n_fields;
- ulint low_match;
- rec_t* rec;
-
- ut_ad(dtuple_check_typed(entry));
-
- btr_pcur_open(index, entry, PAGE_CUR_LE, mode, pcur, mtr);
- low_match = btr_pcur_get_low_match(pcur);
-
- rec = btr_pcur_get_rec(pcur);
-
- n_fields = dtuple_get_n_fields(entry);
-
- return(!page_rec_is_infimum(rec) && low_match == n_fields);
-}
-
-#include <my_sys.h>
-
-/*******************************************************************//**
-Formats the raw data in "data" (in InnoDB on-disk format) that is of
-type DATA_INT using "prtype" and writes the result to "buf".
-If the data is in unknown format, then nothing is written to "buf",
-0 is returned and "format_in_hex" is set to TRUE, otherwise
-"format_in_hex" is left untouched.
-Not more than "buf_size" bytes are written to "buf".
-The result is always '\0'-terminated (provided buf_size > 0) and the
-number of bytes that were written to "buf" is returned (including the
-terminating '\0').
-@return number of bytes that were written */
-static
-ulint
-row_raw_format_int(
-/*===============*/
- const char* data, /*!< in: raw data */
- ulint data_len, /*!< in: raw data length
- in bytes */
- ulint prtype, /*!< in: precise type */
- char* buf, /*!< out: output buffer */
- ulint buf_size, /*!< in: output buffer size
- in bytes */
- ibool* format_in_hex) /*!< out: should the data be
- formated in hex */
-{
- ulint ret;
-
- if (data_len <= sizeof(ullint)) {
-
- ullint value;
- ibool unsigned_type = prtype & DATA_UNSIGNED;
-
- value = mach_read_int_type((const byte*) data,
- data_len, unsigned_type);
-
- if (unsigned_type) {
-
- ret = ut_snprintf(buf, buf_size, "%llu",
- value) + 1;
- } else {
-
- ret = ut_snprintf(buf, buf_size, "%lld",
- (long long) value) + 1;
- }
-
- } else {
-
- *format_in_hex = TRUE;
- ret = 0;
- }
-
- return(ut_min(ret, buf_size));
-}
-
-/*******************************************************************//**
-Formats the raw data in "data" (in InnoDB on-disk format) that is of
-type DATA_(CHAR|VARCHAR|MYSQL|VARMYSQL) using "prtype" and writes the
-result to "buf".
-If the data is in binary format, then nothing is written to "buf",
-0 is returned and "format_in_hex" is set to TRUE, otherwise
-"format_in_hex" is left untouched.
-Not more than "buf_size" bytes are written to "buf".
-The result is always '\0'-terminated (provided buf_size > 0) and the
-number of bytes that were written to "buf" is returned (including the
-terminating '\0').
-@return number of bytes that were written */
-static
-ulint
-row_raw_format_str(
-/*===============*/
- const char* data, /*!< in: raw data */
- ulint data_len, /*!< in: raw data length
- in bytes */
- ulint prtype, /*!< in: precise type */
- char* buf, /*!< out: output buffer */
- ulint buf_size, /*!< in: output buffer size
- in bytes */
- ibool* format_in_hex) /*!< out: should the data be
- formated in hex */
-{
- ulint charset_coll;
-
- if (buf_size == 0) {
-
- return(0);
- }
-
- /* we assume system_charset_info is UTF-8 */
-
- charset_coll = dtype_get_charset_coll(prtype);
-
- if (UNIV_LIKELY(dtype_is_utf8(prtype))) {
-
- return(ut_str_sql_format(data, data_len, buf, buf_size));
- }
- /* else */
-
- if (charset_coll == DATA_MYSQL_BINARY_CHARSET_COLL) {
-
- *format_in_hex = TRUE;
- return(0);
- }
- /* else */
-
- return(innobase_raw_format(data, data_len, charset_coll,
- buf, buf_size));
-}
-
-/*******************************************************************//**
-Formats the raw data in "data" (in InnoDB on-disk format) using
-"dict_field" and writes the result to "buf".
-Not more than "buf_size" bytes are written to "buf".
-The result is always NUL-terminated (provided buf_size is positive) and the
-number of bytes that were written to "buf" is returned (including the
-terminating NUL).
-@return number of bytes that were written */
-UNIV_INTERN
-ulint
-row_raw_format(
-/*===========*/
- const char* data, /*!< in: raw data */
- ulint data_len, /*!< in: raw data length
- in bytes */
- const dict_field_t* dict_field, /*!< in: index field */
- char* buf, /*!< out: output buffer */
- ulint buf_size) /*!< in: output buffer size
- in bytes */
-{
- ulint mtype;
- ulint prtype;
- ulint ret;
- ibool format_in_hex;
-
- if (buf_size == 0) {
-
- return(0);
- }
-
- if (data_len == UNIV_SQL_NULL) {
-
- ret = ut_snprintf((char*) buf, buf_size, "NULL") + 1;
-
- return(ut_min(ret, buf_size));
- }
-
- mtype = dict_field->col->mtype;
- prtype = dict_field->col->prtype;
-
- format_in_hex = FALSE;
-
- switch (mtype) {
- case DATA_INT:
-
- ret = row_raw_format_int(data, data_len, prtype,
- buf, buf_size, &format_in_hex);
- break;
- case DATA_CHAR:
- case DATA_VARCHAR:
- case DATA_MYSQL:
- case DATA_VARMYSQL:
-
- ret = row_raw_format_str(data, data_len, prtype,
- buf, buf_size, &format_in_hex);
- break;
- /* XXX support more data types */
- default:
-
- format_in_hex = TRUE;
- }
-
- if (format_in_hex) {
-
- if (UNIV_LIKELY(buf_size > 2)) {
-
- memcpy(buf, "0x", 2);
- buf += 2;
- buf_size -= 2;
- ret = 2 + ut_raw_to_hex(data, data_len,
- buf, buf_size);
- } else {
-
- buf[0] = '\0';
- ret = 1;
- }
- }
-
- return(ret);
-}
-
-#ifdef UNIV_COMPILE_TEST_FUNCS
-
-#include "ut0dbg.h"
-
-void
-test_row_raw_format_int()
-{
- ulint ret;
- char buf[128];
- ibool format_in_hex;
-
-#define CALL_AND_TEST(data, data_len, prtype, buf, buf_size,\
- ret_expected, buf_expected, format_in_hex_expected)\
- do {\
- ibool ok = TRUE;\
- ulint i;\
- memset(buf, 'x', 10);\
- buf[10] = '\0';\
- format_in_hex = FALSE;\
- fprintf(stderr, "TESTING \"\\x");\
- for (i = 0; i < data_len; i++) {\
- fprintf(stderr, "%02hhX", data[i]);\
- }\
- fprintf(stderr, "\", %lu, %lu, %lu\n",\
- (ulint) data_len, (ulint) prtype,\
- (ulint) buf_size);\
- ret = row_raw_format_int(data, data_len, prtype,\
- buf, buf_size, &format_in_hex);\
- if (ret != ret_expected) {\
- fprintf(stderr, "expected ret %lu, got %lu\n",\
- (ulint) ret_expected, ret);\
- ok = FALSE;\
- }\
- if (strcmp((char*) buf, buf_expected) != 0) {\
- fprintf(stderr, "expected buf \"%s\", got \"%s\"\n",\
- buf_expected, buf);\
- ok = FALSE;\
- }\
- if (format_in_hex != format_in_hex_expected) {\
- fprintf(stderr, "expected format_in_hex %d, got %d\n",\
- (int) format_in_hex_expected,\
- (int) format_in_hex);\
- ok = FALSE;\
- }\
- if (ok) {\
- fprintf(stderr, "OK: %lu, \"%s\" %d\n\n",\
- (ulint) ret, buf, (int) format_in_hex);\
- } else {\
- return;\
- }\
- } while (0)
-
-#if 1
- /* min values for signed 1-8 byte integers */
-
- CALL_AND_TEST("\x00", 1, 0,
- buf, sizeof(buf), 5, "-128", 0);
-
- CALL_AND_TEST("\x00\x00", 2, 0,
- buf, sizeof(buf), 7, "-32768", 0);
-
- CALL_AND_TEST("\x00\x00\x00", 3, 0,
- buf, sizeof(buf), 9, "-8388608", 0);
-
- CALL_AND_TEST("\x00\x00\x00\x00", 4, 0,
- buf, sizeof(buf), 12, "-2147483648", 0);
-
- CALL_AND_TEST("\x00\x00\x00\x00\x00", 5, 0,
- buf, sizeof(buf), 14, "-549755813888", 0);
-
- CALL_AND_TEST("\x00\x00\x00\x00\x00\x00", 6, 0,
- buf, sizeof(buf), 17, "-140737488355328", 0);
-
- CALL_AND_TEST("\x00\x00\x00\x00\x00\x00\x00", 7, 0,
- buf, sizeof(buf), 19, "-36028797018963968", 0);
-
- CALL_AND_TEST("\x00\x00\x00\x00\x00\x00\x00\x00", 8, 0,
- buf, sizeof(buf), 21, "-9223372036854775808", 0);
-
- /* min values for unsigned 1-8 byte integers */
-
- CALL_AND_TEST("\x00", 1, DATA_UNSIGNED,
- buf, sizeof(buf), 2, "0", 0);
-
- CALL_AND_TEST("\x00\x00", 2, DATA_UNSIGNED,
- buf, sizeof(buf), 2, "0", 0);
-
- CALL_AND_TEST("\x00\x00\x00", 3, DATA_UNSIGNED,
- buf, sizeof(buf), 2, "0", 0);
-
- CALL_AND_TEST("\x00\x00\x00\x00", 4, DATA_UNSIGNED,
- buf, sizeof(buf), 2, "0", 0);
-
- CALL_AND_TEST("\x00\x00\x00\x00\x00", 5, DATA_UNSIGNED,
- buf, sizeof(buf), 2, "0", 0);
-
- CALL_AND_TEST("\x00\x00\x00\x00\x00\x00", 6, DATA_UNSIGNED,
- buf, sizeof(buf), 2, "0", 0);
-
- CALL_AND_TEST("\x00\x00\x00\x00\x00\x00\x00", 7, DATA_UNSIGNED,
- buf, sizeof(buf), 2, "0", 0);
-
- CALL_AND_TEST("\x00\x00\x00\x00\x00\x00\x00\x00", 8, DATA_UNSIGNED,
- buf, sizeof(buf), 2, "0", 0);
-
- /* max values for signed 1-8 byte integers */
-
- CALL_AND_TEST("\xFF", 1, 0,
- buf, sizeof(buf), 4, "127", 0);
-
- CALL_AND_TEST("\xFF\xFF", 2, 0,
- buf, sizeof(buf), 6, "32767", 0);
-
- CALL_AND_TEST("\xFF\xFF\xFF", 3, 0,
- buf, sizeof(buf), 8, "8388607", 0);
-
- CALL_AND_TEST("\xFF\xFF\xFF\xFF", 4, 0,
- buf, sizeof(buf), 11, "2147483647", 0);
-
- CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF", 5, 0,
- buf, sizeof(buf), 13, "549755813887", 0);
-
- CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF", 6, 0,
- buf, sizeof(buf), 16, "140737488355327", 0);
-
- CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 7, 0,
- buf, sizeof(buf), 18, "36028797018963967", 0);
-
- CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 8, 0,
- buf, sizeof(buf), 20, "9223372036854775807", 0);
-
- /* max values for unsigned 1-8 byte integers */
-
- CALL_AND_TEST("\xFF", 1, DATA_UNSIGNED,
- buf, sizeof(buf), 4, "255", 0);
-
- CALL_AND_TEST("\xFF\xFF", 2, DATA_UNSIGNED,
- buf, sizeof(buf), 6, "65535", 0);
-
- CALL_AND_TEST("\xFF\xFF\xFF", 3, DATA_UNSIGNED,
- buf, sizeof(buf), 9, "16777215", 0);
-
- CALL_AND_TEST("\xFF\xFF\xFF\xFF", 4, DATA_UNSIGNED,
- buf, sizeof(buf), 11, "4294967295", 0);
-
- CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF", 5, DATA_UNSIGNED,
- buf, sizeof(buf), 14, "1099511627775", 0);
-
- CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF", 6, DATA_UNSIGNED,
- buf, sizeof(buf), 16, "281474976710655", 0);
-
- CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 7, DATA_UNSIGNED,
- buf, sizeof(buf), 18, "72057594037927935", 0);
-
- CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 8, DATA_UNSIGNED,
- buf, sizeof(buf), 21, "18446744073709551615", 0);
-
- /* some random values */
-
- CALL_AND_TEST("\x52", 1, 0,
- buf, sizeof(buf), 4, "-46", 0);
-
- CALL_AND_TEST("\x0E", 1, DATA_UNSIGNED,
- buf, sizeof(buf), 3, "14", 0);
-
- CALL_AND_TEST("\x62\xCE", 2, 0,
- buf, sizeof(buf), 6, "-7474", 0);
-
- CALL_AND_TEST("\x29\xD6", 2, DATA_UNSIGNED,
- buf, sizeof(buf), 6, "10710", 0);
-
- CALL_AND_TEST("\x7F\xFF\x90", 3, 0,
- buf, sizeof(buf), 5, "-112", 0);
-
- CALL_AND_TEST("\x00\xA1\x16", 3, DATA_UNSIGNED,
- buf, sizeof(buf), 6, "41238", 0);
-
- CALL_AND_TEST("\x7F\xFF\xFF\xF7", 4, 0,
- buf, sizeof(buf), 3, "-9", 0);
-
- CALL_AND_TEST("\x00\x00\x00\x5C", 4, DATA_UNSIGNED,
- buf, sizeof(buf), 3, "92", 0);
-
- CALL_AND_TEST("\x7F\xFF\xFF\xFF\xFF\xFF\xDC\x63", 8, 0,
- buf, sizeof(buf), 6, "-9117", 0);
-
- CALL_AND_TEST("\x00\x00\x00\x00\x00\x01\x64\x62", 8, DATA_UNSIGNED,
- buf, sizeof(buf), 6, "91234", 0);
-#endif
-
- /* speed test */
-
- speedo_t speedo;
- ulint i;
-
- speedo_reset(&speedo);
-
- for (i = 0; i < 1000000; i++) {
- row_raw_format_int("\x23", 1,
- 0, buf, sizeof(buf),
- &format_in_hex);
- row_raw_format_int("\x23", 1,
- DATA_UNSIGNED, buf, sizeof(buf),
- &format_in_hex);
-
- row_raw_format_int("\x00\x00\x00\x00\x00\x01\x64\x62", 8,
- 0, buf, sizeof(buf),
- &format_in_hex);
- row_raw_format_int("\x00\x00\x00\x00\x00\x01\x64\x62", 8,
- DATA_UNSIGNED, buf, sizeof(buf),
- &format_in_hex);
- }
-
- speedo_show(&speedo);
-}
-
-#endif /* UNIV_COMPILE_TEST_FUNCS */
diff --git a/storage/innodb_plugin/row/row0sel.c b/storage/innodb_plugin/row/row0sel.c
deleted file mode 100644
index 3ef9726588e..00000000000
--- a/storage/innodb_plugin/row/row0sel.c
+++ /dev/null
@@ -1,4736 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/***************************************************//**
-@file row/row0sel.c
-Select
-
-Created 12/19/1997 Heikki Tuuri
-*******************************************************/
-
-#include "row0sel.h"
-
-#ifdef UNIV_NONINL
-#include "row0sel.ic"
-#endif
-
-#include "dict0dict.h"
-#include "dict0boot.h"
-#include "trx0undo.h"
-#include "trx0trx.h"
-#include "btr0btr.h"
-#include "btr0cur.h"
-#include "btr0sea.h"
-#include "mach0data.h"
-#include "que0que.h"
-#include "row0upd.h"
-#include "row0row.h"
-#include "row0vers.h"
-#include "rem0cmp.h"
-#include "lock0lock.h"
-#include "eval0eval.h"
-#include "pars0sym.h"
-#include "pars0pars.h"
-#include "row0mysql.h"
-#include "read0read.h"
-#include "buf0lru.h"
-#include "ha_prototypes.h"
-
-/* Maximum number of rows to prefetch; MySQL interface has another parameter */
-#define SEL_MAX_N_PREFETCH 16
-
-/* Number of rows fetched, after which to start prefetching; MySQL interface
-has another parameter */
-#define SEL_PREFETCH_LIMIT 1
-
-/* When a select has accessed about this many pages, it returns control back
-to que_run_threads: this is to allow canceling runaway queries */
-
-#define SEL_COST_LIMIT 100
-
-/* Flags for search shortcut */
-#define SEL_FOUND 0
-#define SEL_EXHAUSTED 1
-#define SEL_RETRY 2
-
-/********************************************************************//**
-Returns TRUE if the user-defined column in a secondary index record
-is alphabetically the same as the corresponding BLOB column in the clustered
-index record.
-NOTE: the comparison is NOT done as a binary comparison, but character
-fields are compared with collation!
-@return TRUE if the columns are equal */
-static
-ibool
-row_sel_sec_rec_is_for_blob(
-/*========================*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type */
- ulint mbminlen, /*!< in: minimum length of a
- multi-byte character */
- ulint mbmaxlen, /*!< in: maximum length of a
- multi-byte character */
- const byte* clust_field, /*!< in: the locally stored part of
- the clustered index column, including
- the BLOB pointer; the clustered
- index record must be covered by
- a lock or a page latch to protect it
- against deletion (rollback or purge) */
- ulint clust_len, /*!< in: length of clust_field */
- const byte* sec_field, /*!< in: column in secondary index */
- ulint sec_len, /*!< in: length of sec_field */
- ulint zip_size) /*!< in: compressed page size, or 0 */
-{
- ulint len;
- byte buf[DICT_MAX_INDEX_COL_LEN];
-
- len = btr_copy_externally_stored_field_prefix(buf, sizeof buf,
- zip_size,
- clust_field, clust_len);
-
- if (UNIV_UNLIKELY(len == 0)) {
- /* The BLOB was being deleted as the server crashed.
- There should not be any secondary index records
- referring to this clustered index record, because
- btr_free_externally_stored_field() is called after all
- secondary index entries of the row have been purged. */
- return(FALSE);
- }
-
- len = dtype_get_at_most_n_mbchars(prtype, mbminlen, mbmaxlen,
- sec_len, len, (const char*) buf);
-
- return(!cmp_data_data(mtype, prtype, buf, len, sec_field, sec_len));
-}
-
-/********************************************************************//**
-Returns TRUE if the user-defined column values in a secondary index record
-are alphabetically the same as the corresponding columns in the clustered
-index record.
-NOTE: the comparison is NOT done as a binary comparison, but character
-fields are compared with collation!
-@return TRUE if the secondary record is equal to the corresponding
-fields in the clustered record, when compared with collation */
-static
-ibool
-row_sel_sec_rec_is_for_clust_rec(
-/*=============================*/
- const rec_t* sec_rec, /*!< in: secondary index record */
- dict_index_t* sec_index, /*!< in: secondary index */
- const rec_t* clust_rec, /*!< in: clustered index record;
- must be protected by a lock or
- a page latch against deletion
- in rollback or purge */
- dict_index_t* clust_index) /*!< in: clustered index */
-{
- const byte* sec_field;
- ulint sec_len;
- const byte* clust_field;
- ulint n;
- ulint i;
- mem_heap_t* heap = NULL;
- ulint clust_offsets_[REC_OFFS_NORMAL_SIZE];
- ulint sec_offsets_[REC_OFFS_SMALL_SIZE];
- ulint* clust_offs = clust_offsets_;
- ulint* sec_offs = sec_offsets_;
- ibool is_equal = TRUE;
-
- rec_offs_init(clust_offsets_);
- rec_offs_init(sec_offsets_);
-
- if (rec_get_deleted_flag(clust_rec,
- dict_table_is_comp(clust_index->table))) {
-
- /* The clustered index record is delete-marked;
- it is not visible in the read view. Besides,
- if there are any externally stored columns,
- some of them may have already been purged. */
- return(FALSE);
- }
-
- clust_offs = rec_get_offsets(clust_rec, clust_index, clust_offs,
- ULINT_UNDEFINED, &heap);
- sec_offs = rec_get_offsets(sec_rec, sec_index, sec_offs,
- ULINT_UNDEFINED, &heap);
-
- n = dict_index_get_n_ordering_defined_by_user(sec_index);
-
- for (i = 0; i < n; i++) {
- const dict_field_t* ifield;
- const dict_col_t* col;
- ulint clust_pos;
- ulint clust_len;
- ulint len;
-
- ifield = dict_index_get_nth_field(sec_index, i);
- col = dict_field_get_col(ifield);
- clust_pos = dict_col_get_clust_pos(col, clust_index);
-
- clust_field = rec_get_nth_field(
- clust_rec, clust_offs, clust_pos, &clust_len);
- sec_field = rec_get_nth_field(sec_rec, sec_offs, i, &sec_len);
-
- len = clust_len;
-
- if (ifield->prefix_len > 0 && len != UNIV_SQL_NULL) {
-
- if (rec_offs_nth_extern(clust_offs, clust_pos)) {
- len -= BTR_EXTERN_FIELD_REF_SIZE;
- }
-
- len = dtype_get_at_most_n_mbchars(
- col->prtype, col->mbminlen, col->mbmaxlen,
- ifield->prefix_len, len, (char*) clust_field);
-
- if (rec_offs_nth_extern(clust_offs, clust_pos)
- && len < sec_len) {
- if (!row_sel_sec_rec_is_for_blob(
- col->mtype, col->prtype,
- col->mbminlen, col->mbmaxlen,
- clust_field, clust_len,
- sec_field, sec_len,
- dict_table_zip_size(
- clust_index->table))) {
- goto inequal;
- }
-
- continue;
- }
- }
-
- if (0 != cmp_data_data(col->mtype, col->prtype,
- clust_field, len,
- sec_field, sec_len)) {
-inequal:
- is_equal = FALSE;
- goto func_exit;
- }
- }
-
-func_exit:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(is_equal);
-}
-
-/*********************************************************************//**
-Creates a select node struct.
-@return own: select node struct */
-UNIV_INTERN
-sel_node_t*
-sel_node_create(
-/*============*/
- mem_heap_t* heap) /*!< in: memory heap where created */
-{
- sel_node_t* node;
-
- node = mem_heap_alloc(heap, sizeof(sel_node_t));
- node->common.type = QUE_NODE_SELECT;
- node->state = SEL_NODE_OPEN;
-
- node->plans = NULL;
-
- return(node);
-}
-
-/*********************************************************************//**
-Frees the memory private to a select node when a query graph is freed,
-does not free the heap where the node was originally created. */
-UNIV_INTERN
-void
-sel_node_free_private(
-/*==================*/
- sel_node_t* node) /*!< in: select node struct */
-{
- ulint i;
- plan_t* plan;
-
- if (node->plans != NULL) {
- for (i = 0; i < node->n_tables; i++) {
- plan = sel_node_get_nth_plan(node, i);
-
- btr_pcur_close(&(plan->pcur));
- btr_pcur_close(&(plan->clust_pcur));
-
- if (plan->old_vers_heap) {
- mem_heap_free(plan->old_vers_heap);
- }
- }
- }
-}
-
-/*********************************************************************//**
-Evaluates the values in a select list. If there are aggregate functions,
-their argument value is added to the aggregate total. */
-UNIV_INLINE
-void
-sel_eval_select_list(
-/*=================*/
- sel_node_t* node) /*!< in: select node */
-{
- que_node_t* exp;
-
- exp = node->select_list;
-
- while (exp) {
- eval_exp(exp);
-
- exp = que_node_get_next(exp);
- }
-}
-
-/*********************************************************************//**
-Assigns the values in the select list to the possible into-variables in
-SELECT ... INTO ... */
-UNIV_INLINE
-void
-sel_assign_into_var_values(
-/*=======================*/
- sym_node_t* var, /*!< in: first variable in a list of variables */
- sel_node_t* node) /*!< in: select node */
-{
- que_node_t* exp;
-
- if (var == NULL) {
-
- return;
- }
-
- exp = node->select_list;
-
- while (var) {
- ut_ad(exp);
-
- eval_node_copy_val(var->alias, exp);
-
- exp = que_node_get_next(exp);
- var = que_node_get_next(var);
- }
-}
-
-/*********************************************************************//**
-Resets the aggregate value totals in the select list of an aggregate type
-query. */
-UNIV_INLINE
-void
-sel_reset_aggregate_vals(
-/*=====================*/
- sel_node_t* node) /*!< in: select node */
-{
- func_node_t* func_node;
-
- ut_ad(node->is_aggregate);
-
- func_node = node->select_list;
-
- while (func_node) {
- eval_node_set_int_val(func_node, 0);
-
- func_node = que_node_get_next(func_node);
- }
-
- node->aggregate_already_fetched = FALSE;
-}
-
-/*********************************************************************//**
-Copies the input variable values when an explicit cursor is opened. */
-UNIV_INLINE
-void
-row_sel_copy_input_variable_vals(
-/*=============================*/
- sel_node_t* node) /*!< in: select node */
-{
- sym_node_t* var;
-
- var = UT_LIST_GET_FIRST(node->copy_variables);
-
- while (var) {
- eval_node_copy_val(var, var->alias);
-
- var->indirection = NULL;
-
- var = UT_LIST_GET_NEXT(col_var_list, var);
- }
-}
-
-/*********************************************************************//**
-Fetches the column values from a record. */
-static
-void
-row_sel_fetch_columns(
-/*==================*/
- dict_index_t* index, /*!< in: record index */
- const rec_t* rec, /*!< in: record in a clustered or non-clustered
- index; must be protected by a page latch */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- sym_node_t* column) /*!< in: first column in a column list, or
- NULL */
-{
- dfield_t* val;
- ulint index_type;
- ulint field_no;
- const byte* data;
- ulint len;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- if (dict_index_is_clust(index)) {
- index_type = SYM_CLUST_FIELD_NO;
- } else {
- index_type = SYM_SEC_FIELD_NO;
- }
-
- while (column) {
- mem_heap_t* heap = NULL;
- ibool needs_copy;
-
- field_no = column->field_nos[index_type];
-
- if (field_no != ULINT_UNDEFINED) {
-
- if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets,
- field_no))) {
-
- /* Copy an externally stored field to the
- temporary heap */
-
- heap = mem_heap_create(1);
-
- data = btr_rec_copy_externally_stored_field(
- rec, offsets,
- dict_table_zip_size(index->table),
- field_no, &len, heap);
-
- ut_a(len != UNIV_SQL_NULL);
-
- needs_copy = TRUE;
- } else {
- data = rec_get_nth_field(rec, offsets,
- field_no, &len);
-
- if (len == UNIV_SQL_NULL) {
- len = UNIV_SQL_NULL;
- }
-
- needs_copy = column->copy_val;
- }
-
- if (needs_copy) {
- eval_node_copy_and_alloc_val(column, data,
- len);
- } else {
- val = que_node_get_val(column);
- dfield_set_data(val, data, len);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-
- column = UT_LIST_GET_NEXT(col_var_list, column);
- }
-}
-
-/*********************************************************************//**
-Allocates a prefetch buffer for a column when prefetch is first time done. */
-static
-void
-sel_col_prefetch_buf_alloc(
-/*=======================*/
- sym_node_t* column) /*!< in: symbol table node for a column */
-{
- sel_buf_t* sel_buf;
- ulint i;
-
- ut_ad(que_node_get_type(column) == QUE_NODE_SYMBOL);
-
- column->prefetch_buf = mem_alloc(SEL_MAX_N_PREFETCH
- * sizeof(sel_buf_t));
- for (i = 0; i < SEL_MAX_N_PREFETCH; i++) {
- sel_buf = column->prefetch_buf + i;
-
- sel_buf->data = NULL;
-
- sel_buf->val_buf_size = 0;
- }
-}
-
-/*********************************************************************//**
-Frees a prefetch buffer for a column, including the dynamically allocated
-memory for data stored there. */
-UNIV_INTERN
-void
-sel_col_prefetch_buf_free(
-/*======================*/
- sel_buf_t* prefetch_buf) /*!< in, own: prefetch buffer */
-{
- sel_buf_t* sel_buf;
- ulint i;
-
- for (i = 0; i < SEL_MAX_N_PREFETCH; i++) {
- sel_buf = prefetch_buf + i;
-
- if (sel_buf->val_buf_size > 0) {
-
- mem_free(sel_buf->data);
- }
- }
-}
-
-/*********************************************************************//**
-Pops the column values for a prefetched, cached row from the column prefetch
-buffers and places them to the val fields in the column nodes. */
-static
-void
-sel_pop_prefetched_row(
-/*===================*/
- plan_t* plan) /*!< in: plan node for a table */
-{
- sym_node_t* column;
- sel_buf_t* sel_buf;
- dfield_t* val;
- byte* data;
- ulint len;
- ulint val_buf_size;
-
- ut_ad(plan->n_rows_prefetched > 0);
-
- column = UT_LIST_GET_FIRST(plan->columns);
-
- while (column) {
- val = que_node_get_val(column);
-
- if (!column->copy_val) {
- /* We did not really push any value for the
- column */
-
- ut_ad(!column->prefetch_buf);
- ut_ad(que_node_get_val_buf_size(column) == 0);
- ut_d(dfield_set_null(val));
-
- goto next_col;
- }
-
- ut_ad(column->prefetch_buf);
- ut_ad(!dfield_is_ext(val));
-
- sel_buf = column->prefetch_buf + plan->first_prefetched;
-
- data = sel_buf->data;
- len = sel_buf->len;
- val_buf_size = sel_buf->val_buf_size;
-
- /* We must keep track of the allocated memory for
- column values to be able to free it later: therefore
- we swap the values for sel_buf and val */
-
- sel_buf->data = dfield_get_data(val);
- sel_buf->len = dfield_get_len(val);
- sel_buf->val_buf_size = que_node_get_val_buf_size(column);
-
- dfield_set_data(val, data, len);
- que_node_set_val_buf_size(column, val_buf_size);
-next_col:
- column = UT_LIST_GET_NEXT(col_var_list, column);
- }
-
- plan->n_rows_prefetched--;
-
- plan->first_prefetched++;
-}
-
-/*********************************************************************//**
-Pushes the column values for a prefetched, cached row to the column prefetch
-buffers from the val fields in the column nodes.
-
-The row is written to the slot whose index equals the value of
-plan->n_rows_prefetched on entry (slot 0 if the stack is empty, in which
-case plan->first_prefetched is also reset to 0); plan->n_rows_prefetched is
-then incremented. For every column with copy_val set, the (data, len,
-val_buf_size) triple of the column value is exchanged with the triple stored
-in that slot. Columns without copy_val are skipped. A column that has no
-prefetch buffer yet gets one from sel_col_prefetch_buf_alloc().
-
-The slot index must be less than SEL_MAX_N_PREFETCH: this is checked only
-with ut_ad. */
-UNIV_INLINE
-void
-sel_push_prefetched_row(
-/*====================*/
- plan_t* plan) /*!< in/out: plan node for a table; its prefetch
-   counters and column values are modified */
-{
- sym_node_t* column;
- sel_buf_t* sel_buf;
- dfield_t* val;
- byte* data;
- ulint len;
- ulint pos; /* index of the slot that receives this row */
- ulint val_buf_size;
-
- if (plan->n_rows_prefetched == 0) {
- pos = 0;
- plan->first_prefetched = 0;
- } else {
- pos = plan->n_rows_prefetched;
-
- /* We have the convention that pushing new rows starts only
- after the prefetch stack has been emptied: */
-
- ut_ad(plan->first_prefetched == 0);
- }
-
- plan->n_rows_prefetched++;
-
- ut_ad(pos < SEL_MAX_N_PREFETCH);
-
- column = UT_LIST_GET_FIRST(plan->columns);
-
- while (column) {
- if (!column->copy_val) {
- /* There is no sense to push pointers to database
- page fields when we do not keep latch on the page! */
-
- goto next_col;
- }
-
- if (!column->prefetch_buf) {
- /* Allocate a new prefetch buffer */
-
- sel_col_prefetch_buf_alloc(column);
- }
-
- sel_buf = column->prefetch_buf + pos;
-
- val = que_node_get_val(column);
-
- data = dfield_get_data(val);
- len = dfield_get_len(val);
- val_buf_size = que_node_get_val_buf_size(column);
-
- /* We must keep track of the allocated memory for
- column values to be able to free it later: therefore
- we swap the values for sel_buf and val, i.e., the value
- field receives what the slot held before the push */
-
- dfield_set_data(val, sel_buf->data, sel_buf->len);
- que_node_set_val_buf_size(column, sel_buf->val_buf_size);
-
- sel_buf->data = data;
- sel_buf->len = len;
- sel_buf->val_buf_size = val_buf_size;
-next_col:
- column = UT_LIST_GET_NEXT(col_var_list, column);
- }
-}
-
-/*********************************************************************//**
-Builds a previous version of a clustered index record for a consistent read.
-Prepares *old_vers_heap (emptying it if it already exists, otherwise creating
-it with mem_heap_create(512)) and then delegates to
-row_vers_build_for_consistent_read().
-@return DB_SUCCESS or error code, as returned by
-row_vers_build_for_consistent_read() */
-static
-ulint
-row_sel_build_prev_vers(
-/*====================*/
- read_view_t* read_view, /*!< in: read view */
- dict_index_t* index, /*!< in: index of rec; passed on to
-     row_vers_build_for_consistent_read() */
- rec_t* rec, /*!< in: record in a clustered index */
- ulint** offsets, /*!< in/out: offsets returned by
-     rec_get_offsets(rec, index) */
- mem_heap_t** offset_heap, /*!< in/out: memory heap from which
-     the offsets are allocated */
- mem_heap_t** old_vers_heap, /*!< in/out: old version heap to use;
-     if *old_vers_heap is NULL a new heap
-     is created and stored here, otherwise
-     the existing heap is emptied and
-     reused */
- rec_t** old_vers, /*!< out: old version, or NULL if the
-     record does not exist in the view:
-     i.e., it was freshly inserted
-     afterwards */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint err;
-
- if (*old_vers_heap) {
- mem_heap_empty(*old_vers_heap); /* reuse the heap of an earlier call */
- } else {
- *old_vers_heap = mem_heap_create(512);
- }
-
- err = row_vers_build_for_consistent_read(
- rec, mtr, index, offsets, read_view, offset_heap,
- *old_vers_heap, old_vers);
- return(err);
-}
-
-/*********************************************************************//**
-Builds the last committed version of a clustered index record for a
-semi-consistent read.
-Prepares prebuilt->old_vers_heap (emptying it if it already exists, otherwise
-creating it with mem_heap_create(200)) and then delegates to
-row_vers_build_for_semi_consistent_read().
-@return DB_SUCCESS or error code, as returned by
-row_vers_build_for_semi_consistent_read() */
-static
-ulint
-row_sel_build_committed_vers_for_mysql(
-/*===================================*/
- dict_index_t* clust_index, /*!< in: clustered index */
- row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct; its
-     old_vers_heap is created or emptied
-     here and used for the old version */
- const rec_t* rec, /*!< in: record in a clustered index */
- ulint** offsets, /*!< in/out: offsets returned by
-     rec_get_offsets(rec, clust_index) */
- mem_heap_t** offset_heap, /*!< in/out: memory heap from which
-     the offsets are allocated */
- const rec_t** old_vers, /*!< out: old version, or NULL if the
-     record does not exist in the view:
-     i.e., it was freshly inserted
-     afterwards */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint err;
-
- if (prebuilt->old_vers_heap) {
- mem_heap_empty(prebuilt->old_vers_heap); /* reuse the existing heap */
- } else {
- prebuilt->old_vers_heap = mem_heap_create(200);
- }
-
- err = row_vers_build_for_semi_consistent_read(
- rec, mtr, clust_index, offsets, offset_heap,
- prebuilt->old_vers_heap, old_vers);
- return(err);
-}
-
-/*********************************************************************//**
-Tests the conditions which determine when the index segment we are searching
-through has been exhausted.
-Walks the list plan->end_conds in order; for each condition the left side is
-evaluated with eval_sym() and the comparison with eval_cmp(). The walk stops
-at the first comparison that fails.
-@return TRUE if row passed the tests (also when the list is empty), FALSE
-as soon as one comparison fails */
-UNIV_INLINE
-ibool
-row_sel_test_end_conds(
-/*===================*/
- plan_t* plan) /*!< in: plan for the table; the column values must
-   already have been retrieved and the right sides of
-   comparisons evaluated */
-{
- func_node_t* cond;
-
- /* All conditions in end_conds are comparisons of a column to an
- expression */
-
- cond = UT_LIST_GET_FIRST(plan->end_conds);
-
- while (cond) {
- /* Evaluate the left side of the comparison, i.e., get the
- column value if there is an indirection */
-
- eval_sym(cond->args);
-
- /* Do the comparison */
-
- if (!eval_cmp(cond)) {
-
- return(FALSE);
- }
-
- cond = UT_LIST_GET_NEXT(cond_list, cond);
- }
-
- return(TRUE);
-}
-
-/*********************************************************************//**
-Tests the other conditions.
-Walks the list plan->other_conds in order, evaluating each condition with
-eval_exp() and reading its boolean result with eval_node_get_ibool_val().
-The walk stops at the first condition that evaluates to false.
-@return TRUE if row passed the tests (also when the list is empty), FALSE
-as soon as one condition is not satisfied */
-UNIV_INLINE
-ibool
-row_sel_test_other_conds(
-/*=====================*/
- plan_t* plan) /*!< in: plan for the table; the column values must
-   already have been retrieved */
-{
- func_node_t* cond;
-
- cond = UT_LIST_GET_FIRST(plan->other_conds);
-
- while (cond) {
- eval_exp(cond);
-
- if (!eval_node_get_ibool_val(cond)) {
-
- return(FALSE);
- }
-
- cond = UT_LIST_GET_NEXT(cond_list, cond);
- }
-
- return(TRUE);
-}
-
-/*********************************************************************//**
-Retrieves the clustered index record corresponding to a record in a
-non-clustered index. Does the necessary locking.
-
-Outline: a row reference is built from rec into plan->clust_ref,
-plan->clust_pcur is positioned on the clustered index with that reference,
-and then either a record lock is requested (when node->read_view is NULL)
-or, for a consistent read, an older version of the record is built if the
-current one is not visible in node->read_view. On success the columns in
-plan->columns are fetched from the resulting record.
-
-Note that DB_SUCCESS is also returned when no suitable clustered record
-exists or is visible; in that case *out_rec is NULL.
-@return DB_SUCCESS or error code (from lock_clust_rec_read_check_and_lock()
-or row_sel_build_prev_vers()) */
-static
-ulint
-row_sel_get_clust_rec(
-/*==================*/
- sel_node_t* node, /*!< in: select_node */
- plan_t* plan, /*!< in: plan node for table */
- rec_t* rec, /*!< in: record in a non-clustered index */
- que_thr_t* thr, /*!< in: query thread */
- rec_t** out_rec,/*!< out: clustered record or an old version of
-   it, NULL if the old version did not exist
-   in the read view, i.e., it was a fresh
-   inserted version */
- mtr_t* mtr) /*!< in: mtr used to get access to the
-   non-clustered record; the same mtr is used to
-   access the clustered index */
-{
- dict_index_t* index;
- rec_t* clust_rec;
- rec_t* old_vers;
- ulint err;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- *out_rec = NULL; /* stays NULL on every path that does not reach the final assignment */
-
- offsets = rec_get_offsets(rec,
-      btr_pcur_get_btr_cur(&plan->pcur)->index,
-      offsets, ULINT_UNDEFINED, &heap);
-
- row_build_row_ref_fast(plan->clust_ref, plan->clust_map, rec, offsets);
-
- index = dict_table_get_first_index(plan->table); /* from here on index is the index searched with clust_pcur */
-
- btr_pcur_open_with_no_init(index, plan->clust_ref, PAGE_CUR_LE,
-       BTR_SEARCH_LEAF, &plan->clust_pcur,
-       0, mtr);
-
- clust_rec = btr_pcur_get_rec(&(plan->clust_pcur));
-
- /* Note: only if the search ends up on a non-infimum record is the
- low_match value the real match to the search tuple */
-
- if (!page_rec_is_user_rec(clust_rec)
-     || btr_pcur_get_low_match(&(plan->clust_pcur))
-     < dict_index_get_n_unique(index)) {
-
- ut_a(rec_get_deleted_flag(rec,
-      dict_table_is_comp(plan->table)));
- ut_a(node->read_view);
-
- /* In a rare case it is possible that no clust rec is found
- for a delete-marked secondary index record: if in row0umod.c
- in row_undo_mod_remove_clust_low() we have already removed
- the clust rec, while purge is still cleaning and removing
- secondary index records associated with earlier versions of
- the clustered index record. In that case we know that the
- clustered index record did not exist in the read view of
- trx. */
-
- goto func_exit;
- }
-
- offsets = rec_get_offsets(clust_rec, index, offsets,
-      ULINT_UNDEFINED, &heap);
-
- if (!node->read_view) {
- /* Try to place a lock on the index record */
-
- /* If innodb_locks_unsafe_for_binlog option is used
- or this session is using READ COMMITTED isolation level
- we lock only the record, i.e., next-key locking is
- not used. */
- ulint lock_type;
- trx_t* trx;
-
- trx = thr_get_trx(thr);
-
- if (srv_locks_unsafe_for_binlog
-     || trx->isolation_level == TRX_ISO_READ_COMMITTED) {
- lock_type = LOCK_REC_NOT_GAP;
- } else {
- lock_type = LOCK_ORDINARY;
- }
-
- err = lock_clust_rec_read_check_and_lock(
- 0, btr_pcur_get_block(&plan->clust_pcur),
- clust_rec, index, offsets,
- node->row_lock_mode, lock_type, thr);
-
- if (err != DB_SUCCESS) {
-
- goto err_exit;
- }
- } else {
- /* This is a non-locking consistent read: if necessary, fetch
- a previous version of the record */
-
- old_vers = NULL;
-
- if (!lock_clust_rec_cons_read_sees(clust_rec, index, offsets,
-        node->read_view)) {
-
- err = row_sel_build_prev_vers(
- node->read_view, index, clust_rec,
- &offsets, &heap, &plan->old_vers_heap,
- &old_vers, mtr);
-
- if (err != DB_SUCCESS) {
-
- goto err_exit;
- }
-
- clust_rec = old_vers;
-
- if (clust_rec == NULL) {
- goto func_exit; /* no version of the record exists in the read view */
- }
- }
-
- /* If we had to go to an earlier version of row or the
- secondary index record is delete marked, then it may be that
- the secondary index record corresponding to clust_rec
- (or old_vers) is not rec; in that case we must ignore
- such row because in our snapshot rec would not have existed.
- Remember that from rec we cannot see directly which transaction
- id corresponds to it: we have to go to the clustered index
- record. A query where we want to fetch all rows where
- the secondary index value is in some interval would return
- a wrong result if we would not drop rows which we come to
- visit through secondary index records that would not really
- exist in our snapshot. */
-
- if ((old_vers
-      || rec_get_deleted_flag(rec, dict_table_is_comp(
-          plan->table)))
-     && !row_sel_sec_rec_is_for_clust_rec(rec, plan->index,
-       clust_rec, index)) {
- goto func_exit;
- }
- }
-
- /* Fetch the columns needed in test conditions. The clustered
- index record is protected by a page latch that was acquired
- when plan->clust_pcur was positioned. The latch will not be
- released until mtr_commit(mtr). */
-
- row_sel_fetch_columns(index, clust_rec, offsets,
-         UT_LIST_GET_FIRST(plan->columns));
- *out_rec = clust_rec;
-func_exit:
- err = DB_SUCCESS; /* reached both on success and when *out_rec is left NULL */
-err_exit:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap); /* heap may have been created by rec_get_offsets() or row_sel_build_prev_vers() */
- }
- return(err);
-}
-
-/*********************************************************************//**
-Sets a lock on a record.
-If the transaction of thr already has more than 10000 entries in
-trx->trx_locks and buf_LRU_buf_pool_running_out() reports true, no lock is
-requested and DB_LOCK_TABLE_FULL is returned. Otherwise the request is
-forwarded to lock_clust_rec_read_check_and_lock() when index is a clustered
-index, and to lock_sec_rec_read_check_and_lock() otherwise.
-@return DB_SUCCESS or error code (DB_LOCK_TABLE_FULL, or whatever the lock
-function called returns) */
-UNIV_INLINE
-ulint
-sel_set_rec_lock(
-/*=============*/
- const buf_block_t* block, /*!< in: buffer block of rec */
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- ulint mode, /*!< in: lock mode */
- ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or
-    LOCK_REC_NOT_GAP */
- que_thr_t* thr) /*!< in: query thread */
-{
- trx_t* trx;
- ulint err;
-
- trx = thr_get_trx(thr);
-
- if (UT_LIST_GET_LEN(trx->trx_locks) > 10000) { /* the pool check is made only beyond this many locks */
- if (buf_LRU_buf_pool_running_out()) {
-
- return(DB_LOCK_TABLE_FULL);
- }
- }
-
- if (dict_index_is_clust(index)) {
- err = lock_clust_rec_read_check_and_lock(
- 0, block, rec, index, offsets, mode, type, thr);
- } else {
- err = lock_sec_rec_read_check_and_lock(
- 0, block, rec, index, offsets, mode, type, thr);
- }
-
- return(err);
-}
-
-/*********************************************************************//**
-Opens a pcur to a table index.
-First the right sides of all conditions in plan->end_conds are evaluated.
-If plan->tuple exists, the values of plan->tuple_exps are copied into its
-fields and plan->pcur is opened on plan->index with the search mode
-plan->mode; otherwise plan->pcur is opened at one side of the index, as
-selected by plan->asc. Finally plan->pcur_is_open is set to TRUE.
-
-The plan is expected to be in the reset state (no rows fetched or
-prefetched, cursor_at_end FALSE): this is checked only with ut_ad. */
-static
-void
-row_sel_open_pcur(
-/*==============*/
- plan_t* plan, /*!< in/out: table plan; plan->tuple,
-     plan->pcur and plan->pcur_is_open are
-     modified */
- ibool search_latch_locked,
-     /*!< in: TRUE if the thread currently
-     has the search latch locked in
-     s-mode */
- mtr_t* mtr) /*!< in: mtr */
-{
- dict_index_t* index;
- func_node_t* cond;
- que_node_t* exp;
- ulint n_fields;
- ulint has_search_latch = 0; /* RW_S_LATCH or 0 */
- ulint i;
-
- if (search_latch_locked) {
- has_search_latch = RW_S_LATCH;
- }
-
- index = plan->index;
-
- /* Calculate the value of the search tuple: the exact match columns
- get their expressions evaluated when we evaluate the right sides of
- end_conds */
-
- cond = UT_LIST_GET_FIRST(plan->end_conds);
-
- while (cond) {
- eval_exp(que_node_get_next(cond->args)); /* the node following cond->args is the right side */
-
- cond = UT_LIST_GET_NEXT(cond_list, cond);
- }
-
- if (plan->tuple) {
- n_fields = dtuple_get_n_fields(plan->tuple);
-
- if (plan->n_exact_match < n_fields) {
- /* There is a non-exact match field which must be
- evaluated separately */
-
- eval_exp(plan->tuple_exps[n_fields - 1]);
- }
-
- for (i = 0; i < n_fields; i++) {
- exp = plan->tuple_exps[i];
-
- dfield_copy_data(dtuple_get_nth_field(plan->tuple, i),
-    que_node_get_val(exp));
- }
-
- /* Open pcur to the index */
-
- btr_pcur_open_with_no_init(index, plan->tuple, plan->mode,
-       BTR_SEARCH_LEAF, &plan->pcur,
-       has_search_latch, mtr);
- } else {
- /* Open the cursor to the start or the end of the index
- (FALSE: no init). Note that has_search_latch is not
- passed to this call. */
-
- btr_pcur_open_at_index_side(plan->asc, index, BTR_SEARCH_LEAF,
-        &(plan->pcur), FALSE, mtr);
- }
-
- ut_ad(plan->n_rows_prefetched == 0);
- ut_ad(plan->n_rows_fetched == 0);
- ut_ad(plan->cursor_at_end == FALSE);
-
- plan->pcur_is_open = TRUE;
-}
-
-/*********************************************************************//**
-Restores a stored pcur position to a table index.
-The stored relative position of plan->pcur is read before
-btr_pcur_restore_position() is called; the return value is then derived
-from that relative position, from the result of the restore, from the scan
-direction plan->asc and from plan->stored_cursor_rec_processed, as explained
-in the comments in the function body.
-@return TRUE if the cursor should be moved to the next record after we
-return from this function (moved to the previous, in the case of a
-descending cursor) without processing again the current cursor
-record */
-static
-ibool
-row_sel_restore_pcur_pos(
-/*=====================*/
- plan_t* plan, /*!< in: table plan; plan->pcur is repositioned */
- mtr_t* mtr) /*!< in: mtr */
-{
- ibool equal_position; /* result of btr_pcur_restore_position() */
- ulint relative_position; /* relative position stored in pcur before the restore */
-
- ut_ad(!plan->cursor_at_end);
-
- relative_position = btr_pcur_get_rel_pos(&(plan->pcur));
-
- equal_position = btr_pcur_restore_position(BTR_SEARCH_LEAF,
-         &(plan->pcur), mtr);
-
- /* If the cursor is traveling upwards, and relative_position is
-
- (1) BTR_PCUR_BEFORE: this is not allowed, as we did not have a lock
- yet on the successor of the page infimum;
- (2) BTR_PCUR_AFTER: btr_pcur_restore_position placed the cursor on the
- first record GREATER than the predecessor of a page supremum; we have
- not yet processed the cursor record: no need to move the cursor to the
- next record;
- (3) BTR_PCUR_ON: btr_pcur_restore_position placed the cursor on the
- last record LESS or EQUAL to the old stored user record; (a) if
- equal_position is FALSE, this means that the cursor is now on a record
- less than the old user record, and we must move to the next record;
- (b) if equal_position is TRUE, then if
- plan->stored_cursor_rec_processed is TRUE, we must move to the next
- record, else there is no need to move the cursor. */
-
- if (plan->asc) {
- if (relative_position == BTR_PCUR_ON) {
-
- if (equal_position) {
-
- return(plan->stored_cursor_rec_processed);
- }
-
- return(TRUE);
- }
-
- ut_ad(relative_position == BTR_PCUR_AFTER
-       || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE);
-
- return(FALSE);
- }
-
- /* If the cursor is traveling downwards, and relative_position is
-
- (1) BTR_PCUR_BEFORE: btr_pcur_restore_position placed the cursor on
- the last record LESS than the successor of a page infimum; we have not
- processed the cursor record: no need to move the cursor;
- (2) BTR_PCUR_AFTER: btr_pcur_restore_position placed the cursor on the
- first record GREATER than the predecessor of a page supremum; we have
- processed the cursor record: we should move the cursor to the previous
- record;
- (3) BTR_PCUR_ON: btr_pcur_restore_position placed the cursor on the
- last record LESS or EQUAL to the old stored user record; (a) if
- equal_position is FALSE, this means that the cursor is now on a record
- less than the old user record, and we need not move to the previous
- record; (b) if equal_position is TRUE, then if
- plan->stored_cursor_rec_processed is TRUE, we must move to the previous
- record, else there is no need to move the cursor. */
-
- if (relative_position == BTR_PCUR_BEFORE
-     || relative_position == BTR_PCUR_BEFORE_FIRST_IN_TREE) {
-
- return(FALSE);
- }
-
- if (relative_position == BTR_PCUR_ON) {
-
- if (equal_position) {
-
- return(plan->stored_cursor_rec_processed);
- }
-
- return(FALSE);
- }
-
- ut_ad(relative_position == BTR_PCUR_AFTER
-       || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE);
-
- return(TRUE);
-}
-
-/*********************************************************************//**
-Resets a plan cursor to a closed state.
-Only bookkeeping fields of the plan are changed: pcur_is_open and
-cursor_at_end are set to FALSE, and the counters n_rows_fetched and
-n_rows_prefetched are set to 0. plan->pcur itself is not touched here. */
-UNIV_INLINE
-void
-plan_reset_cursor(
-/*==============*/
- plan_t* plan) /*!< in/out: plan */
-{
- plan->pcur_is_open = FALSE;
- plan->cursor_at_end = FALSE;
- plan->n_rows_fetched = 0;
- plan->n_rows_prefetched = 0;
-}
-
-/*********************************************************************//**
-Tries to do a shortcut to fetch a clustered index record with a unique key,
-using the hash index if possible (not always).
-
-The cursor is opened with row_sel_open_pcur(plan, TRUE, mtr). The result is:
-SEL_RETRY if the cursor is not on a user record, or if the record is not
-visible in node->read_view;
-SEL_EXHAUSTED if fewer than plan->n_exact_match fields matched, if the
-record is delete-marked, or if row_sel_test_other_conds() fails;
-SEL_FOUND otherwise, in which case the columns in plan->columns have been
-fetched from the record and plan->n_rows_fetched has been incremented.
-@return SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
-static
-ulint
-row_sel_try_search_shortcut(
-/*========================*/
- sel_node_t* node, /*!< in: select node for a consistent read */
- plan_t* plan, /*!< in: plan for a unique search in clustered
-   index */
- mtr_t* mtr) /*!< in: mtr */
-{
- dict_index_t* index;
- rec_t* rec;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- ulint ret;
- rec_offs_init(offsets_);
-
- index = plan->index;
-
- ut_ad(node->read_view);
- ut_ad(plan->unique_search);
- ut_ad(!plan->must_get_clust);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- row_sel_open_pcur(plan, TRUE, mtr);
-
- rec = btr_pcur_get_rec(&(plan->pcur));
-
- if (!page_rec_is_user_rec(rec)) {
-
- return(SEL_RETRY); /* heap is still NULL here: nothing to free */
- }
-
- ut_ad(plan->mode == PAGE_CUR_GE);
-
- /* As the cursor is now placed on a user record after a search with
- the mode PAGE_CUR_GE, the up_match field in the cursor tells how many
- fields in the user record matched to the search tuple */
-
- if (btr_pcur_get_up_match(&(plan->pcur)) < plan->n_exact_match) {
-
- return(SEL_EXHAUSTED); /* heap is still NULL here: nothing to free */
- }
-
- /* This is a non-locking consistent read: no previous version is
- built here; if the record is not visible in the read view we
- return SEL_RETRY */
-
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
-
- if (dict_index_is_clust(index)) {
- if (!lock_clust_rec_cons_read_sees(rec, index, offsets,
-        node->read_view)) {
- ret = SEL_RETRY;
- goto func_exit;
- }
- } else if (!lock_sec_rec_cons_read_sees(rec, node->read_view)) {
-
- ret = SEL_RETRY;
- goto func_exit;
- }
-
- /* Test the deleted flag. */
-
- if (rec_get_deleted_flag(rec, dict_table_is_comp(plan->table))) {
-
- ret = SEL_EXHAUSTED;
- goto func_exit;
- }
-
- /* Fetch the columns needed in test conditions. The index
- record is protected by a page latch that was acquired when
- plan->pcur was positioned. The latch will not be released
- until mtr_commit(mtr). */
-
- row_sel_fetch_columns(index, rec, offsets,
-         UT_LIST_GET_FIRST(plan->columns));
-
- /* Test the rest of search conditions */
-
- if (!row_sel_test_other_conds(plan)) {
-
- ret = SEL_EXHAUSTED;
- goto func_exit;
- }
-
- ut_ad(plan->pcur.latch_mode == BTR_SEARCH_LEAF);
-
- plan->n_rows_fetched++;
- ret = SEL_FOUND;
-func_exit:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap); /* heap is non-NULL only if rec_get_offsets() created it */
- }
- return(ret);
-}
-
-/*********************************************************************//**
-Performs a select step.
-@return DB_SUCCESS or error code */
-static
-ulint
-row_sel(
-/*====*/
- sel_node_t* node, /*!< in: select node */
- que_thr_t* thr) /*!< in: query thread */
-{
- dict_index_t* index;
- plan_t* plan;
- mtr_t mtr;
- ibool moved;
- rec_t* rec;
- rec_t* old_vers;
- rec_t* clust_rec;
- ibool search_latch_locked;
- ibool consistent_read;
-
- /* The following flag becomes TRUE when we are doing a
- consistent read from a non-clustered index and we must look
- at the clustered index to find out the previous delete mark
- state of the non-clustered record: */
-
- ibool cons_read_requires_clust_rec = FALSE;
- ulint cost_counter = 0;
- ibool cursor_just_opened;
- ibool must_go_to_next;
- ibool mtr_has_extra_clust_latch = FALSE;
- /* TRUE if the search was made using
- a non-clustered index, and we had to
- access the clustered record: now &mtr
- contains a clustered index latch, and
- &mtr must be committed before we move
- to the next non-clustered record */
- ulint found_flag;
- ulint err;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- ut_ad(thr->run_node == node);
-
- search_latch_locked = FALSE;
-
- if (node->read_view) {
- /* In consistent reads, we try to do with the hash index and
- not to use the buffer page get. This is to reduce memory bus
- load resulting from semaphore operations. The search latch
- will be s-locked when we access an index with a unique search
- condition, but not locked when we access an index with a
- less selective search condition. */
-
- consistent_read = TRUE;
- } else {
- consistent_read = FALSE;
- }
-
-table_loop:
- /* TABLE LOOP
- ----------
- This is the outer major loop in calculating a join. We come here when
- node->fetch_table changes, and after adding a row to aggregate totals
- and, of course, when this function is called. */
-
- ut_ad(mtr_has_extra_clust_latch == FALSE);
-
- plan = sel_node_get_nth_plan(node, node->fetch_table);
- index = plan->index;
-
- if (plan->n_rows_prefetched > 0) {
- sel_pop_prefetched_row(plan);
-
- goto next_table_no_mtr;
- }
-
- if (plan->cursor_at_end) {
- /* The cursor has already reached the result set end: no more
- rows to process for this table cursor, as also the prefetch
- stack was empty */
-
- ut_ad(plan->pcur_is_open);
-
- goto table_exhausted_no_mtr;
- }
-
- /* Open a cursor to index, or restore an open cursor position */
-
- mtr_start(&mtr);
-
- if (consistent_read && plan->unique_search && !plan->pcur_is_open
- && !plan->must_get_clust
- && !plan->table->big_rows) {
- if (!search_latch_locked) {
- rw_lock_s_lock(&btr_search_latch);
-
- search_latch_locked = TRUE;
- } else if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_WAIT_EX) {
-
- /* There is an x-latch request waiting: release the
- s-latch for a moment; as an s-latch here is often
- kept for some 10 searches before being released,
- a waiting x-latch request would block other threads
- from acquiring an s-latch for a long time, lowering
- performance significantly in multiprocessors. */
-
- rw_lock_s_unlock(&btr_search_latch);
- rw_lock_s_lock(&btr_search_latch);
- }
-
- found_flag = row_sel_try_search_shortcut(node, plan, &mtr);
-
- if (found_flag == SEL_FOUND) {
-
- goto next_table;
-
- } else if (found_flag == SEL_EXHAUSTED) {
-
- goto table_exhausted;
- }
-
- ut_ad(found_flag == SEL_RETRY);
-
- plan_reset_cursor(plan);
-
- mtr_commit(&mtr);
- mtr_start(&mtr);
- }
-
- if (search_latch_locked) {
- rw_lock_s_unlock(&btr_search_latch);
-
- search_latch_locked = FALSE;
- }
-
- if (!plan->pcur_is_open) {
- /* Evaluate the expressions to build the search tuple and
- open the cursor */
-
- row_sel_open_pcur(plan, search_latch_locked, &mtr);
-
- cursor_just_opened = TRUE;
-
- /* A new search was made: increment the cost counter */
- cost_counter++;
- } else {
- /* Restore pcur position to the index */
-
- must_go_to_next = row_sel_restore_pcur_pos(plan, &mtr);
-
- cursor_just_opened = FALSE;
-
- if (must_go_to_next) {
- /* We have already processed the cursor record: move
- to the next */
-
- goto next_rec;
- }
- }
-
-rec_loop:
- /* RECORD LOOP
- -----------
- In this loop we use pcur and try to fetch a qualifying row, and
- also fill the prefetch buffer for this table if n_rows_fetched has
- exceeded a threshold. While we are inside this loop, the following
- holds:
- (1) &mtr is started,
- (2) pcur is positioned and open.
-
- NOTE that if cursor_just_opened is TRUE here, it means that we came
- to this point right after row_sel_open_pcur. */
-
- ut_ad(mtr_has_extra_clust_latch == FALSE);
-
- rec = btr_pcur_get_rec(&(plan->pcur));
-
- /* PHASE 1: Set a lock if specified */
-
- if (!node->asc && cursor_just_opened
- && !page_rec_is_supremum(rec)) {
-
- /* When we open a cursor for a descending search, we must set
- a next-key lock on the successor record: otherwise it would
- be possible to insert new records next to the cursor position,
- and it might be that these new records should appear in the
- search result set, resulting in the phantom problem. */
-
- if (!consistent_read) {
-
- /* If innodb_locks_unsafe_for_binlog option is used
- or this session is using READ COMMITTED isolation
- level, we lock only the record, i.e., next-key
- locking is not used. */
-
- rec_t* next_rec = page_rec_get_next(rec);
- ulint lock_type;
- trx_t* trx;
-
- trx = thr_get_trx(thr);
-
- offsets = rec_get_offsets(next_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- if (srv_locks_unsafe_for_binlog
- || trx->isolation_level
- == TRX_ISO_READ_COMMITTED) {
-
- if (page_rec_is_supremum(next_rec)) {
-
- goto skip_lock;
- }
-
- lock_type = LOCK_REC_NOT_GAP;
- } else {
- lock_type = LOCK_ORDINARY;
- }
-
- err = sel_set_rec_lock(btr_pcur_get_block(&plan->pcur),
- next_rec, index, offsets,
- node->row_lock_mode,
- lock_type, thr);
-
- if (err != DB_SUCCESS) {
- /* Note that in this case we will store in pcur
- the PREDECESSOR of the record we are waiting
- the lock for */
-
- goto lock_wait_or_error;
- }
- }
- }
-
-skip_lock:
- if (page_rec_is_infimum(rec)) {
-
- /* The infimum record on a page cannot be in the result set,
- and neither can a record lock be placed on it: we skip such
- a record. We also increment the cost counter as we may have
- processed yet another page of index. */
-
- cost_counter++;
-
- goto next_rec;
- }
-
- if (!consistent_read) {
- /* Try to place a lock on the index record */
-
- /* If innodb_locks_unsafe_for_binlog option is used
- or this session is using READ COMMITTED isolation level,
- we lock only the record, i.e., next-key locking is
- not used. */
-
- ulint lock_type;
- trx_t* trx;
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- trx = thr_get_trx(thr);
-
- if (srv_locks_unsafe_for_binlog
- || trx->isolation_level == TRX_ISO_READ_COMMITTED) {
-
- if (page_rec_is_supremum(rec)) {
-
- goto next_rec;
- }
-
- lock_type = LOCK_REC_NOT_GAP;
- } else {
- lock_type = LOCK_ORDINARY;
- }
-
- err = sel_set_rec_lock(btr_pcur_get_block(&plan->pcur),
- rec, index, offsets,
- node->row_lock_mode, lock_type, thr);
-
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
- }
-
- if (page_rec_is_supremum(rec)) {
-
- /* A page supremum record cannot be in the result set: skip
- it now when we have placed a possible lock on it */
-
- goto next_rec;
- }
-
- ut_ad(page_rec_is_user_rec(rec));
-
- if (cost_counter > SEL_COST_LIMIT) {
-
- /* Now that we have placed the necessary locks, we can stop
- for a while and store the cursor position; NOTE that if we
- would store the cursor position BEFORE placing a record lock,
- it might happen that the cursor would jump over some records
- that another transaction could meanwhile insert adjacent to
- the cursor: this would result in the phantom problem. */
-
- goto stop_for_a_while;
- }
-
- /* PHASE 2: Check a mixed index mix id if needed */
-
- if (plan->unique_search && cursor_just_opened) {
-
- ut_ad(plan->mode == PAGE_CUR_GE);
-
- /* As the cursor is now placed on a user record after a search
- with the mode PAGE_CUR_GE, the up_match field in the cursor
- tells how many fields in the user record matched to the search
- tuple */
-
- if (btr_pcur_get_up_match(&(plan->pcur))
- < plan->n_exact_match) {
- goto table_exhausted;
- }
-
- /* Ok, no need to test end_conds or mix id */
-
- }
-
- /* We are ready to look at a possible new index entry in the result
- set: the cursor is now placed on a user record */
-
- /* PHASE 3: Get previous version in a consistent read */
-
- cons_read_requires_clust_rec = FALSE;
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
-
- if (consistent_read) {
- /* This is a non-locking consistent read: if necessary, fetch
- a previous version of the record */
-
- if (dict_index_is_clust(index)) {
-
- if (!lock_clust_rec_cons_read_sees(rec, index, offsets,
- node->read_view)) {
-
- err = row_sel_build_prev_vers(
- node->read_view, index, rec,
- &offsets, &heap, &plan->old_vers_heap,
- &old_vers, &mtr);
-
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
-
- if (old_vers == NULL) {
- offsets = rec_get_offsets(
- rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- /* Fetch the columns needed in
- test conditions. The clustered
- index record is protected by a
- page latch that was acquired
- by row_sel_open_pcur() or
- row_sel_restore_pcur_pos().
- The latch will not be released
- until mtr_commit(mtr). */
-
- row_sel_fetch_columns(
- index, rec, offsets,
- UT_LIST_GET_FIRST(
- plan->columns));
-
- if (!row_sel_test_end_conds(plan)) {
-
- goto table_exhausted;
- }
-
- goto next_rec;
- }
-
- rec = old_vers;
- }
- } else if (!lock_sec_rec_cons_read_sees(rec,
- node->read_view)) {
- cons_read_requires_clust_rec = TRUE;
- }
- }
-
- /* PHASE 4: Test search end conditions and deleted flag */
-
- /* Fetch the columns needed in test conditions. The record is
- protected by a page latch that was acquired by
- row_sel_open_pcur() or row_sel_restore_pcur_pos(). The latch
- will not be released until mtr_commit(mtr). */
-
- row_sel_fetch_columns(index, rec, offsets,
- UT_LIST_GET_FIRST(plan->columns));
-
- /* Test the selection end conditions: these can only contain columns
- which already are found in the index, even though the index might be
- non-clustered */
-
- if (plan->unique_search && cursor_just_opened) {
-
- /* No test necessary: the test was already made above */
-
- } else if (!row_sel_test_end_conds(plan)) {
-
- goto table_exhausted;
- }
-
- if (rec_get_deleted_flag(rec, dict_table_is_comp(plan->table))
- && !cons_read_requires_clust_rec) {
-
- /* The record is delete marked: we can skip it if this is
- not a consistent read which might see an earlier version
- of a non-clustered index record */
-
- if (plan->unique_search) {
-
- goto table_exhausted;
- }
-
- goto next_rec;
- }
-
- /* PHASE 5: Get the clustered index record, if needed and if we did
- not do the search using the clustered index */
-
- if (plan->must_get_clust || cons_read_requires_clust_rec) {
-
- /* It was a non-clustered index and we must fetch also the
- clustered index record */
-
- err = row_sel_get_clust_rec(node, plan, rec, thr, &clust_rec,
- &mtr);
- mtr_has_extra_clust_latch = TRUE;
-
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
-
- /* Retrieving the clustered record required a search:
- increment the cost counter */
-
- cost_counter++;
-
- if (clust_rec == NULL) {
- /* The record did not exist in the read view */
- ut_ad(consistent_read);
-
- goto next_rec;
- }
-
- if (rec_get_deleted_flag(clust_rec,
- dict_table_is_comp(plan->table))) {
-
- /* The record is delete marked: we can skip it */
-
- goto next_rec;
- }
-
- if (node->can_get_updated) {
-
- btr_pcur_store_position(&(plan->clust_pcur), &mtr);
- }
- }
-
- /* PHASE 6: Test the rest of search conditions */
-
- if (!row_sel_test_other_conds(plan)) {
-
- if (plan->unique_search) {
-
- goto table_exhausted;
- }
-
- goto next_rec;
- }
-
- /* PHASE 7: We found a new qualifying row for the current table; push
- the row if prefetch is on, or move to the next table in the join */
-
- plan->n_rows_fetched++;
-
- ut_ad(plan->pcur.latch_mode == BTR_SEARCH_LEAF);
-
- if ((plan->n_rows_fetched <= SEL_PREFETCH_LIMIT)
- || plan->unique_search || plan->no_prefetch
- || plan->table->big_rows) {
-
- /* No prefetch in operation: go to the next table */
-
- goto next_table;
- }
-
- sel_push_prefetched_row(plan);
-
- if (plan->n_rows_prefetched == SEL_MAX_N_PREFETCH) {
-
- /* The prefetch buffer is now full */
-
- sel_pop_prefetched_row(plan);
-
- goto next_table;
- }
-
-next_rec:
- ut_ad(!search_latch_locked);
-
- if (mtr_has_extra_clust_latch) {
-
- /* We must commit &mtr if we are moving to the next
- non-clustered index record, because we could break the
- latching order if we would access a different clustered
- index page right away without releasing the previous. */
-
- goto commit_mtr_for_a_while;
- }
-
- if (node->asc) {
- moved = btr_pcur_move_to_next(&(plan->pcur), &mtr);
- } else {
- moved = btr_pcur_move_to_prev(&(plan->pcur), &mtr);
- }
-
- if (!moved) {
-
- goto table_exhausted;
- }
-
- cursor_just_opened = FALSE;
-
- /* END OF RECORD LOOP
- ------------------ */
- goto rec_loop;
-
-next_table:
- /* We found a record which satisfies the conditions: we can move to
- the next table or return a row in the result set */
-
- ut_ad(btr_pcur_is_on_user_rec(&plan->pcur));
-
- if (plan->unique_search && !node->can_get_updated) {
-
- plan->cursor_at_end = TRUE;
- } else {
- ut_ad(!search_latch_locked);
-
- plan->stored_cursor_rec_processed = TRUE;
-
- btr_pcur_store_position(&(plan->pcur), &mtr);
- }
-
- mtr_commit(&mtr);
-
- mtr_has_extra_clust_latch = FALSE;
-
-next_table_no_mtr:
- /* If we use 'goto' to this label, it means that the row was popped
- from the prefetched rows stack, and &mtr is already committed */
-
- if (node->fetch_table + 1 == node->n_tables) {
-
- sel_eval_select_list(node);
-
- if (node->is_aggregate) {
-
- goto table_loop;
- }
-
- sel_assign_into_var_values(node->into_list, node);
-
- thr->run_node = que_node_get_parent(node);
-
- err = DB_SUCCESS;
- goto func_exit;
- }
-
- node->fetch_table++;
-
- /* When we move to the next table, we first reset the plan cursor:
- we do not care about resetting it when we backtrack from a table */
-
- plan_reset_cursor(sel_node_get_nth_plan(node, node->fetch_table));
-
- goto table_loop;
-
-table_exhausted:
- /* The table cursor pcur reached the result set end: backtrack to the
- previous table in the join if we do not have cached prefetched rows */
-
- plan->cursor_at_end = TRUE;
-
- mtr_commit(&mtr);
-
- mtr_has_extra_clust_latch = FALSE;
-
- if (plan->n_rows_prefetched > 0) {
- /* The table became exhausted during a prefetch */
-
- sel_pop_prefetched_row(plan);
-
- goto next_table_no_mtr;
- }
-
-table_exhausted_no_mtr:
- if (node->fetch_table == 0) {
- err = DB_SUCCESS;
-
- if (node->is_aggregate && !node->aggregate_already_fetched) {
-
- node->aggregate_already_fetched = TRUE;
-
- sel_assign_into_var_values(node->into_list, node);
-
- thr->run_node = que_node_get_parent(node);
- } else {
- node->state = SEL_NODE_NO_MORE_ROWS;
-
- thr->run_node = que_node_get_parent(node);
- }
-
- goto func_exit;
- }
-
- node->fetch_table--;
-
- goto table_loop;
-
-stop_for_a_while:
- /* Return control for a while to que_run_threads, so that runaway
- queries can be canceled. NOTE that when we come here, we must, in a
- locking read, have placed the necessary (possibly waiting request)
- record lock on the cursor record or its successor: when we reposition
- the cursor, this record lock guarantees that nobody can meanwhile have
- inserted new records which should have appeared in the result set,
- which would result in the phantom problem. */
-
- ut_ad(!search_latch_locked);
-
- plan->stored_cursor_rec_processed = FALSE;
- btr_pcur_store_position(&(plan->pcur), &mtr);
-
- mtr_commit(&mtr);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(sync_thread_levels_empty_gen(TRUE));
-#endif /* UNIV_SYNC_DEBUG */
- err = DB_SUCCESS;
- goto func_exit;
-
-commit_mtr_for_a_while:
- /* Stores the cursor position and commits &mtr; this is used if
- &mtr may contain latches which would break the latching order if
- &mtr would not be committed and the latches released. */
-
- plan->stored_cursor_rec_processed = TRUE;
-
- ut_ad(!search_latch_locked);
- btr_pcur_store_position(&(plan->pcur), &mtr);
-
- mtr_commit(&mtr);
-
- mtr_has_extra_clust_latch = FALSE;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(sync_thread_levels_empty_gen(TRUE));
-#endif /* UNIV_SYNC_DEBUG */
-
- goto table_loop;
-
-lock_wait_or_error:
- /* See the note at stop_for_a_while: the same holds for this case */
-
- ut_ad(!btr_pcur_is_before_first_on_page(&plan->pcur) || !node->asc);
- ut_ad(!search_latch_locked);
-
- plan->stored_cursor_rec_processed = FALSE;
- btr_pcur_store_position(&(plan->pcur), &mtr);
-
- mtr_commit(&mtr);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(sync_thread_levels_empty_gen(TRUE));
-#endif /* UNIV_SYNC_DEBUG */
-
-func_exit:
- if (search_latch_locked) {
- rw_lock_s_unlock(&btr_search_latch);
- }
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(err);
-}
-
-/**********************************************************************//**
-Executes one step of a select node in an SQL execution graph.
-
-If the node is in state SEL_NODE_OPEN (this state is forced when the node
-has an INTO list and control arrived from the parent node), the transaction
-is started if needed, the cursor of plan 0 is reset, and then either a read
-view is assigned (consistent read) or an intention lock is requested on each
-table in node->table_list: LOCK_IX when node->set_x_locks is set, LOCK_IS
-otherwise. The node then enters SEL_NODE_FETCH and row_sel() is called.
-@return query thread to run next, or NULL if a table lock request or
-row_sel() returned something other than DB_SUCCESS; in that case the code
-is stored in the error_state of the transaction */
-UNIV_INTERN
-que_thr_t*
-row_sel_step(
-/*=========*/
-	que_thr_t*	thr)	/*!< in: query thread */
-{
-	sel_node_t*	sel;
-	sym_node_t*	tab;
-	ulint		intention_mode;
-	ulint		status;
-
-	ut_ad(thr);
-
-	sel = thr->run_node;
-
-	ut_ad(que_node_get_type(sel) == QUE_NODE_SELECT);
-
-	/* A select with an INTO list is reopened every time control
-	comes down from the parent node */
-
-	if (sel->into_list && thr->prev_node == que_node_get_parent(sel)) {
-
-		sel->state = SEL_NODE_OPEN;
-	}
-
-	if (sel->state == SEL_NODE_OPEN) {
-
-		/* The session may not have started its transaction yet,
-		or the transaction may have been committed */
-
-		trx_start_if_not_started(thr_get_trx(thr));
-
-		plan_reset_cursor(sel_node_get_nth_plan(sel, 0));
-
-		if (!sel->consistent_read) {
-			/* Locking read: request an intention lock on
-			every table of the select */
-
-			intention_mode = sel->set_x_locks ? LOCK_IX : LOCK_IS;
-
-			for (tab = sel->table_list;
-			     tab != NULL;
-			     tab = que_node_get_next(tab)) {
-
-				status = lock_table(0, tab->table,
-						    intention_mode, thr);
-
-				if (status != DB_SUCCESS) {
-					thr_get_trx(thr)->error_state = status;
-
-					return(NULL);
-				}
-			}
-		} else {
-			/* Consistent read: assign a read view */
-
-			sel->read_view = trx_assign_read_view(
-				thr_get_trx(thr));
-		}
-
-		/* For an explicit cursor that has variables to copy,
-		snapshot the stored procedure variable values now */
-
-		if (sel->explicit_cursor
-		    && UT_LIST_GET_FIRST(sel->copy_variables)) {
-
-			row_sel_copy_input_variable_vals(sel);
-		}
-
-		sel->state = SEL_NODE_FETCH;
-		sel->fetch_table = 0;
-
-		if (sel->is_aggregate) {
-			/* Start the aggregate totals from scratch */
-			sel_reset_aggregate_vals(sel);
-		}
-	}
-
-	status = row_sel(sel, thr);
-
-	/* NOTE! if queries are parallelized, the following assignment may
-	have problems; the assignment should be made only if thr is the
-	only top-level thr in the graph: */
-
-	thr->graph->last_sel_node = sel;
-
-	if (status == DB_SUCCESS) {
-
-		return(thr);
-	}
-
-	thr_get_trx(thr)->error_state = status;
-
-	return(NULL);
-}
-
-/**********************************************************************//**
-Executes one step of a fetch node that reads from a cursor.
-
-The node is visited twice per fetch. When control arrives from the parent
-node, the fetch node makes itself the parent of the cursor's select node
-and hands control to that select node (or fails if the cursor is closed).
-When control arrives from anywhere else, the result is delivered unless the
-select node is in state SEL_NODE_NO_MORE_ROWS: either the INTO variables
-are assigned, or the user callback is invoked; a NULL return from the
-callback sets the select node to SEL_NODE_NO_MORE_ROWS. Control then goes
-back to the parent.
-@return query thread to run next, or NULL if the cursor is closed */
-UNIV_INTERN
-que_thr_t*
-fetch_step(
-/*=======*/
-	que_thr_t*	thr)	/*!< in: query thread */
-{
-	fetch_node_t*	fetch;
-	sel_node_t*	cursor;
-
-	ut_ad(thr);
-
-	fetch = thr->run_node;
-	cursor = fetch->cursor_def;
-
-	ut_ad(que_node_get_type(fetch) == QUE_NODE_FETCH);
-
-	if (thr->prev_node == que_node_get_parent(fetch)) {
-
-		/* Make the fetch node the parent of the cursor definition
-		for the time of the fetch, so that execution knows to
-		return to this fetch node after a row has been selected
-		or we know that there is no row left */
-
-		cursor->common.parent = fetch;
-
-		if (cursor->state == SEL_NODE_CLOSED) {
-			fprintf(stderr,
-				"InnoDB: Error: fetch called on a closed cursor\n");
-
-			thr_get_trx(thr)->error_state = DB_ERROR;
-
-			return(NULL);
-		}
-
-		thr->run_node = cursor;
-
-		return(thr);
-	}
-
-	/* Control did not come from the parent: deliver the result */
-
-	if (cursor->state != SEL_NODE_NO_MORE_ROWS) {
-
-		if (fetch->into_list) {
-			sel_assign_into_var_values(fetch->into_list, cursor);
-		} else if (!(*fetch->func->func)(cursor, fetch->func->arg)) {
-			/* The callback asked to stop fetching */
-			cursor->state = SEL_NODE_NO_MORE_ROWS;
-		}
-	}
-
-	thr->run_node = que_node_get_parent(fetch);
-
-	return(thr);
-}
-
-/****************************************************************//**
-Sample fetch callback: writes the type and the value of every expression
-in the select list of the given select node to stderr. A value whose length
-is UNIV_SQL_NULL is printed as <NULL>.
-@return always returns non-NULL */
-UNIV_INTERN
-void*
-row_fetch_print(
-/*============*/
-	void*	row,		/*!< in: sel_node_t* */
-	void*	user_arg)	/*!< in: not used */
-{
-	sel_node_t*	sel = row;
-	que_node_t*	col;
-	ulint		col_no;
-
-	UT_NOT_USED(user_arg);
-
-	fprintf(stderr, "row_fetch_print: row %p\n", row);
-
-	for (col = sel->select_list, col_no = 0;
-	     col != NULL;
-	     col = que_node_get_next(col), col_no++) {
-
-		dfield_t*	value = que_node_get_val(col);
-		const dtype_t*	value_type = dfield_get_type(value);
-
-		fprintf(stderr, " column %lu:\n", (ulong)col_no);
-
-		dtype_print(value_type);
-		putc('\n', stderr);
-
-		if (dfield_get_len(value) == UNIV_SQL_NULL) {
-			fputs(" <NULL>;\n", stderr);
-		} else {
-			ut_print_buf(stderr, dfield_get_data(value),
-				     dfield_get_len(value));
-			putc('\n', stderr);
-		}
-	}
-
-	/* Any non-NULL pointer will do as the return value */
-	return((void*)42);
-}
-
-/****************************************************************//**
-Fetch callback that reads the first expression of the select list as an
-unsigned 4-byte integer and stores it to the location given in user_arg.
-The function asserts that the column has main type DATA_INT, carries the
-DATA_UNSIGNED flag and is exactly 4 bytes long.
-@return always returns NULL */
-UNIV_INTERN
-void*
-row_fetch_store_uint4(
-/*==================*/
-	void*	row,		/*!< in: sel_node_t* */
-	void*	user_arg)	/*!< in: data pointer */
-{
-	sel_node_t*	sel = row;
-	ib_uint32_t*	out = user_arg;
-	dfield_t*	col = que_node_get_val(sel->select_list);
-	const dtype_t*	col_type = dfield_get_type(col);
-
-	ut_a(dtype_get_mtype(col_type) == DATA_INT);
-	ut_a(dtype_get_prtype(col_type) & DATA_UNSIGNED);
-	ut_a(dfield_get_len(col) == 4);
-
-	*out = (ib_uint32_t) mach_read_from_4(dfield_get_data(col));
-
-	return(NULL);
-}
-
-/***********************************************************//**
-Executes one step of a row printf node.
-
-When control arrives from the parent node, the select node is put back to
-SEL_NODE_OPEN and run. When the select node is in SEL_NODE_FETCH, each
-value of its select list is printed, followed by " ::: ", and the select
-node is run again for the next row. In any other state control is passed
-back to the parent node.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-row_printf_step(
-/*============*/
-	que_thr_t*	thr)	/*!< in: query thread */
-{
-	row_printf_node_t*	printf_node;
-	sel_node_t*		sel;
-	que_node_t*		col;
-	que_node_t*		next_node;
-
-	ut_ad(thr);
-
-	printf_node = thr->run_node;
-
-	sel = printf_node->sel_node;
-
-	ut_ad(que_node_get_type(printf_node) == QUE_NODE_ROW_PRINTF);
-
-	if (thr->prev_node == que_node_get_parent(printf_node)) {
-
-		/* Reset the cursor and fetch the first row to print */
-
-		sel->state = SEL_NODE_OPEN;
-
-		next_node = sel;
-
-	} else if (sel->state == SEL_NODE_FETCH) {
-
-		for (col = sel->select_list;
-		     col != NULL;
-		     col = que_node_get_next(col)) {
-
-			dfield_print_also_hex(que_node_get_val(col));
-
-			fputs(" ::: ", stderr);
-		}
-
-		putc('\n', stderr);
-
-		/* Fetch next row to print */
-
-		next_node = sel;
-	} else {
-		ut_ad(sel->state == SEL_NODE_NO_MORE_ROWS);
-
-		/* No more rows to print */
-
-		next_node = que_node_get_parent(printf_node);
-	}
-
-	thr->run_node = next_node;
-
-	return(thr);
-}
-
-/****************************************************************//**
-Converts a key value stored in MySQL format to an Innobase dtuple. The last
-field of the key value may be just a prefix of a fixed length field: hence
-the parameter key_len. But currently we do not allow search keys where the
-last field is only a prefix of the full key field len and print a warning if
-such appears. A counterpart of this function is
-ha_innobase::store_key_val_for_row() in ha_innodb.cc.
-
-The function walks the key from key_ptr up to key_ptr + key_len, consuming
-one tuple field and one index field per key field, and converts each
-non-NULL value into buf. On return the field count of the tuple is set to
-the number of key fields consumed. As a special case, if the first tuple
-field has main type DATA_SYS, the key is asserted to be DATA_ROW_ID_LEN
-bytes long, the first field is pointed directly at key_ptr, and the tuple
-gets exactly one field. */
-UNIV_INTERN
-void
-row_sel_convert_mysql_key_to_innobase(
-/*==================================*/
- dtuple_t* tuple, /*!< in/out: tuple where to build;
- NOTE: we assume that the type info
- in the tuple is already according
- to index! */
- byte* buf, /*!< in: buffer to use in field
- conversions */
- ulint buf_len, /*!< in: buffer length */
- dict_index_t* index, /*!< in: index of the key value */
- const byte* key_ptr, /*!< in: MySQL key value */
- ulint key_len, /*!< in: MySQL key value length */
- trx_t* trx) /*!< in: transaction */
-{
- byte* original_buf = buf;
- const byte* original_key_ptr = key_ptr;
- dict_field_t* field;
- dfield_t* dfield;
- ulint data_offset;
- ulint data_len;
- ulint data_field_len;
- ibool is_null;
- const byte* key_end;
- ulint n_fields = 0;
-
- /* For documentation of the key value storage format in MySQL, see
- ha_innobase::store_key_val_for_row() in ha_innodb.cc. */
-
- key_end = key_ptr + key_len;
-
- /* Permit us to access any field in the tuple (ULINT_MAX): */
-
- dtuple_set_n_fields(tuple, ULINT_MAX);
-
- dfield = dtuple_get_nth_field(tuple, 0);
- field = dict_index_get_nth_field(index, 0);
-
- if (UNIV_UNLIKELY(dfield_get_type(dfield)->mtype == DATA_SYS)) {
- /* A special case: we are looking for a position in the
- generated clustered index which InnoDB automatically added
- to a table with no primary key: the first and the only
- ordering column is ROW_ID which InnoDB stored to the key_ptr
- buffer. */
-
- ut_a(key_len == DATA_ROW_ID_LEN);
-
- dfield_set_data(dfield, key_ptr, DATA_ROW_ID_LEN);
-
- dtuple_set_n_fields(tuple, 1); /* only the row id field is used */
-
- return;
- }
-
- while (key_ptr < key_end) {
-
- ulint type = dfield_get_type(dfield)->mtype;
- ut_a(field->col->mtype == type);
-
- data_offset = 0;
- is_null = FALSE;
-
- if (!(dfield_get_type(dfield)->prtype & DATA_NOT_NULL)) {
- /* The first byte in the field tells if this is
- an SQL NULL value */
-
- data_offset = 1;
-
- if (*key_ptr != 0) {
- dfield_set_null(dfield);
-
- is_null = TRUE;
- }
- }
-
- /* Calculate data length and data field total length */
-
- if (type == DATA_BLOB) {
- /* The key field is a column prefix of a BLOB or
- TEXT */
-
- ut_a(field->prefix_len > 0);
-
- /* MySQL stores the actual data length to the first 2
- bytes after the optional SQL NULL marker byte. The
- storage format is little-endian, that is, the most
- significant byte at a higher address. In UTF-8, MySQL
- seems to reserve field->prefix_len bytes for
- storing this field in the key value buffer, even
- though the actual value only takes data_len bytes
- from the start. */
-
- data_len = key_ptr[data_offset]
- + 256 * key_ptr[data_offset + 1];
- data_field_len = data_offset + 2 + field->prefix_len;
-
- data_offset += 2;
-
- /* Now that we know the length, we store the column
- value like it would be a fixed char field */
-
- } else if (field->prefix_len > 0) {
- /* Looks like MySQL pads unused end bytes in the
- prefix with space. Therefore, also in UTF-8, it is ok
- to compare with a prefix containing full prefix_len
- bytes, and no need to take at most prefix_len / 3
- UTF-8 characters from the start.
- If the prefix is used as the upper end of a LIKE
- 'abc%' query, then MySQL pads the end with chars
- 0xff. TODO: in that case does it any harm to compare
- with the full prefix_len bytes. How do characters
- 0xff in UTF-8 behave? */
-
- data_len = field->prefix_len;
- data_field_len = data_offset + data_len;
- } else {
- data_len = dfield_get_type(dfield)->len;
- data_field_len = data_offset + data_len;
- }
-
- if (UNIV_UNLIKELY
- (dtype_get_mysql_type(dfield_get_type(dfield))
- == DATA_MYSQL_TRUE_VARCHAR)
- && UNIV_LIKELY(type != DATA_INT)) {
- /* In a MySQL key value format, a true VARCHAR is
- always preceded by 2 bytes of a length field.
- dfield_get_type(dfield)->len returns the maximum
- 'payload' len in bytes. That does not include the
- 2 bytes that tell the actual data length.
-
- We added the check != DATA_INT to make sure we do
- not treat MySQL ENUM or SET as a true VARCHAR! */
-
- data_len += 2;
- data_field_len += 2;
- }
-
- /* Storing may use at most data_len bytes of buf */
-
- if (UNIV_LIKELY(!is_null)) {
- row_mysql_store_col_in_innobase_format(
- dfield, buf,
- FALSE, /* MySQL key value format col */
- key_ptr + data_offset, data_len,
- dict_table_is_comp(index->table));
- buf += data_len; /* reserve data_len bytes of buf for this field */
- }
-
- key_ptr += data_field_len; /* step over the whole key field, including any NULL marker and length bytes */
-
- if (UNIV_UNLIKELY(key_ptr > key_end)) {
- /* The last field in key was not a complete key field
- but a prefix of it.
-
- Print a warning about this! HA_READ_PREFIX_LAST does
- not currently work in InnoDB with partial-field key
- value prefixes. Since MySQL currently uses a padding
- trick to calculate LIKE 'abc%' type queries there
- should never be partial-field prefixes in searches. */
-
- ut_print_timestamp(stderr);
-
- fputs(" InnoDB: Warning: using a partial-field"
- " key prefix in search.\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, trx, index);
- fprintf(stderr, ". Last data field length %lu bytes,\n"
- "InnoDB: key ptr now exceeds"
- " key end by %lu bytes.\n"
- "InnoDB: Key value in the MySQL format:\n",
- (ulong) data_field_len,
- (ulong) (key_ptr - key_end));
- fflush(stderr);
- ut_print_buf(stderr, original_key_ptr, key_len);
- putc('\n', stderr);
-
- if (!is_null) {
- ulint len = dfield_get_len(dfield);
- dfield_set_len(dfield, len
- - (ulint) (key_ptr - key_end)); /* shorten the stored value by the overshoot */
- }
- }
-
- n_fields++;
- field++;
- dfield++;
- }
-
- ut_a(buf <= original_buf + buf_len); /* NOTE: this bound is checked only after all fields have been written to buf */
-
- /* We set the length of tuple to n_fields: we assume that the memory
- area allocated for it is big enough (usually bigger than n_fields). */
-
- dtuple_set_n_fields(tuple, n_fields);
-}
-
-/**************************************************************//**
-Copies the DATA_ROW_ID system column of an index record to
-prebuilt->row_id. If the field does not have length DATA_ROW_ID_LEN, the
-index name, the field number and the record are printed to stderr and
-ut_error is raised. */
-static
-void
-row_sel_store_row_id_to_prebuilt(
-/*=============================*/
-	row_prebuilt_t*		prebuilt,	/*!< in/out: prebuilt */
-	const rec_t*		index_rec,	/*!< in: record */
-	const dict_index_t*	index,		/*!< in: index of the record */
-	const ulint*		offsets)	/*!< in: rec_get_offsets
-						(index_rec, index) */
-{
-	const byte*	row_id;
-	ulint		row_id_len;
-	ulint		row_id_pos;
-
-	ut_ad(rec_offs_validate(index_rec, index, offsets));
-
-	row_id_pos = dict_index_get_sys_col_pos(index, DATA_ROW_ID);
-
-	row_id = rec_get_nth_field(index_rec, offsets, row_id_pos,
-				   &row_id_len);
-
-	if (UNIV_UNLIKELY(row_id_len != DATA_ROW_ID_LEN)) {
-		/* The record is not what we expect: dump diagnostics */
-
-		fprintf(stderr,
-			"InnoDB: Error: Row id field is"
-			" wrong length %lu in ", (ulong) row_id_len);
-		dict_index_name_print(stderr, prebuilt->trx, index);
-		fprintf(stderr, "\n"
-			"InnoDB: Field number %lu, record:\n",
-			(ulong) row_id_pos);
-		rec_print_new(stderr, index_rec, offsets);
-		putc('\n', stderr);
-		ut_error;
-	}
-
-	ut_memcpy(prebuilt->row_id, row_id, row_id_len);
-}
-
-/**************************************************************//**
-Stores a non-SQL-NULL field in the MySQL format. The counterpart of this
-function is row_mysql_store_col_in_innobase_format() in row0mysql.c.
-
-What is written depends on templ->type:
-DATA_INT: the bytes of data are copied to dest in reverse order, and for a
-column that is not unsigned the top bit of the last destination byte is
-flipped.
-DATA_VARCHAR, DATA_VARMYSQL, DATA_BINARY: for a true VARCHAR the length is
-stored first; then the data is copied and the rest of the field, up to
-dest + templ->mysql_col_len, is padded with spaces (0x0020 pairs when
-templ->mbminlen == 2, single 0x20 bytes otherwise).
-DATA_BLOB: a reference to the BLOB data is stored with
-row_mysql_store_blob_ref().
-DATA_MYSQL: the data is copied, and padded with 0x20 up to
-templ->mysql_col_len if templ->mbminlen != templ->mbmaxlen.
-Anything else: the data is copied as is.
-
-The two-byte padding loop never stores beyond the end of the field: if an
-odd number of bytes is left, only the first byte of the last pair is
-written. */
-static
-void
-row_sel_field_store_in_mysql_format(
-/*================================*/
-	byte*	dest,	/*!< in/out: buffer where to store; NOTE
-			that BLOBs are not in themselves
-			stored here: the caller must allocate
-			and copy the BLOB into buffer before,
-			and pass the pointer to the BLOB in
-			'data' */
-	const mysql_row_templ_t* templ,
-			/*!< in: MySQL column template.
-			Its following fields are referenced:
-			type, is_unsigned, mysql_col_len,
-			mbminlen, mbmaxlen */
-	const byte*	data,	/*!< in: data to store */
-	ulint		len)	/*!< in: length of the data */
-{
-	byte*	ptr;
-	byte*	field_end;
-	byte*	pad_ptr;
-
-	ut_ad(len != UNIV_SQL_NULL);
-
-	switch (templ->type) {
-	case DATA_INT:
-		/* Convert integer data from Innobase to a little-endian
-		format, sign bit restored to normal */
-
-		ptr = dest + len;
-
-		for (;;) {
-			ptr--;
-			*ptr = *data;
-			if (ptr == dest) {
-				break;
-			}
-			data++;
-		}
-
-		if (!templ->is_unsigned) {
-			dest[len - 1] = (byte) (dest[len - 1] ^ 128);
-		}
-
-		ut_ad(templ->mysql_col_len == len);
-		break;
-
-	case DATA_VARCHAR:
-	case DATA_VARMYSQL:
-	case DATA_BINARY:
-		/* The end of the field is computed before dest is
-		advanced past any length bytes below */
-		field_end = dest + templ->mysql_col_len;
-
-		if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
-			/* This is a >= 5.0.3 type true VARCHAR. Store the
-			length of the data to the first byte or the first
-			two bytes of dest. */
-
-			dest = row_mysql_store_true_var_len(
-				dest, len, templ->mysql_length_bytes);
-		}
-
-		/* Copy the actual data */
-		ut_memcpy(dest, data, len);
-
-		/* Pad with trailing spaces. We pad with spaces also the
-		unused end of a >= 5.0.3 true VARCHAR column, just in case
-		MySQL expects its contents to be deterministic. */
-
-		pad_ptr = dest + len;
-
-		ut_ad(templ->mbminlen <= templ->mbmaxlen);
-
-		/* We handle UCS2 charset strings differently. */
-		if (templ->mbminlen == 2) {
-			/* A space char is two bytes, 0x0020 in UCS2 */
-
-			if (len & 1) {
-				/* A 0x20 has been stripped from the column.
-				Pad it back. */
-
-				if (pad_ptr < field_end) {
-					*pad_ptr = 0x20;
-					pad_ptr++;
-				}
-			}
-
-			/* Pad the rest of the string with 0x0020 */
-
-			while (pad_ptr < field_end) {
-				*pad_ptr = 0x00;
-				pad_ptr++;
-
-				if (pad_ptr == field_end) {
-					/* An odd number of bytes was left:
-					storing the second byte of the pair
-					would write past the end of the
-					field */
-					break;
-				}
-
-				*pad_ptr = 0x20;
-				pad_ptr++;
-			}
-		} else {
-			ut_ad(templ->mbminlen == 1);
-			/* space=0x20 */
-
-			memset(pad_ptr, 0x20, field_end - pad_ptr);
-		}
-		break;
-
-	case DATA_BLOB:
-		/* Store a pointer to the BLOB buffer to dest: the BLOB was
-		already copied to the buffer in row_sel_store_mysql_rec */
-
-		row_mysql_store_blob_ref(dest, templ->mysql_col_len, data,
-					 len);
-		break;
-
-	case DATA_MYSQL:
-		memcpy(dest, data, len);
-
-		ut_ad(templ->mysql_col_len >= len);
-		ut_ad(templ->mbmaxlen >= templ->mbminlen);
-
-		ut_ad(templ->mbmaxlen > templ->mbminlen
-		      || templ->mysql_col_len == len);
-		/* The following assertion would fail for old tables
-		containing UTF-8 ENUM columns due to Bug #9526. */
-		ut_ad(!templ->mbmaxlen
-		      || !(templ->mysql_col_len % templ->mbmaxlen));
-		ut_ad(len * templ->mbmaxlen >= templ->mysql_col_len);
-
-		if (templ->mbminlen != templ->mbmaxlen) {
-			/* Pad with spaces. This undoes the stripping
-			done in row0mysql.ic, function
-			row_mysql_store_col_in_innobase_format(). */
-
-			memset(dest + len, 0x20, templ->mysql_col_len - len);
-		}
-		break;
-
-	default:
-#ifdef UNIV_DEBUG
-	case DATA_SYS_CHILD:
-	case DATA_SYS:
-		/* These column types should never be shipped to MySQL. */
-		ut_ad(0);
-
-	case DATA_CHAR:
-	case DATA_FIXBINARY:
-	case DATA_FLOAT:
-	case DATA_DOUBLE:
-	case DATA_DECIMAL:
-		/* Above are the valid column types for MySQL data. */
-#endif /* UNIV_DEBUG */
-		ut_ad(templ->mysql_col_len == len);
-		memcpy(dest, data, len);
-	}
-}
-
-/**************************************************************//**
-Convert a row in the Innobase format to a row in the MySQL format.
-Note that the template in prebuilt may advise us to copy only a few
-columns to mysql_rec, other columns are left blank. All columns may not
-be needed in the query.
-
-For each of the prebuilt->n_template column templates the field is read
-either from the record itself or, if it is stored externally, through
-btr_rec_copy_externally_stored_field(). BLOB values are copied into
-prebuilt->blob_heap, which is freed at the start of every call; other
-externally stored values go to a temporary heap that is freed as soon as
-the value has been stored. A non-NULL value is written with
-row_sel_field_store_in_mysql_format() and its NULL bit is cleared; for an
-SQL NULL the NULL bit is set and the column bytes are copied from
-prebuilt->default_rec.
-@return TRUE if success, FALSE if could not allocate memory for a BLOB
-(though we may also assert in that case) */
-static
-ibool
-row_sel_store_mysql_rec(
-/*====================*/
- byte* mysql_rec, /*!< out: row in the MySQL format */
- row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */
- const rec_t* rec, /*!< in: Innobase record in the index
- which was described in prebuilt's
- template; must be protected by
- a page latch */
- const ulint* offsets) /*!< in: array returned by
- rec_get_offsets() */
-{
- mysql_row_templ_t* templ;
- mem_heap_t* extern_field_heap = NULL;
- mem_heap_t* heap;
- const byte* data;
- ulint len;
- ulint i;
-
- ut_ad(prebuilt->mysql_template);
- ut_ad(prebuilt->default_rec);
- ut_ad(rec_offs_validate(rec, NULL, offsets));
-
- if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) {
- mem_heap_free(prebuilt->blob_heap); /* drop whatever an earlier call left in blob_heap */
- prebuilt->blob_heap = NULL;
- }
-
- for (i = 0; i < prebuilt->n_template; i++) {
-
- templ = prebuilt->mysql_template + i;
-
- if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets,
- templ->rec_field_no))) {
-
- /* Copy an externally stored field to the temporary
- heap */
-
- ut_a(!prebuilt->trx->has_search_latch);
-
- if (UNIV_UNLIKELY(templ->type == DATA_BLOB)) {
- if (prebuilt->blob_heap == NULL) {
- prebuilt->blob_heap = mem_heap_create(
- UNIV_PAGE_SIZE);
- }
-
- heap = prebuilt->blob_heap;
- } else {
- extern_field_heap
- = mem_heap_create(UNIV_PAGE_SIZE);
-
- heap = extern_field_heap;
- }
-
- /* NOTE: if we are retrieving a big BLOB, we may
- already run out of memory in the next call, which
- causes an assert */
-
- data = btr_rec_copy_externally_stored_field(
- rec, offsets,
- dict_table_zip_size(prebuilt->table),
- templ->rec_field_no, &len, heap);
-
- ut_a(len != UNIV_SQL_NULL);
- } else {
- /* Field is stored in the row. */
-
- data = rec_get_nth_field(rec, offsets,
- templ->rec_field_no, &len);
-
- if (UNIV_UNLIKELY(templ->type == DATA_BLOB)
- && len != UNIV_SQL_NULL) {
-
- /* It is a BLOB field locally stored in the
- InnoDB record: we MUST copy its contents to
- prebuilt->blob_heap here because later code
- assumes all BLOB values have been copied to a
- safe place. */
-
- if (prebuilt->blob_heap == NULL) {
- prebuilt->blob_heap = mem_heap_create(
- UNIV_PAGE_SIZE);
- }
-
- data = memcpy(mem_heap_alloc(
- prebuilt->blob_heap, len),
- data, len);
- }
- }
-
- if (len != UNIV_SQL_NULL) {
- row_sel_field_store_in_mysql_format(
- mysql_rec + templ->mysql_col_offset,
- templ, data, len);
-
- /* Cleanup */
- if (extern_field_heap) {
- mem_heap_free(extern_field_heap); /* the value has been stored in mysql_rec above */
- extern_field_heap = NULL;
- }
-
- if (templ->mysql_null_bit_mask) {
- /* It is a nullable column with a non-NULL
- value */
- mysql_rec[templ->mysql_null_byte_offset]
- &= ~(byte) templ->mysql_null_bit_mask;
- }
- } else {
- /* MySQL assumes that the field for an SQL
- NULL value is set to the default value. */
-
- mysql_rec[templ->mysql_null_byte_offset]
- |= (byte) templ->mysql_null_bit_mask;
- memcpy(mysql_rec + templ->mysql_col_offset,
- (const byte*) prebuilt->default_rec
- + templ->mysql_col_offset,
- templ->mysql_col_len);
- }
- }
-
- return(TRUE); /* no path in this function returns FALSE */
-}
-
-/*********************************************************************//**
-Builds a previous version of a clustered index record for a consistent
-read. The version is built in prebuilt->old_vers_heap, which is created on
-first use and emptied on every later call, by calling
-row_vers_build_for_consistent_read().
-@return DB_SUCCESS or error code */
-static
-ulint
-row_sel_build_prev_vers_for_mysql(
-/*==============================*/
-	read_view_t*	read_view,	/*!< in: read view */
-	dict_index_t*	clust_index,	/*!< in: clustered index */
-	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct */
-	const rec_t*	rec,		/*!< in: record in a clustered index */
-	ulint**		offsets,	/*!< in/out: offsets returned by
-					rec_get_offsets(rec, clust_index) */
-	mem_heap_t**	offset_heap,	/*!< in/out: memory heap from which
-					the offsets are allocated */
-	rec_t**		old_vers,	/*!< out: old version, or NULL if the
-					record does not exist in the view:
-					i.e., it was freshly inserted
-					afterwards */
-	mtr_t*		mtr)		/*!< in: mtr */
-{
-	if (prebuilt->old_vers_heap == NULL) {
-		/* First use: create the heap for old versions */
-		prebuilt->old_vers_heap = mem_heap_create(200);
-	} else {
-		/* Reuse the heap, discarding its previous contents */
-		mem_heap_empty(prebuilt->old_vers_heap);
-	}
-
-	return(row_vers_build_for_consistent_read(
-		       rec, mtr, clust_index, offsets, read_view,
-		       offset_heap, prebuilt->old_vers_heap, old_vers));
-}
-
-/*********************************************************************//**
-Retrieves the clustered index record corresponding to a record in a
-non-clustered index. Does the necessary locking. Used in the MySQL
-interface.
-
-A row reference is built from the secondary index record into
-prebuilt->clust_ref and prebuilt->clust_pcur is opened on the clustered
-index with it. If no matching user record is found, *out_rec is NULL and
-DB_SUCCESS is returned; a diagnostic is printed unless the secondary
-record is delete-marked and the read is non-locking. For a locking read a
-LOCK_REC_NOT_GAP lock of type prebuilt->select_lock_type is requested on
-the clustered record. For a non-locking read an older version of the record
-is built when the read view of the transaction does not see the current
-one, and the result is dropped (set to NULL) when the secondary record does
-not correspond to the clustered record version that was chosen.
-@return DB_SUCCESS or error code */
-static
-ulint
-row_sel_get_clust_rec_for_mysql(
-/*============================*/
- row_prebuilt_t* prebuilt,/*!< in: prebuilt struct in the handle */
- dict_index_t* sec_index,/*!< in: secondary index where rec resides */
- const rec_t* rec, /*!< in: record in a non-clustered index; if
- this is a locking read, then rec is not
- allowed to be delete-marked, and that would
- not make sense either */
- que_thr_t* thr, /*!< in: query thread */
- const rec_t** out_rec,/*!< out: clustered record or an old version of
- it, NULL if the old version did not exist
- in the read view, i.e., it was a fresh
- inserted version */
- ulint** offsets,/*!< in: offsets returned by
- rec_get_offsets(rec, sec_index);
- out: offsets returned by
- rec_get_offsets(out_rec, clust_index) */
- mem_heap_t** offset_heap,/*!< in/out: memory heap from which
- the offsets are allocated */
- mtr_t* mtr) /*!< in: mtr used to get access to the
- non-clustered record; the same mtr is used to
- access the clustered index */
-{
- dict_index_t* clust_index;
- const rec_t* clust_rec;
- rec_t* old_vers;
- ulint err;
- trx_t* trx;
-
- *out_rec = NULL; /* stays NULL on every path that leaves through err_exit */
- trx = thr_get_trx(thr);
-
- row_build_row_ref_in_tuple(prebuilt->clust_ref, rec,
- sec_index, *offsets, trx);
-
- clust_index = dict_table_get_first_index(sec_index->table);
-
- btr_pcur_open_with_no_init(clust_index, prebuilt->clust_ref,
- PAGE_CUR_LE, BTR_SEARCH_LEAF,
- prebuilt->clust_pcur, 0, mtr);
-
- clust_rec = btr_pcur_get_rec(prebuilt->clust_pcur);
-
- prebuilt->clust_pcur->trx_if_known = trx;
-
- /* Note: only if the search ends up on a non-infimum record is the
- low_match value the real match to the search tuple */
-
- if (!page_rec_is_user_rec(clust_rec)
- || btr_pcur_get_low_match(prebuilt->clust_pcur)
- < dict_index_get_n_unique(clust_index)) {
-
- /* In a rare case it is possible that no clust rec is found
- for a delete-marked secondary index record: if in row0umod.c
- in row_undo_mod_remove_clust_low() we have already removed
- the clust rec, while purge is still cleaning and removing
- secondary index records associated with earlier versions of
- the clustered index record. In that case we know that the
- clustered index record did not exist in the read view of
- trx. */
-
- if (!rec_get_deleted_flag(rec,
- dict_table_is_comp(sec_index->table))
- || prebuilt->select_lock_type != LOCK_NONE) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: error clustered record"
- " for sec rec not found\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, trx, sec_index);
- fputs("\n"
- "InnoDB: sec index record ", stderr);
- rec_print(stderr, rec, sec_index);
- fputs("\n"
- "InnoDB: clust index record ", stderr);
- rec_print(stderr, clust_rec, clust_index);
- putc('\n', stderr);
- trx_print(stderr, trx, 600);
-
- fputs("\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n", stderr);
- }
-
- clust_rec = NULL;
-
- goto func_exit; /* DB_SUCCESS is returned with *out_rec == NULL */
- }
-
- *offsets = rec_get_offsets(clust_rec, clust_index, *offsets,
- ULINT_UNDEFINED, offset_heap);
-
- if (prebuilt->select_lock_type != LOCK_NONE) {
- /* Try to place a lock on the index record; we are searching
- the clust rec with a unique condition, hence
- we set a LOCK_REC_NOT_GAP type lock */
-
- err = lock_clust_rec_read_check_and_lock(
- 0, btr_pcur_get_block(prebuilt->clust_pcur),
- clust_rec, clust_index, *offsets,
- prebuilt->select_lock_type, LOCK_REC_NOT_GAP, thr);
- if (err != DB_SUCCESS) {
-
- goto err_exit;
- }
- } else {
- /* This is a non-locking consistent read: if necessary, fetch
- a previous version of the record */
-
- old_vers = NULL;
-
- /* If the isolation level allows reading of uncommitted data,
- then we never look for an earlier version */
-
- if (trx->isolation_level > TRX_ISO_READ_UNCOMMITTED
- && !lock_clust_rec_cons_read_sees(
- clust_rec, clust_index, *offsets,
- trx->read_view)) {
-
- /* The following call returns 'offsets' associated with
- 'old_vers' */
- err = row_sel_build_prev_vers_for_mysql(
- trx->read_view, clust_index, prebuilt,
- clust_rec, offsets, offset_heap, &old_vers,
- mtr);
-
- if (err != DB_SUCCESS || old_vers == NULL) {
-
- goto err_exit; /* err may be DB_SUCCESS here, when old_vers == NULL */
- }
-
- clust_rec = old_vers;
- }
-
- /* If we had to go to an earlier version of row or the
- secondary index record is delete marked, then it may be that
- the secondary index record corresponding to clust_rec
- (or old_vers) is not rec; in that case we must ignore
- such row because in our snapshot rec would not have existed.
- Remember that from rec we cannot see directly which transaction
- id corresponds to it: we have to go to the clustered index
- record. A query where we want to fetch all rows where
- the secondary index value is in some interval would return
- a wrong result if we would not drop rows which we come to
- visit through secondary index records that would not really
- exist in our snapshot. */
-
- if (clust_rec
- && (old_vers
- || rec_get_deleted_flag(rec, dict_table_is_comp(
- sec_index->table)))
- && !row_sel_sec_rec_is_for_clust_rec(
- rec, sec_index, clust_rec, clust_index)) {
- clust_rec = NULL;
-#ifdef UNIV_SEARCH_DEBUG
- } else {
- ut_a(clust_rec == NULL
- || row_sel_sec_rec_is_for_clust_rec(
- rec, sec_index, clust_rec, clust_index));
-#endif
- }
- }
-
-func_exit:
- *out_rec = clust_rec;
-
- if (prebuilt->select_lock_type != LOCK_NONE) {
- /* We may use the cursor in update or in unlock_row():
- store its position */
-
- btr_pcur_store_position(prebuilt->clust_pcur, mtr);
- }
-
- err = DB_SUCCESS;
-err_exit:
- return(err);
-}
-
-/********************************************************************//**
-Re-establishes a persistent cursor on the position that was stored for it
-earlier. The record the cursor was stored on may have been deleted in the
-meantime, in which case the cursor may have to be stepped one record
-forward or backward, depending on the stored relative position and on the
-scan direction.
-@return TRUE if the record the cursor ends up on may still have to be
-processed (i.e. the caller should not move to the next record yet) */
-static
-ibool
-sel_restore_position_for_mysql(
-/*===========================*/
-	ibool*		same_user_rec,	/*!< out: TRUE if the cursor could be
-					restored on a user record with the
-					same ordering prefix in the
-					B-tree index */
-	ulint		latch_mode,	/*!< in: latch mode wished in
-					restoration */
-	btr_pcur_t*	pcur,		/*!< in: cursor whose position
-					has been stored */
-	ibool		moves_up,	/*!< in: TRUE if the cursor moves up
-					in the index */
-	mtr_t*		mtr)		/*!< in: mtr; CAUTION: may commit
-					mtr temporarily! */
-{
-	ulint	stored_rel_pos;
-	ibool	restored;
-
-	/* Read the stored relative position before calling the restore
-	routine: all the checks below use this pre-restore value. */
-	stored_rel_pos = pcur->rel_pos;
-
-	restored = btr_pcur_restore_position(latch_mode, pcur, mtr);
-
-	*same_user_rec = restored;
-
-	if (stored_rel_pos == BTR_PCUR_ON) {
-		/* The cursor was stored on a record. If the restore did
-		not succeed and we are scanning upwards, step to the next
-		record; only a failed restore leaves a record that still
-		needs processing. */
-
-		if (!restored && moves_up) {
-			btr_pcur_move_to_next(pcur, mtr);
-		}
-
-		return(restored ? FALSE : TRUE);
-	}
-
-	if (stored_rel_pos == BTR_PCUR_AFTER
-	    || stored_rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE) {
-
-		/* Stored after a record: when scanning downwards, step
-		back if we are on a user record. */
-
-		if (!moves_up && btr_pcur_is_on_user_rec(pcur)) {
-			btr_pcur_move_to_prev(pcur, mtr);
-		}
-	} else {
-		ut_ad(stored_rel_pos == BTR_PCUR_BEFORE
-		      || stored_rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE);
-
-		/* Stored before a record: when scanning upwards, step
-		forward if we are on a user record. */
-
-		if (moves_up && btr_pcur_is_on_user_rec(pcur)) {
-			btr_pcur_move_to_next(pcur, mtr);
-		}
-	}
-
-	return(TRUE);
-}
-
-/********************************************************************//**
-Removes the first cached row from the fetch cache and copies it to the
-caller's row buffer. If prebuilt->keep_other_fields_on_keyread is set, only
-the columns listed in the MySQL row template (and their NULL bits) are
-copied, so the other bytes of buf are left as they were; otherwise the
-first prebuilt->mysql_prefix_len bytes of the cached row are copied. */
-UNIV_INLINE
-void
-row_sel_pop_cached_row_for_mysql(
-/*=============================*/
-	byte*		buf,		/*!< in/out: buffer where to copy the
-					row */
-	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct */
-{
-	byte*			src_row;
-	mysql_row_templ_t*	col;
-	mysql_row_templ_t*	col_end;
-	byte*			dst_null;
-	byte*			src_null;
-	ut_ad(prebuilt->n_fetch_cached > 0);
-	ut_ad(prebuilt->mysql_prefix_len <= prebuilt->mysql_row_len);
-
-	/* The row to pop is the one at the head of the cache */
-	src_row = prebuilt->fetch_cache[prebuilt->fetch_cache_first];
-
-	if (!UNIV_UNLIKELY(prebuilt->keep_other_fields_on_keyread)) {
-		/* Ordinary case: copy the row prefix in one piece */
-		ut_memcpy(buf, src_row, prebuilt->mysql_prefix_len);
-	} else {
-		/* Copy the cached row one template column at a time, so
-		that fields not covered by the current key stay untouched */
-		col_end = prebuilt->mysql_template + prebuilt->n_template;
-
-		for (col = prebuilt->mysql_template; col < col_end; col++) {
-			ut_memcpy(buf + col->mysql_col_offset,
-				  src_row + col->mysql_col_offset,
-				  col->mysql_col_len);
-
-			if (!col->mysql_null_bit_mask) {
-
-				continue;
-			}
-
-			/* Transfer only this column's NULL bit from the
-			cached row: the XOR flips the bit in buf exactly
-			when it differs from the one in the cached row. */
-			dst_null = buf + col->mysql_null_byte_offset;
-			src_null = src_row + col->mysql_null_byte_offset;
-
-			*dst_null ^= (*dst_null ^ *src_null)
-				& (byte)col->mysql_null_bit_mask;
-		}
-	}
-
-	prebuilt->fetch_cache_first++;
-
-	if (--prebuilt->n_fetch_cached == 0) {
-		/* The cache became empty: rewind to the first slot */
-		prebuilt->fetch_cache_first = 0;
-	}
-}
-
-/********************************************************************//**
-Appends a row, converted to the MySQL format, to the fetch cache. The cache
-buffers are allocated on first use. Conversion failure is treated as a
-fatal error. */
-UNIV_INLINE
-void
-row_sel_push_cache_row_for_mysql(
-/*=============================*/
-	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct */
-	const rec_t*	rec,		/*!< in: record to push; must
-					be protected by a page latch */
-	const ulint*	offsets)	/*!< in: rec_get_offsets() */
-{
-	byte*	raw;
-	byte*	dest;
-	ulint	slot;
-
-	ut_ad(prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE);
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
-	ut_a(!prebuilt->templ_contains_blob);
-
-	if (prebuilt->fetch_cache[0] == NULL) {
-		/* First push: allocate every slot of the fetch cache */
-
-		slot = 0;
-
-		while (slot < MYSQL_FETCH_CACHE_SIZE) {
-
-			/* Each buffer is 8 bytes longer than a row: a
-			4-byte magic number is written both in front of
-			and right after the row area. A user has reported
-			memory corruption in these buffers in Linux; the
-			magic numbers help to track a possible bug. */
-
-			raw = mem_alloc(prebuilt->mysql_row_len + 8);
-
-			mach_write_to_4(raw, ROW_PREBUILT_FETCH_MAGIC_N);
-			mach_write_to_4(raw + 4 + prebuilt->mysql_row_len,
-					ROW_PREBUILT_FETCH_MAGIC_N);
-
-			/* The slot points past the leading magic number */
-			prebuilt->fetch_cache[slot++] = raw + 4;
-		}
-	}
-
-	ut_ad(prebuilt->fetch_cache_first == 0);
-
-	/* The new row goes to the first unused slot */
-	dest = prebuilt->fetch_cache[prebuilt->n_fetch_cached];
-
-	if (UNIV_UNLIKELY(!row_sel_store_mysql_rec(dest, prebuilt,
-						   rec, offsets))) {
-		ut_error;
-	}
-
-	prebuilt->n_fetch_cached++;
-}
-
-/*********************************************************************//**
-Attempts a shortcut lookup of a clustered index record by a unique key,
-using the hash index if possible (not always). The caller must guarantee
-that the search mode is PAGE_CUR_GE, that this is a consistent read, that
-trx has a read view, and that the btr search latch is held in S-mode.
-@return SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
-static
-ulint
-row_sel_try_search_shortcut_for_mysql(
-/*==================================*/
-	const rec_t**	out_rec,/*!< out: record if found */
-	row_prebuilt_t*	prebuilt,/*!< in: prebuilt struct */
-	ulint**		offsets,/*!< in/out: for rec_get_offsets(*out_rec) */
-	mem_heap_t**	heap,	/*!< in/out: heap for rec_get_offsets() */
-	mtr_t*		mtr)	/*!< in: started mtr */
-{
-	dict_index_t*	clust_index	= prebuilt->index;
-	const dtuple_t*	key_tuple	= prebuilt->search_tuple;
-	btr_pcur_t*	cursor		= prebuilt->pcur;
-	trx_t*		trx		= prebuilt->trx;
-	const rec_t*	rec;
-	ulint		outcome;
-
-	ut_ad(dict_index_is_clust(clust_index));
-	ut_ad(!prebuilt->templ_contains_blob);
-
-	/* RW_S_LATCH is passed to the cursor open call unless
-	UNIV_SEARCH_DEBUG is defined, in which case 0 is passed. */
-	btr_pcur_open_with_no_init(clust_index, key_tuple, PAGE_CUR_GE,
-				   BTR_SEARCH_LEAF, cursor,
-#ifndef UNIV_SEARCH_DEBUG
-				   RW_S_LATCH,
-#else
-				   0,
-#endif
-				   mtr);
-	rec = btr_pcur_get_rec(cursor);
-
-	if (!page_rec_is_user_rec(rec)) {
-
-		outcome = SEL_RETRY;
-
-	} else if (btr_pcur_get_up_match(cursor)
-		   < dtuple_get_n_fields(key_tuple)) {
-
-		/* The cursor is on a user record after a PAGE_CUR_GE
-		search, so up_match tells how many fields of that record
-		matched the search tuple: fewer than all of them means
-		there is no matching record. */
-
-		outcome = SEL_EXHAUSTED;
-	} else {
-		/* This is a non-locking consistent read: the record may
-		be used only if the read view sees it; otherwise the
-		caller has to retry without the shortcut. */
-
-		*offsets = rec_get_offsets(rec, clust_index, *offsets,
-					   ULINT_UNDEFINED, heap);
-
-		if (!lock_clust_rec_cons_read_sees(rec, clust_index,
-						   *offsets,
-						   trx->read_view)) {
-
-			outcome = SEL_RETRY;
-
-		} else if (rec_get_deleted_flag(
-				   rec,
-				   dict_table_is_comp(clust_index->table))) {
-
-			/* A delete-marked record is not returned */
-			outcome = SEL_EXHAUSTED;
-		} else {
-			*out_rec = rec;
-
-			outcome = SEL_FOUND;
-		}
-	}
-
-	return(outcome);
-}
-
-/********************************************************************//**
-Searches for rows in the database. This is used in the interface to
-MySQL. This function opens a cursor, and also implements fetch next
-and fetch prev. NOTE that if we do a search with a full key value
-from a unique index (ROW_SEL_EXACT), then we will not store the cursor
-position and fetch next or fetch prev must not be tried to the cursor!
-@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK,
-DB_LOCK_TABLE_FULL, DB_CORRUPTION, or DB_TOO_BIG_RECORD */
-UNIV_INTERN
-ulint
-row_search_for_mysql(
-/*=================*/
- byte* buf, /*!< in/out: buffer for the fetched
- row in the MySQL format */
- ulint mode, /*!< in: search mode PAGE_CUR_L, ... */
- row_prebuilt_t* prebuilt, /*!< in: prebuilt struct for the
- table handle; this contains the info
- of search_tuple, index; if search
- tuple contains 0 fields then we
- position the cursor at the start or
- the end of the index, depending on
- 'mode' */
- ulint match_mode, /*!< in: 0 or ROW_SEL_EXACT or
- ROW_SEL_EXACT_PREFIX */
- ulint direction) /*!< in: 0 or ROW_SEL_NEXT or
- ROW_SEL_PREV; NOTE: if this is != 0,
- then prebuilt must have a pcur
- with stored position! In opening of a
- cursor 'direction' should be 0. */
-{
- dict_index_t* index = prebuilt->index;
- ibool comp = dict_table_is_comp(index->table);
- const dtuple_t* search_tuple = prebuilt->search_tuple;
- btr_pcur_t* pcur = prebuilt->pcur;
- trx_t* trx = prebuilt->trx;
- dict_index_t* clust_index;
- que_thr_t* thr;
- const rec_t* rec;
- const rec_t* result_rec;
- const rec_t* clust_rec;
- ulint err = DB_SUCCESS;
- ibool unique_search = FALSE;
- ibool unique_search_from_clust_index = FALSE;
- ibool mtr_has_extra_clust_latch = FALSE;
- ibool moves_up = FALSE;
- ibool set_also_gap_locks = TRUE;
- /* if the query is a plain locking SELECT, and the isolation level
- is <= TRX_ISO_READ_COMMITTED, then this is set to FALSE */
- ibool did_semi_consistent_read = FALSE;
- /* if the returned record was locked and we did a semi-consistent
- read (fetch the newest committed version), then this is set to
- TRUE */
-#ifdef UNIV_SEARCH_DEBUG
- ulint cnt = 0;
-#endif /* UNIV_SEARCH_DEBUG */
- ulint next_offs;
- ibool same_user_rec;
- mtr_t mtr;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
-
- rec_offs_init(offsets_);
-
- ut_ad(index && pcur && search_tuple);
- ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
-
- if (UNIV_UNLIKELY(prebuilt->table->ibd_file_missing)) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error:\n"
- "InnoDB: MySQL is trying to use a table handle"
- " but the .ibd file for\n"
- "InnoDB: table %s does not exist.\n"
- "InnoDB: Have you deleted the .ibd file"
- " from the database directory under\n"
- "InnoDB: the MySQL datadir, or have you used"
- " DISCARD TABLESPACE?\n"
- "InnoDB: Look from\n"
- "InnoDB: " REFMAN "innodb-troubleshooting.html\n"
- "InnoDB: how you can resolve the problem.\n",
- prebuilt->table->name);
-
- return(DB_ERROR);
- }
-
- if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
-
- return(DB_MISSING_HISTORY);
- }
-
- if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) {
- fprintf(stderr,
- "InnoDB: Error: trying to free a corrupt\n"
- "InnoDB: table handle. Magic n %lu, table name ",
- (ulong) prebuilt->magic_n);
- ut_print_name(stderr, trx, TRUE, prebuilt->table->name);
- putc('\n', stderr);
-
- mem_analyze_corruption(prebuilt);
-
- ut_error;
- }
-
-#if 0
- /* August 19, 2005 by Heikki: temporarily disable this error
- print until the cursor lock count is done correctly.
- See bugs #12263 and #12456!*/
-
- if (trx->n_mysql_tables_in_use == 0
- && UNIV_UNLIKELY(prebuilt->select_lock_type == LOCK_NONE)) {
- /* Note that if MySQL uses an InnoDB temp table that it
- created inside LOCK TABLES, then n_mysql_tables_in_use can
- be zero; in that case select_lock_type is set to LOCK_X in
- ::start_stmt. */
-
- fputs("InnoDB: Error: MySQL is trying to perform a SELECT\n"
- "InnoDB: but it has not locked"
- " any tables in ::external_lock()!\n",
- stderr);
- trx_print(stderr, trx, 600);
- fputc('\n', stderr);
- }
-#endif
-
-#if 0
- fprintf(stderr, "Match mode %lu\n search tuple ",
- (ulong) match_mode);
- dtuple_print(search_tuple);
- fprintf(stderr, "N tables locked %lu\n",
- (ulong) trx->mysql_n_tables_locked);
-#endif
- /*-------------------------------------------------------------*/
- /* PHASE 0: Release a possible s-latch we are holding on the
- adaptive hash index latch if there is someone waiting behind */
-
- if (UNIV_UNLIKELY(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_NOT_LOCKED)
- && trx->has_search_latch) {
-
- /* There is an x-latch request on the adaptive hash index:
- release the s-latch to reduce starvation and wait for
- BTR_SEA_TIMEOUT rounds before trying to keep it again over
- calls from MySQL */
-
- rw_lock_s_unlock(&btr_search_latch);
- trx->has_search_latch = FALSE;
-
- trx->search_latch_timeout = BTR_SEA_TIMEOUT;
- }
-
- /* Reset the new record lock info if srv_locks_unsafe_for_binlog
- is set or the session is using a READ COMMITTED isolation level. Then
- we are able to remove the record locks set here on an individual
- row. */
- prebuilt->new_rec_locks = 0;
-
- /*-------------------------------------------------------------*/
- /* PHASE 1: Try to pop the row from the prefetch cache */
-
- if (UNIV_UNLIKELY(direction == 0)) {
- trx->op_info = "starting index read";
-
- prebuilt->n_rows_fetched = 0;
- prebuilt->n_fetch_cached = 0;
- prebuilt->fetch_cache_first = 0;
-
- if (prebuilt->sel_graph == NULL) {
- /* Build a dummy select query graph */
- row_prebuild_sel_graph(prebuilt);
- }
- } else {
- trx->op_info = "fetching rows";
-
- if (prebuilt->n_rows_fetched == 0) {
- prebuilt->fetch_direction = direction;
- }
-
- if (UNIV_UNLIKELY(direction != prebuilt->fetch_direction)) {
- if (UNIV_UNLIKELY(prebuilt->n_fetch_cached > 0)) {
- ut_error;
- /* TODO: scrollable cursor: restore cursor to
- the place of the latest returned row,
- or better: prevent caching for a scroll
- cursor! */
- }
-
- prebuilt->n_rows_fetched = 0;
- prebuilt->n_fetch_cached = 0;
- prebuilt->fetch_cache_first = 0;
-
- } else if (UNIV_LIKELY(prebuilt->n_fetch_cached > 0)) {
- row_sel_pop_cached_row_for_mysql(buf, prebuilt);
-
- prebuilt->n_rows_fetched++;
-
- srv_n_rows_read++;
- err = DB_SUCCESS;
- goto func_exit;
- }
-
- if (prebuilt->fetch_cache_first > 0
- && prebuilt->fetch_cache_first < MYSQL_FETCH_CACHE_SIZE) {
-
- /* The previous returned row was popped from the fetch
- cache, but the cache was not full at the time of the
- popping: no more rows can exist in the result set */
-
- err = DB_RECORD_NOT_FOUND;
- goto func_exit;
- }
-
- prebuilt->n_rows_fetched++;
-
- if (prebuilt->n_rows_fetched > 1000000000) {
- /* Prevent wrap-over */
- prebuilt->n_rows_fetched = 500000000;
- }
-
- mode = pcur->search_mode;
- }
-
- /* In a search where at most one record in the index may match, we
- can use a LOCK_REC_NOT_GAP type record lock when locking a
- non-delete-marked matching record.
-
- Note that in a unique secondary index there may be different
- delete-marked versions of a record where only the primary key
- values differ: thus in a secondary index we must use next-key
- locks when locking delete-marked records. */
-
- if (match_mode == ROW_SEL_EXACT
- && dict_index_is_unique(index)
- && dtuple_get_n_fields(search_tuple)
- == dict_index_get_n_unique(index)
- && (dict_index_is_clust(index)
- || !dtuple_contains_null(search_tuple))) {
-
- /* Note above that a UNIQUE secondary index can contain many
- rows with the same key value if one of the columns is the SQL
- null. A clustered index under MySQL can never contain null
- columns because we demand that all the columns in primary key
- are non-null. */
-
- unique_search = TRUE;
-
- /* Even if the condition is unique, MySQL seems to try to
- retrieve also a second row if a primary key contains more than
- 1 column. Return immediately if this is not a HANDLER
- command. */
-
- if (UNIV_UNLIKELY(direction != 0
- && !prebuilt->used_in_HANDLER)) {
-
- err = DB_RECORD_NOT_FOUND;
- goto func_exit;
- }
- }
-
- mtr_start(&mtr);
-
- /*-------------------------------------------------------------*/
- /* PHASE 2: Try fast adaptive hash index search if possible */
-
- /* Next test if this is the special case where we can use the fast
- adaptive hash index to try the search. Since we must release the
- search system latch when we retrieve an externally stored field, we
- cannot use the adaptive hash index in a search in the case the row
- may be long and there may be externally stored fields */
-
- if (UNIV_UNLIKELY(direction == 0)
- && unique_search
- && dict_index_is_clust(index)
- && !prebuilt->templ_contains_blob
- && !prebuilt->used_in_HANDLER
- && (prebuilt->mysql_row_len < UNIV_PAGE_SIZE / 8)) {
-
- mode = PAGE_CUR_GE;
-
- unique_search_from_clust_index = TRUE;
-
- if (trx->mysql_n_tables_locked == 0
- && prebuilt->select_lock_type == LOCK_NONE
- && trx->isolation_level > TRX_ISO_READ_UNCOMMITTED
- && trx->read_view) {
-
- /* This is a SELECT query done as a consistent read,
- and the read view has already been allocated:
- let us try a search shortcut through the hash
- index.
- NOTE that we must also test that
- mysql_n_tables_locked == 0, because this might
- also be INSERT INTO ... SELECT ... or
- CREATE TABLE ... SELECT ... . Our algorithm is
- NOT prepared for inserts interleaved with the SELECT,
- and if we try that, we can deadlock on the adaptive
- hash index semaphore! */
-
-#ifndef UNIV_SEARCH_DEBUG
- if (!trx->has_search_latch) {
- rw_lock_s_lock(&btr_search_latch);
- trx->has_search_latch = TRUE;
- }
-#endif
- switch (row_sel_try_search_shortcut_for_mysql(
- &rec, prebuilt, &offsets, &heap,
- &mtr)) {
- case SEL_FOUND:
-#ifdef UNIV_SEARCH_DEBUG
- ut_a(0 == cmp_dtuple_rec(search_tuple,
- rec, offsets));
-#endif
- /* At this point, rec is protected by
- a page latch that was acquired by
- row_sel_try_search_shortcut_for_mysql().
- The latch will not be released until
- mtr_commit(&mtr). */
-
- if (!row_sel_store_mysql_rec(buf, prebuilt,
- rec, offsets)) {
- err = DB_TOO_BIG_RECORD;
-
- /* We let the main loop to do the
- error handling */
- goto shortcut_fails_too_big_rec;
- }
-
- mtr_commit(&mtr);
-
- /* ut_print_name(stderr, index->name);
- fputs(" shortcut\n", stderr); */
-
- srv_n_rows_read++;
-
- err = DB_SUCCESS;
- goto release_search_latch_if_needed;
-
- case SEL_EXHAUSTED:
- mtr_commit(&mtr);
-
- /* ut_print_name(stderr, index->name);
- fputs(" record not found 2\n", stderr); */
-
- err = DB_RECORD_NOT_FOUND;
-release_search_latch_if_needed:
- if (trx->search_latch_timeout > 0
- && trx->has_search_latch) {
-
- trx->search_latch_timeout--;
-
- rw_lock_s_unlock(&btr_search_latch);
- trx->has_search_latch = FALSE;
- }
-
- /* NOTE that we do NOT store the cursor
- position */
- goto func_exit;
-
- case SEL_RETRY:
- break;
-
- default:
- ut_ad(0);
- }
-shortcut_fails_too_big_rec:
- mtr_commit(&mtr);
- mtr_start(&mtr);
- }
- }
-
- /*-------------------------------------------------------------*/
- /* PHASE 3: Open or restore index cursor position */
-
- if (trx->has_search_latch) {
- rw_lock_s_unlock(&btr_search_latch);
- trx->has_search_latch = FALSE;
- }
-
- trx_start_if_not_started(trx);
-
- if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
- && prebuilt->select_lock_type != LOCK_NONE
- && trx->mysql_thd != NULL
- && thd_is_select(trx->mysql_thd)) {
- /* It is a plain locking SELECT and the isolation
- level is low: do not lock gaps */
-
- set_also_gap_locks = FALSE;
- }
-
- /* Note that if the search mode was GE or G, then the cursor
- naturally moves upward (in fetch next) in alphabetical order,
- otherwise downward */
-
- if (UNIV_UNLIKELY(direction == 0)) {
- if (mode == PAGE_CUR_GE || mode == PAGE_CUR_G) {
- moves_up = TRUE;
- }
- } else if (direction == ROW_SEL_NEXT) {
- moves_up = TRUE;
- }
-
- thr = que_fork_get_first_thr(prebuilt->sel_graph);
-
- que_thr_move_to_run_state_for_mysql(thr, trx);
-
- clust_index = dict_table_get_first_index(index->table);
-
- if (UNIV_LIKELY(direction != 0)) {
- ibool need_to_process = sel_restore_position_for_mysql(
- &same_user_rec, BTR_SEARCH_LEAF,
- pcur, moves_up, &mtr);
-
- if (UNIV_UNLIKELY(need_to_process)) {
- if (UNIV_UNLIKELY(prebuilt->row_read_type
- == ROW_READ_DID_SEMI_CONSISTENT)) {
- /* We did a semi-consistent read,
- but the record was removed in
- the meantime. */
- prebuilt->row_read_type
- = ROW_READ_TRY_SEMI_CONSISTENT;
- }
- } else if (UNIV_LIKELY(prebuilt->row_read_type
- != ROW_READ_DID_SEMI_CONSISTENT)) {
-
- /* The cursor was positioned on the record
- that we returned previously. If we need
- to repeat a semi-consistent read as a
- pessimistic locking read, the record
- cannot be skipped. */
-
- goto next_rec;
- }
-
- } else if (dtuple_get_n_fields(search_tuple) > 0) {
-
- btr_pcur_open_with_no_init(index, search_tuple, mode,
- BTR_SEARCH_LEAF,
- pcur, 0, &mtr);
-
- pcur->trx_if_known = trx;
-
- rec = btr_pcur_get_rec(pcur);
-
- if (!moves_up
- && !page_rec_is_supremum(rec)
- && set_also_gap_locks
- && !(srv_locks_unsafe_for_binlog
- || trx->isolation_level == TRX_ISO_READ_COMMITTED)
- && prebuilt->select_lock_type != LOCK_NONE) {
-
- /* Try to place a gap lock on the next index record
- to prevent phantoms in ORDER BY ... DESC queries */
- const rec_t* next = page_rec_get_next_const(rec);
-
- offsets = rec_get_offsets(next, index, offsets,
- ULINT_UNDEFINED, &heap);
- err = sel_set_rec_lock(btr_pcur_get_block(pcur),
- next, index, offsets,
- prebuilt->select_lock_type,
- LOCK_GAP, thr);
-
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
- }
- } else {
- if (mode == PAGE_CUR_G) {
- btr_pcur_open_at_index_side(
- TRUE, index, BTR_SEARCH_LEAF, pcur, FALSE,
- &mtr);
- } else if (mode == PAGE_CUR_L) {
- btr_pcur_open_at_index_side(
- FALSE, index, BTR_SEARCH_LEAF, pcur, FALSE,
- &mtr);
- }
- }
-
- if (!prebuilt->sql_stat_start) {
- /* No need to set an intention lock or assign a read view */
-
- if (trx->read_view == NULL
- && prebuilt->select_lock_type == LOCK_NONE) {
-
- fputs("InnoDB: Error: MySQL is trying to"
- " perform a consistent read\n"
- "InnoDB: but the read view is not assigned!\n",
- stderr);
- trx_print(stderr, trx, 600);
- fputc('\n', stderr);
- ut_a(0);
- }
- } else if (prebuilt->select_lock_type == LOCK_NONE) {
- /* This is a consistent read */
- /* Assign a read view for the query */
-
- trx_assign_read_view(trx);
- prebuilt->sql_stat_start = FALSE;
- } else {
- ulint lock_mode;
- if (prebuilt->select_lock_type == LOCK_S) {
- lock_mode = LOCK_IS;
- } else {
- lock_mode = LOCK_IX;
- }
- err = lock_table(0, index->table, lock_mode, thr);
-
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
- prebuilt->sql_stat_start = FALSE;
- }
-
-rec_loop:
- /*-------------------------------------------------------------*/
- /* PHASE 4: Look for matching records in a loop */
-
- rec = btr_pcur_get_rec(pcur);
- ut_ad(!!page_rec_is_comp(rec) == comp);
-#ifdef UNIV_SEARCH_DEBUG
- /*
- fputs("Using ", stderr);
- dict_index_name_print(stderr, index);
- fprintf(stderr, " cnt %lu ; Page no %lu\n", cnt,
- page_get_page_no(page_align(rec)));
- rec_print(rec);
- */
-#endif /* UNIV_SEARCH_DEBUG */
-
- if (page_rec_is_infimum(rec)) {
-
- /* The infimum record on a page cannot be in the result set,
- and neither can a record lock be placed on it: we skip such
- a record. */
-
- goto next_rec;
- }
-
- if (page_rec_is_supremum(rec)) {
-
- if (set_also_gap_locks
- && !(srv_locks_unsafe_for_binlog
- || trx->isolation_level == TRX_ISO_READ_COMMITTED)
- && prebuilt->select_lock_type != LOCK_NONE) {
-
- /* Try to place a lock on the index record */
-
- /* If innodb_locks_unsafe_for_binlog option is used
- or this session is using a READ COMMITTED isolation
- level we do not lock gaps. Supremum record is really
- a gap and therefore we do not set locks there. */
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- err = sel_set_rec_lock(btr_pcur_get_block(pcur),
- rec, index, offsets,
- prebuilt->select_lock_type,
- LOCK_ORDINARY, thr);
-
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
- }
- /* A page supremum record cannot be in the result set: skip
- it now that we have placed a possible lock on it */
-
- goto next_rec;
- }
-
- /*-------------------------------------------------------------*/
- /* Do sanity checks in case our cursor has bumped into page
- corruption */
-
- if (comp) {
- next_offs = rec_get_next_offs(rec, TRUE);
- if (UNIV_UNLIKELY(next_offs < PAGE_NEW_SUPREMUM)) {
-
- goto wrong_offs;
- }
- } else {
- next_offs = rec_get_next_offs(rec, FALSE);
- if (UNIV_UNLIKELY(next_offs < PAGE_OLD_SUPREMUM)) {
-
- goto wrong_offs;
- }
- }
-
- if (UNIV_UNLIKELY(next_offs >= UNIV_PAGE_SIZE - PAGE_DIR)) {
-
-wrong_offs:
- if (srv_force_recovery == 0 || moves_up == FALSE) {
- ut_print_timestamp(stderr);
- buf_page_print(page_align(rec), 0);
- fprintf(stderr,
- "\nInnoDB: rec address %p,"
- " buf block fix count %lu\n",
- (void*) rec, (ulong)
- btr_cur_get_block(btr_pcur_get_btr_cur(pcur))
- ->page.buf_fix_count);
- fprintf(stderr,
- "InnoDB: Index corruption: rec offs %lu"
- " next offs %lu, page no %lu,\n"
- "InnoDB: ",
- (ulong) page_offset(rec),
- (ulong) next_offs,
- (ulong) page_get_page_no(page_align(rec)));
- dict_index_name_print(stderr, trx, index);
- fputs(". Run CHECK TABLE. You may need to\n"
- "InnoDB: restore from a backup, or"
- " dump + drop + reimport the table.\n",
- stderr);
-
- err = DB_CORRUPTION;
-
- goto lock_wait_or_error;
- } else {
- /* The user may be dumping a corrupt table. Jump
- over the corruption to recover as much as possible. */
-
- fprintf(stderr,
- "InnoDB: Index corruption: rec offs %lu"
- " next offs %lu, page no %lu,\n"
- "InnoDB: ",
- (ulong) page_offset(rec),
- (ulong) next_offs,
- (ulong) page_get_page_no(page_align(rec)));
- dict_index_name_print(stderr, trx, index);
- fputs(". We try to skip the rest of the page.\n",
- stderr);
-
- btr_pcur_move_to_last_on_page(pcur, &mtr);
-
- goto next_rec;
- }
- }
- /*-------------------------------------------------------------*/
-
- /* Calculate the 'offsets' associated with 'rec' */
-
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
-
- if (UNIV_UNLIKELY(srv_force_recovery > 0)) {
- if (!rec_validate(rec, offsets)
- || !btr_index_rec_validate(rec, index, FALSE)) {
- fprintf(stderr,
- "InnoDB: Index corruption: rec offs %lu"
- " next offs %lu, page no %lu,\n"
- "InnoDB: ",
- (ulong) page_offset(rec),
- (ulong) next_offs,
- (ulong) page_get_page_no(page_align(rec)));
- dict_index_name_print(stderr, trx, index);
- fputs(". We try to skip the record.\n",
- stderr);
-
- goto next_rec;
- }
- }
-
- /* Note that we cannot trust the up_match value in the cursor at this
- place because we can arrive here after moving the cursor! Thus
- we have to recompare rec and search_tuple to determine if they
- match enough. */
-
- if (match_mode == ROW_SEL_EXACT) {
- /* Test if the index record matches completely to search_tuple
- in prebuilt: if not, then we return with DB_RECORD_NOT_FOUND */
-
- /* fputs("Comparing rec and search tuple\n", stderr); */
-
- if (0 != cmp_dtuple_rec(search_tuple, rec, offsets)) {
-
- if (set_also_gap_locks
- && !(srv_locks_unsafe_for_binlog
- || trx->isolation_level
- == TRX_ISO_READ_COMMITTED)
- && prebuilt->select_lock_type != LOCK_NONE) {
-
- /* Try to place a gap lock on the index
- record only if innodb_locks_unsafe_for_binlog
- option is not set or this session is not
- using a READ COMMITTED isolation level. */
-
- err = sel_set_rec_lock(
- btr_pcur_get_block(pcur),
- rec, index, offsets,
- prebuilt->select_lock_type, LOCK_GAP,
- thr);
-
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
- }
-
- btr_pcur_store_position(pcur, &mtr);
-
- err = DB_RECORD_NOT_FOUND;
- /* ut_print_name(stderr, index->name);
- fputs(" record not found 3\n", stderr); */
-
- goto normal_return;
- }
-
- } else if (match_mode == ROW_SEL_EXACT_PREFIX) {
-
- if (!cmp_dtuple_is_prefix_of_rec(search_tuple, rec, offsets)) {
-
- if (set_also_gap_locks
- && !(srv_locks_unsafe_for_binlog
- || trx->isolation_level
- == TRX_ISO_READ_COMMITTED)
- && prebuilt->select_lock_type != LOCK_NONE) {
-
- /* Try to place a gap lock on the index
- record only if innodb_locks_unsafe_for_binlog
- option is not set or this session is not
- using a READ COMMITTED isolation level. */
-
- err = sel_set_rec_lock(
- btr_pcur_get_block(pcur),
- rec, index, offsets,
- prebuilt->select_lock_type, LOCK_GAP,
- thr);
-
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
- }
-
- btr_pcur_store_position(pcur, &mtr);
-
- err = DB_RECORD_NOT_FOUND;
- /* ut_print_name(stderr, index->name);
- fputs(" record not found 4\n", stderr); */
-
- goto normal_return;
- }
- }
-
- /* We are ready to look at a possible new index entry in the result
- set: the cursor is now placed on a user record */
-
- if (prebuilt->select_lock_type != LOCK_NONE) {
- /* Try to place a lock on the index record; note that delete
- marked records are a special case in a unique search. If there
- is a non-delete marked record, then it is enough to lock its
- existence with LOCK_REC_NOT_GAP. */
-
- /* If innodb_locks_unsafe_for_binlog option is used
- or this session is using a READ COMMITTED isolation
- level we lock only the record, i.e., next-key locking is
- not used. */
-
- ulint lock_type;
-
- if (!set_also_gap_locks
- || srv_locks_unsafe_for_binlog
- || trx->isolation_level == TRX_ISO_READ_COMMITTED
- || (unique_search
- && !UNIV_UNLIKELY(rec_get_deleted_flag(rec, comp)))) {
-
- goto no_gap_lock;
- } else {
- lock_type = LOCK_ORDINARY;
- }
-
- /* If we are doing a 'greater or equal than a primary key
- value' search from a clustered index, and we find a record
- that has that exact primary key value, then there is no need
- to lock the gap before the record, because no insert in the
- gap can be in our search range. That is, no phantom row can
- appear that way.
-
- An example: if col1 is the primary key, the search is WHERE
- col1 >= 100, and we find a record where col1 = 100, then no
- need to lock the gap before that record. */
-
- if (index == clust_index
- && mode == PAGE_CUR_GE
- && direction == 0
- && dtuple_get_n_fields_cmp(search_tuple)
- == dict_index_get_n_unique(index)
- && 0 == cmp_dtuple_rec(search_tuple, rec, offsets)) {
-no_gap_lock:
- lock_type = LOCK_REC_NOT_GAP;
- }
-
- err = sel_set_rec_lock(btr_pcur_get_block(pcur),
- rec, index, offsets,
- prebuilt->select_lock_type,
- lock_type, thr);
-
- switch (err) {
- const rec_t* old_vers;
- case DB_SUCCESS:
- if (srv_locks_unsafe_for_binlog
- || trx->isolation_level == TRX_ISO_READ_COMMITTED) {
- /* Note that a record of
- prebuilt->index was locked. */
- prebuilt->new_rec_locks = 1;
- }
- break;
- case DB_LOCK_WAIT:
- if (UNIV_LIKELY(prebuilt->row_read_type
- != ROW_READ_TRY_SEMI_CONSISTENT)
- || index != clust_index) {
-
- goto lock_wait_or_error;
- }
-
- /* The following call returns 'offsets'
- associated with 'old_vers' */
- err = row_sel_build_committed_vers_for_mysql(
- clust_index, prebuilt, rec,
- &offsets, &heap, &old_vers, &mtr);
-
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
-
- mutex_enter(&kernel_mutex);
- if (trx->was_chosen_as_deadlock_victim) {
- mutex_exit(&kernel_mutex);
- err = DB_DEADLOCK;
-
- goto lock_wait_or_error;
- }
- if (UNIV_LIKELY(trx->wait_lock != NULL)) {
- lock_cancel_waiting_and_release(
- trx->wait_lock);
- prebuilt->new_rec_locks = 0;
- } else {
- mutex_exit(&kernel_mutex);
-
- /* The lock was granted while we were
- searching for the last committed version.
- Do a normal locking read. */
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED,
- &heap);
- err = DB_SUCCESS;
- /* Note that a record of
- prebuilt->index was locked. */
- prebuilt->new_rec_locks = 1;
- break;
- }
- mutex_exit(&kernel_mutex);
-
- if (old_vers == NULL) {
- /* The row was not yet committed */
-
- goto next_rec;
- }
-
- did_semi_consistent_read = TRUE;
- rec = old_vers;
- break;
- default:
-
- goto lock_wait_or_error;
- }
- } else {
- /* This is a non-locking consistent read: if necessary, fetch
- a previous version of the record */
-
- if (trx->isolation_level == TRX_ISO_READ_UNCOMMITTED) {
-
- /* Do nothing: we let a non-locking SELECT read the
- latest version of the record */
-
- } else if (index == clust_index) {
-
- /* Fetch a previous version of the row if the current
- one is not visible in the snapshot; if we have a very
- high force recovery level set, we try to avoid crashes
- by skipping this lookup */
-
- if (UNIV_LIKELY(srv_force_recovery < 5)
- && !lock_clust_rec_cons_read_sees(
- rec, index, offsets, trx->read_view)) {
-
- rec_t* old_vers;
- /* The following call returns 'offsets'
- associated with 'old_vers' */
- err = row_sel_build_prev_vers_for_mysql(
- trx->read_view, clust_index,
- prebuilt, rec, &offsets, &heap,
- &old_vers, &mtr);
-
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
-
- if (old_vers == NULL) {
- /* The row did not exist yet in
- the read view */
-
- goto next_rec;
- }
-
- rec = old_vers;
- }
- } else if (!lock_sec_rec_cons_read_sees(rec, trx->read_view)) {
- /* We are looking into a non-clustered index,
- and to get the right version of the record we
- have to look also into the clustered index: this
- is necessary, because we can only get the undo
- information via the clustered index record. */
-
- ut_ad(index != clust_index);
-
- goto requires_clust_rec;
- }
- }
-
- /* NOTE that at this point rec can be an old version of a clustered
- index record built for a consistent read. We cannot assume after this
- point that rec is on a buffer pool page. Functions like
- page_rec_is_comp() cannot be used! */
-
- if (UNIV_UNLIKELY(rec_get_deleted_flag(rec, comp))) {
-
- /* The record is delete-marked: we can skip it */
-
- if ((srv_locks_unsafe_for_binlog
- || trx->isolation_level == TRX_ISO_READ_COMMITTED)
- && prebuilt->select_lock_type != LOCK_NONE
- && !did_semi_consistent_read) {
-
- /* No need to keep a lock on a delete-marked record
- if we do not want to use next-key locking. */
-
- row_unlock_for_mysql(prebuilt, TRUE);
- }
-
- /* This is an optimization to skip setting the next key lock
- on the record that follows this delete-marked record. This
- optimization works because of the unique search criteria
- which precludes the presence of a range lock between this
- delete marked record and the record following it.
-
- For now this is applicable only to clustered indexes while
- doing a unique search. There is scope for further optimization
- applicable to unique secondary indexes. Current behaviour is
- to widen the scope of a lock on an already delete marked record
- if the same record is deleted twice by the same transaction */
- if (index == clust_index && unique_search) {
- err = DB_RECORD_NOT_FOUND;
-
- goto normal_return;
- }
-
- goto next_rec;
- }
-
- /* Get the clustered index record if needed, if we did not do the
- search using the clustered index. */
-
- if (index != clust_index && prebuilt->need_to_access_clustered) {
-
-requires_clust_rec:
- /* We use a 'goto' to the preceding label if a consistent
- read of a secondary index record requires us to look up old
- versions of the associated clustered index record. */
-
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- /* It was a non-clustered index and we must fetch also the
- clustered index record */
-
- mtr_has_extra_clust_latch = TRUE;
-
- /* The following call returns 'offsets' associated with
- 'clust_rec'. Note that 'clust_rec' can be an old version
- built for a consistent read. */
-
- err = row_sel_get_clust_rec_for_mysql(prebuilt, index, rec,
- thr, &clust_rec,
- &offsets, &heap, &mtr);
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
-
- if (clust_rec == NULL) {
- /* The record did not exist in the read view */
- ut_ad(prebuilt->select_lock_type == LOCK_NONE);
-
- goto next_rec;
- }
-
- if ((srv_locks_unsafe_for_binlog
- || trx->isolation_level == TRX_ISO_READ_COMMITTED)
- && prebuilt->select_lock_type != LOCK_NONE) {
- /* Note that both the secondary index record
- and the clustered index record were locked. */
- ut_ad(prebuilt->new_rec_locks == 1);
- prebuilt->new_rec_locks = 2;
- }
-
- if (UNIV_UNLIKELY(rec_get_deleted_flag(clust_rec, comp))) {
-
- /* The record is delete marked: we can skip it */
-
- if ((srv_locks_unsafe_for_binlog
- || trx->isolation_level == TRX_ISO_READ_COMMITTED)
- && prebuilt->select_lock_type != LOCK_NONE) {
-
- /* No need to keep a lock on a delete-marked
- record if we do not want to use next-key
- locking. */
-
- row_unlock_for_mysql(prebuilt, TRUE);
- }
-
- goto next_rec;
- }
-
- if (prebuilt->need_to_access_clustered) {
-
- result_rec = clust_rec;
-
- ut_ad(rec_offs_validate(result_rec, clust_index,
- offsets));
- } else {
- /* We used 'offsets' for the clust rec, recalculate
- them for 'rec' */
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- result_rec = rec;
- }
- } else {
- result_rec = rec;
- }
-
- /* We found a qualifying record 'result_rec'. At this point,
- 'offsets' are associated with 'result_rec'. */
-
- ut_ad(rec_offs_validate(result_rec,
- result_rec != rec ? clust_index : index,
- offsets));
-
- /* At this point, the clustered index record is protected
- by a page latch that was acquired when pcur was positioned.
- The latch will not be released until mtr_commit(&mtr). */
-
- if ((match_mode == ROW_SEL_EXACT
- || prebuilt->n_rows_fetched >= MYSQL_FETCH_CACHE_THRESHOLD)
- && prebuilt->select_lock_type == LOCK_NONE
- && !prebuilt->templ_contains_blob
- && !prebuilt->clust_index_was_generated
- && !prebuilt->used_in_HANDLER
- && prebuilt->template_type
- != ROW_MYSQL_DUMMY_TEMPLATE) {
-
- /* Inside an update, for example, we do not cache rows,
- since we may use the cursor position to do the actual
- update, that is why we require ...lock_type == LOCK_NONE.
- Since we keep space in prebuilt only for the BLOBs of
- a single row, we cannot cache rows in the case there
- are BLOBs in the fields to be fetched. In HANDLER we do
- not cache rows because there the cursor is a scrollable
- cursor. */
-
- row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
- offsets);
- if (prebuilt->n_fetch_cached == MYSQL_FETCH_CACHE_SIZE) {
-
- goto got_row;
- }
-
- goto next_rec;
- } else {
- if (prebuilt->template_type == ROW_MYSQL_DUMMY_TEMPLATE) {
- memcpy(buf + 4, result_rec
- - rec_offs_extra_size(offsets),
- rec_offs_size(offsets));
- mach_write_to_4(buf,
- rec_offs_extra_size(offsets) + 4);
- } else {
- if (!row_sel_store_mysql_rec(buf, prebuilt,
- result_rec, offsets)) {
- err = DB_TOO_BIG_RECORD;
-
- goto lock_wait_or_error;
- }
- }
-
- if (prebuilt->clust_index_was_generated) {
- if (result_rec != rec) {
- offsets = rec_get_offsets(
- rec, index, offsets, ULINT_UNDEFINED,
- &heap);
- }
- row_sel_store_row_id_to_prebuilt(prebuilt, rec,
- index, offsets);
- }
- }
-
- /* From this point on, 'offsets' are invalid. */
-
-got_row:
- /* We have an optimization to save CPU time: if this is a consistent
- read on a unique condition on the clustered index, then we do not
- store the pcur position, because any fetch next or prev will anyway
- return 'end of file'. Exceptions are locking reads and the MySQL
- HANDLER command where the user can move the cursor with PREV or NEXT
- even after a unique search. */
-
- if (!unique_search_from_clust_index
- || prebuilt->select_lock_type != LOCK_NONE
- || prebuilt->used_in_HANDLER) {
-
- /* Inside an update always store the cursor position */
-
- btr_pcur_store_position(pcur, &mtr);
- }
-
- err = DB_SUCCESS;
-
- goto normal_return;
-
-next_rec:
- /* Reset the old and new "did semi-consistent read" flags. */
- if (UNIV_UNLIKELY(prebuilt->row_read_type
- == ROW_READ_DID_SEMI_CONSISTENT)) {
- prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
- }
- did_semi_consistent_read = FALSE;
- prebuilt->new_rec_locks = 0;
-
- /*-------------------------------------------------------------*/
- /* PHASE 5: Move the cursor to the next index record */
-
- if (UNIV_UNLIKELY(mtr_has_extra_clust_latch)) {
- /* We must commit mtr if we are moving to the next
- non-clustered index record, because we could break the
- latching order if we would access a different clustered
- index page right away without releasing the previous. */
-
- btr_pcur_store_position(pcur, &mtr);
-
- mtr_commit(&mtr);
- mtr_has_extra_clust_latch = FALSE;
-
- mtr_start(&mtr);
- if (sel_restore_position_for_mysql(&same_user_rec,
- BTR_SEARCH_LEAF,
- pcur, moves_up, &mtr)) {
-#ifdef UNIV_SEARCH_DEBUG
- cnt++;
-#endif /* UNIV_SEARCH_DEBUG */
-
- goto rec_loop;
- }
- }
-
- if (moves_up) {
- if (UNIV_UNLIKELY(!btr_pcur_move_to_next(pcur, &mtr))) {
-not_moved:
- btr_pcur_store_position(pcur, &mtr);
-
- if (match_mode != 0) {
- err = DB_RECORD_NOT_FOUND;
- } else {
- err = DB_END_OF_INDEX;
- }
-
- goto normal_return;
- }
- } else {
- if (UNIV_UNLIKELY(!btr_pcur_move_to_prev(pcur, &mtr))) {
- goto not_moved;
- }
- }
-
-#ifdef UNIV_SEARCH_DEBUG
- cnt++;
-#endif /* UNIV_SEARCH_DEBUG */
-
- goto rec_loop;
-
-lock_wait_or_error:
- /* Reset the old and new "did semi-consistent read" flags. */
- if (UNIV_UNLIKELY(prebuilt->row_read_type
- == ROW_READ_DID_SEMI_CONSISTENT)) {
- prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
- }
- did_semi_consistent_read = FALSE;
-
- /*-------------------------------------------------------------*/
-
- btr_pcur_store_position(pcur, &mtr);
-
- mtr_commit(&mtr);
- mtr_has_extra_clust_latch = FALSE;
-
- trx->error_state = err;
-
- /* The following is a patch for MySQL */
-
- que_thr_stop_for_mysql(thr);
-
- thr->lock_state = QUE_THR_LOCK_ROW;
-
- if (row_mysql_handle_errors(&err, trx, thr, NULL)) {
- /* It was a lock wait, and it ended */
-
- thr->lock_state = QUE_THR_LOCK_NOLOCK;
- mtr_start(&mtr);
-
- sel_restore_position_for_mysql(&same_user_rec,
- BTR_SEARCH_LEAF, pcur,
- moves_up, &mtr);
-
- if ((srv_locks_unsafe_for_binlog
- || trx->isolation_level == TRX_ISO_READ_COMMITTED)
- && !same_user_rec) {
-
- /* Since we were not able to restore the cursor
- on the same user record, we cannot use
- row_unlock_for_mysql() to unlock any records, and
- we must thus reset the new rec lock info. Since
- in lock0lock.c we have blocked the inheriting of gap
- X-locks, we actually do not have any new record locks
- set in this case.
-
- Note that if we were able to restore on the 'same'
- user record, it is still possible that we were actually
- waiting on a delete-marked record, and meanwhile
- it was removed by purge and inserted again by some
- other user. But that is no problem, because in
- rec_loop we will again try to set a lock, and
- new_rec_lock_info in trx will be right at the end. */
-
- prebuilt->new_rec_locks = 0;
- }
-
- mode = pcur->search_mode;
-
- goto rec_loop;
- }
-
- thr->lock_state = QUE_THR_LOCK_NOLOCK;
-
-#ifdef UNIV_SEARCH_DEBUG
- /* fputs("Using ", stderr);
- dict_index_name_print(stderr, index);
- fprintf(stderr, " cnt %lu ret value %lu err\n", cnt, err); */
-#endif /* UNIV_SEARCH_DEBUG */
- goto func_exit;
-
-normal_return:
- /*-------------------------------------------------------------*/
- que_thr_stop_for_mysql_no_error(thr, trx);
-
- mtr_commit(&mtr);
-
- if (prebuilt->n_fetch_cached > 0) {
- row_sel_pop_cached_row_for_mysql(buf, prebuilt);
-
- err = DB_SUCCESS;
- }
-
-#ifdef UNIV_SEARCH_DEBUG
- /* fputs("Using ", stderr);
- dict_index_name_print(stderr, index);
- fprintf(stderr, " cnt %lu ret value %lu err\n", cnt, err); */
-#endif /* UNIV_SEARCH_DEBUG */
- if (err == DB_SUCCESS) {
- srv_n_rows_read++;
- }
-
-func_exit:
- trx->op_info = "";
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- /* Set or reset the "did semi-consistent read" flag on return.
- The flag did_semi_consistent_read is set if and only if
- the record being returned was fetched with a semi-consistent read. */
- ut_ad(prebuilt->row_read_type != ROW_READ_WITH_LOCKS
- || !did_semi_consistent_read);
-
- if (UNIV_UNLIKELY(prebuilt->row_read_type != ROW_READ_WITH_LOCKS)) {
- if (UNIV_UNLIKELY(did_semi_consistent_read)) {
- prebuilt->row_read_type = ROW_READ_DID_SEMI_CONSISTENT;
- } else {
- prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
- }
- }
- return(err);
-}
-
-/*******************************************************************//**
-Checks if MySQL at the moment is allowed for this table to retrieve a
-consistent read result, or store it to the query cache.
-Permission is granted only when the lock list of the table is empty and
-ut_dulint_cmp(trx->id, table->query_cache_inv_trx_id) is >= 0.
-Side effects: the transaction is started if it was not started yet, and
-when permission is granted at isolation level REPEATABLE READ or higher,
-a read view is opened for trx if it does not have one yet. If no table is
-found for norm_name, FALSE is returned before trx is touched.
-@return TRUE if storing or retrieving from the query cache is permitted */
-UNIV_INTERN
-ibool
-row_search_check_if_query_cache_permitted(
-/*======================================*/
- trx_t* trx, /*!< in/out: transaction object; may be started
- and may be assigned a read view here */
- const char* norm_name) /*!< in: concatenation of database name,
- '/' char, table name */
-{
- dict_table_t* table;
- ibool ret = FALSE;
-
- table = dict_table_get(norm_name, FALSE);
-
- if (table == NULL) {
-
- return(FALSE);
- }
-
- mutex_enter(&kernel_mutex); /* held until just before the return below */
-
- /* Start the transaction if it is not started yet */
-
- trx_start_if_not_started_low(trx);
-
- /* If there are locks on the table or some trx has invalidated the
- cache up to our trx id, then ret = FALSE.
- We do not check what type locks there are on the table, though only
- IX type locks actually would require ret = FALSE. */
-
- if (UT_LIST_GET_LEN(table->locks) == 0
- && ut_dulint_cmp(trx->id,
- table->query_cache_inv_trx_id) >= 0) {
-
- ret = TRUE;
-
- /* If the isolation level is high, assign a read view for the
- transaction if it does not yet have one */
-
- if (trx->isolation_level >= TRX_ISO_REPEATABLE_READ
- && !trx->read_view) {
-
- trx->read_view = read_view_open_now(
- trx->id, trx->global_read_view_heap);
- trx->global_read_view = trx->read_view; /* the same view is
- recorded in both fields */
- }
- }
-
- mutex_exit(&kernel_mutex);
-
- return(ret);
-}
-
-/*******************************************************************//**
-Read the AUTOINC column from the current row. If the value is less than
-0 and the type is not unsigned then we reset the value to 0.
-The function asserts (ut_a) that the field is not SQL NULL and that its
-length does not exceed sizeof(ib_uint64_t). A heap that rec_get_offsets()
-may have allocated is freed before returning.
-@return value read from the column */
-static
-ib_uint64_t
-row_search_autoinc_read_column(
-/*===========================*/
- dict_index_t* index, /*!< in: index to read from */
- const rec_t* rec, /*!< in: current rec */
- ulint col_no, /*!< in: column number; used as the field
- position passed to rec_get_nth_field() */
- ibool unsigned_type) /*!< in: signed or unsigned flag; nonzero
- is treated as unsigned */
-{
- ulint len;
- const byte* data;
- ib_uint64_t value;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
-
- rec_offs_init(offsets_);
-
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
-
- data = rec_get_nth_field(rec, offsets, col_no, &len);
-
- ut_a(len != UNIV_SQL_NULL);
- ut_a(len <= sizeof value); /* the field must fit into 'value' */
-
- /* we assume AUTOINC value cannot be negative */
- value = mach_read_int_type(data, len, unsigned_type);
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- if (!unsigned_type && (ib_int64_t) value < 0) { /* signed and negative:
- clamp to 0 */
- value = 0;
- }
-
- return(value);
-}
-
-/*******************************************************************//**
-Get the last row. Starting from the record on which pcur is currently
-positioned, the cursor is stepped backwards with btr_pcur_move_to_prev()
-until a user record is found. The cursor is left where the scan stopped.
-@return current rec or NULL; NULL means that btr_pcur_move_to_prev()
-returned FALSE before any user record was seen */
-static
-const rec_t*
-row_search_autoinc_get_rec(
-/*=======================*/
- btr_pcur_t* pcur, /*!< in: the current cursor */
- mtr_t* mtr) /*!< in: mini transaction */
-{
- do {
- const rec_t* rec = btr_pcur_get_rec(pcur);
-
- if (page_rec_is_user_rec(rec)) { /* skip records that are not
- user records */
- return(rec);
- }
- } while (btr_pcur_move_to_prev(pcur, mtr));
-
- return(NULL);
-}
-
-/*******************************************************************//**
-Read the max AUTOINC value from an index.
-The column is looked up by name among the first
-dict_index_get_n_ordering_defined_by_user(index) fields of the index. If
-it is found, a cursor is opened at the high end of the index and the
-value is read from the last user record found by scanning backwards.
-*value is set to 0 before the lookup result is checked, so it stays 0
-when the column is not found, when the page the cursor opens on holds no
-records, or when no user record is found.
-@return DB_SUCCESS if all OK else error code, DB_RECORD_NOT_FOUND if
-column name can't be found in index */
-UNIV_INTERN
-ulint
-row_search_max_autoinc(
-/*===================*/
- dict_index_t* index, /*!< in: index to search */
- const char* col_name, /*!< in: name of autoinc column */
- ib_uint64_t* value) /*!< out: AUTOINC value read */
-{
- ulint i;
- ulint n_cols;
- dict_field_t* dfield = NULL;
- ulint error = DB_SUCCESS;
-
- n_cols = dict_index_get_n_ordering_defined_by_user(index);
-
- /* Search the index for the AUTOINC column name */
- for (i = 0; i < n_cols; ++i) {
- dfield = dict_index_get_nth_field(index, i);
-
- if (strcmp(col_name, dfield->name) == 0) {
- break;
- }
- }
-
- *value = 0;
-
- /* Must find the AUTOINC column name */
- if (i < n_cols && dfield) { /* i == n_cols means the loop ended
- without a name match */
- mtr_t mtr;
- btr_pcur_t pcur;
-
- mtr_start(&mtr);
-
- /* Open at the high/right end (FALSE), and INIT
- cursor (TRUE) */
- btr_pcur_open_at_index_side(
- FALSE, index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
-
- if (page_get_n_recs(btr_pcur_get_page(&pcur)) > 0) {
- const rec_t* rec;
-
- rec = row_search_autoinc_get_rec(&pcur, &mtr);
-
- if (rec != NULL) {
- ibool unsigned_type = (
- dfield->col->prtype & DATA_UNSIGNED);
-
- *value = row_search_autoinc_read_column(
- index, rec, i, unsigned_type); /* i is the position
- of the matched field in the index */
- }
- }
-
- btr_pcur_close(&pcur);
-
- mtr_commit(&mtr);
- } else {
- error = DB_RECORD_NOT_FOUND;
- }
-
- return(error);
-}
diff --git a/storage/innodb_plugin/row/row0uins.c b/storage/innodb_plugin/row/row0uins.c
deleted file mode 100644
index 9f9c814f1a5..00000000000
--- a/storage/innodb_plugin/row/row0uins.c
+++ /dev/null
@@ -1,350 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file row/row0uins.c
-Fresh insert undo
-
-Created 2/25/1997 Heikki Tuuri
-*******************************************************/
-
-#include "row0uins.h"
-
-#ifdef UNIV_NONINL
-#include "row0uins.ic"
-#endif
-
-#include "dict0dict.h"
-#include "dict0boot.h"
-#include "dict0crea.h"
-#include "trx0undo.h"
-#include "trx0roll.h"
-#include "btr0btr.h"
-#include "mach0data.h"
-#include "row0undo.h"
-#include "row0vers.h"
-#include "trx0trx.h"
-#include "trx0rec.h"
-#include "row0row.h"
-#include "row0upd.h"
-#include "que0que.h"
-#include "ibuf0ibuf.h"
-#include "log0log.h"
-
-/***************************************************************//**
-Removes a clustered index record. The pcur in node was positioned on the
-record, now it is detached.
-An optimistic delete is tried first. If it fails, a pessimistic delete is
-attempted and repeated, with a sleep of BTR_CUR_RETRY_SLEEP_TIME between
-attempts, as long as it reports DB_OUT_OF_FILE_SPACE and fewer than
-BTR_CUR_RETRY_DELETE_N_TIMES retries have been made. If the table id
-equals DICT_INDEXES_ID, the index tree described by the record is dropped
-before the record itself is deleted. The undo record is released with
-trx_undo_rec_release() on both return paths.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static
-ulint
-row_undo_ins_remove_clust_rec(
-/*==========================*/
- undo_node_t* node) /*!< in: undo node */
-{
- btr_cur_t* btr_cur;
- ibool success;
- ulint err;
- ulint n_tries = 0;
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- success = btr_pcur_restore_position(BTR_MODIFY_LEAF, &(node->pcur),
- &mtr);
- ut_a(success);
-
- if (ut_dulint_cmp(node->table->id, DICT_INDEXES_ID) == 0) {
- ut_ad(node->trx->dict_operation_lock_mode == RW_X_LATCH);
-
- /* Drop the index tree associated with the row in
- SYS_INDEXES table: */
-
- dict_drop_index_tree(btr_pcur_get_rec(&(node->pcur)), &mtr);
-
- mtr_commit(&mtr);
-
- mtr_start(&mtr); /* a new mtr is started, so the cursor position
- is restored again below */
-
- success = btr_pcur_restore_position(BTR_MODIFY_LEAF,
- &(node->pcur), &mtr);
- ut_a(success);
- }
-
- btr_cur = btr_pcur_get_btr_cur(&(node->pcur));
-
- success = btr_cur_optimistic_delete(btr_cur, &mtr);
-
- btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);
-
- if (success) {
- trx_undo_rec_release(node->trx, node->undo_no);
-
- return(DB_SUCCESS);
- }
-retry:
- /* If did not succeed, try pessimistic descent to tree */
- mtr_start(&mtr);
-
- success = btr_pcur_restore_position(BTR_MODIFY_TREE,
- &(node->pcur), &mtr);
- ut_a(success);
-
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
- trx_is_recv(node->trx)
- ? RB_RECOVERY
- : RB_NORMAL, &mtr);
-
- /* The delete operation may fail if we have little
- file space left: TODO: easiest to crash the database
- and restart with more file space */
-
- if (err == DB_OUT_OF_FILE_SPACE
- && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
-
- btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);
-
- n_tries++;
-
- os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
-
- goto retry;
- }
-
- btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);
-
- trx_undo_rec_release(node->trx, node->undo_no);
-
- return(err); /* result of the last pessimistic delete attempt */
-}
-
-/***************************************************************//**
-Removes a secondary index entry if found.
-The whole operation runs in one mini-transaction that is started and
-committed inside this function. DB_SUCCESS is also returned when the
-entry is not found. In BTR_MODIFY_LEAF mode a failed optimistic delete is
-reported as DB_FAIL; in BTR_MODIFY_TREE mode the error code set by
-btr_cur_pessimistic_delete() is returned.
-@return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
-static
-ulint
-row_undo_ins_remove_sec_low(
-/*========================*/
- ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
- depending on whether we wish optimistic or
- pessimistic descent down the index tree */
- dict_index_t* index, /*!< in: index */
- dtuple_t* entry) /*!< in: index entry to remove */
-{
- btr_pcur_t pcur;
- btr_cur_t* btr_cur;
- ibool found;
- ibool success;
- ulint err;
- mtr_t mtr;
-
- log_free_check();
- mtr_start(&mtr);
-
- found = row_search_index_entry(index, entry, mode, &pcur, &mtr);
-
- btr_cur = btr_pcur_get_btr_cur(&pcur);
-
- if (!found) {
- /* Not found */
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- return(DB_SUCCESS);
- }
-
- if (mode == BTR_MODIFY_LEAF) {
- success = btr_cur_optimistic_delete(btr_cur, &mtr);
-
- if (success) {
- err = DB_SUCCESS;
- } else {
- err = DB_FAIL;
- }
- } else {
- ut_ad(mode == BTR_MODIFY_TREE);
-
- /* No need to distinguish RB_RECOVERY here, because we
- are deleting a secondary index record: the distinction
- between RB_NORMAL and RB_RECOVERY only matters when
- deleting a record that contains externally stored
- columns. */
- ut_ad(!dict_index_is_clust(index));
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
- RB_NORMAL, &mtr);
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- return(err);
-}
-
-/***************************************************************//**
-Removes a secondary index entry from the index if found. Tries first
-optimistic, then pessimistic descent down the tree.
-The pessimistic attempt is repeated, with a sleep of
-BTR_CUR_RETRY_SLEEP_TIME between attempts, on any result other than
-DB_SUCCESS until BTR_CUR_RETRY_DELETE_N_TIMES retries have been made; the
-result of the last attempt is returned.
-NOTE(review): the retry condition is not limited to DB_OUT_OF_FILE_SPACE,
-so the return value is whatever the last pessimistic attempt produced;
-confirm that only DB_OUT_OF_FILE_SPACE can occur here.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static
-ulint
-row_undo_ins_remove_sec(
-/*====================*/
- dict_index_t* index, /*!< in: index */
- dtuple_t* entry) /*!< in: index entry to remove */
-{
- ulint err;
- ulint n_tries = 0;
-
- /* Try first optimistic descent to the B-tree */
-
- err = row_undo_ins_remove_sec_low(BTR_MODIFY_LEAF, index, entry);
-
- if (err == DB_SUCCESS) {
-
- return(err);
- }
-
- /* Try then pessimistic descent to the B-tree */
-retry:
- err = row_undo_ins_remove_sec_low(BTR_MODIFY_TREE, index, entry);
-
- /* The delete operation may fail if we have little
- file space left: TODO: easiest to crash the database
- and restart with more file space */
-
- if (err != DB_SUCCESS && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
-
- n_tries++;
-
- os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
-
- goto retry;
- }
-
- return(err);
-}
-
-/***********************************************************//**
-Parses the row reference and other info in a fresh insert undo record.
-Sets node->rec_type, node->update (to NULL) and node->table, and builds
-node->ref when the table has a clustered index. On return node->table is
-NULL if the table was not found, if its .ibd file is missing, or if the
-table has no indexes (a message is printed to stderr in the last case);
-the caller has to check node->table before using the node. */
-static
-void
-row_undo_ins_parse_undo_rec(
-/*========================*/
- undo_node_t* node) /*!< in/out: row undo node */
-{
- dict_index_t* clust_index;
- byte* ptr;
- undo_no_t undo_no;
- dulint table_id;
- ulint type;
- ulint dummy;
- ibool dummy_extern;
-
- ut_ad(node);
-
- ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &dummy,
- &dummy_extern, &undo_no, &table_id);
- ut_ad(type == TRX_UNDO_INSERT_REC);
- node->rec_type = type;
-
- node->update = NULL;
- node->table = dict_table_get_on_id(table_id, node->trx);
-
- /* Skip the UNDO if we can't find the table or the .ibd file. */
- if (UNIV_UNLIKELY(node->table == NULL)) { /* nothing to do:
- node->table is already NULL */
- } else if (UNIV_UNLIKELY(node->table->ibd_file_missing)) {
- node->table = NULL;
- } else {
- clust_index = dict_table_get_first_index(node->table);
-
- if (clust_index != NULL) {
- ptr = trx_undo_rec_get_row_ref(
- ptr, clust_index, &node->ref, node->heap);
- } else {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: table ");
- ut_print_name(stderr, node->trx, TRUE,
- node->table->name);
- fprintf(stderr, " has no indexes, "
- "ignoring the table\n");
-
- node->table = NULL;
- }
- }
-}
-
-/***********************************************************//**
-Undoes a fresh insert of a row to a table. A fresh insert means that
-the same clustered index unique key did not have any record, even delete
-marked, at the time of the insert. InnoDB is eager in a rollback:
-if it figures out that an index record will be removed in the purge
-anyway, it will remove it in the rollback.
-The secondary index entries are removed first, in index order, and the
-clustered index record last. If the table is not available or
-row_undo_search_clust_to_pcur() fails, the undo record is released and
-DB_SUCCESS is returned without removing anything.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-UNIV_INTERN
-ulint
-row_undo_ins(
-/*=========*/
- undo_node_t* node) /*!< in: row undo node */
-{
- ut_ad(node);
- ut_ad(node->state == UNDO_NODE_INSERT);
-
- row_undo_ins_parse_undo_rec(node);
-
- if (!node->table || !row_undo_search_clust_to_pcur(node)) {
- trx_undo_rec_release(node->trx, node->undo_no);
-
- return(DB_SUCCESS);
- }
-
- /* Iterate over all the indexes and undo the insert.*/
-
- /* Skip the clustered index (the first index) */
- node->index = dict_table_get_next_index(
- dict_table_get_first_index(node->table));
-
- while (node->index != NULL) {
- dtuple_t* entry;
- ulint err;
-
- entry = row_build_index_entry(node->row, node->ext,
- node->index, node->heap);
- if (UNIV_UNLIKELY(!entry)) {
- /* The database must have crashed after
- inserting a clustered index record but before
- writing all the externally stored columns of
- that record. Because secondary index entries
- are inserted after the clustered index record,
- we may assume that the secondary index record
- does not exist. However, this situation may
- only occur during the rollback of incomplete
- transactions. */
- ut_a(trx_is_recv(node->trx));
- } else {
- err = row_undo_ins_remove_sec(node->index, entry);
-
- if (err != DB_SUCCESS) {
-
- return(err); /* NOTE(review): the undo record is
- not released on this path inside this function;
- presumably the caller deals with it - confirm */
- }
- }
-
- node->index = dict_table_get_next_index(node->index);
- }
-
- return(row_undo_ins_remove_clust_rec(node));
-}
diff --git a/storage/innodb_plugin/row/row0umod.c b/storage/innodb_plugin/row/row0umod.c
deleted file mode 100644
index 6be475d8c78..00000000000
--- a/storage/innodb_plugin/row/row0umod.c
+++ /dev/null
@@ -1,815 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file row/row0umod.c
-Undo modify of a row
-
-Created 2/27/1997 Heikki Tuuri
-*******************************************************/
-
-#include "row0umod.h"
-
-#ifdef UNIV_NONINL
-#include "row0umod.ic"
-#endif
-
-#include "dict0dict.h"
-#include "dict0boot.h"
-#include "trx0undo.h"
-#include "trx0roll.h"
-#include "btr0btr.h"
-#include "mach0data.h"
-#include "row0undo.h"
-#include "row0vers.h"
-#include "trx0trx.h"
-#include "trx0rec.h"
-#include "row0row.h"
-#include "row0upd.h"
-#include "que0que.h"
-#include "log0log.h"
-
-/* Considerations on undoing a modify operation.
-(1) Undoing a delete marking: all index records should be found. Some of
-them may have delete mark already FALSE, if the delete mark operation was
-stopped underway, or if the undo operation ended prematurely because of a
-system crash.
-(2) Undoing an update of a delete unmarked record: the newer version of
-an updated secondary index entry should be removed if no prior version
-of the clustered index record requires its existence. Otherwise, it should
-be delete marked.
-(3) Undoing an update of a delete marked record. In this kind of update a
-delete marked clustered index record was delete unmarked and possibly also
-some of its fields were changed. Now, it is possible that the delete marked
-version has become obsolete at the time the undo is started. */
-
-/***********************************************************//**
-Checks if also the previous version of the clustered index record was
-modified or inserted by the same transaction, and its undo number is such
-that it should be undone in the same rollback.
-If node->new_trx_id differs from the id of node->trx, *undo_no is set to
-ut_dulint_zero and FALSE is returned. Otherwise the undo record pointed to
-by node->new_roll_ptr is fetched into node->heap, its undo number is
-stored in *undo_no, and the result is whether
-ut_dulint_cmp(trx->roll_limit, *undo_no) is <= 0.
-@return TRUE if also previous modify or insert of this row should be undone */
-UNIV_INLINE
-ibool
-row_undo_mod_undo_also_prev_vers(
-/*=============================*/
- undo_node_t* node, /*!< in: row undo node */
- undo_no_t* undo_no)/*!< out: the undo number */
-{
- trx_undo_rec_t* undo_rec;
- trx_t* trx;
-
- trx = node->trx;
-
- if (0 != ut_dulint_cmp(node->new_trx_id, trx->id)) { /* the version
- was written by another transaction */
-
- *undo_no = ut_dulint_zero;
- return(FALSE);
- }
-
- undo_rec = trx_undo_get_undo_rec_low(node->new_roll_ptr, node->heap);
-
- *undo_no = trx_undo_rec_get_undo_no(undo_rec);
-
- return(ut_dulint_cmp(trx->roll_limit, *undo_no) <= 0);
-}
-
-/***********************************************************//**
-Undoes a modify in a clustered index record.
-The cursor node->pcur is restored in the given mode and node->update is
-applied with the flags BTR_NO_LOCKING_FLAG, BTR_NO_UNDO_LOG_FLAG and
-BTR_KEEP_SYS_FLAG: optimistically in BTR_MODIFY_LEAF mode, pessimistically
-in BTR_MODIFY_TREE mode. The mtr is neither started nor committed here;
-that is left to the caller.
-@return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
-static
-ulint
-row_undo_mod_clust_low(
-/*===================*/
- undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr, /*!< in: mtr; must be committed before
- latching any further pages */
- ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
-{
- btr_pcur_t* pcur;
- btr_cur_t* btr_cur;
- ulint err;
- ibool success;
-
- pcur = &(node->pcur);
- btr_cur = btr_pcur_get_btr_cur(pcur);
-
- success = btr_pcur_restore_position(mode, pcur, mtr);
-
- ut_ad(success); /* NOTE(review): ut_ad is presumably a debug-only
- assertion, so a failed restore would go unchecked in other
- builds - confirm */
-
- if (mode == BTR_MODIFY_LEAF) {
-
- err = btr_cur_optimistic_update(BTR_NO_LOCKING_FLAG
- | BTR_NO_UNDO_LOG_FLAG
- | BTR_KEEP_SYS_FLAG,
- btr_cur, node->update,
- node->cmpl_info, thr, mtr);
- } else {
- mem_heap_t* heap = NULL;
- big_rec_t* dummy_big_rec;
-
- ut_ad(mode == BTR_MODIFY_TREE);
-
- err = btr_cur_pessimistic_update(
- BTR_NO_LOCKING_FLAG
- | BTR_NO_UNDO_LOG_FLAG
- | BTR_KEEP_SYS_FLAG,
- btr_cur, &heap, &dummy_big_rec, node->update,
- node->cmpl_info, thr, mtr);
-
- ut_a(!dummy_big_rec); /* no big record is expected back from
- the update */
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-
- return(err);
-}
-
-/***********************************************************//**
-Removes a clustered index record after undo if possible.
-DB_SUCCESS is returned without deleting anything when the cursor position
-cannot be restored, when node->rec_type is not TRX_UNDO_UPD_DEL_REC, or
-when row_vers_must_preserve_del_marked() says that the record has to be
-kept. In BTR_MODIFY_LEAF mode a failed optimistic delete is reported as
-DB_FAIL. The mtr is neither started nor committed here.
-@return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
-static
-ulint
-row_undo_mod_remove_clust_low(
-/*==========================*/
- undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr __attribute__((unused)), /*!< in: query thread */
- mtr_t* mtr, /*!< in: mtr */
- ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
-{
- btr_pcur_t* pcur;
- btr_cur_t* btr_cur;
- ulint err;
- ibool success;
-
- pcur = &(node->pcur);
- btr_cur = btr_pcur_get_btr_cur(pcur);
-
- success = btr_pcur_restore_position(mode, pcur, mtr);
-
- if (!success) {
-
- return(DB_SUCCESS);
- }
-
- /* Find out if we can remove the whole clustered index record */
-
- if (node->rec_type == TRX_UNDO_UPD_DEL_REC
- && !row_vers_must_preserve_del_marked(node->new_trx_id, mtr)) {
-
- /* Ok, we can remove */
- } else {
- return(DB_SUCCESS);
- }
-
- if (mode == BTR_MODIFY_LEAF) {
- success = btr_cur_optimistic_delete(btr_cur, mtr);
-
- if (success) {
- err = DB_SUCCESS;
- } else {
- err = DB_FAIL;
- }
- } else {
- ut_ad(mode == BTR_MODIFY_TREE);
-
- /* Note that since this operation is analogous to purge,
- we can free also inherited externally stored fields:
- hence the RB_NONE in the call below */
-
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur, RB_NONE, mtr);
-
- /* The delete operation may fail if we have little
- file space left: TODO: easiest to crash the database
- and restart with more file space */
- }
-
- return(err);
-}
-
-/***********************************************************//**
-Undoes a modify in a clustered index record. Sets also the node state for the
-next round of undo.
-The update is first tried optimistically and, on any error, repeated
-pessimistically in a fresh mtr. If that succeeded and the undo record type
-is TRX_UNDO_UPD_DEL_REC, removal of the record is attempted in the same
-optimistic-then-pessimistic manner. The undo record is released whatever
-the outcome. node->state is set to UNDO_NODE_FETCH_NEXT, and changed to
-UNDO_NODE_PREV_VERS only if the previous version should be undone too,
-no error occurred, and the undo record of that version could be reserved.
-@return DB_SUCCESS or error code: we may run out of file space */
-static
-ulint
-row_undo_mod_clust(
-/*===============*/
- undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr) /*!< in: query thread */
-{
- btr_pcur_t* pcur;
- mtr_t mtr;
- ulint err;
- ibool success;
- ibool more_vers;
- undo_no_t new_undo_no;
-
- ut_ad(node && thr);
-
- /* Check if also the previous version of the clustered index record
- should be undone in this same rollback operation */
-
- more_vers = row_undo_mod_undo_also_prev_vers(node, &new_undo_no);
-
- pcur = &(node->pcur);
-
- mtr_start(&mtr);
-
- /* Try optimistic processing of the record, keeping changes within
- the index page */
-
- err = row_undo_mod_clust_low(node, thr, &mtr, BTR_MODIFY_LEAF);
-
- if (err != DB_SUCCESS) {
- btr_pcur_commit_specify_mtr(pcur, &mtr);
-
- /* We may have to modify tree structure: do a pessimistic
- descent down the index tree */
-
- mtr_start(&mtr);
-
- err = row_undo_mod_clust_low(node, thr, &mtr, BTR_MODIFY_TREE);
- }
-
- btr_pcur_commit_specify_mtr(pcur, &mtr);
-
- if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_UPD_DEL_REC) {
-
- mtr_start(&mtr);
-
- err = row_undo_mod_remove_clust_low(node, thr, &mtr,
- BTR_MODIFY_LEAF);
- if (err != DB_SUCCESS) {
- btr_pcur_commit_specify_mtr(pcur, &mtr);
-
- /* We may have to modify tree structure: do a
- pessimistic descent down the index tree */
-
- mtr_start(&mtr);
-
- err = row_undo_mod_remove_clust_low(node, thr, &mtr,
- BTR_MODIFY_TREE);
- }
-
- btr_pcur_commit_specify_mtr(pcur, &mtr);
- }
-
- node->state = UNDO_NODE_FETCH_NEXT; /* default next state; may be
- overridden below */
-
- trx_undo_rec_release(node->trx, node->undo_no);
-
- if (more_vers && err == DB_SUCCESS) {
-
- /* Reserve the undo log record to the prior version after
- committing &mtr: this is necessary to comply with the latching
- order, as &mtr may contain the fsp latch which is lower in
- the latch hierarchy than trx->undo_mutex. */
-
- success = trx_undo_rec_reserve(node->trx, new_undo_no);
-
- if (success) {
- node->state = UNDO_NODE_PREV_VERS;
- }
- }
-
- return(err);
-}
-
-/***********************************************************//**
-Delete marks or removes a secondary index entry if found.
-@return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
-static
-ulint
-row_undo_mod_del_mark_or_remove_sec_low(
-/*====================================*/
- undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr, /*!< in: query thread */
- dict_index_t* index, /*!< in: index */
- dtuple_t* entry, /*!< in: index entry */
- ulint mode) /*!< in: latch mode BTR_MODIFY_LEAF or
- BTR_MODIFY_TREE */
-{
- ibool found;
- btr_pcur_t pcur;
- btr_cur_t* btr_cur;
- ibool success;
- ibool old_has;
- ulint err;
- mtr_t mtr;
- mtr_t mtr_vers;
-
- log_free_check();
- mtr_start(&mtr);
-
- found = row_search_index_entry(index, entry, mode, &pcur, &mtr);
-
- btr_cur = btr_pcur_get_btr_cur(&pcur);
-
- if (!found) {
- /* In crash recovery, the secondary index record may
- be missing if the UPDATE did not have time to insert
- the secondary index records before the crash. When we
- are undoing that UPDATE in crash recovery, the record
- may be missing.
-
- In normal processing, if an update ends in a deadlock
- before it has inserted all updated secondary index
- records, then the undo will not find those records. */
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- return(DB_SUCCESS);
- }
-
- /* We should remove the index record if no prior version of the row,
- which cannot be purged yet, requires its existence. If some requires,
- we should delete mark the record. */
-
- mtr_start(&mtr_vers);
-
- success = btr_pcur_restore_position(BTR_SEARCH_LEAF, &(node->pcur),
- &mtr_vers);
- ut_a(success);
-
- old_has = row_vers_old_has_index_entry(FALSE,
- btr_pcur_get_rec(&(node->pcur)),
- &mtr_vers, index, entry);
- if (old_has) {
- err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG,
- btr_cur, TRUE, thr, &mtr);
- ut_ad(err == DB_SUCCESS);
- } else {
- /* Remove the index record */
-
- if (mode == BTR_MODIFY_LEAF) {
- success = btr_cur_optimistic_delete(btr_cur, &mtr);
- if (success) {
- err = DB_SUCCESS;
- } else {
- err = DB_FAIL;
- }
- } else {
- ut_ad(mode == BTR_MODIFY_TREE);
-
- /* No need to distinguish RB_RECOVERY here, because we
- are deleting a secondary index record: the distinction
- between RB_NORMAL and RB_RECOVERY only matters when
- deleting a record that contains externally stored
- columns. */
- ut_ad(!dict_index_is_clust(index));
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
- RB_NORMAL, &mtr);
-
- /* The delete operation may fail if we have little
- file space left: TODO: easiest to crash the database
- and restart with more file space */
- }
- }
-
- btr_pcur_commit_specify_mtr(&(node->pcur), &mtr_vers);
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- return(err);
-}
-
-/***********************************************************//**
-Delete marks or removes a secondary index entry if found.
-NOTE that if we updated the fields of a delete-marked secondary index record
-so that alphabetically they stayed the same, e.g., 'abc' -> 'aBc', we cannot
-return to the original values because we do not know them. But this should
-not cause problems because in row0sel.c, in queries we always retrieve the
-clustered index record or an earlier version of it, if the secondary index
-record through which we do the search is delete-marked.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static
-ulint
-row_undo_mod_del_mark_or_remove_sec(
-/*================================*/
- undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr, /*!< in: query thread */
- dict_index_t* index, /*!< in: index */
- dtuple_t* entry) /*!< in: index entry */
-{
- ulint err;
-
- err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index,
- entry, BTR_MODIFY_LEAF);
- if (err == DB_SUCCESS) {
-
- return(err);
- }
-
- err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index,
- entry, BTR_MODIFY_TREE);
- return(err);
-}
-
-/***********************************************************//**
-Delete unmarks a secondary index entry which must be found. It might not be
-delete-marked at the moment, but it does not harm to unmark it anyway. We also
-need to update the fields of the secondary index record if we updated its
-fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'.
-@return DB_FAIL or DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static
-ulint
-row_undo_mod_del_unmark_sec_and_undo_update(
-/*========================================*/
- ulint mode, /*!< in: search mode: BTR_MODIFY_LEAF or
- BTR_MODIFY_TREE */
- que_thr_t* thr, /*!< in: query thread */
- dict_index_t* index, /*!< in: index */
- dtuple_t* entry) /*!< in: index entry */
-{
- mem_heap_t* heap;
- btr_pcur_t pcur;
- upd_t* update;
- ulint err = DB_SUCCESS;
- big_rec_t* dummy_big_rec;
- mtr_t mtr;
- trx_t* trx = thr_get_trx(thr);
-
- /* Ignore indexes that are being created. */
- if (UNIV_UNLIKELY(*index->name == TEMP_INDEX_PREFIX)) {
-
- return(DB_SUCCESS);
- }
-
- log_free_check();
- mtr_start(&mtr);
-
- if (UNIV_UNLIKELY(!row_search_index_entry(index, entry,
- mode, &pcur, &mtr))) {
- fputs("InnoDB: error in sec index entry del undo in\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, trx, index);
- fputs("\n"
- "InnoDB: tuple ", stderr);
- dtuple_print(stderr, entry);
- fputs("\n"
- "InnoDB: record ", stderr);
- rec_print(stderr, btr_pcur_get_rec(&pcur), index);
- putc('\n', stderr);
- trx_print(stderr, trx, 0);
- fputs("\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n", stderr);
- } else {
- btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur);
-
- err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG,
- btr_cur, FALSE, thr, &mtr);
- ut_a(err == DB_SUCCESS);
- heap = mem_heap_create(100);
-
- update = row_upd_build_sec_rec_difference_binary(
- index, entry, btr_cur_get_rec(btr_cur), trx, heap);
- if (upd_get_n_fields(update) == 0) {
-
- /* Do nothing */
-
- } else if (mode == BTR_MODIFY_LEAF) {
- /* Try an optimistic updating of the record, keeping
- changes within the page */
-
- err = btr_cur_optimistic_update(
- BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG,
- btr_cur, update, 0, thr, &mtr);
- switch (err) {
- case DB_OVERFLOW:
- case DB_UNDERFLOW:
- case DB_ZIP_OVERFLOW:
- err = DB_FAIL;
- }
- } else {
- ut_a(mode == BTR_MODIFY_TREE);
- err = btr_cur_pessimistic_update(
- BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG,
- btr_cur, &heap, &dummy_big_rec,
- update, 0, thr, &mtr);
- ut_a(!dummy_big_rec);
- }
-
- mem_heap_free(heap);
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- return(err);
-}
-
-/***********************************************************//**
-Undoes a modify in secondary indexes when undo record type is UPD_DEL.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static
-ulint
-row_undo_mod_upd_del_sec(
-/*=====================*/
- undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr) /*!< in: query thread */
-{
- mem_heap_t* heap;
- dtuple_t* entry;
- dict_index_t* index;
- ulint err = DB_SUCCESS;
-
- heap = mem_heap_create(1024);
-
- while (node->index != NULL) {
- index = node->index;
-
- entry = row_build_index_entry(node->row, node->ext,
- index, heap);
- if (UNIV_UNLIKELY(!entry)) {
- /* The database must have crashed after
- inserting a clustered index record but before
- writing all the externally stored columns of
- that record. Because secondary index entries
- are inserted after the clustered index record,
- we may assume that the secondary index record
- does not exist. However, this situation may
- only occur during the rollback of incomplete
- transactions. */
- ut_a(trx_is_recv(thr_get_trx(thr)));
- } else {
- err = row_undo_mod_del_mark_or_remove_sec(
- node, thr, index, entry);
-
- if (err != DB_SUCCESS) {
-
- break;
- }
- }
-
- mem_heap_empty(heap);
-
- node->index = dict_table_get_next_index(node->index);
- }
-
- mem_heap_free(heap);
-
- return(err);
-}
-
-/***********************************************************//**
-Undoes a modify in secondary indexes when undo record type is DEL_MARK.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static
-ulint
-row_undo_mod_del_mark_sec(
-/*======================*/
- undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr) /*!< in: query thread */
-{
- mem_heap_t* heap;
- dtuple_t* entry;
- dict_index_t* index;
- ulint err;
-
- heap = mem_heap_create(1024);
-
- while (node->index != NULL) {
- index = node->index;
-
- entry = row_build_index_entry(node->row, node->ext,
- index, heap);
- ut_a(entry);
- err = row_undo_mod_del_unmark_sec_and_undo_update(
- BTR_MODIFY_LEAF, thr, index, entry);
- if (err == DB_FAIL) {
- err = row_undo_mod_del_unmark_sec_and_undo_update(
- BTR_MODIFY_TREE, thr, index, entry);
- }
-
- if (err != DB_SUCCESS) {
-
- mem_heap_free(heap);
-
- return(err);
- }
-
- node->index = dict_table_get_next_index(node->index);
- }
-
- mem_heap_free(heap);
-
- return(DB_SUCCESS);
-}
-
-/***********************************************************//**
-Undoes a modify in secondary indexes when undo record type is UPD_EXIST.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static
-ulint
-row_undo_mod_upd_exist_sec(
-/*=======================*/
- undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr) /*!< in: query thread */
-{
- mem_heap_t* heap;
- dtuple_t* entry;
- dict_index_t* index;
- ulint err;
-
- if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
- /* No change in secondary indexes */
-
- return(DB_SUCCESS);
- }
-
- heap = mem_heap_create(1024);
-
- while (node->index != NULL) {
- index = node->index;
-
- if (row_upd_changes_ord_field_binary(node->row, node->index,
- node->update)) {
-
- /* Build the newest version of the index entry */
- entry = row_build_index_entry(node->row, node->ext,
- index, heap);
- ut_a(entry);
- /* NOTE that if we updated the fields of a
- delete-marked secondary index record so that
- alphabetically they stayed the same, e.g.,
- 'abc' -> 'aBc', we cannot return to the original
- values because we do not know them. But this should
- not cause problems because in row0sel.c, in queries
- we always retrieve the clustered index record or an
- earlier version of it, if the secondary index record
- through which we do the search is delete-marked. */
-
- err = row_undo_mod_del_mark_or_remove_sec(node, thr,
- index,
- entry);
- if (err != DB_SUCCESS) {
- mem_heap_free(heap);
-
- return(err);
- }
-
- /* We may have to update the delete mark in the
- secondary index record of the previous version of
- the row. We also need to update the fields of
- the secondary index record if we updated its fields
- but alphabetically they stayed the same, e.g.,
- 'abc' -> 'aBc'. */
- mem_heap_empty(heap);
- entry = row_build_index_entry(node->undo_row,
- node->undo_ext,
- index, heap);
- ut_a(entry);
-
- err = row_undo_mod_del_unmark_sec_and_undo_update(
- BTR_MODIFY_LEAF, thr, index, entry);
- if (err == DB_FAIL) {
- err = row_undo_mod_del_unmark_sec_and_undo_update(
- BTR_MODIFY_TREE, thr, index, entry);
- }
-
- if (err != DB_SUCCESS) {
- mem_heap_free(heap);
-
- return(err);
- }
- }
-
- node->index = dict_table_get_next_index(node->index);
- }
-
- mem_heap_free(heap);
-
- return(DB_SUCCESS);
-}
-
-/***********************************************************//**
-Parses the row reference and other info in a modify undo log record. */
-static
-void
-row_undo_mod_parse_undo_rec(
-/*========================*/
- undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr) /*!< in: query thread */
-{
- dict_index_t* clust_index;
- byte* ptr;
- undo_no_t undo_no;
- dulint table_id;
- trx_id_t trx_id;
- roll_ptr_t roll_ptr;
- ulint info_bits;
- ulint type;
- ulint cmpl_info;
- ibool dummy_extern;
- trx_t* trx;
-
- ut_ad(node && thr);
- trx = thr_get_trx(thr);
- ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
- &dummy_extern, &undo_no, &table_id);
- node->rec_type = type;
-
- node->table = dict_table_get_on_id(table_id, trx);
-
- /* TODO: other fixes associated with DROP TABLE + rollback in the
- same table by another user */
-
- if (node->table == NULL) {
- /* Table was dropped */
- return;
- }
-
- if (node->table->ibd_file_missing) {
- /* We skip undo operations to missing .ibd files */
- node->table = NULL;
-
- return;
- }
-
- clust_index = dict_table_get_first_index(node->table);
-
- ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
- &info_bits);
-
- ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
- node->heap);
-
- trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
- roll_ptr, info_bits, trx,
- node->heap, &(node->update));
- node->new_roll_ptr = roll_ptr;
- node->new_trx_id = trx_id;
- node->cmpl_info = cmpl_info;
-}
-
-/***********************************************************//**
-Undoes a modify operation on a row of a table.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ulint
-row_undo_mod(
-/*=========*/
- undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr) /*!< in: query thread */
-{
- ulint err;
-
- ut_ad(node && thr);
- ut_ad(node->state == UNDO_NODE_MODIFY);
-
- row_undo_mod_parse_undo_rec(node, thr);
-
- if (!node->table || !row_undo_search_clust_to_pcur(node)) {
- /* It is already undone, or will be undone by another query
- thread, or table was dropped */
-
- trx_undo_rec_release(node->trx, node->undo_no);
- node->state = UNDO_NODE_FETCH_NEXT;
-
- return(DB_SUCCESS);
- }
-
- node->index = dict_table_get_next_index(
- dict_table_get_first_index(node->table));
-
- if (node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
-
- err = row_undo_mod_upd_exist_sec(node, thr);
-
- } else if (node->rec_type == TRX_UNDO_DEL_MARK_REC) {
-
- err = row_undo_mod_del_mark_sec(node, thr);
- } else {
- ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
- err = row_undo_mod_upd_del_sec(node, thr);
- }
-
- if (err != DB_SUCCESS) {
-
- return(err);
- }
-
- err = row_undo_mod_clust(node, thr);
-
- return(err);
-}
diff --git a/storage/innodb_plugin/row/row0undo.c b/storage/innodb_plugin/row/row0undo.c
deleted file mode 100644
index 3d739c9689a..00000000000
--- a/storage/innodb_plugin/row/row0undo.c
+++ /dev/null
@@ -1,377 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file row/row0undo.c
-Row undo
-
-Created 1/8/1997 Heikki Tuuri
-*******************************************************/
-
-#include "row0undo.h"
-
-#ifdef UNIV_NONINL
-#include "row0undo.ic"
-#endif
-
-#include "fsp0fsp.h"
-#include "mach0data.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "trx0undo.h"
-#include "trx0purge.h"
-#include "trx0rec.h"
-#include "que0que.h"
-#include "row0row.h"
-#include "row0uins.h"
-#include "row0umod.h"
-#include "row0upd.h"
-#include "row0mysql.h"
-#include "srv0srv.h"
-
-/* How to undo row operations?
-(1) For an insert, we have stored a prefix of the clustered index record
-in the undo log. Using it, we look for the clustered record, and using
-that we look for the records in the secondary indexes. The insert operation
-may have been left incomplete, if the database crashed, for example.
-We may have look at the trx id and roll ptr to make sure the record in the
-clustered index is really the one for which the undo log record was
-written. We can use the framework we get from the original insert op.
-(2) Delete marking: We can use the framework we get from the original
-delete mark op. We only have to check the trx id.
-(3) Update: This may be the most complicated. We have to use the framework
-we get from the original update op.
-
-What if the same trx repeatedly deletes and inserts an identical row.
-Then the row id changes and also roll ptr. What if the row id was not
-part of the ordering fields in the clustered index? Maybe we have to write
-it to undo log. Well, maybe not, because if we order the row id and trx id
-in descending order, then the only undeleted copy is the first in the
-index. Our searches in row operations always position the cursor before
-the first record in the result set. But, if there is no key defined for
-a table, then it would be desirable that row id is in ascending order.
-So, lets store row id in descending order only if it is not an ordering
-field in the clustered index.
-
-NOTE: Deletes and inserts may lead to situation where there are identical
-records in a secondary index. Is that a problem in the B-tree? Yes.
-Also updates can lead to this, unless trx id and roll ptr are included in
-ord fields.
-(1) Fix in clustered indexes: include row id, trx id, and roll ptr
-in node pointers of B-tree.
-(2) Fix in secondary indexes: include all fields in node pointers, and
-if an entry is inserted, check if it is equal to the right neighbor,
-in which case update the right neighbor: the neighbor must be delete
-marked, set it unmarked and write the trx id of the current transaction.
-
-What if the same trx repeatedly updates the same row, updating a secondary
-index field or not? Updating a clustered index ordering field?
-
-(1) If it does not update the secondary index and not the clustered index
-ord field. Then the secondary index record stays unchanged, but the
-trx id in the secondary index record may be smaller than in the clustered
-index record. This is no problem?
-(2) If it updates secondary index ord field but not clustered: then in
-secondary index there are delete marked records, which differ in an
-ord field. No problem.
-(3) Updates clustered ord field but not secondary, and secondary index
-is unique. Then the record in secondary index is just updated at the
-clustered ord field.
-(4)
-
-Problem with duplicate records:
-Fix 1: Add a trx op no field to all indexes. A problem: if a trx with a
-bigger trx id has inserted and delete marked a similar row, our trx inserts
-again a similar row, and a trx with an even bigger id delete marks it. Then
-the position of the row should change in the index if the trx id affects
-the alphabetical ordering.
-
-Fix 2: If an insert encounters a similar row marked deleted, we turn the
-insert into an 'update' of the row marked deleted. Then we must write undo
-info on the update. A problem: what if a purge operation tries to remove
-the delete marked row?
-
-We can think of the database row versions as a linked list which starts
-from the record in the clustered index, and is linked by roll ptrs
-through undo logs. The secondary index records are references which tell
-what kinds of records can be found in this linked list for a record
-in the clustered index.
-
-How to do the purge? A record can be removed from the clustered index
-if its linked list becomes empty, i.e., the row has been marked deleted
-and its roll ptr points to the record in the undo log we are going through,
-doing the purge. Similarly, during a rollback, a record can be removed
-if the stored roll ptr in the undo log points to a trx already (being) purged,
-or if the roll ptr is NULL, i.e., it was a fresh insert. */
-
-/********************************************************************//**
-Creates a row undo node to a query graph.
-@return own: undo node */
-UNIV_INTERN
-undo_node_t*
-row_undo_node_create(
-/*=================*/
- trx_t* trx, /*!< in: transaction */
- que_thr_t* parent, /*!< in: parent node, i.e., a thr node */
- mem_heap_t* heap) /*!< in: memory heap where created */
-{
- undo_node_t* undo;
-
- ut_ad(trx && parent && heap);
-
- undo = mem_heap_alloc(heap, sizeof(undo_node_t));
-
- undo->common.type = QUE_NODE_UNDO;
- undo->common.parent = parent;
-
- undo->state = UNDO_NODE_FETCH_NEXT;
- undo->trx = trx;
-
- btr_pcur_init(&(undo->pcur));
-
- undo->heap = mem_heap_create(256);
-
- return(undo);
-}
-
-/***********************************************************//**
-Looks for the clustered index record when node has the row reference.
-The pcur in node is used in the search. If found, stores the row to node,
-and stores the position of pcur, and detaches it. The pcur must be closed
-by the caller in any case.
-@return TRUE if found; NOTE the node->pcur must be closed by the
-caller, regardless of the return value */
-UNIV_INTERN
-ibool
-row_undo_search_clust_to_pcur(
-/*==========================*/
- undo_node_t* node) /*!< in: row undo node */
-{
- dict_index_t* clust_index;
- ibool found;
- mtr_t mtr;
- ibool ret;
- rec_t* rec;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- mtr_start(&mtr);
-
- clust_index = dict_table_get_first_index(node->table);
-
- found = row_search_on_row_ref(&(node->pcur), BTR_MODIFY_LEAF,
- node->table, node->ref, &mtr);
-
- rec = btr_pcur_get_rec(&(node->pcur));
-
- offsets = rec_get_offsets(rec, clust_index, offsets,
- ULINT_UNDEFINED, &heap);
-
- if (!found || 0 != ut_dulint_cmp(node->roll_ptr,
- row_get_rec_roll_ptr(rec, clust_index,
- offsets))) {
-
- /* We must remove the reservation on the undo log record
- BEFORE releasing the latch on the clustered index page: this
- is to make sure that some thread will eventually undo the
- modification corresponding to node->roll_ptr. */
-
- /* fputs("--------------------undoing a previous version\n",
- stderr); */
-
- ret = FALSE;
- } else {
- node->row = row_build(ROW_COPY_DATA, clust_index, rec,
- offsets, NULL, &node->ext, node->heap);
- if (node->update) {
- node->undo_row = dtuple_copy(node->row, node->heap);
- row_upd_replace(node->undo_row, &node->undo_ext,
- clust_index, node->update, node->heap);
- } else {
- node->undo_row = NULL;
- node->undo_ext = NULL;
- }
-
- btr_pcur_store_position(&(node->pcur), &mtr);
-
- ret = TRUE;
- }
-
- btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(ret);
-}
-
-/***********************************************************//**
-Fetches an undo log record and does the undo for the recorded operation.
-If none left, or a partial rollback completed, returns control to the
-parent node, which is always a query thread node.
-@return DB_SUCCESS if operation successfully completed, else error code */
-static
-ulint
-row_undo(
-/*=====*/
- undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr) /*!< in: query thread */
-{
- ulint err;
- trx_t* trx;
- roll_ptr_t roll_ptr;
- ibool locked_data_dict;
-
- ut_ad(node && thr);
-
- trx = node->trx;
-
- if (node->state == UNDO_NODE_FETCH_NEXT) {
-
- node->undo_rec = trx_roll_pop_top_rec_of_trx(trx,
- trx->roll_limit,
- &roll_ptr,
- node->heap);
- if (!node->undo_rec) {
- /* Rollback completed for this query thread */
-
- thr->run_node = que_node_get_parent(node);
-
- return(DB_SUCCESS);
- }
-
- node->roll_ptr = roll_ptr;
- node->undo_no = trx_undo_rec_get_undo_no(node->undo_rec);
-
- if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
-
- node->state = UNDO_NODE_INSERT;
- } else {
- node->state = UNDO_NODE_MODIFY;
- }
-
- } else if (node->state == UNDO_NODE_PREV_VERS) {
-
- /* Undo should be done to the same clustered index record
- again in this same rollback, restoring the previous version */
-
- roll_ptr = node->new_roll_ptr;
-
- node->undo_rec = trx_undo_get_undo_rec_low(roll_ptr,
- node->heap);
- node->roll_ptr = roll_ptr;
- node->undo_no = trx_undo_rec_get_undo_no(node->undo_rec);
-
- if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
-
- node->state = UNDO_NODE_INSERT;
- } else {
- node->state = UNDO_NODE_MODIFY;
- }
- }
-
- /* Prevent DROP TABLE etc. while we are rolling back this row.
- If we are doing a TABLE CREATE or some other dictionary operation,
- then we already have dict_operation_lock locked in x-mode. Do not
- try to lock again, because that would cause a hang. */
-
- locked_data_dict = (trx->dict_operation_lock_mode == 0);
-
- if (locked_data_dict) {
-
- row_mysql_lock_data_dictionary(trx);
- }
-
- if (node->state == UNDO_NODE_INSERT) {
-
- err = row_undo_ins(node);
-
- node->state = UNDO_NODE_FETCH_NEXT;
- } else {
- ut_ad(node->state == UNDO_NODE_MODIFY);
- err = row_undo_mod(node, thr);
- }
-
- if (locked_data_dict) {
-
- row_mysql_unlock_data_dictionary(trx);
- }
-
- /* Do some cleanup */
- btr_pcur_close(&(node->pcur));
-
- mem_heap_empty(node->heap);
-
- thr->run_node = node;
-
- return(err);
-}
-
-/***********************************************************//**
-Undoes a row operation in a table. This is a high-level function used
-in SQL execution graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-row_undo_step(
-/*==========*/
- que_thr_t* thr) /*!< in: query thread */
-{
- ulint err;
- undo_node_t* node;
- trx_t* trx;
-
- ut_ad(thr);
-
- srv_activity_count++;
-
- trx = thr_get_trx(thr);
-
- node = thr->run_node;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_UNDO);
-
- err = row_undo(node, thr);
-
- trx->error_state = err;
-
- if (err != DB_SUCCESS) {
- /* SQL error detected */
-
- fprintf(stderr, "InnoDB: Fatal error %lu in rollback.\n",
- (ulong) err);
-
- if (err == DB_OUT_OF_FILE_SPACE) {
- fprintf(stderr,
- "InnoDB: Error 13 means out of tablespace.\n"
- "InnoDB: Consider increasing"
- " your tablespace.\n");
-
- exit(1);
- }
-
- ut_error;
-
- return(NULL);
- }
-
- return(thr);
-}
diff --git a/storage/innodb_plugin/row/row0upd.c b/storage/innodb_plugin/row/row0upd.c
deleted file mode 100644
index 58dfd43ead9..00000000000
--- a/storage/innodb_plugin/row/row0upd.c
+++ /dev/null
@@ -1,2177 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file row/row0upd.c
-Update of a row
-
-Created 12/27/1996 Heikki Tuuri
-*******************************************************/
-
-#include "row0upd.h"
-
-#ifdef UNIV_NONINL
-#include "row0upd.ic"
-#endif
-
-#include "dict0dict.h"
-#include "trx0undo.h"
-#include "rem0rec.h"
-#ifndef UNIV_HOTBACKUP
-#include "dict0boot.h"
-#include "dict0crea.h"
-#include "mach0data.h"
-#include "btr0btr.h"
-#include "btr0cur.h"
-#include "que0que.h"
-#include "row0ext.h"
-#include "row0ins.h"
-#include "row0sel.h"
-#include "row0row.h"
-#include "rem0cmp.h"
-#include "lock0lock.h"
-#include "log0log.h"
-#include "pars0sym.h"
-#include "eval0eval.h"
-#include "buf0lru.h"
-
-
-/* What kind of latch and lock can we assume when the control comes to
- -------------------------------------------------------------------
-an update node?
---------------
-Efficiency of massive updates would require keeping an x-latch on a
-clustered index page through many updates, and not setting an explicit
-x-lock on clustered index records, as they anyway will get an implicit
-x-lock when they are updated. A problem is that the read nodes in the
-graph should know that they must keep the latch when passing the control
-up to the update node, and not set any record lock on the record which
-will be updated. Another problem occurs if the execution is stopped,
-as the kernel switches to another query thread, or the transaction must
-wait for a lock. Then we should be able to release the latch and, maybe,
-acquire an explicit x-lock on the record.
- Because this seems too complicated, we conclude that the less
-efficient solution of releasing all the latches when the control is
-transferred to another node, and acquiring explicit x-locks, is better. */
-
-/* How is a delete performed? If there is a delete without an
-explicit cursor, i.e., a searched delete, there are at least
-two different situations:
-the implicit select cursor may run on (1) the clustered index or
-on (2) a secondary index. The delete is performed by setting
-the delete bit in the record and substituting the id of the
-deleting transaction for the original trx id, and substituting a
-new roll ptr for previous roll ptr. The old trx id and roll ptr
-are saved in the undo log record. Thus, no physical changes occur
-in the index tree structure at the time of the delete. Only
-when the undo log is purged, the index records will be physically
-deleted from the index trees.
-
-The query graph executing a searched delete would consist of
-a delete node which has as a subtree a select subgraph.
-The select subgraph should return a (persistent) cursor
-in the clustered index, placed on page which is x-latched.
-The delete node should look for all secondary index records for
-this clustered index entry and mark them as deleted. When is
-the x-latch freed? The most efficient way for performing a
-searched delete is obviously to keep the x-latch for several
-steps of query graph execution. */
-
-/***********************************************************//**
-Checks if an update vector changes some of the first ordering fields of an
-index record. This is only used in foreign key checks and we can assume
-that index does not contain column prefixes.
-@return TRUE if changes */
-static
-ibool
-row_upd_changes_first_fields_binary(
-/*================================*/
- dtuple_t* entry, /*!< in: old value of index entry */
- dict_index_t* index, /*!< in: index of entry */
- const upd_t* update, /*!< in: update vector for the row */
- ulint n); /*!< in: how many first fields to check */
-
-
-/*********************************************************************//**
-Checks if index currently is mentioned as a referenced index in a foreign
-key constraint.
-
-NOTE that since we do not hold dict_operation_lock when leaving the
-function, it may be that the referencing table has been dropped when
-we leave this function: this function is only for heuristic use!
-
-@return TRUE if referenced */
-static
-ibool
-row_upd_index_is_referenced(
-/*========================*/
- dict_index_t* index, /*!< in: index */
- trx_t* trx) /*!< in: transaction */
-{
- dict_table_t* table = index->table;
- dict_foreign_t* foreign;
- ibool froze_data_dict = FALSE;
- ibool is_referenced = FALSE;
-
- if (!UT_LIST_GET_FIRST(table->referenced_list)) {
-
- return(FALSE);
- }
-
- if (trx->dict_operation_lock_mode == 0) {
- row_mysql_freeze_data_dictionary(trx);
- froze_data_dict = TRUE;
- }
-
- foreign = UT_LIST_GET_FIRST(table->referenced_list);
-
- while (foreign) {
- if (foreign->referenced_index == index) {
-
- is_referenced = TRUE;
- goto func_exit;
- }
-
- foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
- }
-
-func_exit:
- if (froze_data_dict) {
- row_mysql_unfreeze_data_dictionary(trx);
- }
-
- return(is_referenced);
-}
-
-/*********************************************************************//**
-Checks if possible foreign key constraints hold after a delete of the record
-under pcur.
-
-NOTE that this function will temporarily commit mtr and lose the
-pcur position!
-
-@return DB_SUCCESS or an error code */
-static
-ulint
-row_upd_check_references_constraints(
-/*=================================*/
- upd_node_t* node, /*!< in: row update node */
- btr_pcur_t* pcur, /*!< in: cursor positioned on a record; NOTE: the
- cursor position is lost in this function! */
- dict_table_t* table, /*!< in: table in question */
- dict_index_t* index, /*!< in: index of the cursor */
- ulint* offsets,/*!< in/out: rec_get_offsets(pcur.rec, index) */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr) /*!< in: mtr */
-{
- dict_foreign_t* foreign;
- mem_heap_t* heap;
- dtuple_t* entry;
- trx_t* trx;
- const rec_t* rec;
- ulint n_ext;
- ulint err;
- ibool got_s_lock = FALSE;
-
- if (UT_LIST_GET_FIRST(table->referenced_list) == NULL) {
-
- return(DB_SUCCESS);
- }
-
- trx = thr_get_trx(thr);
-
- rec = btr_pcur_get_rec(pcur);
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- heap = mem_heap_create(500);
-
- entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets,
- &n_ext, heap);
-
- mtr_commit(mtr);
-
- mtr_start(mtr);
-
- if (trx->dict_operation_lock_mode == 0) {
- got_s_lock = TRUE;
-
- row_mysql_freeze_data_dictionary(trx);
- }
-
- foreign = UT_LIST_GET_FIRST(table->referenced_list);
-
- while (foreign) {
- /* Note that we may have an update which updates the index
- record, but does NOT update the first fields which are
- referenced in a foreign key constraint. Then the update does
- NOT break the constraint. */
-
- if (foreign->referenced_index == index
- && (node->is_delete
- || row_upd_changes_first_fields_binary(
- entry, index, node->update,
- foreign->n_fields))) {
-
- if (foreign->foreign_table == NULL) {
- dict_table_get(foreign->foreign_table_name,
- FALSE);
- }
-
- if (foreign->foreign_table) {
- mutex_enter(&(dict_sys->mutex));
-
- (foreign->foreign_table
- ->n_foreign_key_checks_running)++;
-
- mutex_exit(&(dict_sys->mutex));
- }
-
- /* NOTE that if the thread ends up waiting for a lock
- we will release dict_operation_lock temporarily!
- But the counter on the table protects 'foreign' from
- being dropped while the check is running. */
-
- err = row_ins_check_foreign_constraint(
- FALSE, foreign, table, entry, thr);
-
- if (foreign->foreign_table) {
- mutex_enter(&(dict_sys->mutex));
-
- ut_a(foreign->foreign_table
- ->n_foreign_key_checks_running > 0);
-
- (foreign->foreign_table
- ->n_foreign_key_checks_running)--;
-
- mutex_exit(&(dict_sys->mutex));
- }
-
- if (err != DB_SUCCESS) {
-
- goto func_exit;
- }
- }
-
- foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
- }
-
- err = DB_SUCCESS;
-
-func_exit:
- if (got_s_lock) {
- row_mysql_unfreeze_data_dictionary(trx);
- }
-
- mem_heap_free(heap);
-
- return(err);
-}
-
-/*********************************************************************//**
-Creates an update node for a query graph.
-@return own: update node */
-UNIV_INTERN
-upd_node_t*
-upd_node_create(
-/*============*/
- mem_heap_t* heap) /*!< in: mem heap where created */
-{
- upd_node_t* node;
-
- node = mem_heap_alloc(heap, sizeof(upd_node_t));
- node->common.type = QUE_NODE_UPDATE;
-
- node->state = UPD_NODE_UPDATE_CLUSTERED;
- node->in_mysql_interface = FALSE;
-
- node->row = NULL;
- node->ext = NULL;
- node->upd_row = NULL;
- node->upd_ext = NULL;
- node->index = NULL;
- node->update = NULL;
-
- node->foreign = NULL;
- node->cascade_heap = NULL;
- node->cascade_node = NULL;
-
- node->select = NULL;
-
- node->heap = mem_heap_create(128);
- node->magic_n = UPD_NODE_MAGIC_N;
-
- node->cmpl_info = 0;
-
- return(node);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*********************************************************************//**
-Updates the trx id and roll ptr field in a clustered index record in database
-recovery. */
-UNIV_INTERN
-void
-row_upd_rec_sys_fields_in_recovery(
-/*===============================*/
- rec_t* rec, /*!< in/out: record */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint pos, /*!< in: TRX_ID position in rec */
- trx_id_t trx_id, /*!< in: transaction id */
- roll_ptr_t roll_ptr)/*!< in: roll ptr of the undo log record */
-{
- ut_ad(rec_offs_validate(rec, NULL, offsets));
-
- if (UNIV_LIKELY_NULL(page_zip)) {
- page_zip_write_trx_id_and_roll_ptr(
- page_zip, rec, offsets, pos, trx_id, roll_ptr);
- } else {
- byte* field;
- ulint len;
-
- field = rec_get_nth_field(rec, offsets, pos, &len);
- ut_ad(len == DATA_TRX_ID_LEN);
-#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
-# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
-#endif
- trx_write_trx_id(field, trx_id);
- trx_write_roll_ptr(field + DATA_TRX_ID_LEN, roll_ptr);
- }
-}
-
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Sets the trx id or roll ptr field of a clustered index entry. */
-UNIV_INTERN
-void
-row_upd_index_entry_sys_field(
-/*==========================*/
- const dtuple_t* entry, /*!< in: index entry, where the memory buffers
- for sys fields are already allocated:
- the function just copies the new values to
- them */
- dict_index_t* index, /*!< in: clustered index */
- ulint type, /*!< in: DATA_TRX_ID or DATA_ROLL_PTR */
- dulint val) /*!< in: value to write */
-{
- dfield_t* dfield;
- byte* field;
- ulint pos;
-
- ut_ad(dict_index_is_clust(index));
-
- pos = dict_index_get_sys_col_pos(index, type);
-
- dfield = dtuple_get_nth_field(entry, pos);
- field = dfield_get_data(dfield);
-
- if (type == DATA_TRX_ID) {
- trx_write_trx_id(field, val);
- } else {
- ut_ad(type == DATA_ROLL_PTR);
- trx_write_roll_ptr(field, val);
- }
-}
-
-/***********************************************************//**
-Returns TRUE if row update changes size of some field in index or if some
-field to be updated is stored externally in rec or update.
-@return TRUE if the update changes the size of some field in index or
-the field is external in rec or update */
-UNIV_INTERN
-ibool
-row_upd_changes_field_size_or_external(
-/*===================================*/
- dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- const upd_t* update) /*!< in: update vector */
-{
- const upd_field_t* upd_field;
- const dfield_t* new_val;
- ulint old_len;
- ulint new_len;
- ulint n_fields;
- ulint i;
-
- ut_ad(rec_offs_validate(NULL, index, offsets));
- n_fields = upd_get_n_fields(update);
-
- for (i = 0; i < n_fields; i++) {
- upd_field = upd_get_nth_field(update, i);
-
- new_val = &(upd_field->new_val);
- new_len = dfield_get_len(new_val);
-
- if (dfield_is_null(new_val) && !rec_offs_comp(offsets)) {
- /* A bug fixed on Dec 31st, 2004: we looked at the
- SQL NULL size from the wrong field! We may backport
- this fix also to 4.0. The merge to 5.0 will be made
- manually immediately after we commit this to 4.1. */
-
- new_len = dict_col_get_sql_null_size(
- dict_index_get_nth_col(index,
- upd_field->field_no),
- 0);
- }
-
- old_len = rec_offs_nth_size(offsets, upd_field->field_no);
-
- if (rec_offs_comp(offsets)
- && rec_offs_nth_sql_null(offsets,
- upd_field->field_no)) {
- /* Note that in the compact table format, for a
- variable length field, an SQL NULL will use zero
- bytes in the offset array at the start of the physical
- record, but a zero-length value (empty string) will
- use one byte! Thus, we cannot use update-in-place
- if we update an SQL NULL varchar to an empty string! */
-
- old_len = UNIV_SQL_NULL;
- }
-
- if (dfield_is_ext(new_val) || old_len != new_len
- || rec_offs_nth_extern(offsets, upd_field->field_no)) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Replaces the new column values stored in the update vector to the record
-given. No field size changes are allowed. */
-UNIV_INTERN
-void
-row_upd_rec_in_place(
-/*=================*/
- rec_t* rec, /*!< in/out: record where replaced */
- dict_index_t* index, /*!< in: the index the record belongs to */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- const upd_t* update, /*!< in: update vector */
- page_zip_des_t* page_zip)/*!< in: compressed page with enough space
- available, or NULL */
-{
- const upd_field_t* upd_field;
- const dfield_t* new_val;
- ulint n_fields;
- ulint i;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- if (rec_offs_comp(offsets)) {
- rec_set_info_bits_new(rec, update->info_bits);
- } else {
- rec_set_info_bits_old(rec, update->info_bits);
- }
-
- n_fields = upd_get_n_fields(update);
-
- for (i = 0; i < n_fields; i++) {
- upd_field = upd_get_nth_field(update, i);
- new_val = &(upd_field->new_val);
- ut_ad(!dfield_is_ext(new_val) ==
- !rec_offs_nth_extern(offsets, upd_field->field_no));
-
- rec_set_nth_field(rec, offsets, upd_field->field_no,
- dfield_get_data(new_val),
- dfield_get_len(new_val));
- }
-
- if (UNIV_LIKELY_NULL(page_zip)) {
- page_zip_write_rec(page_zip, rec, index, offsets, 0);
- }
-}
-
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Writes into the redo log the values of trx id and roll ptr and enough info
-to determine their positions within a clustered index record.
-@return new pointer to mlog */
-UNIV_INTERN
-byte*
-row_upd_write_sys_vals_to_log(
-/*==========================*/
- dict_index_t* index, /*!< in: clustered index */
- trx_t* trx, /*!< in: transaction */
- roll_ptr_t roll_ptr,/*!< in: roll ptr of the undo log record */
- byte* log_ptr,/*!< pointer to a buffer of size > 20 opened
- in mlog */
- mtr_t* mtr __attribute__((unused))) /*!< in: mtr */
-{
- ut_ad(dict_index_is_clust(index));
- ut_ad(mtr);
-
- log_ptr += mach_write_compressed(log_ptr,
- dict_index_get_sys_col_pos(
- index, DATA_TRX_ID));
-
- trx_write_roll_ptr(log_ptr, roll_ptr);
- log_ptr += DATA_ROLL_PTR_LEN;
-
- log_ptr += mach_dulint_write_compressed(log_ptr, trx->id);
-
- return(log_ptr);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*********************************************************************//**
-Parses the log data of system field values.
-@return log data end or NULL */
-UNIV_INTERN
-byte*
-row_upd_parse_sys_vals(
-/*===================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- ulint* pos, /*!< out: TRX_ID position in record */
- trx_id_t* trx_id, /*!< out: trx id */
- roll_ptr_t* roll_ptr)/*!< out: roll ptr */
-{
- ptr = mach_parse_compressed(ptr, end_ptr, pos);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- if (end_ptr < ptr + DATA_ROLL_PTR_LEN) {
-
- return(NULL);
- }
-
- *roll_ptr = trx_read_roll_ptr(ptr);
- ptr += DATA_ROLL_PTR_LEN;
-
- ptr = mach_dulint_parse_compressed(ptr, end_ptr, trx_id);
-
- return(ptr);
-}
-
-#ifndef UNIV_HOTBACKUP
-/***********************************************************//**
-Writes to the redo log the new values of the fields occurring in the index. */
-UNIV_INTERN
-void
-row_upd_index_write_log(
-/*====================*/
- const upd_t* update, /*!< in: update vector */
- byte* log_ptr,/*!< in: pointer to mlog buffer: must
- contain at least MLOG_BUF_MARGIN bytes
- of free space; the buffer is closed
- within this function */
- mtr_t* mtr) /*!< in: mtr into whose log to write */
-{
- const upd_field_t* upd_field;
- const dfield_t* new_val;
- ulint len;
- ulint n_fields;
- byte* buf_end;
- ulint i;
-
- n_fields = upd_get_n_fields(update);
-
- buf_end = log_ptr + MLOG_BUF_MARGIN;
-
- mach_write_to_1(log_ptr, update->info_bits);
- log_ptr++;
- log_ptr += mach_write_compressed(log_ptr, n_fields);
-
- for (i = 0; i < n_fields; i++) {
-
-#if MLOG_BUF_MARGIN <= 30
-# error "MLOG_BUF_MARGIN <= 30"
-#endif
-
- if (log_ptr + 30 > buf_end) {
- mlog_close(mtr, log_ptr);
-
- log_ptr = mlog_open(mtr, MLOG_BUF_MARGIN);
- buf_end = log_ptr + MLOG_BUF_MARGIN;
- }
-
- upd_field = upd_get_nth_field(update, i);
-
- new_val = &(upd_field->new_val);
-
- len = dfield_get_len(new_val);
-
- log_ptr += mach_write_compressed(log_ptr, upd_field->field_no);
- log_ptr += mach_write_compressed(log_ptr, len);
-
- if (len != UNIV_SQL_NULL) {
- if (log_ptr + len < buf_end) {
- memcpy(log_ptr, dfield_get_data(new_val), len);
-
- log_ptr += len;
- } else {
- mlog_close(mtr, log_ptr);
-
- mlog_catenate_string(mtr,
- dfield_get_data(new_val),
- len);
-
- log_ptr = mlog_open(mtr, MLOG_BUF_MARGIN);
- buf_end = log_ptr + MLOG_BUF_MARGIN;
- }
- }
- }
-
- mlog_close(mtr, log_ptr);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*********************************************************************//**
-Parses the log data written by row_upd_index_write_log.
-@return log data end or NULL */
-UNIV_INTERN
-byte*
-row_upd_index_parse(
-/*================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- mem_heap_t* heap, /*!< in: memory heap where update vector is
- built */
- upd_t** update_out)/*!< out: update vector */
-{
- upd_t* update;
- upd_field_t* upd_field;
- dfield_t* new_val;
- ulint len;
- ulint n_fields;
- ulint info_bits;
- ulint i;
-
- if (end_ptr < ptr + 1) {
-
- return(NULL);
- }
-
- info_bits = mach_read_from_1(ptr);
- ptr++;
- ptr = mach_parse_compressed(ptr, end_ptr, &n_fields);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- update = upd_create(n_fields, heap);
- update->info_bits = info_bits;
-
- for (i = 0; i < n_fields; i++) {
- ulint field_no;
- upd_field = upd_get_nth_field(update, i);
- new_val = &(upd_field->new_val);
-
- ptr = mach_parse_compressed(ptr, end_ptr, &field_no);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- upd_field->field_no = field_no;
-
- ptr = mach_parse_compressed(ptr, end_ptr, &len);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- if (len != UNIV_SQL_NULL) {
-
- if (end_ptr < ptr + len) {
-
- return(NULL);
- }
-
- dfield_set_data(new_val,
- mem_heap_dup(heap, ptr, len), len);
- ptr += len;
- } else {
- dfield_set_null(new_val);
- }
- }
-
- *update_out = update;
-
- return(ptr);
-}
-
-#ifndef UNIV_HOTBACKUP
-/***************************************************************//**
-Builds an update vector from those fields which in a secondary index entry
-differ from a record that has the equal ordering fields. NOTE: we compare
-the fields as binary strings!
-@return own: update vector of differing fields */
-UNIV_INTERN
-upd_t*
-row_upd_build_sec_rec_difference_binary(
-/*====================================*/
- dict_index_t* index, /*!< in: index */
- const dtuple_t* entry, /*!< in: entry to insert */
- const rec_t* rec, /*!< in: secondary index record */
- trx_t* trx, /*!< in: transaction */
- mem_heap_t* heap) /*!< in: memory heap from which allocated */
-{
- upd_field_t* upd_field;
- const dfield_t* dfield;
- const byte* data;
- ulint len;
- upd_t* update;
- ulint n_diff;
- ulint i;
- ulint offsets_[REC_OFFS_SMALL_SIZE];
- const ulint* offsets;
- rec_offs_init(offsets_);
-
- /* This function is used only for a secondary index */
- ut_a(!dict_index_is_clust(index));
-
- update = upd_create(dtuple_get_n_fields(entry), heap);
-
- n_diff = 0;
- offsets = rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap);
-
- for (i = 0; i < dtuple_get_n_fields(entry); i++) {
-
- data = rec_get_nth_field(rec, offsets, i, &len);
-
- dfield = dtuple_get_nth_field(entry, i);
-
- /* NOTE that it may be that len != dfield_get_len(dfield) if we
- are updating in a character set and collation where strings of
- different length can be equal in an alphabetical comparison,
- and also in the case where we have a column prefix index
- and the last characters in the index field are spaces; the
- latter case probably caused the assertion failures reported at
- row0upd.c line 713 in versions 4.0.14 - 4.0.16. */
-
- /* NOTE: we compare the fields as binary strings!
- (No collation) */
-
- if (!dfield_data_is_binary_equal(dfield, len, data)) {
-
- upd_field = upd_get_nth_field(update, n_diff);
-
- dfield_copy(&(upd_field->new_val), dfield);
-
- upd_field_set_field_no(upd_field, i, index, trx);
-
- n_diff++;
- }
- }
-
- update->n_fields = n_diff;
-
- return(update);
-}
-
-/***************************************************************//**
-Builds an update vector from those fields, excluding the roll ptr and
-trx id fields, which in an index entry differ from a record that has
-the equal ordering fields. NOTE: we compare the fields as binary strings!
-@return own: update vector of differing fields, excluding roll ptr and
-trx id */
-UNIV_INTERN
-upd_t*
-row_upd_build_difference_binary(
-/*============================*/
- dict_index_t* index, /*!< in: clustered index */
- const dtuple_t* entry, /*!< in: entry to insert */
- const rec_t* rec, /*!< in: clustered index record */
- trx_t* trx, /*!< in: transaction */
- mem_heap_t* heap) /*!< in: memory heap from which allocated */
-{
- upd_field_t* upd_field;
- const dfield_t* dfield;
- const byte* data;
- ulint len;
- upd_t* update;
- ulint n_diff;
- ulint roll_ptr_pos;
- ulint trx_id_pos;
- ulint i;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- const ulint* offsets;
- rec_offs_init(offsets_);
-
- /* This function is used only for a clustered index */
- ut_a(dict_index_is_clust(index));
-
- update = upd_create(dtuple_get_n_fields(entry), heap);
-
- n_diff = 0;
-
- roll_ptr_pos = dict_index_get_sys_col_pos(index, DATA_ROLL_PTR);
- trx_id_pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
-
- offsets = rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap);
-
- for (i = 0; i < dtuple_get_n_fields(entry); i++) {
-
- data = rec_get_nth_field(rec, offsets, i, &len);
-
- dfield = dtuple_get_nth_field(entry, i);
-
- /* NOTE: we compare the fields as binary strings!
- (No collation) */
-
- if (i == trx_id_pos || i == roll_ptr_pos) {
-
- goto skip_compare;
- }
-
- if (UNIV_UNLIKELY(!dfield_is_ext(dfield)
- != !rec_offs_nth_extern(offsets, i))
- || !dfield_data_is_binary_equal(dfield, len, data)) {
-
- upd_field = upd_get_nth_field(update, n_diff);
-
- dfield_copy(&(upd_field->new_val), dfield);
-
- upd_field_set_field_no(upd_field, i, index, trx);
-
- n_diff++;
- }
-skip_compare:
- ;
- }
-
- update->n_fields = n_diff;
-
- return(update);
-}
-
-/***********************************************************//**
-Fetch a prefix of an externally stored column. This is similar
-to row_ext_lookup(), but the row_ext_t holds the old values
-of the column and must not be poisoned with the new values.
-@return BLOB prefix */
-static
-byte*
-row_upd_ext_fetch(
-/*==============*/
- const byte* data, /*!< in: 'internally' stored part of the
- field containing also the reference to
- the external part */
- ulint local_len, /*!< in: length of data, in bytes */
- ulint zip_size, /*!< in: nonzero=compressed BLOB
- page size, zero for uncompressed
- BLOBs */
- ulint* len, /*!< in: length of prefix to fetch;
- out: fetched length of the prefix */
- mem_heap_t* heap) /*!< in: heap where to allocate */
-{
- byte* buf = mem_heap_alloc(heap, *len);
-
- *len = btr_copy_externally_stored_field_prefix(buf, *len,
- zip_size,
- data, local_len);
- /* We should never update records containing a half-deleted BLOB. */
- ut_a(*len);
-
- return(buf);
-}
-
-/***********************************************************//**
-Replaces the new column value stored in the update vector in
-the given index entry field. */
-static
-void
-row_upd_index_replace_new_col_val(
-/*==============================*/
- dfield_t* dfield, /*!< in/out: data field
- of the index entry */
- const dict_field_t* field, /*!< in: index field */
- const dict_col_t* col, /*!< in: field->col */
- const upd_field_t* uf, /*!< in: update field */
- mem_heap_t* heap, /*!< in: memory heap for allocating
- and copying the new value */
- ulint zip_size)/*!< in: compressed page
- size of the table, or 0 */
-{
- ulint len;
- const byte* data;
-
- dfield_copy_data(dfield, &uf->new_val);
-
- if (dfield_is_null(dfield)) {
- return;
- }
-
- len = dfield_get_len(dfield);
- data = dfield_get_data(dfield);
-
- if (field->prefix_len > 0) {
- ibool fetch_ext = dfield_is_ext(dfield)
- && len < (ulint) field->prefix_len
- + BTR_EXTERN_FIELD_REF_SIZE;
-
- if (fetch_ext) {
- ulint l = len;
-
- len = field->prefix_len;
-
- data = row_upd_ext_fetch(data, l, zip_size,
- &len, heap);
- }
-
- len = dtype_get_at_most_n_mbchars(col->prtype,
- col->mbminlen, col->mbmaxlen,
- field->prefix_len, len,
- (const char*) data);
-
- dfield_set_data(dfield, data, len);
-
- if (!fetch_ext) {
- dfield_dup(dfield, heap);
- }
-
- return;
- }
-
- switch (uf->orig_len) {
- byte* buf;
- case BTR_EXTERN_FIELD_REF_SIZE:
- /* Restore the original locally stored
- part of the column. In the undo log,
- InnoDB writes a longer prefix of externally
- stored columns, so that column prefixes
- in secondary indexes can be reconstructed. */
- dfield_set_data(dfield,
- data + len - BTR_EXTERN_FIELD_REF_SIZE,
- BTR_EXTERN_FIELD_REF_SIZE);
- dfield_set_ext(dfield);
- /* fall through */
- case 0:
- dfield_dup(dfield, heap);
- break;
- default:
- /* Reconstruct the original locally
- stored part of the column. The data
- will have to be copied. */
- ut_a(uf->orig_len > BTR_EXTERN_FIELD_REF_SIZE);
- buf = mem_heap_alloc(heap, uf->orig_len);
- /* Copy the locally stored prefix. */
- memcpy(buf, data,
- uf->orig_len - BTR_EXTERN_FIELD_REF_SIZE);
- /* Copy the BLOB pointer. */
- memcpy(buf + uf->orig_len - BTR_EXTERN_FIELD_REF_SIZE,
- data + len - BTR_EXTERN_FIELD_REF_SIZE,
- BTR_EXTERN_FIELD_REF_SIZE);
-
- dfield_set_data(dfield, buf, uf->orig_len);
- dfield_set_ext(dfield);
- break;
- }
-}
-
-/***********************************************************//**
-Replaces the new column values stored in the update vector to the index entry
-given. */
-UNIV_INTERN
-void
-row_upd_index_replace_new_col_vals_index_pos(
-/*=========================================*/
- dtuple_t* entry, /*!< in/out: index entry where replaced;
- the clustered index record must be
- covered by a lock or a page latch to
- prevent deletion (rollback or purge) */
- dict_index_t* index, /*!< in: index; NOTE that this may also be a
- non-clustered index */
- const upd_t* update, /*!< in: an update vector built for the index so
- that the field number in an upd_field is the
- index position */
- ibool order_only,
- /*!< in: if TRUE, limit the replacement to
- ordering fields of index; note that this
- does not work for non-clustered indexes. */
- mem_heap_t* heap) /*!< in: memory heap for allocating and
- copying the new values */
-{
- ulint i;
- ulint n_fields;
- const ulint zip_size = dict_table_zip_size(index->table);
-
- ut_ad(index);
-
- dtuple_set_info_bits(entry, update->info_bits);
-
- if (order_only) {
- n_fields = dict_index_get_n_unique(index);
- } else {
- n_fields = dict_index_get_n_fields(index);
- }
-
- for (i = 0; i < n_fields; i++) {
- const dict_field_t* field;
- const dict_col_t* col;
- const upd_field_t* uf;
-
- field = dict_index_get_nth_field(index, i);
- col = dict_field_get_col(field);
- uf = upd_get_field_by_field_no(update, i);
-
- if (uf) {
- row_upd_index_replace_new_col_val(
- dtuple_get_nth_field(entry, i),
- field, col, uf, heap, zip_size);
- }
- }
-}
-
-/***********************************************************//**
-Replaces the new column values stored in the update vector to the index entry
-given. */
-UNIV_INTERN
-void
-row_upd_index_replace_new_col_vals(
-/*===============================*/
- dtuple_t* entry, /*!< in/out: index entry where replaced;
- the clustered index record must be
- covered by a lock or a page latch to
- prevent deletion (rollback or purge) */
- dict_index_t* index, /*!< in: index; NOTE that this may also be a
- non-clustered index */
- const upd_t* update, /*!< in: an update vector built for the
- CLUSTERED index so that the field number in
- an upd_field is the clustered index position */
- mem_heap_t* heap) /*!< in: memory heap for allocating and
- copying the new values */
-{
- ulint i;
- const dict_index_t* clust_index
- = dict_table_get_first_index(index->table);
- const ulint zip_size
- = dict_table_zip_size(index->table);
-
- dtuple_set_info_bits(entry, update->info_bits);
-
- for (i = 0; i < dict_index_get_n_fields(index); i++) {
- const dict_field_t* field;
- const dict_col_t* col;
- const upd_field_t* uf;
-
- field = dict_index_get_nth_field(index, i);
- col = dict_field_get_col(field);
- uf = upd_get_field_by_field_no(
- update, dict_col_get_clust_pos(col, clust_index));
-
- if (uf) {
- row_upd_index_replace_new_col_val(
- dtuple_get_nth_field(entry, i),
- field, col, uf, heap, zip_size);
- }
- }
-}
-
-/***********************************************************//**
-Replaces the new column values stored in the update vector. */
-UNIV_INTERN
-void
-row_upd_replace(
-/*============*/
- dtuple_t* row, /*!< in/out: row where replaced,
- indexed by col_no;
- the clustered index record must be
- covered by a lock or a page latch to
- prevent deletion (rollback or purge) */
- row_ext_t** ext, /*!< out, own: NULL, or externally
- stored column prefixes */
- const dict_index_t* index, /*!< in: clustered index */
- const upd_t* update, /*!< in: an update vector built for the
- clustered index */
- mem_heap_t* heap) /*!< in: memory heap */
-{
- ulint col_no;
- ulint i;
- ulint n_cols;
- ulint n_ext_cols;
- ulint* ext_cols;
- const dict_table_t* table;
-
- ut_ad(row);
- ut_ad(ext);
- ut_ad(index);
- ut_ad(dict_index_is_clust(index));
- ut_ad(update);
- ut_ad(heap);
-
- n_cols = dtuple_get_n_fields(row);
- table = index->table;
- ut_ad(n_cols == dict_table_get_n_cols(table));
-
- ext_cols = mem_heap_alloc(heap, n_cols * sizeof *ext_cols);
- n_ext_cols = 0;
-
- dtuple_set_info_bits(row, update->info_bits);
-
- for (col_no = 0; col_no < n_cols; col_no++) {
-
- const dict_col_t* col
- = dict_table_get_nth_col(table, col_no);
- const ulint clust_pos
- = dict_col_get_clust_pos(col, index);
- dfield_t* dfield;
-
- if (UNIV_UNLIKELY(clust_pos == ULINT_UNDEFINED)) {
-
- continue;
- }
-
- dfield = dtuple_get_nth_field(row, col_no);
-
- for (i = 0; i < upd_get_n_fields(update); i++) {
-
- const upd_field_t* upd_field
- = upd_get_nth_field(update, i);
-
- if (upd_field->field_no != clust_pos) {
-
- continue;
- }
-
- dfield_copy_data(dfield, &upd_field->new_val);
- break;
- }
-
- if (dfield_is_ext(dfield) && col->ord_part) {
- ext_cols[n_ext_cols++] = col_no;
- }
- }
-
- if (n_ext_cols) {
- *ext = row_ext_create(n_ext_cols, ext_cols, row,
- dict_table_zip_size(table), heap);
- } else {
- *ext = NULL;
- }
-}
-
-/***********************************************************//**
-Checks if an update vector changes an ordering field of an index record.
-
-This function is fast if the update vector is short or the number of ordering
-fields in the index is small. Otherwise, this can be quadratic.
-NOTE: we compare the fields as binary strings!
-@return TRUE if update vector changes an ordering field in the index record */
-UNIV_INTERN
-ibool
-row_upd_changes_ord_field_binary(
-/*=============================*/
- const dtuple_t* row, /*!< in: old value of row, or NULL if the
- row and the data values in update are not
- known when this function is called, e.g., at
- compile time */
- dict_index_t* index, /*!< in: index of the record */
- const upd_t* update) /*!< in: update vector for the row; NOTE: the
- field numbers in this MUST be clustered index
- positions! */
-{
- ulint n_unique;
- ulint n_upd_fields;
- ulint i, j;
- dict_index_t* clust_index;
-
- ut_ad(update && index);
-
- n_unique = dict_index_get_n_unique(index);
- n_upd_fields = upd_get_n_fields(update);
-
- clust_index = dict_table_get_first_index(index->table);
-
- for (i = 0; i < n_unique; i++) {
-
- const dict_field_t* ind_field;
- const dict_col_t* col;
- ulint col_pos;
- ulint col_no;
-
- ind_field = dict_index_get_nth_field(index, i);
- col = dict_field_get_col(ind_field);
- col_pos = dict_col_get_clust_pos(col, clust_index);
- col_no = dict_col_get_no(col);
-
- for (j = 0; j < n_upd_fields; j++) {
-
- const upd_field_t* upd_field
- = upd_get_nth_field(update, j);
-
- /* Note that if the index field is a column prefix
- then it may be that row does not contain an externally
- stored part of the column value, and we cannot compare
- the datas */
-
- if (col_pos == upd_field->field_no
- && (row == NULL
- || ind_field->prefix_len > 0
- || !dfield_datas_are_binary_equal(
- dtuple_get_nth_field(row, col_no),
- &(upd_field->new_val)))) {
-
- return(TRUE);
- }
- }
- }
-
- return(FALSE);
-}
-
-/***********************************************************//**
-Checks if an update vector changes an ordering field of an index record.
-NOTE: we compare the fields as binary strings!
-@return TRUE if update vector may change an ordering field in an index
-record */
-UNIV_INTERN
-ibool
-row_upd_changes_some_index_ord_field_binary(
-/*========================================*/
- const dict_table_t* table, /*!< in: table */
- const upd_t* update) /*!< in: update vector for the row */
-{
- upd_field_t* upd_field;
- dict_index_t* index;
- ulint i;
-
- index = dict_table_get_first_index(table);
-
- for (i = 0; i < upd_get_n_fields(update); i++) {
-
- upd_field = upd_get_nth_field(update, i);
-
- if (dict_field_get_col(dict_index_get_nth_field(
- index, upd_field->field_no))
- ->ord_part) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/***********************************************************//**
-Checks if an update vector changes some of the first ordering fields of an
-index record. This is only used in foreign key checks and we can assume
-that index does not contain column prefixes.
-@return TRUE if changes */
-static
-ibool
-row_upd_changes_first_fields_binary(
-/*================================*/
- dtuple_t* entry, /*!< in: index entry */
- dict_index_t* index, /*!< in: index of entry */
- const upd_t* update, /*!< in: update vector for the row */
- ulint n) /*!< in: how many first fields to check */
-{
- ulint n_upd_fields;
- ulint i, j;
- dict_index_t* clust_index;
-
- ut_ad(update && index);
- ut_ad(n <= dict_index_get_n_fields(index));
-
- n_upd_fields = upd_get_n_fields(update);
- clust_index = dict_table_get_first_index(index->table);
-
- for (i = 0; i < n; i++) {
-
- const dict_field_t* ind_field;
- const dict_col_t* col;
- ulint col_pos;
-
- ind_field = dict_index_get_nth_field(index, i);
- col = dict_field_get_col(ind_field);
- col_pos = dict_col_get_clust_pos(col, clust_index);
-
- ut_a(ind_field->prefix_len == 0);
-
- for (j = 0; j < n_upd_fields; j++) {
-
- upd_field_t* upd_field
- = upd_get_nth_field(update, j);
-
- if (col_pos == upd_field->field_no
- && !dfield_datas_are_binary_equal(
- dtuple_get_nth_field(entry, i),
- &(upd_field->new_val))) {
-
- return(TRUE);
- }
- }
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Copies the column values from a record. */
-UNIV_INLINE
-void
-row_upd_copy_columns(
-/*=================*/
- rec_t* rec, /*!< in: record in a clustered index */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- sym_node_t* column) /*!< in: first column in a column list, or
- NULL */
-{
- byte* data;
- ulint len;
-
- while (column) {
- data = rec_get_nth_field(rec, offsets,
- column->field_nos[SYM_CLUST_FIELD_NO],
- &len);
- if (len == UNIV_SQL_NULL) {
- len = UNIV_SQL_NULL;
- }
- eval_node_copy_and_alloc_val(column, data, len);
-
- column = UT_LIST_GET_NEXT(col_var_list, column);
- }
-}
-
-/*********************************************************************//**
-Calculates the new values for fields to update. Note that row_upd_copy_columns
-must have been called first. */
-UNIV_INLINE
-void
-row_upd_eval_new_vals(
-/*==================*/
- upd_t* update) /*!< in/out: update vector */
-{
- que_node_t* exp;
- upd_field_t* upd_field;
- ulint n_fields;
- ulint i;
-
- n_fields = upd_get_n_fields(update);
-
- for (i = 0; i < n_fields; i++) {
- upd_field = upd_get_nth_field(update, i);
-
- exp = upd_field->exp;
-
- eval_exp(exp);
-
- dfield_copy_data(&(upd_field->new_val), que_node_get_val(exp));
- }
-}
-
-/***********************************************************//**
-Stores to the heap the row on which the node->pcur is positioned. */
-static
-void
-row_upd_store_row(
-/*==============*/
- upd_node_t* node) /*!< in: row update node */
-{
- dict_index_t* clust_index;
- rec_t* rec;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- const ulint* offsets;
- rec_offs_init(offsets_);
-
- ut_ad(node->pcur->latch_mode != BTR_NO_LATCHES);
-
- if (node->row != NULL) {
- mem_heap_empty(node->heap);
- }
-
- clust_index = dict_table_get_first_index(node->table);
-
- rec = btr_pcur_get_rec(node->pcur);
-
- offsets = rec_get_offsets(rec, clust_index, offsets_,
- ULINT_UNDEFINED, &heap);
- node->row = row_build(ROW_COPY_DATA, clust_index, rec, offsets,
- NULL, &node->ext, node->heap);
- if (node->is_delete) {
- node->upd_row = NULL;
- node->upd_ext = NULL;
- } else {
- node->upd_row = dtuple_copy(node->row, node->heap);
- row_upd_replace(node->upd_row, &node->upd_ext,
- clust_index, node->update, node->heap);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-}
-
-/***********************************************************//**
-Updates a secondary index entry of a row.
-@return DB_SUCCESS if operation successfully completed, else error
-code or DB_LOCK_WAIT */
-static
-ulint
-row_upd_sec_index_entry(
-/*====================*/
- upd_node_t* node, /*!< in: row update node */
- que_thr_t* thr) /*!< in: query thread */
-{
- ibool check_ref;
- ibool found;
- dict_index_t* index;
- dtuple_t* entry;
- btr_pcur_t pcur;
- btr_cur_t* btr_cur;
- mem_heap_t* heap;
- rec_t* rec;
- ulint err = DB_SUCCESS;
- mtr_t mtr;
- trx_t* trx = thr_get_trx(thr);
-
- index = node->index;
-
- check_ref = row_upd_index_is_referenced(index, trx);
-
- heap = mem_heap_create(1024);
-
- /* Build old index entry */
- entry = row_build_index_entry(node->row, node->ext, index, heap);
- ut_a(entry);
-
- log_free_check();
- mtr_start(&mtr);
-
- found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur,
- &mtr);
- btr_cur = btr_pcur_get_btr_cur(&pcur);
-
- rec = btr_cur_get_rec(btr_cur);
-
- if (UNIV_UNLIKELY(!found)) {
- fputs("InnoDB: error in sec index entry update in\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, trx, index);
- fputs("\n"
- "InnoDB: tuple ", stderr);
- dtuple_print(stderr, entry);
- fputs("\n"
- "InnoDB: record ", stderr);
- rec_print(stderr, rec, index);
- putc('\n', stderr);
-
- trx_print(stderr, trx, 0);
-
- fputs("\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n", stderr);
- } else {
- /* Delete mark the old index record; it can already be
- delete marked if we return after a lock wait in
- row_ins_index_entry below */
-
- if (!rec_get_deleted_flag(rec,
- dict_table_is_comp(index->table))) {
- err = btr_cur_del_mark_set_sec_rec(0, btr_cur, TRUE,
- thr, &mtr);
- if (err == DB_SUCCESS && check_ref) {
-
- ulint* offsets = rec_get_offsets(
- rec, index, NULL,
- ULINT_UNDEFINED, &heap);
- /* NOTE that the following call loses
- the position of pcur ! */
- err = row_upd_check_references_constraints(
- node, &pcur, index->table,
- index, offsets, thr, &mtr);
- }
- }
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- if (node->is_delete || err != DB_SUCCESS) {
-
- goto func_exit;
- }
-
- /* Build a new index entry */
- entry = row_build_index_entry(node->upd_row, node->upd_ext,
- index, heap);
- ut_a(entry);
-
- /* Insert new index entry */
- err = row_ins_index_entry(index, entry, 0, TRUE, thr);
-
-func_exit:
- mem_heap_free(heap);
-
- return(err);
-}
-
-/***********************************************************//**
-Updates the secondary index record if it is changed in the row update or
-deletes it if this is a delete.
-@return DB_SUCCESS if operation successfully completed, else error
-code or DB_LOCK_WAIT */
-UNIV_INLINE
-ulint
-row_upd_sec_step(
-/*=============*/
- upd_node_t* node, /*!< in: row update node */
- que_thr_t* thr) /*!< in: query thread */
-{
- ut_ad((node->state == UPD_NODE_UPDATE_ALL_SEC)
- || (node->state == UPD_NODE_UPDATE_SOME_SEC));
- ut_ad(!dict_index_is_clust(node->index));
-
- if (node->state == UPD_NODE_UPDATE_ALL_SEC
- || row_upd_changes_ord_field_binary(node->row, node->index,
- node->update)) {
- return(row_upd_sec_index_entry(node, thr));
- }
-
- return(DB_SUCCESS);
-}
-
-/***********************************************************//**
-Marks the clustered index record deleted and inserts the updated version
-of the record to the index. This function should be used when the ordering
-fields of the clustered index record change. This should be quite rare in
-database applications.
-@return DB_SUCCESS if operation successfully completed, else error
-code or DB_LOCK_WAIT */
-static
-ulint
-row_upd_clust_rec_by_insert(
-/*========================*/
- upd_node_t* node, /*!< in: row update node */
- dict_index_t* index, /*!< in: clustered index of the record */
- que_thr_t* thr, /*!< in: query thread */
- ibool check_ref,/*!< in: TRUE if index may be referenced in
- a foreign key constraint */
- mtr_t* mtr) /*!< in: mtr; gets committed here */
-{
- mem_heap_t* heap = NULL;
- btr_pcur_t* pcur;
- btr_cur_t* btr_cur;
- trx_t* trx;
- dict_table_t* table;
- dtuple_t* entry;
- ulint err;
-
- ut_ad(node);
- ut_ad(dict_index_is_clust(index));
-
- trx = thr_get_trx(thr);
- table = node->table;
- pcur = node->pcur;
- btr_cur = btr_pcur_get_btr_cur(pcur);
-
- if (node->state != UPD_NODE_INSERT_CLUSTERED) {
- rec_t* rec;
- dict_index_t* index;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets;
- rec_offs_init(offsets_);
-
- err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG,
- btr_cur, TRUE, thr, mtr);
- if (err != DB_SUCCESS) {
- mtr_commit(mtr);
- return(err);
- }
-
- /* Mark as not-owned the externally stored fields which the new
- row inherits from the delete marked record: purge should not
- free those externally stored fields even if the delete marked
- record is removed from the index tree, or updated. */
-
- rec = btr_cur_get_rec(btr_cur);
- index = dict_table_get_first_index(table);
- offsets = rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap);
- btr_cur_mark_extern_inherited_fields(
- btr_cur_get_page_zip(btr_cur),
- rec, index, offsets, node->update, mtr);
- if (check_ref) {
- /* NOTE that the following call loses
- the position of pcur ! */
- err = row_upd_check_references_constraints(
- node, pcur, table, index, offsets, thr, mtr);
- if (err != DB_SUCCESS) {
- mtr_commit(mtr);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(err);
- }
- }
- }
-
- mtr_commit(mtr);
-
- if (!heap) {
- heap = mem_heap_create(500);
- }
- node->state = UPD_NODE_INSERT_CLUSTERED;
-
- entry = row_build_index_entry(node->upd_row, node->upd_ext,
- index, heap);
- ut_a(entry);
-
- row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id);
-
- if (node->upd_ext) {
- /* If we return from a lock wait, for example, we may have
- extern fields marked as not-owned in entry (marked in the
- if-branch above). We must unmark them. */
-
- btr_cur_unmark_dtuple_extern_fields(entry);
-
- /* We must mark non-updated extern fields in entry as
- inherited, so that a possible rollback will not free them. */
-
- btr_cur_mark_dtuple_inherited_extern(entry, node->update);
- }
-
- err = row_ins_index_entry(index, entry,
- node->upd_ext ? node->upd_ext->n_ext : 0,
- TRUE, thr);
- mem_heap_free(heap);
-
- return(err);
-}
-
-/***********************************************************//**
-Updates a clustered index record of a row when the ordering fields do
-not change.
-@return DB_SUCCESS if operation successfully completed, else error
-code or DB_LOCK_WAIT */
-static
-ulint
-row_upd_clust_rec(
-/*==============*/
- upd_node_t* node, /*!< in: row update node */
- dict_index_t* index, /*!< in: clustered index */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr) /*!< in: mtr; gets committed here */
-{
- mem_heap_t* heap = NULL;
- big_rec_t* big_rec = NULL;
- btr_pcur_t* pcur;
- btr_cur_t* btr_cur;
- ulint err;
-
- ut_ad(node);
- ut_ad(dict_index_is_clust(index));
-
- pcur = node->pcur;
- btr_cur = btr_pcur_get_btr_cur(pcur);
-
- ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur),
- dict_table_is_comp(index->table)));
-
- /* Try optimistic updating of the record, keeping changes within
- the page; we do not check locks because we assume the x-lock on the
- record to update */
-
- if (node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE) {
- err = btr_cur_update_in_place(BTR_NO_LOCKING_FLAG,
- btr_cur, node->update,
- node->cmpl_info, thr, mtr);
- } else {
- err = btr_cur_optimistic_update(BTR_NO_LOCKING_FLAG,
- btr_cur, node->update,
- node->cmpl_info, thr, mtr);
- }
-
- mtr_commit(mtr);
-
- if (UNIV_LIKELY(err == DB_SUCCESS)) {
-
- return(DB_SUCCESS);
- }
-
- if (buf_LRU_buf_pool_running_out()) {
-
- return(DB_LOCK_TABLE_FULL);
- }
- /* We may have to modify the tree structure: do a pessimistic descent
- down the index tree */
-
- mtr_start(mtr);
-
- /* NOTE: this transaction has an s-lock or x-lock on the record and
- therefore other transactions cannot modify the record when we have no
- latch on the page. In addition, we assume that other query threads of
- the same transaction do not modify the record in the meantime.
- Therefore we can assert that the restoration of the cursor succeeds. */
-
- ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr));
-
- ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur),
- dict_table_is_comp(index->table)));
-
- err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG, btr_cur,
- &heap, &big_rec, node->update,
- node->cmpl_info, thr, mtr);
- mtr_commit(mtr);
-
- if (err == DB_SUCCESS && big_rec) {
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- rec_t* rec;
- rec_offs_init(offsets_);
-
- mtr_start(mtr);
-
- ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr));
- rec = btr_cur_get_rec(btr_cur);
- err = btr_store_big_rec_extern_fields(
- index, btr_cur_get_block(btr_cur), rec,
- rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap),
- big_rec, mtr);
- mtr_commit(mtr);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- if (big_rec) {
- dtuple_big_rec_free(big_rec);
- }
-
- return(err);
-}
-
-/***********************************************************//**
-Delete marks a clustered index record.
-@return DB_SUCCESS if operation successfully completed, else error code */
-static
-ulint
-row_upd_del_mark_clust_rec(
-/*=======================*/
- upd_node_t* node, /*!< in: row update node */
- dict_index_t* index, /*!< in: clustered index */
- ulint* offsets,/*!< in/out: rec_get_offsets() for the
- record under the cursor */
- que_thr_t* thr, /*!< in: query thread */
- ibool check_ref,/*!< in: TRUE if index may be referenced in
- a foreign key constraint */
- mtr_t* mtr) /*!< in: mtr; gets committed here */
-{
- btr_pcur_t* pcur;
- btr_cur_t* btr_cur;
- ulint err;
-
- ut_ad(node);
- ut_ad(dict_index_is_clust(index));
- ut_ad(node->is_delete);
-
- pcur = node->pcur;
- btr_cur = btr_pcur_get_btr_cur(pcur);
-
- /* Store row because we have to build also the secondary index
- entries */
-
- row_upd_store_row(node);
-
- /* Mark the clustered index record deleted; we do not have to check
- locks, because we assume that we have an x-lock on the record */
-
- err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG,
- btr_cur, TRUE, thr, mtr);
- if (err == DB_SUCCESS && check_ref) {
- /* NOTE that the following call loses the position of pcur ! */
-
- err = row_upd_check_references_constraints(node,
- pcur, index->table,
- index, offsets,
- thr, mtr);
- }
-
- mtr_commit(mtr);
-
- return(err);
-}
-
-/***********************************************************//**
-Updates the clustered index record.
-@return DB_SUCCESS if operation successfully completed, DB_LOCK_WAIT
-in case of a lock wait, else error code */
-static
-ulint
-row_upd_clust_step(
-/*===============*/
- upd_node_t* node, /*!< in: row update node */
- que_thr_t* thr) /*!< in: query thread */
-{
- dict_index_t* index;
- btr_pcur_t* pcur;
- ibool success;
- ibool check_ref;
- ulint err;
- mtr_t* mtr;
- mtr_t mtr_buf;
- rec_t* rec;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets;
- rec_offs_init(offsets_);
-
- index = dict_table_get_first_index(node->table);
-
- check_ref = row_upd_index_is_referenced(index, thr_get_trx(thr));
-
- pcur = node->pcur;
-
- /* We have to restore the cursor to its position */
- mtr = &mtr_buf;
-
- mtr_start(mtr);
-
- /* If the restoration does not succeed, then the same
- transaction has deleted the record on which the cursor was,
- and that is an SQL error. If the restoration succeeds, it may
- still be that the same transaction has successively deleted
- and inserted a record with the same ordering fields, but in
- that case we know that the transaction has at least an
- implicit x-lock on the record. */
-
- ut_a(pcur->rel_pos == BTR_PCUR_ON);
-
- success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);
-
- if (!success) {
- err = DB_RECORD_NOT_FOUND;
-
- mtr_commit(mtr);
-
- return(err);
- }
-
- /* If this is a row in SYS_INDEXES table of the data dictionary,
- then we have to free the file segments of the index tree associated
- with the index */
-
- if (node->is_delete
- && ut_dulint_cmp(node->table->id, DICT_INDEXES_ID) == 0) {
-
- dict_drop_index_tree(btr_pcur_get_rec(pcur), mtr);
-
- mtr_commit(mtr);
-
- mtr_start(mtr);
-
- success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur,
- mtr);
- if (!success) {
- err = DB_ERROR;
-
- mtr_commit(mtr);
-
- return(err);
- }
- }
-
- rec = btr_pcur_get_rec(pcur);
- offsets = rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap);
-
- if (!node->has_clust_rec_x_lock) {
- err = lock_clust_rec_modify_check_and_lock(
- 0, btr_pcur_get_block(pcur),
- rec, index, offsets, thr);
- if (err != DB_SUCCESS) {
- mtr_commit(mtr);
- goto exit_func;
- }
- }
-
- /* NOTE: the following function calls will also commit mtr */
-
- if (node->is_delete) {
- err = row_upd_del_mark_clust_rec(node, index, offsets,
- thr, check_ref, mtr);
- if (err == DB_SUCCESS) {
- node->state = UPD_NODE_UPDATE_ALL_SEC;
- node->index = dict_table_get_next_index(index);
- }
-exit_func:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(err);
- }
-
- /* If the update is made for MySQL, we already have the update vector
- ready, else we have to do some evaluation: */
-
- if (UNIV_UNLIKELY(!node->in_mysql_interface)) {
- /* Copy the necessary columns from clust_rec and calculate the
- new values to set */
- row_upd_copy_columns(rec, offsets,
- UT_LIST_GET_FIRST(node->columns));
- row_upd_eval_new_vals(node->update);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
-
- err = row_upd_clust_rec(node, index, thr, mtr);
- return(err);
- }
-
- row_upd_store_row(node);
-
- if (row_upd_changes_ord_field_binary(node->row, index, node->update)) {
-
- /* Update causes an ordering field (ordering fields within
- the B-tree) of the clustered index record to change: perform
- the update by delete marking and inserting.
-
- TODO! What to do to the 'Halloween problem', where an update
- moves the record forward in index so that it is again
- updated when the cursor arrives there? Solution: the
- read operation must check the undo record undo number when
- choosing records to update. MySQL solves now the problem
- externally! */
-
- err = row_upd_clust_rec_by_insert(node, index, thr, check_ref,
- mtr);
- if (err != DB_SUCCESS) {
-
- return(err);
- }
-
- node->state = UPD_NODE_UPDATE_ALL_SEC;
- } else {
- err = row_upd_clust_rec(node, index, thr, mtr);
-
- if (err != DB_SUCCESS) {
-
- return(err);
- }
-
- node->state = UPD_NODE_UPDATE_SOME_SEC;
- }
-
- node->index = dict_table_get_next_index(index);
-
- return(err);
-}
-
-/***********************************************************//**
-Updates the affected index records of a row. When the control is transferred
-to this node, we assume that we have a persistent cursor which was on a
-record, and the position of the cursor is stored in the cursor.
-@return DB_SUCCESS if operation successfully completed, else error
-code or DB_LOCK_WAIT */
-static
-ulint
-row_upd(
-/*====*/
- upd_node_t* node, /*!< in: row update node */
- que_thr_t* thr) /*!< in: query thread */
-{
- ulint err = DB_SUCCESS;
-
- ut_ad(node && thr);
-
- if (UNIV_LIKELY(node->in_mysql_interface)) {
-
- /* We do not get the cmpl_info value from the MySQL
- interpreter: we must calculate it on the fly: */
-
- if (node->is_delete
- || row_upd_changes_some_index_ord_field_binary(
- node->table, node->update)) {
- node->cmpl_info = 0;
- } else {
- node->cmpl_info = UPD_NODE_NO_ORD_CHANGE;
- }
- }
-
- if (node->state == UPD_NODE_UPDATE_CLUSTERED
- || node->state == UPD_NODE_INSERT_CLUSTERED) {
-
- err = row_upd_clust_step(node, thr);
-
- if (err != DB_SUCCESS) {
-
- goto function_exit;
- }
- }
-
- if (!node->is_delete && (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
-
- goto function_exit;
- }
-
- while (node->index != NULL) {
- err = row_upd_sec_step(node, thr);
-
- if (err != DB_SUCCESS) {
-
- goto function_exit;
- }
-
- node->index = dict_table_get_next_index(node->index);
- }
-
-function_exit:
- if (err == DB_SUCCESS) {
- /* Do some cleanup */
-
- if (node->row != NULL) {
- node->row = NULL;
- node->ext = NULL;
- node->upd_row = NULL;
- node->upd_ext = NULL;
- mem_heap_empty(node->heap);
- }
-
- node->state = UPD_NODE_UPDATE_CLUSTERED;
- }
-
- return(err);
-}
-
-/***********************************************************//**
-Updates a row in a table. This is a high-level function used in SQL execution
-graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-row_upd_step(
-/*=========*/
- que_thr_t* thr) /*!< in: query thread */
-{
- upd_node_t* node;
- sel_node_t* sel_node;
- que_node_t* parent;
- ulint err = DB_SUCCESS;
- trx_t* trx;
-
- ut_ad(thr);
-
- trx = thr_get_trx(thr);
-
- trx_start_if_not_started(trx);
-
- node = thr->run_node;
-
- sel_node = node->select;
-
- parent = que_node_get_parent(node);
-
- ut_ad(que_node_get_type(node) == QUE_NODE_UPDATE);
-
- if (thr->prev_node == parent) {
- node->state = UPD_NODE_SET_IX_LOCK;
- }
-
- if (node->state == UPD_NODE_SET_IX_LOCK) {
-
- if (!node->has_clust_rec_x_lock) {
- /* It may be that the current session has not yet
- started its transaction, or it has been committed: */
-
- err = lock_table(0, node->table, LOCK_IX, thr);
-
- if (err != DB_SUCCESS) {
-
- goto error_handling;
- }
- }
-
- node->state = UPD_NODE_UPDATE_CLUSTERED;
-
- if (node->searched_update) {
- /* Reset the cursor */
- sel_node->state = SEL_NODE_OPEN;
-
- /* Fetch a row to update */
-
- thr->run_node = sel_node;
-
- return(thr);
- }
- }
-
- /* sel_node is NULL if we are in the MySQL interface */
-
- if (sel_node && (sel_node->state != SEL_NODE_FETCH)) {
-
- if (!node->searched_update) {
- /* An explicit cursor should be positioned on a row
- to update */
-
- ut_error;
-
- err = DB_ERROR;
-
- goto error_handling;
- }
-
- ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS);
-
- /* No more rows to update, or the select node performed the
- updates directly in-place */
-
- thr->run_node = parent;
-
- return(thr);
- }
-
- /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
-
- err = row_upd(node, thr);
-
-error_handling:
- trx->error_state = err;
-
- if (err != DB_SUCCESS) {
- return(NULL);
- }
-
- /* DO THE TRIGGER ACTIONS HERE */
-
- if (node->searched_update) {
- /* Fetch next row to update */
-
- thr->run_node = sel_node;
- } else {
- /* It was an explicit cursor update */
-
- thr->run_node = parent;
- }
-
- node->state = UPD_NODE_UPDATE_CLUSTERED;
-
- return(thr);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/row/row0vers.c b/storage/innodb_plugin/row/row0vers.c
deleted file mode 100644
index a4fbb5289aa..00000000000
--- a/storage/innodb_plugin/row/row0vers.c
+++ /dev/null
@@ -1,741 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file row/row0vers.c
-Row versions
-
-Created 2/6/1997 Heikki Tuuri
-*******************************************************/
-
-#include "row0vers.h"
-
-#ifdef UNIV_NONINL
-#include "row0vers.ic"
-#endif
-
-#include "dict0dict.h"
-#include "dict0boot.h"
-#include "btr0btr.h"
-#include "mach0data.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "trx0undo.h"
-#include "trx0purge.h"
-#include "trx0rec.h"
-#include "que0que.h"
-#include "row0row.h"
-#include "row0upd.h"
-#include "rem0cmp.h"
-#include "read0read.h"
-#include "lock0lock.h"
-
-/*****************************************************************//**
-Finds out if an active transaction has inserted or modified a secondary
-index record. NOTE: the kernel mutex is temporarily released in this
-function!
-@return NULL if committed, else the active transaction */
-UNIV_INTERN
-trx_t*
-row_vers_impl_x_locked_off_kernel(
-/*==============================*/
- const rec_t* rec, /*!< in: record in a secondary index */
- dict_index_t* index, /*!< in: the secondary index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
-{
- dict_index_t* clust_index;
- rec_t* clust_rec;
- ulint* clust_offsets;
- rec_t* version;
- trx_id_t trx_id;
- mem_heap_t* heap;
- mem_heap_t* heap2;
- dtuple_t* row;
- dtuple_t* entry = NULL; /* assignment to eliminate compiler
- warning */
- trx_t* trx;
- ulint rec_del;
- ulint err;
- mtr_t mtr;
- ulint comp;
-
- ut_ad(mutex_own(&kernel_mutex));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- mutex_exit(&kernel_mutex);
-
- mtr_start(&mtr);
-
- /* Search for the clustered index record: this is a time-consuming
- operation: therefore we release the kernel mutex; also, the release
- is required by the latching order convention. The latch on the
- clustered index locks the top of the stack of versions. We also
- reserve purge_latch to lock the bottom of the version stack. */
-
- clust_rec = row_get_clust_rec(BTR_SEARCH_LEAF, rec, index,
- &clust_index, &mtr);
- if (!clust_rec) {
- /* In a rare case it is possible that no clust rec is found
- for a secondary index record: if in row0umod.c
- row_undo_mod_remove_clust_low() we have already removed the
- clust rec, while purge is still cleaning and removing
- secondary index records associated with earlier versions of
- the clustered index record. In that case there cannot be
- any implicit lock on the secondary index record, because
- an active transaction which has modified the secondary index
- record has also modified the clustered index record. And in
- a rollback we always undo the modifications to secondary index
- records before the clustered index record. */
-
- mutex_enter(&kernel_mutex);
- mtr_commit(&mtr);
-
- return(NULL);
- }
-
- heap = mem_heap_create(1024);
- clust_offsets = rec_get_offsets(clust_rec, clust_index, NULL,
- ULINT_UNDEFINED, &heap);
- trx_id = row_get_rec_trx_id(clust_rec, clust_index, clust_offsets);
-
- mtr_s_lock(&(purge_sys->latch), &mtr);
-
- mutex_enter(&kernel_mutex);
-
- trx = NULL;
- if (!trx_is_active(trx_id)) {
- /* The transaction that modified or inserted clust_rec is no
- longer active: no implicit lock on rec */
- goto exit_func;
- }
-
- if (!lock_check_trx_id_sanity(trx_id, clust_rec, clust_index,
- clust_offsets, TRUE)) {
- /* Corruption noticed: try to avoid a crash by returning */
- goto exit_func;
- }
-
- comp = page_rec_is_comp(rec);
- ut_ad(index->table == clust_index->table);
- ut_ad(!!comp == dict_table_is_comp(index->table));
- ut_ad(!comp == !page_rec_is_comp(clust_rec));
-
- /* We look up if some earlier version, which was modified by the trx_id
- transaction, of the clustered index record would require rec to be in
- a different state (delete marked or unmarked, or have different field
- values, or not existing). If there is such a version, then rec was
- modified by the trx_id transaction, and it has an implicit x-lock on
- rec. Note that if clust_rec itself would require rec to be in a
- different state, then the trx_id transaction has not yet had time to
- modify rec, and does not necessarily have an implicit x-lock on rec. */
-
- rec_del = rec_get_deleted_flag(rec, comp);
- trx = NULL;
-
- version = clust_rec;
-
- for (;;) {
- rec_t* prev_version;
- ulint vers_del;
- row_ext_t* ext;
- trx_id_t prev_trx_id;
-
- mutex_exit(&kernel_mutex);
-
- /* While we retrieve an earlier version of clust_rec, we
- release the kernel mutex, because it may take time to access
- the disk. After the release, we have to check if the trx_id
- transaction is still active. We keep the semaphore in mtr on
- the clust_rec page, so that no other transaction can update
- it and get an implicit x-lock on rec. */
-
- heap2 = heap;
- heap = mem_heap_create(1024);
- err = trx_undo_prev_version_build(clust_rec, &mtr, version,
- clust_index, clust_offsets,
- heap, &prev_version);
- mem_heap_free(heap2); /* free version and clust_offsets */
-
- if (prev_version == NULL) {
- mutex_enter(&kernel_mutex);
-
- if (!trx_is_active(trx_id)) {
- /* Transaction no longer active: no
- implicit x-lock */
-
- break;
- }
-
- /* If the transaction is still active,
- clust_rec must be a fresh insert, because no
- previous version was found. */
- ut_ad(err == DB_SUCCESS);
-
- /* It was a freshly inserted version: there is an
- implicit x-lock on rec */
-
- trx = trx_get_on_id(trx_id);
-
- break;
- }
-
- clust_offsets = rec_get_offsets(prev_version, clust_index,
- NULL, ULINT_UNDEFINED, &heap);
-
- vers_del = rec_get_deleted_flag(prev_version, comp);
- prev_trx_id = row_get_rec_trx_id(prev_version, clust_index,
- clust_offsets);
-
- /* If the trx_id and prev_trx_id are different and if
- the prev_version is marked deleted then the
- prev_trx_id must have already committed for the trx_id
- to be able to modify the row. Therefore, prev_trx_id
- cannot hold any implicit lock. */
- if (vers_del && 0 != ut_dulint_cmp(trx_id, prev_trx_id)) {
-
- mutex_enter(&kernel_mutex);
- break;
- }
-
- /* The stack of versions is locked by mtr. Thus, it
- is safe to fetch the prefixes for externally stored
- columns. */
- row = row_build(ROW_COPY_POINTERS, clust_index, prev_version,
- clust_offsets, NULL, &ext, heap);
- entry = row_build_index_entry(row, ext, index, heap);
- /* entry may be NULL if a record was inserted in place
- of a deleted record, and the BLOB pointers of the new
- record were not initialized yet. But in that case,
- prev_version should be NULL. */
- ut_a(entry);
-
- mutex_enter(&kernel_mutex);
-
- if (!trx_is_active(trx_id)) {
- /* Transaction no longer active: no implicit x-lock */
-
- break;
- }
-
- /* If we get here, we know that the trx_id transaction is
- still active and it has modified prev_version. Let us check
- if prev_version would require rec to be in a different
- state. */
-
- /* The previous version of clust_rec must be
- accessible, because the transaction is still active
- and clust_rec was not a fresh insert. */
- ut_ad(err == DB_SUCCESS);
-
- /* We check if entry and rec are identified in the alphabetical
- ordering */
- if (0 == cmp_dtuple_rec(entry, rec, offsets)) {
- /* The delete marks of rec and prev_version should be
- equal for rec to be in the state required by
- prev_version */
-
- if (rec_del != vers_del) {
- trx = trx_get_on_id(trx_id);
-
- break;
- }
-
- /* It is possible that the row was updated so that the
- secondary index record remained the same in
- alphabetical ordering, but the field values changed
- still. For example, 'abc' -> 'ABC'. Check also that. */
-
- dtuple_set_types_binary(entry,
- dtuple_get_n_fields(entry));
- if (0 != cmp_dtuple_rec(entry, rec, offsets)) {
-
- trx = trx_get_on_id(trx_id);
-
- break;
- }
- } else if (!rec_del) {
- /* The delete mark should be set in rec for it to be
- in the state required by prev_version */
-
- trx = trx_get_on_id(trx_id);
-
- break;
- }
-
- if (0 != ut_dulint_cmp(trx_id, prev_trx_id)) {
- /* The versions modified by the trx_id transaction end
- to prev_version: no implicit x-lock */
-
- break;
- }
-
- version = prev_version;
- }/* for (;;) */
-
-exit_func:
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(trx);
-}
-
-/*****************************************************************//**
-Finds out if we must preserve a delete marked earlier version of a clustered
-index record, because it is >= the purge view.
-@return TRUE if earlier version should be preserved */
-UNIV_INTERN
-ibool
-row_vers_must_preserve_del_marked(
-/*==============================*/
- trx_id_t trx_id, /*!< in: transaction id in the version */
- mtr_t* mtr) /*!< in: mtr holding the latch on the
- clustered index record; it will also
- hold the latch on purge_view */
-{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- mtr_s_lock(&(purge_sys->latch), mtr);
-
- if (trx_purge_update_undo_must_exist(trx_id)) {
-
- /* A purge operation is not yet allowed to remove this
- delete marked record */
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*****************************************************************//**
-Finds out if a version of the record, where the version >= the current
-purge view, should have ientry as its secondary index entry. We check
-if there is any not delete marked version of the record where the trx
-id >= purge view, and the secondary index entry and ientry are identified in
-the alphabetical ordering; exactly in this case we return TRUE.
-@return TRUE if earlier version should have */
-UNIV_INTERN
-ibool
-row_vers_old_has_index_entry(
-/*=========================*/
- ibool also_curr,/*!< in: TRUE if also rec is included in the
- versions to search; otherwise only versions
- prior to it are searched */
- const rec_t* rec, /*!< in: record in the clustered index; the
- caller must have a latch on the page */
- mtr_t* mtr, /*!< in: mtr holding the latch on rec; it will
- also hold the latch on purge_view */
- dict_index_t* index, /*!< in: the secondary index */
- const dtuple_t* ientry) /*!< in: the secondary index entry */
-{
- const rec_t* version;
- rec_t* prev_version;
- dict_index_t* clust_index;
- ulint* clust_offsets;
- mem_heap_t* heap;
- mem_heap_t* heap2;
- const dtuple_t* row;
- const dtuple_t* entry;
- ulint err;
- ulint comp;
-
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
- mtr_s_lock(&(purge_sys->latch), mtr);
-
- clust_index = dict_table_get_first_index(index->table);
-
- comp = page_rec_is_comp(rec);
- ut_ad(!dict_table_is_comp(index->table) == !comp);
- heap = mem_heap_create(1024);
- clust_offsets = rec_get_offsets(rec, clust_index, NULL,
- ULINT_UNDEFINED, &heap);
-
- if (also_curr && !rec_get_deleted_flag(rec, comp)) {
- row_ext_t* ext;
-
- /* The stack of versions is locked by mtr.
- Thus, it is safe to fetch the prefixes for
- externally stored columns. */
- row = row_build(ROW_COPY_POINTERS, clust_index,
- rec, clust_offsets, NULL, &ext, heap);
- entry = row_build_index_entry(row, ext, index, heap);
-
- /* If entry == NULL, the record contains unset BLOB
- pointers. This must be a freshly inserted record. If
- this is called from
- row_purge_remove_sec_if_poss_low(), the thread will
- hold latches on the clustered index and the secondary
- index. Because the insert works in three steps:
-
- (1) insert the record to clustered index
- (2) store the BLOBs and update BLOB pointers
- (3) insert records to secondary indexes
-
- the purge thread can safely ignore freshly inserted
- records and delete the secondary index record. The
- thread that inserted the new record will be inserting
- the secondary index records. */
-
- /* NOTE that we cannot do the comparison as binary
- fields because the row is maybe being modified so that
- the clustered index record has already been updated to
- a different binary value in a char field, but the
- collation identifies the old and new value anyway! */
- if (entry && !dtuple_coll_cmp(ientry, entry)) {
-
- mem_heap_free(heap);
-
- return(TRUE);
- }
- }
-
- version = rec;
-
- for (;;) {
- heap2 = heap;
- heap = mem_heap_create(1024);
- err = trx_undo_prev_version_build(rec, mtr, version,
- clust_index, clust_offsets,
- heap, &prev_version);
- mem_heap_free(heap2); /* free version and clust_offsets */
-
- if (err != DB_SUCCESS || !prev_version) {
- /* Versions end here */
-
- mem_heap_free(heap);
-
- return(FALSE);
- }
-
- clust_offsets = rec_get_offsets(prev_version, clust_index,
- NULL, ULINT_UNDEFINED, &heap);
-
- if (!rec_get_deleted_flag(prev_version, comp)) {
- row_ext_t* ext;
-
- /* The stack of versions is locked by mtr.
- Thus, it is safe to fetch the prefixes for
- externally stored columns. */
- row = row_build(ROW_COPY_POINTERS, clust_index,
- prev_version, clust_offsets,
- NULL, &ext, heap);
- entry = row_build_index_entry(row, ext, index, heap);
-
- /* If entry == NULL, the record contains unset
- BLOB pointers. This must be a freshly
- inserted record that we can safely ignore.
- For the justification, see the comments after
- the previous row_build_index_entry() call. */
-
- /* NOTE that we cannot do the comparison as binary
- fields because maybe the secondary index record has
- already been updated to a different binary value in
- a char field, but the collation identifies the old
- and new value anyway! */
-
- if (entry && !dtuple_coll_cmp(ientry, entry)) {
-
- mem_heap_free(heap);
-
- return(TRUE);
- }
- }
-
- version = prev_version;
- }
-}
-
-/*****************************************************************//**
-Constructs the version of a clustered index record which a consistent
-read should see. We assume that the trx id stored in rec is such that
-the consistent read should not see rec in its present version.
-@return DB_SUCCESS or DB_MISSING_HISTORY */
-UNIV_INTERN
-ulint
-row_vers_build_for_consistent_read(
-/*===============================*/
- const rec_t* rec, /*!< in: record in a clustered index; the
- caller must have a latch on the page; this
- latch locks the top of the stack of versions
- of this records */
- mtr_t* mtr, /*!< in: mtr holding the latch on rec */
- dict_index_t* index, /*!< in: the clustered index */
- ulint** offsets,/*!< in/out: offsets returned by
- rec_get_offsets(rec, index) */
- read_view_t* view, /*!< in: the consistent read view */
- mem_heap_t** offset_heap,/*!< in/out: memory heap from which
- the offsets are allocated */
- mem_heap_t* in_heap,/*!< in: memory heap from which the memory for
- *old_vers is allocated; memory for possible
- intermediate versions is allocated and freed
- locally within the function */
- rec_t** old_vers)/*!< out, own: old version, or NULL if the
- record does not exist in the view, that is,
- it was freshly inserted afterwards */
-{
- const rec_t* version;
- rec_t* prev_version;
- trx_id_t trx_id;
- mem_heap_t* heap = NULL;
- byte* buf;
- ulint err;
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- ut_ad(rec_offs_validate(rec, index, *offsets));
-
- trx_id = row_get_rec_trx_id(rec, index, *offsets);
-
- ut_ad(!read_view_sees_trx_id(view, trx_id));
-
- rw_lock_s_lock(&(purge_sys->latch));
- version = rec;
-
- for (;;) {
- mem_heap_t* heap2 = heap;
- trx_undo_rec_t* undo_rec;
- roll_ptr_t roll_ptr;
- undo_no_t undo_no;
- heap = mem_heap_create(1024);
-
- /* If we have high-granularity consistent read view and
- creating transaction of the view is the same as trx_id in
- the record we see this record only in the case when
- undo_no of the record is < undo_no in the view. */
-
- if (view->type == VIEW_HIGH_GRANULARITY
- && ut_dulint_cmp(view->creator_trx_id, trx_id) == 0) {
-
- roll_ptr = row_get_rec_roll_ptr(version, index,
- *offsets);
- undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
- undo_no = trx_undo_rec_get_undo_no(undo_rec);
- mem_heap_empty(heap);
-
- if (ut_dulint_cmp(view->undo_no, undo_no) > 0) {
- /* The view already sees this version: we can
- copy it to in_heap and return */
-
- buf = mem_heap_alloc(in_heap,
- rec_offs_size(*offsets));
- *old_vers = rec_copy(buf, version, *offsets);
- rec_offs_make_valid(*old_vers, index,
- *offsets);
- err = DB_SUCCESS;
-
- break;
- }
- }
-
- err = trx_undo_prev_version_build(rec, mtr, version, index,
- *offsets, heap,
- &prev_version);
- if (heap2) {
- mem_heap_free(heap2); /* free version */
- }
-
- if (err != DB_SUCCESS) {
- break;
- }
-
- if (prev_version == NULL) {
- /* It was a freshly inserted version */
- *old_vers = NULL;
- err = DB_SUCCESS;
-
- break;
- }
-
- *offsets = rec_get_offsets(prev_version, index, *offsets,
- ULINT_UNDEFINED, offset_heap);
-
- trx_id = row_get_rec_trx_id(prev_version, index, *offsets);
-
- if (read_view_sees_trx_id(view, trx_id)) {
-
- /* The view already sees this version: we can copy
- it to in_heap and return */
-
- buf = mem_heap_alloc(in_heap, rec_offs_size(*offsets));
- *old_vers = rec_copy(buf, prev_version, *offsets);
- rec_offs_make_valid(*old_vers, index, *offsets);
- err = DB_SUCCESS;
-
- break;
- }
-
- version = prev_version;
- }/* for (;;) */
-
- mem_heap_free(heap);
- rw_lock_s_unlock(&(purge_sys->latch));
-
- return(err);
-}
-
-/*****************************************************************//**
-Constructs the last committed version of a clustered index record,
-which should be seen by a semi-consistent read.
-@return DB_SUCCESS or DB_MISSING_HISTORY */
-UNIV_INTERN
-ulint
-row_vers_build_for_semi_consistent_read(
-/*====================================*/
- const rec_t* rec, /*!< in: record in a clustered index; the
- caller must have a latch on the page; this
- latch locks the top of the stack of versions
- of this records */
- mtr_t* mtr, /*!< in: mtr holding the latch on rec */
- dict_index_t* index, /*!< in: the clustered index */
- ulint** offsets,/*!< in/out: offsets returned by
- rec_get_offsets(rec, index) */
- mem_heap_t** offset_heap,/*!< in/out: memory heap from which
- the offsets are allocated */
- mem_heap_t* in_heap,/*!< in: memory heap from which the memory for
- *old_vers is allocated; memory for possible
- intermediate versions is allocated and freed
- locally within the function */
- const rec_t** old_vers)/*!< out: rec, old version, or NULL if the
- record does not exist in the view, that is,
- it was freshly inserted afterwards */
-{
- const rec_t* version;
- mem_heap_t* heap = NULL;
- byte* buf;
- ulint err;
- trx_id_t rec_trx_id = ut_dulint_zero;
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- ut_ad(rec_offs_validate(rec, index, *offsets));
-
- rw_lock_s_lock(&(purge_sys->latch));
- /* The S-latch on purge_sys prevents the purge view from
- changing. Thus, if we have an uncommitted transaction at
- this point, then purge cannot remove its undo log even if
- the transaction could commit now. */
-
- version = rec;
-
- for (;;) {
- trx_t* version_trx;
- mem_heap_t* heap2;
- rec_t* prev_version;
- trx_id_t version_trx_id;
-
- version_trx_id = row_get_rec_trx_id(version, index, *offsets);
- if (rec == version) {
- rec_trx_id = version_trx_id;
- }
-
- mutex_enter(&kernel_mutex);
- version_trx = trx_get_on_id(version_trx_id);
- mutex_exit(&kernel_mutex);
-
- if (!version_trx
- || version_trx->conc_state == TRX_NOT_STARTED
- || version_trx->conc_state == TRX_COMMITTED_IN_MEMORY) {
-
- /* We found a version that belongs to a
- committed transaction: return it. */
-
- if (rec == version) {
- *old_vers = rec;
- err = DB_SUCCESS;
- break;
- }
-
- /* We assume that a rolled-back transaction stays in
- TRX_ACTIVE state until all the changes have been
- rolled back and the transaction is removed from
- the global list of transactions. */
-
- if (!ut_dulint_cmp(rec_trx_id, version_trx_id)) {
- /* The transaction was committed while
- we searched for earlier versions.
- Return the current version as a
- semi-consistent read. */
-
- version = rec;
- *offsets = rec_get_offsets(version,
- index, *offsets,
- ULINT_UNDEFINED,
- offset_heap);
- }
-
- buf = mem_heap_alloc(in_heap, rec_offs_size(*offsets));
- *old_vers = rec_copy(buf, version, *offsets);
- rec_offs_make_valid(*old_vers, index, *offsets);
- err = DB_SUCCESS;
-
- break;
- }
-
- heap2 = heap;
- heap = mem_heap_create(1024);
-
- err = trx_undo_prev_version_build(rec, mtr, version, index,
- *offsets, heap,
- &prev_version);
- if (heap2) {
- mem_heap_free(heap2); /* free version */
- }
-
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
- break;
- }
-
- if (prev_version == NULL) {
- /* It was a freshly inserted version */
- *old_vers = NULL;
- err = DB_SUCCESS;
-
- break;
- }
-
- version = prev_version;
- *offsets = rec_get_offsets(version, index, *offsets,
- ULINT_UNDEFINED, offset_heap);
- }/* for (;;) */
-
- if (heap) {
- mem_heap_free(heap);
- }
- rw_lock_s_unlock(&(purge_sys->latch));
-
- return(err);
-}
diff --git a/storage/innodb_plugin/srv/srv0que.c b/storage/innodb_plugin/srv/srv0que.c
deleted file mode 100644
index fc50a86a55c..00000000000
--- a/storage/innodb_plugin/srv/srv0que.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file srv/srv0que.c
-Server query execution
-
-Created 6/5/1996 Heikki Tuuri
-*******************************************************/
-
-#include "srv0que.h"
-
-#include "srv0srv.h"
-#include "sync0sync.h"
-#include "os0thread.h"
-#include "usr0sess.h"
-#include "que0que.h"
-
-/**********************************************************************//**
-Enqueues a task to server task queue and releases a worker thread, if there
-is a suspended one. */
-UNIV_INTERN
-void
-srv_que_task_enqueue_low(
-/*=====================*/
- que_thr_t* thr) /*!< in: query thread */
-{
- ut_ad(thr);
- ut_ad(mutex_own(&kernel_mutex));
-
- UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);
-
- srv_release_threads(SRV_WORKER, 1);
-}
diff --git a/storage/innodb_plugin/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0srv.c
deleted file mode 100644
index 639da1ed2f3..00000000000
--- a/storage/innodb_plugin/srv/srv0srv.c
+++ /dev/null
@@ -1,2758 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2008, 2009 Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-/***********************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2009, Percona Inc.
-
-Portions of this file contain modifications contributed and copyrighted
-by Percona Inc.. Those modifications are
-gratefully acknowledged and are described briefly in the InnoDB
-documentation. The contributions by Percona Inc. are incorporated with
-their permission, and subject to the conditions contained in the file
-COPYING.Percona.
-
-This program is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-***********************************************************************/
-
-/**************************************************//**
-@file srv/srv0srv.c
-The database server main program
-
-NOTE: SQL Server 7 uses something which the documentation
-calls user mode scheduled threads (UMS threads). One such
-thread is usually allocated per processor. Win32
-documentation does not know any UMS threads, which suggests
-that the concept is internal to SQL Server 7. It may mean that
-SQL Server 7 does all the scheduling of threads itself, even
-in i/o waits. We should maybe modify InnoDB to use the same
-technique, because thread switches within NT may be too slow.
-
-SQL Server 7 also mentions fibers, which are cooperatively
-scheduled threads. They can boost performance by 5 %,
-according to the Delaney and Soukup's book.
-
-Windows 2000 will have something called thread pooling
-(see msdn website), which we could possibly use.
-
-Another possibility could be to use some very fast user space
-thread library. This might confuse NT though.
-
-Created 10/8/1995 Heikki Tuuri
-*******************************************************/
-
-/* Dummy comment */
-#include "srv0srv.h"
-
-#include "ut0mem.h"
-#include "ut0ut.h"
-#include "os0proc.h"
-#include "mem0mem.h"
-#include "mem0pool.h"
-#include "sync0sync.h"
-#include "thr0loc.h"
-#include "que0que.h"
-#include "srv0que.h"
-#include "log0recv.h"
-#include "pars0pars.h"
-#include "usr0sess.h"
-#include "lock0lock.h"
-#include "trx0purge.h"
-#include "ibuf0ibuf.h"
-#include "buf0flu.h"
-#include "buf0lru.h"
-#include "btr0sea.h"
-#include "dict0load.h"
-#include "dict0boot.h"
-#include "srv0start.h"
-#include "row0mysql.h"
-#include "ha_prototypes.h"
-#include "trx0i_s.h"
-#include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
-
-/* This is set to TRUE if the MySQL user has set it in MySQL; currently
-affects only FOREIGN KEY definition parsing */
-UNIV_INTERN ibool srv_lower_case_table_names = FALSE;
-
-/* The following counter is incremented whenever there is some user activity
-in the server */
-UNIV_INTERN ulint srv_activity_count = 0;
-
-/* The following is the maximum allowed duration of a lock wait. */
-UNIV_INTERN ulint srv_fatal_semaphore_wait_threshold = 600;
-
-/* How much data manipulation language (DML) statements need to be delayed,
-in microseconds, in order to reduce the lagging of the purge thread. */
-UNIV_INTERN ulint srv_dml_needed_delay = 0;
-
-UNIV_INTERN ibool srv_lock_timeout_and_monitor_active = FALSE;
-UNIV_INTERN ibool srv_error_monitor_active = FALSE;
-
-UNIV_INTERN const char* srv_main_thread_op_info = "";
-
-/** Prefix used by MySQL to indicate pre-5.1 table name encoding */
-UNIV_INTERN const char srv_mysql50_table_name_prefix[9] = "#mysql50#";
-
-/* Server parameters which are read from the initfile */
-
-/* The following three are dir paths which are catenated before file
-names, where the file name itself may also contain a path */
-
-UNIV_INTERN char* srv_data_home = NULL;
-#ifdef UNIV_LOG_ARCHIVE
-UNIV_INTERN char* srv_arch_dir = NULL;
-#endif /* UNIV_LOG_ARCHIVE */
-
-/** store to its own file each table created by a user; data
-dictionary tables are in the system tablespace 0 */
-UNIV_INTERN my_bool srv_file_per_table;
-/** The file format to use on new *.ibd files. */
-UNIV_INTERN ulint srv_file_format = 0;
-/** Whether to check file format during startup. A value of
-DICT_TF_FORMAT_MAX + 1 means no checking ie. FALSE. The default is to
-set it to the highest format we support. */
-UNIV_INTERN ulint srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX;
-
-#if DICT_TF_FORMAT_51
-# error "DICT_TF_FORMAT_51 must be 0!"
-#endif
-/** Place locks to records only i.e. do not use next-key locking except
-on duplicate key checking and foreign key checking */
-UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
-
-UNIV_INTERN ulint srv_n_data_files = 0;
-UNIV_INTERN char** srv_data_file_names = NULL;
-/* size in database pages */
-UNIV_INTERN ulint* srv_data_file_sizes = NULL;
-
-/* if TRUE, then we auto-extend the last data file */
-UNIV_INTERN ibool srv_auto_extend_last_data_file = FALSE;
-/* if != 0, this tells the max size auto-extending may increase the
-last data file size */
-UNIV_INTERN ulint srv_last_file_size_max = 0;
-/* If the last data file is auto-extended, we add this
-many pages to it at a time */
-UNIV_INTERN ulong srv_auto_extend_increment = 8;
-UNIV_INTERN ulint* srv_data_file_is_raw_partition = NULL;
-
-/* If the following is TRUE we do not allow inserts etc. This protects
-the user from forgetting the 'newraw' keyword to my.cnf */
-
-UNIV_INTERN ibool srv_created_new_raw = FALSE;
-
-UNIV_INTERN char** srv_log_group_home_dirs = NULL;
-
-UNIV_INTERN ulint srv_n_log_groups = ULINT_MAX;
-UNIV_INTERN ulint srv_n_log_files = ULINT_MAX;
-/* size in database pages */
-UNIV_INTERN ulint srv_log_file_size = ULINT_MAX;
-/* size in database pages */
-UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX;
-UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
-
-/* Try to flush dirty pages so as to avoid IO bursts at
-the checkpoints. */
-UNIV_INTERN char srv_adaptive_flushing = TRUE;
-
-/* The sort order table of the MySQL latin1_swedish_ci character set
-collation */
-UNIV_INTERN const byte* srv_latin1_ordering;
-
-/* use os/external memory allocator */
-UNIV_INTERN my_bool srv_use_sys_malloc = TRUE;
-/* requested size in kilobytes */
-UNIV_INTERN ulint srv_buf_pool_size = ULINT_MAX;
-/* previously requested size */
-UNIV_INTERN ulint srv_buf_pool_old_size;
-/* current size in kilobytes */
-UNIV_INTERN ulint srv_buf_pool_curr_size = 0;
-/* size in bytes */
-UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX;
-UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX;
-
-/* This parameter is deprecated. Use srv_n_[read|write]_io_threads
-instead. */
-UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX;
-UNIV_INTERN ulint srv_n_read_io_threads = ULINT_MAX;
-UNIV_INTERN ulint srv_n_write_io_threads = ULINT_MAX;
-
-/* User settable value of the number of pages that must be present
-in the buffer cache and accessed sequentially for InnoDB to trigger a
-readahead request. */
-UNIV_INTERN ulong srv_read_ahead_threshold = 56;
-
-#ifdef UNIV_LOG_ARCHIVE
-UNIV_INTERN ibool srv_log_archive_on = FALSE;
-UNIV_INTERN ibool srv_archive_recovery = 0;
-UNIV_INTERN ib_uint64_t srv_archive_recovery_limit_lsn;
-#endif /* UNIV_LOG_ARCHIVE */
-
-/* This parameter is used to throttle the number of insert buffers that are
-merged in a batch. By increasing this parameter on a faster disk you can
-possibly reduce the number of I/O operations performed to complete the
-merge operation. The value of this parameter is used as is by the
-background loop when the system is idle (low load), on a busy system
-the parameter is scaled down by a factor of 4, this is to avoid putting
-a heavier load on the I/O sub system. */
-
-UNIV_INTERN ulong srv_insert_buffer_batch_size = 20;
-
-UNIV_INTERN char* srv_file_flush_method_str = NULL;
-UNIV_INTERN ulint srv_unix_file_flush_method = SRV_UNIX_FSYNC;
-UNIV_INTERN ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
-
-UNIV_INTERN ulint srv_max_n_open_files = 300;
-
-/* Number of IO operations per second the server can do */
-UNIV_INTERN ulong srv_io_capacity = 200;
-
-/* The InnoDB main thread tries to keep the ratio of modified pages
-in the buffer pool to all database pages in the buffer pool smaller than
-the following number. But it is not guaranteed that the value stays below
-that during a time of heavy update/insert activity. */
-
-UNIV_INTERN ulong srv_max_buf_pool_modified_pct = 75;
-
-/* variable counts amount of data read in total (in bytes) */
-UNIV_INTERN ulint srv_data_read = 0;
-
-/* here we count the amount of data written in total (in bytes) */
-UNIV_INTERN ulint srv_data_written = 0;
-
-/* the number of the log write requests done */
-UNIV_INTERN ulint srv_log_write_requests = 0;
-
-/* the number of physical writes to the log performed */
-UNIV_INTERN ulint srv_log_writes = 0;
-
-/* amount of data written to the log files in bytes */
-UNIV_INTERN ulint srv_os_log_written = 0;
-
-/* amount of writes being done to the log files */
-UNIV_INTERN ulint srv_os_log_pending_writes = 0;
-
-/* we increase this counter when we don't have enough space in the
-log buffer and have to flush it */
-UNIV_INTERN ulint srv_log_waits = 0;
-
-/* this variable counts the amount of times, when the doublewrite buffer
-was flushed */
-UNIV_INTERN ulint srv_dblwr_writes = 0;
-
-/* here we store the number of pages that have been flushed to the
-doublewrite buffer */
-UNIV_INTERN ulint srv_dblwr_pages_written = 0;
-
-/* in this variable we store the number of write requests issued */
-UNIV_INTERN ulint srv_buf_pool_write_requests = 0;
-
-/* here we store the number of times when we had to wait for a free page
-in the buffer pool. It happens when the buffer pool is full and we need
-to make a flush, in order to be able to read or create a page. */
-UNIV_INTERN ulint srv_buf_pool_wait_free = 0;
-
-/* variable to count the number of pages that were written from buffer
-pool to the disk */
-UNIV_INTERN ulint srv_buf_pool_flushed = 0;
-
-/** Number of buffer pool reads that led to the
-reading of a disk page */
-UNIV_INTERN ulint srv_buf_pool_reads = 0;
-
-/* structure to pass status variables to MySQL */
-UNIV_INTERN export_struc export_vars;
-
-/* If the following is != 0 we do not allow inserts etc. This protects
-the user from forgetting the innodb_force_recovery keyword to my.cnf */
-
-UNIV_INTERN ulint srv_force_recovery = 0;
-/*-----------------------*/
-/* We are prepared for a situation that we have this many threads waiting for
-a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
-value. */
-
-UNIV_INTERN ulint srv_max_n_threads = 0;
-
-/* The following controls how many threads we let inside InnoDB concurrently:
-threads waiting for locks are not counted into the number because otherwise
-we could get a deadlock. MySQL creates a thread for each user session, and
-semaphore contention and convoy problems can occur without this restriction.
-Value 10 should be good if there are less than 4 processors + 4 disks in the
-computer. Bigger computers need bigger values. Value 0 will disable the
-concurrency check. */
-
-UNIV_INTERN ulong srv_thread_concurrency = 0;
-
-/* this mutex protects srv_conc data structures */
-UNIV_INTERN os_fast_mutex_t srv_conc_mutex;
-/* number of transactions that have declared_to_be_inside_innodb set.
-It used to be a non-error for this value to drop below zero temporarily.
-This is no longer true. We'll, however, keep the lint datatype to add
-assertions to catch any corner cases that we may have missed. */
-UNIV_INTERN lint srv_conc_n_threads = 0;
-/* number of OS threads waiting in the FIFO for a permission to enter
-InnoDB */
-UNIV_INTERN ulint srv_conc_n_waiting_threads = 0;
-
-typedef struct srv_conc_slot_struct srv_conc_slot_t; /* one wait slot; element type of srv_conc_slots */
-struct srv_conc_slot_struct{
- os_event_t event; /*!< event the thread in this slot waits on */
- ibool reserved; /*!< TRUE if this slot is
- reserved */
- ibool wait_ended; /*!< TRUE when another
- thread has already set
- the event and the
- thread in this slot is
- free to proceed; but
- reserved may still be
- TRUE at that point */
- UT_LIST_NODE_T(srv_conc_slot_t) srv_conc_queue; /*!< list node for the srv_conc_queue list */
-};
-
-/* queue of threads waiting to get in */
-UNIV_INTERN UT_LIST_BASE_NODE_T(srv_conc_slot_t) srv_conc_queue;
-/* array of wait slots */
-UNIV_INTERN srv_conc_slot_t* srv_conc_slots;
-
-/* Number of times a thread is allowed to enter InnoDB within the same
-SQL query after it has once got the ticket at srv_conc_enter_innodb */
-#define SRV_FREE_TICKETS_TO_ENTER srv_n_free_tickets_to_enter
-#define SRV_THREAD_SLEEP_DELAY srv_thread_sleep_delay
-/*-----------------------*/
-/* If the following is set to 1 then we do not run purge and insert buffer
-merge to completion before shutdown. If it is set to 2, do not even flush the
-buffer pool to data files at the shutdown: we effectively 'crash'
-InnoDB (but lose no committed transactions). */
-UNIV_INTERN ulint srv_fast_shutdown = 0;
-
-/* Generate an innodb_status.<pid> file */
-UNIV_INTERN ibool srv_innodb_status = FALSE;
-
-/* When estimating number of different key values in an index, sample
-this many index pages */
-UNIV_INTERN unsigned long long srv_stats_sample_pages = 8;
-
-UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE;
-UNIV_INTERN ibool srv_use_checksums = TRUE;
-
-UNIV_INTERN ibool srv_set_thread_priorities = TRUE;
-UNIV_INTERN int srv_query_thread_priority = 0;
-
-UNIV_INTERN ulong srv_replication_delay = 0;
-
-/*-------------------------------------------*/
-UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;
-UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500;
-UNIV_INTERN ulong srv_thread_sleep_delay = 10000;
-UNIV_INTERN ulong srv_spin_wait_delay = 6;
-UNIV_INTERN ibool srv_priority_boost = TRUE;
-
-#ifdef UNIV_DEBUG
-UNIV_INTERN ibool srv_print_thread_releases = FALSE;
-UNIV_INTERN ibool srv_print_lock_waits = FALSE;
-UNIV_INTERN ibool srv_print_buf_io = FALSE;
-UNIV_INTERN ibool srv_print_log_io = FALSE;
-UNIV_INTERN ibool srv_print_latch_waits = FALSE;
-#endif /* UNIV_DEBUG */
-
-UNIV_INTERN ulint srv_n_rows_inserted = 0;
-UNIV_INTERN ulint srv_n_rows_updated = 0;
-UNIV_INTERN ulint srv_n_rows_deleted = 0;
-UNIV_INTERN ulint srv_n_rows_read = 0;
-
-static ulint srv_n_rows_inserted_old = 0;
-static ulint srv_n_rows_updated_old = 0;
-static ulint srv_n_rows_deleted_old = 0;
-static ulint srv_n_rows_read_old = 0;
-
-UNIV_INTERN ulint srv_n_lock_wait_count = 0;
-UNIV_INTERN ulint srv_n_lock_wait_current_count = 0;
-UNIV_INTERN ib_int64_t srv_n_lock_wait_time = 0;
-UNIV_INTERN ulint srv_n_lock_max_wait_time = 0;
-
-
-/*
- Set the following to 0 if you want InnoDB to write messages on
- stderr on startup/shutdown
-*/
-UNIV_INTERN ibool srv_print_verbose_log = TRUE;
-UNIV_INTERN ibool srv_print_innodb_monitor = FALSE;
-UNIV_INTERN ibool srv_print_innodb_lock_monitor = FALSE;
-UNIV_INTERN ibool srv_print_innodb_tablespace_monitor = FALSE;
-UNIV_INTERN ibool srv_print_innodb_table_monitor = FALSE;
-
-/* Array of English strings describing the current state of an
-i/o handler thread */
-
-UNIV_INTERN const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
-UNIV_INTERN const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];
-
-UNIV_INTERN time_t srv_last_monitor_time;
-
-UNIV_INTERN mutex_t srv_innodb_monitor_mutex;
-
-/* Mutex for locking srv_monitor_file */
-UNIV_INTERN mutex_t srv_monitor_file_mutex;
-/* Temporary file for innodb monitor output */
-UNIV_INTERN FILE* srv_monitor_file;
-/* Mutex for locking srv_dict_tmpfile.
-This mutex has a very high rank; threads reserving it should not
-be holding any InnoDB latches. */
-UNIV_INTERN mutex_t srv_dict_tmpfile_mutex;
-/* Temporary file for output from the data dictionary */
-UNIV_INTERN FILE* srv_dict_tmpfile;
-/* Mutex for locking srv_misc_tmpfile.
-This mutex has a very low rank; threads reserving it should not
-acquire any further latches or sleep before releasing this one. */
-UNIV_INTERN mutex_t srv_misc_tmpfile_mutex;
-/* Temporary file for miscellaneous diagnostic output */
-UNIV_INTERN FILE* srv_misc_tmpfile;
-
-UNIV_INTERN ulint srv_main_thread_process_no = 0;
-UNIV_INTERN ulint srv_main_thread_id = 0;
-
-/* The following count work done by srv_master_thread. */
-
-/* Iterations by the 'once per second' loop. */
-static ulint srv_main_1_second_loops = 0;
-/* Calls to sleep by the 'once per second' loop. */
-static ulint srv_main_sleeps = 0;
-/* Iterations by the 'once per 10 seconds' loop. */
-static ulint srv_main_10_second_loops = 0;
-/* Iterations of the loop bounded by the 'background_loop' label. */
-static ulint srv_main_background_loops = 0;
-/* Iterations of the loop bounded by the 'flush_loop' label. */
-static ulint srv_main_flush_loops = 0;
-/* Log writes involving flush. */
-static ulint srv_log_writes_and_flush = 0;
-
-/* This is only ever touched by the master thread. It records the
-time when the last flush of log file has happened. The master
-thread ensures that we flush the log files at least once per
-second. */
-static time_t srv_last_log_flush_time;
-
-/* The master thread performs various tasks based on the current
-state of IO activity and the level of IO utilization in past
-intervals. Following macros define thresholds for these conditions. */
-#define SRV_PEND_IO_THRESHOLD (PCT_IO(3))
-#define SRV_RECENT_IO_ACTIVITY (PCT_IO(5))
-#define SRV_PAST_IO_ACTIVITY (PCT_IO(200))
-
-/*
- IMPLEMENTATION OF THE SERVER MAIN PROGRAM
- =========================================
-
-There is the following analogue between this database
-server and an operating system kernel:
-
-DB concept equivalent OS concept
----------- ---------------------
-transaction -- process;
-
-query thread -- thread;
-
-lock -- semaphore;
-
-transaction set to
-the rollback state -- kill signal delivered to a process;
-
-kernel -- kernel;
-
-query thread execution:
-(a) without kernel mutex
-reserved -- process executing in user mode;
-(b) with kernel mutex reserved
- -- process executing in kernel mode;
-
-The server is controlled by a master thread which runs at
-a priority higher than normal, that is, higher than user threads.
-It sleeps most of the time, and wakes up, say, every 300 milliseconds,
-to check whether there is anything happening in the server which
-requires intervention of the master thread. Such situations may be,
-for example, when flushing of dirty blocks is needed in the buffer
-pool or old version of database rows have to be cleaned away.
-
-The threads which we call user threads serve the queries of
-the clients and input from the console of the server.
-They run at normal priority. The server may have several
-communications endpoints. A dedicated set of user threads waits
-at each of these endpoints ready to receive a client request.
-Each request is taken by a single user thread, which then starts
-processing and, when the result is ready, sends it to the client
-and returns to wait at the same endpoint the thread started from.
-
-So, we do not have dedicated communication threads listening at
-the endpoints and dealing the jobs to dedicated worker threads.
-Our architecture saves one thread switch per request, compared
-to the solution with dedicated communication threads
-which amounts to 15 microseconds on 100 MHz Pentium
-running NT. If the client
-is communicating over a network, this saving is negligible, but
-if the client resides in the same machine, maybe in an SMP machine
-on a different processor from the server thread, the saving
-can be important as the threads can communicate over shared
-memory with an overhead of a few microseconds.
-
-We may later implement a dedicated communication thread solution
-for those endpoints which communicate over a network.
-
-Our solution with user threads has two problems: for each endpoint
-there has to be a number of listening threads. If there are many
-communication endpoints, it may be difficult to set the right number
-of concurrent threads in the system, as many of the threads
-may always be waiting at less busy endpoints. Another problem
-is queuing of the messages, as the server internally does not
-offer any queue for jobs.
-
-Another group of user threads is intended for splitting the
-queries and processing them in parallel. Let us call these
-parallel communication threads. These threads are waiting for
-parallelized tasks, suspended on event semaphores.
-
-A single user thread waits for input from the console,
-like a command to shut the database.
-
-Utility threads are a different group of threads which takes
-care of the buffer pool flushing and other, mainly background
-operations, in the server.
-Some of these utility threads always run at a lower than normal
-priority, so that they are always in background. Some of them
-may dynamically boost their priority by the pri_adjust function,
-even to higher than normal priority, if their task becomes urgent.
-The running of utilities is controlled by high- and low-water marks
-of urgency. The urgency may be measured by the number of dirty blocks
-in the buffer pool, in the case of the flush thread, for example.
-When the high-water mark is exceeded, an utility starts running, until
-the urgency drops under the low-water mark. Then the utility thread
-suspends itself to wait for an event. The master thread is
-responsible for signaling this event when the utility thread is
-again needed.
-
-For each individual type of utility, some threads always remain
-at lower than normal priority. This is because pri_adjust is implemented
-so that the threads at normal or higher priority control their
-share of running time by calling sleep. Thus, if the load of the
-system suddenly drops, these threads cannot necessarily utilize
-the system fully. The background priority threads make up for this,
-starting to run when the load drops.
-
-When there is no activity in the system, also the master thread
-suspends itself to wait for an event making
-the server totally silent. The responsibility to signal this
-event is on the user thread which again receives a message
-from a client.
-
-There is still one complication in our server design. If a
-background utility thread obtains a resource (e.g., mutex) needed by a user
-thread, and there is also some other user activity in the system,
-the user thread may have to wait indefinitely long for the
-resource, as the OS does not schedule a background thread if
-there is some other runnable user thread. This problem is called
-priority inversion in real-time programming.
-
-One solution to the priority inversion problem would be to
-keep record of which thread owns which resource and
-in the above case boost the priority of the background thread
-so that it will be scheduled and it can release the resource.
-This solution is called priority inheritance in real-time programming.
-A drawback of this solution is that the overhead of acquiring a mutex
-increases slightly, maybe 0.2 microseconds on a 100 MHz Pentium, because
-the thread has to call os_thread_get_curr_id.
-This may be compared to 0.5 microsecond overhead for a mutex lock-unlock
-pair. Note that the thread
-cannot store the information in the resource, say mutex, itself,
-because competing threads could wipe out the information if it is
-stored before acquiring the mutex, and if it is stored afterwards,
-the information is outdated for the time of one machine instruction,
-at least. (To be precise, the information could be stored to
-lock_word in mutex if the machine supports atomic swap.)
-
-The above solution with priority inheritance may become actual in the
-future, but at the moment we plan to implement a more coarse solution,
-which could be called a global priority inheritance. If a thread
-has to wait for a long time, say 300 milliseconds, for a resource,
-we just guess that it may be waiting for a resource owned by a background
-thread, and boost the priority of all runnable background threads
-to the normal level. The background threads then themselves adjust
-their fixed priority back to background after releasing all resources
-they had (or, at some fixed points in their program code).
-
-What is the performance of the global priority inheritance solution?
-We may weigh the length of the wait time 300 milliseconds, during
-which the system processes some other thread
-against the cost of boosting the priority of each runnable background
-thread, rescheduling it, and lowering the priority again.
-On 100 MHz Pentium + NT this overhead may be of the order 100
-microseconds per thread. So, if the number of runnable background
-threads is not very big, say < 100, the cost is tolerable.
-Utility threads probably will access resources used by
-user threads not very often, so collisions of user threads
-to preempted utility threads should not happen very often.
-
-The thread table contains
-information of the current status of each thread existing in the system,
-and also the event semaphores used in suspending the master thread
-and utility and parallel communication threads when they have nothing to do.
-The thread table can be seen as an analogue to the process table
-in a traditional Unix implementation.
-
-The thread table is also used in the global priority inheritance
-scheme. This brings in one additional complication: threads accessing
-the thread table must have at least normal fixed priority,
-because the priority inheritance solution does not work if a background
-thread is preempted while possessing the mutex protecting the thread table.
-So, if a thread accesses the thread table, its priority has to be
-boosted at least to normal. This priority requirement can be seen similar to
-the privileged mode used when processing the kernel calls in traditional
-Unix.*/
-
-/* Thread slot in the thread table; slots are reserved by srv_table_reserve_slot() */
-struct srv_slot_struct{
- os_thread_id_t id; /*!< thread id, set from os_thread_get_curr_id() on reservation */
- os_thread_t handle; /*!< thread handle, set from os_thread_get_curr() on reservation */
- unsigned type:3; /*!< thread type: user, utility etc. (3-bit field) */
- unsigned in_use:1; /*!< TRUE if this slot is in use */
- unsigned suspended:1; /*!< TRUE if the thread is waiting
- for the event of this slot */
- ib_time_t suspend_time; /*!< time when the thread was
- suspended */
- os_event_t event; /*!< event used in suspending the
- thread when it has nothing to do */
- que_thr_t* thr; /*!< suspended query thread (only
- used for MySQL threads) */
-};
-
-/* Table for MySQL threads where they will be suspended to wait for locks */
-UNIV_INTERN srv_slot_t* srv_mysql_table = NULL;
-
-UNIV_INTERN os_event_t srv_lock_timeout_thread_event;
-
-UNIV_INTERN srv_sys_t* srv_sys = NULL;
-
-/* padding to prevent other memory update hotspots from residing on
-the same memory cache line */
-UNIV_INTERN byte srv_pad1[64];
-/* mutex protecting the server, trx structs, query threads, and lock table */
-UNIV_INTERN mutex_t* kernel_mutex_temp;
-/* padding to prevent other memory update hotspots from residing on
-the same memory cache line */
-UNIV_INTERN byte srv_pad2[64];
-
-#if 0
-/* The following three values measure the urgency of the jobs of
-buffer, version, and insert threads. They may vary from 0 - 1000.
-The server mutex protects all these variables. The low-water values
-tell that the server can acquiesce the utility when the value
-drops below this low-water mark. */
-
-static ulint srv_meter[SRV_MASTER + 1];
-static ulint srv_meter_low_water[SRV_MASTER + 1];
-static ulint srv_meter_high_water[SRV_MASTER + 1];
-static ulint srv_meter_high_water2[SRV_MASTER + 1];
-static ulint srv_meter_foreground[SRV_MASTER + 1];
-#endif
-
-/* The following values give info about the activity going on in
-the database. They are protected by the server mutex. The arrays
-are indexed by the type of the thread. */
-
-UNIV_INTERN ulint srv_n_threads_active[SRV_MASTER + 1];
-UNIV_INTERN ulint srv_n_threads[SRV_MASTER + 1];
-
-/***********************************************************************
-Prints the srv_master_thread loop counters and the log write-and-flush count to file. */
-static
-void
-srv_print_master_thread_info(
-/*=========================*/
- FILE *file) /* in: output stream */
-{
- fprintf(file, "srv_master_thread loops: %lu 1_second, %lu sleeps, "
- "%lu 10_second, %lu background, %lu flush\n",
- srv_main_1_second_loops, srv_main_sleeps,
- srv_main_10_second_loops, srv_main_background_loops,
- srv_main_flush_loops); /* NOTE(review): ulint counters passed to %lu without the (ulong) cast used elsewhere in this file -- confirm ulint is unsigned long on all targets */
- fprintf(file, "srv_master_thread log flush and writes: %lu\n",
- srv_log_writes_and_flush);
-}
-
-/*********************************************************************//**
-Stores str in srv_io_thread_op_info[i] as the current state description of i/o thread i. */
-UNIV_INTERN
-void
-srv_set_io_thread_op_info(
-/*======================*/
- ulint i, /*!< in: the 'segment' of the i/o thread; asserted < SRV_MAX_N_IO_THREADS */
- const char* str) /*!< in: constant char string describing the
- state; only the pointer is stored, the text is not copied */
-{
- ut_a(i < SRV_MAX_N_IO_THREADS);
-
- srv_io_thread_op_info[i] = str;
-}
-
-/*********************************************************************//**
-Accessor function to get pointer to n'th slot in the server thread
-table (srv_sys->threads). Asserts that index < OS_THREAD_MAX_N.
-@return pointer to the slot */
-static
-srv_slot_t*
-srv_table_get_nth_slot(
-/*===================*/
- ulint index) /*!< in: index of the slot, must be < OS_THREAD_MAX_N */
-{
- ut_a(index < OS_THREAD_MAX_N);
-
- return(srv_sys->threads + index);
-}
-
-/*********************************************************************//**
-Gets the number of threads in the system; takes and releases kernel_mutex itself.
-@return sum of srv_n_threads[i] for i from SRV_COM through SRV_MASTER */
-UNIV_INTERN
-ulint
-srv_get_n_threads(void)
-/*===================*/
-{
- ulint i;
- ulint n_threads = 0;
-
- mutex_enter(&kernel_mutex); /* the counters are summed while holding kernel_mutex */
-
- for (i = SRV_COM; i < SRV_MASTER + 1; i++) {
-
- n_threads += srv_n_threads[i];
- }
-
- mutex_exit(&kernel_mutex);
-
- return(n_threads);
-}
-
-/*********************************************************************//**
-Reserves the first free slot in the thread table for the current thread. Also
-creates the thread local storage struct for the current thread and passes the
-slot index to thr_local_set_slot_no(). NOTE! The server mutex has to be reserved by the caller!
-@return reserved slot index */
-static
-ulint
-srv_table_reserve_slot(
-/*===================*/
- enum srv_thread_type type) /*!< in: type of the thread; asserted to be in 1..SRV_MASTER */
-{
- srv_slot_t* slot;
- ulint i;
-
- ut_a(type > 0);
- ut_a(type <= SRV_MASTER);
-
- i = 0;
- slot = srv_table_get_nth_slot(i);
-
- while (slot->in_use) { /* linear scan for the first slot not in use */
- i++;
- slot = srv_table_get_nth_slot(i); /* accessor asserts i < OS_THREAD_MAX_N; that is the only bound on this scan */
- }
-
- ut_a(slot->in_use == FALSE);
-
- slot->in_use = TRUE;
- slot->suspended = FALSE;
- slot->type = type;
- slot->id = os_thread_get_curr_id();
- slot->handle = os_thread_get_curr();
-
- thr_local_create();
-
- thr_local_set_slot_no(os_thread_get_curr_id(), i);
-
- return(i);
-}
-
-/*********************************************************************//**
-Marks the calling thread's slot as suspended, decrements the count of active
-threads of its type and resets the slot event. The caller is expected to
-wait on the returned event. NOTE! The server mutex has to be reserved by
-the caller!
-@return event for the calling thread to wait */
-static
-os_event_t
-srv_suspend_thread(void)
-/*====================*/
-{
-	srv_slot_t*		my_slot;
-	ulint			my_slot_no;
-	enum srv_thread_type	my_type;
-
-	ut_ad(mutex_own(&kernel_mutex));
-
-	/* The slot number was stored in thread local storage. */
-	my_slot_no = thr_local_get_slot_no(os_thread_get_curr_id());
-
-	if (srv_print_thread_releases) {
-		fprintf(stderr,
-			"Suspending thread %lu to slot %lu\n",
-			(ulong) os_thread_get_curr_id(), (ulong) my_slot_no);
-	}
-
-	my_slot = srv_table_get_nth_slot(my_slot_no);
-	my_type = my_slot->type;
-
-	ut_ad(my_type >= SRV_WORKER);
-	ut_ad(my_type <= SRV_MASTER);
-
-	my_slot->suspended = TRUE;
-
-	ut_ad(srv_n_threads_active[my_type] > 0);
-	srv_n_threads_active[my_type]--;
-
-	os_event_reset(my_slot->event);
-
-	return(my_slot->event);
-}
-
-/*********************************************************************//**
-Wakes up suspended threads of the given type: walks the server thread table
-and, for each matching suspended slot, clears the suspended flag, increments
-the active count of the type and sets the slot event, stopping once n threads
-have been released. NOTE! The server mutex has to be reserved by the caller!
-@return number of threads released: this may be less than n if not
-enough threads were suspended at the moment */
-UNIV_INTERN
-ulint
-srv_release_threads(
-/*================*/
-	enum srv_thread_type	type,	/*!< in: thread type */
-	ulint			n)	/*!< in: number of threads to release */
-{
-	ulint	slot_no;
-	ulint	n_released = 0;
-
-	ut_ad(type >= SRV_WORKER);
-	ut_ad(type <= SRV_MASTER);
-	ut_ad(n > 0);
-	ut_ad(mutex_own(&kernel_mutex));
-
-	for (slot_no = 0; slot_no < OS_THREAD_MAX_N; slot_no++) {
-
-		srv_slot_t*	candidate = srv_table_get_nth_slot(slot_no);
-
-		if (!candidate->in_use
-		    || candidate->type != type
-		    || !candidate->suspended) {
-
-			/* Not a suspended thread of the wanted type */
-			continue;
-		}
-
-		candidate->suspended = FALSE;
-
-		srv_n_threads_active[type]++;
-
-		os_event_set(candidate->event);
-
-		if (srv_print_thread_releases) {
-			fprintf(stderr,
-				"Releasing thread %lu type %lu"
-				" from slot %lu\n",
-				(ulong) candidate->id, (ulong) type,
-				(ulong) slot_no);
-		}
-
-		if (++n_released == n) {
-			break;
-		}
-	}
-
-	return(n_released);
-}
-
-/*********************************************************************//**
-Looks up the type stored in the calling thread's slot of the server thread
-table. The kernel mutex is held for the duration of the lookup.
-@return SRV_COM, ... */
-UNIV_INTERN
-enum srv_thread_type
-srv_get_thread_type(void)
-/*=====================*/
-{
-	enum srv_thread_type	my_type;
-
-	mutex_enter(&kernel_mutex);
-
-	/* thread id -> slot number (thread local storage) -> slot -> type */
-	my_type = srv_table_get_nth_slot(
-		thr_local_get_slot_no(os_thread_get_curr_id()))->type;
-
-	ut_ad(my_type >= SRV_WORKER);
-	ut_ad(my_type <= SRV_MASTER);
-
-	mutex_exit(&kernel_mutex);
-
-	return(my_type);
-}
-
-/*********************************************************************//**
-Initializes the server module: allocates srv_sys, creates the kernel mutex
-and the InnoDB monitor mutex, allocates the server thread table, the MySQL
-thread wait table and the concurrency wait slots (creating one event for
-every slot), zeroes the per-type thread counters, and initializes the dummy
-infimum/supremum indexes and the INFORMATION SCHEMA cache. The memory
-allocated here is released in srv_free(). */
-UNIV_INTERN
-void
-srv_init(void)
-/*==========*/
-{
- srv_conc_slot_t* conc_slot;
- srv_slot_t* slot;
- ulint i;
-
- srv_sys = mem_alloc(sizeof(srv_sys_t));
-
- kernel_mutex_temp = mem_alloc(sizeof(mutex_t)); /* NOTE(review):
-     kernel_mutex presumably resolves to this
-     allocation - confirm in the header */
- mutex_create(&kernel_mutex, SYNC_KERNEL);
-
- mutex_create(&srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK);
-
- srv_sys->threads = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t));
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) { /* server thread table */
- slot = srv_table_get_nth_slot(i);
- slot->in_use = FALSE;
- slot->type=0; /* Avoid purify errors */
- slot->event = os_event_create(NULL);
- ut_a(slot->event);
- }
-
- srv_mysql_table = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t));
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) { /* MySQL thread wait table */
- slot = srv_mysql_table + i;
- slot->in_use = FALSE;
- slot->type = 0;
- slot->event = os_event_create(NULL);
- ut_a(slot->event);
- }
-
- srv_lock_timeout_thread_event = os_event_create(NULL);
-
- for (i = 0; i < SRV_MASTER + 1; i++) { /* one counter pair per thread
-     type */
- srv_n_threads_active[i] = 0;
- srv_n_threads[i] = 0;
-#if 0
- srv_meter[i] = 30;
- srv_meter_low_water[i] = 50;
- srv_meter_high_water[i] = 100;
- srv_meter_high_water2[i] = 200;
- srv_meter_foreground[i] = 250;
-#endif
- }
-
- UT_LIST_INIT(srv_sys->tasks);
-
- /* Create dummy indexes for infimum and supremum records */
-
- dict_ind_init();
-
- /* Init the server concurrency restriction data structures */
-
- os_fast_mutex_init(&srv_conc_mutex);
-
- UT_LIST_INIT(srv_conc_queue);
-
- srv_conc_slots = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_conc_slot_t));
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) { /* concurrency wait slots */
- conc_slot = srv_conc_slots + i;
- conc_slot->reserved = FALSE;
- conc_slot->event = os_event_create(NULL);
- ut_a(conc_slot->event);
- }
-
- /* Initialize some INFORMATION SCHEMA internal structures */
- trx_i_s_cache_init(trx_i_s_cache);
-}
-
-/*********************************************************************//**
-Frees the data structures created in srv_init(): the concurrency mutex and
-wait slots, the server thread table, srv_sys itself, the kernel mutex storage
-and the MySQL thread wait table, and finally the INFORMATION SCHEMA cache.
-The freed global pointers are reset to NULL.
-NOTE(review): the per-slot events created in srv_init() are not freed here;
-presumably they are released elsewhere - confirm. */
-UNIV_INTERN
-void
-srv_free(void)
-/*==========*/
-{
- os_fast_mutex_free(&srv_conc_mutex);
- mem_free(srv_conc_slots);
- srv_conc_slots = NULL;
-
- mem_free(srv_sys->threads); /* must precede freeing srv_sys itself */
- mem_free(srv_sys);
- srv_sys = NULL;
-
- mem_free(kernel_mutex_temp);
- kernel_mutex_temp = NULL;
- mem_free(srv_mysql_table);
- srv_mysql_table = NULL;
-
- trx_i_s_cache_free(trx_i_s_cache);
-}
-
-/*********************************************************************//**
-Initializes the synchronization primitives, memory system, and the thread
-local storage. The recovery module's system variables are reset as well.
-The calls are made in a fixed sequence; NOTE(review): later steps presumably
-depend on earlier ones, so do not reorder without checking. */
-UNIV_INTERN
-void
-srv_general_init(void)
-/*==================*/
-{
- ut_mem_init();
- /* Reset the system variables in the recovery module. */
- recv_sys_var_init();
- os_sync_init();
- sync_init();
- mem_init(srv_mem_pool_size); /* pool size comes from the global
-     srv_mem_pool_size */
- thr_local_init();
-}
-
-/*======================= InnoDB Server FIFO queue =======================*/
-
-/* Maximum allowable purge history length. 0 means 'infinite' (the variable is unsigned, so it can never be negative). */
-UNIV_INTERN ulong srv_max_purge_lag = 0;
-
-/*********************************************************************//**
-Puts an OS thread to wait if there are too many concurrent threads
-(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue.
-A replication slave thread only waits with UT_WAIT_FOR and returns without
-being counted. A trx that still has tickets consumes one and returns at
-once. Otherwise, under srv_conc_mutex, the thread enters if there is room;
-if not, it may sleep once for SRV_THREAD_SLEEP_DELAY and retry, and finally
-it reserves a wait slot, queues it in srv_conc_queue and waits for the slot
-event, which srv_conc_force_exit_innodb() sets when another thread leaves. */
-UNIV_INTERN
-void
-srv_conc_enter_innodb(
-/*==================*/
- trx_t* trx) /*!< in: transaction object associated with the
- thread */
-{
- ibool has_slept = FALSE;
- srv_conc_slot_t* slot = NULL;
- ulint i;
-
- if (trx->mysql_thd != NULL
- && thd_is_replication_slave_thread(trx->mysql_thd)) {
-
- UT_WAIT_FOR(srv_conc_n_threads
- < (lint)srv_thread_concurrency,
- srv_replication_delay * 1000);
-
- return; /* the slave thread is not added to srv_conc_n_threads */
- }
-
- /* If trx has 'free tickets' to enter the engine left, then use one
- such ticket */
-
- if (trx->n_tickets_to_enter_innodb > 0) {
- trx->n_tickets_to_enter_innodb--;
-
- return;
- }
-
- os_fast_mutex_lock(&srv_conc_mutex);
-retry:
- if (trx->declared_to_be_inside_innodb) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: trying to declare trx"
- " to enter InnoDB, but\n"
- "InnoDB: it already is declared.\n", stderr);
- trx_print(stderr, trx, 0);
- putc('\n', stderr);
- os_fast_mutex_unlock(&srv_conc_mutex);
-
- return;
- }
-
- ut_ad(srv_conc_n_threads >= 0);
-
- if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
-
- srv_conc_n_threads++; /* there is room: enter immediately */
- trx->declared_to_be_inside_innodb = TRUE;
- trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
-
- os_fast_mutex_unlock(&srv_conc_mutex);
-
- return;
- }
-
- /* If the transaction is not holding resources, let it sleep
- for SRV_THREAD_SLEEP_DELAY microseconds, and try again then */
-
- if (!has_slept && !trx->has_search_latch
- && NULL == UT_LIST_GET_FIRST(trx->trx_locks)) {
-
- has_slept = TRUE; /* We let it sleep only once to avoid
- starvation */
-
- srv_conc_n_waiting_threads++;
-
- os_fast_mutex_unlock(&srv_conc_mutex); /* never sleep while
-     holding the mutex */
-
- trx->op_info = "sleeping before joining InnoDB queue";
-
- /* Peter Zaitsev suggested that we take the sleep away
- altogether. But the sleep may be good in pathological
- situations of lots of thread switches. Simply put some
- threads aside for a while to reduce the number of thread
- switches. */
- if (SRV_THREAD_SLEEP_DELAY > 0) {
- os_thread_sleep(SRV_THREAD_SLEEP_DELAY);
- }
-
- trx->op_info = "";
-
- os_fast_mutex_lock(&srv_conc_mutex);
-
- srv_conc_n_waiting_threads--;
-
- goto retry; /* re-check everything with the mutex held again */
- }
-
- /* Too many threads inside: put the current thread to a queue */
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) { /* find a free wait slot */
- slot = srv_conc_slots + i;
-
- if (!slot->reserved) {
-
- break;
- }
- }
-
- if (i == OS_THREAD_MAX_N) {
- /* Could not find a free wait slot, we must let the
- thread enter */
-
- srv_conc_n_threads++;
- trx->declared_to_be_inside_innodb = TRUE;
- trx->n_tickets_to_enter_innodb = 0;
-
- os_fast_mutex_unlock(&srv_conc_mutex);
-
- return;
- }
-
- /* Release possible search system latch this thread has */
- if (trx->has_search_latch) {
- trx_search_latch_release_if_reserved(trx);
- }
-
- /* Add to the queue */
- slot->reserved = TRUE;
- slot->wait_ended = FALSE;
-
- UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot);
-
- os_event_reset(slot->event); /* reset before the mutex is released,
-     i.e. before a releasing thread can set it */
-
- srv_conc_n_waiting_threads++;
-
- os_fast_mutex_unlock(&srv_conc_mutex);
-
- /* Go to wait for the event; when a thread leaves InnoDB it will
- release this thread */
-
- trx->op_info = "waiting in InnoDB queue";
-
- os_event_wait(slot->event);
-
- trx->op_info = "";
-
- os_fast_mutex_lock(&srv_conc_mutex);
-
- srv_conc_n_waiting_threads--;
-
- /* NOTE that the thread which released this thread already
- incremented the thread counter on behalf of this thread */
-
- slot->reserved = FALSE;
-
- UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot);
-
- trx->declared_to_be_inside_innodb = TRUE;
- trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
-
- os_fast_mutex_unlock(&srv_conc_mutex);
-}
-
-/*********************************************************************//**
-Declares the thread to be inside InnoDB without checking how many threads
-are already inside: the thread counter is incremented unconditionally and
-the trx is given a single ticket. Does nothing when srv_thread_concurrency
-is zero. This must be called when a thread ends a lock wait. */
-UNIV_INTERN
-void
-srv_conc_force_enter_innodb(
-/*========================*/
-	trx_t*	trx)	/*!< in: transaction object associated with the
-			thread */
-{
-	if (UNIV_LIKELY(srv_thread_concurrency == 0)) {
-		/* Concurrency limiting is switched off */
-		return;
-	}
-
-	ut_ad(srv_conc_n_threads >= 0);
-
-	os_fast_mutex_lock(&srv_conc_mutex);
-
-	trx->n_tickets_to_enter_innodb = 1;
-	trx->declared_to_be_inside_innodb = TRUE;
-	srv_conc_n_threads++;
-
-	os_fast_mutex_unlock(&srv_conc_mutex);
-}
-
-/*********************************************************************//**
-Declares the thread definitely outside InnoDB: decrements the thread counter,
-clears the trx's tickets and, if there is now room, hands the freed place to
-the first queued thread whose wait has not yet been ended. This must be
-called when a thread exits InnoDB in a lock wait or at the end of an SQL
-statement. Replication slave threads and threads not declared inside InnoDB
-return without doing anything. */
-UNIV_INTERN
-void
-srv_conc_force_exit_innodb(
-/*=======================*/
-	trx_t*	trx)	/*!< in: transaction object associated with the
-			thread */
-{
-	srv_conc_slot_t*	waiter = NULL;
-
-	if (trx->mysql_thd != NULL
-	    && thd_is_replication_slave_thread(trx->mysql_thd)) {
-
-		return;
-	}
-
-	if (!trx->declared_to_be_inside_innodb) {
-
-		return;
-	}
-
-	os_fast_mutex_lock(&srv_conc_mutex);
-
-	ut_ad(srv_conc_n_threads > 0);
-	srv_conc_n_threads--;
-	trx->declared_to_be_inside_innodb = FALSE;
-	trx->n_tickets_to_enter_innodb = 0;
-
-	if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
-		/* Skip the queued threads which some other thread has
-		already released but which have not yet woken up */
-
-		for (waiter = UT_LIST_GET_FIRST(srv_conc_queue);
-		     waiter != NULL && waiter->wait_ended == TRUE;
-		     waiter = UT_LIST_GET_NEXT(srv_conc_queue, waiter)) {
-			/* No op */
-		}
-
-		if (waiter != NULL) {
-			/* The place is taken on behalf of the released
-			thread, hence the counter goes up again */
-
-			waiter->wait_ended = TRUE;
-			srv_conc_n_threads++;
-		}
-	}
-
-	os_fast_mutex_unlock(&srv_conc_mutex);
-
-	/* The event is signalled only after the mutex has been released */
-	if (waiter != NULL) {
-		os_event_set(waiter->event);
-	}
-}
-
-/*********************************************************************//**
-This must be called when a thread exits InnoDB. While the trx still has
-tickets nothing is done: we pretend the thread is still inside InnoDB though
-it now leaves the engine, which saves a lot of semaphore operations.
-srv_conc_force_exit_innodb() is what declares the thread definitely outside
-InnoDB; it should be called when there is a lock wait or an SQL statement
-ends, and it is called from here once the tickets have run out. */
-UNIV_INTERN
-void
-srv_conc_exit_innodb(
-/*=================*/
-	trx_t*	trx)	/*!< in: transaction object associated with the
-			thread */
-{
-	if (!(trx->n_tickets_to_enter_innodb > 0)) {
-
-		srv_conc_force_exit_innodb(trx);
-	}
-}
-
-/*========================================================================*/
-
-/*********************************************************************//**
-Converts the init parameter values to the units used inside InnoDB: the data
-file sizes and srv_last_file_size_max are multiplied by the number of pages
-in (1024 * 1024) bytes, the log file and log buffer sizes are divided by
-UNIV_PAGE_SIZE, and srv_lock_table_size is derived from srv_buf_pool_size.
-@return DB_SUCCESS or error code */
-static
-ulint
-srv_normalize_init_values(void)
-/*===========================*/
-{
-	ulint	n_files	= srv_n_data_files;
-	ulint	file_no	= 0;
-
-	while (file_no < n_files) {
-		srv_data_file_sizes[file_no]
-			*= ((1024 * 1024) / UNIV_PAGE_SIZE);
-		file_no++;
-	}
-
-	srv_last_file_size_max *= ((1024 * 1024) / UNIV_PAGE_SIZE);
-
-	srv_log_file_size /= UNIV_PAGE_SIZE;
-
-	srv_log_buffer_size /= UNIV_PAGE_SIZE;
-
-	srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE);
-
-	return(DB_SUCCESS);
-}
-
-/*********************************************************************//**
-Boots the InnoDB server in three steps: normalizes the init parameter values
-given by MySQL, performs the general initialization (synchronization
-primitives, memory management, thread local storage) and then initializes
-this module. The last two steps are skipped if normalization fails.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ulint
-srv_boot(void)
-/*==========*/
-{
-	ulint	err	= srv_normalize_init_values();
-
-	if (err == DB_SUCCESS) {
-
-		srv_general_init();
-
-		srv_init();
-	}
-
-	return(err);
-}
-
-/*********************************************************************//**
-Claims the first unused slot of srv_mysql_table for the current MySQL OS
-thread and stores the thread id and handle in it. If every slot is in use,
-the contents of the whole table are printed to stderr and the server is
-brought down with ut_error.
-NOTE! The kernel mutex has to be reserved by the caller!
-@return reserved slot */
-static
-srv_slot_t*
-srv_table_reserve_slot_for_mysql(void)
-/*==================================*/
-{
-	srv_slot_t*	slot;
-	ulint		i;
-
-	ut_ad(mutex_own(&kernel_mutex));
-
-	for (i = 0; i < OS_THREAD_MAX_N; i++) {
-
-		slot = srv_mysql_table + i;
-
-		if (!slot->in_use) {
-
-			break;
-		}
-	}
-
-	if (i >= OS_THREAD_MAX_N) {
-		/* The table is full: report and crash intentionally */
-
-		ut_print_timestamp(stderr);
-
-		fprintf(stderr,
-			" InnoDB: There appear to be %lu MySQL"
-			" threads currently waiting\n"
-			"InnoDB: inside InnoDB, which is the"
-			" upper limit. Cannot continue operation.\n"
-			"InnoDB: We intentionally generate"
-			" a seg fault to print a stack trace\n"
-			"InnoDB: on Linux. But first we print"
-			" a list of waiting threads.\n", (ulong) i);
-
-		for (i = 0; i < OS_THREAD_MAX_N; i++) {
-
-			slot = srv_mysql_table + i;
-
-			fprintf(stderr,
-				"Slot %lu: thread id %lu, type %lu,"
-				" in use %lu, susp %lu, time %lu\n",
-				(ulong) i,
-				(ulong) os_thread_pf(slot->id),
-				(ulong) slot->type,
-				(ulong) slot->in_use,
-				(ulong) slot->suspended,
-				(ulong) difftime(ut_time(),
-						 slot->suspend_time));
-		}
-
-		ut_error;
-	}
-
-	ut_a(slot->in_use == FALSE);
-
-	slot->in_use = TRUE;
-	slot->id = os_thread_get_curr_id();
-	slot->handle = os_thread_get_curr();
-
-	return(slot);
-}
-
-/***************************************************************//**
-Puts a MySQL OS thread to wait for a lock to be released. If an error
-occurs during the wait trx->error_state associated with thr is
-!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
-are possible errors. DB_DEADLOCK is returned if selective deadlock
-resolution chose this transaction as a victim.
-
-The function returns at once if thr is already running again. Otherwise it
-reserves a slot in srv_mysql_table, declares the thread to be outside InnoDB,
-releases a possibly held data dictionary latch, waits for the slot event,
-and afterwards undoes these steps in reverse order. For row lock waits the
-lock wait statistics are updated; the waited time is accounted only if both
-the start and the finish timestamp could be read and they are in order. */
-UNIV_INTERN
-void
-srv_suspend_mysql_thread(
-/*=====================*/
-	que_thr_t*	thr)	/*!< in: query thread associated with the MySQL
-				OS thread */
-{
-	srv_slot_t*	slot;
-	os_event_t	event;
-	double		wait_time;
-	trx_t*		trx;
-	ulint		had_dict_lock;
-	ibool		was_declared_inside_innodb	= FALSE;
-	ib_int64_t	start_time			= 0;
-	ib_int64_t	finish_time;
-	ulint		diff_time;
-	ulint		sec;
-	ulint		usec;	/* microsecond part given by ut_usectime() */
-	ulong		lock_wait_timeout;
-
-	ut_ad(!mutex_own(&kernel_mutex));
-
-	trx = thr_get_trx(thr);
-
-	os_event_set(srv_lock_timeout_thread_event);
-
-	mutex_enter(&kernel_mutex);
-
-	trx->error_state = DB_SUCCESS;
-
-	if (thr->state == QUE_THR_RUNNING) {
-
-		ut_ad(thr->is_active == TRUE);
-
-		/* The lock has already been released or this transaction
-		was chosen as a deadlock victim: no need to suspend */
-
-		if (trx->was_chosen_as_deadlock_victim) {
-
-			trx->error_state = DB_DEADLOCK;
-			trx->was_chosen_as_deadlock_victim = FALSE;
-		}
-
-		mutex_exit(&kernel_mutex);
-
-		return;
-	}
-
-	ut_ad(thr->is_active == FALSE);
-
-	slot = srv_table_reserve_slot_for_mysql();
-
-	event = slot->event;
-
-	slot->thr = thr;
-
-	os_event_reset(event);
-
-	slot->suspend_time = ut_time();
-
-	if (thr->lock_state == QUE_THR_LOCK_ROW) {
-		srv_n_lock_wait_count++;
-		srv_n_lock_wait_current_count++;
-
-		/* -1 marks a timestamp that could not be read */
-		if (ut_usectime(&sec, &usec) == -1) {
-			start_time = -1;
-		} else {
-			start_time = (ib_int64_t) sec * 1000000 + usec;
-		}
-	}
-	/* Wake the lock timeout monitor thread, if it is suspended */
-
-	os_event_set(srv_lock_timeout_thread_event);
-
-	mutex_exit(&kernel_mutex);
-
-	if (trx->declared_to_be_inside_innodb) {
-
-		was_declared_inside_innodb = TRUE;
-
-		/* We must declare this OS thread to exit InnoDB, since a
-		possible other thread holding a lock which this thread waits
-		for must be allowed to enter, sooner or later */
-
-		srv_conc_force_exit_innodb(trx);
-	}
-
-	had_dict_lock = trx->dict_operation_lock_mode;
-
-	switch (had_dict_lock) {
-	case RW_S_LATCH:
-		/* Release foreign key check latch */
-		row_mysql_unfreeze_data_dictionary(trx);
-		break;
-	case RW_X_LATCH:
-		/* Release fast index creation latch */
-		row_mysql_unlock_data_dictionary(trx);
-		break;
-	}
-
-	ut_a(trx->dict_operation_lock_mode == 0);
-
-	/* Suspend this thread and wait for the event. */
-
-	os_event_wait(event);
-
-	/* After resuming, reacquire the data dictionary latch if
-	necessary. */
-
-	switch (had_dict_lock) {
-	case RW_S_LATCH:
-		row_mysql_freeze_data_dictionary(trx);
-		break;
-	case RW_X_LATCH:
-		row_mysql_lock_data_dictionary(trx);
-		break;
-	}
-
-	if (was_declared_inside_innodb) {
-
-		/* Return back inside InnoDB */
-
-		srv_conc_force_enter_innodb(trx);
-	}
-
-	mutex_enter(&kernel_mutex);
-
-	/* Release the slot for others to use */
-
-	slot->in_use = FALSE;
-
-	wait_time = ut_difftime(ut_time(), slot->suspend_time);
-
-	if (thr->lock_state == QUE_THR_LOCK_ROW) {
-		if (ut_usectime(&sec, &usec) == -1) {
-			finish_time = -1;
-		} else {
-			finish_time = (ib_int64_t) sec * 1000000 + usec;
-		}
-
-		srv_n_lock_wait_current_count--;
-
-		/* Only update the time statistics if we successfully
-		retrieved the start and finish times (see Bug#36819) and
-		the clock did not go backwards in between. Otherwise the
-		unsigned difference would be garbage and would corrupt
-		the accumulated wait time as well as the maximum. */
-		if (start_time != -1 && finish_time != -1
-		    && finish_time >= start_time) {
-
-			diff_time = (ulint) (finish_time - start_time);
-
-			srv_n_lock_wait_time = srv_n_lock_wait_time
-				+ diff_time;
-
-			if (diff_time > srv_n_lock_max_wait_time) {
-				srv_n_lock_max_wait_time = diff_time;
-			}
-		}
-	}
-
-	if (trx->was_chosen_as_deadlock_victim) {
-
-		trx->error_state = DB_DEADLOCK;
-		trx->was_chosen_as_deadlock_victim = FALSE;
-	}
-
-	mutex_exit(&kernel_mutex);
-
-	/* InnoDB system transactions (such as the purge, and
-	incomplete transactions that are being rolled back after crash
-	recovery) will use the global value of
-	innodb_lock_wait_timeout, because trx->mysql_thd == NULL. */
-	lock_wait_timeout = thd_lock_wait_timeout(trx->mysql_thd);
-
-	/* A timeout of 100000000 or more is never reported as exceeded */
-	if (lock_wait_timeout < 100000000
-	    && wait_time > (double) lock_wait_timeout) {
-
-		trx->error_state = DB_LOCK_WAIT_TIMEOUT;
-	}
-}
-
-/********************************************************************//**
-Wakes up the MySQL OS thread which is waiting for a lock on behalf of thr:
-searches srv_mysql_table for a slot in use whose query thread is thr and
-sets the event of the first such slot. Nothing is done if there is no such
-slot, i.e., the thread is not suspended. The kernel mutex must be owned by
-the caller. */
-UNIV_INTERN
-void
-srv_release_mysql_thread_if_suspended(
-/*==================================*/
-	que_thr_t*	thr)	/*!< in: query thread associated with the
-				MySQL OS thread */
-{
-	ulint	slot_no;
-
-	ut_ad(mutex_own(&kernel_mutex));
-
-	for (slot_no = 0; slot_no < OS_THREAD_MAX_N; slot_no++) {
-
-		srv_slot_t*	waiting = srv_mysql_table + slot_no;
-
-		if (!waiting->in_use || waiting->thr != thr) {
-
-			continue;
-		}
-
-		/* Found: release the waiter and stop searching */
-		os_event_set(waiting->event);
-
-		break;
-	}
-}
-
-/******************************************************************//**
-Refreshes the values used to calculate per-second averages. While holding
-srv_innodb_monitor_mutex, stores the current time in srv_last_monitor_time,
-calls the refresh functions of the aio, log and buffer i/o statistics, and
-copies the current search and row operation counters to their *_old
-counterparts. */
-static
-void
-srv_refresh_innodb_monitor_stats(void)
-/*==================================*/
-{
- mutex_enter(&srv_innodb_monitor_mutex);
-
- srv_last_monitor_time = time(NULL);
-
- os_aio_refresh_stats();
-
- btr_cur_n_sea_old = btr_cur_n_sea; /* srv_printf_innodb_monitor() prints
-     the difference to these *_old values */
- btr_cur_n_non_sea_old = btr_cur_n_non_sea;
-
- log_refresh_stats();
-
- buf_refresh_io_stats();
-
- srv_n_rows_inserted_old = srv_n_rows_inserted;
- srv_n_rows_updated_old = srv_n_rows_updated;
- srv_n_rows_deleted_old = srv_n_rows_deleted;
- srv_n_rows_read_old = srv_n_rows_read;
-
- mutex_exit(&srv_innodb_monitor_mutex);
-}
-
-/******************************************************************//**
-Outputs to a file the output of the InnoDB Monitor. The whole report is
-written while holding srv_innodb_monitor_mutex; the stream is flushed after
-the mutex has been released. The per-second figures are computed against
-the time elapsed since srv_last_monitor_time, and the *_old counters they
-are based on are refreshed as a side effect. */
-UNIV_INTERN
-void
-srv_printf_innodb_monitor(
-/*======================*/
-	FILE*	file,		/*!< in: output stream */
-	ulint*	trx_start,	/*!< out: file position of the start of
-				the list of active transactions, or
-				ULINT_UNDEFINED if ftell() fails; may be
-				NULL */
-	ulint*	trx_end)	/*!< out: file position of the end of
-				the list of active transactions, or
-				ULINT_UNDEFINED if ftell() fails; may be
-				NULL */
-{
-	double	time_elapsed;
-	time_t	current_time;
-	ulint	n_reserved;
-
-	mutex_enter(&srv_innodb_monitor_mutex);
-
-	current_time = time(NULL);
-
-	/* We add 0.001 seconds to time_elapsed to prevent division
-	by zero if two users happen to call SHOW INNODB STATUS at the same
-	time */
-
-	time_elapsed = difftime(current_time, srv_last_monitor_time)
-		+ 0.001;
-
-	srv_last_monitor_time = time(NULL);
-
-	fputs("\n=====================================\n", file);
-
-	ut_print_timestamp(file);
-	fprintf(file,
-		" INNODB MONITOR OUTPUT\n"
-		"=====================================\n"
-		"Per second averages calculated from the last %lu seconds\n",
-		(ulong)time_elapsed);
-
-	fputs("----------\n"
-	      "BACKGROUND THREAD\n"
-	      "----------\n", file);
-	srv_print_master_thread_info(file);
-
-	fputs("----------\n"
-	      "SEMAPHORES\n"
-	      "----------\n", file);
-	sync_print(file);
-
-	/* Conceptually, srv_innodb_monitor_mutex has a very high latching
-	order level in sync0sync.h, while dict_foreign_err_mutex has a very
-	low level 135. Therefore we can reserve the latter mutex here without
-	a danger of a deadlock of threads. */
-
-	mutex_enter(&dict_foreign_err_mutex);
-
-	/* The section is printed only if the error file is not positioned
-	at its beginning */
-	if (ftell(dict_foreign_err_file) != 0L) {
-		fputs("------------------------\n"
-		      "LATEST FOREIGN KEY ERROR\n"
-		      "------------------------\n", file);
-		ut_copy_file(file, dict_foreign_err_file);
-	}
-
-	mutex_exit(&dict_foreign_err_mutex);
-
-	lock_print_info_summary(file);
-	if (trx_start) {
-		long	t = ftell(file);
-		if (t < 0) {
-			*trx_start = ULINT_UNDEFINED;
-		} else {
-			*trx_start = (ulint) t;
-		}
-	}
-	lock_print_info_all_transactions(file);
-	if (trx_end) {
-		long	t = ftell(file);
-		if (t < 0) {
-			*trx_end = ULINT_UNDEFINED;
-		} else {
-			*trx_end = (ulint) t;
-		}
-	}
-	fputs("--------\n"
-	      "FILE I/O\n"
-	      "--------\n", file);
-	os_aio_print(file);
-
-	fputs("-------------------------------------\n"
-	      "INSERT BUFFER AND ADAPTIVE HASH INDEX\n"
-	      "-------------------------------------\n", file);
-	ibuf_print(file);
-
-	ha_print_info(file, btr_search_sys->hash_index);
-
-	fprintf(file,
-		"%.2f hash searches/s, %.2f non-hash searches/s\n",
-		(btr_cur_n_sea - btr_cur_n_sea_old)
-		/ time_elapsed,
-		(btr_cur_n_non_sea - btr_cur_n_non_sea_old)
-		/ time_elapsed);
-	btr_cur_n_sea_old = btr_cur_n_sea;
-	btr_cur_n_non_sea_old = btr_cur_n_non_sea;
-
-	fputs("---\n"
-	      "LOG\n"
-	      "---\n", file);
-	log_print(file);
-
-	fputs("----------------------\n"
-	      "BUFFER POOL AND MEMORY\n"
-	      "----------------------\n", file);
-	fprintf(file,
-		"Total memory allocated " ULINTPF
-		"; in additional pool allocated " ULINTPF "\n",
-		ut_total_allocated_memory,
-		mem_pool_get_reserved(mem_comm_pool));
-	fprintf(file, "Dictionary memory allocated " ULINTPF "\n",
-		dict_sys->size);
-
-	buf_print_io(file);
-
-	fputs("--------------\n"
-	      "ROW OPERATIONS\n"
-	      "--------------\n", file);
-	fprintf(file, "%ld queries inside InnoDB, %lu queries in queue\n",
-		(long) srv_conc_n_threads,
-		(ulong) srv_conc_n_waiting_threads);
-
-	/* The list length is cast explicitly, like every other %lu
-	argument in this function, so that the argument type matches the
-	conversion also where the length type is not unsigned long. */
-	fprintf(file, "%lu read views open inside InnoDB\n",
-		(ulong) UT_LIST_GET_LEN(trx_sys->view_list));
-
-	n_reserved = fil_space_get_n_reserved_extents(0);
-	if (n_reserved > 0) {
-		fprintf(file,
-			"%lu tablespace extents now reserved for"
-			" B-tree split operations\n",
-			(ulong) n_reserved);
-	}
-
-#ifdef UNIV_LINUX
-	fprintf(file, "Main thread process no. %lu, id %lu, state: %s\n",
-		(ulong) srv_main_thread_process_no,
-		(ulong) srv_main_thread_id,
-		srv_main_thread_op_info);
-#else
-	fprintf(file, "Main thread id %lu, state: %s\n",
-		(ulong) srv_main_thread_id,
-		srv_main_thread_op_info);
-#endif
-	fprintf(file,
-		"Number of rows inserted " ULINTPF
-		", updated " ULINTPF ", deleted " ULINTPF
-		", read " ULINTPF "\n",
-		srv_n_rows_inserted,
-		srv_n_rows_updated,
-		srv_n_rows_deleted,
-		srv_n_rows_read);
-	fprintf(file,
-		"%.2f inserts/s, %.2f updates/s,"
-		" %.2f deletes/s, %.2f reads/s\n",
-		(srv_n_rows_inserted - srv_n_rows_inserted_old)
-		/ time_elapsed,
-		(srv_n_rows_updated - srv_n_rows_updated_old)
-		/ time_elapsed,
-		(srv_n_rows_deleted - srv_n_rows_deleted_old)
-		/ time_elapsed,
-		(srv_n_rows_read - srv_n_rows_read_old)
-		/ time_elapsed);
-
-	/* Start a new averaging period for the row operation counters */
-	srv_n_rows_inserted_old = srv_n_rows_inserted;
-	srv_n_rows_updated_old = srv_n_rows_updated;
-	srv_n_rows_deleted_old = srv_n_rows_deleted;
-	srv_n_rows_read_old = srv_n_rows_read;
-
-	fputs("----------------------------\n"
-	      "END OF INNODB MONITOR OUTPUT\n"
-	      "============================\n", file);
-	mutex_exit(&srv_innodb_monitor_mutex);
-	fflush(file);
-}
-
-/******************************************************************//**
-Function to pass InnoDB status variables to MySQL. Copies the current values
-of the i/o, buffer pool, log, doublewrite, page, row lock and row operation
-counters into the export_vars struct while holding
-srv_innodb_monitor_mutex. */
-UNIV_INTERN
-void
-srv_export_innodb_status(void)
-/*==========================*/
-{
- mutex_enter(&srv_innodb_monitor_mutex);
-
- export_vars.innodb_data_pending_reads
- = os_n_pending_reads;
- export_vars.innodb_data_pending_writes
- = os_n_pending_writes;
- export_vars.innodb_data_pending_fsyncs
- = fil_n_pending_log_flushes
- + fil_n_pending_tablespace_flushes;
- export_vars.innodb_data_fsyncs = os_n_fsyncs;
- export_vars.innodb_data_read = srv_data_read;
- export_vars.innodb_data_reads = os_n_file_reads;
- export_vars.innodb_data_writes = os_n_file_writes;
- export_vars.innodb_data_written = srv_data_written;
- export_vars.innodb_buffer_pool_read_requests = buf_pool->stat.n_page_gets;
- export_vars.innodb_buffer_pool_write_requests
- = srv_buf_pool_write_requests;
- export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free;
- export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed;
- export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads;
- export_vars.innodb_buffer_pool_read_ahead
- = buf_pool->stat.n_ra_pages_read;
- export_vars.innodb_buffer_pool_read_ahead_evicted
- = buf_pool->stat.n_ra_pages_evicted;
- export_vars.innodb_buffer_pool_pages_data
- = UT_LIST_GET_LEN(buf_pool->LRU);
- export_vars.innodb_buffer_pool_pages_dirty
- = UT_LIST_GET_LEN(buf_pool->flush_list);
- export_vars.innodb_buffer_pool_pages_free
- = UT_LIST_GET_LEN(buf_pool->free);
-#ifdef UNIV_DEBUG
- export_vars.innodb_buffer_pool_pages_latched
- = buf_get_latched_pages_number();
-#endif /* UNIV_DEBUG */
- export_vars.innodb_buffer_pool_pages_total = buf_pool->curr_size;
-
- export_vars.innodb_buffer_pool_pages_misc = buf_pool->curr_size
- - UT_LIST_GET_LEN(buf_pool->LRU)
- - UT_LIST_GET_LEN(buf_pool->free); /* the pages which are neither in
-     the LRU list nor in the free list */
-#ifdef HAVE_ATOMIC_BUILTINS
- export_vars.innodb_have_atomic_builtins = 1;
-#else
- export_vars.innodb_have_atomic_builtins = 0;
-#endif
- export_vars.innodb_page_size = UNIV_PAGE_SIZE;
- export_vars.innodb_log_waits = srv_log_waits;
- export_vars.innodb_os_log_written = srv_os_log_written;
- export_vars.innodb_os_log_fsyncs = fil_n_log_flushes;
- export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes;
- export_vars.innodb_os_log_pending_writes = srv_os_log_pending_writes;
- export_vars.innodb_log_write_requests = srv_log_write_requests;
- export_vars.innodb_log_writes = srv_log_writes;
- export_vars.innodb_dblwr_pages_written = srv_dblwr_pages_written;
- export_vars.innodb_dblwr_writes = srv_dblwr_writes;
- export_vars.innodb_pages_created = buf_pool->stat.n_pages_created;
- export_vars.innodb_pages_read = buf_pool->stat.n_pages_read;
- export_vars.innodb_pages_written = buf_pool->stat.n_pages_written;
- export_vars.innodb_row_lock_waits = srv_n_lock_wait_count;
- export_vars.innodb_row_lock_current_waits
- = srv_n_lock_wait_current_count;
- export_vars.innodb_row_lock_time = srv_n_lock_wait_time / 1000; /*
-     NOTE(review): the lock wait times look
-     like microseconds accumulated in
-     srv_suspend_mysql_thread(), which would
-     make the exported values milliseconds -
-     confirm */
- if (srv_n_lock_wait_count > 0) { /* avoid division by zero */
- export_vars.innodb_row_lock_time_avg = (ulint)
- (srv_n_lock_wait_time / 1000 / srv_n_lock_wait_count);
- } else {
- export_vars.innodb_row_lock_time_avg = 0;
- }
- export_vars.innodb_row_lock_time_max
- = srv_n_lock_max_wait_time / 1000;
- export_vars.innodb_rows_read = srv_n_rows_read;
- export_vars.innodb_rows_inserted = srv_n_rows_inserted;
- export_vars.innodb_rows_updated = srv_n_rows_updated;
- export_vars.innodb_rows_deleted = srv_n_rows_deleted;
-
- mutex_exit(&srv_innodb_monitor_mutex);
-}
-
-/*********************************************************************//**
-A thread which wakes up threads whose lock wait may have lasted too long.
-This also prints the info output by various InnoDB monitors.
-@return a dummy parameter */
-UNIV_INTERN
-os_thread_ret_t
-srv_lock_timeout_and_monitor_thread(
-/*================================*/
- void* arg __attribute__((unused)))
- /*!< in: a dummy parameter required by
- os_thread_create */
-{
- srv_slot_t* slot;
- double time_elapsed;
- time_t current_time;
- time_t last_table_monitor_time;
- time_t last_tablespace_monitor_time;
- time_t last_monitor_time;
- ibool some_waits;
- double wait_time;
- ulint i;
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "Lock timeout thread starts, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif
- UT_NOT_USED(arg);
- srv_last_monitor_time = time(NULL);
- last_table_monitor_time = time(NULL);
- last_tablespace_monitor_time = time(NULL);
- last_monitor_time = time(NULL);
-loop:
- srv_lock_timeout_and_monitor_active = TRUE;
-
- /* When someone is waiting for a lock, we wake up every second
- and check if a timeout has passed for a lock wait */
-
- os_thread_sleep(1000000);
-
- current_time = time(NULL);
-
- time_elapsed = difftime(current_time, last_monitor_time);
-
- if (time_elapsed > 15) {
- last_monitor_time = time(NULL);
-
- if (srv_print_innodb_monitor) {
- srv_printf_innodb_monitor(stderr, NULL, NULL);
- }
-
- if (srv_innodb_status) {
- mutex_enter(&srv_monitor_file_mutex);
- rewind(srv_monitor_file);
- srv_printf_innodb_monitor(srv_monitor_file, NULL,
- NULL);
- os_file_set_eof(srv_monitor_file);
- mutex_exit(&srv_monitor_file_mutex);
- }
-
- if (srv_print_innodb_tablespace_monitor
- && difftime(current_time,
- last_tablespace_monitor_time) > 60) {
- last_tablespace_monitor_time = time(NULL);
-
- fputs("========================"
- "========================\n",
- stderr);
-
- ut_print_timestamp(stderr);
-
- fputs(" INNODB TABLESPACE MONITOR OUTPUT\n"
- "========================"
- "========================\n",
- stderr);
-
- fsp_print(0);
- fputs("Validating tablespace\n", stderr);
- fsp_validate(0);
- fputs("Validation ok\n"
- "---------------------------------------\n"
- "END OF INNODB TABLESPACE MONITOR OUTPUT\n"
- "=======================================\n",
- stderr);
- }
-
- if (srv_print_innodb_table_monitor
- && difftime(current_time, last_table_monitor_time) > 60) {
-
- last_table_monitor_time = time(NULL);
-
- fputs("===========================================\n",
- stderr);
-
- ut_print_timestamp(stderr);
-
- fputs(" INNODB TABLE MONITOR OUTPUT\n"
- "===========================================\n",
- stderr);
- dict_print();
-
- fputs("-----------------------------------\n"
- "END OF INNODB TABLE MONITOR OUTPUT\n"
- "==================================\n",
- stderr);
- }
- }
-
- mutex_enter(&kernel_mutex);
-
- some_waits = FALSE;
-
- /* Check of all slots if a thread is waiting there, and if it
- has exceeded the time limit */
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
-
- slot = srv_mysql_table + i;
-
- if (slot->in_use) {
- trx_t* trx;
- ulong lock_wait_timeout;
-
- some_waits = TRUE;
-
- wait_time = ut_difftime(ut_time(), slot->suspend_time);
-
- trx = thr_get_trx(slot->thr);
- lock_wait_timeout = thd_lock_wait_timeout(
- trx->mysql_thd);
-
- if (lock_wait_timeout < 100000000
- && (wait_time > (double) lock_wait_timeout
- || wait_time < 0)) {
-
- /* Timeout exceeded or a wrap-around in system
- time counter: cancel the lock request queued
- by the transaction and release possible
- other transactions waiting behind; it is
- possible that the lock has already been
- granted: in that case do nothing */
-
- if (trx->wait_lock) {
- lock_cancel_waiting_and_release(
- trx->wait_lock);
- }
- }
- }
- }
-
- os_event_reset(srv_lock_timeout_thread_event);
-
- mutex_exit(&kernel_mutex);
-
- if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
- goto exit_func;
- }
-
- if (some_waits || srv_print_innodb_monitor
- || srv_print_innodb_lock_monitor
- || srv_print_innodb_tablespace_monitor
- || srv_print_innodb_table_monitor) {
- goto loop;
- }
-
- /* No one was waiting for a lock and no monitor was active:
- suspend this thread */
-
- srv_lock_timeout_and_monitor_active = FALSE;
-
-#if 0
- /* The following synchronisation is disabled, since
- the InnoDB monitor output is to be updated every 15 seconds. */
- os_event_wait(srv_lock_timeout_thread_event);
-#endif
- goto loop;
-
-exit_func:
- srv_lock_timeout_and_monitor_active = FALSE;
-
- /* We count the number of threads in os_thread_exit(). A created
- thread should always use that to exit and not use return() to exit. */
-
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN;
-}
-
-/*********************************************************************//**
-A thread which prints warnings about semaphore waits which have lasted
-too long. These can be used to track bugs which cause hangs.
-@return a dummy parameter */
-UNIV_INTERN
-os_thread_ret_t
-srv_error_monitor_thread(
-/*=====================*/
- void* arg __attribute__((unused)))
- /*!< in: a dummy parameter required by
- os_thread_create */
-{
- /* number of successive fatal timeouts observed */
- ulint fatal_cnt = 0;
- ib_uint64_t old_lsn;
- ib_uint64_t new_lsn;
-
- old_lsn = srv_start_lsn;
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "Error monitor thread starts, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif
-loop:
- srv_error_monitor_active = TRUE;
-
- /* Try to track a strange bug reported by Harald Fuchs and others,
- where the lsn seems to decrease at times */
-
- new_lsn = log_get_lsn();
-
- if (new_lsn < old_lsn) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: old log sequence number %llu"
- " was greater\n"
- "InnoDB: than the new log sequence number %llu!\n"
- "InnoDB: Please submit a bug report"
- " to http://bugs.mysql.com\n",
- old_lsn, new_lsn);
- }
-
- old_lsn = new_lsn;
-
- if (difftime(time(NULL), srv_last_monitor_time) > 60) {
- /* We referesh InnoDB Monitor values so that averages are
- printed from at most 60 last seconds */
-
- srv_refresh_innodb_monitor_stats();
- }
-
- /* Update the statistics collected for deciding LRU
- eviction policy. */
- buf_LRU_stat_update();
-
- /* Update the statistics collected for flush rate policy. */
- buf_flush_stat_update();
-
- /* In case mutex_exit is not a memory barrier, it is
- theoretically possible some threads are left waiting though
- the semaphore is already released. Wake up those threads: */
-
- sync_arr_wake_threads_if_sema_free();
-
- if (sync_array_print_long_waits()) {
- fatal_cnt++;
- if (fatal_cnt > 10) {
-
- fprintf(stderr,
- "InnoDB: Error: semaphore wait has lasted"
- " > %lu seconds\n"
- "InnoDB: We intentionally crash the server,"
- " because it appears to be hung.\n",
- (ulong) srv_fatal_semaphore_wait_threshold);
-
- ut_error;
- }
- } else {
- fatal_cnt = 0;
- }
-
- /* Flush stderr so that a database user gets the output
- to possible MySQL error file */
-
- fflush(stderr);
-
- os_thread_sleep(1000000);
-
- if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {
-
- goto loop;
- }
-
- srv_error_monitor_active = FALSE;
-
- /* We count the number of threads in os_thread_exit(). A created
- thread should always use that to exit and not use return() to exit. */
-
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN;
-}
-
-/*******************************************************************//**
-Tells the InnoDB server that there has been activity in the database
-and wakes up the master thread if it is suspended (not sleeping). Used
-in the MySQL interface. Note that there is a small chance that the master
-thread stays suspended (we do not protect our operation with the kernel
-mutex, for performace reasons). */
-UNIV_INTERN
-void
-srv_active_wake_master_thread(void)
-/*===============================*/
-{
- srv_activity_count++;
-
- if (srv_n_threads_active[SRV_MASTER] == 0) {
-
- mutex_enter(&kernel_mutex);
-
- srv_release_threads(SRV_MASTER, 1);
-
- mutex_exit(&kernel_mutex);
- }
-}
-
-/*******************************************************************//**
-Wakes up the master thread if it is suspended or being suspended. */
-UNIV_INTERN
-void
-srv_wake_master_thread(void)
-/*========================*/
-{
- srv_activity_count++;
-
- mutex_enter(&kernel_mutex);
-
- srv_release_threads(SRV_MASTER, 1);
-
- mutex_exit(&kernel_mutex);
-}
-
-/**********************************************************************
-The master thread is tasked to ensure that flush of log file happens
-once every second in the background. This is to ensure that not more
-than one second of trxs are lost in case of crash when
-innodb_flush_logs_at_trx_commit != 1 */
-static
-void
-srv_sync_log_buffer_in_background(void)
-/*===================================*/
-{
- time_t current_time = time(NULL);
-
- srv_main_thread_op_info = "flushing log";
- if (difftime(current_time, srv_last_log_flush_time) >= 1) {
- log_buffer_sync_in_background(TRUE);
- srv_last_log_flush_time = current_time;
- srv_log_writes_and_flush++;
- }
-}
-
-/*********************************************************************//**
-The master thread controlling the server.
-@return a dummy parameter */
-UNIV_INTERN
-os_thread_ret_t
-srv_master_thread(
-/*==============*/
- void* arg __attribute__((unused)))
- /*!< in: a dummy parameter required by
- os_thread_create */
-{
- os_event_t event;
- ulint old_activity_count;
- ulint n_pages_purged = 0;
- ulint n_bytes_merged;
- ulint n_pages_flushed;
- ulint n_bytes_archived;
- ulint n_tables_to_drop;
- ulint n_ios;
- ulint n_ios_old;
- ulint n_ios_very_old;
- ulint n_pend_ios;
- ibool skip_sleep = FALSE;
- ulint i;
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "Master thread starts, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif
- srv_main_thread_process_no = os_proc_get_number();
- srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
-
- srv_table_reserve_slot(SRV_MASTER);
-
- mutex_enter(&kernel_mutex);
-
- srv_n_threads_active[SRV_MASTER]++;
-
- mutex_exit(&kernel_mutex);
-
-loop:
- /*****************************************************************/
- /* ---- When there is database activity by users, we cycle in this
- loop */
-
- srv_main_thread_op_info = "reserving kernel mutex";
-
- n_ios_very_old = log_sys->n_log_ios + buf_pool->stat.n_pages_read
- + buf_pool->stat.n_pages_written;
- mutex_enter(&kernel_mutex);
-
- /* Store the user activity counter at the start of this loop */
- old_activity_count = srv_activity_count;
-
- mutex_exit(&kernel_mutex);
-
- if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
-
- goto suspend_thread;
- }
-
- /* ---- We run the following loop approximately once per second
- when there is database activity */
-
- srv_last_log_flush_time = time(NULL);
- skip_sleep = FALSE;
-
- for (i = 0; i < 10; i++) {
- n_ios_old = log_sys->n_log_ios + buf_pool->stat.n_pages_read
- + buf_pool->stat.n_pages_written;
- srv_main_thread_op_info = "sleeping";
- srv_main_1_second_loops++;
-
- if (!skip_sleep) {
-
- os_thread_sleep(1000000);
- srv_main_sleeps++;
- }
-
- skip_sleep = FALSE;
-
- /* ALTER TABLE in MySQL requires on Unix that the table handler
- can drop tables lazily after there no longer are SELECT
- queries to them. */
-
- srv_main_thread_op_info = "doing background drop tables";
-
- row_drop_tables_for_mysql_in_background();
-
- srv_main_thread_op_info = "";
-
- if (srv_fast_shutdown && srv_shutdown_state > 0) {
-
- goto background_loop;
- }
-
- /* Flush logs if needed */
- srv_sync_log_buffer_in_background();
-
- srv_main_thread_op_info = "making checkpoint";
- log_free_check();
-
- /* If i/os during one second sleep were less than 5% of
- capacity, we assume that there is free disk i/o capacity
- available, and it makes sense to do an insert buffer merge. */
-
- n_pend_ios = buf_get_n_pending_ios()
- + log_sys->n_pending_writes;
- n_ios = log_sys->n_log_ios + buf_pool->stat.n_pages_read
- + buf_pool->stat.n_pages_written;
- if (n_pend_ios < SRV_PEND_IO_THRESHOLD
- && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) {
- srv_main_thread_op_info = "doing insert buffer merge";
- ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
-
- /* Flush logs if needed */
- srv_sync_log_buffer_in_background();
- }
-
- if (UNIV_UNLIKELY(buf_get_modified_ratio_pct()
- > srv_max_buf_pool_modified_pct)) {
-
- /* Try to keep the number of modified pages in the
- buffer pool under the limit wished by the user */
-
- srv_main_thread_op_info =
- "flushing buffer pool pages";
- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
- PCT_IO(100),
- IB_ULONGLONG_MAX);
-
- /* If we had to do the flush, it may have taken
- even more than 1 second, and also, there may be more
- to flush. Do not sleep 1 second during the next
- iteration of this loop. */
-
- skip_sleep = TRUE;
- } else if (srv_adaptive_flushing) {
-
- /* Try to keep the rate of flushing of dirty
- pages such that redo log generation does not
- produce bursts of IO at checkpoint time. */
- ulint n_flush = buf_flush_get_desired_flush_rate();
-
- if (n_flush) {
- srv_main_thread_op_info =
- "flushing buffer pool pages";
- n_flush = ut_min(PCT_IO(100), n_flush);
- n_pages_flushed =
- buf_flush_batch(
- BUF_FLUSH_LIST,
- n_flush,
- IB_ULONGLONG_MAX);
- skip_sleep = TRUE;
- }
- }
-
- if (srv_activity_count == old_activity_count) {
-
- /* There is no user activity at the moment, go to
- the background loop */
-
- goto background_loop;
- }
- }
-
- /* ---- We perform the following code approximately once per
- 10 seconds when there is database activity */
-
-#ifdef MEM_PERIODIC_CHECK
- /* Check magic numbers of every allocated mem block once in 10
- seconds */
- mem_validate_all_blocks();
-#endif
- /* If i/os during the 10 second period were less than 200% of
- capacity, we assume that there is free disk i/o capacity
- available, and it makes sense to flush srv_io_capacity pages.
-
- Note that this is done regardless of the fraction of dirty
- pages relative to the max requested by the user. The one second
- loop above requests writes for that case. The writes done here
- are not required, and may be disabled. */
-
- n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
- n_ios = log_sys->n_log_ios + buf_pool->stat.n_pages_read
- + buf_pool->stat.n_pages_written;
-
- srv_main_10_second_loops++;
- if (n_pend_ios < SRV_PEND_IO_THRESHOLD
- && (n_ios - n_ios_very_old < SRV_PAST_IO_ACTIVITY)) {
-
- srv_main_thread_op_info = "flushing buffer pool pages";
- buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
- IB_ULONGLONG_MAX);
-
- /* Flush logs if needed */
- srv_sync_log_buffer_in_background();
- }
-
- /* We run a batch of insert buffer merge every 10 seconds,
- even if the server were active */
-
- srv_main_thread_op_info = "doing insert buffer merge";
- ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
-
- /* Flush logs if needed */
- srv_sync_log_buffer_in_background();
-
- /* We run a full purge every 10 seconds, even if the server
- were active */
- do {
-
- if (srv_fast_shutdown && srv_shutdown_state > 0) {
-
- goto background_loop;
- }
-
- srv_main_thread_op_info = "purging";
- n_pages_purged = trx_purge();
-
- /* Flush logs if needed */
- srv_sync_log_buffer_in_background();
-
- } while (n_pages_purged);
-
- srv_main_thread_op_info = "flushing buffer pool pages";
-
- /* Flush a few oldest pages to make a new checkpoint younger */
-
- if (buf_get_modified_ratio_pct() > 70) {
-
- /* If there are lots of modified pages in the buffer pool
- (> 70 %), we assume we can afford reserving the disk(s) for
- the time it requires to flush 100 pages */
-
- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
- PCT_IO(100),
- IB_ULONGLONG_MAX);
- } else {
- /* Otherwise, we only flush a small number of pages so that
- we do not unnecessarily use much disk i/o capacity from
- other work */
-
- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
- PCT_IO(10),
- IB_ULONGLONG_MAX);
- }
-
- srv_main_thread_op_info = "making checkpoint";
-
- /* Make a new checkpoint about once in 10 seconds */
-
- log_checkpoint(TRUE, FALSE);
-
- srv_main_thread_op_info = "reserving kernel mutex";
-
- mutex_enter(&kernel_mutex);
-
- /* ---- When there is database activity, we jump from here back to
- the start of loop */
-
- if (srv_activity_count != old_activity_count) {
- mutex_exit(&kernel_mutex);
- goto loop;
- }
-
- mutex_exit(&kernel_mutex);
-
- /* If the database is quiet, we enter the background loop */
-
- /*****************************************************************/
-background_loop:
- /* ---- In this loop we run background operations when the server
- is quiet from user activity. Also in the case of a shutdown, we
- loop here, flushing the buffer pool to the data files. */
-
- /* The server has been quiet for a while: start running background
- operations */
- srv_main_background_loops++;
- srv_main_thread_op_info = "doing background drop tables";
-
- n_tables_to_drop = row_drop_tables_for_mysql_in_background();
-
- if (n_tables_to_drop > 0) {
- /* Do not monopolize the CPU even if there are tables waiting
- in the background drop queue. (It is essentially a bug if
- MySQL tries to drop a table while there are still open handles
- to it and we had to put it to the background drop queue.) */
-
- os_thread_sleep(100000);
- }
-
- srv_main_thread_op_info = "purging";
-
- /* Run a full purge */
- do {
- if (srv_fast_shutdown && srv_shutdown_state > 0) {
-
- break;
- }
-
- srv_main_thread_op_info = "purging";
- n_pages_purged = trx_purge();
-
- /* Flush logs if needed */
- srv_sync_log_buffer_in_background();
-
- } while (n_pages_purged);
-
- srv_main_thread_op_info = "reserving kernel mutex";
-
- mutex_enter(&kernel_mutex);
- if (srv_activity_count != old_activity_count) {
- mutex_exit(&kernel_mutex);
- goto loop;
- }
- mutex_exit(&kernel_mutex);
-
- srv_main_thread_op_info = "doing insert buffer merge";
-
- if (srv_fast_shutdown && srv_shutdown_state > 0) {
- n_bytes_merged = 0;
- } else {
- /* This should do an amount of IO similar to the number of
- dirty pages that will be flushed in the call to
- buf_flush_batch below. Otherwise, the system favors
- clean pages over cleanup throughput. */
- n_bytes_merged = ibuf_contract_for_n_pages(FALSE,
- PCT_IO(100));
- }
-
- srv_main_thread_op_info = "reserving kernel mutex";
-
- mutex_enter(&kernel_mutex);
- if (srv_activity_count != old_activity_count) {
- mutex_exit(&kernel_mutex);
- goto loop;
- }
- mutex_exit(&kernel_mutex);
-
-flush_loop:
- srv_main_thread_op_info = "flushing buffer pool pages";
- srv_main_flush_loops++;
- if (srv_fast_shutdown < 2) {
- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
- PCT_IO(100),
- IB_ULONGLONG_MAX);
- } else {
- /* In the fastest shutdown we do not flush the buffer pool
- to data files: we set n_pages_flushed to 0 artificially. */
-
- n_pages_flushed = 0;
- }
-
- srv_main_thread_op_info = "reserving kernel mutex";
-
- mutex_enter(&kernel_mutex);
- if (srv_activity_count != old_activity_count) {
- mutex_exit(&kernel_mutex);
- goto loop;
- }
- mutex_exit(&kernel_mutex);
-
- srv_main_thread_op_info = "waiting for buffer pool flush to end";
- buf_flush_wait_batch_end(BUF_FLUSH_LIST);
-
- /* Flush logs if needed */
- srv_sync_log_buffer_in_background();
-
- srv_main_thread_op_info = "making checkpoint";
-
- log_checkpoint(TRUE, FALSE);
-
- if (buf_get_modified_ratio_pct() > srv_max_buf_pool_modified_pct) {
-
- /* Try to keep the number of modified pages in the
- buffer pool under the limit wished by the user */
-
- goto flush_loop;
- }
-
- srv_main_thread_op_info = "reserving kernel mutex";
-
- mutex_enter(&kernel_mutex);
- if (srv_activity_count != old_activity_count) {
- mutex_exit(&kernel_mutex);
- goto loop;
- }
- mutex_exit(&kernel_mutex);
- /*
- srv_main_thread_op_info = "archiving log (if log archive is on)";
-
- log_archive_do(FALSE, &n_bytes_archived);
- */
- n_bytes_archived = 0;
-
- /* Keep looping in the background loop if still work to do */
-
- if (srv_fast_shutdown && srv_shutdown_state > 0) {
- if (n_tables_to_drop + n_pages_flushed
- + n_bytes_archived != 0) {
-
- /* If we are doing a fast shutdown (= the default)
- we do not do purge or insert buffer merge. But we
- flush the buffer pool completely to disk.
- In a 'very fast' shutdown we do not flush the buffer
- pool to data files: we have set n_pages_flushed to
- 0 artificially. */
-
- goto background_loop;
- }
- } else if (n_tables_to_drop
- + n_pages_purged + n_bytes_merged + n_pages_flushed
- + n_bytes_archived != 0) {
- /* In a 'slow' shutdown we run purge and the insert buffer
- merge to completion */
-
- goto background_loop;
- }
-
- /* There is no work for background operations either: suspend
- master thread to wait for more server activity */
-
-suspend_thread:
- srv_main_thread_op_info = "suspending";
-
- mutex_enter(&kernel_mutex);
-
- if (row_get_background_drop_list_len_low() > 0) {
- mutex_exit(&kernel_mutex);
-
- goto loop;
- }
-
- event = srv_suspend_thread();
-
- mutex_exit(&kernel_mutex);
-
- /* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
- waits for database activity to die down when converting < 4.1.x
- databases, and relies on this string being exactly as it is. InnoDB
- manual also mentions this string in several places. */
- srv_main_thread_op_info = "waiting for server activity";
-
- os_event_wait(event);
-
- if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
- /* This is only extra safety, the thread should exit
- already when the event wait ends */
-
- os_thread_exit(NULL);
- }
-
- /* When there is user activity, InnoDB will set the event and the
- main thread goes back to loop. */
-
- goto loop;
-
- OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
-}
diff --git a/storage/innodb_plugin/srv/srv0start.c b/storage/innodb_plugin/srv/srv0start.c
deleted file mode 100644
index d5f6120ca31..00000000000
--- a/storage/innodb_plugin/srv/srv0start.c
+++ /dev/null
@@ -1,2071 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-/***********************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2009, Percona Inc.
-
-Portions of this file contain modifications contributed and copyrighted
-by Percona Inc.. Those modifications are
-gratefully acknowledged and are described briefly in the InnoDB
-documentation. The contributions by Percona Inc. are incorporated with
-their permission, and subject to the conditions contained in the file
-COPYING.Percona.
-
-This program is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-***********************************************************************/
-
-/********************************************************************//**
-@file srv/srv0start.c
-Starts the InnoDB database server
-
-Created 2/16/1996 Heikki Tuuri
-*************************************************************************/
-
-#include "ut0mem.h"
-#include "mem0mem.h"
-#include "data0data.h"
-#include "data0type.h"
-#include "dict0dict.h"
-#include "buf0buf.h"
-#include "os0file.h"
-#include "os0thread.h"
-#include "fil0fil.h"
-#include "fsp0fsp.h"
-#include "rem0rec.h"
-#include "mtr0mtr.h"
-#include "log0log.h"
-#include "log0recv.h"
-#include "page0page.h"
-#include "page0cur.h"
-#include "trx0trx.h"
-#include "trx0sys.h"
-#include "btr0btr.h"
-#include "btr0cur.h"
-#include "rem0rec.h"
-#include "ibuf0ibuf.h"
-#include "srv0start.h"
-#include "srv0srv.h"
-#ifndef UNIV_HOTBACKUP
-# include "os0proc.h"
-# include "sync0sync.h"
-# include "buf0flu.h"
-# include "buf0rea.h"
-# include "dict0boot.h"
-# include "dict0load.h"
-# include "que0que.h"
-# include "usr0sess.h"
-# include "lock0lock.h"
-# include "trx0roll.h"
-# include "trx0purge.h"
-# include "lock0lock.h"
-# include "pars0pars.h"
-# include "btr0sea.h"
-# include "rem0cmp.h"
-# include "dict0crea.h"
-# include "row0ins.h"
-# include "row0sel.h"
-# include "row0upd.h"
-# include "row0row.h"
-# include "row0mysql.h"
-# include "btr0pcur.h"
-# include "thr0loc.h"
-# include "os0sync.h" /* for INNODB_RW_LOCKS_USE_ATOMICS */
-
-/** Log sequence number immediately after startup */
-UNIV_INTERN ib_uint64_t srv_start_lsn;
-/** Log sequence number at shutdown */
-UNIV_INTERN ib_uint64_t srv_shutdown_lsn;
-
-#ifdef HAVE_DARWIN_THREADS
-# include <sys/utsname.h>
-/** TRUE if the F_FULLFSYNC option is available */
-UNIV_INTERN ibool srv_have_fullfsync = FALSE;
-#endif
-
-/** TRUE if a raw partition is in use */
-UNIV_INTERN ibool srv_start_raw_disk_in_use = FALSE;
-
-/** TRUE if the server is being started, before rolling back any
-incomplete transactions */
-UNIV_INTERN ibool srv_startup_is_before_trx_rollback_phase = FALSE;
-/** TRUE if the server is being started */
-UNIV_INTERN ibool srv_is_being_started = FALSE;
-/** TRUE if the server was successfully started */
-UNIV_INTERN ibool srv_was_started = FALSE;
-/** TRUE if innobase_start_or_create_for_mysql() has been called */
-static ibool srv_start_has_been_called = FALSE;
-
-/** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to
-SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
-UNIV_INTERN enum srv_shutdown_state srv_shutdown_state = SRV_SHUTDOWN_NONE;
-
-/** Files comprising the system tablespace */
-static os_file_t files[1000];
-
-/** Mutex protecting the ios count */
-static mutex_t ios_mutex;
-/** Count of I/O operations in io_handler_thread() */
-static ulint ios;
-
-/** io_handler_thread parameters for thread identification */
-static ulint n[SRV_MAX_N_IO_THREADS + 5];
-/** io_handler_thread identifiers */
-static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 5];
-
-/** We use this mutex to test the return value of pthread_mutex_trylock
- on successful locking. HP-UX does NOT return 0, though Linux et al do. */
-static os_fast_mutex_t srv_os_test_mutex;
-
-/** Name of srv_monitor_file */
-static char* srv_monitor_file_name;
-#endif /* !UNIV_HOTBACKUP */
-
-/** */
-#define SRV_N_PENDING_IOS_PER_THREAD OS_AIO_N_PENDING_IOS_PER_THREAD
-#define SRV_MAX_N_PENDING_SYNC_IOS 100
-
-
-/*********************************************************************//**
-Convert a numeric string that optionally ends in G or M, to a number
-containing megabytes.
-@return next character in string */
-static
-char*
-srv_parse_megabytes(
-/*================*/
- char* str, /*!< in: string containing a quantity in bytes */
- ulint* megs) /*!< out: the number in megabytes */
-{
- char* endp;
- ulint size;
-
- size = strtoul(str, &endp, 10);
-
- str = endp;
-
- switch (*str) {
- case 'G': case 'g':
- size *= 1024;
- /* fall through */
- case 'M': case 'm':
- str++;
- break;
- default:
- size /= 1024 * 1024;
- break;
- }
-
- *megs = size;
- return(str);
-}
-
-/*********************************************************************//**
-Reads the data files and their sizes from a character string given in
-the .cnf file.
-@return TRUE if ok, FALSE on parse error */
-UNIV_INTERN
-ibool
-srv_parse_data_file_paths_and_sizes(
-/*================================*/
- char* str) /*!< in/out: the data file path string */
-{
- char* input_str;
- char* path;
- ulint size;
- ulint i = 0;
-
- srv_auto_extend_last_data_file = FALSE;
- srv_last_file_size_max = 0;
- srv_data_file_names = NULL;
- srv_data_file_sizes = NULL;
- srv_data_file_is_raw_partition = NULL;
-
- input_str = str;
-
- /* First calculate the number of data files and check syntax:
- path:size[M | G];path:size[M | G]... . Note that a Windows path may
- contain a drive name and a ':'. */
-
- while (*str != '\0') {
- path = str;
-
- while ((*str != ':' && *str != '\0')
- || (*str == ':'
- && (*(str + 1) == '\\' || *(str + 1) == '/'
- || *(str + 1) == ':'))) {
- str++;
- }
-
- if (*str == '\0') {
- return(FALSE);
- }
-
- str++;
-
- str = srv_parse_megabytes(str, &size);
-
- if (0 == strncmp(str, ":autoextend",
- (sizeof ":autoextend") - 1)) {
-
- str += (sizeof ":autoextend") - 1;
-
- if (0 == strncmp(str, ":max:",
- (sizeof ":max:") - 1)) {
-
- str += (sizeof ":max:") - 1;
-
- str = srv_parse_megabytes(str, &size);
- }
-
- if (*str != '\0') {
-
- return(FALSE);
- }
- }
-
- if (strlen(str) >= 6
- && *str == 'n'
- && *(str + 1) == 'e'
- && *(str + 2) == 'w') {
- str += 3;
- }
-
- if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
- str += 3;
- }
-
- if (size == 0) {
- return(FALSE);
- }
-
- i++;
-
- if (*str == ';') {
- str++;
- } else if (*str != '\0') {
-
- return(FALSE);
- }
- }
-
- if (i == 0) {
- /* If innodb_data_file_path was defined it must contain
- at least one data file definition */
-
- return(FALSE);
- }
-
- srv_data_file_names = malloc(i * sizeof *srv_data_file_names);
- srv_data_file_sizes = malloc(i * sizeof *srv_data_file_sizes);
- srv_data_file_is_raw_partition = malloc(
- i * sizeof *srv_data_file_is_raw_partition);
-
- srv_n_data_files = i;
-
- /* Then store the actual values to our arrays */
-
- str = input_str;
- i = 0;
-
- while (*str != '\0') {
- path = str;
-
- /* Note that we must step over the ':' in a Windows path;
- a Windows path normally looks like C:\ibdata\ibdata1:1G, but
- a Windows raw partition may have a specification like
- \\.\C::1Gnewraw or \\.\PHYSICALDRIVE2:1Gnewraw */
-
- while ((*str != ':' && *str != '\0')
- || (*str == ':'
- && (*(str + 1) == '\\' || *(str + 1) == '/'
- || *(str + 1) == ':'))) {
- str++;
- }
-
- if (*str == ':') {
- /* Make path a null-terminated string */
- *str = '\0';
- str++;
- }
-
- str = srv_parse_megabytes(str, &size);
-
- srv_data_file_names[i] = path;
- srv_data_file_sizes[i] = size;
-
- if (0 == strncmp(str, ":autoextend",
- (sizeof ":autoextend") - 1)) {
-
- srv_auto_extend_last_data_file = TRUE;
-
- str += (sizeof ":autoextend") - 1;
-
- if (0 == strncmp(str, ":max:",
- (sizeof ":max:") - 1)) {
-
- str += (sizeof ":max:") - 1;
-
- str = srv_parse_megabytes(
- str, &srv_last_file_size_max);
- }
-
- if (*str != '\0') {
-
- return(FALSE);
- }
- }
-
- (srv_data_file_is_raw_partition)[i] = 0;
-
- if (strlen(str) >= 6
- && *str == 'n'
- && *(str + 1) == 'e'
- && *(str + 2) == 'w') {
- str += 3;
- (srv_data_file_is_raw_partition)[i] = SRV_NEW_RAW;
- }
-
- if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
- str += 3;
-
- if ((srv_data_file_is_raw_partition)[i] == 0) {
- (srv_data_file_is_raw_partition)[i] = SRV_OLD_RAW;
- }
- }
-
- i++;
-
- if (*str == ';') {
- str++;
- }
- }
-
- return(TRUE);
-}
-
-/*********************************************************************//**
-Reads log group home directories from a character string given in
-the .cnf file.
-@return TRUE if ok, FALSE on parse error */
-UNIV_INTERN
-ibool
-srv_parse_log_group_home_dirs(
-/*==========================*/
- char* str) /*!< in/out: character string */
-{
- char* input_str;
- char* path;
- ulint i = 0;
-
- srv_log_group_home_dirs = NULL;
-
- input_str = str;
-
- /* First calculate the number of directories and check syntax:
- path;path;... */
-
- while (*str != '\0') {
- path = str;
-
- while (*str != ';' && *str != '\0') {
- str++;
- }
-
- i++;
-
- if (*str == ';') {
- str++;
- } else if (*str != '\0') {
-
- return(FALSE);
- }
- }
-
- if (i != 1) {
- /* If innodb_log_group_home_dir was defined it must
- contain exactly one path definition under current MySQL */
-
- return(FALSE);
- }
-
- srv_log_group_home_dirs = malloc(i * sizeof *srv_log_group_home_dirs);
-
- /* Then store the actual values to our array */
-
- str = input_str;
- i = 0;
-
- while (*str != '\0') {
- path = str;
-
- while (*str != ';' && *str != '\0') {
- str++;
- }
-
- if (*str == ';') {
- *str = '\0';
- str++;
- }
-
- srv_log_group_home_dirs[i] = path;
-
- i++;
- }
-
- return(TRUE);
-}
-
-/*********************************************************************//**
-Frees the memory allocated by srv_parse_data_file_paths_and_sizes()
-and srv_parse_log_group_home_dirs(). */
-UNIV_INTERN
-void
-srv_free_paths_and_sizes(void)
-/*==========================*/
-{
- free(srv_data_file_names);
- srv_data_file_names = NULL;
- free(srv_data_file_sizes);
- srv_data_file_sizes = NULL;
- free(srv_data_file_is_raw_partition);
- srv_data_file_is_raw_partition = NULL;
- free(srv_log_group_home_dirs);
- srv_log_group_home_dirs = NULL;
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-I/o-handler thread function. Reads the aio segment number from arg and
-then loops forever: it calls fil_aio_wait() for that segment and, after
-each call returns, increments the counter ios under ios_mutex.
-@return OS_THREAD_DUMMY_RETURN */
-static
-os_thread_ret_t
-io_handler_thread(
-/*==============*/
-	void*	arg)	/*!< in: pointer to the number of the segment in
-			the aio array */
-{
-	ulint	segment;
-
-	segment = *((ulint*)arg);
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
-	/* ulint need not have the width of unsigned long, so cast the
-	arguments explicitly to match the %lu conversions */
-	fprintf(stderr, "Io handler thread %lu starts, id %lu\n",
-		(ulong) segment,
-		(ulong) os_thread_pf(os_thread_get_curr_id()));
-#endif
-	/* The loop has no exit condition */
-	for (;;) {
-		fil_aio_wait(segment);
-
-		mutex_enter(&ios_mutex);
-		ios++;
-		mutex_exit(&ios_mutex);
-	}
-
-	thr_local_free(os_thread_get_curr_id());
-
-	/* We count the number of threads in os_thread_exit(). A created
-	thread should always use that to exit and not use return() to exit.
-	The thread actually never comes here because it is exited in an
-	os_event_wait(). */
-
-	os_thread_exit(NULL);
-
-	OS_THREAD_DUMMY_RETURN;
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/* Directory separator character used when file paths are built in this
-file: a backslash when __WIN__ is defined, a forward slash otherwise. */
-#ifdef __WIN__
-#define SRV_PATH_SEPARATOR '\\'
-#else
-#define SRV_PATH_SEPARATOR '/'
-#endif
-
-/*********************************************************************//**
-Rewrites a directory path in place into its Windows form: every forward
-slash is replaced by a backslash. When __WIN__ is not defined the body is
-empty and the string is left untouched. */
-UNIV_INTERN
-void
-srv_normalize_path_for_win(
-/*=======================*/
-	char*	str __attribute__((unused)))	/*!< in/out: null-terminated
-						character string */
-{
-#ifdef __WIN__
-	char*	cursor = str;
-
-	while (*cursor != '\0') {
-		if (*cursor == '/') {
-			*cursor = '\\';
-		}
-
-		cursor++;
-	}
-#endif
-}
-
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Converts a file size given as a number of database pages to bytes and
-returns the low 32 bits of that byte count.
-@return low 32 bits of the file size when expressed in bytes */
-static
-ulint
-srv_calc_low32(
-/*===========*/
-	ulint	file_size)	/*!< in: file size in database pages */
-{
-	/* Pages to bytes: multiply by the page size via a left shift */
-	ulint	size_in_bytes = file_size << UNIV_PAGE_SIZE_SHIFT;
-
-	return(size_in_bytes & 0xFFFFFFFFUL);
-}
-
-/*********************************************************************//**
-Converts a file size given as a number of database pages to bytes and
-returns the bits of that byte count that lie above the low 32 bits.
-@return high 32 bits of the file size when expressed in bytes */
-static
-ulint
-srv_calc_high32(
-/*============*/
-	ulint	file_size)	/*!< in: file size in database pages */
-{
-	/* Shifting left by UNIV_PAGE_SIZE_SHIFT and then right by 32 is
-	folded into a single right shift on the page count */
-	const ulint	shift = 32 - UNIV_PAGE_SIZE_SHIFT;
-
-	return(file_size >> shift);
-}
-
-/*********************************************************************//**
-Creates or opens the log files and closes them.
-The file name is srv_log_group_home_dirs[k], a path separator if one is
-missing, and "ib_logfile" followed by the number i. The function first tries
-to create the file. If the file already exists it is opened instead and its
-size must equal srv_log_file_size (in pages) exactly; a newly created file is
-set to that size. On success the file is closed again and registered with
-fil_node_create(); for i == 0 the file space and the log group are created
-as well.
-NOTE(review): the error returns taken after files[i] has been created or
-opened (size mismatch, log_file_has_been_opened, os_file_set_size() failure)
-do not call os_file_close() on the handle.
-@return DB_SUCCESS or error code */
-static
-ulint
-open_or_create_log_file(
-/*====================*/
- ibool create_new_db, /*!< in: TRUE if we should create a
- new database */
- ibool* log_file_created, /*!< out: TRUE if new log file
- created */
- ibool log_file_has_been_opened,/*!< in: TRUE if a log file has been
- opened before: then it is an error
- to try to create another log file */
- ulint k, /*!< in: log group number */
- ulint i) /*!< in: log file number in group */
-{
- ibool ret;
- ulint size;
- ulint size_high;
- char name[10000];
- ulint dirnamelen;
-
- /* create_new_db is not used by this function */
- UT_NOT_USED(create_new_db);
-
- *log_file_created = FALSE;
-
- srv_normalize_path_for_win(srv_log_group_home_dirs[k]);
-
- dirnamelen = strlen(srv_log_group_home_dirs[k]);
- /* Leave room in name[] for what is appended below: a separator,
- "ib_logfile" and the file number */
- ut_a(dirnamelen < (sizeof name) - 10 - sizeof "ib_logfile");
- memcpy(name, srv_log_group_home_dirs[k], dirnamelen);
-
- /* Add a path separator if needed. */
- if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
- name[dirnamelen++] = SRV_PATH_SEPARATOR;
- }
-
- /* sprintf() also writes the terminating NUL that memcpy() did not
- copy */
- sprintf(name + dirnamelen, "%s%lu", "ib_logfile", (ulong) i);
-
- /* First try to create the file; ret is set to FALSE if that fails */
- files[i] = os_file_create(name, OS_FILE_CREATE, OS_FILE_NORMAL,
- OS_LOG_FILE, &ret);
- if (ret == FALSE) {
- /* Only "file already exists" is tolerated as a reason for the
- failed create */
- if (os_file_get_last_error(FALSE) != OS_FILE_ALREADY_EXISTS
-#ifdef UNIV_AIX
- /* AIX 5.1 after security patch ML7 may have errno set
- to 0 here, which causes our function to return 100;
- work around that AIX problem */
- && os_file_get_last_error(FALSE) != 100
-#endif
- ) {
- fprintf(stderr,
- "InnoDB: Error in creating"
- " or opening %s\n", name);
-
- return(DB_ERROR);
- }
-
- /* The file exists: open it instead */
- files[i] = os_file_create(name, OS_FILE_OPEN, OS_FILE_AIO,
- OS_LOG_FILE, &ret);
- if (!ret) {
- fprintf(stderr,
- "InnoDB: Error in opening %s\n", name);
-
- return(DB_ERROR);
- }
-
- ret = os_file_get_size(files[i], &size, &size_high);
- ut_a(ret);
-
- /* The existing file must have exactly the configured size;
- size and size_high are compared with the low and high 32 bits
- of srv_log_file_size converted to bytes */
- if (size != srv_calc_low32(srv_log_file_size)
- || size_high != srv_calc_high32(srv_log_file_size)) {
-
- fprintf(stderr,
- "InnoDB: Error: log file %s is"
- " of different size %lu %lu bytes\n"
- "InnoDB: than specified in the .cnf"
- " file %lu %lu bytes!\n",
- name, (ulong) size_high, (ulong) size,
- (ulong) srv_calc_high32(srv_log_file_size),
- (ulong) srv_calc_low32(srv_log_file_size));
-
- return(DB_ERROR);
- }
- } else {
- /* The create succeeded, so the file did not exist before */
- *log_file_created = TRUE;
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Log file %s did not exist:"
- " new to be created\n",
- name);
- /* Per the parameter description, creating a log file after
- another one has been opened is an error */
- if (log_file_has_been_opened) {
-
- return(DB_ERROR);
- }
-
- /* srv_log_file_size is in pages; the shift converts it to
- megabytes for the message */
- fprintf(stderr, "InnoDB: Setting log file %s size to %lu MB\n",
- name, (ulong) srv_log_file_size
- >> (20 - UNIV_PAGE_SIZE_SHIFT));
-
- fprintf(stderr,
- "InnoDB: Database physically writes the file"
- " full: wait...\n");
-
- ret = os_file_set_size(name, files[i],
- srv_calc_low32(srv_log_file_size),
- srv_calc_high32(srv_log_file_size));
- if (!ret) {
- fprintf(stderr,
- "InnoDB: Error in creating %s:"
- " probably out of disk space\n",
- name);
-
- return(DB_ERROR);
- }
- }
-
- ret = os_file_close(files[i]);
- ut_a(ret);
-
- if (i == 0) {
- /* Create in memory the file space object
- which is for this log group */
-
- fil_space_create(name,
- 2 * k + SRV_LOG_SPACE_FIRST_ID, 0, FIL_LOG);
- }
-
- ut_a(fil_validate());
-
- /* Add this file to the space of log group k, using the same space id
- as in the fil_space_create() call above */
- fil_node_create(name, srv_log_file_size,
- 2 * k + SRV_LOG_SPACE_FIRST_ID, FALSE);
-#ifdef UNIV_LOG_ARCHIVE
- /* If this is the first log group, create the file space object
- for archived logs.
- Under MySQL, no archiving ever done. */
-
- if (k == 0 && i == 0) {
- arch_space_id = 2 * k + 1 + SRV_LOG_SPACE_FIRST_ID;
-
- fil_space_create("arch_log_space", arch_space_id, 0, FIL_LOG);
- } else {
- arch_space_id = ULINT_UNDEFINED;
- }
-#endif /* UNIV_LOG_ARCHIVE */
- /* The log group itself is initialized once, when its first file is
- processed; the file size is passed in bytes */
- if (i == 0) {
- log_group_init(k, srv_n_log_files,
- srv_log_file_size * UNIV_PAGE_SIZE,
- 2 * k + SRV_LOG_SPACE_FIRST_ID,
- SRV_LOG_SPACE_FIRST_ID + 1); /* dummy arch
- space id */
- }
-
- return(DB_SUCCESS);
-}
-
-/*********************************************************************//**
-Creates or opens database data files and closes them.
-Loops over the srv_n_data_files entries of srv_data_file_names. A regular
-file is first created; if it already exists (or the entry is an old raw
-partition) it is opened, its size is checked against srv_data_file_sizes and
-its flushed lsn values are read. A file that had to be created is set to its
-configured size. Each file is then closed and registered in tablespace 0
-with fil_node_create(). Finally the counter ios and ios_mutex are
-initialized.
-NOTE(review): the error returns taken after files[i] has been opened or
-created (size mismatch, os_file_set_size() failure) do not call
-os_file_close() on the handle.
-@return DB_SUCCESS or error code */
-static
-ulint
-open_or_create_data_files(
-/*======================*/
- ibool* create_new_db, /*!< out: TRUE if new database should be
- created */
-#ifdef UNIV_LOG_ARCHIVE
- ulint* min_arch_log_no,/*!< out: min of archived log
- numbers in data files */
- ulint* max_arch_log_no,/*!< out: max of archived log
- numbers in data files */
-#endif /* UNIV_LOG_ARCHIVE */
- ib_uint64_t* min_flushed_lsn,/*!< out: min of flushed lsn
- values in data files */
- ib_uint64_t* max_flushed_lsn,/*!< out: max of flushed lsn
- values in data files */
- ulint* sum_of_new_sizes)/*!< out: sum of sizes of the
- new files added */
-{
- ibool ret;
- ulint i;
- ibool one_opened = FALSE;
- ibool one_created = FALSE;
- ulint size;
- ulint size_high;
- ulint rounded_size_pages;
- char name[10000];
-
- if (srv_n_data_files >= 1000) {
- fprintf(stderr, "InnoDB: can only have < 1000 data files\n"
- "InnoDB: you have defined %lu\n",
- (ulong) srv_n_data_files);
- return(DB_ERROR);
- }
-
- *sum_of_new_sizes = 0;
-
- *create_new_db = FALSE;
-
- srv_normalize_path_for_win(srv_data_home);
-
- for (i = 0; i < srv_n_data_files; i++) {
- ulint dirnamelen;
-
- /* Build the full file name: srv_data_home, a path separator
- if one is missing, and the configured file name */
- srv_normalize_path_for_win(srv_data_file_names[i]);
- dirnamelen = strlen(srv_data_home);
-
- ut_a(dirnamelen + strlen(srv_data_file_names[i])
- < (sizeof name) - 1);
- memcpy(name, srv_data_home, dirnamelen);
- /* Add a path separator if needed. */
- if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
- name[dirnamelen++] = SRV_PATH_SEPARATOR;
- }
-
- strcpy(name + dirnamelen, srv_data_file_names[i]);
-
- /* After this if-chain, ret == FALSE means that the file must
- be opened below, and a TRUE value means that it was created
- (or, for a new raw partition, opened to be written over) */
- if (srv_data_file_is_raw_partition[i] == 0) {
-
- /* First we try to create the file: if it already
- exists, ret will get value FALSE */
-
- files[i] = os_file_create(name, OS_FILE_CREATE,
- OS_FILE_NORMAL,
- OS_DATA_FILE, &ret);
-
- if (ret == FALSE && os_file_get_last_error(FALSE)
- != OS_FILE_ALREADY_EXISTS
-#ifdef UNIV_AIX
- /* AIX 5.1 after security patch ML7 may have
- errno set to 0 here, which causes our function
- to return 100; work around that AIX problem */
- && os_file_get_last_error(FALSE) != 100
-#endif
- ) {
- fprintf(stderr,
- "InnoDB: Error in creating"
- " or opening %s\n",
- name);
-
- return(DB_ERROR);
- }
- } else if (srv_data_file_is_raw_partition[i] == SRV_NEW_RAW) {
- /* The partition is opened, not created; then it is
- written over */
-
- srv_start_raw_disk_in_use = TRUE;
- srv_created_new_raw = TRUE;
-
- files[i] = os_file_create(name, OS_FILE_OPEN_RAW,
- OS_FILE_NORMAL,
- OS_DATA_FILE, &ret);
- if (!ret) {
- fprintf(stderr,
- "InnoDB: Error in opening %s\n", name);
-
- return(DB_ERROR);
- }
- } else if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
- srv_start_raw_disk_in_use = TRUE;
-
- /* Force the "open existing file" branch below */
- ret = FALSE;
- } else {
- ut_a(0);
- }
-
- if (ret == FALSE) {
- /* We open the data file */
-
- /* An existing file that follows a file created earlier
- in this loop is rejected */
- if (one_created) {
- fprintf(stderr,
- "InnoDB: Error: data files can only"
- " be added at the end\n");
- fprintf(stderr,
- "InnoDB: of a tablespace, but"
- " data file %s existed beforehand.\n",
- name);
- return(DB_ERROR);
- }
-
- /* The open mode depends on the kind of file: raw
- partition, first data file, or any later data file */
- if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
- files[i] = os_file_create(
- name, OS_FILE_OPEN_RAW,
- OS_FILE_NORMAL, OS_DATA_FILE, &ret);
- } else if (i == 0) {
- files[i] = os_file_create(
- name, OS_FILE_OPEN_RETRY,
- OS_FILE_NORMAL, OS_DATA_FILE, &ret);
- } else {
- files[i] = os_file_create(
- name, OS_FILE_OPEN, OS_FILE_NORMAL,
- OS_DATA_FILE, &ret);
- }
-
- if (!ret) {
- fprintf(stderr,
- "InnoDB: Error in opening %s\n", name);
- os_file_get_last_error(TRUE);
-
- return(DB_ERROR);
- }
-
- /* No size check is made for an old raw partition */
- if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
-
- goto skip_size_check;
- }
-
- ret = os_file_get_size(files[i], &size, &size_high);
- ut_a(ret);
- /* Round size downward to megabytes */
-
- /* Presumably size and size_high are the low and high
- 32 bits of the size in bytes: each unit of size_high
- is then 4096 MB. The left shift converts megabytes to
- pages. */
- rounded_size_pages
- = (size / (1024 * 1024) + 4096 * size_high)
- << (20 - UNIV_PAGE_SIZE_SHIFT);
-
- if (i == srv_n_data_files - 1
- && srv_auto_extend_last_data_file) {
-
- /* An auto-extending last file may be larger
- than its configured initial size, but not
- smaller, and not larger than a non-zero
- srv_last_file_size_max */
- if (srv_data_file_sizes[i] > rounded_size_pages
- || (srv_last_file_size_max > 0
- && srv_last_file_size_max
- < rounded_size_pages)) {
-
- fprintf(stderr,
- "InnoDB: Error: auto-extending"
- " data file %s is"
- " of a different size\n"
- "InnoDB: %lu pages (rounded"
- " down to MB) than specified"
- " in the .cnf file:\n"
- "InnoDB: initial %lu pages,"
- " max %lu (relevant if"
- " non-zero) pages!\n",
- name,
- (ulong) rounded_size_pages,
- (ulong) srv_data_file_sizes[i],
- (ulong)
- srv_last_file_size_max);
-
- return(DB_ERROR);
- }
-
- /* Adopt the actual size, so that the equality
- check below passes for this file */
- srv_data_file_sizes[i] = rounded_size_pages;
- }
-
- if (rounded_size_pages != srv_data_file_sizes[i]) {
-
- fprintf(stderr,
- "InnoDB: Error: data file %s"
- " is of a different size\n"
- "InnoDB: %lu pages"
- " (rounded down to MB)\n"
- "InnoDB: than specified"
- " in the .cnf file %lu pages!\n",
- name,
- (ulong) rounded_size_pages,
- (ulong) srv_data_file_sizes[i]);
-
- return(DB_ERROR);
- }
-skip_size_check:
- /* one_opened is FALSE only for the first file that is
- opened in this loop */
- fil_read_flushed_lsn_and_arch_log_no(
- files[i], one_opened,
-#ifdef UNIV_LOG_ARCHIVE
- min_arch_log_no, max_arch_log_no,
-#endif /* UNIV_LOG_ARCHIVE */
- min_flushed_lsn, max_flushed_lsn);
- one_opened = TRUE;
- } else {
- /* We created the data file and now write it full of
- zeros */
-
- one_created = TRUE;
-
- if (i > 0) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Data file %s did not"
- " exist: new to be created\n",
- name);
- } else {
- fprintf(stderr,
- "InnoDB: The first specified"
- " data file %s did not exist:\n"
- "InnoDB: a new database"
- " to be created!\n", name);
- *create_new_db = TRUE;
- }
-
- /* srv_data_file_sizes[] is in pages; the shift
- converts it to megabytes for the message */
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Setting file %s size to %lu MB\n",
- name,
- (ulong) (srv_data_file_sizes[i]
- >> (20 - UNIV_PAGE_SIZE_SHIFT)));
-
- fprintf(stderr,
- "InnoDB: Database physically writes the"
- " file full: wait...\n");
-
- ret = os_file_set_size(
- name, files[i],
- srv_calc_low32(srv_data_file_sizes[i]),
- srv_calc_high32(srv_data_file_sizes[i]));
-
- if (!ret) {
- fprintf(stderr,
- "InnoDB: Error in creating %s:"
- " probably out of disk space\n", name);
-
- return(DB_ERROR);
- }
-
- /* Only files taking this branch count towards the sum
- of new sizes */
- *sum_of_new_sizes = *sum_of_new_sizes
- + srv_data_file_sizes[i];
- }
-
- ret = os_file_close(files[i]);
- ut_a(ret);
-
- /* The space object with id 0 is created once, when the first
- file is processed; every file is then added to it as a node */
- if (i == 0) {
- fil_space_create(name, 0, 0, FIL_TABLESPACE);
- }
-
- ut_a(fil_validate());
-
- fil_node_create(name, srv_data_file_sizes[i], 0,
- srv_data_file_is_raw_partition[i] != 0);
- }
-
- /* Reset the counter ios and create the mutex that protects it */
- ios = 0;
-
- mutex_create(&ios_mutex, SYNC_NO_ORDER_CHECK);
-
- return(DB_SUCCESS);
-}
-
-/********************************************************************
-Starts InnoDB and creates a new database if database files
-are not found and the user wants.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-int
-innobase_start_or_create_for_mysql(void)
-/*====================================*/
-{
- buf_pool_t* ret;
- ibool create_new_db;
- ibool log_file_created;
- ibool log_created = FALSE;
- ibool log_opened = FALSE;
- ib_uint64_t min_flushed_lsn;
- ib_uint64_t max_flushed_lsn;
-#ifdef UNIV_LOG_ARCHIVE
- ulint min_arch_log_no;
- ulint max_arch_log_no;
-#endif /* UNIV_LOG_ARCHIVE */
- ulint sum_of_new_sizes;
- ulint sum_of_data_file_sizes;
- ulint tablespace_size_in_header;
- ulint err;
- ulint i;
- ulint io_limit;
- my_bool srv_file_per_table_original_value
- = srv_file_per_table;
- mtr_t mtr;
-#ifdef HAVE_DARWIN_THREADS
-# ifdef F_FULLFSYNC
- /* This executable has been compiled on Mac OS X 10.3 or later.
- Assume that F_FULLFSYNC is available at run-time. */
- srv_have_fullfsync = TRUE;
-# else /* F_FULLFSYNC */
- /* This executable has been compiled on Mac OS X 10.2
- or earlier. Determine if the executable is running
- on Mac OS X 10.3 or later. */
- struct utsname utsname;
- if (uname(&utsname)) {
- fputs("InnoDB: cannot determine Mac OS X version!\n", stderr);
- } else {
- srv_have_fullfsync = strcmp(utsname.release, "7.") >= 0;
- }
- if (!srv_have_fullfsync) {
- fputs("InnoDB: On Mac OS X, fsync() may be"
- " broken on internal drives,\n"
- "InnoDB: making transactions unsafe!\n", stderr);
- }
-# endif /* F_FULLFSYNC */
-#endif /* HAVE_DARWIN_THREADS */
-
- if (sizeof(ulint) != sizeof(void*)) {
- fprintf(stderr,
- "InnoDB: Error: size of InnoDB's ulint is %lu,"
- " but size of void* is %lu.\n"
- "InnoDB: The sizes should be the same"
- " so that on a 64-bit platform you can\n"
- "InnoDB: allocate more than 4 GB of memory.",
- (ulong)sizeof(ulint), (ulong)sizeof(void*));
- }
-
- /* System tables are created in tablespace 0. Thus, we must
- temporarily clear srv_file_per_table. This is ok, because the
- server will not accept connections (which could modify
- innodb_file_per_table) until this function has returned. */
- srv_file_per_table = FALSE;
-#ifdef UNIV_DEBUG
- fprintf(stderr,
- "InnoDB: !!!!!!!! UNIV_DEBUG switched on !!!!!!!!!\n");
-#endif
-
-#ifdef UNIV_IBUF_DEBUG
- fprintf(stderr,
- "InnoDB: !!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!\n"
- "InnoDB: Crash recovery will fail with UNIV_IBUF_DEBUG\n");
-#endif
-
-#ifdef UNIV_SYNC_DEBUG
- fprintf(stderr,
- "InnoDB: !!!!!!!! UNIV_SYNC_DEBUG switched on !!!!!!!!!\n");
-#endif
-
-#ifdef UNIV_SEARCH_DEBUG
- fprintf(stderr,
- "InnoDB: !!!!!!!! UNIV_SEARCH_DEBUG switched on !!!!!!!!!\n");
-#endif
-
-#ifdef UNIV_LOG_LSN_DEBUG
- fprintf(stderr,
- "InnoDB: !!!!!!!! UNIV_LOG_LSN_DEBUG switched on !!!!!!!!!\n");
-#endif /* UNIV_LOG_LSN_DEBUG */
-#ifdef UNIV_MEM_DEBUG
- fprintf(stderr,
- "InnoDB: !!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!\n");
-#endif
-
- if (UNIV_LIKELY(srv_use_sys_malloc)) {
- fprintf(stderr,
- "InnoDB: The InnoDB memory heap is disabled\n");
- }
-
- fprintf(stderr, "InnoDB: %s\n", IB_ATOMICS_STARTUP_MSG);
-
- /* Since InnoDB does not currently clean up all its internal data
- structures in MySQL Embedded Server Library server_end(), we
- print an error message if someone tries to start up InnoDB a
- second time during the process lifetime. */
-
- if (srv_start_has_been_called) {
- fprintf(stderr,
- "InnoDB: Error: startup called second time"
- " during the process lifetime.\n"
- "InnoDB: In the MySQL Embedded Server Library"
- " you cannot call server_init()\n"
- "InnoDB: more than once during"
- " the process lifetime.\n");
- }
-
- srv_start_has_been_called = TRUE;
-
-#ifdef UNIV_DEBUG
- log_do_write = TRUE;
-#endif /* UNIV_DEBUG */
- /* yydebug = TRUE; */
-
- srv_is_being_started = TRUE;
- srv_startup_is_before_trx_rollback_phase = TRUE;
- os_aio_use_native_aio = FALSE;
-
-#ifdef __WIN__
- switch (os_get_os_version()) {
- case OS_WIN95:
- case OS_WIN31:
- case OS_WINNT:
- /* On Win 95, 98, ME, Win32 subsystem for Windows 3.1,
- and NT use simulated aio. In NT Windows provides async i/o,
- but when run in conjunction with InnoDB Hot Backup, it seemed
- to corrupt the data files. */
-
- os_aio_use_native_aio = FALSE;
- break;
- default:
- /* On Win 2000 and XP use async i/o */
- os_aio_use_native_aio = TRUE;
- break;
- }
-#endif
- if (srv_file_flush_method_str == NULL) {
- /* These are the default options */
-
- srv_unix_file_flush_method = SRV_UNIX_FSYNC;
-
- srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
-#ifndef __WIN__
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) {
- srv_unix_file_flush_method = SRV_UNIX_FSYNC;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DSYNC")) {
- srv_unix_file_flush_method = SRV_UNIX_O_DSYNC;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
- srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
- srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "nosync")) {
- srv_unix_file_flush_method = SRV_UNIX_NOSYNC;
-#else
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) {
- srv_win_file_flush_method = SRV_WIN_IO_NORMAL;
- os_aio_use_native_aio = FALSE;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) {
- srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
- os_aio_use_native_aio = FALSE;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str,
- "async_unbuffered")) {
- srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
-#endif
- } else {
- fprintf(stderr,
- "InnoDB: Unrecognized value %s for"
- " innodb_flush_method\n",
- srv_file_flush_method_str);
- return(DB_ERROR);
- }
-
- /* Note that the call srv_boot() also changes the values of
- some variables to the units used by InnoDB internally */
-
- /* Set the maximum number of threads which can wait for a semaphore
- inside InnoDB: this is the 'sync wait array' size, as well as the
- maximum number of threads that can wait in the 'srv_conc array' for
- their time to enter InnoDB. */
-
-#if defined(__NETWARE__)
-
- /* Create less event semaphores because Win 98/ME had
- difficulty creating 40000 event semaphores. Comment from
- Novell, Inc.: also, these just take a lot of memory on
- NetWare. */
- srv_max_n_threads = 1000;
-#else
- if (srv_buf_pool_size >= 1000 * 1024 * 1024) {
- /* The buffer pool is at least 1000 MB:
- allow the largest number of threads. */
- srv_max_n_threads = 50000;
-
- } else if (srv_buf_pool_size >= 8 * 1024 * 1024) {
-
- srv_max_n_threads = 10000;
- } else {
- srv_max_n_threads = 1000; /* saves several MB of memory,
- especially in 64-bit
- computers */
- }
-#endif
- err = srv_boot();
-
- if (err != DB_SUCCESS) {
-
- return((int) err);
- }
-
- mutex_create(&srv_monitor_file_mutex, SYNC_NO_ORDER_CHECK);
-
- if (srv_innodb_status) {
- srv_monitor_file_name = mem_alloc(
- strlen(fil_path_to_mysql_datadir)
- + 20 + sizeof "/innodb_status.");
- sprintf(srv_monitor_file_name, "%s/innodb_status.%lu",
- fil_path_to_mysql_datadir, os_proc_get_number());
- srv_monitor_file = fopen(srv_monitor_file_name, "w+");
- if (!srv_monitor_file) {
- fprintf(stderr, "InnoDB: unable to create %s: %s\n",
- srv_monitor_file_name, strerror(errno));
- return(DB_ERROR);
- }
- } else {
- srv_monitor_file_name = NULL;
- srv_monitor_file = os_file_create_tmpfile();
- if (!srv_monitor_file) {
- return(DB_ERROR);
- }
- }
-
- mutex_create(&srv_dict_tmpfile_mutex, SYNC_DICT_OPERATION);
-
- srv_dict_tmpfile = os_file_create_tmpfile();
- if (!srv_dict_tmpfile) {
- return(DB_ERROR);
- }
-
- mutex_create(&srv_misc_tmpfile_mutex, SYNC_ANY_LATCH);
-
- srv_misc_tmpfile = os_file_create_tmpfile();
- if (!srv_misc_tmpfile) {
- return(DB_ERROR);
- }
-
- /* If user has set the value of innodb_file_io_threads then
- we'll emit a message telling the user that this parameter
- is now deprecated. */
- if (srv_n_file_io_threads != 4) {
- fprintf(stderr, "InnoDB: Warning:"
- " innodb_file_io_threads is deprecated."
- " Please use innodb_read_io_threads and"
- " innodb_write_io_threads instead\n");
- }
-
- /* Now overwrite the value on srv_n_file_io_threads */
- srv_n_file_io_threads = 2 + srv_n_read_io_threads
- + srv_n_write_io_threads;
-
- ut_a(srv_n_file_io_threads <= SRV_MAX_N_IO_THREADS);
-
- /* TODO: Investigate if SRV_N_PENDING_IOS_PER_THREAD (32) limit
- still applies to windows. */
- if (!os_aio_use_native_aio) {
- io_limit = 8 * SRV_N_PENDING_IOS_PER_THREAD;
- } else {
- io_limit = SRV_N_PENDING_IOS_PER_THREAD;
- }
-
- os_aio_init(io_limit,
- srv_n_read_io_threads,
- srv_n_write_io_threads,
- SRV_MAX_N_PENDING_SYNC_IOS);
-
- fil_init(srv_file_per_table ? 50000 : 5000,
- srv_max_n_open_files);
-
- ret = buf_pool_init();
-
- if (ret == NULL) {
- fprintf(stderr,
- "InnoDB: Fatal error: cannot allocate the memory"
- " for the buffer pool\n");
-
- return(DB_ERROR);
- }
-
-#ifdef UNIV_DEBUG
- /* We have observed deadlocks with a 5MB buffer pool but
- the actual lower limit could very well be a little higher. */
-
- if (srv_buf_pool_size <= 5 * 1024 * 1024) {
-
- fprintf(stderr, "InnoDB: Warning: Small buffer pool size "
- "(%luM), the flst_validate() debug function "
- "can cause a deadlock if the buffer pool fills up.\n",
- srv_buf_pool_size / 1024 / 1024);
- }
-#endif
-
- fsp_init();
- log_init();
-
- lock_sys_create(srv_lock_table_size);
-
- /* Create i/o-handler threads: */
-
- for (i = 0; i < srv_n_file_io_threads; i++) {
- n[i] = i;
-
- os_thread_create(io_handler_thread, n + i, thread_ids + i);
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- if (0 != ut_strcmp(srv_log_group_home_dirs[0], srv_arch_dir)) {
- fprintf(stderr,
- "InnoDB: Error: you must set the log group"
- " home dir in my.cnf the\n"
- "InnoDB: same as log arch dir.\n");
-
- return(DB_ERROR);
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- if (srv_n_log_files * srv_log_file_size >= 262144) {
- fprintf(stderr,
- "InnoDB: Error: combined size of log files"
- " must be < 4 GB\n");
-
- return(DB_ERROR);
- }
-
- sum_of_new_sizes = 0;
-
- for (i = 0; i < srv_n_data_files; i++) {
-#ifndef __WIN__
- if (sizeof(off_t) < 5 && srv_data_file_sizes[i] >= 262144) {
- fprintf(stderr,
- "InnoDB: Error: file size must be < 4 GB"
- " with this MySQL binary\n"
- "InnoDB: and operating system combination,"
- " in some OS's < 2 GB\n");
-
- return(DB_ERROR);
- }
-#endif
- sum_of_new_sizes += srv_data_file_sizes[i];
- }
-
- if (sum_of_new_sizes < 10485760 / UNIV_PAGE_SIZE) {
- fprintf(stderr,
- "InnoDB: Error: tablespace size must be"
- " at least 10 MB\n");
-
- return(DB_ERROR);
- }
-
- err = open_or_create_data_files(&create_new_db,
-#ifdef UNIV_LOG_ARCHIVE
- &min_arch_log_no, &max_arch_log_no,
-#endif /* UNIV_LOG_ARCHIVE */
- &min_flushed_lsn, &max_flushed_lsn,
- &sum_of_new_sizes);
- if (err != DB_SUCCESS) {
- fprintf(stderr,
- "InnoDB: Could not open or create data files.\n"
- "InnoDB: If you tried to add new data files,"
- " and it failed here,\n"
- "InnoDB: you should now edit innodb_data_file_path"
- " in my.cnf back\n"
- "InnoDB: to what it was, and remove the"
- " new ibdata files InnoDB created\n"
- "InnoDB: in this failed attempt. InnoDB only wrote"
- " those files full of\n"
- "InnoDB: zeros, but did not yet use them in any way."
- " But be careful: do not\n"
- "InnoDB: remove old data files"
- " which contain your precious data!\n");
-
- return((int) err);
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- srv_normalize_path_for_win(srv_arch_dir);
- srv_arch_dir = srv_add_path_separator_if_needed(srv_arch_dir);
-#endif /* UNIV_LOG_ARCHIVE */
-
- for (i = 0; i < srv_n_log_files; i++) {
- err = open_or_create_log_file(create_new_db, &log_file_created,
- log_opened, 0, i);
- if (err != DB_SUCCESS) {
-
- return((int) err);
- }
-
- if (log_file_created) {
- log_created = TRUE;
- } else {
- log_opened = TRUE;
- }
- if ((log_opened && create_new_db)
- || (log_opened && log_created)) {
- fprintf(stderr,
- "InnoDB: Error: all log files must be"
- " created at the same time.\n"
- "InnoDB: All log files must be"
- " created also in database creation.\n"
- "InnoDB: If you want bigger or smaller"
- " log files, shut down the\n"
- "InnoDB: database and make sure there"
- " were no errors in shutdown.\n"
- "InnoDB: Then delete the existing log files."
- " Edit the .cnf file\n"
- "InnoDB: and start the database again.\n");
-
- return(DB_ERROR);
- }
- }
-
- /* Open all log files and data files in the system tablespace: we
- keep them open until database shutdown */
-
- fil_open_log_and_system_tablespace_files();
-
- if (log_created && !create_new_db
-#ifdef UNIV_LOG_ARCHIVE
- && !srv_archive_recovery
-#endif /* UNIV_LOG_ARCHIVE */
- ) {
- if (max_flushed_lsn != min_flushed_lsn
-#ifdef UNIV_LOG_ARCHIVE
- || max_arch_log_no != min_arch_log_no
-#endif /* UNIV_LOG_ARCHIVE */
- ) {
- fprintf(stderr,
- "InnoDB: Cannot initialize created"
- " log files because\n"
- "InnoDB: data files were not in sync"
- " with each other\n"
- "InnoDB: or the data files are corrupt.\n");
-
- return(DB_ERROR);
- }
-
- if (max_flushed_lsn < (ib_uint64_t) 1000) {
- fprintf(stderr,
- "InnoDB: Cannot initialize created"
- " log files because\n"
- "InnoDB: data files are corrupt,"
- " or new data files were\n"
- "InnoDB: created when the database"
- " was started previous\n"
- "InnoDB: time but the database"
- " was not shut down\n"
- "InnoDB: normally after that.\n");
-
- return(DB_ERROR);
- }
-
- mutex_enter(&(log_sys->mutex));
-
-#ifdef UNIV_LOG_ARCHIVE
- /* Do not + 1 arch_log_no because we do not use log
- archiving */
- recv_reset_logs(max_flushed_lsn, max_arch_log_no, TRUE);
-#else
- recv_reset_logs(max_flushed_lsn, TRUE);
-#endif /* UNIV_LOG_ARCHIVE */
-
- mutex_exit(&(log_sys->mutex));
- }
-
- trx_sys_file_format_init();
-
- if (create_new_db) {
- mtr_start(&mtr);
- fsp_header_init(0, sum_of_new_sizes, &mtr);
-
- mtr_commit(&mtr);
-
- trx_sys_create();
- dict_create();
- srv_startup_is_before_trx_rollback_phase = FALSE;
-
-#ifdef UNIV_LOG_ARCHIVE
- } else if (srv_archive_recovery) {
- fprintf(stderr,
- "InnoDB: Starting archive"
- " recovery from a backup...\n");
- err = recv_recovery_from_archive_start(
- min_flushed_lsn, srv_archive_recovery_limit_lsn,
- min_arch_log_no);
- if (err != DB_SUCCESS) {
-
- return(DB_ERROR);
- }
- /* Since ibuf init is in dict_boot, and ibuf is needed
- in any disk i/o, first call dict_boot */
-
- dict_boot();
- trx_sys_init_at_db_start();
- srv_startup_is_before_trx_rollback_phase = FALSE;
-
- /* Initialize the fsp free limit global variable in the log
- system */
- fsp_header_get_free_limit();
-
- recv_recovery_from_archive_finish();
-#endif /* UNIV_LOG_ARCHIVE */
- } else {
-
- /* Check if we support the max format that is stamped
- on the system tablespace.
- Note: We are NOT allowed to make any modifications to
- the TRX_SYS_PAGE_NO page before recovery because this
- page also contains the max_trx_id etc. important system
- variables that are required for recovery. We need to
- ensure that we return the system to a state where normal
- recovery is guaranteed to work. We do this by
- invalidating the buffer cache, this will force the
- reread of the page and restoration to its last known
- consistent state, this is REQUIRED for the recovery
- process to work. */
- err = trx_sys_file_format_max_check(
- srv_check_file_format_at_startup);
-
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- /* Invalidate the buffer pool to ensure that we reread
- the page that we read above, during recovery.
- Note that this is not as heavy weight as it seems. At
- this point there will be only ONE page in the buf_LRU
- and there must be no page in the buf_flush list. */
- buf_pool_invalidate();
-
- /* We always try to do a recovery, even if the database had
- been shut down normally: this is the normal startup path */
-
- err = recv_recovery_from_checkpoint_start(LOG_CHECKPOINT,
- IB_ULONGLONG_MAX,
- min_flushed_lsn,
- max_flushed_lsn);
- if (err != DB_SUCCESS) {
-
- return(DB_ERROR);
- }
-
- /* Since the insert buffer init is in dict_boot, and the
- insert buffer is needed in any disk i/o, first we call
- dict_boot(). Note that trx_sys_init_at_db_start() only needs
- to access space 0, and the insert buffer at this stage already
- works for space 0. */
-
- dict_boot();
- trx_sys_init_at_db_start();
-
- if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
- /* The following call is necessary for the insert
- buffer to work with multiple tablespaces. We must
- know the mapping between space id's and .ibd file
- names.
-
- In a crash recovery, we check that the info in data
- dictionary is consistent with what we already know
- about space id's from the call of
- fil_load_single_table_tablespaces().
-
- In a normal startup, we create the space objects for
- every table in the InnoDB data dictionary that has
- an .ibd file.
-
- We also determine the maximum tablespace id used.
-
- TODO: We may have incomplete transactions in the
- data dictionary tables. Does that harm the scanning of
- the data dictionary below? */
-
- dict_check_tablespaces_and_store_max_id(
- recv_needed_recovery);
- }
-
- srv_startup_is_before_trx_rollback_phase = FALSE;
-
- /* Initialize the fsp free limit global variable in the log
- system */
- fsp_header_get_free_limit();
-
- /* recv_recovery_from_checkpoint_finish needs trx lists which
- are initialized in trx_sys_init_at_db_start(). */
-
- recv_recovery_from_checkpoint_finish();
-
- /* It is possible that file_format tag has never
- been set. In this case we initialize it to minimum
- value. Important to note that we can do it ONLY after
- we have finished the recovery process so that the
- image of TRX_SYS_PAGE_NO is not stale. */
- trx_sys_file_format_tag_init();
- }
-
- if (!create_new_db && sum_of_new_sizes > 0) {
- /* New data file(s) were added */
- mtr_start(&mtr);
-
- fsp_header_inc_size(0, sum_of_new_sizes, &mtr);
-
- mtr_commit(&mtr);
-
- /* Immediately write the log record about increased tablespace
- size to disk, so that it is durable even if mysqld would crash
- quickly */
-
- log_buffer_flush_to_disk();
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- /* Archiving is always off under MySQL */
- if (!srv_log_archive_on) {
- ut_a(DB_SUCCESS == log_archive_noarchivelog());
- } else {
- mutex_enter(&(log_sys->mutex));
-
- start_archive = FALSE;
-
- if (log_sys->archiving_state == LOG_ARCH_OFF) {
- start_archive = TRUE;
- }
-
- mutex_exit(&(log_sys->mutex));
-
- if (start_archive) {
- ut_a(DB_SUCCESS == log_archive_archivelog());
- }
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- /* fprintf(stderr, "Max allowed record size %lu\n",
- page_get_free_space_of_empty() / 2); */
-
- /* Create the thread which watches the timeouts for lock waits
- and prints InnoDB monitor info */
-
- os_thread_create(&srv_lock_timeout_and_monitor_thread, NULL,
- thread_ids + 2 + SRV_MAX_N_IO_THREADS);
-
- /* Create the thread which warns of long semaphore waits */
- os_thread_create(&srv_error_monitor_thread, NULL,
- thread_ids + 3 + SRV_MAX_N_IO_THREADS);
- srv_is_being_started = FALSE;
-
- if (trx_doublewrite == NULL) {
- /* Create the doublewrite buffer to a new tablespace */
-
- trx_sys_create_doublewrite_buf();
- }
-
- err = dict_create_or_check_foreign_constraint_tables();
-
- if (err != DB_SUCCESS) {
- return((int)DB_ERROR);
- }
-
- /* Create the master thread which does purge and other utility
- operations */
-
- os_thread_create(&srv_master_thread, NULL, thread_ids
- + (1 + SRV_MAX_N_IO_THREADS));
-#ifdef UNIV_DEBUG
- /* buf_debug_prints = TRUE; */
-#endif /* UNIV_DEBUG */
- sum_of_data_file_sizes = 0;
-
- for (i = 0; i < srv_n_data_files; i++) {
- sum_of_data_file_sizes += srv_data_file_sizes[i];
- }
-
- tablespace_size_in_header = fsp_header_get_tablespace_size();
-
- if (!srv_auto_extend_last_data_file
- && sum_of_data_file_sizes != tablespace_size_in_header) {
-
- fprintf(stderr,
- "InnoDB: Error: tablespace size"
- " stored in header is %lu pages, but\n"
- "InnoDB: the sum of data file sizes is %lu pages\n",
- (ulong) tablespace_size_in_header,
- (ulong) sum_of_data_file_sizes);
-
- if (srv_force_recovery == 0
- && sum_of_data_file_sizes < tablespace_size_in_header) {
- /* This is a fatal error, the tail of a tablespace is
- missing */
-
- fprintf(stderr,
- "InnoDB: Cannot start InnoDB."
- " The tail of the system tablespace is\n"
- "InnoDB: missing. Have you edited"
- " innodb_data_file_path in my.cnf in an\n"
- "InnoDB: inappropriate way, removing"
- " ibdata files from there?\n"
- "InnoDB: You can set innodb_force_recovery=1"
- " in my.cnf to force\n"
- "InnoDB: a startup if you are trying"
- " to recover a badly corrupt database.\n");
-
- return(DB_ERROR);
- }
- }
-
- if (srv_auto_extend_last_data_file
- && sum_of_data_file_sizes < tablespace_size_in_header) {
-
- fprintf(stderr,
- "InnoDB: Error: tablespace size stored in header"
- " is %lu pages, but\n"
- "InnoDB: the sum of data file sizes"
- " is only %lu pages\n",
- (ulong) tablespace_size_in_header,
- (ulong) sum_of_data_file_sizes);
-
- if (srv_force_recovery == 0) {
-
- fprintf(stderr,
- "InnoDB: Cannot start InnoDB. The tail of"
- " the system tablespace is\n"
- "InnoDB: missing. Have you edited"
- " innodb_data_file_path in my.cnf in an\n"
- "InnoDB: inappropriate way, removing"
- " ibdata files from there?\n"
- "InnoDB: You can set innodb_force_recovery=1"
- " in my.cnf to force\n"
- "InnoDB: a startup if you are trying to"
- " recover a badly corrupt database.\n");
-
- return(DB_ERROR);
- }
- }
-
- /* Check that os_fast_mutexes work as expected */
- os_fast_mutex_init(&srv_os_test_mutex);
-
- if (0 != os_fast_mutex_trylock(&srv_os_test_mutex)) {
- fprintf(stderr,
- "InnoDB: Error: pthread_mutex_trylock returns"
- " an unexpected value on\n"
- "InnoDB: success! Cannot continue.\n");
- exit(1);
- }
-
- os_fast_mutex_unlock(&srv_os_test_mutex);
-
- os_fast_mutex_lock(&srv_os_test_mutex);
-
- os_fast_mutex_unlock(&srv_os_test_mutex);
-
- os_fast_mutex_free(&srv_os_test_mutex);
-
- if (srv_print_verbose_log) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB Plugin %s started; "
- "log sequence number %llu\n",
- INNODB_VERSION_STR, srv_start_lsn);
- }
-
- if (srv_force_recovery > 0) {
- fprintf(stderr,
- "InnoDB: !!! innodb_force_recovery"
- " is set to %lu !!!\n",
- (ulong) srv_force_recovery);
- }
-
- fflush(stderr);
-
- if (trx_doublewrite_must_reset_space_ids) {
- /* Actually, we did not change the undo log format between
- 4.0 and 4.1.1, and we would not need to run purge to
- completion. Note also that the purge algorithm in 4.1.1
- can process the history list again even after a full
- purge, because our algorithm does not cut the end of the
- history list in all cases so that it would become empty
- after a full purge. That mean that we may purge 4.0 type
- undo log even after this phase.
-
- The insert buffer record format changed between 4.0 and
- 4.1.1. It is essential that the insert buffer is emptied
- here! */
-
- fprintf(stderr,
- "InnoDB: You are upgrading to an"
- " InnoDB version which allows multiple\n"
- "InnoDB: tablespaces. Wait that purge"
- " and insert buffer merge run to\n"
- "InnoDB: completion...\n");
- for (;;) {
- os_thread_sleep(1000000);
-
- if (0 == strcmp(srv_main_thread_op_info,
- "waiting for server activity")) {
-
- ut_a(ibuf_is_empty());
-
- break;
- }
- }
- fprintf(stderr,
- "InnoDB: Full purge and insert buffer merge"
- " completed.\n");
-
- trx_sys_mark_upgraded_to_multiple_tablespaces();
-
- fprintf(stderr,
- "InnoDB: You have now successfully upgraded"
- " to the multiple tablespaces\n"
- "InnoDB: format. You should NOT DOWNGRADE"
- " to an earlier version of\n"
- "InnoDB: InnoDB! But if you absolutely need to"
- " downgrade, see\n"
- "InnoDB: " REFMAN "multiple-tablespaces.html\n"
- "InnoDB: for instructions.\n");
- }
-
- if (srv_force_recovery == 0) {
- /* In the insert buffer we may have even bigger tablespace
- id's, because we may have dropped those tablespaces, but
- insert buffer merge has not had time to clean the records from
- the ibuf tree. */
-
- ibuf_update_max_tablespace_id();
- }
-
- srv_file_per_table = srv_file_per_table_original_value;
-
- srv_was_started = TRUE;
-
- return((int) DB_SUCCESS);
-}
-
-/****************************************************************//**
-Shuts down the InnoDB database.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-int
-innobase_shutdown_for_mysql(void)
-/*=============================*/
-{
- ulint i;
-#ifdef __NETWARE__
- extern ibool panic_shutdown;
-#endif
- if (!srv_was_started) {
- if (srv_is_being_started) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: shutting down"
- " a not properly started\n"
- "InnoDB: or created database!\n");
- }
-
- return(DB_SUCCESS);
- }
-
- /* 1. Flush the buffer pool to disk, write the current lsn to
- the tablespace header(s), and copy all log data to archive.
- The step 1 is the real InnoDB shutdown. The remaining steps 2 - ...
- just free data structures after the shutdown. */
-
-
- if (srv_fast_shutdown == 2) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: MySQL has requested a very fast shutdown"
- " without flushing "
- "the InnoDB buffer pool to data files."
- " At the next mysqld startup "
- "InnoDB will do a crash recovery!\n");
- }
-
-#ifdef __NETWARE__
- if (!panic_shutdown)
-#endif
- logs_empty_and_mark_files_at_shutdown();
-
- if (srv_conc_n_threads != 0) {
- fprintf(stderr,
- "InnoDB: Warning: query counter shows %ld queries"
- " still\n"
- "InnoDB: inside InnoDB at shutdown\n",
- srv_conc_n_threads);
- }
-
- /* 2. Make all threads created by InnoDB to exit */
-
- srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS;
-
- /* In a 'very fast' shutdown, we do not need to wait for these threads
- to die; all which counts is that we flushed the log; a 'very fast'
- shutdown is essentially a crash. */
-
- if (srv_fast_shutdown == 2) {
- return(DB_SUCCESS);
- }
-
- /* All threads end up waiting for certain events. Put those events
- to the signaled state. Then the threads will exit themselves in
- os_thread_event_wait(). */
-
- for (i = 0; i < 1000; i++) {
- /* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM
- HERE OR EARLIER */
-
- /* a. Let the lock timeout thread exit */
- os_event_set(srv_lock_timeout_thread_event);
-
- /* b. srv error monitor thread exits automatically, no need
- to do anything here */
-
- /* c. We wake the master thread so that it exits */
- srv_wake_master_thread();
-
- /* d. Exit the i/o threads */
-
- os_aio_wake_all_threads_at_shutdown();
-
- os_mutex_enter(os_sync_mutex);
-
- if (os_thread_count == 0) {
- /* All the threads have exited or are just exiting;
- NOTE that the threads may not have completed their
- exit yet. Should we use pthread_join() to make sure
- they have exited? If we did, we would have to
- remove the pthread_detach() from
- os_thread_exit(). Now we just sleep 0.1
- seconds and hope that is enough! */
-
- os_mutex_exit(os_sync_mutex);
-
- os_thread_sleep(100000);
-
- break;
- }
-
- os_mutex_exit(os_sync_mutex);
-
- os_thread_sleep(100000);
- }
-
- if (i == 1000) {
- fprintf(stderr,
- "InnoDB: Warning: %lu threads created by InnoDB"
- " had not exited at shutdown!\n",
- (ulong) os_thread_count);
- }
-
- if (srv_monitor_file) {
- fclose(srv_monitor_file);
- srv_monitor_file = 0;
- if (srv_monitor_file_name) {
- unlink(srv_monitor_file_name);
- mem_free(srv_monitor_file_name);
- }
- }
- if (srv_dict_tmpfile) {
- fclose(srv_dict_tmpfile);
- srv_dict_tmpfile = 0;
- }
-
- if (srv_misc_tmpfile) {
- fclose(srv_misc_tmpfile);
- srv_misc_tmpfile = 0;
- }
-
- /* This must be disabled before closing the buffer pool
- and closing the data dictionary. */
- btr_search_disable();
-
- ibuf_close();
- log_shutdown();
- lock_sys_close();
- thr_local_close();
- trx_sys_file_format_close();
- trx_sys_close();
-
- mutex_free(&srv_monitor_file_mutex);
- mutex_free(&srv_dict_tmpfile_mutex);
- mutex_free(&srv_misc_tmpfile_mutex);
- dict_close();
- btr_search_sys_free();
-
- /* 3. Free all InnoDB's own mutexes and the os_fast_mutexes inside
- them */
- os_aio_free();
- sync_close();
- srv_free();
- fil_close();
-
- /* 4. Free the os_conc_mutex and all os_events and os_mutexes */
-
- os_sync_free();
-
- /* 5. Free all allocated memory */
-
- pars_lexer_close();
- log_mem_free();
- buf_pool_free();
- ut_free_all_mem();
- mem_close();
-
- if (os_thread_count != 0
- || os_event_count != 0
- || os_mutex_count != 0
- || os_fast_mutex_count != 0) {
- fprintf(stderr,
- "InnoDB: Warning: some resources were not"
- " cleaned up in shutdown:\n"
- "InnoDB: threads %lu, events %lu,"
- " os_mutexes %lu, os_fast_mutexes %lu\n",
- (ulong) os_thread_count, (ulong) os_event_count,
- (ulong) os_mutex_count, (ulong) os_fast_mutex_count);
- }
-
- if (dict_foreign_err_file) {
- fclose(dict_foreign_err_file);
- }
- if (lock_latest_err_file) {
- fclose(lock_latest_err_file);
- }
-
- if (srv_print_verbose_log) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Shutdown completed;"
- " log sequence number %llu\n",
- srv_shutdown_lsn);
- }
-
- srv_was_started = FALSE;
- srv_start_has_been_called = FALSE;
-
- return((int) DB_SUCCESS);
-}
-
-#ifdef __NETWARE__
-void set_panic_flag_for_netware()
-{
- extern ibool panic_shutdown;
- panic_shutdown = TRUE;
-}
-#endif /* __NETWARE__ */
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/sync/sync0arr.c b/storage/innodb_plugin/sync/sync0arr.c
deleted file mode 100644
index ed9e25bf2f2..00000000000
--- a/storage/innodb_plugin/sync/sync0arr.c
+++ /dev/null
@@ -1,1022 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file sync/sync0arr.c
-The wait array used in synchronization primitives
-
-Created 9/5/1995 Heikki Tuuri
-*******************************************************/
-
-#include "sync0arr.h"
-#ifdef UNIV_NONINL
-#include "sync0arr.ic"
-#endif
-
-#include "sync0sync.h"
-#include "sync0rw.h"
-#include "os0sync.h"
-#include "os0file.h"
-#include "srv0srv.h"
-
-/*
- WAIT ARRAY
- ==========
-
-The wait array consists of cells each of which has an
-an operating system event object created for it. The threads
-waiting for a mutex, for example, can reserve a cell
-in the array and suspend themselves to wait for the event
-to become signaled. When using the wait array, remember to make
-sure that some thread holding the synchronization object
-will eventually know that there is a waiter in the array and
-signal the object, to prevent infinite wait.
-Why we chose to implement a wait array? First, to make
-mutexes fast, we had to code our own implementation of them,
-which only in usually uncommon cases resorts to using
-slow operating system primitives. Then we had the choice of
-assigning a unique OS event for each mutex, which would
-be simpler, or using a global wait array. In some operating systems,
-the global wait array solution is more efficient and flexible,
-because we can do with a very small number of OS events,
-say 200. In NT 3.51, allocating events seems to be a quadratic
-algorithm, because 10 000 events are created fast, but
-100 000 events takes a couple of minutes to create.
-
-As of 5.0.30 the above mentioned design is changed. Since now
-OS can handle millions of wait events efficiently, we no longer
-have this concept of each cell of wait array having one event.
-Instead, now the event that a thread wants to wait on is embedded
-in the wait object (mutex or rw_lock). We still keep the global
-wait array for the sake of diagnostics and also to avoid infinite
-wait The error_monitor thread scans the global wait array to signal
-any waiting threads who have missed the signal. */
-
-/** A cell where an individual thread may wait suspended
-until a resource is released. The suspending is implemented
-using an operating system event semaphore. */
-struct sync_cell_struct {
- void* wait_object; /*!< pointer to the object the
- thread is waiting for; if NULL
- the cell is free for use */
- mutex_t* old_wait_mutex; /*!< the latest wait mutex in cell */
- rw_lock_t* old_wait_rw_lock;
- /*!< the latest wait rw-lock
- in cell */
- ulint request_type; /*!< lock type requested on the
- object */
- const char* file; /*!< in debug version file where
- requested */
- ulint line; /*!< in debug version line where
- requested */
- os_thread_id_t thread; /*!< thread id of this waiting
- thread */
- ibool waiting; /*!< TRUE if the thread has already
- called sync_array_event_wait
- on this cell */
- ib_int64_t signal_count; /*!< We capture the signal_count
- of the wait_object when we
- reset the event. This value is
- then passed on to os_event_wait
- and we wait only if the event
- has not been signalled in the
- period between the reset and
- wait call. */
- time_t reservation_time;/*!< time when the thread reserved
- the wait cell */
-};
-
-/* NOTE: It is allowed for a thread to wait
-for an event allocated for the array without owning the
-protecting mutex (depending on the case: OS or database mutex), but
-all changes (set or reset) to the state of the event must be made
-while owning the mutex. */
-
-/** Synchronization array */
-struct sync_array_struct {
- ulint n_reserved; /*!< number of currently reserved
- cells in the wait array */
- ulint n_cells; /*!< number of cells in the
- wait array */
- sync_cell_t* array; /*!< pointer to wait array */
- ulint protection; /*!< this flag tells which
- mutex protects the data */
- mutex_t mutex; /*!< possible database mutex
- protecting this data structure */
- os_mutex_t os_mutex; /*!< Possible operating system mutex
- protecting the data structure.
- As this data structure is used in
- constructing the database mutex,
- to prevent infinite recursion
- in implementation, we fall back to
- an OS mutex. */
- ulint sg_count; /*!< count of how many times an
- object has been signalled */
- ulint res_count; /*!< count of cell reservations
- since creation of the array */
-};
-
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-This function is called only in the debug version. Detects a deadlock
-of one or more threads because of waits of semaphores.
-@return TRUE if deadlock detected */
-static
-ibool
-sync_array_detect_deadlock(
-/*=======================*/
- sync_array_t* arr, /*!< in: wait array; NOTE! the caller must
- own the mutex to array */
- sync_cell_t* start, /*!< in: cell where recursive search started */
- sync_cell_t* cell, /*!< in: cell to search */
- ulint depth); /*!< in: recursion depth */
-#endif /* UNIV_SYNC_DEBUG */
-
-/*****************************************************************//**
-Gets the nth cell in array.
-@return cell */
-static
-sync_cell_t*
-sync_array_get_nth_cell(
-/*====================*/
- sync_array_t* arr, /*!< in: sync array */
- ulint n) /*!< in: index */
-{
- ut_a(arr);
- ut_a(n < arr->n_cells);
-
- return(arr->array + n);
-}
-
-/******************************************************************//**
-Reserves the mutex semaphore protecting a sync array. */
-static
-void
-sync_array_enter(
-/*=============*/
- sync_array_t* arr) /*!< in: sync wait array */
-{
- ulint protection;
-
- protection = arr->protection;
-
- if (protection == SYNC_ARRAY_OS_MUTEX) {
- os_mutex_enter(arr->os_mutex);
- } else if (protection == SYNC_ARRAY_MUTEX) {
- mutex_enter(&(arr->mutex));
- } else {
- ut_error;
- }
-}
-
-/******************************************************************//**
-Releases the mutex semaphore protecting a sync array. */
-static
-void
-sync_array_exit(
-/*============*/
- sync_array_t* arr) /*!< in: sync wait array */
-{
- ulint protection;
-
- protection = arr->protection;
-
- if (protection == SYNC_ARRAY_OS_MUTEX) {
- os_mutex_exit(arr->os_mutex);
- } else if (protection == SYNC_ARRAY_MUTEX) {
- mutex_exit(&(arr->mutex));
- } else {
- ut_error;
- }
-}
-
-/*******************************************************************//**
-Creates a synchronization wait array. It is protected by a mutex
-which is automatically reserved when the functions operating on it
-are called.
-@return own: created wait array */
-UNIV_INTERN
-sync_array_t*
-sync_array_create(
-/*==============*/
- ulint n_cells, /*!< in: number of cells in the array
- to create */
- ulint protection) /*!< in: either SYNC_ARRAY_OS_MUTEX or
- SYNC_ARRAY_MUTEX: determines the type
- of mutex protecting the data structure */
-{
- ulint sz;
- sync_array_t* arr;
-
- ut_a(n_cells > 0);
-
- /* Allocate memory for the data structures */
- arr = ut_malloc(sizeof(sync_array_t));
- memset(arr, 0x0, sizeof(*arr));
-
- sz = sizeof(sync_cell_t) * n_cells;
- arr->array = ut_malloc(sz);
- memset(arr->array, 0x0, sz);
-
- arr->n_cells = n_cells;
- arr->protection = protection;
-
- /* Then create the mutex to protect the wait array complex */
- if (protection == SYNC_ARRAY_OS_MUTEX) {
- arr->os_mutex = os_mutex_create(NULL);
- } else if (protection == SYNC_ARRAY_MUTEX) {
- mutex_create(&arr->mutex, SYNC_NO_ORDER_CHECK);
- } else {
- ut_error;
- }
-
- return(arr);
-}
-
-/******************************************************************//**
-Frees the resources in a wait array. */
-UNIV_INTERN
-void
-sync_array_free(
-/*============*/
- sync_array_t* arr) /*!< in, own: sync wait array */
-{
- ulint protection;
-
- ut_a(arr->n_reserved == 0);
-
- sync_array_validate(arr);
-
- protection = arr->protection;
-
- /* Release the mutex protecting the wait array complex */
-
- if (protection == SYNC_ARRAY_OS_MUTEX) {
- os_mutex_free(arr->os_mutex);
- } else if (protection == SYNC_ARRAY_MUTEX) {
- mutex_free(&(arr->mutex));
- } else {
- ut_error;
- }
-
- ut_free(arr->array);
- ut_free(arr);
-}
-
-/********************************************************************//**
-Validates the integrity of the wait array. Checks
-that the number of reserved cells equals the count variable. */
-UNIV_INTERN
-void
-sync_array_validate(
-/*================*/
- sync_array_t* arr) /*!< in: sync wait array */
-{
- ulint i;
- sync_cell_t* cell;
- ulint count = 0;
-
- sync_array_enter(arr);
-
- for (i = 0; i < arr->n_cells; i++) {
- cell = sync_array_get_nth_cell(arr, i);
- if (cell->wait_object != NULL) {
- count++;
- }
- }
-
- ut_a(count == arr->n_reserved);
-
- sync_array_exit(arr);
-}
-
-/*******************************************************************//**
-Returns the event that the thread owning the cell waits for. */
-static
-os_event_t
-sync_cell_get_event(
-/*================*/
- sync_cell_t* cell) /*!< in: non-empty sync array cell */
-{
- ulint type = cell->request_type;
-
- if (type == SYNC_MUTEX) {
- return(((mutex_t *) cell->wait_object)->event);
- } else if (type == RW_LOCK_WAIT_EX) {
- return(((rw_lock_t *) cell->wait_object)->wait_ex_event);
- } else { /* RW_LOCK_SHARED and RW_LOCK_EX wait on the same event */
- return(((rw_lock_t *) cell->wait_object)->event);
- }
-}
-
-/******************************************************************//**
-Reserves a wait array cell for waiting for an object.
-The event of the cell is reset to nonsignalled state. */
-UNIV_INTERN
-void
-sync_array_reserve_cell(
-/*====================*/
- sync_array_t* arr, /*!< in: wait array */
- void* object, /*!< in: pointer to the object to wait for */
- ulint type, /*!< in: lock request type */
- const char* file, /*!< in: file where requested */
- ulint line, /*!< in: line where requested */
- ulint* index) /*!< out: index of the reserved cell */
-{
- sync_cell_t* cell;
- os_event_t event;
- ulint i;
-
- ut_a(object);
- ut_a(index);
-
- sync_array_enter(arr);
-
- arr->res_count++;
-
- /* Reserve a new cell. */
- for (i = 0; i < arr->n_cells; i++) {
- cell = sync_array_get_nth_cell(arr, i);
-
- if (cell->wait_object == NULL) {
-
- cell->waiting = FALSE;
- cell->wait_object = object;
-
- if (type == SYNC_MUTEX) {
- cell->old_wait_mutex = object;
- } else {
- cell->old_wait_rw_lock = object;
- }
-
- cell->request_type = type;
-
- cell->file = file;
- cell->line = line;
-
- arr->n_reserved++;
-
- *index = i;
-
- sync_array_exit(arr);
-
- /* Make sure the event is reset and also store
- the value of signal_count at which the event
- was reset. */
- event = sync_cell_get_event(cell);
- cell->signal_count = os_event_reset(event);
-
- cell->reservation_time = time(NULL);
-
- cell->thread = os_thread_get_curr_id();
-
- return;
- }
- }
-
- ut_error; /* No free cell found */
-
- return;
-}
-
-/******************************************************************//**
-This function should be called when a thread starts to wait on
-a wait array cell. In the debug version this function checks
-if the wait for a semaphore will result in a deadlock, in which
-case prints info and asserts. */
-UNIV_INTERN
-void
-sync_array_wait_event(
-/*==================*/
- sync_array_t* arr, /*!< in: wait array */
- ulint index) /*!< in: index of the reserved cell */
-{
- sync_cell_t* cell;
- os_event_t event;
-
- ut_a(arr);
-
- sync_array_enter(arr);
-
- cell = sync_array_get_nth_cell(arr, index);
-
- ut_a(cell->wait_object);
- ut_a(!cell->waiting);
- ut_ad(os_thread_get_curr_id() == cell->thread);
-
- event = sync_cell_get_event(cell);
- cell->waiting = TRUE;
-
-#ifdef UNIV_SYNC_DEBUG
-
- /* We use simple enter to the mutex below, because if
- we cannot acquire it at once, mutex_enter would call
- recursively sync_array routines, leading to trouble.
- rw_lock_debug_mutex freezes the debug lists. */
-
- rw_lock_debug_mutex_enter();
-
- if (TRUE == sync_array_detect_deadlock(arr, cell, cell, 0)) {
-
- fputs("########################################\n", stderr);
- ut_error;
- }
-
- rw_lock_debug_mutex_exit();
-#endif
- sync_array_exit(arr);
-
- os_event_wait_low(event, cell->signal_count);
-
- sync_array_free_cell(arr, index);
-}
-
-/******************************************************************//**
-Reports info of a wait array cell. */
-static
-void
-sync_array_cell_print(
-/*==================*/
- FILE* file, /*!< in: file where to print */
- sync_cell_t* cell) /*!< in: sync cell */
-{
- mutex_t* mutex;
- rw_lock_t* rwlock;
- ulint type;
- ulint writer;
-
- type = cell->request_type;
-
- fprintf(file,
- "--Thread %lu has waited at %s line %lu"
- " for %.2f seconds the semaphore:\n",
- (ulong) os_thread_pf(cell->thread), cell->file,
- (ulong) cell->line,
- difftime(time(NULL), cell->reservation_time));
-
- if (type == SYNC_MUTEX) {
- /* We use old_wait_mutex in case the cell has already
- been freed meanwhile */
- mutex = cell->old_wait_mutex;
-
- fprintf(file,
- "Mutex at %p created file %s line %lu, lock var %lu\n"
-#ifdef UNIV_SYNC_DEBUG
- "Last time reserved in file %s line %lu, "
-#endif /* UNIV_SYNC_DEBUG */
- "waiters flag %lu\n",
- (void*) mutex, mutex->cfile_name, (ulong) mutex->cline,
- (ulong) mutex->lock_word,
-#ifdef UNIV_SYNC_DEBUG
- mutex->file_name, (ulong) mutex->line,
-#endif /* UNIV_SYNC_DEBUG */
- (ulong) mutex->waiters);
-
- } else if (type == RW_LOCK_EX
- || type == RW_LOCK_WAIT_EX
- || type == RW_LOCK_SHARED) {
-
- fputs(type == RW_LOCK_EX ? "X-lock on" : "S-lock on", file);
-
- rwlock = cell->old_wait_rw_lock;
-
- fprintf(file,
- " RW-latch at %p created in file %s line %lu\n",
- (void*) rwlock, rwlock->cfile_name,
- (ulong) rwlock->cline);
- writer = rw_lock_get_writer(rwlock);
- if (writer != RW_LOCK_NOT_LOCKED) {
- fprintf(file,
- "a writer (thread id %lu) has"
- " reserved it in mode %s",
- (ulong) os_thread_pf(rwlock->writer_thread),
- writer == RW_LOCK_EX
- ? " exclusive\n"
- : " wait exclusive\n");
- }
-
- fprintf(file,
- "number of readers %lu, waiters flag %lu, "
- "lock_word: %lx\n"
- "Last time read locked in file %s line %lu\n"
- "Last time write locked in file %s line %lu\n",
- (ulong) rw_lock_get_reader_count(rwlock),
- (ulong) rwlock->waiters,
- rwlock->lock_word,
- rwlock->last_s_file_name,
- (ulong) rwlock->last_s_line,
- rwlock->last_x_file_name,
- (ulong) rwlock->last_x_line);
- } else {
- ut_error;
- }
-
- if (!cell->waiting) {
- fputs("wait has ended\n", file);
- }
-}
-
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Looks for a cell with the given thread id.
-@return pointer to cell or NULL if not found */
-static
-sync_cell_t*
-sync_array_find_thread(
-/*===================*/
- sync_array_t* arr, /*!< in: wait array */
- os_thread_id_t thread) /*!< in: thread id */
-{
- ulint i;
- sync_cell_t* cell;
-
- for (i = 0; i < arr->n_cells; i++) {
-
- cell = sync_array_get_nth_cell(arr, i);
-
- if (cell->wait_object != NULL
- && os_thread_eq(cell->thread, thread)) {
-
- return(cell); /* Found */
- }
- }
-
- return(NULL); /* Not found */
-}
-
-/******************************************************************//**
-Recursion step for deadlock detection.
-@return TRUE if deadlock detected */
-static
-ibool
-sync_array_deadlock_step(
-/*=====================*/
- sync_array_t* arr, /*!< in: wait array; NOTE! the caller must
- own the mutex to array */
- sync_cell_t* start, /*!< in: cell where recursive search
- started */
- os_thread_id_t thread, /*!< in: thread to look at */
- ulint pass, /*!< in: pass value */
- ulint depth) /*!< in: recursion depth */
-{
- sync_cell_t* new;
- ibool ret;
-
- depth++;
-
- if (pass != 0) {
- /* If pass != 0, then we do not know which threads are
- responsible of releasing the lock, and no deadlock can
- be detected. */
-
- return(FALSE);
- }
-
- new = sync_array_find_thread(arr, thread);
-
- if (new == start) {
- /* Stop running of other threads */
-
- ut_dbg_stop_threads = TRUE;
-
- /* Deadlock */
- fputs("########################################\n"
- "DEADLOCK of threads detected!\n", stderr);
-
- return(TRUE);
-
- } else if (new) {
- ret = sync_array_detect_deadlock(arr, start, new, depth);
-
- if (ret) {
- return(TRUE);
- }
- }
- return(FALSE);
-}
-
-/******************************************************************//**
-This function is called only in the debug version. Detects a deadlock
-of one or more threads because of waits of semaphores.
-@return TRUE if deadlock detected */
-static
-ibool
-sync_array_detect_deadlock(
-/*=======================*/
- sync_array_t* arr, /*!< in: wait array; NOTE! the caller must
- own the mutex to array */
- sync_cell_t* start, /*!< in: cell where recursive search started */
- sync_cell_t* cell, /*!< in: cell to search */
- ulint depth) /*!< in: recursion depth */
-{
- mutex_t* mutex;
- rw_lock_t* lock;
- os_thread_id_t thread;
- ibool ret;
- rw_lock_debug_t*debug;
-
- ut_a(arr);
- ut_a(start);
- ut_a(cell);
- ut_ad(cell->wait_object);
- ut_ad(os_thread_get_curr_id() == start->thread);
- ut_ad(depth < 100);
-
- depth++;
-
- if (!cell->waiting) {
-
- return(FALSE); /* No deadlock here */
- }
-
- if (cell->request_type == SYNC_MUTEX) {
-
- mutex = cell->wait_object;
-
- if (mutex_get_lock_word(mutex) != 0) {
-
- thread = mutex->thread_id;
-
- /* Note that mutex->thread_id above may be
- also OS_THREAD_ID_UNDEFINED, because the
- thread which held the mutex maybe has not
- yet updated the value, or it has already
- released the mutex: in this case no deadlock
- can occur, as the wait array cannot contain
- a thread with ID_UNDEFINED value. */
-
- ret = sync_array_deadlock_step(arr, start, thread, 0,
- depth);
- if (ret) {
- fprintf(stderr,
- "Mutex %p owned by thread %lu file %s line %lu\n",
- mutex, (ulong) os_thread_pf(mutex->thread_id),
- mutex->file_name, (ulong) mutex->line);
- sync_array_cell_print(stderr, cell);
-
- return(TRUE);
- }
- }
-
- return(FALSE); /* No deadlock */
-
- } else if (cell->request_type == RW_LOCK_EX
- || cell->request_type == RW_LOCK_WAIT_EX) {
-
- lock = cell->wait_object;
-
- debug = UT_LIST_GET_FIRST(lock->debug_list);
-
- while (debug != NULL) {
-
- thread = debug->thread_id;
-
- if (((debug->lock_type == RW_LOCK_EX)
- && !os_thread_eq(thread, cell->thread))
- || ((debug->lock_type == RW_LOCK_WAIT_EX)
- && !os_thread_eq(thread, cell->thread))
- || (debug->lock_type == RW_LOCK_SHARED)) {
-
- /* The (wait) x-lock request can block
- infinitely only if someone (can be also cell
- thread) is holding s-lock, or someone
- (cannot be cell thread) (wait) x-lock, and
- he is blocked by start thread */
-
- ret = sync_array_deadlock_step(
- arr, start, thread, debug->pass,
- depth);
- if (ret) {
-print:
- fprintf(stderr, "rw-lock %p ",
- (void*) lock);
- sync_array_cell_print(stderr, cell);
- rw_lock_debug_print(debug);
- return(TRUE);
- }
- }
-
- debug = UT_LIST_GET_NEXT(list, debug);
- }
-
- return(FALSE);
-
- } else if (cell->request_type == RW_LOCK_SHARED) {
-
- lock = cell->wait_object;
- debug = UT_LIST_GET_FIRST(lock->debug_list);
-
- while (debug != NULL) {
-
- thread = debug->thread_id;
-
- if ((debug->lock_type == RW_LOCK_EX)
- || (debug->lock_type == RW_LOCK_WAIT_EX)) {
-
- /* The s-lock request can block infinitely
- only if someone (can also be cell thread) is
- holding (wait) x-lock, and he is blocked by
- start thread */
-
- ret = sync_array_deadlock_step(
- arr, start, thread, debug->pass,
- depth);
- if (ret) {
- goto print;
- }
- }
-
- debug = UT_LIST_GET_NEXT(list, debug);
- }
-
- return(FALSE);
-
- } else {
- ut_error;
- }
-
- return(TRUE); /* Execution never reaches this line: for compiler
- fooling only */
-}
-#endif /* UNIV_SYNC_DEBUG */
-
-/******************************************************************//**
-Determines if we can wake up the thread waiting for a sempahore. */
-static
-ibool
-sync_arr_cell_can_wake_up(
-/*======================*/
- sync_cell_t* cell) /*!< in: cell to search */
-{
- mutex_t* mutex;
- rw_lock_t* lock;
-
- if (cell->request_type == SYNC_MUTEX) {
-
- mutex = cell->wait_object;
-
- if (mutex_get_lock_word(mutex) == 0) {
-
- return(TRUE);
- }
-
- } else if (cell->request_type == RW_LOCK_EX) {
-
- lock = cell->wait_object;
-
- if (lock->lock_word > 0) {
- /* Either unlocked or only read locked. */
-
- return(TRUE);
- }
-
- } else if (cell->request_type == RW_LOCK_WAIT_EX) {
-
- lock = cell->wait_object;
-
- /* lock_word == 0 means all readers have left */
- if (lock->lock_word == 0) {
-
- return(TRUE);
- }
- } else if (cell->request_type == RW_LOCK_SHARED) {
- lock = cell->wait_object;
-
- /* lock_word > 0 means no writer or reserved writer */
- if (lock->lock_word > 0) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/******************************************************************//**
-Frees the cell. NOTE! sync_array_wait_event frees the cell
-automatically! */
-UNIV_INTERN
-void
-sync_array_free_cell(
-/*=================*/
- sync_array_t* arr, /*!< in: wait array */
- ulint index) /*!< in: index of the cell in array */
-{
- sync_cell_t* cell;
-
- sync_array_enter(arr);
-
- cell = sync_array_get_nth_cell(arr, index);
-
- ut_a(cell->wait_object != NULL);
-
- cell->waiting = FALSE;
- cell->wait_object = NULL;
- cell->signal_count = 0;
-
- ut_a(arr->n_reserved > 0);
- arr->n_reserved--;
-
- sync_array_exit(arr);
-}
-
-/**********************************************************************//**
-Increments the signalled count. */
-UNIV_INTERN
-void
-sync_array_object_signalled(
-/*========================*/
- sync_array_t* arr) /*!< in: wait array */
-{
-#ifdef HAVE_ATOMIC_BUILTINS
- (void) os_atomic_increment_ulint(&arr->sg_count, 1);
-#else
- sync_array_enter(arr);
-
- arr->sg_count++;
-
- sync_array_exit(arr);
-#endif
-}
-
-/**********************************************************************//**
-If the wakeup algorithm does not work perfectly at semaphore relases,
-this function will do the waking (see the comment in mutex_exit). This
-function should be called about every 1 second in the server.
-
-Note that there's a race condition between this thread and mutex_exit
-changing the lock_word and calling signal_object, so sometimes this finds
-threads to wake up even when nothing has gone wrong. */
-UNIV_INTERN
-void
-sync_arr_wake_threads_if_sema_free(void)
-/*====================================*/
-{
- sync_array_t* arr = sync_primary_wait_array;
- sync_cell_t* cell;
- ulint count;
- ulint i;
- os_event_t event;
-
- sync_array_enter(arr);
-
- i = 0;
- count = 0;
-
- while (count < arr->n_reserved) {
-
- cell = sync_array_get_nth_cell(arr, i);
- i++;
-
- if (cell->wait_object == NULL) {
- continue;
- }
- count++;
-
- if (sync_arr_cell_can_wake_up(cell)) {
-
- event = sync_cell_get_event(cell);
-
- os_event_set(event);
- }
-
- }
-
- sync_array_exit(arr);
-}
-
-/**********************************************************************//**
-Prints warnings of long semaphore waits to stderr.
-@return TRUE if fatal semaphore wait threshold was exceeded */
-UNIV_INTERN
-ibool
-sync_array_print_long_waits(void)
-/*=============================*/
-{
- sync_cell_t* cell;
- ibool old_val;
- ibool noticed = FALSE;
- ulint i;
- ulint fatal_timeout = srv_fatal_semaphore_wait_threshold;
- ibool fatal = FALSE;
-
- for (i = 0; i < sync_primary_wait_array->n_cells; i++) {
-
- cell = sync_array_get_nth_cell(sync_primary_wait_array, i);
-
- if (cell->wait_object != NULL && cell->waiting
- && difftime(time(NULL), cell->reservation_time) > 240) {
- fputs("InnoDB: Warning: a long semaphore wait:\n",
- stderr);
- sync_array_cell_print(stderr, cell);
- noticed = TRUE;
- }
-
- if (cell->wait_object != NULL && cell->waiting
- && difftime(time(NULL), cell->reservation_time)
- > fatal_timeout) {
- fatal = TRUE;
- }
- }
-
- if (noticed) {
- fprintf(stderr,
- "InnoDB: ###### Starts InnoDB Monitor"
- " for 30 secs to print diagnostic info:\n");
- old_val = srv_print_innodb_monitor;
-
- /* If some crucial semaphore is reserved, then also the InnoDB
- Monitor can hang, and we do not get diagnostics. Since in
- many cases an InnoDB hang is caused by a pwrite() or a pread()
- call hanging inside the operating system, let us print right
- now the values of pending calls of these. */
-
- fprintf(stderr,
- "InnoDB: Pending preads %lu, pwrites %lu\n",
- (ulong)os_file_n_pending_preads,
- (ulong)os_file_n_pending_pwrites);
-
- srv_print_innodb_monitor = TRUE;
- os_event_set(srv_lock_timeout_thread_event);
-
- os_thread_sleep(30000000);
-
- srv_print_innodb_monitor = old_val;
- fprintf(stderr,
- "InnoDB: ###### Diagnostic info printed"
- " to the standard error stream\n");
- }
-
- return(fatal);
-}
-
-/**********************************************************************//**
-Prints info of the wait array. */
-static
-void
-sync_array_output_info(
-/*===================*/
- FILE* file, /*!< in: file where to print */
- sync_array_t* arr) /*!< in: wait array; NOTE! caller must own the
- mutex */
-{
- sync_cell_t* cell;
- ulint count;
- ulint i;
-
- fprintf(file,
- "OS WAIT ARRAY INFO: reservation count %ld, signal count %ld\n",
- (long) arr->res_count, (long) arr->sg_count);
- i = 0;
- count = 0;
-
- while (count < arr->n_reserved) {
-
- cell = sync_array_get_nth_cell(arr, i);
-
- if (cell->wait_object != NULL) {
- count++;
- sync_array_cell_print(file, cell);
- }
-
- i++;
- }
-}
-
-/**********************************************************************//**
-Prints info of the wait array. */
-UNIV_INTERN
-void
-sync_array_print_info(
-/*==================*/
- FILE* file, /*!< in: file where to print */
- sync_array_t* arr) /*!< in: wait array */
-{
- sync_array_enter(arr);
-
- sync_array_output_info(file, arr);
-
- sync_array_exit(arr);
-}
diff --git a/storage/innodb_plugin/sync/sync0rw.c b/storage/innodb_plugin/sync/sync0rw.c
deleted file mode 100644
index d231b6acdf7..00000000000
--- a/storage/innodb_plugin/sync/sync0rw.c
+++ /dev/null
@@ -1,1042 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file sync/sync0rw.c
-The read-write lock (for thread synchronization)
-
-Created 9/11/1995 Heikki Tuuri
-*******************************************************/
-
-#include "sync0rw.h"
-#ifdef UNIV_NONINL
-#include "sync0rw.ic"
-#endif
-
-#include "os0thread.h"
-#include "mem0mem.h"
-#include "srv0srv.h"
-#include "os0sync.h" /* for INNODB_RW_LOCKS_USE_ATOMICS */
-
-/*
- IMPLEMENTATION OF THE RW_LOCK
- =============================
-The status of a rw_lock is held in lock_word. The initial value of lock_word is
-X_LOCK_DECR. lock_word is decremented by 1 for each s-lock and by X_LOCK_DECR
-for each x-lock. This describes the lock state for each value of lock_word:
-
-lock_word == X_LOCK_DECR: Unlocked.
-0 < lock_word < X_LOCK_DECR: Read locked, no waiting writers.
- (X_LOCK_DECR - lock_word) is the
- number of readers that hold the lock.
-lock_word == 0: Write locked
--X_LOCK_DECR < lock_word < 0: Read locked, with a waiting writer.
- (-lock_word) is the number of readers
- that hold the lock.
-lock_word <= -X_LOCK_DECR: Recursively write locked. lock_word has been
- decremented by X_LOCK_DECR once for each lock,
- so the number of locks is:
- ((-lock_word) / X_LOCK_DECR) + 1
-When lock_word <= -X_LOCK_DECR, we also know that lock_word % X_LOCK_DECR == 0:
-other values of lock_word are invalid.
-
-The lock_word is always read and updated atomically and consistently, so that
-it always represents the state of the lock, and the state of the lock changes
-with a single atomic operation. This lock_word holds all of the information
-that a thread needs in order to determine if it is eligible to gain the lock
-or if it must spin or sleep. The one exception to this is that writer_thread
-must be verified before recursive write locks: to solve this scenario, we make
-writer_thread readable by all threads, but only writeable by the x-lock holder.
-
-The other members of the lock obey the following rules to remain consistent:
-
-recursive: This and the writer_thread field together control the
- behaviour of recursive x-locking.
- lock->recursive must be FALSE in following states:
- 1) The writer_thread contains garbage i.e.: the
- lock has just been initialized.
- 2) The lock is not x-held and there is no
- x-waiter waiting on WAIT_EX event.
- 3) The lock is x-held or there is an x-waiter
- waiting on WAIT_EX event but the 'pass' value
- is non-zero.
- lock->recursive is TRUE iff:
- 1) The lock is x-held or there is an x-waiter
- waiting on WAIT_EX event and the 'pass' value
- is zero.
- This flag must be set after the writer_thread field
- has been updated with a memory ordering barrier.
- It is unset before the lock_word has been incremented.
-writer_thread: Is used only in recursive x-locking. Can only be safely
- read iff lock->recursive flag is TRUE.
- This field is uninitialized at lock creation time and
- is updated atomically when x-lock is acquired or when
- move_ownership is called. A thread is only allowed to
- set the value of this field to it's thread_id i.e.: a
- thread cannot set writer_thread to some other thread's
- id.
-waiters: May be set to 1 anytime, but to avoid unnecessary wake-up
- signals, it should only be set to 1 when there are threads
- waiting on event. Must be 1 when a writer starts waiting to
- ensure the current x-locking thread sends a wake-up signal
- during unlock. May only be reset to 0 immediately before a
- a wake-up signal is sent to event. On most platforms, a
- memory barrier is required after waiters is set, and before
- verifying lock_word is still held, to ensure some unlocker
- really does see the flags new value.
-event: Threads wait on event for read or writer lock when another
- thread has an x-lock or an x-lock reservation (wait_ex). A
- thread may only wait on event after performing the following
- actions in order:
- (1) Record the counter value of event (with os_event_reset).
- (2) Set waiters to 1.
- (3) Verify lock_word <= 0.
- (1) must come before (2) to ensure signal is not missed.
- (2) must come before (3) to ensure a signal is sent.
- These restrictions force the above ordering.
- Immediately before sending the wake-up signal, we should:
- (1) Verify lock_word == X_LOCK_DECR (unlocked)
- (2) Reset waiters to 0.
-wait_ex_event: A thread may only wait on the wait_ex_event after it has
- performed the following actions in order:
- (1) Decrement lock_word by X_LOCK_DECR.
- (2) Record counter value of wait_ex_event (os_event_reset,
- called from sync_array_reserve_cell).
- (3) Verify that lock_word < 0.
- (1) must come first to ensures no other threads become reader
- or next writer, and notifies unlocker that signal must be sent.
- (2) must come before (3) to ensure the signal is not missed.
- These restrictions force the above ordering.
- Immediately before sending the wake-up signal, we should:
- Verify lock_word == 0 (waiting thread holds x_lock)
-*/
-
-
-/** number of spin waits on rw-latches,
-resulted during shared (read) locks */
-UNIV_INTERN ib_int64_t rw_s_spin_wait_count = 0;
-/** number of spin loop rounds on rw-latches,
-resulted during shared (read) locks */
-UNIV_INTERN ib_int64_t rw_s_spin_round_count = 0;
-
-/** number of OS waits on rw-latches,
-resulted during shared (read) locks */
-UNIV_INTERN ib_int64_t rw_s_os_wait_count = 0;
-
-/** number of unlocks (that unlock shared locks),
-set only when UNIV_SYNC_PERF_STAT is defined */
-UNIV_INTERN ib_int64_t rw_s_exit_count = 0;
-
-/** number of spin waits on rw-latches,
-resulted during exclusive (write) locks */
-UNIV_INTERN ib_int64_t rw_x_spin_wait_count = 0;
-/** number of spin loop rounds on rw-latches,
-resulted during exclusive (write) locks */
-UNIV_INTERN ib_int64_t rw_x_spin_round_count = 0;
-
-/** number of OS waits on rw-latches,
-resulted during exclusive (write) locks */
-UNIV_INTERN ib_int64_t rw_x_os_wait_count = 0;
-
-/** number of unlocks (that unlock exclusive locks),
-set only when UNIV_SYNC_PERF_STAT is defined */
-UNIV_INTERN ib_int64_t rw_x_exit_count = 0;
-
-/* The global list of rw-locks */
-UNIV_INTERN rw_lock_list_t rw_lock_list;
-UNIV_INTERN mutex_t rw_lock_list_mutex;
-
-#ifdef UNIV_SYNC_DEBUG
-/* The global mutex which protects debug info lists of all rw-locks.
-To modify the debug info list of an rw-lock, this mutex has to be
-acquired in addition to the mutex protecting the lock. */
-
-UNIV_INTERN mutex_t rw_lock_debug_mutex;
-/* If deadlock detection does not get immediately the mutex,
-it may wait for this event */
-UNIV_INTERN os_event_t rw_lock_debug_event;
-/* This is set to TRUE, if there may be waiters for the event */
-UNIV_INTERN ibool rw_lock_debug_waiters;
-
-/******************************************************************//**
-Creates a debug info struct. */
-static
-rw_lock_debug_t*
-rw_lock_debug_create(void);
-/*======================*/
-/******************************************************************//**
-Frees a debug info struct. */
-static
-void
-rw_lock_debug_free(
-/*===============*/
- rw_lock_debug_t* info);
-
-/******************************************************************//**
-Creates a debug info struct.
-@return own: debug info struct */
-static
-rw_lock_debug_t*
-rw_lock_debug_create(void)
-/*======================*/
-{
- return((rw_lock_debug_t*) mem_alloc(sizeof(rw_lock_debug_t)));
-}
-
-/******************************************************************//**
-Frees a debug info struct. */
-static
-void
-rw_lock_debug_free(
-/*===============*/
- rw_lock_debug_t* info)
-{
- mem_free(info);
-}
-#endif /* UNIV_SYNC_DEBUG */
-
-/******************************************************************//**
-Creates, or rather, initializes an rw-lock object in a specified memory
-location (which must be appropriately aligned). The rw-lock is initialized
-to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
-is necessary only if the memory block containing it is freed. */
-UNIV_INTERN
-void
-rw_lock_create_func(
-/*================*/
- rw_lock_t* lock, /*!< in: pointer to memory */
-#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
- const char* cmutex_name, /*!< in: mutex name */
-#endif /* UNIV_DEBUG */
- const char* cfile_name, /*!< in: file name where created */
- ulint cline) /*!< in: file line where created */
-{
- /* If this is the very first time a synchronization object is
- created, then the following call initializes the sync system. */
-
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- mutex_create(rw_lock_get_mutex(lock), SYNC_NO_ORDER_CHECK);
-
- lock->mutex.cfile_name = cfile_name;
- lock->mutex.cline = cline;
-
- ut_d(lock->mutex.cmutex_name = cmutex_name);
- ut_d(lock->mutex.mutex_type = 1);
-#else /* INNODB_RW_LOCKS_USE_ATOMICS */
-# ifdef UNIV_DEBUG
- UT_NOT_USED(cmutex_name);
-# endif
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
-
- lock->lock_word = X_LOCK_DECR;
- lock->waiters = 0;
-
- /* We set this value to signify that lock->writer_thread
- contains garbage at initialization and cannot be used for
- recursive x-locking. */
- lock->recursive = FALSE;
-
-#ifdef UNIV_SYNC_DEBUG
- UT_LIST_INIT(lock->debug_list);
-
- lock->level = level;
-#endif /* UNIV_SYNC_DEBUG */
-
- lock->magic_n = RW_LOCK_MAGIC_N;
-
- lock->cfile_name = cfile_name;
- lock->cline = (unsigned int) cline;
-
- lock->count_os_wait = 0;
- lock->last_s_file_name = "not yet reserved";
- lock->last_x_file_name = "not yet reserved";
- lock->last_s_line = 0;
- lock->last_x_line = 0;
- lock->event = os_event_create(NULL);
- lock->wait_ex_event = os_event_create(NULL);
-
- mutex_enter(&rw_lock_list_mutex);
-
- if (UT_LIST_GET_LEN(rw_lock_list) > 0) {
- ut_a(UT_LIST_GET_FIRST(rw_lock_list)->magic_n
- == RW_LOCK_MAGIC_N);
- }
-
- UT_LIST_ADD_FIRST(list, rw_lock_list, lock);
-
- mutex_exit(&rw_lock_list_mutex);
-}
-
-/******************************************************************//**
-Calling this function is obligatory only if the memory buffer containing
-the rw-lock is freed. Removes an rw-lock object from the global list. The
-rw-lock is checked to be in the non-locked state. */
-UNIV_INTERN
-void
-rw_lock_free(
-/*=========*/
- rw_lock_t* lock) /*!< in: rw-lock */
-{
- ut_ad(rw_lock_validate(lock));
- ut_a(lock->lock_word == X_LOCK_DECR);
-
- lock->magic_n = 0;
-
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- mutex_free(rw_lock_get_mutex(lock));
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
-
- mutex_enter(&rw_lock_list_mutex);
- os_event_free(lock->event);
-
- os_event_free(lock->wait_ex_event);
-
- if (UT_LIST_GET_PREV(list, lock)) {
- ut_a(UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N);
- }
- if (UT_LIST_GET_NEXT(list, lock)) {
- ut_a(UT_LIST_GET_NEXT(list, lock)->magic_n == RW_LOCK_MAGIC_N);
- }
-
- UT_LIST_REMOVE(list, rw_lock_list, lock);
-
- mutex_exit(&rw_lock_list_mutex);
-}
-
-#ifdef UNIV_DEBUG
-/******************************************************************//**
-Checks that the rw-lock has been initialized and that there are no
-simultaneous shared and exclusive locks.
-@return TRUE */
-UNIV_INTERN
-ibool
-rw_lock_validate(
-/*=============*/
- rw_lock_t* lock) /*!< in: rw-lock */
-{
- ut_a(lock);
-
- ulint waiters = rw_lock_get_waiters(lock);
- lint lock_word = lock->lock_word;
-
- ut_a(lock->magic_n == RW_LOCK_MAGIC_N);
- ut_a(waiters == 0 || waiters == 1);
- ut_a(lock_word > -X_LOCK_DECR ||(-lock_word) % X_LOCK_DECR == 0);
-
- return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
-/******************************************************************//**
-Lock an rw-lock in shared mode for the current thread. If the rw-lock is
-locked in exclusive mode, or there is an exclusive lock request waiting,
-the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
-for the lock, before suspending the thread. */
-UNIV_INTERN
-void
-rw_lock_s_lock_spin(
-/*================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the lock
- will be passed to another thread to unlock */
- const char* file_name, /*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
-{
- ulint index; /* index of the reserved wait cell */
- ulint i = 0; /* spin round count */
-
- ut_ad(rw_lock_validate(lock));
-
- rw_s_spin_wait_count++; /*!< Count calls to this function */
-lock_loop:
-
- /* Spin waiting for the writer field to become free */
- while (i < SYNC_SPIN_ROUNDS && lock->lock_word <= 0) {
- if (srv_spin_wait_delay) {
- ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
- }
-
- i++;
- }
-
- if (i == SYNC_SPIN_ROUNDS) {
- os_thread_yield();
- }
-
- if (srv_print_latch_waits) {
- fprintf(stderr,
- "Thread %lu spin wait rw-s-lock at %p"
- " cfile %s cline %lu rnds %lu\n",
- (ulong) os_thread_pf(os_thread_get_curr_id()),
- (void*) lock,
- lock->cfile_name, (ulong) lock->cline, (ulong) i);
- }
-
- /* We try once again to obtain the lock */
- if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
- rw_s_spin_round_count += i;
-
- return; /* Success */
- } else {
-
- if (i < SYNC_SPIN_ROUNDS) {
- goto lock_loop;
- }
-
- rw_s_spin_round_count += i;
-
- sync_array_reserve_cell(sync_primary_wait_array,
- lock, RW_LOCK_SHARED,
- file_name, line,
- &index);
-
- /* Set waiters before checking lock_word to ensure wake-up
- signal is sent. This may lead to some unnecessary signals. */
- rw_lock_set_waiter_flag(lock);
-
- if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
- sync_array_free_cell(sync_primary_wait_array, index);
- return; /* Success */
- }
-
- if (srv_print_latch_waits) {
- fprintf(stderr,
- "Thread %lu OS wait rw-s-lock at %p"
- " cfile %s cline %lu\n",
- os_thread_pf(os_thread_get_curr_id()),
- (void*) lock, lock->cfile_name,
- (ulong) lock->cline);
- }
-
- /* these stats may not be accurate */
- lock->count_os_wait++;
- rw_s_os_wait_count++;
-
- sync_array_wait_event(sync_primary_wait_array, index);
-
- i = 0;
- goto lock_loop;
- }
-}
-
-/******************************************************************//**
-This function is used in the insert buffer to move the ownership of an
-x-latch on a buffer frame to the current thread. The x-latch was set by
-the buffer read operation and it protected the buffer frame while the
-read was done. The ownership is moved because we want that the current
-thread is able to acquire a second x-latch which is stored in an mtr.
-This, in turn, is needed to pass the debug checks of index page
-operations. */
-UNIV_INTERN
-void
-rw_lock_x_lock_move_ownership(
-/*==========================*/
- rw_lock_t* lock) /*!< in: lock which was x-locked in the
- buffer read */
-{
- ut_ad(rw_lock_is_locked(lock, RW_LOCK_EX));
-
- rw_lock_set_writer_id_and_recursion_flag(lock, TRUE);
-}
-
-/******************************************************************//**
-Function for the next writer to call. Waits for readers to exit.
-The caller must have already decremented lock_word by X_LOCK_DECR. */
-UNIV_INLINE
-void
-rw_lock_x_lock_wait(
-/*================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
-#ifdef UNIV_SYNC_DEBUG
- ulint pass, /*!< in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
-#endif
- const char* file_name,/*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
-{
- ulint index;
- ulint i = 0;
-
- ut_ad(lock->lock_word <= 0);
-
- while (lock->lock_word < 0) {
- if (srv_spin_wait_delay) {
- ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
- }
- if(i < SYNC_SPIN_ROUNDS) {
- i++;
- continue;
- }
-
- /* If there is still a reader, then go to sleep.*/
- rw_x_spin_round_count += i;
- i = 0;
- sync_array_reserve_cell(sync_primary_wait_array,
- lock,
- RW_LOCK_WAIT_EX,
- file_name, line,
- &index);
- /* Check lock_word to ensure wake-up isn't missed.*/
- if(lock->lock_word < 0) {
-
- /* these stats may not be accurate */
- lock->count_os_wait++;
- rw_x_os_wait_count++;
-
- /* Add debug info as it is needed to detect possible
- deadlock. We must add info for WAIT_EX thread for
- deadlock detection to work properly. */
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX,
- file_name, line);
-#endif
-
- sync_array_wait_event(sync_primary_wait_array,
- index);
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_remove_debug_info(lock, pass,
- RW_LOCK_WAIT_EX);
-#endif
- /* It is possible to wake when lock_word < 0.
- We must pass the while-loop check to proceed.*/
- } else {
- sync_array_free_cell(sync_primary_wait_array,
- index);
- }
- }
- rw_x_spin_round_count += i;
-}
-
-/******************************************************************//**
-Low-level function for acquiring an exclusive lock.
-@return RW_LOCK_NOT_LOCKED if did not succeed, RW_LOCK_EX if success. */
-UNIV_INLINE
-ibool
-rw_lock_x_lock_low(
-/*===============*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
-{
- os_thread_id_t curr_thread = os_thread_get_curr_id();
-
- if (rw_lock_lock_word_decr(lock, X_LOCK_DECR)) {
-
- /* lock->recursive also tells us if the writer_thread
- field is stale or active. As we are going to write
- our own thread id in that field it must be that the
- current writer_thread value is not active. */
- ut_a(!lock->recursive);
-
- /* Decrement occurred: we are writer or next-writer. */
- rw_lock_set_writer_id_and_recursion_flag(lock,
- pass ? FALSE : TRUE);
-
- rw_lock_x_lock_wait(lock,
-#ifdef UNIV_SYNC_DEBUG
- pass,
-#endif
- file_name, line);
-
- } else {
- /* Decrement failed: relock or failed lock */
- if (!pass && lock->recursive
- && os_thread_eq(lock->writer_thread, curr_thread)) {
- /* Relock */
- lock->lock_word -= X_LOCK_DECR;
- } else {
- /* Another thread locked before us */
- return(FALSE);
- }
- }
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
- file_name, line);
-#endif
- lock->last_x_file_name = file_name;
- lock->last_x_line = (unsigned int) line;
-
- return(TRUE);
-}
-
-/******************************************************************//**
-NOTE! Use the corresponding macro, not directly this function! Lock an
-rw-lock in exclusive mode for the current thread. If the rw-lock is locked
-in shared or exclusive mode, or there is an exclusive lock request waiting,
-the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
-for the lock before suspending the thread. If the same thread has an x-lock
-on the rw-lock, locking succeed, with the following exception: if pass != 0,
-only a single x-lock may be taken on the lock. NOTE: If the same thread has
-an s-lock, locking does not succeed! */
-UNIV_INTERN
-void
-rw_lock_x_lock_func(
-/*================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
-{
- ulint index; /*!< index of the reserved wait cell */
- ulint i; /*!< spin round count */
- ibool spinning = FALSE;
-
- ut_ad(rw_lock_validate(lock));
-
- i = 0;
-
-lock_loop:
-
- if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
- rw_x_spin_round_count += i;
-
- return; /* Locking succeeded */
-
- } else {
-
- if (!spinning) {
- spinning = TRUE;
- rw_x_spin_wait_count++;
- }
-
- /* Spin waiting for the lock_word to become free */
- while (i < SYNC_SPIN_ROUNDS
- && lock->lock_word <= 0) {
- if (srv_spin_wait_delay) {
- ut_delay(ut_rnd_interval(0,
- srv_spin_wait_delay));
- }
-
- i++;
- }
- if (i == SYNC_SPIN_ROUNDS) {
- os_thread_yield();
- } else {
- goto lock_loop;
- }
- }
-
- rw_x_spin_round_count += i;
-
- if (srv_print_latch_waits) {
- fprintf(stderr,
- "Thread %lu spin wait rw-x-lock at %p"
- " cfile %s cline %lu rnds %lu\n",
- os_thread_pf(os_thread_get_curr_id()), (void*) lock,
- lock->cfile_name, (ulong) lock->cline, (ulong) i);
- }
-
- sync_array_reserve_cell(sync_primary_wait_array,
- lock,
- RW_LOCK_EX,
- file_name, line,
- &index);
-
- /* Waiters must be set before checking lock_word, to ensure signal
- is sent. This could lead to a few unnecessary wake-up signals. */
- rw_lock_set_waiter_flag(lock);
-
- if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
- sync_array_free_cell(sync_primary_wait_array, index);
- return; /* Locking succeeded */
- }
-
- if (srv_print_latch_waits) {
- fprintf(stderr,
- "Thread %lu OS wait for rw-x-lock at %p"
- " cfile %s cline %lu\n",
- os_thread_pf(os_thread_get_curr_id()), (void*) lock,
- lock->cfile_name, (ulong) lock->cline);
- }
-
- /* these stats may not be accurate */
- lock->count_os_wait++;
- rw_x_os_wait_count++;
-
- sync_array_wait_event(sync_primary_wait_array, index);
-
- i = 0;
- goto lock_loop;
-}
-
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Acquires the debug mutex. We cannot use the mutex defined in sync0sync,
-because the debug mutex is also acquired in sync0arr while holding the OS
-mutex protecting the sync array, and the ordinary mutex_enter might
-recursively call routines in sync0arr, leading to a deadlock on the OS
-mutex. */
-UNIV_INTERN
-void
-rw_lock_debug_mutex_enter(void)
-/*==========================*/
-{
-loop:
- if (0 == mutex_enter_nowait(&rw_lock_debug_mutex)) {
- return;
- }
-
- os_event_reset(rw_lock_debug_event);
-
- rw_lock_debug_waiters = TRUE;
-
- if (0 == mutex_enter_nowait(&rw_lock_debug_mutex)) {
- return;
- }
-
- os_event_wait(rw_lock_debug_event);
-
- goto loop;
-}
-
-/******************************************************************//**
-Releases the debug mutex. */
-UNIV_INTERN
-void
-rw_lock_debug_mutex_exit(void)
-/*==========================*/
-{
- mutex_exit(&rw_lock_debug_mutex);
-
- if (rw_lock_debug_waiters) {
- rw_lock_debug_waiters = FALSE;
- os_event_set(rw_lock_debug_event);
- }
-}
-
-/******************************************************************//**
-Inserts the debug information for an rw-lock. */
-UNIV_INTERN
-void
-rw_lock_add_debug_info(
-/*===================*/
- rw_lock_t* lock, /*!< in: rw-lock */
- ulint pass, /*!< in: pass value */
- ulint lock_type, /*!< in: lock type */
- const char* file_name, /*!< in: file where requested */
- ulint line) /*!< in: line where requested */
-{
- rw_lock_debug_t* info;
-
- ut_ad(lock);
- ut_ad(file_name);
-
- info = rw_lock_debug_create();
-
- rw_lock_debug_mutex_enter();
-
- info->file_name = file_name;
- info->line = line;
- info->lock_type = lock_type;
- info->thread_id = os_thread_get_curr_id();
- info->pass = pass;
-
- UT_LIST_ADD_FIRST(list, lock->debug_list, info);
-
- rw_lock_debug_mutex_exit();
-
- if ((pass == 0) && (lock_type != RW_LOCK_WAIT_EX)) {
- sync_thread_add_level(lock, lock->level);
- }
-}
-
-/******************************************************************//**
-Removes a debug information struct for an rw-lock. */
-UNIV_INTERN
-void
-rw_lock_remove_debug_info(
-/*======================*/
- rw_lock_t* lock, /*!< in: rw-lock */
- ulint pass, /*!< in: pass value */
- ulint lock_type) /*!< in: lock type */
-{
- rw_lock_debug_t* info;
-
- ut_ad(lock);
-
- if ((pass == 0) && (lock_type != RW_LOCK_WAIT_EX)) {
- sync_thread_reset_level(lock);
- }
-
- rw_lock_debug_mutex_enter();
-
- info = UT_LIST_GET_FIRST(lock->debug_list);
-
- while (info != NULL) {
- if ((pass == info->pass)
- && ((pass != 0)
- || os_thread_eq(info->thread_id,
- os_thread_get_curr_id()))
- && (info->lock_type == lock_type)) {
-
- /* Found! */
- UT_LIST_REMOVE(list, lock->debug_list, info);
- rw_lock_debug_mutex_exit();
-
- rw_lock_debug_free(info);
-
- return;
- }
-
- info = UT_LIST_GET_NEXT(list, info);
- }
-
- ut_error;
-}
-#endif /* UNIV_SYNC_DEBUG */
-
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Checks if the thread has locked the rw-lock in the specified mode, with
-the pass value == 0.
-@return TRUE if locked */
-UNIV_INTERN
-ibool
-rw_lock_own(
-/*========*/
- rw_lock_t* lock, /*!< in: rw-lock */
- ulint lock_type) /*!< in: lock type: RW_LOCK_SHARED,
- RW_LOCK_EX */
-{
- rw_lock_debug_t* info;
-
- ut_ad(lock);
- ut_ad(rw_lock_validate(lock));
-
- rw_lock_debug_mutex_enter();
-
- info = UT_LIST_GET_FIRST(lock->debug_list);
-
- while (info != NULL) {
-
- if (os_thread_eq(info->thread_id, os_thread_get_curr_id())
- && (info->pass == 0)
- && (info->lock_type == lock_type)) {
-
- rw_lock_debug_mutex_exit();
- /* Found! */
-
- return(TRUE);
- }
-
- info = UT_LIST_GET_NEXT(list, info);
- }
- rw_lock_debug_mutex_exit();
-
- return(FALSE);
-}
-#endif /* UNIV_SYNC_DEBUG */
-
-/******************************************************************//**
-Checks if somebody has locked the rw-lock in the specified mode.
-@return TRUE if locked */
-UNIV_INTERN
-ibool
-rw_lock_is_locked(
-/*==============*/
- rw_lock_t* lock, /*!< in: rw-lock */
- ulint lock_type) /*!< in: lock type: RW_LOCK_SHARED,
- RW_LOCK_EX */
-{
- ibool ret = FALSE;
-
- ut_ad(lock);
- ut_ad(rw_lock_validate(lock));
-
- if (lock_type == RW_LOCK_SHARED) {
- if (rw_lock_get_reader_count(lock) > 0) {
- ret = TRUE;
- }
- } else if (lock_type == RW_LOCK_EX) {
- if (rw_lock_get_writer(lock) == RW_LOCK_EX) {
- ret = TRUE;
- }
- } else {
- ut_error;
- }
-
- return(ret);
-}
-
-#ifdef UNIV_SYNC_DEBUG
-/***************************************************************//**
-Prints debug info of currently locked rw-locks. */
-UNIV_INTERN
-void
-rw_lock_list_print_info(
-/*====================*/
- FILE* file) /*!< in: file where to print */
-{
- rw_lock_t* lock;
- ulint count = 0;
- rw_lock_debug_t* info;
-
- mutex_enter(&rw_lock_list_mutex);
-
- fputs("-------------\n"
- "RW-LATCH INFO\n"
- "-------------\n", file);
-
- lock = UT_LIST_GET_FIRST(rw_lock_list);
-
- while (lock != NULL) {
-
- count++;
-
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- mutex_enter(&(lock->mutex));
-#endif
- if (lock->lock_word != X_LOCK_DECR) {
-
- fprintf(file, "RW-LOCK: %p ", (void*) lock);
-
- if (rw_lock_get_waiters(lock)) {
- fputs(" Waiters for the lock exist\n", file);
- } else {
- putc('\n', file);
- }
-
- info = UT_LIST_GET_FIRST(lock->debug_list);
- while (info != NULL) {
- rw_lock_debug_print(info);
- info = UT_LIST_GET_NEXT(list, info);
- }
- }
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- mutex_exit(&(lock->mutex));
-#endif
-
- lock = UT_LIST_GET_NEXT(list, lock);
- }
-
- fprintf(file, "Total number of rw-locks %ld\n", count);
- mutex_exit(&rw_lock_list_mutex);
-}
-
-/***************************************************************//**
-Prints debug info of an rw-lock. */
-UNIV_INTERN
-void
-rw_lock_print(
-/*==========*/
- rw_lock_t* lock) /*!< in: rw-lock */
-{
- rw_lock_debug_t* info;
-
- fprintf(stderr,
- "-------------\n"
- "RW-LATCH INFO\n"
- "RW-LATCH: %p ", (void*) lock);
-
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- /* We used to acquire lock->mutex here, but it would cause a
- recursive call to sync_thread_add_level() if UNIV_SYNC_DEBUG
- is defined. Since this function is only invoked from
- sync_thread_levels_g(), let us choose the smaller evil:
- performing dirty reads instead of causing bogus deadlocks or
- assertion failures. */
-#endif
- if (lock->lock_word != X_LOCK_DECR) {
-
- if (rw_lock_get_waiters(lock)) {
- fputs(" Waiters for the lock exist\n", stderr);
- } else {
- putc('\n', stderr);
- }
-
- info = UT_LIST_GET_FIRST(lock->debug_list);
- while (info != NULL) {
- rw_lock_debug_print(info);
- info = UT_LIST_GET_NEXT(list, info);
- }
- }
-}
-
-/*********************************************************************//**
-Prints info of a debug struct. */
-UNIV_INTERN
-void
-rw_lock_debug_print(
-/*================*/
- rw_lock_debug_t* info) /*!< in: debug struct */
-{
- ulint rwt;
-
- rwt = info->lock_type;
-
- fprintf(stderr, "Locked: thread %ld file %s line %ld ",
- (ulong) os_thread_pf(info->thread_id), info->file_name,
- (ulong) info->line);
- if (rwt == RW_LOCK_SHARED) {
- fputs("S-LOCK", stderr);
- } else if (rwt == RW_LOCK_EX) {
- fputs("X-LOCK", stderr);
- } else if (rwt == RW_LOCK_WAIT_EX) {
- fputs("WAIT X-LOCK", stderr);
- } else {
- ut_error;
- }
- if (info->pass != 0) {
- fprintf(stderr, " pass value %lu", (ulong) info->pass);
- }
- putc('\n', stderr);
-}
-
-/***************************************************************//**
-Returns the number of currently locked rw-locks. Works only in the debug
-version.
-@return number of locked rw-locks */
-UNIV_INTERN
-ulint
-rw_lock_n_locked(void)
-/*==================*/
-{
- rw_lock_t* lock;
- ulint count = 0;
-
- mutex_enter(&rw_lock_list_mutex);
-
- lock = UT_LIST_GET_FIRST(rw_lock_list);
-
- while (lock != NULL) {
-
- if (lock->lock_word != X_LOCK_DECR) {
- count++;
- }
-
- lock = UT_LIST_GET_NEXT(list, lock);
- }
-
- mutex_exit(&rw_lock_list_mutex);
-
- return(count);
-}
-#endif /* UNIV_SYNC_DEBUG */
diff --git a/storage/innodb_plugin/sync/sync0sync.c b/storage/innodb_plugin/sync/sync0sync.c
deleted file mode 100644
index 569fc6328c4..00000000000
--- a/storage/innodb_plugin/sync/sync0sync.c
+++ /dev/null
@@ -1,1441 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file sync/sync0sync.c
-Mutex, the basic synchronization primitive
-
-Created 9/5/1995 Heikki Tuuri
-*******************************************************/
-
-#include "sync0sync.h"
-#ifdef UNIV_NONINL
-#include "sync0sync.ic"
-#endif
-
-#include "sync0rw.h"
-#include "buf0buf.h"
-#include "srv0srv.h"
-#include "buf0types.h"
-#include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
-
-/*
- REASONS FOR IMPLEMENTING THE SPIN LOCK MUTEX
- ============================================
-
-Semaphore operations in operating systems are slow: Solaris on a 1993 Sparc
-takes 3 microseconds (us) for a lock-unlock pair and Windows NT on a 1995
-Pentium takes 20 microseconds for a lock-unlock pair. Therefore, we have to
-implement our own efficient spin lock mutex. Future operating systems may
-provide efficient spin locks, but we cannot count on that.
-
-Another reason for implementing a spin lock is that on multiprocessor systems
-it can be more efficient for a processor to run a loop waiting for the
-semaphore to be released than to switch to a different thread. A thread switch
-takes 25 us on both platforms mentioned above. See Gray and Reuter's book
-Transaction processing for background.
-
-How long should the spin loop last before suspending the thread? On a
-uniprocessor, spinning does not help at all, because if the thread owning the
-mutex is not executing, it cannot be released. Spinning actually wastes
-resources.
-
-On a multiprocessor, we do not know if the thread owning the mutex is
-executing or not. Thus it would make sense to spin as long as the operation
-guarded by the mutex would typically last assuming that the thread is
-executing. If the mutex is not released by that time, we may assume that the
-thread owning the mutex is not executing and suspend the waiting thread.
-
-A typical operation (where no i/o involved) guarded by a mutex or a read-write
-lock may last 1 - 20 us on the current Pentium platform. The longest
-operations are the binary searches on an index node.
-
-We conclude that the best choice is to set the spin time at 20 us. Then the
-system should work well on a multiprocessor. On a uniprocessor we have to
-make sure that thread swithches due to mutex collisions are not frequent,
-i.e., they do not happen every 100 us or so, because that wastes too much
-resources. If the thread switches are not frequent, the 20 us wasted in spin
-loop is not too much.
-
-Empirical studies on the effect of spin time should be done for different
-platforms.
-
-
- IMPLEMENTATION OF THE MUTEX
- ===========================
-
-For background, see Curt Schimmel's book on Unix implementation on modern
-architectures. The key points in the implementation are atomicity and
-serialization of memory accesses. The test-and-set instruction (XCHG in
-Pentium) must be atomic. As new processors may have weak memory models, also
-serialization of memory references may be necessary. The successor of Pentium,
-P6, has at least one mode where the memory model is weak. As far as we know,
-in Pentium all memory accesses are serialized in the program order and we do
-not have to worry about the memory model. On other processors there are
-special machine instructions called a fence, memory barrier, or storage
-barrier (STBAR in Sparc), which can be used to serialize the memory accesses
-to happen in program order relative to the fence instruction.
-
-Leslie Lamport has devised a "bakery algorithm" to implement a mutex without
-the atomic test-and-set, but his algorithm should be modified for weak memory
-models. We do not use Lamport's algorithm, because we guess it is slower than
-the atomic test-and-set.
-
-Our mutex implementation works as follows: After that we perform the atomic
-test-and-set instruction on the memory word. If the test returns zero, we
-know we got the lock first. If the test returns not zero, some other thread
-was quicker and got the lock: then we spin in a loop reading the memory word,
-waiting it to become zero. It is wise to just read the word in the loop, not
-perform numerous test-and-set instructions, because they generate memory
-traffic between the cache and the main memory. The read loop can just access
-the cache, saving bus bandwidth.
-
-If we cannot acquire the mutex lock in the specified time, we reserve a cell
-in the wait array, set the waiters byte in the mutex to 1. To avoid a race
-condition, after setting the waiters byte and before suspending the waiting
-thread, we still have to check that the mutex is reserved, because it may
-have happened that the thread which was holding the mutex has just released
-it and did not see the waiters byte set to 1, a case which would lead the
-other thread to an infinite wait.
-
-LEMMA 1: After a thread resets the event of a mutex (or rw_lock), some
-=======
-thread will eventually call os_event_set() on that particular event.
-Thus no infinite wait is possible in this case.
-
-Proof: After making the reservation the thread sets the waiters field in the
-mutex to 1. Then it checks that the mutex is still reserved by some thread,
-or it reserves the mutex for itself. In any case, some thread (which may be
-also some earlier thread, not necessarily the one currently holding the mutex)
-will set the waiters field to 0 in mutex_exit, and then call
-os_event_set() with the mutex as an argument.
-Q.E.D.
-
-LEMMA 2: If an os_event_set() call is made after some thread has called
-=======
-the os_event_reset() and before it starts wait on that event, the call
-will not be lost to the second thread. This is true even if there is an
-intervening call to os_event_reset() by another thread.
-Thus no infinite wait is possible in this case.
-
-Proof (non-windows platforms): os_event_reset() returns a monotonically
-increasing value of signal_count. This value is increased at every
-call of os_event_set() If thread A has called os_event_reset() followed
-by thread B calling os_event_set() and then some other thread C calling
-os_event_reset(), the is_set flag of the event will be set to FALSE;
-but now if thread A calls os_event_wait_low() with the signal_count
-value returned from the earlier call of os_event_reset(), it will
-return immediately without waiting.
-Q.E.D.
-
-Proof (windows): If there is a writer thread which is forced to wait for
-the lock, it may be able to set the state of rw_lock to RW_LOCK_WAIT_EX
-The design of rw_lock ensures that there is one and only one thread
-that is able to change the state to RW_LOCK_WAIT_EX and this thread is
-guaranteed to acquire the lock after it is released by the current
-holders and before any other waiter gets the lock.
-On windows this thread waits on a separate event i.e.: wait_ex_event.
-Since only one thread can wait on this event there is no chance
-of this event getting reset before the writer starts wait on it.
-Therefore, this thread is guaranteed to catch the os_set_event()
-signalled unconditionally at the release of the lock.
-Q.E.D. */
-
-/* Number of spin waits on mutexes: for performance monitoring */
-
-/** The number of iterations in the mutex_spin_wait() spin loop.
-Intended for performance monitoring. */
-static ib_int64_t mutex_spin_round_count = 0;
-/** The number of mutex_spin_wait() calls. Intended for
-performance monitoring. */
-static ib_int64_t mutex_spin_wait_count = 0;
-/** The number of OS waits in mutex_spin_wait(). Intended for
-performance monitoring. */
-static ib_int64_t mutex_os_wait_count = 0;
-/** The number of mutex_exit() calls. Intended for performance
-monitoring. */
-UNIV_INTERN ib_int64_t mutex_exit_count = 0;
-
-/** The global array of wait cells for implementation of the database's own
-mutexes and read-write locks */
-UNIV_INTERN sync_array_t* sync_primary_wait_array;
-
-/** This variable is set to TRUE when sync_init is called */
-UNIV_INTERN ibool sync_initialized = FALSE;
-
-/** An acquired mutex or rw-lock and its level in the latching order */
-typedef struct sync_level_struct sync_level_t;
-/** Mutexes or rw-locks held by a thread */
-typedef struct sync_thread_struct sync_thread_t;
-
-#ifdef UNIV_SYNC_DEBUG
-/** The latch levels currently owned by threads are stored in this data
-structure; the size of this array is OS_THREAD_MAX_N */
-
-UNIV_INTERN sync_thread_t* sync_thread_level_arrays;
-
-/** Mutex protecting sync_thread_level_arrays */
-UNIV_INTERN mutex_t sync_thread_mutex;
-#endif /* UNIV_SYNC_DEBUG */
-
-/** Global list of database mutexes (not OS mutexes) created. */
-UNIV_INTERN ut_list_base_node_t mutex_list;
-
-/** Mutex protecting the mutex_list variable */
-UNIV_INTERN mutex_t mutex_list_mutex;
-
-#ifdef UNIV_SYNC_DEBUG
-/** Latching order checks start when this is set TRUE */
-UNIV_INTERN ibool sync_order_checks_on = FALSE;
-#endif /* UNIV_SYNC_DEBUG */
-
-/** Mutexes or rw-locks held by a thread */
-struct sync_thread_struct{
- os_thread_id_t id; /*!< OS thread id */
- sync_level_t* levels; /*!< level array for this thread; if
- this is NULL this slot is unused */
-};
-
-/** Number of slots reserved for each OS thread in the sync level array */
-#define SYNC_THREAD_N_LEVELS 10000
-
-/** An acquired mutex or rw-lock and its level in the latching order */
-struct sync_level_struct{
- void* latch; /*!< pointer to a mutex or an rw-lock; NULL means that
- the slot is empty */
- ulint level; /*!< level of the latch in the latching order */
-};
-
-/******************************************************************//**
-Creates, or rather, initializes a mutex object in a specified memory
-location (which must be appropriately aligned). The mutex is initialized
-in the reset state. Explicit freeing of the mutex with mutex_free is
-necessary only if the memory block containing it is freed. */
-UNIV_INTERN
-void
-mutex_create_func(
-/*==============*/
- mutex_t* mutex, /*!< in: pointer to memory */
-#ifdef UNIV_DEBUG
- const char* cmutex_name, /*!< in: mutex name */
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
-#endif /* UNIV_DEBUG */
- const char* cfile_name, /*!< in: file name where created */
- ulint cline) /*!< in: file line where created */
-{
-#if defined(HAVE_ATOMIC_BUILTINS)
- mutex_reset_lock_word(mutex);
-#else
- os_fast_mutex_init(&(mutex->os_fast_mutex));
- mutex->lock_word = 0;
-#endif
- mutex->event = os_event_create(NULL);
- mutex_set_waiters(mutex, 0);
-#ifdef UNIV_DEBUG
- mutex->magic_n = MUTEX_MAGIC_N;
-#endif /* UNIV_DEBUG */
-#ifdef UNIV_SYNC_DEBUG
- mutex->line = 0;
- mutex->file_name = "not yet reserved";
- mutex->level = level;
-#endif /* UNIV_SYNC_DEBUG */
- mutex->cfile_name = cfile_name;
- mutex->cline = cline;
- mutex->count_os_wait = 0;
-#ifdef UNIV_DEBUG
- mutex->cmutex_name= cmutex_name;
- mutex->count_using= 0;
- mutex->mutex_type= 0;
- mutex->lspent_time= 0;
- mutex->lmax_spent_time= 0;
- mutex->count_spin_loop= 0;
- mutex->count_spin_rounds= 0;
- mutex->count_os_yield= 0;
-#endif /* UNIV_DEBUG */
-
- /* Check that lock_word is aligned; this is important on Intel */
- ut_ad(((ulint)(&(mutex->lock_word))) % 4 == 0);
-
- /* NOTE! The very first mutexes are not put to the mutex list */
-
- if ((mutex == &mutex_list_mutex)
-#ifdef UNIV_SYNC_DEBUG
- || (mutex == &sync_thread_mutex)
-#endif /* UNIV_SYNC_DEBUG */
- ) {
-
- return;
- }
-
- mutex_enter(&mutex_list_mutex);
-
- ut_ad(UT_LIST_GET_LEN(mutex_list) == 0
- || UT_LIST_GET_FIRST(mutex_list)->magic_n == MUTEX_MAGIC_N);
-
- UT_LIST_ADD_FIRST(list, mutex_list, mutex);
-
- mutex_exit(&mutex_list_mutex);
-}
-
-/******************************************************************//**
-Calling this function is obligatory only if the memory buffer containing
-the mutex is freed. Removes a mutex object from the mutex list. The mutex
-is checked to be in the reset state. */
-UNIV_INTERN
-void
-mutex_free(
-/*=======*/
- mutex_t* mutex) /*!< in: mutex */
-{
- ut_ad(mutex_validate(mutex));
- ut_a(mutex_get_lock_word(mutex) == 0);
- ut_a(mutex_get_waiters(mutex) == 0);
-
- if (mutex != &mutex_list_mutex
-#ifdef UNIV_SYNC_DEBUG
- && mutex != &sync_thread_mutex
-#endif /* UNIV_SYNC_DEBUG */
- ) {
-
- mutex_enter(&mutex_list_mutex);
-
- ut_ad(!UT_LIST_GET_PREV(list, mutex)
- || UT_LIST_GET_PREV(list, mutex)->magic_n
- == MUTEX_MAGIC_N);
- ut_ad(!UT_LIST_GET_NEXT(list, mutex)
- || UT_LIST_GET_NEXT(list, mutex)->magic_n
- == MUTEX_MAGIC_N);
-
- UT_LIST_REMOVE(list, mutex_list, mutex);
-
- mutex_exit(&mutex_list_mutex);
- }
-
- os_event_free(mutex->event);
-
-#if !defined(HAVE_ATOMIC_BUILTINS)
- os_fast_mutex_free(&(mutex->os_fast_mutex));
-#endif
- /* If we free the mutex protecting the mutex list (freeing is
- not necessary), we have to reset the magic number AFTER removing
- it from the list. */
-#ifdef UNIV_DEBUG
- mutex->magic_n = 0;
-#endif /* UNIV_DEBUG */
-}
-
-/********************************************************************//**
-NOTE! Use the corresponding macro in the header file, not this function
-directly. Tries to lock the mutex for the current thread. If the lock is not
-acquired immediately, returns with return value 1.
-@return 0 if succeed, 1 if not */
-UNIV_INTERN
-ulint
-mutex_enter_nowait_func(
-/*====================*/
- mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name __attribute__((unused)),
- /*!< in: file name where mutex
- requested */
- ulint line __attribute__((unused)))
- /*!< in: line where requested */
-{
- ut_ad(mutex_validate(mutex));
-
- if (!mutex_test_and_set(mutex)) {
-
- ut_d(mutex->thread_id = os_thread_get_curr_id());
-#ifdef UNIV_SYNC_DEBUG
- mutex_set_debug_info(mutex, file_name, line);
-#endif
-
- return(0); /* Succeeded! */
- }
-
- return(1);
-}
-
-#ifdef UNIV_DEBUG
-/******************************************************************//**
-Checks that the mutex has been initialized.
-@return TRUE */
-UNIV_INTERN
-ibool
-mutex_validate(
-/*===========*/
- const mutex_t* mutex) /*!< in: mutex */
-{
- ut_a(mutex);
- ut_a(mutex->magic_n == MUTEX_MAGIC_N);
-
- return(TRUE);
-}
-
-/******************************************************************//**
-Checks that the current thread owns the mutex. Works only in the debug
-version.
-@return TRUE if owns */
-UNIV_INTERN
-ibool
-mutex_own(
-/*======*/
- const mutex_t* mutex) /*!< in: mutex */
-{
- ut_ad(mutex_validate(mutex));
-
- return(mutex_get_lock_word(mutex) == 1
- && os_thread_eq(mutex->thread_id, os_thread_get_curr_id()));
-}
-#endif /* UNIV_DEBUG */
-
-/******************************************************************//**
-Sets the waiters field in a mutex. */
-UNIV_INTERN
-void
-mutex_set_waiters(
-/*==============*/
- mutex_t* mutex, /*!< in: mutex */
- ulint n) /*!< in: value to set */
-{
- volatile ulint* ptr; /* declared volatile to ensure that
- the value is stored to memory */
- ut_ad(mutex);
-
- ptr = &(mutex->waiters);
-
- *ptr = n; /* Here we assume that the write of a single
- word in memory is atomic */
-}
-
-/******************************************************************//**
-Reserves a mutex for the current thread. If the mutex is reserved, the
-function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
-for the mutex before suspending the thread. */
-UNIV_INTERN
-void
-mutex_spin_wait(
-/*============*/
- mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where mutex
- requested */
- ulint line) /*!< in: line where requested */
-{
- ulint index; /* index of the reserved wait cell */
- ulint i; /* spin round count */
-#ifdef UNIV_DEBUG
- ib_int64_t lstart_time = 0, lfinish_time; /* for timing os_wait */
- ulint ltime_diff;
- ulint sec;
- ulint ms;
- uint timer_started = 0;
-#endif /* UNIV_DEBUG */
- ut_ad(mutex);
-
- /* This update is not thread safe, but we don't mind if the count
- isn't exact. Moved out of ifdef that follows because we are willing
- to sacrifice the cost of counting this as the data is valuable.
- Count the number of calls to mutex_spin_wait. */
- mutex_spin_wait_count++;
-
-mutex_loop:
-
- i = 0;
-
- /* Spin waiting for the lock word to become zero. Note that we do
- not have to assume that the read access to the lock word is atomic,
- as the actual locking is always committed with atomic test-and-set.
- In reality, however, all processors probably have an atomic read of
- a memory word. */
-
-spin_loop:
- ut_d(mutex->count_spin_loop++);
-
- while (mutex_get_lock_word(mutex) != 0 && i < SYNC_SPIN_ROUNDS) {
- if (srv_spin_wait_delay) {
- ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
- }
-
- i++;
- }
-
- if (i == SYNC_SPIN_ROUNDS) {
-#ifdef UNIV_DEBUG
- mutex->count_os_yield++;
-#ifndef UNIV_HOTBACKUP
- if (timed_mutexes && timer_started == 0) {
- ut_usectime(&sec, &ms);
- lstart_time= (ib_int64_t)sec * 1000000 + ms;
- timer_started = 1;
- }
-#endif /* UNIV_HOTBACKUP */
-#endif /* UNIV_DEBUG */
- os_thread_yield();
- }
-
-#ifdef UNIV_SRV_PRINT_LATCH_WAITS
- fprintf(stderr,
- "Thread %lu spin wait mutex at %p"
- " cfile %s cline %lu rnds %lu\n",
- (ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex,
- mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
-#endif
-
- mutex_spin_round_count += i;
-
- ut_d(mutex->count_spin_rounds += i);
-
- if (mutex_test_and_set(mutex) == 0) {
- /* Succeeded! */
-
- ut_d(mutex->thread_id = os_thread_get_curr_id());
-#ifdef UNIV_SYNC_DEBUG
- mutex_set_debug_info(mutex, file_name, line);
-#endif
-
- goto finish_timing;
- }
-
- /* We may end up with a situation where lock_word is 0 but the OS
- fast mutex is still reserved. On FreeBSD the OS does not seem to
- schedule a thread which is constantly calling pthread_mutex_trylock
- (in mutex_test_and_set implementation). Then we could end up
- spinning here indefinitely. The following 'i++' stops this infinite
- spin. */
-
- i++;
-
- if (i < SYNC_SPIN_ROUNDS) {
- goto spin_loop;
- }
-
- sync_array_reserve_cell(sync_primary_wait_array, mutex,
- SYNC_MUTEX, file_name, line, &index);
-
- /* The memory order of the array reservation and the change in the
- waiters field is important: when we suspend a thread, we first
- reserve the cell and then set waiters field to 1. When threads are
- released in mutex_exit, the waiters field is first set to zero and
- then the event is set to the signaled state. */
-
- mutex_set_waiters(mutex, 1);
-
- /* Try to reserve still a few times */
- for (i = 0; i < 4; i++) {
- if (mutex_test_and_set(mutex) == 0) {
- /* Succeeded! Free the reserved wait cell */
-
- sync_array_free_cell(sync_primary_wait_array, index);
-
- ut_d(mutex->thread_id = os_thread_get_curr_id());
-#ifdef UNIV_SYNC_DEBUG
- mutex_set_debug_info(mutex, file_name, line);
-#endif
-
-#ifdef UNIV_SRV_PRINT_LATCH_WAITS
- fprintf(stderr, "Thread %lu spin wait succeeds at 2:"
- " mutex at %p\n",
- (ulong) os_thread_pf(os_thread_get_curr_id()),
- (void*) mutex);
-#endif
-
- goto finish_timing;
-
- /* Note that in this case we leave the waiters field
- set to 1. We cannot reset it to zero, as we do not
- know if there are other waiters. */
- }
- }
-
- /* Now we know that there has been some thread holding the mutex
- after the change in the wait array and the waiters field was made.
- Now there is no risk of infinite wait on the event. */
-
-#ifdef UNIV_SRV_PRINT_LATCH_WAITS
- fprintf(stderr,
- "Thread %lu OS wait mutex at %p cfile %s cline %lu rnds %lu\n",
- (ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex,
- mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
-#endif
-
- mutex_os_wait_count++;
-
- mutex->count_os_wait++;
-#ifdef UNIV_DEBUG
- /* !!!!! Sometimes os_wait can be called without os_thread_yield */
-#ifndef UNIV_HOTBACKUP
- if (timed_mutexes == 1 && timer_started == 0) {
- ut_usectime(&sec, &ms);
- lstart_time= (ib_int64_t)sec * 1000000 + ms;
- timer_started = 1;
- }
-#endif /* UNIV_HOTBACKUP */
-#endif /* UNIV_DEBUG */
-
- sync_array_wait_event(sync_primary_wait_array, index);
- goto mutex_loop;
-
-finish_timing:
-#ifdef UNIV_DEBUG
- if (timed_mutexes == 1 && timer_started==1) {
- ut_usectime(&sec, &ms);
- lfinish_time= (ib_int64_t)sec * 1000000 + ms;
-
- ltime_diff= (ulint) (lfinish_time - lstart_time);
- mutex->lspent_time += ltime_diff;
-
- if (mutex->lmax_spent_time < ltime_diff) {
- mutex->lmax_spent_time= ltime_diff;
- }
- }
-#endif /* UNIV_DEBUG */
- return;
-}
-
-/******************************************************************//**
-Releases the threads waiting in the primary wait array for this mutex. */
-UNIV_INTERN
-void
-mutex_signal_object(
-/*================*/
- mutex_t* mutex) /*!< in: mutex */
-{
- mutex_set_waiters(mutex, 0);
-
- /* The memory order of resetting the waiters field and
- signaling the object is important. See LEMMA 1 above. */
- os_event_set(mutex->event);
- sync_array_object_signalled(sync_primary_wait_array);
-}
-
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Sets the debug information for a reserved mutex. */
-UNIV_INTERN
-void
-mutex_set_debug_info(
-/*=================*/
- mutex_t* mutex, /*!< in: mutex */
- const char* file_name, /*!< in: file where requested */
- ulint line) /*!< in: line where requested */
-{
- ut_ad(mutex);
- ut_ad(file_name);
-
- sync_thread_add_level(mutex, mutex->level);
-
- mutex->file_name = file_name;
- mutex->line = line;
-}
-
-/******************************************************************//**
-Gets the debug information for a reserved mutex. */
-UNIV_INTERN
-void
-mutex_get_debug_info(
-/*=================*/
- mutex_t* mutex, /*!< in: mutex */
- const char** file_name, /*!< out: file where requested */
- ulint* line, /*!< out: line where requested */
- os_thread_id_t* thread_id) /*!< out: id of the thread which owns
- the mutex */
-{
- ut_ad(mutex);
-
- *file_name = mutex->file_name;
- *line = mutex->line;
- *thread_id = mutex->thread_id;
-}
-
-/******************************************************************//**
-Prints debug info of currently reserved mutexes. */
-static
-void
-mutex_list_print_info(
-/*==================*/
- FILE* file) /*!< in: file where to print */
-{
- mutex_t* mutex;
- const char* file_name;
- ulint line;
- os_thread_id_t thread_id;
- ulint count = 0;
-
- fputs("----------\n"
- "MUTEX INFO\n"
- "----------\n", file);
-
- mutex_enter(&mutex_list_mutex);
-
- mutex = UT_LIST_GET_FIRST(mutex_list);
-
- while (mutex != NULL) {
- count++;
-
- if (mutex_get_lock_word(mutex) != 0) {
- mutex_get_debug_info(mutex, &file_name, &line,
- &thread_id);
- fprintf(file,
- "Locked mutex: addr %p thread %ld"
- " file %s line %ld\n",
- (void*) mutex, os_thread_pf(thread_id),
- file_name, line);
- }
-
- mutex = UT_LIST_GET_NEXT(list, mutex);
- }
-
- fprintf(file, "Total number of mutexes %ld\n", count);
-
- mutex_exit(&mutex_list_mutex);
-}
-
-/******************************************************************//**
-Counts currently reserved mutexes. Works only in the debug version.
-@return number of reserved mutexes */
-UNIV_INTERN
-ulint
-mutex_n_reserved(void)
-/*==================*/
-{
- mutex_t* mutex;
- ulint count = 0;
-
- mutex_enter(&mutex_list_mutex);
-
- mutex = UT_LIST_GET_FIRST(mutex_list);
-
- while (mutex != NULL) {
- if (mutex_get_lock_word(mutex) != 0) {
-
- count++;
- }
-
- mutex = UT_LIST_GET_NEXT(list, mutex);
- }
-
- mutex_exit(&mutex_list_mutex);
-
- ut_a(count >= 1);
-
- return(count - 1); /* Subtract one, because this function itself
- was holding one mutex (mutex_list_mutex) */
-}
-
-/******************************************************************//**
-Returns TRUE if no mutex or rw-lock is currently locked. Works only in
-the debug version.
-@return TRUE if no mutexes and rw-locks reserved */
-UNIV_INTERN
-ibool
-sync_all_freed(void)
-/*================*/
-{
- return(mutex_n_reserved() + rw_lock_n_locked() == 0);
-}
-
-/******************************************************************//**
-Gets the value in the nth slot in the thread level arrays.
-@return pointer to thread slot */
-static
-sync_thread_t*
-sync_thread_level_arrays_get_nth(
-/*=============================*/
- ulint n) /*!< in: slot number */
-{
- ut_ad(n < OS_THREAD_MAX_N);
-
- return(sync_thread_level_arrays + n);
-}
-
-/******************************************************************//**
-Looks for the thread slot for the calling thread.
-@return pointer to thread slot, NULL if not found */
-static
-sync_thread_t*
-sync_thread_level_arrays_find_slot(void)
-/*====================================*/
-
-{
- sync_thread_t* slot;
- os_thread_id_t id;
- ulint i;
-
- id = os_thread_get_curr_id();
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
-
- slot = sync_thread_level_arrays_get_nth(i);
-
- if (slot->levels && os_thread_eq(slot->id, id)) {
-
- return(slot);
- }
- }
-
- return(NULL);
-}
-
-/******************************************************************//**
-Looks for an unused thread slot.
-@return pointer to thread slot */
-static
-sync_thread_t*
-sync_thread_level_arrays_find_free(void)
-/*====================================*/
-
-{
- sync_thread_t* slot;
- ulint i;
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
-
- slot = sync_thread_level_arrays_get_nth(i);
-
- if (slot->levels == NULL) {
-
- return(slot);
- }
- }
-
- return(NULL);
-}
-
-/******************************************************************//**
-Gets the value in the nth slot in the thread level array.
-@return pointer to level slot */
-static
-sync_level_t*
-sync_thread_levels_get_nth(
-/*=======================*/
- sync_level_t* arr, /*!< in: pointer to level array for an OS
- thread */
- ulint n) /*!< in: slot number */
-{
- ut_ad(n < SYNC_THREAD_N_LEVELS);
-
- return(arr + n);
-}
-
-/******************************************************************//**
-Checks if all the level values stored in the level array are greater than
-the given limit.
-@return TRUE if all greater */
-static
-ibool
-sync_thread_levels_g(
-/*=================*/
- sync_level_t* arr, /*!< in: pointer to level array for an OS
- thread */
- ulint limit, /*!< in: level limit */
- ulint warn) /*!< in: TRUE=display a diagnostic message */
-{
- sync_level_t* slot;
- rw_lock_t* lock;
- mutex_t* mutex;
- ulint i;
-
- for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
-
- slot = sync_thread_levels_get_nth(arr, i);
-
- if (slot->latch != NULL) {
- if (slot->level <= limit) {
-
- if (!warn) {
-
- return(FALSE);
- }
-
- lock = slot->latch;
- mutex = slot->latch;
-
- fprintf(stderr,
- "InnoDB: sync levels should be"
- " > %lu but a level is %lu\n",
- (ulong) limit, (ulong) slot->level);
-
- if (mutex->magic_n == MUTEX_MAGIC_N) {
- fprintf(stderr,
- "Mutex created at %s %lu\n",
- mutex->cfile_name,
- (ulong) mutex->cline);
-
- if (mutex_get_lock_word(mutex) != 0) {
- const char* file_name;
- ulint line;
- os_thread_id_t thread_id;
-
- mutex_get_debug_info(
- mutex, &file_name,
- &line, &thread_id);
-
- fprintf(stderr,
- "InnoDB: Locked mutex:"
- " addr %p thread %ld"
- " file %s line %ld\n",
- (void*) mutex,
- os_thread_pf(
- thread_id),
- file_name,
- (ulong) line);
- } else {
- fputs("Not locked\n", stderr);
- }
- } else {
- rw_lock_print(lock);
- }
-
- return(FALSE);
- }
- }
- }
-
- return(TRUE);
-}
-
-/******************************************************************//**
-Checks if the level value is stored in the level array.
-@return TRUE if stored */
-static
-ibool
-sync_thread_levels_contain(
-/*=======================*/
- sync_level_t* arr, /*!< in: pointer to level array for an OS
- thread */
- ulint level) /*!< in: level */
-{
- sync_level_t* slot;
- ulint i;
-
- for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
-
- slot = sync_thread_levels_get_nth(arr, i);
-
- if (slot->latch != NULL) {
- if (slot->level == level) {
-
- return(TRUE);
- }
- }
- }
-
- return(FALSE);
-}
-
-/******************************************************************//**
-Checks that the level array for the current thread is empty.
-@return TRUE if empty except the exceptions specified below */
-UNIV_INTERN
-ibool
-sync_thread_levels_empty_gen(
-/*=========================*/
- ibool dict_mutex_allowed) /*!< in: TRUE if dictionary mutex is
- allowed to be owned by the thread,
- also purge_is_running mutex is
- allowed */
-{
- sync_level_t* arr;
- sync_thread_t* thread_slot;
- sync_level_t* slot;
- ulint i;
-
- if (!sync_order_checks_on) {
-
- return(TRUE);
- }
-
- mutex_enter(&sync_thread_mutex);
-
- thread_slot = sync_thread_level_arrays_find_slot();
-
- if (thread_slot == NULL) {
-
- mutex_exit(&sync_thread_mutex);
-
- return(TRUE);
- }
-
- arr = thread_slot->levels;
-
- for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
-
- slot = sync_thread_levels_get_nth(arr, i);
-
- if (slot->latch != NULL
- && (!dict_mutex_allowed
- || (slot->level != SYNC_DICT
- && slot->level != SYNC_DICT_OPERATION))) {
-
- mutex_exit(&sync_thread_mutex);
- ut_error;
-
- return(FALSE);
- }
- }
-
- mutex_exit(&sync_thread_mutex);
-
- return(TRUE);
-}
-
-/******************************************************************//**
-Checks that the level array for the current thread is empty.
-@return TRUE if empty */
-UNIV_INTERN
-ibool
-sync_thread_levels_empty(void)
-/*==========================*/
-{
- return(sync_thread_levels_empty_gen(FALSE));
-}
-
-/******************************************************************//**
-Adds a latch and its level in the thread level array. Allocates the memory
-for the array if called first time for this OS thread. Makes the checks
-against other latch levels stored in the array for this thread. */
-UNIV_INTERN
-void
-sync_thread_add_level(
-/*==================*/
- void* latch, /*!< in: pointer to a mutex or an rw-lock */
- ulint level) /*!< in: level in the latching order; if
- SYNC_LEVEL_VARYING, nothing is done */
-{
- sync_level_t* array;
- sync_level_t* slot;
- sync_thread_t* thread_slot;
- ulint i;
-
- if (!sync_order_checks_on) {
-
- return;
- }
-
- if ((latch == (void*)&sync_thread_mutex)
- || (latch == (void*)&mutex_list_mutex)
- || (latch == (void*)&rw_lock_debug_mutex)
- || (latch == (void*)&rw_lock_list_mutex)) {
-
- return;
- }
-
- if (level == SYNC_LEVEL_VARYING) {
-
- return;
- }
-
- mutex_enter(&sync_thread_mutex);
-
- thread_slot = sync_thread_level_arrays_find_slot();
-
- if (thread_slot == NULL) {
- /* We have to allocate the level array for a new thread */
- array = ut_malloc(sizeof(sync_level_t) * SYNC_THREAD_N_LEVELS);
-
- thread_slot = sync_thread_level_arrays_find_free();
-
- thread_slot->id = os_thread_get_curr_id();
- thread_slot->levels = array;
-
- for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
-
- slot = sync_thread_levels_get_nth(array, i);
-
- slot->latch = NULL;
- }
- }
-
- array = thread_slot->levels;
-
- /* NOTE that there is a problem with _NODE and _LEAF levels: if the
- B-tree height changes, then a leaf can change to an internal node
- or the other way around. We do not know at present if this can cause
- unnecessary assertion failures below. */
-
- switch (level) {
- case SYNC_NO_ORDER_CHECK:
- case SYNC_EXTERN_STORAGE:
- case SYNC_TREE_NODE_FROM_HASH:
- /* Do no order checking */
- break;
- case SYNC_MEM_POOL:
- case SYNC_MEM_HASH:
- case SYNC_RECV:
- case SYNC_WORK_QUEUE:
- case SYNC_LOG:
- case SYNC_THR_LOCAL:
- case SYNC_ANY_LATCH:
- case SYNC_TRX_SYS_HEADER:
- case SYNC_FILE_FORMAT_TAG:
- case SYNC_DOUBLEWRITE:
- case SYNC_BUF_POOL:
- case SYNC_SEARCH_SYS:
- case SYNC_SEARCH_SYS_CONF:
- case SYNC_TRX_LOCK_HEAP:
- case SYNC_KERNEL:
- case SYNC_IBUF_BITMAP_MUTEX:
- case SYNC_RSEG:
- case SYNC_TRX_UNDO:
- case SYNC_PURGE_LATCH:
- case SYNC_PURGE_SYS:
- case SYNC_DICT_AUTOINC_MUTEX:
- case SYNC_DICT_OPERATION:
- case SYNC_DICT_HEADER:
- case SYNC_TRX_I_S_RWLOCK:
- case SYNC_TRX_I_S_LAST_READ:
- if (!sync_thread_levels_g(array, level, TRUE)) {
- fprintf(stderr,
- "InnoDB: sync_thread_levels_g(array, %lu)"
- " does not hold!\n", level);
- ut_error;
- }
- break;
- case SYNC_BUF_BLOCK:
- /* Either the thread must own the buffer pool mutex
- (buf_pool_mutex), or it is allowed to latch only ONE
- buffer block (block->mutex or buf_pool_zip_mutex). */
- if (!sync_thread_levels_g(array, level, FALSE)) {
- ut_a(sync_thread_levels_g(array, level - 1, TRUE));
- ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
- }
- break;
- case SYNC_REC_LOCK:
- if (sync_thread_levels_contain(array, SYNC_KERNEL)) {
- ut_a(sync_thread_levels_g(array, SYNC_REC_LOCK - 1,
- TRUE));
- } else {
- ut_a(sync_thread_levels_g(array, SYNC_REC_LOCK, TRUE));
- }
- break;
- case SYNC_IBUF_BITMAP:
- /* Either the thread must own the master mutex to all
- the bitmap pages, or it is allowed to latch only ONE
- bitmap page. */
- if (sync_thread_levels_contain(array,
- SYNC_IBUF_BITMAP_MUTEX)) {
- ut_a(sync_thread_levels_g(array, SYNC_IBUF_BITMAP - 1,
- TRUE));
- } else {
- ut_a(sync_thread_levels_g(array, SYNC_IBUF_BITMAP,
- TRUE));
- }
- break;
- case SYNC_FSP_PAGE:
- ut_a(sync_thread_levels_contain(array, SYNC_FSP));
- break;
- case SYNC_FSP:
- ut_a(sync_thread_levels_contain(array, SYNC_FSP)
- || sync_thread_levels_g(array, SYNC_FSP, TRUE));
- break;
- case SYNC_TRX_UNDO_PAGE:
- ut_a(sync_thread_levels_contain(array, SYNC_TRX_UNDO)
- || sync_thread_levels_contain(array, SYNC_RSEG)
- || sync_thread_levels_contain(array, SYNC_PURGE_SYS)
- || sync_thread_levels_g(array, SYNC_TRX_UNDO_PAGE, TRUE));
- break;
- case SYNC_RSEG_HEADER:
- ut_a(sync_thread_levels_contain(array, SYNC_RSEG));
- break;
- case SYNC_RSEG_HEADER_NEW:
- ut_a(sync_thread_levels_contain(array, SYNC_KERNEL)
- && sync_thread_levels_contain(array, SYNC_FSP_PAGE));
- break;
- case SYNC_TREE_NODE:
- ut_a(sync_thread_levels_contain(array, SYNC_INDEX_TREE)
- || sync_thread_levels_contain(array, SYNC_DICT_OPERATION)
- || sync_thread_levels_g(array, SYNC_TREE_NODE - 1, TRUE));
- break;
- case SYNC_TREE_NODE_NEW:
- ut_a(sync_thread_levels_contain(array, SYNC_FSP_PAGE)
- || sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
- break;
- case SYNC_INDEX_TREE:
- if (sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)
- && sync_thread_levels_contain(array, SYNC_FSP)) {
- ut_a(sync_thread_levels_g(array, SYNC_FSP_PAGE - 1,
- TRUE));
- } else {
- ut_a(sync_thread_levels_g(array, SYNC_TREE_NODE - 1,
- TRUE));
- }
- break;
- case SYNC_IBUF_MUTEX:
- ut_a(sync_thread_levels_g(array, SYNC_FSP_PAGE - 1, TRUE));
- break;
- case SYNC_IBUF_PESS_INSERT_MUTEX:
- ut_a(sync_thread_levels_g(array, SYNC_FSP - 1, TRUE));
- ut_a(!sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
- break;
- case SYNC_IBUF_HEADER:
- ut_a(sync_thread_levels_g(array, SYNC_FSP - 1, TRUE));
- ut_a(!sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
- ut_a(!sync_thread_levels_contain(array,
- SYNC_IBUF_PESS_INSERT_MUTEX));
- break;
- case SYNC_DICT:
-#ifdef UNIV_DEBUG
- ut_a(buf_debug_prints
- || sync_thread_levels_g(array, SYNC_DICT, TRUE));
-#else /* UNIV_DEBUG */
- ut_a(sync_thread_levels_g(array, SYNC_DICT, TRUE));
-#endif /* UNIV_DEBUG */
- break;
- default:
- ut_error;
- }
-
- for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
-
- slot = sync_thread_levels_get_nth(array, i);
-
- if (slot->latch == NULL) {
- slot->latch = latch;
- slot->level = level;
-
- break;
- }
- }
-
- ut_a(i < SYNC_THREAD_N_LEVELS);
-
- mutex_exit(&sync_thread_mutex);
-}
-
-/******************************************************************//**
-Removes a latch from the thread level array if it is found there.
-@return TRUE if found in the array; it is no error if the latch is
-not found, as we presently are not able to determine the level for
-every latch reservation the program does */
-UNIV_INTERN
-ibool
-sync_thread_reset_level(
-/*====================*/
- void* latch) /*!< in: pointer to a mutex or an rw-lock */
-{
- sync_level_t* array;
- sync_level_t* slot;
- sync_thread_t* thread_slot;
- ulint i;
-
- if (!sync_order_checks_on) {
-
- return(FALSE);
- }
-
- if ((latch == (void*)&sync_thread_mutex)
- || (latch == (void*)&mutex_list_mutex)
- || (latch == (void*)&rw_lock_debug_mutex)
- || (latch == (void*)&rw_lock_list_mutex)) {
-
- return(FALSE);
- }
-
- mutex_enter(&sync_thread_mutex);
-
- thread_slot = sync_thread_level_arrays_find_slot();
-
- if (thread_slot == NULL) {
-
- ut_error;
-
- mutex_exit(&sync_thread_mutex);
- return(FALSE);
- }
-
- array = thread_slot->levels;
-
- for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
-
- slot = sync_thread_levels_get_nth(array, i);
-
- if (slot->latch == latch) {
- slot->latch = NULL;
-
- mutex_exit(&sync_thread_mutex);
-
- return(TRUE);
- }
- }
-
- if (((mutex_t*) latch)->magic_n != MUTEX_MAGIC_N) {
- rw_lock_t* rw_lock;
-
- rw_lock = (rw_lock_t*) latch;
-
- if (rw_lock->level == SYNC_LEVEL_VARYING) {
- mutex_exit(&sync_thread_mutex);
-
- return(TRUE);
- }
- }
-
- ut_error;
-
- mutex_exit(&sync_thread_mutex);
-
- return(FALSE);
-}
-#endif /* UNIV_SYNC_DEBUG */
-
-/******************************************************************//**
-Initializes the synchronization data structures. */
-UNIV_INTERN
-void
-sync_init(void)
-/*===========*/
-{
-#ifdef UNIV_SYNC_DEBUG
- sync_thread_t* thread_slot;
- ulint i;
-#endif /* UNIV_SYNC_DEBUG */
-
- ut_a(sync_initialized == FALSE);
-
- sync_initialized = TRUE;
-
- /* Create the primary system wait array which is protected by an OS
- mutex */
-
- sync_primary_wait_array = sync_array_create(OS_THREAD_MAX_N,
- SYNC_ARRAY_OS_MUTEX);
-#ifdef UNIV_SYNC_DEBUG
- /* Create the thread latch level array where the latch levels
- are stored for each OS thread */
-
- sync_thread_level_arrays = ut_malloc(OS_THREAD_MAX_N
- * sizeof(sync_thread_t));
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
-
- thread_slot = sync_thread_level_arrays_get_nth(i);
- thread_slot->levels = NULL;
- }
-#endif /* UNIV_SYNC_DEBUG */
- /* Init the mutex list and create the mutex to protect it. */
-
- UT_LIST_INIT(mutex_list);
- mutex_create(&mutex_list_mutex, SYNC_NO_ORDER_CHECK);
-#ifdef UNIV_SYNC_DEBUG
- mutex_create(&sync_thread_mutex, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
-
- /* Init the rw-lock list and create the mutex to protect it. */
-
- UT_LIST_INIT(rw_lock_list);
- mutex_create(&rw_lock_list_mutex, SYNC_NO_ORDER_CHECK);
-
-#ifdef UNIV_SYNC_DEBUG
- mutex_create(&rw_lock_debug_mutex, SYNC_NO_ORDER_CHECK);
-
- rw_lock_debug_event = os_event_create(NULL);
- rw_lock_debug_waiters = FALSE;
-#endif /* UNIV_SYNC_DEBUG */
-}
-
-/******************************************************************//**
-Frees the resources in InnoDB's own synchronization data structures. Use
-os_sync_free() after calling this. */
-UNIV_INTERN
-void
-sync_close(void)
-/*===========*/
-{
- mutex_t* mutex;
-
- sync_array_free(sync_primary_wait_array);
-
- mutex = UT_LIST_GET_FIRST(mutex_list);
-
- while (mutex) {
- mutex_free(mutex);
- mutex = UT_LIST_GET_FIRST(mutex_list);
- }
-
- mutex_free(&mutex_list_mutex);
-#ifdef UNIV_SYNC_DEBUG
- mutex_free(&sync_thread_mutex);
-
- /* Switch latching order checks on in sync0sync.c */
- sync_order_checks_on = FALSE;
-#endif /* UNIV_SYNC_DEBUG */
-
- sync_initialized = FALSE;
-}
-
-/*******************************************************************//**
-Prints wait info of the sync system. */
-UNIV_INTERN
-void
-sync_print_wait_info(
-/*=================*/
- FILE* file) /*!< in: file where to print */
-{
-#ifdef UNIV_SYNC_DEBUG
- fprintf(file, "Mutex exits %llu, rws exits %llu, rwx exits %llu\n",
- mutex_exit_count, rw_s_exit_count, rw_x_exit_count);
-#endif
-
- fprintf(file,
- "Mutex spin waits %llu, rounds %llu, OS waits %llu\n"
- "RW-shared spins %llu, OS waits %llu;"
- " RW-excl spins %llu, OS waits %llu\n",
- mutex_spin_wait_count,
- mutex_spin_round_count,
- mutex_os_wait_count,
- rw_s_spin_wait_count,
- rw_s_os_wait_count,
- rw_x_spin_wait_count,
- rw_x_os_wait_count);
-
- fprintf(file,
- "Spin rounds per wait: %.2f mutex, %.2f RW-shared, "
- "%.2f RW-excl\n",
- (double) mutex_spin_round_count /
- (mutex_spin_wait_count ? mutex_spin_wait_count : 1),
- (double) rw_s_spin_round_count /
- (rw_s_spin_wait_count ? rw_s_spin_wait_count : 1),
- (double) rw_x_spin_round_count /
- (rw_x_spin_wait_count ? rw_x_spin_wait_count : 1));
-}
-
-/*******************************************************************//**
-Prints info of the sync system. */
-UNIV_INTERN
-void
-sync_print(
-/*=======*/
- FILE* file) /*!< in: file where to print */
-{
-#ifdef UNIV_SYNC_DEBUG
- mutex_list_print_info(file);
-
- rw_lock_list_print_info(file);
-#endif /* UNIV_SYNC_DEBUG */
-
- sync_array_print_info(file, sync_primary_wait_array);
-
- sync_print_wait_info(file);
-}
diff --git a/storage/innodb_plugin/thr/thr0loc.c b/storage/innodb_plugin/thr/thr0loc.c
deleted file mode 100644
index 59a234a6b72..00000000000
--- a/storage/innodb_plugin/thr/thr0loc.c
+++ /dev/null
@@ -1,279 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file thr/thr0loc.c
-The thread local storage
-
-Created 10/5/1995 Heikki Tuuri
-*******************************************************/
-
-#include "thr0loc.h"
-#ifdef UNIV_NONINL
-#include "thr0loc.ic"
-#endif
-
-#include "sync0sync.h"
-#include "hash0hash.h"
-#include "mem0mem.h"
-#include "srv0srv.h"
-
-/*
- IMPLEMENTATION OF THREAD LOCAL STORAGE
- ======================================
-
-The threads sometimes need private data which depends on the thread id.
-This is implemented as a hash table, where the hash value is calculated
-from the thread id, to prepare for a large number of threads. The hash table
-is protected by a mutex. If you need modify the program and put new data to
-the thread local storage, just add it to struct thr_local_struct in the
-header file. */
-
-/** Mutex protecting thr_local_hash */
-static mutex_t thr_local_mutex;
-
-/** The hash table. The module is not yet initialized when it is NULL. */
-static hash_table_t* thr_local_hash = NULL;
-
-/** Thread local data */
-typedef struct thr_local_struct thr_local_t;
-
-/** @brief Thread local data.
-The private data for each thread should be put to
-the structure below and the accessor functions written
-for the field. */
-struct thr_local_struct{
- os_thread_id_t id; /*!< id of the thread which owns this struct */
- os_thread_t handle; /*!< operating system handle to the thread */
- ulint slot_no;/*!< the index of the slot in the thread table
- for this thread */
- ibool in_ibuf;/*!< TRUE if the thread is doing an ibuf
- operation */
- hash_node_t hash; /*!< hash chain node */
- ulint magic_n;/*!< magic number (THR_LOCAL_MAGIC_N) */
-};
-
-/** The value of thr_local_struct::magic_n */
-#define THR_LOCAL_MAGIC_N 1231234
-
-/*******************************************************************//**
-Returns the local storage struct for a thread.
-@return local storage */
-static
-thr_local_t*
-thr_local_get(
-/*==========*/
- os_thread_id_t id) /*!< in: thread id of the thread */
-{
- thr_local_t* local;
-
-try_again:
- ut_ad(thr_local_hash);
- ut_ad(mutex_own(&thr_local_mutex));
-
- /* Look for the local struct in the hash table */
-
- local = NULL;
-
- HASH_SEARCH(hash, thr_local_hash, os_thread_pf(id),
- thr_local_t*, local,, os_thread_eq(local->id, id));
- if (local == NULL) {
- mutex_exit(&thr_local_mutex);
-
- thr_local_create();
-
- mutex_enter(&thr_local_mutex);
-
- goto try_again;
- }
-
- ut_ad(local->magic_n == THR_LOCAL_MAGIC_N);
-
- return(local);
-}
-
-/*******************************************************************//**
-Gets the slot number in the thread table of a thread.
-@return slot number */
-UNIV_INTERN
-ulint
-thr_local_get_slot_no(
-/*==================*/
- os_thread_id_t id) /*!< in: thread id of the thread */
-{
- ulint slot_no;
- thr_local_t* local;
-
- mutex_enter(&thr_local_mutex);
-
- local = thr_local_get(id);
-
- slot_no = local->slot_no;
-
- mutex_exit(&thr_local_mutex);
-
- return(slot_no);
-}
-
-/*******************************************************************//**
-Sets the slot number in the thread table of a thread. */
-UNIV_INTERN
-void
-thr_local_set_slot_no(
-/*==================*/
- os_thread_id_t id, /*!< in: thread id of the thread */
- ulint slot_no)/*!< in: slot number */
-{
- thr_local_t* local;
-
- mutex_enter(&thr_local_mutex);
-
- local = thr_local_get(id);
-
- local->slot_no = slot_no;
-
- mutex_exit(&thr_local_mutex);
-}
-
-/*******************************************************************//**
-Returns pointer to the 'in_ibuf' field within the current thread local
-storage.
-@return pointer to the in_ibuf field */
-UNIV_INTERN
-ibool*
-thr_local_get_in_ibuf_field(void)
-/*=============================*/
-{
- thr_local_t* local;
-
- mutex_enter(&thr_local_mutex);
-
- local = thr_local_get(os_thread_get_curr_id());
-
- mutex_exit(&thr_local_mutex);
-
- return(&(local->in_ibuf));
-}
-
-/*******************************************************************//**
-Creates a local storage struct for the calling new thread. */
-UNIV_INTERN
-void
-thr_local_create(void)
-/*==================*/
-{
- thr_local_t* local;
-
- if (thr_local_hash == NULL) {
- thr_local_init();
- }
-
- local = mem_alloc(sizeof(thr_local_t));
-
- local->id = os_thread_get_curr_id();
- local->handle = os_thread_get_curr();
- local->magic_n = THR_LOCAL_MAGIC_N;
-
- local->in_ibuf = FALSE;
-
- mutex_enter(&thr_local_mutex);
-
- HASH_INSERT(thr_local_t, hash, thr_local_hash,
- os_thread_pf(os_thread_get_curr_id()),
- local);
-
- mutex_exit(&thr_local_mutex);
-}
-
-/*******************************************************************//**
-Frees the local storage struct for the specified thread. */
-UNIV_INTERN
-void
-thr_local_free(
-/*===========*/
- os_thread_id_t id) /*!< in: thread id */
-{
- thr_local_t* local;
-
- mutex_enter(&thr_local_mutex);
-
- /* Look for the local struct in the hash table */
-
- HASH_SEARCH(hash, thr_local_hash, os_thread_pf(id),
- thr_local_t*, local,, os_thread_eq(local->id, id));
- if (local == NULL) {
- mutex_exit(&thr_local_mutex);
-
- return;
- }
-
- HASH_DELETE(thr_local_t, hash, thr_local_hash,
- os_thread_pf(id), local);
-
- mutex_exit(&thr_local_mutex);
-
- ut_a(local->magic_n == THR_LOCAL_MAGIC_N);
-
- mem_free(local);
-}
-
-/****************************************************************//**
-Initializes the thread local storage module. */
-UNIV_INTERN
-void
-thr_local_init(void)
-/*================*/
-{
-
- ut_a(thr_local_hash == NULL);
-
- thr_local_hash = hash_create(OS_THREAD_MAX_N + 100);
-
- mutex_create(&thr_local_mutex, SYNC_THR_LOCAL);
-}
-
-/********************************************************************
-Close the thread local storage module. */
-UNIV_INTERN
-void
-thr_local_close(void)
-/*=================*/
-{
- ulint i;
-
- ut_a(thr_local_hash != NULL);
-
- /* Free the hash elements. We don't remove them from the table
- because we are going to destroy the table anyway. */
- for (i = 0; i < hash_get_n_cells(thr_local_hash); i++) {
- thr_local_t* local;
-
- local = HASH_GET_FIRST(thr_local_hash, i);
-
- while (local) {
- thr_local_t* prev_local = local;
-
- local = HASH_GET_NEXT(hash, prev_local);
- ut_a(prev_local->magic_n == THR_LOCAL_MAGIC_N);
- mem_free(prev_local);
- }
- }
-
- hash_table_free(thr_local_hash);
- thr_local_hash = NULL;
-}
diff --git a/storage/innodb_plugin/trx/trx0purge.c b/storage/innodb_plugin/trx/trx0purge.c
deleted file mode 100644
index abbfa3d7f81..00000000000
--- a/storage/innodb_plugin/trx/trx0purge.c
+++ /dev/null
@@ -1,1211 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file trx/trx0purge.c
-Purge old versions
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0purge.h"
-
-#ifdef UNIV_NONINL
-#include "trx0purge.ic"
-#endif
-
-#include "fsp0fsp.h"
-#include "mach0data.h"
-#include "mtr0log.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "read0read.h"
-#include "fut0fut.h"
-#include "que0que.h"
-#include "row0purge.h"
-#include "row0upd.h"
-#include "trx0rec.h"
-#include "srv0que.h"
-#include "os0thread.h"
-
-/** The global data structure coordinating a purge */
-UNIV_INTERN trx_purge_t* purge_sys = NULL;
-
-/** A dummy undo record used as a return value when we have a whole undo log
-which needs no purge */
-UNIV_INTERN trx_undo_rec_t trx_purge_dummy_rec;
-
-/*****************************************************************//**
-Checks if trx_id is >= purge_view: then it is guaranteed that its update
-undo log still exists in the system.
-@return TRUE if is sure that it is preserved, also if the function
-returns FALSE, it is possible that the undo log still exists in the
-system */
-UNIV_INTERN
-ibool
-trx_purge_update_undo_must_exist(
-/*=============================*/
- trx_id_t trx_id) /*!< in: transaction id */
-{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- if (!read_view_sees_trx_id(purge_sys->view, trx_id)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*=================== PURGE RECORD ARRAY =============================*/
-
-/*******************************************************************//**
-Stores info of an undo log record during a purge.
-@return pointer to the storage cell */
-static
-trx_undo_inf_t*
-trx_purge_arr_store_info(
-/*=====================*/
- trx_id_t trx_no, /*!< in: transaction number */
- undo_no_t undo_no)/*!< in: undo number */
-{
- trx_undo_inf_t* cell;
- trx_undo_arr_t* arr;
- ulint i;
-
- arr = purge_sys->arr;
-
- for (i = 0;; i++) {
- cell = trx_undo_arr_get_nth_info(arr, i);
-
- if (!(cell->in_use)) {
- /* Not in use, we may store here */
- cell->undo_no = undo_no;
- cell->trx_no = trx_no;
- cell->in_use = TRUE;
-
- arr->n_used++;
-
- return(cell);
- }
- }
-}
-
-/*******************************************************************//**
-Removes info of an undo log record during a purge. */
-UNIV_INLINE
-void
-trx_purge_arr_remove_info(
-/*======================*/
- trx_undo_inf_t* cell) /*!< in: pointer to the storage cell */
-{
- trx_undo_arr_t* arr;
-
- arr = purge_sys->arr;
-
- cell->in_use = FALSE;
-
- ut_ad(arr->n_used > 0);
-
- arr->n_used--;
-}
-
-/*******************************************************************//**
-Gets the biggest pair of a trx number and an undo number in a purge array. */
-static
-void
-trx_purge_arr_get_biggest(
-/*======================*/
- trx_undo_arr_t* arr, /*!< in: purge array */
- trx_id_t* trx_no, /*!< out: transaction number: ut_dulint_zero
- if array is empty */
- undo_no_t* undo_no)/*!< out: undo number */
-{
- trx_undo_inf_t* cell;
- trx_id_t pair_trx_no;
- undo_no_t pair_undo_no;
- int trx_cmp;
- ulint n_used;
- ulint i;
- ulint n;
-
- n = 0;
- n_used = arr->n_used;
- pair_trx_no = ut_dulint_zero;
- pair_undo_no = ut_dulint_zero;
-
- for (i = 0;; i++) {
- cell = trx_undo_arr_get_nth_info(arr, i);
-
- if (cell->in_use) {
- n++;
- trx_cmp = ut_dulint_cmp(cell->trx_no, pair_trx_no);
-
- if ((trx_cmp > 0)
- || ((trx_cmp == 0)
- && (ut_dulint_cmp(cell->undo_no,
- pair_undo_no) >= 0))) {
-
- pair_trx_no = cell->trx_no;
- pair_undo_no = cell->undo_no;
- }
- }
-
- if (n == n_used) {
- *trx_no = pair_trx_no;
- *undo_no = pair_undo_no;
-
- return;
- }
- }
-}
-
-/****************************************************************//**
-Builds a purge 'query' graph. The actual purge is performed by executing
-this query graph.
-@return own: the query graph */
-static
-que_t*
-trx_purge_graph_build(void)
-/*=======================*/
-{
- mem_heap_t* heap;
- que_fork_t* fork;
- que_thr_t* thr;
- /* que_thr_t* thr2; */
-
- heap = mem_heap_create(512);
- fork = que_fork_create(NULL, NULL, QUE_FORK_PURGE, heap);
- fork->trx = purge_sys->trx;
-
- thr = que_thr_create(fork, heap);
-
- thr->child = row_purge_node_create(thr, heap);
-
- /* thr2 = que_thr_create(fork, fork, heap);
-
- thr2->child = row_purge_node_create(fork, thr2, heap); */
-
- return(fork);
-}
-
-/********************************************************************//**
-Creates the global purge system control structure and inits the history
-mutex. */
-UNIV_INTERN
-void
-trx_purge_sys_create(void)
-/*======================*/
-{
- ut_ad(mutex_own(&kernel_mutex));
-
- purge_sys = mem_alloc(sizeof(trx_purge_t));
-
- purge_sys->state = TRX_STOP_PURGE;
-
- purge_sys->n_pages_handled = 0;
-
- purge_sys->purge_trx_no = ut_dulint_zero;
- purge_sys->purge_undo_no = ut_dulint_zero;
- purge_sys->next_stored = FALSE;
-
- rw_lock_create(&purge_sys->latch, SYNC_PURGE_LATCH);
-
- mutex_create(&purge_sys->mutex, SYNC_PURGE_SYS);
-
- purge_sys->heap = mem_heap_create(256);
-
- purge_sys->arr = trx_undo_arr_create();
-
- purge_sys->sess = sess_open();
-
- purge_sys->trx = purge_sys->sess->trx;
-
- purge_sys->trx->is_purge = 1;
-
- ut_a(trx_start_low(purge_sys->trx, ULINT_UNDEFINED));
-
- purge_sys->query = trx_purge_graph_build();
-
- purge_sys->view = read_view_oldest_copy_or_open_new(ut_dulint_zero,
- purge_sys->heap);
-}
-
-/************************************************************************
-Frees the global purge system control structure. */
-UNIV_INTERN
-void
-trx_purge_sys_close(void)
-/*======================*/
-{
- ut_ad(!mutex_own(&kernel_mutex));
-
- que_graph_free(purge_sys->query);
-
- ut_a(purge_sys->sess->trx->is_purge);
- purge_sys->sess->trx->conc_state = TRX_NOT_STARTED;
- sess_close(purge_sys->sess);
- purge_sys->sess = NULL;
-
- if (purge_sys->view != NULL) {
- /* Because acquiring the kernel mutex is a pre-condition
- of read_view_close(). We don't really need it here. */
- mutex_enter(&kernel_mutex);
-
- read_view_close(purge_sys->view);
- purge_sys->view = NULL;
-
- mutex_exit(&kernel_mutex);
- }
-
- trx_undo_arr_free(purge_sys->arr);
-
- rw_lock_free(&purge_sys->latch);
- mutex_free(&purge_sys->mutex);
-
- mem_heap_free(purge_sys->heap);
- mem_free(purge_sys);
-
- purge_sys = NULL;
-}
-
-/*================ UNDO LOG HISTORY LIST =============================*/
-
-/********************************************************************//**
-Adds the update undo log as the first log in the history list. Removes the
-update undo log segment from the rseg slot if it is too big for reuse. */
-UNIV_INTERN
-void
-trx_purge_add_update_undo_to_history(
-/*=================================*/
- trx_t* trx, /*!< in: transaction */
- page_t* undo_page, /*!< in: update undo log header page,
- x-latched */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_undo_t* undo;
- trx_rseg_t* rseg;
- trx_rsegf_t* rseg_header;
- trx_usegf_t* seg_header;
- trx_ulogf_t* undo_header;
- trx_upagef_t* page_header;
- ulint hist_size;
-
- undo = trx->update_undo;
-
- ut_ad(undo);
-
- rseg = undo->rseg;
-
- ut_ad(mutex_own(&(rseg->mutex)));
-
- rseg_header = trx_rsegf_get(rseg->space, rseg->zip_size,
- rseg->page_no, mtr);
-
- undo_header = undo_page + undo->hdr_offset;
- seg_header = undo_page + TRX_UNDO_SEG_HDR;
- page_header = undo_page + TRX_UNDO_PAGE_HDR;
-
- if (undo->state != TRX_UNDO_CACHED) {
- /* The undo log segment will not be reused */
-
- if (undo->id >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr,
- "InnoDB: Error: undo->id is %lu\n",
- (ulong) undo->id);
- ut_error;
- }
-
- trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL, mtr);
-
- hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
- MLOG_4BYTES, mtr);
- ut_ad(undo->size == flst_get_len(
- seg_header + TRX_UNDO_PAGE_LIST, mtr));
-
- mlog_write_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
- hist_size + undo->size, MLOG_4BYTES, mtr);
- }
-
- /* Add the log as the first in the history list */
- flst_add_first(rseg_header + TRX_RSEG_HISTORY,
- undo_header + TRX_UNDO_HISTORY_NODE, mtr);
- mutex_enter(&kernel_mutex);
- trx_sys->rseg_history_len++;
- mutex_exit(&kernel_mutex);
-
- /* Write the trx number to the undo log header */
- mlog_write_dulint(undo_header + TRX_UNDO_TRX_NO, trx->no, mtr);
- /* Write information about delete markings to the undo log header */
-
- if (!undo->del_marks) {
- mlog_write_ulint(undo_header + TRX_UNDO_DEL_MARKS, FALSE,
- MLOG_2BYTES, mtr);
- }
-
- if (rseg->last_page_no == FIL_NULL) {
-
- rseg->last_page_no = undo->hdr_page_no;
- rseg->last_offset = undo->hdr_offset;
- rseg->last_trx_no = trx->no;
- rseg->last_del_marks = undo->del_marks;
- }
-}
-
-/**********************************************************************//**
-Frees an undo log segment which is in the history list. Cuts the end of the
-history list at the youngest undo log in this segment. */
-static
-void
-trx_purge_free_segment(
-/*===================*/
- trx_rseg_t* rseg, /*!< in: rollback segment */
- fil_addr_t hdr_addr, /*!< in: the file address of log_hdr */
- ulint n_removed_logs) /*!< in: count of how many undo logs we
- will cut off from the end of the
- history list */
-{
- page_t* undo_page;
- trx_rsegf_t* rseg_hdr;
- trx_ulogf_t* log_hdr;
- trx_usegf_t* seg_hdr;
- ibool freed;
- ulint seg_size;
- ulint hist_size;
- ibool marked = FALSE;
- mtr_t mtr;
-
- /* fputs("Freeing an update undo log segment\n", stderr); */
-
- ut_ad(mutex_own(&(purge_sys->mutex)));
-loop:
- mtr_start(&mtr);
- mutex_enter(&(rseg->mutex));
-
- rseg_hdr = trx_rsegf_get(rseg->space, rseg->zip_size,
- rseg->page_no, &mtr);
-
- undo_page = trx_undo_page_get(rseg->space, rseg->zip_size,
- hdr_addr.page, &mtr);
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
- log_hdr = undo_page + hdr_addr.boffset;
-
- /* Mark the last undo log totally purged, so that if the system
- crashes, the tail of the undo log will not get accessed again. The
- list of pages in the undo log tail gets inconsistent during the
- freeing of the segment, and therefore purge should not try to access
- them again. */
-
- if (!marked) {
- mlog_write_ulint(log_hdr + TRX_UNDO_DEL_MARKS, FALSE,
- MLOG_2BYTES, &mtr);
- marked = TRUE;
- }
-
- freed = fseg_free_step_not_header(seg_hdr + TRX_UNDO_FSEG_HEADER,
- &mtr);
- if (!freed) {
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
-
- goto loop;
- }
-
- /* The page list may now be inconsistent, but the length field
- stored in the list base node tells us how big it was before we
- started the freeing. */
-
- seg_size = flst_get_len(seg_hdr + TRX_UNDO_PAGE_LIST, &mtr);
-
- /* We may free the undo log segment header page; it must be freed
- within the same mtr as the undo log header is removed from the
- history list: otherwise, in case of a database crash, the segment
- could become inaccessible garbage in the file space. */
-
- flst_cut_end(rseg_hdr + TRX_RSEG_HISTORY,
- log_hdr + TRX_UNDO_HISTORY_NODE, n_removed_logs, &mtr);
-
- mutex_enter(&kernel_mutex);
- ut_ad(trx_sys->rseg_history_len >= n_removed_logs);
- trx_sys->rseg_history_len -= n_removed_logs;
- mutex_exit(&kernel_mutex);
-
- freed = FALSE;
-
- while (!freed) {
- /* Here we assume that a file segment with just the header
- page can be freed in a few steps, so that the buffer pool
- is not flooded with bufferfixed pages: see the note in
- fsp0fsp.c. */
-
- freed = fseg_free_step(seg_hdr + TRX_UNDO_FSEG_HEADER,
- &mtr);
- }
-
- hist_size = mtr_read_ulint(rseg_hdr + TRX_RSEG_HISTORY_SIZE,
- MLOG_4BYTES, &mtr);
- ut_ad(hist_size >= seg_size);
-
- mlog_write_ulint(rseg_hdr + TRX_RSEG_HISTORY_SIZE,
- hist_size - seg_size, MLOG_4BYTES, &mtr);
-
- ut_ad(rseg->curr_size >= seg_size);
-
- rseg->curr_size -= seg_size;
-
- mutex_exit(&(rseg->mutex));
-
- mtr_commit(&mtr);
-}
-
-/********************************************************************//**
-Removes unnecessary history data from a rollback segment. */
-static
-void
-trx_purge_truncate_rseg_history(
-/*============================*/
- trx_rseg_t* rseg, /*!< in: rollback segment */
- trx_id_t limit_trx_no, /*!< in: remove update undo logs whose
- trx number is < limit_trx_no */
- undo_no_t limit_undo_no) /*!< in: if transaction number is equal
- to limit_trx_no, truncate undo records
- with undo number < limit_undo_no */
-{
- fil_addr_t hdr_addr;
- fil_addr_t prev_hdr_addr;
- trx_rsegf_t* rseg_hdr;
- page_t* undo_page;
- trx_ulogf_t* log_hdr;
- trx_usegf_t* seg_hdr;
- int cmp;
- ulint n_removed_logs = 0;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(purge_sys->mutex)));
-
- mtr_start(&mtr);
- mutex_enter(&(rseg->mutex));
-
- rseg_hdr = trx_rsegf_get(rseg->space, rseg->zip_size,
- rseg->page_no, &mtr);
-
- hdr_addr = trx_purge_get_log_from_hist(
- flst_get_last(rseg_hdr + TRX_RSEG_HISTORY, &mtr));
-loop:
- if (hdr_addr.page == FIL_NULL) {
-
- mutex_exit(&(rseg->mutex));
-
- mtr_commit(&mtr);
-
- return;
- }
-
- undo_page = trx_undo_page_get(rseg->space, rseg->zip_size,
- hdr_addr.page, &mtr);
-
- log_hdr = undo_page + hdr_addr.boffset;
-
- cmp = ut_dulint_cmp(mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO),
- limit_trx_no);
- if (cmp == 0) {
- trx_undo_truncate_start(rseg, rseg->space, hdr_addr.page,
- hdr_addr.boffset, limit_undo_no);
- }
-
- if (cmp >= 0) {
- mutex_enter(&kernel_mutex);
- ut_a(trx_sys->rseg_history_len >= n_removed_logs);
- trx_sys->rseg_history_len -= n_removed_logs;
- mutex_exit(&kernel_mutex);
-
- flst_truncate_end(rseg_hdr + TRX_RSEG_HISTORY,
- log_hdr + TRX_UNDO_HISTORY_NODE,
- n_removed_logs, &mtr);
-
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
-
- return;
- }
-
- prev_hdr_addr = trx_purge_get_log_from_hist(
- flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE, &mtr));
- n_removed_logs++;
-
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
-
- if ((mach_read_from_2(seg_hdr + TRX_UNDO_STATE) == TRX_UNDO_TO_PURGE)
- && (mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG) == 0)) {
-
- /* We can free the whole log segment */
-
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
-
- trx_purge_free_segment(rseg, hdr_addr, n_removed_logs);
-
- n_removed_logs = 0;
- } else {
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
- }
-
- mtr_start(&mtr);
- mutex_enter(&(rseg->mutex));
-
- rseg_hdr = trx_rsegf_get(rseg->space, rseg->zip_size,
- rseg->page_no, &mtr);
-
- hdr_addr = prev_hdr_addr;
-
- goto loop;
-}
-
-/********************************************************************//**
-Removes unnecessary history data from rollback segments. NOTE that when this
-function is called, the caller must not have any latches on undo log pages! */
-static
-void
-trx_purge_truncate_history(void)
-/*============================*/
-{
- trx_rseg_t* rseg;
- trx_id_t limit_trx_no;
- undo_no_t limit_undo_no;
-
- ut_ad(mutex_own(&(purge_sys->mutex)));
-
- trx_purge_arr_get_biggest(purge_sys->arr, &limit_trx_no,
- &limit_undo_no);
-
- if (ut_dulint_is_zero(limit_trx_no)) {
-
- limit_trx_no = purge_sys->purge_trx_no;
- limit_undo_no = purge_sys->purge_undo_no;
- }
-
- /* We play safe and set the truncate limit at most to the purge view
- low_limit number, though this is not necessary */
-
- if (ut_dulint_cmp(limit_trx_no, purge_sys->view->low_limit_no) >= 0) {
- limit_trx_no = purge_sys->view->low_limit_no;
- limit_undo_no = ut_dulint_zero;
- }
-
- ut_ad((ut_dulint_cmp(limit_trx_no,
- purge_sys->view->low_limit_no) <= 0));
-
- rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
-
- while (rseg) {
- trx_purge_truncate_rseg_history(rseg, limit_trx_no,
- limit_undo_no);
- rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
- }
-}
-
-/********************************************************************//**
-Does a truncate if the purge array is empty. NOTE that when this function is
-called, the caller must not have any latches on undo log pages!
-@return TRUE if array empty */
-UNIV_INLINE
-ibool
-trx_purge_truncate_if_arr_empty(void)
-/*=================================*/
-{
- ut_ad(mutex_own(&(purge_sys->mutex)));
-
- if (purge_sys->arr->n_used == 0) {
-
- trx_purge_truncate_history();
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/***********************************************************************//**
-Updates the last not yet purged history log info in rseg when we have purged
-a whole undo log. Advances also purge_sys->purge_trx_no past the purged log. */
-static
-void
-trx_purge_rseg_get_next_history_log(
-/*================================*/
- trx_rseg_t* rseg) /*!< in: rollback segment */
-{
- page_t* undo_page;
- trx_ulogf_t* log_hdr;
- trx_usegf_t* seg_hdr;
- fil_addr_t prev_log_addr;
- trx_id_t trx_no;
- ibool del_marks;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(purge_sys->mutex)));
-
- mutex_enter(&(rseg->mutex));
-
- ut_a(rseg->last_page_no != FIL_NULL);
-
- purge_sys->purge_trx_no = ut_dulint_add(rseg->last_trx_no, 1);
- purge_sys->purge_undo_no = ut_dulint_zero;
- purge_sys->next_stored = FALSE;
-
- mtr_start(&mtr);
-
- undo_page = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size,
- rseg->last_page_no, &mtr);
- log_hdr = undo_page + rseg->last_offset;
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
-
- /* Increase the purge page count by one for every handled log */
-
- purge_sys->n_pages_handled++;
-
- prev_log_addr = trx_purge_get_log_from_hist(
- flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE, &mtr));
- if (prev_log_addr.page == FIL_NULL) {
- /* No logs left in the history list */
-
- rseg->last_page_no = FIL_NULL;
-
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
-
- mutex_enter(&kernel_mutex);
-
- /* Add debug code to track history list corruption reported
- on the MySQL mailing list on Nov 9, 2004. The fut0lst.c
- file-based list was corrupt. The prev node pointer was
- FIL_NULL, even though the list length was over 8 million nodes!
- We assume that purge truncates the history list in moderate
- size pieces, and if we here reach the head of the list, the
- list cannot be longer than 20 000 undo logs now. */
-
- if (trx_sys->rseg_history_len > 20000) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: purge reached the"
- " head of the history list,\n"
- "InnoDB: but its length is still"
- " reported as %lu! Make a detailed bug\n"
- "InnoDB: report, and submit it"
- " to http://bugs.mysql.com\n",
- (ulong) trx_sys->rseg_history_len);
- }
-
- mutex_exit(&kernel_mutex);
-
- return;
- }
-
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
-
- /* Read the trx number and del marks from the previous log header */
- mtr_start(&mtr);
-
- log_hdr = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size,
- prev_log_addr.page, &mtr)
- + prev_log_addr.boffset;
-
- trx_no = mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO);
-
- del_marks = mach_read_from_2(log_hdr + TRX_UNDO_DEL_MARKS);
-
- mtr_commit(&mtr);
-
- mutex_enter(&(rseg->mutex));
-
- rseg->last_page_no = prev_log_addr.page;
- rseg->last_offset = prev_log_addr.boffset;
- rseg->last_trx_no = trx_no;
- rseg->last_del_marks = del_marks;
-
- mutex_exit(&(rseg->mutex));
-}
-
-/***********************************************************************//**
-Chooses the next undo log to purge and updates the info in purge_sys. This
-function is used to initialize purge_sys when the next record to purge is
-not known, and also to update the purge system info on the next record when
-purge has handled the whole undo log for a transaction. */
-static
-void
-trx_purge_choose_next_log(void)
-/*===========================*/
-{
- trx_undo_rec_t* rec;
- trx_rseg_t* rseg;
- trx_rseg_t* min_rseg;
- trx_id_t min_trx_no;
- ulint space = 0; /* remove warning (??? bug ???) */
- ulint zip_size = 0;
- ulint page_no = 0; /* remove warning (??? bug ???) */
- ulint offset = 0; /* remove warning (??? bug ???) */
- mtr_t mtr;
-
- ut_ad(mutex_own(&(purge_sys->mutex)));
- ut_ad(purge_sys->next_stored == FALSE);
-
- rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
-
- min_trx_no = ut_dulint_max;
-
- min_rseg = NULL;
-
- while (rseg) {
- mutex_enter(&(rseg->mutex));
-
- if (rseg->last_page_no != FIL_NULL) {
-
- if ((min_rseg == NULL)
- || (ut_dulint_cmp(min_trx_no,
- rseg->last_trx_no) > 0)) {
-
- min_rseg = rseg;
- min_trx_no = rseg->last_trx_no;
- space = rseg->space;
- zip_size = rseg->zip_size;
- ut_a(space == 0); /* We assume in purge of
- externally stored fields
- that space id == 0 */
- page_no = rseg->last_page_no;
- offset = rseg->last_offset;
- }
- }
-
- mutex_exit(&(rseg->mutex));
-
- rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
- }
-
- if (min_rseg == NULL) {
-
- return;
- }
-
- mtr_start(&mtr);
-
- if (!min_rseg->last_del_marks) {
- /* No need to purge this log */
-
- rec = &trx_purge_dummy_rec;
- } else {
- rec = trx_undo_get_first_rec(space, zip_size, page_no, offset,
- RW_S_LATCH, &mtr);
- if (rec == NULL) {
- /* Undo log empty */
-
- rec = &trx_purge_dummy_rec;
- }
- }
-
- purge_sys->next_stored = TRUE;
- purge_sys->rseg = min_rseg;
-
- purge_sys->hdr_page_no = page_no;
- purge_sys->hdr_offset = offset;
-
- purge_sys->purge_trx_no = min_trx_no;
-
- if (rec == &trx_purge_dummy_rec) {
-
- purge_sys->purge_undo_no = ut_dulint_zero;
- purge_sys->page_no = page_no;
- purge_sys->offset = 0;
- } else {
- purge_sys->purge_undo_no = trx_undo_rec_get_undo_no(rec);
-
- purge_sys->page_no = page_get_page_no(page_align(rec));
- purge_sys->offset = page_offset(rec);
- }
-
- mtr_commit(&mtr);
-}
-
-/***********************************************************************//**
-Gets the next record to purge and updates the info in the purge system.
-@return copy of an undo log record or pointer to the dummy undo log record */
-static
-trx_undo_rec_t*
-trx_purge_get_next_rec(
-/*===================*/
- mem_heap_t* heap) /*!< in: memory heap where copied */
-{
- trx_undo_rec_t* rec;
- trx_undo_rec_t* rec_copy;
- trx_undo_rec_t* rec2;
- trx_undo_rec_t* next_rec;
- page_t* undo_page;
- page_t* page;
- ulint offset;
- ulint page_no;
- ulint space;
- ulint zip_size;
- ulint type;
- ulint cmpl_info;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(purge_sys->mutex)));
- ut_ad(purge_sys->next_stored);
-
- space = purge_sys->rseg->space;
- zip_size = purge_sys->rseg->zip_size;
- page_no = purge_sys->page_no;
- offset = purge_sys->offset;
-
- if (offset == 0) {
- /* It is the dummy undo log record, which means that there is
- no need to purge this undo log */
-
- trx_purge_rseg_get_next_history_log(purge_sys->rseg);
-
- /* Look for the next undo log and record to purge */
-
- trx_purge_choose_next_log();
-
- return(&trx_purge_dummy_rec);
- }
-
- mtr_start(&mtr);
-
- undo_page = trx_undo_page_get_s_latched(space, zip_size,
- page_no, &mtr);
- rec = undo_page + offset;
-
- rec2 = rec;
-
- for (;;) {
- /* Try first to find the next record which requires a purge
- operation from the same page of the same undo log */
-
- next_rec = trx_undo_page_get_next_rec(rec2,
- purge_sys->hdr_page_no,
- purge_sys->hdr_offset);
- if (next_rec == NULL) {
- rec2 = trx_undo_get_next_rec(
- rec2, purge_sys->hdr_page_no,
- purge_sys->hdr_offset, &mtr);
- break;
- }
-
- rec2 = next_rec;
-
- type = trx_undo_rec_get_type(rec2);
-
- if (type == TRX_UNDO_DEL_MARK_REC) {
-
- break;
- }
-
- cmpl_info = trx_undo_rec_get_cmpl_info(rec2);
-
- if (trx_undo_rec_get_extern_storage(rec2)) {
- break;
- }
-
- if ((type == TRX_UNDO_UPD_EXIST_REC)
- && !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
- break;
- }
- }
-
- if (rec2 == NULL) {
- mtr_commit(&mtr);
-
- trx_purge_rseg_get_next_history_log(purge_sys->rseg);
-
- /* Look for the next undo log and record to purge */
-
- trx_purge_choose_next_log();
-
- mtr_start(&mtr);
-
- undo_page = trx_undo_page_get_s_latched(space, zip_size,
- page_no, &mtr);
-
- rec = undo_page + offset;
- } else {
- page = page_align(rec2);
-
- purge_sys->purge_undo_no = trx_undo_rec_get_undo_no(rec2);
- purge_sys->page_no = page_get_page_no(page);
- purge_sys->offset = rec2 - page;
-
- if (undo_page != page) {
- /* We advance to a new page of the undo log: */
- purge_sys->n_pages_handled++;
- }
- }
-
- rec_copy = trx_undo_rec_copy(rec, heap);
-
- mtr_commit(&mtr);
-
- return(rec_copy);
-}
-
-/********************************************************************//**
-Fetches the next undo log record from the history list to purge. It must be
-released with the corresponding release function.
-@return copy of an undo log record or pointer to trx_purge_dummy_rec,
-if the whole undo log can skipped in purge; NULL if none left */
-UNIV_INTERN
-trx_undo_rec_t*
-trx_purge_fetch_next_rec(
-/*=====================*/
- roll_ptr_t* roll_ptr,/*!< out: roll pointer to undo record */
- trx_undo_inf_t** cell, /*!< out: storage cell for the record in the
- purge array */
- mem_heap_t* heap) /*!< in: memory heap where copied */
-{
- trx_undo_rec_t* undo_rec;
-
- mutex_enter(&(purge_sys->mutex));
-
- if (purge_sys->state == TRX_STOP_PURGE) {
- trx_purge_truncate_if_arr_empty();
-
- mutex_exit(&(purge_sys->mutex));
-
- return(NULL);
- }
-
- if (!purge_sys->next_stored) {
- trx_purge_choose_next_log();
-
- if (!purge_sys->next_stored) {
- purge_sys->state = TRX_STOP_PURGE;
-
- trx_purge_truncate_if_arr_empty();
-
- if (srv_print_thread_releases) {
- fprintf(stderr,
- "Purge: No logs left in the"
- " history list; pages handled %lu\n",
- (ulong) purge_sys->n_pages_handled);
- }
-
- mutex_exit(&(purge_sys->mutex));
-
- return(NULL);
- }
- }
-
- if (purge_sys->n_pages_handled >= purge_sys->handle_limit) {
-
- purge_sys->state = TRX_STOP_PURGE;
-
- trx_purge_truncate_if_arr_empty();
-
- mutex_exit(&(purge_sys->mutex));
-
- return(NULL);
- }
-
- if (ut_dulint_cmp(purge_sys->purge_trx_no,
- purge_sys->view->low_limit_no) >= 0) {
- purge_sys->state = TRX_STOP_PURGE;
-
- trx_purge_truncate_if_arr_empty();
-
- mutex_exit(&(purge_sys->mutex));
-
- return(NULL);
- }
-
- /* fprintf(stderr, "Thread %lu purging trx %lu undo record %lu\n",
- os_thread_get_curr_id(),
- ut_dulint_get_low(purge_sys->purge_trx_no),
- ut_dulint_get_low(purge_sys->purge_undo_no)); */
-
- *roll_ptr = trx_undo_build_roll_ptr(FALSE, (purge_sys->rseg)->id,
- purge_sys->page_no,
- purge_sys->offset);
-
- *cell = trx_purge_arr_store_info(purge_sys->purge_trx_no,
- purge_sys->purge_undo_no);
-
- ut_ad(ut_dulint_cmp(purge_sys->purge_trx_no,
- (purge_sys->view)->low_limit_no) < 0);
-
- /* The following call will advance the stored values of purge_trx_no
- and purge_undo_no, therefore we had to store them first */
-
- undo_rec = trx_purge_get_next_rec(heap);
-
- mutex_exit(&(purge_sys->mutex));
-
- return(undo_rec);
-}
-
-/*******************************************************************//**
-Releases a reserved purge undo record. */
-UNIV_INTERN
-void
-trx_purge_rec_release(
-/*==================*/
- trx_undo_inf_t* cell) /*!< in: storage cell */
-{
- trx_undo_arr_t* arr;
-
- mutex_enter(&(purge_sys->mutex));
-
- arr = purge_sys->arr;
-
- trx_purge_arr_remove_info(cell);
-
- mutex_exit(&(purge_sys->mutex));
-}
-
-/*******************************************************************//**
-This function runs a purge batch.
-@return number of undo log pages handled in the batch */
-UNIV_INTERN
-ulint
-trx_purge(void)
-/*===========*/
-{
- que_thr_t* thr;
- /* que_thr_t* thr2; */
- ulint old_pages_handled;
-
- mutex_enter(&(purge_sys->mutex));
-
- if (purge_sys->trx->n_active_thrs > 0) {
-
- mutex_exit(&(purge_sys->mutex));
-
- /* Should not happen */
-
- ut_error;
-
- return(0);
- }
-
- rw_lock_x_lock(&(purge_sys->latch));
-
- mutex_enter(&kernel_mutex);
-
- /* Close and free the old purge view */
-
- read_view_close(purge_sys->view);
- purge_sys->view = NULL;
- mem_heap_empty(purge_sys->heap);
-
- /* Determine how much data manipulation language (DML) statements
- need to be delayed in order to reduce the lagging of the purge
- thread. */
- srv_dml_needed_delay = 0; /* in microseconds; default: no delay */
-
- /* If we cannot advance the 'purge view' because of an old
- 'consistent read view', then the DML statements cannot be delayed.
- Also, srv_max_purge_lag <= 0 means 'infinity'. */
- if (srv_max_purge_lag > 0
- && !UT_LIST_GET_LAST(trx_sys->view_list)) {
- float ratio = (float) trx_sys->rseg_history_len
- / srv_max_purge_lag;
- if (ratio > ULINT_MAX / 10000) {
- /* Avoid overflow: maximum delay is 4295 seconds */
- srv_dml_needed_delay = ULINT_MAX;
- } else if (ratio > 1) {
- /* If the history list length exceeds the
- innodb_max_purge_lag, the
- data manipulation statements are delayed
- by at least 5000 microseconds. */
- srv_dml_needed_delay = (ulint) ((ratio - .5) * 10000);
- }
- }
-
- purge_sys->view = read_view_oldest_copy_or_open_new(ut_dulint_zero,
- purge_sys->heap);
- mutex_exit(&kernel_mutex);
-
- rw_lock_x_unlock(&(purge_sys->latch));
-
- purge_sys->state = TRX_PURGE_ON;
-
- /* Handle at most 20 undo log pages in one purge batch */
-
- purge_sys->handle_limit = purge_sys->n_pages_handled + 20;
-
- old_pages_handled = purge_sys->n_pages_handled;
-
- mutex_exit(&(purge_sys->mutex));
-
- mutex_enter(&kernel_mutex);
-
- thr = que_fork_start_command(purge_sys->query);
-
- ut_ad(thr);
-
- /* thr2 = que_fork_start_command(purge_sys->query);
-
- ut_ad(thr2); */
-
-
- mutex_exit(&kernel_mutex);
-
- /* srv_que_task_enqueue(thr2); */
-
- if (srv_print_thread_releases) {
-
- fputs("Starting purge\n", stderr);
- }
-
- que_run_threads(thr);
-
- if (srv_print_thread_releases) {
-
- fprintf(stderr,
- "Purge ends; pages handled %lu\n",
- (ulong) purge_sys->n_pages_handled);
- }
-
- return(purge_sys->n_pages_handled - old_pages_handled);
-}
-
-/******************************************************************//**
-Prints information of the purge system to stderr. */
-UNIV_INTERN
-void
-trx_purge_sys_print(void)
-/*=====================*/
-{
- fprintf(stderr, "InnoDB: Purge system view:\n");
- read_view_print(purge_sys->view);
-
- fprintf(stderr, "InnoDB: Purge trx n:o " TRX_ID_FMT
- ", undo n:o " TRX_ID_FMT "\n",
- TRX_ID_PREP_PRINTF(purge_sys->purge_trx_no),
- TRX_ID_PREP_PRINTF(purge_sys->purge_undo_no));
- fprintf(stderr,
- "InnoDB: Purge next stored %lu, page_no %lu, offset %lu,\n"
- "InnoDB: Purge hdr_page_no %lu, hdr_offset %lu\n",
- (ulong) purge_sys->next_stored,
- (ulong) purge_sys->page_no,
- (ulong) purge_sys->offset,
- (ulong) purge_sys->hdr_page_no,
- (ulong) purge_sys->hdr_offset);
-}
diff --git a/storage/innodb_plugin/trx/trx0rec.c b/storage/innodb_plugin/trx/trx0rec.c
deleted file mode 100644
index 5097cf18dcd..00000000000
--- a/storage/innodb_plugin/trx/trx0rec.c
+++ /dev/null
@@ -1,1601 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file trx/trx0rec.c
-Transaction undo log record
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0rec.h"
-
-#ifdef UNIV_NONINL
-#include "trx0rec.ic"
-#endif
-
-#include "fsp0fsp.h"
-#include "mach0data.h"
-#include "trx0undo.h"
-#include "mtr0log.h"
-#ifndef UNIV_HOTBACKUP
-#include "dict0dict.h"
-#include "ut0mem.h"
-#include "row0ext.h"
-#include "row0upd.h"
-#include "que0que.h"
-#include "trx0purge.h"
-#include "trx0rseg.h"
-#include "row0row.h"
-
-/*=========== UNDO LOG RECORD CREATION AND DECODING ====================*/
-
-/**********************************************************************//**
-Writes the mtr log entry of the inserted undo log record on the undo log
-page. */
-UNIV_INLINE
-void
-trx_undof_page_add_undo_rec_log(
-/*============================*/
- page_t* undo_page, /*!< in: undo log page */
- ulint old_free, /*!< in: start offset of the inserted entry */
- ulint new_free, /*!< in: end offset of the entry */
- mtr_t* mtr) /*!< in: mtr */
-{
- byte* log_ptr;
- const byte* log_end;
- ulint len;
-
- log_ptr = mlog_open(mtr, 11 + 13 + MLOG_BUF_MARGIN);
-
- if (log_ptr == NULL) {
-
- return;
- }
-
- log_end = &log_ptr[11 + 13 + MLOG_BUF_MARGIN];
- log_ptr = mlog_write_initial_log_record_fast(
- undo_page, MLOG_UNDO_INSERT, log_ptr, mtr);
- len = new_free - old_free - 4;
-
- mach_write_to_2(log_ptr, len);
- log_ptr += 2;
-
- if (log_ptr + len <= log_end) {
- memcpy(log_ptr, undo_page + old_free + 2, len);
- mlog_close(mtr, log_ptr + len);
- } else {
- mlog_close(mtr, log_ptr);
- mlog_catenate_string(mtr, undo_page + old_free + 2, len);
- }
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Parses a redo log record of adding an undo log record.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-trx_undo_parse_add_undo_rec(
-/*========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page) /*!< in: page or NULL */
-{
- ulint len;
- byte* rec;
- ulint first_free;
-
- if (end_ptr < ptr + 2) {
-
- return(NULL);
- }
-
- len = mach_read_from_2(ptr);
- ptr += 2;
-
- if (end_ptr < ptr + len) {
-
- return(NULL);
- }
-
- if (page == NULL) {
-
- return(ptr + len);
- }
-
- first_free = mach_read_from_2(page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
- rec = page + first_free;
-
- mach_write_to_2(rec, first_free + 4 + len);
- mach_write_to_2(rec + 2 + len, first_free);
-
- mach_write_to_2(page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
- first_free + 4 + len);
- ut_memcpy(rec + 2, ptr, len);
-
- return(ptr + len);
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Calculates the free space left for extending an undo log record.
-@return bytes left */
-UNIV_INLINE
-ulint
-trx_undo_left(
-/*==========*/
- const page_t* page, /*!< in: undo log page */
- const byte* ptr) /*!< in: pointer to page */
-{
- /* The '- 10' is a safety margin, in case we have some small
- calculation error below */
-
- return(UNIV_PAGE_SIZE - (ptr - page) - 10 - FIL_PAGE_DATA_END);
-}
-
-/**********************************************************************//**
-Set the next and previous pointers in the undo page for the undo record
-that was written to ptr. Update the first free value by the number of bytes
-written for this undo record.
-@return offset of the inserted entry on the page if succeeded, 0 if fail */
-static
-ulint
-trx_undo_page_set_next_prev_and_add(
-/*================================*/
- page_t* undo_page, /*!< in/out: undo log page */
- byte* ptr, /*!< in: ptr up to where data has been
- written on this undo page. */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint first_free; /*!< offset within undo_page */
- ulint end_of_rec; /*!< offset within undo_page */
- byte* ptr_to_first_free;
- /* pointer within undo_page
- that points to the next free
- offset value within undo_page.*/
-
- ut_ad(ptr > undo_page);
- ut_ad(ptr < undo_page + UNIV_PAGE_SIZE);
-
- if (UNIV_UNLIKELY(trx_undo_left(undo_page, ptr) < 2)) {
-
- return(0);
- }
-
- ptr_to_first_free = undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE;
-
- first_free = mach_read_from_2(ptr_to_first_free);
-
- /* Write offset of the previous undo log record */
- mach_write_to_2(ptr, first_free);
- ptr += 2;
-
- end_of_rec = ptr - undo_page;
-
- /* Write offset of the next undo log record */
- mach_write_to_2(undo_page + first_free, end_of_rec);
-
- /* Update the offset to first free undo record */
- mach_write_to_2(ptr_to_first_free, end_of_rec);
-
- /* Write this log entry to the UNDO log */
- trx_undof_page_add_undo_rec_log(undo_page, first_free,
- end_of_rec, mtr);
-
- return(first_free);
-}
-
-/**********************************************************************//**
-Reports in the undo log of an insert of a clustered index record.
-@return offset of the inserted entry on the page if succeed, 0 if fail */
-static
-ulint
-trx_undo_page_report_insert(
-/*========================*/
- page_t* undo_page, /*!< in: undo log page */
- trx_t* trx, /*!< in: transaction */
- dict_index_t* index, /*!< in: clustered index */
- const dtuple_t* clust_entry, /*!< in: index entry which will be
- inserted to the clustered index */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint first_free;
- byte* ptr;
- ulint i;
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_INSERT);
-
- first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
- ptr = undo_page + first_free;
-
- ut_ad(first_free <= UNIV_PAGE_SIZE);
-
- if (trx_undo_left(undo_page, ptr) < 2 + 1 + 11 + 11) {
-
- /* Not enough space for writing the general parameters */
-
- return(0);
- }
-
- /* Reserve 2 bytes for the pointer to the next undo log record */
- ptr += 2;
-
- /* Store first some general parameters to the undo log */
- *ptr++ = TRX_UNDO_INSERT_REC;
- ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no);
- ptr += mach_dulint_write_much_compressed(ptr, index->table->id);
- /*----------------------------------------*/
- /* Store then the fields required to uniquely determine the record
- to be inserted in the clustered index */
-
- for (i = 0; i < dict_index_get_n_unique(index); i++) {
-
- const dfield_t* field = dtuple_get_nth_field(clust_entry, i);
- ulint flen = dfield_get_len(field);
-
- if (trx_undo_left(undo_page, ptr) < 5) {
-
- return(0);
- }
-
- ptr += mach_write_compressed(ptr, flen);
-
- if (flen != UNIV_SQL_NULL) {
- if (trx_undo_left(undo_page, ptr) < flen) {
-
- return(0);
- }
-
- ut_memcpy(ptr, dfield_get_data(field), flen);
- ptr += flen;
- }
- }
-
- return(trx_undo_page_set_next_prev_and_add(undo_page, ptr, mtr));
-}
-
-/**********************************************************************//**
-Reads from an undo log record the general parameters.
-@return remaining part of undo log record after reading these values */
-UNIV_INTERN
-byte*
-trx_undo_rec_get_pars(
-/*==================*/
- trx_undo_rec_t* undo_rec, /*!< in: undo log record */
- ulint* type, /*!< out: undo record type:
- TRX_UNDO_INSERT_REC, ... */
- ulint* cmpl_info, /*!< out: compiler info, relevant only
- for update type records */
- ibool* updated_extern, /*!< out: TRUE if we updated an
- externally stored fild */
- undo_no_t* undo_no, /*!< out: undo log record number */
- dulint* table_id) /*!< out: table id */
-{
- byte* ptr;
- ulint type_cmpl;
-
- ptr = undo_rec + 2;
-
- type_cmpl = mach_read_from_1(ptr);
- ptr++;
-
- if (type_cmpl & TRX_UNDO_UPD_EXTERN) {
- *updated_extern = TRUE;
- type_cmpl -= TRX_UNDO_UPD_EXTERN;
- } else {
- *updated_extern = FALSE;
- }
-
- *type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1);
- *cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT;
-
- *undo_no = mach_dulint_read_much_compressed(ptr);
- ptr += mach_dulint_get_much_compressed_size(*undo_no);
-
- *table_id = mach_dulint_read_much_compressed(ptr);
- ptr += mach_dulint_get_much_compressed_size(*table_id);
-
- return(ptr);
-}
-
-/**********************************************************************//**
-Reads from an undo log record a stored column value.
-@return remaining part of undo log record after reading these values */
-static
-byte*
-trx_undo_rec_get_col_val(
-/*=====================*/
- byte* ptr, /*!< in: pointer to remaining part of undo log record */
- byte** field, /*!< out: pointer to stored field */
- ulint* len, /*!< out: length of the field, or UNIV_SQL_NULL */
- ulint* orig_len)/*!< out: original length of the locally
- stored part of an externally stored column, or 0 */
-{
- *len = mach_read_compressed(ptr);
- ptr += mach_get_compressed_size(*len);
-
- *orig_len = 0;
-
- switch (*len) {
- case UNIV_SQL_NULL:
- *field = NULL;
- break;
- case UNIV_EXTERN_STORAGE_FIELD:
- *orig_len = mach_read_compressed(ptr);
- ptr += mach_get_compressed_size(*orig_len);
- *len = mach_read_compressed(ptr);
- ptr += mach_get_compressed_size(*len);
- *field = ptr;
- ptr += *len;
-
- ut_ad(*orig_len >= BTR_EXTERN_FIELD_REF_SIZE);
- ut_ad(*len > *orig_len);
- ut_ad(*len >= REC_MAX_INDEX_COL_LEN
- + BTR_EXTERN_FIELD_REF_SIZE);
-
- *len += UNIV_EXTERN_STORAGE_FIELD;
- break;
- default:
- *field = ptr;
- if (*len >= UNIV_EXTERN_STORAGE_FIELD) {
- ptr += *len - UNIV_EXTERN_STORAGE_FIELD;
- } else {
- ptr += *len;
- }
- }
-
- return(ptr);
-}
-
-/*******************************************************************//**
-Builds a row reference from an undo log record.
-@return pointer to remaining part of undo record */
-UNIV_INTERN
-byte*
-trx_undo_rec_get_row_ref(
-/*=====================*/
- byte* ptr, /*!< in: remaining part of a copy of an undo log
- record, at the start of the row reference;
- NOTE that this copy of the undo log record must
- be preserved as long as the row reference is
- used, as we do NOT copy the data in the
- record! */
- dict_index_t* index, /*!< in: clustered index */
- dtuple_t** ref, /*!< out, own: row reference */
- mem_heap_t* heap) /*!< in: memory heap from which the memory
- needed is allocated */
-{
- ulint ref_len;
- ulint i;
-
- ut_ad(index && ptr && ref && heap);
- ut_a(dict_index_is_clust(index));
-
- ref_len = dict_index_get_n_unique(index);
-
- *ref = dtuple_create(heap, ref_len);
-
- dict_index_copy_types(*ref, index, ref_len);
-
- for (i = 0; i < ref_len; i++) {
- dfield_t* dfield;
- byte* field;
- ulint len;
- ulint orig_len;
-
- dfield = dtuple_get_nth_field(*ref, i);
-
- ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
-
- dfield_set_data(dfield, field, len);
- }
-
- return(ptr);
-}
-
-/*******************************************************************//**
-Skips a row reference from an undo log record.
-@return pointer to remaining part of undo record */
-UNIV_INTERN
-byte*
-trx_undo_rec_skip_row_ref(
-/*======================*/
- byte* ptr, /*!< in: remaining part in update undo log
- record, at the start of the row reference */
- dict_index_t* index) /*!< in: clustered index */
-{
- ulint ref_len;
- ulint i;
-
- ut_ad(index && ptr);
- ut_a(dict_index_is_clust(index));
-
- ref_len = dict_index_get_n_unique(index);
-
- for (i = 0; i < ref_len; i++) {
- byte* field;
- ulint len;
- ulint orig_len;
-
- ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
- }
-
- return(ptr);
-}
-
-/**********************************************************************//**
-Fetch a prefix of an externally stored column, for writing to the undo log
-of an update or delete marking of a clustered index record.
-@return ext_buf */
-static
-byte*
-trx_undo_page_fetch_ext(
-/*====================*/
- byte* ext_buf, /*!< in: a buffer of
- REC_MAX_INDEX_COL_LEN
- + BTR_EXTERN_FIELD_REF_SIZE */
- ulint zip_size, /*!< compressed page size in bytes,
- or 0 for uncompressed BLOB */
- const byte* field, /*!< in: an externally stored column */
- ulint* len) /*!< in: length of field;
- out: used length of ext_buf */
-{
- /* Fetch the BLOB. */
- ulint ext_len = btr_copy_externally_stored_field_prefix(
- ext_buf, REC_MAX_INDEX_COL_LEN, zip_size, field, *len);
- /* BLOBs should always be nonempty. */
- ut_a(ext_len);
- /* Append the BLOB pointer to the prefix. */
- memcpy(ext_buf + ext_len,
- field + *len - BTR_EXTERN_FIELD_REF_SIZE,
- BTR_EXTERN_FIELD_REF_SIZE);
- *len = ext_len + BTR_EXTERN_FIELD_REF_SIZE;
- return(ext_buf);
-}
-
-/**********************************************************************//**
-Writes to the undo log a prefix of an externally stored column.
-@return undo log position */
-static
-byte*
-trx_undo_page_report_modify_ext(
-/*============================*/
- byte* ptr, /*!< in: undo log position,
- at least 15 bytes must be available */
- byte* ext_buf, /*!< in: a buffer of
- REC_MAX_INDEX_COL_LEN
- + BTR_EXTERN_FIELD_REF_SIZE,
- or NULL when should not fetch
- a longer prefix */
- ulint zip_size, /*!< compressed page size in bytes,
- or 0 for uncompressed BLOB */
- const byte** field, /*!< in/out: the locally stored part of
- the externally stored column */
- ulint* len) /*!< in/out: length of field, in bytes */
-{
- if (ext_buf) {
- /* If an ordering column is externally stored, we will
- have to store a longer prefix of the field. In this
- case, write to the log a marker followed by the
- original length and the real length of the field. */
- ptr += mach_write_compressed(ptr, UNIV_EXTERN_STORAGE_FIELD);
-
- ptr += mach_write_compressed(ptr, *len);
-
- *field = trx_undo_page_fetch_ext(ext_buf, zip_size,
- *field, len);
-
- ptr += mach_write_compressed(ptr, *len);
- } else {
- ptr += mach_write_compressed(ptr, UNIV_EXTERN_STORAGE_FIELD
- + *len);
- }
-
- return(ptr);
-}
-
-/**********************************************************************//**
-Reports in the undo log of an update or delete marking of a clustered index
-record.
-@return byte offset of the inserted undo log entry on the page if
-succeed, 0 if fail */
-static
-ulint
-trx_undo_page_report_modify(
-/*========================*/
- page_t* undo_page, /*!< in: undo log page */
- trx_t* trx, /*!< in: transaction */
- dict_index_t* index, /*!< in: clustered index where update or
- delete marking is done */
- const rec_t* rec, /*!< in: clustered index record which
- has NOT yet been modified */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
- const upd_t* update, /*!< in: update vector which tells the
- columns to be updated; in the case of
- a delete, this should be set to NULL */
- ulint cmpl_info, /*!< in: compiler info on secondary
- index updates */
- mtr_t* mtr) /*!< in: mtr */
-{
- dict_table_t* table;
- ulint first_free;
- byte* ptr;
- const byte* field;
- ulint flen;
- ulint col_no;
- ulint type_cmpl;
- byte* type_cmpl_ptr;
- ulint i;
- trx_id_t trx_id;
- ibool ignore_prefix = FALSE;
- byte ext_buf[REC_MAX_INDEX_COL_LEN
- + BTR_EXTERN_FIELD_REF_SIZE];
-
- ut_a(dict_index_is_clust(index));
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE);
- table = index->table;
-
- first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
- ptr = undo_page + first_free;
-
- ut_ad(first_free <= UNIV_PAGE_SIZE);
-
- if (trx_undo_left(undo_page, ptr) < 50) {
-
- /* NOTE: the value 50 must be big enough so that the general
- fields written below fit on the undo log page */
-
- return(0);
- }
-
- /* Reserve 2 bytes for the pointer to the next undo log record */
- ptr += 2;
-
- /* Store first some general parameters to the undo log */
-
- if (!update) {
- type_cmpl = TRX_UNDO_DEL_MARK_REC;
- } else if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
- type_cmpl = TRX_UNDO_UPD_DEL_REC;
- /* We are about to update a delete marked record.
- We don't typically need the prefix in this case unless
- the delete marking is done by the same transaction
- (which we check below). */
- ignore_prefix = TRUE;
- } else {
- type_cmpl = TRX_UNDO_UPD_EXIST_REC;
- }
-
- type_cmpl |= cmpl_info * TRX_UNDO_CMPL_INFO_MULT;
- type_cmpl_ptr = ptr;
-
- *ptr++ = (byte) type_cmpl;
- ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no);
-
- ptr += mach_dulint_write_much_compressed(ptr, table->id);
-
- /*----------------------------------------*/
- /* Store the state of the info bits */
-
- *ptr++ = (byte) rec_get_info_bits(rec, dict_table_is_comp(table));
-
- /* Store the values of the system columns */
- field = rec_get_nth_field(rec, offsets,
- dict_index_get_sys_col_pos(
- index, DATA_TRX_ID), &flen);
- ut_ad(flen == DATA_TRX_ID_LEN);
-
- trx_id = trx_read_trx_id(field);
-
- /* If it is an update of a delete marked record, then we are
- allowed to ignore blob prefixes if the delete marking was done
- by some other trx as it must have committed by now for us to
- allow an over-write. */
- if (ignore_prefix) {
- ignore_prefix = ut_dulint_cmp(trx_id, trx->id) != 0;
- }
- ptr += mach_dulint_write_compressed(ptr, trx_id);
-
- field = rec_get_nth_field(rec, offsets,
- dict_index_get_sys_col_pos(
- index, DATA_ROLL_PTR), &flen);
- ut_ad(flen == DATA_ROLL_PTR_LEN);
-
- ptr += mach_dulint_write_compressed(ptr, trx_read_roll_ptr(field));
-
- /*----------------------------------------*/
- /* Store then the fields required to uniquely determine the
- record which will be modified in the clustered index */
-
- for (i = 0; i < dict_index_get_n_unique(index); i++) {
-
- field = rec_get_nth_field(rec, offsets, i, &flen);
-
- /* The ordering columns must not be stored externally. */
- ut_ad(!rec_offs_nth_extern(offsets, i));
- ut_ad(dict_index_get_nth_col(index, i)->ord_part);
-
- if (trx_undo_left(undo_page, ptr) < 5) {
-
- return(0);
- }
-
- ptr += mach_write_compressed(ptr, flen);
-
- if (flen != UNIV_SQL_NULL) {
- if (trx_undo_left(undo_page, ptr) < flen) {
-
- return(0);
- }
-
- ut_memcpy(ptr, field, flen);
- ptr += flen;
- }
- }
-
- /*----------------------------------------*/
- /* Save to the undo log the old values of the columns to be updated. */
-
- if (update) {
- if (trx_undo_left(undo_page, ptr) < 5) {
-
- return(0);
- }
-
- ptr += mach_write_compressed(ptr, upd_get_n_fields(update));
-
- for (i = 0; i < upd_get_n_fields(update); i++) {
-
- ulint pos = upd_get_nth_field(update, i)->field_no;
-
- /* Write field number to undo log */
- if (trx_undo_left(undo_page, ptr) < 5) {
-
- return(0);
- }
-
- ptr += mach_write_compressed(ptr, pos);
-
- /* Save the old value of field */
- field = rec_get_nth_field(rec, offsets, pos, &flen);
-
- if (trx_undo_left(undo_page, ptr) < 15) {
-
- return(0);
- }
-
- if (rec_offs_nth_extern(offsets, pos)) {
- ptr = trx_undo_page_report_modify_ext(
- ptr,
- dict_index_get_nth_col(index, pos)
- ->ord_part
- && !ignore_prefix
- && flen < REC_MAX_INDEX_COL_LEN
- ? ext_buf : NULL,
- dict_table_zip_size(table),
- &field, &flen);
-
- /* Notify purge that it eventually has to
- free the old externally stored field */
-
- trx->update_undo->del_marks = TRUE;
-
- *type_cmpl_ptr |= TRX_UNDO_UPD_EXTERN;
- } else {
- ptr += mach_write_compressed(ptr, flen);
- }
-
- if (flen != UNIV_SQL_NULL) {
- if (trx_undo_left(undo_page, ptr) < flen) {
-
- return(0);
- }
-
- ut_memcpy(ptr, field, flen);
- ptr += flen;
- }
- }
- }
-
- /*----------------------------------------*/
- /* In the case of a delete marking, and also in the case of an update
- where any ordering field of any index changes, store the values of all
- columns which occur as ordering fields in any index. This info is used
- in the purge of old versions where we use it to build and search the
- delete marked index records, to look if we can remove them from the
- index tree. Note that starting from 4.0.14 also externally stored
- fields can be ordering in some index. Starting from 5.2, we no longer
- store REC_MAX_INDEX_COL_LEN first bytes to the undo log record,
- but we can construct the column prefix fields in the index by
- fetching the first page of the BLOB that is pointed to by the
- clustered index. This works also in crash recovery, because all pages
- (including BLOBs) are recovered before anything is rolled back. */
-
- if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
- byte* old_ptr = ptr;
-
- trx->update_undo->del_marks = TRUE;
-
- if (trx_undo_left(undo_page, ptr) < 5) {
-
- return(0);
- }
-
- /* Reserve 2 bytes to write the number of bytes the stored
- fields take in this undo record */
-
- ptr += 2;
-
- for (col_no = 0; col_no < dict_table_get_n_cols(table);
- col_no++) {
-
- const dict_col_t* col
- = dict_table_get_nth_col(table, col_no);
-
- if (col->ord_part) {
- ulint pos;
-
- /* Write field number to undo log */
- if (trx_undo_left(undo_page, ptr) < 5 + 15) {
-
- return(0);
- }
-
- pos = dict_index_get_nth_col_pos(index,
- col_no);
- ptr += mach_write_compressed(ptr, pos);
-
- /* Save the old value of field */
- field = rec_get_nth_field(rec, offsets, pos,
- &flen);
-
- if (rec_offs_nth_extern(offsets, pos)) {
- ptr = trx_undo_page_report_modify_ext(
- ptr,
- flen < REC_MAX_INDEX_COL_LEN
- && !ignore_prefix
- ? ext_buf : NULL,
- dict_table_zip_size(table),
- &field, &flen);
- } else {
- ptr += mach_write_compressed(
- ptr, flen);
- }
-
- if (flen != UNIV_SQL_NULL) {
- if (trx_undo_left(undo_page, ptr)
- < flen) {
-
- return(0);
- }
-
- ut_memcpy(ptr, field, flen);
- ptr += flen;
- }
- }
- }
-
- mach_write_to_2(old_ptr, ptr - old_ptr);
- }
-
- /*----------------------------------------*/
- /* Write pointers to the previous and the next undo log records */
- if (trx_undo_left(undo_page, ptr) < 2) {
-
- return(0);
- }
-
- mach_write_to_2(ptr, first_free);
- ptr += 2;
- mach_write_to_2(undo_page + first_free, ptr - undo_page);
-
- mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
- ptr - undo_page);
-
- /* Write to the REDO log about this change in the UNDO log */
-
- trx_undof_page_add_undo_rec_log(undo_page, first_free,
- ptr - undo_page, mtr);
- return(first_free);
-}
-
-/**********************************************************************//**
-Reads from an undo log update record the system field values of the old
-version.
-@return remaining part of undo log record after reading these values */
-UNIV_INTERN
-byte*
-trx_undo_update_rec_get_sys_cols(
-/*=============================*/
- byte* ptr, /*!< in: remaining part of undo
- log record after reading
- general parameters */
- trx_id_t* trx_id, /*!< out: trx id */
- roll_ptr_t* roll_ptr, /*!< out: roll ptr */
- ulint* info_bits) /*!< out: info bits state */
-{
- /* Read the state of the info bits */
- *info_bits = mach_read_from_1(ptr);
- ptr += 1;
-
- /* Read the values of the system columns */
-
- *trx_id = mach_dulint_read_compressed(ptr);
- ptr += mach_dulint_get_compressed_size(*trx_id);
-
- *roll_ptr = mach_dulint_read_compressed(ptr);
- ptr += mach_dulint_get_compressed_size(*roll_ptr);
-
- return(ptr);
-}
-
-/**********************************************************************//**
-Reads from an update undo log record the number of updated fields.
-@return remaining part of undo log record after reading this value */
-UNIV_INLINE
-byte*
-trx_undo_update_rec_get_n_upd_fields(
-/*=================================*/
- byte* ptr, /*!< in: pointer to remaining part of undo log record */
- ulint* n) /*!< out: number of fields */
-{
- *n = mach_read_compressed(ptr);
- ptr += mach_get_compressed_size(*n);
-
- return(ptr);
-}
-
-/**********************************************************************//**
-Reads from an update undo log record a stored field number.
-@return remaining part of undo log record after reading this value */
-UNIV_INLINE
-byte*
-trx_undo_update_rec_get_field_no(
-/*=============================*/
- byte* ptr, /*!< in: pointer to remaining part of undo log record */
- ulint* field_no)/*!< out: field number */
-{
- *field_no = mach_read_compressed(ptr);
- ptr += mach_get_compressed_size(*field_no);
-
- return(ptr);
-}
-
-/*******************************************************************//**
-Builds an update vector based on a remaining part of an undo log record.
-@return remaining part of the record, NULL if an error detected, which
-means that the record is corrupted */
-UNIV_INTERN
-byte*
-trx_undo_update_rec_get_update(
-/*===========================*/
- byte* ptr, /*!< in: remaining part in update undo log
- record, after reading the row reference
- NOTE that this copy of the undo log record must
- be preserved as long as the update vector is
- used, as we do NOT copy the data in the
- record! */
- dict_index_t* index, /*!< in: clustered index */
- ulint type, /*!< in: TRX_UNDO_UPD_EXIST_REC,
- TRX_UNDO_UPD_DEL_REC, or
- TRX_UNDO_DEL_MARK_REC; in the last case,
- only trx id and roll ptr fields are added to
- the update vector */
- trx_id_t trx_id, /*!< in: transaction id from this undo record */
- roll_ptr_t roll_ptr,/*!< in: roll pointer from this undo record */
- ulint info_bits,/*!< in: info bits from this undo record */
- trx_t* trx, /*!< in: transaction */
- mem_heap_t* heap, /*!< in: memory heap from which the memory
- needed is allocated */
- upd_t** upd) /*!< out, own: update vector */
-{
- upd_field_t* upd_field;
- upd_t* update;
- ulint n_fields;
- byte* buf;
- ulint i;
-
- ut_a(dict_index_is_clust(index));
-
- if (type != TRX_UNDO_DEL_MARK_REC) {
- ptr = trx_undo_update_rec_get_n_upd_fields(ptr, &n_fields);
- } else {
- n_fields = 0;
- }
-
- update = upd_create(n_fields + 2, heap);
-
- update->info_bits = info_bits;
-
- /* Store first trx id and roll ptr to update vector */
-
- upd_field = upd_get_nth_field(update, n_fields);
- buf = mem_heap_alloc(heap, DATA_TRX_ID_LEN);
- trx_write_trx_id(buf, trx_id);
-
- upd_field_set_field_no(upd_field,
- dict_index_get_sys_col_pos(index, DATA_TRX_ID),
- index, trx);
- dfield_set_data(&(upd_field->new_val), buf, DATA_TRX_ID_LEN);
-
- upd_field = upd_get_nth_field(update, n_fields + 1);
- buf = mem_heap_alloc(heap, DATA_ROLL_PTR_LEN);
- trx_write_roll_ptr(buf, roll_ptr);
-
- upd_field_set_field_no(
- upd_field, dict_index_get_sys_col_pos(index, DATA_ROLL_PTR),
- index, trx);
- dfield_set_data(&(upd_field->new_val), buf, DATA_ROLL_PTR_LEN);
-
- /* Store then the updated ordinary columns to the update vector */
-
- for (i = 0; i < n_fields; i++) {
-
- byte* field;
- ulint len;
- ulint field_no;
- ulint orig_len;
-
- ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);
-
- if (field_no >= dict_index_get_n_fields(index)) {
- fprintf(stderr,
- "InnoDB: Error: trying to access"
- " update undo rec field %lu in ",
- (ulong) field_no);
- dict_index_name_print(stderr, trx, index);
- fprintf(stderr, "\n"
- "InnoDB: but index has only %lu fields\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n"
- "InnoDB: Run also CHECK TABLE ",
- (ulong) dict_index_get_n_fields(index));
- ut_print_name(stderr, trx, TRUE, index->table_name);
- fprintf(stderr, "\n"
- "InnoDB: n_fields = %lu, i = %lu, ptr %p\n",
- (ulong) n_fields, (ulong) i, ptr);
- return(NULL);
- }
-
- upd_field = upd_get_nth_field(update, i);
-
- upd_field_set_field_no(upd_field, field_no, index, trx);
-
- ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
-
- upd_field->orig_len = orig_len;
-
- if (len == UNIV_SQL_NULL) {
- dfield_set_null(&upd_field->new_val);
- } else if (len < UNIV_EXTERN_STORAGE_FIELD) {
- dfield_set_data(&upd_field->new_val, field, len);
- } else {
- len -= UNIV_EXTERN_STORAGE_FIELD;
-
- dfield_set_data(&upd_field->new_val, field, len);
- dfield_set_ext(&upd_field->new_val);
- }
- }
-
- *upd = update;
-
- return(ptr);
-}
-
-/*******************************************************************//**
-Builds a partial row from an update undo log record. It contains the
-columns which occur as ordering in any index of the table.
-@return pointer to remaining part of undo record */
-UNIV_INTERN
-byte*
-trx_undo_rec_get_partial_row(
-/*=========================*/
- byte* ptr, /*!< in: remaining part in update undo log
- record of a suitable type, at the start of
- the stored index columns;
- NOTE that this copy of the undo log record must
- be preserved as long as the partial row is
- used, as we do NOT copy the data in the
- record! */
- dict_index_t* index, /*!< in: clustered index */
- dtuple_t** row, /*!< out, own: partial row */
- ibool ignore_prefix, /*!< in: flag to indicate if we
- expect blob prefixes in undo. Used
- only in the assertion. */
- mem_heap_t* heap) /*!< in: memory heap from which the memory
- needed is allocated */
-{
- const byte* end_ptr;
- ulint row_len;
-
- ut_ad(index);
- ut_ad(ptr);
- ut_ad(row);
- ut_ad(heap);
- ut_ad(dict_index_is_clust(index));
-
- row_len = dict_table_get_n_cols(index->table);
-
- *row = dtuple_create(heap, row_len);
-
- dict_table_copy_types(*row, index->table);
-
- end_ptr = ptr + mach_read_from_2(ptr);
- ptr += 2;
-
- while (ptr != end_ptr) {
- dfield_t* dfield;
- byte* field;
- ulint field_no;
- const dict_col_t* col;
- ulint col_no;
- ulint len;
- ulint orig_len;
-
- ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);
-
- col = dict_index_get_nth_col(index, field_no);
- col_no = dict_col_get_no(col);
-
- ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
-
- dfield = dtuple_get_nth_field(*row, col_no);
-
- dfield_set_data(dfield, field, len);
-
- if (len != UNIV_SQL_NULL
- && len >= UNIV_EXTERN_STORAGE_FIELD) {
- dfield_set_len(dfield,
- len - UNIV_EXTERN_STORAGE_FIELD);
- dfield_set_ext(dfield);
- /* If the prefix of this column is indexed,
- ensure that enough prefix is stored in the
- undo log record. */
- ut_a(ignore_prefix
- || !col->ord_part
- || dfield_get_len(dfield)
- >= REC_MAX_INDEX_COL_LEN
- + BTR_EXTERN_FIELD_REF_SIZE);
- }
- }
-
- return(ptr);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************************//**
-Erases the unused undo log page end. */
-static
-void
-trx_undo_erase_page_end(
-/*====================*/
- page_t* undo_page, /*!< in: undo page whose end to erase */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint first_free;
-
- first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
- memset(undo_page + first_free, 0xff,
- (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) - first_free);
-
- mlog_write_initial_log_record(undo_page, MLOG_UNDO_ERASE_END, mtr);
-}
-
-/***********************************************************//**
-Parses a redo log record of erasing of an undo page end.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-trx_undo_parse_erase_page_end(
-/*==========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr __attribute__((unused)), /*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr) /*!< in: mtr or NULL */
-{
- ut_ad(ptr && end_ptr);
-
- if (page == NULL) {
-
- return(ptr);
- }
-
- trx_undo_erase_page_end(page, mtr);
-
- return(ptr);
-}
-
-#ifndef UNIV_HOTBACKUP
-/***********************************************************************//**
-Writes information to an undo log about an insert, update, or a delete marking
-of a clustered index record. This information is used in a rollback of the
-transaction and in consistent reads that must look to the history of this
-transaction.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ulint
-trx_undo_report_row_operation(
-/*==========================*/
- ulint flags, /*!< in: if BTR_NO_UNDO_LOG_FLAG bit is
- set, does nothing */
- ulint op_type, /*!< in: TRX_UNDO_INSERT_OP or
- TRX_UNDO_MODIFY_OP */
- que_thr_t* thr, /*!< in: query thread */
- dict_index_t* index, /*!< in: clustered index */
- const dtuple_t* clust_entry, /*!< in: in the case of an insert,
- index entry to insert into the
- clustered index, otherwise NULL */
- const upd_t* update, /*!< in: in the case of an update,
- the update vector, otherwise NULL */
- ulint cmpl_info, /*!< in: compiler info on secondary
- index updates */
- const rec_t* rec, /*!< in: in case of an update or delete
- marking, the record in the clustered
- index, otherwise NULL */
- roll_ptr_t* roll_ptr) /*!< out: rollback pointer to the
- inserted undo log record,
- ut_dulint_zero if BTR_NO_UNDO_LOG
- flag was specified */
-{
- trx_t* trx;
- trx_undo_t* undo;
- ulint page_no;
- trx_rseg_t* rseg;
- mtr_t mtr;
- ulint err = DB_SUCCESS;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- ut_a(dict_index_is_clust(index));
-
- if (flags & BTR_NO_UNDO_LOG_FLAG) {
-
- *roll_ptr = ut_dulint_zero;
-
- return(DB_SUCCESS);
- }
-
- ut_ad(thr);
- ut_ad((op_type != TRX_UNDO_INSERT_OP)
- || (clust_entry && !update && !rec));
-
- trx = thr_get_trx(thr);
- rseg = trx->rseg;
-
- mutex_enter(&(trx->undo_mutex));
-
- /* If the undo log is not assigned yet, assign one */
-
- if (op_type == TRX_UNDO_INSERT_OP) {
-
- if (trx->insert_undo == NULL) {
-
- err = trx_undo_assign_undo(trx, TRX_UNDO_INSERT);
- }
-
- undo = trx->insert_undo;
-
- if (UNIV_UNLIKELY(!undo)) {
- /* Did not succeed */
- mutex_exit(&(trx->undo_mutex));
-
- return(err);
- }
- } else {
- ut_ad(op_type == TRX_UNDO_MODIFY_OP);
-
- if (trx->update_undo == NULL) {
-
- err = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE);
-
- }
-
- undo = trx->update_undo;
-
- if (UNIV_UNLIKELY(!undo)) {
- /* Did not succeed */
- mutex_exit(&(trx->undo_mutex));
- return(err);
- }
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- }
-
- page_no = undo->last_page_no;
-
- mtr_start(&mtr);
-
- for (;;) {
- buf_block_t* undo_block;
- page_t* undo_page;
- ulint offset;
-
- undo_block = buf_page_get_gen(undo->space, undo->zip_size,
- page_no, RW_X_LATCH,
- undo->guess_block, BUF_GET,
- __FILE__, __LINE__, &mtr);
- buf_block_dbg_add_level(undo_block, SYNC_TRX_UNDO_PAGE);
-
- undo_page = buf_block_get_frame(undo_block);
-
- if (op_type == TRX_UNDO_INSERT_OP) {
- offset = trx_undo_page_report_insert(
- undo_page, trx, index, clust_entry, &mtr);
- } else {
- offset = trx_undo_page_report_modify(
- undo_page, trx, index, rec, offsets, update,
- cmpl_info, &mtr);
- }
-
- if (UNIV_UNLIKELY(offset == 0)) {
- /* The record did not fit on the page. We erase the
- end segment of the undo log page and write a log
- record of it: this is to ensure that in the debug
- version the replicate page constructed using the log
- records stays identical to the original page */
-
- trx_undo_erase_page_end(undo_page, &mtr);
- mtr_commit(&mtr);
- } else {
- /* Success */
-
- mtr_commit(&mtr);
-
- undo->empty = FALSE;
- undo->top_page_no = page_no;
- undo->top_offset = offset;
- undo->top_undo_no = trx->undo_no;
- undo->guess_block = undo_block;
-
- UT_DULINT_INC(trx->undo_no);
-
- mutex_exit(&trx->undo_mutex);
-
- *roll_ptr = trx_undo_build_roll_ptr(
- op_type == TRX_UNDO_INSERT_OP,
- rseg->id, page_no, offset);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(DB_SUCCESS);
- }
-
- ut_ad(page_no == undo->last_page_no);
-
- /* We have to extend the undo log by one page */
-
- mtr_start(&mtr);
-
- /* When we add a page to an undo log, this is analogous to
- a pessimistic insert in a B-tree, and we must reserve the
- counterpart of the tree latch, which is the rseg mutex. */
-
- mutex_enter(&(rseg->mutex));
-
- page_no = trx_undo_add_page(trx, undo, &mtr);
-
- mutex_exit(&(rseg->mutex));
-
- if (UNIV_UNLIKELY(page_no == FIL_NULL)) {
- /* Did not succeed: out of space */
-
- mutex_exit(&(trx->undo_mutex));
- mtr_commit(&mtr);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(DB_OUT_OF_FILE_SPACE);
- }
- }
-}
-
-/*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/
-
-/******************************************************************//**
-Copies an undo record to heap. This function can be called if we know that
-the undo log record exists.
-@return own: copy of the record */
-UNIV_INTERN
-trx_undo_rec_t*
-trx_undo_get_undo_rec_low(
-/*======================*/
- roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
- mem_heap_t* heap) /*!< in: memory heap where copied */
-{
- trx_undo_rec_t* undo_rec;
- ulint rseg_id;
- ulint page_no;
- ulint offset;
- const page_t* undo_page;
- trx_rseg_t* rseg;
- ibool is_insert;
- mtr_t mtr;
-
- trx_undo_decode_roll_ptr(roll_ptr, &is_insert, &rseg_id, &page_no,
- &offset);
- rseg = trx_rseg_get_on_id(rseg_id);
-
- mtr_start(&mtr);
-
- undo_page = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size,
- page_no, &mtr);
-
- undo_rec = trx_undo_rec_copy(undo_page + offset, heap);
-
- mtr_commit(&mtr);
-
- return(undo_rec);
-}
-
-/******************************************************************//**
-Copies an undo record to heap.
-
-NOTE: the caller must have latches on the clustered index page and
-purge_view.
-
-@return DB_SUCCESS, or DB_MISSING_HISTORY if the undo log has been
-truncated and we cannot fetch the old version */
-UNIV_INTERN
-ulint
-trx_undo_get_undo_rec(
-/*==================*/
- roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
- trx_id_t trx_id, /*!< in: id of the trx that generated
- the roll pointer: it points to an
- undo log of this transaction */
- trx_undo_rec_t** undo_rec, /*!< out, own: copy of the record */
- mem_heap_t* heap) /*!< in: memory heap where copied */
-{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- if (!trx_purge_update_undo_must_exist(trx_id)) {
-
- /* It may be that the necessary undo log has already been
- deleted */
-
- return(DB_MISSING_HISTORY);
- }
-
- *undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
-
- return(DB_SUCCESS);
-}
-
-/*******************************************************************//**
-Build a previous version of a clustered index record. This function checks
-that the caller has a latch on the index page of the clustered index record
-and an s-latch on the purge_view. This guarantees that the stack of versions
-is locked all the way down to the purge_view.
-@return DB_SUCCESS, or DB_MISSING_HISTORY if the previous version is
-earlier than purge_view, which means that it may have been removed,
-DB_ERROR if corrupted record */
-UNIV_INTERN
-ulint
-trx_undo_prev_version_build(
-/*========================*/
- const rec_t* index_rec,/*!< in: clustered index record in the
- index tree */
- mtr_t* index_mtr __attribute__((unused)),
- /*!< in: mtr which contains the latch to
- index_rec page and purge_view */
- const rec_t* rec, /*!< in: version of a clustered index record */
- dict_index_t* index, /*!< in: clustered index */
- ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- mem_heap_t* heap, /*!< in: memory heap from which the memory
- needed is allocated */
- rec_t** old_vers)/*!< out, own: previous version, or NULL if
- rec is the first inserted version, or if
- history data has been deleted (an error),
- or if the purge COULD have removed the version
- though it has not yet done so */
-{
- trx_undo_rec_t* undo_rec = NULL;
- dtuple_t* entry;
- trx_id_t rec_trx_id;
- ulint type;
- undo_no_t undo_no;
- dulint table_id;
- trx_id_t trx_id;
- roll_ptr_t roll_ptr;
- roll_ptr_t old_roll_ptr;
- upd_t* update;
- byte* ptr;
- ulint info_bits;
- ulint cmpl_info;
- ibool dummy_extern;
- byte* buf;
- ulint err;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(mtr_memo_contains_page(index_mtr, index_rec, MTR_MEMO_PAGE_S_FIX)
- || mtr_memo_contains_page(index_mtr, index_rec,
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- if (!dict_index_is_clust(index)) {
- fprintf(stderr, "InnoDB: Error: trying to access"
- " update undo rec for non-clustered index %s\n"
- "InnoDB: Submit a detailed bug report to"
- " http://bugs.mysql.com\n"
- "InnoDB: index record ", index->name);
- rec_print(stderr, index_rec, index);
- fputs("\n"
- "InnoDB: record version ", stderr);
- rec_print_new(stderr, rec, offsets);
- putc('\n', stderr);
- return(DB_ERROR);
- }
-
- roll_ptr = row_get_rec_roll_ptr(rec, index, offsets);
- old_roll_ptr = roll_ptr;
-
- *old_vers = NULL;
-
- if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
-
- /* The record rec is the first inserted version */
-
- return(DB_SUCCESS);
- }
-
- rec_trx_id = row_get_rec_trx_id(rec, index, offsets);
-
- err = trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap);
-
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
- /* The undo record may already have been purged.
- This should never happen in InnoDB. */
-
- return(err);
- }
-
- ptr = trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info,
- &dummy_extern, &undo_no, &table_id);
-
- ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
- &info_bits);
-
- /* (a) If a clustered index record version is such that the
- trx id stamp in it is bigger than purge_sys->view, then the
- BLOBs in that version are known to exist (the purge has not
- progressed that far);
-
- (b) if the version is the first version such that trx id in it
- is less than purge_sys->view, and it is not delete-marked,
- then the BLOBs in that version are known to exist (the purge
- cannot have purged the BLOBs referenced by that version
- yet).
-
- This function does not fetch any BLOBs. The callers might, by
- possibly invoking row_ext_create() via row_build(). However,
- they should have all needed information in the *old_vers
- returned by this function. This is because *old_vers is based
- on the transaction undo log records. The function
- trx_undo_page_fetch_ext() will write BLOB prefixes to the
- transaction undo log that are at least as long as the longest
- possible column prefix in a secondary index. Thus, secondary
- index entries for *old_vers can be constructed without
- dereferencing any BLOB pointers. */
-
- ptr = trx_undo_rec_skip_row_ref(ptr, index);
-
- ptr = trx_undo_update_rec_get_update(ptr, index, type, trx_id,
- roll_ptr, info_bits,
- NULL, heap, &update);
-
- if (ut_dulint_cmp(table_id, index->table->id) != 0) {
- ptr = NULL;
-
- fprintf(stderr,
- "InnoDB: Error: trying to access update undo rec"
- " for table %s\n"
- "InnoDB: but the table id in the"
- " undo record is wrong\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n"
- "InnoDB: Run also CHECK TABLE %s\n",
- index->table_name, index->table_name);
- }
-
- if (ptr == NULL) {
- /* The record was corrupted, return an error; these printfs
- should catch an elusive bug in row_vers_old_has_index_entry */
-
- fprintf(stderr,
- "InnoDB: table %s, index %s, n_uniq %lu\n"
- "InnoDB: undo rec address %p, type %lu cmpl_info %lu\n"
- "InnoDB: undo rec table id %lu %lu,"
- " index table id %lu %lu\n"
- "InnoDB: dump of 150 bytes in undo rec: ",
- index->table_name, index->name,
- (ulong) dict_index_get_n_unique(index),
- undo_rec, (ulong) type, (ulong) cmpl_info,
- (ulong) ut_dulint_get_high(table_id),
- (ulong) ut_dulint_get_low(table_id),
- (ulong) ut_dulint_get_high(index->table->id),
- (ulong) ut_dulint_get_low(index->table->id));
- ut_print_buf(stderr, undo_rec, 150);
- fputs("\n"
- "InnoDB: index record ", stderr);
- rec_print(stderr, index_rec, index);
- fputs("\n"
- "InnoDB: record version ", stderr);
- rec_print_new(stderr, rec, offsets);
- fprintf(stderr, "\n"
- "InnoDB: Record trx id " TRX_ID_FMT
- ", update rec trx id " TRX_ID_FMT "\n"
- "InnoDB: Roll ptr in rec %lu %lu, in update rec"
- " %lu %lu\n",
- TRX_ID_PREP_PRINTF(rec_trx_id),
- TRX_ID_PREP_PRINTF(trx_id),
- (ulong) ut_dulint_get_high(old_roll_ptr),
- (ulong) ut_dulint_get_low(old_roll_ptr),
- (ulong) ut_dulint_get_high(roll_ptr),
- (ulong) ut_dulint_get_low(roll_ptr));
-
- trx_purge_sys_print();
- return(DB_ERROR);
- }
-
- if (row_upd_changes_field_size_or_external(index, offsets, update)) {
- ulint n_ext;
-
- /* We have to set the appropriate extern storage bits in the
- old version of the record: the extern bits in rec for those
- fields that update does NOT update, as well as the bits for
- those fields that update updates to become externally stored
- fields. Store the info: */
-
- entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index,
- offsets, &n_ext, heap);
- n_ext += btr_push_update_extern_fields(entry, update, heap);
- /* The page containing the clustered index record
- corresponding to entry is latched in mtr. Thus the
- following call is safe. */
- row_upd_index_replace_new_col_vals(entry, index, update, heap);
-
- buf = mem_heap_alloc(heap, rec_get_converted_size(index, entry,
- n_ext));
-
- *old_vers = rec_convert_dtuple_to_rec(buf, index,
- entry, n_ext);
- } else {
- buf = mem_heap_alloc(heap, rec_offs_size(offsets));
- *old_vers = rec_copy(buf, rec, offsets);
- rec_offs_make_valid(*old_vers, index, offsets);
- row_upd_rec_in_place(*old_vers, index, offsets, update, NULL);
- }
-
- return(DB_SUCCESS);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/trx/trx0roll.c b/storage/innodb_plugin/trx/trx0roll.c
deleted file mode 100644
index c925478cdf4..00000000000
--- a/storage/innodb_plugin/trx/trx0roll.c
+++ /dev/null
@@ -1,1366 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file trx/trx0roll.c
-Transaction rollback
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0roll.h"
-
-#ifdef UNIV_NONINL
-#include "trx0roll.ic"
-#endif
-
-#include "fsp0fsp.h"
-#include "mach0data.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "trx0undo.h"
-#include "trx0rec.h"
-#include "que0que.h"
-#include "usr0sess.h"
-#include "srv0que.h"
-#include "srv0start.h"
-#include "row0undo.h"
-#include "row0mysql.h"
-#include "lock0lock.h"
-#include "pars0pars.h"
-
-/** This many pages must be undone before a truncate is tried within
-rollback */
-#define TRX_ROLL_TRUNC_THRESHOLD 1
-
-/** In crash recovery, the current trx to be rolled back */
-static trx_t* trx_roll_crash_recv_trx = NULL;
-
-/** In crash recovery we set this to the undo n:o of the current trx to be
-rolled back. Then we can print how many % the rollback has progressed. */
-static ib_int64_t trx_roll_max_undo_no;
-
-/** Auxiliary variable which tells the previous progress % we printed */
-static ulint trx_roll_progress_printed_pct;
-
-/*******************************************************************//**
-Rollback a transaction used in MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-trx_general_rollback_for_mysql(
-/*===========================*/
- trx_t* trx, /*!< in: transaction handle */
- trx_savept_t* savept) /*!< in: pointer to savepoint undo number, if
- partial rollback requested, or NULL for
- complete rollback */
-{
- mem_heap_t* heap;
- que_thr_t* thr;
- roll_node_t* roll_node;
-
- /* Tell Innobase server that there might be work for
- utility threads: */
-
- srv_active_wake_master_thread();
-
- trx_start_if_not_started(trx);
-
- heap = mem_heap_create(512);
-
- roll_node = roll_node_create(heap);
-
- if (savept) {
- roll_node->partial = TRUE;
- roll_node->savept = *savept;
- }
-
- trx->error_state = DB_SUCCESS;
-
- thr = pars_complete_graph_for_exec(roll_node, trx, heap);
-
- ut_a(thr == que_fork_start_command(que_node_get_parent(thr)));
- que_run_threads(thr);
-
- mutex_enter(&kernel_mutex);
-
- while (trx->que_state != TRX_QUE_RUNNING) {
-
- mutex_exit(&kernel_mutex);
-
- os_thread_sleep(100000);
-
- mutex_enter(&kernel_mutex);
- }
-
- mutex_exit(&kernel_mutex);
-
- mem_heap_free(heap);
-
- ut_a(trx->error_state == DB_SUCCESS);
-
- /* Tell Innobase server that there might be work for
- utility threads: */
-
- srv_active_wake_master_thread();
-
- return((int) trx->error_state);
-}
-
-/*******************************************************************//**
-Rollback a transaction used in MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-trx_rollback_for_mysql(
-/*===================*/
- trx_t* trx) /*!< in: transaction handle */
-{
- int err;
-
- if (trx->conc_state == TRX_NOT_STARTED) {
-
- return(DB_SUCCESS);
- }
-
- trx->op_info = "rollback";
-
- /* If we are doing the XA recovery of prepared transactions, then
- the transaction object does not have an InnoDB session object, and we
- set a dummy session that we use for all MySQL transactions. */
-
- err = trx_general_rollback_for_mysql(trx, NULL);
-
- trx->op_info = "";
-
- return(err);
-}
-
-/*******************************************************************//**
-Rollback the latest SQL statement for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-trx_rollback_last_sql_stat_for_mysql(
-/*=================================*/
- trx_t* trx) /*!< in: transaction handle */
-{
- int err;
-
- if (trx->conc_state == TRX_NOT_STARTED) {
-
- return(DB_SUCCESS);
- }
-
- trx->op_info = "rollback of SQL statement";
-
- err = trx_general_rollback_for_mysql(trx, &trx->last_sql_stat_start);
- /* The following call should not be needed, but we play safe: */
- trx_mark_sql_stat_end(trx);
-
- trx->op_info = "";
-
- return(err);
-}
-
-/*******************************************************************//**
-Frees a single savepoint struct. */
-UNIV_INTERN
-void
-trx_roll_savepoint_free(
-/*=====================*/
- trx_t* trx, /*!< in: transaction handle */
- trx_named_savept_t* savep) /*!< in: savepoint to free */
-{
- ut_a(savep != NULL);
- ut_a(UT_LIST_GET_LEN(trx->trx_savepoints) > 0);
-
- UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep);
- mem_free(savep->name);
- mem_free(savep);
-}
-
-/*******************************************************************//**
-Frees savepoint structs starting from savep, if savep == NULL then
-free all savepoints. */
-UNIV_INTERN
-void
-trx_roll_savepoints_free(
-/*=====================*/
- trx_t* trx, /*!< in: transaction handle */
- trx_named_savept_t* savep) /*!< in: free all savepoints > this one;
- if this is NULL, free all savepoints
- of trx */
-{
- trx_named_savept_t* next_savep;
-
- if (savep == NULL) {
- savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
- } else {
- savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
- }
-
- while (savep != NULL) {
- next_savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
-
- trx_roll_savepoint_free(trx, savep);
-
- savep = next_savep;
- }
-}
-
-/*******************************************************************//**
-Rolls back a transaction back to a named savepoint. Modifications after the
-savepoint are undone but InnoDB does NOT release the corresponding locks
-which are stored in memory. If a lock is 'implicit', that is, a new inserted
-row holds a lock where the lock information is carried by the trx id stored in
-the row, these locks are naturally released in the rollback. Savepoints which
-were set after this savepoint are deleted.
-@return if no savepoint of the name found then DB_NO_SAVEPOINT,
-otherwise DB_SUCCESS */
-UNIV_INTERN
-ulint
-trx_rollback_to_savepoint_for_mysql(
-/*================================*/
- trx_t* trx, /*!< in: transaction handle */
- const char* savepoint_name, /*!< in: savepoint name */
- ib_int64_t* mysql_binlog_cache_pos) /*!< out: the MySQL binlog cache
- position corresponding to this
- savepoint; MySQL needs this
- information to remove the
- binlog entries of the queries
- executed after the savepoint */
-{
- trx_named_savept_t* savep;
- ulint err;
-
- savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
-
- while (savep != NULL) {
- if (0 == ut_strcmp(savep->name, savepoint_name)) {
- /* Found */
- break;
- }
- savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
- }
-
- if (savep == NULL) {
-
- return(DB_NO_SAVEPOINT);
- }
-
- if (trx->conc_state == TRX_NOT_STARTED) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: transaction has a savepoint ", stderr);
- ut_print_name(stderr, trx, FALSE, savep->name);
- fputs(" though it is not started\n", stderr);
- return(DB_ERROR);
- }
-
- /* We can now free all savepoints strictly later than this one */
-
- trx_roll_savepoints_free(trx, savep);
-
- *mysql_binlog_cache_pos = savep->mysql_binlog_cache_pos;
-
- trx->op_info = "rollback to a savepoint";
-
- err = trx_general_rollback_for_mysql(trx, &savep->savept);
-
- /* Store the current undo_no of the transaction so that we know where
- to roll back if we have to roll back the next SQL statement: */
-
- trx_mark_sql_stat_end(trx);
-
- trx->op_info = "";
-
- return(err);
-}
-
-/*******************************************************************//**
-Creates a named savepoint. If the transaction is not yet started, starts it.
-If there is already a savepoint of the same name, this call erases that old
-savepoint and replaces it with a new. Savepoints are deleted in a transaction
-commit or rollback.
-@return always DB_SUCCESS */
-UNIV_INTERN
-ulint
-trx_savepoint_for_mysql(
-/*====================*/
- trx_t* trx, /*!< in: transaction handle */
- const char* savepoint_name, /*!< in: savepoint name */
- ib_int64_t binlog_cache_pos) /*!< in: MySQL binlog cache
- position corresponding to this
- connection at the time of the
- savepoint */
-{
- trx_named_savept_t* savep;
-
- ut_a(trx);
- ut_a(savepoint_name);
-
- trx_start_if_not_started(trx);
-
- savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
-
- while (savep != NULL) {
- if (0 == ut_strcmp(savep->name, savepoint_name)) {
- /* Found */
- break;
- }
- savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
- }
-
- if (savep) {
- /* There is a savepoint with the same name: free that */
-
- UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep);
-
- mem_free(savep->name);
- mem_free(savep);
- }
-
- /* Create a new savepoint and add it as the last in the list */
-
- savep = mem_alloc(sizeof(trx_named_savept_t));
-
- savep->name = mem_strdup(savepoint_name);
-
- savep->savept = trx_savept_take(trx);
-
- savep->mysql_binlog_cache_pos = binlog_cache_pos;
-
- UT_LIST_ADD_LAST(trx_savepoints, trx->trx_savepoints, savep);
-
- return(DB_SUCCESS);
-}
-
-/*******************************************************************//**
-Releases only the named savepoint. Savepoints which were set after this
-savepoint are left as is.
-@return if no savepoint of the name found then DB_NO_SAVEPOINT,
-otherwise DB_SUCCESS */
-UNIV_INTERN
-ulint
-trx_release_savepoint_for_mysql(
-/*============================*/
- trx_t* trx, /*!< in: transaction handle */
- const char* savepoint_name) /*!< in: savepoint name */
-{
- trx_named_savept_t* savep;
-
- savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
-
- /* Search for the savepoint by name and free if found. */
- while (savep != NULL) {
- if (0 == ut_strcmp(savep->name, savepoint_name)) {
- trx_roll_savepoint_free(trx, savep);
- return(DB_SUCCESS);
- }
- savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
- }
-
- return(DB_NO_SAVEPOINT);
-}
-
-/*******************************************************************//**
-Determines if this transaction is rolling back an incomplete transaction
-in crash recovery.
-@return TRUE if trx is an incomplete transaction that is being rolled
-back in crash recovery */
-UNIV_INTERN
-ibool
-trx_is_recv(
-/*========*/
- const trx_t* trx) /*!< in: transaction */
-{
- return(trx == trx_roll_crash_recv_trx);
-}
-
-/*******************************************************************//**
-Returns a transaction savepoint taken at this point in time.
-@return savepoint */
-UNIV_INTERN
-trx_savept_t
-trx_savept_take(
-/*============*/
- trx_t* trx) /*!< in: transaction */
-{
- trx_savept_t savept;
-
- savept.least_undo_no = trx->undo_no;
-
- return(savept);
-}
-
-/*******************************************************************//**
-Roll back an active transaction. */
-static
-void
-trx_rollback_active(
-/*================*/
- trx_t* trx) /*!< in/out: transaction */
-{
- mem_heap_t* heap;
- que_fork_t* fork;
- que_thr_t* thr;
- roll_node_t* roll_node;
- dict_table_t* table;
- ib_int64_t rows_to_undo;
- const char* unit = "";
- ibool dictionary_locked = FALSE;
-
- heap = mem_heap_create(512);
-
- fork = que_fork_create(NULL, NULL, QUE_FORK_RECOVERY, heap);
- fork->trx = trx;
-
- thr = que_thr_create(fork, heap);
-
- roll_node = roll_node_create(heap);
-
- thr->child = roll_node;
- roll_node->common.parent = thr;
-
- mutex_enter(&kernel_mutex);
-
- trx->graph = fork;
-
- ut_a(thr == que_fork_start_command(fork));
-
- trx_roll_crash_recv_trx = trx;
- trx_roll_max_undo_no = ut_conv_dulint_to_longlong(trx->undo_no);
- trx_roll_progress_printed_pct = 0;
- rows_to_undo = trx_roll_max_undo_no;
-
- if (rows_to_undo > 1000000000) {
- rows_to_undo = rows_to_undo / 1000000;
- unit = "M";
- }
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Rolling back trx with id " TRX_ID_FMT ", %lu%s"
- " rows to undo\n",
- TRX_ID_PREP_PRINTF(trx->id),
- (ulong) rows_to_undo, unit);
- mutex_exit(&kernel_mutex);
-
- trx->mysql_thread_id = os_thread_get_curr_id();
-
- trx->mysql_process_no = os_proc_get_number();
-
- if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
- row_mysql_lock_data_dictionary(trx);
- dictionary_locked = TRUE;
- }
-
- que_run_threads(thr);
-
- mutex_enter(&kernel_mutex);
-
- while (trx->que_state != TRX_QUE_RUNNING) {
-
- mutex_exit(&kernel_mutex);
-
- fprintf(stderr,
- "InnoDB: Waiting for rollback of trx id %lu to end\n",
- (ulong) ut_dulint_get_low(trx->id));
- os_thread_sleep(100000);
-
- mutex_enter(&kernel_mutex);
- }
-
- mutex_exit(&kernel_mutex);
-
- if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE
- && !ut_dulint_is_zero(trx->table_id)) {
-
- /* If the transaction was for a dictionary operation, we
- drop the relevant table, if it still exists */
-
- fprintf(stderr,
- "InnoDB: Dropping table with id %lu %lu"
- " in recovery if it exists\n",
- (ulong) ut_dulint_get_high(trx->table_id),
- (ulong) ut_dulint_get_low(trx->table_id));
-
- table = dict_table_get_on_id_low(trx->table_id);
-
- if (table) {
- ulint err;
-
- fputs("InnoDB: Table found: dropping table ", stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs(" in recovery\n", stderr);
-
- err = row_drop_table_for_mysql(table->name, trx, TRUE);
- trx_commit_for_mysql(trx);
-
- ut_a(err == (int) DB_SUCCESS);
- }
- }
-
- if (dictionary_locked) {
- row_mysql_unlock_data_dictionary(trx);
- }
-
- fprintf(stderr, "\nInnoDB: Rolling back of trx id " TRX_ID_FMT
- " completed\n",
- TRX_ID_PREP_PRINTF(trx->id));
- mem_heap_free(heap);
-
- trx_roll_crash_recv_trx = NULL;
-}
-
-/*******************************************************************//**
-Rollback or clean up any incomplete transactions which were
-encountered in crash recovery. If the transaction already was
-committed, then we clean up a possible insert undo log. If the
-transaction was not yet committed, then we roll it back. */
-UNIV_INTERN
-void
-trx_rollback_or_clean_recovered(
-/*============================*/
- ibool all) /*!< in: FALSE=roll back dictionary transactions;
- TRUE=roll back all non-PREPARED transactions */
-{
- trx_t* trx;
-
- mutex_enter(&kernel_mutex);
-
- if (!UT_LIST_GET_FIRST(trx_sys->trx_list)) {
- goto leave_function;
- }
-
- if (all) {
- fprintf(stderr,
- "InnoDB: Starting in background the rollback"
- " of uncommitted transactions\n");
- }
-
- mutex_exit(&kernel_mutex);
-
-loop:
- mutex_enter(&kernel_mutex);
-
- for (trx = UT_LIST_GET_FIRST(trx_sys->trx_list); trx;
- trx = UT_LIST_GET_NEXT(trx_list, trx)) {
- if (!trx->is_recovered) {
- continue;
- }
-
- switch (trx->conc_state) {
- case TRX_NOT_STARTED:
- case TRX_PREPARED:
- continue;
-
- case TRX_COMMITTED_IN_MEMORY:
- mutex_exit(&kernel_mutex);
- fprintf(stderr,
- "InnoDB: Cleaning up trx with id "
- TRX_ID_FMT "\n",
- TRX_ID_PREP_PRINTF(trx->id));
- trx_cleanup_at_db_startup(trx);
- goto loop;
-
- case TRX_ACTIVE:
- if (all || trx_get_dict_operation(trx)
- != TRX_DICT_OP_NONE) {
- mutex_exit(&kernel_mutex);
- trx_rollback_active(trx);
- goto loop;
- }
- }
- }
-
- if (all) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Rollback of non-prepared"
- " transactions completed\n");
- }
-
-leave_function:
- mutex_exit(&kernel_mutex);
-}
-
-/*******************************************************************//**
-Rollback or clean up any incomplete transactions which were
-encountered in crash recovery. If the transaction already was
-committed, then we clean up a possible insert undo log. If the
-transaction was not yet committed, then we roll it back.
-Note: this is done in a background thread.
-@return a dummy parameter */
-UNIV_INTERN
-os_thread_ret_t
-trx_rollback_or_clean_all_recovered(
-/*================================*/
- void* arg __attribute__((unused)))
- /*!< in: a dummy parameter required by
- os_thread_create */
-{
- trx_rollback_or_clean_recovered(TRUE);
-
- /* We count the number of threads in os_thread_exit(). A created
- thread should always use that to exit and not use return() to exit. */
-
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN;
-}
-
-/*******************************************************************//**
-Creates an undo number array.
-@return own: undo number array */
-UNIV_INTERN
-trx_undo_arr_t*
-trx_undo_arr_create(void)
-/*=====================*/
-{
- trx_undo_arr_t* arr;
- mem_heap_t* heap;
- ulint i;
-
- heap = mem_heap_create(1024);
-
- arr = mem_heap_alloc(heap, sizeof(trx_undo_arr_t));
-
- arr->infos = mem_heap_alloc(heap, sizeof(trx_undo_inf_t)
- * UNIV_MAX_PARALLELISM);
- arr->n_cells = UNIV_MAX_PARALLELISM;
- arr->n_used = 0;
-
- arr->heap = heap;
-
- for (i = 0; i < UNIV_MAX_PARALLELISM; i++) {
-
- (trx_undo_arr_get_nth_info(arr, i))->in_use = FALSE;
- }
-
- return(arr);
-}
-
-/*******************************************************************//**
-Frees an undo number array. */
-UNIV_INTERN
-void
-trx_undo_arr_free(
-/*==============*/
- trx_undo_arr_t* arr) /*!< in: undo number array */
-{
- ut_ad(arr->n_used == 0);
-
- mem_heap_free(arr->heap);
-}
-
-/*******************************************************************//**
-Stores info of an undo log record to the array if it is not stored yet.
-@return FALSE if the record already existed in the array */
-static
-ibool
-trx_undo_arr_store_info(
-/*====================*/
- trx_t* trx, /*!< in: transaction */
- undo_no_t undo_no)/*!< in: undo number */
-{
- trx_undo_inf_t* cell;
- trx_undo_inf_t* stored_here;
- trx_undo_arr_t* arr;
- ulint n_used;
- ulint n;
- ulint i;
-
- n = 0;
- arr = trx->undo_no_arr;
- n_used = arr->n_used;
- stored_here = NULL;
-
- for (i = 0;; i++) {
- cell = trx_undo_arr_get_nth_info(arr, i);
-
- if (!cell->in_use) {
- if (!stored_here) {
- /* Not in use, we may store here */
- cell->undo_no = undo_no;
- cell->in_use = TRUE;
-
- arr->n_used++;
-
- stored_here = cell;
- }
- } else {
- n++;
-
- if (0 == ut_dulint_cmp(cell->undo_no, undo_no)) {
-
- if (stored_here) {
- stored_here->in_use = FALSE;
- ut_ad(arr->n_used > 0);
- arr->n_used--;
- }
-
- ut_ad(arr->n_used == n_used);
-
- return(FALSE);
- }
- }
-
- if (n == n_used && stored_here) {
-
- ut_ad(arr->n_used == 1 + n_used);
-
- return(TRUE);
- }
- }
-}
-
-/*******************************************************************//**
-Removes an undo number from the array. */
-static
-void
-trx_undo_arr_remove_info(
-/*=====================*/
- trx_undo_arr_t* arr, /*!< in: undo number array */
- undo_no_t undo_no)/*!< in: undo number */
-{
- trx_undo_inf_t* cell;
- ulint n_used;
- ulint n;
- ulint i;
-
- n_used = arr->n_used;
- n = 0;
-
- for (i = 0;; i++) {
- cell = trx_undo_arr_get_nth_info(arr, i);
-
- if (cell->in_use
- && 0 == ut_dulint_cmp(cell->undo_no, undo_no)) {
-
- cell->in_use = FALSE;
-
- ut_ad(arr->n_used > 0);
-
- arr->n_used--;
-
- return;
- }
- }
-}
-
-/*******************************************************************//**
-Gets the biggest undo number in an array.
-@return biggest value, ut_dulint_zero if the array is empty */
-static
-undo_no_t
-trx_undo_arr_get_biggest(
-/*=====================*/
- trx_undo_arr_t* arr) /*!< in: undo number array */
-{
- trx_undo_inf_t* cell;
- ulint n_used;
- undo_no_t biggest;
- ulint n;
- ulint i;
-
- n = 0;
- n_used = arr->n_used;
- biggest = ut_dulint_zero;
-
- for (i = 0;; i++) {
- cell = trx_undo_arr_get_nth_info(arr, i);
-
- if (cell->in_use) {
- n++;
- if (ut_dulint_cmp(cell->undo_no, biggest) > 0) {
-
- biggest = cell->undo_no;
- }
- }
-
- if (n == n_used) {
- return(biggest);
- }
- }
-}
-
-/***********************************************************************//**
-Tries truncate the undo logs. */
-UNIV_INTERN
-void
-trx_roll_try_truncate(
-/*==================*/
- trx_t* trx) /*!< in/out: transaction */
-{
- trx_undo_arr_t* arr;
- undo_no_t limit;
- undo_no_t biggest;
-
- ut_ad(mutex_own(&(trx->undo_mutex)));
- ut_ad(mutex_own(&((trx->rseg)->mutex)));
-
- trx->pages_undone = 0;
-
- arr = trx->undo_no_arr;
-
- limit = trx->undo_no;
-
- if (arr->n_used > 0) {
- biggest = trx_undo_arr_get_biggest(arr);
-
- if (ut_dulint_cmp(biggest, limit) >= 0) {
-
- limit = ut_dulint_add(biggest, 1);
- }
- }
-
- if (trx->insert_undo) {
- trx_undo_truncate_end(trx, trx->insert_undo, limit);
- }
-
- if (trx->update_undo) {
- trx_undo_truncate_end(trx, trx->update_undo, limit);
- }
-}
-
-/***********************************************************************//**
-Pops the topmost undo log record in a single undo log and updates the info
-about the topmost record in the undo log memory struct.
-@return undo log record, the page s-latched */
-static
-trx_undo_rec_t*
-trx_roll_pop_top_rec(
-/*=================*/
- trx_t* trx, /*!< in: transaction */
- trx_undo_t* undo, /*!< in: undo log */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* undo_page;
- ulint offset;
- trx_undo_rec_t* prev_rec;
- page_t* prev_rec_page;
-
- ut_ad(mutex_own(&(trx->undo_mutex)));
-
- undo_page = trx_undo_page_get_s_latched(undo->space, undo->zip_size,
- undo->top_page_no, mtr);
- offset = undo->top_offset;
-
- /* fprintf(stderr, "Thread %lu undoing trx %lu undo record %lu\n",
- os_thread_get_curr_id(), ut_dulint_get_low(trx->id),
- ut_dulint_get_low(undo->top_undo_no)); */
-
- prev_rec = trx_undo_get_prev_rec(undo_page + offset,
- undo->hdr_page_no, undo->hdr_offset,
- mtr);
- if (prev_rec == NULL) {
-
- undo->empty = TRUE;
- } else {
- prev_rec_page = page_align(prev_rec);
-
- if (prev_rec_page != undo_page) {
-
- trx->pages_undone++;
- }
-
- undo->top_page_no = page_get_page_no(prev_rec_page);
- undo->top_offset = prev_rec - prev_rec_page;
- undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec);
- }
-
- return(undo_page + offset);
-}
-
-/********************************************************************//**
-Pops the topmost record when the two undo logs of a transaction are seen
-as a single stack of records ordered by their undo numbers. Inserts the
-undo number of the popped undo record to the array of currently processed
-undo numbers in the transaction. When the query thread finishes processing
-of this undo record, it must be released with trx_undo_rec_release.
-@return undo log record copied to heap, NULL if none left, or if the
-undo number of the top record would be less than the limit */
-UNIV_INTERN
-trx_undo_rec_t*
-trx_roll_pop_top_rec_of_trx(
-/*========================*/
- trx_t* trx, /*!< in: transaction */
- undo_no_t limit, /*!< in: least undo number we need */
- roll_ptr_t* roll_ptr,/*!< out: roll pointer to undo record */
- mem_heap_t* heap) /*!< in: memory heap where copied */
-{
- trx_undo_t* undo;
- trx_undo_t* ins_undo;
- trx_undo_t* upd_undo;
- trx_undo_rec_t* undo_rec;
- trx_undo_rec_t* undo_rec_copy;
- undo_no_t undo_no;
- ibool is_insert;
- trx_rseg_t* rseg;
- ulint progress_pct;
- mtr_t mtr;
-
- rseg = trx->rseg;
-try_again:
- mutex_enter(&(trx->undo_mutex));
-
- if (trx->pages_undone >= TRX_ROLL_TRUNC_THRESHOLD) {
- mutex_enter(&(rseg->mutex));
-
- trx_roll_try_truncate(trx);
-
- mutex_exit(&(rseg->mutex));
- }
-
- ins_undo = trx->insert_undo;
- upd_undo = trx->update_undo;
-
- if (!ins_undo || ins_undo->empty) {
- undo = upd_undo;
- } else if (!upd_undo || upd_undo->empty) {
- undo = ins_undo;
- } else if (ut_dulint_cmp(upd_undo->top_undo_no,
- ins_undo->top_undo_no) > 0) {
- undo = upd_undo;
- } else {
- undo = ins_undo;
- }
-
- if (!undo || undo->empty
- || (ut_dulint_cmp(limit, undo->top_undo_no) > 0)) {
-
- if ((trx->undo_no_arr)->n_used == 0) {
- /* Rollback is ending */
-
- mutex_enter(&(rseg->mutex));
-
- trx_roll_try_truncate(trx);
-
- mutex_exit(&(rseg->mutex));
- }
-
- mutex_exit(&(trx->undo_mutex));
-
- return(NULL);
- }
-
- if (undo == ins_undo) {
- is_insert = TRUE;
- } else {
- is_insert = FALSE;
- }
-
- *roll_ptr = trx_undo_build_roll_ptr(is_insert, (undo->rseg)->id,
- undo->top_page_no,
- undo->top_offset);
- mtr_start(&mtr);
-
- undo_rec = trx_roll_pop_top_rec(trx, undo, &mtr);
-
- undo_no = trx_undo_rec_get_undo_no(undo_rec);
-
- ut_ad(ut_dulint_cmp(ut_dulint_add(undo_no, 1), trx->undo_no) == 0);
-
- /* We print rollback progress info if we are in a crash recovery
- and the transaction has at least 1000 row operations to undo. */
-
- if (trx == trx_roll_crash_recv_trx && trx_roll_max_undo_no > 1000) {
-
- progress_pct = 100 - (ulint)
- ((ut_conv_dulint_to_longlong(undo_no) * 100)
- / trx_roll_max_undo_no);
- if (progress_pct != trx_roll_progress_printed_pct) {
- if (trx_roll_progress_printed_pct == 0) {
- fprintf(stderr,
- "\nInnoDB: Progress in percents:"
- " %lu", (ulong) progress_pct);
- } else {
- fprintf(stderr,
- " %lu", (ulong) progress_pct);
- }
- fflush(stderr);
- trx_roll_progress_printed_pct = progress_pct;
- }
- }
-
- trx->undo_no = undo_no;
-
- if (!trx_undo_arr_store_info(trx, undo_no)) {
- /* A query thread is already processing this undo log record */
-
- mutex_exit(&(trx->undo_mutex));
-
- mtr_commit(&mtr);
-
- goto try_again;
- }
-
- undo_rec_copy = trx_undo_rec_copy(undo_rec, heap);
-
- mutex_exit(&(trx->undo_mutex));
-
- mtr_commit(&mtr);
-
- return(undo_rec_copy);
-}
-
-/********************************************************************//**
-Reserves an undo log record for a query thread to undo. This should be
-called if the query thread gets the undo log record not using the pop
-function above.
-@return TRUE if succeeded */
-UNIV_INTERN
-ibool
-trx_undo_rec_reserve(
-/*=================*/
- trx_t* trx, /*!< in/out: transaction */
- undo_no_t undo_no)/*!< in: undo number of the record */
-{
- ibool ret;
-
- mutex_enter(&(trx->undo_mutex));
-
- ret = trx_undo_arr_store_info(trx, undo_no);
-
- mutex_exit(&(trx->undo_mutex));
-
- return(ret);
-}
-
-/*******************************************************************//**
-Releases a reserved undo record. */
-UNIV_INTERN
-void
-trx_undo_rec_release(
-/*=================*/
- trx_t* trx, /*!< in/out: transaction */
- undo_no_t undo_no)/*!< in: undo number */
-{
- trx_undo_arr_t* arr;
-
- mutex_enter(&(trx->undo_mutex));
-
- arr = trx->undo_no_arr;
-
- trx_undo_arr_remove_info(arr, undo_no);
-
- mutex_exit(&(trx->undo_mutex));
-}
-
-/*********************************************************************//**
-Starts a rollback operation. */
-UNIV_INTERN
-void
-trx_rollback(
-/*=========*/
- trx_t* trx, /*!< in: transaction */
- trx_sig_t* sig, /*!< in: signal starting the rollback */
- que_thr_t** next_thr)/*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread; if the passed value is
- NULL, the parameter is ignored */
-{
- que_t* roll_graph;
- que_thr_t* thr;
- /* que_thr_t* thr2; */
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad((trx->undo_no_arr == NULL) || ((trx->undo_no_arr)->n_used == 0));
-
- /* Initialize the rollback field in the transaction */
-
- if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
-
- trx->roll_limit = ut_dulint_zero;
-
- } else if (sig->type == TRX_SIG_ROLLBACK_TO_SAVEPT) {
-
- trx->roll_limit = (sig->savept).least_undo_no;
-
- } else if (sig->type == TRX_SIG_ERROR_OCCURRED) {
-
- trx->roll_limit = trx->last_sql_stat_start.least_undo_no;
- } else {
- ut_error;
- }
-
- ut_a(ut_dulint_cmp(trx->roll_limit, trx->undo_no) <= 0);
-
- trx->pages_undone = 0;
-
- if (trx->undo_no_arr == NULL) {
- trx->undo_no_arr = trx_undo_arr_create();
- }
-
- /* Build a 'query' graph which will perform the undo operations */
-
- roll_graph = trx_roll_graph_build(trx);
-
- trx->graph = roll_graph;
- trx->que_state = TRX_QUE_ROLLING_BACK;
-
- thr = que_fork_start_command(roll_graph);
-
- ut_ad(thr);
-
- /* thr2 = que_fork_start_command(roll_graph);
-
- ut_ad(thr2); */
-
- if (next_thr && (*next_thr == NULL)) {
- *next_thr = thr;
- /* srv_que_task_enqueue_low(thr2); */
- } else {
- srv_que_task_enqueue_low(thr);
- /* srv_que_task_enqueue_low(thr2); */
- }
-}
-
-/****************************************************************//**
-Builds an undo 'query' graph for a transaction. The actual rollback is
-performed by executing this query graph like a query subprocedure call.
-The reply about the completion of the rollback will be sent by this
-graph.
-@return own: the query graph */
-UNIV_INTERN
-que_t*
-trx_roll_graph_build(
-/*=================*/
- trx_t* trx) /*!< in: trx handle */
-{
- mem_heap_t* heap;
- que_fork_t* fork;
- que_thr_t* thr;
- /* que_thr_t* thr2; */
-
- ut_ad(mutex_own(&kernel_mutex));
-
- heap = mem_heap_create(512);
- fork = que_fork_create(NULL, NULL, QUE_FORK_ROLLBACK, heap);
- fork->trx = trx;
-
- thr = que_thr_create(fork, heap);
- /* thr2 = que_thr_create(fork, heap); */
-
- thr->child = row_undo_node_create(trx, thr, heap);
- /* thr2->child = row_undo_node_create(trx, thr2, heap); */
-
- return(fork);
-}
-
-/*********************************************************************//**
-Finishes error processing after the necessary partial rollback has been
-done. */
-static
-void
-trx_finish_error_processing(
-/*========================*/
- trx_t* trx) /*!< in: transaction */
-{
- trx_sig_t* sig;
- trx_sig_t* next_sig;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- sig = UT_LIST_GET_FIRST(trx->signals);
-
- while (sig != NULL) {
- next_sig = UT_LIST_GET_NEXT(signals, sig);
-
- if (sig->type == TRX_SIG_ERROR_OCCURRED) {
-
- trx_sig_remove(trx, sig);
- }
-
- sig = next_sig;
- }
-
- trx->que_state = TRX_QUE_RUNNING;
-}
-
-/*********************************************************************//**
-Finishes a partial rollback operation. */
-static
-void
-trx_finish_partial_rollback_off_kernel(
-/*===================================*/
- trx_t* trx, /*!< in: transaction */
- que_thr_t** next_thr)/*!< in/out: next query thread to run;
- if the value which is passed in is a pointer
- to a NULL pointer, then the calling function
- can start running a new query thread; if this
- parameter is NULL, it is ignored */
-{
- trx_sig_t* sig;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- sig = UT_LIST_GET_FIRST(trx->signals);
-
- /* Remove the signal from the signal queue and send reply message
- to it */
-
- trx_sig_reply(sig, next_thr);
- trx_sig_remove(trx, sig);
-
- trx->que_state = TRX_QUE_RUNNING;
-}
-
-/****************************************************************//**
-Finishes a transaction rollback. */
-UNIV_INTERN
-void
-trx_finish_rollback_off_kernel(
-/*===========================*/
- que_t* graph, /*!< in: undo graph which can now be freed */
- trx_t* trx, /*!< in: transaction */
- que_thr_t** next_thr)/*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread; if this parameter is
- NULL, it is ignored */
-{
- trx_sig_t* sig;
- trx_sig_t* next_sig;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- ut_a(trx->undo_no_arr == NULL || trx->undo_no_arr->n_used == 0);
-
- /* Free the memory reserved by the undo graph */
- que_graph_free(graph);
-
- sig = UT_LIST_GET_FIRST(trx->signals);
-
- if (sig->type == TRX_SIG_ROLLBACK_TO_SAVEPT) {
-
- trx_finish_partial_rollback_off_kernel(trx, next_thr);
-
- return;
-
- } else if (sig->type == TRX_SIG_ERROR_OCCURRED) {
-
- trx_finish_error_processing(trx);
-
- return;
- }
-
-#ifdef UNIV_DEBUG
- if (lock_print_waits) {
- fprintf(stderr, "Trx %lu rollback finished\n",
- (ulong) ut_dulint_get_low(trx->id));
- }
-#endif /* UNIV_DEBUG */
-
- trx_commit_off_kernel(trx);
-
- /* Remove all TRX_SIG_TOTAL_ROLLBACK signals from the signal queue and
- send reply messages to them */
-
- trx->que_state = TRX_QUE_RUNNING;
-
- while (sig != NULL) {
- next_sig = UT_LIST_GET_NEXT(signals, sig);
-
- if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
-
- trx_sig_reply(sig, next_thr);
-
- trx_sig_remove(trx, sig);
- }
-
- sig = next_sig;
- }
-}
-
-/*********************************************************************//**
-Creates a rollback command node struct.
-@return own: rollback node struct */
-UNIV_INTERN
-roll_node_t*
-roll_node_create(
-/*=============*/
- mem_heap_t* heap) /*!< in: mem heap where created */
-{
- roll_node_t* node;
-
- node = mem_heap_alloc(heap, sizeof(roll_node_t));
- node->common.type = QUE_NODE_ROLLBACK;
- node->state = ROLL_NODE_SEND;
-
- node->partial = FALSE;
-
- return(node);
-}
-
-/***********************************************************//**
-Performs an execution step for a rollback command node in a query graph.
-@return query thread to run next, or NULL */
-UNIV_INTERN
-que_thr_t*
-trx_rollback_step(
-/*==============*/
- que_thr_t* thr) /*!< in: query thread */
-{
- roll_node_t* node;
- ulint sig_no;
- trx_savept_t* savept;
-
- node = thr->run_node;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_ROLLBACK);
-
- if (thr->prev_node == que_node_get_parent(node)) {
- node->state = ROLL_NODE_SEND;
- }
-
- if (node->state == ROLL_NODE_SEND) {
- mutex_enter(&kernel_mutex);
-
- node->state = ROLL_NODE_WAIT;
-
- if (node->partial) {
- sig_no = TRX_SIG_ROLLBACK_TO_SAVEPT;
- savept = &(node->savept);
- } else {
- sig_no = TRX_SIG_TOTAL_ROLLBACK;
- savept = NULL;
- }
-
- /* Send a rollback signal to the transaction */
-
- trx_sig_send(thr_get_trx(thr), sig_no, TRX_SIG_SELF, thr,
- savept, NULL);
-
- thr->state = QUE_THR_SIG_REPLY_WAIT;
-
- mutex_exit(&kernel_mutex);
-
- return(NULL);
- }
-
- ut_ad(node->state == ROLL_NODE_WAIT);
-
- thr->run_node = que_node_get_parent(node);
-
- return(thr);
-}
diff --git a/storage/innodb_plugin/trx/trx0rseg.c b/storage/innodb_plugin/trx/trx0rseg.c
deleted file mode 100644
index 8d754788e2a..00000000000
--- a/storage/innodb_plugin/trx/trx0rseg.c
+++ /dev/null
@@ -1,324 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file trx/trx0rseg.c
-Rollback segment
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0rseg.h"
-
-#ifdef UNIV_NONINL
-#include "trx0rseg.ic"
-#endif
-
-#include "trx0undo.h"
-#include "fut0lst.h"
-#include "srv0srv.h"
-#include "trx0purge.h"
-
-/******************************************************************//**
-Walks trx_sys->rseg_list and returns the rollback segment whose id
-matches. The id is expected to exist: the only guard against running
-off the end of the list is the ut_ad() assertion.
-@return rollback segment */
-UNIV_INTERN
-trx_rseg_t*
-trx_rseg_get_on_id(
-/*===============*/
-    ulint    id)    /*!< in: rollback segment id */
-{
-    trx_rseg_t*    cur = UT_LIST_GET_FIRST(trx_sys->rseg_list);
-
-    for (;;) {
-        ut_ad(cur);
-
-        if (cur->id == id) {
-
-            return(cur);
-        }
-
-        cur = UT_LIST_GET_NEXT(rseg_list, cur);
-    }
-}
-
-/****************************************************************//**
-Creates a rollback segment header. This function is called only when
-a new rollback segment is created in the database. The caller must own
-the kernel mutex and hold the x-latch of the tablespace in mtr.
-@return page number of the created segment, FIL_NULL if there is no
-free slot in the trx system header or no space for the file segment */
-UNIV_INTERN
-ulint
-trx_rseg_header_create(
-/*===================*/
-    ulint    space,      /*!< in: space id */
-    ulint    zip_size,   /*!< in: compressed page size in bytes
-                         or 0 for uncompressed pages */
-    ulint    max_size,   /*!< in: max size in pages */
-    ulint*   slot_no,    /*!< out: rseg id == slot number in trx sys;
-                         ULINT_UNDEFINED if no slot was free */
-    mtr_t*   mtr)        /*!< in: mtr */
-{
-    trx_sysf_t*     sysf;
-    trx_rsegf_t*    hdr;
-    buf_block_t*    seg_block;
-    ulint           hdr_page_no;
-    ulint           n;
-
-    ut_ad(mtr);
-    ut_ad(mutex_own(&kernel_mutex));
-    ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL),
-                            MTR_MEMO_X_LOCK));
-
-    sysf = trx_sysf_get(mtr);
-
-    *slot_no = trx_sysf_rseg_find_free(mtr);
-
-    if (*slot_no == ULINT_UNDEFINED) {
-
-        return(FIL_NULL);
-    }
-
-    /* The rollback segment lives in a file segment of its own */
-    seg_block = fseg_create(space, 0,
-                            TRX_RSEG + TRX_RSEG_FSEG_HEADER, mtr);
-
-    if (seg_block == NULL) {
-        /* The tablespace is full */
-
-        return(FIL_NULL);
-    }
-
-    buf_block_dbg_add_level(seg_block, SYNC_RSEG_HEADER_NEW);
-
-    hdr_page_no = buf_block_get_page_no(seg_block);
-
-    hdr = trx_rsegf_get_new(space, zip_size, hdr_page_no, mtr);
-
-    /* Max size, then an empty history list */
-    mlog_write_ulint(hdr + TRX_RSEG_MAX_SIZE, max_size,
-                     MLOG_4BYTES, mtr);
-
-    mlog_write_ulint(hdr + TRX_RSEG_HISTORY_SIZE, 0, MLOG_4BYTES, mtr);
-    flst_init(hdr + TRX_RSEG_HISTORY, mtr);
-
-    /* Every undo log slot starts out unused */
-    n = 0;
-
-    while (n < TRX_RSEG_N_SLOTS) {
-        trx_rsegf_set_nth_undo(hdr, n, FIL_NULL, mtr);
-        n++;
-    }
-
-    /* Publish the new segment in the free slot of the trx system
-    header */
-    trx_sysf_rseg_set_space(sysf, *slot_no, space, mtr);
-    trx_sysf_rseg_set_page_no(sysf, *slot_no, hdr_page_no, mtr);
-
-    return(hdr_page_no);
-}
-
-/***********************************************************************//**
-Frees an in-memory rollback segment instance: its mutex, every cached
-update and insert undo log object, its slot in the trx_sys rseg array,
-and finally the object itself. The active undo lists must be empty. */
-UNIV_INTERN
-void
-trx_rseg_mem_free(
-/*==============*/
-    trx_rseg_t*    rseg)    /* in, own: instance to free */
-{
-    trx_undo_t*    cached;
-
-    mutex_free(&rseg->mutex);
-
-    /* No transaction may still be using this segment. */
-    ut_a(UT_LIST_GET_LEN(rseg->update_undo_list) == 0);
-    ut_a(UT_LIST_GET_LEN(rseg->insert_undo_list) == 0);
-
-    /* Drain the cached update undo logs from the head of the list */
-    while ((cached = UT_LIST_GET_FIRST(rseg->update_undo_cached))
-           != NULL) {
-
-        UT_LIST_REMOVE(undo_list, rseg->update_undo_cached, cached);
-
-        trx_undo_mem_free(cached);
-    }
-
-    /* Then the cached insert undo logs */
-    while ((cached = UT_LIST_GET_FIRST(rseg->insert_undo_cached))
-           != NULL) {
-
-        UT_LIST_REMOVE(undo_list, rseg->insert_undo_cached, cached);
-
-        trx_undo_mem_free(cached);
-    }
-
-    trx_sys_set_nth_rseg(trx_sys, rseg->id, NULL);
-
-    mem_free(rseg);
-}
-
-/***************************************************************************//**
-Creates and initializes the in-memory object of a rollback segment from
-its header page. The object is appended to trx_sys->rseg_list and its
-pointer is stored in the rseg array of trx_sys. The caller must own the
-kernel mutex.
-@return own: rollback segment object */
-static
-trx_rseg_t*
-trx_rseg_mem_create(
-/*================*/
-    ulint    id,         /*!< in: rollback segment id */
-    ulint    space,      /*!< in: space where the segment placed */
-    ulint    zip_size,   /*!< in: compressed page size in bytes
-                         or 0 for uncompressed pages */
-    ulint    page_no,    /*!< in: page number of the segment header */
-    mtr_t*   mtr)        /*!< in: mtr */
-{
-    trx_rseg_t*     rseg;
-    trx_rsegf_t*    hdr;
-    trx_ulogf_t*    last_log;
-    fil_addr_t      last_addr;
-    ulint           undo_pages;
-    ulint           hist_len;
-
-    ut_ad(mutex_own(&kernel_mutex));
-
-    rseg = mem_alloc(sizeof(*rseg));
-
-    rseg->id = id;
-    rseg->space = space;
-    rseg->zip_size = zip_size;
-    rseg->page_no = page_no;
-
-    mutex_create(&rseg->mutex, SYNC_RSEG);
-
-    UT_LIST_ADD_LAST(rseg_list, trx_sys->rseg_list, rseg);
-
-    trx_sys_set_nth_rseg(trx_sys, id, rseg);
-
-    hdr = trx_rsegf_get_new(space, zip_size, page_no, mtr);
-
-    rseg->max_size = mtr_read_ulint(hdr + TRX_RSEG_MAX_SIZE,
-                                    MLOG_4BYTES, mtr);
-
-    /* Build the undo log lists from the slots of the header */
-    undo_pages = trx_undo_lists_init(rseg);
-
-    /* Stored history size + 1 + the sizes of the undo logs */
-    rseg->curr_size = mtr_read_ulint(hdr + TRX_RSEG_HISTORY_SIZE,
-                                     MLOG_4BYTES, mtr)
-        + 1 + undo_pages;
-
-    hist_len = flst_get_len(hdr + TRX_RSEG_HISTORY, mtr);
-
-    if (hist_len == 0) {
-        /* Empty history list: only last_page_no is set */
-        rseg->last_page_no = FIL_NULL;
-
-        return(rseg);
-    }
-
-    trx_sys->rseg_history_len += hist_len;
-
-    /* Remember where the last log of the history list is and
-    copy its trx number and delete-mark flag */
-    last_addr = trx_purge_get_log_from_hist(
-        flst_get_last(hdr + TRX_RSEG_HISTORY, mtr));
-
-    rseg->last_page_no = last_addr.page;
-    rseg->last_offset = last_addr.boffset;
-
-    last_log = trx_undo_page_get(rseg->space, rseg->zip_size,
-                                 last_addr.page,
-                                 mtr) + last_addr.boffset;
-
-    rseg->last_trx_no = mtr_read_dulint(
-        last_log + TRX_UNDO_TRX_NO, mtr);
-    rseg->last_del_marks = mtr_read_ulint(
-        last_log + TRX_UNDO_DEL_MARKS, MLOG_2BYTES, mtr);
-
-    return(rseg);
-}
-
-/*********************************************************************//**
-Initializes the rseg list and array of trx_sys at database startup:
-for every slot of the trx system header that holds a page number an
-in-memory rollback segment is created, every other array entry is set
-to NULL. */
-UNIV_INTERN
-void
-trx_rseg_list_and_array_init(
-/*=========================*/
-    trx_sysf_t*    sys_header,    /*!< in: trx system header */
-    mtr_t*         mtr)           /*!< in: mtr */
-{
-    ulint    slot;
-
-    UT_LIST_INIT(trx_sys->rseg_list);
-
-    trx_sys->rseg_history_len = 0;
-
-    for (slot = 0; slot < TRX_SYS_N_RSEGS; slot++) {
-        ulint    hdr_page;
-        ulint    space_id;
-        ulint    zip_size;
-
-        hdr_page = trx_sysf_rseg_get_page_no(sys_header, slot, mtr);
-
-        if (hdr_page == FIL_NULL) {
-            /* Unused slot */
-            trx_sys_set_nth_rseg(trx_sys, slot, NULL);
-
-            continue;
-        }
-
-        space_id = trx_sysf_rseg_get_space(sys_header, slot, mtr);
-
-        /* Space 0 is treated as uncompressed without a lookup */
-        zip_size = space_id ? fil_space_get_zip_size(space_id) : 0;
-
-        trx_rseg_mem_create(slot, space_id, zip_size, hdr_page, mtr);
-    }
-}
-
-/****************************************************************//**
-Creates a new rollback segment to the database: x-latches the
-tablespace in mtr, then, under the kernel mutex, creates the segment
-header and the in-memory object.
-@return the created segment object, NULL if fail */
-UNIV_INTERN
-trx_rseg_t*
-trx_rseg_create(
-/*============*/
-    ulint    space,      /*!< in: space id */
-    ulint    max_size,   /*!< in: max size in pages */
-    ulint*   id,         /*!< out: rseg id */
-    mtr_t*   mtr)        /*!< in: mtr */
-{
-    trx_rseg_t*    new_rseg = NULL;
-    ulint          space_flags;
-    ulint          zip_size;
-    ulint          hdr_page;
-
-    mtr_x_lock(fil_space_get_latch(space, &space_flags), mtr);
-    zip_size = dict_table_flags_to_zip_size(space_flags);
-
-    mutex_enter(&kernel_mutex);
-
-    hdr_page = trx_rseg_header_create(space, zip_size, max_size,
-                                      id, mtr);
-
-    if (hdr_page != FIL_NULL) {
-        new_rseg = trx_rseg_mem_create(*id, space, zip_size,
-                                       hdr_page, mtr);
-    }
-
-    mutex_exit(&kernel_mutex);
-
-    return(new_rseg);
-}
diff --git a/storage/innodb_plugin/trx/trx0sys.c b/storage/innodb_plugin/trx/trx0sys.c
deleted file mode 100644
index 79e5af1c677..00000000000
--- a/storage/innodb_plugin/trx/trx0sys.c
+++ /dev/null
@@ -1,1613 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file trx/trx0sys.c
-Transaction system
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0sys.h"
-
-#ifdef UNIV_NONINL
-#include "trx0sys.ic"
-#endif
-
-#ifndef UNIV_HOTBACKUP
-#include "fsp0fsp.h"
-#include "mtr0log.h"
-#include "mtr0log.h"
-#include "trx0trx.h"
-#include "trx0rseg.h"
-#include "trx0undo.h"
-#include "srv0srv.h"
-#include "trx0purge.h"
-#include "log0log.h"
-#include "os0file.h"
-#include "read0read.h"
-
-/** The file format tag structure: a numeric format id, its textual
-name, and the mutex that protects both. */
-struct file_format_struct {
- ulint id; /*!< id of the file format */
- const char* name; /*!< text representation of the
- file format; NOTE(review): presumably
- an entry of file_format_name_map[] -
- confirm against the code that sets it */
- mutex_t mutex; /*!< covers changes to the above
- fields */
-};
-
-/** The file format tag (see struct file_format_struct) */
-typedef struct file_format_struct file_format_t;
-
-/** The transaction system; NULL until it has been created */
-UNIV_INTERN trx_sys_t* trx_sys = NULL;
-/** The doublewrite buffer; NULL until trx_doublewrite_init() has
-allocated it */
-UNIV_INTERN trx_doublewrite_t* trx_doublewrite = NULL;
-
-/** The following is set to TRUE when we are upgrading from pre-4.1
-format data files to the multiple tablespaces format data files.
-It is set by trx_sys_doublewrite_init_or_restore_pages() when the
-"space id stored" marker is missing from the doublewrite header. */
-UNIV_INTERN ibool trx_doublewrite_must_reset_space_ids = FALSE;
-/** Set to TRUE when the doublewrite buffer is being created, i.e.
-while trx_sys_create_doublewrite_buf() is working on it */
-UNIV_INTERN ibool trx_doublewrite_buf_is_being_created = FALSE;
-
-/** The following is TRUE when we are using the database in the
-post-4.1 format, i.e., we have successfully upgraded, or have created
-a new database installation */
-UNIV_INTERN ibool trx_sys_multiple_tablespace_format = FALSE;
-
-/** In a MySQL replication slave, in crash recovery we store the master log
-file name and position here. Both are filled in by
-trx_sys_print_mysql_master_log_pos(). */
-/* @{ */
-/** Master binlog file name */
-UNIV_INTERN char trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
-/** Master binlog file position. We have successfully got the updates
-up to this position. -1 means that no crash recovery was needed, or
-there was no master log position info inside InnoDB.*/
-UNIV_INTERN ib_int64_t trx_sys_mysql_master_log_pos = -1;
-/* @} */
-
-/** If this MySQL server uses binary logging, after InnoDB has been inited
-and if it has done a crash recovery, we store the binlog file name and position
-here. Both are filled in by trx_sys_print_mysql_binlog_offset(). */
-/* @{ */
-/** Binlog file name */
-UNIV_INTERN char trx_sys_mysql_bin_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
-/** Binlog file position, or -1 if unknown */
-UNIV_INTERN ib_int64_t trx_sys_mysql_bin_log_pos = -1;
-/* @} */
-#endif /* !UNIV_HOTBACKUP */
-
-/** List of animal names representing file format: one name for each
-letter of the alphabet, in alphabetical order.
-NOTE(review): presumably indexed by the file format id - confirm
-against the code that reads this table. */
-static const char* file_format_name_map[] = {
- "Antelope",
- "Barracuda",
- "Cheetah",
- "Dragon",
- "Elk",
- "Fox",
- "Gazelle",
- "Hornet",
- "Impala",
- "Jaguar",
- "Kangaroo",
- "Leopard",
- "Moose",
- "Nautilus",
- "Ocelot",
- "Porpoise",
- "Quail",
- "Rabbit",
- "Shark",
- "Tiger",
- "Urchin",
- "Viper",
- "Whale",
- "Xenops",
- "Yak",
- "Zebra"
-};
-
-/** The number of elements in the file format name array, computed from
-the array itself so that it follows any change to the list above. */
-static const ulint FILE_FORMAT_NAME_N
- = sizeof(file_format_name_map) / sizeof(file_format_name_map[0]);
-
-#ifndef UNIV_HOTBACKUP
-/** This is used to track the maximum file format id known to InnoDB. It's
-updated via SET GLOBAL innodb_file_format_check = 'x' or when we open
-or create a table. The value carries its own mutex (see
-struct file_format_struct), which covers changes to the id and name. */
-static file_format_t file_format_max;
-
-/****************************************************************//**
-Tells whether a page number falls inside one of the two blocks of the
-doublewrite buffer. Each block covers TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
-consecutive pages starting at block1 and block2 respectively.
-@return TRUE if the location is inside the two blocks of the
-doublewrite buffer; FALSE otherwise, also when no doublewrite buffer
-exists yet */
-UNIV_INTERN
-ibool
-trx_doublewrite_page_inside(
-/*========================*/
-    ulint    page_no)    /*!< in: page number */
-{
-    ulint    first;
-
-    if (trx_doublewrite == NULL) {
-
-        return(FALSE);
-    }
-
-    first = trx_doublewrite->block1;
-
-    if (first <= page_no
-        && page_no < first + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
-
-        return(TRUE);
-    }
-
-    first = trx_doublewrite->block2;
-
-    if (first <= page_no
-        && page_no < first + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
-
-        return(TRUE);
-    }
-
-    return(FALSE);
-}
-
-/****************************************************************//**
-Creates or initializes the in-memory doublewrite buffer structure at a
-database start: allocates trx_doublewrite, reads the start pages of the
-two blocks from the header, and allocates the page-aligned write buffer
-and the block pointer array. */
-static
-void
-trx_doublewrite_init(
-/*=================*/
-    byte*    doublewrite)    /*!< in: pointer to the doublewrite buf
-                             header on trx sys page */
-{
-    trx_doublewrite_t*    dblwr;
-    const ulint           n_pages = 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
-
-    trx_doublewrite = mem_alloc(sizeof(trx_doublewrite_t));
-    dblwr = trx_doublewrite;
-
-    /* Since we now start to use the doublewrite buffer, no need to call
-    fsync() after every write to a data file */
-#ifdef UNIV_DO_FLUSH
-    os_do_not_call_flush_at_each_write = TRUE;
-#endif /* UNIV_DO_FLUSH */
-
-    mutex_create(&dblwr->mutex, SYNC_DOUBLEWRITE);
-
-    dblwr->first_free = 0;
-
-    dblwr->block1 = mach_read_from_4(
-        doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK1);
-    dblwr->block2 = mach_read_from_4(
-        doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK2);
-
-    /* One page more than needed, so that the aligned buffer fits */
-    dblwr->write_buf_unaligned = ut_malloc(
-        (1 + n_pages) * UNIV_PAGE_SIZE);
-
-    dblwr->write_buf = ut_align(
-        dblwr->write_buf_unaligned, UNIV_PAGE_SIZE);
-
-    /* One pointer slot for each page of the two blocks */
-    dblwr->buf_block_arr = mem_alloc(n_pages * sizeof(void*));
-}
-
-/****************************************************************//**
-Marks the trx sys header when we have successfully upgraded to the >= 4.1.x
-multiple tablespace format: writes the "space id stored" marker to the
-doublewrite header, makes a checkpoint, and sets
-trx_sys_multiple_tablespace_format. */
-UNIV_INTERN
-void
-trx_sys_mark_upgraded_to_multiple_tablespaces(void)
-/*===============================================*/
-{
-    mtr_t           mtr;
-    buf_block_t*    sys_block;
-    byte*           dblwr_hdr;
-
-    /* The space id fields in the doublewrite buffer were reset during
-    the upgrade to 4.1.x. Record in the trx_sys header that this has
-    been done. */
-
-    mtr_start(&mtr);
-
-    sys_block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
-                             RW_X_LATCH, &mtr);
-    buf_block_dbg_add_level(sys_block, SYNC_NO_ORDER_CHECK);
-
-    dblwr_hdr = buf_block_get_frame(sys_block) + TRX_SYS_DOUBLEWRITE;
-
-    mlog_write_ulint(dblwr_hdr + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
-                     TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
-                     MLOG_4BYTES, &mtr);
-
-    mtr_commit(&mtr);
-
-    /* Get the modified pages to disk and make a checkpoint */
-    log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
-
-    trx_sys_multiple_tablespace_format = TRUE;
-}
-
-/****************************************************************//**
-Creates the doublewrite buffer to a new InnoDB installation. The header of the
-doublewrite buffer is placed on the trx system header page.
-
-If trx_doublewrite is already set, nothing is done. If the trx sys page
-already carries the doublewrite magic number, only the in-memory
-structure is initialized from it. Otherwise the pages are allocated, the
-header fields are written, a checkpoint is made, and the function starts
-again so that the magic-number branch initializes the in-memory
-structure. If the buffer pool or the tablespace is too small, the
-process exits. */
-UNIV_INTERN
-void
-trx_sys_create_doublewrite_buf(void)
-/*================================*/
-{
-    buf_block_t*    block;
-    buf_block_t*    block2;
-    buf_block_t*    new_block;
-    byte*           doublewrite;
-    byte*           fseg_header;
-    ulint           page_no;
-    ulint           prev_page_no;
-    ulint           i;
-    mtr_t           mtr;
-
-    if (trx_doublewrite) {
-        /* Already inited */
-
-        return;
-    }
-
-start_again:
-    mtr_start(&mtr);
-    trx_doublewrite_buf_is_being_created = TRUE;
-
-    block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
-                         RW_X_LATCH, &mtr);
-    buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
-
-    doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE;
-
-    if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
-        == TRX_SYS_DOUBLEWRITE_MAGIC_N) {
-        /* The doublewrite buffer has already been created:
-        just read in some numbers */
-
-        trx_doublewrite_init(doublewrite);
-
-        mtr_commit(&mtr);
-        trx_doublewrite_buf_is_being_created = FALSE;
-    } else {
-        fprintf(stderr,
-                "InnoDB: Doublewrite buffer not found:"
-                " creating new\n");
-
-        if (buf_pool_get_curr_size()
-            < ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
-                + FSP_EXTENT_SIZE / 2 + 100)
-               * UNIV_PAGE_SIZE)) {
-            fprintf(stderr,
-                    "InnoDB: Cannot create doublewrite buffer:"
-                    " you must\n"
-                    "InnoDB: increase your buffer pool size.\n"
-                    "InnoDB: Cannot continue operation.\n");
-
-            exit(1);
-        }
-
-        block2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
-                             TRX_SYS_DOUBLEWRITE
-                             + TRX_SYS_DOUBLEWRITE_FSEG, &mtr);
-
-        /* Test for failure before block2 is used in any way:
-        the latch level declaration below must not be handed a
-        NULL block. */
-
-        if (block2 == NULL) {
-            fprintf(stderr,
-                    "InnoDB: Cannot create doublewrite buffer:"
-                    " you must\n"
-                    "InnoDB: increase your tablespace size.\n"
-                    "InnoDB: Cannot continue operation.\n");
-
-            /* We exit without committing the mtr to prevent
-            its modifications to the database getting to disk */
-
-            exit(1);
-        }
-
-        /* fseg_create acquires a second latch on the page,
-        therefore we must declare it: */
-
-        buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK);
-
-        fseg_header = buf_block_get_frame(block)
-            + TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG;
-        prev_page_no = 0;
-
-        /* Half an extent is allocated first; the two doublewrite
-        blocks are expected to start exactly at the first and the
-        second extent boundary (see the ut_a() checks below). */
-
-        for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
-                 + FSP_EXTENT_SIZE / 2; i++) {
-            page_no = fseg_alloc_free_page(fseg_header,
-                                           prev_page_no + 1,
-                                           FSP_UP, &mtr);
-            if (page_no == FIL_NULL) {
-                fprintf(stderr,
-                        "InnoDB: Cannot create doublewrite"
-                        " buffer: you must\n"
-                        "InnoDB: increase your"
-                        " tablespace size.\n"
-                        "InnoDB: Cannot continue operation.\n"
-                        );
-
-                exit(1);
-            }
-
-            /* We read the allocated pages to the buffer pool;
-            when they are written to disk in a flush, the space
-            id and page number fields are also written to the
-            pages. When we at database startup read pages
-            from the doublewrite buffer, we know that if the
-            space id and page number in them are the same as
-            the page position in the tablespace, then the page
-            has not been written to in doublewrite. */
-
-            new_block = buf_page_get(TRX_SYS_SPACE, 0, page_no,
-                                     RW_X_LATCH, &mtr);
-            buf_block_dbg_add_level(new_block,
-                                    SYNC_NO_ORDER_CHECK);
-
-            if (i == FSP_EXTENT_SIZE / 2) {
-                /* First page of block 1: store its page
-                number in both copies of the header */
-                ut_a(page_no == FSP_EXTENT_SIZE);
-                mlog_write_ulint(doublewrite
-                                 + TRX_SYS_DOUBLEWRITE_BLOCK1,
-                                 page_no, MLOG_4BYTES, &mtr);
-                mlog_write_ulint(doublewrite
-                                 + TRX_SYS_DOUBLEWRITE_REPEAT
-                                 + TRX_SYS_DOUBLEWRITE_BLOCK1,
-                                 page_no, MLOG_4BYTES, &mtr);
-            } else if (i == FSP_EXTENT_SIZE / 2
-                       + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
-                /* First page of block 2 */
-                ut_a(page_no == 2 * FSP_EXTENT_SIZE);
-                mlog_write_ulint(doublewrite
-                                 + TRX_SYS_DOUBLEWRITE_BLOCK2,
-                                 page_no, MLOG_4BYTES, &mtr);
-                mlog_write_ulint(doublewrite
-                                 + TRX_SYS_DOUBLEWRITE_REPEAT
-                                 + TRX_SYS_DOUBLEWRITE_BLOCK2,
-                                 page_no, MLOG_4BYTES, &mtr);
-            } else if (i > FSP_EXTENT_SIZE / 2) {
-                /* Pages inside a block must be contiguous */
-                ut_a(page_no == prev_page_no + 1);
-            }
-
-            prev_page_no = page_no;
-        }
-
-        /* The magic number goes to both copies of the header */
-        mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC,
-                         TRX_SYS_DOUBLEWRITE_MAGIC_N,
-                         MLOG_4BYTES, &mtr);
-        mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC
-                         + TRX_SYS_DOUBLEWRITE_REPEAT,
-                         TRX_SYS_DOUBLEWRITE_MAGIC_N,
-                         MLOG_4BYTES, &mtr);
-
-        mlog_write_ulint(doublewrite
-                         + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
-                         TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
-                         MLOG_4BYTES, &mtr);
-        mtr_commit(&mtr);
-
-        /* Flush the modified pages to disk and make a checkpoint */
-        log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
-
-        fprintf(stderr, "InnoDB: Doublewrite buffer created\n");
-
-        trx_sys_multiple_tablespace_format = TRUE;
-
-        /* The magic number is now in place: the next pass only
-        initializes the in-memory structure */
-        goto start_again;
-    }
-}
-
-/****************************************************************//**
-At a database startup initializes the doublewrite buffer memory structure if
-we already have a doublewrite buffer created in the data files. If we are
-upgrading to an InnoDB version which supports multiple tablespaces, then this
-function performs the necessary update operations. If we are in a crash
-recovery, this function uses a possible doublewrite buffer to restore
-half-written pages in the data files.
-
-If the trx sys header does not carry the doublewrite magic number, the
-function only frees its read buffer and returns. If a data file page and
-its copy in the doublewrite buffer are both corrupt, the process exits. */
-UNIV_INTERN
-void
-trx_sys_doublewrite_init_or_restore_pages(
-/*======================================*/
- ibool restore_corrupt_pages) /*!< in: TRUE=restore pages */
-{
- byte* buf;
- byte* read_buf;
- byte* unaligned_read_buf;
- ulint block1;
- ulint block2;
- ulint source_page_no;
- byte* page;
- byte* doublewrite;
- ulint space_id;
- ulint page_no;
- ulint i;
-
- /* We do the file i/o past the buffer pool */
-
- /* Two pages are allocated so that one page-aligned page fits inside */
- unaligned_read_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
- read_buf = ut_align(unaligned_read_buf, UNIV_PAGE_SIZE);
-
- /* Read the trx sys header to check if we are using the doublewrite
- buffer */
-
- fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, 0,
- UNIV_PAGE_SIZE, read_buf, NULL);
- doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
-
- if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
- == TRX_SYS_DOUBLEWRITE_MAGIC_N) {
- /* The doublewrite buffer has been created */
-
- /* This allocates trx_doublewrite and its write buffer, which
- is used below to hold the pages of both blocks */
- trx_doublewrite_init(doublewrite);
-
- block1 = trx_doublewrite->block1;
- block2 = trx_doublewrite->block2;
-
- buf = trx_doublewrite->write_buf;
- } else {
- /* No doublewrite buffer in the data files: nothing to do */
- goto leave_func;
- }
-
- if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED)
- != TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N) {
-
- /* We are upgrading from a version < 4.1.x to a version where
- multiple tablespaces are supported. We must reset the space id
- field in the pages in the doublewrite buffer because starting
- from this version the space id is stored to
- FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */
-
- trx_doublewrite_must_reset_space_ids = TRUE;
-
- fprintf(stderr,
- "InnoDB: Resetting space id's in the"
- " doublewrite buffer\n");
- } else {
- trx_sys_multiple_tablespace_format = TRUE;
- }
-
- /* Read the pages from the doublewrite buffer to memory: block 1 goes
- to the first half of buf, block 2 to the second half */
-
- fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block1, 0,
- TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
- buf, NULL);
- fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block2, 0,
- TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
- buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
- NULL);
- /* Check if any of these pages is half-written in data files, in the
- intended position */
-
- page = buf;
-
- for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) {
-
- /* The page number the copy was intended for is stored in
- the copy itself */
- page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
-
- if (trx_doublewrite_must_reset_space_ids) {
-
- space_id = 0;
- mach_write_to_4(page
- + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0);
- /* We do not need to calculate new checksums for the
- pages because the field .._SPACE_ID does not affect
- them. Write the page back to where we read it from. */
-
- /* Map the index in buf back to the page number inside
- block 1 or block 2 */
- if (i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
- source_page_no = block1 + i;
- } else {
- source_page_no = block2
- + i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
- }
-
- fil_io(OS_FILE_WRITE, TRUE, 0, 0, source_page_no, 0,
- UNIV_PAGE_SIZE, page, NULL);
- /* printf("Resetting space id in page %lu\n",
- source_page_no); */
- } else {
- space_id = mach_read_from_4(
- page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- }
-
- if (!restore_corrupt_pages) {
- /* The database was shut down gracefully: no need to
- restore pages */
-
- } else if (!fil_tablespace_exists_in_mem(space_id)) {
- /* Maybe we have dropped the single-table tablespace
- and this page once belonged to it: do nothing */
-
- } else if (!fil_check_adress_in_tablespace(space_id,
- page_no)) {
- fprintf(stderr,
- "InnoDB: Warning: a page in the"
- " doublewrite buffer is not within space\n"
- "InnoDB: bounds; space id %lu"
- " page number %lu, page %lu in"
- " doublewrite buf.\n",
- (ulong) space_id, (ulong) page_no, (ulong) i);
-
- } else if (space_id == TRX_SYS_SPACE
- && ((page_no >= block1
- && page_no
- < block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
- || (page_no >= block2
- && page_no
- < (block2
- + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)))) {
-
- /* It is an unwritten doublewrite buffer page:
- do nothing */
- } else {
- ulint zip_size = fil_space_get_zip_size(space_id);
-
- /* Read in the actual page from the file; for a
- compressed tablespace only zip_size bytes are read */
- fil_io(OS_FILE_READ, TRUE, space_id, zip_size,
- page_no, 0,
- zip_size ? zip_size : UNIV_PAGE_SIZE,
- read_buf, NULL);
-
- /* Check if the page is corrupt */
-
- if (UNIV_UNLIKELY
- (buf_page_is_corrupted(read_buf, zip_size))) {
-
- fprintf(stderr,
- "InnoDB: Warning: database page"
- " corruption or a failed\n"
- "InnoDB: file read of"
- " space %lu page %lu.\n"
- "InnoDB: Trying to recover it from"
- " the doublewrite buffer.\n",
- (ulong) space_id, (ulong) page_no);
-
- /* The copy can only be used if it is intact
- itself; otherwise both pages are dumped and the
- process exits */
- if (buf_page_is_corrupted(page, zip_size)) {
- fprintf(stderr,
- "InnoDB: Dump of the page:\n");
- buf_page_print(read_buf, zip_size);
- fprintf(stderr,
- "InnoDB: Dump of"
- " corresponding page"
- " in doublewrite buffer:\n");
- buf_page_print(page, zip_size);
-
- fprintf(stderr,
- "InnoDB: Also the page in the"
- " doublewrite buffer"
- " is corrupt.\n"
- "InnoDB: Cannot continue"
- " operation.\n"
- "InnoDB: You can try to"
- " recover the database"
- " with the my.cnf\n"
- "InnoDB: option:\n"
- "InnoDB: set-variable="
- "innodb_force_recovery=6\n");
- exit(1);
- }
-
- /* Write the good page from the
- doublewrite buffer to the intended
- position */
-
- fil_io(OS_FILE_WRITE, TRUE, space_id,
- zip_size, page_no, 0,
- zip_size ? zip_size : UNIV_PAGE_SIZE,
- page, NULL);
- fprintf(stderr,
- "InnoDB: Recovered the page from"
- " the doublewrite buffer.\n");
- }
- }
-
- /* Copies in buf are always spaced UNIV_PAGE_SIZE apart */
- page += UNIV_PAGE_SIZE;
- }
-
- /* NOTE(review): presumably this makes the pages written above
- durable - confirm against fil_flush_file_spaces() */
- fil_flush_file_spaces(FIL_TABLESPACE);
-
-leave_func:
- /* Only the local read buffer is released; trx_doublewrite, if it was
- set up above, stays allocated */
- ut_free(unaligned_read_buf);
-}
-
-/****************************************************************//**
-Searches trx_sys->trx_list for the given transaction object, comparing
-by pointer. The caller must own the kernel mutex.
-@return TRUE if is in */
-UNIV_INTERN
-ibool
-trx_in_trx_list(
-/*============*/
-    trx_t*    in_trx)    /*!< in: trx */
-{
-    trx_t*    cur;
-
-    ut_ad(mutex_own(&(kernel_mutex)));
-
-    for (cur = UT_LIST_GET_FIRST(trx_sys->trx_list);
-         cur != NULL;
-         cur = UT_LIST_GET_NEXT(trx_list, cur)) {
-
-        if (cur == in_trx) {
-
-            return(TRUE);
-        }
-    }
-
-    return(FALSE);
-}
-
-/*****************************************************************//**
-Writes trx_sys->max_trx_id to the TRX_SYS_TRX_ID_STORE field of the file
-based trx system header, in a mini-transaction of its own. The caller
-must own the kernel mutex. */
-UNIV_INTERN
-void
-trx_sys_flush_max_trx_id(void)
-/*==========================*/
-{
-    mtr_t          mtr;
-    trx_sysf_t*    hdr;
-
-    ut_ad(mutex_own(&kernel_mutex));
-
-    mtr_start(&mtr);
-
-    hdr = trx_sysf_get(&mtr);
-
-    mlog_write_dulint(hdr + TRX_SYS_TRX_ID_STORE,
-                      trx_sys->max_trx_id, &mtr);
-
-    mtr_commit(&mtr);
-}
-
-/*****************************************************************//**
-Updates the offset information about the end of the MySQL binlog entry
-which corresponds to the transaction just being committed. In a MySQL
-replication slave updates the latest master binlog position up to which
-replication has proceeded. Fields that already hold the wanted value
-(magic number, file name, a zero high word) are not rewritten; a file
-name that does not fit the reserved field makes the call a no-op. */
-UNIV_INTERN
-void
-trx_sys_update_mysql_binlog_offset(
-/*===============================*/
-    const char*    file_name,/*!< in: MySQL log file name */
-    ib_int64_t     offset,   /*!< in: position in that log file */
-    ulint          field,    /*!< in: offset of the MySQL log info field in
-                             the trx sys header */
-    mtr_t*         mtr)      /*!< in: mtr */
-{
-    trx_sysf_t*    log_info;
-    ulint          name_len = ut_strlen(file_name);
-    ib_int64_t     offset_high = offset >> 32;
-
-    if (name_len >= TRX_SYS_MYSQL_LOG_NAME_LEN) {
-
-        /* The name does not fit in the space reserved for it */
-
-        return;
-    }
-
-    log_info = trx_sysf_get(mtr) + field;
-
-    /* Stamp the magic number unless it is there already */
-    if (mach_read_from_4(log_info + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
-        != TRX_SYS_MYSQL_LOG_MAGIC_N) {
-
-        mlog_write_ulint(log_info + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD,
-                         TRX_SYS_MYSQL_LOG_MAGIC_N,
-                         MLOG_4BYTES, mtr);
-    }
-
-    /* Write the name, with its terminating NUL, only if it changed */
-    if (strcmp((char*) (log_info + TRX_SYS_MYSQL_LOG_NAME),
-               file_name) != 0) {
-
-        mlog_write_string(log_info + TRX_SYS_MYSQL_LOG_NAME,
-                          (byte*) file_name, 1 + name_len,
-                          mtr);
-    }
-
-    /* The high word is written when either the stored or the new
-    value is nonzero */
-    if (mach_read_from_4(log_info + TRX_SYS_MYSQL_LOG_OFFSET_HIGH) > 0
-        || offset_high > 0) {
-
-        mlog_write_ulint(log_info + TRX_SYS_MYSQL_LOG_OFFSET_HIGH,
-                         (ulint) offset_high,
-                         MLOG_4BYTES, mtr);
-    }
-
-    /* The low word is always written */
-    mlog_write_ulint(log_info + TRX_SYS_MYSQL_LOG_OFFSET_LOW,
-                     (ulint)(offset & 0xFFFFFFFFUL),
-                     MLOG_4BYTES, mtr);
-}
-
-/*****************************************************************//**
-Reads the MySQL binlog file name and offset from the trx system header
-if the magic number shows them valid, stores them in
-trx_sys_mysql_bin_log_name and trx_sys_mysql_bin_log_pos, and prints
-the info to stderr. Does nothing if the magic number is missing. */
-UNIV_INTERN
-void
-trx_sys_print_mysql_binlog_offset(void)
-/*===================================*/
-{
-    trx_sysf_t*    sys_header;
-    mtr_t          mtr;
-    ulint          trx_sys_mysql_bin_log_pos_high;
-    ulint          trx_sys_mysql_bin_log_pos_low;
-
-    mtr_start(&mtr);
-
-    sys_header = trx_sysf_get(&mtr);
-
-    if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
-                         + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
-        != TRX_SYS_MYSQL_LOG_MAGIC_N) {
-
-        /* No valid binlog info has been stored */
-        mtr_commit(&mtr);
-
-        return;
-    }
-
-    trx_sys_mysql_bin_log_pos_high = mach_read_from_4(
-        sys_header + TRX_SYS_MYSQL_LOG_INFO
-        + TRX_SYS_MYSQL_LOG_OFFSET_HIGH);
-    trx_sys_mysql_bin_log_pos_low = mach_read_from_4(
-        sys_header + TRX_SYS_MYSQL_LOG_INFO
-        + TRX_SYS_MYSQL_LOG_OFFSET_LOW);
-
-    /* Combine the two 32-bit halves into the 64-bit position */
-    trx_sys_mysql_bin_log_pos
-        = (((ib_int64_t)trx_sys_mysql_bin_log_pos_high) << 32)
-        + (ib_int64_t)trx_sys_mysql_bin_log_pos_low;
-
-    ut_memcpy(trx_sys_mysql_bin_log_name,
-              sys_header + TRX_SYS_MYSQL_LOG_INFO
-              + TRX_SYS_MYSQL_LOG_NAME, TRX_SYS_MYSQL_LOG_NAME_LEN);
-
-    /* ulint is not necessarily unsigned long: cast the arguments to
-    match %lu, as the other messages in this file do */
-    fprintf(stderr,
-            "InnoDB: Last MySQL binlog file position %lu %lu,"
-            " file name %s\n",
-            (ulong) trx_sys_mysql_bin_log_pos_high,
-            (ulong) trx_sys_mysql_bin_log_pos_low,
-            trx_sys_mysql_bin_log_name);
-
-    mtr_commit(&mtr);
-}
-
-/*****************************************************************//**
-Prints to stderr the MySQL master log offset info in the trx system header if
-the magic number shows it valid, and copies the file name and position
-to trx_sys_mysql_master_log_name and trx_sys_mysql_master_log_pos. */
-UNIV_INTERN
-void
-trx_sys_print_mysql_master_log_pos(void)
-/*====================================*/
-{
-    mtr_t          mtr;
-    trx_sysf_t*    master_info;
-    ulint          pos_high;
-    ulint          pos_low;
-
-    mtr_start(&mtr);
-
-    master_info = trx_sysf_get(&mtr) + TRX_SYS_MYSQL_MASTER_LOG_INFO;
-
-    if (mach_read_from_4(master_info + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
-        != TRX_SYS_MYSQL_LOG_MAGIC_N) {
-
-        /* No valid master log info has been stored */
-        mtr_commit(&mtr);
-
-        return;
-    }
-
-    pos_high = mach_read_from_4(
-        master_info + TRX_SYS_MYSQL_LOG_OFFSET_HIGH);
-    pos_low = mach_read_from_4(
-        master_info + TRX_SYS_MYSQL_LOG_OFFSET_LOW);
-
-    fprintf(stderr,
-            "InnoDB: In a MySQL replication slave the last"
-            " master binlog file\n"
-            "InnoDB: position %lu %lu, file name %s\n",
-            (ulong) pos_high,
-            (ulong) pos_low,
-            master_info + TRX_SYS_MYSQL_LOG_NAME);
-
-    /* Keep a copy of the master log position in the global variables
-    so that ha_innobase.cc can initialize glob_mi to right values */
-
-    ut_memcpy(trx_sys_mysql_master_log_name,
-              master_info + TRX_SYS_MYSQL_LOG_NAME,
-              TRX_SYS_MYSQL_LOG_NAME_LEN);
-
-    trx_sys_mysql_master_log_pos
-        = (((ib_int64_t) pos_high) << 32)
-        + ((ib_int64_t) pos_low);
-
-    mtr_commit(&mtr);
-}
-
-/****************************************************************//**
-Looks for a free slot for a rollback segment in the trx system file copy.
-@return slot index or ULINT_UNDEFINED if not found */
-UNIV_INTERN
-ulint
-trx_sysf_rseg_find_free(
-/*====================*/
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_sysf_t* sys_header;
- ulint page_no;
- ulint i;
-
- ut_ad(mutex_own(&(kernel_mutex)));
-
- sys_header = trx_sysf_get(mtr);
-
- for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
-
- page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr);
-
- if (page_no == FIL_NULL) {
-
- return(i);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-/*****************************************************************//**
-Creates the file page for the transaction system. This function is called only
-at the database creation, before trx_sys_init. */
-static
-void
-trx_sysf_create(
-/*============*/
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_sysf_t* sys_header;
- ulint slot_no;
- buf_block_t* block;
- page_t* page;
- ulint page_no;
- ulint i;
-
- ut_ad(mtr);
-
- /* Note that below we first reserve the file space x-latch, and
- then enter the kernel: we must do it in this order to conform
- to the latching order rules. */
-
- mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE, NULL), mtr);
- mutex_enter(&kernel_mutex);
-
- /* Create the trx sys file block in a new allocated file segment */
- block = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
- mtr);
- buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
-
- ut_a(buf_block_get_page_no(block) == TRX_SYS_PAGE_NO);
-
- page = buf_block_get_frame(block);
-
- mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS,
- MLOG_2BYTES, mtr);
-
- /* Reset the doublewrite buffer magic number to zero so that we
- know that the doublewrite buffer has not yet been created (this
- suppresses a Valgrind warning) */
-
- mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE
- + TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr);
-
- sys_header = trx_sysf_get(mtr);
-
- /* Start counting transaction ids from number 1 up */
- mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
- ut_dulint_create(0, 1), mtr);
-
- /* Reset the rollback segment slots */
- for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
-
- trx_sysf_rseg_set_space(sys_header, i, ULINT_UNDEFINED, mtr);
- trx_sysf_rseg_set_page_no(sys_header, i, FIL_NULL, mtr);
- }
-
- /* The remaining area (up to the page trailer) is uninitialized.
- Silence Valgrind warnings about it. */
- UNIV_MEM_VALID(sys_header + (TRX_SYS_RSEGS
- + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
- + TRX_SYS_RSEG_SPACE),
- (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
- - (TRX_SYS_RSEGS
- + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
- + TRX_SYS_RSEG_SPACE))
- + page - sys_header);
-
- /* Create the first rollback segment in the SYSTEM tablespace */
- page_no = trx_rseg_header_create(TRX_SYS_SPACE, 0, ULINT_MAX, &slot_no,
- mtr);
- ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
- ut_a(page_no != FIL_NULL);
-
- mutex_exit(&kernel_mutex);
-}
-
-/*****************************************************************//**
-Creates and initializes the central memory structures for the transaction
-system. This is called when the database is started. */
-UNIV_INTERN
-void
-trx_sys_init_at_db_start(void)
-/*==========================*/
-{
- trx_sysf_t* sys_header;
- ib_int64_t rows_to_undo = 0;
- const char* unit = "";
- trx_t* trx;
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- ut_ad(trx_sys == NULL);
-
- mutex_enter(&kernel_mutex);
-
- trx_sys = mem_alloc(sizeof(trx_sys_t));
-
- sys_header = trx_sysf_get(&mtr);
-
- trx_rseg_list_and_array_init(sys_header, &mtr);
-
- trx_sys->latest_rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
-
- /* VERY important: after the database is started, max_trx_id value is
- divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in
- trx_sys_get_new_trx_id will evaluate to TRUE when the function
- is first time called, and the value for trx id will be written
- to the disk-based header! Thus trx id values will not overlap when
- the database is repeatedly started! */
-
- trx_sys->max_trx_id = ut_dulint_add(
- ut_dulint_align_up(mtr_read_dulint(
- sys_header
- + TRX_SYS_TRX_ID_STORE, &mtr),
- TRX_SYS_TRX_ID_WRITE_MARGIN),
- 2 * TRX_SYS_TRX_ID_WRITE_MARGIN);
-
- UT_LIST_INIT(trx_sys->mysql_trx_list);
- trx_dummy_sess = sess_open();
- trx_lists_init_at_db_start();
-
- if (UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
- trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-
- for (;;) {
-
- if ( trx->conc_state != TRX_PREPARED) {
- rows_to_undo += ut_conv_dulint_to_longlong(
- trx->undo_no);
- }
-
- trx = UT_LIST_GET_NEXT(trx_list, trx);
-
- if (!trx) {
- break;
- }
- }
-
- if (rows_to_undo > 1000000000) {
- unit = "M";
- rows_to_undo = rows_to_undo / 1000000;
- }
-
- fprintf(stderr,
- "InnoDB: %lu transaction(s) which must be"
- " rolled back or cleaned up\n"
- "InnoDB: in total %lu%s row operations to undo\n",
- (ulong) UT_LIST_GET_LEN(trx_sys->trx_list),
- (ulong) rows_to_undo, unit);
-
- fprintf(stderr, "InnoDB: Trx id counter is " TRX_ID_FMT "\n",
- TRX_ID_PREP_PRINTF(trx_sys->max_trx_id));
- }
-
- UT_LIST_INIT(trx_sys->view_list);
-
- trx_purge_sys_create();
-
- mutex_exit(&kernel_mutex);
-
- mtr_commit(&mtr);
-}
-
-/*****************************************************************//**
-Creates and initializes the transaction system at the database creation. */
-UNIV_INTERN
-void
-trx_sys_create(void)
-/*================*/
-{
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- trx_sysf_create(&mtr);
-
- mtr_commit(&mtr);
-
- trx_sys_init_at_db_start();
-}
-
-/*****************************************************************//**
-Update the file format tag.
-@return always TRUE */
-static
-ibool
-trx_sys_file_format_max_write(
-/*==========================*/
- ulint format_id, /*!< in: file format id */
- const char** name) /*!< out: max file format name, can
- be NULL */
-{
- mtr_t mtr;
- byte* ptr;
- buf_block_t* block;
- ulint tag_value_low;
-
- mtr_start(&mtr);
-
- block = buf_page_get(
- TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
-
- file_format_max.id = format_id;
- file_format_max.name = trx_sys_file_format_id_to_name(format_id);
-
- ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
- tag_value_low = format_id + TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW;
-
- if (name) {
- *name = file_format_max.name;
- }
-
- mlog_write_dulint(
- ptr,
- ut_dulint_create(TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH,
- tag_value_low),
- &mtr);
-
- mtr_commit(&mtr);
-
- return(TRUE);
-}
-
-/*****************************************************************//**
-Read the file format tag.
-@return the file format or ULINT_UNDEFINED if not set. */
-static
-ulint
-trx_sys_file_format_max_read(void)
-/*==============================*/
-{
- mtr_t mtr;
- const byte* ptr;
- const buf_block_t* block;
- ulint format_id;
- dulint file_format_id;
-
- /* Since this is called during the startup phase it's safe to
- read the value without a covering mutex. */
- mtr_start(&mtr);
-
- block = buf_page_get(
- TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
-
- ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
- file_format_id = mach_read_from_8(ptr);
-
- mtr_commit(&mtr);
-
- format_id = file_format_id.low - TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW;
-
- if (file_format_id.high != TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH
- || format_id >= FILE_FORMAT_NAME_N) {
-
- /* Either it has never been tagged, or garbage in it. */
- return(ULINT_UNDEFINED);
- }
-
- return(format_id);
-}
-
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the name */
-UNIV_INTERN
-const char*
-trx_sys_file_format_id_to_name(
-/*===========================*/
- const ulint id) /*!< in: id of the file format */
-{
- ut_a(id < FILE_FORMAT_NAME_N);
-
- return(file_format_name_map[id]);
-}
-
-/*****************************************************************//**
-Check for the max file format tag stored on disk. Note: If max_format_id
-is == DICT_TF_FORMAT_MAX + 1 then we only print a warning.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ulint
-trx_sys_file_format_max_check(
-/*==========================*/
- ulint max_format_id) /*!< in: max format id to check */
-{
- ulint format_id;
-
- /* Check the file format in the tablespace. Do not try to
- recover if the file format is not supported by the engine
- unless forced by the user. */
- format_id = trx_sys_file_format_max_read();
- if (format_id == ULINT_UNDEFINED) {
- /* Format ID was not set. Set it to minimum possible
- value. */
- format_id = DICT_TF_FORMAT_51;
- }
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: highest supported file format is %s.\n",
- trx_sys_file_format_id_to_name(DICT_TF_FORMAT_MAX));
-
- if (format_id > DICT_TF_FORMAT_MAX) {
-
- ut_a(format_id < FILE_FORMAT_NAME_N);
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: %s: the system tablespace is in a file "
- "format that this version doesn't support - %s\n",
- ((max_format_id <= DICT_TF_FORMAT_MAX)
- ? "Error" : "Warning"),
- trx_sys_file_format_id_to_name(format_id));
-
- if (max_format_id <= DICT_TF_FORMAT_MAX) {
- return(DB_ERROR);
- }
- }
-
- format_id = (format_id > max_format_id) ? format_id : max_format_id;
-
- /* We don't need a mutex here, as this function should only
- be called once at start up. */
- file_format_max.id = format_id;
- file_format_max.name = trx_sys_file_format_id_to_name(format_id);
-
- return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Set the file format id unconditionally except if it's already the
-same value.
-@return TRUE if value updated */
-UNIV_INTERN
-ibool
-trx_sys_file_format_max_set(
-/*========================*/
- ulint format_id, /*!< in: file format id */
- const char** name) /*!< out: max file format name or
- NULL if not needed. */
-{
- ibool ret = FALSE;
-
- ut_a(format_id <= DICT_TF_FORMAT_MAX);
-
- mutex_enter(&file_format_max.mutex);
-
- /* Only update if not already same value. */
- if (format_id != file_format_max.id) {
-
- ret = trx_sys_file_format_max_write(format_id, name);
- }
-
- mutex_exit(&file_format_max.mutex);
-
- return(ret);
-}
-
-/********************************************************************//**
-Tags the system table space with minimum format id if it has not been
-tagged yet.
-WARNING: This function is only called during the startup and AFTER the
-redo log application during recovery has finished. */
-UNIV_INTERN
-void
-trx_sys_file_format_tag_init(void)
-/*==============================*/
-{
- ulint format_id;
-
- format_id = trx_sys_file_format_max_read();
-
- /* If format_id is not set then set it to the minimum. */
- if (format_id == ULINT_UNDEFINED) {
- trx_sys_file_format_max_set(DICT_TF_FORMAT_51, NULL);
- }
-}
-
-/********************************************************************//**
-Update the file format tag in the system tablespace only if the given
-format id is greater than the known max id.
-@return TRUE if format_id was bigger than the known max id */
-UNIV_INTERN
-ibool
-trx_sys_file_format_max_upgrade(
-/*============================*/
- const char** name, /*!< out: max file format name */
- ulint format_id) /*!< in: file format identifier */
-{
- ibool ret = FALSE;
-
- ut_a(name);
- ut_a(file_format_max.name != NULL);
- ut_a(format_id <= DICT_TF_FORMAT_MAX);
-
- mutex_enter(&file_format_max.mutex);
-
- if (format_id > file_format_max.id) {
-
- ret = trx_sys_file_format_max_write(format_id, name);
- }
-
- mutex_exit(&file_format_max.mutex);
-
- return(ret);
-}
-
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the max format name */
-UNIV_INTERN
-const char*
-trx_sys_file_format_max_get(void)
-/*=============================*/
-{
- return(file_format_max.name);
-}
-
-/*****************************************************************//**
-Initializes the tablespace tag system. */
-UNIV_INTERN
-void
-trx_sys_file_format_init(void)
-/*==========================*/
-{
- mutex_create(&file_format_max.mutex, SYNC_FILE_FORMAT_TAG);
-
- /* We don't need a mutex here, as this function should only
- be called once at start up. */
- file_format_max.id = DICT_TF_FORMAT_51;
-
- file_format_max.name = trx_sys_file_format_id_to_name(
- file_format_max.id);
-}
-
-/*****************************************************************//**
-Closes the tablespace tag system. */
-UNIV_INTERN
-void
-trx_sys_file_format_close(void)
-/*===========================*/
-{
- /* Does nothing at the moment */
-}
-#else /* !UNIV_HOTBACKUP */
-/*****************************************************************//**
-Prints to stderr the MySQL binlog info in the system header if the
-magic number shows it valid. */
-UNIV_INTERN
-void
-trx_sys_print_mysql_binlog_offset_from_page(
-/*========================================*/
- const byte* page) /*!< in: buffer containing the trx
- system header page, i.e., page number
- TRX_SYS_PAGE_NO in the tablespace */
-{
- const trx_sysf_t* sys_header;
-
- sys_header = page + TRX_SYS;
-
- if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
- == TRX_SYS_MYSQL_LOG_MAGIC_N) {
-
- fprintf(stderr,
- "ibbackup: Last MySQL binlog file position %lu %lu,"
- " file name %s\n",
- (ulong) mach_read_from_4(
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
- (ulong) mach_read_from_4(
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_LOW),
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_NAME);
- }
-}
-
-
-/* THESE ARE COPIED FROM NON-HOTBACKUP PART OF THE INNODB SOURCE TREE
- (This code duplicaton should be fixed at some point!)
-*/
-
-#define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */
-/* The offset of the file format tag on the trx system header page */
-#define TRX_SYS_FILE_FORMAT_TAG (UNIV_PAGE_SIZE - 16)
-/* We use these random constants to reduce the probability of reading
-garbage (from previous versions) that maps to an actual format id. We
-use these as bit masks at the time of reading and writing from/to disk. */
-#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW 3645922177UL
-#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH 2745987765UL
-
-/* END OF COPIED DEFINITIONS */
-
-
-/*****************************************************************//**
-Reads the file format id from the first system table space file.
-Even if the call succeeds and returns TRUE, the returned format id
-may be ULINT_UNDEFINED signalling that the format id was not present
-in the data file.
-@return TRUE if call succeeds */
-UNIV_INTERN
-ibool
-trx_sys_read_file_format_id(
-/*========================*/
- const char *pathname, /*!< in: pathname of the first system
- table space file */
- ulint *format_id) /*!< out: file format of the system table
- space */
-{
- os_file_t file;
- ibool success;
- byte buf[UNIV_PAGE_SIZE * 2];
- page_t* page = ut_align(buf, UNIV_PAGE_SIZE);
- const byte* ptr;
- dulint file_format_id;
-
- *format_id = ULINT_UNDEFINED;
-
- file = os_file_create_simple_no_error_handling(
- pathname,
- OS_FILE_OPEN,
- OS_FILE_READ_ONLY,
- &success
- );
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
-" ibbackup: Error: trying to read system tablespace file format,\n"
-" ibbackup: but could not open the tablespace file %s!\n",
- pathname
- );
- return(FALSE);
- }
-
- /* Read the page on which file format is stored */
-
- success = os_file_read_no_error_handling(
- file, page, TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE, 0, UNIV_PAGE_SIZE
- );
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
-" ibbackup: Error: trying to read system table space file format,\n"
-" ibbackup: but failed to read the tablespace file %s!\n",
- pathname
- );
- os_file_close(file);
- return(FALSE);
- }
- os_file_close(file);
-
- /* get the file format from the page */
- ptr = page + TRX_SYS_FILE_FORMAT_TAG;
- file_format_id = mach_read_from_8(ptr);
-
- *format_id = file_format_id.low - TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW;
-
- if (file_format_id.high != TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH
- || *format_id >= FILE_FORMAT_NAME_N) {
-
- /* Either it has never been tagged, or garbage in it. */
- *format_id = ULINT_UNDEFINED;
- return(TRUE);
- }
-
- return(TRUE);
-}
-
-
-/*****************************************************************//**
-Reads the file format id from the given per-table data file.
-@return TRUE if call succeeds */
-UNIV_INTERN
-ibool
-trx_sys_read_pertable_file_format_id(
-/*=================================*/
- const char *pathname, /*!< in: pathname of a per-table
- datafile */
- ulint *format_id) /*!< out: file format of the per-table
- data file */
-{
- os_file_t file;
- ibool success;
- byte buf[UNIV_PAGE_SIZE * 2];
- page_t* page = ut_align(buf, UNIV_PAGE_SIZE);
- const byte* ptr;
- ib_uint32_t flags;
-
- *format_id = ULINT_UNDEFINED;
-
- file = os_file_create_simple_no_error_handling(
- pathname,
- OS_FILE_OPEN,
- OS_FILE_READ_ONLY,
- &success
- );
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
-" ibbackup: Error: trying to read per-table tablespace format,\n"
-" ibbackup: but could not open the tablespace file %s!\n",
- pathname
- );
- return(FALSE);
- }
-
- /* Read the first page of the per-table datafile */
-
- success = os_file_read_no_error_handling(
- file, page, 0, 0, UNIV_PAGE_SIZE
- );
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
-" ibbackup: Error: trying to per-table data file format,\n"
-" ibbackup: but failed to read the tablespace file %s!\n",
- pathname
- );
- os_file_close(file);
- return(FALSE);
- }
- os_file_close(file);
-
- /* get the file format from the page */
- ptr = page + 54;
- flags = mach_read_from_4(ptr);
- if (flags == 0) {
- /* file format is Antelope */
- *format_id = 0;
- return (TRUE);
- } else if (flags & 1) {
- /* tablespace flags are ok */
- *format_id = (flags / 32) % 128;
- return (TRUE);
- } else {
- /* bad tablespace flags */
- return(FALSE);
- }
-}
-
-
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the name */
-UNIV_INTERN
-const char*
-trx_sys_file_format_id_to_name(
-/*===========================*/
- const ulint id) /*!< in: id of the file format */
-{
- if (!(id < FILE_FORMAT_NAME_N)) {
- /* unknown id */
- return ("Unknown");
- }
-
- return(file_format_name_map[id]);
-}
-
-#endif /* !UNIV_HOTBACKUP */
-
-/*********************************************************************
-Shutdown/Close the transaction system. */
-UNIV_INTERN
-void
-trx_sys_close(void)
-/*===============*/
-{
- trx_rseg_t* rseg;
- read_view_t* view;
-
- ut_ad(trx_sys != NULL);
-
- /* Check that all read views are closed except read view owned
- by a purge. */
-
- if (UT_LIST_GET_LEN(trx_sys->view_list) > 1) {
- fprintf(stderr,
- "InnoDB: Error: all read views were not closed"
- " before shutdown:\n"
- "InnoDB: %lu read views open \n",
- UT_LIST_GET_LEN(trx_sys->view_list) - 1);
- }
-
- sess_close(trx_dummy_sess);
- trx_dummy_sess = NULL;
-
- trx_purge_sys_close();
-
- mutex_enter(&kernel_mutex);
-
- /* Free the double write data structures. */
- ut_a(trx_doublewrite != NULL);
- ut_free(trx_doublewrite->write_buf_unaligned);
- trx_doublewrite->write_buf_unaligned = NULL;
-
- mem_free(trx_doublewrite->buf_block_arr);
- trx_doublewrite->buf_block_arr = NULL;
-
- mutex_free(&trx_doublewrite->mutex);
- mem_free(trx_doublewrite);
- trx_doublewrite = NULL;
-
- /* There can't be any active transactions. */
- rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
-
- while (rseg != NULL) {
- trx_rseg_t* prev_rseg = rseg;
-
- rseg = UT_LIST_GET_NEXT(rseg_list, prev_rseg);
- UT_LIST_REMOVE(rseg_list, trx_sys->rseg_list, prev_rseg);
-
- trx_rseg_mem_free(prev_rseg);
- }
-
- view = UT_LIST_GET_FIRST(trx_sys->view_list);
-
- while (view != NULL) {
- read_view_t* prev_view = view;
-
- view = UT_LIST_GET_NEXT(view_list, prev_view);
-
- /* Views are allocated from the trx_sys->global_read_view_heap.
- So, we simply remove the element here. */
- UT_LIST_REMOVE(view_list, trx_sys->view_list, prev_view);
- }
-
- ut_a(UT_LIST_GET_LEN(trx_sys->trx_list) == 0);
- ut_a(UT_LIST_GET_LEN(trx_sys->rseg_list) == 0);
- ut_a(UT_LIST_GET_LEN(trx_sys->view_list) == 0);
- ut_a(UT_LIST_GET_LEN(trx_sys->mysql_trx_list) == 0);
-
- mem_free(trx_sys);
-
- trx_sys = NULL;
- mutex_exit(&kernel_mutex);
-}
diff --git a/storage/innodb_plugin/trx/trx0trx.c b/storage/innodb_plugin/trx/trx0trx.c
deleted file mode 100644
index 21ba6e481a7..00000000000
--- a/storage/innodb_plugin/trx/trx0trx.c
+++ /dev/null
@@ -1,2063 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file trx/trx0trx.c
-The transaction
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0trx.h"
-
-#ifdef UNIV_NONINL
-#include "trx0trx.ic"
-#endif
-
-#include "trx0undo.h"
-#include "trx0rseg.h"
-#include "log0log.h"
-#include "que0que.h"
-#include "lock0lock.h"
-#include "trx0roll.h"
-#include "usr0sess.h"
-#include "read0read.h"
-#include "srv0srv.h"
-#include "thr0loc.h"
-#include "btr0sea.h"
-#include "os0proc.h"
-#include "trx0xa.h"
-#include "ha_prototypes.h"
-
-/** Dummy session used currently in MySQL interface */
-UNIV_INTERN sess_t* trx_dummy_sess = NULL;
-
-/** Number of transactions currently allocated for MySQL: protected by
-the kernel mutex */
-UNIV_INTERN ulint trx_n_mysql_transactions = 0;
-
-/*************************************************************//**
-Set detailed error message for the transaction. */
-UNIV_INTERN
-void
-trx_set_detailed_error(
-/*===================*/
- trx_t* trx, /*!< in: transaction struct */
- const char* msg) /*!< in: detailed error message */
-{
- ut_strlcpy(trx->detailed_error, msg, sizeof(trx->detailed_error));
-}
-
-/*************************************************************//**
-Set detailed error message for the transaction from a file. Note that the
-file is rewinded before reading from it. */
-UNIV_INTERN
-void
-trx_set_detailed_error_from_file(
-/*=============================*/
- trx_t* trx, /*!< in: transaction struct */
- FILE* file) /*!< in: file to read message from */
-{
- os_file_read_string(file, trx->detailed_error,
- sizeof(trx->detailed_error));
-}
-
-/****************************************************************//**
-Creates and initializes a transaction object.
-@return own: the transaction */
-UNIV_INTERN
-trx_t*
-trx_create(
-/*=======*/
- sess_t* sess) /*!< in: session */
-{
- trx_t* trx;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(sess);
-
- trx = mem_alloc(sizeof(trx_t));
-
- trx->magic_n = TRX_MAGIC_N;
-
- trx->op_info = "";
-
- trx->is_purge = 0;
- trx->is_recovered = 0;
- trx->conc_state = TRX_NOT_STARTED;
- trx->start_time = time(NULL);
-
- trx->isolation_level = TRX_ISO_REPEATABLE_READ;
-
- trx->id = ut_dulint_zero;
- trx->no = ut_dulint_max;
-
- trx->support_xa = TRUE;
-
- trx->check_foreigns = TRUE;
- trx->check_unique_secondary = TRUE;
-
- trx->flush_log_later = FALSE;
- trx->must_flush_log_later = FALSE;
-
- trx->dict_operation = TRX_DICT_OP_NONE;
- trx->table_id = ut_dulint_zero;
-
- trx->mysql_thd = NULL;
- trx->mysql_query_str = NULL;
- trx->active_trans = 0;
- trx->duplicates = 0;
-
- trx->n_mysql_tables_in_use = 0;
- trx->mysql_n_tables_locked = 0;
-
- trx->mysql_log_file_name = NULL;
- trx->mysql_log_offset = 0;
-
- mutex_create(&trx->undo_mutex, SYNC_TRX_UNDO);
-
- trx->rseg = NULL;
-
- trx->undo_no = ut_dulint_zero;
- trx->last_sql_stat_start.least_undo_no = ut_dulint_zero;
- trx->insert_undo = NULL;
- trx->update_undo = NULL;
- trx->undo_no_arr = NULL;
-
- trx->error_state = DB_SUCCESS;
- trx->error_key_num = 0;
- trx->detailed_error[0] = '\0';
-
- trx->sess = sess;
- trx->que_state = TRX_QUE_RUNNING;
- trx->n_active_thrs = 0;
-
- trx->handling_signals = FALSE;
-
- UT_LIST_INIT(trx->signals);
- UT_LIST_INIT(trx->reply_signals);
-
- trx->graph = NULL;
-
- trx->wait_lock = NULL;
- trx->was_chosen_as_deadlock_victim = FALSE;
- UT_LIST_INIT(trx->wait_thrs);
-
- trx->lock_heap = mem_heap_create_in_buffer(256);
- UT_LIST_INIT(trx->trx_locks);
-
- UT_LIST_INIT(trx->trx_savepoints);
-
- trx->dict_operation_lock_mode = 0;
- trx->has_search_latch = FALSE;
- trx->search_latch_timeout = BTR_SEA_TIMEOUT;
-
- trx->declared_to_be_inside_innodb = FALSE;
- trx->n_tickets_to_enter_innodb = 0;
-
- trx->global_read_view_heap = mem_heap_create(256);
- trx->global_read_view = NULL;
- trx->read_view = NULL;
-
- /* Set X/Open XA transaction identification to NULL */
- memset(&trx->xid, 0, sizeof(trx->xid));
- trx->xid.formatID = -1;
-
- trx->n_autoinc_rows = 0;
-
- /* Remember to free the vector explicitly. */
- trx->autoinc_locks = ib_vector_create(
- mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 4), 4);
-
- return(trx);
-}
-
-/********************************************************************//**
-Creates a transaction object for MySQL.
-@return own: transaction object */
-UNIV_INTERN
-trx_t*
-trx_allocate_for_mysql(void)
-/*========================*/
-{
- trx_t* trx;
-
- mutex_enter(&kernel_mutex);
-
- trx = trx_create(trx_dummy_sess);
-
- trx_n_mysql_transactions++;
-
- UT_LIST_ADD_FIRST(mysql_trx_list, trx_sys->mysql_trx_list, trx);
-
- mutex_exit(&kernel_mutex);
-
- trx->mysql_thread_id = os_thread_get_curr_id();
-
- trx->mysql_process_no = os_proc_get_number();
-
- return(trx);
-}
-
-/********************************************************************//**
-Creates a transaction object for background operations by the master thread.
-@return own: transaction object */
-UNIV_INTERN
-trx_t*
-trx_allocate_for_background(void)
-/*=============================*/
-{
- trx_t* trx;
-
- mutex_enter(&kernel_mutex);
-
- trx = trx_create(trx_dummy_sess);
-
- mutex_exit(&kernel_mutex);
-
- return(trx);
-}
-
-/********************************************************************//**
-Releases the search latch if trx has reserved it. */
-UNIV_INTERN
-void
-trx_search_latch_release_if_reserved(
-/*=================================*/
- trx_t* trx) /*!< in: transaction */
-{
- if (trx->has_search_latch) {
- rw_lock_s_unlock(&btr_search_latch);
-
- trx->has_search_latch = FALSE;
- }
-}
-
-/********************************************************************//**
-Frees a transaction object. */
-UNIV_INTERN
-void
-trx_free(
-/*=====*/
- trx_t* trx) /*!< in, own: trx object */
-{
- ut_ad(mutex_own(&kernel_mutex));
-
- if (trx->declared_to_be_inside_innodb) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: Freeing a trx which is declared"
- " to be processing\n"
- "InnoDB: inside InnoDB.\n", stderr);
- trx_print(stderr, trx, 600);
- putc('\n', stderr);
-
- /* This is an error but not a fatal error. We must keep
- the counters like srv_conc_n_threads accurate. */
- srv_conc_force_exit_innodb(trx);
- }
-
- if (trx->n_mysql_tables_in_use != 0
- || trx->mysql_n_tables_locked != 0) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: MySQL is freeing a thd\n"
- "InnoDB: though trx->n_mysql_tables_in_use is %lu\n"
- "InnoDB: and trx->mysql_n_tables_locked is %lu.\n",
- (ulong)trx->n_mysql_tables_in_use,
- (ulong)trx->mysql_n_tables_locked);
-
- trx_print(stderr, trx, 600);
-
- ut_print_buf(stderr, trx, sizeof(trx_t));
- putc('\n', stderr);
- }
-
- ut_a(trx->magic_n == TRX_MAGIC_N);
-
- trx->magic_n = 11112222;
-
- ut_a(trx->conc_state == TRX_NOT_STARTED);
-
- mutex_free(&(trx->undo_mutex));
-
- ut_a(trx->insert_undo == NULL);
- ut_a(trx->update_undo == NULL);
-
- if (trx->undo_no_arr) {
- trx_undo_arr_free(trx->undo_no_arr);
- }
-
- ut_a(UT_LIST_GET_LEN(trx->signals) == 0);
- ut_a(UT_LIST_GET_LEN(trx->reply_signals) == 0);
-
- ut_a(trx->wait_lock == NULL);
- ut_a(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
-
- ut_a(!trx->has_search_latch);
-
- ut_a(trx->dict_operation_lock_mode == 0);
-
- if (trx->lock_heap) {
- mem_heap_free(trx->lock_heap);
- }
-
- ut_a(UT_LIST_GET_LEN(trx->trx_locks) == 0);
-
- if (trx->global_read_view_heap) {
- mem_heap_free(trx->global_read_view_heap);
- }
-
- trx->global_read_view = NULL;
-
- ut_a(trx->read_view == NULL);
-
- ut_a(ib_vector_is_empty(trx->autoinc_locks));
- /* We allocated a dedicated heap for the vector. */
- ib_vector_free(trx->autoinc_locks);
-
- mem_free(trx);
-}
-
-/********************************************************************//**
-Frees a transaction object for MySQL. */
-UNIV_INTERN
-void
-trx_free_for_mysql(
-/*===============*/
- trx_t* trx) /*!< in, own: trx object */
-{
- mutex_enter(&kernel_mutex);
-
- UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx);
-
- trx_free(trx);
-
- ut_a(trx_n_mysql_transactions > 0);
-
- trx_n_mysql_transactions--;
-
- mutex_exit(&kernel_mutex);
-}
-
-/********************************************************************//**
-Frees a transaction object of a background operation of the master thread. */
-UNIV_INTERN
-void
-trx_free_for_background(
-/*====================*/
- trx_t* trx) /*!< in, own: trx object */
-{
- mutex_enter(&kernel_mutex);
-
- trx_free(trx);
-
- mutex_exit(&kernel_mutex);
-}
-
-/****************************************************************//**
-Inserts the trx handle in the trx system trx list in the right position.
-The list is sorted on the trx id so that the biggest id is at the list
-start. This function is used at the database startup to insert incomplete
-transactions to the list. */
-static
-void
-trx_list_insert_ordered(
-/*====================*/
- trx_t* trx) /*!< in: trx handle */
-{
- trx_t* trx2;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- trx2 = UT_LIST_GET_FIRST(trx_sys->trx_list);
-
- while (trx2 != NULL) {
- if (ut_dulint_cmp(trx->id, trx2->id) >= 0) {
-
- ut_ad(ut_dulint_cmp(trx->id, trx2->id) == 1);
- break;
- }
- trx2 = UT_LIST_GET_NEXT(trx_list, trx2);
- }
-
- if (trx2 != NULL) {
- trx2 = UT_LIST_GET_PREV(trx_list, trx2);
-
- if (trx2 == NULL) {
- UT_LIST_ADD_FIRST(trx_list, trx_sys->trx_list, trx);
- } else {
- UT_LIST_INSERT_AFTER(trx_list, trx_sys->trx_list,
- trx2, trx);
- }
- } else {
- UT_LIST_ADD_LAST(trx_list, trx_sys->trx_list, trx);
- }
-}
-
-/****************************************************************//**
-Creates trx objects for transactions and initializes the trx list of
-trx_sys at database start. Rollback segment and undo log lists must
-already exist when this function is called, because the lists of
-transactions to be rolled back or cleaned up are built based on the
-undo log lists. */
-UNIV_INTERN
-void
-trx_lists_init_at_db_start(void)
-/*============================*/
-{
- trx_rseg_t* rseg;
- trx_undo_t* undo;
- trx_t* trx;
-
- UT_LIST_INIT(trx_sys->trx_list);
-
- /* Look from the rollback segments if there exist undo logs for
- transactions */
-
- rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
-
- while (rseg != NULL) {
- undo = UT_LIST_GET_FIRST(rseg->insert_undo_list);
-
- while (undo != NULL) {
-
- trx = trx_create(trx_dummy_sess);
-
- trx->is_recovered = TRUE;
- trx->id = undo->trx_id;
- trx->xid = undo->xid;
- trx->insert_undo = undo;
- trx->rseg = rseg;
-
- if (undo->state != TRX_UNDO_ACTIVE) {
-
- /* Prepared transactions are left in
- the prepared state waiting for a
- commit or abort decision from MySQL */
-
- if (undo->state == TRX_UNDO_PREPARED) {
-
- fprintf(stderr,
- "InnoDB: Transaction "
- TRX_ID_FMT
- " was in the"
- " XA prepared state.\n",
- TRX_ID_PREP_PRINTF(trx->id));
-
- if (srv_force_recovery == 0) {
-
- trx->conc_state = TRX_PREPARED;
- } else {
- fprintf(stderr,
- "InnoDB: Since"
- " innodb_force_recovery"
- " > 0, we will"
- " rollback it"
- " anyway.\n");
-
- trx->conc_state = TRX_ACTIVE;
- }
- } else {
- trx->conc_state
- = TRX_COMMITTED_IN_MEMORY;
- }
-
- /* We give a dummy value for the trx no;
- this should have no relevance since purge
- is not interested in committed transaction
- numbers, unless they are in the history
- list, in which case it looks the number
- from the disk based undo log structure */
-
- trx->no = trx->id;
- } else {
- trx->conc_state = TRX_ACTIVE;
-
- /* A running transaction always has the number
- field inited to ut_dulint_max */
-
- trx->no = ut_dulint_max;
- }
-
- if (undo->dict_operation) {
- trx_set_dict_operation(
- trx, TRX_DICT_OP_TABLE);
- trx->table_id = undo->table_id;
- }
-
- if (!undo->empty) {
- trx->undo_no = ut_dulint_add(undo->top_undo_no,
- 1);
- }
-
- trx_list_insert_ordered(trx);
-
- undo = UT_LIST_GET_NEXT(undo_list, undo);
- }
-
- undo = UT_LIST_GET_FIRST(rseg->update_undo_list);
-
- while (undo != NULL) {
- trx = trx_get_on_id(undo->trx_id);
-
- if (NULL == trx) {
- trx = trx_create(trx_dummy_sess);
-
- trx->is_recovered = TRUE;
- trx->id = undo->trx_id;
- trx->xid = undo->xid;
-
- if (undo->state != TRX_UNDO_ACTIVE) {
-
- /* Prepared transactions are left in
- the prepared state waiting for a
- commit or abort decision from MySQL */
-
- if (undo->state == TRX_UNDO_PREPARED) {
- fprintf(stderr,
- "InnoDB: Transaction "
- TRX_ID_FMT " was in the"
- " XA prepared state.\n",
- TRX_ID_PREP_PRINTF(
- trx->id));
-
- if (srv_force_recovery == 0) {
-
- trx->conc_state
- = TRX_PREPARED;
- } else {
- fprintf(stderr,
- "InnoDB: Since"
- " innodb_force_recovery"
- " > 0, we will"
- " rollback it"
- " anyway.\n");
-
- trx->conc_state
- = TRX_ACTIVE;
- }
- } else {
- trx->conc_state
- = TRX_COMMITTED_IN_MEMORY;
- }
-
- /* We give a dummy value for the trx
- number */
-
- trx->no = trx->id;
- } else {
- trx->conc_state = TRX_ACTIVE;
-
- /* A running transaction always has
- the number field inited to
- ut_dulint_max */
-
- trx->no = ut_dulint_max;
- }
-
- trx->rseg = rseg;
- trx_list_insert_ordered(trx);
-
- if (undo->dict_operation) {
- trx_set_dict_operation(
- trx, TRX_DICT_OP_TABLE);
- trx->table_id = undo->table_id;
- }
- }
-
- trx->update_undo = undo;
-
- if ((!undo->empty)
- && (ut_dulint_cmp(undo->top_undo_no,
- trx->undo_no) >= 0)) {
-
- trx->undo_no = ut_dulint_add(undo->top_undo_no,
- 1);
- }
-
- undo = UT_LIST_GET_NEXT(undo_list, undo);
- }
-
- rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
- }
-}
-
-/******************************************************************//**
-Assigns a rollback segment to a transaction in a round-robin fashion.
-Skips the SYSTEM rollback segment if another is available.
-@return assigned rollback segment id */
-UNIV_INLINE
-ulint
-trx_assign_rseg(void)
-/*=================*/
-{
- trx_rseg_t* rseg = trx_sys->latest_rseg;
-
- ut_ad(mutex_own(&kernel_mutex));
-loop:
- /* Get next rseg in a round-robin fashion */
-
- rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
-
- if (rseg == NULL) {
- rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
- }
-
- /* If it is the SYSTEM rollback segment, and there exist others, skip
- it */
-
- if ((rseg->id == TRX_SYS_SYSTEM_RSEG_ID)
- && (UT_LIST_GET_LEN(trx_sys->rseg_list) > 1)) {
- goto loop;
- }
-
- trx_sys->latest_rseg = rseg;
-
- return(rseg->id);
-}
-
-/****************************************************************//**
-Starts a new transaction.
-@return TRUE */
-UNIV_INTERN
-ibool
-trx_start_low(
-/*==========*/
- trx_t* trx, /*!< in: transaction */
- ulint rseg_id)/*!< in: rollback segment id; if ULINT_UNDEFINED
- is passed, the system chooses the rollback segment
- automatically in a round-robin fashion */
-{
- trx_rseg_t* rseg;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(trx->rseg == NULL);
-
- if (trx->is_purge) {
- trx->id = ut_dulint_zero;
- trx->conc_state = TRX_ACTIVE;
- trx->start_time = time(NULL);
-
- return(TRUE);
- }
-
- ut_ad(trx->conc_state != TRX_ACTIVE);
-
- if (rseg_id == ULINT_UNDEFINED) {
-
- rseg_id = trx_assign_rseg();
- }
-
- rseg = trx_sys_get_nth_rseg(trx_sys, rseg_id);
-
- trx->id = trx_sys_get_new_trx_id();
-
- /* The initial value for trx->no: ut_dulint_max is used in
- read_view_open_now: */
-
- trx->no = ut_dulint_max;
-
- trx->rseg = rseg;
-
- trx->conc_state = TRX_ACTIVE;
- trx->start_time = time(NULL);
-
- UT_LIST_ADD_FIRST(trx_list, trx_sys->trx_list, trx);
-
- return(TRUE);
-}
-
-/****************************************************************//**
-Starts a new transaction.
-@return TRUE */
-UNIV_INTERN
-ibool
-trx_start(
-/*======*/
- trx_t* trx, /*!< in: transaction */
- ulint rseg_id)/*!< in: rollback segment id; if ULINT_UNDEFINED
- is passed, the system chooses the rollback segment
- automatically in a round-robin fashion */
-{
- ibool ret;
-
- /* Update the info whether we should skip XA steps that eat CPU time
- For the duration of the transaction trx->support_xa is not reread
- from thd so any changes in the value take effect in the next
- transaction. This is to avoid a scenario where some undo
- generated by a transaction, has XA stuff, and other undo,
- generated by the same transaction, doesn't. */
- trx->support_xa = thd_supports_xa(trx->mysql_thd);
-
- mutex_enter(&kernel_mutex);
-
- ret = trx_start_low(trx, rseg_id);
-
- mutex_exit(&kernel_mutex);
-
- return(ret);
-}
-
-/****************************************************************//**
-Commits a transaction. */
-UNIV_INTERN
-void
-trx_commit_off_kernel(
-/*==================*/
- trx_t* trx) /*!< in: transaction */
-{
- page_t* update_hdr_page;
- ib_uint64_t lsn = 0;
- trx_rseg_t* rseg;
- trx_undo_t* undo;
- mtr_t mtr;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- trx->must_flush_log_later = FALSE;
-
- rseg = trx->rseg;
-
- if (trx->insert_undo != NULL || trx->update_undo != NULL) {
-
- mutex_exit(&kernel_mutex);
-
- mtr_start(&mtr);
-
- /* Change the undo log segment states from TRX_UNDO_ACTIVE
- to some other state: these modifications to the file data
- structure define the transaction as committed in the file
- based world, at the serialization point of the log sequence
- number lsn obtained below. */
-
- mutex_enter(&(rseg->mutex));
-
- if (trx->insert_undo != NULL) {
- trx_undo_set_state_at_finish(
- rseg, trx, trx->insert_undo, &mtr);
- }
-
- undo = trx->update_undo;
-
- if (undo) {
- mutex_enter(&kernel_mutex);
- trx->no = trx_sys_get_new_trx_no();
-
- mutex_exit(&kernel_mutex);
-
- /* It is not necessary to obtain trx->undo_mutex here
- because only a single OS thread is allowed to do the
- transaction commit for this transaction. */
-
- update_hdr_page = trx_undo_set_state_at_finish(
- rseg, trx, undo, &mtr);
-
- /* We have to do the cleanup for the update log while
- holding the rseg mutex because update log headers
- have to be put to the history list in the order of
- the trx number. */
-
- trx_undo_update_cleanup(trx, update_hdr_page, &mtr);
- }
-
- mutex_exit(&(rseg->mutex));
-
- /* Update the latest MySQL binlog name and offset info
- in trx sys header if MySQL binlogging is on or the database
- server is a MySQL replication slave */
-
- if (trx->mysql_log_file_name
- && trx->mysql_log_file_name[0] != '\0') {
- trx_sys_update_mysql_binlog_offset(
- trx->mysql_log_file_name,
- trx->mysql_log_offset,
- TRX_SYS_MYSQL_LOG_INFO, &mtr);
- trx->mysql_log_file_name = NULL;
- }
-
- /* The following call commits the mini-transaction, making the
- whole transaction committed in the file-based world, at this
- log sequence number. The transaction becomes 'durable' when
- we write the log to disk, but in the logical sense the commit
- in the file-based data structures (undo logs etc.) happens
- here.
-
- NOTE that transaction numbers, which are assigned only to
- transactions with an update undo log, do not necessarily come
- in exactly the same order as commit lsn's, if the transactions
- have different rollback segments. To get exactly the same
- order we should hold the kernel mutex up to this point,
- adding to the contention of the kernel mutex. However, if
- a transaction T2 is able to see modifications made by
- a transaction T1, T2 will always get a bigger transaction
- number and a bigger commit lsn than T1. */
-
- /*--------------*/
- mtr_commit(&mtr);
- /*--------------*/
- lsn = mtr.end_lsn;
-
- mutex_enter(&kernel_mutex);
- }
-
- ut_ad(trx->conc_state == TRX_ACTIVE
- || trx->conc_state == TRX_PREPARED);
- ut_ad(mutex_own(&kernel_mutex));
-
- /* The following assignment makes the transaction committed in memory
- and makes its changes to data visible to other transactions.
- NOTE that there is a small discrepancy from the strict formal
- visibility rules here: a human user of the database can see
- modifications made by another transaction T even before the necessary
- log segment has been flushed to the disk. If the database happens to
- crash before the flush, the user has seen modifications from T which
- will never be a committed transaction. However, any transaction T2
- which sees the modifications of the committing transaction T, and
- which also itself makes modifications to the database, will get an lsn
- larger than the committing transaction T. In the case where the log
- flush fails, and T never gets committed, also T2 will never get
- committed. */
-
- /*--------------------------------------*/
- trx->conc_state = TRX_COMMITTED_IN_MEMORY;
- /*--------------------------------------*/
-
- /* If we release kernel_mutex below and we are still doing
- recovery i.e.: back ground rollback thread is still active
- then there is a chance that the rollback thread may see
- this trx as COMMITTED_IN_MEMORY and goes adhead to clean it
- up calling trx_cleanup_at_db_startup(). This can happen
- in the case we are committing a trx here that is left in
- PREPARED state during the crash. Note that commit of the
- rollback of a PREPARED trx happens in the recovery thread
- while the rollback of other transactions happen in the
- background thread. To avoid this race we unconditionally
- unset the is_recovered flag from the trx. */
-
- trx->is_recovered = FALSE;
-
- lock_release_off_kernel(trx);
-
- if (trx->global_read_view) {
- read_view_close(trx->global_read_view);
- mem_heap_empty(trx->global_read_view_heap);
- trx->global_read_view = NULL;
- }
-
- trx->read_view = NULL;
-
- if (lsn) {
-
- mutex_exit(&kernel_mutex);
-
- if (trx->insert_undo != NULL) {
-
- trx_undo_insert_cleanup(trx);
- }
-
- /* NOTE that we could possibly make a group commit more
- efficient here: call os_thread_yield here to allow also other
- trxs to come to commit! */
-
- /*-------------------------------------*/
-
- /* Depending on the my.cnf options, we may now write the log
- buffer to the log files, making the transaction durable if
- the OS does not crash. We may also flush the log files to
- disk, making the transaction durable also at an OS crash or a
- power outage.
-
- The idea in InnoDB's group commit is that a group of
- transactions gather behind a trx doing a physical disk write
- to log files, and when that physical write has been completed,
- one of those transactions does a write which commits the whole
- group. Note that this group commit will only bring benefit if
- there are > 2 users in the database. Then at least 2 users can
- gather behind one doing the physical log write to disk.
-
- If we are calling trx_commit() under prepare_commit_mutex, we
- will delay possible log write and flush to a separate function
- trx_commit_complete_for_mysql(), which is only called when the
- thread has released the mutex. This is to make the
- group commit algorithm to work. Otherwise, the prepare_commit
- mutex would serialize all commits and prevent a group of
- transactions from gathering. */
-
- if (trx->flush_log_later) {
- /* Do nothing yet */
- trx->must_flush_log_later = TRUE;
- } else if (srv_flush_log_at_trx_commit == 0) {
- /* Do nothing */
- } else if (srv_flush_log_at_trx_commit == 1) {
- if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
- FALSE);
- } else {
- /* Write the log to the log files AND flush
- them to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
- }
- } else if (srv_flush_log_at_trx_commit == 2) {
-
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
- } else {
- ut_error;
- }
-
- trx->commit_lsn = lsn;
-
- /*-------------------------------------*/
-
- mutex_enter(&kernel_mutex);
- }
-
- /* Free all savepoints */
- trx_roll_free_all_savepoints(trx);
-
- trx->conc_state = TRX_NOT_STARTED;
- trx->rseg = NULL;
- trx->undo_no = ut_dulint_zero;
- trx->last_sql_stat_start.least_undo_no = ut_dulint_zero;
- trx->mysql_query_str = NULL;
-
- ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
- ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0);
-
- UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
-}
-
-/****************************************************************//**
-Cleans up a transaction at database startup. The cleanup is needed if
-the transaction already got to the middle of a commit when the database
-crashed, and we cannot roll it back. */
-UNIV_INTERN
-void
-trx_cleanup_at_db_startup(
-/*======================*/
- trx_t* trx) /*!< in: transaction */
-{
- if (trx->insert_undo != NULL) {
-
- trx_undo_insert_cleanup(trx);
- }
-
- trx->conc_state = TRX_NOT_STARTED;
- trx->rseg = NULL;
- trx->undo_no = ut_dulint_zero;
- trx->last_sql_stat_start.least_undo_no = ut_dulint_zero;
-
- UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
-}
-
-/********************************************************************//**
-Assigns a read view for a consistent read query. All the consistent reads
-within the same transaction will get the same read view, which is created
-when this function is first called for a new started transaction.
-@return consistent read view */
-UNIV_INTERN
-read_view_t*
-trx_assign_read_view(
-/*=================*/
- trx_t* trx) /*!< in: active transaction */
-{
- ut_ad(trx->conc_state == TRX_ACTIVE);
-
- if (trx->read_view) {
- return(trx->read_view);
- }
-
- mutex_enter(&kernel_mutex);
-
- if (!trx->read_view) {
- trx->read_view = read_view_open_now(
- trx->id, trx->global_read_view_heap);
- trx->global_read_view = trx->read_view;
- }
-
- mutex_exit(&kernel_mutex);
-
- return(trx->read_view);
-}
-
-/****************************************************************//**
-Commits a transaction. NOTE that the kernel mutex is temporarily released. */
-static
-void
-trx_handle_commit_sig_off_kernel(
-/*=============================*/
- trx_t* trx, /*!< in: transaction */
- que_thr_t** next_thr) /*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread */
-{
- trx_sig_t* sig;
- trx_sig_t* next_sig;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- trx->que_state = TRX_QUE_COMMITTING;
-
- trx_commit_off_kernel(trx);
-
- ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
-
- /* Remove all TRX_SIG_COMMIT signals from the signal queue and send
- reply messages to them */
-
- sig = UT_LIST_GET_FIRST(trx->signals);
-
- while (sig != NULL) {
- next_sig = UT_LIST_GET_NEXT(signals, sig);
-
- if (sig->type == TRX_SIG_COMMIT) {
-
- trx_sig_reply(sig, next_thr);
- trx_sig_remove(trx, sig);
- }
-
- sig = next_sig;
- }
-
- trx->que_state = TRX_QUE_RUNNING;
-}
-
-/***********************************************************//**
-The transaction must be in the TRX_QUE_LOCK_WAIT state. Puts it to
-the TRX_QUE_RUNNING state and releases query threads which were
-waiting for a lock in the wait_thrs list. */
-UNIV_INTERN
-void
-trx_end_lock_wait(
-/*==============*/
- trx_t* trx) /*!< in: transaction */
-{
- que_thr_t* thr;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT);
-
- thr = UT_LIST_GET_FIRST(trx->wait_thrs);
-
- while (thr != NULL) {
- que_thr_end_wait_no_next_thr(thr);
-
- UT_LIST_REMOVE(trx_thrs, trx->wait_thrs, thr);
-
- thr = UT_LIST_GET_FIRST(trx->wait_thrs);
- }
-
- trx->que_state = TRX_QUE_RUNNING;
-}
-
-/***********************************************************//**
-Moves the query threads in the lock wait list to the SUSPENDED state and puts
-the transaction to the TRX_QUE_RUNNING state. */
-static
-void
-trx_lock_wait_to_suspended(
-/*=======================*/
- trx_t* trx) /*!< in: transaction in the TRX_QUE_LOCK_WAIT state */
-{
- que_thr_t* thr;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT);
-
- thr = UT_LIST_GET_FIRST(trx->wait_thrs);
-
- while (thr != NULL) {
- thr->state = QUE_THR_SUSPENDED;
-
- UT_LIST_REMOVE(trx_thrs, trx->wait_thrs, thr);
-
- thr = UT_LIST_GET_FIRST(trx->wait_thrs);
- }
-
- trx->que_state = TRX_QUE_RUNNING;
-}
-
-/***********************************************************//**
-Moves the query threads in the sig reply wait list of trx to the SUSPENDED
-state. */
-static
-void
-trx_sig_reply_wait_to_suspended(
-/*============================*/
- trx_t* trx) /*!< in: transaction */
-{
- trx_sig_t* sig;
- que_thr_t* thr;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- sig = UT_LIST_GET_FIRST(trx->reply_signals);
-
- while (sig != NULL) {
- thr = sig->receiver;
-
- ut_ad(thr->state == QUE_THR_SIG_REPLY_WAIT);
-
- thr->state = QUE_THR_SUSPENDED;
-
- sig->receiver = NULL;
-
- UT_LIST_REMOVE(reply_signals, trx->reply_signals, sig);
-
- sig = UT_LIST_GET_FIRST(trx->reply_signals);
- }
-}
-
-/*****************************************************************//**
-Checks the compatibility of a new signal with the other signals in the
-queue.
-@return TRUE if the signal can be queued */
-static
-ibool
-trx_sig_is_compatible(
-/*==================*/
- trx_t* trx, /*!< in: trx handle */
- ulint type, /*!< in: signal type */
- ulint sender) /*!< in: TRX_SIG_SELF or TRX_SIG_OTHER_SESS */
-{
- trx_sig_t* sig;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- if (UT_LIST_GET_LEN(trx->signals) == 0) {
-
- return(TRUE);
- }
-
- if (sender == TRX_SIG_SELF) {
- if (type == TRX_SIG_ERROR_OCCURRED) {
-
- return(TRUE);
-
- } else if (type == TRX_SIG_BREAK_EXECUTION) {
-
- return(TRUE);
- } else {
- return(FALSE);
- }
- }
-
- ut_ad(sender == TRX_SIG_OTHER_SESS);
-
- sig = UT_LIST_GET_FIRST(trx->signals);
-
- if (type == TRX_SIG_COMMIT) {
- while (sig != NULL) {
-
- if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
-
- return(FALSE);
- }
-
- sig = UT_LIST_GET_NEXT(signals, sig);
- }
-
- return(TRUE);
-
- } else if (type == TRX_SIG_TOTAL_ROLLBACK) {
- while (sig != NULL) {
-
- if (sig->type == TRX_SIG_COMMIT) {
-
- return(FALSE);
- }
-
- sig = UT_LIST_GET_NEXT(signals, sig);
- }
-
- return(TRUE);
-
- } else if (type == TRX_SIG_BREAK_EXECUTION) {
-
- return(TRUE);
- } else {
- ut_error;
-
- return(FALSE);
- }
-}
-
-/****************************************************************//**
-Sends a signal to a trx object. */
-UNIV_INTERN
-void
-trx_sig_send(
-/*=========*/
- trx_t* trx, /*!< in: trx handle */
- ulint type, /*!< in: signal type */
- ulint sender, /*!< in: TRX_SIG_SELF or
- TRX_SIG_OTHER_SESS */
- que_thr_t* receiver_thr, /*!< in: query thread which wants the
- reply, or NULL; if type is
- TRX_SIG_END_WAIT, this must be NULL */
- trx_savept_t* savept, /*!< in: possible rollback savepoint, or
- NULL */
- que_thr_t** next_thr) /*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread; if the parameter
- is NULL, it is ignored */
-{
- trx_sig_t* sig;
- trx_t* receiver_trx;
-
- ut_ad(trx);
- ut_ad(mutex_own(&kernel_mutex));
-
- if (!trx_sig_is_compatible(trx, type, sender)) {
- /* The signal is not compatible with the other signals in
- the queue: die */
-
- ut_error;
- }
-
- /* Queue the signal object */
-
- if (UT_LIST_GET_LEN(trx->signals) == 0) {
-
- /* The signal list is empty: the 'sig' slot must be unused
- (we improve performance a bit by avoiding mem_alloc) */
- sig = &(trx->sig);
- } else {
- /* It might be that the 'sig' slot is unused also in this
- case, but we choose the easy way of using mem_alloc */
-
- sig = mem_alloc(sizeof(trx_sig_t));
- }
-
- UT_LIST_ADD_LAST(signals, trx->signals, sig);
-
- sig->type = type;
- sig->sender = sender;
- sig->receiver = receiver_thr;
-
- if (savept) {
- sig->savept = *savept;
- }
-
- if (receiver_thr) {
- receiver_trx = thr_get_trx(receiver_thr);
-
- UT_LIST_ADD_LAST(reply_signals, receiver_trx->reply_signals,
- sig);
- }
-
- if (trx->sess->state == SESS_ERROR) {
-
- trx_sig_reply_wait_to_suspended(trx);
- }
-
- if ((sender != TRX_SIG_SELF) || (type == TRX_SIG_BREAK_EXECUTION)) {
- ut_error;
- }
-
- /* If there were no other signals ahead in the queue, try to start
- handling of the signal */
-
- if (UT_LIST_GET_FIRST(trx->signals) == sig) {
-
- trx_sig_start_handle(trx, next_thr);
- }
-}
-
-/****************************************************************//**
-Ends signal handling. If the session is in the error state, and
-trx->graph_before_signal_handling != NULL, then returns control to the error
-handling routine of the graph (currently just returns the control to the
-graph root which then will send an error message to the client). */
-UNIV_INTERN
-void
-trx_end_signal_handling(
-/*====================*/
- trx_t* trx) /*!< in: trx */
-{
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(trx->handling_signals == TRUE);
-
- trx->handling_signals = FALSE;
-
- trx->graph = trx->graph_before_signal_handling;
-
- if (trx->graph && (trx->sess->state == SESS_ERROR)) {
-
- que_fork_error_handle(trx, trx->graph);
- }
-}
-
-/****************************************************************//**
-Starts handling of a trx signal. */
-UNIV_INTERN
-void
-trx_sig_start_handle(
-/*=================*/
- trx_t* trx, /*!< in: trx handle */
- que_thr_t** next_thr) /*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread; if the parameter
- is NULL, it is ignored */
-{
- trx_sig_t* sig;
- ulint type;
-loop:
- /* We loop in this function body as long as there are queued signals
- we can process immediately */
-
- ut_ad(trx);
- ut_ad(mutex_own(&kernel_mutex));
-
- if (trx->handling_signals && (UT_LIST_GET_LEN(trx->signals) == 0)) {
-
- trx_end_signal_handling(trx);
-
- return;
- }
-
- if (trx->conc_state == TRX_NOT_STARTED) {
-
- trx_start_low(trx, ULINT_UNDEFINED);
- }
-
- /* If the trx is in a lock wait state, moves the waiting query threads
- to the suspended state */
-
- if (trx->que_state == TRX_QUE_LOCK_WAIT) {
-
- trx_lock_wait_to_suspended(trx);
- }
-
- /* If the session is in the error state and this trx has threads
- waiting for reply from signals, moves these threads to the suspended
- state, canceling wait reservations; note that if the transaction has
- sent a commit or rollback signal to itself, and its session is not in
- the error state, then nothing is done here. */
-
- if (trx->sess->state == SESS_ERROR) {
- trx_sig_reply_wait_to_suspended(trx);
- }
-
- /* If there are no running query threads, we can start processing of a
- signal, otherwise we have to wait until all query threads of this
- transaction are aware of the arrival of the signal. */
-
- if (trx->n_active_thrs > 0) {
-
- return;
- }
-
- if (trx->handling_signals == FALSE) {
- trx->graph_before_signal_handling = trx->graph;
-
- trx->handling_signals = TRUE;
- }
-
- sig = UT_LIST_GET_FIRST(trx->signals);
- type = sig->type;
-
- if (type == TRX_SIG_COMMIT) {
-
- trx_handle_commit_sig_off_kernel(trx, next_thr);
-
- } else if ((type == TRX_SIG_TOTAL_ROLLBACK)
- || (type == TRX_SIG_ROLLBACK_TO_SAVEPT)) {
-
- trx_rollback(trx, sig, next_thr);
-
- /* No further signals can be handled until the rollback
- completes, therefore we return */
-
- return;
-
- } else if (type == TRX_SIG_ERROR_OCCURRED) {
-
- trx_rollback(trx, sig, next_thr);
-
- /* No further signals can be handled until the rollback
- completes, therefore we return */
-
- return;
-
- } else if (type == TRX_SIG_BREAK_EXECUTION) {
-
- trx_sig_reply(sig, next_thr);
- trx_sig_remove(trx, sig);
- } else {
- ut_error;
- }
-
- goto loop;
-}
-
-/****************************************************************//**
-Send the reply message when a signal in the queue of the trx has been
-handled. */
-UNIV_INTERN
-void
-trx_sig_reply(
-/*==========*/
- trx_sig_t* sig, /*!< in: signal */
- que_thr_t** next_thr) /*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread */
-{
- trx_t* receiver_trx;
-
- ut_ad(sig);
- ut_ad(mutex_own(&kernel_mutex));
-
- if (sig->receiver != NULL) {
- ut_ad((sig->receiver)->state == QUE_THR_SIG_REPLY_WAIT);
-
- receiver_trx = thr_get_trx(sig->receiver);
-
- UT_LIST_REMOVE(reply_signals, receiver_trx->reply_signals,
- sig);
- ut_ad(receiver_trx->sess->state != SESS_ERROR);
-
- que_thr_end_wait(sig->receiver, next_thr);
-
- sig->receiver = NULL;
-
- }
-}
-
-/****************************************************************//**
-Removes a signal object from the trx signal queue. */
-UNIV_INTERN
-void
-trx_sig_remove(
-/*===========*/
- trx_t* trx, /*!< in: trx handle */
- trx_sig_t* sig) /*!< in, own: signal */
-{
- ut_ad(trx && sig);
- ut_ad(mutex_own(&kernel_mutex));
-
- ut_ad(sig->receiver == NULL);
-
- UT_LIST_REMOVE(signals, trx->signals, sig);
- sig->type = 0; /* reset the field to catch possible bugs */
-
- if (sig != &(trx->sig)) {
- mem_free(sig);
- }
-}
-
-/*********************************************************************//**
-Creates a commit command node struct.
-@return own: commit node struct */
-UNIV_INTERN
-commit_node_t*
-commit_node_create(
-/*===============*/
- mem_heap_t* heap) /*!< in: mem heap where created */
-{
- commit_node_t* node;
-
- node = mem_heap_alloc(heap, sizeof(commit_node_t));
- node->common.type = QUE_NODE_COMMIT;
- node->state = COMMIT_NODE_SEND;
-
- return(node);
-}
-
-/***********************************************************//**
-Performs an execution step for a commit type node in a query graph.
-@return query thread to run next, or NULL */
-UNIV_INTERN
-que_thr_t*
-trx_commit_step(
-/*============*/
- que_thr_t* thr) /*!< in: query thread */
-{
- commit_node_t* node;
- que_thr_t* next_thr;
-
- node = thr->run_node;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_COMMIT);
-
- if (thr->prev_node == que_node_get_parent(node)) {
- node->state = COMMIT_NODE_SEND;
- }
-
- if (node->state == COMMIT_NODE_SEND) {
- mutex_enter(&kernel_mutex);
-
- node->state = COMMIT_NODE_WAIT;
-
- next_thr = NULL;
-
- thr->state = QUE_THR_SIG_REPLY_WAIT;
-
- /* Send the commit signal to the transaction */
-
- trx_sig_send(thr_get_trx(thr), TRX_SIG_COMMIT, TRX_SIG_SELF,
- thr, NULL, &next_thr);
-
- mutex_exit(&kernel_mutex);
-
- return(next_thr);
- }
-
- ut_ad(node->state == COMMIT_NODE_WAIT);
-
- node->state = COMMIT_NODE_SEND;
-
- thr->run_node = que_node_get_parent(node);
-
- return(thr);
-}
-
-/**********************************************************************//**
-Does the transaction commit for MySQL.
-@return DB_SUCCESS or error number */
-UNIV_INTERN
-ulint
-trx_commit_for_mysql(
-/*=================*/
- trx_t* trx) /*!< in: trx handle */
-{
- /* Because we do not do the commit by sending an Innobase
- sig to the transaction, we must here make sure that trx has been
- started. */
-
- ut_a(trx);
-
- trx_start_if_not_started(trx);
-
- trx->op_info = "committing";
-
- mutex_enter(&kernel_mutex);
-
- trx_commit_off_kernel(trx);
-
- mutex_exit(&kernel_mutex);
-
- trx->op_info = "";
-
- return(DB_SUCCESS);
-}
-
-/**********************************************************************//**
-If required, flushes the log to disk if we called trx_commit_for_mysql()
-with trx->flush_log_later == TRUE.
-@return 0 or error number */
-UNIV_INTERN
-ulint
-trx_commit_complete_for_mysql(
-/*==========================*/
- trx_t* trx) /*!< in: trx handle */
-{
- ib_uint64_t lsn = trx->commit_lsn;
-
- ut_a(trx);
-
- trx->op_info = "flushing log";
-
- if (!trx->must_flush_log_later) {
- /* Do nothing */
- } else if (srv_flush_log_at_trx_commit == 0) {
- /* Do nothing */
- } else if (srv_flush_log_at_trx_commit == 1) {
- if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
- } else {
- /* Write the log to the log files AND flush them to
- disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
- }
- } else if (srv_flush_log_at_trx_commit == 2) {
-
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
- } else {
- ut_error;
- }
-
- trx->must_flush_log_later = FALSE;
-
- trx->op_info = "";
-
- return(0);
-}
-
-/**********************************************************************//**
-Marks the latest SQL statement ended. */
-UNIV_INTERN
-void
-trx_mark_sql_stat_end(
-/*==================*/
- trx_t* trx) /*!< in: trx handle */
-{
- ut_a(trx);
-
- if (trx->conc_state == TRX_NOT_STARTED) {
- trx->undo_no = ut_dulint_zero;
- }
-
- trx->last_sql_stat_start.least_undo_no = trx->undo_no;
-}
-
-/**********************************************************************//**
-Prints info about a transaction to the given file. The caller must own the
-kernel mutex and must have called
-innobase_mysql_prepare_print_arbitrary_thd(), unless he knows that MySQL
-or InnoDB cannot meanwhile change the info printed here. */
-UNIV_INTERN
-void
-trx_print(
-/*======*/
- FILE* f, /*!< in: output stream */
- trx_t* trx, /*!< in: transaction */
- ulint max_query_len) /*!< in: max query length to print, or 0 to
- use the default max length */
-{
- ibool newline;
-
- fprintf(f, "TRANSACTION " TRX_ID_FMT, TRX_ID_PREP_PRINTF(trx->id));
-
- switch (trx->conc_state) {
- case TRX_NOT_STARTED:
- fputs(", not started", f);
- break;
- case TRX_ACTIVE:
- fprintf(f, ", ACTIVE %lu sec",
- (ulong)difftime(time(NULL), trx->start_time));
- break;
- case TRX_PREPARED:
- fprintf(f, ", ACTIVE (PREPARED) %lu sec",
- (ulong)difftime(time(NULL), trx->start_time));
- break;
- case TRX_COMMITTED_IN_MEMORY:
- fputs(", COMMITTED IN MEMORY", f);
- break;
- default:
- fprintf(f, " state %lu", (ulong) trx->conc_state);
- }
-
-#ifdef UNIV_LINUX
- fprintf(f, ", process no %lu", trx->mysql_process_no);
-#endif
- fprintf(f, ", OS thread id %lu",
- (ulong) os_thread_pf(trx->mysql_thread_id));
-
- if (*trx->op_info) {
- putc(' ', f);
- fputs(trx->op_info, f);
- }
-
- if (trx->is_recovered) {
- fputs(" recovered trx", f);
- }
-
- if (trx->is_purge) {
- fputs(" purge trx", f);
- }
-
- if (trx->declared_to_be_inside_innodb) {
- fprintf(f, ", thread declared inside InnoDB %lu",
- (ulong) trx->n_tickets_to_enter_innodb);
- }
-
- putc('\n', f);
-
- if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) {
- fprintf(f, "mysql tables in use %lu, locked %lu\n",
- (ulong) trx->n_mysql_tables_in_use,
- (ulong) trx->mysql_n_tables_locked);
- }
-
- newline = TRUE;
-
- switch (trx->que_state) {
- case TRX_QUE_RUNNING:
- newline = FALSE; break;
- case TRX_QUE_LOCK_WAIT:
- fputs("LOCK WAIT ", f); break;
- case TRX_QUE_ROLLING_BACK:
- fputs("ROLLING BACK ", f); break;
- case TRX_QUE_COMMITTING:
- fputs("COMMITTING ", f); break;
- default:
- fprintf(f, "que state %lu ", (ulong) trx->que_state);
- }
-
- if (0 < UT_LIST_GET_LEN(trx->trx_locks)
- || mem_heap_get_size(trx->lock_heap) > 400) {
- newline = TRUE;
-
- fprintf(f, "%lu lock struct(s), heap size %lu,"
- " %lu row lock(s)",
- (ulong) UT_LIST_GET_LEN(trx->trx_locks),
- (ulong) mem_heap_get_size(trx->lock_heap),
- (ulong) lock_number_of_rows_locked(trx));
- }
-
- if (trx->has_search_latch) {
- newline = TRUE;
- fputs(", holds adaptive hash latch", f);
- }
-
- if (!ut_dulint_is_zero(trx->undo_no)) {
- newline = TRUE;
- fprintf(f, ", undo log entries %lu",
- (ulong) ut_dulint_get_low(trx->undo_no));
- }
-
- if (newline) {
- putc('\n', f);
- }
-
- if (trx->mysql_thd != NULL) {
- innobase_mysql_print_thd(f, trx->mysql_thd, max_query_len);
- }
-}
-
-/*******************************************************************//**
-Compares the "weight" (or size) of two transactions. Transactions that
-have edited non-transactional tables are considered heavier than ones
-that have not.
-@return <0, 0 or >0; similar to strcmp(3) */
-UNIV_INTERN
-int
-trx_weight_cmp(
-/*===========*/
- const trx_t* a, /*!< in: the first transaction to be compared */
- const trx_t* b) /*!< in: the second transaction to be compared */
-{
- ibool a_notrans_edit;
- ibool b_notrans_edit;
-
- /* If mysql_thd is NULL for a transaction we assume that it has
- not edited non-transactional tables. */
-
- a_notrans_edit = a->mysql_thd != NULL
- && thd_has_edited_nontrans_tables(a->mysql_thd);
-
- b_notrans_edit = b->mysql_thd != NULL
- && thd_has_edited_nontrans_tables(b->mysql_thd);
-
- if (a_notrans_edit && !b_notrans_edit) {
-
- return(1);
- }
-
- if (!a_notrans_edit && b_notrans_edit) {
-
- return(-1);
- }
-
- /* Either both had edited non-transactional tables or both had
- not, we fall back to comparing the number of altered/locked
- rows. */
-
-#if 0
- fprintf(stderr,
- "%s TRX_WEIGHT(a): %lld+%lu, TRX_WEIGHT(b): %lld+%lu\n",
- __func__,
- ut_conv_dulint_to_longlong(a->undo_no),
- UT_LIST_GET_LEN(a->trx_locks),
- ut_conv_dulint_to_longlong(b->undo_no),
- UT_LIST_GET_LEN(b->trx_locks));
-#endif
-
- return(ut_dulint_cmp(TRX_WEIGHT(a), TRX_WEIGHT(b)));
-}
-
-/****************************************************************//**
-Prepares a transaction. */
-UNIV_INTERN
-void
-trx_prepare_off_kernel(
-/*===================*/
- trx_t* trx) /*!< in: transaction */
-{
- page_t* update_hdr_page;
- trx_rseg_t* rseg;
- ib_uint64_t lsn = 0;
- mtr_t mtr;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- rseg = trx->rseg;
-
- if (trx->insert_undo != NULL || trx->update_undo != NULL) {
-
- mutex_exit(&kernel_mutex);
-
- mtr_start(&mtr);
-
- /* Change the undo log segment states from TRX_UNDO_ACTIVE
- to TRX_UNDO_PREPARED: these modifications to the file data
- structure define the transaction as prepared in the
- file-based world, at the serialization point of lsn. */
-
- mutex_enter(&(rseg->mutex));
-
- if (trx->insert_undo != NULL) {
-
- /* It is not necessary to obtain trx->undo_mutex here
- because only a single OS thread is allowed to do the
- transaction prepare for this transaction. */
-
- trx_undo_set_state_at_prepare(trx, trx->insert_undo,
- &mtr);
- }
-
- if (trx->update_undo) {
- update_hdr_page = trx_undo_set_state_at_prepare(
- trx, trx->update_undo, &mtr);
- }
-
- mutex_exit(&(rseg->mutex));
-
- /*--------------*/
- mtr_commit(&mtr); /* This mtr commit makes the
- transaction prepared in the file-based
- world */
- /*--------------*/
- lsn = mtr.end_lsn;
-
- mutex_enter(&kernel_mutex);
- }
-
- ut_ad(mutex_own(&kernel_mutex));
-
- /*--------------------------------------*/
- trx->conc_state = TRX_PREPARED;
- /*--------------------------------------*/
-
- if (lsn) {
- /* Depending on the my.cnf options, we may now write the log
- buffer to the log files, making the prepared state of the
- transaction durable if the OS does not crash. We may also
- flush the log files to disk, making the prepared state of the
- transaction durable also at an OS crash or a power outage.
-
- The idea in InnoDB's group prepare is that a group of
- transactions gather behind a trx doing a physical disk write
- to log files, and when that physical write has been completed,
- one of those transactions does a write which prepares the whole
- group. Note that this group prepare will only bring benefit if
- there are > 2 users in the database. Then at least 2 users can
- gather behind one doing the physical log write to disk.
-
- TODO: find out if MySQL holds some mutex when calling this.
- That would spoil our group prepare algorithm. */
-
- mutex_exit(&kernel_mutex);
-
- if (srv_flush_log_at_trx_commit == 0) {
- /* Do nothing */
- } else if (srv_flush_log_at_trx_commit == 1) {
- if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
- FALSE);
- } else {
- /* Write the log to the log files AND flush
- them to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
- }
- } else if (srv_flush_log_at_trx_commit == 2) {
-
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
- } else {
- ut_error;
- }
-
- mutex_enter(&kernel_mutex);
- }
-}
-
-/**********************************************************************//**
-Does the transaction prepare for MySQL.
-@return 0 or error number */
-UNIV_INTERN
-ulint
-trx_prepare_for_mysql(
-/*==================*/
- trx_t* trx) /*!< in: trx handle */
-{
- /* Because we do not do the prepare by sending an Innobase
- sig to the transaction, we must here make sure that trx has been
- started. */
-
- ut_a(trx);
-
- trx->op_info = "preparing";
-
- trx_start_if_not_started(trx);
-
- mutex_enter(&kernel_mutex);
-
- trx_prepare_off_kernel(trx);
-
- mutex_exit(&kernel_mutex);
-
- trx->op_info = "";
-
- return(0);
-}
-
-/**********************************************************************//**
-This function is used to find number of prepared transactions and
-their transaction objects for a recovery.
-@return number of prepared transactions stored in xid_list */
-UNIV_INTERN
-int
-trx_recover_for_mysql(
-/*==================*/
- XID* xid_list, /*!< in/out: prepared transactions */
- ulint len) /*!< in: number of slots in xid_list */
-{
- trx_t* trx;
- ulint count = 0;
-
- ut_ad(xid_list);
- ut_ad(len);
-
- /* We should set those transactions which are in the prepared state
- to the xid_list */
-
- mutex_enter(&kernel_mutex);
-
- trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-
- while (trx) {
- if (trx->conc_state == TRX_PREPARED) {
- xid_list[count] = trx->xid;
-
- if (count == 0) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Starting recovery for"
- " XA transactions...\n");
- }
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Transaction " TRX_ID_FMT " in"
- " prepared state after recovery\n",
- TRX_ID_PREP_PRINTF(trx->id));
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Transaction contains changes"
- " to %lu rows\n",
- (ulong) ut_conv_dulint_to_longlong(
- trx->undo_no));
-
- count++;
-
- if (count == len) {
- break;
- }
- }
-
- trx = UT_LIST_GET_NEXT(trx_list, trx);
- }
-
- mutex_exit(&kernel_mutex);
-
- if (count > 0){
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: %lu transactions in prepared state"
- " after recovery\n",
- (ulong) count);
- }
-
- return ((int) count);
-}
-
-/*******************************************************************//**
-This function is used to find one X/Open XA distributed transaction
-which is in the prepared state
-@return trx or NULL */
-UNIV_INTERN
-trx_t*
-trx_get_trx_by_xid(
-/*===============*/
- XID* xid) /*!< in: X/Open XA transaction identification */
-{
- trx_t* trx;
-
- if (xid == NULL) {
-
- return (NULL);
- }
-
- mutex_enter(&kernel_mutex);
-
- trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-
- while (trx) {
- /* Compare two X/Open XA transaction id's: their
- length should be the same and binary comparison
- of gtrid_lenght+bqual_length bytes should be
- the same */
-
- if (xid->gtrid_length == trx->xid.gtrid_length
- && xid->bqual_length == trx->xid.bqual_length
- && memcmp(xid->data, trx->xid.data,
- xid->gtrid_length + xid->bqual_length) == 0) {
- break;
- }
-
- trx = UT_LIST_GET_NEXT(trx_list, trx);
- }
-
- mutex_exit(&kernel_mutex);
-
- if (trx) {
- if (trx->conc_state != TRX_PREPARED) {
-
- return(NULL);
- }
-
- return(trx);
- } else {
- return(NULL);
- }
-}
diff --git a/storage/innodb_plugin/trx/trx0undo.c b/storage/innodb_plugin/trx/trx0undo.c
deleted file mode 100644
index 3bb1b1cdf6c..00000000000
--- a/storage/innodb_plugin/trx/trx0undo.c
+++ /dev/null
@@ -1,1993 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file trx/trx0undo.c
-Transaction undo log
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0undo.h"
-
-#ifdef UNIV_NONINL
-#include "trx0undo.ic"
-#endif
-
-#include "fsp0fsp.h"
-#ifndef UNIV_HOTBACKUP
-#include "mach0data.h"
-#include "mtr0log.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "srv0srv.h"
-#include "trx0rec.h"
-#include "trx0purge.h"
-
-/* How should the old versions in the history list be managed?
- ----------------------------------------------------------
-If each transaction is given a whole page for its update undo log, file
-space consumption can be 10 times higher than necessary. Therefore,
-partly filled update undo log pages should be reusable. But then there
-is no way individual pages can be ordered so that the ordering agrees
-with the serialization numbers of the transactions on the pages. Thus,
-the history list must be formed of undo logs, not their header pages as
-it was in the old implementation.
- However, on a single header page the transactions are placed in
-the order of their serialization numbers. As old versions are purged, we
-may free the page when the last transaction on the page has been purged.
- A problem is that the purge has to go through the transactions
-in the serialization order. This means that we have to look through all
-rollback segments for the one that has the smallest transaction number
-in its history list.
- When should we do a purge? A purge is necessary when space is
-running out in any of the rollback segments. Then we may have to purge
-also old version which might be needed by some consistent read. How do
-we trigger the start of a purge? When a transaction writes to an undo log,
-it may notice that the space is running out. When a read view is closed,
-it may make some history superfluous. The server can have an utility which
-periodically checks if it can purge some history.
- In a parallellized purge we have the problem that a query thread
-can remove a delete marked clustered index record before another query
-thread has processed an earlier version of the record, which cannot then
-be done because the row cannot be constructed from the clustered index
-record. To avoid this problem, we will store in the update and delete mark
-undo record also the columns necessary to construct the secondary index
-entries which are modified.
- We can latch the stack of versions of a single clustered index record
-by taking a latch on the clustered index page. As long as the latch is held,
-no new versions can be added and no versions removed by undo. But, a purge
-can still remove old versions from the bottom of the stack. */
-
-/* How to protect rollback segments, undo logs, and history lists with
- -------------------------------------------------------------------
-latches?
--------
-The contention of the kernel mutex should be minimized. When a transaction
-does its first insert or modify in an index, an undo log is assigned for it.
-Then we must have an x-latch to the rollback segment header.
- When the transaction does more modifys or rolls back, the undo log is
-protected with undo_mutex in the transaction.
- When the transaction commits, its insert undo log is either reset and
-cached for a fast reuse, or freed. In these cases we must have an x-latch on
-the rollback segment page. The update undo log is put to the history list. If
-it is not suitable for reuse, its slot in the rollback segment is reset. In
-both cases, an x-latch must be acquired on the rollback segment.
- The purge operation steps through the history list without modifying
-it until a truncate operation occurs, which can remove undo logs from the end
-of the list and release undo log segments. In stepping through the list,
-s-latches on the undo log pages are enough, but in a truncate, x-latches must
-be obtained on the rollback segment and individual pages. */
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************************//**
-Initializes the fields in an undo log segment page. */
-static
-void
-trx_undo_page_init(
-/*===============*/
- page_t* undo_page, /*!< in: undo log segment page */
- ulint type, /*!< in: undo log segment type */
- mtr_t* mtr); /*!< in: mtr */
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Creates and initializes an undo log memory object.
-@return own: the undo log memory object */
-static
-trx_undo_t*
-trx_undo_mem_create(
-/*================*/
- trx_rseg_t* rseg, /*!< in: rollback segment memory object */
- ulint id, /*!< in: slot index within rseg */
- ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
- trx_id_t trx_id, /*!< in: id of the trx for which the undo log
- is created */
- const XID* xid, /*!< in: X/Open XA transaction identification*/
- ulint page_no,/*!< in: undo log header page number */
- ulint offset);/*!< in: undo log header byte offset on page */
-#endif /* !UNIV_HOTBACKUP */
-/***************************************************************//**
-Initializes a cached insert undo log header page for new use. NOTE that this
-function has its own log record type MLOG_UNDO_HDR_REUSE. You must NOT change
-the operation of this function!
-@return undo log header byte offset on page */
-static
-ulint
-trx_undo_insert_header_reuse(
-/*=========================*/
- page_t* undo_page, /*!< in/out: insert undo log segment
- header page, x-latched */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr); /*!< in: mtr */
-/**********************************************************************//**
-If an update undo log can be discarded immediately, this function frees the
-space, resetting the page to the proper state for caching. */
-static
-void
-trx_undo_discard_latest_update_undo(
-/*================================*/
- page_t* undo_page, /*!< in: header page of an undo log of size 1 */
- mtr_t* mtr); /*!< in: mtr */
-
-#ifndef UNIV_HOTBACKUP
-/***********************************************************************//**
-Gets the previous record in an undo log from the previous page.
-@return undo log record, the page s-latched, NULL if none */
-static
-trx_undo_rec_t*
-trx_undo_get_prev_rec_from_prev_page(
-/*=================================*/
- trx_undo_rec_t* rec, /*!< in: undo record */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset, /*!< in: undo log header offset on page */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint space;
- ulint zip_size;
- ulint prev_page_no;
- page_t* prev_page;
- page_t* undo_page;
-
- undo_page = page_align(rec);
-
- prev_page_no = flst_get_prev_addr(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_NODE, mtr)
- .page;
-
- if (prev_page_no == FIL_NULL) {
-
- return(NULL);
- }
-
- space = page_get_space_id(undo_page);
- zip_size = fil_space_get_zip_size(space);
-
- prev_page = trx_undo_page_get_s_latched(space, zip_size,
- prev_page_no, mtr);
-
- return(trx_undo_page_get_last_rec(prev_page, page_no, offset));
-}
-
-/***********************************************************************//**
-Gets the previous record in an undo log.
-@return undo log record, the page s-latched, NULL if none */
-UNIV_INTERN
-trx_undo_rec_t*
-trx_undo_get_prev_rec(
-/*==================*/
- trx_undo_rec_t* rec, /*!< in: undo record */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset, /*!< in: undo log header offset on page */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_undo_rec_t* prev_rec;
-
- prev_rec = trx_undo_page_get_prev_rec(rec, page_no, offset);
-
- if (prev_rec) {
-
- return(prev_rec);
- }
-
- /* We have to go to the previous undo log page to look for the
- previous record */
-
- return(trx_undo_get_prev_rec_from_prev_page(rec, page_no, offset,
- mtr));
-}
-
-/***********************************************************************//**
-Gets the next record in an undo log from the next page.
-@return undo log record, the page latched, NULL if none */
-static
-trx_undo_rec_t*
-trx_undo_get_next_rec_from_next_page(
-/*=================================*/
- ulint space, /*!< in: undo log header space */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- page_t* undo_page, /*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset, /*!< in: undo log header offset on page */
- ulint mode, /*!< in: latch mode: RW_S_LATCH or RW_X_LATCH */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_ulogf_t* log_hdr;
- ulint next_page_no;
- page_t* next_page;
- ulint next;
-
- if (page_no == page_get_page_no(undo_page)) {
-
- log_hdr = undo_page + offset;
- next = mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG);
-
- if (next != 0) {
-
- return(NULL);
- }
- }
-
- next_page_no = flst_get_next_addr(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_NODE, mtr)
- .page;
- if (next_page_no == FIL_NULL) {
-
- return(NULL);
- }
-
- if (mode == RW_S_LATCH) {
- next_page = trx_undo_page_get_s_latched(space, zip_size,
- next_page_no, mtr);
- } else {
- ut_ad(mode == RW_X_LATCH);
- next_page = trx_undo_page_get(space, zip_size,
- next_page_no, mtr);
- }
-
- return(trx_undo_page_get_first_rec(next_page, page_no, offset));
-}
-
-/***********************************************************************//**
-Gets the next record in an undo log.
-@return undo log record, the page s-latched, NULL if none */
-UNIV_INTERN
-trx_undo_rec_t*
-trx_undo_get_next_rec(
-/*==================*/
- trx_undo_rec_t* rec, /*!< in: undo record */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset, /*!< in: undo log header offset on page */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint space;
- ulint zip_size;
- trx_undo_rec_t* next_rec;
-
- next_rec = trx_undo_page_get_next_rec(rec, page_no, offset);
-
- if (next_rec) {
- return(next_rec);
- }
-
- space = page_get_space_id(page_align(rec));
- zip_size = fil_space_get_zip_size(space);
-
- return(trx_undo_get_next_rec_from_next_page(space, zip_size,
- page_align(rec),
- page_no, offset,
- RW_S_LATCH, mtr));
-}
-
-/***********************************************************************//**
-Gets the first record in an undo log.
-@return undo log record, the page latched, NULL if none */
-UNIV_INTERN
-trx_undo_rec_t*
-trx_undo_get_first_rec(
-/*===================*/
- ulint space, /*!< in: undo log header space */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset, /*!< in: undo log header offset on page */
- ulint mode, /*!< in: latching mode: RW_S_LATCH or RW_X_LATCH */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* undo_page;
- trx_undo_rec_t* rec;
-
- if (mode == RW_S_LATCH) {
- undo_page = trx_undo_page_get_s_latched(space, zip_size,
- page_no, mtr);
- } else {
- undo_page = trx_undo_page_get(space, zip_size, page_no, mtr);
- }
-
- rec = trx_undo_page_get_first_rec(undo_page, page_no, offset);
-
- if (rec) {
- return(rec);
- }
-
- return(trx_undo_get_next_rec_from_next_page(space, zip_size,
- undo_page, page_no, offset,
- mode, mtr));
-}
-
-/*============== UNDO LOG FILE COPY CREATION AND FREEING ==================*/
-
-/**********************************************************************//**
-Writes the mtr log entry of an undo log page initialization. */
-UNIV_INLINE
-void
-trx_undo_page_init_log(
-/*===================*/
- page_t* undo_page, /*!< in: undo log page */
- ulint type, /*!< in: undo log type */
- mtr_t* mtr) /*!< in: mtr */
-{
- mlog_write_initial_log_record(undo_page, MLOG_UNDO_INIT, mtr);
-
- mlog_catenate_ulint_compressed(mtr, type);
-}
-#else /* !UNIV_HOTBACKUP */
-# define trx_undo_page_init_log(undo_page,type,mtr) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Parses the redo log entry of an undo log page initialization.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-trx_undo_parse_page_init(
-/*=====================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr) /*!< in: mtr or NULL */
-{
- ulint type;
-
- ptr = mach_parse_compressed(ptr, end_ptr, &type);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- if (page) {
- trx_undo_page_init(page, type, mtr);
- }
-
- return(ptr);
-}
-
-/********************************************************************//**
-Initializes the fields in an undo log segment page. */
-static
-void
-trx_undo_page_init(
-/*===============*/
- page_t* undo_page, /*!< in: undo log segment page */
- ulint type, /*!< in: undo log segment type */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_upagef_t* page_hdr;
-
- page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_TYPE, type);
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START,
- TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE,
- TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
-
- fil_page_set_type(undo_page, FIL_PAGE_UNDO_LOG);
-
- trx_undo_page_init_log(undo_page, type, mtr);
-}
-
-#ifndef UNIV_HOTBACKUP
-/***************************************************************//**
-Creates a new undo log segment in file.
-@return DB_SUCCESS if page creation OK possible error codes are:
-DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE */
-static
-ulint
-trx_undo_seg_create(
-/*================*/
- trx_rseg_t* rseg __attribute__((unused)),/*!< in: rollback segment */
- trx_rsegf_t* rseg_hdr,/*!< in: rollback segment header, page
- x-latched */
- ulint type, /*!< in: type of the segment: TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
- ulint* id, /*!< out: slot index within rseg header */
- page_t** undo_page,
- /*!< out: segment header page x-latched, NULL
- if there was an error */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint slot_no;
- ulint space;
- buf_block_t* block;
- trx_upagef_t* page_hdr;
- trx_usegf_t* seg_hdr;
- ulint n_reserved;
- ibool success;
- ulint err = DB_SUCCESS;
-
- ut_ad(mtr && id && rseg_hdr);
- ut_ad(mutex_own(&(rseg->mutex)));
-
- /* fputs(type == TRX_UNDO_INSERT
- ? "Creating insert undo log segment\n"
- : "Creating update undo log segment\n", stderr); */
- slot_no = trx_rsegf_undo_find_free(rseg_hdr, mtr);
-
- if (slot_no == ULINT_UNDEFINED) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: cannot find a free slot for"
- " an undo log. Do you have too\n"
- "InnoDB: many active transactions"
- " running concurrently?\n");
-
- return(DB_TOO_MANY_CONCURRENT_TRXS);
- }
-
- space = page_get_space_id(page_align(rseg_hdr));
-
- success = fsp_reserve_free_extents(&n_reserved, space, 2, FSP_UNDO,
- mtr);
- if (!success) {
-
- return(DB_OUT_OF_FILE_SPACE);
- }
-
- /* Allocate a new file segment for the undo log */
- block = fseg_create_general(space, 0,
- TRX_UNDO_SEG_HDR
- + TRX_UNDO_FSEG_HEADER, TRUE, mtr);
-
- fil_space_release_free_extents(space, n_reserved);
-
- if (block == NULL) {
- /* No space left */
-
- return(DB_OUT_OF_FILE_SPACE);
- }
-
- buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
-
- *undo_page = buf_block_get_frame(block);
-
- page_hdr = *undo_page + TRX_UNDO_PAGE_HDR;
- seg_hdr = *undo_page + TRX_UNDO_SEG_HDR;
-
- trx_undo_page_init(*undo_page, type, mtr);
-
- mlog_write_ulint(page_hdr + TRX_UNDO_PAGE_FREE,
- TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE,
- MLOG_2BYTES, mtr);
-
- mlog_write_ulint(seg_hdr + TRX_UNDO_LAST_LOG, 0, MLOG_2BYTES, mtr);
-
- flst_init(seg_hdr + TRX_UNDO_PAGE_LIST, mtr);
-
- flst_add_last(seg_hdr + TRX_UNDO_PAGE_LIST,
- page_hdr + TRX_UNDO_PAGE_NODE, mtr);
-
- trx_rsegf_set_nth_undo(rseg_hdr, slot_no,
- page_get_page_no(*undo_page), mtr);
- *id = slot_no;
-
- return(err);
-}
-
-/**********************************************************************//**
-Writes the mtr log entry of an undo log header initialization. */
-UNIV_INLINE
-void
-trx_undo_header_create_log(
-/*=======================*/
- const page_t* undo_page, /*!< in: undo log header page */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in: mtr */
-{
- mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_CREATE, mtr);
-
- mlog_catenate_dulint_compressed(mtr, trx_id);
-}
-#else /* !UNIV_HOTBACKUP */
-# define trx_undo_header_create_log(undo_page,trx_id,mtr) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-/***************************************************************//**
-Creates a new undo log header in file. NOTE that this function has its own
-log record type MLOG_UNDO_HDR_CREATE. You must NOT change the operation of
-this function!
-@return header byte offset on page */
-static
-ulint
-trx_undo_header_create(
-/*===================*/
- page_t* undo_page, /*!< in/out: undo log segment
- header page, x-latched; it is
- assumed that there is
- TRX_UNDO_LOG_XA_HDR_SIZE bytes
- free space on it */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_upagef_t* page_hdr;
- trx_usegf_t* seg_hdr;
- trx_ulogf_t* log_hdr;
- trx_ulogf_t* prev_log_hdr;
- ulint prev_log;
- ulint free;
- ulint new_free;
-
- ut_ad(mtr && undo_page);
-
- page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
-
- free = mach_read_from_2(page_hdr + TRX_UNDO_PAGE_FREE);
-
- log_hdr = undo_page + free;
-
- new_free = free + TRX_UNDO_LOG_OLD_HDR_SIZE;
-
- ut_a(free + TRX_UNDO_LOG_XA_HDR_SIZE < UNIV_PAGE_SIZE - 100);
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, new_free);
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, new_free);
-
- mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_ACTIVE);
-
- prev_log = mach_read_from_2(seg_hdr + TRX_UNDO_LAST_LOG);
-
- if (prev_log != 0) {
- prev_log_hdr = undo_page + prev_log;
-
- mach_write_to_2(prev_log_hdr + TRX_UNDO_NEXT_LOG, free);
- }
-
- mach_write_to_2(seg_hdr + TRX_UNDO_LAST_LOG, free);
-
- log_hdr = undo_page + free;
-
- mach_write_to_2(log_hdr + TRX_UNDO_DEL_MARKS, TRUE);
-
- mach_write_to_8(log_hdr + TRX_UNDO_TRX_ID, trx_id);
- mach_write_to_2(log_hdr + TRX_UNDO_LOG_START, new_free);
-
- mach_write_to_1(log_hdr + TRX_UNDO_XID_EXISTS, FALSE);
- mach_write_to_1(log_hdr + TRX_UNDO_DICT_TRANS, FALSE);
-
- mach_write_to_2(log_hdr + TRX_UNDO_NEXT_LOG, 0);
- mach_write_to_2(log_hdr + TRX_UNDO_PREV_LOG, prev_log);
-
- /* Write the log record about the header creation */
- trx_undo_header_create_log(undo_page, trx_id, mtr);
-
- return(free);
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Write X/Open XA Transaction Identification (XID) to undo log header */
-static
-void
-trx_undo_write_xid(
-/*===============*/
- trx_ulogf_t* log_hdr,/*!< in: undo log header */
- const XID* xid, /*!< in: X/Open XA Transaction Identification */
- mtr_t* mtr) /*!< in: mtr */
-{
- mlog_write_ulint(log_hdr + TRX_UNDO_XA_FORMAT,
- (ulint)xid->formatID, MLOG_4BYTES, mtr);
-
- mlog_write_ulint(log_hdr + TRX_UNDO_XA_TRID_LEN,
- (ulint)xid->gtrid_length, MLOG_4BYTES, mtr);
-
- mlog_write_ulint(log_hdr + TRX_UNDO_XA_BQUAL_LEN,
- (ulint)xid->bqual_length, MLOG_4BYTES, mtr);
-
- mlog_write_string(log_hdr + TRX_UNDO_XA_XID, (const byte*) xid->data,
- XIDDATASIZE, mtr);
-}
-
-/********************************************************************//**
-Read X/Open XA Transaction Identification (XID) from undo log header */
-static
-void
-trx_undo_read_xid(
-/*==============*/
- trx_ulogf_t* log_hdr,/*!< in: undo log header */
- XID* xid) /*!< out: X/Open XA Transaction Identification */
-{
- xid->formatID = (long)mach_read_from_4(log_hdr + TRX_UNDO_XA_FORMAT);
-
- xid->gtrid_length
- = (long) mach_read_from_4(log_hdr + TRX_UNDO_XA_TRID_LEN);
- xid->bqual_length
- = (long) mach_read_from_4(log_hdr + TRX_UNDO_XA_BQUAL_LEN);
-
- memcpy(xid->data, log_hdr + TRX_UNDO_XA_XID, XIDDATASIZE);
-}
-
-/***************************************************************//**
-Adds space for the XA XID after an undo log old-style header. */
-static
-void
-trx_undo_header_add_space_for_xid(
-/*==============================*/
- page_t* undo_page,/*!< in: undo log segment header page */
- trx_ulogf_t* log_hdr,/*!< in: undo log header */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_upagef_t* page_hdr;
- ulint free;
- ulint new_free;
-
- page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
-
- free = mach_read_from_2(page_hdr + TRX_UNDO_PAGE_FREE);
-
- /* free is now the end offset of the old style undo log header */
-
- ut_a(free == (ulint)(log_hdr - undo_page) + TRX_UNDO_LOG_OLD_HDR_SIZE);
-
- new_free = free + (TRX_UNDO_LOG_XA_HDR_SIZE
- - TRX_UNDO_LOG_OLD_HDR_SIZE);
-
- /* Add space for a XID after the header, update the free offset
- fields on the undo log page and in the undo log header */
-
- mlog_write_ulint(page_hdr + TRX_UNDO_PAGE_START, new_free,
- MLOG_2BYTES, mtr);
-
- mlog_write_ulint(page_hdr + TRX_UNDO_PAGE_FREE, new_free,
- MLOG_2BYTES, mtr);
-
- mlog_write_ulint(log_hdr + TRX_UNDO_LOG_START, new_free,
- MLOG_2BYTES, mtr);
-}
-
-/**********************************************************************//**
-Writes the mtr log entry of an undo log header reuse. */
-UNIV_INLINE
-void
-trx_undo_insert_header_reuse_log(
-/*=============================*/
- const page_t* undo_page, /*!< in: undo log header page */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in: mtr */
-{
- mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_REUSE, mtr);
-
- mlog_catenate_dulint_compressed(mtr, trx_id);
-}
-#else /* !UNIV_HOTBACKUP */
-# define trx_undo_insert_header_reuse_log(undo_page,trx_id,mtr) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Parses the redo log entry of an undo log page header create or reuse.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-trx_undo_parse_page_header(
-/*=======================*/
- ulint type, /*!< in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr) /*!< in: mtr or NULL */
-{
- trx_id_t trx_id;
-
- ptr = mach_dulint_parse_compressed(ptr, end_ptr, &trx_id);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- if (page) {
- if (type == MLOG_UNDO_HDR_CREATE) {
- trx_undo_header_create(page, trx_id, mtr);
- } else {
- ut_ad(type == MLOG_UNDO_HDR_REUSE);
- trx_undo_insert_header_reuse(page, trx_id, mtr);
- }
- }
-
- return(ptr);
-}
-
-/***************************************************************//**
-Initializes a cached insert undo log header page for new use. NOTE that this
-function has its own log record type MLOG_UNDO_HDR_REUSE. You must NOT change
-the operation of this function!
-@return undo log header byte offset on page */
-static
-ulint
-trx_undo_insert_header_reuse(
-/*=========================*/
- page_t* undo_page, /*!< in/out: insert undo log segment
- header page, x-latched */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_upagef_t* page_hdr;
- trx_usegf_t* seg_hdr;
- trx_ulogf_t* log_hdr;
- ulint free;
- ulint new_free;
-
- ut_ad(mtr && undo_page);
-
- page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
-
- free = TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE;
-
- ut_a(free + TRX_UNDO_LOG_XA_HDR_SIZE < UNIV_PAGE_SIZE - 100);
-
- log_hdr = undo_page + free;
-
- new_free = free + TRX_UNDO_LOG_OLD_HDR_SIZE;
-
- /* Insert undo data is not needed after commit: we may free all
- the space on the page */
-
- ut_a(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_TYPE)
- == TRX_UNDO_INSERT);
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, new_free);
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, new_free);
-
- mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_ACTIVE);
-
- log_hdr = undo_page + free;
-
- mach_write_to_8(log_hdr + TRX_UNDO_TRX_ID, trx_id);
- mach_write_to_2(log_hdr + TRX_UNDO_LOG_START, new_free);
-
- mach_write_to_1(log_hdr + TRX_UNDO_XID_EXISTS, FALSE);
- mach_write_to_1(log_hdr + TRX_UNDO_DICT_TRANS, FALSE);
-
- /* Write the log record MLOG_UNDO_HDR_REUSE */
- trx_undo_insert_header_reuse_log(undo_page, trx_id, mtr);
-
- return(free);
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Writes the redo log entry of an update undo log header discard. */
-UNIV_INLINE
-void
-trx_undo_discard_latest_log(
-/*========================*/
- page_t* undo_page, /*!< in: undo log header page */
- mtr_t* mtr) /*!< in: mtr */
-{
- mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_DISCARD, mtr);
-}
-#else /* !UNIV_HOTBACKUP */
-# define trx_undo_discard_latest_log(undo_page, mtr) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Parses the redo log entry of an undo log page header discard.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-trx_undo_parse_discard_latest(
-/*==========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr __attribute__((unused)), /*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr) /*!< in: mtr or NULL */
-{
- ut_ad(end_ptr);
-
- if (page) {
- trx_undo_discard_latest_update_undo(page, mtr);
- }
-
- return(ptr);
-}
-
-/**********************************************************************//**
-If an update undo log can be discarded immediately, this function frees the
-space, resetting the page to the proper state for caching. */
-static
-void
-trx_undo_discard_latest_update_undo(
-/*================================*/
- page_t* undo_page, /*!< in: header page of an undo log of size 1 */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_usegf_t* seg_hdr;
- trx_upagef_t* page_hdr;
- trx_ulogf_t* log_hdr;
- trx_ulogf_t* prev_log_hdr;
- ulint free;
- ulint prev_hdr_offset;
-
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
- page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
-
- free = mach_read_from_2(seg_hdr + TRX_UNDO_LAST_LOG);
- log_hdr = undo_page + free;
-
- prev_hdr_offset = mach_read_from_2(log_hdr + TRX_UNDO_PREV_LOG);
-
- if (prev_hdr_offset != 0) {
- prev_log_hdr = undo_page + prev_hdr_offset;
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START,
- mach_read_from_2(prev_log_hdr
- + TRX_UNDO_LOG_START));
- mach_write_to_2(prev_log_hdr + TRX_UNDO_NEXT_LOG, 0);
- }
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, free);
-
- mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_CACHED);
- mach_write_to_2(seg_hdr + TRX_UNDO_LAST_LOG, prev_hdr_offset);
-
- trx_undo_discard_latest_log(undo_page, mtr);
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Tries to add a page to the undo log segment where the undo log is placed.
-@return page number if success, else FIL_NULL */
-UNIV_INTERN
-ulint
-trx_undo_add_page(
-/*==============*/
- trx_t* trx, /*!< in: transaction */
- trx_undo_t* undo, /*!< in: undo log memory object */
- mtr_t* mtr) /*!< in: mtr which does not have a latch to any
- undo log page; the caller must have reserved
- the rollback segment mutex */
-{
- page_t* header_page;
- page_t* new_page;
- trx_rseg_t* rseg;
- ulint page_no;
- ulint n_reserved;
- ibool success;
-
- ut_ad(mutex_own(&(trx->undo_mutex)));
- ut_ad(!mutex_own(&kernel_mutex));
- ut_ad(mutex_own(&(trx->rseg->mutex)));
-
- rseg = trx->rseg;
-
- if (rseg->curr_size == rseg->max_size) {
-
- return(FIL_NULL);
- }
-
- header_page = trx_undo_page_get(undo->space, undo->zip_size,
- undo->hdr_page_no, mtr);
-
- success = fsp_reserve_free_extents(&n_reserved, undo->space, 1,
- FSP_UNDO, mtr);
- if (!success) {
-
- return(FIL_NULL);
- }
-
- page_no = fseg_alloc_free_page_general(header_page + TRX_UNDO_SEG_HDR
- + TRX_UNDO_FSEG_HEADER,
- undo->top_page_no + 1, FSP_UP,
- TRUE, mtr);
-
- fil_space_release_free_extents(undo->space, n_reserved);
-
- if (page_no == FIL_NULL) {
-
- /* No space left */
-
- return(FIL_NULL);
- }
-
- undo->last_page_no = page_no;
-
- new_page = trx_undo_page_get(undo->space, undo->zip_size,
- page_no, mtr);
-
- trx_undo_page_init(new_page, undo->type, mtr);
-
- flst_add_last(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST,
- new_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr);
- undo->size++;
- rseg->curr_size++;
-
- return(page_no);
-}
-
-/********************************************************************//**
-Frees an undo log page that is not the header page.
-@return last page number in remaining log */
-static
-ulint
-trx_undo_free_page(
-/*===============*/
- trx_rseg_t* rseg, /*!< in: rollback segment */
- ibool in_history, /*!< in: TRUE if the undo log is in the history
- list */
- ulint space, /*!< in: space */
- ulint hdr_page_no, /*!< in: header page number */
- ulint page_no, /*!< in: page number to free: must not be the
- header page */
- mtr_t* mtr) /*!< in: mtr which does not have a latch to any
- undo log page; the caller must have reserved
- the rollback segment mutex */
-{
- page_t* header_page;
- page_t* undo_page;
- fil_addr_t last_addr;
- trx_rsegf_t* rseg_header;
- ulint hist_size;
- ulint zip_size;
-
- ut_a(hdr_page_no != page_no);
- ut_ad(!mutex_own(&kernel_mutex));
- ut_ad(mutex_own(&(rseg->mutex)));
-
- zip_size = rseg->zip_size;
-
- undo_page = trx_undo_page_get(space, zip_size, page_no, mtr);
-
- header_page = trx_undo_page_get(space, zip_size, hdr_page_no, mtr);
-
- flst_remove(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST,
- undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr);
-
- fseg_free_page(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER,
- space, page_no, mtr);
-
- last_addr = flst_get_last(header_page + TRX_UNDO_SEG_HDR
- + TRX_UNDO_PAGE_LIST, mtr);
- rseg->curr_size--;
-
- if (in_history) {
- rseg_header = trx_rsegf_get(space, zip_size,
- rseg->page_no, mtr);
-
- hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
- MLOG_4BYTES, mtr);
- ut_ad(hist_size > 0);
- mlog_write_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
- hist_size - 1, MLOG_4BYTES, mtr);
- }
-
- return(last_addr.page);
-}
-
-/********************************************************************//**
-Frees an undo log page when there is also the memory object for the undo
-log. */
-static
-void
-trx_undo_free_page_in_rollback(
-/*===========================*/
- trx_t* trx __attribute__((unused)), /*!< in: transaction */
- trx_undo_t* undo, /*!< in: undo log memory copy */
- ulint page_no,/*!< in: page number to free: must not be the
- header page */
- mtr_t* mtr) /*!< in: mtr which does not have a latch to any
- undo log page; the caller must have reserved
- the rollback segment mutex */
-{
- ulint last_page_no;
-
- ut_ad(undo->hdr_page_no != page_no);
- ut_ad(mutex_own(&(trx->undo_mutex)));
-
- last_page_no = trx_undo_free_page(undo->rseg, FALSE, undo->space,
- undo->hdr_page_no, page_no, mtr);
-
- undo->last_page_no = last_page_no;
- undo->size--;
-}
-
-/********************************************************************//**
-Empties an undo log header page of undo records for that undo log. Other
-undo logs may still have records on that page, if it is an update undo log. */
-static
-void
-trx_undo_empty_header_page(
-/*=======================*/
- ulint space, /*!< in: space */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint hdr_page_no, /*!< in: header page number */
- ulint hdr_offset, /*!< in: header offset */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* header_page;
- trx_ulogf_t* log_hdr;
- ulint end;
-
- header_page = trx_undo_page_get(space, zip_size, hdr_page_no, mtr);
-
- log_hdr = header_page + hdr_offset;
-
- end = trx_undo_page_get_end(header_page, hdr_page_no, hdr_offset);
-
- mlog_write_ulint(log_hdr + TRX_UNDO_LOG_START, end, MLOG_2BYTES, mtr);
-}
-
-/***********************************************************************//**
-Truncates an undo log from the end. This function is used during a rollback
-to free space from an undo log. */
-UNIV_INTERN
-void
-trx_undo_truncate_end(
-/*==================*/
- trx_t* trx, /*!< in: transaction whose undo log it is */
- trx_undo_t* undo, /*!< in: undo log */
- undo_no_t limit) /*!< in: all undo records with undo number
- >= this value should be truncated */
-{
- page_t* undo_page;
- ulint last_page_no;
- trx_undo_rec_t* rec;
- trx_undo_rec_t* trunc_here;
- trx_rseg_t* rseg;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(trx->undo_mutex)));
- ut_ad(mutex_own(&(trx->rseg->mutex)));
-
- rseg = trx->rseg;
-
- for (;;) {
- mtr_start(&mtr);
-
- trunc_here = NULL;
-
- last_page_no = undo->last_page_no;
-
- undo_page = trx_undo_page_get(undo->space, undo->zip_size,
- last_page_no, &mtr);
-
- rec = trx_undo_page_get_last_rec(undo_page, undo->hdr_page_no,
- undo->hdr_offset);
- for (;;) {
- if (rec == NULL) {
- if (last_page_no == undo->hdr_page_no) {
-
- goto function_exit;
- }
-
- trx_undo_free_page_in_rollback(
- trx, undo, last_page_no, &mtr);
- break;
- }
-
- if (ut_dulint_cmp(trx_undo_rec_get_undo_no(rec), limit)
- >= 0) {
- /* Truncate at least this record off, maybe
- more */
- trunc_here = rec;
- } else {
- goto function_exit;
- }
-
- rec = trx_undo_page_get_prev_rec(rec,
- undo->hdr_page_no,
- undo->hdr_offset);
- }
-
- mtr_commit(&mtr);
- }
-
-function_exit:
- if (trunc_here) {
- mlog_write_ulint(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE,
- trunc_here - undo_page, MLOG_2BYTES, &mtr);
- }
-
- mtr_commit(&mtr);
-}
-
-/***********************************************************************//**
-Truncates an undo log from the start. This function is used during a purge
-operation. */
-UNIV_INTERN
-void
-trx_undo_truncate_start(
-/*====================*/
- trx_rseg_t* rseg, /*!< in: rollback segment */
- ulint space, /*!< in: space id of the log */
- ulint hdr_page_no, /*!< in: header page number */
- ulint hdr_offset, /*!< in: header offset on the page */
- undo_no_t limit) /*!< in: all undo pages with
- undo numbers < this value
- should be truncated; NOTE that
- the function only frees whole
- pages; the header page is not
- freed, but emptied, if all the
- records there are < limit */
-{
- page_t* undo_page;
- trx_undo_rec_t* rec;
- trx_undo_rec_t* last_rec;
- ulint page_no;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(rseg->mutex)));
-
- if (ut_dulint_is_zero(limit)) {
-
- return;
- }
-loop:
- mtr_start(&mtr);
-
- rec = trx_undo_get_first_rec(space, rseg->zip_size,
- hdr_page_no, hdr_offset,
- RW_X_LATCH, &mtr);
- if (rec == NULL) {
- /* Already empty */
-
- mtr_commit(&mtr);
-
- return;
- }
-
- undo_page = page_align(rec);
-
- last_rec = trx_undo_page_get_last_rec(undo_page, hdr_page_no,
- hdr_offset);
- if (ut_dulint_cmp(trx_undo_rec_get_undo_no(last_rec), limit) >= 0) {
-
- mtr_commit(&mtr);
-
- return;
- }
-
- page_no = page_get_page_no(undo_page);
-
- if (page_no == hdr_page_no) {
- trx_undo_empty_header_page(space, rseg->zip_size,
- hdr_page_no, hdr_offset,
- &mtr);
- } else {
- trx_undo_free_page(rseg, TRUE, space, hdr_page_no,
- page_no, &mtr);
- }
-
- mtr_commit(&mtr);
-
- goto loop;
-}
-
-/**********************************************************************//**
-Frees an undo log segment which is not in the history list. */
-static
-void
-trx_undo_seg_free(
-/*==============*/
- trx_undo_t* undo) /*!< in: undo log */
-{
- trx_rseg_t* rseg;
- fseg_header_t* file_seg;
- trx_rsegf_t* rseg_header;
- trx_usegf_t* seg_header;
- ibool finished;
- mtr_t mtr;
-
- rseg = undo->rseg;
-
- do {
-
- mtr_start(&mtr);
-
- ut_ad(!mutex_own(&kernel_mutex));
-
- mutex_enter(&(rseg->mutex));
-
- seg_header = trx_undo_page_get(undo->space, undo->zip_size,
- undo->hdr_page_no,
- &mtr) + TRX_UNDO_SEG_HDR;
-
- file_seg = seg_header + TRX_UNDO_FSEG_HEADER;
-
- finished = fseg_free_step(file_seg, &mtr);
-
- if (finished) {
- /* Update the rseg header */
- rseg_header = trx_rsegf_get(
- rseg->space, rseg->zip_size, rseg->page_no,
- &mtr);
- trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL,
- &mtr);
- }
-
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
- } while (!finished);
-}
-
-/*========== UNDO LOG MEMORY COPY INITIALIZATION =====================*/
-
-/********************************************************************//**
-Creates and initializes an undo log memory object according to the values
-in the header in file, when the database is started. The memory object is
-inserted in the appropriate list of rseg.
-@return own: the undo log memory object */
-static
-trx_undo_t*
-trx_undo_mem_create_at_db_start(
-/*============================*/
- trx_rseg_t* rseg, /*!< in: rollback segment memory object */
- ulint id, /*!< in: slot index within rseg */
- ulint page_no,/*!< in: undo log segment page number */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* undo_page;
- trx_upagef_t* page_header;
- trx_usegf_t* seg_header;
- trx_ulogf_t* undo_header;
- trx_undo_t* undo;
- ulint type;
- ulint state;
- trx_id_t trx_id;
- ulint offset;
- fil_addr_t last_addr;
- page_t* last_page;
- trx_undo_rec_t* rec;
- XID xid;
- ibool xid_exists = FALSE;
-
- if (id >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr,
- "InnoDB: Error: undo->id is %lu\n", (ulong) id);
- ut_error;
- }
-
- undo_page = trx_undo_page_get(rseg->space, rseg->zip_size,
- page_no, mtr);
-
- page_header = undo_page + TRX_UNDO_PAGE_HDR;
-
- type = mtr_read_ulint(page_header + TRX_UNDO_PAGE_TYPE, MLOG_2BYTES,
- mtr);
- seg_header = undo_page + TRX_UNDO_SEG_HDR;
-
- state = mach_read_from_2(seg_header + TRX_UNDO_STATE);
-
- offset = mach_read_from_2(seg_header + TRX_UNDO_LAST_LOG);
-
- undo_header = undo_page + offset;
-
- trx_id = mtr_read_dulint(undo_header + TRX_UNDO_TRX_ID, mtr);
-
- xid_exists = mtr_read_ulint(undo_header + TRX_UNDO_XID_EXISTS,
- MLOG_1BYTE, mtr);
-
- /* Read X/Open XA transaction identification if it exists, or
- set it to NULL. */
-
- memset(&xid, 0, sizeof(xid));
- xid.formatID = -1;
-
- if (xid_exists == TRUE) {
- trx_undo_read_xid(undo_header, &xid);
- }
-
- mutex_enter(&(rseg->mutex));
-
- undo = trx_undo_mem_create(rseg, id, type, trx_id, &xid,
- page_no, offset);
- mutex_exit(&(rseg->mutex));
-
- undo->dict_operation = mtr_read_ulint(
- undo_header + TRX_UNDO_DICT_TRANS, MLOG_1BYTE, mtr);
-
- undo->table_id = mtr_read_dulint(undo_header + TRX_UNDO_TABLE_ID, mtr);
- undo->state = state;
- undo->size = flst_get_len(seg_header + TRX_UNDO_PAGE_LIST, mtr);
-
- /* If the log segment is being freed, the page list is inconsistent! */
- if (state == TRX_UNDO_TO_FREE) {
-
- goto add_to_list;
- }
-
- last_addr = flst_get_last(seg_header + TRX_UNDO_PAGE_LIST, mtr);
-
- undo->last_page_no = last_addr.page;
- undo->top_page_no = last_addr.page;
-
- last_page = trx_undo_page_get(rseg->space, rseg->zip_size,
- undo->last_page_no, mtr);
-
- rec = trx_undo_page_get_last_rec(last_page, page_no, offset);
-
- if (rec == NULL) {
- undo->empty = TRUE;
- } else {
- undo->empty = FALSE;
- undo->top_offset = rec - last_page;
- undo->top_undo_no = trx_undo_rec_get_undo_no(rec);
- }
-add_to_list:
- if (type == TRX_UNDO_INSERT) {
- if (state != TRX_UNDO_CACHED) {
- UT_LIST_ADD_LAST(undo_list, rseg->insert_undo_list,
- undo);
- } else {
- UT_LIST_ADD_LAST(undo_list, rseg->insert_undo_cached,
- undo);
- }
- } else {
- ut_ad(type == TRX_UNDO_UPDATE);
- if (state != TRX_UNDO_CACHED) {
- UT_LIST_ADD_LAST(undo_list, rseg->update_undo_list,
- undo);
- } else {
- UT_LIST_ADD_LAST(undo_list, rseg->update_undo_cached,
- undo);
- }
- }
-
- return(undo);
-}
-
-/********************************************************************//**
-Initializes the undo log lists for a rollback segment memory copy. This
-function is only called when the database is started or a new rollback
-segment is created.
-@return the combined size of undo log segments in pages */
-UNIV_INTERN
-ulint
-trx_undo_lists_init(
-/*================*/
- trx_rseg_t* rseg) /*!< in: rollback segment memory object */
-{
- ulint page_no;
- trx_undo_t* undo;
- ulint size = 0;
- trx_rsegf_t* rseg_header;
- ulint i;
- mtr_t mtr;
-
- UT_LIST_INIT(rseg->update_undo_list);
- UT_LIST_INIT(rseg->update_undo_cached);
- UT_LIST_INIT(rseg->insert_undo_list);
- UT_LIST_INIT(rseg->insert_undo_cached);
-
- mtr_start(&mtr);
-
- rseg_header = trx_rsegf_get_new(rseg->space, rseg->zip_size,
- rseg->page_no, &mtr);
-
- for (i = 0; i < TRX_RSEG_N_SLOTS; i++) {
- page_no = trx_rsegf_get_nth_undo(rseg_header, i, &mtr);
-
- /* In forced recovery: try to avoid operations which look
- at database pages; undo logs are rapidly changing data, and
- the probability that they are in an inconsistent state is
- high */
-
- if (page_no != FIL_NULL
- && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
-
- undo = trx_undo_mem_create_at_db_start(rseg, i,
- page_no, &mtr);
- size += undo->size;
-
- mtr_commit(&mtr);
-
- mtr_start(&mtr);
-
- rseg_header = trx_rsegf_get(
- rseg->space, rseg->zip_size, rseg->page_no,
- &mtr);
- }
- }
-
- mtr_commit(&mtr);
-
- return(size);
-}
-
-/********************************************************************//**
-Creates and initializes an undo log memory object.
-@return own: the undo log memory object */
-static
-trx_undo_t*
-trx_undo_mem_create(
-/*================*/
- trx_rseg_t* rseg, /*!< in: rollback segment memory object */
- ulint id, /*!< in: slot index within rseg */
- ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
- trx_id_t trx_id, /*!< in: id of the trx for which the undo log
- is created */
- const XID* xid, /*!< in: X/Open transaction identification */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset) /*!< in: undo log header byte offset on page */
-{
- trx_undo_t* undo;
-
- ut_ad(mutex_own(&(rseg->mutex)));
-
- if (id >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr,
- "InnoDB: Error: undo->id is %lu\n", (ulong) id);
- ut_error;
- }
-
- undo = mem_alloc(sizeof(trx_undo_t));
-
- if (undo == NULL) {
-
- return NULL;
- }
-
- undo->id = id;
- undo->type = type;
- undo->state = TRX_UNDO_ACTIVE;
- undo->del_marks = FALSE;
- undo->trx_id = trx_id;
- undo->xid = *xid;
-
- undo->dict_operation = FALSE;
-
- undo->rseg = rseg;
-
- undo->space = rseg->space;
- undo->zip_size = rseg->zip_size;
- undo->hdr_page_no = page_no;
- undo->hdr_offset = offset;
- undo->last_page_no = page_no;
- undo->size = 1;
-
- undo->empty = TRUE;
- undo->top_page_no = page_no;
- undo->guess_block = NULL;
-
- return(undo);
-}
-
-/********************************************************************//**
-Initializes a cached undo log object for new use. */
-static
-void
-trx_undo_mem_init_for_reuse(
-/*========================*/
- trx_undo_t* undo, /*!< in: undo log to init */
- trx_id_t trx_id, /*!< in: id of the trx for which the undo log
- is created */
- const XID* xid, /*!< in: X/Open XA transaction identification*/
- ulint offset) /*!< in: undo log header byte offset on page */
-{
- ut_ad(mutex_own(&((undo->rseg)->mutex)));
-
- if (UNIV_UNLIKELY(undo->id >= TRX_RSEG_N_SLOTS)) {
- fprintf(stderr, "InnoDB: Error: undo->id is %lu\n",
- (ulong) undo->id);
-
- mem_analyze_corruption(undo);
- ut_error;
- }
-
- undo->state = TRX_UNDO_ACTIVE;
- undo->del_marks = FALSE;
- undo->trx_id = trx_id;
- undo->xid = *xid;
-
- undo->dict_operation = FALSE;
-
- undo->hdr_offset = offset;
- undo->empty = TRUE;
-}
-
-/********************************************************************//**
-Frees an undo log memory copy. */
-UNIV_INTERN
-void
-trx_undo_mem_free(
-/*==============*/
- trx_undo_t* undo) /*!< in: the undo object to be freed */
-{
- if (undo->id >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr,
- "InnoDB: Error: undo->id is %lu\n", (ulong) undo->id);
- ut_error;
- }
-
- mem_free(undo);
-}
-
-/**********************************************************************//**
-Creates a new undo log.
-@return DB_SUCCESS if successful in creating the new undo lob object,
-possible error codes are: DB_TOO_MANY_CONCURRENT_TRXS
-DB_OUT_OF_FILE_SPACE DB_OUT_OF_MEMORY */
-static
-ulint
-trx_undo_create(
-/*============*/
- trx_t* trx, /*!< in: transaction */
- trx_rseg_t* rseg, /*!< in: rollback segment memory copy */
- ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
- trx_id_t trx_id, /*!< in: id of the trx for which the undo log
- is created */
- const XID* xid, /*!< in: X/Open transaction identification*/
- trx_undo_t** undo, /*!< out: the new undo log object, undefined
- * if did not succeed */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_rsegf_t* rseg_header;
- ulint page_no;
- ulint offset;
- ulint id;
- page_t* undo_page;
- ulint err;
-
- ut_ad(mutex_own(&(rseg->mutex)));
-
- if (rseg->curr_size == rseg->max_size) {
-
- return(DB_OUT_OF_FILE_SPACE);
- }
-
- rseg->curr_size++;
-
- rseg_header = trx_rsegf_get(rseg->space, rseg->zip_size, rseg->page_no,
- mtr);
-
- err = trx_undo_seg_create(rseg, rseg_header, type, &id,
- &undo_page, mtr);
-
- if (err != DB_SUCCESS) {
- /* Did not succeed */
-
- rseg->curr_size--;
-
- return(err);
- }
-
- page_no = page_get_page_no(undo_page);
-
- offset = trx_undo_header_create(undo_page, trx_id, mtr);
-
- if (trx->support_xa) {
- trx_undo_header_add_space_for_xid(undo_page,
- undo_page + offset, mtr);
- }
-
- *undo = trx_undo_mem_create(rseg, id, type, trx_id, xid,
- page_no, offset);
- if (*undo == NULL) {
-
- err = DB_OUT_OF_MEMORY;
- }
-
- return(err);
-}
-
-/*================ UNDO LOG ASSIGNMENT AND CLEANUP =====================*/
-
-/********************************************************************//**
-Reuses a cached undo log.
-@return the undo log memory object, NULL if none cached */
-static
-trx_undo_t*
-trx_undo_reuse_cached(
-/*==================*/
- trx_t* trx, /*!< in: transaction */
- trx_rseg_t* rseg, /*!< in: rollback segment memory object */
- ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
- trx_id_t trx_id, /*!< in: id of the trx for which the undo log
- is used */
- const XID* xid, /*!< in: X/Open XA transaction identification */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_undo_t* undo;
- page_t* undo_page;
- ulint offset;
-
- ut_ad(mutex_own(&(rseg->mutex)));
-
- if (type == TRX_UNDO_INSERT) {
-
- undo = UT_LIST_GET_FIRST(rseg->insert_undo_cached);
- if (undo == NULL) {
-
- return(NULL);
- }
-
- UT_LIST_REMOVE(undo_list, rseg->insert_undo_cached, undo);
- } else {
- ut_ad(type == TRX_UNDO_UPDATE);
-
- undo = UT_LIST_GET_FIRST(rseg->update_undo_cached);
- if (undo == NULL) {
-
- return(NULL);
- }
-
- UT_LIST_REMOVE(undo_list, rseg->update_undo_cached, undo);
- }
-
- ut_ad(undo->size == 1);
-
- if (undo->id >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr, "InnoDB: Error: undo->id is %lu\n",
- (ulong) undo->id);
- mem_analyze_corruption(undo);
- ut_error;
- }
-
- undo_page = trx_undo_page_get(undo->space, undo->zip_size,
- undo->hdr_page_no, mtr);
-
- if (type == TRX_UNDO_INSERT) {
- offset = trx_undo_insert_header_reuse(undo_page, trx_id, mtr);
-
- if (trx->support_xa) {
- trx_undo_header_add_space_for_xid(
- undo_page, undo_page + offset, mtr);
- }
- } else {
- ut_a(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_TYPE)
- == TRX_UNDO_UPDATE);
-
- offset = trx_undo_header_create(undo_page, trx_id, mtr);
-
- if (trx->support_xa) {
- trx_undo_header_add_space_for_xid(
- undo_page, undo_page + offset, mtr);
- }
- }
-
- trx_undo_mem_init_for_reuse(undo, trx_id, xid, offset);
-
- return(undo);
-}
-
-/**********************************************************************//**
-Marks an undo log header as a header of a data dictionary operation
-transaction. */
-static
-void
-trx_undo_mark_as_dict_operation(
-/*============================*/
- trx_t* trx, /*!< in: dict op transaction */
- trx_undo_t* undo, /*!< in: assigned undo log */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* hdr_page;
-
- hdr_page = trx_undo_page_get(undo->space, undo->zip_size,
- undo->hdr_page_no, mtr);
-
- switch (trx_get_dict_operation(trx)) {
- case TRX_DICT_OP_NONE:
- ut_error;
- case TRX_DICT_OP_INDEX:
- /* Do not discard the table on recovery. */
- undo->table_id = ut_dulint_zero;
- break;
- case TRX_DICT_OP_TABLE:
- undo->table_id = trx->table_id;
- break;
- }
-
- mlog_write_ulint(hdr_page + undo->hdr_offset
- + TRX_UNDO_DICT_TRANS,
- TRUE, MLOG_1BYTE, mtr);
-
- mlog_write_dulint(hdr_page + undo->hdr_offset + TRX_UNDO_TABLE_ID,
- undo->table_id, mtr);
-
- undo->dict_operation = TRUE;
-}
-
-/**********************************************************************//**
-Assigns an undo log for a transaction. A new undo log is created or a cached
-undo log reused.
-@return DB_SUCCESS if undo log assign successful, possible error codes
-are: DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE
-DB_OUT_OF_MEMORY */
-UNIV_INTERN
-ulint
-trx_undo_assign_undo(
-/*=================*/
- trx_t* trx, /*!< in: transaction */
- ulint type) /*!< in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
-{
- trx_rseg_t* rseg;
- trx_undo_t* undo;
- mtr_t mtr;
- ulint err = DB_SUCCESS;
-
- ut_ad(trx);
- ut_ad(trx->rseg);
-
- rseg = trx->rseg;
-
- ut_ad(mutex_own(&(trx->undo_mutex)));
-
- mtr_start(&mtr);
-
- ut_ad(!mutex_own(&kernel_mutex));
-
- mutex_enter(&(rseg->mutex));
-
- undo = trx_undo_reuse_cached(trx, rseg, type, trx->id, &trx->xid,
- &mtr);
- if (undo == NULL) {
- err = trx_undo_create(trx, rseg, type, trx->id, &trx->xid,
- &undo, &mtr);
- if (err != DB_SUCCESS) {
-
- goto func_exit;
- }
- }
-
- if (type == TRX_UNDO_INSERT) {
- UT_LIST_ADD_FIRST(undo_list, rseg->insert_undo_list, undo);
- ut_ad(trx->insert_undo == NULL);
- trx->insert_undo = undo;
- } else {
- UT_LIST_ADD_FIRST(undo_list, rseg->update_undo_list, undo);
- ut_ad(trx->update_undo == NULL);
- trx->update_undo = undo;
- }
-
- if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
- trx_undo_mark_as_dict_operation(trx, undo, &mtr);
- }
-
-func_exit:
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
-
- return err;
-}
-
-/******************************************************************//**
-Sets the state of the undo log segment at a transaction finish.
-@return undo log segment header page, x-latched */
-UNIV_INTERN
-page_t*
-trx_undo_set_state_at_finish(
-/*=========================*/
- trx_rseg_t* rseg, /*!< in: rollback segment memory object */
- trx_t* trx __attribute__((unused)), /*!< in: transaction */
- trx_undo_t* undo, /*!< in: undo log memory copy */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_usegf_t* seg_hdr;
- trx_upagef_t* page_hdr;
- page_t* undo_page;
- ulint state;
-
- ut_ad(trx);
- ut_ad(undo);
- ut_ad(mtr);
- ut_ad(mutex_own(&rseg->mutex));
-
- if (undo->id >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr, "InnoDB: Error: undo->id is %lu\n",
- (ulong) undo->id);
- mem_analyze_corruption(undo);
- ut_error;
- }
-
- undo_page = trx_undo_page_get(undo->space, undo->zip_size,
- undo->hdr_page_no, mtr);
-
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
- page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
-
- if (undo->size == 1
- && mach_read_from_2(page_hdr + TRX_UNDO_PAGE_FREE)
- < TRX_UNDO_PAGE_REUSE_LIMIT) {
-
- /* This is a heuristic to avoid the problem of all UNDO
- slots ending up in one of the UNDO lists. Previously if
- the server crashed with all the slots in one of the lists,
- transactions that required the slots of a different type
- would fail for lack of slots. */
-
- if (UT_LIST_GET_LEN(rseg->update_undo_list) < 500
- && UT_LIST_GET_LEN(rseg->insert_undo_list) < 500) {
-
- state = TRX_UNDO_CACHED;
- } else {
- state = TRX_UNDO_TO_FREE;
- }
-
- } else if (undo->type == TRX_UNDO_INSERT) {
-
- state = TRX_UNDO_TO_FREE;
- } else {
- state = TRX_UNDO_TO_PURGE;
- }
-
- undo->state = state;
-
- mlog_write_ulint(seg_hdr + TRX_UNDO_STATE, state, MLOG_2BYTES, mtr);
-
- return(undo_page);
-}
-
-/******************************************************************//**
-Sets the state of the undo log segment at a transaction prepare.
-@return undo log segment header page, x-latched */
-UNIV_INTERN
-page_t*
-trx_undo_set_state_at_prepare(
-/*==========================*/
- trx_t* trx, /*!< in: transaction */
- trx_undo_t* undo, /*!< in: undo log memory copy */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_usegf_t* seg_hdr;
- trx_upagef_t* page_hdr;
- trx_ulogf_t* undo_header;
- page_t* undo_page;
- ulint offset;
-
- ut_ad(trx && undo && mtr);
-
- if (undo->id >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr, "InnoDB: Error: undo->id is %lu\n",
- (ulong) undo->id);
- mem_analyze_corruption(undo);
- ut_error;
- }
-
- undo_page = trx_undo_page_get(undo->space, undo->zip_size,
- undo->hdr_page_no, mtr);
-
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
- page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
-
- /*------------------------------*/
- undo->state = TRX_UNDO_PREPARED;
- undo->xid = trx->xid;
- /*------------------------------*/
-
- mlog_write_ulint(seg_hdr + TRX_UNDO_STATE, undo->state,
- MLOG_2BYTES, mtr);
-
- offset = mach_read_from_2(seg_hdr + TRX_UNDO_LAST_LOG);
- undo_header = undo_page + offset;
-
- mlog_write_ulint(undo_header + TRX_UNDO_XID_EXISTS,
- TRUE, MLOG_1BYTE, mtr);
-
- trx_undo_write_xid(undo_header, &undo->xid, mtr);
-
- return(undo_page);
-}
-
-/**********************************************************************//**
-Adds the update undo log header as the first in the history list, and
-frees the memory object, or puts it to the list of cached update undo log
-segments. */
-UNIV_INTERN
-void
-trx_undo_update_cleanup(
-/*====================*/
- trx_t* trx, /*!< in: trx owning the update undo log */
- page_t* undo_page, /*!< in: update undo log header page,
- x-latched */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_rseg_t* rseg;
- trx_undo_t* undo;
-
- undo = trx->update_undo;
- rseg = trx->rseg;
-
- ut_ad(mutex_own(&(rseg->mutex)));
-
- trx_purge_add_update_undo_to_history(trx, undo_page, mtr);
-
- UT_LIST_REMOVE(undo_list, rseg->update_undo_list, undo);
-
- trx->update_undo = NULL;
-
- if (undo->state == TRX_UNDO_CACHED) {
-
- UT_LIST_ADD_FIRST(undo_list, rseg->update_undo_cached, undo);
- } else {
- ut_ad(undo->state == TRX_UNDO_TO_PURGE);
-
- trx_undo_mem_free(undo);
- }
-}
-
-/******************************************************************//**
-Frees or caches an insert undo log after a transaction commit or rollback.
-Knowledge of inserts is not needed after a commit or rollback, therefore
-the data can be discarded. */
-UNIV_INTERN
-void
-trx_undo_insert_cleanup(
-/*====================*/
- trx_t* trx) /*!< in: transaction handle */
-{
- trx_undo_t* undo;
- trx_rseg_t* rseg;
-
- undo = trx->insert_undo;
- ut_ad(undo);
-
- rseg = trx->rseg;
-
- mutex_enter(&(rseg->mutex));
-
- UT_LIST_REMOVE(undo_list, rseg->insert_undo_list, undo);
- trx->insert_undo = NULL;
-
- if (undo->state == TRX_UNDO_CACHED) {
-
- UT_LIST_ADD_FIRST(undo_list, rseg->insert_undo_cached, undo);
- } else {
- ut_ad(undo->state == TRX_UNDO_TO_FREE);
-
- /* Delete first the undo log segment in the file */
-
- mutex_exit(&(rseg->mutex));
-
- trx_undo_seg_free(undo);
-
- mutex_enter(&(rseg->mutex));
-
- ut_ad(rseg->curr_size > undo->size);
-
- rseg->curr_size -= undo->size;
-
- trx_undo_mem_free(undo);
- }
-
- mutex_exit(&(rseg->mutex));
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/usr/usr0sess.c b/storage/innodb_plugin/usr/usr0sess.c
deleted file mode 100644
index 8087dcb4170..00000000000
--- a/storage/innodb_plugin/usr/usr0sess.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file usr/usr0sess.c
-Sessions
-
-Created 6/25/1996 Heikki Tuuri
-*******************************************************/
-
-#include "usr0sess.h"
-
-#ifdef UNIV_NONINL
-#include "usr0sess.ic"
-#endif
-
-#include "trx0trx.h"
-
-/*********************************************************************//**
-Opens a session.
-@return own: session object */
-UNIV_INTERN
-sess_t*
-sess_open(void)
-/*===========*/
-{
- sess_t* sess;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- sess = mem_alloc(sizeof(sess_t));
-
- sess->state = SESS_ACTIVE;
-
- sess->trx = trx_create(sess);
-
- UT_LIST_INIT(sess->graphs);
-
- return(sess);
-}
-
-/*********************************************************************//**
-Closes a session, freeing the memory occupied by it. */
-UNIV_INTERN
-void
-sess_close(
-/*=======*/
- sess_t* sess) /*!< in, own: session object */
-{
- ut_ad(!mutex_own(&kernel_mutex));
-
- ut_a(UT_LIST_GET_LEN(sess->graphs) == 0);
-
- trx_free_for_background(sess->trx);
- mem_free(sess);
-}
diff --git a/storage/innodb_plugin/ut/ut0byte.c b/storage/innodb_plugin/ut/ut0byte.c
deleted file mode 100644
index 4e093f72ce2..00000000000
--- a/storage/innodb_plugin/ut/ut0byte.c
+++ /dev/null
@@ -1,55 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/***************************************************************//**
-@file ut/ut0byte.c
-Byte utilities
-
-Created 5/11/1994 Heikki Tuuri
-********************************************************************/
-
-#include "ut0byte.h"
-
-#ifdef UNIV_NONINL
-#include "ut0byte.ic"
-#endif
-
-/** Zero value for a dulint */
-UNIV_INTERN const dulint ut_dulint_zero = {0, 0};
-
-/** Maximum value for a dulint */
-UNIV_INTERN const dulint ut_dulint_max = {0xFFFFFFFFUL, 0xFFFFFFFFUL};
-
-#ifdef notdefined /* unused code */
-#include "ut0sort.h"
-
-/************************************************************//**
-Sort function for dulint arrays. */
-UNIV_INTERN
-void
-ut_dulint_sort(
-/*===========*/
- dulint* arr, /*!< in/out: array to be sorted */
- dulint* aux_arr,/*!< in/out: auxiliary array (same size as arr) */
- ulint low, /*!< in: low bound of sort interval, inclusive */
- ulint high) /*!< in: high bound of sort interval, noninclusive */
-{
- UT_SORT_FUNCTION_BODY(ut_dulint_sort, arr, aux_arr, low, high,
- ut_dulint_cmp);
-}
-#endif /* notdefined */
diff --git a/storage/innodb_plugin/ut/ut0dbg.c b/storage/innodb_plugin/ut/ut0dbg.c
deleted file mode 100644
index 4484e6c36de..00000000000
--- a/storage/innodb_plugin/ut/ut0dbg.c
+++ /dev/null
@@ -1,187 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/*****************************************************************//**
-@file ut/ut0dbg.c
-Debug utilities for Innobase.
-
-Created 1/30/1994 Heikki Tuuri
-**********************************************************************/
-
-#include "univ.i"
-#include "ut0dbg.h"
-
-#if defined(__GNUC__) && (__GNUC__ > 2)
-#else
-/** This is used to eliminate compiler warnings */
-UNIV_INTERN ulint ut_dbg_zero = 0;
-#endif
-
-#if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT)
-/** If this is set to TRUE by ut_dbg_assertion_failed(), all threads
-will stop at the next ut_a() or ut_ad(). */
-UNIV_INTERN ibool ut_dbg_stop_threads = FALSE;
-#endif
-#ifdef __NETWARE__
-/** Flag for ignoring further assertion failures. This is set to TRUE
-when on NetWare there happens an InnoDB assertion failure or other
-fatal error condition that requires an immediate shutdown. */
-UNIV_INTERN ibool panic_shutdown = FALSE;
-#elif !defined(UT_DBG_USE_ABORT)
-/** A null pointer that will be dereferenced to trigger a memory trap */
-UNIV_INTERN ulint* ut_dbg_null_ptr = NULL;
-#endif
-
-/*************************************************************//**
-Report a failed assertion. */
-UNIV_INTERN
-void
-ut_dbg_assertion_failed(
-/*====================*/
- const char* expr, /*!< in: the failed assertion (optional) */
- const char* file, /*!< in: source file containing the assertion */
- ulint line) /*!< in: line number of the assertion */
-{
- ut_print_timestamp(stderr);
-#ifdef UNIV_HOTBACKUP
- fprintf(stderr, " InnoDB: Assertion failure in file %s line %lu\n",
- file, line);
-#else /* UNIV_HOTBACKUP */
- fprintf(stderr,
- " InnoDB: Assertion failure in thread %lu"
- " in file %s line %lu\n",
- os_thread_pf(os_thread_get_curr_id()), file, line);
-#endif /* UNIV_HOTBACKUP */
- if (expr) {
- fprintf(stderr,
- "InnoDB: Failing assertion: %s\n", expr);
- }
-
- fputs("InnoDB: We intentionally generate a memory trap.\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com.\n"
- "InnoDB: If you get repeated assertion failures"
- " or crashes, even\n"
- "InnoDB: immediately after the mysqld startup, there may be\n"
- "InnoDB: corruption in the InnoDB tablespace. Please refer to\n"
- "InnoDB: " REFMAN "forcing-recovery.html\n"
- "InnoDB: about forcing recovery.\n", stderr);
-#if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT)
- ut_dbg_stop_threads = TRUE;
-#endif
-}
-
-#ifdef __NETWARE__
-/*************************************************************//**
-Shut down MySQL/InnoDB after assertion failure. */
-UNIV_INTERN
-void
-ut_dbg_panic(void)
-/*==============*/
-{
- if (!panic_shutdown) {
- panic_shutdown = TRUE;
- innobase_shutdown_for_mysql();
- }
- exit(1);
-}
-#else /* __NETWARE__ */
-# if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT)
-/*************************************************************//**
-Stop a thread after assertion failure. */
-UNIV_INTERN
-void
-ut_dbg_stop_thread(
-/*===============*/
- const char* file,
- ulint line)
-{
-#ifndef UNIV_HOTBACKUP
- fprintf(stderr, "InnoDB: Thread %lu stopped in file %s line %lu\n",
- os_thread_pf(os_thread_get_curr_id()), file, line);
- os_thread_sleep(1000000000);
-#endif /* !UNIV_HOTBACKUP */
-}
-# endif
-#endif /* __NETWARE__ */
-
-#ifdef UNIV_COMPILE_TEST_FUNCS
-
-#include <sys/types.h>
-#include <sys/time.h>
-#include <sys/resource.h>
-
-#include <unistd.h>
-
-#ifndef timersub
-#define timersub(a, b, r) \
- do { \
- (r)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
- (r)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
- if ((r)->tv_usec < 0) { \
- (r)->tv_sec--; \
- (r)->tv_usec += 1000000; \
- } \
- } while (0)
-#endif /* timersub */
-
-/*******************************************************************//**
-Resets a speedo (records the current time in it). */
-UNIV_INTERN
-void
-speedo_reset(
-/*=========*/
- speedo_t* speedo) /*!< out: speedo */
-{
- gettimeofday(&speedo->tv, NULL);
-
- getrusage(RUSAGE_SELF, &speedo->ru);
-}
-
-/*******************************************************************//**
-Shows the time elapsed and usage statistics since the last reset of a
-speedo. */
-UNIV_INTERN
-void
-speedo_show(
-/*========*/
- const speedo_t* speedo) /*!< in: speedo */
-{
- struct rusage ru_now;
- struct timeval tv_now;
- struct timeval tv_diff;
-
- getrusage(RUSAGE_SELF, &ru_now);
-
- gettimeofday(&tv_now, NULL);
-
-#define PRINT_TIMEVAL(prefix, tvp) \
- fprintf(stderr, "%s% 5ld.%06ld sec\n", \
- prefix, (tvp)->tv_sec, (tvp)->tv_usec)
-
- timersub(&tv_now, &speedo->tv, &tv_diff);
- PRINT_TIMEVAL("real", &tv_diff);
-
- timersub(&ru_now.ru_utime, &speedo->ru.ru_utime, &tv_diff);
- PRINT_TIMEVAL("user", &tv_diff);
-
- timersub(&ru_now.ru_stime, &speedo->ru.ru_stime, &tv_diff);
- PRINT_TIMEVAL("sys ", &tv_diff);
-}
-
-#endif /* UNIV_COMPILE_TEST_FUNCS */
diff --git a/storage/innodb_plugin/ut/ut0list.c b/storage/innodb_plugin/ut/ut0list.c
deleted file mode 100644
index 895a575c535..00000000000
--- a/storage/innodb_plugin/ut/ut0list.c
+++ /dev/null
@@ -1,194 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file ut/ut0list.c
-A double-linked list
-
-Created 4/26/2006 Osku Salerma
-************************************************************************/
-
-#include "ut0list.h"
-#ifdef UNIV_NONINL
-#include "ut0list.ic"
-#endif
-
-/****************************************************************//**
-Create a new list.
-@return list */
-UNIV_INTERN
-ib_list_t*
-ib_list_create(void)
-/*=================*/
-{
- ib_list_t* list = mem_alloc(sizeof(ib_list_t));
-
- list->first = NULL;
- list->last = NULL;
- list->is_heap_list = FALSE;
-
- return(list);
-}
-
-/****************************************************************//**
-Create a new list using the given heap. ib_list_free MUST NOT BE CALLED for
-lists created with this function.
-@return list */
-UNIV_INTERN
-ib_list_t*
-ib_list_create_heap(
-/*================*/
- mem_heap_t* heap) /*!< in: memory heap to use */
-{
- ib_list_t* list = mem_heap_alloc(heap, sizeof(ib_list_t));
-
- list->first = NULL;
- list->last = NULL;
- list->is_heap_list = TRUE;
-
- return(list);
-}
-
-/****************************************************************//**
-Free a list. */
-UNIV_INTERN
-void
-ib_list_free(
-/*=========*/
- ib_list_t* list) /*!< in: list */
-{
- ut_a(!list->is_heap_list);
-
- /* We don't check that the list is empty because it's entirely valid
- to e.g. have all the nodes allocated from a single heap that is then
- freed after the list itself is freed. */
-
- mem_free(list);
-}
-
-/****************************************************************//**
-Add the data to the start of the list.
-@return new list node */
-UNIV_INTERN
-ib_list_node_t*
-ib_list_add_first(
-/*==============*/
- ib_list_t* list, /*!< in: list */
- void* data, /*!< in: data */
- mem_heap_t* heap) /*!< in: memory heap to use */
-{
- return(ib_list_add_after(list, ib_list_get_first(list), data, heap));
-}
-
-/****************************************************************//**
-Add the data to the end of the list.
-@return new list node */
-UNIV_INTERN
-ib_list_node_t*
-ib_list_add_last(
-/*=============*/
- ib_list_t* list, /*!< in: list */
- void* data, /*!< in: data */
- mem_heap_t* heap) /*!< in: memory heap to use */
-{
- return(ib_list_add_after(list, ib_list_get_last(list), data, heap));
-}
-
-/****************************************************************//**
-Add the data after the indicated node.
-@return new list node */
-UNIV_INTERN
-ib_list_node_t*
-ib_list_add_after(
-/*==============*/
- ib_list_t* list, /*!< in: list */
- ib_list_node_t* prev_node, /*!< in: node preceding new node (can
- be NULL) */
- void* data, /*!< in: data */
- mem_heap_t* heap) /*!< in: memory heap to use */
-{
- ib_list_node_t* node = mem_heap_alloc(heap, sizeof(ib_list_node_t));
-
- node->data = data;
-
- if (!list->first) {
- /* Empty list. */
-
- ut_a(!prev_node);
-
- node->prev = NULL;
- node->next = NULL;
-
- list->first = node;
- list->last = node;
- } else if (!prev_node) {
- /* Start of list. */
-
- node->prev = NULL;
- node->next = list->first;
-
- list->first->prev = node;
-
- list->first = node;
- } else {
- /* Middle or end of list. */
-
- node->prev = prev_node;
- node->next = prev_node->next;
-
- prev_node->next = node;
-
- if (node->next) {
- node->next->prev = node;
- } else {
- list->last = node;
- }
- }
-
- return(node);
-}
-
-/****************************************************************//**
-Remove the node from the list. */
-UNIV_INTERN
-void
-ib_list_remove(
-/*===========*/
- ib_list_t* list, /*!< in: list */
- ib_list_node_t* node) /*!< in: node to remove */
-{
- if (node->prev) {
- node->prev->next = node->next;
- } else {
- /* First item in list. */
-
- ut_ad(list->first == node);
-
- list->first = node->next;
- }
-
- if (node->next) {
- node->next->prev = node->prev;
- } else {
- /* Last item in list. */
-
- ut_ad(list->last == node);
-
- list->last = node->prev;
- }
-}
diff --git a/storage/innodb_plugin/ut/ut0mem.c b/storage/innodb_plugin/ut/ut0mem.c
deleted file mode 100644
index 35a325b9ccd..00000000000
--- a/storage/innodb_plugin/ut/ut0mem.c
+++ /dev/null
@@ -1,708 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file ut/ut0mem.c
-Memory primitives
-
-Created 5/11/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "ut0mem.h"
-
-#ifdef UNIV_NONINL
-#include "ut0mem.ic"
-#endif
-
-#ifndef UNIV_HOTBACKUP
-# include "os0thread.h"
-# include "srv0srv.h"
-
-#include <stdlib.h>
-
-/** This struct is placed first in every allocated memory block */
-typedef struct ut_mem_block_struct ut_mem_block_t;
-
-/** The total amount of memory currently allocated from the operating
-system with os_mem_alloc_large() or malloc(). Does not count malloc()
-if srv_use_sys_malloc is set. Protected by ut_list_mutex. */
-UNIV_INTERN ulint ut_total_allocated_memory = 0;
-
-/** Mutex protecting ut_total_allocated_memory and ut_mem_block_list */
-UNIV_INTERN os_fast_mutex_t ut_list_mutex;
-
-/** Dynamically allocated memory block */
-struct ut_mem_block_struct{
- UT_LIST_NODE_T(ut_mem_block_t) mem_block_list;
- /*!< mem block list node */
- ulint size; /*!< size of allocated memory */
- ulint magic_n;/*!< magic number (UT_MEM_MAGIC_N) */
-};
-
-/** The value of ut_mem_block_struct::magic_n. Used in detecting
-memory corruption. */
-#define UT_MEM_MAGIC_N 1601650166
-
-/** List of all memory blocks allocated from the operating system
-with malloc. Protected by ut_list_mutex. */
-static UT_LIST_BASE_NODE_T(ut_mem_block_t) ut_mem_block_list;
-
-/** Flag: has ut_mem_block_list been initialized? */
-static ibool ut_mem_block_list_inited = FALSE;
-
-/** A dummy pointer for generating a null pointer exception in
-ut_malloc_low() */
-static ulint* ut_mem_null_ptr = NULL;
-
-/**********************************************************************//**
-Initializes the mem block list at database startup. */
-UNIV_INTERN
-void
-ut_mem_init(void)
-/*=============*/
-{
- ut_a(!ut_mem_block_list_inited);
- os_fast_mutex_init(&ut_list_mutex);
- UT_LIST_INIT(ut_mem_block_list);
- ut_mem_block_list_inited = TRUE;
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is
-defined and set_to_zero is TRUE.
-@return own: allocated memory */
-UNIV_INTERN
-void*
-ut_malloc_low(
-/*==========*/
- ulint n, /*!< in: number of bytes to allocate */
- ibool set_to_zero, /*!< in: TRUE if allocated memory should be
- set to zero if UNIV_SET_MEM_TO_ZERO is
- defined */
- ibool assert_on_error)/*!< in: if TRUE, we crash mysqld if the
- memory cannot be allocated */
-{
-#ifndef UNIV_HOTBACKUP
- ulint retry_count;
- void* ret;
-
- if (UNIV_LIKELY(srv_use_sys_malloc)) {
- ret = malloc(n);
- ut_a(ret || !assert_on_error);
-
-#ifdef UNIV_SET_MEM_TO_ZERO
- if (set_to_zero) {
- memset(ret, '\0', n);
- UNIV_MEM_ALLOC(ret, n);
- }
-#endif
- return(ret);
- }
-
- ut_ad((sizeof(ut_mem_block_t) % 8) == 0); /* check alignment ok */
- ut_a(ut_mem_block_list_inited);
-
- retry_count = 0;
-retry:
- os_fast_mutex_lock(&ut_list_mutex);
-
- ret = malloc(n + sizeof(ut_mem_block_t));
-
- if (ret == NULL && retry_count < 60) {
- if (retry_count == 0) {
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error: cannot allocate"
- " %lu bytes of\n"
- "InnoDB: memory with malloc!"
- " Total allocated memory\n"
- "InnoDB: by InnoDB %lu bytes."
- " Operating system errno: %lu\n"
- "InnoDB: Check if you should"
- " increase the swap file or\n"
- "InnoDB: ulimits of your operating system.\n"
- "InnoDB: On FreeBSD check you"
- " have compiled the OS with\n"
- "InnoDB: a big enough maximum process size.\n"
- "InnoDB: Note that in most 32-bit"
- " computers the process\n"
- "InnoDB: memory space is limited"
- " to 2 GB or 4 GB.\n"
- "InnoDB: We keep retrying"
- " the allocation for 60 seconds...\n",
- (ulong) n, (ulong) ut_total_allocated_memory,
-#ifdef __WIN__
- (ulong) GetLastError()
-#else
- (ulong) errno
-#endif
- );
- }
-
- os_fast_mutex_unlock(&ut_list_mutex);
-
- /* Sleep for a second and retry the allocation; maybe this is
- just a temporary shortage of memory */
-
- os_thread_sleep(1000000);
-
- retry_count++;
-
- goto retry;
- }
-
- if (ret == NULL) {
- /* Flush stderr to make more probable that the error
- message gets in the error file before we generate a seg
- fault */
-
- fflush(stderr);
-
- os_fast_mutex_unlock(&ut_list_mutex);
-
- /* Make an intentional seg fault so that we get a stack
- trace */
- /* Intentional segfault on NetWare causes an abend. Avoid this
- by graceful exit handling in ut_a(). */
-#if (!defined __NETWARE__)
- if (assert_on_error) {
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: We now intentionally"
- " generate a seg fault so that\n"
- "InnoDB: on Linux we get a stack trace.\n");
-
- if (*ut_mem_null_ptr) ut_mem_null_ptr = 0;
- } else {
- return(NULL);
- }
-#else
- ut_a(0);
-#endif
- }
-
- if (set_to_zero) {
-#ifdef UNIV_SET_MEM_TO_ZERO
- memset(ret, '\0', n + sizeof(ut_mem_block_t));
-#endif
- }
-
- UNIV_MEM_ALLOC(ret, n + sizeof(ut_mem_block_t));
-
- ((ut_mem_block_t*)ret)->size = n + sizeof(ut_mem_block_t);
- ((ut_mem_block_t*)ret)->magic_n = UT_MEM_MAGIC_N;
-
- ut_total_allocated_memory += n + sizeof(ut_mem_block_t);
-
- UT_LIST_ADD_FIRST(mem_block_list, ut_mem_block_list,
- ((ut_mem_block_t*)ret));
- os_fast_mutex_unlock(&ut_list_mutex);
-
- return((void*)((byte*)ret + sizeof(ut_mem_block_t)));
-#else /* !UNIV_HOTBACKUP */
- void* ret = malloc(n);
- ut_a(ret || !assert_on_error);
-
-# ifdef UNIV_SET_MEM_TO_ZERO
- if (set_to_zero) {
- memset(ret, '\0', n);
- }
-# endif
- return(ret);
-#endif /* !UNIV_HOTBACKUP */
-}
-
-/**********************************************************************//**
-Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is
-defined.
-@return own: allocated memory */
-UNIV_INTERN
-void*
-ut_malloc(
-/*======*/
- ulint n) /*!< in: number of bytes to allocate */
-{
-#ifndef UNIV_HOTBACKUP
- return(ut_malloc_low(n, TRUE, TRUE));
-#else /* !UNIV_HOTBACKUP */
- return(malloc(n));
-#endif /* !UNIV_HOTBACKUP */
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Tests if malloc of n bytes would succeed. ut_malloc() asserts if memory runs
-out. It cannot be used if we want to return an error message. Prints to
-stderr a message if fails.
-@return TRUE if succeeded */
-UNIV_INTERN
-ibool
-ut_test_malloc(
-/*===========*/
- ulint n) /*!< in: try to allocate this many bytes */
-{
- void* ret;
-
- ret = malloc(n);
-
- if (ret == NULL) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: cannot allocate"
- " %lu bytes of memory for\n"
- "InnoDB: a BLOB with malloc! Total allocated memory\n"
- "InnoDB: by InnoDB %lu bytes."
- " Operating system errno: %d\n"
- "InnoDB: Check if you should increase"
- " the swap file or\n"
- "InnoDB: ulimits of your operating system.\n"
- "InnoDB: On FreeBSD check you have"
- " compiled the OS with\n"
- "InnoDB: a big enough maximum process size.\n",
- (ulong) n,
- (ulong) ut_total_allocated_memory,
- (int) errno);
- return(FALSE);
- }
-
- free(ret);
-
- return(TRUE);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Frees a memory block allocated with ut_malloc. */
-UNIV_INTERN
-void
-ut_free(
-/*====*/
- void* ptr) /*!< in, own: memory block */
-{
-#ifndef UNIV_HOTBACKUP
- ut_mem_block_t* block;
-
- if (UNIV_LIKELY(srv_use_sys_malloc)) {
- free(ptr);
- return;
- }
-
- block = (ut_mem_block_t*)((byte*)ptr - sizeof(ut_mem_block_t));
-
- os_fast_mutex_lock(&ut_list_mutex);
-
- ut_a(block->magic_n == UT_MEM_MAGIC_N);
- ut_a(ut_total_allocated_memory >= block->size);
-
- ut_total_allocated_memory -= block->size;
-
- UT_LIST_REMOVE(mem_block_list, ut_mem_block_list, block);
- free(block);
-
- os_fast_mutex_unlock(&ut_list_mutex);
-#else /* !UNIV_HOTBACKUP */
- free(ptr);
-#endif /* !UNIV_HOTBACKUP */
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Implements realloc. This is needed by /pars/lexyy.c. Otherwise, you should not
-use this function because the allocation functions in mem0mem.h are the
-recommended ones in InnoDB.
-
-man realloc in Linux, 2004:
-
- realloc() changes the size of the memory block pointed to
- by ptr to size bytes. The contents will be unchanged to
- the minimum of the old and new sizes; newly allocated mem-
- ory will be uninitialized. If ptr is NULL, the call is
- equivalent to malloc(size); if size is equal to zero, the
- call is equivalent to free(ptr). Unless ptr is NULL, it
- must have been returned by an earlier call to malloc(),
- calloc() or realloc().
-
-RETURN VALUE
- realloc() returns a pointer to the newly allocated memory,
- which is suitably aligned for any kind of variable and may
- be different from ptr, or NULL if the request fails. If
- size was equal to 0, either NULL or a pointer suitable to
- be passed to free() is returned. If realloc() fails the
- original block is left untouched - it is not freed or
- moved.
-@return own: pointer to new mem block or NULL */
-UNIV_INTERN
-void*
-ut_realloc(
-/*=======*/
- void* ptr, /*!< in: pointer to old block or NULL */
- ulint size) /*!< in: desired size */
-{
- ut_mem_block_t* block;
- ulint old_size;
- ulint min_size;
- void* new_ptr;
-
- if (UNIV_LIKELY(srv_use_sys_malloc)) {
- return(realloc(ptr, size));
- }
-
- if (ptr == NULL) {
-
- return(ut_malloc(size));
- }
-
- if (size == 0) {
- ut_free(ptr);
-
- return(NULL);
- }
-
- block = (ut_mem_block_t*)((byte*)ptr - sizeof(ut_mem_block_t));
-
- ut_a(block->magic_n == UT_MEM_MAGIC_N);
-
- old_size = block->size - sizeof(ut_mem_block_t);
-
- if (size < old_size) {
- min_size = size;
- } else {
- min_size = old_size;
- }
-
- new_ptr = ut_malloc(size);
-
- if (new_ptr == NULL) {
-
- return(NULL);
- }
-
- /* Copy the old data from ptr */
- ut_memcpy(new_ptr, ptr, min_size);
-
- ut_free(ptr);
-
- return(new_ptr);
-}
-
-/**********************************************************************//**
-Frees in shutdown all allocated memory not freed yet. */
-UNIV_INTERN
-void
-ut_free_all_mem(void)
-/*=================*/
-{
- ut_mem_block_t* block;
-
- ut_a(ut_mem_block_list_inited);
- ut_mem_block_list_inited = FALSE;
- os_fast_mutex_free(&ut_list_mutex);
-
- while ((block = UT_LIST_GET_FIRST(ut_mem_block_list))) {
-
- ut_a(block->magic_n == UT_MEM_MAGIC_N);
- ut_a(ut_total_allocated_memory >= block->size);
-
- ut_total_allocated_memory -= block->size;
-
- UT_LIST_REMOVE(mem_block_list, ut_mem_block_list, block);
- free(block);
- }
-
- if (ut_total_allocated_memory != 0) {
- fprintf(stderr,
- "InnoDB: Warning: after shutdown"
- " total allocated memory is %lu\n",
- (ulong) ut_total_allocated_memory);
- }
-
- ut_mem_block_list_inited = FALSE;
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Copies up to size - 1 characters from the NUL-terminated string src to
-dst, NUL-terminating the result. Returns strlen(src), so truncation
-occurred if the return value >= size.
-@return strlen(src) */
-UNIV_INTERN
-ulint
-ut_strlcpy(
-/*=======*/
- char* dst, /*!< in: destination buffer */
- const char* src, /*!< in: source buffer */
- ulint size) /*!< in: size of destination buffer */
-{
- ulint src_size = strlen(src);
-
- if (size != 0) {
- ulint n = ut_min(src_size, size - 1);
-
- memcpy(dst, src, n);
- dst[n] = '\0';
- }
-
- return(src_size);
-}
-
-/**********************************************************************//**
-Like ut_strlcpy, but if src doesn't fit in dst completely, copies the last
-(size - 1) bytes of src, not the first.
-@return strlen(src) */
-UNIV_INTERN
-ulint
-ut_strlcpy_rev(
-/*===========*/
- char* dst, /*!< in: destination buffer */
- const char* src, /*!< in: source buffer */
- ulint size) /*!< in: size of destination buffer */
-{
- ulint src_size = strlen(src);
-
- if (size != 0) {
- ulint n = ut_min(src_size, size - 1);
-
- memcpy(dst, src + src_size - n, n + 1);
- }
-
- return(src_size);
-}
-
-/**********************************************************************//**
-Make a quoted copy of a NUL-terminated string. Leading and trailing
-quotes will not be included; only embedded quotes will be escaped.
-See also ut_strlenq() and ut_memcpyq().
-@return pointer to end of dest */
-UNIV_INTERN
-char*
-ut_strcpyq(
-/*=======*/
- char* dest, /*!< in: output buffer */
- char q, /*!< in: the quote character */
- const char* src) /*!< in: null-terminated string */
-{
- while (*src) {
- if ((*dest++ = *src++) == q) {
- *dest++ = q;
- }
- }
-
- return(dest);
-}
-
-/**********************************************************************//**
-Make a quoted copy of a fixed-length string. Leading and trailing
-quotes will not be included; only embedded quotes will be escaped.
-See also ut_strlenq() and ut_strcpyq().
-@return pointer to end of dest */
-UNIV_INTERN
-char*
-ut_memcpyq(
-/*=======*/
- char* dest, /*!< in: output buffer */
- char q, /*!< in: the quote character */
- const char* src, /*!< in: string to be quoted */
- ulint len) /*!< in: length of src */
-{
- const char* srcend = src + len;
-
- while (src < srcend) {
- if ((*dest++ = *src++) == q) {
- *dest++ = q;
- }
- }
-
- return(dest);
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Return the number of times s2 occurs in s1. Overlapping instances of s2
-are only counted once.
-@return the number of times s2 occurs in s1 */
-UNIV_INTERN
-ulint
-ut_strcount(
-/*========*/
- const char* s1, /*!< in: string to search in */
- const char* s2) /*!< in: string to search for */
-{
- ulint count = 0;
- ulint len = strlen(s2);
-
- if (len == 0) {
-
- return(0);
- }
-
- for (;;) {
- s1 = strstr(s1, s2);
-
- if (!s1) {
-
- break;
- }
-
- count++;
- s1 += len;
- }
-
- return(count);
-}
-
-/**********************************************************************//**
-Replace every occurrence of s1 in str with s2. Overlapping instances of s1
-are only replaced once.
-@return own: modified string, must be freed with mem_free() */
-UNIV_INTERN
-char*
-ut_strreplace(
-/*==========*/
- const char* str, /*!< in: string to operate on */
- const char* s1, /*!< in: string to replace */
- const char* s2) /*!< in: string to replace s1 with */
-{
- char* new_str;
- char* ptr;
- const char* str_end;
- ulint str_len = strlen(str);
- ulint s1_len = strlen(s1);
- ulint s2_len = strlen(s2);
- ulint count = 0;
- int len_delta = (int)s2_len - (int)s1_len;
-
- str_end = str + str_len;
-
- if (len_delta <= 0) {
- len_delta = 0;
- } else {
- count = ut_strcount(str, s1);
- }
-
- new_str = mem_alloc(str_len + count * len_delta + 1);
- ptr = new_str;
-
- while (str) {
- const char* next = strstr(str, s1);
-
- if (!next) {
- next = str_end;
- }
-
- memcpy(ptr, str, next - str);
- ptr += next - str;
-
- if (next == str_end) {
-
- break;
- }
-
- memcpy(ptr, s2, s2_len);
- ptr += s2_len;
-
- str = next + s1_len;
- }
-
- *ptr = '\0';
-
- return(new_str);
-}
-
-#ifdef UNIV_COMPILE_TEST_FUNCS
-
-void
-test_ut_str_sql_format()
-{
- char buf[128];
- ulint ret;
-
-#define CALL_AND_TEST(str, str_len, buf, buf_size, ret_expected, buf_expected)\
- do {\
- ibool ok = TRUE;\
- memset(buf, 'x', 10);\
- buf[10] = '\0';\
- fprintf(stderr, "TESTING \"%s\", %lu, %lu\n",\
- str, (ulint) str_len, (ulint) buf_size);\
- ret = ut_str_sql_format(str, str_len, buf, buf_size);\
- if (ret != ret_expected) {\
- fprintf(stderr, "expected ret %lu, got %lu\n",\
- (ulint) ret_expected, ret);\
- ok = FALSE;\
- }\
- if (strcmp((char*) buf, buf_expected) != 0) {\
- fprintf(stderr, "expected buf \"%s\", got \"%s\"\n",\
- buf_expected, buf);\
- ok = FALSE;\
- }\
- if (ok) {\
- fprintf(stderr, "OK: %lu, \"%s\"\n\n",\
- (ulint) ret, buf);\
- } else {\
- return;\
- }\
- } while (0)
-
- CALL_AND_TEST("abcd", 4, buf, 0, 0, "xxxxxxxxxx");
-
- CALL_AND_TEST("abcd", 4, buf, 1, 1, "");
-
- CALL_AND_TEST("abcd", 4, buf, 2, 1, "");
-
- CALL_AND_TEST("abcd", 0, buf, 3, 3, "''");
- CALL_AND_TEST("abcd", 1, buf, 3, 1, "");
- CALL_AND_TEST("abcd", 2, buf, 3, 1, "");
- CALL_AND_TEST("abcd", 3, buf, 3, 1, "");
- CALL_AND_TEST("abcd", 4, buf, 3, 1, "");
-
- CALL_AND_TEST("abcd", 0, buf, 4, 3, "''");
- CALL_AND_TEST("abcd", 1, buf, 4, 4, "'a'");
- CALL_AND_TEST("abcd", 2, buf, 4, 4, "'a'");
- CALL_AND_TEST("abcd", 3, buf, 4, 4, "'a'");
- CALL_AND_TEST("abcd", 4, buf, 4, 4, "'a'");
- CALL_AND_TEST("abcde", 5, buf, 4, 4, "'a'");
- CALL_AND_TEST("'", 1, buf, 4, 3, "''");
- CALL_AND_TEST("''", 2, buf, 4, 3, "''");
- CALL_AND_TEST("a'", 2, buf, 4, 4, "'a'");
- CALL_AND_TEST("'a", 2, buf, 4, 3, "''");
- CALL_AND_TEST("ab", 2, buf, 4, 4, "'a'");
-
- CALL_AND_TEST("abcdef", 0, buf, 5, 3, "''");
- CALL_AND_TEST("abcdef", 1, buf, 5, 4, "'a'");
- CALL_AND_TEST("abcdef", 2, buf, 5, 5, "'ab'");
- CALL_AND_TEST("abcdef", 3, buf, 5, 5, "'ab'");
- CALL_AND_TEST("abcdef", 4, buf, 5, 5, "'ab'");
- CALL_AND_TEST("abcdef", 5, buf, 5, 5, "'ab'");
- CALL_AND_TEST("abcdef", 6, buf, 5, 5, "'ab'");
- CALL_AND_TEST("'", 1, buf, 5, 5, "''''");
- CALL_AND_TEST("''", 2, buf, 5, 5, "''''");
- CALL_AND_TEST("a'", 2, buf, 5, 4, "'a'");
- CALL_AND_TEST("'a", 2, buf, 5, 5, "''''");
- CALL_AND_TEST("ab", 2, buf, 5, 5, "'ab'");
- CALL_AND_TEST("abc", 3, buf, 5, 5, "'ab'");
-
- CALL_AND_TEST("ab", 2, buf, 6, 5, "'ab'");
-
- CALL_AND_TEST("a'b'c", 5, buf, 32, 10, "'a''b''c'");
- CALL_AND_TEST("a'b'c'", 6, buf, 32, 12, "'a''b''c'''");
-}
-
-#endif /* UNIV_COMPILE_TEST_FUNCS */
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/ut/ut0rnd.c b/storage/innodb_plugin/ut/ut0rnd.c
deleted file mode 100644
index cefd0990ecc..00000000000
--- a/storage/innodb_plugin/ut/ut0rnd.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/***************************************************************//**
-@file ut/ut0rnd.c
-Random numbers and hashing
-
-Created 5/11/1994 Heikki Tuuri
-********************************************************************/
-
-#include "ut0rnd.h"
-
-#ifdef UNIV_NONINL
-#include "ut0rnd.ic"
-#endif
-
-/** These random numbers are used in ut_find_prime */
-/*@{*/
-#define UT_RANDOM_1 1.0412321
-#define UT_RANDOM_2 1.1131347
-#define UT_RANDOM_3 1.0132677
-/*@}*/
-
-/** Seed value of ut_rnd_gen_ulint(). */
-UNIV_INTERN ulint ut_rnd_ulint_counter = 65654363;
-
-/***********************************************************//**
-Looks for a prime number slightly greater than the given argument.
-The prime is chosen so that it is not near any power of 2.
-@return prime */
-UNIV_INTERN
-ulint
-ut_find_prime(
-/*==========*/
- ulint n) /*!< in: positive number > 100 */
-{
- ulint pow2;
- ulint i;
-
- n += 100;
-
- pow2 = 1;
- while (pow2 * 2 < n) {
- pow2 = 2 * pow2;
- }
-
- if ((double)n < 1.05 * (double)pow2) {
- n = (ulint) ((double)n * UT_RANDOM_1);
- }
-
- pow2 = 2 * pow2;
-
- if ((double)n > 0.95 * (double)pow2) {
- n = (ulint) ((double)n * UT_RANDOM_2);
- }
-
- if (n > pow2 - 20) {
- n += 30;
- }
-
- /* Now we have n far enough from powers of 2. To make
- n more random (especially, if it was not near
- a power of 2), we then multiply it by a random number. */
-
- n = (ulint) ((double)n * UT_RANDOM_3);
-
- for (;; n++) {
- i = 2;
- while (i * i <= n) {
- if (n % i == 0) {
- goto next_n;
- }
- i++;
- }
-
- /* Found a prime */
- break;
-next_n: ;
- }
-
- return(n);
-}
diff --git a/storage/innodb_plugin/ut/ut0ut.c b/storage/innodb_plugin/ut/ut0ut.c
deleted file mode 100644
index 498873e290a..00000000000
--- a/storage/innodb_plugin/ut/ut0ut.c
+++ /dev/null
@@ -1,625 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2009, Sun Microsystems, Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Sun Microsystems, Inc. Those modifications are gratefully acknowledged and
-are described briefly in the InnoDB documentation. The contributions by
-Sun Microsystems are incorporated with their permission, and subject to the
-conditions contained in the file COPYING.Sun_Microsystems.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/***************************************************************//**
-@file ut/ut0ut.c
-Various utilities for Innobase.
-
-Created 5/11/1994 Heikki Tuuri
-********************************************************************/
-
-#include "ut0ut.h"
-
-#ifdef UNIV_NONINL
-#include "ut0ut.ic"
-#endif
-
-#include <stdarg.h>
-#include <string.h>
-#include <ctype.h>
-
-#ifndef UNIV_HOTBACKUP
-# include "trx0trx.h"
-# include "ha_prototypes.h"
-# include "mysql_com.h" /* NAME_LEN */
-#endif /* UNIV_HOTBACKUP */
-
-/** A constant to prevent the compiler from optimizing ut_delay() away. */
-UNIV_INTERN ibool ut_always_false = FALSE;
-
-#ifdef __WIN__
-/*****************************************************************//**
-NOTE: The Windows epoch starts from 1601/01/01 whereas the Unix
-epoch starts from 1970/1/1. For selection of constant see:
-http://support.microsoft.com/kb/167296/ */
-#define WIN_TO_UNIX_DELTA_USEC ((ib_int64_t) 11644473600000000ULL)
-
-
-/*****************************************************************//**
-This is the Windows version of gettimeofday(2).
-@return 0 if all OK else -1 */
-static
-int
-ut_gettimeofday(
-/*============*/
- struct timeval* tv, /*!< out: Values are relative to Unix epoch */
- void* tz) /*!< in: not used */
-{
- FILETIME ft;
- ib_int64_t tm;
-
- if (!tv) {
- errno = EINVAL;
- return(-1);
- }
-
- GetSystemTimeAsFileTime(&ft);
-
- tm = (ib_int64_t) ft.dwHighDateTime << 32;
- tm |= ft.dwLowDateTime;
-
- ut_a(tm >= 0); /* If tm wraps over to negative, the quotient / 10
- does not work */
-
- tm /= 10; /* Convert from 100 nsec periods to usec */
-
- /* If we don't convert to the Unix epoch the value for
- struct timeval::tv_sec will overflow.*/
- tm -= WIN_TO_UNIX_DELTA_USEC;
-
- tv->tv_sec = (long) (tm / 1000000L);
- tv->tv_usec = (long) (tm % 1000000L);
-
- return(0);
-}
-#else
-/** An alias for gettimeofday(2). On Microsoft Windows, we have to
-reimplement this function. */
-#define ut_gettimeofday gettimeofday
-#endif
-
-/********************************************************//**
-Gets the high 32 bits in a ulint. That is makes a shift >> 32,
-but since there seem to be compiler bugs in both gcc and Visual C++,
-we do this by a special conversion.
-@return a >> 32 */
-UNIV_INTERN
-ulint
-ut_get_high32(
-/*==========*/
- ulint a) /*!< in: ulint */
-{
- ib_int64_t i;
-
- i = (ib_int64_t)a;
-
- i = i >> 32;
-
- return((ulint)i);
-}
-
-/**********************************************************//**
-Returns system time. We do not specify the format of the time returned:
-the only way to manipulate it is to use the function ut_difftime.
-@return system time */
-UNIV_INTERN
-ib_time_t
-ut_time(void)
-/*=========*/
-{
- return(time(NULL));
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Returns system time.
-Upon successful completion, the value 0 is returned; otherwise the
-value -1 is returned and the global variable errno is set to indicate the
-error.
-@return 0 on success, -1 otherwise */
-UNIV_INTERN
-int
-ut_usectime(
-/*========*/
- ulint* sec, /*!< out: seconds since the Epoch */
- ulint* ms) /*!< out: microseconds since the Epoch+*sec */
-{
- struct timeval tv;
- int ret;
- int errno_gettimeofday;
- int i;
-
- for (i = 0; i < 10; i++) {
-
- ret = ut_gettimeofday(&tv, NULL);
-
- if (ret == -1) {
- errno_gettimeofday = errno;
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: gettimeofday(): %s\n",
- strerror(errno_gettimeofday));
- os_thread_sleep(100000); /* 0.1 sec */
- errno = errno_gettimeofday;
- } else {
- break;
- }
- }
-
- if (ret != -1) {
- *sec = (ulint) tv.tv_sec;
- *ms = (ulint) tv.tv_usec;
- }
-
- return(ret);
-}
-
-/**********************************************************//**
-Returns the number of microseconds since epoch. Similar to
-time(3), the return value is also stored in *tloc, provided
-that tloc is non-NULL.
-@return us since epoch */
-UNIV_INTERN
-ullint
-ut_time_us(
-/*=======*/
- ullint* tloc) /*!< out: us since epoch, if non-NULL */
-{
- struct timeval tv;
- ullint us;
-
- ut_gettimeofday(&tv, NULL);
-
- us = (ullint) tv.tv_sec * 1000000 + tv.tv_usec;
-
- if (tloc != NULL) {
- *tloc = us;
- }
-
- return(us);
-}
-
-/**********************************************************//**
-Returns the number of milliseconds since some epoch. The
-value may wrap around. It should only be used for heuristic
-purposes.
-@return ms since epoch */
-UNIV_INTERN
-ulint
-ut_time_ms(void)
-/*============*/
-{
- struct timeval tv;
-
- ut_gettimeofday(&tv, NULL);
-
- return((ulint) tv.tv_sec * 1000 + tv.tv_usec / 1000);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************//**
-Returns the difference of two times in seconds.
-@return time2 - time1 expressed in seconds */
-UNIV_INTERN
-double
-ut_difftime(
-/*========*/
- ib_time_t time2, /*!< in: time */
- ib_time_t time1) /*!< in: time */
-{
- return(difftime(time2, time1));
-}
-
-/**********************************************************//**
-Prints a timestamp to a file. */
-UNIV_INTERN
-void
-ut_print_timestamp(
-/*===============*/
- FILE* file) /*!< in: file where to print */
-{
-#ifdef __WIN__
- SYSTEMTIME cal_tm;
-
- GetLocalTime(&cal_tm);
-
- fprintf(file,"%02d%02d%02d %2d:%02d:%02d",
- (int)cal_tm.wYear % 100,
- (int)cal_tm.wMonth,
- (int)cal_tm.wDay,
- (int)cal_tm.wHour,
- (int)cal_tm.wMinute,
- (int)cal_tm.wSecond);
-#else
- struct tm cal_tm;
- struct tm* cal_tm_ptr;
- time_t tm;
-
- time(&tm);
-
-#ifdef HAVE_LOCALTIME_R
- localtime_r(&tm, &cal_tm);
- cal_tm_ptr = &cal_tm;
-#else
- cal_tm_ptr = localtime(&tm);
-#endif
- fprintf(file,"%02d%02d%02d %2d:%02d:%02d",
- cal_tm_ptr->tm_year % 100,
- cal_tm_ptr->tm_mon + 1,
- cal_tm_ptr->tm_mday,
- cal_tm_ptr->tm_hour,
- cal_tm_ptr->tm_min,
- cal_tm_ptr->tm_sec);
-#endif
-}
-
-/**********************************************************//**
-Sprintfs a timestamp to a buffer, 13..14 chars plus terminating NUL. */
-UNIV_INTERN
-void
-ut_sprintf_timestamp(
-/*=================*/
- char* buf) /*!< in: buffer where to sprintf */
-{
-#ifdef __WIN__
- SYSTEMTIME cal_tm;
-
- GetLocalTime(&cal_tm);
-
- sprintf(buf, "%02d%02d%02d %2d:%02d:%02d",
- (int)cal_tm.wYear % 100,
- (int)cal_tm.wMonth,
- (int)cal_tm.wDay,
- (int)cal_tm.wHour,
- (int)cal_tm.wMinute,
- (int)cal_tm.wSecond);
-#else
- struct tm cal_tm;
- struct tm* cal_tm_ptr;
- time_t tm;
-
- time(&tm);
-
-#ifdef HAVE_LOCALTIME_R
- localtime_r(&tm, &cal_tm);
- cal_tm_ptr = &cal_tm;
-#else
- cal_tm_ptr = localtime(&tm);
-#endif
- sprintf(buf, "%02d%02d%02d %2d:%02d:%02d",
- cal_tm_ptr->tm_year % 100,
- cal_tm_ptr->tm_mon + 1,
- cal_tm_ptr->tm_mday,
- cal_tm_ptr->tm_hour,
- cal_tm_ptr->tm_min,
- cal_tm_ptr->tm_sec);
-#endif
-}
-
-#ifdef UNIV_HOTBACKUP
-/**********************************************************//**
-Sprintfs a timestamp to a buffer with no spaces and with ':' characters
-replaced by '_'. */
-UNIV_INTERN
-void
-ut_sprintf_timestamp_without_extra_chars(
-/*=====================================*/
- char* buf) /*!< in: buffer where to sprintf */
-{
-#ifdef __WIN__
- SYSTEMTIME cal_tm;
-
- GetLocalTime(&cal_tm);
-
- sprintf(buf, "%02d%02d%02d_%2d_%02d_%02d",
- (int)cal_tm.wYear % 100,
- (int)cal_tm.wMonth,
- (int)cal_tm.wDay,
- (int)cal_tm.wHour,
- (int)cal_tm.wMinute,
- (int)cal_tm.wSecond);
-#else
- struct tm cal_tm;
- struct tm* cal_tm_ptr;
- time_t tm;
-
- time(&tm);
-
-#ifdef HAVE_LOCALTIME_R
- localtime_r(&tm, &cal_tm);
- cal_tm_ptr = &cal_tm;
-#else
- cal_tm_ptr = localtime(&tm);
-#endif
- sprintf(buf, "%02d%02d%02d_%2d_%02d_%02d",
- cal_tm_ptr->tm_year % 100,
- cal_tm_ptr->tm_mon + 1,
- cal_tm_ptr->tm_mday,
- cal_tm_ptr->tm_hour,
- cal_tm_ptr->tm_min,
- cal_tm_ptr->tm_sec);
-#endif
-}
-
-/**********************************************************//**
-Returns current year, month, day. */
-UNIV_INTERN
-void
-ut_get_year_month_day(
-/*==================*/
- ulint* year, /*!< out: current year */
- ulint* month, /*!< out: month */
- ulint* day) /*!< out: day */
-{
-#ifdef __WIN__
- SYSTEMTIME cal_tm;
-
- GetLocalTime(&cal_tm);
-
- *year = (ulint)cal_tm.wYear;
- *month = (ulint)cal_tm.wMonth;
- *day = (ulint)cal_tm.wDay;
-#else
- struct tm cal_tm;
- struct tm* cal_tm_ptr;
- time_t tm;
-
- time(&tm);
-
-#ifdef HAVE_LOCALTIME_R
- localtime_r(&tm, &cal_tm);
- cal_tm_ptr = &cal_tm;
-#else
- cal_tm_ptr = localtime(&tm);
-#endif
- *year = (ulint)cal_tm_ptr->tm_year + 1900;
- *month = (ulint)cal_tm_ptr->tm_mon + 1;
- *day = (ulint)cal_tm_ptr->tm_mday;
-#endif
-}
-#endif /* UNIV_HOTBACKUP */
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Runs an idle loop on CPU. The argument gives the desired delay
-in microseconds on 100 MHz Pentium + Visual C++.
-@return dummy value */
-UNIV_INTERN
-ulint
-ut_delay(
-/*=====*/
- ulint delay) /*!< in: delay in microseconds on 100 MHz Pentium */
-{
- ulint i, j;
-
- j = 0;
-
- for (i = 0; i < delay * 50; i++) {
- j += i;
- UT_RELAX_CPU();
- }
-
- if (ut_always_false) {
- ut_always_false = (ibool) j;
- }
-
- return(j);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*************************************************************//**
-Prints the contents of a memory buffer in hex and ascii. */
-UNIV_INTERN
-void
-ut_print_buf(
-/*=========*/
- FILE* file, /*!< in: file where to print */
- const void* buf, /*!< in: memory buffer */
- ulint len) /*!< in: length of the buffer */
-{
- const byte* data;
- ulint i;
-
- UNIV_MEM_ASSERT_RW(buf, len);
-
- fprintf(file, " len %lu; hex ", len);
-
- for (data = (const byte*)buf, i = 0; i < len; i++) {
- fprintf(file, "%02lx", (ulong)*data++);
- }
-
- fputs("; asc ", file);
-
- data = (const byte*)buf;
-
- for (i = 0; i < len; i++) {
- int c = (int) *data++;
- putc(isprint(c) ? c : ' ', file);
- }
-
- putc(';', file);
-}
-
-/*************************************************************//**
-Calculates fast the number rounded up to the nearest power of 2.
-@return first power of 2 which is >= n */
-UNIV_INTERN
-ulint
-ut_2_power_up(
-/*==========*/
- ulint n) /*!< in: number != 0 */
-{
- ulint res;
-
- res = 1;
-
- ut_ad(n > 0);
-
- while (res < n) {
- res = res * 2;
- }
-
- return(res);
-}
-
-/**********************************************************************//**
-Outputs a NUL-terminated file name, quoted with apostrophes. */
-UNIV_INTERN
-void
-ut_print_filename(
-/*==============*/
- FILE* f, /*!< in: output stream */
- const char* name) /*!< in: name to print */
-{
- putc('\'', f);
- for (;;) {
- int c = *name++;
- switch (c) {
- case 0:
- goto done;
- case '\'':
- putc(c, f);
- /* fall through */
- default:
- putc(c, f);
- }
- }
-done:
- putc('\'', f);
-}
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Outputs a fixed-length string, quoted as an SQL identifier.
-If the string contains a slash '/', the string will be
-output as two identifiers separated by a period (.),
-as in SQL database_name.identifier. */
-UNIV_INTERN
-void
-ut_print_name(
-/*==========*/
- FILE* f, /*!< in: output stream */
- trx_t* trx, /*!< in: transaction */
- ibool table_id,/*!< in: TRUE=print a table name,
- FALSE=print other identifier */
- const char* name) /*!< in: name to print */
-{
- ut_print_namel(f, trx, table_id, name, strlen(name));
-}
-
-/**********************************************************************//**
-Outputs a fixed-length string, quoted as an SQL identifier.
-If the string contains a slash '/', the string will be
-output as two identifiers separated by a period (.),
-as in SQL database_name.identifier. */
-UNIV_INTERN
-void
-ut_print_namel(
-/*===========*/
- FILE* f, /*!< in: output stream */
- trx_t* trx, /*!< in: transaction (NULL=no quotes) */
- ibool table_id,/*!< in: TRUE=print a table name,
- FALSE=print other identifier */
- const char* name, /*!< in: name to print */
- ulint namelen)/*!< in: length of name */
-{
- /* 2 * NAME_LEN for database and table name,
- and some slack for the #mysql50# prefix and quotes */
- char buf[3 * NAME_LEN];
- const char* bufend;
-
- bufend = innobase_convert_name(buf, sizeof buf,
- name, namelen,
- trx ? trx->mysql_thd : NULL,
- table_id);
-
- fwrite(buf, 1, bufend - buf, f);
-}
-
-/**********************************************************************//**
-Catenate files. */
-UNIV_INTERN
-void
-ut_copy_file(
-/*=========*/
- FILE* dest, /*!< in: output file */
- FILE* src) /*!< in: input file to be appended to output */
-{
- long len = ftell(src);
- char buf[4096];
-
- rewind(src);
- do {
- size_t maxs = len < (long) sizeof buf
- ? (size_t) len
- : sizeof buf;
- size_t size = fread(buf, 1, maxs, src);
- fwrite(buf, 1, size, dest);
- len -= (long) size;
- if (size < maxs) {
- break;
- }
- } while (len > 0);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef __WIN__
-# include <stdarg.h>
-/**********************************************************************//**
-A substitute for snprintf(3), formatted output conversion into
-a limited buffer.
-@return number of characters that would have been printed if the size
-were unlimited, not including the terminating '\0'. */
-UNIV_INTERN
-int
-ut_snprintf(
-/*========*/
- char* str, /*!< out: string */
- size_t size, /*!< in: str size */
- const char* fmt, /*!< in: format */
- ...) /*!< in: format values */
-{
- int res;
- va_list ap1;
- va_list ap2;
-
- va_start(ap1, fmt);
- va_start(ap2, fmt);
-
- res = _vscprintf(fmt, ap1);
- ut_a(res != -1);
-
- if (size > 0) {
- _vsnprintf(str, size, fmt, ap2);
-
- if ((size_t) res >= size) {
- str[size - 1] = '\0';
- }
- }
-
- va_end(ap1);
- va_end(ap2);
-
- return(res);
-}
-#endif /* __WIN__ */
diff --git a/storage/innodb_plugin/ut/ut0vec.c b/storage/innodb_plugin/ut/ut0vec.c
deleted file mode 100644
index 45f2bc9771f..00000000000
--- a/storage/innodb_plugin/ut/ut0vec.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file ut/ut0vec.c
-A vector of pointers to data items
-
-Created 4/6/2006 Osku Salerma
-************************************************************************/
-
-#include "ut0vec.h"
-#ifdef UNIV_NONINL
-#include "ut0vec.ic"
-#endif
-#include <string.h>
-
-/****************************************************************//**
-Create a new vector with the given initial size.
-@return vector */
-UNIV_INTERN
-ib_vector_t*
-ib_vector_create(
-/*=============*/
- mem_heap_t* heap, /*!< in: heap */
- ulint size) /*!< in: initial size */
-{
- ib_vector_t* vec;
-
- ut_a(size > 0);
-
- vec = mem_heap_alloc(heap, sizeof(*vec));
-
- vec->heap = heap;
- vec->data = mem_heap_alloc(heap, sizeof(void*) * size);
- vec->used = 0;
- vec->total = size;
-
- return(vec);
-}
-
-/****************************************************************//**
-Push a new element to the vector, increasing its size if necessary. */
-UNIV_INTERN
-void
-ib_vector_push(
-/*===========*/
- ib_vector_t* vec, /*!< in: vector */
- void* elem) /*!< in: data element */
-{
- if (vec->used >= vec->total) {
- void** new_data;
- ulint new_total = vec->total * 2;
-
- new_data = mem_heap_alloc(vec->heap,
- sizeof(void*) * new_total);
- memcpy(new_data, vec->data, sizeof(void*) * vec->total);
-
- vec->data = new_data;
- vec->total = new_total;
- }
-
- vec->data[vec->used] = elem;
- vec->used++;
-}
diff --git a/storage/innodb_plugin/ut/ut0wqueue.c b/storage/innodb_plugin/ut/ut0wqueue.c
deleted file mode 100644
index 5220d1e17f4..00000000000
--- a/storage/innodb_plugin/ut/ut0wqueue.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-#include "ut0wqueue.h"
-
-/*******************************************************************//**
-@file ut/ut0wqueue.c
-A work queue
-
-Created 4/26/2006 Osku Salerma
-************************************************************************/
-
-/****************************************************************//**
-Create a new work queue.
-@return work queue */
-UNIV_INTERN
-ib_wqueue_t*
-ib_wqueue_create(void)
-/*===================*/
-{
- ib_wqueue_t* wq = mem_alloc(sizeof(ib_wqueue_t));
-
- mutex_create(&wq->mutex, SYNC_WORK_QUEUE);
-
- wq->items = ib_list_create();
- wq->event = os_event_create(NULL);
-
- return(wq);
-}
-
-/****************************************************************//**
-Free a work queue. */
-UNIV_INTERN
-void
-ib_wqueue_free(
-/*===========*/
- ib_wqueue_t* wq) /*!< in: work queue */
-{
- ut_a(!ib_list_get_first(wq->items));
-
- mutex_free(&wq->mutex);
- ib_list_free(wq->items);
- os_event_free(wq->event);
-
- mem_free(wq);
-}
-
-/****************************************************************//**
-Add a work item to the queue. */
-UNIV_INTERN
-void
-ib_wqueue_add(
-/*==========*/
- ib_wqueue_t* wq, /*!< in: work queue */
- void* item, /*!< in: work item */
- mem_heap_t* heap) /*!< in: memory heap to use for allocating the
- list node */
-{
- mutex_enter(&wq->mutex);
-
- ib_list_add_last(wq->items, item, heap);
- os_event_set(wq->event);
-
- mutex_exit(&wq->mutex);
-}
-
-/****************************************************************//**
-Wait for a work item to appear in the queue.
-@return work item */
-UNIV_INTERN
-void*
-ib_wqueue_wait(
-/*===========*/
- ib_wqueue_t* wq) /*!< in: work queue */
-{
- ib_list_node_t* node;
-
- for (;;) {
- os_event_wait(wq->event);
-
- mutex_enter(&wq->mutex);
-
- node = ib_list_get_first(wq->items);
-
- if (node) {
- ib_list_remove(wq->items, node);
-
- if (!ib_list_get_first(wq->items)) {
- /* We must reset the event when the list
- gets emptied. */
- os_event_reset(wq->event);
- }
-
- break;
- }
-
- mutex_exit(&wq->mutex);
- }
-
- mutex_exit(&wq->mutex);
-
- return(node->data);
-}
diff --git a/storage/myisam/Makefile.am b/storage/myisam/Makefile.am
index f50c312b8e4..6dd0d2bcbdb 100644
--- a/storage/myisam/Makefile.am
+++ b/storage/myisam/Makefile.am
@@ -150,5 +150,26 @@ SUFFIXES = .sh
@CHMOD@ +x $@-t
@MV@ $@-t $@
+if HAVE_DTRACE_DASH_G
+libmyisam_a_LIBADD = probes_mysql.o
+libmyisam_a_DEPENDENCIES = probes_mysql.o dtrace_files dtrace_providers
+CLEANFILES += probes_mysql.o dtrace_files dtrace_providers
+DTRACEFILES = ha_myisam.o
+DTRACEPROVIDER = probes_mysql.d
+CLEANFILES += $(DTRACEPROVIDER) dtrace_sources
+
+dtrace_files:
+ echo $(DTRACEFILES) > $@
+dtrace_providers: probes_mysql.d
+ echo $(DTRACEPROVIDER) > $@
+probes_mysql.d:
+ -$(RM) -f probes_mysql.d
+ $(CP) $(top_srcdir)/include/probes_mysql.d.base probes_mysql.d
+ echo timestamp > dtrace_sources
+
+probes_mysql.o: $(DTRACEPROVIDER) $(DTRACEFILES)
+ $(DTRACE) $(DTRACEFLAGS) -G -s $(DTRACEPROVIDER) $(DTRACEFILES) -o $@
+endif
+
# Don't update the files from bitkeeper
%::SCCS/s.%
diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
index 9c913b4f14d..612d02bbcd3 100644
--- a/storage/myisam/ha_myisam.cc
+++ b/storage/myisam/ha_myisam.cc
@@ -20,6 +20,7 @@
#define MYSQL_SERVER 1
#include "mysql_priv.h"
+#include "probes_mysql.h"
#include <mysql/plugin.h>
#include <m_ctype.h>
#include <my_bit.h>
@@ -1646,10 +1647,12 @@ int ha_myisam::index_read_map(uchar *buf, const uchar *key,
key_part_map keypart_map,
enum ha_rkey_function find_flag)
{
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
DBUG_ASSERT(inited==INDEX);
ha_statistic_increment(&SSV::ha_read_key_count);
int error=mi_rkey(file, buf, active_index, key, keypart_map, find_flag);
table->status=error ? STATUS_NOT_FOUND: 0;
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
@@ -1657,57 +1660,69 @@ int ha_myisam::index_read_idx_map(uchar *buf, uint index, const uchar *key,
key_part_map keypart_map,
enum ha_rkey_function find_flag)
{
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
ha_statistic_increment(&SSV::ha_read_key_count);
int error=mi_rkey(file, buf, index, key, keypart_map, find_flag);
table->status=error ? STATUS_NOT_FOUND: 0;
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
int ha_myisam::index_read_last_map(uchar *buf, const uchar *key,
key_part_map keypart_map)
{
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
DBUG_ENTER("ha_myisam::index_read_last");
DBUG_ASSERT(inited==INDEX);
ha_statistic_increment(&SSV::ha_read_key_count);
int error=mi_rkey(file, buf, active_index, key, keypart_map,
HA_READ_PREFIX_LAST);
table->status=error ? STATUS_NOT_FOUND: 0;
+ MYSQL_INDEX_READ_ROW_DONE(error);
DBUG_RETURN(error);
}
int ha_myisam::index_next(uchar *buf)
{
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
DBUG_ASSERT(inited==INDEX);
ha_statistic_increment(&SSV::ha_read_next_count);
int error=mi_rnext(file,buf,active_index);
table->status=error ? STATUS_NOT_FOUND: 0;
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
int ha_myisam::index_prev(uchar *buf)
{
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
DBUG_ASSERT(inited==INDEX);
ha_statistic_increment(&SSV::ha_read_prev_count);
int error=mi_rprev(file,buf, active_index);
table->status=error ? STATUS_NOT_FOUND: 0;
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
int ha_myisam::index_first(uchar *buf)
{
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
DBUG_ASSERT(inited==INDEX);
ha_statistic_increment(&SSV::ha_read_first_count);
int error=mi_rfirst(file, buf, active_index);
table->status=error ? STATUS_NOT_FOUND: 0;
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
int ha_myisam::index_last(uchar *buf)
{
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
DBUG_ASSERT(inited==INDEX);
ha_statistic_increment(&SSV::ha_read_last_count);
int error=mi_rlast(file, buf, active_index);
table->status=error ? STATUS_NOT_FOUND: 0;
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
@@ -1717,12 +1732,14 @@ int ha_myisam::index_next_same(uchar *buf,
{
int error;
DBUG_ASSERT(inited==INDEX);
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
ha_statistic_increment(&SSV::ha_read_next_count);
do
{
error= mi_rnext_same(file,buf);
} while (error == HA_ERR_RECORD_DELETED);
table->status=error ? STATUS_NOT_FOUND: 0;
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
@@ -1736,9 +1753,12 @@ int ha_myisam::rnd_init(bool scan)
int ha_myisam::rnd_next(uchar *buf)
{
+ MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str,
+ TRUE);
ha_statistic_increment(&SSV::ha_read_rnd_next_count);
int error=mi_scan(file, buf);
table->status=error ? STATUS_NOT_FOUND: 0;
+ MYSQL_READ_ROW_DONE(error);
return error;
}
@@ -1749,9 +1769,12 @@ int ha_myisam::restart_rnd_next(uchar *buf, uchar *pos)
int ha_myisam::rnd_pos(uchar *buf, uchar *pos)
{
+ MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str,
+ FALSE);
ha_statistic_increment(&SSV::ha_read_rnd_count);
int error=mi_rrnd(file, buf, my_get_ptr(pos,ref_length));
table->status=error ? STATUS_NOT_FOUND: 0;
+ MYSQL_READ_ROW_DONE(error);
return error;
}
diff --git a/storage/myisammrg/Makefile.am b/storage/myisammrg/Makefile.am
index 1ca51bc9d03..b7eab6bfdb7 100644
--- a/storage/myisammrg/Makefile.am
+++ b/storage/myisammrg/Makefile.am
@@ -40,5 +40,26 @@ libmyisammrg_a_SOURCES = myrg_open.c myrg_extra.c myrg_info.c myrg_locking.c \
EXTRA_DIST = CMakeLists.txt plug.in
+if HAVE_DTRACE_DASH_G
+libmyisammrg_a_LIBADD = probes_mysql.o
+libmyisammrg_a_DEPENDENCIES = probes_mysql.o dtrace_files dtrace_providers
+CLEANFILES = probes_mysql.o dtrace_files dtrace_providers
+DTRACEFILES = ha_myisammrg.o
+DTRACEPROVIDER = probes_mysql.d
+CLEANFILES += $(DTRACEPROVIDER) dtrace_sources
+
+dtrace_files:
+ echo $(DTRACEFILES) > $@
+dtrace_providers: probes_mysql.d
+ echo $(DTRACEPROVIDER) > $@
+probes_mysql.d:
+ -$(RM) -f probes_mysql.d
+ $(CP) $(top_srcdir)/include/probes_mysql.d.base probes_mysql.d
+ echo timestamp > dtrace_sources
+
+probes_mysql.o: $(DTRACEPROVIDER) $(DTRACEFILES)
+ $(DTRACE) $(DTRACEFLAGS) -G -s $(DTRACEPROVIDER) $(DTRACEFILES) -o $@
+endif
+
# Don't update the files from bitkeeper
%::SCCS/s.%
diff --git a/storage/myisammrg/ha_myisammrg.cc b/storage/myisammrg/ha_myisammrg.cc
index e265dac2c82..addec5bb58a 100644
--- a/storage/myisammrg/ha_myisammrg.cc
+++ b/storage/myisammrg/ha_myisammrg.cc
@@ -96,6 +96,7 @@
#define MYSQL_SERVER 1
#include "mysql_priv.h"
+#include "probes_mysql.h"
#include <mysql/plugin.h>
#include <m_ctype.h>
#include "../myisam/ha_myisam.h"
@@ -735,9 +736,11 @@ int ha_myisammrg::index_read_map(uchar * buf, const uchar * key,
enum ha_rkey_function find_flag)
{
DBUG_ASSERT(this->file->children_attached);
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
ha_statistic_increment(&SSV::ha_read_key_count);
int error=myrg_rkey(file,buf,active_index, key, keypart_map, find_flag);
table->status=error ? STATUS_NOT_FOUND: 0;
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
@@ -746,9 +749,11 @@ int ha_myisammrg::index_read_idx_map(uchar * buf, uint index, const uchar * key,
enum ha_rkey_function find_flag)
{
DBUG_ASSERT(this->file->children_attached);
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
ha_statistic_increment(&SSV::ha_read_key_count);
int error=myrg_rkey(file,buf,index, key, keypart_map, find_flag);
table->status=error ? STATUS_NOT_FOUND: 0;
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
@@ -756,46 +761,56 @@ int ha_myisammrg::index_read_last_map(uchar *buf, const uchar *key,
key_part_map keypart_map)
{
DBUG_ASSERT(this->file->children_attached);
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
ha_statistic_increment(&SSV::ha_read_key_count);
int error=myrg_rkey(file,buf,active_index, key, keypart_map,
HA_READ_PREFIX_LAST);
table->status=error ? STATUS_NOT_FOUND: 0;
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
int ha_myisammrg::index_next(uchar * buf)
{
DBUG_ASSERT(this->file->children_attached);
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
ha_statistic_increment(&SSV::ha_read_next_count);
int error=myrg_rnext(file,buf,active_index);
table->status=error ? STATUS_NOT_FOUND: 0;
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
int ha_myisammrg::index_prev(uchar * buf)
{
DBUG_ASSERT(this->file->children_attached);
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
ha_statistic_increment(&SSV::ha_read_prev_count);
int error=myrg_rprev(file,buf, active_index);
table->status=error ? STATUS_NOT_FOUND: 0;
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
int ha_myisammrg::index_first(uchar * buf)
{
DBUG_ASSERT(this->file->children_attached);
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
ha_statistic_increment(&SSV::ha_read_first_count);
int error=myrg_rfirst(file, buf, active_index);
table->status=error ? STATUS_NOT_FOUND: 0;
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
int ha_myisammrg::index_last(uchar * buf)
{
DBUG_ASSERT(this->file->children_attached);
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
ha_statistic_increment(&SSV::ha_read_last_count);
int error=myrg_rlast(file, buf, active_index);
table->status=error ? STATUS_NOT_FOUND: 0;
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
@@ -805,12 +820,14 @@ int ha_myisammrg::index_next_same(uchar * buf,
{
int error;
DBUG_ASSERT(this->file->children_attached);
+ MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
ha_statistic_increment(&SSV::ha_read_next_count);
do
{
error= myrg_rnext_same(file,buf);
} while (error == HA_ERR_RECORD_DELETED);
table->status=error ? STATUS_NOT_FOUND: 0;
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
@@ -825,9 +842,12 @@ int ha_myisammrg::rnd_init(bool scan)
int ha_myisammrg::rnd_next(uchar *buf)
{
DBUG_ASSERT(this->file->children_attached);
+ MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str,
+ TRUE);
ha_statistic_increment(&SSV::ha_read_rnd_next_count);
int error=myrg_rrnd(file, buf, HA_OFFSET_ERROR);
table->status=error ? STATUS_NOT_FOUND: 0;
+ MYSQL_READ_ROW_DONE(error);
return error;
}
@@ -835,9 +855,12 @@ int ha_myisammrg::rnd_next(uchar *buf)
int ha_myisammrg::rnd_pos(uchar * buf, uchar *pos)
{
DBUG_ASSERT(this->file->children_attached);
+ MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str,
+ TRUE);
ha_statistic_increment(&SSV::ha_read_rnd_count);
int error=myrg_rrnd(file, buf, my_get_ptr(pos,ref_length));
table->status=error ? STATUS_NOT_FOUND: 0;
+ MYSQL_READ_ROW_DONE(error);
return error;
}
diff --git a/storage/ndb/include/portlib/prefetch.h b/storage/ndb/include/portlib/prefetch.h
index f098c2ba6c0..fc4670115da 100644
--- a/storage/ndb/include/portlib/prefetch.h
+++ b/storage/ndb/include/portlib/prefetch.h
@@ -43,7 +43,7 @@ inline void prefetch(void* p)
__asm(" ldl r31,0(a0);", p);
#endif /* NDB_ALPHA */
#ifdef NDB_FORTE6
- sparc_prefetch_read_once(p);
+ sun_prefetch_read_once(p);
#else
(void)p;
#endif
@@ -55,7 +55,7 @@ inline void writehint(void* p)
__asm(" wh64 (a0);", p);
#endif /* NDB_ALPHA */
#ifdef NDB_FORTE6
- sparc_prefetch_write_once(p);
+ sun_prefetch_write_once(p);
#else
(void)p;
#endif
diff --git a/storage/ndb/src/common/util/version.c b/storage/ndb/src/common/util/version.c
index 56a92489131..b8d050ec97e 100644
--- a/storage/ndb/src/common/util/version.c
+++ b/storage/ndb/src/common/util/version.c
@@ -98,7 +98,7 @@ void ndbSetOwnVersion() {}
#ifndef TEST_VERSION
struct NdbUpGradeCompatible ndbCompatibleTable_full[] = {
- { MAKE_VERSION(5,1,NDB_VERSION_BUILD), MAKE_VERSION(5,1,18), UG_Range},
+ { MAKE_VERSION(5,4,NDB_VERSION_BUILD), MAKE_VERSION(5,1,18), UG_Range},
{ MAKE_VERSION(5,1,17), MAKE_VERSION(5,1,0), UG_Range},
{ MAKE_VERSION(5,0,NDB_VERSION_BUILD), MAKE_VERSION(5,0,12), UG_Range},
{ MAKE_VERSION(5,0,11), MAKE_VERSION(5,0,2), UG_Range},